Rationalise null pointer checks in both decode_codepage functions, so
[sgt/putty] / windows / winucs.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <ctype.h>
4 #include <time.h>
5 #include <assert.h>
6
7 #include "putty.h"
8 #include "terminal.h"
9 #include "misc.h"
10
/* Character conversion arrays; they are usually taken from Windows,
 * the xterm one has the four scanlines that have no Unicode 2.0
 * equivalents mapped to their Unicode 3.0 locations.
 */
/*
 * 32 Unicode values for the xterm/VT100 special graphics set.
 * init_ucs() copies this table over unitab_xterm starting at index '`',
 * and (in VT_XWINDOWS mode) over unitab_font starting at index 1.
 */
static const WCHAR unitab_xterm_std[32] = {
    0x2666, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1,
    0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba,
    0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c,
    0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x0020
};
21
/*
 * If the codepage is non-zero it's a Windows codepage, zero means use a
 * local codepage. The name is always converted to the first of any
 * duplicate definitions.
 */
27
28 /*
29 * Tables for ISO-8859-{1-10,13-16} derived from those downloaded
30 * 2001-10-02 from <http://www.unicode.org/Public/MAPPINGS/> -- jtn
31 * Table for ISO-8859-11 derived from same on 2002-11-18. -- bjh21
32 */
33
34 /* XXX: This could be done algorithmically, but I'm not sure it's
35 * worth the hassle -- jtn */
/* ISO/IEC 8859-1:1998 (Latin-1, "Western", "West European")
 *
 * 96 Unicode values (cp_list registers this table with cp_size 96).
 * For Latin-1 the values are simply 0x00A0..0x00FF in order.
 * NOTE(review): presumably indexed by (byte - 0xA0); the lookup code is
 * outside this view -- confirm against get_unitab().
 */
static const wchar_t iso_8859_1[] = {
    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
    0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
};
51
/* ISO/IEC 8859-2:1999 (Latin-2, "Central European", "East European")
 *
 * 96 Unicode values (cp_list registers this table with cp_size 96).
 */
static const wchar_t iso_8859_2[] = {
    0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
    0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
    0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
    0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
    0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
    0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
    0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
    0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
    0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
    0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
    0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
    0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9
};
67
/* ISO/IEC 8859-3:1999 (Latin-3, "South European", "Maltese & Esperanto")
 *
 * 96 Unicode values (cp_list registers this table with cp_size 96).
 * Some positions hold 0xFFFD (U+FFFD REPLACEMENT CHARACTER).
 */
static const wchar_t iso_8859_3[] = {
    0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7,
    0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B,
    0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7,
    0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C,
    0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7,
    0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF,
    0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7,
    0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9
};
83
/* ISO/IEC 8859-4:1998 (Latin-4, "North European")
 *
 * 96 Unicode values (cp_list registers this table with cp_size 96).
 */
static const wchar_t iso_8859_4[] = {
    0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7,
    0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF,
    0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7,
    0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B,
    0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
    0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A,
    0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
    0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF,
    0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
    0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B,
    0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
    0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9
};
99
/* ISO/IEC 8859-5:1999 (Latin/Cyrillic)
 *
 * 96 Unicode values (cp_list registers this table with cp_size 96).
 */
static const wchar_t iso_8859_5[] = {
    0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F
};
115
/* ISO/IEC 8859-6:1999 (Latin/Arabic)
 *
 * 96 Unicode values (cp_list registers this table with cp_size 96).
 * Many positions hold 0xFFFD (U+FFFD REPLACEMENT CHARACTER).
 */
static const wchar_t iso_8859_6[] = {
    0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F,
    0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
    0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
    0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
    0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F,
    0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD
};
131
/* ISO 8859-7:1987 (Latin/Greek)
 *
 * 96 Unicode values (cp_list registers this table with cp_size 96).
 * Some positions hold 0xFFFD (U+FFFD REPLACEMENT CHARACTER).
 */
static const wchar_t iso_8859_7[] = {
    0x00A0, 0x2018, 0x2019, 0x00A3, 0xFFFD, 0xFFFD, 0x00A6, 0x00A7,
    0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015,
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7,
    0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
    0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
    0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
    0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
    0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
    0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
    0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
    0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD
};
147
/* ISO/IEC 8859-8:1999 (Latin/Hebrew)
 *
 * 96 Unicode values (cp_list registers this table with cp_size 96).
 * Many positions hold 0xFFFD (U+FFFD REPLACEMENT CHARACTER).
 */
static const wchar_t iso_8859_8[] = {
    0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
    0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017,
    0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
    0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
    0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
    0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD
};
163
/* ISO/IEC 8859-9:1999 (Latin-5, "Turkish")
 *
 * 96 Unicode values (cp_list registers this table with cp_size 96).
 */
static const wchar_t iso_8859_9[] = {
    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
    0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF
};
179
/* ISO/IEC 8859-10:1998 (Latin-6, "Nordic" [Sami, Inuit, Icelandic])
 *
 * 96 Unicode values (cp_list registers this table with cp_size 96).
 */
static const wchar_t iso_8859_10[] = {
    0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7,
    0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A,
    0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7,
    0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B,
    0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
    0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF,
    0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168,
    0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
    0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
    0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,
    0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169,
    0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138
};
195
/* ISO/IEC 8859-11:2001 ("Thai", "TIS620")
 *
 * 96 Unicode values (cp_list registers this table with cp_size 96).
 * Some positions hold 0xFFFD (U+FFFD REPLACEMENT CHARACTER).
 */
static const wchar_t iso_8859_11[] = {
    0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
    0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
    0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
    0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
    0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
    0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
    0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
    0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
    0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
    0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
    0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
    0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD
};
211
/* ISO/IEC 8859-13:1998 (Latin-7, "Baltic Rim")
 *
 * 96 Unicode values (cp_list registers this table with cp_size 96).
 */
static const wchar_t iso_8859_13[] = {
    0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7,
    0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7,
    0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
    0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
    0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
    0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
    0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
    0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
    0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
    0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
    0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019
};
227
/* ISO/IEC 8859-14:1998 (Latin-8, "Celtic", "Gaelic/Welsh")
 *
 * 96 Unicode values (cp_list registers this table with cp_size 96).
 */
static const wchar_t iso_8859_14[] = {
    0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7,
    0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178,
    0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56,
    0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61,
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A,
    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF,
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B,
    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF
};
243
/* ISO/IEC 8859-15:1999 (Latin-9 aka -0, "euro")
 *
 * 96 Unicode values (cp_list registers this table with cp_size 96).
 */
static const wchar_t iso_8859_15[] = {
    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7,
    0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7,
    0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF,
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
};
259
/* ISO/IEC 8859-16:2001 (Latin-10, "Balkan")
 *
 * 96 Unicode values (cp_list registers this table with cp_size 96).
 */
static const wchar_t iso_8859_16[] = {
    0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7,
    0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B,
    0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7,
    0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C,
    0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A,
    0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF,
    0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B,
    0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF
};
275
/*
 * Unicode values for the charset listed in cp_list as "HP-ROMAN8";
 * 96 entries (cp_size 96 there). The final entry is 0xFFFD
 * (U+FFFD REPLACEMENT CHARACTER).
 */
static const wchar_t roman8[] = {
    0x00A0, 0x00C0, 0x00C2, 0x00C8, 0x00CA, 0x00CB, 0x00CE, 0x00CF,
    0x00B4, 0x02CB, 0x02C6, 0x00A8, 0x02DC, 0x00D9, 0x00DB, 0x20A4,
    0x00AF, 0x00DD, 0x00FD, 0x00B0, 0x00C7, 0x00E7, 0x00D1, 0x00F1,
    0x00A1, 0x00BF, 0x00A4, 0x00A3, 0x00A5, 0x00A7, 0x0192, 0x00A2,
    0x00E2, 0x00EA, 0x00F4, 0x00FB, 0x00E1, 0x00E9, 0x00F3, 0x00FA,
    0x00E0, 0x00E8, 0x00F2, 0x00F9, 0x00E4, 0x00EB, 0x00F6, 0x00FC,
    0x00C5, 0x00EE, 0x00D8, 0x00C6, 0x00E5, 0x00ED, 0x00F8, 0x00E6,
    0x00C4, 0x00EC, 0x00D6, 0x00DC, 0x00C9, 0x00EF, 0x00DF, 0x00D4,
    0x00C1, 0x00C3, 0x00E3, 0x00D0, 0x00F0, 0x00CD, 0x00CC, 0x00D3,
    0x00D2, 0x00D5, 0x00F5, 0x0160, 0x0161, 0x00DA, 0x0178, 0x00FF,
    0x00DE, 0x00FE, 0x00B7, 0x00B5, 0x00B6, 0x00BE, 0x2014, 0x00BC,
    0x00BD, 0x00AA, 0x00BA, 0x00AB, 0x25A0, 0x00BB, 0x00B1, 0xFFFD
};
290
/*
 * Unicode values for the charset listed in cp_list as "KOI8-U";
 * 128 entries (cp_size 128 there).
 * NOTE(review): presumably indexed by (byte - 0x80) -- confirm against
 * get_unitab(), which is outside this view.
 */
static const wchar_t koi8_u[] = {
    0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
    0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
    0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2022, 0x221A, 0x2248,
    0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
    0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,
    0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E,
    0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,
    0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9,
    0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
    0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
    0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
    0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
    0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
    0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
    0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
    0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A
};
309
/*
 * Unicode values for the Vietnamese charset listed in cp_list as "VSCII";
 * 256 entries (cp_size 256 there), one per byte value. Unlike the
 * 96- and 128-entry tables this one also remaps a few positions below
 * 0x20 to Vietnamese letters.
 *
 * Each row of eight covers byte values 0xN0-0xN7 or 0xN8-0xNF. The row
 * for 0xA0-0xA7 holds the lowercase counterparts of the letters in
 * 0x81-0x87, so 0xA6 is U+1EA9 (the capital U+1EA8 lives at 0x86).
 */
static const wchar_t vscii[] = {
    0x0000, 0x0001, 0x1EB2, 0x0003, 0x0004, 0x1EB4, 0x1EAA, 0x0007,
    0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
    0x0010, 0x0011, 0x0012, 0x0013, 0x1EF6, 0x0015, 0x0016, 0x0017,
    0x0018, 0x1EF8, 0x001a, 0x001b, 0x001c, 0x001d, 0x1EF4, 0x001f,
    0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
    0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
    0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
    0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
    0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
    0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
    0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
    0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
    0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
    0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
    0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007f,
    0x1EA0, 0x1EAE, 0x1EB0, 0x1EB6, 0x1EA4, 0x1EA6, 0x1EA8, 0x1EAC,
    0x1EBC, 0x1EB8, 0x1EBE, 0x1EC0, 0x1EC2, 0x1EC4, 0x1EC6, 0x1ED0,
    0x1ED2, 0x1ED4, 0x1ED6, 0x1ED8, 0x1EE2, 0x1EDA, 0x1EDC, 0x1EDE,
    0x1ECA, 0x1ECE, 0x1ECC, 0x1EC8, 0x1EE6, 0x0168, 0x1EE4, 0x1EF2,
    0x00D5, 0x1EAF, 0x1EB1, 0x1EB7, 0x1EA5, 0x1EA7, 0x1EA9, 0x1EAD,
    0x1EBD, 0x1EB9, 0x1EBF, 0x1EC1, 0x1EC3, 0x1EC5, 0x1EC7, 0x1ED1,
    0x1ED3, 0x1ED5, 0x1ED7, 0x1EE0, 0x01A0, 0x1ED9, 0x1EDD, 0x1EDF,
    0x1ECB, 0x1EF0, 0x1EE8, 0x1EEA, 0x1EEC, 0x01A1, 0x1EDB, 0x01AF,
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x1EA2, 0x0102, 0x1EB3, 0x1EB5,
    0x00C8, 0x00C9, 0x00CA, 0x1EBA, 0x00CC, 0x00CD, 0x0128, 0x1EF3,
    0x0110, 0x1EE9, 0x00D2, 0x00D3, 0x00D4, 0x1EA1, 0x1EF7, 0x1EEB,
    0x1EED, 0x00D9, 0x00DA, 0x1EF9, 0x1EF5, 0x00DD, 0x1EE1, 0x01B0,
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x1EA3, 0x0103, 0x1EEF, 0x1EAB,
    0x00E8, 0x00E9, 0x00EA, 0x1EBB, 0x00EC, 0x00ED, 0x0129, 0x1EC9,
    0x0111, 0x1EF1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x1ECF, 0x1ECD,
    0x1EE5, 0x00F9, 0x00FA, 0x0169, 0x1EE7, 0x00FD, 0x1EE3, 0x1EEE
};
344
/*
 * Unicode values for the charset listed in cp_list as "DEC-MCS";
 * 96 entries (cp_size 96 there). Several positions hold 0xFFFD
 * (U+FFFD REPLACEMENT CHARACTER).
 */
static const wchar_t dec_mcs[] = {
    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0xFFFD, 0x00A5, 0xFFFD, 0x00A7,
    0x00A4, 0x00A9, 0x00AA, 0x00AB, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0xFFFD, 0x00B5, 0x00B6, 0x00B7,
    0xFFFD, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0xFFFD, 0x00BF,
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0152,
    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0178, 0xFFFD, 0x00DF,
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0153,
    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FF, 0xFFFD, 0xFFFD
};
359
/* Mazovia (Polish) aka CP620
 * from "Mazowia to Unicode table", 04/24/96, Mikolaj Jedrzejak
 *
 * 128 Unicode values (cp_list registers this table with cp_size 128).
 * The entry for code point 0x9B is 0xFFFD (fourth value on the fourth
 * row), which places the first entry at code point 0x80.
 */
static const wchar_t mazovia[] = {
    /* Code point 0x9B is "zloty" symbol (z&#0142;), which is not
     * widely used and for which there is no Unicode equivalent.
     * One reference shows 0xA8 as U+00A7 SECTION SIGN, but we're
     * told that's incorrect. */
    0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x0105, 0x00E7,
    0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x0107, 0x00C4, 0x0104,
    0x0118, 0x0119, 0x0142, 0x00F4, 0x00F6, 0x0106, 0x00FB, 0x00F9,
    0x015a, 0x00D6, 0x00DC, 0xFFFD, 0x0141, 0x00A5, 0x015b, 0x0192,
    0x0179, 0x017b, 0x00F3, 0x00d3, 0x0144, 0x0143, 0x017a, 0x017c,
    0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
    0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
    0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
    0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F,
    0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
    0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B,
    0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
    0x03B1, 0x00DF, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4,
    0x03A6, 0x0398, 0x03A9, 0x03B4, 0x221E, 0x03C6, 0x03B5, 0x2229,
    0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248,
    0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x25A0, 0x00A0
};
384
/*
 * One selectable character set, as listed in cp_list below.
 */
struct cp_list_item {
    char *name;              /* display name; 0 in the terminating entry */
    int codepage;            /* non-zero: a Windows codepage number (-1 is
                              * used for "Use font encoding"); zero: use
                              * the local table in cp_table */
    int cp_size;             /* number of entries in cp_table */
    const wchar_t *cp_table; /* local table of Unicode values, or unset
                              * when codepage is non-zero */
};
391
/*
 * The known character sets. Entries that give only a name and a codepage
 * leave cp_size and cp_table zero-initialised; entries with codepage 0
 * supply one of the local tables defined above together with its length.
 * Some codepage numbers appear under more than one name (e.g. 20866 for
 * both "KOI8-R" and "CP878"). The list ends with a {0, 0} sentinel.
 */
static const struct cp_list_item cp_list[] = {
    {"UTF-8", CP_UTF8},

    {"ISO-8859-1:1998 (Latin-1, West Europe)", 0, 96, iso_8859_1},
    {"ISO-8859-2:1999 (Latin-2, East Europe)", 0, 96, iso_8859_2},
    {"ISO-8859-3:1999 (Latin-3, South Europe)", 0, 96, iso_8859_3},
    {"ISO-8859-4:1998 (Latin-4, North Europe)", 0, 96, iso_8859_4},
    {"ISO-8859-5:1999 (Latin/Cyrillic)", 0, 96, iso_8859_5},
    {"ISO-8859-6:1999 (Latin/Arabic)", 0, 96, iso_8859_6},
    {"ISO-8859-7:1987 (Latin/Greek)", 0, 96, iso_8859_7},
    {"ISO-8859-8:1999 (Latin/Hebrew)", 0, 96, iso_8859_8},
    {"ISO-8859-9:1999 (Latin-5, Turkish)", 0, 96, iso_8859_9},
    {"ISO-8859-10:1998 (Latin-6, Nordic)", 0, 96, iso_8859_10},
    {"ISO-8859-11:2001 (Latin/Thai)", 0, 96, iso_8859_11},
    {"ISO-8859-13:1998 (Latin-7, Baltic)", 0, 96, iso_8859_13},
    {"ISO-8859-14:1998 (Latin-8, Celtic)", 0, 96, iso_8859_14},
    {"ISO-8859-15:1999 (Latin-9, \"euro\")", 0, 96, iso_8859_15},
    {"ISO-8859-16:2001 (Latin-10, Balkan)", 0, 96, iso_8859_16},

    {"KOI8-U", 0, 128, koi8_u},
    {"KOI8-R", 20866},
    {"HP-ROMAN8", 0, 96, roman8},
    {"VSCII", 0, 256, vscii},
    {"DEC-MCS", 0, 96, dec_mcs},

    {"Win1250 (Central European)", 1250},
    {"Win1251 (Cyrillic)", 1251},
    {"Win1252 (Western)", 1252},
    {"Win1253 (Greek)", 1253},
    {"Win1254 (Turkish)", 1254},
    {"Win1255 (Hebrew)", 1255},
    {"Win1256 (Arabic)", 1256},
    {"Win1257 (Baltic)", 1257},
    {"Win1258 (Vietnamese)", 1258},

    {"CP437", 437},
    {"CP620 (Mazovia)", 0, 128, mazovia},
    {"CP819", 28591},
    {"CP852", 852},
    {"CP878", 20866},

    {"Use font encoding", -1},

    {0, 0}
};
437
/* Forward declaration: init_ucs() calls link_font(), which is defined
 * after it. */
static void link_font(WCHAR * line_tbl, WCHAR * font_tbl, WCHAR attr);
439
440 void init_ucs(Conf *conf, struct unicode_data *ucsdata)
441 {
442 int i, j;
443 int used_dtf = 0;
444 char tbuf[256];
445 int vtmode;
446
447 for (i = 0; i < 256; i++)
448 tbuf[i] = i;
449
450 /* Decide on the Line and Font codepages */
451 ucsdata->line_codepage = decode_codepage(conf_get_str(conf,
452 CONF_line_codepage));
453
454 if (ucsdata->font_codepage <= 0) {
455 ucsdata->font_codepage=0;
456 ucsdata->dbcs_screenfont=0;
457 }
458
459 vtmode = conf_get_int(conf, CONF_vtmode);
460 if (vtmode == VT_OEMONLY) {
461 ucsdata->font_codepage = 437;
462 ucsdata->dbcs_screenfont = 0;
463 if (ucsdata->line_codepage <= 0)
464 ucsdata->line_codepage = GetACP();
465 } else if (ucsdata->line_codepage <= 0)
466 ucsdata->line_codepage = ucsdata->font_codepage;
467
468 /* Collect screen font ucs table */
469 if (ucsdata->dbcs_screenfont || ucsdata->font_codepage == 0) {
470 get_unitab(ucsdata->font_codepage, ucsdata->unitab_font, 2);
471 for (i = 128; i < 256; i++)
472 ucsdata->unitab_font[i] = (WCHAR) (CSET_ACP + i);
473 } else {
474 get_unitab(ucsdata->font_codepage, ucsdata->unitab_font, 1);
475
476 /* CP437 fonts are often broken ... */
477 if (ucsdata->font_codepage == 437)
478 ucsdata->unitab_font[0] = ucsdata->unitab_font[255] = 0xFFFF;
479 }
480 if (vtmode == VT_XWINDOWS)
481 memcpy(ucsdata->unitab_font + 1, unitab_xterm_std,
482 sizeof(unitab_xterm_std));
483
484 /* Collect OEMCP ucs table */
485 get_unitab(CP_OEMCP, ucsdata->unitab_oemcp, 1);
486
487 /* Collect CP437 ucs table for SCO acs */
488 if (vtmode == VT_OEMANSI || vtmode == VT_XWINDOWS)
489 memcpy(ucsdata->unitab_scoacs, ucsdata->unitab_oemcp,
490 sizeof(ucsdata->unitab_scoacs));
491 else
492 get_unitab(437, ucsdata->unitab_scoacs, 1);
493
494 /* Collect line set ucs table */
495 if (ucsdata->line_codepage == ucsdata->font_codepage &&
496 (ucsdata->dbcs_screenfont ||
497 vtmode == VT_POORMAN || ucsdata->font_codepage==0)) {
498
499 /* For DBCS and POOR fonts force direct to font */
500 used_dtf = 1;
501 for (i = 0; i < 32; i++)
502 ucsdata->unitab_line[i] = (WCHAR) i;
503 for (i = 32; i < 256; i++)
504 ucsdata->unitab_line[i] = (WCHAR) (CSET_ACP + i);
505 ucsdata->unitab_line[127] = (WCHAR) 127;
506 } else {
507 get_unitab(ucsdata->line_codepage, ucsdata->unitab_line, 0);
508 }
509
510 #if 0
511 debug(
512 ("Line cp%d, Font cp%d%s\n", ucsdata->line_codepage,
513 ucsdata->font_codepage, ucsdata->dbcs_screenfont ? " DBCS" : ""));
514
515 for (i = 0; i < 256; i += 16) {
516 for (j = 0; j < 16; j++) {
517 debug(("%04x%s", ucsdata->unitab_line[i + j], j == 15 ? "" : ","));
518 }
519 debug(("\n"));
520 }
521 #endif
522
523 /* VT100 graphics - NB: Broken for non-ascii CP's */
524 memcpy(ucsdata->unitab_xterm, ucsdata->unitab_line,
525 sizeof(ucsdata->unitab_xterm));
526 memcpy(ucsdata->unitab_xterm + '`', unitab_xterm_std,
527 sizeof(unitab_xterm_std));
528 ucsdata->unitab_xterm['_'] = ' ';
529
530 /* Generate UCS ->line page table. */
531 if (ucsdata->uni_tbl) {
532 for (i = 0; i < 256; i++)
533 if (ucsdata->uni_tbl[i])
534 sfree(ucsdata->uni_tbl[i]);
535 sfree(ucsdata->uni_tbl);
536 ucsdata->uni_tbl = 0;
537 }
538 if (!used_dtf) {
539 for (i = 0; i < 256; i++) {
540 if (DIRECT_CHAR(ucsdata->unitab_line[i]))
541 continue;
542 if (DIRECT_FONT(ucsdata->unitab_line[i]))
543 continue;
544 if (!ucsdata->uni_tbl) {
545 ucsdata->uni_tbl = snewn(256, char *);
546 memset(ucsdata->uni_tbl, 0, 256 * sizeof(char *));
547 }
548 j = ((ucsdata->unitab_line[i] >> 8) & 0xFF);
549 if (!ucsdata->uni_tbl[j]) {
550 ucsdata->uni_tbl[j] = snewn(256, char);
551 memset(ucsdata->uni_tbl[j], 0, 256 * sizeof(char));
552 }
553 ucsdata->uni_tbl[j][ucsdata->unitab_line[i] & 0xFF] = i;
554 }
555 }
556
557 /* Find the line control characters. */
558 for (i = 0; i < 256; i++)
559 if (ucsdata->unitab_line[i] < ' '
560 || (ucsdata->unitab_line[i] >= 0x7F &&
561 ucsdata->unitab_line[i] < 0xA0))
562 ucsdata->unitab_ctrl[i] = i;
563 else
564 ucsdata->unitab_ctrl[i] = 0xFF;
565
566 /* Generate line->screen direct conversion links. */
567 if (vtmode == VT_OEMANSI || vtmode == VT_XWINDOWS)
568 link_font(ucsdata->unitab_scoacs, ucsdata->unitab_oemcp, CSET_OEMCP);
569
570 link_font(ucsdata->unitab_line, ucsdata->unitab_font, CSET_ACP);
571 link_font(ucsdata->unitab_scoacs, ucsdata->unitab_font, CSET_ACP);
572 link_font(ucsdata->unitab_xterm, ucsdata->unitab_font, CSET_ACP);
573
574 if (vtmode == VT_OEMANSI || vtmode == VT_XWINDOWS) {
575 link_font(ucsdata->unitab_line, ucsdata->unitab_oemcp, CSET_OEMCP);
576 link_font(ucsdata->unitab_xterm, ucsdata->unitab_oemcp, CSET_OEMCP);
577 }
578
579 if (ucsdata->dbcs_screenfont &&
580 ucsdata->font_codepage != ucsdata->line_codepage) {
581 /* F***ing Microsoft fonts, Japanese and Korean codepage fonts
582 * have a currency symbol at 0x5C but their unicode value is
583 * still given as U+005C not the correct U+00A5. */
584 ucsdata->unitab_line['\\'] = CSET_OEMCP + '\\';
585 }
586
587 /* Last chance, if !unicode then try poorman links. */
588 if (vtmode != VT_UNICODE) {
589 static const char poorman_scoacs[] =
590 "CueaaaaceeeiiiAAE**ooouuyOUc$YPsaiounNao?++**!<>###||||++||++++++--|-+||++--|-+----++++++++##||#aBTPEsyt******EN=+><++-=... n2* ";
591 static const char poorman_latin1[] =
592 " !cL.Y|S\"Ca<--R~o+23'u|.,1o>///?AAAAAAACEEEEIIIIDNOOOOOxOUUUUYPBaaaaaaaceeeeiiiionooooo/ouuuuypy";
593 static const char poorman_vt100[] = "*#****o~**+++++-----++++|****L.";
594
595 for (i = 160; i < 256; i++)
596 if (!DIRECT_FONT(ucsdata->unitab_line[i]) &&
597 ucsdata->unitab_line[i] >= 160 &&
598 ucsdata->unitab_line[i] < 256) {
599 ucsdata->unitab_line[i] =
600 (WCHAR) (CSET_ACP +
601 poorman_latin1[ucsdata->unitab_line[i] - 160]);
602 }
603 for (i = 96; i < 127; i++)
604 if (!DIRECT_FONT(ucsdata->unitab_xterm[i]))
605 ucsdata->unitab_xterm[i] =
606 (WCHAR) (CSET_ACP + poorman_vt100[i - 96]);
607 for(i=128;i<256;i++)
608 if (!DIRECT_FONT(ucsdata->unitab_scoacs[i]))
609 ucsdata->unitab_scoacs[i] =
610 (WCHAR) (CSET_ACP + poorman_scoacs[i - 128]);
611 }
612 }
613
614 static void link_font(WCHAR * line_tbl, WCHAR * font_tbl, WCHAR attr)
615 {
616 int font_index, line_index, i;
617 for (line_index = 0; line_index < 256; line_index++) {
618 if (DIRECT_FONT(line_tbl[line_index]))
619 continue;
620 for(i = 0; i < 256; i++) {
621 font_index = ((32 + i) & 0xFF);
622 if (line_tbl[line_index] == font_tbl[font_index]) {
623 line_tbl[line_index] = (WCHAR) (attr + font_index);
624 break;
625 }
626 }
627 }
628 }
629
/*
 * Translate a character typed on a US keyboard into its Cyrillic
 * counterpart.
 *
 * Only the low seven bits of ch are used. Positions written below as a
 * plain decimal equal to their own index map to themselves; the rest map
 * to the listed Unicode value (mostly Cyrillic letters, plus '.' and ','
 * moved to different keys).
 */
wchar_t xlat_uskbd2cyrllic(int ch)
{
    static const wchar_t us_to_cyr[] = {
        /* 0x00 */      0,      1,      2,      3,      4,      5,      6,      7,
        /* 0x08 */      8,      9,     10,     11,     12,     13,     14,     15,
        /* 0x10 */     16,     17,     18,     19,     20,     21,     22,     23,
        /* 0x18 */     24,     25,     26,     27,     28,     29,     30,     31,
        /* 0x20 */     32,     33, 0x042d,     35,     36,     37,     38, 0x044d,
        /* 0x28 */     40,     41,     42, 0x0406, 0x0431, 0x0454, 0x044e, 0x002e,
        /* 0x30 */     48,     49,     50,     51,     52,     53,     54,     55,
        /* 0x38 */     56,     57, 0x0416, 0x0436, 0x0411, 0x0456, 0x042e, 0x002c,
        /* 0x40 */     64, 0x0424, 0x0418, 0x0421, 0x0412, 0x0423, 0x0410, 0x041f,
        /* 0x48 */ 0x0420, 0x0428, 0x041e, 0x041b, 0x0414, 0x042c, 0x0422, 0x0429,
        /* 0x50 */ 0x0417, 0x0419, 0x041a, 0x042b, 0x0415, 0x0413, 0x041c, 0x0426,
        /* 0x58 */ 0x0427, 0x041d, 0x042f, 0x0445, 0x0457, 0x044a,     94, 0x0404,
        /* 0x60 */     96, 0x0444, 0x0438, 0x0441, 0x0432, 0x0443, 0x0430, 0x043f,
        /* 0x68 */ 0x0440, 0x0448, 0x043e, 0x043b, 0x0434, 0x044c, 0x0442, 0x0449,
        /* 0x70 */ 0x0437, 0x0439, 0x043a, 0x044b, 0x0435, 0x0433, 0x043c, 0x0446,
        /* 0x78 */ 0x0447, 0x043d, 0x044f, 0x0425, 0x0407, 0x042a,    126,    127
    };
    const int key = ch & 0x7F;

    return us_to_cyr[key];
}
652
/*
 * Look up the character produced by composing `first` with `second`.
 *
 * The table is scanned in order and the first matching pair wins.
 * When `recurse` is zero and the pair is not found as given, three
 * further lookups are attempted in turn: the pair swapped, the pair
 * upper-cased, and the pair upper-cased and swapped. A non-zero
 * `recurse` suppresses those fallbacks (it is what the fallback
 * lookups themselves pass).
 *
 * Returns the composed character, or -1 if nothing matched.
 */
int check_compose_internal(int first, int second, int recurse)
{
    static const struct {
        char lhs, rhs;
        wchar_t glyph;
    } compose_pairs[] = {
        {0x2b, 0x2b, 0x0023},
        {0x41, 0x41, 0x0040},
        {0x28, 0x28, 0x005b},
        {0x2f, 0x2f, 0x005c},
        {0x29, 0x29, 0x005d},
        {0x28, 0x2d, 0x007b},
        {0x2d, 0x29, 0x007d},
        {0x2f, 0x5e, 0x007c},
        {0x21, 0x21, 0x00a1},
        {0x43, 0x2f, 0x00a2},
        {0x43, 0x7c, 0x00a2},
        {0x4c, 0x2d, 0x00a3},
        {0x4c, 0x3d, 0x20a4},
        {0x58, 0x4f, 0x00a4},
        {0x58, 0x30, 0x00a4},
        {0x59, 0x2d, 0x00a5},
        {0x59, 0x3d, 0x00a5},
        {0x7c, 0x7c, 0x00a6},
        {0x53, 0x4f, 0x00a7},
        {0x53, 0x21, 0x00a7},
        {0x53, 0x30, 0x00a7},
        {0x22, 0x22, 0x00a8},
        {0x43, 0x4f, 0x00a9},
        {0x43, 0x30, 0x00a9},
        {0x41, 0x5f, 0x00aa},
        {0x3c, 0x3c, 0x00ab},
        {0x2c, 0x2d, 0x00ac},
        {0x2d, 0x2d, 0x00ad},
        {0x52, 0x4f, 0x00ae},
        {0x2d, 0x5e, 0x00af},
        {0x30, 0x5e, 0x00b0},
        {0x2b, 0x2d, 0x00b1},
        {0x32, 0x5e, 0x00b2},
        {0x33, 0x5e, 0x00b3},
        {0x27, 0x27, 0x00b4},
        {0x2f, 0x55, 0x00b5},
        {0x50, 0x21, 0x00b6},
        {0x2e, 0x5e, 0x00b7},
        {0x2c, 0x2c, 0x00b8},
        {0x31, 0x5e, 0x00b9},
        {0x4f, 0x5f, 0x00ba},
        {0x3e, 0x3e, 0x00bb},
        {0x31, 0x34, 0x00bc},
        {0x31, 0x32, 0x00bd},
        {0x33, 0x34, 0x00be},
        {0x3f, 0x3f, 0x00bf},
        {0x60, 0x41, 0x00c0},
        {0x27, 0x41, 0x00c1},
        {0x5e, 0x41, 0x00c2},
        {0x7e, 0x41, 0x00c3},
        {0x22, 0x41, 0x00c4},
        {0x2a, 0x41, 0x00c5},
        {0x41, 0x45, 0x00c6},
        {0x2c, 0x43, 0x00c7},
        {0x60, 0x45, 0x00c8},
        {0x27, 0x45, 0x00c9},
        {0x5e, 0x45, 0x00ca},
        {0x22, 0x45, 0x00cb},
        {0x60, 0x49, 0x00cc},
        {0x27, 0x49, 0x00cd},
        {0x5e, 0x49, 0x00ce},
        {0x22, 0x49, 0x00cf},
        {0x2d, 0x44, 0x00d0},
        {0x7e, 0x4e, 0x00d1},
        {0x60, 0x4f, 0x00d2},
        {0x27, 0x4f, 0x00d3},
        {0x5e, 0x4f, 0x00d4},
        {0x7e, 0x4f, 0x00d5},
        {0x22, 0x4f, 0x00d6},
        {0x58, 0x58, 0x00d7},
        {0x2f, 0x4f, 0x00d8},
        {0x60, 0x55, 0x00d9},
        {0x27, 0x55, 0x00da},
        {0x5e, 0x55, 0x00db},
        {0x22, 0x55, 0x00dc},
        {0x27, 0x59, 0x00dd},
        {0x48, 0x54, 0x00de},
        {0x73, 0x73, 0x00df},
        {0x60, 0x61, 0x00e0},
        {0x27, 0x61, 0x00e1},
        {0x5e, 0x61, 0x00e2},
        {0x7e, 0x61, 0x00e3},
        {0x22, 0x61, 0x00e4},
        {0x2a, 0x61, 0x00e5},
        {0x61, 0x65, 0x00e6},
        {0x2c, 0x63, 0x00e7},
        {0x60, 0x65, 0x00e8},
        {0x27, 0x65, 0x00e9},
        {0x5e, 0x65, 0x00ea},
        {0x22, 0x65, 0x00eb},
        {0x60, 0x69, 0x00ec},
        {0x27, 0x69, 0x00ed},
        {0x5e, 0x69, 0x00ee},
        {0x22, 0x69, 0x00ef},
        {0x2d, 0x64, 0x00f0},
        {0x7e, 0x6e, 0x00f1},
        {0x60, 0x6f, 0x00f2},
        {0x27, 0x6f, 0x00f3},
        {0x5e, 0x6f, 0x00f4},
        {0x7e, 0x6f, 0x00f5},
        {0x22, 0x6f, 0x00f6},
        {0x3a, 0x2d, 0x00f7},
        {0x6f, 0x2f, 0x00f8},
        {0x60, 0x75, 0x00f9},
        {0x27, 0x75, 0x00fa},
        {0x5e, 0x75, 0x00fb},
        {0x22, 0x75, 0x00fc},
        {0x27, 0x79, 0x00fd},
        {0x68, 0x74, 0x00fe},
        {0x22, 0x79, 0x00ff},
        /* Unicode extras. */
        {0x6f, 0x65, 0x0153},
        {0x4f, 0x45, 0x0152},
        /* Compose pairs from UCS */
        {0x41, 0x2D, 0x0100},
        {0x61, 0x2D, 0x0101},
        {0x43, 0x27, 0x0106},
        {0x63, 0x27, 0x0107},
        {0x43, 0x5E, 0x0108},
        {0x63, 0x5E, 0x0109},
        {0x45, 0x2D, 0x0112},
        {0x65, 0x2D, 0x0113},
        {0x47, 0x5E, 0x011C},
        {0x67, 0x5E, 0x011D},
        {0x47, 0x2C, 0x0122},
        {0x67, 0x2C, 0x0123},
        {0x48, 0x5E, 0x0124},
        {0x68, 0x5E, 0x0125},
        {0x49, 0x7E, 0x0128},
        {0x69, 0x7E, 0x0129},
        {0x49, 0x2D, 0x012A},
        {0x69, 0x2D, 0x012B},
        {0x4A, 0x5E, 0x0134},
        {0x6A, 0x5E, 0x0135},
        {0x4B, 0x2C, 0x0136},
        {0x6B, 0x2C, 0x0137},
        {0x4C, 0x27, 0x0139},
        {0x6C, 0x27, 0x013A},
        {0x4C, 0x2C, 0x013B},
        {0x6C, 0x2C, 0x013C},
        {0x4E, 0x27, 0x0143},
        {0x6E, 0x27, 0x0144},
        {0x4E, 0x2C, 0x0145},
        {0x6E, 0x2C, 0x0146},
        {0x4F, 0x2D, 0x014C},
        {0x6F, 0x2D, 0x014D},
        {0x52, 0x27, 0x0154},
        {0x72, 0x27, 0x0155},
        {0x52, 0x2C, 0x0156},
        {0x72, 0x2C, 0x0157},
        {0x53, 0x27, 0x015A},
        {0x73, 0x27, 0x015B},
        {0x53, 0x5E, 0x015C},
        {0x73, 0x5E, 0x015D},
        {0x53, 0x2C, 0x015E},
        {0x73, 0x2C, 0x015F},
        {0x54, 0x2C, 0x0162},
        {0x74, 0x2C, 0x0163},
        {0x55, 0x7E, 0x0168},
        {0x75, 0x7E, 0x0169},
        {0x55, 0x2D, 0x016A},
        {0x75, 0x2D, 0x016B},
        {0x55, 0x2A, 0x016E},
        {0x75, 0x2A, 0x016F},
        {0x57, 0x5E, 0x0174},
        {0x77, 0x5E, 0x0175},
        {0x59, 0x5E, 0x0176},
        {0x79, 0x5E, 0x0177},
        {0x59, 0x22, 0x0178},
        {0x5A, 0x27, 0x0179},
        {0x7A, 0x27, 0x017A},
        {0x47, 0x27, 0x01F4},
        {0x67, 0x27, 0x01F5},
        {0x4E, 0x60, 0x01F8},
        {0x6E, 0x60, 0x01F9},
        {0x45, 0x2C, 0x0228},
        {0x65, 0x2C, 0x0229},
        {0x59, 0x2D, 0x0232},
        {0x79, 0x2D, 0x0233},
        {0x44, 0x2C, 0x1E10},
        {0x64, 0x2C, 0x1E11},
        {0x47, 0x2D, 0x1E20},
        {0x67, 0x2D, 0x1E21},
        {0x48, 0x22, 0x1E26},
        {0x68, 0x22, 0x1E27},
        {0x48, 0x2C, 0x1E28},
        {0x68, 0x2C, 0x1E29},
        {0x4B, 0x27, 0x1E30},
        {0x6B, 0x27, 0x1E31},
        {0x4D, 0x27, 0x1E3E},
        {0x6D, 0x27, 0x1E3F},
        {0x50, 0x27, 0x1E54},
        {0x70, 0x27, 0x1E55},
        {0x56, 0x7E, 0x1E7C},
        {0x76, 0x7E, 0x1E7D},
        {0x57, 0x60, 0x1E80},
        {0x77, 0x60, 0x1E81},
        {0x57, 0x27, 0x1E82},
        {0x77, 0x27, 0x1E83},
        {0x57, 0x22, 0x1E84},
        {0x77, 0x22, 0x1E85},
        {0x58, 0x22, 0x1E8C},
        {0x78, 0x22, 0x1E8D},
        {0x5A, 0x5E, 0x1E90},
        {0x7A, 0x5E, 0x1E91},
        {0x74, 0x22, 0x1E97},
        {0x77, 0x2A, 0x1E98},
        {0x79, 0x2A, 0x1E99},
        {0x45, 0x7E, 0x1EBC},
        {0x65, 0x7E, 0x1EBD},
        {0x59, 0x60, 0x1EF2},
        {0x79, 0x60, 0x1EF3},
        {0x59, 0x7E, 0x1EF8},
        {0x79, 0x7E, 0x1EF9},
        /* Compatible/possibles from UCS */
        {0x49, 0x4A, 0x0132},
        {0x69, 0x6A, 0x0133},
        {0x4C, 0x4A, 0x01C7},
        {0x4C, 0x6A, 0x01C8},
        {0x6C, 0x6A, 0x01C9},
        {0x4E, 0x4A, 0x01CA},
        {0x4E, 0x6A, 0x01CB},
        {0x6E, 0x6A, 0x01CC},
        {0x44, 0x5A, 0x01F1},
        {0x44, 0x7A, 0x01F2},
        {0x64, 0x7A, 0x01F3},
        {0x2E, 0x2E, 0x2025},
        {0x21, 0x21, 0x203C},
        {0x3F, 0x21, 0x2048},
        {0x21, 0x3F, 0x2049},
        {0x52, 0x73, 0x20A8},
        {0x4E, 0x6F, 0x2116},
        {0x53, 0x4D, 0x2120},
        {0x54, 0x4D, 0x2122},
        {0x49, 0x49, 0x2161},
        {0x49, 0x56, 0x2163},
        {0x56, 0x49, 0x2165},
        {0x49, 0x58, 0x2168},
        {0x58, 0x49, 0x216A},
        {0x69, 0x69, 0x2171},
        {0x69, 0x76, 0x2173},
        {0x76, 0x69, 0x2175},
        {0x69, 0x78, 0x2178},
        {0x78, 0x69, 0x217A},
        {0x31, 0x30, 0x2469},
        {0x31, 0x31, 0x246A},
        {0x31, 0x32, 0x246B},
        {0x31, 0x33, 0x246C},
        {0x31, 0x34, 0x246D},
        {0x31, 0x35, 0x246E},
        {0x31, 0x36, 0x246F},
        {0x31, 0x37, 0x2470},
        {0x31, 0x38, 0x2471},
        {0x31, 0x39, 0x2472},
        {0x32, 0x30, 0x2473},
        {0x31, 0x2E, 0x2488},
        {0x32, 0x2E, 0x2489},
        {0x33, 0x2E, 0x248A},
        {0x34, 0x2E, 0x248B},
        {0x35, 0x2E, 0x248C},
        {0x36, 0x2E, 0x248D},
        {0x37, 0x2E, 0x248E},
        {0x38, 0x2E, 0x248F},
        {0x39, 0x2E, 0x2490},
        {0x64, 0x61, 0x3372},
        {0x41, 0x55, 0x3373},
        {0x6F, 0x56, 0x3375},
        {0x70, 0x63, 0x3376},
        {0x70, 0x41, 0x3380},
        {0x6E, 0x41, 0x3381},
        {0x6D, 0x41, 0x3383},
        {0x6B, 0x41, 0x3384},
        {0x4B, 0x42, 0x3385},
        {0x4D, 0x42, 0x3386},
        {0x47, 0x42, 0x3387},
        {0x70, 0x46, 0x338A},
        {0x6E, 0x46, 0x338B},
        {0x6D, 0x67, 0x338E},
        {0x6B, 0x67, 0x338F},
        {0x48, 0x7A, 0x3390},
        {0x66, 0x6D, 0x3399},
        {0x6E, 0x6D, 0x339A},
        {0x6D, 0x6D, 0x339C},
        {0x63, 0x6D, 0x339D},
        {0x6B, 0x6D, 0x339E},
        {0x50, 0x61, 0x33A9},
        {0x70, 0x73, 0x33B0},
        {0x6E, 0x73, 0x33B1},
        {0x6D, 0x73, 0x33B3},
        {0x70, 0x56, 0x33B4},
        {0x6E, 0x56, 0x33B5},
        {0x6D, 0x56, 0x33B7},
        {0x6B, 0x56, 0x33B8},
        {0x4D, 0x56, 0x33B9},
        {0x70, 0x57, 0x33BA},
        {0x6E, 0x57, 0x33BB},
        {0x6D, 0x57, 0x33BD},
        {0x6B, 0x57, 0x33BE},
        {0x4D, 0x57, 0x33BF},
        {0x42, 0x71, 0x33C3},
        {0x63, 0x63, 0x33C4},
        {0x63, 0x64, 0x33C5},
        {0x64, 0x42, 0x33C8},
        {0x47, 0x79, 0x33C9},
        {0x68, 0x61, 0x33CA},
        {0x48, 0x50, 0x33CB},
        {0x69, 0x6E, 0x33CC},
        {0x4B, 0x4B, 0x33CD},
        {0x4B, 0x4D, 0x33CE},
        {0x6B, 0x74, 0x33CF},
        {0x6C, 0x6D, 0x33D0},
        {0x6C, 0x6E, 0x33D1},
        {0x6C, 0x78, 0x33D3},
        {0x6D, 0x62, 0x33D4},
        {0x50, 0x48, 0x33D7},
        {0x50, 0x52, 0x33DA},
        {0x73, 0x72, 0x33DB},
        {0x53, 0x76, 0x33DC},
        {0x57, 0x62, 0x33DD},
        {0x66, 0x66, 0xFB00},
        {0x66, 0x69, 0xFB01},
        {0x66, 0x6C, 0xFB02},
        {0x73, 0x74, 0xFB06},
        /* End marker: the scan stops at the first entry whose lhs is 0. */
        {0, 0, 0}
    }, *entry;

    int found;

    /* Exact lookup in table order; earlier duplicates shadow later ones. */
    for (entry = compose_pairs; entry->lhs; entry++) {
        if (entry->lhs == first && entry->rhs == second)
            return entry->glyph;
    }

    /* Fallback lookups are only made from the outermost call. */
    if (recurse != 0)
        return -1;

    found = check_compose_internal(second, first, 1);
    if (found != -1)
        return found;

    found = check_compose_internal(toupper(first), toupper(second), 1);
    if (found != -1)
        return found;

    return check_compose_internal(toupper(second), toupper(first), 1);
}
1006
/*
 * Public compose lookup: returns the character composed from `first`
 * and `second`, or whatever check_compose_internal() reports when
 * there is none. The search is started with recurse == 0, i.e. with
 * the swapped / upper-cased fallback lookups enabled.
 */
int check_compose(int first, int second)
{
    const int recurse = 0;

    return check_compose_internal(first, second, recurse);
}
1011
1012 int decode_codepage(char *cp_name)
1013 {
1014 char *s, *d;
1015 const struct cp_list_item *cpi;
1016 int codepage = -1;
1017 CPINFO cpinfo;
1018
1019 if (!cp_name || !*cp_name)
1020 return CP_UTF8; /* default */
1021
1022 for (cpi = cp_list; cpi->name; cpi++) {
1023 s = cp_name;
1024 d = cpi->name;
1025 for (;;) {
1026 while (*s && !isalnum(*s) && *s != ':')
1027 s++;
1028 while (*d && !isalnum(*d) && *d != ':')
1029 d++;
1030 if (*s == 0) {
1031 codepage = cpi->codepage;
1032 if (codepage == CP_UTF8)
1033 goto break_break;
1034 if (codepage == -1)
1035 return codepage;
1036 if (codepage == 0) {
1037 codepage = 65536 + (cpi - cp_list);
1038 goto break_break;
1039 }
1040
1041 if (GetCPInfo(codepage, &cpinfo) != 0)
1042 goto break_break;
1043 }
1044 if (tolower(*s++) != tolower(*d++))
1045 break;
1046 }
1047 }
1048
1049 d = cp_name;
1050 if (tolower(d[0]) == 'c' && tolower(d[1]) == 'p')
1051 d += 2;
1052 if (tolower(d[0]) == 'i' && tolower(d[1]) == 'b'
1053 && tolower(d[2]) == 'm')
1054 d += 3;
1055 for (s = d; *s >= '0' && *s <= '9'; s++);
1056 if (*s == 0 && s != d)
1057 codepage = atoi(d); /* CP999 or IBM999 */
1058
1059 if (codepage == CP_ACP)
1060 codepage = GetACP();
1061 if (codepage == CP_OEMCP)
1062 codepage = GetOEMCP();
1063 if (codepage > 65535)
1064 codepage = -2;
1065
1066 break_break:;
1067 if (codepage != -1) {
1068 if (codepage != CP_UTF8 && codepage < 65536) {
1069 if (GetCPInfo(codepage, &cpinfo) == 0) {
1070 codepage = -2;
1071 } else if (cpinfo.MaxCharSize > 1)
1072 codepage = -3;
1073 }
1074 }
1075 if (codepage == -1 && *cp_name)
1076 codepage = -2;
1077 return codepage;
1078 }
1079
1080 const char *cp_name(int codepage)
1081 {
1082 const struct cp_list_item *cpi, *cpno;
1083 static char buf[32];
1084
1085 if (codepage == -1) {
1086 sprintf(buf, "Use font encoding");
1087 return buf;
1088 }
1089
1090 if (codepage > 0 && codepage < 65536)
1091 sprintf(buf, "CP%03d", codepage);
1092 else
1093 *buf = 0;
1094
1095 if (codepage >= 65536) {
1096 cpno = 0;
1097 for (cpi = cp_list; cpi->name; cpi++)
1098 if (cpi == cp_list + (codepage - 65536)) {
1099 cpno = cpi;
1100 break;
1101 }
1102 if (cpno)
1103 for (cpi = cp_list; cpi->name; cpi++) {
1104 if (cpno->cp_table == cpi->cp_table)
1105 return cpi->name;
1106 }
1107 } else {
1108 for (cpi = cp_list; cpi->name; cpi++) {
1109 if (codepage == cpi->codepage)
1110 return cpi->name;
1111 }
1112 }
1113 return buf;
1114 }
1115
1116 /*
1117 * Return the nth code page in the list, for use in the GUI
1118 * configurer.
1119 */
1120 const char *cp_enumerate(int index)
1121 {
1122 if (index < 0 || index >= lenof(cp_list))
1123 return NULL;
1124 return cp_list[index].name;
1125 }
1126
1127 void get_unitab(int codepage, wchar_t * unitab, int ftype)
1128 {
1129 char tbuf[4];
1130 int i, max = 256, flg = MB_ERR_INVALID_CHARS;
1131
1132 if (ftype)
1133 flg |= MB_USEGLYPHCHARS;
1134 if (ftype == 2)
1135 max = 128;
1136
1137 if (codepage == CP_UTF8) {
1138 for (i = 0; i < max; i++)
1139 unitab[i] = i;
1140 return;
1141 }
1142
1143 if (codepage == CP_ACP)
1144 codepage = GetACP();
1145 else if (codepage == CP_OEMCP)
1146 codepage = GetOEMCP();
1147
1148 if (codepage > 0 && codepage < 65536) {
1149 for (i = 0; i < max; i++) {
1150 tbuf[0] = i;
1151
1152 if (mb_to_wc(codepage, flg, tbuf, 1, unitab + i, 1)
1153 != 1)
1154 unitab[i] = 0xFFFD;
1155 }
1156 } else {
1157 int j = 256 - cp_list[codepage & 0xFFFF].cp_size;
1158 for (i = 0; i < max; i++)
1159 unitab[i] = i;
1160 for (i = j; i < max; i++)
1161 unitab[i] = cp_list[codepage & 0xFFFF].cp_table[i - j];
1162 }
1163 }
1164
1165 int wc_to_mb(int codepage, int flags, const wchar_t *wcstr, int wclen,
1166 char *mbstr, int mblen, char *defchr, int *defused,
1167 struct unicode_data *ucsdata)
1168 {
1169 char *p;
1170 int i;
1171 if (ucsdata && codepage == ucsdata->line_codepage && ucsdata->uni_tbl) {
1172 /* Do this by array lookup if we can. */
1173 if (wclen < 0) {
1174 for (wclen = 0; wcstr[wclen++] ;); /* will include the NUL */
1175 }
1176 for (p = mbstr, i = 0; i < wclen; i++) {
1177 wchar_t ch = wcstr[i];
1178 int by;
1179 char *p1;
1180 if (ucsdata->uni_tbl && (p1 = ucsdata->uni_tbl[(ch >> 8) & 0xFF])
1181 && (by = p1[ch & 0xFF]))
1182 *p++ = by;
1183 else if (ch < 0x80)
1184 *p++ = (char) ch;
1185 else if (defchr) {
1186 int j;
1187 for (j = 0; defchr[j]; j++)
1188 *p++ = defchr[j];
1189 if (defused) *defused = 1;
1190 }
1191 #if 1
1192 else
1193 *p++ = '.';
1194 #endif
1195 assert(p - mbstr < mblen);
1196 }
1197 return p - mbstr;
1198 } else
1199 return WideCharToMultiByte(codepage, flags, wcstr, wclen,
1200 mbstr, mblen, defchr, defused);
1201 }
1202
/*
 * Convert a byte string in `codepage` to wide characters. All
 * arguments are forwarded unchanged to MultiByteToWideChar(), and its
 * return value is passed back to the caller.
 */
int mb_to_wc(int codepage, int flags, const char *mbstr, int mblen,
             wchar_t *wcstr, int wclen)
{
    int converted = MultiByteToWideChar(codepage, flags, mbstr, mblen,
                                        wcstr, wclen);

    return converted;
}
1208
/*
 * Report whether `byte` is a lead byte in `codepage`, as determined by
 * IsDBCSLeadByteEx(); its result is returned unchanged.
 */
int is_dbcs_leadbyte(int codepage, char byte)
{
    int lead = IsDBCSLeadByteEx(codepage, byte);

    return lead;
}