1709795f |
1 | #include <stdio.h> |
2 | #include <stdlib.h> |
3 | #include <ctype.h> |
2dc6356a |
4 | #include <locale.h> |
5 | #include <limits.h> |
6 | #include <wchar.h> |
1709795f |
7 | |
8 | #include <time.h> |
2dc6356a |
9 | |
1709795f |
10 | #include "putty.h" |
d4413bd2 |
11 | #include "charset.h" |
887035a5 |
12 | #include "terminal.h" |
1709795f |
13 | #include "misc.h" |
14 | |
15 | /* |
16 | * Unix Unicode-handling routines. |
1709795f |
17 | */ |
18 | |
1709795f |
/*
 * Report whether `byte' is a DBCS lead byte in `codepage'.
 *
 * DBCS is not handled in this file, so both arguments are ignored
 * and the answer is always 0.
 */
int is_dbcs_leadbyte(int codepage, char byte)
{
    (void)codepage;
    (void)byte;

    return 0;
}
23 | |
24 | int mb_to_wc(int codepage, int flags, char *mbstr, int mblen, |
25 | wchar_t *wcstr, int wclen) |
26 | { |
2dc6356a |
27 | if (codepage == DEFAULT_CODEPAGE) { |
28 | int n = 0; |
d4e1d591 |
29 | mbstate_t state; |
2dc6356a |
30 | |
d4e1d591 |
31 | memset(&state, 0, sizeof state); |
2dc6356a |
32 | setlocale(LC_CTYPE, ""); |
33 | |
34 | while (mblen > 0) { |
35 | size_t i = mbrtowc(wcstr+n, mbstr, (size_t)mblen, &state); |
36 | if (i == (size_t)-1 || i == (size_t)-2) |
37 | break; |
38 | n++; |
39 | mbstr += i; |
40 | mblen -= i; |
41 | } |
42 | |
43 | setlocale(LC_CTYPE, "C"); |
44 | |
45 | return n; |
facd762c |
46 | } else if (codepage == CS_NONE) { |
47 | int n = 0; |
48 | |
49 | while (mblen > 0) { |
50 | wcstr[n] = 0xD800 | (mbstr[0] & 0xFF); |
51 | n++; |
52 | mbstr++; |
53 | mblen--; |
54 | } |
55 | |
56 | return n; |
2dc6356a |
57 | } else |
58 | return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage, |
59 | NULL, NULL, 0); |
e6346999 |
60 | } |
61 | |
62 | int wc_to_mb(int codepage, int flags, wchar_t *wcstr, int wclen, |
21d2b241 |
63 | char *mbstr, int mblen, char *defchr, int *defused, |
64 | struct unicode_data *ucsdata) |
e6346999 |
65 | { |
2dc6356a |
66 | /* FIXME: we should remove the defused param completely... */ |
e6346999 |
67 | if (defused) |
68 | *defused = 0; |
2dc6356a |
69 | |
70 | if (codepage == DEFAULT_CODEPAGE) { |
71 | char output[MB_LEN_MAX]; |
d4e1d591 |
72 | mbstate_t state; |
2dc6356a |
73 | int n = 0; |
74 | |
d4e1d591 |
75 | memset(&state, 0, sizeof state); |
2dc6356a |
76 | setlocale(LC_CTYPE, ""); |
77 | |
78 | while (wclen > 0) { |
79 | int i = wcrtomb(output, wcstr[0], &state); |
80 | if (i == (size_t)-1 || i > n - mblen) |
81 | break; |
82 | memcpy(mbstr+n, output, i); |
83 | n += i; |
84 | wcstr++; |
85 | wclen--; |
86 | } |
87 | |
88 | setlocale(LC_CTYPE, "C"); |
89 | |
90 | return n; |
facd762c |
91 | } else if (codepage == CS_NONE) { |
92 | int n = 0; |
93 | while (wclen > 0 && n < mblen) { |
94 | if (*wcstr >= 0xD800 && *wcstr < 0xD900) |
95 | mbstr[n++] = (*wcstr & 0xFF); |
96 | else if (defchr) |
97 | mbstr[n++] = *defchr; |
98 | wcstr++; |
99 | wclen--; |
100 | } |
101 | return n; |
102 | } else { |
2dc6356a |
103 | return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage, |
0f993689 |
104 | NULL, defchr?defchr:NULL, defchr?1:0); |
facd762c |
105 | } |
1709795f |
106 | } |
107 | |
085f4a68 |
108 | /* |
109 | * Return value is TRUE if pterm is to run in direct-to-font mode. |
110 | */ |
6ac7f054 |
111 | int init_ucs(struct unicode_data *ucsdata, char *linecharset, |
112 | int utf8_override, int font_charset, int vtmode) |
1709795f |
113 | { |
085f4a68 |
114 | int i, ret = 0; |
2dc6356a |
115 | |
116 | /* |
117 | * In the platform-independent parts of the code, font_codepage |
118 | * is used only for system DBCS support - which we don't |
119 | * support at all. So we set this to something which will never |
120 | * be used. |
121 | */ |
21d2b241 |
122 | ucsdata->font_codepage = -1; |
2dc6356a |
123 | |
124 | /* |
6ac7f054 |
125 | * If utf8_override is set and the POSIX locale settings |
126 | * dictate a UTF-8 character set, then just go straight for |
127 | * UTF-8. |
2dc6356a |
128 | */ |
6ac7f054 |
129 | ucsdata->line_codepage = CS_NONE; |
130 | if (utf8_override) { |
131 | const char *s; |
132 | if (((s = getenv("LC_ALL")) && *s) || |
133 | ((s = getenv("LC_CTYPE")) && *s) || |
134 | ((s = getenv("LANG")) && *s)) { |
135 | if (strstr(s, "UTF-8")) |
136 | ucsdata->line_codepage = CS_UTF8; |
137 | } |
138 | } |
139 | |
140 | /* |
141 | * Failing that, line_codepage should be decoded from the |
4a693cfc |
142 | * specification in conf. |
6ac7f054 |
143 | */ |
144 | if (ucsdata->line_codepage == CS_NONE) |
145 | ucsdata->line_codepage = decode_codepage(linecharset); |
2dc6356a |
146 | |
facd762c |
147 | /* |
148 | * If line_codepage is _still_ CS_NONE, we assume we're using |
149 | * the font's own encoding. This has been passed in to us, so |
150 | * we use that. If it's still CS_NONE after _that_ - i.e. the |
151 | * font we were given had an incomprehensible charset - then we |
152 | * fall back to using the D800 page. |
153 | */ |
21d2b241 |
154 | if (ucsdata->line_codepage == CS_NONE) |
155 | ucsdata->line_codepage = font_charset; |
2dc6356a |
156 | |
21d2b241 |
157 | if (ucsdata->line_codepage == CS_NONE) |
085f4a68 |
158 | ret = 1; |
159 | |
2dc6356a |
160 | /* |
161 | * Set up unitab_line, by translating each individual character |
162 | * in the line codepage into Unicode. |
163 | */ |
164 | for (i = 0; i < 256; i++) { |
165 | char c[1], *p; |
166 | wchar_t wc[1]; |
167 | int len; |
168 | c[0] = i; |
169 | p = c; |
170 | len = 1; |
21d2b241 |
171 | if (ucsdata->line_codepage == CS_NONE) |
172 | ucsdata->unitab_line[i] = 0xD800 | i; |
173 | else if (1 == charset_to_unicode(&p, &len, wc, 1, |
174 | ucsdata->line_codepage, |
facd762c |
175 | NULL, L"", 0)) |
21d2b241 |
176 | ucsdata->unitab_line[i] = wc[0]; |
1709795f |
177 | else |
21d2b241 |
178 | ucsdata->unitab_line[i] = 0xFFFD; |
2dc6356a |
179 | } |
1709795f |
180 | |
2dc6356a |
181 | /* |
182 | * Set up unitab_xterm. This is the same as unitab_line except |
183 | * in the line-drawing regions, where it follows the Unicode |
184 | * encoding. |
185 | * |
186 | * (Note that the strange X encoding of line-drawing characters |
187 | * in the bottom 32 glyphs of ISO8859-1 fonts is taken care of |
188 | * by the font encoding, which will spot such a font and act as |
189 | * if it were in a variant encoding of ISO8859-1.) |
190 | */ |
1709795f |
191 | for (i = 0; i < 256; i++) { |
2dc6356a |
192 | static const wchar_t unitab_xterm_std[32] = { |
193 | 0x2666, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1, |
194 | 0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba, |
195 | 0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c, |
196 | 0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x0020 |
197 | }; |
3900c2d6 |
198 | static const wchar_t unitab_xterm_poorman[32] = |
199 | L"*#****o~**+++++-----++++|****L. "; |
200 | |
201 | const wchar_t *ptr; |
202 | |
203 | if (vtmode == VT_POORMAN) |
204 | ptr = unitab_xterm_poorman; |
205 | else |
206 | ptr = unitab_xterm_std; |
207 | |
2dc6356a |
208 | if (i >= 0x5F && i < 0x7F) |
3900c2d6 |
209 | ucsdata->unitab_xterm[i] = ptr[i & 0x1F]; |
2dc6356a |
210 | else |
21d2b241 |
211 | ucsdata->unitab_xterm[i] = ucsdata->unitab_line[i]; |
1709795f |
212 | } |
2dc6356a |
213 | |
214 | /* |
215 | * Set up unitab_scoacs. The SCO Alternate Character Set is |
216 | * simply CP437. |
217 | */ |
218 | for (i = 0; i < 256; i++) { |
219 | char c[1], *p; |
220 | wchar_t wc[1]; |
221 | int len; |
222 | c[0] = i; |
223 | p = c; |
224 | len = 1; |
facd762c |
225 | if (1 == charset_to_unicode(&p, &len, wc, 1, CS_CP437, NULL, L"", 0)) |
21d2b241 |
226 | ucsdata->unitab_scoacs[i] = wc[0]; |
2dc6356a |
227 | else |
21d2b241 |
228 | ucsdata->unitab_scoacs[i] = 0xFFFD; |
2dc6356a |
229 | } |
230 | |
facd762c |
231 | /* |
232 | * Find the control characters in the line codepage. For |
233 | * direct-to-font mode using the D800 hack, we assume 00-1F and |
234 | * 7F are controls, but allow 80-9F through. (It's as good a |
235 | * guess as anything; and my bet is that half the weird fonts |
236 | * used in this way will be IBM or MS code pages anyway.) |
237 | */ |
238 | for (i = 0; i < 256; i++) { |
21d2b241 |
239 | int lineval = ucsdata->unitab_line[i]; |
facd762c |
240 | if (lineval < ' ' || (lineval >= 0x7F && lineval < 0xA0) || |
241 | (lineval >= 0xD800 && lineval < 0xD820) || (lineval == 0xD87F)) |
21d2b241 |
242 | ucsdata->unitab_ctrl[i] = i; |
2dc6356a |
243 | else |
21d2b241 |
244 | ucsdata->unitab_ctrl[i] = 0xFF; |
facd762c |
245 | } |
085f4a68 |
246 | |
247 | return ret; |
126ce234 |
248 | } |
d4413bd2 |
249 | |
250 | const char *cp_name(int codepage) |
251 | { |
252 | if (codepage == CS_NONE) |
253 | return "Use font encoding"; |
254 | return charset_to_localenc(codepage); |
255 | } |
256 | |
257 | const char *cp_enumerate(int index) |
258 | { |
259 | int charset; |
260 | if (index == 0) |
261 | return "Use font encoding"; |
262 | charset = charset_localenc_nth(index-1); |
263 | if (charset == CS_NONE) |
264 | return NULL; |
265 | return charset_to_localenc(charset); |
266 | } |
267 | |
268 | int decode_codepage(char *cp_name) |
269 | { |
270 | if (!*cp_name) |
271 | return CS_NONE; /* use font encoding */ |
272 | return charset_from_localenc(cp_name); |
273 | } |