1709795f |
1 | #include <stdio.h> |
2 | #include <stdlib.h> |
3 | #include <ctype.h> |
2dc6356a |
4 | #include <locale.h> |
5 | #include <limits.h> |
6 | #include <wchar.h> |
1709795f |
7 | |
8 | #include <time.h> |
2dc6356a |
9 | |
1709795f |
10 | #include "putty.h" |
d4413bd2 |
11 | #include "charset.h" |
887035a5 |
12 | #include "terminal.h" |
1709795f |
13 | #include "misc.h" |
14 | |
15 | /* |
16 | * Unix Unicode-handling routines. |
1709795f |
17 | */ |
18 | |
1709795f |
/*
 * Report whether `byte' is a DBCS lead byte in `codepage'.
 *
 * This port has no DBCS support, so the answer is always "no" (0)
 * and both arguments are ignored.
 */
int is_dbcs_leadbyte(int codepage, char byte)
{
    (void)codepage;
    (void)byte;

    return 0;
}
23 | |
57191fa4 |
24 | int mb_to_wc(int codepage, int flags, const char *mbstr, int mblen, |
1709795f |
25 | wchar_t *wcstr, int wclen) |
26 | { |
2dc6356a |
27 | if (codepage == DEFAULT_CODEPAGE) { |
28 | int n = 0; |
d4e1d591 |
29 | mbstate_t state; |
2dc6356a |
30 | |
d4e1d591 |
31 | memset(&state, 0, sizeof state); |
2dc6356a |
32 | |
33 | while (mblen > 0) { |
34 | size_t i = mbrtowc(wcstr+n, mbstr, (size_t)mblen, &state); |
35 | if (i == (size_t)-1 || i == (size_t)-2) |
36 | break; |
37 | n++; |
38 | mbstr += i; |
39 | mblen -= i; |
40 | } |
41 | |
2dc6356a |
42 | return n; |
facd762c |
43 | } else if (codepage == CS_NONE) { |
44 | int n = 0; |
45 | |
46 | while (mblen > 0) { |
47 | wcstr[n] = 0xD800 | (mbstr[0] & 0xFF); |
48 | n++; |
49 | mbstr++; |
50 | mblen--; |
51 | } |
52 | |
53 | return n; |
2dc6356a |
54 | } else |
55 | return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage, |
56 | NULL, NULL, 0); |
e6346999 |
57 | } |
58 | |
57191fa4 |
59 | int wc_to_mb(int codepage, int flags, const wchar_t *wcstr, int wclen, |
21d2b241 |
60 | char *mbstr, int mblen, char *defchr, int *defused, |
61 | struct unicode_data *ucsdata) |
e6346999 |
62 | { |
2dc6356a |
63 | /* FIXME: we should remove the defused param completely... */ |
e6346999 |
64 | if (defused) |
65 | *defused = 0; |
2dc6356a |
66 | |
67 | if (codepage == DEFAULT_CODEPAGE) { |
68 | char output[MB_LEN_MAX]; |
d4e1d591 |
69 | mbstate_t state; |
2dc6356a |
70 | int n = 0; |
71 | |
d4e1d591 |
72 | memset(&state, 0, sizeof state); |
2dc6356a |
73 | |
74 | while (wclen > 0) { |
75 | int i = wcrtomb(output, wcstr[0], &state); |
76 | if (i == (size_t)-1 || i > n - mblen) |
77 | break; |
78 | memcpy(mbstr+n, output, i); |
79 | n += i; |
80 | wcstr++; |
81 | wclen--; |
82 | } |
83 | |
2dc6356a |
84 | return n; |
facd762c |
85 | } else if (codepage == CS_NONE) { |
86 | int n = 0; |
87 | while (wclen > 0 && n < mblen) { |
88 | if (*wcstr >= 0xD800 && *wcstr < 0xD900) |
89 | mbstr[n++] = (*wcstr & 0xFF); |
90 | else if (defchr) |
91 | mbstr[n++] = *defchr; |
92 | wcstr++; |
93 | wclen--; |
94 | } |
95 | return n; |
96 | } else { |
2dc6356a |
97 | return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage, |
0f993689 |
98 | NULL, defchr?defchr:NULL, defchr?1:0); |
facd762c |
99 | } |
1709795f |
100 | } |
101 | |
085f4a68 |
102 | /* |
103 | * Return value is TRUE if pterm is to run in direct-to-font mode. |
104 | */ |
6ac7f054 |
105 | int init_ucs(struct unicode_data *ucsdata, char *linecharset, |
106 | int utf8_override, int font_charset, int vtmode) |
1709795f |
107 | { |
085f4a68 |
108 | int i, ret = 0; |
2dc6356a |
109 | |
110 | /* |
111 | * In the platform-independent parts of the code, font_codepage |
112 | * is used only for system DBCS support - which we don't |
113 | * support at all. So we set this to something which will never |
114 | * be used. |
115 | */ |
21d2b241 |
116 | ucsdata->font_codepage = -1; |
2dc6356a |
117 | |
118 | /* |
6ac7f054 |
119 | * If utf8_override is set and the POSIX locale settings |
120 | * dictate a UTF-8 character set, then just go straight for |
121 | * UTF-8. |
2dc6356a |
122 | */ |
6ac7f054 |
123 | ucsdata->line_codepage = CS_NONE; |
124 | if (utf8_override) { |
125 | const char *s; |
126 | if (((s = getenv("LC_ALL")) && *s) || |
127 | ((s = getenv("LC_CTYPE")) && *s) || |
128 | ((s = getenv("LANG")) && *s)) { |
129 | if (strstr(s, "UTF-8")) |
130 | ucsdata->line_codepage = CS_UTF8; |
131 | } |
132 | } |
133 | |
134 | /* |
135 | * Failing that, line_codepage should be decoded from the |
4a693cfc |
136 | * specification in conf. |
6ac7f054 |
137 | */ |
138 | if (ucsdata->line_codepage == CS_NONE) |
139 | ucsdata->line_codepage = decode_codepage(linecharset); |
2dc6356a |
140 | |
facd762c |
141 | /* |
142 | * If line_codepage is _still_ CS_NONE, we assume we're using |
143 | * the font's own encoding. This has been passed in to us, so |
144 | * we use that. If it's still CS_NONE after _that_ - i.e. the |
145 | * font we were given had an incomprehensible charset - then we |
146 | * fall back to using the D800 page. |
147 | */ |
21d2b241 |
148 | if (ucsdata->line_codepage == CS_NONE) |
149 | ucsdata->line_codepage = font_charset; |
2dc6356a |
150 | |
21d2b241 |
151 | if (ucsdata->line_codepage == CS_NONE) |
085f4a68 |
152 | ret = 1; |
153 | |
2dc6356a |
154 | /* |
155 | * Set up unitab_line, by translating each individual character |
156 | * in the line codepage into Unicode. |
157 | */ |
158 | for (i = 0; i < 256; i++) { |
57191fa4 |
159 | char c[1]; |
160 | const char *p; |
2dc6356a |
161 | wchar_t wc[1]; |
162 | int len; |
163 | c[0] = i; |
164 | p = c; |
165 | len = 1; |
21d2b241 |
166 | if (ucsdata->line_codepage == CS_NONE) |
167 | ucsdata->unitab_line[i] = 0xD800 | i; |
168 | else if (1 == charset_to_unicode(&p, &len, wc, 1, |
169 | ucsdata->line_codepage, |
facd762c |
170 | NULL, L"", 0)) |
21d2b241 |
171 | ucsdata->unitab_line[i] = wc[0]; |
1709795f |
172 | else |
21d2b241 |
173 | ucsdata->unitab_line[i] = 0xFFFD; |
2dc6356a |
174 | } |
1709795f |
175 | |
2dc6356a |
176 | /* |
177 | * Set up unitab_xterm. This is the same as unitab_line except |
178 | * in the line-drawing regions, where it follows the Unicode |
179 | * encoding. |
180 | * |
181 | * (Note that the strange X encoding of line-drawing characters |
182 | * in the bottom 32 glyphs of ISO8859-1 fonts is taken care of |
183 | * by the font encoding, which will spot such a font and act as |
184 | * if it were in a variant encoding of ISO8859-1.) |
185 | */ |
1709795f |
186 | for (i = 0; i < 256; i++) { |
2dc6356a |
187 | static const wchar_t unitab_xterm_std[32] = { |
188 | 0x2666, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1, |
189 | 0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba, |
190 | 0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c, |
191 | 0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x0020 |
192 | }; |
3900c2d6 |
193 | static const wchar_t unitab_xterm_poorman[32] = |
194 | L"*#****o~**+++++-----++++|****L. "; |
195 | |
196 | const wchar_t *ptr; |
197 | |
198 | if (vtmode == VT_POORMAN) |
199 | ptr = unitab_xterm_poorman; |
200 | else |
201 | ptr = unitab_xterm_std; |
202 | |
2dc6356a |
203 | if (i >= 0x5F && i < 0x7F) |
3900c2d6 |
204 | ucsdata->unitab_xterm[i] = ptr[i & 0x1F]; |
2dc6356a |
205 | else |
21d2b241 |
206 | ucsdata->unitab_xterm[i] = ucsdata->unitab_line[i]; |
1709795f |
207 | } |
2dc6356a |
208 | |
209 | /* |
210 | * Set up unitab_scoacs. The SCO Alternate Character Set is |
211 | * simply CP437. |
212 | */ |
213 | for (i = 0; i < 256; i++) { |
57191fa4 |
214 | char c[1]; |
215 | const char *p; |
2dc6356a |
216 | wchar_t wc[1]; |
217 | int len; |
218 | c[0] = i; |
219 | p = c; |
220 | len = 1; |
facd762c |
221 | if (1 == charset_to_unicode(&p, &len, wc, 1, CS_CP437, NULL, L"", 0)) |
21d2b241 |
222 | ucsdata->unitab_scoacs[i] = wc[0]; |
2dc6356a |
223 | else |
21d2b241 |
224 | ucsdata->unitab_scoacs[i] = 0xFFFD; |
2dc6356a |
225 | } |
226 | |
facd762c |
227 | /* |
228 | * Find the control characters in the line codepage. For |
229 | * direct-to-font mode using the D800 hack, we assume 00-1F and |
230 | * 7F are controls, but allow 80-9F through. (It's as good a |
231 | * guess as anything; and my bet is that half the weird fonts |
232 | * used in this way will be IBM or MS code pages anyway.) |
233 | */ |
234 | for (i = 0; i < 256; i++) { |
21d2b241 |
235 | int lineval = ucsdata->unitab_line[i]; |
facd762c |
236 | if (lineval < ' ' || (lineval >= 0x7F && lineval < 0xA0) || |
237 | (lineval >= 0xD800 && lineval < 0xD820) || (lineval == 0xD87F)) |
21d2b241 |
238 | ucsdata->unitab_ctrl[i] = i; |
2dc6356a |
239 | else |
21d2b241 |
240 | ucsdata->unitab_ctrl[i] = 0xFF; |
facd762c |
241 | } |
085f4a68 |
242 | |
243 | return ret; |
126ce234 |
244 | } |
d4413bd2 |
245 | |
246 | const char *cp_name(int codepage) |
247 | { |
248 | if (codepage == CS_NONE) |
249 | return "Use font encoding"; |
250 | return charset_to_localenc(codepage); |
251 | } |
252 | |
253 | const char *cp_enumerate(int index) |
254 | { |
255 | int charset; |
256 | if (index == 0) |
257 | return "Use font encoding"; |
258 | charset = charset_localenc_nth(index-1); |
259 | if (charset == CS_NONE) |
260 | return NULL; |
261 | return charset_to_localenc(charset); |
262 | } |
263 | |
264 | int decode_codepage(char *cp_name) |
265 | { |
266 | if (!*cp_name) |
267 | return CS_NONE; /* use font encoding */ |
268 | return charset_from_localenc(cp_name); |
269 | } |