1709795f |
1 | #include <stdio.h> |
2 | #include <stdlib.h> |
3 | #include <ctype.h> |
2dc6356a |
4 | #include <locale.h> |
5 | #include <limits.h> |
6 | #include <wchar.h> |
1709795f |
7 | |
8 | #include <time.h> |
2dc6356a |
9 | |
1709795f |
10 | #include "putty.h" |
d4413bd2 |
11 | #include "charset.h" |
887035a5 |
12 | #include "terminal.h" |
1709795f |
13 | #include "misc.h" |
14 | |
15 | /* |
16 | * Unix Unicode-handling routines. |
1709795f |
17 | */ |
18 | |
1709795f |
/*
 * Report whether `byte' is a DBCS lead byte in `codepage'.
 *
 * This port does no DBCS handling, so both arguments are ignored and
 * the answer is always 0.
 */
int is_dbcs_leadbyte(int codepage, char byte)
{
    (void)codepage;
    (void)byte;

    return 0;
}
23 | |
24 | int mb_to_wc(int codepage, int flags, char *mbstr, int mblen, |
25 | wchar_t *wcstr, int wclen) |
26 | { |
2dc6356a |
27 | if (codepage == DEFAULT_CODEPAGE) { |
28 | int n = 0; |
29 | mbstate_t state = { 0 }; |
30 | |
31 | setlocale(LC_CTYPE, ""); |
32 | |
33 | while (mblen > 0) { |
34 | size_t i = mbrtowc(wcstr+n, mbstr, (size_t)mblen, &state); |
35 | if (i == (size_t)-1 || i == (size_t)-2) |
36 | break; |
37 | n++; |
38 | mbstr += i; |
39 | mblen -= i; |
40 | } |
41 | |
42 | setlocale(LC_CTYPE, "C"); |
43 | |
44 | return n; |
facd762c |
45 | } else if (codepage == CS_NONE) { |
46 | int n = 0; |
47 | |
48 | while (mblen > 0) { |
49 | wcstr[n] = 0xD800 | (mbstr[0] & 0xFF); |
50 | n++; |
51 | mbstr++; |
52 | mblen--; |
53 | } |
54 | |
55 | return n; |
2dc6356a |
56 | } else |
57 | return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage, |
58 | NULL, NULL, 0); |
e6346999 |
59 | } |
60 | |
61 | int wc_to_mb(int codepage, int flags, wchar_t *wcstr, int wclen, |
21d2b241 |
62 | char *mbstr, int mblen, char *defchr, int *defused, |
63 | struct unicode_data *ucsdata) |
e6346999 |
64 | { |
2dc6356a |
65 | /* FIXME: we should remove the defused param completely... */ |
e6346999 |
66 | if (defused) |
67 | *defused = 0; |
2dc6356a |
68 | |
69 | if (codepage == DEFAULT_CODEPAGE) { |
70 | char output[MB_LEN_MAX]; |
71 | mbstate_t state = { 0 }; |
72 | int n = 0; |
73 | |
74 | setlocale(LC_CTYPE, ""); |
75 | |
76 | while (wclen > 0) { |
77 | int i = wcrtomb(output, wcstr[0], &state); |
78 | if (i == (size_t)-1 || i > n - mblen) |
79 | break; |
80 | memcpy(mbstr+n, output, i); |
81 | n += i; |
82 | wcstr++; |
83 | wclen--; |
84 | } |
85 | |
86 | setlocale(LC_CTYPE, "C"); |
87 | |
88 | return n; |
facd762c |
89 | } else if (codepage == CS_NONE) { |
90 | int n = 0; |
91 | while (wclen > 0 && n < mblen) { |
92 | if (*wcstr >= 0xD800 && *wcstr < 0xD900) |
93 | mbstr[n++] = (*wcstr & 0xFF); |
94 | else if (defchr) |
95 | mbstr[n++] = *defchr; |
96 | wcstr++; |
97 | wclen--; |
98 | } |
99 | return n; |
100 | } else { |
2dc6356a |
101 | return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage, |
0f993689 |
102 | NULL, defchr?defchr:NULL, defchr?1:0); |
facd762c |
103 | } |
1709795f |
104 | } |
105 | |
085f4a68 |
106 | /* |
107 | * Return value is TRUE if pterm is to run in direct-to-font mode. |
108 | */ |
21d2b241 |
109 | int init_ucs(struct unicode_data *ucsdata, |
110 | char *linecharset, int font_charset) |
1709795f |
111 | { |
085f4a68 |
112 | int i, ret = 0; |
2dc6356a |
113 | |
114 | /* |
115 | * In the platform-independent parts of the code, font_codepage |
116 | * is used only for system DBCS support - which we don't |
117 | * support at all. So we set this to something which will never |
118 | * be used. |
119 | */ |
21d2b241 |
120 | ucsdata->font_codepage = -1; |
2dc6356a |
121 | |
122 | /* |
123 | * line_codepage should be decoded from the specification in |
124 | * cfg. |
125 | */ |
d4413bd2 |
126 | ucsdata->line_codepage = decode_codepage(linecharset); |
2dc6356a |
127 | |
facd762c |
128 | /* |
129 | * If line_codepage is _still_ CS_NONE, we assume we're using |
130 | * the font's own encoding. This has been passed in to us, so |
131 | * we use that. If it's still CS_NONE after _that_ - i.e. the |
132 | * font we were given had an incomprehensible charset - then we |
133 | * fall back to using the D800 page. |
134 | */ |
21d2b241 |
135 | if (ucsdata->line_codepage == CS_NONE) |
136 | ucsdata->line_codepage = font_charset; |
2dc6356a |
137 | |
21d2b241 |
138 | if (ucsdata->line_codepage == CS_NONE) |
085f4a68 |
139 | ret = 1; |
140 | |
2dc6356a |
141 | /* |
142 | * Set up unitab_line, by translating each individual character |
143 | * in the line codepage into Unicode. |
144 | */ |
145 | for (i = 0; i < 256; i++) { |
146 | char c[1], *p; |
147 | wchar_t wc[1]; |
148 | int len; |
149 | c[0] = i; |
150 | p = c; |
151 | len = 1; |
21d2b241 |
152 | if (ucsdata->line_codepage == CS_NONE) |
153 | ucsdata->unitab_line[i] = 0xD800 | i; |
154 | else if (1 == charset_to_unicode(&p, &len, wc, 1, |
155 | ucsdata->line_codepage, |
facd762c |
156 | NULL, L"", 0)) |
21d2b241 |
157 | ucsdata->unitab_line[i] = wc[0]; |
1709795f |
158 | else |
21d2b241 |
159 | ucsdata->unitab_line[i] = 0xFFFD; |
2dc6356a |
160 | } |
1709795f |
161 | |
2dc6356a |
162 | /* |
163 | * Set up unitab_xterm. This is the same as unitab_line except |
164 | * in the line-drawing regions, where it follows the Unicode |
165 | * encoding. |
166 | * |
167 | * (Note that the strange X encoding of line-drawing characters |
168 | * in the bottom 32 glyphs of ISO8859-1 fonts is taken care of |
169 | * by the font encoding, which will spot such a font and act as |
170 | * if it were in a variant encoding of ISO8859-1.) |
171 | */ |
1709795f |
172 | for (i = 0; i < 256; i++) { |
2dc6356a |
173 | static const wchar_t unitab_xterm_std[32] = { |
174 | 0x2666, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1, |
175 | 0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba, |
176 | 0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c, |
177 | 0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x0020 |
178 | }; |
179 | if (i >= 0x5F && i < 0x7F) |
21d2b241 |
180 | ucsdata->unitab_xterm[i] = unitab_xterm_std[i & 0x1F]; |
2dc6356a |
181 | else |
21d2b241 |
182 | ucsdata->unitab_xterm[i] = ucsdata->unitab_line[i]; |
1709795f |
183 | } |
2dc6356a |
184 | |
185 | /* |
186 | * Set up unitab_scoacs. The SCO Alternate Character Set is |
187 | * simply CP437. |
188 | */ |
189 | for (i = 0; i < 256; i++) { |
190 | char c[1], *p; |
191 | wchar_t wc[1]; |
192 | int len; |
193 | c[0] = i; |
194 | p = c; |
195 | len = 1; |
facd762c |
196 | if (1 == charset_to_unicode(&p, &len, wc, 1, CS_CP437, NULL, L"", 0)) |
21d2b241 |
197 | ucsdata->unitab_scoacs[i] = wc[0]; |
2dc6356a |
198 | else |
21d2b241 |
199 | ucsdata->unitab_scoacs[i] = 0xFFFD; |
2dc6356a |
200 | } |
201 | |
facd762c |
202 | /* |
203 | * Find the control characters in the line codepage. For |
204 | * direct-to-font mode using the D800 hack, we assume 00-1F and |
205 | * 7F are controls, but allow 80-9F through. (It's as good a |
206 | * guess as anything; and my bet is that half the weird fonts |
207 | * used in this way will be IBM or MS code pages anyway.) |
208 | */ |
209 | for (i = 0; i < 256; i++) { |
21d2b241 |
210 | int lineval = ucsdata->unitab_line[i]; |
facd762c |
211 | if (lineval < ' ' || (lineval >= 0x7F && lineval < 0xA0) || |
212 | (lineval >= 0xD800 && lineval < 0xD820) || (lineval == 0xD87F)) |
21d2b241 |
213 | ucsdata->unitab_ctrl[i] = i; |
2dc6356a |
214 | else |
21d2b241 |
215 | ucsdata->unitab_ctrl[i] = 0xFF; |
facd762c |
216 | } |
085f4a68 |
217 | |
218 | return ret; |
126ce234 |
219 | } |
d4413bd2 |
220 | |
221 | const char *cp_name(int codepage) |
222 | { |
223 | if (codepage == CS_NONE) |
224 | return "Use font encoding"; |
225 | return charset_to_localenc(codepage); |
226 | } |
227 | |
228 | const char *cp_enumerate(int index) |
229 | { |
230 | int charset; |
231 | if (index == 0) |
232 | return "Use font encoding"; |
233 | charset = charset_localenc_nth(index-1); |
234 | if (charset == CS_NONE) |
235 | return NULL; |
236 | return charset_to_localenc(charset); |
237 | } |
238 | |
239 | int decode_codepage(char *cp_name) |
240 | { |
241 | if (!*cp_name) |
242 | return CS_NONE; /* use font encoding */ |
243 | return charset_from_localenc(cp_name); |
244 | } |