1709795f |
1 | #include <stdio.h> |
2 | #include <stdlib.h> |
3 | #include <ctype.h> |
2dc6356a |
4 | #include <locale.h> |
5 | #include <limits.h> |
6 | #include <wchar.h> |
1709795f |
7 | |
8 | #include <time.h> |
2dc6356a |
9 | |
1709795f |
10 | #include "putty.h" |
887035a5 |
11 | #include "terminal.h" |
1709795f |
12 | #include "misc.h" |
13 | |
14 | /* |
15 | * Unix Unicode-handling routines. |
1709795f |
16 | */ |
17 | |
1709795f |
/*
 * Report whether `byte` is a DBCS lead byte in `codepage`.
 *
 * We don't do DBCS here, so both arguments are ignored and the
 * answer is always 0.
 */
int is_dbcs_leadbyte(int codepage, char byte)
{
    (void)codepage;
    (void)byte;

    return 0;
}
22 | |
23 | int mb_to_wc(int codepage, int flags, char *mbstr, int mblen, |
24 | wchar_t *wcstr, int wclen) |
25 | { |
2dc6356a |
26 | if (codepage == DEFAULT_CODEPAGE) { |
27 | int n = 0; |
28 | mbstate_t state = { 0 }; |
29 | |
30 | setlocale(LC_CTYPE, ""); |
31 | |
32 | while (mblen > 0) { |
33 | size_t i = mbrtowc(wcstr+n, mbstr, (size_t)mblen, &state); |
34 | if (i == (size_t)-1 || i == (size_t)-2) |
35 | break; |
36 | n++; |
37 | mbstr += i; |
38 | mblen -= i; |
39 | } |
40 | |
41 | setlocale(LC_CTYPE, "C"); |
42 | |
43 | return n; |
facd762c |
44 | } else if (codepage == CS_NONE) { |
45 | int n = 0; |
46 | |
47 | while (mblen > 0) { |
48 | wcstr[n] = 0xD800 | (mbstr[0] & 0xFF); |
49 | n++; |
50 | mbstr++; |
51 | mblen--; |
52 | } |
53 | |
54 | return n; |
2dc6356a |
55 | } else |
56 | return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage, |
57 | NULL, NULL, 0); |
e6346999 |
58 | } |
59 | |
60 | int wc_to_mb(int codepage, int flags, wchar_t *wcstr, int wclen, |
21d2b241 |
61 | char *mbstr, int mblen, char *defchr, int *defused, |
62 | struct unicode_data *ucsdata) |
e6346999 |
63 | { |
2dc6356a |
64 | /* FIXME: we should remove the defused param completely... */ |
e6346999 |
65 | if (defused) |
66 | *defused = 0; |
2dc6356a |
67 | |
68 | if (codepage == DEFAULT_CODEPAGE) { |
69 | char output[MB_LEN_MAX]; |
70 | mbstate_t state = { 0 }; |
71 | int n = 0; |
72 | |
73 | setlocale(LC_CTYPE, ""); |
74 | |
75 | while (wclen > 0) { |
76 | int i = wcrtomb(output, wcstr[0], &state); |
77 | if (i == (size_t)-1 || i > n - mblen) |
78 | break; |
79 | memcpy(mbstr+n, output, i); |
80 | n += i; |
81 | wcstr++; |
82 | wclen--; |
83 | } |
84 | |
85 | setlocale(LC_CTYPE, "C"); |
86 | |
87 | return n; |
facd762c |
88 | } else if (codepage == CS_NONE) { |
89 | int n = 0; |
90 | while (wclen > 0 && n < mblen) { |
91 | if (*wcstr >= 0xD800 && *wcstr < 0xD900) |
92 | mbstr[n++] = (*wcstr & 0xFF); |
93 | else if (defchr) |
94 | mbstr[n++] = *defchr; |
95 | wcstr++; |
96 | wclen--; |
97 | } |
98 | return n; |
99 | } else { |
2dc6356a |
100 | return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage, |
0f993689 |
101 | NULL, defchr?defchr:NULL, defchr?1:0); |
facd762c |
102 | } |
1709795f |
103 | } |
104 | |
085f4a68 |
105 | /* |
106 | * Return value is TRUE if pterm is to run in direct-to-font mode. |
107 | */ |
21d2b241 |
108 | int init_ucs(struct unicode_data *ucsdata, |
109 | char *linecharset, int font_charset) |
1709795f |
110 | { |
085f4a68 |
111 | int i, ret = 0; |
2dc6356a |
112 | |
113 | /* |
114 | * In the platform-independent parts of the code, font_codepage |
115 | * is used only for system DBCS support - which we don't |
116 | * support at all. So we set this to something which will never |
117 | * be used. |
118 | */ |
21d2b241 |
119 | ucsdata->font_codepage = -1; |
2dc6356a |
120 | |
121 | /* |
122 | * line_codepage should be decoded from the specification in |
123 | * cfg. |
124 | */ |
21d2b241 |
125 | ucsdata->line_codepage = charset_from_mimeenc(linecharset); |
126 | if (ucsdata->line_codepage == CS_NONE) |
127 | ucsdata->line_codepage = charset_from_xenc(linecharset); |
2dc6356a |
128 | |
facd762c |
129 | /* |
130 | * If line_codepage is _still_ CS_NONE, we assume we're using |
131 | * the font's own encoding. This has been passed in to us, so |
132 | * we use that. If it's still CS_NONE after _that_ - i.e. the |
133 | * font we were given had an incomprehensible charset - then we |
134 | * fall back to using the D800 page. |
135 | */ |
21d2b241 |
136 | if (ucsdata->line_codepage == CS_NONE) |
137 | ucsdata->line_codepage = font_charset; |
2dc6356a |
138 | |
21d2b241 |
139 | if (ucsdata->line_codepage == CS_NONE) |
085f4a68 |
140 | ret = 1; |
141 | |
2dc6356a |
142 | /* |
143 | * Set up unitab_line, by translating each individual character |
144 | * in the line codepage into Unicode. |
145 | */ |
146 | for (i = 0; i < 256; i++) { |
147 | char c[1], *p; |
148 | wchar_t wc[1]; |
149 | int len; |
150 | c[0] = i; |
151 | p = c; |
152 | len = 1; |
21d2b241 |
153 | if (ucsdata->line_codepage == CS_NONE) |
154 | ucsdata->unitab_line[i] = 0xD800 | i; |
155 | else if (1 == charset_to_unicode(&p, &len, wc, 1, |
156 | ucsdata->line_codepage, |
facd762c |
157 | NULL, L"", 0)) |
21d2b241 |
158 | ucsdata->unitab_line[i] = wc[0]; |
1709795f |
159 | else |
21d2b241 |
160 | ucsdata->unitab_line[i] = 0xFFFD; |
2dc6356a |
161 | } |
1709795f |
162 | |
2dc6356a |
163 | /* |
164 | * Set up unitab_xterm. This is the same as unitab_line except |
165 | * in the line-drawing regions, where it follows the Unicode |
166 | * encoding. |
167 | * |
168 | * (Note that the strange X encoding of line-drawing characters |
169 | * in the bottom 32 glyphs of ISO8859-1 fonts is taken care of |
170 | * by the font encoding, which will spot such a font and act as |
171 | * if it were in a variant encoding of ISO8859-1.) |
172 | */ |
1709795f |
173 | for (i = 0; i < 256; i++) { |
2dc6356a |
174 | static const wchar_t unitab_xterm_std[32] = { |
175 | 0x2666, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1, |
176 | 0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba, |
177 | 0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c, |
178 | 0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x0020 |
179 | }; |
180 | if (i >= 0x5F && i < 0x7F) |
21d2b241 |
181 | ucsdata->unitab_xterm[i] = unitab_xterm_std[i & 0x1F]; |
2dc6356a |
182 | else |
21d2b241 |
183 | ucsdata->unitab_xterm[i] = ucsdata->unitab_line[i]; |
1709795f |
184 | } |
2dc6356a |
185 | |
186 | /* |
187 | * Set up unitab_scoacs. The SCO Alternate Character Set is |
188 | * simply CP437. |
189 | */ |
190 | for (i = 0; i < 256; i++) { |
191 | char c[1], *p; |
192 | wchar_t wc[1]; |
193 | int len; |
194 | c[0] = i; |
195 | p = c; |
196 | len = 1; |
facd762c |
197 | if (1 == charset_to_unicode(&p, &len, wc, 1, CS_CP437, NULL, L"", 0)) |
21d2b241 |
198 | ucsdata->unitab_scoacs[i] = wc[0]; |
2dc6356a |
199 | else |
21d2b241 |
200 | ucsdata->unitab_scoacs[i] = 0xFFFD; |
2dc6356a |
201 | } |
202 | |
facd762c |
203 | /* |
204 | * Find the control characters in the line codepage. For |
205 | * direct-to-font mode using the D800 hack, we assume 00-1F and |
206 | * 7F are controls, but allow 80-9F through. (It's as good a |
207 | * guess as anything; and my bet is that half the weird fonts |
208 | * used in this way will be IBM or MS code pages anyway.) |
209 | */ |
210 | for (i = 0; i < 256; i++) { |
21d2b241 |
211 | int lineval = ucsdata->unitab_line[i]; |
facd762c |
212 | if (lineval < ' ' || (lineval >= 0x7F && lineval < 0xA0) || |
213 | (lineval >= 0xD800 && lineval < 0xD820) || (lineval == 0xD87F)) |
21d2b241 |
214 | ucsdata->unitab_ctrl[i] = i; |
2dc6356a |
215 | else |
21d2b241 |
216 | ucsdata->unitab_ctrl[i] = 0xFF; |
facd762c |
217 | } |
085f4a68 |
218 | |
219 | return ret; |
126ce234 |
220 | } |