1709795f |
1 | #include <stdio.h> |
2 | #include <stdlib.h> |
3 | #include <ctype.h> |
2dc6356a |
4 | #include <locale.h> |
5 | #include <limits.h> |
6 | #include <wchar.h> |
1709795f |
7 | |
8 | #include <time.h> |
2dc6356a |
9 | |
1709795f |
10 | #include "putty.h" |
887035a5 |
11 | #include "terminal.h" |
1709795f |
12 | #include "misc.h" |
13 | |
14 | /* |
15 | * Unix Unicode-handling routines. |
1709795f |
16 | */ |
17 | |
1709795f |
/*
 * Report whether `byte' is a DBCS lead byte in `codepage'.
 *
 * This port does no DBCS handling at all, so both arguments are
 * ignored and the answer is always 0 ("not a lead byte").
 */
int is_dbcs_leadbyte(int codepage, char byte)
{
    (void)codepage;
    (void)byte;

    return 0;                          /* we don't do DBCS */
}
22 | |
23 | int mb_to_wc(int codepage, int flags, char *mbstr, int mblen, |
24 | wchar_t *wcstr, int wclen) |
25 | { |
2dc6356a |
26 | if (codepage == DEFAULT_CODEPAGE) { |
27 | int n = 0; |
28 | mbstate_t state = { 0 }; |
29 | |
30 | setlocale(LC_CTYPE, ""); |
31 | |
32 | while (mblen > 0) { |
33 | size_t i = mbrtowc(wcstr+n, mbstr, (size_t)mblen, &state); |
34 | if (i == (size_t)-1 || i == (size_t)-2) |
35 | break; |
36 | n++; |
37 | mbstr += i; |
38 | mblen -= i; |
39 | } |
40 | |
41 | setlocale(LC_CTYPE, "C"); |
42 | |
43 | return n; |
facd762c |
44 | } else if (codepage == CS_NONE) { |
45 | int n = 0; |
46 | |
47 | while (mblen > 0) { |
48 | wcstr[n] = 0xD800 | (mbstr[0] & 0xFF); |
49 | n++; |
50 | mbstr++; |
51 | mblen--; |
52 | } |
53 | |
54 | return n; |
2dc6356a |
55 | } else |
56 | return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage, |
57 | NULL, NULL, 0); |
e6346999 |
58 | } |
59 | |
60 | int wc_to_mb(int codepage, int flags, wchar_t *wcstr, int wclen, |
61 | char *mbstr, int mblen, char *defchr, int *defused) |
62 | { |
2dc6356a |
63 | /* FIXME: we should remove the defused param completely... */ |
e6346999 |
64 | if (defused) |
65 | *defused = 0; |
2dc6356a |
66 | |
67 | if (codepage == DEFAULT_CODEPAGE) { |
68 | char output[MB_LEN_MAX]; |
69 | mbstate_t state = { 0 }; |
70 | int n = 0; |
71 | |
72 | setlocale(LC_CTYPE, ""); |
73 | |
74 | while (wclen > 0) { |
75 | int i = wcrtomb(output, wcstr[0], &state); |
76 | if (i == (size_t)-1 || i > n - mblen) |
77 | break; |
78 | memcpy(mbstr+n, output, i); |
79 | n += i; |
80 | wcstr++; |
81 | wclen--; |
82 | } |
83 | |
84 | setlocale(LC_CTYPE, "C"); |
85 | |
86 | return n; |
facd762c |
87 | } else if (codepage == CS_NONE) { |
88 | int n = 0; |
89 | while (wclen > 0 && n < mblen) { |
90 | if (*wcstr >= 0xD800 && *wcstr < 0xD900) |
91 | mbstr[n++] = (*wcstr & 0xFF); |
92 | else if (defchr) |
93 | mbstr[n++] = *defchr; |
94 | wcstr++; |
95 | wclen--; |
96 | } |
97 | return n; |
98 | } else { |
2dc6356a |
99 | return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage, |
0f993689 |
100 | NULL, defchr?defchr:NULL, defchr?1:0); |
facd762c |
101 | } |
1709795f |
102 | } |
103 | |
085f4a68 |
104 | /* |
105 | * Return value is TRUE if pterm is to run in direct-to-font mode. |
106 | */ |
8772ac69 |
107 | int init_ucs(char *linecharset, int font_charset) |
1709795f |
108 | { |
085f4a68 |
109 | int i, ret = 0; |
2dc6356a |
110 | |
111 | /* |
112 | * In the platform-independent parts of the code, font_codepage |
113 | * is used only for system DBCS support - which we don't |
114 | * support at all. So we set this to something which will never |
115 | * be used. |
116 | */ |
117 | font_codepage = -1; |
118 | |
119 | /* |
120 | * line_codepage should be decoded from the specification in |
121 | * cfg. |
122 | */ |
8772ac69 |
123 | line_codepage = charset_from_mimeenc(linecharset); |
2dc6356a |
124 | if (line_codepage == CS_NONE) |
8772ac69 |
125 | line_codepage = charset_from_xenc(linecharset); |
2dc6356a |
126 | |
facd762c |
127 | /* |
128 | * If line_codepage is _still_ CS_NONE, we assume we're using |
129 | * the font's own encoding. This has been passed in to us, so |
130 | * we use that. If it's still CS_NONE after _that_ - i.e. the |
131 | * font we were given had an incomprehensible charset - then we |
132 | * fall back to using the D800 page. |
133 | */ |
2dc6356a |
134 | if (line_codepage == CS_NONE) |
facd762c |
135 | line_codepage = font_charset; |
2dc6356a |
136 | |
085f4a68 |
137 | if (line_codepage == CS_NONE) |
138 | ret = 1; |
139 | |
2dc6356a |
140 | /* |
141 | * Set up unitab_line, by translating each individual character |
142 | * in the line codepage into Unicode. |
143 | */ |
144 | for (i = 0; i < 256; i++) { |
145 | char c[1], *p; |
146 | wchar_t wc[1]; |
147 | int len; |
148 | c[0] = i; |
149 | p = c; |
150 | len = 1; |
facd762c |
151 | if (line_codepage == CS_NONE) |
152 | unitab_line[i] = 0xD800 | i; |
153 | else if (1 == charset_to_unicode(&p, &len, wc, 1, line_codepage, |
154 | NULL, L"", 0)) |
2dc6356a |
155 | unitab_line[i] = wc[0]; |
1709795f |
156 | else |
2dc6356a |
157 | unitab_line[i] = 0xFFFD; |
158 | } |
1709795f |
159 | |
2dc6356a |
160 | /* |
161 | * Set up unitab_xterm. This is the same as unitab_line except |
162 | * in the line-drawing regions, where it follows the Unicode |
163 | * encoding. |
164 | * |
165 | * (Note that the strange X encoding of line-drawing characters |
166 | * in the bottom 32 glyphs of ISO8859-1 fonts is taken care of |
167 | * by the font encoding, which will spot such a font and act as |
168 | * if it were in a variant encoding of ISO8859-1.) |
169 | */ |
1709795f |
170 | for (i = 0; i < 256; i++) { |
2dc6356a |
171 | static const wchar_t unitab_xterm_std[32] = { |
172 | 0x2666, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1, |
173 | 0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba, |
174 | 0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c, |
175 | 0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x0020 |
176 | }; |
177 | if (i >= 0x5F && i < 0x7F) |
178 | unitab_xterm[i] = unitab_xterm_std[i & 0x1F]; |
179 | else |
180 | unitab_xterm[i] = unitab_line[i]; |
1709795f |
181 | } |
2dc6356a |
182 | |
183 | /* |
184 | * Set up unitab_scoacs. The SCO Alternate Character Set is |
185 | * simply CP437. |
186 | */ |
187 | for (i = 0; i < 256; i++) { |
188 | char c[1], *p; |
189 | wchar_t wc[1]; |
190 | int len; |
191 | c[0] = i; |
192 | p = c; |
193 | len = 1; |
facd762c |
194 | if (1 == charset_to_unicode(&p, &len, wc, 1, CS_CP437, NULL, L"", 0)) |
2dc6356a |
195 | unitab_scoacs[i] = wc[0]; |
196 | else |
197 | unitab_scoacs[i] = 0xFFFD; |
198 | } |
199 | |
facd762c |
200 | /* |
201 | * Find the control characters in the line codepage. For |
202 | * direct-to-font mode using the D800 hack, we assume 00-1F and |
203 | * 7F are controls, but allow 80-9F through. (It's as good a |
204 | * guess as anything; and my bet is that half the weird fonts |
205 | * used in this way will be IBM or MS code pages anyway.) |
206 | */ |
207 | for (i = 0; i < 256; i++) { |
208 | int lineval = unitab_line[i]; |
209 | if (lineval < ' ' || (lineval >= 0x7F && lineval < 0xA0) || |
210 | (lineval >= 0xD800 && lineval < 0xD820) || (lineval == 0xD87F)) |
2dc6356a |
211 | unitab_ctrl[i] = i; |
212 | else |
213 | unitab_ctrl[i] = 0xFF; |
facd762c |
214 | } |
085f4a68 |
215 | |
216 | return ret; |
126ce234 |
217 | } |