1709795f |
1 | #include <stdio.h> |
2 | #include <stdlib.h> |
3 | #include <ctype.h> |
2dc6356a |
4 | #include <locale.h> |
5 | #include <limits.h> |
6 | #include <wchar.h> |
1709795f |
7 | |
8 | #include <time.h> |
2dc6356a |
9 | |
1709795f |
10 | #include "putty.h" |
887035a5 |
11 | #include "terminal.h" |
1709795f |
12 | #include "misc.h" |
13 | |
14 | /* |
15 | * Unix Unicode-handling routines. |
1709795f |
16 | */ |
17 | |
1709795f |
/*
 * Report whether `byte' is a DBCS lead byte in `codepage'.
 *
 * This port does no DBCS handling at all, so the answer is always 0
 * regardless of the arguments.
 */
int is_dbcs_leadbyte(int codepage, char byte)
{
    /* Both arguments are deliberately ignored: we don't do DBCS. */
    (void)codepage;
    (void)byte;

    return 0;
}
22 | |
23 | int mb_to_wc(int codepage, int flags, char *mbstr, int mblen, |
24 | wchar_t *wcstr, int wclen) |
25 | { |
2dc6356a |
26 | if (codepage == DEFAULT_CODEPAGE) { |
27 | int n = 0; |
28 | mbstate_t state = { 0 }; |
29 | |
30 | setlocale(LC_CTYPE, ""); |
31 | |
32 | while (mblen > 0) { |
33 | size_t i = mbrtowc(wcstr+n, mbstr, (size_t)mblen, &state); |
34 | if (i == (size_t)-1 || i == (size_t)-2) |
35 | break; |
36 | n++; |
37 | mbstr += i; |
38 | mblen -= i; |
39 | } |
40 | |
41 | setlocale(LC_CTYPE, "C"); |
42 | |
43 | return n; |
44 | } else |
45 | return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage, |
46 | NULL, NULL, 0); |
e6346999 |
47 | } |
48 | |
49 | int wc_to_mb(int codepage, int flags, wchar_t *wcstr, int wclen, |
50 | char *mbstr, int mblen, char *defchr, int *defused) |
51 | { |
2dc6356a |
52 | /* FIXME: we should remove the defused param completely... */ |
e6346999 |
53 | if (defused) |
54 | *defused = 0; |
2dc6356a |
55 | |
56 | if (codepage == DEFAULT_CODEPAGE) { |
57 | char output[MB_LEN_MAX]; |
58 | mbstate_t state = { 0 }; |
59 | int n = 0; |
60 | |
61 | setlocale(LC_CTYPE, ""); |
62 | |
63 | while (wclen > 0) { |
64 | int i = wcrtomb(output, wcstr[0], &state); |
65 | if (i == (size_t)-1 || i > n - mblen) |
66 | break; |
67 | memcpy(mbstr+n, output, i); |
68 | n += i; |
69 | wcstr++; |
70 | wclen--; |
71 | } |
72 | |
73 | setlocale(LC_CTYPE, "C"); |
74 | |
75 | return n; |
76 | } else |
77 | return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage, |
78 | NULL, NULL, 0); |
1709795f |
79 | } |
80 | |
81 | void init_ucs(void) |
82 | { |
83 | int i; |
2dc6356a |
84 | |
85 | /* |
86 | * In the platform-independent parts of the code, font_codepage |
87 | * is used only for system DBCS support - which we don't |
88 | * support at all. So we set this to something which will never |
89 | * be used. |
90 | */ |
91 | font_codepage = -1; |
92 | |
93 | /* |
94 | * line_codepage should be decoded from the specification in |
95 | * cfg. |
96 | */ |
97 | line_codepage = charset_from_mimeenc(cfg.line_codepage); |
98 | if (line_codepage == CS_NONE) |
99 | line_codepage = charset_from_xenc(cfg.line_codepage); |
100 | /* If it's still CS_NONE, we should assume direct-to-font. */ |
101 | |
102 | /* FIXME: this is a hack. Currently fonts with incomprehensible |
103 | * encodings are dealt with by pretending they're 8859-1. It's |
104 | * ugly, but it's good enough to stop things crashing. Should do |
105 | * something better here. */ |
106 | if (line_codepage == CS_NONE) |
107 | line_codepage = CS_ISO8859_1; |
108 | |
109 | /* |
110 | * Set up unitab_line, by translating each individual character |
111 | * in the line codepage into Unicode. |
112 | */ |
113 | for (i = 0; i < 256; i++) { |
114 | char c[1], *p; |
115 | wchar_t wc[1]; |
116 | int len; |
117 | c[0] = i; |
118 | p = c; |
119 | len = 1; |
120 | if (1 == charset_to_unicode(&p,&len,wc,1,line_codepage,NULL,L"",0)) |
121 | unitab_line[i] = wc[0]; |
1709795f |
122 | else |
2dc6356a |
123 | unitab_line[i] = 0xFFFD; |
124 | } |
1709795f |
125 | |
2dc6356a |
126 | /* |
127 | * Set up unitab_xterm. This is the same as unitab_line except |
128 | * in the line-drawing regions, where it follows the Unicode |
129 | * encoding. |
130 | * |
131 | * (Note that the strange X encoding of line-drawing characters |
132 | * in the bottom 32 glyphs of ISO8859-1 fonts is taken care of |
133 | * by the font encoding, which will spot such a font and act as |
134 | * if it were in a variant encoding of ISO8859-1.) |
135 | */ |
1709795f |
136 | for (i = 0; i < 256; i++) { |
2dc6356a |
137 | static const wchar_t unitab_xterm_std[32] = { |
138 | 0x2666, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1, |
139 | 0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba, |
140 | 0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c, |
141 | 0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x0020 |
142 | }; |
143 | if (i >= 0x5F && i < 0x7F) |
144 | unitab_xterm[i] = unitab_xterm_std[i & 0x1F]; |
145 | else |
146 | unitab_xterm[i] = unitab_line[i]; |
1709795f |
147 | } |
2dc6356a |
148 | |
149 | /* |
150 | * Set up unitab_scoacs. The SCO Alternate Character Set is |
151 | * simply CP437. |
152 | */ |
153 | for (i = 0; i < 256; i++) { |
154 | char c[1], *p; |
155 | wchar_t wc[1]; |
156 | int len; |
157 | c[0] = i; |
158 | p = c; |
159 | len = 1; |
160 | if (1 == charset_to_unicode(&p,&len,wc,1,CS_CP437,NULL,L"",0)) |
161 | unitab_scoacs[i] = wc[0]; |
162 | else |
163 | unitab_scoacs[i] = 0xFFFD; |
164 | } |
165 | |
166 | /* Find the line control characters. */ |
167 | for (i = 0; i < 256; i++) |
168 | if (unitab_line[i] < ' ' |
169 | || (unitab_line[i] >= 0x7F && unitab_line[i] < 0xA0)) |
170 | unitab_ctrl[i] = i; |
171 | else |
172 | unitab_ctrl[i] = 0xFF; |
126ce234 |
173 | } |