| 1 | #include <stdio.h> |
| 2 | #include <stdlib.h> |
| 3 | #include <ctype.h> |
| 4 | #include <locale.h> |
| 5 | #include <limits.h> |
| 6 | #include <wchar.h> |
| 7 | |
| 8 | #include <time.h> |
| 9 | |
| 10 | #include "putty.h" |
| 11 | #include "terminal.h" |
| 12 | #include "misc.h" |
| 13 | |
| 14 | /* |
| 15 | * Unix Unicode-handling routines. |
| 16 | */ |
| 17 | |
/*
 * Report whether `byte' is a DBCS lead byte in `codepage'.
 *
 * This code does no DBCS handling, so both arguments are ignored
 * and the answer is always 0 ("not a lead byte").
 */
int is_dbcs_leadbyte(int codepage, char byte)
{
    (void)codepage;
    (void)byte;

    return 0;
}
| 22 | |
| 23 | int mb_to_wc(int codepage, int flags, char *mbstr, int mblen, |
| 24 | wchar_t *wcstr, int wclen) |
| 25 | { |
| 26 | if (codepage == DEFAULT_CODEPAGE) { |
| 27 | int n = 0; |
| 28 | mbstate_t state = { 0 }; |
| 29 | |
| 30 | setlocale(LC_CTYPE, ""); |
| 31 | |
| 32 | while (mblen > 0) { |
| 33 | size_t i = mbrtowc(wcstr+n, mbstr, (size_t)mblen, &state); |
| 34 | if (i == (size_t)-1 || i == (size_t)-2) |
| 35 | break; |
| 36 | n++; |
| 37 | mbstr += i; |
| 38 | mblen -= i; |
| 39 | } |
| 40 | |
| 41 | setlocale(LC_CTYPE, "C"); |
| 42 | |
| 43 | return n; |
| 44 | } else |
| 45 | return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage, |
| 46 | NULL, NULL, 0); |
| 47 | } |
| 48 | |
| 49 | int wc_to_mb(int codepage, int flags, wchar_t *wcstr, int wclen, |
| 50 | char *mbstr, int mblen, char *defchr, int *defused) |
| 51 | { |
| 52 | /* FIXME: we should remove the defused param completely... */ |
| 53 | if (defused) |
| 54 | *defused = 0; |
| 55 | |
| 56 | if (codepage == DEFAULT_CODEPAGE) { |
| 57 | char output[MB_LEN_MAX]; |
| 58 | mbstate_t state = { 0 }; |
| 59 | int n = 0; |
| 60 | |
| 61 | setlocale(LC_CTYPE, ""); |
| 62 | |
| 63 | while (wclen > 0) { |
| 64 | int i = wcrtomb(output, wcstr[0], &state); |
| 65 | if (i == (size_t)-1 || i > n - mblen) |
| 66 | break; |
| 67 | memcpy(mbstr+n, output, i); |
| 68 | n += i; |
| 69 | wcstr++; |
| 70 | wclen--; |
| 71 | } |
| 72 | |
| 73 | setlocale(LC_CTYPE, "C"); |
| 74 | |
| 75 | return n; |
| 76 | } else |
| 77 | return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage, |
| 78 | NULL, NULL, 0); |
| 79 | } |
| 80 | |
| 81 | void init_ucs(void) |
| 82 | { |
| 83 | int i; |
| 84 | |
| 85 | /* |
| 86 | * In the platform-independent parts of the code, font_codepage |
| 87 | * is used only for system DBCS support - which we don't |
| 88 | * support at all. So we set this to something which will never |
| 89 | * be used. |
| 90 | */ |
| 91 | font_codepage = -1; |
| 92 | |
| 93 | /* |
| 94 | * line_codepage should be decoded from the specification in |
| 95 | * cfg. |
| 96 | */ |
| 97 | line_codepage = charset_from_mimeenc(cfg.line_codepage); |
| 98 | if (line_codepage == CS_NONE) |
| 99 | line_codepage = charset_from_xenc(cfg.line_codepage); |
| 100 | /* If it's still CS_NONE, we should assume direct-to-font. */ |
| 101 | |
| 102 | /* FIXME: this is a hack. Currently fonts with incomprehensible |
| 103 | * encodings are dealt with by pretending they're 8859-1. It's |
| 104 | * ugly, but it's good enough to stop things crashing. Should do |
| 105 | * something better here. */ |
| 106 | if (line_codepage == CS_NONE) |
| 107 | line_codepage = CS_ISO8859_1; |
| 108 | |
| 109 | /* |
| 110 | * Set up unitab_line, by translating each individual character |
| 111 | * in the line codepage into Unicode. |
| 112 | */ |
| 113 | for (i = 0; i < 256; i++) { |
| 114 | char c[1], *p; |
| 115 | wchar_t wc[1]; |
| 116 | int len; |
| 117 | c[0] = i; |
| 118 | p = c; |
| 119 | len = 1; |
| 120 | if (1 == charset_to_unicode(&p,&len,wc,1,line_codepage,NULL,L"",0)) |
| 121 | unitab_line[i] = wc[0]; |
| 122 | else |
| 123 | unitab_line[i] = 0xFFFD; |
| 124 | } |
| 125 | |
| 126 | /* |
| 127 | * Set up unitab_xterm. This is the same as unitab_line except |
| 128 | * in the line-drawing regions, where it follows the Unicode |
| 129 | * encoding. |
| 130 | * |
| 131 | * (Note that the strange X encoding of line-drawing characters |
| 132 | * in the bottom 32 glyphs of ISO8859-1 fonts is taken care of |
| 133 | * by the font encoding, which will spot such a font and act as |
| 134 | * if it were in a variant encoding of ISO8859-1.) |
| 135 | */ |
| 136 | for (i = 0; i < 256; i++) { |
| 137 | static const wchar_t unitab_xterm_std[32] = { |
| 138 | 0x2666, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1, |
| 139 | 0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba, |
| 140 | 0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c, |
| 141 | 0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x0020 |
| 142 | }; |
| 143 | if (i >= 0x5F && i < 0x7F) |
| 144 | unitab_xterm[i] = unitab_xterm_std[i & 0x1F]; |
| 145 | else |
| 146 | unitab_xterm[i] = unitab_line[i]; |
| 147 | } |
| 148 | |
| 149 | /* |
| 150 | * Set up unitab_scoacs. The SCO Alternate Character Set is |
| 151 | * simply CP437. |
| 152 | */ |
| 153 | for (i = 0; i < 256; i++) { |
| 154 | char c[1], *p; |
| 155 | wchar_t wc[1]; |
| 156 | int len; |
| 157 | c[0] = i; |
| 158 | p = c; |
| 159 | len = 1; |
| 160 | if (1 == charset_to_unicode(&p,&len,wc,1,CS_CP437,NULL,L"",0)) |
| 161 | unitab_scoacs[i] = wc[0]; |
| 162 | else |
| 163 | unitab_scoacs[i] = 0xFFFD; |
| 164 | } |
| 165 | |
| 166 | /* Find the line control characters. */ |
| 167 | for (i = 0; i < 256; i++) |
| 168 | if (unitab_line[i] < ' ' |
| 169 | || (unitab_line[i] >= 0x7F && unitab_line[i] < 0xA0)) |
| 170 | unitab_ctrl[i] = i; |
| 171 | else |
| 172 | unitab_ctrl[i] = 0xFF; |
| 173 | } |