X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/putty/blobdiff_plain/875b193f2f7c9b4168d63a274861c4a1d980c286..6ae18debc04dd13eade6226d481abaa532c1dedf:/unicode.c diff --git a/unicode.c b/unicode.c index 954015ff..e1740dad 100644 --- a/unicode.c +++ b/unicode.c @@ -16,7 +16,7 @@ static void get_unitab(int codepage, wchar_t * unitab, int ftype); /* Character conversion arrays; they are usually taken from windows, * the xterm one has the four scanlines that have no unicode 2.0 - * equlivents mapped into the private area. + * equivalents mapped to their unicode 3.0 locations. */ static char **uni_tbl; @@ -63,7 +63,7 @@ static wchar_t iso_8859_11[] = { 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD }; -static wchar_t iso_8859_12[] = { +static wchar_t iso_8859_13[] = { 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7, 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7, @@ -108,6 +108,21 @@ static wchar_t iso_8859_15[] = { 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff }; +static wchar_t iso_8859_16[] = { + 0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7, + 0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B, + 0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7, + 0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C, + 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7, + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A, + 0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B, + 0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF +}; + static wchar_t roman8[] = { 0x00A0, 0x00C0, 0x00C2, 0x00C8, 0x00CA, 0x00CB, 0x00CE, 0x00CF, 0x00B4, 0x02CB, 0x02C6, 0x00A8, 0x02DC, 0x00D9, 0x00DB, 0x20A4, @@ -210,10 +225,11 @@ static struct cp_list_item cp_list[] = { {"ISO-8859-8:1988", 28598}, {"ISO-8859-9:1989", 28599}, {"ISO-8859-10:1993", 0, 96, iso_8859_10}, - {"ISO-8859-11", 0, 96, iso_8859_11}, - {"ISO-8859-12", 0, 96, iso_8859_12}, - {"ISO-8859-14", 0, 96, iso_8859_14}, + {"ISO-8859-11:1997", 0, 96, iso_8859_11}, + {"ISO-8859-13:1998", 0, 96, iso_8859_13}, + {"ISO-8859-14:1998", 0, 96, iso_8859_14}, {"ISO-8859-15:1998", 0, 96, iso_8859_15}, + {"ISO-8859-16:2001", 0, 96, iso_8859_16}, {"UTF-8", CP_UTF8}, @@ -233,36 +249,14 @@ static struct cp_list_item cp_list[] = { {"Win1257 (Baltic)", 1257}, {"Win1258 (Vietnamese)", 1258}, - /* All below here are aliases - First the windows ones. */ - {"Central European (Win1250)", 1250}, - {"Cyrillic (Win1251)", 1251}, - {"Western (Win1252)", 1252}, - {"Greek (Win1253)", 1253}, - {"Turkish (Win1254)", 1254}, - {"Hebrew (Win1255)", 1255}, - {"Arabic (Win1256)", 1256}, - {"Baltic (Win1257)", 1257}, - {"Vietnamese (Win1258)", 1258}, - - {"ROMAN8", 0, 96, roman8}, - {"R8", 0, 96, roman8}, - - /* Note this is Latin ->> */ - {"LATIN0", 0, 96, iso_8859_15}, - {"L0", 0, 96, iso_8859_15}, + {"Win1258 (Vietnamese)", 1258}, + {"CP437", 437}, {"CP819", 28591}, {"CP878", 20866}, - {"L1", 28591}, - {"L2", 28592}, - {"L3", 28593}, - {"L4", 28594}, - {"L5", 28599}, - {"LATIN1", 28591}, - {"LATIN2", 28592}, - {"LATIN3", 28593}, - {"LATIN4", 28594}, - {"LATIN5", 28599}, + + {"Use font encoding", -1}, + {0, 0} }; @@ -273,7 +267,7 @@ void init_ucs_tables(void) int i, j; int used_dtf = 0; char tbuf[256]; - int old_codepage = line_codepage; + for (i = 0; i < 256; i++) tbuf[i] = i; @@ -480,6 +474,9 @@ void luni_send(wchar_t * widebuf, int len) /* UTF is a simple algorithm */ for (p = linebuffer, i = 0; i < len; i++) { wchar_t ch = widebuf[i]; + /* Windows wchar_t is UTF-16 */ + if ((ch&0xF800) == 0xD800) ch = '.'; + if (ch < 0x80) { *p++ = (char) (ch); } else if (ch < 0x800) { @@ -884,6 +881,49 @@ int decode_codepage(char *cp_name) int codepage = -1; CPINFO cpinfo; + if (!*cp_name) { + /* + * Here we select a plausible default code page based on + * the locale the user is in. We wish to select an ISO code + * page or appropriate local default _rather_ than go with + * the Win125* series, because it's more important to have + * CSI and friends enabled by default than the ghastly + * Windows extra quote characters, and because it's more + * likely the user is connecting to a remote server that + * does something Unixy or VMSy and hence standards- + * compliant than that they're connecting back to a Windows + * box using horrible nonstandard charsets. + * + * Accordingly, Robert de Bath suggests a method for + * picking a default character set that runs as follows: + * first call GetACP to get the system's ANSI code page + * identifier, and translate as follows: + * + * 1250 -> ISO 8859-2 + * 1251 -> KOI8-U + * 1252 -> ISO 8859-1 + * 1253 -> ISO 8859-7 + * 1254 -> ISO 8859-9 + * 1255 -> ISO 8859-8 + * 1256 -> ISO 8859-6 + * 1257 -> ISO 8859-4 + * + * and for anything else, choose direct-to-font. + */ + int cp = GetACP(); + switch (cp) { + case 1250: cp_name = "ISO-8859-2"; break; + case 1251: cp_name = "KOI8-U"; break; + case 1252: cp_name = "ISO-8859-1"; break; + case 1253: cp_name = "ISO-8859-7"; break; + case 1254: cp_name = "ISO-8859-9"; break; + case 1255: cp_name = "ISO-8859-8"; break; + case 1256: cp_name = "ISO-8859-6"; break; + case 1257: cp_name = "ISO-8859-4"; break; + /* default: leave it blank, which will select -1, direct->font */ + } + } + if (cp_name && *cp_name) for (cpi = cp_list; cpi->name; cpi++) { s = cp_name; @@ -947,6 +987,12 @@ char *cp_name(int codepage) { struct cp_list_item *cpi, *cpno; static char buf[32]; + + if (codepage == -1) { + sprintf(buf, "Use font encoding"); + return buf; + } + if (codepage > 0 && codepage < 65536) sprintf(buf, "CP%03d", codepage); else