X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/charset/blobdiff_plain/8bade1133caa9fbcecdeed2608dcbb4918180a62..c6cef4fab2c8d40a6256ce9f0c231a97a9f236e4:/iso2022s.c diff --git a/iso2022s.c b/iso2022s.c index 3b01529..e48c885 100644 --- a/iso2022s.c +++ b/iso2022s.c @@ -1,9 +1,5 @@ /* * iso2022s.c - support for ISO-2022 subset encodings. - * - * (The `s' suffix on the filename is there to leave `iso2022.c' - * free for the unlikely event that I ever attempt to implement - * _full_ ISO-2022 in this library!) */ #ifndef ENUM_CHARSETS @@ -450,12 +446,13 @@ static int write_iso2022s(charset_spec const *charset, long int input_chr, static long int iso2022jp_to_ucs(int subcharset, unsigned long bytes) { switch (subcharset) { + case 1: /* JIS X 0201 bottom half */ + if (bytes == 0x5C) + return 0xA5; + else if (bytes == 0x7E) + return 0x203E; + /* else fall through to ASCII */ case 0: return bytes; /* one-byte ASCII */ - case 1: /* JIS X 0201 half-width katakana */ - if (bytes >= 0x21 && bytes <= 0x5F) - return bytes + (0xFF61 - 0x21); - else - return ERROR; /* (no break needed since all control paths have returned) */ case 2: return jisx0208_to_unicode(((bytes >> 8) & 0xFF) - 0x21, ((bytes ) & 0xFF) - 0x21); @@ -470,9 +467,9 @@ static int iso2022jp_from_ucs(long int ucs, int *subcharset, *subcharset = 0; *bytes = ucs; return 1; - } else if (ucs >= 0xFF61 && ucs <= 0xFF9F) { + } else if (ucs == 0xA5 || ucs == 0x203E) { *subcharset = 1; - *bytes = ucs - (0xFF61 - 0x21); + *bytes = (ucs == 0xA5 ? 
0x5C : 0x7E); return 1; } else if (unicode_to_jisx0208(ucs, &r, &c)) { *subcharset = 2; @@ -713,6 +710,33 @@ static const struct iso2022_escape ctext_escapes[] = { SEQ("\033-H", 1, CTEXT_ISO8859_8), SEQ("\033-L", 1, CTEXT_ISO8859_5), SEQ("\033-M", 1, CTEXT_ISO8859_9), + + /* + * Cross-testing against Xutf8TextListToTextProperty() turns up + * some additional character sets and ISO 2022 features + * supported by that and not by us: + * + * - Single-byte right-hand-half character sets `ESC - f', + * `ESC - T' and `ESC - Y'. + * + * - A really horrifying mechanism used to escape completely + * from the ISO 2022 framework: ESC % / F M L + * encoding-name STX text. Xutf8* uses this to encode + * "iso8859-14", "iso8859-15" and "big5-0". + * * This mechanism is particularly nasty because we can't + * efficiently encode it on the fly! It requires that the + * length of the text encoded in the foreign charset is + * given _before_ the text in question, so if we're + * receiving one character at a time we simply can't look + * ahead and so we would have to encode each individual + * character in a separate one of these sequences. + * + * - ESC % G and ESC % @ to shift to and from UTF-8 mode, as a + * last resort for anything we still don't support. + * * Interestingly, ctext.ps actually _disallows_ this: it + * says that the above extension mechanism is the only + * one permitted. Ho hum. + */ }; static const struct iso2022 ctext = { ctext_escapes, lenof(ctext_escapes),