/*
* iso2022s.c - support for ISO-2022 subset encodings.
- *
- * (The `s' suffix on the filename is there to leave `iso2022.c'
- * free for the unlikely event that I ever attempt to implement
- * _full_ ISO-2022 in this library!)
*/
#ifndef ENUM_CHARSETS
static long int iso2022jp_to_ucs(int subcharset, unsigned long bytes)
{
switch (subcharset) {
+ case 1: /* JIS X 0201 bottom half */
+ if (bytes == 0x5C)
+ return 0xA5;
+ else if (bytes == 0x7E)
+ return 0x203E;
+ /* else fall through to ASCII */
case 0: return bytes; /* one-byte ASCII */
- case 1: /* JIS X 0201 half-width katakana */
- if (bytes >= 0x21 && bytes <= 0x5F)
- return bytes + (0xFF61 - 0x21);
- else
- return ERROR;
/* (no break needed since all control paths have returned) */
case 2: return jisx0208_to_unicode(((bytes >> 8) & 0xFF) - 0x21,
((bytes ) & 0xFF) - 0x21);
*subcharset = 0;
*bytes = ucs;
return 1;
- } else if (ucs >= 0xFF61 && ucs <= 0xFF9F) {
+ } else if (ucs == 0xA5 || ucs == 0x203E) {
*subcharset = 1;
- *bytes = ucs - (0xFF61 - 0x21);
+ *bytes = (ucs == 0xA5 ? 0x5C : 0x7E);
return 1;
} else if (unicode_to_jisx0208(ucs, &r, &c)) {
*subcharset = 2;
SEQ("\033-H", 1, CTEXT_ISO8859_8),
SEQ("\033-L", 1, CTEXT_ISO8859_5),
SEQ("\033-M", 1, CTEXT_ISO8859_9),
+
+ /*
+ * Cross-testing against Xutf8TextListToTextProperty() turns up
+ * some additional character sets and ISO 2022 features
+ * supported by that and not by us:
+ *
+ * - Single-byte right-hand-half character sets `ESC - f',
+ * `ESC - T' and `ESC - Y'.
+ *
+ * - A really horrifying mechanism used to escape completely
+ * from the ISO 2022 framework: ESC % / <length>
+ * <charset-name> <text>. Xutf8* uses this to encode
+ * "iso8859-14", "iso8859-15" and "big5-0".
+ * * This mechanism is particularly nasty because we can't
+ * efficiently encode it on the fly! It requires that the
+ * length of the text encoded in the foreign charset be
+ * given _before_ the text in question. If we're receiving
+ * one character at a time we simply can't look ahead, so
+ * we would have to encode each individual character in a
+ * separate one of these sequences.
+ *
+ * - ESC % G and ESC % @ to shift to and from UTF-8 mode, as a
+ * last resort for anything we still don't support.
+ * * Interestingly, ctext.ps actually _disallows_ this: it
+ * says that the above extension mechanism is the only
+ * one permitted. Ho hum.
+ */
};
static const struct iso2022 ctext = {
ctext_escapes, lenof(ctext_escapes),