* in ASCII order, so that we can narrow down the list as
* necessary.
*/
- struct iso2022_escape *escapes; /* must be sorted in ASCII order! */
+ const struct iso2022_escape *escapes;/* must be sorted in ASCII order! */
int nescapes;
/*
return 0;
}
}
/*
 * Escape sequence table for ISO-2022-JP, referenced from the
 * iso2022jp descriptor. The entries are listed in ASCII order of
 * their sequence strings ('$' sorts before '(', '@' before 'B'),
 * which the `escapes' member of struct iso2022 requires.
 *
 * NOTE(review): the field layout of struct iso2022_escape is not
 * visible here. Judging by the SEQ() macro used for the compound
 * text table, each initialiser is { sequence, AND mask, bits to
 * set, container, subcharset }; under that reading every mask in
 * this table clears the low six bits of the state word -- confirm
 * against the struct declaration.
 */
-static struct iso2022_escape iso2022jp_escapes[] = {
+static const struct iso2022_escape iso2022jp_escapes[] = {
{"\033$@", 0xFFFFFFC0, 0x00000002, -1, -1}, /* we ignore this one */
{"\033$B", 0xFFFFFFC0, 0x00000002, 0, 2},
{"\033(B", 0xFFFFFFC0, 0x00000000, 0, 0},
{"\033(J", 0xFFFFFFC0, 0x00000001, 0, 1},
};
-static struct iso2022 iso2022jp = {
+static const struct iso2022 iso2022jp = {
iso2022jp_escapes, lenof(iso2022jp_escapes),
"\1\1\2", "\3", 0x80000000, NULL, FALSE,
iso2022jp_to_ucs, iso2022jp_from_ucs
return 0;
}
}
/*
 * Escape sequence table for ISO-2022-KR, in ASCII order of the
 * sequence strings as the `escapes' member of struct iso2022
 * requires. "\016" and "\017" are the ASCII SO and SI control
 * characters; "\033$)C" is the same string that the iso2022kr
 * descriptor carries in its own initialiser.
 *
 * NOTE(review): assuming the second and third fields are an AND
 * mask and the bits then set (as the bits[11:6] remark suggests),
 * SO sets bits[30:28] of the state word to 1 and SI sets them
 * back to 0 -- confirm against the struct declaration.
 */
-static struct iso2022_escape iso2022kr_escapes[] = {
+static const struct iso2022_escape iso2022kr_escapes[] = {
{"\016", 0x8FFFFFFF, 0x10000000, -1, -1},
{"\017", 0x8FFFFFFF, 0x00000000, 0, 0},
{"\033$)C", 0xFFFFF03F, 0x00000040, 1, 1}, /* bits[11:6] <- 1 */
};
-static struct iso2022 iso2022kr = {
+static const struct iso2022 iso2022kr = {
iso2022kr_escapes, lenof(iso2022kr_escapes),
"\1\2", "\2", 0x80000040, "\033$)C", FALSE,
iso2022kr_to_ucs, iso2022kr_from_ucs
CTEXT_ISO8859_9,
CTEXT_GB2312,
CTEXT_KSC5601,
- CTEXT_JISX0208
+ CTEXT_JISX0208,
+ CTEXT_JISX0212
};
static long int ctext_to_ucs(int subcharset, unsigned long bytes)
{
case CTEXT_JISX0208:
return jisx0208_to_unicode(((bytes >> 8) & 0xFF) - 0x21,
((bytes ) & 0xFF) - 0x21);
+ case CTEXT_JISX0212:
+ return jisx0212_to_unicode(((bytes >> 8) & 0xFF) - 0x21,
+ ((bytes ) & 0xFF) - 0x21);
default: return ERROR;
}
}
*subcharset = CTEXT_JISX0208;
*bytes = ((r+0x21) << 8) | (c+0x21);
return 1;
+ } else if (unicode_to_jisx0212(ucs, &r, &c)) {
+ *subcharset = CTEXT_JISX0212;
+ *bytes = ((r+0x21) << 8) | (c+0x21);
+ return 1;
} else {
return 0;
}
}
/*
 * SEQ(str, cont, cs) expands to one struct iso2022_escape
 * initialiser for the compound text table. With c standing for
 * `cont' with the RO flag masked off, the five values are: the
 * sequence string; a mask with the six bits starting at bit 6*c
 * cleared; `cs' shifted up into that same six-bit field; then
 * `cont' and `cs' unchanged.
 *
 * The `+' form parenthesises `cont' before applying &~RO, so an
 * argument such as `0|RO' is masked as a whole; in the `-' form
 * it expands to 0|RO&~RO, which C parses as 0|(RO&~RO).
 */
#define SEQ(str,cont,cs) \
- {str,~(63<<(6*((cont&~RO)))),(cs)<<(6*((cont&~RO))),(cont),(cs)}
+ {str,~(63<<(6*(((cont)&~RO)))),(cs)<<(6*(((cont)&~RO))),(cont),(cs)}
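/*
 * Compound text defines restrictions on which container can take
 * which character sets. Things labelled `left half of' can only go
 * in GL (container 0 in the table below); things labelled `right
 * half of', and the ISO 8859 right-hand sets, can only go in GR
 * (container 1). The multibyte sets can go
 * in either; we prefer GR where possible since this leads to a
 * more compact EUC-like encoding.
 */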
/*
 * Escape sequence table for compound text, built with SEQ(). The
 * entries are kept in ASCII order of their sequence strings, as
 * the `escapes' member of struct iso2022 requires: "$(" < "$)" <
 * "(" < ")" < "-". Each multibyte set appears twice, once for
 * container 0 (flagged RO) and once for container 1.
 *
 * NOTE(review): RO is defined outside this excerpt; presumably it
 * marks a designation that is recognised but never generated,
 * which would fit the stated preference for GR -- confirm.
 */
-static struct iso2022_escape ctext_escapes[] = {
+static const struct iso2022_escape ctext_escapes[] = {
SEQ("\033$(A", 0|RO, CTEXT_GB2312),
SEQ("\033$(B", 0|RO, CTEXT_JISX0208),
SEQ("\033$(C", 0|RO, CTEXT_KSC5601),
+ SEQ("\033$(D", 0|RO, CTEXT_JISX0212),
SEQ("\033$)A", 1, CTEXT_GB2312),
SEQ("\033$)B", 1, CTEXT_JISX0208),
SEQ("\033$)C", 1, CTEXT_KSC5601),
+ SEQ("\033$)D", 1, CTEXT_JISX0212),
SEQ("\033(B", 0, CTEXT_ASCII),
SEQ("\033(J", 0, CTEXT_JISX0201_LEFT),
SEQ("\033)I", 1, CTEXT_JISX0201_RIGHT),
SEQ("\033-H", 1, CTEXT_ISO8859_8),
SEQ("\033-L", 1, CTEXT_ISO8859_5),
SEQ("\033-M", 1, CTEXT_ISO8859_9),
+
+ /*
+ * Cross-testing against Xutf8TextListToTextProperty() turns up
+ * some additional character sets and ISO 2022 features
+ * supported by that and not by us:
+ *
+ * - Single-byte right-hand-half character sets `ESC - f',
+ * `ESC - T' and `ESC - Y'.
+ *
+ * - A really horrifying mechanism used to escape completely
+ * from the ISO 2022 framework: ESC % / <length>
+ * <charset-name> <text>. Xutf8* uses this to encode
+ * "iso8859-14", "iso8859-15" and "big5-0".
+ * * This mechanism is particularly nasty because we can't
+ * efficiently encode it on the fly! It requires that the
+ * length of the text encoded in the foreign charset is
+ * given _before_ the text in question, so if we're
+ * receiving one character at a time we simply can't look
+ * ahead and so we would have to encode each individual
+ * character in a separate one of these sequences.
+ *
+ * - ESC % G and ESC % @ to shift to and from UTF-8 mode, as a
+ * last resort for anything we still don't support.
+ * * Interestingly, ctext.ps actually _disallows_ this: it
+ * says that the above extension mechanism is the only
+ * one permitted. Ho hum.
+ */
};
/*
 * Descriptor for the compound text encoding. The first two
 * initialisers are the escape table and its length. The string
 * that follows holds one byte per subcharset in CTEXT_* enum
 * order (the `+' line appends one more \2 byte for the added
 * CTEXT_JISX0212). The state word starts with CTEXT_ASCII in
 * bits[5:0] and CTEXT_ISO8859_1 in bits[11:6], i.e. the six-bit
 * fields SEQ() uses for containers 0 and 1, with bit 31 also set.
 *
 * NOTE(review): the per-subcharset bytes look like bytes per
 * character (1 for single-byte sets, 2 for multibyte ones), and
 * the meanings of the remaining members are not visible here --
 * confirm against the struct iso2022 declaration.
 */
-static struct iso2022 ctext = {
+static const struct iso2022 ctext = {
ctext_escapes, lenof(ctext_escapes),
- "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\2", /* must match the enum above */
+ "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\2\2", /* must match the enum above */
"", 0x80000000 | (CTEXT_ASCII<<0) | (CTEXT_ISO8859_1<<6), "", TRUE,
ctext_to_ucs, ctext_from_ucs
};