+/*
+ * The COMPOUND_TEXT encoding used in X selections. Defined by the
+ * X consortium.
+ *
+ * This encoding has quite a few sub-charsets. The order I assign
+ * to them here is given in an enum.
+ */
+enum {
+ /*
+  * This must match the bytes-per-character string given below:
+  * the n-th constant here corresponds to the n-th entry of the
+  * string in the `ctext' initialiser, so add or reorder entries
+  * in both places together.
+  */
+ /* Single-byte sets. */
+ CTEXT_ASCII,
+ CTEXT_JISX0201_LEFT, /* ASCII with yen and overline */
+ CTEXT_JISX0201_RIGHT, /* JIS X 0201 half-width katakana */
+ /* ISO 8859-n: ctext_to_ucs() looks these up with 0x80 set on the byte. */
+ CTEXT_ISO8859_1,
+ CTEXT_ISO8859_2,
+ CTEXT_ISO8859_3,
+ CTEXT_ISO8859_4,
+ CTEXT_ISO8859_5,
+ CTEXT_ISO8859_6,
+ CTEXT_ISO8859_7,
+ CTEXT_ISO8859_8,
+ CTEXT_ISO8859_9,
+ /* Two-byte sets. */
+ CTEXT_GB2312,
+ CTEXT_KSC5601, /* converted via the ksx1001 routines */
+ CTEXT_JISX0208
+};
+/*
+ * Translate one character of a COMPOUND_TEXT sub-charset to Unicode.
+ *
+ * `subcharset' is one of the CTEXT_* constants. `bytes' holds the
+ * character code: a single byte for the one-byte sets, or two bytes
+ * for GB2312, KSC5601 and JISX0208, with the first byte in bits 8-15
+ * and the second in bits 0-7.
+ *
+ * Returns the result of the underlying table lookup, or ERROR if
+ * `subcharset' is not one of the CTEXT_* values handled here.
+ */
+static long int ctext_to_ucs(int subcharset, unsigned long bytes)
+{
+    /*
+     * Single-byte lookups only look at the low seven bits of the
+     * input; the high bit is then forced off or on depending on
+     * which half of the table the sub-charset lives in.
+     */
+    unsigned long lower = bytes & 0x7F;
+    unsigned long upper = lower | 0x80;
+
+    /*
+     * Double-byte lookups take a (row, column) pair; each byte has
+     * 0x21 subtracted before being handed to the table routine.
+     */
+    unsigned long row = ((bytes >> 8) & 0xFF) - 0x21;
+    unsigned long col = (bytes & 0xFF) - 0x21;
+
+    switch (subcharset) {
+    case CTEXT_ASCII:
+        /* one-byte ASCII: the code is passed through unchanged */
+        return bytes;
+
+    case CTEXT_JISX0201_LEFT:
+        /* ASCII with yen and overline */
+        return sbcs_to_unicode(&sbcsdata_CS_JISX0201, lower);
+    case CTEXT_JISX0201_RIGHT:
+        /* JIS X 0201 half-width katakana */
+        return sbcs_to_unicode(&sbcsdata_CS_JISX0201, upper);
+
+    case CTEXT_ISO8859_1:
+        return sbcs_to_unicode(&sbcsdata_CS_ISO8859_1, upper);
+    case CTEXT_ISO8859_2:
+        return sbcs_to_unicode(&sbcsdata_CS_ISO8859_2, upper);
+    case CTEXT_ISO8859_3:
+        return sbcs_to_unicode(&sbcsdata_CS_ISO8859_3, upper);
+    case CTEXT_ISO8859_4:
+        return sbcs_to_unicode(&sbcsdata_CS_ISO8859_4, upper);
+    case CTEXT_ISO8859_5:
+        return sbcs_to_unicode(&sbcsdata_CS_ISO8859_5, upper);
+    case CTEXT_ISO8859_6:
+        return sbcs_to_unicode(&sbcsdata_CS_ISO8859_6, upper);
+    case CTEXT_ISO8859_7:
+        return sbcs_to_unicode(&sbcsdata_CS_ISO8859_7, upper);
+    case CTEXT_ISO8859_8:
+        return sbcs_to_unicode(&sbcsdata_CS_ISO8859_8, upper);
+    case CTEXT_ISO8859_9:
+        return sbcs_to_unicode(&sbcsdata_CS_ISO8859_9, upper);
+
+    case CTEXT_GB2312:
+        return gb2312_to_unicode(row, col);
+    case CTEXT_KSC5601:
+        return ksx1001_to_unicode(row, col);
+    case CTEXT_JISX0208:
+        return jisx0208_to_unicode(row, col);
+
+    default:
+        break;
+    }
+
+    /* Not a sub-charset this encoding knows about. */
+    return ERROR;
+}
+/*
+ * Find a COMPOUND_TEXT sub-charset that can represent the Unicode
+ * character `ucs'.
+ *
+ * Candidates are tried in a fixed order and the first hit wins:
+ * ASCII (any value below 0x80), ISO 8859-1 through 8859-9, JIS X
+ * 0201, then the double-byte sets GB2312, KS X 1001 and JIS X 0208.
+ *
+ * On success, `*subcharset' receives the CTEXT_* constant and
+ * `*bytes' the character code, and 1 is returned. ISO 8859 codes and
+ * upper-half JIS X 0201 codes are stored with 0x80 subtracted;
+ * double-byte codes are stored as ((row+0x21) << 8) | (col+0x21).
+ * If nothing matches, 0 is returned and neither output is written.
+ */
+static int ctext_from_ucs(long int ucs, int *subcharset, unsigned long *bytes)
+{
+    int code;       /* result of a single-byte table lookup */
+    int row, col;   /* result of a double-byte table lookup */
+    int iso_set;    /* matching ISO 8859 sub-charset, or -1 for none */
+
+    /* ASCII needs no table at all. */
+    if (ucs < 0x80) {
+        *subcharset = CTEXT_ASCII;
+        *bytes = ucs;
+        return 1;
+    }
+
+    /*
+     * ISO 8859-1 .. 8859-9, lowest number first. The else-if ladder
+     * stops at the first table containing the character, leaving its
+     * code in `code'.
+     */
+    if ((code = sbcs_from_unicode(&sbcsdata_CS_ISO8859_1, ucs)) != ERROR) {
+        iso_set = CTEXT_ISO8859_1;
+    } else if ((code = sbcs_from_unicode(&sbcsdata_CS_ISO8859_2, ucs)) != ERROR) {
+        iso_set = CTEXT_ISO8859_2;
+    } else if ((code = sbcs_from_unicode(&sbcsdata_CS_ISO8859_3, ucs)) != ERROR) {
+        iso_set = CTEXT_ISO8859_3;
+    } else if ((code = sbcs_from_unicode(&sbcsdata_CS_ISO8859_4, ucs)) != ERROR) {
+        iso_set = CTEXT_ISO8859_4;
+    } else if ((code = sbcs_from_unicode(&sbcsdata_CS_ISO8859_5, ucs)) != ERROR) {
+        iso_set = CTEXT_ISO8859_5;
+    } else if ((code = sbcs_from_unicode(&sbcsdata_CS_ISO8859_6, ucs)) != ERROR) {
+        iso_set = CTEXT_ISO8859_6;
+    } else if ((code = sbcs_from_unicode(&sbcsdata_CS_ISO8859_7, ucs)) != ERROR) {
+        iso_set = CTEXT_ISO8859_7;
+    } else if ((code = sbcs_from_unicode(&sbcsdata_CS_ISO8859_8, ucs)) != ERROR) {
+        iso_set = CTEXT_ISO8859_8;
+    } else if ((code = sbcs_from_unicode(&sbcsdata_CS_ISO8859_9, ucs)) != ERROR) {
+        iso_set = CTEXT_ISO8859_9;
+    } else {
+        iso_set = -1;
+    }
+    if (iso_set >= 0) {
+        *subcharset = iso_set;
+        *bytes = code - 0x80;
+        return 1;
+    }
+
+    /* JIS X 0201: one table, split into two sub-charsets at 0x80. */
+    code = sbcs_from_unicode(&sbcsdata_CS_JISX0201, ucs);
+    if (code != ERROR) {
+        if (code >= 0x80) {
+            *subcharset = CTEXT_JISX0201_RIGHT;
+            *bytes = code - 0x80;
+        } else {
+            *subcharset = CTEXT_JISX0201_LEFT;
+            *bytes = code;
+        }
+        return 1;
+    }
+
+    /* Double-byte sets, again first match wins. */
+    if (unicode_to_gb2312(ucs, &row, &col)) {
+        *subcharset = CTEXT_GB2312;
+    } else if (unicode_to_ksx1001(ucs, &row, &col)) {
+        *subcharset = CTEXT_KSC5601;
+    } else if (unicode_to_jisx0208(ucs, &row, &col)) {
+        *subcharset = CTEXT_JISX0208;
+    } else {
+        /* No sub-charset can express this character. */
+        return 0;
+    }
+    *bytes = ((row + 0x21) << 8) | (col + 0x21);
+    return 1;
+}
+/*
+ * SEQ(str, cont, cs) expands to one brace-enclosed initialiser for
+ * an entry of an escape-sequence table:
+ *   - str:  the escape sequence text;
+ *   - cont: the container number, optionally OR-ed with the RO flag;
+ *   - cs:   the sub-charset constant associated with the sequence.
+ * The five values produced are: str; a mask that clears the 6-bit
+ * field belonging to the container (field n starts at bit 6*n); `cs'
+ * shifted into that field; `cont' as given; and `cs' as given. The
+ * RO bit is stripped from `cont' before it is used as a shift count.
+ *
+ * `cont' is parenthesised wherever it meets another operator, so an
+ * argument such as RO|1 is treated as a unit instead of being
+ * regrouped by operator precedence inside the expansion.
+ */
+#define SEQ(str,cont,cs) \
+    {str,~(63<<(6*((cont)&~RO))),(cs)<<(6*((cont)&~RO)),(cont),(cs)}
+/*
+ * Escape sequences recognised for COMPOUND_TEXT, one SEQ() entry per
+ * (escape sequence, container, sub-charset) combination. In the
+ * entries below container 0 is used for the GL-only sets and
+ * container 1 for the GR-only sets.
+ *
+ * Compound text defines restrictions on which container can take
+ * which character sets. Things labelled `left half of' can only go
+ * in GL; things labelled `right half of' can only go in GR; and 96
+ * or 96^n character sets only _fit_ in GR. Thus:
+ * - ASCII can only go in GL since it is the left half of 8859-*.
+ * - All the 8859 sets can only go in GR.
+ * - JISX0201 left is GL only; JISX0201 right is GR only.
+ * - The three multibyte sets (GB2312, JISX0208, KSC5601) can go
+ * in either; we prefer GR where possible since this leads to a
+ * more compact EUC-like encoding.
+ *
+ * NOTE(review): whether the lookup code depends on the order of the
+ * entries is not visible from here; keep the existing order when
+ * adding entries unless that has been checked.
+ */
+static struct iso2022_escape ctext_escapes[] = {
+ /*
+  * Multibyte sets in container 0, flagged RO.
+  * NOTE(review): RO is defined elsewhere; presumably it marks these
+  * as accepted on input but not generated, matching the "we prefer
+  * GR" remark above -- confirm against its definition.
+  */
+ SEQ("\033$(A", 0|RO, CTEXT_GB2312),
+ SEQ("\033$(B", 0|RO, CTEXT_JISX0208),
+ SEQ("\033$(C", 0|RO, CTEXT_KSC5601),
+ /* The same multibyte sets in container 1. */
+ SEQ("\033$)A", 1, CTEXT_GB2312),
+ SEQ("\033$)B", 1, CTEXT_JISX0208),
+ SEQ("\033$)C", 1, CTEXT_KSC5601),
+ /* Single-byte sets in container 0. */
+ SEQ("\033(B", 0, CTEXT_ASCII),
+ SEQ("\033(J", 0, CTEXT_JISX0201_LEFT),
+ /*
+  * Single-byte sets in container 1. The final byte of the escape
+  * does not follow the 8859 part number: F is 8859-7, G is 8859-6,
+  * H is 8859-8, L is 8859-5 and M is 8859-9.
+  */
+ SEQ("\033-A", 1, CTEXT_ISO8859_1),
+ SEQ("\033-B", 1, CTEXT_ISO8859_2),
+ SEQ("\033-C", 1, CTEXT_ISO8859_3),
+ SEQ("\033-D", 1, CTEXT_ISO8859_4),
+ SEQ("\033-F", 1, CTEXT_ISO8859_7),
+ SEQ("\033-G", 1, CTEXT_ISO8859_6),
+ SEQ("\033-H", 1, CTEXT_ISO8859_8),
+ SEQ("\033)I", 1, CTEXT_JISX0201_RIGHT),
+ SEQ("\033-L", 1, CTEXT_ISO8859_5),
+ SEQ("\033-M", 1, CTEXT_ISO8859_9),
+};
+/*
+ * Descriptor for COMPOUND_TEXT; its address is stored in
+ * charset_CS_CTEXT below.
+ *
+ * NOTE(review): the layout of struct iso2022 is not visible from
+ * here, so the per-field notes below are inferred from the values
+ * and should be checked against its definition.
+ */
+static struct iso2022 ctext = {
+ /* Escape-sequence table and its element count. */
+ ctext_escapes, lenof(ctext_escapes),
+ /*
+  * One entry per CTEXT_* constant, in enum order: 1 for each of the
+  * twelve single-byte sets, 2 for GB2312, KSC5601 and JISX0208.
+  */
+ "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\2", /* must match the enum above */
+ /*
+  * Presumably the starting state: CTEXT_ASCII in the 6-bit fields at
+  * shifts 0 and 6 (the same fields SEQ() addresses for containers 0
+  * and 1), plus flag bit 0x80000000 -- TODO confirm.
+  * NOTE(review): the Compound Text specification gives the right
+  * half of ISO 8859-1 as the initial GR set; confirm that putting
+  * CTEXT_ASCII in the container-1 field is intentional.
+  */
+ "", 0x80000000 | (CTEXT_ASCII<<0) | (CTEXT_ASCII<<6), "", TRUE,
+ /* Sub-charset <-> Unicode conversion callbacks defined above. */
+ ctext_to_ucs, ctext_from_ucs
+};
+/*
+ * Exported charset descriptor for COMPOUND_TEXT: the CS_CTEXT
+ * identifier, the read_iso2022s/write_iso2022s routines, and a
+ * pointer to the `ctext' table above.
+ */
+const charset_spec charset_CS_CTEXT = {
+ CS_CTEXT, read_iso2022s, write_iso2022s, &ctext
+};
+