-/*
- * The COMPOUND_TEXT encoding used in X selections. Defined by the
- * X consortium.
- *
- * This encoding has quite a few sub-charsets. The order I assign
- * to them here is given in an enum.
- */
-enum {
- /*
-  * Identifiers for the COMPOUND_TEXT sub-charsets handled in this
-  * file. This must match the bytes-per-character string given below:
-  * the first twelve entries are the one-byte sets and the last four
-  * are the two-byte sets.
-  */
- CTEXT_ASCII,
- CTEXT_JISX0201_LEFT,
- CTEXT_JISX0201_RIGHT,
- CTEXT_ISO8859_1,
- CTEXT_ISO8859_2,
- CTEXT_ISO8859_3,
- CTEXT_ISO8859_4,
- CTEXT_ISO8859_5,
- CTEXT_ISO8859_6,
- CTEXT_ISO8859_7,
- CTEXT_ISO8859_8,
- CTEXT_ISO8859_9, /* last one-byte set */
- CTEXT_GB2312, /* first two-byte set */
- CTEXT_KSC5601,
- CTEXT_JISX0208,
- CTEXT_JISX0212
-};
-/*
- * Convert one character of a COMPOUND_TEXT sub-charset to Unicode.
- *
- * subcharset is a CTEXT_* value. bytes carries the character code: a
- * one-byte character sits in the low byte, a two-byte character has
- * its first byte in bits 8-15 and its second byte in bits 0-7.
- *
- * Returns the result of the matching table lookup (ASCII is passed
- * through unchanged), or ERROR when subcharset is not one of the
- * CTEXT_* values handled here.
- */
-static long int ctext_to_ucs(int subcharset, unsigned long bytes)
-{
-    /* One-byte code with the top bit cleared, and with it forced on. */
-    const unsigned long gl_code = bytes & 0x7F;
-    const unsigned long gr_code = gl_code | 0x80;
-    /* Two-byte code as row and column, each byte reduced by 0x21. */
-    const unsigned long row = ((bytes >> 8) & 0xFF) - 0x21;
-    const unsigned long col = (bytes & 0xFF) - 0x21;
-
-    switch (subcharset) {
-      case CTEXT_ASCII:
-        /* one-byte ASCII */
-        return bytes;
-
-      case CTEXT_JISX0201_LEFT:
-        /* ASCII with yen and overline */
-        return sbcs_to_unicode(&sbcsdata_CS_JISX0201, gl_code);
-      case CTEXT_JISX0201_RIGHT:
-        /* JIS X 0201 half-width katakana */
-        return sbcs_to_unicode(&sbcsdata_CS_JISX0201, gr_code);
-
-      /* The ISO 8859 sets are always looked up in the top-bit-set half. */
-      case CTEXT_ISO8859_1:
-        return sbcs_to_unicode(&sbcsdata_CS_ISO8859_1, gr_code);
-      case CTEXT_ISO8859_2:
-        return sbcs_to_unicode(&sbcsdata_CS_ISO8859_2, gr_code);
-      case CTEXT_ISO8859_3:
-        return sbcs_to_unicode(&sbcsdata_CS_ISO8859_3, gr_code);
-      case CTEXT_ISO8859_4:
-        return sbcs_to_unicode(&sbcsdata_CS_ISO8859_4, gr_code);
-      case CTEXT_ISO8859_5:
-        return sbcs_to_unicode(&sbcsdata_CS_ISO8859_5, gr_code);
-      case CTEXT_ISO8859_6:
-        return sbcs_to_unicode(&sbcsdata_CS_ISO8859_6, gr_code);
-      case CTEXT_ISO8859_7:
-        return sbcs_to_unicode(&sbcsdata_CS_ISO8859_7, gr_code);
-      case CTEXT_ISO8859_8:
-        return sbcs_to_unicode(&sbcsdata_CS_ISO8859_8, gr_code);
-      case CTEXT_ISO8859_9:
-        return sbcs_to_unicode(&sbcsdata_CS_ISO8859_9, gr_code);
-
-      /* Two-byte sets are looked up by row and column. */
-      case CTEXT_GB2312:
-        return gb2312_to_unicode(row, col);
-      case CTEXT_KSC5601:
-        return ksx1001_to_unicode(row, col);
-      case CTEXT_JISX0208:
-        return jisx0208_to_unicode(row, col);
-      case CTEXT_JISX0212:
-        return jisx0212_to_unicode(row, col);
-
-      default:
-        return ERROR;
-    }
-}
-/*
- * Find a COMPOUND_TEXT sub-charset that can represent a Unicode
- * character.
- *
- * The candidates are tried in a fixed order and the first hit wins:
- * ASCII (anything below 0x80), ISO 8859-1 through 8859-9, JIS X 0201,
- * then the two-byte sets GB2312, KS C 5601 (via the ksx1001 table),
- * JIS X 0208 and JIS X 0212.
- *
- * On success, stores the CTEXT_* identifier in *subcharset and the
- * character code in *bytes, and returns 1. One-byte codes are stored
- * with 0x80 subtracted when they come from the upper half of their
- * table; two-byte codes are stored as ((row+0x21) << 8) | (col+0x21).
- * Returns 0, leaving both outputs untouched, when no sub-charset
- * contains the character.
- */
-static int ctext_from_ucs(long int ucs, int *subcharset, unsigned long *bytes)
-{
-    /*
-     * lo holds the one-byte table result, and later the column of a
-     * two-byte result; hi holds the row of a two-byte result.
-     */
-    int hi, lo;
-
-    if (ucs < 0x80) {
-        *subcharset = CTEXT_ASCII;
-        *bytes = ucs;
-        return 1;
-    }
-
-    lo = sbcs_from_unicode(&sbcsdata_CS_ISO8859_1, ucs);
-    if (lo != ERROR) {
-        *subcharset = CTEXT_ISO8859_1;
-        *bytes = lo - 0x80;
-        return 1;
-    }
-
-    lo = sbcs_from_unicode(&sbcsdata_CS_ISO8859_2, ucs);
-    if (lo != ERROR) {
-        *subcharset = CTEXT_ISO8859_2;
-        *bytes = lo - 0x80;
-        return 1;
-    }
-
-    lo = sbcs_from_unicode(&sbcsdata_CS_ISO8859_3, ucs);
-    if (lo != ERROR) {
-        *subcharset = CTEXT_ISO8859_3;
-        *bytes = lo - 0x80;
-        return 1;
-    }
-
-    lo = sbcs_from_unicode(&sbcsdata_CS_ISO8859_4, ucs);
-    if (lo != ERROR) {
-        *subcharset = CTEXT_ISO8859_4;
-        *bytes = lo - 0x80;
-        return 1;
-    }
-
-    lo = sbcs_from_unicode(&sbcsdata_CS_ISO8859_5, ucs);
-    if (lo != ERROR) {
-        *subcharset = CTEXT_ISO8859_5;
-        *bytes = lo - 0x80;
-        return 1;
-    }
-
-    lo = sbcs_from_unicode(&sbcsdata_CS_ISO8859_6, ucs);
-    if (lo != ERROR) {
-        *subcharset = CTEXT_ISO8859_6;
-        *bytes = lo - 0x80;
-        return 1;
-    }
-
-    lo = sbcs_from_unicode(&sbcsdata_CS_ISO8859_7, ucs);
-    if (lo != ERROR) {
-        *subcharset = CTEXT_ISO8859_7;
-        *bytes = lo - 0x80;
-        return 1;
-    }
-
-    lo = sbcs_from_unicode(&sbcsdata_CS_ISO8859_8, ucs);
-    if (lo != ERROR) {
-        *subcharset = CTEXT_ISO8859_8;
-        *bytes = lo - 0x80;
-        return 1;
-    }
-
-    lo = sbcs_from_unicode(&sbcsdata_CS_ISO8859_9, ucs);
-    if (lo != ERROR) {
-        *subcharset = CTEXT_ISO8859_9;
-        *bytes = lo - 0x80;
-        return 1;
-    }
-
-    /* JIS X 0201 is split in two: codes below 0x80 and the rest. */
-    lo = sbcs_from_unicode(&sbcsdata_CS_JISX0201, ucs);
-    if (lo != ERROR) {
-        *subcharset = (lo < 0x80) ? CTEXT_JISX0201_LEFT
-                                  : CTEXT_JISX0201_RIGHT;
-        *bytes = (lo < 0x80) ? lo : lo - 0x80;
-        return 1;
-    }
-
-    /* Two-byte sets: pick the first one that has the character... */
-    if (unicode_to_gb2312(ucs, &hi, &lo)) {
-        *subcharset = CTEXT_GB2312;
-    } else if (unicode_to_ksx1001(ucs, &hi, &lo)) {
-        *subcharset = CTEXT_KSC5601;
-    } else if (unicode_to_jisx0208(ucs, &hi, &lo)) {
-        *subcharset = CTEXT_JISX0208;
-    } else if (unicode_to_jisx0212(ucs, &hi, &lo)) {
-        *subcharset = CTEXT_JISX0212;
-    } else {
-        return 0;
-    }
-
-    /* ...and pack its row and column, each offset by 0x21. */
-    *bytes = ((hi + 0x21) << 8) | (lo + 0x21);
-    return 1;
-}
-/*
- * Expand to one brace-enclosed initialiser with five members: the
- * escape string str; the complement of the 6-bit mask 63 shifted to
- * the container's position; the sub-charset cs shifted to the same
- * position; and then cont and cs as given. The shift is six bits per
- * container index, with the RO bits masked out of cont first.
- */
-#define SEQ(str,cont,cs) \
-    { str, \
-      ~(63 << (6 * ((cont) & ~RO))), \
-      (cs) << (6 * ((cont) & ~RO)), \
-      (cont), \
-      (cs) }
-/*
- * Compound text defines restrictions on which container can take
- * which character sets. Things labelled `left half of' can only go
- * in GL; things labelled `right half of' can only go in GR; and 96
- * or 96^n character sets only _fit_ in GR. Thus:
- * - ASCII can only go in GL since it is the left half of 8859-*.
- * - All the 8859 sets can only go in GR.
- * - JISX0201 left is GL only; JISX0201 right is GR only.
- * - The four multibyte sets (GB2312, JISX0208, KSC5601, JISX0212)
- * can go in either; we prefer GR where possible since this leads
- * to a more compact EUC-like encoding.
- */
-/*
- * Escape sequences recognised for COMPOUND_TEXT, one SEQ() entry per
- * (escape string, container, sub-charset) combination. The entries
- * are kept in their original order.
- */
-static struct iso2022_escape ctext_escapes[] = {
-    /*
-     * Two-byte sets in container 0. These carry the RO flag.
-     * NOTE(review): RO is defined elsewhere; presumably it marks
-     * sequences that are accepted on input only - confirm.
-     */
-    SEQ("\033$(A", 0|RO, CTEXT_GB2312),
-    SEQ("\033$(B", 0|RO, CTEXT_JISX0208),
-    SEQ("\033$(C", 0|RO, CTEXT_KSC5601),
-    SEQ("\033$(D", 0|RO, CTEXT_JISX0212),
-
-    /* The same two-byte sets in container 1. */
-    SEQ("\033$)A", 1,    CTEXT_GB2312),
-    SEQ("\033$)B", 1,    CTEXT_JISX0208),
-    SEQ("\033$)C", 1,    CTEXT_KSC5601),
-    SEQ("\033$)D", 1,    CTEXT_JISX0212),
-
-    /* One-byte sets for container 0. */
-    SEQ("\033(B",  0,    CTEXT_ASCII),
-    SEQ("\033(J",  0,    CTEXT_JISX0201_LEFT),
-
-    /* One-byte sets for container 1. */
-    SEQ("\033)I",  1,    CTEXT_JISX0201_RIGHT),
-    SEQ("\033-A",  1,    CTEXT_ISO8859_1),
-    SEQ("\033-B",  1,    CTEXT_ISO8859_2),
-    SEQ("\033-C",  1,    CTEXT_ISO8859_3),
-    SEQ("\033-D",  1,    CTEXT_ISO8859_4),
-    /* From here on the final bytes do not follow the part numbers. */
-    SEQ("\033-F",  1,    CTEXT_ISO8859_7),
-    SEQ("\033-G",  1,    CTEXT_ISO8859_6),
-    SEQ("\033-H",  1,    CTEXT_ISO8859_8),
-    SEQ("\033-L",  1,    CTEXT_ISO8859_5),
-    SEQ("\033-M",  1,    CTEXT_ISO8859_9),
-};
-/*
- * Parameter block describing COMPOUND_TEXT. The members are given
- * positionally, so their order must follow the declaration of
- * struct iso2022 (not shown here).
- */
-static struct iso2022 ctext = {
-    /* Escape-sequence table and its length. */
-    ctext_escapes,
-    lenof(ctext_escapes),
-    /* Bytes per character for each sub-charset; must match the enum above. */
-    "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\2\2",
-    /* NOTE(review): empty string; purpose is set by struct iso2022 - confirm. */
-    "",
-    /*
-     * CTEXT_ASCII in bits 0-5 and CTEXT_ISO8859_1 in bits 6-11, the
-     * same six-bits-per-container layout that SEQ() uses.
-     * NOTE(review): presumably the initial container contents; the
-     * meaning of the 0x80000000 bit is not visible here - confirm.
-     */
-    0x80000000 | (CTEXT_ASCII<<0) | (CTEXT_ISO8859_1<<6),
-    /* NOTE(review): empty string and flag; see struct iso2022 - confirm. */
-    "",
-    TRUE,
-    /* Conversions between sub-charset codes and Unicode. */
-    ctext_to_ucs,
-    ctext_from_ucs
-};
-/*
- * Charset descriptor for COMPOUND_TEXT: the CS_CTEXT identifier, the
- * read_iso2022s and write_iso2022s routines, and a pointer to the
- * ctext parameter block defined in this file.
- */
-const charset_spec charset_CS_CTEXT = {
-    CS_CTEXT,
-    read_iso2022s,
-    write_iso2022s,
-    &ctext
-};
-