Silly of me to overlook it: another obvious way you might like to
[sgt/charset] / emacsenc.c
CommitLineData
/*
 * emacsenc.c - translate our internal character set codes to and from
 * GNU Emacs coding system symbols. Derived from running M-x
 * list-coding-systems in Emacs 21.3.
 *
 */
7
8#include <ctype.h>
9#include "charset.h"
10#include "internal.h"
11
12static const struct {
13 const char *name;
14 int charset;
15} emacsencs[] = {
16 /*
17 * Where multiple encoding names map to the same encoding id
18 * (such as iso-latin-1 and iso-8859-1), the first is considered
19 * canonical and will be returned when translating the id to a
20 * string.
21 */
22 { "us-ascii", CS_ASCII },
23 { "iso-latin-9", CS_ISO8859_15 },
24 { "iso-8859-15", CS_ISO8859_15 },
25 { "latin-9", CS_ISO8859_15 },
26 { "latin-0", CS_ISO8859_15 },
27 { "iso-latin-1", CS_ISO8859_1 },
28 { "iso-8859-1", CS_ISO8859_1 },
29 { "latin-1", CS_ISO8859_1 },
30 { "iso-latin-2", CS_ISO8859_2 },
31 { "iso-8859-2", CS_ISO8859_2 },
32 { "latin-2", CS_ISO8859_2 },
33 { "iso-latin-3", CS_ISO8859_3 },
34 { "iso-8859-3", CS_ISO8859_3 },
35 { "latin-3", CS_ISO8859_3 },
36 { "iso-latin-4", CS_ISO8859_4 },
37 { "iso-8859-4", CS_ISO8859_4 },
38 { "latin-4", CS_ISO8859_4 },
39 { "cyrillic-iso-8bit", CS_ISO8859_5 },
40 { "iso-8859-5", CS_ISO8859_5 },
41 { "greek-iso-8bit", CS_ISO8859_7 },
42 { "iso-8859-7", CS_ISO8859_7 },
43 { "hebrew-iso-8bit", CS_ISO8859_8 },
44 { "iso-8859-8", CS_ISO8859_8 },
45 { "iso-8859-8-e", CS_ISO8859_8 },
46 { "iso-8859-8-i", CS_ISO8859_8 },
47 { "iso-latin-5", CS_ISO8859_9 },
48 { "iso-8859-9", CS_ISO8859_9 },
49 { "latin-5", CS_ISO8859_9 },
50 { "chinese-big5", CS_BIG5 },
51 { "big5", CS_BIG5 },
52 { "cn-big5", CS_BIG5 },
53 { "cp437", CS_CP437 },
54 { "cp850", CS_CP850 },
55 { "cp866", CS_CP866 },
56 { "cp1250", CS_CP1250 },
57 { "cp1251", CS_CP1251 },
58 { "cp1253", CS_CP1253 },
59 { "cp1257", CS_CP1257 },
60 { "japanese-iso-8bit", CS_EUC_JP },
61 { "euc-japan-1990", CS_EUC_JP },
62 { "euc-japan", CS_EUC_JP },
63 { "euc-jp", CS_EUC_JP },
64 { "iso-2022-jp", CS_ISO2022_JP },
65 { "junet", CS_ISO2022_JP },
66 { "korean-iso-8bit", CS_EUC_KR },
67 { "euc-kr", CS_EUC_KR },
68 { "euc-korea", CS_EUC_KR },
69 { "iso-2022-kr", CS_ISO2022_KR },
70 { "korean-iso-7bit-lock", CS_ISO2022_KR },
71 { "mac-roman", CS_MAC_ROMAN },
72 { "cyrillic-koi8", CS_KOI8_R },
73 { "koi8-r", CS_KOI8_R },
74 { "koi8", CS_KOI8_R },
75 { "japanese-shift-jis", CS_SHIFT_JIS },
76 { "shift_jis", CS_SHIFT_JIS },
77 { "sjis", CS_SHIFT_JIS },
78 { "thai-tis620", CS_ISO8859_11 },
79 { "th-tis620", CS_ISO8859_11 },
80 { "tis620", CS_ISO8859_11 },
81 { "tis-620", CS_ISO8859_11 },
82 { "mule-utf-16-be", CS_UTF16BE },
83 { "utf-16-be", CS_UTF16BE },
84 { "mule-utf-16-le", CS_UTF16LE },
85 { "utf-16-le", CS_UTF16LE },
86 { "mule-utf-8", CS_UTF8 },
87 { "utf-8", CS_UTF8 },
88 { "vietnamese-viscii", CS_VISCII },
89 { "viscii", CS_VISCII },
90 { "iso-latin-8", CS_ISO8859_14 },
91 { "iso-8859-14", CS_ISO8859_14 },
92 { "latin-8", CS_ISO8859_14 },
93 { "compound-text", CS_CTEXT },
94 { "x-ctext", CS_CTEXT },
95 { "ctext", CS_CTEXT },
96 { "chinese-hz", CS_HZ },
97 { "hz-gb-2312", CS_HZ },
98 { "hz", CS_HZ },
99};
100
101const char *charset_to_emacsenc(int charset)
102{
103 int i;
104
105 for (i = 0; i < (int)lenof(emacsencs); i++)
106 if (charset == emacsencs[i].charset)
107 return emacsencs[i].name;
108
109 return NULL; /* not found */
110}
111
112int charset_from_emacsenc(const char *name)
113{
114 int i;
115
116 for (i = 0; i < (int)lenof(emacsencs); i++) {
117 const char *p, *q;
118 p = name;
119 q = emacsencs[i].name;
120 while (*p || *q) {
273ef1a1 121 if (tolower((unsigned char)*p) != tolower((unsigned char)*q))
32361bda 122 break;
123 p++; q++;
124 }
125 if (!*p && !*q)
126 return emacsencs[i].charset;
127 }
128
129 return CS_NONE; /* not found */
130}