/*
 * mimeenc.c - translate our internal character set codes to and
 * from MIME standard character-set names.
 */
/*
 * Most of these names are taken from
 *
 *   http://www.iana.org/assignments/character-sets
 *
 * Where multiple encoding names map to the same encoding id
 * (such as the variety of aliases for ISO-8859-1), the first
 * is considered canonical and will be returned when
 * translating the id to a string.
 *
 * I also list here a few names which aren't in the above web
 * page, but which I've seen in the wild in real mail. These
 * are marked with a comment saying WILD.
 */
30 { "US-ASCII", CS_ASCII
},
31 { "ANSI_X3.4-1968", CS_ASCII
},
32 { "iso-ir-6", CS_ASCII
},
33 { "ANSI_X3.4-1986", CS_ASCII
},
34 { "ISO_646.irv:1991", CS_ASCII
},
35 { "ASCII", CS_ASCII
},
36 { "ISO646-US", CS_ASCII
},
38 { "IBM367", CS_ASCII
},
39 { "cp367", CS_ASCII
},
40 { "csASCII", CS_ASCII
},
41 { "646", CS_ASCII
}, /* WILD */
43 { "BS_4730", CS_BS4730
},
44 { "iso-ir-4", CS_BS4730
},
45 { "ISO646-GB", CS_BS4730
},
48 { "csISO4UnitedKingdom", CS_BS4730
},
50 { "ISO-8859-1", CS_ISO8859_1
},
51 { "ISO8859-1", CS_ISO8859_1
}, /* WILD */
52 { "iso-ir-100", CS_ISO8859_1
},
53 { "ISO_8859-1", CS_ISO8859_1
},
54 { "ISO_8859-1:1987", CS_ISO8859_1
},
55 { "latin1", CS_ISO8859_1
},
56 { "l1", CS_ISO8859_1
},
57 { "IBM819", CS_ISO8859_1
},
58 { "CP819", CS_ISO8859_1
},
59 { "csISOLatin1", CS_ISO8859_1
},
61 { "ISO-8859-2", CS_ISO8859_2
},
62 { "ISO8859-2", CS_ISO8859_2
}, /* WILD */
63 { "ISO_8859-2:1987", CS_ISO8859_2
},
64 { "iso-ir-101", CS_ISO8859_2
},
65 { "ISO_8859-2", CS_ISO8859_2
},
66 { "latin2", CS_ISO8859_2
},
67 { "l2", CS_ISO8859_2
},
68 { "csISOLatin2", CS_ISO8859_2
},
70 { "ISO-8859-3", CS_ISO8859_3
},
71 { "ISO8859-3", CS_ISO8859_3
}, /* WILD */
72 { "ISO_8859-3:1988", CS_ISO8859_3
},
73 { "iso-ir-109", CS_ISO8859_3
},
74 { "ISO_8859-3", CS_ISO8859_3
},
75 { "latin3", CS_ISO8859_3
},
76 { "l3", CS_ISO8859_3
},
77 { "csISOLatin3", CS_ISO8859_3
},
79 { "ISO-8859-4", CS_ISO8859_4
},
80 { "ISO8859-4", CS_ISO8859_4
}, /* WILD */
81 { "ISO_8859-4:1988", CS_ISO8859_4
},
82 { "iso-ir-110", CS_ISO8859_4
},
83 { "ISO_8859-4", CS_ISO8859_4
},
84 { "latin4", CS_ISO8859_4
},
85 { "l4", CS_ISO8859_4
},
86 { "csISOLatin4", CS_ISO8859_4
},
88 { "ISO-8859-5", CS_ISO8859_5
},
89 { "ISO8859-5", CS_ISO8859_5
}, /* WILD */
90 { "ISO_8859-5:1988", CS_ISO8859_5
},
91 { "iso-ir-144", CS_ISO8859_5
},
92 { "ISO_8859-5", CS_ISO8859_5
},
93 { "cyrillic", CS_ISO8859_5
},
94 { "csISOLatinCyrillic", CS_ISO8859_5
},
96 { "ISO-8859-6", CS_ISO8859_6
},
97 { "ISO8859-6", CS_ISO8859_6
}, /* WILD */
98 { "ISO_8859-6:1987", CS_ISO8859_6
},
99 { "iso-ir-127", CS_ISO8859_6
},
100 { "ISO_8859-6", CS_ISO8859_6
},
101 { "ECMA-114", CS_ISO8859_6
},
102 { "ASMO-708", CS_ISO8859_6
},
103 { "arabic", CS_ISO8859_6
},
104 { "csISOLatinArabic", CS_ISO8859_6
},
106 { "ISO-8859-7", CS_ISO8859_7
},
107 { "ISO8859-7", CS_ISO8859_7
}, /* WILD */
108 { "ISO_8859-7:1987", CS_ISO8859_7
},
109 { "iso-ir-126", CS_ISO8859_7
},
110 { "ISO_8859-7", CS_ISO8859_7
},
111 { "ELOT_928", CS_ISO8859_7
},
112 { "ECMA-118", CS_ISO8859_7
},
113 { "greek", CS_ISO8859_7
},
114 { "greek8", CS_ISO8859_7
},
115 { "csISOLatinGreek", CS_ISO8859_7
},
117 { "ISO-8859-8", CS_ISO8859_8
},
118 { "ISO8859-8", CS_ISO8859_8
}, /* WILD */
119 { "ISO_8859-8:1988", CS_ISO8859_8
},
120 { "iso-ir-138", CS_ISO8859_8
},
121 { "ISO_8859-8", CS_ISO8859_8
},
122 { "hebrew", CS_ISO8859_8
},
123 { "csISOLatinHebrew", CS_ISO8859_8
},
125 { "ISO-8859-9", CS_ISO8859_9
},
126 { "ISO8859-9", CS_ISO8859_9
}, /* WILD */
127 { "ISO_8859-9:1989", CS_ISO8859_9
},
128 { "iso-ir-148", CS_ISO8859_9
},
129 { "ISO_8859-9", CS_ISO8859_9
},
130 { "latin5", CS_ISO8859_9
},
131 { "l5", CS_ISO8859_9
},
132 { "csISOLatin5", CS_ISO8859_9
},
134 { "ISO-8859-10", CS_ISO8859_10
},
135 { "ISO8859-10", CS_ISO8859_10
}, /* WILD */
136 { "iso-ir-157", CS_ISO8859_10
},
137 { "l6", CS_ISO8859_10
},
138 { "ISO_8859-10:1992", CS_ISO8859_10
},
139 { "csISOLatin6", CS_ISO8859_10
},
140 { "latin6", CS_ISO8859_10
},
142 { "TIS-620", CS_ISO8859_11
},
144 { "ISO-8859-13", CS_ISO8859_13
},
145 { "ISO8859-13", CS_ISO8859_13
}, /* WILD */
147 { "ISO-8859-14", CS_ISO8859_14
},
148 { "ISO8859-14", CS_ISO8859_14
}, /* WILD */
149 { "iso-ir-199", CS_ISO8859_14
},
150 { "ISO_8859-14:1998", CS_ISO8859_14
},
151 { "ISO_8859-14", CS_ISO8859_14
},
152 { "latin8", CS_ISO8859_14
},
153 { "iso-celtic", CS_ISO8859_14
},
154 { "l8", CS_ISO8859_14
},
156 { "ISO-8859-15", CS_ISO8859_15
},
157 { "ISO8859-15", CS_ISO8859_15
}, /* WILD */
158 { "ISO_8859-15", CS_ISO8859_15
},
159 { "Latin-9", CS_ISO8859_15
},
161 { "ISO-8859-16", CS_ISO8859_16
},
162 { "ISO8859-16", CS_ISO8859_16
}, /* WILD */
163 { "iso-ir-226", CS_ISO8859_16
},
164 { "ISO_8859-16", CS_ISO8859_16
},
165 { "ISO_8859-16:2001", CS_ISO8859_16
},
166 { "latin10", CS_ISO8859_16
},
167 { "l10", CS_ISO8859_16
},
169 { "IBM437", CS_CP437
},
170 { "cp437", CS_CP437
},
172 { "csPC8CodePage437", CS_CP437
},
174 { "IBM850", CS_CP850
},
175 { "cp850", CS_CP850
},
177 { "csPC850Multilingual", CS_CP850
},
179 { "IBM866", CS_CP866
},
180 { "cp866", CS_CP866
},
182 { "csIBM866", CS_CP866
},
184 { "windows-874", CS_CP874
}, /* WILD */
186 { "windows-1250", CS_CP1250
},
187 { "win-1250", CS_CP1250
}, /* WILD */
189 { "windows-1251", CS_CP1251
},
190 { "win-1251", CS_CP1251
}, /* WILD */
192 { "windows-1252", CS_CP1252
},
193 { "win-1252", CS_CP1252
}, /* WILD */
195 { "windows-1253", CS_CP1253
},
196 { "win-1253", CS_CP1253
}, /* WILD */
198 { "windows-1254", CS_CP1254
},
199 { "win-1254", CS_CP1254
}, /* WILD */
201 { "windows-1255", CS_CP1255
},
202 { "win-1255", CS_CP1255
}, /* WILD */
204 { "windows-1256", CS_CP1256
},
205 { "win-1256", CS_CP1256
}, /* WILD */
207 { "windows-1257", CS_CP1257
},
208 { "win-1257", CS_CP1257
}, /* WILD */
210 { "windows-1258", CS_CP1258
},
211 { "win-1258", CS_CP1258
}, /* WILD */
213 { "KOI8-R", CS_KOI8_R
},
214 { "csKOI8R", CS_KOI8_R
},
216 { "KOI8-U", CS_KOI8_U
},
218 { "KOI8-RU", CS_KOI8_RU
}, /* WILD */
220 { "JIS_X0201", CS_JISX0201
},
221 { "X0201", CS_JISX0201
},
222 { "csHalfWidthKatakana", CS_JISX0201
},
224 { "macintosh", CS_MAC_ROMAN_OLD
},
225 { "mac", CS_MAC_ROMAN_OLD
},
226 { "csMacintosh", CS_MAC_ROMAN_OLD
},
228 { "VISCII", CS_VISCII
},
229 { "csVISCII", CS_VISCII
},
231 { "hp-roman8", CS_HP_ROMAN8
},
232 { "roman8", CS_HP_ROMAN8
},
233 { "r8", CS_HP_ROMAN8
},
234 { "csHPRoman8", CS_HP_ROMAN8
},
236 { "DEC-MCS", CS_DEC_MCS
},
237 { "dec", CS_DEC_MCS
},
238 { "csDECMCS", CS_DEC_MCS
},
240 { "UTF-8", CS_UTF8
},
242 { "UTF-7", CS_UTF7
},
243 { "UNICODE-1-1-UTF-7", CS_UTF7
},
244 { "csUnicode11UTF7", CS_UTF7
},
/*
 * Quite why the EUC-CN encoding is known to MIME by the name
 * of its underlying character set, I'm not entirely sure, but
 * it is.
 */
251 { "GB2312", CS_EUC_CN
},
252 { "csGB2312", CS_EUC_CN
},
254 { "EUC-KR", CS_EUC_KR
},
255 { "csEUCKR", CS_EUC_KR
},
257 { "EUC-JP", CS_EUC_JP
},
258 { "csEUCPkdFmtJapanese", CS_EUC_JP
},
259 { "Extended_UNIX_Code_Packed_Format_for_Japanese", CS_EUC_JP
},
261 { "ISO-2022-JP", CS_ISO2022_JP
},
262 { "csISO2022JP", CS_ISO2022_JP
},
264 { "ISO-2022-KR", CS_ISO2022_KR
},
265 { "csISO2022KR", CS_ISO2022_KR
},
268 { "csBig5", CS_BIG5
},
269 { "Big-5", CS_BIG5
}, /* WILD */
270 { "ChineseBig5", CS_BIG5
}, /* WILD */
272 { "Shift_JIS", CS_SHIFT_JIS
},
273 { "MS_Kanji", CS_SHIFT_JIS
},
274 { "csShiftJIS", CS_SHIFT_JIS
},
276 { "HZ-GB-2312", CS_HZ
},
278 { "UTF-16BE", CS_UTF16BE
},
280 { "UTF-16LE", CS_UTF16LE
},
282 { "UTF-16", CS_UTF16
},
/*
 * This bit is fiddly and possibly technically incorrect; but
 * rumour has it that the KSC 5601 encoding is a subset of
 * Microsoft CP949, and that MS products tend to announce CP949
 * as KSC 5601 in much the same way they seem willing to
 * announce CP1252 as its subset ISO 8859-1. So I cheat
 * shamelessly here by letting KSC 5601 map to CP949.
 */
292 { "KS_C_5601-1987", CS_CP949
},
293 { "iso-ir-149", CS_CP949
},
294 { "KS_C_5601-1989", CS_CP949
},
295 { "KSC_5601", CS_CP949
},
296 { "korean", CS_CP949
},
297 { "csKSC56011987", CS_CP949
},
298 { "KSC5601", CS_CP949
}, /* WILD */
301 { "ISO-2022-JP-2", CS_ISO2022_JP_2
},
302 { "csISO2022JP2", CS_ISO2022_JP_2
},
306 const char *charset_to_mimeenc(int charset
)
310 for (i
= 0; i
< (int)lenof(mimeencs
); i
++)
311 if (charset
== mimeencs
[i
].charset
)
312 return mimeencs
[i
].name
;
314 return NULL
; /* not found */
317 int charset_from_mimeenc(const char *name
)
321 for (i
= 0; i
< (int)lenof(mimeencs
); i
++) {
324 q
= mimeencs
[i
].name
;
326 if (tolower((unsigned char)*p
) != tolower((unsigned char)*q
))
331 return mimeencs
[i
].charset
;
334 return CS_NONE
; /* not found */