EUC-TW implementation, plus an explanation of why ISO-2022-CN is difficult.
[sgt/charset] / cstable.c
CommitLineData
/*
 * cstable.c - libcharset supporting utility which draws up a map
 * of Unicode planes 0-2 (U+0000 to U+2FFFF) and annotates it with
 * details of which other character sets each character appears in.
 *
 * Note this is not a libcharset _client_; it is part of the
 * libcharset _package_, using libcharset internals.
 */
9
#include <stdio.h>
#include <string.h>

#include "charset.h"
#include "internal.h"
#include "sbcsdat.h"
13
14#define ENUM_CHARSET(x) extern charset_spec const charset_##x;
15#include "enum.c"
16#undef ENUM_CHARSET
17static charset_spec const *const cs_table[] = {
18#define ENUM_CHARSET(x) &charset_##x,
19#include "enum.c"
20#undef ENUM_CHARSET
21};
22
3a17f847 23int main(int argc, char **argv)
01081d4e 24{
25 long int c;
3a17f847 26 int verbose = FALSE;
27
28 if (argc > 1 && !strcmp(argv[1], "-v"))
29 verbose = TRUE;
01081d4e 30
298814b6 31 for (c = 0; c < 0x30000; c++) {
3a17f847 32 int i, plane, row, col, chr;
01081d4e 33 char const *sep = "";
34
35 printf("U+%04x:", c);
36
37 /*
38 * Look up in SBCSes.
39 */
40 for (i = 0; i < lenof(cs_table); i++)
41 if (cs_table[i]->read == read_sbcs &&
3a17f847 42 (chr = sbcs_from_unicode(cs_table[i]->data, c)) != ERROR) {
01081d4e 43 printf("%s %s", sep,
44 charset_to_localenc(cs_table[i]->charset));
3a17f847 45 if (verbose)
46 printf("[%d]", chr);
01081d4e 47 sep = ";";
48 }
49
50 /*
51 * Look up individually in MBCS base charsets.
52 */
53 if (unicode_to_big5(c, &row, &col)) {
54 printf("%s Big5", sep);
3a17f847 55 if (verbose)
56 printf("[%d,%d]", row, col);
01081d4e 57 sep = ";";
58 }
59 if (unicode_to_gb2312(c, &row, &col)) {
60 printf("%s GB2312", sep);
3a17f847 61 if (verbose)
62 printf("[%d,%d]", row, col);
01081d4e 63 sep = ";";
64 }
65
66 if (unicode_to_jisx0208(c, &row, &col)) {
67 printf("%s JIS X 0208", sep);
3a17f847 68 if (verbose)
69 printf("[%d,%d]", row, col);
01081d4e 70 sep = ";";
71 }
72
73 if (unicode_to_ksx1001(c, &row, &col)) {
74 printf("%s KS X 1001", sep);
3a17f847 75 if (verbose)
76 printf("[%d,%d]", row, col);
01081d4e 77 sep = ";";
78 }
79
80 if (unicode_to_cp949(c, &row, &col)) {
81 printf("%s CP949", sep);
3a17f847 82 if (verbose)
83 printf("[%d,%d]", row, col);
01081d4e 84 sep = ";";
85 }
86
298814b6 87 if (unicode_to_cns11643(c, &plane, &row, &col)) {
88 printf("%s CNS11643", sep);
3a17f847 89 if (verbose)
90 printf("[%d,%d,%d]", plane, row, col);
298814b6 91 sep = ";";
92 }
93
01081d4e 94 if (!*sep)
95 printf(" unicode-only");
96
97 printf("\n");
98 }
99
100 return 0;
101}