Silly of me to overlook it: another obvious way you might like to
[sgt/charset] / cstable.c
/*
 * cstable.c - libcharset supporting utility which draws up a map
 * of Unicode code points U+0000 to U+2FFFF (the BMP plus planes 1
 * and 2) and annotates each one with details of which other
 * character sets it appears in.
 *
 * Note this is not a libcharset _client_; it is part of the
 * libcharset _package_, using libcharset internals.
 */
9
10 #include <stdio.h>
11 #include <string.h>
12
13 #include "charset.h"
14 #include "internal.h"
15 #include "sbcsdat.h"
16
17 #define ENUM_CHARSET(x) extern charset_spec const charset_##x;
18 #include "enum.c"
19 #undef ENUM_CHARSET
20 static charset_spec const *const cs_table[] = {
21 #define ENUM_CHARSET(x) &charset_##x,
22 #include "enum.c"
23 #undef ENUM_CHARSET
24 };
25 static const char *const cs_names[] = {
26 #define ENUM_CHARSET(x) #x,
27 #include "enum.c"
28 #undef ENUM_CHARSET
29 };
30
31 int main(int argc, char **argv)
32 {
33 long int c;
34 int internal_names = FALSE;
35 int verbose = FALSE;
36
37 while (--argc) {
38 char *p = *++argv;
39 if (!strcmp(p, "-i"))
40 internal_names = TRUE;
41 else if (!strcmp(p, "-v"))
42 verbose = TRUE;
43 }
44
45 for (c = 0; c < 0x30000; c++) {
46 int i, plane, row, col, chr;
47 char const *sep = "";
48
49 printf("U+%04x:", (unsigned)c);
50
51 /*
52 * Look up in SBCSes.
53 */
54 for (i = 0; i < lenof(cs_table); i++)
55 if (cs_table[i]->read == read_sbcs &&
56 (chr = sbcs_from_unicode(cs_table[i]->data, c)) != ERROR) {
57 printf("%s %s", sep,
58 (internal_names ? cs_names[i] :
59 charset_to_localenc(cs_table[i]->charset)));
60 if (verbose)
61 printf("[%d]", chr);
62 sep = ";";
63 }
64
65 /*
66 * Look up individually in MBCS base charsets. The
67 * `internal_names' flag does not affect these, because
68 * MBCS base charsets aren't directly encoded by CS_*
69 * constants.
70 */
71 if (unicode_to_big5(c, &row, &col)) {
72 printf("%s Big5", sep);
73 if (verbose)
74 printf("[%d,%d]", row, col);
75 sep = ";";
76 }
77
78 if (unicode_to_gb2312(c, &row, &col)) {
79 printf("%s GB2312", sep);
80 if (verbose)
81 printf("[%d,%d]", row, col);
82 sep = ";";
83 }
84
85 if (unicode_to_jisx0208(c, &row, &col)) {
86 printf("%s JIS X 0208", sep);
87 if (verbose)
88 printf("[%d,%d]", row, col);
89 sep = ";";
90 }
91
92 if (unicode_to_ksx1001(c, &row, &col)) {
93 printf("%s KS X 1001", sep);
94 if (verbose)
95 printf("[%d,%d]", row, col);
96 sep = ";";
97 }
98
99 if (unicode_to_cp949(c, &row, &col)) {
100 printf("%s CP949", sep);
101 if (verbose)
102 printf("[%d,%d]", row, col);
103 sep = ";";
104 }
105
106 if (unicode_to_cns11643(c, &plane, &row, &col)) {
107 printf("%s CNS11643", sep);
108 if (verbose)
109 printf("[%d,%d,%d]", plane, row, col);
110 sep = ";";
111 }
112
113 if (!*sep)
114 printf(" unicode-only");
115
116 printf("\n");
117 }
118
119 return 0;
120 }