01081d4e |
1 | /* |
2 | * cstable.c - libcharset supporting utility which draws up a map |
3 | * of Unicode planes 0-2 (U+0000 to U+2FFFF) and annotates it with |
4 | * other character sets each character appears in. |
5 | * |
6 | * Note this is not a libcharset _client_; it is part of the |
7 | * libcharset _package_, using libcharset internals. |
8 | */ |
9 | |
10 | #include "charset.h" |
11 | #include "internal.h" |
12 | #include "sbcsdat.h" |
13 | |
14 | #define ENUM_CHARSET(x) extern charset_spec const charset_##x; |
15 | #include "enum.c" |
16 | #undef ENUM_CHARSET |
17 | static charset_spec const *const cs_table[] = { |
18 | #define ENUM_CHARSET(x) &charset_##x, |
19 | #include "enum.c" |
20 | #undef ENUM_CHARSET |
21 | }; |
22 | |
3a17f847 |
23 | int main(int argc, char **argv) |
01081d4e |
24 | { |
25 | long int c; |
3a17f847 |
26 | int verbose = FALSE; |
27 | |
28 | if (argc > 1 && !strcmp(argv[1], "-v")) |
29 | verbose = TRUE; |
01081d4e |
30 | |
298814b6 |
31 | for (c = 0; c < 0x30000; c++) { |
3a17f847 |
32 | int i, plane, row, col, chr; |
01081d4e |
33 | char const *sep = ""; |
34 | |
35 | printf("U+%04x:", c); |
36 | |
37 | /* |
38 | * Look up in SBCSes. |
39 | */ |
40 | for (i = 0; i < lenof(cs_table); i++) |
41 | if (cs_table[i]->read == read_sbcs && |
3a17f847 |
42 | (chr = sbcs_from_unicode(cs_table[i]->data, c)) != ERROR) { |
01081d4e |
43 | printf("%s %s", sep, |
44 | charset_to_localenc(cs_table[i]->charset)); |
3a17f847 |
45 | if (verbose) |
46 | printf("[%d]", chr); |
01081d4e |
47 | sep = ";"; |
48 | } |
49 | |
50 | /* |
51 | * Look up individually in MBCS base charsets. |
52 | */ |
53 | if (unicode_to_big5(c, &row, &col)) { |
54 | printf("%s Big5", sep); |
3a17f847 |
55 | if (verbose) |
56 | printf("[%d,%d]", row, col); |
01081d4e |
57 | sep = ";"; |
58 | } |
59 | if (unicode_to_gb2312(c, &row, &col)) { |
60 | printf("%s GB2312", sep); |
3a17f847 |
61 | if (verbose) |
62 | printf("[%d,%d]", row, col); |
01081d4e |
63 | sep = ";"; |
64 | } |
65 | |
66 | if (unicode_to_jisx0208(c, &row, &col)) { |
67 | printf("%s JIS X 0208", sep); |
3a17f847 |
68 | if (verbose) |
69 | printf("[%d,%d]", row, col); |
01081d4e |
70 | sep = ";"; |
71 | } |
72 | |
73 | if (unicode_to_ksx1001(c, &row, &col)) { |
74 | printf("%s KS X 1001", sep); |
3a17f847 |
75 | if (verbose) |
76 | printf("[%d,%d]", row, col); |
01081d4e |
77 | sep = ";"; |
78 | } |
79 | |
80 | if (unicode_to_cp949(c, &row, &col)) { |
81 | printf("%s CP949", sep); |
3a17f847 |
82 | if (verbose) |
83 | printf("[%d,%d]", row, col); |
01081d4e |
84 | sep = ";"; |
85 | } |
86 | |
298814b6 |
87 | if (unicode_to_cns11643(c, &plane, &row, &col)) { |
88 | printf("%s CNS11643", sep); |
3a17f847 |
89 | if (verbose) |
90 | printf("[%d,%d,%d]", plane, row, col); |
298814b6 |
91 | sep = ";"; |
92 | } |
93 | |
01081d4e |
94 | if (!*sep) |
95 | printf(" unicode-only"); |
96 | |
97 | printf("\n"); |
98 | } |
99 | |
100 | return 0; |
101 | } |