01081d4e |
1 | /* |
2 | * cstable.c - libcharset supporting utility which draws up a map |
3 | * of the whole Unicode BMP and annotates it with details of which |
4 | * other character sets each character appears in. |
5 | * |
6 | * Note this is not a libcharset _client_; it is part of the |
7 | * libcharset _package_, using libcharset internals. |
8 | */ |
9 | |
#include <stdio.h>

#include "charset.h"
#include "internal.h"
#include "sbcsdat.h"
13 | |
14 | #define ENUM_CHARSET(x) extern charset_spec const charset_##x; |
15 | #include "enum.c" |
16 | #undef ENUM_CHARSET |
17 | static charset_spec const *const cs_table[] = { |
18 | #define ENUM_CHARSET(x) &charset_##x, |
19 | #include "enum.c" |
20 | #undef ENUM_CHARSET |
21 | }; |
22 | |
23 | int main(void) |
24 | { |
25 | long int c; |
26 | |
27 | for (c = 0; c < 0x10000; c++) { |
28 | int i, row, col; |
29 | char const *sep = ""; |
30 | |
31 | printf("U+%04x:", c); |
32 | |
33 | /* |
34 | * Look up in SBCSes. |
35 | */ |
36 | for (i = 0; i < lenof(cs_table); i++) |
37 | if (cs_table[i]->read == read_sbcs && |
38 | sbcs_from_unicode(cs_table[i]->data, c) != ERROR) { |
39 | printf("%s %s", sep, |
40 | charset_to_localenc(cs_table[i]->charset)); |
41 | sep = ";"; |
42 | } |
43 | |
44 | /* |
45 | * Look up individually in MBCS base charsets. |
46 | */ |
47 | if (unicode_to_big5(c, &row, &col)) { |
48 | printf("%s Big5", sep); |
49 | sep = ";"; |
50 | } |
51 | if (unicode_to_gb2312(c, &row, &col)) { |
52 | printf("%s GB2312", sep); |
53 | sep = ";"; |
54 | } |
55 | |
56 | if (unicode_to_jisx0208(c, &row, &col)) { |
57 | printf("%s JIS X 0208", sep); |
58 | sep = ";"; |
59 | } |
60 | |
61 | if (unicode_to_ksx1001(c, &row, &col)) { |
62 | printf("%s KS X 1001", sep); |
63 | sep = ";"; |
64 | } |
65 | |
66 | if (unicode_to_cp949(c, &row, &col)) { |
67 | printf("%s CP949", sep); |
68 | sep = ";"; |
69 | } |
70 | |
71 | if (!*sep) |
72 | printf(" unicode-only"); |
73 | |
74 | printf("\n"); |
75 | } |
76 | |
77 | return 0; |
78 | } |