2 * cstable.c - libcharset supporting utility which draws up a map
3 * of Unicode code points U+0000 to U+2FFFF (planes 0-2, not only the
4 * BMP) and annotates it with details of which other character sets
4 * each character appears in.
6 * Note this is not a libcharset _client_; it is part of the
7 * libcharset _package_, using libcharset internals.
17 #define ENUM_CHARSET(x) extern charset_spec const charset_##x;
20 static charset_spec
const *const cs_table
[] = {
21 #define ENUM_CHARSET(x) &charset_##x,
25 static const char *const cs_names
[] = {
26 #define ENUM_CHARSET(x) #x,
31 int main(int argc
, char **argv
)
34 int internal_names
= FALSE
;
40 internal_names
= TRUE
;
41 else if (!strcmp(p
, "-v"))
45 for (c
= 0; c
< 0x30000; c
++) {
46 int i
, plane
, row
, col
, chr
;
49 printf("U+%04x:", (unsigned)c
);
54 for (i
= 0; i
< lenof(cs_table
); i
++)
55 if (cs_table
[i
]->read
== read_sbcs
&&
56 (chr
= sbcs_from_unicode(cs_table
[i
]->data
, c
)) != ERROR
) {
58 (internal_names ? cs_names
[i
] :
59 charset_to_localenc(cs_table
[i
]->charset
)));
66 * Look up individually in MBCS base charsets. The
67 * `internal_names' flag does not affect these, because
68 * MBCS base charsets aren't directly encoded by CS_* identifiers.
71 if (unicode_to_big5(c
, &row
, &col
)) {
72 printf("%s Big5", sep
);
74 printf("[%d,%d]", row
, col
);
78 if (unicode_to_gb2312(c
, &row
, &col
)) {
79 printf("%s GB2312", sep
);
81 printf("[%d,%d]", row
, col
);
85 if (unicode_to_jisx0208(c
, &row
, &col
)) {
86 printf("%s JIS X 0208", sep
);
88 printf("[%d,%d]", row
, col
);
92 if (unicode_to_ksx1001(c
, &row
, &col
)) {
93 printf("%s KS X 1001", sep
);
95 printf("[%d,%d]", row
, col
);
99 if (unicode_to_cp949(c
, &row
, &col
)) {
100 printf("%s CP949", sep
);
102 printf("[%d,%d]", row
, col
);
106 if (unicode_to_cns11643(c
, &plane
, &row
, &col
)) {
107 printf("%s CNS11643", sep
);
109 printf("[%d,%d,%d]", plane
, row
, col
);
114 printf(" unicode-only");