Silly of me to overlook it: another obvious way you might like to
[sgt/charset] / cstable.c
Commit | Line | Data
01081d4e 1/*
2 * cstable.c - libcharset supporting utility which draws up a map
3 * of the whole Unicode BMP and annotates it with details of which
4 * other character sets each character appears in.
5 *
6 * Note this is not a libcharset _client_; it is part of the
7 * libcharset _package_, using libcharset internals.
8 */
9
04b6f111 10#include <stdio.h>
11#include <string.h>
12
01081d4e 13#include "charset.h"
14#include "internal.h"
15#include "sbcsdat.h"
16
17#define ENUM_CHARSET(x) extern charset_spec const charset_##x;
18#include "enum.c"
19#undef ENUM_CHARSET
20static charset_spec const *const cs_table[] = {
21#define ENUM_CHARSET(x) &charset_##x,
22#include "enum.c"
23#undef ENUM_CHARSET
24};
04b6f111 25static const char *const cs_names[] = {
26#define ENUM_CHARSET(x) #x,
27#include "enum.c"
28#undef ENUM_CHARSET
29};
01081d4e 30
3a17f847 31int main(int argc, char **argv)
01081d4e 32{
33 long int c;
04b6f111 34 int internal_names = FALSE;
3a17f847 35 int verbose = FALSE;
36
04b6f111 37 while (--argc) {
38 char *p = *++argv;
39 if (!strcmp(p, "-i"))
40 internal_names = TRUE;
41 else if (!strcmp(p, "-v"))
42 verbose = TRUE;
43 }
01081d4e 44
298814b6 45 for (c = 0; c < 0x30000; c++) {
3a17f847 46 int i, plane, row, col, chr;
01081d4e 47 char const *sep = "";
48
efaed674 49 printf("U+%04x:", (unsigned)c);
01081d4e 50
51 /*
52 * Look up in SBCSes.
53 */
54 for (i = 0; i < lenof(cs_table); i++)
55 if (cs_table[i]->read == read_sbcs &&
3a17f847 56 (chr = sbcs_from_unicode(cs_table[i]->data, c)) != ERROR) {
01081d4e 57 printf("%s %s", sep,
04b6f111 58 (internal_names ? cs_names[i] :
59 charset_to_localenc(cs_table[i]->charset)));
3a17f847 60 if (verbose)
61 printf("[%d]", chr);
01081d4e 62 sep = ";";
63 }
64
65 /*
04b6f111 66 * Look up individually in MBCS base charsets. The
67 * `internal_names' flag does not affect these, because
68 * MBCS base charsets aren't directly encoded by CS_*
69 * constants.
01081d4e 70 */
71 if (unicode_to_big5(c, &row, &col)) {
72 printf("%s Big5", sep);
3a17f847 73 if (verbose)
74 printf("[%d,%d]", row, col);
01081d4e 75 sep = ";";
76 }
04b6f111 77
01081d4e 78 if (unicode_to_gb2312(c, &row, &col)) {
79 printf("%s GB2312", sep);
3a17f847 80 if (verbose)
81 printf("[%d,%d]", row, col);
01081d4e 82 sep = ";";
83 }
84
85 if (unicode_to_jisx0208(c, &row, &col)) {
86 printf("%s JIS X 0208", sep);
3a17f847 87 if (verbose)
88 printf("[%d,%d]", row, col);
01081d4e 89 sep = ";";
90 }
91
92 if (unicode_to_ksx1001(c, &row, &col)) {
93 printf("%s KS X 1001", sep);
3a17f847 94 if (verbose)
95 printf("[%d,%d]", row, col);
01081d4e 96 sep = ";";
97 }
98
99 if (unicode_to_cp949(c, &row, &col)) {
100 printf("%s CP949", sep);
3a17f847 101 if (verbose)
102 printf("[%d,%d]", row, col);
01081d4e 103 sep = ";";
104 }
105
298814b6 106 if (unicode_to_cns11643(c, &plane, &row, &col)) {
107 printf("%s CNS11643", sep);
3a17f847 108 if (verbose)
109 printf("[%d,%d,%d]", plane, row, col);
298814b6 110 sep = ";";
111 }
112
01081d4e 113 if (!*sep)
114 printf(" unicode-only");
115
116 printf("\n");
117 }
118
119 return 0;
120}