X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/charset/blobdiff_plain/a3a721486088f954229921ea2cde4ba6ad9db089..01081d4e1448c952c0967e71448b504e26360923:/cstable.c diff --git a/cstable.c b/cstable.c new file mode 100644 index 0000000..1336aac --- /dev/null +++ b/cstable.c @@ -0,0 +1,78 @@ +/* + * cstable.c - libcharset supporting utility which draws up a map + * of the whole Unicode BMP and annotates it with details of which + * other character sets each character appears in. + * + * Note this is not a libcharset _client_; it is part of the + * libcharset _package_, using libcharset internals. + */ + +#include "charset.h" +#include "internal.h" +#include "sbcsdat.h" + +#define ENUM_CHARSET(x) extern charset_spec const charset_##x; +#include "enum.c" +#undef ENUM_CHARSET +static charset_spec const *const cs_table[] = { +#define ENUM_CHARSET(x) &charset_##x, +#include "enum.c" +#undef ENUM_CHARSET +}; + +int main(void) +{ + long int c; + + for (c = 0; c < 0x10000; c++) { + int i, row, col; + char const *sep = ""; + + printf("U+%04x:", c); + + /* + * Look up in SBCSes. + */ + for (i = 0; i < lenof(cs_table); i++) + if (cs_table[i]->read == read_sbcs && + sbcs_from_unicode(cs_table[i]->data, c) != ERROR) { + printf("%s %s", sep, + charset_to_localenc(cs_table[i]->charset)); + sep = ";"; + } + + /* + * Look up individually in MBCS base charsets. + */ + if (unicode_to_big5(c, &row, &col)) { + printf("%s Big5", sep); + sep = ";"; + } + if (unicode_to_gb2312(c, &row, &col)) { + printf("%s GB2312", sep); + sep = ";"; + } + + if (unicode_to_jisx0208(c, &row, &col)) { + printf("%s JIS X 0208", sep); + sep = ";"; + } + + if (unicode_to_ksx1001(c, &row, &col)) { + printf("%s KS X 1001", sep); + sep = ";"; + } + + if (unicode_to_cp949(c, &row, &col)) { + printf("%s CP949", sep); + sep = ";"; + } + + if (!*sep) + printf(" unicode-only"); + + printf("\n"); + } + + return 0; +}