Silly of me to overlook it: another obvious way you might like to
[sgt/charset] / internal.h
CommitLineData
/*
 * internal.h - internal header stuff for the charset library.
 */

5#ifndef charset_internal_h
6#define charset_internal_h
7
/*
 * This invariably comes in handy: lenof(x) is the number of elements
 * in the array `x'.
 *
 * Only meaningful when `x' is a genuine array. Given a pointer
 * (including an array-typed function parameter, which decays to one)
 * it yields sizeof(pointer) / sizeof(element), not an element count.
 */
#define lenof(x) ( sizeof(x) / sizeof(*(x)) )

/*
 * This is an invalid Unicode value used to indicate an error.
 *
 * ERROR is a name that platform headers are also known to define, so
 * drop any earlier definition first (as is done for TRUE and FALSE)
 * rather than provoke a macro-redefinition diagnostic.
 */
#undef ERROR
#define ERROR 0xFFFFL /* Unicode value representing error */

/*
 * Boolean constants. Any definition inherited from an earlier header
 * is discarded first, so these values are the ones in force and no
 * macro-redefinition diagnostic is provoked.
 */
#undef TRUE
#define TRUE 1
#undef FALSE
#define FALSE 0

/*
 * Forward typedefs, so that `charset_spec' and `sbcs_data' can be
 * written without the `struct' keyword (including inside the
 * definition of struct charset_spec itself).
 */
typedef struct charset_spec charset_spec;
typedef struct sbcs_data sbcs_data;

22struct charset_spec {
23 int charset; /* numeric identifier */
24
25 /*
26 * A function to read the character set and output Unicode
27 * characters. The `emit' function expects to get Unicode chars
28 * passed to it; it should be sent ERROR for any encoding error
29 * on the input.
30 */
31 void (*read)(charset_spec const *charset, long int input_chr,
32 charset_state *state,
33 void (*emit)(void *ctx, long int output), void *emitctx);
34 /*
35 * A function to read Unicode characters and output in this
36 * character set. The `emit' function expects to get byte
37 * values passed to it.
38 *
39 * A non-representable input character should cause a FALSE
40 * return, _before_ `emit' is called. Successful conversion
41 * causes a TRUE return.
42 *
43 * If `input_chr' is -1, this function must revert the encoding
44 * state to any default required at the end of a piece of
45 * encoded text.
46 */
47 int (*write)(charset_spec const *charset, long int input_chr,
48 charset_state *state,
49 void (*emit)(void *ctx, long int output), void *emitctx);
50 void const *data;
51};
52
/*
 * This is the format of `data' used by the SBCS read and write
 * functions; so it's the format used in all SBCS definitions.
 */
struct sbcs_data {
    /*
     * This is a simple mapping table converting each SBCS position
     * to a Unicode code point. Some positions may contain ERROR,
     * indicating that that byte value is not defined in the SBCS
     * in question and its occurrence in input is an error.
     */
    unsigned long sbcs2ucs[256];

    /*
     * This lookup table is used to convert Unicode back to the
     * SBCS. It consists of the valid byte values in the SBCS,
     * sorted in order of their Unicode translation. So given a
     * Unicode value U, you can do a binary search on this table
     * using the above table as a lookup: when testing the Xth
     * position in this table, you branch according to whether
     * sbcs2ucs[ucs2sbcs[X]] is less than, greater than, or equal
     * to U.
     *
     * Note that since there may be fewer than 256 valid byte
     * values in a particular SBCS, we must supply the length of
     * this table as well as the contents.
     */
    unsigned char ucs2sbcs[256];

    /* Length of the ucs2sbcs[] lookup table, as explained above. */
    int nvalid;
};

84/*
85 * Prototypes for internal library functions.
86 */
87charset_spec const *charset_find_spec(int charset);
88void read_sbcs(charset_spec const *charset, long int input_chr,
89 charset_state *state,
90 void (*emit)(void *ctx, long int output), void *emitctx);
91int write_sbcs(charset_spec const *charset, long int input_chr,
92 charset_state *state,
93 void (*emit)(void *ctx, long int output), void *emitctx);
01081d4e 94long int sbcs_to_unicode(const struct sbcs_data *sd, long int input_chr);
95long int sbcs_from_unicode(const struct sbcs_data *sd, long int input_chr);
c6d25d8d 96
7a7dc0a7 97void read_utf8(charset_spec const *charset, long int input_chr,
98 charset_state *state,
99 void (*emit)(void *ctx, long int output), void *emitctx);
da5d8e85 100int write_utf8(charset_spec const *charset, long int input_chr,
101 charset_state *state,
102 void (*emit)(void *ctx, long int output),
103 void *emitctx);
7a7dc0a7 104
/*
 * Lookup functions for the multibyte character sets, one pair per
 * set: X_to_unicode() takes a position within the set and returns a
 * long, and unicode_to_X() takes a long and stores a position through
 * its pointer arguments, returning an int.
 *
 * NOTE(review): `r' and `c' are presumably row and column, and `p'
 * (CNS 11643 only) presumably the plane; the index ranges and the
 * values returned on failure are decided by the implementations and
 * are not visible here - confirm.
 */
long int big5_to_unicode(int r, int c);
int unicode_to_big5(long int unicode, int *r, int *c);
long int cns11643_to_unicode(int p, int r, int c);
int unicode_to_cns11643(long int unicode, int *p, int *r, int *c);
long int cp949_to_unicode(int r, int c);
int unicode_to_cp949(long int unicode, int *r, int *c);
long int ksx1001_to_unicode(int r, int c);
int unicode_to_ksx1001(long int unicode, int *r, int *c);
long int gb2312_to_unicode(int r, int c);
int unicode_to_gb2312(long int unicode, int *r, int *c);
long int jisx0208_to_unicode(int r, int c);
int unicode_to_jisx0208(long int unicode, int *r, int *c);
long int jisx0212_to_unicode(int r, int c);
int unicode_to_jisx0212(long int unicode, int *r, int *c);

/*
 * Placate compiler warning about unused parameters, of which we
 * expect to have some in this library.
 *
 * This expands to a cast to void rather than a self-assignment: it
 * performs no store, cannot itself be flagged as a self-assignment,
 * and is also valid for const-qualified parameters.
 */
#define UNUSEDARG(x) ( (void)(x) )

126#endif /* charset_internal_h */