c6d25d8d |
1 | /* |
2 | * internal.h - internal header stuff for the charset library. |
3 | */ |
4 | |
5 | #ifndef charset_internal_h |
6 | #define charset_internal_h |
7 | |
/*
 * Element count of an array. Only meaningful when applied to a real
 * array object; applied to a pointer it just divides the pointer size
 * by the size of the pointed-to type.
 */
#define lenof(x) ( sizeof(x) / sizeof((x)[0]) )
10 | |
/*
 * Sentinel used in place of a Unicode character to indicate an error.
 * 0xFFFF is not a valid Unicode value, so it cannot be mistaken for
 * real converted output.
 */
#define ERROR 0xFFFFL

/* Boolean constants; any definition already in force is dropped first. */
#undef FALSE
#define FALSE 0
#undef TRUE
#define TRUE 1
18 | |
19 | typedef struct charset_spec charset_spec; |
20 | typedef struct sbcs_data sbcs_data; |
21 | |
22 | struct charset_spec { |
23 | int charset; /* numeric identifier */ |
24 | |
25 | /* |
26 | * A function to read the character set and output Unicode |
27 | * characters. The `emit' function expects to get Unicode chars |
28 | * passed to it; it should be sent ERROR for any encoding error |
29 | * on the input. |
30 | */ |
31 | void (*read)(charset_spec const *charset, long int input_chr, |
32 | charset_state *state, |
33 | void (*emit)(void *ctx, long int output), void *emitctx); |
34 | /* |
35 | * A function to read Unicode characters and output in this |
36 | * character set. The `emit' function expects to get byte |
37 | * values passed to it. |
38 | * |
39 | * A non-representable input character should cause a FALSE |
40 | * return, _before_ `emit' is called. Successful conversion |
41 | * causes a TRUE return. |
42 | * |
43 | * If `input_chr' is -1, this function must revert the encoding |
44 | * state to any default required at the end of a piece of |
45 | * encoded text. |
46 | */ |
47 | int (*write)(charset_spec const *charset, long int input_chr, |
48 | charset_state *state, |
49 | void (*emit)(void *ctx, long int output), void *emitctx); |
50 | void const *data; |
51 | }; |
52 | |
/*
 * Layout of the `data' member of a charset_spec as consumed by the
 * SBCS read and write functions. Every SBCS definition supplies one
 * of these.
 */
struct sbcs_data {
    /*
     * Forward table: the Unicode code point for each SBCS byte
     * position. An entry of ERROR marks a byte value that the SBCS
     * leaves undefined; meeting that byte in input is an error.
     */
    unsigned long sbcs2ucs[256];

    /*
     * Reverse table: the valid SBCS byte values, ordered by the
     * Unicode code point each one translates to. To find the byte
     * for a Unicode value U, binary-search this table, using
     * sbcs2ucs as the key: at position X, compare
     * sbcs2ucs[ucs2sbcs[X]] against U and go lower, go higher, or
     * stop on equality.
     *
     * An SBCS may define fewer than 256 byte values, so only the
     * first `nvalid' entries of ucs2sbcs are meaningful.
     */
    unsigned char ucs2sbcs[256];
    int nvalid;
};
83 | |
84 | /* |
85 | * Prototypes for internal library functions. |
86 | */ |
87 | charset_spec const *charset_find_spec(int charset); |
88 | void read_sbcs(charset_spec const *charset, long int input_chr, |
89 | charset_state *state, |
90 | void (*emit)(void *ctx, long int output), void *emitctx); |
91 | int write_sbcs(charset_spec const *charset, long int input_chr, |
92 | charset_state *state, |
93 | void (*emit)(void *ctx, long int output), void *emitctx); |
94 | |
/*
 * Table conversions for Big5, CP949, KS X 1001, GB2312, JIS X 0208
 * and JIS X 0212: one function in each direction per character set.
 *
 * NOTE(review): `r' and `c' look like row and column positions within
 * the character set's code table, and the int result of the
 * unicode_to_* functions is presumably a TRUE/FALSE success flag --
 * confirm against the implementations.
 */
long big5_to_unicode(int r, int c);
int unicode_to_big5(long unicode, int *r, int *c);

long cp949_to_unicode(int r, int c);
int unicode_to_cp949(long unicode, int *r, int *c);

long ksx1001_to_unicode(int r, int c);
int unicode_to_ksx1001(long unicode, int *r, int *c);

long gb2312_to_unicode(int r, int c);
int unicode_to_gb2312(long unicode, int *r, int *c);

long jisx0208_to_unicode(int r, int c);
int unicode_to_jisx0208(long unicode, int *r, int *c);

long jisx0212_to_unicode(int r, int c);
int unicode_to_jisx0212(long unicode, int *r, int *c);
107 | |
/*
 * Mark a function parameter as deliberately unused, to placate the
 * compiler warning about unused parameters, of which we expect to
 * have some in this library.
 *
 * The argument is cast to void rather than assigned to itself: a
 * self-assignment does not compile for a const-qualified parameter,
 * performs a real write on a volatile one, and is itself diagnosed
 * by some compilers (e.g. clang's -Wself-assign).
 */
#define UNUSEDARG(x) ( (void)(x) )
113 | |
114 | #endif /* charset_internal_h */ |