2dc6356a |
1 | /* |
2 | * internal.h - internal header stuff for the charset library. |
3 | */ |
4 | |
5 | #ifndef charset_internal_h |
6 | #define charset_internal_h |
7 | |
/*
 * Element count of an array. Only meaningful when x is a real array:
 * given a pointer, the division silently yields an unrelated number.
 */
#define lenof(x) (sizeof(x) / sizeof((x)[0]))
10 | |
/*
 * Sentinel used wherever a Unicode value is expected but none is
 * available. It is treated as an invalid Unicode value: it is what an
 * `emit' callback is sent on an encoding error or a non-representable
 * character, and what sbcs2ucs[] holds for byte values that an SBCS
 * does not define.
 */
#define ERROR 0xFFFFL
13 | |
/*
 * Typedef the struct tags up front so the bare names can be used
 * below -- including inside struct charset_spec's own member
 * declarations, which take a charset_spec pointer.
 */
typedef struct charset_spec charset_spec;
typedef struct sbcs_data sbcs_data;
16 | |
17 | struct charset_spec { |
18 | int charset; /* numeric identifier */ |
19 | |
20 | /* |
21 | * A function to read the character set and output Unicode |
22 | * characters. The `emit' function expects to get Unicode chars |
23 | * passed to it; it should be sent ERROR for any encoding error |
24 | * on the input. |
25 | */ |
26 | void (*read)(charset_spec const *charset, long int input_chr, |
27 | charset_state *state, |
28 | void (*emit)(void *ctx, long int output), void *emitctx); |
29 | /* |
30 | * A function to read Unicode characters and output in this |
31 | * character set. The `emit' function expects to get byte |
32 | * values passed to it; it should be sent ERROR for any |
33 | * non-representable characters on the input. |
34 | */ |
35 | void (*write)(charset_spec const *charset, long int input_chr, |
36 | charset_state *state, |
37 | void (*emit)(void *ctx, long int output), void *emitctx); |
38 | void const *data; |
39 | }; |
40 | |
/*
 * This is the format of `data' used by the SBCS read and write
 * functions; so it's the format used in all SBCS definitions.
 */
struct sbcs_data {
    /*
     * Forward mapping table: converts each SBCS position to a
     * Unicode code point. A position may hold ERROR, meaning that
     * byte value is not defined in this SBCS and its occurrence in
     * input is an error.
     */
    unsigned long sbcs2ucs[256];

    /*
     * Reverse lookup table, used to convert Unicode back to the
     * SBCS. It lists the valid byte values of the SBCS, sorted in
     * order of their Unicode translation. To find a Unicode value
     * U, binary-search this table using sbcs2ucs[] as the key
     * function: when probing position X, branch according to
     * whether sbcs2ucs[ucs2sbcs[X]] is less than, greater than, or
     * equal to U.
     */
    unsigned char ucs2sbcs[256];

    /*
     * Length of ucs2sbcs[]. It has to be supplied separately
     * because a particular SBCS may have fewer than 256 valid byte
     * values.
     */
    int nvalid;
};
71 | |
72 | /* |
73 | * Prototypes for internal library functions. |
74 | */ |
75 | charset_spec const *charset_find_spec(int charset); |
76 | void read_sbcs(charset_spec const *charset, long int input_chr, |
77 | charset_state *state, |
78 | void (*emit)(void *ctx, long int output), void *emitctx); |
79 | void write_sbcs(charset_spec const *charset, long int input_chr, |
80 | charset_state *state, |
81 | void (*emit)(void *ctx, long int output), void *emitctx); |
82 | |
/*
 * Placate compiler warning about unused parameters, of which we
 * expect to have some in this library.
 *
 * The argument is evaluated and its value discarded by a cast to
 * void. Unlike a self-assignment, this writes nothing, so it also
 * works on const-qualified parameters and does not provoke
 * self-assignment warnings of its own. Use it as a statement:
 *
 *     UNUSEDARG(state);
 */
#define UNUSEDARG(x) ( (void)(x) )
88 | |
89 | #endif /* charset_internal_h */ |