| 1 | /* |
| 2 | * internal.h - internal header stuff for the charset library. |
| 3 | */ |
| 4 | |
| 5 | #ifndef charset_internal_h |
| 6 | #define charset_internal_h |
| 7 | |
/*
 * Element count of an array. The argument must be an actual array
 * object: applied to a pointer, the division does not yield a
 * meaningful length.
 */
#define lenof(x) (sizeof(x) / sizeof((x)[0]))
| 10 | |
/*
 * Error marker. 0xFFFF is not a valid Unicode value, so it is used
 * in place of a real character to indicate an error.
 */
#define ERROR 0xFFFFL
| 13 | |
| 14 | typedef struct charset_spec charset_spec; |
| 15 | typedef struct sbcs_data sbcs_data; |
| 16 | |
| 17 | struct charset_spec { |
| 18 | int charset; /* numeric identifier */ |
| 19 | |
| 20 | /* |
| 21 | * A function to read the character set and output Unicode |
| 22 | * characters. The `emit' function expects to get Unicode chars |
| 23 | * passed to it; it should be sent ERROR for any encoding error |
| 24 | * on the input. |
| 25 | */ |
| 26 | void (*read)(charset_spec const *charset, long int input_chr, |
| 27 | charset_state *state, |
| 28 | void (*emit)(void *ctx, long int output), void *emitctx); |
| 29 | /* |
| 30 | * A function to read Unicode characters and output in this |
| 31 | * character set. The `emit' function expects to get byte |
| 32 | * values passed to it; it should be sent ERROR for any |
| 33 | * non-representable characters on the input. |
| 34 | */ |
| 35 | void (*write)(charset_spec const *charset, long int input_chr, |
| 36 | charset_state *state, |
| 37 | void (*emit)(void *ctx, long int output), void *emitctx); |
| 38 | void const *data; |
| 39 | }; |
| 40 | |
/*
 * Layout of the `data' member as consumed by the SBCS read and
 * write functions; every SBCS definition supplies one of these.
 */
struct sbcs_data {
    /*
     * Forward table: indexed by SBCS byte value, giving the
     * corresponding Unicode code point. An entry of ERROR marks a
     * byte value that the SBCS does not define, so meeting that
     * byte in the input is an error.
     */
    unsigned long sbcs2ucs[256];

    /*
     * Reverse table: the valid byte values of the SBCS, ordered by
     * the Unicode value each one translates to. To map a Unicode
     * value U back to a byte, binary-search this table, using the
     * forward table as the key: at position X, compare
     * sbcs2ucs[ucs2sbcs[X]] against U and go lower, go higher, or
     * stop on a match.
     *
     * An SBCS may define fewer than 256 byte values, so only the
     * first `nvalid' entries of this table are meaningful.
     */
    unsigned char ucs2sbcs[256];
    int nvalid;
};
| 71 | |
| 72 | /* |
| 73 | * Prototypes for internal library functions. |
| 74 | */ |
| 75 | charset_spec const *charset_find_spec(int charset); |
| 76 | void read_sbcs(charset_spec const *charset, long int input_chr, |
| 77 | charset_state *state, |
| 78 | void (*emit)(void *ctx, long int output), void *emitctx); |
| 79 | void write_sbcs(charset_spec const *charset, long int input_chr, |
| 80 | charset_state *state, |
| 81 | void (*emit)(void *ctx, long int output), void *emitctx); |
| 82 | |
/*
 * Placate compiler warnings about unused parameters, of which we
 * expect to have some in this library.
 *
 * The parameter is evaluated and cast to void rather than assigned
 * to itself: a self-assignment fails to compile for const-qualified
 * parameters and can itself provoke a self-assignment warning,
 * whereas the cast works for any parameter and has no effect.
 */
#define UNUSEDARG(x) ( (void)(x) )
| 88 | |
| 89 | #endif /* charset_internal_h */ |