/*
 * internal.h - internal header stuff for the charset library.
 */
5 #ifndef charset_internal_h
6 #define charset_internal_h
/*
 * This invariably comes in handy: the number of elements in an
 * array, computed as the size of the whole object divided by the
 * size of one element. The argument must be a real array; given a
 * pointer, the division does not yield an element count.
 */
#define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
/*
 * This is an invalid Unicode value used to indicate an error. The
 * `L' suffix gives it type long, matching the `long int' character
 * values used by the conversion interfaces in this header.
 */
#define ERROR 0xFFFFL /* Unicode value representing error */
/*
 * Short names for the two structure types, declared up front so
 * that they can be used in the declarations that follow.
 */
typedef struct charset_spec charset_spec;
typedef struct sbcs_data sbcs_data;
/*
 * NOTE(review): presumably a member of `struct charset_spec'; the
 * structure's opening line is not visible here -- confirm against
 * the original header.
 */
int charset; /* numeric identifier */
26 * A function to read the character set and output Unicode
27 * characters. The `emit' function expects to get Unicode chars
28 * passed to it; it should be sent ERROR for any encoding error
31 void (*read
)(charset_spec
const *charset
, long int input_chr
,
33 void (*emit
)(void *ctx
, long int output
), void *emitctx
);
35 * A function to read Unicode characters and output in this
36 * character set. The `emit' function expects to get byte
37 * values passed to it.
39 * A non-representable input character should cause a FALSE
40 * return, _before_ `emit' is called. Successful conversion
41 * causes a TRUE return.
43 * If `input_chr' is -1, this function must revert the encoding
44 * state to any default required at the end of a piece of
47 int (*write
)(charset_spec
const *charset
, long int input_chr
,
49 void (*emit
)(void *ctx
, long int output
), void *emitctx
);
/*
 * This is the format of `data' used by the SBCS read and write
 * functions; so it's the format used in all SBCS definitions.
 */
/*
 * This is a simple mapping table converting each SBCS position
 * to a Unicode code point. Some positions may contain ERROR,
 * indicating that that byte value is not defined in the SBCS
 * in question and its occurrence in input is an error.
 *
 * NOTE(review): presumably a member of `struct sbcs_data'; the
 * structure's opening line is not visible here -- confirm against
 * the original header.
 */
unsigned long sbcs2ucs[256];
/*
 * This lookup table is used to convert Unicode back to the
 * SBCS. It consists of the valid byte values in the SBCS,
 * sorted in order of their Unicode translation. So given a
 * Unicode value U, you can do a binary search on this table
 * using the sbcs2ucs table as a lookup: when testing the Xth
 * position in this table, you branch according to whether
 * sbcs2ucs[ucs2sbcs[X]] is less than, greater than, or equal
 * to U.
 *
 * Note that since there may be fewer than 256 valid byte
 * values in a particular SBCS, we must supply the length of
 * this table as well as the contents.
 *
 * NOTE(review): the field holding that length is not visible
 * here -- confirm against the original header.
 */
unsigned char ucs2sbcs[256];
/*
 * Prototypes for internal library functions.
 */
87 charset_spec
const *charset_find_spec(int charset
);
88 void read_sbcs(charset_spec
const *charset
, long int input_chr
,
90 void (*emit
)(void *ctx
, long int output
), void *emitctx
);
91 int write_sbcs(charset_spec
const *charset
, long int input_chr
,
93 void (*emit
)(void *ctx
, long int output
), void *emitctx
);
/*
 * Big5 <-> Unicode conversion.
 *
 * NOTE(review): `r' and `c' are presumably the row and column of
 * the code position, and the int result of the reverse mapping
 * presumably reports whether the character was representable --
 * confirm against the implementation.
 */
long int big5_to_unicode(int r, int c);
int unicode_to_big5(long int unicode, int *r, int *c);
/*
 * CP949 <-> Unicode conversion.
 *
 * NOTE(review): `r' and `c' are presumably the row and column of
 * the code position, and the int result of the reverse mapping
 * presumably reports whether the character was representable --
 * confirm against the implementation.
 */
long int cp949_to_unicode(int r, int c);
int unicode_to_cp949(long int unicode, int *r, int *c);
/*
 * KS X 1001 <-> Unicode conversion.
 *
 * NOTE(review): `r' and `c' are presumably the row and column of
 * the code position, and the int result of the reverse mapping
 * presumably reports whether the character was representable --
 * confirm against the implementation.
 */
long int ksx1001_to_unicode(int r, int c);
int unicode_to_ksx1001(long int unicode, int *r, int *c);
/*
 * GB2312 <-> Unicode conversion.
 *
 * NOTE(review): `r' and `c' are presumably the row and column of
 * the code position, and the int result of the reverse mapping
 * presumably reports whether the character was representable --
 * confirm against the implementation.
 */
long int gb2312_to_unicode(int r, int c);
int unicode_to_gb2312(long int unicode, int *r, int *c);
/*
 * JIS X 0208 <-> Unicode conversion.
 *
 * NOTE(review): `r' and `c' are presumably the row and column of
 * the code position, and the int result of the reverse mapping
 * presumably reports whether the character was representable --
 * confirm against the implementation.
 */
long int jisx0208_to_unicode(int r, int c);
int unicode_to_jisx0208(long int unicode, int *r, int *c);
/*
 * JIS X 0212 <-> Unicode conversion.
 *
 * NOTE(review): `r' and `c' are presumably the row and column of
 * the code position, and the int result of the reverse mapping
 * presumably reports whether the character was representable --
 * confirm against the implementation.
 */
long int jisx0212_to_unicode(int r, int c);
int unicode_to_jisx0212(long int unicode, int *r, int *c);
/*
 * Placate compiler warning about unused parameters, of which we
 * expect to have some in this library.
 *
 * The cast to void counts as a use of the argument without
 * writing to it, so the macro also works on const-qualified
 * parameters and does not provoke self-assignment warnings.
 */
#define UNUSEDARG(x) ( (void)(x) )
114 #endif /* charset_internal_h */