Silly of me to overlook it: another obvious way you might like to
[sgt/charset] / big5enc.c
1 /*
2 * big5enc.c - multibyte encoding of Big5
3 */
4
5 #ifndef ENUM_CHARSETS
6
7 #include "charset.h"
8 #include "internal.h"
9
10 /*
11 * Big5 has no associated data, so `charset' may be ignored.
12 */
13
14 static void read_big5(charset_spec const *charset, long int input_chr,
15 charset_state *state,
16 void (*emit)(void *ctx, long int output), void *emitctx)
17 {
18 UNUSEDARG(charset);
19
20 /*
21 * For reading Big5, state->s0 simply contains the single
22 * stored lead byte when we are half way through a double-byte
23 * character, or 0 if we aren't.
24 */
25
26 if (state->s0 == 0) {
27 if (input_chr >= 0xA1 && input_chr <= 0xFE) {
28 /*
29 * Lead byte. Just store it.
30 */
31 state->s0 = input_chr;
32 } else {
33 /*
34 * Anything else we pass straight through unchanged.
35 */
36 emit(emitctx, input_chr);
37 }
38 } else {
39 /*
40 * We have a stored lead byte. We expect a valid followup
41 * byte.
42 */
43 if ((input_chr >= 0x40 && input_chr <= 0x7E) ||
44 (input_chr >= 0xA1 && input_chr <= 0xFE)) {
45 emit(emitctx, big5_to_unicode(state->s0 - 0xA1, input_chr - 0x40));
46 } else {
47 emit(emitctx, ERROR);
48 }
49 state->s0 = 0;
50 }
51 }
52
53 /*
54 * Big5 is a stateless multi-byte encoding (in the sense that just
55 * after any character has been completed, the state is always the
56 * same); hence when writing it, there is no need to use the
57 * charset_state.
58 */
59
60 static int write_big5(charset_spec const *charset, long int input_chr,
61 charset_state *state,
62 void (*emit)(void *ctx, long int output), void *emitctx)
63 {
64 UNUSEDARG(charset);
65 UNUSEDARG(state);
66
67 if (input_chr == -1)
68 return TRUE; /* stateless; no cleanup required */
69
70 if (input_chr < 0x80) {
71 emit(emitctx, input_chr);
72 return TRUE;
73 } else {
74 int r, c;
75 if (unicode_to_big5(input_chr, &r, &c)) {
76 emit(emitctx, r + 0xA1);
77 emit(emitctx, c + 0x40);
78 return TRUE;
79 } else {
80 return FALSE;
81 }
82 }
83 }
84
85 const charset_spec charset_CS_BIG5 = {
86 CS_BIG5, read_big5, write_big5, NULL
87 };
88
89 #else /* ENUM_CHARSETS */
90
91 ENUM_CHARSET(CS_BIG5)
92
93 #endif /* ENUM_CHARSETS */