2 * hz.c - HZ textual encoding of ASCII and GB2312, as defined in RFC 1843.
12 static void read_hz(charset_spec
const *charset
, long int input_chr
,
14 void (*emit
)(void *ctx
, long int output
), void *emitctx
)
17 * When reading, our state variables are:
19 * - s0 is 0 in ASCII mode, 1 in GB2312 mode.
21 * - s1 stores a character we have just seen but not fully
22 * processed. So in ASCII mode, this can only ever be zero
23 * (no character) or 0x7E (~); in GB2312 mode it can be
24 * anything from 0x21-0x7E.
35 assert(state
->s1
== '~');
37 /* Process the character after a tilde. */
40 emit(emitctx
, input_chr
);
43 return; /* ~\n is ignored */
45 state
->s0
= 1; /* switch to GB2312 mode */
48 } else if (input_chr
== '~') {
52 /* In ASCII mode, any non-tildes go straight */
53 emit(emitctx
, input_chr
);
58 * GB2312 mode. As I understand it, we expect never to see
59 * anything in this mode that isn't 0x21-0x7E. So if we do,
60 * we'll simply throw an error and return to ASCII mode.
62 if (input_chr
< 0x21 || input_chr
> 0x7E) {
64 state
->s0
= state
->s1
= 0;
69 * So if we don't have a character stored already, store
73 state
->s1
= input_chr
;
78 * ... otherwise, combine the stored char with this one.
79 * This will give either `~}', the escape sequence to
80 * return to ASCII mode, or something which we translate
83 if (state
->s1
== '~' && input_chr
== '}') {
84 state
->s1
= state
->s0
= 0;
88 emit(emitctx
, gb2312_to_unicode(state
->s1
- 0x21, input_chr
- 0x21));
93 static int write_hz(charset_spec
const *charset
, long int input_chr
,
95 void (*emit
)(void *ctx
, long int output
), void *emitctx
)
97 int desired_state
, r
, c
;
102 * Analyse the input char.
104 if (input_chr
< 0x80) {
107 } else if (unicode_to_gb2312(input_chr
, &r
, &c
)) {
113 if (state
->s0
!= (unsigned)desired_state
) {
115 emit(emitctx
, desired_state ?
'{' : '}');
116 state
->s0
= desired_state
;
120 return TRUE
; /* special case: just reset state */
126 emit(emitctx
, 0x21 + r
);
127 emit(emitctx
, 0x21 + c
);
134 const charset_spec charset_CS_HZ
= {
135 CS_HZ
, read_hz
, write_hz
, NULL
138 #else /* ENUM_CHARSETS */
142 #endif /* ENUM_CHARSETS */