Silly of me to overlook it: another obvious way you might like to
[sgt/charset] / cp949.c
CommitLineData
c6d25d8d 1/*
2 * cp949.c - CP949 / KS_C_5601_1987 multibyte encoding
3 */
4
5#ifndef ENUM_CHARSETS
6
7#include "charset.h"
8#include "internal.h"
9
/*
 * CP949 has no associated data, so `charset` may be ignored.
 */
13
14static void read_cp949(charset_spec const *charset, long int input_chr,
15 charset_state *state,
16 void (*emit)(void *ctx, long int output), void *emitctx)
17{
18 UNUSEDARG(charset);
19
20 /*
21 * For reading CP949, state->s0 simply contains the single
22 * stored lead byte when we are half way through a double-byte
23 * character, or 0 if we aren't.
24 */
25
26 if (state->s0 == 0) {
27 if (input_chr >= 0x81 && input_chr <= 0xFE) {
28 /*
29 * Lead byte. Just store it.
30 */
31 state->s0 = input_chr;
32 } else {
33 /*
34 * Anything else we pass straight through unchanged.
35 */
36 emit(emitctx, input_chr);
37 }
38 } else {
39 /*
40 * We have a stored lead byte. We expect a valid followup
41 * byte.
42 */
43 if ((input_chr >= 0x40 && input_chr <= 0xFF)) {
44 emit(emitctx, cp949_to_unicode(state->s0 - 0x80,
45 input_chr - 0x40));
46 } else {
47 emit(emitctx, ERROR);
48 }
49 state->s0 = 0;
50 }
51}
52
/*
 * CP949 is a stateless multi-byte encoding (in the sense that just
 * after any character has been completed, the state is always the
 * same); hence when writing it, there is no need to use the
 * charset_state.
 */
59
60static int write_cp949(charset_spec const *charset, long int input_chr,
61 charset_state *state,
62 void (*emit)(void *ctx, long int output),
63 void *emitctx)
64{
65 UNUSEDARG(charset);
66 UNUSEDARG(state);
67
68 if (input_chr == -1)
69 return TRUE; /* stateless; no cleanup required */
70
71 if (input_chr < 0x80) {
72 emit(emitctx, input_chr);
73 return TRUE;
74 } else {
75 int r, c;
76 if (unicode_to_cp949(input_chr, &r, &c)) {
77 emit(emitctx, r + 0x80);
78 emit(emitctx, c + 0x40);
79 return TRUE;
80 } else {
81 return FALSE;
82 }
83 }
84}
85
86const charset_spec charset_CS_CP949 = {
87 CS_CP949, read_cp949, write_cp949, NULL
88};
89
90#else /* ENUM_CHARSETS */
91
/*
 * This branch is compiled only when ENUM_CHARSETS is defined, in which
 * case the file contributes nothing but this one ENUM_CHARSET entry
 * naming CS_CP949. The expansion of ENUM_CHARSET is not visible here;
 * presumably it is supplied by whatever includes this file in that
 * mode -- confirm against the including code.
 */
92ENUM_CHARSET(CS_CP949)
93
94#endif /* ENUM_CHARSETS */