2 * shiftjis.c - multibyte encoding of Shift-JIS
11 * Shift-JIS has no associated data, so `charset' may be ignored.
14 static void read_sjis(charset_spec
const *charset
, long int input_chr
,
16 void (*emit
)(void *ctx
, long int output
), void *emitctx
)
21 * For reading Shift-JIS, state->s0 simply contains the single
22 * stored lead byte when we are half way through a double-byte
23 * character, or 0 if we aren't.
27 if ((input_chr
>= 0x81 && input_chr
<= 0x9F) ||
28 (input_chr
>= 0xE0 && input_chr
<= 0xEF)) {
30 * Lead byte. Just store it.
32 state
->s0
= input_chr
;
35 * Anything else we translate through JIS X 0201.
37 if (input_chr
== 0x5C)
39 else if (input_chr
== 0x7E)
41 else if (input_chr
>= 0xA1 && input_chr
<= 0xDF)
42 input_chr
+= 0xFF61 - 0xA1;
43 else if (input_chr
< 0x80)
47 emit(emitctx
, input_chr
);
51 * We have a stored lead byte. We expect a valid followup
54 if (input_chr
>= 0x40 && input_chr
<= 0xFC && input_chr
!= 0x7F) {
57 if (r
>= 0xE0) r
-= (0xE0 - 0xA0);
65 emit(emitctx
, jisx0208_to_unicode(r
, c
));
74 * Shift-JIS is a stateless multi-byte encoding (in the sense that
75 * just after any character has been completed, the state is always
76 * the same); hence when writing it, there is no need to use the
80 static int write_sjis(charset_spec
const *charset
, long int input_chr
,
82 void (*emit
)(void *ctx
, long int output
), void *emitctx
)
88 return TRUE
; /* stateless; no cleanup required */
90 if (input_chr
< 0x80 && input_chr
!= 0x5C && input_chr
!= 0x7E) {
91 emit(emitctx
, input_chr
);
93 } else if (input_chr
== 0xA5) {
96 } else if (input_chr
== 0x203E) {
99 } else if (input_chr
>= 0xFF61 && input_chr
<= 0xFF9F) {
100 emit(emitctx
, input_chr
- (0xFF61 - 0xA1));
104 if (unicode_to_jisx0208(input_chr
, &r
, &c
)) {
108 if (r
>= 0xA0) r
+= 0xE0 - 0xA0;
120 const charset_spec charset_CS_SHIFT_JIS
= {
121 CS_SHIFT_JIS
, read_sjis
, write_sjis
, NULL
124 #else /* ENUM_CHARSETS */
126 ENUM_CHARSET(CS_SHIFT_JIS
)
128 #endif /* ENUM_CHARSETS */