| 1 | /* |
| 2 | * toucs.c - convert charsets to Unicode. |
| 3 | */ |
| 4 | |
| 5 | #include "charset.h" |
| 6 | #include "internal.h" |
| 7 | |
/*
 * Context handed to unicode_emit() through its void *ctx argument.
 * It tracks the caller's output buffer and records whether an emit
 * had to be refused for lack of space.
 */
struct unicode_emit_param {
    wchar_t *output;           /* next free slot in the output buffer */
    int outlen;                /* slots still available at 'output' */
    const wchar_t *errstr;     /* text emitted for ERROR; NULL means U+FFFD */
    int errlen;                /* number of wchar_t elements in 'errstr' */
    int stopped;               /* set to 1 once an emit did not fit */
};
| 15 | |
| 16 | static void unicode_emit(void *ctx, long int output) |
| 17 | { |
| 18 | struct unicode_emit_param *param = (struct unicode_emit_param *)ctx; |
| 19 | wchar_t outval; |
| 20 | wchar_t const *p; |
| 21 | int outlen; |
| 22 | |
| 23 | if (output == ERROR) { |
| 24 | if (param->errstr) { |
| 25 | p = param->errstr; |
| 26 | outlen = param->errlen; |
| 27 | } else { |
| 28 | outval = 0xFFFD; /* U+FFFD REPLACEMENT CHARACTER */ |
| 29 | p = &outval; |
| 30 | outlen = 1; |
| 31 | } |
| 32 | } else { |
| 33 | outval = output; |
| 34 | p = &outval; |
| 35 | outlen = 1; |
| 36 | } |
| 37 | |
| 38 | if (param->outlen >= outlen) { |
| 39 | while (outlen > 0) { |
| 40 | *param->output++ = *p++; |
| 41 | param->outlen--; |
| 42 | outlen--; |
| 43 | } |
| 44 | } else { |
| 45 | param->stopped = 1; |
| 46 | } |
| 47 | } |
| 48 | |
| 49 | int charset_to_unicode(const char **input, int *inlen, |
| 50 | wchar_t *output, int outlen, |
| 51 | int charset, charset_state *state, |
| 52 | const wchar_t *errstr, int errlen) |
| 53 | { |
| 54 | charset_spec const *spec = charset_find_spec(charset); |
| 55 | charset_state localstate; |
| 56 | struct unicode_emit_param param; |
| 57 | |
| 58 | param.output = output; |
| 59 | param.outlen = outlen; |
| 60 | param.errstr = errstr; |
| 61 | param.errlen = errlen; |
| 62 | param.stopped = 0; |
| 63 | |
| 64 | if (!state) { |
| 65 | localstate.s0 = 0; |
| 66 | } else { |
| 67 | localstate = *state; /* structure copy */ |
| 68 | } |
| 69 | |
| 70 | while (*inlen > 0) { |
| 71 | int lenbefore = param.output - output; |
| 72 | spec->read(spec, (unsigned char)**input, &localstate, |
| 73 | unicode_emit, ¶m); |
| 74 | if (param.stopped) { |
| 75 | /* |
| 76 | * The emit function has _tried_ to output some |
| 77 | * characters, but ran up against the end of the |
| 78 | * buffer. Leave immediately, and return what happened |
| 79 | * _before_ attempting to process this character. |
| 80 | */ |
| 81 | return lenbefore; |
| 82 | } |
| 83 | if (state) |
| 84 | *state = localstate; /* structure copy */ |
| 85 | (*input)++; |
| 86 | (*inlen)--; |
| 87 | } |
| 88 | |
| 89 | return param.output - output; |
| 90 | } |