From 49152469924ab8bc1bb6cad6152297fc85e95be1 Mon Sep 17 00:00:00 2001 From: simon Date: Sun, 5 Aug 2007 12:50:57 +0000 Subject: [PATCH] Add the ability to pass a NULL output buffer and/or an unlimited output length to charset_{to,from}_unicode, permitting convenient dry-running of conversions to determine the required output length and/or test for the presence of difficult characters. git-svn-id: svn://svn.tartarus.org/sgt/charset@7677 cda61777-01e9-0310-a592-d414129be87e --- charset.h | 14 ++++++++++++++ fromucs.c | 15 ++++++++++----- toucs.c | 15 ++++++++++----- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/charset.h b/charset.h index 545784f..144187f 100644 --- a/charset.h +++ b/charset.h @@ -135,6 +135,13 @@ extern const charset_state charset_init_state; * NULL, `errlen' will be ignored, and the library will choose * something sensible to do on its own. For Unicode, this will be * U+FFFD (REPLACEMENT CHARACTER). + * + * `output' may be NULL, in which case the entire translation will + * be performed in theory (e.g. a dry run to work out how much + * space needs to be allocated for the real thing). `outlen' may + * also be negative, indicating an unlimited buffer length + * (although this is almost certainly unwise if `output' is _not_ + * NULL). */ int charset_to_unicode(const char **input, int *inlen, @@ -165,6 +172,13 @@ int charset_to_unicode(const char **input, int *inlen, * If `input' is NULL, this routine will output the necessary bytes * to reset the encoding state in any way which might be required * at the end of an output piece of text. + * + * `output' may be NULL, in which case the entire translation will + * be performed in theory (e.g. a dry run to work out how much + * space needs to be allocated for the real thing). `outlen' may + * also be negative, indicating an unlimited buffer length + * (although this is almost certainly unwise if `output' is _not_ + * NULL). 
*/ int charset_from_unicode(const wchar_t **input, int *inlen, diff --git a/fromucs.c b/fromucs.c index 8090c19..da1ea64 100644 --- a/fromucs.c +++ b/fromucs.c @@ -8,6 +8,7 @@ struct charset_emit_param { char *output; int outlen; + int writtenlen; int stopped; }; @@ -15,9 +16,12 @@ static void charset_emit(void *ctx, long int output) { struct charset_emit_param *param = (struct charset_emit_param *)ctx; - if (param->outlen > 0) { - *param->output++ = output; - param->outlen--; + if (param->outlen != 0) { + if (param->output) + *param->output++ = output; + if (param->outlen > 0) + param->outlen--; + param->writtenlen++; } else { param->stopped = 1; } @@ -39,6 +43,7 @@ int charset_from_unicode(const wchar_t **input, int *inlen, param.output = output; param.outlen = outlen; + param.writtenlen = 0; param.stopped = 0; if (state) @@ -47,7 +52,7 @@ int charset_from_unicode(const wchar_t **input, int *inlen, *error = FALSE; while (*inlen > 0) { - int lenbefore = param.output - output; + int lenbefore = param.writtenlen; int ret; if (input) @@ -78,5 +83,5 @@ int charset_from_unicode(const wchar_t **input, int *inlen, (*input)++; (*inlen)--; } - return param.output - output; + return param.writtenlen; } diff --git a/toucs.c b/toucs.c index bee98ab..94689f5 100644 --- a/toucs.c +++ b/toucs.c @@ -8,6 +8,7 @@ struct unicode_emit_param { wchar_t *output; int outlen; + int writtenlen; const wchar_t *errstr; int errlen; int stopped; @@ -35,11 +36,14 @@ static void unicode_emit(void *ctx, long int output) outlen = 1; } - if (param->outlen >= outlen) { + if (param->outlen < 0 || param->outlen >= outlen) { while (outlen > 0) { - *param->output++ = *p++; - param->outlen--; + if (param->output) + *param->output++ = *p++; + if (param->outlen > 0) + param->outlen--; outlen--; + param->writtenlen++; } } else { param->stopped = 1; @@ -59,13 +63,14 @@ int charset_to_unicode(const char **input, int *inlen, param.outlen = outlen; param.errstr = errstr; param.errlen = errlen; + 
param.writtenlen = 0; param.stopped = 0; if (state) localstate = *state; /* structure copy */ while (*inlen > 0) { - int lenbefore = param.output - output; + int lenbefore = param.writtenlen; spec->read(spec, (unsigned char)**input, &localstate, unicode_emit, &param); if (param.stopped) { @@ -83,5 +88,5 @@ int charset_to_unicode(const char **input, int *inlen, (*inlen)--; } - return param.output - output; + return param.writtenlen; } -- 2.11.0