From 49152469924ab8bc1bb6cad6152297fc85e95be1 Mon Sep 17 00:00:00 2001
From: simon <simon@cda61777-01e9-0310-a592-d414129be87e>
Date: Sun, 5 Aug 2007 12:50:57 +0000
Subject: [PATCH] Add the ability to pass a NULL output buffer and/or an
 unlimited output length to charset_{to,from}_unicode, permitting convenient
 dry-running of conversions to determine the required output length and/or
 test for the presence of difficult characters.

git-svn-id: svn://svn.tartarus.org/sgt/charset@7677 cda61777-01e9-0310-a592-d414129be87e
---
 charset.h | 14 ++++++++++++++
 fromucs.c | 15 ++++++++++-----
 toucs.c   | 15 ++++++++++-----
 3 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/charset.h b/charset.h
index 545784f..144187f 100644
--- a/charset.h
+++ b/charset.h
@@ -135,6 +135,13 @@ extern const charset_state charset_init_state;
  * NULL, `errlen' will be ignored, and the library will choose
  * something sensible to do on its own. For Unicode, this will be
  * U+FFFD (REPLACEMENT CHARACTER).
+ * 
+ * `output' may be NULL, in which case the translation is done as
+ * a dry run: nothing is stored, but the return value still counts
+ * the output that would have been produced, subject to `outlen'
+ * (e.g. to work out how much space to allocate). `outlen' may
+ * also be negative, indicating an unlimited buffer length (though
+ * this is almost certainly unwise if `output' is _not_ NULL).
  */
 
 int charset_to_unicode(const char **input, int *inlen,
@@ -165,6 +172,13 @@ int charset_to_unicode(const char **input, int *inlen,
  * If `input' is NULL, this routine will output the necessary bytes
  * to reset the encoding state in any way which might be required
  * at the end of an output piece of text.
+ * 
+ * `output' may be NULL, in which case the translation is done as
+ * a dry run: nothing is stored, but the return value still counts
+ * the output that would have been produced, subject to `outlen'
+ * (e.g. to work out how much space to allocate). `outlen' may
+ * also be negative, indicating an unlimited buffer length (though
+ * this is almost certainly unwise if `output' is _not_ NULL).
  */
 
 int charset_from_unicode(const wchar_t **input, int *inlen,
diff --git a/fromucs.c b/fromucs.c
index 8090c19..da1ea64 100644
--- a/fromucs.c
+++ b/fromucs.c
@@ -8,6 +8,7 @@
 struct charset_emit_param {
     char *output;
     int outlen;
+    int writtenlen;
     int stopped;
 };
 
@@ -15,9 +16,12 @@ static void charset_emit(void *ctx, long int output)
 {
     struct charset_emit_param *param = (struct charset_emit_param *)ctx;
 
-    if (param->outlen > 0) {
-	*param->output++ = output;
-	param->outlen--;
+    if (param->outlen != 0) {
+	if (param->output)
+	    *param->output++ = output;
+	if (param->outlen > 0)
+	    param->outlen--;
+	param->writtenlen++;
     } else {
 	param->stopped = 1;
     }
@@ -39,6 +43,7 @@ int charset_from_unicode(const wchar_t **input, int *inlen,
 
     param.output = output;
     param.outlen = outlen;
+    param.writtenlen = 0;
     param.stopped = 0;
 
     if (state)
@@ -47,7 +52,7 @@ int charset_from_unicode(const wchar_t **input, int *inlen,
 	*error = FALSE;
 
     while (*inlen > 0) {
-	int lenbefore = param.output - output;
+	int lenbefore = param.writtenlen;
 	int ret;
 
 	if (input)
@@ -78,5 +83,5 @@ int charset_from_unicode(const wchar_t **input, int *inlen,
 	    (*input)++;
 	(*inlen)--;
     }
-    return param.output - output;
+    return param.writtenlen;
 }
diff --git a/toucs.c b/toucs.c
index bee98ab..94689f5 100644
--- a/toucs.c
+++ b/toucs.c
@@ -8,6 +8,7 @@
 struct unicode_emit_param {
     wchar_t *output;
     int outlen;
+    int writtenlen;
     const wchar_t *errstr;
     int errlen;
     int stopped;
@@ -35,11 +36,14 @@ static void unicode_emit(void *ctx, long int output)
 	outlen = 1;
     }
 
-    if (param->outlen >= outlen) {
+    if (param->outlen < 0 || param->outlen >= outlen) {
 	while (outlen > 0) {
-	    *param->output++ = *p++;
-	    param->outlen--;
+	    if (param->output)
+		*param->output++ = *p++;
+	    if (param->outlen > 0)
+		param->outlen--;
 	    outlen--;
+	    param->writtenlen++;
 	}
     } else {
 	param->stopped = 1;
@@ -59,13 +63,14 @@ int charset_to_unicode(const char **input, int *inlen,
     param.outlen = outlen;
     param.errstr = errstr;
     param.errlen = errlen;
+    param.writtenlen = 0;
     param.stopped = 0;
 
     if (state)
 	localstate = *state;	       /* structure copy */
 
     while (*inlen > 0) {
-	int lenbefore = param.output - output;
+	int lenbefore = param.writtenlen;
 	spec->read(spec, (unsigned char)**input, &localstate,
 		   unicode_emit, &param);
 	if (param.stopped) {
@@ -83,5 +88,5 @@ int charset_to_unicode(const char **input, int *inlen,
 	(*inlen)--;
     }
 
-    return param.output - output;
+    return param.writtenlen;
 }
-- 
2.11.0