X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/charset/blobdiff_plain/8536171f40066d21723672dd8477fc9057cfba7d..53163a60cc595558b83e22af71bf1ec3b1488323:/iso2022.c

diff --git a/iso2022.c b/iso2022.c
index 0b8ddae..8cf3c25 100644
--- a/iso2022.c
+++ b/iso2022.c
@@ -2,11 +2,10 @@
  * iso2022.c - support for ISO/IEC 2022 (alias ECMA-35).
  *
  * This isn't a complete implementation of ISO/IEC 2022, but it's
- * close.  It only handles decoding, because a fully general encoder
- * isn't really useful.  It can decode 8-bit and 7-bit versions, with
- * support for single-byte and multi-byte character sets, all four
- * containers (G0, G1, G2, and G3), using both single-shift and
- * locking-shift sequences.
+ * close.  It can decode 8-bit and 7-bit versions, with support for
+ * single-byte and multi-byte character sets, all four containers
+ * (G0, G1, G2, and G3), using both single-shift and locking-shift
+ * sequences.
  *
  * The general principle is that any valid ISO/IEC 2022 sequence
  * should either be correctly decoded or should emit an ERROR.  The
@@ -22,6 +21,7 @@
 #ifndef ENUM_CHARSETS
 
 #include <assert.h>
+#include <string.h>
 
 #include "charset.h"
 #include "internal.h"
@@ -51,9 +51,42 @@ static int unicode_to_null_dbcs(long int, int *, int *);
 typedef int (*to_dbcs_t)(long int, int *, int *);
 typedef int (*to_dbcs_planar_t)(long int, int *, int *, int *);
 
-/* Cast between to_dbcs_planar_t and to_dbcs_t, type-checking first */
-#define DEPLANARISE(x) ( (x) == (to_dbcs_planar_t)NULL, (to_dbcs_t)(x) )
-#define REPLANARISE(x) ( (x) == (to_dbcs_t)NULL, (to_dbcs_planar_t)(x) )
+/*
+ * These macros cast between to_dbcs_planar_t and to_dbcs_t, in
+ * such a way as to cause a compile-time error if the input is not
+ * of the appropriate type.
+ * 
+ * Defining these portably is quite fiddly. My first effort was as
+ * follows:
+ *   #define DEPLANARISE(x) ( (x) == (to_dbcs_planar_t)NULL, (to_dbcs_t)(x) )
+ * 
+ * so that the comparison on the left of the comma provokes the
+ * type check error, and the cast on the right is the actual
+ * desired result.
+ * 
+ * gcc was entirely happy with this. However, when used in a static
+ * initialiser, MSVC objected - justifiably - that the first half
+ * of the comma expression wasn't constant and thus the expression
+ * as a whole was not a constant expression. We can get round this
+ * by enclosing the comparison in `sizeof', so that it isn't
+ * actually evaluated.
+ * 
+ * But then we run into a second problem, which is that C actually
+ * disallows the use of the comma operator within a constant
+ * expression for any purpose at all! Presumably this is on the
+ * basis that its purpose is to have side effects and constant
+ * expressions can't; unfortunately, this specific case is one in
+ * which the desired side effect is a compile-time rather than a
+ * run-time one.
+ * 
+ * We are permitted to use ?:, however, and that works quite well
+ * since the actual result of the sizeof expression _is_ evaluable
+ * at compile time. So here's my final answer; its one remaining wart
+ * is that the argument is expanded several times (but evaluated once):
+ */
+#define TYPECHECK(x,y) ( sizeof((x)) == sizeof((x)) ? (y) : (y) )
+#define DEPLANARISE(x) TYPECHECK((x) == (to_dbcs_planar_t)NULL, (to_dbcs_t)(x))
+#define REPLANARISE(x) TYPECHECK((x) == (to_dbcs_t)NULL, (to_dbcs_planar_t)(x))
 
 /*
  * Values used in the `enable' field. Each of these identifies a
@@ -169,10 +202,15 @@ const struct iso2022_subcharset {
 
 static long int null_dbcs_to_unicode(int r, int c)
 {
+    UNUSEDARG(r);
+    UNUSEDARG(c);
     return ERROR;
 }
 static int unicode_to_null_dbcs(long int unicode, int *r, int *c)
 {
+    UNUSEDARG(unicode);
+    UNUSEDARG(r);
+    UNUSEDARG(c);
     return 0;			       /* failed to convert anything */
 }
 
@@ -421,7 +459,7 @@ static void docs_ctext(long int input_chr,
 	if (input_chr == 2)
 	    state->s0 = (state->s0 & 0xf0000000) | (i << 26) | (0xf << 22);
     } else if (n != 0xf) {
-	while (j < lenof(ctext_encodings) &&
+	while ((unsigned)j < lenof(ctext_encodings) &&
 	       !memcmp(ctext_encodings[j].name,
 		       ctext_encodings[oi].name, n)) {
 	    if (ctext_encodings[j].name[n] < input_chr)
@@ -429,7 +467,7 @@ static void docs_ctext(long int input_chr,
 	    else
 		break;
 	}
-	if (i >= lenof(ctext_encodings) ||
+	if ((unsigned)i >= lenof(ctext_encodings) ||
 	    memcmp(ctext_encodings[i].name,
 		   ctext_encodings[oi].name, n) ||
 	    ctext_encodings[i].name[n] != input_chr) {
@@ -453,7 +491,7 @@ static void docs_ctext(long int input_chr,
 	assert(i < 4 && n < 16);
 	state->s0 = (state->s0 & 0xf0000000) | (i << 26) | (n << 22);
     } else {
-	if (i >= lenof(ctext_encodings))
+	if ((unsigned)i >= lenof(ctext_encodings))
 	    emit(emitctx, ERROR);
 	else {
 	    charset_state substate;
@@ -503,9 +541,9 @@ static void read_iso2022(charset_spec const *charset, long int input_chr,
 #define LEFT 30
 #define RIGHT 28
 #define LOCKING_SHIFT(n,side) \
-	(state->s1 = (state->s1 & ~(3L<<(side))) | ((n ## L)<<(side)))
-#define MODE ((state->s0 & 0xe0000000L) >> 29)
-#define ENTER_MODE(m) (state->s0 = (state->s0 & ~0xe0000000L) | ((m)<<29))
+	(state->s1 = (state->s1 & ~(3UL<<(side))) | ((n ## UL)<<(side)))
+#define MODE ((state->s0 & 0xe0000000UL) >> 29)
+#define ENTER_MODE(m) (state->s0 = (state->s0 & ~0xe0000000UL) | ((unsigned long)(m)<<29))
 #define SINGLE_SHIFT(n) ENTER_MODE(SS2CHAR - 2 + (n))
 #define ASSERT_IDLE do {						\
 	if (state->s0 != 0) emit(emitctx, ERROR);			\
@@ -776,7 +814,7 @@ static void oselect(charset_state *state, int i, int right,
     int shift = (right ? 31-7 : 31-7-7);
     struct iso2022_subcharset const *subcs = &iso2022_subcharsets[i];
 
-    if (((state->s1 >> shift) & 0x7F) != i) {
+    if (((state->s1 >> shift) & 0x7F) != (unsigned)i) {
 	state->s1 &= ~(0x7FL << shift);
 	state->s1 |= (i << shift);
 
@@ -975,7 +1013,7 @@ static int write_iso2022(charset_spec const *charset, long int input_chr,
 	/*
 	 * Start with US-ASCII in GL and also in GR.
 	 */
-	for (i = 0; i < lenof(iso2022_subcharsets); i++) {
+	for (i = 0; (unsigned)i < lenof(iso2022_subcharsets); i++) {
 	    subcs = &iso2022_subcharsets[i];
 	    if (subcs->type == mode->ltype &&
 		subcs->i == mode->li &&
@@ -994,7 +1032,7 @@ static int write_iso2022(charset_spec const *charset, long int input_chr,
 	 */
 	docs_char(state, emit, emitctx, -2, NULL, 0);   /* leave DOCS */
 
-	for (i = 0; i < lenof(iso2022_subcharsets); i++) {
+	for (i = 0; (unsigned)i < lenof(iso2022_subcharsets); i++) {
 	    subcs = &iso2022_subcharsets[i];
 	    if (subcs->type == mode->ltype &&
 		subcs->i == mode->li &&
@@ -1021,7 +1059,7 @@ static int write_iso2022(charset_spec const *charset, long int input_chr,
      * Analyse the input character and work out which subcharset it
      * belongs to.
      */
-    for (i = 0; i < lenof(iso2022_subcharsets); i++) {
+    for (i = 0; (unsigned)i < lenof(iso2022_subcharsets); i++) {
 	subcs = &iso2022_subcharsets[i];
 	if (!(mode->enable_mask & (1 << subcs->enable)))
 	    continue;		       /* this charset is disabled */
@@ -1065,7 +1103,7 @@ static int write_iso2022(charset_spec const *charset, long int input_chr,
 	}
     }
 
-    if (i < lenof(iso2022_subcharsets)) {
+    if ((unsigned)i < lenof(iso2022_subcharsets)) {
 	int right;
 
 	/*
@@ -1129,7 +1167,7 @@ static int write_iso2022(charset_spec const *charset, long int input_chr,
 
 	cs = -2;		       /* means failure */
 
-	for (i = 0; i <= lenof(ctext_encodings); i++) {
+	for (i = 0; (unsigned)i <= lenof(ctext_encodings); i++) {
 	    charset_state substate;
 	    charset_spec const *subcs = ctext_encodings[i].subcs;
 
@@ -1140,7 +1178,7 @@ static int write_iso2022(charset_spec const *charset, long int input_chr,
 	    substate.s1 = substate.s0 = 0;
 	    p = data;
 
-	    if (i < lenof(ctext_encodings)) {
+	    if ((unsigned)i < lenof(ctext_encodings)) {
 		if ((mode->enable_mask & (1 << ctext_encodings[i].enable)) &&
 		    subcs->write(subcs, input_chr, &substate,
 				 write_to_pointer, &p)) {