X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/charset/blobdiff_plain/8536171f40066d21723672dd8477fc9057cfba7d..53163a60cc595558b83e22af71bf1ec3b1488323:/iso2022.c diff --git a/iso2022.c b/iso2022.c index 0b8ddae..8cf3c25 100644 --- a/iso2022.c +++ b/iso2022.c @@ -2,11 +2,10 @@ * iso2022.c - support for ISO/IEC 2022 (alias ECMA-35). * * This isn't a complete implementation of ISO/IEC 2022, but it's - * close. It only handles decoding, because a fully general encoder - * isn't really useful. It can decode 8-bit and 7-bit versions, with - * support for single-byte and multi-byte character sets, all four - * containers (G0, G1, G2, and G3), using both single-shift and - * locking-shift sequences. + * close. It can decode 8-bit and 7-bit versions, with support for + * single-byte and multi-byte character sets, all four containers + * (G0, G1, G2, and G3), using both single-shift and locking-shift + * sequences. * * The general principle is that any valid ISO/IEC 2022 sequence * should either be correctly decoded or should emit an ERROR. The @@ -22,6 +21,7 @@ #ifndef ENUM_CHARSETS #include +#include #include "charset.h" #include "internal.h" @@ -51,9 +51,42 @@ static int unicode_to_null_dbcs(long int, int *, int *); typedef int (*to_dbcs_t)(long int, int *, int *); typedef int (*to_dbcs_planar_t)(long int, int *, int *, int *); -/* Cast between to_dbcs_planar_t and to_dbcs_t, type-checking first */ -#define DEPLANARISE(x) ( (x) == (to_dbcs_planar_t)NULL, (to_dbcs_t)(x) ) -#define REPLANARISE(x) ( (x) == (to_dbcs_t)NULL, (to_dbcs_planar_t)(x) ) +/* + * These macros cast between to_dbcs_planar_t and to_dbcs_t, in + * such a way as to cause a compile-time error if the input is not + * of the appropriate type. + * + * Defining these portably is quite fiddly. My first effort was as + * follows: + * #define DEPLANARISE(x) ( (x) == (to_dbcs_planar_t)NULL, (to_dbcs_t)(x) ) + * + * so that the comparison on the left of the comma provokes the + * type check error, and the cast on the right is the actual + * desired result. + * + * gcc was entirely happy with this. However, when used in a static + * initialiser, MSVC objected - justifiably - that the first half + * of the comma expression wasn't constant and thus the expression + * as a whole was not a constant expression. We can get round this + * by enclosing the comparison in `sizeof', so that it isn't + * actually evaluated. + * + * But then we run into a second problem, which is that C actually + * disallows the use of the comma operator within a constant + * expression for any purpose at all! Presumably this is on the + * basis that its purpose is to have side effects and constant + * expressions can't; unfortunately, this specific case is one in + * which the desired side effect is a compile-time rather than a + * run-time one. + * + * We are permitted to use ?:, however, and that works quite well + * since the actual result of the sizeof expression _is_ evaluable + * at compile time. So here's my final answer, with the unfortunate + * remaining problem of evaluating its arguments multiple times: + */ +#define TYPECHECK(x,y) ( sizeof((x)) == sizeof((x)) ? (y) : (y) ) +#define DEPLANARISE(x) TYPECHECK((x) == (to_dbcs_planar_t)NULL, (to_dbcs_t)(x)) +#define REPLANARISE(x) TYPECHECK((x) == (to_dbcs_t)NULL, (to_dbcs_planar_t)(x)) /* * Values used in the `enable' field. Each of these identifies a @@ -169,10 +202,15 @@ const struct iso2022_subcharset { static long int null_dbcs_to_unicode(int r, int c) { + UNUSEDARG(r); + UNUSEDARG(c); return ERROR; } static int unicode_to_null_dbcs(long int unicode, int *r, int *c) { + UNUSEDARG(unicode); + UNUSEDARG(r); + UNUSEDARG(c); return 0; /* failed to convert anything */ } @@ -421,7 +459,7 @@ static void docs_ctext(long int input_chr, if (input_chr == 2) state->s0 = (state->s0 & 0xf0000000) | (i << 26) | (0xf << 22); } else if (n != 0xf) { - while (j < lenof(ctext_encodings) && + while ((unsigned)j < lenof(ctext_encodings) && !memcmp(ctext_encodings[j].name, ctext_encodings[oi].name, n)) { if (ctext_encodings[j].name[n] < input_chr) @@ -429,7 +467,7 @@ static void docs_ctext(long int input_chr, else break; } - if (i >= lenof(ctext_encodings) || + if ((unsigned)i >= lenof(ctext_encodings) || memcmp(ctext_encodings[i].name, ctext_encodings[oi].name, n) || ctext_encodings[i].name[n] != input_chr) { @@ -453,7 +491,7 @@ static void docs_ctext(long int input_chr, assert(i < 4 && n < 16); state->s0 = (state->s0 & 0xf0000000) | (i << 26) | (n << 22); } else { - if (i >= lenof(ctext_encodings)) + if ((unsigned)i >= lenof(ctext_encodings)) emit(emitctx, ERROR); else { charset_state substate; @@ -503,9 +541,9 @@ static void read_iso2022(charset_spec const *charset, long int input_chr, #define LEFT 30 #define RIGHT 28 #define LOCKING_SHIFT(n,side) \ - (state->s1 = (state->s1 & ~(3L<<(side))) | ((n ## L)<<(side))) -#define MODE ((state->s0 & 0xe0000000L) >> 29) -#define ENTER_MODE(m) (state->s0 = (state->s0 & ~0xe0000000L) | ((m)<<29)) + (state->s1 = (state->s1 & ~(3UL<<(side))) | ((n ## UL)<<(side))) +#define MODE ((state->s0 & 0xe0000000UL) >> 29) +#define ENTER_MODE(m) (state->s0 = (state->s0 & ~0xe0000000UL) | ((unsigned long)(m)<<29)) #define SINGLE_SHIFT(n) ENTER_MODE(SS2CHAR - 2 + (n)) #define ASSERT_IDLE do { \ if (state->s0 != 0) emit(emitctx, ERROR); \ @@ -776,7 +814,7 @@ static void oselect(charset_state *state, int i, int right, int shift = (right ? 31-7 : 31-7-7); struct iso2022_subcharset const *subcs = &iso2022_subcharsets[i]; - if (((state->s1 >> shift) & 0x7F) != i) { + if (((state->s1 >> shift) & 0x7F) != (unsigned)i) { state->s1 &= ~(0x7FL << shift); state->s1 |= (i << shift); @@ -975,7 +1013,7 @@ static int write_iso2022(charset_spec const *charset, long int input_chr, /* * Start with US-ASCII in GL and also in GR. */ - for (i = 0; i < lenof(iso2022_subcharsets); i++) { + for (i = 0; (unsigned)i < lenof(iso2022_subcharsets); i++) { subcs = &iso2022_subcharsets[i]; if (subcs->type == mode->ltype && subcs->i == mode->li && @@ -994,7 +1032,7 @@ static int write_iso2022(charset_spec const *charset, long int input_chr, */ docs_char(state, emit, emitctx, -2, NULL, 0); /* leave DOCS */ - for (i = 0; i < lenof(iso2022_subcharsets); i++) { + for (i = 0; (unsigned)i < lenof(iso2022_subcharsets); i++) { subcs = &iso2022_subcharsets[i]; if (subcs->type == mode->ltype && subcs->i == mode->li && @@ -1021,7 +1059,7 @@ static int write_iso2022(charset_spec const *charset, long int input_chr, * Analyse the input character and work out which subcharset it * belongs to. */ - for (i = 0; i < lenof(iso2022_subcharsets); i++) { + for (i = 0; (unsigned)i < lenof(iso2022_subcharsets); i++) { subcs = &iso2022_subcharsets[i]; if (!(mode->enable_mask & (1 << subcs->enable))) continue; /* this charset is disabled */ @@ -1065,7 +1103,7 @@ static int write_iso2022(charset_spec const *charset, long int input_chr, } } - if (i < lenof(iso2022_subcharsets)) { + if ((unsigned)i < lenof(iso2022_subcharsets)) { int right; /* @@ -1129,7 +1167,7 @@ static int write_iso2022(charset_spec const *charset, long int input_chr, cs = -2; /* means failure */ - for (i = 0; i <= lenof(ctext_encodings); i++) { + for (i = 0; (unsigned)i <= lenof(ctext_encodings); i++) { charset_state substate; charset_spec const *subcs = ctext_encodings[i].subcs; @@ -1140,7 +1178,7 @@ static int write_iso2022(charset_spec const *charset, long int input_chr, substate.s1 = substate.s0 = 0; p = data; - if (i < lenof(ctext_encodings)) { + if ((unsigned)i < lenof(ctext_encodings)) { if ((mode->enable_mask & (1 << ctext_encodings[i].enable)) && subcs->write(subcs, input_chr, &substate, write_to_pointer, &p)) {