+/* --- @eqish_floating_p@ --- *
+ *
+ * Arguments: @double x, y@ = two numbers to compare
+ * @const struct tvec_floatinfo *fi@ = floating-point info
+ *
+ * Returns: Nonzero if the comparand @x@ is sufficiently close to the
+ * reference @y@, or zero if it's definitely different.
+ */
+
+static int eqish_floating_p(double x, double y,
+ const struct tvec_floatinfo *fi)
+{
+ double t;
+
+ if (NANP(x)) return (NANP(y)); else if (NANP(y)) return (0);
+ if (INFP(x)) return (x == y); else if (INFP(y)) return (0);
+
+ switch (fi ? fi->f&TVFF_EQMASK : TVFF_EXACT) {
+ case TVFF_EXACT:
+ return (x == y && NEGP(x) == NEGP(y));
+ case TVFF_ABSDELTA:
+ t = x - y; if (t < 0) t = -t; return (t < fi->delta);
+ case TVFF_RELDELTA:
+ t = 1.0 - x/y; if (t < 0) t = -t; return (t < fi->delta);
+ default:
+ abort();
+ }
+}
+
+/* --- @format_floating@ --- *
+ *
+ * Arguments: @const struct gprintf_ops *gops@ = print operations
+ * @void *go@ = print destination
+ * @double x@ = number to print
+ *
+ * Returns: ---
+ *
+ * Use: Print a floating-point number, accurately.
+ */
+
+static void format_floating(const struct gprintf_ops *gops, void *go,
+ double x)
+{
+ int prec;
+
+ if (NANP(x))
+ gprintf(gops, go, "#nan");
+ else if (INFP(x))
+ gprintf(gops, go, x > 0 ? "#+inf" : "#-inf");
+ else {
+ /* Ugh. C doesn't provide any function for just printing a
+ * floating-point number /correctly/, i.e., so that you can read the
+ * result back and recover the number you first thought of. There are
+ * complicated algorithms published for doing this, but I really don't
+ * want to get into that here. So we have this.
+ *
+ * The sign doesn't cause significant difficulty so we're going to ignore
+ * it for now. So suppose we're given a number %$x = f b^e$%, in
+ * base-%$b$% format, so %$f b^n$% and %$e$% are integers, with
+ * %$0 \le f < 1$%. We're going to convert it into the nearest integer
+ * of the form %$X = F B^E$%, with similar conditions, only with the
+ * additional requirement that %$X$% is normalized, i.e., that %$X = 0$%
+ * or %$F \ge B^{-N}$%.
+ *
+ * We're rounding to the nearest such %$X$%. If there is to be ambiguity
+ * in the conversion, then some %$x = f b^e$% and the next smallest
+ * representable number %$x' = x + b^{e-n}$% must both map to the same
+ * %$X$%, which means both %$x$% and %$x'$% must be nearer to %$X$% than
+ * any other number representable in the target system. The nest larger
+ * number is %$X' = X + B^{E-N}$%; the next smaller number will normally
+ * be %$W = X - B^{E-N}$%, but if %$F = 1/B$ then the next smaller number
+ * is actually %$X - B^{E-N-1}$%. We ignore this latter possibility in
+ * the pursuit of a conservative estimate (though actually it doesn't
+ * matter).
+ *
+ * If both %$x$% and %$x'$% map to %$X$% then we must have
+ * %$L = X - B^{E-N}/2 \le x$% and %$x + b^{e-n} \le R = X + B^{E-N}/2$%;
+ * so firstly %$f b^e = x \ge L = W + B^{E-N}/2 > W = (F - B^{-N}) B^E$%,
+ * and secondly %$b^{e-n} \le B^{E-N}$%. Since these inequalities are in
+ * opposite senses, we can divide, giving
+ *
+ * %$f b^e/b^{e-n} > (F - B^{-N}) B^E/B^{E-N}$% ,
+ *
+ * whence
+ *
+ * %$f b^n > (F - B^{-N}) B^N = F B^N - 1$% .
+ *
+ * Now %$f \le 1 - b^{-n}$%, and %$F \ge B^{-1}$%, so, for this to be
+ * possible, it must be the case that
+ *
+ * %$(1 - b^{-n}) b^n = b^n - 1 > B^{N-1} - 1$% .
+ *
+ * Then rearrange and take logarithms, obtaining
+ *
+ * %$(N - 1) \log B < n \log b$% ,
+ *
+ * and so
+ *
+ * %$N < n \log b/\log B + 1$% .
+ *
+ * Recall that this is a necessary condition for a collision to occur; we
+ * are therefore safe whenever
+ *
+ * %$N \ge n \log b/\log B + 1$% ;
+ *
+ * so, taking ceilings,
+ *
+ * %$N \ge \lceil n \log b/\log B \rceil + 1$% .
+ *
+ * So that's why we have this.
+ *
+ * I'm going to assume that @n = DBL_MANT_DIG@ is sufficiently small that
+ * we can calculate this without ending up on the wrong side of an
+ * integer boundary.
+ *
+ * In C11, we have @DBL_DECIMAL_DIG@, which should be the same value only
+ * as a constant. Except that modern compilers are more than clever
+ * enough to work out that this is a constant anyway.
+ *
+ * This is sometimes an overestimate: we'll print out meaningless digits
+ * that don't represent anything we actually know about the number in
+ * question. To fix that, we'd need a complicated algorithm like Steele
+ * and White's Dragon4, Gay's @dtoa@, or Burger and Dybvig's algorithm
+ * (note that Loitsch's Grisu2 is conservative, and Grisu3 hands off to
+ * something else in difficult situations).
+ */
+
+ prec = ceil(DBL_MANT_DIG*log(FLT_RADIX)/log(10)) + 1;
+ gprintf(gops, go, "%.*g", prec, x);
+ }
+}
+
+/* --- @parse_floating@ --- *
+ *
+ * Arguments: @double *x_out@ = where to put the result
+ * @const char *q_out@ = where to leave end pointer, or null
+ * @const char *p@ = string to parse
+ * @const struct tvec_floatinfo *fi@ = floating-point info
+ * @struct tvec_state *tv@ = test vector state
+ *
+ * Returns: Zero on success, @-1@ on error.
+ *
+ * Use: Parse a floating-point number from a string. Reports any
+ * necessary errors. If @q_out@ is not null then trailing
+ * material is permitted and a pointer to it (or the end of the
+ * string) is left in @*q_out@.
+ */
+
+static int parse_floating(double *x_out, const char **q_out, const char *p,
+ const struct tvec_floatinfo *fi,
+ struct tvec_state *tv)
+{
+ const char *pp; char *q;
+ dstr d = DSTR_INIT;
+ double x;
+ int olderr, rc;
+
+ /* Check for special tokens. */
+ if (STRCMP(p, ==, "#nan")) {
+#ifdef NAN
+ if (q_out) *q_out = p + strlen(p);
+ x = NAN; rc = 0;
+#else
+ tvec_error(tv, "NaN not supported on this system");
+ rc = -1; goto end;
+#endif
+ }
+
+ else if (STRCMP(p, ==, "#inf") ||
+ STRCMP(p, ==, "#+inf") || STRCMP(p, ==, "+#inf")) {
+#ifdef INFINITY
+ if (q_out) *q_out = p + strlen(p);
+ x = INFINITY; rc = 0;
+#else
+ tvec_error(tv, "infinity not supported on this system");
+ rc = -1; goto end;
+#endif
+ }
+
+ else if (STRCMP(p, ==, "#-inf") || STRCMP(p, ==, "-#inf")) {
+#ifdef INFINITY
+ if (q_out) *q_out = p + strlen(p);
+ x = -INFINITY; rc = 0;
+#else
+ tvec_error(tv, "infinity not supported on this system");
+ rc = -1; goto end;
+#endif
+ }
+
+ /* Check that this looks like a number, so we can exclude `strtod'
+ * recognizing its own non-finite number tokens.
+ */
+ else {
+ pp = p;
+ if (*pp == '+' || *pp == '-') pp++;
+ if (*pp == '.') pp++;
+ if (!ISDIGIT(*pp)) {
+ tvec_syntax(tv, *p ? *p : fgetc(tv->fp), "floating-point number");
+ rc = -1; goto end;
+ }
+
+ /* Parse the number using the system parser. */
+ olderr = errno; errno = 0;
+ x = strtod(p, &q);
+ if (q_out) *q_out = q;
+ else if (*q) { tvec_syntax(tv, *q, "end-of-line"); rc = -1; goto end; }
+ if (errno && (errno != ERANGE || (x > 0 ? -x : x) == HUGE_VAL)) {
+ tvec_error(tv, "invalid floating-point number `%.*s': %s",
+ (int)(q - p), p, strerror(errno));
+ rc = -1; goto end;
+ }
+ errno = olderr;
+ }
+
+ /* Check that the number is acceptable. */
+ if (NANP(x) && fi && !(fi->f&TVFF_NANOK)) {
+ tvec_error(tv, "#nan not allowed here");
+ rc = -1; goto end;
+ }
+
+ if (fi && ((!(fi->f&TVFF_NOMIN) && x < fi->min) ||
+ (!(fi->f&TVFF_NOMAX) && x > fi->max))) {
+ dstr_puts(&d, "floating-point number ");
+ format_floating(&dstr_printops, &d, x);
+ dstr_puts(&d, " out of range (must be in ");
+ if (fi->f&TVFF_NOMIN)
+ dstr_puts(&d, "(#-inf");
+ else
+ { dstr_putc(&d, '['); format_floating(&dstr_printops, &d, fi->min); }
+ dstr_puts(&d, " .. ");
+ if (fi->f&TVFF_NOMAX)
+ dstr_puts(&d, "#+inf)");
+ else
+ { format_floating(&dstr_printops, &d, fi->max); dstr_putc(&d, ']'); }
+ dstr_putc(&d, ')'); dstr_putz(&d);
+ tvec_error(tv, "%s", d.buf); rc = -1; goto end;
+ }
+
+ /* All done. */
+ *x_out = x; rc = 0;
+end:
+ dstr_destroy(&d);
+ return (rc);
+}
+
+/*----- String utilities --------------------------------------------------*/
+
+/* Special character name table. */
+static const struct chartab {
+ const char *name; /* character name */
+ int ch; /* character value */
+ unsigned f; /* flags: */
+#define CTF_PREFER 1u /* preferred name */
+#define CTF_SHORT 2u /* short name (compact style) */
+} chartab[] = {
+ { "#eof", EOF, CTF_PREFER | CTF_SHORT },
+ { "#nul", '\0', CTF_PREFER },
+ { "#bell", '\a', CTF_PREFER },
+ { "#ding", '\a', 0 },
+ { "#bel", '\a', CTF_SHORT },
+ { "#backspace", '\b', CTF_PREFER },
+ { "#bs", '\b', CTF_SHORT },
+ { "#escape", '\x1b', CTF_PREFER },
+ { "#esc", '\x1b', CTF_SHORT },
+ { "#formfeed", '\f', CTF_PREFER },
+ { "#ff", '\f', CTF_SHORT },
+ { "#newline", '\n', CTF_PREFER },
+ { "#linefeed", '\n', 0 },
+ { "#lf", '\n', CTF_SHORT },
+ { "#nl", '\n', 0 },
+ { "#return", '\r', CTF_PREFER },
+ { "#carriage-return", '\r', 0 },
+ { "#cr", '\r', CTF_SHORT },
+ { "#tab", '\t', CTF_PREFER | CTF_SHORT },
+ { "#horizontal-tab", '\t', 0 },
+ { "#ht", '\t', 0 },
+ { "#vertical-tab", '\v', CTF_PREFER },
+ { "#vt", '\v', CTF_SHORT },
+ { "#space", ' ', 0 },
+ { "#spc", ' ', CTF_SHORT },
+ { "#delete", '\x7f', CTF_PREFER },
+ { "#del", '\x7f', CTF_SHORT },
+ { 0, 0, 0 }
+};
+
+/* --- @find_charname@ --- *
+ *
+ * Arguments: @int ch@ = character to match
+ * @unsigned f@ = flags (@CTF_...@) to match
+ *
+ * Returns: The name of the character, or null if no match is found.
+ *
+ * Use: Looks up a name for a character. Specifically, it returns
+ * the first entry in the @chartab@ table which matches @ch@ and
+ * which has one of the flags @f@ set.
+ */
+
+static const char *find_charname(int ch, unsigned f)
+{
+ const struct chartab *ct;
+
+ for (ct = chartab; ct->name; ct++)
+ if (ct->ch == ch && (ct->f&f)) return (ct->name);
+ return (0);
+}
+
+/* --- @read_charname@ --- *
+ *
+ * Arguments: @int *ch_out@ = where to put the character
+ * @const char *p@ = character name
+ * @unsigned f@ = flags (@TCF_...@)
+ *
+ * Returns: Zero if a match was found, @-1@ if not.
+ *
+ * Use: Looks up a character by name. If @RCF_EOFOK@ is set in @f@,
+ * then the @EOF@ marker can be matched; otherwise it can't.
+ */
+
+#define RCF_EOFOK 1u
+static int read_charname(int *ch_out, const char *p, unsigned f)
+{
+ const struct chartab *ct;
+
+ for (ct = chartab; ct->name; ct++)
+ if (STRCMP(p, ==, ct->name) && ((f&RCF_EOFOK) || ct->ch >= 0))
+ { *ch_out = ct->ch; return (0); }
+ return (-1);
+}
+
+/* --- @format_charesc@ --- *
+ *
+ * Arguments: @const struct gprintf_ops *gops@ = print operations
+ * @void *go@ = print destination
+ * @int ch@ = character to format
+ * @unsigned f@ = flags (@FCF_...@)
+ *
+ * Returns: ---
+ *
+ * Use: Format a character as an escape sequence, possibly as part of
+ * a larger string. If @FCF_BRACE@ is set in @f@, then put
+ * braces around a `\x...' code, so that it's suitable for use
+ * in a longer string.
+ */
+
+#define FCF_BRACE 1u
+static void format_charesc(const struct gprintf_ops *gops, void *go,
+ int ch, unsigned f)
+{
+ switch (ch) {
+ case '\a': gprintf(gops, go, "\\a"); break;
+ case '\b': gprintf(gops, go, "\\b"); break;
+ case '\x1b': gprintf(gops, go, "\\e"); break;
+ case '\f': gprintf(gops, go, "\\f"); break;
+ case '\r': gprintf(gops, go, "\\r"); break;
+ case '\n': gprintf(gops, go, "\\n"); break;
+ case '\t': gprintf(gops, go, "\\t"); break;
+ case '\v': gprintf(gops, go, "\\v"); break;
+ case '\\': gprintf(gops, go, "\\\\"); break;
+ case '\'': gprintf(gops, go, "\\'"); break;
+ case '\0':
+ if (f&FCF_BRACE) gprintf(gops, go, "\\{0}");
+ else gprintf(gops, go, "\\0");
+ break;
+ default:
+ if (f&FCF_BRACE)
+ gprintf(gops, go, "\\x{%0*x}", hex_width(UCHAR_MAX), ch);
+ else
+ gprintf(gops, go, "\\x%0*x", hex_width(UCHAR_MAX), ch);
+ break;
+ }
+}
+
+/* --- @format_char@ --- *
+ *
+ * Arguments: @const struct gprintf_ops *gops@ = print operations
+ * @void *go@ = print destination
+ * @int ch@ = character to format
+ *
+ * Returns: ---
+ *
+ * Use: Format a single character.
+ */
+
+static void format_char(const struct gprintf_ops *gops, void *go, int ch)
+{
+ switch (ch) {
+ case '\\': case '\'': escape:
+ gprintf(gops, go, "'");
+ format_charesc(gops, go, ch, 0);
+ gprintf(gops, go, "'");
+ break;
+ default:
+ if (!isprint(ch)) goto escape;
+ gprintf(gops, go, "'%c'", ch);
+ break;
+ }
+}
+
+/* --- @maybe_format_unsigned_char@, @maybe_format_signed_char@ --- *
+ *
+ * Arguments: @const struct gprintf_ops *gops@ = print operations
+ * @void *go@ = print destination
+ * @unsigned long u@ or @long i@ = an integer
+ *
+ * Returns: ---
+ *
+ * Use: Format a (signed or unsigned) integer as a character, if it's
+ * in range, printing something like `= 'q''. It's assumed that
+ * a comment marker has already been output.
+ */
+
+static void maybe_format_unsigned_char
+ (const struct gprintf_ops *gops, void *go, unsigned long u)
+{
+ const char *p;
+
+ p = find_charname(u, CTF_PREFER);
+ if (p) gprintf(gops, go, " = %s", p);
+ if (u < UCHAR_MAX)
+ { gprintf(gops, go, " = "); format_char(gops, go, u); }
+}
+
+static void maybe_format_signed_char
+ (const struct gprintf_ops *gops, void *go, long i)
+{
+ const char *p;
+
+ p = find_charname(i, CTF_PREFER);
+ if (p) gprintf(gops, go, " = %s", p);
+ if (0 <= i && i < UCHAR_MAX)
+ { gprintf(gops, go, " = "); format_char(gops, go, i); }
+}
+
+/* --- @read_charesc@ --- *
+ *
+ * Arguments: @int *ch_out@ = where to put the result
+ * @struct tvec_state *tv@ = test vector state
+ *
+ * Returns: Zero on success, @-1@ on error.
+ *
+ * Use: Parse and convert an escape sequence from @tv@'s input
+ * stream, assuming that the initial `\' has already been read.
+ * Reports errors as appropriate.
+ */
+
+static int read_charesc(int *ch_out, struct tvec_state *tv)
+{
+ int ch, i, esc;
+ unsigned f = 0;
+#define f_brace 1u
+
+ ch = getc(tv->fp);
+ switch (ch) {
+
+ /* Things we shouldn't find. */
+ case EOF: case '\n': return (tvec_syntax(tv, ch, "string escape"));
+
+ /* Single-character escapes. */
+ case '\'': *ch_out = '\''; break;
+ case '\\': *ch_out = '\\'; break;
+ case '"': *ch_out = '"'; break;
+ case 'a': *ch_out = '\a'; break;
+ case 'b': *ch_out = '\b'; break;
+ case 'e': *ch_out = '\x1b'; break;
+ case 'f': *ch_out = '\f'; break;
+ case 'n': *ch_out = '\n'; break;
+ case 'r': *ch_out = '\r'; break;
+ case 't': *ch_out = '\t'; break;
+ case 'v': *ch_out = '\v'; break;
+
+ /* Hex escapes, with and without braces. */
+ case 'x':
+ ch = getc(tv->fp);
+ if (ch == '{') { f |= f_brace; ch = getc(tv->fp); }
+ else f &= ~f_brace;
+ esc = chtodig(ch);
+ if (esc < 0 || esc >= 16) return (tvec_syntax(tv, ch, "hex digit"));
+ for (;;) {
+ ch = getc(tv->fp); i = chtodig(ch); if (i < 0 || i >= 16) break;
+ esc = 16*esc + i;
+ if (esc > UCHAR_MAX)
+ return (tvec_error(tv,
+ "character code %d out of range", esc));
+ }
+ if (!(f&f_brace)) ungetc(ch, tv->fp);
+ else if (ch != '}') return (tvec_syntax(tv, ch, "`}'"));
+ *ch_out = esc;
+ break;
+
+ /* Other things, primarily octal escapes. */
+ case '{':
+ f |= f_brace; ch = getc(tv->fp);
+ /* fall through */
+ default:
+ if ('0' <= ch && ch < '8') {
+ i = 1; esc = ch - '0';
+ for (;;) {
+ ch = getc(tv->fp);
+ if ('0' > ch || ch >= '8') { ungetc(ch, tv->fp); break; }
+ esc = 8*esc + ch - '0';
+ i++; if (i >= 3) break;
+ }
+ if (f&f_brace) {
+ ch = getc(tv->fp);
+ if (ch != '}') return (tvec_syntax(tv, ch, "`}'"));
+ }
+ if (esc > UCHAR_MAX)
+ return (tvec_error(tv,
+ "character code %d out of range", esc));
+ *ch_out = esc; break;
+ } else
+ return (tvec_syntax(tv, ch, "string escape"));
+ }
+
+ /* Done. */
+ return (0);
+
+#undef f_brace
+}
+
+/* --- @read_quoted_string@ --- *
+ *
+ * Arguments: @dstr *d@ = string to write to
+ * @int quote@ = initial quote, `'' or `"'
+ * @struct tvec_state *tv@ = test vector state
+ *
+ * Returns: Zero on success, @-1@ on error.
+ *
+ * Use: Read the rest of a quoted string into @d@, reporting errors
+ * as appropriate.
+ *
+ * A single-quoted string is entirely literal. A double-quoted
+ * string may contain C-like escapes.
+ */
+
+static int read_quoted_string(dstr *d, int quote, struct tvec_state *tv)
+{
+ int ch;
+
+ for (;;) {
+ ch = getc(tv->fp);
+ switch (ch) {
+ case EOF: case '\n':
+ return (tvec_syntax(tv, ch, "`%c'", quote));
+ case '\\':
+ if (quote == '\'') goto ordinary;
+ ch = getc(tv->fp); if (ch == '\n') { tv->lno++; break; }
+ ungetc(ch, tv->fp); if (read_charesc(&ch, tv)) return (-1);
+ goto ordinary;
+ default:
+ if (ch == quote) goto end;
+ ordinary:
+ DPUTC(d, ch);
+ break;
+ }
+ }
+
+end:
+ DPUTZ(d);
+ return (0);
+}
+
+/* --- @collect_bare@ --- *
+ *
+ * Arguments: @dstr *d@ = string to write to
+ * @struct tvec_state *tv@ = test vector state
+ *
+ * Returns: Zero on success, @-1@ on error.
+ *
+ * Use: Read barewords and the whitespace between them. Stop when we
+ * encounter something which can't start a bareword.
+ */
+
+static int collect_bare(dstr *d, struct tvec_state *tv)
+{
+ size_t pos = d->len;
+ enum { WORD, SPACE, ESCAPE }; unsigned s = WORD;
+ int ch, rc;
+
+ for (;;) {
+ ch = getc(tv->fp);
+ switch (ch) {
+ case EOF:
+ tvec_syntax(tv, ch, "bareword");
+ rc = -1; goto end;
+ case '\n':
+ if (s == ESCAPE) { tv->lno++; goto addch; }
+ if (s == WORD) pos = d->len;
+ ungetc(ch, tv->fp); if (tvec_nexttoken(tv)) { rc = -1; goto end; }
+ DPUTC(d, ' '); s = SPACE;
+ break;
+ case '"': case '\'': case '!': case '#': case ')': case '}': case ']':
+ if (s == SPACE) { ungetc(ch, tv->fp); goto done; }
+ goto addch;
+ case '\\':
+ s = ESCAPE;
+ break;
+ default:
+ if (s != ESCAPE && isspace(ch)) {
+ if (s == WORD) pos = d->len;
+ DPUTC(d, ch); s = SPACE;
+ break;
+ }
+ addch:
+ DPUTC(d, ch); s = WORD;
+ }
+ }
+
+done:
+ if (s == SPACE) d->len = pos;
+ DPUTZ(d); rc = 0;
+end:
+ return (rc);
+}
+
+/* --- @set_up_encoding@ --- *
+ *
+ * Arguments: @const codec_class **ccl_out@ = where to put the class
+ * @unsigned *f_out@ = where to put the flags
+ * @unsigned code@ = the coding scheme to use (@TVEC_...@)
+ *
+ * Returns: ---
+ *
+ * Use: Helper for @read_compound_string@ below.
+ *
+ * Return the appropriate codec class and flags for @code@.
+ * Leaves @*ccl_out@ null if the coding scheme doesn't have a
+ * backing codec class (e.g., @TVCODE_BARE@).
+ */
+
+enum { TVCODE_BARE, TVCODE_HEX, TVCODE_BASE64, TVCODE_BASE32 };
+static void set_up_encoding(const codec_class **ccl_out, unsigned *f_out,
+ unsigned code)
+{
+ switch (code) {
+ case TVCODE_BARE:
+ *ccl_out = 0; *f_out = 0;
+ break;
+ case TVCODE_HEX:
+ *ccl_out = &hex_class; *f_out = CDCF_IGNCASE;
+ break;
+ case TVCODE_BASE32:
+ *ccl_out = &base32_class; *f_out = CDCF_IGNCASE | CDCF_IGNEQPAD;
+ break;
+ case TVCODE_BASE64:
+ *ccl_out = &base64_class; *f_out = CDCF_IGNEQPAD;
+ break;
+ default:
+ abort();
+ }
+}
+
+/* --- @flush_codec@ --- *
+ *
+ * Arguments: @codec *cdc@ = a codec, or null
+ * @dstr *d@ = output string
+ * @struct tvec_state *tv@ = test vector state
+ *
+ * Returns: Zero on success, @-1@ on error.
+ *
+ * Use: Helper for @read_compound_string@ below.
+ *
+ * Flush out any final buffered material from @cdc@, and check
+ * that it's in a good state. Frees the codec on success. Does
+ * nothing if @cdc@ is null.
+ */
+
+static int flush_codec(codec *cdc, dstr *d, struct tvec_state *tv)
+{
+ int err;
+
+ if (cdc) {
+ err = cdc->ops->code(cdc, 0, 0, d);
+ if (err)
+ return (tvec_error(tv, "invalid %s sequence end: %s",
+ cdc->ops->c->name, codec_strerror(err)));
+ cdc->ops->destroy(cdc);
+ }
+ return (0);
+}
+
+/* --- @read_compound_string@ --- *
+ *
+ * Arguments: @void **p_inout@ = address of output buffer pointer
+ * @size_t *sz_inout@ = address of buffer size
+ * @unsigned code@ = initial interpretation of barewords
+ * @unsigned f@ = other flags (@RCSF_...@)
+ * @struct tvec_state *tv@ = test vector state
+ *
+ * Returns: Zero on success, @-1@ on error.
+ *
+ * Use: Parse a compound string, i.e., a sequence of stringish pieces
+ * which might be quoted strings, character names, or barewords
+ * to be decoded accoding to @code@, interspersed with
+ * additional directives.
+ *
+ * If the initial buffer pointer is non-null and sufficiently
+ * large, then it will be reused; otherwise, it is freed and a
+ * fresh, sufficiently large buffer is allocated and returned.
+ */
+
+#define RCSF_NESTED 1u
+static int read_compound_string(void **p_inout, size_t *sz_inout,
+ unsigned code, unsigned f,
+ struct tvec_state *tv)
+{
+ const codec_class *ccl; unsigned cdf;
+ codec *cdc;
+ dstr d = DSTR_INIT, w = DSTR_INIT;
+ char *p;
+ const char *q;
+ void *pp = 0; size_t sz;
+ unsigned long n;
+ int ch, err, rc;
+
+ set_up_encoding(&ccl, &cdf, code); cdc = 0;
+
+ if (tvec_nexttoken(tv)) return (tvec_syntax(tv, fgetc(tv->fp), "string"));
+ do {
+ ch = getc(tv->fp);
+ switch (ch) {
+
+ case ')': case ']': case '}':
+ /* Close brackets. Leave these for recursive caller if there is one,
+ * or just complain.
+ */
+
+ if (!(f&RCSF_NESTED))
+ { rc = tvec_syntax(tv, ch, "string"); goto end; }
+ ungetc(ch, tv->fp); goto done;
+
+ case '"': case '\'':
+ /* Quotes. Read a quoted string. */
+
+ if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
+ cdc = 0;
+ if (read_quoted_string(&d, ch, tv)) { rc = -1; goto end; }
+ break;
+
+ case '#':
+ /* A named character. */
+
+ ungetc(ch, tv->fp);
+ if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
+ cdc = 0;
+ DRESET(&w); tvec_readword(tv, &w, 0, ";", "character name");
+ if (STRCMP(w.buf, ==, "#empty")) break;
+ if (read_charname(&ch, w.buf, RCF_EOFOK)) {
+ rc = tvec_error(tv, "unknown character name `%s'", d.buf);
+ goto end;
+ }
+ DPUTC(&d, ch); break;
+
+ case '!':
+ /* A magic keyword. */
+
+ if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
+ cdc = 0;
+ ungetc(ch, tv->fp);
+ DRESET(&w); tvec_readword(tv, &w, 0, ";", "`!'-keyword");
+
+ /* Change bareword coding system. */
+ if (STRCMP(w.buf, ==, "!bare"))
+ { code = TVCODE_BARE; set_up_encoding(&ccl, &cdf, code); }
+ else if (STRCMP(w.buf, ==, "!hex"))
+ { code = TVCODE_HEX; set_up_encoding(&ccl, &cdf, code); }
+ else if (STRCMP(w.buf, ==, "!base32"))
+ { code = TVCODE_BASE32; set_up_encoding(&ccl, &cdf, code); }
+ else if (STRCMP(w.buf, ==, "!base64"))
+ { code = TVCODE_BASE64; set_up_encoding(&ccl, &cdf, code); }
+
+ /* Repeated substrings. */
+ else if (STRCMP(w.buf, ==, "!repeat")) {
+ if (tvec_nexttoken(tv)) {
+ rc = tvec_syntax(tv, fgetc(tv->fp), "repeat count");
+ goto end;
+ }
+ DRESET(&w);
+ if (tvec_readword(tv, &w, 0, ";{", "repeat count"))
+ { rc = -1; goto end; }
+ if (parse_unsigned_integer(&n, &q, w.buf)) {
+ rc = tvec_error(tv, "invalid repeat count `%s'", w.buf);
+ goto end;
+ }
+ if (*q) { rc = tvec_syntax(tv, *q, "`{'"); goto end; }
+ if (tvec_nexttoken(tv))
+ { rc = tvec_syntax(tv, fgetc(tv->fp), "`{'"); goto end; }
+ ch = getc(tv->fp); if (ch != '{')
+ { rc = tvec_syntax(tv, ch, "`{'"); goto end; }
+ sz = 0;
+ if (read_compound_string(&pp, &sz, code, f | RCSF_NESTED, tv))
+ { rc = -1; goto end; }
+ ch = getc(tv->fp); if (ch != '}')
+ { rc = tvec_syntax(tv, ch, "`}'"); goto end; }
+ if (sz) {
+ if (n > (size_t)-1/sz)
+ { rc = tvec_error(tv, "repeat size out of range"); goto end; }
+ dstr_ensure(&d, n*sz);
+ if (sz == 1)
+ { memset(d.buf + d.len, *(unsigned char *)pp, n); d.len += n; }
+ else
+ for (; n--; d.len += sz) memcpy(d.buf + d.len, pp, sz);
+ }
+ xfree(pp); pp = 0;
+ }
+
+ /* Anything else is an error. */
+ else {
+ tvec_error(tv, "unknown string keyword `%s'", w.buf);
+ rc = -1; goto end;
+ }
+ break;
+
+ default:
+ /* A bareword. Process it according to the current coding system. */
+
+ switch (code) {
+ case TVCODE_BARE:
+ ungetc(ch, tv->fp);
+ if (collect_bare(&d, tv)) goto done;
+ break;
+ default:
+ assert(ccl);
+ ungetc(ch, tv->fp); DRESET(&w);
+ if (tvec_readword(tv, &w, 0, ";",
+ "%s-encoded fragment", ccl->name))
+ { rc = -1; goto end; }
+ if (!cdc) cdc = ccl->decoder(cdf);
+ err = cdc->ops->code(cdc, w.buf, w.len, &d);
+ if (err) {
+ tvec_error(tv, "invalid %s fragment `%s': %s",
+ ccl->name, w.buf, codec_strerror(err));
+ rc = -1; goto end;
+ }
+ break;
+ }
+ break;
+ }
+ } while (!tvec_nexttoken(tv));
+
+done:
+ /* Wrap things up. */
+ if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
+ cdc = 0;
+ if (*sz_inout <= d.len)
+ { xfree(*p_inout); *p_inout = xmalloc(d.len + 1); }
+ p = *p_inout; memcpy(p, d.buf, d.len); p[d.len] = 0; *sz_inout = d.len;
+ rc = 0;
+
+end:
+ /* Clean up any debris. */
+ if (cdc) cdc->ops->destroy(cdc);
+ if (pp) xfree(pp);
+ dstr_destroy(&d); dstr_destroy(&w);
+ return (rc);
+}
+
+/*----- Signed and unsigned integer types ---------------------------------*/
+
+/* --- @init_int@, @init_uint@ --- *
+ *
+ * Arguments: @union tvec_regval *rv@ = register value
+ * @const struct tvec_regdef *rd@ = register definition
+ *
+ * Returns: ---
+ *
+ * Use: Initialize a register value.
+ *
+ * Integer values are initialized to zero.
+ */
+
+static void init_int(union tvec_regval *rv, const struct tvec_regdef *rd)
+ { rv->i = 0; }
+
+static void init_uint(union tvec_regval *rv, const struct tvec_regdef *rd)
+ { rv->u = 0; }
+
+/* --- @eq_int@, @eq_uint@ --- *
+ *
+ * Arguments: @const union tvec_regval *rv0, *rv1@ = register values
+ * @const struct tvec_regdef *rd@ = register definition
+ *
+ * Returns: Nonzero if the values are equal, zero if unequal
+ *
+ * Use: Compare register values for equality.
+ */
+
+static int eq_int(const union tvec_regval *rv0, const union tvec_regval *rv1,
+ const struct tvec_regdef *rd)
+ { return (rv0->i == rv1->i); }
+
+static int eq_uint(const union tvec_regval *rv0,
+ const union tvec_regval *rv1,
+ const struct tvec_regdef *rd)
+ { return (rv0->u == rv1->u); }
+
+/* --- @tobuf_int@, @tobuf_uint@ --- *
+ *
+ * Arguments: @buf *b@ = buffer
+ * @const union tvec_regval *rv@ = register value
+ * @const struct tvec_regdef *rd@ = register definition
+ *
+ * Returns: Zero on success, %$-1$% on failure.
+ *
+ * Use: Serialize a register value to a buffer.
+ *
+ * Integer values are serialized as little-endian 64-bit signed
+ * or unsigned integers.
+ */
+
+static int tobuf_int(buf *b, const union tvec_regval *rv,
+ const struct tvec_regdef *rd)
+ { return (signed_to_buf(b, rv->i)); }
+
+static int tobuf_uint(buf *b, const union tvec_regval *rv,
+ const struct tvec_regdef *rd)
+ { return (unsigned_to_buf(b, rv->u)); }
+
+/* --- @frombuf_int@, @frombuf_uint@ --- *
+ *
+ * Arguments: @buf *b@ = buffer
+ * @union tvec_regval *rv@ = register value
+ * @const struct tvec_regdef *rd@ = register definition
+ *
+ * Returns: Zero on success, %$-1$% on failure.
+ *
+ * Use: Deserialize a register value from a buffer.
+ *
+ * Integer values are serialized as 64-bit signed or unsigned
+ * integers.
+ */
+
+static int frombuf_int(buf *b, union tvec_regval *rv,
+ const struct tvec_regdef *rd)
+ { return (signed_from_buf(b, &rv->i)); }
+
+static int frombuf_uint(buf *b, union tvec_regval *rv,
+ const struct tvec_regdef *rd)
+ { return (unsigned_from_buf(b, &rv->u)); }
+
+/* --- @parse_int@, @parse_uint@ --- *
+ *
+ * Arguments: @union tvec_regval *rv@ = register value
+ * @const struct tvec_regdef *rd@ = register definition
+ * @struct tvec_state *tv@ = test-vector state
+ *
+ * Returns: Zero on success, %$-1$% on error.
+ *
+ * Use: Parse a register value from an input file.
+ *
+ * Integers may be input in decimal, hex, binary, or octal,
+ * following approximately usual conventions.
+ *
+ * * Signed integers may be preceded with a `+' or `-' sign.
+ *
+ * * Decimal integers are just a sequence of decimal digits
+ * `0' ... `9'.
+ *
+ * * Octal integers are a sequence of digits `0' ... `7',
+ * preceded by `0o' or `0O'.
+ *
+ * * Hexadecimal integers are a sequence of digits `0'
+ * ... `9', `a' ... `f', or `A' ... `F', preceded by `0x' or
+ * `0X'.
+ *
+ * * Radix-B integers are a sequence of digits `0' ... `9',
+ * `a' ... `f', or `A' ... `F', each with value less than B,
+ * preceded by `Br' or `BR', where 0 < B < 36 is expressed
+ * in decimal without any leading `0' or internal
+ * underscores `_'.
+ *
+ * * A digit sequence may contain internal underscore `_'
+ * separators, but not before or after all of the digits;
+ * and two consecutive `_' characters are not permitted.
+ */
+
+static int parse_int(union tvec_regval *rv, const struct tvec_regdef *rd,
+ struct tvec_state *tv)
+{
+ dstr d = DSTR_INIT;
+ int rc;
+
+ if (tvec_readword(tv, &d, 0, ";", "signed integer"))
+ { rc = -1; goto end; }
+ if (parse_signed(&rv->i, d.buf, rd->arg.p, tv)) { rc = -1; goto end; }
+ if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
+ rc = 0;
+end:
+ dstr_destroy(&d);
+ return (rc);
+}
+
+static int parse_uint(union tvec_regval *rv, const struct tvec_regdef *rd,
+ struct tvec_state *tv)
+{
+ dstr d = DSTR_INIT;
+ int rc;
+
+ if (tvec_readword(tv, &d, 0, ";", "unsigned integer"))
+ { rc = -1; goto end; }
+ if (parse_unsigned(&rv->u, d.buf, rd->arg.p, tv)) { rc = -1; goto end; }
+ if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
+ rc = 0;
+end:
+ dstr_destroy(&d);
+ return (rc);