[mLib] / test / tvec-types.c

/* -*-c-*-
 *
 * Types for the test-vector framework
 *
 * (c) 2023 Straylight/Edgeware
 */

/*----- Licensing notice --------------------------------------------------*
 *
 * This file is part of the mLib utilities library.
 *
 * mLib is free software: you can redistribute it and/or modify it under
 * the terms of the GNU Library General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * mLib is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with mLib.  If not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
 * USA.
 */

/*----- Header files ------------------------------------------------------*/

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <string.h>

#include "buf.h"
#include "codec.h"
#  include "base32.h"
#  include "base64.h"
#  include "hex.h"
#include "dstr.h"
#include "maths.h"
#include "tvec.h"

/*----- Preliminary utilities ---------------------------------------------*/

/* --- @trivial_release@ --- *
 *
 * Arguments:	@union tvec_regval *rv@ = a register value
 *		@const struct tvec_regdef@ = the register definition
 *
 * Returns:	---
 *
 * Use:		Does nothing.  Used for register values which don't retain
 *		resources.
 */

static void trivial_release(union tvec_regval *rv,
			    const struct tvec_regdef *rd)
{
  /* There is nothing to free; just mark both arguments as deliberately
   * unused.
   */
  (void)rv; (void)rd;
}

/*----- Integer utilities -------------------------------------------------*/

/* --- @unsigned_to_buf@, @signed_to_buf@ --- *
 *
 * Arguments:	@buf *b@ = buffer to write on
 *		@unsigned long u@ or @long i@ = integer to write
 *
 * Returns:	Zero on success, @-1@ on failure.
 *
 * Use:		Write @i@ to the buffer, in big-endian (two's-complement, if
 *		signed) format.
 */

static int unsigned_to_buf(buf *b, unsigned long u)
{
  kludge64 wide;

  /* Widen the value to a 64-bit quantity and append it to the buffer;
   * the buffer operation's status is our status.
   */
  ASSIGN64(wide, u);
  return (buf_putk64l(b, wide));
}

static int signed_to_buf(buf *b, long i)
{
  kludge64 wide;
  unsigned long bits = i;

  if (i < 0) {
    /* Complement in the narrow type, widen, and complement again in the
     * wide type.  NOTE(review): presumably this is what fills the upper
     * bits with ones (sign extension) when @long@ is narrower than 64
     * bits -- confirm against the @kludge64@ macros.
     */
    ASSIGN64(wide, ~bits); CPL64(wide, wide);
  } else {
    /* Nonnegative values widen directly. */
    ASSIGN64(wide, bits);
  }

  return (buf_putk64l(b, wide));
}

/* --- @unsigned_from_buf@, @signed_from_buf@ --- *
 *
 * Arguments:	@buf *b@ = buffer to read from
 *		@unsigned long *u_out@ or @long *i_out@ = where to put the
 *			result
 *
 * Returns:	Zero on success, @-1@ on failure.
 *
 * Use:		Read an integer, in big-endian (two's-complement, if signed)
 *		format, from the buffer.
 */

static int unsigned_from_buf(buf *b, unsigned long *u_out)
{
  kludge64 wide, limit;

  /* Fetch the 64-bit quantity; fail if the buffer can't supply it. */
  if (buf_getk64l(b, &wide)) return (-1);

  /* Reject anything which won't fit in an @unsigned long@. */
  ASSIGN64(limit, ULONG_MAX);
  if (CMP64(wide, >, limit)) return (-1);

  /* It fits: narrow it and hand it back. */
  *u_out = GET64(unsigned long, wide);
  return (0);
}

/* --- @hex_width@ --- *
 *
 * Arguments:	@unsigned long u@ = an integer
 *
 * Returns:	A suitable number of digits to use in order to display @u@ in
 *		hex.  Currently, we select a power of two sufficient to show
 *		the value, but at least 2.
 */

static int hex_width(unsigned long u)
{
  int digits = 2;			/* never fewer than two digits */
  unsigned long rest = u >> 8;		/* bits not covered by @digits@ */

  /* Keep doubling the digit count until nothing is left over.  Each step
   * discards as many bits as the current digit count covers, so that after
   * doubling, @rest@ again holds exactly the bits beyond @digits@ hex
   * digits.
   */
  while (rest) {
    rest >>= 4*digits;
    digits *= 2;
  }
  return (digits);
}

/* --- @format_unsigned_hex@, @format_signed_hex@ --- *
 *
 * Arguments:	@const struct gprintf_ops *gops@ = print operations
 *		@void *go@ = print destination
 *		@unsigned long u@ or @long i@ = integer to print
 *
 * Returns:	---
 *
 * Use:		Print an unsigned or signed integer in hexadecimal.
 */

static void format_unsigned_hex(const struct gprintf_ops *gops, void *go,
				unsigned long u)
{
  int wd = hex_width(u);		/* zero-padded field width */

  gprintf(gops, go, "0x%0*lx", wd, u);
}

static void format_signed_hex(const struct gprintf_ops *gops, void *go,
			      long i)
{
  const char *sign = "";
  unsigned long mag = i;

  /* Print sign and magnitude separately.  The negation is done in the
   * unsigned type so that the most negative value doesn't overflow.
   */
  if (i < 0) { sign = "-"; mag = -mag; }
  gprintf(gops, go, "%s0x%0*lx", sign, hex_width(mag), mag);
}

static int signed_from_buf(buf *b, long *i_out)
{
  kludge64 wide, pos_limit, neg_limit;

  /* The largest acceptable value, and the largest acceptable complement of
   * a negative value.
   */
  ASSIGN64(pos_limit, LONG_MAX);
  ASSIGN64(neg_limit, ~(unsigned long)LONG_MIN);

  if (buf_getk64l(b, &wide)) return (-1);

  /* Small enough to be a nonnegative @long@: take it as it is. */
  if (CMP64(wide, <=, pos_limit)) {
    *i_out = (long)GET64(unsigned long, wide);
    return (0);
  }

  /* Otherwise treat it as negative: complement it, check that the result
   * is in range, and rebuild the value as %$-c - 1$%.
   */
  CPL64(wide, wide);
  if (!CMP64(wide, <=, neg_limit)) return (-1);
  *i_out = -(long)GET64(unsigned long, wide) - 1;
  return (0);
}

/* --- @check_unsigned_range@, @check_signed_range@ --- *
 *
 * Arguments:	@unsigned long u@ or @long i@ = an integer
 *		@const struct tvec_urange *ur@ or
 *			@const struct tvec_irange *ir@ = range specification,
 *			or null
 *		@struct tvec_state *tv@ = test vector state
 *
 * Returns:	Zero on success, or @-1@ on error.
 *
 * Use:		Check that the integer is within bounds.  If not, report a
 *		suitable error and return a failure indication.
 */

static int check_signed_range(long i,
			      const struct tvec_irange *ir,
			      struct tvec_state *tv)
{
  /* No range given, or the value lies inside it: nothing to report. */
  if (!ir || (ir->min <= i && i <= ir->max)) return (0);

  /* Out of bounds: report it and fail. */
  tvec_error(tv, "integer %ld out of range (must be in [%ld .. %ld])",
	     i, ir->min, ir->max);
  return (-1);
}

static int check_unsigned_range(unsigned long u,
				const struct tvec_urange *ur,
				struct tvec_state *tv)
{
  /* No range given, or the value lies inside it: nothing to report. */
  if (!ur || (ur->min <= u && u <= ur->max)) return (0);

  /* Out of bounds: report it and fail. */
  tvec_error(tv, "integer %lu out of range (must be in [%lu .. %lu])",
	     u, ur->min, ur->max);
  return (-1);
}

/* --- @chtodig@ --- *
 *
 * Arguments:	@int ch@ = a character
 *
 * Returns:	The numeric value of the character as a digit, or @-1@ if
 *		it's not a digit.  Letters count as extended digits starting
 *		with value 10; case is not significant.
 */

static int chtodig(int ch)
{
  int dig = -1;				/* assume it's not a digit */

  /* Decimal digits map to themselves; letters of either case continue
   * upwards from ten.
   */
  if (ch >= '0' && ch <= '9') dig = ch - '0';
  else if (ch >= 'A' && ch <= 'Z') dig = ch - 'A' + 10;
  else if (ch >= 'a' && ch <= 'z') dig = ch - 'a' + 10;

  return (dig);
}

/* --- @parse_unsigned_integer@, @parse_signed_integer@ --- *
 *
 * Arguments:	@unsigned long *u_out@, @long *i_out@ = where to put the
 *			result
 *		@const char **q_out@ = where to put the end position
 *		@const char *p@ = pointer to the string to parse
 *
 * Returns:	Zero on success, @-1@ on error.
 *
 * Use:		Parse an integer from a string in the test-vector format.
 *		This is mostly an extension of the traditional C @strtoul@
 *		format: supported inputs include:
 *
 *		  * NNN -- a decimal number (even if it starts with `0');
 *		  * 0xNNN -- hexadecimal;
 *		  * 0oNNN -- octal;
 *		  * 0bNNN -- binary;
 *		  * NNrNNN -- base NN.
 *
 *		Furthermore, single underscores are permitted internally as
 *		an insignificant digit separator.
 */

static int parse_unsigned_integer(unsigned long *u_out, const char **q_out,
				  const char *p)
{
  unsigned long u;			/* value accumulated so far */
  int ch, d, r;				/* character, digit value, radix */
  const char *q;			/* just past the last real digit */
  unsigned f = 0;
#define f_implicit 1u			/* implicitly reading base 10 */
#define f_digit 2u			/* read a real digit */
#define f_uscore 4u			/* found an underscore */

  /* Initial setup
   *
   * This will deal with the traditional `0[box]...' prefixes.  We'll leave
   * our new `NNr...' syntax for later.
   *
   * A prefix is only recognized if it's followed by a digit which is valid
   * in the base it selects; otherwise the leading `0' is taken as a decimal
   * digit in its own right.
   */
  if (p[0] != '0' || !p[1]) {
    d = chtodig(*p); if (0 > d || d >= 10) return (-1);
    r = 10; u = d; p++; f |= f_implicit | f_digit;
  } else {
    u = 0; d = chtodig(p[2]);
    if (d < 0) { r = 10; f |= f_implicit | f_digit; p++; }
    else if ((p[1] == 'x' || p[1] == 'X') && d < 16) { r = 16; p += 2; }
    else if ((p[1] == 'o' || p[1] == 'O') && d < 8) { r = 8; p += 2; }
    else if ((p[1] == 'b' || p[1] == 'B') && d < 2) { r = 2; p += 2; }
    else { r = 10; f |= f_digit; p++; }
  }

  q = p;
  for (;;) {
    /* Work through the string a character at a time. */

    ch = *p; switch (ch) {

      case '_':
	/* An underscore is OK if we haven't just seen one.  Note that @q@
	 * isn't advanced, so a trailing underscore is left unconsumed.
	 */

	if (f&f_uscore) goto done;
	p++; f = (f&~f_implicit) | f_uscore;
	break;

      case 'r': case 'R':
	/* An `r' is a radix marker if the number so far is small enough to
	 * be a sensible base, and we're scanning decimal implicitly.  The
	 * largest sensible base is 36, since @chtodig@ yields digit values
	 * up to 35.  The marker must be followed by a digit which is valid
	 * in the new base.
	 */

	if ((f&f_implicit) && u && u <= 36) {
	  d = chtodig(p[1]); if (0 > d || d >= u) goto done;
	  r = u; u = d; f = (f&~f_implicit) | f_digit; p += 2; q = p;
	  break;
	}

	/* Otherwise it's not a radix marker, but it may still be an
	 * ordinary digit (value 27) if the base is large enough, so let the
	 * general digit code decide.
	 */
	/* fall through */

      default:
	/* Otherwise we expect a valid digit and accumulate it, failing
	 * outright if the result wouldn't fit in an @unsigned long@.
	 */
	d = chtodig(ch); if (d < 0 || d >= r) goto done;
	if (u > ULONG_MAX/r) return (-1);
	u *= r; if (u > ULONG_MAX - d) return (-1);
	u += d; f = (f&~f_uscore) | f_digit; p++; q = p;
	break;
    }
  }

done:
  /* We must have seen at least one actual digit.  Report the value and the
   * position just after the final digit.
   */
  if (!(f&f_digit)) return (-1);
  *u_out = u; *q_out = q; return (0);

#undef f_implicit
#undef f_digit
#undef f_uscore
}

static int parse_signed_integer(long *i_out, const char **q_out,
				const char *p)
{
  unsigned long mag;			/* magnitude of the number */
  int neg = 0;				/* did we see a `-' sign? */

  /* Step over an optional sign, remembering whether it was negative. */
  if (*p == '-') { neg = 1; p++; }
  else if (*p == '+') p++;

  /* The rest is just an unsigned number. */
  if (parse_unsigned_integer(&mag, q_out, p)) return (-1);

  /* Apply the sign, checking that the result fits in a @long@.  The
   * negative case works with %$m - 1$% so that the most negative value can
   * be produced without overflowing.
   */
  if (!neg) {
    if (mag > LONG_MAX) return (-1);
    *i_out = mag;
  } else if (!mag)
    *i_out = 0;
  else {
    if (mag - 1 > -(LONG_MIN + 1)) return (-1);
    *i_out = -(long)(mag - 1) - 1;
  }

  return (0);
}

/* --- @parse_unsigned@, @parse_signed@ --- *
 *
 * Arguments:	@unsigned long *u_out@ or @long *i_out@ = where to put the
 *			result
 *		@const char *p@ = string to parse
 *		@const struct tvec_urange *ur@ or
 *			@const struct tvec_irange *ir@ = range specification,
 *			or null
 *		@struct tvec_state *tv@ = test vector state
 *
 * Returns:	Zero on success, @-1@ on error.
 *
 * Use:		Parse and range-check an integer.  Unlike @parse_(un)signed_
 *		integer@, these functions check that there's no cruft
 *		following the final digit, and report errors as they find
 *		them rather than leaving that to the caller.
 */

static int parse_unsigned(unsigned long *u_out, const char *p,
			  const struct tvec_urange *ur,
			  struct tvec_state *tv)
{
  unsigned long val;
  const char *end;
  int rc;

  /* Run the checks in order and stop at the first one which fails: the
   * number must parse, nothing may follow it, and it must be in range.
   */
  if (parse_unsigned_integer(&val, &end, p))
    rc = tvec_error(tv, "invalid unsigned integer `%s'", p);
  else if (*end)
    rc = tvec_syntax(tv, *end, "end-of-line");
  else if (check_unsigned_range(val, ur, tv))
    rc = -1;
  else
    { *u_out = val; rc = 0; }

  return (rc);
}

static int parse_signed(long *i_out, const char *p,
			const struct tvec_irange *ir,
			struct tvec_state *tv)
{
  long val;
  const char *end;
  int rc;

  /* Run the checks in order and stop at the first one which fails: the
   * number must parse, nothing may follow it, and it must be in range.
   */
  if (parse_signed_integer(&val, &end, p))
    rc = tvec_error(tv, "invalid signed integer `%s'", p);
  else if (*end)
    rc = tvec_syntax(tv, *end, "end-of-line");
  else if (check_signed_range(val, ir, tv))
    rc = -1;
  else
    { *i_out = val; rc = 0; }

  return (rc);
}

/*----- Floating-point utilities ------------------------------------------*/

/* --- @eqish_floating_p@ --- *
 *
 * Arguments:	@double x, y@ = two numbers to compare
 *		@const struct tvec_floatinfo *fi@ = floating-point info
 *
 * Returns:	Nonzero if the comparand @y@ is sufficiently close to the
 *		reference @x@, or zero if it's definitely different.
 */

static int eqish_floating_p(double x, double y,
			    const struct tvec_floatinfo *fi)
{
  double err;

  /* Non-finite values are handled first: a NaN reference matches only a
   * NaN, and an infinite reference matches only the same infinity.
   */
  if (NANP(x)) return (NANP(y));
  if (NANP(y)) return (0);
  if (INFP(x)) return (x == y);
  if (INFP(y)) return (0);

  /* Both are finite.  Compare as the floating-point info directs; with no
   * info, insist on an exact match.
   */
  switch (fi ? fi->f&TVFF_EQMASK : TVFF_EXACT) {

    case TVFF_EXACT:
      /* Equal values whose signs also agree, so zeros of opposite sign
       * are distinguished.
       */
      if (x != y) return (0);
      return (NEGP(x) == NEGP(y));

    case TVFF_ABSDELTA:
      /* Absolute error strictly below the threshold. */
      err = x - y; err = fabs(err);
      return (err < fi->delta);

    case TVFF_RELDELTA:
      /* Error relative to the reference strictly below the threshold.
       * NOTE(review): if @x@ is zero then the quotient is infinite or NaN,
       * and the comparison fails even when @y@ is zero too -- confirm
       * that this is intended.
       */
      err = 1.0 - y/x; err = fabs(err);
      return (err < fi->delta);

    default:
      abort();
  }
}

/* --- @format_floating@ --- *
 *
 * Arguments:	@const struct gprintf_ops *gops@ = print operations
 *		@void *go@ = print destination
 *		@double x@ = number to print
 *
 * Returns:	---
 *
 * Use:		Print a floating-point number, accurately.
 */

static void format_floating(const struct gprintf_ops *gops, void *go,
			    double x)
{
  int ndig;

  /* Deal with the non-finite values first: they have their own tokens. */
  if (NANP(x)) { gprintf(gops, go, "#nan"); return; }
  if (INFP(x)) { gprintf(gops, go, x > 0 ? "#+inf" : "#-inf"); return; }

  /* Ugh.  C doesn't provide any function for just printing a
   * floating-point number /correctly/, i.e., so that you can read the
   * result back and recover the number you first thought of.  There are
   * complicated algorithms published for doing this, but I really don't
   * want to get into that here.  So we have this.
   *
   * The sign doesn't cause significant difficulty so we're going to ignore
   * it for now.  So suppose we're given a number %$x = f b^e$%, in
   * base-%$b$% format, so %$f b^n$% and %$e$% are integers, with
   * %$0 \le f < 1$%.  We're going to convert it into the nearest number
   * of the form %$X = F B^E$%, with similar conditions, only with the
   * additional requirement that %$X$% is normalized, i.e., that %$X = 0$%
   * or %$F \ge B^{-N}$%.
   *
   * We're rounding to the nearest such %$X$%.  If there is to be ambiguity
   * in the conversion, then some %$x = f b^e$% and the next smallest
   * representable number %$x' = x + b^{e-n}$% must both map to the same
   * %$X$%, which means both %$x$% and %$x'$% must be nearer to %$X$% than
   * any other number representable in the target system.  The next larger
   * number is %$X' = X + B^{E-N}$%; the next smaller number will normally
   * be %$W = X - B^{E-N}$%, but if %$F = 1/B$% then the next smaller number
   * is actually %$X - B^{E-N-1}$%.  We ignore this latter possibility in
   * the pursuit of a conservative estimate (though actually it doesn't
   * matter).
   *
   * If both %$x$% and %$x'$% map to %$X$% then we must have
   * %$L = X - B^{E-N}/2 \le x$% and %$x + b^{e-n} \le R = X + B^{E-N}/2$%;
   * so firstly %$f b^e = x \ge L = W + B^{E-N}/2 > W = (F - B^{-N}) B^E$%,
   * and secondly %$b^{e-n} \le B^{E-N}$%.  Since these inequalities are in
   * opposite senses, we can divide, giving
   *
   *	       %$f b^e/b^{e-n} > (F - B^{-N}) B^E/B^{E-N}$% ,
   *
   * whence
   *
   *	       %$f b^n > (F - B^{-N}) B^N = F B^N - 1$% .
   *
   * Now %$f \le 1 - b^{-n}$%, and %$F \ge B^{-1}$%, so, for this to be
   * possible, it must be the case that
   *
   *	       %$(1 - b^{-n}) b^n = b^n - 1 > B^{N-1} - 1$% .
   *
   * Then rearrange and take logarithms, obtaining
   *
   *	       %$(N - 1) \log B < n \log b$% ,
   *
   * and so
   *
   *	       %$N < n \log b/\log B + 1$% .
   *
   * Recall that this is a necessary condition for a collision to occur; we
   * are therefore safe whenever
   *
   *	       %$N \ge n \log b/\log B + 1$% ;
   *
   * so, taking ceilings,
   *
   *	       %$N \ge \lceil n \log b/\log B \rceil + 1$% .
   *
   * So that's why we have this.
   *
   * I'm going to assume that @n = DBL_MANT_DIG@ is sufficiently small that
   * we can calculate this without ending up on the wrong side of an
   * integer boundary.
   *
   * In C11, we have @DBL_DECIMAL_DIG@, which should be the same value only
   * as a constant.  Except that modern compilers are more than clever
   * enough to work out that this is a constant anyway.
   *
   * This is sometimes an overestimate: we'll print out meaningless digits
   * that don't represent anything we actually know about the number in
   * question.  To fix that, we'd need a complicated algorithm like Steele
   * and White's Dragon4, Gay's @dtoa@, or Burger and Dybvig's algorithm
   * (note that Loitsch's Grisu2 is conservative, and Grisu3 hands off to
   * something else in difficult situations).
   */

  ndig = ceil(DBL_MANT_DIG*log(FLT_RADIX)/log(10)) + 1;
  gprintf(gops, go, "%.*g", ndig, x);
}

/* --- @parse_floating@ --- *
 *
 * Arguments:	@double *x_out@ = where to put the result
 *		@const char *p@ = string to parse
 *		@const struct tvec_floatinfo *fi@ = floating-point info
 *		@struct tvec_state *tv@ = test vector state
 *
 * Returns:	Zero on success, @-1@ on error.
 *
 * Use:		Parse a floating-point number from a string.  Reports any
 *		necessary errors.
 */

static int parse_floating(double *x_out, const char *p,
			  const struct tvec_floatinfo *fi,
			  struct tvec_state *tv)
{
  const char *pp; char *q;
  dstr d = DSTR_INIT;			/* scratch for the range message */
  double x;
  int olderr, rc;

  /* Check for special tokens. */
  if (STRCMP(p, ==, "#nan")) {
#ifdef NAN
    x = NAN; rc = 0;
#else
    tvec_error(tv, "NaN not supported on this system");
    rc = -1; goto end;
#endif
  }

  else if (STRCMP(p, ==, "#inf") ||
	   STRCMP(p, ==, "#+inf") || STRCMP(p, ==, "+#inf")) {
#ifdef INFINITY
    x = INFINITY; rc = 0;
#else
    tvec_error(tv, "infinity not supported on this system");
    rc = -1; goto end;
#endif
  }

  else if (STRCMP(p, ==, "#-inf") || STRCMP(p, ==, "-#inf")) {
#ifdef INFINITY
    x = -INFINITY; rc = 0;
#else
    tvec_error(tv, "infinity not supported on this system");
    rc = -1; goto end;
#endif
  }

  /* Check that this looks like a number, so we can exclude `strtod'
   * recognizing its own non-finite number tokens.  That is: an optional
   * sign, an optional point, and then there must be a digit.
   */
  else {
    pp = p;
    if (*pp == '+' || *pp == '-') pp++;
    if (*pp == '.') pp++;
    if (!ISDIGIT(*pp)) {
      tvec_syntax(tv, *p ? *p : fgetc(tv->fp), "floating-point number");
      rc = -1; goto end;
    }

    /* Parse the number using the system parser.  Clear @errno@ first so
     * that we can tell whether the parser set it; the old value is put
     * back once the number has been accepted.
     */
    olderr = errno; errno = 0;
    x = strtod(p, &q);
    if (*q) {
      tvec_syntax(tv, *q, "end-of-line");
      rc = -1; goto end;
    }

    /* A range error is forgiven if the result merely underflowed.  If the
     * magnitude of the result is @HUGE_VAL@ then the number overflowed,
     * and we must reject it rather than quietly turn it into an infinity.
     */
    if (errno && (errno != ERANGE || fabs(x) == HUGE_VAL)) {
      tvec_error(tv, "invalid floating-point number `%s': %s",
		 p, strerror(errno));
      rc = -1; goto end;
    }
    errno = olderr;
  }

  /* Check that the number is acceptable. */
  if (NANP(x) && fi && !(fi->f&TVFF_NANOK)) {
    tvec_error(tv, "#nan not allowed here");
    rc = -1; goto end;
  }

  /* Check the number against whichever bounds are in force, and build a
   * message describing the permitted interval if it falls outside.  An
   * absent bound is shown as an open end at the corresponding infinity.
   */
  if (fi && ((!(fi->f&TVFF_NOMIN) && x < fi->min) ||
	     (!(fi->f&TVFF_NOMAX) && x > fi->max))) {
    dstr_puts(&d, "floating-point number ");
    format_floating(&dstr_printops, &d, x);
    dstr_puts(&d, " out of range (must be in ");
    if (fi->f&TVFF_NOMIN)
      dstr_puts(&d, "(#-inf");
    else
      { dstr_putc(&d, '['); format_floating(&dstr_printops, &d, fi->min); }
    dstr_puts(&d, " .. ");
    if (fi->f&TVFF_NOMAX)
      dstr_puts(&d, "#+inf)");
    else
      { format_floating(&dstr_printops, &d, fi->max); dstr_putc(&d, ']'); }
    dstr_putc(&d, ')'); dstr_putz(&d);
    tvec_error(tv, "%s", d.buf); rc = -1; goto end;
  }

  /* All done. */
  *x_out = x; rc = 0;
end:
  dstr_destroy(&d);
  return (rc);
}

/*----- String utilities --------------------------------------------------*/

/* Special character name table.
 *
 * Each entry associates a `#name' with a character value; several names may
 * share the same value.  The order of the entries matters: @find_charname@
 * returns the first entry matching a character and a flag, so for any given
 * character and flag the name wanted must come before any other candidate.
 * The table is terminated by an entry with a null name.
 */
static const struct chartab {
  const char *name;			/* character name */
  int ch;				/* character value */
  unsigned f;				/* flags: */
#define CTF_PREFER 1u			/*   preferred name */
#define CTF_SHORT 2u			/*   short name (compact style) */
} chartab[] = {
  { "#eof",		EOF,	CTF_PREFER | CTF_SHORT },
  { "#nul",		'\0',	CTF_PREFER },
  { "#bell",		'\a',	CTF_PREFER },
  { "#ding",		'\a',	0 },
  { "#bel",		'\a',	CTF_SHORT },
  { "#backspace",	'\b',	CTF_PREFER },
  { "#bs",		'\b',	CTF_SHORT },
  { "#escape",		'\x1b',	CTF_PREFER },
  { "#esc",		'\x1b',	CTF_SHORT },
  { "#formfeed",	'\f',	CTF_PREFER },
  { "#ff",		'\f',	CTF_SHORT },
  { "#newline",		'\n',	CTF_PREFER },
  { "#linefeed",	'\n',	0 },
  { "#lf",		'\n',	CTF_SHORT },
  { "#nl",		'\n',	0 },
  { "#return",		'\r',	CTF_PREFER },
  { "#carriage-return",	'\r',	0 },
  { "#cr",		'\r',	CTF_SHORT },
  { "#tab",		'\t',	CTF_PREFER | CTF_SHORT },
  { "#horizontal-tab",	'\t',	0 },
  { "#ht",		'\t',	0 },
  { "#vertical-tab",	'\v',	CTF_PREFER },
  { "#vt",		'\v',	CTF_SHORT },
  { "#space",		' ',	0 },
  { "#spc",		' ',	CTF_SHORT },
  { "#delete",		'\x7f',	CTF_PREFER },
  { "#del",		'\x7f',	CTF_SHORT },
  { 0,			0,	0 }	/* end marker: null name */
};

/* --- @find_charname@ --- *
 *
 * Arguments:	@int ch@ = character to match
 *		@unsigned f@ = flags (@CTF_...@) to match
 *
 * Returns:	The name of the character, or null if no match is found.
 *
 * Use:		Looks up a name for a character.  Specifically, it returns
 *		the first entry in the @chartab@ table which matches @ch@ and
 *		which has one of the flags @f@ set.
 */

static const char *find_charname(int ch, unsigned f)
{
  const struct chartab *ent = chartab;

  /* Walk the table up to its null-name end marker; the first entry with
   * the right character and any of the wanted flags wins.
   */
  while (ent->name) {
    if (ent->ch == ch && (ent->f&f)) return (ent->name);
    ent++;
  }

  /* Nothing suitable. */
  return (0);
}

/* --- @read_charname@ --- *
 *
 * Arguments:	@int *ch_out@ = where to put the character
 *		@const char *p@ = character name
 *		@unsigned f@ = flags (@RCF_...@)
 *
 * Returns:	Zero if a match was found, @-1@ if not.
 *
 * Use:		Looks up a character by name.  If @RCF_EOFOK@ is set in @f@,
 *		then the @EOF@ marker can be matched; otherwise it can't.
 */

#define RCF_EOFOK 1u
static int read_charname(int *ch_out, const char *p, unsigned f)
{
  const struct chartab *ent;

  for (ent = chartab; ent->name; ent++) {

    /* Skip entries with the wrong name. */
    if (!STRCMP(p, ==, ent->name)) continue;

    /* A negative value means the end-of-file marker, which is only
     * acceptable if the caller said so.
     */
    if (ent->ch < 0 && !(f&RCF_EOFOK)) continue;

    /* Found it. */
    *ch_out = ent->ch; return (0);
  }

  /* No entry was acceptable. */
  return (-1);
}

/* --- @format_charesc@ --- *
 *
 * Arguments:	@const struct gprintf_ops *gops@ = print operations
 *		@void *go@ = print destination
 *		@int ch@ = character to format
 *		@unsigned f@ = flags (@FCF_...@)
 *
 * Returns:	---
 *
 * Use:		Format a character as an escape sequence, possibly as part of
 *		a larger string.  If @FCF_BRACE@ is set in @f@, then put
 *		braces around a `\x...'  code, so that it's suitable for use
 *		in a longer string.
 */

#define FCF_BRACE 1u
static void format_charesc(const struct gprintf_ops *gops, void *go,
			   int ch, unsigned f)
{
  const char *fixed = 0;		/* fixed escape text, if there is one */
  int braced = (f&FCF_BRACE) != 0;

  /* See whether the character has a fixed spelling.  A null character is
   * written in braces on request, like the numeric escapes.
   */
  switch (ch) {
    case '\0': fixed = braced ? "\\{0}" : "\\0"; break;
    case '\a': fixed = "\\a"; break;
    case '\b': fixed = "\\b"; break;
    case '\t': fixed = "\\t"; break;
    case '\n': fixed = "\\n"; break;
    case '\v': fixed = "\\v"; break;
    case '\f': fixed = "\\f"; break;
    case '\r': fixed = "\\r"; break;
    case '\x1b': fixed = "\\e"; break;
    case '\'': fixed = "\\'"; break;
    case '\\': fixed = "\\\\"; break;
    default: break;
  }

  /* Print the fixed spelling if we found one (none of them contains a
   * conversion specifier); otherwise fall back to a hex escape wide enough
   * for any character code.
   */
  if (fixed)
    gprintf(gops, go, fixed);
  else
    gprintf(gops, go, braced ? "\\x{%0*x}" : "\\x%0*x",
	    hex_width(UCHAR_MAX), ch);
}

/* --- @format_char@ --- *
 *
 * Arguments:	@const struct gprintf_ops *gops@ = print operations
 *		@void *go@ = print destination
 *		@int ch@ = character to format
 *
 * Returns:	---
 *
 * Use:		Format a single character.
 */

static void format_char(const struct gprintf_ops *gops, void *go, int ch)
{
  /* A printable character other than a quote or backslash is shown
   * literally between quotes.  (The order of the tests matters: @isprint@
   * isn't consulted for the two special characters.)
   */
  if (ch != '\\' && ch != '\'' && isprint(ch))
    gprintf(gops, go, "'%c'", ch);

  /* Anything else is shown as a quoted escape sequence. */
  else {
    gprintf(gops, go, "'");
    format_charesc(gops, go, ch, 0);
    gprintf(gops, go, "'");
  }
}

/* --- @maybe_format_unsigned_char@, @maybe_format_signed_char@ --- *
 *
 * Arguments:	@const struct gprintf_ops *gops@ = print operations
 *		@void *go@ = print destination
 *		@unsigned long u@ or @long i@ = an integer
 *
 * Returns:	---
 *
 * Use:		Format a (signed or unsigned) integer as a character, if it's
 *		in range, printing something like `= 'q''.  It's assumed that
 *		a comment marker has already been output.
 */

static void maybe_format_unsigned_char
  (const struct gprintf_ops *gops, void *go, unsigned long u)
{
  const char *p;

  /* If the value has a preferred character name, then show that. */
  p = find_charname(u, CTF_PREFER);
  if (p) gprintf(gops, go, " = %s", p);

  /* If the value is a valid character code, then show the character too.
   * The bound is inclusive: @UCHAR_MAX@ is itself a valid code.
   */
  if (u <= UCHAR_MAX)
    { gprintf(gops, go, " = "); format_char(gops, go, u); }
}

static void maybe_format_signed_char
  (const struct gprintf_ops *gops, void *go, long i)
{
  const char *p;

  /* If the value has a preferred character name, then show that. */
  p = find_charname(i, CTF_PREFER);
  if (p) gprintf(gops, go, " = %s", p);

  /* If the value is a valid character code, then show the character too.
   * The upper bound is inclusive: @UCHAR_MAX@ is itself a valid code.
   */
  if (0 <= i && i <= UCHAR_MAX)
    { gprintf(gops, go, " = "); format_char(gops, go, i); }
}

/* --- @read_charesc@ --- *
 *
 * Arguments:	@int *ch_out@ = where to put the result
 *		@struct tvec_state *tv@ = test vector state
 *
 * Returns:	Zero on success, @-1@ on error.
 *
 * Use:		Parse and convert an escape sequence from @tv@'s input
 *		stream, assuming that the initial `\' has already been read.
 *		Reports errors as appropriate.
 */

static int read_charesc(int *ch_out, struct tvec_state *tv)
{
  int ch, i, esc;
  unsigned f = 0;
#define f_brace 1u			/* escape body is wrapped in braces */

  /* The character after the backslash decides what kind of escape this
   * is.
   */
  ch = getc(tv->fp);
  switch (ch) {

    /* Things we shouldn't find. */
    case EOF: case '\n': return (tvec_syntax(tv, ch, "string escape"));

    /* Single-character escapes. */
    case '\'': *ch_out = '\''; break;
    case '\\': *ch_out = '\\'; break;
    case '"': *ch_out = '"'; break;
    case 'a': *ch_out = '\a'; break;
    case 'b': *ch_out = '\b'; break;
    case 'e': *ch_out = '\x1b'; break;
    case 'f': *ch_out = '\f'; break;
    case 'n': *ch_out = '\n'; break;
    case 'r': *ch_out = '\r'; break;
    case 't': *ch_out = '\t'; break;
    case 'v': *ch_out = '\v'; break;

    /* Hex escapes, with and without braces. */
    case 'x':
      ch = getc(tv->fp);
      if (ch == '{') { f |= f_brace; ch = getc(tv->fp); }
      else f &= ~f_brace;

      /* There must be at least one hex digit. */
      esc = chtodig(ch);
      if (esc < 0 || esc >= 16) return (tvec_syntax(tv, ch, "hex digit"));

      /* Accumulate any further hex digits, stopping at the first character
       * which isn't one, and failing as soon as the code gets too large.
       */
      for (;;) {
	ch = getc(tv->fp); i = chtodig(ch); if (i < 0 || i >= 16) break;
	esc = 16*esc + i;
	if (esc > UCHAR_MAX)
	  return (tvec_error(tv,
			     "character code %d out of range", esc));
      }

      /* Without braces, the terminating character isn't ours, so push it
       * back; with braces, it must be the closing brace.
       */
      if (!(f&f_brace)) ungetc(ch, tv->fp);
      else if (ch != '}') return (tvec_syntax(tv, ch, "`}'"));
      *ch_out = esc;
      break;

    /* Other things, primarily octal escapes. */
    case '{':
      f |= f_brace; ch = getc(tv->fp);
      /* fall through */
    default:
      if ('0' <= ch && ch < '8') {

	/* Read at most three octal digits in total; @i@ counts them.  A
	 * non-digit ends the sequence early and is pushed back.
	 */
	i = 1; esc = ch - '0';
	for (;;) {
	  ch = getc(tv->fp);
	  if ('0' > ch || ch >= '8') { ungetc(ch, tv->fp); break; }
	  esc = 8*esc + ch - '0';
	  i++; if (i >= 3) break;
	}

	/* A braced escape must be closed properly. */
	if (f&f_brace) {
	  ch = getc(tv->fp);
	  if (ch != '}') return (tvec_syntax(tv, ch, "`}'"));
	}

	/* Three octal digits can exceed the character range. */
	if (esc > UCHAR_MAX)
	  return (tvec_error(tv,
			     "character code %d out of range", esc));
	*ch_out = esc; break;
      } else
	return (tvec_syntax(tv, ch, "string escape"));
  }

  /* Done. */
  return (0);

#undef f_brace
}

/* --- @read_quoted_string@ --- *
 *
 * Arguments:	@dstr *d@ = string to write to
 *		@int quote@ = initial quote, `'' or `"'
 *		@struct tvec_state *tv@ = test vector state
 *
 * Returns:	Zero on success, @-1@ on error.
 *
 * Use:		Read the rest of a quoted string into @d@, reporting errors
 *		as appropriate.
 *
 *		A single-quoted string is entirely literal.  A double-quoted
 *		string may contain C-like escapes.
 */

static int read_quoted_string(dstr *d, int quote, struct tvec_state *tv)
{
  int ch;

  for (;;) {
    ch = getc(tv->fp);

    /* The string must be closed before the line or the file ends. */
    if (ch == EOF || ch == '\n')
      return (tvec_syntax(tv, ch, "`%c'", quote));

    if (ch == '\\' && quote != '\'') {
      /* A backslash, and not in a single-quoted string, where it would be
       * literal.  A backslash-newline pair is dropped entirely, other than
       * counting the line; anything else is an escape sequence, whose
       * result is stored even if it happens to be the quote character.
       */
      ch = getc(tv->fp);
      if (ch == '\n') { tv->lno++; continue; }
      ungetc(ch, tv->fp);
      if (read_charesc(&ch, tv)) return (-1);
    } else if (ch == quote) {
      /* The matching close quote: we're finished. */
      break;
    }

    /* An ordinary character, or the result of an escape. */
    DPUTC(d, ch);
  }

  DPUTZ(d);
  return (0);
}

/* --- @collect_bare@ --- *
 *
 * Arguments:	@dstr *d@ = string to write to
 *		@struct tvec_state *tv@ = test vector state
 *
 * Returns:	Zero on success, @-1@ on error.
 *
 * Use:		Read barewords and the whitespace between them.  Stop when we
 *		encounter something which can't start a bareword.
 */

static int collect_bare(dstr *d, struct tvec_state *tv)
{
  size_t pos = d->len;			/* length before trailing space */
  enum { WORD, SPACE, ESCAPE }; unsigned s = WORD; /* scanner state */
  int ch, rc;

  /* Scan a character at a time.  The state records what we saw last: part
   * of a word, whitespace, or a backslash whose victim is yet to come.
   * Whenever we leave a word for whitespace, @pos@ notes the output length
   * so that the whitespace can be trimmed off if nothing follows it.
   */
  for (;;) {
    ch = getc(tv->fp);
    switch (ch) {
      case EOF:
	/* End of file is always an error here. */
	tvec_syntax(tv, ch, "bareword");
	rc = -1; goto end;
      case '\n':
	/* An escaped newline is stored as an ordinary character, after
	 * counting the line.  Otherwise, push the newline back and let
	 * @tvec_nexttoken@ deal with it; fail if it reports failure, and
	 * otherwise record a single space as the separator.
	 */
	if (s == ESCAPE) { tv->lno++; goto addch; }
	if (s == WORD) pos = d->len;
	ungetc(ch, tv->fp); if (tvec_nexttoken(tv)) { rc = -1; goto end; }
	DPUTC(d, ' '); s = SPACE;
	break;
      case '"': case '\'': case '!': case '#': case ')': case '}': case ']':
	/* Following whitespace, these can't start a bareword, so push the
	 * character back and stop.  Otherwise (inside a word, or escaped)
	 * they're ordinary characters.
	 */
	if (s == SPACE) { ungetc(ch, tv->fp); goto done; }
	goto addch;
      case '\\':
	/* Remember the backslash; the backslash itself isn't stored. */
	s = ESCAPE;
	break;
      default:
	/* Unescaped whitespace is stored as it is, but might be trimmed
	 * later.
	 */
	if (s != ESCAPE && isspace(ch)) {
	  if (s == WORD) pos = d->len;
	  DPUTC(d, ch); s = SPACE;
	  break;
	}
      addch:
	/* An ordinary word character. */
	DPUTC(d, ch); s = WORD;
    }
  }

done:
  /* Discard whitespace collected since the last word, and terminate the
   * string.
   */
  if (s == SPACE) d->len = pos;
  DPUTZ(d); rc = 0;
end:
  return (rc);
}

/* --- @set_up_encoding@ --- *
 *
 * Arguments:	@const codec_class **ccl_out@ = where to put the class
 *		@unsigned *f_out@ = where to put the flags
 *		@unsigned code@ = the coding scheme to use (@TVCODE_...@)
 *
 * Returns:	---
 *
 * Use:		Helper for @read_compound_string@ below.
 *
 *		Return the appropriate codec class and flags for @code@.
 *		Leaves @*ccl_out@ null if the coding scheme doesn't have a
 *		backing codec class (e.g., @TVCODE_BARE@).
 */

enum { TVCODE_BARE, TVCODE_HEX, TVCODE_BASE64, TVCODE_BASE32 };
static void set_up_encoding(const codec_class **ccl_out, unsigned *f_out,
			    unsigned code)
{
  const codec_class *cls = 0;		/* bare text has no codec class */
  unsigned flags = 0;

  /* Pick the class and decoding flags for the coding scheme; an unknown
   * scheme is a programming error.
   */
  switch (code) {
    case TVCODE_BARE:
      break;
    case TVCODE_HEX:
      cls = &hex_class; flags = CDCF_IGNCASE;
      break;
    case TVCODE_BASE64:
      cls = &base64_class; flags = CDCF_IGNEQPAD;
      break;
    case TVCODE_BASE32:
      cls = &base32_class; flags = CDCF_IGNCASE | CDCF_IGNEQPAD;
      break;
    default:
      abort();
  }

  /* Hand the results back. */
  *ccl_out = cls; *f_out = flags;
}

/* --- @flush_codec@ --- *
 *
 * Arguments:	@codec *cdc@ = a codec, or null
 *		@dstr *d@ = output string
 *		@struct tvec_state *tv@ = test vector state
 *
 * Returns:	Zero on success, @-1@ on error.
 *
 * Use:		Helper for @read_compound_string@ below.
 *
 *		Flush out any final buffered material from @cdc@, and check
 *		that it's in a good state.  Frees the codec on success.  Does
 *		nothing if @cdc@ is null.
 */

static int flush_codec(codec *cdc, dstr *d, struct tvec_state *tv)
{
  int cerr;

  /* No codec, no work. */
  if (!cdc) return (0);

  /* A call with no input tells the codec to finish, writing any final
   * output to @d@.  If it objects, report the problem; the codec is left
   * alone in that case.
   */
  cerr = cdc->ops->code(cdc, 0, 0, d);
  if (cerr)
    return (tvec_error(tv, "invalid %s sequence end: %s",
		       cdc->ops->c->name, codec_strerror(cerr)));

  /* All is well, so we can dispose of the codec. */
  cdc->ops->destroy(cdc);
  return (0);
}

/* --- @read_compound_string@ --- *
 *
 * Arguments:	@void **p_inout@ = address of output buffer pointer
 *		@size_t *sz_inout@ = address of buffer size
 *		@unsigned code@ = initial interpretation of barewords
 *		@unsigned f@ = other flags (@RCSF_...@)
 *		@struct tvec_state *tv@ = test vector state
 *
 * Returns:	Zero on success, @-1@ on error.
 *
 * Use:		Parse a compound string, i.e., a sequence of stringish pieces
 *		which might be quoted strings, character names, or barewords
 *		to be decoded according to @code@, interspersed with
 *		additional directives.
 *
 *		If the initial buffer pointer is non-null and sufficiently
 *		large, then it will be reused; otherwise, it is freed and a
 *		fresh, sufficiently large buffer is allocated and returned.
 */

/* Flag for @read_compound_string@: we are inside a bracketed group, so a
 * closing bracket terminates this string and is left for the caller, rather
 * than being reported as a syntax error.
 */
#define RCSF_NESTED 1u
static int read_compound_string(void **p_inout, size_t *sz_inout,
				unsigned code, unsigned f,
				struct tvec_state *tv)
{
  const codec_class *ccl; unsigned cdf;
  codec *cdc;
  dstr d = DSTR_INIT, w = DSTR_INIT;
  char *p;
  const char *q;
  void *pp = 0; size_t sz;
  unsigned long n;
  int ch, err, rc;

  /* The output is accumulated in @d@; @w@ is scratch space for individual
   * words.  A decoder @cdc@ is created lazily when the first encoded
   * fragment turns up, and flushed whenever anything other than another
   * encoded fragment interrupts the run.
   */
  set_up_encoding(&ccl, &cdf, code); cdc = 0;

  if (tvec_nexttoken(tv)) return (tvec_syntax(tv, fgetc(tv->fp), "string"));
  do {
    ch = getc(tv->fp);
    switch (ch) {

      case ')': case ']': case '}':
	/* Close brackets.  Leave these for recursive caller if there is one,
	 * or just complain.
	 */

	if (!(f&RCSF_NESTED))
	  { rc = tvec_syntax(tv, ch, "string"); goto end; }
	ungetc(ch, tv->fp); goto done;

      case '"': case '\'':
	/* Quotes.  Read a quoted string. */

	if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
	cdc = 0;
	if (read_quoted_string(&d, ch, tv)) { rc = -1; goto end; }
	break;

      case '#':
	/* A named character. */

	ungetc(ch, tv->fp);
	if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
	cdc = 0;
	DRESET(&w);
	if (tvec_readword(tv, &w, ";", "character name"))
	  { rc = -1; goto end; }
	if (read_charname(&ch, w.buf, RCF_EOFOK)) {
	  /* Report the word we just read (@w@), not the output accumulator
	   * (@d@), which may be empty or hold unrelated binary data.
	   */
	  rc = tvec_error(tv, "unknown character name `%s'", w.buf);
	  goto end;
	}
	DPUTC(&d, ch); break;

      case '!':
	/* A magic keyword. */

	if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
	cdc = 0;
	ungetc(ch, tv->fp);
	DRESET(&w);
	if (tvec_readword(tv, &w, ";", "`!'-keyword"))
	  { rc = -1; goto end; }

	/* Change bareword coding system. */
	if (STRCMP(w.buf, ==, "!bare"))
	  { code = TVCODE_BARE; set_up_encoding(&ccl, &cdf, code); }
	else if (STRCMP(w.buf, ==, "!hex"))
	  { code = TVCODE_HEX; set_up_encoding(&ccl, &cdf, code); }
	else if (STRCMP(w.buf, ==, "!base32"))
	  { code = TVCODE_BASE32; set_up_encoding(&ccl, &cdf, code); }
	else if (STRCMP(w.buf, ==, "!base64"))
	  { code = TVCODE_BASE64; set_up_encoding(&ccl, &cdf, code); }

	/* Repeated substrings. */
	else if (STRCMP(w.buf, ==, "!repeat")) {

	  /* Read the repeat count, which must be a complete unsigned
	   * integer.
	   */
	  if (tvec_nexttoken(tv)) {
	    rc = tvec_syntax(tv, fgetc(tv->fp), "repeat count");
	    goto end;
	  }
	  DRESET(&w);
	  if (tvec_readword(tv, &w, ";{", "repeat count"))
	    { rc = -1; goto end;  }
	  if (parse_unsigned_integer(&n, &q, w.buf)) {
	    rc = tvec_error(tv, "invalid repeat count `%s'", w.buf);
	    goto end;
	  }
	  if (*q) { rc = tvec_syntax(tv, *q, "`{'"); goto end; }

	  /* Read the braced body recursively into a temporary buffer @pp@,
	   * using the current coding system.
	   */
	  if (tvec_nexttoken(tv))
	    { rc = tvec_syntax(tv, fgetc(tv->fp), "`{'"); goto end; }
	  ch = getc(tv->fp); if (ch != '{')
	    { rc = tvec_syntax(tv, ch, "`{'"); goto end; }
	  sz = 0;
	  if (read_compound_string(&pp, &sz, code, f | RCSF_NESTED, tv))
	    { rc = -1; goto end; }
	  ch = getc(tv->fp); if (ch != '}')
	    { rc = tvec_syntax(tv, ch, "`}'"); goto end; }

	  /* Append @n@ copies of the body.  An empty body contributes
	   * nothing; otherwise check that the total size is representable
	   * before reserving space.
	   */
	  if (sz) {
	    if (n > (size_t)-1/sz)
	      { rc = tvec_error(tv, "repeat size out of range"); goto end; }
	    dstr_ensure(&d, n*sz);
	    if (sz == 1)
	      { memset(d.buf + d.len, *(unsigned char *)pp, n); d.len += n; }
	    else
	      for (; n--; d.len += sz) memcpy(d.buf + d.len, pp, sz);
	  }
	  xfree(pp); pp = 0;
	}

	/* Anything else is an error. */
	else {
	  tvec_error(tv, "unknown string keyword `%s'", w.buf);
	  rc = -1; goto end;
	}
	break;

      default:
	/* A bareword.  Process it according to the current coding system. */

	switch (code) {
	  case TVCODE_BARE:
	    ungetc(ch, tv->fp);
	    if (collect_bare(&d, tv)) goto done;
	    break;
	  default:
	    assert(ccl);
	    ungetc(ch, tv->fp); DRESET(&w);
	    if (tvec_readword(tv, &w, ";", "%s-encoded fragment", ccl->name))
	      { rc = -1; goto end; }
	    if (!cdc) cdc = ccl->decoder(cdf);
	    err = cdc->ops->code(cdc, w.buf, w.len, &d);
	    if (err) {
	      tvec_error(tv, "invalid %s fragment `%s': %s",
			 ccl->name, w.buf, codec_strerror(err));
	      rc = -1; goto end;
	    }
	    break;
	}
	break;
    }
  } while (!tvec_nexttoken(tv));

done:
  /* Wrap things up.  The caller's buffer is reused only if it has room for
   * the data and a trailing zero byte; the reported size excludes that
   * terminator.
   */
  if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
  cdc = 0;
  if (*sz_inout <= d.len)
    { xfree(*p_inout); *p_inout = xmalloc(d.len + 1); }
  p = *p_inout; memcpy(p, d.buf, d.len); p[d.len] = 0; *sz_inout = d.len;
  rc = 0;

end:
  /* Clean up any debris. */
  if (cdc) cdc->ops->destroy(cdc);
  if (pp) xfree(pp);
  dstr_destroy(&d); dstr_destroy(&w);
  return (rc);
}

/*----- Signed and unsigned integer types ---------------------------------*/

/* --- @init_int@, @init_uint@ --- *
 *
 * Arguments:	@union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	---
 *
 * Use:		Initialize a register value.
 *
 *		Integer values are initialized to zero.
 */

static void init_int(union tvec_regval *rv, const struct tvec_regdef *rd)
{
  /* Signed integer registers start out as zero; @rd@ is not consulted. */
  rv->i = 0;
}

static void init_uint(union tvec_regval *rv, const struct tvec_regdef *rd)
{
  /* Unsigned integer registers start out as zero; @rd@ is not consulted. */
  rv->u = 0;
}

/* --- @eq_int@, @eq_uint@ --- *
 *
 * Arguments:	@const union tvec_regval *rv0, *rv1@ = register values
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	Nonzero if the values are equal, zero if unequal
 *
 * Use:		Compare register values for equality.
 */

static int eq_int(const union tvec_regval *rv0, const union tvec_regval *rv1,
		  const struct tvec_regdef *rd)
{
  /* Signed integers are equal exactly when their values coincide. */
  if (rv0->i != rv1->i) return (0);
  return (1);
}

static int eq_uint(const union tvec_regval *rv0,
		   const union tvec_regval *rv1,
		   const struct tvec_regdef *rd)
{
  /* Unsigned integers are equal exactly when their values coincide. */
  if (rv0->u != rv1->u) return (0);
  return (1);
}

/* --- @tobuf_int@, @tobuf_uint@ --- *
 *
 * Arguments:	@buf *b@ = buffer
 *		@const union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	Zero on success, %$-1$% on failure.
 *
 * Use:		Serialize a register value to a buffer.
 *
 *		Integer values are serialized as little-endian 64-bit signed
 *		or unsigned integers.
 */

static int tobuf_int(buf *b, const union tvec_regval *rv,
		     const struct tvec_regdef *rd)
{
  /* Hand the signed value to the common integer serializer. */
  long i = rv->i;

  return (signed_to_buf(b, i));
}

static int tobuf_uint(buf *b, const union tvec_regval *rv,
		      const struct tvec_regdef *rd)
{
  /* Hand the unsigned value to the common integer serializer. */
  unsigned long u = rv->u;

  return (unsigned_to_buf(b, u));
}

/* --- @frombuf_int@, @frombuf_uint@ --- *
 *
 * Arguments:	@buf *b@ = buffer
 *		@union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	Zero on success, %$-1$% on failure.
 *
 * Use:		Deserialize a register value from a buffer.
 *
 *		Integer values are serialized as 64-bit signed or unsigned
 *		integers.
 */

static int frombuf_int(buf *b, union tvec_regval *rv,
		       const struct tvec_regdef *rd)
{
  /* Decode straight into the register's signed slot. */
  int rc = signed_from_buf(b, &rv->i);

  return (rc);
}

static int frombuf_uint(buf *b, union tvec_regval *rv,
			const struct tvec_regdef *rd)
{
  /* Decode straight into the register's unsigned slot. */
  int rc = unsigned_from_buf(b, &rv->u);

  return (rc);
}

/* --- @parse_int@, @parse_uint@ --- *
 *
 * Arguments:	@union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *		@struct tvec_state *tv@ = test-vector state
 *
 * Returns:	Zero on success, %$-1$% on error.
 *
 * Use:		Parse a register value from an input file.
 *
 *		Integers may be input in decimal, hex, binary, or octal,
 *		following approximately usual conventions.
 *
 *		  * Signed integers may be preceded with a `+' or `-' sign.
 *
 *		  * Decimal integers are just a sequence of decimal digits
 *		    `0' ... `9'.
 *
 *		  * Octal integers are a sequence of digits `0' ... `7',
 *		    preceded by `0o' or `0O'.
 *
 *		  * Hexadecimal integers are a sequence of digits `0'
 *		    ... `9', `a' ... `f', or `A' ... `F', preceded by `0x' or
 *		    `0X'.
 *
 *		  * Radix-B integers are a sequence of digits `0' ... `9',
 *		    `a' ... `f', or `A' ... `F', each with value less than B,
 *		    preceded by `Br' or `BR', where 0 < B < 36 is expressed
 *		    in decimal without any leading `0' or internal
 *		    underscores `_'.
 *
 *		  * A digit sequence may contain internal underscore `_'
 *		    separators, but not before or after all of the digits;
 *		    and two consecutive `_' characters are not permitted.
 */

static int parse_int(union tvec_regval *rv, const struct tvec_regdef *rd,
		     struct tvec_state *tv)
{
  dstr word = DSTR_INIT;
  int rc = -1;

  /* Three steps, each of which must succeed (i.e., return zero) before the
   * next is attempted: read a word, convert it subject to the range in
   * @rd->arg.p@, and dispose of the rest of the line.
   */
  if (!tvec_readword(tv, &word, ";", "signed integer") &&
      !parse_signed(&rv->i, word.buf, rd->arg.p, tv) &&
      !tvec_flushtoeol(tv, 0))
    rc = 0;

  dstr_destroy(&word);
  return (rc);
}

static int parse_uint(union tvec_regval *rv, const struct tvec_regdef *rd,
		      struct tvec_state *tv)
{
  dstr word = DSTR_INIT;
  int rc = -1;

  /* Three steps, each of which must succeed (i.e., return zero) before the
   * next is attempted: read a word, convert it subject to the range in
   * @rd->arg.p@, and dispose of the rest of the line.
   */
  if (!tvec_readword(tv, &word, ";", "unsigned integer") &&
      !parse_unsigned(&rv->u, word.buf, rd->arg.p, tv) &&
      !tvec_flushtoeol(tv, 0))
    rc = 0;

  dstr_destroy(&word);
  return (rc);
}

/* --- @dump_int@, @dump_uint@ --- *
 *
 * Arguments:	@const union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *		@unsigned style@ = output style (@TVSF_...@)
 *		@const struct gprintf_ops *gops@, @void *go@ = format output
 *
 * Returns:	---
 *
 * Use:		Dump a register value to the format output.
 *
 *		Integer values are dumped in decimal and, unless compact
 *		output is requested, hex, and maybe a character, as a
 *		comment.
 */

static void dump_int(const union tvec_regval *rv,
		     const struct tvec_regdef *rd,
		     unsigned style,
		     const struct gprintf_ops *gops, void *go)
{
  long i = rv->i;

  /* The decimal rendering is always produced. */
  gprintf(gops, go, "%ld", i);

  /* Compact output stops there; otherwise add a trailing comment with the
   * hex form and possibly a character rendering.
   */
  if (style&TVSF_COMPACT) return;
  gprintf(gops, go, " ; = ");
  format_signed_hex(gops, go, i);
  maybe_format_signed_char(gops, go, i);
}

static void dump_uint(const union tvec_regval *rv,
		      const struct tvec_regdef *rd,
		      unsigned style,
		      const struct gprintf_ops *gops, void *go)
{
  unsigned long u = rv->u;

  /* The decimal rendering is always produced. */
  gprintf(gops, go, "%lu", u);

  /* Compact output stops there; otherwise add a trailing comment with the
   * hex form and possibly a character rendering.
   */
  if (style&TVSF_COMPACT) return;
  gprintf(gops, go, " ; = ");
  format_unsigned_hex(gops, go, u);
  maybe_format_unsigned_char(gops, go, u);
}

/* Signed integer register type: the operations table, in the order
 * initialize, release, compare, serialize, deserialize, parse, dump.
 */
const struct tvec_regty tvty_int = {
  init_int,
  trivial_release,
  eq_int,
  tobuf_int,
  frombuf_int,
  parse_int,
  dump_int
};
/* Unsigned integer register type: the operations table, in the order
 * initialize, release, compare, serialize, deserialize, parse, dump.
 */
const struct tvec_regty tvty_uint = {
  init_uint,
  trivial_release,
  eq_uint,
  tobuf_uint,
  frombuf_uint,
  parse_uint,
  dump_uint
};

/* Predefined signed integer ranges, each given as lower bound then upper
 * bound.  The first group follows the platform's C types; the second is
 * fixed-width.
 */
const struct tvec_irange tvrange_schar = { SCHAR_MIN, SCHAR_MAX };
const struct tvec_irange tvrange_short = { SHRT_MIN, SHRT_MAX };
const struct tvec_irange tvrange_int = { INT_MIN, INT_MAX };
const struct tvec_irange tvrange_long = { LONG_MIN, LONG_MAX };

const struct tvec_irange tvrange_sbyte = { -128, 127 };
const struct tvec_irange tvrange_i16 = { -32768, +32767 };
const struct tvec_irange tvrange_i32 = { -2147483648, 2147483647 };
/* Predefined unsigned integer ranges, each given as lower bound then upper
 * bound.  The first group follows the platform's C types; the second is
 * fixed-width.
 *
 * The largest 32-bit unsigned value is %$2^{32} - 1 = 4294967295$%.  The
 * `u' suffix keeps the constant unsigned on every platform.
 */
const struct tvec_urange
  tvrange_uchar = { 0, UCHAR_MAX },
  tvrange_ushort = { 0, USHRT_MAX },
  tvrange_uint = { 0, UINT_MAX },
  tvrange_ulong = { 0, ULONG_MAX },
  tvrange_size = { 0, (size_t)-1 },
  tvrange_byte = { 0, 255 },
  tvrange_u16 = { 0, 65535 },
  tvrange_u32 = { 0, 4294967295u };

/* --- @tvec_claimeq_int@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@long i0, i1@ = two signed integers
 *		@const char *file@, @unsigned lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if @i0@ and @i1@ are equal, otherwise zero.
 *
 * Use:		Check that values of @i0@ and @i1@ are equal.  As for
 *		@tvec_claim@ above, a test case is automatically begun and
 *		ended if none is already underway.  If the values are
 *		unequal, then @tvec_fail@ is called, quoting @expr@, and the
 *		mismatched values are dumped: @i0@ is printed as the output
 *		value and @i1@ is printed as the input reference.
 */

int tvec_claimeq_int(struct tvec_state *tv, long i0, long i1,
		     const char *file, unsigned lno, const char *expr)
{
  /* Stage the two values in the first output and input registers: @i0@ is
   * the actual output and @i1@ the expected reference.
   */
  tv->in[0].v.i = i1;
  tv->out[0].v.i = i0;

  /* Integers need no type argument, so pass a null one. */
  return (tvec_claimeq(tv, &tvty_int, 0, file, lno, expr));
}

/* --- @tvec_claimeq_uint@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@unsigned long u0, u1@ = two unsigned integers
 *		@const char *file@, @unsigned lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if @u0@ and @u1@ are equal, otherwise zero.
 *
 * Use:		Check that values of @u0@ and @u1@ are equal.  As for
 *		@tvec_claim@ above, a test case is automatically begun and
 *		ended if none is already underway.  If the values are
 *		unequal, then @tvec_fail@ is called, quoting @expr@, and the
 *		mismatched values are dumped: @u0@ is printed as the output
 *		value and @u1@ is printed as the input reference.
 */

int tvec_claimeq_uint(struct tvec_state *tv,
		      unsigned long u0, unsigned long u1,
		      const char *file, unsigned lno, const char *expr)
{
  /* Stage the two values in the first output and input registers: @u0@ is
   * the actual output and @u1@ the expected reference.
   */
  tv->in[0].v.u = u1;
  tv->out[0].v.u = u0;

  /* Integers need no type argument, so pass a null one. */
  return (tvec_claimeq(tv, &tvty_uint, 0, file, lno, expr));
}

/*----- Floating-point type -----------------------------------------------*/

/* --- @init_float@ --- *
 *
 * Arguments:	@union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	---
 *
 * Use:		Initialize a register value.
 *
 *		Floating-point values are initialized to zero.
 */

static void init_float(union tvec_regval *rv, const struct tvec_regdef *rd)
{
  /* Floating-point registers start out as zero; @rd@ is not consulted. */
  rv->f = 0.0;
}

/* --- @eq_float@ --- *
 *
 * Arguments:	@const union tvec_regval *rv0, *rv1@ = register values
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	Nonzero if the values are equal, zero if unequal
 *
 * Use:		Compare register values for equality.
 *
 *		Floating-point values may be considered equal if their
 *		absolute or relative difference is sufficiently small, as
 *		described in the register definition.
 */

static int eq_float(const union tvec_regval *rv0,
		    const union tvec_regval *rv1,
		    const struct tvec_regdef *rd)
{
  /* The comparison rules come from the register definition's argument and
   * are applied by the shared helper.
   */
  int same = eqish_floating_p(rv0->f, rv1->f, rd->arg.p);

  return (same);
}

/* --- @tobuf_float@ --- *
 *
 * Arguments:	@buf *b@ = buffer
 *		@const union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	Zero on success, %$-1$% on failure.
 *
 * Use:		Serialize a register value to a buffer.
 *
 *		Floating-point values are serialized as little-endian
 *		IEEE 754 Binary64.
 */

static int tobuf_float(buf *b, const union tvec_regval *rv,
		       const struct tvec_regdef *rd)
{
  /* Write the value with the buffer library's @f64l@ encoder. */
  int rc = buf_putf64l(b, rv->f);

  return (rc);
}

/* --- @frombuf_float@ --- *
 *
 * Arguments:	@buf *b@ = buffer
 *		@union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	Zero on success, %$-1$% on failure.
 *
 * Use:		Deserialize a register value from a buffer.
 *
 *		Floating-point values are serialized as little-endian
 *		IEEE 754 Binary64.
 */

static int frombuf_float(buf *b, union tvec_regval *rv,
			 const struct tvec_regdef *rd)
{
  /* Read the value with the buffer library's @f64l@ decoder, directly into
   * the register's floating-point slot.
   */
  int rc = buf_getf64l(b, &rv->f);

  return (rc);
}

/* --- @parse_float@ --- *
 *
 * Arguments:	@union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *		@struct tvec_state *tv@ = test-vector state
 *
 * Returns:	Zero on success, %$-1$% on error.
 *
 * Use:		Parse a register value from an input file.
 *
 *		Floating-point values are either NaN (%|#nan|%, if supported
 *		by the platform); positive or negative infinity (%|#inf|%,
 *		%|+#inf|%, or %|#+inf|% (preferring the last), and %|-#inf|%
 *		or %|#-inf|% (preferring the latter), if supported by the
 *		platform); or a number in strtod(3) syntax.
 */

static int parse_float(union tvec_regval *rv, const struct tvec_regdef *rd,
		       struct tvec_state *tv)
{
  dstr word = DSTR_INIT;
  int rc = -1;

  /* Three steps, each of which must succeed (i.e., return zero) before the
   * next is attempted: read a word, convert it subject to the constraints
   * in @rd->arg.p@, and dispose of the rest of the line.
   */
  if (!tvec_readword(tv, &word, ";", "floating-point number") &&
      !parse_floating(&rv->f, word.buf, rd->arg.p, tv) &&
      !tvec_flushtoeol(tv, 0))
    rc = 0;

  dstr_destroy(&word);
  return (rc);
}

/* --- @dump_float@ --- *
 *
 * Arguments:	@const union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *		@unsigned style@ = output style (@TVSF_...@)
 *		@const struct gprintf_ops *gops@, @void *go@ = format output
 *
 * Returns:	---
 *
 * Use:		Dump a register value to the format output.
 *
 *		Floating-point values are dumped in decimal or as a special
 *		token beginning with `%|#|%'.  Some effort is taken to ensure
 *		that the output is sufficient to uniquely identify the
 *		original value, but, honestly, C makes this really hard.
 */

static void dump_float(const union tvec_regval *rv,
		       const struct tvec_regdef *rd,
		       unsigned style,
		       const struct gprintf_ops *gops, void *go)
{
  /* All of the formatting is delegated to the shared helper; @style@ and
   * @rd@ make no difference to the output.
   */
  double x = rv->f;

  format_floating(gops, go, x);
}

/* Floating-point register type: the operations table, in the order
 * initialize, release, compare, serialize, deserialize, parse, dump.
 */
const struct tvec_regty tvty_float = {
  init_float,
  trivial_release,
  eq_float,
  tobuf_float,
  frombuf_float,
  parse_float,
  dump_float
};

/* Predefined floating-point descriptions: exact comparison, bounded to all
 * finite values or to the nonnegative finite values respectively, with a
 * zero delta.
 */
const struct tvec_floatinfo tvflt_finite =
  { TVFF_EXACT, -DBL_MAX, DBL_MAX, 0.0 };
const struct tvec_floatinfo tvflt_nonneg =
  { TVFF_EXACT, 0, DBL_MAX, 0.0 };

/* --- @tvec_claimeqish_float@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@double f0, f1@ = two floating-point numbers
 *		@unsigned f@ = flags (@TVFF_...@)
 *		@double delta@ = maximum tolerable difference
 *		@const char *file@, @unsigned lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if @f0@ and @f1@ are sufficiently close, otherwise
 *		zero.
 *
 * Use:		Check that values of @f0@ and @f1@ are sufficiently close.
 *		As for @tvec_claim@ above, a test case is automatically begun
 *		and ended if none is already underway.  If the values are
 *		too far apart, then @tvec_fail@ is called, quoting @expr@,
 *		and the mismatched values are dumped: @f0@ is printed as the
 *		output value and @f1@ is printed as the input reference.
 *
 *		The details for the comparison are as follows.
 *
 *		  * A NaN value matches any other NaN, and nothing else.
 *
 *		  * An infinity matches another infinity of the same sign,
 *		    and nothing else.
 *
 *		  * If @f&TVFF_EQMASK@ is @TVFF_EXACT@, then any
 *		    representable number matches only itself: in particular,
 *		    positive and negative zero are considered distinct.
 *		    (This allows tests to check that they land on the correct
 *		    side of branch cuts, for example.)
 *
 *		  * If @f&TVFF_EQMASK@ is @TVFF_ABSDELTA@, then %$x$% matches
 *		    %$y$% when %$|x - y| < \delta$%.
 *
 *		  * If @f&TVFF_EQMASK@ is @TVFF_RELDELTA@, then %$x$% matches
 *		    %$y$% when %$|1 - y/x| < \delta$%.  (Note that this
 *		    criterion is asymmetric in %$x$% and %$y$%.)
 */

int tvec_claimeqish_float(struct tvec_state *tv,
			  double f0, double f1, unsigned f, double delta,
			  const char *file, unsigned lno,
			  const char *expr)
{
  struct tvec_floatinfo fi;
  union tvec_misc arg;

  fi.f = f; fi.min = fi.max = 0.0; fi.delta = delta; arg.p = &fi;
  tv->out[0].v.f = f0; tv->in[0].v.f = f1;
  return (tvec_claimeq(tv, &tvty_float, &arg, file, lno, expr));
}

/* --- @tvec_claimeq_float@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@double f0, f1@ = two floating-point numbers
 *		@const char *file@, @unsigned lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if @f0@ and @f1@ are identical, otherwise zero.
 *
 * Use:		Check that values of @f0@ and @f1@ are identical.  The
 *		function is exactly equivalent to @tvec_claimeqish_float@
 *		with @f == TVFF_EXACT@.
 */

int tvec_claimeq_float(struct tvec_state *tv,
		       double f0, double f1,
		       const char *file, unsigned lno,
		       const char *expr)
{
  /* Exact comparison is the approximate one with @TVFF_EXACT@ and a zero
   * tolerance.
   */
  int ok = tvec_claimeqish_float(tv, f0, f1, TVFF_EXACT, 0.0,
				 file, lno, expr);

  return (ok);
}

/*----- Enumerations ------------------------------------------------------*/

/* --- @init_tenum@ --- *
 *
 * Arguments:	@union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	---
 *
 * Use:		Initialize a register value.
 *
 *		Integer and floating-point enumeration values are initialized
 *		as their underlying representations.  Pointer enumerations
 *		are initialized to %|#nil|%.
 */

/* Numeric enumerations share the initializers of their underlying types. */
#define init_ienum init_int
#define init_uenum init_uint
#define init_fenum init_float

static void init_penum(union tvec_regval *rv, const struct tvec_regdef *rd)
{
  /* Pointer enumerations start out null; @rd@ is not consulted. */
  rv->p = 0;
}

/* --- @eq_tenum@ --- *
 *
 * Arguments:	@const union tvec_regval *rv0, *rv1@ = register values
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	Nonzero if the values are equal, zero if unequal
 *
 * Use:		Compare register values for equality.
 *
 *		Integer and floating-point enumeration values are compared as
 *		their underlying representations; in particular, floating-
 *		point enumerations may compare equal if their absolute or
 *		relative difference is sufficiently small.  Pointer
 *		enumerations are compared as pointers.
 */

/* Integer enumerations compare exactly like their underlying types. */
#define eq_ienum eq_int
#define eq_uenum eq_uint

static int eq_fenum(const union tvec_regval *rv0,
		    const union tvec_regval *rv1,
		    const struct tvec_regdef *rd)
{
  /* The register argument is the enumeration description; the comparison
   * rules are the floating-point description it refers to.
   */
  const struct tvec_fenuminfo *fei = rd->arg.p;
  int same = eqish_floating_p(rv0->f, rv1->f, fei->fi);

  return (same);
}

static int eq_penum(const union tvec_regval *rv0,
		    const union tvec_regval *rv1,
		    const struct tvec_regdef *rd)
{
  /* Pointer enumerations are equal when the pointers themselves are. */
  if (rv0->p != rv1->p) return (0);
  return (1);
}

/* --- @tobuf_tenum@ --- *
 *
 * Arguments:	@buf *b@ = buffer
 *		@const union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	Zero on success, %$-1$% on failure.
 *
 * Use:		Serialize a register value to a buffer.
 *
 *		Integer and floating-point enumeration values are serialized
 *		as their underlying representations.  Pointer enumerations
 *		are serialized as the signed integer index into the
 *		association table; %|#nil|% serializes as %$-1$%, and
 *		unrecognized pointers cause failure.
 */

/* Numeric enumerations serialize exactly like their underlying types. */
#define tobuf_ienum tobuf_int
#define tobuf_uenum tobuf_uint
#define tobuf_fenum tobuf_float

static int tobuf_penum(buf *b, const union tvec_regval *rv,
		       const struct tvec_regdef *rd)
{
  const struct tvec_penuminfo *pei = rd->arg.p;
  const struct tvec_passoc *pa;
  long idx = 0;

  /* Scan the association table, which ends at the entry with a null tag.
   * The first entry holding this pointer determines the index written.
   */
  for (pa = pei->av; pa->tag; pa++, idx++)
    if (pa->p == rv->p) return (signed_to_buf(b, idx));

  /* Not in the table.  A null pointer is still representable, as index
   * %$-1$%; anything else can't be serialized.
   */
  if (rv->p) return (-1);
  return (signed_to_buf(b, -1L));
}

/* --- @frombuf_tenum@ --- *
 *
 * Arguments:	@buf *b@ = buffer
 *		@union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	Zero on success, %$-1$% on failure.
 *
 * Use:		Deserialize a register value from a buffer.
 *
 *		Integer and floating-point enumeration values are serialized
 *		as their underlying representations.  Pointer enumerations
 *		are serialized as the signed integer index into the
 *		association table; %|#nil|% serializes as %$-1$%; out-of-
 *		range indices cause failure.
 */

/* Numeric enumerations deserialize exactly like their underlying types. */
#define frombuf_ienum frombuf_int
#define frombuf_uenum frombuf_uint
#define frombuf_fenum frombuf_float
static int frombuf_penum(buf *b, union tvec_regval *rv,
			 const struct tvec_regdef *rd)
{
  const struct tvec_penuminfo *pei = rd->arg.p;
  long idx, count;

  /* Count the table entries, up to the terminating null tag. */
  count = 0;
  while (pei->av[count].tag) count++;

  /* Fetch the serialized index. */
  if (signed_from_buf(b, &idx)) return (-1);

  /* Index %$-1$% stands for the null pointer; any other index must select
   * a real table entry.
   */
  if (idx == -1) { rv->p = 0; return (0); }
  if (idx < 0 || idx >= count) return (-1);
  rv->p = (/*unconst*/ void *)pei->av[idx].p;
  return (0);
}

/* --- @parse_tenum@ --- *
 *
 * Arguments:	@union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *		@struct tvec_state *tv@ = test-vector state
 *
 * Returns:	Zero on success, %$-1$% on error.
 *
 * Use:		Parse a register value from an input file.
 *
 *		An enumerated value may be given by name or as a literal
 *		value.  For enumerations based on numeric types, the literal
 *		values can be written in the same syntax as the underlying
 *		values.  For enumerations based on pointers, the only
 *		permitted literal is %|#nil|%, which denotes a null pointer.
 */

/* Generator for the @parse_SLOTenum@ functions.  It is applied through
 * @TVEC_MISCSLOTS@ below, which is expected to invoke it once per
 * enumeration flavour with a tag, a C type, and a slot letter (TODO confirm
 * against the definition of @TVEC_MISCSLOTS@); the type argument is not
 * used here.
 *
 * Each generated function reads a single word, then walks the association
 * table @ei->av@, which ends at the entry with a null @tag@, looking for an
 * exact match.  On a match, @FOUND_tag_@ stores the associated value;
 * otherwise control falls into @MISSING_tag_@, which tries to treat the word
 * as a literal and jumps to @end@ with @rc@ set to %$-1$% on failure.
 * Either way, success continues at @done@, which calls @tvec_flushtoeol@.
 * The word buffer is released on every path.
 *
 * @LITSTR_tag_@ is pasted onto the end of the description string passed to
 * @tvec_readword@.
 */
#define DEFPARSE_ENUM(tag_, ty, slot)					\
  static int parse_##slot##enum(union tvec_regval *rv,			\
				const struct tvec_regdef *rd,		\
				struct tvec_state *tv)			\
  {									\
    const struct tvec_##slot##enuminfo *ei = rd->arg.p;			\
    const struct tvec_##slot##assoc *a;					\
    dstr d = DSTR_INIT;							\
    int rc;								\
									\
    if (tvec_readword(tv, &d, ";", "enumeration tag or " LITSTR_##tag_)) \
      { rc = -1; goto end; }						\
    for (a = ei->av; a->tag; a++)					\
      if (STRCMP(a->tag, ==, d.buf)) { FOUND_##tag_ goto done; }	\
    MISSING_##tag_							\
    done:								\
    if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }			\
    rc = 0;								\
  end:									\
    dstr_destroy(&d);							\
    return (rc);							\
  }

/* Signed integer enumerations: literals are parsed as signed integers
 * subject to the range @ei->ir@.
 */
#define LITSTR_INT	"literal signed integer"
#define FOUND_INT	rv->i = a->i;
#define MISSING_INT	if (parse_signed(&rv->i, d.buf, ei->ir, tv))	\
			  { rc = -1; goto end; }

/* Unsigned integer enumerations: literals are parsed as unsigned integers
 * subject to the range @ei->ur@.
 */
#define LITSTR_UINT	"literal unsigned integer"
#define FOUND_UINT	rv->u = a->u;
#define MISSING_UINT	if (parse_unsigned(&rv->u, d.buf, ei->ur, tv))	\
			  { rc = -1; goto end; }

/* Floating-point enumerations: literals are parsed as floating-point values
 * subject to the description @ei->fi@.
 */
#define LITSTR_FLT	"literal floating-point number, "		\
			  "`#-inf', `#+inf', or `#nan'"
#define FOUND_FLT	rv->f = a->f;
#define MISSING_FLT	if (parse_floating(&rv->f, d.buf, ei->fi, tv))	\
			  { rc = -1; goto end; }

/* Pointer enumerations: the only literal accepted is `#nil', meaning a null
 * pointer; any other unmatched word is reported as an unknown value of the
 * enumeration named @ei->name@.
 */
#define LITSTR_PTR	"`#nil'"
#define FOUND_PTR	rv->p = (/*unconst*/ void *)a->p;
#define MISSING_PTR	if (STRCMP(d.buf, ==, "#nil"))			\
			  rv->p = 0;					\
			else {						\
			  tvec_error(tv, "unknown `%s' value `%s'",	\
				     ei->name, d.buf);			\
			  rc = -1; goto end;				\
			}

/* Instantiate the parsers. */
TVEC_MISCSLOTS(DEFPARSE_ENUM)

/* The helper macros are private to this section. */
#undef LITSTR_INT
#undef FOUND_INT
#undef MISSING_INT

#undef LITSTR_UINT
#undef FOUND_UINT
#undef MISSING_UINT

#undef LITSTR_FLT
#undef FOUND_FLT
#undef MISSING_FLT

#undef LITSTR_PTR
#undef FOUND_PTR
#undef MISSING_PTR

#undef DEFPARSE_ENUM

/* --- @dump_tenum@ --- *
 *
 * Arguments:	@const union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *		@unsigned style@ = output style (@TVSF_...@)
 *		@const struct gprintf_ops *gops@, @void *go@ = format output
 *
 * Returns:	---
 *
 * Use:		Dump a register value to the format output.
 *
 *		Enumeration values are dumped as their symbolic names, if
 *		possible, with the underlying values provided as a comment
 *		unless compact output is requested, as for the underlying
 *		representation.  A null pointer is printed as %|#nil|%;
 *		non-null pointers are printed as %|#<TYPE PTR>|%, with the
 *		enumeration TYPE and the raw pointer PTR printed with the
 *		system's %|%p|% format specifier.
 */


/* Generator for the @dump_SLOTenum@ functions, applied through
 * @TVEC_MISCSLOTS@ below; the type argument is not used here.
 *
 * Each generated function scans the association table @ei->av@, which ends
 * at the entry with a null @tag@, for the first entry whose value equals
 * the register's.  If there is one, its tag is printed; in compact style
 * that is the whole output, otherwise ` ; = ' is printed so that the raw
 * value appears as a comment.  Then @PRINTRAW_tag_@ prints the raw value.
 * When the scan finds nothing, @a@ is left pointing at the terminating
 * entry, so @a->tag@ is null on entry to @PRINTRAW_tag_@.
 */
#define DEFDUMP_ENUM(tag_, ty, slot)					\
  static void dump_##slot##enum(const union tvec_regval *rv,		\
				const struct tvec_regdef *rd,		\
				unsigned style,				\
				const struct gprintf_ops *gops, void *go) \
  {									\
    const struct tvec_##slot##enuminfo *ei = rd->arg.p;			\
    const struct tvec_##slot##assoc *a;					\
									\
    for (a = ei->av; a->tag; a++)					\
      if (rv->slot == a->slot) {					\
	gprintf(gops, go, "%s", a->tag);				\
	if (style&TVSF_COMPACT) return;					\
	gprintf(gops, go, " ; = "); break;				\
      }									\
									\
    PRINTRAW_##tag_							\
  }

/* Statement prefix guarding the extra (hex and character) renderings of an
 * integer.  The statement written after the macro becomes the body of the
 * final @else@, carrying the label @_extra@, so it runs only when one of
 * the earlier branches jumps to it:
 *
 *   * in compact style, nothing happens and the body is skipped;
 *
 *   * if no symbolic name matched (@a->tag@ is null), the decimal value is
 *     the main output, so the body is introduced as a comment with ` ; = ';
 *
 *   * otherwise a comment was already opened after the name, so the body is
 *     simply joined on with ` = '.
 *
 * The @else if (1)@ branch always fires when reached, so the final @else@
 * is never taken as such: it is there only to attach the label to the body.
 * The label name is fixed, so the macro can be used only once per function.
 */
#define MAYBE_PRINT_EXTRA						\
	if (style&TVSF_COMPACT) /* nothing to do */;			\
	else if (!a->tag) { gprintf(gops, go, " ; = "); goto _extra; }	\
	else if (1) { gprintf(gops, go, " = "); goto _extra; }		\
	else _extra:

/* Signed integers: decimal, then maybe hex and a character. */
#define PRINTRAW_INT	gprintf(gops, go, "%ld", rv->i);		\
			MAYBE_PRINT_EXTRA {				\
			  format_signed_hex(gops, go, rv->i);		\
			  maybe_format_signed_char(gops, go, rv->i);	\
			}

/* Unsigned integers: decimal, then maybe hex and a character. */
#define PRINTRAW_UINT	gprintf(gops, go, "%lu", rv->u);		\
			MAYBE_PRINT_EXTRA {				\
			  format_unsigned_hex(gops, go, rv->u);		\
			  maybe_format_unsigned_char(gops, go, rv->u);	\
			}

/* Floating-point values: just the shared floating-point formatting. */
#define PRINTRAW_FLT	format_floating(gops, go, rv->f);

/* Pointers: `#nil' for null, otherwise the enumeration name and the raw
 * pointer.
 */
#define PRINTRAW_PTR	if (!rv->p) gprintf(gops, go, "#nil");		\
			else gprintf(gops, go, "#<%s %p>", ei->name, rv->p);

/* Instantiate the dumpers. */
TVEC_MISCSLOTS(DEFDUMP_ENUM)

/* The helper macros are private to this section. */
#undef PRINTRAW_INT
#undef PRINTRAW_UINT
#undef PRINTRAW_FLT
#undef PRINTRAW_PTR

#undef MAYBE_PRINT_EXTRA
#undef DEFDUMP_ENUM

/* Enumeration type definitions.
 *
 * For each slot letter supplied by @TVEC_MISCSLOTS@, define the public
 * operations table @tvty_SLOTenum@ from the corresponding @init_@, @eq_@,
 * @tobuf_@, @frombuf_@, @parse_@, and @dump_@ functions (or aliases) defined
 * above.  Enumeration values hold no resources, so release is trivial.
 */
#define DEFTY_ENUM(tag, ty, slot)					\
  const struct tvec_regty tvty_##slot##enum = {				\
    init_##slot##enum, trivial_release, eq_##slot##enum,		\
    tobuf_##slot##enum, frombuf_##slot##enum,				\
    parse_##slot##enum, dump_##slot##enum				\
  };
TVEC_MISCSLOTS(DEFTY_ENUM)
#undef DEFTY_ENUM

/* Predefined enumeration types. */

/* Names for boolean values.  Order matters: the dumper prints the first
 * entry whose value matches, so zero is shown as `nil' and one as `t'.
 */
static const struct tvec_iassoc bool_assoc[] = {
  /* Spellings of false. */
  { "nil", 0 }, { "false", 0 }, { "f", 0 },
  { "no", 0 }, { "n", 0 }, { "off", 0 },

  /* Spellings of true. */
  { "t", 1 }, { "true", 1 },
  { "yes", 1 }, { "y", 1 }, { "on", 1 },

  TVEC_ENDENUM
};

/* The `bool' enumeration: the names above, with literals limited to the
 * range of @int@.
 */
const struct tvec_ienuminfo tvenum_bool = {
  "bool",
  bool_assoc,
  &tvrange_int
};

/* Names for three-way comparison results.  Order matters: the dumper prints
 * the first entry whose value matches, so the symbols are preferred over
 * the words.
 */
static const struct tvec_iassoc cmp_assoc[] = {
  /* Less than. */
  { "<", -1 }, { "less", -1 }, { "lt", -1 },

  /* Equal. */
  { "=", 0 }, { "equal", 0 }, { "eq", 0 },

  /* Greater than. */
  { ">", +1 }, { "greater", +1 }, { "gt", +1 },

  TVEC_ENDENUM
};

/* The `cmp' enumeration: the names above, with literals limited to the
 * range of @int@.
 */
const struct tvec_ienuminfo tvenum_cmp = {
  "cmp",
  cmp_assoc,
  &tvrange_int
};

/* --- @tvec_claimeq_tenum@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@const struct tvec_typeenuminfo *ei@ = enumeration type info
 *		@ty t0, t1@ = two values
 *		@const char *file@, @unsigned lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if @t0@ and @t1@ are equal, otherwise zero.
 *
 * Use:		Check that values of @t0@ and @t1@ are equal.  As for
 *		@tvec_claim@ above, a test case is automatically begun and
 *		ended if none is already underway.  If the values are
 *		unequal, then @tvec_fail@ is called, quoting @expr@, and the
 *		mismatched values are dumped: @t0@ is printed as the output
 *		value and @t1@ is printed as the input reference.
 */

/* Generator for the @tvec_claimeq_SLOTenum@ functions, applied through
 * @TVEC_MISCSLOTS@ below.
 *
 * Each generated function passes the enumeration description @ei@ as the
 * type argument, stores @e0@ in the first output register and @e1@ in the
 * first input register (via @GET_tag@), and hands over to @tvec_claimeq@
 * with the matching enumeration type.
 */
#define DEFCLAIM(tag, ty, slot)						\
	int tvec_claimeq_##slot##enum					\
	  (struct tvec_state *tv,					\
	   const struct tvec_##slot##enuminfo *ei, ty e0, ty e1,	\
	   const char *file, unsigned lno, const char *expr)		\
	{								\
	  union tvec_misc arg;						\
									\
	  arg.p = ei;							\
	  tv->out[0].v.slot = GET_##tag(e0);				\
	  tv->in[0].v.slot = GET_##tag(e1);				\
	  return (tvec_claimeq(tv, &tvty_##slot##enum, &arg,		\
			       file, lno, expr));			\
	}

/* Conversions from the argument type to the register slot.  Numeric values
 * are stored as they are; pointers are cast to plain @void *@ (the
 * `unconst' marker suggests the argument type is const-qualified -- TODO
 * confirm against @TVEC_MISCSLOTS@).
 */
#define GET_INT(e) (e)
#define GET_UINT(e) (e)
#define GET_FLT(e) (e)
#define GET_PTR(e) ((/*unconst*/ void *)(e))
TVEC_MISCSLOTS(DEFCLAIM)
#undef DEFCLAIM
#undef GET_INT
#undef GET_UINT
#undef GET_FLT
#undef GET_PTR

/*----- Flag types --------------------------------------------------------*/

/* Flag types are initialized, compared, and serialized as unsigned
 * integers.
 */

/* --- @parse_flags@ --- *
 *
 * Arguments:	@union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *		@struct tvec_state *tv@ = test-vector state
 *
 * Returns:	Zero on success, %$-1$% on error.
 *
 * Use:		Parse a register value from an input file.
 *
 *		The input syntax is a sequence of items separated by `|'
 *		signs.  Each item may be the symbolic name of a field value,
 *		or a literal unsigned integer.  The masks associated with the
 *		given symbolic names must be disjoint.  The resulting
 *		numerical value is simply the bitwise OR of the given values.
 */

static int parse_flags(union tvec_regval *rv, const struct tvec_regdef *rd,
		       struct tvec_state *tv)
{
  const struct tvec_flaginfo *fi = rd->arg.p;
  const struct tvec_flag *f;
  unsigned long m = 0, v = 0, t;
  dstr d = DSTR_INIT;
  int ch, rc;

  for (;;) {

    /* Read the next item. */
    DRESET(&d);
    if (tvec_readword(tv, &d, "|;", "flag name or integer"))
      { rc = -1; goto end; }

    /* Try to find a matching entry in the table. */
    for (f = fi->fv; f->tag; f++)
      if (STRCMP(f->tag, ==, d.buf)) {
	if (m&f->m)
	  { tvec_error(tv, "colliding flag setting"); rc = -1; goto end; }
	else
	  { m |= f->m; v |= f->v; goto next; }
      }

    /* Otherwise, try to parse it as a raw integer. */
    if (parse_unsigned(&t, d.buf, fi->range, tv))
      { rc = -1; goto end; }
    v |= t;

  next:
    /* Advance to the next token.  If it's a separator then consume it, and
     * go round again.  Otherwise we stop here.
     */
    if (tvec_nexttoken(tv)) break;
    ch = getc(tv->fp);
      if (ch != '|') { tvec_syntax(tv, ch, "`|'"); rc = -1; goto end; }
      if (tvec_nexttoken(tv))
      { tvec_syntax(tv, '\n', "flag name or integer"); rc = -1; goto end; }
  }

  /* Done. */
  rv->u = v; rc = 0;
end:
  dstr_destroy(&d);
  return (rc);
}

/* --- @dump_flags@ --- *
 *
 * Arguments:	@const union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *		@unsigned style@ = output style (@TVSF_...@)
 *		@const struct gprintf_ops *gops@, @void *go@ = format output
 *
 * Returns:	---
 *
 * Use:		Dump a register value to the format output.
 *
 *		The table of symbolic names and their associated values and
 *		masks is repeatedly scanned, in order, to find disjoint
 *		matches -- i.e., entries whose value matches the target value
 *		in the bit positions indicated by the mask, and whose mask
 *		doesn't overlap with any previously found matches; the names
 *		are then output, separated by `|'.  Any remaining nonzero
 *		bits not covered by any of the matching masks are output as a
 *		single literal integer, in hex.
 *
 *		Unless compact output is requested, or no symbolic names were
 *		found, the raw numeric value is also printed in hex, as a
 *		comment.
 */

static void dump_flags(const union tvec_regval *rv,
		       const struct tvec_regdef *rd,
		       unsigned style,
		       const struct gprintf_ops *gops, void *go)
{
  const struct tvec_flaginfo *info = rd->arg.p;
  const struct tvec_flag *ent;
  const char *join = style&TVSF_COMPACT ? "|" : " | ";
  const char *sep = "";
  unsigned long val = rv->u, left = ~0ul;

  /* Walk the table once.  @left@ holds the bits not yet claimed by a
   * printed name: an entry is printed if its mask still touches @left@ and
   * the value agrees with the entry under that mask.
   */
  for (ent = info->fv; ent->tag; ent++) {
    if (!(left&ent->m)) continue;
    if ((val&ent->m) != ent->v) continue;
    gprintf(gops, go, "%s%s", sep, ent->tag);
    left &= ~ent->m; sep = join;
  }

  /* Any set bits which no name accounted for go out as a hex literal. */
  if (val&left)
    gprintf(gops, go, "%s0x%0*lx", sep, hex_width(val), val&left);

  /* If at least one name was printed, and we're not being compact, then
   * show the whole raw value as a comment.
   */
  if (left != ~0ul && !(style&TVSF_COMPACT))
    gprintf(gops, go, " ; = 0x%0*lx", hex_width(val), val);
}

/* Flags type definition.  Initialization, comparison, and serialization
 * reuse the unsigned-integer operations; only parsing and dumping are
 * specific to flags.
 */
const struct tvec_regty tvty_flags = {
  init_uint, trivial_release, eq_uint,
  tobuf_uint, frombuf_uint,
  parse_flags, dump_flags
};

/* --- @tvec_claimeq_flags@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@const struct tvec_flaginfo *fi@ = flags type info
 *		@unsigned long f0, f1@ = two values
 *		@const char *file@, @unsigned lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if @f0@ and @f1@ are equal, otherwise zero.
 *
 * Use:		Check that values of @f0@ and @f1@ are equal.  As for
 *		@tvec_claim@ above, a test case is automatically begun and
 *		ended if none is already underway.  If the values are
 *		unequal, then @tvec_fail@ is called, quoting @expr@, and the
 *		mismatched values are dumped: @f0@ is printed as the output
 *		value and @f1@ is printed as the input reference.
 */

int tvec_claimeq_flags(struct tvec_state *tv,
		       const struct tvec_flaginfo *fi,
		       unsigned long f0, unsigned long f1,
		       const char *file, unsigned lno, const char *expr)
{
  union tvec_misc arg;

  arg.p = fi; tv->out[0].v.u = f0; tv->in[0].v.u = f1;
  return (tvec_claimeq(tv, &tvty_flags, &arg, file, lno, expr));
}

/*----- Characters --------------------------------------------------------*/

/* Character values are initialized and compared as signed integers. */

/* --- @tobuf_char@ --- *
 *
 * Arguments:	@buf *b@ = buffer
 *		@const union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	Zero on success, %$-1$% on failure.
 *
 * Use:		Serialize a register value to a buffer.
 *
 *		Character values are serialized as little-endian 32-bit
 *		unsigned integers, with %|EOF|% serialized as all-bits-set.
 */

static int tobuf_char(buf *b, const union tvec_regval *rv,
		      const struct tvec_regdef *rd)
{
  long ch = rv->i;
  uint32 code;

  /* @EOF@ gets the all-bits-set code; anything else must be a valid
   * unsigned character value, or it can't be represented.
   */
  if (ch == EOF) code = MASK32;
  else if (ch < 0 || ch > UCHAR_MAX) return (-1);
  else code = ch;

  return (buf_putu32l(b, code));
}

/* --- @frombuf_char@ --- *
 *
 * Arguments:	@buf *b@ = buffer
 *		@union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	Zero on success, %$-1$% on failure.
 *
 * Use:		Deserialize a register value from a buffer.
 *
 *		Character values are serialized as little-endian 32-bit
 *		unsigned integers, with %|EOF|% serialized as all-bits-set.
 */

static int frombuf_char(buf *b, union tvec_regval *rv,
			const struct tvec_regdef *rd)
{
  uint32 code;

  if (buf_getu32l(b, &code)) return (-1);

  /* An ordinary character code is taken as it stands. */
  if (code <= UCHAR_MAX) { rv->i = code; return (0); }

  /* All-bits-set stands for @EOF@. */
  if (code == MASK32) { rv->i = EOF; return (0); }

  /* Nothing else is acceptable. */
  return (-1);
}

/* --- @parse_char@ --- *
 *
 * Arguments:	@union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *		@struct tvec_state *tv@ = test-vector state
 *
 * Returns:	Zero on success, %$-1$% on error.
 *
 * Use:		Parse a register value from an input file.
 *
 *		A character value can be given by symbolic name, with a
 *		leading `%|#|%'; or a character or `%|\|%'-escape sequence,
 *		optionally in single quotes.
 *
 *		The following escape sequences and character names are
 *		recognized.
 *
 *		* `%|#eof|%' is the special end-of-file marker.
 *
 *		* `%|#nul|%' is the NUL character, sometimes used to
 *		  terminate strings.
 *
 *		* `%|#bell|%', `%|#bel|%', `%|#ding|%', or `%|\a|%' is the BEL
 *		  character used to ring the terminal bell (or do some other
 *		  thing to attract the user's attention).
 *
 *		* %|#backspace|%, %|#bs|%, or %|\b|% is the backspace
 *		  character, used to move the cursor backwards by one cell.
 *
 *		* %|#escape|%, %|#esc|%, or %|\e|% is the escape character,
 *		  used to introduce special terminal commands.
 *
 *		* %|#formfeed|%, %|#ff|%, or %|\f|% is the formfeed
 *		  character, used to separate pages of text.
 *
 *		* %|#newline|%, %|#linefeed|%, %|#lf|%, %|#nl|%, or %|\n|% is
 *		  the newline character, used to terminate lines of text or
 *		  advance the cursor to the next line (perhaps without
 *		  returning it to the start of the line).
 *
 *		* %|#return|%, %|#carriage-return|%, %|#cr|%, or %|\r|% is
 *		  the carriage-return character, used to return the cursor to
 *		  the start of the line.
 *
 *		* %|#tab|%, %|#horizontal-tab|%, %|#ht|%, or %|\t|% is the
 *		  tab character, used to advance the cursor to the next tab
 *		  stop on the current line.
 *
 *		* %|#vertical-tab|%, %|#vt|%, or %|\v|% is the vertical tab
 *		  character.
 *
 *		* %|#space|%, %|#spc|% is the space character.
 *
 *		* %|#delete|%, %|#del|% is the delete character, used to
 *		  erase the most recent character.
 *
 *		* %|\'|% is the single-quote character.
 *
 *		* %|\\|% is the backslash character.
 *
 *		* %|\"|% is the double-quote character.
 *
 *		* %|\NNN|% or %|\{NNN}|% is the character with code NNN in
 *		  octal.  The NNN may be up to three digits long.
 *
 *		* %|\xNN|% or %|\x{NN}|% is the character with code NN in
 *		  hexadecimal.
 */

static int parse_char(union tvec_regval *rv, const struct tvec_regdef *rd,
		      struct tvec_state *tv)
{
  dstr d = DSTR_INIT;
  int ch, rc;
  unsigned f = 0;
#define f_quote 1u

  /* Every exit below goes through @end@, so that the scratch string @d@ is
   * released no matter which path was taken.
   */

  /* Inspect the character to see what we're up against. */
  ch = getc(tv->fp);

  if (ch == '#') {
    /* It looks like a special token.  Push the `%|#|%' back and fetch the
     * whole word.  If there's just the `%|#|%' after all, then treat it as
     * literal.
     */

    ungetc(ch, tv->fp);
    if (tvec_readword(tv, &d, ";", "character name")) { rc = -1; goto end; }
    if (STRCMP(d.buf, !=, "#")) {
      if (read_charname(&ch, d.buf, RCF_EOFOK)) {
	rc = tvec_error(tv, "unknown character name `%s'", d.buf);
	goto end;
      }
      if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
      rv->i = ch; rc = 0; goto end;
    }
  }

  /* If this is a single quote then we expect to see a matching one later,
   * and we should process backslash escapes.  Get the next character and see
   * what happens.
   */
  if (ch == '\'') { f |= f_quote; ch = getc(tv->fp); }

  /* Main character dispatch. */
  switch (ch) {

    case ';':
      /* Unquoted, semicolon begins a comment. */
      if (!(f&f_quote)) { rc = tvec_syntax(tv, ch, "character"); goto end; }
      else goto plain;

    case '\n':
      /* A newline.  If we saw a single quote, then treat that as literal.
       * Otherwise this is an error.
       */
      if (!(f&f_quote)) goto nochar;
      else { f &= ~f_quote; ungetc(ch, tv->fp); ch = '\''; goto plain; }

    case EOF:
      /* End-of-file.  Similar to newline, but with slightly different
       * effects on the parse state.
       */
      if (!(f&f_quote)) goto nochar;
      else { f &= ~f_quote; ch = '\''; goto plain; }

    case '\'': nochar:
      /* A single quote.  This must be the second of a pair, and there should
       * have been a character or escape sequence between them.
       */
      rc = tvec_syntax(tv, ch, "character"); goto end;

    case '\\':
      /* A backslash.  Read a character escape into @ch@, and then carry on
       * into the literal case to store it.  On failure, leave through the
       * common cleanup like every other error path.
       */
      if (read_charesc(&ch, tv)) { rc = -1; goto end; }
      /* fall through */

    default: plain:
      /* Anything else.  Treat as literal. */
      rv->i = ch; break;
  }

  /* If we saw an opening quote, then expect the closing quote. */
  if (f&f_quote) {
    ch = getc(tv->fp);
    if (ch != '\'') { rc = tvec_syntax(tv, ch, "`''"); goto end; }
  }

  /* Done. */
  if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
  rc = 0;
end:
  dstr_destroy(&d);
  return (rc);

#undef f_quote
}

/* --- @dump_char@ --- *
 *
 * Arguments:	@const union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *		@unsigned style@ = output style (@TVSF_...@)
 *		@const struct gprintf_ops *gops@, @void *go@ = format output
 *
 * Returns:	---
 *
 * Use:		Dump a register value to the format output.
 *
 *		Character values are dumped as their symbolic names, if any,
 *		or as a character or escape sequence within single quotes
 *		(which may be omitted in compact style).  If compact output
 *		is not requested, then the single-quoted representation (for
 *		characters dumped as symbolic names) and integer code in
 *		decimal and hex are printed as a comment.
 */

static void dump_char(const union tvec_regval *rv,
		      const struct tvec_regdef *rd,
		      unsigned style,
		      const struct gprintf_ops *gops, void *go)
{
  const char *name;
  int compact = (style&TVSF_COMPACT) != 0;
  int commented = 0;

  /* Start with the symbolic name, if there is one.  In compact style
   * that's all we print; otherwise the rest is written as a comment.
   */
  name = find_charname(rv->i, compact ? CTF_SHORT : CTF_PREFER);
  if (name) {
    gprintf(gops, go, "%s", name);
    if (compact) return;
    gprintf(gops, go, " ;"); commented = 1;
  }

  /* Anything nonnegative (so not @EOF@) also gets a character form.  In
   * compact style, a printable character other than space, backslash, or
   * single quote is written bare, and then we're done; everything else goes
   * through @format_char@.
   */
  if (rv->i >= 0) {
    if (commented) gprintf(gops, go, " = ");
    if (compact &&
	rv->i != ' ' && rv->i != '\\' && rv->i != '\'' &&
	isprint(rv->i)) {
      gprintf(gops, go, "%c", (int)rv->i);
      return;
    }
    format_char(gops, go, rv->i);
  }

  /* Finally, unless we're being compact, the code in decimal and hex. */
  if (!compact) {
    if (!commented) gprintf(gops, go, " ;");
    gprintf(gops, go, " = %ld = ", rv->i);
    format_signed_hex(gops, go, rv->i);
  }
}

/* Character type definition.  Initialization and comparison reuse the
 * signed-integer operations; serialization, parsing, and dumping are
 * specific to characters.
 */
const struct tvec_regty tvty_char = {
  init_int, trivial_release, eq_int,
  tobuf_char, frombuf_char,
  parse_char, dump_char
};

/* --- @tvec_claimeq_char@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@int ch0, ch1@ = two character codes
 *		@const char *file@, @unsigned lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if @ch0@ and @ch1@ are equal, otherwise zero.
 *
 * Use:		Check that values of @ch0@ and @ch1@ are equal.  As for
 *		@tvec_claim@ above, a test case is automatically begun and
 *		ended if none is already underway.  If the values are
 *		unequal, then @tvec_fail@ is called, quoting @expr@, and the
 *		mismatched values are dumped: @ch0@ is printed as the output
 *		value and @ch1@ is printed as the input reference.
 */

int tvec_claimeq_char(struct tvec_state *tv, int c0, int c1,
		      const char *file, unsigned lno, const char *expr)
{
  /* Stage the character codes in the first output and input registers;
   * the character type takes no argument.
   */
  tv->out[0].v.i = c0;
  tv->in[0].v.i = c1;

  return (tvec_claimeq(tv, &tvty_char, 0, file, lno, expr));
}

/*----- Text and byte strings ---------------------------------------------*/

/* --- @init_text@, @init_bytes@ --- *
 *
 * Arguments:	@union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	---
 *
 * Use:		Initialize a register value.
 *
 *		Text and binary string values are initialized with a null
 *		pointer and zero length.
 */

static void init_text(union tvec_regval *rv, const struct tvec_regdef *rd)
{
  /* No buffer yet, and nothing in it. */
  rv->text.sz = 0;
  rv->text.p = 0;
}

static void init_bytes(union tvec_regval *rv, const struct tvec_regdef *rd)
{
  /* No buffer yet, and nothing in it. */
  rv->bytes.sz = 0;
  rv->bytes.p = 0;
}

/* --- @release_text@, @release_bytes@ --- *
 *
 * Arguments:	@union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	---
 *
 * Use:		Release resources held by a register value.
 *
 *		Text and binary string buffers are freed.
 */

static void release_text(union tvec_regval *rv,
			 const struct tvec_regdef *rd)
{
  /* Hand the text buffer back to the allocator. */
  xfree(rv->text.p);
}

static void release_bytes(union tvec_regval *rv,
			  const struct tvec_regdef *rd)
{
  /* Hand the byte buffer back to the allocator. */
  xfree(rv->bytes.p);
}

/* --- @eq_text@, @eq_bytes@ --- *
 *
 * Arguments:	@const union tvec_regval *rv0, *rv1@ = register values
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	Nonzero if the values are equal, zero if unequal
 *
 * Use:		Compare register values for equality.
 */

static int eq_text(const union tvec_regval *rv0,
		   const union tvec_regval *rv1,
		   const struct tvec_regdef *rd)
{
  size_t n = rv0->text.sz;

  /* Different lengths can't be equal. */
  if (n != rv1->text.sz) return (0);

  /* Two empty strings are equal; don't look at the pointers at all. */
  if (!n) return (1);

  /* Otherwise it's down to the contents. */
  return (MEMCMP(rv0->text.p, ==, rv1->text.p, n));
}

static int eq_bytes(const union tvec_regval *rv0,
		    const union tvec_regval *rv1,
		    const struct tvec_regdef *rd)
{
  size_t n = rv0->bytes.sz;

  /* Different lengths can't be equal. */
  if (n != rv1->bytes.sz) return (0);

  /* Two empty strings are equal; don't look at the pointers at all. */
  if (!n) return (1);

  /* Otherwise it's down to the contents. */
  return (MEMCMP(rv0->bytes.p, ==, rv1->bytes.p, n));
}

/* --- @tobuf_text@, @tobuf_bytes@ --- *
 *
 * Arguments:	@buf *b@ = buffer
 *		@const union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	Zero on success, %$-1$% on failure.
 *
 * Use:		Serialize a register value to a buffer.
 *
 *		Text and binary string values are serialized as a little-
 *		endian 64-bit length %$n$% in bytes followed by %$n$% bytes
 *		of string data.
 */

static int tobuf_text(buf *b, const union tvec_regval *rv,
		      const struct tvec_regdef *rd)
{
  /* Write the length and then exactly that many bytes of text. */
  return (buf_putmem64l(b, rv->text.p, rv->text.sz));
}

static int tobuf_bytes(buf *b, const union tvec_regval *rv,
		       const struct tvec_regdef *rd)
{
  /* Write the length and then exactly that many bytes of data. */
  return (buf_putmem64l(b, rv->bytes.p, rv->bytes.sz));
}

/* --- @frombuf_text@, @frombuf_bytes@ --- *
 *
 * Arguments:	@buf *b@ = buffer
 *		@union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	Zero on success, %$-1$% on failure.
 *
 * Use:		Deserialize a register value from a buffer.
 *
 *		Text and binary string values are serialized as a little-
 *		endian 64-bit length %$n$% in bytes followed by %$n$% bytes
 *		of string data.
 */

static int frombuf_text(buf *b, union tvec_regval *rv,
			const struct tvec_regdef *rd)
{
  const void *src;
  size_t n;

  /* Locate the length-prefixed block within the buffer. */
  src = buf_getmem64l(b, &n);
  if (!src) return (-1);

  /* Make room, copy the text across, and add the terminating zero for
   * which @tvec_alloctext@ reserved the extra byte.
   */
  tvec_alloctext(rv, n);
  memcpy(rv->text.p, src, n);
  rv->text.p[n] = 0;

  return (0);
}

static int frombuf_bytes(buf *b, union tvec_regval *rv,
			 const struct tvec_regdef *rd)
{
  const void *p;
  size_t sz;

  /* Locate the length-prefixed block within the buffer. */
  p = buf_getmem64l(b, &sz); if (!p) return (-1);

  /* Make room and copy the data across.  A zero-length request made of a
   * freshly initialized register leaves the buffer pointer null, and
   * passing a null pointer to @memcpy@ is undefined even for a zero length,
   * so skip the copy when there's nothing to copy.
   */
  tvec_allocbytes(rv, sz);
  if (sz) memcpy(rv->bytes.p, p, sz);

  return (0);
}

/* --- @check_string_length@ --- *
 *
 * Arguments:	@size_t sz@ = found string length
 *		@const struct tvec_urange *ur@ = acceptable range
 *		@struct tvec_state *tv@ = test-vector state
 *
 * Returns:	Zero on success, %$-1$% on error.
 *
 * Use:		Checks that @sz@ is within the bounds described by @ur@,
 *		reporting an error if not.
 */

static int check_string_length(size_t sz, const struct tvec_urange *ur,
			       struct tvec_state *tv)
{
  /* With no range given, every length is acceptable. */
  if (!ur) return (0);

  /* Lengths within the (inclusive) bounds are fine. */
  if (ur->min <= sz && sz <= ur->max) return (0);

  /* Otherwise report the problem, passing on the error function's result. */
  return (tvec_error(tv,
		     "invalid string length %lu; must be in [%lu .. %lu]",
		     (unsigned long)sz, ur->min, ur->max));
}

/* --- @parse_text@, @parse_bytes@ --- *
 *
 * Arguments:	@union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *		@struct tvec_state *tv@ = test-vector state
 *
 * Returns:	Zero on success, %$-1$% on error.
 *
 * Use:		Parse a register value from an input file.
 *
 *		The input format for both kinds of strings is basically the
 *		same: a `compound string', consisting of
 *
 *		  * single-quoted strings, which are interpreted entirely
 *		    literally, but can't contain single quotes or newlines;
 *
 *		  * double-quoted strings, in which `%|\|%'-escapes are
 *		    interpreted as for characters;
 *
 *		  * character names, marked by an initial `%|#|%' sign;
 *
 *		  * special tokens marked by an initial `%|!|%' sign; or
 *
 *		  * barewords interpreted according to the current coding
 *		    scheme.
 *
 *		The special tokens are
 *
 *		  * `%|!bare|%', which causes subsequent sequences of
 *		    barewords to be treated as plain text;
 *
 *		  * `%|!hex|%', `%|!base32|%', `%|!base64|%', which cause
 *		    subsequent barewords to be decoded in the requested
 *		    manner.
 *
 *		  * `%|!repeat|% %$n$% %|{|% %%\textit{string}%% %|}|%',
 *		    which includes %$n$% copies of the (compound) string.
 *
 *		The only difference between text and binary strings is that
 *		the initial coding scheme is %|bare|% for text strings and
 *		%|hex|% for binary strings.
 */

static int parse_text(union tvec_regval *rv, const struct tvec_regdef *rd,
		      struct tvec_state *tv)
{
  void *buf = rv->text.p;

  /* Read the compound string, with barewords initially taken as plain
   * text.  The reader works through a @void *@, so go via a temporary and
   * store the pointer back only if it succeeds.
   */
  if (read_compound_string(&buf, &rv->text.sz, TVCODE_BARE, 0, tv))
    return (-1);
  rv->text.p = buf;

  /* Enforce the length bounds, if the register definition supplies any. */
  return (check_string_length(rv->text.sz, rd->arg.p, tv) ? -1 : 0);
}

static int parse_bytes(union tvec_regval *rv, const struct tvec_regdef *rd,
		       struct tvec_state *tv)
{
  void *buf = rv->bytes.p;

  /* Read the compound string, with barewords initially decoded as hex.
   * The reader works through a @void *@, so go via a temporary and store
   * the pointer back only if it succeeds.
   */
  if (read_compound_string(&buf, &rv->bytes.sz, TVCODE_HEX, 0, tv))
    return (-1);
  rv->bytes.p = buf;

  /* Enforce the length bounds, if the register definition supplies any. */
  return (check_string_length(rv->bytes.sz, rd->arg.p, tv) ? -1 : 0);
}

/* --- @dump_text@, @dump_bytes@ --- *
 *
 * Arguments:	@const union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *		@unsigned style@ = output style (@TVSF_...@)
 *		@const struct gprintf_ops *gops@, @void *go@ = format output
 *
 * Returns:	---
 *
 * Use:		Dump a register value to the format output.
 *
 *		Text string values are dumped as plain text, in double quotes
 *		if necessary, and using backslash escape sequences for
 *		nonprintable characters.  Unless compact output is requested,
 *		strings consisting of multiple lines are dumped with each
 *		line of the string on a separate output line.
 *
 *		Binary string values are dumped in hexadecimal.  In compact
 *		style, the output simply consists of a single block of hex
 *		digits.  Otherwise, the dump is a display consisting of
 *		groups of hex digits, with comments showing the offset (if
 *		the string is long enough) and the corresponding plain text.
 *
 *		Empty strings are dumped as %|""|%.
 */

static void dump_text(const union tvec_regval *rv,
		      const struct tvec_regdef *rd,
		      unsigned style,
		      const struct gprintf_ops *gops, void *go)
{
  const unsigned char *base, *end, *cur, *run;
  int multiline = 0, needquote = 0;

  /* The empty string has a fixed representation. */
  if (!rv->text.sz) { gprintf(gops, go, "\"\""); return; }

  base = (const unsigned char *)rv->text.p; end = base + rv->text.sz;

  /* A string can't be written as a bareword if it begins with a character
   * which has a special meaning at the start of an item...
   */
  if (*base && strchr("!#;\"'({[]})", *base)) needquote = 1;

  /* ... or if it contains anything other than graphic characters, or a
   * backslash.  A newline anywhere except at the very end additionally
   * selects the multi-line layout.
   */
  for (cur = base; cur < end; cur++) {
    if (*cur == '\n' && cur + 1 != end) multiline = 1;
    else if (!*cur || !isgraph(*cur) || *cur == '\\') needquote = 1;
  }

  /* The easy case: just write the text out as it is. */
  if (!multiline && !needquote) {
    gops->putm(go, (const char *)base, rv->text.sz);
    return;
  }

  /* Multi-line strings start on a fresh, indented line. */
  if (multiline) gprintf(gops, go, "\n\t");

  /* Write the string in double quotes.  Runs of ordinary characters are
   * written in one go; @run@ marks the start of the current unwritten run.
   *
   * NOTE(review): a backslash forces quoting above, but it is printable and
   * so is copied through unescaped here -- confirm that this is what the
   * reader expects inside double quotes.
   */
  gprintf(gops, go, "\"");
  run = base;
  for (cur = base; cur < end; cur++) {
    if (isprint(*cur) && *cur != '"') continue;

    /* Flush the pending run before dealing with this character. */
    if (run < cur) gops->putm(go, (const char *)run, cur - run);
    run = cur + 1;

    if (*cur != '\n' || (style&TVSF_COMPACT))
      /* An ordinary escape. */
      format_charesc(gops, go, *cur, FCF_BRACE);
    else if (run == end)
      /* A final newline: close the quotes and we're finished. */
      { gprintf(gops, go, "\\n\""); return; }
    else
      /* An interior newline: close this piece and open another on the next
       * output line.
       */
      gprintf(gops, go, "\\n\"\n\t\"");
  }

  /* Flush whatever is left and close the quotes. */
  if (run < end) gops->putm(go, (const char *)run, end - run);
  gprintf(gops, go, "\"");
}

static void dump_bytes(const union tvec_regval *rv,
		       const struct tvec_regdef *rd,
		       unsigned style,
		       const struct gprintf_ops *gops, void *go)
{
  const unsigned char *p = rv->bytes.p, *l = p + rv->bytes.sz;
  size_t off, sz = rv->bytes.sz;
  unsigned i, n;
  int wd;

  /* The empty string has a fixed representation. */
  if (!sz) {
    gprintf(gops, go, style&TVSF_COMPACT ? "\"\"" : "\"\" ; empty");
    return;
  }

  /* Compact style is just one unbroken run of hex digits. */
  if (style&TVSF_COMPACT) {
    while (p < l) gprintf(gops, go, "%02x", *p++);
    return;
  }

  /* Strings which need more than one row start on a fresh, indented line. */
  if (sz > 16) gprintf(gops, go, "\n\t");

  off = 0; wd = hex_width(sz);
  while (p < l) {

    /* Each row shows up to sixteen bytes. */
    if (l - p < 16) n = l - p;
    else n = 16;

    /* The hex digits, in groups of four bytes, with no trailing space.
     *
     * NOTE(review): a short final row is not padded out, so its comment
     * does not line up with those of the full rows above.  The padding
     * branch which used to sit in this loop could never be reached, and has
     * been removed without changing the output; confirm whether padding
     * was actually wanted.
     */
    for (i = 0; i < n; i++) {
      gprintf(gops, go, "%02x", p[i]);
      if (i < n - 1 && i%4 == 3) gprintf(gops, go, " ");
    }

    /* The comment: the offset, if there are several rows, and then the
     * bytes as text, with dots standing in for nonprintable ones.
     */
    gprintf(gops, go, " ; ");
    if (sz > 16) gprintf(gops, go, "[%0*lx] ", wd, (unsigned long)off);
    for (i = 0; i < n; i++)
      gprintf(gops, go, "%c", isprint(p[i]) ? p[i] : '.');

    /* Move on; further rows go on their own indented lines. */
    p += n; off += n;
    if (p < l) gprintf(gops, go, "\n\t");
  }
}

/* Text and byte string type definitions.  The two tables are parallel:
 * each supplies its own initialization, release, comparison, serialization,
 * parsing, and dumping operations.
 */
const struct tvec_regty tvty_text = {
  init_text, release_text, eq_text,
  tobuf_text, frombuf_text,
  parse_text, dump_text
};
const struct tvec_regty tvty_bytes = {
  init_bytes, release_bytes, eq_bytes,
  tobuf_bytes, frombuf_bytes,
  parse_bytes, dump_bytes
};

/* --- @tvec_claimeq_text@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@const char *p0@, @size_t sz0@ = first string with length
 *		@const char *p1@, @size_t sz1@ = second string with length
 *		@const char *file@, @unsigned lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
 *		zero.
 *
 * Use:		Check that strings at @p0@ and @p1@ are equal.  As for
 *		@tvec_claim@ above, a test case is automatically begun and
 *		ended if none is already underway.  If the values are
 *		unequal, then @tvec_fail@ is called, quoting @expr@, and the
 *		mismatched values are dumped: @p0@ is printed as the output
 *		value and @p1@ is printed as the input reference.
 */

int tvec_claimeq_text(struct tvec_state *tv,
		      const char *p0, size_t sz0,
		      const char *p1, size_t sz1,
		      const char *file, unsigned lno, const char *expr)
{
  /* Stage the first string in the first output register... */
  tv->out[0].v.text.p = (/*unconst*/ char *)p0;
  tv->out[0].v.text.sz = sz0;

  /* ... and the second in the first input register. */
  tv->in[0].v.text.p = (/*unconst*/ char *)p1;
  tv->in[0].v.text.sz = sz1;

  /* The text type takes no argument. */
  return (tvec_claimeq(tv, &tvty_text, 0, file, lno, expr));
}

/* --- @tvec_claimeq_textz@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@const char *p0, *p1@ = two strings to compare
 *		@const char *file@, @unsigned lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
 *		zero.
 *
 * Use:		Check that strings at @p0@ and @p1@ are equal, as for
 *		@tvec_claimeq_text@, except that the strings are assumed
 *		null-terminated, so their lengths don't need to be supplied
 *		explicitly.
 */

int tvec_claimeq_textz(struct tvec_state *tv,
		       const char *p0, const char *p1,
		       const char *file, unsigned lno, const char *expr)
{
  size_t n0 = strlen(p0), n1 = strlen(p1);

  /* Stage the strings, with their measured lengths, in the first output
   * and input registers.
   */
  tv->out[0].v.text.p = (/*unconst*/ char *)p0;
  tv->out[0].v.text.sz = n0;
  tv->in[0].v.text.p = (/*unconst*/ char *)p1;
  tv->in[0].v.text.sz = n1;

  /* The text type takes no argument. */
  return (tvec_claimeq(tv, &tvty_text, 0, file, lno, expr));
}

/* --- @tvec_claimeq_bytes@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@const void *p0@, @size_t sz0@ = first string with length
 *		@const void *p1@, @size_t sz1@ = second string with length
 *		@const char *file@, @unsigned lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
 *		zero.
 *
 * Use:		Check that binary strings at @p0@ and @p1@ are equal.  As for
 *		@tvec_claim@ above, a test case is automatically begun and
 *		ended if none is already underway.  If the values are
 *		unequal, then @tvec_fail@ is called, quoting @expr@, and the
 *		mismatched values are dumped: @p0@ is printed as the output
 *		value and @p1@ is printed as the input reference.
 */

int tvec_claimeq_bytes(struct tvec_state *tv,
		       const void *p0, size_t sz0,
		       const void *p1, size_t sz1,
		       const char *file, unsigned lno, const char *expr)
{
  /* Stage the first string in the first output register... */
  tv->out[0].v.bytes.sz = sz0;
  tv->out[0].v.bytes.p = (/*unconst*/ void *)p0;

  /* ... and the second in the first input register. */
  tv->in[0].v.bytes.sz = sz1;
  tv->in[0].v.bytes.p = (/*unconst*/ void *)p1;

  /* The bytes type takes no argument. */
  return (tvec_claimeq(tv, &tvty_bytes, 0, file, lno, expr));
}

/* --- @tvec_alloctext@, @tvec_allocbytes@ --- *
 *
 * Arguments:	@union tvec_regval *rv@ = register value
 *		@size_t sz@ = required size
 *
 * Returns:	---
 *
 * Use:		Allocate space in a text or binary string register.  If the
 *		current register size is sufficient, its buffer is left
 *		alone; otherwise, the old buffer, if any, is freed and a
 *		fresh buffer allocated.  These functions are not intended to
 *		be used to adjust a buffer repeatedly, e.g., while building
 *		output incrementally: (a) they will perform badly, and (b)
 *		the old buffer contents are simply discarded if reallocation
 *		is necessary.  Instead, use a @dbuf@ or @dstr@.
 *
 *		The @tvec_alloctext@ function sneakily allocates an extra
 *		byte for a terminating zero.  The @tvec_allocbytes@ function
 *		doesn't do this.
 */

void tvec_alloctext(union tvec_regval *rv, size_t sz)
{
  /* Keep the existing buffer only if the new size is strictly smaller than
   * the current one; otherwise discard it and allocate afresh, with one
   * extra byte for a terminating zero.
   */
  if (sz >= rv->text.sz) {
    xfree(rv->text.p);
    rv->text.p = xmalloc(sz + 1);
  }

  rv->text.sz = sz;
}

void tvec_allocbytes(union tvec_regval *rv, size_t sz)
{
  /* Keep the existing buffer unless the new size is larger than the
   * current one; in that case, discard it and allocate afresh.
   */
  if (sz > rv->bytes.sz) {
    xfree(rv->bytes.p);
    rv->bytes.p = xmalloc(sz);
  }

  rv->bytes.sz = sz;
}

/*----- Buffer type -------------------------------------------------------*/

/* Buffers are initialized and released as binary strings. */

/* --- @eq_buffer@ --- *
 *
 * Arguments:	@const union tvec_regval *rv0, *rv1@ = register values
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	Nonzero if the values are equal, zero if unequal
 *
 * Use:		Compare register values for equality.
 *
 *		Buffer values are equal if and only if their sizes are equal;
 *		their contents are %%\emph{not}%% compared.
 */

static int eq_buffer(const union tvec_regval *rv0,
		     const union tvec_regval *rv1,
		     const struct tvec_regdef *rd)
{
  /* Only the sizes matter: the contents are deliberately not examined. */
  return (rv1->bytes.sz == rv0->bytes.sz);
}

/* --- @tobuf_buffer@ --- *
 *
 * Arguments:	@buf *b@ = buffer
 *		@const union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	Zero on success, %$-1$% on failure.
 *
 * Use:		Serialize a register value to a buffer.
 *
 *		Buffer values are serialized as just their lengths, as
 *		unsigned integers.
 */

static int tobuf_buffer(buf *b, const union tvec_regval *rv,
			 const struct tvec_regdef *rd)
{
  /* Only the length is written; the contents are not sent. */
  return (unsigned_to_buf(b, rv->bytes.sz));
}

/* --- @allocate_buffer@ --- *
 *
 * Arguments:	@union tvec_regval *rv@ = register value
 *		@size_t sz@ = size to allocate
 *
 * Returns:	---
 *
 * Use:		Allocate @sz@ bytes to the buffer and fill the space with a
 *		distinctive pattern.
 */

static void allocate_buffer(union tvec_regval *rv, size_t sz)
{
  tvec_allocbytes(rv, sz);

  /* Fill the space with the pattern.  A zero-length request made of a
   * freshly initialized register leaves the buffer pointer null, and
   * passing a null pointer to @memset@ is undefined even for a zero length,
   * so skip the fill when there's nothing to fill.
   */
  if (sz) memset(rv->bytes.p, '?', sz);
}

/* --- @frombuf_buffer@ --- *
 *
 * Arguments:	@buf *b@ = buffer
 *		@union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *
 * Returns:	Zero on success, %$-1$% on failure.
 *
 * Use:		Deserialize a register value from a buffer.
 *
 *		Buffer values are serialized as just their lengths, as
 *		unsigned integers.  The buffer is allocated on
 *		deserialization and filled with a distinctive pattern.
 */

static int frombuf_buffer(buf *b, union tvec_regval *rv,
			  const struct tvec_regdef *rd)
{
  unsigned long len;

  /* Read the length, and reject it if it can't be held in a @size_t@. */
  if (unsigned_from_buf(b, &len) || len > (size_t)-1) return (-1);

  /* Set up a pattern-filled buffer of that size. */
  allocate_buffer(rv, len);
  return (0);
}

/* --- @parse_buffer@ --- *
 *
 * Arguments:	@union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *		@struct tvec_state *tv@ = test-vector state
 *
 * Returns:	Zero on success, %$-1$% on error.
 *
 * Use:		Parse a register value from an input file.
 *
 *		The input format for a buffer value consists of an unsigned
 *		integer followed by an optional unit specifier consisting of
 *		an SI unit prefix and (optionally) the letter `B'.  Unit
 *		prefixes denote %%\emph{binary}%% multipliers, not decimal.
 *
 *		The buffer is allocated and filled with a distinctive
 *		pattern.
 */

/* Unit prefix letters, in increasing order: each one stands for a further
 * factor of 1024 over the one before.  Used for both parsing and dumping
 * buffer sizes.
 */
static const char units[] = "kMGTPEZY";

static int parse_buffer(union tvec_regval *rv,
			const struct tvec_regdef *rd,
			struct tvec_state *tv)
{
  dstr d = DSTR_INIT;
  const char *q, *unit;
  size_t pos;
  unsigned long u, t;
  int rc;
  unsigned f = 0;
#define f_range 1u

  if (tvec_readword(tv, &d, ";", "buffer length")) { rc = -1; goto end; }
  if (parse_unsigned_integer(&u, &q, d.buf)) goto bad;
  if (!*q) {
    tvec_skipspc(tv); pos = d.len;
    if (!tvec_readword(tv, &d, ";", 0)) pos++;
    q = d.buf + pos;
  }

  if (u > (size_t)-1) goto rangerr;
  for (t = u, unit = units; *unit; unit++) {
    if (t > (size_t)-1/1024) f |= f_range;
    else t *= 1024;
    if (*q == *unit) {
      if (f&f_range) goto rangerr;
      u = t; q++; break;
    }
  }
  if (*q == 'B') q++;
  if (*q) goto bad;
  if (check_string_length(u, rd->arg.p, tv)) { rc = -1; goto end; }

  if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
  allocate_buffer(rv, u);
  rc = 0;
end:
  DDESTROY(&d); return (rc);

bad:
  tvec_error(tv, "invalid buffer length `%s'", d.buf);
  rc = -1; goto end;

rangerr:
  tvec_error(tv, "buffer length `%s' out of range", d.buf);
  rc = -1; goto end;

#undef f_range
}

/* --- @dump_buffer@ --- *
 *
 * Arguments:	@const union tvec_regval *rv@ = register value
 *		@const struct tvec_regdef *rd@ = register definition
 *		@unsigned style@ = output style (@TVSF_...@)
 *		@const struct gprintf_ops *gops@, @void *go@ = format output
 *
 * Returns:	---
 *
 * Use:		Dump a register value to the format output.
 *
 *		Buffer values are dumped as their size with an appropriate
 *		unit specifier.  A unit prefix is only used if the size is an
 *		exact multiple of the relevant power of two.
 */

static void dump_buffer(const union tvec_regval *rv,
			const struct tvec_regdef *rd,
			unsigned style,
			const struct gprintf_ops *gops, void *go)
{
  const char *unit;
  unsigned long u = rv->bytes.sz;

  if (!u || u%1024)
    gprintf(gops, go, "%lu B", u);
  else {
    for (unit = units, u /= 1024; !(u%1024) && unit[1]; u /= 1024, unit++);
    gprintf(gops, go, "%lu %cB", u, *unit);
  }
}

/* Buffer type definition.  Initialization and release reuse the binary
 * string operations; comparison, serialization, parsing, and dumping are
 * specific to buffers.
 */
const struct tvec_regty tvty_buffer = {
  init_bytes, release_bytes, eq_buffer,
  tobuf_buffer, frombuf_buffer,
  parse_buffer, dump_buffer
};

/*----- That's all, folks -------------------------------------------------*/