[mLib] / test / tvec-types.c

/* -*-c-*-
 *
 * Types for the test-vector framework
 *
 * (c) 2023 Straylight/Edgeware
 */

/*----- Licensing notice --------------------------------------------------*
 *
 * This file is part of the mLib utilities library.
 *
 * mLib is free software: you can redistribute it and/or modify it under
 * the terms of the GNU Library General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * mLib is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with mLib.  If not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
 * USA.
 */

/*----- Header files ------------------------------------------------------*/

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <string.h>

#include "buf.h"
#include "codec.h"
#  include "base32.h"
#  include "base64.h"
#  include "hex.h"
#include "dstr.h"
#include "maths.h"
#include "tvec.h"

/*----- Preliminary utilities ---------------------------------------------*/

/* --- @trivial_release@ --- *
 *
 * Arguments:	@union tvec_regval *rv@ = a register value
 *		@const struct tvec_regdef *rd@ = the register definition
 *
 * Returns:	---
 *
 * Use:		Does nothing.  Used for register values which don't retain
 *		resources.
 */

static void trivial_release(union tvec_regval *rv,
			    const struct tvec_regdef *rd)
{
  /* The value holds no resources, so there is nothing to give back.  Both
   * arguments are deliberately ignored.
   */
  (void)rv; (void)rd;
}

/*----- Integer utilities -------------------------------------------------*/

/* --- @unsigned_to_buf@, @signed_to_buf@ --- *
 *
 * Arguments:	@buf *b@ = buffer to write on
 *		@unsigned long u@ or @long i@ = integer to write
 *
 * Returns:	Zero on success, @-1@ on failure.
 *
 * Use:		Write the integer to the buffer, in big-endian
 *		(two's-complement, if signed) format.
 */

static int unsigned_to_buf(buf *b, unsigned long u)
{
  kludge64 wide;

  /* Widen the value to the fixed 64-bit representation and append it to the
   * buffer, passing the buffer's success/failure code straight back.
   *
   * NOTE(review): the header comment says big-endian, but the `...l' suffix
   * on the buffer call looks like the little-endian variant -- confirm.
   */
  ASSIGN64(wide, u);
  return (buf_putk64l(b, wide));
}

static int signed_to_buf(buf *b, long i)
{
  kludge64 wide;
  unsigned long bits = i;

  if (i < 0) {
    /* Sign-extend by hand: the complement of a negative number's bit
     * pattern is nonnegative, so it widens correctly; then complement all
     * 64 bits to recover the (now sign-extended) original.
     */
    ASSIGN64(wide, ~bits); CPL64(wide, wide);
  } else {
    /* Nonnegative values widen directly. */
    ASSIGN64(wide, bits);
  }
  return (buf_putk64l(b, wide));
}

/* --- @unsigned_from_buf@, @signed_from_buf@ --- *
 *
 * Arguments:	@buf *b@ = buffer to read from
 *		@unsigned long *u_out@ or @long *i_out@ = where to put the
 *			result
 *
 * Returns:	Zero on success, @-1@ on failure.
 *
 * Use:		Read an integer, in big-endian (two's-complement, if signed)
 *		format, from the buffer.
 */

static int unsigned_from_buf(buf *b, unsigned long *u_out)
{
  kludge64 wide, limit;

  /* Fetch the 64-bit quantity; fail if the buffer can't supply it. */
  if (buf_getk64l(b, &wide)) return (-1);

  /* Refuse anything which won't fit in an @unsigned long@ on this
   * platform.
   */
  ASSIGN64(limit, ULONG_MAX);
  if (CMP64(wide, >, limit)) return (-1);

  *u_out = GET64(unsigned long, wide);
  return (0);
}

/* --- @hex_width@ --- *
 *
 * Arguments:	@unsigned long u@ = an integer
 *
 * Returns:	A suitable number of digits to use in order to display @u@ in
 *		hex.  Currently, we select a power of two sufficient to show
 *		the value, but at least 2.
 */

static int hex_width(unsigned long u)
{
  unsigned long rest = u >> 4;		/* what's left after one digit */
  int bits = 4;				/* current shift distance, in bits */

  /* Repeatedly discard @bits@ more bits and double the distance until
   * nothing is left.  The distance ends up as a power of two (at least 8)
   * which covers the value; each individual shift is less than the width of
   * the type.
   */
  do { rest >>= bits; bits *= 2; } while (rest);

  /* Four bits per hex digit. */
  return (bits/4);
}

/* --- @format_unsigned_hex@, @format_signed_hex@ --- *
 *
 * Arguments:	@const struct gprintf_ops *gops@ = print operations
 *		@void *go@ = print destination
 *		@unsigned long u@ or @long i@ = integer to print
 *
 * Returns:	---
 *
 * Use:		Print an unsigned or signed integer in hexadecimal.
 */

static void format_unsigned_hex(const struct gprintf_ops *gops, void *go,
				unsigned long u)
{
  /* Zero-pad to the width chosen by @hex_width@. */
  int width = hex_width(u);

  gprintf(gops, go, "0x%0*lx", width, u);
}

static void format_signed_hex(const struct gprintf_ops *gops, void *go,
			      long i)
{
  const char *sign = "";
  unsigned long mag = i;

  /* Print sign and magnitude separately.  Negating in unsigned arithmetic
   * gives the right magnitude even for @LONG_MIN@.
   */
  if (i < 0) { sign = "-"; mag = -mag; }
  gprintf(gops, go, "%s0x%0*lx", sign, hex_width(mag), mag);
}

/* Read a signed integer from the buffer @b@, storing it in @*i_out@.
 * Returns zero on success, or @-1@ if the buffer can't supply the value or
 * the value doesn't fit in a @long@.
 */
static int signed_from_buf(buf *b, long *i_out)
{
  kludge64 wide, maxpos, maxneg;

  if (buf_getk64l(b, &wide)) return (-1);

  /* Small enough to be a nonnegative @long@: take it as it is. */
  ASSIGN64(maxpos, LONG_MAX);
  if (CMP64(wide, <=, maxpos)) {
    *i_out = (long)GET64(unsigned long, wide);
    return (0);
  }

  /* Otherwise treat it as negative.  The complement of a two's-complement
   * negative number %$-m$% is %$m - 1$%, which must be no larger than the
   * complement of @LONG_MIN@.
   */
  ASSIGN64(maxneg, ~(unsigned long)LONG_MIN);
  CPL64(wide, wide);
  if (!CMP64(wide, <=, maxneg)) return (-1);
  *i_out = -(long)GET64(unsigned long, wide) - 1;
  return (0);
}

/* --- @check_unsigned_range@, @check_signed_range@ --- *
 *
 * Arguments:	@unsigned long u@ or @long i@ = an integer
 *		@const struct tvec_urange *ur@ or
 *			@const struct tvec_irange *ir@ = range specification,
 *			or null
 *		@struct tvec_state *tv@ = test vector state
 *
 * Returns:	Zero on success, or @-1@ on error.
 *
 * Use:		Check that the integer is within bounds.  If not, report a
 *		suitable error and return a failure indication.
 */

static int check_signed_range(long i,
			      const struct tvec_irange *ir,
			      struct tvec_state *tv)
{
  /* A null range specification accepts everything. */
  if (!ir) return (0);

  /* Inside the (inclusive) bounds: fine. */
  if (ir->min <= i && i <= ir->max) return (0);

  /* Otherwise complain, and tell the caller. */
  tvec_error(tv, "integer %ld out of range (must be in [%ld .. %ld])",
	     i, ir->min, ir->max);
  return (-1);
}

static int check_unsigned_range(unsigned long u,
				const struct tvec_urange *ur,
				struct tvec_state *tv)
{
  /* A null range specification accepts everything. */
  if (!ur) return (0);

  /* Inside the (inclusive) bounds: fine. */
  if (ur->min <= u && u <= ur->max) return (0);

  /* Otherwise complain, and tell the caller. */
  tvec_error(tv, "integer %lu out of range (must be in [%lu .. %lu])",
	     u, ur->min, ur->max);
  return (-1);
}

/* --- @chtodig@ --- *
 *
 * Arguments:	@int ch@ = a character
 *
 * Returns:	The numeric value of the character as a digit, or @-1@ if
 *		it's not a digit.  Letters count as extended digits starting
 *		with value 10; case is not significant.
 */

static int chtodig(int ch)
{
  int dig = -1;				/* assume it's not a digit */

  /* Decimal digits map to 0 .. 9; letters of either case map to 10 .. 35.
   * This relies on each of the three ranges being contiguous in the
   * execution character set.
   */
  if (ch >= '0' && ch <= '9') dig = ch - '0';
  else if (ch >= 'a' && ch <= 'z') dig = ch - 'a' + 10;
  else if (ch >= 'A' && ch <= 'Z') dig = ch - 'A' + 10;
  return (dig);
}

/* --- @parse_unsigned_integer@, @parse_signed_integer@ --- *
 *
 * Arguments:	@unsigned long *u_out@, @long *i_out@ = where to put the
 *			result
 *		@const char **q_out@ = where to put the end position
 *		@const char *p@ = pointer to the string to parse
 *
 * Returns:	Zero on success, @-1@ on error.
 *
 * Use:		Parse an integer from a string in the test-vector format.
 *		This is mostly an extension of the traditional C @strtoul@
 *		format: supported inputs include:
 *
 *		  * NNN -- a decimal number (even if it starts with `0');
 *		  * 0xNNN -- hexadecimal;
 *		  * 0oNNN -- octal;
 *		  * 0bNNN -- binary;
 *		  * NNrNNN -- base NN.
 *
 *		Furthermore, single underscores are permitted internally as
 *		an insignificant digit separator.
 */

static int parse_unsigned_integer(unsigned long *u_out, const char **q_out,
				  const char *p)
{
  unsigned long u;
  int ch, d, r;
  const char *q;
  unsigned f = 0;
#define f_implicit 1u			/* implicitly reading base 10 */
#define f_digit 2u			/* read a real digit */
#define f_uscore 4u			/* found an underscore */

  /* Initial setup
   *
   * This will deal with the traditional `0[box]...' prefixes.  We'll leave
   * our new `NNr...' syntax for later.
   *
   * A prefix is only recognized if it's followed by a digit which is valid
   * in the base it selects; otherwise the leading `0' is treated as a
   * complete decimal digit and the scan continues from the next character.
   */
  if (p[0] != '0' || !p[1]) {
    d = chtodig(*p); if (0 > d || d >= 10) return (-1);
    r = 10; u = d; p++; f |= f_implicit | f_digit;
  } else {
    u = 0; d = chtodig(p[2]);
    if (d < 0) { r = 10; f |= f_implicit | f_digit; p++; }
    else if ((p[1] == 'x' || p[1] == 'X') && d < 16) { r = 16; p += 2; }
    else if ((p[1] == 'o' || p[1] == 'O') && d < 8) { r = 8; p += 2; }
    else if ((p[1] == 'b' || p[1] == 'B') && d < 2) { r = 2; p += 2; }
    else { r = 10; f |= f_digit; p++; }
  }

  /* Throughout, @q@ marks the end of the last digit accepted, so a trailing
   * underscore is never counted as part of the number.
   */
  q = p;
  for (;;) {
    /* Work through the string a character at a time. */

    ch = *p; switch (ch) {

      case '_':
	/* An underscore is OK if we haven't just seen one.  It also ends
	 * any chance of the number so far being a base prefix.
	 */

	if (f&f_uscore) goto done;
	p++; f = (f&~f_implicit) | f_uscore;
	break;

      case 'r': case 'R':
	/* An `r' is a radix marker if we're scanning decimal implicitly, the
	 * number so far is a sensible base (there are 36 digit values
	 * available, so bases up to and including 36 are usable), and it's
	 * followed by a digit which is valid in that base.
	 *
	 * Failing that, it might still be an ordinary digit: `r' has value
	 * 27, which is meaningful in bases 28 and above.  Let the general
	 * digit handling below decide.
	 */

	if ((f&f_implicit) && u && u <= 36) {
	  d = chtodig(p[1]);
	  if (0 <= d && (unsigned long)d < u) {
	    r = u; u = d; f = (f&~f_implicit) | f_digit; p += 2; q = p;
	    break;
	  }
	}
	/* fall through */

      default:
	/* Otherwise we expect a valid digit and accumulate it, failing
	 * outright if the result would overflow.
	 */
	d = chtodig(ch); if (d < 0 || d >= r) goto done;
	if (u > ULONG_MAX/r) return (-1);
	u *= r; if (u > ULONG_MAX - d) return (-1);
	u += d; f = (f&~f_uscore) | f_digit; p++; q = p;
	break;
    }
  }

done:
  /* A bare prefix with no digits at all isn't a number. */
  if (!(f&f_digit)) return (-1);
  *u_out = u; *q_out = q; return (0);

#undef f_implicit
#undef f_digit
#undef f_uscore
}

static int parse_signed_integer(long *i_out, const char **q_out,
				const char *p)
{
  unsigned long mag;
  int neg = 0;

  /* Consume an optional sign, remembering whether it was negative. */
  switch (*p) {
    case '-': neg = 1; /* fall through */
    case '+': p++; break;
    default: break;
  }

  /* The rest is an unsigned magnitude. */
  if (parse_unsigned_integer(&mag, q_out, p)) return (-1);

  /* Apply the sign, taking care not to overflow.  The negative range is
   * checked and converted via %$m - 1$%, so that @LONG_MIN@ can be handled
   * without ever negating a value that doesn't fit.
   */
  if (!neg) {
    if (mag > LONG_MAX) return (-1);
    *i_out = mag;
  } else if (!mag)
    *i_out = 0;
  else {
    if (mag - 1 > -(LONG_MIN + 1)) return (-1);
    *i_out = -(long)(mag - 1) - 1;
  }
  return (0);
}

/* --- @parse_unsigned@, @parse_signed@ --- *
 *
 * Arguments:	@unsigned long *u_out@ or @long *i_out@ = where to put the
 *			result
 *		@const char *p@ = string to parse
 *		@const struct tvec_urange *ur@ or
 *			@const struct tvec_irange *ir@ = range specification,
 *			or null
 *		@struct tvec_state *tv@ = test vector state
 *
 * Returns:	Zero on success, @-1@ on error.
 *
 * Use:		Parse and range-check an integer.  Unlike @parse_(un)signed_
 *		integer@, these functions check that there's no cruft
 *		following the final digit, and report errors as they find
 *		them rather than leaving that to the caller.
 */

static int parse_unsigned(unsigned long *u_out, const char *p,
			  const struct tvec_urange *ur,
			  struct tvec_state *tv)
{
  unsigned long val;
  const char *end;
  int rc;

  /* Each stage reports its own error; the output is only written once
   * everything has been checked.
   */
  if (parse_unsigned_integer(&val, &end, p))
    rc = tvec_error(tv, "invalid unsigned integer `%s'", p);
  else if (*end)
    rc = tvec_syntax(tv, *end, "end-of-line");
  else if (check_unsigned_range(val, ur, tv))
    rc = -1;
  else
    { *u_out = val; rc = 0; }
  return (rc);
}

static int parse_signed(long *i_out, const char *p,
			const struct tvec_irange *ir,
			struct tvec_state *tv)
{
  long val;
  const char *end;
  int rc;

  /* Each stage reports its own error; the output is only written once
   * everything has been checked.
   */
  if (parse_signed_integer(&val, &end, p))
    rc = tvec_error(tv, "invalid signed integer `%s'", p);
  else if (*end)
    rc = tvec_syntax(tv, *end, "end-of-line");
  else if (check_signed_range(val, ir, tv))
    rc = -1;
  else
    { *i_out = val; rc = 0; }
  return (rc);
}

/*----- Floating-point utilities ------------------------------------------*/

/* --- @eqish_floating_p@ --- *
 *
 * Arguments:	@double x, y@ = two numbers to compare
 *		@const struct tvec_floatinfo *fi@ = floating-point info
 *
 * Returns:	Nonzero if  the comparand @y@ is sufficiently close to the
 *		reference @x@, or zero if it's definitely different.
 */

static int eqish_floating_p(double x, double y,
			    const struct tvec_floatinfo *fi)
{
  double t;

  /* Non-finite values never match approximately: a NaN matches only another
   * NaN, and an infinity matches only the same infinity.
   */
  if (NANP(x)) return (NANP(y)); else if (NANP(y)) return (0);
  if (INFP(x)) return (x == y); else if (INFP(y)) return (0);

  /* With no floating-point info, fall back to exact comparison. */
  switch (fi ? fi->f&TVFF_EQMASK : TVFF_EXACT) {

    case TVFF_EXACT:
      /* Plain `==' can't tell differently signed zeros apart, hence the
       * additional @NEGP@ comparison (presumably a sign test).
       */
      return (x == y && NEGP(x) == NEGP(y));

    case TVFF_ABSDELTA:
      /* Absolute error: %$|x - y| < \delta$%. */
      t = x - y; if (t < 0) t = -t; return (t < fi->delta);

    case TVFF_RELDELTA:
      /* Relative error: %$|1 - y/x| < \delta$%.  This is meaningless for a
       * zero reference: dividing would produce an infinity, or a NaN if
       * @y@ is zero too, which would then be reported as a mismatch.  Only
       * a zero can be relatively close to zero, so handle that case
       * directly.
       */
      if (x == 0.0) return (y == 0.0);
      t = 1.0 - y/x; if (t < 0) t = -t; return (t < fi->delta);

    default:
      abort();
  }
}

/* --- @format_floating@ --- *
 *
 * Arguments:	@const struct gprintf_ops *gops@ = print operations
 *		@void *go@ = print destination
 *		@double x@ = number to print
 *
 * Returns:	---
 *
 * Use:		Print a floating-point number, accurately.
 */

static void format_floating(const struct gprintf_ops *gops, void *go,
			    double x)
{
  int digits;

  /* Deal with the non-finite values first: they have their own tokens. */
  if (NANP(x)) { gprintf(gops, go, "#nan"); return; }
  if (INFP(x)) { gprintf(gops, go, x > 0 ? "#+inf" : "#-inf"); return; }

  /* Ugh.  C doesn't provide any function for just printing a
   * floating-point number /correctly/, i.e., so that you can read the
   * result back and recover the number you first thought of.  There are
   * complicated algorithms published for doing this, but I really don't
   * want to get into that here.  So we have this.
   *
   * The sign doesn't cause significant difficulty so we're going to ignore
   * it for now.  So suppose we're given a number %$x = f b^e$%, in
   * base-%$b$% format, so %$f b^n$% and %$e$% are integers, with
   * %$0 \le f < 1$%.  We're going to convert it into the nearest number
   * of the form %$X = F B^E$%, with similar conditions, only with the
   * additional requirement that %$X$% is normalized, i.e., that %$X = 0$%
   * or %$F \ge B^{-N}$%.
   *
   * We're rounding to the nearest such %$X$%.  If there is to be ambiguity
   * in the conversion, then some %$x = f b^e$% and the next smallest
   * representable number %$x' = x + b^{e-n}$% must both map to the same
   * %$X$%, which means both %$x$% and %$x'$% must be nearer to %$X$% than
   * any other number representable in the target system.  The next larger
   * number is %$X' = X + B^{E-N}$%; the next smaller number will normally
   * be %$W = X - B^{E-N}$%, but if %$F = 1/B$% then the next smaller number
   * is actually %$X - B^{E-N-1}$%.  We ignore this latter possibility in
   * the pursuit of a conservative estimate (though actually it doesn't
   * matter).
   *
   * If both %$x$% and %$x'$% map to %$X$% then we must have
   * %$L = X - B^{E-N}/2 \le x$% and %$x + b^{e-n} \le R = X + B^{E-N}/2$%;
   * so firstly %$f b^e = x \ge L = W + B^{E-N}/2 > W = (F - B^{-N}) B^E$%,
   * and secondly %$b^{e-n} \le B^{E-N}$%.  Since these inequalities are in
   * opposite senses, we can divide, giving
   *
   *	       %$f b^e/b^{e-n} > (F - B^{-N}) B^E/B^{E-N}$% ,
   *
   * whence
   *
   *	       %$f b^n > (F - B^{-N}) B^N = F B^N - 1$% .
   *
   * Now %$f \le 1 - b^{-n}$%, and %$F \ge B^{-1}$%, so, for this to be
   * possible, it must be the case that
   *
   *	       %$(1 - b^{-n}) b^n = b^n - 1 > B^{N-1} - 1$% .
   *
   * Then rearrange and take logarithms, obtaining
   *
   *	       %$(N - 1) \log B < n \log b$% ,
   *
   * and so
   *
   *	       %$N < n \log b/\log B + 1$% .
   *
   * Recall that this is a necessary condition for a collision to occur; we
   * are therefore safe whenever
   *
   *	       %$N \ge n \log b/\log B + 1$% ;
   *
   * so, taking ceilings,
   *
   *	       %$N \ge \lceil n \log b/\log B \rceil + 1$% .
   *
   * So that's why we have this.
   *
   * I'm going to assume that @n = DBL_MANT_DIG@ is sufficiently small that
   * we can calculate this without ending up on the wrong side of an
   * integer boundary.
   *
   * In C11, we have @DBL_DECIMAL_DIG@, which should be the same value only
   * as a constant.  Except that modern compilers are more than clever
   * enough to work out that this is a constant anyway.
   *
   * This is sometimes an overestimate: we'll print out meaningless digits
   * that don't represent anything we actually know about the number in
   * question.  To fix that, we'd need a complicated algorithm like Steele
   * and White's Dragon4, Gay's @dtoa@, or Burger and Dybvig's algorithm
   * (note that Loitsch's Grisu2 is conservative, and Grisu3 hands off to
   * something else in difficult situations).
   */
  digits = ceil(DBL_MANT_DIG*log(FLT_RADIX)/log(10)) + 1;
  gprintf(gops, go, "%.*g", digits, x);
}

/* --- @parse_floating@ --- *
 *
 * Arguments:	@double *x_out@ = where to put the result
 *		@const char *p@ = string to parse
 *		@const struct tvec_floatinfo *fi@ = floating-point info
 *		@struct tvec_state *tv@ = test vector state
 *
 * Returns:	Zero on success, @-1@ on error.
 *
 * Use:		Parse a floating-point number from a string.  Reports any
 *		necessary errors.
 */

static int parse_floating(double *x_out, const char *p,
			  const struct tvec_floatinfo *fi,
			  struct tvec_state *tv)
{
  const char *pp; char *q;
  dstr d = DSTR_INIT;
  double x;
  int olderr, err, rc;

  /* Check for special tokens. */
  if (STRCMP(p, ==, "#nan")) {
#ifdef NAN
    x = NAN; rc = 0;
#else
    tvec_error(tv, "NaN not supported on this system");
    rc = -1; goto end;
#endif
  }

  else if (STRCMP(p, ==, "#inf") ||
	   STRCMP(p, ==, "#+inf") || STRCMP(p, ==, "+#inf")) {
#ifdef INFINITY
    x = INFINITY; rc = 0;
#else
    tvec_error(tv, "infinity not supported on this system");
    rc = -1; goto end;
#endif
  }

  else if (STRCMP(p, ==, "#-inf") || STRCMP(p, ==, "-#inf")) {
#ifdef INFINITY
    x = -INFINITY; rc = 0;
#else
    tvec_error(tv, "infinity not supported on this system");
    rc = -1; goto end;
#endif
  }

  /* Check that this looks like a number, so we can exclude `strtod'
   * recognizing its own non-finite number tokens.
   */
  else {
    pp = p;
    if (*pp == '+' || *pp == '-') pp++;
    if (*pp == '.') pp++;
    if (!ISDIGIT(*pp)) {
      tvec_syntax(tv, *p ? *p : fgetc(tv->fp), "floating-point number");
      rc = -1; goto end;
    }

    /* Parse the number using the system parser.  Capture its error code and
     * put the caller's @errno@ back immediately, so that it's preserved on
     * every path out of here, including the error paths.
     */
    olderr = errno; errno = 0;
    x = strtod(p, &q);
    err = errno; errno = olderr;
    if (*q) {
      tvec_syntax(tv, *q, "end-of-line");
      rc = -1; goto end;
    }

    /* A range error is tolerable if it's only underflow; but if the
     * /magnitude/ of the result is @HUGE_VAL@ then the number overflowed,
     * and that's an error.  Anything other than a range error is always
     * fatal.
     */
    if (err && (err != ERANGE || (x < 0 ? -x : x) == HUGE_VAL)) {
      tvec_error(tv, "invalid floating-point number `%s': %s",
		 p, strerror(err));
      rc = -1; goto end;
    }
  }

  /* Check that the number is acceptable. */
  if (NANP(x) && fi && !(fi->f&TVFF_NANOK)) {
    tvec_error(tv, "#nan not allowed here");
    rc = -1; goto end;
  }

  /* Range-check against whichever bounds are in force.  The message shows
   * a missing bound as an open interval end at the appropriate infinity.
   */
  if (fi && ((!(fi->f&TVFF_NOMIN) && x < fi->min) ||
	     (!(fi->f&TVFF_NOMAX) && x > fi->max))) {
    dstr_puts(&d, "floating-point number ");
    format_floating(&dstr_printops, &d, x);
    dstr_puts(&d, " out of range (must be in ");
    if (fi->f&TVFF_NOMIN)
      dstr_puts(&d, "(#-inf");
    else
      { dstr_putc(&d, '['); format_floating(&dstr_printops, &d, fi->min); }
    dstr_puts(&d, " .. ");
    if (fi->f&TVFF_NOMAX)
      dstr_puts(&d, "#+inf)");
    else
      { format_floating(&dstr_printops, &d, fi->max); dstr_putc(&d, ']'); }
    dstr_putc(&d, ')'); dstr_putz(&d);
    tvec_error(tv, "%s", d.buf); rc = -1; goto end;
  }

  /* All done. */
  *x_out = x; rc = 0;
end:
  dstr_destroy(&d);
  return (rc);
}

/*----- String utilities --------------------------------------------------*/

/* Special character name table. */
/* Each entry associates one name with one character value.  A character may
 * have several names.  The table is searched linearly from the top, by name
 * in @read_charname@ and by value-and-flags in @find_charname@; the latter
 * returns the first matching entry, so the order of entries for the same
 * character is significant.  The table is terminated by an entry with a
 * null name.
 *
 * Note that `#space' and `#spc' do not carry @CTF_PREFER@, so no preferred
 * name will be found for a space character.
 */
static const struct chartab {
  const char *name;			/* character name */
  int ch;				/* character value */
  unsigned f;				/* flags: */
#define CTF_PREFER 1u			/*   preferred name */
#define CTF_SHORT 2u			/*   short name (compact style) */
} chartab[] = {
  { "#eof",		EOF,	CTF_PREFER | CTF_SHORT },
  { "#nul",		'\0',	CTF_PREFER },
  { "#bell",		'\a',	CTF_PREFER },
  { "#ding",		'\a',	0 },
  { "#bel",		'\a',	CTF_SHORT },
  { "#backspace",	'\b',	CTF_PREFER },
  { "#bs",		'\b',	CTF_SHORT },
  { "#escape",		'\x1b',	CTF_PREFER },
  { "#esc",		'\x1b',	CTF_SHORT },
  { "#formfeed",	'\f',	CTF_PREFER },
  { "#ff",		'\f',	CTF_SHORT },
  { "#newline",		'\n',	CTF_PREFER },
  { "#linefeed",	'\n',	0 },
  { "#lf",		'\n',	CTF_SHORT },
  { "#nl",		'\n',	0 },
  { "#return",		'\r',	CTF_PREFER },
  { "#carriage-return",	'\r',	0 },
  { "#cr",		'\r',	CTF_SHORT },
  { "#tab",		'\t',	CTF_PREFER | CTF_SHORT },
  { "#horizontal-tab",	'\t',	0 },
  { "#ht",		'\t',	0 },
  { "#vertical-tab",	'\v',	CTF_PREFER },
  { "#vt",		'\v',	CTF_SHORT },
  { "#space",		' ',	0 },
  { "#spc",		' ',	CTF_SHORT },
  { "#delete",		'\x7f',	CTF_PREFER },
  { "#del",		'\x7f',	CTF_SHORT },
  { 0,			0,	0 }	/* end marker: null name */
};

/* --- @find_charname@ --- *
 *
 * Arguments:	@int ch@ = character to match
 *		@unsigned f@ = flags (@CTF_...@) to match
 *
 * Returns:	The name of the character, or null if no match is found.
 *
 * Use:		Looks up a name for a character.  Specifically, it returns
 *		the first entry in the @chartab@ table which matches @ch@ and
 *		which has one of the flags @f@ set.
 */

static const char *find_charname(int ch, unsigned f)
{
  const struct chartab *ent = chartab;

  /* Scan from the top; the first entry for this character which carries
   * any of the wanted flags wins.
   */
  while (ent->name) {
    if (ent->ch == ch && (ent->f&f) != 0) return (ent->name);
    ent++;
  }

  /* Ran off the end without finding anything. */
  return (0);
}

/* --- @read_charname@ --- *
 *
 * Arguments:	@int *ch_out@ = where to put the character
 *		@const char *p@ = character name
 *		@unsigned f@ = flags (@RCF_...@)
 *
 * Returns:	Zero if a match was found, @-1@ if not.
 *
 * Use:		Looks up a character by name.  If @RCF_EOFOK@ is set in @f@,
 *		then the @EOF@ marker can be matched; otherwise it can't.
 */

#define RCF_EOFOK 1u
static int read_charname(int *ch_out, const char *p, unsigned f)
{
  const struct chartab *ct;

  for (ct = chartab; ct->name; ct++)
    if (STRCMP(p, ==, ct->name) && ((f&RCF_EOFOK) || ct->ch >= 0))
      { *ch_out = ct->ch; return (0); }
  return (-1);
}

/* --- @format_charesc@ --- *
 *
 * Arguments:	@const struct gprintf_ops *gops@ = print operations
 *		@void *go@ = print destination
 *		@int ch@ = character to format
 *		@unsigned f@ = flags (@FCF_...@)
 *
 * Returns:	---
 *
 * Use:		Format a character as an escape sequence, possibly as part of
 *		a larger string.  If @FCF_BRACE@ is set in @f@, then put
 *		braces around a `\x...'  code, so that it's suitable for use
 *		in a longer string.
 */

#define FCF_BRACE 1u
static void format_charesc(const struct gprintf_ops *gops, void *go,
			   int ch, unsigned f)
{
  int braced = (f&FCF_BRACE) != 0;

  switch (ch) {

    /* Characters with their own single-letter escapes. */
    case '\a': gprintf(gops, go, "\\a"); break;
    case '\b': gprintf(gops, go, "\\b"); break;
    case '\x1b': gprintf(gops, go, "\\e"); break;
    case '\f': gprintf(gops, go, "\\f"); break;
    case '\r': gprintf(gops, go, "\\r"); break;
    case '\n': gprintf(gops, go, "\\n"); break;
    case '\t': gprintf(gops, go, "\\t"); break;
    case '\v': gprintf(gops, go, "\\v"); break;
    case '\\': gprintf(gops, go, "\\\\"); break;
    case '\'': gprintf(gops, go, "\\'"); break;

    /* A null character gets a short octal escape, braced if requested so
     * that it can't run into any following digits.
     */
    case '\0':
      gprintf(gops, go, braced ? "\\{0}" : "\\0");
      break;

    /* Everything else is written as a fixed-width hex escape, again braced
     * on request.
     */
    default:
      gprintf(gops, go, braced ? "\\x{%0*x}" : "\\x%0*x",
	      hex_width(UCHAR_MAX), ch);
      break;
  }
}

/* --- @format_char@ --- *
 *
 * Arguments:	@const struct gprintf_ops *gops@ = print operations
 *		@void *go@ = print destination
 *		@int ch@ = character to format
 *
 * Returns:	---
 *
 * Use:		Format a single character.
 */

static void format_char(const struct gprintf_ops *gops, void *go, int ch)
{
  /* A character can be written literally between the quotes only if it's
   * printable and isn't one of the two which are special inside a quoted
   * character (backslash and the quote itself).
   */
  int literal = ch != '\\' && ch != '\'' && isprint(ch);

  if (literal)
    gprintf(gops, go, "'%c'", ch);
  else {
    gprintf(gops, go, "'");
    format_charesc(gops, go, ch, 0);
    gprintf(gops, go, "'");
  }
}

/* --- @maybe_format_unsigned_char@, @maybe_format_signed_char@ --- *
 *
 * Arguments:	@const struct gprintf_ops *gops@ = print operations
 *		@void *go@ = print destination
 *		@unsigned long u@ or @long i@ = an integer
 *
 * Returns:	---
 *
 * Use:		Format a (signed or unsigned) integer as a character, if it's
 *		in range, printing something like `= 'q''.  It's assumed that
 *		a comment marker has already been output.
 */

static void maybe_format_unsigned_char
  (const struct gprintf_ops *gops, void *go, unsigned long u)
{
  const char *p;

  /* Only values which fit in an @unsigned char@ are characters.  Check
   * this /before/ looking for a name: @find_charname@ takes an @int@, and
   * letting a large value be narrowed on the way in could make it collide
   * with some unrelated character's code.  The bound is inclusive:
   * @UCHAR_MAX@ is itself a valid character code.
   */
  if (u > UCHAR_MAX) return;

  /* Print the preferred name, if there is one, and then the character
   * itself.
   */
  p = find_charname((int)u, CTF_PREFER);
  if (p) gprintf(gops, go, " = %s", p);
  gprintf(gops, go, " = "); format_char(gops, go, (int)u);
}

static void maybe_format_signed_char
  (const struct gprintf_ops *gops, void *go, long i)
{
  const char *p;

  /* Look for a preferred name.  Negative values are allowed through here,
   * so that the @EOF@ marker can be named; but @find_charname@ takes an
   * @int@, so don't let a value be narrowed on the way in, or it might
   * collide with some unrelated character's code.
   */
  if (INT_MIN <= i && i <= INT_MAX) {
    p = find_charname((int)i, CTF_PREFER);
    if (p) gprintf(gops, go, " = %s", p);
  }

  /* Print the character itself if it's a genuine character code.  The
   * upper bound is inclusive: @UCHAR_MAX@ is itself a valid code.
   */
  if (0 <= i && i <= UCHAR_MAX)
    { gprintf(gops, go, " = "); format_char(gops, go, (int)i); }
}

/* --- @read_charesc@ --- *
 *
 * Arguments:	@int *ch_out@ = where to put the result
 *		@struct tvec_state *tv@ = test vector state
 *
 * Returns:	Zero on success, @-1@ on error.
 *
 * Use:		Parse and convert an escape sequence from @tv@'s input
 *		stream, assuming that the initial `\' has already been read.
 *		Reports errors as appropriate.
 */

static int read_charesc(int *ch_out, struct tvec_state *tv)
{
  int ch, i, esc;
  unsigned f = 0;
#define f_brace 1u			/* escape is wrapped in `{...}' */

  /* Dispatch on the character following the backslash. */
  ch = getc(tv->fp);
  switch (ch) {

    /* Things we shouldn't find. */
    case EOF: case '\n': return (tvec_syntax(tv, ch, "string escape"));

    /* Single-character escapes. */
    case '\'': *ch_out = '\''; break;
    case '\\': *ch_out = '\\'; break;
    case '"': *ch_out = '"'; break;
    case 'a': *ch_out = '\a'; break;
    case 'b': *ch_out = '\b'; break;
    case 'e': *ch_out = '\x1b'; break;
    case 'f': *ch_out = '\f'; break;
    case 'n': *ch_out = '\n'; break;
    case 'r': *ch_out = '\r'; break;
    case 't': *ch_out = '\t'; break;
    case 'v': *ch_out = '\v'; break;

    /* Hex escapes, with and without braces.  At least one hex digit is
     * required; after that, digits are accumulated for as long as they
     * keep coming, and the value is checked against @UCHAR_MAX@ after each
     * one.
     */
    case 'x':
      ch = getc(tv->fp);
      if (ch == '{') { f |= f_brace; ch = getc(tv->fp); }
      else f &= ~f_brace;
      esc = chtodig(ch);
      if (esc < 0 || esc >= 16) return (tvec_syntax(tv, ch, "hex digit"));
      for (;;) {
	ch = getc(tv->fp); i = chtodig(ch); if (i < 0 || i >= 16) break;
	esc = 16*esc + i;
	if (esc > UCHAR_MAX)
	  return (tvec_error(tv,
			     "character code %d out of range", esc));
      }

      /* The character which stopped the scan is either the closing brace,
       * or it belongs to whatever follows and must be put back.
       */
      if (!(f&f_brace)) ungetc(ch, tv->fp);
      else if (ch != '}') return (tvec_syntax(tv, ch, "`}'"));
      *ch_out = esc;
      break;

    /* Other things, primarily octal escapes. */
    case '{':
      /* A brace directly after the backslash introduces a braced octal
       * escape: note the brace and examine the following character.
       */
      f |= f_brace; ch = getc(tv->fp);
      /* fall through */
    default:
      if ('0' <= ch && ch < '8') {

	/* Collect at most three octal digits in total (@i@ counts them),
	 * braced or not.  A non-digit which ends the scan early is pushed
	 * back.
	 */
	i = 1; esc = ch - '0';
	for (;;) {
	  ch = getc(tv->fp);
	  if ('0' > ch || ch >= '8') { ungetc(ch, tv->fp); break; }
	  esc = 8*esc + ch - '0';
	  i++; if (i >= 3) break;
	}

	/* A braced escape must be closed by the very next character. */
	if (f&f_brace) {
	  ch = getc(tv->fp);
	  if (ch != '}') return (tvec_syntax(tv, ch, "`}'"));
	}

	/* Three octal digits can exceed the range of a character. */
	if (esc > UCHAR_MAX)
	  return (tvec_error(tv,
			     "character code %d out of range", esc));
	*ch_out = esc; break;
      } else
	return (tvec_syntax(tv, ch, "string escape"));
  }

  /* Done. */
  return (0);

#undef f_brace
}

/* --- @read_quoted_string@ --- *
 *
 * Arguments:	@dstr *d@ = string to write to
 *		@int quote@ = initial quote, `'' or `"'
 *		@struct tvec_state *tv@ = test vector state
 *
 * Returns:	Zero on success, @-1@ on error.
 *
 * Use:		Read the rest of a quoted string into @d@, reporting errors
 *		as appropriate.
 *
 *		A single-quoted string is entirely literal.  A double-quoted
 *		string may contain C-like escapes.
 */

static int read_quoted_string(dstr *d, int quote, struct tvec_state *tv)
{
  int ch, next;

  for (;;) {
    ch = getc(tv->fp);

    /* The string must be closed before the line (or the file) ends. */
    if (ch == EOF || ch == '\n')
      return (tvec_syntax(tv, ch, "`%c'", quote));

    if (ch == '\\') {
      if (quote != '\'') {
	/* In a double-quoted string, a backslash followed by a newline is a
	 * line continuation and contributes nothing; anything else is an
	 * escape sequence, which replaces @ch@ with the character it
	 * denotes.
	 */
	next = getc(tv->fp);
	if (next == '\n') { tv->lno++; continue; }
	ungetc(next, tv->fp);
	if (read_charesc(&ch, tv)) return (-1);
      }

      /* Either the decoded escape, or (in a single-quoted string) the
       * backslash itself.
       */
      DPUTC(d, ch);
      continue;
    }

    /* An unescaped matching quote ends the string; anything else is just
     * part of it.
     */
    if (ch == quote) break;
    DPUTC(d, ch);
  }

  DPUTZ(d);
  return (0);
}

/* --- @collect_bare@ --- *
 *
 * Arguments:	@dstr *d@ = string to write to
 *		@struct tvec_state *tv@ = test vector state
 *
 * Returns:	Zero on success, @-1@ on error.
 *
 * Use:		Read barewords and the whitespace between them.  Stop when we
 *		encounter something which can't start a bareword.
 */

static int collect_bare(dstr *d, struct tvec_state *tv)
{
  /* @pos@ records the length of @d@ at the end of the most recent word, so
   * that trailing whitespace can be trimmed off at the end.  The scanner is
   * in one of three states: @WORD@ (last character was part of a word),
   * @SPACE@ (last character was whitespace), or @ESCAPE@ (last character
   * was a backslash, so the next one is to be taken literally).
   */
  size_t pos = d->len;
  enum { WORD, SPACE, ESCAPE }; unsigned s = WORD;
  int ch, rc;

  for (;;) {
    ch = getc(tv->fp);
    switch (ch) {
      case EOF:
	/* End of file is always an error here. */
	tvec_syntax(tv, ch, "bareword");
	rc = -1; goto end;
      case '\n':
	/* An escaped newline is added to the output literally.  Otherwise,
	 * push the newline back and let @tvec_nexttoken@ deal with it; if
	 * that returns nonzero, give up without terminating the string, and
	 * otherwise the line break contributes a single space.
	 */
	if (s == ESCAPE) { tv->lno++; goto addch; }
	if (s == WORD) pos = d->len;
	ungetc(ch, tv->fp); if (tvec_nexttoken(tv)) { rc = -1; goto end; }
	DPUTC(d, ' '); s = SPACE;
	break;
      case '"': case '\'': case '!': case '#': case ')': case '}': case ']':
	/* These characters can't start a bareword: following whitespace,
	 * one of them ends the collection (and is pushed back for the
	 * caller).  In the middle of a word, or when escaped, they're
	 * ordinary characters.
	 */
	if (s == SPACE) { ungetc(ch, tv->fp); goto done; }
	goto addch;
      case '\\':
	/* Note the escape; the backslash itself is not added.
	 *
	 * NOTE(review): this happens even if we're already in the @ESCAPE@
	 * state, so there appears to be no way to include a literal
	 * backslash -- confirm whether that's intended.
	 */
	s = ESCAPE;
	break;
      default:
	/* Unescaped whitespace is added, but remembered as trimmable;
	 * anything else is part of a word.
	 */
	if (s != ESCAPE && isspace(ch)) {
	  if (s == WORD) pos = d->len;
	  DPUTC(d, ch); s = SPACE;
	  break;
	}
      addch:
	DPUTC(d, ch); s = WORD;
    }
  }

done:
  /* Trim the trailing whitespace collected since the last word. */
  if (s == SPACE) d->len = pos;
  DPUTZ(d); rc = 0;
end:
  return (rc);
}

/* --- @set_up_encoding@ --- *
 *
 * Arguments:	@const codec_class **ccl_out@ = where to put the class
 *		@unsigned *f_out@ = where to put the flags
 *		@unsigned code@ = the coding scheme to use (@TVCODE_...@)
 *
 * Returns:	---
 *
 * Use:		Helper for @read_compound_string@ below.
 *
 *		Return the appropriate codec class and flags for @code@.
 *		Leaves @*ccl_out@ null if the coding scheme doesn't have a
 *		backing codec class (e.g., @TVCODE_BARE@).
 */

enum { TVCODE_BARE, TVCODE_HEX, TVCODE_BASE64, TVCODE_BASE32 };
static void set_up_encoding(const codec_class **ccl_out, unsigned *f_out,
			    unsigned code)
{
  const codec_class *ccl;
  unsigned cdf;

  /* Choose the codec class and decoding flags for the scheme.  Barewords
   * taken literally need no codec at all.  An unknown code is a programming
   * error, and nothing is written to the outputs in that case.
   */
  switch (code) {
    case TVCODE_BARE:
      ccl = 0; cdf = 0;
      break;
    case TVCODE_HEX:
      ccl = &hex_class; cdf = CDCF_IGNCASE;
      break;
    case TVCODE_BASE32:
      ccl = &base32_class; cdf = CDCF_IGNCASE | CDCF_IGNEQPAD;
      break;
    case TVCODE_BASE64:
      ccl = &base64_class; cdf = CDCF_IGNEQPAD;
      break;
    default:
      abort();
  }

  *ccl_out = ccl; *f_out = cdf;
}

/* --- @flush_codec@ --- *
 *
 * Arguments:	@codec *cdc@ = a codec, or null
 *		@dstr *d@ = output string
 *		@struct tvec_state *tv@ = test vector state
 *
 * Returns:	Zero on success, @-1@ on error.
 *
 * Use:		Helper for @read_compound_string@ below.
 *
 *		Flush out any final buffered material from @cdc@, and check
 *		that it's in a good state.  Frees the codec on success.  Does
 *		nothing if @cdc@ is null.
 */

static int flush_codec(codec *cdc, dstr *d, struct tvec_state *tv)
{
  int err;

  /* No codec, nothing to do. */
  if (!cdc) return (0);

  /* Calling the coding function with no input asks the codec to finish
   * off.  If it objects, report the problem; the codec is /not/ destroyed
   * in this case, so it remains the caller's responsibility.
   */
  err = cdc->ops->code(cdc, 0, 0, d);
  if (err)
    return (tvec_error(tv, "invalid %s sequence end: %s",
		       cdc->ops->c->name, codec_strerror(err)));

  /* All is well, so the codec is finished with. */
  cdc->ops->destroy(cdc);
  return (0);
}

/* --- @read_compound_string@ --- *
 *
 * Arguments:	@void **p_inout@ = address of output buffer pointer
 *		@size_t *sz_inout@ = address of buffer size
 *		@unsigned code@ = initial interpretation of barewords
 *		@unsigned f@ = other flags (@RCSF_...@)
 *		@struct tvec_state *tv@ = test vector state
 *
 * Returns:	Zero on success, @-1@ on error.
 *
 * Use:		Parse a compound string, i.e., a sequence of stringish pieces
 *		which might be quoted strings, character names, or barewords
 *		to be decoded according to @code@, interspersed with
 *		additional directives.
 *
 *		If the initial buffer pointer is non-null and sufficiently
 *		large, then it will be reused; otherwise, it is freed and a
 *		fresh, sufficiently large buffer is allocated and returned.
 */

#define RCSF_NESTED 1u
static int read_compound_string(void **p_inout, size_t *sz_inout,
				unsigned code, unsigned f,
				struct tvec_state *tv)
{
  const codec_class *ccl; unsigned cdf;
  codec *cdc;
  dstr d = DSTR_INIT, w = DSTR_INIT;
  char *p;
  const char *q;
  void *pp = 0; size_t sz;
  unsigned long n;
  int ch, err, rc;

  set_up_encoding(&ccl, &cdf, code); cdc = 0;

  if (tvec_nexttoken(tv)) return (tvec_syntax(tv, fgetc(tv->fp), "string"));
  do {
    ch = getc(tv->fp);
    switch (ch) {

      case ')': case ']': case '}':
	/* Close brackets.  Leave these for recursive caller if there is one,
	 * or just complain.
	 */

	if (!(f&RCSF_NESTED))
	  { rc = tvec_syntax(tv, ch, "string"); goto end; }
	ungetc(ch, tv->fp); goto done;

      case '"': case '\'':
	/* Quotes.  Read a quoted string. */

	if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
	cdc = 0;
	if (read_quoted_string(&d, ch, tv)) { rc = -1; goto end; }
	break;

      case '#':
	/* A named character. */

	ungetc(ch, tv->fp);
	if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
	cdc = 0;
	DRESET(&w); tvec_readword(tv, &w, ";", "character name");
	if (read_charname(&ch, w.buf, RCF_EOFOK)) {
	  rc = tvec_error(tv, "unknown character name `%s'", d.buf);
	  goto end;
	}
	DPUTC(&d, ch); break;

      case '!':
	/* A magic keyword. */

	if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
	cdc = 0;
	ungetc(ch, tv->fp);
	DRESET(&w); tvec_readword(tv, &w, ";", "`!'-keyword");

	/* Change bareword coding system. */
	if (STRCMP(w.buf, ==, "!bare"))
	  { code = TVCODE_BARE; set_up_encoding(&ccl, &cdf, code); }
	else if (STRCMP(w.buf, ==, "!hex"))
	  { code = TVCODE_HEX; set_up_encoding(&ccl, &cdf, code); }
	else if (STRCMP(w.buf, ==, "!base32"))
	  { code = TVCODE_BASE32; set_up_encoding(&ccl, &cdf, code); }
	else if (STRCMP(w.buf, ==, "!base64"))
	  { code = TVCODE_BASE64; set_up_encoding(&ccl, &cdf, code); }

	/* Repeated substrings. */
	else if (STRCMP(w.buf, ==, "!repeat")) {
	  if (tvec_nexttoken(tv)) {
	    rc = tvec_syntax(tv, fgetc(tv->fp), "repeat count");
	    goto end;
	  }
	  DRESET(&w);
	  if (tvec_readword(tv, &w, ";{", "repeat count"))
	    { rc = -1; goto end;  }
	  if (parse_unsigned_integer(&n, &q, w.buf)) {
	    rc = tvec_error(tv, "invalid repeat count `%s'", w.buf);
	    goto end;
	  }
	  if (*q) { rc = tvec_syntax(tv, *q, "`{'"); goto end; }
	  if (tvec_nexttoken(tv))
	    { rc = tvec_syntax(tv, fgetc(tv->fp), "`{'"); goto end; }
	  ch = getc(tv->fp); if (ch != '{')
	    { rc = tvec_syntax(tv, ch, "`{'"); goto end; }
	  sz = 0;
	  if (read_compound_string(&pp, &sz, code, f | RCSF_NESTED, tv))
	    { rc = -1; goto end; }
	  ch = getc(tv->fp); if (ch != '}')
	    { rc = tvec_syntax(tv, ch, "`}'"); goto end; }
	  if (sz) {
	    if (n > (size_t)-1/sz)
	      { rc = tvec_error(tv, "repeat size out of range"); goto end; }
	    dstr_ensure(&d, n*sz);
	    if (sz == 1)
	      { memset(d.buf + d.len, *(unsigned char *)pp, n); d.len += n; }
	    else
	      for (; n--; d.len += sz) memcpy(d.buf + d.len, pp, sz);
	  }
	  xfree(pp); pp = 0;
	}

	/* Anything else is an error. */
	else {
	  tvec_error(tv, "unknown string keyword `%s'", w.buf);
	  rc = -1; goto end;
	}
	break;

      default:
	/* A bareword.  Process it according to the current coding system. */

	switch (code) {
	  case TVCODE_BARE:
	    ungetc(ch, tv->fp);
	    if (collect_bare(&d, tv)) goto done;
	    break;
	  default:
	    assert(ccl);
	    ungetc(ch, tv->fp); DRESET(&w);
	    if (tvec_readword(tv, &w, ";", "%s-encoded fragment", ccl->name))
	      { rc = -1; goto end; }
	    if (!cdc) cdc = ccl->decoder(cdf);
	    err = cdc->ops->code(cdc, w.buf, w.len, &d);
	    if (err) {
	      tvec_error(tv, "invalid %s fragment `%s': %s",
			 ccl->name, w.buf, codec_strerror(err));
	      rc = -1; goto end;
	    }
	    break;
	}
	break;
    }
  } while (!tvec_nexttoken(tv));

done:
  /* Wrap things up. */
  if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
  cdc = 0;
  if (*sz_inout <= d.len)
    { xfree(*p_inout); *p_inout = xmalloc(d.len + 1); }
  p = *p_inout; memcpy(p, d.buf, d.len); p[d.len] = 0; *sz_inout = d.len;
  rc = 0;

end:
  /* Clean up any debris. */
  if (cdc) cdc->ops->destroy(cdc);
  if (pp) xfree(pp);
  dstr_destroy(&d); dstr_destroy(&w);
  return (rc);
}

/*----- Skeleton ----------------------------------------------------------*/
/*
static void init_...(union tvec_regval *rv, const struct tvec_regdef *rd)
static void release_...(union tvec_regval *rv, const struct tvec_regdef *rd)
static int eq_...(const union tvec_regval *rv0, const union tvec_regval *rv1,
		  const struct tvec_regdef *rd)
static int tobuf_...(buf *b, const union tvec_regval *rv,
		     const struct tvec_regdef *rd)
static int frombuf_...(buf *b, union tvec_regval *rv,
		       const struct tvec_regdef *rd)
static int parse_...(union tvec_regval *rv, const struct tvec_regdef *rd,
		     struct tvec_state *tv)
static void dump_...(const union tvec_regval *rv,
		     const struct tvec_regdef *rd,
		     struct tvec_state *tv, unsigned style)

const struct tvec_regty tvty_... = {
  init_..., release_..., eq_...,
  tobuf_..., frombuf_...,
  parse_..., dump_...
};
*/
/*----- Signed and unsigned integer types ---------------------------------*/

/* Initialize a signed-integer register value to zero. */
static void init_int(union tvec_regval *rv, const struct tvec_regdef *rd)
{
  rv->i = 0;
}

/* Initialize an unsigned-integer register value to zero. */
static void init_uint(union tvec_regval *rv, const struct tvec_regdef *rd)
{
  rv->u = 0;
}

/* Return nonzero if two signed-integer register values are equal. */
static int eq_int(const union tvec_regval *rv0, const union tvec_regval *rv1,
		  const struct tvec_regdef *rd)
{
  long x = rv0->i, y = rv1->i;

  return (x == y);
}

/* Return nonzero if two unsigned-integer register values are equal. */
static int eq_uint(const union tvec_regval *rv0,
		   const union tvec_regval *rv1,
		   const struct tvec_regdef *rd)
{
  unsigned long x = rv0->u, y = rv1->u;

  return (x == y);
}

/* Serialize a signed-integer register value onto @b@, returning whatever
 * @signed_to_buf@ reports.
 */
static int tobuf_int(buf *b, const union tvec_regval *rv,
		     const struct tvec_regdef *rd)
{
  return (signed_to_buf(b, rv->i));
}

/* Serialize an unsigned-integer register value onto @b@, returning whatever
 * @unsigned_to_buf@ reports.
 */
static int tobuf_uint(buf *b, const union tvec_regval *rv,
		      const struct tvec_regdef *rd)
{
  return (unsigned_to_buf(b, rv->u));
}

/* Deserialize a signed-integer register value from @b@, returning whatever
 * @signed_from_buf@ reports.
 */
static int frombuf_int(buf *b, union tvec_regval *rv,
		       const struct tvec_regdef *rd)
{
  return (signed_from_buf(b, &rv->i));
}

/* Deserialize an unsigned-integer register value from @b@, returning
 * whatever @unsigned_from_buf@ reports.
 */
static int frombuf_uint(buf *b, union tvec_regval *rv,
			const struct tvec_regdef *rd)
{
  return (unsigned_from_buf(b, &rv->u));
}

/* Parse a signed-integer register value from the test-vector file.  Reads
 * one word (delimited by `;'), converts it with @parse_signed@ (passing the
 * register definition's @arg.p@), and then requires the rest of the line to
 * be clear.  Returns zero on success or @-1@ on error.
 */
static int parse_int(union tvec_regval *rv, const struct tvec_regdef *rd,
		     struct tvec_state *tv)
{
  dstr word = DSTR_INIT;
  int rc = -1;

  /* Each step is attempted only if all of the previous ones succeeded. */
  if (!tvec_readword(tv, &word, ";", "signed integer") &&
      !parse_signed(&rv->i, word.buf, rd->arg.p, tv) &&
      !tvec_flushtoeol(tv, 0))
    rc = 0;

  dstr_destroy(&word);
  return (rc);
}

/* Parse an unsigned-integer register value from the test-vector file.
 * Reads one word (delimited by `;'), converts it with @parse_unsigned@
 * (passing the register definition's @arg.p@), and then requires the rest
 * of the line to be clear.  Returns zero on success or @-1@ on error.
 */
static int parse_uint(union tvec_regval *rv, const struct tvec_regdef *rd,
		      struct tvec_state *tv)
{
  dstr word = DSTR_INIT;
  int rc = -1;

  /* Each step is attempted only if all of the previous ones succeeded. */
  if (!tvec_readword(tv, &word, ";", "unsigned integer") &&
      !parse_unsigned(&rv->u, word.buf, rd->arg.p, tv) &&
      !tvec_flushtoeol(tv, 0))
    rc = 0;

  dstr_destroy(&word);
  return (rc);
}

/* Write a signed-integer register value in decimal.  Unless the compact
 * style is requested, follow it with a comment showing the value in hex
 * and, where @maybe_format_signed_char@ chooses to, as a character.
 */
static void dump_int(const union tvec_regval *rv,
		     const struct tvec_regdef *rd,
		     unsigned style,
		     const struct gprintf_ops *gops, void *go)
{
  long i = rv->i;

  gprintf(gops, go, "%ld", i);
  if (style&TVSF_COMPACT) return;

  gprintf(gops, go, " ; = ");
  format_signed_hex(gops, go, i);
  maybe_format_signed_char(gops, go, i);
}

/* Write an unsigned-integer register value in decimal.  Unless the compact
 * style is requested, follow it with a comment showing the value in hex
 * and, where @maybe_format_unsigned_char@ chooses to, as a character.
 */
static void dump_uint(const union tvec_regval *rv,
		      const struct tvec_regdef *rd,
		      unsigned style,
		      const struct gprintf_ops *gops, void *go)
{
  unsigned long u = rv->u;

  gprintf(gops, go, "%lu", u);
  if (style&TVSF_COMPACT) return;

  gprintf(gops, go, " ; = ");
  format_unsigned_hex(gops, go, u);
  maybe_format_unsigned_char(gops, go, u);
}

/* Register type for signed integers.  The hooks are, in order: initialize,
 * release, compare, serialize, deserialize, parse, and dump.
 */
const struct tvec_regty tvty_int = {
  init_int, trivial_release, eq_int,
  tobuf_int, frombuf_int,
  parse_int, dump_int
};

/* Predefined signed-integer ranges, each given as { minimum, maximum }: the
 * native C types, followed by fixed-width 8-, 16- and 32-bit ranges.
 *
 * The lower bound for @tvrange_i32@ is written as %$-2^{31} + 1 - 1$%
 * because the literal `2147483648' on its own doesn't fit in a 32-bit
 * @long@, and so wouldn't have a signed type there before being negated.
 */
const struct tvec_irange
  tvrange_schar = { SCHAR_MIN, SCHAR_MAX },
  tvrange_short = { SHRT_MIN, SHRT_MAX },
  tvrange_int = { INT_MIN, INT_MAX },
  tvrange_long = { LONG_MIN, LONG_MAX },
  tvrange_sbyte = { -128, 127 },
  tvrange_i16 = { -32768, +32767 },
  tvrange_i32 = { -2147483647 - 1, 2147483647 };

/* Register type for unsigned integers.  The hooks are, in order:
 * initialize, release, compare, serialize, deserialize, parse, and dump.
 */
const struct tvec_regty tvty_uint = {
  init_uint, trivial_release, eq_uint,
  tobuf_uint, frombuf_uint,
  parse_uint, dump_uint
};

/* Predefined unsigned-integer ranges, each given as { minimum, maximum }:
 * the native C types and @size_t@, followed by fixed-width 8-, 16- and
 * 32-bit ranges.  Both bounds are inclusive (see their use in
 * @check_string_length@), so the 32-bit maximum is %$2^{32} - 1$%.
 */
const struct tvec_urange
  tvrange_uchar = { 0, UCHAR_MAX },
  tvrange_ushort = { 0, USHRT_MAX },
  tvrange_uint = { 0, UINT_MAX },
  tvrange_ulong = { 0, ULONG_MAX },
  tvrange_size = { 0, (size_t)-1 },
  tvrange_byte = { 0, 255 },
  tvrange_u16 = { 0, 65535 },
  tvrange_u32 = { 0, 4294967295 };

/* --- @tvec_claimeq_int@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@long i0, i1@ = two signed integers
 *		@const char *file@, @unsigned @lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if @i0@ and @i1@ are equal, otherwise zero.
 *
 * Use:		Claim that @i0@ and @i1@ are equal.  The values are stored
 *		in the first output and input registers respectively, and
 *		the comparison and reporting are delegated to @tvec_claimeq@
 *		using the signed-integer register type: on a mismatch, @i0@
 *		is shown as the output value and @i1@ as the input
 *		reference.
 */

int tvec_claimeq_int(struct tvec_state *tv, long i0, long i1,
		     const char *file, unsigned lno, const char *expr)
{
  tv->out[0].v.i = i0;
  tv->in[0].v.i = i1;
  return (tvec_claimeq(tv, &tvty_int, 0, file, lno, expr));
}

/* --- @tvec_claimeq_uint@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@unsigned long u0, u1@ = two unsigned integers
 *		@const char *file@, @unsigned @lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if @u0@ and @u1@ are equal, otherwise zero.
 *
 * Use:		Claim that @u0@ and @u1@ are equal.  The values are stored
 *		in the first output and input registers respectively, and
 *		the comparison and reporting are delegated to @tvec_claimeq@
 *		using the unsigned-integer register type: on a mismatch,
 *		@u0@ is shown as the output value and @u1@ as the input
 *		reference.
 */

int tvec_claimeq_uint(struct tvec_state *tv,
		      unsigned long u0, unsigned long u1,
		      const char *file, unsigned lno, const char *expr)
{
  tv->out[0].v.u = u0;
  tv->in[0].v.u = u1;
  return (tvec_claimeq(tv, &tvty_uint, 0, file, lno, expr));
}

/*----- Floating-point type -----------------------------------------------*/

/* Initialize a floating-point register value to zero. */
static void init_float(union tvec_regval *rv, const struct tvec_regdef *rd)
{
  rv->f = 0.0;
}

/* Compare two floating-point register values using @eqish_floating_p@, with
 * the comparison parameters taken from the register definition's @arg.p@.
 */
static int eq_float(const union tvec_regval *rv0,
		    const union tvec_regval *rv1,
		    const struct tvec_regdef *rd)
{
  return (eqish_floating_p(rv0->f, rv1->f, rd->arg.p));
}

/* Serialize a floating-point register value onto @b@ using
 * @buf_putf64l@.
 */
static int tobuf_float(buf *b, const union tvec_regval *rv,
		       const struct tvec_regdef *rd)
{
  return (buf_putf64l(b, rv->f));
}
/* Deserialize a floating-point register value from @b@ using
 * @buf_getf64l@.
 */
static int frombuf_float(buf *b, union tvec_regval *rv,
			 const struct tvec_regdef *rd)
{
  return (buf_getf64l(b, &rv->f));
}

/* Parse a floating-point register value from the test-vector file.  Reads
 * one word (delimited by `;'), converts it with @parse_floating@ (passing
 * the register definition's @arg.p@), and then requires the rest of the
 * line to be clear.  Returns zero on success or @-1@ on error.
 */
static int parse_float(union tvec_regval *rv, const struct tvec_regdef *rd,
		       struct tvec_state *tv)
{
  dstr word = DSTR_INIT;
  int rc = -1;

  /* Each step is attempted only if all of the previous ones succeeded. */
  if (!tvec_readword(tv, &word, ";", "floating-point number") &&
      !parse_floating(&rv->f, word.buf, rd->arg.p, tv) &&
      !tvec_flushtoeol(tv, 0))
    rc = 0;

  dstr_destroy(&word);
  return (rc);
}

/* Write a floating-point register value using @format_floating@.  The
 * output is the same regardless of @style@.
 */
static void dump_float(const union tvec_regval *rv,
		       const struct tvec_regdef *rd,
		       unsigned style,
		       const struct gprintf_ops *gops, void *go)
{
  format_floating(gops, go, rv->f);
}

/* Register type for floating-point numbers.  The hooks are, in order:
 * initialize, release, compare, serialize, deserialize, parse, and dump.
 */
const struct tvec_regty tvty_float = {
  init_float, trivial_release, eq_float,
  tobuf_float, frombuf_float,
  parse_float, dump_float
};

/* --- @tvec_claimeqish_float@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@double f0, f1@ = two floating-point numbers
 *		@unsigned f@ = flags (@TVFF_...@)
 *		@double delta@ = maximum tolerable difference
 *		@const char *file@, @unsigned @lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if @f0@ and @f1@ are sufficiently close, otherwise
 *		zero.
 *
 * Use:		Check that values of @f0@ and @f1@ are sufficiently close.
 *		As for @tvec_claim@ above, a test case is automatically begun
 *		and ended if none is already underway.  If the values are
 *		too far apart, then @tvec_fail@ is called, quoting @expr@,
 *		and the mismatched values are dumped: @f0@ is printed as the
 *		output value and @f1@ is printed as the input reference.
 *
 *		The details for the comparison are as follows.
 *
 *		  * A NaN value matches any other NaN, and nothing else.
 *
 *		  * An infinity matches another infinity of the same sign,
 *		    and nothing else.
 *
 *		  * If @f&TVFF_EQMASK@ is @TVFF_EXACT@, then any
 *		    representable number matches only itself: in particular,
 *		    positive and negative zero are considered distinct.
 *		    (This allows tests to check that they land on the correct
 *		    side of branch cuts, for example.)
 *
 *		  * If @f&TVFF_EQMASK@ is @TVFF_ABSDELTA@, then %$x$% matches
 *		    %$y$% when %$|x - y| < \delta$%.
 *
 *		  * If @f&TVFF_EQMASK@ is @TVFF_RELDELTA@, then %$x$% matches
 *		    %$y$% when %$|1 - y/x| < \delta$%.  (Note that this
 *		    criterion is asymmetric in %$x$% and %$y$%.)
 */

int tvec_claimeqish_float(struct tvec_state *tv,
			  double f0, double f1, unsigned f, double delta,
			  const char *file, unsigned lno,
			  const char *expr)
{
  struct tvec_floatinfo fi;
  union tvec_misc arg;

  /* Load the two values into the first output and input registers. */
  tv->out[0].v.f = f0;
  tv->in[0].v.f = f1;

  /* Build a temporary comparison descriptor: the range bounds are simply
   * set to zero here.
   */
  fi.f = f;
  fi.min = 0.0; fi.max = 0.0;
  fi.delta = delta;
  arg.p = &fi;

  return (tvec_claimeq(tv, &tvty_float, &arg, file, lno, expr));
}

/* --- @tvec_claimeq_float@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@double f0, f1@ = two floating-point numbers
 *		@const char *file@, @unsigned @lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if @f0@ and @f1@ are identical, otherwise zero.
 *
 * Use:		Check that values of @f0@ and @f1@ are identical.  This is
 *		simply @tvec_claimeqish_float@ called with flags
 *		@TVFF_EXACT@ and a zero @delta@.
 */

int tvec_claimeq_float(struct tvec_state *tv,
		       double f0, double f1,
		       const char *file, unsigned lno,
		       const char *expr)
{
  const unsigned flags = TVFF_EXACT;
  const double delta = 0.0;

  return (tvec_claimeqish_float(tv, f0, f1, flags, delta, file, lno, expr));
}

/* Predefined floating-point descriptors, initialized in the same order in
 * which @tvec_claimeqish_float@ fills one in: flags, minimum, maximum, and
 * delta.  @tvflt_finite@ spans @-DBL_MAX@ to @DBL_MAX@; @tvflt_nonneg@
 * spans zero to @DBL_MAX@.  Both specify @TVFF_EXACT@.
 */
const struct tvec_floatinfo
  tvflt_finite = { TVFF_EXACT, -DBL_MAX, DBL_MAX, 0.0 },
  tvflt_nonneg = { TVFF_EXACT, 0, DBL_MAX, 0.0 };

/*----- Enumerations ------------------------------------------------------*/

/* Integer and floating-point enumerations are initialized just like the
 * underlying plain types.
 */
#define init_ienum init_int
#define init_uenum init_uint
#define init_fenum init_float

/* Initialize a pointer-enumeration register value to null. */
static void init_penum(union tvec_regval *rv, const struct tvec_regdef *rd)
{
  rv->p = 0;
}

/* Integer enumerations compare just like the underlying plain types. */
#define eq_ienum eq_int
#define eq_uenum eq_uint

/* Compare two floating-point enumeration values using @eqish_floating_p@,
 * with the comparison parameters taken from the enumeration's @fi@ member.
 */
static int eq_fenum(const union tvec_regval *rv0,
		    const union tvec_regval *rv1,
		    const struct tvec_regdef *rd)
{
  const struct tvec_fenuminfo *info = rd->arg.p;
  double x = rv0->f, y = rv1->f;

  return (eqish_floating_p(x, y, info->fi));
}
/* Return nonzero if two pointer-enumeration values are the same pointer. */
static int eq_penum(const union tvec_regval *rv0,
		    const union tvec_regval *rv1,
		    const struct tvec_regdef *rd)
{
  const void *x = rv0->p, *y = rv1->p;

  return (x == y);
}

/* Integer and floating-point enumerations serialize just like the
 * underlying plain types.
 */
#define tobuf_ienum tobuf_int
#define tobuf_uenum tobuf_uint
#define tobuf_fenum tobuf_float

/* Serialize a pointer-enumeration value onto @b@.  A pointer is written as
 * the index of the first association holding it; a null pointer which
 * isn't listed is written as @-1@.  Any other unlisted pointer can't be
 * represented and the function returns @-1@ without writing anything.
 */
static int tobuf_penum(buf *b, const union tvec_regval *rv,
		       const struct tvec_regdef *rd)
{
  const struct tvec_penuminfo *pei = rd->arg.p;
  const struct tvec_passoc *pa = pei->av;
  long ix = 0;

  /* Walk the table until we find the pointer or hit the end marker. */
  while (pa->tag && pa->p != rv->p) { pa++; ix++; }

  if (!pa->tag) {
    if (rv->p) return (-1);
    ix = -1;
  }
  return (signed_to_buf(b, ix));
}

/* Integer and floating-point enumerations deserialize just like the
 * underlying plain types.
 */
#define frombuf_ienum frombuf_int
#define frombuf_uenum frombuf_uint
#define frombuf_fenum frombuf_float

/* Deserialize a pointer-enumeration value from @b@.  The encoded value is
 * an index into the association table, or @-1@ for a null pointer; anything
 * else is rejected.  Returns zero on success or @-1@ on error.
 */
static int frombuf_penum(buf *b, union tvec_regval *rv,
			 const struct tvec_regdef *rd)
{
  const struct tvec_penuminfo *pei = rd->arg.p;
  long ix, count = 0;

  /* Count the associations so that the index can be range-checked. */
  while (pei->av[count].tag) count++;

  if (signed_from_buf(b, &ix)) return (-1);
  if (ix == -1) rv->p = 0;
  else if (0 <= ix && ix < count) rv->p = (/*unconst*/ void *)pei->av[ix].p;
  else return (-1);
  return (0);
}

/* Define the parser @parse_SLOTenum@ for one flavour of enumeration.
 *
 * The generated function reads a word (delimited by `;') and compares it
 * against each tag in the enumeration's association table @ei->av@.  On a
 * match, @FOUND_TAG@ stores the associated value in the register; if no tag
 * matches, @MISSING_TAG@ decides what to do with the word.  Finally, the
 * rest of the line must be clear.  The function returns zero on success or
 * @-1@ on error.
 *
 * The macro is instantiated through @TVEC_MISCSLOTS@, which is defined
 * elsewhere; presumably it supplies the tags @INT@, @UINT@, @FLT@ and @PTR@
 * for which the helper macros below are defined -- TODO confirm.
 */
#define DEFPARSE_ENUM(tag_, ty, slot)					\
  static int parse_##slot##enum(union tvec_regval *rv,			\
				const struct tvec_regdef *rd,		\
				struct tvec_state *tv)			\
  {									\
    const struct tvec_##slot##enuminfo *ei = rd->arg.p;			\
    const struct tvec_##slot##assoc *a;					\
    dstr d = DSTR_INIT;							\
    int rc;								\
									\
    if (tvec_readword(tv, &d, ";", "enumeration tag or " LITSTR_##tag_)) \
      { rc = -1; goto end; }						\
    for (a = ei->av; a->tag; a++)					\
      if (STRCMP(a->tag, ==, d.buf)) { FOUND_##tag_ goto done; }	\
    MISSING_##tag_							\
    done:								\
    if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }			\
    rc = 0;								\
  end:									\
    dstr_destroy(&d);							\
    return (rc);							\
  }

/* Signed integers: an unrecognized word is handed to @parse_signed@ along
 * with the enumeration's @ir@ member.
 */
#define LITSTR_INT	"literal signed integer"
#define FOUND_INT	rv->i = a->i;
#define MISSING_INT	if (parse_signed(&rv->i, d.buf, ei->ir, tv))	\
			  { rc = -1; goto end; }

/* Unsigned integers: an unrecognized word is handed to @parse_unsigned@
 * along with the enumeration's @ur@ member.
 */
#define LITSTR_UINT	"literal unsigned integer"
#define FOUND_UINT	rv->u = a->u;
#define MISSING_UINT	if (parse_unsigned(&rv->u, d.buf, ei->ur, tv))	\
			  { rc = -1; goto end; }

/* Floating-point numbers: an unrecognized word is handed to
 * @parse_floating@ along with the enumeration's @fi@ member.
 */
#define LITSTR_FLT	"literal floating-point number, "		\
			  "`#-inf', `#+inf', or `#nan'"
#define FOUND_FLT	rv->f = a->f;
#define MISSING_FLT	if (parse_floating(&rv->f, d.buf, ei->fi, tv))	\
			  { rc = -1; goto end; }

/* Pointers: the only acceptable unrecognized word is `#nil', which denotes
 * a null pointer; anything else is reported as an error.
 */
#define LITSTR_PTR	"`#nil'"
#define FOUND_PTR	rv->p = (/*unconst*/ void *)a->p;
#define MISSING_PTR	if (STRCMP(d.buf, ==, "#nil"))			\
			  rv->p = 0;					\
			else {						\
			  tvec_error(tv, "unknown `%s' value `%s'",	\
				     ei->name, d.buf);			\
			  rc = -1; goto end;				\
			}

/* Stamp out the parsers. */
TVEC_MISCSLOTS(DEFPARSE_ENUM)

/* The helper macros have served their purpose. */
#undef LITSTR_INT
#undef FOUND_INT
#undef MISSING_INT

#undef LITSTR_UINT
#undef FOUND_UINT
#undef MISSING_UINT

#undef LITSTR_FLT
#undef FOUND_FLT
#undef MISSING_FLT

#undef LITSTR_PTR
#undef FOUND_PTR
#undef MISSING_PTR

#undef DEFPARSE_ENUM

/* Define the dumper @dump_SLOTenum@ for one flavour of enumeration.
 *
 * The generated function searches the association table for the first
 * entry whose value equals the register's.  If there is one, its tag is
 * written: in compact style that's all; otherwise a comment is opened with
 * ` ; = ' and the raw value follows.  If no entry matches, only the raw
 * value is written, by @PRINTRAW_TAG@.  On leaving the loop, @a->tag@ is
 * null exactly when no entry matched, which @MAYBE_PRINT_EXTRA@ relies on.
 */
#define DEFDUMP_ENUM(tag_, ty, slot)					\
  static void dump_##slot##enum(const union tvec_regval *rv,		\
				const struct tvec_regdef *rd,		\
				unsigned style,				\
				const struct gprintf_ops *gops, void *go) \
  {									\
    const struct tvec_##slot##enuminfo *ei = rd->arg.p;			\
    const struct tvec_##slot##assoc *a;					\
									\
    for (a = ei->av; a->tag; a++)					\
      if (rv->slot == a->slot) {					\
	gprintf(gops, go, "%s", a->tag);				\
	if (style&TVSF_COMPACT) return;					\
	gprintf(gops, go, " ; = "); break;				\
      }									\
									\
    PRINTRAW_##tag_							\
  }

/* Statement prefix guarding the block which follows it.  In compact style
 * the block is skipped entirely.  Otherwise a separator is written -- ` ; = '
 * to open a comment if no tag was printed, or just ` = ' to continue the
 * comment already opened -- and control jumps to the @_extra@ label, which
 * is attached to the block.
 */
#define MAYBE_PRINT_EXTRA						\
	if (style&TVSF_COMPACT) ;					\
	else if (!a->tag) { gprintf(gops, go, " ; = "); goto _extra; }	\
	else if (1) { gprintf(gops, go, " = "); goto _extra; }		\
	else _extra:

/* Signed integers: decimal, then optionally hex and maybe a character. */
#define PRINTRAW_INT	gprintf(gops, go, "%ld", rv->i);		\
			MAYBE_PRINT_EXTRA {				\
			  format_signed_hex(gops, go, rv->i);		\
			  maybe_format_signed_char(gops, go, rv->i);	\
			}

/* Unsigned integers: decimal, then optionally hex and maybe a character. */
#define PRINTRAW_UINT	gprintf(gops, go, "%lu", rv->u);		\
			MAYBE_PRINT_EXTRA {				\
			  format_unsigned_hex(gops, go, rv->u);		\
			  maybe_format_unsigned_char(gops, go, rv->u);	\
			}

/* Floating-point numbers: whatever @format_floating@ writes. */
#define PRINTRAW_FLT	format_floating(gops, go, rv->f);

/* Pointers: `#nil' for null, otherwise the enumeration name and address. */
#define PRINTRAW_PTR	if (!rv->p) gprintf(gops, go, "#nil");		\
			else gprintf(gops, go, "#<%s %p>", ei->name, rv->p);

/* Stamp out the dumpers. */
TVEC_MISCSLOTS(DEFDUMP_ENUM)

#undef PRINTRAW_INT
#undef PRINTRAW_UINT
#undef PRINTRAW_FLT
#undef PRINTRAW_PTR

#undef MAYBE_PRINT_EXTRA
#undef DEFDUMP_ENUM

/* Define the register type @tvty_SLOTenum@ for each flavour of enumeration,
 * gathering up the per-slot hooks defined above.  Enumeration values don't
 * retain resources, so @trivial_release@ serves as the release hook
 * throughout.
 */
#define DEFTY_ENUM(tag, ty, slot)					\
  const struct tvec_regty tvty_##slot##enum = {				\
    init_##slot##enum, trivial_release, eq_##slot##enum,		\
    tobuf_##slot##enum, frombuf_##slot##enum,				\
    parse_##slot##enum, dump_##slot##enum				\
  };
TVEC_MISCSLOTS(DEFTY_ENUM)
#undef DEFTY_ENUM

/* Tags for boolean values.  Several spellings map to each value; the
 * enumeration dumper prints the first tag matching a value, so the order
 * here determines which spelling appears in output.
 */
static const struct tvec_iassoc bool_assoc[] = {
  /* Spellings of false (zero). */
  { "nil",		0 },
  { "false",		0 },
  { "f",		0 },
  { "no",		0 },
  { "n",		0 },
  { "off",		0 },

  /* Spellings of true (one). */
  { "t",		1 },
  { "true",		1 },
  { "yes",		1 },
  { "y",		1 },
  { "on",		1 },

  TVEC_ENDENUM
};

/* Signed-integer enumeration `bool': the tags in @bool_assoc@, with
 * @tvrange_int@ as the range for values written as integer literals.
 */
const struct tvec_ienuminfo tvenum_bool =
  { "bool", bool_assoc, &tvrange_int };

/* Tags for three-way comparison results.  Several spellings map to each
 * value; the enumeration dumper prints the first tag matching a value, so
 * the symbolic forms `<', `=' and `>' are the ones which appear in output.
 */
static const struct tvec_iassoc cmp_assoc[] = {
  /* Less than: -1. */
  { "<",		-1 },
  { "less",		-1 },
  { "lt",		-1 },

  /* Equal: 0. */
  { "=",		 0 },
  { "equal",		 0 },
  { "eq",		 0 },

  /* Greater than: +1. */
  { ">",		+1 },
  { "greater",		+1 },
  { "gt",		+1 },

  TVEC_ENDENUM
};

/* Signed-integer enumeration `cmp': the tags in @cmp_assoc@, with
 * @tvrange_int@ as the range for values written as integer literals.
 */
const struct tvec_ienuminfo tvenum_cmp =
  { "cmp", cmp_assoc, &tvrange_int };

/* --- @tvec_claimeq_tenum@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@const struct tvec_typeenuminfo *ei@ = enumeration type info
 *		@ty e0, e1@ = two values
 *		@const char *file@, @unsigned @lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if @e0@ and @e1@ are equal, otherwise zero.
 *
 * Use:		Check that values of @e0@ and @e1@ are equal.  As for
 *		@tvec_claim@ above, a test case is automatically begun and
 *		ended if none is already underway.  If the values are
 *		unequal, then @tvec_fail@ is called, quoting @expr@, and the
 *		mismatched values are dumped: @e0@ is printed as the output
 *		value and @e1@ is printed as the input reference.
 *
 *		One such function is defined for each enumeration flavour:
 *		the `t' in the name above stands for the slot letter, and
 *		@ty@ for the corresponding C type.
 */

/* Each function stores its arguments in the first output and input
 * registers, via @GET_TAG@, and hands the enumeration info to @tvec_claimeq@
 * as the register-definition argument.
 */
#define DEFCLAIM(tag, ty, slot)						\
	int tvec_claimeq_##slot##enum					\
	  (struct tvec_state *tv,					\
	   const struct tvec_##slot##enuminfo *ei, ty e0, ty e1,	\
	   const char *file, unsigned lno, const char *expr)		\
	{								\
	  union tvec_misc arg;						\
									\
	  arg.p = ei;							\
	  tv->out[0].v.slot = GET_##tag(e0);				\
	  tv->in[0].v.slot = GET_##tag(e1);				\
	  return (tvec_claimeq(tv, &tvty_##slot##enum, &arg,		\
			       file, lno, expr));			\
	}

/* Values are stored as they are, except that pointers are cast to plain
 * @void *@ to strip any qualifiers.
 */
#define GET_INT(e) (e)
#define GET_UINT(e) (e)
#define GET_FLT(e) (e)
#define GET_PTR(e) ((/*unconst*/ void *)(e))
TVEC_MISCSLOTS(DEFCLAIM)
#undef DEFCLAIM
#undef GET_INT
#undef GET_UINT
#undef GET_FLT
#undef GET_PTR

/*----- Flag types --------------------------------------------------------*/

/* Parse a flags register value: a sequence of flag names or integer
 * literals separated by `|'.
 *
 * Each named flag contributes its value bits @v@ and claims its mask bits
 * @m@; naming a flag whose mask overlaps one already claimed is an error.
 * A word which isn't a flag name is converted with @parse_unsigned@ (passing
 * the flag info's @range@) and simply ORed in.  The register is only
 * written on success.  Returns zero on success or @-1@ on error.
 */
static int parse_flags(union tvec_regval *rv, const struct tvec_regdef *rd,
		       struct tvec_state *tv)
{
  const struct tvec_flaginfo *fi = rd->arg.p;
  const struct tvec_flag *fl;
  unsigned long claimed = 0, val = 0, lit;
  dstr word = DSTR_INIT;
  int ch, rc = -1;

  for (;;) {

    /* Fetch the next item. */
    DRESET(&word);
    if (tvec_readword(tv, &word, "|;", "flag name or integer")) goto end;

    /* Look it up as a flag name; stop at the first matching tag. */
    for (fl = fi->fv; fl->tag; fl++) {
      if (!STRCMP(fl->tag, ==, word.buf)) continue;
      if (claimed&fl->m)
	{ tvec_error(tv, "colliding flag setting"); goto end; }
      claimed |= fl->m; val |= fl->v;
      break;
    }

    /* If we ran off the end of the table, it must be a literal. */
    if (!fl->tag) {
      if (parse_unsigned(&lit, word.buf, fi->range, tv)) goto end;
      val |= lit;
    }

    /* We're done at the end of the line; otherwise there must be a `|'
     * followed by another item.
     */
    if (tvec_nexttoken(tv)) break;
    ch = getc(tv->fp);
    if (ch != '|') { tvec_syntax(tv, ch, "`|'"); goto end; }
    if (tvec_nexttoken(tv))
      { tvec_syntax(tv, '\n', "flag name or integer"); goto end; }
  }

  rv->u = val;
  rc = 0;

end:
  dstr_destroy(&word);
  return (rc);
}

/* Write a flags register value as flag names joined by `|'.
 *
 * The flag table is scanned in order: a flag is printed if none of its mask
 * bits have been accounted for by an earlier flag and the value's bits
 * under the mask equal the flag's value.  Any set bits left unaccounted for
 * are printed in hex.  Unless the compact style is requested, the whole
 * value is then shown in hex as a comment.
 */
static void dump_flags(const union tvec_regval *rv,
		       const struct tvec_regdef *rd,
		       unsigned style,
		       const struct gprintf_ops *gops, void *go)
{
  const struct tvec_flaginfo *fi = rd->arg.p;
  const struct tvec_flag *fl;
  unsigned long left = ~(unsigned long)0, val = rv->u;
  int compact = (style&TVSF_COMPACT) != 0;
  const char *sep = "";

  for (fl = fi->fv; fl->tag; fl++) {
    if (!(left&fl->m) || (val&fl->m) != fl->v) continue;
    gprintf(gops, go, "%s%s", sep, fl->tag);
    left &= ~fl->m;
    sep = compact ? "|" : " | ";
  }

  if (val&left)
    gprintf(gops, go, "%s0x%0*lx", sep, hex_width(val), val&left);

  if (!compact)
    gprintf(gops, go, " ; = 0x%0*lx", hex_width(rv->u), rv->u);
}

/* Register type for flag words.  Values are held as unsigned integers, so
 * all of the hooks are borrowed from @tvty_uint@ other than parsing and
 * dumping.
 */
const struct tvec_regty tvty_flags = {
  init_uint, trivial_release, eq_uint,
  tobuf_uint, frombuf_uint,
  parse_flags, dump_flags
};

/* --- @tvec_claimeq_flags@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@const struct tvec_flaginfo *fi@ = flags type info
 *		@unsigned long f0, f1@ = two values
 *		@const char *file@, @unsigned @lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if @f0@ and @f1@ are equal, otherwise zero.
 *
 * Use:		Claim that @f0@ and @f1@ are equal.  The values are stored
 *		in the first output and input registers respectively, and
 *		the comparison and reporting are delegated to @tvec_claimeq@
 *		using the flags register type, with @fi@ as its argument: on
 *		a mismatch, @f0@ is shown as the output value and @f1@ as
 *		the input reference.
 */

int tvec_claimeq_flags(struct tvec_state *tv,
		       const struct tvec_flaginfo *fi,
		       unsigned long f0, unsigned long f1,
		       const char *file, unsigned lno, const char *expr)
{
  union tvec_misc arg;

  tv->out[0].v.u = f0;
  tv->in[0].v.u = f1;
  arg.p = fi;
  return (tvec_claimeq(tv, &tvty_flags, &arg, file, lno, expr));
}

/*----- Characters --------------------------------------------------------*/

/* Serialize a character register value onto @b@ as a 32-bit quantity.
 * Character codes from zero to @UCHAR_MAX@ are written as they are, and
 * @EOF@ is written as all-bits-set; anything else can't be represented, and
 * the function returns @-1@.
 */
static int tobuf_char(buf *b, const union tvec_regval *rv,
		      const struct tvec_regdef *rd)
{
  long ch = rv->i;
  uint32 u;

  if (ch == EOF) u = MASK32;
  else if (0 <= ch && ch <= UCHAR_MAX) u = ch;
  else return (-1);
  return (buf_putu32l(b, u));
}

/* Deserialize a character register value from @b@.  This undoes
 * @tobuf_char@: values up to @UCHAR_MAX@ are character codes, and
 * all-bits-set stands for @EOF@; anything else is rejected.  Returns zero
 * on success or @-1@ on error.
 */
static int frombuf_char(buf *b, union tvec_regval *rv,
			const struct tvec_regdef *rd)
{
  uint32 u;

  if (buf_getu32l(b, &u)) return (-1);

  /* @u@ is unsigned, so only the upper bound needs checking. */
  if (u <= UCHAR_MAX) { rv->i = u; return (0); }
  if (u == MASK32) { rv->i = EOF; return (0); }
  return (-1);
}

/* Parse a character register value.  Returns zero on success or @-1@ on
 * error.
 *
 * The accepted forms are:
 *
 *   * a character name beginning with `#', looked up with @read_charname@
 *     (with @RCF_EOFOK@ set);
 *
 *   * a single character, optionally enclosed in single quotes, where a
 *     backslash introduces an escape sequence read by @read_charesc@.
 *
 * An unquoted `;' or `'' is rejected, as is an empty pair of quotes; a
 * quoted `;' is accepted.  A lone `'' followed immediately by a newline or
 * end-of-file is taken to mean the quote character itself.
 */
static int parse_char(union tvec_regval *rv, const struct tvec_regdef *rd,
		      struct tvec_state *tv)
{
  dstr d = DSTR_INIT;
  int ch, rc;
  unsigned f = 0;
#define f_quote 1u

  /* Inspect the first character.  If it's `#' then this is a character
   * name: put the `#' back and read the whole word.
   */
  ch = getc(tv->fp);
  if (ch == '#') {
    ungetc(ch, tv->fp);
    if (tvec_readword(tv, &d, ";", "character name")) { rc = -1; goto end; }
    if (read_charname(&ch, d.buf, RCF_EOFOK)) {
      rc = tvec_error(tv, "unknown character name `%s'", d.buf);
      goto end;
    }
    if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
    rv->i = ch; rc = 0; goto end;
  }

  /* Otherwise it's a literal character, maybe quoted. */
  if (ch == '\'') { f |= f_quote; ch = getc(tv->fp); }
  switch (ch) {
    case ';':
      /* A semicolon must be quoted to be taken literally. */
      if (!(f&f_quote)) { rc = tvec_syntax(tv, ch, "character"); goto end; }
      goto plain;
    case '\n':
      /* What looked like an opening quote was the character itself.  Put
       * the newline back to be consumed when flushing the line.
       */
      if (f&f_quote)
	{ f &= ~f_quote; ungetc(ch, tv->fp); ch = '\''; goto plain; }
      /* fall through */
    case EOF:
      if (f&f_quote) { f &= ~f_quote; ch = '\''; goto plain; }
      /* fall through */
    case '\'':
      rc = tvec_syntax(tv, ch, "character"); goto end;
    case '\\':
      /* Leave through the common exit so that cleanup always happens. */
      if (read_charesc(&ch, tv)) { rc = -1; goto end; }
      /* fall through */
    default: plain:
      rv->i = ch; break;
  }

  /* If there was an opening quote, there must be a closing one. */
  if (f&f_quote) {
    ch = getc(tv->fp);
    if (ch != '\'') { rc = tvec_syntax(tv, ch, "`''"); goto end; }
  }
  if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
  rc = 0;
end:
  dstr_destroy(&d);
  return (rc);

#undef f_quote
}

/* Write a character register value.
 *
 * If @find_charname@ knows a name for the character, that is written
 * first; in compact style nothing more is written.  Otherwise, in compact
 * style, a printable character other than space, backslash, or quote is
 * written as itself, and anything else nonnegative is handed to
 * @format_char@.  In non-compact style, nonnegative characters are always
 * written using @format_char@, and the output ends with a comment giving
 * the code in decimal and hex.
 */
static void dump_char(const union tvec_regval *rv,
		      const struct tvec_regdef *rd,
		      unsigned style,
		      const struct gprintf_ops *gops, void *go)
{
  const char *p;
  unsigned f = 0;
#define f_semi 1u			/* comment semicolon already written */

  /* Start with the character's name, if it has one. */
  p = find_charname(rv->i, (style&TVSF_COMPACT) ? CTF_SHORT : CTF_PREFER);
  if (p) {
    gprintf(gops, go, "%s", p);
    if (style&TVSF_COMPACT) return;
    else { gprintf(gops, go, " ;"); f |= f_semi; }
  }

  /* Nonnegative values are character codes and can be shown literally;
   * negative ones get only the name and numeric forms.
   */
  if (rv->i >= 0) {
    if (f&f_semi) gprintf(gops, go, " = ");
    switch (rv->i) {
      case ' ': case '\\': case '\'': quote:
	format_char(gops, go, rv->i);
	break;
      default:
	/* Only compact output of a printable character goes unquoted. */
	if (!(style&TVSF_COMPACT) || !isprint(rv->i)) goto quote;
	gprintf(gops, go, "%c", (int)rv->i);
	return;
    }
  }

  /* Finish with the numeric code, opening the comment if necessary. */
  if (!(style&TVSF_COMPACT)) {
    if (!(f&f_semi)) gprintf(gops, go, " ;");
    gprintf(gops, go, " = %ld = ", rv->i);
    format_signed_hex(gops, go, rv->i);
  }

#undef f_semi
}

/* Register type for characters.  Values are held as signed integers, so
 * initialization and comparison are borrowed from @tvty_int@; serialization,
 * parsing, and dumping are specific to characters.
 */
const struct tvec_regty tvty_char = {
  init_int, trivial_release, eq_int,
  tobuf_char, frombuf_char,
  parse_char, dump_char
};

/* --- @tvec_claimeq_char@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@int c0, c1@ = two character codes
 *		@const char *file@, @unsigned @lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if @c0@ and @c1@ are equal, otherwise zero.
 *
 * Use:		Claim that @c0@ and @c1@ are equal.  The values are stored
 *		in the first output and input registers respectively, and
 *		the comparison and reporting are delegated to @tvec_claimeq@
 *		using the character register type: on a mismatch, @c0@ is
 *		shown as the output value and @c1@ as the input reference.
 */

int tvec_claimeq_char(struct tvec_state *tv, int c0, int c1,
		      const char *file, unsigned lno, const char *expr)
{
  tv->out[0].v.i = c0;
  tv->in[0].v.i = c1;
  return (tvec_claimeq(tv, &tvty_char, 0, file, lno, expr));
}

/*----- Text and byte strings ---------------------------------------------*/

/* Initialize a text-string register value: no buffer, zero length. */
static void init_string(union tvec_regval *rv, const struct tvec_regdef *rd)
{
  rv->str.p = 0;
  rv->str.sz = 0;
}

/* Initialize a byte-string register value: no buffer, zero length. */
static void init_bytes(union tvec_regval *rv, const struct tvec_regdef *rd)
{
  rv->bytes.p = 0;
  rv->bytes.sz = 0;
}

/* Release a text-string register value by freeing its buffer. */
static void release_string(union tvec_regval *rv,
			   const struct tvec_regdef *rd)
{
  xfree(rv->str.p);
}

/* Release a byte-string register value by freeing its buffer. */
static void release_bytes(union tvec_regval *rv,
			  const struct tvec_regdef *rd)
{
  xfree(rv->bytes.p);
}

/* Return nonzero if two text-string register values have the same length
 * and contents.  Empty strings are equal without their buffers (which may
 * be null) being examined.
 */
static int eq_string(const union tvec_regval *rv0,
		     const union tvec_regval *rv1,
		     const struct tvec_regdef *rd)
{
  /* Use the @str@ member throughout: this is a text string, and reading
   * the length through @bytes@ only works if the two members happen to be
   * laid out identically.
   */
  return (rv0->str.sz == rv1->str.sz &&
	  (!rv0->str.sz ||
	   MEMCMP(rv0->str.p, ==, rv1->str.p, rv1->str.sz)));
}

/* Return nonzero if two byte-string register values have the same length
 * and contents.  Empty strings are equal without their buffers (which may
 * be null) being examined.
 */
static int eq_bytes(const union tvec_regval *rv0,
		    const union tvec_regval *rv1,
		    const struct tvec_regdef *rd)
{
  if (rv0->bytes.sz != rv1->bytes.sz) return (0);
  if (!rv0->bytes.sz) return (1);
  return (MEMCMP(rv0->bytes.p, ==, rv1->bytes.p, rv1->bytes.sz));
}

/* Serialize a text-string register value onto @b@ using
 * @buf_putmem32l@.
 */
static int tobuf_string(buf *b, const union tvec_regval *rv,
			const struct tvec_regdef *rd)
{
  return (buf_putmem32l(b, rv->str.p, rv->str.sz));
}

/* Serialize a byte-string register value onto @b@ using
 * @buf_putmem32l@.
 */
static int tobuf_bytes(buf *b, const union tvec_regval *rv,
		       const struct tvec_regdef *rd)
{
  return (buf_putmem32l(b, rv->bytes.p, rv->bytes.sz));
}

/* Deserialize a text-string register value from @b@.  The register is
 * resized with @tvec_allocstring@, the data copied in, and a terminating
 * zero byte written after it.  Returns zero on success or @-1@ if the
 * buffer can't supply the string.
 */
static int frombuf_string(buf *b, union tvec_regval *rv,
			  const struct tvec_regdef *rd)
{
  size_t len;
  const void *src = buf_getmem32l(b, &len);

  if (!src) return (-1);
  tvec_allocstring(rv, len);
  memcpy(rv->str.p, src, len);
  rv->str.p[len] = 0;
  return (0);
}

/* Deserialize a byte-string register value from @b@.  The register is
 * resized with @tvec_allocbytes@ and the data copied in.  Returns zero on
 * success or @-1@ if the buffer can't supply the string.
 */
static int frombuf_bytes(buf *b, union tvec_regval *rv,
			 const struct tvec_regdef *rd)
{
  size_t len;
  const void *src = buf_getmem32l(b, &len);

  if (!src) return (-1);
  tvec_allocbytes(rv, len);
  memcpy(rv->bytes.p, src, len);
  return (0);
}

/* Check that the string length @sz@ lies within the inclusive range @ur@,
 * reporting an error through @tv@ if not.  A null @ur@ means that any
 * length is acceptable.  Returns zero if the length is fine, or the result
 * of @tvec_error@ otherwise.
 */
static int check_string_length(size_t sz, const struct tvec_urange *ur,
			       struct tvec_state *tv)
{
  if (!ur || (ur->min <= sz && sz <= ur->max)) return (0);

  return (tvec_error(tv,
		     "invalid string length %lu; must be in [%lu .. %lu]",
		     (unsigned long)sz, ur->min, ur->max));
}

/* Parse a text-string register value.  The value is read as a compound
 * string whose barewords are initially taken literally (@TVCODE_BARE@),
 * offering the register's existing buffer for reuse; the resulting length
 * is then checked against the register definition's @arg.p@.  Returns zero
 * on success or @-1@ on error.
 */
static int parse_string(union tvec_regval *rv, const struct tvec_regdef *rd,
			struct tvec_state *tv)
{
  void *buf = rv->str.p;
  int rc;

  rc = read_compound_string(&buf, &rv->str.sz, TVCODE_BARE, 0, tv);
  if (rc) return (-1);

  /* The buffer may have been replaced, so store the pointer back. */
  rv->str.p = buf;

  return (check_string_length(rv->str.sz, rd->arg.p, tv) ? -1 : 0);
}

/* Parse a byte-string register value.  The value is read as a compound
 * string whose barewords are initially decoded as hex (@TVCODE_HEX@),
 * offering the register's existing buffer for reuse; the resulting length
 * is then checked against the register definition's @arg.p@.  Returns zero
 * on success or @-1@ on error.
 */
static int parse_bytes(union tvec_regval *rv, const struct tvec_regdef *rd,
		       struct tvec_state *tv)
{
  void *buf = rv->bytes.p;
  int rc;

  rc = read_compound_string(&buf, &rv->bytes.sz, TVCODE_HEX, 0, tv);
  if (rc) return (-1);

  /* The buffer may have been replaced, so store the pointer back. */
  rv->bytes.p = buf;

  return (check_string_length(rv->bytes.sz, rd->arg.p, tv) ? -1 : 0);
}

/* Write a text-string register value to the output described by @gops@ and
 * @go@.  An empty string is written as `""'.  A string which needs no
 * quoting is written verbatim; otherwise it is written in double quotes,
 * with unprintable characters and double quotes escaped.  Unless
 * @TVSF_COMPACT@ is set in @style@, a quoted string is split into a new
 * quoted piece after each embedded newline.
 */
static void dump_string(const union tvec_regval *rv,
			const struct tvec_regdef *rd,
			unsigned style,
			const struct gprintf_ops *gops, void *go)
{
  const unsigned char *p, *q, *l;
  unsigned f = 0;
  /* @f_nonword@: the string can't be written bare and must be quoted.
   * @f_newline@: the string has a newline somewhere other than at the end.
   */
#define f_nonword 1u
#define f_newline 2u

  /* The empty string is always written as an empty pair of quotes. */
  if (!rv->str.sz) { gprintf(gops, go, "\"\""); return; }

  /* Certain leading characters force quoting. */
  p = (const unsigned char *)rv->str.p; l = p + rv->str.sz;
  switch (*p) {
    case '!': case '#': case ';': case '"': case '\'':
    case '(': case '{': case '[': case ']': case '}': case ')':
      f |= f_nonword; break;
  }

  /* Scan the whole string.  A newline anywhere but the final position
   * sets @f_newline@; a zero byte, anything @isgraph@ rejects (including
   * a final newline), or a backslash sets @f_nonword@.
   */
  for (q = p; q < l; q++)
    if (*q == '\n' && q != l - 1) f |= f_newline;
    else if (!*q || !isgraph(*q) || *q == '\\') f |= f_nonword;

  /* A string with interior newlines starts on a fresh, indented line; note
   * that @style@ is not consulted here.
   */
  if (f&f_newline) { gprintf(gops, go, "\n\t"); goto quote; }
  else if (f&f_nonword) goto quote;

  /* Nothing special: write the string as it stands. */
  gops->putm(go, (const char *)p, rv->str.sz);
  return;

quote:
  /* Quoted output.  Here @p@ marks the start of the pending run of
   * characters which can be written literally, and @q@ is the scan
   * position.
   *
   * NOTE(review): a backslash forces quoting above, but it is printable
   * and so is written literally here rather than escaped -- confirm that
   * this is what the parser expects.
   */
  gprintf(gops, go, "\"");
  for (q = p; q < l; q++)
    if (!isprint(*q) || *q == '"') {
      /* Flush the literal run preceding this character. */
      if (p < q) gops->putm(go, (const char *)p, q - p);
      if (*q != '\n' || (style&TVSF_COMPACT))
	format_charesc(gops, go, *q, FCF_BRACE);
      else {
	/* A newline in non-compact style: write `\n' and close the quotes.
	 * If more of the string follows, open a new quoted piece on the
	 * next line; otherwise we're finished.
	 */
	if (q + 1 == l)	{ gprintf(gops, go, "\\n\""); return; }
	else gprintf(gops, go, "\\n\"\n\t\"");
      }
      p = q + 1;
    }

  /* Flush whatever literal text remains and close the quotes. */
  if (p < q) gops->putm(go, (const char *)p, q - p);
  gprintf(gops, go, "\"");

#undef f_nonword
#undef f_newline
}

/* Write a binary-string register value to the output described by @gops@
 * and @go@.
 *
 * An empty value is written as `""', followed by an `; empty' comment
 * unless @TVSF_COMPACT@ is set in @style@.  In compact style, a nonempty
 * value is written as a single run of hex digits.  Otherwise, the value is
 * written in lines of up to 16 bytes, as hex in groups of four bytes,
 * followed by a `;' comment showing the bytes as characters (with `.'
 * standing in for unprintable ones).  If the value is longer than 16
 * bytes, then it starts on a fresh indented line, and each comment also
 * shows the offset of the line's first byte in square brackets.  A short
 * final line is not padded.
 */
static void dump_bytes(const union tvec_regval *rv,
		       const struct tvec_regdef *rd,
		       unsigned style,
		       const struct gprintf_ops *gops, void *go)
{
  const unsigned char *p, *l;
  size_t off, sz = rv->bytes.sz;
  unsigned i, n;
  int wd;

  /* Deal with the empty value before touching the data pointer, which may
   * be null in this case.
   */
  if (!sz) {
    gprintf(gops, go, style&TVSF_COMPACT ? "\"\"" : "\"\" ; empty");
    return;
  }
  p = rv->bytes.p; l = p + sz;

  if (style&TVSF_COMPACT) {
    while (p < l) gprintf(gops, go, "%02x", *p++);
    return;
  }

  if (sz > 16) gprintf(gops, go, "\n\t");

  off = 0; wd = hex_width(sz);
  while (p < l) {
    /* Work out how many bytes go on this line. */
    if (l - p < 16) n = l - p;
    else n = 16;

    /* The hex dump, with a space after each complete group of four bytes
     * other than at the end of the line.
     */
    for (i = 0; i < n; i++) {
      gprintf(gops, go, "%02x", p[i]);
      if (i < n - 1 && i%4 == 3) gprintf(gops, go, " ");
    }

    /* The comment: an offset if there are several lines, and then the
     * bytes as characters.
     */
    gprintf(gops, go, " ; ");
    if (sz > 16) gprintf(gops, go, "[%0*lx] ", wd, (unsigned long)off);
    for (i = 0; i < n; i++)
      gprintf(gops, go, "%c", isprint(p[i]) ? p[i] : '.');

    p += n; off += n;
    if (p < l) gprintf(gops, go, "\n\t");
  }
}

/* The register type for text strings, assembled from the string
 * operations in this file.
 */
const struct tvec_regty tvty_string = {
  init_string, release_string, eq_string,
  tobuf_string, frombuf_string,
  parse_string, dump_string
};

/* The register type for binary strings, assembled from the binary-string
 * operations in this file.
 */
const struct tvec_regty tvty_bytes = {
  init_bytes, release_bytes, eq_bytes,
  tobuf_bytes, frombuf_bytes,
  parse_bytes, dump_bytes
};

/* --- @tvec_claimeq_string@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@const char *p0@, @size_t sz0@ = first string with length
 *		@const char *p1@, @size_t sz1@ = second string with length
 *		@const char *file@, @unsigned lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
 *		zero.
 *
 * Use:		Check that strings at @p0@ and @p1@ are equal.  As for
 *		@tvec_claim@ above, a test case is automatically begun and
 *		ended if none is already underway.  If the values are
 *		unequal, then @tvec_fail@ is called, quoting @expr@, and the
 *		mismatched values are dumped: @p0@ is printed as the output
 *		value and @p1@ is printed as the input reference.
 */

int tvec_claimeq_string(struct tvec_state *tv,
			const char *p0, size_t sz0,
			const char *p1, size_t sz1,
			const char *file, unsigned lno, const char *expr)
{
  /* The reference string goes in input register 0. */
  tv->in[0].v.str.sz = sz1;
  tv->in[0].v.str.p = (/*unconst*/ char *)p1;

  /* The string under test goes in output register 0. */
  tv->out[0].v.str.sz = sz0;
  tv->out[0].v.str.p = (/*unconst*/ char *)p0;

  /* Let the generic machinery compare them as text strings. */
  return (tvec_claimeq(tv, &tvty_string, 0, file, lno, expr));
}

/* --- @tvec_claimeq_strz@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@const char *p0, *p1@ = two strings to compare
 *		@const char *file@, @unsigned lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
 *		zero.
 *
 * Use:		Check that strings at @p0@ and @p1@ are equal, as for
 *		@tvec_claimeq_string@, except that the strings are assumed
 *		null-terminated, so their lengths don't need to be supplied
 *		explicitly.
 */

int tvec_claimeq_strz(struct tvec_state *tv,
		      const char *p0, const char *p1,
		      const char *file, unsigned lno, const char *expr)
{
  /* Measure the strings up front. */
  size_t n0 = strlen(p0), n1 = strlen(p1);

  /* The string under test goes in output register 0, and the reference
   * string in input register 0.
   */
  tv->out[0].v.str.p = (/*unconst*/ char *)p0;
  tv->out[0].v.str.sz = n0;
  tv->in[0].v.str.p = (/*unconst*/ char *)p1;
  tv->in[0].v.str.sz = n1;

  return (tvec_claimeq(tv, &tvty_string, 0, file, lno, expr));
}

/* --- @tvec_claimeq_bytes@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@const void *p0@, @size_t sz0@ = first string with length
 *		@const void *p1@, @size_t sz1@ = second string with length
 *		@const char *file@, @unsigned lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
 *		zero.
 *
 * Use:		Check that binary strings at @p0@ and @p1@ are equal.  As for
 *		@tvec_claim@ above, a test case is automatically begun and
 *		ended if none is already underway.  If the values are
 *		unequal, then @tvec_fail@ is called, quoting @expr@, and the
 *		mismatched values are dumped: @p0@ is printed as the output
 *		value and @p1@ is printed as the input reference.
 */

int tvec_claimeq_bytes(struct tvec_state *tv,
		       const void *p0, size_t sz0,
		       const void *p1, size_t sz1,
		       const char *file, unsigned lno, const char *expr)
{
  /* The reference data goes in input register 0. */
  tv->in[0].v.bytes.sz = sz1;
  tv->in[0].v.bytes.p = (/*unconst*/ void *)p1;

  /* The data under test goes in output register 0. */
  tv->out[0].v.bytes.sz = sz0;
  tv->out[0].v.bytes.p = (/*unconst*/ void *)p0;

  /* Let the generic machinery compare them as binary strings. */
  return (tvec_claimeq(tv, &tvty_bytes, 0, file, lno, expr));
}

/* --- @tvec_allocstring@, @tvec_allocbytes@ --- *
 *
 * Arguments:	@union tvec_regval *rv@ = register value
 *		@size_t sz@ = required size
 *
 * Returns:	---
 *
 * Use:		Allocate space in a text or binary string register.  If the
 *		current register size is sufficient, its buffer is left
 *		alone; otherwise, the old buffer, if any, is freed and a
 *		fresh buffer allocated.  These functions are not intended to
 *		be used to adjust a buffer repeatedly, e.g., while building
 *		output incrementally: (a) they will perform badly, and (b)
 *		the old buffer contents are simply discarded if reallocation
 *		is necessary.  Instead, use a @dbuf@ or @dstr@.
 *
 *		The @tvec_allocstring@ function sneakily allocates an extra
 *		byte for a terminating zero.  The @tvec_allocbytes@ function
 *		doesn't do this.
 */

void tvec_allocstring(union tvec_regval *rv, size_t sz)
{
  /* Replace the buffer unless the recorded size is strictly larger than
   * the request.  The extra byte is for the terminating zero.
   */
  if (sz >= rv->str.sz) {
    xfree(rv->str.p);
    rv->str.p = xmalloc(sz + 1);
  }

  rv->str.sz = sz;
}

void tvec_allocbytes(union tvec_regval *rv, size_t sz)
{
  /* Replace the buffer only if the request exceeds the recorded size.  In
   * particular, nothing is allocated for an empty request.
   */
  if (sz > rv->bytes.sz) {
    xfree(rv->bytes.p);
    rv->bytes.p = xmalloc(sz);
  }

  rv->bytes.sz = sz;
}

/*----- Buffer type -------------------------------------------------------*/

/* Compare two buffer registers.  Only the sizes are compared: the
 * contents are not examined.
 */
static int eq_buffer(const union tvec_regval *rv0,
		     const union tvec_regval *rv1,
		     const struct tvec_regdef *rd)
{
  size_t n0 = rv0->bytes.sz, n1 = rv1->bytes.sz;

  return (n0 == n1);
}

/* Serialize a buffer register onto @b@.  Only the size is written, using
 * @unsigned_to_buf@, whose result is returned.
 */
static int tobuf_buffer(buf *b, const union tvec_regval *rv,
			 const struct tvec_regdef *rd)
{
  int rc;

  rc = unsigned_to_buf(b, rv->bytes.sz);
  return (rc);
}

/* Read a buffer register back from @b@.  Only the size is stored in the
 * serialized form; a buffer of that size is allocated and filled with `!'
 * bytes.  Returns zero on success, or %$-1$% if the size can't be read or
 * doesn't fit in a @size_t@.
 */
static int frombuf_buffer(buf *b, union tvec_regval *rv,
			  const struct tvec_regdef *rd)
{
  unsigned long u;

  if (unsigned_from_buf(b, &u)) return (-1);
  if (u > (size_t)-1) return (-1);
  tvec_allocbytes(rv, u);

  /* @tvec_allocbytes@ doesn't allocate anything for an empty buffer, so
   * the register's pointer may be null here.  Passing a null pointer to
   * @memset@ is undefined even for a zero length, so skip the fill.
   */
  if (u) memset(rv->bytes.p, '!', u);
  return (0);
}

/* Unit prefix letters for buffer sizes, in ascending order.  Each step
 * along the string is a further factor of 1024, as applied by
 * @parse_buffer@ and @dump_buffer@.
 */
static const char units[] = "kMGTPEZY";

/* Parse a buffer register value from the test-vector input.
 *
 * The value is a buffer size: an unsigned integer, optionally followed by
 * one of the prefix letters in @units@ (each a further factor of 1024) and
 * an optional `B'.  The suffix may be attached to the number or given as a
 * separate word.  The size is checked against the range (if any) in
 * @rd->arg.p@.  On success, a buffer of that size is allocated and filled
 * with `?' bytes.
 *
 * Returns zero on success or %$-1$% on failure.
 */
static int parse_buffer(union tvec_regval *rv,
			const struct tvec_regdef *rd,
			struct tvec_state *tv)
{
  dstr d = DSTR_INIT;
  const char *q, *unit;
  size_t pos;
  unsigned long u, t;
  int rc;
  unsigned f = 0;
  /* @f_range@: the scaled size no longer fits in a @size_t@. */
#define f_range 1u

  /* Read the number.  If nothing is left over after it, then try to read
   * a second word holding the suffix.  Reading into @d@ may move its
   * buffer, so recompute @q@ from the saved position; if a word was read,
   * step over one more character (presumably a separator which
   * @tvec_readword@ inserts between words -- TODO confirm).
   */
  if (tvec_readword(tv, &d, ";", "buffer length")) { rc = -1; goto end; }
  if (parse_unsigned_integer(&u, &q, d.buf)) goto bad;
  if (!*q) {
    tvec_skipspc(tv); pos = d.len;
    if (!tvec_readword(tv, &d, ";", 0)) pos++;
    q = d.buf + pos;
  }

  /* Apply the unit prefix, if any.  Scale @t@ by 1024 for each prefix
   * letter in turn until we find the one at @q@.  If the scaling would
   * overflow a @size_t@, then remember this, but only complain if a
   * matching prefix is actually found at or beyond that point.
   */
  if (u > (size_t)-1) goto rangerr;
  for (t = u, unit = units; *unit; unit++) {
    if (t > (size_t)-1/1024) f |= f_range;
    else t *= 1024;
    if (*q == *unit) {
      if (f&f_range) goto rangerr;
      u = t; q++; break;
    }
  }

  /* Skip an optional `B'; nothing else may follow. */
  if (*q == 'B') q++;
  if (*q) goto bad;
  if (check_string_length(u, rd->arg.p, tv)) { rc = -1; goto end; }

  if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
  tvec_allocbytes(rv, u);

  /* @tvec_allocbytes@ doesn't allocate anything for an empty buffer, so
   * the register's pointer may be null here.  Passing a null pointer to
   * @memset@ is undefined even for a zero length, so skip the fill.
   */
  if (u) memset(rv->bytes.p, '?', u);
  rc = 0;
end:
  DDESTROY(&d); return (rc);

bad:
  tvec_error(tv, "invalid buffer length `%s'", d.buf);
  rc = -1; goto end;

rangerr:
  tvec_error(tv, "buffer length `%s' out of range", d.buf);
  rc = -1; goto end;

#undef f_range
}

static void dump_buffer(const union tvec_regval *rv,
			const struct tvec_regdef *rd,
			unsigned style,
			const struct gprintf_ops *gops, void *go)
{
  const char *unit;
  unsigned long u = rv->bytes.sz;

  if (!u || u%1024)
    gprintf(gops, go, "%lu B", u);
  else {
    for (unit = units, u /= 1024; !(u%1024) && unit[1]; u /= 1024, unit++);
    gprintf(gops, go, "%lu %cB", u, *unit);
  }
}

/* The register type for buffers.  It shares its initialization and
 * release operations with @tvty_bytes@, but has its own comparison,
 * serialization, parsing, and dumping operations, which deal only with
 * the buffer's size.
 */
const struct tvec_regty tvty_buffer = {
  init_bytes, release_bytes, eq_buffer,
  tobuf_buffer, frombuf_buffer,
  parse_buffer, dump_buffer
};

/*----- That's all, folks -------------------------------------------------*/