@@@ tvec doc wip
[mLib] / test / tvec-types.c
CommitLineData
b64eb60f
MW
1/* -*-c-*-
2 *
3 * Types for the test-vector framework
4 *
5 * (c) 2023 Straylight/Edgeware
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of the mLib utilities library.
11 *
12 * mLib is free software: you can redistribute it and/or modify it under
13 * the terms of the GNU Library General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or (at
15 * your option) any later version.
16 *
17 * mLib is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20 * License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with mLib. If not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25 * USA.
26 */
27
28/*----- Header files ------------------------------------------------------*/
29
30#include <assert.h>
31#include <ctype.h>
32#include <errno.h>
e63124bc 33#include <float.h>
b64eb60f 34#include <limits.h>
e63124bc 35#include <math.h>
b64eb60f
MW
36#include <stdio.h>
37#include <string.h>
38
39#include "buf.h"
40#include "codec.h"
41# include "base32.h"
42# include "base64.h"
43# include "hex.h"
44#include "dstr.h"
67b5031e 45#include "maths.h"
b64eb60f
MW
46#include "tvec.h"
47
48/*----- Preliminary utilities ---------------------------------------------*/
49
67b5031e
MW
50/* --- @trivial_release@ --- *
51 *
52 * Arguments: @union tvec_regval *rv@ = a register value
 *		@const struct tvec_regdef *rd@ = the register definition
54 *
55 * Returns: ---
56 *
57 * Use: Does nothing. Used for register values which don't retain
58 * resources.
59 */
3efcfd2d
MW
60
static void trivial_release(union tvec_regval *rv,
                            const struct tvec_regdef *rd)
{
  /* Deliberately empty: both arguments are ignored. */
}
64
67b5031e
MW
65/*----- Integer utilities -------------------------------------------------*/
66
67/* --- @unsigned_to_buf@, @signed_to_buf@ --- *
68 *
69 * Arguments: @buf *b@ = buffer to write on
70 * @unsigned long u@ or @long i@ = integer to write
71 *
72 * Returns: Zero on success, @-1@ on failure.
73 *
 * Use:		Write @i@ to the buffer, in big-endian (two's-complement, if
 *		signed) format.
76 */
77
78static int unsigned_to_buf(buf *b, unsigned long u)
79 { kludge64 k; ASSIGN64(k, u); return (buf_putk64l(b, k)); }
80
b64eb60f
MW
81static int signed_to_buf(buf *b, long i)
82{
83 kludge64 k;
84 unsigned long u;
85
86 u = i;
87 if (i >= 0) ASSIGN64(k, u);
88 else { ASSIGN64(k, ~u); CPL64(k, k); }
89 return (buf_putk64l(b, k));
90}
91
67b5031e
MW
92/* --- @unsigned_from_buf@, @signed_from_buf@ --- *
93 *
 * Arguments:	@buf *b@ = buffer to read from
95 * @unsigned long *u_out@ or @long *i_out@ = where to put the
96 * result
97 *
98 * Returns: Zero on success, @-1@ on failure.
99 *
100 * Use: Read an integer, in big-endian (two's-complement, if signed)
101 * format, from the buffer.
102 */
b64eb60f
MW
103
104static int unsigned_from_buf(buf *b, unsigned long *u_out)
105{
106 kludge64 k, ulmax;
107
108 ASSIGN64(ulmax, ULONG_MAX);
109 if (buf_getk64l(b, &k)) return (-1);
110 if (CMP64(k, >, ulmax)) return (-1);
111 *u_out = GET64(unsigned long, k); return (0);
112}
113
67b5031e
MW
114/* --- @hex_width@ --- *
115 *
116 * Arguments: @unsigned long u@ = an integer
117 *
118 * Returns: A suitable number of digits to use in order to display @u@ in
119 * hex. Currently, we select a power of two sufficient to show
120 * the value, but at least 2.
121 */
122
b64eb60f
MW
static int hex_width(unsigned long u)
{
  unsigned long rest = u >> 4;
  int bits = 4;

  /* Keep doubling the width (in bits) until nothing is left over.  The
   * first doubling is unconditional, which is what guarantees a minimum
   * of two digits.
   */
  do { rest >>= bits; bits *= 2; } while (rest);

  /* Four bits per hex digit. */
  return (bits/4);
}
131
67b5031e
MW
132/* --- @format_unsigned_hex@, @format_signed_hex@ --- *
133 *
134 * Arguments: @const struct gprintf_ops *gops@ = print operations
135 * @void *go@ = print destination
136 * @unsigned long u@ or @long i@ = integer to print
137 *
138 * Returns: ---
139 *
140 * Use: Print an unsigned or signed integer in hexadecimal.
141 */
142
static void format_unsigned_hex(const struct gprintf_ops *gops, void *go,
                                unsigned long u)
{
  /* Zero-pad to the width chosen by @hex_width@. */
  int digits = hex_width(u);

  gprintf(gops, go, "0x%0*lx", digits, u);
}
146
static void format_signed_hex(const struct gprintf_ops *gops, void *go,
                              long i)
{
  const char *sign = "";
  unsigned long mag;

  /* Split into sign and magnitude.  The negation is done in unsigned
   * arithmetic, so it is well-defined even for @LONG_MIN@.
   */
  if (i >= 0)
    mag = i;
  else
    { mag = -(unsigned long)i; sign = "-"; }

  gprintf(gops, go, "%s0x%0*lx", sign, hex_width(mag), mag);
}
153
154static int signed_from_buf(buf *b, long *i_out)
155{
156 kludge64 k, lmax, not_lmin;
157
158 ASSIGN64(lmax, LONG_MAX); ASSIGN64(not_lmin, ~(unsigned long)LONG_MIN);
159 if (buf_getk64l(b, &k)) return (-1);
160 if (CMP64(k, <=, lmax)) *i_out = (long)GET64(unsigned long, k);
161 else {
162 CPL64(k, k);
163 if (CMP64(k, <=, not_lmin)) *i_out = -(long)GET64(unsigned long, k) - 1;
164 else return (-1);
165 }
166 return (0);
167}
168
169/* --- @check_unsigned_range@, @check_signed_range@ --- *
170 *
171 * Arguments: @unsigned long u@ or @long i@ = an integer
172 * @const struct tvec_urange *ur@ or
173 * @const struct tvec_irange *ir@ = range specification,
174 * or null
175 * @struct tvec_state *tv@ = test vector state
176 *
177 * Returns: Zero on success, or @-1@ on error.
178 *
179 * Use: Check that the integer is within bounds. If not, report a
180 * suitable error and return a failure indication.
181 */
182
882a39c1
MW
183static int check_signed_range(long i,
184 const struct tvec_irange *ir,
185 struct tvec_state *tv)
b64eb60f 186{
882a39c1 187 if (ir && (ir->min > i || i > ir->max)) {
b64eb60f
MW
188 tvec_error(tv, "integer %ld out of range (must be in [%ld .. %ld])",
189 i, ir->min, ir->max);
882a39c1
MW
190 return (-1);
191 }
192 return (0);
b64eb60f
MW
193}
194
882a39c1
MW
195static int check_unsigned_range(unsigned long u,
196 const struct tvec_urange *ur,
197 struct tvec_state *tv)
b64eb60f 198{
882a39c1 199 if (ur && (ur->min > u || u > ur->max)) {
b64eb60f
MW
200 tvec_error(tv, "integer %lu out of range (must be in [%lu .. %lu])",
201 u, ur->min, ur->max);
882a39c1
MW
202 return (-1);
203 }
204 return (0);
b64eb60f
MW
205}
206
67b5031e
MW
207/* --- @chtodig@ --- *
208 *
209 * Arguments: @int ch@ = a character
210 *
211 * Returns: The numeric value of the character as a digit, or @-1@ if
212 * it's not a digit. Letters count as extended digits starting
213 * with value 10; case is not significant.
214 */
215
3efcfd2d
MW
static int chtodig(int ch)
{
  /* The letter cases rely on the letters being contiguous in the
   * execution character set.
   */
  if ('0' <= ch && ch <= '9') return (ch - '0');
  else if ('a' <= ch && ch <= 'z') return (ch - 'a' + 10);
  else if ('A' <= ch && ch <= 'Z') return (ch - 'A' + 10);
  else return (-1);
}

/* --- @parse_unsigned_integer@, @parse_signed_integer@ --- *
 *
 * Arguments:	@unsigned long *u_out@, @long *i_out@ = where to put the
 *			result
 *		@const char **q_out@ = where to put the end position
 *		@const char *p@ = pointer to the string to parse
 *
 * Returns:	Zero on success, @-1@ on error.
 *
 * Use:		Parse an integer from a string in the test-vector format.
 *		This is mostly an extension of the traditional C @strtoul@
 *		format: supported inputs include:
 *
 *		  * NNN -- a decimal number (even if it starts with `0');
 *		  * 0xNNN -- hexadecimal;
 *		  * 0oNNN -- octal;
 *		  * 0bNNN -- binary;
 *		  * NNrNNN -- base NN, where NN is a decimal number between
 *		    1 and 36 inclusive.
 *
 *		Furthermore, single underscores are permitted internally as
 *		an insignificant digit separator.
 *
 *		On success, @*q_out@ points just past the last digit
 *		consumed; anything after that is left for the caller to
 *		deal with.  Fails if there are no digits, or if the value
 *		would overflow an @unsigned long@.
 */

static int parse_unsigned_integer(unsigned long *u_out, const char **q_out,
                                  const char *p)
{
  unsigned long u;
  int ch, d, r;
  const char *q;
  unsigned f = 0;
#define f_implicit 1u			/* implicitly reading base 10 */
#define f_digit 2u			/* read a real digit */
#define f_uscore 4u			/* found an underscore */

  /* Initial setup
   *
   * This will deal with the traditional `0[box]...' prefixes.  We'll leave
   * our new `NNr...' syntax for later.
   */
  if (p[0] != '0' || !p[1]) {
    /* No leading zero (or a lone `0'): insist on a decimal digit. */
    d = chtodig(*p); if (0 > d || d >= 10) return (-1);
    r = 10; u = d; p++; f |= f_implicit | f_digit;
  } else {
    /* A leading zero.  A radix prefix only counts if it's followed by a
     * digit which is valid in that radix; otherwise the zero is just an
     * ordinary decimal digit.  (Looking at @p[2]@ is safe because @p[1]@
     * isn't the terminator.)
     */
    u = 0; d = chtodig(p[2]);
    if (d < 0) { r = 10; f |= f_implicit | f_digit; p++; }
    else if ((p[1] == 'x' || p[1] == 'X') && d < 16) { r = 16; p += 2; }
    else if ((p[1] == 'o' || p[1] == 'O') && d < 8) { r = 8; p += 2; }
    else if ((p[1] == 'b' || p[1] == 'B') && d < 2) { r = 2; p += 2; }
    else { r = 10; f |= f_digit; p++; }
  }

  /* @q@ tracks the end of the last thing we were happy with, so that a
   * trailing underscore or a rejected `r' isn't consumed.
   */
  q = p;
  for (;;) {
    /* Work through the string a character at a time. */

    ch = *p; switch (ch) {

      case '_':
	/* An underscore is OK if we haven't just seen one. */

	if (f&f_uscore) goto done;
	p++; f = (f&~f_implicit) | f_uscore;
	break;

      case 'r': case 'R':
	/* An `r' is OK if the number so far is small enough to be a sensible
	 * base, and we're scanning decimal implicitly.  The largest digit
	 * value that @chtodig@ produces is 35, so base 36 is the largest
	 * one which makes sense -- and it must be accepted.  The `r' must
	 * also be followed by a digit which is valid in the new base.
	 */

	if (!(f&f_implicit) || !u || u > 36) goto done;
	d = chtodig(p[1]); if (0 > d || d >= u) goto done;
	r = u; u = d; f = (f&~f_implicit) | f_digit; p += 2; q = p;
	break;

      default:
	/* Otherwise we expect a valid digit and accumulate it, checking
	 * for overflow before each step.
	 */
	d = chtodig(ch); if (d < 0 || d >= r) goto done;
	if (u > ULONG_MAX/r) return (-1);
	u *= r; if (u > ULONG_MAX - d) return (-1);
	u += d; f = (f&~f_uscore) | f_digit; p++; q = p;
	break;
    }
  }

done:
  if (!(f&f_digit)) return (-1);
  *u_out = u; *q_out = q; return (0);

#undef f_implicit
#undef f_digit
#undef f_uscore
}
316
static int parse_signed_integer(long *i_out, const char **q_out,
                                const char *p)
{
  unsigned long mag;
  int neg = 0;

  /* Strip off an optional sign. */
  switch (*p) {
    case '-': neg = 1; /* fall through */
    case '+': p++; break;
    default: break;
  }

  /* The rest is an unsigned magnitude. */
  if (parse_unsigned_integer(&mag, q_out, p)) return (-1);

  /* Apply the sign, taking care over the limits.  The negative case works
   * with %$m - 1$% throughout so that @LONG_MIN@ can be reached without
   * overflowing.
   */
  if (!neg) {
    if (mag > LONG_MAX) return (-1);
    *i_out = mag;
  } else if (!mag)
    *i_out = 0;
  else {
    if (mag - 1 > -(LONG_MIN + 1)) return (-1);
    *i_out = -(long)(mag - 1) - 1;
  }
  return (0);
}
344
67b5031e
MW
345/* --- @parse_unsigned@, @parse_signed@ --- *
346 *
347 * Arguments: @unsigned long *u_out@ or @long *i_out@ = where to put the
348 * result
349 * @const char *p@ = string to parse
350 * @const struct tvec_urange *ur@ or
351 * @const struct tvec_irange *ir@ = range specification,
352 * or null
353 * @struct tvec_state *tv@ = test vector state
354 *
355 * Returns: Zero on success, @-1@ on error.
356 *
357 * Use: Parse and range-check an integer. Unlike @parse_(un)signed_
358 * integer@, these functions check that there's no cruft
359 * following the final digit, and report errors as they find
360 * them rather than leaving that to the caller.
361 */
362
static int parse_unsigned(unsigned long *u_out, const char *p,
                          const struct tvec_urange *ur,
                          struct tvec_state *tv)
{
  unsigned long val;
  const char *end;
  int rc;

  /* Each stage reports its own error; @*u_out@ is only written if they
   * all succeed.
   */
  if (parse_unsigned_integer(&val, &end, p))
    rc = tvec_error(tv, "invalid unsigned integer `%s'", p);
  else if (*end)
    rc = tvec_syntax(tv, *end, "end-of-line");
  else if (check_unsigned_range(val, ur, tv))
    rc = -1;
  else
    { *u_out = val; rc = 0; }
  return (rc);
}
376
882a39c1
MW
static int parse_signed(long *i_out, const char *p,
                        const struct tvec_irange *ir,
                        struct tvec_state *tv)
{
  long val;
  const char *end;
  int rc;

  /* Each stage reports its own error; @*i_out@ is only written if they
   * all succeed.
   */
  if (parse_signed_integer(&val, &end, p))
    rc = tvec_error(tv, "invalid signed integer `%s'", p);
  else if (*end)
    rc = tvec_syntax(tv, *end, "end-of-line");
  else if (check_signed_range(val, ir, tv))
    rc = -1;
  else
    { *i_out = val; rc = 0; }
  return (rc);
}
390
67b5031e 391/*----- Floating-point utilities ------------------------------------------*/
b64eb60f 392
67b5031e
MW
393/* --- @eqish_floating_p@ --- *
394 *
395 * Arguments: @double x, y@ = two numbers to compare
396 * @const struct tvec_floatinfo *fi@ = floating-point info
397 *
398 * Returns: Nonzero if the comparand @y@ is sufficiently close to the
399 * reference @x@, or zero if it's definitely different.
400 */
3efcfd2d 401
67b5031e
MW
402static int eqish_floating_p(double x, double y,
403 const struct tvec_floatinfo *fi)
3efcfd2d 404{
67b5031e
MW
405 double t;
406
407 if (NANP(x)) return (NANP(y)); else if (NANP(y)) return (0);
408 if (INFP(x)) return (x == y); else if (INFP(y)) return (0);
409
410 switch (fi ? fi->f&TVFF_EQMASK : TVFF_EXACT) {
411 case TVFF_EXACT:
412 return (x == y && NEGP(x) == NEGP(y));
413 case TVFF_ABSDELTA:
414 t = x - y; if (t < 0) t = -t; return (t < fi->delta);
415 case TVFF_RELDELTA:
416 t = 1.0 - y/x; if (t < 0) t = -t; return (t < fi->delta);
417 default:
418 abort();
419 }
b64eb60f
MW
420}
421
67b5031e
MW
422/* --- @format_floating@ --- *
423 *
424 * Arguments: @const struct gprintf_ops *gops@ = print operations
425 * @void *go@ = print destination
426 * @double x@ = number to print
427 *
428 * Returns: ---
429 *
430 * Use: Print a floating-point number, accurately.
431 */
3efcfd2d 432
e63124bc
MW
static void format_floating(const struct gprintf_ops *gops, void *go,
                            double x)
{
  int prec;

  /* Non-finite values get their own special tokens. */
  if (NANP(x)) { gprintf(gops, go, "#nan"); return; }
  if (INFP(x)) { gprintf(gops, go, x > 0 ? "#+inf" : "#-inf"); return; }

  /* Ugh.  C doesn't provide any function for just printing a
   * floating-point number /correctly/, i.e., so that you can read the
   * result back and recover the number you first thought of.  There are
   * complicated algorithms published for doing this, but I really don't
   * want to get into that here.  So we have this.
   *
   * The sign doesn't cause significant difficulty so we're going to ignore
   * it for now.  So suppose we're given a number %$x = f b^e$%, in
   * base-%$b$% format, so %$f b^n$% and %$e$% are integers, with
   * %$0 \le f < 1$%.  We're going to convert it into the nearest number
   * of the form %$X = F B^E$%, with similar conditions, only with the
   * additional requirement that %$X$% is normalized, i.e., that %$X = 0$%
   * or %$F \ge B^{-1}$%.
   *
   * We're rounding to the nearest such %$X$%.  If there is to be ambiguity
   * in the conversion, then some %$x = f b^e$% and the next larger
   * representable number %$x' = x + b^{e-n}$% must both map to the same
   * %$X$%, which means both %$x$% and %$x'$% must be nearer to %$X$% than
   * any other number representable in the target system.  The next larger
   * number is %$X' = X + B^{E-N}$%; the next smaller number will normally
   * be %$W = X - B^{E-N}$%, but if %$F = 1/B$% then the next smaller number
   * is actually %$X - B^{E-N-1}$%.  We ignore this latter possibility in
   * the pursuit of a conservative estimate (though actually it doesn't
   * matter).
   *
   * If both %$x$% and %$x'$% map to %$X$% then we must have
   * %$L = X - B^{E-N}/2 \le x$% and %$x + b^{e-n} \le R = X + B^{E-N}/2$%;
   * so firstly %$f b^e = x \ge L = W + B^{E-N}/2 > W = (F - B^{-N}) B^E$%,
   * and secondly %$b^{e-n} \le B^{E-N}$%.  Since these inequalities are in
   * opposite senses, we can divide, giving
   *
   *	%$f b^e/b^{e-n} > (F - B^{-N}) B^E/B^{E-N}$% ,
   *
   * whence
   *
   *	%$f b^n > (F - B^{-N}) B^N = F B^N - 1$% .
   *
   * Now %$f \le 1 - b^{-n}$%, and %$F \ge B^{-1}$%, so, for this to be
   * possible, it must be the case that
   *
   *	%$(1 - b^{-n}) b^n = b^n - 1 > B^{N-1} - 1$% .
   *
   * Then rearrange and take logarithms, obtaining
   *
   *	%$(N - 1) \log B < n \log b$% ,
   *
   * and so
   *
   *	%$N < n \log b/\log B + 1$% .
   *
   * Recall that this is a necessary condition for a collision to occur; we
   * are therefore safe whenever
   *
   *	%$N \ge n \log b/\log B + 1$% ;
   *
   * so, taking ceilings,
   *
   *	%$N \ge \lceil n \log b/\log B \rceil + 1$% .
   *
   * So that's why we have this.
   *
   * I'm going to assume that @n = DBL_MANT_DIG@ is sufficiently small that
   * we can calculate this without ending up on the wrong side of an
   * integer boundary.
   *
   * In C11, we have @DBL_DECIMAL_DIG@, which should be the same value only
   * as a constant.  Except that modern compilers are more than clever
   * enough to work out that this is a constant anyway.
   *
   * This is sometimes an overestimate: we'll print out meaningless digits
   * that don't represent anything we actually know about the number in
   * question.  To fix that, we'd need a complicated algorithm like Steele
   * and White's Dragon4, Gay's @dtoa@, or Burger and Dybvig's algorithm
   * (note that Loitsch's Grisu2 is conservative, and Grisu3 hands off to
   * something else in difficult situations).
   */
  prec = ceil(DBL_MANT_DIG*log(FLT_RADIX)/log(10)) + 1;
  gprintf(gops, go, "%.*g", prec, x);
}
524
67b5031e
MW
525/* --- @parse_floating@ --- *
526 *
527 * Arguments: @double *x_out@ = where to put the result
528 * @const char *p@ = string to parse
529 * @const struct tvec_floatinfo *fi@ = floating-point info
530 * @struct tvec_state *tv@ = test vector state
531 *
532 * Returns: Zero on success, @-1@ on error.
533 *
534 * Use: Parse a floating-point number from a string. Reports any
535 * necessary errors.
536 */
e63124bc
MW
537
538static int parse_floating(double *x_out, const char *p,
539 const struct tvec_floatinfo *fi,
540 struct tvec_state *tv)
541{
542 const char *pp; char *q;
543 dstr d = DSTR_INIT;
544 double x;
545 int olderr, rc;
546
67b5031e 547 /* Check for special tokens. */
e63124bc
MW
548 if (STRCMP(p, ==, "#nan")) {
549#ifdef NAN
550 x = NAN; rc = 0;
551#else
552 tvec_error(tv, "NaN not supported on this system");
553 rc = -1; goto end;
554#endif
67b5031e
MW
555 }
556
557 else if (STRCMP(p, ==, "#inf") ||
558 STRCMP(p, ==, "#+inf") || STRCMP(p, ==, "+#inf")) {
3efcfd2d 559#ifdef INFINITY
e63124bc
MW
560 x = INFINITY; rc = 0;
561#else
562 tvec_error(tv, "infinity not supported on this system");
563 rc = -1; goto end;
564#endif
67b5031e
MW
565 }
566
567 else if (STRCMP(p, ==, "#-inf") || STRCMP(p, ==, "-#inf")) {
3efcfd2d 568#ifdef INFINITY
e63124bc
MW
569 x = -INFINITY; rc = 0;
570#else
571 tvec_error(tv, "infinity not supported on this system");
572 rc = -1; goto end;
573#endif
67b5031e
MW
574 }
575
576 /* Check that this looks like a number, so we can exclude `strtod'
577 * recognizing its own non-finite number tokens.
578 */
579 else {
e63124bc
MW
580 pp = p;
581 if (*pp == '+' || *pp == '-') pp++;
582 if (*pp == '.') pp++;
583 if (!ISDIGIT(*pp)) {
3efcfd2d 584 tvec_syntax(tv, *p ? *p : fgetc(tv->fp), "floating-point number");
e63124bc
MW
585 rc = -1; goto end;
586 }
67b5031e
MW
587
588 /* Parse the number using the system parser. */
e63124bc
MW
589 olderr = errno; errno = 0;
590 x = strtod(p, &q);
591 if (*q) {
592 tvec_syntax(tv, *q, "end-of-line");
593 rc = -1; goto end;
594 }
595 if (errno && (errno != ERANGE || (x > 0 ? -x : x) == HUGE_VAL)) {
596 tvec_error(tv, "invalid floating-point number `%s': %s",
597 p, strerror(errno));
598 rc = -1; goto end;
599 }
600 errno = olderr;
601 }
602
67b5031e 603 /* Check that the number is acceptable. */
e63124bc
MW
604 if (NANP(x) && fi && !(fi->f&TVFF_NANOK)) {
605 tvec_error(tv, "#nan not allowed here");
606 rc = -1; goto end;
607 }
67b5031e 608
e63124bc
MW
609 if (fi && ((!(fi->f&TVFF_NOMIN) && x < fi->min) ||
610 (!(fi->f&TVFF_NOMAX) && x > fi->max))) {
611 dstr_puts(&d, "floating-point number ");
612 format_floating(&dstr_printops, &d, x);
613 dstr_puts(&d, " out of range (must be in ");
614 if (fi->f&TVFF_NOMIN)
615 dstr_puts(&d, "(#-inf");
616 else
617 { dstr_putc(&d, '['); format_floating(&dstr_printops, &d, fi->min); }
618 dstr_puts(&d, " .. ");
619 if (fi->f&TVFF_NOMAX)
620 dstr_puts(&d, "#+inf)");
621 else
622 { format_floating(&dstr_printops, &d, fi->max); dstr_putc(&d, ']'); }
623 dstr_putc(&d, ')'); dstr_putz(&d);
624 tvec_error(tv, "%s", d.buf); rc = -1; goto end;
625 }
626
67b5031e
MW
627 /* All done. */
628 *x_out = x; rc = 0;
629end:
630 dstr_destroy(&d);
631 return (rc);
632}
633
634/*----- String utilities --------------------------------------------------*/
635
636/* Special character name table. */
/* Each entry maps one name to one character value.  Several names may map
 * to the same value.  Lookups by value (@find_charname@) return the first
 * entry with a wanted flag, so the order of entries for the same value is
 * significant.  The table is terminated by an entry with a null name.
 */
static const struct chartab {
  const char *name;			/* character name */
  int ch;				/* character value */
  unsigned f;				/* flags: */
#define CTF_PREFER 1u			/*   preferred name */
#define CTF_SHORT 2u			/*   short name (compact style) */
} chartab[] = {
  { "#eof",		EOF,	CTF_PREFER | CTF_SHORT },
  { "#nul",		'\0',	CTF_PREFER },
  { "#bell",		'\a',	CTF_PREFER },
  { "#ding",		'\a',	0 },
  { "#bel",		'\a',	CTF_SHORT },
  { "#backspace",	'\b',	CTF_PREFER },
  { "#bs",		'\b',	CTF_SHORT },
  { "#escape",		'\x1b',	CTF_PREFER },
  { "#esc",		'\x1b',	CTF_SHORT },
  { "#formfeed",	'\f',	CTF_PREFER },
  { "#ff",		'\f',	CTF_SHORT },
  { "#newline",		'\n',	CTF_PREFER },
  { "#linefeed",	'\n',	0 },
  { "#lf",		'\n',	CTF_SHORT },
  { "#nl",		'\n',	0 },
  { "#return",		'\r',	CTF_PREFER },
  { "#carriage-return",	'\r',	0 },
  { "#cr",		'\r',	CTF_SHORT },
  { "#tab",		'\t',	CTF_PREFER | CTF_SHORT },
  { "#horizontal-tab",	'\t',	0 },
  { "#ht",		'\t',	0 },
  { "#vertical-tab",	'\v',	CTF_PREFER },
  { "#vt",		'\v',	CTF_SHORT },
  { "#space",		' ',	0 },	/* no preferred name for space */
  { "#spc",		' ',	CTF_SHORT },
  { "#delete",		'\x7f',	CTF_PREFER },
  { "#del",		'\x7f',	CTF_SHORT },
  { 0,			0,	0 }	/* end-of-table marker */
};
673
674/* --- @find_charname@ --- *
675 *
676 * Arguments: @int ch@ = character to match
677 * @unsigned f@ = flags (@CTF_...@) to match
678 *
679 * Returns: The name of the character, or null if no match is found.
680 *
681 * Use: Looks up a name for a character. Specifically, it returns
682 * the first entry in the @chartab@ table which matches @ch@ and
683 * which has one of the flags @f@ set.
684 */
685
686static const char *find_charname(int ch, unsigned f)
687{
688 const struct chartab *ct;
689
690 for (ct = chartab; ct->name; ct++)
691 if (ct->ch == ch && (ct->f&f)) return (ct->name);
692 return (0);
693}
694
695/* --- @read_charname@ --- *
696 *
697 * Arguments: @int *ch_out@ = where to put the character
698 * @const char *p@ = character name
 *		@unsigned f@ = flags (@RCF_...@)
700 *
701 * Returns: Zero if a match was found, @-1@ if not.
702 *
703 * Use: Looks up a character by name. If @RCF_EOFOK@ is set in @f@,
704 * then the @EOF@ marker can be matched; otherwise it can't.
705 */
706
707#define RCF_EOFOK 1u
708static int read_charname(int *ch_out, const char *p, unsigned f)
709{
710 const struct chartab *ct;
711
712 for (ct = chartab; ct->name; ct++)
713 if (STRCMP(p, ==, ct->name) && ((f&RCF_EOFOK) || ct->ch >= 0))
714 { *ch_out = ct->ch; return (0); }
715 return (-1);
716}
717
718/* --- @format_charesc@ --- *
719 *
720 * Arguments: @const struct gprintf_ops *gops@ = print operations
721 * @void *go@ = print destination
722 * @int ch@ = character to format
723 * @unsigned f@ = flags (@FCF_...@)
724 *
725 * Returns: ---
726 *
727 * Use: Format a character as an escape sequence, possibly as part of
728 * a larger string. If @FCF_BRACE@ is set in @f@, then put
729 * braces around a `\x...' code, so that it's suitable for use
730 * in a longer string.
731 */
732
#define FCF_BRACE 1u
static void format_charesc(const struct gprintf_ops *gops, void *go,
                           int ch, unsigned f)
{
  /* Hex escapes are always written with enough digits for @UCHAR_MAX@. */
  int digits = hex_width(UCHAR_MAX);

  switch (ch) {

    /* Characters with their own single-letter escapes. */
    case '\a': gprintf(gops, go, "\\a"); break;
    case '\b': gprintf(gops, go, "\\b"); break;
    case '\x1b': gprintf(gops, go, "\\e"); break;
    case '\f': gprintf(gops, go, "\\f"); break;
    case '\r': gprintf(gops, go, "\\r"); break;
    case '\n': gprintf(gops, go, "\\n"); break;
    case '\t': gprintf(gops, go, "\\t"); break;
    case '\v': gprintf(gops, go, "\\v"); break;
    case '\\': gprintf(gops, go, "\\\\"); break;
    case '\'': gprintf(gops, go, "\\'"); break;

    /* A null character is written as a short octal escape, braced on
     * request.
     */
    case '\0':
      gprintf(gops, go, f&FCF_BRACE ? "\\{0}" : "\\0");
      break;

    /* Everything else becomes a hex escape, braced on request. */
    default:
      gprintf(gops, go, f&FCF_BRACE ? "\\x{%0*x}" : "\\x%0*x", digits, ch);
      break;
  }
}
760
761/* --- @format_char@ --- *
762 *
763 * Arguments: @const struct gprintf_ops *gops@ = print operations
764 * @void *go@ = print destination
765 * @int ch@ = character to format
766 *
767 * Returns: ---
768 *
769 * Use: Format a single character.
770 */
771
static void format_char(const struct gprintf_ops *gops, void *go, int ch)
{
  /* A printable character is shown literally between quotes, unless it's
   * a backslash or quote; those and all non-printing characters are
   * written as an escape sequence.
   */
  if (ch != '\\' && ch != '\'' && isprint(ch))
    gprintf(gops, go, "'%c'", ch);
  else {
    gprintf(gops, go, "'");
    format_charesc(gops, go, ch, 0);
    gprintf(gops, go, "'");
  }
}
786
787/* --- @maybe_format_unsigned_char@, @maybe_format_signed_char@ --- *
788 *
789 * Arguments: @const struct gprintf_ops *gops@ = print operations
790 * @void *go@ = print destination
791 * @unsigned long u@ or @long i@ = an integer
792 *
793 * Returns: ---
794 *
795 * Use: Format a (signed or unsigned) integer as a character, if it's
796 * in range, printing something like `= 'q''. It's assumed that
797 * a comment marker has already been output.
798 */
799
800static void maybe_format_unsigned_char
801 (const struct gprintf_ops *gops, void *go, unsigned long u)
802{
803 const char *p;
804
805 p = find_charname(u, CTF_PREFER);
806 if (p) gprintf(gops, go, " = %s", p);
807 if (u < UCHAR_MAX)
808 { gprintf(gops, go, " = "); format_char(gops, go, u); }
e63124bc
MW
809}
810
67b5031e
MW
811static void maybe_format_signed_char
812 (const struct gprintf_ops *gops, void *go, long i)
b64eb60f 813{
67b5031e
MW
814 const char *p;
815
816 p = find_charname(i, CTF_PREFER);
817 if (p) gprintf(gops, go, " = %s", p);
818 if (0 <= i && i < UCHAR_MAX)
819 { gprintf(gops, go, " = "); format_char(gops, go, i); }
b64eb60f
MW
820}
821
67b5031e
MW
822/* --- @read_charesc@ --- *
823 *
824 * Arguments: @int *ch_out@ = where to put the result
825 * @struct tvec_state *tv@ = test vector state
826 *
827 * Returns: Zero on success, @-1@ on error.
828 *
829 * Use: Parse and convert an escape sequence from @tv@'s input
830 * stream, assuming that the initial `\' has already been read.
831 * Reports errors as appropriate.
832 */
833
static int read_charesc(int *ch_out, struct tvec_state *tv)
{
  int ch, i, esc;
  unsigned f = 0;
#define f_brace 1u			/* escape is wrapped in `{...}' */

  /* Dispatch on the character following the backslash. */
  ch = getc(tv->fp);
  switch (ch) {

    /* Things we shouldn't find. */
    case EOF: case '\n': return (tvec_syntax(tv, ch, "string escape"));

    /* Single-character escapes. */
    case '\'': *ch_out = '\''; break;
    case '\\': *ch_out = '\\'; break;
    case '"': *ch_out = '"'; break;
    case 'a': *ch_out = '\a'; break;
    case 'b': *ch_out = '\b'; break;
    case 'e': *ch_out = '\x1b'; break;
    case 'f': *ch_out = '\f'; break;
    case 'n': *ch_out = '\n'; break;
    case 'r': *ch_out = '\r'; break;
    case 't': *ch_out = '\t'; break;
    case 'v': *ch_out = '\v'; break;

    /* Hex escapes, with and without braces. */
    case 'x':
      ch = getc(tv->fp);
      if (ch == '{') { f |= f_brace; ch = getc(tv->fp); }
      else f &= ~f_brace;

      /* There must be at least one hex digit. */
      esc = chtodig(ch);
      if (esc < 0 || esc >= 16) return (tvec_syntax(tv, ch, "hex digit"));

      /* Accumulate further digits.  The range check is inside the loop,
       * so the accumulator can't grow without bound.
       */
      for (;;) {
	ch = getc(tv->fp); i = chtodig(ch); if (i < 0 || i >= 16) break;
	esc = 16*esc + i;
	if (esc > UCHAR_MAX)
	  return (tvec_error(tv,
			     "character code %d out of range", esc));
      }

      /* Without braces, the character which stopped us isn't ours, so
       * put it back; with braces, it must be the closing brace.
       */
      if (!(f&f_brace)) ungetc(ch, tv->fp);
      else if (ch != '}') return (tvec_syntax(tv, ch, "`}'"));
      *ch_out = esc;
      break;

    /* Other things, primarily octal escapes. */
    case '{':
      f |= f_brace; ch = getc(tv->fp);
      /* fall through */
    default:
      if ('0' <= ch && ch < '8') {

	/* Read at most three octal digits in total.  A non-digit is
	 * pushed back; if we stop because we've read three digits then
	 * there's nothing to push back.
	 */
	i = 1; esc = ch - '0';
	for (;;) {
	  ch = getc(tv->fp);
	  if ('0' > ch || ch >= '8') { ungetc(ch, tv->fp); break; }
	  esc = 8*esc + ch - '0';
	  i++; if (i >= 3) break;
	}

	/* A braced escape must be closed off properly. */
	if (f&f_brace) {
	  ch = getc(tv->fp);
	  if (ch != '}') return (tvec_syntax(tv, ch, "`}'"));
	}

	/* Three octal digits can exceed the range of a character. */
	if (esc > UCHAR_MAX)
	  return (tvec_error(tv,
			     "character code %d out of range", esc));
	*ch_out = esc; break;
      } else
	return (tvec_syntax(tv, ch, "string escape"));
  }

  /* Done. */
  return (0);

#undef f_brace
}
908
67b5031e
MW
909/* --- @read_quoted_string@ --- *
910 *
911 * Arguments: @dstr *d@ = string to write to
912 * @int quote@ = initial quote, `'' or `"'
913 * @struct tvec_state *tv@ = test vector state
914 *
915 * Returns: Zero on success, @-1@ on error.
916 *
917 * Use: Read the rest of a quoted string into @d@, reporting errors
918 * as appropriate.
919 *
920 * A single-quoted string is entirely literal. A double-quoted
921 * string may contain C-like escapes.
922 */
923
e63124bc
MW
924static int read_quoted_string(dstr *d, int quote, struct tvec_state *tv)
925{
926 int ch;
b64eb60f
MW
927
928 for (;;) {
929 ch = getc(tv->fp);
b64eb60f
MW
930 switch (ch) {
931 case EOF: case '\n':
e63124bc 932 return (tvec_syntax(tv, ch, "`%c'", quote));
b64eb60f
MW
933 case '\\':
934 if (quote == '\'') goto ordinary;
e63124bc 935 ch = getc(tv->fp); if (ch == '\n') { tv->lno++; break; }
67b5031e 936 ungetc(ch, tv->fp); if (read_charesc(&ch, tv)) return (-1);
e63124bc 937 goto ordinary;
b64eb60f
MW
938 default:
939 if (ch == quote) goto end;
940 ordinary:
941 DPUTC(d, ch);
942 break;
943 }
944 }
945
946end:
947 DPUTZ(d);
882a39c1 948 return (0);
e63124bc 949}
b64eb60f 950
67b5031e
MW
951/* --- @collect_bare@ --- *
952 *
953 * Arguments: @dstr *d@ = string to write to
954 * @struct tvec_state *tv@ = test vector state
955 *
956 * Returns: Zero on success, @-1@ on error.
957 *
958 * Use: Read barewords and the whitespace between them. Stop when we
959 * encounter something which can't start a bareword.
960 */
b64eb60f
MW
961
static int collect_bare(dstr *d, struct tvec_state *tv)
{
  /* @pos@ remembers the length of @d@ at the end of the most recent word,
   * so that trailing whitespace can be trimmed off at the end.  The state
   * @s@ records what we saw last: part of a word, whitespace, or a
   * backslash whose effect is still pending.
   */
  size_t pos = d->len;
  enum { WORD, SPACE, ESCAPE }; unsigned s = WORD;
  int ch, rc;

  for (;;) {
    ch = getc(tv->fp);
    switch (ch) {
      case EOF:
	/* Running out of input in the middle is always an error. */
	tvec_syntax(tv, ch, "bareword");
	rc = -1; goto end;
      case '\n':
	/* An escaped newline is an ordinary character.  Otherwise, push
	 * the newline back and let @tvec_nexttoken@ deal with it, failing
	 * if it reports an error; on success, the line break counts as a
	 * single space.
	 */
	if (s == ESCAPE) { tv->lno++; goto addch; }
	if (s == WORD) pos = d->len;
	ungetc(ch, tv->fp); if (tvec_nexttoken(tv)) { rc = -1; goto end; }
	DPUTC(d, ' '); s = SPACE;
	break;
      case '"': case '\'': case '!': case '#': case ')': case '}': case ']':
	/* These characters can't start a bareword: following whitespace,
	 * one of them ends the collection, and is left for the caller.
	 * In the middle of a word, or escaped, they're ordinary.
	 */
	if (s == SPACE) { ungetc(ch, tv->fp); goto done; }
	goto addch;
      case '\\':
	/* NOTE(review): a backslash arriving in the @ESCAPE@ state just
	 * stays in that state and adds nothing, so `\\' doesn't appear to
	 * produce a literal backslash -- confirm that this is intended.
	 */
	s = ESCAPE;
	break;
      default:
	/* Unescaped whitespace is collected provisionally: it's trimmed
	 * again if no further word follows.
	 */
	if (s != ESCAPE && isspace(ch)) {
	  if (s == WORD) pos = d->len;
	  DPUTC(d, ch); s = SPACE;
	  break;
	}
      addch:
	DPUTC(d, ch); s = WORD;
    }
  }

done:
  /* Trim back to the end of the last word if we finished on whitespace. */
  if (s == SPACE) d->len = pos;
  DPUTZ(d); rc = 0;
end:
  return (rc);
}
1003
67b5031e
MW
1004/* --- @set_up_encoding@ --- *
1005 *
1006 * Arguments: @const codec_class **ccl_out@ = where to put the class
1007 * @unsigned *f_out@ = where to put the flags
 *		@unsigned code@ = the coding scheme to use (@TVCODE_...@)
1009 *
1010 * Returns: ---
1011 *
1012 * Use: Helper for @read_compound_string@ below.
1013 *
1014 * Return the appropriate codec class and flags for @code@.
1015 * Leaves @*ccl_out@ null if the coding scheme doesn't have a
1016 * backing codec class (e.g., @TVCODE_BARE@).
1017 */
1018
1019enum { TVCODE_BARE, TVCODE_HEX, TVCODE_BASE64, TVCODE_BASE32 };
b64eb60f
MW
1020static void set_up_encoding(const codec_class **ccl_out, unsigned *f_out,
1021 unsigned code)
1022{
1023 switch (code) {
1024 case TVCODE_BARE:
1025 *ccl_out = 0; *f_out = 0;
1026 break;
1027 case TVCODE_HEX:
1028 *ccl_out = &hex_class; *f_out = CDCF_IGNCASE;
1029 break;
1030 case TVCODE_BASE32:
1031 *ccl_out = &base32_class; *f_out = CDCF_IGNCASE | CDCF_IGNEQPAD;
1032 break;
1033 case TVCODE_BASE64:
1034 *ccl_out = &base64_class; *f_out = CDCF_IGNEQPAD;
1035 break;
1036 default:
1037 abort();
1038 }
1039}
1040
67b5031e
MW
1041/* --- @flush_codec@ --- *
1042 *
1043 * Arguments: @codec *cdc@ = a codec, or null
1044 * @dstr *d@ = output string
1045 * @struct tvec_state *tv@ = test vector state
1046 *
1047 * Returns: Zero on success, @-1@ on error.
1048 *
1049 * Use: Helper for @read_compound_string@ below.
1050 *
1051 * Flush out any final buffered material from @cdc@, and check
1052 * that it's in a good state. Frees the codec on success. Does
1053 * nothing if @cdc@ is null.
1054 */
1055
1056static int flush_codec(codec *cdc, dstr *d, struct tvec_state *tv)
1057{
1058 int err;
1059
1060 if (cdc) {
1061 err = cdc->ops->code(cdc, 0, 0, d);
1062 if (err)
1063 return (tvec_error(tv, "invalid %s sequence end: %s",
1064 cdc->ops->c->name, codec_strerror(err)));
1065 cdc->ops->destroy(cdc);
1066 }
1067 return (0);
1068}
1069
1070/* --- @read_compound_string@ --- *
1071 *
1072 * Arguments: @void **p_inout@ = address of output buffer pointer
1073 * @size_t *sz_inout@ = address of buffer size
1074 * @unsigned code@ = initial interpretation of barewords
1075 * @unsigned f@ = other flags (@RCSF_...@)
1076 * @struct tvec_state *tv@ = test vector state
1077 *
1078 * Returns: Zero on success, @-1@ on error.
1079 *
1080 * Use: Parse a compound string, i.e., a sequence of stringish pieces
1081 * which might be quoted strings, character names, or barewords
1082 * to be decoded according to @code@, interspersed with
1083 * additional directives.
1084 *
1085 * If the initial buffer pointer is non-null and sufficiently
1086 * large, then it will be reused; otherwise, it is freed and a
1087 * fresh, sufficiently large buffer is allocated and returned.
1088 */
1089
1090#define RCSF_NESTED 1u
882a39c1 1091static int read_compound_string(void **p_inout, size_t *sz_inout,
67b5031e
MW
1092 unsigned code, unsigned f,
1093 struct tvec_state *tv)
b64eb60f 1094{
67b5031e 1095 const codec_class *ccl; unsigned cdf;
b64eb60f
MW
1096 codec *cdc;
1097 dstr d = DSTR_INIT, w = DSTR_INIT;
1098 char *p;
67b5031e
MW
1099 const char *q;
1100 void *pp = 0; size_t sz;
1101 unsigned long n;
882a39c1 1102 int ch, err, rc;
b64eb60f 1103
67b5031e
MW
1104 set_up_encoding(&ccl, &cdf, code); cdc = 0;
1105
1106 if (tvec_nexttoken(tv)) return (tvec_syntax(tv, fgetc(tv->fp), "string"));
b64eb60f
MW
1107 do {
1108 ch = getc(tv->fp);
67b5031e
MW
1109 switch (ch) {
1110
1111 case ')': case ']': case '}':
1112 /* Close brackets. Leave these for recursive caller if there is one,
1113 * or just complain.
1114 */
1115
1116 if (!(f&RCSF_NESTED))
1117 { rc = tvec_syntax(tv, ch, "string"); goto end; }
1118 ungetc(ch, tv->fp); goto done;
1119
1120 case '"': case '\'':
1121 /* Quotes. Read a quoted string. */
1122
1123 if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1124 cdc = 0;
1125 if (read_quoted_string(&d, ch, tv)) { rc = -1; goto end; }
1126 break;
1127
1128 case '#':
1129 /* A named character. */
1130
1131 ungetc(ch, tv->fp);
1132 if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1133 cdc = 0;
1134 DRESET(&w); tvec_readword(tv, &w, ";", "character name");
1135 if (read_charname(&ch, w.buf, RCF_EOFOK)) {
1136 rc = tvec_error(tv, "unknown character name `%s'", d.buf);
1137 goto end;
1138 }
1139 DPUTC(&d, ch); break;
1140
1141 case '!':
1142 /* A magic keyword. */
1143
1144 if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1145 cdc = 0;
b64eb60f 1146 ungetc(ch, tv->fp);
67b5031e
MW
1147 DRESET(&w); tvec_readword(tv, &w, ";", "`!'-keyword");
1148
1149 /* Change bareword coding system. */
1150 if (STRCMP(w.buf, ==, "!bare"))
1151 { code = TVCODE_BARE; set_up_encoding(&ccl, &cdf, code); }
1152 else if (STRCMP(w.buf, ==, "!hex"))
1153 { code = TVCODE_HEX; set_up_encoding(&ccl, &cdf, code); }
1154 else if (STRCMP(w.buf, ==, "!base32"))
1155 { code = TVCODE_BASE32; set_up_encoding(&ccl, &cdf, code); }
1156 else if (STRCMP(w.buf, ==, "!base64"))
1157 { code = TVCODE_BASE64; set_up_encoding(&ccl, &cdf, code); }
1158
1159 /* Repeated substrings. */
1160 else if (STRCMP(w.buf, ==, "!repeat")) {
1161 if (tvec_nexttoken(tv)) {
1162 rc = tvec_syntax(tv, fgetc(tv->fp), "repeat count");
1163 goto end;
1164 }
1165 DRESET(&w);
1166 if (tvec_readword(tv, &w, ";{", "repeat count"))
1167 { rc = -1; goto end; }
1168 if (parse_unsigned_integer(&n, &q, w.buf)) {
1169 rc = tvec_error(tv, "invalid repeat count `%s'", w.buf);
1170 goto end;
1171 }
1172 if (*q) { rc = tvec_syntax(tv, *q, "`{'"); goto end; }
1173 if (tvec_nexttoken(tv))
1174 { rc = tvec_syntax(tv, fgetc(tv->fp), "`{'"); goto end; }
1175 ch = getc(tv->fp); if (ch != '{')
1176 { rc = tvec_syntax(tv, ch, "`{'"); goto end; }
1177 sz = 0;
1178 if (read_compound_string(&pp, &sz, code, f | RCSF_NESTED, tv))
1179 { rc = -1; goto end; }
1180 ch = getc(tv->fp); if (ch != '}')
1181 { rc = tvec_syntax(tv, ch, "`}'"); goto end; }
1182 if (sz) {
1183 if (n > (size_t)-1/sz)
1184 { rc = tvec_error(tv, "repeat size out of range"); goto end; }
1185 dstr_ensure(&d, n*sz);
1186 if (sz == 1)
1187 { memset(d.buf + d.len, *(unsigned char *)pp, n); d.len += n; }
1188 else
1189 for (; n--; d.len += sz) memcpy(d.buf + d.len, pp, sz);
1190 }
1191 xfree(pp); pp = 0;
1192 }
1193
1194 /* Anything else is an error. */
1195 else {
1196 tvec_error(tv, "unknown string keyword `%s'", w.buf);
1197 rc = -1; goto end;
1198 }
b64eb60f 1199 break;
67b5031e 1200
b64eb60f 1201 default:
67b5031e
MW
1202 /* A bareword. Process it according to the current coding system. */
1203
1204 switch (code) {
1205 case TVCODE_BARE:
1206 ungetc(ch, tv->fp);
1207 if (collect_bare(&d, tv)) goto done;
1208 break;
1209 default:
1210 assert(ccl);
1211 ungetc(ch, tv->fp); DRESET(&w);
1212 if (tvec_readword(tv, &w, ";", "%s-encoded fragment", ccl->name))
1213 { rc = -1; goto end; }
1214 if (!cdc) cdc = ccl->decoder(cdf);
1215 err = cdc->ops->code(cdc, w.buf, w.len, &d);
1216 if (err) {
1217 tvec_error(tv, "invalid %s fragment `%s': %s",
1218 ccl->name, w.buf, codec_strerror(err));
1219 rc = -1; goto end;
1220 }
1221 break;
1222 }
1223 break;
b64eb60f
MW
1224 }
1225 } while (!tvec_nexttoken(tv));
1226
1227done:
67b5031e
MW
1228 /* Wrap things up. */
1229 if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1230 cdc = 0;
b64eb60f
MW
1231 if (*sz_inout <= d.len)
1232 { xfree(*p_inout); *p_inout = xmalloc(d.len + 1); }
1233 p = *p_inout; memcpy(p, d.buf, d.len); p[d.len] = 0; *sz_inout = d.len;
882a39c1 1234 rc = 0;
67b5031e 1235
882a39c1 1236end:
67b5031e
MW
1237 /* Clean up any debris. */
1238 if (cdc) cdc->ops->destroy(cdc);
1239 if (pp) xfree(pp);
b64eb60f 1240 dstr_destroy(&d); dstr_destroy(&w);
882a39c1 1241 return (rc);
b64eb60f
MW
1242}
1243
b64eb60f
MW
1244/*----- Signed and unsigned integer types ---------------------------------*/
1245
c81c35df
MW
1246/* --- @init_int@, @init_uint@ --- *
1247 *
1248 * Arguments: @union tvec_regval *rv@ = register value
1249 * @const struct tvec_regdef *rd@ = register definition
1250 *
1251 * Returns: ---
1252 *
1253 * Use: Initialize a register value.
1254 *
1255 * Integer values are initialized to zero.
1256 */
1257
b64eb60f
MW
1258static void init_int(union tvec_regval *rv, const struct tvec_regdef *rd)
1259 { rv->i = 0; }
1260
1261static void init_uint(union tvec_regval *rv, const struct tvec_regdef *rd)
1262 { rv->u = 0; }
1263
c81c35df
MW
1264/* --- @eq_int@, @eq_uint@ --- *
1265 *
1266 * Arguments: @const union tvec_regval *rv0, *rv1@ = register values
1267 * @const struct tvec_regdef *rd@ = register definition
1268 *
1269 * Returns: Nonzero if the values are equal, zero if unequal
1270 *
1271 * Use: Compare register values for equality.
1272 */
1273
b64eb60f
MW
1274static int eq_int(const union tvec_regval *rv0, const union tvec_regval *rv1,
1275 const struct tvec_regdef *rd)
1276 { return (rv0->i == rv1->i); }
1277
1278static int eq_uint(const union tvec_regval *rv0,
1279 const union tvec_regval *rv1,
1280 const struct tvec_regdef *rd)
1281 { return (rv0->u == rv1->u); }
1282
c81c35df
MW
1283/* --- @tobuf_int@, @tobuf_uint@ --- *
1284 *
1285 * Arguments: @buf *b@ = buffer
1286 * @const union tvec_regval *rv@ = register value
1287 * @const struct tvec_regdef *rd@ = register definition
1288 *
1289 * Returns: Zero on success, %$-1$% on failure.
1290 *
1291 * Use: Serialize a register value to a buffer.
1292 *
1293 * Integer values are serialized as little-endian 64-bit signed
1294 * or unsigned integers.
1295 */
1296
b64eb60f
MW
1297static int tobuf_int(buf *b, const union tvec_regval *rv,
1298 const struct tvec_regdef *rd)
1299 { return (signed_to_buf(b, rv->i)); }
1300
1301static int tobuf_uint(buf *b, const union tvec_regval *rv,
1302 const struct tvec_regdef *rd)
1303 { return (unsigned_to_buf(b, rv->u)); }
1304
c81c35df
MW
1305/* --- @frombuf_int@, @frombuf_uint@ --- *
1306 *
1307 * Arguments: @buf *b@ = buffer
1308 * @union tvec_regval *rv@ = register value
1309 * @const struct tvec_regdef *rd@ = register definition
1310 *
1311 * Returns: Zero on success, %$-1$% on failure.
1312 *
1313 * Use: Deserialize a register value from a buffer.
1314 *
1315 * Integer values are serialized as 64-bit signed or unsigned
1316 * integers.
1317 */
1318
b64eb60f
MW
1319static int frombuf_int(buf *b, union tvec_regval *rv,
1320 const struct tvec_regdef *rd)
882a39c1 1321 { return (signed_from_buf(b, &rv->i)); }
b64eb60f
MW
1322
1323static int frombuf_uint(buf *b, union tvec_regval *rv,
1324 const struct tvec_regdef *rd)
1325 { return (unsigned_from_buf(b, &rv->u)); }
1326
c81c35df
MW
1327/* --- @parse_int@, @parse_uint@ --- *
1328 *
1329 * Arguments: @union tvec_regval *rv@ = register value
1330 * @const struct tvec_regdef *rd@ = register definition
1331 * @struct tvec_state *tv@ = test-vector state
1332 *
1333 * Returns: Zero on success, %$-1$% on error.
1334 *
1335 * Use: Parse a register value from an input file.
1336 *
1337 * Integers may be input in decimal, hex, binary, or octal,
1338 * following approximately usual conventions.
1339 *
1340 * * Signed integers may be preceded with a `+' or `-' sign.
1341 *
1342 * * Decimal integers are just a sequence of decimal digits
1343 * `0' ... `9'.
1344 *
1345 * * Octal integers are a sequence of digits `0' ... `7',
1346 * preceded by `0o' or `0O'.
1347 *
1348 * * Hexadecimal integers are a sequence of digits `0'
1349 * ... `9', `a' ... `f', or `A' ... `F', preceded by `0x' or
1350 * `0X'.
1351 *
1352 * * Radix-B integers are a sequence of digits `0' ... `9',
1353 * `a' ... `f', or `A' ... `F', each with value less than B,
1354 * preceded by `Br' or `BR', where 0 < B < 36 is expressed
1355 * in decimal without any leading `0' or internal
1356 * underscores `_'.
1357 *
1358 * * A digit sequence may contain internal underscore `_'
1359 * separators, but not before or after all of the digits;
1360 * and two consecutive `_' characters are not permitted.
1361 */
1362
882a39c1
MW
1363static int parse_int(union tvec_regval *rv, const struct tvec_regdef *rd,
1364 struct tvec_state *tv)
b64eb60f
MW
1365{
1366 dstr d = DSTR_INIT;
882a39c1 1367 int rc;
b64eb60f 1368
c81c35df
MW
1369 if (tvec_readword(tv, &d, ";", "signed integer")) { rc = -1; goto end; }
1370 if (parse_signed(&rv->i, d.buf, rd->arg.p, tv)) { rc = -1; goto end; }
1371 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
882a39c1
MW
1372 rc = 0;
1373end:
b64eb60f 1374 dstr_destroy(&d);
882a39c1 1375 return (rc);
b64eb60f
MW
1376}
1377
882a39c1
MW
1378static int parse_uint(union tvec_regval *rv, const struct tvec_regdef *rd,
1379 struct tvec_state *tv)
b64eb60f
MW
1380{
1381 dstr d = DSTR_INIT;
882a39c1 1382 int rc;
b64eb60f 1383
c81c35df
MW
1384 if (tvec_readword(tv, &d, ";", "unsigned integer")) { rc = -1; goto end; }
1385 if (parse_unsigned(&rv->u, d.buf, rd->arg.p, tv)) { rc = -1; goto end; }
1386 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
882a39c1
MW
1387 rc = 0;
1388end:
b64eb60f 1389 dstr_destroy(&d);
882a39c1 1390 return (rc);
b64eb60f
MW
1391}
1392
c81c35df
MW
1393/* --- @dump_int@, @dump_uint@ --- *
1394 *
1395 * Arguments: @const union tvec_regval *rv@ = register value
1396 * @const struct tvec_regdef *rd@ = register definition
1397 * @unsigned style@ = output style (@TVSF_...@)
1398 * @const struct gprintf_ops *gops@, @void *go@ = format output
1399 *
1400 * Returns: ---
1401 *
1402 * Use: Dump a register value to the format output.
1403 *
1404 * Integer values are dumped in decimal and, unless compact
1405 * output is requested, hex, and maybe a character, as a
1406 * comment.
1407 */
1408
b64eb60f
MW
1409static void dump_int(const union tvec_regval *rv,
1410 const struct tvec_regdef *rd,
e63124bc
MW
1411 unsigned style,
1412 const struct gprintf_ops *gops, void *go)
b64eb60f 1413{
b64eb60f 1414
e63124bc 1415 gprintf(gops, go, "%ld", rv->i);
b64eb60f 1416 if (!(style&TVSF_COMPACT)) {
3efcfd2d
MW
1417 gprintf(gops, go, " ; = ");
1418 format_signed_hex(gops, go, rv->i);
1419 maybe_format_signed_char(gops, go, rv->i);
b64eb60f
MW
1420 }
1421}
1422
1423static void dump_uint(const union tvec_regval *rv,
1424 const struct tvec_regdef *rd,
e63124bc
MW
1425 unsigned style,
1426 const struct gprintf_ops *gops, void *go)
b64eb60f 1427{
e63124bc
MW
1428 gprintf(gops, go, "%lu", rv->u);
1429 if (!(style&TVSF_COMPACT)) {
3efcfd2d
MW
1430 gprintf(gops, go, " ; = ");
1431 format_unsigned_hex(gops, go, rv->u);
1432 maybe_format_unsigned_char(gops, go, rv->u);
e63124bc 1433 }
b64eb60f
MW
1434}
1435
c81c35df 1436/* Integer type definitions. */
b64eb60f 1437const struct tvec_regty tvty_int = {
3efcfd2d 1438 init_int, trivial_release, eq_int,
b64eb60f
MW
1439 tobuf_int, frombuf_int,
1440 parse_int, dump_int
1441};
c81c35df
MW
1442const struct tvec_regty tvty_uint = {
1443 init_uint, trivial_release, eq_uint,
1444 tobuf_uint, frombuf_uint,
1445 parse_uint, dump_uint
1446};
b64eb60f 1447
c81c35df 1448/* Predefined integer ranges. */
b64eb60f
MW
1449const struct tvec_irange
1450 tvrange_schar = { SCHAR_MIN, SCHAR_MAX },
1451 tvrange_short = { SHRT_MIN, SHRT_MAX },
1452 tvrange_int = { INT_MIN, INT_MAX },
1453 tvrange_long = { LONG_MIN, LONG_MAX },
1454 tvrange_sbyte = { -128, 127 },
1455 tvrange_i16 = { -32768, +32767 },
1456 tvrange_i32 = { -2147483648, 2147483647 };
b64eb60f
MW
1457const struct tvec_urange
1458 tvrange_uchar = { 0, UCHAR_MAX },
1459 tvrange_ushort = { 0, USHRT_MAX },
1460 tvrange_uint = { 0, UINT_MAX },
1461 tvrange_ulong = { 0, ULONG_MAX },
1462 tvrange_size = { 0, (size_t)-1 },
1463 tvrange_byte = { 0, 255 },
1464 tvrange_u16 = { 0, 65535 },
1465 tvrange_u32 = { 0, 4294967296 };
1466
67b5031e
MW
1467/* --- @tvec_claimeq_int@ --- *
1468 *
1469 * Arguments: @struct tvec_state *tv@ = test-vector state
1470 * @long i0, i1@ = two signed integers
1471 * @const char *file@, @unsigned lno@ = calling file and line
1472 * @const char *expr@ = the expression to quote on failure
1473 *
1474 * Returns: Nonzero if @i0@ and @i1@ are equal, otherwise zero.
1475 *
1476 * Use: Check that values of @i0@ and @i1@ are equal. As for
1477 * @tvec_claim@ above, a test case is automatically begun and
1478 * ended if none is already underway. If the values are
1479 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
1480 * mismatched values are dumped: @i0@ is printed as the output
1481 * value and @i1@ is printed as the input reference.
1482 */
1483
b64eb60f
MW
1484int tvec_claimeq_int(struct tvec_state *tv, long i0, long i1,
1485 const char *file, unsigned lno, const char *expr)
1486{
3efcfd2d 1487 tv->out[0].v.i = i0; tv->in[0].v.i = i1;
b64eb60f
MW
1488 return (tvec_claimeq(tv, &tvty_int, 0, file, lno, expr));
1489}
1490
67b5031e
MW
1491/* --- @tvec_claimeq_uint@ --- *
1492 *
1493 * Arguments: @struct tvec_state *tv@ = test-vector state
1494 * @unsigned long u0, u1@ = two unsigned integers
1495 * @const char *file@, @unsigned lno@ = calling file and line
1496 * @const char *expr@ = the expression to quote on failure
1497 *
1498 * Returns: Nonzero if @u0@ and @u1@ are equal, otherwise zero.
1499 *
1500 * Use: Check that values of @u0@ and @u1@ are equal. As for
1501 * @tvec_claim@ above, a test case is automatically begun and
1502 * ended if none is already underway. If the values are
1503 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
1504 * mismatched values are dumped: @u0@ is printed as the output
1505 * value and @u1@ is printed as the input reference.
1506 */
1507
b64eb60f
MW
1508int tvec_claimeq_uint(struct tvec_state *tv,
1509 unsigned long u0, unsigned long u1,
1510 const char *file, unsigned lno, const char *expr)
1511{
3efcfd2d 1512 tv->out[0].v.u = u0; tv->in[0].v.u = u1;
b64eb60f
MW
1513 return (tvec_claimeq(tv, &tvty_uint, 0, file, lno, expr));
1514}
1515
3efcfd2d 1516/*----- Floating-point type -----------------------------------------------*/
e63124bc 1517
c81c35df
MW
1518/* --- @init_float@ --- *
1519 *
1520 * Arguments: @union tvec_regval *rv@ = register value
1521 * @const struct tvec_regdef *rd@ = register definition
1522 *
1523 * Returns: ---
1524 *
1525 * Use: Initialize a register value.
1526 *
1527 * Floating-point values are initialized to zero.
1528 */
1529
e63124bc
MW
1530static void init_float(union tvec_regval *rv, const struct tvec_regdef *rd)
1531 { rv->f = 0.0; }
e63124bc 1532
c81c35df
MW
1533/* --- @eq_float@ --- *
1534 *
1535 * Arguments: @const union tvec_regval *rv0, *rv1@ = register values
1536 * @const struct tvec_regdef *rd@ = register definition
1537 *
1538 * Returns: Nonzero if the values are equal, zero if unequal
1539 *
1540 * Use: Compare register values for equality.
1541 *
1542 * Floating-point values may be considered equal if their
1543 * absolute or relative difference is sufficiently small, as
1544 * described in the register definition.
1545 */
1546
e63124bc
MW
1547static int eq_float(const union tvec_regval *rv0,
1548 const union tvec_regval *rv1,
1549 const struct tvec_regdef *rd)
1550 { return (eqish_floating_p(rv0->f, rv1->f, rd->arg.p)); }
1551
c81c35df
MW
1552/* --- @tobuf_float@ --- *
1553 *
1554 * Arguments: @buf *b@ = buffer
1555 * @const union tvec_regval *rv@ = register value
1556 * @const struct tvec_regdef *rd@ = register definition
1557 *
1558 * Returns: Zero on success, %$-1$% on failure.
1559 *
1560 * Use: Serialize a register value to a buffer.
1561 *
1562 * Floating-point values are serialized as little-endian
1563 * IEEE 754 Binary64.
1564 */
1565
e63124bc
MW
1566static int tobuf_float(buf *b, const union tvec_regval *rv,
1567 const struct tvec_regdef *rd)
1568 { return (buf_putf64l(b, rv->f)); }
c81c35df
MW
1569
1570/* --- @frombuf_float@ --- *
1571 *
1572 * Arguments: @buf *b@ = buffer
1573 * @union tvec_regval *rv@ = register value
1574 * @const struct tvec_regdef *rd@ = register definition
1575 *
1576 * Returns: Zero on success, %$-1$% on failure.
1577 *
1578 * Use: Deserialize a register value from a buffer.
1579 *
1580 * Floating-point values are serialized as little-endian
1581 * IEEE 754 Binary64.
1582 */
1583
e63124bc
MW
1584static int frombuf_float(buf *b, union tvec_regval *rv,
1585 const struct tvec_regdef *rd)
1586 { return (buf_getf64l(b, &rv->f)); }
1587
c81c35df
MW
1588/* --- @parse_float@ --- *
1589 *
1590 * Arguments: @union tvec_regval *rv@ = register value
1591 * @const struct tvec_regdef *rd@ = register definition
1592 * @struct tvec_state *tv@ = test-vector state
1593 *
1594 * Returns: Zero on success, %$-1$% on error.
1595 *
1596 * Use: Parse a register value from an input file.
1597 *
1598 * Floating-point values are either NaN (%|#nan|%, if supported
1599 * by the platform); positive or negative infinity (%|#inf|%,
1600 * %|+#inf|%, or %|#+inf|% (preferring the last), and %|-#inf|%
1601 * or %|#-inf|% (preferring the latter), if supported by the
1602 * platform); or a number in strtod(3) syntax.
1603 */
1604
e63124bc
MW
1605static int parse_float(union tvec_regval *rv, const struct tvec_regdef *rd,
1606 struct tvec_state *tv)
1607{
1608 dstr d = DSTR_INIT;
1609 int rc;
1610
1611 if (tvec_readword(tv, &d, ";", "floating-point number"))
1612 { rc = -1; goto end; }
c81c35df
MW
1613 if (parse_floating(&rv->f, d.buf, rd->arg.p, tv)) { rc = -1; goto end; }
1614 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
e63124bc
MW
1615 rc = 0;
1616end:
1617 dstr_destroy(&d);
1618 return (rc);
1619}
1620
c81c35df
MW
1621/* --- @dump_float@ --- *
1622 *
1623 * Arguments: @const union tvec_regval *rv@ = register value
1624 * @const struct tvec_regdef *rd@ = register definition
1625 * @unsigned style@ = output style (@TVSF_...@)
1626 * @const struct gprintf_ops *gops@, @void *go@ = format output
1627 *
1628 * Returns: ---
1629 *
1630 * Use: Dump a register value to the format output.
1631 *
1632 * Floating-point values are dumped in decimal or as a special
1633 * token beginning with `%|#|%'. Some effort is taken to ensure
1634 * that the output is sufficient to uniquely identify the
1635 * original value, but, honestly, C makes this really hard.
1636 */
1637
e63124bc
MW
1638static void dump_float(const union tvec_regval *rv,
1639 const struct tvec_regdef *rd,
1640 unsigned style,
1641 const struct gprintf_ops *gops, void *go)
1642 { format_floating(gops, go, rv->f); }
1643
c81c35df 1644/* Floating-point type definition. */
e63124bc 1645const struct tvec_regty tvty_float = {
3efcfd2d 1646 init_float, trivial_release, eq_float,
e63124bc
MW
1647 tobuf_float, frombuf_float,
1648 parse_float, dump_float
1649};
1650
c81c35df
MW
1651/* Predefined floating-point ranges. */
1652const struct tvec_floatinfo
1653 tvflt_finite = { TVFF_EXACT, -DBL_MAX, DBL_MAX, 0.0 },
1654 tvflt_nonneg = { TVFF_EXACT, 0, DBL_MAX, 0.0 };
1655
67b5031e
MW
1656/* --- @tvec_claimeqish_float@ --- *
1657 *
1658 * Arguments: @struct tvec_state *tv@ = test-vector state
1659 * @double f0, f1@ = two floating-point numbers
1660 * @unsigned f@ = flags (@TVFF_...@)
1661 * @double delta@ = maximum tolerable difference
1662 * @const char *file@, @unsigned lno@ = calling file and line
1663 * @const char *expr@ = the expression to quote on failure
1664 *
1665 * Returns: Nonzero if @f0@ and @f1@ are sufficiently close, otherwise
1666 * zero.
1667 *
1668 * Use: Check that values of @f0@ and @f1@ are sufficiently close.
1669 * As for @tvec_claim@ above, a test case is automatically begun
1670 * and ended if none is already underway. If the values are
1671 * too far apart, then @tvec_fail@ is called, quoting @expr@,
1672 * and the mismatched values are dumped: @f0@ is printed as the
1673 * output value and @f1@ is printed as the input reference.
1674 *
1675 * The details for the comparison are as follows.
1676 *
1677 * * A NaN value matches any other NaN, and nothing else.
1678 *
1679 * * An infinity matches another infinity of the same sign,
1680 * and nothing else.
1681 *
1682 * * If @f&TVFF_EQMASK@ is @TVFF_EXACT@, then any
1683 * representable number matches only itself: in particular,
1684 * positive and negative zero are considered distinct.
1685 * (This allows tests to check that they land on the correct
1686 * side of branch cuts, for example.)
1687 *
1688 * * If @f&TVFF_EQMASK@ is @TVFF_ABSDELTA@, then %$x$% matches
1689 * %$y$% when %$|x - y| < \delta$%.
1690 *
1691 * * If @f&TVFF_EQMASK@ is @TVFF_RELDELTA@, then %$x$% matches
1692 * %$y$% when %$|1 - y/x| < \delta$%. (Note that this
1693 * criterion is asymmetric in %$x$% and %$y$%. FIXME: say more.)
1694 */
1695
e63124bc
MW
1696int tvec_claimeqish_float(struct tvec_state *tv,
1697 double f0, double f1, unsigned f, double delta,
1698 const char *file, unsigned lno,
1699 const char *expr)
1700{
1701 struct tvec_floatinfo fi;
1702 union tvec_misc arg;
1703
1704 fi.f = f; fi.min = fi.max = 0.0; fi.delta = delta; arg.p = &fi;
3efcfd2d 1705 tv->out[0].v.f = f0; tv->in[0].v.f = f1;
e63124bc
MW
1706 return (tvec_claimeq(tv, &tvty_float, &arg, file, lno, expr));
1707}
e63124bc 1708
67b5031e
MW
1709/* --- @tvec_claimeq_float@ --- *
1710 *
1711 * Arguments: @struct tvec_state *tv@ = test-vector state
1712 * @double f0, f1@ = two floating-point numbers
1713 * @const char *file@, @unsigned lno@ = calling file and line
1714 * @const char *expr@ = the expression to quote on failure
1715 *
1716 * Returns: Nonzero if @f0@ and @f1@ are identical, otherwise zero.
1717 *
1718 * Use: Check that values of @f0@ and @f1@ are identical. The
1719 * function is exactly equivalent to @tvec_claimeqish_float@
1720 * with @f == TVFF_EXACT@.
1721 */
1722
1723int tvec_claimeq_float(struct tvec_state *tv,
1724 double f0, double f1,
1725 const char *file, unsigned lno,
1726 const char *expr)
1727{
1728 return (tvec_claimeqish_float(tv, f0, f1, TVFF_EXACT, 0.0,
1729 file, lno, expr));
1730}
1731
b64eb60f
MW
1732/*----- Enumerations ------------------------------------------------------*/
1733
c81c35df
MW
1734/* --- @init_tenum@ --- *
1735 *
1736 * Arguments: @union tvec_regval *rv@ = register value
1737 * @const struct tvec_regdef *rd@ = register definition
1738 *
1739 * Returns: ---
1740 *
1741 * Use: Initialize a register value.
1742 *
1743 * Integer and floating-point enumeration values are initialized
1744 * as their underlying representations. Pointer enumerations
1745 * are initialized to %|#nil|%.
1746 */
1747
3efcfd2d
MW
1748#define init_ienum init_int
1749#define init_uenum init_uint
1750#define init_fenum init_float
c81c35df 1751
3efcfd2d
MW
1752static void init_penum(union tvec_regval *rv, const struct tvec_regdef *rd)
1753 { rv->p = 0; }
b64eb60f 1754
c81c35df
MW
1755/* --- @eq_tenum@ --- *
1756 *
1757 * Arguments: @const union tvec_regval *rv0, *rv1@ = register values
1758 * @const struct tvec_regdef *rd@ = register definition
1759 *
1760 * Returns: Nonzero if the values are equal, zero if unequal
1761 *
1762 * Use: Compare register values for equality.
1763 *
1764 * Integer and floating-point enumeration values are compared as
1765 * their underlying representations; in particular, floating-
1766 * point enumerations may compare equal if their absolute or
1767 * relative difference is sufficiently small. Pointer
1768 * enumerations are compared as pointers.
1769 */
1770
3efcfd2d
MW
1771#define eq_ienum eq_int
1772#define eq_uenum eq_uint
c81c35df 1773
3efcfd2d
MW
1774static int eq_fenum(const union tvec_regval *rv0,
1775 const union tvec_regval *rv1,
1776 const struct tvec_regdef *rd)
b64eb60f 1777{
3efcfd2d
MW
1778 const struct tvec_fenuminfo *ei = rd->arg.p;
1779 return (eqish_floating_p(rv0->f, rv1->f, ei->fi));
b64eb60f 1780}
c81c35df 1781
3efcfd2d
MW
1782static int eq_penum(const union tvec_regval *rv0,
1783 const union tvec_regval *rv1,
1784 const struct tvec_regdef *rd)
1785 { return (rv0->p == rv1->p); }
b64eb60f 1786
c81c35df
MW
1787/* --- @tobuf_tenum@ --- *
1788 *
1789 * Arguments: @buf *b@ = buffer
1790 * @const union tvec_regval *rv@ = register value
1791 * @const struct tvec_regdef *rd@ = register definition
1792 *
1793 * Returns: Zero on success, %$-1$% on failure.
1794 *
1795 * Use: Serialize a register value to a buffer.
1796 *
1797 * Integer and floating-point enumeration values are serialized
1798 * as their underlying representations. Pointer enumerations
1799 * are serialized as the signed integer index into the
1800 * association table; %|#nil|% serializes as %$-1$%, and
1801 * unrecognized pointers cause failure.
1802 */
1803
3efcfd2d
MW
1804#define tobuf_ienum tobuf_int
1805#define tobuf_uenum tobuf_uint
1806#define tobuf_fenum tobuf_float
c81c35df 1807
3efcfd2d
MW
1808static int tobuf_penum(buf *b, const union tvec_regval *rv,
1809 const struct tvec_regdef *rd)
b64eb60f 1810{
3efcfd2d 1811 const struct tvec_penuminfo *pei = rd->arg.p;
e63124bc
MW
1812 const struct tvec_passoc *pa;
1813 long i;
b64eb60f 1814
3efcfd2d
MW
1815 for (pa = pei->av, i = 0; pa->tag; pa++, i++)
1816 if (pa->p == rv->p) goto found;
1817 if (!rv->p) i = -1;
1818 else return (-1);
1819found:
1820 return (signed_to_buf(b, i));
b64eb60f
MW
1821}
1822
c81c35df
MW
1823/* --- @frombuf_tenum@ --- *
1824 *
1825 * Arguments: @buf *b@ = buffer
1826 * @union tvec_regval *rv@ = register value
1827 * @const struct tvec_regdef *rd@ = register definition
1828 *
1829 * Returns: Zero on success, %$-1$% on failure.
1830 *
1831 * Use: Deserialize a register value from a buffer.
1832 *
1833 * Integer and floating-point enumeration values are serialized
1834 * as their underlying representations. Pointer enumerations
1835 * are serialized as the signed integer index into the
1836 * association table; %|#nil|% serializes as %$-1$%; out-of-
1837 * range indices cause failure.
1838 */
1839
3efcfd2d
MW
1840#define frombuf_ienum frombuf_int
1841#define frombuf_uenum frombuf_uint
1842#define frombuf_fenum frombuf_float
1843static int frombuf_penum(buf *b, union tvec_regval *rv,
b64eb60f
MW
1844 const struct tvec_regdef *rd)
1845{
3efcfd2d 1846 const struct tvec_penuminfo *pei = rd->arg.p;
e63124bc
MW
1847 const struct tvec_passoc *pa;
1848 long i, n;
b64eb60f 1849
3efcfd2d
MW
1850 for (pa = pei->av, n = 0; pa->tag; pa++, n++);
1851 if (signed_from_buf(b, &i)) return (-1);
1852 if (0 <= i && i < n) rv->p = (/*unconst*/ void *)pei->av[i].p;
1853 else if (i == -1) rv->p = 0;
1854 else return (-1);
1855 return (0);
b64eb60f
MW
1856}
1857
c81c35df
MW
1858/* --- @parse_tenum@ --- *
1859 *
1860 * Arguments: @union tvec_regval *rv@ = register value
1861 * @const struct tvec_regdef *rd@ = register definition
1862 * @struct tvec_state *tv@ = test-vector state
1863 *
1864 * Returns: Zero on success, %$-1$% on error.
1865 *
1866 * Use: Parse a register value from an input file.
1867 *
1868 * An enumerated value may be given by name or as a literal
1869 * value. For enumerations based on numeric types, the literal
1870 * values can be written in the same syntax as the underlying
1871 * values. For enumerations based on pointers, the only
1872 * permitted literal is %|#nil|%, which denotes a null pointer.
1873 */
1874
3efcfd2d
MW
1875#define DEFPARSE_ENUM(tag_, ty, slot) \
1876 static int parse_##slot##enum(union tvec_regval *rv, \
1877 const struct tvec_regdef *rd, \
1878 struct tvec_state *tv) \
1879 { \
1880 const struct tvec_##slot##enuminfo *ei = rd->arg.p; \
1881 const struct tvec_##slot##assoc *a; \
1882 dstr d = DSTR_INIT; \
1883 int rc; \
1884 \
1885 if (tvec_readword(tv, &d, ";", "enumeration tag or " LITSTR_##tag_)) \
1886 { rc = -1; goto end; } \
1887 for (a = ei->av; a->tag; a++) \
1888 if (STRCMP(a->tag, ==, d.buf)) { FOUND_##tag_ goto done; } \
1889 MISSING_##tag_ \
1890 done: \
1891 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; } \
1892 rc = 0; \
1893 end: \
1894 dstr_destroy(&d); \
1895 return (rc); \
1896 }
b64eb60f 1897
3efcfd2d
MW
1898#define LITSTR_INT "literal signed integer"
1899#define FOUND_INT rv->i = a->i;
1900#define MISSING_INT if (parse_signed(&rv->i, d.buf, ei->ir, tv)) \
1901 { rc = -1; goto end; }
1902
1903#define LITSTR_UINT "literal unsigned integer"
1904#define FOUND_UINT rv->u = a->u;
1905#define MISSING_UINT if (parse_unsigned(&rv->u, d.buf, ei->ur, tv)) \
1906 { rc = -1; goto end; }
1907
1908#define LITSTR_FLT "literal floating-point number, " \
1909 "`#-inf', `#+inf', or `#nan'"
1910#define FOUND_FLT rv->f = a->f;
1911#define MISSING_FLT if (parse_floating(&rv->f, d.buf, ei->fi, tv)) \
1912 { rc = -1; goto end; }
1913
1914#define LITSTR_PTR "`#nil'"
1915#define FOUND_PTR rv->p = (/*unconst*/ void *)a->p;
1916#define MISSING_PTR if (STRCMP(d.buf, ==, "#nil")) \
1917 rv->p = 0; \
1918 else { \
1919 tvec_error(tv, "unknown `%s' value `%s'", \
1920 ei->name, d.buf); \
1921 rc = -1; goto end; \
1922 }
1923
1924TVEC_MISCSLOTS(DEFPARSE_ENUM)
1925
1926#undef LITSTR_INT
1927#undef FOUND_INT
1928#undef MISSING_INT
1929
1930#undef LITSTR_UINT
1931#undef FOUND_UINT
1932#undef MISSING_UINT
1933
1934#undef LITSTR_FLT
1935#undef FOUND_FLT
1936#undef MISSING_FLT
1937
1938#undef LITSTR_PTR
1939#undef FOUND_PTR
1940#undef MISSING_PTR
1941
1942#undef DEFPARSE_ENUM
1943
c81c35df
MW
1944/* --- @dump_tenum@ --- *
1945 *
1946 * Arguments: @const union tvec_regval *rv@ = register value
1947 * @const struct tvec_regdef *rd@ = register definition
1948 * @unsigned style@ = output style (@TVSF_...@)
1949 * @const struct gprintf_ops *gops@, @void *gp@ = format output
1950 *
1951 * Returns: ---
1952 *
1953 * Use: Dump a register value to the format output.
1954 *
1955 * Enumeration values are dumped as their symbolic names, if
1956 * possible, with the underlying values provided as a comment
1957 * unless compact output is requested, as for the underlying
1958 * representation. A null pointer is printed as %|#nil|%;
1959 * non-null pointers are printed as %|#<TYPE PTR>|%, with the
1960 * enumeration TYPE and the raw pointer PTR printed with the
1961 * system's %|%p|% format specifier.
1962 */
1963
1964
3efcfd2d
MW
1965#define DEFDUMP_ENUM(tag_, ty, slot) \
1966 static void dump_##slot##enum(const union tvec_regval *rv, \
1967 const struct tvec_regdef *rd, \
1968 unsigned style, \
1969 const struct gprintf_ops *gops, void *go) \
1970 { \
1971 const struct tvec_##slot##enuminfo *ei = rd->arg.p; \
1972 const struct tvec_##slot##assoc *a; \
1973 \
1974 for (a = ei->av; a->tag; a++) \
1975 if (rv->slot == a->slot) { \
1976 gprintf(gops, go, "%s", a->tag); \
1977 if (style&TVSF_COMPACT) return; \
1978 gprintf(gops, go, " ; = "); break; \
1979 } \
1980 \
1981 PRINTRAW_##tag_ \
b64eb60f
MW
1982 }
1983
3efcfd2d 1984#define MAYBE_PRINT_EXTRA \
c81c35df 1985 if (style&TVSF_COMPACT) /* nothing to do */; \
3efcfd2d
MW
1986 else if (!a->tag) { gprintf(gops, go, " ; = "); goto _extra; } \
1987 else if (1) { gprintf(gops, go, " = "); goto _extra; } \
1988 else _extra:
b64eb60f 1989
3efcfd2d
MW
1990#define PRINTRAW_INT gprintf(gops, go, "%ld", rv->i); \
1991 MAYBE_PRINT_EXTRA { \
1992 format_signed_hex(gops, go, rv->i); \
1993 maybe_format_signed_char(gops, go, rv->i); \
1994 }
b64eb60f 1995
3efcfd2d
MW
1996#define PRINTRAW_UINT gprintf(gops, go, "%lu", rv->u); \
1997 MAYBE_PRINT_EXTRA { \
1998 format_unsigned_hex(gops, go, rv->u); \
1999 maybe_format_unsigned_char(gops, go, rv->u); \
2000 }
2001
2002#define PRINTRAW_FLT format_floating(gops, go, rv->f);
2003
2004#define PRINTRAW_PTR if (!rv->p) gprintf(gops, go, "#nil"); \
e63124bc 2005 else gprintf(gops, go, "#<%s %p>", ei->name, rv->p);
b64eb60f 2006
3efcfd2d 2007TVEC_MISCSLOTS(DEFDUMP_ENUM)
b64eb60f 2008
3efcfd2d
MW
2009#undef PRINTRAW_INT
2010#undef PRINTRAW_UINT
2011#undef PRINTRAW_FLT
2012#undef PRINTRAW_PTR
2013
2014#undef MAYBE_PRINT_EXTRA
2015#undef DEFDUMP_ENUM
2016
c81c35df 2017/* Enumeration type definitions. */
3efcfd2d
MW
2018#define DEFTY_ENUM(tag, ty, slot) \
2019 const struct tvec_regty tvty_##slot##enum = { \
2020 init_##slot##enum, trivial_release, eq_##slot##enum, \
2021 tobuf_##slot##enum, frombuf_##slot##enum, \
2022 parse_##slot##enum, dump_##slot##enum \
2023 };
2024TVEC_MISCSLOTS(DEFTY_ENUM)
2025#undef DEFTY_ENUM
b64eb60f 2026
c81c35df 2027/* Predefined enumeration types. */
e63124bc
MW
2028static const struct tvec_iassoc bool_assoc[] = {
2029 { "nil", 0 },
2030 { "false", 0 },
2031 { "f", 0 },
2032 { "no", 0 },
2033 { "n", 0 },
2034 { "off", 0 },
2035
2036 { "t", 1 },
2037 { "true", 1 },
2038 { "yes", 1 },
2039 { "y", 1 },
2040 { "on", 1 },
2041
20ba6b0b 2042 TVEC_ENDENUM
e63124bc
MW
2043};
2044
2045const struct tvec_ienuminfo tvenum_bool =
3efcfd2d 2046 { "bool", bool_assoc, &tvrange_int };
e63124bc 2047
20ba6b0b
MW
2048static const struct tvec_iassoc cmp_assoc[] = {
2049 { "<", -1 },
2050 { "less", -1 },
2051 { "lt", -1 },
2052
2053 { "=", 0 },
2054 { "equal", 0 },
2055 { "eq", 0 },
2056
2057 { ">", +1 },
2058 { "greater", +1 },
2059 { "gt", +1 },
2060
2061 TVEC_ENDENUM
2062};
2063
2064const struct tvec_ienuminfo tvenum_cmp =
2065 { "cmp", cmp_assoc, &tvrange_int };
2066
67b5031e
MW
2067/* --- @tvec_claimeq_tenum@ --- *
2068 *
2069 * Arguments: @struct tvec_state *tv@ = test-vector state
2070 * @const struct tvec_typeenuminfo *ei@ = enumeration type info
2071 * @ty t0, t1@ = two values
2072 * @const char *file@, @unsigned @lno@ = calling file and line
2073 * @const char *expr@ = the expression to quote on failure
2074 *
2075 * Returns: Nonzero if @t0@ and @t1@ are equal, otherwise zero.
2076 *
2077 * Use: Check that values of @t0@ and @t1@ are equal. As for
2078 * @tvec_claim@ above, a test case is automatically begun and
2079 * ended if none is already underway. If the values are
2080 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
2081 * mismatched values are dumped: @t0@ is printed as the output
2082 * value and @t1@ is printed as the input reference.
2083 */
2084
b64eb60f 2085#define DEFCLAIM(tag, ty, slot) \
e63124bc
MW
2086 int tvec_claimeq_##slot##enum \
2087 (struct tvec_state *tv, \
2088 const struct tvec_##slot##enuminfo *ei, ty e0, ty e1, \
2089 const char *file, unsigned lno, const char *expr) \
b64eb60f
MW
2090 { \
2091 union tvec_misc arg; \
2092 \
b64eb60f 2093 arg.p = ei; \
3efcfd2d
MW
2094 tv->out[0].v.slot = GET_##tag(e0); \
2095 tv->in[0].v.slot = GET_##tag(e1); \
2096 return (tvec_claimeq(tv, &tvty_##slot##enum, &arg, \
2097 file, lno, expr)); \
b64eb60f
MW
2098 }
2099#define GET_INT(e) (e)
2100#define GET_UINT(e) (e)
e63124bc 2101#define GET_FLT(e) (e)
b64eb60f
MW
2102#define GET_PTR(e) ((/*unconst*/ void *)(e))
2103TVEC_MISCSLOTS(DEFCLAIM)
2104#undef DEFCLAIM
2105#undef GET_INT
2106#undef GET_UINT
e63124bc 2107#undef GET_FLT
b64eb60f
MW
2108#undef GET_PTR
2109
2110/*----- Flag types --------------------------------------------------------*/
2111
c81c35df
MW
2112/* Flag types are initialized, compared, and serialized as unsigned
2113 * integers.
2114 */
2115
2116/* --- @parse_flags@ --- *
2117 *
2118 * Arguments: @union tvec_regval *rv@ = register value
2119 * @const struct tvec_regdef *rd@ = register definition
2120 * @struct tvec_state *tv@ = test-vector state
2121 *
2122 * Returns: Zero on success, %$-1$% on error.
2123 *
2124 * Use: Parse a register value from an input file.
2125 *
2126 * The input syntax is a sequence of items separated by `|'
2127 * signs. Each item may be the symbolic name of a field value,
2128 * or a literal unsigned integer. The masks associated with the
2129 * given symbolic names must be disjoint. The resulting
2130 * numerical value is simply the bitwise OR of the given values.
2131 */
2132
882a39c1
MW
2133static int parse_flags(union tvec_regval *rv, const struct tvec_regdef *rd,
2134 struct tvec_state *tv)
b64eb60f
MW
2135{
2136 const struct tvec_flaginfo *fi = rd->arg.p;
2137 const struct tvec_flag *f;
2138 unsigned long m = 0, v = 0, t;
2139 dstr d = DSTR_INIT;
882a39c1 2140 int ch, rc;
b64eb60f
MW
2141
2142 for (;;) {
c81c35df
MW
2143
2144 /* Read the next item. */
882a39c1
MW
2145 DRESET(&d);
2146 if (tvec_readword(tv, &d, "|;", "flag name or integer"))
2147 { rc = -1; goto end; }
b64eb60f 2148
c81c35df 2149 /* Try to find a matching entry in the table. */
b64eb60f
MW
2150 for (f = fi->fv; f->tag; f++)
2151 if (STRCMP(f->tag, ==, d.buf)) {
882a39c1
MW
2152 if (m&f->m)
2153 { tvec_error(tv, "colliding flag setting"); rc = -1; goto end; }
2154 else
2155 { m |= f->m; v |= f->v; goto next; }
b64eb60f
MW
2156 }
2157
c81c35df 2158 /* Otherwise, try to parse it as a raw integer. */
e63124bc
MW
2159 if (parse_unsigned(&t, d.buf, fi->range, tv))
2160 { rc = -1; goto end; }
882a39c1 2161 v |= t;
c81c35df 2162
b64eb60f 2163 next:
c81c35df
MW
2164 /* Advance to the next token. If it's a separator then consume it, and
2165 * go round again. Otherwise we stop here.
2166 */
b64eb60f 2167 if (tvec_nexttoken(tv)) break;
882a39c1
MW
2168 ch = getc(tv->fp);
2169 if (ch != '|') { tvec_syntax(tv, ch, "`|'"); rc = -1; goto end; }
c81c35df 2170 if (tvec_nexttoken(tv))
882a39c1 2171 { tvec_syntax(tv, '\n', "flag name or integer"); rc = -1; goto end; }
b64eb60f 2172 }
c81c35df
MW
2173
2174 /* Done. */
2175 rv->u = v; rc = 0;
882a39c1
MW
2176end:
2177 dstr_destroy(&d);
2178 return (rc);
b64eb60f
MW
2179}
2180
c81c35df
MW
2181/* --- @dump_flags@ --- *
2182 *
2183 * Arguments: @const union tvec_regval *rv@ = register value
2184 * @const struct tvec_regdef *rd@ = register definition
2185 * @unsigned style@ = output style (@TVSF_...@)
2186 * @const struct gprintf_ops *gops@, @void *gp@ = format output
2187 *
2188 * Returns: ---
2189 *
2190 * Use: Dump a register value to the format output.
2191 *
2192 * The table of symbolic names and their associated values and
2193 * masks is repeatedly scanned, in order, to find disjoint
2194 * matches -- i.e., entries whose value matches the target value
2195 * in the bit positions indicated by the mask, and whose mask
2196 * doesn't overlap with any previously found matches; the names
2197 * are then output, separated by `|'. Any remaining nonzero
2198 * bits not covered by any of the matching masks are output as a
2199 * single literal integer, in hex.
2200 *
2201 * Unless compact output is requested, or no symbolic names were
2202 * found, the raw numeric value is also printed in hex, as a
2203 * comment.
2204 */
2205
b64eb60f
MW
2206static void dump_flags(const union tvec_regval *rv,
2207 const struct tvec_regdef *rd,
e63124bc
MW
2208 unsigned style,
2209 const struct gprintf_ops *gops, void *go)
b64eb60f
MW
2210{
2211 const struct tvec_flaginfo *fi = rd->arg.p;
2212 const struct tvec_flag *f;
c81c35df 2213 unsigned long m = ~0ul, v = rv->u;
b64eb60f
MW
2214 const char *sep;
2215
2216 for (f = fi->fv, sep = ""; f->tag; f++)
2217 if ((m&f->m) && (v&f->m) == f->v) {
e63124bc 2218 gprintf(gops, go, "%s%s", sep, f->tag); m &= ~f->m;
b64eb60f
MW
2219 sep = style&TVSF_COMPACT ? "|" : " | ";
2220 }
2221
e63124bc 2222 if (v&m) gprintf(gops, go, "%s0x%0*lx", sep, hex_width(v), v&m);
b64eb60f 2223
c81c35df 2224 if (m != ~0ul && !(style&TVSF_COMPACT))
e63124bc 2225 gprintf(gops, go, " ; = 0x%0*lx", hex_width(rv->u), rv->u);
b64eb60f
MW
2226}
2227
c81c35df 2228/* Flags type definition. */
b64eb60f 2229const struct tvec_regty tvty_flags = {
3efcfd2d 2230 init_uint, trivial_release, eq_uint,
b64eb60f
MW
2231 tobuf_uint, frombuf_uint,
2232 parse_flags, dump_flags
2233};
2234
67b5031e
MW
2235/* --- @tvec_claimeq_flags@ --- *
2236 *
2237 * Arguments: @struct tvec_state *tv@ = test-vector state
2238 * @const struct tvec_flaginfo *fi@ = flags type info
2239 * @unsigned long f0, f1@ = two values
2240 * @const char *file@, @unsigned @lno@ = calling file and line
2241 * @const char *expr@ = the expression to quote on failure
2242 *
2243 * Returns: Nonzero if @f0@ and @f1@ are equal, otherwise zero.
2244 *
2245 * Use: Check that values of @f0@ and @f1@ are equal. As for
2246 * @tvec_claim@ above, a test case is automatically begun and
2247 * ended if none is already underway. If the values are
2248 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
2249 * mismatched values are dumped: @f0@ is printed as the output
2250 * value and @f1@ is printed as the input reference.
2251 */
2252
b64eb60f
MW
2253int tvec_claimeq_flags(struct tvec_state *tv,
2254 const struct tvec_flaginfo *fi,
2255 unsigned long f0, unsigned long f1,
2256 const char *file, unsigned lno, const char *expr)
2257{
2258 union tvec_misc arg;
2259
3efcfd2d 2260 arg.p = fi; tv->out[0].v.u = f0; tv->in[0].v.u = f1;
b64eb60f
MW
2261 return (tvec_claimeq(tv, &tvty_flags, &arg, file, lno, expr));
2262}
2263
e63124bc
MW
2264/*----- Characters --------------------------------------------------------*/
2265
c81c35df
MW
2266/* Character values are initialized and compared as signed integers. */
2267
2268/* --- @tobuf_char@ --- *
2269 *
2270 * Arguments: @buf *b@ = buffer
2271 * @const union tvec_regval *rv@ = register value
2272 * @const struct tvec_regdef *rd@ = register definition
2273 *
2274 * Returns: Zero on success, %$-1$% on failure.
2275 *
2276 * Use: Serialize a register value to a buffer.
2277 *
2278 * Character values are serialized as little-endian 32-bit
2279 * unsigned integers, with %|EOF|% serialized as all-bits-set.
2280 */
2281
e63124bc 2282static int tobuf_char(buf *b, const union tvec_regval *rv,
67b5031e 2283 const struct tvec_regdef *rd)
e63124bc
MW
2284{
2285 uint32 u;
c81c35df 2286
e63124bc
MW
2287 if (0 <= rv->i && rv->i <= UCHAR_MAX) u = rv->i;
2288 else if (rv->i == EOF) u = MASK32;
2289 else return (-1);
2290 return (buf_putu32l(b, u));
2291}
2292
c81c35df
MW
2293/* --- @frombuf_char@ --- *
2294 *
2295 * Arguments: @buf *b@ = buffer
2296 * @union tvec_regval *rv@ = register value
2297 * @const struct tvec_regdef *rd@ = register definition
2298 *
2299 * Returns: Zero on success, %$-1$% on failure.
2300 *
2301 * Use: Deserialize a register value from a buffer.
2302 *
2303 * Character values are serialized as little-endian 32-bit
2304 * unsigned integers, with %|EOF|% serialized as all-bits-set.
2305 */
2306
e63124bc 2307static int frombuf_char(buf *b, union tvec_regval *rv,
67b5031e 2308 const struct tvec_regdef *rd)
e63124bc
MW
2309{
2310 uint32 u;
2311
2312 if (buf_getu32l(b, &u)) return (-1);
2313 if (0 <= u && u <= UCHAR_MAX) rv->i = u;
2314 else if (u == MASK32) rv->i = EOF;
2315 else return (-1);
2316 return (0);
2317}
2318
c81c35df
MW
2319/* --- @parse_char@ --- *
2320 *
2321 * Arguments: @union tvec_regval *rv@ = register value
2322 * @const struct tvec_regdef *rd@ = register definition
2323 * @struct tvec_state *tv@ = test-vector state
2324 *
2325 * Returns: Zero on success, %$-1$% on error.
2326 *
2327 * Use: Parse a register value from an input file.
2328 *
2329 * A character value can be given by symbolic name, with a
2330 * leading `%|#|%'; or a character or `%|\|%'-escape sequence,
2331 * optionally in single quotes.
2332 *
2333 * The following escape sequences and character names are
2334 * recognized.
2335 *
2336 * * `%|#eof|%' is the special end-of-file marker.
2337 *
2338 * * `%|#nul|%' is the NUL character, sometimes used to
2339 * terminate strings.
2340 *
2341 * * `%|bell|%', `%|bel|%', `%|ding|%', or `%|\a|%' is the BEL
2342 * character used to ring the terminal bell (or do some other
2343 * thing to attract the user's attention).
2344 *
2345 * * %|#backspace|%, %|#bs|%, or %|\b|% is the backspace
2346 * character, used to move the cursor backwords by one cell.
2347 *
2348 * * %|#escape|% %|#esc|%, or%|\e|% is the escape character,
2349 * used to introduce special terminal commands.
2350 *
2351 * * %|#formfeed|%, %|#ff|%, or %|\f|% is the formfeed
2352 * character, used to separate pages of text.
2353 *
2354 * * %|#newline|%, %|#linefeed|%, %|#lf|%, %|#nl|%, or %|\n|% is
2355 * the newline character, used to terminate lines of text or
2356 * advance the cursor to the next line (perhaps without
2357 * returning it to the start of the line).
2358 *
2359 * * %|#return|%, %|#carriage-return|%, %|#cr|%, or %|\r|% is
2360 * the carriage-return character, used to return the cursor to
2361 * the start of the line.
2362 *
2363 * * %|#tab|%, %|#horizontal-tab|%, %|#ht|%, or %|\t|% is the
2364 * tab character, used to advance the cursor to the next tab
2365 * stop on the current line.
2366 *
2367 * * %|#vertical-tab|%, %|#vt|%, %|\v|% is the vertical tab
2368 * character.
2369 *
2370 * * %|#space|%, %|#spc|% is the space character.
2371 *
2372 * * %|#delete|%, %|#del|% is the delete character, used to
2373 * erase the most recent character.
2374 *
2375 * * %|\'|% is the single-quote character.
2376 *
2377 * * %|\\|% is the backslash character.
2378 *
2379 * * %|\"|% is the double-quote character.
2380 *
2381 * * %|\NNN|% or %|\{NNN}|% is the character with code NNN in
2382 * octal. The NNN may be up to three digits long.
2383 *
2384 * * %|\xNN|% or %|\x{NN}|% is the character with code NNN in
2385 * hexadecimal.
2386 */
2387
e63124bc
MW
2388static int parse_char(union tvec_regval *rv, const struct tvec_regdef *rd,
2389 struct tvec_state *tv)
2390{
2391 dstr d = DSTR_INIT;
2392 int ch, rc;
2393 unsigned f = 0;
2394#define f_quote 1u
2395
c81c35df 2396 /* Inspect the character to see what we're up against. */
e63124bc 2397 ch = getc(tv->fp);
c81c35df 2398
e63124bc 2399 if (ch == '#') {
c81c35df
MW
2400 /* It looks like a special token. Push the `%|#|%' back and fetch the
2401 * whole word. If there's just the `%|#|%' after all, then treat it as
2402 * literal.
2403 */
2404
e63124bc
MW
2405 ungetc(ch, tv->fp);
2406 if (tvec_readword(tv, &d, ";", "character name")) { rc = -1; goto end; }
c81c35df
MW
2407 if (STRCMP(d.buf, !=, "#")) {
2408 if (read_charname(&ch, d.buf, RCF_EOFOK)) {
2409 rc = tvec_error(tv, "unknown character name `%s'", d.buf);
2410 goto end;
2411 }
2412 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
2413 rv->i = ch; rc = 0; goto end;
e63124bc 2414 }
e63124bc
MW
2415 }
2416
c81c35df
MW
2417 /* If this is a single quote then we expect to see a matching one later,
2418 * and we should process backslash escapes. Get the next character and see
2419 * what happens.
2420 */
e63124bc 2421 if (ch == '\'') { f |= f_quote; ch = getc(tv->fp); }
c81c35df
MW
2422
2423 /* Main character dispatch. */
e63124bc 2424 switch (ch) {
c81c35df 2425
67b5031e 2426 case ';':
c81c35df 2427 /* Unquoted, semicolon begins a comment. */
67b5031e 2428 if (!(f&f_quote)) { rc = tvec_syntax(tv, ch, "character"); goto end; }
c81c35df
MW
2429 else goto plain;
2430
67b5031e 2431 case '\n':
c81c35df
MW
2432 /* A newline. If we saw a single quote, then treat that as literal.
2433 * Otherwise this is an error.
2434 */
2435 if (!(f&f_quote)) goto nochar;
2436 else { f &= ~f_quote; ungetc(ch, tv->fp); ch = '\''; goto plain; }
2437
67b5031e 2438 case EOF:
c81c35df
MW
2439 /* End-of-file. Similar to newline, but with slightly different
2440 * effects on the parse state.
2441 */
2442 if (!(f&f_quote)) goto nochar;
2443 else { f &= ~f_quote; ch = '\''; goto plain; }
2444
2445 case '\'': nochar:
2446 /* A single quote. This must be the second of a pair, and there should
2447 * have been a character or escape sequence between them.
2448 */
e63124bc 2449 rc = tvec_syntax(tv, ch, "character"); goto end;
c81c35df 2450
e63124bc 2451 case '\\':
c81c35df 2452 /* A backslash. Read a character escape. */
67b5031e 2453 if (read_charesc(&ch, tv)) return (-1);
c81c35df 2454
e63124bc 2455 default: plain:
c81c35df 2456 /* Anything else. Treat as literal. */
e63124bc
MW
2457 rv->i = ch; break;
2458 }
c81c35df
MW
2459
2460 /* If we saw an opening quote, then expect the closing quote. */
e63124bc
MW
2461 if (f&f_quote) {
2462 ch = getc(tv->fp);
2463 if (ch != '\'') { rc = tvec_syntax(tv, ch, "`''"); goto end; }
2464 }
c81c35df
MW
2465
2466 /* Done. */
e63124bc
MW
2467 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
2468 rc = 0;
2469end:
2470 dstr_destroy(&d);
2471 return (rc);
2472
2473#undef f_quote
2474}
2475
c81c35df
MW
2476/* --- @dump_char@ --- *
2477 *
2478 * Arguments: @const union tvec_regval *rv@ = register value
2479 * @const struct tvec_regdef *rd@ = register definition
2480 * @unsigned style@ = output style (@TVSF_...@)
2481 * @const struct gprintf_ops *gops@, @void *gp@ = format output
2482 *
2483 * Returns: ---
2484 *
2485 * Use: Dump a register value to the format output.
2486 *
2487 * Character values are dumped as their symbolic names, if any,
2488 * or as a character or escape sequence within single quotes
2489 * (which may be omitted in compact style). If compact output
2490 * is not requested, then the single-quoted representation (for
2491 * characters dumped as symbolic names) and integer code in
2492 * decimal and hex are printed as a comment.
2493 */
2494
e63124bc
MW
2495static void dump_char(const union tvec_regval *rv,
2496 const struct tvec_regdef *rd,
2497 unsigned style,
2498 const struct gprintf_ops *gops, void *go)
2499{
67b5031e
MW
2500 const char *p;
2501 unsigned f = 0;
2502#define f_semi 1u
2503
c81c35df 2504 /* Print a character name if we can find one. */
67b5031e
MW
2505 p = find_charname(rv->i, (style&TVSF_COMPACT) ? CTF_SHORT : CTF_PREFER);
2506 if (p) {
2507 gprintf(gops, go, "%s", p);
2508 if (style&TVSF_COMPACT) return;
2509 else { gprintf(gops, go, " ;"); f |= f_semi; }
2510 }
2511
c81c35df
MW
2512 /* If the character isn't @EOF@ then print it as a single-quoted thing.
2513 * In compact style, see if we can omit the quotes.
2514 */
67b5031e
MW
2515 if (rv->i >= 0) {
2516 if (f&f_semi) gprintf(gops, go, " = ");
2517 switch (rv->i) {
2518 case ' ': case '\\': case '\'': quote:
2519 format_char(gops, go, rv->i);
2520 break;
2521 default:
2522 if (!(style&TVSF_COMPACT) || !isprint(rv->i)) goto quote;
2523 gprintf(gops, go, "%c", (int)rv->i);
2524 return;
2525 }
2526 }
e63124bc 2527
c81c35df 2528 /* And the character code as an integer. */
e63124bc 2529 if (!(style&TVSF_COMPACT)) {
67b5031e
MW
2530 if (!(f&f_semi)) gprintf(gops, go, " ;");
2531 gprintf(gops, go, " = %ld = ", rv->i);
3efcfd2d 2532 format_signed_hex(gops, go, rv->i);
e63124bc 2533 }
67b5031e
MW
2534
2535#undef f_semi
e63124bc
MW
2536}
2537
c81c35df 2538/* Character type definition. */
e63124bc 2539const struct tvec_regty tvty_char = {
3efcfd2d 2540 init_int, trivial_release, eq_int,
e63124bc
MW
2541 tobuf_char, frombuf_char,
2542 parse_char, dump_char
2543};
2544
67b5031e
MW
2545/* --- @tvec_claimeq_char@ --- *
2546 *
2547 * Arguments: @struct tvec_state *tv@ = test-vector state
2548 * @int ch0, ch1@ = two character codes
2549 * @const char *file@, @unsigned @lno@ = calling file and line
2550 * @const char *expr@ = the expression to quote on failure
2551 *
2552 * Returns: Nonzero if @ch0@ and @ch1@ are equal, otherwise zero.
2553 *
2554 * Use: Check that values of @ch0@ and @ch1@ are equal. As for
2555 * @tvec_claim@ above, a test case is automatically begun and
2556 * ended if none is already underway. If the values are
2557 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
2558 * mismatched values are dumped: @ch0@ is printed as the output
2559 * value and @ch1@ is printed as the input reference.
2560 */
2561
e63124bc
MW
2562int tvec_claimeq_char(struct tvec_state *tv, int c0, int c1,
2563 const char *file, unsigned lno, const char *expr)
2564{
3efcfd2d 2565 tv->out[0].v.i = c0; tv->in[0].v.i = c1;
e63124bc
MW
2566 return (tvec_claimeq(tv, &tvty_char, 0, file, lno, expr));
2567}
2568
b64eb60f
MW
2569/*----- Text and byte strings ---------------------------------------------*/
2570
c81c35df
MW
2571/* --- @init_text@, @init_bytes@ --- *
2572 *
2573 * Arguments: @union tvec_regval *rv@ = register value
2574 * @const struct tvec_regdef *rd@ = register definition
2575 *
2576 * Returns: ---
2577 *
2578 * Use: Initialize a register value.
2579 *
2580 * Text and binary string values are initialized with a null
2581 * pointer and zero length.
2582 */
2583
2584static void init_text(union tvec_regval *rv, const struct tvec_regdef *rd)
2585 { rv->text.p = 0; rv->text.sz = 0; }
b64eb60f
MW
2586
2587static void init_bytes(union tvec_regval *rv, const struct tvec_regdef *rd)
2588 { rv->bytes.p = 0; rv->bytes.sz = 0; }
2589
c81c35df
MW
2590/* --- @release_string@, @release_bytes@ --- *
2591 *
2592 * Arguments: @const union tvec_regval *rv@ = register value
2593 * @const struct tvec_regdef *rd@ = register definition
2594 *
2595 * Returns: ---
2596 *
2597 * Use: Release resources held by a register value.
2598 *
2599 * Text and binary string buffers are freed.
2600 */
2601
2602static void release_text(union tvec_regval *rv,
2603 const struct tvec_regdef *rd)
2604 { xfree(rv->text.p); }
b64eb60f
MW
2605
2606static void release_bytes(union tvec_regval *rv,
2607 const struct tvec_regdef *rd)
2608 { xfree(rv->bytes.p); }
2609
c81c35df
MW
2610/* --- @eq_text@, @eq_bytes@ --- *
2611 *
2612 * Arguments: @const union tvec_regval *rv0, *rv1@ = register values
2613 * @const struct tvec_regdef *rd@ = register definition
2614 *
2615 * Returns: Nonzero if the values are equal, zero if unequal
2616 *
2617 * Use: Compare register values for equality.
2618 */
2619
2620static int eq_text(const union tvec_regval *rv0,
2621 const union tvec_regval *rv1,
2622 const struct tvec_regdef *rd)
b64eb60f 2623{
c81c35df
MW
2624 return (rv0->text.sz == rv1->text.sz &&
2625 (!rv0->text.sz ||
2626 MEMCMP(rv0->text.p, ==, rv1->text.p, rv1->text.sz)));
b64eb60f
MW
2627}
2628
2629static int eq_bytes(const union tvec_regval *rv0,
2630 const union tvec_regval *rv1,
2631 const struct tvec_regdef *rd)
2632{
2633 return (rv0->bytes.sz == rv1->bytes.sz &&
2634 (!rv0->bytes.sz ||
2635 MEMCMP(rv0->bytes.p, ==, rv1->bytes.p, rv1->bytes.sz)));
2636}
2637
c81c35df
MW
2638/* --- @tobuf_text@, @tobuf_bytes@ --- *
2639 *
2640 * Arguments: @buf *b@ = buffer
2641 * @const union tvec_regval *rv@ = register value
2642 * @const struct tvec_regdef *rd@ = register definition
2643 *
2644 * Returns: Zero on success, %$-1$% on failure.
2645 *
2646 * Use: Serialize a register value to a buffer.
2647 *
2648 * Text and binary string values are serialized as a little-
2649 * endian 64-bit length %$n$% in bytes followed by %$n$% bytes
2650 * of string data.
2651 */
2652
2653static int tobuf_text(buf *b, const union tvec_regval *rv,
2654 const struct tvec_regdef *rd)
2655 { return (buf_putmem64l(b, rv->text.p, rv->text.sz)); }
b64eb60f
MW
2656
2657static int tobuf_bytes(buf *b, const union tvec_regval *rv,
2658 const struct tvec_regdef *rd)
c81c35df 2659 { return (buf_putmem64l(b, rv->bytes.p, rv->bytes.sz)); }
b64eb60f 2660
c81c35df
MW
2661/* --- @frombuf_text@, @frombuf_bytes@ --- *
2662 *
2663 * Arguments: @buf *b@ = buffer
2664 * @union tvec_regval *rv@ = register value
2665 * @const struct tvec_regdef *rd@ = register definition
2666 *
2667 * Returns: Zero on success, %$-1$% on failure.
2668 *
2669 * Use: Deserialize a register value from a buffer.
2670 *
2671 * Text and binary string values are serialized as a little-
2672 * endian 64-bit length %$n$% in bytes followed by %$n$% bytes
2673 * of string data.
2674 */
2675
2676static int frombuf_text(buf *b, union tvec_regval *rv,
2677 const struct tvec_regdef *rd)
b64eb60f
MW
2678{
2679 const void *p;
2680 size_t sz;
2681
c81c35df
MW
2682 p = buf_getmem64l(b, &sz); if (!p) return (-1);
2683 tvec_alloctext(rv, sz); memcpy(rv->text.p, p, sz); rv->text.p[sz] = 0;
b64eb60f
MW
2684 return (0);
2685}
2686
2687static int frombuf_bytes(buf *b, union tvec_regval *rv,
2688 const struct tvec_regdef *rd)
2689{
2690 const void *p;
2691 size_t sz;
2692
c81c35df 2693 p = buf_getmem64l(b, &sz); if (!p) return (-1);
b64eb60f
MW
2694 tvec_allocbytes(rv, sz); memcpy(rv->bytes.p, p, sz);
2695 return (0);
2696}
2697
c81c35df
MW
2698/* --- @check_string_length@ --- *
2699 *
2700 * Arguments: @size_t sz@ = found string length
2701 * @const struct tvec_urange *ur@ = acceptable range
2702 * @struct tvec_state *tv@ = test-vector state
2703 *
2704 * Returns: Zero on success, %$-1$% on error.
2705 *
2706 * Use: Checks that @sz@ is within the bounds described by @ur@,
2707 * reporting an error if not.
2708 */
2709
882a39c1
MW
2710static int check_string_length(size_t sz, const struct tvec_urange *ur,
2711 struct tvec_state *tv)
b64eb60f
MW
2712{
2713 if (ur && (ur->min > sz || sz > ur->max))
882a39c1 2714 return (tvec_error(tv,
67b5031e 2715 "invalid string length %lu; must be in [%lu .. %lu]",
882a39c1
MW
2716 (unsigned long)sz, ur->min, ur->max));
2717 return (0);
b64eb60f
MW
2718}
2719
c81c35df
MW
2720/* --- @parse_text@, @parse_bytes@ --- *
2721 *
2722 * Arguments: @union tvec_regval *rv@ = register value
2723 * @const struct tvec_regdef *rd@ = register definition
2724 * @struct tvec_state *tv@ = test-vector state
2725 *
2726 * Returns: Zero on success, %$-1$% on error.
2727 *
2728 * Use: Parse a register value from an input file.
2729 *
2730 * The input format for both kinds of strings is basically the
2731 * same: a `compound string', consisting of
2732 *
2733 * * single-quoted strings, which are interpreted entirely
2734 * literally, but can't contain single quotes or newlines;
2735 *
2736 * * double-quoted strings, in which `%|\|%'-escapes are
2737 * interpreted as for characters;
2738 *
2739 * * character names, marked by an initial `%|#|%' sign;
2740 *
2741 * * special tokens marked by an initial `%|!|%' sign; or
2742 *
2743 * * barewords interpreted according to the current coding
2744 * scheme.
2745 *
2746 * The special tokens are
2747 *
2748 * * `%|!bare|%', which causes subsequent sequences of
2749 * barewords to be treated as plain text;
2750 *
2751 * * `%|!hex|%', `%|!base32|%', `%|!base64|%', which cause
2752 * subsequent barewords to be decoded in the requested
2753 * manner.
2754 *
2755 * * `%|!repeat|% %$n$% %|{|% %%\textit{string}%% %|}|%',
2756 * which includes %$n$% copies of the (compound) string.
2757 *
2758 * The only difference between text and binary strings is that
2759 * the initial coding scheme is %|bare|% for text strings and
2760 * %|hex|% for binary strings.
2761 */
2762
2763static int parse_text(union tvec_regval *rv, const struct tvec_regdef *rd,
2764 struct tvec_state *tv)
b64eb60f 2765{
c81c35df 2766 void *p = rv->text.p;
b64eb60f 2767
c81c35df 2768 if (read_compound_string(&p, &rv->text.sz, TVCODE_BARE, 0, tv))
67b5031e 2769 return (-1);
c81c35df
MW
2770 rv->text.p = p;
2771 if (check_string_length(rv->text.sz, rd->arg.p, tv)) return (-1);
882a39c1 2772 return (0);
b64eb60f
MW
2773}
2774
882a39c1
MW
2775static int parse_bytes(union tvec_regval *rv, const struct tvec_regdef *rd,
2776 struct tvec_state *tv)
b64eb60f
MW
2777{
2778 void *p = rv->bytes.p;
2779
67b5031e
MW
2780 if (read_compound_string(&p, &rv->bytes.sz, TVCODE_HEX, 0, tv))
2781 return (-1);
882a39c1
MW
2782 rv->bytes.p = p;
2783 if (check_string_length(rv->bytes.sz, rd->arg.p, tv)) return (-1);
2784 return (0);
b64eb60f
MW
2785}
2786
c81c35df
MW
2787/* --- @dump_text@, @dump_bytes@ --- *
2788 *
2789 * Arguments: @const union tvec_regval *rv@ = register value
2790 * @const struct tvec_regdef *rd@ = register definition
2791 * @unsigned style@ = output style (@TVSF_...@)
2792 * @const struct gprintf_ops *gops@, @void *gp@ = format output
2793 *
2794 * Returns: ---
2795 *
2796 * Use: Dump a register value to the format output.
2797 *
2798 * Text string values are dumped as plain text, in double quotes
2799 * if necessary, and using backslash escape sequences for
2800 * nonprintable characters. Unless compact output is requested,
2801 * strings consisting of multiple lines are dumped with each
2802 * line of the string on a separate output line.
2803 *
2804 * Binary string values are dumped in hexadecimal. In compact
2805 * style, the output simply consists of a single block of hex
2806 * digits. Otherwise, the dump is a display consisting of
2807 * groups of hex digits, with comments showing the offset (if
2808 * the string is long enough) and the corresponding plain text.
2809 *
2810 * Empty strings are dumped as %|""|%.
2811 */
2812
2813static void dump_text(const union tvec_regval *rv,
2814 const struct tvec_regdef *rd,
2815 unsigned style,
2816 const struct gprintf_ops *gops, void *go)
b64eb60f
MW
2817{
2818 const unsigned char *p, *q, *l;
b64eb60f
MW
2819 unsigned f = 0;
2820#define f_nonword 1u
2821#define f_newline 2u
2822
c81c35df 2823 if (!rv->text.sz) { gprintf(gops, go, "\"\""); return; }
b64eb60f 2824
c81c35df 2825 p = (const unsigned char *)rv->text.p; l = p + rv->text.sz;
67b5031e
MW
2826 switch (*p) {
2827 case '!': case '#': case ';': case '"': case '\'':
2828 case '(': case '{': case '[': case ']': case '}': case ')':
2829 f |= f_nonword; break;
2830 }
b64eb60f
MW
2831 for (q = p; q < l; q++)
2832 if (*q == '\n' && q != l - 1) f |= f_newline;
2833 else if (!*q || !isgraph(*q) || *q == '\\') f |= f_nonword;
e63124bc 2834 if (f&f_newline) { gprintf(gops, go, "\n\t"); goto quote; }
b64eb60f 2835 else if (f&f_nonword) goto quote;
67b5031e 2836
c81c35df 2837 gops->putm(go, (const char *)p, rv->text.sz);
67b5031e 2838 return;
b64eb60f
MW
2839
2840quote:
e63124bc 2841 gprintf(gops, go, "\"");
b64eb60f 2842 for (q = p; q < l; q++)
e63124bc
MW
2843 if (!isprint(*q) || *q == '"') {
2844 if (p < q) gops->putm(go, (const char *)p, q - p);
67b5031e 2845 if (*q != '\n' || (style&TVSF_COMPACT))
3efcfd2d 2846 format_charesc(gops, go, *q, FCF_BRACE);
67b5031e
MW
2847 else {
2848 if (q + 1 == l) { gprintf(gops, go, "\\n\""); return; }
2849 else gprintf(gops, go, "\\n\"\n\t\"");
2850 }
2851 p = q + 1;
b64eb60f 2852 }
e63124bc
MW
2853 if (p < q) gops->putm(go, (const char *)p, q - p);
2854 gprintf(gops, go, "\"");
b64eb60f
MW
2855
2856#undef f_nonword
2857#undef f_newline
2858}
2859
2860static void dump_bytes(const union tvec_regval *rv,
2861 const struct tvec_regdef *rd,
e63124bc
MW
2862 unsigned style,
2863 const struct gprintf_ops *gops, void *go)
b64eb60f
MW
2864{
2865 const unsigned char *p = rv->bytes.p, *l = p + rv->bytes.sz;
2866 size_t off, sz = rv->bytes.sz;
2867 unsigned i, n;
2868 int wd;
2869
2870 if (!sz) {
e63124bc 2871 gprintf(gops, go, style&TVSF_COMPACT ? "\"\"" : "\"\" ; empty");
b64eb60f
MW
2872 return;
2873 }
2874
2875 if (style&TVSF_COMPACT) {
e63124bc 2876 while (p < l) gprintf(gops, go, "%02x", *p++);
b64eb60f
MW
2877 return;
2878 }
2879
e63124bc 2880 if (sz > 16) gprintf(gops, go, "\n\t");
b64eb60f
MW
2881
2882 off = 0; wd = hex_width(sz);
2883 while (p < l) {
2884 if (l - p < 16) n = l - p;
2885 else n = 16;
2886
67b5031e 2887 for (i = 0; i < n; i++) {
e63124bc
MW
2888 if (i < n) gprintf(gops, go, "%02x", p[i]);
2889 else gprintf(gops, go, " ");
67b5031e 2890 if (i < n - 1 && i%4 == 3) gprintf(gops, go, " ");
b64eb60f 2891 }
e63124bc
MW
2892 gprintf(gops, go, " ; ");
2893 if (sz > 16) gprintf(gops, go, "[%0*lx] ", wd, (unsigned long)off);
b64eb60f 2894 for (i = 0; i < n; i++)
e63124bc 2895 gprintf(gops, go, "%c", isprint(p[i]) ? p[i] : '.');
b64eb60f 2896 p += n; off += n;
e63124bc 2897 if (p < l) gprintf(gops, go, "\n\t");
b64eb60f
MW
2898 }
2899}
2900
c81c35df
MW
2901/* Text and byte string type definitions. */
2902const struct tvec_regty tvty_text = {
2903 init_text, release_text, eq_text,
2904 tobuf_text, frombuf_text,
2905 parse_text, dump_text
b64eb60f 2906};
b64eb60f 2907const struct tvec_regty tvty_bytes = {
e63124bc 2908 init_bytes, release_bytes, eq_bytes,
b64eb60f
MW
2909 tobuf_bytes, frombuf_bytes,
2910 parse_bytes, dump_bytes
2911};
2912
c81c35df 2913/* --- @tvec_claimeq_text@ --- *
67b5031e
MW
2914 *
2915 * Arguments: @struct tvec_state *tv@ = test-vector state
2916 * @const char *p0@, @size_t sz0@ = first string with length
2917 * @const char *p1@, @size_t sz1@ = second string with length
2918 * @const char *file@, @unsigned @lno@ = calling file and line
2919 * @const char *expr@ = the expression to quote on failure
2920 *
2921 * Returns: Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
2922 * zero.
2923 *
2924 * Use: Check that strings at @p0@ and @p1@ are equal. As for
2925 * @tvec_claim@ above, a test case is automatically begun and
2926 * ended if none is already underway. If the values are
2927 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
2928 * mismatched values are dumped: @p0@ is printed as the output
2929 * value and @p1@ is printed as the input reference.
2930 */
2931
c81c35df
MW
2932int tvec_claimeq_text(struct tvec_state *tv,
2933 const char *p0, size_t sz0,
2934 const char *p1, size_t sz1,
2935 const char *file, unsigned lno, const char *expr)
b64eb60f 2936{
c81c35df
MW
2937 tv->out[0].v.text.p = (/*unconst*/ char *)p0; tv->out[0].v.text.sz = sz0;
2938 tv->in[0].v.text.p =(/*unconst*/ char *) p1; tv->in[0].v.text.sz = sz1;
2939 return (tvec_claimeq(tv, &tvty_text, 0, file, lno, expr));
b64eb60f
MW
2940}
2941
/* --- @tvec_claimeq_textz@ --- *
 *
 * Arguments:	@struct tvec_state *tv@ = test-vector state
 *		@const char *p0, *p1@ = two strings to compare
 *		@const char *file@, @unsigned lno@ = calling file and line
 *		@const char *expr@ = the expression to quote on failure
 *
 * Returns:	Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
 *		zero.
 *
 * Use:		Check that strings at @p0@ and @p1@ are equal, as for
 *		@tvec_claimeq_text@, except that the strings are assumed
 *		null-terminated, so their lengths don't need to be supplied
 *		explicitly.
 */

int tvec_claimeq_textz(struct tvec_state *tv,
		       const char *p0, const char *p1,
		       const char *file, unsigned lno, const char *expr)
{
  /* Measure both strings and let the counted-length variant do the work. */
  return (tvec_claimeq_text(tv,
			    p0, strlen(p0),
			    p1, strlen(p1),
			    file, lno, expr));
}
2968
67b5031e
MW
2969/* --- @tvec_claimeq_bytes@ --- *
2970 *
2971 * Arguments: @struct tvec_state *tv@ = test-vector state
2972 * @const void *p0@, @size_t sz0@ = first string with length
2973 * @const void *p1@, @size_t sz1@ = second string with length
2974 * @const char *file@, @unsigned @lno@ = calling file and line
2975 * @const char *expr@ = the expression to quote on failure
2976 *
2977 * Returns: Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
2978 * zero.
2979 *
2980 * Use: Check that binary strings at @p0@ and @p1@ are equal. As for
2981 * @tvec_claim@ above, a test case is automatically begun and
2982 * ended if none is already underway. If the values are
2983 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
2984 * mismatched values are dumped: @p0@ is printed as the output
2985 * value and @p1@ is printed as the input reference.
2986 */
2987
b64eb60f
MW
2988int tvec_claimeq_bytes(struct tvec_state *tv,
2989 const void *p0, size_t sz0,
2990 const void *p1, size_t sz1,
2991 const char *file, unsigned lno, const char *expr)
2992{
3efcfd2d
MW
2993 tv->out[0].v.bytes.p = (/*unconst*/ void *)p0;
2994 tv->out[0].v.bytes.sz = sz0;
2995 tv->in[0].v.bytes.p = (/*unconst*/ void *)p1;
2996 tv->in[0].v.bytes.sz = sz1;
b64eb60f
MW
2997 return (tvec_claimeq(tv, &tvty_bytes, 0, file, lno, expr));
2998}
2999
c81c35df 3000/* --- @tvec_alloctext@, @tvec_allocbytes@ --- *
67b5031e
MW
3001 *
3002 * Arguments: @union tvec_regval *rv@ = register value
3003 * @size_t sz@ = required size
3004 *
3005 * Returns: ---
3006 *
3007 * Use: Allocated space in a text or binary string register. If the
3008 * current register size is sufficient, its buffer is left
3009 * alone; otherwise, the old buffer, if any, is freed and a
3010 * fresh buffer allocated. These functions are not intended to
3011 * be used to adjust a buffer repeatedly, e.g., while building
3012 * output incrementally: (a) they will perform badly, and (b)
3013 * the old buffer contents are simply discarded if reallocation
3014 * is necessary. Instead, use a @dbuf@ or @dstr@.
3015 *
c81c35df 3016 * The @tvec_alloctext@ function sneakily allocates an extra
67b5031e
MW
3017 * byte for a terminating zero. The @tvec_allocbytes@ function
3018 * doesn't do this.
3019 */
3020
c81c35df 3021void tvec_alloctext(union tvec_regval *rv, size_t sz)
67b5031e 3022{
c81c35df
MW
3023 if (rv->text.sz <= sz) { xfree(rv->text.p); rv->text.p = xmalloc(sz + 1); }
3024 rv->text.sz = sz;
67b5031e
MW
3025}
3026
3027void tvec_allocbytes(union tvec_regval *rv, size_t sz)
3028{
3029 if (rv->bytes.sz < sz) { xfree(rv->bytes.p); rv->bytes.p = xmalloc(sz); }
3030 rv->bytes.sz = sz;
3031}
3032
b64eb60f
MW
3033/*----- Buffer type -------------------------------------------------------*/
3034
c81c35df
MW
3035/* Buffers are initialized and released as binary strings. */
3036
3037/* --- @eq_buffer@ --- *
3038 *
3039 * Arguments: @const union tvec_regval *rv0, *rv1@ = register values
3040 * @const struct tvec_regdef *rd@ = register definition
3041 *
3042 * Returns: Nonzero if the values are equal, zero if unequal
3043 *
3044 * Use: Compare register values for equality.
3045 *
3046 * Buffer values are equal if and only if their sizes are equal;
3047 * their contents are %%\emph{not}%% compared.
3048 */
3049
b64eb60f
MW
3050static int eq_buffer(const union tvec_regval *rv0,
3051 const union tvec_regval *rv1,
3052 const struct tvec_regdef *rd)
3053 { return (rv0->bytes.sz == rv1->bytes.sz); }
3054
c81c35df
MW
3055/* --- @tobuf_buffer@ --- *
3056 *
3057 * Arguments: @buf *b@ = buffer
3058 * @const union tvec_regval *rv@ = register value
3059 * @const struct tvec_regdef *rd@ = register definition
3060 *
3061 * Returns: Zero on success, %$-1$% on failure.
3062 *
3063 * Use: Serialize a register value to a buffer.
3064 *
3065 * Buffer values are serialized as just their lengths, as
3066 * unsigned integers.
3067 */
3068
b64eb60f
MW
3069static int tobuf_buffer(buf *b, const union tvec_regval *rv,
3070 const struct tvec_regdef *rd)
3071 { return (unsigned_to_buf(b, rv->bytes.sz)); }
3072
c81c35df
MW
3073/* --- @allocate_buffer@ --- *
3074 *
3075 * Arguments: @union tvec_regval *rv@ = register value
3076 * @size_t sz@ = size to allocate
3077 *
3078 * Returns: ---
3079 *
3080 * Use: Allocate @sz@ bytes to the buffer and fill the space with a
3081 * distinctive pattern.
3082 */
3083
3084static void allocate_buffer(union tvec_regval *rv, size_t sz)
3085 { tvec_allocbytes(rv, sz); memset(rv->bytes.p, '?', sz); }
3086
3087/* --- @frombuf_buffer@ --- *
3088 *
3089 * Arguments: @buf *b@ = buffer
3090 * @union tvec_regval *rv@ = register value
3091 * @const struct tvec_regdef *rd@ = register definition
3092 *
3093 * Returns: Zero on success, %$-1$% on failure.
3094 *
3095 * Use: Deserialize a register value from a buffer.
3096 *
3097 * Buffer values are serialized as just their lengths, as
3098 * unsigned integers. The buffer is allocated on
3099 * deserialization and filled with a distinctive pattern.
3100 */
3101
b64eb60f
MW
3102static int frombuf_buffer(buf *b, union tvec_regval *rv,
3103 const struct tvec_regdef *rd)
3104{
3105 unsigned long u;
3106
3107 if (unsigned_from_buf(b, &u)) return (-1);
3108 if (u > (size_t)-1) return (-1);
c81c35df 3109 allocate_buffer(rv, u);
b64eb60f
MW
3110 return (0);
3111}
3112
c81c35df
MW
3113/* --- @parse_buffer@ --- *
3114 *
3115 * Arguments: @union tvec_regval *rv@ = register value
3116 * @const struct tvec_regdef *rd@ = register definition
3117 * @struct tvec_state *tv@ = test-vector state
3118 *
3119 * Returns: Zero on success, %$-1$% on error.
3120 *
3121 * Use: Parse a register value from an input file.
3122 *
3123 * The input format for a buffer value consists of an unsigned
3124 * integer followed by an optional unit specifier consisting of
3125 * an SI unit prefix and (optionally) the letter `B'. Unit
3126 * prefixes denote %%\emph{binary}%% multipliers, not decimal.
3127 *
3128 * The buffer is allocated and filled with a distinctive
3129 * pattern.
3130 */
3131
b64eb60f
MW
3132static const char units[] = "kMGTPEZY";
3133
882a39c1
MW
3134static int parse_buffer(union tvec_regval *rv,
3135 const struct tvec_regdef *rd,
3136 struct tvec_state *tv)
b64eb60f
MW
3137{
3138 dstr d = DSTR_INIT;
3efcfd2d 3139 const char *q, *unit;
b64eb60f
MW
3140 size_t pos;
3141 unsigned long u, t;
882a39c1 3142 int rc;
b64eb60f
MW
3143 unsigned f = 0;
3144#define f_range 1u
3145
882a39c1 3146 if (tvec_readword(tv, &d, ";", "buffer length")) { rc = -1; goto end; }
3efcfd2d 3147 if (parse_unsigned_integer(&u, &q, d.buf)) goto bad;
b64eb60f
MW
3148 if (!*q) {
3149 tvec_skipspc(tv); pos = d.len;
3150 if (!tvec_readword(tv, &d, ";", 0)) pos++;
3151 q = d.buf + pos;
3152 }
3153
3154 if (u > (size_t)-1) goto rangerr;
3155 for (t = u, unit = units; *unit; unit++) {
3156 if (t > (size_t)-1/1024) f |= f_range;
3157 else t *= 1024;
67b5031e 3158 if (*q == *unit) {
b64eb60f 3159 if (f&f_range) goto rangerr;
67b5031e 3160 u = t; q++; break;
b64eb60f
MW
3161 }
3162 }
67b5031e
MW
3163 if (*q == 'B') q++;
3164 if (*q) goto bad;
882a39c1 3165 if (check_string_length(u, rd->arg.p, tv)) { rc = -1; goto end; }
b64eb60f 3166
882a39c1 3167 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
c81c35df 3168 allocate_buffer(rv, u);
882a39c1
MW
3169 rc = 0;
3170end:
3171 DDESTROY(&d); return (rc);
b64eb60f
MW
3172
3173bad:
3174 tvec_error(tv, "invalid buffer length `%s'", d.buf);
882a39c1 3175 rc = -1; goto end;
b64eb60f
MW
3176
3177rangerr:
3178 tvec_error(tv, "buffer length `%s' out of range", d.buf);
882a39c1 3179 rc = -1; goto end;
b64eb60f
MW
3180
3181#undef f_range
3182}
3183
c81c35df
MW
3184/* --- @dump_buffer@ --- *
3185 *
3186 * Arguments: @const union tvec_regval *rv@ = register value
3187 * @const struct tvec_regdef *rd@ = register definition
3188 * @unsigned style@ = output style (@TVSF_...@)
3189 * @const struct gprintf_ops *gops@, @void *gp@ = format output
3190 *
3191 * Returns: ---
3192 *
3193 * Use: Dump a register value to the format output.
3194 *
3195 * Buffer values are dumped as their size with an appropriate
3196 * unit specifier. A unit prefix is only used if the size is an
3197 * exact multiple of the relevant power of two.
3198 */
3199
b64eb60f
MW
3200static void dump_buffer(const union tvec_regval *rv,
3201 const struct tvec_regdef *rd,
e63124bc
MW
3202 unsigned style,
3203 const struct gprintf_ops *gops, void *go)
b64eb60f
MW
3204{
3205 const char *unit;
3206 unsigned long u = rv->bytes.sz;
3207
3208 if (!u || u%1024)
e63124bc 3209 gprintf(gops, go, "%lu B", u);
b64eb60f
MW
3210 else {
3211 for (unit = units, u /= 1024; !(u%1024) && unit[1]; u /= 1024, unit++);
e63124bc 3212 gprintf(gops, go, "%lu %cB", u, *unit);
b64eb60f
MW
3213 }
3214}
3215
c81c35df 3216/* Buffer type definition. */
b64eb60f 3217const struct tvec_regty tvty_buffer = {
e63124bc 3218 init_bytes, release_bytes, eq_buffer,
b64eb60f
MW
3219 tobuf_buffer, frombuf_buffer,
3220 parse_buffer, dump_buffer
3221};
3222
3223/*----- That's all, folks -------------------------------------------------*/