@@@ fltfmt wip
[mLib] / test / tvec-types.c
CommitLineData
b64eb60f
MW
1/* -*-c-*-
2 *
3 * Types for the test-vector framework
4 *
5 * (c) 2023 Straylight/Edgeware
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of the mLib utilities library.
11 *
12 * mLib is free software: you can redistribute it and/or modify it under
13 * the terms of the GNU Library General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or (at
15 * your option) any later version.
16 *
17 * mLib is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20 * License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with mLib. If not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25 * USA.
26 */
27
28/*----- Header files ------------------------------------------------------*/
29
30#include <assert.h>
31#include <ctype.h>
32#include <errno.h>
e63124bc 33#include <float.h>
b64eb60f 34#include <limits.h>
e63124bc 35#include <math.h>
b64eb60f
MW
36#include <stdio.h>
37#include <string.h>
38
39#include "buf.h"
40#include "codec.h"
41# include "base32.h"
42# include "base64.h"
43# include "hex.h"
44#include "dstr.h"
67b5031e 45#include "maths.h"
b1a20bee 46
b64eb60f 47#include "tvec.h"
b1a20bee
MW
48#include "tvec-adhoc.h"
49#include "tvec-types.h"
b64eb60f
MW
50
51/*----- Preliminary utilities ---------------------------------------------*/
52
67b5031e
MW
53/* --- @trivial_release@ --- *
54 *
55 * Arguments: @union tvec_regval *rv@ = a register value
56 * @const struct tvec_regdef *rd@ = the register definition
57 *
58 * Returns: ---
59 *
60 * Use: Does nothing. Used for register values which don't retain
61 * resources.
62 */
3efcfd2d
MW
63
static void trivial_release(union tvec_regval *rv,
                            const struct tvec_regdef *rd)
{
  /* Deliberately empty: there is nothing to free. */
}
67
67b5031e
MW
68/*----- Integer utilities -------------------------------------------------*/
69
70/* --- @unsigned_to_buf@, @signed_to_buf@ --- *
71 *
72 * Arguments: @buf *b@ = buffer to write on
73 * @unsigned long u@ or @long i@ = integer to write
74 *
75 * Returns: Zero on success, @-1@ on failure.
76 *
77 * Use: Write @i@ to the buffer, in big-endian (two's-complement, if
78 * signed) format.
79 */
80
81static int unsigned_to_buf(buf *b, unsigned long u)
82 { kludge64 k; ASSIGN64(k, u); return (buf_putk64l(b, k)); }
83
b64eb60f
MW
84static int signed_to_buf(buf *b, long i)
85{
86 kludge64 k;
87 unsigned long u;
88
89 u = i;
90 if (i >= 0) ASSIGN64(k, u);
91 else { ASSIGN64(k, ~u); CPL64(k, k); }
92 return (buf_putk64l(b, k));
93}
94
67b5031e
MW
95/* --- @unsigned_from_buf@, @signed_from_buf@ --- *
96 *
97 * Arguments: @buf *b@ = buffer to read from
98 * @unsigned long *u_out@ or @long *i_out@ = where to put the
99 * result
100 *
101 * Returns: Zero on success, @-1@ on failure.
102 *
103 * Use: Read an integer, in big-endian (two's-complement, if signed)
104 * format, from the buffer.
105 */
b64eb60f
MW
106
107static int unsigned_from_buf(buf *b, unsigned long *u_out)
108{
109 kludge64 k, ulmax;
110
111 ASSIGN64(ulmax, ULONG_MAX);
112 if (buf_getk64l(b, &k)) return (-1);
adec5584 113 if (CMP64(k, >, ulmax)) { buf_break(b); return (-1); }
b64eb60f
MW
114 *u_out = GET64(unsigned long, k); return (0);
115}
116
67b5031e
MW
117/* --- @hex_width@ --- *
118 *
119 * Arguments: @unsigned long u@ = an integer
120 *
121 * Returns: A suitable number of digits to use in order to display @u@ in
122 * hex. Currently, we select a power of two sufficient to show
123 * the value, but at least 2.
124 */
125
b64eb60f
MW
static int hex_width(unsigned long u)
{
  int digits = 2;                       /* current candidate width */
  unsigned long rest = u >> 8;          /* bits not covered by @digits@ */

  /* Double the width until it covers every set bit.  Each step shifts out
   * exactly the bits that the new, wider field accounts for, so the shift
   * distance never reaches the width of the type.
   */
  while (rest) {
    rest >>= 4*digits;
    digits *= 2;
  }
  return (digits);
}
134
67b5031e
MW
135/* --- @format_unsigned_hex@, @format_signed_hex@ --- *
136 *
137 * Arguments: @const struct gprintf_ops *gops@ = print operations
138 * @void *go@ = print destination
139 * @unsigned long u@ or @long i@ = integer to print
140 *
141 * Returns: ---
142 *
143 * Use: Print an unsigned or signed integer in hexadecimal.
144 */
145
static void format_unsigned_hex(const struct gprintf_ops *gops, void *go,
                                unsigned long u)
{
  int wd = hex_width(u);                /* zero-padded field width */

  gprintf(gops, go, "0x%0*lx", wd, u);
}
149
static void format_signed_hex(const struct gprintf_ops *gops, void *go,
                              long i)
{
  const char *sign = "";
  unsigned long mag = i;

  /* Print sign and magnitude separately.  The negation is done in unsigned
   * arithmetic, so it's well-defined even for @LONG_MIN@.
   */
  if (i < 0) { sign = "-"; mag = -mag; }
  gprintf(gops, go, "%s0x%0*lx", sign, hex_width(mag), mag);
}
156
157static int signed_from_buf(buf *b, long *i_out)
158{
159 kludge64 k, lmax, not_lmin;
160
161 ASSIGN64(lmax, LONG_MAX); ASSIGN64(not_lmin, ~(unsigned long)LONG_MIN);
162 if (buf_getk64l(b, &k)) return (-1);
163 if (CMP64(k, <=, lmax)) *i_out = (long)GET64(unsigned long, k);
164 else {
165 CPL64(k, k);
166 if (CMP64(k, <=, not_lmin)) *i_out = -(long)GET64(unsigned long, k) - 1;
adec5584 167 else { buf_break(b); return (-1); }
67b5031e
MW
168 }
169 return (0);
170}
171
b1a20bee 172/* --- @check_signed_range@, @check_unsigned_range@ --- *
67b5031e 173 *
b1a20bee
MW
174 * Arguments: @long i@ or @unsigned long u@ = an integer
175 * @const struct tvec_irange *ir@ or
176 * @const struct tvec_urange *ur@ = range specification,
67b5031e
MW
177 * or null
178 * @struct tvec_state *tv@ = test vector state
c4ccbbf9 179 * @const char *what@ = description of value
67b5031e
MW
180 *
181 * Returns: Zero on success, or @-1@ on error.
182 *
183 * Use: Check that the integer is within bounds. If not, report a
184 * suitable error and return a failure indication.
185 */
186
882a39c1
MW
187static int check_signed_range(long i,
188 const struct tvec_irange *ir,
c4ccbbf9 189 struct tvec_state *tv, const char *what)
b64eb60f 190{
b1a20bee
MW
191 long ii, aa, m;
192
193 if (ir) {
194 if (ir->min > i || i > ir->max) {
195 tvec_error(tv, "%s %ld out of range (must be in [%ld .. %ld])",
196 what, i, ir->min, ir->max);
197 return (-1);
198 }
199 m = ir->m; if (m > 0) m = -m;
200 if (m && m != -1) {
201 /* Reduce both the integer and the intended residue to the canonical
202 * interval [0, m). This is more awkward than it should be because C
203 * (following CPU designs) adopted an unhelpful definition of integer
204 * division when the dividend is negative.
205 *
206 * Note that I've canonicalized the divisor to be %%\emph{negative}%%,
207 * because in two's-complement arithmetic, the absolute value of the
208 * most negative representable value is not itself representable. The
209 * residue modulo the most negative value will itself be representable.
210 */
211
212 ii = i%m; if (ii < 0) ii -= m;
213 aa = ir->a%m; if (aa < 0) aa -= m;
214 if (ii != aa) {
215 tvec_error(tv, "%s %ld == %ld =/= %ld (mod %ld)",
216 what, i, ii, ir->a, ir->m);
217 return (-1);
218 }
219 }
882a39c1
MW
220 }
221 return (0);
b64eb60f
MW
222}
223
882a39c1
MW
224static int check_unsigned_range(unsigned long u,
225 const struct tvec_urange *ur,
c4ccbbf9 226 struct tvec_state *tv, const char *what)
b64eb60f 227{
b1a20bee
MW
228 unsigned long uu;
229
230 if (ur) {
231 if (ur->min > u || u > ur->max) {
232 tvec_error(tv, "%s %lu out of range (must be in [%lu .. %lu])",
233 what, u, ur->min, ur->max);
234 return (-1);
235 }
236 if (ur->m && ur->m != 1) {
237 uu = u%ur->m;
238 if (uu != ur->a%ur->m) {
239 tvec_error(tv, "%s %lu == %lu =/= %lu (mod %lu)",
240 what, u, uu, ur->a, ur->m);
241 return (-1);
242 }
243 }
882a39c1
MW
244 }
245 return (0);
b64eb60f
MW
246}
247
67b5031e
MW
248/* --- @chtodig@ --- *
249 *
250 * Arguments: @int ch@ = a character
251 *
252 * Returns: The numeric value of the character as a digit, or @-1@ if
253 * it's not a digit. Letters count as extended digits starting
254 * with value 10; case is not significant.
255 */
256
3efcfd2d
MW
static int chtodig(int ch)
{
  /* Decimal digits map to themselves; letters of either case continue from
   * ten.  Anything else isn't a digit.
   */
  if (ch >= '0' && ch <= '9') return (ch - '0');
  if (ch >= 'A' && ch <= 'Z') return (10 + (ch - 'A'));
  if (ch >= 'a' && ch <= 'z') return (10 + (ch - 'a'));
  return (-1);
}
264
67b5031e
MW
265/* --- @parse_unsigned_integer@, @parse_signed_integer@ --- *
266 *
267 * Arguments: @unsigned long *u_out@, @long *i_out@ = where to put the
268 * result
269 * @const char **q_out@ = where to put the end position
270 * @const char *p@ = pointer to the string to parse
271 *
272 * Returns: Zero on success, @-1@ on error.
273 *
274 * Use: Parse an integer from a string in the test-vector format.
275 * This is mostly an extension of the traditional C @strtoul@
276 * format: supported inputs include:
277 *
278 * * NNN -- a decimal number (even if it starts with `0');
279 * * 0xNNN -- hexadecimal;
280 * * 0oNNN -- octal;
281 * * 0bNNN -- binary;
282 * * NNrNNN -- base NN.
283 *
284 * Furthermore, single underscores are permitted internally as
285 * an insignificant digit separator.
286 */
287
3efcfd2d
MW
static int parse_unsigned_integer(unsigned long *u_out, const char **q_out,
                                  const char *p)
{
  unsigned long u;                      /* value accumulated so far */
  int ch, d, r;                         /* character, digit, radix */
  const char *q;                        /* end of the accepted number */
  unsigned f = 0;
#define f_implicit 1u                   /* implicitly reading base 10 */
#define f_digit 2u                      /* read a real digit */
#define f_uscore 4u                     /* found an underscore */

  /* Initial setup
   *
   * This will deal with the traditional `0[box]...' prefixes.  We'll leave
   * our new `NNr...' syntax for later.  A prefix only counts if a digit
   * valid in its base follows; otherwise the leading `0' is just a decimal
   * zero.
   */
  if (p[0] != '0' || !p[1]) {
    d = chtodig(*p); if (0 > d || d >= 10) return (-1);
    r = 10; u = d; p++; f |= f_implicit | f_digit;
  } else {
    u = 0; d = chtodig(p[2]);
    if (d < 0) { r = 10; f |= f_implicit | f_digit; p++; }
    else if ((p[1] == 'x' || p[1] == 'X') && d < 16) { r = 16; p += 2; }
    else if ((p[1] == 'o' || p[1] == 'O') && d < 8) { r = 8; p += 2; }
    else if ((p[1] == 'b' || p[1] == 'B') && d < 2) { r = 2; p += 2; }
    else { r = 10; f |= f_digit; p++; }
  }

  /* Main loop.  @q@ trails @p@: it only advances past real digits, so a
   * dangling underscore isn't treated as part of the number.
   */
  q = p;
  for (;;) {
    /* Work through the string a character at a time. */

    ch = *p; switch (ch) {

      case '_':
        /* An underscore is OK if we haven't just seen one. */

        if (f&f_uscore) goto done;
        p++; f = (f&~f_implicit) | f_uscore;
        break;

      case 'r': case 'R':
        /* An `r' is OK if the number so far is small enough to be a sensible
         * base, and we're scanning decimal implicitly.  The digits run from
         * `0' to `z', i.e., values 0 up to 35, so the largest sensible base
         * is 36 inclusive.  The `r' must be followed by a digit valid in
         * the new base.
         */

        if (!(f&f_implicit) || !u || u > 36) goto done;
        d = chtodig(p[1]); if (0 > d || d >= u) goto done;
        r = u; u = d; f = (f&~f_implicit) | f_digit; p += 2; q = p;
        break;

      default:
        /* Otherwise we expect a valid digit and accumulate it, failing if
         * the result wouldn't fit in an @unsigned long@.
         */
        d = chtodig(ch); if (d < 0 || d >= r) goto done;
        if (u > ULONG_MAX/r) return (-1);
        u *= r; if (u > ULONG_MAX - d) return (-1);
        u += d; f = (f&~f_uscore) | f_digit; p++; q = p;
        break;
    }
  }

done:
  /* We must have seen at least one digit for this to count. */
  if (!(f&f_digit)) return (-1);
  *u_out = u; *q_out = q; return (0);

#undef f_implicit
#undef f_digit
#undef f_uscore
}
357
static int parse_signed_integer(long *i_out, const char **q_out,
                                const char *p)
{
  unsigned long mag;
  int neg = 0;

  /* Strip an optional sign, remembering whether it was negative. */
  if (*p == '-') { neg = 1; p++; }
  else if (*p == '+') p++;

  /* The rest is an unsigned magnitude. */
  if (parse_unsigned_integer(&mag, q_out, p)) return (-1);

  /* Apply the sign, checking that the result is representable.  The
   * negative case works with %$m - 1$% so that the magnitude of @LONG_MIN@
   * never has to be formed as a @long@.
   */
  if (!neg) {
    if (mag > LONG_MAX) return (-1);
    *i_out = mag;
  } else if (!mag)
    *i_out = 0;
  else {
    if (mag - 1 > -(LONG_MIN + 1)) return (-1);
    *i_out = -(long)(mag - 1) - 1;
  }
  return (0);
}
385
67b5031e
MW
386/* --- @parse_unsigned@, @parse_signed@ --- *
387 *
388 * Arguments: @unsigned long *u_out@ or @long *i_out@ = where to put the
389 * result
390 * @const char *p@ = string to parse
391 * @const struct tvec_urange *ur@ or
392 * @const struct tvec_irange *ir@ = range specification,
393 * or null
394 * @struct tvec_state *tv@ = test vector state
395 *
396 * Returns: Zero on success, @-1@ on error.
397 *
398 * Use: Parse and range-check an integer. Unlike @parse_(un)signed_
399 * integer@, these functions check that there's no cruft
400 * following the final digit, and report errors as they find
401 * them rather than leaving that to the caller.
402 */
403
static int parse_unsigned(unsigned long *u_out, const char *p,
                          const struct tvec_urange *ur,
                          struct tvec_state *tv)
{
  unsigned long val;
  const char *end;
  int rc;

  /* Parse, insist that the whole string was consumed, then range-check;
   * each stage reports its own error.
   */
  if (parse_unsigned_integer(&val, &end, p))
    rc = tvec_error(tv, "invalid unsigned integer `%s'", p);
  else if (*end)
    rc = tvec_syntax(tv, *end, "end-of-line");
  else if (check_unsigned_range(val, ur, tv, "integer"))
    rc = -1;
  else
    { *u_out = val; rc = 0; }
  return (rc);
}
417
882a39c1
MW
static int parse_signed(long *i_out, const char *p,
                        const struct tvec_irange *ir,
                        struct tvec_state *tv)
{
  long val;
  const char *end;
  int rc;

  /* Parse, insist that the whole string was consumed, then range-check;
   * each stage reports its own error.
   */
  if (parse_signed_integer(&val, &end, p))
    rc = tvec_error(tv, "invalid signed integer `%s'", p);
  else if (*end)
    rc = tvec_syntax(tv, *end, "end-of-line");
  else if (check_signed_range(val, ir, tv, "integer"))
    rc = -1;
  else
    { *i_out = val; rc = 0; }
  return (rc);
}
adec5584
MW
431static const char size_units[] = "kMGTPEZY";
432
c4ccbbf9 433/* --- @parse_szint@ --- *
adec5584
MW
434 *
435 * Arguments: @struct tvec_state *tv@ = test-vector state
c4ccbbf9 436 * @unsigned long *u_out@ = where to put the answer
adec5584
MW
437 * @const char *delims@ = delimiters
438 * @const char *what@ = description of what we're parsing
439 *
440 * Returns: Zero on success, %$-1$% on failure.
441 *
442 * Use: Parse a memory size.
443 */
444
c4ccbbf9
MW
445static int parse_szint(struct tvec_state *tv, unsigned long *u_out,
446 const char *delims, const char *what)
adec5584
MW
447{
448 dstr d = DSTR_INIT;
449 const char *p, *unit;
450 unsigned long u, t;
451 int rc;
452 unsigned f = 0;
453#define f_range 1u
454
455 if (tvec_readword(tv, &d, 0, delims, what)) { rc = -1; goto end; }
456 p = d.buf;
457 if (parse_unsigned_integer(&u, &p, p)) goto bad;
458 if (!*p) tvec_readword(tv, &d, &p, delims, 0);
459
adec5584 460 for (t = u, unit = size_units; *unit; unit++) {
c4ccbbf9 461 if (t > ULONG_MAX/1024) f |= f_range;
adec5584
MW
462 else t *= 1024;
463 if (*p == *unit) {
464 if (f&f_range) goto rangerr;
465 u = t; p++; break;
466 }
467 }
468 if (*p == 'B') p++;
469 if (*p) goto bad;
470
471 *u_out = u; rc = 0;
472end:
473 dstr_destroy(&d);
474 return (rc);
475
476bad:
477 tvec_error(tv, "invalid %s `%s'", what, d.buf);
478 rc = -1; goto end;
479
480rangerr:
481 tvec_error(tv, "%s `%s' out of range", what, d.buf);
482 rc = -1; goto end;
483
484#undef f_range
485}
486
487/* --- @format_size@ --- *
488 *
489 * Arguments: @const struct gprintf_ops *gops@ = print operations
490 * @void *go@ = print destination
491 * @unsigned long u@ = a size
492 * @unsigned style@ = style (@TVSF_...@)
493 *
494 * Returns: ---
495 *
496 * Use: Format @u@ as a size in bytes to the destination, expressing
497 * it with a unit prefix if this is possible exactly.
498 */
499
500static void format_size(const struct gprintf_ops *gops, void *go,
501 unsigned long u, unsigned style)
502{
503 const char *unit;
504
5c0f2e08
MW
505 if (style&TVSF_RAW)
506 gprintf(gops, go, "%lu", u);
507 else if (!u || u%1024)
adec5584
MW
508 gprintf(gops, go, "%lu%sB", u, style&TVSF_COMPACT ? "" : " ");
509 else {
510 for (unit = size_units, u /= 1024;
511 !(u%1024) && unit[1];
512 u /= 1024, unit++);
513 gprintf(gops, go, "%lu%s%cB", u, style&TVSF_COMPACT ? "" : " ", *unit);
514 }
515}
b64eb60f 516
67b5031e 517/*----- Floating-point utilities ------------------------------------------*/
b64eb60f 518
67b5031e
MW
519/* --- @eqish_floating_p@ --- *
520 *
521 * Arguments: @double x, y@ = two numbers to compare
522 * @const struct tvec_floatinfo *fi@ = floating-point info
523 *
c4ccbbf9
MW
524 * Returns: Nonzero if the comparand @x@ is sufficiently close to the
525 * reference @y@, or zero if it's definitely different.
67b5031e 526 */
3efcfd2d 527
67b5031e
MW
528static int eqish_floating_p(double x, double y,
529 const struct tvec_floatinfo *fi)
3efcfd2d 530{
dc6eea4e 531 double t, u;
67b5031e 532
dc6eea4e 533 /* NaNs and infinities are equal only to each other. */
67b5031e
MW
534 if (NANP(x)) return (NANP(y)); else if (NANP(y)) return (0);
535 if (INFP(x)) return (x == y); else if (INFP(y)) return (0);
536
dc6eea4e 537 /* Compare finite values. */
67b5031e
MW
538 switch (fi ? fi->f&TVFF_EQMASK : TVFF_EXACT) {
539 case TVFF_EXACT:
540 return (x == y && NEGP(x) == NEGP(y));
541 case TVFF_ABSDELTA:
dc6eea4e 542 t = fabs(y - x); return (t < fi->delta);
67b5031e 543 case TVFF_RELDELTA:
dc6eea4e
MW
544 t = fabs(y - x); u = fabs(y*fi->delta); if (u < DBL_MIN) u = DBL_MIN;
545 return (t <= u);
67b5031e
MW
546 default:
547 abort();
548 }
b64eb60f
MW
549}
550
67b5031e
MW
551/* --- @format_floating@ --- *
552 *
553 * Arguments: @const struct gprintf_ops *gops@ = print operations
554 * @void *go@ = print destination
555 * @double x@ = number to print
556 *
557 * Returns: ---
558 *
559 * Use: Print a floating-point number, accurately.
560 */
3efcfd2d 561
e63124bc
MW
static void format_floating(const struct gprintf_ops *gops, void *go,
                            double x)
{
  int prec;

  /* Non-finite values have their own special spellings. */
  if (NANP(x)) { gprintf(gops, go, "#nan"); return; }
  if (INFP(x)) { gprintf(gops, go, x > 0 ? "#+inf" : "#-inf"); return; }

  /* Ugh.  C doesn't provide any function for just printing a
   * floating-point number /correctly/, i.e., so that you can read the
   * result back and recover the number you first thought of.  There are
   * complicated algorithms published for doing this, but I really don't
   * want to get into that here.  So we have this.
   *
   * The sign doesn't cause significant difficulty so we're going to ignore
   * it for now.  So suppose we're given a number %$x = f b^e$%, in
   * base-%$b$% format, so %$f b^n$% and %$e$% are integers, with
   * %$0 \le f < 1$%.  We're going to convert it into the nearest number
   * of the form %$X = F B^E$%, with similar conditions (and %$N$% digits
   * in place of %$n$%), only with the additional requirement that %$X$% is
   * normalized, i.e., that %$X = 0$% or %$F \ge B^{-1}$%.
   *
   * We're rounding to the nearest such %$X$%.  If there is to be ambiguity
   * in the conversion, then some %$x = f b^e$% and the next larger
   * representable number %$x' = x + b^{e-n}$% must both map to the same
   * %$X$%, which means both %$x$% and %$x'$% must be nearer to %$X$% than
   * any other number representable in the target system.  The next larger
   * number is %$X' = X + B^{E-N}$%; the next smaller number will normally
   * be %$W = X - B^{E-N}$%, but if %$F = 1/B$% then the next smaller number
   * is actually %$X - B^{E-N-1}$%.  We ignore this latter possibility in
   * the pursuit of a conservative estimate (though actually it doesn't
   * matter).
   *
   * If both %$x$% and %$x'$% map to %$X$% then we must have
   * %$L = X - B^{E-N}/2 \le x$% and %$x + b^{e-n} \le R = X + B^{E-N}/2$%;
   * so firstly %$f b^e = x \ge L = W + B^{E-N}/2 > W = (F - B^{-N}) B^E$%,
   * and secondly %$b^{e-n} \le B^{E-N}$%.  Since these inequalities are in
   * opposite senses, we can divide, giving
   *
   *    %$f b^e/b^{e-n} > (F - B^{-N}) B^E/B^{E-N}$% ,
   *
   * whence
   *
   *    %$f b^n > (F - B^{-N}) B^N = F B^N - 1$% .
   *
   * Now %$f \le 1 - b^{-n}$%, and %$F \ge B^{-1}$%, so, for this to be
   * possible, it must be the case that
   *
   *    %$(1 - b^{-n}) b^n = b^n - 1 > B^{N-1} - 1$% .
   *
   * Then rearrange and take logarithms, obtaining
   *
   *    %$(N - 1) \log B < n \log b$% ,
   *
   * and so
   *
   *    %$N < n \log b/\log B + 1$% .
   *
   * Recall that this is a necessary condition for a collision to occur; we
   * are therefore safe whenever
   *
   *    %$N \ge n \log b/\log B + 1$% ;
   *
   * so, taking ceilings,
   *
   *    %$N \ge \lceil n \log b/\log B \rceil + 1$% .
   *
   * So that's why we have this.
   *
   * I'm going to assume that @n = DBL_MANT_DIG@ is sufficiently small
   * that we can calculate this without ending up on the wrong side of an
   * integer boundary.
   *
   * In C11, we have @DBL_DECIMAL_DIG@, which should be the same value
   * only as a constant.  Except that modern compilers are more than clever
   * enough to work out that this is a constant anyway.
   *
   * This is sometimes an overestimate: we'll print out meaningless digits
   * that don't represent anything we actually know about the number in
   * question.  To fix that, we'd need a complicated algorithm like Steele
   * and White's Dragon4, Gay's @dtoa@, or Burger and Dybvig's algorithm
   * (note that Loitsch's Grisu2 is conservative, and Grisu3 hands off to
   * something else in difficult situations).
   */

#ifdef DBL_DECIMAL_DIG
  prec = DBL_DECIMAL_DIG;
#else
  prec = ceil(DBL_MANT_DIG*log(FLT_RADIX)/log(10)) + 1;
#endif
  gprintf(gops, go, "%.*g", prec, x);
}
657
67b5031e
MW
658/* --- @parse_floating@ --- *
659 *
660 * Arguments: @double *x_out@ = where to put the result
814e42ff 661 * @const char **q_out@ = where to leave end pointer, or null
67b5031e
MW
662 * @const char *p@ = string to parse
663 * @const struct tvec_floatinfo *fi@ = floating-point info
664 * @struct tvec_state *tv@ = test vector state
665 *
666 * Returns: Zero on success, @-1@ on error.
667 *
668 * Use: Parse a floating-point number from a string. Reports any
814e42ff 669 * necessary errors. If @q_out@ is not null then trailing
adec5584
MW
670 * material is permitted and a pointer to it (or the end of the
671 * string) is left in @*q_out@.
67b5031e 672 */
e63124bc 673
814e42ff 674static int parse_floating(double *x_out, const char **q_out, const char *p,
e63124bc
MW
675 const struct tvec_floatinfo *fi,
676 struct tvec_state *tv)
677{
678 const char *pp; char *q;
679 dstr d = DSTR_INIT;
680 double x;
681 int olderr, rc;
682
67b5031e 683 /* Check for special tokens. */
e63124bc
MW
684 if (STRCMP(p, ==, "#nan")) {
685#ifdef NAN
adec5584 686 if (q_out) *q_out = p + strlen(p);
e63124bc
MW
687 x = NAN; rc = 0;
688#else
689 tvec_error(tv, "NaN not supported on this system");
690 rc = -1; goto end;
691#endif
67b5031e
MW
692 }
693
694 else if (STRCMP(p, ==, "#inf") ||
695 STRCMP(p, ==, "#+inf") || STRCMP(p, ==, "+#inf")) {
3efcfd2d 696#ifdef INFINITY
adec5584 697 if (q_out) *q_out = p + strlen(p);
e63124bc
MW
698 x = INFINITY; rc = 0;
699#else
700 tvec_error(tv, "infinity not supported on this system");
701 rc = -1; goto end;
702#endif
67b5031e
MW
703 }
704
705 else if (STRCMP(p, ==, "#-inf") || STRCMP(p, ==, "-#inf")) {
3efcfd2d 706#ifdef INFINITY
adec5584 707 if (q_out) *q_out = p + strlen(p);
e63124bc
MW
708 x = -INFINITY; rc = 0;
709#else
710 tvec_error(tv, "infinity not supported on this system");
711 rc = -1; goto end;
712#endif
67b5031e
MW
713 }
714
715 /* Check that this looks like a number, so we can exclude `strtod'
716 * recognizing its own non-finite number tokens.
717 */
718 else {
e63124bc
MW
719 pp = p;
720 if (*pp == '+' || *pp == '-') pp++;
721 if (*pp == '.') pp++;
722 if (!ISDIGIT(*pp)) {
3efcfd2d 723 tvec_syntax(tv, *p ? *p : fgetc(tv->fp), "floating-point number");
e63124bc
MW
724 rc = -1; goto end;
725 }
67b5031e
MW
726
727 /* Parse the number using the system parser. */
e63124bc 728 olderr = errno; errno = 0;
b1a20bee 729#if __STDC_VERSION__ >= 199901
e63124bc 730 x = strtod(p, &q);
b1a20bee
MW
731#else
732 x = strtold(p, &q);
733#endif
adec5584
MW
734 if (q_out) *q_out = q;
735 else if (*q) { tvec_syntax(tv, *q, "end-of-line"); rc = -1; goto end; }
e63124bc 736 if (errno && (errno != ERANGE || (x > 0 ? -x : x) == HUGE_VAL)) {
814e42ff
MW
737 tvec_error(tv, "invalid floating-point number `%.*s': %s",
738 (int)(q - p), p, strerror(errno));
e63124bc
MW
739 rc = -1; goto end;
740 }
741 errno = olderr;
742 }
743
67b5031e 744 /* Check that the number is acceptable. */
e63124bc
MW
745 if (NANP(x) && fi && !(fi->f&TVFF_NANOK)) {
746 tvec_error(tv, "#nan not allowed here");
747 rc = -1; goto end;
748 }
67b5031e 749
b1a20bee
MW
750 if (fi &&
751 ((!(fi->f&TVFF_NOMIN) && x < fi->min) ||
752 (!(fi->f&TVFF_NOMAX) && x > fi->max)) &&
753 !(INFP(x) && (fi->f&(NEGP(x) ? TVFF_NEGINFOK : TVFF_POSINFOK)))) {
e63124bc
MW
754 dstr_puts(&d, "floating-point number ");
755 format_floating(&dstr_printops, &d, x);
756 dstr_puts(&d, " out of range (must be in ");
757 if (fi->f&TVFF_NOMIN)
758 dstr_puts(&d, "(#-inf");
759 else
760 { dstr_putc(&d, '['); format_floating(&dstr_printops, &d, fi->min); }
761 dstr_puts(&d, " .. ");
762 if (fi->f&TVFF_NOMAX)
763 dstr_puts(&d, "#+inf)");
764 else
765 { format_floating(&dstr_printops, &d, fi->max); dstr_putc(&d, ']'); }
766 dstr_putc(&d, ')'); dstr_putz(&d);
767 tvec_error(tv, "%s", d.buf); rc = -1; goto end;
768 }
769
67b5031e
MW
770 /* All done. */
771 *x_out = x; rc = 0;
772end:
773 dstr_destroy(&d);
774 return (rc);
775}
776
777/*----- String utilities --------------------------------------------------*/
778
/* Flags for entries in the special character name table. */
#define CTF_PREFER 1u                   /* preferred name */
#define CTF_SHORT 2u                    /* short name (compact style) */

/* Special character name table.
 *
 * Several names may map to the same character; lookups by character take
 * the first entry carrying a requested flag, so the order of the entries
 * matters.  The table is terminated by an entry with a null name.
 */
struct chartab {
  const char *name;                     /* character name */
  int ch;                               /* character value */
  unsigned f;                           /* flags (@CTF_...@) */
};

static const struct chartab chartab[] = {
  /* End-of-file marker and null. */
  { "#eof",             EOF,    CTF_PREFER | CTF_SHORT },
  { "#nul",             '\0',   CTF_PREFER },

  /* Bell. */
  { "#bell",            '\a',   CTF_PREFER },
  { "#ding",            '\a',   0 },
  { "#bel",             '\a',   CTF_SHORT },

  /* Backspace. */
  { "#backspace",       '\b',   CTF_PREFER },
  { "#bs",              '\b',   CTF_SHORT },

  /* Escape. */
  { "#escape",          '\x1b', CTF_PREFER },
  { "#esc",             '\x1b', CTF_SHORT },

  /* Form feed. */
  { "#formfeed",        '\f',   CTF_PREFER },
  { "#ff",              '\f',   CTF_SHORT },

  /* Newline. */
  { "#newline",         '\n',   CTF_PREFER },
  { "#linefeed",        '\n',   0 },
  { "#lf",              '\n',   CTF_SHORT },
  { "#nl",              '\n',   0 },

  /* Carriage return. */
  { "#return",          '\r',   CTF_PREFER },
  { "#carriage-return", '\r',   0 },
  { "#cr",              '\r',   CTF_SHORT },

  /* Horizontal tab. */
  { "#tab",             '\t',   CTF_PREFER | CTF_SHORT },
  { "#horizontal-tab",  '\t',   0 },
  { "#ht",              '\t',   0 },

  /* Vertical tab. */
  { "#vertical-tab",    '\v',   CTF_PREFER },
  { "#vt",              '\v',   CTF_SHORT },

  /* Space. */
  { "#space",           ' ',    0 },
  { "#spc",             ' ',    CTF_SHORT },

  /* Delete. */
  { "#delete",          '\x7f', CTF_PREFER },
  { "#del",             '\x7f', CTF_SHORT },

  /* End marker. */
  { 0,                  0,      0 }
};
816
817/* --- @find_charname@ --- *
818 *
819 * Arguments: @int ch@ = character to match
820 * @unsigned f@ = flags (@CTF_...@) to match
821 *
822 * Returns: The name of the character, or null if no match is found.
823 *
824 * Use: Looks up a name for a character. Specifically, it returns
825 * the first entry in the @chartab@ table which matches @ch@ and
826 * which has one of the flags @f@ set.
827 */
828
829static const char *find_charname(int ch, unsigned f)
830{
831 const struct chartab *ct;
832
833 for (ct = chartab; ct->name; ct++)
834 if (ct->ch == ch && (ct->f&f)) return (ct->name);
835 return (0);
836}
837
838/* --- @read_charname@ --- *
839 *
840 * Arguments: @int *ch_out@ = where to put the character
841 * @const char *p@ = character name
842 * @unsigned f@ = flags (@RCF_...@)
843 *
844 * Returns: Zero if a match was found, @-1@ if not.
845 *
846 * Use: Looks up a character by name. If @RCF_EOFOK@ is set in @f@,
847 * then the @EOF@ marker can be matched; otherwise it can't.
848 */
849
850#define RCF_EOFOK 1u
851static int read_charname(int *ch_out, const char *p, unsigned f)
852{
853 const struct chartab *ct;
854
855 for (ct = chartab; ct->name; ct++)
856 if (STRCMP(p, ==, ct->name) && ((f&RCF_EOFOK) || ct->ch >= 0))
857 { *ch_out = ct->ch; return (0); }
858 return (-1);
859}
860
861/* --- @format_charesc@ --- *
862 *
863 * Arguments: @const struct gprintf_ops *gops@ = print operations
864 * @void *go@ = print destination
865 * @int ch@ = character to format
866 * @unsigned f@ = flags (@FCF_...@)
867 *
868 * Returns: ---
869 *
870 * Use: Format a character as an escape sequence, possibly as part of
871 * a larger string. If @FCF_BRACE@ is set in @f@, then put
872 * braces around a `\x...' code, so that it's suitable for use
873 * in a longer string.
874 */
875
#define FCF_BRACE 1u
static void format_charesc(const struct gprintf_ops *gops, void *go,
                           int ch, unsigned f)
{
  const char *esc;

  /* See whether the character has a fixed escape sequence of its own. */
  switch (ch) {
    case '\a': esc = "\\a"; break;
    case '\b': esc = "\\b"; break;
    case '\x1b': esc = "\\e"; break;
    case '\f': esc = "\\f"; break;
    case '\r': esc = "\\r"; break;
    case '\n': esc = "\\n"; break;
    case '\t': esc = "\\t"; break;
    case '\v': esc = "\\v"; break;
    case '\\': esc = "\\\\"; break;
    case '\'': esc = "\\'"; break;
    case '\0': esc = f&FCF_BRACE ? "\\{0}" : "\\0"; break;
    default: esc = 0; break;
  }

  /* If so, print it; otherwise fall back to a hex escape, bracing the code
   * if that was requested.
   */
  if (esc)
    gprintf(gops, go, "%s", esc);
  else if (f&FCF_BRACE)
    gprintf(gops, go, "\\x{%0*x}", hex_width(UCHAR_MAX), ch);
  else
    gprintf(gops, go, "\\x%0*x", hex_width(UCHAR_MAX), ch);
}
903
904/* --- @format_char@ --- *
905 *
906 * Arguments: @const struct gprintf_ops *gops@ = print operations
907 * @void *go@ = print destination
908 * @int ch@ = character to format
909 *
910 * Returns: ---
911 *
912 * Use: Format a single character.
913 */
914
static void format_char(const struct gprintf_ops *gops, void *go, int ch)
{
  /* A printable character stands for itself, unless it's one of the two
   * which would be confusing between single quotes.
   */
  if (ch != '\\' && ch != '\'' && isprint(ch))
    gprintf(gops, go, "'%c'", ch);
  else {
    /* Everything else is written as a quoted escape sequence. */
    gprintf(gops, go, "'");
    format_charesc(gops, go, ch, 0);
    gprintf(gops, go, "'");
  }
}
929
b1a20bee
MW
930/* --- @fill_pattern@ --- *
931 *
932 * Arguments: @void *p@ = destination pointer
933 * @size_t sz@ = destination buffer size
934 * @const void *pat@ = pointer to pattern
935 * @size_t patsz@ = pattern size
936 *
937 * Returns: ---
938 *
939 * Use: Fill the destination buffer with as many copies of the
940 * pattern as will fit, followed by as many initial bytes of the
941 * pattern will fit in the remaining space.
942 */
943
static void fill_pattern(void *p, size_t sz, const void *pat, size_t patsz)
{
  unsigned char *q = p;

  /* An empty pattern can't fill anything: leave the buffer alone rather
   * than spinning forever trying to make progress in zero-byte steps.
   */
  if (!patsz) return;

  /* A single-byte pattern is just a @memset@. */
  if (patsz == 1) { memset(q, *(const unsigned char *)pat, sz); return; }

  if (sz > patsz) {
    /* Lay down one copy of the pattern, and from then on use the filled
     * part of the destination as the source.  Each copy appends @patsz@
     * bytes and then @patsz@ doubles to cover everything filled so far, so
     * the number of copies is logarithmic in the buffer size.  The source
     * always ends exactly where the destination begins, so the regions
     * never overlap.
     */
    memcpy(q, pat, patsz); pat = q; q += patsz; sz -= patsz;
    while (sz > patsz)
      { memcpy(q, pat, patsz); q += patsz; sz -= patsz; patsz *= 2; }
  }

  /* Finish off with a partial copy. */
  memcpy(q, pat, sz);
}
959
67b5031e
MW
960/* --- @maybe_format_unsigned_char@, @maybe_format_signed_char@ --- *
961 *
962 * Arguments: @const struct gprintf_ops *gops@ = print operations
963 * @void *go@ = print destination
964 * @unsigned long u@ or @long i@ = an integer
965 *
966 * Returns: ---
967 *
968 * Use: Format a (signed or unsigned) integer as a character, if it's
969 * in range, printing something like `= 'q''. It's assumed that
970 * a comment marker has already been output.
971 */
972
973static void maybe_format_unsigned_char
974 (const struct gprintf_ops *gops, void *go, unsigned long u)
975{
976 const char *p;
977
978 p = find_charname(u, CTF_PREFER);
979 if (p) gprintf(gops, go, " = %s", p);
980 if (u < UCHAR_MAX)
981 { gprintf(gops, go, " = "); format_char(gops, go, u); }
e63124bc
MW
982}
983
67b5031e
MW
984static void maybe_format_signed_char
985 (const struct gprintf_ops *gops, void *go, long i)
b64eb60f 986{
67b5031e
MW
987 const char *p;
988
989 p = find_charname(i, CTF_PREFER);
990 if (p) gprintf(gops, go, " = %s", p);
991 if (0 <= i && i < UCHAR_MAX)
992 { gprintf(gops, go, " = "); format_char(gops, go, i); }
b64eb60f
MW
993}
994
67b5031e
MW
995/* --- @read_charesc@ --- *
996 *
997 * Arguments: @int *ch_out@ = where to put the result
998 * @struct tvec_state *tv@ = test vector state
999 *
1000 * Returns: Zero on success, @-1@ on error.
1001 *
1002 * Use: Parse and convert an escape sequence from @tv@'s input
1003 * stream, assuming that the initial `\' has already been read.
1004 * Reports errors as appropriate.
1005 */
1006
1007static int read_charesc(int *ch_out, struct tvec_state *tv)
b64eb60f 1008{
b64eb60f
MW
1009 int ch, i, esc;
1010 unsigned f = 0;
1011#define f_brace 1u
1012
e63124bc
MW
1013 ch = getc(tv->fp);
1014 switch (ch) {
67b5031e
MW
1015
1016 /* Things we shouldn't find. */
1017 case EOF: case '\n': return (tvec_syntax(tv, ch, "string escape"));
1018
1019 /* Single-character escapes. */
e63124bc
MW
1020 case '\'': *ch_out = '\''; break;
1021 case '\\': *ch_out = '\\'; break;
1022 case '"': *ch_out = '"'; break;
1023 case 'a': *ch_out = '\a'; break;
1024 case 'b': *ch_out = '\b'; break;
1025 case 'e': *ch_out = '\x1b'; break;
1026 case 'f': *ch_out = '\f'; break;
1027 case 'n': *ch_out = '\n'; break;
1028 case 'r': *ch_out = '\r'; break;
1029 case 't': *ch_out = '\t'; break;
1030 case 'v': *ch_out = '\v'; break;
1031
67b5031e 1032 /* Hex escapes, with and without braces. */
e63124bc
MW
1033 case 'x':
1034 ch = getc(tv->fp);
1035 if (ch == '{') { f |= f_brace; ch = getc(tv->fp); }
1036 else f &= ~f_brace;
67b5031e
MW
1037 esc = chtodig(ch);
1038 if (esc < 0 || esc >= 16) return (tvec_syntax(tv, ch, "hex digit"));
e63124bc 1039 for (;;) {
67b5031e
MW
1040 ch = getc(tv->fp); i = chtodig(ch); if (i < 0 || i >= 16) break;
1041 esc = 16*esc + i;
e63124bc
MW
1042 if (esc > UCHAR_MAX)
1043 return (tvec_error(tv,
1044 "character code %d out of range", esc));
1045 }
1046 if (!(f&f_brace)) ungetc(ch, tv->fp);
1047 else if (ch != '}') return (tvec_syntax(tv, ch, "`}'"));
1048 *ch_out = esc;
1049 break;
1050
67b5031e
MW
1051 /* Other things, primarily octal escapes. */
1052 case '{':
1053 f |= f_brace; ch = getc(tv->fp);
1054 /* fall through */
e63124bc
MW
1055 default:
1056 if ('0' <= ch && ch < '8') {
1057 i = 1; esc = ch - '0';
1058 for (;;) {
1059 ch = getc(tv->fp);
1060 if ('0' > ch || ch >= '8') { ungetc(ch, tv->fp); break; }
1061 esc = 8*esc + ch - '0';
1062 i++; if (i >= 3) break;
1063 }
67b5031e
MW
1064 if (f&f_brace) {
1065 ch = getc(tv->fp);
1066 if (ch != '}') return (tvec_syntax(tv, ch, "`}'"));
1067 }
e63124bc
MW
1068 if (esc > UCHAR_MAX)
1069 return (tvec_error(tv,
1070 "character code %d out of range", esc));
67b5031e 1071 *ch_out = esc; break;
e63124bc
MW
1072 } else
1073 return (tvec_syntax(tv, ch, "string escape"));
1074 }
1075
67b5031e 1076 /* Done. */
e63124bc
MW
1077 return (0);
1078
1079#undef f_brace
1080}
1081
67b5031e
MW
1082/* --- @read_quoted_string@ --- *
1083 *
1084 * Arguments: @dstr *d@ = string to write to
1085 * @int quote@ = initial quote, `'' or `"'
1086 * @struct tvec_state *tv@ = test vector state
1087 *
1088 * Returns: Zero on success, @-1@ on error.
1089 *
1090 * Use: Read the rest of a quoted string into @d@, reporting errors
1091 * as appropriate.
1092 *
1093 * A single-quoted string is entirely literal. A double-quoted
1094 * string may contain C-like escapes.
1095 */
1096
e63124bc
MW
1097static int read_quoted_string(dstr *d, int quote, struct tvec_state *tv)
1098{
1099 int ch;
b64eb60f
MW
1100
1101 for (;;) {
1102 ch = getc(tv->fp);
b64eb60f
MW
1103 switch (ch) {
1104 case EOF: case '\n':
e63124bc 1105 return (tvec_syntax(tv, ch, "`%c'", quote));
b64eb60f
MW
1106 case '\\':
1107 if (quote == '\'') goto ordinary;
e63124bc 1108 ch = getc(tv->fp); if (ch == '\n') { tv->lno++; break; }
67b5031e 1109 ungetc(ch, tv->fp); if (read_charesc(&ch, tv)) return (-1);
e63124bc 1110 goto ordinary;
b64eb60f
MW
1111 default:
1112 if (ch == quote) goto end;
1113 ordinary:
1114 DPUTC(d, ch);
1115 break;
1116 }
1117 }
1118
1119end:
1120 DPUTZ(d);
882a39c1 1121 return (0);
e63124bc 1122}
b64eb60f 1123
67b5031e
MW
1124/* --- @collect_bare@ --- *
1125 *
1126 * Arguments: @dstr *d@ = string to write to
1127 * @struct tvec_state *tv@ = test vector state
1128 *
1129 * Returns: Zero on success, @-1@ on error.
1130 *
1131 * Use: Read barewords and the whitespace between them. Stop when we
1132 * encounter something which can't start a bareword.
1133 */
b64eb60f
MW
1134
1135static int collect_bare(dstr *d, struct tvec_state *tv)
1136{
1137 size_t pos = d->len;
1138 enum { WORD, SPACE, ESCAPE }; unsigned s = WORD;
1139 int ch, rc;
1140
1141 for (;;) {
1142 ch = getc(tv->fp);
1143 switch (ch) {
1144 case EOF:
882a39c1
MW
1145 tvec_syntax(tv, ch, "bareword");
1146 rc = -1; goto end;
b64eb60f
MW
1147 case '\n':
1148 if (s == ESCAPE) { tv->lno++; goto addch; }
1149 if (s == WORD) pos = d->len;
882a39c1 1150 ungetc(ch, tv->fp); if (tvec_nexttoken(tv)) { rc = -1; goto end; }
b64eb60f
MW
1151 DPUTC(d, ' '); s = SPACE;
1152 break;
67b5031e 1153 case '"': case '\'': case '!': case '#': case ')': case '}': case ']':
882a39c1 1154 if (s == SPACE) { ungetc(ch, tv->fp); goto done; }
b64eb60f
MW
1155 goto addch;
1156 case '\\':
1157 s = ESCAPE;
1158 break;
1159 default:
1160 if (s != ESCAPE && isspace(ch)) {
1161 if (s == WORD) pos = d->len;
1162 DPUTC(d, ch); s = SPACE;
1163 break;
1164 }
1165 addch:
1166 DPUTC(d, ch); s = WORD;
1167 }
1168 }
1169
1170done:
1171 if (s == SPACE) d->len = pos;
882a39c1
MW
1172 DPUTZ(d); rc = 0;
1173end:
1174 return (rc);
b64eb60f
MW
1175}
1176
67b5031e
MW
1177/* --- @set_up_encoding@ --- *
1178 *
1179 * Arguments: @const codec_class **ccl_out@ = where to put the class
1180 * @unsigned *f_out@ = where to put the flags
1181 * @unsigned code@ = the coding scheme to use (@TVEC_...@)
1182 *
1183 * Returns: ---
1184 *
1185 * Use: Helper for @read_compound_string@ below.
1186 *
1187 * Return the appropriate codec class and flags for @code@.
1188 * Leaves @*ccl_out@ null if the coding scheme doesn't have a
1189 * backing codec class (e.g., @TVCODE_BARE@).
1190 */
1191
1192enum { TVCODE_BARE, TVCODE_HEX, TVCODE_BASE64, TVCODE_BASE32 };
b64eb60f
MW
1193static void set_up_encoding(const codec_class **ccl_out, unsigned *f_out,
1194 unsigned code)
1195{
1196 switch (code) {
1197 case TVCODE_BARE:
1198 *ccl_out = 0; *f_out = 0;
1199 break;
1200 case TVCODE_HEX:
1201 *ccl_out = &hex_class; *f_out = CDCF_IGNCASE;
1202 break;
1203 case TVCODE_BASE32:
1204 *ccl_out = &base32_class; *f_out = CDCF_IGNCASE | CDCF_IGNEQPAD;
1205 break;
1206 case TVCODE_BASE64:
1207 *ccl_out = &base64_class; *f_out = CDCF_IGNEQPAD;
1208 break;
1209 default:
1210 abort();
1211 }
1212}
1213
67b5031e
MW
1214/* --- @flush_codec@ --- *
1215 *
1216 * Arguments: @codec *cdc@ = a codec, or null
1217 * @dstr *d@ = output string
1218 * @struct tvec_state *tv@ = test vector state
1219 *
1220 * Returns: Zero on success, @-1@ on error.
1221 *
1222 * Use: Helper for @read_compound_string@ below.
1223 *
1224 * Flush out any final buffered material from @cdc@, and check
1225 * that it's in a good state. Frees the codec on success. Does
1226 * nothing if @cdc@ is null.
1227 */
1228
1229static int flush_codec(codec *cdc, dstr *d, struct tvec_state *tv)
1230{
1231 int err;
1232
1233 if (cdc) {
1234 err = cdc->ops->code(cdc, 0, 0, d);
1235 if (err)
1236 return (tvec_error(tv, "invalid %s sequence end: %s",
1237 cdc->ops->c->name, codec_strerror(err)));
1238 cdc->ops->destroy(cdc);
1239 }
1240 return (0);
1241}
1242
1243/* --- @read_compound_string@ --- *
1244 *
1245 * Arguments: @void **p_inout@ = address of output buffer pointer
1246 * @size_t *sz_inout@ = address of buffer size
1247 * @unsigned code@ = initial interpretation of barewords
1248 * @unsigned f@ = other flags (@RCSF_...@)
1249 * @struct tvec_state *tv@ = test vector state
1250 *
1251 * Returns: Zero on success, @-1@ on error.
1252 *
1253 * Use: Parse a compound string, i.e., a sequence of stringish pieces
1254 * which might be quoted strings, character names, or barewords
1255 * to be decoded accoding to @code@, interspersed with
1256 * additional directives.
1257 *
1258 * If the initial buffer pointer is non-null and sufficiently
1259 * large, then it will be reused; otherwise, it is freed and a
1260 * fresh, sufficiently large buffer is allocated and returned.
b1a20bee 1261 * This buffer unconditionally uses the standard-library arena.
67b5031e
MW
1262 */
1263
1264#define RCSF_NESTED 1u
882a39c1 1265static int read_compound_string(void **p_inout, size_t *sz_inout,
67b5031e
MW
1266 unsigned code, unsigned f,
1267 struct tvec_state *tv)
b64eb60f 1268{
67b5031e 1269 const codec_class *ccl; unsigned cdf;
b64eb60f
MW
1270 codec *cdc;
1271 dstr d = DSTR_INIT, w = DSTR_INIT;
1272 char *p;
67b5031e
MW
1273 const char *q;
1274 void *pp = 0; size_t sz;
1275 unsigned long n;
882a39c1 1276 int ch, err, rc;
b64eb60f 1277
67b5031e
MW
1278 set_up_encoding(&ccl, &cdf, code); cdc = 0;
1279
1280 if (tvec_nexttoken(tv)) return (tvec_syntax(tv, fgetc(tv->fp), "string"));
b64eb60f
MW
1281 do {
1282 ch = getc(tv->fp);
67b5031e
MW
1283 switch (ch) {
1284
1285 case ')': case ']': case '}':
1286 /* Close brackets. Leave these for recursive caller if there is one,
1287 * or just complain.
1288 */
1289
1290 if (!(f&RCSF_NESTED))
1291 { rc = tvec_syntax(tv, ch, "string"); goto end; }
1292 ungetc(ch, tv->fp); goto done;
1293
1294 case '"': case '\'':
1295 /* Quotes. Read a quoted string. */
1296
1297 if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1298 cdc = 0;
1299 if (read_quoted_string(&d, ch, tv)) { rc = -1; goto end; }
1300 break;
1301
1302 case '#':
1303 /* A named character. */
1304
1305 ungetc(ch, tv->fp);
1306 if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1307 cdc = 0;
adec5584 1308 DRESET(&w); tvec_readword(tv, &w, 0, ";", "character name");
5c0f2e08 1309 if (STRCMP(w.buf, ==, "#empty")) break;
67b5031e
MW
1310 if (read_charname(&ch, w.buf, RCF_EOFOK)) {
1311 rc = tvec_error(tv, "unknown character name `%s'", d.buf);
1312 goto end;
1313 }
1314 DPUTC(&d, ch); break;
1315
1316 case '!':
1317 /* A magic keyword. */
1318
1319 if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1320 cdc = 0;
b64eb60f 1321 ungetc(ch, tv->fp);
adec5584 1322 DRESET(&w); tvec_readword(tv, &w, 0, ";", "`!'-keyword");
67b5031e
MW
1323
1324 /* Change bareword coding system. */
1325 if (STRCMP(w.buf, ==, "!bare"))
1326 { code = TVCODE_BARE; set_up_encoding(&ccl, &cdf, code); }
1327 else if (STRCMP(w.buf, ==, "!hex"))
1328 { code = TVCODE_HEX; set_up_encoding(&ccl, &cdf, code); }
1329 else if (STRCMP(w.buf, ==, "!base32"))
1330 { code = TVCODE_BASE32; set_up_encoding(&ccl, &cdf, code); }
1331 else if (STRCMP(w.buf, ==, "!base64"))
1332 { code = TVCODE_BASE64; set_up_encoding(&ccl, &cdf, code); }
1333
1334 /* Repeated substrings. */
1335 else if (STRCMP(w.buf, ==, "!repeat")) {
1336 if (tvec_nexttoken(tv)) {
1337 rc = tvec_syntax(tv, fgetc(tv->fp), "repeat count");
1338 goto end;
1339 }
1340 DRESET(&w);
adec5584 1341 if (tvec_readword(tv, &w, 0, ";{", "repeat count"))
67b5031e
MW
1342 { rc = -1; goto end; }
1343 if (parse_unsigned_integer(&n, &q, w.buf)) {
1344 rc = tvec_error(tv, "invalid repeat count `%s'", w.buf);
1345 goto end;
1346 }
1347 if (*q) { rc = tvec_syntax(tv, *q, "`{'"); goto end; }
1348 if (tvec_nexttoken(tv))
1349 { rc = tvec_syntax(tv, fgetc(tv->fp), "`{'"); goto end; }
1350 ch = getc(tv->fp); if (ch != '{')
1351 { rc = tvec_syntax(tv, ch, "`{'"); goto end; }
1352 sz = 0;
1353 if (read_compound_string(&pp, &sz, code, f | RCSF_NESTED, tv))
1354 { rc = -1; goto end; }
1355 ch = getc(tv->fp); if (ch != '}')
1356 { rc = tvec_syntax(tv, ch, "`}'"); goto end; }
1357 if (sz) {
1358 if (n > (size_t)-1/sz)
1359 { rc = tvec_error(tv, "repeat size out of range"); goto end; }
b1a20bee
MW
1360 n *= sz;
1361 dstr_ensure(&d, n);
1362 fill_pattern(d.buf + d.len, n, pp, sz); d.len += n;
67b5031e
MW
1363 }
1364 xfree(pp); pp = 0;
1365 }
1366
1367 /* Anything else is an error. */
1368 else {
1369 tvec_error(tv, "unknown string keyword `%s'", w.buf);
1370 rc = -1; goto end;
1371 }
b64eb60f 1372 break;
67b5031e 1373
b64eb60f 1374 default:
67b5031e
MW
1375 /* A bareword. Process it according to the current coding system. */
1376
1377 switch (code) {
1378 case TVCODE_BARE:
1379 ungetc(ch, tv->fp);
1380 if (collect_bare(&d, tv)) goto done;
1381 break;
1382 default:
1383 assert(ccl);
1384 ungetc(ch, tv->fp); DRESET(&w);
adec5584
MW
1385 if (tvec_readword(tv, &w, 0, ";",
1386 "%s-encoded fragment", ccl->name))
67b5031e
MW
1387 { rc = -1; goto end; }
1388 if (!cdc) cdc = ccl->decoder(cdf);
1389 err = cdc->ops->code(cdc, w.buf, w.len, &d);
1390 if (err) {
1391 tvec_error(tv, "invalid %s fragment `%s': %s",
1392 ccl->name, w.buf, codec_strerror(err));
1393 rc = -1; goto end;
1394 }
1395 break;
1396 }
1397 break;
b64eb60f
MW
1398 }
1399 } while (!tvec_nexttoken(tv));
1400
1401done:
67b5031e
MW
1402 /* Wrap things up. */
1403 if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1404 cdc = 0;
b64eb60f 1405 if (*sz_inout <= d.len)
b1a20bee 1406 { free(*p_inout); *p_inout = x_alloc(&arena_stdlib, d.len + 1); }
b64eb60f 1407 p = *p_inout; memcpy(p, d.buf, d.len); p[d.len] = 0; *sz_inout = d.len;
882a39c1 1408 rc = 0;
67b5031e 1409
882a39c1 1410end:
67b5031e
MW
1411 /* Clean up any debris. */
1412 if (cdc) cdc->ops->destroy(cdc);
1413 if (pp) xfree(pp);
b64eb60f 1414 dstr_destroy(&d); dstr_destroy(&w);
882a39c1 1415 return (rc);
b64eb60f
MW
1416}
1417
b64eb60f
MW
1418/*----- Signed and unsigned integer types ---------------------------------*/
1419
c81c35df
MW
1420/* --- @init_int@, @init_uint@ --- *
1421 *
1422 * Arguments: @union tvec_regval *rv@ = register value
1423 * @const struct tvec_regdef *rd@ = register definition
1424 *
1425 * Returns: ---
1426 *
1427 * Use: Initialize a register value.
1428 *
1429 * Integer values are initialized to zero.
1430 */
1431
b64eb60f
MW
1432static void init_int(union tvec_regval *rv, const struct tvec_regdef *rd)
1433 { rv->i = 0; }
1434
1435static void init_uint(union tvec_regval *rv, const struct tvec_regdef *rd)
1436 { rv->u = 0; }
1437
c81c35df
MW
1438/* --- @eq_int@, @eq_uint@ --- *
1439 *
1440 * Arguments: @const union tvec_regval *rv0, *rv1@ = register values
1441 * @const struct tvec_regdef *rd@ = register definition
1442 *
1443 * Returns: Nonzero if the values are equal, zero if unequal
1444 *
1445 * Use: Compare register values for equality.
1446 */
1447
b64eb60f
MW
1448static int eq_int(const union tvec_regval *rv0, const union tvec_regval *rv1,
1449 const struct tvec_regdef *rd)
1450 { return (rv0->i == rv1->i); }
1451
1452static int eq_uint(const union tvec_regval *rv0,
1453 const union tvec_regval *rv1,
1454 const struct tvec_regdef *rd)
1455 { return (rv0->u == rv1->u); }
1456
c81c35df
MW
1457/* --- @tobuf_int@, @tobuf_uint@ --- *
1458 *
1459 * Arguments: @buf *b@ = buffer
1460 * @const union tvec_regval *rv@ = register value
1461 * @const struct tvec_regdef *rd@ = register definition
1462 *
1463 * Returns: Zero on success, %$-1$% on failure.
1464 *
1465 * Use: Serialize a register value to a buffer.
1466 *
1467 * Integer values are serialized as little-endian 64-bit signed
1468 * or unsigned integers.
1469 */
1470
b64eb60f
MW
1471static int tobuf_int(buf *b, const union tvec_regval *rv,
1472 const struct tvec_regdef *rd)
1473 { return (signed_to_buf(b, rv->i)); }
1474
1475static int tobuf_uint(buf *b, const union tvec_regval *rv,
1476 const struct tvec_regdef *rd)
1477 { return (unsigned_to_buf(b, rv->u)); }
1478
c81c35df
MW
1479/* --- @frombuf_int@, @frombuf_uint@ --- *
1480 *
1481 * Arguments: @buf *b@ = buffer
1482 * @union tvec_regval *rv@ = register value
1483 * @const struct tvec_regdef *rd@ = register definition
1484 *
1485 * Returns: Zero on success, %$-1$% on failure.
1486 *
1487 * Use: Deserialize a register value from a buffer.
1488 *
1489 * Integer values are serialized as 64-bit signed or unsigned
1490 * integers.
1491 */
1492
b64eb60f
MW
1493static int frombuf_int(buf *b, union tvec_regval *rv,
1494 const struct tvec_regdef *rd)
882a39c1 1495 { return (signed_from_buf(b, &rv->i)); }
b64eb60f
MW
1496
1497static int frombuf_uint(buf *b, union tvec_regval *rv,
1498 const struct tvec_regdef *rd)
1499 { return (unsigned_from_buf(b, &rv->u)); }
1500
c81c35df
MW
1501/* --- @parse_int@, @parse_uint@ --- *
1502 *
1503 * Arguments: @union tvec_regval *rv@ = register value
1504 * @const struct tvec_regdef *rd@ = register definition
1505 * @struct tvec_state *tv@ = test-vector state
1506 *
1507 * Returns: Zero on success, %$-1$% on error.
1508 *
1509 * Use: Parse a register value from an input file.
1510 *
1511 * Integers may be input in decimal, hex, binary, or octal,
1512 * following approximately usual conventions.
1513 *
1514 * * Signed integers may be preceded with a `+' or `-' sign.
1515 *
1516 * * Decimal integers are just a sequence of decimal digits
1517 * `0' ... `9'.
1518 *
1519 * * Octal integers are a sequence of digits `0' ... `7',
1520 * preceded by `0o' or `0O'.
1521 *
1522 * * Hexadecimal integers are a sequence of digits `0'
1523 * ... `9', `a' ... `f', or `A' ... `F', preceded by `0x' or
1524 * `0X'.
1525 *
1526 * * Radix-B integers are a sequence of digits `0' ... `9',
1527 * `a' ... `f', or `A' ... `F', each with value less than B,
1528 * preceded by `Br' or `BR', where 0 < B < 36 is expressed
1529 * in decimal without any leading `0' or internal
1530 * underscores `_'.
1531 *
1532 * * A digit sequence may contain internal underscore `_'
1533 * separators, but not before or after all of the digits;
1534 * and two consecutive `_' characters are not permitted.
1535 */
1536
882a39c1
MW
1537static int parse_int(union tvec_regval *rv, const struct tvec_regdef *rd,
1538 struct tvec_state *tv)
b64eb60f
MW
1539{
1540 dstr d = DSTR_INIT;
882a39c1 1541 int rc;
b64eb60f 1542
adec5584
MW
1543 if (tvec_readword(tv, &d, 0, ";", "signed integer"))
1544 { rc = -1; goto end; }
c81c35df
MW
1545 if (parse_signed(&rv->i, d.buf, rd->arg.p, tv)) { rc = -1; goto end; }
1546 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
882a39c1
MW
1547 rc = 0;
1548end:
b64eb60f 1549 dstr_destroy(&d);
882a39c1 1550 return (rc);
b64eb60f
MW
1551}
1552
882a39c1
MW
1553static int parse_uint(union tvec_regval *rv, const struct tvec_regdef *rd,
1554 struct tvec_state *tv)
b64eb60f
MW
1555{
1556 dstr d = DSTR_INIT;
882a39c1 1557 int rc;
b64eb60f 1558
adec5584
MW
1559 if (tvec_readword(tv, &d, 0, ";", "unsigned integer"))
1560 { rc = -1; goto end; }
c81c35df
MW
1561 if (parse_unsigned(&rv->u, d.buf, rd->arg.p, tv)) { rc = -1; goto end; }
1562 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
882a39c1
MW
1563 rc = 0;
1564end:
b64eb60f 1565 dstr_destroy(&d);
882a39c1 1566 return (rc);
b64eb60f
MW
1567}
1568
c81c35df
MW
1569/* --- @dump_int@, @dump_uint@ --- *
1570 *
1571 * Arguments: @const union tvec_regval *rv@ = register value
1572 * @const struct tvec_regdef *rd@ = register definition
1573 * @unsigned style@ = output style (@TVSF_...@)
1574 * @const struct gprintf_ops *gops@, @void *gp@ = format output
1575 *
1576 * Returns: ---
1577 *
1578 * Use: Dump a register value to the format output.
1579 *
1580 * Integer values are dumped in decimal and, unless compact
1581 * output is requested, hex, and maybe a character, as a
1582 * comment.
1583 */
1584
b64eb60f
MW
1585static void dump_int(const union tvec_regval *rv,
1586 const struct tvec_regdef *rd,
e63124bc
MW
1587 unsigned style,
1588 const struct gprintf_ops *gops, void *go)
b64eb60f 1589{
5c0f2e08 1590 if (style&TVSF_RAW) gprintf(gops, go, "int:");
e63124bc 1591 gprintf(gops, go, "%ld", rv->i);
5c0f2e08 1592 if (!(style&(TVSF_COMPACT | TVSF_RAW))) {
3efcfd2d
MW
1593 gprintf(gops, go, " ; = ");
1594 format_signed_hex(gops, go, rv->i);
1595 maybe_format_signed_char(gops, go, rv->i);
b64eb60f
MW
1596 }
1597}
1598
1599static void dump_uint(const union tvec_regval *rv,
1600 const struct tvec_regdef *rd,
e63124bc
MW
1601 unsigned style,
1602 const struct gprintf_ops *gops, void *go)
b64eb60f 1603{
5c0f2e08 1604 if (style&TVSF_RAW) gprintf(gops, go, "uint:");
e63124bc 1605 gprintf(gops, go, "%lu", rv->u);
5c0f2e08 1606 if (!(style&(TVSF_COMPACT | TVSF_RAW))) {
3efcfd2d
MW
1607 gprintf(gops, go, " ; = ");
1608 format_unsigned_hex(gops, go, rv->u);
1609 maybe_format_unsigned_char(gops, go, rv->u);
e63124bc 1610 }
b64eb60f
MW
1611}
1612
c81c35df 1613/* Integer type definitions. */
/* Signed integer register type.  The handlers are given positionally.
 * NOTE(review): slot meanings are inferred from the handler names -- confirm
 * against the declaration of @struct tvec_regty@.
 */
const struct tvec_regty tvty_int = {
  init_int, trivial_release, eq_int,    /* lifecycle and comparison */
  tobuf_int, frombuf_int,               /* serialization */
  parse_int, dump_int                   /* text input and output */
};

/* Unsigned integer register type; same layout as @tvty_int@ just above. */
const struct tvec_regty tvty_uint = {
  init_uint, trivial_release, eq_uint,  /* lifecycle and comparison */
  tobuf_uint, frombuf_uint,             /* serialization */
  parse_uint, dump_uint                 /* text input and output */
};
b64eb60f 1624
c81c35df 1625/* Predefined integer ranges. */
b64eb60f 1626const struct tvec_irange
b1a20bee
MW
1627 tvrange_schar = { SCHAR_MIN, SCHAR_MAX, 0, 0 },
1628 tvrange_short = { SHRT_MIN, SHRT_MAX, 0, 0 },
1629 tvrange_int = { INT_MIN, INT_MAX, 0, 0 },
1630 tvrange_long = { LONG_MIN, LONG_MAX, 0, 0 },
1631 tvrange_sbyte = { -128, 127, 0, 0 },
1632 tvrange_i16 = { -32768, +32767, 0, 0 },
1633 tvrange_i32 = { -2147483648, 2147483647, 0, 0 };
b64eb60f 1634const struct tvec_urange
b1a20bee
MW
1635 tvrange_uchar = { 0, UCHAR_MAX, 0, 0 },
1636 tvrange_ushort = { 0, USHRT_MAX, 0, 0 },
1637 tvrange_uint = { 0, UINT_MAX, 0, 0 },
1638 tvrange_ulong = { 0, ULONG_MAX, 0, 0 },
1639 tvrange_size = { 0, (size_t)-1, 0, 0 },
1640 tvrange_byte = { 0, 255, 0, 0 },
1641 tvrange_u16 = { 0, 65535, 0, 0 },
1642 tvrange_u32 = { 0, 4294967295, 0, 0 };
b64eb60f 1643
67b5031e
MW
1644/* --- @tvec_claimeq_int@ --- *
1645 *
1646 * Arguments: @struct tvec_state *tv@ = test-vector state
1647 * @long i0, i1@ = two signed integers
1648 * @const char *file@, @unsigned @lno@ = calling file and line
1649 * @const char *expr@ = the expression to quote on failure
1650 *
1651 * Returns: Nonzero if @i0@ and @i1@ are equal, otherwise zero.
1652 *
1653 * Use: Check that values of @i0@ and @i1@ are equal. As for
1654 * @tvec_claim@ above, a test case is automatically begun and
1655 * ended if none is already underway. If the values are
1656 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
1657 * mismatched values are dumped: @i0@ is printed as the output
1658 * value and @i1@ is printed as the input reference.
1659 */
1660
b64eb60f
MW
1661int tvec_claimeq_int(struct tvec_state *tv, long i0, long i1,
1662 const char *file, unsigned lno, const char *expr)
1663{
3efcfd2d 1664 tv->out[0].v.i = i0; tv->in[0].v.i = i1;
b64eb60f
MW
1665 return (tvec_claimeq(tv, &tvty_int, 0, file, lno, expr));
1666}
1667
67b5031e
MW
1668/* --- @tvec_claimeq_uint@ --- *
1669 *
1670 * Arguments: @struct tvec_state *tv@ = test-vector state
1671 * @unsigned long u0, u1@ = two unsigned integers
1672 * @const char *file@, @unsigned @lno@ = calling file and line
1673 * @const char *expr@ = the expression to quote on failure
1674 *
1675 * Returns: Nonzero if @u0@ and @u1@ are equal, otherwise zero.
1676 *
1677 * Use: Check that values of @u0@ and @u1@ are equal. As for
1678 * @tvec_claim@ above, a test case is automatically begun and
1679 * ended if none is already underway. If the values are
1680 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
1681 * mismatched values are dumped: @u0@ is printed as the output
1682 * value and @u1@ is printed as the input reference.
1683 */
1684
b64eb60f
MW
1685int tvec_claimeq_uint(struct tvec_state *tv,
1686 unsigned long u0, unsigned long u1,
1687 const char *file, unsigned lno, const char *expr)
1688{
3efcfd2d 1689 tv->out[0].v.u = u0; tv->in[0].v.u = u1;
b64eb60f
MW
1690 return (tvec_claimeq(tv, &tvty_uint, 0, file, lno, expr));
1691}
1692
c4ccbbf9
MW
1693/*----- Size type ---------------------------------------------------------*/
1694
1695/* --- @parse_size@ --- *
1696 *
1697 * Arguments: @union tvec_regval *rv@ = register value
1698 * @const struct tvec_regdef *rd@ = register definition
1699 * @struct tvec_state *tv@ = test-vector state
1700 *
1701 * Returns: Zero on success, %$-1$% on error.
1702 *
1703 * Use: Parse a register value from an input file.
1704 *
1705 * The input format for a size value consists of an unsigned
1706 * integer followed by an optional unit specifier consisting of
1707 * an SI unit prefix and (optionally) the letter `B'. */
1708
1709static int parse_size(union tvec_regval *rv, const struct tvec_regdef *rd,
1710 struct tvec_state *tv)
1711{
1712 unsigned long sz;
1713 int rc;
1714
1715 if (parse_szint(tv, &sz, ";", "size")) { rc = -1; goto end; }
1716 if (check_unsigned_range(sz, rd->arg.p, tv, "size")) { rc = -1; goto end; }
1717 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
1718 rv->u = sz; rc = 0;
1719end:
1720 return (rc);
1721}
1722
1723/* --- @dump_size@ --- *
1724 *
1725 * Arguments: @const union tvec_regval *rv@ = register value
1726 * @const struct tvec_regdef *rd@ = register definition
1727 * @unsigned style@ = output style (@TVSF_...@)
1728 * @const struct gprintf_ops *gops@, @void *gp@ = format output
1729 *
1730 * Returns: ---
1731 *
1732 * Use: Dump a register value to the format output.
1733 *
1734 * Size values are dumped with a unit specifier, with a unit
1735 * prefix only if the size is an exact multiple of the relevant
1736 * power of two. Unless compact style is requested, the plain
1737 * decimal and hex representations of the value are also
1738 * printed.
1739 */
1740
1741static void dump_size(const union tvec_regval *rv,
1742 const struct tvec_regdef *rd,
1743 unsigned style,
1744 const struct gprintf_ops *gops, void *go)
1745{
5c0f2e08 1746 if (style&TVSF_RAW) gprintf(gops, go, "size:");
c4ccbbf9 1747 format_size(gops, go, rv->u, style);
5c0f2e08 1748 if (!(style&(TVSF_COMPACT | TVSF_RAW))) {
c4ccbbf9
MW
1749 gprintf(gops, go, " ; = %lu", (unsigned long)rv->u);
1750 gprintf(gops, go, " = "); format_unsigned_hex(gops, go, rv->u);
1751 maybe_format_unsigned_char(gops, go, rv->u);
1752 }
1753}
1754
1755/* Size type definitions. */
/* Size register type.  A size shares the unsigned-integer handlers for
 * everything except text input and output.
 */
const struct tvec_regty tvty_size = {
  init_uint, trivial_release, eq_uint,  /* shared with @tvty_uint@ */
  tobuf_uint, frombuf_uint,             /* shared with @tvty_uint@ */
  parse_size, dump_size                 /* size-specific text format */
};
1761
1762/* --- @tvec_claimeq_size@ --- *
1763 *
1764 * Arguments: @struct tvec_state *tv@ = test-vector state
1765 * @unsigned long sz0, sz1@ = two sizes
1766 * @const char *file@, @unsigned @lno@ = calling file and line
1767 * @const char *expr@ = the expression to quote on failure
1768 *
1769 * Returns: Nonzero if @sz0@ and @sz1@ are equal, otherwise zero.
1770 *
1771 * Use: Check that values of @sz0@ and @sz1@ are equal. As for
1772 * @tvec_claim@ above, a test case is automatically begun and
1773 * ended if none is already underway. If the values are
1774 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
1775 * mismatched values are dumped: @sz0@ is printed as the output
1776 * value and @sz1@ is printed as the input reference.
1777 */
1778
1779int tvec_claimeq_size(struct tvec_state *tv,
1780 unsigned long sz0, unsigned long sz1,
1781 const char *file, unsigned lno, const char *expr)
1782{
1783 tv->out[0].v.u = sz0; tv->in[0].v.u = sz1;
1784 return (tvec_claimeq(tv, &tvty_size, 0, file, lno, expr));
1785}
1786
3efcfd2d 1787/*----- Floating-point type -----------------------------------------------*/
e63124bc 1788
814e42ff 1789/* --- @init_float@ --- *
c81c35df
MW
1790 *
1791 * Arguments: @union tvec_regval *rv@ = register value
1792 * @const struct tvec_regdef *rd@ = register definition
1793 *
1794 * Returns: ---
1795 *
1796 * Use: Initialize a register value.
1797 *
1798 * Floating-point values are initialized to zero.
1799 */
1800
e63124bc
MW
1801static void init_float(union tvec_regval *rv, const struct tvec_regdef *rd)
1802 { rv->f = 0.0; }
e63124bc 1803
c81c35df
MW
1804/* --- @eq_float@ --- *
1805 *
1806 * Arguments: @const union tvec_regval *rv0, *rv1@ = register values
1807 * @const struct tvec_regdef *rd@ = register definition
1808 *
1809 * Returns: Nonzero if the values are equal, zero if unequal
1810 *
1811 * Use: Compare register values for equality.
1812 *
1813 * Floating-point values may be considered equal if their
1814 * absolute or relative difference is sufficiently small, as
1815 * described in the register definition.
1816 */
1817
e63124bc
MW
1818static int eq_float(const union tvec_regval *rv0,
1819 const union tvec_regval *rv1,
1820 const struct tvec_regdef *rd)
1821 { return (eqish_floating_p(rv0->f, rv1->f, rd->arg.p)); }
1822
c81c35df
MW
1823/* --- @tobuf_float@ --- *
1824 *
1825 * Arguments: @buf *b@ = buffer
1826 * @const union tvec_regval *rv@ = register value
1827 * @const struct tvec_regdef *rd@ = register definition
1828 *
1829 * Returns: Zero on success, %$-1$% on failure.
1830 *
1831 * Use: Serialize a register value to a buffer.
1832 *
1833 * Floating-point values are serialized as little-endian
1834 * IEEE 754 Binary64.
1835 */
1836
e63124bc
MW
1837static int tobuf_float(buf *b, const union tvec_regval *rv,
1838 const struct tvec_regdef *rd)
1839 { return (buf_putf64l(b, rv->f)); }
c81c35df
MW
1840
1841/* --- @frombuf_float@ --- *
1842 *
1843 * Arguments: @buf *b@ = buffer
1844 * @union tvec_regval *rv@ = register value
1845 * @const struct tvec_regdef *rd@ = register definition
1846 *
1847 * Returns: Zero on success, %$-1$% on failure.
1848 *
1849 * Use: Deserialize a register value from a buffer.
1850 *
1851 * Floating-point values are serialized as little-endian
1852 * IEEE 754 Binary64.
1853 */
1854
e63124bc 1855static int frombuf_float(buf *b, union tvec_regval *rv,
b1a20bee
MW
1856 const struct tvec_regdef *rd)
1857{
1858 double t;
1859 int rc;
1860
1861 rc = buf_getf64l(b, &t); if (!rc) rv->f = t;
1862 return (rc);
1863}
e63124bc 1864
c81c35df
MW
1865/* --- @parse_float@ --- *
1866 *
1867 * Arguments: @union tvec_regval *rv@ = register value
1868 * @const struct tvec_regdef *rd@ = register definition
1869 * @struct tvec_state *tv@ = test-vector state
1870 *
1871 * Returns: Zero on success, %$-1$% on error.
1872 *
1873 * Use: Parse a register value from an input file.
1874 *
1875 * Floating-point values are either NaN (%|#nan|%, if supported
1876 * by the platform); positive or negative infinity (%|#inf|%,
1877 * %|+#inf|%, or %|#+inf|% (preferring the last), and %|-#inf|%
1878 * or %|#-inf|% (preferring the latter), if supported by the
1879 * platform); or a number in strtod(3) syntax.
1880 */
1881
e63124bc
MW
1882static int parse_float(union tvec_regval *rv, const struct tvec_regdef *rd,
1883 struct tvec_state *tv)
1884{
1885 dstr d = DSTR_INIT;
1886 int rc;
1887
adec5584 1888 if (tvec_readword(tv, &d, 0, ";", "floating-point number"))
e63124bc 1889 { rc = -1; goto end; }
814e42ff
MW
1890 if (parse_floating(&rv->f, 0, d.buf, rd->arg.p, tv))
1891 { rc = -1; goto end; }
c81c35df 1892 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
e63124bc
MW
1893 rc = 0;
1894end:
1895 dstr_destroy(&d);
1896 return (rc);
1897}
1898
c81c35df
MW
1899/* --- @dump_float@ --- *
1900 *
1901 * Arguments: @const union tvec_regval *rv@ = register value
1902 * @const struct tvec_regdef *rd@ = register definition
1903 * @unsigned style@ = output style (@TVSF_...@)
1904 * @const struct gprintf_ops *gops@, @void *gp@ = format output
1905 *
1906 * Returns: ---
1907 *
1908 * Use: Dump a register value to the format output.
1909 *
1910 * Floating-point values are dumped in decimal or as a special
1911 * token beginning with `%|#|%'. Some effort is taken to ensure
1912 * that the output is sufficient to uniquely identify the
1913 * original value, but, honestly, C makes this really hard.
1914 */
1915
e63124bc
MW
1916static void dump_float(const union tvec_regval *rv,
1917 const struct tvec_regdef *rd,
1918 unsigned style,
1919 const struct gprintf_ops *gops, void *go)
5c0f2e08
MW
1920{
1921 if (style&TVSF_RAW) gprintf(gops, go, "float:");
1922 format_floating(gops, go, rv->f);
1923}
e63124bc 1924
c81c35df 1925/* Floating-point type definition. */
/* Floating-point register type.  The handlers are given positionally.
 * NOTE(review): slot meanings are inferred from the handler names -- confirm
 * against the declaration of @struct tvec_regty@.
 */
const struct tvec_regty tvty_float = {
  init_float, trivial_release, eq_float,        /* lifecycle, comparison */
  tobuf_float, frombuf_float,                   /* serialization */
  parse_float, dump_float                       /* text input and output */
};
1931
c81c35df
MW
1932/* Predefined floating-point ranges. */
1933const struct tvec_floatinfo
dc6eea4e
MW
1934 tvflt_float = { TVFF_RELDELTA | TVFF_INFOK | TVFF_NANOK,
1935 -FLT_MAX, FLT_MAX, FLT_EPSILON/2 },
b1a20bee
MW
1936 tvflt_double = { TVFF_EXACT | TVFF_INFOK | TVFF_NANOK,
1937 -DBL_MAX, DBL_MAX, 0.0 },
c81c35df
MW
1938 tvflt_finite = { TVFF_EXACT, -DBL_MAX, DBL_MAX, 0.0 },
1939 tvflt_nonneg = { TVFF_EXACT, 0, DBL_MAX, 0.0 };
1940
67b5031e
MW
1941/* --- @tvec_claimeqish_float@ --- *
1942 *
1943 * Arguments: @struct tvec_state *tv@ = test-vector state
1944 * @double f0, f1@ = two floating-point numbers
1945 * @unsigned f@ = flags (@TVFF_...@)
1946 * @double delta@ = maximum tolerable difference
1947 * @const char *file@, @unsigned lno@ = calling file and line
1948 * @const char *expr@ = the expression to quote on failure
1949 *
c4ccbbf9 1950 * Returns: Nonzero if @f0@ and @f1@ are sufficiently close, otherwise
67b5031e
MW
1951 * zero.
1952 *
1953 * Use: Check that values of @f0@ and @f1@ are sufficiently close.
1954 * As for @tvec_claim@ above, a test case is automatically begun
1955 * and ended if none is already underway. If the values are
1956 * too far apart, then @tvec_fail@ is called, quoting @expr@,
1957 * and the mismatched values are dumped: @f0@ is printed as the
1958 * output value and @f1@ is printed as the input reference.
1959 *
1960 * The details for the comparison are as follows.
1961 *
1962 * * A NaN value matches any other NaN, and nothing else.
1963 *
1964 * * An infinity matches another infinity of the same sign,
1965 * and nothing else.
1966 *
1967 * * If @f&TVFF_EQMASK@ is @TVFF_EXACT@, then any
1968 * representable number matches only itself: in particular,
1969 * positive and negative zero are considered distinct.
1970 * (This allows tests to check that they land on the correct
1971 * side of branch cuts, for example.)
1972 *
1973 * * If @f&TVFF_EQMASK@ is @TVFF_ABSDELTA@, then %$x$% matches
1974 * %$y$% when %$|x - y| < \delta$%.
1975 *
1976 * * If @f&TVFF_EQMASK@ is @TVFF_RELDELTA@, then %$x$% matches
c4ccbbf9
MW
1977 * %$y$% when %$|1 - x/y| < \delta$%. (Note that this
1978 * criterion is asymmetric. Write %$x \approx_\delta y$%
1979 * if and only if %$|1 - x/y| < \delta$%. Then, for example,
1980 * if %$y (1 - \delta) < x < y/(1 + \delta)$%, then
1981 * %$x \approx_\delta y$%, but %$y \not\approx_\delta x$%.)
67b5031e
MW
1982 */
1983
e63124bc
MW
1984int tvec_claimeqish_float(struct tvec_state *tv,
1985 double f0, double f1, unsigned f, double delta,
1986 const char *file, unsigned lno,
1987 const char *expr)
1988{
1989 struct tvec_floatinfo fi;
1990 union tvec_misc arg;
1991
1992 fi.f = f; fi.min = fi.max = 0.0; fi.delta = delta; arg.p = &fi;
3efcfd2d 1993 tv->out[0].v.f = f0; tv->in[0].v.f = f1;
e63124bc
MW
1994 return (tvec_claimeq(tv, &tvty_float, &arg, file, lno, expr));
1995}
e63124bc 1996
67b5031e
MW
1997/* --- @tvec_claimeq_float@ --- *
1998 *
1999 * Arguments: @struct tvec_state *tv@ = test-vector state
2000 * @double f0, f1@ = two floating-point numbers
2001 * @const char *file@, @unsigned lno@ = calling file and line
2002 * @const char *expr@ = the expression to quote on failure
2003 *
c4ccbbf9 2004 * Returns: Nonzero if @f0@ and @f1@ are identical, otherwise zero.
67b5031e
MW
2005 *
2006 * Use: Check that values of @f0@ and @f1@ are identical. The
2007 * function is exactly equivalent to @tvec_claimeqish_float@
2008 * with @f == TVFF_EXACT@.
2009 */
2010
2011int tvec_claimeq_float(struct tvec_state *tv,
2012 double f0, double f1,
2013 const char *file, unsigned lno,
2014 const char *expr)
2015{
2016 return (tvec_claimeqish_float(tv, f0, f1, TVFF_EXACT, 0.0,
2017 file, lno, expr));
2018}
2019
814e42ff
MW
2020/*----- Durations ---------------------------------------------------------*/
2021
2022/* A duration is a floating-point number of seconds. Initialization and
2023 * teardown, equality comparison, and serialization are as for floating-point
2024 * values.
2025 */
2026
#define DUF_PREFER 1u

/* Table of recognized duration units.  The order of the entries matters to
 * code which walks the table from the top and stops at the first entry it
 * likes.  The table ends with an entry whose @unit@ is null.
 */
static const struct duration_unit {
  const char *unit;                     /* unit name as written */
  double scale;                         /* multiplier applied to the number */
  unsigned f;                           /* flags (@DUF_...@) */
} duration_units[] = {

  /* Decimal multiples of the second. */
  { "Ys",       1e+24,          0 },
  { "Zs",       1e+21,          0 },
  { "Es",       1e+18,          0 },
  { "Ps",       1e+15,          0 },
  { "Ts",       1e+12,          0 },
  { "Gs",       1e+9,           0 },
  { "Ms",       1e+6,           0 },
  { "ks",       1e+3,           0 },
  { "hs",       1e+2,           0 },
  { "das",      1e+1,           0 },

  /* Calendar and clock units. */
  { "yr",       31557600.0,     DUF_PREFER },
  { "y",        31557600.0,     0 },
  { "day",      86400.0,        DUF_PREFER },
  { "dy",       86400.0,        0 },
  { "d",        86400.0,        0 },
  { "hr",       3600.0,         DUF_PREFER },
  { "hour",     3600.0,         0 },
  { "h",        3600.0,         0 },
  { "min",      60.0,           DUF_PREFER },
  { "m",        60.0,           0 },

  /* The second itself. */
  { "s",        1.0,            DUF_PREFER },
  { "sec",      1.0,            0 },

  /* Decimal fractions of the second. */
  { "ds",       1e-1,           0 },
  { "cs",       1e-2,           0 },
  { "ms",       1e-3,           DUF_PREFER },
  { "µs",       1e-6,           DUF_PREFER },
  { "ns",       1e-9,           DUF_PREFER },
  { "ps",       1e-12,          DUF_PREFER },
  { "fs",       1e-15,          DUF_PREFER },
  { "as",       1e-18,          DUF_PREFER },
  { "zs",       1e-21,          DUF_PREFER },
  { "ys",       1e-24,          DUF_PREFER },

  /* End marker. */
  { 0 }
};
2071
13ee7406
MW
2072/* --- @tvec_parsedurunit@ --- *
2073 *
2074 * Arguments: @double *scale_out@ = where to leave the scale
2075 * @const char **p_inout@ = input unit string, updated
2076 *
2077 * Returns: Zero on success, %$-1$% on error.
2078 *
2079 * Use: If @*p_inout@ begins with a unit string followed by the end
2080 * of the string or some non-alphanumeric character, then store
2081 * the corresponding scale factor in @*scale_out@, advance
2082 * @*p_inout@ past the unit string, and return zero. Otherwise,
2083 * return %$-1$%.
2084 */
2085
2086int tvec_parsedurunit(double *scale_out, const char **p_inout)
2087{
2088 const char *p = *p_inout, *q;
2089 const struct duration_unit *u;
2090 size_t n;
2091
2092 while (ISSPACE(*p)) p++;
2093 for (q = p; *q && ISALNUM(*q); q++);
2094 n = q - p; if (!n) { *scale_out = 1.0; return (0); }
2095
2096 for (u = duration_units; u->unit; u++)
2097 if (STRNCMP(p, ==, u->unit, n) && !u->unit[n])
2098 { *scale_out = u->scale; *p_inout = q; return (0); }
2099 return (-1);
2100}
2101
814e42ff
MW
2102/* --- @parse_duration@ --- *
2103 *
2104 * Arguments: @union tvec_regval *rv@ = register value
2105 * @const struct tvec_regdef *rd@ = register definition
2106 * @struct tvec_state *tv@ = test-vector state
2107 *
2108 * Returns: Zero on success, %$-1$% on error.
2109 *
2110 * Use: Parse a register value from an input file.
2111 *
2112 * Duration values are finite nonnegative floating-point
2113 * numbers in @strtod@ syntax, optionally followed by a unit.
2114 */
2115
2116static int parse_duration(union tvec_regval *rv,
2117 const struct tvec_regdef *rd,
2118 struct tvec_state *tv)
2119{
2120 const struct duration_unit *u;
2121 const char *q;
adec5584 2122 dstr d = DSTR_INIT;
814e42ff
MW
2123 double t;
2124 int rc;
2125
adec5584 2126 if (tvec_readword(tv, &d, 0, ";", "duration")) { rc = -1; goto end; }
814e42ff
MW
2127 if (parse_floating(&t, &q, d.buf,
2128 rd->arg.p ? rd->arg.p : &tvflt_nonneg, tv))
2129 { rc = -1; goto end; }
2130
adec5584
MW
2131 if (!*q) tvec_readword(tv, &d, &q, ";", 0);
2132 if (*q) {
814e42ff
MW
2133 for (u = duration_units; u->unit; u++)
2134 if (STRCMP(q, ==, u->unit)) { t *= u->scale; goto found_unit; }
2135 rc = tvec_syntax(tv, *q, "end-of-line"); goto end;
2136 found_unit:;
2137 }
2138
2139 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
2140 rv->f = t; rc = 0;
2141end:
2142 dstr_destroy(&d);
2143 return (rc);
2144}
2145
2146/* --- @dump_duration@ --- *
2147 *
2148 * Arguments: @const union tvec_regval *rv@ = register value
2149 * @const struct tvec_regdef *rd@ = register definition
2150 * @unsigned style@ = output style (@TVSF_...@)
2151 * @const struct gprintf_ops *gops@, @void *go@ = format output
2152 *
2153 * Returns: ---
2154 *
2155 * Use: Dump a register value to the format output.
2156 *
2157 * Durations are dumped as a human-palatable scaled value with
2158 * unit, and, if compact style is not requested, as a raw number
2159 * of seconds at full precision as a comment.
2160 */
2161
2162static void dump_duration(const union tvec_regval *rv,
2163 const struct tvec_regdef *rd,
2164 unsigned style,
2165 const struct gprintf_ops *gops, void *go)
2166{
2167 const struct duration_unit *u;
2168 double t = rv->f;
2169
5c0f2e08
MW
2170 if (style&TVSF_RAW) {
2171 gprintf(gops, go, "duration:");
814e42ff 2172 format_floating(gops, go, rv->f);
5c0f2e08
MW
2173 gprintf(gops, go, "s");
2174 } else {
2175 if (!t) u = 0;
2176 else {
2177 for (u = duration_units; u->scale > t && u[1].unit; u++);
2178 t /= u->scale;
2179 }
2180 gprintf(gops, go, "%.4g %s", t, u ? u->unit : "s");
2181
2182 if (!(style&TVSF_COMPACT)) {
2183 gprintf(gops, go, "; = ");
2184 format_floating(gops, go, rv->f);
2185 gprintf(gops, go, " s");
2186 }
814e42ff
MW
2187 }
2188}
2189
2190/* Duration type definition. */
2191const struct tvec_regty tvty_duration = {
2192 init_float, trivial_release, eq_float,
2193 tobuf_float, frombuf_float,
2194 parse_duration, dump_duration
2195};
2196
c4ccbbf9
MW
2197/* --- @tvec_claimeqish_duration@ --- *
2198 *
2199 * Arguments: @struct tvec_state *tv@ = test-vector state
b1a20bee 2200 * @double t0, t1@ = two durations
c4ccbbf9
MW
2201 * @unsigned f@ = flags (@TVFF_...@)
2202 * @double delta@ = maximum tolerable difference
2203 * @const char *file@, @unsigned lno@ = calling file and line
2204 * @const char *expr@ = the expression to quote on failure
2205 *
2206 * Returns: Nonzero if @t0@ and @t1@ are sufficiently close, otherwise
2207 * zero.
2208 *
2209 * Use: Check that values of @t0@ and @t1@ are sufficiently close.
2210 * This is essentially the same as @tvec_claimeqish_float@, only
2211 * it dumps the values as durations on a mismatch.
2212 */
2213
2214int tvec_claimeqish_duration(struct tvec_state *tv,
2215 double t0, double t1, unsigned f, double delta,
2216 const char *file, unsigned lno,
2217 const char *expr)
2218{
2219 struct tvec_floatinfo fi;
2220 union tvec_misc arg;
2221
2222 fi.f = f; fi.min = fi.max = 0.0; fi.delta = delta; arg.p = &fi;
2223 tv->out[0].v.f = t0; tv->in[0].v.f = t1;
2224 return (tvec_claimeq(tv, &tvty_duration, &arg, file, lno, expr));
2225}
2226
2227/* --- @tvec_claimeq_duration@ --- *
2228 *
2229 * Arguments: @struct tvec_state *tv@ = test-vector state
2230 * @double t0, t1@ = two durations
2231 * @const char *file@, @unsigned lno@ = calling file and line
2232 * @const char *expr@ = the expression to quote on failure
2233 *
2234 * Returns: Nonzero if @t0@ and @t1@ are identical, otherwise zero.
2235 *
2236 * Use: Check that values of @t0@ and @t1@ are identical. The
2237 * function is exactly equivalent to @tvec_claimeqish_duration@
2238 * with @f == TVFF_EXACT@.
2239 */
2240
2241int tvec_claimeq_duration(struct tvec_state *tv,
2242 double t0, double t1,
2243 const char *file, unsigned lno,
2244 const char *expr)
2245{
2246 return (tvec_claimeqish_duration(tv, t0, t1, TVFF_EXACT, 0.0,
2247 file, lno, expr));
2248}
2249
b64eb60f
MW
2250/*----- Enumerations ------------------------------------------------------*/
2251
c81c35df
MW
2252/* --- @init_tenum@ --- *
2253 *
2254 * Arguments: @union tvec_regval *rv@ = register value
2255 * @const struct tvec_regdef *rd@ = register definition
2256 *
2257 * Returns: ---
2258 *
2259 * Use: Initialize a register value.
2260 *
2261 * Integer and floating-point enumeration values are initialized
2262 * as their underlying representations. Pointer enumerations
2263 * are initialized to %|#nil|%.
2264 */
2265
3efcfd2d
MW
2266#define init_ienum init_int
2267#define init_uenum init_uint
2268#define init_fenum init_float
c81c35df 2269
3efcfd2d
MW
2270static void init_penum(union tvec_regval *rv, const struct tvec_regdef *rd)
2271 { rv->p = 0; }
b64eb60f 2272
c81c35df
MW
2273/* --- @eq_tenum@ --- *
2274 *
2275 * Arguments: @const union tvec_regval *rv0, *rv1@ = register values
2276 * @const struct tvec_regdef *rd@ = register definition
2277 *
2278 * Returns: Nonzero if the values are equal, zero if unequal
2279 *
2280 * Use: Compare register values for equality.
2281 *
2282 * Integer and floating-point enumeration values are compared as
2283 * their underlying representations; in particular, floating-
2284 * point enumerations may compare equal if their absolute or
2285 * relative difference is sufficiently small. Pointer
2286 * enumerations are compared as pointers.
2287 */
2288
3efcfd2d
MW
2289#define eq_ienum eq_int
2290#define eq_uenum eq_uint
c81c35df 2291
3efcfd2d
MW
2292static int eq_fenum(const union tvec_regval *rv0,
2293 const union tvec_regval *rv1,
2294 const struct tvec_regdef *rd)
b64eb60f 2295{
3efcfd2d
MW
2296 const struct tvec_fenuminfo *ei = rd->arg.p;
2297 return (eqish_floating_p(rv0->f, rv1->f, ei->fi));
b64eb60f 2298}
c81c35df 2299
3efcfd2d
MW
2300static int eq_penum(const union tvec_regval *rv0,
2301 const union tvec_regval *rv1,
2302 const struct tvec_regdef *rd)
2303 { return (rv0->p == rv1->p); }
b64eb60f 2304
c81c35df
MW
2305/* --- @tobuf_tenum@ --- *
2306 *
2307 * Arguments: @buf *b@ = buffer
2308 * @const union tvec_regval *rv@ = register value
2309 * @const struct tvec_regdef *rd@ = register definition
2310 *
2311 * Returns: Zero on success, %$-1$% on failure.
2312 *
2313 * Use: Serialize a register value to a buffer.
2314 *
2315 * Integer and floating-point enumeration values are serialized
2316 * as their underlying representations. Pointer enumerations
2317 * are serialized as the signed integer index into the
2318 * association table; %|#nil|% serializes as %$-1$%, and
2319 * unrecognized pointers cause failure.
2320 */
2321
3efcfd2d
MW
2322#define tobuf_ienum tobuf_int
2323#define tobuf_uenum tobuf_uint
2324#define tobuf_fenum tobuf_float
c81c35df 2325
3efcfd2d
MW
2326static int tobuf_penum(buf *b, const union tvec_regval *rv,
2327 const struct tvec_regdef *rd)
b64eb60f 2328{
3efcfd2d 2329 const struct tvec_penuminfo *pei = rd->arg.p;
e63124bc
MW
2330 const struct tvec_passoc *pa;
2331 long i;
b64eb60f 2332
3efcfd2d
MW
2333 for (pa = pei->av, i = 0; pa->tag; pa++, i++)
2334 if (pa->p == rv->p) goto found;
2335 if (!rv->p) i = -1;
2336 else return (-1);
2337found:
2338 return (signed_to_buf(b, i));
b64eb60f
MW
2339}
2340
c81c35df
MW
2341/* --- @frombuf_tenum@ --- *
2342 *
2343 * Arguments: @buf *b@ = buffer
2344 * @union tvec_regval *rv@ = register value
2345 * @const struct tvec_regdef *rd@ = register definition
2346 *
2347 * Returns: Zero on success, %$-1$% on failure.
2348 *
2349 * Use: Deserialize a register value from a buffer.
2350 *
2351 * Integer and floating-point enumeration values are serialized
2352 * as their underlying representations. Pointer enumerations
2353 * are serialized as the signed integer index into the
2354 * association table; %|#nil|% serializes as %$-1$%; out-of-
2355 * range indices cause failure.
2356 */
2357
3efcfd2d
MW
2358#define frombuf_ienum frombuf_int
2359#define frombuf_uenum frombuf_uint
2360#define frombuf_fenum frombuf_float
2361static int frombuf_penum(buf *b, union tvec_regval *rv,
b64eb60f
MW
2362 const struct tvec_regdef *rd)
2363{
3efcfd2d 2364 const struct tvec_penuminfo *pei = rd->arg.p;
e63124bc
MW
2365 const struct tvec_passoc *pa;
2366 long i, n;
b64eb60f 2367
3efcfd2d
MW
2368 for (pa = pei->av, n = 0; pa->tag; pa++, n++);
2369 if (signed_from_buf(b, &i)) return (-1);
b1a20bee 2370 if (0 <= i && i < n) rv->p = UNCONST(void, pei->av[i].p);
3efcfd2d 2371 else if (i == -1) rv->p = 0;
adec5584 2372 else { buf_break(b); return (-1); }
3efcfd2d 2373 return (0);
b64eb60f
MW
2374}
2375
c81c35df
MW
2376/* --- @parse_tenum@ --- *
2377 *
2378 * Arguments: @union tvec_regval *rv@ = register value
2379 * @const struct tvec_regdef *rd@ = register definition
2380 * @struct tvec_state *tv@ = test-vector state
2381 *
2382 * Returns: Zero on success, %$-1$% on error.
2383 *
2384 * Use: Parse a register value from an input file.
2385 *
2386 * An enumerated value may be given by name or as a literal
2387 * value. For enumerations based on numeric types, the literal
2388 * values can be written in the same syntax as the underlying
2389 * values. For enumerations based on pointers, the only
2390 * permitted literal is %|#nil|%, which denotes a null pointer.
2391 */
2392
3efcfd2d
MW
2393#define DEFPARSE_ENUM(tag_, ty, slot) \
2394 static int parse_##slot##enum(union tvec_regval *rv, \
2395 const struct tvec_regdef *rd, \
2396 struct tvec_state *tv) \
2397 { \
2398 const struct tvec_##slot##enuminfo *ei = rd->arg.p; \
2399 const struct tvec_##slot##assoc *a; \
2400 dstr d = DSTR_INIT; \
2401 int rc; \
2402 \
5c0f2e08
MW
2403 if (tvec_readword(tv, &d, 0, \
2404 ";", "%s tag or " LITSTR_##tag_, ei->name)) \
3efcfd2d
MW
2405 { rc = -1; goto end; } \
2406 for (a = ei->av; a->tag; a++) \
2407 if (STRCMP(a->tag, ==, d.buf)) { FOUND_##tag_ goto done; } \
2408 MISSING_##tag_ \
2409 done: \
2410 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; } \
2411 rc = 0; \
2412 end: \
2413 dstr_destroy(&d); \
2414 return (rc); \
2415 }
b64eb60f 2416
3efcfd2d
MW
2417#define LITSTR_INT "literal signed integer"
2418#define FOUND_INT rv->i = a->i;
2419#define MISSING_INT if (parse_signed(&rv->i, d.buf, ei->ir, tv)) \
2420 { rc = -1; goto end; }
2421
2422#define LITSTR_UINT "literal unsigned integer"
2423#define FOUND_UINT rv->u = a->u;
2424#define MISSING_UINT if (parse_unsigned(&rv->u, d.buf, ei->ur, tv)) \
2425 { rc = -1; goto end; }
2426
2427#define LITSTR_FLT "literal floating-point number, " \
2428 "`#-inf', `#+inf', or `#nan'"
2429#define FOUND_FLT rv->f = a->f;
814e42ff 2430#define MISSING_FLT if (parse_floating(&rv->f, 0, d.buf, ei->fi, tv)) \
3efcfd2d
MW
2431 { rc = -1; goto end; }
2432
2433#define LITSTR_PTR "`#nil'"
b1a20bee 2434#define FOUND_PTR rv->p = UNCONST(void, a->p);
3efcfd2d
MW
2435#define MISSING_PTR if (STRCMP(d.buf, ==, "#nil")) \
2436 rv->p = 0; \
2437 else { \
2438 tvec_error(tv, "unknown `%s' value `%s'", \
2439 ei->name, d.buf); \
2440 rc = -1; goto end; \
2441 }
2442
2443TVEC_MISCSLOTS(DEFPARSE_ENUM)
2444
2445#undef LITSTR_INT
2446#undef FOUND_INT
2447#undef MISSING_INT
2448
2449#undef LITSTR_UINT
2450#undef FOUND_UINT
2451#undef MISSING_UINT
2452
2453#undef LITSTR_FLT
2454#undef FOUND_FLT
2455#undef MISSING_FLT
2456
2457#undef LITSTR_PTR
2458#undef FOUND_PTR
2459#undef MISSING_PTR
2460
2461#undef DEFPARSE_ENUM
2462
c81c35df
MW
2463/* --- @dump_tenum@ --- *
2464 *
2465 * Arguments: @const union tvec_regval *rv@ = register value
2466 * @const struct tvec_regdef *rd@ = register definition
2467 * @unsigned style@ = output style (@TVSF_...@)
2468 * @const struct gprintf_ops *gops@, @void *go@ = format output
2469 *
2470 * Returns: ---
2471 *
2472 * Use: Dump a register value to the format output.
2473 *
2474 * Enumeration values are dumped as their symbolic names, if
2475 * possible, with the underlying values provided as a comment
2476 * unless compact output is requested, as for the underlying
2477 * representation. A null pointer is printed as %|#nil|%;
2478 * non-null pointers are printed as %|#<TYPE PTR>|%, with the
2479 * enumeration TYPE and the raw pointer PTR printed with the
2480 * system's %|%p|% format specifier.
2481 */
2482
2483
3efcfd2d
MW
2484#define DEFDUMP_ENUM(tag_, ty, slot) \
2485 static void dump_##slot##enum(const union tvec_regval *rv, \
2486 const struct tvec_regdef *rd, \
2487 unsigned style, \
2488 const struct gprintf_ops *gops, void *go) \
2489 { \
2490 const struct tvec_##slot##enuminfo *ei = rd->arg.p; \
2491 const struct tvec_##slot##assoc *a; \
2492 \
5c0f2e08 2493 if (style&TVSF_RAW) gprintf(gops, go, #slot "enum/%s:", ei->name); \
3efcfd2d
MW
2494 for (a = ei->av; a->tag; a++) \
2495 if (rv->slot == a->slot) { \
2496 gprintf(gops, go, "%s", a->tag); \
2497 if (style&TVSF_COMPACT) return; \
2498 gprintf(gops, go, " ; = "); break; \
2499 } \
2500 \
2501 PRINTRAW_##tag_ \
b64eb60f
MW
2502 }
2503
3efcfd2d 2504#define MAYBE_PRINT_EXTRA \
c81c35df 2505 if (style&TVSF_COMPACT) /* nothing to do */; \
3efcfd2d
MW
2506 else if (!a->tag) { gprintf(gops, go, " ; = "); goto _extra; } \
2507 else if (1) { gprintf(gops, go, " = "); goto _extra; } \
2508 else _extra:
b64eb60f 2509
3efcfd2d
MW
2510#define PRINTRAW_INT gprintf(gops, go, "%ld", rv->i); \
2511 MAYBE_PRINT_EXTRA { \
2512 format_signed_hex(gops, go, rv->i); \
2513 maybe_format_signed_char(gops, go, rv->i); \
2514 }
b64eb60f 2515
3efcfd2d
MW
2516#define PRINTRAW_UINT gprintf(gops, go, "%lu", rv->u); \
2517 MAYBE_PRINT_EXTRA { \
2518 format_unsigned_hex(gops, go, rv->u); \
2519 maybe_format_unsigned_char(gops, go, rv->u); \
2520 }
2521
2522#define PRINTRAW_FLT format_floating(gops, go, rv->f);
2523
2524#define PRINTRAW_PTR if (!rv->p) gprintf(gops, go, "#nil"); \
e63124bc 2525 else gprintf(gops, go, "#<%s %p>", ei->name, rv->p);
b64eb60f 2526
3efcfd2d 2527TVEC_MISCSLOTS(DEFDUMP_ENUM)
b64eb60f 2528
3efcfd2d
MW
2529#undef PRINTRAW_INT
2530#undef PRINTRAW_UINT
2531#undef PRINTRAW_FLT
2532#undef PRINTRAW_PTR
2533
2534#undef MAYBE_PRINT_EXTRA
2535#undef DEFDUMP_ENUM
2536
c81c35df 2537/* Enumeration type definitions. */
3efcfd2d
MW
2538#define DEFTY_ENUM(tag, ty, slot) \
2539 const struct tvec_regty tvty_##slot##enum = { \
2540 init_##slot##enum, trivial_release, eq_##slot##enum, \
2541 tobuf_##slot##enum, frombuf_##slot##enum, \
2542 parse_##slot##enum, dump_##slot##enum \
2543 };
2544TVEC_MISCSLOTS(DEFTY_ENUM)
2545#undef DEFTY_ENUM
b64eb60f 2546
c81c35df 2547/* Predefined enumeration types. */
e63124bc
MW
2548static const struct tvec_iassoc bool_assoc[] = {
2549 { "nil", 0 },
2550 { "false", 0 },
2551 { "f", 0 },
2552 { "no", 0 },
2553 { "n", 0 },
2554 { "off", 0 },
2555
2556 { "t", 1 },
2557 { "true", 1 },
2558 { "yes", 1 },
2559 { "y", 1 },
2560 { "on", 1 },
2561
20ba6b0b 2562 TVEC_ENDENUM
e63124bc
MW
2563};
2564
2565const struct tvec_ienuminfo tvenum_bool =
3efcfd2d 2566 { "bool", bool_assoc, &tvrange_int };
e63124bc 2567
20ba6b0b
MW
2568static const struct tvec_iassoc cmp_assoc[] = {
2569 { "<", -1 },
2570 { "less", -1 },
2571 { "lt", -1 },
2572
2573 { "=", 0 },
2574 { "equal", 0 },
2575 { "eq", 0 },
2576
2577 { ">", +1 },
2578 { "greater", +1 },
2579 { "gt", +1 },
2580
2581 TVEC_ENDENUM
2582};
2583
2584const struct tvec_ienuminfo tvenum_cmp =
2585 { "cmp", cmp_assoc, &tvrange_int };
2586
67b5031e
MW
2587/* --- @tvec_claimeq_tenum@ --- *
2588 *
2589 * Arguments: @struct tvec_state *tv@ = test-vector state
2590 * @const struct tvec_typeenuminfo *ei@ = enumeration type info
2591 * @ty t0, t1@ = two values
2592 * @const char *file@, @unsigned lno@ = calling file and line
2593 * @const char *expr@ = the expression to quote on failure
2594 *
2595 * Returns: Nonzero if @t0@ and @t1@ are equal, otherwise zero.
2596 *
2597 * Use: Check that values of @t0@ and @t1@ are equal. As for
2598 * @tvec_claim@ above, a test case is automatically begun and
2599 * ended if none is already underway. If the values are
2600 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
2601 * mismatched values are dumped: @t0@ is printed as the output
2602 * value and @t1@ is printed as the input reference.
2603 */
2604
b64eb60f 2605#define DEFCLAIM(tag, ty, slot) \
e63124bc
MW
2606 int tvec_claimeq_##slot##enum \
2607 (struct tvec_state *tv, \
2608 const struct tvec_##slot##enuminfo *ei, ty e0, ty e1, \
2609 const char *file, unsigned lno, const char *expr) \
b64eb60f
MW
2610 { \
2611 union tvec_misc arg; \
2612 \
b64eb60f 2613 arg.p = ei; \
3efcfd2d
MW
2614 tv->out[0].v.slot = GET_##tag(e0); \
2615 tv->in[0].v.slot = GET_##tag(e1); \
2616 return (tvec_claimeq(tv, &tvty_##slot##enum, &arg, \
2617 file, lno, expr)); \
b64eb60f
MW
2618 }
2619#define GET_INT(e) (e)
2620#define GET_UINT(e) (e)
e63124bc 2621#define GET_FLT(e) (e)
b1a20bee 2622#define GET_PTR(e) (UNCONST(void, (e)))
b64eb60f
MW
2623TVEC_MISCSLOTS(DEFCLAIM)
2624#undef DEFCLAIM
2625#undef GET_INT
2626#undef GET_UINT
e63124bc 2627#undef GET_FLT
b64eb60f
MW
2628#undef GET_PTR
2629
2630/*----- Flag types --------------------------------------------------------*/
2631
c81c35df
MW
2632/* Flag types are initialized, compared, and serialized as unsigned
2633 * integers.
2634 */
2635
2636/* --- @parse_flags@ --- *
2637 *
2638 * Arguments: @union tvec_regval *rv@ = register value
2639 * @const struct tvec_regdef *rd@ = register definition
2640 * @struct tvec_state *tv@ = test-vector state
2641 *
2642 * Returns: Zero on success, %$-1$% on error.
2643 *
2644 * Use: Parse a register value from an input file.
2645 *
2646 * The input syntax is a sequence of items separated by `|'
2647 * signs. Each item may be the symbolic name of a field value,
2648 * or a literal unsigned integer. The masks associated with the
2649 * given symbolic names must be disjoint. The resulting
2650 * numerical value is simply the bitwise OR of the given values.
2651 */
2652
882a39c1
MW
2653static int parse_flags(union tvec_regval *rv, const struct tvec_regdef *rd,
2654 struct tvec_state *tv)
b64eb60f
MW
2655{
2656 const struct tvec_flaginfo *fi = rd->arg.p;
2657 const struct tvec_flag *f;
2658 unsigned long m = 0, v = 0, t;
2659 dstr d = DSTR_INIT;
882a39c1 2660 int ch, rc;
b64eb60f
MW
2661
2662 for (;;) {
c81c35df
MW
2663
2664 /* Read the next item. */
882a39c1 2665 DRESET(&d);
5c0f2e08 2666 if (tvec_readword(tv, &d, 0, "|;", "%s flag name or integer", fi->name))
882a39c1 2667 { rc = -1; goto end; }
b64eb60f 2668
c81c35df 2669 /* Try to find a matching entry in the table. */
b64eb60f
MW
2670 for (f = fi->fv; f->tag; f++)
2671 if (STRCMP(f->tag, ==, d.buf)) {
882a39c1
MW
2672 if (m&f->m)
2673 { tvec_error(tv, "colliding flag setting"); rc = -1; goto end; }
2674 else
2675 { m |= f->m; v |= f->v; goto next; }
b64eb60f
MW
2676 }
2677
c81c35df 2678 /* Otherwise, try to parse it as a raw integer. */
e63124bc
MW
2679 if (parse_unsigned(&t, d.buf, fi->range, tv))
2680 { rc = -1; goto end; }
882a39c1 2681 v |= t;
c81c35df 2682
b64eb60f 2683 next:
c81c35df
MW
2684 /* Advance to the next token. If it's a separator then consume it, and
2685 * go round again. Otherwise we stop here.
2686 */
b64eb60f 2687 if (tvec_nexttoken(tv)) break;
882a39c1
MW
2688 ch = getc(tv->fp);
2689 if (ch != '|') { tvec_syntax(tv, ch, "`|'"); rc = -1; goto end; }
5c0f2e08
MW
2690 if (tvec_nexttoken(tv)) {
2691 tvec_syntax(tv, '\n', "%s flag name or integer", fi->name);
2692 rc = -1; goto end;
2693 }
b64eb60f 2694 }
c81c35df
MW
2695
2696 /* Done. */
2697 rv->u = v; rc = 0;
882a39c1
MW
2698end:
2699 dstr_destroy(&d);
2700 return (rc);
b64eb60f
MW
2701}
2702
c81c35df
MW
2703/* --- @dump_flags@ --- *
2704 *
2705 * Arguments: @const union tvec_regval *rv@ = register value
2706 * @const struct tvec_regdef *rd@ = register definition
2707 * @unsigned style@ = output style (@TVSF_...@)
2708 * @const struct gprintf_ops *gops@, @void *go@ = format output
2709 *
2710 * Returns: ---
2711 *
2712 * Use: Dump a register value to the format output.
2713 *
2714 * The table of symbolic names and their associated values and
2715 * masks is repeatedly scanned, in order, to find disjoint
2716 * matches -- i.e., entries whose value matches the target value
2717 * in the bit positions indicated by the mask, and whose mask
2718 * doesn't overlap with any previously found matches; the names
2719 * are then output, separated by `|'. Any remaining nonzero
2720 * bits not covered by any of the matching masks are output as a
2721 * single literal integer, in hex.
2722 *
2723 * Unless compact or raw output is requested, the raw numeric
2724 * value is also printed in hex, as a comment, whether or not any
2725 * symbolic names were found.
2726 */
2727
b64eb60f
MW
2728static void dump_flags(const union tvec_regval *rv,
2729 const struct tvec_regdef *rd,
e63124bc
MW
2730 unsigned style,
2731 const struct gprintf_ops *gops, void *go)
b64eb60f
MW
2732{
2733 const struct tvec_flaginfo *fi = rd->arg.p;
2734 const struct tvec_flag *f;
c81c35df 2735 unsigned long m = ~0ul, v = rv->u;
b64eb60f
MW
2736 const char *sep;
2737
5c0f2e08
MW
2738 if (style&TVSF_RAW) gprintf(gops, go, "flags/%s:", fi->name);
2739
b64eb60f
MW
2740 for (f = fi->fv, sep = ""; f->tag; f++)
2741 if ((m&f->m) && (v&f->m) == f->v) {
e63124bc 2742 gprintf(gops, go, "%s%s", sep, f->tag); m &= ~f->m;
b64eb60f
MW
2743 sep = style&TVSF_COMPACT ? "|" : " | ";
2744 }
2745
e63124bc 2746 if (v&m) gprintf(gops, go, "%s0x%0*lx", sep, hex_width(v), v&m);
b1a20bee 2747 else if (!v && m == ~0ul) gprintf(gops, go, "0");
b64eb60f 2748
b1a20bee 2749 if (!(style&(TVSF_COMPACT | TVSF_RAW)))
e63124bc 2750 gprintf(gops, go, " ; = 0x%0*lx", hex_width(rv->u), rv->u);
b64eb60f
MW
2751}
2752
c81c35df 2753/* Flags type definition. */
b64eb60f 2754const struct tvec_regty tvty_flags = {
3efcfd2d 2755 init_uint, trivial_release, eq_uint,
b64eb60f
MW
2756 tobuf_uint, frombuf_uint,
2757 parse_flags, dump_flags
2758};
2759
67b5031e
MW
2760/* --- @tvec_claimeq_flags@ --- *
2761 *
2762 * Arguments: @struct tvec_state *tv@ = test-vector state
2763 * @const struct tvec_flaginfo *fi@ = flags type info
2764 * @unsigned long f0, f1@ = two values
2765 * @const char *file@, @unsigned lno@ = calling file and line
2766 * @const char *expr@ = the expression to quote on failure
2767 *
2768 * Returns: Nonzero if @f0@ and @f1@ are equal, otherwise zero.
2769 *
2770 * Use: Check that values of @f0@ and @f1@ are equal. As for
2771 * @tvec_claim@ above, a test case is automatically begun and
2772 * ended if none is already underway. If the values are
2773 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
2774 * mismatched values are dumped: @f0@ is printed as the output
2775 * value and @f1@ is printed as the input reference.
2776 */
2777
b64eb60f
MW
2778int tvec_claimeq_flags(struct tvec_state *tv,
2779 const struct tvec_flaginfo *fi,
2780 unsigned long f0, unsigned long f1,
2781 const char *file, unsigned lno, const char *expr)
2782{
2783 union tvec_misc arg;
2784
3efcfd2d 2785 arg.p = fi; tv->out[0].v.u = f0; tv->in[0].v.u = f1;
b64eb60f
MW
2786 return (tvec_claimeq(tv, &tvty_flags, &arg, file, lno, expr));
2787}
2788
e63124bc
MW
2789/*----- Characters --------------------------------------------------------*/
2790
c81c35df
MW
2791/* Character values are initialized and compared as signed integers. */
2792
2793/* --- @tobuf_char@ --- *
2794 *
2795 * Arguments: @buf *b@ = buffer
2796 * @const union tvec_regval *rv@ = register value
2797 * @const struct tvec_regdef *rd@ = register definition
2798 *
2799 * Returns: Zero on success, %$-1$% on failure.
2800 *
2801 * Use: Serialize a register value to a buffer.
2802 *
2803 * Character values are serialized as little-endian 32-bit
2804 * unsigned integers, with %|EOF|% serialized as all-bits-set.
2805 */
2806
e63124bc 2807static int tobuf_char(buf *b, const union tvec_regval *rv,
67b5031e 2808 const struct tvec_regdef *rd)
e63124bc
MW
2809{
2810 uint32 u;
c81c35df 2811
e63124bc
MW
2812 if (0 <= rv->i && rv->i <= UCHAR_MAX) u = rv->i;
2813 else if (rv->i == EOF) u = MASK32;
adec5584 2814 else { buf_break(b); return (-1); }
e63124bc
MW
2815 return (buf_putu32l(b, u));
2816}
2817
c81c35df
MW
2818/* --- @frombuf_char@ --- *
2819 *
2820 * Arguments: @buf *b@ = buffer
2821 * @union tvec_regval *rv@ = register value
2822 * @const struct tvec_regdef *rd@ = register definition
2823 *
2824 * Returns: Zero on success, %$-1$% on failure.
2825 *
2826 * Use: Deserialize a register value from a buffer.
2827 *
2828 * Character values are serialized as little-endian 32-bit
2829 * unsigned integers, with %|EOF|% serialized as all-bits-set.
2830 */
2831
e63124bc 2832static int frombuf_char(buf *b, union tvec_regval *rv,
67b5031e 2833 const struct tvec_regdef *rd)
e63124bc
MW
2834{
2835 uint32 u;
2836
2837 if (buf_getu32l(b, &u)) return (-1);
2838 if (0 <= u && u <= UCHAR_MAX) rv->i = u;
2839 else if (u == MASK32) rv->i = EOF;
adec5584 2840 else { buf_break(b); return (-1); }
e63124bc
MW
2841 return (0);
2842}
2843
c81c35df
MW
2844/* --- @parse_char@ --- *
2845 *
2846 * Arguments: @union tvec_regval *rv@ = register value
2847 * @const struct tvec_regdef *rd@ = register definition
2848 * @struct tvec_state *tv@ = test-vector state
2849 *
2850 * Returns: Zero on success, %$-1$% on error.
2851 *
2852 * Use: Parse a register value from an input file.
2853 *
2854 * A character value can be given by symbolic name, with a
2855 * leading `%|#|%'; or a character or `%|\|%'-escape sequence,
2856 * optionally in single quotes.
2857 *
2858 * The following escape sequences and character names are
2859 * recognized.
2860 *
2861 * * `%|#eof|%' is the special end-of-file marker.
2862 *
2863 * * `%|#nul|%' is the NUL character, sometimes used to
2864 * terminate strings.
2865 *
2866 * * `%|#bell|%', `%|#bel|%', `%|#ding|%', or `%|\a|%' is the BEL
2867 * character used to ring the terminal bell (or do some other
2868 * thing to attract the user's attention).
2869 *
2870 * * %|#backspace|%, %|#bs|%, or %|\b|% is the backspace
2871 * character, used to move the cursor backwards by one cell.
2872 *
2873 * * %|#escape|%, %|#esc|%, or %|\e|% is the escape character,
2874 * used to introduce special terminal commands.
2875 *
2876 * * %|#formfeed|%, %|#ff|%, or %|\f|% is the formfeed
2877 * character, used to separate pages of text.
2878 *
2879 * * %|#newline|%, %|#linefeed|%, %|#lf|%, %|#nl|%, or %|\n|% is
2880 * the newline character, used to terminate lines of text or
2881 * advance the cursor to the next line (perhaps without
2882 * returning it to the start of the line).
2883 *
2884 * * %|#return|%, %|#carriage-return|%, %|#cr|%, or %|\r|% is
2885 * the carriage-return character, used to return the cursor to
2886 * the start of the line.
2887 *
2888 * * %|#tab|%, %|#horizontal-tab|%, %|#ht|%, or %|\t|% is the
2889 * tab character, used to advance the cursor to the next tab
2890 * stop on the current line.
2891 *
2892 * * %|#vertical-tab|%, %|#vt|%, %|\v|% is the vertical tab
2893 * character.
2894 *
2895 * * %|#space|%, %|#spc|% is the space character.
2896 *
2897 * * %|#delete|%, %|#del|% is the delete character, used to
2898 * erase the most recent character.
2899 *
2900 * * %|\'|% is the single-quote character.
2901 *
2902 * * %|\\|% is the backslash character.
2903 *
2904 * * %|\"|% is the double-quote character.
2905 *
2906 * * %|\NNN|% or %|\{NNN}|% is the character with code NNN in
2907 * octal. The NNN may be up to three digits long.
2908 *
2909 * * %|\xNN|% or %|\x{NN}|% is the character with code NN in
2910 * hexadecimal.
2911 */
2912
e63124bc
MW
2913static int parse_char(union tvec_regval *rv, const struct tvec_regdef *rd,
2914 struct tvec_state *tv)
2915{
2916 dstr d = DSTR_INIT;
2917 int ch, rc;
2918 unsigned f = 0;
2919#define f_quote 1u
2920
c81c35df 2921 /* Inspect the character to see what we're up against. */
e63124bc 2922 ch = getc(tv->fp);
c81c35df 2923
e63124bc 2924 if (ch == '#') {
c81c35df
MW
2925 /* It looks like a special token. Push the `%|#|%' back and fetch the
2926 * whole word. If there's just the `%|#|%' after all, then treat it as
2927 * literal.
2928 */
2929
e63124bc 2930 ungetc(ch, tv->fp);
adec5584
MW
2931 if (tvec_readword(tv, &d, 0, ";", "character name"))
2932 { rc = -1; goto end; }
c81c35df
MW
2933 if (STRCMP(d.buf, !=, "#")) {
2934 if (read_charname(&ch, d.buf, RCF_EOFOK)) {
2935 rc = tvec_error(tv, "unknown character name `%s'", d.buf);
2936 goto end;
2937 }
2938 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
2939 rv->i = ch; rc = 0; goto end;
e63124bc 2940 }
e63124bc
MW
2941 }
2942
c81c35df
MW
2943 /* If this is a single quote then we expect to see a matching one later,
2944 * and we should process backslash escapes. Get the next character and see
2945 * what happens.
2946 */
e63124bc 2947 if (ch == '\'') { f |= f_quote; ch = getc(tv->fp); }
c81c35df
MW
2948
2949 /* Main character dispatch. */
e63124bc 2950 switch (ch) {
c81c35df 2951
67b5031e 2952 case ';':
c81c35df 2953 /* Unquoted, semicolon begins a comment. */
67b5031e 2954 if (!(f&f_quote)) { rc = tvec_syntax(tv, ch, "character"); goto end; }
c81c35df
MW
2955 else goto plain;
2956
67b5031e 2957 case '\n':
c81c35df
MW
2958 /* A newline. If we saw a single quote, then treat that as literal.
2959 * Otherwise this is an error.
2960 */
2961 if (!(f&f_quote)) goto nochar;
2962 else { f &= ~f_quote; ungetc(ch, tv->fp); ch = '\''; goto plain; }
2963
67b5031e 2964 case EOF:
c81c35df
MW
2965 /* End-of-file. Similar to newline, but with slightly different
2966 * effects on the parse state.
2967 */
2968 if (!(f&f_quote)) goto nochar;
2969 else { f &= ~f_quote; ch = '\''; goto plain; }
2970
2971 case '\'': nochar:
2972 /* A single quote. This must be the second of a pair, and there should
2973 * have been a character or escape sequence between them.
2974 */
e63124bc 2975 rc = tvec_syntax(tv, ch, "character"); goto end;
c81c35df 2976
e63124bc 2977 case '\\':
c81c35df 2978 /* A backslash. Read a character escape. */
67b5031e 2979 if (read_charesc(&ch, tv)) return (-1);
c81c35df 2980
e63124bc 2981 default: plain:
c81c35df 2982 /* Anything else. Treat as literal. */
e63124bc
MW
2983 rv->i = ch; break;
2984 }
c81c35df
MW
2985
2986 /* If we saw an opening quote, then expect the closing quote. */
e63124bc
MW
2987 if (f&f_quote) {
2988 ch = getc(tv->fp);
2989 if (ch != '\'') { rc = tvec_syntax(tv, ch, "`''"); goto end; }
2990 }
c81c35df
MW
2991
2992 /* Done. */
e63124bc
MW
2993 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
2994 rc = 0;
2995end:
2996 dstr_destroy(&d);
2997 return (rc);
2998
2999#undef f_quote
3000}
3001
c81c35df
MW
3002/* --- @dump_char@ --- *
3003 *
3004 * Arguments: @const union tvec_regval *rv@ = register value
3005 * @const struct tvec_regdef *rd@ = register definition
3006 * @unsigned style@ = output style (@TVSF_...@)
3007 * @const struct gprintf_ops *gops@, @void *gp@ = format output
3008 *
3009 * Returns: ---
3010 *
3011 * Use: Dump a register value to the format output.
3012 *
3013 * Character values are dumped as their symbolic names, if any,
3014 * or as a character or escape sequence within single quotes
3015 * (which may be omitted in compact style). If compact output
3016 * is not requested, then the single-quoted representation (for
3017 * characters dumped as symbolic names) and integer code in
3018 * decimal and hex are printed as a comment.
3019 */
3020
e63124bc
MW
3021static void dump_char(const union tvec_regval *rv,
3022 const struct tvec_regdef *rd,
3023 unsigned style,
3024 const struct gprintf_ops *gops, void *go)
3025{
67b5031e
MW
3026 const char *p;
3027 unsigned f = 0;
3028#define f_semi 1u
3029
5c0f2e08
MW
3030 if (style&TVSF_RAW) {
3031 /* Print the raw character unconditionally in single quotes. */
67b5031e 3032
5c0f2e08
MW
3033 gprintf(gops, go, "char:'");
3034 format_char(gops, go, rv->i);
3035 gprintf(gops, go, "'");
3036 } else {
3037 /* Print ina pleasant human-readable way. */
3038
3039 /* Print a character name if we can find one. */
3040 p = find_charname(rv->i, (style&TVSF_COMPACT) ? CTF_SHORT : CTF_PREFER);
3041 if (p) {
3042 gprintf(gops, go, "%s", p);
3043 if (style&TVSF_COMPACT) return;
3044 else { gprintf(gops, go, " ;"); f |= f_semi; }
67b5031e 3045 }
e63124bc 3046
5c0f2e08
MW
3047 /* If the character isn't @EOF@ then print it as a single-quoted thing.
3048 * In compact style, see if we can omit the quotes.
3049 */
3050 if (rv->i >= 0) {
3051 if (f&f_semi) gprintf(gops, go, " = ");
3052 switch (rv->i) {
3053 case ' ': case '\\': case '\'': quote:
3054 format_char(gops, go, rv->i);
3055 break;
3056 default:
3057 if (!(style&TVSF_COMPACT) || !isprint(rv->i)) goto quote;
3058 gprintf(gops, go, "%c", (int)rv->i);
3059 return;
3060 }
3061 }
3062
3063 /* And the character code as an integer. */
3064 if (!(style&TVSF_COMPACT)) {
3065 if (!(f&f_semi)) gprintf(gops, go, " ;");
3066 gprintf(gops, go, " = %ld = ", rv->i);
3067 format_signed_hex(gops, go, rv->i);
3068 }
e63124bc 3069 }
67b5031e
MW
3070
3071#undef f_semi
e63124bc
MW
3072}
3073
c81c35df 3074/* Character type definition. */
e63124bc 3075const struct tvec_regty tvty_char = {
3efcfd2d 3076 init_int, trivial_release, eq_int,
e63124bc
MW
3077 tobuf_char, frombuf_char,
3078 parse_char, dump_char
3079};
3080
67b5031e
MW
3081/* --- @tvec_claimeq_char@ --- *
3082 *
3083 * Arguments: @struct tvec_state *tv@ = test-vector state
3084 * @int ch0, ch1@ = two character codes
3085 * @const char *file@, @unsigned @lno@ = calling file and line
3086 * @const char *expr@ = the expression to quote on failure
3087 *
3088 * Returns: Nonzero if @ch0@ and @ch1@ are equal, otherwise zero.
3089 *
3090 * Use: Check that values of @ch0@ and @ch1@ are equal. As for
3091 * @tvec_claim@ above, a test case is automatically begun and
3092 * ended if none is already underway. If the values are
3093 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
3094 * mismatched values are dumped: @ch0@ is printed as the output
3095 * value and @ch1@ is printed as the input reference.
3096 */
3097
e63124bc
MW
3098int tvec_claimeq_char(struct tvec_state *tv, int c0, int c1,
3099 const char *file, unsigned lno, const char *expr)
3100{
3efcfd2d 3101 tv->out[0].v.i = c0; tv->in[0].v.i = c1;
e63124bc
MW
3102 return (tvec_claimeq(tv, &tvty_char, 0, file, lno, expr));
3103}
3104
b64eb60f
MW
3105/*----- Text and byte strings ---------------------------------------------*/
3106
c81c35df
MW
3107/* --- @init_text@, @init_bytes@ --- *
3108 *
3109 * Arguments: @union tvec_regval *rv@ = register value
3110 * @const struct tvec_regdef *rd@ = register definition
3111 *
3112 * Returns: ---
3113 *
3114 * Use: Initialize a register value.
3115 *
3116 * Text and binary string values are initialized with a null
3117 * pointer and zero length.
3118 */
3119
3120static void init_text(union tvec_regval *rv, const struct tvec_regdef *rd)
3121 { rv->text.p = 0; rv->text.sz = 0; }
b64eb60f
MW
3122
3123static void init_bytes(union tvec_regval *rv, const struct tvec_regdef *rd)
3124 { rv->bytes.p = 0; rv->bytes.sz = 0; }
3125
c81c35df
MW
3126/* --- @release_text@, @release_bytes@ --- *
3127 *
3128 * Arguments: @const union tvec_regval *rv@ = register value
3129 * @const struct tvec_regdef *rd@ = register definition
3130 *
3131 * Returns: ---
3132 *
3133 * Use: Release resources held by a register value.
3134 *
3135 * Text and binary string buffers are freed.
3136 */
3137
3138static void release_text(union tvec_regval *rv,
3139 const struct tvec_regdef *rd)
b1a20bee 3140 { free(rv->text.p); }
b64eb60f
MW
3141
3142static void release_bytes(union tvec_regval *rv,
3143 const struct tvec_regdef *rd)
b1a20bee 3144 { free(rv->bytes.p); }
b64eb60f 3145
c81c35df
MW
3146/* --- @eq_text@, @eq_bytes@ --- *
3147 *
3148 * Arguments: @const union tvec_regval *rv0, *rv1@ = register values
3149 * @const struct tvec_regdef *rd@ = register definition
3150 *
3151 * Returns: Nonzero if the values are equal, zero if unequal
3152 *
3153 * Use: Compare register values for equality.
3154 */
3155
3156static int eq_text(const union tvec_regval *rv0,
3157 const union tvec_regval *rv1,
3158 const struct tvec_regdef *rd)
b64eb60f 3159{
c81c35df
MW
3160 return (rv0->text.sz == rv1->text.sz &&
3161 (!rv0->text.sz ||
3162 MEMCMP(rv0->text.p, ==, rv1->text.p, rv1->text.sz)));
b64eb60f
MW
3163}
3164
3165static int eq_bytes(const union tvec_regval *rv0,
3166 const union tvec_regval *rv1,
3167 const struct tvec_regdef *rd)
3168{
3169 return (rv0->bytes.sz == rv1->bytes.sz &&
3170 (!rv0->bytes.sz ||
3171 MEMCMP(rv0->bytes.p, ==, rv1->bytes.p, rv1->bytes.sz)));
3172}
3173
c81c35df
MW
3174/* --- @tobuf_text@, @tobuf_bytes@ --- *
3175 *
3176 * Arguments: @buf *b@ = buffer
3177 * @const union tvec_regval *rv@ = register value
3178 * @const struct tvec_regdef *rd@ = register definition
3179 *
3180 * Returns: Zero on success, %$-1$% on failure.
3181 *
3182 * Use: Serialize a register value to a buffer.
3183 *
3184 * Text and binary string values are serialized as a little-
3185 * endian 64-bit length %$n$% in bytes followed by %$n$% bytes
3186 * of string data.
3187 */
3188
3189static int tobuf_text(buf *b, const union tvec_regval *rv,
3190 const struct tvec_regdef *rd)
3191 { return (buf_putmem64l(b, rv->text.p, rv->text.sz)); }
b64eb60f
MW
3192
3193static int tobuf_bytes(buf *b, const union tvec_regval *rv,
3194 const struct tvec_regdef *rd)
c81c35df 3195 { return (buf_putmem64l(b, rv->bytes.p, rv->bytes.sz)); }
b64eb60f 3196
c81c35df
MW
3197/* --- @frombuf_text@, @frombuf_bytes@ --- *
3198 *
3199 * Arguments: @buf *b@ = buffer
3200 * @union tvec_regval *rv@ = register value
3201 * @const struct tvec_regdef *rd@ = register definition
3202 *
3203 * Returns: Zero on success, %$-1$% on failure.
3204 *
3205 * Use: Deserialize a register value from a buffer.
3206 *
3207 * Text and binary string values are serialized as a little-
3208 * endian 64-bit length %$n$% in bytes followed by %$n$% bytes
3209 * of string data.
3210 */
3211
3212static int frombuf_text(buf *b, union tvec_regval *rv,
3213 const struct tvec_regdef *rd)
b64eb60f
MW
3214{
3215 const void *p;
3216 size_t sz;
3217
c81c35df
MW
3218 p = buf_getmem64l(b, &sz); if (!p) return (-1);
3219 tvec_alloctext(rv, sz); memcpy(rv->text.p, p, sz); rv->text.p[sz] = 0;
b64eb60f
MW
3220 return (0);
3221}
3222
3223static int frombuf_bytes(buf *b, union tvec_regval *rv,
3224 const struct tvec_regdef *rd)
3225{
3226 const void *p;
3227 size_t sz;
3228
c81c35df 3229 p = buf_getmem64l(b, &sz); if (!p) return (-1);
b64eb60f
MW
3230 tvec_allocbytes(rv, sz); memcpy(rv->bytes.p, p, sz);
3231 return (0);
3232}
3233
c81c35df
MW
3234/* --- @check_string_length@ --- *
3235 *
3236 * Arguments: @size_t sz@ = found string length
3237 * @const struct tvec_urange *ur@ = acceptable range
3238 * @struct tvec_state *tv@ = test-vector state
3239 *
3240 * Returns: Zero on success, %$-1$% on error.
3241 *
3242 * Use: Checks that @sz@ is within the bounds described by @ur@,
3243 * reporting an error if not.
3244 */
3245
882a39c1
MW
3246static int check_string_length(size_t sz, const struct tvec_urange *ur,
3247 struct tvec_state *tv)
b64eb60f 3248{
b1a20bee
MW
3249 unsigned long uu;
3250
3251 if (ur) {
3252 if (ur->min > sz || sz > ur->max) {
3253 tvec_error(tv, "invalid string length %lu; must be in [%lu .. %lu]",
3254 (unsigned long)sz, ur->min, ur->max);
3255 return (-1);
3256 }
3257 if (ur->m && ur->m != 1) {
3258 uu = sz%ur->m;
3259 if (uu != ur->a%ur->m) {
3260 tvec_error(tv, "invalid string length %lu == %lu =/= %lu (mod %lu)",
3261 (unsigned long)sz, uu, ur->a, ur->m);
3262 return (-1);
3263 }
3264 }
3265 }
882a39c1 3266 return (0);
b64eb60f
MW
3267}
3268
c81c35df
MW
3269/* --- @parse_text@, @parse_bytes@ --- *
3270 *
3271 * Arguments: @union tvec_regval *rv@ = register value
3272 * @const struct tvec_regdef *rd@ = register definition
3273 * @struct tvec_state *tv@ = test-vector state
3274 *
3275 * Returns: Zero on success, %$-1$% on error.
3276 *
3277 * Use: Parse a register value from an input file.
3278 *
3279 * The input format for both kinds of strings is basically the
3280 * same: a `compound string', consisting of
3281 *
3282 * * single-quoted strings, which are interpreted entirely
3283 * literally, but can't contain single quotes or newlines;
3284 *
3285 * * double-quoted strings, in which `%|\|%'-escapes are
3286 * interpreted as for characters;
3287 *
3288 * * character names, marked by an initial `%|#|%' sign;
3289 *
3290 * * special tokens marked by an initial `%|!|%' sign; or
3291 *
3292 * * barewords interpreted according to the current coding
3293 * scheme.
3294 *
3295 * The special tokens are
3296 *
3297 * * `%|!bare|%', which causes subsequent sequences of
3298 * barewords to be treated as plain text;
3299 *
3300 * * `%|!hex|%', `%|!base32|%', `%|!base64|%', which cause
3301 * subsequent barewords to be decoded in the requested
3302 * manner.
3303 *
3304 * * `%|!repeat|% %$n$% %|{|% %%\textit{string}%% %|}|%',
3305 * which includes %$n$% copies of the (compound) string.
3306 *
3307 * The only difference between text and binary strings is that
3308 * the initial coding scheme is %|bare|% for text strings and
3309 * %|hex|% for binary strings.
3310 */
3311
3312static int parse_text(union tvec_regval *rv, const struct tvec_regdef *rd,
3313 struct tvec_state *tv)
b64eb60f 3314{
c81c35df 3315 void *p = rv->text.p;
b64eb60f 3316
c81c35df 3317 if (read_compound_string(&p, &rv->text.sz, TVCODE_BARE, 0, tv))
67b5031e 3318 return (-1);
c81c35df
MW
3319 rv->text.p = p;
3320 if (check_string_length(rv->text.sz, rd->arg.p, tv)) return (-1);
882a39c1 3321 return (0);
b64eb60f
MW
3322}
3323
882a39c1
MW
3324static int parse_bytes(union tvec_regval *rv, const struct tvec_regdef *rd,
3325 struct tvec_state *tv)
b64eb60f
MW
3326{
3327 void *p = rv->bytes.p;
3328
67b5031e
MW
3329 if (read_compound_string(&p, &rv->bytes.sz, TVCODE_HEX, 0, tv))
3330 return (-1);
882a39c1
MW
3331 rv->bytes.p = p;
3332 if (check_string_length(rv->bytes.sz, rd->arg.p, tv)) return (-1);
3333 return (0);
b64eb60f
MW
3334}
3335
c81c35df
MW
3336/* --- @dump_text@, @dump_bytes@ --- *
3337 *
3338 * Arguments: @const union tvec_regval *rv@ = register value
3339 * @const struct tvec_regdef *rd@ = register definition
3340 * @unsigned style@ = output style (@TVSF_...@)
3341 * @const struct gprintf_ops *gops@, @void *gp@ = format output
3342 *
3343 * Returns: ---
3344 *
3345 * Use: Dump a register value to the format output.
3346 *
3347 * Text string values are dumped as plain text, in double quotes
3348 * if necessary, and using backslash escape sequences for
3349 * nonprintable characters. Unless compact output is requested,
3350 * strings consisting of multiple lines are dumped with each
3351 * line of the string on a separate output line.
3352 *
3353 * Binary string values are dumped in hexadecimal. In compact
3354 * style, the output simply consists of a single block of hex
3355 * digits. Otherwise, the dump is a display consisting of
3356 * groups of hex digits, with comments showing the offset (if
3357 * the string is long enough) and the corresponding plain text.
3358 *
5c0f2e08 3359 * Empty strings are dumped as %|#empty|%.
c81c35df
MW
3360 */
3361
5c0f2e08
MW
3362static void dump_empty(const char *ty, unsigned style,
3363 const struct gprintf_ops *gops, void *go)
3364{
3365 if (style&TVSF_RAW) gprintf(gops, go, "%s:", ty);
3366 if (!(style&TVSF_COMPACT)) gprintf(gops, go, "#empty");
3367 if (!(style&(TVSF_COMPACT | TVSF_RAW))) gprintf(gops, go, " ; = ");
3368 if (!(style&TVSF_RAW)) gprintf(gops, go, "\"\"");
3369}
3370
3371
c81c35df
MW
3372static void dump_text(const union tvec_regval *rv,
3373 const struct tvec_regdef *rd,
3374 unsigned style,
3375 const struct gprintf_ops *gops, void *go)
b64eb60f
MW
3376{
3377 const unsigned char *p, *q, *l;
b64eb60f
MW
3378 unsigned f = 0;
3379#define f_nonword 1u
3380#define f_newline 2u
3381
5c0f2e08 3382 if (!rv->text.sz) { dump_empty("text", style, gops, go); return; }
b64eb60f 3383
c81c35df 3384 p = (const unsigned char *)rv->text.p; l = p + rv->text.sz;
5c0f2e08
MW
3385 if (style&TVSF_RAW) { gprintf(gops, go, "text:"); goto quote; }
3386 else if (style&TVSF_COMPACT) goto quote;
3387
67b5031e
MW
3388 switch (*p) {
3389 case '!': case '#': case ';': case '"': case '\'':
3390 case '(': case '{': case '[': case ']': case '}': case ')':
3391 f |= f_nonword; break;
3392 }
b64eb60f
MW
3393 for (q = p; q < l; q++)
3394 if (*q == '\n' && q != l - 1) f |= f_newline;
5c0f2e08 3395 else if (!*q || !ISGRAPH(*q) || *q == '\\') f |= f_nonword;
e63124bc 3396 if (f&f_newline) { gprintf(gops, go, "\n\t"); goto quote; }
b64eb60f 3397 else if (f&f_nonword) goto quote;
67b5031e 3398
c81c35df 3399 gops->putm(go, (const char *)p, rv->text.sz);
67b5031e 3400 return;
b64eb60f
MW
3401
3402quote:
e63124bc 3403 gprintf(gops, go, "\"");
b64eb60f 3404 for (q = p; q < l; q++)
5c0f2e08 3405 if (!ISPRINT(*q) || *q == '"') {
e63124bc 3406 if (p < q) gops->putm(go, (const char *)p, q - p);
67b5031e 3407 if (*q != '\n' || (style&TVSF_COMPACT))
3efcfd2d 3408 format_charesc(gops, go, *q, FCF_BRACE);
67b5031e
MW
3409 else {
3410 if (q + 1 == l) { gprintf(gops, go, "\\n\""); return; }
3411 else gprintf(gops, go, "\\n\"\n\t\"");
3412 }
3413 p = q + 1;
b64eb60f 3414 }
e63124bc
MW
3415 if (p < q) gops->putm(go, (const char *)p, q - p);
3416 gprintf(gops, go, "\"");
b64eb60f
MW
3417
3418#undef f_nonword
3419#undef f_newline
3420}
3421
3422static void dump_bytes(const union tvec_regval *rv,
3423 const struct tvec_regdef *rd,
e63124bc
MW
3424 unsigned style,
3425 const struct gprintf_ops *gops, void *go)
b64eb60f
MW
3426{
3427 const unsigned char *p = rv->bytes.p, *l = p + rv->bytes.sz;
3428 size_t off, sz = rv->bytes.sz;
3429 unsigned i, n;
3430 int wd;
3431
5c0f2e08 3432 if (!rv->text.sz) { dump_empty("bytes", style, gops, go); return; }
b64eb60f 3433
5c0f2e08 3434 if (style&(TVSF_COMPACT | TVSF_RAW)) {
b1a20bee 3435 if (style&TVSF_RAW) gprintf(gops, go, "bytes:");
e63124bc 3436 while (p < l) gprintf(gops, go, "%02x", *p++);
b64eb60f
MW
3437 return;
3438 }
3439
e63124bc 3440 if (sz > 16) gprintf(gops, go, "\n\t");
b64eb60f
MW
3441
3442 off = 0; wd = hex_width(sz);
3443 while (p < l) {
3444 if (l - p < 16) n = l - p;
3445 else n = 16;
3446
67b5031e 3447 for (i = 0; i < n; i++) {
e63124bc
MW
3448 if (i < n) gprintf(gops, go, "%02x", p[i]);
3449 else gprintf(gops, go, " ");
67b5031e 3450 if (i < n - 1 && i%4 == 3) gprintf(gops, go, " ");
b64eb60f 3451 }
e63124bc
MW
3452 gprintf(gops, go, " ; ");
3453 if (sz > 16) gprintf(gops, go, "[%0*lx] ", wd, (unsigned long)off);
b64eb60f 3454 for (i = 0; i < n; i++)
e63124bc 3455 gprintf(gops, go, "%c", isprint(p[i]) ? p[i] : '.');
b64eb60f 3456 p += n; off += n;
e63124bc 3457 if (p < l) gprintf(gops, go, "\n\t");
b64eb60f
MW
3458 }
3459}
3460
c81c35df
MW
3461/* Text and byte string type definitions. */
3462const struct tvec_regty tvty_text = {
3463 init_text, release_text, eq_text,
3464 tobuf_text, frombuf_text,
3465 parse_text, dump_text
b64eb60f 3466};
b64eb60f 3467const struct tvec_regty tvty_bytes = {
e63124bc 3468 init_bytes, release_bytes, eq_bytes,
b64eb60f
MW
3469 tobuf_bytes, frombuf_bytes,
3470 parse_bytes, dump_bytes
3471};
3472
c81c35df 3473/* --- @tvec_claimeq_text@ --- *
67b5031e
MW
3474 *
3475 * Arguments: @struct tvec_state *tv@ = test-vector state
3476 * @const char *p0@, @size_t sz0@ = first string with length
3477 * @const char *p1@, @size_t sz1@ = second string with length
3478 * @const char *file@, @unsigned @lno@ = calling file and line
3479 * @const char *expr@ = the expression to quote on failure
3480 *
3481 * Returns: Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
3482 * zero.
3483 *
3484 * Use: Check that strings at @p0@ and @p1@ are equal. As for
3485 * @tvec_claim@ above, a test case is automatically begun and
3486 * ended if none is already underway. If the values are
3487 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
3488 * mismatched values are dumped: @p0@ is printed as the output
3489 * value and @p1@ is printed as the input reference.
3490 */
3491
c81c35df
MW
3492int tvec_claimeq_text(struct tvec_state *tv,
3493 const char *p0, size_t sz0,
3494 const char *p1, size_t sz1,
3495 const char *file, unsigned lno, const char *expr)
b64eb60f 3496{
b1a20bee
MW
3497 tv->out[0].v.text.p = UNCONST(char, p0); tv->out[0].v.text.sz = sz0;
3498 tv->in[0].v.text.p =UNCONST(char, p1); tv->in[0].v.text.sz = sz1;
c81c35df 3499 return (tvec_claimeq(tv, &tvty_text, 0, file, lno, expr));
b64eb60f
MW
3500}
3501
c81c35df 3502/* --- @tvec_claimeq_textz@ --- *
67b5031e
MW
3503 *
3504 * Arguments: @struct tvec_state *tv@ = test-vector state
3505 * @const char *p0, *p1@ = two strings to compare
3506 * @const char *file@, @unsigned @lno@ = calling file and line
3507 * @const char *expr@ = the expression to quote on failure
3508 *
3509 * Returns: Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
3510 * zero.
3511 *
3512 * Use: Check that strings at @p0@ and @p1@ are equal, as for
3513 * @tvec_claimeq_text@, except that the strings are assumed
3514 * null-terminated, so their lengths don't need to be supplied
3515 * explicitly.
3516 */
3517
c81c35df
MW
3518int tvec_claimeq_textz(struct tvec_state *tv,
3519 const char *p0, const char *p1,
3520 const char *file, unsigned lno, const char *expr)
b64eb60f 3521{
b1a20bee
MW
3522 tv->out[0].v.text.p = UNCONST(char, p0); tv->out[0].v.text.sz = strlen(p0);
3523 tv->in[0].v.text.p = UNCONST(char, p1); tv->in[0].v.text.sz = strlen(p1);
c81c35df 3524 return (tvec_claimeq(tv, &tvty_text, 0, file, lno, expr));
b64eb60f
MW
3525}
3526
67b5031e
MW
3527/* --- @tvec_claimeq_bytes@ --- *
3528 *
3529 * Arguments: @struct tvec_state *tv@ = test-vector state
3530 * @const void *p0@, @size_t sz0@ = first string with length
3531 * @const void *p1@, @size_t sz1@ = second string with length
3532 * @const char *file@, @unsigned @lno@ = calling file and line
3533 * @const char *expr@ = the expression to quote on failure
3534 *
3535 * Returns: Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
3536 * zero.
3537 *
3538 * Use: Check that binary strings at @p0@ and @p1@ are equal. As for
3539 * @tvec_claim@ above, a test case is automatically begun and
3540 * ended if none is already underway. If the values are
3541 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
3542 * mismatched values are dumped: @p0@ is printed as the output
3543 * value and @p1@ is printed as the input reference.
3544 */
3545
b64eb60f
MW
3546int tvec_claimeq_bytes(struct tvec_state *tv,
3547 const void *p0, size_t sz0,
3548 const void *p1, size_t sz1,
3549 const char *file, unsigned lno, const char *expr)
3550{
b1a20bee
MW
3551 tv->out[0].v.bytes.p = UNCONST(void, p0); tv->out[0].v.bytes.sz = sz0;
3552 tv->in[0].v.bytes.p = UNCONST(void, p1); tv->in[0].v.bytes.sz = sz1;
b64eb60f
MW
3553 return (tvec_claimeq(tv, &tvty_bytes, 0, file, lno, expr));
3554}
3555
c81c35df 3556/* --- @tvec_alloctext@, @tvec_allocbytes@ --- *
67b5031e
MW
3557 *
3558 * Arguments: @union tvec_regval *rv@ = register value
3559 * @size_t sz@ = required size
3560 *
3561 * Returns: ---
3562 *
3563 * Use: Allocate space in a text or binary string register. If the
3564 * current register size is sufficient, its buffer is left
3565 * alone; otherwise, the old buffer, if any, is freed and a
3566 * fresh buffer allocated. These functions are not intended to
3567 * be used to adjust a buffer repeatedly, e.g., while building
3568 * output incrementally: (a) they will perform badly, and (b)
3569 * the old buffer contents are simply discarded if reallocation
3570 * is necessary. Instead, use a @dbuf@ or @dstr@.
3571 *
c81c35df 3572 * The @tvec_alloctext@ function sneakily allocates an extra
67b5031e
MW
3573 * byte for a terminating zero. The @tvec_allocbytes@ function
3574 * doesn't do this.
3575 */
3576
c81c35df 3577void tvec_alloctext(union tvec_regval *rv, size_t sz)
67b5031e 3578{
b1a20bee
MW
3579 if (rv->text.sz <= sz)
3580 { free(rv->text.p); rv->text.p = x_alloc(&arena_stdlib, sz + 1); }
3581 memset(rv->text.p, '?', sz); rv->text.sz = sz;
67b5031e
MW
3582}
3583
3584void tvec_allocbytes(union tvec_regval *rv, size_t sz)
3585{
b1a20bee
MW
3586 if (rv->bytes.sz < sz)
3587 { free(rv->bytes.p); rv->bytes.p = x_alloc(&arena_stdlib, sz); }
3588 memset(rv->bytes.p, '?', sz); rv->bytes.sz = sz;
67b5031e
MW
3589}
3590
b64eb60f
MW
3591/*----- Buffer type -------------------------------------------------------*/
3592
adec5584
MW
3593/* --- @init_buffer@ --- *
3594 *
3595 * Arguments: @union tvec_regval *rv@ = register value
3596 * @const struct tvec_regdef *rd@ = register definition
3597 *
3598 * Returns: ---
3599 *
3600 * Use: Initialize a register value.
3601 *
3602 * Buffer values are initialized with a null pointer,
3603 * zero length, and zero residue, modulus, and offset.
3604 */
3605
3606static void init_buffer(union tvec_regval *rv, const struct tvec_regdef *rd)
3607 { rv->buf.p = 0; rv->buf.sz = rv->buf.a = rv->buf.m = rv->buf.off = 0; }
3608
3609/* --- @release_buffer@ --- *
3610 *
3611 * Arguments: @const union tvec_regval *rv@ = register value
3612 * @const struct tvec_regdef *rd@ = register definition
3613 *
3614 * Returns: ---
3615 *
3616 * Use: Release resources held by a register value.
3617 *
3618 * Buffers are freed.
3619 */
3620
3621static void release_buffer(union tvec_regval *rv,
3622 const struct tvec_regdef *rd)
b1a20bee 3623 { if (rv->buf.p) free(rv->buf.p - rv->buf.off); }
c81c35df
MW
3624
3625/* --- @eq_buffer@ --- *
3626 *
3627 * Arguments: @const union tvec_regval *rv0, *rv1@ = register values
3628 * @const struct tvec_regdef *rd@ = register definition
3629 *
3630 * Returns: Nonzero if the values are equal, zero if unequal
3631 *
3632 * Use: Compare register values for equality.
3633 *
adec5584
MW
3634 * Buffer values are equal if and only if their sizes and
3635 * alignment parameters are equal; their contents are
3636 * %%\emph{not}%% compared.
c81c35df
MW
3637 */
3638
b64eb60f
MW
3639static int eq_buffer(const union tvec_regval *rv0,
3640 const union tvec_regval *rv1,
3641 const struct tvec_regdef *rd)
adec5584
MW
3642{
3643 return (rv0->buf.sz == rv1->buf.sz &&
3644 rv0->buf.a == rv1->buf.a &&
3645 rv0->buf.m == rv1->buf.m);
3646}
b64eb60f 3647
c81c35df
MW
3648/* --- @tobuf_buffer@ --- *
3649 *
3650 * Arguments: @buf *b@ = buffer
3651 * @const union tvec_regval *rv@ = register value
3652 * @const struct tvec_regdef *rd@ = register definition
3653 *
3654 * Returns: Zero on success, %$-1$% on failure.
3655 *
3656 * Use: Serialize a register value to a buffer.
3657 *
adec5584
MW
3658 * Buffer values are serialized as their lengths, residues, and
3659 * moduli, as unsigned integers.
c81c35df
MW
3660 */
3661
b64eb60f
MW
3662static int tobuf_buffer(buf *b, const union tvec_regval *rv,
3663 const struct tvec_regdef *rd)
adec5584
MW
3664{
3665 return (unsigned_to_buf(b, rv->buf.sz) ||
3666 unsigned_to_buf(b, rv->buf.a) ||
3667 unsigned_to_buf(b, rv->buf.m));
3668}
c81c35df
MW
3669
3670/* --- @frombuf_buffer@ --- *
3671 *
3672 * Arguments: @buf *b@ = buffer
3673 * @union tvec_regval *rv@ = register value
3674 * @const struct tvec_regdef *rd@ = register definition
3675 *
3676 * Returns: Zero on success, %$-1$% on failure.
3677 *
3678 * Use: Deserialize a register value from a buffer.
3679 *
3680 * Buffer values are serialized as their lengths, residues, and
3681 * moduli, as unsigned integers. Only these parameters are
3682 * recovered on deserialization; no buffer is allocated here.
3683 */
3684
b64eb60f
MW
3685static int frombuf_buffer(buf *b, union tvec_regval *rv,
3686 const struct tvec_regdef *rd)
3687{
adec5584 3688 unsigned long sz, a, m;
b64eb60f 3689
adec5584
MW
3690 if (unsigned_from_buf(b, &sz)) return (-1);
3691 if (unsigned_from_buf(b, &a)) return (-1);
3692 if (unsigned_from_buf(b, &m)) return (-1);
3693 if (sz > (size_t)-1 || a > (size_t)-1 || m > (size_t)-1)
3694 { buf_break(b); return (-1); }
3695 rv->buf.sz = sz; rv->buf.a = a; rv->buf.m = m;
b64eb60f
MW
3696 return (0);
3697}
3698
c81c35df
MW
3699/* --- @parse_buffer@ --- *
3700 *
3701 * Arguments: @union tvec_regval *rv@ = register value
3702 * @const struct tvec_regdef *rd@ = register definition
3703 * @struct tvec_state *tv@ = test-vector state
3704 *
3705 * Returns: Zero on success, %$-1$% on error.
3706 *
3707 * Use: Parse a register value from an input file.
3708 *
c4ccbbf9
MW
3709 * The input format for a buffer value is a size, followed by an
3710 * optional `%|@|%' and an alignment quantum and a further
3711 * optional `%|+|%' and an alignment offset. The size, quantum,
3712 * and offset are syntactically sizes.
c81c35df 3713 *
c4ccbbf9 3714 * The buffer is not allocated.
c81c35df
MW
3715 */
3716
882a39c1
MW
3717static int parse_buffer(union tvec_regval *rv,
3718 const struct tvec_regdef *rd,
3719 struct tvec_state *tv)
b64eb60f 3720{
c4ccbbf9 3721 unsigned long sz, a = 0, m = 0;
adec5584 3722 int ch, rc;
b64eb60f 3723
c4ccbbf9
MW
3724 if (parse_szint(tv, &sz, "@;", "buffer length")) { rc = -1; goto end; }
3725 if (check_unsigned_range(sz, &tvrange_size, tv, "buffer length"))
3726 { rc = -1; goto end; }
adec5584 3727 if (check_string_length(sz, rd->arg.p, tv)) { rc = -1; goto end; }
b64eb60f 3728
adec5584
MW
3729 tvec_skipspc(tv);
3730 ch = getc(tv->fp);
3731 if (ch == ';' || ch == '\n') { ungetc(ch, tv->fp); goto done; }
3732 else if (ch != '@') { rc = tvec_syntax(tv, ch, "`@'"); goto end; }
3733
c4ccbbf9
MW
3734 if (parse_szint(tv, &m, "+;", "alignment quantum")) { rc = -1; goto end; }
3735 if (check_unsigned_range(a, &tvrange_size, tv, "alignment quantum"))
3736 { rc = -1; goto end; }
adec5584
MW
3737 if (m == 1) m = 0;
3738
3739 tvec_skipspc(tv);
3740 ch = getc(tv->fp);
3741 if (ch == ';' || ch == '\n') { ungetc(ch, tv->fp); goto done; }
3742 else if (ch != '+') { rc = tvec_syntax(tv, ch, "`+'"); goto end; }
3743
c4ccbbf9
MW
3744 if (parse_szint(tv, &a, ";", "alignment offset")) { rc = -1; goto end; }
3745 if (check_unsigned_range(m, &tvrange_size, tv, "alignment offset"))
3746 { rc = -1; goto end; }
adec5584
MW
3747 if (a >= m) {
3748 rc = tvec_error(tv, "alignment offset %lu >= quantum %lu",
3749 (unsigned long)a, (unsigned long)m);
3750 goto end;
b64eb60f 3751 }
b64eb60f 3752
adec5584 3753done:
882a39c1 3754 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
adec5584 3755 rv->buf.sz = sz; rv->buf.a = a; rv->buf.m = m;
882a39c1
MW
3756 rc = 0;
3757end:
adec5584 3758 return (rc);
b64eb60f
MW
3759}
3760
c81c35df
MW
3761/* --- @dump_buffer@ --- *
3762 *
3763 * Arguments: @const union tvec_regval *rv@ = register value
3764 * @const struct tvec_regdef *rd@ = register definition
3765 * @unsigned style@ = output style (@TVSF_...@)
3766 * @const struct gprintf_ops *gops@, @void *gp@ = format output
3767 *
3768 * Returns: ---
3769 *
3770 * Use: Dump a register value to the format output.
3771 *
c4ccbbf9
MW
3772 * Buffer values are dumped as their size, with the alignment
3773 * quantum and alignment offset if these are non-default.
c81c35df
MW
3774 */
3775
b64eb60f
MW
3776static void dump_buffer(const union tvec_regval *rv,
3777 const struct tvec_regdef *rd,
e63124bc
MW
3778 unsigned style,
3779 const struct gprintf_ops *gops, void *go)
b64eb60f 3780{
b1a20bee 3781 if (style&TVSF_RAW) gprintf(gops, go, "buffer:");
adec5584
MW
3782 format_size(gops, go, rv->buf.sz, style);
3783 if (rv->buf.m) {
5c0f2e08 3784 gprintf(gops, go, style&(TVSF_COMPACT | TVSF_RAW) ? "@" : " @ ");
adec5584
MW
3785 format_size(gops, go, rv->buf.m, style);
3786 if (rv->buf.a) {
5c0f2e08 3787 gprintf(gops, go, style&(TVSF_COMPACT | TVSF_RAW) ? "+" : " + ");
adec5584
MW
3788 format_size(gops, go, rv->buf.a, style);
3789 }
3790 }
b1a20bee 3791 if (!(style&(TVSF_COMPACT | TVSF_RAW))) {
13ee7406 3792 gprintf(gops, go, " ; = %lu", (unsigned long)rv->buf.sz);
adec5584 3793 if (rv->buf.m) {
13ee7406
MW
3794 gprintf(gops, go, " @ %lu", (unsigned long)rv->buf.m);
3795 if (rv->buf.a) gprintf(gops, go, " + %lu", (unsigned long)rv->buf.a);
adec5584
MW
3796 }
3797 gprintf(gops, go, " = "); format_unsigned_hex(gops, go, rv->buf.sz);
3798 if (rv->buf.m) {
3799 gprintf(gops, go, " @ "); format_unsigned_hex(gops, go, rv->buf.m);
3800 if (rv->buf.a) {
3801 gprintf(gops, go, " + ");
3802 format_unsigned_hex(gops, go, rv->buf.a);
3803 }
3804 }
b64eb60f
MW
3805 }
3806}
3807
c81c35df 3808/* Buffer type definition. */
b64eb60f 3809const struct tvec_regty tvty_buffer = {
adec5584 3810 init_buffer, release_buffer, eq_buffer,
b64eb60f
MW
3811 tobuf_buffer, frombuf_buffer,
3812 parse_buffer, dump_buffer
3813};
3814
adec5584
MW
3815/* --- @tvec_initbuffer@ --- *
3816 *
3817 * Arguments: @union tvec_regval *rv@ = register value
d056fbdf 3818 * @const union tvec_regval *ref@ = source buffer
adec5584
MW
3819 * @size_t sz@ = size to allocate
3820 *
3821 * Returns: ---
3822 *
d056fbdf 3823 * Use: Initialize the alignment parameters in @rv@ to match @ref@,
adec5584
MW
3824 * and the size to @sz@.
3825 */
3826
3827void tvec_initbuffer(union tvec_regval *rv,
d056fbdf
MW
3828 const union tvec_regval *ref, size_t sz)
3829 { rv->buf.sz = sz; rv->buf.a = ref->buf.a; rv->buf.m = ref->buf.m; }
adec5584
MW
3830
3831/* --- @tvec_allocbuffer@ --- *
3832 *
3833 * Arguments: @union tvec_regval *rv@ = register value
3834 *
3835 * Returns: ---
3836 *
3837 * Use: Allocate @sz@ bytes to the buffer and fill the space with a
3838 * distinctive pattern.
3839 */
3840
3841void tvec_allocbuffer(union tvec_regval *rv)
3842{
3843 unsigned char *p; size_t n;
3844
b1a20bee 3845 if (rv->buf.p) free(rv->buf.p - rv->buf.off);
adec5584
MW
3846
3847 if (rv->buf.m < 2) {
b1a20bee 3848 rv->buf.p = x_alloc(&arena_stdlib, rv->buf.sz); rv->buf.off = 0;
adec5584 3849 } else {
b1a20bee 3850 p = x_alloc(&arena_stdlib, rv->buf.sz + rv->buf.m - 1);
adec5584
MW
3851 n = (size_t)p%rv->buf.m;
3852 rv->buf.off = (rv->buf.a - n + rv->buf.m)%rv->buf.m;
3853 rv->buf.p = p + rv->buf.off;
3854 }
3855 memset(rv->buf.p, '?', rv->buf.sz);
3856}
3857
b64eb60f 3858/*----- That's all, folks -------------------------------------------------*/