@@@ fltfmt mess
[mLib] / test / tvec-types.c
CommitLineData
b64eb60f
MW
1/* -*-c-*-
2 *
3 * Types for the test-vector framework
4 *
5 * (c) 2023 Straylight/Edgeware
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of the mLib utilities library.
11 *
12 * mLib is free software: you can redistribute it and/or modify it under
13 * the terms of the GNU Library General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or (at
15 * your option) any later version.
16 *
17 * mLib is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20 * License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with mLib. If not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25 * USA.
26 */
27
28/*----- Header files ------------------------------------------------------*/
29
30#include <assert.h>
31#include <ctype.h>
32#include <errno.h>
e63124bc 33#include <float.h>
b64eb60f 34#include <limits.h>
e63124bc 35#include <math.h>
b64eb60f
MW
36#include <stdio.h>
37#include <string.h>
38
39#include "buf.h"
40#include "codec.h"
41# include "base32.h"
42# include "base64.h"
43# include "hex.h"
44#include "dstr.h"
67b5031e 45#include "maths.h"
b1a20bee 46
b64eb60f 47#include "tvec.h"
b1a20bee
MW
48#include "tvec-adhoc.h"
49#include "tvec-types.h"
b64eb60f
MW
50
/*----- Preliminary utilities ---------------------------------------------*/
52
67b5031e
MW
/* --- @trivial_release@ --- *
 *
 * Arguments:   @union tvec_regval *rv@ = a register value
 *              @const struct tvec_regdef *rd@ = the register definition
 *
 * Returns:     ---
 *
 * Use:         Does nothing.  Used for register values which don't retain
 *              resources.
 */

static void trivial_release(union tvec_regval *rv,
                            const struct tvec_regdef *rd)
  { ; }
67
67b5031e
MW
/*----- Integer utilities -------------------------------------------------*/
69
70/* --- @unsigned_to_buf@, @signed_to_buf@ --- *
71 *
72 * Arguments: @buf *b@ = buffer to write on
73 * @unsigned long u@ or @long i@ = integer to write
74 *
75 * Returns: Zero on success, @-1@ on failure.
76 *
77 * Use: Write @i@ to the buffer, in big-endian (two's-complement, it
78 * signed) format.
79 */
80
81static int unsigned_to_buf(buf *b, unsigned long u)
82 { kludge64 k; ASSIGN64(k, u); return (buf_putk64l(b, k)); }
83
b64eb60f
MW
84static int signed_to_buf(buf *b, long i)
85{
86 kludge64 k;
87 unsigned long u;
88
89 u = i;
90 if (i >= 0) ASSIGN64(k, u);
91 else { ASSIGN64(k, ~u); CPL64(k, k); }
92 return (buf_putk64l(b, k));
93}
94
67b5031e
MW
95/* --- @unsigned_from_buf@, @signed_from_buf@ --- *
96 *
97 * Arguments: @buf *b@ = buffer to write on
98 * @unsigned long *u_out@ or @long *i_out@ = where to put the
99 * result
100 *
101 * Returns: Zero on success, @-1@ on failure.
102 *
103 * Use: Read an integer, in big-endian (two's-complement, if signed)
104 * format, from the buffer.
105 */
b64eb60f
MW
106
107static int unsigned_from_buf(buf *b, unsigned long *u_out)
108{
109 kludge64 k, ulmax;
110
111 ASSIGN64(ulmax, ULONG_MAX);
112 if (buf_getk64l(b, &k)) return (-1);
adec5584 113 if (CMP64(k, >, ulmax)) { buf_break(b); return (-1); }
b64eb60f
MW
114 *u_out = GET64(unsigned long, k); return (0);
115}
116
67b5031e
MW
/* --- @hex_width@ --- *
 *
 * Arguments:   @unsigned long u@ = an integer
 *
 * Returns:     A suitable number of digits to use in order to display @u@ in
 *              hex.  Currently, we select a power of two sufficient to show
 *              the value, but at least 2.
 */

static int hex_width(unsigned long u)
{
  int wd;
  unsigned long t;

  /* Here @wd@ counts bits.  Each pass discards another @wd@ bits of the
   * value and doubles @wd@; the loop stops once nothing is left.  The total
   * shift never reaches the width of @unsigned long@ while @t@ is nonzero,
   * so no individual shift is out of range.
   */
  for (t = u >> 4, wd = 4; t >>= wd, wd *= 2, t; );
  return (wd/4);
}
134
67b5031e
MW
/* --- @format_unsigned_hex@, @format_signed_hex@ --- *
 *
 * Arguments:   @const struct gprintf_ops *gops@ = print operations
 *              @void *go@ = print destination
 *              @unsigned long u@ or @long i@ = integer to print
 *
 * Returns:     ---
 *
 * Use:         Print an unsigned or signed integer in hexadecimal, with a
 *              `0x' prefix, zero-padded to the width chosen by
 *              @hex_width@.
 */

static void format_unsigned_hex(const struct gprintf_ops *gops, void *go,
                                unsigned long u)
  { gprintf(gops, go, "0x%0*lx", hex_width(u), u); }
149
/* Print the signed integer @i@ in hexadecimal as an optional `-' sign
 * followed by `0x' and the zero-padded magnitude.
 */
static void format_signed_hex(const struct gprintf_ops *gops, void *go,
                              long i)
{
  /* Negate in unsigned arithmetic so that @LONG_MIN@ doesn't overflow. */
  unsigned long u = i >= 0 ? i : -(unsigned long)i;

  gprintf(gops, go, "%s0x%0*lx", i < 0 ? "-" : "", hex_width(u), u);
}
156
157static int signed_from_buf(buf *b, long *i_out)
158{
159 kludge64 k, lmax, not_lmin;
160
161 ASSIGN64(lmax, LONG_MAX); ASSIGN64(not_lmin, ~(unsigned long)LONG_MIN);
162 if (buf_getk64l(b, &k)) return (-1);
163 if (CMP64(k, <=, lmax)) *i_out = (long)GET64(unsigned long, k);
164 else {
165 CPL64(k, k);
166 if (CMP64(k, <=, not_lmin)) *i_out = -(long)GET64(unsigned long, k) - 1;
adec5584 167 else { buf_break(b); return (-1); }
67b5031e
MW
168 }
169 return (0);
170}
171
b1a20bee 172/* --- @check_signed_range@, @check_unsigned_range@ --- *
67b5031e 173 *
b1a20bee
MW
174 * Arguments: @long i@ or @unsigned long u@ = an integer
175 * @const struct tvec_irange *ir@ or
176 * @const struct tvec_urange *ur@ = range specification,
67b5031e
MW
177 * or null
178 * @struct tvec_state *tv@ = test vector state
c4ccbbf9 179 * @const char *what@ = description of value
67b5031e
MW
180 *
181 * Returns: Zero on success, or @-1@ on error.
182 *
183 * Use: Check that the integer is within bounds. If not, report a
184 * suitable error and return a failure indication.
185 */
186
882a39c1
MW
187static int check_signed_range(long i,
188 const struct tvec_irange *ir,
c4ccbbf9 189 struct tvec_state *tv, const char *what)
b64eb60f 190{
b1a20bee
MW
191 long ii, aa, m;
192
193 if (ir) {
194 if (ir->min > i || i > ir->max) {
195 tvec_error(tv, "%s %ld out of range (must be in [%ld .. %ld])",
196 what, i, ir->min, ir->max);
197 return (-1);
198 }
199 m = ir->m; if (m > 0) m = -m;
200 if (m && m != -1) {
201 /* Reduce both the integer and the intended residue to the canonical
202 * interval [0, m). This is more awkward than it should be because C
203 * (following CPU designs) adopted an unhelpful definition of integer
204 * division when the dividend is negative.
205 *
206 * Note that I've canonicalized the divisor to be %%\emph{negative}%%,
207 * because in two's-complement arithmetic, the absolute value of the
208 * most negative representable value is not itself representable. The
209 * residue modulo the most negative value will itself be representable.
210 */
211
212 ii = i%m; if (ii < 0) ii -= m;
213 aa = ir->a%m; if (aa < 0) aa -= m;
214 if (ii != aa) {
215 tvec_error(tv, "%s %ld == %ld =/= %ld (mod %ld)",
216 what, i, ii, ir->a, ir->m);
217 return (-1);
218 }
219 }
882a39c1
MW
220 }
221 return (0);
b64eb60f
MW
222}
223
882a39c1
MW
224static int check_unsigned_range(unsigned long u,
225 const struct tvec_urange *ur,
c4ccbbf9 226 struct tvec_state *tv, const char *what)
b64eb60f 227{
b1a20bee
MW
228 unsigned long uu;
229
230 if (ur) {
231 if (ur->min > u || u > ur->max) {
232 tvec_error(tv, "%s %lu out of range (must be in [%lu .. %lu])",
233 what, u, ur->min, ur->max);
234 return (-1);
235 }
236 if (ur->m && ur->m != 1) {
237 uu = u%ur->m;
238 if (uu != ur->a%ur->m) {
239 tvec_error(tv, "%s %lu == %lu =/= %lu (mod %lu)",
240 what, u, uu, ur->a, ur->m);
241 return (-1);
242 }
243 }
882a39c1
MW
244 }
245 return (0);
b64eb60f
MW
246}
247
67b5031e
MW
/* --- @chtodig@ --- *
 *
 * Arguments:   @int ch@ = a character
 *
 * Returns:     The numeric value of the character as a digit, or @-1@ if
 *              it's not a digit.  Letters count as extended digits starting
 *              with value 10; case is not significant.
 *
 * Use:         The letter ranges are tested by simple comparison, so this
 *              relies on the letters being contiguous in the execution
 *              character set.
 */

static int chtodig(int ch)
{
  if ('0' <= ch && ch <= '9') return (ch - '0');
  else if ('a' <= ch && ch <= 'z') return (ch - 'a' + 10);
  else if ('A' <= ch && ch <= 'Z') return (ch - 'A' + 10);
  else return (-1);
}
264
67b5031e
MW
/* --- @parse_unsigned_integer@, @parse_signed_integer@ --- *
 *
 * Arguments:   @unsigned long *u_out@, @long *i_out@ = where to put the
 *                      result
 *              @const char **q_out@ = where to put the end position
 *              @const char *p@ = pointer to the string to parse
 *
 * Returns:     Zero on success, @-1@ on error.
 *
 * Use:         Parse an integer from a string in the test-vector format.
 *              This is mostly an extension of the traditional C @strtoul@
 *              format: supported inputs include:
 *
 *                * NNN -- a decimal number (even if it starts with `0');
 *                * 0xNNN -- hexadecimal;
 *                * 0oNNN -- octal;
 *                * 0bNNN -- binary;
 *                * NNrNNN -- base NN, for NN up to 36.
 *
 *              Furthermore, single underscores are permitted internally as
 *              an insignificant digit separator.
 *
 *              On success, @*q_out@ points just past the last digit
 *              accepted; nothing is written on failure.  Overflow is a
 *              failure.
 */

static int parse_unsigned_integer(unsigned long *u_out, const char **q_out,
                                  const char *p)
{
  unsigned long u;
  int ch, d, r;
  const char *q;
  unsigned f = 0;
#define f_implicit 1u                   /* implicitly reading base 10 */
#define f_digit 2u                      /* read a real digit */
#define f_uscore 4u                     /* found an underscore */

  /* Initial setup
   *
   * This will deal with the traditional `0[box]...' prefixes.  We'll leave
   * our new `NNr...' syntax for later.
   */
  if (p[0] != '0' || !p[1]) {
    d = chtodig(*p); if (0 > d || d >= 10) return (-1);
    r = 10; u = d; p++; f |= f_implicit | f_digit;
  } else {
    /* A prefix only counts if it's followed by a digit valid in its base;
     * otherwise the leading zero is just a decimal digit.
     */
    u = 0; d = chtodig(p[2]);
    if (d < 0) { r = 10; f |= f_implicit | f_digit; p++; }
    else if ((p[1] == 'x' || p[1] == 'X') && d < 16) { r = 16; p += 2; }
    else if ((p[1] == 'o' || p[1] == 'O') && d < 8) { r = 8; p += 2; }
    else if ((p[1] == 'b' || p[1] == 'B') && d < 2) { r = 2; p += 2; }
    else { r = 10; f |= f_digit; p++; }
  }

  q = p;
  for (;;) {
    /* Work through the string a character at a time. */

    ch = *p; switch (ch) {

      case '_':
        /* An underscore is OK if we haven't just seen one.  Don't advance
         * @q@: a trailing underscore isn't part of the number.
         */

        if (f&f_uscore) goto done;
        p++; f = (f&~f_implicit) | f_uscore;
        break;

      case 'r': case 'R':
        /* An `r' is OK if the number so far is small enough to be a sensible
         * base, and we're scanning decimal implicitly.  There are 36 digit
         * values available (`0'--`9' and `a'--`z'), so 36 is the largest
         * base we can handle.
         */

        if (!(f&f_implicit) || !u || u > 36) goto done;
        d = chtodig(p[1]); if (0 > d || d >= u) goto done;
        r = u; u = d; f = (f&~f_implicit) | f_digit; p += 2; q = p;
        break;

      default:
        /* Otherwise we expect a valid digit and accumulate it, checking
         * for overflow before each step.
         */
        d = chtodig(ch); if (d < 0 || d >= r) goto done;
        if (u > ULONG_MAX/r) return (-1);
        u *= r; if (u > ULONG_MAX - d) return (-1);
        u += d; f = (f&~f_uscore) | f_digit; p++; q = p;
        break;
    }
  }

done:
  if (!(f&f_digit)) return (-1);
  *u_out = u; *q_out = q; return (0);

#undef f_implicit
#undef f_digit
#undef f_uscore
}
357
/* Parse a signed integer from the string @p@: an optional `+' or `-' sign
 * followed by anything @parse_unsigned_integer@ accepts.  On success,
 * store the value in @*i_out@ and the end position in @*q_out@ and return
 * zero; return @-1@ if the syntax is bad or the value doesn't fit in a
 * @long@.
 */
static int parse_signed_integer(long *i_out, const char **q_out,
                                const char *p)
{
  unsigned long u;
  unsigned f = 0;
#define f_neg 1u

  /* Read an initial sign. */
  if (*p == '+') p++;
  else if (*p == '-') { f |= f_neg; p++; }

  /* Scan an unsigned number. */
  if (parse_unsigned_integer(&u, q_out, p)) return (-1);

  /* Check for signed overflow and apply the sign.  The negative case works
   * with %$u - 1$% so that the magnitude of @LONG_MIN@ never has to be
   * represented as a @long@.
   */
  if (!(f&f_neg)) {
    if (u > LONG_MAX) return (-1);
    *i_out = u;
  } else {
    if (u && u - 1 > -(LONG_MIN + 1)) return (-1);
    *i_out = u ? -(long)(u - 1) - 1 : 0;
  }

  return (0);

#undef f_neg
}
385
67b5031e
MW
/* --- @parse_unsigned@, @parse_signed@ --- *
 *
 * Arguments:   @unsigned long *u_out@ or @long *i_out@ = where to put the
 *                      result
 *              @const char *p@ = string to parse
 *              @const struct tvec_urange *ur@ or
 *                      @const struct tvec_irange *ir@ = range specification,
 *                      or null
 *              @struct tvec_state *tv@ = test vector state
 *
 * Returns:     Zero on success, @-1@ on error.
 *
 * Use:         Parse and range-check an integer.  Unlike @parse_(un)signed_
 *              integer@, these functions check that there's no cruft
 *              following the final digit, and report errors as they find
 *              them rather than leaving that to the caller.
 */

static int parse_unsigned(unsigned long *u_out, const char *p,
                          const struct tvec_urange *ur,
                          struct tvec_state *tv)
{
  unsigned long u;
  const char *q;

  if (parse_unsigned_integer(&u, &q, p))
    return (tvec_error(tv, "invalid unsigned integer `%s'", p));
  if (*q) return (tvec_syntax(tv, *q, "end-of-line"));
  if (check_unsigned_range(u, ur, tv, "integer")) return (-1);
  *u_out = u; return (0);
}
417
882a39c1
MW
/* Parse a signed integer from the whole of the string @p@, check it
 * against the range @ir@ (if not null), and store it in @*i_out@.  Errors
 * are reported through @tv@.  Returns zero on success; the range-check
 * failure path returns @-1@, and the other failure paths pass back the
 * result of @tvec_error@ or @tvec_syntax@.
 */
static int parse_signed(long *i_out, const char *p,
                        const struct tvec_irange *ir,
                        struct tvec_state *tv)
{
  long i;
  const char *q;

  if (parse_signed_integer(&i, &q, p))
    return (tvec_error(tv, "invalid signed integer `%s'", p));
  if (*q) return (tvec_syntax(tv, *q, "end-of-line"));
  if (check_signed_range(i, ir, tv, "integer")) return (-1);
  *i_out = i; return (0);
}
adec5584
MW
/* Unit prefix letters for sizes, in increasing order: each is 1024 times the
 * one before, and the first is 1024 bytes.
 */
static const char size_units[] = "kMGTPEZY";
432
c4ccbbf9 433/* --- @parse_szint@ --- *
adec5584
MW
434 *
435 * Arguments: @struct tvec_state *tv@ = test-vector state
c4ccbbf9 436 * @unsigned long *u_out@ = where to put the answer
adec5584
MW
437 * @const char *delims@ = delimiters
438 * @const char *what@ = description of what we're parsing
439 *
440 * Returns: Zero on success, %$-1$% on failure.
441 *
442 * Use: Parse a memory size.
443 */
444
c4ccbbf9
MW
445static int parse_szint(struct tvec_state *tv, unsigned long *u_out,
446 const char *delims, const char *what)
adec5584
MW
447{
448 dstr d = DSTR_INIT;
449 const char *p, *unit;
450 unsigned long u, t;
451 int rc;
452 unsigned f = 0;
453#define f_range 1u
454
455 if (tvec_readword(tv, &d, 0, delims, what)) { rc = -1; goto end; }
456 p = d.buf;
457 if (parse_unsigned_integer(&u, &p, p)) goto bad;
458 if (!*p) tvec_readword(tv, &d, &p, delims, 0);
459
adec5584 460 for (t = u, unit = size_units; *unit; unit++) {
c4ccbbf9 461 if (t > ULONG_MAX/1024) f |= f_range;
adec5584
MW
462 else t *= 1024;
463 if (*p == *unit) {
464 if (f&f_range) goto rangerr;
465 u = t; p++; break;
466 }
467 }
468 if (*p == 'B') p++;
469 if (*p) goto bad;
470
471 *u_out = u; rc = 0;
472end:
473 dstr_destroy(&d);
474 return (rc);
475
476bad:
477 tvec_error(tv, "invalid %s `%s'", what, d.buf);
478 rc = -1; goto end;
479
480rangerr:
481 tvec_error(tv, "%s `%s' out of range", what, d.buf);
482 rc = -1; goto end;
483
484#undef f_range
485}
486
487/* --- @format_size@ --- *
488 *
489 * Arguments: @const struct gprintf_ops *gops@ = print operations
490 * @void *go@ = print destination
491 * @unsigned long u@ = a size
492 * @unsigned style@ = style (@TVSF_...@)
493 *
494 * Returns: ---
495 *
496 * Use: Format @u@ as a size in bytes to the destination, expressing
497 * it with a unit prefix if this is possible exactly.
498 */
499
500static void format_size(const struct gprintf_ops *gops, void *go,
501 unsigned long u, unsigned style)
502{
503 const char *unit;
504
5c0f2e08
MW
505 if (style&TVSF_RAW)
506 gprintf(gops, go, "%lu", u);
507 else if (!u || u%1024)
adec5584
MW
508 gprintf(gops, go, "%lu%sB", u, style&TVSF_COMPACT ? "" : " ");
509 else {
510 for (unit = size_units, u /= 1024;
511 !(u%1024) && unit[1];
512 u /= 1024, unit++);
513 gprintf(gops, go, "%lu%s%cB", u, style&TVSF_COMPACT ? "" : " ", *unit);
514 }
515}
b64eb60f 516
/*----- Floating-point utilities ------------------------------------------*/
b64eb60f 518
67b5031e
MW
519/* --- @eqish_floating_p@ --- *
520 *
521 * Arguments: @double x, y@ = two numbers to compare
522 * @const struct tvec_floatinfo *fi@ = floating-point info
523 *
c4ccbbf9
MW
524 * Returns: Nonzero if the comparand @x@ is sufficiently close to the
525 * reference @y@, or zero if it's definitely different.
67b5031e 526 */
3efcfd2d 527
67b5031e
MW
528static int eqish_floating_p(double x, double y,
529 const struct tvec_floatinfo *fi)
3efcfd2d 530{
67b5031e
MW
531 double t;
532
533 if (NANP(x)) return (NANP(y)); else if (NANP(y)) return (0);
534 if (INFP(x)) return (x == y); else if (INFP(y)) return (0);
535
536 switch (fi ? fi->f&TVFF_EQMASK : TVFF_EXACT) {
537 case TVFF_EXACT:
538 return (x == y && NEGP(x) == NEGP(y));
539 case TVFF_ABSDELTA:
540 t = x - y; if (t < 0) t = -t; return (t < fi->delta);
541 case TVFF_RELDELTA:
c4ccbbf9 542 t = 1.0 - x/y; if (t < 0) t = -t; return (t < fi->delta);
67b5031e
MW
543 default:
544 abort();
545 }
b64eb60f
MW
546}
547
67b5031e
MW
/* --- @format_floating@ --- *
 *
 * Arguments:   @const struct gprintf_ops *gops@ = print operations
 *              @void *go@ = print destination
 *              @double x@ = number to print
 *
 * Returns:     ---
 *
 * Use:         Print a floating-point number, accurately.  NaN is printed
 *              as `#nan', and the infinities as `#+inf' and `#-inf'.
 */

static void format_floating(const struct gprintf_ops *gops, void *go,
                            double x)
{
  int prec;

  if (NANP(x))
    gprintf(gops, go, "#nan");
  else if (INFP(x))
    gprintf(gops, go, x > 0 ? "#+inf" : "#-inf");
  else {
    /* Ugh.  C doesn't provide any function for just printing a
     * floating-point number /correctly/, i.e., so that you can read the
     * result back and recover the number you first thought of.  There are
     * complicated algorithms published for doing this, but I really don't
     * want to get into that here.  So we have this.
     *
     * The sign doesn't cause significant difficulty so we're going to ignore
     * it for now.  So suppose we're given a number %$x = f b^e$%, in
     * base-%$b$% format, so %$f b^n$% and %$e$% are integers, with
     * %$0 \le f < 1$%.  We're going to convert it into the nearest number
     * of the form %$X = F B^E$%, with similar conditions, only with the
     * additional requirement that %$X$% is normalized, i.e., that %$X = 0$%
     * or %$F \ge B^{-1}$%.
     *
     * We're rounding to the nearest such %$X$%.  If there is to be ambiguity
     * in the conversion, then some %$x = f b^e$% and the next larger
     * representable number %$x' = x + b^{e-n}$% must both map to the same
     * %$X$%, which means both %$x$% and %$x'$% must be nearer to %$X$% than
     * any other number representable in the target system.  The next larger
     * number is %$X' = X + B^{E-N}$%; the next smaller number will normally
     * be %$W = X - B^{E-N}$%, but if %$F = 1/B$% then the next smaller
     * number is actually %$X - B^{E-N-1}$%.  We ignore this latter
     * possibility in the pursuit of a conservative estimate (though actually
     * it doesn't matter).
     *
     * If both %$x$% and %$x'$% map to %$X$% then we must have
     * %$L = X - B^{E-N}/2 \le x$% and %$x + b^{e-n} \le R = X + B^{E-N}/2$%;
     * so firstly %$f b^e = x \ge L = W + B^{E-N}/2 > W = (F - B^{-N}) B^E$%,
     * and secondly %$b^{e-n} \le B^{E-N}$%.  Since these inequalities are in
     * opposite senses, we can divide, giving
     *
     *          %$f b^e/b^{e-n} > (F - B^{-N}) B^E/B^{E-N}$% ,
     *
     * whence
     *
     *          %$f b^n > (F - B^{-N}) B^N = F B^N - 1$% .
     *
     * Now %$f \le 1 - b^{-n}$%, and %$F \ge B^{-1}$%, so, for this to be
     * possible, it must be the case that
     *
     *          %$(1 - b^{-n}) b^n = b^n - 1 > B^{N-1} - 1$% .
     *
     * Then rearrange and take logarithms, obtaining
     *
     *          %$(N - 1) \log B < n \log b$% ,
     *
     * and so
     *
     *          %$N < n \log b/\log B + 1$% .
     *
     * Recall that this is a necessary condition for a collision to occur; we
     * are therefore safe whenever
     *
     *          %$N \ge n \log b/\log B + 1$% ;
     *
     * so, taking ceilings,
     *
     *          %$N \ge \lceil n \log b/\log B \rceil + 1$% .
     *
     * So that's why we have this.
     *
     * I'm going to assume that @n = DBL_MANT_DIG@ is sufficiently small
     * that we can calculate this without ending up on the wrong side of an
     * integer boundary.
     *
     * In C11, we have @DBL_DECIMAL_DIG@, which should be the same value
     * only as a constant.  Except that modern compilers are more than clever
     * enough to work out that this is a constant anyway.
     *
     * This is sometimes an overestimate: we'll print out meaningless digits
     * that don't represent anything we actually know about the number in
     * question.  To fix that, we'd need a complicated algorithm like Steele
     * and White's Dragon4, Gay's @dtoa@, or Burger and Dybvig's algorithm
     * (note that Loitsch's Grisu2 is conservative, and Grisu3 hands off to
     * something else in difficult situations).
     */

#ifdef DBL_DECIMAL_DIG
    prec = DBL_DECIMAL_DIG;
#else
    prec = ceil(DBL_MANT_DIG*log(FLT_RADIX)/log(10)) + 1;
#endif
    gprintf(gops, go, "%.*g", prec, x);
  }
}
654
67b5031e
MW
655/* --- @parse_floating@ --- *
656 *
657 * Arguments: @double *x_out@ = where to put the result
814e42ff 658 * @const char *q_out@ = where to leave end pointer, or null
67b5031e
MW
659 * @const char *p@ = string to parse
660 * @const struct tvec_floatinfo *fi@ = floating-point info
661 * @struct tvec_state *tv@ = test vector state
662 *
663 * Returns: Zero on success, @-1@ on error.
664 *
665 * Use: Parse a floating-point number from a string. Reports any
814e42ff 666 * necessary errors. If @q_out@ is not null then trailing
adec5584
MW
667 * material is permitted and a pointer to it (or the end of the
668 * string) is left in @*q_out@.
67b5031e 669 */
e63124bc 670
814e42ff 671static int parse_floating(double *x_out, const char **q_out, const char *p,
e63124bc
MW
672 const struct tvec_floatinfo *fi,
673 struct tvec_state *tv)
674{
675 const char *pp; char *q;
676 dstr d = DSTR_INIT;
677 double x;
678 int olderr, rc;
679
67b5031e 680 /* Check for special tokens. */
e63124bc
MW
681 if (STRCMP(p, ==, "#nan")) {
682#ifdef NAN
adec5584 683 if (q_out) *q_out = p + strlen(p);
e63124bc
MW
684 x = NAN; rc = 0;
685#else
686 tvec_error(tv, "NaN not supported on this system");
687 rc = -1; goto end;
688#endif
67b5031e
MW
689 }
690
691 else if (STRCMP(p, ==, "#inf") ||
692 STRCMP(p, ==, "#+inf") || STRCMP(p, ==, "+#inf")) {
3efcfd2d 693#ifdef INFINITY
adec5584 694 if (q_out) *q_out = p + strlen(p);
e63124bc
MW
695 x = INFINITY; rc = 0;
696#else
697 tvec_error(tv, "infinity not supported on this system");
698 rc = -1; goto end;
699#endif
67b5031e
MW
700 }
701
702 else if (STRCMP(p, ==, "#-inf") || STRCMP(p, ==, "-#inf")) {
3efcfd2d 703#ifdef INFINITY
adec5584 704 if (q_out) *q_out = p + strlen(p);
e63124bc
MW
705 x = -INFINITY; rc = 0;
706#else
707 tvec_error(tv, "infinity not supported on this system");
708 rc = -1; goto end;
709#endif
67b5031e
MW
710 }
711
712 /* Check that this looks like a number, so we can exclude `strtod'
713 * recognizing its own non-finite number tokens.
714 */
715 else {
e63124bc
MW
716 pp = p;
717 if (*pp == '+' || *pp == '-') pp++;
718 if (*pp == '.') pp++;
719 if (!ISDIGIT(*pp)) {
3efcfd2d 720 tvec_syntax(tv, *p ? *p : fgetc(tv->fp), "floating-point number");
e63124bc
MW
721 rc = -1; goto end;
722 }
67b5031e
MW
723
724 /* Parse the number using the system parser. */
e63124bc 725 olderr = errno; errno = 0;
b1a20bee 726#if __STDC_VERSION__ >= 199901
e63124bc 727 x = strtod(p, &q);
b1a20bee
MW
728#else
729 x = strtold(p, &q);
730#endif
adec5584
MW
731 if (q_out) *q_out = q;
732 else if (*q) { tvec_syntax(tv, *q, "end-of-line"); rc = -1; goto end; }
e63124bc 733 if (errno && (errno != ERANGE || (x > 0 ? -x : x) == HUGE_VAL)) {
814e42ff
MW
734 tvec_error(tv, "invalid floating-point number `%.*s': %s",
735 (int)(q - p), p, strerror(errno));
e63124bc
MW
736 rc = -1; goto end;
737 }
738 errno = olderr;
739 }
740
67b5031e 741 /* Check that the number is acceptable. */
e63124bc
MW
742 if (NANP(x) && fi && !(fi->f&TVFF_NANOK)) {
743 tvec_error(tv, "#nan not allowed here");
744 rc = -1; goto end;
745 }
67b5031e 746
b1a20bee
MW
747 if (fi &&
748 ((!(fi->f&TVFF_NOMIN) && x < fi->min) ||
749 (!(fi->f&TVFF_NOMAX) && x > fi->max)) &&
750 !(INFP(x) && (fi->f&(NEGP(x) ? TVFF_NEGINFOK : TVFF_POSINFOK)))) {
e63124bc
MW
751 dstr_puts(&d, "floating-point number ");
752 format_floating(&dstr_printops, &d, x);
753 dstr_puts(&d, " out of range (must be in ");
754 if (fi->f&TVFF_NOMIN)
755 dstr_puts(&d, "(#-inf");
756 else
757 { dstr_putc(&d, '['); format_floating(&dstr_printops, &d, fi->min); }
758 dstr_puts(&d, " .. ");
759 if (fi->f&TVFF_NOMAX)
760 dstr_puts(&d, "#+inf)");
761 else
762 { format_floating(&dstr_printops, &d, fi->max); dstr_putc(&d, ']'); }
763 dstr_putc(&d, ')'); dstr_putz(&d);
764 tvec_error(tv, "%s", d.buf); rc = -1; goto end;
765 }
766
67b5031e
MW
767 /* All done. */
768 *x_out = x; rc = 0;
769end:
770 dstr_destroy(&d);
771 return (rc);
772}
773
/*----- String utilities --------------------------------------------------*/
775
/* Special character name table.
 *
 * Lookups by character take the first matching entry carrying one of the
 * requested flags, so the order of the names for each character matters.
 * The table is terminated by an entry with a null name.
 */
static const struct chartab {
  const char *name;                     /* character name */
  int ch;                               /* character value */
  unsigned f;                           /* flags: */
#define CTF_PREFER 1u                   /*   preferred name */
#define CTF_SHORT 2u                    /*   short name (compact style) */
} chartab[] = {
  { "#eof",             EOF,    CTF_PREFER | CTF_SHORT },
  { "#nul",             '\0',   CTF_PREFER },
  { "#bell",            '\a',   CTF_PREFER },
  { "#ding",            '\a',   0 },
  { "#bel",             '\a',   CTF_SHORT },
  { "#backspace",       '\b',   CTF_PREFER },
  { "#bs",              '\b',   CTF_SHORT },
  { "#escape",          '\x1b', CTF_PREFER },
  { "#esc",             '\x1b', CTF_SHORT },
  { "#formfeed",        '\f',   CTF_PREFER },
  { "#ff",              '\f',   CTF_SHORT },
  { "#newline",         '\n',   CTF_PREFER },
  { "#linefeed",        '\n',   0 },
  { "#lf",              '\n',   CTF_SHORT },
  { "#nl",              '\n',   0 },
  { "#return",          '\r',   CTF_PREFER },
  { "#carriage-return", '\r',   0 },
  { "#cr",              '\r',   CTF_SHORT },
  { "#tab",             '\t',   CTF_PREFER | CTF_SHORT },
  { "#horizontal-tab",  '\t',   0 },
  { "#ht",              '\t',   0 },
  { "#vertical-tab",    '\v',   CTF_PREFER },
  { "#vt",              '\v',   CTF_SHORT },
  { "#space",           ' ',    0 },
  { "#spc",             ' ',    CTF_SHORT },
  { "#delete",          '\x7f', CTF_PREFER },
  { "#del",             '\x7f', CTF_SHORT },
  { 0,                  0,      0 }
};
813
814/* --- @find_charname@ --- *
815 *
816 * Arguments: @int ch@ = character to match
817 * @unsigned f@ = flags (@CTF_...@) to match
818 *
819 * Returns: The name of the character, or null if no match is found.
820 *
821 * Use: Looks up a name for a character. Specifically, it returns
822 * the first entry in the @chartab@ table which matches @ch@ and
823 * which has one of the flags @f@ set.
824 */
825
826static const char *find_charname(int ch, unsigned f)
827{
828 const struct chartab *ct;
829
830 for (ct = chartab; ct->name; ct++)
831 if (ct->ch == ch && (ct->f&f)) return (ct->name);
832 return (0);
833}
834
835/* --- @read_charname@ --- *
836 *
837 * Arguments: @int *ch_out@ = where to put the character
838 * @const char *p@ = character name
839 * @unsigned f@ = flags (@TCF_...@)
840 *
841 * Returns: Zero if a match was found, @-1@ if not.
842 *
843 * Use: Looks up a character by name. If @RCF_EOFOK@ is set in @f@,
844 * then the @EOF@ marker can be matched; otherwise it can't.
845 */
846
847#define RCF_EOFOK 1u
848static int read_charname(int *ch_out, const char *p, unsigned f)
849{
850 const struct chartab *ct;
851
852 for (ct = chartab; ct->name; ct++)
853 if (STRCMP(p, ==, ct->name) && ((f&RCF_EOFOK) || ct->ch >= 0))
854 { *ch_out = ct->ch; return (0); }
855 return (-1);
856}
857
/* --- @format_charesc@ --- *
 *
 * Arguments:   @const struct gprintf_ops *gops@ = print operations
 *              @void *go@ = print destination
 *              @int ch@ = character to format
 *              @unsigned f@ = flags (@FCF_...@)
 *
 * Returns:     ---
 *
 * Use:         Format a character as an escape sequence, possibly as part of
 *              a larger string.  If @FCF_BRACE@ is set in @f@, then put
 *              braces around a `\x...' code (and a null character's `0'),
 *              so that it's suitable for use in a longer string.
 */

#define FCF_BRACE 1u
static void format_charesc(const struct gprintf_ops *gops, void *go,
                           int ch, unsigned f)
{
  switch (ch) {
    case '\a': gprintf(gops, go, "\\a"); break;
    case '\b': gprintf(gops, go, "\\b"); break;
    case '\x1b': gprintf(gops, go, "\\e"); break;
    case '\f': gprintf(gops, go, "\\f"); break;
    case '\r': gprintf(gops, go, "\\r"); break;
    case '\n': gprintf(gops, go, "\\n"); break;
    case '\t': gprintf(gops, go, "\\t"); break;
    case '\v': gprintf(gops, go, "\\v"); break;
    case '\\': gprintf(gops, go, "\\\\"); break;
    case '\'': gprintf(gops, go, "\\'"); break;
    case '\0':
      if (f&FCF_BRACE) gprintf(gops, go, "\\{0}");
      else gprintf(gops, go, "\\0");
      break;
    default:
      /* Anything else is written as a hex escape, wide enough for the
       * largest @unsigned char@ value.
       */
      if (f&FCF_BRACE)
        gprintf(gops, go, "\\x{%0*x}", hex_width(UCHAR_MAX), ch);
      else
        gprintf(gops, go, "\\x%0*x", hex_width(UCHAR_MAX), ch);
      break;
  }
}
900
/* --- @format_char@ --- *
 *
 * Arguments:   @const struct gprintf_ops *gops@ = print operations
 *              @void *go@ = print destination
 *              @int ch@ = character to format
 *
 * Returns:     ---
 *
 * Use:         Format a single character, in single quotes.  Backslash,
 *              single quote, and anything unprintable are written as
 *              escape sequences.
 */

static void format_char(const struct gprintf_ops *gops, void *go, int ch)
{
  switch (ch) {
    case '\\': case '\'': escape:
      gprintf(gops, go, "'");
      format_charesc(gops, go, ch, 0);
      gprintf(gops, go, "'");
      break;
    default:
      if (!isprint(ch)) goto escape;
      gprintf(gops, go, "'%c'", ch);
      break;
  }
}
926
b1a20bee
MW
/* --- @fill_pattern@ --- *
 *
 * Arguments:   @void *p@ = destination pointer
 *              @size_t sz@ = destination buffer size
 *              @const void *pat@ = pointer to pattern
 *              @size_t patsz@ = pattern size
 *
 * Returns:     ---
 *
 * Use:         Fill the destination buffer with as many copies of the
 *              pattern as will fit, followed by as many initial bytes of the
 *              pattern as will fit in the remaining space.  If the pattern
 *              is empty then the buffer is left untouched.
 */

static void fill_pattern(void *p, size_t sz, const void *pat, size_t patsz)
{
  unsigned char *q = p;

  /* There's nothing to copy from an empty pattern, and the doubling loop
   * below would never terminate.
   */
  if (!patsz) return;

  if (patsz == 1)
    memset(q, *(const unsigned char *)pat, sz);
  else {
    if (sz > patsz) {
      /* Lay down one copy of the pattern, and then keep doubling the
       * filled prefix by copying it onto the space immediately after it.
       * The source and destination never overlap.
       */
      memcpy(q, pat, patsz); pat = q; q += patsz; sz -= patsz;
      while (sz > patsz)
        { memcpy(q, pat, patsz); q += patsz; sz -= patsz; patsz *= 2; }
    }

    /* Finish with a partial copy. */
    memcpy(q, pat, sz);
  }
}
956
67b5031e
MW
957/* --- @maybe_format_unsigned_char@, @maybe_format_signed_char@ --- *
958 *
959 * Arguments: @const struct gprintf_ops *gops@ = print operations
960 * @void *go@ = print destination
961 * @unsigned long u@ or @long i@ = an integer
962 *
963 * Returns: ---
964 *
965 * Use: Format a (signed or unsigned) integer as a character, if it's
966 * in range, printing something like `= 'q''. It's assumed that
967 * a comment marker has already been output.
968 */
969
970static void maybe_format_unsigned_char
971 (const struct gprintf_ops *gops, void *go, unsigned long u)
972{
973 const char *p;
974
975 p = find_charname(u, CTF_PREFER);
976 if (p) gprintf(gops, go, " = %s", p);
977 if (u < UCHAR_MAX)
978 { gprintf(gops, go, " = "); format_char(gops, go, u); }
e63124bc
MW
979}
980
67b5031e
MW
981static void maybe_format_signed_char
982 (const struct gprintf_ops *gops, void *go, long i)
b64eb60f 983{
67b5031e
MW
984 const char *p;
985
986 p = find_charname(i, CTF_PREFER);
987 if (p) gprintf(gops, go, " = %s", p);
988 if (0 <= i && i < UCHAR_MAX)
989 { gprintf(gops, go, " = "); format_char(gops, go, i); }
b64eb60f
MW
990}
991
67b5031e
MW
992/* --- @read_charesc@ --- *
993 *
994 * Arguments: @int *ch_out@ = where to put the result
995 * @struct tvec_state *tv@ = test vector state
996 *
997 * Returns: Zero on success, @-1@ on error.
998 *
999 * Use: Parse and convert an escape sequence from @tv@'s input
1000 * stream, assuming that the initial `\' has already been read.
1001 * Reports errors as appropriate.
1002 */
1003
1004static int read_charesc(int *ch_out, struct tvec_state *tv)
b64eb60f 1005{
b64eb60f
MW
1006 int ch, i, esc;
1007 unsigned f = 0;
1008#define f_brace 1u
1009
e63124bc
MW
1010 ch = getc(tv->fp);
1011 switch (ch) {
67b5031e
MW
1012
1013 /* Things we shouldn't find. */
1014 case EOF: case '\n': return (tvec_syntax(tv, ch, "string escape"));
1015
1016 /* Single-character escapes. */
e63124bc
MW
1017 case '\'': *ch_out = '\''; break;
1018 case '\\': *ch_out = '\\'; break;
1019 case '"': *ch_out = '"'; break;
1020 case 'a': *ch_out = '\a'; break;
1021 case 'b': *ch_out = '\b'; break;
1022 case 'e': *ch_out = '\x1b'; break;
1023 case 'f': *ch_out = '\f'; break;
1024 case 'n': *ch_out = '\n'; break;
1025 case 'r': *ch_out = '\r'; break;
1026 case 't': *ch_out = '\t'; break;
1027 case 'v': *ch_out = '\v'; break;
1028
67b5031e 1029 /* Hex escapes, with and without braces. */
e63124bc
MW
1030 case 'x':
1031 ch = getc(tv->fp);
1032 if (ch == '{') { f |= f_brace; ch = getc(tv->fp); }
1033 else f &= ~f_brace;
67b5031e
MW
1034 esc = chtodig(ch);
1035 if (esc < 0 || esc >= 16) return (tvec_syntax(tv, ch, "hex digit"));
e63124bc 1036 for (;;) {
67b5031e
MW
1037 ch = getc(tv->fp); i = chtodig(ch); if (i < 0 || i >= 16) break;
1038 esc = 16*esc + i;
e63124bc
MW
1039 if (esc > UCHAR_MAX)
1040 return (tvec_error(tv,
1041 "character code %d out of range", esc));
1042 }
1043 if (!(f&f_brace)) ungetc(ch, tv->fp);
1044 else if (ch != '}') return (tvec_syntax(tv, ch, "`}'"));
1045 *ch_out = esc;
1046 break;
1047
67b5031e
MW
1048 /* Other things, primarily octal escapes. */
1049 case '{':
1050 f |= f_brace; ch = getc(tv->fp);
1051 /* fall through */
e63124bc
MW
1052 default:
1053 if ('0' <= ch && ch < '8') {
1054 i = 1; esc = ch - '0';
1055 for (;;) {
1056 ch = getc(tv->fp);
1057 if ('0' > ch || ch >= '8') { ungetc(ch, tv->fp); break; }
1058 esc = 8*esc + ch - '0';
1059 i++; if (i >= 3) break;
1060 }
67b5031e
MW
1061 if (f&f_brace) {
1062 ch = getc(tv->fp);
1063 if (ch != '}') return (tvec_syntax(tv, ch, "`}'"));
1064 }
e63124bc
MW
1065 if (esc > UCHAR_MAX)
1066 return (tvec_error(tv,
1067 "character code %d out of range", esc));
67b5031e 1068 *ch_out = esc; break;
e63124bc
MW
1069 } else
1070 return (tvec_syntax(tv, ch, "string escape"));
1071 }
1072
67b5031e 1073 /* Done. */
e63124bc
MW
1074 return (0);
1075
1076#undef f_brace
1077}
1078
67b5031e
MW
1079/* --- @read_quoted_string@ --- *
1080 *
1081 * Arguments: @dstr *d@ = string to write to
1082 * @int quote@ = initial quote, `'' or `"'
1083 * @struct tvec_state *tv@ = test vector state
1084 *
1085 * Returns: Zero on success, @-1@ on error.
1086 *
1087 * Use: Read the rest of a quoted string into @d@, reporting errors
1088 * as appropriate.
1089 *
1090 * A single-quoted string is entirely literal. A double-quoted
1091 * string may contain C-like escapes.
1092 */
1093
e63124bc
MW
1094static int read_quoted_string(dstr *d, int quote, struct tvec_state *tv)
1095{
1096 int ch;
b64eb60f
MW
1097
1098 for (;;) {
1099 ch = getc(tv->fp);
b64eb60f
MW
1100 switch (ch) {
1101 case EOF: case '\n':
e63124bc 1102 return (tvec_syntax(tv, ch, "`%c'", quote));
b64eb60f
MW
1103 case '\\':
1104 if (quote == '\'') goto ordinary;
e63124bc 1105 ch = getc(tv->fp); if (ch == '\n') { tv->lno++; break; }
67b5031e 1106 ungetc(ch, tv->fp); if (read_charesc(&ch, tv)) return (-1);
e63124bc 1107 goto ordinary;
b64eb60f
MW
1108 default:
1109 if (ch == quote) goto end;
1110 ordinary:
1111 DPUTC(d, ch);
1112 break;
1113 }
1114 }
1115
1116end:
1117 DPUTZ(d);
882a39c1 1118 return (0);
e63124bc 1119}
b64eb60f 1120
67b5031e
MW
1121/* --- @collect_bare@ --- *
1122 *
1123 * Arguments: @dstr *d@ = string to write to
1124 * @struct tvec_state *tv@ = test vector state
1125 *
1126 * Returns: Zero on success, @-1@ on error.
1127 *
1128 * Use: Read barewords and the whitespace between them. Stop when we
1129 * encounter something which can't start a bareword.
1130 */
b64eb60f
MW
1131
1132static int collect_bare(dstr *d, struct tvec_state *tv)
1133{
1134 size_t pos = d->len;
1135 enum { WORD, SPACE, ESCAPE }; unsigned s = WORD;
1136 int ch, rc;
1137
1138 for (;;) {
1139 ch = getc(tv->fp);
1140 switch (ch) {
1141 case EOF:
882a39c1
MW
1142 tvec_syntax(tv, ch, "bareword");
1143 rc = -1; goto end;
b64eb60f
MW
1144 case '\n':
1145 if (s == ESCAPE) { tv->lno++; goto addch; }
1146 if (s == WORD) pos = d->len;
882a39c1 1147 ungetc(ch, tv->fp); if (tvec_nexttoken(tv)) { rc = -1; goto end; }
b64eb60f
MW
1148 DPUTC(d, ' '); s = SPACE;
1149 break;
67b5031e 1150 case '"': case '\'': case '!': case '#': case ')': case '}': case ']':
882a39c1 1151 if (s == SPACE) { ungetc(ch, tv->fp); goto done; }
b64eb60f
MW
1152 goto addch;
1153 case '\\':
1154 s = ESCAPE;
1155 break;
1156 default:
1157 if (s != ESCAPE && isspace(ch)) {
1158 if (s == WORD) pos = d->len;
1159 DPUTC(d, ch); s = SPACE;
1160 break;
1161 }
1162 addch:
1163 DPUTC(d, ch); s = WORD;
1164 }
1165 }
1166
1167done:
1168 if (s == SPACE) d->len = pos;
882a39c1
MW
1169 DPUTZ(d); rc = 0;
1170end:
1171 return (rc);
b64eb60f
MW
1172}
1173
67b5031e
MW
1174/* --- @set_up_encoding@ --- *
1175 *
1176 * Arguments: @const codec_class **ccl_out@ = where to put the class
1177 * @unsigned *f_out@ = where to put the flags
1178 * @unsigned code@ = the coding scheme to use (@TVEC_...@)
1179 *
1180 * Returns: ---
1181 *
1182 * Use: Helper for @read_compound_string@ below.
1183 *
1184 * Return the appropriate codec class and flags for @code@.
1185 * Leaves @*ccl_out@ null if the coding scheme doesn't have a
1186 * backing codec class (e.g., @TVCODE_BARE@).
1187 */
1188
1189enum { TVCODE_BARE, TVCODE_HEX, TVCODE_BASE64, TVCODE_BASE32 };
b64eb60f
MW
1190static void set_up_encoding(const codec_class **ccl_out, unsigned *f_out,
1191 unsigned code)
1192{
1193 switch (code) {
1194 case TVCODE_BARE:
1195 *ccl_out = 0; *f_out = 0;
1196 break;
1197 case TVCODE_HEX:
1198 *ccl_out = &hex_class; *f_out = CDCF_IGNCASE;
1199 break;
1200 case TVCODE_BASE32:
1201 *ccl_out = &base32_class; *f_out = CDCF_IGNCASE | CDCF_IGNEQPAD;
1202 break;
1203 case TVCODE_BASE64:
1204 *ccl_out = &base64_class; *f_out = CDCF_IGNEQPAD;
1205 break;
1206 default:
1207 abort();
1208 }
1209}
1210
67b5031e
MW
1211/* --- @flush_codec@ --- *
1212 *
1213 * Arguments: @codec *cdc@ = a codec, or null
1214 * @dstr *d@ = output string
1215 * @struct tvec_state *tv@ = test vector state
1216 *
1217 * Returns: Zero on success, @-1@ on error.
1218 *
1219 * Use: Helper for @read_compound_string@ below.
1220 *
1221 * Flush out any final buffered material from @cdc@, and check
1222 * that it's in a good state. Frees the codec on success. Does
1223 * nothing if @cdc@ is null.
1224 */
1225
1226static int flush_codec(codec *cdc, dstr *d, struct tvec_state *tv)
1227{
1228 int err;
1229
1230 if (cdc) {
1231 err = cdc->ops->code(cdc, 0, 0, d);
1232 if (err)
1233 return (tvec_error(tv, "invalid %s sequence end: %s",
1234 cdc->ops->c->name, codec_strerror(err)));
1235 cdc->ops->destroy(cdc);
1236 }
1237 return (0);
1238}
1239
1240/* --- @read_compound_string@ --- *
1241 *
1242 * Arguments: @void **p_inout@ = address of output buffer pointer
1243 * @size_t *sz_inout@ = address of buffer size
1244 * @unsigned code@ = initial interpretation of barewords
1245 * @unsigned f@ = other flags (@RCSF_...@)
1246 * @struct tvec_state *tv@ = test vector state
1247 *
1248 * Returns: Zero on success, @-1@ on error.
1249 *
1250 * Use: Parse a compound string, i.e., a sequence of stringish pieces
1251 * which might be quoted strings, character names, or barewords
1252 * to be decoded accoding to @code@, interspersed with
1253 * additional directives.
1254 *
1255 * If the initial buffer pointer is non-null and sufficiently
1256 * large, then it will be reused; otherwise, it is freed and a
1257 * fresh, sufficiently large buffer is allocated and returned.
b1a20bee 1258 * This buffer unconditionally uses the standard-library arena.
67b5031e
MW
1259 */
1260
1261#define RCSF_NESTED 1u
882a39c1 1262static int read_compound_string(void **p_inout, size_t *sz_inout,
67b5031e
MW
1263 unsigned code, unsigned f,
1264 struct tvec_state *tv)
b64eb60f 1265{
67b5031e 1266 const codec_class *ccl; unsigned cdf;
b64eb60f
MW
1267 codec *cdc;
1268 dstr d = DSTR_INIT, w = DSTR_INIT;
1269 char *p;
67b5031e
MW
1270 const char *q;
1271 void *pp = 0; size_t sz;
1272 unsigned long n;
882a39c1 1273 int ch, err, rc;
b64eb60f 1274
67b5031e
MW
1275 set_up_encoding(&ccl, &cdf, code); cdc = 0;
1276
1277 if (tvec_nexttoken(tv)) return (tvec_syntax(tv, fgetc(tv->fp), "string"));
b64eb60f
MW
1278 do {
1279 ch = getc(tv->fp);
67b5031e
MW
1280 switch (ch) {
1281
1282 case ')': case ']': case '}':
1283 /* Close brackets. Leave these for recursive caller if there is one,
1284 * or just complain.
1285 */
1286
1287 if (!(f&RCSF_NESTED))
1288 { rc = tvec_syntax(tv, ch, "string"); goto end; }
1289 ungetc(ch, tv->fp); goto done;
1290
1291 case '"': case '\'':
1292 /* Quotes. Read a quoted string. */
1293
1294 if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1295 cdc = 0;
1296 if (read_quoted_string(&d, ch, tv)) { rc = -1; goto end; }
1297 break;
1298
1299 case '#':
1300 /* A named character. */
1301
1302 ungetc(ch, tv->fp);
1303 if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1304 cdc = 0;
adec5584 1305 DRESET(&w); tvec_readword(tv, &w, 0, ";", "character name");
5c0f2e08 1306 if (STRCMP(w.buf, ==, "#empty")) break;
67b5031e
MW
1307 if (read_charname(&ch, w.buf, RCF_EOFOK)) {
1308 rc = tvec_error(tv, "unknown character name `%s'", d.buf);
1309 goto end;
1310 }
1311 DPUTC(&d, ch); break;
1312
1313 case '!':
1314 /* A magic keyword. */
1315
1316 if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1317 cdc = 0;
b64eb60f 1318 ungetc(ch, tv->fp);
adec5584 1319 DRESET(&w); tvec_readword(tv, &w, 0, ";", "`!'-keyword");
67b5031e
MW
1320
1321 /* Change bareword coding system. */
1322 if (STRCMP(w.buf, ==, "!bare"))
1323 { code = TVCODE_BARE; set_up_encoding(&ccl, &cdf, code); }
1324 else if (STRCMP(w.buf, ==, "!hex"))
1325 { code = TVCODE_HEX; set_up_encoding(&ccl, &cdf, code); }
1326 else if (STRCMP(w.buf, ==, "!base32"))
1327 { code = TVCODE_BASE32; set_up_encoding(&ccl, &cdf, code); }
1328 else if (STRCMP(w.buf, ==, "!base64"))
1329 { code = TVCODE_BASE64; set_up_encoding(&ccl, &cdf, code); }
1330
1331 /* Repeated substrings. */
1332 else if (STRCMP(w.buf, ==, "!repeat")) {
1333 if (tvec_nexttoken(tv)) {
1334 rc = tvec_syntax(tv, fgetc(tv->fp), "repeat count");
1335 goto end;
1336 }
1337 DRESET(&w);
adec5584 1338 if (tvec_readword(tv, &w, 0, ";{", "repeat count"))
67b5031e
MW
1339 { rc = -1; goto end; }
1340 if (parse_unsigned_integer(&n, &q, w.buf)) {
1341 rc = tvec_error(tv, "invalid repeat count `%s'", w.buf);
1342 goto end;
1343 }
1344 if (*q) { rc = tvec_syntax(tv, *q, "`{'"); goto end; }
1345 if (tvec_nexttoken(tv))
1346 { rc = tvec_syntax(tv, fgetc(tv->fp), "`{'"); goto end; }
1347 ch = getc(tv->fp); if (ch != '{')
1348 { rc = tvec_syntax(tv, ch, "`{'"); goto end; }
1349 sz = 0;
1350 if (read_compound_string(&pp, &sz, code, f | RCSF_NESTED, tv))
1351 { rc = -1; goto end; }
1352 ch = getc(tv->fp); if (ch != '}')
1353 { rc = tvec_syntax(tv, ch, "`}'"); goto end; }
1354 if (sz) {
1355 if (n > (size_t)-1/sz)
1356 { rc = tvec_error(tv, "repeat size out of range"); goto end; }
b1a20bee
MW
1357 n *= sz;
1358 dstr_ensure(&d, n);
1359 fill_pattern(d.buf + d.len, n, pp, sz); d.len += n;
67b5031e
MW
1360 }
1361 xfree(pp); pp = 0;
1362 }
1363
1364 /* Anything else is an error. */
1365 else {
1366 tvec_error(tv, "unknown string keyword `%s'", w.buf);
1367 rc = -1; goto end;
1368 }
b64eb60f 1369 break;
67b5031e 1370
b64eb60f 1371 default:
67b5031e
MW
1372 /* A bareword. Process it according to the current coding system. */
1373
1374 switch (code) {
1375 case TVCODE_BARE:
1376 ungetc(ch, tv->fp);
1377 if (collect_bare(&d, tv)) goto done;
1378 break;
1379 default:
1380 assert(ccl);
1381 ungetc(ch, tv->fp); DRESET(&w);
adec5584
MW
1382 if (tvec_readword(tv, &w, 0, ";",
1383 "%s-encoded fragment", ccl->name))
67b5031e
MW
1384 { rc = -1; goto end; }
1385 if (!cdc) cdc = ccl->decoder(cdf);
1386 err = cdc->ops->code(cdc, w.buf, w.len, &d);
1387 if (err) {
1388 tvec_error(tv, "invalid %s fragment `%s': %s",
1389 ccl->name, w.buf, codec_strerror(err));
1390 rc = -1; goto end;
1391 }
1392 break;
1393 }
1394 break;
b64eb60f
MW
1395 }
1396 } while (!tvec_nexttoken(tv));
1397
1398done:
67b5031e
MW
1399 /* Wrap things up. */
1400 if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; }
1401 cdc = 0;
b64eb60f 1402 if (*sz_inout <= d.len)
b1a20bee 1403 { free(*p_inout); *p_inout = x_alloc(&arena_stdlib, d.len + 1); }
b64eb60f 1404 p = *p_inout; memcpy(p, d.buf, d.len); p[d.len] = 0; *sz_inout = d.len;
882a39c1 1405 rc = 0;
67b5031e 1406
882a39c1 1407end:
67b5031e
MW
1408 /* Clean up any debris. */
1409 if (cdc) cdc->ops->destroy(cdc);
1410 if (pp) xfree(pp);
b64eb60f 1411 dstr_destroy(&d); dstr_destroy(&w);
882a39c1 1412 return (rc);
b64eb60f
MW
1413}
1414
b64eb60f
MW
1415/*----- Signed and unsigned integer types ---------------------------------*/
1416
c81c35df
MW
1417/* --- @init_int@, @init_uint@ --- *
1418 *
1419 * Arguments: @union tvec_regval *rv@ = register value
1420 * @const struct tvec_regdef *rd@ = register definition
1421 *
1422 * Returns: ---
1423 *
1424 * Use: Initialize a register value.
1425 *
1426 * Integer values are initialized to zero.
1427 */
1428
b64eb60f
MW
1429static void init_int(union tvec_regval *rv, const struct tvec_regdef *rd)
1430 { rv->i = 0; }
1431
1432static void init_uint(union tvec_regval *rv, const struct tvec_regdef *rd)
1433 { rv->u = 0; }
1434
c81c35df
MW
1435/* --- @eq_int@, @eq_uint@ --- *
1436 *
1437 * Arguments: @const union tvec_regval *rv0, *rv1@ = register values
1438 * @const struct tvec_regdef *rd@ = register definition
1439 *
1440 * Returns: Nonzero if the values are equal, zero if unequal
1441 *
1442 * Use: Compare register values for equality.
1443 */
1444
b64eb60f
MW
1445static int eq_int(const union tvec_regval *rv0, const union tvec_regval *rv1,
1446 const struct tvec_regdef *rd)
1447 { return (rv0->i == rv1->i); }
1448
1449static int eq_uint(const union tvec_regval *rv0,
1450 const union tvec_regval *rv1,
1451 const struct tvec_regdef *rd)
1452 { return (rv0->u == rv1->u); }
1453
c81c35df
MW
1454/* --- @tobuf_int@, @tobuf_uint@ --- *
1455 *
1456 * Arguments: @buf *b@ = buffer
1457 * @const union tvec_regval *rv@ = register value
1458 * @const struct tvec_regdef *rd@ = register definition
1459 *
1460 * Returns: Zero on success, %$-1$% on failure.
1461 *
1462 * Use: Serialize a register value to a buffer.
1463 *
1464 * Integer values are serialized as little-endian 64-bit signed
1465 * or unsigned integers.
1466 */
1467
b64eb60f
MW
1468static int tobuf_int(buf *b, const union tvec_regval *rv,
1469 const struct tvec_regdef *rd)
1470 { return (signed_to_buf(b, rv->i)); }
1471
1472static int tobuf_uint(buf *b, const union tvec_regval *rv,
1473 const struct tvec_regdef *rd)
1474 { return (unsigned_to_buf(b, rv->u)); }
1475
c81c35df
MW
1476/* --- @frombuf_int@, @frombuf_uint@ --- *
1477 *
1478 * Arguments: @buf *b@ = buffer
1479 * @union tvec_regval *rv@ = register value
1480 * @const struct tvec_regdef *rd@ = register definition
1481 *
1482 * Returns: Zero on success, %$-1$% on failure.
1483 *
1484 * Use: Deserialize a register value from a buffer.
1485 *
1486 * Integer values are serialized as 64-bit signed or unsigned
1487 * integers.
1488 */
1489
b64eb60f
MW
1490static int frombuf_int(buf *b, union tvec_regval *rv,
1491 const struct tvec_regdef *rd)
882a39c1 1492 { return (signed_from_buf(b, &rv->i)); }
b64eb60f
MW
1493
1494static int frombuf_uint(buf *b, union tvec_regval *rv,
1495 const struct tvec_regdef *rd)
1496 { return (unsigned_from_buf(b, &rv->u)); }
1497
c81c35df
MW
1498/* --- @parse_int@, @parse_uint@ --- *
1499 *
1500 * Arguments: @union tvec_regval *rv@ = register value
1501 * @const struct tvec_regdef *rd@ = register definition
1502 * @struct tvec_state *tv@ = test-vector state
1503 *
1504 * Returns: Zero on success, %$-1$% on error.
1505 *
1506 * Use: Parse a register value from an input file.
1507 *
1508 * Integers may be input in decimal, hex, binary, or octal,
1509 * following approximately usual conventions.
1510 *
1511 * * Signed integers may be preceded with a `+' or `-' sign.
1512 *
1513 * * Decimal integers are just a sequence of decimal digits
1514 * `0' ... `9'.
1515 *
1516 * * Octal integers are a sequence of digits `0' ... `7',
1517 * preceded by `0o' or `0O'.
1518 *
1519 * * Hexadecimal integers are a sequence of digits `0'
1520 * ... `9', `a' ... `f', or `A' ... `F', preceded by `0x' or
1521 * `0X'.
1522 *
1523 * * Radix-B integers are a sequence of digits `0' ... `9',
1524 * `a' ... `f', or `A' ... `F', each with value less than B,
1525 * preceded by `Br' or `BR', where 0 < B < 36 is expressed
1526 * in decimal without any leading `0' or internal
1527 * underscores `_'.
1528 *
1529 * * A digit sequence may contain internal underscore `_'
1530 * separators, but not before or after all of the digits;
1531 * and two consecutive `_' characters are not permitted.
1532 */
1533
882a39c1
MW
1534static int parse_int(union tvec_regval *rv, const struct tvec_regdef *rd,
1535 struct tvec_state *tv)
b64eb60f
MW
1536{
1537 dstr d = DSTR_INIT;
882a39c1 1538 int rc;
b64eb60f 1539
adec5584
MW
1540 if (tvec_readword(tv, &d, 0, ";", "signed integer"))
1541 { rc = -1; goto end; }
c81c35df
MW
1542 if (parse_signed(&rv->i, d.buf, rd->arg.p, tv)) { rc = -1; goto end; }
1543 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
882a39c1
MW
1544 rc = 0;
1545end:
b64eb60f 1546 dstr_destroy(&d);
882a39c1 1547 return (rc);
b64eb60f
MW
1548}
1549
882a39c1
MW
1550static int parse_uint(union tvec_regval *rv, const struct tvec_regdef *rd,
1551 struct tvec_state *tv)
b64eb60f
MW
1552{
1553 dstr d = DSTR_INIT;
882a39c1 1554 int rc;
b64eb60f 1555
adec5584
MW
1556 if (tvec_readword(tv, &d, 0, ";", "unsigned integer"))
1557 { rc = -1; goto end; }
c81c35df
MW
1558 if (parse_unsigned(&rv->u, d.buf, rd->arg.p, tv)) { rc = -1; goto end; }
1559 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
882a39c1
MW
1560 rc = 0;
1561end:
b64eb60f 1562 dstr_destroy(&d);
882a39c1 1563 return (rc);
b64eb60f
MW
1564}
1565
c81c35df
MW
1566/* --- @dump_int@, @dump_uint@ --- *
1567 *
1568 * Arguments: @const union tvec_regval *rv@ = register value
1569 * @const struct tvec_regdef *rd@ = register definition
1570 * @unsigned style@ = output style (@TVSF_...@)
1571 * @const struct gprintf_ops *gops@, @void *gp@ = format output
1572 *
1573 * Returns: ---
1574 *
1575 * Use: Dump a register value to the format output.
1576 *
1577 * Integer values are dumped in decimal and, unless compact
1578 * output is requested, hex, and maybe a character, as a
1579 * comment.
1580 */
1581
b64eb60f
MW
1582static void dump_int(const union tvec_regval *rv,
1583 const struct tvec_regdef *rd,
e63124bc
MW
1584 unsigned style,
1585 const struct gprintf_ops *gops, void *go)
b64eb60f 1586{
5c0f2e08 1587 if (style&TVSF_RAW) gprintf(gops, go, "int:");
e63124bc 1588 gprintf(gops, go, "%ld", rv->i);
5c0f2e08 1589 if (!(style&(TVSF_COMPACT | TVSF_RAW))) {
3efcfd2d
MW
1590 gprintf(gops, go, " ; = ");
1591 format_signed_hex(gops, go, rv->i);
1592 maybe_format_signed_char(gops, go, rv->i);
b64eb60f
MW
1593 }
1594}
1595
1596static void dump_uint(const union tvec_regval *rv,
1597 const struct tvec_regdef *rd,
e63124bc
MW
1598 unsigned style,
1599 const struct gprintf_ops *gops, void *go)
b64eb60f 1600{
5c0f2e08 1601 if (style&TVSF_RAW) gprintf(gops, go, "uint:");
e63124bc 1602 gprintf(gops, go, "%lu", rv->u);
5c0f2e08 1603 if (!(style&(TVSF_COMPACT | TVSF_RAW))) {
3efcfd2d
MW
1604 gprintf(gops, go, " ; = ");
1605 format_unsigned_hex(gops, go, rv->u);
1606 maybe_format_unsigned_char(gops, go, rv->u);
e63124bc 1607 }
b64eb60f
MW
1608}
1609
c81c35df 1610/* Integer type definitions. */
b64eb60f 1611const struct tvec_regty tvty_int = {
3efcfd2d 1612 init_int, trivial_release, eq_int,
b64eb60f
MW
1613 tobuf_int, frombuf_int,
1614 parse_int, dump_int
1615};
c81c35df
MW
1616const struct tvec_regty tvty_uint = {
1617 init_uint, trivial_release, eq_uint,
1618 tobuf_uint, frombuf_uint,
1619 parse_uint, dump_uint
1620};
b64eb60f 1621
c81c35df 1622/* Predefined integer ranges. */
b64eb60f 1623const struct tvec_irange
b1a20bee
MW
1624 tvrange_schar = { SCHAR_MIN, SCHAR_MAX, 0, 0 },
1625 tvrange_short = { SHRT_MIN, SHRT_MAX, 0, 0 },
1626 tvrange_int = { INT_MIN, INT_MAX, 0, 0 },
1627 tvrange_long = { LONG_MIN, LONG_MAX, 0, 0 },
1628 tvrange_sbyte = { -128, 127, 0, 0 },
1629 tvrange_i16 = { -32768, +32767, 0, 0 },
1630 tvrange_i32 = { -2147483648, 2147483647, 0, 0 };
b64eb60f 1631const struct tvec_urange
b1a20bee
MW
1632 tvrange_uchar = { 0, UCHAR_MAX, 0, 0 },
1633 tvrange_ushort = { 0, USHRT_MAX, 0, 0 },
1634 tvrange_uint = { 0, UINT_MAX, 0, 0 },
1635 tvrange_ulong = { 0, ULONG_MAX, 0, 0 },
1636 tvrange_size = { 0, (size_t)-1, 0, 0 },
1637 tvrange_byte = { 0, 255, 0, 0 },
1638 tvrange_u16 = { 0, 65535, 0, 0 },
1639 tvrange_u32 = { 0, 4294967295, 0, 0 };
b64eb60f 1640
67b5031e
MW
1641/* --- @tvec_claimeq_int@ --- *
1642 *
1643 * Arguments: @struct tvec_state *tv@ = test-vector state
1644 * @long i0, i1@ = two signed integers
1645 * @const char *file@, @unsigned @lno@ = calling file and line
1646 * @const char *expr@ = the expression to quote on failure
1647 *
1648 * Returns: Nonzero if @i0@ and @i1@ are equal, otherwise zero.
1649 *
1650 * Use: Check that values of @i0@ and @i1@ are equal. As for
1651 * @tvec_claim@ above, a test case is automatically begun and
1652 * ended if none is already underway. If the values are
1653 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
1654 * mismatched values are dumped: @i0@ is printed as the output
1655 * value and @i1@ is printed as the input reference.
1656 */
1657
b64eb60f
MW
1658int tvec_claimeq_int(struct tvec_state *tv, long i0, long i1,
1659 const char *file, unsigned lno, const char *expr)
1660{
3efcfd2d 1661 tv->out[0].v.i = i0; tv->in[0].v.i = i1;
b64eb60f
MW
1662 return (tvec_claimeq(tv, &tvty_int, 0, file, lno, expr));
1663}
1664
67b5031e
MW
1665/* --- @tvec_claimeq_uint@ --- *
1666 *
1667 * Arguments: @struct tvec_state *tv@ = test-vector state
1668 * @unsigned long u0, u1@ = two unsigned integers
1669 * @const char *file@, @unsigned @lno@ = calling file and line
1670 * @const char *expr@ = the expression to quote on failure
1671 *
1672 * Returns: Nonzero if @u0@ and @u1@ are equal, otherwise zero.
1673 *
1674 * Use: Check that values of @u0@ and @u1@ are equal. As for
1675 * @tvec_claim@ above, a test case is automatically begun and
1676 * ended if none is already underway. If the values are
1677 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
1678 * mismatched values are dumped: @u0@ is printed as the output
1679 * value and @u1@ is printed as the input reference.
1680 */
1681
b64eb60f
MW
1682int tvec_claimeq_uint(struct tvec_state *tv,
1683 unsigned long u0, unsigned long u1,
1684 const char *file, unsigned lno, const char *expr)
1685{
3efcfd2d 1686 tv->out[0].v.u = u0; tv->in[0].v.u = u1;
b64eb60f
MW
1687 return (tvec_claimeq(tv, &tvty_uint, 0, file, lno, expr));
1688}
1689
c4ccbbf9
MW
1690/*----- Size type ---------------------------------------------------------*/
1691
1692/* --- @parse_size@ --- *
1693 *
1694 * Arguments: @union tvec_regval *rv@ = register value
1695 * @const struct tvec_regdef *rd@ = register definition
1696 * @struct tvec_state *tv@ = test-vector state
1697 *
1698 * Returns: Zero on success, %$-1$% on error.
1699 *
1700 * Use: Parse a register value from an input file.
1701 *
1702 * The input format for a size value consists of an unsigned
1703 * integer followed by an optional unit specifier consisting of
1704 * an SI unit prefix and (optionally) the letter `B'. */
1705
1706static int parse_size(union tvec_regval *rv, const struct tvec_regdef *rd,
1707 struct tvec_state *tv)
1708{
1709 unsigned long sz;
1710 int rc;
1711
1712 if (parse_szint(tv, &sz, ";", "size")) { rc = -1; goto end; }
1713 if (check_unsigned_range(sz, rd->arg.p, tv, "size")) { rc = -1; goto end; }
1714 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
1715 rv->u = sz; rc = 0;
1716end:
1717 return (rc);
1718}
1719
1720/* --- @dump_size@ --- *
1721 *
1722 * Arguments: @const union tvec_regval *rv@ = register value
1723 * @const struct tvec_regdef *rd@ = register definition
1724 * @unsigned style@ = output style (@TVSF_...@)
1725 * @const struct gprintf_ops *gops@, @void *gp@ = format output
1726 *
1727 * Returns: ---
1728 *
1729 * Use: Dump a register value to the format output.
1730 *
1731 * Size values are dumped with a unit specifier, with a unit
1732 * prefox only if the size is an exact multiple of the relevant
1733 * power of two. Unless compact style is requested, the plain
1734 * decimal and hex representations of the value are also
1735 * printed.
1736 */
1737
1738static void dump_size(const union tvec_regval *rv,
1739 const struct tvec_regdef *rd,
1740 unsigned style,
1741 const struct gprintf_ops *gops, void *go)
1742{
5c0f2e08 1743 if (style&TVSF_RAW) gprintf(gops, go, "size:");
c4ccbbf9 1744 format_size(gops, go, rv->u, style);
5c0f2e08 1745 if (!(style&(TVSF_COMPACT | TVSF_RAW))) {
c4ccbbf9
MW
1746 gprintf(gops, go, " ; = %lu", (unsigned long)rv->u);
1747 gprintf(gops, go, " = "); format_unsigned_hex(gops, go, rv->u);
1748 maybe_format_unsigned_char(gops, go, rv->u);
1749 }
1750}
1751
1752/* Size type definitions. */
1753const struct tvec_regty tvty_size = {
1754 init_uint, trivial_release, eq_uint,
1755 tobuf_uint, frombuf_uint,
1756 parse_size, dump_size
1757};
1758
1759/* --- @tvec_claimeq_size@ --- *
1760 *
1761 * Arguments: @struct tvec_state *tv@ = test-vector state
1762 * @unsigned long sz0, sz1@ = two sizes
1763 * @const char *file@, @unsigned @lno@ = calling file and line
1764 * @const char *expr@ = the expression to quote on failure
1765 *
1766 * Returns: Nonzero if @sz0@ and @sz1@ are equal, otherwise zero.
1767 *
1768 * Use: Check that values of @u0@ and @u1@ are equal. As for
1769 * @tvec_claim@ above, a test case is automatically begun and
1770 * ended if none is already underway. If the values are
1771 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
1772 * mismatched values are dumped: @u0@ is printed as the output
1773 * value and @u1@ is printed as the input reference.
1774 */
1775
1776int tvec_claimeq_size(struct tvec_state *tv,
1777 unsigned long sz0, unsigned long sz1,
1778 const char *file, unsigned lno, const char *expr)
1779{
1780 tv->out[0].v.u = sz0; tv->in[0].v.u = sz1;
1781 return (tvec_claimeq(tv, &tvty_size, 0, file, lno, expr));
1782}
1783
3efcfd2d 1784/*----- Floating-point type -----------------------------------------------*/
e63124bc 1785
814e42ff 1786/* --- @int_float@ --- *
c81c35df
MW
1787 *
1788 * Arguments: @union tvec_regval *rv@ = register value
1789 * @const struct tvec_regdef *rd@ = register definition
1790 *
1791 * Returns: ---
1792 *
1793 * Use: Initialize a register value.
1794 *
1795 * Floating-point values are initialized to zero.
1796 */
1797
e63124bc
MW
1798static void init_float(union tvec_regval *rv, const struct tvec_regdef *rd)
1799 { rv->f = 0.0; }
e63124bc 1800
c81c35df
MW
1801/* --- @eq_float@ --- *
1802 *
1803 * Arguments: @const union tvec_regval *rv0, *rv1@ = register values
1804 * @const struct tvec_regdef *rd@ = register definition
1805 *
1806 * Returns: Nonzero if the values are equal, zero if unequal
1807 *
1808 * Use: Compare register values for equality.
1809 *
1810 * Floating-point values may be considered equal if their
1811 * absolute or relative difference is sufficiently small, as
1812 * described in the register definition.
1813 */
1814
e63124bc
MW
1815static int eq_float(const union tvec_regval *rv0,
1816 const union tvec_regval *rv1,
1817 const struct tvec_regdef *rd)
1818 { return (eqish_floating_p(rv0->f, rv1->f, rd->arg.p)); }
1819
c81c35df
MW
1820/* --- @tobuf_float@ --- *
1821 *
1822 * Arguments: @buf *b@ = buffer
1823 * @const union tvec_regval *rv@ = register value
1824 * @const struct tvec_regdef *rd@ = register definition
1825 *
1826 * Returns: Zero on success, %$-1$% on failure.
1827 *
1828 * Use: Serialize a register value to a buffer.
1829 *
1830 * Floating-point values are serialized as little-endian
1831 * IEEE 754 Binary64.
1832 */
1833
e63124bc
MW
1834static int tobuf_float(buf *b, const union tvec_regval *rv,
1835 const struct tvec_regdef *rd)
1836 { return (buf_putf64l(b, rv->f)); }
c81c35df
MW
1837
1838/* --- @frombuf_float@ --- *
1839 *
1840 * Arguments: @buf *b@ = buffer
1841 * @union tvec_regval *rv@ = register value
1842 * @const struct tvec_regdef *rd@ = register definition
1843 *
1844 * Returns: Zero on success, %$-1$% on failure.
1845 *
1846 * Use: Deserialize a register value from a buffer.
1847 *
1848 * Floating-point values are serialized as little-endian
1849 * IEEE 754 Binary64.
1850 */
1851
e63124bc 1852static int frombuf_float(buf *b, union tvec_regval *rv,
b1a20bee
MW
1853 const struct tvec_regdef *rd)
1854{
1855 double t;
1856 int rc;
1857
1858 rc = buf_getf64l(b, &t); if (!rc) rv->f = t;
1859 return (rc);
1860}
e63124bc 1861
c81c35df
MW
1862/* --- @parse_float@ --- *
1863 *
1864 * Arguments: @union tvec_regval *rv@ = register value
1865 * @const struct tvec_regdef *rd@ = register definition
1866 * @struct tvec_state *tv@ = test-vector state
1867 *
1868 * Returns: Zero on success, %$-1$% on error.
1869 *
1870 * Use: Parse a register value from an input file.
1871 *
1872 * Floating-point values are either NaN (%|#nan|%, if supported
1873 * by the platform); positive or negative infinity (%|#inf|%,
1874 * %|+#inf|%, or %|#+inf|% (preferring the last), and %|-#inf|%
1875 * or %|#-inf|% (preferring the latter), if supported by the
1876 * platform); or a number in strtod(3) syntax.
1877 */
1878
e63124bc
MW
1879static int parse_float(union tvec_regval *rv, const struct tvec_regdef *rd,
1880 struct tvec_state *tv)
1881{
1882 dstr d = DSTR_INIT;
1883 int rc;
1884
adec5584 1885 if (tvec_readword(tv, &d, 0, ";", "floating-point number"))
e63124bc 1886 { rc = -1; goto end; }
814e42ff
MW
1887 if (parse_floating(&rv->f, 0, d.buf, rd->arg.p, tv))
1888 { rc = -1; goto end; }
c81c35df 1889 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
e63124bc
MW
1890 rc = 0;
1891end:
1892 dstr_destroy(&d);
1893 return (rc);
1894}
1895
c81c35df
MW
1896/* --- @dump_float@ --- *
1897 *
1898 * Arguments: @const union tvec_regval *rv@ = register value
1899 * @const struct tvec_regdef *rd@ = register definition
1900 * @unsigned style@ = output style (@TVSF_...@)
1901 * @const struct gprintf_ops *gops@, @void *gp@ = format output
1902 *
1903 * Returns: ---
1904 *
1905 * Use: Dump a register value to the format output.
1906 *
1907 * Floating-point values are dumped in decimal or as a special
1908 * token beginning with `%|#|%'. Some effort is taken to ensure
1909 * that the output is sufficient to uniquely identify the
1910 * original value, but, honestly, C makes this really hard.
1911 */
1912
e63124bc
MW
1913static void dump_float(const union tvec_regval *rv,
1914 const struct tvec_regdef *rd,
1915 unsigned style,
1916 const struct gprintf_ops *gops, void *go)
5c0f2e08
MW
1917{
1918 if (style&TVSF_RAW) gprintf(gops, go, "float:");
1919 format_floating(gops, go, rv->f);
1920}
e63124bc 1921
c81c35df 1922/* Floating-point type definition. */
e63124bc 1923const struct tvec_regty tvty_float = {
3efcfd2d 1924 init_float, trivial_release, eq_float,
e63124bc
MW
1925 tobuf_float, frombuf_float,
1926 parse_float, dump_float
1927};
1928
c81c35df
MW
1929/* Predefined floating-point ranges. */
1930const struct tvec_floatinfo
b1a20bee
MW
1931 tvflt_float = { TVFF_EXACT | TVFF_INFOK | TVFF_NANOK,
1932 -FLT_MAX, FLT_MAX, 0.0 },
1933 tvflt_double = { TVFF_EXACT | TVFF_INFOK | TVFF_NANOK,
1934 -DBL_MAX, DBL_MAX, 0.0 },
c81c35df
MW
1935 tvflt_finite = { TVFF_EXACT, -DBL_MAX, DBL_MAX, 0.0 },
1936 tvflt_nonneg = { TVFF_EXACT, 0, DBL_MAX, 0.0 };
1937
67b5031e
MW
1938/* --- @tvec_claimeqish_float@ --- *
1939 *
1940 * Arguments: @struct tvec_state *tv@ = test-vector state
1941 * @double f0, f1@ = two floating-point numbers
1942 * @unsigned f@ = flags (@TVFF_...@)
1943 * @double delta@ = maximum tolerable difference
1944 * @const char *file@, @unsigned lno@ = calling file and line
1945 * @const char *expr@ = the expression to quote on failure
1946 *
c4ccbbf9 1947 * Returns: Nonzero if @f0@ and @f1@ are sufficiently close, otherwise
67b5031e
MW
1948 * zero.
1949 *
1950 * Use: Check that values of @f0@ and @f1@ are sufficiently close.
1951 * As for @tvec_claim@ above, a test case is automatically begun
1952 * and ended if none is already underway. If the values are
1953 * too far apart, then @tvec_fail@ is called, quoting @expr@,
1954 * and the mismatched values are dumped: @f0@ is printed as the
1955 * output value and @f1@ is printed as the input reference.
1956 *
1957 * The details for the comparison are as follows.
1958 *
1959 * * A NaN value matches any other NaN, and nothing else.
1960 *
1961 * * An infinity matches another infinity of the same sign,
1962 * and nothing else.
1963 *
1964 * * If @f&TVFF_EQMASK@ is @TVFF_EXACT@, then any
1965 * representable number matches only itself: in particular,
1966 * positive and negative zero are considered distinct.
1967 * (This allows tests to check that they land on the correct
1968 * side of branch cuts, for example.)
1969 *
1970 * * If @f&TVFF_EQMASK@ is @TVFF_ABSDELTA@, then %$x$% matches
1971 * %$y$% when %$|x - y| < \delta$%.
1972 *
1973 * * If @f&TVFF_EQMASK@ is @TVFF_RELDELTA@, then %$x$% matches
c4ccbbf9
MW
1974 * %$y$% when %$|1 - x/y| < \delta$%. (Note that this
1975 * criterion is asymmetric. Write %$x \approx_\delta y$%
1976 * if and only if %$|1 - x/y| < \delta$%. Then, for example,
1977 * if %$y (1 - \delta) < x < y/(1 + \delta)$%, then
1978 * %$x \approx_\delta y$%, but %$y \not\approx_\delta x$%.)
67b5031e
MW
1979 */
1980
e63124bc
MW
1981int tvec_claimeqish_float(struct tvec_state *tv,
1982 double f0, double f1, unsigned f, double delta,
1983 const char *file, unsigned lno,
1984 const char *expr)
1985{
1986 struct tvec_floatinfo fi;
1987 union tvec_misc arg;
1988
1989 fi.f = f; fi.min = fi.max = 0.0; fi.delta = delta; arg.p = &fi;
3efcfd2d 1990 tv->out[0].v.f = f0; tv->in[0].v.f = f1;
e63124bc
MW
1991 return (tvec_claimeq(tv, &tvty_float, &arg, file, lno, expr));
1992}
e63124bc 1993
67b5031e
MW
1994/* --- @tvec_claimeq_float@ --- *
1995 *
1996 * Arguments: @struct tvec_state *tv@ = test-vector state
1997 * @double f0, f1@ = two floating-point numbers
1998 * @const char *file@, @unsigned lno@ = calling file and line
1999 * @const char *expr@ = the expression to quote on failure
2000 *
c4ccbbf9 2001 * Returns: Nonzero if @f0@ and @f1@ are identical, otherwise zero.
67b5031e
MW
2002 *
2003 * Use: Check that values of @f0@ and @f1@ are identical. The
2004 * function is exactly equivalent to @tvec_claimeqish_float@
2005 * with @f == TVFF_EXACT@.
2006 */
2007
2008int tvec_claimeq_float(struct tvec_state *tv,
2009 double f0, double f1,
2010 const char *file, unsigned lno,
2011 const char *expr)
2012{
2013 return (tvec_claimeqish_float(tv, f0, f1, TVFF_EXACT, 0.0,
2014 file, lno, expr));
2015}
2016
814e42ff
MW
2017/*----- Durations ---------------------------------------------------------*/
2018
2019/* A duration is a floating-point number of seconds. Initialization and
2020 * teardown, equality comparison, and serialization are as for floating-point
2021 * values.
2022 */
2023
/* Table of duration units: the unit's spelling, the number of seconds it
 * stands for, and flags.  Several spellings may share a scale; the one
 * marked @DUF_PREFER@ is the preferred spelling.  The preferred entries
 * appear in descending order of scale.
 *
 * Microseconds have an ASCII spelling `us' as well as `µs', because the
 * micro sign is not an alphanumeric character in the C locale and is awkward
 * to type.
 */
static const struct duration_unit {
  const char *unit;                     /* unit name */
  double scale;                         /* length of the unit in seconds */
  unsigned f;                           /* flags */
#define DUF_PREFER 1u                   /*   preferred spelling */
} duration_units[] = {
  { "Ys",       1e+24,          0 },
  { "Zs",       1e+21,          0 },
  { "Es",       1e+18,          0 },
  { "Ps",       1e+15,          0 },
  { "Ts",       1e+12,          0 },
  { "Gs",       1e+9,           0 },
  { "Ms",       1e+6,           0 },
  { "ks",       1e+3,           0 },
  { "hs",       1e+2,           0 },
  { "das",      1e+1,           0 },

  { "yr",       31557600.0,     DUF_PREFER },
  { "y",        31557600.0,     0 },
  { "day",      86400.0,        DUF_PREFER },
  { "dy",       86400.0,        0 },
  { "d",        86400.0,        0 },
  { "hr",       3600.0,         DUF_PREFER },
  { "hour",     3600.0,         0 },
  { "h",        3600.0,         0 },
  { "min",      60.0,           DUF_PREFER },
  { "m",        60.0,           0 },

  { "s",        1.0,            DUF_PREFER },
  { "sec",      1.0,            0 },

  { "ds",       1e-1,           0 },
  { "cs",       1e-2,           0 },
  { "ms",       1e-3,           DUF_PREFER },
  { "µs",       1e-6,           DUF_PREFER },
  { "us",       1e-6,           0 },
  { "ns",       1e-9,           DUF_PREFER },
  { "ps",       1e-12,          DUF_PREFER },
  { "fs",       1e-15,          DUF_PREFER },
  { "as",       1e-18,          DUF_PREFER },
  { "zs",       1e-21,          DUF_PREFER },
  { "ys",       1e-24,          DUF_PREFER },

  { 0 }
};
2068
13ee7406
MW
2069/* --- @tvec_parsedurunit@ --- *
2070 *
2071 * Arguments: @double *scale_out@ = where to leave the scale
2072 * @const char **p_inout@ = input unit string, updated
2073 *
2074 * Returns: Zero on success, %$-1$% on error.
2075 *
2076 * Use: If @*p_inout@ begins with a unit string followed by the end
2077 * of the string or some non-alphanumeric character, then store
2078 * the corresponding scale factor in @*scale_out@, advance
2079 * @*p_inout@ past the unit string, and return zero. Otherwise,
2080 * return %$-1$%.
2081 */
2082
2083int tvec_parsedurunit(double *scale_out, const char **p_inout)
2084{
2085 const char *p = *p_inout, *q;
2086 const struct duration_unit *u;
2087 size_t n;
2088
2089 while (ISSPACE(*p)) p++;
2090 for (q = p; *q && ISALNUM(*q); q++);
2091 n = q - p; if (!n) { *scale_out = 1.0; return (0); }
2092
2093 for (u = duration_units; u->unit; u++)
2094 if (STRNCMP(p, ==, u->unit, n) && !u->unit[n])
2095 { *scale_out = u->scale; *p_inout = q; return (0); }
2096 return (-1);
2097}
2098
814e42ff
MW
2099/* --- @parse_duration@ --- *
2100 *
2101 * Arguments: @union tvec_regval *rv@ = register value
2102 * @const struct tvec_regdef *rd@ = register definition
2103 * @struct tvec_state *tv@ = test-vector state
2104 *
2105 * Returns: Zero on success, %$-1$% on error.
2106 *
2107 * Use: Parse a register value from an input file.
2108 *
2109 * Duration values are finite nonnegative floating-point
2110 * numbers in @strtod@ syntax, optionally followed by a unit.
2111 */
2112
2113static int parse_duration(union tvec_regval *rv,
2114 const struct tvec_regdef *rd,
2115 struct tvec_state *tv)
2116{
2117 const struct duration_unit *u;
2118 const char *q;
adec5584 2119 dstr d = DSTR_INIT;
814e42ff
MW
2120 double t;
2121 int rc;
2122
adec5584 2123 if (tvec_readword(tv, &d, 0, ";", "duration")) { rc = -1; goto end; }
814e42ff
MW
2124 if (parse_floating(&t, &q, d.buf,
2125 rd->arg.p ? rd->arg.p : &tvflt_nonneg, tv))
2126 { rc = -1; goto end; }
2127
adec5584
MW
2128 if (!*q) tvec_readword(tv, &d, &q, ";", 0);
2129 if (*q) {
814e42ff
MW
2130 for (u = duration_units; u->unit; u++)
2131 if (STRCMP(q, ==, u->unit)) { t *= u->scale; goto found_unit; }
2132 rc = tvec_syntax(tv, *q, "end-of-line"); goto end;
2133 found_unit:;
2134 }
2135
2136 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
2137 rv->f = t; rc = 0;
2138end:
2139 dstr_destroy(&d);
2140 return (rc);
2141}
2142
2143/* --- @dump_duration@ --- *
2144 *
2145 * Arguments: @const union tvec_regval *rv@ = register value
2146 * @const struct tvec_regdef *rd@ = register definition
2147 * @unsigned style@ = output style (@TVSF_...@)
2148 * @const struct gprintf_ops *gops@, @void *go@ = format output
2149 *
2150 * Returns: ---
2151 *
2152 * Use: Dump a register value to the format output.
2153 *
2154 * Durations are dumped as a human-palatable scaled value with
2155 * unit, and, if compact style is not requested, as a raw number
2156 * of seconds at full precision as a comment.
2157 */
2158
2159static void dump_duration(const union tvec_regval *rv,
2160 const struct tvec_regdef *rd,
2161 unsigned style,
2162 const struct gprintf_ops *gops, void *go)
2163{
2164 const struct duration_unit *u;
2165 double t = rv->f;
2166
5c0f2e08
MW
2167 if (style&TVSF_RAW) {
2168 gprintf(gops, go, "duration:");
814e42ff 2169 format_floating(gops, go, rv->f);
5c0f2e08
MW
2170 gprintf(gops, go, "s");
2171 } else {
2172 if (!t) u = 0;
2173 else {
2174 for (u = duration_units; u->scale > t && u[1].unit; u++);
2175 t /= u->scale;
2176 }
2177 gprintf(gops, go, "%.4g %s", t, u ? u->unit : "s");
2178
2179 if (!(style&TVSF_COMPACT)) {
2180 gprintf(gops, go, "; = ");
2181 format_floating(gops, go, rv->f);
2182 gprintf(gops, go, " s");
2183 }
814e42ff
MW
2184 }
2185}
2186
2187/* Duration type definition. */
2188const struct tvec_regty tvty_duration = {
2189 init_float, trivial_release, eq_float,
2190 tobuf_float, frombuf_float,
2191 parse_duration, dump_duration
2192};
2193
c4ccbbf9
MW
2194/* --- @tvec_claimeqish_duration@ --- *
2195 *
2196 * Arguments: @struct tvec_state *tv@ = test-vector state
b1a20bee 2197 * @double t0, t1@ = two durations
c4ccbbf9
MW
2198 * @unsigned f@ = flags (@TVFF_...@)
2199 * @double delta@ = maximum tolerable difference
2200 * @const char *file@, @unsigned lno@ = calling file and line
2201 * @const char *expr@ = the expression to quote on failure
2202 *
2203 * Returns: Nonzero if @t0@ and @t1@ are sufficiently close, otherwise
2204 * zero.
2205 *
2206 * Use: Check that values of @t0@ and @t1@ are sufficiently close.
2207 * This is essentially the same as @tvec_claimeqish_float@, only
2208 * it dumps the values as durations on a mismatch.
2209 */
2210
2211int tvec_claimeqish_duration(struct tvec_state *tv,
2212 double t0, double t1, unsigned f, double delta,
2213 const char *file, unsigned lno,
2214 const char *expr)
2215{
2216 struct tvec_floatinfo fi;
2217 union tvec_misc arg;
2218
2219 fi.f = f; fi.min = fi.max = 0.0; fi.delta = delta; arg.p = &fi;
2220 tv->out[0].v.f = t0; tv->in[0].v.f = t1;
2221 return (tvec_claimeq(tv, &tvty_duration, &arg, file, lno, expr));
2222}
2223
2224/* --- @tvec_claimeq_duration@ --- *
2225 *
2226 * Arguments: @struct tvec_state *tv@ = test-vector state
2227 * @double t0, t1@ = two durations
2228 * @const char *file@, @unsigned lno@ = calling file and line
2229 * @const char *expr@ = the expression to quote on failure
2230 *
2231 * Returns: Nonzero if @t0@ and @t1@ are identical, otherwise zero.
2232 *
2233 * Use: Check that values of @t0@ and @t1@ are identical. The
2234 * function is exactly equivalent to @tvec_claimeqish_duration@
2235 * with @f == TVFF_EXACT@.
2236 */
2237
2238int tvec_claimeq_duration(struct tvec_state *tv,
2239 double t0, double t1,
2240 const char *file, unsigned lno,
2241 const char *expr)
2242{
2243 return (tvec_claimeqish_duration(tv, t0, t1, TVFF_EXACT, 0.0,
2244 file, lno, expr));
2245}
2246
b64eb60f
MW
2247/*----- Enumerations ------------------------------------------------------*/
2248
c81c35df
MW
2249/* --- @init_tenum@ --- *
2250 *
2251 * Arguments: @union tvec_regval *rv@ = register value
2252 * @const struct tvec_regdef *rd@ = register definition
2253 *
2254 * Returns: ---
2255 *
2256 * Use: Initialize a register value.
2257 *
2258 * Integer and floating-point enumeration values are initialized
2259 * as their underlying representations. Pointer enumerations
2260 * are initialized to %|#nil|%.
2261 */
2262
3efcfd2d
MW
2263#define init_ienum init_int
2264#define init_uenum init_uint
2265#define init_fenum init_float
c81c35df 2266
3efcfd2d
MW
2267static void init_penum(union tvec_regval *rv, const struct tvec_regdef *rd)
2268 { rv->p = 0; }
b64eb60f 2269
c81c35df
MW
2270/* --- @eq_tenum@ --- *
2271 *
2272 * Arguments: @const union tvec_regval *rv0, *rv1@ = register values
2273 * @const struct tvec_regdef *rd@ = register definition
2274 *
2275 * Returns: Nonzero if the values are equal, zero if unequal
2276 *
2277 * Use: Compare register values for equality.
2278 *
2279 * Integer and floating-point enumeration values are compared as
2280 * their underlying representations; in particular, floating-
2281 * point enumerations may compare equal if their absolute or
2282 * relative difference is sufficiently small. Pointer
2283 * enumerations are compared as pointers.
2284 */
2285
3efcfd2d
MW
2286#define eq_ienum eq_int
2287#define eq_uenum eq_uint
c81c35df 2288
3efcfd2d
MW
2289static int eq_fenum(const union tvec_regval *rv0,
2290 const union tvec_regval *rv1,
2291 const struct tvec_regdef *rd)
b64eb60f 2292{
3efcfd2d
MW
2293 const struct tvec_fenuminfo *ei = rd->arg.p;
2294 return (eqish_floating_p(rv0->f, rv1->f, ei->fi));
b64eb60f 2295}
c81c35df 2296
3efcfd2d
MW
2297static int eq_penum(const union tvec_regval *rv0,
2298 const union tvec_regval *rv1,
2299 const struct tvec_regdef *rd)
2300 { return (rv0->p == rv1->p); }
b64eb60f 2301
c81c35df
MW
2302/* --- @tobuf_tenum@ --- *
2303 *
2304 * Arguments: @buf *b@ = buffer
2305 * @const union tvec_regval *rv@ = register value
2306 * @const struct tvec_regdef *rd@ = register definition
2307 *
2308 * Returns: Zero on success, %$-1$% on failure.
2309 *
2310 * Use: Serialize a register value to a buffer.
2311 *
2312 * Integer and floating-point enumeration values are serialized
2313 * as their underlying representations. Pointer enumerations
2314 * are serialized as the signed integer index into the
2315 * association table; %|#nil|% serializes as %$-1$%, and
2316 * unrecognized pointers cause failure.
2317 */
2318
3efcfd2d
MW
2319#define tobuf_ienum tobuf_int
2320#define tobuf_uenum tobuf_uint
2321#define tobuf_fenum tobuf_float
c81c35df 2322
3efcfd2d
MW
2323static int tobuf_penum(buf *b, const union tvec_regval *rv,
2324 const struct tvec_regdef *rd)
b64eb60f 2325{
3efcfd2d 2326 const struct tvec_penuminfo *pei = rd->arg.p;
e63124bc
MW
2327 const struct tvec_passoc *pa;
2328 long i;
b64eb60f 2329
3efcfd2d
MW
2330 for (pa = pei->av, i = 0; pa->tag; pa++, i++)
2331 if (pa->p == rv->p) goto found;
2332 if (!rv->p) i = -1;
2333 else return (-1);
2334found:
2335 return (signed_to_buf(b, i));
b64eb60f
MW
2336}
2337
c81c35df
MW
2338/* --- @frombuf_tenum@ --- *
2339 *
2340 * Arguments: @buf *b@ = buffer
2341 * @union tvec_regval *rv@ = register value
2342 * @const struct tvec_regdef *rd@ = register definition
2343 *
2344 * Returns: Zero on success, %$-1$% on failure.
2345 *
2346 * Use: Deserialize a register value from a buffer.
2347 *
2348 * Integer and floating-point enumeration values are serialized
2349 * as their underlying representations. Pointer enumerations
2350 * are serialized as the signed integer index into the
2351 * association table; %|#nil|% serializes as %$-1$%; out-of-
2352 * range indices cause failure.
2353 */
2354
3efcfd2d
MW
2355#define frombuf_ienum frombuf_int
2356#define frombuf_uenum frombuf_uint
2357#define frombuf_fenum frombuf_float
2358static int frombuf_penum(buf *b, union tvec_regval *rv,
b64eb60f
MW
2359 const struct tvec_regdef *rd)
2360{
3efcfd2d 2361 const struct tvec_penuminfo *pei = rd->arg.p;
e63124bc
MW
2362 const struct tvec_passoc *pa;
2363 long i, n;
b64eb60f 2364
3efcfd2d
MW
2365 for (pa = pei->av, n = 0; pa->tag; pa++, n++);
2366 if (signed_from_buf(b, &i)) return (-1);
b1a20bee 2367 if (0 <= i && i < n) rv->p = UNCONST(void, pei->av[i].p);
3efcfd2d 2368 else if (i == -1) rv->p = 0;
adec5584 2369 else { buf_break(b); return (-1); }
3efcfd2d 2370 return (0);
b64eb60f
MW
2371}
2372
c81c35df
MW
2373/* --- @parse_tenum@ --- *
2374 *
2375 * Arguments: @union tvec_regval *rv@ = register value
2376 * @const struct tvec_regdef *rd@ = register definition
2377 * @struct tvec_state *tv@ = test-vector state
2378 *
2379 * Returns: Zero on success, %$-1$% on error.
2380 *
2381 * Use: Parse a register value from an input file.
2382 *
2383 * An enumerated value may be given by name or as a literal
2384 * value. For enumerations based on numeric types, the literal
2385 * values can be written in the same syntax as the underlying
2386 * values. For enumerations based on pointers, the only
2387 * permitted literal is %|#nil|%, which denotes a null pointer.
2388 */
2389
3efcfd2d
MW
2390#define DEFPARSE_ENUM(tag_, ty, slot) \
2391 static int parse_##slot##enum(union tvec_regval *rv, \
2392 const struct tvec_regdef *rd, \
2393 struct tvec_state *tv) \
2394 { \
2395 const struct tvec_##slot##enuminfo *ei = rd->arg.p; \
2396 const struct tvec_##slot##assoc *a; \
2397 dstr d = DSTR_INIT; \
2398 int rc; \
2399 \
5c0f2e08
MW
2400 if (tvec_readword(tv, &d, 0, \
2401 ";", "%s tag or " LITSTR_##tag_, ei->name)) \
3efcfd2d
MW
2402 { rc = -1; goto end; } \
2403 for (a = ei->av; a->tag; a++) \
2404 if (STRCMP(a->tag, ==, d.buf)) { FOUND_##tag_ goto done; } \
2405 MISSING_##tag_ \
2406 done: \
2407 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; } \
2408 rc = 0; \
2409 end: \
2410 dstr_destroy(&d); \
2411 return (rc); \
2412 }
b64eb60f 2413
3efcfd2d
MW
2414#define LITSTR_INT "literal signed integer"
2415#define FOUND_INT rv->i = a->i;
2416#define MISSING_INT if (parse_signed(&rv->i, d.buf, ei->ir, tv)) \
2417 { rc = -1; goto end; }
2418
2419#define LITSTR_UINT "literal unsigned integer"
2420#define FOUND_UINT rv->u = a->u;
2421#define MISSING_UINT if (parse_unsigned(&rv->u, d.buf, ei->ur, tv)) \
2422 { rc = -1; goto end; }
2423
2424#define LITSTR_FLT "literal floating-point number, " \
2425 "`#-inf', `#+inf', or `#nan'"
2426#define FOUND_FLT rv->f = a->f;
814e42ff 2427#define MISSING_FLT if (parse_floating(&rv->f, 0, d.buf, ei->fi, tv)) \
3efcfd2d
MW
2428 { rc = -1; goto end; }
2429
2430#define LITSTR_PTR "`#nil'"
b1a20bee 2431#define FOUND_PTR rv->p = UNCONST(void, a->p);
3efcfd2d
MW
2432#define MISSING_PTR if (STRCMP(d.buf, ==, "#nil")) \
2433 rv->p = 0; \
2434 else { \
2435 tvec_error(tv, "unknown `%s' value `%s'", \
2436 ei->name, d.buf); \
2437 rc = -1; goto end; \
2438 }
2439
2440TVEC_MISCSLOTS(DEFPARSE_ENUM)
2441
2442#undef LITSTR_INT
2443#undef FOUND_INT
2444#undef MISSING_INT
2445
2446#undef LITSTR_UINT
2447#undef FOUND_UINT
2448#undef MISSING_UINT
2449
2450#undef LITSTR_FLT
2451#undef FOUND_FLT
2452#undef MISSING_FLT
2453
2454#undef LITSTR_PTR
2455#undef FOUND_PTR
2456#undef MISSING_PTR
2457
2458#undef DEFPARSE_ENUM
2459
c81c35df
MW
2460/* --- @dump_tenum@ --- *
2461 *
2462 * Arguments: @const union tvec_regval *rv@ = register value
2463 * @const struct tvec_regdef *rd@ = register definition
2464 * @unsigned style@ = output style (@TVSF_...@)
2465 * @const struct gprintf_ops *gops@, @void *go@ = format output
2466 *
2467 * Returns: ---
2468 *
2469 * Use: Dump a register value to the format output.
2470 *
2471 * Enumeration values are dumped as their symbolic names, if
2472 * possible, with the underlying values provided as a comment
2473 * unless compact output is requested, as for the underlying
2474 * representation. A null pointer is printed as %|#nil|%;
2475 * non-null pointers are printed as %|#<TYPE PTR>|%, with the
2476 * enumeration TYPE and the raw pointer PTR printed with the
2477 * system's %|%p|% format specifier.
2478 */
2479
2480
3efcfd2d
MW
2481#define DEFDUMP_ENUM(tag_, ty, slot) \
2482 static void dump_##slot##enum(const union tvec_regval *rv, \
2483 const struct tvec_regdef *rd, \
2484 unsigned style, \
2485 const struct gprintf_ops *gops, void *go) \
2486 { \
2487 const struct tvec_##slot##enuminfo *ei = rd->arg.p; \
2488 const struct tvec_##slot##assoc *a; \
2489 \
5c0f2e08 2490 if (style&TVSF_RAW) gprintf(gops, go, #slot "enum/%s:", ei->name); \
3efcfd2d
MW
2491 for (a = ei->av; a->tag; a++) \
2492 if (rv->slot == a->slot) { \
2493 gprintf(gops, go, "%s", a->tag); \
2494 if (style&TVSF_COMPACT) return; \
2495 gprintf(gops, go, " ; = "); break; \
2496 } \
2497 \
2498 PRINTRAW_##tag_ \
b64eb60f
MW
2499 }
2500
3efcfd2d 2501#define MAYBE_PRINT_EXTRA \
c81c35df 2502 if (style&TVSF_COMPACT) /* nothing to do */; \
3efcfd2d
MW
2503 else if (!a->tag) { gprintf(gops, go, " ; = "); goto _extra; } \
2504 else if (1) { gprintf(gops, go, " = "); goto _extra; } \
2505 else _extra:
b64eb60f 2506
3efcfd2d
MW
2507#define PRINTRAW_INT gprintf(gops, go, "%ld", rv->i); \
2508 MAYBE_PRINT_EXTRA { \
2509 format_signed_hex(gops, go, rv->i); \
2510 maybe_format_signed_char(gops, go, rv->i); \
2511 }
b64eb60f 2512
3efcfd2d
MW
2513#define PRINTRAW_UINT gprintf(gops, go, "%lu", rv->u); \
2514 MAYBE_PRINT_EXTRA { \
2515 format_unsigned_hex(gops, go, rv->u); \
2516 maybe_format_unsigned_char(gops, go, rv->u); \
2517 }
2518
2519#define PRINTRAW_FLT format_floating(gops, go, rv->f);
2520
2521#define PRINTRAW_PTR if (!rv->p) gprintf(gops, go, "#nil"); \
e63124bc 2522 else gprintf(gops, go, "#<%s %p>", ei->name, rv->p);
b64eb60f 2523
3efcfd2d 2524TVEC_MISCSLOTS(DEFDUMP_ENUM)
b64eb60f 2525
3efcfd2d
MW
2526#undef PRINTRAW_INT
2527#undef PRINTRAW_UINT
2528#undef PRINTRAW_FLT
2529#undef PRINTRAW_PTR
2530
2531#undef MAYBE_PRINT_EXTRA
2532#undef DEFDUMP_ENUM
2533
c81c35df 2534/* Enumeration type definitions. */
3efcfd2d
MW
2535#define DEFTY_ENUM(tag, ty, slot) \
2536 const struct tvec_regty tvty_##slot##enum = { \
2537 init_##slot##enum, trivial_release, eq_##slot##enum, \
2538 tobuf_##slot##enum, frombuf_##slot##enum, \
2539 parse_##slot##enum, dump_##slot##enum \
2540 };
2541TVEC_MISCSLOTS(DEFTY_ENUM)
2542#undef DEFTY_ENUM
b64eb60f 2543
c81c35df 2544/* Predefined enumeration types. */
e63124bc
MW
2545static const struct tvec_iassoc bool_assoc[] = {
2546 { "nil", 0 },
2547 { "false", 0 },
2548 { "f", 0 },
2549 { "no", 0 },
2550 { "n", 0 },
2551 { "off", 0 },
2552
2553 { "t", 1 },
2554 { "true", 1 },
2555 { "yes", 1 },
2556 { "y", 1 },
2557 { "on", 1 },
2558
20ba6b0b 2559 TVEC_ENDENUM
e63124bc
MW
2560};
2561
2562const struct tvec_ienuminfo tvenum_bool =
3efcfd2d 2563 { "bool", bool_assoc, &tvrange_int };
e63124bc 2564
20ba6b0b
MW
2565static const struct tvec_iassoc cmp_assoc[] = {
2566 { "<", -1 },
2567 { "less", -1 },
2568 { "lt", -1 },
2569
2570 { "=", 0 },
2571 { "equal", 0 },
2572 { "eq", 0 },
2573
2574 { ">", +1 },
2575 { "greater", +1 },
2576 { "gt", +1 },
2577
2578 TVEC_ENDENUM
2579};
2580
2581const struct tvec_ienuminfo tvenum_cmp =
2582 { "cmp", cmp_assoc, &tvrange_int };
2583
67b5031e
MW
2584/* --- @tvec_claimeq_tenum@ --- *
2585 *
2586 * Arguments: @struct tvec_state *tv@ = test-vector state
2587 * @const struct tvec_typeenuminfo *ei@ = enumeration type info
2588 * @ty t0, t1@ = two values
2589 * @const char *file@, @unsigned lno@ = calling file and line
2590 * @const char *expr@ = the expression to quote on failure
2591 *
2592 * Returns: Nonzero if @t0@ and @t1@ are equal, otherwise zero.
2593 *
2594 * Use: Check that values of @t0@ and @t1@ are equal. As for
2595 * @tvec_claim@ above, a test case is automatically begun and
2596 * ended if none is already underway. If the values are
2597 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
2598 * mismatched values are dumped: @t0@ is printed as the output
2599 * value and @t1@ is printed as the input reference.
2600 */
2601
b64eb60f 2602#define DEFCLAIM(tag, ty, slot) \
e63124bc
MW
2603 int tvec_claimeq_##slot##enum \
2604 (struct tvec_state *tv, \
2605 const struct tvec_##slot##enuminfo *ei, ty e0, ty e1, \
2606 const char *file, unsigned lno, const char *expr) \
b64eb60f
MW
2607 { \
2608 union tvec_misc arg; \
2609 \
b64eb60f 2610 arg.p = ei; \
3efcfd2d
MW
2611 tv->out[0].v.slot = GET_##tag(e0); \
2612 tv->in[0].v.slot = GET_##tag(e1); \
2613 return (tvec_claimeq(tv, &tvty_##slot##enum, &arg, \
2614 file, lno, expr)); \
b64eb60f
MW
2615 }
2616#define GET_INT(e) (e)
2617#define GET_UINT(e) (e)
e63124bc 2618#define GET_FLT(e) (e)
b1a20bee 2619#define GET_PTR(e) (UNCONST(void, (e)))
b64eb60f
MW
2620TVEC_MISCSLOTS(DEFCLAIM)
2621#undef DEFCLAIM
2622#undef GET_INT
2623#undef GET_UINT
e63124bc 2624#undef GET_FLT
b64eb60f
MW
2625#undef GET_PTR
2626
2627/*----- Flag types --------------------------------------------------------*/
2628
c81c35df
MW
2629/* Flag types are initialized, compared, and serialized as unsigned
2630 * integers.
2631 */
2632
2633/* --- @parse_flags@ --- *
2634 *
2635 * Arguments: @union tvec_regval *rv@ = register value
2636 * @const struct tvec_regdef *rd@ = register definition
2637 * @struct tvec_state *tv@ = test-vector state
2638 *
2639 * Returns: Zero on success, %$-1$% on error.
2640 *
2641 * Use: Parse a register value from an input file.
2642 *
2643 * The input syntax is a sequence of items separated by `|'
2644 * signs. Each item may be the symbolic name of a field value,
2645 * or a literal unsigned integer. The masks associated with the
2646 * given symbolic names must be disjoint. The resulting
2647 * numerical value is simply the bitwise OR of the given values.
2648 */
2649
882a39c1
MW
2650static int parse_flags(union tvec_regval *rv, const struct tvec_regdef *rd,
2651 struct tvec_state *tv)
b64eb60f
MW
2652{
2653 const struct tvec_flaginfo *fi = rd->arg.p;
2654 const struct tvec_flag *f;
2655 unsigned long m = 0, v = 0, t;
2656 dstr d = DSTR_INIT;
882a39c1 2657 int ch, rc;
b64eb60f
MW
2658
2659 for (;;) {
c81c35df
MW
2660
2661 /* Read the next item. */
882a39c1 2662 DRESET(&d);
5c0f2e08 2663 if (tvec_readword(tv, &d, 0, "|;", "%s flag name or integer", fi->name))
882a39c1 2664 { rc = -1; goto end; }
b64eb60f 2665
c81c35df 2666 /* Try to find a matching entry in the table. */
b64eb60f
MW
2667 for (f = fi->fv; f->tag; f++)
2668 if (STRCMP(f->tag, ==, d.buf)) {
882a39c1
MW
2669 if (m&f->m)
2670 { tvec_error(tv, "colliding flag setting"); rc = -1; goto end; }
2671 else
2672 { m |= f->m; v |= f->v; goto next; }
b64eb60f
MW
2673 }
2674
c81c35df 2675 /* Otherwise, try to parse it as a raw integer. */
e63124bc
MW
2676 if (parse_unsigned(&t, d.buf, fi->range, tv))
2677 { rc = -1; goto end; }
882a39c1 2678 v |= t;
c81c35df 2679
b64eb60f 2680 next:
c81c35df
MW
2681 /* Advance to the next token. If it's a separator then consume it, and
2682 * go round again. Otherwise we stop here.
2683 */
b64eb60f 2684 if (tvec_nexttoken(tv)) break;
882a39c1
MW
2685 ch = getc(tv->fp);
2686 if (ch != '|') { tvec_syntax(tv, ch, "`|'"); rc = -1; goto end; }
5c0f2e08
MW
2687 if (tvec_nexttoken(tv)) {
2688 tvec_syntax(tv, '\n', "%s flag name or integer", fi->name);
2689 rc = -1; goto end;
2690 }
b64eb60f 2691 }
c81c35df
MW
2692
2693 /* Done. */
2694 rv->u = v; rc = 0;
882a39c1
MW
2695end:
2696 dstr_destroy(&d);
2697 return (rc);
b64eb60f
MW
2698}
2699
c81c35df
MW
2700/* --- @dump_flags@ --- *
2701 *
2702 * Arguments: @const union tvec_regval *rv@ = register value
2703 * @const struct tvec_regdef *rd@ = register definition
2704 * @unsigned style@ = output style (@TVSF_...@)
2705 * @const struct gprintf_ops *gops@, @void *go@ = format output
2706 *
2707 * Returns: ---
2708 *
2709 * Use: Dump a register value to the format output.
2710 *
2711 * The table of symbolic names and their associated values and
2712 * masks is repeatedly scanned, in order, to find disjoint
2713 * matches -- i.e., entries whose value matches the target value
2714 * in the bit positions indicated by the mask, and whose mask
2715 * doesn't overlap with any previously found matches; the names
2716 * are then output, separated by `|'. Any remaining nonzero
2717 * bits not covered by any of the matching masks are output as a
2718 * single literal integer, in hex.
2719 *
2720 * Unless compact output is requested, or no symbolic names were
2721 * found, the raw numeric value is also printed in hex, as a
2722 * comment.
2723 */
2724
b64eb60f
MW
2725static void dump_flags(const union tvec_regval *rv,
2726 const struct tvec_regdef *rd,
e63124bc
MW
2727 unsigned style,
2728 const struct gprintf_ops *gops, void *go)
b64eb60f
MW
2729{
2730 const struct tvec_flaginfo *fi = rd->arg.p;
2731 const struct tvec_flag *f;
c81c35df 2732 unsigned long m = ~0ul, v = rv->u;
b64eb60f
MW
2733 const char *sep;
2734
5c0f2e08
MW
2735 if (style&TVSF_RAW) gprintf(gops, go, "flags/%s:", fi->name);
2736
b64eb60f
MW
2737 for (f = fi->fv, sep = ""; f->tag; f++)
2738 if ((m&f->m) && (v&f->m) == f->v) {
e63124bc 2739 gprintf(gops, go, "%s%s", sep, f->tag); m &= ~f->m;
b64eb60f
MW
2740 sep = style&TVSF_COMPACT ? "|" : " | ";
2741 }
2742
e63124bc 2743 if (v&m) gprintf(gops, go, "%s0x%0*lx", sep, hex_width(v), v&m);
b1a20bee 2744 else if (!v && m == ~0ul) gprintf(gops, go, "0");
b64eb60f 2745
b1a20bee 2746 if (!(style&(TVSF_COMPACT | TVSF_RAW)))
e63124bc 2747 gprintf(gops, go, " ; = 0x%0*lx", hex_width(rv->u), rv->u);
b64eb60f
MW
2748}
2749
c81c35df 2750/* Flags type definition. */
b64eb60f 2751const struct tvec_regty tvty_flags = {
3efcfd2d 2752 init_uint, trivial_release, eq_uint,
b64eb60f
MW
2753 tobuf_uint, frombuf_uint,
2754 parse_flags, dump_flags
2755};
2756
67b5031e
MW
2757/* --- @tvec_claimeq_flags@ --- *
2758 *
2759 * Arguments: @struct tvec_state *tv@ = test-vector state
2760 * @const struct tvec_flaginfo *fi@ = flags type info
2761 * @unsigned long f0, f1@ = two values
2762 * @const char *file@, @unsigned lno@ = calling file and line
2763 * @const char *expr@ = the expression to quote on failure
2764 *
2765 * Returns: Nonzero if @f0@ and @f1@ are equal, otherwise zero.
2766 *
2767 * Use: Check that values of @f0@ and @f1@ are equal. As for
2768 * @tvec_claim@ above, a test case is automatically begun and
2769 * ended if none is already underway. If the values are
2770 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
2771 * mismatched values are dumped: @f0@ is printed as the output
2772 * value and @f1@ is printed as the input reference.
2773 */
2774
b64eb60f
MW
2775int tvec_claimeq_flags(struct tvec_state *tv,
2776 const struct tvec_flaginfo *fi,
2777 unsigned long f0, unsigned long f1,
2778 const char *file, unsigned lno, const char *expr)
2779{
2780 union tvec_misc arg;
2781
3efcfd2d 2782 arg.p = fi; tv->out[0].v.u = f0; tv->in[0].v.u = f1;
b64eb60f
MW
2783 return (tvec_claimeq(tv, &tvty_flags, &arg, file, lno, expr));
2784}
2785
e63124bc
MW
2786/*----- Characters --------------------------------------------------------*/
2787
c81c35df
MW
2788/* Character values are initialized and compared as signed integers. */
2789
2790/* --- @tobuf_char@ --- *
2791 *
2792 * Arguments: @buf *b@ = buffer
2793 * @const union tvec_regval *rv@ = register value
2794 * @const struct tvec_regdef *rd@ = register definition
2795 *
2796 * Returns: Zero on success, %$-1$% on failure.
2797 *
2798 * Use: Serialize a register value to a buffer.
2799 *
2800 * Character values are serialized as little-endian 32-bit
2801 * unsigned integers, with %|EOF|% serialized as all-bits-set.
2802 */
2803
e63124bc 2804static int tobuf_char(buf *b, const union tvec_regval *rv,
67b5031e 2805 const struct tvec_regdef *rd)
e63124bc
MW
2806{
2807 uint32 u;
c81c35df 2808
e63124bc
MW
2809 if (0 <= rv->i && rv->i <= UCHAR_MAX) u = rv->i;
2810 else if (rv->i == EOF) u = MASK32;
adec5584 2811 else { buf_break(b); return (-1); }
e63124bc
MW
2812 return (buf_putu32l(b, u));
2813}
2814
c81c35df
MW
2815/* --- @frombuf_char@ --- *
2816 *
2817 * Arguments: @buf *b@ = buffer
2818 * @union tvec_regval *rv@ = register value
2819 * @const struct tvec_regdef *rd@ = register definition
2820 *
2821 * Returns: Zero on success, %$-1$% on failure.
2822 *
2823 * Use: Deserialize a register value from a buffer.
2824 *
2825 * Character values are serialized as little-endian 32-bit
2826 * unsigned integers, with %|EOF|% serialized as all-bits-set.
2827 */
2828
e63124bc 2829static int frombuf_char(buf *b, union tvec_regval *rv,
67b5031e 2830 const struct tvec_regdef *rd)
e63124bc
MW
2831{
2832 uint32 u;
2833
2834 if (buf_getu32l(b, &u)) return (-1);
2835 if (0 <= u && u <= UCHAR_MAX) rv->i = u;
2836 else if (u == MASK32) rv->i = EOF;
adec5584 2837 else { buf_break(b); return (-1); }
e63124bc
MW
2838 return (0);
2839}
2840
c81c35df
MW
2841/* --- @parse_char@ --- *
2842 *
2843 * Arguments: @union tvec_regval *rv@ = register value
2844 * @const struct tvec_regdef *rd@ = register definition
2845 * @struct tvec_state *tv@ = test-vector state
2846 *
2847 * Returns: Zero on success, %$-1$% on error.
2848 *
2849 * Use: Parse a register value from an input file.
2850 *
2851 * A character value can be given by symbolic name, with a
2852 * leading `%|#|%'; or a character or `%|\|%'-escape sequence,
2853 * optionally in single quotes.
2854 *
2855 * The following escape sequences and character names are
2856 * recognized.
2857 *
2858 * * `%|#eof|%' is the special end-of-file marker.
2859 *
2860 * * `%|#nul|%' is the NUL character, sometimes used to
2861 * terminate strings.
2862 *
2863 * * `%|bell|%', `%|bel|%', `%|ding|%', or `%|\a|%' is the BEL
2864 * character used to ring the terminal bell (or do some other
2865 * thing to attract the user's attention).
2866 *
2867 * * %|#backspace|%, %|#bs|%, or %|\b|% is the backspace
2868 * character, used to move the cursor backwards by one cell.
2869 *
2870 * * %|#escape|%, %|#esc|%, or %|\e|% is the escape character,
2871 * used to introduce special terminal commands.
2872 *
2873 * * %|#formfeed|%, %|#ff|%, or %|\f|% is the formfeed
2874 * character, used to separate pages of text.
2875 *
2876 * * %|#newline|%, %|#linefeed|%, %|#lf|%, %|#nl|%, or %|\n|% is
2877 * the newline character, used to terminate lines of text or
2878 * advance the cursor to the next line (perhaps without
2879 * returning it to the start of the line).
2880 *
2881 * * %|#return|%, %|#carriage-return|%, %|#cr|%, or %|\r|% is
2882 * the carriage-return character, used to return the cursor to
2883 * the start of the line.
2884 *
2885 * * %|#tab|%, %|#horizontal-tab|%, %|#ht|%, or %|\t|% is the
2886 * tab character, used to advance the cursor to the next tab
2887 * stop on the current line.
2888 *
2889 * * %|#vertical-tab|%, %|#vt|%, %|\v|% is the vertical tab
2890 * character.
2891 *
2892 * * %|#space|%, %|#spc|% is the space character.
2893 *
2894 * * %|#delete|%, %|#del|% is the delete character, used to
2895 * erase the most recent character.
2896 *
2897 * * %|\'|% is the single-quote character.
2898 *
2899 * * %|\\|% is the backslash character.
2900 *
2901 * * %|\"|% is the double-quote character.
2902 *
2903 * * %|\NNN|% or %|\{NNN}|% is the character with code NNN in
2904 * octal. The NNN may be up to three digits long.
2905 *
2906 * * %|\xNN|% or %|\x{NN}|% is the character with code NN in
2907 * hexadecimal.
2908 */
2909
e63124bc
MW
2910static int parse_char(union tvec_regval *rv, const struct tvec_regdef *rd,
2911 struct tvec_state *tv)
2912{
2913 dstr d = DSTR_INIT;
2914 int ch, rc;
2915 unsigned f = 0;
2916#define f_quote 1u
2917
c81c35df 2918 /* Inspect the character to see what we're up against. */
e63124bc 2919 ch = getc(tv->fp);
c81c35df 2920
e63124bc 2921 if (ch == '#') {
c81c35df
MW
2922 /* It looks like a special token. Push the `%|#|%' back and fetch the
2923 * whole word. If there's just the `%|#|%' after all, then treat it as
2924 * literal.
2925 */
2926
e63124bc 2927 ungetc(ch, tv->fp);
adec5584
MW
2928 if (tvec_readword(tv, &d, 0, ";", "character name"))
2929 { rc = -1; goto end; }
c81c35df
MW
2930 if (STRCMP(d.buf, !=, "#")) {
2931 if (read_charname(&ch, d.buf, RCF_EOFOK)) {
2932 rc = tvec_error(tv, "unknown character name `%s'", d.buf);
2933 goto end;
2934 }
2935 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
2936 rv->i = ch; rc = 0; goto end;
e63124bc 2937 }
e63124bc
MW
2938 }
2939
c81c35df
MW
2940 /* If this is a single quote then we expect to see a matching one later,
2941 * and we should process backslash escapes. Get the next character and see
2942 * what happens.
2943 */
e63124bc 2944 if (ch == '\'') { f |= f_quote; ch = getc(tv->fp); }
c81c35df
MW
2945
2946 /* Main character dispatch. */
e63124bc 2947 switch (ch) {
c81c35df 2948
67b5031e 2949 case ';':
c81c35df 2950 /* Unquoted, semicolon begins a comment. */
67b5031e 2951 if (!(f&f_quote)) { rc = tvec_syntax(tv, ch, "character"); goto end; }
c81c35df
MW
2952 else goto plain;
2953
67b5031e 2954 case '\n':
c81c35df
MW
2955 /* A newline. If we saw a single quote, then treat that as literal.
2956 * Otherwise this is an error.
2957 */
2958 if (!(f&f_quote)) goto nochar;
2959 else { f &= ~f_quote; ungetc(ch, tv->fp); ch = '\''; goto plain; }
2960
67b5031e 2961 case EOF:
c81c35df
MW
2962 /* End-of-file. Similar to newline, but with slightly different
2963 * effects on the parse state.
2964 */
2965 if (!(f&f_quote)) goto nochar;
2966 else { f &= ~f_quote; ch = '\''; goto plain; }
2967
2968 case '\'': nochar:
2969 /* A single quote. This must be the second of a pair, and there should
2970 * have been a character or escape sequence between them.
2971 */
e63124bc 2972 rc = tvec_syntax(tv, ch, "character"); goto end;
c81c35df 2973
e63124bc 2974 case '\\':
c81c35df 2975 /* A backslash. Read a character escape. */
67b5031e 2976 if (read_charesc(&ch, tv)) return (-1);
c81c35df 2977
e63124bc 2978 default: plain:
c81c35df 2979 /* Anything else. Treat as literal. */
e63124bc
MW
2980 rv->i = ch; break;
2981 }
c81c35df
MW
2982
2983 /* If we saw an opening quote, then expect the closing quote. */
e63124bc
MW
2984 if (f&f_quote) {
2985 ch = getc(tv->fp);
2986 if (ch != '\'') { rc = tvec_syntax(tv, ch, "`''"); goto end; }
2987 }
c81c35df
MW
2988
2989 /* Done. */
e63124bc
MW
2990 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
2991 rc = 0;
2992end:
2993 dstr_destroy(&d);
2994 return (rc);
2995
2996#undef f_quote
2997}
2998
c81c35df
MW
2999/* --- @dump_char@ --- *
3000 *
3001 * Arguments: @const union tvec_regval *rv@ = register value
3002 * @const struct tvec_regdef *rd@ = register definition
3003 * @unsigned style@ = output style (@TVSF_...@)
3004 * @const struct gprintf_ops *gops@, @void *gp@ = format output
3005 *
3006 * Returns: ---
3007 *
3008 * Use: Dump a register value to the format output.
3009 *
3010 * Character values are dumped as their symbolic names, if any,
3011 * or as a character or escape sequence within single quotes
3012 * (which may be omitted in compact style). If compact output
3013 * is not requested, then the single-quoted representation (for
3014 * characters dumped as symbolic names) and integer code in
3015 * decimal and hex are printed as a comment.
3016 */
3017
e63124bc
MW
3018static void dump_char(const union tvec_regval *rv,
3019 const struct tvec_regdef *rd,
3020 unsigned style,
3021 const struct gprintf_ops *gops, void *go)
3022{
67b5031e
MW
3023 const char *p;
3024 unsigned f = 0;
3025#define f_semi 1u
3026
5c0f2e08
MW
3027 if (style&TVSF_RAW) {
3028 /* Print the raw character unconditionally in single quotes. */
67b5031e 3029
5c0f2e08
MW
3030 gprintf(gops, go, "char:'");
3031 format_char(gops, go, rv->i);
3032 gprintf(gops, go, "'");
3033 } else {
3034 /* Print ina pleasant human-readable way. */
3035
3036 /* Print a character name if we can find one. */
3037 p = find_charname(rv->i, (style&TVSF_COMPACT) ? CTF_SHORT : CTF_PREFER);
3038 if (p) {
3039 gprintf(gops, go, "%s", p);
3040 if (style&TVSF_COMPACT) return;
3041 else { gprintf(gops, go, " ;"); f |= f_semi; }
67b5031e 3042 }
e63124bc 3043
5c0f2e08
MW
3044 /* If the character isn't @EOF@ then print it as a single-quoted thing.
3045 * In compact style, see if we can omit the quotes.
3046 */
3047 if (rv->i >= 0) {
3048 if (f&f_semi) gprintf(gops, go, " = ");
3049 switch (rv->i) {
3050 case ' ': case '\\': case '\'': quote:
3051 format_char(gops, go, rv->i);
3052 break;
3053 default:
3054 if (!(style&TVSF_COMPACT) || !isprint(rv->i)) goto quote;
3055 gprintf(gops, go, "%c", (int)rv->i);
3056 return;
3057 }
3058 }
3059
3060 /* And the character code as an integer. */
3061 if (!(style&TVSF_COMPACT)) {
3062 if (!(f&f_semi)) gprintf(gops, go, " ;");
3063 gprintf(gops, go, " = %ld = ", rv->i);
3064 format_signed_hex(gops, go, rv->i);
3065 }
e63124bc 3066 }
67b5031e
MW
3067
3068#undef f_semi
e63124bc
MW
3069}
3070
c81c35df 3071/* Character type definition. */
e63124bc 3072const struct tvec_regty tvty_char = {
3efcfd2d 3073 init_int, trivial_release, eq_int,
e63124bc
MW
3074 tobuf_char, frombuf_char,
3075 parse_char, dump_char
3076};
3077
67b5031e
MW
3078/* --- @tvec_claimeq_char@ --- *
3079 *
3080 * Arguments: @struct tvec_state *tv@ = test-vector state
3081 * @int ch0, ch1@ = two character codes
3082 * @const char *file@, @unsigned @lno@ = calling file and line
3083 * @const char *expr@ = the expression to quote on failure
3084 *
3085 * Returns: Nonzero if @ch0@ and @ch1@ are equal, otherwise zero.
3086 *
3087 * Use: Check that values of @ch0@ and @ch1@ are equal. As for
3088 * @tvec_claim@ above, a test case is automatically begun and
3089 * ended if none is already underway. If the values are
3090 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
3091 * mismatched values are dumped: @ch0@ is printed as the output
3092 * value and @ch1@ is printed as the input reference.
3093 */
3094
e63124bc
MW
3095int tvec_claimeq_char(struct tvec_state *tv, int c0, int c1,
3096 const char *file, unsigned lno, const char *expr)
3097{
3efcfd2d 3098 tv->out[0].v.i = c0; tv->in[0].v.i = c1;
e63124bc
MW
3099 return (tvec_claimeq(tv, &tvty_char, 0, file, lno, expr));
3100}
3101
b64eb60f
MW
3102/*----- Text and byte strings ---------------------------------------------*/
3103
c81c35df
MW
3104/* --- @init_text@, @init_bytes@ --- *
3105 *
3106 * Arguments: @union tvec_regval *rv@ = register value
3107 * @const struct tvec_regdef *rd@ = register definition
3108 *
3109 * Returns: ---
3110 *
3111 * Use: Initialize a register value.
3112 *
3113 * Text and binary string values are initialized with a null
3114 * pointer and zero length.
3115 */
3116
3117static void init_text(union tvec_regval *rv, const struct tvec_regdef *rd)
3118 { rv->text.p = 0; rv->text.sz = 0; }
b64eb60f
MW
3119
3120static void init_bytes(union tvec_regval *rv, const struct tvec_regdef *rd)
3121 { rv->bytes.p = 0; rv->bytes.sz = 0; }
3122
c81c35df
MW
3123/* --- @release_text@, @release_bytes@ --- *
3124 *
3125 * Arguments: @const union tvec_regval *rv@ = register value
3126 * @const struct tvec_regdef *rd@ = register definition
3127 *
3128 * Returns: ---
3129 *
3130 * Use: Release resources held by a register value.
3131 *
3132 * Text and binary string buffers are freed.
3133 */
3134
3135static void release_text(union tvec_regval *rv,
3136 const struct tvec_regdef *rd)
b1a20bee 3137 { free(rv->text.p); }
b64eb60f
MW
3138
3139static void release_bytes(union tvec_regval *rv,
3140 const struct tvec_regdef *rd)
b1a20bee 3141 { free(rv->bytes.p); }
b64eb60f 3142
c81c35df
MW
3143/* --- @eq_text@, @eq_bytes@ --- *
3144 *
3145 * Arguments: @const union tvec_regval *rv0, *rv1@ = register values
3146 * @const struct tvec_regdef *rd@ = register definition
3147 *
3148 * Returns: Nonzero if the values are equal, zero if unequal
3149 *
3150 * Use: Compare register values for equality.
3151 */
3152
3153static int eq_text(const union tvec_regval *rv0,
3154 const union tvec_regval *rv1,
3155 const struct tvec_regdef *rd)
b64eb60f 3156{
c81c35df
MW
3157 return (rv0->text.sz == rv1->text.sz &&
3158 (!rv0->text.sz ||
3159 MEMCMP(rv0->text.p, ==, rv1->text.p, rv1->text.sz)));
b64eb60f
MW
3160}
3161
3162static int eq_bytes(const union tvec_regval *rv0,
3163 const union tvec_regval *rv1,
3164 const struct tvec_regdef *rd)
3165{
3166 return (rv0->bytes.sz == rv1->bytes.sz &&
3167 (!rv0->bytes.sz ||
3168 MEMCMP(rv0->bytes.p, ==, rv1->bytes.p, rv1->bytes.sz)));
3169}
3170
c81c35df
MW
3171/* --- @tobuf_text@, @tobuf_bytes@ --- *
3172 *
3173 * Arguments: @buf *b@ = buffer
3174 * @const union tvec_regval *rv@ = register value
3175 * @const struct tvec_regdef *rd@ = register definition
3176 *
3177 * Returns: Zero on success, %$-1$% on failure.
3178 *
3179 * Use: Serialize a register value to a buffer.
3180 *
3181 * Text and binary string values are serialized as a little-
3182 * endian 64-bit length %$n$% in bytes followed by %$n$% bytes
3183 * of string data.
3184 */
3185
3186static int tobuf_text(buf *b, const union tvec_regval *rv,
3187 const struct tvec_regdef *rd)
3188 { return (buf_putmem64l(b, rv->text.p, rv->text.sz)); }
b64eb60f
MW
3189
3190static int tobuf_bytes(buf *b, const union tvec_regval *rv,
3191 const struct tvec_regdef *rd)
c81c35df 3192 { return (buf_putmem64l(b, rv->bytes.p, rv->bytes.sz)); }
b64eb60f 3193
c81c35df
MW
3194/* --- @frombuf_text@, @frombuf_bytes@ --- *
3195 *
3196 * Arguments: @buf *b@ = buffer
3197 * @union tvec_regval *rv@ = register value
3198 * @const struct tvec_regdef *rd@ = register definition
3199 *
3200 * Returns: Zero on success, %$-1$% on failure.
3201 *
3202 * Use: Deserialize a register value from a buffer.
3203 *
3204 * Text and binary string values are serialized as a little-
3205 * endian 64-bit length %$n$% in bytes followed by %$n$% bytes
3206 * of string data.
3207 */
3208
3209static int frombuf_text(buf *b, union tvec_regval *rv,
3210 const struct tvec_regdef *rd)
b64eb60f
MW
3211{
3212 const void *p;
3213 size_t sz;
3214
c81c35df
MW
3215 p = buf_getmem64l(b, &sz); if (!p) return (-1);
3216 tvec_alloctext(rv, sz); memcpy(rv->text.p, p, sz); rv->text.p[sz] = 0;
b64eb60f
MW
3217 return (0);
3218}
3219
3220static int frombuf_bytes(buf *b, union tvec_regval *rv,
3221 const struct tvec_regdef *rd)
3222{
3223 const void *p;
3224 size_t sz;
3225
c81c35df 3226 p = buf_getmem64l(b, &sz); if (!p) return (-1);
b64eb60f
MW
3227 tvec_allocbytes(rv, sz); memcpy(rv->bytes.p, p, sz);
3228 return (0);
3229}
3230
c81c35df
MW
3231/* --- @check_string_length@ --- *
3232 *
3233 * Arguments: @size_t sz@ = found string length
3234 * @const struct tvec_urange *ur@ = acceptable range
3235 * @struct tvec_state *tv@ = test-vector state
3236 *
3237 * Returns: Zero on success, %$-1$% on error.
3238 *
3239 * Use: Checks that @sz@ is within the bounds described by @ur@,
3240 * reporting an error if not.
3241 */
3242
882a39c1
MW
3243static int check_string_length(size_t sz, const struct tvec_urange *ur,
3244 struct tvec_state *tv)
b64eb60f 3245{
b1a20bee
MW
3246 unsigned long uu;
3247
3248 if (ur) {
3249 if (ur->min > sz || sz > ur->max) {
3250 tvec_error(tv, "invalid string length %lu; must be in [%lu .. %lu]",
3251 (unsigned long)sz, ur->min, ur->max);
3252 return (-1);
3253 }
3254 if (ur->m && ur->m != 1) {
3255 uu = sz%ur->m;
3256 if (uu != ur->a%ur->m) {
3257 tvec_error(tv, "invalid string length %lu == %lu =/= %lu (mod %lu)",
3258 (unsigned long)sz, uu, ur->a, ur->m);
3259 return (-1);
3260 }
3261 }
3262 }
882a39c1 3263 return (0);
b64eb60f
MW
3264}
3265
c81c35df
MW
3266/* --- @parse_text@, @parse_bytes@ --- *
3267 *
3268 * Arguments: @union tvec_regval *rv@ = register value
3269 * @const struct tvec_regdef *rd@ = register definition
3270 * @struct tvec_state *tv@ = test-vector state
3271 *
3272 * Returns: Zero on success, %$-1$% on error.
3273 *
3274 * Use: Parse a register value from an input file.
3275 *
3276 * The input format for both kinds of strings is basically the
3277 * same: a `compound string', consisting of
3278 *
3279 * * single-quoted strings, which are interpreted entirely
3280 * literally, but can't contain single quotes or newlines;
3281 *
3282 * * double-quoted strings, in which `%|\|%'-escapes are
3283 * interpreted as for characters;
3284 *
3285 * * character names, marked by an initial `%|#|%' sign;
3286 *
3287 * * special tokens marked by an initial `%|!|%' sign; or
3288 *
3289 * * barewords interpreted according to the current coding
3290 * scheme.
3291 *
3292 * The special tokens are
3293 *
3294 * * `%|!bare|%', which causes subsequent sequences of
3295 * barewords to be treated as plain text;
3296 *
3297 * * `%|!hex|%', `%|!base32|%', `%|!base64|%', which cause
3298 * subsequent barewords to be decoded in the requested
3299 * manner.
3300 *
3301 * * `%|!repeat|% %$n$% %|{|% %%\textit{string}%% %|}|%',
3302 * which includes %$n$% copies of the (compound) string.
3303 *
3304 * The only difference between text and binary strings is that
3305 * the initial coding scheme is %|bare|% for text strings and
3306 * %|hex|% for binary strings.
3307 */
3308
3309static int parse_text(union tvec_regval *rv, const struct tvec_regdef *rd,
3310 struct tvec_state *tv)
b64eb60f 3311{
c81c35df 3312 void *p = rv->text.p;
b64eb60f 3313
c81c35df 3314 if (read_compound_string(&p, &rv->text.sz, TVCODE_BARE, 0, tv))
67b5031e 3315 return (-1);
c81c35df
MW
3316 rv->text.p = p;
3317 if (check_string_length(rv->text.sz, rd->arg.p, tv)) return (-1);
882a39c1 3318 return (0);
b64eb60f
MW
3319}
3320
882a39c1
MW
3321static int parse_bytes(union tvec_regval *rv, const struct tvec_regdef *rd,
3322 struct tvec_state *tv)
b64eb60f
MW
3323{
3324 void *p = rv->bytes.p;
3325
67b5031e
MW
3326 if (read_compound_string(&p, &rv->bytes.sz, TVCODE_HEX, 0, tv))
3327 return (-1);
882a39c1
MW
3328 rv->bytes.p = p;
3329 if (check_string_length(rv->bytes.sz, rd->arg.p, tv)) return (-1);
3330 return (0);
b64eb60f
MW
3331}
3332
c81c35df
MW
3333/* --- @dump_text@, @dump_bytes@ --- *
3334 *
3335 * Arguments: @const union tvec_regval *rv@ = register value
3336 * @const struct tvec_regdef *rd@ = register definition
3337 * @unsigned style@ = output style (@TVSF_...@)
3338 * @const struct gprintf_ops *gops@, @void *gp@ = format output
3339 *
3340 * Returns: ---
3341 *
3342 * Use: Dump a register value to the format output.
3343 *
3344 * Text string values are dumped as plain text, in double quotes
3345 * if necessary, and using backslash escape sequences for
3346 * nonprintable characters. Unless compact output is requested,
3347 * strings consisting of multiple lines are dumped with each
3348 * line of the string on a separate output line.
3349 *
3350 * Binary string values are dumped in hexadecimal. In compact
3351 * style, the output simply consists of a single block of hex
3352 * digits. Otherwise, the dump is a display consisting of
3353 * groups of hex digits, with comments showing the offset (if
3354 * the string is long enough) and the corresponding plain text.
3355 *
5c0f2e08 3356 * Empty strings are dumped as %|#empty|%.
c81c35df
MW
3357 */
3358
5c0f2e08
MW
3359static void dump_empty(const char *ty, unsigned style,
3360 const struct gprintf_ops *gops, void *go)
3361{
3362 if (style&TVSF_RAW) gprintf(gops, go, "%s:", ty);
3363 if (!(style&TVSF_COMPACT)) gprintf(gops, go, "#empty");
3364 if (!(style&(TVSF_COMPACT | TVSF_RAW))) gprintf(gops, go, " ; = ");
3365 if (!(style&TVSF_RAW)) gprintf(gops, go, "\"\"");
3366}
3367
3368
c81c35df
MW
3369static void dump_text(const union tvec_regval *rv,
3370 const struct tvec_regdef *rd,
3371 unsigned style,
3372 const struct gprintf_ops *gops, void *go)
b64eb60f
MW
3373{
3374 const unsigned char *p, *q, *l;
b64eb60f
MW
3375 unsigned f = 0;
3376#define f_nonword 1u
3377#define f_newline 2u
3378
5c0f2e08 3379 if (!rv->text.sz) { dump_empty("text", style, gops, go); return; }
b64eb60f 3380
c81c35df 3381 p = (const unsigned char *)rv->text.p; l = p + rv->text.sz;
5c0f2e08
MW
3382 if (style&TVSF_RAW) { gprintf(gops, go, "text:"); goto quote; }
3383 else if (style&TVSF_COMPACT) goto quote;
3384
67b5031e
MW
3385 switch (*p) {
3386 case '!': case '#': case ';': case '"': case '\'':
3387 case '(': case '{': case '[': case ']': case '}': case ')':
3388 f |= f_nonword; break;
3389 }
b64eb60f
MW
3390 for (q = p; q < l; q++)
3391 if (*q == '\n' && q != l - 1) f |= f_newline;
5c0f2e08 3392 else if (!*q || !ISGRAPH(*q) || *q == '\\') f |= f_nonword;
e63124bc 3393 if (f&f_newline) { gprintf(gops, go, "\n\t"); goto quote; }
b64eb60f 3394 else if (f&f_nonword) goto quote;
67b5031e 3395
c81c35df 3396 gops->putm(go, (const char *)p, rv->text.sz);
67b5031e 3397 return;
b64eb60f
MW
3398
3399quote:
e63124bc 3400 gprintf(gops, go, "\"");
b64eb60f 3401 for (q = p; q < l; q++)
5c0f2e08 3402 if (!ISPRINT(*q) || *q == '"') {
e63124bc 3403 if (p < q) gops->putm(go, (const char *)p, q - p);
67b5031e 3404 if (*q != '\n' || (style&TVSF_COMPACT))
3efcfd2d 3405 format_charesc(gops, go, *q, FCF_BRACE);
67b5031e
MW
3406 else {
3407 if (q + 1 == l) { gprintf(gops, go, "\\n\""); return; }
3408 else gprintf(gops, go, "\\n\"\n\t\"");
3409 }
3410 p = q + 1;
b64eb60f 3411 }
e63124bc
MW
3412 if (p < q) gops->putm(go, (const char *)p, q - p);
3413 gprintf(gops, go, "\"");
b64eb60f
MW
3414
3415#undef f_nonword
3416#undef f_newline
3417}
3418
3419static void dump_bytes(const union tvec_regval *rv,
3420 const struct tvec_regdef *rd,
e63124bc
MW
3421 unsigned style,
3422 const struct gprintf_ops *gops, void *go)
b64eb60f
MW
3423{
3424 const unsigned char *p = rv->bytes.p, *l = p + rv->bytes.sz;
3425 size_t off, sz = rv->bytes.sz;
3426 unsigned i, n;
3427 int wd;
3428
5c0f2e08 3429 if (!rv->text.sz) { dump_empty("bytes", style, gops, go); return; }
b64eb60f 3430
5c0f2e08 3431 if (style&(TVSF_COMPACT | TVSF_RAW)) {
b1a20bee 3432 if (style&TVSF_RAW) gprintf(gops, go, "bytes:");
e63124bc 3433 while (p < l) gprintf(gops, go, "%02x", *p++);
b64eb60f
MW
3434 return;
3435 }
3436
e63124bc 3437 if (sz > 16) gprintf(gops, go, "\n\t");
b64eb60f
MW
3438
3439 off = 0; wd = hex_width(sz);
3440 while (p < l) {
3441 if (l - p < 16) n = l - p;
3442 else n = 16;
3443
67b5031e 3444 for (i = 0; i < n; i++) {
e63124bc
MW
3445 if (i < n) gprintf(gops, go, "%02x", p[i]);
3446 else gprintf(gops, go, " ");
67b5031e 3447 if (i < n - 1 && i%4 == 3) gprintf(gops, go, " ");
b64eb60f 3448 }
e63124bc
MW
3449 gprintf(gops, go, " ; ");
3450 if (sz > 16) gprintf(gops, go, "[%0*lx] ", wd, (unsigned long)off);
b64eb60f 3451 for (i = 0; i < n; i++)
e63124bc 3452 gprintf(gops, go, "%c", isprint(p[i]) ? p[i] : '.');
b64eb60f 3453 p += n; off += n;
e63124bc 3454 if (p < l) gprintf(gops, go, "\n\t");
b64eb60f
MW
3455 }
3456}
3457
c81c35df
MW
3458/* Text and byte string type definitions. */
3459const struct tvec_regty tvty_text = {
3460 init_text, release_text, eq_text,
3461 tobuf_text, frombuf_text,
3462 parse_text, dump_text
b64eb60f 3463};
b64eb60f 3464const struct tvec_regty tvty_bytes = {
e63124bc 3465 init_bytes, release_bytes, eq_bytes,
b64eb60f
MW
3466 tobuf_bytes, frombuf_bytes,
3467 parse_bytes, dump_bytes
3468};
3469
c81c35df 3470/* --- @tvec_claimeq_text@ --- *
67b5031e
MW
3471 *
3472 * Arguments: @struct tvec_state *tv@ = test-vector state
3473 * @const char *p0@, @size_t sz0@ = first string with length
3474 * @const char *p1@, @size_t sz1@ = second string with length
3475 * @const char *file@, @unsigned @lno@ = calling file and line
3476 * @const char *expr@ = the expression to quote on failure
3477 *
3478 * Returns: Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
3479 * zero.
3480 *
3481 * Use: Check that strings at @p0@ and @p1@ are equal. As for
3482 * @tvec_claim@ above, a test case is automatically begun and
3483 * ended if none is already underway. If the values are
3484 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
3485 * mismatched values are dumped: @p0@ is printed as the output
3486 * value and @p1@ is printed as the input reference.
3487 */
3488
c81c35df
MW
3489int tvec_claimeq_text(struct tvec_state *tv,
3490 const char *p0, size_t sz0,
3491 const char *p1, size_t sz1,
3492 const char *file, unsigned lno, const char *expr)
b64eb60f 3493{
b1a20bee
MW
3494 tv->out[0].v.text.p = UNCONST(char, p0); tv->out[0].v.text.sz = sz0;
3495 tv->in[0].v.text.p =UNCONST(char, p1); tv->in[0].v.text.sz = sz1;
c81c35df 3496 return (tvec_claimeq(tv, &tvty_text, 0, file, lno, expr));
b64eb60f
MW
3497}
3498
c81c35df 3499/* --- @tvec_claimeq_textz@ --- *
67b5031e
MW
3500 *
3501 * Arguments: @struct tvec_state *tv@ = test-vector state
3502 * @const char *p0, *p1@ = two strings to compare
3503 * @const char *file@, @unsigned @lno@ = calling file and line
3504 * @const char *expr@ = the expression to quote on failure
3505 *
3506 * Returns: Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
3507 * zero.
3508 *
3509 * Use: Check that strings at @p0@ and @p1@ are equal, as for
3510 * @tvec_claimeq_text@, except that the strings are assumed
3511 * null-terminated, so their lengths don't need to be supplied
3512 * explicitly.
3513 */
3514
c81c35df
MW
3515int tvec_claimeq_textz(struct tvec_state *tv,
3516 const char *p0, const char *p1,
3517 const char *file, unsigned lno, const char *expr)
b64eb60f 3518{
b1a20bee
MW
3519 tv->out[0].v.text.p = UNCONST(char, p0); tv->out[0].v.text.sz = strlen(p0);
3520 tv->in[0].v.text.p = UNCONST(char, p1); tv->in[0].v.text.sz = strlen(p1);
c81c35df 3521 return (tvec_claimeq(tv, &tvty_text, 0, file, lno, expr));
b64eb60f
MW
3522}
3523
67b5031e
MW
3524/* --- @tvec_claimeq_bytes@ --- *
3525 *
3526 * Arguments: @struct tvec_state *tv@ = test-vector state
3527 * @const void *p0@, @size_t sz0@ = first string with length
3528 * @const void *p1@, @size_t sz1@ = second string with length
3529 * @const char *file@, @unsigned @lno@ = calling file and line
3530 * @const char *expr@ = the expression to quote on failure
3531 *
3532 * Returns: Nonzero if the strings at @p0@ and @p1@ are equal, otherwise
3533 * zero.
3534 *
3535 * Use: Check that binary strings at @p0@ and @p1@ are equal. As for
3536 * @tvec_claim@ above, a test case is automatically begun and
3537 * ended if none is already underway. If the values are
3538 * unequal, then @tvec_fail@ is called, quoting @expr@, and the
3539 * mismatched values are dumped: @p0@ is printed as the output
3540 * value and @p1@ is printed as the input reference.
3541 */
3542
b64eb60f
MW
3543int tvec_claimeq_bytes(struct tvec_state *tv,
3544 const void *p0, size_t sz0,
3545 const void *p1, size_t sz1,
3546 const char *file, unsigned lno, const char *expr)
3547{
b1a20bee
MW
3548 tv->out[0].v.bytes.p = UNCONST(void, p0); tv->out[0].v.bytes.sz = sz0;
3549 tv->in[0].v.bytes.p = UNCONST(void, p1); tv->in[0].v.bytes.sz = sz1;
b64eb60f
MW
3550 return (tvec_claimeq(tv, &tvty_bytes, 0, file, lno, expr));
3551}
3552
c81c35df 3553/* --- @tvec_alloctext@, @tvec_allocbytes@ --- *
67b5031e
MW
3554 *
3555 * Arguments: @union tvec_regval *rv@ = register value
3556 * @size_t sz@ = required size
3557 *
3558 * Returns: ---
3559 *
3560 * Use: Allocate space in a text or binary string register. If the
3561 * current register size is sufficient, its buffer is left
3562 * alone; otherwise, the old buffer, if any, is freed and a
3563 * fresh buffer allocated. These functions are not intended to
3564 * be used to adjust a buffer repeatedly, e.g., while building
3565 * output incrementally: (a) they will perform badly, and (b)
3566 * the old buffer contents are simply discarded if reallocation
3567 * is necessary. Instead, use a @dbuf@ or @dstr@.
3568 *
c81c35df 3569 * The @tvec_alloctext@ function sneakily allocates an extra
67b5031e
MW
3570 * byte for a terminating zero. The @tvec_allocbytes@ function
3571 * doesn't do this.
3572 */
3573
c81c35df 3574void tvec_alloctext(union tvec_regval *rv, size_t sz)
67b5031e 3575{
b1a20bee
MW
3576 if (rv->text.sz <= sz)
3577 { free(rv->text.p); rv->text.p = x_alloc(&arena_stdlib, sz + 1); }
3578 memset(rv->text.p, '?', sz); rv->text.sz = sz;
67b5031e
MW
3579}
3580
3581void tvec_allocbytes(union tvec_regval *rv, size_t sz)
3582{
b1a20bee
MW
3583 if (rv->bytes.sz < sz)
3584 { free(rv->bytes.p); rv->bytes.p = x_alloc(&arena_stdlib, sz); }
3585 memset(rv->bytes.p, '?', sz); rv->bytes.sz = sz;
67b5031e
MW
3586}
3587
b64eb60f
MW
3588/*----- Buffer type -------------------------------------------------------*/
3589
adec5584
MW
3590/* --- @init_buffer@ --- *
3591 *
3592 * Arguments: @union tvec_regval *rv@ = register value
3593 * @const struct tvec_regdef *rd@ = register definition
3594 *
3595 * Returns: ---
3596 *
3597 * Use: Initialize a register value.
3598 *
3599 * Buffer values are initialized with a null pointer,
3600 * zero length, and zero residue, modulus, and offset.
3601 */
3602
3603static void init_buffer(union tvec_regval *rv, const struct tvec_regdef *rd)
3604 { rv->buf.p = 0; rv->buf.sz = rv->buf.a = rv->buf.m = rv->buf.off = 0; }
3605
3606/* --- @release_buffer@ --- *
3607 *
3608 * Arguments: @const union tvec_regval *rv@ = register value
3609 * @const struct tvec_regdef *rd@ = register definition
3610 *
3611 * Returns: ---
3612 *
3613 * Use: Release resources held by a register value.
3614 *
3615 * Buffers are freed.
3616 */
3617
3618static void release_buffer(union tvec_regval *rv,
3619 const struct tvec_regdef *rd)
b1a20bee 3620 { if (rv->buf.p) free(rv->buf.p - rv->buf.off); }
c81c35df
MW
3621
3622/* --- @eq_buffer@ --- *
3623 *
3624 * Arguments: @const union tvec_regval *rv0, *rv1@ = register values
3625 * @const struct tvec_regdef *rd@ = register definition
3626 *
3627 * Returns: Nonzero if the values are equal, zero if unequal
3628 *
3629 * Use: Compare register values for equality.
3630 *
adec5584
MW
3631 * Buffer values are equal if and only if their sizes and
3632 * alignment parameters are equal; their contents are
3633 * %%\emph{not}%% compared.
c81c35df
MW
3634 */
3635
b64eb60f
MW
3636static int eq_buffer(const union tvec_regval *rv0,
3637 const union tvec_regval *rv1,
3638 const struct tvec_regdef *rd)
adec5584
MW
3639{
3640 return (rv0->buf.sz == rv1->buf.sz &&
3641 rv0->buf.a == rv1->buf.a &&
3642 rv0->buf.m == rv1->buf.m);
3643}
b64eb60f 3644
c81c35df
MW
3645/* --- @tobuf_buffer@ --- *
3646 *
3647 * Arguments: @buf *b@ = buffer
3648 * @const union tvec_regval *rv@ = register value
3649 * @const struct tvec_regdef *rd@ = register definition
3650 *
3651 * Returns: Zero on success, %$-1$% on failure.
3652 *
3653 * Use: Serialize a register value to a buffer.
3654 *
adec5584
MW
3655 * Buffer values are serialized as their lengths, residues, and
3656 * moduli, as unsigned integers.
c81c35df
MW
3657 */
3658
b64eb60f
MW
3659static int tobuf_buffer(buf *b, const union tvec_regval *rv,
3660 const struct tvec_regdef *rd)
adec5584
MW
3661{
3662 return (unsigned_to_buf(b, rv->buf.sz) ||
3663 unsigned_to_buf(b, rv->buf.a) ||
3664 unsigned_to_buf(b, rv->buf.m));
3665}
c81c35df
MW
3666
3667/* --- @frombuf_buffer@ --- *
3668 *
3669 * Arguments: @buf *b@ = buffer
3670 * @union tvec_regval *rv@ = register value
3671 * @const struct tvec_regdef *rd@ = register definition
3672 *
3673 * Returns: Zero on success, %$-1$% on failure.
3674 *
3675 * Use: Deserialize a register value from a buffer.
3676 *
3677 * Buffer values are serialized as their lengths, residues, and
3678 * moduli, as unsigned integers. Only these parameters are
3679 * restored; no buffer is allocated on deserialization.
3680 */
3681
b64eb60f
MW
3682static int frombuf_buffer(buf *b, union tvec_regval *rv,
3683 const struct tvec_regdef *rd)
3684{
adec5584 3685 unsigned long sz, a, m;
b64eb60f 3686
adec5584
MW
3687 if (unsigned_from_buf(b, &sz)) return (-1);
3688 if (unsigned_from_buf(b, &a)) return (-1);
3689 if (unsigned_from_buf(b, &m)) return (-1);
3690 if (sz > (size_t)-1 || a > (size_t)-1 || m > (size_t)-1)
3691 { buf_break(b); return (-1); }
3692 rv->buf.sz = sz; rv->buf.a = a; rv->buf.m = m;
b64eb60f
MW
3693 return (0);
3694}
3695
c81c35df
MW
3696/* --- @parse_buffer@ --- *
3697 *
3698 * Arguments: @union tvec_regval *rv@ = register value
3699 * @const struct tvec_regdef *rd@ = register definition
3700 * @struct tvec_state *tv@ = test-vector state
3701 *
3702 * Returns: Zero on success, %$-1$% on error.
3703 *
3704 * Use: Parse a register value from an input file.
3705 *
c4ccbbf9
MW
3706 * The input format for a buffer value is a size, followed by an
3707 * optional `%|@$%' and an alignment quantum and a further
3708 * optional `%|+|%' and an alignment offset. The size, quantum,
3709 * and offset are syntactically sizes.
c81c35df 3710 *
c4ccbbf9 3711 * The buffer is not allocated.
c81c35df
MW
3712 */
3713
882a39c1
MW
3714static int parse_buffer(union tvec_regval *rv,
3715 const struct tvec_regdef *rd,
3716 struct tvec_state *tv)
b64eb60f 3717{
c4ccbbf9 3718 unsigned long sz, a = 0, m = 0;
adec5584 3719 int ch, rc;
b64eb60f 3720
c4ccbbf9
MW
3721 if (parse_szint(tv, &sz, "@;", "buffer length")) { rc = -1; goto end; }
3722 if (check_unsigned_range(sz, &tvrange_size, tv, "buffer length"))
3723 { rc = -1; goto end; }
adec5584 3724 if (check_string_length(sz, rd->arg.p, tv)) { rc = -1; goto end; }
b64eb60f 3725
adec5584
MW
3726 tvec_skipspc(tv);
3727 ch = getc(tv->fp);
3728 if (ch == ';' || ch == '\n') { ungetc(ch, tv->fp); goto done; }
3729 else if (ch != '@') { rc = tvec_syntax(tv, ch, "`@'"); goto end; }
3730
c4ccbbf9
MW
3731 if (parse_szint(tv, &m, "+;", "alignment quantum")) { rc = -1; goto end; }
3732 if (check_unsigned_range(a, &tvrange_size, tv, "alignment quantum"))
3733 { rc = -1; goto end; }
adec5584
MW
3734 if (m == 1) m = 0;
3735
3736 tvec_skipspc(tv);
3737 ch = getc(tv->fp);
3738 if (ch == ';' || ch == '\n') { ungetc(ch, tv->fp); goto done; }
3739 else if (ch != '+') { rc = tvec_syntax(tv, ch, "`+'"); goto end; }
3740
c4ccbbf9
MW
3741 if (parse_szint(tv, &a, ";", "alignment offset")) { rc = -1; goto end; }
3742 if (check_unsigned_range(m, &tvrange_size, tv, "alignment offset"))
3743 { rc = -1; goto end; }
adec5584
MW
3744 if (a >= m) {
3745 rc = tvec_error(tv, "alignment offset %lu >= quantum %lu",
3746 (unsigned long)a, (unsigned long)m);
3747 goto end;
b64eb60f 3748 }
b64eb60f 3749
adec5584 3750done:
882a39c1 3751 if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; }
adec5584 3752 rv->buf.sz = sz; rv->buf.a = a; rv->buf.m = m;
882a39c1
MW
3753 rc = 0;
3754end:
adec5584 3755 return (rc);
b64eb60f
MW
3756}
3757
c81c35df
MW
3758/* --- @dump_buffer@ --- *
3759 *
3760 * Arguments: @const union tvec_regval *rv@ = register value
3761 * @const struct tvec_regdef *rd@ = register definition
3762 * @unsigned style@ = output style (@TVSF_...@)
3763 * @const struct gprintf_ops *gops@, @void *gp@ = format output
3764 *
3765 * Returns: ---
3766 *
3767 * Use: Dump a register value to the format output.
3768 *
c4ccbbf9
MW
3769 * Buffer values are dumped as their size, with the alignment
3770 * quantum and alignment offset if these are non-default.
c81c35df
MW
3771 */
3772
b64eb60f
MW
3773static void dump_buffer(const union tvec_regval *rv,
3774 const struct tvec_regdef *rd,
e63124bc
MW
3775 unsigned style,
3776 const struct gprintf_ops *gops, void *go)
b64eb60f 3777{
b1a20bee 3778 if (style&TVSF_RAW) gprintf(gops, go, "buffer:");
adec5584
MW
3779 format_size(gops, go, rv->buf.sz, style);
3780 if (rv->buf.m) {
5c0f2e08 3781 gprintf(gops, go, style&(TVSF_COMPACT | TVSF_RAW) ? "@" : " @ ");
adec5584
MW
3782 format_size(gops, go, rv->buf.m, style);
3783 if (rv->buf.a) {
5c0f2e08 3784 gprintf(gops, go, style&(TVSF_COMPACT | TVSF_RAW) ? "+" : " + ");
adec5584
MW
3785 format_size(gops, go, rv->buf.a, style);
3786 }
3787 }
b1a20bee 3788 if (!(style&(TVSF_COMPACT | TVSF_RAW))) {
13ee7406 3789 gprintf(gops, go, " ; = %lu", (unsigned long)rv->buf.sz);
adec5584 3790 if (rv->buf.m) {
13ee7406
MW
3791 gprintf(gops, go, " @ %lu", (unsigned long)rv->buf.m);
3792 if (rv->buf.a) gprintf(gops, go, " + %lu", (unsigned long)rv->buf.a);
adec5584
MW
3793 }
3794 gprintf(gops, go, " = "); format_unsigned_hex(gops, go, rv->buf.sz);
3795 if (rv->buf.m) {
3796 gprintf(gops, go, " @ "); format_unsigned_hex(gops, go, rv->buf.m);
3797 if (rv->buf.a) {
3798 gprintf(gops, go, " + ");
3799 format_unsigned_hex(gops, go, rv->buf.a);
3800 }
3801 }
b64eb60f
MW
3802 }
3803}
3804
c81c35df 3805/* Buffer type definition. */
b64eb60f 3806const struct tvec_regty tvty_buffer = {
adec5584 3807 init_buffer, release_buffer, eq_buffer,
b64eb60f
MW
3808 tobuf_buffer, frombuf_buffer,
3809 parse_buffer, dump_buffer
3810};
3811
adec5584
MW
3812/* --- @tvec_initbuffer@ --- *
3813 *
3814 * Arguments: @union tvec_regval *rv@ = register value
d056fbdf 3815 * @const union tvec_regval *ref@ = source buffer
adec5584
MW
3816 * @size_t sz@ = size to allocate
3817 *
3818 * Returns: ---
3819 *
d056fbdf 3820 * Use: Initialize the alignment parameters in @rv@ to match @ref@,
adec5584
MW
3821 * and the size to @sz@.
3822 */
3823
3824void tvec_initbuffer(union tvec_regval *rv,
d056fbdf
MW
3825 const union tvec_regval *ref, size_t sz)
3826 { rv->buf.sz = sz; rv->buf.a = ref->buf.a; rv->buf.m = ref->buf.m; }
adec5584
MW
3827
3828/* --- @tvec_allocbuffer@ --- *
3829 *
3830 * Arguments: @union tvec_regval *rv@ = register value
3831 *
3832 * Returns: ---
3833 *
3834 * Use: Allocate @sz@ bytes to the buffer and fill the space with a
3835 * distinctive pattern.
3836 */
3837
3838void tvec_allocbuffer(union tvec_regval *rv)
3839{
3840 unsigned char *p; size_t n;
3841
b1a20bee 3842 if (rv->buf.p) free(rv->buf.p - rv->buf.off);
adec5584
MW
3843
3844 if (rv->buf.m < 2) {
b1a20bee 3845 rv->buf.p = x_alloc(&arena_stdlib, rv->buf.sz); rv->buf.off = 0;
adec5584 3846 } else {
b1a20bee 3847 p = x_alloc(&arena_stdlib, rv->buf.sz + rv->buf.m - 1);
adec5584
MW
3848 n = (size_t)p%rv->buf.m;
3849 rv->buf.off = (rv->buf.a - n + rv->buf.m)%rv->buf.m;
3850 rv->buf.p = p + rv->buf.off;
3851 }
3852 memset(rv->buf.p, '?', rv->buf.sz);
3853}
3854
b64eb60f 3855/*----- That's all, folks -------------------------------------------------*/