Commit | Line | Data |
---|---|---|
b64eb60f MW |
1 | /* -*-c-*- |
2 | * | |
3 | * Types for the test-vector framework | |
4 | * | |
5 | * (c) 2023 Straylight/Edgeware | |
6 | */ | |
7 | ||
8 | /*----- Licensing notice --------------------------------------------------* | |
9 | * | |
10 | * This file is part of the mLib utilities library. | |
11 | * | |
12 | * mLib is free software: you can redistribute it and/or modify it under | |
13 | * the terms of the GNU Library General Public License as published by | |
14 | * the Free Software Foundation; either version 2 of the License, or (at | |
15 | * your option) any later version. | |
16 | * | |
17 | * mLib is distributed in the hope that it will be useful, but WITHOUT | |
18 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
19 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public | |
20 | * License for more details. | |
21 | * | |
22 | * You should have received a copy of the GNU Library General Public | |
23 | * License along with mLib. If not, write to the Free Software | |
24 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, | |
25 | * USA. | |
26 | */ | |
27 | ||
28 | /*----- Header files ------------------------------------------------------*/ | |
29 | ||
30 | #include <assert.h> | |
31 | #include <ctype.h> | |
32 | #include <errno.h> | |
e63124bc | 33 | #include <float.h> |
b64eb60f | 34 | #include <limits.h> |
e63124bc | 35 | #include <math.h> |
b64eb60f MW |
36 | #include <stdio.h> |
37 | #include <string.h> | |
38 | ||
39 | #include "buf.h" | |
40 | #include "codec.h" | |
41 | # include "base32.h" | |
42 | # include "base64.h" | |
43 | # include "hex.h" | |
44 | #include "dstr.h" | |
67b5031e | 45 | #include "maths.h" |
b64eb60f MW |
46 | #include "tvec.h" |
47 | ||
48 | /*----- Preliminary utilities ---------------------------------------------*/ | |
49 | ||
67b5031e MW |
/* --- @trivial_release@ --- *
 *
 * Arguments:   @union tvec_regval *rv@ = a register value (ignored)
 *              @const struct tvec_regdef *rd@ = the register definition
 *                      (ignored)
 *
 * Returns:     ---
 *
 * Use:         A release operation which does nothing at all.  Suitable
 *              for register values which don't hold on to any resources.
 */

static void trivial_release(union tvec_regval *rv,
                            const struct tvec_regdef *rd)
{
  /* There is nothing to free; just mention the arguments. */
  (void)rv;
  (void)rd;
}
64 | ||
67b5031e MW |
65 | /*----- Integer utilities -------------------------------------------------*/ |
66 | ||
67 | /* --- @unsigned_to_buf@, @signed_to_buf@ --- * | |
68 | * | |
69 | * Arguments: @buf *b@ = buffer to write on | |
70 | * @unsigned long u@ or @long i@ = integer to write | |
71 | * | |
72 | * Returns: Zero on success, @-1@ on failure. | |
73 | * | |
74 | * Use: Write @i@ to the buffer, in big-endian (two's-complement, it | |
75 | * signed) format. | |
76 | */ | |
77 | ||
78 | static int unsigned_to_buf(buf *b, unsigned long u) | |
79 | { kludge64 k; ASSIGN64(k, u); return (buf_putk64l(b, k)); } | |
80 | ||
b64eb60f MW |
81 | static int signed_to_buf(buf *b, long i) |
82 | { | |
83 | kludge64 k; | |
84 | unsigned long u; | |
85 | ||
86 | u = i; | |
87 | if (i >= 0) ASSIGN64(k, u); | |
88 | else { ASSIGN64(k, ~u); CPL64(k, k); } | |
89 | return (buf_putk64l(b, k)); | |
90 | } | |
91 | ||
67b5031e MW |
92 | /* --- @unsigned_from_buf@, @signed_from_buf@ --- * |
93 | * | |
94 | * Arguments: @buf *b@ = buffer to write on | |
95 | * @unsigned long *u_out@ or @long *i_out@ = where to put the | |
96 | * result | |
97 | * | |
98 | * Returns: Zero on success, @-1@ on failure. | |
99 | * | |
100 | * Use: Read an integer, in big-endian (two's-complement, if signed) | |
101 | * format, from the buffer. | |
102 | */ | |
b64eb60f MW |
103 | |
104 | static int unsigned_from_buf(buf *b, unsigned long *u_out) | |
105 | { | |
106 | kludge64 k, ulmax; | |
107 | ||
108 | ASSIGN64(ulmax, ULONG_MAX); | |
109 | if (buf_getk64l(b, &k)) return (-1); | |
110 | if (CMP64(k, >, ulmax)) return (-1); | |
111 | *u_out = GET64(unsigned long, k); return (0); | |
112 | } | |
113 | ||
67b5031e MW |
/* --- @hex_width@ --- *
 *
 * Arguments:   @unsigned long u@ = an integer
 *
 * Returns:     A suitable number of hex digits with which to display @u@:
 *              the smallest power of two which is sufficient to show the
 *              value and is at least 2.
 */

static int hex_width(unsigned long u)
{
  int bits = 8;                         /* candidate width in bits */
  unsigned long rest = u >> 8;          /* what doesn't fit in @bits@ */

  /* Keep doubling the width until nothing is left over.  Each shift is by
   * the current width, so the total shifted so far is always @bits@.
   */
  while (rest) { rest >>= bits; bits *= 2; }

  return (bits/4);
}
131 | ||
67b5031e MW |
/* --- @format_unsigned_hex@, @format_signed_hex@ --- *
 *
 * Arguments:   @const struct gprintf_ops *gops@ = print operations
 *              @void *go@ = print destination
 *              @unsigned long u@ or @long i@ = integer to print
 *
 * Returns:     ---
 *
 * Use:         Print an unsigned or signed integer in hexadecimal, with a
 *              `0x' prefix, zero-padded to the width chosen by
 *              @hex_width@.  A negative number is printed as a `-' sign
 *              followed by its magnitude.
 */

static void format_unsigned_hex(const struct gprintf_ops *gops, void *go,
                                unsigned long u)
{
  int wd = hex_width(u);

  gprintf(gops, go, "0x%0*lx", wd, u);
}

static void format_signed_hex(const struct gprintf_ops *gops, void *go,
                              long i)
{
  const char *sign = "";
  unsigned long mag = i;

  /* Negate in unsigned arithmetic so that @LONG_MIN@ is handled safely. */
  if (i < 0) { sign = "-"; mag = -mag; }
  gprintf(gops, go, "%s0x%0*lx", sign, hex_width(mag), mag);
}
153 | ||
154 | static int signed_from_buf(buf *b, long *i_out) | |
155 | { | |
156 | kludge64 k, lmax, not_lmin; | |
157 | ||
158 | ASSIGN64(lmax, LONG_MAX); ASSIGN64(not_lmin, ~(unsigned long)LONG_MIN); | |
159 | if (buf_getk64l(b, &k)) return (-1); | |
160 | if (CMP64(k, <=, lmax)) *i_out = (long)GET64(unsigned long, k); | |
161 | else { | |
162 | CPL64(k, k); | |
163 | if (CMP64(k, <=, not_lmin)) *i_out = -(long)GET64(unsigned long, k) - 1; | |
164 | else return (-1); | |
165 | } | |
166 | return (0); | |
167 | } | |
168 | ||
169 | /* --- @check_unsigned_range@, @check_signed_range@ --- * | |
170 | * | |
171 | * Arguments: @unsigned long u@ or @long i@ = an integer | |
172 | * @const struct tvec_urange *ur@ or | |
173 | * @const struct tvec_irange *ir@ = range specification, | |
174 | * or null | |
175 | * @struct tvec_state *tv@ = test vector state | |
176 | * | |
177 | * Returns: Zero on success, or @-1@ on error. | |
178 | * | |
179 | * Use: Check that the integer is within bounds. If not, report a | |
180 | * suitable error and return a failure indication. | |
181 | */ | |
182 | ||
882a39c1 MW |
183 | static int check_signed_range(long i, |
184 | const struct tvec_irange *ir, | |
185 | struct tvec_state *tv) | |
b64eb60f | 186 | { |
882a39c1 | 187 | if (ir && (ir->min > i || i > ir->max)) { |
b64eb60f MW |
188 | tvec_error(tv, "integer %ld out of range (must be in [%ld .. %ld])", |
189 | i, ir->min, ir->max); | |
882a39c1 MW |
190 | return (-1); |
191 | } | |
192 | return (0); | |
b64eb60f MW |
193 | } |
194 | ||
882a39c1 MW |
195 | static int check_unsigned_range(unsigned long u, |
196 | const struct tvec_urange *ur, | |
197 | struct tvec_state *tv) | |
b64eb60f | 198 | { |
882a39c1 | 199 | if (ur && (ur->min > u || u > ur->max)) { |
b64eb60f MW |
200 | tvec_error(tv, "integer %lu out of range (must be in [%lu .. %lu])", |
201 | u, ur->min, ur->max); | |
882a39c1 MW |
202 | return (-1); |
203 | } | |
204 | return (0); | |
b64eb60f MW |
205 | } |
206 | ||
67b5031e MW |
/* --- @chtodig@ --- *
 *
 * Arguments:   @int ch@ = a character
 *
 * Returns:     The numeric value of the character as a digit, or @-1@ if
 *              it's not a digit.  Letters count as extended digits starting
 *              with value 10; case is not significant.  The largest value
 *              returned is therefore 35 (for `z').
 *
 *              The range tests assume that the letters are contiguous in
 *              the execution character set.
 */

static int chtodig(int ch)
{
  if ('0' <= ch && ch <= '9') return (ch - '0');
  else if ('a' <= ch && ch <= 'z') return (ch - 'a' + 10);
  else if ('A' <= ch && ch <= 'Z') return (ch - 'A' + 10);
  else return (-1);
}

/* --- @parse_unsigned_integer@, @parse_signed_integer@ --- *
 *
 * Arguments:   @unsigned long *u_out@, @long *i_out@ = where to put the
 *                      result
 *              @const char **q_out@ = where to put the end position
 *              @const char *p@ = pointer to the string to parse
 *
 * Returns:     Zero on success, @-1@ on error.
 *
 * Use:         Parse an integer from a string in the test-vector format.
 *              This is mostly an extension of the traditional C @strtoul@
 *              format: supported inputs include:
 *
 *                * NNN -- a decimal number (even if it starts with `0');
 *                * 0xNNN -- hexadecimal;
 *                * 0oNNN -- octal;
 *                * 0bNNN -- binary;
 *                * NNrNNN -- base NN, for 1 <= NN <= 36.
 *
 *              Furthermore, single underscores are permitted internally as
 *              an insignificant digit separator.
 *
 *              On success, @*q_out@ points just past the last character
 *              which was accepted as part of the number.  On failure (no
 *              digits, or overflow), the outputs are left untouched.
 */

static int parse_unsigned_integer(unsigned long *u_out, const char **q_out,
                                  const char *p)
{
  unsigned long u;
  int ch, d, r;
  const char *q;
  unsigned f = 0;
#define f_implicit 1u                   /* implicitly reading base 10 */
#define f_digit 2u                      /* read a real digit */
#define f_uscore 4u                     /* found an underscore */

  /* Initial setup
   *
   * This will deal with the traditional `0[box]...' prefixes.  We'll leave
   * our new `NNr...' syntax for later.  The check on @p[1]@ makes sure that
   * we never look at @p[2]@ beyond the end of a lone `0'.
   */
  if (p[0] != '0' || !p[1]) {
    d = chtodig(*p); if (0 > d || d >= 10) return (-1);
    r = 10; u = d; p++; f |= f_implicit | f_digit;
  } else {
    u = 0; d = chtodig(p[2]);
    if (d < 0) { r = 10; f |= f_implicit | f_digit; p++; }
    else if ((p[1] == 'x' || p[1] == 'X') && d < 16) { r = 16; p += 2; }
    else if ((p[1] == 'o' || p[1] == 'O') && d < 8) { r = 8; p += 2; }
    else if ((p[1] == 'b' || p[1] == 'B') && d < 2) { r = 2; p += 2; }
    else { r = 10; f |= f_digit; p++; }
  }

  q = p;
  for (;;) {
    /* Work through the string a character at a time.  Here, @q@ trails
     * behind @p@, marking the end of the last thing we definitely
     * accepted.
     */

    ch = *p; switch (ch) {

      case '_':
        /* An underscore is OK if we haven't just seen one. */

        if (f&f_uscore) goto done;
        p++; f = (f&~f_implicit) | f_uscore;
        break;

      case 'r': case 'R':
        /* If we're not scanning decimal implicitly then this can't be a
         * radix marker, but it might still be a perfectly good digit (with
         * value 27) in a sufficiently large base.
         */
        if (!(f&f_implicit)) goto digit;

        /* Otherwise, an `r' is OK if the number so far is a sensible
         * base -- the digits we understand run up to 35, so the largest
         * useful base is 36 -- and it's followed by a digit which is valid
         * in that base.
         */
        if (!u || u > 36) goto done;
        d = chtodig(p[1]); if (0 > d || d >= u) goto done;
        r = u; u = d; f = (f&~f_implicit) | f_digit; p += 2; q = p;
        break;

      default: digit:
        /* Otherwise we expect a valid digit and accumulate it, being
         * careful about overflow.
         */
        d = chtodig(ch); if (d < 0 || d >= r) goto done;
        if (u > ULONG_MAX/r) return (-1);
        u *= r; if (u > ULONG_MAX - d) return (-1);
        u += d; f = (f&~f_uscore) | f_digit; p++; q = p;
        break;
    }
  }

done:
  if (!(f&f_digit)) return (-1);
  *u_out = u; *q_out = q; return (0);

#undef f_implicit
#undef f_digit
#undef f_uscore
}
316 | ||
/* --- @parse_signed_integer@ --- *
 *
 * Arguments:   @long *i_out@ = where to put the result
 *              @const char **q_out@ = where to put the end position
 *              @const char *p@ = pointer to the string to parse
 *
 * Returns:     Zero on success, @-1@ on error.
 *
 * Use:         Parse an optional `+' or `-' sign followed by an unsigned
 *              integer in the format accepted by @parse_unsigned_integer@,
 *              and check that the result fits in a @long@.
 */

static int parse_signed_integer(long *i_out, const char **q_out,
                                const char *p)
{
  unsigned long mag;
  int neg = 0;

  /* Read an initial sign. */
  switch (*p) {
    case '-': neg = 1; /* fall through */
    case '+': p++; break;
    default: break;
  }

  /* Scan the magnitude as an unsigned number. */
  if (parse_unsigned_integer(&mag, q_out, p)) return (-1);

  /* The easy case: a nonnegative number. */
  if (!neg) {
    if (mag > LONG_MAX) return (-1);
    *i_out = mag;
    return (0);
  }

  /* A negative number.  Zero needs no work; otherwise work with
   * %$m - 1$% so that we can reach @LONG_MIN@ without ever overflowing.
   */
  if (!mag) { *i_out = 0; return (0); }
  if (mag - 1 > -(LONG_MIN + 1)) return (-1);
  *i_out = -(long)(mag - 1) - 1;
  return (0);
}
344 | ||
67b5031e MW |
/* --- @parse_unsigned@, @parse_signed@ --- *
 *
 * Arguments:   @unsigned long *u_out@ or @long *i_out@ = where to put the
 *                      result
 *              @const char *p@ = string to parse
 *              @const struct tvec_urange *ur@ or
 *                      @const struct tvec_irange *ir@ = range
 *                      specification, or null
 *              @struct tvec_state *tv@ = test vector state
 *
 * Returns:     Zero on success, @-1@ on error.
 *
 * Use:         Parse and range-check an integer.  Unlike
 *              @parse_(un)signed_integer@, these functions insist that
 *              nothing follows the final digit, and report errors
 *              themselves rather than leaving that to the caller.  The
 *              output is written only on success.
 */

static int parse_unsigned(unsigned long *u_out, const char *p,
                          const struct tvec_urange *ur,
                          struct tvec_state *tv)
{
  unsigned long u;
  const char *q;
  int rc;

  if (parse_unsigned_integer(&u, &q, p))
    rc = tvec_error(tv, "invalid unsigned integer `%s'", p);
  else if (*q)
    rc = tvec_syntax(tv, *q, "end-of-line");
  else if (check_unsigned_range(u, ur, tv))
    rc = -1;
  else
    { *u_out = u; rc = 0; }
  return (rc);
}

static int parse_signed(long *i_out, const char *p,
                        const struct tvec_irange *ir,
                        struct tvec_state *tv)
{
  long i;
  const char *q;
  int rc;

  if (parse_signed_integer(&i, &q, p))
    rc = tvec_error(tv, "invalid signed integer `%s'", p);
  else if (*q)
    rc = tvec_syntax(tv, *q, "end-of-line");
  else if (check_signed_range(i, ir, tv))
    rc = -1;
  else
    { *i_out = i; rc = 0; }
  return (rc);
}
390 | ||
67b5031e | 391 | /*----- Floating-point utilities ------------------------------------------*/ |
b64eb60f | 392 | |
67b5031e MW |
393 | /* --- @eqish_floating_p@ --- * |
394 | * | |
395 | * Arguments: @double x, y@ = two numbers to compare | |
396 | * @const struct tvec_floatinfo *fi@ = floating-point info | |
397 | * | |
398 | * Returns: Nonzero if the comparand @y@ is sufficiently close to the | |
399 | * reference @x@, or zero if it's definitely different. | |
400 | */ | |
3efcfd2d | 401 | |
67b5031e MW |
402 | static int eqish_floating_p(double x, double y, |
403 | const struct tvec_floatinfo *fi) | |
3efcfd2d | 404 | { |
67b5031e MW |
405 | double t; |
406 | ||
407 | if (NANP(x)) return (NANP(y)); else if (NANP(y)) return (0); | |
408 | if (INFP(x)) return (x == y); else if (INFP(y)) return (0); | |
409 | ||
410 | switch (fi ? fi->f&TVFF_EQMASK : TVFF_EXACT) { | |
411 | case TVFF_EXACT: | |
412 | return (x == y && NEGP(x) == NEGP(y)); | |
413 | case TVFF_ABSDELTA: | |
414 | t = x - y; if (t < 0) t = -t; return (t < fi->delta); | |
415 | case TVFF_RELDELTA: | |
416 | t = 1.0 - y/x; if (t < 0) t = -t; return (t < fi->delta); | |
417 | default: | |
418 | abort(); | |
419 | } | |
b64eb60f MW |
420 | } |
421 | ||
67b5031e MW |
/* --- @format_floating@ --- *
 *
 * Arguments:   @const struct gprintf_ops *gops@ = print operations
 *              @void *go@ = print destination
 *              @double x@ = number to print
 *
 * Returns:     ---
 *
 * Use:         Print a floating-point number, with enough digits that it
 *              can be read back accurately.  Non-finite values are written
 *              as the tokens `#nan', `#+inf' and `#-inf'.
 */

static void format_floating(const struct gprintf_ops *gops, void *go,
                            double x)
{
  int prec;

  /* Dispose of the non-finite values first. */
  if (NANP(x)) { gprintf(gops, go, "#nan"); return; }
  if (INFP(x)) { gprintf(gops, go, x > 0 ? "#+inf" : "#-inf"); return; }

  /* Ugh.  C doesn't provide any function for just printing a
   * floating-point number /correctly/, i.e., so that you can read the
   * result back and recover the number you first thought of.  There are
   * complicated algorithms published for doing this, but I really don't
   * want to get into that here.  So we have this.
   *
   * The sign doesn't cause significant difficulty so we're going to ignore
   * it for now.  So suppose we're given a number %$x = f b^e$%, in
   * base-%$b$% format with %$n$% digits, so %$f b^n$% and %$e$% are
   * integers, with %$0 \le f < 1$%.  We're going to convert it into the
   * nearest number of the form %$X = F B^E$% with %$N$% digits, with
   * similar conditions, only with the additional requirement that %$X$% is
   * normalized, i.e., that %$X = 0$% or %$F \ge B^{-1}$%.
   *
   * We're rounding to the nearest such %$X$%.  If there is to be ambiguity
   * in the conversion, then some %$x = f b^e$% and the next larger
   * representable number %$x' = x + b^{e-n}$% must both map to the same
   * %$X$%, which means both %$x$% and %$x'$% must be nearer to %$X$% than
   * any other number representable in the target system.  The next larger
   * number is %$X' = X + B^{E-N}$%; the next smaller number will normally
   * be %$W = X - B^{E-N}$%, but if %$F = 1/B$% then the next smaller number
   * is actually %$X - B^{E-N-1}$%.  We ignore this latter possibility in
   * the pursuit of a conservative estimate (though actually it doesn't
   * matter).
   *
   * If both %$x$% and %$x'$% map to %$X$% then we must have
   * %$L = X - B^{E-N}/2 \le x$% and %$x + b^{e-n} \le R = X + B^{E-N}/2$%;
   * so firstly %$f b^e = x \ge L = W + B^{E-N}/2 > W = (F - B^{-N}) B^E$%,
   * and secondly %$b^{e-n} \le B^{E-N}$%.  Since these inequalities are in
   * opposite senses, we can divide, giving
   *
   *   %$f b^e/b^{e-n} > (F - B^{-N}) B^E/B^{E-N}$% ,
   *
   * whence
   *
   *   %$f b^n > (F - B^{-N}) B^N = F B^N - 1$% .
   *
   * Now %$f \le 1 - b^{-n}$%, and %$F \ge B^{-1}$%, so, for this to be
   * possible, it must be the case that
   *
   *   %$(1 - b^{-n}) b^n = b^n - 1 > B^{N-1} - 1$% .
   *
   * Then rearrange and take logarithms, obtaining
   *
   *   %$(N - 1) \log B < n \log b$% ,
   *
   * and so
   *
   *   %$N < n \log b/\log B + 1$% .
   *
   * Recall that this is a necessary condition for a collision to occur; we
   * are therefore safe whenever
   *
   *   %$N \ge n \log b/\log B + 1$% ;
   *
   * so, taking ceilings,
   *
   *   %$N \ge \lceil n \log b/\log B \rceil + 1$% .
   *
   * So that's why we have this.
   *
   * I'm going to assume that @n = DBL_MANT_DIG@ is sufficiently small that
   * we can calculate this without ending up on the wrong side of an
   * integer boundary.
   *
   * In C11, we have @DBL_DECIMAL_DIG@, which should be the same value only
   * as a constant.  Except that modern compilers are more than clever
   * enough to work out that this is a constant anyway.
   *
   * This is sometimes an overestimate: we'll print out meaningless digits
   * that don't represent anything we actually know about the number in
   * question.  To fix that, we'd need a complicated algorithm like Steele
   * and White's Dragon4, Gay's @dtoa@, or Burger and Dybvig's algorithm
   * (note that Loitsch's Grisu2 is conservative, and Grisu3 hands off to
   * something else in difficult situations).
   */
  prec = ceil(DBL_MANT_DIG*log(FLT_RADIX)/log(10)) + 1;
  gprintf(gops, go, "%.*g", prec, x);
}
524 | ||
67b5031e MW |
525 | /* --- @parse_floating@ --- * |
526 | * | |
527 | * Arguments: @double *x_out@ = where to put the result | |
814e42ff | 528 | * @const char *q_out@ = where to leave end pointer, or null |
67b5031e MW |
529 | * @const char *p@ = string to parse |
530 | * @const struct tvec_floatinfo *fi@ = floating-point info | |
531 | * @struct tvec_state *tv@ = test vector state | |
532 | * | |
533 | * Returns: Zero on success, @-1@ on error. | |
534 | * | |
535 | * Use: Parse a floating-point number from a string. Reports any | |
814e42ff MW |
536 | * necessary errors. If @q_out@ is not null then trailing |
537 | * material is permitted and a pointer to it is left in | |
538 | * @*q_out@; this will be null if there is no trailing material. | |
67b5031e | 539 | */ |
e63124bc | 540 | |
814e42ff | 541 | static int parse_floating(double *x_out, const char **q_out, const char *p, |
e63124bc MW |
542 | const struct tvec_floatinfo *fi, |
543 | struct tvec_state *tv) | |
544 | { | |
545 | const char *pp; char *q; | |
546 | dstr d = DSTR_INIT; | |
547 | double x; | |
548 | int olderr, rc; | |
549 | ||
814e42ff MW |
550 | if (q_out) *q_out = 0; |
551 | ||
67b5031e | 552 | /* Check for special tokens. */ |
e63124bc MW |
553 | if (STRCMP(p, ==, "#nan")) { |
554 | #ifdef NAN | |
555 | x = NAN; rc = 0; | |
556 | #else | |
557 | tvec_error(tv, "NaN not supported on this system"); | |
558 | rc = -1; goto end; | |
559 | #endif | |
67b5031e MW |
560 | } |
561 | ||
562 | else if (STRCMP(p, ==, "#inf") || | |
563 | STRCMP(p, ==, "#+inf") || STRCMP(p, ==, "+#inf")) { | |
3efcfd2d | 564 | #ifdef INFINITY |
e63124bc MW |
565 | x = INFINITY; rc = 0; |
566 | #else | |
567 | tvec_error(tv, "infinity not supported on this system"); | |
568 | rc = -1; goto end; | |
569 | #endif | |
67b5031e MW |
570 | } |
571 | ||
572 | else if (STRCMP(p, ==, "#-inf") || STRCMP(p, ==, "-#inf")) { | |
3efcfd2d | 573 | #ifdef INFINITY |
e63124bc MW |
574 | x = -INFINITY; rc = 0; |
575 | #else | |
576 | tvec_error(tv, "infinity not supported on this system"); | |
577 | rc = -1; goto end; | |
578 | #endif | |
67b5031e MW |
579 | } |
580 | ||
581 | /* Check that this looks like a number, so we can exclude `strtod' | |
582 | * recognizing its own non-finite number tokens. | |
583 | */ | |
584 | else { | |
e63124bc MW |
585 | pp = p; |
586 | if (*pp == '+' || *pp == '-') pp++; | |
587 | if (*pp == '.') pp++; | |
588 | if (!ISDIGIT(*pp)) { | |
3efcfd2d | 589 | tvec_syntax(tv, *p ? *p : fgetc(tv->fp), "floating-point number"); |
e63124bc MW |
590 | rc = -1; goto end; |
591 | } | |
67b5031e MW |
592 | |
593 | /* Parse the number using the system parser. */ | |
e63124bc MW |
594 | olderr = errno; errno = 0; |
595 | x = strtod(p, &q); | |
814e42ff MW |
596 | if (!*q) /* nothing to do */; |
597 | else if (q_out) *q_out = q; | |
598 | else { tvec_syntax(tv, *q, "end-of-line"); rc = -1; goto end; } | |
e63124bc | 599 | if (errno && (errno != ERANGE || (x > 0 ? -x : x) == HUGE_VAL)) { |
814e42ff MW |
600 | tvec_error(tv, "invalid floating-point number `%.*s': %s", |
601 | (int)(q - p), p, strerror(errno)); | |
e63124bc MW |
602 | rc = -1; goto end; |
603 | } | |
604 | errno = olderr; | |
605 | } | |
606 | ||
67b5031e | 607 | /* Check that the number is acceptable. */ |
e63124bc MW |
608 | if (NANP(x) && fi && !(fi->f&TVFF_NANOK)) { |
609 | tvec_error(tv, "#nan not allowed here"); | |
610 | rc = -1; goto end; | |
611 | } | |
67b5031e | 612 | |
e63124bc MW |
613 | if (fi && ((!(fi->f&TVFF_NOMIN) && x < fi->min) || |
614 | (!(fi->f&TVFF_NOMAX) && x > fi->max))) { | |
615 | dstr_puts(&d, "floating-point number "); | |
616 | format_floating(&dstr_printops, &d, x); | |
617 | dstr_puts(&d, " out of range (must be in "); | |
618 | if (fi->f&TVFF_NOMIN) | |
619 | dstr_puts(&d, "(#-inf"); | |
620 | else | |
621 | { dstr_putc(&d, '['); format_floating(&dstr_printops, &d, fi->min); } | |
622 | dstr_puts(&d, " .. "); | |
623 | if (fi->f&TVFF_NOMAX) | |
624 | dstr_puts(&d, "#+inf)"); | |
625 | else | |
626 | { format_floating(&dstr_printops, &d, fi->max); dstr_putc(&d, ']'); } | |
627 | dstr_putc(&d, ')'); dstr_putz(&d); | |
628 | tvec_error(tv, "%s", d.buf); rc = -1; goto end; | |
629 | } | |
630 | ||
67b5031e MW |
631 | /* All done. */ |
632 | *x_out = x; rc = 0; | |
633 | end: | |
634 | dstr_destroy(&d); | |
635 | return (rc); | |
636 | } | |
637 | ||
638 | /*----- String utilities --------------------------------------------------*/ | |
639 | ||
/* Flags for entries in the special character name table. */
#define CTF_PREFER 1u                   /* preferred name */
#define CTF_SHORT 2u                    /* short name (compact style) */

/* An entry in the special character name table. */
struct chartab {
  const char *name;                     /* character name */
  int ch;                               /* character value */
  unsigned f;                           /* flags (@CTF_...@) */
};

/* Special character name table.
 *
 * The table is searched in order, so the order of the entries for a given
 * character matters.  It is terminated by an entry with a null name.
 */
static const struct chartab chartab[] = {
  { "#eof",             EOF,    CTF_PREFER | CTF_SHORT },

  { "#nul",             '\0',   CTF_PREFER },

  { "#bell",            '\a',   CTF_PREFER },
  { "#ding",            '\a',   0 },
  { "#bel",             '\a',   CTF_SHORT },

  { "#backspace",       '\b',   CTF_PREFER },
  { "#bs",              '\b',   CTF_SHORT },

  { "#escape",          '\x1b', CTF_PREFER },
  { "#esc",             '\x1b', CTF_SHORT },

  { "#formfeed",        '\f',   CTF_PREFER },
  { "#ff",              '\f',   CTF_SHORT },

  { "#newline",         '\n',   CTF_PREFER },
  { "#linefeed",        '\n',   0 },
  { "#lf",              '\n',   CTF_SHORT },
  { "#nl",              '\n',   0 },

  { "#return",          '\r',   CTF_PREFER },
  { "#carriage-return", '\r',   0 },
  { "#cr",              '\r',   CTF_SHORT },

  { "#tab",             '\t',   CTF_PREFER | CTF_SHORT },
  { "#horizontal-tab",  '\t',   0 },
  { "#ht",              '\t',   0 },

  { "#vertical-tab",    '\v',   CTF_PREFER },
  { "#vt",              '\v',   CTF_SHORT },

  { "#space",           ' ',    0 },
  { "#spc",             ' ',    CTF_SHORT },

  { "#delete",          '\x7f', CTF_PREFER },
  { "#del",             '\x7f', CTF_SHORT },

  { 0,                  0,      0 }
};
677 | ||
678 | /* --- @find_charname@ --- * | |
679 | * | |
680 | * Arguments: @int ch@ = character to match | |
681 | * @unsigned f@ = flags (@CTF_...@) to match | |
682 | * | |
683 | * Returns: The name of the character, or null if no match is found. | |
684 | * | |
685 | * Use: Looks up a name for a character. Specifically, it returns | |
686 | * the first entry in the @chartab@ table which matches @ch@ and | |
687 | * which has one of the flags @f@ set. | |
688 | */ | |
689 | ||
690 | static const char *find_charname(int ch, unsigned f) | |
691 | { | |
692 | const struct chartab *ct; | |
693 | ||
694 | for (ct = chartab; ct->name; ct++) | |
695 | if (ct->ch == ch && (ct->f&f)) return (ct->name); | |
696 | return (0); | |
697 | } | |
698 | ||
699 | /* --- @read_charname@ --- * | |
700 | * | |
701 | * Arguments: @int *ch_out@ = where to put the character | |
702 | * @const char *p@ = character name | |
703 | * @unsigned f@ = flags (@TCF_...@) | |
704 | * | |
705 | * Returns: Zero if a match was found, @-1@ if not. | |
706 | * | |
707 | * Use: Looks up a character by name. If @RCF_EOFOK@ is set in @f@, | |
708 | * then the @EOF@ marker can be matched; otherwise it can't. | |
709 | */ | |
710 | ||
711 | #define RCF_EOFOK 1u | |
712 | static int read_charname(int *ch_out, const char *p, unsigned f) | |
713 | { | |
714 | const struct chartab *ct; | |
715 | ||
716 | for (ct = chartab; ct->name; ct++) | |
717 | if (STRCMP(p, ==, ct->name) && ((f&RCF_EOFOK) || ct->ch >= 0)) | |
718 | { *ch_out = ct->ch; return (0); } | |
719 | return (-1); | |
720 | } | |
721 | ||
/* --- @format_charesc@ --- *
 *
 * Arguments:   @const struct gprintf_ops *gops@ = print operations
 *              @void *go@ = print destination
 *              @int ch@ = character to format
 *              @unsigned f@ = flags (@FCF_...@)
 *
 * Returns:     ---
 *
 * Use:         Format a character as an escape sequence, possibly as part
 *              of a larger string.  Characters with a conventional
 *              single-letter escape use that; anything else is written as
 *              a `\x...' hex code.  If @FCF_BRACE@ is set in @f@, then the
 *              hex code (or the zero of a null character) is wrapped in
 *              braces, so that it can safely be followed by further
 *              digits in a longer string.
 */

#define FCF_BRACE 1u
static void format_charesc(const struct gprintf_ops *gops, void *go,
                           int ch, unsigned f)
{
  const char *esc = 0;

  /* See whether there's a fixed escape sequence for this character. */
  switch (ch) {
    case '\a': esc = "\\a"; break;
    case '\b': esc = "\\b"; break;
    case '\x1b': esc = "\\e"; break;
    case '\f': esc = "\\f"; break;
    case '\r': esc = "\\r"; break;
    case '\n': esc = "\\n"; break;
    case '\t': esc = "\\t"; break;
    case '\v': esc = "\\v"; break;
    case '\\': esc = "\\\\"; break;
    case '\'': esc = "\\'"; break;
    case '\0': esc = (f&FCF_BRACE) ? "\\{0}" : "\\0"; break;
    default: break;
  }

  /* If so, write it; otherwise fall back to a hex code. */
  if (esc)
    gprintf(gops, go, "%s", esc);
  else if (f&FCF_BRACE)
    gprintf(gops, go, "\\x{%0*x}", hex_width(UCHAR_MAX), ch);
  else
    gprintf(gops, go, "\\x%0*x", hex_width(UCHAR_MAX), ch);
}
764 | ||
/* --- @format_char@ --- *
 *
 * Arguments:   @const struct gprintf_ops *gops@ = print operations
 *              @void *go@ = print destination
 *              @int ch@ = character to format
 *
 * Returns:     ---
 *
 * Use:         Format a single character, in single quotes.  Printable
 *              characters are written literally, except for backslash and
 *              the single quote itself, which are escaped, as is anything
 *              unprintable.
 */

static void format_char(const struct gprintf_ops *gops, void *go, int ch)
{
  /* The explicit tests come first so that @isprint@ is only consulted for
   * characters which aren't already known to need escaping.
   */
  if (ch == '\\' || ch == '\'' || !isprint(ch)) {
    gprintf(gops, go, "'");
    format_charesc(gops, go, ch, 0);
    gprintf(gops, go, "'");
  } else {
    gprintf(gops, go, "'%c'", ch);
  }
}
790 | ||
791 | /* --- @maybe_format_unsigned_char@, @maybe_format_signed_char@ --- * | |
792 | * | |
793 | * Arguments: @const struct gprintf_ops *gops@ = print operations | |
794 | * @void *go@ = print destination | |
795 | * @unsigned long u@ or @long i@ = an integer | |
796 | * | |
797 | * Returns: --- | |
798 | * | |
799 | * Use: Format a (signed or unsigned) integer as a character, if it's | |
800 | * in range, printing something like `= 'q''. It's assumed that | |
801 | * a comment marker has already been output. | |
802 | */ | |
803 | ||
804 | static void maybe_format_unsigned_char | |
805 | (const struct gprintf_ops *gops, void *go, unsigned long u) | |
806 | { | |
807 | const char *p; | |
808 | ||
809 | p = find_charname(u, CTF_PREFER); | |
810 | if (p) gprintf(gops, go, " = %s", p); | |
811 | if (u < UCHAR_MAX) | |
812 | { gprintf(gops, go, " = "); format_char(gops, go, u); } | |
e63124bc MW |
813 | } |
814 | ||
67b5031e MW |
815 | static void maybe_format_signed_char |
816 | (const struct gprintf_ops *gops, void *go, long i) | |
b64eb60f | 817 | { |
67b5031e MW |
818 | const char *p; |
819 | ||
820 | p = find_charname(i, CTF_PREFER); | |
821 | if (p) gprintf(gops, go, " = %s", p); | |
822 | if (0 <= i && i < UCHAR_MAX) | |
823 | { gprintf(gops, go, " = "); format_char(gops, go, i); } | |
b64eb60f MW |
824 | } |
825 | ||
67b5031e MW |
826 | /* --- @read_charesc@ --- * |
827 | * | |
828 | * Arguments: @int *ch_out@ = where to put the result | |
829 | * @struct tvec_state *tv@ = test vector state | |
830 | * | |
831 | * Returns: Zero on success, @-1@ on error. | |
832 | * | |
833 | * Use: Parse and convert an escape sequence from @tv@'s input | |
834 | * stream, assuming that the initial `\' has already been read. | |
835 | * Reports errors as appropriate. | |
836 | */ | |
837 | ||
838 | static int read_charesc(int *ch_out, struct tvec_state *tv) | |
b64eb60f | 839 | { |
b64eb60f MW |
840 | int ch, i, esc; |
841 | unsigned f = 0; | |
842 | #define f_brace 1u | |
843 | ||
e63124bc MW |
844 | ch = getc(tv->fp); |
845 | switch (ch) { | |
67b5031e MW |
846 | |
847 | /* Things we shouldn't find. */ | |
848 | case EOF: case '\n': return (tvec_syntax(tv, ch, "string escape")); | |
849 | ||
850 | /* Single-character escapes. */ | |
e63124bc MW |
851 | case '\'': *ch_out = '\''; break; |
852 | case '\\': *ch_out = '\\'; break; | |
853 | case '"': *ch_out = '"'; break; | |
854 | case 'a': *ch_out = '\a'; break; | |
855 | case 'b': *ch_out = '\b'; break; | |
856 | case 'e': *ch_out = '\x1b'; break; | |
857 | case 'f': *ch_out = '\f'; break; | |
858 | case 'n': *ch_out = '\n'; break; | |
859 | case 'r': *ch_out = '\r'; break; | |
860 | case 't': *ch_out = '\t'; break; | |
861 | case 'v': *ch_out = '\v'; break; | |
862 | ||
67b5031e | 863 | /* Hex escapes, with and without braces. */ |
e63124bc MW |
864 | case 'x': |
865 | ch = getc(tv->fp); | |
866 | if (ch == '{') { f |= f_brace; ch = getc(tv->fp); } | |
867 | else f &= ~f_brace; | |
67b5031e MW |
868 | esc = chtodig(ch); |
869 | if (esc < 0 || esc >= 16) return (tvec_syntax(tv, ch, "hex digit")); | |
e63124bc | 870 | for (;;) { |
67b5031e MW |
871 | ch = getc(tv->fp); i = chtodig(ch); if (i < 0 || i >= 16) break; |
872 | esc = 16*esc + i; | |
e63124bc MW |
873 | if (esc > UCHAR_MAX) |
874 | return (tvec_error(tv, | |
875 | "character code %d out of range", esc)); | |
876 | } | |
877 | if (!(f&f_brace)) ungetc(ch, tv->fp); | |
878 | else if (ch != '}') return (tvec_syntax(tv, ch, "`}'")); | |
879 | *ch_out = esc; | |
880 | break; | |
881 | ||
67b5031e MW |
882 | /* Other things, primarily octal escapes. */ |
883 | case '{': | |
884 | f |= f_brace; ch = getc(tv->fp); | |
885 | /* fall through */ | |
e63124bc MW |
886 | default: |
887 | if ('0' <= ch && ch < '8') { | |
888 | i = 1; esc = ch - '0'; | |
889 | for (;;) { | |
890 | ch = getc(tv->fp); | |
891 | if ('0' > ch || ch >= '8') { ungetc(ch, tv->fp); break; } | |
892 | esc = 8*esc + ch - '0'; | |
893 | i++; if (i >= 3) break; | |
894 | } | |
67b5031e MW |
895 | if (f&f_brace) { |
896 | ch = getc(tv->fp); | |
897 | if (ch != '}') return (tvec_syntax(tv, ch, "`}'")); | |
898 | } | |
e63124bc MW |
899 | if (esc > UCHAR_MAX) |
900 | return (tvec_error(tv, | |
901 | "character code %d out of range", esc)); | |
67b5031e | 902 | *ch_out = esc; break; |
e63124bc MW |
903 | } else |
904 | return (tvec_syntax(tv, ch, "string escape")); | |
905 | } | |
906 | ||
67b5031e | 907 | /* Done. */ |
e63124bc MW |
908 | return (0); |
909 | ||
910 | #undef f_brace | |
911 | } | |
912 | ||
67b5031e MW |
913 | /* --- @read_quoted_string@ --- * |
914 | * | |
915 | * Arguments: @dstr *d@ = string to write to | |
916 | * @int quote@ = initial quote, `'' or `"' | |
917 | * @struct tvec_state *tv@ = test vector state | |
918 | * | |
919 | * Returns: Zero on success, @-1@ on error. | |
920 | * | |
921 | * Use: Read the rest of a quoted string into @d@, reporting errors | |
922 | * as appropriate. | |
923 | * | |
924 | * A single-quoted string is entirely literal. A double-quoted | |
925 | * string may contain C-like escapes. | |
926 | */ | |
927 | ||
e63124bc MW |
928 | static int read_quoted_string(dstr *d, int quote, struct tvec_state *tv) |
929 | { | |
930 | int ch; | |
b64eb60f MW |
931 | |
932 | for (;;) { | |
933 | ch = getc(tv->fp); | |
b64eb60f MW |
934 | switch (ch) { |
935 | case EOF: case '\n': | |
e63124bc | 936 | return (tvec_syntax(tv, ch, "`%c'", quote)); |
b64eb60f MW |
937 | case '\\': |
938 | if (quote == '\'') goto ordinary; | |
e63124bc | 939 | ch = getc(tv->fp); if (ch == '\n') { tv->lno++; break; } |
67b5031e | 940 | ungetc(ch, tv->fp); if (read_charesc(&ch, tv)) return (-1); |
e63124bc | 941 | goto ordinary; |
b64eb60f MW |
942 | default: |
943 | if (ch == quote) goto end; | |
944 | ordinary: | |
945 | DPUTC(d, ch); | |
946 | break; | |
947 | } | |
948 | } | |
949 | ||
950 | end: | |
951 | DPUTZ(d); | |
882a39c1 | 952 | return (0); |
e63124bc | 953 | } |
b64eb60f | 954 | |
67b5031e MW |
955 | /* --- @collect_bare@ --- * |
956 | * | |
957 | * Arguments: @dstr *d@ = string to write to | |
958 | * @struct tvec_state *tv@ = test vector state | |
959 | * | |
960 | * Returns: Zero on success, @-1@ on error. | |
961 | * | |
962 | * Use: Read barewords and the whitespace between them. Stop when we | |
963 | * encounter something which can't start a bareword. | |
964 | */ | |
b64eb60f MW |
965 | |
static int collect_bare(dstr *d, struct tvec_state *tv)
{
  /* @pos@ records the length of @d@ at the end of the most recent word, so
   * that trailing whitespace can be chopped off when we finish.  The state
   * @s@ says whether the last thing we saw was part of a word, whitespace,
   * or a backslash which quotes the next character.
   */
  size_t pos = d->len;
  enum { WORD, SPACE, ESCAPE }; unsigned s = WORD;
  int ch, rc;

  for (;;) {
    ch = getc(tv->fp);
    switch (ch) {
      case EOF:
        tvec_syntax(tv, ch, "bareword");
        rc = -1; goto end;
      case '\n':
        /* An escaped newline is taken literally.  Otherwise push it back
         * and let @tvec_nexttoken@ decide whether there's more to read; if
         * so, the line break is recorded as a single space.
         */
        if (s == ESCAPE) { tv->lno++; goto addch; }
        if (s == WORD) pos = d->len;
        ungetc(ch, tv->fp); if (tvec_nexttoken(tv)) { rc = -1; goto end; }
        DPUTC(d, ' '); s = SPACE;
        break;
      case '"': case '\'': case '!': case '#': case ')': case '}': case ']':
        /* These characters stop the collection only at the start of a new
         * word; in the middle of a word (or escaped) they're ordinary.
         */
        if (s == SPACE) { ungetc(ch, tv->fp); goto done; }
        goto addch;
      case '\\':
        s = ESCAPE;
        break;
      default:
        if (s != ESCAPE && isspace(ch)) {
          if (s == WORD) pos = d->len;
          DPUTC(d, ch); s = SPACE;
          break;
        }
      addch:
        DPUTC(d, ch); s = WORD;
    }
  }

done:
  /* Trim trailing whitespace and terminate the string. */
  if (s == SPACE) d->len = pos;
  DPUTZ(d); rc = 0;
end:
  /* NOTE(review): paths which jump straight here leave @d@ untrimmed and
   * unterminated.
   */
  return (rc);
}
1007 | ||
67b5031e MW |
1008 | /* --- @set_up_encoding@ --- * |
1009 | * | |
1010 | * Arguments: @const codec_class **ccl_out@ = where to put the class | |
1011 | * @unsigned *f_out@ = where to put the flags | |
 *              @unsigned code@ = the coding scheme to use (@TVCODE_...@)
1013 | * | |
1014 | * Returns: --- | |
1015 | * | |
1016 | * Use: Helper for @read_compound_string@ below. | |
1017 | * | |
1018 | * Return the appropriate codec class and flags for @code@. | |
1019 | * Leaves @*ccl_out@ null if the coding scheme doesn't have a | |
1020 | * backing codec class (e.g., @TVCODE_BARE@). | |
1021 | */ | |
1022 | ||
1023 | enum { TVCODE_BARE, TVCODE_HEX, TVCODE_BASE64, TVCODE_BASE32 }; | |
b64eb60f MW |
1024 | static void set_up_encoding(const codec_class **ccl_out, unsigned *f_out, |
1025 | unsigned code) | |
1026 | { | |
1027 | switch (code) { | |
1028 | case TVCODE_BARE: | |
1029 | *ccl_out = 0; *f_out = 0; | |
1030 | break; | |
1031 | case TVCODE_HEX: | |
1032 | *ccl_out = &hex_class; *f_out = CDCF_IGNCASE; | |
1033 | break; | |
1034 | case TVCODE_BASE32: | |
1035 | *ccl_out = &base32_class; *f_out = CDCF_IGNCASE | CDCF_IGNEQPAD; | |
1036 | break; | |
1037 | case TVCODE_BASE64: | |
1038 | *ccl_out = &base64_class; *f_out = CDCF_IGNEQPAD; | |
1039 | break; | |
1040 | default: | |
1041 | abort(); | |
1042 | } | |
1043 | } | |
1044 | ||
67b5031e MW |
1045 | /* --- @flush_codec@ --- * |
1046 | * | |
1047 | * Arguments: @codec *cdc@ = a codec, or null | |
1048 | * @dstr *d@ = output string | |
1049 | * @struct tvec_state *tv@ = test vector state | |
1050 | * | |
1051 | * Returns: Zero on success, @-1@ on error. | |
1052 | * | |
1053 | * Use: Helper for @read_compound_string@ below. | |
1054 | * | |
1055 | * Flush out any final buffered material from @cdc@, and check | |
1056 | * that it's in a good state. Frees the codec on success. Does | |
1057 | * nothing if @cdc@ is null. | |
1058 | */ | |
1059 | ||
1060 | static int flush_codec(codec *cdc, dstr *d, struct tvec_state *tv) | |
1061 | { | |
1062 | int err; | |
1063 | ||
1064 | if (cdc) { | |
1065 | err = cdc->ops->code(cdc, 0, 0, d); | |
1066 | if (err) | |
1067 | return (tvec_error(tv, "invalid %s sequence end: %s", | |
1068 | cdc->ops->c->name, codec_strerror(err))); | |
1069 | cdc->ops->destroy(cdc); | |
1070 | } | |
1071 | return (0); | |
1072 | } | |
1073 | ||
1074 | /* --- @read_compound_string@ --- * | |
1075 | * | |
1076 | * Arguments: @void **p_inout@ = address of output buffer pointer | |
1077 | * @size_t *sz_inout@ = address of buffer size | |
1078 | * @unsigned code@ = initial interpretation of barewords | |
1079 | * @unsigned f@ = other flags (@RCSF_...@) | |
1080 | * @struct tvec_state *tv@ = test vector state | |
1081 | * | |
1082 | * Returns: Zero on success, @-1@ on error. | |
1083 | * | |
1084 | * Use: Parse a compound string, i.e., a sequence of stringish pieces | |
1085 | * which might be quoted strings, character names, or barewords | |
 *              to be decoded according to @code@, interspersed with
1087 | * additional directives. | |
1088 | * | |
1089 | * If the initial buffer pointer is non-null and sufficiently | |
1090 | * large, then it will be reused; otherwise, it is freed and a | |
1091 | * fresh, sufficiently large buffer is allocated and returned. | |
1092 | */ | |
1093 | ||
1094 | #define RCSF_NESTED 1u | |
882a39c1 | 1095 | static int read_compound_string(void **p_inout, size_t *sz_inout, |
67b5031e MW |
1096 | unsigned code, unsigned f, |
1097 | struct tvec_state *tv) | |
b64eb60f | 1098 | { |
67b5031e | 1099 | const codec_class *ccl; unsigned cdf; |
b64eb60f MW |
1100 | codec *cdc; |
1101 | dstr d = DSTR_INIT, w = DSTR_INIT; | |
1102 | char *p; | |
67b5031e MW |
1103 | const char *q; |
1104 | void *pp = 0; size_t sz; | |
1105 | unsigned long n; | |
882a39c1 | 1106 | int ch, err, rc; |
b64eb60f | 1107 | |
67b5031e MW |
1108 | set_up_encoding(&ccl, &cdf, code); cdc = 0; |
1109 | ||
1110 | if (tvec_nexttoken(tv)) return (tvec_syntax(tv, fgetc(tv->fp), "string")); | |
b64eb60f MW |
1111 | do { |
1112 | ch = getc(tv->fp); | |
67b5031e MW |
1113 | switch (ch) { |
1114 | ||
1115 | case ')': case ']': case '}': | |
1116 | /* Close brackets. Leave these for recursive caller if there is one, | |
1117 | * or just complain. | |
1118 | */ | |
1119 | ||
1120 | if (!(f&RCSF_NESTED)) | |
1121 | { rc = tvec_syntax(tv, ch, "string"); goto end; } | |
1122 | ungetc(ch, tv->fp); goto done; | |
1123 | ||
1124 | case '"': case '\'': | |
1125 | /* Quotes. Read a quoted string. */ | |
1126 | ||
1127 | if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; } | |
1128 | cdc = 0; | |
1129 | if (read_quoted_string(&d, ch, tv)) { rc = -1; goto end; } | |
1130 | break; | |
1131 | ||
1132 | case '#': | |
1133 | /* A named character. */ | |
1134 | ||
1135 | ungetc(ch, tv->fp); | |
1136 | if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; } | |
1137 | cdc = 0; | |
1138 | DRESET(&w); tvec_readword(tv, &w, ";", "character name"); | |
1139 | if (read_charname(&ch, w.buf, RCF_EOFOK)) { | |
1140 | rc = tvec_error(tv, "unknown character name `%s'", d.buf); | |
1141 | goto end; | |
1142 | } | |
1143 | DPUTC(&d, ch); break; | |
1144 | ||
1145 | case '!': | |
1146 | /* A magic keyword. */ | |
1147 | ||
1148 | if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; } | |
1149 | cdc = 0; | |
b64eb60f | 1150 | ungetc(ch, tv->fp); |
67b5031e MW |
1151 | DRESET(&w); tvec_readword(tv, &w, ";", "`!'-keyword"); |
1152 | ||
1153 | /* Change bareword coding system. */ | |
1154 | if (STRCMP(w.buf, ==, "!bare")) | |
1155 | { code = TVCODE_BARE; set_up_encoding(&ccl, &cdf, code); } | |
1156 | else if (STRCMP(w.buf, ==, "!hex")) | |
1157 | { code = TVCODE_HEX; set_up_encoding(&ccl, &cdf, code); } | |
1158 | else if (STRCMP(w.buf, ==, "!base32")) | |
1159 | { code = TVCODE_BASE32; set_up_encoding(&ccl, &cdf, code); } | |
1160 | else if (STRCMP(w.buf, ==, "!base64")) | |
1161 | { code = TVCODE_BASE64; set_up_encoding(&ccl, &cdf, code); } | |
1162 | ||
1163 | /* Repeated substrings. */ | |
1164 | else if (STRCMP(w.buf, ==, "!repeat")) { | |
1165 | if (tvec_nexttoken(tv)) { | |
1166 | rc = tvec_syntax(tv, fgetc(tv->fp), "repeat count"); | |
1167 | goto end; | |
1168 | } | |
1169 | DRESET(&w); | |
1170 | if (tvec_readword(tv, &w, ";{", "repeat count")) | |
1171 | { rc = -1; goto end; } | |
1172 | if (parse_unsigned_integer(&n, &q, w.buf)) { | |
1173 | rc = tvec_error(tv, "invalid repeat count `%s'", w.buf); | |
1174 | goto end; | |
1175 | } | |
1176 | if (*q) { rc = tvec_syntax(tv, *q, "`{'"); goto end; } | |
1177 | if (tvec_nexttoken(tv)) | |
1178 | { rc = tvec_syntax(tv, fgetc(tv->fp), "`{'"); goto end; } | |
1179 | ch = getc(tv->fp); if (ch != '{') | |
1180 | { rc = tvec_syntax(tv, ch, "`{'"); goto end; } | |
1181 | sz = 0; | |
1182 | if (read_compound_string(&pp, &sz, code, f | RCSF_NESTED, tv)) | |
1183 | { rc = -1; goto end; } | |
1184 | ch = getc(tv->fp); if (ch != '}') | |
1185 | { rc = tvec_syntax(tv, ch, "`}'"); goto end; } | |
1186 | if (sz) { | |
1187 | if (n > (size_t)-1/sz) | |
1188 | { rc = tvec_error(tv, "repeat size out of range"); goto end; } | |
1189 | dstr_ensure(&d, n*sz); | |
1190 | if (sz == 1) | |
1191 | { memset(d.buf + d.len, *(unsigned char *)pp, n); d.len += n; } | |
1192 | else | |
1193 | for (; n--; d.len += sz) memcpy(d.buf + d.len, pp, sz); | |
1194 | } | |
1195 | xfree(pp); pp = 0; | |
1196 | } | |
1197 | ||
1198 | /* Anything else is an error. */ | |
1199 | else { | |
1200 | tvec_error(tv, "unknown string keyword `%s'", w.buf); | |
1201 | rc = -1; goto end; | |
1202 | } | |
b64eb60f | 1203 | break; |
67b5031e | 1204 | |
b64eb60f | 1205 | default: |
67b5031e MW |
1206 | /* A bareword. Process it according to the current coding system. */ |
1207 | ||
1208 | switch (code) { | |
1209 | case TVCODE_BARE: | |
1210 | ungetc(ch, tv->fp); | |
1211 | if (collect_bare(&d, tv)) goto done; | |
1212 | break; | |
1213 | default: | |
1214 | assert(ccl); | |
1215 | ungetc(ch, tv->fp); DRESET(&w); | |
1216 | if (tvec_readword(tv, &w, ";", "%s-encoded fragment", ccl->name)) | |
1217 | { rc = -1; goto end; } | |
1218 | if (!cdc) cdc = ccl->decoder(cdf); | |
1219 | err = cdc->ops->code(cdc, w.buf, w.len, &d); | |
1220 | if (err) { | |
1221 | tvec_error(tv, "invalid %s fragment `%s': %s", | |
1222 | ccl->name, w.buf, codec_strerror(err)); | |
1223 | rc = -1; goto end; | |
1224 | } | |
1225 | break; | |
1226 | } | |
1227 | break; | |
b64eb60f MW |
1228 | } |
1229 | } while (!tvec_nexttoken(tv)); | |
1230 | ||
1231 | done: | |
67b5031e MW |
1232 | /* Wrap things up. */ |
1233 | if (cdc && flush_codec(cdc, &d, tv)) { rc = -1; goto end; } | |
1234 | cdc = 0; | |
b64eb60f MW |
1235 | if (*sz_inout <= d.len) |
1236 | { xfree(*p_inout); *p_inout = xmalloc(d.len + 1); } | |
1237 | p = *p_inout; memcpy(p, d.buf, d.len); p[d.len] = 0; *sz_inout = d.len; | |
882a39c1 | 1238 | rc = 0; |
67b5031e | 1239 | |
882a39c1 | 1240 | end: |
67b5031e MW |
1241 | /* Clean up any debris. */ |
1242 | if (cdc) cdc->ops->destroy(cdc); | |
1243 | if (pp) xfree(pp); | |
b64eb60f | 1244 | dstr_destroy(&d); dstr_destroy(&w); |
882a39c1 | 1245 | return (rc); |
b64eb60f MW |
1246 | } |
1247 | ||
b64eb60f MW |
1248 | /*----- Signed and unsigned integer types ---------------------------------*/ |
1249 | ||
c81c35df MW |
1250 | /* --- @init_int@, @init_uint@ --- * |
1251 | * | |
1252 | * Arguments: @union tvec_regval *rv@ = register value | |
1253 | * @const struct tvec_regdef *rd@ = register definition | |
1254 | * | |
1255 | * Returns: --- | |
1256 | * | |
1257 | * Use: Initialize a register value. | |
1258 | * | |
1259 | * Integer values are initialized to zero. | |
1260 | */ | |
1261 | ||
b64eb60f MW |
1262 | static void init_int(union tvec_regval *rv, const struct tvec_regdef *rd) |
1263 | { rv->i = 0; } | |
1264 | ||
1265 | static void init_uint(union tvec_regval *rv, const struct tvec_regdef *rd) | |
1266 | { rv->u = 0; } | |
1267 | ||
c81c35df MW |
1268 | /* --- @eq_int@, @eq_uint@ --- * |
1269 | * | |
1270 | * Arguments: @const union tvec_regval *rv0, *rv1@ = register values | |
1271 | * @const struct tvec_regdef *rd@ = register definition | |
1272 | * | |
1273 | * Returns: Nonzero if the values are equal, zero if unequal | |
1274 | * | |
1275 | * Use: Compare register values for equality. | |
1276 | */ | |
1277 | ||
b64eb60f MW |
1278 | static int eq_int(const union tvec_regval *rv0, const union tvec_regval *rv1, |
1279 | const struct tvec_regdef *rd) | |
1280 | { return (rv0->i == rv1->i); } | |
1281 | ||
1282 | static int eq_uint(const union tvec_regval *rv0, | |
1283 | const union tvec_regval *rv1, | |
1284 | const struct tvec_regdef *rd) | |
1285 | { return (rv0->u == rv1->u); } | |
1286 | ||
c81c35df MW |
1287 | /* --- @tobuf_int@, @tobuf_uint@ --- * |
1288 | * | |
1289 | * Arguments: @buf *b@ = buffer | |
1290 | * @const union tvec_regval *rv@ = register value | |
1291 | * @const struct tvec_regdef *rd@ = register definition | |
1292 | * | |
1293 | * Returns: Zero on success, %$-1$% on failure. | |
1294 | * | |
1295 | * Use: Serialize a register value to a buffer. | |
1296 | * | |
1297 | * Integer values are serialized as little-endian 64-bit signed | |
1298 | * or unsigned integers. | |
1299 | */ | |
1300 | ||
b64eb60f MW |
1301 | static int tobuf_int(buf *b, const union tvec_regval *rv, |
1302 | const struct tvec_regdef *rd) | |
1303 | { return (signed_to_buf(b, rv->i)); } | |
1304 | ||
1305 | static int tobuf_uint(buf *b, const union tvec_regval *rv, | |
1306 | const struct tvec_regdef *rd) | |
1307 | { return (unsigned_to_buf(b, rv->u)); } | |
1308 | ||
c81c35df MW |
1309 | /* --- @frombuf_int@, @frombuf_uint@ --- * |
1310 | * | |
1311 | * Arguments: @buf *b@ = buffer | |
1312 | * @union tvec_regval *rv@ = register value | |
1313 | * @const struct tvec_regdef *rd@ = register definition | |
1314 | * | |
1315 | * Returns: Zero on success, %$-1$% on failure. | |
1316 | * | |
1317 | * Use: Deserialize a register value from a buffer. | |
1318 | * | |
1319 | * Integer values are serialized as 64-bit signed or unsigned | |
1320 | * integers. | |
1321 | */ | |
1322 | ||
b64eb60f MW |
1323 | static int frombuf_int(buf *b, union tvec_regval *rv, |
1324 | const struct tvec_regdef *rd) | |
882a39c1 | 1325 | { return (signed_from_buf(b, &rv->i)); } |
b64eb60f MW |
1326 | |
1327 | static int frombuf_uint(buf *b, union tvec_regval *rv, | |
1328 | const struct tvec_regdef *rd) | |
1329 | { return (unsigned_from_buf(b, &rv->u)); } | |
1330 | ||
c81c35df MW |
1331 | /* --- @parse_int@, @parse_uint@ --- * |
1332 | * | |
1333 | * Arguments: @union tvec_regval *rv@ = register value | |
1334 | * @const struct tvec_regdef *rd@ = register definition | |
1335 | * @struct tvec_state *tv@ = test-vector state | |
1336 | * | |
1337 | * Returns: Zero on success, %$-1$% on error. | |
1338 | * | |
1339 | * Use: Parse a register value from an input file. | |
1340 | * | |
1341 | * Integers may be input in decimal, hex, binary, or octal, | |
1342 | * following approximately usual conventions. | |
1343 | * | |
1344 | * * Signed integers may be preceded with a `+' or `-' sign. | |
1345 | * | |
1346 | * * Decimal integers are just a sequence of decimal digits | |
1347 | * `0' ... `9'. | |
1348 | * | |
1349 | * * Octal integers are a sequence of digits `0' ... `7', | |
1350 | * preceded by `0o' or `0O'. | |
1351 | * | |
1352 | * * Hexadecimal integers are a sequence of digits `0' | |
1353 | * ... `9', `a' ... `f', or `A' ... `F', preceded by `0x' or | |
1354 | * `0X'. | |
1355 | * | |
1356 | * * Radix-B integers are a sequence of digits `0' ... `9', | |
1357 | * `a' ... `f', or `A' ... `F', each with value less than B, | |
1358 | * preceded by `Br' or `BR', where 0 < B < 36 is expressed | |
1359 | * in decimal without any leading `0' or internal | |
1360 | * underscores `_'. | |
1361 | * | |
1362 | * * A digit sequence may contain internal underscore `_' | |
1363 | * separators, but not before or after all of the digits; | |
1364 | * and two consecutive `_' characters are not permitted. | |
1365 | */ | |
1366 | ||
882a39c1 MW |
1367 | static int parse_int(union tvec_regval *rv, const struct tvec_regdef *rd, |
1368 | struct tvec_state *tv) | |
b64eb60f MW |
1369 | { |
1370 | dstr d = DSTR_INIT; | |
882a39c1 | 1371 | int rc; |
b64eb60f | 1372 | |
c81c35df MW |
1373 | if (tvec_readword(tv, &d, ";", "signed integer")) { rc = -1; goto end; } |
1374 | if (parse_signed(&rv->i, d.buf, rd->arg.p, tv)) { rc = -1; goto end; } | |
1375 | if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; } | |
882a39c1 MW |
1376 | rc = 0; |
1377 | end: | |
b64eb60f | 1378 | dstr_destroy(&d); |
882a39c1 | 1379 | return (rc); |
b64eb60f MW |
1380 | } |
1381 | ||
882a39c1 MW |
1382 | static int parse_uint(union tvec_regval *rv, const struct tvec_regdef *rd, |
1383 | struct tvec_state *tv) | |
b64eb60f MW |
1384 | { |
1385 | dstr d = DSTR_INIT; | |
882a39c1 | 1386 | int rc; |
b64eb60f | 1387 | |
c81c35df MW |
1388 | if (tvec_readword(tv, &d, ";", "unsigned integer")) { rc = -1; goto end; } |
1389 | if (parse_unsigned(&rv->u, d.buf, rd->arg.p, tv)) { rc = -1; goto end; } | |
1390 | if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; } | |
882a39c1 MW |
1391 | rc = 0; |
1392 | end: | |
b64eb60f | 1393 | dstr_destroy(&d); |
882a39c1 | 1394 | return (rc); |
b64eb60f MW |
1395 | } |
1396 | ||
c81c35df MW |
1397 | /* --- @dump_int@, @dump_uint@ --- * |
1398 | * | |
1399 | * Arguments: @const union tvec_regval *rv@ = register value | |
1400 | * @const struct tvec_regdef *rd@ = register definition | |
1401 | * @unsigned style@ = output style (@TVSF_...@) | |
 *              @const struct gprintf_ops *gops@, @void *go@ = format output
1403 | * | |
1404 | * Returns: --- | |
1405 | * | |
1406 | * Use: Dump a register value to the format output. | |
1407 | * | |
1408 | * Integer values are dumped in decimal and, unless compact | |
1409 | * output is requested, hex, and maybe a character, as a | |
1410 | * comment. | |
1411 | */ | |
1412 | ||
b64eb60f MW |
1413 | static void dump_int(const union tvec_regval *rv, |
1414 | const struct tvec_regdef *rd, | |
e63124bc MW |
1415 | unsigned style, |
1416 | const struct gprintf_ops *gops, void *go) | |
b64eb60f | 1417 | { |
b64eb60f | 1418 | |
e63124bc | 1419 | gprintf(gops, go, "%ld", rv->i); |
b64eb60f | 1420 | if (!(style&TVSF_COMPACT)) { |
3efcfd2d MW |
1421 | gprintf(gops, go, " ; = "); |
1422 | format_signed_hex(gops, go, rv->i); | |
1423 | maybe_format_signed_char(gops, go, rv->i); | |
b64eb60f MW |
1424 | } |
1425 | } | |
1426 | ||
1427 | static void dump_uint(const union tvec_regval *rv, | |
1428 | const struct tvec_regdef *rd, | |
e63124bc MW |
1429 | unsigned style, |
1430 | const struct gprintf_ops *gops, void *go) | |
b64eb60f | 1431 | { |
e63124bc MW |
1432 | gprintf(gops, go, "%lu", rv->u); |
1433 | if (!(style&TVSF_COMPACT)) { | |
3efcfd2d MW |
1434 | gprintf(gops, go, " ; = "); |
1435 | format_unsigned_hex(gops, go, rv->u); | |
1436 | maybe_format_unsigned_char(gops, go, rv->u); | |
e63124bc | 1437 | } |
b64eb60f MW |
1438 | } |
1439 | ||
/* Integer type definitions.
 *
 * Each table lists the handlers for one register type: initialize,
 * release, and compare; serialize and deserialize; parse and dump.
 */
const struct tvec_regty tvty_int = {
  init_int, trivial_release, eq_int,
  tobuf_int, frombuf_int,
  parse_int, dump_int
};
const struct tvec_regty tvty_uint = {
  init_uint, trivial_release, eq_uint,
  tobuf_uint, frombuf_uint,
  parse_uint, dump_uint
};
b64eb60f | 1451 | |
c81c35df | 1452 | /* Predefined integer ranges. */ |
b64eb60f MW |
1453 | const struct tvec_irange |
1454 | tvrange_schar = { SCHAR_MIN, SCHAR_MAX }, | |
1455 | tvrange_short = { SHRT_MIN, SHRT_MAX }, | |
1456 | tvrange_int = { INT_MIN, INT_MAX }, | |
1457 | tvrange_long = { LONG_MIN, LONG_MAX }, | |
1458 | tvrange_sbyte = { -128, 127 }, | |
1459 | tvrange_i16 = { -32768, +32767 }, | |
1460 | tvrange_i32 = { -2147483648, 2147483647 }; | |
b64eb60f MW |
1461 | const struct tvec_urange |
1462 | tvrange_uchar = { 0, UCHAR_MAX }, | |
1463 | tvrange_ushort = { 0, USHRT_MAX }, | |
1464 | tvrange_uint = { 0, UINT_MAX }, | |
1465 | tvrange_ulong = { 0, ULONG_MAX }, | |
1466 | tvrange_size = { 0, (size_t)-1 }, | |
1467 | tvrange_byte = { 0, 255 }, | |
1468 | tvrange_u16 = { 0, 65535 }, | |
1469 | tvrange_u32 = { 0, 4294967296 }; | |
1470 | ||
67b5031e MW |
1471 | /* --- @tvec_claimeq_int@ --- * |
1472 | * | |
1473 | * Arguments: @struct tvec_state *tv@ = test-vector state | |
1474 | * @long i0, i1@ = two signed integers | |
 *              @const char *file@, @unsigned lno@ = calling file and line
1476 | * @const char *expr@ = the expression to quote on failure | |
1477 | * | |
1478 | * Returns: Nonzero if @i0@ and @i1@ are equal, otherwise zero. | |
1479 | * | |
1480 | * Use: Check that values of @i0@ and @i1@ are equal. As for | |
1481 | * @tvec_claim@ above, a test case is automatically begun and | |
1482 | * ended if none is already underway. If the values are | |
1483 | * unequal, then @tvec_fail@ is called, quoting @expr@, and the | |
1484 | * mismatched values are dumped: @i0@ is printed as the output | |
1485 | * value and @i1@ is printed as the input reference. | |
1486 | */ | |
1487 | ||
b64eb60f MW |
1488 | int tvec_claimeq_int(struct tvec_state *tv, long i0, long i1, |
1489 | const char *file, unsigned lno, const char *expr) | |
1490 | { | |
3efcfd2d | 1491 | tv->out[0].v.i = i0; tv->in[0].v.i = i1; |
b64eb60f MW |
1492 | return (tvec_claimeq(tv, &tvty_int, 0, file, lno, expr)); |
1493 | } | |
1494 | ||
67b5031e MW |
1495 | /* --- @tvec_claimeq_uint@ --- * |
1496 | * | |
1497 | * Arguments: @struct tvec_state *tv@ = test-vector state | |
1498 | * @unsigned long u0, u1@ = two unsigned integers | |
 *              @const char *file@, @unsigned lno@ = calling file and line
1500 | * @const char *expr@ = the expression to quote on failure | |
1501 | * | |
1502 | * Returns: Nonzero if @u0@ and @u1@ are equal, otherwise zero. | |
1503 | * | |
1504 | * Use: Check that values of @u0@ and @u1@ are equal. As for | |
1505 | * @tvec_claim@ above, a test case is automatically begun and | |
1506 | * ended if none is already underway. If the values are | |
1507 | * unequal, then @tvec_fail@ is called, quoting @expr@, and the | |
1508 | * mismatched values are dumped: @u0@ is printed as the output | |
1509 | * value and @u1@ is printed as the input reference. | |
1510 | */ | |
1511 | ||
b64eb60f MW |
1512 | int tvec_claimeq_uint(struct tvec_state *tv, |
1513 | unsigned long u0, unsigned long u1, | |
1514 | const char *file, unsigned lno, const char *expr) | |
1515 | { | |
3efcfd2d | 1516 | tv->out[0].v.u = u0; tv->in[0].v.u = u1; |
b64eb60f MW |
1517 | return (tvec_claimeq(tv, &tvty_uint, 0, file, lno, expr)); |
1518 | } | |
1519 | ||
3efcfd2d | 1520 | /*----- Floating-point type -----------------------------------------------*/ |
e63124bc | 1521 | |
/* --- @init_float@ --- *
c81c35df MW |
1523 | * |
1524 | * Arguments: @union tvec_regval *rv@ = register value | |
1525 | * @const struct tvec_regdef *rd@ = register definition | |
1526 | * | |
1527 | * Returns: --- | |
1528 | * | |
1529 | * Use: Initialize a register value. | |
1530 | * | |
1531 | * Floating-point values are initialized to zero. | |
1532 | */ | |
1533 | ||
e63124bc MW |
1534 | static void init_float(union tvec_regval *rv, const struct tvec_regdef *rd) |
1535 | { rv->f = 0.0; } | |
e63124bc | 1536 | |
c81c35df MW |
1537 | /* --- @eq_float@ --- * |
1538 | * | |
1539 | * Arguments: @const union tvec_regval *rv0, *rv1@ = register values | |
1540 | * @const struct tvec_regdef *rd@ = register definition | |
1541 | * | |
1542 | * Returns: Nonzero if the values are equal, zero if unequal | |
1543 | * | |
1544 | * Use: Compare register values for equality. | |
1545 | * | |
1546 | * Floating-point values may be considered equal if their | |
1547 | * absolute or relative difference is sufficiently small, as | |
1548 | * described in the register definition. | |
1549 | */ | |
1550 | ||
e63124bc MW |
1551 | static int eq_float(const union tvec_regval *rv0, |
1552 | const union tvec_regval *rv1, | |
1553 | const struct tvec_regdef *rd) | |
1554 | { return (eqish_floating_p(rv0->f, rv1->f, rd->arg.p)); } | |
1555 | ||
c81c35df MW |
1556 | /* --- @tobuf_float@ --- * |
1557 | * | |
1558 | * Arguments: @buf *b@ = buffer | |
1559 | * @const union tvec_regval *rv@ = register value | |
1560 | * @const struct tvec_regdef *rd@ = register definition | |
1561 | * | |
1562 | * Returns: Zero on success, %$-1$% on failure. | |
1563 | * | |
1564 | * Use: Serialize a register value to a buffer. | |
1565 | * | |
1566 | * Floating-point values are serialized as little-endian | |
1567 | * IEEE 754 Binary64. | |
1568 | */ | |
1569 | ||
e63124bc MW |
1570 | static int tobuf_float(buf *b, const union tvec_regval *rv, |
1571 | const struct tvec_regdef *rd) | |
1572 | { return (buf_putf64l(b, rv->f)); } | |
c81c35df MW |
1573 | |
1574 | /* --- @frombuf_float@ --- * | |
1575 | * | |
1576 | * Arguments: @buf *b@ = buffer | |
1577 | * @union tvec_regval *rv@ = register value | |
1578 | * @const struct tvec_regdef *rd@ = register definition | |
1579 | * | |
1580 | * Returns: Zero on success, %$-1$% on failure. | |
1581 | * | |
1582 | * Use: Deserialize a register value from a buffer. | |
1583 | * | |
1584 | * Floating-point values are serialized as little-endian | |
1585 | * IEEE 754 Binary64. | |
1586 | */ | |
1587 | ||
e63124bc MW |
1588 | static int frombuf_float(buf *b, union tvec_regval *rv, |
1589 | const struct tvec_regdef *rd) | |
1590 | { return (buf_getf64l(b, &rv->f)); } | |
1591 | ||
c81c35df MW |
1592 | /* --- @parse_float@ --- * |
1593 | * | |
1594 | * Arguments: @union tvec_regval *rv@ = register value | |
1595 | * @const struct tvec_regdef *rd@ = register definition | |
1596 | * @struct tvec_state *tv@ = test-vector state | |
1597 | * | |
1598 | * Returns: Zero on success, %$-1$% on error. | |
1599 | * | |
1600 | * Use: Parse a register value from an input file. | |
1601 | * | |
1602 | * Floating-point values are either NaN (%|#nan|%, if supported | |
1603 | * by the platform); positive or negative infinity (%|#inf|%, | |
1604 | * %|+#inf|%, or %|#+inf|% (preferring the last), and %|-#inf|% | |
1605 | * or %|#-inf|% (preferring the latter), if supported by the | |
1606 | * platform); or a number in strtod(3) syntax. | |
1607 | */ | |
1608 | ||
e63124bc MW |
1609 | static int parse_float(union tvec_regval *rv, const struct tvec_regdef *rd, |
1610 | struct tvec_state *tv) | |
1611 | { | |
1612 | dstr d = DSTR_INIT; | |
1613 | int rc; | |
1614 | ||
1615 | if (tvec_readword(tv, &d, ";", "floating-point number")) | |
1616 | { rc = -1; goto end; } | |
814e42ff MW |
1617 | if (parse_floating(&rv->f, 0, d.buf, rd->arg.p, tv)) |
1618 | { rc = -1; goto end; } | |
c81c35df | 1619 | if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; } |
e63124bc MW |
1620 | rc = 0; |
1621 | end: | |
1622 | dstr_destroy(&d); | |
1623 | return (rc); | |
1624 | } | |
1625 | ||
c81c35df MW |
1626 | /* --- @dump_float@ --- * |
1627 | * | |
1628 | * Arguments: @const union tvec_regval *rv@ = register value | |
1629 | * @const struct tvec_regdef *rd@ = register definition | |
1630 | * @unsigned style@ = output style (@TVSF_...@) | |
 *              @const struct gprintf_ops *gops@, @void *go@ = format output
1632 | * | |
1633 | * Returns: --- | |
1634 | * | |
1635 | * Use: Dump a register value to the format output. | |
1636 | * | |
1637 | * Floating-point values are dumped in decimal or as a special | |
1638 | * token beginning with `%|#|%'. Some effort is taken to ensure | |
1639 | * that the output is sufficient to uniquely identify the | |
1640 | * original value, but, honestly, C makes this really hard. | |
1641 | */ | |
1642 | ||
e63124bc MW |
1643 | static void dump_float(const union tvec_regval *rv, |
1644 | const struct tvec_regdef *rd, | |
1645 | unsigned style, | |
1646 | const struct gprintf_ops *gops, void *go) | |
1647 | { format_floating(gops, go, rv->f); } | |
1648 | ||
c81c35df | 1649 | /* Floating-point type definition. */ |
/* The entries are, going by the names of the functions supplied: value
 * initialization, release, equality test, serialization to a buffer,
 * deserialization from a buffer, parsing from an input file, and dumping.
 * Release is the shared @trivial_release@.
 */
e63124bc | 1650 | const struct tvec_regty tvty_float = { |
3efcfd2d | 1651 | init_float, trivial_release, eq_float, |
e63124bc MW |
1652 | tobuf_float, frombuf_float, | |
1653 | parse_float, dump_float | |
1654 | }; | |
1655 | ||
c81c35df MW |
1656 | /* Predefined floating-point ranges. */ |
/* Each initializer is presumably { flags, minimum, maximum, delta } --
 * TODO confirm against the declaration of @struct tvec_floatinfo@.  Both
 * ranges use @TVFF_EXACT@ with a zero delta; @tvflt_finite@ spans
 * %$-{\rm DBL\_MAX}$% to @DBL_MAX@, and @tvflt_nonneg@ spans zero to
 * @DBL_MAX@.
 */
1657 | const struct tvec_floatinfo | |
1658 | tvflt_finite = { TVFF_EXACT, -DBL_MAX, DBL_MAX, 0.0 }, | |
1659 | tvflt_nonneg = { TVFF_EXACT, 0, DBL_MAX, 0.0 }; | |
1660 | ||
67b5031e MW |
1661 | /* --- @tvec_claimeqish_float@ --- * |
1662 | * | |
1663 | * Arguments: @struct tvec_state *tv@ = test-vector state | |
1664 | * @double f0, f1@ = two floating-point numbers | |
1665 | * @unsigned f@ = flags (@TVFF_...@) | |
1666 | * @double delta@ = maximum tolerable difference | |
1667 | * @const char *file@, @unsigned lno@ = calling file and line | |
1668 | * @const char *expr@ = the expression to quote on failure | |
1669 | * | |
1670 | * Returns: Nonzero if @f0@ and @f1@ are sufficiently close, otherwise | |
1671 | * zero. | |
1672 | * | |
1673 | * Use: Check that values of @f0@ and @f1@ are sufficiently close. | |
1674 | * As for @tvec_claim@ above, a test case is automatically begun | |
1675 | * and ended if none is already underway. If the values are | |
1676 | * too far apart, then @tvec_fail@ is called, quoting @expr@, | |
1677 | * and the mismatched values are dumped: @f0@ is printed as the | |
1678 | * output value and @f1@ is printed as the input reference. | |
1679 | * | |
1680 | * The details for the comparison are as follows. | |
1681 | * | |
1682 | * * A NaN value matches any other NaN, and nothing else. | |
1683 | * | |
1684 | * * An infinity matches another infinity of the same sign, | |
1685 | * and nothing else. | |
1686 | * | |
1687 | * * If @f&TVFF_EQMASK@ is @TVFF_EXACT@, then any | |
1688 | * representable number matches only itself: in particular, | |
1689 | * positive and negative zero are considered distinct. | |
1690 | * (This allows tests to check that they land on the correct | |
1691 | * side of branch cuts, for example.) | |
1692 | * | |
1693 | * * If @f&TVFF_EQMASK@ is @TVFF_ABSDELTA@, then %$x$% matches | |
1694 | * %$y$% when %$|x - y| < \delta$%. | |
1695 | * | |
1696 | * * If @f&TVFF_EQMASK@ is @TVFF_RELDELTA@, then %$x$% matches | |
1697 | * %$y$% when %$|1 - y/x| < \delta$%. (Note that this | |
1698 | * criterion is asymmetric in %$x$% and %$y$%.) | |
1699 | */ | |
1700 | ||
e63124bc MW |
1701 | int tvec_claimeqish_float(struct tvec_state *tv, |
1702 | double f0, double f1, unsigned f, double delta, | |
1703 | const char *file, unsigned lno, | |
1704 | const char *expr) | |
1705 | { | |
1706 | struct tvec_floatinfo fi; | |
1707 | union tvec_misc arg; | |
1708 | ||
1709 | fi.f = f; fi.min = fi.max = 0.0; fi.delta = delta; arg.p = &fi; | |
3efcfd2d | 1710 | tv->out[0].v.f = f0; tv->in[0].v.f = f1; |
e63124bc MW |
1711 | return (tvec_claimeq(tv, &tvty_float, &arg, file, lno, expr)); |
1712 | } | |
e63124bc | 1713 | |
67b5031e MW |
1714 | /* --- @tvec_claimeq_float@ --- * |
1715 | * | |
1716 | * Arguments: @struct tvec_state *tv@ = test-vector state | |
1717 | * @double f0, f1@ = two floating-point numbers | |
1718 | * @const char *file@, @unsigned lno@ = calling file and line | |
1719 | * @const char *expr@ = the expression to quote on failure | |
1720 | * | |
1721 | * Returns: Nonzero if @f0@ and @f1@ are identical, otherwise zero. | |
1722 | * | |
1723 | * Use: Check that values of @f0@ and @f1@ are identical. The | |
1724 | * function is exactly equivalent to @tvec_claimeqish_float@ | |
1725 | * with @f == TVFF_EXACT@. | |
1726 | */ | |
1727 | ||
1728 | int tvec_claimeq_float(struct tvec_state *tv, | |
1729 | double f0, double f1, | |
1730 | const char *file, unsigned lno, | |
1731 | const char *expr) | |
1732 | { | |
1733 | return (tvec_claimeqish_float(tv, f0, f1, TVFF_EXACT, 0.0, | |
1734 | file, lno, expr)); | |
1735 | } | |
1736 | ||
814e42ff MW |
1737 | /*----- Durations ---------------------------------------------------------*/ |
1738 | ||
1739 | /* A duration is a floating-point number of seconds. Initialization and | |
1740 | * teardown, equality comparison, and serialization are as for floating-point | |
1741 | * values. | |
1742 | */ | |
1743 | ||
/* Table of recognized duration units.  Each entry gives the unit's spelling
 * (@unit@), the number of seconds it represents (@scale@: the parser
 * multiplies the parsed number by this), and flags (@f@).  The table ends
 * with an entry whose @unit@ is null.
 *
 * Several spellings may share a scale.  @DUF_PREFER@ presumably marks the
 * spelling to be preferred on output -- confirm against @dump_duration@.
 *
 * Note that the entries are not sorted by scale overall: the SI-prefixed
 * multiples of the second (down to `das') come before the calendar-style
 * units (`yr', `day', `hr', `min'), even though the latter are larger than
 * some of the former.
 */
1744 | static const struct duration_unit { | |
1745 | const char *unit; | |
1746 | double scale; | |
1747 | unsigned f; | |
1748 | #define DUF_PREFER 1u | |
1749 | } duration_units[] = { | |
1750 | { "Ys", 1e+24, 0 }, | |
1751 | { "Zs", 1e+21, 0 }, | |
1752 | { "Es", 1e+18, 0 }, | |
1753 | { "Ps", 1e+15, 0 }, | |
1754 | { "Ts", 1e+12, 0 }, | |
1755 | { "Gs", 1e+9, 0 }, | |
1756 | { "Ms", 1e+6, 0 }, | |
1757 | { "ks", 1e+3, 0 }, | |
1758 | { "hs", 1e+2, 0 }, | |
1759 | { "das", 1e+1, 0 }, | |
1760 | | |
1761 | { "yr", 31557600.0, DUF_PREFER }, | |
1762 | { "y", 31557600.0, 0 }, | |
1763 | { "day", 86400.0, DUF_PREFER }, | |
1764 | { "dy", 86400.0, 0 }, | |
1765 | { "d", 86400.0, 0 }, | |
1766 | { "hr", 3600.0, DUF_PREFER }, | |
1767 | { "hour", 3600.0, 0 }, | |
1768 | { "h", 3600.0, 0 }, | |
1769 | { "min", 60.0, DUF_PREFER }, | |
1770 | { "m", 60.0, 0 }, | |
1771 | | |
1772 | { "s", 1.0, DUF_PREFER }, | |
1773 | { "sec", 1.0, 0 }, | |
1774 | | |
1775 | { "ds", 1e-1, 0 }, | |
1776 | { "cs", 1e-2, 0 }, | |
1777 | { "ms", 1e-3, DUF_PREFER }, | |
1778 | { "µs", 1e-6, DUF_PREFER }, | |
1779 | { "ns", 1e-9, DUF_PREFER }, | |
1780 | { "ps", 1e-12, DUF_PREFER }, | |
1781 | { "fs", 1e-15, DUF_PREFER }, | |
1782 | { "as", 1e-18, DUF_PREFER }, | |
1783 | { "zs", 1e-21, DUF_PREFER }, | |
1784 | { "ys", 1e-24, DUF_PREFER }, | |
1785 | | |
1786 | { 0 } | |
1787 | }; | |
1788 | ||
1789 | /* --- @parse_duration@ --- * | |
1790 | * | |
1791 | * Arguments: @union tvec_regval *rv@ = register value | |
1792 | * @const struct tvec_regdef *rd@ = register definition | |
1793 | * @struct tvec_state *tv@ = test-vector state | |
1794 | * | |
1795 | * Returns: Zero on success, %$-1$% on error. | |
1796 | * | |
1797 | * Use: Parse a register value from an input file. | |
1798 | * | |
1799 | * Duration values are finite nonnegative floating-point | |
1800 | * numbers in @strtod@ syntax, optionally followed by a unit. | |
1801 | */ | |
1802 | ||
1803 | static int parse_duration(union tvec_regval *rv, | |
1804 | const struct tvec_regdef *rd, | |
1805 | struct tvec_state *tv) | |
1806 | { | |
1807 | const struct duration_unit *u; | |
1808 | const char *q; | |
1809 | dstr d = DSTR_INIT; size_t pos; | |
1810 | double t; | |
1811 | int rc; | |
1812 | ||
1813 | if (tvec_readword(tv, &d, ";", "duration")) { rc = -1; goto end; } | |
1814 | if (parse_floating(&t, &q, d.buf, | |
1815 | rd->arg.p ? rd->arg.p : &tvflt_nonneg, tv)) | |
1816 | { rc = -1; goto end; } | |
1817 | ||
1818 | if (!q) { | |
1819 | tvec_skipspc(tv); pos = d.len; | |
1820 | if (!tvec_readword(tv, &d, ";", 0)) q = d.buf + pos + 1; | |
1821 | } | |
1822 | ||
1823 | if (q) { | |
1824 | for (u = duration_units; u->unit; u++) | |
1825 | if (STRCMP(q, ==, u->unit)) { t *= u->scale; goto found_unit; } | |
1826 | rc = tvec_syntax(tv, *q, "end-of-line"); goto end; | |
1827 | found_unit:; | |
1828 | } | |
1829 | ||
1830 | if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; } | |
1831 | rv->f = t; rc = 0; | |
1832 | end: | |
1833 | dstr_destroy(&d); | |
1834 | return (rc); | |
1835 | } | |
1836 | ||
1837 | /* --- @dump_duration@ --- * | |
1838 | * | |
1839 | * Arguments: @const union tvec_regval *rv@ = register value | |
1840 | * @const struct tvec_regdef *rd@ = register definition | |
1841 | * @unsigned style@ = output style (@TVSF_...@) | |
1842 | * @const struct gprintf_ops *gops@, @void *gp@ = format output | |
1843 | * | |
1844 | * Returns: --- | |
1845 | * | |
1846 | * Use: Dump a register value to the format output. | |
1847 | * | |
1848 | * Durations are dumped as a human-palatable scaled value with | |
1849 | * unit, and, if compact style is not requested, as a raw number | |
1850 | * of seconds at full precision as a comment. | |
1851 | */ | |
1852 | ||
1853 | static void dump_duration(const union tvec_regval *rv, | |
1854 | const struct tvec_regdef *rd, | |
1855 | unsigned style, | |
1856 | const struct gprintf_ops *gops, void *go) | |
1857 | { | |
1858 | const struct duration_unit *u; | |
1859 | double t = rv->f; | |
1860 | ||
1861 | if (!t) u = 0; | |
1862 | else { | |
1863 | for (u = duration_units; u->scale > t && u[1].unit; u++); | |
1864 | t /= u->scale; | |
1865 | } | |
1866 | ||
1867 | gprintf(gops, go, "%.4g %s", t, u ? u->unit : "s"); | |
1868 | if (!(style&TVSF_COMPACT)) { | |
1869 | gprintf(gops, go, "; = "); | |
1870 | format_floating(gops, go, rv->f); | |
1871 | gprintf(gops, go, " s"); | |
1872 | } | |
1873 | } | |
1874 | ||
1875 | /* Duration type definition. */ | |
/* Only parsing and dumping are specific to durations; every other entry is
 * shared with the plain floating-point type.
 */
1876 | const struct tvec_regty tvty_duration = { | |
1877 | init_float, trivial_release, eq_float, | |
1878 | tobuf_float, frombuf_float, | |
1879 | parse_duration, dump_duration | |
1880 | }; | |
1881 | ||
b64eb60f MW |
1882 | /*----- Enumerations ------------------------------------------------------*/ |
1883 | ||
c81c35df MW |
1884 | /* --- @init_tenum@ --- * |
1885 | * | |
1886 | * Arguments: @union tvec_regval *rv@ = register value | |
1887 | * @const struct tvec_regdef *rd@ = register definition | |
1888 | * | |
1889 | * Returns: --- | |
1890 | * | |
1891 | * Use: Initialize a register value. | |
1892 | * | |
1893 | * Integer and floating-point enumeration values are initialized | |
1894 | * as their underlying representations. Pointer enumerations | |
1895 | * are initialized to %|#nil|%. | |
1896 | */ | |
1897 | ||
3efcfd2d MW |
1898 | #define init_ienum init_int |
1899 | #define init_uenum init_uint | |
1900 | #define init_fenum init_float | |
c81c35df | 1901 | |
3efcfd2d MW |
1902 | static void init_penum(union tvec_regval *rv, const struct tvec_regdef *rd) |
1903 | { rv->p = 0; } | |
b64eb60f | 1904 | |
c81c35df MW |
1905 | /* --- @eq_tenum@ --- * |
1906 | * | |
1907 | * Arguments: @const union tvec_regval *rv0, *rv1@ = register values | |
1908 | * @const struct tvec_regdef *rd@ = register definition | |
1909 | * | |
1910 | * Returns: Nonzero if the values are equal, zero if unequal | |
1911 | * | |
1912 | * Use: Compare register values for equality. | |
1913 | * | |
1914 | * Integer and floating-point enumeration values are compared as | |
1915 | * their underlying representations; in particular, floating- | |
1916 | * point enumerations may compare equal if their absolute or | |
1917 | * relative difference is sufficiently small. Pointer | |
1918 | * enumerations are compared as pointers. | |
1919 | */ | |
1920 | ||
3efcfd2d MW |
1921 | #define eq_ienum eq_int |
1922 | #define eq_uenum eq_uint | |
c81c35df | 1923 | |
3efcfd2d MW |
1924 | static int eq_fenum(const union tvec_regval *rv0, |
1925 | const union tvec_regval *rv1, | |
1926 | const struct tvec_regdef *rd) | |
b64eb60f | 1927 | { |
3efcfd2d MW |
1928 | const struct tvec_fenuminfo *ei = rd->arg.p; |
1929 | return (eqish_floating_p(rv0->f, rv1->f, ei->fi)); | |
b64eb60f | 1930 | } |
c81c35df | 1931 | |
3efcfd2d MW |
1932 | static int eq_penum(const union tvec_regval *rv0, |
1933 | const union tvec_regval *rv1, | |
1934 | const struct tvec_regdef *rd) | |
1935 | { return (rv0->p == rv1->p); } | |
b64eb60f | 1936 | |
c81c35df MW |
1937 | /* --- @tobuf_tenum@ --- * |
1938 | * | |
1939 | * Arguments: @buf *b@ = buffer | |
1940 | * @const union tvec_regval *rv@ = register value | |
1941 | * @const struct tvec_regdef *rd@ = register definition | |
1942 | * | |
1943 | * Returns: Zero on success, %$-1$% on failure. | |
1944 | * | |
1945 | * Use: Serialize a register value to a buffer. | |
1946 | * | |
1947 | * Integer and floating-point enumeration values are serialized | |
1948 | * as their underlying representations. Pointer enumerations | |
1949 | * are serialized as the signed integer index into the | |
1950 | * association table; %|#nil|% serializes as %$-1$%, and | |
1951 | * unrecognized pointers cause failure. | |
1952 | */ | |
1953 | ||
3efcfd2d MW |
1954 | #define tobuf_ienum tobuf_int |
1955 | #define tobuf_uenum tobuf_uint | |
1956 | #define tobuf_fenum tobuf_float | |
c81c35df | 1957 | |
3efcfd2d MW |
1958 | static int tobuf_penum(buf *b, const union tvec_regval *rv, |
1959 | const struct tvec_regdef *rd) | |
b64eb60f | 1960 | { |
3efcfd2d | 1961 | const struct tvec_penuminfo *pei = rd->arg.p; |
e63124bc MW |
1962 | const struct tvec_passoc *pa; |
1963 | long i; | |
b64eb60f | 1964 | |
3efcfd2d MW |
1965 | for (pa = pei->av, i = 0; pa->tag; pa++, i++) |
1966 | if (pa->p == rv->p) goto found; | |
1967 | if (!rv->p) i = -1; | |
1968 | else return (-1); | |
1969 | found: | |
1970 | return (signed_to_buf(b, i)); | |
b64eb60f MW |
1971 | } |
1972 | ||
c81c35df MW |
1973 | /* --- @frombuf_tenum@ --- * |
1974 | * | |
1975 | * Arguments: @buf *b@ = buffer | |
1976 | * @union tvec_regval *rv@ = register value | |
1977 | * @const struct tvec_regdef *rd@ = register definition | |
1978 | * | |
1979 | * Returns: Zero on success, %$-1$% on failure. | |
1980 | * | |
1981 | * Use: Deserialize a register value from a buffer. | |
1982 | * | |
1983 | * Integer and floating-point enumeration values are serialized | |
1984 | * as their underlying representations. Pointer enumerations | |
1985 | * are serialized as the signed integer index into the | |
1986 | * association table; %|#nil|% serializes as %$-1$%; out-of- | |
1987 | * range indices cause failure. | |
1988 | */ | |
1989 | ||
3efcfd2d MW |
1990 | #define frombuf_ienum frombuf_int |
1991 | #define frombuf_uenum frombuf_uint | |
1992 | #define frombuf_fenum frombuf_float | |
1993 | static int frombuf_penum(buf *b, union tvec_regval *rv, | |
b64eb60f MW |
1994 | const struct tvec_regdef *rd) |
1995 | { | |
3efcfd2d | 1996 | const struct tvec_penuminfo *pei = rd->arg.p; |
e63124bc MW |
1997 | const struct tvec_passoc *pa; |
1998 | long i, n; | |
b64eb60f | 1999 | |
3efcfd2d MW |
2000 | for (pa = pei->av, n = 0; pa->tag; pa++, n++); |
2001 | if (signed_from_buf(b, &i)) return (-1); | |
2002 | if (0 <= i && i < n) rv->p = (/*unconst*/ void *)pei->av[i].p; | |
2003 | else if (i == -1) rv->p = 0; | |
2004 | else return (-1); | |
2005 | return (0); | |
b64eb60f MW |
2006 | } |
2007 | ||
c81c35df MW |
2008 | /* --- @parse_tenum@ --- * |
2009 | * | |
2010 | * Arguments: @union tvec_regval *rv@ = register value | |
2011 | * @const struct tvec_regdef *rd@ = register definition | |
2012 | * @struct tvec_state *tv@ = test-vector state | |
2013 | * | |
2014 | * Returns: Zero on success, %$-1$% on error. | |
2015 | * | |
2016 | * Use: Parse a register value from an input file. | |
2017 | * | |
2018 | * An enumerated value may be given by name or as a literal | |
2019 | * value. For enumerations based on numeric types, the literal | |
2020 | * values can be written in the same syntax as the underlying | |
2021 | * values. For enumerations based on pointers, the only | |
2022 | * permitted literal is %|#nil|%, which denotes a null pointer. | |
2023 | */ | |
2024 | ||
3efcfd2d MW |
/* Template for the enumeration parsers.  @TVEC_MISCSLOTS@ (defined
 * elsewhere) is handed this macro and presumably invokes it once per slot
 * kind with a tag, a C type, and a slot name; each invocation defines a
 * function @parse_SLOTenum@.
 *
 * The generated function reads one word, and searches the association table
 * for an entry whose tag matches it exactly.  On a match, @FOUND_TAG@ stores
 * the associated value; otherwise @MISSING_TAG@ tries to interpret the word
 * as a literal, jumping to @end@ with @rc@ set to %$-1$% on failure.  Either
 * way, the rest of the line must then be empty.  The word buffer is
 * released on every path.
 */
2025 | #define DEFPARSE_ENUM(tag_, ty, slot) \ | |
2026 | static int parse_##slot##enum(union tvec_regval *rv, \ | |
2027 | const struct tvec_regdef *rd, \ | |
2028 | struct tvec_state *tv) \ | |
2029 | { \ | |
2030 | const struct tvec_##slot##enuminfo *ei = rd->arg.p; \ | |
2031 | const struct tvec_##slot##assoc *a; \ | |
2032 | dstr d = DSTR_INIT; \ | |
2033 | int rc; \ | |
2034 | \ | |
2035 | if (tvec_readword(tv, &d, ";", "enumeration tag or " LITSTR_##tag_)) \ | |
2036 | { rc = -1; goto end; } \ | |
2037 | for (a = ei->av; a->tag; a++) \ | |
2038 | if (STRCMP(a->tag, ==, d.buf)) { FOUND_##tag_ goto done; } \ | |
2039 | MISSING_##tag_ \ | |
2040 | done: \ | |
2041 | if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; } \ | |
2042 | rc = 0; \ | |
2043 | end: \ | |
2044 | dstr_destroy(&d); \ | |
2045 | return (rc); \ | |
2046 | } | |
b64eb60f | 2047 |
/* Per-kind pieces: @LITSTR_...@ completes the `expected' message passed to
 * @tvec_readword@; @FOUND_...@ copies the value out of the matching
 * association; @MISSING_...@ parses a literal.  Numeric kinds defer to the
 * parser for the underlying type, passing the range or float info held in
 * the enumeration info.
 */
3efcfd2d MW |
2048 | #define LITSTR_INT "literal signed integer" | |
2049 | #define FOUND_INT rv->i = a->i; | |
2050 | #define MISSING_INT if (parse_signed(&rv->i, d.buf, ei->ir, tv)) \ | |
2051 | { rc = -1; goto end; } | |
2052 | | |
2053 | #define LITSTR_UINT "literal unsigned integer" | |
2054 | #define FOUND_UINT rv->u = a->u; | |
2055 | #define MISSING_UINT if (parse_unsigned(&rv->u, d.buf, ei->ur, tv)) \ | |
2056 | { rc = -1; goto end; } | |
2057 | | |
2058 | #define LITSTR_FLT "literal floating-point number, " \ | |
2059 | "`#-inf', `#+inf', or `#nan'" | |
2060 | #define FOUND_FLT rv->f = a->f; | |
814e42ff | 2061 | #define MISSING_FLT if (parse_floating(&rv->f, 0, d.buf, ei->fi, tv)) \ |
3efcfd2d MW |
2062 | { rc = -1; goto end; } | |
2063 | | |
/* The only literal accepted for a pointer enumeration is `#nil'; anything
 * else is reported as an unknown value of the named enumeration.
 */
2064 | #define LITSTR_PTR "`#nil'" | |
2065 | #define FOUND_PTR rv->p = (/*unconst*/ void *)a->p; | |
2066 | #define MISSING_PTR if (STRCMP(d.buf, ==, "#nil")) \ | |
2067 | rv->p = 0; \ | |
2068 | else { \ | |
2069 | tvec_error(tv, "unknown `%s' value `%s'", \ | |
2070 | ei->name, d.buf); \ | |
2071 | rc = -1; goto end; \ | |
2072 | } | |
2073 | | |
2074 | TVEC_MISCSLOTS(DEFPARSE_ENUM) | |
2075 | | |
2076 | #undef LITSTR_INT | |
2077 | #undef FOUND_INT | |
2078 | #undef MISSING_INT | |
2079 | | |
2080 | #undef LITSTR_UINT | |
2081 | #undef FOUND_UINT | |
2082 | #undef MISSING_UINT | |
2083 | | |
2084 | #undef LITSTR_FLT | |
2085 | #undef FOUND_FLT | |
2086 | #undef MISSING_FLT | |
2087 | | |
2088 | #undef LITSTR_PTR | |
2089 | #undef FOUND_PTR | |
2090 | #undef MISSING_PTR | |
2091 | | |
2092 | #undef DEFPARSE_ENUM | |
2093 | ||
c81c35df MW |
2094 | /* --- @dump_tenum@ --- * |
2095 | * | |
2096 | * Arguments: @const union tvec_regval *rv@ = register value | |
2097 | * @const struct tvec_regdef *rd@ = register definition | |
2098 | * @unsigned style@ = output style (@TVSF_...@) | |
2099 | * @const struct gprintf_ops *gops@, @void *go@ = format output | |
2100 | * | |
2101 | * Returns: --- | |
2102 | * | |
2103 | * Use: Dump a register value to the format output. | |
2104 | * | |
2105 | * Enumeration values are dumped as their symbolic names, if | |
2106 | * possible, with the underlying values provided as a comment | |
2107 | * unless compact output is requested, as for the underlying | |
2108 | * representation. A null pointer is printed as %|#nil|%; | |
2109 | * non-null pointers are printed as %|#<TYPE PTR>|%, with the | |
2110 | * enumeration TYPE and the raw pointer PTR printed with the | |
2111 | * system's %|%p|% format specifier. | |
2112 | */ | |
2113 | ||
2114 | ||
3efcfd2d MW |
/* Template for the enumeration dumpers; as used below, each invocation by
 * @TVEC_MISCSLOTS@ defines a function @dump_SLOTenum@.
 *
 * The generated function prints the tag of the first association whose
 * value compares equal to the register value.  In compact style it stops
 * there; otherwise it prints ` ; = ' and leaves the loop, so that the raw
 * value printed by @PRINTRAW_TAG@ appears as a comment.  If no association
 * matches, the loop ends with @a->tag@ null and only the raw value is
 * printed.
 */
2115 | #define DEFDUMP_ENUM(tag_, ty, slot) \ | |
2116 | static void dump_##slot##enum(const union tvec_regval *rv, \ | |
2117 | const struct tvec_regdef *rd, \ | |
2118 | unsigned style, \ | |
2119 | const struct gprintf_ops *gops, void *go) \ | |
2120 | { \ | |
2121 | const struct tvec_##slot##enuminfo *ei = rd->arg.p; \ | |
2122 | const struct tvec_##slot##assoc *a; \ | |
2123 | \ | |
2124 | for (a = ei->av; a->tag; a++) \ | |
2125 | if (rv->slot == a->slot) { \ | |
2126 | gprintf(gops, go, "%s", a->tag); \ | |
2127 | if (style&TVSF_COMPACT) return; \ | |
2128 | gprintf(gops, go, " ; = "); break; \ | |
2129 | } \ | |
2130 | \ | |
2131 | PRINTRAW_##tag_ \ | |
b64eb60f MW |
2132 | } | |
2133 | | |
/* @MAYBE_PRINT_EXTRA@ is a statement prefix: the statement written after it
 * is labelled @_extra@ and sits in a final @else@ branch which can't be
 * reached by falling through (the preceding condition is the constant 1),
 * so it runs only when one of the earlier branches jumps to the label.
 * Hence, in compact style the statement is skipped entirely; if no tag was
 * printed, it is introduced by ` ; = ', starting a comment; and otherwise by
 * ` = ', continuing the comment already begun after the tag.
 */
3efcfd2d | 2134 | #define MAYBE_PRINT_EXTRA \ |
c81c35df | 2135 | if (style&TVSF_COMPACT) /* nothing to do */; \ |
3efcfd2d MW |
2136 | else if (!a->tag) { gprintf(gops, go, " ; = "); goto _extra; } \ | |
2137 | else if (1) { gprintf(gops, go, " = "); goto _extra; } \ | |
2138 | else _extra: | |
b64eb60f | 2139 |
/* Raw-value printers.  Integers are printed in decimal, with hex and
 * (maybe) character forms as the optional extra; floating-point values and
 * pointers have no extra form.
 */
3efcfd2d MW |
2140 | #define PRINTRAW_INT gprintf(gops, go, "%ld", rv->i); \ | |
2141 | MAYBE_PRINT_EXTRA { \ | |
2142 | format_signed_hex(gops, go, rv->i); \ | |
2143 | maybe_format_signed_char(gops, go, rv->i); \ | |
2144 | } | |
b64eb60f | 2145 |
3efcfd2d MW |
2146 | #define PRINTRAW_UINT gprintf(gops, go, "%lu", rv->u); \ | |
2147 | MAYBE_PRINT_EXTRA { \ | |
2148 | format_unsigned_hex(gops, go, rv->u); \ | |
2149 | maybe_format_unsigned_char(gops, go, rv->u); \ | |
2150 | } | |
2151 | | |
2152 | #define PRINTRAW_FLT format_floating(gops, go, rv->f); | |
2153 | | |
2154 | #define PRINTRAW_PTR if (!rv->p) gprintf(gops, go, "#nil"); \ | |
e63124bc | 2155 | else gprintf(gops, go, "#<%s %p>", ei->name, rv->p); |
b64eb60f | 2156 |
3efcfd2d | 2157 | TVEC_MISCSLOTS(DEFDUMP_ENUM) |
b64eb60f | 2158 |
3efcfd2d MW |
2159 | #undef PRINTRAW_INT | |
2160 | #undef PRINTRAW_UINT | |
2161 | #undef PRINTRAW_FLT | |
2162 | #undef PRINTRAW_PTR | |
2163 | | |
2164 | #undef MAYBE_PRINT_EXTRA | |
2165 | #undef DEFDUMP_ENUM | |
2166 | ||
c81c35df | 2167 | /* Enumeration type definitions. */ |
3efcfd2d MW |
/* Define one @tvty_SLOTenum@ type structure per slot kind, assembled from
 * the per-kind functions and aliases defined above; release is the shared
 * @trivial_release@ in every case.
 */
2168 | #define DEFTY_ENUM(tag, ty, slot) \ | |
2169 | const struct tvec_regty tvty_##slot##enum = { \ | |
2170 | init_##slot##enum, trivial_release, eq_##slot##enum, \ | |
2171 | tobuf_##slot##enum, frombuf_##slot##enum, \ | |
2172 | parse_##slot##enum, dump_##slot##enum \ | |
2173 | }; | |
2174 | TVEC_MISCSLOTS(DEFTY_ENUM) | |
2175 | #undef DEFTY_ENUM | |
b64eb60f | 2176 | |
c81c35df | 2177 | /* Predefined enumeration types. */ |
e63124bc MW |
/* Boolean tags: several spellings map to each of 0 and 1.  Order matters
 * for output, because the integer-enumeration dumper prints the tag of the
 * first entry whose value matches: hence 0 is dumped as `nil' and 1 as `t'.
 */
2178 | static const struct tvec_iassoc bool_assoc[] = { | |
2179 | { "nil", 0 }, | |
2180 | { "false", 0 }, | |
2181 | { "f", 0 }, | |
2182 | { "no", 0 }, | |
2183 | { "n", 0 }, | |
2184 | { "off", 0 }, | |
2185 | | |
2186 | { "t", 1 }, | |
2187 | { "true", 1 }, | |
2188 | { "yes", 1 }, | |
2189 | { "y", 1 }, | |
2190 | { "on", 1 }, | |
2191 | | |
20ba6b0b | 2192 | TVEC_ENDENUM |
e63124bc MW |
2193 | }; | |
2194 | | |
/* Enumeration info: the name `bool', the tag table above, and the
 * @tvrange_int@ range for literal values.
 */
2195 | const struct tvec_ienuminfo tvenum_bool = | |
3efcfd2d | 2196 | { "bool", bool_assoc, &tvrange_int }; |
e63124bc | 2197 | |
20ba6b0b MW |
/* Comparison-result tags, mapping to %$-1$%, 0, and %$+1$%.  The symbolic
 * spelling comes first in each group, so it is the one which the
 * integer-enumeration dumper (which prints the first matching tag) emits.
 */
2198 | static const struct tvec_iassoc cmp_assoc[] = { | |
2199 | { "<", -1 }, | |
2200 | { "less", -1 }, | |
2201 | { "lt", -1 }, | |
2202 | | |
2203 | { "=", 0 }, | |
2204 | { "equal", 0 }, | |
2205 | { "eq", 0 }, | |
2206 | | |
2207 | { ">", +1 }, | |
2208 | { "greater", +1 }, | |
2209 | { "gt", +1 }, | |
2210 | | |
2211 | TVEC_ENDENUM | |
2212 | }; | |
2213 | | |
/* Enumeration info: the name `cmp', the tag table above, and the
 * @tvrange_int@ range for literal values.
 */
2214 | const struct tvec_ienuminfo tvenum_cmp = | |
2215 | { "cmp", cmp_assoc, &tvrange_int }; | |
2216 | ||
67b5031e MW |
2217 | /* --- @tvec_claimeq_tenum@ --- * |
2218 | * | |
2219 | * Arguments: @struct tvec_state *tv@ = test-vector state | |
2220 | * @const struct tvec_typeenuminfo *ei@ = enumeration type info | |
2221 | * @ty t0, t1@ = two values | |
2222 | * @const char *file@, @unsigned lno@ = calling file and line | |
2223 | * @const char *expr@ = the expression to quote on failure | |
2224 | * | |
2225 | * Returns: Nonzero if @t0@ and @t1@ are equal, otherwise zero. | |
2226 | * | |
2227 | * Use: Check that values of @t0@ and @t1@ are equal. As for | |
2228 | * @tvec_claim@ above, a test case is automatically begun and | |
2229 | * ended if none is already underway. If the values are | |
2230 | * unequal, then @tvec_fail@ is called, quoting @expr@, and the | |
2231 | * mismatched values are dumped: @t0@ is printed as the output | |
2232 | * value and @t1@ is printed as the input reference. | |
2233 | */ | |
2234 | ||
/* Template for the @tvec_claimeq_SLOTenum@ functions.  Each one passes the
 * enumeration info as the register argument, stores @e0@ in the first
 * output register and @e1@ in the first input register, and then lets
 * @tvec_claimeq@ do the comparison using the matching enumeration type.
 *
 * @GET_TAG@ converts the caller's value for storage in the register slot:
 * this is the identity for the numeric kinds, and a cast which discards
 * @const@ for pointers.
 */
b64eb60f | 2235 | #define DEFCLAIM(tag, ty, slot) \ |
e63124bc MW |
2236 | int tvec_claimeq_##slot##enum \ | |
2237 | (struct tvec_state *tv, \ | |
2238 | const struct tvec_##slot##enuminfo *ei, ty e0, ty e1, \ | |
2239 | const char *file, unsigned lno, const char *expr) \ | |
b64eb60f MW |
2240 | { \ | |
2241 | union tvec_misc arg; \ | |
2242 | \ | |
b64eb60f | 2243 | arg.p = ei; \ |
3efcfd2d MW |
2244 | tv->out[0].v.slot = GET_##tag(e0); \ | |
2245 | tv->in[0].v.slot = GET_##tag(e1); \ | |
2246 | return (tvec_claimeq(tv, &tvty_##slot##enum, &arg, \ | |
2247 | file, lno, expr)); \ | |
b64eb60f MW |
2248 | } | |
2249 | #define GET_INT(e) (e) | |
2250 | #define GET_UINT(e) (e) | |
e63124bc | 2251 | #define GET_FLT(e) (e) |
b64eb60f MW |
2252 | #define GET_PTR(e) ((/*unconst*/ void *)(e)) | |
2253 | TVEC_MISCSLOTS(DEFCLAIM) | |
2254 | #undef DEFCLAIM | |
2255 | #undef GET_INT | |
2256 | #undef GET_UINT | |
e63124bc | 2257 | #undef GET_FLT |
b64eb60f MW |
2258 | #undef GET_PTR | |
2259 | ||
2260 | /*----- Flag types --------------------------------------------------------*/ | |
2261 | ||
c81c35df MW |
2262 | /* Flag types are initialized, compared, and serialized as unsigned |
2263 | * integers. | |
2264 | */ | |
2265 | ||
2266 | /* --- @parse_flags@ --- * | |
2267 | * | |
2268 | * Arguments: @union tvec_regval *rv@ = register value | |
2269 | * @const struct tvec_regdef *rd@ = register definition | |
2270 | * @struct tvec_state *tv@ = test-vector state | |
2271 | * | |
2272 | * Returns: Zero on success, %$-1$% on error. | |
2273 | * | |
2274 | * Use: Parse a register value from an input file. | |
2275 | * | |
2276 | * The input syntax is a sequence of items separated by `|' | |
2277 | * signs. Each item may be the symbolic name of a field value, | |
2278 | * or a literal unsigned integer. The masks associated with the | |
2279 | * given symbolic names must be disjoint. The resulting | |
2280 | * numerical value is simply the bitwise OR of the given values. | |
2281 | */ | |
2282 | ||
882a39c1 MW |
2283 | static int parse_flags(union tvec_regval *rv, const struct tvec_regdef *rd, |
2284 | struct tvec_state *tv) | |
b64eb60f MW |
2285 | { |
2286 | const struct tvec_flaginfo *fi = rd->arg.p; | |
2287 | const struct tvec_flag *f; | |
2288 | unsigned long m = 0, v = 0, t; | |
2289 | dstr d = DSTR_INIT; | |
882a39c1 | 2290 | int ch, rc; |
b64eb60f MW |
2291 | |
2292 | for (;;) { | |
c81c35df MW |
2293 | |
2294 | /* Read the next item. */ | |
882a39c1 MW |
2295 | DRESET(&d); |
2296 | if (tvec_readword(tv, &d, "|;", "flag name or integer")) | |
2297 | { rc = -1; goto end; } | |
b64eb60f | 2298 | |
c81c35df | 2299 | /* Try to find a matching entry in the table. */ |
b64eb60f MW |
2300 | for (f = fi->fv; f->tag; f++) |
2301 | if (STRCMP(f->tag, ==, d.buf)) { | |
882a39c1 MW |
2302 | if (m&f->m) |
2303 | { tvec_error(tv, "colliding flag setting"); rc = -1; goto end; } | |
2304 | else | |
2305 | { m |= f->m; v |= f->v; goto next; } | |
b64eb60f MW |
2306 | } |
2307 | ||
c81c35df | 2308 | /* Otherwise, try to parse it as a raw integer. */ |
e63124bc MW |
2309 | if (parse_unsigned(&t, d.buf, fi->range, tv)) |
2310 | { rc = -1; goto end; } | |
882a39c1 | 2311 | v |= t; |
c81c35df | 2312 | |
b64eb60f | 2313 | next: |
c81c35df MW |
2314 | /* Advance to the next token. If it's a separator then consume it, and |
2315 | * go round again. Otherwise we stop here. | |
2316 | */ | |
b64eb60f | 2317 | if (tvec_nexttoken(tv)) break; |
882a39c1 MW |
2318 | ch = getc(tv->fp); |
2319 | if (ch != '|') { tvec_syntax(tv, ch, "`|'"); rc = -1; goto end; } | |
c81c35df | 2320 | if (tvec_nexttoken(tv)) |
882a39c1 | 2321 | { tvec_syntax(tv, '\n', "flag name or integer"); rc = -1; goto end; } |
b64eb60f | 2322 | } |
c81c35df MW |
2323 | |
2324 | /* Done. */ | |
2325 | rv->u = v; rc = 0; | |
882a39c1 MW |
2326 | end: |
2327 | dstr_destroy(&d); | |
2328 | return (rc); | |
b64eb60f MW |
2329 | } |
2330 | ||
c81c35df MW |
2331 | /* --- @dump_flags@ --- * |
2332 | * | |
2333 | * Arguments: @const union tvec_regval *rv@ = register value | |
2334 | * @const struct tvec_regdef *rd@ = register definition | |
2335 | * @unsigned style@ = output style (@TVSF_...@) | |
2336 | * @const struct gprintf_ops *gops@, @void *gp@ = format output | |
2337 | * | |
2338 | * Returns: --- | |
2339 | * | |
2340 | * Use: Dump a register value to the format output. | |
2341 | * | |
2342 | * The table of symbolic names and their associated values and | |
2343 | * masks is repeatedly scanned, in order, to find disjoint | |
2344 | * matches -- i.e., entries whose value matches the target value | |
2345 | * in the bit positions indicated by the mask, and whose mask | |
2346 | * doesn't overlap with any previously found matches; the names | |
2347 | * are then output, separated by `|'. Any remaining nonzero | |
2348 | * bits not covered by any of the matching masks are output as a | |
2349 | * single literal integer, in hex. | |
2350 | * | |
2351 | * Unless compact output is requested, or no symbolic names were | |
2352 | * found, the raw numeric value is also printed in hex, as a | |
2353 | * comment. | |
2354 | */ | |
2355 | ||
b64eb60f MW |
2356 | static void dump_flags(const union tvec_regval *rv, |
2357 | const struct tvec_regdef *rd, | |
e63124bc MW |
2358 | unsigned style, |
2359 | const struct gprintf_ops *gops, void *go) | |
b64eb60f MW |
2360 | { |
2361 | const struct tvec_flaginfo *fi = rd->arg.p; | |
2362 | const struct tvec_flag *f; | |
c81c35df | 2363 | unsigned long m = ~0ul, v = rv->u; |
b64eb60f MW |
2364 | const char *sep; |
2365 | ||
2366 | for (f = fi->fv, sep = ""; f->tag; f++) | |
2367 | if ((m&f->m) && (v&f->m) == f->v) { | |
e63124bc | 2368 | gprintf(gops, go, "%s%s", sep, f->tag); m &= ~f->m; |
b64eb60f MW |
2369 | sep = style&TVSF_COMPACT ? "|" : " | "; |
2370 | } | |
2371 | ||
e63124bc | 2372 | if (v&m) gprintf(gops, go, "%s0x%0*lx", sep, hex_width(v), v&m); |
b64eb60f | 2373 | |
c81c35df | 2374 | if (m != ~0ul && !(style&TVSF_COMPACT)) |
e63124bc | 2375 | gprintf(gops, go, " ; = 0x%0*lx", hex_width(rv->u), rv->u); |
b64eb60f MW |
2376 | } |
2377 | ||
c81c35df | 2378 | /* Flags type definition. */ |
/* Only parsing and dumping are specific to flags; every other entry is
 * shared with the plain unsigned-integer type.
 */
b64eb60f | 2379 | const struct tvec_regty tvty_flags = { |
3efcfd2d | 2380 | init_uint, trivial_release, eq_uint, |
b64eb60f MW |
2381 | tobuf_uint, frombuf_uint, | |
2382 | parse_flags, dump_flags | |
2383 | }; | |
2384 | ||
67b5031e MW |
2385 | /* --- @tvec_claimeq_flags@ --- * |
2386 | * | |
2387 | * Arguments: @struct tvec_state *tv@ = test-vector state | |
2388 | * @const struct tvec_flaginfo *fi@ = flags type info | |
2389 | * @unsigned long f0, f1@ = two values | |
2390 | * @const char *file@, @unsigned lno@ = calling file and line | |
2391 | * @const char *expr@ = the expression to quote on failure | |
2392 | * | |
2393 | * Returns: Nonzero if @f0@ and @f1@ are equal, otherwise zero. | |
2394 | * | |
2395 | * Use: Check that values of @f0@ and @f1@ are equal. As for | |
2396 | * @tvec_claim@ above, a test case is automatically begun and | |
2397 | * ended if none is already underway. If the values are | |
2398 | * unequal, then @tvec_fail@ is called, quoting @expr@, and the | |
2399 | * mismatched values are dumped: @f0@ is printed as the output | |
2400 | * value and @f1@ is printed as the input reference. | |
2401 | */ | |
2402 | ||
b64eb60f MW |
2403 | int tvec_claimeq_flags(struct tvec_state *tv, |
2404 | const struct tvec_flaginfo *fi, | |
2405 | unsigned long f0, unsigned long f1, | |
2406 | const char *file, unsigned lno, const char *expr) | |
2407 | { | |
2408 | union tvec_misc arg; | |
2409 | ||
3efcfd2d | 2410 | arg.p = fi; tv->out[0].v.u = f0; tv->in[0].v.u = f1; |
b64eb60f MW |
2411 | return (tvec_claimeq(tv, &tvty_flags, &arg, file, lno, expr)); |
2412 | } | |
2413 | ||
e63124bc MW |
2414 | /*----- Characters --------------------------------------------------------*/ |
2415 | ||
c81c35df MW |
2416 | /* Character values are initialized and compared as signed integers. */ |
2417 | ||
2418 | /* --- @tobuf_char@ --- * | |
2419 | * | |
2420 | * Arguments: @buf *b@ = buffer | |
2421 | * @const union tvec_regval *rv@ = register value | |
2422 | * @const struct tvec_regdef *rd@ = register definition | |
2423 | * | |
2424 | * Returns: Zero on success, %$-1$% on failure. | |
2425 | * | |
2426 | * Use: Serialize a register value to a buffer. | |
2427 | * | |
2428 | * Character values are serialized as little-endian 32-bit | |
2429 | * unsigned integers, with %|EOF|% serialized as all-bits-set. | |
2430 | */ | |
2431 | ||
e63124bc | 2432 | static int tobuf_char(buf *b, const union tvec_regval *rv, |
67b5031e | 2433 | const struct tvec_regdef *rd) |
e63124bc MW |
2434 | { |
2435 | uint32 u; | |
c81c35df | 2436 | |
e63124bc MW |
2437 | if (0 <= rv->i && rv->i <= UCHAR_MAX) u = rv->i; |
2438 | else if (rv->i == EOF) u = MASK32; | |
2439 | else return (-1); | |
2440 | return (buf_putu32l(b, u)); | |
2441 | } | |
2442 | ||
c81c35df MW |
2443 | /* --- @frombuf_char@ --- * |
2444 | * | |
2445 | * Arguments: @buf *b@ = buffer | |
2446 | * @union tvec_regval *rv@ = register value | |
2447 | * @const struct tvec_regdef *rd@ = register definition | |
2448 | * | |
2449 | * Returns: Zero on success, %$-1$% on failure. | |
2450 | * | |
2451 | * Use: Deserialize a register value from a buffer. | |
2452 | * | |
2453 | * Character values are serialized as little-endian 32-bit | |
2454 | * unsigned integers, with %|EOF|% serialized as all-bits-set. | |
2455 | */ | |
2456 | ||
e63124bc | 2457 | static int frombuf_char(buf *b, union tvec_regval *rv, |
67b5031e | 2458 | const struct tvec_regdef *rd) |
e63124bc MW |
2459 | { |
2460 | uint32 u; | |
2461 | ||
2462 | if (buf_getu32l(b, &u)) return (-1); | |
2463 | if (0 <= u && u <= UCHAR_MAX) rv->i = u; | |
2464 | else if (u == MASK32) rv->i = EOF; | |
2465 | else return (-1); | |
2466 | return (0); | |
2467 | } | |
2468 | ||
c81c35df MW |
2469 | /* --- @parse_char@ --- * |
2470 | * | |
2471 | * Arguments: @union tvec_regval *rv@ = register value | |
2472 | * @const struct tvec_regdef *rd@ = register definition | |
2473 | * @struct tvec_state *tv@ = test-vector state | |
2474 | * | |
2475 | * Returns: Zero on success, %$-1$% on error. | |
2476 | * | |
2477 | * Use: Parse a register value from an input file. | |
2478 | * | |
2479 | * A character value can be given by symbolic name, with a | |
2480 | * leading `%|#|%'; or a character or `%|\|%'-escape sequence, | |
2481 | * optionally in single quotes. | |
2482 | * | |
2483 | * The following escape sequences and character names are | |
2484 | * recognized. | |
2485 | * | |
2486 | * * `%|#eof|%' is the special end-of-file marker. | |
2487 | * | |
2488 | * * `%|#nul|%' is the NUL character, sometimes used to | |
2489 | * terminate strings. | |
2490 | * | |
2491 | * * `%|bell|%', `%|bel|%', `%|ding|%', or `%|\a|%' is the BEL | |
2492 | * character used to ring the terminal bell (or do some other | |
2493 | * thing to attract the user's attention). | |
2494 | * | |
2495 | * * %|#backspace|%, %|#bs|%, or %|\b|% is the backspace | |
2496 | * character, used to move the cursor backwords by one cell. | |
2497 | * | |
2498 | * * %|#escape|% %|#esc|%, or%|\e|% is the escape character, | |
2499 | * used to introduce special terminal commands. | |
2500 | * | |
2501 | * * %|#formfeed|%, %|#ff|%, or %|\f|% is the formfeed | |
2502 | * character, used to separate pages of text. | |
2503 | * | |
2504 | * * %|#newline|%, %|#linefeed|%, %|#lf|%, %|#nl|%, or %|\n|% is | |
2505 | * the newline character, used to terminate lines of text or | |
2506 | * advance the cursor to the next line (perhaps without | |
2507 | * returning it to the start of the line). | |
2508 | * | |
2509 | * * %|#return|%, %|#carriage-return|%, %|#cr|%, or %|\r|% is | |
2510 | * the carriage-return character, used to return the cursor to | |
2511 | * the start of the line. | |
2512 | * | |
2513 | * * %|#tab|%, %|#horizontal-tab|%, %|#ht|%, or %|\t|% is the | |
2514 | * tab character, used to advance the cursor to the next tab | |
2515 | * stop on the current line. | |
2516 | * | |
2517 | * * %|#vertical-tab|%, %|#vt|%, %|\v|% is the vertical tab | |
2518 | * character. | |
2519 | * | |
2520 | * * %|#space|%, %|#spc|% is the space character. | |
2521 | * | |
2522 | * * %|#delete|%, %|#del|% is the delete character, used to | |
2523 | * erase the most recent character. | |
2524 | * | |
2525 | * * %|\'|% is the single-quote character. | |
2526 | * | |
2527 | * * %|\\|% is the backslash character. | |
2528 | * | |
2529 | * * %|\"|% is the double-quote character. | |
2530 | * | |
2531 | * * %|\NNN|% or %|\{NNN}|% is the character with code NNN in | |
2532 | * octal. The NNN may be up to three digits long. | |
2533 | * | |
2534 | * * %|\xNN|% or %|\x{NN}|% is the character with code NNN in | |
2535 | * hexadecimal. | |
2536 | */ | |
2537 | ||
e63124bc MW |
2538 | static int parse_char(union tvec_regval *rv, const struct tvec_regdef *rd, |
2539 | struct tvec_state *tv) | |
2540 | { | |
2541 | dstr d = DSTR_INIT; | |
2542 | int ch, rc; | |
2543 | unsigned f = 0; | |
2544 | #define f_quote 1u | |
2545 | ||
c81c35df | 2546 | /* Inspect the character to see what we're up against. */ |
e63124bc | 2547 | ch = getc(tv->fp); |
c81c35df | 2548 | |
e63124bc | 2549 | if (ch == '#') { |
c81c35df MW |
2550 | /* It looks like a special token. Push the `%|#|%' back and fetch the |
2551 | * whole word. If there's just the `%|#|%' after all, then treat it as | |
2552 | * literal. | |
2553 | */ | |
2554 | ||
e63124bc MW |
2555 | ungetc(ch, tv->fp); |
2556 | if (tvec_readword(tv, &d, ";", "character name")) { rc = -1; goto end; } | |
c81c35df MW |
2557 | if (STRCMP(d.buf, !=, "#")) { |
2558 | if (read_charname(&ch, d.buf, RCF_EOFOK)) { | |
2559 | rc = tvec_error(tv, "unknown character name `%s'", d.buf); | |
2560 | goto end; | |
2561 | } | |
2562 | if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; } | |
2563 | rv->i = ch; rc = 0; goto end; | |
e63124bc | 2564 | } |
e63124bc MW |
2565 | } |
2566 | ||
c81c35df MW |
2567 | /* If this is a single quote then we expect to see a matching one later, |
2568 | * and we should process backslash escapes. Get the next character and see | |
2569 | * what happens. | |
2570 | */ | |
e63124bc | 2571 | if (ch == '\'') { f |= f_quote; ch = getc(tv->fp); } |
c81c35df MW |
2572 | |
2573 | /* Main character dispatch. */ | |
e63124bc | 2574 | switch (ch) { |
c81c35df | 2575 | |
67b5031e | 2576 | case ';': |
c81c35df | 2577 | /* Unquoted, semicolon begins a comment. */ |
67b5031e | 2578 | if (!(f&f_quote)) { rc = tvec_syntax(tv, ch, "character"); goto end; } |
c81c35df MW |
2579 | else goto plain; |
2580 | ||
67b5031e | 2581 | case '\n': |
c81c35df MW |
2582 | /* A newline. If we saw a single quote, then treat that as literal. |
2583 | * Otherwise this is an error. | |
2584 | */ | |
2585 | if (!(f&f_quote)) goto nochar; | |
2586 | else { f &= ~f_quote; ungetc(ch, tv->fp); ch = '\''; goto plain; } | |
2587 | ||
67b5031e | 2588 | case EOF: |
c81c35df MW |
2589 | /* End-of-file. Similar to newline, but with slightly different |
2590 | * effects on the parse state. | |
2591 | */ | |
2592 | if (!(f&f_quote)) goto nochar; | |
2593 | else { f &= ~f_quote; ch = '\''; goto plain; } | |
2594 | ||
2595 | case '\'': nochar: | |
2596 | /* A single quote. This must be the second of a pair, and there should | |
2597 | * have been a character or escape sequence between them. | |
2598 | */ | |
e63124bc | 2599 | rc = tvec_syntax(tv, ch, "character"); goto end; |
c81c35df | 2600 | |
e63124bc | 2601 | case '\\': |
c81c35df | 2602 | /* A backslash. Read a character escape. */ |
67b5031e | 2603 | if (read_charesc(&ch, tv)) return (-1); |
c81c35df | 2604 | |
e63124bc | 2605 | default: plain: |
c81c35df | 2606 | /* Anything else. Treat as literal. */ |
e63124bc MW |
2607 | rv->i = ch; break; |
2608 | } | |
c81c35df MW |
2609 | |
2610 | /* If we saw an opening quote, then expect the closing quote. */ | |
e63124bc MW |
2611 | if (f&f_quote) { |
2612 | ch = getc(tv->fp); | |
2613 | if (ch != '\'') { rc = tvec_syntax(tv, ch, "`''"); goto end; } | |
2614 | } | |
c81c35df MW |
2615 | |
2616 | /* Done. */ | |
e63124bc MW |
2617 | if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; } |
2618 | rc = 0; | |
2619 | end: | |
2620 | dstr_destroy(&d); | |
2621 | return (rc); | |
2622 | ||
2623 | #undef f_quote | |
2624 | } | |
2625 | ||
c81c35df MW |
2626 | /* --- @dump_char@ --- * |
2627 | * | |
2628 | * Arguments: @const union tvec_regval *rv@ = register value | |
2629 | * @const struct tvec_regdef *rd@ = register definition | |
2630 | * @unsigned style@ = output style (@TVSF_...@) | |
2631 | * @const struct gprintf_ops *gops@, @void *gp@ = format output | |
2632 | * | |
2633 | * Returns: --- | |
2634 | * | |
2635 | * Use: Dump a register value to the format output. | |
2636 | * | |
2637 | * Character values are dumped as their symbolic names, if any, | |
2638 | * or as a character or escape sequence within single quotes | |
2639 | * (which may be omitted in compact style). If compact output | |
2640 | * is not requested, then the single-quoted representation (for | |
2641 | * characters dumped as symbolic names) and integer code in | |
2642 | * decimal and hex are printed as a comment. | |
2643 | */ | |
2644 | ||
e63124bc MW |
2645 | static void dump_char(const union tvec_regval *rv, |
2646 | const struct tvec_regdef *rd, | |
2647 | unsigned style, | |
2648 | const struct gprintf_ops *gops, void *go) | |
2649 | { | |
67b5031e MW |
2650 | const char *p; |
2651 | unsigned f = 0; | |
2652 | #define f_semi 1u | |
2653 | ||
c81c35df | 2654 | /* Print a character name if we can find one. */ |
67b5031e MW |
2655 | p = find_charname(rv->i, (style&TVSF_COMPACT) ? CTF_SHORT : CTF_PREFER); |
2656 | if (p) { | |
2657 | gprintf(gops, go, "%s", p); | |
2658 | if (style&TVSF_COMPACT) return; | |
2659 | else { gprintf(gops, go, " ;"); f |= f_semi; } | |
2660 | } | |
2661 | ||
c81c35df MW |
2662 | /* If the character isn't @EOF@ then print it as a single-quoted thing. |
2663 | * In compact style, see if we can omit the quotes. | |
2664 | */ | |
67b5031e MW |
2665 | if (rv->i >= 0) { |
2666 | if (f&f_semi) gprintf(gops, go, " = "); | |
2667 | switch (rv->i) { | |
2668 | case ' ': case '\\': case '\'': quote: | |
2669 | format_char(gops, go, rv->i); | |
2670 | break; | |
2671 | default: | |
2672 | if (!(style&TVSF_COMPACT) || !isprint(rv->i)) goto quote; | |
2673 | gprintf(gops, go, "%c", (int)rv->i); | |
2674 | return; | |
2675 | } | |
2676 | } | |
e63124bc | 2677 | |
c81c35df | 2678 | /* And the character code as an integer. */ |
e63124bc | 2679 | if (!(style&TVSF_COMPACT)) { |
67b5031e MW |
2680 | if (!(f&f_semi)) gprintf(gops, go, " ;"); |
2681 | gprintf(gops, go, " = %ld = ", rv->i); | |
3efcfd2d | 2682 | format_signed_hex(gops, go, rv->i); |
e63124bc | 2683 | } |
67b5031e MW |
2684 | |
2685 | #undef f_semi | |
e63124bc MW |
2686 | } |
2687 | ||
c81c35df | 2688 | /* Character type definition. */ |
e63124bc | 2689 | const struct tvec_regty tvty_char = { |
3efcfd2d | 2690 | init_int, trivial_release, eq_int, |
e63124bc MW |
2691 | tobuf_char, frombuf_char, |
2692 | parse_char, dump_char | |
2693 | }; | |
2694 | ||
67b5031e MW |
2695 | /* --- @tvec_claimeq_char@ --- * |
2696 | * | |
2697 | * Arguments: @struct tvec_state *tv@ = test-vector state | |
2698 | * @int ch0, ch1@ = two character codes | |
2699 | * @const char *file@, @unsigned @lno@ = calling file and line | |
2700 | * @const char *expr@ = the expression to quote on failure | |
2701 | * | |
2702 | * Returns: Nonzero if @ch0@ and @ch1@ are equal, otherwise zero. | |
2703 | * | |
2704 | * Use: Check that values of @ch0@ and @ch1@ are equal. As for | |
2705 | * @tvec_claim@ above, a test case is automatically begun and | |
2706 | * ended if none is already underway. If the values are | |
2707 | * unequal, then @tvec_fail@ is called, quoting @expr@, and the | |
2708 | * mismatched values are dumped: @ch0@ is printed as the output | |
2709 | * value and @ch1@ is printed as the input reference. | |
2710 | */ | |
2711 | ||
e63124bc MW |
2712 | int tvec_claimeq_char(struct tvec_state *tv, int c0, int c1, |
2713 | const char *file, unsigned lno, const char *expr) | |
2714 | { | |
3efcfd2d | 2715 | tv->out[0].v.i = c0; tv->in[0].v.i = c1; |
e63124bc MW |
2716 | return (tvec_claimeq(tv, &tvty_char, 0, file, lno, expr)); |
2717 | } | |
2718 | ||
b64eb60f MW |
2719 | /*----- Text and byte strings ---------------------------------------------*/ |
2720 | ||
c81c35df MW |
2721 | /* --- @init_text@, @init_bytes@ --- * |
2722 | * | |
2723 | * Arguments: @union tvec_regval *rv@ = register value | |
2724 | * @const struct tvec_regdef *rd@ = register definition | |
2725 | * | |
2726 | * Returns: --- | |
2727 | * | |
2728 | * Use: Initialize a register value. | |
2729 | * | |
2730 | * Text and binary string values are initialized with a null | |
2731 | * pointer and zero length. | |
2732 | */ | |
2733 | ||
2734 | static void init_text(union tvec_regval *rv, const struct tvec_regdef *rd) | |
2735 | { rv->text.p = 0; rv->text.sz = 0; } | |
b64eb60f MW |
2736 | |
2737 | static void init_bytes(union tvec_regval *rv, const struct tvec_regdef *rd) | |
2738 | { rv->bytes.p = 0; rv->bytes.sz = 0; } | |
2739 | ||
c81c35df MW |
2740 | /* --- @release_string@, @release_bytes@ --- * |
2741 | * | |
2742 | * Arguments: @const union tvec_regval *rv@ = register value | |
2743 | * @const struct tvec_regdef *rd@ = register definition | |
2744 | * | |
2745 | * Returns: --- | |
2746 | * | |
2747 | * Use: Release resources held by a register value. | |
2748 | * | |
2749 | * Text and binary string buffers are freed. | |
2750 | */ | |
2751 | ||
2752 | static void release_text(union tvec_regval *rv, | |
2753 | const struct tvec_regdef *rd) | |
2754 | { xfree(rv->text.p); } | |
b64eb60f MW |
2755 | |
2756 | static void release_bytes(union tvec_regval *rv, | |
2757 | const struct tvec_regdef *rd) | |
2758 | { xfree(rv->bytes.p); } | |
2759 | ||
c81c35df MW |
2760 | /* --- @eq_text@, @eq_bytes@ --- * |
2761 | * | |
2762 | * Arguments: @const union tvec_regval *rv0, *rv1@ = register values | |
2763 | * @const struct tvec_regdef *rd@ = register definition | |
2764 | * | |
2765 | * Returns: Nonzero if the values are equal, zero if unequal | |
2766 | * | |
2767 | * Use: Compare register values for equality. | |
2768 | */ | |
2769 | ||
2770 | static int eq_text(const union tvec_regval *rv0, | |
2771 | const union tvec_regval *rv1, | |
2772 | const struct tvec_regdef *rd) | |
b64eb60f | 2773 | { |
c81c35df MW |
2774 | return (rv0->text.sz == rv1->text.sz && |
2775 | (!rv0->text.sz || | |
2776 | MEMCMP(rv0->text.p, ==, rv1->text.p, rv1->text.sz))); | |
b64eb60f MW |
2777 | } |
2778 | ||
2779 | static int eq_bytes(const union tvec_regval *rv0, | |
2780 | const union tvec_regval *rv1, | |
2781 | const struct tvec_regdef *rd) | |
2782 | { | |
2783 | return (rv0->bytes.sz == rv1->bytes.sz && | |
2784 | (!rv0->bytes.sz || | |
2785 | MEMCMP(rv0->bytes.p, ==, rv1->bytes.p, rv1->bytes.sz))); | |
2786 | } | |
2787 | ||
c81c35df MW |
2788 | /* --- @tobuf_text@, @tobuf_bytes@ --- * |
2789 | * | |
2790 | * Arguments: @buf *b@ = buffer | |
2791 | * @const union tvec_regval *rv@ = register value | |
2792 | * @const struct tvec_regdef *rd@ = register definition | |
2793 | * | |
2794 | * Returns: Zero on success, %$-1$% on failure. | |
2795 | * | |
2796 | * Use: Serialize a register value to a buffer. | |
2797 | * | |
2798 | * Text and binary string values are serialized as a little- | |
2799 | * endian 64-bit length %$n$% in bytes followed by %$n$% bytes | |
2800 | * of string data. | |
2801 | */ | |
2802 | ||
2803 | static int tobuf_text(buf *b, const union tvec_regval *rv, | |
2804 | const struct tvec_regdef *rd) | |
2805 | { return (buf_putmem64l(b, rv->text.p, rv->text.sz)); } | |
b64eb60f MW |
2806 | |
2807 | static int tobuf_bytes(buf *b, const union tvec_regval *rv, | |
2808 | const struct tvec_regdef *rd) | |
c81c35df | 2809 | { return (buf_putmem64l(b, rv->bytes.p, rv->bytes.sz)); } |
b64eb60f | 2810 | |
c81c35df MW |
2811 | /* --- @frombuf_text@, @frombuf_bytes@ --- * |
2812 | * | |
2813 | * Arguments: @buf *b@ = buffer | |
2814 | * @union tvec_regval *rv@ = register value | |
2815 | * @const struct tvec_regdef *rd@ = register definition | |
2816 | * | |
2817 | * Returns: Zero on success, %$-1$% on failure. | |
2818 | * | |
2819 | * Use: Deserialize a register value from a buffer. | |
2820 | * | |
2821 | * Text and binary string values are serialized as a little- | |
2822 | * endian 64-bit length %$n$% in bytes followed by %$n$% bytes | |
2823 | * of string data. | |
2824 | */ | |
2825 | ||
2826 | static int frombuf_text(buf *b, union tvec_regval *rv, | |
2827 | const struct tvec_regdef *rd) | |
b64eb60f MW |
2828 | { |
2829 | const void *p; | |
2830 | size_t sz; | |
2831 | ||
c81c35df MW |
2832 | p = buf_getmem64l(b, &sz); if (!p) return (-1); |
2833 | tvec_alloctext(rv, sz); memcpy(rv->text.p, p, sz); rv->text.p[sz] = 0; | |
b64eb60f MW |
2834 | return (0); |
2835 | } | |
2836 | ||
2837 | static int frombuf_bytes(buf *b, union tvec_regval *rv, | |
2838 | const struct tvec_regdef *rd) | |
2839 | { | |
2840 | const void *p; | |
2841 | size_t sz; | |
2842 | ||
c81c35df | 2843 | p = buf_getmem64l(b, &sz); if (!p) return (-1); |
b64eb60f MW |
2844 | tvec_allocbytes(rv, sz); memcpy(rv->bytes.p, p, sz); |
2845 | return (0); | |
2846 | } | |
2847 | ||
c81c35df MW |
2848 | /* --- @check_string_length@ --- * |
2849 | * | |
2850 | * Arguments: @size_t sz@ = found string length | |
2851 | * @const struct tvec_urange *ur@ = acceptable range | |
2852 | * @struct tvec_state *tv@ = test-vector state | |
2853 | * | |
2854 | * Returns: Zero on success, %$-1$% on error. | |
2855 | * | |
2856 | * Use: Checks that @sz@ is within the bounds described by @ur@, | |
2857 | * reporting an error if not. | |
2858 | */ | |
2859 | ||
882a39c1 MW |
2860 | static int check_string_length(size_t sz, const struct tvec_urange *ur, |
2861 | struct tvec_state *tv) | |
b64eb60f MW |
2862 | { |
2863 | if (ur && (ur->min > sz || sz > ur->max)) | |
882a39c1 | 2864 | return (tvec_error(tv, |
67b5031e | 2865 | "invalid string length %lu; must be in [%lu .. %lu]", |
882a39c1 MW |
2866 | (unsigned long)sz, ur->min, ur->max)); |
2867 | return (0); | |
b64eb60f MW |
2868 | } |
2869 | ||
c81c35df MW |
2870 | /* --- @parse_text@, @parse_bytes@ --- * |
2871 | * | |
2872 | * Arguments: @union tvec_regval *rv@ = register value | |
2873 | * @const struct tvec_regdef *rd@ = register definition | |
2874 | * @struct tvec_state *tv@ = test-vector state | |
2875 | * | |
2876 | * Returns: Zero on success, %$-1$% on error. | |
2877 | * | |
2878 | * Use: Parse a register value from an input file. | |
2879 | * | |
2880 | * The input format for both kinds of strings is basically the | |
2881 | * same: a `compound string', consisting of | |
2882 | * | |
2883 | * * single-quoted strings, which are interpreted entirely | |
2884 | * literally, but can't contain single quotes or newlines; | |
2885 | * | |
2886 | * * double-quoted strings, in which `%|\|%'-escapes are | |
2887 | * interpreted as for characters; | |
2888 | * | |
2889 | * * character names, marked by an initial `%|#|%' sign; | |
2890 | * | |
2891 | * * special tokens marked by an initial `%|!|%' sign; or | |
2892 | * | |
2893 | * * barewords interpreted according to the current coding | |
2894 | * scheme. | |
2895 | * | |
2896 | * The special tokens are | |
2897 | * | |
2898 | * * `%|!bare|%', which causes subsequent sequences of | |
2899 | * barewords to be treated as plain text; | |
2900 | * | |
2901 | * * `%|!hex|%', `%|!base32|%', `%|!base64|%', which cause | |
2902 | * subsequent barewords to be decoded in the requested | |
2903 | * manner. | |
2904 | * | |
2905 | * * `%|!repeat|% %$n$% %|{|% %%\textit{string}%% %|}|%', | |
2906 | * which includes %$n$% copies of the (compound) string. | |
2907 | * | |
2908 | * The only difference between text and binary strings is that | |
2909 | * the initial coding scheme is %|bare|% for text strings and | |
2910 | * %|hex|% for binary strings. | |
2911 | */ | |
2912 | ||
2913 | static int parse_text(union tvec_regval *rv, const struct tvec_regdef *rd, | |
2914 | struct tvec_state *tv) | |
b64eb60f | 2915 | { |
c81c35df | 2916 | void *p = rv->text.p; |
b64eb60f | 2917 | |
c81c35df | 2918 | if (read_compound_string(&p, &rv->text.sz, TVCODE_BARE, 0, tv)) |
67b5031e | 2919 | return (-1); |
c81c35df MW |
2920 | rv->text.p = p; |
2921 | if (check_string_length(rv->text.sz, rd->arg.p, tv)) return (-1); | |
882a39c1 | 2922 | return (0); |
b64eb60f MW |
2923 | } |
2924 | ||
882a39c1 MW |
2925 | static int parse_bytes(union tvec_regval *rv, const struct tvec_regdef *rd, |
2926 | struct tvec_state *tv) | |
b64eb60f MW |
2927 | { |
2928 | void *p = rv->bytes.p; | |
2929 | ||
67b5031e MW |
2930 | if (read_compound_string(&p, &rv->bytes.sz, TVCODE_HEX, 0, tv)) |
2931 | return (-1); | |
882a39c1 MW |
2932 | rv->bytes.p = p; |
2933 | if (check_string_length(rv->bytes.sz, rd->arg.p, tv)) return (-1); | |
2934 | return (0); | |
b64eb60f MW |
2935 | } |
2936 | ||
c81c35df MW |
2937 | /* --- @dump_text@, @dump_bytes@ --- * |
2938 | * | |
2939 | * Arguments: @const union tvec_regval *rv@ = register value | |
2940 | * @const struct tvec_regdef *rd@ = register definition | |
2941 | * @unsigned style@ = output style (@TVSF_...@) | |
2942 | * @const struct gprintf_ops *gops@, @void *gp@ = format output | |
2943 | * | |
2944 | * Returns: --- | |
2945 | * | |
2946 | * Use: Dump a register value to the format output. | |
2947 | * | |
2948 | * Text string values are dumped as plain text, in double quotes | |
2949 | * if necessary, and using backslash escape sequences for | |
2950 | * nonprintable characters. Unless compact output is requested, | |
2951 | * strings consisting of multiple lines are dumped with each | |
2952 | * line of the string on a separate output line. | |
2953 | * | |
2954 | * Binary string values are dumped in hexadecimal. In compact | |
2955 | * style, the output simply consists of a single block of hex | |
2956 | * digits. Otherwise, the dump is a display consisting of | |
2957 | * groups of hex digits, with comments showing the offset (if | |
2958 | * the string is long enough) and the corresponding plain text. | |
2959 | * | |
2960 | * Empty strings are dumped as %|""|%. | |
2961 | */ | |
2962 | ||
2963 | static void dump_text(const union tvec_regval *rv, | |
2964 | const struct tvec_regdef *rd, | |
2965 | unsigned style, | |
2966 | const struct gprintf_ops *gops, void *go) | |
b64eb60f MW |
2967 | { |
2968 | const unsigned char *p, *q, *l; | |
b64eb60f MW |
2969 | unsigned f = 0; |
2970 | #define f_nonword 1u | |
2971 | #define f_newline 2u | |
2972 | ||
c81c35df | 2973 | if (!rv->text.sz) { gprintf(gops, go, "\"\""); return; } |
b64eb60f | 2974 | |
c81c35df | 2975 | p = (const unsigned char *)rv->text.p; l = p + rv->text.sz; |
67b5031e MW |
2976 | switch (*p) { |
2977 | case '!': case '#': case ';': case '"': case '\'': | |
2978 | case '(': case '{': case '[': case ']': case '}': case ')': | |
2979 | f |= f_nonword; break; | |
2980 | } | |
b64eb60f MW |
2981 | for (q = p; q < l; q++) |
2982 | if (*q == '\n' && q != l - 1) f |= f_newline; | |
2983 | else if (!*q || !isgraph(*q) || *q == '\\') f |= f_nonword; | |
e63124bc | 2984 | if (f&f_newline) { gprintf(gops, go, "\n\t"); goto quote; } |
b64eb60f | 2985 | else if (f&f_nonword) goto quote; |
67b5031e | 2986 | |
c81c35df | 2987 | gops->putm(go, (const char *)p, rv->text.sz); |
67b5031e | 2988 | return; |
b64eb60f MW |
2989 | |
2990 | quote: | |
e63124bc | 2991 | gprintf(gops, go, "\""); |
b64eb60f | 2992 | for (q = p; q < l; q++) |
e63124bc MW |
2993 | if (!isprint(*q) || *q == '"') { |
2994 | if (p < q) gops->putm(go, (const char *)p, q - p); | |
67b5031e | 2995 | if (*q != '\n' || (style&TVSF_COMPACT)) |
3efcfd2d | 2996 | format_charesc(gops, go, *q, FCF_BRACE); |
67b5031e MW |
2997 | else { |
2998 | if (q + 1 == l) { gprintf(gops, go, "\\n\""); return; } | |
2999 | else gprintf(gops, go, "\\n\"\n\t\""); | |
3000 | } | |
3001 | p = q + 1; | |
b64eb60f | 3002 | } |
e63124bc MW |
3003 | if (p < q) gops->putm(go, (const char *)p, q - p); |
3004 | gprintf(gops, go, "\""); | |
b64eb60f MW |
3005 | |
3006 | #undef f_nonword | |
3007 | #undef f_newline | |
3008 | } | |
3009 | ||
3010 | static void dump_bytes(const union tvec_regval *rv, | |
3011 | const struct tvec_regdef *rd, | |
e63124bc MW |
3012 | unsigned style, |
3013 | const struct gprintf_ops *gops, void *go) | |
b64eb60f MW |
3014 | { |
3015 | const unsigned char *p = rv->bytes.p, *l = p + rv->bytes.sz; | |
3016 | size_t off, sz = rv->bytes.sz; | |
3017 | unsigned i, n; | |
3018 | int wd; | |
3019 | ||
3020 | if (!sz) { | |
e63124bc | 3021 | gprintf(gops, go, style&TVSF_COMPACT ? "\"\"" : "\"\" ; empty"); |
b64eb60f MW |
3022 | return; |
3023 | } | |
3024 | ||
3025 | if (style&TVSF_COMPACT) { | |
e63124bc | 3026 | while (p < l) gprintf(gops, go, "%02x", *p++); |
b64eb60f MW |
3027 | return; |
3028 | } | |
3029 | ||
e63124bc | 3030 | if (sz > 16) gprintf(gops, go, "\n\t"); |
b64eb60f MW |
3031 | |
3032 | off = 0; wd = hex_width(sz); | |
3033 | while (p < l) { | |
3034 | if (l - p < 16) n = l - p; | |
3035 | else n = 16; | |
3036 | ||
67b5031e | 3037 | for (i = 0; i < n; i++) { |
e63124bc MW |
3038 | if (i < n) gprintf(gops, go, "%02x", p[i]); |
3039 | else gprintf(gops, go, " "); | |
67b5031e | 3040 | if (i < n - 1 && i%4 == 3) gprintf(gops, go, " "); |
b64eb60f | 3041 | } |
e63124bc MW |
3042 | gprintf(gops, go, " ; "); |
3043 | if (sz > 16) gprintf(gops, go, "[%0*lx] ", wd, (unsigned long)off); | |
b64eb60f | 3044 | for (i = 0; i < n; i++) |
e63124bc | 3045 | gprintf(gops, go, "%c", isprint(p[i]) ? p[i] : '.'); |
b64eb60f | 3046 | p += n; off += n; |
e63124bc | 3047 | if (p < l) gprintf(gops, go, "\n\t"); |
b64eb60f MW |
3048 | } |
3049 | } | |
3050 | ||
c81c35df MW |
3051 | /* Text and byte string type definitions. */ |
3052 | const struct tvec_regty tvty_text = { | |
3053 | init_text, release_text, eq_text, | |
3054 | tobuf_text, frombuf_text, | |
3055 | parse_text, dump_text | |
b64eb60f | 3056 | }; |
b64eb60f | 3057 | const struct tvec_regty tvty_bytes = { |
e63124bc | 3058 | init_bytes, release_bytes, eq_bytes, |
b64eb60f MW |
3059 | tobuf_bytes, frombuf_bytes, |
3060 | parse_bytes, dump_bytes | |
3061 | }; | |
3062 | ||
c81c35df | 3063 | /* --- @tvec_claimeq_text@ --- * |
67b5031e MW |
3064 | * |
3065 | * Arguments: @struct tvec_state *tv@ = test-vector state | |
3066 | * @const char *p0@, @size_t sz0@ = first string with length | |
3067 | * @const char *p1@, @size_t sz1@ = second string with length | |
3068 | * @const char *file@, @unsigned @lno@ = calling file and line | |
3069 | * @const char *expr@ = the expression to quote on failure | |
3070 | * | |
3071 | * Returns: Nonzero if the strings at @p0@ and @p1@ are equal, otherwise | |
3072 | * zero. | |
3073 | * | |
3074 | * Use: Check that strings at @p0@ and @p1@ are equal. As for | |
3075 | * @tvec_claim@ above, a test case is automatically begun and | |
3076 | * ended if none is already underway. If the values are | |
3077 | * unequal, then @tvec_fail@ is called, quoting @expr@, and the | |
3078 | * mismatched values are dumped: @p0@ is printed as the output | |
3079 | * value and @p1@ is printed as the input reference. | |
3080 | */ | |
3081 | ||
c81c35df MW |
3082 | int tvec_claimeq_text(struct tvec_state *tv, |
3083 | const char *p0, size_t sz0, | |
3084 | const char *p1, size_t sz1, | |
3085 | const char *file, unsigned lno, const char *expr) | |
b64eb60f | 3086 | { |
c81c35df MW |
3087 | tv->out[0].v.text.p = (/*unconst*/ char *)p0; tv->out[0].v.text.sz = sz0; |
3088 | tv->in[0].v.text.p =(/*unconst*/ char *) p1; tv->in[0].v.text.sz = sz1; | |
3089 | return (tvec_claimeq(tv, &tvty_text, 0, file, lno, expr)); | |
b64eb60f MW |
3090 | } |
3091 | ||
c81c35df | 3092 | /* --- @tvec_claimeq_textz@ --- * |
67b5031e MW |
3093 | * |
3094 | * Arguments: @struct tvec_state *tv@ = test-vector state | |
3095 | * @const char *p0, *p1@ = two strings to compare | |
3096 | * @const char *file@, @unsigned @lno@ = calling file and line | |
3097 | * @const char *expr@ = the expression to quote on failure | |
3098 | * | |
3099 | * Returns: Nonzero if the strings at @p0@ and @p1@ are equal, otherwise | |
3100 | * zero. | |
3101 | * | |
3102 | * Use: Check that strings at @p0@ and @p1@ are equal, as for | |
3103 | * @tvec_claimeq_string@, except that the strings are assumed | |
3104 | * null-terminated, so their lengths don't need to be supplied | |
3105 | * explicitly. | |
3106 | */ | |
3107 | ||
c81c35df MW |
3108 | int tvec_claimeq_textz(struct tvec_state *tv, |
3109 | const char *p0, const char *p1, | |
3110 | const char *file, unsigned lno, const char *expr) | |
b64eb60f | 3111 | { |
c81c35df MW |
3112 | tv->out[0].v.text.p = (/*unconst*/ char *)p0; |
3113 | tv->out[0].v.text.sz = strlen(p0); | |
3114 | tv->in[0].v.text.p = (/*unconst*/ char *)p1; | |
3115 | tv->in[0].v.text.sz = strlen(p1); | |
3116 | return (tvec_claimeq(tv, &tvty_text, 0, file, lno, expr)); | |
b64eb60f MW |
3117 | } |
3118 | ||
67b5031e MW |
3119 | /* --- @tvec_claimeq_bytes@ --- * |
3120 | * | |
3121 | * Arguments: @struct tvec_state *tv@ = test-vector state | |
3122 | * @const void *p0@, @size_t sz0@ = first string with length | |
3123 | * @const void *p1@, @size_t sz1@ = second string with length | |
3124 | * @const char *file@, @unsigned @lno@ = calling file and line | |
3125 | * @const char *expr@ = the expression to quote on failure | |
3126 | * | |
3127 | * Returns: Nonzero if the strings at @p0@ and @p1@ are equal, otherwise | |
3128 | * zero. | |
3129 | * | |
3130 | * Use: Check that binary strings at @p0@ and @p1@ are equal. As for | |
3131 | * @tvec_claim@ above, a test case is automatically begun and | |
3132 | * ended if none is already underway. If the values are | |
3133 | * unequal, then @tvec_fail@ is called, quoting @expr@, and the | |
3134 | * mismatched values are dumped: @p0@ is printed as the output | |
3135 | * value and @p1@ is printed as the input reference. | |
3136 | */ | |
3137 | ||
b64eb60f MW |
3138 | int tvec_claimeq_bytes(struct tvec_state *tv, |
3139 | const void *p0, size_t sz0, | |
3140 | const void *p1, size_t sz1, | |
3141 | const char *file, unsigned lno, const char *expr) | |
3142 | { | |
3efcfd2d MW |
3143 | tv->out[0].v.bytes.p = (/*unconst*/ void *)p0; |
3144 | tv->out[0].v.bytes.sz = sz0; | |
3145 | tv->in[0].v.bytes.p = (/*unconst*/ void *)p1; | |
3146 | tv->in[0].v.bytes.sz = sz1; | |
b64eb60f MW |
3147 | return (tvec_claimeq(tv, &tvty_bytes, 0, file, lno, expr)); |
3148 | } | |
3149 | ||
c81c35df | 3150 | /* --- @tvec_alloctext@, @tvec_allocbytes@ --- * |
67b5031e MW |
3151 | * |
3152 | * Arguments: @union tvec_regval *rv@ = register value | |
3153 | * @size_t sz@ = required size | |
3154 | * | |
3155 | * Returns: --- | |
3156 | * | |
3157 | * Use: Allocated space in a text or binary string register. If the | |
3158 | * current register size is sufficient, its buffer is left | |
3159 | * alone; otherwise, the old buffer, if any, is freed and a | |
3160 | * fresh buffer allocated. These functions are not intended to | |
3161 | * be used to adjust a buffer repeatedly, e.g., while building | |
3162 | * output incrementally: (a) they will perform badly, and (b) | |
3163 | * the old buffer contents are simply discarded if reallocation | |
3164 | * is necessary. Instead, use a @dbuf@ or @dstr@. | |
3165 | * | |
c81c35df | 3166 | * The @tvec_alloctext@ function sneakily allocates an extra |
67b5031e MW |
3167 | * byte for a terminating zero. The @tvec_allocbytes@ function |
3168 | * doesn't do this. | |
3169 | */ | |
3170 | ||
c81c35df | 3171 | void tvec_alloctext(union tvec_regval *rv, size_t sz) |
67b5031e | 3172 | { |
c81c35df MW |
3173 | if (rv->text.sz <= sz) { xfree(rv->text.p); rv->text.p = xmalloc(sz + 1); } |
3174 | rv->text.sz = sz; | |
67b5031e MW |
3175 | } |
3176 | ||
3177 | void tvec_allocbytes(union tvec_regval *rv, size_t sz) | |
3178 | { | |
3179 | if (rv->bytes.sz < sz) { xfree(rv->bytes.p); rv->bytes.p = xmalloc(sz); } | |
3180 | rv->bytes.sz = sz; | |
3181 | } | |
3182 | ||
b64eb60f MW |
3183 | /*----- Buffer type -------------------------------------------------------*/ |
3184 | ||
c81c35df MW |
3185 | /* Buffers are initialized and released as binary strings. */ |
3186 | ||
3187 | /* --- @eq_buffer@ --- * | |
3188 | * | |
3189 | * Arguments: @const union tvec_regval *rv0, *rv1@ = register values | |
3190 | * @const struct tvec_regdef *rd@ = register definition | |
3191 | * | |
3192 | * Returns: Nonzero if the values are equal, zero if unequal | |
3193 | * | |
3194 | * Use: Compare register values for equality. | |
3195 | * | |
3196 | * Buffer values are equal if and only if their sizes are equal; | |
3197 | * their contents are %%\emph{not}%% compared. | |
3198 | */ | |
3199 | ||
b64eb60f MW |
3200 | static int eq_buffer(const union tvec_regval *rv0, |
3201 | const union tvec_regval *rv1, | |
3202 | const struct tvec_regdef *rd) | |
3203 | { return (rv0->bytes.sz == rv1->bytes.sz); } | |
3204 | ||
c81c35df MW |
3205 | /* --- @tobuf_buffer@ --- * |
3206 | * | |
3207 | * Arguments: @buf *b@ = buffer | |
3208 | * @const union tvec_regval *rv@ = register value | |
3209 | * @const struct tvec_regdef *rd@ = register definition | |
3210 | * | |
3211 | * Returns: Zero on success, %$-1$% on failure. | |
3212 | * | |
3213 | * Use: Serialize a register value to a buffer. | |
3214 | * | |
3215 | * Buffer values are serialized as just their lengths, as | |
3216 | * unsigned integers. | |
3217 | */ | |
3218 | ||
b64eb60f MW |
3219 | static int tobuf_buffer(buf *b, const union tvec_regval *rv, |
3220 | const struct tvec_regdef *rd) | |
3221 | { return (unsigned_to_buf(b, rv->bytes.sz)); } | |
3222 | ||
c81c35df MW |
3223 | /* --- @allocate_buffer@ --- * |
3224 | * | |
3225 | * Arguments: @union tvec_regval *rv@ = register value | |
3226 | * @size_t sz@ = size to allocate | |
3227 | * | |
3228 | * Returns: --- | |
3229 | * | |
3230 | * Use: Allocate @sz@ bytes to the buffer and fill the space with a | |
3231 | * distinctive pattern. | |
3232 | */ | |
3233 | ||
3234 | static void allocate_buffer(union tvec_regval *rv, size_t sz) | |
3235 | { tvec_allocbytes(rv, sz); memset(rv->bytes.p, '?', sz); } | |
3236 | ||
3237 | /* --- @frombuf_buffer@ --- * | |
3238 | * | |
3239 | * Arguments: @buf *b@ = buffer | |
3240 | * @union tvec_regval *rv@ = register value | |
3241 | * @const struct tvec_regdef *rd@ = register definition | |
3242 | * | |
3243 | * Returns: Zero on success, %$-1$% on failure. | |
3244 | * | |
3245 | * Use: Deserialize a register value from a buffer. | |
3246 | * | |
3247 | * Buffer values are serialized as just their lengths, as | |
3248 | * unsigned integers. The buffer is allocated on | |
3249 | * deserialization and filled with a distinctive pattern. | |
3250 | */ | |
3251 | ||
b64eb60f MW |
3252 | static int frombuf_buffer(buf *b, union tvec_regval *rv, |
3253 | const struct tvec_regdef *rd) | |
3254 | { | |
3255 | unsigned long u; | |
3256 | ||
3257 | if (unsigned_from_buf(b, &u)) return (-1); | |
3258 | if (u > (size_t)-1) return (-1); | |
c81c35df | 3259 | allocate_buffer(rv, u); |
b64eb60f MW |
3260 | return (0); |
3261 | } | |
3262 | ||
c81c35df MW |
3263 | /* --- @parse_buffer@ --- * |
3264 | * | |
3265 | * Arguments: @union tvec_regval *rv@ = register value | |
3266 | * @const struct tvec_regdef *rd@ = register definition | |
3267 | * @struct tvec_state *tv@ = test-vector state | |
3268 | * | |
3269 | * Returns: Zero on success, %$-1$% on error. | |
3270 | * | |
3271 | * Use: Parse a register value from an input file. | |
3272 | * | |
3273 | * The input format for a buffer value consists of an unsigned | |
3274 | * integer followed by an optional unit specifier consisting of | |
3275 | * an SI unit prefix and (optionally) the letter `B'. Unit | |
3276 | * prefixes denote %%\emph{binary}%% multipliers, not decimal. | |
3277 | * | |
3278 | * The buffer is allocated and filled with a distinctive | |
3279 | * pattern. | |
3280 | */ | |
3281 | ||
b64eb60f MW |
3282 | static const char units[] = "kMGTPEZY"; |
3283 | ||
882a39c1 MW |
3284 | static int parse_buffer(union tvec_regval *rv, |
3285 | const struct tvec_regdef *rd, | |
3286 | struct tvec_state *tv) | |
b64eb60f MW |
3287 | { |
3288 | dstr d = DSTR_INIT; | |
3efcfd2d | 3289 | const char *q, *unit; |
b64eb60f MW |
3290 | size_t pos; |
3291 | unsigned long u, t; | |
882a39c1 | 3292 | int rc; |
b64eb60f MW |
3293 | unsigned f = 0; |
3294 | #define f_range 1u | |
3295 | ||
882a39c1 | 3296 | if (tvec_readword(tv, &d, ";", "buffer length")) { rc = -1; goto end; } |
3efcfd2d | 3297 | if (parse_unsigned_integer(&u, &q, d.buf)) goto bad; |
b64eb60f MW |
3298 | if (!*q) { |
3299 | tvec_skipspc(tv); pos = d.len; | |
3300 | if (!tvec_readword(tv, &d, ";", 0)) pos++; | |
3301 | q = d.buf + pos; | |
3302 | } | |
3303 | ||
3304 | if (u > (size_t)-1) goto rangerr; | |
3305 | for (t = u, unit = units; *unit; unit++) { | |
3306 | if (t > (size_t)-1/1024) f |= f_range; | |
3307 | else t *= 1024; | |
67b5031e | 3308 | if (*q == *unit) { |
b64eb60f | 3309 | if (f&f_range) goto rangerr; |
67b5031e | 3310 | u = t; q++; break; |
b64eb60f MW |
3311 | } |
3312 | } | |
67b5031e MW |
3313 | if (*q == 'B') q++; |
3314 | if (*q) goto bad; | |
882a39c1 | 3315 | if (check_string_length(u, rd->arg.p, tv)) { rc = -1; goto end; } |
b64eb60f | 3316 | |
882a39c1 | 3317 | if (tvec_flushtoeol(tv, 0)) { rc = -1; goto end; } |
c81c35df | 3318 | allocate_buffer(rv, u); |
882a39c1 MW |
3319 | rc = 0; |
3320 | end: | |
3321 | DDESTROY(&d); return (rc); | |
b64eb60f MW |
3322 | |
3323 | bad: | |
3324 | tvec_error(tv, "invalid buffer length `%s'", d.buf); | |
882a39c1 | 3325 | rc = -1; goto end; |
b64eb60f MW |
3326 | |
3327 | rangerr: | |
3328 | tvec_error(tv, "buffer length `%s' out of range", d.buf); | |
882a39c1 | 3329 | rc = -1; goto end; |
b64eb60f MW |
3330 | |
3331 | #undef f_range | |
3332 | } | |
3333 | ||
c81c35df MW |
3334 | /* --- @dump_buffer@ --- * |
3335 | * | |
3336 | * Arguments: @const union tvec_regval *rv@ = register value | |
3337 | * @const struct tvec_regdef *rd@ = register definition | |
3338 | * @unsigned style@ = output style (@TVSF_...@) | |
3339 | * @const struct gprintf_ops *gops@, @void *gp@ = format output | |
3340 | * | |
3341 | * Returns: --- | |
3342 | * | |
3343 | * Use: Dump a register value to the format output. | |
3344 | * | |
3345 | * Buffer values are dumped as their size with an appropriate | |
3346 | * unit specifier. A unit prefix is only used if the size is an | |
3347 | * exact multiple of the relevant power of two. | |
3348 | */ | |
3349 | ||
b64eb60f MW |
3350 | static void dump_buffer(const union tvec_regval *rv, |
3351 | const struct tvec_regdef *rd, | |
e63124bc MW |
3352 | unsigned style, |
3353 | const struct gprintf_ops *gops, void *go) | |
b64eb60f MW |
3354 | { |
3355 | const char *unit; | |
3356 | unsigned long u = rv->bytes.sz; | |
3357 | ||
3358 | if (!u || u%1024) | |
e63124bc | 3359 | gprintf(gops, go, "%lu B", u); |
b64eb60f MW |
3360 | else { |
3361 | for (unit = units, u /= 1024; !(u%1024) && unit[1]; u /= 1024, unit++); | |
e63124bc | 3362 | gprintf(gops, go, "%lu %cB", u, *unit); |
b64eb60f MW |
3363 | } |
3364 | } | |
3365 | ||
c81c35df | 3366 | /* Buffer type definition. */ |
b64eb60f | 3367 | const struct tvec_regty tvty_buffer = { |
e63124bc | 3368 | init_bytes, release_bytes, eq_buffer, |
b64eb60f MW |
3369 | tobuf_buffer, frombuf_buffer, |
3370 | parse_buffer, dump_buffer | |
3371 | }; | |
3372 | ||
3373 | /*----- That's all, folks -------------------------------------------------*/ |