@@@ fltfmt mess
[mLib] / utils / gprintf.c
1 /* -*-c-*-
2 *
3 * Generalized string formatting
4 *
5 * (c) 2023 Straylight/Edgeware
6 */
7
8 /*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of the mLib utilities library.
11 *
12 * mLib is free software: you can redistribute it and/or modify it under
13 * the terms of the GNU Library General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or (at
15 * your option) any later version.
16 *
17 * mLib is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20 * License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with mLib. If not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25 * USA.
26 */
27
28 /*----- Header files ------------------------------------------------------*/
29
30 #include "config.h"
31
32 #include <assert.h>
33 #include <ctype.h>
34 #include <limits.h>
35 #include <stdarg.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39
40 #ifdef HAVE_FLOAT_H
41 # include <float.h>
42 #endif
43
44 #ifdef HAVE_STDINT_H
45 # include <stdint.h>
46 #endif
47
48 #include "darray.h"
49 #include "dstr.h"
50 #include "gprintf.h"
51 #include "growbuf.h"
52 #include "macros.h"
53
54 /*----- Tunable constants -------------------------------------------------*/
55
/* Every conversion is given room for at least @STEP@ bytes of output.  The
 * same figure is used as the scratch space reserved when a width or
 * precision is written back into a rebuilt conversion specification.
 */

#define STEP 64
61
62 /*----- Preliminary definitions -------------------------------------------*/
63
/* Conditional-inclusion helpers.  Each @IF_...@ macro expands to its
 * argument when the corresponding feature is available, and to nothing
 * otherwise, so that optional table entries and switch cases can be
 * written inline.
 */

/* Floating-point limits from <float.h>. */
#ifndef HAVE_FLOAT_H
#  define IF_FLOAT(x)
#else
#  define IF_FLOAT(x) x
#endif

/* A `long long' type, detected through its <limits.h> maximum. */
#if !defined(LLONG_MAX) && !defined(LONG_LONG_MAX)
#  define IF_LONGLONG(x)
#else
#  define IF_LONGLONG(x) x
#endif

/* The `intmax_t' family, detected through its <stdint.h> maximum. */
#ifndef INTMAX_MAX
#  define IF_INTMAX(x)
#else
#  define IF_INTMAX(x) x
#endif
81
/* Tables of argument type codes.  Each entry is `X(code, type)': @code@
 * names both the @fmt_...@ enumerator and the member of the argument union,
 * and @type@ is the C type fetched from the argument list for it.
 */

/* Arguments which are formatted and written to the output. */
#define OUTPUT_FMTTYPES(X)                                              \
  X(i, unsigned int)                                                    \
  X(li, unsigned long)                                                  \
  IF_LONGLONG( X(lli, unsigned long long) )                             \
  X(zi, size_t)                                                         \
  X(ti, ptrdiff_t)                                                      \
  IF_INTMAX( X(ji, uintmax_t) )                                         \
  X(s, char *)                                                          \
  X(p, void *)                                                          \
  X(f, double)                                                          \
  X(Lf, long double)

/* Pointer arguments through which `%n' stores the running total. */
#define PERCENT_N_FMTTYPES(X)                                           \
  X(n, int *)                                                           \
  X(hhn, char *)                                                        \
  X(hn, short *)                                                        \
  X(ln, long *)                                                         \
  X(zn, size_t *)                                                       \
  X(tn, ptrdiff_t *)                                                    \
  IF_LONGLONG( X(lln, long long *) )                                    \
  IF_INTMAX( X(jn, intmax_t *) )

/* Every argument type: the output types followed by the `%n' types. */
#define FMTTYPES(X)                                                     \
  OUTPUT_FMTTYPES(X)                                                    \
  PERCENT_N_FMTTYPES(X)
107
108 enum {
109 fmt_unset = 0,
110 #define CODE(code, ty) fmt_##code,
111 FMTTYPES(CODE)
112 #undef CODE
113 fmt__limit
114 };
115
116 struct fmtarg {
117 int fmt;
118 union {
119 #define MEMB(code, ty) ty code;
120 FMTTYPES(MEMB)
121 #undef MEMB
122 } u;
123 };
124
125 DA_DECL(fmtarg_v, struct fmtarg);
126
/* Length-modifier codes, stored in the @f_len@ bits of a specifier's flags
 * word.
 */
enum {
  len_std = 0,                          /* no length modifier */
  len_hh = 1,                           /* `hh' */
  len_h = 2,                            /* `h' */
  len_l = 3,                            /* `l' */
  len_ll = 4,                           /* `ll' */
  len_z = 5,                            /* `z' */
  len_t = 6,                            /* `t' */
  len_j = 7,                            /* `j' */
  len_L = 8                             /* `L' */
};
138
/* Bits of a specifier's flags word.  The low four bits hold a @len_...@
 * code; the rest are independent single-bit flags.
 */
#define f_len 0xfu                      /* mask for the length code */
#define f_wd (1u << 4)                  /* a field width was given */
#define f_wdarg (1u << 5)               /* width comes from an argument */
#define f_prec (1u << 6)                /* a precision was given */
#define f_precarg (1u << 7)             /* precision comes from an argument */
#define f_plus (1u << 8)                /* `+' flag */
#define f_minus (1u << 9)               /* `-' flag */
#define f_sharp (1u << 10)              /* `#' flag */
#define f_zero (1u << 11)               /* `0' flag */
#define f_posarg (1u << 12)             /* explicit `N$' argument position */
149
150 struct fmtspec {
151 const char *p;
152 size_t n;
153 unsigned f;
154 int fmt, ch;
155 int wd, prec;
156 int arg;
157 };
158
159 DA_DECL(fmtspec_v, struct fmtspec);
160
161 /*----- Main code ---------------------------------------------------------*/
162
163 /* --- @vgprintf@ --- *
164 *
165 * Arguments: @const struct gprintf_ops *ops@ = output operations
166 * @void *out@ = context for output operations
167 * @const char *p@ = pointer to @printf@-style format string
168 * @va_list *ap@ = argument handle
169 *
170 * Returns: The number of characters written to the string.
171 *
172 * Use: As for @gprintf@, but takes a reified argument tail.
173 */
174
175 static void set_arg(fmtarg_v *av, size_t i, int fmt)
176 {
177 size_t j, n;
178
179 n = DA_LEN(av);
180 if (i >= n) {
181 DA_ENSURE(av, i + 1 - n);
182 for (j = n; j <= i; j++) DA(av)[j].fmt = fmt_unset;
183 DA_UNSAFE_EXTEND(av, i + 1 - n);
184 }
185
186 if (DA(av)[i].fmt == fmt_unset) DA(av)[i].fmt = fmt;
187 else assert(DA(av)[i].fmt == fmt);
188 }
189
190 int vgprintf(const struct gprintf_ops *ops, void *out,
191 const char *p, va_list *ap)
192 {
193 size_t sz, mx, n;
194 dstr dd = DSTR_INIT;
195 fmtspec_v sv = DA_INIT;
196 fmtarg_v av = DA_INIT;
197 struct fmtarg *fa, *fal;
198 struct fmtspec *fs, *fsl;
199 unsigned f;
200 int i, anext, tot = 0;
201 int wd, prec;
202
203 /* --- Initial pass through the input, parsing format specifiers --- *
204 *
205 * We essentially compile the format string into a vector of @fmtspec@
206 * objects, each of which represents a chunk of literal text followed by a
207 * (possibly imaginary, in the case of the final one) formatting directive.
208 * Output then simply consists of interpreting these specifiers in order.
209 */
210
211 anext = 0;
212
213 while (*p) {
214 f = 0;
215 DA_ENSURE(&sv, 1);
216 fs = &DA(&sv)[DA_LEN(&sv)];
217 DA_UNSAFE_EXTEND(&sv, 1);
218
219 /* --- Find the end of this literal portion --- */
220
221 fs->p = p;
222 while (*p && *p != '%') p++;
223 fs->n = p - fs->p;
224
225 /* --- Some simple cases --- *
226 *
227 * We might have reached the end of the string, or maybe a `%%' escape.
228 */
229
230 if (!*p) { fs->fmt = fmt_unset; fs->ch = 0; break; }
231 p++;
232 if (*p == '%') { fs->fmt = fmt_unset; fs->ch = '%'; p++; continue; }
233
234 /* --- Pick up initial flags --- */
235
236 flags:
237 for (;;) {
238 switch (*p) {
239 case '+': f |= f_plus; break;
240 case '-': f |= f_minus; break;
241 case '#': f |= f_sharp; break;
242 case '0': f |= f_zero; break;
243 default: goto done_flags;
244 }
245 p++;
246 }
247
248 /* --- Pick up the field width --- */
249
250 done_flags:
251 i = 0;
252 while (ISDIGIT(*p)) i = 10*i + *p++ - '0';
253
254 /* --- Snag: this might have been an argument position indicator --- */
255
256 if (i && *p == '$' && (!f || f == f_zero)) {
257 f |= f_posarg;
258 fs->arg = i - 1;
259 p++;
260 goto flags;
261 }
262
263 /* --- Set the field width --- *
264 *
265 * If @i@ is nonzero here then we have a numeric field width. Otherwise
266 * it might be `*', maybe with an explicit argument number.
267 */
268
269 if (i) {
270 f |= f_wd;
271 fs->wd = i;
272 } else if (*p == '*') {
273 p++;
274 if (!ISDIGIT(*p))
275 i = anext++;
276 else {
277 i = *p++ - '0';
278 while (ISDIGIT(*p)) i = 10*i + *p++ - '0';
279 assert(*p == '$'); p++;
280 assert(i > 0); i--;
281 }
282 f |= f_wd | f_wdarg;
283 set_arg(&av, i, fmt_i); fs->wd = i;
284 }
285
286 /* --- Maybe we have a precision spec --- */
287
288 if (*p == '.') {
289 p++;
290 f |= f_prec;
291 if (ISDIGIT(*p)) {
292 i = *p++ - '0';
293 while (ISDIGIT(*p)) i = 10*i + *p++ - '0';
294 fs->prec = i;
295 } else if (*p != '*')
296 fs->prec = 0;
297 else {
298 p++;
299 if (!ISDIGIT(*p))
300 i = anext++;
301 else {
302 i = *p++ - '0';
303 while (ISDIGIT(*p)) i = 10*i + *p++ - '0';
304 assert(*p == '$'); p++;
305 assert(i > 0); i--;
306 }
307 f |= f_precarg;
308 set_arg(&av, i, fmt_i); fs->prec = i;
309 }
310 }
311
312 /* --- Maybe some length flags --- */
313
314 switch (*p) {
315 case 'h':
316 p++;
317 if (*p == 'h') { f |= len_hh; p++; } else f |= len_h;
318 break;
319 case 'l':
320 p++;
321 IF_LONGLONG( if (*p == 'l') { f |= len_ll; p++; } else ) f |= len_l;
322 break;
323 case 'L': f |= len_L; p++; break;
324 case 'z': f |= len_z; p++; break;
325 case 't': f |= len_t; p++; break;
326 IF_INTMAX( case 'j': f |= len_j; p++; break; )
327 }
328
329 /* --- The flags are now ready --- */
330
331 fs->f = f;
332
333 /* --- At the end, an actual directive --- */
334
335 fs->ch = *p;
336 switch (*p++) {
337 case '%':
338 fs->fmt = fmt_unset;
339 break;
340 case 'd': case 'i': case 'x': case 'X': case 'o': case 'u':
341 switch (f&f_len) {
342 case len_l: fs->fmt = fmt_li; break;
343 case len_z: fs->fmt = fmt_zi; break;
344 case len_t: fs->fmt = fmt_ti; break;
345 IF_LONGLONG( case len_ll: fs->fmt = fmt_lli; break; )
346 IF_INTMAX( case len_j: fs->fmt = fmt_ji; break; )
347 default: fs->fmt = fmt_i;
348 }
349 break;
350 case 'a': case 'A':
351 case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
352 fs->fmt = (f&f_len) == len_L ? fmt_Lf : fmt_f;
353 break;
354 case 'c':
355 fs->fmt = fmt_i;
356 break;
357 case 's':
358 fs->fmt = fmt_s;
359 break;
360 case 'p':
361 fs->fmt = fmt_p;
362 break;
363 case 'n':
364 switch (f&f_len) {
365 case len_hh: fs->fmt = fmt_hhn; break;
366 case len_h: fs->fmt = fmt_hn; break;
367 case len_l: fs->fmt = fmt_ln; break;
368 case len_z: fs->fmt = fmt_zn; break;
369 case len_t: fs->fmt = fmt_tn; break;
370 IF_LONGLONG( case len_ll: fs->fmt = fmt_lln; break; )
371 IF_INTMAX( case len_j: fs->fmt = fmt_jn; break; )
372 default: fs->fmt = fmt_n;
373 }
374 break;
375 default:
376 fprintf(stderr,
377 "FATAL vgprintf: unknown format specifier `%c'\n", p[-1]);
378 abort();
379 }
380
381 /* --- Finally sort out the argument --- *
382 *
383 * If we don't have explicit argument positions then this comes after the
384 * width and precision; and we don't know the type code until we've
385 * parsed the specifier, so this seems the right place to handle it.
386 */
387
388 if (!(f&f_posarg)) fs->arg = anext++;
389 set_arg(&av, fs->arg, fs->fmt);
390 }
391
392 /* --- Quick pass over the argument vector to collect the arguments --- */
393
394 for (fa = DA(&av), fal = fa + DA_LEN(&av); fa < fal; fa++) {
395 switch (fa->fmt) {
396 #define CASE(code, ty) case fmt_##code: fa->u.code = va_arg(*ap, ty); break;
397 FMTTYPES(CASE)
398 #undef CASE
399 default: abort();
400 }
401 }
402
403 /* --- Final pass through the format string to produce output --- */
404
405 fa = DA(&av);
406 for (fs = DA(&sv), fsl = fs + DA_LEN(&sv); fs < fsl; fs++) {
407 f = fs->f;
408
409 /* --- Output the literal portion --- */
410
411 if (fs->n) {
412 n = ops->putm(out, fs->p, fs->n); if (n < 0) return (-1);
413 tot += n;
414 }
415
416 /* --- And now the variable portion --- */
417
418 if (fs->fmt == fmt_unset) {
419 switch (fs->ch) {
420 case 0:
421 break;
422 case '%':
423 n = ops->putch(out, '%'); if (n < 0) return (-1);
424 tot += n; break;
425 default:
426 abort();
427 }
428 continue;
429 }
430
431 DRESET(&dd);
432 DPUTC(&dd, '%');
433
434 /* --- Resolve the width and precision --- */
435
436 if (!(f&f_wd))
437 wd = 0;
438 else {
439 wd = (fs->f&f_wdarg) ? *(int *)&fa[fs->wd].u.i : fs->wd;
440 if (wd < 0) { wd = -wd; f |= f_minus; }
441 }
442
443 if (!(f&f_prec))
444 prec = 0;
445 else {
446 prec = (fs->f&f_precarg) ? *(int *)&fa[fs->prec].u.i : fs->prec;
447 if (prec < 0) { prec = 0; f &= ~f_prec; }
448 }
449
450 /* --- Write out the flags, width and precision --- */
451
452 if (f&f_plus) DPUTC(&dd, '+');
453 if (f&f_minus) DPUTC(&dd, '-');
454 if (f&f_sharp) DPUTC(&dd, '#');
455 if (f&f_zero) DPUTC(&dd, '0');
456
457 if (f&f_wd) {
458 DENSURE(&dd, STEP);
459 dd.len += sprintf(dd.buf + dd.len, "%d", wd);
460 }
461
462 if (f&f_prec) {
463 DENSURE(&dd, STEP + 1);
464 dd.len += sprintf(dd.buf + dd.len, ".%d", prec);
465 }
466
467 /* --- Write out the length gadget --- */
468
469 switch (f&f_len) {
470 case len_hh: DPUTC(&dd, 'h'); /* fall through */
471 case len_h: DPUTC(&dd, 'h'); break;
472 IF_LONGLONG( case len_ll: DPUTC(&dd, 'l'); /* fall through */ )
473 case len_l: DPUTC(&dd, 'l'); break;
474 case len_z: DPUTC(&dd, 'z'); break;
475 case len_t: DPUTC(&dd, 't'); break;
476 case len_L: DPUTC(&dd, 'L'); break;
477 IF_INTMAX( case len_j: DPUTC(&dd, 'j'); break; )
478 case len_std: break;
479 default: abort();
480 }
481
482 /* --- And finally the actually important bit --- */
483
484 DPUTC(&dd, fs->ch);
485 DPUTZ(&dd);
486
487 /* --- Make sure we have enough space for the output --- */
488
489 sz = STEP;
490 if (sz < wd) sz = wd;
491 if (sz < prec + 16) sz = prec + 16;
492 switch (fs->ch) {
493 case 'a': case 'A':
494 case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
495 #ifdef HAVE_FLOAT_H
496 if (fs->ch == 'f') {
497 mx = ((fs->f&f_len) == len_L ?
498 LDBL_MAX_10_EXP : DBL_MAX_10_EXP) + 16;
499 if (sz < mx) sz = mx;
500 }
501 break;
502 #else
503 # define MSG "<no float support>"
504 n = ops->putm(out, MSG, sizeof(MSG) - 1); if (n < 0) return (-1);
505 tot += n; continue;
506 # undef MSG
507 #endif
508 case 's':
509 if (!(f&f_prec)) {
510 n = strlen(fa[fs->arg].u.s);
511 if (sz < n) sz = n;
512 }
513 break;
514 case 'n':
515 switch (fs->fmt) {
516 #define CASE(code, ty) \
517 case fmt_##code: *fa[fs->arg].u.code = tot; break;
518 PERCENT_N_FMTTYPES(CASE)
519 #undef CASE
520 default: abort();
521 }
522 continue;
523 }
524
525 /* --- Finally do the output stage --- */
526
527 switch (fs->fmt) {
528 #define CASE(code, ty) \
529 case fmt_##code: \
530 n = ops->nputf(out, sz, dd.buf, fa[fs->arg].u.code); \
531 break;
532 OUTPUT_FMTTYPES(CASE)
533 #undef CASE
534 default: abort();
535 }
536 if (n < 0) return (-1);
537 tot += n;
538 }
539
540 /* --- We're done --- */
541
542 DDESTROY(&dd);
543 DA_DESTROY(&av);
544 DA_DESTROY(&sv);
545 return (tot);
546 }
547
548 /* --- @gprintf@ --- *
549 *
550 * Arguments: @const struct gprintf_ops *ops@ = output operations
551 * @void *out@ = context for output operations
552 * @const char *p@ = pointer to @printf@-style format string
553 * @...@ = argument handle
554 *
555 * Returns: The number of characters written to the string.
556 *
557 * Use: Formats a @printf@-like message and writes the result using
558 * the given output operations. This is the backend machinery
559 * for @dstr_putf@, for example.
560 */
561
int gprintf(const struct gprintf_ops *ops, void *out, const char *p, ...)
{
  va_list args;
  int total;

  /* Capture the variable arguments and pass them on by address, which is
   * how @vgprintf@ expects to receive them.
   */
  va_start(args, p);
  total = vgprintf(ops, out, p, &args);
  va_end(args);

  return (total);
}
570
571 /*----- Utilities ---------------------------------------------------------*/
572
/* --- @gprintf_memputf@ --- *
 *
 * Arguments:   @arena *a@ = memory allocation arena
 *              @char **buf_inout@ = address of output buffer pointer
 *              @size_t *sz_inout@ = address of buffer size
 *              @size_t maxsz@ = upper bound on the formatted length, not
 *                      counting the terminating null byte
 *              @const char *p@ = pointer to format string
 *              @va_list ap@ = captured format-arguments tail
 *
 * Returns:     The formatted length.
 *
 * Use:         Generic utility for mostly implementing the @nputf@ output
 *              function, if you don't have a better option.
 *
 *              On entry, @*buf_inout@ should be null or a buffer pointer,
 *              with @*sz_inout@ either zero or the buffer's size,
 *              respectively.  On exit, @*buf_inout@ and @*sz_inout@ will be
 *              updated, if necessary, to describe a sufficiently large
 *              buffer, and the formatted string will have been written to
 *              the buffer.
 *
 *              When the buffer is no longer required, free it using
 *              @x_free@.
 */
597
598 size_t gprintf_memputf(arena *a, char **buf_inout, size_t *sz_inout,
599 size_t maxsz, const char *p, va_list ap)
600 {
601 int n;
602
603 GROWBUF_REPLACE(size_t, a, *buf_inout, *sz_inout, maxsz, 64, 1);
604 #ifdef HAVE_SNPRINTF
605 n = vsnprintf(*buf_inout, maxsz + 1, p, ap);
606 #else
607 n = vsprintf(*buf_inout, p, ap);
608 #endif
609 assert(0 <= n && n <= maxsz);
610 return (n);
611 }
612
613 /*----- Standard printers -------------------------------------------------*/
614
/* Write the single character @ch@ to the stdio stream @out@.
 *
 * Returns the number of characters written (always 1), so that the
 * formatter's running total counts it, or -1 if the write failed.
 */
static int file_putch(void *out, int ch)
{
  FILE *fp = out;

  if (putc(ch, fp) == EOF) return (-1);
  return (1);
}
622
/* Write the @sz@ bytes starting at @p@ to the stdio stream @out@.
 *
 * Returns the number of characters written, so that the formatter's
 * running total counts them, or -1 if the stream accepted fewer than @sz@
 * bytes.
 */
static int file_putm(void *out, const char *p, size_t sz)
{
  FILE *fp = out;

  if (fwrite(p, 1, sz, fp) < sz) return (-1);
  return ((int)sz);
}
630
/* Format the arguments according to @p@ and write the result to the stdio
 * stream @out@.  The size bound @maxsz@ is not needed, because the stream
 * takes output of any length.
 *
 * Returns the number of characters written, as reported by @vfprintf@, so
 * that the formatter's running total counts them, or -1 on failure.
 */
static int file_nputf(void *out, size_t maxsz, const char *p, ...)
{
  FILE *fp = out;
  va_list ap;
  int n;

  (void)maxsz;

  va_start(ap, p);
  n = vfprintf(fp, p, ap);
  va_end(ap);

  if (n < 0) return (-1);
  return (n);
}
642
643 const struct gprintf_ops file_printops =
644 { file_putch, file_putm, file_nputf };
645
646 /*----- That's all, folks -------------------------------------------------*/