b575e1f4ef495d148cf6cb16e57c69822b599d8b
[mLib] / utils / gprintf.c
1 /* -*-c-*-
2 *
3 * Generalized string formatting
4 *
5 * (c) 2023 Straylight/Edgeware
6 */
7
8 /*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of the mLib utilities library.
11 *
12 * mLib is free software: you can redistribute it and/or modify it under
13 * the terms of the GNU Library General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or (at
15 * your option) any later version.
16 *
17 * mLib is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20 * License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with mLib. If not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25 * USA.
26 */
27
28 /*----- Header files ------------------------------------------------------*/
29
30 #include "config.h"
31
32 #include <assert.h>
33 #include <ctype.h>
34 #include <limits.h>
35 #include <stdarg.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39
40 #ifdef HAVE_FLOAT_H
41 # include <float.h>
42 #endif
43
44 #ifdef HAVE_STDINT_H
45 # include <stdint.h>
46 #endif
47
48 #include "darray.h"
49 #include "dstr.h"
50 #include "gprintf.h"
51 #include "macros.h"
52
53 /*----- Tunable constants -------------------------------------------------*/
54
/* Baseline buffer size, in bytes.  @vgprintf@ reserves at least this much
 * room in its scratch string before rebuilding a width or precision, and
 * asks the @nputf@ operation for at least this much output space for
 * every conversion.
 */

#define STEP 64
60
61 /*----- Preliminary definitions -------------------------------------------*/
62
/* Conditional-inclusion helpers.  Each macro expands to its argument when
 * the corresponding facility is available, and to nothing otherwise.
 */

#if defined(HAVE_FLOAT_H)
#  define IF_FLOAT(x) x
#else
#  define IF_FLOAT(x)
#endif

#if defined(LLONG_MAX) || defined(LONG_LONG_MAX)
#  define IF_LONGLONG(x) x
#else
#  define IF_LONGLONG(x)
#endif

#if defined(INTMAX_MAX)
#  define IF_INTMAX(x) x
#else
#  define IF_INTMAX(x)
#endif
80
/* Tables of argument types, in `X-macro' form: each entry invokes the
 * macro argument as @_(code, type)@, where @code@ names both the
 * @fmt_...@ type code and the matching member of the argument union, and
 * @type@ is the C type fetched from the argument list.
 *
 * @OUTPUT_FMTTYPES@ lists the types which are formatted and written;
 * @PERCENT_N_FMTTYPES@ lists the pointer types written through by `%n';
 * @FMTTYPES@ is the two lists in sequence.
 */

#define OUTPUT_FMTTYPES(_)                                              \
  _(i, unsigned int)                                                    \
  _(li, unsigned long)                                                  \
  IF_LONGLONG( _(lli, unsigned long long) )                             \
  _(zi, size_t)                                                         \
  _(ti, ptrdiff_t)                                                      \
  IF_INTMAX( _(ji, uintmax_t) )                                         \
  _(s, char *)                                                          \
  _(p, void *)                                                          \
  _(f, double)                                                          \
  _(Lf, long double)

#define PERCENT_N_FMTTYPES(_)                                           \
  _(n, int *)                                                           \
  _(hhn, char *)                                                        \
  _(hn, short *)                                                        \
  _(ln, long *)                                                         \
  _(zn, size_t *)                                                       \
  _(tn, ptrdiff_t *)                                                    \
  IF_LONGLONG( _(lln, long long *) )                                    \
  IF_INTMAX( _(jn, intmax_t *) )

#define FMTTYPES(_)                                                     \
  OUTPUT_FMTTYPES(_)                                                    \
  PERCENT_N_FMTTYPES(_)
106
107 enum {
108 fmt_unset = 0,
109 #define CODE(code, ty) fmt_##code,
110 FMTTYPES(CODE)
111 #undef CODE
112 fmt__limit
113 };
114
115 struct fmtarg {
116 int fmt;
117 union {
118 #define MEMB(code, ty) ty code;
119 FMTTYPES(MEMB)
120 #undef MEMB
121 } u;
122 };
123
124 DA_DECL(fmtarg_v, struct fmtarg);
125
/* Length modifiers.  The chosen value is stored in the low bits of a
 * specifier's flags word, so every value must fit within the @f_len@
 * mask.
 */

enum {
  len_std = 0,                          /* no length modifier */
  len_hh = 1,                           /* `hh' */
  len_h = 2,                            /* `h' */
  len_l = 3,                            /* `l' */
  len_ll = 4,                           /* `ll' */
  len_z = 5,                            /* `z' */
  len_t = 6,                            /* `t' */
  len_j = 7,                            /* `j' */
  len_L = 8                             /* `L' */
};
137
/* Flag bits for a format specifier.  The low four bits hold one of the
 * @len_...@ length-modifier codes; the rest are independent flags.
 */

#define f_len     0x000fu               /* mask for the length modifier */
#define f_wd      (1u << 4)             /* a field width is present */
#define f_wdarg   (1u << 5)             /* width comes from an argument */
#define f_prec    (1u << 6)             /* a precision is present */
#define f_precarg (1u << 7)             /* precision comes from an argument */
#define f_plus    (1u << 8)             /* `+' flag */
#define f_minus   (1u << 9)             /* `-' flag */
#define f_sharp   (1u << 10)            /* `#' flag */
#define f_zero    (1u << 11)            /* `0' flag */
#define f_posarg  (1u << 12)            /* explicit `N$' argument position */
148
149 struct fmtspec {
150 const char *p;
151 size_t n;
152 unsigned f;
153 int fmt, ch;
154 int wd, prec;
155 int arg;
156 };
157
158 DA_DECL(fmtspec_v, struct fmtspec);
159
160 /*----- Main code ---------------------------------------------------------*/
161
162 /* --- @vgprintf@ --- *
163 *
164 * Arguments: @const struct gprintf_ops *ops@ = output operations
165 * @void *out@ = context for output operations
166 * @const char *p@ = pointer to @printf@-style format string
167 * @va_list *ap@ = argument handle
168 *
169 * Returns: The number of characters written to the string.
170 *
171 * Use: As for @gprintf@, but takes a reified argument tail.
172 */
173
174 static void set_arg(fmtarg_v *av, size_t i, int fmt)
175 {
176 size_t j, n;
177
178 n = DA_LEN(av);
179 if (i >= n) {
180 DA_ENSURE(av, i + 1 - n);
181 for (j = n; j <= i; j++) DA(av)[j].fmt = fmt_unset;
182 DA_UNSAFE_EXTEND(av, i + 1 - n);
183 }
184
185 if (DA(av)[i].fmt == fmt_unset) DA(av)[i].fmt = fmt;
186 else assert(DA(av)[i].fmt == fmt);
187 }
188
189 int vgprintf(const struct gprintf_ops *ops, void *out,
190 const char *p, va_list *ap)
191 {
192 size_t sz, mx, n;
193 dstr dd = DSTR_INIT;
194 fmtspec_v sv = DA_INIT;
195 fmtarg_v av = DA_INIT;
196 struct fmtarg *fa, *fal;
197 struct fmtspec *fs, *fsl;
198 unsigned f;
199 int i, anext, tot = 0;
200 int wd, prec;
201
202 /* --- Initial pass through the input, parsing format specifiers --- *
203 *
204 * We essentially compile the format string into a vector of @fmtspec@
205 * objects, each of which represents a chunk of literal text followed by a
206 * (possibly imaginary, in the case of the final one) formatting directive.
207 * Output then simply consists of interpreting these specifiers in order.
208 */
209
210 anext = 0;
211
212 while (*p) {
213 f = 0;
214 DA_ENSURE(&sv, 1);
215 fs = &DA(&sv)[DA_LEN(&sv)];
216 DA_UNSAFE_EXTEND(&sv, 1);
217
218 /* --- Find the end of this literal portion --- */
219
220 fs->p = p;
221 while (*p && *p != '%') p++;
222 fs->n = p - fs->p;
223
224 /* --- Some simple cases --- *
225 *
226 * We might have reached the end of the string, or maybe a `%%' escape.
227 */
228
229 if (!*p) { fs->fmt = fmt_unset; fs->ch = 0; break; }
230 p++;
231 if (*p == '%') { fs->fmt = fmt_unset; fs->ch = '%'; p++; continue; }
232
233 /* --- Pick up initial flags --- */
234
235 flags:
236 for (;;) {
237 switch (*p) {
238 case '+': f |= f_plus; break;
239 case '-': f |= f_minus; break;
240 case '#': f |= f_sharp; break;
241 case '0': f |= f_zero; break;
242 default: goto done_flags;
243 }
244 p++;
245 }
246
247 /* --- Pick up the field width --- */
248
249 done_flags:
250 i = 0;
251 while (ISDIGIT(*p)) i = 10*i + *p++ - '0';
252
253 /* --- Snag: this might have been an argument position indicator --- */
254
255 if (i && *p == '$' && (!f || f == f_zero)) {
256 f |= f_posarg;
257 fs->arg = i - 1;
258 p++;
259 goto flags;
260 }
261
262 /* --- Set the field width --- *
263 *
264 * If @i@ is nonzero here then we have a numeric field width. Otherwise
265 * it might be `*', maybe with an explicit argument number.
266 */
267
268 if (i) {
269 f |= f_wd;
270 fs->wd = i;
271 } else if (*p == '*') {
272 p++;
273 if (!ISDIGIT(*p))
274 i = anext++;
275 else {
276 i = *p++ - '0';
277 while (ISDIGIT(*p)) i = 10*i + *p++ - '0';
278 assert(*p == '$'); p++;
279 assert(i > 0); i--;
280 }
281 f |= f_wd | f_wdarg;
282 set_arg(&av, i, fmt_i); fs->wd = i;
283 }
284
285 /* --- Maybe we have a precision spec --- */
286
287 if (*p == '.') {
288 p++;
289 f |= f_prec;
290 if (ISDIGIT(*p)) {
291 i = *p++ - '0';
292 while (ISDIGIT(*p)) i = 10*i + *p++ - '0';
293 fs->prec = i;
294 } else if (*p != '*')
295 fs->prec = 0;
296 else {
297 p++;
298 if (!ISDIGIT(*p))
299 i = anext++;
300 else {
301 i = *p++ - '0';
302 while (ISDIGIT(*p)) i = 10*i + *p++ - '0';
303 assert(*p == '$'); p++;
304 assert(i > 0); i--;
305 }
306 f |= f_precarg;
307 set_arg(&av, i, fmt_i); fs->prec = i;
308 }
309 }
310
311 /* --- Maybe some length flags --- */
312
313 switch (*p) {
314 case 'h':
315 p++;
316 if (*p == 'h') { f |= len_hh; p++; } else f |= len_h;
317 break;
318 case 'l':
319 p++;
320 IF_LONGLONG( if (*p == 'l') { f |= len_ll; p++; } else ) f |= len_l;
321 break;
322 case 'L': f |= len_L; p++; break;
323 case 'z': f |= len_z; p++; break;
324 case 't': f |= len_t; p++; break;
325 IF_INTMAX( case 'j': f |= len_j; p++; break; )
326 }
327
328 /* --- The flags are now ready --- */
329
330 fs->f = f;
331
332 /* --- At the end, an actual directive --- */
333
334 fs->ch = *p;
335 switch (*p++) {
336 case '%':
337 fs->fmt = fmt_unset;
338 break;
339 case 'd': case 'i': case 'x': case 'X': case 'o': case 'u':
340 switch (f&f_len) {
341 case len_l: fs->fmt = fmt_li; break;
342 case len_z: fs->fmt = fmt_zi; break;
343 case len_t: fs->fmt = fmt_ti; break;
344 IF_LONGLONG( case len_ll: fs->fmt = fmt_lli; break; )
345 IF_INTMAX( case len_j: fs->fmt = fmt_ji; break; )
346 default: fs->fmt = fmt_i;
347 }
348 break;
349 case 'a': case 'A':
350 case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
351 fs->fmt = (f&f_len) == len_L ? fmt_Lf : fmt_f;
352 break;
353 case 'c':
354 fs->fmt = fmt_i;
355 break;
356 case 's':
357 fs->fmt = fmt_s;
358 break;
359 case 'p':
360 fs->fmt = fmt_p;
361 break;
362 case 'n':
363 switch (f&f_len) {
364 case len_hh: fs->fmt = fmt_hhn; break;
365 case len_h: fs->fmt = fmt_hn; break;
366 case len_l: fs->fmt = fmt_ln; break;
367 case len_z: fs->fmt = fmt_zn; break;
368 case len_t: fs->fmt = fmt_tn; break;
369 IF_LONGLONG( case len_ll: fs->fmt = fmt_lln; break; )
370 IF_INTMAX( case len_j: fs->fmt = fmt_jn; break; )
371 default: fs->fmt = fmt_n;
372 }
373 break;
374 default:
375 fprintf(stderr,
376 "FATAL dstr_vputf: unknown format specifier `%c'\n", p[-1]);
377 abort();
378 }
379
380 /* --- Finally sort out the argument --- *
381 *
382 * If we don't have explicit argument positions then this comes after the
383 * width and precision; and we don't know the type code until we've
384 * parsed the specifier, so this seems the right place to handle it.
385 */
386
387 if (!(f&f_posarg)) fs->arg = anext++;
388 set_arg(&av, fs->arg, fs->fmt);
389 }
390
391 /* --- Quick pass over the argument vector to collect the arguments --- */
392
393 for (fa = DA(&av), fal = fa + DA_LEN(&av); fa < fal; fa++) {
394 switch (fa->fmt) {
395 #define CASE(code, ty) case fmt_##code: fa->u.code = va_arg(*ap, ty); break;
396 FMTTYPES(CASE)
397 #undef CASE
398 default: abort();
399 }
400 }
401
402 /* --- Final pass through the format string to produce output --- */
403
404 fa = DA(&av);
405 for (fs = DA(&sv), fsl = fs + DA_LEN(&sv); fs < fsl; fs++) {
406 f = fs->f;
407
408 /* --- Output the literal portion --- */
409
410 if (fs->n) {
411 if (ops->putm(out, fs->p, fs->n)) return (-1);
412 tot += fs->n;
413 }
414
415 /* --- And now the variable portion --- */
416
417 if (fs->fmt == fmt_unset) {
418 switch (fs->ch) {
419 case 0: break;
420 case '%': ops->putch(out, '%'); break;
421 default: abort();
422 }
423 continue;
424 }
425
426 DRESET(&dd);
427 DPUTC(&dd, '%');
428
429 /* --- Resolve the width and precision --- */
430
431 if (!(f&f_wd))
432 wd = 0;
433 else {
434 wd = (fs->f&f_wdarg) ? *(int *)&fa[fs->wd].u.i : fs->wd;
435 if (wd < 0) { wd = -wd; f |= f_minus; }
436 }
437
438 if (!(f&f_prec))
439 prec = 0;
440 else {
441 prec = (fs->f&f_precarg) ? *(int *)&fa[fs->prec].u.i : fs->prec;
442 if (prec < 0) { prec = 0; f &= ~f_prec; }
443 }
444
445 /* --- Write out the flags, width and precision --- */
446
447 if (f&f_plus) DPUTC(&dd, '+');
448 if (f&f_minus) DPUTC(&dd, '-');
449 if (f&f_sharp) DPUTC(&dd, '#');
450 if (f&f_zero) DPUTC(&dd, '0');
451
452 if (f&f_wd) {
453 DENSURE(&dd, STEP);
454 dd.len += sprintf(dd.buf + dd.len, "%d", wd);
455 }
456
457 if (f&f_prec) {
458 DENSURE(&dd, STEP + 1);
459 dd.len += sprintf(dd.buf + dd.len, ".%d", prec);
460 }
461
462 /* --- Write out the length gadget --- */
463
464 switch (f&f_len) {
465 case len_hh: DPUTC(&dd, 'h'); /* fall through */
466 case len_h: DPUTC(&dd, 'h'); break;
467 IF_LONGLONG( case len_ll: DPUTC(&dd, 'l'); /* fall through */ )
468 case len_l: DPUTC(&dd, 'l'); break;
469 case len_z: DPUTC(&dd, 'z'); break;
470 case len_t: DPUTC(&dd, 't'); break;
471 case len_L: DPUTC(&dd, 'L'); break;
472 IF_INTMAX( case len_j: DPUTC(&dd, 'j'); break; )
473 case len_std: break;
474 default: abort();
475 }
476
477 /* --- And finally the actually important bit --- */
478
479 DPUTC(&dd, fs->ch);
480 DPUTZ(&dd);
481
482 /* --- Make sure we have enough space for the output --- */
483
484 sz = STEP;
485 if (sz < wd) sz = wd;
486 if (sz < prec + 16) sz = prec + 16;
487 switch (fs->ch) {
488 case 'a': case 'A':
489 case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
490 #ifdef HAVE_FLOAT_H
491 if (fs->ch == 'f') {
492 mx = ((fs->f&f_len) == len_L ?
493 LDBL_MAX_10_EXP : DBL_MAX_10_EXP) + 16;
494 if (sz < mx) sz = mx;
495 }
496 break;
497 #else
498 # define MSG "<no float support>"
499 if (ops->putm(out, MSG, sizeof(MSG) - 1)) return (-1);
500 continue;
501 # undef MSG
502 #endif
503 case 's':
504 if (!(f&f_prec)) {
505 n = strlen(fa[fs->arg].u.s);
506 if (sz < n) sz = n;
507 }
508 break;
509 case 'n':
510 switch (fs->fmt) {
511 #define CASE(code, ty) \
512 case fmt_##code: *fa[fs->arg].u.code = tot; break;
513 PERCENT_N_FMTTYPES(CASE)
514 #undef CASE
515 default: abort();
516 }
517 continue;
518 }
519
520 /* --- Finally do the output stage --- */
521
522 switch (fs->fmt) {
523 #define CASE(code, ty) \
524 case fmt_##code: \
525 i = ops->nputf(out, sz, dd.buf, fa[fs->arg].u.code); \
526 break;
527 OUTPUT_FMTTYPES(CASE)
528 #undef CASE
529 default: abort();
530 }
531 if (i < 0) return (-1);
532 tot += i;
533 }
534
535 /* --- We're done --- */
536
537 DDESTROY(&dd);
538 DA_DESTROY(&av);
539 DA_DESTROY(&sv);
540 return (tot);
541 }
542
543 /* --- @gprintf@ --- *
544 *
545 * Arguments: @const struct gprintf_ops *ops@ = output operations
546 * @void *out@ = context for output operations
547 * @const char *p@ = pointer to @printf@-style format string
548 * @...@ = argument handle
549 *
550 * Returns: The number of characters written to the string.
551 *
552 * Use: Formats a @printf@-like message and writes the result using
553 * the given output operations. This is the backend machinery
554 * for @dstr_putf@, for example.
555 */
556
int gprintf(const struct gprintf_ops *ops, void *out, const char *p, ...)
{
  va_list args;
  int rc;

  /* Capture the variable arguments and let @vgprintf@ do the work. */
  va_start(args, p);
  rc = vgprintf(ops, out, p, &args);
  va_end(args);

  return (rc);
}
565
566 /*----- Utilities ---------------------------------------------------------*/
567
568 /* --- @gprintf_memputf@ --- *
569 *
570 * Arguments: @char **buf_inout@ = address of output buffer pointer
571 * @size_t *sz_inout@ = address of buffer size
572 * @size_t maxsz@ = buffer size needed for this operation
573 * @const char *p@ = pointer to format string
574 * @va_list ap@ = captured format-arguments tail
575 *
576 * Returns: The formatted length.
577 *
578 * Use: Generic utility for mostly implementing the @nputf@ output
579 * function, if you don't have a better option.
580 *
581 * On entry, @*buf_inout@ should be null or a buffer pointer,
582 * with @*sz_inout@ either zero or the buffer's size,
583 * respectively. On exit, @*buf_inout@ and @*sz_inout@ will be
584 * updated, if necessary, to describe a sufficiently large
585 * buffer, and the formatted string will have been written to
586 * the buffer.
587 *
588 * When the buffer is no longer required, free it using @xfree@.
589 */
590
size_t gprintf_memputf(char **buf_inout, size_t *sz_inout,
                       size_t maxsz, const char *p, va_list ap)
{
  char *b = *buf_inout;
  size_t cap = *sz_inout;
  int len;

  /* --- Replace the buffer if it's too small --- *
   *
   * We need strictly more than @maxsz@ bytes so that there's room for the
   * terminating zero.  The old contents aren't needed, so discard the old
   * buffer rather than resizing it.  Start from 32 bytes if there was no
   * buffer at all, and double until the capacity is large enough.
   */

  if (cap <= maxsz) {
    if (cap == 0) cap = 32;
    for (; cap <= maxsz; cap *= 2);
    if (b) xfree(b);
    b = xmalloc(cap);
    *buf_inout = b;
    *sz_inout = cap;
  }

  /* --- Format into the buffer, bounded if the library lets us --- */

#ifdef HAVE_SNPRINTF
  len = vsnprintf(b, maxsz + 1, p, ap);
#else
  len = vsprintf(b, p, ap);
#endif

  /* --- The caller promised that the output fits in @maxsz@ bytes --- */

  assert(0 <= len && len <= maxsz);
  return (len);
}
613
614 /*----- Standard printers -------------------------------------------------*/
615
/* Write the single character @ch@ to the stdio stream @out@.  Returns zero
 * on success, or @-1@ if @putc@ reports an error.
 */

static int file_putch(void *out, int ch)
{
  FILE *stream = out;

  return (putc(ch, stream) == EOF ? -1 : 0);
}
623
/* Write the @sz@ bytes starting at @p@ to the stdio stream @out@.  Returns
 * zero on success, or @-1@ if fewer than @sz@ bytes were written.
 */

static int file_putm(void *out, const char *p, size_t sz)
{
  size_t done = fwrite(p, 1, sz, (FILE *)out);

  return (done < sz ? -1 : 0);
}
631
/* Format a single conversion directly to the stdio stream @out@.
 *
 * The @maxsz@ bound is not needed here, because the stream takes output
 * of any length.  Returns the number of characters written, or @-1@ on
 * error.  The length matters: @vgprintf@ adds the value returned by the
 * @nputf@ operation to its running total, so returning zero on success
 * would leave formatted output out of the final count and out of any
 * value stored through `%n'.
 */

static int file_nputf(void *out, size_t maxsz, const char *p, ...)
{
  FILE *fp = out;
  va_list ap;
  int n;

  (void)maxsz;

  va_start(ap, p);
  n = vfprintf(fp, p, ap);
  va_end(ap);

  if (n < 0) return (-1);
  return (n);
}
643
644 const struct gprintf_ops file_printops =
645 { file_putch, file_putm, file_nputf };
646
647 /*----- That's all, folks -------------------------------------------------*/