struct/dstr-putf.c: Fix typo in commentary.
[mLib] / struct / dstr-putf.c
1 /* -*-c-*-
2 *
3 * `printf'-style formatting for dynamic strings
4 *
5 * (c) 1999 Straylight/Edgeware
6 */
7
8 /*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of the mLib utilities library.
11 *
12 * mLib is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Library General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
16 *
17 * mLib is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Library General Public License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with mLib; if not, write to the Free
24 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 * MA 02111-1307, USA.
26 */
27
28 /*----- Header files ------------------------------------------------------*/
29
30 #include "config.h"
31
32 #include <assert.h>
33 #include <ctype.h>
34 #include <limits.h>
35 #include <math.h>
36 #include <stdarg.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40
41 #ifdef HAVE_FLOAT_H
42 # include <float.h>
43 #endif
44
45 #ifdef HAVE_STDINT_H
46 # include <stdint.h>
47 #endif
48
49 #include "darray.h"
50 #include "dstr.h"
51
52 /*----- Tunable constants -------------------------------------------------*/
53
/*
 * For each format specifier, at least @PUTFSTEP@ bytes of space are ensured
 * in the destination string before the formatted result is written.  This
 * is only a minimum: the output stage reserves more when the field width,
 * the precision or the argument itself calls for it.
 */

#define PUTFSTEP 64 /* Buffer size for @putf@ */
60
61 /*----- Preliminary definitions -------------------------------------------*/
62
/*
 * Conditional-inclusion helpers.  Each @IF_...(x)@ macro expands to its
 * argument when the corresponding feature is available, and to nothing
 * otherwise, so optional entries can be written inline in tables and
 * @switch@ statements.
 */

/* Floating-point limits from <float.h> are available. */
#if defined(HAVE_FLOAT_H)
#  define IF_FLOAT(x) x
#else
#  define IF_FLOAT(x)
#endif

/* The implementation advertises a `long long' type in <limits.h>. */
#if defined(LLONG_MAX) || defined(LONG_LONG_MAX)
#  define IF_LONGLONG(x) x
#else
#  define IF_LONGLONG(x)
#endif

/* The implementation advertises `intmax_t' in <stdint.h>. */
#if defined(INTMAX_MAX)
#  define IF_INTMAX(x) x
#else
#  define IF_INTMAX(x)
#endif
80
/*
 * Tables of argument types, written as X-macros: each table applies the
 * caller's two-argument macro @X(code, type)@ to every entry in turn.  The
 * @code@ is used to build both an enumeration constant (@fmt_code@) and a
 * union member name; @type@ is the C type fetched from the argument list.
 */

/* Types of arguments which are formatted and written to the output. */
#define OUTPUT_FMTTYPES(X)                                              \
  X(i, unsigned int)                                                    \
  X(li, unsigned long)                                                  \
  IF_LONGLONG( X(lli, unsigned long long) )                             \
  X(zi, size_t)                                                         \
  X(ti, ptrdiff_t)                                                      \
  IF_INTMAX( X(ji, uintmax_t) )                                         \
  X(s, char *)                                                          \
  X(p, void *)                                                          \
  X(f, double)                                                          \
  X(Lf, long double)

/* Pointer types through which a `%n' directive stores a count. */
#define PERCENT_N_FMTTYPES(X)                                           \
  X(n, int *)                                                           \
  X(hhn, char *)                                                        \
  X(hn, short *)                                                        \
  X(ln, long *)                                                         \
  X(zn, size_t *)                                                       \
  X(tn, ptrdiff_t *)                                                    \
  IF_LONGLONG( X(lln, long long *) )                                    \
  IF_INTMAX( X(jn, intmax_t *) )

/* Every argument type: the output types followed by the `%n' types. */
#define FMTTYPES(X)                                                     \
  OUTPUT_FMTTYPES(X)                                                    \
  PERCENT_N_FMTTYPES(X)
106
107 enum {
108 fmt_unset = 0,
109 #define CODE(code, ty) fmt_##code,
110 FMTTYPES(CODE)
111 #undef CODE
112 fmt__limit
113 };
114
115 typedef struct {
116 int fmt;
117 union {
118 #define MEMB(code, ty) ty code;
119 FMTTYPES(MEMB)
120 #undef MEMB
121 } u;
122 } fmtarg;
123
124 DA_DECL(fmtarg_v, fmtarg);
125
/*
 * Length-modifier codes.  These are stored in the low bits of a spec's
 * flags word and extracted with the @f_len@ mask, so they must all fit
 * within that mask.
 */
enum {
  len_std = 0,                          /* No length modifier */
  len_hh = 1,                           /* `hh' */
  len_h = 2,                            /* `h' */
  len_l = 3,                            /* `l' */
  len_ll = 4,                           /* `ll' */
  len_z = 5,                            /* `z' */
  len_t = 6,                            /* `t' */
  len_j = 7,                            /* `j' */
  len_L = 8                             /* `L' */
};
137
/* Bits in a format spec's flags word. */
#define f_len           0x000fu         /* Mask for the @len_...@ code */
#define f_wd            (1u << 4)       /* A field width was given */
#define f_wdarg         (1u << 5)       /* Width is taken from an argument */
#define f_prec          (1u << 6)       /* A precision was given */
#define f_precarg       (1u << 7)       /* Precision is taken from an arg */
#define f_plus          (1u << 8)       /* `+' flag */
#define f_minus         (1u << 9)       /* `-' flag */
#define f_sharp         (1u << 10)      /* `#' flag */
#define f_zero          (1u << 11)      /* `0' flag */
#define f_posarg        (1u << 12)      /* Explicit `N$' argument position */
148
149 typedef struct {
150 const char *p;
151 size_t n;
152 unsigned f;
153 int fmt, ch;
154 int wd, prec;
155 int arg;
156 } fmtspec;
157
158 DA_DECL(fmtspec_v, fmtspec);
159
160 /*----- Main code ---------------------------------------------------------*/
161
/* --- @dstr_vputf@ --- *
 *
 * Arguments:   @dstr *d@ = pointer to a dynamic string block
 *              @const char *p@ = pointer to @printf@-style format string
 *              @va_list *ap@ = argument handle
 *
 * Returns:     The number of characters written to the string.
 *
 * Use:         As for @dstr_putf@, but may be used as a back-end to user-
 *              supplied functions with @printf@-style interfaces.
 *
 *              Note that the definition of @dstr_vputf@ comes after the
 *              internal helper @set_arg@, which is defined first.
 */
173
174 static void set_arg(fmtarg_v *av, size_t i, int fmt)
175 {
176 size_t j, n;
177
178 n = DA_LEN(av);
179 if (i >= n) {
180 DA_ENSURE(av, i + 1 - n);
181 for (j = n; j <= i; j++) DA(av)[j].fmt = fmt_unset;
182 DA_UNSAFE_EXTEND(av, i + 1 - n);
183 }
184
185 if (DA(av)[i].fmt == fmt_unset) DA(av)[i].fmt = fmt;
186 else assert(DA(av)[i].fmt == fmt);
187 }
188
189 int dstr_vputf(dstr *d, const char *p, va_list *ap)
190 {
191 size_t n = d->len;
192 size_t sz, mx;
193 dstr dd = DSTR_INIT;
194 fmtspec_v sv = DA_INIT;
195 fmtarg_v av = DA_INIT;
196 fmtarg *fa, *fal;
197 fmtspec *fs, *fsl;
198 unsigned f;
199 int i, anext;
200 int wd, prec;
201
202 /* --- Initial pass through the input, parsing format specifiers --- *
203 *
204 * We essentially compile the format string into a vector of @fmtspec@
205 * objects, each of which represents a chunk of literal text followed by a
206 * (possibly imaginary, in the case of the final one) formatting directive.
207 * Output then simply consists of interpreting these specifiers in order.
208 */
209
210 anext = 0;
211
212 while (*p) {
213 f = 0;
214 DA_ENSURE(&sv, 1);
215 fs = &DA(&sv)[DA_LEN(&sv)];
216 DA_UNSAFE_EXTEND(&sv, 1);
217
218 /* --- Find the end of this literal portion --- */
219
220 fs->p = p;
221 while (*p && *p != '%') p++;
222 fs->n = p - fs->p;
223
224 /* --- Some simple cases --- *
225 *
226 * We might have reached the end of the string, or maybe a `%%' escape.
227 */
228
229 if (!*p) { fs->fmt = fmt_unset; fs->ch = 0; break; }
230 p++;
231 if (*p == '%') { fs->fmt = fmt_unset; fs->ch = '%'; p++; continue; }
232
233 /* --- Pick up initial flags --- */
234
235 flags:
236 for (;;) {
237 switch (*p) {
238 case '+': f |= f_plus; break;
239 case '-': f |= f_minus; break;
240 case '#': f |= f_sharp; break;
241 case '0': f |= f_zero; break;
242 default: goto done_flags;
243 }
244 p++;
245 }
246
247 /* --- Pick up the field width --- */
248
249 done_flags:
250 i = 0;
251 while (isdigit((unsigned char)*p)) i = 10*i + *p++ - '0';
252
253 /* --- Snag: this might have been an argument position indicator --- */
254
255 if (i && *p == '$' && (!f || f == f_zero)) {
256 f |= f_posarg;
257 fs->arg = i - 1;
258 p++;
259 goto flags;
260 }
261
262 /* --- Set the field width --- *
263 *
264 * If @i@ is nonzero here then we have a numeric field width. Otherwise
265 * it might be `*', maybe with an explicit argument number.
266 */
267
268 if (i) {
269 f |= f_wd;
270 fs->wd = i;
271 } else if (*p == '*') {
272 p++;
273 if (!isdigit((unsigned char)*p))
274 i = anext++;
275 else {
276 i = *p++ - '0';
277 while (isdigit((unsigned char)*p)) i = 10*i + *p++ - '0';
278 assert(*p == '$'); p++;
279 assert(i > 0); i--;
280 }
281 f |= f_wd | f_wdarg;
282 set_arg(&av, i, fmt_i); fs->wd = i;
283 }
284
285 /* --- Maybe we have a precision spec --- */
286
287 if (*p == '.') {
288 p++;
289 f |= f_prec;
290 if (isdigit((unsigned char)*p)) {
291 i = *p++ - '0';
292 while (isdigit((unsigned char)*p)) i = 10*i + *p++ - '0';
293 fs->prec = i;
294 } else if (*p != '*')
295 fs->prec = 0;
296 else {
297 p++;
298 if (!isdigit((unsigned char)*p))
299 i = anext++;
300 else {
301 i = *p++ - '0';
302 while (isdigit((unsigned char)*p)) i = 10*i + *p++ - '0';
303 assert(*p == '$'); p++;
304 assert(i > 0); i--;
305 }
306 f |= f_precarg;
307 set_arg(&av, i, fmt_i); fs->prec = i;
308 }
309 }
310
311 /* --- Maybe some length flags --- */
312
313 switch (*p) {
314 case 'h':
315 p++;
316 if (*p == 'h') { f |= len_hh; p++; } else f |= len_h;
317 break;
318 case 'l':
319 p++;
320 IF_LONGLONG( if (*p == 'l') { f |= len_ll; p++; } else ) f |= len_l;
321 break;
322 case 'L': f |= len_L; p++; break;
323 case 'z': f |= len_z; p++; break;
324 case 't': f |= len_t; p++; break;
325 IF_INTMAX( case 'j': f |= len_j; p++; break; )
326 }
327
328 /* --- The flags are now ready --- */
329
330 fs->f = f;
331
332 /* --- At the end, an actual directive --- */
333
334 fs->ch = *p;
335 switch (*p++) {
336 case '%':
337 fs->fmt = fmt_unset;
338 break;
339 case 'd': case 'i': case 'x': case 'X': case 'o': case 'u':
340 switch (f & f_len) {
341 case len_l: fs->fmt = fmt_li; break;
342 case len_z: fs->fmt = fmt_zi; break;
343 case len_t: fs->fmt = fmt_ti; break;
344 IF_LONGLONG( case len_ll: fs->fmt = fmt_lli; break; )
345 IF_INTMAX( case len_j: fs->fmt = fmt_ji; break; )
346 default: fs->fmt = fmt_i;
347 }
348 break;
349 case 'a': case 'A':
350 case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
351 fs->fmt = (f & f_len) == len_L ? fmt_Lf : fmt_f;
352 break;
353 case 'c':
354 fs->fmt = fmt_i;
355 break;
356 case 's':
357 fs->fmt = fmt_s;
358 break;
359 case 'p':
360 fs->fmt = fmt_p;
361 break;
362 case 'n':
363 switch (f & f_len) {
364 case len_hh: fs->fmt = fmt_hhn; break;
365 case len_h: fs->fmt = fmt_hn; break;
366 case len_l: fs->fmt = fmt_ln; break;
367 case len_z: fs->fmt = fmt_zn; break;
368 case len_t: fs->fmt = fmt_tn; break;
369 IF_LONGLONG( case len_ll: fs->fmt = fmt_lln; break; )
370 IF_INTMAX( case len_j: fs->fmt = fmt_jn; break; )
371 default: fs->fmt = fmt_n;
372 }
373 break;
374 default:
375 fprintf(stderr,
376 "FATAL dstr_vputf: unknown format specifier `%c'\n", p[-1]);
377 abort();
378 }
379
380 /* --- Finally sort out the argument --- *
381 *
382 * If we don't have explicit argument positions then this comes after the
383 * width and precision; and we don't know the type code until we've
384 * parsed the specifier, so this seems the right place to handle it.
385 */
386
387 if (!(f & f_posarg)) fs->arg = anext++;
388 set_arg(&av, fs->arg, fs->fmt);
389 }
390
391 /* --- Quick pass over the argument vector to collect the arguments --- */
392
393 for (fa = DA(&av), fal = fa + DA_LEN(&av); fa < fal; fa++) {
394 switch (fa->fmt) {
395 #define CASE(code, ty) case fmt_##code: fa->u.code = va_arg(*ap, ty); break;
396 FMTTYPES(CASE)
397 #undef CASE
398 default: abort();
399 }
400 }
401
402 /* --- Final pass through the format string to produce output --- */
403
404 fa = DA(&av);
405 for (fs = DA(&sv), fsl = fs + DA_LEN(&sv); fs < fsl; fs++) {
406 f = fs->f;
407
408 /* --- Output the literal portion --- */
409
410 if (fs->n) DPUTM(d, fs->p, fs->n);
411
412 /* --- And now the variable portion --- */
413
414 if (fs->fmt == fmt_unset) {
415 switch (fs->ch) {
416 case 0: break;
417 case '%': DPUTC(d, '%'); break;
418 default: abort();
419 }
420 continue;
421 }
422
423 DRESET(&dd);
424 DPUTC(&dd, '%');
425
426 /* --- Resolve the width and precision --- */
427
428 if (!(f & f_wd))
429 wd = 0;
430 else {
431 wd = (fs->f & f_wdarg) ? *(int *)&fa[fs->wd].u.i : fs->wd;
432 if (wd < 0) { wd = -wd; f |= f_minus; }
433 }
434
435 if (!(f & f_prec))
436 prec = 0;
437 else {
438 prec = (fs->f & f_precarg) ? *(int *)&fa[fs->prec].u.i : fs->prec;
439 if (prec < 0) { prec = 0; f &= ~f_prec; }
440 }
441
442 /* --- Write out the flags, width and precision --- */
443
444 if (f & f_plus) DPUTC(&dd, '+');
445 if (f & f_minus) DPUTC(&dd, '-');
446 if (f & f_sharp) DPUTC(&dd, '#');
447 if (f & f_zero) DPUTC(&dd, '0');
448
449 if (f & f_wd) {
450 DENSURE(&dd, PUTFSTEP);
451 dd.len += sprintf(dd.buf + dd.len, "%d", wd);
452 }
453
454 if (f & f_prec) {
455 DENSURE(&dd, PUTFSTEP + 1);
456 dd.len += sprintf(dd.buf + dd.len, ".%d", prec);
457 }
458
459 /* --- Write out the length gadget --- */
460
461 switch (f & f_len) {
462 case len_hh: DPUTC(&dd, 'h'); /* fall through */
463 case len_h: DPUTC(&dd, 'h'); break;
464 IF_LONGLONG( case len_ll: DPUTC(&dd, 'l'); /* fall through */ )
465 case len_l: DPUTC(&dd, 'l'); break;
466 case len_z: DPUTC(&dd, 'z'); break;
467 case len_t: DPUTC(&dd, 't'); break;
468 case len_L: DPUTC(&dd, 'L'); break;
469 IF_INTMAX( case len_j: DPUTC(&dd, 'j'); break; )
470 case len_std: break;
471 default: abort();
472 }
473
474 /* --- And finally the actually important bit --- */
475
476 DPUTC(&dd, fs->ch);
477 DPUTZ(&dd);
478
479 /* --- Make sure we have enough space for the output --- */
480
481 sz = PUTFSTEP;
482 if (sz < wd) sz = wd;
483 if (sz < prec + 16) sz = prec + 16;
484 switch (fs->ch) {
485 case 'a': case 'A':
486 case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
487 #ifdef HAVE_FLOAT_H
488 if (fs->ch == 'f') {
489 mx = ((fs->f & f_len) == len_L ?
490 LDBL_MAX_10_EXP : DBL_MAX_10_EXP) + 16;
491 if (sz < mx) sz = mx;
492 }
493 break;
494 #else
495 DPUTS(d, "<no float support>");
496 continue;
497 #endif
498 case 's':
499 if (!(f & f_prec)) {
500 n = strlen(fa[fs->arg].u.s);
501 if (sz < n) sz = n;
502 }
503 break;
504 case 'n':
505 switch (fs->fmt) {
506 #define CASE(code, ty) \
507 case fmt_##code: *fa[fs->arg].u.code = d->len - n; break;
508 PERCENT_N_FMTTYPES(CASE)
509 #undef CASE
510 default: abort();
511 }
512 continue;
513 }
514
515 /* --- Finally do the output stage --- */
516
517 DENSURE(d, sz + 1);
518 switch (fs->fmt) {
519 #ifdef HAVE_SNPRINTF
520 # define CASE(code, ty) case fmt_##code: \
521 i = snprintf(d->buf + d->len, sz + 1, dd.buf, fa[fs->arg].u.code); \
522 break;
523 #else
524 # define CASE(code, ty) case fmt_##code: \
525 i = sprintf(d->buf + d->len, dd.buf, fa[fs->arg].u.code); \
526 break;
527 #endif
528 OUTPUT_FMTTYPES(CASE)
529 #undef CASE
530 default: abort();
531 }
532 assert(0 <= i && i <= sz); d->len += i;
533 }
534
535 /* --- We're done --- */
536
537 DPUTZ(d);
538 DDESTROY(&dd);
539 DA_DESTROY(&av);
540 DA_DESTROY(&sv);
541 return (d->len - n);
542 }
543
/* --- @dstr_putf@ --- *
 *
 * Arguments:   @dstr *d@ = pointer to a dynamic string block
 *              @const char *p@ = pointer to @printf@-style format string
 *              @...@ = arguments to be formatted
 *
 * Returns:     The number of characters written to the string.
 *
 * Use:         Writes a piece of text to a dynamic string, doing @printf@-
 *              style substitutions as it goes.  Intended to be robust if
 *              faced with malicious arguments, but not if the format string
 *              itself is malicious.
 */
557
558 int dstr_putf(dstr *d, const char *p, ...)
559 {
560 int n;
561 va_list ap;
562 va_start(ap, p);
563 n = dstr_vputf(d, p, &ap);
564 va_end(ap);
565 return (n);
566 }
567
568 /*----- That's all, folks -------------------------------------------------*/