README.org: Use a more principled hack to make emphasis work.
[runlisp] / lib.c
1 /* -*-c-*-
2 *
3 * Common definitions for `runlisp'
4 *
5 * (c) 2020 Mark Wooding
6 */
7
8 /*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of Runlisp, a tool for invoking Common Lisp scripts.
11 *
12 * Runlisp is free software: you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by the
14 * Free Software Foundation; either version 3 of the License, or (at your
15 * option) any later version.
16 *
17 * Runlisp is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
20 * for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with Runlisp. If not, see <https://www.gnu.org/licenses/>.
24 */
25
26 /*----- Header files ------------------------------------------------------*/
27
28 #include "config.h"
29
30 #include <assert.h>
31
32 #include <ctype.h>
33 #include <errno.h>
34 #include <stdarg.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38
39 #include <unistd.h>
40
41 #include "lib.h"
42
43 /*----- Diagnostic utilities ----------------------------------------------*/
44
45 const char *progname = "???";
46 /* Our program name, for use in error messages. */
47
48 /* Set `progname' from the pathname in PROG (typically from `argv[0]'). */
49 void set_progname(const char *prog)
50 {
51 const char *p;
52
53 p = strrchr(prog, '/');
54 progname = p ? p + 1 : prog;
55 }
56
57 /* Report an error or warning in Unix style, given a captured argument
58 * cursor.
59 */
60 void vmoan(const char *msg, va_list ap)
61 {
62 fprintf(stderr, "%s: ", progname);
63 vfprintf(stderr, msg, ap);
64 fputc('\n', stderr);
65 }
66
67 /* Issue a warning message. */
68 void moan(const char *msg, ...)
69 { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); }
70
71 /* Issue a fatal error message and exit unsuccessfully. */
72 void lose(const char *msg, ...)
73 { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); exit(127); }
74
75 /*----- Memory allocation -------------------------------------------------*/
76
77 /* Allocate and return a pointer to N bytes, or report a fatal error.
78 *
79 * Release the pointer using `free' as usual. If N is zero, returns null
80 * (but you are not expected to check for this).
81 */
82 void *xmalloc(size_t n)
83 {
84 void *p;
85
86 if (!n) return (0);
87 p = malloc(n); if (!p) lose("failed to allocate memory");
88 return (p);
89 }
90
91 /* Resize the block at P (from `malloc' or `xmalloc') to be N bytes long.
92 *
93 * The block might (and probably will) move, so it returns the new address.
94 * If N is zero, then the block is freed (if necessary) and a null pointer
95 * returned; otherwise, if P is null then a fresh block is allocated. If
96 * allocation fails, then a fatal error is reported.
97 */
98 void *xrealloc(void *p, size_t n)
99 {
100 if (!n) { free(p); return (0); }
101 else if (!p) return (xmalloc(n));
102 p = realloc(p, n); if (!p) lose("failed to allocate memory");
103 return (p);
104 }
105
106 /* Allocate and return a copy of the N-byte string starting at P.
107 *
108 * The new string is null-terminated, though P need not be. If allocation
109 * fails, then a fatal error is reported.
110 */
111 char *xstrndup(const char *p, size_t n)
112 {
113 char *q = xmalloc(n + 1);
114
115 memcpy(q, p, n); q[n] = 0;
116 return (q);
117 }
118
119 /* Allocate and return a copy of the null-terminated string starting at P.
120 *
121 * If allocation fails, then a fatal error is reported.
122 */
123 char *xstrdup(const char *p) { return (xstrndup(p, strlen(p))); }
124
125 /*----- Dynamic strings ---------------------------------------------------*/
126
127 /* Initialize the string D.
128 *
129 * Usually you'd use the static initializer `DSTR_INIT'.
130 */
131 void dstr_init(struct dstr *d) { d->p = 0; d->len = d->sz = 0; }
132
133 /* Reset string D so it's empty again. */
134 void dstr_reset(struct dstr *d) { d->len = 0; }
135
136 /* Ensure that D has at least N unused bytes available. */
137 void dstr_ensure(struct dstr *d, size_t n)
138 {
139 size_t need = d->len + n, newsz;
140
141 if (need <= d->sz) return;
142 newsz = d->sz ? 2*d->sz : 16;
143 while (newsz < need) newsz *= 2;
144 d->p = xrealloc(d->p, newsz); d->sz = newsz;
145 }
146
147 /* Release the memory held by D.
148 *
149 * It must be reinitialized (e.g., by `dstr_init') before it can be used
150 * again.
151 */
152 void dstr_release(struct dstr *d) { free(d->p); }
153
154 /* Append the N-byte string at P to D.
155 *
156 * P need not be null-terminated. D will not be null-terminated
157 * afterwards.
158 */
159 void dstr_putm(struct dstr *d, const void *p, size_t n)
160 { dstr_ensure(d, n); memcpy(d->p + d->len, p, n); d->len += n; }
161
162 /* Append the null-terminated string P to D.
163 *
164 * D /is/ guaranteed to be null-terminated after this.
165 */
166 void dstr_puts(struct dstr *d, const char *p)
167 {
168 size_t n = strlen(p);
169
170 dstr_ensure(d, n + 1);
171 memcpy(d->p + d->len, p, n + 1);
172 d->len += n;
173 }
174
175 /* Append the single character CH to D.
176 *
177 * D will not be null-terminated afterwards.
178 */
179 void dstr_putc(struct dstr *d, int ch)
180 { dstr_ensure(d, 1); d->p[d->len++] = ch; }
181
182 /* Append N copies of the character CH to D.
183 *
184 * D will not be null-terminated afterwards.
185 */
186 void dstr_putcn(struct dstr *d, int ch, size_t n)
187 { dstr_ensure(d, n); memset(d->p + d->len, ch, n); d->len += n; }
188
189 /* Null-terminate the string D.
190 *
191 * This doesn't change the length of D. If further stuff is appended then
192 * the null terminator will be overwritten.
193 */
194 void dstr_putz(struct dstr *d)
195 { dstr_ensure(d, 1); d->p[d->len] = 0; }
196
197 /* Append stuff to D, determined by printf(3) format string P and argument
198 * tail AP.
199 *
200 * D will not be null-terminated afterwards.
201 */
202 void dstr_vputf(struct dstr *d, const char *p, va_list ap)
203 {
204 va_list ap2;
205 size_t r;
206 int n;
207
208 r = d->sz - d->len;
209 va_copy(ap2, ap);
210 n = vsnprintf(d->p + d->len, r, p, ap2); assert(n >= 0);
211 va_end(ap2);
212 if (n >= r) {
213 dstr_ensure(d, n + 1); r = d->sz - d->len;
214 n = vsnprintf(d->p + d->len, r, p, ap); assert(n >= 0); assert(n < r);
215 }
216 d->len += n;
217 }
218
219 /* Append stuff to D, determined by printf(3) format string P and arguments.
220 *
221 * D will not be null-terminated afterwards.
222 */
223 PRINTF_LIKE(2, 3) void dstr_putf(struct dstr *d, const char *p, ...)
224 { va_list ap; va_start(ap, p); dstr_vputf(d, p, ap); va_end(ap); }
225
226 /* Append the next input line from FP to D.
227 *
228 * Return 0 on success, or -1 if reading immediately fails or encounters
229 * end-of-file (call ferror(3) to distinguish). Any trailing newline is
230 * discarded: it is not possible to determine whether the last line was ended
231 * with a newline. D is guaranteed to be null-terminated afterwards.
232 */
233 int dstr_readline(struct dstr *d, FILE *fp)
234 {
235 size_t n;
236 int any = 0;
237
238 for (;;) {
239 dstr_ensure(d, 2);
240 if (!fgets(d->p + d->len, d->sz - d->len, fp)) break;
241 n = strlen(d->p + d->len); assert(n > 0); any = 1;
242 d->len += n;
243 if (d->p[d->len - 1] == '\n') { d->p[--d->len] = 0; break; }
244 }
245
246 if (!any) return (-1);
247 else return (0);
248 }
249
250 /*----- Dynamic vectors of strings ----------------------------------------*/
251
252 /* Initialize the vector AV.
253 *
254 * Usually you'd use the static initializer `ARGV_INIT'.
255 */
256 void argv_init(struct argv *av)
257 { av->v = 0; av->o = av->n = av->sz = 0; }
258
259 /* Reset the vector AV so that it's empty again. */
260 void argv_reset(struct argv *av) { av->n = 0; }
261
262 /* Ensure that AV has at least N unused slots at the end. */
263 void argv_ensure(struct argv *av, size_t n)
264 {
265 size_t need = av->n + av->o + n, newsz;
266
267 if (need <= av->sz) return;
268 newsz = av->sz ? 2*av->sz : 8;
269 while (newsz < need) newsz *= 2;
270 av->v = xrealloc(av->v - av->o, newsz*sizeof(char *)); av->v += av->o;
271 av->sz = newsz;
272 }
273
274 /* Ensure that AV has at least N unused slots at the /start/. */
275 void argv_ensure_offset(struct argv *av, size_t n)
276 {
277 size_t newoff;
278
279 /* Stupid version. We won't, in practice, be prepending lots of stuff, so
280 * avoid the extra bookkeeping involved in trying to make a double-ended
281 * extendable array asymptotically efficient.
282 */
283 if (av->o >= n) return;
284 newoff = 16;
285 while (newoff < n) newoff *= 2;
286 argv_ensure(av, newoff - av->o);
287 memmove(av->v + newoff - av->o, av->v, av->n*sizeof(char *));
288 av->v += newoff - av->o; av->o = newoff;
289 }
290
291 /* Release the memory held by AV.
292 *
293 * It must be reinitialized (e.g., by `argv_init') before it can be used
294 * again.
295 */
296 void argv_release(struct argv *av) { free(av->v - av->o); }
297
298 /* Append the pointer P to AV. */
299 void argv_append(struct argv *av, char *p)
300 { argv_ensure(av, 1); av->v[av->n++] = p; }
301
302 /* Append a null pointer to AV, without extending the vactor length.
303 *
304 * The null pointer will be overwritten when the next string is appended.
305 */
306 void argv_appendz(struct argv *av)
307 { argv_ensure(av, 1); av->v[av->n] = 0; }
308
309 /* Append a N-element vector V of pointers to AV. */
310 void argv_appendn(struct argv *av, char *const *v, size_t n)
311 {
312 argv_ensure(av, n);
313 memcpy(av->v + av->n, v, n*sizeof(const char *));
314 av->n += n;
315 }
316
317 /* Append the variable-length vector BV to AV. */
318 void argv_appendav(struct argv *av, const struct argv *bv)
319 { argv_appendn(av, bv->v, bv->n); }
320
321 /* Append the pointers from a variable-length argument list AP to AV.
322 *
323 * The list is terminated by a null pointer.
324 */
325 void argv_appendv(struct argv *av, va_list ap)
326 {
327 char *p;
328 for (;;) { p = va_arg(ap, char *); if (!p) break; argv_append(av, p); }
329 }
330
331 /* Append the argument pointers, terminated by a null pointer, to AV. */
332 void argv_appendl(struct argv *av, ...)
333 { va_list ap; va_start(ap, av); argv_appendv(av, ap); va_end(ap); }
334
335 /* Prepend the pointer P to AV. */
336 void argv_prepend(struct argv *av, char *p)
337 { argv_ensure_offset(av, 1); *--av->v = p; av->o--; av->n++; }
338
339 /* Prepend a N-element vector V of pointers to AV. */
340 void argv_prependn(struct argv *av, char *const *v, size_t n)
341 {
342 argv_ensure_offset(av, n);
343 av->o -= n; av->v -= n; av->n += n;
344 memcpy(av->v, v, n*sizeof(const char *));
345 }
346
347 /* Prepend the variable-length vector BV to AV. */
348 void argv_prependav(struct argv *av, const struct argv *bv)
349 { argv_prependn(av, bv->v, bv->n); }
350
351 /* Prepend the pointers from a variable-length argument list AP to AV.
352 *
353 * The list is terminated by a null pointer.
354 */
355 void argv_prependv(struct argv *av, va_list ap)
356 {
357 char *p, **v;
358 size_t n = 0;
359
360 for (;;) {
361 p = va_arg(ap, char *); if (!p) break;
362 argv_prepend(av, p); n++;
363 }
364 v = av->v;
365 while (n >= 2) {
366 p = v[0]; v[0] = v[n - 1]; v[n - 1] = p;
367 v++; n -= 2;
368 }
369 }
370
371 /* Prepend the argument pointers, terminated by a null pointer, to AV. */
372 void argv_prependl(struct argv *av, ...)
373 { va_list ap; va_start(ap, av); argv_prependv(av, ap); va_end(ap); }
374
375 /*----- Treaps ------------------------------------------------------------*/
376
377 /* Return nonzero if the AN-byte string A is strictly precedes the BN-byte
378 * string B in a lexicographic ordering.
379 *
380 * All comparison of keys is handled by this function.
381 */
382 static int str_lt(const char *a, size_t an, const char *b, size_t bn)
383 {
384 /* This is a little subtle. We need only compare the first N bytes of the
385 * strings, where N is the length of the shorter string. If this
386 * distinguishes the two strings, then we're clearly done. Otherwise, if
387 * the prefixes are equal then the shorter string is the smaller one. If
388 * the two strings are the same length, then they're equal.
389 *
390 * Hence, if A is the strictly shorter string, then A precedes B if A
391 * precedes or matches the prefix of B; otherwise A only precedes B if A
392 * strictly precedes the prefix of B.
393 */
394 if (an < bn) return (MEMCMP(a, <=, b, an));
395 else return (MEMCMP(a, <, b, bn));
396 }
397
398 /* Initialize the treap T.
399 *
400 * Usually you'd use the static initializer `TREAP_INIT'.
401 */
402 void treap_init(struct treap *t) { t->root = 0; }
403
404 /* Look up the KN-byte key K in the treap T.
405 *
406 * Return a pointer to the matching node if one was found, or null otherwise.
407 */
408 void *treap_lookup(const struct treap *t, const char *k, size_t kn)
409 {
410 struct treap_node *n = t->root, *candidate = 0;
411
412 /* This is a simple prototype for some of the search loops we'll encounter
413 * later. Notice that we use a strict one-sided comparison, rather than
414 * the more conventional two-sided comparison.
415 *
416 * The main loop will find the largest key not greater than K.
417 */
418 while (n)
419 /* Compare the node's key against our key. If the node is too large,
420 * then we ignore it and move left. Otherwise remember this node for
421 * later, and move right to see if we can find a better, larger node.
422 */
423
424 if (str_lt(k, kn, n->k, n->kn)) n = n->left;
425 else { candidate = n; n = n->right; }
426
427 /* If the candidate node is less than our key then we failed. Otherwise,
428 * by trichotomy, we have found the correct node.
429 */
430 if (!candidate || str_lt(candidate->k, candidate->kn, k, kn)) return (0);
431 return (candidate);
432 }
433
434 /* Look up the KN-byte K in the treap T, recording a path in P.
435 *
436 * This is similar to `treap_lookup', in that it returns the requested node
437 * if it already exists, or null otherwise, but it also records in P
438 * information to be used by `treap_insert' to insert a new node with the
439 * given key if it's not there already.
440 */
441 void *treap_probe(struct treap *t, const char *k, size_t kn,
442 struct treap_path *p)
443 {
444 struct treap_node **nn = &t->root, *candidate = 0;
445 unsigned i = 0;
446
447 /* This walk is similar to `treap_lookup' above, except that we also record
448 * the address of each node pointer we visit along the way.
449 */
450 for (;;) {
451 assert(i < TREAP_PATHMAX); p->path[i++] = nn;
452 if (!*nn) break;
453 if (str_lt(k, kn, (*nn)->k, (*nn)->kn)) nn = &(*nn)->left;
454 else { candidate = *nn; nn = &(*nn)->right; }
455 }
456 p->nsteps = i;
457
458 /* Check to see whether we found the right node. */
459 if (!candidate || str_lt(candidate->k, candidate->kn, k, kn)) return (0);
460 return (candidate);
461 }
462
463 /* Insert a new node N into T, associating it with the KN-byte key K.
464 *
465 * Use the path data P, from `treap_probe', to help with insertion.
466 */
467 void treap_insert(struct treap *t, const struct treap_path *p,
468 struct treap_node *n, const char *k, size_t kn)
469 {
470 size_t i = p->nsteps;
471 struct treap_node **nn, **uu, *u;
472 unsigned wt;
473
474 /* Fill in the node structure. */
475 n->k = xstrndup(k, kn); n->kn = kn;
476 n->wt = wt = rand(); n->left = n->right = 0;
477
478 /* Prepare for the insertion.
479 *
480 * The path actually points to each of the links traversed when searching
481 * for the node, starting with the `root' pointer, then the `left' or
482 * `right' pointer of the root node, and so on; `nsteps' will always be
483 * nonzero, since the path will always pass through the root, and the final
484 * step, `path->path[path->nsteps - 1]' will always be the address of a
485 * null pointer onto which the freshly inserted node could be hooked in
486 * order to satisfy the binary-search-tree ordering. (Of course, this will
487 * likely /not/ satisfy the heap condition, so more work needs to be done.)
488 *
489 * Throughout, NN is our current candidate for where to attach the node N.
490 * As the loop progresses, NN will ascend to links further up the tree, and
491 * N will be adjusted to accumulate pieces of the existing tree structure.
492 * We'll stop when we find that the parent node's weight is larger than our
493 * new node's weight, at which point we can just set *NN = N; or if we run
494 * out of steps in the path, in which case *NN is the root pointer.
495 */
496 assert(i); nn = p->path[--i];
497 while (i--) {
498
499 /* Collect the next step in the path, and get the pointer to the node. */
500 uu = p->path[i]; u = *uu;
501
502 /* If this node's weight is higher, then we've found the right level and
503 * we can stop.
504 */
505 if (wt <= u->wt) break;
506
507 /* The node U is lighter than our new node N, so we must rotate in order
508 * to fix things. If we were currently planning to hook N as the left
509 * subtree of U, then we rotate like this:
510 *
511 * | |
512 * U (N)
513 * / \ / \
514 * (N) Z ---> X U
515 * / \ / \
516 * X Y Y Z
517 *
518 * On the other hand, if we were planning to hook N as the right subtree
519 * of U, then we do the opposite rotation:
520 *
521 * | |
522 * U (N)
523 * / \ / \
524 * X (N) ---> U Z
525 * / \ / \
526 * Y Z X Y
527 *
528 * These transformations clearly preserve the ordering of nodes in the
529 * binary search tree, and satisfy the heap condition in the subtree
530 * headed by N.
531 */
532 if (nn == &u->left) { u->left = n->right; n->right = u; }
533 else { u->right = n->left; n->left = u; }
534
535 /* And this arrangement must be attached to UU, or some higher attachment
536 * point. The subtree satisfies the heap condition, and can be attached
537 * safely at the selected place.
538 */
539 nn = uu;
540 }
541
542 /* We've found the right spot. Hook the accumulated subtree into place. */
543 *nn = n;
544 }
545
546 /* Remove the node with the KN-byte K from T.
547 *
548 * Return the address of the node we removed, or null if it couldn't be
549 * found.
550 */
551 void *treap_remove(struct treap *t, const char *k, size_t kn)
552 {
553 struct treap_node **nn = &t->root, **candidate = 0, *n, *l, *r;
554
555 /* Search for the matching node, but keep track of the address of the link
556 * which points to our target node.
557 */
558 while (*nn)
559 if (str_lt(k, kn, (*nn)->k, (*nn)->kn)) nn = &(*nn)->left;
560 else { candidate = nn; nn = &(*nn)->right; }
561
562 /* If this isn't the right node then give up. */
563 if (!candidate || str_lt((*candidate)->k, (*candidate)->kn, k, kn))
564 return (0);
565
566 /* Now we need to disentangle the node from the tree. This is essentially
567 * the reverse of insertion: we pretend that this node is suddenly very
568 * light, and mutate the tree so as to restore the heap condition until
569 * eventually our node is a leaf and can be cut off without trouble.
570 *
571 * Throughout, the link *NN notionally points to N, but we don't actually
572 * update it until we're certain what value it should finally take.
573 */
574 nn = candidate; n = *nn; l = n->left; r = n->right;
575 for (;;)
576
577 /* If its left subtree is empty then we can replace our node by its right
578 * subtree and be done. Similarly, if the right subtree is empty then we
579 * replace the node by its left subtree.
580 *
581 * | | | |
582 * (N) ---> R ; (N) ---> L
583 * / \ / \
584 * * R L *
585 */
586 if (!l) { *nn = r; break; }
587 else if (!r) { *nn = l; break; }
588
589 /* Otherwise we need to rotate the pointers so that the heavier of the
590 * two children takes the place of our node; thus we have either
591 *
592 * | |
593 * (N) L
594 * / \ / \
595 * L R ---> X (N)
596 * / \ / \
597 * X Y Y R
598 *
599 * or
600 *
601 * | |
602 * (N) R
603 * / \ / \
604 * L R ---> (N) Y
605 * / \ / \
606 * X Y L X
607 *
608 * Again, these transformations clearly preserve the ordering of nodes in
609 * the binary search tree, and the heap condition.
610 */
611 else if (l->wt > r->wt)
612 { *nn = l; nn = &l->right; l = n->left = l->right; }
613 else
614 { *nn = r; nn = &r->left; r = n->right = r->left; }
615
616 /* Release the key buffer, and return the node that we've now detached. */
617 free(n->k); return (n);
618 }
619
620 /* Initialize an iterator I over T's nodes. */
621 void treap_start_iter(struct treap *t, struct treap_iter *i)
622 {
623 struct treap_node *n = t->root;
624 unsigned sp = 0;
625
626 /* The `stack' in the iterator structure is an empty ascending stack of
627 * nodes which have been encountered, and their left subtrees investigated,
628 * but not yet visited by the iteration.
629 *
630 * Iteration begins by stacking the root node, its left child, and so on,
631 * At the end of this, the topmost entry on the stack is the least node of
632 * the tree, followed by its parent, grandparent, and so on up to the root.
633 */
634 while (n) {
635 assert(sp < TREAP_PATHMAX);
636 i->stack[sp++] = n; n = n->left;
637 }
638 i->sp = sp;
639 }
640
641 /* Return the next node from I, in ascending order by key.
642 *
643 * If there are no more nodes, then return null.
644 */
645 void *treap_next(struct treap_iter *i)
646 {
647 struct treap_node *n, *o;
648 unsigned sp = i->sp;
649
650 /* We say that a node is /visited/ once it's been returned by this
651 * iterator. To traverse a tree in order, then, we traverse its left
652 * subtree, visit the tree root, and traverse its right subtree -- which is
653 * a fine recursive definition, but we need a nonrecursive implementation.
654 *
655 * As is usual in this kind of essential structural recursion, we maintain
656 * a stack. The invariant that we'll maintain is as follows.
657 *
658 * 1. If the stack is empty, then all nodes have been visited.
659 *
660 * 2, If the stack is nonempty then the topmost entry on the stack is the
661 * least node which has not yet been visited -- and therefore is the
662 * next node to visit.
663 *
664 * 3. The earlier entries in the stack are, in (top to bottom) order,
665 * those of the topmost node's parent, grandparent, etc., up to the
666 * root, which have not yet been visited. More specifically, a node
667 * appears in the stack if and only if some node in its left subtree
668 * is nearer the top of the stack.
669 *
670 * When we initialized the iterator state (in `treap_start_iter' above), we
671 * traced a path to the leftmost leaf, stacking the root, its left-hand
672 * child, and so on. The leftmost leaf is clearly the first node to be
673 * visited, and its entire ancestry is on the stack since none of these
674 * nodes has yet been visited. (If the tree is empty, then we have done
675 * nothing, the stack is empty, and there are no nodes to visit.) This
676 * establishes the base case for the induction.
677 */
678
679 /* So, if the stack is empty now, then (1) all of the nodes have been
680 * visited and there's nothing left to do. Return null.
681 */
682 if (!sp) return (0);
683
684 /* It's clear that, if we pop the topmost element of the stack, visit it,
685 * and arrange to reestablish the invariant, then we'll visit the nodes in
686 * the correct order, pretty much by definition.
687 *
688 * So, pop a node off the stack. This is the node we shall return. But
689 * before we can do that, we must reestablish the above invariant.
690 * Firstly, the current node is removed from the stack, because we're about
691 * to visit it, and visited nodes don't belong on the stack. Then there
692 * are two cases to consider.
693 *
694 * * If the current node's right subtree is not empty, then the next node
695 * to be visited is the leftmost node in that subtree. All of the
696 * nodes on the stack are ancestors of the current node, and the right
697 * subtree consists of its descendants, so none of them are already on
698 * the stack; and they're all greater than the current node, and
699 * therefore haven't been visited. Therefore, we must push the current
700 * node's right child, its /left/ child, and so on, proceeding
701 * leftwards until we fall off the bottom of the tree.
702 *
703 * * Otherwise, we've finished traversing some subtree. Either we are
704 * now done, or (3) we have just finished traversing the left subtree
705 * of the next topmost item on the stack. This must therefore be the
706 * next node to visit. The rest of the stack is already correct.
707 */
708 n = i->stack[--sp];
709 o = n->right;
710 while (o) {
711 assert(sp < TREAP_PATHMAX);
712 i->stack[sp++] = o; o = o->left;
713 }
714 i->sp = sp;
715 return (n);
716 }
717
718 /* Recursively check the subtree headed by N.
719 *
720 * No node should have weight greater than MAXWT, to satisfy the heap
721 * condition; if LO is not null, then all node keys should be strictly
722 * greater than LO, and, similarly, if HI is not null, then all keys should
723 * be strictly smaller than HI.
724 */
725 static void check_subtree(struct treap_node *n, unsigned maxwt,
726 const char *klo, const char *khi)
727 {
728 /* Check the heap condition. */
729 assert(n->wt <= maxwt);
730
731 /* Check that the key is in bounds. (Use `strcmp' here to ensure that our
732 * own `str_lt' is working correctly.)
733 */
734 if (klo) assert(STRCMP(n->k, >, klo));
735 if (khi) assert(STRCMP(n->k, <, khi));
736
737 /* Check the left subtree. Node weights must be bounded above by our own
738 * weight. And every key in the left subtree must be smaller than our
739 * current key. We propagate the lower bound.
740 */
741 if (n->left) check_subtree(n->left, n->wt, klo, n->k);
742
743 /* Finally, check the right subtree. This time, every key must be larger
744 * than our key, and we propagate the upper bound.
745 */
746 if (n->right) check_subtree(n->right, n->wt, n->k, khi);
747 }
748
749 /* Check the treap structure rules for T. */
750 void treap_check(struct treap *t)
751 { if (t->root) check_subtree(t->root, t->root->wt, 0, 0); }
752
753 /* Recursively dump the subtree headed by N, indenting the output lines by
754 * IND spaces.
755 */
756 static void dump_node(struct treap_node *n, int ind)
757 {
758 if (n->left) dump_node(n->left, ind + 1);
759 printf(";;%*s [%10u] `%s'\n", 2*ind, "", n->wt, n->k);
760 if (n->right) dump_node(n->right, ind + 1);
761 }
762
763 /* Dump the treap T to standard output, for debugging purposes. */
764 void treap_dump(struct treap *t) { if (t->root) dump_node(t->root, 0); }
765
766 /*----- Configuration file parsing ----------------------------------------*/
767
768 #ifndef DECL_ENVIRON
769 extern char **environ;
770 #endif
771
772 /* Advance P past a syntactically valid name, but no further than L.
773 *
774 * Return the new pointer. If no name is found, report an error, blaming
775 * FILE and LINE; WHAT is an adjective for the kind of name that was
776 * expected.
777 */
778 static const char *scan_name(const char *what,
779 const char *p, const char *l,
780 const char *file, unsigned line)
781 {
782 const char *q = p;
783
784 while (q < l &&
785 (ISALNUM(*q) || *q == '-' || *q == '_' || *q == '.' || *q == '/' ||
786 *q == '*' || *q == '+' || *q == '%' || *q == '@'))
787 q++;
788 if (q == p) lose("%s:%u: expected %s name", file, line, what);
789 return (q);
790 }
791
792 /* Initialize the configuration state CONF.
793 *
794 * Usually you'd use the static initializer `CONFIG_INIT'.
795 */
796 void config_init(struct config *conf)
797 { treap_init(&conf->sections); }
798
799 /* Find and return the section with null-terminated NAME in CONF.
800 *
801 * If no section is found, the behaviour depends on whether `CF_CREAT' is set
802 * in F: if so, an empty section is created and returned; otherwise, a null
803 * pointer is returned.
804 */
805 struct config_section *config_find_section(struct config *conf, unsigned f,
806 const char *name)
807 { return (config_find_section_n(conf, f, name, strlen(name))); }
808
809 /* Find and return the section with the SZ-byte NAME in CONF.
810 *
811 * This works like `config_find_section', but with an explicit length for the
812 * NAME rather than null-termination.
813 */
814 struct config_section *config_find_section_n(struct config *conf, unsigned f,
815 const char *name, size_t sz)
816 {
817 struct config_section *sect;
818 struct treap_path path;
819
820 if (!(f&CF_CREAT))
821 sect = treap_lookup(&conf->sections, name, sz);
822 else {
823 sect = treap_probe(&conf->sections, name, sz, &path);
824 if (!sect) {
825 sect = xmalloc(sizeof(*sect));
826 if (!conf->head) conf->tail = &conf->head;
827 sect->next = 0; *conf->tail = sect; conf->tail = &sect->next;
828 sect->parents = 0; sect->nparents = SIZE_MAX;
829 treap_init(&sect->vars); treap_init(&sect->cache);
830 treap_insert(&conf->sections, &path, &sect->_node, name, sz);
831 config_set_var_n(conf, sect, CF_LITERAL, "@name", 5, name, sz);
832 }
833 }
834 return (sect);
835 }
836
837 /* Set the fallback section for CONF to be SECT.
838 *
839 * That is, if a section has no explicit parents, then by default it will
840 * have a single parent which is SECT. If SECT is null then there is no
841 * fallback section, and sections which don't have explicitly specified
842 * parents have no parents at all. (This is the default situation.)
843 */
844 void config_set_fallback(struct config *conf, struct config_section *sect)
845 { conf->fallback = sect; }
846
847 /* Arrange that SECT has PARENT as its single parent section.
848 *
849 * If PARENT is null, then arrange that SECT has no parents at all. In
850 * either case, any `@parents' setting will be ignored.
851 */
852 void config_set_parent(struct config_section *sect,
853 struct config_section *parent)
854 {
855 if (!parent)
856 sect->nparents = 0;
857 else {
858 sect->parents = xmalloc(sizeof(*sect->parents));
859 sect->parents[0] = parent; sect->nparents = 1;
860 }
861 }
862
863 /* Initialize I to iterate over the sections defined in CONF. */
864 void config_start_section_iter(struct config *conf,
865 struct config_section_iter *i)
866 { i->sect = conf->head; }
867
868 /* Return the next section from I, in order of creation.
869 *
870 * If there are no more sections, then return null.
871 */
872 struct config_section *config_next_section(struct config_section_iter *i)
873 {
874 struct config_section *sect;
875
876 sect = i->sect;
877 if (sect) i->sect = sect->next;
878 return (sect);
879 }
880
881 /* Initialize the `parents' links of SECT, if they aren't set up already.
882 *
883 * If SECT contains a `@parents' setting then parse it to determine the
884 * parents; otherwise use CONF's fallbeck section, as established by
885 * `config_set_fallback'.
886 */
887 static void set_config_section_parents(struct config *conf,
888 struct config_section *sect)
889 {
890 struct config_section *parent;
891 struct config_var *var;
892 const char *file; unsigned line;
893 size_t i, n;
894 char *p, *q, *l;
895 struct argv av = ARGV_INIT;
896
897 /* If the section already has parents established then there's nothing to
898 * do.
899 */
900 if (sect->nparents != SIZE_MAX) return;
901
902 /* Look up `@parents', without recursion! */
903 var = treap_lookup(&sect->vars, "@parents", 8);
904 if (!var) {
905 /* No explicit setting: use the fallback setting. */
906
907 if (!conf->fallback || conf->fallback == sect)
908 sect->nparents = 0;
909 else {
910 sect->parents = xmalloc(sizeof(*sect->parents)); sect->nparents = 1;
911 sect->parents[0] = conf->fallback;
912 }
913 } else {
914 /* Found a `@parents' list: parse it and set the parents list. */
915
916 file = var->file; line = var->line; if (!file) file = "<internal>";
917
918 /* We do this in two phases. First, we parse out the section names, and
919 * record start/limit pointer pairs in `av'.
920 */
921 p = var->val; l = p + var->n; while (p < l && ISSPACE(*p)) p++;
922 while (*p) {
923 q = p;
924 p = (/*unconst*/ char *)scan_name("parent section", p, l, file, line);
925 argv_append(&av, q); argv_append(&av, p);
926 while (p < l && ISSPACE(*p)) p++;
927 if (p >= l) break;
928 if (*p == ',') do p++; while (ISSPACE(*p));
929 }
930
931 /* Now that we've finished parsing, we know how many parents we're going
932 * to have, so we can allocate the `parents' vector and fill it in.
933 */
934 sect->nparents = av.n/2;
935 sect->parents = xmalloc(sect->nparents*sizeof(*sect->parents));
936 for (i = 0; i < av.n; i += 2) {
937 n = av.v[i + 1] - av.v[i];
938 parent = config_find_section_n(conf, 0, av.v[i], n);
939 if (!parent)
940 lose("%s:%u: unknown parent section `%.*s'",
941 file, line, (int)n, av.v[i]);
942 sect->parents[i/2] = parent;
943 }
944 }
945
946 /* All done. */
947 argv_release(&av);
948 }
949
950 /* Find a setting of the SZ-byte variable NAME in CONF, starting from SECT.
951 *
952 * If successful, return a pointer to the variable; otherwise return null.
953 * Inheritance cycles and ambiguous inheritance are diagnosed as fatal
954 * errors.
955 */
956 struct config_var *search_recursive(struct config *conf,
957 struct config_section *sect,
958 const char *name, size_t sz)
959 {
960 struct config_cache_entry *cache;
961 struct treap_path path;
962 struct config_var *var, *v;
963 size_t i, j = j;
964
965 /* If the variable is defined locally then we can just return it. */
966 var = treap_lookup(&sect->vars, name, sz); if (var) return (var);
967
968 /* If we have no parents then there's no way we can find it. */
969 set_config_section_parents(conf, sect);
970 if (!sect->parents) return (0);
971
972 /* Otherwise we must visit the section's parents. We can avoid paying for
973 * this on every lookup by using a cache. If there's already an entry for
974 * this variable then we can return the result immediately (note that we
975 * cache both positive and negative outcomes). Otherwise we create a new
976 * cache entry, do the full recursive search, and fill in the result when
977 * we're done.
978 *
979 * The cache also helps us detect cycles: we set the `CF_OPEN' flag on a
980 * new cache entry when it's first created, and clear it when we fill in
981 * the result: if we encounter an open cache entry again, we know that
982 * we've found a cycle.
983 */
984 cache = treap_probe(&sect->cache, name, sz, &path);
985 if (!cache) {
986 cache = xmalloc(sizeof(*cache)); cache->f = CF_OPEN;
987 treap_insert(&sect->cache, &path, &cache->_node, name, sz);
988 } else if (cache->f&CF_OPEN)
989 lose("inheritance cycle through section `%s'",
990 CONFIG_SECTION_NAME(sect));
991 else
992 return (cache->var);
993
994 /* Recursively search in each parent. We insist that all parents that find
995 * a variable find the same binding; otherwise we declare ambiguous
996 * inheritance.
997 */
998 for (i = 0; i < sect->nparents; i++) {
999 v = search_recursive(conf, sect->parents[i], name, sz);
1000 if (!v);
1001 else if (!var) { var = v; j = i; }
1002 else if (var != v)
1003 lose("section `%s' inherits variable `%s' ambiguously "
1004 "via `%s' and `%s'",
1005 CONFIG_SECTION_NAME(sect), CONFIG_VAR_NAME(var),
1006 CONFIG_SECTION_NAME(sect->parents[j]),
1007 CONFIG_SECTION_NAME(sect->parents[i]));
1008 }
1009
1010 /* All done: fill the cache entry in, clear the open flag, and return the
1011 * result.
1012 */
1013 cache->var = var; cache->f &= ~CF_OPEN;
1014 return (var);
1015 }
1016
1017 /* Find and return the variable with null-terminated NAME in SECT.
1018 *
1019 * If `CF_INHERIT' is set in F, then the function searches the section's
1020 * parents recursively; otherwise, it only checks to see whether the variable
1021 * is set directly in SECT.
1022 *
1023 * If no variable is found, the behaviour depends on whether `CF_CREAT' is
1024 * set in F: if so, an empty variable is created and returned; otherwise, a
1025 * null pointer is returned.
1026 *
1027 * Setting both `CF_INHERIT' and `CF_CREAT' is not useful.
1028 */
1029 struct config_var *config_find_var(struct config *conf,
1030 struct config_section *sect,
1031 unsigned f, const char *name)
1032 { return (config_find_var_n(conf, sect, f, name, strlen(name))); }
1033
1034 /* Find and return the variable with the given SZ-byte NAME in SECT.
1035 *
1036 * This works like `config_find_var', but with an explicit length for the
1037 * NAME rather than null-termination.
1038 */
1039 struct config_var *config_find_var_n(struct config *conf,
1040 struct config_section *sect,
1041 unsigned f, const char *name, size_t sz)
1042 {
1043 struct config_var *var;
1044 struct treap_path path;
1045
1046 if (f&CF_INHERIT)
1047 var = search_recursive(conf, sect, name, sz);
1048 else if (!(f&CF_CREAT))
1049 var = treap_lookup(&sect->vars, name, sz);
1050 else {
1051 var = treap_probe(&sect->vars, name, sz, &path);
1052 if (!var) {
1053 var = xmalloc(sizeof(*var));
1054 var->val = 0; var->file = 0; var->f = 0; var->line = 1;
1055 treap_insert(&sect->vars, &path, &var->_node, name, sz);
1056 }
1057 }
1058 return (var);
1059 }
1060
1061 /* Set variable NAME to VALUE in SECT, with associated flags F.
1062 *
1063 * The names are null-terminated. The flags are variable flags: see `struct
1064 * config_var' for details. Returns the variable.
1065 *
1066 * If the variable is already set and has the `CF_OVERRIDE' flag, then this
1067 * function does nothing unless `CF_OVERRIDE' is /also/ set in F.
1068 */
1069 struct config_var *config_set_var(struct config *conf,
1070 struct config_section *sect,
1071 unsigned f,
1072 const char *name, const char *value)
1073 {
1074 return (config_set_var_n(conf, sect, f,
1075 name, strlen(name),
1076 value, strlen(value)));
1077 }
1078
1079 /* As `config_set_var', except that the variable NAME and VALUE have explicit
1080 * lengths (NAMELEN and VALUELEN, respectively) rather than being null-
1081 * terminated.
1082 */
1083 struct config_var *config_set_var_n(struct config *conf,
1084 struct config_section *sect,
1085 unsigned f,
1086 const char *name, size_t namelen,
1087 const char *value, size_t valuelen)
1088 {
1089 struct config_var *var =
1090 config_find_var_n(conf, sect, CF_CREAT, name, namelen);
1091
1092 if (var->f&~f&CF_OVERRIDE) return (var);
1093 free(var->val); var->val = xstrndup(value, valuelen); var->n = valuelen;
1094 var->f = f;
1095 return (var);
1096 }
1097
1098 /* Initialize I to iterate over the variables directly defined in SECT. */
1099 void config_start_var_iter(struct config *conf, struct config_section *sect,
1100 struct config_var_iter *i)
1101 { treap_start_iter(&sect->vars, &i->i); }
1102
1103 /* Return next variable from I, in ascending lexicographical order.
1104 *
1105 * If there are no more variables, then return null.
1106 */
1107 struct config_var *config_next_var(struct config_var_iter *i)
1108 { return (treap_next(&i->i)); }
1109
1110 /* Read and parse configuration FILE, applying its settings to CONF.
1111 *
1112 * If all goes well, the function returns 0. If the file is not found, then
1113 * the behaviour depends on whether `CF_NOENTOK' is set in F: if so, then the
1114 * function simply returns -1. Otherwise, a fatal error is reported. Note
1115 * that this /only/ applies if the file does not exist (specifically, opening
1116 * it fails with `ENOENT') -- any other problems are reported as fatal
1117 * errors regardless of the flag setting.
1118 */
1119 int config_read_file(struct config *conf, const char *file, unsigned f)
1120 {
1121 struct config_section *sect;
1122 struct config_var *var;
1123 struct dstr d = DSTR_INIT, dd = DSTR_INIT;
1124 unsigned line = 0;
1125 const char *p, *q, *r;
1126 FILE *fp;
1127
1128 /* Try to open the file. */
1129 fp = fopen(file, "r");
1130 if (!fp) {
1131 if ((f&CF_NOENTOK) && errno == ENOENT) return (-1);
1132 lose("failed to open configuration file `%s': %s",
1133 file, strerror(errno));
1134 }
1135
1136 /* Find the initial section. */
1137 sect = config_find_section(conf, CF_CREAT, "@CONFIG"); var = 0;
1138
1139 /* Work through the file, line by line. */
1140 for (;;) {
1141 dstr_reset(&d); if (dstr_readline(&d, fp)) break;
1142 line++;
1143
1144 /* Trim trailing spaces from the line. The syntax is sensitive to
1145 * leading spaces, so we can't trim those yet.
1146 */
1147 while (d.len && ISSPACE(d.p[d.len - 1])) d.len--;
1148 d.p[d.len] = 0;
1149
1150 if (!*d.p || *d.p == ';')
1151 /* Ignore comments entirely. (In particular, a comment doesn't
1152 * interrupt a multiline variable value.)
1153 */
1154 ;
1155
1156 else if (ISSPACE(d.p[0])) {
1157 /* The line starts with whitespace, so it's a continuation line. */
1158
1159 /* Skip the initial whitespace. */
1160 p = d.p; while (ISSPACE(*p)) p++;
1161
1162 /* If we aren't collecting a variable value then this is an error.
1163 * Otherwise, accumulate it into the current value.
1164 */
1165 if (!var)
1166 lose("%s:%u: continuation line, but no variable", file, line);
1167 if (dd.len) dstr_putc(&dd, ' ');
1168 dstr_putm(&dd, p, d.len - (p - d.p));
1169
1170 } else {
1171 /* The line starts in the first column. */
1172
1173 /* If there's a value value being collected then we must commit it to
1174 * its variable (unless there's already a setting there that says we
1175 * shouldn't).
1176 */
1177 if (var) {
1178 if (!(var->f&CF_OVERRIDE))
1179 { var->val = xstrndup(dd.p, dd.len); var->n = dd.len; }
1180 var = 0;
1181 }
1182
1183 /* Now decide what kind of line this is. */
1184 if (d.p[0] == '[') {
1185 /* It's a section header. */
1186
1187 /* Parse the header. */
1188 p = d.p + 1; while (ISSPACE(*p)) p++;
1189 q = scan_name("section", p, d.p + d.len, file, line);
1190 r = q; while (ISSPACE(*r)) r++;
1191 if (*r != ']')
1192 lose("%s:%u: expected `]' in section header", file, line);
1193 if (r[1])
1194 lose("%s:%u: trailing junk after `]' in section header",
1195 file, line);
1196
1197 /* Create the new section. */
1198 sect = config_find_section_n(conf, CF_CREAT, p, q - p);
1199
1200 } else {
1201 /* It's a variable assignment. Parse the name out. */
1202 p = scan_name("variable", d.p, d.p + d.len, file, line);
1203 var = config_find_var_n(conf, sect, CF_CREAT, d.p, p - d.p);
1204 while (ISSPACE(*p)) p++;
1205 if (*p != '=') lose("%s:%u: missing `=' in assignment", file, line);
1206 p++; while (ISSPACE(*p)) p++;
1207
1208 /* Clear out the variable's initial value, unless we shouldn't
1209 * override it.
1210 */
1211 if (!(var->f&CF_OVERRIDE)) {
1212 free(var->val); var->val = 0; var->f = 0;
1213 free(var->file); var->file = xstrdup(file); var->line = line;
1214 }
1215 dstr_reset(&dd); dstr_puts(&dd, p);
1216 }
1217 }
1218 }
1219
1220 /* If there's a value under construction then commit the result. */
1221 if (var && !(var->f&CF_OVERRIDE))
1222 { var->val = xstrndup(dd.p, dd.len); var->n = dd.len; }
1223
1224 /* Close the file. */
1225 if (fclose(fp))
1226 lose("error reading configuration file `%s': %s", file, strerror(errno));
1227
1228 /* All done. */
1229 dstr_release(&d); dstr_release(&dd);
1230 return (0);
1231 }
1232
1233 /* Populate SECT with environment variables.
1234 *
1235 * Environment variables are always set with `CF_LITERAL'.
1236 */
1237 void config_read_env(struct config *conf, struct config_section *sect)
1238 {
1239 const char *p, *v;
1240 size_t i;
1241
1242 for (i = 0; (p = environ[i]) != 0; i++) {
1243 v = strchr(p, '='); if (!v) continue;
1244 config_set_var_n(conf, sect, CF_LITERAL, p, v - p, v + 1, strlen(v + 1));
1245 }
1246 }
1247
1248 /*----- Substitution and quoting ------------------------------------------*/
1249
1250 /* The substitution and word-splitting state.
1251 *
1252 * This only keeps track of the immutable parameters for the substitution
1253 * task: stuff which changes (flags, filtering state, cursor position) is
1254 * maintained separately.
1255 */
1256 struct subst {
1257 struct config *config; /* configuration state */
1258 struct config_section *home; /* home section for lookups */
1259 struct dstr *d; /* current word being constructed */
1260 struct argv *av; /* output word list */
1261 };
1262
1263 /* Flags for `subst' and related functions. */
1264 #define SF_SPLIT 0x0001u /* split at (unquoted) whitespace */
1265 #define SF_QUOT 0x0002u /* currently within double quotes */
1266 #define SF_SUBST 0x0004u /* apply `$-substitutions */
1267 #define SF_SUBEXPR 0x0008u /* stop at delimiter `|' or `}' */
1268 #define SF_SPANMASK 0x00ffu /* mask for the above */
1269
1270 #define SF_WORD 0x0100u /* output word under construction */
1271 #define SF_SKIP 0x0200u /* not producing output */
1272 #define SF_LITERAL 0x0400u /* do not expand or substitute */
1273 #define SF_UPCASE 0x0800u /* convert to uppercase */
1274 #define SF_DOWNCASE 0x1000u /* convert to lowercase */
1275 #define SF_CASEMASK 0x1800u /* mask for case conversions */
1276
1277 /* Apply filters encoded in QFILT and F to the text from P to L, and output.
1278 *
1279 * SB is the substitution state which, in particular, explains where the
1280 * output should go.
1281 *
1282 * The filters are encoded as flags `SF_UPCASE' and `SF_DOWNCASE' for case
1283 * conversions, and a nesting depth QFILT for toothpick escaping. (QFILT is
1284 * encoded as the number of toothpicks to print: see `subst' for how this
1285 * determined.)
1286 */
1287 static void filter_string(const char *p, const char *l,
1288 const struct subst *sb, unsigned qfilt, unsigned f)
1289 {
1290 size_t r, n;
1291 char *q; const char *pp, *ll;
1292
1293 if (!qfilt && !(f&SF_CASEMASK))
1294 /* Fast path: there's nothing to do: just write to the output. */
1295 dstr_putm(sb->d, p, l - p);
1296
1297 else for (;;) {
1298 /* We must be a bit more circumspect. */
1299
1300 /* Determine the length of the next span of characters which don't need
1301 * escaping. (If QFILT is zero then this is everything.)
1302 */
1303 r = l - p; n = qfilt ? strcspn(p, "\"\\") : r;
1304 if (n > r) n = r;
1305
1306 if (!(f&SF_CASEMASK))
1307 /* No case conversion: we can just emit this chunk. */
1308
1309 dstr_putm(sb->d, p, n);
1310
1311 else {
1312 /* Case conversion to do. Arrange enough space for the output, and
1313 * convert it character by character.
1314 */
1315
1316 dstr_ensure(sb->d, n); q = sb->d->p + sb->d->len; pp = p; ll = p + n;
1317 if (f&SF_DOWNCASE) while (pp < ll) *q++ = TOLOWER(*pp++);
1318 else if (f&SF_UPCASE) while (pp < ll) *q++ = TOUPPER(*pp++);
1319 sb->d->len += n;
1320 }
1321
1322 /* If we've reached the end then stop. */
1323 if (n >= r) break;
1324
1325 /* Otherwise we must have found a character which requires escaping.
1326 * Emit enough toothpicks.
1327 */
1328 dstr_putcn(sb->d, '\\', qfilt);
1329
1330 /* This character is now done, so we can skip over and see if there's
1331 * another chunk of stuff we can do at high speed.
1332 */
1333 dstr_putc(sb->d, p[n]); p += n + 1;
1334 }
1335 }
1336
1337 /* Scan and resolve a `[SECT:]VAR' specifier at P.
1338 *
1339 * Return the address of the next character following the specifier; and set
1340 * *VAR_OUT to point to the variable we found, or null if it's not there. L
1341 * is a limit on the region of the buffer that we should process; SB is the
1342 * substitution state which provides the home section if none is given
1343 * explicitly; FILE and LINE are the source location to blame for problems.
1344 */
1345 static const char *retrieve_varspec(const char *p, const char *l,
1346 const struct subst *sb,
1347 struct config_var **var_out,
1348 const char *file, unsigned line)
1349 {
1350 struct config_section *sect = sb->home;
1351 const char *t;
1352
1353 t = scan_name("section or variable", p, l, file, line);
1354 if (t < l && *t == ':') {
1355 sect = config_find_section_n(sb->config, 0, p, t - p);
1356 p = t + 1; t = scan_name("variable", p, l, file, line);
1357 }
1358
1359 if (!sect) *var_out = 0;
1360 else *var_out = config_find_var_n(sb->config, sect, CF_INHERIT, p, t - p);
1361 return (t);
1362 }
1363
1364 /* Substitute and/or word-split text.
1365 *
1366 * The input text starts at P, and continues to (just before) L. Context for
1367 * the task is provided by SB; the source location to blame is FILE and LINE
1368 * (FILE may be null so that this can be passed directly from a `config_var'
1369 * without further checking); QFILT is the nesting depth in toothpick-
1370 * escaping; and F holds a mask of `SF_...' flags.
1371 */
1372 static const char *subst(const char *p, const char *l,
1373 const struct subst *sb,
1374 const char *file, unsigned line,
1375 unsigned qfilt, unsigned f)
1376 {
1377 struct config_var *var;
1378 const char *q0, *q1, *t;
1379 unsigned subqfilt, ff;
1380 size_t n;
1381
1382 /* It would be best if we could process literal text at high speed. To
1383 * this end, we have a table, indexed by the low-order bits of F, to tell
1384 * us which special characters we need to stop at. This way, we can use
1385 * `strcspn' to skip over literal text and stop at the next character which
1386 * needs special handling. Entries in this table with a null pointer
1387 * correspond to impossible flag settings: notably, `SF_QUOT' can only be
1388 * set when `SF_SUBST' is also set.
1389 */
1390 static const char *const delimtab[] = {
1391
1392 #define ESCAPE "\\" /* always watch for `\'-escapes */
1393 #define SUBST "$" /* check for `$' if `SF_SUBST' set */
1394 #define WORDSEP " \f\r\n\t\v'\"" /* space, quotes if `SF_SPLIT' but
1395 * not `SF_QUOT' */
1396 #define QUOT "\"" /* only quotes if `SF_SPLIT' and
1397 * `SF_QUOT' */
1398 #define DELIM "|}" /* end delimiters of `SF_SUBEXPR' */
1399
1400 ESCAPE, /* --- */
1401 ESCAPE WORDSEP, /* SPLIT */
1402 0, /* QUOT */
1403 ESCAPE QUOT, /* SPLIT | QUOT */
1404 ESCAPE SUBST, /* SUBST */
1405 ESCAPE SUBST WORDSEP, /* SPLIT | SUBST */
1406 0, /* QUOT | SUBST */
1407 ESCAPE SUBST QUOT, /* SPLIT | QUOT | SUBST */
1408 ESCAPE DELIM, /* SUBEXPR */
1409 ESCAPE DELIM WORDSEP, /* SPLIT | SUBEXPR */
1410 0, /* QUOT | SUBEXPR */
1411 ESCAPE DELIM QUOT, /* SPLIT | QUOT | SUBEXPR */
1412 ESCAPE DELIM SUBST, /* SUBST | SUBEXPR */
1413 ESCAPE DELIM SUBST WORDSEP, /* SPLIT | SUBST | SUBEXPR */
1414 0, /* QUOT | SUBST | SUBEXPR */
1415 ESCAPE DELIM SUBST QUOT /* SPLIT | QUOT | SUBST | SUBEXPR */
1416
1417 #undef ESCAPE
1418 #undef SUBST
1419 #undef WORDSEP
1420 #undef QUOT
1421 #undef DELIM
1422 };
1423
1424 /* Set FILE to be useful if it was null on entry. */
1425 if (!file) file = "<internal>";
1426
1427 /* If the text is literal then hand off to `filter_string'. This obviously
1428 * starts a word.
1429 */
1430 if (f&SF_LITERAL) {
1431 filter_string(p, l, sb, qfilt, f);
1432 f |= SF_WORD;
1433 goto done;
1434 }
1435
1436 /* Chew through the input until it's all gone. */
1437 while (p < l) {
1438
1439 if ((f&(SF_SPLIT | SF_QUOT)) == SF_SPLIT && ISSPACE(*p)) {
1440 /* This is whitespace, we're supposed to split, and we're not within
1441 * quotes, so we should split here.
1442 */
1443
1444 /* If there's a word in progress then we should commit it. */
1445 if (f&SF_WORD) {
1446 if (!(f&SF_SKIP)) {
1447 argv_append(sb->av, xstrndup(sb->d->p, sb->d->len));
1448 dstr_reset(sb->d);
1449 }
1450 f &= ~SF_WORD;
1451 }
1452
1453 /* Skip over further whitespace at high speed. */
1454 do p++; while (p < l && ISSPACE(*p));
1455
1456 } else if (*p == '\\') {
1457 /* This is a toothpick, so start a new word and add the next character
1458 * to it.
1459 */
1460
1461 /* If there's no next character then we should be upset. */
1462 p++; if (p >= l) lose("%s:%u: unfinished `\\' escape", file, line);
1463
1464 if (!(f&SF_SKIP)) {
1465
1466 /* If this is a double quote or backslash then check QFILT to see if
1467 * it needs escaping.
1468 */
1469 if (qfilt && (*p == '"' || *p == '\\'))
1470 dstr_putcn(sb->d, '\\', qfilt);
1471
1472 /* Output the character. */
1473 if (f&SF_DOWNCASE) dstr_putc(sb->d, TOLOWER(*p));
1474 else if (f&SF_UPCASE) dstr_putc(sb->d, TOUPPER(*p));
1475 else dstr_putc(sb->d, *p);
1476 }
1477
1478 /* Move past the escaped character. Remember we started a word. */
1479 p++; f |= SF_WORD;
1480
1481 } else if ((f&SF_SPLIT) && *p == '"') {
1482 /* This is a double quote, and we're word splitting. We're definitely
1483 * in a word now. Toggle whether we're within quotes.
1484 */
1485
1486 f ^= SF_QUOT; f |= SF_WORD; p++;
1487
1488 } else if ((f&(SF_SPLIT | SF_QUOT)) == SF_SPLIT && *p == '\'') {
1489 /* This is a single quote, and we're word splitting but not within
1490 * double quotes. Find the matching end quote, and just output
1491 * everything between literally.
1492 */
1493
1494 p++; t = strchr(p, '\'');
1495 if (!t || t >= l) lose("%s:%u: missing `''", file, line);
1496 if (!(f&SF_SKIP)) filter_string(p, t, sb, qfilt, f);
1497 p = t + 1; f |= SF_WORD;
1498
1499 } else if ((f&SF_SUBEXPR) && (*p == '|' || *p == '}')) {
1500 /* This is an end delimiter, and we're supposed to stop here. */
1501 break;
1502
1503 } else if ((f&SF_SUBST) && *p == '$') {
1504 /* This is a `$' and we're supposed to do substitution. */
1505
1506 /* The kind of substitution is determined by the next character. */
1507 p++; if (p >= l) lose("%s:%u: incomplete substitution", file, line);
1508
1509 /* Prepare flags for a recursive substitution.
1510 *
1511 * Hide our quote state from the recursive call. If we're within a
1512 * word, then disable word-splitting.
1513 */
1514 ff = f&~(SF_QUOT | (f&SF_WORD ? SF_SPLIT : 0));
1515
1516 /* Now dispatch based on the following character. */
1517 switch (*p) {
1518
1519 case '?':
1520 /* A conditional expression: $?VAR{CONSEQ[|ALT]} */
1521
1522 /* Skip initial space. */
1523 p++; while (p < l && ISSPACE(*p)) p++;
1524
1525 /* Find the variable. */
1526 p = retrieve_varspec(p, l, sb, &var, file, line);
1527
1528 /* Skip whitespace again. */
1529 while (p < l && ISSPACE(*p)) p++;
1530
1531 /* Expect the opening `{'. */
1532 if (p > l || *p != '{') lose("%s:%u: expected `{'", file, line);
1533 p++;
1534
1535 /* We'll process the parts recursively, but we need to come back
1536 * when we hit the appropriate delimiters, so arrange for that.
1537 */
1538 ff |= SF_SUBEXPR;
1539
1540 /* Process the consequent (skip if the variable wasn't found). */
1541 p = subst(p, l, sb, file, line, qfilt,
1542 ff | (var ? 0 : SF_SKIP));
1543
1544 /* If there's a `|' then process the alternative too (skip if the
1545 * variable /was/ found).
1546 */
1547 if (p < l && *p == '|')
1548 p = subst(p + 1, l, sb, file, line, qfilt,
1549 ff | (var ? SF_SKIP : 0));
1550
1551 /* We should now be past the closing `}'. */
1552 if (p >= l || *p != '}') lose("%s:%u: missing `}'", file, line);
1553 p++;
1554 break;
1555
1556 case '{':
1557 /* A variable substitution: ${VAR[|FILT]...[?ALT]} */
1558
1559 /* Skip initial whitespace. */
1560 p++; while (p < l && ISSPACE(*p)) p++;
1561
1562 /* Find the variable. */
1563 q0 = p; p = retrieve_varspec(p, l, sb, &var, file, line); q1 = p;
1564
1565 /* Determine the filters to apply when substituting the variable
1566 * value.
1567 */
1568 subqfilt = qfilt;
1569 for (;;) {
1570
1571 /* Skip spaces again. */
1572 while (p < l && ISSPACE(*p)) p++;
1573
1574 /* If there's no `|' then there are no more filters, so stop. */
1575 if (p >= l || *p != '|') break;
1576
1577 /* Skip the `|' and more spaces. */
1578 p++; while (p < l && ISSPACE(*p)) p++;
1579
1580 /* Collect the filter name. */
1581 t = scan_name("filter", p, l, file, line);
1582
1583 /* Dispatch on the filter name. */
1584 if (t - p == 1 && *p == 'q')
1585 /* `q' -- quote for Lisp string.
1586 *
1587 * We're currently adding Q `\' characters before each naughty
1588 * character. But a backslash itself is naughty too, so that
1589 * makes Q + 1 naughty characters, each of which needs a
1590 * toothpick, so now we need Q + (Q + 1) = 2 Q + 1 toothpicks.
1591 *
1592 * Calculate this here rather than at each point toothpicks
1593 * need to be deployed.
1594 */
1595
1596 subqfilt = 2*subqfilt + 1;
1597
1598 else if (t - p == 1 && *p == 'l')
1599 /* `l' -- convert to lowercase.
1600 *
1601 * If a case conversion is already set, then that will override
1602 * whatever we do here, so don't bother.
1603 */
1604
1605 { if (!(ff&SF_CASEMASK)) ff |= SF_DOWNCASE; }
1606
1607 else if (t - p == 1 && *p == 'u')
1608 /* `u' -- convert to uppercase.
1609 *
1610 * If a case conversion is already set, then that will override
1611 * whatever we do here, so don't bother.
1612 */
1613 { if (!(ff&SF_CASEMASK)) ff |= SF_UPCASE; }
1614
1615 else
1616 /* Something else we didn't understand. */
1617 lose("%s:%u: unknown filter `%.*s'",
1618 file, line, (int)(t - p), p);
1619
1620 /* Continue from after the filter name. */
1621 p = t;
1622 }
1623
1624 /* If we're not skipping, and we found a variable, then substitute
1625 * its value. This is the point where we need to be careful about
1626 * recursive expansion.
1627 */
1628 if (!(f&SF_SKIP) && var) {
1629 if (var->f&CF_EXPAND)
1630 lose("%s:%u: recursive expansion of variable `%.*s'",
1631 file, line, (int)(q1 - q0), q0);
1632 var->f |= CF_EXPAND;
1633 subst(var->val, var->val + var->n, sb,
1634 var->file, var->line, subqfilt,
1635 ff | (var->f&CF_LITERAL ? SF_LITERAL : 0));
1636 var->f &= ~CF_EXPAND;
1637 }
1638
1639 /* If there's an alternative, then we need to process (or maybe
1640 * skip) it. Otherwise, we should complain if there was no
1641 * veriable, and we're not skipping.
1642 */
1643 if (p < l && *p == '?')
1644 p = subst(p + 1, l, sb, file, line, subqfilt,
1645 ff | SF_SUBEXPR | (var ? SF_SKIP : 0));
1646 else if (!var && !(f&SF_SKIP))
1647 lose("%s:%u: unknown variable `%.*s'",
1648 file, line, (int)(q1 - q0), q0);
1649
1650 /* Expect a `}' here. (No need to skip spaces: we already did that
1651 * after scanning for filters, and either there was no alternative,
1652 * or we advanced to a delimiter character anyway.)
1653 */
1654 if (p >= l || *p != '}') lose("%s:%u: missing `}'", file, line);
1655 p++;
1656 break;
1657
1658 default:
1659 /* Something else. That's a shame. */
1660 lose("%s:%u: unexpected `$'-substitution `%c'", file, line, *p);
1661 }
1662
1663 /* Complain if we started out in word-splitting state, and therefore
1664 * have added a whole number of words to the output, but there's a
1665 * word-fragment stuck onto the end of this substitution.
1666 */
1667 if (p < l && !(~f&~(SF_WORD | SF_SPLIT)) && !ISSPACE(*p) &&
1668 !((f&SF_SUBEXPR) && (*p == '|' || *p == '}')))
1669 lose("%s:%u: surprising word boundary "
1670 "after splicing substitution",
1671 file, line);
1672 }
1673
1674 else {
1675 /* Something else. Try to skip over this at high speed.
1676 *
1677 * This makes use of the table we set up earlier.
1678 */
1679
1680 n = strcspn(p, delimtab[f&SF_SPANMASK]);
1681 if (n > l - p) n = l - p;
1682 if (!(f&SF_SKIP)) filter_string(p, p + n, sb, qfilt, f);
1683 p += n; f |= SF_WORD;
1684 }
1685 }
1686
1687 done:
1688 /* Sort out the wreckage. */
1689
1690 /* If we're still within quotes then something has gone wrong. */
1691 if (f&SF_QUOT) lose("%s:%u: missing `\"'", file, line);
1692
1693 /* If we're within a word, and should be splitting, then commit the word to
1694 * the output list.
1695 */
1696 if ((f&(SF_WORD | SF_SPLIT | SF_SKIP)) == (SF_SPLIT | SF_WORD)) {
1697 argv_append(sb->av, xstrndup(sb->d->p, sb->d->len));
1698 dstr_reset(sb->d);
1699 }
1700
1701 /* And, with that, we're done. */
1702 return (p);
1703 }
1704
1705 /* Expand substitutions in a string.
1706 *
1707 * Expand the null-terminated string P relative to the HOME section, using
1708 * configuration CONFIG, and appending the result to dynamic string D. Blame
1709 * WHAT in any error messages.
1710 */
1711 void config_subst_string(struct config *config, struct config_section *home,
1712 const char *what, const char *p, struct dstr *d)
1713 {
1714 struct subst sb;
1715
1716 sb.config = config; sb.home = home; sb.d = d;
1717 subst(p, p + strlen(p), &sb, what, 0, 0, SF_SUBST);
1718 dstr_putz(d);
1719 }
1720
1721 /* Expand substitutions in a string.
1722 *
1723 * Expand the null-terminated string P relative to the HOME section, using
1724 * configuration CONFIG, returning the result as a freshly malloc(3)ed
1725 * string. Blame WHAT in any error messages.
1726 */
1727 char *config_subst_string_alloc(struct config *config,
1728 struct config_section *home,
1729 const char *what, const char *p)
1730 {
1731 struct dstr d = DSTR_INIT;
1732 char *q;
1733
1734 config_subst_string(config, home, what, p, &d);
1735 q = xstrndup(d.p, d.len); dstr_release(&d); return (q);
1736 }
1737
1738 /* Expand substitutions in a variable.
1739 *
1740 * Expand the value of the variable VAR relative to the HOME section, using
1741 * configuration CONFIG, appending the result to dynamic string D.
1742 */
1743 void config_subst_var(struct config *config, struct config_section *home,
1744 struct config_var *var, struct dstr *d)
1745 {
1746 struct subst sb;
1747
1748 sb.config = config; sb.home = home; sb.d = d;
1749 var->f |= CF_EXPAND;
1750 subst(var->val, var->val + var->n, &sb, var->file, var->line, 0,
1751 SF_SUBST | (var->f&CF_LITERAL ? SF_LITERAL : 0));
1752 var->f &= ~CF_EXPAND;
1753 dstr_putz(d);
1754 }
1755
1756 /* Expand substitutions in a variable.
1757 *
1758 * Expand the value of the variable VAR relative to the HOME section, using
1759 * configuration CONFIG, returning the result as a freshly malloc(3)ed
1760 * string.
1761 */
1762 char *config_subst_var_alloc(struct config *config,
1763 struct config_section *home,
1764 struct config_var *var)
1765 {
1766 struct dstr d = DSTR_INIT;
1767 char *q;
1768
1769 config_subst_var(config, home, var, &d);
1770 q = xstrndup(d.p, d.len); dstr_release(&d); return (q);
1771 }
1772
1773 /* Expand substitutions in a variable and split into words.
1774 *
1775 * Expand and word-split the value of the variable VAR relative to the HOME
1776 * section, using configuration CONFIG, appending the resulting words into
1777 * the vector AV.
1778 */
1779 void config_subst_split_var(struct config *config,
1780 struct config_section *home,
1781 struct config_var *var, struct argv *av)
1782 {
1783 struct dstr d = DSTR_INIT;
1784 struct subst sb;
1785
1786 sb.config = config; sb.home = home; sb.av = av; sb.d = &d;
1787 var->f |= CF_EXPAND;
1788 subst(var->val, var->val + var->n, &sb, var->file, var->line, 0,
1789 SF_SUBST | SF_SPLIT | (var->f&CF_LITERAL ? SF_LITERAL : 0));
1790 var->f &= ~CF_EXPAND;
1791 dstr_release(&d);
1792 }
1793
1794 /*----- That's all, folks -------------------------------------------------*/