lib.c: Fix some commentary blunders.
[runlisp] / lib.c
1 /* -*-c-*-
2 *
3 * Common definitions for `runlisp'
4 *
5 * (c) 2020 Mark Wooding
6 */
7
8 /*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of Runlisp, a tool for invoking Common Lisp scripts.
11 *
12 * Runlisp is free software: you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by the
14 * Free Software Foundation; either version 3 of the License, or (at your
15 * option) any later version.
16 *
17 * Runlisp is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
20 * for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with Runlisp. If not, see <https://www.gnu.org/licenses/>.
24 */
25
26 /*----- Header files ------------------------------------------------------*/
27
28 #include "config.h"
29
30 #include <assert.h>
31
32 #include <ctype.h>
33 #include <errno.h>
34 #include <stdarg.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38
39 #include <unistd.h>
40
41 #include "lib.h"
42
43 /*----- Diagnostic utilities ----------------------------------------------*/
44
45 const char *progname = "???";
46 /* Our program name, for use in error messages. */
47
48 /* Set `progname' from the pathname in PROG (typically from `argv[0]'). */
49 void set_progname(const char *prog)
50 {
51 const char *p;
52
53 p = strrchr(prog, '/');
54 progname = p ? p + 1 : prog;
55 }
56
57 /* Report an error or warning in Unix style, given a captured argument
58 * cursor.
59 */
60 void vmoan(const char *msg, va_list ap)
61 {
62 fprintf(stderr, "%s: ", progname);
63 vfprintf(stderr, msg, ap);
64 fputc('\n', stderr);
65 }
66
67 /* Issue a warning message. */
68 void moan(const char *msg, ...)
69 { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); }
70
71 /* Issue a fatal error message and exit unsuccessfully. */
72 void lose(const char *msg, ...)
73 { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); exit(127); }
74
75 /*----- Memory allocation -------------------------------------------------*/
76
77 /* Allocate and return a pointer to N bytes, or report a fatal error.
78 *
79 * Release the pointer using `free' as usual. If N is zero, returns null
80 * (but you are not expected to check for this).
81 */
82 void *xmalloc(size_t n)
83 {
84 void *p;
85
86 if (!n) return (0);
87 p = malloc(n); if (!p) lose("failed to allocate memory");
88 return (p);
89 }
90
91 /* Resize the block at P (from `malloc' or `xmalloc') to be N bytes long.
92 *
93 * The block might (and probably will) move, so it returns the new address.
94 * If N is zero, then the block is freed (if necessary) and a null pointer
95 * returned; otherwise, if P is null then a fresh block is allocated. If
96 * allocation fails, then a fatal error is reported.
97 */
98 void *xrealloc(void *p, size_t n)
99 {
100 if (!n) { free(p); return (0); }
101 else if (!p) return (xmalloc(n));
102 p = realloc(p, n); if (!p) lose("failed to allocate memory");
103 return (p);
104 }
105
106 /* Allocate and return a copy of the N-byte string starting at P.
107 *
108 * The new string is null-terminated, though P need not be. If allocation
109 * fails, then a fatal error is reported.
110 */
111 char *xstrndup(const char *p, size_t n)
112 {
113 char *q = xmalloc(n + 1);
114
115 memcpy(q, p, n); q[n] = 0;
116 return (q);
117 }
118
119 /* Allocate and return a copy of the null-terminated string starting at P.
120 *
121 * If allocation fails, then a fatal error is reported.
122 */
123 char *xstrdup(const char *p) { return (xstrndup(p, strlen(p))); }
124
125 /*----- Dynamic strings ---------------------------------------------------*/
126
127 /* Initialize the string D.
128 *
129 * Usually you'd use the static initializer `DSTR_INIT'.
130 */
131 void dstr_init(struct dstr *d) { d->p = 0; d->len = d->sz = 0; }
132
133 /* Reset string D so it's empty again. */
134 void dstr_reset(struct dstr *d) { d->len = 0; }
135
136 /* Ensure that D has at least N unused bytes available. */
137 void dstr_ensure(struct dstr *d, size_t n)
138 {
139 size_t need = d->len + n, newsz;
140
141 if (need <= d->sz) return;
142 newsz = d->sz ? 2*d->sz : 16;
143 while (newsz < need) newsz *= 2;
144 d->p = xrealloc(d->p, newsz); d->sz = newsz;
145 }
146
147 /* Release the memory held by D.
148 *
149 * It must be reinitialized (e.g., by `dstr_init') before it can be used
150 * again.
151 */
152 void dstr_release(struct dstr *d) { free(d->p); }
153
154 /* Append the N-byte string at P to D.
155 *
156 * P need not be null-terminated. D will not be null-terminated
157 * afterwards.
158 */
159 void dstr_putm(struct dstr *d, const void *p, size_t n)
160 { dstr_ensure(d, n); memcpy(d->p + d->len, p, n); d->len += n; }
161
162 /* Append the null-terminated string P to D.
163 *
164 * D /is/ guaranteed to be null-terminated after this.
165 */
166 void dstr_puts(struct dstr *d, const char *p)
167 {
168 size_t n = strlen(p);
169
170 dstr_ensure(d, n + 1);
171 memcpy(d->p + d->len, p, n + 1);
172 d->len += n;
173 }
174
175 /* Append the single character CH to D.
176 *
177 * D will not be null-terminated afterwards.
178 */
179 void dstr_putc(struct dstr *d, int ch)
180 { dstr_ensure(d, 1); d->p[d->len++] = ch; }
181
182 /* Append N copies of the character CH to D.
183 *
184 * D will not be null-terminated afterwards.
185 */
186 void dstr_putcn(struct dstr *d, int ch, size_t n)
187 { dstr_ensure(d, n); memset(d->p + d->len, ch, n); d->len += n; }
188
189 /* Null-terminate the string D.
190 *
191 * This doesn't change the length of D. If further stuff is appended then
192 * the null terminator will be overwritten.
193 */
194 void dstr_putz(struct dstr *d)
195 { dstr_ensure(d, 1); d->p[d->len] = 0; }
196
197 /* Append stuff to D, determined by printf(3) format string P and argument
198 * tail AP.
199 *
200 * D will not be null-terminated afterwards.
201 */
202 void dstr_vputf(struct dstr *d, const char *p, va_list ap)
203 {
204 va_list ap2;
205 size_t r;
206 int n;
207
208 r = d->sz - d->len;
209 va_copy(ap2, ap);
210 n = vsnprintf(d->p + d->len, r, p, ap2); assert(n >= 0);
211 va_end(ap2);
212 if (n >= r) {
213 dstr_ensure(d, n + 1); r = d->sz - d->len;
214 n = vsnprintf(d->p + d->len, r, p, ap); assert(n >= 0); assert(n < r);
215 }
216 d->len += n;
217 }
218
219 /* Append stuff to D, determined by printf(3) format string P and arguments.
220 *
221 * D will not be null-terminated afterwards.
222 */
223 PRINTF_LIKE(2, 3) void dstr_putf(struct dstr *d, const char *p, ...)
224 { va_list ap; va_start(ap, p); dstr_vputf(d, p, ap); va_end(ap); }
225
226 /* Append the next input line from FP to D.
227 *
228 * Return 0 on success, or -1 if reading immediately fails or encounters
229 * end-of-file (call ferror(3) to distinguish). Any trailing newline is
230 * discarded: it is not possible to determine whether the last line was ended
231 * with a newline. D is guaranteed to be null-terminated afterwards.
232 */
233 int dstr_readline(struct dstr *d, FILE *fp)
234 {
235 size_t n;
236 int any = 0;
237
238 for (;;) {
239 dstr_ensure(d, 2);
240 if (!fgets(d->p + d->len, d->sz - d->len, fp)) break;
241 n = strlen(d->p + d->len); assert(n > 0); any = 1;
242 d->len += n;
243 if (d->p[d->len - 1] == '\n') { d->p[--d->len] = 0; break; }
244 }
245
246 if (!any) return (-1);
247 else return (0);
248 }
249
250 /*----- Dynamic vectors of strings ----------------------------------------*/
251
252 /* Initialize the vector AV.
253 *
254 * Usually you'd use the static initializer `ARGV_INIT'.
255 */
256 void argv_init(struct argv *av)
257 { av->v = 0; av->o = av->n = av->sz = 0; }
258
259 /* Reset the vector AV so that it's empty again. */
260 void argv_reset(struct argv *av) { av->n = 0; }
261
262 /* Ensure that AV has at least N unused slots at the end. */
263 void argv_ensure(struct argv *av, size_t n)
264 {
265 size_t need = av->n + av->o + n, newsz;
266
267 if (need <= av->sz) return;
268 newsz = av->sz ? 2*av->sz : 8;
269 while (newsz < need) newsz *= 2;
270 av->v = xrealloc(av->v - av->o, newsz*sizeof(char *)); av->v += av->o;
271 av->sz = newsz;
272 }
273
274 /* Ensure that AV has at least N unused slots at the /start/. */
275 void argv_ensure_offset(struct argv *av, size_t n)
276 {
277 size_t newoff;
278
279 /* Stupid version. We won't, in practice, be prepending lots of stuff, so
280 * avoid the extra bookkeeping involved in trying to make a double-ended
281 * extendable array asymptotically efficient.
282 */
283 if (av->o >= n) return;
284 newoff = 16;
285 while (newoff < n) newoff *= 2;
286 argv_ensure(av, newoff - av->o);
287 memmove(av->v + newoff - av->o, av->v, av->n*sizeof(char *));
288 av->v += newoff - av->o; av->o = newoff;
289 }
290
291 /* Release the memory held by AV.
292 *
293 * It must be reinitialized (e.g., by `argv_init') before it can be used
294 * again.
295 */
296 void argv_release(struct argv *av) { free(av->v - av->o); }
297
298 /* Append the pointer P to AV. */
299 void argv_append(struct argv *av, char *p)
300 { argv_ensure(av, 1); av->v[av->n++] = p; }
301
302 /* Append a null pointer to AV, without extending the vactor length.
303 *
304 * The null pointer will be overwritten when the next string is appended.
305 */
306 void argv_appendz(struct argv *av)
307 { argv_ensure(av, 1); av->v[av->n] = 0; }
308
309 /* Append a N-element vector V of pointers to AV. */
310 void argv_appendn(struct argv *av, char *const *v, size_t n)
311 {
312 argv_ensure(av, n);
313 memcpy(av->v + av->n, v, n*sizeof(const char *));
314 av->n += n;
315 }
316
317 /* Append the variable-length vector BV to AV. */
318 void argv_appendav(struct argv *av, const struct argv *bv)
319 { argv_appendn(av, bv->v, bv->n); }
320
321 /* Append the pointers from a variable-length argument list AP to AV.
322 *
323 * The list is terminated by a null pointer.
324 */
325 void argv_appendv(struct argv *av, va_list ap)
326 {
327 char *p;
328 for (;;) { p = va_arg(ap, char *); if (!p) break; argv_append(av, p); }
329 }
330
331 /* Append the argument pointers, terminated by a null pointer, to AV. */
332 void argv_appendl(struct argv *av, ...)
333 { va_list ap; va_start(ap, av); argv_appendv(av, ap); va_end(ap); }
334
335 /* Prepend the pointer P to AV. */
336 void argv_prepend(struct argv *av, char *p)
337 { argv_ensure_offset(av, 1); *--av->v = p; av->o--; av->n++; }
338
339 /* Prepend a N-element vector V of pointers to AV. */
340 void argv_prependn(struct argv *av, char *const *v, size_t n)
341 {
342 argv_ensure_offset(av, n);
343 av->o -= n; av->v -= n; av->n += n;
344 memcpy(av->v, v, n*sizeof(const char *));
345 }
346
347 /* Prepend the variable-length vector BV to AV. */
348 void argv_prependav(struct argv *av, const struct argv *bv)
349 { argv_prependn(av, bv->v, bv->n); }
350
351 /* Prepend the pointers from a variable-length argument list AP to AV.
352 *
353 * The list is terminated by a null pointer.
354 */
355 void argv_prependv(struct argv *av, va_list ap)
356 {
357 char *p, **v;
358 size_t n = 0;
359
360 for (;;) {
361 p = va_arg(ap, char *); if (!p) break;
362 argv_prepend(av, p); n++;
363 }
364 v = av->v;
365 while (n >= 2) {
366 p = v[0]; v[0] = v[n - 1]; v[n - 1] = p;
367 v++; n -= 2;
368 }
369 }
370
371 /* Prepend the argument pointers, terminated by a null pointer, to AV. */
372 void argv_prependl(struct argv *av, ...)
373 { va_list ap; va_start(ap, av); argv_prependv(av, ap); va_end(ap); }
374
375 /*----- Treaps ------------------------------------------------------------*/
376
377 /* Return nonzero if the AN-byte string A is strictly precedes the BN-byte
378 * string B in a lexicographic ordering.
379 *
380 * All comparison of keys is handled by this function.
381 */
382 static int str_lt(const char *a, size_t an, const char *b, size_t bn)
383 {
384 /* This is a little subtle. We need only compare the first N bytes of the
385 * strings, where N is the length of the shorter string. If this
386 * distinguishes the two strings, then we're clearly done. Otherwise, if
387 * the prefixes are equal then the shorter string is the smaller one. If
388 * the two strings are the same length, then they're equal.
389 *
390 * Hence, if A is the strictly shorter string, then A precedes B if A
391 * precedes or matches the prefix of B; otherwise A only precedes B if A
392 * strictly precedes the prefix of B.
393 */
394 if (an < bn) return (MEMCMP(a, <=, b, an));
395 else return (MEMCMP(a, <, b, bn));
396 }
397
398 /* Initialize the treap T.
399 *
400 * Usually you'd use the static initializer `TREAP_INIT'.
401 */
402 void treap_init(struct treap *t) { t->root = 0; }
403
404 /* Look up the KN-byte key K in the treap T.
405 *
406 * Return a pointer to the matching node if one was found, or null otherwise.
407 */
408 void *treap_lookup(const struct treap *t, const char *k, size_t kn)
409 {
410 struct treap_node *n = t->root, *candidate = 0;
411
412 /* This is a simple prototype for some of the search loops we'll encounter
413 * later. Notice that we use a strict one-sided comparison, rather than
414 * the more conventional two-sided comparison.
415 *
416 * The main loop will find the largest key not greater than K.
417 */
418 while (n)
419 /* Compare the node's key against our key. If the node is too large,
420 * then we ignore it and move left. Otherwise remember this node for
421 * later, and move right to see if we can find a better, larger node.
422 */
423
424 if (str_lt(k, kn, n->k, n->kn)) n = n->left;
425 else { candidate = n; n = n->right; }
426
427 /* If the candidate node is less than our key then we failed. Otherwise,
428 * by trichotomy, we have found the correct node.
429 */
430 if (!candidate || str_lt(candidate->k, candidate->kn, k, kn)) return (0);
431 return (candidate);
432 }
433
434 /* Look up the KN-byte K in the treap T, recording a path in P.
435 *
436 * This is similar to `treap_lookup', in that it returns the requested node
437 * if it already exists, or null otherwise, but it also records in P
438 * information to be used by `treap_insert' to insert a new node with the
439 * given key if it's not there already.
440 */
441 void *treap_probe(struct treap *t, const char *k, size_t kn,
442 struct treap_path *p)
443 {
444 struct treap_node **nn = &t->root, *candidate = 0;
445 unsigned i = 0;
446
447 /* This walk is similar to `treap_lookup' above, except that we also record
448 * the address of each node pointer we visit along the way.
449 */
450 for (;;) {
451 assert(i < TREAP_PATHMAX); p->path[i++] = nn;
452 if (!*nn) break;
453 if (str_lt(k, kn, (*nn)->k, (*nn)->kn)) nn = &(*nn)->left;
454 else { candidate = *nn; nn = &(*nn)->right; }
455 }
456 p->nsteps = i;
457
458 /* Check to see whether we found the right node. */
459 if (!candidate || str_lt(candidate->k, candidate->kn, k, kn)) return (0);
460 return (candidate);
461 }
462
463 /* Insert a new node N into T, associating it with the KN-byte key K.
464 *
465 * Use the path data P, from `treap_probe', to help with insertion.
466 */
467 void treap_insert(struct treap *t, const struct treap_path *p,
468 struct treap_node *n, const char *k, size_t kn)
469 {
470 size_t i = p->nsteps;
471 struct treap_node **nn, **uu, *u;
472 unsigned wt;
473
474 /* Fill in the node structure. */
475 n->k = xstrndup(k, kn); n->kn = kn;
476 n->wt = wt = rand(); n->left = n->right = 0;
477
478 /* Prepare for the insertion.
479 *
480 * The path actually points to each of the links traversed when searching
481 * for the node, starting with the `root' pointer, then the `left' or
482 * `right' pointer of the root node, and so on; `nsteps' will always be
483 * nonzero, since the path will always pass through the root, and the final
484 * step, `path->path[path->nsteps - 1]' will always be the address of a
485 * null pointer onto which the freshly inserted node could be hooked in
486 * order to satisfy the binary-search-tree ordering. (Of course, this will
487 * likely /not/ satisfy the heap condition, so more work needs to be done.)
488 *
489 * Throughout, NN is our current candidate for where to attach the node N.
490 * As the loop progresses, NN will ascend to links further up the tree, and
491 * N will be adjusted to accumulate pieces of the existing tree structure.
492 * We'll stop when we find that the parent node's weight is larger than our
493 * new node's weight, at which point we can just set *NN = N; or if we run
494 * out of steps in the path, in which case *NN is the root pointer.
495 */
496 assert(i); nn = p->path[--i];
497 while (i--) {
498
499 /* Collect the next step in the path, and get the pointer to the node. */
500 uu = p->path[i]; u = *uu;
501
502 /* If this node's weight is higher, then we've found the right level and
503 * we can stop.
504 */
505 if (wt <= u->wt) break;
506
507 /* The node U is lighter than our new node N, so we must rotate in order
508 * to fix things. If we were currently planning to hook N as the left
509 * subtree of U, then we rotate like this:
510 *
511 * | |
512 * U (N)
513 * / \ / \
514 * (N) Z ---> X U
515 * / \ / \
516 * X Y Y Z
517 *
518 * On the other hand, if we were planning to hook N as the right subtree
519 * of U, then we do the opposite rotation:
520 *
521 * | |
522 * U (N)
523 * / \ / \
524 * X (N) ---> U Z
525 * / \ / \
526 * Y Z X Y
527 *
528 * These transformations clearly preserve the ordering of nodes in the
529 * binary search tree, and satisfy the heap condition in the subtree
530 * headed by N.
531 */
532 if (nn == &u->left) { u->left = n->right; n->right = u; }
533 else { u->right = n->left; n->left = u; }
534
535 /* And this arrangement must be attached to UU, or some higher attachment
536 * point. The subtree satisfies the heap condition, and can be attached
537 * safely at the selected place.
538 */
539 nn = uu;
540 }
541
542 /* We've found the right spot. Hook the accumulated subtree into place. */
543 *nn = n;
544 }
545
546 /* Remove the node with the KN-byte K from T.
547 *
548 * Return the address of the node we removed, or null if it couldn't be
549 * found.
550 */
551 void *treap_remove(struct treap *t, const char *k, size_t kn)
552 {
553 struct treap_node **nn = &t->root, **candidate = 0, *n, *l, *r;
554
555 /* Search for the matching node, but keep track of the address of the link
556 * which points to our target node.
557 */
558 while (*nn)
559 if (str_lt(k, kn, (*nn)->k, (*nn)->kn)) nn = &(*nn)->left;
560 else { candidate = nn; nn = &(*nn)->right; }
561
562 /* If this isn't the right node then give up. */
563 if (!candidate || str_lt((*candidate)->k, (*candidate)->kn, k, kn))
564 return (0);
565
566 /* Now we need to disentangle the node from the tree. This is essentially
567 * the reverse of insertion: we pretend that this node is suddenly very
568 * light, and mutate the tree so as to restore the heap condition until
569 * eventually our node is a leaf and can be cut off without trouble.
570 *
571 * Throughout, the link *NN notionally points to N, but we don't actually
572 * update it until we're certain what value it should finally take.
573 */
574 nn = candidate; n = *nn; l = n->left; r = n->right;
575 for (;;)
576
577 /* If its left subtree is empty then we can replace our node by its right
578 * subtree and be done. Similarly, if the right subtree is empty then we
579 * replace the node by its left subtree.
580 *
581 * | | | |
582 * (N) ---> R ; (N) ---> L
583 * / \ / \
584 * * R L *
585 */
586 if (!l) { *nn = r; break; }
587 else if (!r) { *nn = l; break; }
588
589 /* Otherwise we need to rotate the pointers so that the heavier of the
590 * two children takes the place of our node; thus we have either
591 *
592 * | |
593 * (N) L
594 * / \ / \
595 * L R ---> X (N)
596 * / \ / \
597 * X Y Y R
598 *
599 * or
600 *
601 * | |
602 * (N) R
603 * / \ / \
604 * L R ---> (N) Y
605 * / \ / \
606 * X Y L X
607 *
608 * Again, these transformations clearly preserve the ordering of nodes in
609 * the binary search tree, and the heap condition.
610 */
611 else if (l->wt > r->wt)
612 { *nn = l; nn = &l->right; l = n->left = l->right; }
613 else
614 { *nn = r; nn = &r->left; r = n->right = r->left; }
615
616 /* Release the key buffer, and return the node that we've now detached. */
617 free(n->k); return (n);
618 }
619
620 /* Initialize an iterator I over T's nodes. */
621 void treap_start_iter(struct treap *t, struct treap_iter *i)
622 {
623 struct treap_node *n = t->root;
624 unsigned sp = 0;
625
626 /* The `stack' in the iterator structure is an empty ascending stack of
627 * nodes which have been encountered, and their left subtrees investigated,
628 * but not yet visited by the iteration.
629 *
630 * Iteration begins by stacking the root node, its left child, and so on,
631 * At the end of this, the topmost entry on the stack is the least node of
632 * the tree, followed by its parent, grandparent, and so on up to the root.
633 */
634 while (n) {
635 assert(sp < TREAP_PATHMAX);
636 i->stack[sp++] = n; n = n->left;
637 }
638 i->sp = sp;
639 }
640
641 /* Return the next node from I, in ascending order by key.
642 *
643 * If there are no more nodes, then return null.
644 */
645 void *treap_next(struct treap_iter *i)
646 {
647 struct treap_node *n, *o;
648 unsigned sp = i->sp;
649
650 /* We say that a node is /visited/ once it's been returned by this
651 * iterator. To traverse a tree in order, then, we traverse its left
652 * subtree, visit the tree root, and traverse its right subtree -- which is
653 * a fine recursive definition, but we need a nonrecursive implementation.
654 *
655 * As is usual in this kind of essential structural recursion, we maintain
656 * a stack. The invariant that we'll maintain is as follows.
657 *
658 * 1. If the stack is empty, then all nodes have been visited.
659 *
660 * 2, If the stack is nonempty then the topmost entry on the stack is the
661 * least node which has not yet been visited -- and therefore is the
662 * next node to visit.
663 *
664 * 3. The earlier entries in the stack are, in (top to bottom) order,
665 * those of the topmost node's parent, grandparent, etc., up to the
666 * root, which have not yet been visited. More specifically, a node
667 * appears in the stack if and only if some node in its left subtree
668 * is nearer the top of the stack.
669 *
670 * When we initialized the iterator state (in `treap_start_iter' above), we
671 * traced a path to the leftmost leaf, stacking the root, its left-hand
672 * child, and so on. The leftmost leaf is clearly the first node to be
673 * visited, and its entire ancestry is on the stack since none of these
674 * nodes has yet been visited. (If the tree is empty, then we have done
675 * nothing, the stack is empty, and there are no nodes to visit.) This
676 * establishes the base case for the induction.
677 */
678
679 /* So, if the stack is empty now, then (1) all of the nodes have been
680 * visited and there's nothing left to do. Return null.
681 */
682 if (!sp) return (0);
683
684 /* It's clear that, if we pop the topmost element of the stack, visit it,
685 * and arrange to reestablish the invariant, then we'll visit the nodes in
686 * the correct order, pretty much by definition.
687 *
688 * So, pop a node off the stack. This is the node we shall return. But
689 * before we can do that, we must reestablish the above invariant.
690 * Firstly, the current node is removed from the stack, because we're about
691 * to visit it, and visited nodes don't belong on the stack. Then there
692 * are two cases to consider.
693 *
694 * * If the current node's right subtree is not empty, then the next node
695 * to be visited is the leftmost node in that subtree. All of the
696 * nodes on the stack are ancestors of the current node, and the right
697 * subtree consists of its descendants, so none of them are already on
698 * the stack; and they're all greater than the current node, and
699 * therefore haven't been visited. Therefore, we must push the current
700 * node's right child, its /left/ child, and so on, proceeding
701 * leftwards until we fall off the bottom of the tree.
702 *
703 * * Otherwise, we've finished traversing some subtree. Either we are
704 * now done, or (3) we have just finished traversing the left subtree
705 * of the next topmost item on the stack. This must therefore be the
706 * next node to visit. The rest of the stack is already correct.
707 */
708 n = i->stack[--sp];
709 o = n->right;
710 while (o) {
711 assert(sp < TREAP_PATHMAX);
712 i->stack[sp++] = o; o = o->left;
713 }
714 i->sp = sp;
715 return (n);
716 }
717
718 /* Recursively check the subtree headed by N.
719 *
720 * No node should have weight greater than MAXWT, to satisfy the heap
721 * condition; if LO is not null, then all node keys should be strictly
722 * greater than LO, and, similarly, if HI is not null, then all keys should
723 * be strictly smaller than HI.
724 */
725 static void check_subtree(struct treap_node *n, unsigned maxwt,
726 const char *klo, const char *khi)
727 {
728 /* Check the heap condition. */
729 assert(n->wt <= maxwt);
730
731 /* Check that the key is in bounds. (Use `strcmp' here to ensure that our
732 * own `str_lt' is working correctly.)
733 */
734 if (klo) assert(STRCMP(n->k, >, klo));
735 if (khi) assert(STRCMP(n->k, <, khi));
736
737 /* Check the left subtree. Node weights must be bounded above by our own
738 * weight. And every key in the left subtree must be smaller than our
739 * current key. We propagate the lower bound.
740 */
741 if (n->left) check_subtree(n->left, n->wt, klo, n->k);
742
743 /* Finally, check the right subtree. This time, every key must be larger
744 * than our key, and we propagate the upper bound.
745 */
746 if (n->right) check_subtree(n->right, n->wt, n->k, khi);
747 }
748
749 /* Check the treap structure rules for T. */
750 void treap_check(struct treap *t)
751 { if (t->root) check_subtree(t->root, t->root->wt, 0, 0); }
752
753 /* Recursively dump the subtree headed by N, indenting the output lines by
754 * IND spaces.
755 */
756 static void dump_node(struct treap_node *n, int ind)
757 {
758 if (n->left) dump_node(n->left, ind + 1);
759 printf(";;%*s [%10u] `%s'\n", 2*ind, "", n->wt, n->k);
760 if (n->right) dump_node(n->right, ind + 1);
761 }
762
763 /* Dump the treap T to standard output, for debugging purposes. */
764 void treap_dump(struct treap *t) { if (t->root) dump_node(t->root, 0); }
765
766 /*----- Configuration file parsing ----------------------------------------*/
767
768 #ifndef DECL_ENVIRON
769 extern char **environ;
770 #endif
771
772 /* Advance P past a syntactically valid name, but no further than L.
773 *
774 * Return the new pointer. If no name is found, report an error, blaming
775 * FILE and LINE; WHAT is an adjective for the kind of name that was
776 * expected.
777 */
778 static const char *scan_name(const char *what,
779 const char *p, const char *l,
780 const char *file, unsigned line)
781 {
782 const char *q = p;
783
784 while (q < l &&
785 (ISALNUM(*q) || *q == '-' || *q == '_' || *q == '.' || *q == '/' ||
786 *q == '*' || *q == '+' || *q == '%' || *q == '@'))
787 q++;
788 if (q == p) lose("%s:%u: expected %s name", file, line, what);
789 return (q);
790 }
791
792 /* Initialize the configuration state CONF.
793 *
794 * Usually you'd use the static initializer `CONFIG_INIT'.
795 */
796 void config_init(struct config *conf)
797 { treap_init(&conf->sections); }
798
799 /* Find and return the section with null-terminated NAME in CONF.
800 *
801 * If no section is found, the behaviour depends on whether `CF_CREAT' is set
802 * in F: if so, an empty section is created and returned; otherwise, a null
803 * pointer is returned.
804 */
805 struct config_section *config_find_section(struct config *conf, unsigned f,
806 const char *name)
807 { return (config_find_section_n(conf, f, name, strlen(name))); }
808
809 /* Find and return the section with the SZ-byte NAME in CONF.
810 *
811 * This works like `config_find_section', but with an explicit length for the
812 * NAME rather than null-termination.
813 */
814 struct config_section *config_find_section_n(struct config *conf, unsigned f,
815 const char *name, size_t sz)
816 {
817 struct config_section *sect;
818 struct treap_path path;
819
820 if (!(f&CF_CREAT))
821 sect = treap_lookup(&conf->sections, name, sz);
822 else {
823 sect = treap_probe(&conf->sections, name, sz, &path);
824 if (!sect) {
825 sect = xmalloc(sizeof(*sect));
826 if (!conf->head) conf->tail = &conf->head;
827 sect->next = 0; *conf->tail = sect; conf->tail = &sect->next;
828 sect->parents = 0; sect->nparents = SIZE_MAX;
829 treap_init(&sect->vars); treap_init(&sect->cache);
830 treap_insert(&conf->sections, &path, &sect->_node, name, sz);
831 config_set_var_n(conf, sect, CF_LITERAL, "@name", 5, name, sz);
832 }
833 }
834 return (sect);
835 }
836
837 /* Set the fallback section for CONF to be SECT.
838 *
839 * That is, if a section has no explicit parents, then by default it will
840 * have a single parent which is SECT. If SECT is null then there is no
841 * fallback section, and sections which don't have explicitly specified
842 * parents have no parents at all. (This is the default situation.)
843 */
844 void config_set_fallback(struct config *conf, struct config_section *sect)
845 { conf->fallback = sect; }
846
847 /* Arrange that SECT has PARENT as its single parent section.
848 *
849 * If PARENT is null, then arrange that SECT has no parents at all. In
850 * either case, any `@parents' setting will be ignored.
851 */
852 void config_set_parent(struct config_section *sect,
853 struct config_section *parent)
854 {
855 if (!parent)
856 sect->nparents = 0;
857 else {
858 sect->parents = xmalloc(sizeof(*sect->parents));
859 sect->parents[0] = parent; sect->nparents = 1;
860 }
861 }
862
863 /* Initialize I to iterate over the sections defined in CONF. */
864 void config_start_section_iter(struct config *conf,
865 struct config_section_iter *i)
866 { i->sect = conf->head; }
867
868 /* Return the next section from I, in order of creation.
869 *
870 * If there are no more sections, then return null.
871 */
872 struct config_section *config_next_section(struct config_section_iter *i)
873 {
874 struct config_section *sect;
875
876 sect = i->sect;
877 if (sect) i->sect = sect->next;
878 return (sect);
879 }
880
881 /* Initialize the `parents' links of SECT, if they aren't set up already.
882 *
883 * If SECT contains a `@parents' setting then parse it to determine the
884 * parents; otherwise use CONF's fallbeck section, as established by
885 * `config_set_fallback'.
886 */
887 static void set_config_section_parents(struct config *conf,
888 struct config_section *sect)
889 {
890 struct config_section *parent;
891 struct config_var *var;
892 const char *file; unsigned line;
893 size_t i, n;
894 char *p, *q, *l;
895 struct argv av = ARGV_INIT;
896
897 /* If the section already has parents established then there's nothing to
898 * do.
899 */
900 if (sect->nparents != SIZE_MAX) return;
901
902 /* Look up `@parents', without recursion! */
903 var = treap_lookup(&sect->vars, "@parents", 8);
904 if (!var) {
905 /* No explicit setting: use the fallback setting. */
906
907 if (!conf->fallback || conf->fallback == sect)
908 sect->nparents = 0;
909 else {
910 sect->parents = xmalloc(sizeof(*sect->parents)); sect->nparents = 1;
911 sect->parents[0] = conf->fallback;
912 }
913 } else {
914 /* Found a `@parents' list: parse it and set the parents list. */
915
916 file = var->file; line = var->line; if (!file) file = "<internal>";
917
918 /* We do this in two phases. First, we parse out the section names, and
919 * record start/limit pointer pairs in `av'.
920 */
921 p = var->val; l = p + var->n; while (p < l && ISSPACE(*p)) p++;
922 while (*p) {
923 q = p;
924 p = (/*unconst*/ char *)scan_name("parent section", p, l, file, line);
925 argv_append(&av, q); argv_append(&av, p);
926 while (p < l && ISSPACE(*p)) p++;
927 if (p >= l) break;
928 if (*p == ',') do p++; while (ISSPACE(*p));
929 }
930
931 /* Now that we've finished parsing, we know how many parents we're going
932 * to have, so we can allocate the `parents' vector and fill it in.
933 */
934 sect->nparents = av.n/2;
935 sect->parents = xmalloc(sect->nparents*sizeof(*sect->parents));
936 for (i = 0; i < av.n; i += 2) {
937 n = av.v[i + 1] - av.v[i];
938 parent = config_find_section_n(conf, 0, av.v[i], n);
939 if (!parent)
940 lose("%s:%u: unknown parent section `%.*s'",
941 file, line, (int)n, av.v[i]);
942 sect->parents[i/2] = parent;
943 }
944 }
945
946 /* All done. */
947 argv_release(&av);
948 }
949
950 /* Find a setting of the SZ-byte variable NAME in CONF, starting from SECT.
951 *
952 * If successful, return a pointer to the variable; otherwise return null.
953 * Inheritance cycles and ambiguous inheritance are diagnosed as fatal
954 * errors.
955 */
956 struct config_var *search_recursive(struct config *conf,
957 struct config_section *sect,
958 const char *name, size_t sz)
959 {
960 struct config_cache_entry *cache;
961 struct treap_path path;
962 struct config_var *var, *v;
963 size_t i, j = j;
964
965 /* If the variable is defined locally then we can just return it. */
966 var = treap_lookup(&sect->vars, name, sz); if (var) return (var);
967
968 /* If we have no parents then there's no way we can find it. */
969 set_config_section_parents(conf, sect);
970 if (!sect->parents) return (0);
971
972 /* Otherwise we must visit the section's parents. We can avoid paying for
973 * this on every lookup by using a cache. If there's already an entry for
974 * this variable then we can return the result immediately (note that we
975 * cache both positive and negative outcomes). Otherwise we create a new
976 * cache entry, do the full recursive search, and fill in the result when
977 * we're done.
978 *
979 * The cache also helps us detect cycles: we set the `CF_OPEN' flag on a
980 * new cache entry when it's first created, and clear it when we fill in
981 * the result: if we encounter an open cache entry again, we know that
982 * we've found a cycle.
983 */
984 cache = treap_probe(&sect->cache, name, sz, &path);
985 if (!cache) {
986 cache = xmalloc(sizeof(*cache)); cache->f = CF_OPEN;
987 treap_insert(&sect->cache, &path, &cache->_node, name, sz);
988 } else if (cache->f&CF_OPEN)
989 lose("inheritance cycle through section `%s'",
990 CONFIG_SECTION_NAME(sect));
991 else
992 return (cache->var);
993
994 /* Recursively search in each parent. We insist that all parents that find
995 * a variable find the same binding; otherwise we declare ambiguous
996 * inheritance.
997 */
998 for (i = 0; i < sect->nparents; i++) {
999 v = search_recursive(conf, sect->parents[i], name, sz);
1000 if (!v);
1001 else if (!var) { var = v; j = i; }
1002 else if (var != v)
1003 lose("section `%s' inherits variable `%s' ambiguously "
1004 "via `%s' and `%s'",
1005 CONFIG_SECTION_NAME(sect), CONFIG_VAR_NAME(var),
1006 CONFIG_SECTION_NAME(sect->parents[j]),
1007 CONFIG_SECTION_NAME(sect->parents[i]));
1008 }
1009
1010 /* All done: fill the cache entry in, clear the open flag, and return the
1011 * result.
1012 */
1013 cache->var = var; cache->f &= ~CF_OPEN;
1014 return (var);
1015 }
1016
1017 /* Find and return the variable with null-terminated NAME in SECT.
1018 *
1019 * If `CF_INHERIT' is set in F, then the function searches the section's
1020 * parents recursively; otherwise, it only checks to see whether the variable
1021 * is set directly in SECT.
1022 *
1023 * If no variable is found, the behaviour depends on whether `CF_CREAT' is
1024 * set in F: if so, an empty variable is created and returned; otherwise, a
1025 * null pointer is returned.
1026 *
1027 * Setting both `CF_INHERIT' and `CF_CREAT' is not useful.
1028 */
1029 struct config_var *config_find_var(struct config *conf,
1030 struct config_section *sect,
1031 unsigned f, const char *name)
1032 { return (config_find_var_n(conf, sect, f, name, strlen(name))); }
1033
1034 /* Find and return the variable with the given SZ-byte NAME in SECT.
1035 *
1036 * This works like `config_find_var', but with an explicit length for the
1037 * NAME rather than null-termination.
1038 */
1039 struct config_var *config_find_var_n(struct config *conf,
1040 struct config_section *sect,
1041 unsigned f, const char *name, size_t sz)
1042 {
1043 struct config_var *var;
1044 struct treap_path path;
1045
1046 if (f&CF_INHERIT)
1047 var = search_recursive(conf, sect, name, sz);
1048 else if (!(f&CF_CREAT))
1049 var = treap_lookup(&sect->vars, name, sz);
1050 else {
1051 var = treap_probe(&sect->vars, name, sz, &path);
1052 if (!var) {
1053 var = xmalloc(sizeof(*var));
1054 var->val = 0; var->file = 0; var->f = 0; var->line = 1;
1055 treap_insert(&sect->vars, &path, &var->_node, name, sz);
1056 }
1057 }
1058 return (var);
1059 }
1060
1061 /* Set variable NAME to VALUE in SECT, with associated flags F.
1062 *
1063 * The names are null-terminated. The flags are variable flags: see `struct
1064 * config_var' for details. Returns the variable.
1065 *
1066 * If the variable is already set and has the `CF_OVERRIDE' flag, then this
1067 * function does nothing unless `CF_OVERRIDE' is /also/ set in F.
1068 */
1069 struct config_var *config_set_var(struct config *conf,
1070 struct config_section *sect,
1071 unsigned f,
1072 const char *name, const char *value)
1073 {
1074 return (config_set_var_n(conf, sect, f,
1075 name, strlen(name),
1076 value, strlen(value)));
1077 }
1078
1079 /* As `config_set_var', except that the variable NAME and VALUE have explicit
1080 * lengths (NAMELEN and VALUELEN, respectively) rather than being null-
1081 * terminated.
1082 */
1083 struct config_var *config_set_var_n(struct config *conf,
1084 struct config_section *sect,
1085 unsigned f,
1086 const char *name, size_t namelen,
1087 const char *value, size_t valuelen)
1088 {
1089 struct config_var *var =
1090 config_find_var_n(conf, sect, CF_CREAT, name, namelen);
1091
1092 if (var->f&~f&CF_OVERRIDE) return (var);
1093 free(var->val); var->val = xstrndup(value, valuelen); var->n = valuelen;
1094 var->f = f;
1095 return (var);
1096 }
1097
1098 /* Initialize I to iterate over the variables directly defined in SECT. */
1099 void config_start_var_iter(struct config *conf, struct config_section *sect,
1100 struct config_var_iter *i)
1101 { treap_start_iter(&sect->vars, &i->i); }
1102
1103 /* Return next variable from I, in ascending lexicographical order.
1104 *
1105 * If there are no more variables, then return null.
1106 */
1107 struct config_var *config_next_var(struct config_var_iter *i)
1108 { return (treap_next(&i->i)); }
1109
1110 /* Read and parse configuration FILE, applying its settings to CONF.
1111 *
1112 * If all goes well, the function returns 0. If the file is not found, then
1113 * the behaviour depends on whether `CF_NOENTOK' is set in F: if so, then the
1114 * function simply returns -1. Otherwise, a fatal error is reported. Note
1115 * that this /only/ applies if the file does not exist (specifically, opening
1116 * it fails with `ENOENT') -- any other problems are reported as fatal
1117 * errors regardless of the flag setting.
1118 */
1119 int config_read_file(struct config *conf, const char *file, unsigned f)
1120 {
1121 struct config_section *sect;
1122 struct config_var *var;
1123 struct dstr d = DSTR_INIT, dd = DSTR_INIT;
1124 unsigned line = 0;
1125 const char *p, *q, *r;
1126 FILE *fp;
1127
1128 /* Try to open the file. */
1129 fp = fopen(file, "r");
1130 if (!fp) {
1131 if ((f&CF_NOENTOK) && errno == ENOENT) return (-1);
1132 lose("failed to open configuration file `%s': %s",
1133 file, strerror(errno));
1134 }
1135
1136 /* Find the initial section. */
1137 sect = config_find_section(conf, CF_CREAT, "@CONFIG"); var = 0;
1138
1139 /* Work through the file, line by line. */
1140 for (;;) {
1141 dstr_reset(&d); if (dstr_readline(&d, fp)) break;
1142 line++;
1143
1144 /* Trim trailing spaces from the line. The syntax is sensitive to
1145 * leading spaces, so we can't trim those yet.
1146 */
1147 while (d.len && ISSPACE(d.p[d.len - 1])) d.len--;
1148 d.p[d.len] = 0;
1149
1150 if (!*d.p || *d.p == ';')
1151 /* Ignore comments entirely. (In particular, a comment doesn't
1152 * interrupt a multiline variable value.)
1153 */
1154 ;
1155
1156 else if (ISSPACE(d.p[0])) {
1157 /* The line starts with whitespace, so it's a continuation line. */
1158
1159 /* Skip the initial whitespace. */
1160 p = d.p; while (ISSPACE(*p)) p++;
1161
1162 /* If we aren't collecting a variable value then this is an error.
1163 * Otherwise, accumulate it into the current value.
1164 */
1165 if (!var)
1166 lose("%s:%u: continuation line, but no variable", file, line);
1167 if (dd.len) dstr_putc(&dd, ' ');
1168 dstr_putm(&dd, p, d.len - (p - d.p));
1169
1170 } else {
1171 /* The line starts in the first column. */
1172
1173 /* If there's a value value being collected then we must commit it to
1174 * its variable (unless there's already a setting there that says we
1175 * shouldn't).
1176 */
1177 if (var) {
1178 if (!(var->f&CF_OVERRIDE))
1179 { var->val = xstrndup(dd.p, dd.len); var->n = dd.len; }
1180 var = 0;
1181 }
1182
1183 /* Now decide what kind of line this is. */
1184 if (d.p[0] == '[') {
1185 /* It's a section header. */
1186
1187 /* Parse the header. */
1188 p = d.p + 1; while (ISSPACE(*p)) p++;
1189 q = scan_name("section", p, d.p + d.len, file, line);
1190 r = q; while (ISSPACE(*r)) r++;
1191 if (*r != ']')
1192 lose("%s:%u: expected `]' in section header", file, line);
1193 if (r[1])
1194 lose("%s:%u: trailing junk after `]' in section header",
1195 file, line);
1196
1197 /* Create the new section. */
1198 sect = config_find_section_n(conf, CF_CREAT, p, q - p);
1199
1200 } else {
1201 /* It's a variable assignment. Parse the name out. */
1202 p = scan_name("variable", d.p, d.p + d.len, file, line);
1203 var = config_find_var_n(conf, sect, CF_CREAT, d.p, p - d.p);
1204 while (ISSPACE(*p)) p++;
1205 if (*p != '=') lose("%s:%u: missing `=' in assignment", file, line);
1206 p++; while (ISSPACE(*p)) p++;
1207
1208 /* Clear out the variable's initial value, unless we shouldn't
1209 * override it.
1210 */
1211 if (!(var->f&CF_OVERRIDE)) {
1212 free(var->val); var->val = 0; var->f = 0;
1213 free(var->file); var->file = xstrdup(file); var->line = line;
1214 }
1215 dstr_reset(&dd); dstr_puts(&dd, p);
1216 }
1217 }
1218 }
1219
1220 /* If there's a value under construction then commit the result. */
1221 if (var && !(var->f&CF_OVERRIDE))
1222 { var->val = xstrndup(dd.p, dd.len); var->n = dd.len; }
1223
1224 /* Close the file. */
1225 if (fclose(fp))
1226 lose("error reading configuration file `%s': %s", file, strerror(errno));
1227
1228 /* All done. */
1229 dstr_release(&d); dstr_release(&dd);
1230 return (0);
1231 }
1232
1233 /* Populate SECT with environment variables.
1234 *
1235 * Environment variables are always set with `CF_LITERAL'.
1236 */
1237 void config_read_env(struct config *conf, struct config_section *sect)
1238 {
1239 const char *p, *v;
1240 size_t i;
1241
1242 for (i = 0; (p = environ[i]) != 0; i++) {
1243 v = strchr(p, '='); if (!v) continue;
1244 config_set_var_n(conf, sect, CF_LITERAL, p, v - p, v + 1, strlen(v + 1));
1245 }
1246 }
1247
1248 /*----- Substitution and quoting ------------------------------------------*/
1249
1250 /* The substitution and word-splitting state.
1251 *
1252 * This only keeps track of the immutable parameters for the substitution
1253 * task: stuff which changes (flags, filtering state, cursor position) is
1254 * maintained separately.
1255 */
1256 struct subst {
1257 struct config *config; /* configuration state */
1258 struct config_section *home; /* home section for lookups */
1259 struct dstr *d; /* current word being constructed */
1260 struct argv *av; /* output word list */
1261 };
1262
1263 /* Flags for `subst' and related functions. */
1264 #define SF_SPLIT 0x0001u /* split at (unquoted) whitespace */
1265 #define SF_QUOT 0x0002u /* currently within double quotes */
1266 #define SF_SUBST 0x0004u /* apply `$-substitutions */
1267 #define SF_SUBEXPR 0x0008u /* stop at delimiter `|' or `}' */
1268 #define SF_SPANMASK 0x00ffu /* mask for the above */
1269
1270 #define SF_WORD 0x0100u /* output word under construction */
1271 #define SF_SKIP 0x0200u /* not producing output */
1272 #define SF_LITERAL 0x0400u /* do not expand or substitute */
1273 #define SF_UPCASE 0x0800u /* convert to uppercase */
1274 #define SF_DOWNCASE 0x1000u /* convert to lowercase */
1275 #define SF_CASEMASK 0x1800u /* mask for case conversions */
1276
1277 /* Apply filters encoded in QFILT and F to the text from P to L, and output.
1278 *
1279 * SB is the substitution state which, in particular, explains where the
1280 * output should go.
1281 *
1282 * The filters are encoded as flags `SF_UPCASE' and `SF_DOWNCASE' for case
1283 * conversions, and a nesting depth QFILT for toothpick escaping. (QFILT is
1284 * encoded as the number of toothpicks to print: see `subst' for how this
1285 * determined.)
1286 */
1287 static void filter_string(const char *p, const char *l,
1288 const struct subst *sb, unsigned qfilt, unsigned f)
1289 {
1290 size_t r, n;
1291 char *q; const char *pp, *ll;
1292
1293 if (!qfilt && !(f&SF_CASEMASK))
1294 /* Fast path: there's nothing to do: just write to the output. */
1295 dstr_putm(sb->d, p, l - p);
1296
1297 else for (;;) {
1298 /* We must be a bit more circumspect. */
1299
1300 /* Determine the length of the next span of characters which don't need
1301 * escaping. (If QFILT is zero then this is everything.)
1302 */
1303 r = l - p; n = qfilt ? strcspn(p, "\"\\") : r;
1304 if (n > r) n = r;
1305
1306 if (!(f&SF_CASEMASK))
1307 /* No case conversion: we can just emit this chunk. */
1308
1309 dstr_putm(sb->d, p, n);
1310
1311 else {
1312 /* Case conversion to do. Arrange enough space for the output, and
1313 * convert it character by character.
1314 */
1315
1316 dstr_ensure(sb->d, n); q = sb->d->p + sb->d->len; pp = p; ll = p + n;
1317 if (f&SF_DOWNCASE) while (pp < ll) *q++ = TOLOWER(*pp++);
1318 else if (f&SF_UPCASE) while (pp < ll) *q++ = TOUPPER(*pp++);
1319 sb->d->len += n;
1320 }
1321
1322 /* If we've reached the end then stop. */
1323 if (n >= r) break;
1324
1325 /* Otherwise we must have found a character which requires escaping.
1326 * Emit enough toothpicks.
1327 */
1328 dstr_putcn(sb->d, '\\', qfilt);
1329
1330 /* This character is now done, so we can skip over and see if there's
1331 * another chunk of stuff we can do at high speed.
1332 */
1333 dstr_putc(sb->d, p[n]); p += n + 1;
1334 }
1335 }
1336
1337 /* Scan and resolve a `[SECT:]VAR' specifier at P.
1338 *
1339 * Return the address of the next character following the specifier; and set
1340 * *VAR_OUT to point to the variable we found, or null if it's not there. L
1341 * is a limit on the region of the buffer that we should process; SB is the
1342 * substitution state which provides the home section if none is given
1343 * explicitly; FILE and LINE are the source location to blame for problems.
1344 */
1345 static const char *retrieve_varspec(const char *p, const char *l,
1346 const struct subst *sb,
1347 struct config_var **var_out,
1348 const char *file, unsigned line)
1349 {
1350 struct config_section *sect = sb->home;
1351 const char *t;
1352
1353 t = scan_name("section or variable", p, l, file, line);
1354 if (t < l && *t == ':') {
1355 sect = config_find_section_n(sb->config, 0, p, t - p);
1356 p = t + 1; t = scan_name("variable", p, l, file, line);
1357 }
1358
1359 if (!sect) *var_out = 0;
1360 else *var_out = config_find_var_n(sb->config, sect, CF_INHERIT, p, t - p);
1361 return (t);
1362 }
1363
1364 /* Substitute and/or word-split text.
1365 *
1366 * The input text starts at P, and continues to (just before) L. Context for
1367 * the task is provided by SB; the source location to blame is FILE and LINE
1368 * (FILE may be null so that this can be passed directly from a `config_var'
1369 * without further checking); QFILT is the nesting depth in toothpick-
1370 * escaping; and F holds a mask of `SF_...' flags.
1371 */
1372 static const char *subst(const char *p, const char *l,
1373 const struct subst *sb,
1374 const char *file, unsigned line,
1375 unsigned qfilt, unsigned f)
1376 {
1377 struct config_var *var;
1378 const char *q0, *q1, *t;
1379 unsigned subqfilt, ff;
1380 size_t n;
1381
1382 /* It would be best if we could process literal text at high speed. To
1383 * this end, we have a table, indexed by the low-order bits of F, to tell
1384 * us which special characters we need to stop at. This way, we can use
1385 * `strcspn' to skip over literal text and stop at the next character which
1386 * needs special handling. Entries in this table with a null pointer
1387 * correspond to impossible flag settings.
1388 */
1389 static const char *const delimtab[] = {
1390
1391 #define ESCAPE "\\" /* always watch for `\'-escapes */
1392 #define SUBST "$" /* check for `$' if `SF_SUBST' set */
1393 #define WORDSEP " \f\r\n\t\v'\"" /* space, quotes if `SF_SPLIT' but
1394 * not `SF_QUOT' */
1395 #define QUOT "\"" /* only quotes if `SF_SPLIT' and
1396 * `SF_QUOT' */
1397 #define DELIM "|}" /* end delimiters of `SF_SUBEXPR' */
1398
1399 ESCAPE,
1400 ESCAPE WORDSEP,
1401 0,
1402 ESCAPE QUOT,
1403 ESCAPE SUBST,
1404 ESCAPE SUBST WORDSEP,
1405 0,
1406 ESCAPE SUBST QUOT,
1407 ESCAPE DELIM,
1408 ESCAPE DELIM WORDSEP,
1409 0,
1410 ESCAPE DELIM QUOT,
1411 ESCAPE DELIM SUBST,
1412 ESCAPE DELIM SUBST WORDSEP,
1413 0,
1414 ESCAPE DELIM SUBST QUOT
1415
1416 #undef COMMON
1417 #undef WORDSEP
1418 #undef SQUOT
1419 #undef DELIM
1420 };
1421
1422 /* Set FILE to be useful if it was null on entry. */
1423 if (!file) file = "<internal>";
1424
1425 /* If the text is literal then hand off to `filter_string'. This obviously
1426 * starts a word.
1427 */
1428 if (f&SF_LITERAL) {
1429 filter_string(p, l, sb, qfilt, f);
1430 f |= SF_WORD;
1431 goto done;
1432 }
1433
1434 /* Chew through the input until it's all gone. */
1435 while (p < l) {
1436
1437 if ((f&(SF_SPLIT | SF_QUOT)) == SF_SPLIT && ISSPACE(*p)) {
1438 /* This is whitespace, we're supposed to split, and we're not within
1439 * quotes, so we should split here.
1440 */
1441
1442 /* If there's a word in progress then we should commit it. */
1443 if (f&SF_WORD) {
1444 if (!(f&SF_SKIP)) {
1445 argv_append(sb->av, xstrndup(sb->d->p, sb->d->len));
1446 dstr_reset(sb->d);
1447 }
1448 f &= ~SF_WORD;
1449 }
1450
1451 /* Skip over further whitespace at high speed. */
1452 do p++; while (p < l && ISSPACE(*p));
1453
1454 } else if (*p == '\\') {
1455 /* This is a toothpick, so start a new word and add the next character
1456 * to it.
1457 */
1458
1459 /* If there's no next character then we should be upset. */
1460 p++; if (p >= l) lose("%s:%u: unfinished `\\' escape", file, line);
1461
1462 if (!(f&SF_SKIP)) {
1463
1464 /* If this is a double quote or backslash then check DFLT to see if
1465 * it needs escaping.
1466 */
1467 if (qfilt && (*p == '"' || *p == '\\'))
1468 dstr_putcn(sb->d, '\\', qfilt);
1469
1470 /* Output the character. */
1471 if (f&SF_DOWNCASE) dstr_putc(sb->d, TOLOWER(*p));
1472 else if (f&SF_UPCASE) dstr_putc(sb->d, TOUPPER(*p));
1473 else dstr_putc(sb->d, *p);
1474 }
1475
1476 /* Move past the escaped character. Remember we started a word. */
1477 p++; f |= SF_WORD;
1478
1479 } else if ((f&SF_SPLIT) && *p == '"') {
1480 /* This is a double quote, and we're word splitting. We're definitely
1481 * in a word now. Toggle whether we're within quotes.
1482 */
1483
1484 f ^= SF_QUOT; f |= SF_WORD; p++;
1485
1486 } else if ((f&(SF_SPLIT | SF_QUOT)) == SF_SPLIT && *p == '\'') {
1487 /* This is a single quote, and we're word splitting but not within
1488 * double quotes. Find the matching end quote, and just output
1489 * everything between literally.
1490 */
1491
1492 p++; t = strchr(p, '\'');
1493 if (!t || t >= l) lose("%s:%u: missing `''", file, line);
1494 if (!(f&SF_SKIP)) filter_string(p, t, sb, qfilt, f);
1495 p = t + 1; f |= SF_WORD;
1496
1497 } else if ((f&SF_SUBEXPR) && (*p == '|' || *p == '}')) {
1498 /* This is an end delimiter, and we're supposed to stop here. */
1499 break;
1500
1501 } else if ((f&SF_SUBST) && *p == '$') {
1502 /* This is a `$' and we're supposed to do substitution. */
1503
1504 /* The kind of substitution is determined by the next character. */
1505 p++; if (p >= l) lose("%s:%u: incomplete substitution", file, line);
1506
1507 /* Prepare flags for a recursive substitution.
1508 *
1509 * Hide our quote state from the recursive call. If we're within a
1510 * word, then disable word-splitting.
1511 */
1512 ff = f&~(SF_QUOT | (f&SF_WORD ? SF_SPLIT : 0));
1513
1514 /* Now dispatch based on the following character. */
1515 switch (*p) {
1516
1517 case '?':
1518 /* A conditional expression: $?VAR{CONSEQ[|ALT]} */
1519
1520 /* Skip initial space. */
1521 p++; while (p < l && ISSPACE(*p)) p++;
1522
1523 /* Find the variable. */
1524 p = retrieve_varspec(p, l, sb, &var, file, line);
1525
1526 /* Skip whitespace again. */
1527 while (p < l && ISSPACE(*p)) p++;
1528
1529 /* Expect the opening `{'. */
1530 if (p > l || *p != '{') lose("%s:%u: expected `{'", file, line);
1531 p++;
1532
1533 /* We'll process the parts recursively, but we need to come back
1534 * when we hit the appropriate delimiters, so arrange for that.
1535 */
1536 ff |= SF_SUBEXPR;
1537
1538 /* Process the consequent (skip if the variable wasn't found). */
1539 p = subst(p, l, sb, file, line, qfilt,
1540 ff | (var ? 0 : SF_SKIP));
1541
1542 /* If there's a `|' then process the alternative too (skip if the
1543 * variable /was/ found).
1544 */
1545 if (p < l && *p == '|')
1546 p = subst(p + 1, l, sb, file, line, qfilt,
1547 ff | (var ? SF_SKIP : 0));
1548
1549 /* We should now be past the closing `}'. */
1550 if (p >= l || *p != '}') lose("%s:%u: missing `}'", file, line);
1551 p++;
1552 break;
1553
1554 case '{':
1555 /* A variable substitution: ${VAR[|FILT]...[?ALT]} */
1556
1557 /* Skip initial whitespace. */
1558 p++; while (p < l && ISSPACE(*p)) p++;
1559
1560 /* Find the variable. */
1561 q0 = p; p = retrieve_varspec(p, l, sb, &var, file, line); q1 = p;
1562
1563 /* Determine the filters to apply when substituting the variable
1564 * value.
1565 */
1566 subqfilt = qfilt;
1567 for (;;) {
1568
1569 /* Skip spaces again. */
1570 while (p < l && ISSPACE(*p)) p++;
1571
1572 /* If there's no `|' then there are no more filters, so stop. */
1573 if (p >= l || *p != '|') break;
1574
1575 /* Skip the `|' and more spaces. */
1576 p++; while (p < l && ISSPACE(*p)) p++;
1577
1578 /* Collect the filter name. */
1579 t = scan_name("filter", p, l, file, line);
1580
1581 /* Dispatch on the filter name. */
1582 if (t - p == 1 && *p == 'q')
1583 /* `q' -- quote for Lisp string.
1584 *
1585 * We're currently adding Q `\' characters before each naughty
1586 * character. But a backslash itself is naughty too, so that
1587 * makes Q + 1 naughty characters, each of which needs a
1588 * toothpick, so now we need Q + (Q + 1) = 2 Q + 1 toothpicks.
1589 *
1590 * Calculate this here rather than at each point toothpicks
1591 * need to be deployed.
1592 */
1593
1594 subqfilt = 2*subqfilt + 1;
1595
1596 else if (t - p == 1 && *p == 'l')
1597 /* `l' -- convert to lowercase.
1598 *
1599 * If a case conversion is already set, then that will override
1600 * whatever we do here, so don't bother.
1601 */
1602
1603 { if (!(ff&SF_CASEMASK)) ff |= SF_DOWNCASE; }
1604
1605 else if (t - p == 1 && *p == 'u')
1606 /* `u' -- convert to uppercase.
1607 *
1608 * If a case conversion is already set, then that will override
1609 * whatever we do here, so don't bother.
1610 */
1611 { if (!(ff&SF_CASEMASK)) ff |= SF_UPCASE; }
1612
1613 else
1614 /* Something else we didn't understand. */
1615 lose("%s:%u: unknown filter `%.*s'",
1616 file, line, (int)(t - p), p);
1617
1618 /* Continue from after the filter name. */
1619 p = t;
1620 }
1621
1622 /* If we're not skipping, and we found a variable, then substitute
1623 * its value. This is the point where we need to be careful about
1624 * recursive expansion.
1625 */
1626 if (!(f&SF_SKIP) && var) {
1627 if (var->f&CF_EXPAND)
1628 lose("%s:%u: recursive expansion of variable `%.*s'",
1629 file, line, (int)(q1 - q0), q0);
1630 var->f |= CF_EXPAND;
1631 subst(var->val, var->val + var->n, sb,
1632 var->file, var->line, subqfilt,
1633 ff | (var->f&CF_LITERAL ? SF_LITERAL : 0));
1634 var->f &= ~CF_EXPAND;
1635 }
1636
1637 /* If there's an alternative, then we need to process (or maybe
1638 * skip) it. Otherwise, we should complain if there was no
1639 * veriable, and we're not skipping.
1640 */
1641 if (p < l && *p == '?')
1642 p = subst(p + 1, l, sb, file, line, subqfilt,
1643 ff | SF_SUBEXPR | (var ? SF_SKIP : 0));
1644 else if (!var && !(f&SF_SKIP))
1645 lose("%s:%u: unknown variable `%.*s'",
1646 file, line, (int)(q1 - q0), q0);
1647
1648 /* Expect a `}' here. (No need to skip spaces: we already did that
1649 * after scanning for filters, and either there was no alternative,
1650 * or we advanced to a delimiter character anyway.)
1651 */
1652 if (p >= l || *p != '}') lose("%s:%u: missing `}'", file, line);
1653 p++;
1654 break;
1655
1656 default:
1657 /* Something else. That's a shame. */
1658 lose("%s:%u: unexpected `$'-substitution `%c'", file, line, *p);
1659 }
1660
1661 /* Complain if we started out in word-splitting state, and therefore
1662 * have added a whole number of words to the output, but there's a
1663 * word-fragment stuck onto the end of this substitution.
1664 */
1665 if (p < l && !(~f&~(SF_WORD | SF_SPLIT)) && !ISSPACE(*p) &&
1666 !((f&SF_SUBEXPR) && (*p == '|' || *p == '}')))
1667 lose("%s:%u: surprising word boundary "
1668 "after splicing substitution",
1669 file, line);
1670 }
1671
1672 else {
1673 /* Something else. Try to skip over this at high speed.
1674 *
1675 * This makes use of the table we set up earlier.
1676 */
1677
1678 n = strcspn(p, delimtab[f&SF_SPANMASK]);
1679 if (n > l - p) n = l - p;
1680 if (!(f&SF_SKIP)) filter_string(p, p + n, sb, qfilt, f);
1681 p += n; f |= SF_WORD;
1682 }
1683 }
1684
1685 done:
1686 /* Sort out the wreckage. */
1687
1688 /* If we're still within quotes then something has gone wrong. */
1689 if (f&SF_QUOT) lose("%s:%u: missing `\"'", file, line);
1690
1691 /* If we're within a word, and should be splitting, then commit the word to
1692 * the output list.
1693 */
1694 if ((f&(SF_WORD | SF_SPLIT | SF_SKIP)) == (SF_SPLIT | SF_WORD)) {
1695 argv_append(sb->av, xstrndup(sb->d->p, sb->d->len));
1696 dstr_reset(sb->d);
1697 }
1698
1699 /* And, with that, we're done. */
1700 return (p);
1701 }
1702
1703 /* Expand substitutions in a string.
1704 *
1705 * Expand the null-terminated string P relative to the HOME section, using
1706 * configuration CONFIG, and appending the result to dynamic string D. Blame
1707 * WHAT in any error messages.
1708 */
1709 void config_subst_string(struct config *config, struct config_section *home,
1710 const char *what, const char *p, struct dstr *d)
1711 {
1712 struct subst sb;
1713
1714 sb.config = config; sb.home = home; sb.d = d;
1715 subst(p, p + strlen(p), &sb, what, 0, 0, SF_SUBST);
1716 dstr_putz(d);
1717 }
1718
1719 /* Expand substitutions in a string.
1720 *
1721 * Expand the null-terminated string P relative to the HOME section, using
1722 * configuration CONFIG, returning the result as a freshly malloc(3)ed
1723 * string. Blame WHAT in any error messages.
1724 */
1725 char *config_subst_string_alloc(struct config *config,
1726 struct config_section *home,
1727 const char *what, const char *p)
1728 {
1729 struct dstr d = DSTR_INIT;
1730 char *q;
1731
1732 config_subst_string(config, home, what, p, &d);
1733 q = xstrndup(d.p, d.len); dstr_release(&d); return (q);
1734 }
1735
1736 /* Expand substitutions in a variable.
1737 *
1738 * Expand the value of the variable VAR relative to the HOME section, using
1739 * configuration CONFIG, appending the result to dynamic string D.
1740 */
1741 void config_subst_var(struct config *config, struct config_section *home,
1742 struct config_var *var, struct dstr *d)
1743 {
1744 struct subst sb;
1745
1746 sb.config = config; sb.home = home; sb.d = d;
1747 var->f |= CF_EXPAND;
1748 subst(var->val, var->val + var->n, &sb, var->file, var->line, 0,
1749 SF_SUBST | (var->f&CF_LITERAL ? SF_LITERAL : 0));
1750 var->f &= ~CF_EXPAND;
1751 dstr_putz(d);
1752 }
1753
1754 /* Expand substitutions in a variable.
1755 *
1756 * Expand the value of the variable VAR relative to the HOME section, using
1757 * configuration CONFIG, returning the result as a freshly malloc(3)ed
1758 * string.
1759 */
1760 char *config_subst_var_alloc(struct config *config,
1761 struct config_section *home,
1762 struct config_var *var)
1763 {
1764 struct dstr d = DSTR_INIT;
1765 char *q;
1766
1767 config_subst_var(config, home, var, &d);
1768 q = xstrndup(d.p, d.len); dstr_release(&d); return (q);
1769 }
1770
1771 /* Expand substitutions in a variable and split into words.
1772 *
1773 * Expand and word-split the value of the variable VAR relative to the HOME
1774 * section, using configuration CONFIG, appending the resulting words into
1775 * the vector AV.
1776 */
1777 void config_subst_split_var(struct config *config,
1778 struct config_section *home,
1779 struct config_var *var, struct argv *av)
1780 {
1781 struct dstr d = DSTR_INIT;
1782 struct subst sb;
1783
1784 sb.config = config; sb.home = home; sb.av = av; sb.d = &d;
1785 var->f |= CF_EXPAND;
1786 subst(var->val, var->val + var->n, &sb, var->file, var->line, 0,
1787 SF_SUBST | SF_SPLIT | (var->f&CF_LITERAL ? SF_LITERAL : 0));
1788 var->f &= ~CF_EXPAND;
1789 dstr_release(&d);
1790 }
1791
1792 /*----- That's all, folks -------------------------------------------------*/