@@@ more wip
[runlisp] / lib.c
CommitLineData
7b8ff279
MW
1/* -*-c-*-
2 *
3 * Common definitions for `runlisp'
4 *
5 * (c) 2020 Mark Wooding
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of Runlisp, a tool for invoking Common Lisp scripts.
11 *
12 * Runlisp is free software: you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by the
14 * Free Software Foundation; either version 3 of the License, or (at your
15 * option) any later version.
16 *
17 * Runlisp is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
20 * for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with Runlisp. If not, see <https://www.gnu.org/licenses/>.
24 */
25
26/*----- Header files ------------------------------------------------------*/
27
28#include "config.h"
29
30#include <assert.h>
31
32#include <ctype.h>
33#include <errno.h>
34#include <stdarg.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38
39#include <unistd.h>
40
41#include "lib.h"
42
7b8ff279
MW
43/*----- Diagnostic utilities ----------------------------------------------*/
44
45const char *progname = "???";
8996f767 46 /* Our program name, for use in error messages. */
7b8ff279 47
8996f767 48/* Set `progname' from the pathname in PROG (typically from `argv[0]'). */
7b8ff279
MW
49void set_progname(const char *prog)
50{
51 const char *p;
52
53 p = strrchr(prog, '/');
54 progname = p ? p + 1 : progname;
55}
56
8996f767
MW
57/* Report an error or warning in Unix style, given a captured argument
58 * cursor.
59 */
7b8ff279
MW
60void vmoan(const char *msg, va_list ap)
61{
62 fprintf(stderr, "%s: ", progname);
63 vfprintf(stderr, msg, ap);
64 fputc('\n', stderr);
65}
66
8996f767 67/* Issue a warning message. */
7b8ff279
MW
68void moan(const char *msg, ...)
69 { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); }
70
8996f767 71/* Issue a fatal error message and exit unsuccessfully. */
7b8ff279
MW
72void lose(const char *msg, ...)
73 { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); exit(127); }
74
75/*----- Memory allocation -------------------------------------------------*/
76
8996f767
MW
77/* Allocate and return a pointer to N bytes, or report a fatal error.
78 *
79 * Release the pointer using `free' as usual. If N is zero, returns null
80 * (but you are not expected to check for this).
81 */
7b8ff279
MW
82void *xmalloc(size_t n)
83{
84 void *p;
85
86 if (!n) return (0);
87 p = malloc(n); if (!p) lose("failed to allocate memory");
88 return (p);
89}
90
8996f767
MW
91/* Resize the block at P (from `malloc' or `xmalloc') to be N bytes long.
92 *
93 * The block might (and probably will) move, so it returns the new address.
94 * If N is zero, then the block is freed (if necessary) and a null pointer
95 * returned; otherwise, if P is null then a fresh block is allocated. If
96 * allocation fails, then a fatal error is reported.
97 */
7b8ff279
MW
98void *xrealloc(void *p, size_t n)
99{
100 if (!n) { free(p); return (0); }
101 else if (!p) return (xmalloc(n));
102 p = realloc(p, n); if (!p) lose("failed to allocate memory");
103 return (p);
104}
105
8996f767
MW
106/* Allocate and return a copy of the N-byte string starting at P.
107 *
108 * The new string is null-terminated, though P need not be. If allocation
109 * fails, then a fatal error is reported.
110 */
7b8ff279
MW
111char *xstrndup(const char *p, size_t n)
112{
113 char *q = xmalloc(n + 1);
114
115 memcpy(q, p, n); q[n] = 0;
116 return (q);
117}
118
8996f767
MW
119/* Allocate and return a copy of the null-terminated string starting at P.
120 *
121 * If allocation fails, then a fatal error is reported.
122 */
7b8ff279
MW
123char *xstrdup(const char *p) { return (xstrndup(p, strlen(p))); }
124
125/*----- Dynamic strings ---------------------------------------------------*/
126
8996f767
MW
127/* Initialize the string D.
128 *
129 * Usually you'd use the static initializer `DSTR_INIT'.
130 */
7b8ff279
MW
131void dstr_init(struct dstr *d) { d->p = 0; d->len = d->sz = 0; }
132
8996f767 133/* Reset string D so it's empty again. */
7b8ff279
MW
134void dstr_reset(struct dstr *d) { d->len = 0; }
135
8996f767 136/* Ensure that D has at least N unused bytes available. */
7b8ff279
MW
137void dstr_ensure(struct dstr *d, size_t n)
138{
139 size_t need = d->len + n, newsz;
140
141 if (need <= d->sz) return;
142 newsz = d->sz ? 2*d->sz : 16;
143 while (newsz < need) newsz *= 2;
144 d->p = xrealloc(d->p, newsz); d->sz = newsz;
145}
146
8996f767
MW
147/* Release the memory held by D.
148 *
149 * It must be reinitialized (e.g., by `dstr_init') before it can be used
150 * again.
151 */
7b8ff279
MW
152void dstr_release(struct dstr *d) { free(d->p); }
153
8996f767
MW
154/* Append the N-byte string at P to D.
155 *
156 * P need not be null-terminated. D will not be null-terminated
157 * afterwards.
158 */
7b8ff279
MW
159void dstr_putm(struct dstr *d, const void *p, size_t n)
160 { dstr_ensure(d, n); memcpy(d->p + d->len, p, n); d->len += n; }
161
8996f767
MW
162/* Append the null-terminated string P to D.
163 *
164 * D /is/ guaranteed to be null-terminated after this.
165 */
7b8ff279
MW
166void dstr_puts(struct dstr *d, const char *p)
167{
168 size_t n = strlen(p);
169
170 dstr_ensure(d, n + 1);
171 memcpy(d->p + d->len, p, n + 1);
172 d->len += n;
173}
174
8996f767
MW
175/* Append the single character CH to D.
176 *
177 * D will not be null-terminated afterwards.
178 */
7b8ff279
MW
179void dstr_putc(struct dstr *d, int ch)
180 { dstr_ensure(d, 1); d->p[d->len++] = ch; }
181
8996f767
MW
182/* Append N copies of the character CH to D.
183 *
184 * D will not be null-terminated afterwards.
185 */
7b8ff279
MW
186void dstr_putcn(struct dstr *d, int ch, size_t n)
187 { dstr_ensure(d, n); memset(d->p + d->len, ch, n); d->len += n; }
188
8996f767
MW
189/* Null-terminate the string D.
190 *
191 * This doesn't change the length of D. If further stuff is appended then
192 * the null terminator will be overwritten.
193 */
7b8ff279
MW
194void dstr_putz(struct dstr *d)
195 { dstr_ensure(d, 1); d->p[d->len] = 0; }
196
8996f767
MW
197/* Append stuff to D, determined by printf(3) format string P and argument
198 * tail AP.
199 *
200 * D will not be null-terminated afterwards.
201 */
7b8ff279
MW
202void dstr_vputf(struct dstr *d, const char *p, va_list ap)
203{
204 va_list ap2;
205 size_t r;
206 int n;
207
208 r = d->sz - d->len;
209 va_copy(ap2, ap);
210 n = vsnprintf(d->p + d->len, r, p, ap2); assert(n >= 0);
211 va_end(ap2);
212 if (n >= r) {
213 dstr_ensure(d, n + 1); r = d->sz - d->len;
214 n = vsnprintf(d->p + d->len, r, p, ap); assert(n >= 0); assert(n < r);
215 }
216 d->len += n;
217}
218
8996f767
MW
219/* Append stuff to D, determined by printf(3) format string P and arguments.
220 *
221 * D will not be null-terminated afterwards.
222 */
7b8ff279
MW
223PRINTF_LIKE(2, 3) void dstr_putf(struct dstr *d, const char *p, ...)
224 { va_list ap; va_start(ap, p); dstr_vputf(d, p, ap); va_end(ap); }
225
8996f767
MW
226/* Append the next input line from FP to D.
227 *
228 * Return 0 on success, or -1 if reading immediately fails or encounters
229 * end-of-file (call ferror(3) to distinguish). Any trailing newline is
230 * discarded: it is not possible to determine whether the last line was ended
231 * with a newline. D is guaranteed to be null-terminated afterwards.
232 */
7b8ff279
MW
233int dstr_readline(struct dstr *d, FILE *fp)
234{
235 size_t n;
236 int any = 0;
237
238 for (;;) {
239 dstr_ensure(d, 2);
240 if (!fgets(d->p + d->len, d->sz - d->len, fp)) break;
241 n = strlen(d->p + d->len); assert(n > 0); any = 1;
242 d->len += n;
243 if (d->p[d->len - 1] == '\n') { d->p[--d->len] = 0; break; }
244 }
245
246 if (!any) return (-1);
247 else return (0);
248}
249
250/*----- Dynamic vectors of strings ----------------------------------------*/
251
8996f767
MW
252/* Initialize the vector AV.
253 *
254 * Usually you'd use the static initializer `ARGV_INIT'.
255 */
7b8ff279
MW
256void argv_init(struct argv *av)
257 { av->v = 0; av->o = av->n = av->sz = 0; }
258
8996f767 259/* Reset the vector AV so that it's empty again. */
7b8ff279
MW
260void argv_reset(struct argv *av) { av->n = 0; }
261
8996f767 262/* Ensure that AV has at least N unused slots at the end. */
7b8ff279
MW
263void argv_ensure(struct argv *av, size_t n)
264{
265 size_t need = av->n + av->o + n, newsz;
266
267 if (need <= av->sz) return;
268 newsz = av->sz ? 2*av->sz : 8;
269 while (newsz < need) newsz *= 2;
8996f767 270 av->v = xrealloc(av->v - av->o, newsz*sizeof(char *)) + av->o;
7b8ff279
MW
271 av->sz = newsz;
272}
273
8996f767 274/* Ensure that AV has at least N unused slots at the /start/. */
7b8ff279
MW
275void argv_ensure_offset(struct argv *av, size_t n)
276{
277 size_t newoff;
278
279 /* Stupid version. We won't, in practice, be prepending lots of stuff, so
280 * avoid the extra bookkeeping involved in trying to make a double-ended
281 * extendable array asymptotically efficient.
282 */
283 if (av->o >= n) return;
284 newoff = 16;
285 while (newoff < n) newoff *= 2;
286 argv_ensure(av, newoff - av->o);
8996f767 287 memmove(av->v + newoff - av->o, av->v, av->n*sizeof(char *));
7b8ff279
MW
288 av->v += newoff - av->o; av->o = newoff;
289}
290
8996f767
MW
291/* Release the memory held by AV.
292 *
293 * It must be reinitialized (e.g., by `argv_init') before it can be used
294 * again.
295 */
7b8ff279
MW
296void argv_release(struct argv *av) { free(av->v - av->o); }
297
8996f767
MW
298/* Append the pointer P to AV. */
299void argv_append(struct argv *av, char *p)
7b8ff279
MW
300 { argv_ensure(av, 1); av->v[av->n++] = p; }
301
8996f767
MW
302/* Append a null pointer to AV, without extending the vactor length.
303 *
304 * The null pointer will be overwritten when the next string is appended.
305 */
7b8ff279
MW
306void argv_appendz(struct argv *av)
307 { argv_ensure(av, 1); av->v[av->n] = 0; }
308
8996f767
MW
309/* Append a N-element vector V of pointers to AV. */
310void argv_appendn(struct argv *av, char *const *v, size_t n)
7b8ff279
MW
311{
312 argv_ensure(av, n);
313 memcpy(av->v + av->n, v, n*sizeof(const char *));
314 av->n += n;
315}
316
8996f767 317/* Append the variable-length vector BV to AV. */
7b8ff279
MW
318void argv_appendav(struct argv *av, const struct argv *bv)
319 { argv_appendn(av, bv->v, bv->n); }
320
8996f767
MW
321/* Append the pointers from a variable-length argument list AP to AV.
322 *
323 * The list is terminated by a null pointer.
324 */
7b8ff279
MW
325void argv_appendv(struct argv *av, va_list ap)
326{
8996f767
MW
327 char *p;
328 for (;;) { p = va_arg(ap, char *); if (!p) break; argv_append(av, p); }
7b8ff279
MW
329}
330
8996f767 331/* Append the argument pointers, terminated by a null pointer, to AV. */
7b8ff279
MW
332void argv_appendl(struct argv *av, ...)
333 { va_list ap; va_start(ap, av); argv_appendv(av, ap); va_end(ap); }
334
8996f767
MW
335/* Prepend the pointer P to AV. */
336void argv_prepend(struct argv *av, char *p)
7b8ff279
MW
337 { argv_ensure_offset(av, 1); *--av->v = p; av->o--; av->n++; }
338
8996f767
MW
339/* Prepend a N-element vector V of pointers to AV. */
340void argv_prependn(struct argv *av, char *const *v, size_t n)
7b8ff279
MW
341{
342 argv_ensure_offset(av, n);
343 av->o -= n; av->v -= n; av->n += n;
344 memcpy(av->v, v, n*sizeof(const char *));
345}
346
8996f767 347/* Prepend the variable-length vector BV to AV. */
7b8ff279
MW
348void argv_prependav(struct argv *av, const struct argv *bv)
349 { argv_prependn(av, bv->v, bv->n); }
350
8996f767
MW
351/* Prepend the pointers from a variable-length argument list AP to AV.
352 *
353 * The list is terminated by a null pointer.
354 */
7b8ff279
MW
355void argv_prependv(struct argv *av, va_list ap)
356{
8996f767 357 char *p, **v;
7b8ff279
MW
358 size_t n = 0;
359
360 for (;;) {
8996f767 361 p = va_arg(ap, char *); if (!p) break;
7b8ff279
MW
362 argv_prepend(av, p); n++;
363 }
364 v = av->v;
365 while (n >= 2) {
366 p = v[0]; v[0] = v[n - 1]; v[n - 1] = p;
367 v++; n -= 2;
368 }
369}
370
8996f767 371/* Prepend the argument pointers, terminated by a null pointer, to AV. */
7b8ff279
MW
372void argv_prependl(struct argv *av, ...)
373 { va_list ap; va_start(ap, av); argv_prependv(av, ap); va_end(ap); }
374
375/*----- Treaps ------------------------------------------------------------*/
376
8996f767
MW
377/* Return nonzero if the AN-byte string A is strictly precedes the BN-byte
378 * string B in a lexicographic ordering.
379 *
380 * All comparisons of keys is handled by this function.
381 */
382static int str_lt(const char *a, size_t an, const char *b, size_t bn)
383{
384 /* This is a little subtle. We need only compare the first N bytes of the
385 * strings, where N is the length of the shorter string. If this
386 * distinguishes the two strings, then we're clearly done. Otherwise, if
387 * the prefixes are equal then the shorter string is the smaller one. If
388 * the two strings are the same length, then they're equal.
389 *
390 * Hence, if A is the strictly shorter string, then A precedes B if A
391 * precedes or matches the prefix of B; otherwise A only precedes B if A
392 * strictly precedes the prefix of B.
393 */
394 if (an < bn) return (MEMCMP(a, <=, b, an));
395 else return (MEMCMP(a, <, b, bn));
396}
397
398/* Initialize the treap T.
399 *
400 * Usually you'd use the static initializer `TREAP_INIT'.
401 */
7b8ff279
MW
402void treap_init(struct treap *t) { t->root = 0; }
403
8996f767
MW
404/* Look up the KN-byte key K in the treap T.
405 *
406 * Return a pointer to the matching node if one was found, or null otherwise.
407 */
7b8ff279
MW
408void *treap_lookup(const struct treap *t, const char *k, size_t kn)
409{
410 struct treap_node *n = t->root, *candidate = 0;
411
8996f767
MW
412 /* This is a simple prototype for some of the search loops we'll encounter
413 * later. Notice that we use a strict one-sided comparison, rather than
414 * the more conventional two-sided comparison.
415 *
416 * The main loop will find the largest key not greater than K.
417 */
418 while (n)
419 /* Compare the node's key against our key. If the node is too large,
420 * then we ignore it and move left. Otherwise remember this node for
421 * later, and move right to see if we can find a better, larger node.
422 */
423
7b8ff279
MW
424 if (str_lt(k, kn, n->k, n->kn)) n = n->left;
425 else { candidate = n; n = n->right; }
8996f767
MW
426
427 /* If the candidate node is less than our key then we failed. Otherwise,
428 * by trichotomy, we have found the correct node.
429 */
7b8ff279
MW
430 if (!candidate || str_lt(candidate->k, candidate->kn, k, kn)) return (0);
431 return (candidate);
432}
433
8996f767
MW
434/* Look up the KN-byte K in the treap T, recording a path in P.
435 *
436 * This is similar to `treap_lookup', in that it returns the requested node
437 * if it already exists, or null otherwise, but it also records in P
438 * information to be used by `treap_insert' to insert a new node with the
439 * given key it's not there already.
440 */
7b8ff279
MW
441void *treap_probe(struct treap *t, const char *k, size_t kn,
442 struct treap_path *p)
443{
444 struct treap_node **nn = &t->root, *candidate = 0;
445 unsigned i = 0;
446
8996f767
MW
447 /* This walk is similar to `treap_lookup' above, except that we also record
448 * the address of each node pointer we visit along the way.
449 */
7b8ff279
MW
450 for (;;) {
451 assert(i < TREAP_PATHMAX); p->path[i++] = nn;
452 if (!*nn) break;
453 if (str_lt(k, kn, (*nn)->k, (*nn)->kn)) nn = &(*nn)->left;
454 else { candidate = *nn; nn = &(*nn)->right; }
455 }
456 p->nsteps = i;
8996f767
MW
457
458 /* Check to see whether we found the right node. */
7b8ff279
MW
459 if (!candidate || str_lt(candidate->k, candidate->kn, k, kn)) return (0);
460 return (candidate);
461}
462
8996f767
MW
463/* Insert a new node N into T, associating it with the KN-byte key K.
464 *
465 * Use the path data P, from `treap_probe', to help with insertion.
466 */
7b8ff279
MW
467void treap_insert(struct treap *t, const struct treap_path *p,
468 struct treap_node *n, const char *k, size_t kn)
469{
470 size_t i = p->nsteps;
471 struct treap_node **nn, **uu, *u;
472 unsigned wt;
473
8996f767 474 /* Fill in the node structure. */
7b8ff279
MW
475 n->k = xstrndup(k, kn); n->kn = kn;
476 n->wt = wt = rand(); n->left = n->right = 0;
8996f767
MW
477
478 /* Prepare for the insertion.
479 *
480 * The path actually points to each of the links traversed when searching
481 * for the node, starting with the `root' pointer, then the `left' or
482 * `right' pointer of the root node, and so on; `nsteps' will always be
483 * nonzero, since the path will always pass through the root, and the final
484 * step, `path->path[path->nsteps - 1]' will always be the address of a
485 * null pointer onto which the freshly inserted node could be hooked in
486 * order to satisfy the binary-search-tree ordering. (Of course, this will
487 * likely /not/ satisfy the heap condition, so more work needs to be done.)
488 *
489 * Throughout, NN is our current candidate for where to attach the node N.
490 * As the loop progresses, NN will ascend to links further up the tree, and
491 * N will be adjusted to accumulate pieces of the existing tree structure.
492 * We'll stop when we find that the parent node's weight is larger than our
493 * new node's weight, at which point we can just set *NN = N; or if we run
494 * out of steps in the path, in which case *NN is the root pointer.
495 */
7b8ff279
MW
496 assert(i); nn = p->path[--i];
497 while (i--) {
8996f767
MW
498
499 /* Collect the next step in the path, and get the pointer to the node. */
7b8ff279 500 uu = p->path[i]; u = *uu;
8996f767
MW
501
502 /* If this node's weight is higher, then we've found the right level and
503 * we can stop.
504 */
7b8ff279 505 if (wt <= u->wt) break;
8996f767
MW
506
507 /* The node U is lighter than our new node N, so we must rotate in order
508 * to fix things. If we were currently planning to hook N as the left
509 * subtree of U, then we rotate like this:
510 *
511 * | |
512 * U N
513 * / \ / \
514 * (N) Z ---> X U
515 * / \ / \
516 * X Y Y Z
517 *
518 * On the other hand, if we ere planning to hook N as the right subtree
519 * of U, then we do the opposite rotation:
520 *
521 * | |
522 * U N
523 * / \ / \
524 * X (N) ---> U Z
525 * / \ / \
526 * Y Z X Y
527 *
528 * These transformations clearly preserve the ordering of nodes in the
529 * binary search tree, and satisfy the heap condition in the subtree
530 * headed by N.
531 */
7b8ff279
MW
532 if (nn == &u->left) { u->left = n->right; n->right = u; }
533 else { u->right = n->left; n->left = u; }
8996f767
MW
534
535 /* And this arrangement must be attached to UU, or some higher attachment
536 * point. The subtree satisfies the heap condition, and can be attached
537 * safely at the selected place.
538 */
7b8ff279
MW
539 nn = uu;
540 }
8996f767
MW
541
542 /* We've found the right spot. Hook the accumulated subtree into place. */
7b8ff279
MW
543 *nn = n;
544}
545
8996f767
MW
546/* Remove the node with the KN-byte K from T.
547 *
548 * Return the address of the node we removed, or null if it couldn't be
549 * found.
550 */
7b8ff279
MW
551void *treap_remove(struct treap *t, const char *k, size_t kn)
552{
553 struct treap_node **nn = &t->root, **candidate = 0, *n, *l, *r;
554
8996f767
MW
555 /* Search for the matching node, but keep track of the address of the link
556 * which points to our target node.
557 */
558 while (*nn)
7b8ff279
MW
559 if (str_lt(k, kn, (*nn)->k, (*nn)->kn)) nn = &(*nn)->left;
560 else { candidate = nn; nn = &(*nn)->right; }
8996f767
MW
561
562 /* If this isn't the right node then give up. */
7b8ff279
MW
563 if (!candidate || str_lt((*candidate)->k, (*candidate)->kn, k, kn))
564 return (0);
565
8996f767
MW
566 /* Now we need to disentangle the node from the tree. This is essentially
567 * the reverse of insertion: we pretend that this node is suddenly very
568 * light, and mutate the tree so as to restore the heap condition until
569 * eventually our node is a leaf and can be cut off without trouble.
570 *
571 * Throughout, the link *NN notionally points to N, but we don't actually
572 * update it until we're certain what value it should finally take.
573 */
574 nn = candidate; n = *nn; l = n->left; r = n->right;
575 for (;;)
576
577 /* If its left subtree is empty then we can replace our node by its right
578 * subtree and be done. Similarly, if the right subtree is empty then we
579 * replace the node by its left subtree.
580 *
581 * | | | |
582 * (N) ---> R ; (N) ---> L
583 * / \ / \
584 * * R L *
585 */
586 if (!l) { *nn = r; break; }
587 else if (!r) { *nn = l; break; }
588
589 /* Otherwise we need to rotate the pointers so that the heavier of the
590 * two children takes the place of our node; thus we have either
591 *
592 * | |
593 * (N) L
594 * / \ / \
595 * L R ---> X (N)
596 * / \ / \
597 * X Y Y Z
598 *
599 * or
600 *
601 * | |
602 * (N) R
603 * / \ / \
604 * L R ---> (N) Y
605 * / \ / \
606 * X Y L X
607 *
608 * Again, these transformations clearly preserve the ordering of nodes in
609 * the binary search tree, and the heap condition.
610 */
611 else if (l->wt > r->wt) { *nn = l; n->left = l->right; nn = &l->right; }
612 else { *nn = r; n->right = r->left; nn = &r->left; }
613
614 /* Release the key buffer, and return the node that we've now detached. */
615 free(n->k); return (n);
7b8ff279
MW
616}
617
8996f767 618/* Initialize an iterator I over T's nodes. */
7b8ff279
MW
619void treap_start_iter(struct treap *t, struct treap_iter *i)
620{
621 struct treap_node *n = t->root;
622 unsigned sp = 0;
623
8996f767
MW
624 /* The `stack' in the iterator structure is an empty ascending stack of
625 * nodes which have been encountered, and their left subtrees investigated,
626 * but not yet visited by the iteration.
627 *
628 * Iteration begins by stacking the root node, its left child, and so on,
629 * At the end of this, the topmost entry on the stack is the least node of
630 * the tree, followed by its parent, grandparent, and so on up to the root.
631 */
7b8ff279
MW
632 while (n) {
633 assert(sp < TREAP_PATHMAX);
634 i->stack[sp++] = n; n = n->left;
635 }
636 i->sp = sp;
637}
638
8996f767
MW
639/* Return the next node from I, in ascending order by key.
640 *
641 * If there are no more nodes, then return null.
642 */
7b8ff279
MW
643void *treap_next(struct treap_iter *i)
644{
645 struct treap_node *n, *o;
646 unsigned sp = i->sp;
647
8996f767
MW
648 /* We say that a node is /visited/ once it's been returned by this
649 * iterator. To traverse a tree in order, then, we traverse its left
650 * subtree, visit the tree root, and traverse its right subtree -- which is
651 * a fine recursive definition, but we need a nonrecursive implementation.
652 *
653 * As is usual in this kind of essential structural recursion, we maintain
654 * a stack. The invariant that we'll maintain is as follows.
655 *
656 * 1. If the stack is empty, then all nodes have been visited.
657 *
658 * 2, If the stack is nonempty then the topmost entry on the stack is the
659 * least node which has not yet been visited -- and therefore is the
660 * next node to visit.
661 *
662 * 3. The earlier entries in the stack are, in (top to bottom) order,
663 * those of the topmost node's parent, grandparent, etc., up to the
664 * root, which have not yet been visited. More specifically, a node
665 * appears in the stack if and only if some node in its left subtree
666 * is nearer the top of the stack.
667 *
668 * When we initialized the iterator state (in `treap_start_iter' above), we
669 * traced a path to the leftmost leaf, stacking the root, its left-hand
670 * child, and so on. The leftmost leaf is clearly the first node to be
671 * visited, and its entire ancestry is on the stack since none of these
672 * nodes has yet been visited. (If the tree is empty, then we have done
673 * nothing, the stack is empty, and there are no nodes to visit.) This
674 * establishes the base case for the induction.
675 */
676
677 /* So, if the stack is empty now, then (1) all of the nodes have been
678 * visited and there's nothing left to do. Return null.
679 */
7b8ff279 680 if (!sp) return (0);
8996f767
MW
681
682 /* It's clear that, if we pop the topmost element of the stack, visit it,
683 * and arrange to reestablish the invariant, then we'll visit the nodes in
684 * the correct order, pretty much by definition.
685 *
686 * So, pop a node off the stack. This is the node we shall return. But
687 * before we can do that, we must reestablish the above invariant.
688 * Firstly, the current node is removed from the stack, because we're about
689 * to visit it, and visited nodes don't belong on the stack. Then there
690 * are two cases to consider.
691 *
692 * * If the current node's right subtree is not empty, then the next node
693 * to be visited is the leftmost node in that subtree. All of the
694 * nodes on the stack are ancestors of the current nodes, and the right
695 * subtree consists of its descendants, so none of them are already on
696 * the stack, and they're all greater than the current node, and
697 * therefore haven't been visited. Therefore, we must push the current
698 * node's right child, its /left/ child, and so on, proceeding
699 * leftwards until we fall off the bottom of the tree.
700 *
701 * * Otherwise, we've finished traversing some subtree. Either we are
702 * now done, or (3) we have just finished traversing the left subtree
703 * of the next topmost item on the stack. This must therefore be the
704 * next node to visit. The rest of the stack is already correct.
705 */
7b8ff279
MW
706 n = i->stack[--sp];
707 o = n->right;
708 while (o) {
709 assert(sp < TREAP_PATHMAX);
710 i->stack[sp++] = o; o = o->left;
711 }
712 i->sp = sp;
713 return (n);
714}
715
8996f767
MW
716/* Recursively check the subtree headed by N.
717 *
718 * No node should have weight greater than MAXWT, to satisfy the heap
719 * condition; if LO is not null, then all node keys should be strictly
720 * greater than LO, and, similarly, if HI is not null, then all keys should
721 * be strictly smaller than HI.
722 */
723static void check_subtree(struct treap_node *n, unsigned maxwt,
724 const char *klo, const char *khi)
7b8ff279 725{
8996f767 726 /* Check the heap condition. */
7b8ff279 727 assert(n->wt <= maxwt);
8996f767
MW
728
729 /* Check that the key is in bounds. (Use `strcmp' here to ensure that our
730 * own `str_lt' is working correctly.)
731 */
7b8ff279
MW
732 if (klo) assert(STRCMP(n->k, >, klo));
733 if (khi) assert(STRCMP(n->k, <, khi));
8996f767
MW
734
735 /* Check the left subtree. Node weights must be bounded above by our own
736 * weight. And everykey in the left subtree must be smaller than our
737 * current key. We propagate the lower bound.
738 */
739 if (n->left) check_subtree(n->left, n->wt, klo, n->k);
740
741 /* Finally, check the right subtree. This time, every key must be larger
742 * than our key, and we propagate the upper bound.
743 */
744 if (n->right) check_subtree(n->right, n->wt, n->k, khi);
7b8ff279
MW
745}
746
8996f767 747/* Check the treap structure rules for T. */
7b8ff279 748void treap_check(struct treap *t)
8996f767 749 { if (t->root) check_subtree(t->root, t->root->wt, 0, 0); }
7b8ff279 750
8996f767
MW
751/* Recursively dump the subtree headed by N, indenting the output lines by
752 * IND spaces.
753 */
7b8ff279
MW
754static void dump_node(struct treap_node *n, int ind)
755{
756 if (n->left) dump_node(n->left, ind + 1);
757 printf(";;%*s [%10u] `%s'\n", 2*ind, "", n->wt, n->k);
758 if (n->right) dump_node(n->right, ind + 1);
759}
760
8996f767 761/* Dump the treap T to standard output, for debugging purposes. */
7b8ff279
MW
762void treap_dump(struct treap *t) { if (t->root) dump_node(t->root, 0); }
763
764/*----- Configuration file parsing ----------------------------------------*/
765
766#ifndef DECL_ENVIRON
767 extern char **environ;
768#endif
769
8996f767
MW
770/* Advance P past a syntactically valid name, but no further than L.
771 *
772 * Return the new pointer. If no name is found, report an error, blaming
773 * FILE and LINE; WHAT is an adjective for the kind of name that was
774 * expected.
775 */
776static const char *scan_name(const char *what,
777 const char *p, const char *l,
778 const char *file, unsigned line)
779{
780 const char *q = p;
781
782 while (q < l &&
783 (ISALNUM(*q) || *q == '-' || *q == '_' || *q == '.' || *q == '/' ||
784 *q == '*' || *q == '+' || *q == '%' || *q == '@'))
785 q++;
786 if (q == p) lose("%s:%u: expected %s name", file, line, what);
787 return (q);
788}
789
790/* Initialize the configuration state CONF.
791 *
792 * Usually you'd use the static initializer `CONFIG_INIT'.
793 */
7b8ff279
MW
794void config_init(struct config *conf)
795 { treap_init(&conf->sections); }
796
8996f767
MW
797/* Find and return the section with null-terminated NAME in CONF.
798 *
799 * If no section is found, the behaviour depends on whether `CF_CREAT' is set
800 * in F: if so, an empty section is created and returned; otherwise, a null
801 * pointer is returned.
802 */
7b8ff279
MW
803struct config_section *config_find_section(struct config *conf, unsigned f,
804 const char *name)
805 { return (config_find_section_n(conf, f, name, strlen(name))); }
806
8996f767
MW
807/* Find and return the section with the SZ-byte NAME in CONF.
808 *
809 * This works like `config_find_section', but with an explicit length for the
810 * NAME rather than null-termination.
811 */
7b8ff279
MW
812struct config_section *config_find_section_n(struct config *conf, unsigned f,
813 const char *name, size_t sz)
814{
815 struct config_section *sect;
816 struct treap_path path;
817
818 if (!(f&CF_CREAT))
819 sect = treap_lookup(&conf->sections, name, sz);
820 else {
821 sect = treap_probe(&conf->sections, name, sz, &path);
822 if (!sect) {
823 sect = xmalloc(sizeof(*sect));
824 if (!conf->head) conf->tail = &conf->head;
825 sect->next = 0; *conf->tail = sect; conf->tail = &sect->next;
826 sect->parents = 0; sect->nparents = SIZE_MAX;
827 treap_init(&sect->vars); treap_init(&sect->cache);
828 treap_insert(&conf->sections, &path, &sect->_node, name, sz);
8996f767 829 config_set_var_n(conf, sect, CF_LITERAL, "@name", 5, name, sz);
7b8ff279
MW
830 }
831 }
832 return (sect);
833}
834
8996f767
MW
835/* Set the fallback section for CONF to be SECT.
836 *
837 * That is, if a section has no explicit parents, then by default it will
838 * have a single parent which is SECT. If SECT is null then there is no
839 * fallback section, and sections which don't have explicitly specified
840 * parents have no parents at all. (This is the default situation.)
841 */
7b8ff279 842void config_set_fallback(struct config *conf, struct config_section *sect)
8996f767 843 { conf->fallback = sect; }
7b8ff279 844
8996f767
MW
845/* Arrange that SECT has PARENT as its single parent section.
846 *
847 * If PARENT is null, then arrange that SECT has no parents at all. In
848 * either case, any `@parents' setting will be ignored.
849 */
7b8ff279
MW
850void config_set_parent(struct config_section *sect,
851 struct config_section *parent)
852{
853 if (!parent)
854 sect->nparents = 0;
855 else {
856 sect->parents = xmalloc(sizeof(*sect->parents));
857 sect->parents[0] = parent; sect->nparents = 1;
858 }
859}
860
8996f767 861/* Initialize I to iterate over the sections defined in CONF. */
7b8ff279
MW
862void config_start_section_iter(struct config *conf,
863 struct config_section_iter *i)
864 { i->sect = conf->head; }
865
8996f767
MW
866/* Return the next section from I, in order of creation.
867 *
868 * If there are no more sections, then return null.
869 */
7b8ff279
MW
870struct config_section *config_next_section(struct config_section_iter *i)
871{
872 struct config_section *sect;
873
874 sect = i->sect;
875 if (sect) i->sect = sect->next;
876 return (sect);
877}
878
8996f767
MW
879/* Initialize the `parents' links of SECT, if they aren't set up already.
880 *
881 * If SECT contains a `@parents' setting then parse it to determine the
882 * parents; otherwise use CONF's fallbeck section, as established by
883 * `config_set_fallback'.
884 */
7b8ff279
MW
885static void set_config_section_parents(struct config *conf,
886 struct config_section *sect)
887{
888 struct config_section *parent;
889 struct config_var *var;
8996f767 890 const char *file; unsigned line;
7b8ff279 891 size_t i, n;
8996f767
MW
892 char *p, *q, *l;
893 struct argv av = ARGV_INIT;
7b8ff279 894
8996f767
MW
895 /* If the section already has parents established then there's nothing to
896 * do.
897 */
7b8ff279
MW
898 if (sect->nparents != SIZE_MAX) return;
899
8996f767
MW
900 /* Look up `@parents', without recursion! */
901 var = treap_lookup(&sect->vars, "@parents", 8);
7b8ff279 902 if (!var) {
8996f767
MW
903 /* No explicit setting: use the fallback setting. */
904
905 if (!conf->fallback || conf->fallback == sect)
7b8ff279
MW
906 sect->nparents = 0;
907 else {
8996f767 908 sect->parents = xmalloc(sizeof(*sect->parents)); sect->nparents = 1;
7b8ff279
MW
909 sect->parents[0] = conf->fallback;
910 }
911 } else {
8996f767
MW
912 /* Found a `@parents' list: parse it and set the parents list. */
913
914 file = var->file; line = var->line; if (!file) file = "<internal>";
915
916 /* We do this in two phases. First, we parse out the section names, and
917 * record start/limit pointer pairs in `av'.
918 */
919 p = var->val; l = p + var->n; while (p < l && ISSPACE(*p)) p++;
920 while (*p) {
921 q = p;
922 p = (/*unconst*/ char *)scan_name("parent section", p, l, file, line);
923 argv_append(&av, q); argv_append(&av, p);
924 while (p < l && ISSPACE(*p)) p++;
925 if (p >= l) break;
926 if (*p == ',') do p++; while (ISSPACE(*p));
7b8ff279 927 }
8996f767
MW
928
929 /* Now that we've finished parsing, we know how many parents we're going
930 * to have, so we can allocate the `parents' vector and fill it in.
931 */
7b8ff279
MW
932 sect->nparents = av.n/2;
933 sect->parents = xmalloc(sect->nparents*sizeof(sect->parents));
934 for (i = 0; i < av.n; i += 2) {
935 n = av.v[i + 1] - av.v[i];
936 parent = config_find_section_n(conf, 0, av.v[i], n);
937 if (!parent)
938 lose("%s:%u: unknown parent section `%.*s'",
8996f767 939 file, line, (int)n, av.v[i]);
7b8ff279
MW
940 sect->parents[i/2] = parent;
941 }
7b8ff279 942 }
8996f767
MW
943
944 /* All done. */
945 argv_release(&av);
7b8ff279
MW
946}
947
8996f767
MW
948/* Find a setting of the SZ-byte variable NAME in CONF, starting from SECT.
949 *
950 * If successful, return a pointer to the variable; otherwise return null.
951 * Inheritance cycles and ambiguous inheritance are diagnosed as fatal
952 * errors.
953 */
7b8ff279
MW
954struct config_var *search_recursive(struct config *conf,
955 struct config_section *sect,
956 const char *name, size_t sz)
957{
958 struct config_cache_entry *cache;
959 struct treap_path path;
960 struct config_var *var, *v;
961 size_t i, j = j;
962
8996f767
MW
963 /* If the variable is defined locally then we can just return it. */
964 var = treap_lookup(&sect->vars, name, sz); if (var) return (var);
965
966 /* If we have no parents then there's no way we can find it. */
967 set_config_section_parents(conf, sect);
968 if (!sect->parents) return (0);
969
970 /* Otherwise we must visit the section's parents. We can avoid paying for
971 * this on every lookup by using a cache. If there's already an entry for
972 * this variable then we can return the result immediately (note that we
973 * cache both positive and negative outcomes). Otherwise we create a new
974 * cache entry, do the full recursive search, and fill in the result when
975 * we're done.
976 *
977 * The cache also helps us detect cycles: we set the `CF_OPEN' flag on a
978 * new cache entry when it's first created, and clear it when we fill in
979 * the result: if we encounter an open cache entry again, we know that
980 * we've found a cycle.
981 */
7b8ff279
MW
982 cache = treap_probe(&sect->cache, name, sz, &path);
983 if (!cache) {
984 cache = xmalloc(sizeof(*cache)); cache->f = CF_OPEN;
985 treap_insert(&sect->cache, &path, &cache->_node, name, sz);
986 } else if (cache->f&CF_OPEN)
987 lose("inheritance cycle through section `%s'",
988 CONFIG_SECTION_NAME(sect));
989 else
990 return (cache->var);
991
8996f767
MW
992 /* Recursively search in each parent. We insist that all parents that find
993 * a variable find the same binding; otherwise we declare ambiguous
994 * inheritance.
995 */
996 for (i = 0; i < sect->nparents; i++) {
997 v = search_recursive(conf, sect->parents[i], name, sz);
998 if (!v);
999 else if (!var) { var = v; j = i; }
1000 else if (var != v)
1001 lose("section `%s' inherits variable `%s' ambiguously "
1002 "via `%s' and `%s'",
1003 CONFIG_SECTION_NAME(sect), CONFIG_VAR_NAME(var),
1004 CONFIG_SECTION_NAME(sect->parents[j]),
1005 CONFIG_SECTION_NAME(sect->parents[i]));
7b8ff279
MW
1006 }
1007
8996f767
MW
1008 /* All done: fill the cache entry in, clear the open flag, and return the
1009 * result.
1010 */
7b8ff279
MW
1011 cache->var = var; cache->f &= ~CF_OPEN;
1012 return (var);
1013}
1014
8996f767
MW
1015/* Find and return the variable with null-terminated NAME in SECT.
1016 *
1017 * If `CF_INHERIT' is set in F, then the function searches the section's
1018 * parents recursively; otherwise, it only checks to see whether the variable
1019 * is set directly in SECT.
1020 *
1021 * If no variable is found, the behaviour depends on whether `CF_CREAT' is
1022 * set in F: if so, an empty variable is created and returned; otherwise, a
1023 * null pointer is returned.
1024 *
1025 * Setting both `CF_INHERIT' and `CF_CREAT' is not useful.
1026 */
7b8ff279
MW
1027struct config_var *config_find_var(struct config *conf,
1028 struct config_section *sect,
1029 unsigned f, const char *name)
1030 { return (config_find_var_n(conf, sect, f, name, strlen(name))); }
1031
8996f767
MW
1032/* Find and return the variable with the given SZ-byte NAME in SECT.
1033 *
1034 * This works like `config_find_var', but with an explicit length for the
1035 * NAME rather than null-termination.
1036 */
7b8ff279
MW
1037struct config_var *config_find_var_n(struct config *conf,
1038 struct config_section *sect,
1039 unsigned f, const char *name, size_t sz)
1040{
1041 struct config_var *var;
1042 struct treap_path path;
1043
1044 if (f&CF_INHERIT)
1045 var = search_recursive(conf, sect, name, sz);
1046 else if (!(f&CF_CREAT))
1047 var = treap_lookup(&sect->vars, name, sz);
1048 else {
1049 var = treap_probe(&sect->vars, name, sz, &path);
1050 if (!var) {
1051 var = xmalloc(sizeof(*var));
1052 var->val = 0; var->file = 0; var->f = 0; var->line = 1;
1053 treap_insert(&sect->vars, &path, &var->_node, name, sz);
1054 }
1055 }
1056 return (var);
1057}
1058
8996f767
MW
1059/* Set variable NAME to VALUE in SECT, with associated flags F.
1060 *
1061 * The names are null-terminated. The flags are variable flags: see `struct
1062 * config_var' for details.
1063 *
1064 * If the variable is already set and has the `CF_OVERRIDE' flag, then this
1065 * function does nothing unless `CF_OVERRIDE' is /also/ set in F.
1066 */
7b8ff279 1067void config_set_var(struct config *conf, struct config_section *sect,
8996f767 1068 unsigned f, const char *name, const char *value)
7b8ff279
MW
1069{
1070 config_set_var_n(conf, sect, f,
1071 name, strlen(name),
1072 value, strlen(value));
1073}
1074
8996f767
MW
1075/* As `config_set_var', except that the variable NAME and VALUE have explicit
1076 * lengths (NAMELEN and VALUELEN, respectively) rather than being null-
1077 * terminated.
1078 */
7b8ff279
MW
1079void config_set_var_n(struct config *conf, struct config_section *sect,
1080 unsigned f,
1081 const char *name, size_t namelen,
1082 const char *value, size_t valuelen)
1083{
1084 struct config_var *var =
1085 config_find_var_n(conf, sect, CF_CREAT, name, namelen);
1086
1087 if (var->f&~f&CF_OVERRIDE) return;
1088 free(var->val); var->val = xstrndup(value, valuelen); var->n = valuelen;
1089 var->f = f;
1090}
1091
8996f767
MW
1092/* Initialize I to iterate over the variables directly defined in SECT. */
1093void config_start_var_iter(struct config *conf, struct config_section *sect,
1094 struct config_var_iter *i)
1095 { treap_start_iter(&sect->vars, &i->i); }
1096
1097/* Return next variable from I, in ascending lexicographical order.
1098 *
1099 * If there are no more variables, then return null.
1100 */
1101struct config_var *config_next_var(struct config_var_iter *i)
1102 { return (treap_next(&i->i)); }
1103
1104/* Read and parse configuration FILE, applying its settings to CONF.
1105 *
1106 * If all goes well, the function returns 0. If the file is not found, then
1107 * the behaviour depends on whether `CF_NOENTOK' is set in F: if so, then the
1108 * function simply returns -1. Otherwise, a fatal error is reported. Note
1109 * that this /only/ applies if the file does not exist (specifically, opening
1110 * it fails with `ENOENT') -- any other problems are reported as fatal
1111 * errors regardless of the flag setting.
1112 */
7b8ff279
MW
1113int config_read_file(struct config *conf, const char *file, unsigned f)
1114{
1115 struct config_section *sect;
1116 struct config_var *var;
1117 struct dstr d = DSTR_INIT, dd = DSTR_INIT;
1118 unsigned line = 0;
8996f767 1119 const char *p, *q, *r;
7b8ff279
MW
1120 FILE *fp;
1121
8996f767 1122 /* Try to open the file. */
7b8ff279
MW
1123 fp = fopen(file, "r");
1124 if (!fp) {
1125 if ((f&CF_NOENTOK) && errno == ENOENT) return (-1);
1126 lose("failed to open configuration file `%s': %s",
1127 file, strerror(errno));
1128 }
1129
8996f767 1130 /* Find the initial section. */
7b8ff279
MW
1131 sect = config_find_section(conf, CF_CREAT, "@CONFIG"); var = 0;
1132
8996f767 1133 /* Work through the file, line by line. */
7b8ff279
MW
1134 for (;;) {
1135 dstr_reset(&d); if (dstr_readline(&d, fp)) break;
1136 line++;
1137
8996f767
MW
1138 /* Trim trailing spaces from the line. The syntax is sensitive to
1139 * leading spaces, so we can't trim those yet.
1140 */
1141 while (d.len && ISSPACE(d.p[d.len - 1])) d.len--;
1142 d.p[d.len] = 0;
1143
1144 if (!*d.p || *d.p == ';')
1145 /* Ignore comments entirely. (In particular, a comment doesn't
1146 * interrupt a multiline variable value.)
1147 */
1148 ;
1149
1150 else if (ISSPACE(d.p[0])) {
1151 /* The line starts with whitespace, so it's a continuation line. */
1152
1153 /* Skip the initial whitespace. */
1154 p = d.p; while (ISSPACE(*p)) p++;
1155
1156 /* If we aren't collecting a variable value then this is an error.
1157 * Otherwise, accumulate it into the current value.
1158 */
1159 if (!var)
1160 lose("%s:%u: continuation line, but no variable", file, line);
1161 if (dd.len) dstr_putc(&dd, ' ');
1162 dstr_putm(&dd, p, d.len - (p - d.p));
1163
1164 } else {
1165 /* The line starts in the first column. */
1166
1167 /* If there's a value value being collected then we must commit it to
1168 * its variable (unless there's already a setting there that says we
1169 * shouldn't).
1170 */
7b8ff279
MW
1171 if (var) {
1172 if (!(var->f&CF_OVERRIDE))
1173 { var->val = xstrndup(dd.p, dd.len); var->n = dd.len; }
1174 var = 0;
1175 }
8996f767
MW
1176
1177 /* Now decide what kind of line this is. */
1178 if (d.p[0] == '[') {
1179 /* It's a section header. */
1180
1181 /* Parse the header. */
1182 p = d.p + 1; while (ISSPACE(*p)) p++;
1183 q = scan_name("section", p, d.p + d.len, file, line);
1184 r = q; while (ISSPACE(*r)) r++;
1185 if (*r != ']')
1186 lose("%s:%u: expected `]' in section header", file, line);
1187 if (r[1])
1188 lose("%s:%u: trailing junk after `]' in section header",
1189 file, line);
1190
1191 /* Create the new section. */
7b8ff279 1192 sect = config_find_section_n(conf, CF_CREAT, p, q - p);
8996f767 1193
7b8ff279 1194 } else {
8996f767
MW
1195 /* It's a variable assignment. Parse the name out. */
1196 p = scan_name("variable", d.p, d.p + d.len, file, line);
7b8ff279
MW
1197 var = config_find_var_n(conf, sect, CF_CREAT, d.p, p - d.p);
1198 while (ISSPACE(*p)) p++;
1199 if (*p != '=') lose("%s:%u: missing `=' in assignment", file, line);
1200 p++; while (ISSPACE(*p)) p++;
8996f767
MW
1201
1202 /* Clear out the variable's initial value, unless we shouldn't
1203 * override it.
1204 */
7b8ff279
MW
1205 if (!(var->f&CF_OVERRIDE)) {
1206 free(var->val); var->val = 0; var->f = 0;
1207 free(var->file); var->file = xstrdup(file); var->line = line;
1208 }
1209 dstr_reset(&dd); dstr_puts(&dd, p);
1210 }
7b8ff279
MW
1211 }
1212 }
1213
8996f767 1214 /* If there's a value under construction then commit the result. */
7b8ff279
MW
1215 if (var && !(var->f&CF_OVERRIDE))
1216 { var->val = xstrndup(dd.p, dd.len); var->n = dd.len; }
1217
8996f767 1218 /* Close the file. */
7b8ff279
MW
1219 if (fclose(fp))
1220 lose("error reading configuration file `%s': %s", file, strerror(errno));
8996f767
MW
1221
1222 /* All done. */
1223 dstr_release(&d); dstr_release(&dd);
7b8ff279
MW
1224 return (0);
1225}
1226
8996f767
MW
1227/* Populate SECT with environment variables.
1228 *
1229 * Environment variables are always set with `CF_LITERAL'.
1230 */
7b8ff279
MW
1231void config_read_env(struct config *conf, struct config_section *sect)
1232{
1233 const char *p, *v;
1234 size_t i;
1235
1236 for (i = 0; (p = environ[i]) != 0; i++) {
1237 v = strchr(p, '='); if (!v) continue;
1238 config_set_var_n(conf, sect, CF_LITERAL, p, v - p, v + 1, strlen(v + 1));
1239 }
1240}
1241
1242/*----- Substitution and quoting ------------------------------------------*/
1243
8996f767
MW
1244/* The substitution and word-splitting state.
1245 *
1246 * This only keeps track of the immutable parameters for the substitution
1247 * task: stuff which changes (flags, filtering state, cursor position) is
1248 * maintained separately.
1249 */
7b8ff279 1250struct subst {
8996f767
MW
1251 struct config *config; /* configuration state */
1252 struct config_section *home; /* home section for lookups */
1253 struct dstr *d; /* current word being constructed */
1254 struct argv *av; /* output word list */
7b8ff279
MW
1255};
1256
8996f767
MW
1257/* Flags for `subst' and related functions. */
1258#define SF_SPLIT 0x0001u /* split at (unquoted) whitespace */
1259#define SF_QUOT 0x0002u /* currently within double quotes */
1260#define SF_SUBST 0x0004u /* apply `$-substitutions */
1261#define SF_SUBEXPR 0x0008u /* stop at delimiter `|' or `}' */
1262#define SF_SPANMASK 0x00ffu /* mask for the above */
1263
1264#define SF_WORD 0x0100u /* output word under construction */
1265#define SF_SKIP 0x0200u /* not producing output */
1266#define SF_LITERAL 0x0400u /* do not expand or substitute */
1267#define SF_UPCASE 0x0800u /* convert to uppercase */
1268#define SF_DOWNCASE 0x1000u /* convert to lowercase */
1269#define SF_CASEMASK 0x1800u /* mask for case conversions */
1270
1271/* Apply filters encoded in QFILT and F to the text from P to L, and output.
1272 *
1273 * SB is the substitution state which, in particular, explains where the
1274 * output should go.
1275 *
1276 * The filters are encoded as flags `SF_UPCASE' and `SF_DOWNCASE' for case
1277 * conversions, and a nesting depth QFILT for toothpick escaping. (QFILT is
1278 * encoded as the number of toothpicks to print: see `subst' for how this
1279 * determined.)
1280 */
1281static void filter_string(const char *p, const char *l,
1282 const struct subst *sb, unsigned qfilt, unsigned f)
7b8ff279
MW
1283{
1284 size_t r, n;
8996f767 1285 char *q; const char *pp, *ll;
7b8ff279 1286
8996f767
MW
1287 if (!qfilt && !(f&SF_CASEMASK))
1288 /* Fast path: there's nothing to do: just write to the output. */
7b8ff279 1289 dstr_putm(sb->d, p, l - p);
8996f767 1290
7b8ff279 1291 else for (;;) {
8996f767
MW
1292 /* We must be a bit more circumspect. */
1293
1294 /* Determine the length of the next span of characters which don't need
1295 * escaping. (If QFILT is zero then this is everything.)
1296 */
1297 r = l - p; n = qfilt ? strcspn(p, "\"\\") : r;
7b8ff279 1298 if (n > r) n = r;
8996f767
MW
1299
1300 if (!(f&SF_CASEMASK))
1301 /* No case conversion: we can just emit this chunk. */
1302
1303 dstr_putm(sb->d, p, n);
1304
1305 else {
1306 /* Case conversion to do. Arrange enough space for the output, and
1307 * convert it character by character.
1308 */
1309
1310 dstr_ensure(sb->d, n); q = sb->d->p + sb->d->len; pp = p; ll = p + n;
1311 if (f&SF_DOWNCASE) while (pp < ll) *q++ = TOLOWER(*pp++);
1312 else if (f&SF_UPCASE) while (pp < ll) *q++ = TOUPPER(*pp++);
1313 sb->d->len += n;
1314 }
1315
1316 /* If we've reached the end then stop. */
7b8ff279 1317 if (n >= r) break;
8996f767
MW
1318
1319 /* Otherwise we must have found a character which requires escaping.
1320 * Emit enough toothpicks.
1321 */
1322 dstr_putcn(sb->d, '\\', qfilt);
1323
1324 /* This character is now done, so we can skip over and see if there's
1325 * another chunk of stuff we can do at high speed.
1326 */
1327 dstr_putc(sb->d, p[n]); p += n + 1;
7b8ff279
MW
1328 }
1329}
1330
8996f767
MW
1331/* Scan and resolve a `[SECT:]VAR' specifier at P.
1332 *
1333 * Return the address of the next character following the specifier. L is a
1334 * limit on the region of the buffer that we should process; SB is the
1335 * substitution state which provides the home section if none is given
1336 * explicitly; FILE and LINE are the source location to blame for problems.
1337 */
7b8ff279 1338static const char *retrieve_varspec(const char *p, const char *l,
8996f767
MW
1339 const struct subst *sb,
1340 struct config_var **var_out,
1341 const char *file, unsigned line)
7b8ff279
MW
1342{
1343 struct config_section *sect = sb->home;
1344 const char *t;
1345
8996f767 1346 t = scan_name("section or variable", p, l, file, line);
7b8ff279
MW
1347 if (t < l && *t == ':') {
1348 sect = config_find_section_n(sb->config, 0, p, t - p);
8996f767 1349 p = t + 1; t = scan_name("variable", p, l, file, line);
7b8ff279
MW
1350 }
1351
1352 if (!sect) *var_out = 0;
1353 else *var_out = config_find_var_n(sb->config, sect, CF_INHERIT, p, t - p);
1354 return (t);
1355}
1356
8996f767
MW
1357/* Substitute and/or word-split text.
1358 *
1359 * The input text starts at P, and continues to (just before) L. Context for
1360 * the task is provided by SB; the source location to blame is FILE and LINE
1361 * (FILE may be null so that this can be passed directly from a `config_var'
1362 * without further checking); QFILT is the nesting depth in toothpick-
1363 * escaping; and F holds a mask of `SF_...' flags.
1364 */
1365static const char *subst(const char *p, const char *l,
1366 const struct subst *sb,
7b8ff279
MW
1367 const char *file, unsigned line,
1368 unsigned qfilt, unsigned f)
1369{
1370 struct config_var *var;
1371 const char *q0, *q1, *t;
1372 unsigned subqfilt, ff;
1373 size_t n;
1374
8996f767
MW
1375 /* It would be best if we could process literal text at high speed. To
1376 * this end,
1377 */
1378 static const char *const delimtab[] = {
1379
1380#define ESCAPE "\\" /* always watch for `\'-escapes */
1381#define SUBST "$" /* check for `$' if `SF_SUBST' set */
1382#define WORDSEP " \f\r\n\t\v'\"" /* space, quotes if `SF_SPLIT' but
1383 * not `SF_QUOT' */
1384#define QUOT "\"" /* only quotes if `SF_SPLIT' and
1385 * `SF_QUOT' */
1386#define DELIM "|}" /* end delimiters of `SF_SUBEXPR' */
1387
1388 ESCAPE,
1389 ESCAPE WORDSEP,
1390 0,
1391 ESCAPE QUOT,
1392 ESCAPE SUBST,
1393 ESCAPE SUBST WORDSEP,
1394 0,
1395 ESCAPE SUBST QUOT,
1396 ESCAPE DELIM,
1397 ESCAPE DELIM WORDSEP,
1398 0,
1399 ESCAPE DELIM QUOT,
1400 ESCAPE DELIM SUBST,
1401 ESCAPE DELIM SUBST WORDSEP,
1402 0,
1403 ESCAPE DELIM SUBST QUOT
7b8ff279
MW
1404
1405#undef COMMON
1406#undef WORDSEP
1407#undef SQUOT
1408#undef DELIM
8996f767 1409 };
7b8ff279 1410
8996f767 1411 /* Set FILE to be useful if it was null on entry. */
7b8ff279
MW
1412 if (!file) file = "<internal>";
1413
8996f767
MW
1414 /* If the text is literal then hand off to `filter_string'. This obviously
1415 * starts a word.
1416 */
7b8ff279 1417 if (f&SF_LITERAL) {
8996f767 1418 filter_string(p, l, sb, qfilt, f);
7b8ff279
MW
1419 f |= SF_WORD;
1420 goto done;
1421 }
1422
8996f767 1423 /* Chew through the input until it's all gone. */
7b8ff279
MW
1424 while (p < l) {
1425
1426 if ((f&(SF_SPLIT | SF_QUOT)) == SF_SPLIT && ISSPACE(*p)) {
8996f767
MW
1427 /* This is whitespace, we're supposed to split, and we're not within
1428 * quotes, so we should split here.
1429 */
1430
1431 /* If there's a word in progress then we should commit it. */
7b8ff279
MW
1432 if (f&SF_WORD) {
1433 if (!(f&SF_SKIP)) {
1434 argv_append(sb->av, xstrndup(sb->d->p, sb->d->len));
1435 dstr_reset(sb->d);
1436 }
1437 f &= ~SF_WORD;
1438 }
8996f767
MW
1439
1440 /* Skip over further whitespace at high speed. */
7b8ff279
MW
1441 do p++; while (p < l && ISSPACE(*p));
1442
1443 } else if (*p == '\\') {
8996f767
MW
1444 /* This is a toothpick, so start a new word and add the next character
1445 * to it.
1446 */
1447
1448 /* If there's no next charact3er then we should be upset. */
1449 p++; if (p >= l) lose("%s:%u: unfinished `\\' escape", file, line);
1450
7b8ff279 1451 if (!(f&SF_SKIP)) {
8996f767
MW
1452
1453 /* If this is a double quote or backslash then check DFLT to see if
1454 * it needs escaping.
1455 */
7b8ff279
MW
1456 if (qfilt && (*p == '"' || *p == '\\'))
1457 dstr_putcn(sb->d, '\\', qfilt);
8996f767
MW
1458
1459 /* Output the character. */
1460 if (f&SF_DOWNCASE) dstr_putc(sb->d, TOLOWER(*p));
1461 else if (f&SF_UPCASE) dstr_putc(sb->d, TOUPPER(*p));
1462 else dstr_putc(sb->d, *p);
7b8ff279 1463 }
8996f767
MW
1464
1465 /* Move past the escaped character. Remember we started a word. */
1466 p++; f |= SF_WORD;
7b8ff279
MW
1467
1468 } else if ((f&SF_SPLIT) && *p == '"') {
8996f767
MW
1469 /* This is a double quote, and we're word splitting. We're definitely
1470 * in a word now. Toggle whether we're within quotes.
1471 */
1472
7b8ff279
MW
1473 f ^= SF_QUOT; f |= SF_WORD; p++;
1474
1475 } else if ((f&(SF_SPLIT | SF_QUOT)) == SF_SPLIT && *p == '\'') {
8996f767
MW
1476 /* This is a single quote, and we're word splitting but not within
1477 * double quotes. Find the matching end quote, and just output
1478 * everything between literally.
1479 */
1480
1481 p++; t = strchr(p, '\'');
1482 if (!t || t >= l) lose("%s:%u: missing `''", file, line);
1483 if (!(f&SF_SKIP)) filter_string(p, t, sb, qfilt, f);
7b8ff279
MW
1484 p = t + 1; f |= SF_WORD;
1485
1486 } else if ((f&SF_SUBEXPR) && (*p == '|' || *p == '}')) {
8996f767 1487 /* This is an end delimiter, and we're supposed to stop here. */
7b8ff279
MW
1488 break;
1489
1490 } else if ((f&SF_SUBST) && *p == '$') {
8996f767
MW
1491 /* This is a `$' and we're supposed to do substitution. */
1492
1493 /* The kind of substitution is determined by the next character. */
7b8ff279 1494 p++; if (p >= l) lose("%s:%u: incomplete substitution", file, line);
8996f767
MW
1495
1496 /* Prepare flags for a recursive substitution.
1497 *
1498 * Hide our quote state from the recursive call. If we're within a
1499 * word, then disable word-splitting.
1500 */
7b8ff279 1501 ff = f&~(SF_QUOT | (f&SF_WORD ? SF_SPLIT : 0));
8996f767
MW
1502
1503 /* Now dispatch based on the following character. */
7b8ff279
MW
1504 switch (*p) {
1505
1506 case '?':
8996f767
MW
1507 /* A conditional expression: $?VAR{CONSEQ[|ALT]} */
1508
1509 /* Skip initial space. */
1510 p++; while (p < l && ISSPACE(*p)) p++;
1511
1512 /* Find the variable. */
1513 p = retrieve_varspec(p, l, sb, &var, file, line);
1514
1515 /* Skip whitespace again. */
1516 while (p < l && ISSPACE(*p)) p++;
1517
1518 /* Expect the opening `{'. */
7b8ff279
MW
1519 if (p > l || *p != '{') lose("%s:%u: expected `{'", file, line);
1520 p++;
8996f767
MW
1521
1522 /* We'll process the parts recursively, but we need to come back
1523 * when we hit the appropriate delimiters, so arrange for that.
1524 */
7b8ff279 1525 ff |= SF_SUBEXPR;
8996f767
MW
1526
1527 /* Process the consequent (skip if the variable wasn't found). */
7b8ff279
MW
1528 p = subst(p, l, sb, file, line, qfilt,
1529 ff | (var ? 0 : SF_SKIP));
8996f767
MW
1530
1531 /* If there's a `|' then process the alternative too (skip if the
1532 * variable /was/ found).
1533 */
7b8ff279
MW
1534 if (p < l && *p == '|')
1535 p = subst(p + 1, l, sb, file, line, qfilt,
1536 ff | (var ? SF_SKIP : 0));
8996f767
MW
1537
1538 /* We should now be past the closing `}'. */
7b8ff279
MW
1539 if (p >= l || *p != '}') lose("%s:%u: missing `}'", file, line);
1540 p++;
1541 break;
1542
1543 case '{':
8996f767
MW
1544 /* A variable substitution: ${VAR[|FILT]...[?ALT]} */
1545
1546 /* Skip initial whitespace. */
1547 p++; while (p < l && ISSPACE(*p)) p++;
1548
1549 /* Find the variable. */
1550 q0 = p; p = retrieve_varspec(p, l, sb, &var, file, line); q1 = p;
1551
1552 /* Determine the filters to apply when substituting the variable
1553 * value.
1554 */
7b8ff279 1555 subqfilt = qfilt;
8996f767
MW
1556 for (;;) {
1557
1558 /* Skip spaces again. */
1559 while (p < l && ISSPACE(*p)) p++;
1560
1561 /* If there's no `|' then there are no more filters, so stop. */
1562 if (p >= l || *p != '|') break;
1563
1564 /* Skip the `|' and more spaces. */
1565 p++; while (p < l && ISSPACE(*p)) p++;
1566
1567 /* Collect the filter name. */
1568 t = scan_name("filter", p, l, file, line);
1569
1570 /* Dispatch on the filter name. */
1571 if (t - p == 1 && *p == 'q')
1572 /* `q' -- quote for Lisp string.
1573 *
1574 * We're currently adding Q `\' characters before each naughty
1575 * character. But a backslash itself is naughty too, so that
1576 * makes Q + 1 naughty characters, each of which needs a
1577 * toothpick, so now we need Q + (Q + 1) = 2 Q + 1 toothpicks.
1578 *
1579 * Calculate this here rather than at each point toothpicks
1580 * needs to be deployed.
1581 */
1582
1583 subqfilt = 2*subqfilt + 1;
1584
1585 else if (t - p == 1 && *p == 'l')
1586 /* `u' -- convert to uppercase.
1587 *
1588 * If a case conversion is already set, then that will override
1589 * whatever we do here, so don't bother.
1590 */
1591
1592 { if (!(ff&SF_CASEMASK)) ff |= SF_DOWNCASE; }
1593
1594 else if (t - p == 1 && *p == 'u')
1595 /* `u' -- convert to uppercase.
1596 *
1597 * If a case conversion is already set, then that will override
1598 * whatever we do here, so don't bother.
1599 */
1600 { if (!(ff&SF_CASEMASK)) ff |= SF_UPCASE; }
1601
7b8ff279 1602 else
8996f767 1603 /* Something else we didn't understand. */
7b8ff279
MW
1604 lose("%s:%u: unknown filter `%.*s'",
1605 file, line, (int)(t - p), p);
8996f767
MW
1606
1607 /* Continue from after the filter name. */
7b8ff279
MW
1608 p = t;
1609 }
8996f767
MW
1610
1611 /* If we're not skipping, and we found a variable, then substitute
1612 * its value. This is the point where we need to be careful about
1613 * recursive expansion.
1614 */
7b8ff279
MW
1615 if (!(f&SF_SKIP) && var) {
1616 if (var->f&CF_EXPAND)
1617 lose("%s:%u: recursive expansion of variable `%.*s'",
1618 file, line, (int)(q1 - q0), q0);
1619 var->f |= CF_EXPAND;
1620 subst(var->val, var->val + var->n, sb,
1621 var->file, var->line, subqfilt,
1622 ff | (var->f&CF_LITERAL ? SF_LITERAL : 0));
1623 var->f &= ~CF_EXPAND;
1624 }
8996f767
MW
1625
1626 /* If there's an alternative, then we need to process (or maybe
1627 * skip) it. Otherwise, we should complain if there was no
1628 * veriable, and we're not skipping.
1629 */
7b8ff279
MW
1630 if (p < l && *p == '?')
1631 p = subst(p + 1, l, sb, file, line, subqfilt,
1632 ff | SF_SUBEXPR | (var ? SF_SKIP : 0));
1633 else if (!var && !(f&SF_SKIP))
1634 lose("%s:%u: unknown variable `%.*s'",
1635 file, line, (int)(q1 - q0), q0);
8996f767
MW
1636
1637 /* Expect a `}' here. (No need to skip spaces: we already did that
1638 * after scanning for filters, and either there was no alternative,
1639 * or we advanced to a delimiter character anyway.)
1640 */
7b8ff279
MW
1641 if (p >= l || *p != '}') lose("%s:%u: missing `}'", file, line);
1642 p++;
1643 break;
1644
1645 default:
8996f767
MW
1646 /* Something else. That's a shame. */
1647 lose("%s:%u: unexpected `$'-substitution `%c'", file, line, *p);
7b8ff279 1648 }
8996f767
MW
1649
1650 /* Complain if we started out in word-splitting state, and therefore
1651 * have added a whole number of words to the output, but there's a
1652 * word-fragment stuck onto the end of this substitution.
1653 */
7b8ff279
MW
1654 if (p < l && !(~f&~(SF_WORD | SF_SPLIT)) && !ISSPACE(*p) &&
1655 !((f&SF_SUBEXPR) && (*p == '|' || *p == '}')))
1656 lose("%s:%u: surprising word boundary "
1657 "after splicing substitution",
1658 file, line);
1659 }
1660
1661 else {
8996f767
MW
1662 /* Something else. Try to skip over this at high speed.
1663 *
1664 * This makes use of the table we set up earlier.
1665 */
1666
7b8ff279
MW
1667 n = strcspn(p, delimtab[f&SF_SPANMASK]);
1668 if (n > l - p) n = l - p;
8996f767 1669 if (!(f&SF_SKIP)) filter_string(p, p + n, sb, qfilt, f);
7b8ff279
MW
1670 p += n; f |= SF_WORD;
1671 }
1672 }
1673
1674done:
8996f767
MW
1675 /* Sort out the wreckage. */
1676
1677 /* If we're still within quotes then something has gone wrong. */
7b8ff279 1678 if (f&SF_QUOT) lose("%s:%u: missing `\"'", file, line);
8996f767
MW
1679
1680 /* If we're within a word, and should be splitting, then commit the word to
1681 * the output list.
1682 */
7b8ff279
MW
1683 if ((f&(SF_WORD | SF_SPLIT | SF_SKIP)) == (SF_SPLIT | SF_WORD)) {
1684 argv_append(sb->av, xstrndup(sb->d->p, sb->d->len));
1685 dstr_reset(sb->d);
1686 }
1687
8996f767 1688 /* And, with that, we're done. */
7b8ff279
MW
1689 return (p);
1690}
1691
8996f767
MW
1692/* Expand substitutions in a string.
1693 *
1694 * Expand the null-terminated string P relative to the HOME section, using
1695 * configuration CONFIG, and appending the result to dynamic string D. Blame
1696 * WHAT in any error messages.
1697 */
7b8ff279
MW
1698void config_subst_string(struct config *config, struct config_section *home,
1699 const char *what, const char *p, struct dstr *d)
1700{
1701 struct subst sb;
1702
1703 sb.config = config; sb.home = home; sb.d = d;
1704 subst(p, p + strlen(p), &sb, what, 0, 0, SF_SUBST);
1705 dstr_putz(d);
1706}
1707
8996f767
MW
1708/* Expand substitutions in a string.
1709 *
1710 * Expand the null-terminated string P relative to the HOME section, using
1711 * configuration CONFIG, returning the result as a freshly malloc(3)ed
1712 * string. Blame WHAT in any error messages.
1713 */
7b8ff279
MW
1714char *config_subst_string_alloc(struct config *config,
1715 struct config_section *home,
1716 const char *what, const char *p)
1717{
1718 struct dstr d = DSTR_INIT;
1719 char *q;
1720
1721 config_subst_string(config, home, what, p, &d);
1722 q = xstrndup(d.p, d.len); dstr_release(&d); return (q);
1723}
1724
8996f767
MW
1725/* Expand substitutions in a variable.
1726 *
1727 * Expand the value of the variable VAR relative to the HOME section, using
1728 * configuration CONFIG, appending the result to dynamic string D.
1729 */
7b8ff279
MW
1730void config_subst_var(struct config *config, struct config_section *home,
1731 struct config_var *var, struct dstr *d)
1732{
1733 struct subst sb;
1734
1735 sb.config = config; sb.home = home; sb.d = d;
1736 var->f |= CF_EXPAND;
1737 subst(var->val, var->val + var->n, &sb, var->file, var->line, 0,
1738 SF_SUBST | (var->f&CF_LITERAL ? SF_LITERAL : 0));
1739 var->f &= ~CF_EXPAND;
1740 dstr_putz(d);
1741}
1742
8996f767
MW
1743/* Expand substitutions in a variable.
1744 *
1745 * Expand the value of the variable VAR relative to the HOME section, using
1746 * configuration CONFIG, returning the result as a freshly malloc(3)ed
1747 * string.
1748 */
7b8ff279
MW
1749char *config_subst_var_alloc(struct config *config,
1750 struct config_section *home,
1751 struct config_var *var)
1752{
1753 struct dstr d = DSTR_INIT;
1754 char *q;
1755
1756 config_subst_var(config, home, var, &d);
1757 q = xstrndup(d.p, d.len); dstr_release(&d); return (q);
1758}
1759
8996f767
MW
1760/* Expand substitutions in a variable and split into words.
1761 *
1762 * Expand and word-split the value of the variable VAR relative to the HOME
1763 * section, using configuration CONFIG, appending the resulting words into
1764 * the vector AV.
1765 */
7b8ff279
MW
1766void config_subst_split_var(struct config *config,
1767 struct config_section *home,
1768 struct config_var *var, struct argv *av)
1769{
1770 struct dstr d = DSTR_INIT;
1771 struct subst sb;
1772
1773 sb.config = config; sb.home = home; sb.av = av; sb.d = &d;
1774 var->f |= CF_EXPAND;
1775 subst(var->val, var->val + var->n, &sb, var->file, var->line, 0,
1776 SF_SUBST | SF_SPLIT | (var->f&CF_LITERAL ? SF_LITERAL : 0));
1777 var->f &= ~CF_EXPAND;
1778 dstr_release(&d);
1779}
1780
1781/*----- That's all, folks -------------------------------------------------*/