3 * Common definitions for `runlisp'
5 * (c) 2020 Mark Wooding
8 /*----- Licensing notice --------------------------------------------------*
10 * This file is part of Runlisp, a tool for invoking Common Lisp scripts.
12 * Runlisp is free software: you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by the
14 * Free Software Foundation; either version 3 of the License, or (at your
15 * option) any later version.
17 * Runlisp is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
22 * You should have received a copy of the GNU General Public License
23 * along with Runlisp. If not, see <https://www.gnu.org/licenses/>.
26 /*----- Header files ------------------------------------------------------*/
43 /*----- Diagnostic utilities ----------------------------------------------*/
45 const char *progname
= "???";
46 /* Our program name, for use in error messages. */
48 /* Set `progname' from the pathname in PROG (typically from `argv[0]'). */
49 void set_progname(const char *prog
)
53 p
= strrchr(prog
, '/');
54 progname
= p ? p
+ 1 : prog
;
57 /* Report an error or warning in Unix style, given a captured argument
60 void vmoan(const char *msg
, va_list ap
)
62 fprintf(stderr
, "%s: ", progname
);
63 vfprintf(stderr
, msg
, ap
);
67 /* Issue a warning message. */
68 void moan(const char *msg
, ...)
69 { va_list ap
; va_start(ap
, msg
); vmoan(msg
, ap
); va_end(ap
); }
71 /* Issue a fatal error message and exit unsuccessfully. */
72 void lose(const char *msg
, ...)
73 { va_list ap
; va_start(ap
, msg
); vmoan(msg
, ap
); va_end(ap
); exit(127); }
75 /*----- Memory allocation -------------------------------------------------*/
77 /* Allocate and return a pointer to N bytes, or report a fatal error.
79 * Release the pointer using `free' as usual. If N is zero, returns null
80 * (but you are not expected to check for this).
82 void *xmalloc(size_t n
)
87 p
= malloc(n
); if (!p
) lose("failed to allocate memory");
91 /* Resize the block at P (from `malloc' or `xmalloc') to be N bytes long.
93 * The block might (and probably will) move, so it returns the new address.
94 * If N is zero, then the block is freed (if necessary) and a null pointer
95 * returned; otherwise, if P is null then a fresh block is allocated. If
96 * allocation fails, then a fatal error is reported.
98 void *xrealloc(void *p
, size_t n
)
100 if (!n
) { free(p
); return (0); }
101 else if (!p
) return (xmalloc(n
));
102 p
= realloc(p
, n
); if (!p
) lose("failed to allocate memory");
106 /* Allocate and return a copy of the N-byte string starting at P.
108 * The new string is null-terminated, though P need not be. If allocation
109 * fails, then a fatal error is reported.
111 char *xstrndup(const char *p
, size_t n
)
113 char *q
= xmalloc(n
+ 1);
115 memcpy(q
, p
, n
); q
[n
] = 0;
119 /* Allocate and return a copy of the null-terminated string starting at P.
121 * If allocation fails, then a fatal error is reported.
123 char *xstrdup(const char *p
) { return (xstrndup(p
, strlen(p
))); }
125 /*----- Dynamic strings ---------------------------------------------------*/
127 /* Initialize the string D.
129 * Usually you'd use the static initializer `DSTR_INIT'.
131 void dstr_init(struct dstr
*d
) { d
->p
= 0; d
->len
= d
->sz
= 0; }
133 /* Reset string D so it's empty again. */
134 void dstr_reset(struct dstr
*d
) { d
->len
= 0; }
136 /* Ensure that D has at least N unused bytes available. */
137 void dstr_ensure(struct dstr
*d
, size_t n
)
139 size_t need
= d
->len
+ n
, newsz
;
141 if (need
<= d
->sz
) return;
142 newsz
= d
->sz ?
2*d
->sz
: 16;
143 while (newsz
< need
) newsz
*= 2;
144 d
->p
= xrealloc(d
->p
, newsz
); d
->sz
= newsz
;
147 /* Release the memory held by D.
149 * It must be reinitialized (e.g., by `dstr_init') before it can be used
152 void dstr_release(struct dstr
*d
) { free(d
->p
); }
154 /* Append the N-byte string at P to D.
156 * P need not be null-terminated. D will not be null-terminated
159 void dstr_putm(struct dstr
*d
, const void *p
, size_t n
)
160 { dstr_ensure(d
, n
); memcpy(d
->p
+ d
->len
, p
, n
); d
->len
+= n
; }
162 /* Append the null-terminated string P to D.
164 * D /is/ guaranteed to be null-terminated after this.
166 void dstr_puts(struct dstr
*d
, const char *p
)
168 size_t n
= strlen(p
);
170 dstr_ensure(d
, n
+ 1);
171 memcpy(d
->p
+ d
->len
, p
, n
+ 1);
175 /* Append the single character CH to D.
177 * D will not be null-terminated afterwards.
179 void dstr_putc(struct dstr
*d
, int ch
)
180 { dstr_ensure(d
, 1); d
->p
[d
->len
++] = ch
; }
182 /* Append N copies of the character CH to D.
184 * D will not be null-terminated afterwards.
186 void dstr_putcn(struct dstr
*d
, int ch
, size_t n
)
187 { dstr_ensure(d
, n
); memset(d
->p
+ d
->len
, ch
, n
); d
->len
+= n
; }
189 /* Null-terminate the string D.
191 * This doesn't change the length of D. If further stuff is appended then
192 * the null terminator will be overwritten.
194 void dstr_putz(struct dstr
*d
)
195 { dstr_ensure(d
, 1); d
->p
[d
->len
] = 0; }
197 /* Append stuff to D, determined by printf(3) format string P and argument
200 * D will not be null-terminated afterwards.
202 void dstr_vputf(struct dstr
*d
, const char *p
, va_list ap
)
210 n
= vsnprintf(d
->p
+ d
->len
, r
, p
, ap2
); assert(n
>= 0);
213 dstr_ensure(d
, n
+ 1); r
= d
->sz
- d
->len
;
214 n
= vsnprintf(d
->p
+ d
->len
, r
, p
, ap
); assert(n
>= 0); assert(n
< r
);
219 /* Append stuff to D, determined by printf(3) format string P and arguments.
221 * D will not be null-terminated afterwards.
223 PRINTF_LIKE(2, 3) void dstr_putf(struct dstr
*d
, const char *p
, ...)
224 { va_list ap
; va_start(ap
, p
); dstr_vputf(d
, p
, ap
); va_end(ap
); }
226 /* Append the next input line from FP to D.
228 * Return 0 on success, or -1 if reading immediately fails or encounters
229 * end-of-file (call ferror(3) to distinguish). Any trailing newline is
230 * discarded: it is not possible to determine whether the last line was ended
231 * with a newline. D is guaranteed to be null-terminated afterwards.
233 int dstr_readline(struct dstr
*d
, FILE *fp
)
240 if (!fgets(d
->p
+ d
->len
, d
->sz
- d
->len
, fp
)) break;
241 n
= strlen(d
->p
+ d
->len
); assert(n
> 0); any
= 1;
243 if (d
->p
[d
->len
- 1] == '\n') { d
->p
[--d
->len
] = 0; break; }
246 if (!any
) return (-1);
250 /*----- Dynamic vectors of strings ----------------------------------------*/
252 /* Initialize the vector AV.
254 * Usually you'd use the static initializer `ARGV_INIT'.
256 void argv_init(struct argv
*av
)
257 { av
->v
= 0; av
->o
= av
->n
= av
->sz
= 0; }
259 /* Reset the vector AV so that it's empty again. */
260 void argv_reset(struct argv
*av
) { av
->n
= 0; }
262 /* Ensure that AV has at least N unused slots at the end. */
263 void argv_ensure(struct argv
*av
, size_t n
)
265 size_t need
= av
->n
+ av
->o
+ n
, newsz
;
267 if (need
<= av
->sz
) return;
268 newsz
= av
->sz ?
2*av
->sz
: 8;
269 while (newsz
< need
) newsz
*= 2;
270 av
->v
= xrealloc(av
->v
- av
->o
, newsz
*sizeof(char *)); av
->v
+= av
->o
;
274 /* Ensure that AV has at least N unused slots at the /start/. */
275 void argv_ensure_offset(struct argv
*av
, size_t n
)
279 /* Stupid version. We won't, in practice, be prepending lots of stuff, so
280 * avoid the extra bookkeeping involved in trying to make a double-ended
281 * extendable array asymptotically efficient.
283 if (av
->o
>= n
) return;
285 while (newoff
< n
) newoff
*= 2;
286 argv_ensure(av
, newoff
- av
->o
);
287 memmove(av
->v
+ newoff
- av
->o
, av
->v
, av
->n
*sizeof(char *));
288 av
->v
+= newoff
- av
->o
; av
->o
= newoff
;
291 /* Release the memory held by AV.
293 * It must be reinitialized (e.g., by `argv_init') before it can be used
296 void argv_release(struct argv
*av
) { free(av
->v
- av
->o
); }
298 /* Append the pointer P to AV. */
299 void argv_append(struct argv
*av
, char *p
)
300 { argv_ensure(av
, 1); av
->v
[av
->n
++] = p
; }
302 /* Append a null pointer to AV, without extending the vactor length.
304 * The null pointer will be overwritten when the next string is appended.
306 void argv_appendz(struct argv
*av
)
307 { argv_ensure(av
, 1); av
->v
[av
->n
] = 0; }
309 /* Append a N-element vector V of pointers to AV. */
310 void argv_appendn(struct argv
*av
, char *const *v
, size_t n
)
313 memcpy(av
->v
+ av
->n
, v
, n
*sizeof(const char *));
317 /* Append the variable-length vector BV to AV. */
318 void argv_appendav(struct argv
*av
, const struct argv
*bv
)
319 { argv_appendn(av
, bv
->v
, bv
->n
); }
321 /* Append the pointers from a variable-length argument list AP to AV.
323 * The list is terminated by a null pointer.
325 void argv_appendv(struct argv
*av
, va_list ap
)
328 for (;;) { p
= va_arg(ap
, char *); if (!p
) break; argv_append(av
, p
); }
331 /* Append the argument pointers, terminated by a null pointer, to AV. */
332 void argv_appendl(struct argv
*av
, ...)
333 { va_list ap
; va_start(ap
, av
); argv_appendv(av
, ap
); va_end(ap
); }
335 /* Prepend the pointer P to AV. */
336 void argv_prepend(struct argv
*av
, char *p
)
337 { argv_ensure_offset(av
, 1); *--av
->v
= p
; av
->o
--; av
->n
++; }
339 /* Prepend a N-element vector V of pointers to AV. */
340 void argv_prependn(struct argv
*av
, char *const *v
, size_t n
)
342 argv_ensure_offset(av
, n
);
343 av
->o
-= n
; av
->v
-= n
; av
->n
+= n
;
344 memcpy(av
->v
, v
, n
*sizeof(const char *));
347 /* Prepend the variable-length vector BV to AV. */
348 void argv_prependav(struct argv
*av
, const struct argv
*bv
)
349 { argv_prependn(av
, bv
->v
, bv
->n
); }
351 /* Prepend the pointers from a variable-length argument list AP to AV.
353 * The list is terminated by a null pointer.
355 void argv_prependv(struct argv
*av
, va_list ap
)
361 p
= va_arg(ap
, char *); if (!p
) break;
362 argv_prepend(av
, p
); n
++;
366 p
= v
[0]; v
[0] = v
[n
- 1]; v
[n
- 1] = p
;
371 /* Prepend the argument pointers, terminated by a null pointer, to AV. */
372 void argv_prependl(struct argv
*av
, ...)
373 { va_list ap
; va_start(ap
, av
); argv_prependv(av
, ap
); va_end(ap
); }
375 /*----- Treaps ------------------------------------------------------------*/
377 /* Return nonzero if the AN-byte string A is strictly precedes the BN-byte
378 * string B in a lexicographic ordering.
380 * All comparison of keys is handled by this function.
382 static int str_lt(const char *a
, size_t an
, const char *b
, size_t bn
)
384 /* This is a little subtle. We need only compare the first N bytes of the
385 * strings, where N is the length of the shorter string. If this
386 * distinguishes the two strings, then we're clearly done. Otherwise, if
387 * the prefixes are equal then the shorter string is the smaller one. If
388 * the two strings are the same length, then they're equal.
390 * Hence, if A is the strictly shorter string, then A precedes B if A
391 * precedes or matches the prefix of B; otherwise A only precedes B if A
392 * strictly precedes the prefix of B.
394 if (an
< bn
) return (MEMCMP(a
, <=, b
, an
));
395 else return (MEMCMP(a
, <, b
, bn
));
398 /* Initialize the treap T.
400 * Usually you'd use the static initializer `TREAP_INIT'.
402 void treap_init(struct treap
*t
) { t
->root
= 0; }
404 /* Look up the KN-byte key K in the treap T.
406 * Return a pointer to the matching node if one was found, or null otherwise.
408 void *treap_lookup(const struct treap
*t
, const char *k
, size_t kn
)
410 struct treap_node
*n
= t
->root
, *candidate
= 0;
412 /* This is a simple prototype for some of the search loops we'll encounter
413 * later. Notice that we use a strict one-sided comparison, rather than
414 * the more conventional two-sided comparison.
416 * The main loop will find the largest key not greater than K.
419 /* Compare the node's key against our key. If the node is too large,
420 * then we ignore it and move left. Otherwise remember this node for
421 * later, and move right to see if we can find a better, larger node.
424 if (str_lt(k
, kn
, n
->k
, n
->kn
)) n
= n
->left
;
425 else { candidate
= n
; n
= n
->right
; }
427 /* If the candidate node is less than our key then we failed. Otherwise,
428 * by trichotomy, we have found the correct node.
430 if (!candidate
|| str_lt(candidate
->k
, candidate
->kn
, k
, kn
)) return (0);
434 /* Look up the KN-byte K in the treap T, recording a path in P.
436 * This is similar to `treap_lookup', in that it returns the requested node
437 * if it already exists, or null otherwise, but it also records in P
438 * information to be used by `treap_insert' to insert a new node with the
439 * given key if it's not there already.
441 void *treap_probe(struct treap
*t
, const char *k
, size_t kn
,
442 struct treap_path
*p
)
444 struct treap_node
**nn
= &t
->root
, *candidate
= 0;
447 /* This walk is similar to `treap_lookup' above, except that we also record
448 * the address of each node pointer we visit along the way.
451 assert(i
< TREAP_PATHMAX
); p
->path
[i
++] = nn
;
453 if (str_lt(k
, kn
, (*nn
)->k
, (*nn
)->kn
)) nn
= &(*nn
)->left
;
454 else { candidate
= *nn
; nn
= &(*nn
)->right
; }
458 /* Check to see whether we found the right node. */
459 if (!candidate
|| str_lt(candidate
->k
, candidate
->kn
, k
, kn
)) return (0);
463 /* Insert a new node N into T, associating it with the KN-byte key K.
465 * Use the path data P, from `treap_probe', to help with insertion.
467 void treap_insert(struct treap
*t
, const struct treap_path
*p
,
468 struct treap_node
*n
, const char *k
, size_t kn
)
470 size_t i
= p
->nsteps
;
471 struct treap_node
**nn
, **uu
, *u
;
474 /* Fill in the node structure. */
475 n
->k
= xstrndup(k
, kn
); n
->kn
= kn
;
476 n
->wt
= wt
= rand(); n
->left
= n
->right
= 0;
478 /* Prepare for the insertion.
480 * The path actually points to each of the links traversed when searching
481 * for the node, starting with the `root' pointer, then the `left' or
482 * `right' pointer of the root node, and so on; `nsteps' will always be
483 * nonzero, since the path will always pass through the root, and the final
484 * step, `path->path[path->nsteps - 1]' will always be the address of a
485 * null pointer onto which the freshly inserted node could be hooked in
486 * order to satisfy the binary-search-tree ordering. (Of course, this will
487 * likely /not/ satisfy the heap condition, so more work needs to be done.)
489 * Throughout, NN is our current candidate for where to attach the node N.
490 * As the loop progresses, NN will ascend to links further up the tree, and
491 * N will be adjusted to accumulate pieces of the existing tree structure.
492 * We'll stop when we find that the parent node's weight is larger than our
493 * new node's weight, at which point we can just set *NN = N; or if we run
494 * out of steps in the path, in which case *NN is the root pointer.
496 assert(i
); nn
= p
->path
[--i
];
499 /* Collect the next step in the path, and get the pointer to the node. */
500 uu
= p
->path
[i
]; u
= *uu
;
502 /* If this node's weight is higher, then we've found the right level and
505 if (wt
<= u
->wt
) break;
507 /* The node U is lighter than our new node N, so we must rotate in order
508 * to fix things. If we were currently planning to hook N as the left
509 * subtree of U, then we rotate like this:
518 * On the other hand, if we were planning to hook N as the right subtree
519 * of U, then we do the opposite rotation:
528 * These transformations clearly preserve the ordering of nodes in the
529 * binary search tree, and satisfy the heap condition in the subtree
532 if (nn
== &u
->left
) { u
->left
= n
->right
; n
->right
= u
; }
533 else { u
->right
= n
->left
; n
->left
= u
; }
535 /* And this arrangement must be attached to UU, or some higher attachment
536 * point. The subtree satisfies the heap condition, and can be attached
537 * safely at the selected place.
542 /* We've found the right spot. Hook the accumulated subtree into place. */
546 /* Remove the node with the KN-byte K from T.
548 * Return the address of the node we removed, or null if it couldn't be
551 void *treap_remove(struct treap
*t
, const char *k
, size_t kn
)
553 struct treap_node
**nn
= &t
->root
, **candidate
= 0, *n
, *l
, *r
;
555 /* Search for the matching node, but keep track of the address of the link
556 * which points to our target node.
559 if (str_lt(k
, kn
, (*nn
)->k
, (*nn
)->kn
)) nn
= &(*nn
)->left
;
560 else { candidate
= nn
; nn
= &(*nn
)->right
; }
562 /* If this isn't the right node then give up. */
563 if (!candidate
|| str_lt((*candidate
)->k
, (*candidate
)->kn
, k
, kn
))
566 /* Now we need to disentangle the node from the tree. This is essentially
567 * the reverse of insertion: we pretend that this node is suddenly very
568 * light, and mutate the tree so as to restore the heap condition until
569 * eventually our node is a leaf and can be cut off without trouble.
571 * Throughout, the link *NN notionally points to N, but we don't actually
572 * update it until we're certain what value it should finally take.
574 nn
= candidate
; n
= *nn
; l
= n
->left
; r
= n
->right
;
577 /* If its left subtree is empty then we can replace our node by its right
578 * subtree and be done. Similarly, if the right subtree is empty then we
579 * replace the node by its left subtree.
582 * (N) ---> R ; (N) ---> L
586 if (!l
) { *nn
= r
; break; }
587 else if (!r
) { *nn
= l
; break; }
589 /* Otherwise we need to rotate the pointers so that the heavier of the
590 * two children takes the place of our node; thus we have either
608 * Again, these transformations clearly preserve the ordering of nodes in
609 * the binary search tree, and the heap condition.
611 else if (l
->wt
> r
->wt
)
612 { *nn
= l
; nn
= &l
->right
; l
= n
->left
= l
->right
; }
614 { *nn
= r
; nn
= &r
->left
; r
= n
->right
= r
->left
; }
616 /* Release the key buffer, and return the node that we've now detached. */
617 free(n
->k
); return (n
);
620 /* Initialize an iterator I over T's nodes. */
621 void treap_start_iter(struct treap
*t
, struct treap_iter
*i
)
623 struct treap_node
*n
= t
->root
;
626 /* The `stack' in the iterator structure is an empty ascending stack of
627 * nodes which have been encountered, and their left subtrees investigated,
628 * but not yet visited by the iteration.
630 * Iteration begins by stacking the root node, its left child, and so on,
631 * At the end of this, the topmost entry on the stack is the least node of
632 * the tree, followed by its parent, grandparent, and so on up to the root.
635 assert(sp
< TREAP_PATHMAX
);
636 i
->stack
[sp
++] = n
; n
= n
->left
;
641 /* Return the next node from I, in ascending order by key.
643 * If there are no more nodes, then return null.
645 void *treap_next(struct treap_iter
*i
)
647 struct treap_node
*n
, *o
;
650 /* We say that a node is /visited/ once it's been returned by this
651 * iterator. To traverse a tree in order, then, we traverse its left
652 * subtree, visit the tree root, and traverse its right subtree -- which is
653 * a fine recursive definition, but we need a nonrecursive implementation.
655 * As is usual in this kind of essential structural recursion, we maintain
656 * a stack. The invariant that we'll maintain is as follows.
658 * 1. If the stack is empty, then all nodes have been visited.
660 * 2, If the stack is nonempty then the topmost entry on the stack is the
661 * least node which has not yet been visited -- and therefore is the
662 * next node to visit.
664 * 3. The earlier entries in the stack are, in (top to bottom) order,
665 * those of the topmost node's parent, grandparent, etc., up to the
666 * root, which have not yet been visited. More specifically, a node
667 * appears in the stack if and only if some node in its left subtree
668 * is nearer the top of the stack.
670 * When we initialized the iterator state (in `treap_start_iter' above), we
671 * traced a path to the leftmost leaf, stacking the root, its left-hand
672 * child, and so on. The leftmost leaf is clearly the first node to be
673 * visited, and its entire ancestry is on the stack since none of these
674 * nodes has yet been visited. (If the tree is empty, then we have done
675 * nothing, the stack is empty, and there are no nodes to visit.) This
676 * establishes the base case for the induction.
679 /* So, if the stack is empty now, then (1) all of the nodes have been
680 * visited and there's nothing left to do. Return null.
684 /* It's clear that, if we pop the topmost element of the stack, visit it,
685 * and arrange to reestablish the invariant, then we'll visit the nodes in
686 * the correct order, pretty much by definition.
688 * So, pop a node off the stack. This is the node we shall return. But
689 * before we can do that, we must reestablish the above invariant.
690 * Firstly, the current node is removed from the stack, because we're about
691 * to visit it, and visited nodes don't belong on the stack. Then there
692 * are two cases to consider.
694 * * If the current node's right subtree is not empty, then the next node
695 * to be visited is the leftmost node in that subtree. All of the
696 * nodes on the stack are ancestors of the current node, and the right
697 * subtree consists of its descendants, so none of them are already on
698 * the stack; and they're all greater than the current node, and
699 * therefore haven't been visited. Therefore, we must push the current
700 * node's right child, its /left/ child, and so on, proceeding
701 * leftwards until we fall off the bottom of the tree.
703 * * Otherwise, we've finished traversing some subtree. Either we are
704 * now done, or (3) we have just finished traversing the left subtree
705 * of the next topmost item on the stack. This must therefore be the
706 * next node to visit. The rest of the stack is already correct.
711 assert(sp
< TREAP_PATHMAX
);
712 i
->stack
[sp
++] = o
; o
= o
->left
;
718 /* Recursively check the subtree headed by N.
720 * No node should have weight greater than MAXWT, to satisfy the heap
721 * condition; if LO is not null, then all node keys should be strictly
722 * greater than LO, and, similarly, if HI is not null, then all keys should
723 * be strictly smaller than HI.
725 static void check_subtree(struct treap_node
*n
, unsigned maxwt
,
726 const char *klo
, const char *khi
)
728 /* Check the heap condition. */
729 assert(n
->wt
<= maxwt
);
731 /* Check that the key is in bounds. (Use `strcmp' here to ensure that our
732 * own `str_lt' is working correctly.)
734 if (klo
) assert(STRCMP(n
->k
, >, klo
));
735 if (khi
) assert(STRCMP(n
->k
, <, khi
));
737 /* Check the left subtree. Node weights must be bounded above by our own
738 * weight. And every key in the left subtree must be smaller than our
739 * current key. We propagate the lower bound.
741 if (n
->left
) check_subtree(n
->left
, n
->wt
, klo
, n
->k
);
743 /* Finally, check the right subtree. This time, every key must be larger
744 * than our key, and we propagate the upper bound.
746 if (n
->right
) check_subtree(n
->right
, n
->wt
, n
->k
, khi
);
749 /* Check the treap structure rules for T. */
750 void treap_check(struct treap
*t
)
751 { if (t
->root
) check_subtree(t
->root
, t
->root
->wt
, 0, 0); }
753 /* Recursively dump the subtree headed by N, indenting the output lines by
756 static void dump_node(struct treap_node
*n
, int ind
)
758 if (n
->left
) dump_node(n
->left
, ind
+ 1);
759 printf(";;%*s [%10u] `%s'\n", 2*ind
, "", n
->wt
, n
->k
);
760 if (n
->right
) dump_node(n
->right
, ind
+ 1);
763 /* Dump the treap T to standard output, for debugging purposes. */
764 void treap_dump(struct treap
*t
) { if (t
->root
) dump_node(t
->root
, 0); }
766 /*----- Configuration file parsing ----------------------------------------*/
769 extern char **environ
;
772 /* Advance P past a syntactically valid name, but no further than L.
774 * Return the new pointer. If no name is found, report an error, blaming
775 * FILE and LINE; WHAT is an adjective for the kind of name that was
778 static const char *scan_name(const char *what
,
779 const char *p
, const char *l
,
780 const char *file
, unsigned line
)
785 (ISALNUM(*q
) || *q
== '-' || *q
== '_' || *q
== '.' || *q
== '/' ||
786 *q
== '*' || *q
== '+' || *q
== '%' || *q
== '@'))
788 if (q
== p
) lose("%s:%u: expected %s name", file
, line
, what
);
792 /* Initialize the configuration state CONF.
794 * Usually you'd use the static initializer `CONFIG_INIT'.
796 void config_init(struct config
*conf
)
797 { treap_init(&conf
->sections
); }
799 /* Find and return the section with null-terminated NAME in CONF.
801 * If no section is found, the behaviour depends on whether `CF_CREAT' is set
802 * in F: if so, an empty section is created and returned; otherwise, a null
803 * pointer is returned.
805 struct config_section
*config_find_section(struct config
*conf
, unsigned f
,
807 { return (config_find_section_n(conf
, f
, name
, strlen(name
))); }
809 /* Find and return the section with the SZ-byte NAME in CONF.
811 * This works like `config_find_section', but with an explicit length for the
812 * NAME rather than null-termination.
814 struct config_section
*config_find_section_n(struct config
*conf
, unsigned f
,
815 const char *name
, size_t sz
)
817 struct config_section
*sect
;
818 struct treap_path path
;
821 sect
= treap_lookup(&conf
->sections
, name
, sz
);
823 sect
= treap_probe(&conf
->sections
, name
, sz
, &path
);
825 sect
= xmalloc(sizeof(*sect
));
826 if (!conf
->head
) conf
->tail
= &conf
->head
;
827 sect
->next
= 0; *conf
->tail
= sect
; conf
->tail
= §
->next
;
828 sect
->parents
= 0; sect
->nparents
= SIZE_MAX
;
829 treap_init(§
->vars
); treap_init(§
->cache
);
830 treap_insert(&conf
->sections
, &path
, §
->_node
, name
, sz
);
831 config_set_var_n(conf
, sect
, CF_LITERAL
, "@name", 5, name
, sz
);
837 /* Set the fallback section for CONF to be SECT.
839 * That is, if a section has no explicit parents, then by default it will
840 * have a single parent which is SECT. If SECT is null then there is no
841 * fallback section, and sections which don't have explicitly specified
842 * parents have no parents at all. (This is the default situation.)
844 void config_set_fallback(struct config
*conf
, struct config_section
*sect
)
845 { conf
->fallback
= sect
; }
847 /* Arrange that SECT has PARENT as its single parent section.
849 * If PARENT is null, then arrange that SECT has no parents at all. In
850 * either case, any `@parents' setting will be ignored.
852 void config_set_parent(struct config_section
*sect
,
853 struct config_section
*parent
)
858 sect
->parents
= xmalloc(sizeof(*sect
->parents
));
859 sect
->parents
[0] = parent
; sect
->nparents
= 1;
863 /* Initialize I to iterate over the sections defined in CONF. */
864 void config_start_section_iter(struct config
*conf
,
865 struct config_section_iter
*i
)
866 { i
->sect
= conf
->head
; }
868 /* Return the next section from I, in order of creation.
870 * If there are no more sections, then return null.
872 struct config_section
*config_next_section(struct config_section_iter
*i
)
874 struct config_section
*sect
;
877 if (sect
) i
->sect
= sect
->next
;
881 /* Initialize the `parents' links of SECT, if they aren't set up already.
883 * If SECT contains a `@parents' setting then parse it to determine the
884 * parents; otherwise use CONF's fallbeck section, as established by
885 * `config_set_fallback'.
887 static void set_config_section_parents(struct config
*conf
,
888 struct config_section
*sect
)
890 struct config_section
*parent
;
891 struct config_var
*var
;
892 const char *file
; unsigned line
;
895 struct argv av
= ARGV_INIT
;
897 /* If the section already has parents established then there's nothing to
900 if (sect
->nparents
!= SIZE_MAX
) return;
902 /* Look up `@parents', without recursion! */
903 var
= treap_lookup(§
->vars
, "@parents", 8);
905 /* No explicit setting: use the fallback setting. */
907 if (!conf
->fallback
|| conf
->fallback
== sect
)
910 sect
->parents
= xmalloc(sizeof(*sect
->parents
)); sect
->nparents
= 1;
911 sect
->parents
[0] = conf
->fallback
;
914 /* Found a `@parents' list: parse it and set the parents list. */
916 file
= var
->file
; line
= var
->line
; if (!file
) file
= "<internal>";
918 /* We do this in two phases. First, we parse out the section names, and
919 * record start/limit pointer pairs in `av'.
921 p
= var
->val
; l
= p
+ var
->n
; while (p
< l
&& ISSPACE(*p
)) p
++;
924 p
= (/*unconst*/ char *)scan_name("parent section", p
, l
, file
, line
);
925 argv_append(&av
, q
); argv_append(&av
, p
);
926 while (p
< l
&& ISSPACE(*p
)) p
++;
928 if (*p
== ',') do p
++; while (ISSPACE(*p
));
931 /* Now that we've finished parsing, we know how many parents we're going
932 * to have, so we can allocate the `parents' vector and fill it in.
934 sect
->nparents
= av
.n
/2;
935 sect
->parents
= xmalloc(sect
->nparents
*sizeof(*sect
->parents
));
936 for (i
= 0; i
< av
.n
; i
+= 2) {
937 n
= av
.v
[i
+ 1] - av
.v
[i
];
938 parent
= config_find_section_n(conf
, 0, av
.v
[i
], n
);
940 lose("%s:%u: unknown parent section `%.*s'",
941 file
, line
, (int)n
, av
.v
[i
]);
942 sect
->parents
[i
/2] = parent
;
950 /* Find a setting of the SZ-byte variable NAME in CONF, starting from SECT.
952 * If successful, return a pointer to the variable; otherwise return null.
953 * Inheritance cycles and ambiguous inheritance are diagnosed as fatal
956 struct config_var
*search_recursive(struct config
*conf
,
957 struct config_section
*sect
,
958 const char *name
, size_t sz
)
960 struct config_cache_entry
*cache
;
961 struct treap_path path
;
962 struct config_var
*var
, *v
;
965 /* If the variable is defined locally then we can just return it. */
966 var
= treap_lookup(§
->vars
, name
, sz
); if (var
) return (var
);
968 /* If we have no parents then there's no way we can find it. */
969 set_config_section_parents(conf
, sect
);
970 if (!sect
->parents
) return (0);
972 /* Otherwise we must visit the section's parents. We can avoid paying for
973 * this on every lookup by using a cache. If there's already an entry for
974 * this variable then we can return the result immediately (note that we
975 * cache both positive and negative outcomes). Otherwise we create a new
976 * cache entry, do the full recursive search, and fill in the result when
979 * The cache also helps us detect cycles: we set the `CF_OPEN' flag on a
980 * new cache entry when it's first created, and clear it when we fill in
981 * the result: if we encounter an open cache entry again, we know that
982 * we've found a cycle.
984 cache
= treap_probe(§
->cache
, name
, sz
, &path
);
986 cache
= xmalloc(sizeof(*cache
)); cache
->f
= CF_OPEN
;
987 treap_insert(§
->cache
, &path
, &cache
->_node
, name
, sz
);
988 } else if (cache
->f
&CF_OPEN
)
989 lose("inheritance cycle through section `%s'",
990 CONFIG_SECTION_NAME(sect
));
994 /* Recursively search in each parent. We insist that all parents that find
995 * a variable find the same binding; otherwise we declare ambiguous
998 for (i
= 0; i
< sect
->nparents
; i
++) {
999 v
= search_recursive(conf
, sect
->parents
[i
], name
, sz
);
1001 else if (!var
) { var
= v
; j
= i
; }
1003 lose("section `%s' inherits variable `%s' ambiguously "
1004 "via `%s' and `%s'",
1005 CONFIG_SECTION_NAME(sect
), CONFIG_VAR_NAME(var
),
1006 CONFIG_SECTION_NAME(sect
->parents
[j
]),
1007 CONFIG_SECTION_NAME(sect
->parents
[i
]));
1010 /* All done: fill the cache entry in, clear the open flag, and return the
1013 cache
->var
= var
; cache
->f
&= ~CF_OPEN
;
1017 /* Find and return the variable with null-terminated NAME in SECT.
1019 * If `CF_INHERIT' is set in F, then the function searches the section's
1020 * parents recursively; otherwise, it only checks to see whether the variable
1021 * is set directly in SECT.
1023 * If no variable is found, the behaviour depends on whether `CF_CREAT' is
1024 * set in F: if so, an empty variable is created and returned; otherwise, a
1025 * null pointer is returned.
1027 * Setting both `CF_INHERIT' and `CF_CREAT' is not useful.
1029 struct config_var
*config_find_var(struct config
*conf
,
1030 struct config_section
*sect
,
1031 unsigned f
, const char *name
)
1032 { return (config_find_var_n(conf
, sect
, f
, name
, strlen(name
))); }
1034 /* Find and return the variable with the given SZ-byte NAME in SECT.
1036 * This works like `config_find_var', but with an explicit length for the
1037 * NAME rather than null-termination.
1039 struct config_var
*config_find_var_n(struct config
*conf
,
1040 struct config_section
*sect
,
1041 unsigned f
, const char *name
, size_t sz
)
1043 struct config_var
*var
;
1044 struct treap_path path
;
1047 var
= search_recursive(conf
, sect
, name
, sz
);
1048 else if (!(f
&CF_CREAT
))
1049 var
= treap_lookup(§
->vars
, name
, sz
);
1051 var
= treap_probe(§
->vars
, name
, sz
, &path
);
1053 var
= xmalloc(sizeof(*var
));
1054 var
->val
= 0; var
->file
= 0; var
->f
= 0; var
->line
= 1;
1055 treap_insert(§
->vars
, &path
, &var
->_node
, name
, sz
);
1061 /* Set variable NAME to VALUE in SECT, with associated flags F.
1063 * The names are null-terminated. The flags are variable flags: see `struct
1064 * config_var' for details. Returns the variable.
1066 * If the variable is already set and has the `CF_OVERRIDE' flag, then this
1067 * function does nothing unless `CF_OVERRIDE' is /also/ set in F.
1069 struct config_var
*config_set_var(struct config
*conf
,
1070 struct config_section
*sect
,
1072 const char *name
, const char *value
)
1074 return (config_set_var_n(conf
, sect
, f
,
1076 value
, strlen(value
)));
1079 /* As `config_set_var', except that the variable NAME and VALUE have explicit
1080 * lengths (NAMELEN and VALUELEN, respectively) rather than being null-
1083 struct config_var
*config_set_var_n(struct config
*conf
,
1084 struct config_section
*sect
,
1086 const char *name
, size_t namelen
,
1087 const char *value
, size_t valuelen
)
1089 struct config_var
*var
=
1090 config_find_var_n(conf
, sect
, CF_CREAT
, name
, namelen
);
1092 if (var
->f
&~f
&CF_OVERRIDE
) return (var
);
1093 free(var
->val
); var
->val
= xstrndup(value
, valuelen
); var
->n
= valuelen
;
1098 /* Initialize I to iterate over the variables directly defined in SECT. */
1099 void config_start_var_iter(struct config
*conf
, struct config_section
*sect
,
1100 struct config_var_iter
*i
)
1101 { treap_start_iter(§
->vars
, &i
->i
); }
1103 /* Return next variable from I, in ascending lexicographical order.
1105 * If there are no more variables, then return null.
1107 struct config_var
*config_next_var(struct config_var_iter
*i
)
1108 { return (treap_next(&i
->i
)); }
1110 /* Read and parse configuration FILE, applying its settings to CONF.
1112 * If all goes well, the function returns 0. If the file is not found, then
1113 * the behaviour depends on whether `CF_NOENTOK' is set in F: if so, then the
1114 * function simply returns -1. Otherwise, a fatal error is reported. Note
1115 * that this /only/ applies if the file does not exist (specifically, opening
1116 * it fails with `ENOENT') -- any other problems are reported as fatal
1117 * errors regardless of the flag setting.
1119 int config_read_file(struct config
*conf
, const char *file
, unsigned f
)
1121 struct config_section
*sect
;
1122 struct config_var
*var
;
1123 struct dstr d
= DSTR_INIT
, dd
= DSTR_INIT
;
1125 const char *p
, *q
, *r
;
1128 /* Try to open the file. */
1129 fp
= fopen(file
, "r");
1131 if ((f
&CF_NOENTOK
) && errno
== ENOENT
) return (-1);
1132 lose("failed to open configuration file `%s': %s",
1133 file
, strerror(errno
));
1136 /* Find the initial section. */
1137 sect
= config_find_section(conf
, CF_CREAT
, "@CONFIG"); var
= 0;
1139 /* Work through the file, line by line. */
1141 dstr_reset(&d
); if (dstr_readline(&d
, fp
)) break;
1144 /* Trim trailing spaces from the line. The syntax is sensitive to
1145 * leading spaces, so we can't trim those yet.
1147 while (d
.len
&& ISSPACE(d
.p
[d
.len
- 1])) d
.len
--;
1150 if (!*d
.p
|| *d
.p
== ';')
1151 /* Ignore comments entirely. (In particular, a comment doesn't
1152 * interrupt a multiline variable value.)
1156 else if (ISSPACE(d
.p
[0])) {
1157 /* The line starts with whitespace, so it's a continuation line. */
1159 /* Skip the initial whitespace. */
1160 p
= d
.p
; while (ISSPACE(*p
)) p
++;
1162 /* If we aren't collecting a variable value then this is an error.
1163 * Otherwise, accumulate it into the current value.
1166 lose("%s:%u: continuation line, but no variable", file
, line
);
1167 if (dd
.len
) dstr_putc(&dd
, ' ');
1168 dstr_putm(&dd
, p
, d
.len
- (p
- d
.p
));
1171 /* The line starts in the first column. */
1173 /* If there's a value value being collected then we must commit it to
1174 * its variable (unless there's already a setting there that says we
1178 if (!(var
->f
&CF_OVERRIDE
))
1179 { var
->val
= xstrndup(dd
.p
, dd
.len
); var
->n
= dd
.len
; }
1183 /* Now decide what kind of line this is. */
1184 if (d
.p
[0] == '[') {
1185 /* It's a section header. */
1187 /* Parse the header. */
1188 p
= d
.p
+ 1; while (ISSPACE(*p
)) p
++;
1189 q
= scan_name("section", p
, d
.p
+ d
.len
, file
, line
);
1190 r
= q
; while (ISSPACE(*r
)) r
++;
1192 lose("%s:%u: expected `]' in section header", file
, line
);
1194 lose("%s:%u: trailing junk after `]' in section header",
1197 /* Create the new section. */
1198 sect
= config_find_section_n(conf
, CF_CREAT
, p
, q
- p
);
1201 /* It's a variable assignment. Parse the name out. */
1202 p
= scan_name("variable", d
.p
, d
.p
+ d
.len
, file
, line
);
1203 var
= config_find_var_n(conf
, sect
, CF_CREAT
, d
.p
, p
- d
.p
);
1204 while (ISSPACE(*p
)) p
++;
1205 if (*p
!= '=') lose("%s:%u: missing `=' in assignment", file
, line
);
1206 p
++; while (ISSPACE(*p
)) p
++;
1208 /* Clear out the variable's initial value, unless we shouldn't
1211 if (!(var
->f
&CF_OVERRIDE
)) {
1212 free(var
->val
); var
->val
= 0; var
->f
= 0;
1213 free(var
->file
); var
->file
= xstrdup(file
); var
->line
= line
;
1215 dstr_reset(&dd
); dstr_puts(&dd
, p
);
1220 /* If there's a value under construction then commit the result. */
1221 if (var
&& !(var
->f
&CF_OVERRIDE
))
1222 { var
->val
= xstrndup(dd
.p
, dd
.len
); var
->n
= dd
.len
; }
1224 /* Close the file. */
1226 lose("error reading configuration file `%s': %s", file
, strerror(errno
));
1229 dstr_release(&d
); dstr_release(&dd
);
1233 /* Populate SECT with environment variables.
1235 * Environment variables are always set with `CF_LITERAL'.
1237 void config_read_env(struct config
*conf
, struct config_section
*sect
)
1242 for (i
= 0; (p
= environ
[i
]) != 0; i
++) {
1243 v
= strchr(p
, '='); if (!v
) continue;
1244 config_set_var_n(conf
, sect
, CF_LITERAL
, p
, v
- p
, v
+ 1, strlen(v
+ 1));
1248 /*----- Substitution and quoting ------------------------------------------*/
1250 /* The substitution and word-splitting state.
1252 * This only keeps track of the immutable parameters for the substitution
1253 * task: stuff which changes (flags, filtering state, cursor position) is
1254 * maintained separately.
1257 struct config
*config
; /* configuration state */
1258 struct config_section
*home
; /* home section for lookups */
1259 struct dstr
*d
; /* current word being constructed */
1260 struct argv
*av
; /* output word list */
1263 /* Flags for `subst' and related functions. */
1264 #define SF_SPLIT 0x0001u /* split at (unquoted) whitespace */
1265 #define SF_QUOT 0x0002u /* currently within double quotes */
1266 #define SF_SUBST 0x0004u /* apply `$-substitutions */
1267 #define SF_SUBEXPR 0x0008u /* stop at delimiter `|' or `}' */
1268 #define SF_SPANMASK 0x00ffu /* mask for the above */
1270 #define SF_WORD 0x0100u /* output word under construction */
1271 #define SF_SKIP 0x0200u /* not producing output */
1272 #define SF_LITERAL 0x0400u /* do not expand or substitute */
1273 #define SF_UPCASE 0x0800u /* convert to uppercase */
1274 #define SF_DOWNCASE 0x1000u /* convert to lowercase */
1275 #define SF_CASEMASK 0x1800u /* mask for case conversions */
1277 /* Apply filters encoded in QFILT and F to the text from P to L, and output.
1279 * SB is the substitution state which, in particular, explains where the
1282 * The filters are encoded as flags `SF_UPCASE' and `SF_DOWNCASE' for case
1283 * conversions, and a nesting depth QFILT for toothpick escaping. (QFILT is
1284 * encoded as the number of toothpicks to print: see `subst' for how this
1287 static void filter_string(const char *p
, const char *l
,
1288 const struct subst
*sb
, unsigned qfilt
, unsigned f
)
1291 char *q
; const char *pp
, *ll
;
1293 if (!qfilt
&& !(f
&SF_CASEMASK
))
1294 /* Fast path: there's nothing to do: just write to the output. */
1295 dstr_putm(sb
->d
, p
, l
- p
);
1298 /* We must be a bit more circumspect. */
1300 /* Determine the length of the next span of characters which don't need
1301 * escaping. (If QFILT is zero then this is everything.)
1303 r
= l
- p
; n
= qfilt ?
strcspn(p
, "\"\\") : r
;
1306 if (!(f
&SF_CASEMASK
))
1307 /* No case conversion: we can just emit this chunk. */
1309 dstr_putm(sb
->d
, p
, n
);
1312 /* Case conversion to do. Arrange enough space for the output, and
1313 * convert it character by character.
1316 dstr_ensure(sb
->d
, n
); q
= sb
->d
->p
+ sb
->d
->len
; pp
= p
; ll
= p
+ n
;
1317 if (f
&SF_DOWNCASE
) while (pp
< ll
) *q
++ = TOLOWER(*pp
++);
1318 else if (f
&SF_UPCASE
) while (pp
< ll
) *q
++ = TOUPPER(*pp
++);
1322 /* If we've reached the end then stop. */
1325 /* Otherwise we must have found a character which requires escaping.
1326 * Emit enough toothpicks.
1328 dstr_putcn(sb
->d
, '\\', qfilt
);
1330 /* This character is now done, so we can skip over and see if there's
1331 * another chunk of stuff we can do at high speed.
1333 dstr_putc(sb
->d
, p
[n
]); p
+= n
+ 1;
1337 /* Scan and resolve a `[SECT:]VAR' specifier at P.
1339 * Return the address of the next character following the specifier; and set
1340 * *VAR_OUT to point to the variable we found, or null if it's not there. L
1341 * is a limit on the region of the buffer that we should process; SB is the
1342 * substitution state which provides the home section if none is given
1343 * explicitly; FILE and LINE are the source location to blame for problems.
1345 static const char *retrieve_varspec(const char *p
, const char *l
,
1346 const struct subst
*sb
,
1347 struct config_var
**var_out
,
1348 const char *file
, unsigned line
)
1350 struct config_section
*sect
= sb
->home
;
1353 t
= scan_name("section or variable", p
, l
, file
, line
);
1354 if (t
< l
&& *t
== ':') {
1355 sect
= config_find_section_n(sb
->config
, 0, p
, t
- p
);
1356 p
= t
+ 1; t
= scan_name("variable", p
, l
, file
, line
);
1359 if (!sect
) *var_out
= 0;
1360 else *var_out
= config_find_var_n(sb
->config
, sect
, CF_INHERIT
, p
, t
- p
);
1364 /* Substitute and/or word-split text.
1366 * The input text starts at P, and continues to (just before) L. Context for
1367 * the task is provided by SB; the source location to blame is FILE and LINE
1368 * (FILE may be null so that this can be passed directly from a `config_var'
1369 * without further checking); QFILT is the nesting depth in toothpick-
1370 * escaping; and F holds a mask of `SF_...' flags.
1372 static const char *subst(const char *p
, const char *l
,
1373 const struct subst
*sb
,
1374 const char *file
, unsigned line
,
1375 unsigned qfilt
, unsigned f
)
1377 struct config_var
*var
;
1378 const char *q0
, *q1
, *t
;
1379 unsigned subqfilt
, ff
;
1382 /* It would be best if we could process literal text at high speed. To
1383 * this end, we have a table, indexed by the low-order bits of F, to tell
1384 * us which special characters we need to stop at. This way, we can use
1385 * `strcspn' to skip over literal text and stop at the next character which
1386 * needs special handling. Entries in this table with a null pointer
1387 * correspond to impossible flag settings.
1389 static const char *const delimtab
[] = {
1391 #define ESCAPE "\\" /* always watch for `\'-escapes */
1392 #define SUBST "$" /* check for `$' if `SF_SUBST' set */
1393 #define WORDSEP " \f\r\n\t\v'\"" /* space, quotes if `SF_SPLIT' but
1395 #define QUOT "\"" /* only quotes if `SF_SPLIT' and
1397 #define DELIM "|}" /* end delimiters of `SF_SUBEXPR' */
1404 ESCAPE SUBST WORDSEP
,
1408 ESCAPE DELIM WORDSEP
,
1412 ESCAPE DELIM SUBST WORDSEP
,
1414 ESCAPE DELIM SUBST QUOT
1422 /* Set FILE to be useful if it was null on entry. */
1423 if (!file
) file
= "<internal>";
1425 /* If the text is literal then hand off to `filter_string'. This obviously
1429 filter_string(p
, l
, sb
, qfilt
, f
);
1434 /* Chew through the input until it's all gone. */
1437 if ((f
&(SF_SPLIT
| SF_QUOT
)) == SF_SPLIT
&& ISSPACE(*p
)) {
1438 /* This is whitespace, we're supposed to split, and we're not within
1439 * quotes, so we should split here.
1442 /* If there's a word in progress then we should commit it. */
1445 argv_append(sb
->av
, xstrndup(sb
->d
->p
, sb
->d
->len
));
1451 /* Skip over further whitespace at high speed. */
1452 do p
++; while (p
< l
&& ISSPACE(*p
));
1454 } else if (*p
== '\\') {
1455 /* This is a toothpick, so start a new word and add the next character
1459 /* If there's no next character then we should be upset. */
1460 p
++; if (p
>= l
) lose("%s:%u: unfinished `\\' escape", file
, line
);
1464 /* If this is a double quote or backslash then check DFLT to see if
1465 * it needs escaping.
1467 if (qfilt
&& (*p
== '"' || *p
== '\\'))
1468 dstr_putcn(sb
->d
, '\\', qfilt
);
1470 /* Output the character. */
1471 if (f
&SF_DOWNCASE
) dstr_putc(sb
->d
, TOLOWER(*p
));
1472 else if (f
&SF_UPCASE
) dstr_putc(sb
->d
, TOUPPER(*p
));
1473 else dstr_putc(sb
->d
, *p
);
1476 /* Move past the escaped character. Remember we started a word. */
1479 } else if ((f
&SF_SPLIT
) && *p
== '"') {
1480 /* This is a double quote, and we're word splitting. We're definitely
1481 * in a word now. Toggle whether we're within quotes.
1484 f
^= SF_QUOT
; f
|= SF_WORD
; p
++;
1486 } else if ((f
&(SF_SPLIT
| SF_QUOT
)) == SF_SPLIT
&& *p
== '\'') {
1487 /* This is a single quote, and we're word splitting but not within
1488 * double quotes. Find the matching end quote, and just output
1489 * everything between literally.
1492 p
++; t
= strchr(p
, '\'');
1493 if (!t
|| t
>= l
) lose("%s:%u: missing `''", file
, line
);
1494 if (!(f
&SF_SKIP
)) filter_string(p
, t
, sb
, qfilt
, f
);
1495 p
= t
+ 1; f
|= SF_WORD
;
1497 } else if ((f
&SF_SUBEXPR
) && (*p
== '|' || *p
== '}')) {
1498 /* This is an end delimiter, and we're supposed to stop here. */
1501 } else if ((f
&SF_SUBST
) && *p
== '$') {
1502 /* This is a `$' and we're supposed to do substitution. */
1504 /* The kind of substitution is determined by the next character. */
1505 p
++; if (p
>= l
) lose("%s:%u: incomplete substitution", file
, line
);
1507 /* Prepare flags for a recursive substitution.
1509 * Hide our quote state from the recursive call. If we're within a
1510 * word, then disable word-splitting.
1512 ff
= f
&~(SF_QUOT
| (f
&SF_WORD ? SF_SPLIT
: 0));
1514 /* Now dispatch based on the following character. */
1518 /* A conditional expression: $?VAR{CONSEQ[|ALT]} */
1520 /* Skip initial space. */
1521 p
++; while (p
< l
&& ISSPACE(*p
)) p
++;
1523 /* Find the variable. */
1524 p
= retrieve_varspec(p
, l
, sb
, &var
, file
, line
);
1526 /* Skip whitespace again. */
1527 while (p
< l
&& ISSPACE(*p
)) p
++;
1529 /* Expect the opening `{'. */
1530 if (p
> l
|| *p
!= '{') lose("%s:%u: expected `{'", file
, line
);
1533 /* We'll process the parts recursively, but we need to come back
1534 * when we hit the appropriate delimiters, so arrange for that.
1538 /* Process the consequent (skip if the variable wasn't found). */
1539 p
= subst(p
, l
, sb
, file
, line
, qfilt
,
1540 ff
| (var ?
0 : SF_SKIP
));
1542 /* If there's a `|' then process the alternative too (skip if the
1543 * variable /was/ found).
1545 if (p
< l
&& *p
== '|')
1546 p
= subst(p
+ 1, l
, sb
, file
, line
, qfilt
,
1547 ff
| (var ? SF_SKIP
: 0));
1549 /* We should now be past the closing `}'. */
1550 if (p
>= l
|| *p
!= '}') lose("%s:%u: missing `}'", file
, line
);
1555 /* A variable substitution: ${VAR[|FILT]...[?ALT]} */
1557 /* Skip initial whitespace. */
1558 p
++; while (p
< l
&& ISSPACE(*p
)) p
++;
1560 /* Find the variable. */
1561 q0
= p
; p
= retrieve_varspec(p
, l
, sb
, &var
, file
, line
); q1
= p
;
1563 /* Determine the filters to apply when substituting the variable
1569 /* Skip spaces again. */
1570 while (p
< l
&& ISSPACE(*p
)) p
++;
1572 /* If there's no `|' then there are no more filters, so stop. */
1573 if (p
>= l
|| *p
!= '|') break;
1575 /* Skip the `|' and more spaces. */
1576 p
++; while (p
< l
&& ISSPACE(*p
)) p
++;
1578 /* Collect the filter name. */
1579 t
= scan_name("filter", p
, l
, file
, line
);
1581 /* Dispatch on the filter name. */
1582 if (t
- p
== 1 && *p
== 'q')
1583 /* `q' -- quote for Lisp string.
1585 * We're currently adding Q `\' characters before each naughty
1586 * character. But a backslash itself is naughty too, so that
1587 * makes Q + 1 naughty characters, each of which needs a
1588 * toothpick, so now we need Q + (Q + 1) = 2 Q + 1 toothpicks.
1590 * Calculate this here rather than at each point toothpicks
1591 * need to be deployed.
1594 subqfilt
= 2*subqfilt
+ 1;
1596 else if (t
- p
== 1 && *p
== 'l')
1597 /* `l' -- convert to lowercase.
1599 * If a case conversion is already set, then that will override
1600 * whatever we do here, so don't bother.
1603 { if (!(ff
&SF_CASEMASK
)) ff
|= SF_DOWNCASE
; }
1605 else if (t
- p
== 1 && *p
== 'u')
1606 /* `u' -- convert to uppercase.
1608 * If a case conversion is already set, then that will override
1609 * whatever we do here, so don't bother.
1611 { if (!(ff
&SF_CASEMASK
)) ff
|= SF_UPCASE
; }
1614 /* Something else we didn't understand. */
1615 lose("%s:%u: unknown filter `%.*s'",
1616 file
, line
, (int)(t
- p
), p
);
1618 /* Continue from after the filter name. */
1622 /* If we're not skipping, and we found a variable, then substitute
1623 * its value. This is the point where we need to be careful about
1624 * recursive expansion.
1626 if (!(f
&SF_SKIP
) && var
) {
1627 if (var
->f
&CF_EXPAND
)
1628 lose("%s:%u: recursive expansion of variable `%.*s'",
1629 file
, line
, (int)(q1
- q0
), q0
);
1630 var
->f
|= CF_EXPAND
;
1631 subst(var
->val
, var
->val
+ var
->n
, sb
,
1632 var
->file
, var
->line
, subqfilt
,
1633 ff
| (var
->f
&CF_LITERAL ? SF_LITERAL
: 0));
1634 var
->f
&= ~CF_EXPAND
;
1637 /* If there's an alternative, then we need to process (or maybe
1638 * skip) it. Otherwise, we should complain if there was no
1639 * veriable, and we're not skipping.
1641 if (p
< l
&& *p
== '?')
1642 p
= subst(p
+ 1, l
, sb
, file
, line
, subqfilt
,
1643 ff
| SF_SUBEXPR
| (var ? SF_SKIP
: 0));
1644 else if (!var
&& !(f
&SF_SKIP
))
1645 lose("%s:%u: unknown variable `%.*s'",
1646 file
, line
, (int)(q1
- q0
), q0
);
1648 /* Expect a `}' here. (No need to skip spaces: we already did that
1649 * after scanning for filters, and either there was no alternative,
1650 * or we advanced to a delimiter character anyway.)
1652 if (p
>= l
|| *p
!= '}') lose("%s:%u: missing `}'", file
, line
);
1657 /* Something else. That's a shame. */
1658 lose("%s:%u: unexpected `$'-substitution `%c'", file
, line
, *p
);
1661 /* Complain if we started out in word-splitting state, and therefore
1662 * have added a whole number of words to the output, but there's a
1663 * word-fragment stuck onto the end of this substitution.
1665 if (p
< l
&& !(~f
&~(SF_WORD
| SF_SPLIT
)) && !ISSPACE(*p
) &&
1666 !((f
&SF_SUBEXPR
) && (*p
== '|' || *p
== '}')))
1667 lose("%s:%u: surprising word boundary "
1668 "after splicing substitution",
1673 /* Something else. Try to skip over this at high speed.
1675 * This makes use of the table we set up earlier.
1678 n
= strcspn(p
, delimtab
[f
&SF_SPANMASK
]);
1679 if (n
> l
- p
) n
= l
- p
;
1680 if (!(f
&SF_SKIP
)) filter_string(p
, p
+ n
, sb
, qfilt
, f
);
1681 p
+= n
; f
|= SF_WORD
;
1686 /* Sort out the wreckage. */
1688 /* If we're still within quotes then something has gone wrong. */
1689 if (f
&SF_QUOT
) lose("%s:%u: missing `\"'", file
, line
);
1691 /* If we're within a word, and should be splitting, then commit the word to
1694 if ((f
&(SF_WORD
| SF_SPLIT
| SF_SKIP
)) == (SF_SPLIT
| SF_WORD
)) {
1695 argv_append(sb
->av
, xstrndup(sb
->d
->p
, sb
->d
->len
));
1699 /* And, with that, we're done. */
1703 /* Expand substitutions in a string.
1705 * Expand the null-terminated string P relative to the HOME section, using
1706 * configuration CONFIG, and appending the result to dynamic string D. Blame
1707 * WHAT in any error messages.
1709 void config_subst_string(struct config
*config
, struct config_section
*home
,
1710 const char *what
, const char *p
, struct dstr
*d
)
1714 sb
.config
= config
; sb
.home
= home
; sb
.d
= d
;
1715 subst(p
, p
+ strlen(p
), &sb
, what
, 0, 0, SF_SUBST
);
1719 /* Expand substitutions in a string.
1721 * Expand the null-terminated string P relative to the HOME section, using
1722 * configuration CONFIG, returning the result as a freshly malloc(3)ed
1723 * string. Blame WHAT in any error messages.
1725 char *config_subst_string_alloc(struct config
*config
,
1726 struct config_section
*home
,
1727 const char *what
, const char *p
)
1729 struct dstr d
= DSTR_INIT
;
1732 config_subst_string(config
, home
, what
, p
, &d
);
1733 q
= xstrndup(d
.p
, d
.len
); dstr_release(&d
); return (q
);
1736 /* Expand substitutions in a variable.
1738 * Expand the value of the variable VAR relative to the HOME section, using
1739 * configuration CONFIG, appending the result to dynamic string D.
1741 void config_subst_var(struct config
*config
, struct config_section
*home
,
1742 struct config_var
*var
, struct dstr
*d
)
1746 sb
.config
= config
; sb
.home
= home
; sb
.d
= d
;
1747 var
->f
|= CF_EXPAND
;
1748 subst(var
->val
, var
->val
+ var
->n
, &sb
, var
->file
, var
->line
, 0,
1749 SF_SUBST
| (var
->f
&CF_LITERAL ? SF_LITERAL
: 0));
1750 var
->f
&= ~CF_EXPAND
;
1754 /* Expand substitutions in a variable.
1756 * Expand the value of the variable VAR relative to the HOME section, using
1757 * configuration CONFIG, returning the result as a freshly malloc(3)ed
1760 char *config_subst_var_alloc(struct config
*config
,
1761 struct config_section
*home
,
1762 struct config_var
*var
)
1764 struct dstr d
= DSTR_INIT
;
1767 config_subst_var(config
, home
, var
, &d
);
1768 q
= xstrndup(d
.p
, d
.len
); dstr_release(&d
); return (q
);
1771 /* Expand substitutions in a variable and split into words.
1773 * Expand and word-split the value of the variable VAR relative to the HOME
1774 * section, using configuration CONFIG, appending the resulting words into
1777 void config_subst_split_var(struct config
*config
,
1778 struct config_section
*home
,
1779 struct config_var
*var
, struct argv
*av
)
1781 struct dstr d
= DSTR_INIT
;
1784 sb
.config
= config
; sb
.home
= home
; sb
.av
= av
; sb
.d
= &d
;
1785 var
->f
|= CF_EXPAND
;
1786 subst(var
->val
, var
->val
+ var
->n
, &sb
, var
->file
, var
->line
, 0,
1787 SF_SUBST
| SF_SPLIT
| (var
->f
&CF_LITERAL ? SF_LITERAL
: 0));
1788 var
->f
&= ~CF_EXPAND
;
1792 /*----- That's all, folks -------------------------------------------------*/