[runlisp] / lib.c

/* -*-c-*-
 *
 * Common definitions for `runlisp'
 *
 * (c) 2020 Mark Wooding
 */

/*----- Licensing notice --------------------------------------------------*
 *
 * This file is part of Runlisp, a tool for invoking Common Lisp scripts.
 *
 * Runlisp is free software: you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 3 of the License, or (at your
 * option) any later version.
 *
 * Runlisp is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Runlisp.  If not, see <https://www.gnu.org/licenses/>.
 */

/*----- Header files ------------------------------------------------------*/

#include "config.h"

#include <assert.h>

#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <unistd.h>

#include "lib.h"

/*----- Diagnostic utilities ----------------------------------------------*/

const char *progname = "???";
	/* Our program name, for use in error messages. */

/* Set `progname' from the pathname in PROG (typically from `argv[0]'). */
void set_progname(const char *prog)
{
  const char *p;

  p = strrchr(prog, '/');
  progname = p ? p + 1 : prog;
}

/* Report an error or warning in Unix style, given a captured argument
 * cursor.
 */
void vmoan(const char *msg, va_list ap)
{
  fprintf(stderr, "%s: ", progname);
  vfprintf(stderr, msg, ap);
  fputc('\n', stderr);
}

/* Issue a warning message. */
void moan(const char *msg, ...)
  { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); }

/* Issue a fatal error message and exit unsuccessfully. */
void lose(const char *msg, ...)
  { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); exit(127); }

/*----- Memory allocation -------------------------------------------------*/

/* Allocate and return a pointer to N bytes, or report a fatal error.
 *
 * Release the pointer using `free' as usual.  If N is zero, returns null
 * (but you are not expected to check for this).
 */
void *xmalloc(size_t n)
{
  void *p;

  if (!n) return (0);
  p = malloc(n); if (!p) lose("failed to allocate memory");
  return (p);
}

/* Resize the block at P (from `malloc' or `xmalloc') to be N bytes long.
 *
 * The block might (and probably will) move, so it returns the new address.
 * If N is zero, then the block is freed (if necessary) and a null pointer
 * returned; otherwise, if P is null then a fresh block is allocated.  If
 * allocation fails, then a fatal error is reported.
 */
void *xrealloc(void *p, size_t n)
{
  if (!n) { free(p); return (0); }
  else if (!p) return (xmalloc(n));
  p = realloc(p, n); if (!p) lose("failed to allocate memory");
  return (p);
}

/* Allocate and return a copy of the N-byte string starting at P.
 *
 * The new string is null-terminated, though P need not be.  If allocation
 * fails, then a fatal error is reported.
 */
char *xstrndup(const char *p, size_t n)
{
  char *q = xmalloc(n + 1);

  memcpy(q, p, n); q[n] = 0;
  return (q);
}

/* Allocate and return a copy of the null-terminated string starting at P.
 *
 * If allocation fails, then a fatal error is reported.
 */
char *xstrdup(const char *p) { return (xstrndup(p, strlen(p))); }

/*----- Dynamic strings ---------------------------------------------------*/

/* Initialize the string D.
 *
 * Usually you'd use the static initializer `DSTR_INIT'.
 */
void dstr_init(struct dstr *d) { d->p = 0; d->len = d->sz = 0; }

/* Reset string D so it's empty again. */
void dstr_reset(struct dstr *d) { d->len = 0; }

/* Ensure that D has at least N unused bytes available. */
void dstr_ensure(struct dstr *d, size_t n)
{
  size_t need = d->len + n, newsz;

  if (need <= d->sz) return;
  newsz = d->sz ? 2*d->sz : 16;
  while (newsz < need) newsz *= 2;
  d->p = xrealloc(d->p, newsz); d->sz = newsz;
}

/* Release the memory held by D.
 *
 * It must be reinitialized (e.g., by `dstr_init') before it can be used
 * again.
 */
void dstr_release(struct dstr *d) { free(d->p); }

/* Append the N-byte string at P to D.
 *
 * P need not be null-terminated.  D will not be null-terminated
 * afterwards.
 */
void dstr_putm(struct dstr *d, const void *p, size_t n)
  { dstr_ensure(d, n); memcpy(d->p + d->len, p, n); d->len += n; }

/* Append the null-terminated string P to D.
 *
 * D /is/ guaranteed to be null-terminated after this.
 */
void dstr_puts(struct dstr *d, const char *p)
{
  size_t n = strlen(p);

  dstr_ensure(d, n + 1);
  memcpy(d->p + d->len, p, n + 1);
  d->len += n;
}

/* Append the single character CH to D.
 *
 * D will not be null-terminated afterwards.
 */
void dstr_putc(struct dstr *d, int ch)
  { dstr_ensure(d, 1); d->p[d->len++] = ch; }

/* Append N copies of the character CH to D.
 *
 * D will not be null-terminated afterwards.
 */
void dstr_putcn(struct dstr *d, int ch, size_t n)
  { dstr_ensure(d, n); memset(d->p + d->len, ch, n); d->len += n; }

/* Null-terminate the string D.
 *
 * This doesn't change the length of D.  If further stuff is appended then
 * the null terminator will be overwritten.
 */
void dstr_putz(struct dstr *d)
  { dstr_ensure(d, 1); d->p[d->len] = 0; }

/* Append stuff to D, determined by printf(3) format string P and argument
 * tail AP.
 *
 * D will not be null-terminated afterwards.
 */
void dstr_vputf(struct dstr *d, const char *p, va_list ap)
{
  va_list ap2;
  size_t r;
  int n;

  r = d->sz - d->len;
  va_copy(ap2, ap);
  n = vsnprintf(d->p + d->len, r, p, ap2); assert(n >= 0);
  va_end(ap2);
  if (n >= r) {
    dstr_ensure(d, n + 1); r = d->sz - d->len;
    n = vsnprintf(d->p + d->len, r, p, ap); assert(n >= 0); assert(n < r);
  }
  d->len += n;
}

/* Append stuff to D, determined by printf(3) format string P and arguments.
 *
 * D will not be null-terminated afterwards.
 */
PRINTF_LIKE(2, 3) void dstr_putf(struct dstr *d, const char *p, ...)
  { va_list ap; va_start(ap, p); dstr_vputf(d, p, ap); va_end(ap); }

/* Append the next input line from FP to D.
 *
 * Return 0 on success, or -1 if reading immediately fails or encounters
 * end-of-file (call ferror(3) to distinguish).  Any trailing newline is
 * discarded: it is not possible to determine whether the last line was ended
 * with a newline.  D is guaranteed to be null-terminated afterwards.
 */
int dstr_readline(struct dstr *d, FILE *fp)
{
  size_t n;
  int any = 0;

  for (;;) {
    dstr_ensure(d, 2);
    if (!fgets(d->p + d->len, d->sz - d->len, fp)) break;
    n = strlen(d->p + d->len); assert(n > 0); any = 1;
    d->len += n;
    if (d->p[d->len - 1] == '\n') { d->p[--d->len] = 0; break; }
  }

  if (!any) return (-1);
  else return (0);
}

/*----- Dynamic vectors of strings ----------------------------------------*/

/* Initialize the vector AV.
 *
 * Usually you'd use the static initializer `ARGV_INIT'.
 */
void argv_init(struct argv *av)
  { av->v = 0; av->o = av->n = av->sz = 0; }

/* Reset the vector AV so that it's empty again. */
void argv_reset(struct argv *av) { av->n = 0; }

/* Ensure that AV has at least N unused slots at the end. */
void argv_ensure(struct argv *av, size_t n)
{
  size_t need = av->n + av->o + n, newsz;

  if (need <= av->sz) return;
  newsz = av->sz ? 2*av->sz : 8;
  while (newsz < need) newsz *= 2;
  av->v = xrealloc(av->v - av->o, newsz*sizeof(char *)); av->v += av->o;
  av->sz = newsz;
}

/* Ensure that AV has at least N unused slots at the /start/. */
void argv_ensure_offset(struct argv *av, size_t n)
{
  size_t newoff;

  /* Stupid version.  We won't, in practice, be prepending lots of stuff, so
   * avoid the extra bookkeeping involved in trying to make a double-ended
   * extendable array asymptotically efficient.
   */
  if (av->o >= n) return;
  newoff = 16;
  while (newoff < n) newoff *= 2;
  argv_ensure(av, newoff - av->o);
  memmove(av->v + newoff - av->o, av->v, av->n*sizeof(char *));
  av->v += newoff - av->o; av->o = newoff;
}

/* Release the memory held by AV.
 *
 * It must be reinitialized (e.g., by `argv_init') before it can be used
 * again.
 */
void argv_release(struct argv *av) { free(av->v - av->o); }

/* Append the pointer P to AV. */
void argv_append(struct argv *av, char *p)
  { argv_ensure(av, 1); av->v[av->n++] = p; }

/* Append a null pointer to AV, without extending the vactor length.
 *
 * The null pointer will be overwritten when the next string is appended.
 */
void argv_appendz(struct argv *av)
  { argv_ensure(av, 1); av->v[av->n] = 0; }

/* Append a N-element vector V of pointers to AV. */
void argv_appendn(struct argv *av, char *const *v, size_t n)
{
  argv_ensure(av, n);
  memcpy(av->v + av->n, v, n*sizeof(const char *));
  av->n += n;
}

/* Append the variable-length vector BV to AV. */
void argv_appendav(struct argv *av, const struct argv *bv)
  { argv_appendn(av, bv->v, bv->n); }

/* Append the pointers from a variable-length argument list AP to AV.
 *
 * The list is terminated by a null pointer.
 */
void argv_appendv(struct argv *av, va_list ap)
{
  char *p;
  for (;;) { p = va_arg(ap, char *); if (!p) break; argv_append(av, p); }
}

/* Append the argument pointers, terminated by a null pointer, to AV. */
void argv_appendl(struct argv *av, ...)
  { va_list ap; va_start(ap, av); argv_appendv(av, ap); va_end(ap); }

/* Prepend the pointer P to AV. */
void argv_prepend(struct argv *av, char *p)
  { argv_ensure_offset(av, 1); *--av->v = p; av->o--; av->n++; }

/* Prepend a N-element vector V of pointers to AV. */
void argv_prependn(struct argv *av, char *const *v, size_t n)
{
  argv_ensure_offset(av, n);
  av->o -= n; av->v -= n; av->n += n;
  memcpy(av->v, v, n*sizeof(const char *));
}

/* Prepend the variable-length vector BV to AV. */
void argv_prependav(struct argv *av, const struct argv *bv)
  { argv_prependn(av, bv->v, bv->n); }

/* Prepend the pointers from a variable-length argument list AP to AV.
 *
 * The list is terminated by a null pointer.
 */
void argv_prependv(struct argv *av, va_list ap)
{
  char *p, **v;
  size_t n = 0;

  for (;;) {
    p = va_arg(ap, char *); if (!p) break;
    argv_prepend(av, p); n++;
  }
  v = av->v;
  while (n >= 2) {
    p = v[0]; v[0] = v[n - 1]; v[n - 1] = p;
    v++; n -= 2;
  }
}

/* Prepend the argument pointers, terminated by a null pointer, to AV. */
void argv_prependl(struct argv *av, ...)
  { va_list ap; va_start(ap, av); argv_prependv(av, ap); va_end(ap); }

/*----- Treaps ------------------------------------------------------------*/

/* Return nonzero if the AN-byte string A is strictly precedes the BN-byte
 * string B in a lexicographic ordering.
 *
 * All comparisons of keys is handled by this function.
 */
static int str_lt(const char *a, size_t an, const char *b, size_t bn)
{
  /* This is a little subtle.  We need only compare the first N bytes of the
   * strings, where N is the length of the shorter string.  If this
   * distinguishes the two strings, then we're clearly done.  Otherwise, if
   * the prefixes are equal then the shorter string is the smaller one.  If
   * the two strings are the same length, then they're equal.
   *
   * Hence, if A is the strictly shorter string, then A precedes B if A
   * precedes or matches the prefix of B; otherwise A only precedes B if A
   * strictly precedes the prefix of B.
   */
  if (an < bn) return (MEMCMP(a, <=, b, an));
  else return (MEMCMP(a, <, b, bn));
}

/* Initialize the treap T.
 *
 * Usually you'd use the static initializer `TREAP_INIT'.
 */
void treap_init(struct treap *t) { t->root = 0; }

/* Look up the KN-byte key K in the treap T.
 *
 * Return a pointer to the matching node if one was found, or null otherwise.
 */
void *treap_lookup(const struct treap *t, const char *k, size_t kn)
{
  struct treap_node *n = t->root, *candidate = 0;

  /* This is a simple prototype for some of the search loops we'll encounter
   * later.  Notice that we use a strict one-sided comparison, rather than
   * the more conventional two-sided comparison.
   *
   * The main loop will find the largest key not greater than K.
   */
  while (n)
    /* Compare the node's key against our key.  If the node is too large,
     * then we ignore it and move left.  Otherwise remember this node for
     * later, and move right to see if we can find a better, larger node.
     */

    if (str_lt(k, kn, n->k, n->kn)) n = n->left;
    else { candidate = n; n = n->right; }

  /* If the candidate node is less than our key then we failed.  Otherwise,
   * by trichotomy, we have found the correct node.
   */
  if (!candidate || str_lt(candidate->k, candidate->kn, k, kn)) return (0);
  return (candidate);
}

/* Look up the KN-byte K in the treap T, recording a path in P.
 *
 * This is similar to `treap_lookup', in that it returns the requested node
 * if it already exists, or null otherwise, but it also records in P
 * information to be used by `treap_insert' to insert a new node with the
 * given key if it's not there already.
 */
void *treap_probe(struct treap *t, const char *k, size_t kn,
		  struct treap_path *p)
{
  struct treap_node **nn = &t->root, *candidate = 0;
  unsigned i = 0;

  /* This walk is similar to `treap_lookup' above, except that we also record
   * the address of each node pointer we visit along the way.
   */
  for (;;) {
    assert(i < TREAP_PATHMAX); p->path[i++] = nn;
    if (!*nn) break;
    if (str_lt(k, kn, (*nn)->k, (*nn)->kn)) nn = &(*nn)->left;
    else { candidate = *nn; nn = &(*nn)->right; }
  }
  p->nsteps = i;

  /* Check to see whether we found the right node. */
  if (!candidate || str_lt(candidate->k, candidate->kn, k, kn)) return (0);
  return (candidate);
}

/* Insert a new node N into T, associating it with the KN-byte key K.
 *
 * Use the path data P, from `treap_probe', to help with insertion.
 */
void treap_insert(struct treap *t, const struct treap_path *p,
		  struct treap_node *n, const char *k, size_t kn)
{
  size_t i = p->nsteps;
  struct treap_node **nn, **uu, *u;
  unsigned wt;

  /* Fill in the node structure. */
  n->k = xstrndup(k, kn); n->kn = kn;
  n->wt = wt = rand(); n->left = n->right = 0;

  /* Prepare for the insertion.
   *
   * The path actually points to each of the links traversed when searching
   * for the node, starting with the `root' pointer, then the `left' or
   * `right' pointer of the root node, and so on; `nsteps' will always be
   * nonzero, since the path will always pass through the root, and the final
   * step, `path->path[path->nsteps - 1]' will always be the address of a
   * null pointer onto which the freshly inserted node could be hooked in
   * order to satisfy the binary-search-tree ordering.  (Of course, this will
   * likely /not/ satisfy the heap condition, so more work needs to be done.)
   *
   * Throughout, NN is our current candidate for where to attach the node N.
   * As the loop progresses, NN will ascend to links further up the tree, and
   * N will be adjusted to accumulate pieces of the existing tree structure.
   * We'll stop when we find that the parent node's weight is larger than our
   * new node's weight, at which point we can just set *NN = N; or if we run
   * out of steps in the path, in which case *NN is the root pointer.
   */
  assert(i); nn = p->path[--i];
  while (i--) {

    /* Collect the next step in the path, and get the pointer to the node. */
    uu = p->path[i]; u = *uu;

    /* If this node's weight is higher, then we've found the right level and
     * we can stop.
     */
    if (wt <= u->wt) break;

    /* The node U is lighter than our new node N, so we must rotate in order
     * to fix things.  If we were currently planning to hook N as the left
     * subtree of U, then we rotate like this:
     *
     *			|		    |
     *			U		   (N)
     *		      /   \		  /   \
     *		   (N)	    Z	--->    X       U
     *		  /   \			      /   \
     *		X       Y		    Y	    Z
     *
     * On the other hand, if we ere planning to hook N as the right subtree
     * of U, then we do the opposite rotation:
     *
     *		    |				|
     *		    U			       (N)
     *		  /   \			      /   \
     *		X      (N)	--->	    U	    Z
     *		      /   \		  /   \
     *		    Y	    Z		X	Y
     *
     * These transformations clearly preserve the ordering of nodes in the
     * binary search tree, and satisfy the heap condition in the subtree
     * headed by N.
     */
    if (nn == &u->left) { u->left = n->right; n->right = u; }
    else { u->right = n->left; n->left = u; }

    /* And this arrangement must be attached to UU, or some higher attachment
     * point.  The subtree satisfies the heap condition, and can be attached
     * safely at the selected place.
     */
    nn = uu;
  }

  /* We've found the right spot.  Hook the accumulated subtree into place. */
  *nn = n;
}

/* Remove the node with the KN-byte K from T.
 *
 * Return the address of the node we removed, or null if it couldn't be
 * found.
 */
void *treap_remove(struct treap *t, const char *k, size_t kn)
{
  struct treap_node **nn = &t->root, **candidate = 0, *n, *l, *r;

  /* Search for the matching node, but keep track of the address of the link
   * which points to our target node.
   */
  while (*nn)
    if (str_lt(k, kn, (*nn)->k, (*nn)->kn)) nn = &(*nn)->left;
    else { candidate = nn; nn = &(*nn)->right; }

  /* If this isn't the right node then give up. */
  if (!candidate || str_lt((*candidate)->k, (*candidate)->kn, k, kn))
    return (0);

  /* Now we need to disentangle the node from the tree.  This is essentially
   * the reverse of insertion: we pretend that this node is suddenly very
   * light, and mutate the tree so as to restore the heap condition until
   * eventually our node is a leaf and can be cut off without trouble.
   *
   * Throughout, the link *NN notionally points to N, but we don't actually
   * update it until we're certain what value it should finally take.
   */
  nn = candidate; n = *nn; l = n->left; r = n->right;
  for (;;)

    /* If its left subtree is empty then we can replace our node by its right
     * subtree and be done.  Similarly, if the right subtree is empty then we
     * replace the node by its left subtree.
     *
     *		    |		|		|		|
     *		   (N)	--->	R ;	       (N)	--->	L
     *		  /   \			      /   \
     *		*       R		    L	    *
     */
    if (!l) { *nn = r; break; }
    else if (!r) { *nn = l; break; }

    /* Otherwise we need to rotate the pointers so that the heavier of the
     * two children takes the place of our node; thus we have either
     *
     *			|		    |
     *		       (N)		    L
     *		      /   \		  /   \
     *		    L	    R	--->	X      (N)
     *		  /   \			      /   \
     *		X	Y		    Y	    R
     *
     * or
     *
     *		    |				|
     *		   (N)				R
     *		  /   \			      /   \
     *		L	R	--->	   (N)	    Y
     *		      /   \		  /   \
     *		    X	    Y		L       X
     *
     * Again, these transformations clearly preserve the ordering of nodes in
     * the binary search tree, and the heap condition.
     */
    else if (l->wt > r->wt)
      { *nn = l; nn = &l->right; l = n->left = l->right; }
    else
      { *nn = r; nn = &r->left; r = n->right = r->left; }

  /* Release the key buffer, and return the node that we've now detached. */
  free(n->k); return (n);
}

/* Initialize an iterator I over T's nodes. */
void treap_start_iter(struct treap *t, struct treap_iter *i)
{
  struct treap_node *n = t->root;
  unsigned sp = 0;

  /* The `stack' in the iterator structure is an empty ascending stack of
   * nodes which have been encountered, and their left subtrees investigated,
   * but not yet visited by the iteration.
   *
   * Iteration begins by stacking the root node, its left child, and so on,
   * At the end of this, the topmost entry on the stack is the least node of
   * the tree, followed by its parent, grandparent, and so on up to the root.
   */
  while (n) {
    assert(sp < TREAP_PATHMAX);
    i->stack[sp++] = n; n = n->left;
  }
  i->sp = sp;
}

/* Return the next node from I, in ascending order by key.
 *
 * If there are no more nodes, then return null.
 */
void *treap_next(struct treap_iter *i)
{
  struct treap_node *n, *o;
  unsigned sp = i->sp;

  /* We say that a node is /visited/ once it's been returned by this
   * iterator.  To traverse a tree in order, then, we traverse its left
   * subtree, visit the tree root, and traverse its right subtree -- which is
   * a fine recursive definition, but we need a nonrecursive implementation.
   *
   * As is usual in this kind of essential structural recursion, we maintain
   * a stack.  The invariant that we'll maintain is as follows.
   *
   *   1. If the stack is empty, then all nodes have been visited.
   *
   *   2, If the stack is nonempty then the topmost entry on the stack is the
   *	  least node which has not yet been visited -- and therefore is the
   *	  next node to visit.
   *
   *   3. The earlier entries in the stack are, in (top to bottom) order,
   *	  those of the topmost node's parent, grandparent, etc., up to the
   *	  root, which have not yet been visited.  More specifically, a node
   *	  appears in the stack if and only if some node in its left subtree
   *	  is nearer the top of the stack.
   *
   * When we initialized the iterator state (in `treap_start_iter' above), we
   * traced a path to the leftmost leaf, stacking the root, its left-hand
   * child, and so on.  The leftmost leaf is clearly the first node to be
   * visited, and its entire ancestry is on the stack since none of these
   * nodes has yet been visited.  (If the tree is empty, then we have done
   * nothing, the stack is empty, and there are no nodes to visit.)  This
   * establishes the base case for the induction.
   */

  /* So, if the stack is empty now, then (1) all of the nodes have been
   * visited and there's nothing left to do.  Return null.
   */
  if (!sp) return (0);

  /* It's clear that, if we pop the topmost element of the stack, visit it,
   * and arrange to reestablish the invariant, then we'll visit the nodes in
   * the correct order, pretty much by definition.
   *
   * So, pop a node off the stack.  This is the node we shall return.  But
   * before we can do that, we must reestablish the above invariant.
   * Firstly, the current node is removed from the stack, because we're about
   * to visit it, and visited nodes don't belong on the stack.  Then there
   * are two cases to consider.
   *
   *   * If the current node's right subtree is not empty, then the next node
   *	 to be visited is the leftmost node in that subtree.  All of the
   *	 nodes on the stack are ancestors of the current node, and the right
   *	 subtree consists of its descendants, so none of them are already on
   *	 the stack; and they're all greater than the current node, and
   *	 therefore haven't been visited.  Therefore, we must push the current
   *	 node's right child, its /left/ child, and so on, proceeding
   *	 leftwards until we fall off the bottom of the tree.
   *
   *   * Otherwise, we've finished traversing some subtree.  Either we are
   *	 now done, or (3) we have just finished traversing the left subtree
   *	 of the next topmost item on the stack.  This must therefore be the
   *	 next node to visit.  The rest of the stack is already correct.
   */
  n = i->stack[--sp];
  o = n->right;
  while (o) {
    assert(sp < TREAP_PATHMAX);
    i->stack[sp++] = o; o = o->left;
  }
  i->sp = sp;
  return (n);
}

/* Recursively check the subtree headed by N.
 *
 * No node should have weight greater than MAXWT, to satisfy the heap
 * condition; if LO is not null, then all node keys should be strictly
 * greater than LO, and, similarly, if HI is not null, then all keys should
 * be strictly smaller than HI.
 */
static void check_subtree(struct treap_node *n, unsigned maxwt,
			  const char *klo, const char *khi)
{
  /* Check the heap condition. */
  assert(n->wt <= maxwt);

  /* Check that the key is in bounds.  (Use `strcmp' here to ensure that our
   * own `str_lt' is working correctly.)
   */
  if (klo) assert(STRCMP(n->k, >, klo));
  if (khi) assert(STRCMP(n->k, <, khi));

  /* Check the left subtree.  Node weights must be bounded above by our own
   * weight.  And everykey in the left subtree must be smaller than our
   * current key.  We propagate the lower bound.
   */
  if (n->left) check_subtree(n->left, n->wt, klo, n->k);

  /* Finally, check the right subtree.  This time, every key must be larger
   * than our key, and we propagate the upper bound.
   */
  if (n->right) check_subtree(n->right, n->wt, n->k, khi);
}

/* Check the treap structure rules for T. */
void treap_check(struct treap *t)
  { if (t->root) check_subtree(t->root, t->root->wt, 0, 0); }

/* Recursively dump the subtree headed by N, indenting the output lines by
 * IND spaces.
 */
static void dump_node(struct treap_node *n, int ind)
{
  if (n->left) dump_node(n->left, ind + 1);
  printf(";;%*s [%10u] `%s'\n", 2*ind, "", n->wt, n->k);
  if (n->right) dump_node(n->right, ind + 1);
}

/* Dump the treap T to standard output, for debugging purposes. */
void treap_dump(struct treap *t) { if (t->root) dump_node(t->root, 0); }

/*----- Configuration file parsing ----------------------------------------*/

#ifndef DECL_ENVIRON
  extern char **environ;
#endif

/* Advance P past a syntactically valid name, but no further than L.
 *
 * Return the new pointer.  If no name is found, report an error, blaming
 * FILE and LINE; WHAT is an adjective for the kind of name that was
 * expected.
 */
static const char *scan_name(const char *what,
			     const char *p, const char *l,
			     const char *file, unsigned line)
{
  const char *q = p;

  while (q < l &&
	 (ISALNUM(*q) || *q == '-' || *q == '_' || *q == '.' || *q == '/' ||
			 *q == '*' || *q == '+' || *q == '%' || *q == '@'))
    q++;
  if (q == p) lose("%s:%u: expected %s name", file, line, what);
  return (q);
}

/* Initialize the configuration state CONF.
 *
 * Usually you'd use the static initializer `CONFIG_INIT'.
 */
void config_init(struct config *conf)
  { treap_init(&conf->sections); }

/* Find and return the section with null-terminated NAME in CONF.
 *
 * If no section is found, the behaviour depends on whether `CF_CREAT' is set
 * in F: if so, an empty section is created and returned; otherwise, a null
 * pointer is returned.
 */
struct config_section *config_find_section(struct config *conf, unsigned f,
					   const char *name)
  { return (config_find_section_n(conf, f, name, strlen(name))); }

/* Find and return the section with the SZ-byte NAME in CONF.
 *
 * This works like `config_find_section', but with an explicit length for the
 * NAME rather than null-termination.
 */
struct config_section *config_find_section_n(struct config *conf, unsigned f,
					     const char *name, size_t sz)
{
  struct config_section *sect;
  struct treap_path path;

  if (!(f&CF_CREAT))
    sect = treap_lookup(&conf->sections, name, sz);
  else {
    sect = treap_probe(&conf->sections, name, sz, &path);
    if (!sect) {
      sect = xmalloc(sizeof(*sect));
      if (!conf->head) conf->tail = &conf->head;
      sect->next = 0; *conf->tail = sect; conf->tail = &sect->next;
      sect->parents = 0; sect->nparents = SIZE_MAX;
      treap_init(&sect->vars); treap_init(&sect->cache);
      treap_insert(&conf->sections, &path, &sect->_node, name, sz);
      config_set_var_n(conf, sect, CF_LITERAL, "@name", 5, name, sz);
    }
  }
  return (sect);
}

/* Set the fallback section for CONF to be SECT.
 *
 * That is, if a section has no explicit parents, then by default it will
 * have a single parent which is SECT.  If SECT is null then there is no
 * fallback section, and sections which don't have explicitly specified
 * parents have no parents at all.  (This is the default situation.)
 */
void config_set_fallback(struct config *conf, struct config_section *sect)
  { conf->fallback = sect; }

/* Arrange that SECT has PARENT as its single parent section.
 *
 * If PARENT is null, then arrange that SECT has no parents at all.  In
 * either case, any `@parents' setting will be ignored.
 */
void config_set_parent(struct config_section *sect,
		       struct config_section *parent)
{
  if (!parent)
    sect->nparents = 0;
  else {
    sect->parents = xmalloc(sizeof(*sect->parents));
    sect->parents[0] = parent; sect->nparents = 1;
  }
}

/* Initialize I to iterate over the sections defined in CONF. */
void config_start_section_iter(struct config *conf,
			       struct config_section_iter *i)
  { i->sect = conf->head; }

/* Return the next section from I, in order of creation.
 *
 * If there are no more sections, then return null.
 */
struct config_section *config_next_section(struct config_section_iter *i)
{
  struct config_section *sect;

  sect = i->sect;
  if (sect) i->sect = sect->next;
  return (sect);
}

/* Initialize the `parents' links of SECT, if they aren't set up already.
 *
 * If SECT contains a `@parents' setting then parse it to determine the
 * parents; otherwise use CONF's fallbeck section, as established by
 * `config_set_fallback'.
 */
static void set_config_section_parents(struct config *conf,
				       struct config_section *sect)
{
  struct config_section *parent;
  struct config_var *var;
  const char *file; unsigned line;
  size_t i, n;
  char *p, *q, *l;
  struct argv av = ARGV_INIT;

  /* If the section already has parents established then there's nothing to
   * do.
   */
  if (sect->nparents != SIZE_MAX) return;

  /* Look up `@parents', without recursion! */
  var = treap_lookup(&sect->vars, "@parents", 8);
  if (!var) {
    /* No explicit setting: use the fallback setting. */

    if (!conf->fallback || conf->fallback == sect)
      sect->nparents = 0;
    else {
      sect->parents = xmalloc(sizeof(*sect->parents)); sect->nparents = 1;
      sect->parents[0] = conf->fallback;
    }
  } else {
    /* Found a `@parents' list: parse it and set the parents list. */

    file = var->file; line = var->line; if (!file) file = "<internal>";

    /* We do this in two phases.  First, we parse out the section names, and
     * record start/limit pointer pairs in `av'.
     */
    p = var->val; l = p + var->n; while (p < l && ISSPACE(*p)) p++;
    while (*p) {
      q = p;
      p = (/*unconst*/ char *)scan_name("parent section", p, l, file, line);
      argv_append(&av, q); argv_append(&av, p);
      while (p < l && ISSPACE(*p)) p++;
      if (p >= l) break;
      if (*p == ',') do p++; while (ISSPACE(*p));
    }

    /* Now that we've finished parsing, we know how many parents we're going
     * to have, so we can allocate the `parents' vector and fill it in.
     */
    sect->nparents = av.n/2;
    sect->parents = xmalloc(sect->nparents*sizeof(*sect->parents));
    for (i = 0; i < av.n; i += 2) {
      n = av.v[i + 1] - av.v[i];
      parent = config_find_section_n(conf, 0, av.v[i], n);
      if (!parent)
	lose("%s:%u: unknown parent section `%.*s'",
	     file, line, (int)n, av.v[i]);
      sect->parents[i/2] = parent;
    }
  }

  /* All done. */
  argv_release(&av);
}

/* Find a setting of the SZ-byte variable NAME in CONF, starting from SECT.
 *
 * If successful, return a pointer to the variable; otherwise return null.
 * Inheritance cycles and ambiguous inheritance are diagnosed as fatal
 * errors.
 */
struct config_var *search_recursive(struct config *conf,
				    struct config_section *sect,
				    const char *name, size_t sz)
{
  struct config_cache_entry *cache;
  struct treap_path path;
  struct config_var *var, *v;
  size_t i, j = j;

  /* If the variable is defined locally then we can just return it. */
  var = treap_lookup(&sect->vars, name, sz); if (var) return (var);

  /* If we have no parents then there's no way we can find it. */
  set_config_section_parents(conf, sect);
  if (!sect->parents) return (0);

  /* Otherwise we must visit the section's parents.  We can avoid paying for
   * this on every lookup by using a cache.  If there's already an entry for
   * this variable then we can return the result immediately (note that we
   * cache both positive and negative outcomes).  Otherwise we create a new
   * cache entry, do the full recursive search, and fill in the result when
   * we're done.
   *
   * The cache also helps us detect cycles: we set the `CF_OPEN' flag on a
   * new cache entry when it's first created, and clear it when we fill in
   * the result: if we encounter an open cache entry again, we know that
   * we've found a cycle.
   */
  cache = treap_probe(&sect->cache, name, sz, &path);
  if (!cache) {
    cache = xmalloc(sizeof(*cache)); cache->f = CF_OPEN;
    treap_insert(&sect->cache, &path, &cache->_node, name, sz);
  } else if (cache->f&CF_OPEN)
    lose("inheritance cycle through section `%s'",
	 CONFIG_SECTION_NAME(sect));
  else
    return (cache->var);

  /* Recursively search in each parent.  We insist that all parents that find
   * a variable find the same binding; otherwise we declare ambiguous
   * inheritance.
   */
  for (i = 0; i < sect->nparents; i++) {
    v = search_recursive(conf, sect->parents[i], name, sz);
    if (!v);
    else if (!var) { var = v; j = i; }
    else if (var != v)
      lose("section `%s' inherits variable `%s' ambiguously "
	   "via `%s' and `%s'",
	   CONFIG_SECTION_NAME(sect), CONFIG_VAR_NAME(var),
	   CONFIG_SECTION_NAME(sect->parents[j]),
	   CONFIG_SECTION_NAME(sect->parents[i]));
  }

  /* All done: fill the cache entry in, clear the open flag, and return the
   * result.
   */
  cache->var = var; cache->f &= ~CF_OPEN;
  return (var);
}

/* Find and return the variable with null-terminated NAME in SECT.
 *
 * If `CF_INHERIT' is set in F, then the function searches the section's
 * parents recursively; otherwise, it only checks to see whether the variable
 * is set directly in SECT.
 *
 * If no variable is found, the behaviour depends on whether `CF_CREAT' is
 * set in F: if so, an empty variable is created and returned; otherwise, a
 * null pointer is returned.
 *
 * Setting both `CF_INHERIT' and `CF_CREAT' is not useful.
 */
struct config_var *config_find_var(struct config *conf,
				   struct config_section *sect,
				   unsigned f, const char *name)
  { return (config_find_var_n(conf, sect, f, name, strlen(name))); }

/* Find and return the variable with the given SZ-byte NAME in SECT.
 *
 * This works like `config_find_var', but with an explicit length for the
 * NAME rather than null-termination.
 */
struct config_var *config_find_var_n(struct config *conf,
				     struct config_section *sect,
				     unsigned f, const char *name, size_t sz)
{
  struct config_var *var;
  struct treap_path path;

  if (f&CF_INHERIT)
    var = search_recursive(conf, sect, name, sz);
  else if (!(f&CF_CREAT))
    var = treap_lookup(&sect->vars, name, sz);
  else {
    var = treap_probe(&sect->vars, name, sz, &path);
    if (!var) {
      var = xmalloc(sizeof(*var));
      var->val = 0; var->file = 0; var->f = 0; var->line = 1;
      treap_insert(&sect->vars, &path, &var->_node, name, sz);
    }
  }
  return (var);
}

/* Set variable NAME to VALUE in SECT, with associated flags F.
 *
 * The names are null-terminated.  The flags are variable flags: see `struct
 * config_var' for details.  Returns the variable.
 *
 * If the variable is already set and has the `CF_OVERRIDE' flag, then this
 * function does nothing unless `CF_OVERRIDE' is /also/ set in F.
 */
struct config_var *config_set_var(struct config *conf,
				  struct config_section *sect,
				  unsigned f,
				  const char *name, const char *value)
{
  return (config_set_var_n(conf, sect, f,
			   name, strlen(name),
			   value, strlen(value)));
}

/* As `config_set_var', except that the variable NAME and VALUE have explicit
 * lengths (NAMELEN and VALUELEN, respectively) rather than being null-
 * terminated.
 */
struct config_var *config_set_var_n(struct config *conf,
				    struct config_section *sect,
				    unsigned f,
				    const char *name, size_t namelen,
				    const char *value, size_t valuelen)
{
  struct config_var *var =
    config_find_var_n(conf, sect, CF_CREAT, name, namelen);

  if (var->f&~f&CF_OVERRIDE) return (var);
  free(var->val); var->val = xstrndup(value, valuelen); var->n = valuelen;
  var->f = f;
  return (var);
}

/* Initialize I to iterate over the variables directly defined in SECT. */
void config_start_var_iter(struct config *conf, struct config_section *sect,
			   struct config_var_iter *i)
  { treap_start_iter(&sect->vars, &i->i); }

/* Return next variable from I, in ascending lexicographical order.
 *
 * If there are no more variables, then return null.
 */
struct config_var *config_next_var(struct config_var_iter *i)
  { return (treap_next(&i->i)); }

/* Read and parse configuration FILE, applying its settings to CONF.
 *
 * If all goes well, the function returns 0.  If the file is not found, then
 * the behaviour depends on whether `CF_NOENTOK' is set in F: if so, then the
 * function simply returns -1.  Otherwise, a fatal error is reported.  Note
 * that this /only/ applies if the file does not exist (specifically, opening
 * it fails with `ENOENT') -- any other problems are reported as fatal
 * errors regardless of the flag setting.
 */
int config_read_file(struct config *conf, const char *file, unsigned f)
{
  struct config_section *sect;
  struct config_var *var;
  struct dstr d = DSTR_INIT, dd = DSTR_INIT;
  unsigned line = 0;
  const char *p, *q, *r;
  FILE *fp;

  /* Try to open the file. */
  fp = fopen(file, "r");
  if (!fp) {
    if ((f&CF_NOENTOK) && errno == ENOENT) return (-1);
    lose("failed to open configuration file `%s': %s",
	 file, strerror(errno));
  }

  /* Find the initial section. */
  sect = config_find_section(conf, CF_CREAT, "@CONFIG"); var = 0;

  /* Work through the file, line by line. */
  for (;;) {
    dstr_reset(&d); if (dstr_readline(&d, fp)) break;
    line++;

    /* Trim trailing spaces from the line.  The syntax is sensitive to
     * leading spaces, so we can't trim those yet.
     */
    while (d.len && ISSPACE(d.p[d.len - 1])) d.len--;
    d.p[d.len] = 0;

    if (!*d.p || *d.p == ';')
      /* Ignore comments entirely.  (In particular, a comment doesn't
       * interrupt a multiline variable value.)
       */
      ;

    else if (ISSPACE(d.p[0])) {
      /* The line starts with whitespace, so it's a continuation line. */

      /* Skip the initial whitespace. */
      p = d.p; while (ISSPACE(*p)) p++;

      /* If we aren't collecting a variable value then this is an error.
       * Otherwise, accumulate it into the current value.
       */
      if (!var)
	lose("%s:%u: continuation line, but no variable", file, line);
      if (dd.len) dstr_putc(&dd, ' ');
      dstr_putm(&dd, p, d.len - (p - d.p));

    } else {
      /* The line starts in the first column. */

      /* If there's a value value being collected then we must commit it to
       * its variable (unless there's already a setting there that says we
       * shouldn't).
       */
      if (var) {
	if (!(var->f&CF_OVERRIDE))
	  { var->val = xstrndup(dd.p, dd.len); var->n = dd.len; }
	var = 0;
      }

      /* Now decide what kind of line this is. */
      if (d.p[0] == '[') {
	/* It's a section header. */

	/* Parse the header. */
	p = d.p + 1; while (ISSPACE(*p)) p++;
	q = scan_name("section", p, d.p + d.len, file, line);
	r = q; while (ISSPACE(*r)) r++;
	if (*r != ']')
	  lose("%s:%u: expected `]' in section header", file, line);
	if (r[1])
	  lose("%s:%u: trailing junk after `]' in section header",
	       file, line);

	/* Create the new section. */
	sect = config_find_section_n(conf, CF_CREAT, p, q - p);

      } else {
	/* It's a variable assignment.  Parse the name out. */
	p = scan_name("variable", d.p, d.p + d.len, file, line);
	var = config_find_var_n(conf, sect, CF_CREAT, d.p, p - d.p);
	while (ISSPACE(*p)) p++;
	if (*p != '=') lose("%s:%u: missing `=' in assignment", file, line);
	p++; while (ISSPACE(*p)) p++;

	/* Clear out the variable's initial value, unless we shouldn't
	 * override it.
	 */
	if (!(var->f&CF_OVERRIDE)) {
	  free(var->val); var->val = 0; var->f = 0;
	  free(var->file); var->file = xstrdup(file); var->line = line;
	}
	dstr_reset(&dd); dstr_puts(&dd, p);
      }
    }
  }

  /* If there's a value under construction then commit the result. */
  if (var && !(var->f&CF_OVERRIDE))
    { var->val = xstrndup(dd.p, dd.len); var->n = dd.len; }

  /* Close the file. */
  if (fclose(fp))
    lose("error reading configuration file `%s': %s", file, strerror(errno));

  /* All done. */
  dstr_release(&d); dstr_release(&dd);
  return (0);
}

/* Populate SECT with environment variables.
 *
 * Environment variables are always set with `CF_LITERAL'.
 */
void config_read_env(struct config *conf, struct config_section *sect)
{
  const char *p, *v;
  size_t i;

  for (i = 0; (p = environ[i]) != 0; i++) {
    v = strchr(p, '='); if (!v) continue;
    config_set_var_n(conf, sect, CF_LITERAL, p, v - p, v + 1, strlen(v + 1));
  }
}

/*----- Substitution and quoting ------------------------------------------*/

/* The substitution and word-splitting state.
 *
 * This only keeps track of the immutable parameters for the substitution
 * task: stuff which changes (flags, filtering state, cursor position) is
 *	 maintained separately.
 */
struct subst {
  struct config *config;		/* configuration state */
  struct config_section *home;		/* home section for lookups */
  struct dstr *d;			/* current word being constructed */
  struct argv *av;			/* output word list */
};

/* Flags for `subst' and related functions. */
#define SF_SPLIT 0x0001u		/* split at (unquoted) whitespace */
#define SF_QUOT 0x0002u			/* currently within double quotes */
#define SF_SUBST 0x0004u		/* apply `$-substitutions */
#define SF_SUBEXPR 0x0008u		/* stop at delimiter `|' or `}' */
#define SF_SPANMASK 0x00ffu		/* mask for the above */

#define SF_WORD 0x0100u			/* output word under construction */
#define SF_SKIP 0x0200u			/* not producing output */
#define SF_LITERAL 0x0400u		/* do not expand or substitute */
#define SF_UPCASE 0x0800u		/* convert to uppercase */
#define SF_DOWNCASE 0x1000u		/* convert to lowercase */
#define SF_CASEMASK 0x1800u		/* mask for case conversions */

/* Apply filters encoded in QFILT and F to the text from P to L, and output.
 *
 * SB is the substitution state which, in particular, explains where the
 * output should go.
 *
 * The filters are encoded as flags `SF_UPCASE' and `SF_DOWNCASE' for case
 * conversions, and a nesting depth QFILT for toothpick escaping.  (QFILT is
 * encoded as the number of toothpicks to print: see `subst' for how this
 * determined.)
 */
static void filter_string(const char *p, const char *l,
			  const struct subst *sb, unsigned qfilt, unsigned f)
{
  size_t r, n;
  char *q; const char *pp, *ll;

  if (!qfilt && !(f&SF_CASEMASK))
    /* Fast path: there's nothing to do: just write to the output. */
    dstr_putm(sb->d, p, l - p);

  else for (;;) {
    /* We must be a bit more circumspect. */

    /* Determine the length of the next span of characters which don't need
     * escaping.  (If QFILT is zero then this is everything.)
     */
    r = l - p; n = qfilt ? strcspn(p, "\"\\") : r;
    if (n > r) n = r;

    if (!(f&SF_CASEMASK))
      /* No case conversion: we can just emit this chunk. */

      dstr_putm(sb->d, p, n);

    else {
      /* Case conversion to do.  Arrange enough space for the output, and
       * convert it character by character.
       */

      dstr_ensure(sb->d, n); q = sb->d->p + sb->d->len; pp = p; ll = p + n;
      if (f&SF_DOWNCASE) while (pp < ll) *q++ = TOLOWER(*pp++);
      else if (f&SF_UPCASE) while (pp < ll) *q++ = TOUPPER(*pp++);
      sb->d->len += n;
    }

    /* If we've reached the end then stop. */
    if (n >= r) break;

    /* Otherwise we must have found a character which requires escaping.
     * Emit enough toothpicks.
     */
    dstr_putcn(sb->d, '\\', qfilt);

    /* This character is now done, so we can skip over and see if there's
     * another chunk of stuff we can do at high speed.
     */
    dstr_putc(sb->d, p[n]); p += n + 1;
  }
}

/* Scan and resolve a `[SECT:]VAR' specifier at P.
 *
 * Return the address of the next character following the specifier.  L is a
 * limit on the region of the buffer that we should process; SB is the
 * substitution state which provides the home section if none is given
 * explicitly; FILE and LINE are the source location to blame for problems.
 */
static const char *retrieve_varspec(const char *p, const char *l,
				    const struct subst *sb,
				    struct config_var **var_out,
				    const char *file, unsigned line)
{
  struct config_section *sect = sb->home;
  const char *t;

  t = scan_name("section or variable", p, l, file, line);
  if (t < l && *t == ':') {
    sect = config_find_section_n(sb->config, 0, p, t - p);
    p = t + 1; t = scan_name("variable", p, l, file, line);
  }

  if (!sect) *var_out = 0;
  else *var_out = config_find_var_n(sb->config, sect, CF_INHERIT, p, t - p);
  return (t);
}

/* Substitute and/or word-split text.
 *
 * The input text starts at P, and continues to (just before) L.  Context for
 * the task is provided by SB; the source location to blame is FILE and LINE
 * (FILE may be null so that this can be passed directly from a `config_var'
 * without further checking); QFILT is the nesting depth in toothpick-
 * escaping; and F holds a mask of `SF_...' flags.
 */
static const char *subst(const char *p, const char *l,
			 const struct subst *sb,
			 const char *file, unsigned line,
			 unsigned qfilt, unsigned f)
{
  struct config_var *var;
  const char *q0, *q1, *t;
  unsigned subqfilt, ff;
  size_t n;

  /* It would be best if we could process literal text at high speed.  To
   * this end, we have a table, indexed by the low-order bits of F, to tell
   * us which special characters we need to stop at.  This way, we can use
   * `strcspn' to skip over literal text and stop at the next character which
   * needs special handling.  Entries in this table with a null pointer
   * correspond to impossible flag settings.
   */
  static const char *const delimtab[] = {

#define ESCAPE "\\"			/* always watch for `\'-escapes */
#define SUBST "$"			/* check for `$' if `SF_SUBST' set */
#define WORDSEP " \f\r\n\t\v'\""	/* space, quotes if `SF_SPLIT' but
					 * not `SF_QUOT' */
#define QUOT "\""			/* only quotes if `SF_SPLIT' and
					 * `SF_QUOT' */
#define DELIM "|}"			/* end delimiters of `SF_SUBEXPR' */

    ESCAPE,
    ESCAPE	       WORDSEP,
    0,
    ESCAPE	       QUOT,
    ESCAPE	 SUBST,
    ESCAPE	 SUBST WORDSEP,
    0,
    ESCAPE	 SUBST QUOT,
    ESCAPE DELIM,
    ESCAPE DELIM       WORDSEP,
    0,
    ESCAPE DELIM       QUOT,
    ESCAPE DELIM SUBST,
    ESCAPE DELIM SUBST WORDSEP,
    0,
    ESCAPE DELIM SUBST QUOT

#undef COMMON
#undef WORDSEP
#undef SQUOT
#undef DELIM
  };

  /* Set FILE to be useful if it was null on entry. */
  if (!file) file = "<internal>";

  /* If the text is literal then hand off to `filter_string'.  This obviously
   * starts a word.
   */
  if (f&SF_LITERAL) {
    filter_string(p, l, sb, qfilt, f);
    f |= SF_WORD;
    goto done;
  }

  /* Chew through the input until it's all gone. */
  while (p < l) {

    if ((f&(SF_SPLIT | SF_QUOT)) == SF_SPLIT && ISSPACE(*p)) {
      /* This is whitespace, we're supposed to split, and we're not within
       * quotes, so we should split here.
       */

      /* If there's a word in progress then we should commit it. */
      if (f&SF_WORD) {
	if (!(f&SF_SKIP)) {
	  argv_append(sb->av, xstrndup(sb->d->p, sb->d->len));
	  dstr_reset(sb->d);
	}
	f &= ~SF_WORD;
      }

      /* Skip over further whitespace at high speed. */
      do p++; while (p < l && ISSPACE(*p));

    } else if (*p == '\\') {
      /* This is a toothpick, so start a new word and add the next character
       * to it.
       */

      /* If there's no next charact3er then we should be upset. */
      p++; if (p >= l) lose("%s:%u: unfinished `\\' escape", file, line);

      if (!(f&SF_SKIP)) {

	/* If this is a double quote or backslash then check DFLT to see if
	 * it needs escaping.
	 */
	if (qfilt && (*p == '"' || *p == '\\'))
	  dstr_putcn(sb->d, '\\', qfilt);

	/* Output the character. */
	if (f&SF_DOWNCASE) dstr_putc(sb->d, TOLOWER(*p));
	else if (f&SF_UPCASE) dstr_putc(sb->d, TOUPPER(*p));
	else dstr_putc(sb->d, *p);
      }

      /* Move past the escaped character.  Remember we started a word. */
      p++; f |= SF_WORD;

    } else if ((f&SF_SPLIT) && *p == '"') {
      /* This is a double quote, and we're word splitting.  We're definitely
       * in a word now.  Toggle whether we're within quotes.
       */

      f ^= SF_QUOT; f |= SF_WORD; p++;

    } else if ((f&(SF_SPLIT | SF_QUOT)) == SF_SPLIT && *p == '\'') {
      /* This is a single quote, and we're word splitting but not within
       * double quotes.  Find the matching end quote, and just output
       * everything between literally.
       */

      p++; t = strchr(p, '\'');
      if (!t || t >= l) lose("%s:%u: missing `''", file, line);
      if (!(f&SF_SKIP)) filter_string(p, t, sb, qfilt, f);
      p = t + 1; f |= SF_WORD;

    } else if ((f&SF_SUBEXPR) && (*p == '|' || *p == '}')) {
      /* This is an end delimiter, and we're supposed to stop here. */
      break;

    } else if ((f&SF_SUBST) && *p == '$') {
      /* This is a `$' and we're supposed to do substitution. */

      /* The kind of substitution is determined by the next character. */
      p++; if (p >= l) lose("%s:%u: incomplete substitution", file, line);

      /* Prepare flags for a recursive substitution.
       *
       * Hide our quote state from the recursive call.  If we're within a
       * word, then disable word-splitting.
       */
      ff = f&~(SF_QUOT | (f&SF_WORD ? SF_SPLIT : 0));

      /* Now dispatch based on the following character. */
      switch (*p) {

	case '?':
	  /* A conditional expression: $?VAR{CONSEQ[|ALT]} */

	  /* Skip initial space. */
	  p++; while (p < l && ISSPACE(*p)) p++;

	  /* Find the variable. */
	  p = retrieve_varspec(p, l, sb, &var, file, line);

	  /* Skip whitespace again. */
	  while (p < l && ISSPACE(*p)) p++;

	  /* Expect the opening `{'. */
	  if (p > l || *p != '{') lose("%s:%u: expected `{'", file, line);
	  p++;

	  /* We'll process the parts recursively, but we need to come back
	   * when we hit the appropriate delimiters, so arrange for that.
	   */
	  ff |= SF_SUBEXPR;

	  /* Process the consequent (skip if the variable wasn't found). */
	  p = subst(p, l, sb, file, line, qfilt,
		    ff | (var ? 0 : SF_SKIP));

	  /* If there's a `|' then process the alternative too (skip if the
	   * variable /was/ found).
	   */
	  if (p < l && *p == '|')
	    p = subst(p + 1, l, sb, file, line, qfilt,
		      ff | (var ? SF_SKIP : 0));

	  /* We should now be past the closing `}'. */
	  if (p >= l || *p != '}') lose("%s:%u: missing `}'", file, line);
	  p++;
	  break;

	case '{':
	  /* A variable substitution: ${VAR[|FILT]...[?ALT]} */

	  /* Skip initial whitespace. */
	  p++; while (p < l && ISSPACE(*p)) p++;

	  /* Find the variable. */
	  q0 = p; p = retrieve_varspec(p, l, sb, &var, file, line); q1 = p;

	  /* Determine the filters to apply when substituting the variable
	   * value.
	   */
	  subqfilt = qfilt;
	  for (;;) {

	    /* Skip spaces again. */
	    while (p < l && ISSPACE(*p)) p++;

	    /* If there's no `|' then there are no more filters, so stop. */
	    if (p >= l || *p != '|') break;

	    /* Skip the `|' and more spaces. */
	    p++; while (p < l && ISSPACE(*p)) p++;

	    /* Collect the filter name. */
	    t = scan_name("filter", p, l, file, line);

	    /* Dispatch on the filter name. */
	    if (t - p == 1 && *p == 'q')
	      /* `q' -- quote for Lisp string.
	       *
	       * We're currently adding Q `\' characters before each naughty
	       * character.  But a backslash itself is naughty too, so that
	       * makes Q + 1 naughty characters, each of which needs a
	       * toothpick, so now we need Q + (Q + 1) = 2 Q + 1 toothpicks.
	       *
	       * Calculate this here rather than at each point toothpicks
	       * needs to be deployed.
	       */

	      subqfilt = 2*subqfilt + 1;

	    else if (t - p == 1 && *p == 'l')
	      /* `u' -- convert to uppercase.
	       *
	       * If a case conversion is already set, then that will override
	       * whatever we do here, so don't bother.
	       */

	      { if (!(ff&SF_CASEMASK)) ff |= SF_DOWNCASE; }

	    else if (t - p == 1 && *p == 'u')
	      /* `u' -- convert to uppercase.
	       *
	       * If a case conversion is already set, then that will override
	       * whatever we do here, so don't bother.
	       */
	      { if (!(ff&SF_CASEMASK)) ff |= SF_UPCASE; }

	    else
	      /* Something else we didn't understand. */
	      lose("%s:%u: unknown filter `%.*s'",
		   file, line, (int)(t - p), p);

	    /* Continue from after the filter name. */
	    p = t;
	  }

	  /* If we're not skipping, and we found a variable, then substitute
	   * its value.  This is the point where we need to be careful about
	   * recursive expansion.
	   */
	  if (!(f&SF_SKIP) && var) {
	    if (var->f&CF_EXPAND)
	      lose("%s:%u: recursive expansion of variable `%.*s'",
		   file, line, (int)(q1 - q0), q0);
	    var->f |= CF_EXPAND;
	    subst(var->val, var->val + var->n, sb,
		  var->file, var->line, subqfilt,
		  ff | (var->f&CF_LITERAL ? SF_LITERAL : 0));
	    var->f &= ~CF_EXPAND;
	  }

	  /* If there's an alternative, then we need to process (or maybe
	   * skip) it.  Otherwise, we should complain if there was no
	   * veriable, and we're not skipping.
	   */
	  if (p < l && *p == '?')
	    p = subst(p + 1, l, sb, file, line, subqfilt,
		      ff | SF_SUBEXPR | (var ? SF_SKIP : 0));
	  else if (!var && !(f&SF_SKIP))
	    lose("%s:%u: unknown variable `%.*s'",
		 file, line, (int)(q1 - q0), q0);

	  /* Expect a `}' here.  (No need to skip spaces: we already did that
	   * after scanning for filters, and either there was no alternative,
	   * or we advanced to a delimiter character anyway.)
	   */
	  if (p >= l || *p != '}') lose("%s:%u: missing `}'", file, line);
	  p++;
	  break;

	default:
	  /* Something else.  That's a shame. */
	  lose("%s:%u: unexpected `$'-substitution `%c'", file, line, *p);
      }

      /* Complain if we started out in word-splitting state, and therefore
       * have added a whole number of words to the output, but there's a
       * word-fragment stuck onto the end of this substitution.
       */
      if (p < l && !(~f&~(SF_WORD | SF_SPLIT)) && !ISSPACE(*p) &&
	  !((f&SF_SUBEXPR) && (*p == '|' || *p == '}')))
	lose("%s:%u: surprising word boundary "
	     "after splicing substitution",
	     file, line);
    }

    else {
      /* Something else.  Try to skip over this at high speed.
       *
       * This makes use of the table we set up earlier.
       */

      n = strcspn(p, delimtab[f&SF_SPANMASK]);
      if (n > l - p) n = l - p;
      if (!(f&SF_SKIP)) filter_string(p, p + n, sb, qfilt, f);
      p += n; f |= SF_WORD;
    }
  }

done:
  /* Sort out the wreckage. */

  /* If we're still within quotes then something has gone wrong. */
  if (f&SF_QUOT) lose("%s:%u: missing `\"'", file, line);

  /* If we're within a word, and should be splitting, then commit the word to
   * the output list.
   */
  if ((f&(SF_WORD | SF_SPLIT | SF_SKIP)) == (SF_SPLIT | SF_WORD)) {
    argv_append(sb->av, xstrndup(sb->d->p, sb->d->len));
    dstr_reset(sb->d);
  }

  /* And, with that, we're done. */
  return (p);
}

/* Expand substitutions in a string.
 *
 * Expand the null-terminated string P relative to the HOME section, using
 * configuration CONFIG, and appending the result to dynamic string D.  Blame
 * WHAT in any error messages.
 */
void config_subst_string(struct config *config, struct config_section *home,
			 const char *what, const char *p, struct dstr *d)
{
  struct subst sb;

  sb.config = config; sb.home = home; sb.d = d;
  subst(p, p + strlen(p), &sb, what, 0, 0, SF_SUBST);
  dstr_putz(d);
}

/* Expand substitutions in a string.
 *
 * Expand the null-terminated string P relative to the HOME section, using
 * configuration CONFIG, returning the result as a freshly malloc(3)ed
 * string.  Blame WHAT in any error messages.
 */
char *config_subst_string_alloc(struct config *config,
				struct config_section *home,
				const char *what, const char *p)
{
  struct dstr d = DSTR_INIT;
  char *q;

  config_subst_string(config, home, what, p, &d);
  q = xstrndup(d.p, d.len); dstr_release(&d); return (q);
}

/* Expand substitutions in a variable.
 *
 * Expand the value of the variable VAR relative to the HOME section, using
 * configuration CONFIG, appending the result to dynamic string D.
 */
void config_subst_var(struct config *config, struct config_section *home,
		      struct config_var *var, struct dstr *d)
{
  struct subst sb;

  sb.config = config; sb.home = home; sb.d = d;
  var->f |= CF_EXPAND;
  subst(var->val, var->val + var->n, &sb, var->file, var->line, 0,
	SF_SUBST | (var->f&CF_LITERAL ? SF_LITERAL : 0));
  var->f &= ~CF_EXPAND;
  dstr_putz(d);
}

/* Expand substitutions in a variable.
 *
 * Expand the value of the variable VAR relative to the HOME section, using
 * configuration CONFIG, returning the result as a freshly malloc(3)ed
 * string.
 */
char *config_subst_var_alloc(struct config *config,
			     struct config_section *home,
			     struct config_var *var)
{
  struct dstr d = DSTR_INIT;
  char *q;

  config_subst_var(config, home, var, &d);
  q = xstrndup(d.p, d.len); dstr_release(&d); return (q);
}

/* Expand substitutions in a variable and split into words.
 *
 * Expand and word-split the value of the variable VAR relative to the HOME
 * section, using configuration CONFIG, appending the resulting words into
 * the vector AV.
 */
void config_subst_split_var(struct config *config,
			    struct config_section *home,
			    struct config_var *var, struct argv *av)
{
  struct dstr d = DSTR_INIT;
  struct subst sb;

  sb.config = config; sb.home = home; sb.av = av; sb.d = &d;
  var->f |= CF_EXPAND;
  subst(var->val, var->val + var->n, &sb, var->file, var->line, 0,
	SF_SUBST | SF_SPLIT | (var->f&CF_LITERAL ? SF_LITERAL : 0));
  var->f &= ~CF_EXPAND;
  dstr_release(&d);
}

/*----- That's all, folks -------------------------------------------------*/