X-Git-Url: https://git.distorted.org.uk/~mdw/runlisp/blobdiff_plain/7b8ff279e7304e41b243459d78c3b6703bb8c3f5..47af3df3850a1a17ee706b278868309d40bacd55:/lib.c

diff --git a/lib.c b/lib.c
index 104eebd..b509cea 100644
--- a/lib.c
+++ b/lib.c
@@ -40,26 +40,23 @@
 
 #include "lib.h"
 
-/*----- Miscellany --------------------------------------------------------*/
-
-int str_lt(const char *a, size_t an, const char *b, size_t bn)
-{
-  if (an < bn) return (MEMCMP(a, <=, b, an));
-  else return (MEMCMP(a, <, b, bn));
-}
-
 /*----- Diagnostic utilities ----------------------------------------------*/
 
 const char *progname = "???";
+	/* Our program name, for use in error messages. */
 
+/* Set `progname' from the pathname in PROG (typically from `argv[0]'). */
 void set_progname(const char *prog)
 {
   const char *p;
 
   p = strrchr(prog, '/');
-  progname = p ? p + 1 : progname;
+  progname = p ? p + 1 : prog;
 }
 
+/* Report an error or warning in Unix style, given a captured argument
+ * cursor.
+ */
 void vmoan(const char *msg, va_list ap)
 {
   fprintf(stderr, "%s: ", progname);
@@ -67,14 +64,21 @@ void vmoan(const char *msg, va_list ap)
   fputc('\n', stderr);
 }
 
+/* Issue a warning message. */
 void moan(const char *msg, ...)
   { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); }
 
+/* Issue a fatal error message and exit unsuccessfully. */
 void lose(const char *msg, ...)
   { va_list ap; va_start(ap, msg); vmoan(msg, ap); va_end(ap); exit(127); }
 
 /*----- Memory allocation -------------------------------------------------*/
 
+/* Allocate and return a pointer to N bytes, or report a fatal error.
+ *
+ * Release the pointer using `free' as usual.  If N is zero, returns null
+ * (but you are not expected to check for this).
+ */
 void *xmalloc(size_t n)
 {
   void *p;
@@ -84,6 +88,13 @@ void *xmalloc(size_t n)
   return (p);
 }
 
+/* Resize the block at P (from `malloc' or `xmalloc') to be N bytes long.
+ *
+ * The block might (and probably will) move, so it returns the new address.
+ * If N is zero, then the block is freed (if necessary) and a null pointer
+ * returned; otherwise, if P is null then a fresh block is allocated.  If
+ * allocation fails, then a fatal error is reported.
+ */
 void *xrealloc(void *p, size_t n)
 {
   if (!n) { free(p); return (0); }
@@ -92,6 +103,11 @@ void *xrealloc(void *p, size_t n)
   return (p);
 }
 
+/* Allocate and return a copy of the N-byte string starting at P.
+ *
+ * The new string is null-terminated, though P need not be.  If allocation
+ * fails, then a fatal error is reported.
+ */
 char *xstrndup(const char *p, size_t n)
 {
   char *q = xmalloc(n + 1);
@@ -100,14 +116,24 @@ char *xstrndup(const char *p, size_t n)
   return (q);
 }
 
+/* Allocate and return a copy of the null-terminated string starting at P.
+ *
+ * If allocation fails, then a fatal error is reported.
+ */
 char *xstrdup(const char *p) { return (xstrndup(p, strlen(p))); }
 
 /*----- Dynamic strings ---------------------------------------------------*/
 
+/* Initialize the string D.
+ *
+ * Usually you'd use the static initializer `DSTR_INIT'.
+ */
 void dstr_init(struct dstr *d) { d->p = 0; d->len = d->sz = 0; }
 
+/* Reset string D so it's empty again. */
 void dstr_reset(struct dstr *d) { d->len = 0; }
 
+/* Ensure that D has at least N unused bytes available. */
 void dstr_ensure(struct dstr *d, size_t n)
 {
   size_t need = d->len + n, newsz;
@@ -118,11 +144,25 @@ void dstr_ensure(struct dstr *d, size_t n)
   d->p = xrealloc(d->p, newsz); d->sz = newsz;
 }
 
+/* Release the memory held by D.
+ *
+ * It must be reinitialized (e.g., by `dstr_init') before it can be used
+ * again.
+ */
 void dstr_release(struct dstr *d) { free(d->p); }
 
+/* Append the N-byte string at P to D.
+ *
+ * P need not be null-terminated.  D will not be null-terminated
+ * afterwards.
+ */
 void dstr_putm(struct dstr *d, const void *p, size_t n)
   { dstr_ensure(d, n); memcpy(d->p + d->len, p, n); d->len += n; }
 
+/* Append the null-terminated string P to D.
+ *
+ * D /is/ guaranteed to be null-terminated after this.
+ */
 void dstr_puts(struct dstr *d, const char *p)
 {
   size_t n = strlen(p);
@@ -132,15 +172,33 @@ void dstr_puts(struct dstr *d, const char *p)
   d->len += n;
 }
 
+/* Append the single character CH to D.
+ *
+ * D will not be null-terminated afterwards.
+ */
 void dstr_putc(struct dstr *d, int ch)
   { dstr_ensure(d, 1); d->p[d->len++] = ch; }
 
+/* Append N copies of the character CH to D.
+ *
+ * D will not be null-terminated afterwards.
+ */
 void dstr_putcn(struct dstr *d, int ch, size_t n)
   { dstr_ensure(d, n); memset(d->p + d->len, ch, n); d->len += n; }
 
+/* Null-terminate the string D.
+ *
+ * This doesn't change the length of D.  If further stuff is appended then
+ * the null terminator will be overwritten.
+ */
 void dstr_putz(struct dstr *d)
   { dstr_ensure(d, 1); d->p[d->len] = 0; }
 
+/* Append stuff to D, determined by printf(3) format string P and argument
+ * tail AP.
+ *
+ * D will not be null-terminated afterwards.
+ */
 void dstr_vputf(struct dstr *d, const char *p, va_list ap)
 {
   va_list ap2;
@@ -158,9 +216,20 @@ void dstr_vputf(struct dstr *d, const char *p, va_list ap)
   d->len += n;
 }
 
+/* Append stuff to D, determined by printf(3) format string P and arguments.
+ *
+ * D will not be null-terminated afterwards.
+ */
 PRINTF_LIKE(2, 3) void dstr_putf(struct dstr *d, const char *p, ...)
   { va_list ap; va_start(ap, p); dstr_vputf(d, p, ap); va_end(ap); }
 
+/* Append the next input line from FP to D.
+ *
+ * Return 0 on success, or -1 if reading immediately fails or encounters
+ * end-of-file (call ferror(3) to distinguish).  Any trailing newline is
+ * discarded: it is not possible to determine whether the last line was ended
+ * with a newline.  D is guaranteed to be null-terminated afterwards.
+ */
 int dstr_readline(struct dstr *d, FILE *fp)
 {
   size_t n;
@@ -180,11 +249,17 @@ int dstr_readline(struct dstr *d, FILE *fp)
 
 /*----- Dynamic vectors of strings ----------------------------------------*/
 
+/* Initialize the vector AV.
+ *
+ * Usually you'd use the static initializer `ARGV_INIT'.
+ */
 void argv_init(struct argv *av)
   { av->v = 0; av->o = av->n = av->sz = 0; }
 
+/* Reset the vector AV so that it's empty again. */
 void argv_reset(struct argv *av) { av->n = 0; }
 
+/* Ensure that AV has at least N unused slots at the end. */
 void argv_ensure(struct argv *av, size_t n)
 {
   size_t need = av->n + av->o + n, newsz;
@@ -192,12 +267,11 @@ void argv_ensure(struct argv *av, size_t n)
   if (need <= av->sz) return;
   newsz = av->sz ? 2*av->sz : 8;
   while (newsz < need) newsz *= 2;
-  av->v =
-    (const char **)xrealloc(av->v - av->o, newsz*sizeof(const char *)) +
-    av->o;
+  av->v = xrealloc(av->v - av->o, newsz*sizeof(char *)); av->v += av->o;
   av->sz = newsz;
 }
 
+/* Ensure that AV has at least N unused slots at the /start/. */
 void argv_ensure_offset(struct argv *av, size_t n)
 {
   size_t newoff;
@@ -210,59 +284,81 @@ void argv_ensure_offset(struct argv *av, size_t n)
   newoff = 16;
   while (newoff < n) newoff *= 2;
   argv_ensure(av, newoff - av->o);
-  memmove(av->v + newoff - av->o, av->v, av->n*sizeof(const char *));
+  memmove(av->v + newoff - av->o, av->v, av->n*sizeof(char *));
   av->v += newoff - av->o; av->o = newoff;
 }
 
+/* Release the memory held by AV.
+ *
+ * It must be reinitialized (e.g., by `argv_init') before it can be used
+ * again.
+ */
 void argv_release(struct argv *av) { free(av->v - av->o); }
 
-void argv_append(struct argv *av, const char *p)
+/* Append the pointer P to AV. */
+void argv_append(struct argv *av, char *p)
   { argv_ensure(av, 1); av->v[av->n++] = p; }
 
+/* Append a null pointer to AV, without extending the vactor length.
+ *
+ * The null pointer will be overwritten when the next string is appended.
+ */
 void argv_appendz(struct argv *av)
   { argv_ensure(av, 1); av->v[av->n] = 0; }
 
-void argv_appendn(struct argv *av, const char *const *v, size_t n)
+/* Append a N-element vector V of pointers to AV. */
+void argv_appendn(struct argv *av, char *const *v, size_t n)
 {
   argv_ensure(av, n);
   memcpy(av->v + av->n, v, n*sizeof(const char *));
   av->n += n;
 }
 
+/* Append the variable-length vector BV to AV. */
 void argv_appendav(struct argv *av, const struct argv *bv)
   { argv_appendn(av, bv->v, bv->n); }
 
+/* Append the pointers from a variable-length argument list AP to AV.
+ *
+ * The list is terminated by a null pointer.
+ */
 void argv_appendv(struct argv *av, va_list ap)
 {
-  const char *p;
-
-  for (;;)
-    { p = va_arg(ap, const char *); if (!p) break; argv_append(av, p); }
+  char *p;
+  for (;;) { p = va_arg(ap, char *); if (!p) break; argv_append(av, p); }
 }
 
+/* Append the argument pointers, terminated by a null pointer, to AV. */
 void argv_appendl(struct argv *av, ...)
   { va_list ap; va_start(ap, av); argv_appendv(av, ap); va_end(ap); }
 
-void argv_prepend(struct argv *av, const char *p)
+/* Prepend the pointer P to AV. */
+void argv_prepend(struct argv *av, char *p)
   { argv_ensure_offset(av, 1); *--av->v = p; av->o--; av->n++; }
 
-void argv_prependn(struct argv *av, const char *const *v, size_t n)
+/* Prepend a N-element vector V of pointers to AV. */
+void argv_prependn(struct argv *av, char *const *v, size_t n)
 {
   argv_ensure_offset(av, n);
   av->o -= n; av->v -= n; av->n += n;
   memcpy(av->v, v, n*sizeof(const char *));
 }
 
+/* Prepend the variable-length vector BV to AV. */
 void argv_prependav(struct argv *av, const struct argv *bv)
   { argv_prependn(av, bv->v, bv->n); }
 
+/* Prepend the pointers from a variable-length argument list AP to AV.
+ *
+ * The list is terminated by a null pointer.
+ */
 void argv_prependv(struct argv *av, va_list ap)
 {
-  const char *p, **v;
+  char *p, **v;
   size_t n = 0;
 
   for (;;) {
-    p = va_arg(ap, const char *); if (!p) break;
+    p = va_arg(ap, char *); if (!p) break;
     argv_prepend(av, p); n++;
   }
   v = av->v;
@@ -272,31 +368,85 @@ void argv_prependv(struct argv *av, va_list ap)
   }
 }
 
+/* Prepend the argument pointers, terminated by a null pointer, to AV. */
 void argv_prependl(struct argv *av, ...)
   { va_list ap; va_start(ap, av); argv_prependv(av, ap); va_end(ap); }
 
 /*----- Treaps ------------------------------------------------------------*/
 
+/* Return nonzero if the AN-byte string A is strictly precedes the BN-byte
+ * string B in a lexicographic ordering.
+ *
+ * All comparison of keys is handled by this function.
+ */
+static int str_lt(const char *a, size_t an, const char *b, size_t bn)
+{
+  /* This is a little subtle.  We need only compare the first N bytes of the
+   * strings, where N is the length of the shorter string.  If this
+   * distinguishes the two strings, then we're clearly done.  Otherwise, if
+   * the prefixes are equal then the shorter string is the smaller one.  If
+   * the two strings are the same length, then they're equal.
+   *
+   * Hence, if A is the strictly shorter string, then A precedes B if A
+   * precedes or matches the prefix of B; otherwise A only precedes B if A
+   * strictly precedes the prefix of B.
+   */
+  if (an < bn) return (MEMCMP(a, <=, b, an));
+  else return (MEMCMP(a, <, b, bn));
+}
+
+/* Initialize the treap T.
+ *
+ * Usually you'd use the static initializer `TREAP_INIT'.
+ */
 void treap_init(struct treap *t) { t->root = 0; }
 
+/* Look up the KN-byte key K in the treap T.
+ *
+ * Return a pointer to the matching node if one was found, or null otherwise.
+ */
 void *treap_lookup(const struct treap *t, const char *k, size_t kn)
 {
   struct treap_node *n = t->root, *candidate = 0;
 
-  while (n) {
+  /* This is a simple prototype for some of the search loops we'll encounter
+   * later.  Notice that we use a strict one-sided comparison, rather than
+   * the more conventional two-sided comparison.
+   *
+   * The main loop will find the largest key not greater than K.
+   */
+  while (n)
+    /* Compare the node's key against our key.  If the node is too large,
+     * then we ignore it and move left.  Otherwise remember this node for
+     * later, and move right to see if we can find a better, larger node.
+     */
+
     if (str_lt(k, kn, n->k, n->kn)) n = n->left;
     else { candidate = n; n = n->right; }
-  }
+
+  /* If the candidate node is less than our key then we failed.  Otherwise,
+   * by trichotomy, we have found the correct node.
+   */
   if (!candidate || str_lt(candidate->k, candidate->kn, k, kn)) return (0);
   return (candidate);
 }
 
+/* Look up the KN-byte K in the treap T, recording a path in P.
+ *
+ * This is similar to `treap_lookup', in that it returns the requested node
+ * if it already exists, or null otherwise, but it also records in P
+ * information to be used by `treap_insert' to insert a new node with the
+ * given key if it's not there already.
+ */
 void *treap_probe(struct treap *t, const char *k, size_t kn,
 		  struct treap_path *p)
 {
   struct treap_node **nn = &t->root, *candidate = 0;
   unsigned i = 0;
 
+  /* This walk is similar to `treap_lookup' above, except that we also record
+   * the address of each node pointer we visit along the way.
+   */
   for (;;) {
     assert(i < TREAP_PATHMAX); p->path[i++] = nn;
     if (!*nn) break;
@@ -304,10 +454,16 @@ void *treap_probe(struct treap *t, const char *k, size_t kn,
     else { candidate = *nn; nn = &(*nn)->right; }
   }
   p->nsteps = i;
+
+  /* Check to see whether we found the right node. */
   if (!candidate || str_lt(candidate->k, candidate->kn, k, kn)) return (0);
   return (candidate);
 }
 
+/* Insert a new node N into T, associating it with the KN-byte key K.
+ *
+ * Use the path data P, from `treap_probe', to help with insertion.
+ */
 void treap_insert(struct treap *t, const struct treap_path *p,
 		  struct treap_node *n, const char *k, size_t kn)
 {
@@ -315,46 +471,166 @@ void treap_insert(struct treap *t, const struct treap_path *p,
   struct treap_node **nn, **uu, *u;
   unsigned wt;
 
+  /* Fill in the node structure. */
   n->k = xstrndup(k, kn); n->kn = kn;
   n->wt = wt = rand(); n->left = n->right = 0;
+
+  /* Prepare for the insertion.
+   *
+   * The path actually points to each of the links traversed when searching
+   * for the node, starting with the `root' pointer, then the `left' or
+   * `right' pointer of the root node, and so on; `nsteps' will always be
+   * nonzero, since the path will always pass through the root, and the final
+   * step, `path->path[path->nsteps - 1]' will always be the address of a
+   * null pointer onto which the freshly inserted node could be hooked in
+   * order to satisfy the binary-search-tree ordering.  (Of course, this will
+   * likely /not/ satisfy the heap condition, so more work needs to be done.)
+   *
+   * Throughout, NN is our current candidate for where to attach the node N.
+   * As the loop progresses, NN will ascend to links further up the tree, and
+   * N will be adjusted to accumulate pieces of the existing tree structure.
+   * We'll stop when we find that the parent node's weight is larger than our
+   * new node's weight, at which point we can just set *NN = N; or if we run
+   * out of steps in the path, in which case *NN is the root pointer.
+   */
   assert(i); nn = p->path[--i];
   while (i--) {
+
+    /* Collect the next step in the path, and get the pointer to the node. */
     uu = p->path[i]; u = *uu;
+
+    /* If this node's weight is higher, then we've found the right level and
+     * we can stop.
+     */
     if (wt <= u->wt) break;
+
+    /* The node U is lighter than our new node N, so we must rotate in order
+     * to fix things.  If we were currently planning to hook N as the left
+     * subtree of U, then we rotate like this:
+     *
+     *			|		    |
+     *			U		   (N)
+     *		      /   \		  /   \
+     *		   (N)	    Z	--->    X       U
+     *		  /   \			      /   \
+     *		X       Y		    Y	    Z
+     *
+     * On the other hand, if we were planning to hook N as the right subtree
+     * of U, then we do the opposite rotation:
+     *
+     *		    |				|
+     *		    U			       (N)
+     *		  /   \			      /   \
+     *		X      (N)	--->	    U	    Z
+     *		      /   \		  /   \
+     *		    Y	    Z		X	Y
+     *
+     * These transformations clearly preserve the ordering of nodes in the
+     * binary search tree, and satisfy the heap condition in the subtree
+     * headed by N.
+     */
     if (nn == &u->left) { u->left = n->right; n->right = u; }
     else { u->right = n->left; n->left = u; }
+
+    /* And this arrangement must be attached to UU, or some higher attachment
+     * point.  The subtree satisfies the heap condition, and can be attached
+     * safely at the selected place.
+     */
     nn = uu;
   }
+
+  /* We've found the right spot.  Hook the accumulated subtree into place. */
   *nn = n;
 }
 
+/* Remove the node with the KN-byte K from T.
+ *
+ * Return the address of the node we removed, or null if it couldn't be
+ * found.
+ */
 void *treap_remove(struct treap *t, const char *k, size_t kn)
 {
   struct treap_node **nn = &t->root, **candidate = 0, *n, *l, *r;
 
-  while (*nn) {
+  /* Search for the matching node, but keep track of the address of the link
+   * which points to our target node.
+   */
+  while (*nn)
     if (str_lt(k, kn, (*nn)->k, (*nn)->kn)) nn = &(*nn)->left;
     else { candidate = nn; nn = &(*nn)->right; }
-  }
+
+  /* If this isn't the right node then give up. */
   if (!candidate || str_lt((*candidate)->k, (*candidate)->kn, k, kn))
     return (0);
 
-  n = *candidate; l = n->left; r = n->right;
-  for (;;) {
-    if (l && (!r || l->wt > r->wt)) { nn = &l->right; l = l->right; }
-    else if (r) { nn = &r->left; r = r->left; }
-    else break;
-  }
-  *nn = 0;
-  free(n->k);
-  return (n);
+  /* Now we need to disentangle the node from the tree.  This is essentially
+   * the reverse of insertion: we pretend that this node is suddenly very
+   * light, and mutate the tree so as to restore the heap condition until
+   * eventually our node is a leaf and can be cut off without trouble.
+   *
+   * Throughout, the link *NN notionally points to N, but we don't actually
+   * update it until we're certain what value it should finally take.
+   */
+  nn = candidate; n = *nn; l = n->left; r = n->right;
+  for (;;)
+
+    /* If its left subtree is empty then we can replace our node by its right
+     * subtree and be done.  Similarly, if the right subtree is empty then we
+     * replace the node by its left subtree.
+     *
+     *		    |		|		|		|
+     *		   (N)	--->	R ;	       (N)	--->	L
+     *		  /   \			      /   \
+     *		*       R		    L	    *
+     */
+    if (!l) { *nn = r; break; }
+    else if (!r) { *nn = l; break; }
+
+    /* Otherwise we need to rotate the pointers so that the heavier of the
+     * two children takes the place of our node; thus we have either
+     *
+     *			|		    |
+     *		       (N)		    L
+     *		      /   \		  /   \
+     *		    L	    R	--->	X      (N)
+     *		  /   \			      /   \
+     *		X	Y		    Y	    R
+     *
+     * or
+     *
+     *		    |				|
+     *		   (N)				R
+     *		  /   \			      /   \
+     *		L	R	--->	   (N)	    Y
+     *		      /   \		  /   \
+     *		    X	    Y		L       X
+     *
+     * Again, these transformations clearly preserve the ordering of nodes in
+     * the binary search tree, and the heap condition.
+     */
+    else if (l->wt > r->wt)
+      { *nn = l; nn = &l->right; l = n->left = l->right; }
+    else
+      { *nn = r; nn = &r->left; r = n->right = r->left; }
+
+  /* Release the key buffer, and return the node that we've now detached. */
+  free(n->k); return (n);
 }
 
+/* Initialize an iterator I over T's nodes. */
 void treap_start_iter(struct treap *t, struct treap_iter *i)
 {
   struct treap_node *n = t->root;
   unsigned sp = 0;
 
+  /* The `stack' in the iterator structure is an empty ascending stack of
+   * nodes which have been encountered, and their left subtrees investigated,
+   * but not yet visited by the iteration.
+   *
+   * Iteration begins by stacking the root node, its left child, and so on,
+   * At the end of this, the topmost entry on the stack is the least node of
+   * the tree, followed by its parent, grandparent, and so on up to the root.
+   */
   while (n) {
     assert(sp < TREAP_PATHMAX);
     i->stack[sp++] = n; n = n->left;
@@ -362,12 +638,73 @@ void treap_start_iter(struct treap *t, struct treap_iter *i)
   i->sp = sp;
 }
 
+/* Return the next node from I, in ascending order by key.
+ *
+ * If there are no more nodes, then return null.
+ */
 void *treap_next(struct treap_iter *i)
 {
   struct treap_node *n, *o;
   unsigned sp = i->sp;
 
+  /* We say that a node is /visited/ once it's been returned by this
+   * iterator.  To traverse a tree in order, then, we traverse its left
+   * subtree, visit the tree root, and traverse its right subtree -- which is
+   * a fine recursive definition, but we need a nonrecursive implementation.
+   *
+   * As is usual in this kind of essential structural recursion, we maintain
+   * a stack.  The invariant that we'll maintain is as follows.
+   *
+   *   1. If the stack is empty, then all nodes have been visited.
+   *
+   *   2, If the stack is nonempty then the topmost entry on the stack is the
+   *	  least node which has not yet been visited -- and therefore is the
+   *	  next node to visit.
+   *
+   *   3. The earlier entries in the stack are, in (top to bottom) order,
+   *	  those of the topmost node's parent, grandparent, etc., up to the
+   *	  root, which have not yet been visited.  More specifically, a node
+   *	  appears in the stack if and only if some node in its left subtree
+   *	  is nearer the top of the stack.
+   *
+   * When we initialized the iterator state (in `treap_start_iter' above), we
+   * traced a path to the leftmost leaf, stacking the root, its left-hand
+   * child, and so on.  The leftmost leaf is clearly the first node to be
+   * visited, and its entire ancestry is on the stack since none of these
+   * nodes has yet been visited.  (If the tree is empty, then we have done
+   * nothing, the stack is empty, and there are no nodes to visit.)  This
+   * establishes the base case for the induction.
+   */
+
+  /* So, if the stack is empty now, then (1) all of the nodes have been
+   * visited and there's nothing left to do.  Return null.
+   */
   if (!sp) return (0);
+
+  /* It's clear that, if we pop the topmost element of the stack, visit it,
+   * and arrange to reestablish the invariant, then we'll visit the nodes in
+   * the correct order, pretty much by definition.
+   *
+   * So, pop a node off the stack.  This is the node we shall return.  But
+   * before we can do that, we must reestablish the above invariant.
+   * Firstly, the current node is removed from the stack, because we're about
+   * to visit it, and visited nodes don't belong on the stack.  Then there
+   * are two cases to consider.
+   *
+   *   * If the current node's right subtree is not empty, then the next node
+   *	 to be visited is the leftmost node in that subtree.  All of the
+   *	 nodes on the stack are ancestors of the current node, and the right
+   *	 subtree consists of its descendants, so none of them are already on
+   *	 the stack; and they're all greater than the current node, and
+   *	 therefore haven't been visited.  Therefore, we must push the current
+   *	 node's right child, its /left/ child, and so on, proceeding
+   *	 leftwards until we fall off the bottom of the tree.
+   *
+   *   * Otherwise, we've finished traversing some subtree.  Either we are
+   *	 now done, or (3) we have just finished traversing the left subtree
+   *	 of the next topmost item on the stack.  This must therefore be the
+   *	 next node to visit.  The rest of the stack is already correct.
+   */
   n = i->stack[--sp];
   o = n->right;
   while (o) {
@@ -378,19 +715,44 @@ void *treap_next(struct treap_iter *i)
   return (n);
 }
 
-static void check_node(struct treap_node *n, unsigned maxwt,
-		       const char *klo, const char *khi)
+/* Recursively check the subtree headed by N.
+ *
+ * No node should have weight greater than MAXWT, to satisfy the heap
+ * condition; if LO is not null, then all node keys should be strictly
+ * greater than LO, and, similarly, if HI is not null, then all keys should
+ * be strictly smaller than HI.
+ */
+static void check_subtree(struct treap_node *n, unsigned maxwt,
+			  const char *klo, const char *khi)
 {
+  /* Check the heap condition. */
   assert(n->wt <= maxwt);
+
+  /* Check that the key is in bounds.  (Use `strcmp' here to ensure that our
+   * own `str_lt' is working correctly.)
+   */
   if (klo) assert(STRCMP(n->k, >, klo));
   if (khi) assert(STRCMP(n->k, <, khi));
-  if (n->left) check_node(n->left, n->wt, klo, n->k);
-  if (n->right) check_node(n->right, n->wt, n->k, khi);
+
+  /* Check the left subtree.  Node weights must be bounded above by our own
+   * weight.  And every key in the left subtree must be smaller than our
+   * current key.  We propagate the lower bound.
+   */
+  if (n->left) check_subtree(n->left, n->wt, klo, n->k);
+
+  /* Finally, check the right subtree.  This time, every key must be larger
+   * than our key, and we propagate the upper bound.
+   */
+  if (n->right) check_subtree(n->right, n->wt, n->k, khi);
 }
 
+/* Check the treap structure rules for T. */
 void treap_check(struct treap *t)
-  { if (t->root) check_node(t->root, t->root->wt, 0, 0); }
+  { if (t->root) check_subtree(t->root, t->root->wt, 0, 0); }
 
+/* Recursively dump the subtree headed by N, indenting the output lines by
+ * IND spaces.
+ */
 static void dump_node(struct treap_node *n, int ind)
 {
   if (n->left) dump_node(n->left, ind + 1);
@@ -398,6 +760,7 @@ static void dump_node(struct treap_node *n, int ind)
   if (n->right) dump_node(n->right, ind + 1);
 }
 
+/* Dump the treap T to standard output, for debugging purposes. */
 void treap_dump(struct treap *t) { if (t->root) dump_node(t->root, 0); }
 
 /*----- Configuration file parsing ----------------------------------------*/
@@ -406,13 +769,48 @@ void treap_dump(struct treap *t) { if (t->root) dump_node(t->root, 0); }
   extern char **environ;
 #endif
 
+/* Advance P past a syntactically valid name, but no further than L.
+ *
+ * Return the new pointer.  If no name is found, report an error, blaming
+ * FILE and LINE; WHAT is an adjective for the kind of name that was
+ * expected.
+ */
+static const char *scan_name(const char *what,
+			     const char *p, const char *l,
+			     const char *file, unsigned line)
+{
+  const char *q = p;
+
+  while (q < l &&
+	 (ISALNUM(*q) || *q == '-' || *q == '_' || *q == '.' || *q == '/' ||
+			 *q == '*' || *q == '+' || *q == '%' || *q == '@'))
+    q++;
+  if (q == p) lose("%s:%u: expected %s name", file, line, what);
+  return (q);
+}
+
+/* Initialize the configuration state CONF.
+ *
+ * Usually you'd use the static initializer `CONFIG_INIT'.
+ */
 void config_init(struct config *conf)
   { treap_init(&conf->sections); }
 
+/* Find and return the section with null-terminated NAME in CONF.
+ *
+ * If no section is found, the behaviour depends on whether `CF_CREAT' is set
+ * in F: if so, an empty section is created and returned; otherwise, a null
+ * pointer is returned.
+ */
 struct config_section *config_find_section(struct config *conf, unsigned f,
 					   const char *name)
   { return (config_find_section_n(conf, f, name, strlen(name))); }
 
+/* Find and return the section with the SZ-byte NAME in CONF.
+ *
+ * This works like `config_find_section', but with an explicit length for the
+ * NAME rather than null-termination.
+ */
 struct config_section *config_find_section_n(struct config *conf, unsigned f,
 					     const char *name, size_t sz)
 {
@@ -430,18 +828,27 @@ struct config_section *config_find_section_n(struct config *conf, unsigned f,
       sect->parents = 0; sect->nparents = SIZE_MAX;
       treap_init(&sect->vars); treap_init(&sect->cache);
       treap_insert(&conf->sections, &path, &sect->_node, name, sz);
-      config_set_var_n(conf, sect, CF_LITERAL, "@NAME", 5, name, sz);
+      config_set_var_n(conf, sect, CF_LITERAL, "@name", 5, name, sz);
     }
   }
   return (sect);
 }
 
+/* Set the fallback section for CONF to be SECT.
+ *
+ * That is, if a section has no explicit parents, then by default it will
+ * have a single parent which is SECT.  If SECT is null then there is no
+ * fallback section, and sections which don't have explicitly specified
+ * parents have no parents at all.  (This is the default situation.)
+ */
 void config_set_fallback(struct config *conf, struct config_section *sect)
-{
-  if (sect->nparents == SIZE_MAX) sect->nparents = 0;
-  conf->fallback = sect;
-}
+  { conf->fallback = sect; }
 
+/* Arrange that SECT has PARENT as its single parent section.
+ *
+ * If PARENT is null, then arrange that SECT has no parents at all.  In
+ * either case, any `@parents' setting will be ignored.
+ */
 void config_set_parent(struct config_section *sect,
 		       struct config_section *parent)
 {
@@ -453,10 +860,15 @@ void config_set_parent(struct config_section *sect,
   }
 }
 
+/* Initialize I to iterate over the sections defined in CONF. */
 void config_start_section_iter(struct config *conf,
 			       struct config_section_iter *i)
   { i->sect = conf->head; }
 
+/* Return the next section from I, in order of creation.
+ *
+ * If there are no more sections, then return null.
+ */
 struct config_section *config_next_section(struct config_section_iter *i)
 {
   struct config_section *sect;
@@ -466,49 +878,81 @@ struct config_section *config_next_section(struct config_section_iter *i)
   return (sect);
 }
 
+/* Initialize the `parents' links of SECT, if they aren't set up already.
+ *
+ * If SECT contains a `@parents' setting then parse it to determine the
+ * parents; otherwise use CONF's fallbeck section, as established by
+ * `config_set_fallback'.
+ */
 static void set_config_section_parents(struct config *conf,
 				       struct config_section *sect)
 {
   struct config_section *parent;
   struct config_var *var;
-  struct argv av = ARGV_INIT;
+  const char *file; unsigned line;
   size_t i, n;
-  char *p, *q;
+  char *p, *q, *l;
+  struct argv av = ARGV_INIT;
 
+  /* If the section already has parents established then there's nothing to
+   * do.
+   */
   if (sect->nparents != SIZE_MAX) return;
 
-  var = treap_lookup(&sect->vars, "@PARENTS", 8);
+  /* Look up `@parents', without recursion! */
+  var = treap_lookup(&sect->vars, "@parents", 8);
   if (!var) {
-    if (!conf->fallback)
+    /* No explicit setting: use the fallback setting. */
+
+    if (!conf->fallback || conf->fallback == sect)
       sect->nparents = 0;
     else {
-      sect->parents = xmalloc(sizeof(*sect->parents));
-      sect->nparents = 1;
+      sect->parents = xmalloc(sizeof(*sect->parents)); sect->nparents = 1;
       sect->parents[0] = conf->fallback;
     }
   } else {
-    p = var->val;
-    for (;;) {
-      while (ISSPACE(*p)) p++;
-      if (!*p) break;
-      q = p; while (*q && *q != ',' && !ISSPACE(*q)) q++;
-      argv_append(&av, p); argv_append(&av, q);
-      p = q; if (*p == ',') p++;
+    /* Found a `@parents' list: parse it and set the parents list. */
+
+    file = var->file; line = var->line; if (!file) file = "<internal>";
+
+    /* We do this in two phases.  First, we parse out the section names, and
+     * record start/limit pointer pairs in `av'.
+     */
+    p = var->val; l = p + var->n; while (p < l && ISSPACE(*p)) p++;
+    while (*p) {
+      q = p;
+      p = (/*unconst*/ char *)scan_name("parent section", p, l, file, line);
+      argv_append(&av, q); argv_append(&av, p);
+      while (p < l && ISSPACE(*p)) p++;
+      if (p >= l) break;
+      if (*p == ',') do p++; while (ISSPACE(*p));
     }
+
+    /* Now that we've finished parsing, we know how many parents we're going
+     * to have, so we can allocate the `parents' vector and fill it in.
+     */
     sect->nparents = av.n/2;
-    sect->parents = xmalloc(sect->nparents*sizeof(sect->parents));
+    sect->parents = xmalloc(sect->nparents*sizeof(*sect->parents));
     for (i = 0; i < av.n; i += 2) {
       n = av.v[i + 1] - av.v[i];
       parent = config_find_section_n(conf, 0, av.v[i], n);
       if (!parent)
 	lose("%s:%u: unknown parent section `%.*s'",
-	     var->file, var->line, (int)n, av.v[i]);
+	     file, line, (int)n, av.v[i]);
       sect->parents[i/2] = parent;
     }
-    argv_release(&av);
   }
+
+  /* All done. */
+  argv_release(&av);
 }
 
+/* Find a setting of the SZ-byte variable NAME in CONF, starting from SECT.
+ *
+ * If successful, return a pointer to the variable; otherwise return null.
+ * Inheritance cycles and ambiguous inheritance are diagnosed as fatal
+ * errors.
+ */
 struct config_var *search_recursive(struct config *conf,
 				    struct config_section *sect,
 				    const char *name, size_t sz)
@@ -518,6 +962,25 @@ struct config_var *search_recursive(struct config *conf,
   struct config_var *var, *v;
   size_t i, j = j;
 
+  /* If the variable is defined locally then we can just return it. */
+  var = treap_lookup(&sect->vars, name, sz); if (var) return (var);
+
+  /* If we have no parents then there's no way we can find it. */
+  set_config_section_parents(conf, sect);
+  if (!sect->parents) return (0);
+
+  /* Otherwise we must visit the section's parents.  We can avoid paying for
+   * this on every lookup by using a cache.  If there's already an entry for
+   * this variable then we can return the result immediately (note that we
+   * cache both positive and negative outcomes).  Otherwise we create a new
+   * cache entry, do the full recursive search, and fill in the result when
+   * we're done.
+   *
+   * The cache also helps us detect cycles: we set the `CF_OPEN' flag on a
+   * new cache entry when it's first created, and clear it when we fill in
+   * the result: if we encounter an open cache entry again, we know that
+   * we've found a cycle.
+   */
   cache = treap_probe(&sect->cache, name, sz, &path);
   if (!cache) {
     cache = xmalloc(sizeof(*cache)); cache->f = CF_OPEN;
@@ -528,32 +991,51 @@ struct config_var *search_recursive(struct config *conf,
   else
     return (cache->var);
 
-  set_config_section_parents(conf, sect);
-
-  var = treap_lookup(&sect->vars, name, sz);
-  if (!var) {
-    for (i = 0; i < sect->nparents; i++) {
-      v = search_recursive(conf, sect->parents[i], name, sz);
-      if (!v);
-      else if (!var) { var = v; j = i; }
-      else if (var != v)
-	lose("section `%s' inherits variable `%s' ambiguously "
-	     "via `%s' and `%s'",
-	     CONFIG_SECTION_NAME(sect), CONFIG_VAR_NAME(var),
-	     CONFIG_SECTION_NAME(sect->parents[j]),
-	     CONFIG_SECTION_NAME(sect->parents[i]));
-    }
+  /* Recursively search in each parent.  We insist that all parents that find
+   * a variable find the same binding; otherwise we declare ambiguous
+   * inheritance.
+   */
+  for (i = 0; i < sect->nparents; i++) {
+    v = search_recursive(conf, sect->parents[i], name, sz);
+    if (!v);
+    else if (!var) { var = v; j = i; }
+    else if (var != v)
+      lose("section `%s' inherits variable `%s' ambiguously "
+	   "via `%s' and `%s'",
+	   CONFIG_SECTION_NAME(sect), CONFIG_VAR_NAME(var),
+	   CONFIG_SECTION_NAME(sect->parents[j]),
+	   CONFIG_SECTION_NAME(sect->parents[i]));
   }
 
+  /* All done: fill the cache entry in, clear the open flag, and return the
+   * result.
+   */
   cache->var = var; cache->f &= ~CF_OPEN;
   return (var);
 }
 
+/* Find and return the variable with null-terminated NAME in SECT.
+ *
+ * If `CF_INHERIT' is set in F, then the function searches the section's
+ * parents recursively; otherwise, it only checks to see whether the variable
+ * is set directly in SECT.
+ *
+ * If no variable is found, the behaviour depends on whether `CF_CREAT' is
+ * set in F: if so, an empty variable is created and returned; otherwise, a
+ * null pointer is returned.
+ *
+ * Setting both `CF_INHERIT' and `CF_CREAT' is not useful.
+ */
 struct config_var *config_find_var(struct config *conf,
 				   struct config_section *sect,
 				   unsigned f, const char *name)
   { return (config_find_var_n(conf, sect, f, name, strlen(name))); }
 
+/* Find and return the variable with the given SZ-byte NAME in SECT.
+ *
+ * This works like `config_find_var', but with an explicit length for the
+ * NAME rather than null-termination.
+ */
 struct config_var *config_find_var_n(struct config *conf,
 				     struct config_section *sect,
 				     unsigned f, const char *name, size_t sz)
@@ -576,44 +1058,74 @@ struct config_var *config_find_var_n(struct config *conf,
   return (var);
 }
 
-void config_start_var_iter(struct config_section *sect,
-			   struct config_var_iter *i)
-  { treap_start_iter(&sect->vars, &i->i); }
-
-struct config_var *config_next_var(struct config_var_iter *i)
-  { return (treap_next(&i->i)); }
-
-void config_set_var(struct config *conf, struct config_section *sect,
-		    unsigned f,
-		    const char *name, const char *value)
+/* Set variable NAME to VALUE in SECT, with associated flags F.
+ *
+ * The names are null-terminated.  The flags are variable flags: see `struct
+ * config_var' for details.  Returns the variable.
+ *
+ * If the variable is already set and has the `CF_OVERRIDE' flag, then this
+ * function does nothing unless `CF_OVERRIDE' is /also/ set in F.
+ */
+struct config_var *config_set_var(struct config *conf,
+				  struct config_section *sect,
+				  unsigned f,
+				  const char *name, const char *value)
 {
-  config_set_var_n(conf, sect, f,
-		   name, strlen(name),
-		   value, strlen(value));
+  return (config_set_var_n(conf, sect, f,
+			   name, strlen(name),
+			   value, strlen(value)));
 }
 
-void config_set_var_n(struct config *conf, struct config_section *sect,
-		      unsigned f,
-		      const char *name, size_t namelen,
-		      const char *value, size_t valuelen)
+/* As `config_set_var', except that the variable NAME and VALUE have explicit
+ * lengths (NAMELEN and VALUELEN, respectively) rather than being null-
+ * terminated.
+ */
+struct config_var *config_set_var_n(struct config *conf,
+				    struct config_section *sect,
+				    unsigned f,
+				    const char *name, size_t namelen,
+				    const char *value, size_t valuelen)
 {
   struct config_var *var =
     config_find_var_n(conf, sect, CF_CREAT, name, namelen);
 
-  if (var->f&~f&CF_OVERRIDE) return;
+  if (var->f&~f&CF_OVERRIDE) return (var);
   free(var->val); var->val = xstrndup(value, valuelen); var->n = valuelen;
   var->f = f;
+  return (var);
 }
 
+/* Initialize I to iterate over the variables directly defined in SECT. */
+void config_start_var_iter(struct config *conf, struct config_section *sect,
+			   struct config_var_iter *i)
+  { treap_start_iter(&sect->vars, &i->i); }
+
+/* Return next variable from I, in ascending lexicographical order.
+ *
+ * If there are no more variables, then return null.
+ */
+struct config_var *config_next_var(struct config_var_iter *i)
+  { return (treap_next(&i->i)); }
+
+/* Read and parse configuration FILE, applying its settings to CONF.
+ *
+ * If all goes well, the function returns 0.  If the file is not found, then
+ * the behaviour depends on whether `CF_NOENTOK' is set in F: if so, then the
+ * function simply returns -1.  Otherwise, a fatal error is reported.  Note
+ * that this /only/ applies if the file does not exist (specifically, opening
+ * it fails with `ENOENT') -- any other problems are reported as fatal
+ * errors regardless of the flag setting.
+ */
 int config_read_file(struct config *conf, const char *file, unsigned f)
 {
   struct config_section *sect;
   struct config_var *var;
   struct dstr d = DSTR_INIT, dd = DSTR_INIT;
   unsigned line = 0;
-  char *p, *q;
+  const char *p, *q, *r;
   FILE *fp;
 
+  /* Try to open the file. */
   fp = fopen(file, "r");
   if (!fp) {
     if ((f&CF_NOENTOK) && errno == ENOENT) return (-1);
@@ -621,61 +1133,107 @@ int config_read_file(struct config *conf, const char *file, unsigned f)
 	 file, strerror(errno));
   }
 
+  /* Find the initial section. */
   sect = config_find_section(conf, CF_CREAT, "@CONFIG"); var = 0;
 
+  /* Work through the file, line by line. */
   for (;;) {
     dstr_reset(&d); if (dstr_readline(&d, fp)) break;
     line++;
 
-    if (d.p[0] && !ISSPACE(d.p[0])) {
+    /* Trim trailing spaces from the line.  The syntax is sensitive to
+     * leading spaces, so we can't trim those yet.
+     */
+    while (d.len && ISSPACE(d.p[d.len - 1])) d.len--;
+    d.p[d.len] = 0;
+
+    if (!*d.p || *d.p == ';')
+      /* Ignore comments entirely.  (In particular, a comment doesn't
+       * interrupt a multiline variable value.)
+       */
+      ;
+
+    else if (ISSPACE(d.p[0])) {
+      /* The line starts with whitespace, so it's a continuation line. */
+
+      /* Skip the initial whitespace. */
+      p = d.p; while (ISSPACE(*p)) p++;
+
+      /* If we aren't collecting a variable value then this is an error.
+       * Otherwise, accumulate it into the current value.
+       */
+      if (!var)
+	lose("%s:%u: continuation line, but no variable", file, line);
+      if (dd.len) dstr_putc(&dd, ' ');
+      dstr_putm(&dd, p, d.len - (p - d.p));
+
+    } else {
+      /* The line starts in the first column. */
+
+      /* If there's a value value being collected then we must commit it to
+       * its variable (unless there's already a setting there that says we
+       * shouldn't).
+       */
       if (var) {
 	if (!(var->f&CF_OVERRIDE))
 	  { var->val = xstrndup(dd.p, dd.len); var->n = dd.len; }
 	var = 0;
       }
-      if (d.p[0] == ';')
-	;
-      else if (d.p[0] == '[') {
-	p = d.p + 1; q = strchr(p, ']');
-	if (!q) lose("%s:%u: missing `]' in section header", file, line);
+
+      /* Now decide what kind of line this is. */
+      if (d.p[0] == '[') {
+	/* It's a section header. */
+
+	/* Parse the header. */
+	p = d.p + 1; while (ISSPACE(*p)) p++;
+	q = scan_name("section", p, d.p + d.len, file, line);
+	r = q; while (ISSPACE(*r)) r++;
+	if (*r != ']')
+	  lose("%s:%u: expected `]' in section header", file, line);
+	if (r[1])
+	  lose("%s:%u: trailing junk after `]' in section header",
+	       file, line);
+
+	/* Create the new section. */
 	sect = config_find_section_n(conf, CF_CREAT, p, q - p);
-	q++; while (ISSPACE(*q)) q++;
-	if (*q) lose("%s:%u: trailing junk after `]' in section header",
-		     file, line);
+
       } else {
-	p = d.p;
-	while (*p && !ISSPACE(*p) && *p != '{' && *p != '}' && *p != '=')
-	  p++;
+	/* It's a variable assignment.  Parse the name out. */
+	p = scan_name("variable", d.p, d.p + d.len, file, line);
 	var = config_find_var_n(conf, sect, CF_CREAT, d.p, p - d.p);
 	while (ISSPACE(*p)) p++;
 	if (*p != '=') lose("%s:%u: missing `=' in assignment", file, line);
 	p++; while (ISSPACE(*p)) p++;
+
+	/* Clear out the variable's initial value, unless we shouldn't
+	 * override it.
+	 */
 	if (!(var->f&CF_OVERRIDE)) {
 	  free(var->val); var->val = 0; var->f = 0;
 	  free(var->file); var->file = xstrdup(file); var->line = line;
 	}
 	dstr_reset(&dd); dstr_puts(&dd, p);
       }
-    } else {
-      p = d.p; while (ISSPACE(*p)) p++;
-      if (*p) {
-	if (!var)
-	  lose("%s:%u: continuation line, but no variable", file, line);
-	if (dd.len) dstr_putc(&dd, ' ');
-	dstr_puts(&dd, p);
-      }
     }
   }
 
+  /* If there's a value under construction then commit the result. */
   if (var && !(var->f&CF_OVERRIDE))
     { var->val = xstrndup(dd.p, dd.len); var->n = dd.len; }
 
-  dstr_release(&d); dstr_release(&dd);
+  /* Close the file. */
   if (fclose(fp))
     lose("error reading configuration file `%s': %s", file, strerror(errno));
+
+  /* All done. */
+  dstr_release(&d); dstr_release(&dd);
   return (0);
 }
 
+/* Populate SECT with environment variables.
+ *
+ * Environment variables are always set with `CF_LITERAL'.
+ */
 void config_read_env(struct config *conf, struct config_section *sect)
 {
   const char *p, *v;
@@ -689,50 +1247,113 @@ void config_read_env(struct config *conf, struct config_section *sect)
 
 /*----- Substitution and quoting ------------------------------------------*/
 
+/* The substitution and word-splitting state.
+ *
+ * This only keeps track of the immutable parameters for the substitution
+ * task: stuff which changes (flags, filtering state, cursor position) is
+ *	 maintained separately.
+ */
 struct subst {
-  struct config *config;
-  struct config_section *home, *fallback;
-  struct argv *av;
-  struct dstr *d;
+  struct config *config;		/* configuration state */
+  struct config_section *home;		/* home section for lookups */
+  struct dstr *d;			/* current word being constructed */
+  struct argv *av;			/* output word list */
 };
 
-static const char *scan_name(const char *p, const char *l)
-{
-  while (p < l &&
-	 (ISALNUM(*p) || *p == '-' || *p == '_' || *p == '.' || *p == '/' ||
-			 *p == '*' || *p == '+' || *p == '%' || *p == '@'))
-    p++;
-  return (p);
-}
-
-static void filter_string(const char *p, const char *l, struct subst *sb,
-			  unsigned qfilt)
+/* Flags for `subst' and related functions. */
+#define SF_SPLIT 0x0001u		/* split at (unquoted) whitespace */
+#define SF_QUOT 0x0002u			/* currently within double quotes */
+#define SF_SUBST 0x0004u		/* apply `$-substitutions */
+#define SF_SUBEXPR 0x0008u		/* stop at delimiter `|' or `}' */
+#define SF_SPANMASK 0x00ffu		/* mask for the above */
+
+#define SF_WORD 0x0100u			/* output word under construction */
+#define SF_SKIP 0x0200u			/* not producing output */
+#define SF_LITERAL 0x0400u		/* do not expand or substitute */
+#define SF_UPCASE 0x0800u		/* convert to uppercase */
+#define SF_DOWNCASE 0x1000u		/* convert to lowercase */
+#define SF_CASEMASK 0x1800u		/* mask for case conversions */
+
+/* Apply filters encoded in QFILT and F to the text from P to L, and output.
+ *
+ * SB is the substitution state which, in particular, explains where the
+ * output should go.
+ *
+ * The filters are encoded as flags `SF_UPCASE' and `SF_DOWNCASE' for case
+ * conversions, and a nesting depth QFILT for toothpick escaping.  (QFILT is
+ * encoded as the number of toothpicks to print: see `subst' for how this
+ * determined.)
+ */
+static void filter_string(const char *p, const char *l,
+			  const struct subst *sb, unsigned qfilt, unsigned f)
 {
   size_t r, n;
+  char *q; const char *pp, *ll;
 
-  if (!qfilt)
+  if (!qfilt && !(f&SF_CASEMASK))
+    /* Fast path: there's nothing to do: just write to the output. */
     dstr_putm(sb->d, p, l - p);
+
   else for (;;) {
-    r = l - p; n = strcspn(p, "\"\\");
+    /* We must be a bit more circumspect. */
+
+    /* Determine the length of the next span of characters which don't need
+     * escaping.  (If QFILT is zero then this is everything.)
+     */
+    r = l - p; n = qfilt ? strcspn(p, "\"\\") : r;
     if (n > r) n = r;
-    dstr_putm(sb->d, p, n);
+
+    if (!(f&SF_CASEMASK))
+      /* No case conversion: we can just emit this chunk. */
+
+      dstr_putm(sb->d, p, n);
+
+    else {
+      /* Case conversion to do.  Arrange enough space for the output, and
+       * convert it character by character.
+       */
+
+      dstr_ensure(sb->d, n); q = sb->d->p + sb->d->len; pp = p; ll = p + n;
+      if (f&SF_DOWNCASE) while (pp < ll) *q++ = TOLOWER(*pp++);
+      else if (f&SF_UPCASE) while (pp < ll) *q++ = TOUPPER(*pp++);
+      sb->d->len += n;
+    }
+
+    /* If we've reached the end then stop. */
     if (n >= r) break;
-    dstr_putcn(sb->d, '\\', qfilt); dstr_putc(sb->d, p[n]);
-    p += n + 1;
+
+    /* Otherwise we must have found a character which requires escaping.
+     * Emit enough toothpicks.
+     */
+    dstr_putcn(sb->d, '\\', qfilt);
+
+    /* This character is now done, so we can skip over and see if there's
+     * another chunk of stuff we can do at high speed.
+     */
+    dstr_putc(sb->d, p[n]); p += n + 1;
   }
 }
 
+/* Scan and resolve a `[SECT:]VAR' specifier at P.
+ *
+ * Return the address of the next character following the specifier; and set
+ * *VAR_OUT to point to the variable we found, or null if it's not there.  L
+ * is a limit on the region of the buffer that we should process; SB is the
+ * substitution state which provides the home section if none is given
+ * explicitly; FILE and LINE are the source location to blame for problems.
+ */
 static const char *retrieve_varspec(const char *p, const char *l,
-				    struct subst *sb,
-				    struct config_var **var_out)
+				    const struct subst *sb,
+				    struct config_var **var_out,
+				    const char *file, unsigned line)
 {
   struct config_section *sect = sb->home;
   const char *t;
 
-  t = scan_name(p, l);
+  t = scan_name("section or variable", p, l, file, line);
   if (t < l && *t == ':') {
     sect = config_find_section_n(sb->config, 0, p, t - p);
-    p = t + 1; t = scan_name(p, l);
+    p = t + 1; t = scan_name("variable", p, l, file, line);
   }
 
   if (!sect) *var_out = 0;
@@ -740,16 +1361,16 @@ static const char *retrieve_varspec(const char *p, const char *l,
   return (t);
 }
 
-#define SF_SPLIT 0x0001u
-#define SF_QUOT 0x0002u
-#define SF_SUBST 0x0004u
-#define SF_SUBEXPR 0x0008u
-#define SF_SPANMASK 0x00ffu
-#define SF_WORD 0x0100u
-#define SF_SKIP 0x0200u
-#define SF_LITERAL 0x0400u
-
-static const char *subst(const char *p, const char *l, struct subst *sb,
+/* Substitute and/or word-split text.
+ *
+ * The input text starts at P, and continues to (just before) L.  Context for
+ * the task is provided by SB; the source location to blame is FILE and LINE
+ * (FILE may be null so that this can be passed directly from a `config_var'
+ * without further checking); QFILT is the nesting depth in toothpick-
+ * escaping; and F holds a mask of `SF_...' flags.
+ */
+static const char *subst(const char *p, const char *l,
+			 const struct subst *sb,
 			 const char *file, unsigned line,
 			 unsigned qfilt, unsigned f)
 {
@@ -758,46 +1379,67 @@ static const char *subst(const char *p, const char *l, struct subst *sb,
   unsigned subqfilt, ff;
   size_t n;
 
-#define ESCAPE "\\"
-#define SUBST "$"
-#define WORDSEP " \f\r\n\t\v'\""
-#define QUOT "\""
-#define DELIM "|}"
-
-  static const char *const delimtab[] =
-    { ESCAPE,
-      ESCAPE		 WORDSEP,
-      0,
-      ESCAPE		 QUOT,
-      ESCAPE	   SUBST,
-      ESCAPE       SUBST WORDSEP,
-      0,
-      ESCAPE	   SUBST QUOT,
-      ESCAPE DELIM,
-      ESCAPE DELIM	 WORDSEP,
-      0,
-      ESCAPE DELIM	 QUOT,
-      ESCAPE DELIM SUBST,
-      ESCAPE DELIM SUBST WORDSEP,
-      0,
-      ESCAPE DELIM SUBST QUOT };
+  /* It would be best if we could process literal text at high speed.  To
+   * this end, we have a table, indexed by the low-order bits of F, to tell
+   * us which special characters we need to stop at.  This way, we can use
+   * `strcspn' to skip over literal text and stop at the next character which
+   * needs special handling.  Entries in this table with a null pointer
+   * correspond to impossible flag settings.
+   */
+  static const char *const delimtab[] = {
+
+#define ESCAPE "\\"			/* always watch for `\'-escapes */
+#define SUBST "$"			/* check for `$' if `SF_SUBST' set */
+#define WORDSEP " \f\r\n\t\v'\""	/* space, quotes if `SF_SPLIT' but
+					 * not `SF_QUOT' */
+#define QUOT "\""			/* only quotes if `SF_SPLIT' and
+					 * `SF_QUOT' */
+#define DELIM "|}"			/* end delimiters of `SF_SUBEXPR' */
+
+    ESCAPE,
+    ESCAPE	       WORDSEP,
+    0,
+    ESCAPE	       QUOT,
+    ESCAPE	 SUBST,
+    ESCAPE	 SUBST WORDSEP,
+    0,
+    ESCAPE	 SUBST QUOT,
+    ESCAPE DELIM,
+    ESCAPE DELIM       WORDSEP,
+    0,
+    ESCAPE DELIM       QUOT,
+    ESCAPE DELIM SUBST,
+    ESCAPE DELIM SUBST WORDSEP,
+    0,
+    ESCAPE DELIM SUBST QUOT
 
 #undef COMMON
 #undef WORDSEP
 #undef SQUOT
 #undef DELIM
+  };
 
+  /* Set FILE to be useful if it was null on entry. */
   if (!file) file = "<internal>";
 
+  /* If the text is literal then hand off to `filter_string'.  This obviously
+   * starts a word.
+   */
   if (f&SF_LITERAL) {
-    filter_string(p, l, sb, qfilt);
+    filter_string(p, l, sb, qfilt, f);
     f |= SF_WORD;
     goto done;
   }
 
+  /* Chew through the input until it's all gone. */
   while (p < l) {
 
     if ((f&(SF_SPLIT | SF_QUOT)) == SF_SPLIT && ISSPACE(*p)) {
+      /* This is whitespace, we're supposed to split, and we're not within
+       * quotes, so we should split here.
+       */
+
+      /* If there's a word in progress then we should commit it. */
       if (f&SF_WORD) {
 	if (!(f&SF_SKIP)) {
 	  argv_append(sb->av, xstrndup(sb->d->p, sb->d->len));
@@ -805,60 +1447,182 @@ static const char *subst(const char *p, const char *l, struct subst *sb,
 	}
 	f &= ~SF_WORD;
       }
+
+      /* Skip over further whitespace at high speed. */
       do p++; while (p < l && ISSPACE(*p));
 
     } else if (*p == '\\') {
-      p++;
-      if (p >= l) lose("%s:%u: unfinished `\\' escape", file, line);
+      /* This is a toothpick, so start a new word and add the next character
+       * to it.
+       */
+
+      /* If there's no next character then we should be upset. */
+      p++; if (p >= l) lose("%s:%u: unfinished `\\' escape", file, line);
+
       if (!(f&SF_SKIP)) {
+
+	/* If this is a double quote or backslash then check DFLT to see if
+	 * it needs escaping.
+	 */
 	if (qfilt && (*p == '"' || *p == '\\'))
 	  dstr_putcn(sb->d, '\\', qfilt);
-	dstr_putc(sb->d, *p);
+
+	/* Output the character. */
+	if (f&SF_DOWNCASE) dstr_putc(sb->d, TOLOWER(*p));
+	else if (f&SF_UPCASE) dstr_putc(sb->d, TOUPPER(*p));
+	else dstr_putc(sb->d, *p);
       }
-      p++;
+
+      /* Move past the escaped character.  Remember we started a word. */
+      p++; f |= SF_WORD;
 
     } else if ((f&SF_SPLIT) && *p == '"') {
+      /* This is a double quote, and we're word splitting.  We're definitely
+       * in a word now.  Toggle whether we're within quotes.
+       */
+
       f ^= SF_QUOT; f |= SF_WORD; p++;
 
     } else if ((f&(SF_SPLIT | SF_QUOT)) == SF_SPLIT && *p == '\'') {
-      t = strchr(p, '\''); if (!t) lose("%s:%u: missing `''", file, line);
-      if (!(f&SF_SKIP)) filter_string(p, t, sb, qfilt);
+      /* This is a single quote, and we're word splitting but not within
+       * double quotes.  Find the matching end quote, and just output
+       * everything between literally.
+       */
+
+      p++; t = strchr(p, '\'');
+      if (!t || t >= l) lose("%s:%u: missing `''", file, line);
+      if (!(f&SF_SKIP)) filter_string(p, t, sb, qfilt, f);
       p = t + 1; f |= SF_WORD;
 
     } else if ((f&SF_SUBEXPR) && (*p == '|' || *p == '}')) {
+      /* This is an end delimiter, and we're supposed to stop here. */
       break;
 
     } else if ((f&SF_SUBST) && *p == '$') {
+      /* This is a `$' and we're supposed to do substitution. */
+
+      /* The kind of substitution is determined by the next character. */
       p++; if (p >= l) lose("%s:%u: incomplete substitution", file, line);
+
+      /* Prepare flags for a recursive substitution.
+       *
+       * Hide our quote state from the recursive call.  If we're within a
+       * word, then disable word-splitting.
+       */
       ff = f&~(SF_QUOT | (f&SF_WORD ? SF_SPLIT : 0));
+
+      /* Now dispatch based on the following character. */
       switch (*p) {
 
 	case '?':
-	  p = retrieve_varspec(p + 1, l, sb, &var);
+	  /* A conditional expression: $?VAR{CONSEQ[|ALT]} */
+
+	  /* Skip initial space. */
+	  p++; while (p < l && ISSPACE(*p)) p++;
+
+	  /* Find the variable. */
+	  p = retrieve_varspec(p, l, sb, &var, file, line);
+
+	  /* Skip whitespace again. */
+	  while (p < l && ISSPACE(*p)) p++;
+
+	  /* Expect the opening `{'. */
 	  if (p > l || *p != '{') lose("%s:%u: expected `{'", file, line);
 	  p++;
+
+	  /* We'll process the parts recursively, but we need to come back
+	   * when we hit the appropriate delimiters, so arrange for that.
+	   */
 	  ff |= SF_SUBEXPR;
+
+	  /* Process the consequent (skip if the variable wasn't found). */
 	  p = subst(p, l, sb, file, line, qfilt,
 		    ff | (var ? 0 : SF_SKIP));
+
+	  /* If there's a `|' then process the alternative too (skip if the
+	   * variable /was/ found).
+	   */
 	  if (p < l && *p == '|')
 	    p = subst(p + 1, l, sb, file, line, qfilt,
 		      ff | (var ? SF_SKIP : 0));
+
+	  /* We should now be past the closing `}'. */
 	  if (p >= l || *p != '}') lose("%s:%u: missing `}'", file, line);
 	  p++;
 	  break;
 
 	case '{':
-	  q0 = p + 1; p = retrieve_varspec(q0, l, sb, &var); q1 = p;
+	  /* A variable substitution: ${VAR[|FILT]...[?ALT]} */
+
+	  /* Skip initial whitespace. */
+	  p++; while (p < l && ISSPACE(*p)) p++;
+
+	  /* Find the variable. */
+	  q0 = p; p = retrieve_varspec(p, l, sb, &var, file, line); q1 = p;
+
+	  /* Determine the filters to apply when substituting the variable
+	   * value.
+	   */
 	  subqfilt = qfilt;
-	  while (p < l) {
-	    if (*p != '|') break;
-	    p++; t = scan_name(p, l);
-	    if (t - p == 1 && *p == 'q') subqfilt = 2*subqfilt + 1;
+	  for (;;) {
+
+	    /* Skip spaces again. */
+	    while (p < l && ISSPACE(*p)) p++;
+
+	    /* If there's no `|' then there are no more filters, so stop. */
+	    if (p >= l || *p != '|') break;
+
+	    /* Skip the `|' and more spaces. */
+	    p++; while (p < l && ISSPACE(*p)) p++;
+
+	    /* Collect the filter name. */
+	    t = scan_name("filter", p, l, file, line);
+
+	    /* Dispatch on the filter name. */
+	    if (t - p == 1 && *p == 'q')
+	      /* `q' -- quote for Lisp string.
+	       *
+	       * We're currently adding Q `\' characters before each naughty
+	       * character.  But a backslash itself is naughty too, so that
+	       * makes Q + 1 naughty characters, each of which needs a
+	       * toothpick, so now we need Q + (Q + 1) = 2 Q + 1 toothpicks.
+	       *
+	       * Calculate this here rather than at each point toothpicks
+	       * need to be deployed.
+	       */
+
+	      subqfilt = 2*subqfilt + 1;
+
+	    else if (t - p == 1 && *p == 'l')
+	      /* `l' -- convert to lowercase.
+	       *
+	       * If a case conversion is already set, then that will override
+	       * whatever we do here, so don't bother.
+	       */
+
+	      { if (!(ff&SF_CASEMASK)) ff |= SF_DOWNCASE; }
+
+	    else if (t - p == 1 && *p == 'u')
+	      /* `u' -- convert to uppercase.
+	       *
+	       * If a case conversion is already set, then that will override
+	       * whatever we do here, so don't bother.
+	       */
+	      { if (!(ff&SF_CASEMASK)) ff |= SF_UPCASE; }
+
 	    else
+	      /* Something else we didn't understand. */
 	      lose("%s:%u: unknown filter `%.*s'",
 		   file, line, (int)(t - p), p);
+
+	    /* Continue from after the filter name. */
 	    p = t;
 	  }
+
+	  /* If we're not skipping, and we found a variable, then substitute
+	   * its value.  This is the point where we need to be careful about
+	   * recursive expansion.
+	   */
 	  if (!(f&SF_SKIP) && var) {
 	    if (var->f&CF_EXPAND)
 	      lose("%s:%u: recursive expansion of variable `%.*s'",
@@ -869,19 +1633,35 @@ static const char *subst(const char *p, const char *l, struct subst *sb,
 		  ff | (var->f&CF_LITERAL ? SF_LITERAL : 0));
 	    var->f &= ~CF_EXPAND;
 	  }
+
+	  /* If there's an alternative, then we need to process (or maybe
+	   * skip) it.  Otherwise, we should complain if there was no
+	   * veriable, and we're not skipping.
+	   */
 	  if (p < l && *p == '?')
 	    p = subst(p + 1, l, sb, file, line, subqfilt,
 		      ff | SF_SUBEXPR | (var ? SF_SKIP : 0));
 	  else if (!var && !(f&SF_SKIP))
 	    lose("%s:%u: unknown variable `%.*s'",
 		 file, line, (int)(q1 - q0), q0);
+
+	  /* Expect a `}' here.  (No need to skip spaces: we already did that
+	   * after scanning for filters, and either there was no alternative,
+	   * or we advanced to a delimiter character anyway.)
+	   */
 	  if (p >= l || *p != '}') lose("%s:%u: missing `}'", file, line);
 	  p++;
 	  break;
 
 	default:
-	  lose("%s:%u: unexpected substitution `%c'", file, line, *p);
+	  /* Something else.  That's a shame. */
+	  lose("%s:%u: unexpected `$'-substitution `%c'", file, line, *p);
       }
+
+      /* Complain if we started out in word-splitting state, and therefore
+       * have added a whole number of words to the output, but there's a
+       * word-fragment stuck onto the end of this substitution.
+       */
       if (p < l && !(~f&~(SF_WORD | SF_SPLIT)) && !ISSPACE(*p) &&
 	  !((f&SF_SUBEXPR) && (*p == '|' || *p == '}')))
 	lose("%s:%u: surprising word boundary "
@@ -890,23 +1670,42 @@ static const char *subst(const char *p, const char *l, struct subst *sb,
     }
 
     else {
+      /* Something else.  Try to skip over this at high speed.
+       *
+       * This makes use of the table we set up earlier.
+       */
+
       n = strcspn(p, delimtab[f&SF_SPANMASK]);
       if (n > l - p) n = l - p;
-      if (!(f&SF_SKIP)) filter_string(p, p + n, sb, qfilt);
+      if (!(f&SF_SKIP)) filter_string(p, p + n, sb, qfilt, f);
       p += n; f |= SF_WORD;
     }
   }
 
 done:
+  /* Sort out the wreckage. */
+
+  /* If we're still within quotes then something has gone wrong. */
   if (f&SF_QUOT) lose("%s:%u: missing `\"'", file, line);
+
+  /* If we're within a word, and should be splitting, then commit the word to
+   * the output list.
+   */
   if ((f&(SF_WORD | SF_SPLIT | SF_SKIP)) == (SF_SPLIT | SF_WORD)) {
     argv_append(sb->av, xstrndup(sb->d->p, sb->d->len));
     dstr_reset(sb->d);
   }
 
+  /* And, with that, we're done. */
   return (p);
 }
 
+/* Expand substitutions in a string.
+ *
+ * Expand the null-terminated string P relative to the HOME section, using
+ * configuration CONFIG, and appending the result to dynamic string D.  Blame
+ * WHAT in any error messages.
+ */
 void config_subst_string(struct config *config, struct config_section *home,
 			 const char *what, const char *p, struct dstr *d)
 {
@@ -917,6 +1716,12 @@ void config_subst_string(struct config *config, struct config_section *home,
   dstr_putz(d);
 }
 
+/* Expand substitutions in a string.
+ *
+ * Expand the null-terminated string P relative to the HOME section, using
+ * configuration CONFIG, returning the result as a freshly malloc(3)ed
+ * string.  Blame WHAT in any error messages.
+ */
 char *config_subst_string_alloc(struct config *config,
 				struct config_section *home,
 				const char *what, const char *p)
@@ -928,6 +1733,11 @@ char *config_subst_string_alloc(struct config *config,
   q = xstrndup(d.p, d.len); dstr_release(&d); return (q);
 }
 
+/* Expand substitutions in a variable.
+ *
+ * Expand the value of the variable VAR relative to the HOME section, using
+ * configuration CONFIG, appending the result to dynamic string D.
+ */
 void config_subst_var(struct config *config, struct config_section *home,
 		      struct config_var *var, struct dstr *d)
 {
@@ -941,6 +1751,12 @@ void config_subst_var(struct config *config, struct config_section *home,
   dstr_putz(d);
 }
 
+/* Expand substitutions in a variable.
+ *
+ * Expand the value of the variable VAR relative to the HOME section, using
+ * configuration CONFIG, returning the result as a freshly malloc(3)ed
+ * string.
+ */
 char *config_subst_var_alloc(struct config *config,
 			     struct config_section *home,
 			     struct config_var *var)
@@ -952,6 +1768,12 @@ char *config_subst_var_alloc(struct config *config,
   q = xstrndup(d.p, d.len); dstr_release(&d); return (q);
 }
 
+/* Expand substitutions in a variable and split into words.
+ *
+ * Expand and word-split the value of the variable VAR relative to the HOME
+ * section, using configuration CONFIG, appending the resulting words into
+ * the vector AV.
+ */
 void config_subst_split_var(struct config *config,
 			    struct config_section *home,
 			    struct config_var *var, struct argv *av)