From d7482997dd1ca71b70df43c15dd5956f435a1a7e Mon Sep 17 00:00:00 2001
From: simon <simon@cda61777-01e9-0310-a592-d414129be87e>
Date: Mon, 5 Aug 2002 10:31:35 +0000
Subject: [PATCH] Rename Buttress to Halibut. I _think_ I've caught everything
 in this pass.

git-svn-id: svn://svn.tartarus.org/sgt/halibut@1800 cda61777-01e9-0310-a592-d414129be87e
---
 LICENCE            |   21 +
 MODULE             |    5 +
 Makefile           |   81 ++
 biblio.c           |  105 +++
 bk_text.c          |  587 ++++++++++++++
 bk_whlp.c          |  639 +++++++++++++++
 bk_xhtml.c         | 1446 ++++++++++++++++++++++++++++++++++
 contents.c         |  222 ++++++
 error.c            |  219 ++++++
 halibut.h          |  416 ++++++++++
 help.c             |   32 +
 index.c            |  230 ++++++
 input.c            | 1164 ++++++++++++++++++++++++++++
 inputs/errors.but  |   51 ++
 inputs/errors2.but |    5 +
 inputs/test.but    |  130 ++++
 keywords.c         |  154 ++++
 licence.c          |   16 +
 main.c             |  300 +++++++
 malloc.c           |  149 ++++
 misc.c             |  312 ++++++++
 misc/halibut.sl    |   98 +++
 misc/logalloc      |   61 ++
 style.c            |    8 +
 tree234.c          | 2193 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 tree234.h          |  202 +++++
 ustring.c          |  174 +++++
 version.c          |   13 +
 winhelp.c          | 2127 ++++++++++++++++++++++++++++++++++++++++++++++++++
 winhelp.h          |  168 ++++
 30 files changed, 11328 insertions(+)
 create mode 100644 LICENCE
 create mode 100644 MODULE
 create mode 100644 Makefile
 create mode 100644 biblio.c
 create mode 100644 bk_text.c
 create mode 100644 bk_whlp.c
 create mode 100644 bk_xhtml.c
 create mode 100644 contents.c
 create mode 100644 error.c
 create mode 100644 halibut.h
 create mode 100644 help.c
 create mode 100644 index.c
 create mode 100644 input.c
 create mode 100644 inputs/errors.but
 create mode 100644 inputs/errors2.but
 create mode 100644 inputs/test.but
 create mode 100644 keywords.c
 create mode 100644 licence.c
 create mode 100644 main.c
 create mode 100644 malloc.c
 create mode 100644 misc.c
 create mode 100644 misc/halibut.sl
 create mode 100755 misc/logalloc
 create mode 100644 style.c
 create mode 100644 tree234.c
 create mode 100644 tree234.h
 create mode 100644 ustring.c
 create mode 100644 version.c
 create mode 100644 winhelp.c
 create mode 100644 winhelp.h

diff --git a/LICENCE b/LICENCE
new file mode 100644
index 0000000..b1bfa8b
--- /dev/null
+++ b/LICENCE
@@ -0,0 +1,21 @@
+Halibut is copyright (c) 1999-2001 Simon Tatham and James Aylett.
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation files
+(the "Software"), to deal in the Software without restriction,
+including without limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of the Software,
+and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/MODULE b/MODULE
new file mode 100644
index 0000000..260f8ab
--- /dev/null
+++ b/MODULE
@@ -0,0 +1,5 @@
+Module: halibut
+Author: Simon Tatham <anakin@pobox.com>
+Description: Halibut is yet another text formatting system, intended primarily for writing software documentation. It accepts a single source format and outputs a variety of formats, planned to include text, HTML, Texinfo, Windows Help, Windows HTMLHelp, PostScript and PDF. It has comprehensive indexing and cross-referencing support, and generates hyperlinks within output documents wherever possible.
+Homepage: http://www.chiark.greenend.org.uk/~sgtatham/halibut.html
+Release: http://www.chiark.greenend.org.uk/~sgtatham/halibut.html
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..b82ba23
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,81 @@
+# Halibut master makefile
+
+# Requires a compiler with -MD support, currently
+
+# `make' from top level will build in directory `build'
+# `make BUILDDIR=foo' from top level will build in directory foo
+ifndef REALBUILD
+ifndef BUILDDIR
+ifdef TEST
+BUILDDIR := test
+else
+BUILDDIR := build
+endif
+endif
+all: # default goal: build halibut inside $(BUILDDIR)
+	@test -d $(BUILDDIR) || mkdir $(BUILDDIR)
+	@$(MAKE) -C $(BUILDDIR) -f ../Makefile REALBUILD=yes
+spotless: # as `clean', and also remove the generated .d files
+	@test -d $(BUILDDIR) || mkdir $(BUILDDIR)
+	@$(MAKE) -C $(BUILDDIR) -f ../Makefile spotless REALBUILD=yes
+clean: # remove objects, the halibut binary and core from $(BUILDDIR)
+	@test -d $(BUILDDIR) || mkdir $(BUILDDIR)
+	@$(MAKE) -C $(BUILDDIR) -f ../Makefile clean REALBUILD=yes
+else
+
+# The `real' makefile part.
+
+CFLAGS += -Wall -W
+# TEST builds add -DLOGALLOC and link with -lefence.
+ifdef TEST
+CFLAGS += -DLOGALLOC
+LIBS += -lefence
+endif
+# RELEASE=<ver> supplies a default VERSION; builds without RELEASE get -g.
+ifdef RELEASE
+ifndef VERSION
+VERSION := $(RELEASE)
+endif
+else
+CFLAGS += -g
+endif
+# If VER (or, failing that, VERSION) is set, VDEF passes it as -DVERSION="...".
+ifndef VER
+ifdef VERSION
+VER := $(VERSION)
+endif
+endif
+ifdef VER
+VDEF := -DVERSION=\"$(VER)\"
+endif
+# This part runs inside the build directory, so the sources are one level up.
+SRC := ../
+
+MODULES := main malloc ustring error help licence version misc tree234
+MODULES += input keywords contents index style biblio
+MODULES += bk_text bk_xhtml bk_whlp
+MODULES += winhelp
+# One object file and one dependency (.d) file name per module.
+OBJECTS := $(addsuffix .o,$(MODULES))
+DEPS := $(addsuffix .d,$(MODULES))
+
+halibut: $(OBJECTS) # link every module object into the executable
+	$(CC) $(LFLAGS) -o halibut $(OBJECTS) $(LIBS)
+# Compile each module from $(SRC); -MD also writes the matching .d file.
+%.o: $(SRC)%.c
+	$(CC) $(CFLAGS) -MD -c $<
+# version.o is rebuilt on every run (FORCE). NOTE(review): no $(CFLAGS) here.
+version.o: FORCE
+	$(CC) $(VDEF) -MD -c $(SRC)version.c
+# spotless does everything clean does and then removes the .d files too.
+spotless:: clean
+	rm -f *.d
+
+clean::
+	rm -f *.o halibut core
+
+FORCE: # phony target to force version.o to be rebuilt every time
+# Pull in the generated dependencies; the leading `-' tolerates missing files.
+-include $(DEPS)
+
+endif
diff --git a/biblio.c b/biblio.c
new file mode 100644
index 0000000..de0b925
--- /dev/null
+++ b/biblio.c
@@ -0,0 +1,105 @@
+/*
+ * biblio.c: process the bibliography
+ */
+
+#include <assert.h>
+#include "halibut.h"
+
+/* Format positive `num' as the citation text "[num]"; returns ustrdup(). */
+static wchar_t *gentext(int num) {
+    wchar_t text[22];
+    wchar_t *p = text + sizeof(text) / sizeof(*text);  /* elements, not bytes */
+    *--p = L'\0';
+    *--p = L']';
+    for (; num != 0; num /= 10) {      /* emit digits least significant first */
+	assert(p > text);
+	*--p = L"0123456789"[num % 10];
+    }
+    assert(p > text);
+    *--p = L'[';
+    return ustrdup(p);
+}
+
+/* Note a reference to keyword `key' made at source position `fpos'. */
+static void cite_biblio(keywordlist *kl, wchar_t *key, filepos fpos) {
+    keyword *entry = kw_lookup(kl, key);
+
+    /* An unknown keyword is reported at the position it was used. */
+    if (entry == NULL) {
+	error(err_nosuchkw, &fpos, key);
+	return;
+    }
+
+    /*
+     * The keyword exists. Only bibliography entries are affected:
+     * one that is referenced here becomes a cited entry.
+     */
+    if (entry->para->type == para_Biblio)
+	entry->para->type = para_BiblioCited;
+}
+
+/*
+ * Make a pass through the source form, generating citation formats
+ * for bibliography entries and also marking which bibliography
+ * entries are actually cited (or \nocite-ed).
+ */
+
+void gen_citations(paragraph *source, keywordlist *kl) {
+    paragraph *para;
+    int bibnum = 0;		       /* last automatically assigned number */
+
+    for (para = source; para; para = para->next) {
+	word *ptr;
+
+	/*
+	 * \BR paragraphs supply citation text; \nocite ones cite keywords.
+	 */
+	if (para->type == para_BR) {
+	    keyword *kw = kw_lookup(kl, para->keyword);
+	    if (!kw) {
+		error(err_nosuchkw, &para->fpos, para->keyword);
+	    } else if (kw->text) {     /* text already set: report a repeat */
+		error(err_multiBR, &para->fpos, para->keyword);
+	    } else {
+		kw->text = dup_word_list(para->words);
+	    }
+	} else if (para->type == para_NoCite) {
+	    wchar_t *wp = para->keyword;
+	    while (*wp) {	       /* one key per string, until an empty one */
+		cite_biblio(kl, wp, para->fpos);
+		wp = uadv(wp);
+	    }
+	}
+
+	/*
+	 * Scan every word of the paragraph for keyword references.
+	 */
+	for (ptr = para->words; ptr; ptr = ptr->next) {
+	    if (ptr->type == word_UpperXref ||
+		ptr->type == word_LowerXref)
+		cite_biblio(kl, ptr->text, ptr->fpos);
+	}
+    }
+
+    /*
+     * We're now almost done; all that remains is to scan through
+     * the cited bibliography entries and invent default citation
+     * texts for the ones that don't already have explicitly
+     * provided \BR text.
+     */
+    for (para = source; para; para = para->next) {
+	if (para->type == para_BiblioCited) {
+	    keyword *kw = kw_lookup(kl, para->keyword);
+	    assert(kw != NULL);
+	    if (!kw->text) {
+		word *wd = smalloc(sizeof(word));
+		wd->text = gentext(++bibnum);   /* numbered in document order */
+		wd->type = word_Normal;
+		wd->alt = NULL;
+		wd->next = NULL;
+		kw->text = wd;
+	    }
+	    para->kwtext = kw->text;   /* same list as the keyword, not a copy */
+	}
+    }
+}
diff --git a/bk_text.c b/bk_text.c
new file mode 100644
index 0000000..1583cbd
--- /dev/null
+++ b/bk_text.c
@@ -0,0 +1,587 @@
+/*
+ * text backend for Halibut
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "halibut.h"
+
+typedef enum { LEFT, LEFTPLUS, CENTRE } alignment;   /* heading placement */
+typedef struct {
+    alignment align;		       /* how the heading is positioned */
+    int just_numbers;		       /* TRUE: prefix with kwtext2, else kwtext */
+    wchar_t underline;		       /* underlining character; L'\0' for none */
+} alignstruct;
+
+typedef struct {
+    int indent, indent_code;	       /* body indent; extra indent for code */
+    int listindentbefore, listindentafter;   /* list indents: before/after prefix */
+    int width;			       /* wrap width for body text */
+    alignstruct atitle, achapter, *asect;   /* heading styles; asect is an array */
+    int nasect;			       /* number of entries in asect */
+    int include_version_id;	       /* output para_VersionID paragraphs? */
+    int indent_preambles;	       /* indent preamble and copyright too? */
+    word bullet;		       /* prefix word for para_Bullet paragraphs */
+} textconfig;
+
+static int text_convert(wchar_t *, char **);
+
+static void text_heading(FILE *, word *, word *, word *, alignstruct, int,int);
+static void text_rule(FILE *, int, int);
+static void text_para(FILE *, word *, char *, word *, int, int, int);
+static void text_codepara(FILE *, word *, int, int);
+static void text_versionid(FILE *, word *);
+
+static alignment utoalign(wchar_t *p) {
+    /* Translate a config value into an alignment; unknown names mean LEFT. */
+    int centred = (ustricmp(p, L"centre") == 0 || ustricmp(p, L"center") == 0);
+    if (centred)
+	return CENTRE;
+    return ustricmp(p, L"leftplus") == 0 ? LEFTPLUS : LEFT;
+}
+
+/*
+ * Start from the built-in defaults, then apply each para_Config in `source'.
+ */
+static textconfig text_configure(paragraph *source) {
+    textconfig ret;
+
+    /* Non-negotiables. */
+    ret.bullet.next = NULL;
+    ret.bullet.alt = NULL;
+    ret.bullet.type = word_Normal;
+    ret.atitle.just_numbers = FALSE;   /* ignored */
+
+    /* Defaults. */
+    ret.indent = 7;
+    ret.indent_code = 2;
+    ret.listindentbefore = 1;
+    ret.listindentafter = 3;
+    ret.width = 68;
+    ret.atitle.align = CENTRE;
+    ret.atitle.underline = L'=';
+    ret.achapter.align = LEFT;
+    ret.achapter.just_numbers = FALSE;
+    ret.achapter.underline = L'-';
+    ret.nasect = 1;
+    ret.asect = mknewa(alignstruct, ret.nasect);
+    ret.asect[0].align = LEFTPLUS;
+    ret.asect[0].just_numbers = TRUE;
+    ret.asect[0].underline = L'\0';
+    ret.include_version_id = TRUE;
+    ret.indent_preambles = FALSE;
+    ret.bullet.text = ustrdup(L"-");
+
+    for (; source; source = source->next) {
+	if (source->type == para_Config) {
+	    if (!ustricmp(source->keyword, L"text-indent")) {
+		ret.indent = utoi(uadv(source->keyword));
+	    } else if (!ustricmp(source->keyword, L"text-indent-code")) {
+		ret.indent_code = utoi(uadv(source->keyword));
+	    } else if (!ustricmp(source->keyword, L"text-width")) {
+		ret.width = utoi(uadv(source->keyword));
+	    } else if (!ustricmp(source->keyword, L"text-list-indent")) {
+		ret.listindentbefore = utoi(uadv(source->keyword));
+	    } else if (!ustricmp(source->keyword, L"text-listitem-indent")) {
+		ret.listindentafter = utoi(uadv(source->keyword));
+	    } else if (!ustricmp(source->keyword, L"text-chapter-align")) {
+		ret.achapter.align = utoalign(uadv(source->keyword));
+	    } else if (!ustricmp(source->keyword, L"text-chapter-underline")) {
+		ret.achapter.underline = *uadv(source->keyword);
+	    } else if (!ustricmp(source->keyword, L"text-chapter-numeric")) {
+		ret.achapter.just_numbers = utob(uadv(source->keyword));
+	    } else if (!ustricmp(source->keyword, L"text-section-align")) {
+		wchar_t *p = uadv(source->keyword);
+		int n = 0;	       /* section level; 0 unless a number is given */
+		if (uisdigit(*p)) {
+		    n = utoi(p);
+		    p = uadv(p);
+		}
+		if (n >= ret.nasect) { /* grow, copying the deepest known style */
+		    int i;
+		    ret.asect = resize(ret.asect, n+1);
+		    for (i = ret.nasect; i <= n; i++)
+			ret.asect[i] = ret.asect[ret.nasect-1];
+		    ret.nasect = n+1;
+		}
+		ret.asect[n].align = utoalign(p);
+	    } else if (!ustricmp(source->keyword, L"text-section-underline")) {
+		wchar_t *p = uadv(source->keyword);
+		int n = 0;
+		if (uisdigit(*p)) {
+		    n = utoi(p);
+		    p = uadv(p);
+		}
+		if (n >= ret.nasect) {
+		    int i;
+		    ret.asect = resize(ret.asect, n+1);
+		    for (i = ret.nasect; i <= n; i++)
+			ret.asect[i] = ret.asect[ret.nasect-1];
+		    ret.nasect = n+1;
+		}
+		ret.asect[n].underline = *p;
+	    } else if (!ustricmp(source->keyword, L"text-section-numeric")) {
+		wchar_t *p = uadv(source->keyword);
+		int n = 0;
+		if (uisdigit(*p)) {
+		    n = utoi(p);
+		    p = uadv(p);
+		}
+		if (n >= ret.nasect) {
+		    int i;
+		    ret.asect = resize(ret.asect, n+1);
+		    for (i = ret.nasect; i <= n; i++)
+			ret.asect[i] = ret.asect[ret.nasect-1];
+		    ret.nasect = n+1;
+		}
+		ret.asect[n].just_numbers = utob(p);
+	    } else if (!ustricmp(source->keyword, L"text-title-align")) {
+		ret.atitle.align = utoalign(uadv(source->keyword));
+	    } else if (!ustricmp(source->keyword, L"text-title-underline")) {
+		ret.atitle.underline = *uadv(source->keyword);
+	    } else if (!ustricmp(source->keyword, L"text-versionid")) {
+		ret.include_version_id = utob(uadv(source->keyword));
+	    } else if (!ustricmp(source->keyword, L"text-indent-preamble")) {
+		ret.indent_preambles = utob(uadv(source->keyword));
+	    } else if (!ustricmp(source->keyword, L"text-bullet")) {
+		sfree(ret.bullet.text);   /* own a copy: the caller sfree()s it */
+		ret.bullet.text = ustrdup(uadv(source->keyword));
+	    }
+	}
+    }
+
+    return ret;
+}
+
+/*
+ * Text backend entry point: renders `sourceform' into the file output.txt.
+ */
+void text_backend(paragraph *sourceform, keywordlist *keywords,
+		  indexdata *idx) {
+    paragraph *p;
+    textconfig conf;
+    word *prefix, *body, *wp;
+    word spaceword;
+    FILE *fp;
+    char *prefixextra;
+    int indentb, indenta;
+
+    IGNORE(keywords);		       /* we don't happen to need this */
+    IGNORE(idx);		       /* or this */
+
+    conf = text_configure(sourceform);
+
+    /*
+     * Open the output file. FIXME: want configurable output file
+     * names here; for the moment, we'll just call it `output.txt'.
+     */
+    fp = fopen("output.txt", "w");
+    if (!fp) {
+	error(err_cantopenw, "output.txt");
+	sfree(conf.asect);	       /* don't leak the configuration */
+	sfree(conf.bullet.text);
+	return;
+    }
+
+    /* Do the title */
+    for (p = sourceform; p; p = p->next)
+	if (p->type == para_Title)
+	    text_heading(fp, NULL, NULL, p->words,
+			 conf.atitle, conf.indent, conf.width);
+
+    /* Do the preamble and copyright */
+    for (p = sourceform; p; p = p->next)
+	if (p->type == para_Preamble)
+	    text_para(fp, NULL, NULL, p->words,
+		      conf.indent_preambles ? conf.indent : 0, 0,
+		      conf.width + (conf.indent_preambles ? 0 : conf.indent));
+    for (p = sourceform; p; p = p->next)
+	if (p->type == para_Copyright)
+	    text_para(fp, NULL, NULL, p->words,
+		      conf.indent_preambles ? conf.indent : 0, 0,
+		      conf.width + (conf.indent_preambles ? 0 : conf.indent));
+
+    /* Do the main document */
+    for (p = sourceform; p; p = p->next) switch (p->type) {
+
+	/*
+	 * Things we ignore because we've already processed them or
+	 * aren't going to touch them in this pass.
+	 */
+      case para_IM:
+      case para_BR:
+      case para_Biblio:		       /* only touch BiblioCited */
+      case para_VersionID:
+      case para_Copyright:
+      case para_Preamble:
+      case para_NoCite:
+      case para_Title:
+	break;
+
+	/* Chapter titles. */
+      case para_Chapter:
+      case para_Appendix:
+      case para_UnnumberedChapter:
+	text_heading(fp, p->kwtext, p->kwtext2, p->words,
+		     conf.achapter, conf.indent, conf.width);
+	break;
+
+      case para_Heading:
+      case para_Subsect:
+	text_heading(fp, p->kwtext, p->kwtext2, p->words,
+		     conf.asect[p->aux>=conf.nasect ? conf.nasect-1 : p->aux],
+		     conf.indent, conf.width);
+	break;
+
+      case para_Rule:
+	text_rule(fp, conf.indent, conf.width);
+	break;
+
+      case para_Normal:
+      case para_BiblioCited:
+      case para_Bullet:
+      case para_NumberedList:
+	if (p->type == para_Bullet) {
+	    prefix = &conf.bullet;
+	    prefixextra = NULL;
+	    indentb = conf.listindentbefore;
+	    indenta = conf.listindentafter;
+	} else if (p->type == para_NumberedList) {
+	    prefix = p->kwtext;
+	    prefixextra = ".";	       /* FIXME: configurability */
+	    indentb = conf.listindentbefore;
+	    indenta = conf.listindentafter;
+	} else {
+	    prefix = NULL;
+	    prefixextra = NULL;
+	    indentb = indenta = 0;
+	}
+	if (p->type == para_BiblioCited) {
+	    body = dup_word_list(p->kwtext);   /* citation text goes first */
+	    for (wp = body; wp->next; wp = wp->next);
+	    wp->next = &spaceword;     /* splice: citation, space, paragraph */
+	    spaceword.next = p->words;
+	    spaceword.alt = NULL;
+	    spaceword.type = word_WhiteSpace;
+	    spaceword.text = NULL;
+	} else {
+	    wp = NULL;
+	    body = p->words;
+	}
+	text_para(fp, prefix, prefixextra, body,
+		  conf.indent + indentb, indenta, conf.width);
+	if (wp) {
+	    wp->next = NULL;	       /* unsplice, so only the copy is freed */
+	    free_word_list(body);
+	}
+	break;
+
+      case para_Code:
+	text_codepara(fp, p->words, conf.indent + conf.indent_code, conf.width - 2 * conf.indent_code);
+	break;
+    }
+
+    /* Do the version ID */
+    if (conf.include_version_id) {
+	for (p = sourceform; p; p = p->next)
+	    if (p->type == para_VersionID)
+		text_versionid(fp, p->words);
+    }
+
+    /* Tidy up */
+    fclose(fp);
+    sfree(conf.asect);
+    sfree(conf.bullet.text);
+}
+
+/*
+ * Convert a wide string into a string of chars. If `result' is
+ * non-NULL, mallocs the resulting string and stores a pointer to
+ * it in `*result'. If `result' is NULL, merely checks whether all
+ * characters in the string are feasible for the output character
+ * set.
+ *
+ * Return is nonzero if all characters are OK. If not all
+ * characters are OK but `result' is non-NULL, a result _will_
+ * still be generated!
+ */
+static int text_convert(wchar_t *s, char **result) {
+    /*
+     * FIXME. Currently this is ISO8859-1 only.
+     */
+    int doing = (result != 0);	       /* FALSE: only check, build nothing */
+    int ok = TRUE;
+    char *p = NULL;
+    int plen = 0, psize = 0;	       /* characters stored; buffer capacity */
+
+    for (; *s; s++) {
+	wchar_t c = *s;
+	char outc;
+
+	if ((c >= 32 && c <= 126) ||   /* printable ASCII */
+	    (c >= 160 && c <= 255)) {  /* upper half of the 8-bit range */
+	    /* Char is OK. */
+	    outc = (char)c;
+	} else {
+	    /* Char is not OK. */
+	    ok = FALSE;
+	    outc = 0xBF;	       /* approximate the good old DEC `uh?' */
+	}
+	if (doing) {
+	    if (plen >= psize) {
+		psize = plen + 256;    /* grow in steps of 256 characters */
+		p = resize(p, psize);
+	    }
+	    p[plen++] = outc;
+	}
+    }
+    if (doing) {
+	p = resize(p, plen+1);	       /* exact size, with room for the NUL */
+	p[plen] = '\0';
+	*result = p;
+    }
+    return ok;
+}
+
+static void text_rdaddwc(rdstringc *rs, word *text, word *end) {
+    char *c;			       /* converted text of the current word */
+
+    for (; text && text != end; text = text->next) switch (text->type) {
+      case word_HyperLink:
+      case word_HyperEnd:
+      case word_UpperXref:
+      case word_LowerXref:
+      case word_XrefEnd:
+      case word_IndexRef:
+	break;			       /* these add nothing to the output */
+
+      case word_Normal:
+      case word_Emph:
+      case word_Code:
+      case word_WeakCode:
+      case word_WhiteSpace:
+      case word_EmphSpace:
+      case word_CodeSpace:
+      case word_WkCodeSpace:
+      case word_Quote:
+      case word_EmphQuote:
+      case word_CodeQuote:
+      case word_WkCodeQuote:
+	assert(text->type != word_CodeQuote &&
+	       text->type != word_WkCodeQuote);
+	if (towordstyle(text->type) == word_Emph &&
+	    (attraux(text->aux) == attr_First ||
+	     attraux(text->aux) == attr_Only))
+	    rdaddc(rs, '_');	       /* FIXME: configurability */
+	else if (towordstyle(text->type) == word_Code &&
+		 (attraux(text->aux) == attr_First ||
+		  attraux(text->aux) == attr_Only))
+	    rdaddc(rs, '`');	       /* FIXME: configurability */
+	if (removeattr(text->type) == word_Normal) {
+	    if (text_convert(text->text, &c))
+		rdaddsc(rs, c);
+	    else		       /* NOTE(review): emits nothing if alt is NULL */
+		text_rdaddwc(rs, text->alt, NULL);
+	    sfree(c);		       /* text_convert sets c even on failure */
+	} else if (removeattr(text->type) == word_WhiteSpace) {
+	    rdaddc(rs, ' ');
+	} else if (removeattr(text->type) == word_Quote) {
+	    rdaddc(rs, quoteaux(text->aux) == quote_Open ? '`' : '\'');
+				       /* FIXME: configurability */
+	}
+	if (towordstyle(text->type) == word_Emph &&
+	    (attraux(text->aux) == attr_Last ||
+	     attraux(text->aux) == attr_Only))
+	    rdaddc(rs, '_');	       /* FIXME: configurability */
+	else if (towordstyle(text->type) == word_Code &&
+		 (attraux(text->aux) == attr_Last ||
+		  attraux(text->aux) == attr_Only))
+	    rdaddc(rs, '\'');	       /* FIXME: configurability */
+	break;
+    }
+}
+
+static int text_width(word *);
+
+static int text_width_list(word *text) {
+    /* Total display width of a whole word list: sum of each word's width. */
+    int total;
+    word *w;
+    for (total = 0, w = text; w != NULL; w = w->next)
+	total += text_width(w);
+    return total;
+}
+
+static int text_width(word *text) {
+    switch (text->type) {
+      case word_HyperLink:
+      case word_HyperEnd:
+      case word_UpperXref:
+      case word_LowerXref:
+      case word_XrefEnd:
+      case word_IndexRef:
+	return 0;		       /* these occupy no columns */
+
+      case word_Normal:
+      case word_Emph:
+      case word_Code:
+      case word_WeakCode:
+	return (((text->type == word_Emph ||
+		  text->type == word_Code)
+		 ? (attraux(text->aux) == attr_Only ? 2 :   /* both delimiters */
+		    attraux(text->aux) == attr_Always ? 0 : 1)
+		 : 0) +
+		(text_convert(text->text, NULL) ?
+		 ustrlen(text->text) :
+		 text_width_list(text->alt)));   /* 0 if there is no alt */
+
+      case word_WhiteSpace:
+      case word_EmphSpace:
+      case word_CodeSpace:
+      case word_WkCodeSpace:
+      case word_Quote:
+      case word_EmphQuote:
+      case word_CodeQuote:
+      case word_WkCodeQuote:
+	assert(text->type != word_CodeQuote &&
+	       text->type != word_WkCodeQuote);
+	return (((towordstyle(text->type) == word_Emph ||
+		  towordstyle(text->type) == word_Code)
+		 ? (attraux(text->aux) == attr_Only ? 2 :
+		    attraux(text->aux) == attr_Always ? 0 : 1)
+		 : 0) + 1);	       /* + 1 for the space or quote itself */
+    }
+    return 0;			       /* should never happen */
+}
+
+static void text_heading(FILE *fp, word *tprefix, word *nprefix, word *text,
+			 alignstruct align, int indent, int width) {
+    rdstringc t = { 0, 0, NULL };      /* the output line being assembled */
+    int margin, length;
+    int firstlinewidth, wrapwidth;
+    wrappedline *wrapping, *p;
+
+    if (align.just_numbers && nprefix) {
+	text_rdaddwc(&t, nprefix, NULL);
+	rdaddc(&t, ' ');	       /* FIXME: as below */
+    } else if (!align.just_numbers && tprefix) {
+	text_rdaddwc(&t, tprefix, NULL);
+	rdaddsc(&t, ": ");	       /* FIXME: configurability */
+    }
+    margin = length = (t.text ? strlen(t.text) : 0);   /* prefix length, if any */
+
+    if (align.align == LEFTPLUS) {
+	margin = indent - margin;      /* prefix ends where body text starts */
+	if (margin < 0) margin = 0;
+	firstlinewidth = indent + width - margin - length;
+	wrapwidth = width;
+    } else if (align.align == LEFT || align.align == CENTRE) {
+	margin = 0;
+	firstlinewidth = indent + width - length;
+	wrapwidth = indent + width;
+    }
+
+    wrapping = wrap_para(text, firstlinewidth, wrapwidth, text_width);
+    for (p = wrapping; p; p = p->next) {
+	text_rdaddwc(&t, p->begin, p->end);
+	length = (t.text ? strlen(t.text) : 0);
+	if (align.align == CENTRE) {
+	    margin = (indent + width - length)/2;
+	    if (margin < 0) margin = 0;
+	}
+	fprintf(fp, "%*s%s\n", margin, "", t.text);
+	if (align.underline != L'\0') {
+	    char *u, uc;
+	    wchar_t uw[2];
+	    uw[0] = align.underline; uw[1] = L'\0';
+	    text_convert(uw, &u);      /* underline char in the output charset */
+	    uc = u[0];
+	    sfree(u);
+	    fprintf(fp, "%*s", margin, "");
+	    while (length--)	       /* one underline char per text column */
+		putc(uc, fp);
+	    putc('\n', fp);
+	}
+	if (align.align == LEFTPLUS)   /* margin for any continuation lines */
+	    margin = indent;
+	else
+	    margin = 0;
+	sfree(t.text);
+	t = empty_rdstringc;	       /* start the next line from scratch */
+    }
+    wrap_free(wrapping);
+    putc('\n', fp);
+
+    sfree(t.text);
+}
+
+static void text_rule(FILE *fp, int indent, int width) {
+    int i;   /* draw `indent' blanks then `width' dashes; <= 0 draws none */
+    for (i = 0; i < indent; i++) putc(' ', fp);
+    for (i = 0; i < width; i++) putc('-', fp);     /* FIXME: configurability! */
+    fputs("\n\n", fp);		       /* end the rule, then a blank line */
+}
+
+/* Write one wrapped paragraph, optionally led by a list prefix. */
+static void text_para(FILE *fp, word *prefix, char *prefixextra, word *text,
+		      int indent, int extraindent, int width) {
+    wrappedline *wrapping, *p;
+    rdstringc pfx = { 0, 0, NULL };
+    int e, firstlinewidth = width;     /* e: padding before the next line */
+
+    if (prefix) {
+	text_rdaddwc(&pfx, prefix, NULL);
+	if (prefixextra)
+	    rdaddsc(&pfx, prefixextra);
+	fprintf(fp, "%*s%s", indent, "", pfx.text);
+	e = extraindent - (int)strlen(pfx.text);
+	if (e < 0) {
+	    firstlinewidth += e;       /* e < 0: the overhang shortens line one */
+	    if (firstlinewidth < 0) {  /* no room left: start on a fresh line */
+		e = indent + extraindent;
+		firstlinewidth = width;
+		fprintf(fp, "\n");
+	    } else
+		e = 0;
+	}
+	sfree(pfx.text);
+    } else
+	e = indent + extraindent;
+
+    wrapping = wrap_para(text, firstlinewidth, width, text_width);
+    for (p = wrapping; p; p = p->next) {
+	rdstringc t = { 0, 0, NULL };
+	text_rdaddwc(&t, p->begin, p->end);
+	fprintf(fp, "%*s%s\n", e, "", t.text);
+	e = indent + extraindent;      /* later lines get the full indent */
+	sfree(t.text);
+    }
+    wrap_free(wrapping);
+    putc('\n', fp);
+}
+
+static void text_codepara(FILE *fp, word *text, int indent, int width) {
+    /* Each word_WeakCode word becomes one indented output line. */
+    for (; text != NULL; text = text->next) {
+	char *line;
+	if (text->type != word_WeakCode)
+	    continue;
+	text_convert(text->text, &line);
+	if (strlen(line) > (size_t)width) { /* FIXME: warn */ }
+	fprintf(fp, "%*s%s\n", indent, "", line);
+	sfree(line);
+    }
+    putc('\n', fp);
+}
+
+static void text_versionid(FILE *fp, word *text) {
+    rdstringc line = { 0, 0, NULL };
+    /* Build "[" + version text + "]" and write it out as a single line. */
+    rdaddc(&line, '[');		       /* FIXME: configurability */
+    text_rdaddwc(&line, text, NULL);
+    rdaddc(&line, ']');		       /* FIXME: configurability */
+    fputs(line.text, fp);
+    putc('\n', fp);
+    sfree(line.text);
+}
diff --git a/bk_whlp.c b/bk_whlp.c
new file mode 100644
index 0000000..a8cb99e
--- /dev/null
+++ b/bk_whlp.c
@@ -0,0 +1,639 @@
+/*
+ * Windows Help backend for Halibut
+ * 
+ * TODO:
+ *  - allow user to specify section contexts.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "halibut.h"
+#include "winhelp.h"
+
+struct bk_whlp_state {
+    WHLP h;			       /* help file being built (from whlp_new) */
+    indexdata *idx;		       /* index data passed to whlp_backend */
+    keywordlist *keywords;	       /* keyword list used to resolve xrefs */
+    WHLP_TOPIC curr_topic;	       /* topic currently being written */
+    FILE *cntfp;		       /* the .cnt contents file, open for writing */
+    int cnt_last_level, cnt_workaround; /* state for whlp_contents_write */
+};
+
+/* Indexes of fonts in our standard font descriptor set; presumably these
+ * follow the order of the whlp_create_font calls in whlp_backend (confirm).
+ * TITLE repeats NORMAL/EMPH/CODE: whlp_mkparagraph adds FONT_EMPH/FONT_CODE. */
+enum {
+    FONT_NORMAL,
+    FONT_EMPH,
+    FONT_CODE,
+    FONT_TITLE,
+    FONT_TITLE_EMPH,
+    FONT_TITLE_CODE,
+    FONT_RULE
+};
+
+static void whlp_rdaddwc(rdstringc *rs, word *text);
+static int whlp_convert(wchar_t *s, char **result, int hard_spaces);
+static void whlp_mkparagraph(struct bk_whlp_state *state,
+			     int font, word *text, int subsidiary);
+static void whlp_navmenu(struct bk_whlp_state *state, paragraph *p);
+static void whlp_contents_write(struct bk_whlp_state *state,
+				int level, char *text, WHLP_TOPIC topic);
+    
+/*
+ * Windows Help backend entry point: writes the help file "output.hlp"
+ * and its contents file "output.cnt" from the paragraph list, using
+ * `keywords' to resolve cross-references and `idx' for index terms.
+ */
+void whlp_backend(paragraph *sourceform, keywordlist *keywords,
+		  indexdata *idx) {
+    WHLP h;
+    char *filename, *cntname;
+    paragraph *p, *lastsect;
+    struct bk_whlp_state state;
+    WHLP_TOPIC contents_topic;
+    int i;
+    indexentry *ie;
+
+    filename = "output.hlp";	       /* FIXME: configurability */
+    cntname = "output.cnt";	       /* corresponding contents file */
+
+    state.cntfp = fopen(cntname, "wb");
+    if (!state.cntfp)		       /* can't write the .cnt: give up now */
+	fatal(err_whatever, "unable to open output.cnt for writing");
+    state.cnt_last_level = -1; state.cnt_workaround = 0;
+
+    h = state.h = whlp_new();
+    state.keywords = keywords;
+    state.idx = idx;
+
+    whlp_start_macro(h, "CB(\"btn_about\",\"&About\",\"About()\")");
+    whlp_start_macro(h, "CB(\"btn_up\",\"&Up\",\"Contents()\")");
+    whlp_start_macro(h, "BrowseButtons()");
+
+    whlp_create_font(h, "Times New Roman", WHLP_FONTFAM_SERIF, 24,
+		     0, 0, 0, 0);
+    whlp_create_font(h, "Times New Roman", WHLP_FONTFAM_SERIF, 24,
+		     WHLP_FONT_ITALIC, 0, 0, 0);
+    whlp_create_font(h, "Courier New", WHLP_FONTFAM_FIXED, 24,
+		     0, 0, 0, 0);
+    whlp_create_font(h, "Arial", WHLP_FONTFAM_SERIF, 30,
+		     WHLP_FONT_BOLD, 0, 0, 0);
+    whlp_create_font(h, "Arial", WHLP_FONTFAM_SERIF, 30,
+		     WHLP_FONT_BOLD|WHLP_FONT_ITALIC, 0, 0, 0);
+    whlp_create_font(h, "Courier New", WHLP_FONTFAM_FIXED, 30,
+		     WHLP_FONT_BOLD, 0, 0, 0);
+    whlp_create_font(h, "Courier New", WHLP_FONTFAM_SANS, 18,
+		     WHLP_FONT_STRIKEOUT, 0, 0, 0);
+
+    /*
+     * Loop over the source form finding out whether the user has
+     * specified particular help topic names for anything.
+     */
+    for (p = sourceform; p; p = p->next) {
+	p->private_data = NULL;
+	if (p->type == para_Config && p->parent) {
+	    if (!ustricmp(p->keyword, L"winhelp-topic")) {
+		char *topicname;
+		whlp_convert(uadv(p->keyword), &topicname, 0);
+		/* Store the topic name in the private_data field of the
+		 * containing section. */
+		p->parent->private_data = topicname;
+	    }
+	}
+    }
+
+    /*
+     * Loop over the source form registering WHLP_TOPICs for
+     * everything.
+     */
+    contents_topic = whlp_register_topic(h, "Top", NULL);
+    whlp_primary_topic(h, contents_topic);
+    for (p = sourceform; p; p = p->next) {
+	if (p->type == para_Chapter ||
+	    p->type == para_Appendix ||
+	    p->type == para_UnnumberedChapter ||
+	    p->type == para_Heading ||
+	    p->type == para_Subsect) {
+	    char *topicid = p->private_data;   /* name from the loop above, or NULL */
+	    char *errstr;
+
+	    p->private_data = whlp_register_topic(h, topicid, &errstr);
+	    if (!p->private_data) {
+		p->private_data = whlp_register_topic(h, NULL, NULL);
+		error(err_winhelp_ctxclash, &p->fpos, topicid, errstr);
+	    }
+	    sfree(topicid);
+	}
+    }
+
+    /*
+     * Loop over the index entries, preparing final text forms for
+     * each one.
+     */
+    for (i = 0; (ie = index234(idx->entries, i)) != NULL; i++) {
+	rdstringc rs = {0, 0, NULL};
+	whlp_rdaddwc(&rs, ie->text);
+	ie->backend_data = rs.text;
+    }
+
+    whlp_prepare(h);
+
+    /* ------------------------------------------------------------------
+     * Do the contents page, containing title, preamble and
+     * copyright.
+     */
+    whlp_begin_topic(h, contents_topic, "Contents", "DB(\"btn_up\")", NULL);
+
+    /*
+     * The manual title goes in the non-scroll region, and also
+     * goes into the system title slot.
+     */
+    {
+	rdstringc rs = {0, 0, NULL};
+	for (p = sourceform; p; p = p->next) {
+	    if (p->type == para_Title) {
+		whlp_begin_para(h, WHLP_PARA_NONSCROLL);
+		whlp_mkparagraph(&state, FONT_TITLE, p->words, FALSE);
+		whlp_rdaddwc(&rs, p->words);
+		whlp_end_para(h);
+	    }
+	}
+	if (rs.text) {
+	    whlp_title(h, rs.text);
+	    fprintf(state.cntfp, ":Title %s\r\n", rs.text);
+	    sfree(rs.text);
+	}
+	whlp_contents_write(&state, 1, "Title page", contents_topic);
+	/* FIXME: configurability in that string */
+    }
+
+    /*
+     * Next comes the preamble, which just goes into the ordinary
+     * scrolling region.
+     */
+    for (p = sourceform; p; p = p->next) {
+	if (p->type == para_Preamble) {
+	    whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12);
+	    whlp_begin_para(h, WHLP_PARA_SCROLL);
+	    whlp_mkparagraph(&state, FONT_NORMAL, p->words, FALSE);
+	    whlp_end_para(h);
+	}
+    }
+
+    /*
+     * The copyright goes to two places, again: into the contents
+     * page and also into the system section.
+     */
+    {
+	rdstringc rs = {0, 0, NULL};
+	for (p = sourceform; p; p = p->next) {
+	    if (p->type == para_Copyright) {
+		whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12);
+		whlp_begin_para(h, WHLP_PARA_SCROLL);
+		whlp_mkparagraph(&state, FONT_NORMAL, p->words, FALSE);
+		whlp_end_para(h);
+		whlp_rdaddwc(&rs, p->words);
+	    }
+	}
+	if (rs.text) {
+	    whlp_copyright(h, rs.text);
+	    sfree(rs.text);
+	}
+    }
+
+    /*
+     * Now do the primary navigation menu.
+     */
+    for (p = sourceform; p; p = p->next) {
+	if (p->type == para_Chapter ||
+	    p->type == para_Appendix ||
+	    p->type == para_UnnumberedChapter)
+	    whlp_navmenu(&state, p);
+    }
+
+    state.curr_topic = contents_topic;
+    lastsect = NULL;
+
+    /* ------------------------------------------------------------------
+     * Now we've done the contents page, we're ready to go through
+     * and do the main manual text. Ooh.
+     */
+    for (p = sourceform; p; p = p->next) switch (p->type) {
+	/*
+	 * Things we ignore because we've already processed them or
+	 * aren't going to touch them in this pass.
+	 */
+      case para_IM:
+      case para_BR:
+      case para_Biblio:		       /* only touch BiblioCited */
+      case para_VersionID:
+      case para_Copyright:
+      case para_Preamble:
+      case para_NoCite:
+      case para_Title:
+	break;
+
+	/*
+	 * Chapter and section titles: start a new Help topic.
+	 */
+      case para_Chapter:
+      case para_Appendix:
+      case para_UnnumberedChapter:
+      case para_Heading:
+      case para_Subsect:
+	if (lastsect && lastsect->child) {
+	    paragraph *q;
+	    /*
+	     * Do a navigation menu for the previous section we
+	     * were in.
+	     */
+	    for (q = lastsect->child; q; q = q->sibling)
+		whlp_navmenu(&state, q);
+	}
+	{
+	    rdstringc rs = {0, 0, NULL};
+	    WHLP_TOPIC new_topic, parent_topic;
+	    char *macro, *topicid, *title;
+
+	    new_topic = p->private_data;
+	    whlp_browse_link(h, state.curr_topic, new_topic);
+	    state.curr_topic = new_topic;
+	    if (p->kwtext) {
+		whlp_rdaddwc(&rs, p->kwtext);
+		rdaddsc(&rs, ": ");    /* FIXME: configurability */
+	    }
+	    whlp_rdaddwc(&rs, p->words);
+	    title = rs.text ? rs.text : "";    /* guard against an empty title */
+	    if (p->parent == NULL)
+		parent_topic = contents_topic;
+	    else
+		parent_topic = (WHLP_TOPIC)p->parent->private_data;
+	    topicid = whlp_topic_id(parent_topic);
+	    macro = smalloc(100+strlen(topicid));
+	    sprintf(macro,
+		    "CBB(\"btn_up\",\"JI(`',`%s')\");EB(\"btn_up\")",
+		    topicid);
+	    whlp_begin_topic(h, new_topic, title, macro, NULL);
+	    sfree(macro);
+	    {
+		/*
+		 * Output the .cnt entry.
+		 *
+		 * WinHelp has a bug involving having an internal
+		 * node followed by a leaf at the same level: the
+		 * leaf is output at the wrong level. We can mostly
+		 * work around this by modifying the leaf level
+		 * itself (see whlp_contents_write), but this
+		 * doesn't work for top-level sections since we
+		 * can't turn a level-1 leaf into a level-0 one. So
+		 * for top-level leaf sections (Bibliography
+		 * springs to mind), we output an internal node
+		 * containing only the leaf for that section.
+		 */
+		int level;
+		paragraph *q;
+
+		/* Count up the level. */
+		level = 1;
+		for (q = p; q->parent; q = q->parent) level++;
+
+		if (p->child || !p->parent) {
+		    /*
+		     * If p has children then it needs to be a
+		     * folder; if it has no parent then it needs to
+		     * be a folder to work around the bug.
+		     */
+		    whlp_contents_write(&state, level, title, NULL);
+		    level++;
+		}
+		whlp_contents_write(&state, level, title, new_topic);
+	    }
+	    sfree(rs.text);
+	    whlp_begin_para(h, WHLP_PARA_NONSCROLL);
+	    if (p->kwtext) {
+		whlp_mkparagraph(&state, FONT_TITLE, p->kwtext, FALSE);
+		whlp_set_font(h, FONT_TITLE);
+		whlp_text(h, ": ");    /* FIXME: configurability */
+	    }
+	    whlp_mkparagraph(&state, FONT_TITLE, p->words, FALSE);
+	    whlp_end_para(h);
+
+	    lastsect = p;
+	}
+	break;
+
+      case para_Rule:
+	whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12);
+	whlp_para_attr(h, WHLP_PARA_ALIGNMENT, WHLP_ALIGN_CENTRE);
+	whlp_begin_para(h, WHLP_PARA_SCROLL);
+	whlp_set_font(h, FONT_RULE);
+#define TEN "\xA0\xA0\xA0\xA0\xA0\xA0\xA0\xA0\xA0\xA0"
+#define TWENTY TEN TEN
+#define FORTY TWENTY TWENTY
+#define EIGHTY FORTY FORTY
+	whlp_text(h, EIGHTY);
+#undef TEN
+#undef TWENTY
+#undef FORTY
+#undef EIGHTY
+	whlp_end_para(h);
+	break;
+
+      case para_Normal:
+      case para_BiblioCited:
+      case para_Bullet:
+      case para_NumberedList:
+	whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12);
+	if (p->type == para_Bullet || p->type == para_NumberedList) {
+	    whlp_para_attr(h, WHLP_PARA_LEFTINDENT, 72);
+	    whlp_para_attr(h, WHLP_PARA_FIRSTLINEINDENT, -36);
+	    whlp_set_tabstop(h, 72, WHLP_ALIGN_LEFT);
+	    whlp_begin_para(h, WHLP_PARA_SCROLL);
+	    whlp_set_font(h, FONT_NORMAL);
+	    if (p->type == para_Bullet) {
+		whlp_text(h, "\x95");
+	    } else {
+		whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, FALSE);
+		whlp_text(h, ".");
+	    }
+	    whlp_tab(h);
+	} else {
+	    whlp_begin_para(h, WHLP_PARA_SCROLL);
+	}
+
+	if (p->type == para_BiblioCited) {
+	    whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, FALSE);
+	    whlp_text(h, " ");
+	}
+
+	whlp_mkparagraph(&state, FONT_NORMAL, p->words, FALSE);
+	whlp_end_para(h);
+	break;
+
+      case para_Code:
+	/*
+	 * In a code paragraph, each individual word is a line. For
+	 * Help files, we will have to output this as a set of
+	 * paragraphs, all but the last of which don't set
+	 * SPACEBELOW.
+	 */
+	{
+	    word *w;
+	    char *c;
+	    for (w = p->words; w; w = w->next) {
+		if (!w->next)
+		    whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12);
+		whlp_begin_para(h, WHLP_PARA_SCROLL);
+		whlp_set_font(h, FONT_CODE);
+		whlp_convert(w->text, &c, FALSE);
+		whlp_text(h, c);
+		sfree(c);
+		whlp_end_para(h);
+	    }
+	}
+	break;
+    }
+
+    fclose(state.cntfp);
+    whlp_close(h, filename);
+
+    /*
+     * Loop over the index entries, cleaning up our final text
+     * forms.
+     */
+    for (i = 0; (ie = index234(idx->entries, i)) != NULL; i++) {
+	sfree(ie->backend_data);
+    }
+}
+
+/* Write one .cnt line: the level, the title text with every `=' escaped by
+ * a backslash and, when `topic' is non-NULL, `=' plus that topic's id. */
+static void whlp_contents_write(struct bk_whlp_state *state,
+				int level, char *text, WHLP_TOPIC topic) {
+    FILE *out = state->cntfp;
+    char *s;
+
+    /* Horrifying bug in WinHelp. When dropping a section level or more
+     * without using a folder-type entry, WinHelp accidentally adds one
+     * to the section level. So we correct for that here. */
+    if (!topic)
+	state->cnt_workaround = 0;
+    else if (state->cnt_last_level > level)
+	state->cnt_workaround = -1;
+    state->cnt_last_level = level;
+
+    fprintf(out, "%d ", level + state->cnt_workaround);
+    for (s = text; *s; s++) {
+	if (*s == '=') fputc('\\', out);
+	fputc(*s, out);
+    }
+    if (topic) fprintf(out, "=%s", whlp_topic_id(topic));
+    fputc('\n', out);
+}
+
+/* Emit a non-scrolling menu line hyperlinked to p's topic (p->private_data). */
+static void whlp_navmenu(struct bk_whlp_state *state, paragraph *p) {
+    WHLP_TOPIC target = (WHLP_TOPIC)p->private_data;
+    whlp_begin_para(state->h, WHLP_PARA_NONSCROLL);
+    whlp_start_hyperlink(state->h, target);
+    if (p->kwtext != NULL) {	       /* "kwtext: " goes before the title */
+	whlp_mkparagraph(state, FONT_NORMAL, p->kwtext, TRUE);
+	whlp_set_font(state->h, FONT_NORMAL);
+	whlp_text(state->h, ": ");    /* FIXME: configurability */
+    }
+    whlp_mkparagraph(state, FONT_NORMAL, p->words, TRUE);
+    whlp_end_hyperlink(state->h); whlp_end_para(state->h);
+}
+
+/* Emit a word list into the paragraph currently open on state->h, using
+ * `font' as the base font (emphasis and code add FONT_EMPH / FONT_CODE to
+ * it). When `subsidiary' is nonzero, index terms and cross-reference
+ * hyperlinks are suppressed and only the text itself is written. */
+static void whlp_mkparagraph(struct bk_whlp_state *state,
+			     int font, word *text, int subsidiary) {
+    keyword *kwl;
+    char *c;
+    paragraph *xref_target = NULL;
+    int deffont = font, currfont = -1, newfont;
+
+    for (; text; text = text->next) switch (text->type) {
+      case word_HyperLink:
+      case word_HyperEnd:
+	break;
+
+      case word_IndexRef:
+	if (subsidiary) break;	       /* disabled in subsidiary bits */
+	{
+	    indextag *tag = index_findtag(state->idx, text->text);
+	    int i;
+	    if (!tag) break;
+	    for (i = 0; i < tag->nrefs; i++)
+		whlp_index_term(state->h, tag->refs[i]->backend_data,
+				state->curr_topic);
+	}
+	break;
+
+      case word_UpperXref:
+      case word_LowerXref:
+	if (subsidiary) break;	       /* disabled in subsidiary bits */
+	kwl = kw_lookup(state->keywords, text->text);
+	assert(xref_target == NULL);
+	if (!kwl) break;	       /* unknown keyword: leave the text unlinked */
+	if (kwl->para->type == para_NumberedList) {
+	    break;		       /* don't xref to numbered list items */
+	} else if (kwl->para->type == para_BiblioCited) {
+	    /*
+	     * An xref to a bibliography item jumps to the section
+	     * containing it.
+	     */
+	    if (kwl->para->parent)
+		xref_target = kwl->para->parent;
+	    else
+		break;
+	} else {
+	    xref_target = kwl->para;
+	}
+	whlp_start_hyperlink(state->h, (WHLP_TOPIC)xref_target->private_data);
+	break;
+
+      case word_XrefEnd:
+	if (subsidiary) break;	       /* disabled in subsidiary bits */
+	if (xref_target)
+	    whlp_end_hyperlink(state->h);
+	xref_target = NULL;
+	break;
+      case word_Normal:
+      case word_Emph:
+      case word_Code:
+      case word_WeakCode:
+      case word_WhiteSpace:
+      case word_EmphSpace:
+      case word_CodeSpace:
+      case word_WkCodeSpace:
+      case word_Quote:
+      case word_EmphQuote:
+      case word_CodeQuote:
+      case word_WkCodeQuote:
+	if (towordstyle(text->type) == word_Emph)
+	    newfont = deffont + FONT_EMPH;
+	else if (towordstyle(text->type) == word_Code ||
+		 towordstyle(text->type) == word_WeakCode)
+	    newfont = deffont + FONT_CODE;
+	else
+	    newfont = deffont;
+	if (newfont != currfont) {
+	    currfont = newfont;
+	    whlp_set_font(state->h, newfont);
+	}
+	if (removeattr(text->type) == word_Normal) {
+	    if (whlp_convert(text->text, &c, TRUE))
+		whlp_text(state->h, c);
+	    else
+		whlp_mkparagraph(state, deffont, text->alt, FALSE);
+	    sfree(c);
+	} else if (removeattr(text->type) == word_WhiteSpace) {
+	    whlp_text(state->h, " ");
+	} else if (removeattr(text->type) == word_Quote) {
+	    whlp_text(state->h,	       /* FIXME: configurability */
+		      quoteaux(text->aux) == quote_Open ? "\x91" : "\x92");
+	}
+	break;
+    }
+}
+
+/*
+ * Append the plain-text form of a word list to `rs'. Ordinary words go
+ * through whlp_convert (falling back to the word's `alt' list when that
+ * reports failure); white space becomes ' ' and quotes \x91 / \x92.
+ * Hyperlink, xref and index-reference markers contribute nothing.
+ */
+static void whlp_rdaddwc(rdstringc *rs, word *text) {
+    for (; text != NULL; text = text->next) {
+	char *conv;
+	switch (text->type) {
+	  case word_HyperLink: case word_HyperEnd: case word_IndexRef:
+	  case word_UpperXref: case word_LowerXref: case word_XrefEnd:
+	    break;		       /* markers carry no text */
+
+	  case word_Normal: case word_Emph:
+	  case word_Code: case word_WeakCode:
+	  case word_WhiteSpace: case word_EmphSpace:
+	  case word_CodeSpace: case word_WkCodeSpace:
+	  case word_Quote: case word_EmphQuote:
+	  case word_CodeQuote: case word_WkCodeQuote:
+	    assert(text->type != word_CodeQuote &&
+		   text->type != word_WkCodeQuote);
+	    switch (removeattr(text->type)) {
+	      case word_Normal:
+		if (whlp_convert(text->text, &conv, FALSE))
+		    rdaddsc(rs, conv);
+		else
+		    whlp_rdaddwc(rs, text->alt);
+		sfree(conv);
+		break;
+	      case word_WhiteSpace:
+		rdaddc(rs, ' ');
+		break;
+	      case word_Quote:	       /* FIXME: configurability */
+		rdaddc(rs, quoteaux(text->aux) == quote_Open ? '\x91' : '\x92');
+		break;
+	    }
+	    break;
+	}
+    }
+}
+
+/*
+ * Narrow a wide string to single-byte characters (ISO8859-1 only for
+ * now - FIXME).
+ *
+ * Characters 32-126 and 160-255 pass through unchanged, except that a
+ * space becomes \240 when `hard_spaces' is nonzero. Anything else is
+ * replaced by \xBF and makes the return value zero.
+ *
+ * If `result' is non-NULL, the converted, NUL-terminated string is
+ * allocated and stored in `*result' - even when some characters were
+ * not representable. If `result' is NULL, the string is only checked.
+ *
+ * Returns nonzero if every character was representable.
+ */
+static int whlp_convert(wchar_t *s, char **result, int hard_spaces) {
+    int all_ok = TRUE;
+    int want_output = (result != NULL);
+    char *buf = NULL;
+    int used = 0, alloc = 0;
+    wchar_t *in;
+
+    for (in = s; *in != 0; in++) {
+	wchar_t wc = *in;
+	int printable = (wc >= 32 && wc <= 126) || (wc >= 160 && wc <= 255);
+	char ch;
+
+	if (!printable) {
+	    all_ok = FALSE;
+	    ch = 0xBF;		       /* approximate the good old DEC `uh?' */
+	} else if (wc == 32 && hard_spaces) {
+	    ch = '\240';
+	} else {
+	    ch = (char)wc;
+	}
+
+	if (!want_output)
+	    continue;
+	if (used >= alloc) {	       /* enlarge by 256 entries at a time */
+	    alloc = used + 256;
+	    buf = resize(buf, alloc);
+	}
+	buf[used++] = ch;
+    }
+
+    if (want_output) {
+	buf = resize(buf, used + 1);   /* final size, with room for the NUL */
+	buf[used] = '\0';
+	*result = buf;
+    }
+    return all_ok;
+}
diff --git a/bk_xhtml.c b/bk_xhtml.c
new file mode 100644
index 0000000..2016e10
--- /dev/null
+++ b/bk_xhtml.c
@@ -0,0 +1,1446 @@
+/*
+ * xhtml backend for Halibut
+ * (initial implementation by James Aylett)
+ *
+ * Still to do:
+ *
+ *  +++ doesn't handle non-breaking hyphens. Not sure how to yet.
+ *  +++ entity names (from a file -- ideally supply normal SGML files)
+ *  +++ configuration directive to file split where the current layout
+ *      code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
+ *      perhaps others.
+ *
+ * Limitations:
+ *
+ *  +++ biblio/index references target the nearest section marker, rather
+ *   than having a dedicated target themselves. In large bibliographies
+ *   this will cause problems. (The solution is to fake up a response
+ *   from xhtml_find_section(), probably linking it into the sections
+ *   chain just in case we need it again, and to make freeing it up
+ *   easier.) docsrc.pl used to work as we do, however, and SGT agrees that
+ *   this is acceptable for now.
+ *  +++ can't cope with leaf-level == 0. It's all to do with the
+ *   top-level file not being normal, probably not even having a valid
+ *   section level, and stuff like that. I question whether this is an
+ *   issue, frankly; small manuals that fit on one page should probably
+ *   not be written in halibut at all.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "halibut.h"
+
+struct xhtmlsection_Struct {
+    struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */
+    struct xhtmlsection_Struct *child; /* NULL if split across files */
+    struct xhtmlsection_Struct *parent; /* NULL if split across files */
+    struct xhtmlsection_Struct *chain; /* single structure independent of weird trees */
+    paragraph *para; /* source paragraph this section was built from (NULL when freshly created) */
+    struct xhtmlfile_Struct *file; /* which file is this a part of? */
+    char *fragment; /* fragment id within the file */
+    int level; /* section level from xhtml_para_level(); -1 marks the end of the chain */
+};
+
+struct xhtmlfile_Struct {
+    struct xhtmlfile_Struct *next; /* next sibling in the file tree */
+    struct xhtmlfile_Struct *child; /* first file below this one (NULL if none) */
+    struct xhtmlfile_Struct *parent; /* file this one was linked under */
+    char *filename; /* allocated output file name (see xhtml_new_file) */
+    struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */
+    int is_leaf; /* is this file a leaf file, ie does it not have any children? */
+};
+
+typedef struct xhtmlsection_Struct xhtmlsection;
+typedef struct xhtmlfile_Struct xhtmlfile;
+typedef struct xhtmlindex_Struct xhtmlindex;
+
+struct xhtmlindex_Struct {
+  int nsection; /* NOTE(review): presumably the number of entries used in sections[] - confirm */
+  int size; /* NOTE(review): presumably the allocated length of sections[] - confirm */
+  xhtmlsection **sections;
+};
+
+typedef struct {
+  int contents_depth[6]; /* xhtml-contents-depth-0 .. -5 (defaults 2..7) */
+  int leaf_contains_contents; /* xhtml-leaf-contains-contents (default FALSE) */
+  int leaf_level; /* xhtml-leaf-level (default 2; zero is rejected) */
+  int leaf_smallest_contents; /* xhtml-leaf-smallest-contents (default 4) */
+  int include_version_id; /* xhtml-versionid (default TRUE) */
+  wchar_t *author, *description; /* xhtml-author, xhtml-description (default NULL) */
+  wchar_t *head_end, *body, *body_start, *body_end, *address_start, *address_end, *nav_attrs; /* xhtml-head-end, -body-tag, -body-start, -body-end, -address-start, -address-end, -navigation-attributes (default NULL) */
+  int suppress_address; /* xhtml-suppress-address (default FALSE) */
+} xhtmlconfig;
+
+/*static void xhtml_level(paragraph *, int);
+static void xhtml_level_0(paragraph *);
+static void xhtml_docontents(FILE *, paragraph *, int);
+static void xhtml_dosections(FILE *, paragraph *, int);
+static void xhtml_dobody(FILE *, paragraph *, int);*/
+
+static void xhtml_doheader(FILE *, word *);
+static void xhtml_dofooter(FILE *);
+static void xhtml_versionid(FILE *, word *, int);
+
+static void xhtml_utostr(wchar_t *, char **);
+static int xhtml_para_level(paragraph *);
+static int xhtml_reservedchar(int);
+
+static int xhtml_convert(wchar_t *, char **, int);
+static void xhtml_rdaddwc(rdstringc *, word *, word *);
+static void xhtml_para(FILE *, word *);
+static void xhtml_codepara(FILE *, word *);
+static void xhtml_heading(FILE *, paragraph *);
+
+/* File-global variables are much easier than passing these things
+ * all over the place. Evil, but easier. We can replace this with a single
+ * structure at some point.
+ */
+static xhtmlconfig conf;
+static keywordlist *keywords;
+static indexdata *idx;
+static xhtmlfile *topfile;
+static xhtmlsection *topsection;
+static paragraph *sourceparas;
+static xhtmlfile *lastfile;
+static xhtmlfile *xhtml_last_file = NULL;
+static int last_level=-1;
+static xhtmlsection *currentsection;
+
+/*
+ * Build the backend configuration: start from the built-in defaults,
+ * then apply each para_Config paragraph in `source' whose keyword is
+ * one of the xhtml-* settings tested below. Other keywords are
+ * ignored; if a keyword occurs more than once the last one wins.
+ * String settings store the pointer returned by uadv() as-is (no
+ * copy is made here).
+ */
+static xhtmlconfig xhtml_configure(paragraph *source)
+{
+  static wchar_t *const depth_keys[6] = {
+    L"xhtml-contents-depth-0", L"xhtml-contents-depth-1",
+    L"xhtml-contents-depth-2", L"xhtml-contents-depth-3",
+    L"xhtml-contents-depth-4", L"xhtml-contents-depth-5"
+  };
+  xhtmlconfig ret;
+  int i;
+
+  /*
+   * Defaults.
+   */
+  for (i = 0; i < 6; i++)
+    ret.contents_depth[i] = i + 2;
+  ret.leaf_level = 2;
+  ret.leaf_smallest_contents = 4;
+  ret.leaf_contains_contents = FALSE;
+  ret.include_version_id = TRUE;
+  ret.suppress_address = FALSE;
+  ret.author = NULL;
+  ret.description = NULL;
+  ret.head_end = NULL;
+  ret.body = NULL;
+  ret.body_start = NULL;
+  ret.body_end = NULL;
+  ret.address_start = NULL;
+  ret.address_end = NULL;
+  ret.nav_attrs = NULL;
+
+  for (; source; source = source->next)
+  {
+    wchar_t *key;
+
+    if (source->type != para_Config)
+      continue;
+    key = source->keyword;
+
+    /* The six per-level contents depths share one lookup table. */
+    for (i = 0; i < 6; i++)
+      if (!ustricmp(key, depth_keys[i]))
+        break;
+
+    if (i < 6) {
+      ret.contents_depth[i] = utoi(uadv(key));
+    } else if (!ustricmp(key, L"xhtml-leaf-level")) {
+      ret.leaf_level = utoi(uadv(key));
+      if (ret.leaf_level==0) {
+        fatal(err_whatever, "xhtml-leaf-level cannot be zero");
+      }
+    } else if (!ustricmp(key, L"xhtml-leaf-smallest-contents")) {
+      ret.leaf_smallest_contents = utoi(uadv(key));
+    } else if (!ustricmp(key, L"xhtml-versionid")) {
+      ret.include_version_id = utob(uadv(key));
+    } else if (!ustricmp(key, L"xhtml-leaf-contains-contents")) {
+      ret.leaf_contains_contents = utob(uadv(key));
+    } else if (!ustricmp(key, L"xhtml-suppress-address")) {
+      ret.suppress_address = utob(uadv(key));
+    } else if (!ustricmp(key, L"xhtml-author")) {
+      ret.author = uadv(key);
+    } else if (!ustricmp(key, L"xhtml-description")) {
+      ret.description = uadv(key);
+    } else if (!ustricmp(key, L"xhtml-head-end")) {
+      ret.head_end = uadv(key);
+    } else if (!ustricmp(key, L"xhtml-body-start")) {
+      ret.body_start = uadv(key);
+    } else if (!ustricmp(key, L"xhtml-body-tag")) {
+      ret.body = uadv(key);
+    } else if (!ustricmp(key, L"xhtml-body-end")) {
+      ret.body_end = uadv(key);
+    } else if (!ustricmp(key, L"xhtml-address-start")) {
+      ret.address_start = uadv(key);
+    } else if (!ustricmp(key, L"xhtml-address-end")) {
+      ret.address_end = uadv(key);
+    } else if (!ustricmp(key, L"xhtml-navigation-attributes")) {
+      ret.nav_attrs = uadv(key);
+    }
+  }
+
+  return ret;
+}
+
+/* Allocate a section node with every link NULL except `chain', which is
+ * set to `last'; the level starts out as -1. */
+static xhtmlsection *xhtml_new_section(xhtmlsection *last)
+{
+  xhtmlsection *sect = mknew(xhtmlsection);
+  sect->next = sect->child = sect->parent = NULL;
+  sect->chain = last;
+  sect->para = NULL;
+  sect->file = NULL;
+  sect->fragment = NULL;
+  sect->level = -1; /* marker: end of chain */
+  return sect;
+}
+
+/* Returns NULL or the section that marks that paragraph */
+static xhtmlsection *xhtml_find_section(paragraph *p)
+{
+  xhtmlsection *sect;
+
+  if (xhtml_para_level(p) == -1) {
+    /*
+     * Not a section paragraph itself: scan forward from the start of
+     * the document for the last section paragraph that precedes p.
+     */
+    paragraph *scan, *owner = NULL;
+    for (scan = sourceparas; scan && scan != p; scan = scan->next) {
+      if (xhtml_para_level(scan) != -1)
+        owner = scan;
+    }
+    /* No section precedes p. This can happen if you cross-reference
+     * something before the first chapter starts. So don't do that. */
+    if (owner == NULL)
+      return NULL;
+    p = owner;
+  }
+
+  /* Follow the chain from topsection until we reach p's section. */
+  for (sect = topsection; sect && sect->para != p; sect = sect->chain)
+    ;
+  return sect;
+}
+
+/*
+ * Allocate a file node for section `sect'. With sect == NULL this is
+ * the top-level file, named Contents.html (or Manual.html if the
+ * leaf level is 0). Otherwise the name is the text of the section's
+ * keyword words (or its title words if it has no kwtext) with
+ * ".html" appended.
+ */
+static xhtmlfile *xhtml_new_file(xhtmlsection *sect)
+{
+#define FILENAME_MANUAL "Manual.html"
+#define FILENAME_CONTENTS "Contents.html"
+  xhtmlfile *file = mknew(xhtmlfile);
+
+  file->next = file->child = file->parent = NULL;
+  file->filename = NULL;
+  file->sections = sect;
+  file->is_leaf = (sect != NULL && sect->level == conf.leaf_level);
+
+  if (sect == NULL) {
+    /* The leaf_level == 0 name is currently unused. */
+    const char *fixed =
+      (conf.leaf_level == 0) ? FILENAME_MANUAL : FILENAME_CONTENTS;
+    file->filename = smalloc(strlen(fixed)+1);
+    sprintf(file->filename, "%s", fixed);
+  } else {
+    rdstringc name = { 0, 0, NULL };
+    paragraph *para = sect->para;
+    word *w = para->kwtext ? para->kwtext : para->words;
+    /*
+     * Only words whose type reduces to word_Normal under removeattr()
+     * contribute to the name; anything else is skipped.
+     */
+    for (; w; w = w->next) {
+      char *piece;
+      if (removeattr(w->type) != word_Normal)
+        continue;
+      xhtml_utostr(w->text, &piece);
+      rdaddsc(&name, piece);
+      sfree(piece);
+    }
+    rdaddsc(&name, ".html");
+    file->filename = rdtrimc(&name);
+  }
+  return file;
+}
+
+/*
+ * Walk the file tree marking every file that has no children as a
+ * leaf, even if it isn't at leaf level. Siblings are handled by
+ * iteration and children by recursion.
+ */
+void xhtml_fixup_layout(xhtmlfile* file)
+{
+  do {
+    if (file->child != NULL)
+      xhtml_fixup_layout(file->child);
+    else
+      file->is_leaf = TRUE;
+    file = file->next;
+  } while (file != NULL);
+}
+
+/*
+ * Create the tree structure so we know where everything goes.
+ * Method:
+ *
+ * Ignoring file splitting, we have three choices with each new section:
+ * 
+ * +-----------------+-----------------+
+ * |                 |                 |
+ * X            +----X----+           (1)
+ *              |         |
+ *              Y        (3)
+ *              |
+ *             (3)
+ *
+ * Y is the last section we added (currentsect).
+ * If sect is the section we want to add, then:
+ *
+ * (1) if sect->level < currentsect->level
+ * (2) if sect->level == currentsect->level
+ * (3) if sect->level > currentsect->level
+ *
+ * This requires the constraint that you never skip section numbers
+ * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
+ *
+ * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
+ * more than one level at a time. Lots of asserts, and probably part of
+ * the algorithm here, rely on this being true. (It currently isn't
+ * enforced by halibut, however.)
+ *
+ * File splitting makes this harder. For instance, say we added at (3)
+ * above and now need to add another section. We are splitting at level
+ * 2, ie the level of Y. Z is the last section we added:
+ *
+ * +-----------------+-----------------+
+ * |                 |                 |
+ * X            +----X----+           (1)
+ *              |         |
+ *         +----Y----+   (1)
+ *         |         |
+ *         Z        (2)
+ *         |
+ *        (3)
+ *
+ * The (1) case is now split; we need to search upwards to find where
+ * to actually link in. The other two cases remain the same (and will
+ * always be like this).
+ *
+ * File splitting complicates the linking, but the decision of whether
+ * to split to a new file always uses the same condition (is the
+ * level of this section higher than the leaf_level configuration
+ * value or not).
+ *
+ * Treating the cases backwards:
+ *
+ * (3) same file if sect->level > conf.leaf_level, otherwise new file
+ *
+ *     if in the same file, currentsect->child points to sect
+ *     otherwise the linking is done through the file tree (which works
+ *     in more or less the same way, ie currentfile->child points to
+ *     the new file)
+ *
+ * (2) same file if sect->level > conf.leaf_level, otherwise new file
+ *
+ *     if in the same file, currentsect->next points to sect
+ *     otherwise file linking and currentfile->next points to the new
+ *     file (we know that Z must have caused a new file to be created)
+ *
+ * (1) same file if sect->level > conf.leaf_level, otherwise new file
+ *
+ *     this is actually effectively the same case as (2) here,
+ *     except that we first have to travel up the sections to figure
+ *     out which section this new one will be a sibling of. In doing
+ *     so, we may disappear off the top of a file and have to go up
+ *     to its parent in the file tree.
+ *
+ */
+static void xhtml_ponder_layout(paragraph *p)
+{
+  xhtmlsection *lastsection; /* most recently created section, whatever its level */
+  xhtmlsection *currentsect; /* section the next one gets linked relative to */
+  xhtmlfile *currentfile; /* file the next section or file gets linked relative to */
+
+  lastfile = NULL;
+  topsection = xhtml_new_section(NULL);
+  topfile = xhtml_new_file(NULL);
+  lastsection = topsection;
+  currentfile = topfile;
+  currentsect = topsection;
+
+  for (; p; p=p->next)
+  {
+    int level = xhtml_para_level(p);
+    if (level>0) /* actually a section */
+    {
+      xhtmlsection *sect;
+      word *w;
+      char *c;
+      rdstringc fname_c = { 0, 0, NULL }; /* collects the text that becomes sect->fragment */
+
+      sect = xhtml_new_section(lastsection);
+      lastsection = sect;
+      sect->para = p;
+      for (w=(p->kwtext2)?(p->kwtext2):(p->words); w; w=w->next) /* kwtext2 because we want numbers only! */
+      {
+        switch (removeattr(w->type))
+        {
+        case word_Normal:
+         /*case word_Emph:
+         case word_Code:
+         case word_WeakCode:*/
+          xhtml_utostr(w->text, &c);
+          rdaddsc(&fname_c,c);
+          sfree(c);
+          break;
+        }
+      }
+/*      rdaddsc(&fname_c, ".html");*/
+      sect->fragment = rdtrimc(&fname_c);
+      sect->level = level;
+      /*      printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/
+
+      if (level>currentsect->level) { /* case (3) */
+        if (level>conf.leaf_level) { /* same file */
+          assert(currentfile->is_leaf);
+          currentsect->child = sect;
+          sect->parent=currentsect;
+          sect->file=currentfile;
+	  /*          printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/
+          currentsect=sect;
+        } else { /* new file */
+          xhtmlfile *file = xhtml_new_file(sect);
+          assert(!currentfile->is_leaf);
+          currentfile->child=file;
+          sect->file=file;
+          file->parent=currentfile;
+	  /*          printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/
+          currentfile=file;
+          currentsect=sect;
+        }
+      } else if (level >= currentsect->file->sections->level) {
+	/* Case (1) or (2) *AND* still under the section that starts
+	 * the current file.
+	 *
+	 * I'm not convinced that this couldn't be rolled in with the
+	 * final else {} leg further down. It seems a lot of effort
+	 * this way.
+	 */
+        if (level>conf.leaf_level) { /* stick within the same file */
+          assert(currentfile->is_leaf);
+          sect->file = currentfile;
+	  while (currentsect && currentsect->level > level &&
+		 currentsect->file==currentsect->parent->file) {
+	    currentsect = currentsect->parent;
+	  }
+	  assert(currentsect);
+          currentsect->next = sect;
+	  assert(currentsect->level == sect->level);
+	  sect->parent = currentsect->parent;
+          currentsect = sect;
+	  /*          printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/
+        } else { /* new file */
+          xhtmlfile *file = xhtml_new_file(sect);
+          sect->file=file;
+          currentfile->next=file;
+          file->parent=currentfile->parent;
+          file->is_leaf=(level==conf.leaf_level);
+          file->sections=sect;
+	  /*          printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/
+          currentfile=file;
+          currentsect=sect;
+        }
+      } else { /* Case (1) or (2) and we must move up the file tree first */
+	/* this loop is now probably irrelevant - we know we can't connect
+	 * to anything in the current file */
+        while (currentsect && level<currentsect->level) {
+          currentsect=currentsect->parent;
+          if (currentsect) {
+	    /*            printf(" * up one level to '%s'\n", currentsect->fragment);*/
+          } else {
+	    /*            printf(" * up one level (off top of current file)\n");*/
+          }
+        }
+        if (currentsect) {
+	  /* I'm pretty sure this can now never fire */
+          assert(currentfile->is_leaf);
+	  /*          printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/
+          sect->file = currentfile;
+          currentsect->next=sect;
+          currentsect=sect;
+        } else { /* find a file we can attach to */
+          while (currentfile && currentfile->sections && level<currentfile->sections->level) {
+            currentfile=currentfile->parent;
+            if (currentfile) {
+	      /*              printf(" * up one file level to '%s'\n", currentfile->filename);*/
+            } else {
+	      /*              printf(" * up one file level (off top of tree)\n");*/
+            }
+          }
+          if (currentfile) { /* new file (we had to skip up a file to
+				get here, so we must be dealing with a
+				level no lower than the configured
+				leaf_level) */
+            xhtmlfile *file = xhtml_new_file(sect);
+            currentfile->next=file;
+            sect->file=file;
+            file->parent=currentfile->parent;
+            file->is_leaf=(level==conf.leaf_level);
+            file->sections=sect;
+	    /*            printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/
+            currentfile=file;
+            currentsect=sect;
+          } else {
+            fatal(err_whatever, "Ran off the top trying to connect sibling: strange document.");
+          }
+        }
+      }
+    }
+  }
+  topsection = lastsection; /* get correct end of the chain */
+  xhtml_fixup_layout(topfile); /* leaf files not at leaf level marked as such */
+}
+
+static void xhtml_do_index();
+static void xhtml_do_file(xhtmlfile *file);
+static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform);
+static void xhtml_do_paras(FILE *fp, paragraph *p);
+static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit);
+static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit);
+static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit);
+static int xhtml_do_contents(FILE *fp, xhtmlfile *file);
+static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file);
+static void xhtml_do_sections(FILE *fp, xhtmlsection *sections);
+
+/*
+ * Write 'file', then its descendants, then its following siblings.
+ */
+static void xhtml_do_files(xhtmlfile *file)
+{
+  /* NULL is a no-op: xhtml_backend() passes topfile->child here unchecked. */
+  for (; file; file = file->next) { /* siblings by loop, children by recursion */
+    xhtml_do_file(file);
+    xhtml_do_files(file->child);
+  }
+}
+
+/*
+ * Free up all memory used by the file tree from 'xfile' downwards
+ */
+static void xhtml_free_file(xhtmlfile* xfile)
+{
+  /* Walk the sibling chain iteratively; recurse only into children. */
+  while (xfile!=NULL) {
+    xhtmlfile *sibling = xfile->next; /* saved before the node is freed */
+
+    if (xfile->filename)
+      sfree(xfile->filename);
+    xhtml_free_file(xfile->child);
+    sfree(xfile);
+    xfile = sibling;
+  }
+}
+
+/*
+ * Main function: configure, lay out sections/files, write all output, free it.
+ */
+void xhtml_backend(paragraph *sourceform, keywordlist *in_keywords,
+		   indexdata *in_idx)
+{
+/*  int i;*/
+  indexentry *ientry;
+  int ti;
+  xhtmlsection *xsect;
+
+  sourceparas = sourceform;
+  conf = xhtml_configure(sourceform);
+  keywords = in_keywords;
+  idx = in_idx;
+
+  /* Clear up the index entries backend data pointers */
+  for (ti=0; (ientry = (indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
+    ientry->backend_data=NULL;
+  }
+
+  xhtml_ponder_layout(sourceform);
+
+  /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */
+/*  xhtml_level_0(sourceform);
+  for (i=1; i<=conf.leaf_level; i++)
+  {
+    xhtml_level(sourceform, i);
+  }*/
+
+  /* new system ... (writes to *.html, but isn't fully trusted) */
+  xhtml_do_top_file(topfile, sourceform);
+  assert(!topfile->next); /* shouldn't have a sibling at all */
+  if (topfile->child) xhtml_do_files(topfile->child); /* may have no sub-files */
+  xhtml_do_index();
+
+  /* release file, section, index data structures */
+  xsect = topsection;
+  while (xsect) {
+    xhtmlsection *tmp = xsect->chain;
+    if (xsect->fragment) {
+      sfree(xsect->fragment);
+    }
+    sfree(xsect);
+    xsect = tmp;
+  }
+  xhtml_free_file(topfile);
+  for (ti = 0; (ientry=(indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
+    if (ientry->backend_data!=NULL) {
+      xhtmlindex *xi = (xhtmlindex*) ientry->backend_data;
+      if (xi->sections!=NULL) {
+	sfree(xi->sections);
+      }
+      sfree(xi);
+    }
+    ientry->backend_data = NULL;
+  }
+}
+
+static int xhtml_para_level(paragraph *p)
+{
+  /*
+   * Map a paragraph type onto its section depth: chapter-like
+   * paragraphs are level 1, headings and subsections are level
+   * aux+2, and anything that is not a section heading gives -1.
+   */
+  int type = p->type;
+
+  if (type == para_UnnumberedChapter ||
+      type == para_Chapter ||
+      type == para_Appendix)
+    return 1;
+
+/*  if (type == para_BiblioCited)
+    return 2;*/
+
+  if (type == para_Heading || type == para_Subsect)
+    return p->aux+2;
+  return -1;
+}
+
+static char* xhtml_index_filename = "IndexPage.html";
+
+/* Output the nav links for the current file.
+ * file == NULL means we're doing the index
+ */
+static void xhtml_donavlinks(FILE *fp, xhtmlfile *file)
+{
+  xhtmlfile *xhtml_next_file = NULL;
+  fprintf(fp, "<p");
+  if (conf.nav_attrs!=NULL) {
+    fprintf(fp, " %ls>", conf.nav_attrs);
+  } else {
+    fprintf(fp, ">");
+  }
+  if (xhtml_last_file==NULL) {
+    fprintf(fp, "Previous | ");
+  } else {
+    fprintf(fp, "<a href='%s'>Previous</a> | ", xhtml_last_file->filename);
+  }
+  fprintf(fp, "<a href='Contents.html'>Contents</a> | ");
+  if (file != NULL) { /* otherwise we're doing nav links for the index */
+    xhtmlfile *up;
+    xhtml_next_file = file->child; /* document order: first child comes next */
+    /* ... else our next sibling, else the nearest ancestor's next sibling.
+     * Stopping when 'up' runs out also copes with a NULL parent. */
+    for (up = file; up!=NULL && xhtml_next_file==NULL; up = up->parent)
+      xhtml_next_file = up->next;
+  }
+  if (xhtml_next_file==NULL) {
+    if (file==NULL) { /* index, so no next file */
+      fprintf(fp, "Next	");
+    } else {
+      fprintf(fp, "<a href='%s'>Next</a>", xhtml_index_filename);
+    }
+  } else {
+    fprintf(fp, "<a href='%s'>Next</a>", xhtml_next_file->filename);
+  }
+  fprintf(fp, "</p>\n");
+}
+
+/* Write the index page to xhtml_index_filename: one <dt>/<dd> pair per entry that has backend data. */
+static void xhtml_do_index()
+{
+  word temp_word = { NULL, NULL, word_Normal, 0, 0, L"Index", { NULL, 0, 0} }; /* passed as the page title */
+  indexentry *y;
+  int ti;
+  FILE *fp = fopen(xhtml_index_filename, "w");
+
+  if (fp==NULL)
+    fatal(err_cantopenw, xhtml_index_filename);
+  xhtml_doheader(fp, &temp_word);
+  xhtml_donavlinks(fp, NULL); /* NULL file means "these are the index's nav links" */
+
+  fprintf(fp, "<dl>\n");
+  /* iterate over idx->entries using the tree functions and display everything */
+  for (ti = 0; (y = (indexentry *)index234(idx->entries, ti)) != NULL; ti++) {
+    if (y->backend_data) { /* set by xhtml_rdaddwc() when an index reference was seen */
+      int i;
+      xhtmlindex *xi;
+
+      fprintf(fp, "<dt>");
+      xhtml_para(fp, y->text);
+      fprintf(fp, "</dt>\n<dd>");
+
+      xi = (xhtmlindex*) y->backend_data;
+      for (i=0; i<xi->nsection; i++) { /* one link per recorded section */
+	xhtmlsection *sect = xi->sections[i];
+	if (sect) {
+	  fprintf(fp, "<a href='%s#%s'>", sect->file->filename, sect->fragment);
+	  if (sect->para->kwtext) { /* link text: keyword text if present, else the words */
+	    xhtml_para(fp, sect->para->kwtext);
+	  } else if (sect->para->words) {
+	    xhtml_para(fp, sect->para->words);
+	  }
+	  fprintf(fp, "</a>");
+	  if (i+1<xi->nsection) { /* separator after every link but the last */
+	    fprintf(fp, ", ");
+	  }
+	}
+      }
+      fprintf(fp, "</dd>\n");
+    }
+  }
+  fprintf(fp, "</dl>\n");
+
+  xhtml_donavlinks(fp, NULL);
+  xhtml_dofooter(fp);
+  fclose(fp);
+}
+
+/* Output the given file. This includes whatever contents at beginning and end, etc. etc. */
+static void xhtml_do_file(xhtmlfile *file)
+{
+  paragraph *heading;
+  word *title;
+  FILE *fp = fopen(file->filename, "w");
+  if (fp==NULL)
+    fatal(err_cantopenw, file->filename);
+
+  /* Title the page with the heading's words, else its keyword text, else NULL. */
+  heading = file->sections->para;
+  title = heading->words ? heading->words : heading->kwtext;
+  xhtml_doheader(fp, title);
+
+  xhtml_donavlinks(fp, file);
+
+  /* With fp==NULL xhtml_do_contents() only counts the entries it would write. */
+  if (file->is_leaf && conf.leaf_contains_contents && xhtml_do_contents(NULL, file)>=conf.leaf_smallest_contents)
+    xhtml_do_contents(fp, file);
+  xhtml_do_sections(fp, file->sections);
+  if (!file->is_leaf)
+    xhtml_do_naked_contents(fp, file);
+
+  xhtml_donavlinks(fp, file);
+
+  xhtml_dofooter(fp);
+  fclose(fp);
+
+  xhtml_last_file = file;
+}
+
+/* Output the top-level file. */
+static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform)
+{
+  paragraph *para;
+  paragraph *title = NULL;
+  int pass;
+  FILE *fp = fopen(file->filename, "w");
+  if (fp==NULL)
+    fatal(err_cantopenw, file->filename);
+
+  /* Do the title -- only the first para_Title found is used */
+  for (para = sourceform; para; para = para->next)
+  {
+    if (para->type == para_Title)
+    {
+      title = para;
+      break;
+    }
+  }
+  if (title)
+    xhtml_doheader(fp, title->words);
+  else
+    xhtml_doheader(fp, NULL /* Eek! */);
+
+  /*
+   * Front matter: pass 0 writes every preamble paragraph and pass 1
+   * every copyright paragraph, so all preamble text precedes all
+   * copyright text whatever order they appear in the source. */
+  for (pass = 0; pass < 2; pass++)
+  {
+    int wanted = (pass == 0) ? para_Preamble : para_Copyright;
+    for (para = sourceform; para; para = para->next)
+    {
+      if (para->type != wanted)
+        continue;
+      fprintf(fp, "<p>");
+      xhtml_para(fp, para->words);
+      fprintf(fp, "</p>\n");
+    }
+  }
+
+  xhtml_do_contents(fp, file);
+  xhtml_do_sections(fp, file->sections);
+  xhtml_dofooter(fp);
+  fclose(fp);
+}
+
+/* Convert a Unicode string to an ASCII one. '?' is
+ * used for unmappable characters.
+ */
+static void xhtml_utostr(wchar_t *in, char **out)
+{
+  int len = ustrlen(in);
+  char *ascii = smalloc(len+1); /* one byte per input character, plus the NUL */
+  int pos = 0;
+
+  *out = ascii;
+  while (pos < len)
+  {
+    wchar_t wc = in[pos];
+    ascii[pos++] = (wc>=32 && wc<=126) ? (char)wc : '?';
+  }
+  ascii[pos]=0;
+}
+
+/*
+ * Write contents for the given file, and subfiles, down to
+ * the appropriate contents depth. Returns the number of
+ * entries written.
+ */
+static int xhtml_do_contents(FILE *fp, xhtmlfile *file)
+{
+  int top_level, depth_index, max_depth, base_level, entries;
+  if (file == NULL)
+    return 0;
+
+  /* contents_depth[] is indexed by the file's own level, capped at 5 */
+  top_level = file->sections ? file->sections->level : 0;
+  depth_index = (top_level > 5) ? 5 : top_level;
+  max_depth = conf.contents_depth[depth_index];
+  base_level = file->is_leaf ? top_level-1 : top_level;
+  last_level = base_level;
+
+  entries = xhtml_do_contents_section_limit(fp, file->sections, max_depth);
+  entries += xhtml_do_contents_limit(fp, file->child, max_depth);
+
+  /* close whatever <ul> nesting the entries left open */
+  for (; fp!=NULL && last_level > base_level; last_level--)
+    fprintf(fp, "</ul>\n");
+  return entries;
+}
+
+/* As above, but doesn't do anything in the current file */
+static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file)
+{
+  int top_level, depth_index, max_depth, base_level, entries;
+  if (file == NULL)
+    return 0;
+
+  /* contents_depth[] is indexed by the file's own level, capped at 5 */
+  top_level = file->sections ? file->sections->level : 0;
+  depth_index = (top_level > 5) ? 5 : top_level;
+  max_depth = conf.contents_depth[depth_index];
+  base_level = file->is_leaf ? top_level-1 : top_level;
+  last_level = base_level;
+
+  /* only the child files are listed; this file's own sections are skipped */
+  entries = xhtml_do_contents_limit(fp, file->child, max_depth);
+
+  for (; fp!=NULL && last_level > base_level; last_level--)
+    fprintf(fp, "</ul>\n");
+  return entries;
+}
+
+/*
+ * Write contents for the given file, children, and siblings, down to
+ * given limit contents depth.
+ */
+static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit)
+{
+  int total = 0;
+  xhtmlfile *f;
+  for (f = file; f != NULL; f = f->next) {
+    total += xhtml_do_contents_section_limit(fp, f->sections, limit);
+    total += xhtml_do_contents_limit(fp, f->child, limit);
+  }
+  return total;
+}
+
+/*
+ * Write contents entries for the given section tree, down to the
+ * limit contents depth.
+ */
+static int xhtml_do_contents_section_deep_limit(FILE *fp, xhtmlsection *section, int limit)
+{
+  int total = 0;
+  xhtmlsection *s;
+
+  for (s = section; s != NULL; s = s->next) {
+    /* a rejected entry abandons the walk and reports 0 for this list */
+    if (!xhtml_add_contents_entry(fp, s, limit))
+      return 0;
+    total += 1 + xhtml_do_contents_section_deep_limit(fp, s->child, limit);
+  }
+  return total;
+}
+
+/*
+ * Write contents entries for the given section tree, down to the
+ * limit contents depth.
+ */
+static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit)
+{
+  if (section == NULL)
+    return 0;
+
+  /*
+   * The head section is always counted, whether or not
+   * xhtml_add_contents_entry() accepted it; only its children are
+   * descended into. Siblings reached via section->next are not
+   * visited from here.
+   */
+  xhtml_add_contents_entry(fp, section, limit);
+  return 1 + xhtml_do_contents_section_deep_limit(fp, section->child, limit);
+}
+
+/*
+ * Add a section entry, unless we're exceeding the limit, in which
+ * case return FALSE (otherwise return TRUE).
+ */
+static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit)
+{
+  paragraph *para;
+  if (section == NULL)
+    return FALSE;
+  if (section->level > limit)
+    return FALSE;
+  if (fp == NULL) /* counting only: nothing is written */
+    return TRUE;
+
+  /* open or close <ul> lists until the nesting matches this section */
+  for (; last_level > section->level; last_level--)
+    fprintf(fp, "</ul>\n");
+  for (; last_level < section->level; last_level++)
+    fprintf(fp, "<ul>\n");
+  para = section->para;
+  fprintf(fp, "<li><a href=\"%s#%s\">", section->file->filename, section->fragment);
+  if (para->kwtext) {
+    xhtml_para(fp, para->kwtext);
+    if (para->words)
+      fprintf(fp, ": ");
+  }
+  if (para->words)
+    xhtml_para(fp, para->words);
+  fprintf(fp, "</a></li>\n");
+  return TRUE;
+}
+
+/*
+ * Write all the sections in this file. Do all paragraphs in this section, then all
+ * children (recursively), then go on to the next one (tail recursively).
+ */
+static void xhtml_do_sections(FILE *fp, xhtmlsection *sections)
+{
+  xhtmlsection *sect;
+  for (sect = sections; sect != NULL; sect = sect->next) {
+    currentsection = sect; /* xhtml_rdaddwc() records this against index refs */
+    xhtml_do_paras(fp, sect->para);
+    xhtml_do_sections(fp, sect->child);
+  }
+}
+
+/* Write paragraph p and the non-heading paragraphs after it, up to the next heading; lists are closed as they end. */
+static void xhtml_do_paras(FILE *fp, paragraph *p)
+{
+  int last_type = -1, first=TRUE; /* 'first' lets p itself through although it is a heading */
+  if (!p)
+    return;
+
+/*  for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/
+  for (; p && (xhtml_para_level(p)==-1 || first); p=p->next) {
+    first=FALSE;
+    switch (p->type)
+    {
+      /*
+       * Things we ignore because we've already processed them or
+       * aren't going to touch them in this pass.
+       */
+     case para_IM:
+     case para_BR:
+     case para_Biblio:		       /* only touch BiblioCited */
+     case para_VersionID:
+     case para_Copyright:
+     case para_Preamble:
+     case para_NoCite:
+     case para_Title:
+       break;
+
+       /*
+        * Chapter titles.
+        */
+      case para_Chapter:
+      case para_Appendix:
+      case para_UnnumberedChapter:
+        xhtml_heading(fp, p);
+        break;
+
+      case para_Heading:
+      case para_Subsect:
+        xhtml_heading(fp, p);
+        break;
+
+      case para_Rule:
+        fprintf(fp, "\n<hr />\n");
+        break;
+
+      case para_Normal:
+        fprintf(fp, "\n<p>");
+        xhtml_para(fp, p->words);
+        fprintf(fp, "</p>\n");
+        break;
+
+      case para_Bullet:
+      case para_NumberedList:
+      case para_BiblioCited:
+        if (last_type!=p->type) {
+          /* start up list if necessary */
+          if (p->type == para_Bullet) {
+            fprintf(fp, "<ul>\n");
+          } else if (p->type == para_NumberedList) {
+            fprintf(fp, "<ol>\n");
+          } else if (p->type == para_BiblioCited) {
+            fprintf(fp, "<dl>\n");
+          }
+        }
+        if (p->type == para_Bullet || p->type == para_NumberedList)
+          fprintf(fp, "<li>");
+        else if (p->type == para_BiblioCited) {
+          fprintf(fp, "<dt>");
+          xhtml_para(fp, p->kwtext);
+          fprintf(fp, "</dt>\n<dd>");
+        }
+        xhtml_para(fp, p->words);
+        if (p->type == para_BiblioCited) {
+          fprintf(fp, "</dd>\n");
+        } else if (p->type == para_Bullet || p->type == para_NumberedList) {
+          fprintf(fp, "</li>");
+        }
+        if (p->type == para_Bullet || p->type == para_NumberedList || p->type == para_BiblioCited)
+          /* close off list if necessary */
+        {
+          paragraph *p2 = p->next;
+          int close_off=FALSE;
+/*          if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/
+          if (p2 && xhtml_para_level(p2)==-1) { /* next paragraph is still in this section */
+            if (p2->type != p->type)
+              close_off=TRUE;
+          } else { /* end of document, or a heading follows */
+            close_off=TRUE;
+          }
+          if (close_off) {
+            if (p->type == para_Bullet) {
+              fprintf(fp, "</ul>\n");
+            } else if (p->type == para_NumberedList) {
+              fprintf(fp, "</ol>\n");
+            } else if (p->type == para_BiblioCited) {
+              fprintf(fp, "</dl>\n");
+            }
+          }
+        }
+        break;
+
+      case para_Code:
+        xhtml_codepara(fp, p->words);
+        break;
+    }
+    last_type = p->type; /* compared next time round to decide whether a list must be opened */
+  }
+}
+
+/*
+ * Output a header for this XHTML file.
+ */
+static void xhtml_doheader(FILE *fp, word *title)
+{
+  fputs("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n", fp);
+  fputs("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n", fp);
+  fputs("<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>", fp);
+  if (title!=NULL)
+    xhtml_para(fp, title);
+  else
+    fputs("The thing with no name!", fp);
+  fputs("</title>\n", fp);
+  fprintf(fp, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version);
+  if (conf.author)
+    fprintf(fp, "<meta name=\"author\" content=\"%ls\" />\n", conf.author);
+  if (conf.description)
+    fprintf(fp, "<meta name=\"description\" content=\"%ls\" />\n", conf.description);
+  if (conf.head_end)
+    fprintf(fp, "%ls\n", conf.head_end);
+  fputs("</head>\n\n", fp);
+  if (!conf.body) /* no configured <body> tag: emit the plain one */
+    fputs("<body>\n", fp);
+  else
+    fprintf(fp, "%ls\n", conf.body);
+  if (conf.body_start)
+    fprintf(fp, "%ls\n", conf.body_start);
+}
+
+/*
+ * Output a footer for this XHTML file.
+ */
+static void xhtml_dofooter(FILE *fp)
+{
+  fputs("\n<hr />\n\n", fp);
+  if (conf.body_end)
+    fprintf(fp, "%ls\n", conf.body_end);
+  if (!conf.suppress_address) {
+    fputs("<address>\n", fp);
+    if (conf.address_start)
+      fprintf(fp, "%ls\n", conf.address_start);
+    if (conf.include_version_id) { /* Do the version ID paragraphs */
+      paragraph *para;
+      int seen = 0; /* becomes 1 once the first ID has been written */
+      for (para = sourceparas; para; para = para->next) {
+	if (para->type != para_VersionID)
+	  continue;
+	xhtml_versionid(fp, para->words, seen);
+	seen = 1;
+      }
+    }
+    if (conf.address_end)
+      fprintf(fp, "%ls\n", conf.address_end);
+    fputs("</address>\n", fp);
+  }
+  fputs("</body>\n\n</html>\n", fp);
+}
+
+/*
+ * Output the versionid paragraph. Typically this is a version control
+ * ID string (such as $Id...$ in RCS).
+ */
+static void xhtml_versionid(FILE *fp, word *text, int started)
+{
+  rdstringc t = { 0, 0, NULL };
+
+  rdaddc(&t, '[');		       /* FIXME: configurability */
+  xhtml_rdaddwc(&t, text, NULL);
+  rdaddc(&t, ']');		       /* FIXME: configurability */
+
+  if (started) /* not the first ID: put it on a new line */
+    fprintf(fp, "<br />\n"); /* XHTML needs the empty element closed, as with <hr /> */
+  fprintf(fp, "%s\n", t.text);
+  sfree(t.text);
+}
+
+/* Is this an XHTML reserved character? */
+static int xhtml_reservedchar(int c)
+{
+  switch (c) {
+  case '&': case '<': case '>': case '"': return TRUE;
+  default: return FALSE;
+  }
+}
+
+/*
+ * Convert a wide string into valid XHTML: Anything outside ASCII will
+ * be fixed up as an entity. Currently we don't worry about constraining the
+ * encoded character set, which we should probably do at some point (we can
+ * still fix up and return FALSE - see the last comment here). We also don't
+ * currently use entity names (bar &nbsp;): fix-ups are numeric references.
+ *
+ * Because this is only used for words, spaces are HARD spaces (any other
+ * spaces will be word_Whitespace not word_Normal). So they become &nbsp;
+ * Unless hard_spaces is FALSE, of course (code paragraphs break the above
+ * rule).
+ *
+ * If `result' is non-NULL, mallocs the resulting string and stores a pointer to
+ * it in `*result'. If `result' is NULL, merely checks whether all
+ * characters in the string are feasible.
+ *
+ * Return is nonzero if all characters are OK. If not all
+ * characters are OK but `result' is non-NULL, a result _will_
+ * still be generated!
+ */
+static int xhtml_convert(wchar_t *s, char **result, int hard_spaces) {
+    int doing = (result != 0);
+    int ok = TRUE;
+    char *p = NULL;
+    int plen = 0, psize = 0;
+
+    for (; *s; s++) {
+	wchar_t c = *s;
+
+#define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
+
+	if (((c == 32 && !hard_spaces) || (c > 32 && c <= 126 && !xhtml_reservedchar(c)))) {
+	    /* Char is OK. */
+	    if (doing)
+	    {
+	      ensure_size(plen);
+	      p[plen++] = (char)c;
+	    }
+	} else {
+	    /* Char needs fixing up. */
+	    /* ok = FALSE; -- currently we never return FALSE; we
+	     * might want to when considering a character set for the
+	     * encoded document.
+	     */
+	    if (doing)
+	    {
+	      if (c==32) { /* a space in a word is a hard space */
+		ensure_size(plen+6); /* includes space for the NUL, which is subsequently stomped on */
+		sprintf(p+plen, "&nbsp;");
+		plen+=6;
+	      } else {
+		/* FIXME: entity names! Room for "&#", any int (<=11 chars), ";" and the NUL: */
+		ensure_size(plen+16); /* plen+8 overflowed for values of six or more digits */
+		plen+=sprintf(p+plen, "&#%04i;", (int)c);
+	      }
+	    }
+	}
+    }
+    if (doing) {
+	p = resize(p, plen+1);
+	p[plen] = '\0';
+	*result = p;
+    }
+    return ok;
+}
+
+/*
+ * This formats the given words as XHTML.
+ */
+static void xhtml_rdaddwc(rdstringc *rs, word *text, word *end) {
+    char *c;
+    keyword *kwl;
+    xhtmlsection *sect;
+    indextag *itag;
+    int ti;
+
+    for (; text && text != end; text = text->next) {
+      switch (text->type) {
+      case word_HyperLink:
+	xhtml_utostr(text->text, &c);
+        rdaddsc(rs, "<a href=\"");
+        rdaddsc(rs, c);
+        rdaddsc(rs, "\">");
+        sfree(c);
+        break;
+
+      case word_UpperXref:
+      case word_LowerXref:
+        kwl = kw_lookup(keywords, text->text);
+	if (kwl) {
+	  sect=xhtml_find_section(kwl->para);
+	  if (sect) {
+	    rdaddsc(rs, "<a href=\"");
+	    rdaddsc(rs, sect->file->filename);
+	    rdaddc(rs, '#');
+	    rdaddsc(rs, sect->fragment);
+	    rdaddsc(rs, "\">");
+	  } else {
+	    rdaddsc(rs, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
+	    error(err_whatever, "Couldn't locate cross-reference! (Probably a bibliography entry.)");
+	  }
+        } else {
+	  rdaddsc(rs, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
+	  error(err_whatever, "Couldn't locate cross-reference! (Wasn't in source file.)");
+	}
+        break;
+
+      case word_IndexRef: /* in theory we could make an index target here */
+/*        rdaddsc(rs, "<a name=\"idx-");
+        xhtml_utostr(text->text, &c);
+        rdaddsc(rs, c);
+        sfree(c);
+        rdaddsc(rs, "\"></a>");*/
+	/* what we _do_ need to do is to fix up the backend data
+	 * for any indexentry this points to.
+	 */
+	for (ti=0; (itag = (indextag *)index234(idx->tags, ti))!=NULL; ti++) {
+	  /* FIXME: really ustricmp() and not ustrcmp()? */
+	  if (ustricmp(itag->name, text->text)==0) {
+	    break;
+	  }
+	}
+	if (itag!=NULL) {
+	  if (itag->refs!=NULL) {
+	    int i;
+	    for (i=0; i<itag->nrefs; i++) {
+	      xhtmlindex *idx_ref;
+	      indexentry *ientry;
+
+	      ientry = itag->refs[i];
+	      if (ientry->backend_data==NULL) {
+		idx_ref = (xhtmlindex*) smalloc(sizeof(xhtmlindex));
+		if (idx_ref==NULL)
+		  fatal(err_nomemory);
+		idx_ref->nsection = 0;
+		idx_ref->size = 4;
+		idx_ref->sections = (xhtmlsection**) smalloc(idx_ref->size * sizeof(xhtmlsection*));
+		if (idx_ref->sections==NULL)
+		  fatal(err_nomemory);
+		ientry->backend_data = idx_ref;
+	      } else {
+		idx_ref = ientry->backend_data;
+		if (idx_ref->nsection+1 > idx_ref->size) {
+		  int new_size = idx_ref->size * 2;
+		  idx_ref->sections = srealloc(idx_ref->sections, new_size * sizeof(xhtmlsection*)); /* array of pointers */
+		  if (idx_ref->sections==NULL) {
+		    fatal(err_nomemory);
+		  }
+		  idx_ref->size = new_size;
+		}
+	      }
+	      idx_ref->sections[idx_ref->nsection++] = currentsection;
+	      /* NOTE: a section is appended once per reference, with no
+	       * check for duplicates, so an entry may list it repeatedly. */
+	    }
+	  } else {
+	    fatal(err_whatever, "Index tag had no entries!");
+	  }
+	} else {
+	  fprintf(stderr, "Looking for index entry '%ls'\n", text->text);
+	  fatal(err_whatever, "Couldn't locate index entry! (Wasn't in index.)");
+	}
+	break;
+
+      case word_HyperEnd:
+      case word_XrefEnd:
+        rdaddsc(rs, "</a>");
+	break;
+
+      case word_Normal:
+      case word_Emph:
+      case word_Code:
+      case word_WeakCode:
+      case word_WhiteSpace:
+      case word_EmphSpace:
+      case word_CodeSpace:
+      case word_WkCodeSpace:
+      case word_Quote:
+      case word_EmphQuote:
+      case word_CodeQuote:
+      case word_WkCodeQuote:
+	assert(text->type != word_CodeQuote &&
+	       text->type != word_WkCodeQuote);
+	if (towordstyle(text->type) == word_Emph &&
+	    (attraux(text->aux) == attr_First ||
+	     attraux(text->aux) == attr_Only))
+	    rdaddsc(rs, "<em>");
+	else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
+		 (attraux(text->aux) == attr_First ||
+		  attraux(text->aux) == attr_Only))
+	    rdaddsc(rs, "<code>");
+
+	if (removeattr(text->type) == word_Normal) {
+	  if (xhtml_convert(text->text, &c, TRUE)) /* spaces in the word are hard */
+	    rdaddsc(rs, c);
+	  else
+	    xhtml_rdaddwc(rs, text->alt, NULL);
+	  sfree(c);
+	} else if (removeattr(text->type) == word_WhiteSpace) {
+	  rdaddc(rs, ' ');
+	} else if (removeattr(text->type) == word_Quote) {
+	  rdaddsc(rs, "&quot;");
+	}
+
+	if (towordstyle(text->type) == word_Emph &&
+	    (attraux(text->aux) == attr_Last ||
+	     attraux(text->aux) == attr_Only))
+	    rdaddsc(rs, "</em>");
+	else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
+		 (attraux(text->aux) == attr_Last ||
+		  attraux(text->aux) == attr_Only))
+	    rdaddsc(rs, "</code>");
+	break;
+      }
+    }
+}
+
+/* Output a heading, formatted as XHTML.
+ */
+static void xhtml_heading(FILE *fp, paragraph *p)
+{
+    rdstringc t = { 0, 0, NULL };
+    word *tprefix = p->kwtext;
+    word *nprefix = p->kwtext2;
+    word *text = p->words;
+    int level = xhtml_para_level(p);
+    int hlevel = (level > 6) ? 6 : level; /* tag level: (X)HTML stops at <h6> */
+    xhtmlsection *sect = xhtml_find_section(p);
+    char *fragment = ""; /* FIXME: what else can we do? */
+    if (sect)
+      fragment = sect->fragment;
+    else
+      error(err_whatever, "Couldn't locate heading cross-reference!");
+
+    if (level>2 && nprefix) { /* FIXME: configurability on the level thing */
+	xhtml_rdaddwc(&t, nprefix, NULL);
+	rdaddc(&t, ' ');	       /* FIXME: as below */
+    } else if (tprefix) {
+	xhtml_rdaddwc(&t, tprefix, NULL);
+	rdaddsc(&t, ": ");	       /* FIXME: configurability */
+    }
+    xhtml_rdaddwc(&t, text, NULL);
+    /* t.text is still NULL if nothing was appended; never hand NULL to %s */
+    fprintf(fp, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment, hlevel, t.text ? t.text : "", hlevel);
+    sfree(t.text);
+}
+
+/* Output a paragraph. Styles are handled by xhtml_rdaddwc().
+ * This looks pretty simple; I may have missed something ...
+ */
+static void xhtml_para(FILE *fp, word *text)
+{
+  rdstringc out = { 0, 0, NULL };
+  xhtml_rdaddwc(&out, text, NULL);
+  /* out.text is still NULL if no word produced output; never hand NULL to %s */
+  fprintf(fp, "%s", out.text ? out.text : "");
+  sfree(out.text);
+}
+
+/* Output a code paragraph. I'm treating this as preformatted, which
+ * may not be entirely correct. See xhtml_para() for my worries about
+ * this being overly-simple; however I think that most of the complexity
+ * of the text backend came entirely out of word wrapping anyway.
+ */
+static void xhtml_codepara(FILE *fp, word *text) {
+  char *line;
+  fputs("<pre>", fp);
+  for (; text != NULL; text = text->next) {
+    if (text->type != word_WeakCode) continue; /* other word types are skipped */
+    xhtml_convert(text->text, &line, FALSE); /* FALSE: spaces stay as plain spaces */
+    fprintf(fp, "%s\n", line);
+    sfree(line);
+  }
+  fputs("</pre>\n", fp);
+}
diff --git a/contents.c b/contents.c
new file mode 100644
index 0000000..de45433
--- /dev/null
+++ b/contents.c
@@ -0,0 +1,222 @@
+/*
+ * contents.c: build a table of contents
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <limits.h>
+#include "halibut.h"
+
+/*
+ * Running state used by number_mktext() while it assigns chapter,
+ * appendix, section and list-item numbers to successive paragraphs.
+ */
+struct numberstate_Tag {
+    int chapternum;		       /* incremented for each para_Chapter */
+    int appendixnum;		       /* starts at -1; 0 is rendered as `A' */
+    int ischapter;		       /* 1 inside a chapter, 0 in an appendix */
+    int *sectionlevels;		       /* per-level counters, maxsectlevel long */
+    paragraph **currentsects;	       /* latest para per level, at [level+1];
+					* holds maxsectlevel+1 entries */
+    paragraph *lastsect;	       /* most recent chapter/section para */
+    int oklevel;		       /* deepest heading level allowed next */
+    int maxsectlevel;		       /* allocated size of sectionlevels */
+    int listitem;		       /* current numbered-list item number */
+    wchar_t *chaptertext;	       /* the word for a chapter */
+    wchar_t *sectiontext;	       /* the word for a section */
+    wchar_t *apptext;		       /* the word for an appendix */
+};
+
+/*
+ * Allocate and initialise a numbering state.
+ *
+ * The state starts before any chapter (oklevel == -1), with room for
+ * 32 section levels. The chaptertext/sectiontext/apptext fields are
+ * not set here; number_cfg() fills them in. Release the result with
+ * number_free().
+ */
+numberstate *number_init(void) {
+    numberstate *state = mknew(numberstate);
+    int nlevels = 32;
+
+    /* Counters: no chapter, appendix or list item seen yet. */
+    state->chapternum = 0;
+    state->appendixnum = -1;
+    state->listitem = -1;
+    state->ischapter = 1;
+    state->oklevel = -1;	       /* not even in a chapter yet */
+    state->lastsect = NULL;
+
+    /* Per-level arrays; currentsects has one extra slot for index 0. */
+    state->maxsectlevel = nlevels;
+    state->sectionlevels = mknewa(int, nlevels);
+    state->currentsects = mknewa(paragraph *, nlevels+1);
+    memset(state->currentsects, 0, (nlevels+1)*sizeof(paragraph *));
+
+    return state;
+}
+
+/*
+ * Free a numbering state created by number_init(), including its two
+ * per-level arrays. The chapter/section/appendix text pointers are not
+ * freed here. A NULL `state' is accepted and ignored, so callers can
+ * clean up unconditionally.
+ */
+void number_free(numberstate *state) {
+    if (!state)
+	return;
+    sfree(state->sectionlevels);
+    sfree(state->currentsects);
+    sfree(state);
+}
+
+/*
+ * Append a word_Normal word holding a copy of `text' to the list
+ * being built.
+ *
+ * `*wret' points at the link to fill in (the list head, or the `next'
+ * field of the previous word); on return it points at the new word's
+ * `next' field, ready for the following append.
+ *
+ * The word's `aux' and `breaks' fields are set to zero so that later
+ * readers never see indeterminate values. `fpos' is deliberately left
+ * unset: generated numbering text has no source position.
+ */
+static void dotext(word ***wret, wchar_t *text) {
+    word *mnewword = mknew(word);
+    mnewword->text = ustrdup(text);
+    mnewword->type = word_Normal;
+    mnewword->aux = 0;
+    mnewword->breaks = FALSE;
+    mnewword->alt = NULL;
+    mnewword->next = NULL;
+    **wret = mnewword;
+    *wret = &mnewword->next;
+}
+
+/*
+ * Append a word_WhiteSpace word (with no text) to the list being
+ * built.
+ *
+ * `*wret' points at the link to fill in; on return it points at the
+ * new word's `next' field. As in dotext(), `aux' and `breaks' are
+ * zeroed so they are never read uninitialised, and `fpos' is left
+ * unset.
+ */
+static void dospace(word ***wret) {
+    word *mnewword = mknew(word);
+    mnewword->text = NULL;
+    mnewword->type = word_WhiteSpace;
+    mnewword->aux = 0;
+    mnewword->breaks = FALSE;
+    mnewword->alt = NULL;
+    mnewword->next = NULL;
+    **wret = mnewword;
+    *wret = &mnewword->next;
+}
+
+/*
+ * Append the decimal representation of the non-negative integer `num'
+ * as a text word (via dotext()).
+ *
+ * The digits are generated backwards from the end of a local buffer.
+ * The end pointer must be computed in elements (lenof), not bytes:
+ * `text + sizeof(text)' would point sizeof(wchar_t) times too far and
+ * write outside the array. Zero is rendered as "0" rather than as an
+ * empty string.
+ */
+static void donumber(word ***wret, int num) {
+    wchar_t text[20];
+    wchar_t *p = text + lenof(text);
+    assert(num >= 0);		       /* num % 10 must index the digits */
+    *--p = L'\0';
+    do {
+	assert(p > text);
+	*--p = L"0123456789"[num % 10];
+	num /= 10;
+    } while (num != 0);
+    dotext(wret, p);
+}
+
+/*
+ * Append an alphabetic (appendix-style) representation of `num' as a
+ * text word: 0 -> "A", 25 -> "Z", 26 -> "AA", and so on.
+ *
+ * The first loop works out how many letters are needed, subtracting
+ * the count of all shorter strings from `num' as it goes; `aton' is
+ * the largest value representable with the current letter count and is
+ * clamped at INT_MAX to avoid overflow. The second loop then emits
+ * `num' as a fixed-width base-26 number, least significant letter
+ * first, working backwards from the end of the buffer.
+ *
+ * The end pointer is computed in elements (lenof), not bytes:
+ * `text + sizeof(text)' would point outside the array.
+ */
+static void doanumber(word ***wret, int num) {
+    wchar_t text[20];
+    wchar_t *p;
+    int nletters, aton;
+    nletters = 1;
+    aton = 25;
+    while (num > aton) {
+	nletters++;
+	num -= aton+1;
+	if (aton < INT_MAX/26)
+	    aton = (aton+1) * 26 - 1;
+	else
+	    aton = INT_MAX;
+    }
+    p = text + lenof(text);
+    *--p = L'\0';
+    while (nletters--) {
+	assert(p > text);
+	*--p = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"[num % 26];
+	num /= 26;
+    }
+    dotext(wret, p);
+}
+
+/*
+ * Set the words used for "Chapter", "Section" and "Appendix".
+ *
+ * The English defaults are installed first. Then every para_Config
+ * paragraph in the `source' list is examined: if its keyword matches
+ * "chapter", "section" or "appendix" (case-insensitively), the
+ * corresponding word is replaced by uadv(keyword) -- presumably the
+ * string stored directly after the keyword itself. Later paragraphs
+ * override earlier ones. The stored pointers refer into the
+ * paragraph data; nothing is copied.
+ */
+void number_cfg(numberstate *state, paragraph *source) {
+    paragraph *para;
+
+    /* Defaults */
+    state->chaptertext = L"Chapter";
+    state->sectiontext = L"Section";
+    state->apptext = L"Appendix";
+
+    for (para = source; para != NULL; para = para->next) {
+	wchar_t **target;
+
+	if (para->type != para_Config)
+	    continue;
+
+	if (!ustricmp(para->keyword, L"chapter"))
+	    target = &state->chaptertext;
+	else if (!ustricmp(para->keyword, L"section"))
+	    target = &state->sectiontext;
+	else if (!ustricmp(para->keyword, L"appendix"))
+	    target = &state->apptext;
+	else
+	    continue;		       /* some other config directive */
+
+	*target = uadv(para->keyword);
+    }
+}
+
+/*
+ * Make sure the per-level arrays can be indexed for heading level
+ * `level': sectionlevels[level] and currentsects[level+1].
+ *
+ * Both arrays are grown together, and the newly added entries are
+ * cleared, so that neither is ever indexed out of bounds or read
+ * uninitialised after a resize.
+ */
+static void number_grow_levels(numberstate *state, int level) {
+    int oldmax = state->maxsectlevel;
+    int i;
+
+    if (level < oldmax)
+	return;
+    state->maxsectlevel = level + 32;
+    state->sectionlevels = resize(state->sectionlevels,
+				  state->maxsectlevel);
+    state->currentsects = resize(state->currentsects,
+				 state->maxsectlevel+1);
+    for (i = oldmax; i < state->maxsectlevel; i++)
+	state->sectionlevels[i] = 0;
+    for (i = oldmax+1; i < state->maxsectlevel+1; i++)
+	state->currentsects[i] = NULL;
+}
+
+/*
+ * Generate the numbering text for paragraph `p' and link it into the
+ * section hierarchy.
+ *
+ *  - state:    running numbering state, updated in place.
+ *  - p:        the paragraph; its parent/child/sibling and kwtext2
+ *              fields are written.
+ *  - category: word to use instead of the configured chapter/section/
+ *              appendix word, or NULL for the configured one.
+ *  - prev:     type of the preceding paragraph; a numbered list
+ *              restarts at 1 unless this is para_NumberedList.
+ *  - errflag:  set to TRUE if a heading level was skipped.
+ *
+ * Returns the full text ("Chapter 3", "Section 3.1.2", "4", ...) as a
+ * newly allocated word list, or NULL for paragraph types that get no
+ * number and for a heading that skipped a level. p->kwtext2 is set to
+ * the numeric-only tail of that list.
+ */
+word *number_mktext(numberstate *state, paragraph *p, wchar_t *category,
+		    int prev, int *errflag) {
+    word *ret = NULL;
+    word **ret2 = &ret;
+    word **pret = &ret;
+    int i, level;
+
+    level = -2;			       /* default for non-section-heading */
+    switch (p->type) {
+      case para_Chapter:
+	state->chapternum++;
+	for (i = 0; i < state->maxsectlevel; i++)
+	    state->sectionlevels[i] = 0;
+	dotext(&pret, category ? category : state->chaptertext);
+	dospace(&pret);
+	ret2 = pret;
+	donumber(&pret, state->chapternum);
+	state->ischapter = 1;
+	state->oklevel = 0;
+	level = -1;
+	break;
+      case para_Heading:
+      case para_Subsect:
+	level = (p->type == para_Heading ? 0 : p->aux);
+	/*
+	 * Grow the arrays before anything indexes them by `level'.
+	 * This has to happen even when the level turns out to be
+	 * invalid, because the linking code below still runs on the
+	 * error path and indexes currentsects[level+1].
+	 */
+	number_grow_levels(state, level);
+	if (level > state->oklevel) {
+	    error(err_sectjump, &p->fpos);
+	    *errflag = TRUE;
+	    ret = NULL;
+	    break;
+	}
+	state->oklevel = level+1;
+	state->sectionlevels[level]++;
+	for (i = level+1; i < state->maxsectlevel; i++)
+	    state->sectionlevels[i] = 0;
+	dotext(&pret, category ? category : state->sectiontext);
+	dospace(&pret);
+	ret2 = pret;
+	if (state->ischapter)
+	    donumber(&pret, state->chapternum);
+	else
+	    doanumber(&pret, state->appendixnum);
+	for (i = 0; i <= level; i++) {
+	    dotext(&pret, L".");
+	    /* A level that was never explicitly opened counts as 1. */
+	    if (state->sectionlevels[i] == 0)
+		state->sectionlevels[i] = 1;
+	    donumber(&pret, state->sectionlevels[i]);
+	}
+	break;
+      case para_Appendix:
+	state->appendixnum++;
+	for (i = 0; i < state->maxsectlevel; i++)
+	    state->sectionlevels[i] = 0;
+	dotext(&pret, category ? category : state->apptext);
+	dospace(&pret);
+	ret2 = pret;
+	doanumber(&pret, state->appendixnum);
+	state->ischapter = 0;
+	state->oklevel = 0;
+	level = -1;
+	break;
+      case para_UnnumberedChapter:
+	level = -1;
+	break;
+      case para_NumberedList:
+	ret2 = pret;
+	if (prev != para_NumberedList)
+	    state->listitem = 0;
+	state->listitem++;
+	donumber(&pret, state->listitem);
+	break;
+    }
+
+    /*
+     * Now set up parent, child and sibling links. currentsects[] is
+     * indexed by level+1, so chapter-level paragraphs (level -1) live
+     * in slot 0.
+     */
+    p->parent = p->child = p->sibling = NULL;
+    if (level != -2) {
+	if (state->currentsects[level+1])
+	    state->currentsects[level+1]->sibling = p;
+	if (level >= 0 && state->currentsects[level]) {
+	    p->parent = state->currentsects[level];
+	    if (!state->currentsects[level]->child)
+		state->currentsects[level]->child = p;
+	}
+	state->currentsects[level+1] = state->lastsect = p;
+	/* Anything deeper than this paragraph is no longer current. */
+	for (i = level+2; i < state->maxsectlevel+1; i++)
+	    state->currentsects[i] = NULL;
+    } else {
+	p->parent = state->lastsect;
+    }
+
+    p->kwtext2 = *ret2;
+    return ret;
+}
diff --git a/error.c b/error.c
new file mode 100644
index 0000000..6d8dd13
--- /dev/null
+++ b/error.c
@@ -0,0 +1,219 @@
+/*
+ * error.c: Halibut error handling
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include "halibut.h"
+
+/*
+ * Error flags
+ */
+#define PREFIX 0x0001		       /* give `halibut:' prefix */
+#define FILEPOS 0x0002		       /* give file position prefix */
+
+/*
+ * Format and print one diagnostic to stderr.
+ *
+ * `code' is one of the err_* values; `ap' supplies the arguments that
+ * code expects (a filepos pointer, a char or wchar_t string, etc. --
+ * see each case). The message is prefixed with "halibut: " (PREFIX)
+ * or with "file:line:[col:] " (FILEPOS), depending on the code, and
+ * terminated with a newline.
+ *
+ * Most string arguments are truncated to 200 characters so the
+ * message fits the fixed-size buffer. An unrecognised code produces a
+ * generic message instead of printing uninitialised data.
+ */
+static void do_error(int code, va_list ap) {
+    char error[1024];
+    char auxbuf[256];
+    char *sp, *sp2;
+    wchar_t *wsp;
+    filepos fpos, fpos2;
+    int flags;
+
+    switch(code) {
+      case err_nomemory:	       /* no arguments */
+	sprintf(error, "out of memory");
+	flags = PREFIX;
+	break;
+      case err_optnoarg:
+	sp = va_arg(ap, char *);
+	sprintf(error, "option `-%.200s' requires an argument", sp);
+	flags = PREFIX;
+	break;
+      case err_nosuchopt:
+	sp = va_arg(ap, char *);
+	sprintf(error, "unrecognised option `-%.200s'", sp);
+	flags = PREFIX;
+	break;
+      case err_noinput:		       /* no arguments */
+	sprintf(error, "no input files");
+	flags = PREFIX;
+	break;
+      case err_cantopen:
+	sp = va_arg(ap, char *);
+	sprintf(error, "unable to open input file `%.200s'", sp);
+	flags = PREFIX;
+	break;
+      case err_nodata:		       /* no arguments */
+	sprintf(error, "no data in input files");
+	flags = PREFIX;
+	break;
+      case err_brokencodepara:
+	fpos = *va_arg(ap, filepos *);
+	sprintf(error, "every line of a code paragraph should begin `\\c'");
+	flags = FILEPOS;
+	break;
+      case err_kwunclosed:
+	fpos = *va_arg(ap, filepos *);
+	sprintf(error, "expected `}' after paragraph keyword");
+	flags = FILEPOS;
+	break;
+      case err_kwexpected:
+	fpos = *va_arg(ap, filepos *);
+	sprintf(error, "expected a paragraph keyword");
+	flags = FILEPOS;
+	break;
+      case err_kwillegal:
+	fpos = *va_arg(ap, filepos *);
+	sprintf(error, "expected no paragraph keyword");
+	flags = FILEPOS;
+	break;
+      case err_kwtoomany:
+	fpos = *va_arg(ap, filepos *);
+	sprintf(error, "expected only one paragraph keyword");
+	flags = FILEPOS;
+	break;
+      case err_bodyillegal:
+	fpos = *va_arg(ap, filepos *);
+	sprintf(error, "expected no text after paragraph keyword");
+	flags = FILEPOS;
+	break;
+      case err_badparatype:
+	wsp = va_arg(ap, wchar_t *);
+	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+	fpos = *va_arg(ap, filepos *);
+	sprintf(error, "command `%.200s' unrecognised at start of"
+		" paragraph", sp);
+	flags = FILEPOS;
+	break;
+      case err_badmidcmd:
+	wsp = va_arg(ap, wchar_t *);
+	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+	fpos = *va_arg(ap, filepos *);
+	sprintf(error, "command `%.200s' unexpected in mid-paragraph", sp);
+	flags = FILEPOS;
+	break;
+      case err_unexbrace:
+	fpos = *va_arg(ap, filepos *);
+	sprintf(error, "brace character unexpected in mid-paragraph");
+	flags = FILEPOS;
+	break;
+      case err_explbr:
+	fpos = *va_arg(ap, filepos *);
+	sprintf(error, "expected `{' after command");
+	flags = FILEPOS;
+	break;
+      case err_commenteof:
+	fpos = *va_arg(ap, filepos *);
+	sprintf(error, "end of file unexpected inside `\\#{...}' comment");
+	flags = FILEPOS;
+	break;
+      case err_kwexprbr:
+	fpos = *va_arg(ap, filepos *);
+	sprintf(error, "expected `}' after cross-reference");
+	flags = FILEPOS;
+	break;
+      case err_missingrbrace:
+	fpos = *va_arg(ap, filepos *);
+	sprintf(error, "unclosed braces at end of paragraph");
+	flags = FILEPOS;
+	break;
+      case err_nestedstyles:
+	fpos = *va_arg(ap, filepos *);
+	sprintf(error, "unable to nest text styles");
+	flags = FILEPOS;
+	break;
+      case err_nestedindex:
+	fpos = *va_arg(ap, filepos *);
+	sprintf(error, "unable to nest index markings");
+	flags = FILEPOS;
+	break;
+      case err_nosuchkw:
+	fpos = *va_arg(ap, filepos *);
+	wsp = va_arg(ap, wchar_t *);
+	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+	sprintf(error, "unable to resolve cross-reference to `%.200s'", sp);
+	flags = FILEPOS;
+	break;
+      case err_multiBR:
+	fpos = *va_arg(ap, filepos *);
+	wsp = va_arg(ap, wchar_t *);
+	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+	sprintf(error, "multiple `\\BR' entries given for `%.200s'", sp);
+	flags = FILEPOS;
+	break;
+      case err_nosuchidxtag:
+	wsp = va_arg(ap, wchar_t *);
+	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+	sprintf(error, "`\\IM' on unknown index tag `%.200s'", sp);
+	flags = 0;
+	/* FIXME: need to get a filepos to here somehow */
+	break;
+      case err_cantopenw:
+	sp = va_arg(ap, char *);
+	sprintf(error, "unable to open output file `%.200s'", sp);
+	flags = PREFIX;
+	break;
+      case err_macroexists:
+	fpos = *va_arg(ap, filepos *);
+	wsp = va_arg(ap, wchar_t *);
+	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+	sprintf(error, "macro `%.200s' already defined", sp);
+	flags = FILEPOS;
+	break;
+      case err_sectjump:
+	fpos = *va_arg(ap, filepos *);
+	sprintf(error, "expected higher heading levels before this one");
+	flags = FILEPOS;
+	break;
+      case err_winhelp_ctxclash:
+	fpos = *va_arg(ap, filepos *);
+	sp = va_arg(ap, char *);
+	sp2 = va_arg(ap, char *);
+	sprintf(error, "Windows Help context id `%.200s' clashes with "
+		"previously defined `%.200s'", sp, sp2);
+	flags = FILEPOS;
+	break;
+      case err_multikw:
+	fpos = *va_arg(ap, filepos *);
+	fpos2 = *va_arg(ap, filepos *);
+	wsp = va_arg(ap, wchar_t *);
+	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+	/*
+	 * The file name is length-limited like every other string
+	 * argument, so a long path cannot overrun the buffer.
+	 */
+	sprintf(error, "paragraph keyword `%.200s' already defined at "
+		"%.200s:%d", sp, fpos2.filename, fpos2.line);
+	flags = FILEPOS;
+	break;
+      case err_whatever:
+	/*
+	 * NOTE(review): the caller's format is expanded without any
+	 * length limit; callers must keep the result under 1024 bytes.
+	 */
+	sp = va_arg(ap, char *);
+        vsprintf(error, sp, ap);
+        flags = PREFIX;
+        break;
+      default:
+	/*
+	 * Unknown code: without this, `error' and `flags' would be
+	 * used uninitialised below.
+	 */
+	sprintf(error, "unrecognised error code %d", code);
+	flags = PREFIX;
+	break;
+    }
+
+    if (flags & PREFIX)
+	fputs("halibut: ", stderr);
+    if (flags & FILEPOS) {
+	fprintf(stderr, "%s:%d:", fpos.filename, fpos.line);
+	if (fpos.col > 0)	       /* column reporting may be disabled */
+	    fprintf(stderr, "%d:", fpos.col);
+	fputc(' ', stderr);
+    }
+    fputs(error, stderr);
+    fputc('\n', stderr);
+}
+
+/*
+ * Report the error identified by `code' (with the arguments that code
+ * requires) through do_error(), then terminate the program with
+ * EXIT_FAILURE. Does not return.
+ */
+void fatal(int code, ...) {
+    va_list args;
+
+    va_start(args, code);
+    do_error(code, args);
+    va_end(args);
+
+    exit(EXIT_FAILURE);
+}
+
+/*
+ * Report the error identified by `code' (with the arguments that code
+ * requires) through do_error(), then return to the caller so that
+ * processing can continue.
+ */
+void error(int code, ...) {
+    va_list args;
+
+    va_start(args, code);
+    do_error(code, args);
+    va_end(args);
+}
diff --git a/halibut.h b/halibut.h
new file mode 100644
index 0000000..c948624
--- /dev/null
+++ b/halibut.h
@@ -0,0 +1,416 @@
+#ifndef HALIBUT_HALIBUT_H
+#define HALIBUT_HALIBUT_H
+
+#include <stdio.h>
+#include <wchar.h>
+#include <time.h>
+
+#ifdef __GNUC__
+#define NORETURN __attribute__((__noreturn__))
+#else
+#define NORETURN /* nothing */
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+/* For suppressing unused-parameter warnings */
+#define IGNORE(x) ( (x) = (x) )
+
+#include "tree234.h"
+
+/*
+ * Structure tags
+ */
+typedef struct input_Tag input;
+typedef struct filepos_Tag filepos;
+typedef struct paragraph_Tag paragraph;
+typedef struct word_Tag word;
+typedef struct keywordlist_Tag keywordlist;
+typedef struct keyword_Tag keyword;
+typedef struct userstyle_Tag userstyle;
+typedef struct numberstate_Tag numberstate;
+typedef struct indexdata_Tag indexdata;
+typedef struct indextag_Tag indextag;
+typedef struct indexentry_Tag indexentry;
+typedef struct macrostack_Tag macrostack;
+
+/*
+ * Data structure to hold a file name, a line and a column number,
+ * for reporting errors. A column value that is not positive means
+ * "no column information"; the error printer then omits it.
+ */
+struct filepos_Tag {
+    char *filename;
+    int line, col;
+};
+
+/*
+ * Data structure to hold all the file names etc for input
+ */
+/* One pushed-back character, with the position it was read from. */
+typedef struct pushback_Tag {
+    int chr;
+    filepos pos;
+} pushback;
+struct input_Tag {
+    char **filenames;		       /* complete list of input files */
+    int nfiles;			       /* how many in the list */
+    FILE *currfp;		       /* the currently open one */
+    int currindex;		       /* which one is that in the list */
+    pushback *pushback;		       /* pushed-back input characters */
+    int npushback, pushbacksize;       /* entries in use / allocated */
+    filepos pos;		       /* current read position */
+    int reportcols;		       /* report column numbers in errors */
+    macrostack *stack;		       /* macro expansions in force */
+};
+
+/*
+ * Data structure to hold the input form of the source, ie a linked
+ * list of paragraphs
+ */
+struct paragraph_Tag {
+    paragraph *next;		       /* next paragraph in document order */
+    int type;			       /* one of the para_* values */
+    wchar_t *keyword;		       /* for most special paragraphs */
+    word *words;		       /* list of words in paragraph */
+    int aux;			       /* number, in a numbered paragraph
+                                        * or subsection level
+                                        */
+    word *kwtext;		       /* chapter/section indication */
+    word *kwtext2;		       /* numeric-only form of kwtext */
+    filepos fpos;		       /* source position, for error reports */
+
+    /* The three links below are filled in by number_mktext(). */
+    paragraph *parent, *child, *sibling;   /* for hierarchy navigation */
+
+    void *private_data; 	       /* for temp use in backends */
+};
+enum {
+    para_IM,			       /* index merge */
+    para_BR,			       /* bibliography rewrite */
+    para_Rule,			       /* random horizontal rule */
+    para_Chapter,
+    para_Appendix,
+    para_UnnumberedChapter,
+    para_Heading,
+    para_Subsect,
+    para_Normal,
+    para_Biblio,		       /* causes no output unless turned ... */
+    para_BiblioCited,		       /*  ... into this paragraph type */
+    para_Bullet,
+    para_NumberedList,
+    para_Code,
+    para_Copyright,
+    para_Preamble,
+    para_NoCite,
+    para_Title,
+    para_VersionID,
+    para_Config,		       /* configuration directive */
+    para_NotParaType		       /* placeholder value */
+};
+
+/*
+ * Data structure to hold an individual word
+ */
+struct word_Tag {
+    word *next, *alt;		       /* next word in list; alternative list */
+    int type;			       /* one of the word_* values */
+    int aux;			       /* attr_* or quote_* value, by type */
+    int breaks;			       /* can a line break after it? */
+    wchar_t *text;		       /* may be NULL for some word types */
+    filepos fpos;		       /* source position, for error reports */
+};
+enum {
+    /* ORDERING CONSTRAINT: these normal-word types ... */
+    word_Normal,
+    word_Emph,
+    word_Code,			       /* monospaced; `quoted' in text */
+    word_WeakCode,		       /* monospaced, normal in text */
+    /* ... must be in the same order as these space types ... */
+    word_WhiteSpace,		       /* text is NULL or ignorable */
+    word_EmphSpace,		       /* WhiteSpace when emphasised */
+    word_CodeSpace,		       /* WhiteSpace when code */
+    word_WkCodeSpace,		       /* WhiteSpace when weak code */
+    /* ... and must be in the same order as these quote types ... */
+    word_Quote,			       /* text is NULL or ignorable */
+    word_EmphQuote,		       /* Quote when emphasised */
+    word_CodeQuote,		       /* (can't happen) */
+    word_WkCodeQuote,		       /* (can't happen) */
+    /* END ORDERING CONSTRAINT */
+    word_internal_endattrs,
+    word_UpperXref,		       /* \K */
+    word_LowerXref,		       /* \k */
+    word_XrefEnd,		       /* (invisible; no text) */
+    word_IndexRef,		       /* (always an invisible one) */
+    word_HyperLink,		       /* (invisible) */
+    word_HyperEnd		       /* (also invisible; no text) */
+};
+/* aux values for attributed words */
+enum {
+    attr_Only   = 0x0000,	       /* a lone word with the attribute */
+    attr_First  = 0x0001,	       /* the first of a series */
+    attr_Last   = 0x0002,	       /* the last of a series */
+    attr_Always	= 0x0003,	       /* any other part of a series */
+    attr_mask   = 0x0003,
+};
+/* aux values for quote-type words */
+enum {
+    quote_Open  = 0x0010,
+    quote_Close = 0x0020,
+    quote_mask  = 0x0030,
+};
+/*
+ * Helpers that rely on the ORDERING CONSTRAINT in the word-type enum:
+ * the word, space and quote families each list their four styles in
+ * the same order, so the low two bits of a type select the style.
+ *
+ * isattr(x):       true for any type strictly between word_Normal and
+ *                  word_WhiteSpace, or strictly between word_WhiteSpace
+ *                  and word_internal_endattrs.
+ *                  NOTE(review): the second range includes the plain
+ *                  word_Quote -- confirm that is intended.
+ * sameattr(x,y):   true if x and y have the same low two bits.
+ * towordstyle(x):  the normal-word type with the same style as x.
+ * tospacestyle(x): the space type with the same style as x.
+ * toquotestyle(x): the quote type with the same style as x.
+ * removeattr(x):   x with its low two bits cleared, i.e. the first
+ *                  member of x's family.
+ */
+#define isattr(x) ( ( (x) > word_Normal && (x) < word_WhiteSpace ) || \
+                    ( (x) > word_WhiteSpace && (x) < word_internal_endattrs ) )
+#define sameattr(x,y) ( (((x)-(y)) & 3) == 0 )
+#define towordstyle(x) ( word_Normal + ((x) & 3) )
+#define tospacestyle(x) ( word_WhiteSpace + ((x) & 3) )
+#define toquotestyle(x) ( word_Quote + ((x) & 3) )
+#define removeattr(x) ( word_Normal + ((x) &~ 3) )
+
+/* Extract the attribute / quote part of a word's aux field. */
+#define attraux(x) ( (x) & attr_mask )
+#define quoteaux(x) ( (x) & quote_mask )
+
+/*
+ * error.c
+ */
+void fatal(int code, ...) NORETURN;
+void error(int code, ...);
+enum {
+    err_nomemory,		       /* out of memory */
+    err_optnoarg,		       /* option `-%s' requires an argument */
+    err_nosuchopt,		       /* unrecognised option `-%s' */
+    err_noinput,		       /* no input files */
+    err_cantopen,		       /* unable to open input file `%s' */
+    err_nodata,			       /* no data in input files */
+    err_brokencodepara,		       /* line in codepara didn't begin `\c' */
+    err_kwunclosed,		       /* expected `}' after keyword */
+    err_kwillegal,		       /* paragraph type expects no keyword */
+    err_kwexpected,		       /* paragraph type expects a keyword */
+    err_kwtoomany,		       /* paragraph type expects only 1 */
+    err_bodyillegal,		       /* paragraph type expects only kws! */
+    err_badparatype,		       /* invalid command at start of para */
+    err_badmidcmd,		       /* invalid command in mid-para */
+    err_unexbrace,		       /* unexpected brace */
+    err_explbr,			       /* expected `{' after command */
+    err_commenteof,		       /* EOF inside braced comment */
+    err_kwexprbr,		       /* expected `}' after cross-ref */
+    err_missingrbrace,		       /* unclosed braces at end of para */
+    err_nestedstyles,		       /* unable to nest text styles */
+    err_nestedindex,		       /* unable to nest `\i' thingys */
+    err_nosuchkw,		       /* unresolved cross-reference */
+    err_multiBR,		       /* multiple \BRs on same keyword */
+    err_nosuchidxtag,		       /* \IM on unknown index tag (warning) */
+    err_cantopenw,		       /* can't open output file for write */
+    err_macroexists,		       /* this macro already exists */
+    err_sectjump,		       /* jump a heading level, eg \C -> \S */
+    err_winhelp_ctxclash,	       /* WinHelp context ID hash clash */
+    err_multikw,		       /* keyword clash in sections */
+    err_whatever                       /* random error of another type */
+};
+
+/*
+ * malloc.c
+ *
+ * Allocation wrappers. When LOGALLOC is defined, the three basic
+ * functions take the caller's file name and line number as extra
+ * leading arguments, and the macros of the same names supply
+ * __FILE__ and __LINE__ automatically so that call sites look the
+ * same in both configurations.
+ */
+#ifdef LOGALLOC
+void *smalloc(char *file, int line, int size);
+void *srealloc(char *file, int line, void *p, int size);
+void sfree(char *file, int line, void *p);
+#define smalloc(x) smalloc(__FILE__, __LINE__, x)
+#define srealloc(x, y) srealloc(__FILE__, __LINE__, x, y)
+#define sfree(x) sfree(__FILE__, __LINE__, x)
+#else
+void *smalloc(int size);
+void *srealloc(void *p, int size);
+void sfree(void *p);
+#endif
+void free_word_list(word *w);
+void free_para_list(paragraph *p);
+word *dup_word_list(word *w);
+char *dupstr(char *s);
+
+/*
+ * mknew(type):          allocate one object of `type'.
+ * mknewa(type, number): allocate an array of `number' objects.
+ * resize(array, len):   reallocate `array' to hold `len' elements;
+ *                       the element size is taken from *array.
+ * lenof(array):         number of elements in a true array (not a
+ *                       pointer).
+ */
+#define mknew(type) ( (type *) smalloc (sizeof (type)) )
+#define mknewa(type, number) ( (type *) smalloc ((number) * sizeof (type)) )
+#define resize(array, len) ( srealloc ((array), (len) * sizeof (*(array))) )
+#define lenof(array) ( sizeof(array) / sizeof(*(array)) )
+
+/*
+ * ustring.c
+ */
+wchar_t *ustrdup(wchar_t *s);
+char *ustrtoa(wchar_t *s, char *outbuf, int size);
+int ustrlen(wchar_t *s);
+wchar_t *uadv(wchar_t *s);
+wchar_t *ustrcpy(wchar_t *dest, wchar_t *source);
+wchar_t utolower(wchar_t);
+int ustrcmp(wchar_t *lhs, wchar_t *rhs);
+int ustricmp(wchar_t *lhs, wchar_t *rhs);
+int utoi(wchar_t *);
+int utob(wchar_t *);
+int uisdigit(wchar_t);
+wchar_t *ustrlow(wchar_t *s);
+wchar_t *ustrftime(wchar_t *fmt, struct tm *timespec);
+
+/*
+ * help.c
+ */
+void help(void);
+void usage(void);
+void showversion(void);
+
+/*
+ * licence.c
+ */
+void licence(void);
+
+/*
+ * version.c
+ *
+ * Declared `extern' so that this header only refers to the single
+ * object defined in version.c. Without it, every file including this
+ * header would contain its own tentative definition of `version'.
+ */
+extern const char *const version;
+
+/*
+ * misc.c
+ */
+typedef struct stackTag *stack;
+stack stk_new(void);
+void stk_free(stack);
+void stk_push(stack, void *);
+void *stk_pop(stack);
+
+/*
+ * Growable strings, in wide-character (rdstring) and char (rdstringc)
+ * flavours, appended to with the rdadd* functions declared below.
+ * An empty string is { 0, 0, NULL }.
+ * NOTE(review): presumably `pos' is the number of characters stored
+ * and `size' the allocated capacity -- confirm against misc.c.
+ */
+typedef struct tagRdstring rdstring;
+struct tagRdstring {
+    int pos, size;
+    wchar_t *text;
+};
+typedef struct tagRdstringc rdstringc;
+struct tagRdstringc {
+    int pos, size;
+    char *text;
+};
+extern const rdstring empty_rdstring;
+extern const rdstringc empty_rdstringc;
+void rdadd(rdstring *rs, wchar_t c);
+void rdadds(rdstring *rs, wchar_t *p);
+wchar_t *rdtrim(rdstring *rs);
+void rdaddc(rdstringc *rs, char c);
+void rdaddsc(rdstringc *rs, char *p);
+char *rdtrimc(rdstringc *rs);
+
+int compare_wordlists(word *a, word *b);
+
+void mark_attr_ends(paragraph *sourceform);
+
+typedef struct tagWrappedLine wrappedline;
+struct tagWrappedLine {
+    wrappedline *next;
+    word *begin, *end;		       /* first & last words of line */
+    int nspaces;		       /* number of whitespaces in line */
+    int shortfall;		       /* how much shorter than max width */
+};
+wrappedline *wrap_para(word *, int, int, int (*)(word *));
+void wrap_free(wrappedline *);
+
+/*
+ * input.c
+ */
+paragraph *read_input(input *in, indexdata *idx);
+
+/*
+ * keywords.c
+ */
+struct keywordlist_Tag {
+    int nkeywords;
+    int size;
+    tree234 *keys;		       /* sorted by `key' field */
+    word **looseends;		       /* non-keyword list element numbers */
+    int nlooseends;
+    int looseendssize;
+};
+struct keyword_Tag {
+    wchar_t *key;		       /* the keyword itself */
+    word *text;			       /* "Chapter 2", "Appendix Q"... */
+    				       /* (NB: filepos are not set) */
+    paragraph *para;		       /* the paragraph referenced */
+};
+keyword *kw_lookup(keywordlist *, wchar_t *);
+keywordlist *get_keywords(paragraph *);
+void free_keywords(keywordlist *);
+void subst_keywords(paragraph *, keywordlist *);
+
+/*
+ * index.c
+ */
+
+/*
+ * Data structure to hold both sides of the index.
+ */
+struct indexdata_Tag {
+    tree234 *tags;		       /* holds type `indextag' */
+    tree234 *entries;		       /* holds type `indexentry' */
+};
+
+/*
+ * Data structure to hold an index tag (LHS of index).
+ */
+struct indextag_Tag {
+    wchar_t *name;		       /* tag name; compared ignoring case */
+    word *implicit_text;	       /* text of the implicit \IM, or NULL */
+    word **explicit_texts;	       /* texts of explicit \IMs */
+    int nexplicit, explicit_size;      /* entries in use / allocated */
+    int nrefs;			       /* length of refs; set by build_index */
+    indexentry **refs;		       /* array of entries referenced by tag */
+};
+
+/*
+ * Data structure to hold an index entry (RHS of index).
+ */
+struct indexentry_Tag {
+    word *text;
+    void *backend_data;		       /* private to back end */
+};
+
+indexdata *make_index(void);
+void cleanup_index(indexdata *);
+/* index_merge takes responsibility for freeing arg 3 iff implicit; never
+ * takes responsibility for arg 2 */
+void index_merge(indexdata *, int is_explicit, wchar_t *, word *);
+void build_index(indexdata *);
+void index_debug(indexdata *);
+indextag *index_findtag(indexdata *idx, wchar_t *name);
+
+/*
+ * contents.c
+ */
+numberstate *number_init(void);
+void number_cfg(numberstate *, paragraph *);
+word *number_mktext(numberstate *, paragraph *, wchar_t *, int , int *);
+void number_free(numberstate *);
+
+/*
+ * biblio.c
+ */
+void gen_citations(paragraph *, keywordlist *);
+
+/*
+ * style.c
+ */
+struct userstyle_Tag {
+};
+
+/*
+ * bk_text.c
+ */
+void text_backend(paragraph *, keywordlist *, indexdata *);
+
+/*
+ * bk_xhtml.c
+ */
+void xhtml_backend(paragraph *, keywordlist *, indexdata *);
+
+/*
+ * bk_whlp.c
+ */
+void whlp_backend(paragraph *, keywordlist *, indexdata *);
+
+#endif
diff --git a/help.c b/help.c
new file mode 100644
index 0000000..3fd957f
--- /dev/null
+++ b/help.c
@@ -0,0 +1,32 @@
+/*
+ * help.c: usage instructions
+ */
+
+#include <stdio.h>
+#include "halibut.h"
+
+/* Lines printed by help(), one per puts() call; NULL-terminated. */
+static char *helptext[] = {
+    "FIXME: help text goes here",
+    NULL
+};
+
+/* Lines printed by usage(), one per puts() call; NULL-terminated. */
+static char *usagetext[] = {
+    "FIXME: usage text goes here",
+    NULL
+};
+
+/* Print every line of the help text to standard output. */
+void help(void) {
+    int i;
+
+    for (i = 0; helptext[i] != NULL; i++)
+	puts(helptext[i]);
+}
+
+/* Print every line of the usage text to standard output. */
+void usage(void) {
+    int i;
+
+    for (i = 0; usagetext[i] != NULL; i++)
+	puts(usagetext[i]);
+}
+
+/* Print the program name and the `version' string to standard output. */
+void showversion(void) {
+    printf("Halibut, %s\n", version);
+}
diff --git a/index.c b/index.c
new file mode 100644
index 0000000..3b1df51
--- /dev/null
+++ b/index.c
@@ -0,0 +1,230 @@
+/*
+ * index.c: create and collate index data structures
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "halibut.h"
+
+static int compare_tags(void *av, void *bv);
+static int compare_entries(void *av, void *bv);
+
+/*
+ * Create an empty index: one tree of tags ordered by compare_tags()
+ * and one tree of entries ordered by compare_entries(). Release it
+ * with cleanup_index().
+ */
+indexdata *make_index(void) {
+    indexdata *idx;
+
+    idx = mknew(indexdata);
+    idx->tags = newtree234(compare_tags);
+    idx->entries = newtree234(compare_entries);
+
+    return idx;
+}
+
+/*
+ * Allocate an index tag with no name, no implicit or explicit texts
+ * and no references.
+ */
+static indextag *make_indextag(void) {
+    indextag *tag = mknew(indextag);
+
+    tag->name = NULL;
+    tag->implicit_text = NULL;
+
+    tag->explicit_texts = NULL;
+    tag->nexplicit = 0;
+    tag->explicit_size = 0;
+
+    tag->refs = NULL;
+    tag->nrefs = 0;
+
+    return tag;
+}
+
+/* Tree ordering for index tags: by name, ignoring case. */
+static int compare_tags(void *av, void *bv) {
+    indextag *lhs = av;
+    indextag *rhs = bv;
+
+    return ustricmp(lhs->name, rhs->name);
+}
+
+/*
+ * Search comparison for the tag tree: `av' is a bare tag name (a wide
+ * string), `bv' is an indextag. Names are compared ignoring case.
+ */
+static int compare_to_find_tag(void *av, void *bv) {
+    wchar_t *wanted = av;
+    indextag *candidate = bv;
+
+    return ustricmp(wanted, candidate->name);
+}
+
+/* Tree ordering for index entries: by comparing their word lists. */
+static int compare_entries(void *av, void *bv) {
+    indexentry *lhs = av;
+    indexentry *rhs = bv;
+
+    return compare_wordlists(lhs->text, rhs->text);
+}
+
+/*
+ * Back-end utility: look up the indextag called `name' in the tag
+ * tree of `idx', using compare_to_find_tag() (so case is ignored).
+ * The result is whatever find234() returns for that search.
+ */
+indextag *index_findtag(indexdata *idx, wchar_t *name) {
+    indextag *found;
+
+    found = find234(idx->tags, name, compare_to_find_tag);
+    return found;
+}
+
+/*
+ * Add a \IM. `tags' points to a zero-terminated chain of
+ * zero-terminated strings ("first\0second\0thirdandlast\0\0").
+ * `text' points to a word list.
+ *
+ * Guarantee on calling sequence: all implicit merges are given
+ * before the explicit ones.
+ *
+ * For each tag in the chain, a provisional indextag is offered to the
+ * tag tree. If the tag is new, it keeps its own copy of the name and
+ * (for an implicit merge) records `text' as its implicit text. If the
+ * tag already exists, an explicit merge replaces any implicit text
+ * and appends `text' to the tag's explicit list, while a repeated
+ * implicit merge changes nothing.
+ *
+ * NOTE(review): in that last case `text' is neither stored nor freed
+ * here -- check who owns it. Also, when `tags' holds several names,
+ * the same `text' pointer is stored under each of them.
+ */
+void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text) {
+    indextag *t, *existing;
+
+    /*
+     * FIXME: want to warn on overlapping source sets.
+     */
+    for (; *tags; tags = uadv(tags)) {
+	t = make_indextag();
+	/* Borrow the caller's string just for the tree comparison. */
+	t->name = tags;
+	existing = add234(idx->tags, t);
+	if (existing == t) {
+	    /*
+	     * Duplicate this so we can free it independently.
+	     */
+	    t->name = ustrdup(tags);
+
+	    /*
+	     * Every tag has an implicit \IM. So if this tag
+	     * doesn't exist and we're explicit, then we should
+	     * warn (and drop it, since it won't be referenced).
+	     * NOTE(review): the tag is not actually removed; it stays
+	     * in the tree with no texts attached.
+	     */
+	    if (is_explicit) {
+		error(err_nosuchidxtag, tags);
+		continue;
+	    }
+
+	    /*
+	     * Otherwise, this is a new tag with an implicit \IM.
+	     */
+	    t->implicit_text = text;
+	} else {
+	    /* Tag already known: discard the provisional record. */
+	    sfree(t);
+	    t = existing;
+	    if (!is_explicit) {
+ 		/*
+		 * An implicit \IM for a tag that's had an implicit
+		 * \IM before. FIXME: we should check the text
+		 * against the existing text and warn on
+		 * differences. And check the tag for case match
+		 * against the existing tag, likewise.
+		 */
+	    } else {
+		/*
+		 * An explicit \IM added to a valid tag. In
+		 * particular, this removes the implicit \IM if
+		 * present.
+		 */
+		if (t->implicit_text) {
+		    free_word_list(t->implicit_text);
+		    t->implicit_text = NULL;
+		}
+		/* Grow the explicit list in steps of 8 when full. */
+		if (t->nexplicit >= t->explicit_size) {
+		    t->explicit_size = t->nexplicit + 8;
+		    t->explicit_texts = resize(t->explicit_texts,
+					       t->explicit_size);
+		}
+		t->explicit_texts[t->nexplicit++] = text;
+	    }
+	}
+    }
+}
+
+/*
+ * Build the final-form index. Every tag, with every \IM, is by now
+ * held in the tag tree. For each tag, take its texts (the single
+ * implicit text if there is one, otherwise all the explicit texts),
+ * wrap each in an index entry and add it to the entry tree, which
+ * collates identical texts. The tag's refs array then points at the
+ * entry actually held by the tree for each text; an entry that turned
+ * out to be a duplicate is freed again straight away.
+ */
+void build_index(indexdata *i) {
+    indextag *tag;
+    int tagpos;
+
+    for (tagpos = 0;
+	 (tag = (indextag *)index234(i->tags, tagpos)) != NULL;
+	 tagpos++) {
+	word **texts;
+	int k;
+
+	/* Choose the source of this tag's texts. */
+	if (tag->implicit_text) {
+	    texts = &tag->implicit_text;
+	    tag->nrefs = 1;
+	} else {
+	    texts = tag->explicit_texts;
+	    tag->nrefs = tag->nexplicit;
+	}
+
+	if (tag->nrefs == 0)
+	    continue;		       /* nothing refers to this tag */
+
+	tag->refs = mknewa(indexentry *, tag->nrefs);
+	for (k = 0; k < tag->nrefs; k++) {
+	    indexentry *fresh = mknew(indexentry);
+	    indexentry *kept;
+
+	    fresh->text = texts[k];
+	    kept = add234(i->entries, fresh);
+	    tag->refs[k] = kept;
+	    if (kept != fresh)	       /* duplicate */
+		sfree(fresh);
+	}
+    }
+}
+
+/*
+ * Free an index created by make_index(): every tag (its name, its
+ * implicit text, and its explicit-text and reference arrays), every
+ * entry record, both trees, and the indexdata itself. The word lists
+ * referenced from explicit_texts and from the entries are not freed
+ * here.
+ */
+void cleanup_index(indexdata *i) {
+    indextag *tag;
+    indexentry *entry;
+    int pos;
+
+    pos = 0;
+    while ((tag = (indextag *)index234(i->tags, pos)) != NULL) {
+	sfree(tag->name);
+	free_word_list(tag->implicit_text);
+	sfree(tag->explicit_texts);
+	sfree(tag->refs);
+	sfree(tag);
+	pos++;
+    }
+    freetree234(i->tags);
+
+    pos = 0;
+    while ((entry = (indexentry *)index234(i->entries, pos)) != NULL) {
+	sfree(entry);
+	pos++;
+    }
+    freetree234(i->entries);
+
+    sfree(i);
+}
+
+static void dbg_prtwordlist(int level, word *w);
+static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text);
+
+/*
+ * Debugging aid: dump the whole index to standard output. First every
+ * tag with its implicit and explicit \IM texts, then every collated
+ * entry with its word list.
+ */
+void index_debug(indexdata *i) {
+    indextag *tag;
+    indexentry *entry;
+    int pos, k;
+
+    printf("\nINDEX TAGS\n==========\n\n");
+    pos = 0;
+    while ((tag = (indextag *)index234(i->tags, pos)) != NULL) {
+	printf("\n");
+	if (tag->implicit_text)
+	    dbg_prtmerge(0, tag->name, tag->implicit_text);
+	for (k = 0; k < tag->nexplicit; k++)
+	    dbg_prtmerge(1, tag->name, tag->explicit_texts[k]);
+	pos++;
+    }
+
+    printf("\nINDEX ENTRIES\n=============\n\n");
+    pos = 0;
+    while ((entry = (indexentry *)index234(i->entries, pos)) != NULL) {
+	printf("\n");
+	printf("{\n");
+	dbg_prtwordlist(1, entry->text);
+	printf("}\n");
+	pos++;
+    }
+}
+
+/*
+ * Debugging aid: print one \IM record -- whether it is explicit or
+ * implicit, the tag name (each wide character passed to putchar), and
+ * the associated word list in braces.
+ */
+static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text) {
+    int k;
+
+    printf("\\IM: %splicit: \"", is_explicit ? "ex" : "im");
+    for (k = 0; tag[k]; k++)
+	putchar(tag[k]);
+    printf("\" {\n");
+    dbg_prtwordlist(1, text);
+    printf("}\n");
+}
+
+/*
+ * Debugging aid: print a word list, one word per line, indented by
+ * four spaces per `level'. Each line shows the word's numeric type
+ * and its text (or "(no text)"); a word with an alternative list has
+ * that list printed recursively, one level deeper, inside braces.
+ */
+static void dbg_prtwordlist(int level, word *w) {
+    int indent = level*4;
+    word *cur;
+
+    for (cur = w; cur != NULL; cur = cur->next) {
+	printf("%*sword %d ", indent, "", cur->type);
+
+	if (cur->text) {
+	    wchar_t *wp = cur->text;
+
+	    printf("\"");
+	    while (*wp) {
+		putchar(*wp);
+		wp++;
+	    }
+	    printf("\"");
+	} else {
+	    printf("(no text)");
+	}
+
+	if (cur->alt) {
+	    printf(" alt = {\n");
+	    dbg_prtwordlist(level+1, cur->alt);
+	    printf("%*s}", indent, "");
+	}
+
+	printf("\n");
+    }
+}
diff --git a/input.c b/input.c
new file mode 100644
index 0000000..15e0a97
--- /dev/null
+++ b/input.c
@@ -0,0 +1,1164 @@
+/*
+ * input.c: read the source form
+ */
+
+#include <stdio.h>
+#include <assert.h>
+#include <time.h>
+#include "halibut.h"
+
+#define TAB_STOP 8		       /* for column number tracking */
+
+static void setpos(input *in, char *fname) {
+    in->pos.col = -1;		       /* -1 when columns are not reported */
+    if (in->reportcols) in->pos.col = 1;
+    in->pos.line = 1, in->pos.filename = fname;
+}
+
+static void unget(input *in, int c, filepos *pos) {
+    int slot = in->npushback;	       /* index the pushed-back char will use */
+    if (slot >= in->pushbacksize) {    /* full: make room for 16 more */
+	in->pushbacksize = slot + 16;
+	in->pushback = resize(in->pushback, in->pushbacksize);
+    }
+    in->pushback[slot].chr = c, in->pushback[slot].pos = *pos;
+    in->npushback = slot + 1;
+}
+
+/* ---------------------------------------------------------------------- */
+/*
+ * Macro subsystem
+ */
+typedef struct macro_Tag macro;
+struct macro_Tag {
+    wchar_t *name, *text;	       /* macro name and its replacement text */
+};
+struct macrostack_Tag {
+    macrostack *next;		       /* expansion that was active before this one */
+    wchar_t *text;		       /* text being read (points at the macro's own) */
+    int ptr, npushback;		       /* read offset; pushback depth at start */
+    filepos pos;		       /* position passed in by the invoking lookup */
+};
+static int macrocmp(void *av, void *bv) {
+    wchar_t *aname = ((macro *)av)->name, *bname = ((macro *)bv)->name;
+    return ustrcmp(aname, bname);      /* macros are ordered by name alone */
+}
+static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text,
+		     filepos fpos) {    /* takes ownership of name and text */
+    macro *m = mknew(macro);
+    m->name = name, m->text = text;
+    if (add234(macros, m) != m) {      /* a macro of this name already exists */
+	error(err_macroexists, &fpos, name);
+	sfree(name);
+	sfree(text);
+	sfree(m);		       /* the rejected record used to be leaked */
+    }
+}
+static int macrolookup(tree234 *macros, input *in, wchar_t *name,
+		       filepos *pos) {
+    macro key, *found;
+    macrostack *frame;
+    key.name = name;		       /* search key: only the name is filled in */
+    found = find234(macros, &key, NULL);
+    if (!found)
+	return FALSE;		       /* no such macro: nothing is pushed */
+    frame = mknew(macrostack);
+    frame->text = found->text;
+    frame->ptr = 0;
+    frame->npushback = in->npushback;
+    frame->pos = *pos;		       /* structure copy */
+    frame->next = in->stack;
+    in->stack = frame;		       /* get() now reads from this expansion */
+    return TRUE;
+}
+static void macrocleanup(tree234 *macros) {
+    macro *m;
+    int pos = 0;
+    while ((m = (macro *)index234(macros, pos++)) != NULL) {
+	sfree(m->name);
+	sfree(m->text);
+	sfree(m);
+    }
+    freetree234(macros);	       /* finally the tree structure itself */
+}
+
+/*
+ * Read one character: pushback, then macro text, then file. Can return EOF
+ */
+static int get(input *in, filepos *pos) {
+    /* Pushback older than the current macro expansion stays hidden for now */
+    int pushbackpt = in->stack ? in->stack->npushback : 0;
+    if (in->npushback > pushbackpt) {
+	--in->npushback;
+	if (pos)
+	    *pos = in->pushback[in->npushback].pos;   /* structure copy */
+	return in->pushback[in->npushback].chr;
+    }
+    else if (in->stack) {
+	wchar_t c = in->stack->text[in->stack->ptr];
+	if (pos) *pos = in->stack->pos;   /* was left unset: report stored pos */
+	if (in->stack->text[++in->stack->ptr] == L'\0') {
+	    macrostack *tmp = in->stack;   /* expansion used up: pop it */
+	    in->stack = tmp->next;
+	    sfree(tmp);
+	}
+	return c;
+    }
+    else if (in->currfp) {
+	int c = getc(in->currfp);
+	if (c == EOF) {		       /* file exhausted: close it */
+	    fclose(in->currfp);
+	    in->currfp = NULL;
+	}
+	/* Track line numbers, for error reporting */
+	if (pos)
+	    *pos = in->pos;
+	if (in->reportcols) {
+	    switch (c) {
+	      case '\t':	       /* advance to the next tab stop */
+		in->pos.col = 1 + (in->pos.col + TAB_STOP-1) / TAB_STOP * TAB_STOP;
+		break;
+	      case '\n':
+		in->pos.col = 1;
+		in->pos.line++;
+		break;
+	      default:
+		in->pos.col++;
+		break;
+	    }
+	} else {
+	    in->pos.col = -1;
+	    if (c == '\n')
+		in->pos.line++;
+	}
+	/* FIXME: do input charmap translation. We should be returning
+	 * Unicode here. */
+	return c;
+    } else
+	return EOF;
+}
+
+/*
+ * Lexical analysis of source files.
+ */
+typedef struct token_Tag token;
+struct token_Tag {
+    int type;			       /* one of the tok_* values below */
+    int cmd, aux;		       /* c_* command id; extra data (use varies) */
+    wchar_t *text;		       /* dynamically allocated text, or NULL */
+    filepos pos;		       /* position of the token's first character */
+};
+enum {
+    tok_eof,			       /* end of file */
+    tok_eop,			       /* end of paragraph */
+    tok_white,			       /* whitespace */
+    tok_word,			       /* a word or word fragment */
+    tok_cmd,			       /* \command */
+    tok_lbrace,			       /* { */
+    tok_rbrace			       /* } */
+};
+
+/* Halibut command keywords (values stored in a token's `cmd' field). */
+enum {
+    c__invalid,			       /* invalid command */
+    c__comment,			       /* comment command (\#) */
+    c__escaped,			       /* escaped character */
+    c__nbsp,			       /* nonbreaking space */
+    c_A,			       /* appendix heading */
+    c_B,			       /* bibliography entry */
+    c_BR,			       /* bibliography rewrite */
+    c_C,			       /* chapter heading */
+    c_H,			       /* heading */
+    c_I,			       /* invisible index mark */
+    c_IM,			       /* index merge/rewrite */
+    c_K,			       /* capitalised cross-reference */
+    c_S,			       /* aux field is 0, 1, 2, ... */
+    c_U,			       /* unnumbered-chapter heading */
+    c_W,			       /* Web hyperlink */
+    c_b,			       /* bulleted list */
+    c_c,			       /* code */
+    c_cfg,			       /* configuration directive */
+    c_copyright,		       /* copyright statement */
+    c_cw,			       /* weak code */
+    c_date,			       /* document processing date */
+    c_define,			       /* macro definition */
+    c_e,			       /* emphasis */
+    c_i,			       /* visible index mark */
+    c_ii,			       /* uncapitalised visible index mark */
+    c_k,			       /* uncapitalised cross-reference */
+    c_n,			       /* numbered list */
+    c_nocite,			       /* bibliography trickery */
+    c_preamble,			       /* document preamble text */
+    c_q,			       /* quote marks */
+    c_rule,			       /* horizontal rule */
+    c_title,			       /* document title */
+    c_u,			       /* aux field is char code */
+    c_versionid			       /* document RCS id */
+};
+
+/* ASCII-only character tests. Perhaps whitespace should be more Unicode-friendly? */
+#define iswhite(c) ( (c)==32 || (c)==9 || (c)==13 || (c)==10 )
+#define isnl(c) ( (c)==10 )
+#define isdec(c) ( ((c)>='0'&&(c)<='9') )
+#define fromdec(c) ( (c)-'0' )
+#define ishex(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f'))
+#define fromhex(c) ( (c)<='9' ? (c)-'0' : ((c)&0xDF) - ('A'-10) )
+#define iscmd(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z'))
+
+/*
+ * Keyword comparison function. Works like strcmp, but compares a
+ * wchar_t * against a char *.
+ */
+static int kwcmp(wchar_t const *p, char const *q) {
+    int diff;
+    /* Advance while the two strings agree and have not yet ended. */
+    while ((diff = *p - *q) == 0 && *p != 0)
+	p++, q++;
+    return diff;
+}
+
+/*
+ * Match a keyword: set tok->cmd (and tok->aux for \S and \u) from tok->text.
+ */
+static void match_kw(token *tok) {
+    /* NB: keep this table in ascending kwcmp order; it is binary-searched.
+     * FIXME. The ids are explicit in here so as to allow long-name
+     * equivalents to the various very short keywords.
+     */
+    static const struct { char const *name; int id; } keywords[] = {
+	{"#", c__comment},	       /* comment command (\#) */
+	{"-", c__escaped},	       /* nonbreaking hyphen */
+	{"A", c_A},		       /* appendix heading */
+	{"B", c_B},		       /* bibliography entry */
+	{"BR", c_BR},		       /* bibliography rewrite */
+	{"C", c_C},		       /* chapter heading */
+	{"H", c_H},		       /* heading */
+	{"I", c_I},		       /* invisible index mark */
+	{"IM", c_IM},		       /* index merge/rewrite */
+	{"K", c_K},		       /* capitalised cross-reference */
+	{"U", c_U},		       /* unnumbered-chapter heading */
+	{"W", c_W},		       /* Web hyperlink */
+	{"\\", c__escaped},	       /* escaped backslash (\\) */
+	{"_", c__nbsp},		       /* nonbreaking space (\_) */
+	{"b", c_b},		       /* bulleted list */
+	{"c", c_c},		       /* code */
+	{"cfg", c_cfg},		       /* configuration directive */
+	{"copyright", c_copyright},    /* copyright statement */
+	{"cw", c_cw},		       /* weak code */
+	{"date", c_date},	       /* document processing date */
+	{"define", c_define},	       /* macro definition */
+	{"e", c_e},		       /* emphasis */
+	{"i", c_i},		       /* visible index mark */
+	{"ii", c_ii},		       /* uncapitalised visible index mark */
+	{"k", c_k},		       /* uncapitalised cross-reference */
+	{"n", c_n},		       /* numbered list */
+	{"nocite", c_nocite},	       /* bibliography trickery */
+	{"preamble", c_preamble},      /* document preamble text */
+	{"q", c_q},		       /* quote marks */
+	{"rule", c_rule},	       /* horizontal rule */
+	{"title", c_title},	       /* document title */
+	{"versionid", c_versionid},    /* document RCS id */
+	{"{", c__escaped},	       /* escaped lbrace (\{) */
+	{"}", c__escaped},	       /* escaped rbrace (\}) */
+    };
+    int i, j, k, c;
+
+    /*
+     * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
+     * doesn't match correctly, we just fall through to the
+     * binary-search phase.
+     */
+    if (tok->text[0] == 'S') {
+	/* We expect numeric characters thereafter. */
+	wchar_t *p = tok->text+1;
+	int n;
+	if (!*p)
+	    n = 1;		       /* a bare \S yields aux 1 */
+	else {
+	    n = 0;
+	    while (*p && isdec(*p)) {
+		n = 10 * n + fromdec(*p);
+		p++;
+	    }
+	}
+	if (!*p) {		       /* whole of the text was consumed */
+	    tok->cmd = c_S;
+	    tok->aux = n;
+	    return;
+	}
+    } else if (tok->text[0] == 'u') {
+	/* We expect hex characters thereafter. */
+	wchar_t *p = tok->text+1;
+	int n = 0;
+	while (*p && ishex(*p)) {
+	    n = 16 * n + fromhex(*p);
+	    p++;
+	}
+	if (!*p) {		       /* also true for a bare \u (aux 0) */
+	    tok->cmd = c_u;
+	    tok->aux = n;
+	    return;
+	}
+    }
+
+    i = -1;			       /* i and j bracket the candidates, */
+    j = sizeof(keywords)/sizeof(*keywords);   /* both ends exclusive */
+    while (j-i > 1) {
+	k = (i+j)/2;
+	c = kwcmp(tok->text, keywords[k].name);
+	if (c < 0)
+	    j = k;
+	else if (c > 0)
+	    i = k;
+	else /* c == 0 */ {
+	    tok->cmd = keywords[k].id;
+	    return;
+	}
+    }
+
+    tok->cmd = c__invalid;	       /* not found in the table */
+}
+
+
+/*
+ * Read a token from the input file, in the normal way (`normal' in
+ * the sense that code paragraphs work a different way).
+ */
+token get_token(input *in) {
+    int c;
+    int nls;			       /* newlines seen in a whitespace run */
+    token ret;			       /* NB cmd/aux are only set where noted */
+    rdstring rs = { 0, 0, NULL };
+    filepos cpos;
+
+    ret.text = NULL;		       /* default */
+    c = get(in, &cpos);
+    ret.pos = cpos;
+    if (iswhite(c)) {		       /* tok_white or tok_eop */
+	nls = 0;
+	do {
+	    if (isnl(c))
+		nls++;
+	} while ((c = get(in, &cpos)) != EOF && iswhite(c));
+	if (c == EOF) {		       /* whitespace running into EOF is dropped */
+	    ret.type = tok_eof;
+	    return ret;
+	}
+	unget(in, c, &cpos);
+	ret.type = (nls > 1 ? tok_eop : tok_white);   /* 2+ newlines: new para */
+	return ret;
+    } else if (c == EOF) {	       /* tok_eof */
+	ret.type = tok_eof;
+	return ret;
+    } else if (c == '\\') {	       /* tok_cmd */
+	c = get(in, &cpos);
+	if (c == '-' || c == '\\' || c == '_' ||
+	    c == '#' || c == '{' || c == '}') {
+	    /* single-char command */
+	    rdadd(&rs, c);
+	} else if (c == 'u') {
+	    int len = 0;	       /* counts the `u' plus up to 4 more chars */
+	    do {
+		rdadd(&rs, c);
+		len++;
+		c = get(in, &cpos);
+	    } while (ishex(c) && len < 5);
+	    unget(in, c, &cpos);
+	} else if (iscmd(c)) {
+	    do {
+		rdadd(&rs, c);
+		c = get(in, &cpos);
+	    } while (iscmd(c));
+	    unget(in, c, &cpos);
+	}   /* NOTE(review): any other char here is consumed, not pushed back */
+	/*
+	 * Now match the command against the list of available
+	 * ones.
+	 */
+	ret.type = tok_cmd;
+	ret.text = ustrdup(rs.text);
+	match_kw(&ret);		       /* fills in ret.cmd (and maybe ret.aux) */
+	sfree(rs.text);
+	return ret;
+    } else if (c == '{') {	       /* tok_lbrace */
+	ret.type = tok_lbrace;
+	return ret;
+    } else if (c == '}') {	       /* tok_rbrace */
+	ret.type = tok_rbrace;
+	return ret;
+    } else {			       /* tok_word */
+	/*
+	 * Read a word: the longest possible contiguous sequence of
+	 * things other than whitespace, backslash, braces and
+	 * hyphen. A hyphen terminates the word but is returned as
+	 * part of it; everything else is pushed back for the next
+	 * token. The `aux' field contains TRUE if the word ends in
+	 * a hyphen.
+	 */
+	ret.aux = FALSE;	       /* assumed for now */
+	while (1) {
+	    if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) {
+		/* Put back the character that caused termination */
+		unget(in, c, &cpos);
+		break;
+	    } else {
+		rdadd(&rs, c);
+		if (c == '-') {
+		    ret.aux = TRUE;
+		    break;	       /* hyphen terminates word */
+		}
+	    }
+	    c = get(in, &cpos);
+	}
+	ret.type = tok_word;
+	ret.text = ustrdup(rs.text);
+	sfree(rs.text);
+	return ret;
+    }
+}
+
+/*
+ * Peek at the next input character and report whether it is an
+ * open brace; the character is pushed straight back. (Used to tell
+ * code paragraphs from paragraphs which merely start with code.)
+ */
+int isbrace(input *in) {
+    filepos where;
+    int next = get(in, &where);
+
+    unget(in, next, &where);	       /* nothing is consumed */
+    if (next == '{') return 1;
+    return 0;
+}
+
+/*
+ * Read the rest of a line that starts `\c'. Including nothing at
+ * all (tok_word with empty text).
+ */
+token get_codepar_token(input *in) {
+    token line;
+    rdstring buf = { 0, 0, NULL };
+    filepos where;
+    int ch, prev;
+
+    line.type = tok_word;
+    ch = get(in, &where);
+    if (ch == ' ')		       /* expect (and discard) one space */
+	ch = get(in, &where);
+    line.pos = where;		       /* position from the last get() above */
+    for (;;) {
+	if (isnl(ch) || ch == EOF)
+	    break;
+	prev = ch;
+	ch = get(in, &where);
+	/* A \r directly before the \n is dropped; all else is kept. */
+	if (!(prev == 13 && isnl(ch)))
+	    rdadd(&buf, prev);
+    }
+    unget(in, ch, &where);	       /* the terminator goes back on the input */
+    line.text = ustrdup(buf.text);
+    sfree(buf.text);
+    return line;
+}
+
+/*
+ * Append a heap copy of `newword' at the list tail that *hptrptr
+ * designates, then move that tail on to the copy's `next' field.
+ */
+static word *addword(word newword, word ***hptrptr) {
+    word *node;
+    if (hptrptr == NULL) return NULL;
+    node = mknew(word);
+    *node = newword;		       /* structure copy */
+    node->next = NULL;
+    **hptrptr = node;		       /* hook onto the current tail... */
+    *hptrptr = &node->next;	       /* ...and become the new tail */
+    return node;
+}
+
+/*
+ * Append a heap copy of `newpara' to a paragraph list via its tail pointer
+ */
+static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
+    paragraph *node = mknew(paragraph);
+    node[0] = newpara;		       /* structure copy */
+    node->next = NULL;
+    **hptrptr = node;		       /* hook onto the current tail... */
+    *hptrptr = &node->next;	       /* ...and become the new tail */
+    return node;
+}
+
+/*
+ * Destructor before token is reassigned; should catch most memory
+ * leaks. Frees only the token's text (argument now parenthesised).
+ */
+#define dtor(t) ( sfree((t).text) )
+
+/*
+ * Reads a single file (ie until get() returns EOF) into the list at *ret
+ */
+static void read_file(paragraph ***ret, input *in, indexdata *idx) {
+    token t;
+    paragraph par;
+    word wd, **whptr, **idximplicit;
+    tree234 *macros;		       /* macros defined so far in this file */
+    wchar_t utext[2], *wdtext;
+    int style, spcstyle;
+    int already;		       /* TRUE if the next token is already in t */
+    int iswhite, seenwhite;	       /* this / the previous token was whitespace */
+    int type;
+    struct stack_item {
+	enum {
+	    stack_nop = 0,	       /* do nothing (for error recovery) */
+	    stack_ualt = 1,	       /* \u alternative */
+	    stack_style = 2,	       /* \e, \c, \cw */
+	    stack_idx = 4,	       /* \I, \i, \ii */
+	    stack_hyper = 8,	       /* \W */
+	    stack_quote = 16,	       /* \q */
+	} type;
+	word **whptr;		       /* to restore from \u alternatives */
+	word **idximplicit;	       /* to restore from \u alternatives */
+    } *sitem;
+    stack parsestk;
+    word *indexword, *uword, *iword;
+    word *idxwordlist;
+    rdstring indexstr;
+    int index_downcase, index_visible, indexing;
+    const rdstring nullrs = { 0, 0, NULL };
+    wchar_t uchr;
+
+    t.text = NULL;
+    macros = newtree234(macrocmp);
+
+    /*
+     * Loop on each paragraph.
+     */
+    while (1) {
+	par.words = NULL;
+	par.keyword = NULL;
+	whptr = &par.words;
+
+	/*
+	 * Get a token.
+	 */
+	dtor(t), t = get_token(in);
+	if (t.type == tok_eof)
+	    break;		       /* not `return': the cleanup below must run */
+
+	/*
+	 * Parse code paragraphs separately.
+	 */
+	if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
+	    par.type = para_Code;
+	    par.fpos = t.pos;
+	    while (1) {
+		dtor(t), t = get_codepar_token(in);
+		wd.type = word_WeakCode;
+		wd.breaks = FALSE;     /* shouldn't need this... */
+		wd.text = ustrdup(t.text);
+		wd.alt = NULL;
+		wd.fpos = t.pos;
+		addword(wd, &whptr);
+		dtor(t), t = get_token(in);
+		if (t.type == tok_white) {
+		    /*
+		     * The newline after a code-paragraph line
+		     */
+		    dtor(t), t = get_token(in);
+		}
+		if (t.type == tok_eop || t.type == tok_eof)
+		    break;
+		else if (t.type != tok_cmd || t.cmd != c_c) {
+		    error(err_brokencodepara, &t.pos);
+		    addpara(par, ret);
+		    while (t.type != tok_eop && t.type != tok_eof)
+			dtor(t), t = get_token(in);   /* recovery: eat rest of para */
+		    goto codeparabroken;   /* ick, but such is life */
+		}
+	    }
+	    addpara(par, ret);
+	    codeparabroken:
+	    continue;
+	}
+
+	/*
+	 * This token begins a paragraph. See if it's one of the
+	 * special commands that define a paragraph type.
+	 *
+	 * (note that \# is special in a way, and \nocite takes no
+	 * text)
+	 */
+	par.type = para_Normal;
+	if (t.type == tok_cmd) {
+	    int needkw = -1;	       /* stays -1 when `\#{' breaks out below */
+	    int is_macro = FALSE;
+
+	    par.fpos = t.pos;
+	    switch (t.cmd) {
+	      default:
+		needkw = -1;
+		break;
+	      case c__invalid:
+		error(err_badparatype, t.text, &t.pos);
+		needkw = 4;
+		break;
+	      case c__comment:
+		if (isbrace(in))
+		    break;	       /* `\#{': isn't a comment para */
+		do {
+		    dtor(t), t = get_token(in);
+		} while (t.type != tok_eop && t.type != tok_eof);
+		continue;	       /* next paragraph */
+		/*
+		 * `needkw' values:
+		 *
+		 *   1 -- exactly one keyword
+		 *   2 -- at least one keyword
+		 *   4 -- any number of keywords including zero
+		 *   8 -- at least one keyword and then nothing else
+		 *  16 -- nothing at all! no keywords, no body
+		 *  32 -- no keywords at all
+		 */
+	      case c_A: needkw = 2; par.type = para_Appendix; break;
+	      case c_B: needkw = 2; par.type = para_Biblio; break;
+	      case c_BR: needkw = 1; par.type = para_BR; break;
+	      case c_C: needkw = 2; par.type = para_Chapter; break;
+	      case c_H: needkw = 2; par.type = para_Heading;
+		par.aux = 0;
+		break;
+	      case c_IM: needkw = 2; par.type = para_IM; break;
+	      case c_S: needkw = 2; par.type = para_Subsect;
+		par.aux = t.aux; break;
+	      case c_U: needkw = 32; par.type = para_UnnumberedChapter; break;
+		/* For \b and \n the keyword is optional */
+	      case c_b: needkw = 4; par.type = para_Bullet; break;
+	      case c_n: needkw = 4; par.type = para_NumberedList; break;
+	      case c_cfg: needkw = 8; par.type = para_Config; break;
+	      case c_copyright: needkw = 32; par.type = para_Copyright; break;
+	      case c_define: is_macro = TRUE; needkw = 1; break;
+		/* For \nocite the keyword is _everything_ */
+	      case c_nocite: needkw = 8; par.type = para_NoCite; break;
+	      case c_preamble: needkw = 32; par.type = para_Preamble; break;
+	      case c_rule: needkw = 16; par.type = para_Rule; break;
+	      case c_title: needkw = 32; par.type = para_Title; break;
+	      case c_versionid: needkw = 32; par.type = para_VersionID; break;
+	    }
+
+	    if (needkw > 0) {
+		rdstring rs = { 0, 0, NULL };
+		int nkeys = 0;
+		filepos fp;
+
+		/* Get keywords. */
+		dtor(t), t = get_token(in);
+		fp = t.pos;
+		while (t.type == tok_lbrace) {
+		    /* This is a keyword. */
+		    nkeys++;
+		    /* FIXME: there will be bugs if anyone specifies an
+		     * empty keyword (\foo{}), so trap this case. */
+		    while (dtor(t), t = get_token(in),
+			   t.type == tok_word ||
+			   t.type == tok_white ||
+			   (t.type == tok_cmd && t.cmd == c__nbsp) ||
+			   (t.type == tok_cmd && t.cmd == c__escaped)) {
+			if (t.type == tok_white ||
+			    (t.type == tok_cmd && t.cmd == c__nbsp))
+			    rdadd(&rs, ' ');
+			else
+			    rdadds(&rs, t.text);
+		    }
+		    if (t.type != tok_rbrace) {
+			error(err_kwunclosed, &t.pos);
+			continue;
+		    }
+		    rdadd(&rs, 0);     /* add string terminator */
+		    dtor(t), t = get_token(in); /* eat right brace */
+		}
+
+		rdadd(&rs, 0);     /* add string terminator */
+
+		/* See whether we have the right number of keywords. */
+		if ((needkw & 48) && nkeys > 0)
+		    error(err_kwillegal, &fp);
+		if ((needkw & 11) && nkeys == 0)
+		    error(err_kwexpected, &fp);
+		if ((needkw & 5) && nkeys > 1)
+		    error(err_kwtoomany, &fp);
+
+		if (is_macro) {
+		    /*
+		     * Macro definition. Get the rest of the line
+		     * as a code-paragraph token, repeatedly until
+		     * there's nothing more left of it. Separate
+		     * with newlines.
+		     */
+		    rdstring macrotext = { 0, 0, NULL };
+		    while (1) {
+			dtor(t), t = get_codepar_token(in);
+			if (macrotext.pos > 0)
+			    rdadd(&macrotext, L'\n');
+			rdadds(&macrotext, t.text);
+			dtor(t), t = get_token(in);
+			if (t.type == tok_eop || t.type == tok_eof) break;
+		    }
+		    macrodef(macros, rs.text, macrotext.text, fp);
+		    continue;	       /* next paragraph */
+		}
+
+		par.keyword = rdtrim(&rs);
+
+		/* Move to EOP in case of needkw==8 or 16 (no body) */
+		if (needkw & 24) {
+		    if (t.type != tok_eop && t.type != tok_eof) {
+			error(err_bodyillegal, &t.pos);
+			/* Error recovery: eat the rest of the paragraph */
+			while (t.type != tok_eop && t.type != tok_eof)
+			    dtor(t), t = get_token(in);
+		    }
+		    addpara(par, ret);
+		    continue;	       /* next paragraph */
+		}
+	    }
+	}
+
+	/*
+	 * Now read the actual paragraph, word by word, adding to
+	 * the paragraph list.
+	 *
+	 * Mid-paragraph commands:
+	 *
+	 *  \K \k
+	 *  \c \cw
+	 *  \e
+	 *  \i \ii
+	 *  \I
+	 *  \u
+	 *  \W
+	 *  \date
+	 *  \\ \{ \}
+	 */
+	parsestk = stk_new();
+	style = word_Normal;
+	spcstyle = word_WhiteSpace;
+	indexing = FALSE;
+	seenwhite = TRUE;
+	while (t.type != tok_eop && t.type != tok_eof) {
+	    iswhite = FALSE;
+	    already = FALSE;
+	    if (t.type == tok_cmd && t.cmd == c__escaped) {
+		t.type = tok_word;     /* nice and simple */
+		t.aux = 0;	       /* even if `\-' - nonbreaking! */
+	    }
+	    if (t.type == tok_cmd && t.cmd == c__nbsp) {
+		t.type = tok_word;     /* nice and simple */
+		sfree(t.text);
+		t.text = ustrdup(L" ");  /* text is ` ' not `_' */
+		t.aux = 0;	       /* (nonbreaking) */
+	    }
+	    switch (t.type) {
+	      case tok_white:
+		if (whptr == &par.words)
+		    break;	       /* strip whitespace at start of para */
+		wd.text = NULL;
+		wd.type = spcstyle;
+		wd.alt = NULL;
+		wd.aux = 0;
+		wd.fpos = t.pos;
+		wd.breaks = FALSE;
+		if (indexing)
+		    rdadd(&indexstr, ' ');
+		if (!indexing || index_visible)
+		    addword(wd, &whptr);
+		if (indexing)
+		    addword(wd, &idximplicit);
+		iswhite = TRUE;
+		break;
+	      case tok_word:
+		if (indexing)
+		    rdadds(&indexstr, t.text);
+		wd.type = style;
+		wd.alt = NULL;
+		wd.aux = 0;
+		wd.fpos = t.pos;
+		wd.breaks = t.aux;
+		if (!indexing || index_visible) {
+		    wd.text = ustrdup(t.text);
+		    addword(wd, &whptr);
+		}
+		if (indexing) {
+		    wd.text = ustrdup(t.text);
+		    addword(wd, &idximplicit);
+		}
+		break;
+	      case tok_lbrace:
+		error(err_unexbrace, &t.pos);
+		/* Error recovery: push nop */
+		sitem = mknew(struct stack_item);
+		sitem->type = stack_nop;
+		stk_push(parsestk, sitem);
+		break;
+	      case tok_rbrace:
+		sitem = stk_pop(parsestk);
+		if (!sitem)
+		    error(err_unexbrace, &t.pos);
+		else {
+		    if (sitem->type & stack_ualt) {
+			whptr = sitem->whptr;
+			idximplicit = sitem->idximplicit;
+		    }
+		    if (sitem->type & stack_style) {
+			style = word_Normal;
+			spcstyle = word_WhiteSpace;
+		    }
+		    if (sitem->type & stack_idx) {
+			indexword->text = ustrdup(indexstr.text);
+			if (index_downcase)
+			    ustrlow(indexword->text);
+			indexing = FALSE;
+			rdadd(&indexstr, L'\0');
+			index_merge(idx, FALSE, indexstr.text, idxwordlist);
+			sfree(indexstr.text);
+		    }
+		    if (sitem->type & stack_hyper) {
+			wd.text = NULL;
+			wd.type = word_HyperEnd;
+			wd.alt = NULL;
+			wd.aux = 0;
+			wd.fpos = t.pos;
+			wd.breaks = FALSE;
+			if (!indexing || index_visible)
+			    addword(wd, &whptr);
+			if (indexing)
+			    addword(wd, &idximplicit);
+		    }
+		    if (sitem->type & stack_quote) {
+			wd.text = NULL;
+			wd.type = toquotestyle(style);
+			wd.alt = NULL;
+			wd.aux = quote_Close;
+			wd.fpos = t.pos;
+			wd.breaks = FALSE;
+			if (!indexing || index_visible)
+			    addword(wd, &whptr);
+			if (indexing) {
+			    rdadd(&indexstr, L'"');
+			    addword(wd, &idximplicit);
+			}
+		    }
+		}
+		sfree(sitem);
+		break;
+	      case tok_cmd:
+		switch (t.cmd) {
+		  case c__comment:
+		    /*
+		     * In-paragraph comment: \#{ balanced braces }
+		     *
+		     * Anything goes here; even tok_eop. We should
+		     * eat whitespace after the close brace _if_
+		     * there was whitespace before the \#.
+		     */
+		    dtor(t), t = get_token(in);
+		    if (t.type != tok_lbrace) {
+			error(err_explbr, &t.pos);
+		    } else {
+			int braces = 1;
+			while (braces > 0) {
+			    dtor(t), t = get_token(in);
+			    if (t.type == tok_lbrace)
+				braces++;
+			    else if (t.type == tok_rbrace)
+				braces--;
+			    else if (t.type == tok_eof) {
+				error(err_commenteof, &t.pos);
+				break;
+			    }
+			}
+		    }
+		    if (seenwhite) {
+			already = TRUE;
+			dtor(t), t = get_token(in);
+			if (t.type == tok_white) {
+			    iswhite = TRUE;
+			    already = FALSE;
+			}
+		    }
+		    break;
+		  case c_q:
+		    dtor(t), t = get_token(in);
+		    if (t.type != tok_lbrace) {
+			error(err_explbr, &t.pos);
+		    } else {
+			wd.text = NULL;
+			wd.type = toquotestyle(style);
+			wd.alt = NULL;
+			wd.aux = quote_Open;
+			wd.fpos = t.pos;
+			wd.breaks = FALSE;
+			if (!indexing || index_visible)
+			    addword(wd, &whptr);
+			if (indexing) {
+			    rdadd(&indexstr, L'"');
+			    addword(wd, &idximplicit);
+			}
+			sitem = mknew(struct stack_item);
+			sitem->type = stack_quote;
+			stk_push(parsestk, sitem);
+		    }
+		    break;
+		  case c_K:
+		  case c_k:
+		  case c_W:
+		  case c_date:
+		    /*
+		     * Keyword, hyperlink, or \date. We expect a
+		     * left brace, some text, and then a right
+		     * brace. No nesting; no arguments.
+		     */
+		    wd.fpos = t.pos;
+		    wd.breaks = FALSE;
+		    if (t.cmd == c_K)
+			wd.type = word_UpperXref;
+		    else if (t.cmd == c_k)
+			wd.type = word_LowerXref;
+		    else if (t.cmd == c_W)
+			wd.type = word_HyperLink;
+		    else
+			wd.type = word_Normal;
+		    dtor(t), t = get_token(in);
+		    if (t.type != tok_lbrace) {
+			if (wd.type == word_Normal) {
+			    time_t thetime = time(NULL);
+			    struct tm *broken = localtime(&thetime);
+			    already = TRUE;
+			    wdtext = ustrftime(NULL, broken);
+			    wd.type = style;
+			} else {
+			    error(err_explbr, &t.pos);
+			    wdtext = NULL;
+			}
+		    } else {
+			rdstring rs = { 0, 0, NULL };
+			while (dtor(t), t = get_token(in),
+			       t.type == tok_word || t.type == tok_white) {
+			    if (t.type == tok_white)
+				rdadd(&rs, ' ');
+			    else
+				rdadds(&rs, t.text);
+			}
+			if (wd.type == word_Normal) {
+			    time_t thetime = time(NULL);
+			    struct tm *broken = localtime(&thetime);
+			    wdtext = ustrftime(rs.text, broken);
+			    wd.type = style;
+			} else {
+			    wdtext = ustrdup(rs.text);
+			}
+			sfree(rs.text);
+			if (t.type != tok_rbrace) {
+			    error(err_kwexprbr, &t.pos);
+			}
+		    }
+		    wd.alt = NULL;
+		    wd.aux = 0;
+		    if (!indexing || index_visible) {
+			wd.text = ustrdup(wdtext);
+			addword(wd, &whptr);
+		    }
+		    if (indexing) {
+			wd.text = ustrdup(wdtext);
+			addword(wd, &idximplicit);
+		    }
+		    sfree(wdtext);
+		    if (wd.type == word_HyperLink) {
+			/*
+			 * Hyperlinks are different: they then
+			 * expect another left brace, to begin
+			 * delimiting the text marked by the link.
+			 */
+			dtor(t), t = get_token(in);
+			/*
+			 * Special cases: \W{}\c, \W{}\e, \W{}\cw
+			 */
+			sitem = mknew(struct stack_item);
+			sitem->type = stack_hyper;
+			if (t.type == tok_cmd &&
+			    (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
+			    if (style != word_Normal)
+				error(err_nestedstyles, &t.pos);
+			    else {
+				style = (t.cmd == c_c ? word_Code :
+					 t.cmd == c_cw ? word_WeakCode :
+					 word_Emph);
+				spcstyle = tospacestyle(style);
+				sitem->type |= stack_style;
+			    }
+			    dtor(t), t = get_token(in);
+			}
+			if (t.type != tok_lbrace) {
+			    error(err_explbr, &t.pos);
+			    sfree(sitem);
+			} else {
+			    stk_push(parsestk, sitem);
+			}
+		    }
+		    break;
+		  case c_c:
+		  case c_cw:
+		  case c_e:
+		    type = t.cmd;
+		    if (style != word_Normal) {
+			error(err_nestedstyles, &t.pos);
+			/* Error recovery: eat lbrace, push nop. */
+			dtor(t), t = get_token(in);
+			sitem = mknew(struct stack_item);
+			sitem->type = stack_nop;
+			stk_push(parsestk, sitem);
+		    }
+		    dtor(t), t = get_token(in);
+		    if (t.type != tok_lbrace) {
+			error(err_explbr, &t.pos);
+		    } else {
+			style = (type == c_c ? word_Code :
+				 type == c_cw ? word_WeakCode :
+				 word_Emph);
+			spcstyle = tospacestyle(style);
+			sitem = mknew(struct stack_item);
+			sitem->type = stack_style;
+			stk_push(parsestk, sitem);
+		    }
+		    break;
+		  case c_i:
+		  case c_ii:
+		  case c_I:
+		    type = t.cmd;
+		    if (indexing) {
+			error(err_nestedindex, &t.pos);
+			/* Error recovery: eat lbrace, push nop. */
+			dtor(t), t = get_token(in);
+			sitem = mknew(struct stack_item);
+			sitem->type = stack_nop;
+			stk_push(parsestk, sitem);
+		    }
+		    sitem = mknew(struct stack_item);
+		    sitem->type = stack_idx;
+		    dtor(t), t = get_token(in);
+		    /*
+		     * Special cases: \i\c, \i\e, \i\cw
+		     */
+		    wd.fpos = t.pos;
+		    if (t.type == tok_cmd &&
+			(t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
+			if (style != word_Normal)
+			    error(err_nestedstyles, &t.pos);
+			else {
+			    style = (t.cmd == c_c ? word_Code :
+				     t.cmd == c_cw ? word_WeakCode :
+				     word_Emph);
+			    spcstyle = tospacestyle(style);
+			    sitem->type |= stack_style;
+			}
+			dtor(t), t = get_token(in);
+		    }
+		    if (t.type != tok_lbrace) {
+			sfree(sitem);
+			error(err_explbr, &t.pos);
+		    } else {
+			/* Add an index-reference word with no text as yet */
+			wd.type = word_IndexRef;
+			wd.text = NULL;
+			wd.alt = NULL;
+			wd.aux = 0;
+			wd.breaks = FALSE;
+			indexword = addword(wd, &whptr);
+			/* Set up a rdstring to read the index text */
+			indexstr = nullrs;
+			/* Flags so that we do the Right Things with text */
+			index_visible = (type != c_I);
+			index_downcase = (type == c_ii);
+			indexing = TRUE;
+			idxwordlist = NULL;
+			idximplicit = &idxwordlist;
+			/* Stack item to close the indexing on exit */
+			stk_push(parsestk, sitem);
+		    }
+		    break;
+		  case c_u:
+		    uchr = t.aux;
+		    utext[0] = uchr; utext[1] = 0;
+		    wd.type = style;
+		    wd.breaks = FALSE;
+		    wd.alt = NULL;
+		    wd.aux = 0;
+		    wd.fpos = t.pos;
+		    if (!indexing || index_visible) {
+			wd.text = ustrdup(utext);
+			uword = addword(wd, &whptr);
+		    } else
+			uword = NULL;
+		    if (indexing) {
+			wd.text = ustrdup(utext);
+			iword = addword(wd, &idximplicit);
+		    } else
+			iword = NULL;
+		    dtor(t), t = get_token(in);
+		    if (t.type == tok_lbrace) {
+			/*
+			 * \u with a left brace. Until the brace
+			 * closes, all further words go on a
+			 * sidetrack from the main thread of the
+			 * paragraph.
+			 */
+			sitem = mknew(struct stack_item);
+			sitem->type = stack_ualt;
+			sitem->whptr = whptr;
+			sitem->idximplicit = idximplicit;
+			stk_push(parsestk, sitem);
+			whptr = uword ? &uword->alt : NULL;
+			idximplicit = iword ? &iword->alt : NULL;
+		    } else {
+			if (indexing)
+			    rdadd(&indexstr, uchr);
+			already = TRUE;
+		    }
+		    break;
+		  default:
+		    if (!macrolookup(macros, in, t.text, &t.pos))
+			error(err_badmidcmd, t.text, &t.pos);
+		    break;
+		}
+	    }
+	    if (!already)
+		dtor(t), t = get_token(in);
+	    seenwhite = iswhite;
+	}
+	/* Check the stack is empty */
+	if (NULL != (sitem = stk_pop(parsestk))) {
+	    do {
+		sfree(sitem);
+		sitem = stk_pop(parsestk);
+	    } while (sitem);
+	    error(err_missingrbrace, &t.pos);
+	}
+	stk_free(parsestk);
+	addpara(par, ret);
+    }
+    dtor(t);
+    macrocleanup(macros);
+}
+
+paragraph *read_input(input *in, indexdata *idx) {
+    paragraph *first = NULL, **tail = &first;
+    char *fname;
+    /* Parse each named file in turn; ones that fail to open are skipped. */
+    for (; in->currindex < in->nfiles; in->currindex++) {
+	fname = in->filenames[in->currindex];
+	in->currfp = fopen(fname, "r");
+	if (!in->currfp)
+	    continue;
+	setpos(in, fname);
+	read_file(&tail, in, idx);
+    }
+
+    return first;
+}
diff --git a/inputs/errors.but b/inputs/errors.but
new file mode 100644
index 0000000..1befc74
--- /dev/null
+++ b/inputs/errors.but
@@ -0,0 +1,51 @@
+Bogus keyword: \k{nonexist}
+
+\BR{nonexist2} [Foogle]
+
+\B{book} Foo McBar, "A Lot Of Rubbish", 1992.
+
+\B{book} The same book again. Isn't that odd?
+
+\IM{nonexist3} Logical impossibilities
+
+\define{macro} macro definition
+
+\define{macro} same macro again
+
+\c Foo
+\c Bar
+Bombadillo. Now get out of _that_.
+
+\unngh This is a bit of a bizarre paragraph, now isn't it?
+
+\C{unfinished-symphony
+
+\U{thisshouldn'tbehere} Unnumbered chapter. Or is it?
+
+\C Numbered chapter. Or _is_ it?
+
+\BR{two}{too}{many} [Which One?]
+
+\nocite{ooh} With some illegal text.
+
+Paragraph with {bizarre braces}.
+
+Another one } .
+
+Bogus in-para comment: \# foobar.
+
+Bogus xrefs: \k and \k{foo{}}.
+
+Nest those styles! \c{foo\e{bar}}. And
+\c{foo\W{file:/dev/null}\e{bar}}. And \e{foo\i\c{bar}}.
+And one without brace: \e.
+
+Bogus hyperlinks: \W and \W{file:/dev/null}bar.
+
+Nested index: \i{foo\i{bar}}. Broken styled index: \i\c.
+
+Complete twaddle: \twaddle.
+
+Unclosed brace: \c{foo.
+
+Comment to EOF: \#{ and here we go.
diff --git a/inputs/errors2.but b/inputs/errors2.but
new file mode 100644
index 0000000..b1a3303
--- /dev/null
+++ b/inputs/errors2.but
@@ -0,0 +1,5 @@
+\H{outofplace} Heading out of place.
+
+\C{chap} Chapter is fine
+
+\S{subsect} Subsection should have heading before it.
diff --git a/inputs/test.but b/inputs/test.but
new file mode 100644
index 0000000..5849e52
--- /dev/null
+++ b/inputs/test.but
@@ -0,0 +1,130 @@
+\title Halibut: A Test Document With A Stupidly Long Title Just To
+See If Wrapping Titles Works OK. In Fact This Title Will Span Three
+Lines, Not Just Two. How's That For Ludicrous?
+
+\cfg{xhtml-leaf-smallest-contents}{2}
+
+\cfg{xhtml-leaf-contains-contents}{true}
+
+\preamble This manual is a small joke effort, designed to use every
+feature \#{ comment } that Halibut's input format supports. Creation
+date \date{%Y.%m.%d} (default format is \date).
+
+\copyright Copyright 1999 Simon \#{second comment}Tatham. All rights
+reserved.
+
+\define{metacoopt} [this is a nested,
+multi-line macro, talking about \coopt
+a bit]
+
+\define{coopt} co\u00F6{-o}pt
+
+\versionid $Id: test.but,v 1.18 2002/08/05 10:31:33 simon Exp $
+
+\C{ch\\ap} First chapter title; for similar wrapping reasons this
+chapter title will be ludicrously long. I wonder how much more
+chapter title I can write before feeling silly.
+
+This is a para\#{another{} comment}graph of text. It
+has line\#{yet another one} breaks in   between words, multiple
+  spaces (ignored), and \e{emphasised text} as well as \c{code
+fragments}.
+
+\cw{This} is weak code. And \k{head} contains some other stuff.
+\K{subhead} does too.
+
+\H{head} First section title (very long again, no prizes for
+guessing the reason why this time, and here's yet more text to pad
+it out to three lines of output)
+
+\cfg{winhelp-topic}{M359HPEHGW}
+
+Here's a code paragraph:
+
+\c No leading spaces
+\c  One leading space
+\c Two blank lines follow this one.
+\c
+\c
+\c Two blank lines precede this one.
+\c   Two leading spaces
+\c We can use \ { and } with impunity here.
+
+This is a list:
+
+\b Ooh.
+
+\b Aah.
+
+\b Eek.
+
+This is a horizontal rule:
+
+\rule
+
+This is a numbered list:
+
+\n Ooh.
+
+\n{keyword} Aah.
+
+\n Eek. \q{Aah} is point \k{keyword}.
+
+A-paragraph-full-of-hyphens-to-test-the-idea-that-word-wrapping-can-happen-somewhere-in-all-this-hyphenatory-nonsense.
+
+A\-paragraph\-full\-of\-nonbreaking\-hyphens\-to\-test\-the\-idea\-that\-word\-wrapping\-misses\-them.
+
+A\_paragraph\_full\_of\_nonbreaking\_spaces\_to\_test\_the\_idea\_that\_word\_wrapping\_misses\_them\_too.
+
+Use of macros: let's talk about \coopt. And about \coopt some more.
+And a nested macro: \metacoopt.
+
+Oh, while I'm here: some special characters. The \\, \{ and \}
+characters, to be precise. And their code equivalents, \c{\\},
+\i\c{\{}, \c{\}}.
+
+\S{subhead} First subheading
+
+So here's a \I{subheading}\I{subsection}subsection. Just
+incidentally, \q{this} is in quotes. \ii{Those} quotes had better work
+in all formats.
+
+We'll try for some Unicode here: \i{Schr\u00F6{oe}dinger}.
+
+An index tag containing non-alternatived Unicode: \i{\u00BFChe?}
+
+An invisible index tag: \I{she seems to have an invisible tag}yeah.
+
+\S2{sub-sub}{Florble} Smaller heading still
+
+A tiny section. Awww. How cute. Actually, this one's a \e{florble},
+and here's a reference to it: \k{sub-sub}.
+
+\A{app} Needless appendix
+
+\# \cfg{winhelp-topic}{Y5VQEXZQVJ} (uncomment this and it clashes)
+
+Here's an \i{appendix}, for no terribly good reason at all. See
+\k{book}.
+
+It also contains a \W{http://www.tartarus.org/}{hyperlink}.
+
+\U Bibliography
+
+\B{book} Some text describing a book.
+
+\B{nocite} Some text describing a book. This text should appear in
+the document even though there is no \cw{\\k} citing it.
+
+\BR{book} [SillyCitation]
+
+\nocite{nocite}
+
+\B{uncited} If this text appears, there's an actual error.
+
+\# This is a comment.
+
+\# Now for the index section.
+
+\IM{she seems to have an invisible tag}{appendix} Invisible tags
+and/or appendices
diff --git a/keywords.c b/keywords.c
new file mode 100644
index 0000000..6e8a230
--- /dev/null
+++ b/keywords.c
@@ -0,0 +1,154 @@
+/*
+ * keywords.c: keep track of all cross-reference keywords
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "halibut.h"
+
+static int kwcmp(void *av, void *bv)
+{
+    /* Ordering callback for the keyword tree: rank two keywords by key. */
+    const keyword *lhs = av, *rhs = bv;
+    return ustrcmp(lhs->key, rhs->key);
+}
+
+static int kwfind(void *av, void *bv)
+{
+    /* Search callback: av is a bare key string, bv a stored keyword. */
+    const keyword *entry = bv;
+    return ustrcmp((wchar_t *)av, entry->key);
+}
+
+keyword *kw_lookup(keywordlist *kl, wchar_t *str) {
+    return find234(kl->keys, str, kwfind);   /* search kl's tree by key text */
+}
+
+/*
+ * Walk the source paragraphs, numbering each via number_mktext, and
+ * record every keyworded paragraph in a 2-3-4 tree ordered by kwcmp.
+ * Generated texts that no tree entry owns are kept as `loose ends'
+ * for free_keywords. Returns NULL if number_mktext flagged an error.
+ */
+keywordlist *get_keywords(paragraph *source) {
+    int errors = FALSE;
+    keywordlist *kl = mknew(keywordlist);
+    numberstate *n = number_init();
+    int prevpara = para_NotParaType;
+
+    number_cfg(n, source);
+
+    kl->size = 0;
+    kl->keys = newtree234(kwcmp);
+    kl->nlooseends = kl->looseendssize = 0;
+    kl->looseends = NULL;
+    for (; source; source = source->next) {
+	wchar_t *p, *q;
+	p = q = source->keyword;
+
+	/*
+	 * Look for the section type override (`example',
+	 * `question' or whatever - to replace `chapter' or
+	 * `section' on a per-section basis).
+	 */
+	if (q) {
+	    q = uadv(q);	       /* point q at the word beyond */
+	    if (!*q) q = NULL;
+	}
+	
+	/*
+	 * Number the chapter / section / list-item / whatever.
+	 * This also sets up the `parent', `child' and `sibling'
+	 * links.
+	 */
+	source->kwtext = number_mktext(n, source, q, prevpara, &errors);
+	prevpara = source->type;
+
+	if (p && *p) {
+	    if (source->kwtext || source->type == para_Biblio) {
+		keyword *kw, *ret;
+		kw = mknew(keyword);
+		kw->key = p;
+		kw->text = source->kwtext;
+		kw->para = source;
+		ret = add234(kl->keys, kw);
+		if (ret != kw) {
+		    error(err_multikw, &source->fpos, &ret->para->fpos, p);
+		    sfree(kw);
+		    p = NULL;	       /* duplicate: kwtext becomes a loose end */
+		}
+	    }
+	}
+	if (!p || !*p) {
+	    if (kl->nlooseends >= kl->looseendssize) {
+		kl->looseendssize = kl->nlooseends + 32;
+		kl->looseends = resize(kl->looseends, kl->looseendssize);
+	    }
+	    kl->looseends[kl->nlooseends++] = source->kwtext;
+	}
+    }
+
+    number_free(n);
+
+    if (errors) {
+	free_keywords(kl);
+	return NULL;
+    }
+
+    return kl;
+}
+
+void free_keywords(keywordlist *kl) {
+    keyword *kw;		       /* element currently being detached */
+    for (; kl->nlooseends > 0; kl->nlooseends--)   /* newest loose end first */
+	free_word_list(kl->looseends[kl->nlooseends - 1]);
+    sfree(kl->looseends);
+    for (kw = index234(kl->keys, 0); kw; kw = index234(kl->keys, 0)) {
+	delpos234(kl->keys, 0);	       /* unlink before freeing the payload */
+	free_word_list(kw->text);
+	sfree(kw);
+    }
+    freetree234(kl->keys);
+    sfree(kl);
+}
+
+/* Splice a copy of the referenced keyword's text after every cross-reference
+ * word (word_UpperXref / word_LowerXref), closed off by a word_XrefEnd. */
+void subst_keywords(paragraph *source, keywordlist *kl) {
+    for (; source; source = source->next) {
+	word *ptr;
+	for (ptr = source->words; ptr; ptr = ptr->next) {
+	    if (ptr->type == word_UpperXref || ptr->type == word_LowerXref) {
+		keyword *kw;
+		word **endptr, *close, *subst;
+		kw = kw_lookup(kl, ptr->text);
+		if (!kw) {
+		    error(err_nosuchkw, &ptr->fpos, ptr->text);
+		    subst = NULL;      /* unknown keyword: splice in nothing */
+		} else
+		    subst = dup_word_list(kw->text);
+
+		if (subst && ptr->type == word_LowerXref &&
+		    kw->para->type != para_Biblio &&
+		    kw->para->type != para_BiblioCited)
+		    ustrlow(subst->text);
+
+		close = mknew(word);
+		close->text = NULL;
+		close->alt = NULL;
+		close->type = word_XrefEnd;
+		close->fpos = ptr->fpos;
+		close->breaks = FALSE; close->aux = 0; /* don't leave these unset */
+		close->next = ptr->next;
+		ptr->next = subst;
+
+		for (endptr = &ptr->next; *endptr; endptr = &(*endptr)->next)
+		    (*endptr)->fpos = ptr->fpos;
+
+		*endptr = close;
+		ptr = close;	       /* resume scanning after the inserted text */
+	    }
+	}
+    }
+}
diff --git a/licence.c b/licence.c
new file mode 100644
index 0000000..0c4338a
--- /dev/null
+++ b/licence.c
@@ -0,0 +1,16 @@
+/*
+ * licence.c: licence text
+ */
+
+#include <stdio.h>
+
+static char *licencetext[] = {
+    "FIXME: licence text goes here",
+    NULL			       /* end-of-table marker */
+};
+
+void licence(void) {
+    int i;			       /* index into licencetext */
+    for (i = 0; licencetext[i] != NULL; i++)
+	puts(licencetext[i]);
+}
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..64f1869
--- /dev/null
+++ b/main.c
@@ -0,0 +1,300 @@
+/*
+ * main.c: command line parsing and top level
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "halibut.h"
+
+static void dbg_prtsource(paragraph *sourceform);
+static void dbg_prtwordlist(int level, word *w);
+static void dbg_prtkws(keywordlist *kws);
+
+int main(int argc, char **argv) {
+    char **infiles;
+    char *outfile;
+    int nfiles;
+    int nogo;
+    int errs;
+    int reportcols;
+    int debug;
+
+    /*
+     * Set up initial (default) parameters.
+     */
+    infiles = mknewa(char *, argc);    /* one slot per argv entry is ample */
+    outfile = NULL;
+    nfiles = 0;
+    nogo = errs = FALSE;
+    reportcols = 0;
+    debug = 0;
+
+    if (argc == 1) {		       /* no arguments at all: just show usage */
+	usage();
+	exit(EXIT_SUCCESS);
+    }
+
+    /*
+     * Parse command line arguments.
+     */
+    while (--argc) {
+	char *p = *++argv;
+	if (*p == '-') {
+	    /*
+	     * An option.
+	     */
+	    while (p && *++p) {	       /* p == NULL abandons this argument */
+		char c = *p;
+		switch (c) {
+		  case '-':
+		    /*
+		     * Long option.
+		     */
+		    {
+			char *opt, *val;
+			opt = p++;     /* opt will have _one_ leading - */
+			while (*p && *p != '=')
+			    p++;	       /* find end of option */
+			if (*p == '=') {
+			    *p++ = '\0';
+			    val = p;
+			} else
+			    val = NULL;
+			if (!strcmp(opt, "-help")) {
+			    help();
+			    nogo = TRUE;
+			} else if (!strcmp(opt, "-version")) {
+			    showversion();
+			    nogo = TRUE;
+			} else if (!strcmp(opt, "-licence") ||
+				   !strcmp(opt, "-license")) {
+			    licence();
+			    nogo = TRUE;
+			} else if (!strcmp(opt, "-output")) {
+			    if (!val)
+				errs = TRUE, error(err_optnoarg, opt);
+			    else
+				outfile = val;
+			} else if (!strcmp(opt, "-precise")) {
+			    reportcols = 1;
+			} else {
+			    errs = TRUE, error(err_nosuchopt, opt);
+			}
+		    }
+		    p = NULL;	       /* a long option uses up the whole argument */
+		    break;
+		  case 'h':
+		  case 'V':
+		  case 'L':
+		  case 'P':
+		  case 'd':
+		    /*
+		     * Option requiring no parameter.
+		     */
+		    switch (c) {
+		      case 'h':
+			help();
+			nogo = TRUE;
+			break;
+		      case 'V':
+			showversion();
+			nogo = TRUE;
+			break;
+		      case 'L':
+			licence();
+			nogo = TRUE;
+			break;
+		      case 'P':
+			reportcols = 1;
+			break;
+		      case 'd':
+			debug = TRUE;
+			break;
+		    }
+		    break;
+		  case 'o':
+		    /*
+		     * Option requiring parameter.
+		     */
+		    p++;
+		    if (!*p && argc > 1)
+			--argc, p = *++argv;   /* parameter is the next argument */
+		    else if (!*p) {
+			char opt[2];
+			opt[0] = c;
+			opt[1] = '\0';
+			errs = TRUE, error(err_optnoarg, opt);
+		    }
+		    /*
+		     * Now c is the option and p is the parameter.
+		     */
+		    switch (c) {
+		      case 'o':
+			outfile = p;   /* stored, but not read again in main */
+			break;
+		    }
+		    p = NULL;	       /* prevent continued processing */
+		    break;
+		  default:
+		    /*
+		     * Unrecognised option.
+		     */
+		    {
+			char opt[2];
+			opt[0] = c;
+			opt[1] = '\0';
+			errs = TRUE, error(err_nosuchopt, opt);
+		    }
+		}
+	    }
+	} else {
+	    /*
+	     * A non-option argument.
+	     */
+	    infiles[nfiles++] = p;
+	}
+    }
+
+    if (errs)			       /* option errors take precedence... */
+	exit(EXIT_FAILURE);
+    if (nogo)			       /* ...over help/version/licence exits */
+	exit(EXIT_SUCCESS);
+
+    /*
+     * Do the work.
+     */
+    if (nfiles == 0) {
+	error(err_noinput);
+	usage();
+	exit(EXIT_FAILURE);
+    }
+
+    {
+	input in;
+	paragraph *sourceform, *p;
+	indexdata *idx;
+	keywordlist *keywords;
+
+	in.filenames = infiles;
+	in.nfiles = nfiles;
+	in.currfp = NULL;
+	in.currindex = 0;
+	in.npushback = in.pushbacksize = 0;
+	in.pushback = NULL;
+	in.reportcols = reportcols;
+	in.stack = NULL;
+
+	idx = make_index();
+
+	sourceform = read_input(&in, idx);
+	if (!sourceform)	       /* nothing was read: exit without a message */
+	    exit(EXIT_FAILURE);
+
+	sfree(in.pushback);
+
+	mark_attr_ends(sourceform);
+
+	sfree(infiles);		       /* frees the pointer array only */
+
+	keywords = get_keywords(sourceform);
+	if (!keywords)
+	    exit(EXIT_FAILURE);
+	gen_citations(sourceform, keywords);
+	subst_keywords(sourceform, keywords);
+
+	for (p = sourceform; p; p = p->next)
+	    if (p->type == para_IM)
+		index_merge(idx, TRUE, p->keyword, p->words);
+
+	build_index(idx);
+
+	if (debug) {
+	    index_debug(idx);
+	    dbg_prtkws(keywords);
+	    dbg_prtsource(sourceform);
+	}
+
+	text_backend(sourceform, keywords, idx);   /* all three backends are */
+	xhtml_backend(sourceform, keywords, idx);  /* run unconditionally, in */
+	whlp_backend(sourceform, keywords, idx);   /* this order */
+
+	free_para_list(sourceform);
+	free_keywords(keywords);
+	cleanup_index(idx);
+    }
+
+    return 0;
+}
+
+static void dbg_prtsource(paragraph *sourceform) {
+    /*
+     * Dump every paragraph to stdout: type, keyword strings, word list.
+     */
+
+    paragraph *p;
+    for (p = sourceform; p; p = p->next) {
+	wchar_t *wp;
+	printf("para %d ", p->type);
+	if (p->keyword) {
+	    wp = p->keyword;
+	    while (*wp) {	       /* back-to-back strings, ended by an empty one */
+		putchar('\"');
+		for (; *wp; wp++)
+		    putchar(*wp);      /* wchar_t handed to putchar unconverted */
+		putchar('\"');
+		if (*++wp)	       /* step over the NUL: another string follows? */
+		    printf(", ");
+	    }
+	} else
+	    printf("(no keyword)");
+	printf(" {\n");
+	dbg_prtwordlist(1, p->words);
+	printf("}\n");
+    }
+}
+
+static void dbg_prtkws(keywordlist *kws) {
+    /*
+     * Dump every keyword in tree order to stdout: key strings, then text.
+     */
+
+    int i;
+    keyword *kw;
+
+    for (i = 0; (kw = index234(kws->keys, i)) != NULL; i++) {
+	wchar_t *wp;
+	printf("keyword ");
+	wp = kw->key;
+	while (*wp) {		       /* back-to-back strings, ended by an empty one */
+	    putchar('\"');
+	    for (; *wp; wp++)
+		putchar(*wp);	       /* wchar_t handed to putchar unconverted */
+	    putchar('\"');
+	    if (*++wp)		       /* step over the NUL: another string follows? */
+		printf(", ");
+	}
+	printf(" {\n");
+	dbg_prtwordlist(1, kw->text);
+	printf("}\n");
+    }
+}
+
+static void dbg_prtwordlist(int level, word *w) {
+    for (; w; w = w->next) {	       /* one output line per word */
+	wchar_t *wp;
+	printf("%*sword %d ", level*4, "", w->type);   /* indent 4 per level */
+	if (w->text) {
+	    printf("\"");
+	    for (wp = w->text; *wp; wp++)
+		    putchar(*wp);
+	    printf("\"");
+	} else
+	    printf("(no text)");
+	if (w->alt) {		       /* alternative list, one level deeper */
+	    printf(" alt = {\n");
+	    dbg_prtwordlist(level+1, w->alt);
+	    printf("%*s}", level*4, "");
+	}
+	printf("\n");
+    }
+}
diff --git a/malloc.c b/malloc.c
new file mode 100644
index 0000000..1635b47
--- /dev/null
+++ b/malloc.c
@@ -0,0 +1,149 @@
+/*
+ * malloc.c: safe wrappers around malloc, realloc, free, strdup
+ */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include "halibut.h"
+
+#ifdef LOGALLOC
+#define LOGPARAMS char *file, int line,
+static FILE *logallocfp = NULL;	       /* malloc.log, opened on first use */
+static int logline = 2;		       /* off by 1: `null pointer is' */
+static void loginc(void) { }	       /* empty; presumably a breakpoint hook */
+static void logallocinit(void) {
+    if (!logallocfp) {
+	logallocfp = fopen("malloc.log", "w");
+	if (!logallocfp) {
+	    fprintf(stderr, "panic: unable to open malloc.log\n");
+	    exit(10);
+	}
+	setvbuf (logallocfp, NULL, _IOLBF, BUFSIZ);
+	/* %p needs a void *; a bare NULL may be a plain integer 0 in varargs */
+	fprintf(logallocfp, "null pointer is %p\n", (void *)NULL);
+    }
+}
+static void logprintf(char *fmt, ...) {
+    va_list ap;
+    va_start(ap, fmt);
+    vfprintf(logallocfp, fmt, ap); va_end(ap);
+}
+#define LOGPRINT(x) ( logallocinit(), logprintf x )
+#define LOGINC do { loginc(); logline++; } while (0)
+#else
+#define LOGPARAMS
+#define LOGPRINT(x)
+#define LOGINC ((void)0)
+#endif
+
+/*
+ * smalloc should guarantee to return a useful pointer - Halibut
+ * can do nothing except die when it's out of memory anyway.
+ */
+void *(smalloc)(LOGPARAMS int size) {  /* name parenthesised: see NOTE below */
+    void *p;
+    LOGINC;
+    LOGPRINT(("%s %d malloc(%ld)",
+	      file, line, (long)size));
+    p = malloc(size);
+    if (!p)
+	fatal(err_nomemory);	       /* NOTE(review): (smalloc) presumably */
+    LOGPRINT((" returns %p\n", p));    /* dodges a smalloc() macro - confirm */
+    return p;
+}
+
+/*
+ * sfree should guaranteeably deal gracefully with freeing NULL
+ */
+void (sfree)(LOGPARAMS void *p) {
+    if (p) {			       /* NULL: nothing freed, nothing logged */
+	LOGINC;
+	LOGPRINT(("%s %d free(%p)\n",
+		  file, line, p));
+	free(p);
+    }
+}
+
+/*
+ * srealloc should guaranteeably be able to realloc NULL
+ */
+void *(srealloc)(LOGPARAMS void *p, int size) {
+    void *q;
+    if (p) {
+	LOGINC;
+	LOGPRINT(("%s %d realloc(%p,%ld)",
+		  file, line, p, (long)size));
+	q = realloc(p, size);
+	LOGPRINT((" returns %p\n", q));
+    } else {			       /* NULL input: a plain malloc, logged as one */
+	LOGINC;
+	LOGPRINT(("%s %d malloc(%ld)",
+		  file, line, (long)size));
+	q = malloc(size);
+	LOGPRINT((" returns %p\n", q));
+    }
+    if (!q)			       /* either path failing is fatal */
+	fatal(err_nomemory);
+    return q;
+}
+
+/*
+ * dupstr: strdup work-alike built on smalloc, so it shares smalloc's
+ * never-return-NULL property (and is defined in every environment).
+ */
+char *dupstr(char *s) {
+    char *copy = smalloc(strlen(s) + 1);   /* +1 for the terminating NUL */
+    strcpy(copy, s);
+    return copy;
+}
+
+/*
+ * Duplicate a linked list of words, including each word's text and
+ * its `alt' sub-list. An empty (NULL) list duplicates to NULL.
+ */
+word *dup_word_list(word *w) {
+    word *head = NULL, **eptr = &head; /* head must be NULL if w is empty */
+
+    while (w) {
+	word *newwd = mknew(word);
+	*newwd = *w;		       /* structure copy */
+	newwd->text = ustrdup(w->text);
+	if (w->alt)
+	    newwd->alt = dup_word_list(w->alt);
+	*eptr = newwd;
+	newwd->next = NULL;
+	eptr = &newwd->next;
+	w = w->next;
+    }
+
+    return head;
+}
+
+/*
+ * Free a linked list of words: each word's text, its `alt' sub-list
+ * (recursively), and then the word itself.
+ */
+void free_word_list(word *w) {
+    word *following;
+    for (; w != NULL; w = following) {
+	following = w->next;	       /* fetch the link before w is freed */
+	sfree(w->text);
+	if (w->alt != NULL)
+	    free_word_list(w->alt);
+	sfree(w);
+    }
+}
+
+/*
+ * Free a linked list of paragraphs. For each node this releases the
+ * keyword string and the word list, then the node itself.
+ */
+void free_para_list(paragraph *p) {
+    paragraph *following;
+    for (; p != NULL; p = following) {
+	following = p->next;	       /* fetch the link before p is freed */
+	sfree(p->keyword);
+	free_word_list(p->words);
+	sfree(p);
+    }
+}
diff --git a/misc.c b/misc.c
new file mode 100644
index 0000000..c4ac72f
--- /dev/null
+++ b/misc.c
@@ -0,0 +1,312 @@
+/*
+ * misc.c: miscellaneous useful items
+ */
+
+#include "halibut.h"
+
+struct stackTag {
+    void **data;		       /* array of item pointers */
+    int sp;			       /* number of items currently held */
+    int size;			       /* slots allocated in data */
+};
+
+stack stk_new(void) {
+    stack fresh = mknew(struct stackTag);
+
+    /* Start out empty, with no slot array allocated yet. */
+    fresh->data = NULL;
+    fresh->size = 0;
+    fresh->sp = 0;
+
+    return fresh;
+}
+
+void stk_free(stack s) {
+    sfree(s->data);		       /* the slot array only, not the items */
+    sfree(s);
+}
+
+void stk_push(stack s, void *item) {
+    if (s->size <= s->sp) {	       /* no free slot: grow to sp + 32 */
+	s->size = s->sp + 32;
+	s->data = resize(s->data, s->size);
+    }
+    s->data[s->sp++] = item;
+}
+
+void *stk_pop(stack s) {
+    if (s->sp <= 0)
+	return NULL;		       /* nothing on the stack */
+    s->sp--;
+    return s->data[s->sp];
+}
+
+/*
+ * Small routines to amalgamate a string from an input source.
+ */
+const rdstring empty_rdstring = {0, 0, NULL};
+const rdstringc empty_rdstringc = {0, 0, NULL};
+
+void rdadd(rdstring *rs, wchar_t c) {
+    if (rs->pos >= rs->size-1) {       /* need room for c and the terminator */
+	rs->size = rs->pos + 128;
+	rs->text = resize(rs->text, rs->size);
+    }
+    rs->text[rs->pos++] = c;
+    rs->text[rs->pos] = 0;	       /* keep the string terminated */
+}
+void rdadds(rdstring *rs, wchar_t *p) {
+    int len = ustrlen(p);
+    if (rs->pos >= rs->size - len) {   /* p plus a terminator won't fit */
+	rs->size = rs->pos + len + 128;
+	rs->text = resize(rs->text, rs->size);
+    }
+    ustrcpy(rs->text + rs->pos, p);
+    rs->pos += len;		       /* advance past the appended characters */
+}
+wchar_t *rdtrim(rdstring *rs) {
+    rs->text = resize(rs->text, rs->pos + 1);   /* rs->size is not updated */
+    return rs->text;
+}
+
+void rdaddc(rdstringc *rs, char c) {
+    if (rs->pos >= rs->size-1) {       /* need room for c and the terminator */
+	rs->size = rs->pos + 128;
+	rs->text = resize(rs->text, rs->size);
+    }
+    rs->text[rs->pos++] = c;
+    rs->text[rs->pos] = 0;	       /* keep the string terminated */
+}
+void rdaddsc(rdstringc *rs, char *p) {
+    int len = strlen(p);
+    if (rs->pos >= rs->size - len) {   /* p plus its terminator won't fit */
+	rs->size = rs->pos + len + 128;
+	rs->text = resize(rs->text, rs->size);
+    }
+    strcpy(rs->text + rs->pos, p);     /* copies the terminating NUL too */
+    rs->pos += len;
+}
+char *rdtrimc(rdstringc *rs) {
+    rs->text = resize(rs->text, rs->pos + 1);   /* rs->size is not updated */
+    return rs->text;
+}
+
+int compare_wordlists(word *a, word *b) {
+    int t;
+    while (a && b) {
+	if (a->type != b->type)	       /* differing types decide at once */
+	    return (a->type < b->type ? -1 : +1);   /* FIXME? */
+	t = a->type;
+	if ((t != word_Normal && t != word_Code &&
+	     t != word_WeakCode && t != word_Emph) ||
+	    a->alt || b->alt) {	       /* compare this one pair of words */
+	    int c;
+	    if (a->text && b->text) {
+		c = ustricmp(a->text, b->text);
+		if (c)
+		    return c;
+	    }
+	    c = compare_wordlists(a->alt, b->alt);
+	    if (c)
+		return c;
+	    a = a->next;
+	    b = b->next;
+	} else {		       /* text words without alt: char by char */
+	    wchar_t *ap = a->text, *bp = b->text;
+	    while (*ap && *bp) {
+		wchar_t ac = utolower(*ap), bc = utolower(*bp);
+		if (ac != bc)
+		    return (ac < bc ? -1 : +1);
+		/* At a word's end, run on into a following same-type word. */
+		if (!*++ap && a->next && a->next->type == t && !a->next->alt)
+		    a = a->next, ap = a->text;
+		if (!*++bp && b->next && b->next->type == t && !b->next->alt)
+		    b = b->next, bp = b->text;
+	    }
+	    if (*ap || *bp)	       /* the side with text left sorts later */
+		return (*ap ? +1 : -1);
+	    a = a->next;
+	    b = b->next;
+	}
+    }
+
+    if (a || b)			       /* the longer list sorts later */
+	return (a ? +1 : -1);
+    else
+	return 0;
+}
+
+void mark_attr_ends(paragraph *sourceform) {
+    paragraph *p;
+    word *w, *wp;		       /* wp trails w as the previous word */
+    for (p = sourceform; p; p = p->next) {
+	wp = NULL;
+	for (w = p->words; w; w = w->next) {
+	    if (isattr(w->type)) {
+		int before = (wp && isattr(wp->type) &&
+			      sameattr(wp->type, w->type));
+		int after = (w->next && isattr(w->next->type) &&
+			     sameattr(w->next->type, w->type));
+		/* Flag w's place in a run of same-attribute neighbours. */
+		w->aux |= (before ?
+			   (after ? attr_Always : attr_Last) :
+			   (after ? attr_First : attr_Only));
+	    }
+	    wp = w;
+	}
+    }
+}
+
+/* Wrap `text' into lines: the first at most `width' wide, later ones at
+ * most `subsequentwidth', as measured by widthfn. Returns a wrappedline
+ * list (NULL for empty text) which the caller frees with wrap_free. */
+wrappedline *wrap_para(word *text, int width, int subsequentwidth,
+		       int (*widthfn)(word *)) {
+    wrappedline *head = NULL, **ptr = &head;
+    int nwords, wordsize;
+    struct wrapword {
+	word *begin, *end;	       /* first word; word after the last one */
+	int width;		       /* summed widthfn of the component */
+	int spacewidth;		       /* width of the space after it, or 0 */
+	int cost;		       /* best cost of wrapping from here on */
+	int nwords;		       /* components on the line starting here */
+    } *wrapwords;
+    int i, j, n;
+
+    /*
+     * Break the line up into wrappable components.
+     */
+    nwords = wordsize = 0;
+    wrapwords = NULL;
+    while (text) {
+	if (nwords >= wordsize) {
+	    wordsize = nwords + 64;
+	    wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
+	}
+	wrapwords[nwords].width = 0;
+	wrapwords[nwords].begin = text;
+	while (text) {
+	    wrapwords[nwords].width += widthfn(text);
+	    wrapwords[nwords].end = text->next;
+	    if (text->next && (text->next->type == word_WhiteSpace ||
+			       text->next->type == word_EmphSpace ||
+			       text->breaks))
+		break;
+	    text = text->next;
+	}
+	if (text && text->next && (text->next->type == word_WhiteSpace ||
+			   text->next->type == word_EmphSpace)) {
+	    wrapwords[nwords].spacewidth = widthfn(text->next);
+	    text = text->next;
+	} else {
+	    wrapwords[nwords].spacewidth = 0;
+	}
+	nwords++;
+	if (text)
+	    text = text->next;
+    }
+
+    /*
+     * Perform the dynamic wrapping algorithm: work backwards from
+     * nwords-1, determining the optimal wrapping for each terminal
+     * subsequence of the paragraph.
+     */
+    for (i = nwords; i-- ;) {
+	int best = -1;
+	int bestcost = 0;
+	int cost;
+	int linelen = 0, spacewidth = 0;
+	int seenspace;
+	int thiswidth = (i == 0 ? width : subsequentwidth);
+
+	j = 0;
+	seenspace = 0;
+	while (i+j < nwords) {
+	    /*
+	     * See what happens if we put j+1 words on this line.
+	     */
+	    if (spacewidth)
+		seenspace = 1;
+	    linelen += spacewidth + wrapwords[i+j].width;
+	    spacewidth = wrapwords[i+j].spacewidth;
+	    j++;
+	    if (linelen > thiswidth) {
+		/*
+		 * If we're over the width limit, abandon ship, _unless_
+		 * there is no best-effort yet (which will only happen
+		 * if the first word is too long all by itself).
+		 */
+		if (best > 0)
+		    break;
+	    }
+	    if (i+j == nwords) {
+		/*
+		 * Special case: at the very end of the paragraph, we
+		 * don't score penalty points for white space left over.
+		 */
+		cost = 0;
+	    } else {
+		cost = (thiswidth-linelen) * (thiswidth-linelen);
+		cost += wrapwords[i+j].cost;
+	    }
+	    /*
+	     * We compare bestcost >= cost, not bestcost > cost, because
+	     * in cases where the costs are identical we want to try to
+	     * look like the greedy algorithm: readers are likely to have
+	     * spent a lot of time looking at greedy-wrapped paragraphs,
+	     * and there's no point violating the Principle of Least
+	     * Surprise if it doesn't actually gain anything.
+	     */
+	    if (best < 0 || bestcost >= cost) {
+		bestcost = cost;
+		best = j;
+	    }
+	}
+	/*
+	 * Now we know the optimal answer for this terminal
+	 * subsequence, so put it in wrapwords.
+	 */
+	wrapwords[i].cost = bestcost;
+	wrapwords[i].nwords = best;
+    }
+
+    /*
+     * We've wrapped the paragraph. Now build the output
+     * `wrappedline' list.
+     */
+    i = 0;
+    while (i < nwords) {
+	wrappedline *w = mknew(wrappedline);
+	*ptr = w;
+	ptr = &w->next;
+	w->next = NULL;
+
+	n = wrapwords[i].nwords;
+	w->begin = wrapwords[i].begin;
+	w->end = wrapwords[i+n-1].end;
+
+	/*
+	 * Count along the words to find nspaces and shortfall.
+	 */
+	w->nspaces = 0;
+	w->shortfall = (i == 0 ? width : subsequentwidth);   /* this line's limit */
+	for (j = 0; j < n; j++) {
+	    w->shortfall -= wrapwords[i+j].width;
+	    if (j < n-1 && wrapwords[i+j].spacewidth) {
+		w->nspaces++;
+		w->shortfall -= wrapwords[i+j].spacewidth;
+	    }
+	}
+	i += n;
+    }
+
+    sfree(wrapwords);
+
+    return head;
+}
+
+void wrap_free(wrappedline *w) {
+    wrappedline *rest;
+    for (; w; w = rest) {
+	rest = w->next;		       /* read the link before freeing w */
+	sfree(w);
+    }
+}
diff --git a/misc/halibut.sl b/misc/halibut.sl
new file mode 100644
index 0000000..e88b1ed
--- /dev/null
+++ b/misc/halibut.sl
@@ -0,0 +1,98 @@
+% Halibut mode for Jed.
+
+$1 = "Halibut";
+create_syntax_table ($1);
+
+define_syntax ("\#", "", '%', $1);       % Comment Syntax
+define_syntax ('\\', '\\', $1);         % Quote character
+define_syntax ("{", "}", '(', $1);    %  are all these needed?
+define_syntax ("a-zA-Z0-9", 'w', $1);
+set_syntax_flags ($1, 8);
+
+#ifdef HAS_DFA_SYNTAX
+%enable_highlight_cache ("halibut.dfa", $1);
+
+% A braced comment in Halibut is \#{ ... }, where ... may contain any
+% correctly nested sequence of braces. A DFA rule can't match unbounded
+% nesting, so the rule below handles three levels of braces. The same
+% rule list is given twice, once per spelling of the DFA function names.
+#ifexists dfa_define_highlight_rule
+dfa_define_highlight_rule ("\\\\#{[^{}]*({[^{}]*({[^}]*}[^{}]*)*}[^{}]*)*}",
+			   "Qcomment", $1);
+
+dfa_define_highlight_rule ("\\\\#.*$", "comment", $1);
+dfa_define_highlight_rule ("^\\\\c([ \t].*)?$", "string", $1);
+dfa_define_highlight_rule ("\\\\[\\\\{}\\-_]", "keyword0", $1);
+dfa_define_highlight_rule ("\\\\[A-Za-tv-z][A-Za-z0-9]*", "keyword0", $1);
+dfa_define_highlight_rule ("\\\\u[A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9]",
+			   "keyword0", $1);
+dfa_define_highlight_rule ("\\\\u[A-Fa-f0-9]?[A-Fa-f0-9]?[A-Fa-f0-9]?[A-Fa-f0-9]",
+			   "keyword1", $1);
+dfa_define_highlight_rule ("[{}]", "delimiter", $1);
+dfa_define_highlight_rule (".", "normal", $1);
+dfa_build_highlight_table ($1);
+#else
+define_highlight_rule ("\\\\#{[^{}]*({[^{}]*({[^}]*}[^{}]*)*}[^{}]*)*}",
+		       "Qcomment", $1);
+
+define_highlight_rule ("\\\\#.*$", "comment", $1);
+define_highlight_rule ("^\\\\c([ \t].*)?$", "string", $1);
+define_highlight_rule ("\\\\[\\\\{}\\-_]", "keyword0", $1);
+define_highlight_rule ("\\\\[A-Za-tv-z][A-Za-z0-9]*", "keyword0", $1);
+define_highlight_rule ("\\\\u[A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9]",
+		       "keyword0", $1);
+define_highlight_rule ("\\\\u[A-Fa-f0-9]?[A-Fa-f0-9]?[A-Fa-f0-9]?[A-Fa-f0-9]",
+		       "keyword1", $1);
+define_highlight_rule ("[{}]", "delimiter", $1);
+define_highlight_rule (".", "normal", $1);
+build_highlight_table ($1);
+#endif
+#endif
+
+%  Leaves on the stack whether the current line contains a \# comment.
+define halibut_is_comment() {
+    bol ();
+    while (ffind ("\\\\#")) go_right (3);  % step past each `\\#' (escaped \, then #)
+    ffind ("\\#"); % return value on stack
+}
+
+variable Halibut_Ignore_Comment = 0;  % if true, line containing a comment
+                                       % does not delimit a paragraph
+
+define halibut_paragraph_separator() {
+    bol();
+    skip_white();
+    if (eolp())                        % blank line
+	return 1;
+    if (looking_at("\\c ") or looking_at("\\c\t") or
+	looking_at("\\c\n"))           % \c code-paragraph line
+	return 1;
+    return not (Halibut_Ignore_Comment) and halibut_is_comment();
+} 
+
+define halibut_wrap_hook() {
+    variable yep;
+    push_spot ();
+    yep = up_1 () and halibut_is_comment ();   % does the line above hold a comment?
+    pop_spot ();
+    if (yep) {
+	push_spot ();
+	bol_skip_white ();
+	insert ("\\# ");               % then start this line with `\# ' as well
+	pop_spot ();
+    }
+}
+
+#ifexists mode_set_mode_info
+mode_set_mode_info("Halibut", "fold_info", "\\# {{{\r\\# }}}\r\r");
+#endif
+
+define halibut_mode() {
+    variable mode = "Halibut";
+    % use_keymap (mode);
+    set_mode (mode, 0x1 | 0x20);
+    set_buffer_hook ("par_sep", "halibut_paragraph_separator");
+    set_buffer_hook ("wrap_hook", "halibut_wrap_hook");
+    use_syntax_table (mode);
+    runhooks ("halibut_mode_hook");    % user customisation hook, run last
+}
diff --git a/misc/logalloc b/misc/logalloc
new file mode 100755
index 0000000..cdc4504
--- /dev/null
+++ b/misc/logalloc
@@ -0,0 +1,61 @@
+#!/usr/bin/perl
+#
+# Reads a log file, containing lines of the five types
+#    <file> <line> malloc(<number>) returns <pointer>
+#    <file> <line> strdup(<number>) returns <pointer>
+#    <file> <line> calloc(<number>*<number>) returns <pointer>
+#    <file> <line> realloc(<pointer>,<number>) returns <pointer>
+#    <file> <line> free(<pointer>)
+#
+# with optional line on the front saying
+#    null pointer is <pointer>
+#
+# and produces a list of free()s and realloc()s of wrong pointers,
+# and also of malloc()s, calloc()s and realloc()s that never get free()d.
+
+$errors=0;
+
+while (<>) {
+  $in=$out="";                  # $in: pointer given up; $out: pointer obtained
+  ($file, $line, $call, $in, $out)=($1,$2,$3,"",$4)
+      if /^(\S+) (\S+) (malloc|strdup)\(\S+\) returns (\S+)$/;
+  ($file, $line, $call, $in, $out)=($1,$2,"calloc","",$3)   # 3rd group = result
+      if /^(\S+) (\S+) calloc\(\S+\*\S+\) returns (\S+)$/;
+  ($file, $line, $call, $in, $out)=($1,$2,"realloc",$3,$4)
+      if /^(\S+) (\S+) realloc\((\S+),\S+\) returns (\S+)$/;
+  ($file, $line, $call, $in, $out)=($1,$2,"free",$3,"")
+      if /^(\S+) (\S+) free\((\S+)\)$/;
+  $null = $1, next if /^null pointer is (\S+)$/;
+  if ($in ne "") {              # a pointer was freed or realloc'd away
+    if (&null($in)) {
+      $bad = "null pointer";
+    } elsif (defined $lastalloc{$in}) {
+      $bad = "already-freed pointer (last alloc $lastalloc{$in}, last free $lastfree{$in})";
+    } else {
+      $bad = "bad pointer";
+    }
+    $errors=1, print "($.) $file:$line: $call() $bad\n"
+      if $record{$in} eq "";    # only complain if it isn't currently live
+    $lastfree{$in}="($.) $file:$line";
+    $record{$in}="";
+  }
+  if ($out ne "" && !&null($out)) {   # a non-null pointer was handed out
+    $errors=1, print "($.) $file:$line: $call() returned already ".
+      "allocated pointer\n" if $record{$out} ne "";
+    $record{$out}="($.) $file:$line: $call()";
+    $lastalloc{$out}="($.) $file:$line";
+  }
+}
+
+foreach $i (keys %record) {     # whatever is still live was never freed
+  $errors=1, print "$record{$i} never got freed\n"
+      if $record{$i} ne "";
+}
+
+print "no problems\n" if !$errors;
+
+sub null {   # determine if a string refers to a null pointer
+  local ($_) = @_;
+  return $_ eq $null if $null;    # exact text from the log's own header line
+  return /^((0x)?0+|\(nil\))$/;   # otherwise recognise the usual spellings
+}
diff --git a/style.c b/style.c
new file mode 100644
index 0000000..562c594
--- /dev/null
+++ b/style.c
@@ -0,0 +1,8 @@
+/*
+ * style.c: load and keep track of user style preferences
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "halibut.h"
+
diff --git a/tree234.c b/tree234.c
new file mode 100644
index 0000000..bc88039
--- /dev/null
+++ b/tree234.c
@@ -0,0 +1,2193 @@
+/*
+ * tree234.c: reasonably generic counted 2-3-4 tree routines.
+ * 
+ * This file is copyright 1999-2001 Simon Tatham.
+ * 
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "tree234.h"
+
+#define smalloc malloc
+#define sfree free
+
+#define mknew(typ) ( (typ *) smalloc (sizeof (typ)) )
+
+#ifdef TEST
+#define LOG(x) (printf x)
+#else
+#define LOG(x)
+#endif
+
+typedef struct node234_Tag node234;
+
+struct tree234_Tag {
+    node234 *root;		       /* NULL while the tree is empty */
+    cmpfn234 cmp;		       /* ordering function; NULL if unsorted */
+};
+
+struct node234_Tag {
+    node234 *parent;		       /* NULL at the root */
+    node234 *kids[4];		       /* subtrees; kids[0] is NULL in a leaf */
+    int counts[4];		       /* number of elements under each kids[i] */
+    void *elems[3];		       /* filled from slot 0; unused slots NULL */
+};
+
+/*
+ * Create an empty 2-3-4 tree (NULL cmp = unsorted; NULL on no memory).
+ */
+tree234 *newtree234(cmpfn234 cmp) {
+    tree234 *ret = mknew(tree234);
+    LOG(("created tree %p\n", ret));
+    if (ret)			       /* smalloc is plain malloc: may fail */
+	ret->root = NULL, ret->cmp = cmp;
+    return ret;
+}
+
+/*
+ * Recursively free node n and every node beneath it. The elements
+ * themselves are not freed. A NULL n is quietly ignored.
+ */
+static void freenode234(node234 *n) {
+    int i;
+    if (n) {
+	for (i = 0; i < 4; i++)
+	    freenode234(n->kids[i]);
+	sfree(n);
+    }
+}
+void freetree234(tree234 *t) {	       /* frees the nodes, not the elements */
+    if (t) freenode234(t->root);       /* tolerate NULL, as free() does */
+    sfree(t);			       /* sfree is free(): NULL is a no-op */
+}
+
+/*
+ * Return the number of elements in the subtree rooted at n: the
+ * cached sizes of its kids plus its own elements. NULL counts as 0.
+ */
+static int countnode234(node234 *n) {
+    int total;
+    int slot;
+    if (n == NULL)
+	return 0;
+    /* kid sizes are cached in counts[], so there is no need to recurse */
+    total = n->counts[0] + n->counts[1] + n->counts[2] + n->counts[3];
+    for (slot = 0; slot < 3; slot++)
+	total += (n->elems[slot] != NULL);
+    return total;
+}
+
+/*
+ * Return the total number of elements held in tree t (0 if empty).
+ * countnode234() already maps a NULL root to 0, so no special case
+ * is needed here.
+ */
+int count234(tree234 *t) {
+    node234 *top = t->root;
+    return countnode234(top);
+}
+
+/*
+ * Insert left/e/right at kid position ki of n, splitting full nodes
+ * upward. Returns 1 if the root split (*root replaced), else 0.
+ */
+static int add234_insert(node234 *left, void *e, node234 *right,
+			 node234 **root, node234 *n, int ki) {
+    int lcount, rcount;
+    /*
+     * We need to insert the new left/element/right set in n at
+     * child position ki.
+     */
+    lcount = countnode234(left);
+    rcount = countnode234(right);
+    while (n) {
+	LOG(("  at %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	     n,
+	     n->kids[0], n->counts[0], n->elems[0],
+	     n->kids[1], n->counts[1], n->elems[1],
+	     n->kids[2], n->counts[2], n->elems[2],
+	     n->kids[3], n->counts[3]));
+	LOG(("  need to insert %p/%d \"%s\" %p/%d at position %d\n",
+	     left, lcount, e, right, rcount, ki));
+	if (n->elems[1] == NULL) {
+	    /*
+	     * Insert in a 2-node; simple.
+	     */
+	    if (ki == 0) {
+		LOG(("  inserting on left of 2-node\n"));
+		n->kids[2] = n->kids[1];     n->counts[2] = n->counts[1];
+		n->elems[1] = n->elems[0];
+		n->kids[1] = right;          n->counts[1] = rcount;
+		n->elems[0] = e;
+		n->kids[0] = left;           n->counts[0] = lcount;
+	    } else { /* ki == 1 */
+		LOG(("  inserting on right of 2-node\n"));
+		n->kids[2] = right;          n->counts[2] = rcount;
+		n->elems[1] = e;
+		n->kids[1] = left;           n->counts[1] = lcount;
+	    }
+	    if (n->kids[0]) n->kids[0]->parent = n;
+	    if (n->kids[1]) n->kids[1]->parent = n;
+	    if (n->kids[2]) n->kids[2]->parent = n;
+	    LOG(("  done\n"));
+	    break;
+	} else if (n->elems[2] == NULL) {
+	    /*
+	     * Insert in a 3-node; simple.
+	     */
+	    if (ki == 0) {
+		LOG(("  inserting on left of 3-node\n"));
+		n->kids[3] = n->kids[2];    n->counts[3] = n->counts[2];
+		n->elems[2] = n->elems[1];
+		n->kids[2] = n->kids[1];    n->counts[2] = n->counts[1];
+		n->elems[1] = n->elems[0];
+		n->kids[1] = right;         n->counts[1] = rcount;
+		n->elems[0] = e;
+		n->kids[0] = left;          n->counts[0] = lcount;
+	    } else if (ki == 1) {
+		LOG(("  inserting in middle of 3-node\n"));
+		n->kids[3] = n->kids[2];    n->counts[3] = n->counts[2];
+		n->elems[2] = n->elems[1];
+		n->kids[2] = right;         n->counts[2] = rcount;
+		n->elems[1] = e;
+		n->kids[1] = left;          n->counts[1] = lcount;
+	    } else { /* ki == 2 */
+		LOG(("  inserting on right of 3-node\n"));
+		n->kids[3] = right;         n->counts[3] = rcount;
+		n->elems[2] = e;
+		n->kids[2] = left;          n->counts[2] = lcount;
+	    }
+	    if (n->kids[0]) n->kids[0]->parent = n;
+	    if (n->kids[1]) n->kids[1]->parent = n;
+	    if (n->kids[2]) n->kids[2]->parent = n;
+	    if (n->kids[3]) n->kids[3]->parent = n;
+	    LOG(("  done\n"));
+	    break;
+	} else {
+	    node234 *m = mknew(node234);
+	    m->parent = n->parent;     /* m will sit beside n, on its left */
+	    LOG(("  splitting a 4-node; created new node %p\n", m));
+	    /*
+	     * Insert in a 4-node; split into a 2-node and a
+	     * 3-node, and move focus up a level.
+	     * 
+	     * I don't think it matters which way round we put the
+	     * 2 and the 3. For simplicity, we'll put the 3 first
+	     * always.
+	     */
+	    if (ki == 0) {
+		m->kids[0] = left;          m->counts[0] = lcount;
+		m->elems[0] = e;
+		m->kids[1] = right;         m->counts[1] = rcount;
+		m->elems[1] = n->elems[0];
+		m->kids[2] = n->kids[1];    m->counts[2] = n->counts[1];
+		e = n->elems[1];
+		n->kids[0] = n->kids[2];    n->counts[0] = n->counts[2];
+		n->elems[0] = n->elems[2];
+		n->kids[1] = n->kids[3];    n->counts[1] = n->counts[3];
+	    } else if (ki == 1) {
+		m->kids[0] = n->kids[0];    m->counts[0] = n->counts[0];
+		m->elems[0] = n->elems[0];
+		m->kids[1] = left;          m->counts[1] = lcount;
+		m->elems[1] = e;
+		m->kids[2] = right;         m->counts[2] = rcount;
+		e = n->elems[1];
+		n->kids[0] = n->kids[2];    n->counts[0] = n->counts[2];
+		n->elems[0] = n->elems[2];
+		n->kids[1] = n->kids[3];    n->counts[1] = n->counts[3];
+	    } else if (ki == 2) {
+		m->kids[0] = n->kids[0];    m->counts[0] = n->counts[0];
+		m->elems[0] = n->elems[0];
+		m->kids[1] = n->kids[1];    m->counts[1] = n->counts[1];
+		m->elems[1] = n->elems[1];
+		m->kids[2] = left;          m->counts[2] = lcount;
+		/* e = e; */
+		n->kids[0] = right;         n->counts[0] = rcount;
+		n->elems[0] = n->elems[2];
+		n->kids[1] = n->kids[3];    n->counts[1] = n->counts[3];
+	    } else { /* ki == 3 */
+		m->kids[0] = n->kids[0];    m->counts[0] = n->counts[0];
+		m->elems[0] = n->elems[0];
+		m->kids[1] = n->kids[1];    m->counts[1] = n->counts[1];
+		m->elems[1] = n->elems[1];
+		m->kids[2] = n->kids[2];    m->counts[2] = n->counts[2];
+		n->kids[0] = left;          n->counts[0] = lcount;
+		n->elems[0] = e;
+		n->kids[1] = right;         n->counts[1] = rcount;
+		e = n->elems[2];
+	    }
+	    m->kids[3] = n->kids[3] = n->kids[2] = NULL;   /* clear vacated slots */
+	    m->counts[3] = n->counts[3] = n->counts[2] = 0;
+	    m->elems[2] = n->elems[2] = n->elems[1] = NULL;
+	    if (m->kids[0]) m->kids[0]->parent = m;
+	    if (m->kids[1]) m->kids[1]->parent = m;
+	    if (m->kids[2]) m->kids[2]->parent = m;
+	    if (n->kids[0]) n->kids[0]->parent = n;
+	    if (n->kids[1]) n->kids[1]->parent = n;
+	    LOG(("  left (%p): %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", m,
+		 m->kids[0], m->counts[0], m->elems[0],
+		 m->kids[1], m->counts[1], m->elems[1],
+		 m->kids[2], m->counts[2]));
+	    LOG(("  right (%p): %p/%d \"%s\" %p/%d\n", n,
+		 n->kids[0], n->counts[0], n->elems[0],
+		 n->kids[1], n->counts[1]));
+	    left = m;  lcount = countnode234(left);
+	    right = n; rcount = countnode234(right);
+	}
+	if (n->parent)
+	    ki = (n->parent->kids[0] == n ? 0 :
+		  n->parent->kids[1] == n ? 1 :
+		  n->parent->kids[2] == n ? 2 : 3);
+	n = n->parent;
+    }
+
+    /*
+     * If we've come out of here by `break', n will still be
+     * non-NULL and all we need to do is go back up the tree
+     * updating counts. If we've come here because n is NULL, we
+     * need to create a new root for the tree because the old one
+     * has just split into two. */
+    if (n) {
+	while (n->parent) {
+	    int count = countnode234(n);
+	    int childnum;
+	    childnum = (n->parent->kids[0] == n ? 0 :
+			n->parent->kids[1] == n ? 1 :
+			n->parent->kids[2] == n ? 2 : 3);
+	    n->parent->counts[childnum] = count;
+	    n = n->parent;
+	}
+	return 0;		       /* root unchanged */
+    } else {
+	LOG(("  root is overloaded, split into two\n"));
+	(*root) = mknew(node234);
+	(*root)->kids[0] = left;     (*root)->counts[0] = lcount;
+	(*root)->elems[0] = e;
+	(*root)->kids[1] = right;    (*root)->counts[1] = rcount;
+	(*root)->elems[1] = NULL;
+	(*root)->kids[2] = NULL;     (*root)->counts[2] = 0;
+	(*root)->elems[2] = NULL;
+	(*root)->kids[3] = NULL;     (*root)->counts[3] = 0;
+	(*root)->parent = NULL;
+	if ((*root)->kids[0]) (*root)->kids[0]->parent = (*root);
+	if ((*root)->kids[1]) (*root)->kids[1]->parent = (*root);
+	LOG(("  new root is %p/%d \"%s\" %p/%d\n",
+	     (*root)->kids[0], (*root)->counts[0],
+	     (*root)->elems[0],
+	     (*root)->kids[1], (*root)->counts[1]));
+	return 1;		       /* root moved */
+    }
+}
+
+/*
+ * Add e to tree t: by t->cmp if index < 0, else at position index.
+ * Returns e, an existing equal element, or NULL on a bad index.
+ */
+static void *add234_internal(tree234 *t, void *e, int index) {
+    node234 *n;
+    int ki;
+    void *orig_e = e;
+    int c;
+
+    LOG(("adding element \"%s\" to tree %p\n", e, t));
+    if (t->root == NULL) {
+	t->root = mknew(node234);
+	t->root->elems[1] = t->root->elems[2] = NULL;
+	t->root->kids[0] = t->root->kids[1] = NULL;
+	t->root->kids[2] = t->root->kids[3] = NULL;
+	t->root->counts[0] = t->root->counts[1] = 0;
+	t->root->counts[2] = t->root->counts[3] = 0;
+	t->root->parent = NULL;
+	t->root->elems[0] = e;
+	LOG(("  created root %p\n", t->root));
+	return orig_e;
+    }
+
+    n = t->root;
+    while (n) {
+	LOG(("  node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	     n,
+	     n->kids[0], n->counts[0], n->elems[0],
+	     n->kids[1], n->counts[1], n->elems[1],
+	     n->kids[2], n->counts[2], n->elems[2],
+	     n->kids[3], n->counts[3]));
+	if (index >= 0) {
+	    if (!n->kids[0]) {
+		/*
+		 * Leaf node. We want to insert at kid position
+		 * equal to the index:
+		 * 
+		 *   0 A 1 B 2 C 3
+		 */
+		ki = index;	       /* NOTE(review): no range check here when n is the root */
+	    } else {
+		/*
+		 * Internal node. We always descend through it (add
+		 * always starts at the bottom, never in the
+		 * middle).
+		 */
+		if (index <= n->counts[0]) {
+		    ki = 0;
+		} else if (index -= n->counts[0] + 1, index <= n->counts[1]) {
+		    ki = 1;
+		} else if (index -= n->counts[1] + 1, index <= n->counts[2]) {
+		    ki = 2;
+		} else if (index -= n->counts[2] + 1, index <= n->counts[3]) {
+		    ki = 3;
+		} else
+		    return NULL;       /* error: index out of range */
+	    }
+	} else {
+	    if ((c = t->cmp(e, n->elems[0])) < 0)
+		ki = 0;
+	    else if (c == 0)
+		return n->elems[0];	       /* already exists */
+	    else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1])) < 0)
+		ki = 1;
+	    else if (c == 0)
+		return n->elems[1];	       /* already exists */
+	    else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2])) < 0)
+		ki = 2;
+	    else if (c == 0)
+		return n->elems[2];	       /* already exists */
+	    else
+		ki = 3;
+	}
+	LOG(("  moving to child %d (%p)\n", ki, n->kids[ki]));
+	if (!n->kids[ki])
+	    break;
+	n = n->kids[ki];
+    }
+
+    add234_insert(NULL, e, NULL, &t->root, n, ki);
+
+    return orig_e;
+}
+
+/* Sorted insert: add e by t->cmp, or fail (NULL) on an unsorted tree. */
+void *add234(tree234 *t, void *e) {
+    /* A negative index tells add234_internal to place e by comparison. */
+    int by_comparison = -1;
+    return t->cmp ? add234_internal(t, e, by_comparison) : NULL;
+}
+void *addpos234(tree234 *t, void *e, int index) {
+    /* Range-check here: add234_internal only checks the upper bound
+     * on internal nodes, not for an empty tree or a leaf root. */
+    if (t->cmp || index < 0 || index > countnode234(t->root))
+	return NULL;		       /* sorted tree, or index not in 0..count */
+    return add234_internal(t, e, index);
+}
+
+/*
+ * Fetch the element at position index (counting from 0) in tree t,
+ * using the cached subtree counts to steer the descent. Returns
+ * NULL if the tree is empty or index is out of range.
+ */
+void *index234(tree234 *t, int index) {
+    node234 *node = t->root;
+    int slot;
+
+    if (node == NULL)
+	return NULL;		       /* tree is empty */
+
+    if (index < 0 || index >= countnode234(node))
+	return NULL;		       /* out of range */
+
+    while (node) {
+	/*
+	 * A node reads "kid0 elem0 kid1 elem1 kid2 elem2 kid3" from
+	 * left to right. Scan it in that order, discounting whatever
+	 * we pass over, until index lands inside a kid or exactly on
+	 * an element.
+	 */
+	for (slot = 0; slot < 3; slot++) {
+	    if (index < node->counts[slot])
+		break;		       /* target lies within this kid */
+	    index -= node->counts[slot] + 1;
+	    if (index < 0)
+		return node->elems[slot];
+	}
+	node = node->kids[slot];       /* slot is 3 if we ran off the end */
+    }
+
+    /* Only reachable if the cached counts disagree with the tree. */
+    return NULL;
+}
+
+/*
+ * Find the element of sorted tree t standing in `relation' (one of
+ * the REL234_* codes) to e, storing its position in *index if index
+ * is non-NULL. Returns NULL if none. e is passed as cmp's first
+ * argument, so cmp may be asymmetric; NULL cmp means use t->cmp. A
+ * NULL e is allowed only with REL234_LT/GT (giving last/first).
+ */
+void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp,
+		    int relation, int *index) {
+    node234 *n;
+    void *ret;
+    int c;
+    int idx, ecount, kcount, cmpret;
+
+    if (t->root == NULL)
+	return NULL;
+
+    if (cmp == NULL)
+	cmp = t->cmp;
+
+    n = t->root;
+    /*
+     * Attempt to find the element itself.
+     */
+    idx = 0;
+    ecount = -1;
+    /*
+     * Prepare a fake `cmp' result if e is NULL.
+     */
+    cmpret = 0;
+    if (e == NULL) {
+	assert(relation == REL234_LT || relation == REL234_GT);
+	if (relation == REL234_LT)
+	    cmpret = +1;	       /* e is a max: always greater */
+	else if (relation == REL234_GT)
+	    cmpret = -1;	       /* e is a min: always smaller */
+    }
+    while (1) {
+	for (kcount = 0; kcount < 4; kcount++) {
+	    if (kcount >= 3 || n->elems[kcount] == NULL ||
+		(c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0) {
+		break;
+	    }
+	    if (n->kids[kcount]) idx += n->counts[kcount];
+	    if (c == 0) {
+		ecount = kcount;
+		break;
+	    }
+	    idx++;
+	}
+	if (ecount >= 0)
+	    break;
+	if (n->kids[kcount])
+	    n = n->kids[kcount];
+	else
+	    break;
+    }
+
+    if (ecount >= 0) {
+	/*
+	 * We have found the element we're looking for. It's
+	 * n->elems[ecount], at tree index idx. If our search
+	 * relation is EQ, LE or GE we can now go home.
+	 */
+	if (relation != REL234_LT && relation != REL234_GT) {
+	    if (index) *index = idx;
+	    return n->elems[ecount];
+	}
+
+	/*
+	 * Otherwise, we'll do an indexed lookup for the previous
+	 * or next element. (It would be perfectly possible to
+	 * implement these search types in a non-counted tree by
+	 * going back up from where we are, but far more fiddly.)
+	 */
+	if (relation == REL234_LT)
+	    idx--;
+	else
+	    idx++;
+    } else {
+	/*
+	 * We've found our way to the bottom of the tree and we
+	 * know where we would insert this node if we wanted to:
+	 * we'd put it in in place of the (empty) subtree
+	 * n->kids[kcount], and it would have index idx
+	 * 
+	 * But the actual element isn't there. So if our search
+	 * relation is EQ, we're doomed.
+	 */
+	if (relation == REL234_EQ)
+	    return NULL;
+
+	/*
+	 * Otherwise, we must do an index lookup for index idx-1
+	 * (if we're going left - LE or LT) or index idx (if we're
+	 * going right - GE or GT).
+	 */
+	if (relation == REL234_LT || relation == REL234_LE) {
+	    idx--;
+	}
+    }
+
+    /*
+     * We know the index of the element we want; just call index234
+     * to do the rest. This will return NULL if the index is out of
+     * bounds, which is exactly what we want.
+     */
+    ret = index234(t, idx);
+    if (ret && index) *index = idx;
+    return ret;
+}
+void *find234(tree234 *t, void *e, cmpfn234 cmp) {   /* exact match only */
+    return findrelpos234(t, e, cmp, REL234_EQ, NULL);  /* position not reported */
+}
+void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation) {
+    return findrelpos234(t, e, cmp, relation, NULL);   /* position not reported */
+}
+void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index) {
+    return findrelpos234(t, e, cmp, REL234_EQ, index); /* exact match; position via index */
+}
+
+/*
+ * Tree transformation used in delete and split: move a subtree
+ * right, from child ki of a node to the next child. Update k and
+ * index so that they still point to the same place in the
+ * transformed tree. Assumes the destination child is not full, and
+ * that the source child does have a subtree to spare. Can cope if
+ * the destination child is undersized.
+ * 
+ *                . C .                     . B .
+ *               /     \     ->            /     \
+ * [more] a A b B c   d D e      [more] a A b   c C d D e
+ * 
+ *                 . C .                     . B .
+ *                /     \    ->             /     \
+ *  [more] a A b B c     d        [more] a A b   c C d
+ */
+static void trans234_subtree_right(node234 *n, int ki, int *k, int *index) {
+    node234 *src, *dest;
+    int i, srclen, adjust;
+
+    src = n->kids[ki];
+    dest = n->kids[ki+1];
+
+    LOG(("  trans234_subtree_right(%p, %d):\n", n, ki));
+    LOG(("    parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	 n,
+	 n->kids[0], n->counts[0], n->elems[0],
+	 n->kids[1], n->counts[1], n->elems[1],
+	 n->kids[2], n->counts[2], n->elems[2],
+	 n->kids[3], n->counts[3]));
+    LOG(("    src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	 src,
+	 src->kids[0], src->counts[0], src->elems[0],
+	 src->kids[1], src->counts[1], src->elems[1],
+	 src->kids[2], src->counts[2], src->elems[2],
+	 src->kids[3], src->counts[3]));
+    LOG(("    dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	 dest,
+	 dest->kids[0], dest->counts[0], dest->elems[0],
+	 dest->kids[1], dest->counts[1], dest->elems[1],
+	 dest->kids[2], dest->counts[2], dest->elems[2],
+	 dest->kids[3], dest->counts[3]));
+    /*
+     * Move over the rest of the destination node to make space.
+     */
+    dest->kids[3] = dest->kids[2];    dest->counts[3] = dest->counts[2];
+    dest->elems[2] = dest->elems[1];
+    dest->kids[2] = dest->kids[1];    dest->counts[2] = dest->counts[1];
+    dest->elems[1] = dest->elems[0];
+    dest->kids[1] = dest->kids[0];    dest->counts[1] = dest->counts[0];
+
+    /* which element to move over */
+    i = (src->elems[2] ? 2 : src->elems[1] ? 1 : 0);
+
+    dest->elems[0] = n->elems[ki];
+    n->elems[ki] = src->elems[i];
+    src->elems[i] = NULL;
+
+    dest->kids[0] = src->kids[i+1];   dest->counts[0] = src->counts[i+1];
+    src->kids[i+1] = NULL;            src->counts[i+1] = 0;
+
+    if (dest->kids[0]) dest->kids[0]->parent = dest;
+
+    adjust = dest->counts[0] + 1;      /* moved subtree plus one element */
+
+    n->counts[ki] -= adjust;
+    n->counts[ki+1] += adjust;
+
+    srclen = n->counts[ki];	       /* src's size after the move */
+
+    if (k) {			       /* index is only touched when k is given */
+	LOG(("    before: k,index = %d,%d\n", (*k), (*index)));
+	if ((*k) == ki && (*index) > srclen) {
+	    (*index) -= srclen + 1;
+	    (*k)++;
+	} else if ((*k) == ki+1) {
+	    (*index) += adjust;
+	}
+	LOG(("    after: k,index = %d,%d\n", (*k), (*index)));
+    }
+
+    LOG(("    parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	 n,
+	 n->kids[0], n->counts[0], n->elems[0],
+	 n->kids[1], n->counts[1], n->elems[1],
+	 n->kids[2], n->counts[2], n->elems[2],
+	 n->kids[3], n->counts[3]));
+    LOG(("    src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	 src,
+	 src->kids[0], src->counts[0], src->elems[0],
+	 src->kids[1], src->counts[1], src->elems[1],
+	 src->kids[2], src->counts[2], src->elems[2],
+	 src->kids[3], src->counts[3]));
+    LOG(("    dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	 dest,
+	 dest->kids[0], dest->counts[0], dest->elems[0],
+	 dest->kids[1], dest->counts[1], dest->elems[1],
+	 dest->kids[2], dest->counts[2], dest->elems[2],
+	 dest->kids[3], dest->counts[3]));
+}
+
+/*
+ * Tree transformation used in delete and split: move a subtree
+ * left, from child ki of a node to the previous child. Update k
+ * and index so that they still point to the same place in the
+ * transformed tree. Assumes the destination child is not full, and
+ * that the source child does have a subtree to spare. Can cope if
+ * the destination child is undersized. 
+ *
+ *      . B .                             . C .
+ *     /     \                ->         /     \
+ *  a A b   c C d D e [more]      a A b B c   d D e [more]
+ *
+ *     . A .                             . B .
+ *    /     \                 ->        /     \
+ *   a   b B c C d [more]            a A b   c C d [more]
+ */
+static void trans234_subtree_left(node234 *n, int ki, int *k, int *index) {
+    node234 *src, *dest;
+    int i, adjust;
+
+    src = n->kids[ki];
+    dest = n->kids[ki-1];
+
+    LOG(("  trans234_subtree_left(%p, %d):\n", n, ki));
+    LOG(("    parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	 n,
+	 n->kids[0], n->counts[0], n->elems[0],
+	 n->kids[1], n->counts[1], n->elems[1],
+	 n->kids[2], n->counts[2], n->elems[2],
+	 n->kids[3], n->counts[3]));
+    LOG(("    dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	 dest,
+	 dest->kids[0], dest->counts[0], dest->elems[0],
+	 dest->kids[1], dest->counts[1], dest->elems[1],
+	 dest->kids[2], dest->counts[2], dest->elems[2],
+	 dest->kids[3], dest->counts[3]));
+    LOG(("    src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	 src,
+	 src->kids[0], src->counts[0], src->elems[0],
+	 src->kids[1], src->counts[1], src->elems[1],
+	 src->kids[2], src->counts[2], src->elems[2],
+	 src->kids[3], src->counts[3]));
+
+    /* where in dest to put it */
+    i = (dest->elems[1] ? 2 : dest->elems[0] ? 1 : 0);
+    dest->elems[i] = n->elems[ki-1];
+    n->elems[ki-1] = src->elems[0];
+
+    dest->kids[i+1] = src->kids[0];   dest->counts[i+1] = src->counts[0];
+
+    if (dest->kids[i+1]) dest->kids[i+1]->parent = dest;
+
+    /*
+     * Move over the rest of the source node.
+     */
+    src->kids[0] = src->kids[1];      src->counts[0] = src->counts[1];
+    src->elems[0] = src->elems[1];
+    src->kids[1] = src->kids[2];      src->counts[1] = src->counts[2];
+    src->elems[1] = src->elems[2];
+    src->kids[2] = src->kids[3];      src->counts[2] = src->counts[3];
+    src->elems[2] = NULL;
+    src->kids[3] = NULL;              src->counts[3] = 0;
+
+    adjust = dest->counts[i+1] + 1;    /* moved subtree plus one element */
+
+    n->counts[ki] -= adjust;
+    n->counts[ki-1] += adjust;
+
+    if (k) {			       /* index is only touched when k is given */
+	LOG(("    before: k,index = %d,%d\n", (*k), (*index)));
+	if ((*k) == ki) {
+	    (*index) -= adjust;
+	    if ((*index) < 0) {
+		(*index) += n->counts[ki-1] + 1;
+		(*k)--;
+	    }
+	}
+	LOG(("    after: k,index = %d,%d\n", (*k), (*index)));
+    }
+
+    LOG(("    parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	 n,
+	 n->kids[0], n->counts[0], n->elems[0],
+	 n->kids[1], n->counts[1], n->elems[1],
+	 n->kids[2], n->counts[2], n->elems[2],
+	 n->kids[3], n->counts[3]));
+    LOG(("    dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	 dest,
+	 dest->kids[0], dest->counts[0], dest->elems[0],
+	 dest->kids[1], dest->counts[1], dest->elems[1],
+	 dest->kids[2], dest->counts[2], dest->elems[2],
+	 dest->kids[3], dest->counts[3]));
+    LOG(("    src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	 src,
+	 src->kids[0], src->counts[0], src->elems[0],
+	 src->kids[1], src->counts[1], src->elems[1],
+	 src->kids[2], src->counts[2], src->elems[2],
+	 src->kids[3], src->counts[3]));
+}
+
+/*
+ * Tree transformation used in delete and split: merge child nodes
+ * ki and ki+1 of a node. Update k and index so that they still
+ * point to the same place in the transformed tree. Assumes both
+ * children _are_ sufficiently small.
+ *
+ *      . B .                .
+ *     /     \     ->        |
+ *  a A b   c C d      a A b B c C d
+ * 
+ * This routine can also cope with either child being undersized:
+ * 
+ *     . A .                 .
+ *    /     \      ->        |
+ *   a     b B c         a A b B c
+ *
+ *    . A .                  .
+ *   /     \       ->        |
+ *  a   b B c C d      a A b B c C d
+ */
+static void trans234_subtree_merge(node234 *n, int ki, int *k, int *index) {
+    node234 *left, *right;
+    int i, leftlen, rightlen, lsize, rsize;
+
+    left = n->kids[ki];               leftlen = n->counts[ki];
+    right = n->kids[ki+1];            rightlen = n->counts[ki+1];
+
+    LOG(("  trans234_subtree_merge(%p, %d):\n", n, ki));
+    LOG(("    parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	 n,
+	 n->kids[0], n->counts[0], n->elems[0],
+	 n->kids[1], n->counts[1], n->elems[1],
+	 n->kids[2], n->counts[2], n->elems[2],
+	 n->kids[3], n->counts[3]));
+    LOG(("    left %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	 left,
+	 left->kids[0], left->counts[0], left->elems[0],
+	 left->kids[1], left->counts[1], left->elems[1],
+	 left->kids[2], left->counts[2], left->elems[2],
+	 left->kids[3], left->counts[3]));
+    LOG(("    right %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	 right,
+	 right->kids[0], right->counts[0], right->elems[0],
+	 right->kids[1], right->counts[1], right->elems[1],
+	 right->kids[2], right->counts[2], right->elems[2],
+	 right->kids[3], right->counts[3]));
+
+    assert(!left->elems[2] && !right->elems[2]);   /* neither is large! */
+    lsize = (left->elems[1] ? 2 : left->elems[0] ? 1 : 0);
+    rsize = (right->elems[1] ? 2 : right->elems[0] ? 1 : 0);
+
+    left->elems[lsize] = n->elems[ki]; /* separator drops down into left */
+
+    for (i = 0; i < rsize+1; i++) {
+	left->kids[lsize+1+i] = right->kids[i];
+	left->counts[lsize+1+i] = right->counts[i];
+	if (left->kids[lsize+1+i])
+	    left->kids[lsize+1+i]->parent = left;
+	if (i < rsize)
+	    left->elems[lsize+1+i] = right->elems[i];
+    }
+
+    n->counts[ki] += rightlen + 1;     /* right's elements plus the separator */
+
+    sfree(right);		       /* node only; its kids now hang off left */
+
+    /*
+     * Move the rest of n up by one.
+     */
+    for (i = ki+1; i < 3; i++) {
+	n->kids[i] = n->kids[i+1];
+	n->counts[i] = n->counts[i+1];
+    }
+    for (i = ki; i < 2; i++) {
+	n->elems[i] = n->elems[i+1];
+    }
+    n->kids[3] = NULL;
+    n->counts[3] = 0;
+    n->elems[2] = NULL;
+
+    if (k) {			       /* index is only touched when k is given */
+	LOG(("    before: k,index = %d,%d\n", (*k), (*index)));
+	if ((*k) == ki+1) {
+	    (*k)--;
+	    (*index) += leftlen + 1;
+	} else if ((*k) > ki+1) {
+	    (*k)--;
+	}
+	LOG(("    after: k,index = %d,%d\n", (*k), (*index)));
+    }
+
+    LOG(("    parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	 n,
+	 n->kids[0], n->counts[0], n->elems[0],
+	 n->kids[1], n->counts[1], n->elems[1],
+	 n->kids[2], n->counts[2], n->elems[2],
+	 n->kids[3], n->counts[3]));
+    LOG(("    merged %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	 left,
+	 left->kids[0], left->counts[0], left->elems[0],
+	 left->kids[1], left->counts[1], left->elems[1],
+	 left->kids[2], left->counts[2], left->elems[2],
+	 left->kids[3], left->counts[3]));
+
+}
+    
+/*
+ * Delete the element at position index of t and return it. Does not
+ * free the element; index must be valid and t non-empty (unchecked).
+ */
+static void *delpos234_internal(tree234 *t, int index) {
+    node234 *n;
+    void *retval;
+    int ki, i;
+
+    retval = NULL;
+
+    n = t->root;		       /* by assumption this is non-NULL */
+    LOG(("deleting item %d from tree %p\n", index, t));
+    while (1) {
+	node234 *sub;
+
+	LOG(("  node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n",
+	     n,
+	     n->kids[0], n->counts[0], n->elems[0],
+	     n->kids[1], n->counts[1], n->elems[1],
+	     n->kids[2], n->counts[2], n->elems[2],
+	     n->kids[3], n->counts[3],
+	     index));
+	if (index <= n->counts[0]) {
+	    ki = 0;
+	} else if (index -= n->counts[0]+1, index <= n->counts[1]) {
+	    ki = 1;
+	} else if (index -= n->counts[1]+1, index <= n->counts[2]) {
+	    ki = 2;
+	} else if (index -= n->counts[2]+1, index <= n->counts[3]) {
+	    ki = 3;
+	} else {
+	    assert(0);		       /* can't happen */
+	}
+
+	if (!n->kids[0])
+	    break;		       /* n is a leaf node; we're here! */
+
+	/*
+	 * Check to see if we've found our target element. If so,
+	 * we must choose a new target (we'll use the old target's
+	 * successor, which will be in a leaf), move it into the
+	 * place of the old one, continue down to the leaf and
+	 * delete the old copy of the new target.
+	 */
+	if (index == n->counts[ki]) {
+	    node234 *m;
+	    LOG(("  found element in internal node, index %d\n", ki));
+	    assert(n->elems[ki]);      /* must be a kid _before_ an element */
+	    ki++; index = 0;
+	    for (m = n->kids[ki]; m->kids[0]; m = m->kids[0])
+		continue;
+	    LOG(("  replacing with element \"%s\" from leaf node %p\n",
+		 m->elems[0], m));
+	    retval = n->elems[ki-1];
+	    n->elems[ki-1] = m->elems[0];
+	}
+
+	/*
+	 * Recurse down to subtree ki. If it has only one element,
+	 * we have to do some transformation to start with.
+	 */
+	LOG(("  moving to subtree %d\n", ki));
+	sub = n->kids[ki];
+	if (!sub->elems[1]) {
+	    LOG(("  subtree has only one element!\n"));
+	    if (ki > 0 && n->kids[ki-1]->elems[1]) {
+		/*
+		 * Child ki has only one element, but child
+		 * ki-1 has two or more. So we need to move a
+		 * subtree from ki-1 to ki.
+		 */
+		trans234_subtree_right(n, ki-1, &ki, &index);
+	    } else if (ki < 3 && n->kids[ki+1] &&
+		       n->kids[ki+1]->elems[1]) {
+		/*
+		 * Child ki has only one element, but ki+1 has
+		 * two or more. Move a subtree from ki+1 to ki.
+		 */
+		trans234_subtree_left(n, ki+1, &ki, &index);
+	    } else {
+		/*
+		 * ki is small with only small neighbours. Pick a
+		 * neighbour and merge with it.
+		 */
+		trans234_subtree_merge(n, ki>0 ? ki-1 : ki, &ki, &index);
+		sub = n->kids[ki];
+
+		if (!n->elems[0]) {
+		    /*
+		     * The root is empty and needs to be
+		     * removed.
+		     */
+		    LOG(("  shifting root!\n"));
+		    t->root = sub;
+		    sub->parent = NULL;
+		    sfree(n);
+		    n = NULL;
+		}
+	    }
+	}
+
+	if (n)
+	    n->counts[ki]--;	       /* one fewer element below kid ki */
+	n = sub;
+    }
+
+    /*
+     * Now n is a leaf node, and ki marks the element number we
+     * want to delete. We've already arranged for the leaf to be
+     * bigger than minimum size, so let's just go to it.
+     */
+    assert(!n->kids[0]);
+    if (!retval)
+	retval = n->elems[ki];
+
+    for (i = ki; i < 2 && n->elems[i+1]; i++)
+	n->elems[i] = n->elems[i+1];
+    n->elems[i] = NULL;
+
+    /*
+     * It's just possible that we have reduced the leaf to zero
+     * size. This can only happen if it was the root - so destroy
+     * it and make the tree empty.
+     */
+    if (!n->elems[0]) {
+	LOG(("  removed last element in tree, destroying empty root\n"));
+	assert(n == t->root);
+	sfree(n);
+	t->root = NULL;
+    }
+
+    return retval;		       /* finished! */
+}
+void *delpos234(tree234 *t, int index) {
+    /* Only positions 0..count-1 exist; an empty tree has none. */
+    int in_range = (index >= 0 && index < countnode234(t->root));
+    return in_range ? delpos234_internal(t, index) : NULL;
+}
+void *del234(tree234 *t, void *e) {
+    int pos;			       /* where the match sits, if any */
+    void *found = findrelpos234(t, e, NULL, REL234_EQ, &pos);
+    if (found == NULL)
+	return NULL;		       /* nothing equal to e in the tree */
+    return delpos234_internal(t, pos); /* unlink it and hand it back */
+}
+
+/*
+ * Join two subtrees together with a separator element between
+ * them, given their relative height.
+ *
+ * On entry `*height' is the relative height as computed by the
+ * callers in this file: height of the left tree minus height of the
+ * right tree.
+ * (Height<0 means the left tree is shorter, >0 means the right
+ * tree is shorter, =0 means (duh) they're equal.)
+ *
+ * It is assumed that any checks needed on the ordering criterion
+ * have _already_ been done.
+ *
+ * The value returned in `height' is 0 or 1 depending on whether the
+ * resulting tree is the same height as the original larger one, or
+ * one higher.
+ *
+ * The function's return value is the root node of the joined tree.
+ * Either of `left' and `right' may be NULL (an empty tree).
+ */
+static node234 *join234_internal(node234 *left, void *sep,
+				 node234 *right, int *height) {
+    node234 *root, *node;
+    int relht = *height;
+    int ki;
+
+    LOG(("  join: joining %p \"%s\" %p, relative height is %d\n",
+	 left, sep, right, relht));
+    if (relht == 0) {
+	/*
+	 * The trees are the same height. Create a new one-element
+	 * root containing the separator and pointers to the two
+	 * nodes.
+	 */
+	node234 *newroot;
+	newroot = mknew(node234);
+	newroot->kids[0] = left;     newroot->counts[0] = countnode234(left);
+	newroot->elems[0] = sep;
+	newroot->kids[1] = right;    newroot->counts[1] = countnode234(right);
+	newroot->elems[1] = NULL;
+	newroot->kids[2] = NULL;     newroot->counts[2] = 0;
+	newroot->elems[2] = NULL;
+	newroot->kids[3] = NULL;     newroot->counts[3] = 0;
+	newroot->parent = NULL;
+	/* Both subtrees may be empty, so guard the parent fix-ups. */
+	if (left) left->parent = newroot;
+	if (right) right->parent = newroot;
+	*height = 1;
+	LOG(("  join: same height, brand new root\n"));
+	return newroot;
+    }
+
+    /*
+     * This now works like the addition algorithm on the larger
+     * tree. We're replacing a single kid pointer with two kid
+     * pointers separated by an element; if that causes the node to
+     * overload, we split it in two, move a separator element up to
+     * the next node, and repeat.
+     */
+    if (relht < 0) {
+	/*
+	 * Left tree is shorter. Search down the right tree to find
+	 * the pointer we're inserting at.
+	 */
+	node = root = right;
+	/* Follow the leftmost kid pointers, one level per step. */
+	while (++relht < 0) {
+	    node = node->kids[0];
+	}
+	ki = 0;
+	/* The kid currently at the insertion point becomes the right
+	 * neighbour of the separator. */
+	right = node->kids[ki];
+    } else {
+	/*
+	 * Right tree is shorter; search down the left to find the
+	 * pointer we're inserting at.
+	 */
+	node = root = left;
+	/* Follow the rightmost occupied kid pointer at each level. */
+	while (--relht > 0) {
+	    if (node->elems[2])
+		node = node->kids[3];
+	    else if (node->elems[1])
+		node = node->kids[2];
+	    else
+		node = node->kids[1];
+	}
+	/* ki is the index of the rightmost kid slot of this node. */
+	if (node->elems[2])
+	    ki = 3;
+	else if (node->elems[1])
+	    ki = 2;
+	else
+	    ki = 1;
+	/* The kid currently at the insertion point becomes the left
+	 * neighbour of the separator. */
+	left = node->kids[ki];
+    }
+
+    /*
+     * Now proceed as for addition.
+     */
+    *height = add234_insert(left, sep, right, &root, node, ki);
+
+    return root;
+}
+/*
+ * Count the levels in a tree by walking the leftmost kid pointers
+ * from the root until they run out. An empty tree has height 0.
+ */
+static int height234(tree234 *t) {
+    node234 *n;
+    int level;
+
+    for (level = 0, n = t->root; n != NULL; n = n->kids[0])
+	level++;
+    return level;
+}
+/*
+ * Append all of t2 to the end of t1, leaving t2 empty, and return
+ * t1. The first element of t2 is removed to act as the separator.
+ * If t1 is sorted and some element of t1 is >= the first element
+ * of t2, nothing is changed and NULL is returned.
+ */
+tree234 *join234(tree234 *t1, tree234 *t2) {
+    void *sep;
+    int relht;
+
+    /* Joining an empty tree on the right is a no-op. */
+    if (countnode234(t2->root) <= 0)
+	return t1;
+
+    if (t1->cmp) {
+	/* Ordering check: nothing in t1 may be >= t2's first element. */
+	void *first = index234(t2, 0);
+	if (findrelpos234(t1, first, NULL, REL234_GE, NULL))
+	    return NULL;
+    }
+
+    sep = delpos234(t2, 0);
+    relht = height234(t1) - height234(t2);
+    t1->root = join234_internal(t1->root, sep, t2->root, &relht);
+    t2->root = NULL;
+    return t1;
+}
+/*
+ * Prepend all of t1 to the front of t2, leaving t1 empty, and
+ * return t2. The last element of t1 is removed to act as the
+ * separator. If t2 is sorted and some element of t2 is <= the last
+ * element of t1, nothing is changed and NULL is returned.
+ */
+tree234 *join234r(tree234 *t1, tree234 *t2) {
+    int size1 = countnode234(t1->root);
+    void *sep;
+    int relht;
+
+    /* Joining an empty tree on the left is a no-op. */
+    if (size1 <= 0)
+	return t2;
+
+    if (t2->cmp) {
+	/* Ordering check: nothing in t2 may be <= t1's last element. */
+	void *last = index234(t1, size1-1);
+	if (findrelpos234(t2, last, NULL, REL234_LE, NULL))
+	    return NULL;
+    }
+
+    sep = delpos234(t1, size1-1);
+    relht = height234(t1) - height234(t2);
+    t2->root = join234_internal(t1->root, sep, t2->root, &relht);
+    t1->root = NULL;
+    return t2;
+}
+
+/*
+ * Split out the first <index> elements in a tree and return a
+ * pointer to the root node. Leave the root node of the remainder
+ * in t.
+ *
+ * index == 0 returns NULL and leaves t alone; index equal to the
+ * element count returns the whole tree and sets t->root to NULL.
+ * The caller is expected to have range-checked index already.
+ *
+ * Working state:
+ *  - halves[0] / halves[1] are the roots of the left and right
+ *    results under construction;
+ *  - lparent / rparent are the left and right pieces of the node
+ *    split on the previous level, and pki is the kid index within
+ *    lparent at which the next left piece is attached (the next
+ *    right piece always goes in rparent's kid slot 0).
+ */
+static node234 *split234_internal(tree234 *t, int index) {
+    node234 *halves[2], *n, *sib, *sub;
+    node234 *lparent, *rparent;
+    int ki, pki, i, half, lcount, rcount;
+
+    n = t->root;
+    LOG(("splitting tree %p at point %d\n", t, index));
+
+    /*
+     * Easy special cases. After this we have also dealt completely
+     * with the empty-tree case and we can assume the root exists.
+     */
+    if (index == 0)		       /* return nothing */
+	return NULL;
+    if (index == countnode234(t->root)) {   /* return the whole tree */
+	node234 *ret = t->root;
+	t->root = NULL;
+	return ret;
+    }
+
+    /*
+     * Search down the tree to find the split point.
+     */
+    lparent = rparent = NULL;
+    while (n) {
+	LOG(("  node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n",
+	     n,
+	     n->kids[0], n->counts[0], n->elems[0],
+	     n->kids[1], n->counts[1], n->elems[1],
+	     n->kids[2], n->counts[2], n->elems[2],
+	     n->kids[3], n->counts[3],
+	     index));
+	/*
+	 * lcount / rcount are the element counts that will end up
+	 * on the left and right of the cut within this subtree.
+	 */
+	lcount = index;
+	rcount = countnode234(n) - lcount;
+	/*
+	 * Pick the kid the cut passes through, making index
+	 * relative to that kid as we go.
+	 */
+	if (index <= n->counts[0]) {
+	    ki = 0;
+	} else if (index -= n->counts[0]+1, index <= n->counts[1]) {
+	    ki = 1;
+	} else if (index -= n->counts[1]+1, index <= n->counts[2]) {
+	    ki = 2;
+	} else {
+	    index -= n->counts[2]+1;
+	    ki = 3;
+	}
+
+	LOG(("  splitting at subtree %d\n", ki));
+	sub = n->kids[ki];
+
+	LOG(("  splitting at child index %d\n", ki));
+
+	/*
+	 * Split the node, put halves[0] on the right of the left
+	 * one and halves[1] on the left of the right one, put the
+	 * new node pointers in halves[0] and halves[1], and go up
+	 * a level.
+	 */
+	sib = mknew(node234);
+	/*
+	 * Elements ki.. and kids ki+1.. move into sib (shifted
+	 * down to start at element 0 / kid 1); sib's kid 0 is
+	 * filled in on the next iteration, or cleared after the
+	 * loop. Everything to the left of the cut stays in n.
+	 */
+	for (i = 0; i < 3; i++) {
+	    if (i+ki < 3 && n->elems[i+ki]) {
+		sib->elems[i] = n->elems[i+ki];
+		sib->kids[i+1] = n->kids[i+ki+1];
+		if (sib->kids[i+1]) sib->kids[i+1]->parent = sib;
+		sib->counts[i+1] = n->counts[i+ki+1];
+		n->elems[i+ki] = NULL;
+		n->kids[i+ki+1] = NULL;
+		n->counts[i+ki+1] = 0;
+	    } else {
+		sib->elems[i] = NULL;
+		sib->kids[i+1] = NULL;
+		sib->counts[i+1] = 0;
+	    }
+	}
+	if (lparent) {
+	    /* Hook both pieces under the pieces of the level above. */
+	    lparent->kids[pki] = n;
+	    lparent->counts[pki] = lcount;
+	    n->parent = lparent;
+	    rparent->kids[0] = sib;
+	    rparent->counts[0] = rcount;
+	    sib->parent = rparent;
+	} else {
+	    /* First iteration: these pieces are the two new roots. */
+	    halves[0] = n;
+	    n->parent = NULL;
+	    halves[1] = sib;
+	    sib->parent = NULL;
+	}
+	lparent = n;
+	rparent = sib;
+	pki = ki;
+	LOG(("  left node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	     n,
+	     n->kids[0], n->counts[0], n->elems[0],
+	     n->kids[1], n->counts[1], n->elems[1],
+	     n->kids[2], n->counts[2], n->elems[2],
+	     n->kids[3], n->counts[3]));
+	LOG(("  right node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+	     sib,
+	     sib->kids[0], sib->counts[0], sib->elems[0],
+	     sib->kids[1], sib->counts[1], sib->elems[1],
+	     sib->kids[2], sib->counts[2], sib->elems[2],
+	     sib->kids[3], sib->counts[3]));
+
+	n = sub;
+    }
+
+    /*
+     * We've come off the bottom here, so we've successfully split
+     * the tree into two equally high subtrees. The only problem is
+     * that some of the nodes down the fault line will be smaller
+     * than the minimum permitted size. (Since this is a 2-3-4
+     * tree, that means they'll be zero-element one-child nodes.)
+     */
+    LOG(("  fell off bottom, lroot is %p, rroot is %p\n",
+	 halves[0], halves[1]));
+    /* The bottom-level pieces have no kid along the fault line. */
+    lparent->counts[pki] = rparent->counts[0] = 0;
+    lparent->kids[pki] = rparent->kids[0] = NULL;
+
+    /*
+     * So now we go back down the tree from each of the two roots,
+     * fixing up undersize nodes.
+     */
+    for (half = 0; half < 2; half++) {
+	/*
+	 * Remove the root if it's undersize (it will contain only
+	 * one child pointer, so just throw it away and replace it
+	 * with its child). This might happen several times.
+	 */
+	while (halves[half] && !halves[half]->elems[0]) {
+	    LOG(("  root %p is undersize, throwing away\n", halves[half]));
+	    /* Step down first; the old root is still reachable via
+	     * the new root's parent pointer, so free it from there. */
+	    halves[half] = halves[half]->kids[0];
+	    sfree(halves[half]->parent);
+	    halves[half]->parent = NULL;
+	    LOG(("  new root is %p\n", halves[half]));
+	}
+
+	n = halves[half];
+	while (n) {
+	    void (*toward)(node234 *n, int ki, int *k, int *index);
+	    int ni, merge;
+
+	    /*
+	     * Now we have a potentially undersize node on the
+	     * right (if half==0) or left (if half==1). Sort it
+	     * out, by merging with a neighbour or by transferring
+	     * subtrees over. At this time we must also ensure that
+	     * nodes are bigger than minimum, in case we need an
+	     * element to merge two nodes below.
+	     */
+	    LOG(("  node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+		 n,
+		 n->kids[0], n->counts[0], n->elems[0],
+		 n->kids[1], n->counts[1], n->elems[1],
+		 n->kids[2], n->counts[2], n->elems[2],
+		 n->kids[3], n->counts[3]));
+	    if (half == 1) {
+		ki = 0;		       /* the kid we're interested in */
+		ni = 1;		       /* the neighbour */
+		merge = 0;	       /* for merge: leftmost of the two */
+		toward = trans234_subtree_left;
+	    } else {
+		/* Left half: the fault line runs down the rightmost kid. */
+		ki = (n->kids[3] ? 3 : n->kids[2] ? 2 : 1);
+		ni = ki-1;
+		merge = ni;
+		toward = trans234_subtree_right;
+	    }
+
+	    sub = n->kids[ki];
+	    if (sub && !sub->elems[1]) {
+		/*
+		 * This node is undersized or minimum-size. If we
+		 * can merge it with its neighbour, we do so;
+		 * otherwise we must be able to transfer subtrees
+		 * over to it until it is greater than minimum
+		 * size.
+		 */
+		int undersized = (!sub->elems[0]);
+		LOG(("  child %d is %ssize\n", ki,
+		     undersized ? "under" : "minimum-"));
+		LOG(("  neighbour is %s\n",
+		     n->kids[ni]->elems[2] ? "large" :
+		     n->kids[ni]->elems[1] ? "medium" : "small"));
+		if (!n->kids[ni]->elems[1] ||
+		    (undersized && !n->kids[ni]->elems[2])) {
+		    /*
+		     * Neighbour is small, or possibly neighbour is
+		     * medium and we are undersize.
+		     */
+		    trans234_subtree_merge(n, merge, NULL, NULL);
+		    sub = n->kids[merge];
+		    if (!n->elems[0]) {
+			/*
+			 * n is empty, and hence must have been the
+			 * root and needs to be removed.
+			 */
+			assert(!n->parent);
+			LOG(("  shifting root!\n"));
+			halves[half] = sub;
+			halves[half]->parent = NULL;
+			sfree(n);
+		    }
+		} else {
+		    /* Neighbour is big enough to move trees over. */
+		    toward(n, ni, NULL, NULL);
+		    /* An empty kid needs two transfers to exceed
+		     * minimum size. */
+		    if (undersized)
+			toward(n, ni, NULL, NULL);
+		}
+	    }
+	    n = sub;
+	}
+    }
+
+    t->root = halves[1];
+    return halves[0];
+}
+/*
+ * Split a tree at numeric position `index', returning a newly
+ * allocated tree with the same comparison function as t.
+ *
+ * If `before' is nonzero, the elements before the index are returned
+ * and the rest stay in t; otherwise the elements before the index
+ * stay in t and the rest are returned. An index outside the range
+ * 0..count yields NULL and leaves t untouched.
+ */
+tree234 *splitpos234(tree234 *t, int index, int before) {
+    tree234 *other;
+    node234 *front;
+
+    if (index < 0 || index > countnode234(t->root))
+	return NULL;		       /* error */
+
+    other = newtree234(t->cmp);
+    /* front is the first `index' elements; t now holds the remainder. */
+    front = split234_internal(t, index);
+    if (!before) {
+	/* Hand back the tail and keep the front portion in t. */
+	other->root = t->root;
+	t->root = front;
+    } else {
+	/* Hand back the front portion; t already holds the tail. */
+	other->root = front;
+    }
+    return other;
+}
+/*
+ * Split a tree at a key rather than at a numeric position.
+ *
+ * `rel' may be any of the findrelpos234 relations except REL234_EQ.
+ * The split point is found by looking up the last element that lies
+ * on the "low" side of the cut:
+ *  - for REL234_LT / REL234_LE the lookup uses `rel' itself, and
+ *    splitpos234 is called with before == 0;
+ *  - for REL234_GT / REL234_GE the lookup uses the complementary
+ *    relation (LE / LT respectively), and splitpos234 is called
+ *    with before == 1.
+ * NOTE(review): which side is returned and which stays in t follows
+ * directly from splitpos234's `before' flag; confirm against the
+ * header documentation that this is the direction callers expect.
+ *
+ * Returns the new tree produced by splitpos234.
+ */
+tree234 *split234(tree234 *t, void *e, cmpfn234 cmp, int rel) {
+    int before;
+    int index;
+
+    assert(rel != REL234_EQ);
+
+    if (rel == REL234_GT || rel == REL234_GE) {
+	before = 1;
+	rel = (rel == REL234_GT ? REL234_LE : REL234_LT);
+    } else {
+	before = 0;
+    }
+    /*
+     * If nothing lies on the low side of the cut, the split point
+     * is position 0, so the "index of the last low element" must
+     * be -1. (Using 0 here would split at position 1, putting the
+     * first element on the wrong side, and would make an empty
+     * tree fail splitpos234's range check.)
+     */
+    if (!findrelpos234(t, e, cmp, rel, &index))
+	index = -1;
+
+    return splitpos234(t, index+1, before);
+}
+
+/*
+ * Recursively duplicate the subtree rooted at n (which must not be
+ * NULL). Each element is passed through copyfn(copyfnstate, elem)
+ * if copyfn is non-NULL, otherwise the element pointer is shared.
+ * The parent pointer of the returned node is left for the caller
+ * to set.
+ */
+static node234 *copynode234(node234 *n, copyfn234 copyfn, void *copyfnstate) {
+    node234 *dup = mknew(node234);
+    int slot;
+
+    /* Elements of this node first, then the kids in order. */
+    for (slot = 0; slot < 3; slot++) {
+	void *e = n->elems[slot];
+	dup->elems[slot] = (e && copyfn) ? copyfn(copyfnstate, e) : e;
+    }
+
+    for (slot = 0; slot < 4; slot++) {
+	node234 *kid = n->kids[slot];
+
+	dup->counts[slot] = n->counts[slot];
+	if (!kid) {
+	    dup->kids[slot] = NULL;
+	    continue;
+	}
+	dup->kids[slot] = copynode234(kid, copyfn, copyfnstate);
+	dup->kids[slot]->parent = dup;
+    }
+
+    return dup;
+}
+/*
+ * Make a copy of a whole tree. The new tree uses the same
+ * comparison function as t. If copyfn is non-NULL it is called as
+ * copyfn(copyfnstate, element) to duplicate each element;
+ * otherwise the copy shares element pointers with the original.
+ *
+ * Copying an empty tree yields a new empty tree.
+ */
+tree234 *copytree234(tree234 *t, copyfn234 copyfn, void *copyfnstate) {
+    tree234 *t2;
+
+    t2 = newtree234(t->cmp);
+    if (t->root) {
+	t2->root = copynode234(t->root, copyfn, copyfnstate);
+	t2->root->parent = NULL;
+    } else {
+	/* copynode234 dereferences its argument, so an empty tree
+	 * must be handled here. */
+	t2->root = NULL;
+    }
+
+    return t2;
+}
+
+#ifdef TEST
+
+/*
+ * Test code for the 2-3-4 tree. This code maintains an alternative
+ * representation of the data in the tree, in an array (using the
+ * obvious and slow insert and delete functions). After each tree
+ * operation, the verify() function is called, which ensures all
+ * the tree properties are preserved:
+ *  - node->child->parent always equals node
+ *  - tree->root->parent always equals NULL
+ *  - number of kids == 0 or number of elements + 1;
+ *  - tree has the same depth everywhere
+ *  - every node has at least one element
+ *  - subtree element counts are accurate
+ *  - any NULL kid pointer is accompanied by a zero count
+ *  - in a sorted tree: ordering property between elements of a
+ *    node and elements of its children is preserved
+ * and also ensures the list represented by the tree is the same
+ * list it should be. (This last check also doubly verifies the
+ * ordering properties, because the `same list it should be' is by
+ * definition correctly ordered. It also ensures all nodes are
+ * distinct, because the enum functions would get caught in a loop
+ * if not.)
+ */
+
+#include <stdarg.h>
+
+#define srealloc realloc
+
+/*
+ * Error reporting function.
+ */
+void error(char *fmt, ...) {
+    va_list args;
+
+    /* Everything goes to stdout: prefix, formatted text, newline. */
+    fputs("ERROR: ", stdout);
+    va_start(args, fmt);
+    vprintf(fmt, args);
+    va_end(args);
+    putchar('\n');
+}
+
+/* The array representation of the data. */
+void **array;
+int arraylen, arraysize;
+cmpfn234 cmp;
+
+/* The tree representation of the same data. */
+tree234 *tree;
+
+/*
+ * Routines to provide a diagnostic printout of a tree. Currently
+ * relies on every element in the tree being a one-character string
+ * :-)
+ */
+typedef struct {
+    char **levels;
+} dispctx;
+
+/*
+ * Render node n and its subtree into the text rows in ctx->levels.
+ *
+ * `level' is the row index on which this node's own text goes;
+ * row 0 holds the leaves, and each step up the tree uses three
+ * more rows (two rows of connecting lines plus the node row).
+ * Each row is a NUL-terminated string which this function appends
+ * to.
+ *
+ * Returns the column at which the node is centred, so that the
+ * parent can position itself and its connectors.
+ */
+int dispnode(node234 *n, int level, dispctx *ctx) {
+    if (level == 0) {
+	/* Leaf: append a space followed by the element strings. */
+	int xpos = strlen(ctx->levels[0]);
+	int len;
+
+	if (n->elems[2])
+	    len = sprintf(ctx->levels[0]+xpos, " %s%s%s",
+			  n->elems[0], n->elems[1], n->elems[2]);
+	else if (n->elems[1])
+	    len = sprintf(ctx->levels[0]+xpos, " %s%s",
+			  n->elems[0], n->elems[1]);
+	else
+	    len = sprintf(ctx->levels[0]+xpos, " %s",
+			  n->elems[0]);
+	/* Centre column of the text just written (after the space). */
+	return xpos + 1 + (len-1) / 2;
+    } else {
+	int xpos[4], nkids;
+	int nodelen, mypos, myleft, x, i;
+
+	/* Draw the kids first; xpos[i] is the centre of kid i. */
+	xpos[0] = dispnode(n->kids[0], level-3, ctx);
+	xpos[1] = dispnode(n->kids[1], level-3, ctx);
+	nkids = 2;
+	if (n->kids[2]) {
+	    xpos[2] = dispnode(n->kids[2], level-3, ctx);
+	    nkids = 3;
+	}
+	if (n->kids[3]) {
+	    xpos[3] = dispnode(n->kids[3], level-3, ctx);
+	    nkids = 4;
+	}
+
+	/* Centre this node over its middle kid(s). */
+	if (nkids == 4)
+	    mypos = (xpos[1] + xpos[2]) / 2;
+	else if (nkids == 3)
+	    mypos = xpos[1];
+	else
+	    mypos = (xpos[0] + xpos[1]) / 2;
+	/* Node text is ".e.e." style: one char per elem plus dots. */
+	nodelen = nkids * 2 - 1;
+	myleft = mypos - ((nodelen-1)/2);
+	assert(myleft >= xpos[0]);
+	assert(myleft + nodelen-1 <= xpos[nkids-1]);
+
+	/* Node row: pad, underline out to the node, write it, then
+	 * underline on to the last kid's column. */
+	x = strlen(ctx->levels[level]);
+	while (x <= xpos[0] && x < myleft)
+	    ctx->levels[level][x++] = ' ';
+	while (x < myleft)
+	    ctx->levels[level][x++] = '_';
+	if (nkids==4)
+	    x += sprintf(ctx->levels[level]+x, ".%s.%s.%s.",
+			 n->elems[0], n->elems[1], n->elems[2]);
+	else if (nkids==3)
+	    x += sprintf(ctx->levels[level]+x, ".%s.%s.",
+			 n->elems[0], n->elems[1]);
+	else
+	    x += sprintf(ctx->levels[level]+x, ".%s.",
+			 n->elems[0]);
+	while (x < xpos[nkids-1])
+	    ctx->levels[level][x++] = '_';
+	ctx->levels[level][x] = '\0';
+
+	/* First connector row: joins the dots in the node text to
+	 * the columns of the kids. */
+	x = strlen(ctx->levels[level-1]);
+	for (i = 0; i < nkids; i++) {
+	    int rpos, pos;
+	    rpos = xpos[i];
+	    if (i > 0 && i < nkids-1)
+		pos = myleft + 2*i;
+	    else
+		pos = rpos;
+	    if (rpos < pos)
+		rpos++;
+	    while (x < pos && x < rpos)
+		ctx->levels[level-1][x++] = ' ';
+	    if (x == pos)
+		ctx->levels[level-1][x++] = '|';
+	    while (x < pos || x < rpos)
+		ctx->levels[level-1][x++] = '_';
+	    if (x == pos)
+		ctx->levels[level-1][x++] = '|';
+	}
+	ctx->levels[level-1][x] = '\0';
+
+	/* Second connector row: a vertical bar above each kid. */
+	x = strlen(ctx->levels[level-2]);
+	for (i = 0; i < nkids; i++) {
+	    int rpos = xpos[i];
+
+	    while (x < rpos)
+		ctx->levels[level-2][x++] = ' ';
+	    ctx->levels[level-2][x++] = '|';
+	}
+	ctx->levels[level-2][x] = '\0';
+
+	return mypos;
+    }
+}
+
+/*
+ * Print a text picture of tree t on stdout. Each level of the tree
+ * occupies three text rows (the node row and two rows of
+ * connectors), except the leaves, which need only their own row;
+ * every row is given count+2 bytes of buffer.
+ *
+ * An empty tree is reported as "[empty tree]".
+ */
+void disptree(tree234 *t) {
+    dispctx ctx;
+    char *leveldata;
+    int width = count234(t);
+    int ht = height234(t) * 3 - 2;
+    int i;
+
+    if (!t->root) {
+	printf("[empty tree]\n");
+	/*
+	 * Nothing more to draw; carrying on would allocate a
+	 * negative-sized buffer and hand a NULL node to dispnode.
+	 */
+	return;
+    }
+
+    leveldata = smalloc(ht * (width+2));
+    ctx.levels = smalloc(ht * sizeof(char *));
+    for (i = 0; i < ht; i++) {
+	ctx.levels[i] = leveldata + i * (width+2);
+	ctx.levels[i][0] = '\0';
+    }
+
+    (void) dispnode(t->root, ht-1, &ctx);
+
+    /* Rows are stored leaves-first, so print from the top down. */
+    for (i = ht; i-- ;)
+	printf("%s\n", ctx.levels[i]);
+
+    sfree(ctx.levels);
+    sfree(leveldata);
+}
+
+/*
+ * State accumulated by chknode() over one verification pass.
+ */
+typedef struct {
+    int treedepth;		       /* depth of the leaves; -1 until first leaf seen */
+    int elemcount;		       /* running total of elements found in nodes */
+} chkctx;
+
+/*
+ * Recursively check the structural invariants of the subtree
+ * rooted at `node', reporting each violation through error().
+ *
+ * `level' is the depth of this node (the caller passes 0 for the
+ * root). `lowbound' and `highbound' are the elements which bracket
+ * this subtree in its parent, or NULL where there is no bound.
+ * ctx->treedepth and ctx->elemcount are updated as a side effect.
+ *
+ * Returns the number of elements found in the subtree.
+ */
+int chknode(chkctx *ctx, int level, node234 *node,
+	    void *lowbound, void *highbound) {
+    int nkids, nelems;
+    int i;
+    int count;
+
+    /* Count the non-NULL kids. */
+    for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++);
+    /* Ensure no kids beyond the first NULL are non-NULL. */
+    for (i = nkids; i < 4; i++)
+        if (node->kids[i]) {
+            error("node %p: nkids=%d but kids[%d] non-NULL",
+                   node, nkids, i);
+        } else if (node->counts[i]) {
+            error("node %p: kids[%d] NULL but count[%d]=%d nonzero",
+                   node, i, i, node->counts[i]);
+	}
+
+    /* Count the non-NULL elements. */
+    for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++);
+    /* Ensure no elements beyond the first NULL are non-NULL. */
+    for (i = nelems; i < 3; i++)
+        if (node->elems[i]) {
+            error("node %p: nelems=%d but elems[%d] non-NULL",
+                   node, nelems, i);
+        }
+
+    if (nkids == 0) {
+        /*
+         * If nkids==0, this is a leaf node; verify that the tree
+         * depth is the same everywhere.
+         */
+        if (ctx->treedepth < 0)
+            ctx->treedepth = level;    /* we didn't know the depth yet */
+        else if (ctx->treedepth != level)
+            error("node %p: leaf at depth %d, previously seen depth %d",
+                   node, level, ctx->treedepth);
+    } else {
+        /*
+         * If nkids != 0, then it should be nelems+1, unless nelems
+         * is 0 in which case nkids should also be 0 (and so we
+         * shouldn't be in this condition at all).
+         */
+        int shouldkids = (nelems ? nelems+1 : 0);
+        if (nkids != shouldkids) {
+            error("node %p: %d elems should mean %d kids but has %d",
+                   node, nelems, shouldkids, nkids);
+        }
+    }
+
+    /*
+     * nelems should be at least 1.
+     */
+    if (nelems == 0) {
+        /* NOTE(review): nkids is passed but the format has no
+         * conversion for it; the extra argument is ignored. */
+        error("node %p: no elems", node, nkids);
+    }
+
+    /*
+     * Add nelems to the running element count of the whole tree.
+     */
+    ctx->elemcount += nelems;
+
+    /*
+     * Check ordering property: all elements should be strictly >
+     * lowbound, strictly < highbound, and strictly < each other in
+     * sequence. (lowbound and highbound are NULL at edges of tree
+     * - both NULL at root node - and NULL is considered to be <
+     * everything and > everything. IYSWIM.)
+     */
+    if (cmp) {
+	/* i == -1 compares lowbound against the first element. */
+	for (i = -1; i < nelems; i++) {
+	    void *lower = (i == -1 ? lowbound : node->elems[i]);
+	    void *higher = (i+1 == nelems ? highbound : node->elems[i+1]);
+	    if (lower && higher && cmp(lower, higher) >= 0) {
+		error("node %p: kid comparison [%d=%s,%d=%s] failed",
+		      node, i, lower, i+1, higher);
+	    }
+	}
+    }
+
+    /*
+     * Check parent pointers: all non-NULL kids should have a
+     * parent pointer coming back to this node.
+     */
+    for (i = 0; i < nkids; i++)
+        if (node->kids[i]->parent != node) {
+            error("node %p kid %d: parent ptr is %p not %p",
+                   node, i, node->kids[i]->parent, node);
+        }
+
+
+    /*
+     * Now (finally!) recurse into subtrees.
+     */
+    count = nelems;
+
+    for (i = 0; i < nkids; i++) {
+        /* Kid i sits between elems[i-1] and elems[i] of this node. */
+        void *lower = (i == 0 ? lowbound : node->elems[i-1]);
+        void *higher = (i >= nelems ? highbound : node->elems[i]);
+	int subcount = chknode(ctx, level+1, node->kids[i], lower, higher);
+	if (node->counts[i] != subcount) {
+	    error("node %p kid %d: count says %d, subtree really has %d",
+		  node, i, node->counts[i], subcount);
+	}
+        count += subcount;
+    }
+
+    return count;
+}
+
+/*
+ * Check that `tree' is structurally valid and that it holds exactly
+ * the `arraylen' element pointers in `array', in the same order.
+ * Every discrepancy is reported through error(); the function
+ * itself never aborts. Also prints the tree depth found.
+ */
+void verifytree(tree234 *tree, void **array, int arraylen) {
+    chkctx ctx;
+    int i;
+    void *p;
+
+    ctx.treedepth = -1;                /* depth unknown yet */
+    ctx.elemcount = 0;                 /* no elements seen yet */
+    /*
+     * Verify validity of tree properties.
+     */
+    if (tree->root) {
+	if (tree->root->parent != NULL)
+	    error("root->parent is %p should be null", tree->root->parent);
+        chknode(&ctx, 0, tree->root, NULL, NULL);
+    }
+    printf("tree depth: %d\n", ctx.treedepth);
+    /*
+     * Enumerate the tree and ensure it matches up to the array.
+     */
+    for (i = 0; NULL != (p = index234(tree, i)); i++) {
+        if (i >= arraylen) {
+            /*
+             * The tree has run past the end of the array, so there
+             * is no array[i] to compare against; reading it would
+             * be an out-of-bounds access.
+             */
+            error("tree contains more than %d elements", arraylen);
+        } else if (array[i] != p) {
+            error("enum at position %d: array says %s, tree says %s",
+                   i, array[i], p);
+        }
+    }
+    if (ctx.elemcount != i) {
+        error("tree really contains %d elements, enum gave %d",
+               ctx.elemcount, i);
+    }
+    if (i < arraylen) {
+        error("enum gave only %d elements, array has %d", i, arraylen);
+    }
+    i = count234(tree);
+    if (ctx.elemcount != i) {
+        error("tree really contains %d elements, count234 gave %d",
+	      ctx.elemcount, i);
+    }
+}
+/* Verify the global test tree against the global reference array. */
+void verify(void) {
+    verifytree(tree, array, arraylen);
+}
+
+/*
+ * Record in the reference array that `elem' has been inserted at
+ * position `index', check that the tree's add function returned
+ * `elem' itself (passed in as realret), and re-verify the tree.
+ */
+void internal_addtest(void *elem, int index, void *realret) {
+    int pos;
+
+    /* Grow the array in chunks when there is no room for one more. */
+    if (arraylen >= arraysize) {
+        arraysize = arraylen+1+256;
+        if (array)
+            array = srealloc(array, arraysize*sizeof(*array));
+        else
+            array = smalloc(arraysize*sizeof(*array));
+    }
+
+    /* Open a gap at `index' by shuffling the tail up one place. */
+    for (pos = arraylen; pos > index; pos--)
+	array[pos] = array[pos-1];
+    array[index] = elem;
+    arraylen++;
+
+    /* A successful add is expected to hand back the new element. */
+    if (realret != elem) {
+        error("add: retval was %p expected %p", realret, elem);
+    }
+
+    verify();
+}
+
+/*
+ * Add `elem' to the sorted test tree and check the outcome against
+ * the reference array: a duplicate must return the existing
+ * element, anything else must be inserted at its sorted position.
+ */
+void addtest(void *elem) {
+    void *realret = add234(tree, elem);
+    int pos;
+
+    /* Find the first array entry that is not less than elem. */
+    for (pos = 0; pos < arraylen; pos++)
+        if (cmp(elem, array[pos]) <= 0)
+            break;
+
+    if (pos == arraylen || cmp(elem, array[pos]) != 0) {
+        /* Genuinely new element. */
+	internal_addtest(elem, pos, realret);
+	return;
+    }
+
+    /* Already present: expect the existing element back, not elem. */
+    if (realret != array[pos]) {
+	error("add: retval was %p expected %p", realret, array[pos]);
+    }
+}
+
+/*
+ * Insert `elem' at position `i' of the test tree and check the
+ * result against the reference array.
+ */
+void addpostest(void *elem, int i) {
+    void *got = addpos234(tree, elem, i);
+
+    internal_addtest(elem, i, got);
+}
+
+/*
+ * Delete the element at position `i' from both the reference array
+ * and the test tree (by key if the tree is sorted, by position
+ * otherwise), check the right element came back, and re-verify.
+ */
+void delpostest(int i) {
+    void *expected = array[i];
+    void *got;
+    int k;
+
+    /* Close the gap in the array. */
+    for (k = i; k+1 < arraylen; k++)
+	array[k] = array[k+1];
+    arraylen--;
+
+    got = tree->cmp ? del234(tree, expected) : delpos234(tree, i);
+
+    if (got != expected) {
+	error("del returned %p, expected %p", got, expected);
+    }
+
+    verify();
+}
+
+/*
+ * Delete `elem' from the sorted test tree, if the reference array
+ * says it is present; otherwise do nothing.
+ */
+void deltest(void *elem) {
+    int pos;
+
+    /* Find the first array entry that is not less than elem. */
+    for (pos = 0; pos < arraylen; pos++)
+        if (cmp(elem, array[pos]) <= 0)
+            break;
+
+    if (pos < arraylen && cmp(elem, array[pos]) == 0)
+        delpostest(pos);
+}
+
+/* A sample data set and test utility. Designed for pseudo-randomness,
+ * and yet repeatability. */
+
+/*
+ * This random number generator uses the `portable implementation'
+ * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits;
+ * change it if not.
+ */
+int randomnumber(unsigned *seed) {
+    /* One step of the linear congruential generator. */
+    unsigned next = *seed * 1103515245 + 12345;
+
+    *seed = next;
+    /* Bits 16..30 of the state form the 15-bit result. */
+    return (next >> 16) & 0x7fff;
+}
+
+/* Comparison function for the test trees: elements are C strings. */
+int mycmp(void *av, void *bv) {
+    return strcmp((char const *)av, (char const *)bv);
+}
+
+#define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
+
+char *strings[] = {
+    "0", "2", "3", "I", "K", "d", "H", "J", "Q", "N", "n", "q", "j", "i",
+    "7", "G", "F", "D", "b", "x", "g", "B", "e", "v", "V", "T", "f", "E",
+    "S", "8", "A", "k", "X", "p", "C", "R", "a", "o", "r", "O", "Z", "u",
+    "6", "1", "w", "L", "P", "M", "c", "U", "h", "9", "t", "5", "W", "Y",
+    "m", "s", "l", "4",
+#if 0
+    "a", "ab", "absque", "coram", "de",
+    "palam", "clam", "cum", "ex", "e",
+    "sine", "tenus", "pro", "prae",
+    "banana", "carrot", "cabbage", "broccoli", "onion", "zebra",
+    "penguin", "blancmange", "pangolin", "whale", "hedgehog",
+    "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux",
+    "murfl", "spoo", "breen", "flarn", "octothorpe",
+    "snail", "tiger", "elephant", "octopus", "warthog", "armadillo",
+    "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin",
+    "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper",
+    "wand", "ring", "amulet"
+#endif
+};
+
+#define NSTR lenof(strings)
+
+/*
+ * Exercise findrelpos234 on the global sorted test tree. For every
+ * test string and every relation, the expected answer is computed
+ * by binary search over the reference array and compared with what
+ * the tree returns, both the element and its index. Finally the
+ * NULL-key forms of REL234_GT and REL234_LT are checked, which are
+ * expected to give the first and last elements respectively.
+ */
+void findtest(void) {
+    static const int rels[] = {
+	REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT
+    };
+    static const char *const relnames[] = {
+	"EQ", "GE", "LE", "LT", "GT"
+    };
+    int i, j, rel, index;
+    char *p, *ret, *realret, *realret2;
+    int lo, hi, mid, c;
+
+    for (i = 0; i < (int)NSTR; i++) {
+	p = strings[i];
+	for (j = 0; j < (int)(sizeof(rels)/sizeof(*rels)); j++) {
+	    rel = rels[j];
+
+	    /*
+	     * Binary search for p. c and mid are given values up
+	     * front so that an empty array (where the loop body
+	     * never runs) is treated as "not found" rather than
+	     * reading uninitialised variables.
+	     */
+	    c = 1;
+	    mid = -1;
+	    lo = 0; hi = arraylen-1;
+	    while (lo <= hi) {
+		mid = (lo + hi) / 2;
+		c = strcmp(p, array[mid]);
+		if (c < 0)
+		    hi = mid-1;
+		else if (c > 0)
+		    lo = mid+1;
+		else
+		    break;
+	    }
+
+	    if (c == 0) {
+		/* Exact match at mid: LT/GT want its neighbour. */
+		if (rel == REL234_LT)
+		    ret = (mid > 0 ? array[--mid] : NULL);
+		else if (rel == REL234_GT)
+		    ret = (mid < arraylen-1 ? array[++mid] : NULL);
+		else
+		    ret = array[mid];
+	    } else {
+		/* No match: hi is the last smaller entry, lo the
+		 * first larger one. */
+		assert(lo == hi+1);
+		if (rel == REL234_LT || rel == REL234_LE) {
+		    mid = hi;
+		    ret = (hi >= 0 ? array[hi] : NULL);
+		} else if (rel == REL234_GT || rel == REL234_GE) {
+		    mid = lo;
+		    ret = (lo < arraylen ? array[lo] : NULL);
+		} else
+		    ret = NULL;
+	    }
+
+	    realret = findrelpos234(tree, p, NULL, rel, &index);
+	    if (realret != ret) {
+		error("find(\"%s\",%s) gave %s should be %s",
+		      p, relnames[j], realret, ret);
+	    }
+	    if (realret && index != mid) {
+		error("find(\"%s\",%s) gave %d should be %d",
+		      p, relnames[j], index, mid);
+	    }
+	    if (realret && rel == REL234_EQ) {
+		realret2 = index234(tree, index);
+		if (realret2 != realret) {
+		    error("find(\"%s\",%s) gave %s(%d) but %d -> %s",
+			  p, relnames[j], realret, index, index, realret2);
+		}
+	    }
+#if 0
+	    printf("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j],
+		   realret, index);
+#endif
+	}
+    }
+
+    realret = findrelpos234(tree, NULL, NULL, REL234_GT, &index);
+    if (arraylen && (realret != array[0] || index != 0)) {
+	error("find(NULL,GT) gave %s(%d) should be %s(0)",
+	      realret, index, array[0]);
+    } else if (!arraylen && (realret != NULL)) {
+	error("find(NULL,GT) gave %s(%d) should be NULL",
+	      realret, index);
+    }
+
+    realret = findrelpos234(tree, NULL, NULL, REL234_LT, &index);
+    if (arraylen && (realret != array[arraylen-1] || index != arraylen-1)) {
+	/* The expected index here is the last position, not 0. */
+	error("find(NULL,LT) gave %s(%d) should be %s(%d)",
+	      realret, index, array[arraylen-1], arraylen-1);
+    } else if (!arraylen && (realret != NULL)) {
+	error("find(NULL,LT) gave %s(%d) should be NULL",
+	      realret, index);
+    }
+}
+
+/*
+ * For every possible split position in `tree', copy the tree, split
+ * the copy there, verify both pieces against the matching parts of
+ * `array', join them again and verify the result is the whole list.
+ * `tree' itself is not modified.
+ */
+void splittest(tree234 *tree, void **array, int arraylen) {
+    int cut = 0;
+
+    while (cut <= arraylen) {
+	tree234 *head = copytree234(tree, NULL, NULL);
+	tree234 *tail = splitpos234(head, cut, 0);
+
+	verifytree(head, array, cut);
+	verifytree(tail, array+cut, arraylen-cut);
+
+	join234(head, tail);
+	freetree234(tail);	       /* left empty by join */
+	verifytree(head, array, arraylen);
+	freetree234(head);
+
+	cut++;
+    }
+}
+
+/*
+ * Test driver. In order:
+ *  1. random adds and deletes on a sorted tree, displaying and
+ *     find-testing after each one, then delete everything;
+ *  2. random positional inserts into an unsorted tree;
+ *  3. split/join tests on copies of that unsorted tree;
+ *  4. tear the unsorted tree down by position;
+ *  5. split/join tests on small sorted trees;
+ *  6. degenerate and failing cases of join234 / join234r.
+ * Failures are reported through error() and assert().
+ */
+int main(void) {
+    int in[NSTR];
+    int i, j, k;
+    int tworoot, tmplen;
+    unsigned seed = 0;
+    tree234 *tree2, *tree3, *tree4;
+    int c;
+
+    /* Line-buffer stdout so output is flushed at each newline. */
+    setvbuf(stdout, NULL, _IOLBF, 0);
+
+    /* in[j] records whether strings[j] is currently in the tree. */
+    for (i = 0; i < (int)NSTR; i++) in[i] = 0;
+    array = NULL;
+    arraylen = arraysize = 0;
+    tree = newtree234(mycmp);
+    cmp = mycmp;
+
+    verify();
+    for (i = 0; i < 10000; i++) {
+        j = randomnumber(&seed);
+        j %= NSTR;
+        printf("trial: %d\n", i);
+        if (in[j]) {
+            printf("deleting %s (%d)\n", strings[j], j);
+            deltest(strings[j]);
+            in[j] = 0;
+        } else {
+            printf("adding %s (%d)\n", strings[j], j);
+            addtest(strings[j]);
+            in[j] = 1;
+        }
+	disptree(tree);
+	findtest();
+    }
+
+    /* Empty the tree again, picking victims at random. */
+    while (arraylen > 0) {
+        j = randomnumber(&seed);
+        j %= arraylen;
+        deltest(array[j]);
+    }
+
+    freetree234(tree);
+
+    /*
+     * Now try an unsorted tree. We don't really need to test
+     * delpos234 because we know del234 is based on it, so it's
+     * already been tested in the above sorted-tree code; but for
+     * completeness we'll use it to tear down our unsorted tree
+     * once we've built it.
+     */
+    tree = newtree234(NULL);
+    cmp = NULL;
+    verify();
+    for (i = 0; i < 1000; i++) {
+	printf("trial: %d\n", i);
+	j = randomnumber(&seed);
+	j %= NSTR;
+	k = randomnumber(&seed);
+	k %= count234(tree)+1;
+	printf("adding string %s at index %d\n", strings[j], k);
+	addpostest(strings[j], k);
+    }
+
+    /*
+     * While we have this tree in its full form, we'll take a copy
+     * of it to use in split and join testing.
+     */
+    tree2 = copytree234(tree, NULL, NULL);
+    verifytree(tree2, array, arraylen);/* check the copy is accurate */
+    /*
+     * Split tests. Split the tree at every possible point and
+     * check the resulting subtrees.
+     */
+    tworoot = (!tree2->root->elems[1]);/* see if it has a 2-root */
+    splittest(tree2, array, arraylen);
+    /*
+     * Now do the split test again, but on a tree that has a 2-root
+     * (if the previous one didn't) or doesn't (if the previous one
+     * did).
+     */
+    tmplen = arraylen;
+    /* Trim elements off the end until the root changes size class. */
+    while ((!tree2->root->elems[1]) == tworoot) {
+	delpos234(tree2, --tmplen);
+    }
+    printf("now trying splits on second tree\n");
+    splittest(tree2, array, tmplen);
+    freetree234(tree2);
+
+    /*
+     * Back to the main testing of the unsorted tree.
+     */
+    while (count234(tree) > 0) {
+	printf("cleanup: tree size %d\n", count234(tree));
+	j = randomnumber(&seed);
+	j %= count234(tree);
+	printf("deleting string %s from index %d\n", (char *)array[j], j);
+	delpostest(j);
+    }
+    freetree234(tree);
+
+    /*
+     * Finally, do some testing on split/join on _sorted_ trees. At
+     * the same time, we'll be testing split on very small trees.
+     */
+    tree = newtree234(mycmp);
+    cmp = mycmp;
+    arraylen = 0;
+    for (i = 0; i < 16; i++) {
+	addtest(strings[i]);
+	tree2 = copytree234(tree, NULL, NULL);
+	splittest(tree2, array, arraylen);
+	freetree234(tree2);
+    }
+    freetree234(tree);
+
+    /*
+     * Test silly cases of join: join(emptytree, emptytree), and
+     * also ensure join correctly spots when sorted trees fail the
+     * ordering constraint.
+     */
+    tree = newtree234(mycmp);
+    tree2 = newtree234(mycmp);
+    tree3 = newtree234(mycmp);
+    tree4 = newtree234(mycmp);
+    assert(mycmp(strings[0], strings[1]) < 0);   /* just in case :-) */
+    add234(tree2, strings[1]);
+    add234(tree4, strings[0]);
+    /* The global array is reused here as the expected contents. */
+    array[0] = strings[0];
+    array[1] = strings[1];
+    verifytree(tree, array, 0);
+    verifytree(tree2, array+1, 1);
+    verifytree(tree3, array, 0);
+    verifytree(tree4, array, 1);
+
+    /*
+     * So:
+     *  - join(tree,tree3) should leave both tree and tree3 unchanged.
+     *  - joinr(tree,tree2) should leave both tree and tree2 unchanged.
+     *  - join(tree4,tree3) should leave both tree3 and tree4 unchanged.
+     *  - join(tree, tree2) should move the element from tree2 to tree.
+     *  - joinr(tree4, tree3) should move the element from tree4 to tree3.
+     *  - join(tree,tree3) should return NULL and leave both unchanged.
+     *  - join(tree3,tree) should work and create a bigger tree in tree3.
+     */
+    assert(tree == join234(tree, tree3));
+    verifytree(tree, array, 0);
+    verifytree(tree3, array, 0);
+    assert(tree2 == join234r(tree, tree2));
+    verifytree(tree, array, 0);
+    verifytree(tree2, array+1, 1);
+    assert(tree4 == join234(tree4, tree3));
+    verifytree(tree3, array, 0);
+    verifytree(tree4, array, 1);
+    assert(tree == join234(tree, tree2));
+    verifytree(tree, array+1, 1);
+    verifytree(tree2, array, 0);
+    assert(tree3 == join234r(tree4, tree3));
+    verifytree(tree3, array, 1);
+    verifytree(tree4, array, 0);
+    assert(NULL == join234(tree, tree3));
+    verifytree(tree, array+1, 1);
+    verifytree(tree3, array, 1);
+    assert(tree3 == join234(tree3, tree));
+    verifytree(tree3, array, 2);
+    verifytree(tree, array, 0);
+
+    return 0;
+}
+
+#endif
+
+#if 0 /* sorted list of strings might be useful */
+{
+    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x",
+}
+#endif
diff --git a/tree234.h b/tree234.h
new file mode 100644
index 0000000..f75c8f7
--- /dev/null
+++ b/tree234.h
@@ -0,0 +1,202 @@
+/*
+ * tree234.h: header defining functions in tree234.c.
+ * 
+ * This file is copyright 1999-2001 Simon Tatham.
+ * 
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef TREE234_H
+#define TREE234_H
+
+/*
+ * This typedef is opaque outside tree234.c itself.
+ */
+typedef struct tree234_Tag tree234;
+
+typedef int (*cmpfn234)(void *, void *);
+
+typedef void *(*copyfn234)(void *state, void *element);
+
+/*
+ * Create a 2-3-4 tree. If `cmp' is NULL, the tree is unsorted, and
+ * lookups by key will fail: you can only look things up by numeric
+ * index, and you have to use addpos234() and delpos234().
+ */
+tree234 *newtree234(cmpfn234 cmp);
+
+/*
+ * Free a 2-3-4 tree (not including freeing the elements).
+ */
+void freetree234(tree234 *t);
+
+/*
+ * Add an element e to a sorted 2-3-4 tree t. Returns e on success,
+ * or if an existing element compares equal, returns that.
+ */
+void *add234(tree234 *t, void *e);
+
+/*
+ * Add an element e to an unsorted 2-3-4 tree t. Returns e on
+ * success, NULL on failure. (Failure should only occur if the
+ * index is out of range or the tree is sorted.)
+ * 
+ * Index range can be from 0 to the tree's current element count,
+ * inclusive.
+ */
+void *addpos234(tree234 *t, void *e, int index);
+
+/*
+ * Look up the element at a given numeric index in a 2-3-4 tree.
+ * Returns NULL if the index is out of range.
+ * 
+ * One obvious use for this function is in iterating over the whole
+ * of a tree (sorted or unsorted):
+ * 
+ *   for (i = 0; (p = index234(tree, i)) != NULL; i++) consume(p);
+ * 
+ * or
+ * 
+ *   int maxcount = count234(tree);
+ *   for (i = 0; i < maxcount; i++) {
+ *       p = index234(tree, i);
+ *       assert(p != NULL);
+ *       consume(p);
+ *   }
+ */
+void *index234(tree234 *t, int index);
+
+/*
+ * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not
+ * found. e is always passed as the first argument to cmp, so cmp
+ * can be an asymmetric function if desired. cmp can also be passed
+ * as NULL, in which case the compare function from the tree proper
+ * will be used.
+ * 
+ * Three of these functions are special cases of findrelpos234. The
+ * non-`pos' variants lack the `index' parameter: if the parameter
+ * is present and non-NULL, it must point to an integer variable
+ * which will be filled with the numeric index of the returned
+ * element.
+ * 
+ * The non-`rel' variants lack the `relation' parameter. This
+ * parameter allows you to specify what relation the element you
+ * provide has to the element you're looking for. This parameter
+ * can be:
+ * 
+ *   REL234_EQ     - find only an element that compares equal to e
+ *   REL234_LT     - find the greatest element that compares < e
+ *   REL234_LE     - find the greatest element that compares <= e
+ *   REL234_GT     - find the smallest element that compares > e
+ *   REL234_GE     - find the smallest element that compares >= e
+ * 
+ * Non-`rel' variants assume REL234_EQ.
+ * 
+ * If `rel' is REL234_GT or REL234_LT, the `e' parameter may be
+ * NULL. In this case, REL234_GT will return the smallest element
+ * in the tree, and REL234_LT will return the greatest. This gives
+ * an alternative means of iterating over a sorted tree, instead of
+ * using index234:
+ * 
+ *   // to loop forwards
+ *   for (p = NULL; (p = findrel234(tree, p, NULL, REL234_GT)) != NULL ;)
+ *       consume(p);
+ * 
+ *   // to loop backwards
+ *   for (p = NULL; (p = findrel234(tree, p, NULL, REL234_LT)) != NULL ;)
+ *       consume(p);
+ */
+enum {
+    REL234_EQ, REL234_LT, REL234_LE, REL234_GT, REL234_GE
+};
+void *find234(tree234 *t, void *e, cmpfn234 cmp);
+void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation);
+void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index);
+void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp, int relation,
+		    int *index);
+
+/*
+ * Delete an element e in a 2-3-4 tree. Does not free the element,
+ * merely removes all links to it from the tree nodes.
+ * 
+ * delpos234 deletes the element at a particular tree index: it
+ * works on both sorted and unsorted trees.
+ * 
+ * del234 deletes the element passed to it, so it only works on
+ * sorted trees. (It's equivalent to using findpos234 to determine
+ * the index of an element, and then passing that index to
+ * delpos234.)
+ * 
+ * Both functions return a pointer to the element they delete, for
+ * the user to free or pass on elsewhere or whatever. If the index
+ * is out of range (delpos234) or the element is already not in the
+ * tree (del234) then they return NULL.
+ */
+void *del234(tree234 *t, void *e);
+void *delpos234(tree234 *t, int index);
+
+/*
+ * Return the total element count of a tree234.
+ */
+int count234(tree234 *t);
+
+/*
+ * Split a tree234 into two valid tree234s.
+ * 
+ * splitpos234 splits at a given index. If `before' is TRUE, the
+ * items at and after that index are left in t and the ones before
+ * are returned; if `before' is FALSE, the items before that index
+ * are left in t and the rest are returned.
+ * 
+ * split234 splits at a given key. You can pass any of the
+ * relations used with findrel234, except for REL234_EQ. The items
+ * in the tree that satisfy the relation are returned; the
+ * remainder are left.
+ */
+tree234 *splitpos234(tree234 *t, int index, int before);
+tree234 *split234(tree234 *t, void *e, cmpfn234 cmp, int rel);
+
+/*
+ * Join two tree234s together into a single one.
+ * 
+ * All the elements in t1 are placed to the left of all the
+ * elements in t2. If the trees are sorted, there will be a test to
+ * ensure that this satisfies the ordering criterion, and NULL will
+ * be returned otherwise. If the trees are unsorted, there is no
+ * restriction on the use of join234.
+ * 
+ * The tree returned is t1 (join234) or t2 (join234r), if the
+ * operation is successful.
+ */
+tree234 *join234(tree234 *t1, tree234 *t2);
+tree234 *join234r(tree234 *t1, tree234 *t2);
+
+/*
+ * Make a complete copy of a tree234. Element pointers will be
+ * reused unless copyfn is non-NULL, in which case it will be used
+ * to copy each element. (copyfn takes two `void *' parameters; the
+ * first is private state and the second is the element. A simple
+ * copy routine probably won't need private state.)
+ */
+tree234 *copytree234(tree234 *t, copyfn234 copyfn, void *copyfnstate);
+
+#endif /* TREE234_H */
diff --git a/ustring.c b/ustring.c
new file mode 100644
index 0000000..1573a19
--- /dev/null
+++ b/ustring.c
@@ -0,0 +1,174 @@
+/*
+ * ustring.c: Unicode string routines
+ */
+
+#include <wchar.h>
+#include <time.h>
+#include "halibut.h"
+
+wchar_t *ustrdup(wchar_t *s) {
+    /* Duplicate s into fresh storage; a NULL s yields an empty string. */
+    wchar_t *copy;
+    if (!s) {
+	copy = mknew(wchar_t);
+	copy[0] = 0;
+	return copy;
+    }
+    copy = mknewa(wchar_t, ustrlen(s)+1);
+    return ustrcpy(copy, s);
+}
+
+char *ustrtoa(wchar_t *s, char *outbuf, int size) {
+    /*
+     * Narrow s into outbuf, truncating to size-1 characters plus a NUL.
+     * A NULL s gives ""; a size below 1 leaves outbuf untouched.
+     */
+    char *p = outbuf, *last;
+    if (size <= 0)
+	return outbuf;		       /* outbuf[size-1] would be out of bounds */
+    last = outbuf + (size - 1);
+    while (s && *s && p < last)
+	*p++ = *s++;
+    *p = '\0';
+    return outbuf;
+}
+
+int ustrlen(wchar_t *s) {	       /* wide characters before the NUL */
+    wchar_t *end = s;
+    while (*end) end++;
+    return end - s;
+}
+
+wchar_t *uadv(wchar_t *s) {
+    while (*s++) continue;	       /* step to just past the terminating NUL */
+    return s;
+}
+
+wchar_t *ustrcpy(wchar_t *dest, wchar_t *source) {
+    wchar_t *out = dest;	       /* dest itself is what gets returned */
+    for (;;) {
+	if ((*out++ = *source++) == 0)
+	    return dest;	       /* the terminating NUL went across too */
+    }
+}
+
+int ustrcmp(wchar_t *lhs, wchar_t *rhs) {
+    /*
+     * Three-way comparison returning -1, 0 or +1. A NULL pointer
+     * sorts before every real string and equal to another NULL.
+     */
+    if (!lhs || !rhs)
+	return (lhs ? 1 : 0) - (rhs ? 1 : 0);
+    /* Skip the common prefix, stopping at the first NUL or mismatch. */
+    for (; *lhs && *lhs == *rhs; lhs++, rhs++)
+	continue;
+    return (*lhs > *rhs) - (*lhs < *rhs);
+}
+
+wchar_t utolower(wchar_t c) {
+    /*
+     * FIXME: only the 26 ASCII capitals are folded, which doesn't
+     * even come close to real Unicode case mapping. Everything else,
+     * notably L'\0' (ustricmp depends on that), is returned as is.
+     */
+    return (c >= 'A' && c <= 'Z') ? c + ('a'-'A') : c;
+}
+
+int ustricmp(wchar_t *lhs, wchar_t *rhs) {
+    /* Compare through utolower; returns -1, 0 or 1. No NULL handling. */
+    for (;; lhs++, rhs++) {
+	wchar_t lc = utolower(*lhs);
+	wchar_t rc = utolower(*rhs);
+	if (lc != rc)
+	    return lc < rc ? -1 : 1;
+	if (!lc)
+	    return 0;		       /* both strings ended together */
+    }
+}
+
+wchar_t *ustrlow(wchar_t *s) {
+    /* Fold s to lower case in place via utolower; returns s itself. */
+    wchar_t *cursor;
+
+    for (cursor = s; *cursor; cursor++)
+	*cursor = utolower(*cursor);
+    return s;
+}
+
+int utoi(wchar_t *s) {
+    /*
+     * Parse an optional '-' followed by a run of ASCII digits,
+     * stopping at the first other character (so no digits gives 0).
+     * The sign used to be parsed and then dropped, making "-5"
+     * come out as 5; it is now applied to the result.
+     */
+    int sign = +1;
+    int n = 0;
+
+    if (*s == L'-') {
+	s++;
+	sign = -1;
+    }
+    while (*s >= L'0' && *s <= L'9')
+	n = n * 10 + (*s++ - L'0');
+    return sign * n;
+}
+
+int utob(wchar_t *s) {		       /* TRUE for yes/y/true/t in any case */
+    if (ustricmp(s, L"yes") && ustricmp(s, L"y") &&
+	ustricmp(s, L"true") && ustricmp(s, L"t"))
+	return FALSE;		       /* matched none of the four words */
+    return TRUE;
+}
+
+int uisdigit(wchar_t c) {	       /* 1 for ASCII '0'..'9', otherwise 0 */
+    return !(c < L'0' || c > L'9');
+}
+
+#define USTRFTIME_DELTA 128
+wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) {
+    void *blk = NULL;
+    wchar_t *wblk, *wp;
+    char *fmt, *text, *p;
+    size_t size = 0;
+    size_t len;
+    /* Format timespec per wfmt (or "%c" if wfmt is NULL) as a wide string. */
+    /*
+     * strftime has the entertaining property that it returns 0
+     * _either_ on out-of-space _or_ on successful generation of
+     * the empty string. Hence we must ensure our format can never
+     * generate the empty string. Somebody throw a custard pie at
+     * whoever was responsible for that. Please?
+     */
+    if (wfmt) {
+	len = ustrlen(wfmt);
+	fmt = mknewa(char, 2+len);     /* space + format + NUL */
+	ustrtoa(wfmt, fmt+1, len+1);
+	fmt[0] = ' ';		       /* guarantees non-empty output */
+    } else
+	fmt = " %c";		       /* a literal: never passed to sfree */
+    /* First pass: grow blk until it fits; only the length is kept. */
+    while (1) {
+	size += USTRFTIME_DELTA;
+	blk = resize((char *)blk, size);
+	len = strftime((char *)blk, size-1, fmt, timespec);
+	if (len > 0)		       /* 0 here can only mean "too small" */
+	    break;
+    }
+
+    /* Note: +1 for the terminating 0, -1 for the initial space in fmt */
+    wblk = resize((wchar_t *)blk, len);
+    text = mknewa(char, len);
+    strftime(text, len, fmt+1, timespec);  /* real pass, without the space */
+    /*
+     * We operate in the C locale, so this all ought to be kosher
+     * ASCII. If we ever move outside ASCII machines, we may need
+     * to make this more portable...
+     */
+    for (wp = wblk, p = text; *p; p++, wp++)
+	*wp = *p;		       /* widen one char at a time */
+    *wp = 0;
+    if (wfmt)
+	sfree(fmt);
+    sfree(text);
+    return wblk;		       /* allocated here and not freed here */
+}
diff --git a/version.c b/version.c
new file mode 100644
index 0000000..38fbca1
--- /dev/null
+++ b/version.c
@@ -0,0 +1,13 @@
+/*
+ * version.c: version string
+ */
+
+#include <stdio.h>
+
+#ifndef VERSION
+#define VER "anonymous build (" __DATE__ " " __TIME__ ")"
+#else
+#define VER "version " VERSION
+#endif
+
+const char *const version = VER;
diff --git a/winhelp.c b/winhelp.c
new file mode 100644
index 0000000..005409e
--- /dev/null
+++ b/winhelp.c
@@ -0,0 +1,2127 @@
+/*
+ * winhelp.c   a module to generate Windows .HLP files
+ * 
+ * Documentation of the .HLP file format comes from the excellent
+ * HELPFILE.TXT, published alongside the Help decompiler HELPDECO
+ * by Manfred Winterhoff. This code would not have been possible
+ * without his efforts. Many thanks.
+ */
+
+/*
+ * Potential future features:
+ * 
+ *  - perhaps LZ77 compression? This appears to cause a phase order
+ *    problem: it's hard to do the compression until the data to be
+ *    compressed is finalised, and yet you can't finalise the data
+ *    to be compressed until you know how much of it is going into
+ *    which TOPICBLOCK in order to work out the offsets in the
+ *    topic headers - for which you have to have already done the
+ *    compression. Perhaps the thing to do is to implement an LZ77
+ *    compressor that can guarantee to leave particular bytes in
+ *    the stream as literals, and then go back and fix the offsets
+ *    up later. Not pleasant.
+ *  
+ *  - It would be good to find out what relation (if any) the LCID
+ *    record in the |SYSTEM section bears to the codepage used in
+ *    the actual help text, so as to be able to vary that if the
+ *    user needs it. For the moment I suspect we're stuck with
+ *    Win1252.
+ * 
+ *  - tables might be nice.
+ * 
+ * Unlikely future features:
+ * 
+ *  - Phrase compression sounds harder. It's reasonably easy
+ *    (though space-costly) to analyse all the text in the file to
+ *    determine the one key phrase which would save most space if
+ *    replaced by a reference everywhere it appears; but finding
+ *    the _1024_ most effective phrases seems much harder since a
+ *    naive analysis might find lots of phrases that all overlap
+ *    (so you wouldn't get the saving you expected, as after taking
+ *    out the first phrase the rest would never crop up). In
+ *    addition, MS hold US patent number 4955066 which may cover
+ *    phrase compression, so perhaps it's best just to leave it.
+ * 
+ * Cleanup work:
+ * 
+ *  - sort out begin_topic. Ideally we should have a separate
+ *    topic_macro function that adds to the existing linkdata for
+ *    the topic, because that's more flexible than a variadic
+ *    function. This will be fiddly, though: if it's called before
+ *    whlp_begin_topic then we must buffer macros, and if it's
+ *    called afterwards then we must be able to go back and modify
+ *    the linkdata2 of the topic start block. Foo.
+ * 
+ *  - find out what should happen if a single topiclink crosses
+ *    _two_ topicblock boundaries.
+ * 
+ *  - What is the BlockSize in a topic header (first 4 bytes of
+ *    LinkData1 in a type 2 record) supposed to mean? How on earth
+ *    is it measured? The help file doesn't become perceptibly
+ *    corrupt if I frob it randomly; and on some occasions taking a
+ *    bit _out_ of the help file _increases_ that value. I have a
+ *    feeling it's completely made up and/or vestigial, so for the
+ *    moment I'm just making up a plausible value as I go along.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <time.h>
+#include <stdarg.h>
+
+#include "halibut.h"
+#include "winhelp.h"
+#include "tree234.h"
+
+#ifdef TESTMODE
+/*
+ * This lot is useful for testing. Something like it will also be
+ * needed to use this module standalone.
+ */
+#define smalloc malloc
+#define srealloc realloc
+#define sfree free
+#define mknew(type) ( (type *) smalloc (sizeof (type)) )
+#define mknewa(type, number) ( (type *) smalloc ((number) * sizeof (type)) )
+#define resize(array, len) ( srealloc ((array), (len) * sizeof (*(array))) )
+#define lenof(array) ( sizeof(array) / sizeof(*(array)) )
+char *dupstr(char *s) {		       /* freshly allocated copy of s */
+    size_t bytes = 1+strlen(s); return memcpy(mknewa(char, bytes), s, bytes);
+}
+#endif
+
+#define UNUSEDARG(x) ( (x) = (x) )
+
+#define GET_32BIT_LSB_FIRST(cp) \
+  (((unsigned long)(unsigned char)(cp)[0]) | \
+  ((unsigned long)(unsigned char)(cp)[1] << 8) | \
+  ((unsigned long)(unsigned char)(cp)[2] << 16) | \
+  ((unsigned long)(unsigned char)(cp)[3] << 24))
+
+#define PUT_32BIT_LSB_FIRST(cp, value) do { \
+  (cp)[0] = 0xFF & (value); \
+  (cp)[1] = 0xFF & ((value) >> 8); \
+  (cp)[2] = 0xFF & ((value) >> 16); \
+  (cp)[3] = 0xFF & ((value) >> 24); } while (0)
+
+#define GET_16BIT_LSB_FIRST(cp) \
+  (((unsigned long)(unsigned char)(cp)[0]) | \
+  ((unsigned long)(unsigned char)(cp)[1] << 8))
+
+#define PUT_16BIT_LSB_FIRST(cp, value) do { \
+  (cp)[0] = 0xFF & (value); \
+  (cp)[1] = 0xFF & ((value) >> 8); } while (0)
+
+#define MAX_PAGE_SIZE 0x800	       /* max page size in any B-tree */
+#define TOPIC_BLKSIZE 4096	       /* implied by version/flags combo */
+
+typedef struct WHLP_TOPIC_tag context;
+
+struct file {
+    char *name;			       /* file name, will need freeing */
+    unsigned char *data;	       /* file data, will need freeing */
+    int pos;			       /* position for adding data */
+    int len;			       /* # of meaningful bytes in data */
+    int size;			       /* # of allocated bytes in data */
+    int fileoffset;		       /* offset in the real .HLP file */
+};
+
+struct indexrec {
+    char *term;                        /* index term, will need freeing */
+    context *topic;                    /* topic it links to */
+    int count, offset;                 /* used when building |KWDATA */
+};
+
+struct topiclink {
+    int topicoffset, topicpos;	       /* for referencing from elsewhere */
+    int recordtype;
+    int len1, len2;
+    unsigned char *data1, *data2;
+    context *context;
+    struct topiclink *nonscroll, *scroll, *nexttopic;
+    int block_size;		       /* for the topic header - *boggle* */
+};
+
+struct WHLP_TOPIC_tag {
+    char *name;			       /* needs freeing */
+    unsigned long hash;
+    struct topiclink *link;	       /* this provides TOPICOFFSET */
+    context *browse_next, *browse_prev;
+    char *title;		       /* needs freeing */
+    int index;                         /* arbitrary number */
+};
+
+struct fontdesc {
+    char *font;
+    int family, rendition, halfpoints;
+    int r, g, b;
+};
+
+struct WHLP_tag {
+    tree234 *files;		       /* stores `struct file' */
+    tree234 *pre_contexts;	       /* stores `context' */
+    tree234 *contexts;		       /* also stores `context' */
+    tree234 *titles;		       /* _also_ stores `context' */
+    tree234 *text;		       /* stores `struct topiclink' */
+    tree234 *index;		       /* stores `struct indexrec' */
+    tree234 *tabstops;                 /* stores `int' */
+    tree234 *fontnames;		       /* stores `char *' */
+    tree234 *fontdescs;		       /* stores `struct fontdesc' */
+    struct file *systemfile;	       /* the |SYSTEM internal file */
+    context *ptopic;		       /* primary topic */
+    struct topiclink *prevtopic;       /* to link type-2 records together */
+    struct topiclink *link;	       /* while building a topiclink */
+    unsigned char linkdata1[TOPIC_BLKSIZE];   /* while building a topiclink */
+    unsigned char linkdata2[TOPIC_BLKSIZE];   /* while building a topiclink */
+    int topicblock_remaining;	       /* while building |TOPIC section */
+    int lasttopiclink;		       /* while building |TOPIC section */
+    int firsttopiclink_offset;	       /* while building |TOPIC section */
+    int lasttopicstart;		       /* while building |TOPIC section */
+    int para_flags;
+    int para_attrs[7];
+    int ncontexts;
+};
+
+/* Functions to return the index and leaf data for B-tree contents. */
+typedef int (*bt_index_fn)(const void *item, unsigned char *outbuf);
+typedef int (*bt_leaf_fn)(const void *item, unsigned char *outbuf);
+
+/* Forward references. */
+static void whlp_para_reset(WHLP h);
+static struct file *whlp_new_file(WHLP h, char *name);
+static void whlp_file_add(struct file *f, const void *data, int len);
+static void whlp_file_add_char(struct file *f, int data);
+static void whlp_file_add_short(struct file *f, int data);
+static void whlp_file_add_long(struct file *f, int data);
+static void whlp_file_fill(struct file *f, int len);
+static void whlp_file_seek(struct file *f, int pos, int whence);
+static int whlp_file_offset(struct file *f);
+
+/* ----------------------------------------------------------------------
+ * Fiddly little functions: B-tree compare, index and leaf functions.
+ */
+
+/* The master index maps file names to help-file offsets. */
+
+static int filecmp(void *av, void *bv)
+{
+    /* Order `struct file' entries by strcmp of their names. */
+    const struct file *fa = av, *fb = bv;
+    return strcmp(fa->name, fb->name);
+}
+
+static int fileindex(const void *av, unsigned char *outbuf)
+{
+    const struct file *f = av;	       /* index key: the name plus its NUL */
+    int used = 1+strlen(f->name);
+    memcpy(outbuf, f->name, used);
+    return used;		       /* bytes written to outbuf */
+}
+
+static int fileleaf(const void *av, unsigned char *outbuf)
+{
+    const struct file *f = av;	       /* leaf: name, NUL, 32-bit offset */
+    int namelen = 1+strlen(f->name);
+    memcpy(outbuf, f->name, namelen);
+    PUT_32BIT_LSB_FIRST(outbuf+namelen, f->fileoffset);
+    return namelen + 4;
+}
+
+/* The |CONTEXT internal file maps help context hashes to TOPICOFFSETs. */
+
+static int ctxcmp(void *av, void *bv)
+{
+    /*
+     * Contexts sort by hash, with the hash values compared as
+     * signed longs.
+     */
+    signed long ha = (signed long)((const context *)av)->hash;
+    signed long hb = (signed long)((const context *)bv)->hash;
+    return (ha > hb) - (ha < hb);
+}
+
+static int ctxindex(const void *av, unsigned char *outbuf)
+{
+    unsigned long key = ((const context *)av)->hash;
+    PUT_32BIT_LSB_FIRST(outbuf, key);  /* index key: the hash alone */
+    return 4;
+}
+
+static int ctxleaf(const void *av, unsigned char *outbuf)
+{
+    const context *ctx = av;	       /* leaf: hash, then TOPICOFFSET */
+    PUT_32BIT_LSB_FIRST(outbuf, ctx->hash);
+    PUT_32BIT_LSB_FIRST(outbuf+4, ctx->link->topicoffset);
+    return 4 + 4;
+}
+
+/* The |TTLBTREE internal file maps TOPICOFFSETs to title strings. */
+
+static int ttlcmp(void *av, void *bv)
+{
+    /*
+     * Titles sort by the TOPICOFFSET held in each context's link,
+     * compared as plain ints.
+     */
+    int offa = ((const context *)av)->link->topicoffset;
+    int offb = ((const context *)bv)->link->topicoffset;
+    return (offa > offb) - (offa < offb);
+}
+
+static int ttlindex(const void *av, unsigned char *outbuf)
+{
+    int key = ((const context *)av)->link->topicoffset;
+    PUT_32BIT_LSB_FIRST(outbuf, key);  /* index key: the TOPICOFFSET */
+    return 4;
+}
+
+static int ttlleaf(const void *av, unsigned char *outbuf)
+{
+    /* Leaf record: 32-bit TOPICOFFSET, then the NUL-terminated title. */
+    const context *ctx = av;
+    int titlelen = 1+strlen(ctx->title);
+    PUT_32BIT_LSB_FIRST(outbuf, ctx->link->topicoffset);
+    memcpy(outbuf+4, ctx->title, titlelen);
+    return titlelen + 4;
+}
+
+/* The |KWBTREE internal file maps index strings to TOPICOFFSETs. */
+
+static int idxcmp(void *av, void *bv)
+{
+    /*
+     * Index records sort by term first; records sharing a term are
+     * then ordered by the `index' number of the topic they point at.
+     */
+    const struct indexrec *ra = av, *rb = bv;
+    int byterm = strcmp(ra->term, rb->term);
+    int ia, ib;
+    if (byterm)
+	return byterm;
+    ia = ra->topic->index, ib = rb->topic->index;
+    return (ia > ib) - (ia < ib);
+}
+
+static int idxindex(const void *av, unsigned char *outbuf)
+{
+    const struct indexrec *rec = av;   /* index key: term plus its NUL */
+    int used = 1+strlen(rec->term);
+    memcpy(outbuf, rec->term, used);
+    return used;		       /* bytes written to outbuf */
+}
+
+static int idxleaf(const void *av, unsigned char *outbuf)
+{
+    const struct indexrec *rec = av;   /* leaf: term, NUL, count, offset */
+    int termlen = 1+strlen(rec->term);
+    memcpy(outbuf, rec->term, termlen);
+    PUT_16BIT_LSB_FIRST(outbuf+termlen, rec->count);
+    PUT_32BIT_LSB_FIRST(outbuf+termlen+2, rec->offset);
+    return termlen + 2 + 4;	       /* 16-bit count plus 32-bit offset */
+}
+
+/*
+ * The internal `tabstops' B-tree stores pointers-to-int. Sorting
+ * is by the low 16 bits of the number (above that is flags).
+ */
+
+static int tabcmp(void *av, void *bv)
+{
+    /*
+     * Only the low 16 bits take part in the ordering; whatever is
+     * above them is masked off first.
+     */
+    int posa = *(const int *)av & 0xFFFF;
+    int posb = *(const int *)bv & 0xFFFF;
+    return (posa > posb) - (posa < posb);
+}
+
+/* The internal `fontnames' B-tree stores strings. */
+static int fontcmp(void *av, void *bv)
+{
+    /* Font names are plain C strings, ordered by strcmp. */
+    int order = strcmp((const char *)av, (const char *)bv);
+    return order;
+}
+
+/* ----------------------------------------------------------------------
+ * Manage help contexts and topics.
+ */
+
+/*
+ * This is the code to compute the hash of a context name. Copied
+ * straight from Winterhoff's documentation.
+ */
+static unsigned long context_hash(char *context)
+{
+    static const signed char bytemapping[256] =
+	"\x00\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF"
+	"\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"
+	"\xF0\x0B\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\x0C\xFF"
+	"\x0A\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
+	"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
+	"\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x0B\x0C\x0D\x0E\x0D"
+	"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
+	"\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F"
+	"\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F"
+	"\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F"
+	"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F"
+	"\x80\x81\x82\x83\x0B\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
+	"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
+	"\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
+	"\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"
+	"\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF";
+    unsigned long hash;
+    /* (table is static const: it used to be rebuilt on every call) */
+    /* Build fails here (1/0) unless unsigned long is exactly 32 bits */
+    enum { assertion = 1 /
+	    (((unsigned long)0xFFFFFFFF) + 2 == (unsigned long)1) };
+
+    /*
+     * The hash algorithm starts the hash at 0 and updates it with
+     * each character. Therefore, logically, the hash of an empty
+     * string should be 0 (it starts at 0 and is never updated);
+     * but Winterhoff says it is in fact 1. Shouldn't matter, since
+     * I never plan to use empty context names, but I'll stick the
+     * special case in here anyway.
+     */
+    if (!*context)
+	return 1;
+
+    /*
+     * Otherwise: hash = hash*43 + table entry, once per character.
+     * Entries are signed chars, so they are added as signed values.
+     */
+    hash = 0;
+    for (; *context; context++) {
+	/* index as unsigned char so bytes >= 0x80 stay inside the table */
+	hash = hash * 43 + bytemapping[(unsigned char)*context];
+    }
+    return hash;
+}
+
+WHLP_TOPIC whlp_register_topic(WHLP h, char *context_name, char **clash)
+{
+    context *ctx = mknew(context);
+    context *otherctx;
+
+    /*
+     * Register a topic, named or (if context_name is NULL) anonymous.
+     * Returns the new topic, or NULL if the name's hash is already in
+     * use, in which case *clash (if clash is non-NULL) is pointed at
+     * the clashing name, which still belongs to the existing topic.
+     *
+     * Contexts are numbered in order of creation so that the index
+     * B-tree has a deterministic ordering that does not depend on
+     * pointer values.
+     */
+    ctx->index = h->ncontexts++;
+    ctx->browse_prev = ctx->browse_next = NULL;
+    /* No title or text yet; these used to be left uninitialised. */
+    ctx->title = NULL;
+    ctx->link = NULL;
+    ctx->hash = 0;
+
+    if (context_name) {
+	/* A named context can go straight into the `contexts' tree. */
+	ctx->name = dupstr(context_name);
+	ctx->hash = context_hash(context_name);
+	otherctx = add234(h->contexts, ctx);
+	if (otherctx != ctx) {
+	    /* Hash clash: discard the new context, report the old name. */
+	    sfree(ctx->name);
+	    sfree(ctx);
+	    if (clash) *clash = otherctx->name;
+	    return NULL;
+	}
+    } else {
+	/*
+	 * No name yet: queue the topic on the pre_contexts tree of
+	 * anonymous topics, which is gone through later to allocate
+	 * unique context names and hash values.
+	 */
+	ctx->name = NULL;
+	addpos234(h->pre_contexts, ctx, count234(h->pre_contexts));
+    }
+    return ctx;
+}
+
+void whlp_prepare(WHLP h)
+{
+    /*
+     * Every anonymous topic queued on pre_contexts is given a
+     * generated context name of the form `t' plus an eight-digit
+     * decimal serial number (counting up from zero) and moved into
+     * the `contexts' tree. If a generated name's hash collides
+     * with a context already in that tree, the serial number is
+     * simply advanced until a free hash turns up.
+     */
+    int serial = 0;
+    context *ctx;
+
+    /* delpos234 returns NULL once pre_contexts has been emptied. */
+    while ( (ctx = delpos234(h->pre_contexts, 0)) != NULL ) {
+	ctx->name = mknewa(char, 20);
+	for (;;) {
+	    sprintf(ctx->name, "t%08d", serial++);
+	    ctx->hash = context_hash(ctx->name);
+	    if (add234(h->contexts, ctx) == ctx)
+		break;		       /* no clash: this name is accepted */
+	}
+    }
+
+    /*
+     * Ensure paragraph attributes are clear for the start of text
+     * output.
+     */
+    whlp_para_reset(h);
+}
+
+char *whlp_topic_id(WHLP_TOPIC topic)
+{
+    return (*topic).name;	       /* the topic's own string, not a copy */
+}
+
+void whlp_begin_topic(WHLP h, WHLP_TOPIC topic, char *title, ...)
+{
+    struct topiclink *link = mknew(struct topiclink);
+    int len, slen;
+    char *macro;
+    va_list ap;
+    /* Start a topic: build its type-2 header record (title + macros). */
+    link->nexttopic = NULL;
+    if (h->prevtopic)
+	h->prevtopic->nexttopic = link;   /* chain after the previous header */
+    h->prevtopic = link;
+
+    link->nonscroll = link->scroll = NULL;
+    link->context = topic;
+    link->block_size = 0;
+
+    link->recordtype = 2;	       /* topic header */
+    link->len1 = 4*7;		       /* standard linkdata1 size */
+    link->data1 = mknewa(unsigned char, link->len1);   /* not filled in here */
+    /* LinkData2 is the title followed by each macro, NUL-separated. */
+    slen = strlen(title);
+    assert(slen+1 <= TOPIC_BLKSIZE);
+    memcpy(h->linkdata2, title, slen+1);
+    len = slen+1;
+    /* The variadic tail is a list of macro strings ended by a NULL. */
+    va_start(ap, title);
+    while ( (macro = va_arg(ap, char *)) != NULL) {
+	slen = strlen(macro);
+	assert(len+slen+1 <= TOPIC_BLKSIZE);
+	memcpy(h->linkdata2+len, macro, slen+1);
+	len += slen+1;
+    }
+    va_end(ap);
+    len--;			       /* lose the last \0 on the last macro */
+
+    link->len2 = len;
+    link->data2 = mknewa(unsigned char, link->len2);
+    memcpy(link->data2, h->linkdata2, link->len2);   /* h->linkdata2 was scratch */
+
+    topic->title = dupstr(title);
+    topic->link = link;
+    /* Append the finished record to the end of the h->text list. */
+    addpos234(h->text, link, count234(h->text));
+}
+
+void whlp_browse_link(WHLP h, WHLP_TOPIC before, WHLP_TOPIC after)
+{
+    context *stale;
+    UNUSEDARG(h);
+
+    /*
+     * Unhook whatever used to follow `before' and whatever used to
+     * precede `after', then join the two topics together.
+     */
+    if ( (stale = before->browse_next) != NULL)
+	stale->browse_prev = NULL;
+    if ( (stale = after->browse_prev) != NULL)
+	stale->browse_next = NULL;
+    before->browse_next = after;
+    after->browse_prev = before;
+}
+
+/* ----------------------------------------------------------------------
+ * Manage the actual generation of paragraph and text records.
+ */
+
+static void whlp_linkdata(WHLP h, int which, int c)
+{   /* Append byte c to LinkData1 (which == 1) or LinkData2 (otherwise). */
+    int *len = (which == 1 ? &h->link->len1 : &h->link->len2);
+    /* The buffers are unsigned char arrays, so point at them as such. */
+    unsigned char *data = (which == 1 ? h->linkdata1 : h->linkdata2);
+    assert(*len < TOPIC_BLKSIZE); data[(*len)++] = c;
+}
+
+static void whlp_linkdata_short(WHLP h, int which, int data)
+{
+    int i;			       /* two bytes, low byte first */
+    for (i = 0; i < 16; i += 8) whlp_linkdata(h, which, (data >> i) & 0xFF);
+}
+
+static void whlp_linkdata_long(WHLP h, int which, int data)
+{
+    int shift;
+    /* Four bytes, least significant first. */
+    for (shift = 0; shift < 32; shift += 8)
+	whlp_linkdata(h, which, (data >> shift) & 0xFF);
+}
+
+static void whlp_linkdata_cushort(WHLP h, int which, int data)
+{
+    if (data > 0x7F) {		       /* two bytes, low bit of first set */
+	whlp_linkdata(h, which, (data%128) * 2 + 1);
+	whlp_linkdata(h, which, data/128);
+	return;
+    }
+    whlp_linkdata(h, which, 2*data);   /* one byte, low bit clear */
+}
+
+static void whlp_linkdata_csshort(WHLP h, int which, int data)
+{
+    /* Values in -0x40..0x3F are biased by 64; all others by 16384. */
+    int fits = (data >= -0x40 && data <= 0x3F);
+
+    whlp_linkdata_cushort(h, which, data + (fits ? 64 : 16384));
+}
+
+static void whlp_linkdata_culong(WHLP h, int which, int data)
+{
+    if (data > 0x7FFF) {	       /* two shorts, low bit of first set */
+	whlp_linkdata_short(h, which, (data%32768) * 2 + 1);
+	whlp_linkdata_short(h, which, data/32768);
+	return;
+    }
+    whlp_linkdata_short(h, which, 2*data);   /* one short, low bit clear */
+}
+
+static void whlp_linkdata_cslong(WHLP h, int which, int data)
+{
+    /* Values in -0x4000..0x3FFF are biased by 16384; others by 67108864. */
+    int fits = (data >= -0x4000 && data <= 0x3FFF);
+
+    whlp_linkdata_culong(h, which, data + (fits ? 16384 : 67108864));
+}
+
+/*
+ * Return the paragraph state in `h' to its defaults: clear the
+ * paragraph flag word and free every tab stop held in h->tabstops,
+ * leaving that tree empty (but still allocated).
+ */
+static void whlp_para_reset(WHLP h)
+{
+    int *stop;
+
+    h->para_flags = 0;
+
+    for (;;) {
+        stop = index234(h->tabstops, 0);
+        if (stop == NULL)
+            break;
+        delpos234(h->tabstops, 0);
+        sfree(stop);
+    }
+}
+
+/*
+ * Set one attribute for the next paragraph.
+ *
+ * Attribute ids from WHLP_PARA_SPACEABOVE to
+ * WHLP_PARA_FIRSTLINEINDENT inclusive store `attr_param' in
+ * h->para_attrs[] and set bit `attr_id' in h->para_flags.
+ * WHLP_PARA_ALIGNMENT instead rewrites the two alignment bits
+ * (mask 0xC00): 0x400 for right, 0x800 for centre, neither for any
+ * other alignment value. Unrecognised ids are ignored.
+ */
+void whlp_para_attr(WHLP h, int attr_id, int attr_param)
+{
+    int is_numeric = (attr_id >= WHLP_PARA_SPACEABOVE &&
+                      attr_id <= WHLP_PARA_FIRSTLINEINDENT);
+
+    if (is_numeric) {
+        h->para_flags |= 1 << attr_id;
+        h->para_attrs[attr_id] = attr_param;
+        return;
+    }
+
+    if (attr_id != WHLP_PARA_ALIGNMENT)
+        return;
+
+    /* Drop any previous alignment before applying the new one. */
+    h->para_flags &= ~0xC00;
+    if (attr_param == WHLP_ALIGN_RIGHT) {
+        h->para_flags |= 0x400;
+    } else if (attr_param == WHLP_ALIGN_CENTRE) {
+        h->para_flags |= 0x800;
+    }
+}
+
+/*
+ * Add a tab stop for the next paragraph.
+ *
+ * `tabstop' is the position and `alignment' one of the WHLP_ALIGN_*
+ * values. Centre and right alignment are folded into the stored
+ * value as bits 0x20000 and 0x10000 respectively; any other
+ * alignment leaves the position unmarked. Setting a tab stop also
+ * raises bit 0x0200 in h->para_flags so that whlp_begin_para()
+ * writes the tab stop table.
+ */
+void whlp_set_tabstop(WHLP h, int tabstop, int alignment)
+{
+    int *p;
+
+    if (alignment == WHLP_ALIGN_CENTRE)
+        tabstop |= 0x20000;
+    if (alignment == WHLP_ALIGN_RIGHT)
+        tabstop |= 0x10000;
+
+    p = mknew(int);
+    *p = tabstop;
+    /*
+     * add234() hands back the element already in the tree when an
+     * equal one exists; in that case our fresh copy was not stored
+     * and must be freed here or it would leak.
+     */
+    if (add234(h->tabstops, p) != p)
+        sfree(p);
+    h->para_flags |= 0x0200;
+}
+
+/*
+ * Begin a new paragraph (a type-32 text record).
+ *
+ * A fresh topiclink is allocated and made current in h->link, with
+ * its data pointers aimed at the staging buffers in `h'. If
+ * `para_type' is WHLP_PARA_NONSCROLL or WHLP_PARA_SCROLL and the
+ * current topic does not yet have a first paragraph of that kind,
+ * this record is remembered as such. The paragraph-info header is
+ * then written into linkdata1 from the attributes and tab stops
+ * accumulated so far.
+ */
+void whlp_begin_para(WHLP h, int para_type)
+{
+    struct topiclink *rec = mknew(struct topiclink);
+    int attr, t, ntabs;
+
+    rec->recordtype = 32;              /* text record */
+
+    /*
+     * These fields only matter for type-2 records; they are nulled
+     * here purely as a precaution.
+     */
+    rec->nexttopic = NULL;
+    rec->context = NULL;
+    rec->nonscroll = NULL;
+    rec->scroll = NULL;
+
+    rec->len1 = 0;
+    rec->len2 = 0;
+    rec->data1 = h->linkdata1;
+    rec->data2 = h->linkdata2;
+    h->link = rec;
+
+    if (h->prevtopic) {
+        if (para_type == WHLP_PARA_NONSCROLL && !h->prevtopic->nonscroll)
+            h->prevtopic->nonscroll = rec;
+        if (para_type == WHLP_PARA_SCROLL && !h->prevtopic->scroll)
+            h->prevtopic->scroll = rec;
+    }
+
+    /*
+     * Paragraph info. The TopicSize (cslong) and TopicLength
+     * (cushort) fields that belong in front of this are not known
+     * yet; whlp_end_para() supplies them.
+     */
+    whlp_linkdata(h, 1, 0);            /* must-be-0x00 */
+    whlp_linkdata(h, 1, 0x80);         /* must-be-0x80 */
+    whlp_linkdata_short(h, 1, 0);      /* `id' per Winterhoff; always 0 */
+    whlp_linkdata_short(h, 1, h->para_flags);
+
+    /* One csshort for each numeric attribute whose flag bit is set. */
+    for (attr = WHLP_PARA_SPACEABOVE;
+         attr <= WHLP_PARA_FIRSTLINEINDENT; attr++) {
+        if (h->para_flags & (1<<attr))
+            whlp_linkdata_csshort(h, 1, h->para_attrs[attr]);
+    }
+
+    /* Tab stop table: a count, then each stop in tree order. */
+    if (h->para_flags & 0x0200) {
+        ntabs = count234(h->tabstops);
+        whlp_linkdata_csshort(h, 1, ntabs);
+        for (t = 0; t < ntabs; t++) {
+            int *stop = index234(h->tabstops, t);
+            int value = *stop;
+
+            /* A stop with an alignment gets bit 0x4000 in its
+             * position word and a second word with the alignment. */
+            if (value & 0x30000)
+                value |= 0x4000;
+            whlp_linkdata_cushort(h, 1, value & 0xFFFF);
+            if (value & 0x4000)
+                whlp_linkdata_cushort(h, 1, value >> 16);
+        }
+    }
+
+    /* Text and formatting commands may now be appended. */
+}
+
+/*
+ * Switch to font `font_id' at the current point in the paragraph.
+ */
+void whlp_set_font(WHLP h, int font_id)
+{
+    /* A NUL in linkdata2 makes the reader turn to linkdata1 for a
+     * formatting command. */
+    whlp_linkdata(h, 2, 0);
+
+    /* The command itself: 0x80, then the font number as a short. */
+    whlp_linkdata(h, 1, 0x80);
+    whlp_linkdata(h, 1, font_id & 0xFF);
+    whlp_linkdata(h, 1, (font_id >> 8) & 0xFF);
+}
+
+/*
+ * Open a hyperlink to topic `target' at the current point in the
+ * paragraph.
+ */
+void whlp_start_hyperlink(WHLP h, WHLP_TOPIC target)
+{
+    /* NUL placeholder in linkdata2 marks a formatting command. */
+    whlp_linkdata(h, 2, 0);
+
+    /* Command 0xE3, followed by the target's context hash. */
+    whlp_linkdata(h, 1, 0xE3);
+    whlp_linkdata_long(h, 1, target->hash);
+}
+
+/*
+ * Close the hyperlink most recently opened in the current paragraph.
+ */
+void whlp_end_hyperlink(WHLP h)
+{
+    enum { CMD_END_LINK = 0x89 };
+
+    whlp_linkdata(h, 2, 0);            /* NUL placeholder in linkdata2 */
+    whlp_linkdata(h, 1, CMD_END_LINK); /* the formatting command */
+}
+
+/*
+ * Insert a tab at the current point in the paragraph.
+ */
+void whlp_tab(WHLP h)
+{
+    enum { CMD_TAB = 0x83 };
+
+    whlp_linkdata(h, 2, 0);            /* NUL placeholder in linkdata2 */
+    whlp_linkdata(h, 1, CMD_TAB);      /* the formatting command */
+}
+
+/*
+ * Append the NUL-terminated string `text' to the current paragraph.
+ * Each character goes into linkdata2; the terminator is not copied.
+ */
+void whlp_text(WHLP h, char *text)
+{
+    char *p;
+
+    for (p = text; *p; p++)
+        whlp_linkdata(h, 2, *p);
+}
+
+/*
+ * Finish the paragraph begun by whlp_begin_para().
+ *
+ * The closing formatting commands are appended, the TopicSize and
+ * TopicLength fields are generated, and the staged data is copied
+ * into freshly allocated buffers owned by the topiclink, which is
+ * then appended to h->text. Paragraph attributes and tab stops are
+ * reset afterwards.
+ */
+void whlp_end_para(WHLP h)
+{
+    struct topiclink *rec = h->link;
+    int bodylen, hdrlen;
+
+    /*
+     * Closing commands 0x82 and 0xFF, each with its NUL placeholder
+     * in linkdata2.
+     */
+    whlp_linkdata(h, 2, 0);
+    whlp_linkdata(h, 1, 0x82);
+    whlp_linkdata(h, 2, 0);
+    whlp_linkdata(h, 1, 0xFF);
+
+    /*
+     * The size fields must precede the body in the finished record,
+     * but can only be computed now. So they are appended to the
+     * staging buffer after the body, and the two pieces are swapped
+     * round while copying out.
+     */
+    bodylen = rec->len1;
+    whlp_linkdata_cslong(h, 1, bodylen);
+    whlp_linkdata_cushort(h, 1, rec->len2);
+    hdrlen = rec->len1 - bodylen;
+
+    rec->data1 = mknewa(unsigned char, rec->len1);
+    memcpy(rec->data1, h->linkdata1 + bodylen, hdrlen);
+    memcpy(rec->data1 + hdrlen, h->linkdata1, bodylen);
+
+    rec->data2 = mknewa(unsigned char, rec->len2);
+    memcpy(rec->data2, h->linkdata2, rec->len2);
+
+    addpos234(h->text, rec, count234(h->text));
+
+    /* Hack: keep the enclosing topic header's `blocksize' running
+     * total up to date. */
+    if (h->prevtopic)
+        h->prevtopic->block_size += 21 + rec->len1 + rec->len2;
+
+    h->link = NULL;                    /* the tree owns it now */
+
+    whlp_para_reset(h);
+}
+
+/* ----------------------------------------------------------------------
+ * Manage the layout and generation of the |TOPIC section.
+ */
+
+/*
+ * Write the 12-byte header that opens a new TOPICBLOCK in `f': the
+ * last topiclink position, a placeholder (-1) for the first
+ * topiclink in this block whose file offset is remembered in
+ * h->firsttopiclink_offset for later fix-up, and the last topic
+ * start. Resets the space remaining in the block.
+ */
+static void whlp_topicsect_newblock(WHLP h, struct file *f)
+{
+    whlp_file_add_long(f, h->lasttopiclink);
+    h->firsttopiclink_offset = whlp_file_offset(f);
+    whlp_file_add_long(f, -1L);        /* filled in later */
+    whlp_file_add_long(f, h->lasttopicstart);
+    h->topicblock_remaining = TOPIC_BLKSIZE - 12;
+}
+
+/*
+ * Write `len' bytes from `data' into the |TOPIC file `f', inserting
+ * block headers wherever a TOPIC_BLKSIZE boundary is crossed.
+ *
+ * `can_break' is the number of leading bytes that must not straddle
+ * a block boundary: if the current block has fewer bytes left than
+ * that (or none at all), it is zero-padded and a new block started
+ * before anything is written. Passing len == 0 with can_break == 0
+ * simply starts a block if none is open.
+ */
+static void whlp_topicsect_write(WHLP h, struct file *f, void *data, int len,
+                                 int can_break)
+{
+    unsigned char *src = (unsigned char *)data;
+    int room = h->topicblock_remaining;
+
+    if (room <= 0 || room < can_break) {
+        if (room > 0)
+            whlp_file_fill(f, room);
+        whlp_topicsect_newblock(h, f);
+    }
+
+    while (len > 0) {
+        int chunk = len;
+
+        if (h->topicblock_remaining < chunk)
+            chunk = h->topicblock_remaining;
+
+        whlp_file_add(f, src, chunk);
+        src += chunk;
+        len -= chunk;
+        h->topicblock_remaining -= chunk;
+
+        /* More to write but this block is full: open the next. */
+        if (len > 0 && h->topicblock_remaining <= 0)
+            whlp_topicsect_newblock(h, f);
+    }
+}
+
+/*
+ * Lay out all the TOPICLINK records collected in h->text and write
+ * the |TOPIC internal file.
+ *
+ * This runs in four stages:
+ *  1. append a terminating type-2 record carrying no usable data;
+ *  2. walk the records assigning each a TOPICOFFSET and TOPICPOS,
+ *     splitting them across TOPIC_BLKSIZE-byte blocks;
+ *  3. fill in the 0x1c-byte headers of the type-2 (topic header)
+ *     records, which need the positions computed in stage 2;
+ *  4. create the |TOPIC file and stream every record into it.
+ */
+static void whlp_topic_layout(WHLP h)
+{
+    int block, offset, pos;
+    int i, nlinks, size;
+    int topicnum;
+    struct topiclink *link;
+    struct file *f;
+
+    /*
+     * Create a final TOPICLINK containing no usable data.
+     */
+    link = mknew(struct topiclink);
+    if (h->prevtopic)
+        h->prevtopic->nexttopic = link;
+    h->prevtopic = link;
+    link->data1 = mknewa(unsigned char, 0x1c);
+    link->block_size = 0;
+    link->data2 = NULL;
+    link->len1 = 0x1c;
+    link->len2 = 0;
+    link->nexttopic = NULL;
+    link->recordtype = 2;
+    link->nonscroll = link->scroll = NULL;
+    link->context = NULL;
+    addpos234(h->text, link, count234(h->text));
+
+    /*
+     * Each TOPICBLOCK has space for TOPIC_BLKSIZE-12 bytes. The
+     * size of each TOPICLINK is 21 bytes plus the combined lengths
+     * of LinkData1 and LinkData2. So we can now go through and
+     * break up the TOPICLINKs into TOPICBLOCKs, and also set up
+     * the TOPICOFFSET and TOPICPOS of each one while we do so.
+     */
+
+    block = 0;
+    offset = 0;
+    pos = 12;
+    nlinks = count234(h->text);
+    for (i = 0; i < nlinks; i++) {
+        link = index234(h->text, i);
+        size = 21 + link->len1 + link->len2;
+        /*
+         * We can't split within the topiclink header or within
+         * linkdata1. So if the split would fall in that area,
+         * start a new block _now_. (This mirrors the `can_break'
+         * test applied when the record is actually written.)
+         */
+        if (TOPIC_BLKSIZE - pos < 21 + link->len1) {
+            block++;
+            offset = 0;
+            pos = 12;
+        }
+        link->topicoffset = block * 0x8000 + offset;
+        link->topicpos = block * 0x4000 + pos;
+        pos += size;
+        if (link->recordtype != 2)     /* TOPICOFFSET doesn't count titles */
+            offset += link->len2;
+        /* LinkData2 may spill over one or more block boundaries. */
+        while (pos > TOPIC_BLKSIZE) {
+            block++;
+            offset = 0;
+            pos -= TOPIC_BLKSIZE - 12;
+        }
+    }
+
+    /*
+     * Now we have laid out the TOPICLINKs into blocks, and
+     * determined the final TOPICOFFSET and TOPICPOS of each one.
+     * So now we can go through and write the headers of the type-2
+     * records. Absent links are encoded as 0xFFFFFFFF.
+     */
+
+    topicnum = 0;
+    for (i = 0; i < nlinks; i++) {
+        link = index234(h->text, i);
+        if (link->recordtype != 2)
+            continue;
+
+        PUT_32BIT_LSB_FIRST(link->data1 + 0, link->block_size);
+        if (link->context && link->context->browse_prev)
+            PUT_32BIT_LSB_FIRST(link->data1 + 4,
+                                link->context->browse_prev->link->topicoffset);
+        else
+            PUT_32BIT_LSB_FIRST(link->data1 + 4, 0xFFFFFFFFL);
+        if (link->context && link->context->browse_next)
+            PUT_32BIT_LSB_FIRST(link->data1 + 8,
+                                link->context->browse_next->link->topicoffset);
+        else
+            PUT_32BIT_LSB_FIRST(link->data1 + 8, 0xFFFFFFFFL);
+        PUT_32BIT_LSB_FIRST(link->data1 + 12, topicnum);
+        topicnum++;
+        if (link->nonscroll)
+            PUT_32BIT_LSB_FIRST(link->data1 + 16, link->nonscroll->topicpos);
+        else
+            PUT_32BIT_LSB_FIRST(link->data1 + 16, 0xFFFFFFFFL);
+        if (link->scroll)
+            PUT_32BIT_LSB_FIRST(link->data1 + 20, link->scroll->topicpos);
+        else
+            PUT_32BIT_LSB_FIRST(link->data1 + 20, 0xFFFFFFFFL);
+        if (link->nexttopic)
+            PUT_32BIT_LSB_FIRST(link->data1 + 24, link->nexttopic->topicpos);
+        else
+            PUT_32BIT_LSB_FIRST(link->data1 + 24, 0xFFFFFFFFL);
+    }
+
+    /*
+     * Having done all _that_, we're now finally ready to go
+     * through and create the |TOPIC section in its final form.
+     */
+
+    h->lasttopiclink = -1L;
+    h->lasttopicstart = 0L;
+    f = whlp_new_file(h, "|TOPIC");
+    h->topicblock_remaining = -1;
+    whlp_topicsect_write(h, f, NULL, 0, 0);   /* start the first block */
+    for (i = 0; i < nlinks; i++) {
+        unsigned char header[21];
+        struct topiclink *otherlink;
+
+        link = index234(h->text, i);
+
+        /*
+         * Create and output the TOPICLINK header: total size, size
+         * of LinkData2, positions of the previous and next records,
+         * size of header plus LinkData1, and the record type.
+         */
+        PUT_32BIT_LSB_FIRST(header + 0, 21 + link->len1 + link->len2);
+        PUT_32BIT_LSB_FIRST(header + 4, link->len2);
+        if (i == 0) {
+            PUT_32BIT_LSB_FIRST(header + 8, 0xFFFFFFFFL);
+        } else {
+            otherlink = index234(h->text, i-1);
+            PUT_32BIT_LSB_FIRST(header + 8, otherlink->topicpos);
+        }
+        if (i+1 >= nlinks) {
+            PUT_32BIT_LSB_FIRST(header + 12, 0xFFFFFFFFL);
+        } else {
+            otherlink = index234(h->text, i+1);
+            PUT_32BIT_LSB_FIRST(header + 12, otherlink->topicpos);
+        }
+        PUT_32BIT_LSB_FIRST(header + 16, 21 + link->len1);
+        header[20] = link->recordtype;
+        whlp_topicsect_write(h, f, header, 21, 21 + link->len1);
+
+        /*
+         * Fill in the `first topiclink' pointer in the block
+         * header if appropriate. (We do this _after_ outputting
+         * the header because then we can be sure we'll be in the
+         * same block as we think we are.)
+         */
+        if (h->firsttopiclink_offset > 0) {
+            whlp_file_seek(f, h->firsttopiclink_offset, 0);
+            whlp_file_add_long(f, link->topicpos);
+            h->firsttopiclink_offset = 0;
+            whlp_file_seek(f, 0, 2);
+        }
+
+        /*
+         * Update the `last topiclink', and possibly `last
+         * topicstart', pointers.
+         */
+        h->lasttopiclink = link->topicpos;
+        if (link->recordtype == 2)
+            h->lasttopicstart = link->topicpos;
+
+        /*
+         * Output LinkData1 (which must not be split across blocks)
+         * and LinkData2 (which may be).
+         */
+        whlp_topicsect_write(h, f, link->data1, link->len1, link->len1);
+        whlp_topicsect_write(h, f, link->data2, link->len2, 0);
+    }
+}
+
+/* ----------------------------------------------------------------------
+ * Manage the index sections (|KWDATA, |KWMAP, |KWBTREE).
+ */
+
+/*
+ * Record that index term `index' refers to `topic'. The term string
+ * is copied. A reference the index tree already considers present
+ * is dropped silently.
+ */
+void whlp_index_term(WHLP h, char *index, WHLP_TOPIC topic)
+{
+    struct indexrec *rec, *stored;
+
+    rec = mknew(struct indexrec);
+    rec->term = dupstr(index);
+    rec->topic = topic;
+
+    stored = add234(h->index, rec);
+    if (stored == rec)
+        return;                        /* newly inserted: tree owns it */
+
+    /* Duplicate: discard the copy we just made. */
+    sfree(rec->term);
+    sfree(rec);
+}
+
+/*
+ * Build the |KWDATA internal file and condense the index tree.
+ *
+ * The index tree is walked in order. For every run of records that
+ * share one term, the topic offsets of all of them are written
+ * consecutively to |KWDATA; the first record of the run is kept and
+ * given the run's (count, offset) pair, and the others are removed
+ * from the tree and freed. The tree is then ready for |KWBTREE.
+ */
+static void whlp_build_kwdata(WHLP h)
+{
+    struct file *kwdata = whlp_new_file(h, "|KWDATA");
+    struct indexrec *head, *dup;
+    int pos = 0;
+
+    while ( (head = index234(h->index, pos)) != NULL) {
+        head->count = 1;
+        head->offset = whlp_file_offset(kwdata);
+        whlp_file_add_long(kwdata, head->topic->link->topicoffset);
+        pos++;
+
+        /*
+         * Absorb followers with the same term. `pos' stays put
+         * because each deletion slides the next record into it.
+         */
+        for (;;) {
+            dup = index234(h->index, pos);
+            if (dup == NULL || strcmp(head->term, dup->term) != 0)
+                break;
+            whlp_file_add_long(kwdata, dup->topic->link->topicoffset);
+            head->count++;
+            delpos234(h->index, pos);
+            sfree(dup->term);
+            sfree(dup);
+        }
+    }
+}
+
+/* ----------------------------------------------------------------------
+ * Standard chunks of data for the |SYSTEM and |FONT sections.
+ */
+
+/*
+ * Append one record to a |SYSTEM file: a 16-bit record id, a 16-bit
+ * payload length, then `length' bytes of payload from `data'.
+ */
+static void whlp_system_record(struct file *f, int id,
+                               const void *data, int length)
+{
+    /* Record header. */
+    whlp_file_add_short(f, id);
+    whlp_file_add_short(f, length);
+
+    /* Record payload. */
+    whlp_file_add(f, data, length);
+}
+
+/*
+ * Write the fixed opening of the |SYSTEM file: the header (magic
+ * number, minor and major version, generation date, flags) followed
+ * by two canned records, ids 9 and 11.
+ */
+static void whlp_standard_systemsection(struct file *f)
+{
+    /* magic number; minor version (HCW 4.00 Win95+); major version */
+    const int leading_words[] = { 0x36C, 33, 1 };
+    /*
+     * Magic locale identifier information; what these bytes mean is
+     * still to be found out (see the TODO list at the top of the
+     * file).
+     */
+    const char locale_bytes[] = { 0, 0, 0, 0, 0, 0, 0, 0, 9, 4 };
+    const char charset_bytes[] = { 0, 0, 0, 2, 0 };
+    int w;
+
+    for (w = 0; w < 3; w++)
+        whlp_file_add_short(f, leading_words[w]);
+    whlp_file_add_long(f, time(NULL)); /* generation date */
+    whlp_file_add_short(f, 0);         /* flags=0 means no compression */
+
+    whlp_system_record(f, 9, locale_bytes, sizeof(locale_bytes));
+    whlp_system_record(f, 11, charset_bytes, sizeof(charset_bytes));
+}
+
+/*
+ * Add the help file title as system record 1. The string is stored
+ * with its terminating NUL.
+ */
+void whlp_title(WHLP h, char *title)
+{
+    int nbytes = 1+strlen(title);
+
+    whlp_system_record(h->systemfile, 1, title, nbytes);
+}
+
+/*
+ * Add the copyright notice as system record 2. The string is stored
+ * with its terminating NUL.
+ */
+void whlp_copyright(WHLP h, char *copyright)
+{
+    int nbytes = 1+strlen(copyright);
+
+    whlp_system_record(h->systemfile, 2, copyright, nbytes);
+}
+
+/*
+ * Add a macro string as system record 4. The string is stored with
+ * its terminating NUL. May be called repeatedly; each call appends
+ * another record.
+ */
+void whlp_start_macro(WHLP h, char *macro)
+{
+    int nbytes = 1+strlen(macro);
+
+    whlp_system_record(h->systemfile, 4, macro, nbytes);
+}
+
+/*
+ * Remember `t' as the help file's primary topic. Nothing is written
+ * yet; the corresponding system record is produced from h->ptopic
+ * when the file is closed.
+ */
+void whlp_primary_topic(WHLP h, WHLP_TOPIC t)
+{
+    WHLP_TOPIC chosen = t;
+
+    h->ptopic = chosen;
+}
+
+/*
+ * Emit system record 3, holding the TOPICOFFSET of the primary
+ * topic as a little-endian 32-bit value. This must run after topic
+ * layout, since that is what assigns the topic offsets.
+ *
+ * If the primary topic is null (for instance because a caller passed
+ * NULL to whlp_primary_topic()), no record is written.
+ * NOTE(review): how the help viewer behaves when record 3 is absent
+ * is not established here -- confirm against the file format notes.
+ */
+static void whlp_do_primary_topic(WHLP h)
+{
+    unsigned char firsttopic[4];
+
+    /* Nothing to point at: skip rather than dereference null. */
+    if (!h->ptopic)
+        return;
+
+    PUT_32BIT_LSB_FIRST(firsttopic, h->ptopic->link->topicoffset);
+    whlp_system_record(h->systemfile, 3, firsttopic, sizeof(firsttopic));
+}
+
+/*
+ * Define a font and return its index, for use with whlp_set_font().
+ *
+ * The face name is interned in h->fontnames, so descriptors naming
+ * the same face share a single stored string. A new descriptor is
+ * always appended to h->fontdescs; the return value is its position
+ * there.
+ */
+int whlp_create_font(WHLP h, char *font, int family, int halfpoints,
+                     int rendition, int r, int g, int b)
+{
+    struct fontdesc *desc;
+    char *copy, *interned;
+    int slot;
+
+    copy = dupstr(font);
+    interned = add234(h->fontnames, copy);
+    if (interned != copy) {
+        /* Name already known: the tree kept its own, drop ours. */
+        sfree(copy);
+    }
+
+    desc = mknew(struct fontdesc);
+    desc->font = interned;
+    desc->family = family;
+    desc->halfpoints = halfpoints;
+    desc->rendition = rendition;
+    desc->r = r;
+    desc->g = g;
+    desc->b = b;
+
+    slot = count234(h->fontdescs);
+    addpos234(h->fontdescs, desc, slot);
+    return slot;
+}
+
+/*
+ * Write the |FONT internal file into `f': a four-word header, then
+ * the table of face names (32 bytes each), then the table of font
+ * descriptors (11 bytes each).
+ */
+static void whlp_make_fontsection(WHLP h, struct file *f)
+{
+    int i;
+    char *fontname;
+    struct fontdesc *fontdesc;
+
+    /*
+     * Header block: number of font names, number of font
+     * descriptors, offset to font names, and offset to font
+     * descriptors.
+     */
+    whlp_file_add_short(f, count234(h->fontnames));
+    whlp_file_add_short(f, count234(h->fontdescs));
+    whlp_file_add_short(f, 8);
+    whlp_file_add_short(f, 8 + 32 * count234(h->fontnames));
+
+    /*
+     * Font names. Each occupies a fixed 32-byte slot, zero-padded.
+     * A name of 32 or more characters is truncated to 32 bytes with
+     * no terminating NUL.
+     */
+    for (i = 0; (fontname = index234(h->fontnames, i)) != NULL; i++) {
+        char data[32];
+        /* Pad with zeros (the fill byte used to be the loop index,
+         * which only went unnoticed because strncpy pads too). */
+        memset(data, 0, sizeof(data));
+        strncpy(data, fontname, sizeof(data));
+        whlp_file_add(f, data, sizeof(data));
+    }
+
+    /*
+     * Font descriptors.
+     */
+    for (i = 0; (fontdesc = index234(h->fontdescs, i)) != NULL; i++) {
+        int fontpos;
+        void *ret;
+
+        /* Translate the interned name into its index in the name
+         * table written above. */
+        ret = findpos234(h->fontnames, fontdesc->font, NULL, &fontpos);
+        assert(ret != NULL);
+
+        whlp_file_add_char(f, fontdesc->rendition);
+        whlp_file_add_char(f, fontdesc->halfpoints);
+        whlp_file_add_char(f, fontdesc->family);
+        whlp_file_add_short(f, fontpos);
+        /* Foreground RGB */
+        whlp_file_add_char(f, fontdesc->r);
+        whlp_file_add_char(f, fontdesc->g);
+        whlp_file_add_char(f, fontdesc->b);
+        /* Background RGB is apparently unused and always set to zero */
+        whlp_file_add_char(f, 0);
+        whlp_file_add_char(f, 0);
+        whlp_file_add_char(f, 0);
+    }
+
+}
+
+/* ----------------------------------------------------------------------
+ * Routines to manage a B-tree type file.
+ */
+
+/*
+ * Serialise `tree' into internal file `f' as an on-disk B-tree.
+ *
+ *  - `flags', `pagesize' and `dataformat' are copied into the B-tree
+ *    header; `pagesize' is also the size of every page written and
+ *    must not exceed MAX_PAGE_SIZE. `dataformat' must fit in the
+ *    16-byte header field.
+ *  - `leaffn' formats one tree element as a leaf entry into the
+ *    supplied buffer and returns its length; `indexfn' does the same
+ *    for an index-page key.
+ *  - `map', if non-NULL, receives a map section: a 16-bit count
+ *    followed by one (first-entry-number, page-number) pair per leaf
+ *    page.
+ *
+ * Leaf pages are written first, in tree order; then index levels are
+ * stacked on top until a level consists of a single page, which is
+ * the root. The header fields that depend on the final page count
+ * are patched at the end.
+ *
+ * Every leaf entry must fit in an empty page (pagesize - 8 bytes);
+ * this is asserted, since otherwise no progress could be made.
+ */
+static void whlp_make_btree(struct file *f, int flags, int pagesize,
+                            char *dataformat, tree234 *tree,
+                            struct file *map,
+                            bt_index_fn indexfn, bt_leaf_fn leaffn)
+{
+    void **page_elements = NULL;       /* first element of each page */
+    int npages = 0, pagessize = 0;     /* used / allocated in the above */
+    int npages_this_level, nentries, nlevels;
+    int total_leaf_entries;
+    char btdata[MAX_PAGE_SIZE];
+    int btlen;
+    int page_start, fixups_offset, unused_bytes;
+    void *element;
+    int index;
+
+    assert(pagesize <= MAX_PAGE_SIZE);
+
+    /*
+     * Start with the B-tree header. We'll have to come back and
+     * fill in a few bits later.
+     */
+    whlp_file_add_short(f, 0x293B);    /* magic number */
+    whlp_file_add_short(f, flags);
+    whlp_file_add_short(f, pagesize);
+    {
+        char data[16];
+        memset(data, 0, sizeof(data));
+        assert(strlen(dataformat) <= sizeof(data));
+        memcpy(data, dataformat, strlen(dataformat));
+        whlp_file_add(f, data, sizeof(data));
+    }
+    whlp_file_add_short(f, 0);         /* must-be-zero */
+    fixups_offset = whlp_file_offset(f);
+    whlp_file_add_short(f, 0);         /* page splits; fix up later */
+    whlp_file_add_short(f, 0);         /* root page index; fix up later */
+    whlp_file_add_short(f, -1);        /* must-be-minus-one */
+    whlp_file_add_short(f, 0);         /* total number of pages; fix later */
+    whlp_file_add_short(f, 0);         /* number of levels; fix later */
+    whlp_file_add_long(f, count234(tree));/* total B-tree entries */
+
+    /*
+     * If we have a map section, leave space at the start for its
+     * element count.
+     */
+    if (map) {
+        whlp_file_add_short(map, 0);
+    }
+
+    /*
+     * Now create the leaf pages.
+     */
+    index = 0;
+
+    npages_this_level = 0;
+    total_leaf_entries = 0;
+
+    element = index234(tree, index);
+    while (element) {
+        /*
+         * Make a new leaf page.
+         */
+        npages_this_level++;
+        if (npages >= pagessize) {
+            pagessize = npages + 32;
+            page_elements = resize(page_elements, pagessize);
+        }
+        page_elements[npages++] = element;
+
+        /*
+         * Leave space in the leaf page for the header. We'll
+         * come back and add it later.
+         */
+        page_start = whlp_file_offset(f);
+        whlp_file_add(f, "12345678", 8);
+        unused_bytes = pagesize - 8;
+        nentries = 0;
+
+        /*
+         * Now add leaf entries until we run out of room, or out of
+         * elements.
+         */
+        while (element) {
+            btlen = leaffn(element, btdata);
+            if (btlen > unused_bytes)
+                break;
+            whlp_file_add(f, btdata, btlen);
+            unused_bytes -= btlen;
+            nentries++;
+            index++;
+            element = index234(tree, index);
+        }
+
+        /*
+         * An entry too big for even an empty page would leave
+         * `element' unchanged, and the outer loop would go on
+         * emitting empty pages for ever. Fail loudly instead.
+         */
+        assert(nentries > 0);
+
+        /*
+         * Now add the unused bytes, and then go back and put
+         * in the header.
+         */
+        whlp_file_fill(f, unused_bytes);
+        whlp_file_seek(f, page_start, 0);
+        whlp_file_add_short(f, unused_bytes);
+        whlp_file_add_short(f, nentries);
+        /* Previous-page indicator will automatically go to -1 when
+         * absent. */
+        whlp_file_add_short(f, npages-2);
+        /* Next-page indicator must be -1 if we're at the end. */
+        if (!element)
+            whlp_file_add_short(f, -1);
+        else
+            whlp_file_add_short(f, npages);
+        whlp_file_seek(f, 0, 2);
+
+        /*
+         * If we have a map section, add a map entry.
+         */
+        if (map) {
+            whlp_file_add_long(map, total_leaf_entries);
+            whlp_file_add_short(map, npages_this_level-1);
+        }
+        total_leaf_entries += nentries;
+    }
+
+    /*
+     * If we have a map section, write the total number of map
+     * entries into it.
+     */
+    if (map) {
+        whlp_file_seek(map, 0, 0);
+        whlp_file_add_short(map, npages_this_level);
+        whlp_file_seek(map, 0, 2);
+    }
+
+    /*
+     * Now create further levels until we're down to one page.
+     */
+    nlevels = 1;
+    while (npages_this_level > 1) {
+        /* Page numbers spanned by the level just completed. */
+        int first = npages - npages_this_level;
+        int last = npages - 1;
+        int current;
+
+        nlevels++;
+        npages_this_level = 0;
+
+        current = first;
+        while (current <= last) {
+            /*
+             * Make a new index page.
+             */
+            npages_this_level++;
+            if (npages >= pagessize) {
+                pagessize = npages + 32;
+                page_elements = resize(page_elements, pagessize);
+            }
+            page_elements[npages++] = page_elements[current];
+
+            /*
+             * Leave space for some of the header, but we can put
+             * in the PreviousPage link already.
+             */
+            page_start = whlp_file_offset(f);
+            whlp_file_add(f, "1234", 4);
+            whlp_file_add_short(f, current);
+            unused_bytes = pagesize - 6;
+
+            /*
+             * Now add index entries until we run out of either
+             * space or pages. Each entry is a key followed by the
+             * 16-bit number of the child page it introduces.
+             */
+            current++;
+            nentries = 0;
+            while (current <= last) {
+                btlen = indexfn(page_elements[current], btdata);
+                if (btlen + 2 > unused_bytes)
+                    break;
+                whlp_file_add(f, btdata, btlen);
+                whlp_file_add_short(f, current);
+                unused_bytes -= btlen+2;
+                nentries++;
+                current++;
+            }
+
+            /*
+             * Now add the unused bytes, and then go back and put
+             * in the header.
+             */
+            whlp_file_fill(f, unused_bytes);
+            whlp_file_seek(f, page_start, 0);
+            whlp_file_add_short(f, unused_bytes);
+            whlp_file_add_short(f, nentries);
+            whlp_file_seek(f, 0, 2);
+        }
+    }
+
+    /*
+     * Now we have all our pages ready, and we know where our root
+     * page is. Fix up the main B-tree header.
+     */
+    whlp_file_seek(f, fixups_offset, 0);
+    /* Creation of every page requires a split unless it's the first in
+     * a new level. Hence, page splits equals pages minus levels. */
+    whlp_file_add_short(f, npages - nlevels);
+    whlp_file_add_short(f, npages-1);  /* root page index */
+    whlp_file_add_short(f, -1);        /* must-be-minus-one */
+    whlp_file_add_short(f, npages);    /* total number of pages */
+    whlp_file_add_short(f, nlevels);   /* number of levels */
+
+    /* Just for tidiness, seek to the end of the file :-) */
+    whlp_file_seek(f, 0, 2);
+
+    /* Clean up. */
+    sfree(page_elements);
+}
+			    
+
+/* ----------------------------------------------------------------------
+ * Routines to manage the `internal file' structure.
+ */
+
+/*
+ * Create an empty internal file. When `name' is non-NULL the name is
+ * copied and the file is entered in h->files; when it is NULL the
+ * file is anonymous and belongs to the caller.
+ */
+static struct file *whlp_new_file(WHLP h, char *name)
+{
+    struct file *nf = mknew(struct file);
+
+    nf->data = NULL;
+    nf->pos = 0;
+    nf->len = 0;
+    nf->size = 0;
+    nf->name = NULL;
+
+    if (name) {
+        nf->name = dupstr(name);
+        add234(h->files, nf);
+    }
+
+    return nf;
+}
+
+/*
+ * Release an internal file together with its data buffer and its
+ * name. Does not remove the file from any tree.
+ */
+static void whlp_free_file(struct file *f)
+{
+    /* Either member may legitimately be NULL. */
+    sfree(f->name);
+    sfree(f->data);
+
+    sfree(f);
+}
+
+/*
+ * Write `len' bytes from `data' into `f' at the current position,
+ * enlarging the buffer if required, and advance the position. The
+ * file length only grows if the write runs past the previous end.
+ */
+static void whlp_file_add(struct file *f, const void *data, int len)
+{
+    int end = f->pos + len;
+
+    if (end > f->size) {
+        /* Grow with 1024 bytes of slack to limit reallocations. */
+        f->size = end + 1024;
+        f->data = resize(f->data, f->size);
+    }
+
+    memcpy(f->data + f->pos, data, len);
+    f->pos = end;
+
+    if (end > f->len)
+        f->len = end;
+}
+
+/*
+ * Write the low eight bits of `data' to `f' as a single byte.
+ */
+static void whlp_file_add_char(struct file *f, int data)
+{
+    unsigned char byte = data & 0xFF;
+
+    whlp_file_add(f, &byte, 1);
+}
+
+/*
+ * Write `data' to `f' as a 16-bit value, least significant byte
+ * first.
+ */
+static void whlp_file_add_short(struct file *f, int data)
+{
+    unsigned char bytes[2];
+
+    PUT_16BIT_LSB_FIRST(bytes, data);
+    whlp_file_add(f, bytes, 2);
+}
+
+/*
+ * Write `data' to `f' as a 32-bit value, least significant byte
+ * first.
+ */
+static void whlp_file_add_long(struct file *f, int data)
+{
+    unsigned char bytes[4];
+
+    PUT_32BIT_LSB_FIRST(bytes, data);
+    whlp_file_add(f, bytes, 4);
+}
+
+/*
+ * Write `len' zero bytes into `f' at the current position, enlarging
+ * the buffer if required, and advance the position. The file length
+ * only grows if the write runs past the previous end.
+ */
+static void whlp_file_fill(struct file *f, int len)
+{
+    int end = f->pos + len;
+
+    if (end > f->size) {
+        /* Grow with 1024 bytes of slack to limit reallocations. */
+        f->size = end + 1024;
+        f->data = resize(f->data, f->size);
+    }
+
+    memset(f->data + f->pos, 0, len);
+    f->pos = end;
+
+    if (end > f->len)
+        f->len = end;
+}
+
+/*
+ * Move the position of `f' to `pos' bytes from a base chosen by
+ * `whence': 0 for the start of the file, 1 for the current position,
+ * and any other value for the current end of the file.
+ */
+static void whlp_file_seek(struct file *f, int pos, int whence)
+{
+    int base;
+
+    switch (whence) {
+      case 0:
+        base = 0;
+        break;
+      case 1:
+        base = f->pos;
+        break;
+      default:
+        base = f->len;
+        break;
+    }
+
+    f->pos = base + pos;
+}
+
+/*
+ * Report the current position within `f'.
+ */
+static int whlp_file_offset(struct file *f)
+{
+    int here = f->pos;
+
+    return here;
+}
+
+/* ----------------------------------------------------------------------
+ * Open and close routines; final wrapper around everything.
+ */
+
+/*
+ * Create a new, empty help file under construction.
+ *
+ * All the internal trees are created, and the two internal files
+ * that have fixed initial content are set up: |CTXOMAP (a dummy
+ * section consisting of one zero short) and |SYSTEM (the standard
+ * header, kept in ret->systemfile so that further records can be
+ * appended).
+ *
+ * The returned handle is eventually disposed of by whlp_close() or
+ * whlp_abandon().
+ */
+WHLP whlp_new(void)
+{
+    WHLP ret;
+    struct file *f;
+
+    ret = mknew(struct WHLP_tag);
+
+    /*
+     * Internal B-trees.
+     */
+    ret->files = newtree234(filecmp);
+    ret->pre_contexts = newtree234(NULL);
+    ret->contexts = newtree234(ctxcmp);
+    ret->titles = newtree234(ttlcmp);
+    ret->text = newtree234(NULL);
+    ret->index = newtree234(idxcmp);
+    ret->tabstops = newtree234(tabcmp);
+    ret->fontnames = newtree234(fontcmp);
+    ret->fontdescs = newtree234(NULL);
+
+    /*
+     * Some standard files.
+     */
+    f = whlp_new_file(ret, "|CTXOMAP");
+    whlp_file_add_short(f, 0);         /* dummy section */
+    f = whlp_new_file(ret, "|SYSTEM");
+    whlp_standard_systemsection(f);
+    ret->systemfile = f;
+
+    /*
+     * Other variables.
+     */
+    ret->prevtopic = NULL;
+    ret->ncontexts = 0;
+    ret->link = NULL;
+    /*
+     * These two are read by later code (the primary-topic record
+     * and the paragraph attribute setters respectively) without
+     * any guarantee that something has assigned them first, so
+     * give them defined starting values.
+     */
+    ret->ptopic = NULL;
+    ret->para_flags = 0;
+
+    return ret;
+}
+
+/*
+ * Finish the help file and write it to `filename', then free `h'.
+ *
+ * All remaining internal files are generated (|TOPIC, the primary
+ * topic record in |SYSTEM, |FONT, the index files if any index
+ * terms exist, |CONTEXT and |TTLBTREE), a master directory B-tree
+ * is built over them, and the whole lot is written out as: a
+ * 16-byte file header, each internal file preceded by a 9-byte
+ * header, and finally the master directory.
+ *
+ * The handle is always destroyed, whether or not the output file
+ * could be opened; the function has no way to report failure.
+ * Results of fwrite() and fclose() are not checked.
+ */
+void whlp_close(WHLP h, char *filename)
+{
+    FILE *fp;
+    int filecount, offset, index, filelen;
+    struct file *file, *map, *md;
+    context *ctx;
+    int has_index;
+
+    /*
+     * Lay out the topic section.
+     */
+    whlp_topic_layout(h);
+
+    /*
+     * Finish off the system section.
+     */
+    whlp_do_primary_topic(h);
+
+    /*
+     * Assemble the font section.
+     */
+    file = whlp_new_file(h, "|FONT");
+    whlp_make_fontsection(h, file);
+
+    /*
+     * Set up the index.
+     */
+    has_index = (count234(h->index) != 0);
+    if (has_index)
+        whlp_build_kwdata(h);
+
+    /*
+     * Set up the `titles' B-tree for the |TTLBTREE section.
+     */
+    for (index = 0; (ctx = index234(h->contexts, index)) != NULL; index++)
+	add234(h->titles, ctx);
+
+    /*
+     * Construct the various B-trees.
+     */
+    file = whlp_new_file(h, "|CONTEXT");
+    whlp_make_btree(file, 0x0002, 0x0800, "L4",
+                    h->contexts, NULL, ctxindex, ctxleaf);
+
+    file = whlp_new_file(h, "|TTLBTREE");
+    whlp_make_btree(file, 0x0002, 0x0800, "Lz",
+		    h->titles, NULL, ttlindex, ttlleaf);
+
+    if (has_index) {
+        file = whlp_new_file(h, "|KWBTREE");
+        map = whlp_new_file(h, "|KWMAP");
+        whlp_make_btree(file, 0x0002, 0x0800, "F24",
+                        h->index, map, idxindex, idxleaf);
+    }
+
+    /*
+     * Open the output file.
+     */
+    fp = fopen(filename, "wb");
+    if (!fp) {
+	/* Cannot write: discard everything and return silently. */
+	whlp_abandon(h);
+	return;
+    }
+
+    /*
+     * Work out all the file offsets. Every named internal file
+     * must already exist by this point, because the offsets are
+     * assigned in the order the files appear in h->files.
+     */
+    filecount = count234(h->files);
+    offset = 16;		       /* just after header */
+    for (index = 0; index < filecount; index++) {
+	file = index234(h->files, index);
+	file->fileoffset = offset;
+	offset += 9 + file->len;       /* 9 is size of file header */
+    }
+    /* Now `offset' holds what will be the offset of the master directory. */
+
+    /*
+     * The master directory is created with no name, so it is not
+     * added to h->files and does not list itself.
+     */
+    md = whlp_new_file(h, NULL);       /* master directory file */
+    whlp_make_btree(md, 0x0402, 0x0400, "z4",
+                    h->files, NULL, fileindex, fileleaf);
+
+    filelen = offset + 9 + md->len;
+
+    /*
+     * Write out the file header.
+     */
+    {
+	unsigned char header[16];
+	PUT_32BIT_LSB_FIRST(header+0, 0x00035F3FL);  /* magic */
+	PUT_32BIT_LSB_FIRST(header+4, offset);       /* offset to directory */
+	PUT_32BIT_LSB_FIRST(header+8, 0xFFFFFFFFL);  /* first free block */
+	PUT_32BIT_LSB_FIRST(header+12, filelen);     /* total file length */
+	fwrite(header, 1, 16, fp);
+    }
+
+    /*
+     * Now write out each file. The loop runs one step past the end
+     * of h->files so that the master directory is emitted last, at
+     * the offset computed for it above.
+     */
+    for (index = 0; index <= filecount; index++) {
+	int used, reserved;
+	unsigned char header[9];
+
+	if (index == filecount)
+	    file = md;		       /* master directory comes last */
+	else
+	    file = index234(h->files, index);
+
+	used = file->len;
+	reserved = used + 9;
+
+	/* File header. */
+	PUT_32BIT_LSB_FIRST(header+0, reserved);
+	PUT_32BIT_LSB_FIRST(header+4, used);
+	header[8] = 0;		       /* flags */
+	fwrite(header, 1, 9, fp);
+
+	/* File data. */
+	fwrite(file->data, 1, file->len, fp);
+    }
+
+    fclose(fp);
+
+    /* md is not in h->files, so whlp_abandon() will not free it. */
+    whlp_free_file(md);
+
+    whlp_abandon(h);		       /* now free everything */
+}
+
+/*
+ * Destroy a help file under construction without writing anything:
+ * every tree held by `h' is emptied and freed along with the
+ * elements it owns, and finally `h' itself is freed. The handle
+ * must not be used afterwards.
+ */
+void whlp_abandon(WHLP h)
+{
+    struct file *f;
+    struct indexrec *idx;
+    struct topiclink *link;
+    struct fontdesc *fontdesc;
+    char *fontname;
+    context *ctx;
+
+    /* Get rid of any lingering tab stops. */
+    whlp_para_reset(h);
+
+    /* Delete the (now empty) tabstops tree. */
+    freetree234(h->tabstops);
+
+    /* Delete the index tree and all its entries. */
+    while ( (idx = index234(h->index, 0)) != NULL) {
+        delpos234(h->index, 0);
+        sfree(idx->term);
+        sfree(idx);
+    }
+    freetree234(h->index);
+
+    /* Delete the text tree and all its topiclinks. */
+    while ( (link = index234(h->text, 0)) != NULL) {
+        delpos234(h->text, 0);
+        sfree(link->data1);            /* may be NULL */
+        sfree(link->data2);            /* may be NULL */
+        sfree(link);
+    }
+    freetree234(h->text);
+
+    /* Delete the fontdescs tree and all its entries. */
+    while ( (fontdesc = index234(h->fontdescs, 0)) != NULL) {
+        delpos234(h->fontdescs, 0);
+        sfree(fontdesc);
+    }
+    freetree234(h->fontdescs);
+
+    /* Delete the fontnames tree and all its entries. */
+    while ( (fontname = index234(h->fontnames, 0)) != NULL) {
+        delpos234(h->fontnames, 0);
+        sfree(fontname);
+    }
+    freetree234(h->fontnames);
+
+    /* There might be an unclosed paragraph in h->link. */
+    if (h->link)
+        sfree(h->link);                /* if so it won't have data1 or data2 */
+
+    /*
+     * `titles' contains copies of the `contexts' entries, so we
+     * don't need to free them here.
+     */
+    freetree234(h->titles);
+
+    /*
+     * `contexts' and `pre_contexts' _both_ contain contexts that
+     * need freeing. (pre_contexts shouldn't contain any, unless
+     * the help generation was abandoned half-way through.)
+     */
+    while ( (ctx = index234(h->pre_contexts, 0)) != NULL) {
+        /*
+         * Remove from pre_contexts itself. (Deleting from h->index
+         * here would touch a tree that has already been freed and
+         * would never shrink pre_contexts, so the loop could not
+         * terminate.)
+         */
+        delpos234(h->pre_contexts, 0);
+        sfree(ctx->name);
+        sfree(ctx->title);
+        sfree(ctx);
+    }
+    freetree234(h->pre_contexts);
+    while ( (ctx = index234(h->contexts, 0)) != NULL) {
+        delpos234(h->contexts, 0);
+        sfree(ctx->name);
+        sfree(ctx->title);
+        sfree(ctx);
+    }
+    freetree234(h->contexts);
+
+    /*
+     * Free all the internal files.
+     */
+    while ( (f = index234(h->files, 0)) != NULL ) {
+        delpos234(h->files, 0);
+        whlp_free_file(f);
+    }
+    freetree234(h->files);
+
+    sfree(h);
+}
+
+#ifdef TESTMODE
+
+int main(void)
+{
+    WHLP h;
+    WHLP_TOPIC t1, t2, t3;
+    char *e;			       /* set to the clashing context name */
+    char mymacro[100];
+    /* Self-test: exercise the API end to end and write "test.hlp". */
+    h = whlp_new();
+    /* Title, copyright notice, and macros to be run on loading. */
+    whlp_title(h, "Test Help File");
+    whlp_copyright(h, "This manual is copyright \251 2001 Simon Tatham."
+		   " All rights reversed.");
+    whlp_start_macro(h, "CB(\"btn_about\",\"&About\",\"About()\")");
+    whlp_start_macro(h, "CB(\"btn_up\",\"&Up\",\"Contents()\")");
+    whlp_start_macro(h, "BrowseButtons()");
+    /* Font ids are allocated from 0 upwards in creation order. */
+    whlp_create_font(h, "Arial", WHLP_FONTFAM_SANS, 30,
+		     0, 0, 0, 0);
+    whlp_create_font(h, "Times New Roman", WHLP_FONTFAM_SERIF, 24,
+		     WHLP_FONT_STRIKEOUT, 0, 0, 0);
+    whlp_create_font(h, "Times New Roman", WHLP_FONTFAM_SERIF, 24,
+		     WHLP_FONT_ITALIC, 0, 0, 0);
+    whlp_create_font(h, "Courier New", WHLP_FONTFAM_FIXED, 24,
+		     0, 0, 0, 0);
+    /* All topics must be registered before any text is output. */
+    t1 = whlp_register_topic(h, "foobar", &e);
+    assert(t1 != NULL);
+    t2 = whlp_register_topic(h, "M359HPEHGW", &e);
+    assert(t2 != NULL);
+    t3 = whlp_register_topic(h, "Y5VQEXZQVJ", &e);  /* expect a hash clash */
+    assert(t3 == NULL && !strcmp(e, "M359HPEHGW"));
+    t3 = whlp_register_topic(h, NULL, NULL);	       /* anonymous topic */
+    assert(t3 != NULL);
+
+    whlp_primary_topic(h, t2);	       /* t2 is shown first on load */
+
+    whlp_prepare(h);		       /* names the anonymous topics */
+
+    whlp_begin_topic(h, t1, "First Topic", "DB(\"btn_up\")", NULL);
+
+    whlp_begin_para(h, WHLP_PARA_NONSCROLL);
+    whlp_set_font(h, 0);
+    whlp_text(h, "Foobar");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "This is a silly paragraph with ");
+    whlp_set_font(h, 3);
+    whlp_text(h, "code");
+    whlp_set_font(h, 1);
+    whlp_text(h, " in it.");
+    whlp_end_para(h);
+
+    whlp_para_attr(h, WHLP_PARA_SPACEABOVE, 12);
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "This second, equally silly, paragraph has ");
+    whlp_set_font(h, 2);
+    whlp_text(h, "emphasis");
+    whlp_set_font(h, 1);
+    whlp_text(h, " just to prove we can do it.");
+    whlp_end_para(h);
+    /* From here the same long paragraph is repeated verbatim many times. */
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt"
+	      " to make some wrapping happen, and also to make the topicblock"
+	      " go across its boundaries. This is going to take a fair amount"
+	      " of text, so I'll just have to cheat and c'n'p a lot of it.");
+    whlp_end_para(h);
+    /* A paragraph containing a hyperlink to t2. */
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Have a ");
+    whlp_start_hyperlink(h, t2);
+    whlp_text(h, "hyperlink");
+    whlp_end_hyperlink(h);
+    whlp_text(h, " to another topic.");
+    whlp_end_para(h);
+    /* t2's topic macro embeds t3's id, available now whlp_prepare has run. */
+    sprintf(mymacro, "CBB(\"btn_up\",\"JI(`',`%s')\");EB(\"btn_up\")",
+	    whlp_topic_id(t3));
+
+    whlp_begin_topic(h, t2, "Second Topic", mymacro, NULL);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "This topic contains no non-scrolling region. I would"
+	      " illustrate this with a ludicrously long paragraph, but that"
+	      " would get very tedious very quickly. Instead I'll just waffle"
+	      " on pointlessly for a little bit and then shut up.");
+    whlp_end_para(h);
+    /* Bulleted paragraph: hanging indent of 36 with a tab stop to match. */
+    whlp_set_tabstop(h, 36, WHLP_ALIGN_LEFT);
+    whlp_para_attr(h, WHLP_PARA_LEFTINDENT, 36);
+    whlp_para_attr(h, WHLP_PARA_FIRSTLINEINDENT, -36);
+    whlp_para_attr(h, WHLP_PARA_SPACEABOVE, 12);
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "\225");              /* bullet */
+    whlp_tab(h);
+    whlp_text(h, "This is a paragraph with a bullet. With any luck it should"
+              " work exactly like it used to in the old NASM help file.");
+    whlp_end_para(h);
+    /* One paragraph using right-, centre- and left-aligned tab stops. */
+    whlp_set_tabstop(h, 128, WHLP_ALIGN_RIGHT);
+    whlp_set_tabstop(h, 256, WHLP_ALIGN_CENTRE);
+    whlp_set_tabstop(h, 384, WHLP_ALIGN_LEFT);
+    whlp_para_attr(h, WHLP_PARA_SPACEABOVE, 12);
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Ooh:"); whlp_tab(h);
+    whlp_text(h, "Right?"); whlp_tab(h);
+    whlp_text(h, "Centre?"); whlp_tab(h);
+    whlp_text(h, "Left?");
+    whlp_end_para(h);
+    /* The same three tab stops, set again for a second such paragraph. */
+    whlp_set_tabstop(h, 128, WHLP_ALIGN_RIGHT);
+    whlp_set_tabstop(h, 256, WHLP_ALIGN_CENTRE);
+    whlp_set_tabstop(h, 384, WHLP_ALIGN_LEFT);
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Aah:"); whlp_tab(h);
+    whlp_text(h, "R?"); whlp_tab(h);
+    whlp_text(h, "C?"); whlp_tab(h);
+    whlp_text(h, "L?");
+    whlp_end_para(h);
+
+    sprintf(mymacro, "CBB(\"btn_up\",\"JI(`',`%s')\");EB(\"btn_up\")",
+	    whlp_topic_id(t1));
+
+    whlp_begin_topic(h, t3, "Third Topic", mymacro, NULL);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "This third topic is almost as boring as the first. Woo!");
+    whlp_end_para(h);
+
+    /*
+     * Browse sequence.
+     */
+    whlp_browse_link(h, t1, t2);
+    whlp_browse_link(h, t2, t3);
+
+    /*
+     * Index terms.
+     */
+    whlp_index_term(h, "foobarbaz", t1);
+    whlp_index_term(h, "foobarbaz", t2);
+    whlp_index_term(h, "foobarbaz", t3);
+    whlp_index_term(h, "foobar", t1);
+    whlp_index_term(h, "foobar", t2);
+    whlp_index_term(h, "foobaz", t1);
+    whlp_index_term(h, "foobaz", t3);
+    whlp_index_term(h, "barbaz", t2);
+    whlp_index_term(h, "barbaz", t3);
+    whlp_index_term(h, "foo", t1);
+    whlp_index_term(h, "bar", t2);
+    whlp_index_term(h, "baz", t3);
+
+    whlp_close(h, "test.hlp");	       /* writes the file and frees h */
+    return 0;
+}
+
+#endif
diff --git a/winhelp.h b/winhelp.h
new file mode 100644
index 0000000..d0c81d5
--- /dev/null
+++ b/winhelp.h
@@ -0,0 +1,168 @@
+/*
+ * winhelp.h   header file for winhelp.c
+ */
+
+typedef struct WHLP_tag *WHLP;
+
+typedef struct WHLP_TOPIC_tag *WHLP_TOPIC;
+
+/*
+ * Initialise a new WHlp context and begin accumulating data in it.
+ */
+WHLP whlp_new(void);
+
+/*
+ * Close a WHlp context and write out the help file it has created.
+ */
+void whlp_close(WHLP h, char *filename);
+
+/*
+ * Abandon and free a WHlp context without writing out anything.
+ */
+void whlp_abandon(WHLP h);
+
+/*
+ * Specify the title and copyright notice of a help file. Also
+ * specify Help macros to be run on loading.
+ */
+void whlp_title(WHLP h, char *title);
+void whlp_copyright(WHLP h, char *copyright);
+void whlp_start_macro(WHLP h, char *macro);
+
+/*
+ * Register a help topic. Irritatingly, due to weird phase-order
+ * issues with the whole file format, you have to register all your
+ * topics _before_ actually outputting your text. This seems likely
+ * to require two passes over the source document.
+ * 
+ * If you want to specify a particular context string (for
+ * reference from other programs, to provide context-sensitive
+ * help), you can supply it here. Otherwise, just pass NULL and a
+ * nondescript one will be allocated automatically.
+ *
+ * If you specify two context strings which clash under the Windows
+ * help file hash algorithm, this function will return NULL and
+ * provide a pointer to the other context string that this one
+ * clashed with, and you must tell your user to fix the clash.
+ * Sadly this is the only way to do it; despite HLP files having a
+ * perfectly good method of mapping arbitrary strings to things,
+ * they didn't see fit to use that method for help contexts, so
+ * instead they hash the context names and expect the hashes to be
+ * unique. Sigh.
+ * 
+ * On success (i.e. in any circumstance other than a hash clash), a
+ * valid WHLP_TOPIC is returned for later use.
+ */
+WHLP_TOPIC whlp_register_topic(WHLP h, char *context_name, char **clash);
+
+/*
+ * Link two topics together in a browse sequence. Automatically
+ * takes care of the forward and reverse links.
+ */
+void whlp_browse_link(WHLP h, WHLP_TOPIC before, WHLP_TOPIC after);
+
+/*
+ * After calling whlp_register_topic for all topics, you should
+ * call this, which will sort out all loose ends and allocate
+ * context names for all anonymous topics. Then you can start
+ * writing actual text.
+ */
+void whlp_prepare(WHLP h);
+
+/*
+ * Create a link from an index term to a topic.
+ */
+void whlp_index_term(WHLP h, char *index, WHLP_TOPIC topic);
+
+/*
+ * Call this if you need the id of a topic and you don't already
+ * know it (for example, if whlp_prepare has allocated it
+ * anonymously for you). You might need this, for example, in
+ * creating macros for button-bar bindings.
+ * 
+ * The string returned will be freed when the WHLP context is
+ * closed. You should not free it yourself.
+ * 
+ * Do not call this before calling whlp_prepare().
+ */
+char *whlp_topic_id(WHLP_TOPIC topic);
+
+/*
+ * Call this to specify which help topic will be the first one
+ * displayed when the help file is loaded.
+ */
+void whlp_primary_topic(WHLP h, WHLP_TOPIC topic);
+
+/*
+ * Call this when about to begin writing out the text for a topic.
+ * 
+ * Any additional arguments are Help macros, terminated with a
+ * NULL. So the minimum call sequence is
+ * 
+ *   whlp_begin_topic(helpfile, mytopic, "Title", NULL);
+ */
+void whlp_begin_topic(WHLP h, WHLP_TOPIC topic, char *title, ...);
+
+/*
+ * Call this to set up a font descriptor. You supply the font name,
+ * the general font family (for Windows to select a fallback font
+ * if yours is unavailable), the font size (in half-points), and
+ * the graphic rendition flags (bold, italic etc). You can also
+ * specify a foreground colour for the text (but unfortunately not
+ * a background).
+ * 
+ * Font descriptors are identified in whlp_set_font() by small
+ * integers, which are allocated from 0 upwards in the order you
+ * call whlp_create_font(). For your convenience,
+ * whlp_create_font() returns the integer allocated to each font
+ * descriptor you create, but you could work this out just as
+ * easily yourself by counting.
+ */
+enum {				       /* `rendition' flags, one bit each */
+    WHLP_FONT_BOLD = 1,
+    WHLP_FONT_ITALIC = 2,
+    WHLP_FONT_UNDERLINE = 4,
+    WHLP_FONT_STRIKEOUT = 8,
+    WHLP_FONT_DOUBLEUND = 16,
+    WHLP_FONT_SMALLCAPS = 32
+};
+enum {				       /* `family' values */
+    WHLP_FONTFAM_FIXED = 1,
+    WHLP_FONTFAM_SERIF = 2,
+    WHLP_FONTFAM_SANS = 3,
+    WHLP_FONTFAM_SCRIPT = 4,
+    WHLP_FONTFAM_DECOR = 5
+};
+int whlp_create_font(WHLP h, char *font, int family, int halfpoints,
+		     int rendition, int r, int g, int b);
+
+/*
+ * Routines to output paragraphs and actual text (at last).
+ * 
+ * You should start by calling whlp_para_attr() to set any
+ * paragraph attributes that differ from the standard settings.
+ * Next call whlp_begin_para() to start the paragraph. Then call
+ * the various in-paragraph functions until you have output the
+ * whole paragraph, and finally call whlp_end_para() to finish it
+ * off.
+ */
+enum {				       /* attr_id values for whlp_para_attr() */
+    WHLP_PARA_SPACEABOVE=1, WHLP_PARA_SPACEBELOW, WHLP_PARA_SPACELINES,
+    WHLP_PARA_LEFTINDENT, WHLP_PARA_RIGHTINDENT, WHLP_PARA_FIRSTLINEINDENT,
+    WHLP_PARA_ALIGNMENT
+};
+enum {				       /* alignments, e.g. for whlp_set_tabstop() */
+    WHLP_ALIGN_LEFT, WHLP_ALIGN_RIGHT, WHLP_ALIGN_CENTRE
+};
+enum {				       /* para_type values for whlp_begin_para() */
+    WHLP_PARA_SCROLL, WHLP_PARA_NONSCROLL
+};
+void whlp_para_attr(WHLP h, int attr_id, int attr_param);
+void whlp_set_tabstop(WHLP h, int tabstop, int alignment);
+void whlp_begin_para(WHLP h, int para_type);
+void whlp_end_para(WHLP h);
+void whlp_set_font(WHLP h, int font_id);  /* id from whlp_create_font() */
+void whlp_text(WHLP h, char *text);
+void whlp_start_hyperlink(WHLP h, WHLP_TOPIC target);
+void whlp_end_hyperlink(WHLP h);
+void whlp_tab(WHLP h);
-- 
2.11.0