--- /dev/null
+Halibut is copyright (c) 1999-2001 Simon Tatham and James Aylett.
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation files
+(the "Software"), to deal in the Software without restriction,
+including without limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of the Software,
+and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- /dev/null
+Module: halibut
+Author: Simon Tatham <anakin@pobox.com>
+Description: Halibut is yet another text formatting system, intended primarily for writing software documentation. It accepts a single source format and outputs a variety of formats, planned to include text, HTML, Texinfo, Windows Help, Windows HTMLHelp, PostScript and PDF. It has comprehensive indexing and cross-referencing support, and generates hyperlinks within output documents wherever possible.
+Homepage: http://www.chiark.greenend.org.uk/~sgtatham/halibut.html
+Release: http://www.chiark.greenend.org.uk/~sgtatham/halibut.html
--- /dev/null
+# Halibut master makefile
+
+# Requires GNU make and a compiler that supports the -MD option
+# (used below to generate the .d dependency files).
+
+# `make' from top level will build in directory `build'
+# `make BUILDDIR=foo' from top level will build in directory foo
+# `make TEST=yes' from top level will build in directory `test'
+ifndef REALBUILD
+ifndef BUILDDIR
+ifdef TEST
+BUILDDIR := test
+else
+BUILDDIR := build
+endif
+endif
+
+# None of these targets names a real file, so make sure a stray file
+# called `all' or `clean' can never suppress them.
+.PHONY: all spotless clean
+
+# Each top-level target just creates the build directory if need be
+# and re-runs this makefile inside it with REALBUILD set. $(MAKE) is
+# used rather than a literal `make' so that command-line flags and
+# parallel job control are passed on to the sub-make.
+all:
+	@test -d $(BUILDDIR) || mkdir $(BUILDDIR)
+	@$(MAKE) -C $(BUILDDIR) -f ../Makefile REALBUILD=yes
+spotless:
+	@test -d $(BUILDDIR) || mkdir $(BUILDDIR)
+	@$(MAKE) -C $(BUILDDIR) -f ../Makefile spotless REALBUILD=yes
+clean:
+	@test -d $(BUILDDIR) || mkdir $(BUILDDIR)
+	@$(MAKE) -C $(BUILDDIR) -f ../Makefile clean REALBUILD=yes
+else
+
+# The `real' makefile part. This runs with the build directory as
+# the current directory, so sources live in $(SRC).
+
+CFLAGS += -Wall -W
+
+# TEST builds log allocations and link against Electric Fence.
+ifdef TEST
+CFLAGS += -DLOGALLOC
+LIBS += -lefence
+endif
+
+# A RELEASE build takes its version string from $(RELEASE) unless
+# VERSION was given explicitly; non-release builds get debug info.
+ifdef RELEASE
+ifndef VERSION
+VERSION := $(RELEASE)
+endif
+else
+CFLAGS += -g
+endif
+
+ifndef VER
+ifdef VERSION
+VER := $(VERSION)
+endif
+endif
+ifdef VER
+VDEF := -DVERSION=\"$(VER)\"
+endif
+
+SRC := ../
+
+MODULES := main malloc ustring error help licence version misc tree234
+MODULES += input keywords contents index style biblio
+MODULES += bk_text bk_xhtml bk_whlp
+MODULES += winhelp
+
+OBJECTS := $(addsuffix .o,$(MODULES))
+DEPS := $(addsuffix .d,$(MODULES))
+
+halibut: $(OBJECTS)
+	$(CC) $(LFLAGS) -o halibut $(OBJECTS) $(LIBS)
+
+%.o: $(SRC)%.c
+	$(CC) $(CFLAGS) -MD -c $<
+
+# version.o is rebuilt on every run (it depends on FORCE) so that the
+# version definition passed in $(VDEF) is always current.
+version.o: FORCE
+	$(CC) $(VDEF) -MD -c $(SRC)version.c
+
+spotless:: clean
+	rm -f *.d
+
+clean::
+	rm -f *.o halibut core
+
+FORCE: # phony target to force version.o to be rebuilt every time
+
+-include $(DEPS)
+
+endif
--- /dev/null
+/*
+ * biblio.c: process the bibliography
+ */
+
+#include <assert.h>
+#include "halibut.h"
+
+/*
+ * Build the default citation text `[N]' for bibliography entry
+ * number `num' and return a copy of it made with ustrdup(); the
+ * caller owns the returned string.
+ *
+ * `num' must not be negative. A value of zero yields `[]', since
+ * no digits are generated for it.
+ */
+static wchar_t *gentext(int num) {
+    wchar_t text[22];
+    /*
+     * The string is assembled backwards from the end of the buffer.
+     * The end must be computed in elements, not bytes: adding
+     * sizeof(text) to a wchar_t pointer would land well past the
+     * array and the writes below would overflow it.
+     */
+    wchar_t *p = text + sizeof(text) / sizeof(*text);
+
+    assert(num >= 0);   /* a negative num would index before the digit table */
+
+    *--p = L'\0';
+    *--p = L']';
+    while (num != 0) {
+        assert(p > text);
+        *--p = L"0123456789"[num % 10];
+        num /= 10;
+    }
+    assert(p > text);
+    *--p = L'[';
+    return ustrdup(p);
+}
+
+/*
+ * Handle one reference to keyword `key' found at position `fpos'.
+ * An unknown keyword is reported as err_nosuchkw. If the keyword
+ * belongs to a bibliography paragraph (para_Biblio), that paragraph
+ * is promoted to para_BiblioCited; any other paragraph type is left
+ * untouched.
+ */
+static void cite_biblio(keywordlist *kl, wchar_t *key, filepos fpos) {
+    keyword *found = kw_lookup(kl, key);
+
+    if (found == NULL) {
+        error(err_nosuchkw, &fpos, key);
+        return;
+    }
+
+    /* We've found a \k reference; mark bibliography entries as cited. */
+    if (found->para->type == para_Biblio)
+        found->para->type = para_BiblioCited;
+}
+
+/*
+ * Make a pass through the source form, generating citation formats
+ * for bibliography entries and also marking which bibliography
+ * entries are actually cited (or \nocite-ed).
+ */
+
+/*
+ * gen_citations: walk the paragraph list `source' twice.
+ *
+ * First pass: attach \BR citation text to its keyword (reporting
+ * unknown keywords and duplicate \BR definitions), and mark as cited
+ * every bibliography entry named in a \nocite paragraph or in an
+ * upper/lower-case cross-reference word.
+ *
+ * Second pass: for each cited bibliography entry with no explicit
+ * \BR text, invent a numbered `[N]' text, then point the paragraph's
+ * kwtext at the keyword's text.
+ */
+void gen_citations(paragraph *source, keywordlist *kl) {
+    paragraph *para;
+    int bibnum = 0;
+
+    for (para = source; para; para = para->next) {
+        word *ptr;
+
+        /*
+         * \BR and \nocite paragraphs get special processing here.
+         */
+        if (para->type == para_BR) {
+            keyword *kw = kw_lookup(kl, para->keyword);
+            if (!kw) {
+                error(err_nosuchkw, &para->fpos, para->keyword);
+            } else if (kw->text) {
+                error(err_multiBR, &para->fpos, para->keyword);
+            } else {
+                kw->text = dup_word_list(para->words);
+            }
+        } else if (para->type == para_NoCite) {
+            /* The keyword field is walked entry by entry with uadv()
+             * until an empty string is reached. */
+            wchar_t *wp = para->keyword;
+            while (*wp) {
+                cite_biblio(kl, wp, para->fpos);
+                wp = uadv(wp);
+            }
+        }
+
+        /*
+         * Scan for keyword references.
+         */
+        for (ptr = para->words; ptr; ptr = ptr->next) {
+            if (ptr->type == word_UpperXref ||
+                ptr->type == word_LowerXref)
+                cite_biblio(kl, ptr->text, ptr->fpos);
+        }
+    }
+
+    /*
+     * We're now almost done; all that remains is to scan through
+     * the cited bibliography entries and invent default citation
+     * texts for the ones that don't already have explicitly
+     * provided \BR text.
+     */
+    for (para = source; para; para = para->next) {
+        if (para->type == para_BiblioCited) {
+            keyword *kw = kw_lookup(kl, para->keyword);
+            assert(kw != NULL);
+            if (!kw->text) {
+                word *wd = smalloc(sizeof(word));
+                wd->text = gentext(++bibnum);
+                wd->type = word_Normal;
+                wd->alt = NULL;
+                wd->next = NULL;
+                kw->text = wd;
+            }
+            para->kwtext = kw->text;
+        }
+    }
+}
--- /dev/null
+/*
+ * text backend for Halibut
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "halibut.h"
+
+/*
+ * How a heading is placed on the line. LEFT and CENTRE headings are
+ * laid out across the full indent+width; LEFTPLUS headings put their
+ * prefix in the left margin (see text_heading).
+ */
+typedef enum { LEFT, LEFTPLUS, CENTRE } alignment;
+/*
+ * Formatting rules for one class of heading.
+ */
+typedef struct {
+ alignment align;
+ /* nonzero: prefix the heading with its numeric prefix and a space;
+ * zero: prefix it with its textual prefix and ": " */
+ int just_numbers;
+ /* character used to underline the heading; L'\0' means none */
+ wchar_t underline;
+} alignstruct;
+
+/*
+ * Complete configuration of the text backend, as filled in by
+ * text_configure() from defaults and \cfg paragraphs.
+ */
+typedef struct {
+ /* left margin of body text, and additional indent of code paragraphs */
+ int indent, indent_code;
+ /* list items: indent before the bullet/number, and the further
+ * indent of the item text after it */
+ int listindentbefore, listindentafter;
+ /* width of body text, not counting `indent' */
+ int width;
+ /* heading styles for the title and for chapters, plus an allocated
+ * array of styles for section levels */
+ alignstruct atitle, achapter, *asect;
+ /* number of elements in asect */
+ int nasect;
+ /* nonzero if version ID paragraphs are to be output */
+ int include_version_id;
+ /* nonzero if preamble and copyright paragraphs are indented like
+ * body text */
+ int indent_preambles;
+ /* word used as the prefix of bulleted list items */
+ word bullet;
+} textconfig;
+
+static int text_convert(wchar_t *, char **);
+
+static void text_heading(FILE *, word *, word *, word *, alignstruct, int,int);
+static void text_rule(FILE *, int, int);
+static void text_para(FILE *, word *, char *, word *, int, int, int);
+static void text_codepara(FILE *, word *, int, int);
+static void text_versionid(FILE *, word *);
+
+/*
+ * Translate an alignment name from a configuration directive into
+ * an `alignment' value. "centre"/"center" and "leftplus" are
+ * recognised via ustricmp(); anything else means LEFT.
+ */
+static alignment utoalign(wchar_t *p) {
+    alignment result = LEFT;
+
+    if (ustricmp(p, L"centre") == 0 || ustricmp(p, L"center") == 0)
+        result = CENTRE;
+    else if (ustricmp(p, L"leftplus") == 0)
+        result = LEFTPLUS;
+
+    return result;
+}
+
+/*
+ * Helper for the text-section-* directives. `p' points at the first
+ * argument of the directive. If that argument starts with a digit
+ * it is taken as a section level and skipped; otherwise level 0 is
+ * assumed. The asect array in `cfg' is grown if necessary so that
+ * the level is a valid index, with new entries copied from the
+ * previously last entry. The level is stored in `*level' and the
+ * return value points at the directive's value argument.
+ */
+static wchar_t *text_section_arg(textconfig *cfg, wchar_t *p, int *level) {
+    int n = 0;
+
+    if (uisdigit(*p)) {
+        n = utoi(p);
+        p = uadv(p);
+    }
+    if (n >= cfg->nasect) {
+        int i;
+        cfg->asect = resize(cfg->asect, n+1);
+        for (i = cfg->nasect; i <= n; i++)
+            cfg->asect[i] = cfg->asect[cfg->nasect-1];
+        cfg->nasect = n+1;
+    }
+    *level = n;
+    return p;
+}
+
+/*
+ * Build the text backend configuration: start from built-in
+ * defaults, then apply every para_Config paragraph in `source'
+ * whose keyword is one of the text-* directives.
+ *
+ * The returned structure owns two allocations, `asect' and
+ * `bullet.text', which the caller is expected to free.
+ */
+static textconfig text_configure(paragraph *source) {
+    textconfig ret;
+
+    /*
+     * Non-negotiables.
+     */
+    ret.bullet.next = NULL;
+    ret.bullet.alt = NULL;
+    ret.bullet.type = word_Normal;
+    ret.atitle.just_numbers = FALSE;   /* ignored */
+
+    /*
+     * Defaults.
+     */
+    ret.indent = 7;
+    ret.indent_code = 2;
+    ret.listindentbefore = 1;
+    ret.listindentafter = 3;
+    ret.width = 68;
+    ret.atitle.align = CENTRE;
+    ret.atitle.underline = L'=';
+    ret.achapter.align = LEFT;
+    ret.achapter.just_numbers = FALSE;
+    ret.achapter.underline = L'-';
+    ret.nasect = 1;
+    ret.asect = mknewa(alignstruct, ret.nasect);
+    ret.asect[0].align = LEFTPLUS;
+    ret.asect[0].just_numbers = TRUE;
+    ret.asect[0].underline = L'\0';
+    ret.include_version_id = TRUE;
+    ret.indent_preambles = FALSE;
+    ret.bullet.text = ustrdup(L"-");
+
+    for (; source; source = source->next) {
+        wchar_t *key;
+
+        if (source->type != para_Config)
+            continue;
+        key = source->keyword;
+
+        if (!ustricmp(key, L"text-indent")) {
+            ret.indent = utoi(uadv(key));
+        } else if (!ustricmp(key, L"text-indent-code")) {
+            ret.indent_code = utoi(uadv(key));
+        } else if (!ustricmp(key, L"text-width")) {
+            ret.width = utoi(uadv(key));
+        } else if (!ustricmp(key, L"text-list-indent")) {
+            ret.listindentbefore = utoi(uadv(key));
+        } else if (!ustricmp(key, L"text-listitem-indent")) {
+            ret.listindentafter = utoi(uadv(key));
+        } else if (!ustricmp(key, L"text-chapter-align")) {
+            ret.achapter.align = utoalign(uadv(key));
+        } else if (!ustricmp(key, L"text-chapter-underline")) {
+            ret.achapter.underline = *uadv(key);
+        } else if (!ustricmp(key, L"text-chapter-numeric")) {
+            /* This sets the numeric flag, not the underline character. */
+            ret.achapter.just_numbers = utob(uadv(key));
+        } else if (!ustricmp(key, L"text-section-align")) {
+            int n;
+            wchar_t *p = text_section_arg(&ret, uadv(key), &n);
+            ret.asect[n].align = utoalign(p);
+        } else if (!ustricmp(key, L"text-section-underline")) {
+            int n;
+            wchar_t *p = text_section_arg(&ret, uadv(key), &n);
+            ret.asect[n].underline = *p;
+        } else if (!ustricmp(key, L"text-section-numeric")) {
+            int n;
+            wchar_t *p = text_section_arg(&ret, uadv(key), &n);
+            ret.asect[n].just_numbers = utob(p);
+        } else if (!ustricmp(key, L"text-title-align")) {
+            ret.atitle.align = utoalign(uadv(key));
+        } else if (!ustricmp(key, L"text-title-underline")) {
+            ret.atitle.underline = *uadv(key);
+        } else if (!ustricmp(key, L"text-versionid")) {
+            ret.include_version_id = utob(uadv(key));
+        } else if (!ustricmp(key, L"text-indent-preamble")) {
+            ret.indent_preambles = utob(uadv(key));
+        } else if (!ustricmp(key, L"text-bullet")) {
+            /*
+             * bullet.text is owned by the config and freed by the
+             * caller, so take a private copy instead of pointing
+             * into the paragraph's keyword data, and release the
+             * previous value.
+             */
+            sfree(ret.bullet.text);
+            ret.bullet.text = ustrdup(uadv(key));
+        }
+    }
+
+    return ret;
+}
+
+/*
+ * Entry point of the text backend: render the paragraph list
+ * `sourceform' as plain text into the file `output.txt'.
+ *
+ * Output order is: title(s), preamble, copyright, the main document
+ * body, and finally (if enabled in the configuration) the version
+ * IDs. `keywords' and `idx' are accepted for interface uniformity
+ * but are not used. If the output file cannot be opened,
+ * err_cantopenw is reported and nothing is written.
+ */
+void text_backend(paragraph *sourceform, keywordlist *keywords,
+                  indexdata *idx) {
+    paragraph *p;
+    textconfig conf;
+    word *prefix, *body, *wp;
+    word spaceword;
+    FILE *fp;
+    char *prefixextra;
+    int indentb, indenta;
+
+    IGNORE(keywords);                  /* we don't happen to need this */
+    IGNORE(idx);                       /* or this */
+
+    conf = text_configure(sourceform);
+
+    /*
+     * Determine the output file name, and open the output file
+     *
+     * FIXME: want configurable output file names here. For the
+     * moment, we'll just call it `output.txt'.
+     */
+    fp = fopen("output.txt", "w");
+    if (!fp) {
+        error(err_cantopenw, "output.txt");
+        /* Don't leak the configuration on the failure path. */
+        sfree(conf.asect);
+        sfree(conf.bullet.text);
+        return;
+    }
+
+    /* Do the title */
+    for (p = sourceform; p; p = p->next)
+        if (p->type == para_Title)
+            text_heading(fp, NULL, NULL, p->words,
+                         conf.atitle, conf.indent, conf.width);
+
+    /*
+     * Do the preamble and copyright. When preambles are not
+     * indented, the paragraph starts at column 0 and gets the
+     * indent added to its width, so the right margin stays put.
+     */
+    for (p = sourceform; p; p = p->next)
+        if (p->type == para_Preamble)
+            text_para(fp, NULL, NULL, p->words,
+                      conf.indent_preambles ? conf.indent : 0, 0,
+                      conf.width + (conf.indent_preambles ? 0 : conf.indent));
+    for (p = sourceform; p; p = p->next)
+        if (p->type == para_Copyright)
+            text_para(fp, NULL, NULL, p->words,
+                      conf.indent_preambles ? conf.indent : 0, 0,
+                      conf.width + (conf.indent_preambles ? 0 : conf.indent));
+
+    /* Do the main document */
+    for (p = sourceform; p; p = p->next) switch (p->type) {
+
+        /*
+         * Things we ignore because we've already processed them or
+         * aren't going to touch them in this pass.
+         */
+      case para_IM:
+      case para_BR:
+      case para_Biblio:                /* only touch BiblioCited */
+      case para_VersionID:
+      case para_Copyright:
+      case para_Preamble:
+      case para_NoCite:
+      case para_Title:
+        break;
+
+        /*
+         * Chapter titles.
+         */
+      case para_Chapter:
+      case para_Appendix:
+      case para_UnnumberedChapter:
+        text_heading(fp, p->kwtext, p->kwtext2, p->words,
+                     conf.achapter, conf.indent, conf.width);
+        break;
+
+      case para_Heading:
+      case para_Subsect:
+        /* Section levels beyond the configured ones use the last
+         * configured style. */
+        text_heading(fp, p->kwtext, p->kwtext2, p->words,
+                     conf.asect[p->aux>=conf.nasect ? conf.nasect-1 : p->aux],
+                     conf.indent, conf.width);
+        break;
+
+      case para_Rule:
+        text_rule(fp, conf.indent, conf.width);
+        break;
+
+      case para_Normal:
+      case para_BiblioCited:
+      case para_Bullet:
+      case para_NumberedList:
+        if (p->type == para_Bullet) {
+            prefix = &conf.bullet;
+            prefixextra = NULL;
+            indentb = conf.listindentbefore;
+            indenta = conf.listindentafter;
+        } else if (p->type == para_NumberedList) {
+            prefix = p->kwtext;
+            prefixextra = ".";         /* FIXME: configurability */
+            indentb = conf.listindentbefore;
+            indenta = conf.listindentafter;
+        } else {
+            prefix = NULL;
+            prefixextra = NULL;
+            indentb = indenta = 0;
+        }
+        if (p->type == para_BiblioCited) {
+            /*
+             * Temporarily build the list "<citation> <space> <words>":
+             * a private copy of the citation text, then the local
+             * space word, then the paragraph's own words.
+             */
+            body = dup_word_list(p->kwtext);
+            for (wp = body; wp->next; wp = wp->next);
+            wp->next = &spaceword;
+            spaceword.next = p->words;
+            spaceword.alt = NULL;
+            spaceword.type = word_WhiteSpace;
+            spaceword.text = NULL;
+        } else {
+            wp = NULL;
+            body = p->words;
+        }
+        text_para(fp, prefix, prefixextra, body,
+                  conf.indent + indentb, indenta, conf.width);
+        if (wp) {
+            /* Detach the copy again before freeing it, so that only
+             * the duplicated citation words are released. */
+            wp->next = NULL;
+            free_word_list(body);
+        }
+        break;
+
+      case para_Code:
+        text_codepara(fp, p->words, conf.indent + conf.indent_code, conf.width - 2 * conf.indent_code);
+        break;
+    }
+
+    /* Do the version ID */
+    if (conf.include_version_id) {
+        for (p = sourceform; p; p = p->next)
+            if (p->type == para_VersionID)
+                text_versionid(fp, p->words);
+    }
+
+    /*
+     * Tidy up
+     */
+    fclose(fp);
+    sfree(conf.asect);                 /* allocated by text_configure */
+    sfree(conf.bullet.text);
+}
+
+/*
+ * Turn the wide string `s' into a string of chars.
+ *
+ * With `result' non-NULL, the converted string is allocated and a
+ * pointer to it stored in `*result'. With `result' NULL, no output
+ * is produced and the call only tests whether every character can
+ * be represented in the output character set.
+ *
+ * Returns nonzero if every character was representable. When some
+ * were not and `result' is non-NULL, a string is produced anyway,
+ * with each unrepresentable character replaced by 0xBF.
+ */
+static int text_convert(wchar_t *s, char **result) {
+    /*
+     * FIXME. Currently this is ISO8859-1 only.
+     */
+    int all_ok = TRUE;
+    char *buf = NULL;
+    int len = 0, cap = 0;
+
+    while (*s) {
+        wchar_t wc = *s++;
+        char ch;
+
+        if ((wc >= 32 && wc <= 126) ||
+            (wc >= 160 && wc <= 255)) {
+            /* Representable: pass straight through. */
+            ch = (char)wc;
+        } else {
+            /* Not representable. */
+            all_ok = FALSE;
+            ch = 0xBF;                 /* approximate the good old DEC `uh?' */
+        }
+
+        if (result) {
+            if (len >= cap) {
+                cap = len + 256;
+                buf = resize(buf, cap);
+            }
+            buf[len++] = ch;
+        }
+    }
+
+    if (result) {
+        /* Trim to the exact size and terminate. */
+        buf = resize(buf, len+1);
+        buf[len] = '\0';
+        *result = buf;
+    }
+
+    return all_ok;
+}
+
+/*
+ * Append the plain-text rendering of the words from `text' up to
+ * (not including) `end', or to the end of the list if `end' is NULL,
+ * to the string `rs'.
+ *
+ * Emphasised runs are wrapped in `_'...`_' and code runs in
+ * ``'...`'', the markers being added at words whose attribute is
+ * First/Last/Only. Link, cross-reference and index words contribute
+ * nothing. A normal word whose text cannot be converted is replaced
+ * by the rendering of its `alt' list.
+ */
+static void text_rdaddwc(rdstringc *rs, word *text, word *end) {
+    for (; text && text != end; text = text->next) {
+        int style, attr, base;
+        int opens, closes;
+        char *c;
+
+        switch (text->type) {
+          case word_Normal:
+          case word_Emph:
+          case word_Code:
+          case word_WeakCode:
+          case word_WhiteSpace:
+          case word_EmphSpace:
+          case word_CodeSpace:
+          case word_WkCodeSpace:
+          case word_Quote:
+          case word_EmphQuote:
+          case word_CodeQuote:
+          case word_WkCodeQuote:
+            break;                     /* rendered below */
+          default:
+            continue;                  /* links, xrefs, index refs etc. */
+        }
+
+        assert(text->type != word_CodeQuote &&
+               text->type != word_WkCodeQuote);
+
+        style = towordstyle(text->type);
+        attr = attraux(text->aux);
+        base = removeattr(text->type);
+        opens = (attr == attr_First || attr == attr_Only);
+        closes = (attr == attr_Last || attr == attr_Only);
+
+        /* Opening marker. */
+        if (opens) {
+            if (style == word_Emph)
+                rdaddc(rs, '_');       /* FIXME: configurability */
+            else if (style == word_Code)
+                rdaddc(rs, '`');       /* FIXME: configurability */
+        }
+
+        /* The word itself. */
+        if (base == word_Normal) {
+            if (text_convert(text->text, &c))
+                rdaddsc(rs, c);
+            else
+                text_rdaddwc(rs, text->alt, NULL);
+            sfree(c);
+        } else if (base == word_WhiteSpace) {
+            rdaddc(rs, ' ');
+        } else if (base == word_Quote) {
+            /* FIXME: configurability */
+            if (quoteaux(text->aux) == quote_Open)
+                rdaddc(rs, '`');
+            else
+                rdaddc(rs, '\'');
+        }
+
+        /* Closing marker. */
+        if (closes) {
+            if (style == word_Emph)
+                rdaddc(rs, '_');       /* FIXME: configurability */
+            else if (style == word_Code)
+                rdaddc(rs, '\'');      /* FIXME: configurability */
+        }
+    }
+}
+
+static int text_width(word *);
+
+/*
+ * Total display width of a whole word list: the sum of text_width()
+ * over every word reachable through `next'.
+ */
+static int text_width_list(word *text) {
+    int total = 0;
+    word *w;
+
+    for (w = text; w != NULL; w = w->next)
+        total += text_width(w);
+
+    return total;
+}
+
+/*
+ * Number of columns taken by the emphasis/code marker characters
+ * attached to a word, judged from its attribute: two for attr_Only,
+ * none for attr_Always, one for anything else.
+ */
+static int text_marker_width(word *text) {
+    if (attraux(text->aux) == attr_Only)
+        return 2;
+    if (attraux(text->aux) == attr_Always)
+        return 0;
+    return 1;
+}
+
+/*
+ * Display width of a single word as the text backend will render
+ * it. Link, cross-reference and index words have no width. Text
+ * words count their characters (or the width of their `alt' list if
+ * the text is not convertible) plus any markers; spaces and quotes
+ * count one column plus any markers.
+ */
+static int text_width(word *text) {
+    int markers = 0;
+
+    switch (text->type) {
+      case word_HyperLink:
+      case word_HyperEnd:
+      case word_UpperXref:
+      case word_LowerXref:
+      case word_XrefEnd:
+      case word_IndexRef:
+        return 0;
+
+      case word_Normal:
+      case word_Emph:
+      case word_Code:
+      case word_WeakCode:
+        if (text->type == word_Emph || text->type == word_Code)
+            markers = text_marker_width(text);
+        return (markers +
+                (text_convert(text->text, NULL) ?
+                 ustrlen(text->text) :
+                 text_width_list(text->alt)));
+
+      case word_WhiteSpace:
+      case word_EmphSpace:
+      case word_CodeSpace:
+      case word_WkCodeSpace:
+      case word_Quote:
+      case word_EmphQuote:
+      case word_CodeQuote:
+      case word_WkCodeQuote:
+        assert(text->type != word_CodeQuote &&
+               text->type != word_WkCodeQuote);
+        if (towordstyle(text->type) == word_Emph ||
+            towordstyle(text->type) == word_Code)
+            markers = text_marker_width(text);
+        return markers + 1;
+    }
+    return 0;                          /* should never happen */
+}
+
+/*
+ * Write one heading to `fp', followed by a blank line.
+ *
+ * `tprefix' is the textual prefix and `nprefix' the numeric prefix;
+ * which (if either) is used depends on align.just_numbers. `text' is
+ * the heading text, wrapped with wrap_para(). `indent' and `width'
+ * are the body text margin and width.
+ *
+ * LEFTPLUS headings put the prefix in the left margin and wrap the
+ * text to `width'; LEFT and CENTRE headings use the full
+ * indent+width, CENTRE lines being centred individually. If
+ * align.underline is not L'\0', every output line is underlined
+ * with that character.
+ */
+static void text_heading(FILE *fp, word *tprefix, word *nprefix, word *text,
+                         alignstruct align, int indent, int width) {
+    rdstringc t = { 0, 0, NULL };
+    int margin, length;
+    int firstlinewidth, wrapwidth;
+    wrappedline *wrapping, *p;
+
+    if (align.just_numbers && nprefix) {
+        text_rdaddwc(&t, nprefix, NULL);
+        rdaddc(&t, ' ');               /* FIXME: as below */
+    } else if (!align.just_numbers && tprefix) {
+        text_rdaddwc(&t, tprefix, NULL);
+        rdaddsc(&t, ": ");             /* FIXME: configurability */
+    }
+    margin = length = (t.text ? strlen(t.text) : 0);
+
+    if (align.align == LEFTPLUS) {
+        /* Hang the prefix in the margin, as far as it will fit. */
+        margin = indent - margin;
+        if (margin < 0) margin = 0;
+        firstlinewidth = indent + width - margin - length;
+        wrapwidth = width;
+    } else {
+        /* LEFT or CENTRE. A plain `else' guarantees that the widths
+         * are always initialised before wrap_para() reads them. */
+        margin = 0;
+        firstlinewidth = indent + width - length;
+        wrapwidth = indent + width;
+    }
+
+    wrapping = wrap_para(text, firstlinewidth, wrapwidth, text_width);
+    for (p = wrapping; p; p = p->next) {
+        /* On the first line `t' still holds the prefix. */
+        text_rdaddwc(&t, p->begin, p->end);
+        length = (t.text ? strlen(t.text) : 0);
+        if (align.align == CENTRE) {
+            margin = (indent + width - length)/2;
+            if (margin < 0) margin = 0;
+        }
+        /* t.text stays NULL if nothing was added; never hand a null
+         * pointer to %s. */
+        fprintf(fp, "%*s%s\n", margin, "", t.text ? t.text : "");
+        if (align.underline != L'\0') {
+            char *u, uc;
+            wchar_t uw[2];
+            uw[0] = align.underline; uw[1] = L'\0';
+            text_convert(uw, &u);
+            uc = u[0];
+            sfree(u);
+            fprintf(fp, "%*s", margin, "");
+            while (length--)
+                putc(uc, fp);
+            putc('\n', fp);
+        }
+        /* Margin for continuation lines. */
+        if (align.align == LEFTPLUS)
+            margin = indent;
+        else
+            margin = 0;
+        sfree(t.text);
+        t = empty_rdstringc;
+    }
+    wrap_free(wrapping);
+    putc('\n', fp);
+
+    sfree(t.text);
+}
+
+/*
+ * Write a horizontal rule to `fp': `indent' spaces, `width' dashes,
+ * then a newline and a blank line. Zero or negative counts produce
+ * no characters of that kind (a negative value must not send the
+ * count-down loop running away).
+ */
+static void text_rule(FILE *fp, int indent, int width) {
+    int i;
+
+    for (i = 0; i < indent; i++)
+        putc(' ', fp);
+    for (i = 0; i < width; i++)
+        putc('-', fp);                 /* FIXME: configurability! */
+    putc('\n', fp);
+    putc('\n', fp);
+}
+
+/*
+ * Write one wrapped paragraph to `fp', followed by a blank line.
+ *
+ * `prefix' (optional) is a word list printed at `indent' before the
+ * first line, e.g. a bullet or list number, with the optional string
+ * `prefixextra' appended to it. The paragraph text itself starts at
+ * column indent+extraindent and is wrapped to `width'.
+ *
+ * If the prefix is wider than `extraindent', the first line of text
+ * follows it directly and is shortened by the overflow; if that
+ * would leave no room at all, the text starts on a fresh line.
+ */
+static void text_para(FILE *fp, word *prefix, char *prefixextra, word *text,
+                      int indent, int extraindent, int width) {
+    wrappedline *wrapping, *p;
+    rdstringc pfx = { 0, 0, NULL };
+    int e;
+    int firstlinewidth = width;
+
+    if (prefix) {
+        char *pfxtext;
+
+        text_rdaddwc(&pfx, prefix, NULL);
+        if (prefixextra)
+            rdaddsc(&pfx, prefixextra);
+        /* pfx.text stays NULL if nothing was added to it. */
+        pfxtext = pfx.text ? pfx.text : "";
+        fprintf(fp, "%*s%s", indent, "", pfxtext);
+        e = extraindent - (int)strlen(pfxtext);
+        if (e < 0) {
+            /*
+             * The prefix overflows its column. Take the overflow
+             * off the first line _before_ resetting e (adding the
+             * negative e decreases the width).
+             */
+            firstlinewidth += e;
+            if (firstlinewidth < 0) {
+                e = indent + extraindent;
+                firstlinewidth = width;
+                fprintf(fp, "\n");
+            } else {
+                e = 0;
+            }
+        }
+        sfree(pfx.text);
+    } else
+        e = indent + extraindent;
+
+    wrapping = wrap_para(text, firstlinewidth, width, text_width);
+    for (p = wrapping; p; p = p->next) {
+        rdstringc t = { 0, 0, NULL };
+        text_rdaddwc(&t, p->begin, p->end);
+        /* `e' is the padding still needed on this line: what is left
+         * after the prefix on the first one, the full indent later. */
+        fprintf(fp, "%*s%s\n", e, "", t.text ? t.text : "");
+        e = indent + extraindent;
+        sfree(t.text);
+    }
+    wrap_free(wrapping);
+    putc('\n', fp);
+}
+
+/*
+ * Write a code paragraph to `fp'. Every word_WeakCode word in
+ * `text' is output as a line of its own at `indent'; words of other
+ * types are skipped. A blank line follows. Lines longer than
+ * `width' are currently output unchanged.
+ */
+static void text_codepara(FILE *fp, word *text, int indent, int width) {
+    word *line;
+
+    for (line = text; line != NULL; line = line->next) {
+        char *converted;
+
+        if (line->type != word_WeakCode)
+            continue;
+
+        text_convert(line->text, &converted);
+        if (strlen(converted) > (size_t)width) {
+            /* FIXME: warn */
+        }
+        fprintf(fp, "%*s%s\n", indent, "", converted);
+        sfree(converted);
+    }
+
+    putc('\n', fp);
+}
+
+/*
+ * Write a version ID paragraph to `fp' as a single line, with the
+ * rendered words of `text' enclosed in square brackets.
+ */
+static void text_versionid(FILE *fp, word *text) {
+    rdstringc line = { 0, 0, NULL };
+
+    /* FIXME: configurability of both bracket characters */
+    rdaddc(&line, '[');
+    text_rdaddwc(&line, text, NULL);
+    rdaddc(&line, ']');
+
+    fprintf(fp, "%s\n", line.text);
+    sfree(line.text);
+}
--- /dev/null
+/*
+ * Windows Help backend for Halibut
+ *
+ * TODO:
+ * - allow user to specify section contexts.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "halibut.h"
+#include "winhelp.h"
+
+/*
+ * State shared between whlp_backend() and its helper functions.
+ */
+struct bk_whlp_state {
+ /* the help file under construction */
+ WHLP h;
+ /* index data and keyword list as passed to whlp_backend() */
+ indexdata *idx;
+ keywordlist *keywords;
+ /* topic currently being written; index terms are attached to it */
+ WHLP_TOPIC curr_topic;
+ /* the contents (.cnt) file being written alongside the help file */
+ FILE *cntfp;
+ /* level of the last .cnt entry written, and the adjustment currently
+ * added to entry levels (see whlp_contents_write) */
+ int cnt_last_level, cnt_workaround;
+};
+
+/*
+ * Indexes of fonts in our standard font descriptor set.
+ *
+ * The order here must match the order of the whlp_create_font()
+ * calls in whlp_backend(), which creates one font per entry.
+ */
+enum {
+ FONT_NORMAL, /* body text */
+ FONT_EMPH, /* italic variant of the body font */
+ FONT_CODE, /* fixed-pitch font */
+ FONT_TITLE, /* bold font used for titles */
+ FONT_TITLE_EMPH, /* bold italic title font */
+ FONT_TITLE_CODE, /* bold fixed-pitch title font */
+ FONT_RULE /* struck-out font used to draw horizontal rules */
+};
+
+static void whlp_rdaddwc(rdstringc *rs, word *text);
+static int whlp_convert(wchar_t *s, char **result, int hard_spaces);
+static void whlp_mkparagraph(struct bk_whlp_state *state,
+ int font, word *text, int subsidiary);
+static void whlp_navmenu(struct bk_whlp_state *state, paragraph *p);
+static void whlp_contents_write(struct bk_whlp_state *state,
+ int level, char *text, WHLP_TOPIC topic);
+
+/*
+ * Entry point of the Windows Help backend: render the paragraph
+ * list `sourceform' into the help file `output.hlp' and write the
+ * matching contents file `output.cnt'.
+ *
+ * The work is done in these stages:
+ *  - note user-specified topic names (\cfg{winhelp-topic});
+ *  - register a help topic for every chapter and section;
+ *  - prepare the final text of every index entry in `idx';
+ *  - write the contents topic (title, preamble, copyright and the
+ *    top-level navigation menu);
+ *  - write the main text, starting a new topic at each chapter or
+ *    section heading.
+ *
+ * The private_data field of each paragraph and the backend_data
+ * field of each index entry are used as scratch space. If the
+ * contents file cannot be opened, err_cantopenw is reported and
+ * nothing else is done.
+ */
+void whlp_backend(paragraph *sourceform, keywordlist *keywords,
+                  indexdata *idx) {
+    WHLP h;
+    char *filename, *cntname;
+    paragraph *p, *lastsect;
+    struct bk_whlp_state state;
+    WHLP_TOPIC contents_topic;
+    int i;
+    indexentry *ie;
+
+    filename = "output.hlp";           /* FIXME: configurability */
+    cntname = "output.cnt";            /* corresponding contents file */
+
+    state.cntfp = fopen(cntname, "wb");
+    if (!state.cntfp) {
+        /* Everything below writes to cntfp unconditionally, so give
+         * up now, before any help-file state has been allocated. */
+        error(err_cantopenw, cntname);
+        return;
+    }
+    state.cnt_last_level = -1; state.cnt_workaround = 0;
+
+    h = state.h = whlp_new();
+    state.keywords = keywords;
+    state.idx = idx;
+
+    whlp_start_macro(h, "CB(\"btn_about\",\"&About\",\"About()\")");
+    whlp_start_macro(h, "CB(\"btn_up\",\"&Up\",\"Contents()\")");
+    whlp_start_macro(h, "BrowseButtons()");
+
+    /* Fonts are created in the order of the FONT_* enumeration. */
+    whlp_create_font(h, "Times New Roman", WHLP_FONTFAM_SERIF, 24,
+                     0, 0, 0, 0);
+    whlp_create_font(h, "Times New Roman", WHLP_FONTFAM_SERIF, 24,
+                     WHLP_FONT_ITALIC, 0, 0, 0);
+    whlp_create_font(h, "Courier New", WHLP_FONTFAM_FIXED, 24,
+                     0, 0, 0, 0);
+    whlp_create_font(h, "Arial", WHLP_FONTFAM_SERIF, 30,
+                     WHLP_FONT_BOLD, 0, 0, 0);
+    whlp_create_font(h, "Arial", WHLP_FONTFAM_SERIF, 30,
+                     WHLP_FONT_BOLD|WHLP_FONT_ITALIC, 0, 0, 0);
+    whlp_create_font(h, "Courier New", WHLP_FONTFAM_FIXED, 30,
+                     WHLP_FONT_BOLD, 0, 0, 0);
+    whlp_create_font(h, "Courier New", WHLP_FONTFAM_SANS, 18,
+                     WHLP_FONT_STRIKEOUT, 0, 0, 0);
+
+    /*
+     * Loop over the source form finding out whether the user has
+     * specified particular help topic names for anything.
+     */
+    for (p = sourceform; p; p = p->next) {
+        p->private_data = NULL;
+        if (p->type == para_Config && p->parent) {
+            if (!ustricmp(p->keyword, L"winhelp-topic")) {
+                char *topicname;
+                whlp_convert(uadv(p->keyword), &topicname, 0);
+                /* Store the topic name in the private_data field of the
+                 * containing section. */
+                p->parent->private_data = topicname;
+            }
+        }
+    }
+
+    /*
+     * Loop over the source form registering WHLP_TOPICs for
+     * everything.
+     */
+
+    contents_topic = whlp_register_topic(h, "Top", NULL);
+    whlp_primary_topic(h, contents_topic);
+    for (p = sourceform; p; p = p->next) {
+        if (p->type == para_Chapter ||
+            p->type == para_Appendix ||
+            p->type == para_UnnumberedChapter ||
+            p->type == para_Heading ||
+            p->type == para_Subsect) {
+            /* private_data holds the requested topic name (or NULL)
+             * on entry, and the registered topic afterwards. */
+            char *topicid = p->private_data;
+            char *errstr;
+
+            p->private_data = whlp_register_topic(h, topicid, &errstr);
+            if (!p->private_data) {
+                /* Registration under the requested name failed: fall
+                 * back to registering without a name, and complain. */
+                p->private_data = whlp_register_topic(h, NULL, NULL);
+                error(err_winhelp_ctxclash, &p->fpos, topicid, errstr);
+            }
+            sfree(topicid);
+        }
+    }
+
+    /*
+     * Loop over the index entries, preparing final text forms for
+     * each one.
+     */
+    for (i = 0; (ie = index234(idx->entries, i)) != NULL; i++) {
+        rdstringc rs = {0, 0, NULL};
+        whlp_rdaddwc(&rs, ie->text);
+        ie->backend_data = rs.text;
+    }
+
+    whlp_prepare(h);
+
+    /* ------------------------------------------------------------------
+     * Do the contents page, containing title, preamble and
+     * copyright.
+     */
+
+    whlp_begin_topic(h, contents_topic, "Contents", "DB(\"btn_up\")", NULL);
+
+    /*
+     * The manual title goes in the non-scroll region, and also
+     * goes into the system title slot.
+     */
+    {
+        rdstringc rs = {0, 0, NULL};
+        for (p = sourceform; p; p = p->next) {
+            if (p->type == para_Title) {
+                whlp_begin_para(h, WHLP_PARA_NONSCROLL);
+                whlp_mkparagraph(&state, FONT_TITLE, p->words, FALSE);
+                whlp_rdaddwc(&rs, p->words);
+                whlp_end_para(h);
+            }
+        }
+        if (rs.text) {
+            whlp_title(h, rs.text);
+            fprintf(state.cntfp, ":Title %s\r\n", rs.text);
+            sfree(rs.text);
+        }
+        whlp_contents_write(&state, 1, "Title page", contents_topic);
+        /* FIXME: configurability in that string */
+    }
+
+    /*
+     * Next comes the preamble, which just goes into the ordinary
+     * scrolling region.
+     */
+    for (p = sourceform; p; p = p->next) {
+        if (p->type == para_Preamble) {
+            whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12);
+            whlp_begin_para(h, WHLP_PARA_SCROLL);
+            whlp_mkparagraph(&state, FONT_NORMAL, p->words, FALSE);
+            whlp_end_para(h);
+        }
+    }
+
+    /*
+     * The copyright goes to two places, again: into the contents
+     * page and also into the system section.
+     */
+    {
+        rdstringc rs = {0, 0, NULL};
+        for (p = sourceform; p; p = p->next) {
+            if (p->type == para_Copyright) {
+                whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12);
+                whlp_begin_para(h, WHLP_PARA_SCROLL);
+                whlp_mkparagraph(&state, FONT_NORMAL, p->words, FALSE);
+                whlp_end_para(h);
+                whlp_rdaddwc(&rs, p->words);
+            }
+        }
+        if (rs.text) {
+            whlp_copyright(h, rs.text);
+            sfree(rs.text);
+        }
+    }
+
+    /*
+     * Now do the primary navigation menu.
+     */
+    for (p = sourceform; p; p = p->next) {
+        if (p->type == para_Chapter ||
+            p->type == para_Appendix ||
+            p->type == para_UnnumberedChapter)
+            whlp_navmenu(&state, p);
+    }
+
+    state.curr_topic = contents_topic;
+    lastsect = NULL;
+
+    /* ------------------------------------------------------------------
+     * Now we've done the contents page, we're ready to go through
+     * and do the main manual text. Ooh.
+     */
+    for (p = sourceform; p; p = p->next) switch (p->type) {
+        /*
+         * Things we ignore because we've already processed them or
+         * aren't going to touch them in this pass.
+         */
+      case para_IM:
+      case para_BR:
+      case para_Biblio:                /* only touch BiblioCited */
+      case para_VersionID:
+      case para_Copyright:
+      case para_Preamble:
+      case para_NoCite:
+      case para_Title:
+        break;
+
+        /*
+         * Chapter and section titles: start a new Help topic.
+         */
+      case para_Chapter:
+      case para_Appendix:
+      case para_UnnumberedChapter:
+      case para_Heading:
+      case para_Subsect:
+        if (lastsect && lastsect->child) {
+            paragraph *q;
+            /*
+             * Do a navigation menu for the previous section we
+             * were in.
+             */
+            for (q = lastsect->child; q; q = q->sibling)
+                whlp_navmenu(&state, q);
+        }
+        {
+            rdstringc rs = {0, 0, NULL};
+            WHLP_TOPIC new_topic, parent_topic;
+            char *macro, *topicid;
+
+            new_topic = p->private_data;
+            whlp_browse_link(h, state.curr_topic, new_topic);
+            state.curr_topic = new_topic;
+
+            /* Build the plain-text topic title in rs. */
+            if (p->kwtext) {
+                whlp_rdaddwc(&rs, p->kwtext);
+                rdaddsc(&rs, ": ");    /* FIXME: configurability */
+            }
+            whlp_rdaddwc(&rs, p->words);
+
+            /* The topic's macro makes the `Up' button jump to the
+             * parent section, or to the contents topic at top level. */
+            if (p->parent == NULL)
+                parent_topic = contents_topic;
+            else
+                parent_topic = (WHLP_TOPIC)p->parent->private_data;
+            topicid = whlp_topic_id(parent_topic);
+            /* 100 bytes leave room for the fixed part of the macro. */
+            macro = smalloc(100+strlen(topicid));
+            sprintf(macro,
+                    "CBB(\"btn_up\",\"JI(`',`%s')\");EB(\"btn_up\")",
+                    topicid);
+            whlp_begin_topic(h, new_topic,
+                             rs.text ? rs.text : "",
+                             macro, NULL);
+            sfree(macro);
+
+            {
+                /*
+                 * Output the .cnt entry.
+                 *
+                 * WinHelp has a bug involving having an internal
+                 * node followed by a leaf at the same level: the
+                 * leaf is output at the wrong level. We can mostly
+                 * work around this by modifying the leaf level
+                 * itself (see whlp_contents_write), but this
+                 * doesn't work for top-level sections since we
+                 * can't turn a level-1 leaf into a level-0 one. So
+                 * for top-level leaf sections (Bibliography
+                 * springs to mind), we output an internal node
+                 * containing only the leaf for that section.
+                 */
+                int level;
+                paragraph *q;
+
+                /* Count up the level. */
+                level = 1;
+                for (q = p; q->parent; q = q->parent) level++;
+
+                if (p->child || !p->parent) {
+                    /*
+                     * If p has children then it needs to be a
+                     * folder; if it has no parent then it needs to
+                     * be a folder to work around the bug.
+                     */
+                    whlp_contents_write(&state, level, rs.text, NULL);
+                    level++;
+                }
+                whlp_contents_write(&state, level, rs.text, new_topic);
+            }
+
+            sfree(rs.text);
+
+            /* The heading itself goes in the non-scrolling region. */
+            whlp_begin_para(h, WHLP_PARA_NONSCROLL);
+            if (p->kwtext) {
+                whlp_mkparagraph(&state, FONT_TITLE, p->kwtext, FALSE);
+                whlp_set_font(h, FONT_TITLE);
+                whlp_text(h, ": ");    /* FIXME: configurability */
+            }
+            whlp_mkparagraph(&state, FONT_TITLE, p->words, FALSE);
+            whlp_end_para(h);
+
+            lastsect = p;
+        }
+        break;
+
+      case para_Rule:
+        /* A rule is a centred line of eighty non-breaking spaces in
+         * the struck-out rule font. */
+        whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12);
+        whlp_para_attr(h, WHLP_PARA_ALIGNMENT, WHLP_ALIGN_CENTRE);
+        whlp_begin_para(h, WHLP_PARA_SCROLL);
+        whlp_set_font(h, FONT_RULE);
+#define TEN "\xA0\xA0\xA0\xA0\xA0\xA0\xA0\xA0\xA0\xA0"
+#define TWENTY TEN TEN
+#define FORTY TWENTY TWENTY
+#define EIGHTY FORTY FORTY
+        whlp_text(h, EIGHTY);
+#undef TEN
+#undef TWENTY
+#undef FORTY
+#undef EIGHTY
+        whlp_end_para(h);
+        break;
+
+      case para_Normal:
+      case para_BiblioCited:
+      case para_Bullet:
+      case para_NumberedList:
+        whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12);
+        if (p->type == para_Bullet || p->type == para_NumberedList) {
+            /* Hanging indent: the bullet or number comes first, then
+             * a tab to the position of the item text. */
+            whlp_para_attr(h, WHLP_PARA_LEFTINDENT, 72);
+            whlp_para_attr(h, WHLP_PARA_FIRSTLINEINDENT, -36);
+            whlp_set_tabstop(h, 72, WHLP_ALIGN_LEFT);
+            whlp_begin_para(h, WHLP_PARA_SCROLL);
+            whlp_set_font(h, FONT_NORMAL);
+            if (p->type == para_Bullet) {
+                whlp_text(h, "\x95");
+            } else {
+                whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, FALSE);
+                whlp_text(h, ".");
+            }
+            whlp_tab(h);
+        } else {
+            whlp_begin_para(h, WHLP_PARA_SCROLL);
+        }
+
+        if (p->type == para_BiblioCited) {
+            whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, FALSE);
+            whlp_text(h, " ");
+        }
+
+        whlp_mkparagraph(&state, FONT_NORMAL, p->words, FALSE);
+        whlp_end_para(h);
+        break;
+
+      case para_Code:
+        /*
+         * In a code paragraph, each individual word is a line. For
+         * Help files, we will have to output this as a set of
+         * paragraphs, all but the last of which don't set
+         * SPACEBELOW.
+         */
+        {
+            word *w;
+            char *c;
+            for (w = p->words; w; w = w->next) {
+                if (!w->next)
+                    whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12);
+                whlp_begin_para(h, WHLP_PARA_SCROLL);
+                whlp_set_font(h, FONT_CODE);
+                whlp_convert(w->text, &c, FALSE);
+                whlp_text(h, c);
+                sfree(c);
+                whlp_end_para(h);
+            }
+        }
+        break;
+    }
+
+    fclose(state.cntfp);
+    whlp_close(h, filename);
+
+    /*
+     * Loop over the index entries, cleaning up our final text
+     * forms.
+     */
+    for (i = 0; (ie = index234(idx->entries, i)) != NULL; i++) {
+        sfree(ie->backend_data);
+    }
+}
+
+/*
+ * Write one entry to the contents (.cnt) file: the level number,
+ * the title `text' with every `=' escaped by a backslash, and, if
+ * `topic' is non-null, `=' followed by the topic's ID. A null
+ * `topic' produces a folder-type entry without a target.
+ */
+static void whlp_contents_write(struct bk_whlp_state *state,
+                                int level, char *text, WHLP_TOPIC topic) {
+    FILE *out = state->cntfp;
+    char *s;
+
+    /*
+     * Horrifying bug in WinHelp. When dropping a section level or
+     * more without using a folder-type entry, WinHelp accidentally
+     * adds one to the section level. So we correct for that here:
+     * the correction is switched on by a topic entry at a lower
+     * level than its predecessor, and off again by the next folder.
+     */
+    if (!topic)
+        state->cnt_workaround = 0;
+    else if (state->cnt_last_level > level)
+        state->cnt_workaround = -1;
+    state->cnt_last_level = level;
+
+    fprintf(out, "%d ", level + state->cnt_workaround);
+    for (s = text; *s; s++) {
+        if (*s == '=')
+            fputc('\\', out);
+        fputc(*s, out);
+    }
+    if (topic)
+        fprintf(out, "=%s", whlp_topic_id(topic));
+    fputc('\n', out);
+}
+
+/*
+ * Emit one navigation-menu line for paragraph `p' into the
+ * non-scrolling region: a hyperlink to the paragraph's topic whose
+ * text is the keyword text (if any) followed by ": " and then the
+ * paragraph's own words.
+ */
+static void whlp_navmenu(struct bk_whlp_state *state, paragraph *p) {
+    word *label = p->kwtext;
+
+    whlp_begin_para(state->h, WHLP_PARA_NONSCROLL);
+    whlp_start_hyperlink(state->h, (WHLP_TOPIC)p->private_data);
+
+    if (label != NULL) {
+        whlp_mkparagraph(state, FONT_NORMAL, label, TRUE);
+        whlp_set_font(state->h, FONT_NORMAL);
+        whlp_text(state->h, ": ");     /* FIXME: configurability */
+    }
+    whlp_mkparagraph(state, FONT_NORMAL, p->words, TRUE);
+
+    whlp_end_hyperlink(state->h);
+    whlp_end_para(state->h);
+}
+
+/*
+ * Emit a list of words into the Help paragraph currently being built.
+ *
+ *  - `font' is the base font; emphasised and code words use
+ *    font + FONT_EMPH and font + FONT_CODE respectively.
+ *  - `text' is the head of the word list to output.
+ *  - `subsidiary' nonzero disables index terms and cross-reference
+ *    hyperlinks (used when the text is itself inside a hyperlink).
+ */
+static void whlp_mkparagraph(struct bk_whlp_state *state,
+                             int font, word *text, int subsidiary) {
+    keyword *kwl;
+    int deffont = font;
+    int currfont = -1;                 /* -1: no font selected yet */
+    int newfont;
+    char *c;
+    paragraph *xref_target = NULL;     /* non-NULL while a link is open */
+
+    for (; text; text = text->next) switch (text->type) {
+      case word_HyperLink:
+      case word_HyperEnd:
+        break;
+
+      case word_IndexRef:
+        if (subsidiary) break;         /* disabled in subsidiary bits */
+        {
+            indextag *tag = index_findtag(state->idx, text->text);
+            int i;
+            if (!tag)
+                break;
+            for (i = 0; i < tag->nrefs; i++)
+                whlp_index_term(state->h, tag->refs[i]->backend_data,
+                                state->curr_topic);
+        }
+        break;
+
+      case word_UpperXref:
+      case word_LowerXref:
+        if (subsidiary) break;         /* disabled in subsidiary bits */
+        kwl = kw_lookup(state->keywords, text->text);
+        assert(xref_target == NULL);
+        /*
+         * Guard against a failed lookup (presumably kw_lookup returns
+         * NULL for a keyword that was never defined): leave
+         * xref_target NULL so the text is emitted unlinked and the
+         * matching word_XrefEnd closes nothing.
+         */
+        if (!kwl)
+            break;
+        if (kwl->para->type == para_NumberedList) {
+            break;                     /* don't xref to numbered list items */
+        } else if (kwl->para->type == para_BiblioCited) {
+            /*
+             * An xref to a bibliography item jumps to the section
+             * containing it.
+             */
+            if (kwl->para->parent)
+                xref_target = kwl->para->parent;
+            else
+                break;
+        } else {
+            xref_target = kwl->para;
+        }
+        whlp_start_hyperlink(state->h, (WHLP_TOPIC)xref_target->private_data);
+        break;
+
+      case word_XrefEnd:
+        if (subsidiary) break;         /* disabled in subsidiary bits */
+        if (xref_target)
+            whlp_end_hyperlink(state->h);
+        xref_target = NULL;
+        break;
+
+      case word_Normal:
+      case word_Emph:
+      case word_Code:
+      case word_WeakCode:
+      case word_WhiteSpace:
+      case word_EmphSpace:
+      case word_CodeSpace:
+      case word_WkCodeSpace:
+      case word_Quote:
+      case word_EmphQuote:
+      case word_CodeQuote:
+      case word_WkCodeQuote:
+        if (towordstyle(text->type) == word_Emph)
+            newfont = deffont + FONT_EMPH;
+        else if (towordstyle(text->type) == word_Code ||
+                 towordstyle(text->type) == word_WeakCode)
+            newfont = deffont + FONT_CODE;
+        else
+            newfont = deffont;
+        /* Only issue a font change when the font actually differs. */
+        if (newfont != currfont) {
+            currfont = newfont;
+            whlp_set_font(state->h, newfont);
+        }
+        if (removeattr(text->type) == word_Normal) {
+            /* Fall back to the alternative text if conversion fails. */
+            if (whlp_convert(text->text, &c, TRUE))
+                whlp_text(state->h, c);
+            else
+                whlp_mkparagraph(state, deffont, text->alt, FALSE);
+            sfree(c);
+        } else if (removeattr(text->type) == word_WhiteSpace) {
+            whlp_text(state->h, " ");
+        } else if (removeattr(text->type) == word_Quote) {
+            whlp_text(state->h,
+                      quoteaux(text->aux) == quote_Open ? "\x91" : "\x92");
+            /* FIXME: configurability */
+        }
+        break;
+    }
+}
+
+/*
+ * Append the plain-text form of a word list to the string `rs'.
+ * Hyperlink, cross-reference and index markers contribute nothing;
+ * text words are converted with whlp_convert (falling back to the
+ * word's alternative text when conversion fails), spaces become ' '
+ * and quotes become the 0x91 / 0x92 characters.
+ */
+static void whlp_rdaddwc(rdstringc *rs, word *text) {
+    word *w;
+
+    for (w = text; w != NULL; w = w->next) {
+        char *converted;
+        int kind;
+
+        /* Only the text-bearing word types produce any output. */
+        switch (w->type) {
+          case word_Normal:
+          case word_Emph:
+          case word_Code:
+          case word_WeakCode:
+          case word_WhiteSpace:
+          case word_EmphSpace:
+          case word_CodeSpace:
+          case word_WkCodeSpace:
+          case word_Quote:
+          case word_EmphQuote:
+          case word_CodeQuote:
+          case word_WkCodeQuote:
+            break;
+          default:
+            continue;
+        }
+
+        assert(w->type != word_CodeQuote &&
+               w->type != word_WkCodeQuote);
+
+        kind = removeattr(w->type);
+        if (kind == word_WhiteSpace) {
+            rdaddc(rs, ' ');
+        } else if (kind == word_Quote) {
+            rdaddc(rs, quoteaux(w->aux) == quote_Open ? '\x91' : '\x92');
+            /* FIXME: configurability */
+        } else if (kind == word_Normal) {
+            if (whlp_convert(w->text, &converted, FALSE))
+                rdaddsc(rs, converted);
+            else
+                whlp_rdaddwc(rs, w->alt);
+            sfree(converted);
+        }
+    }
+}
+
+/*
+ * Convert a wide string into a string of chars.
+ *
+ * When `result' is non-NULL the converted string is allocated and a
+ * pointer to it stored in `*result'; when `result' is NULL the
+ * string is only checked for characters the output character set
+ * cannot represent. If `hard_spaces' is nonzero, ordinary spaces
+ * are output as character 0xA0 ('\240').
+ *
+ * Returns nonzero if every character was representable. Even when
+ * it returns zero, a result string is still produced if `result' is
+ * non-NULL (bad characters become 0xBF).
+ */
+static int whlp_convert(wchar_t *s, char **result, int hard_spaces) {
+    /*
+     * FIXME. Currently this is ISO8859-1 only.
+     */
+    int all_ok = TRUE;
+    char *buf = NULL;
+    int len = 0, cap = 0;
+    wchar_t *in;
+
+    for (in = s; *in; in++) {
+        wchar_t wc = *in;
+        char ch;
+
+        if (!((wc >= 32 && wc <= 126) || (wc >= 160 && wc <= 255))) {
+            /* Char is not OK. */
+            all_ok = FALSE;
+            ch = 0xBF;                 /* approximate the good old DEC `uh?' */
+        } else if (wc == 32 && hard_spaces) {
+            ch = '\240';
+        } else {
+            ch = (char)wc;
+        }
+
+        if (result == NULL)
+            continue;                  /* checking only: store nothing */
+
+        /* Grow the buffer in 256-byte steps as it fills up. */
+        if (len >= cap) {
+            cap = len + 256;
+            buf = resize(buf, cap);
+        }
+        buf[len++] = ch;
+    }
+
+    if (result != NULL) {
+        /* Trim to the exact size, leaving room for the terminator. */
+        buf = resize(buf, len+1);
+        buf[len] = '\0';
+        *result = buf;
+    }
+    return all_ok;
+}
--- /dev/null
+/*
+ * xhtml backend for Halibut
+ * (initial implementation by James Aylett)
+ *
+ * Still to do:
+ *
+ * +++ doesn't handle non-breaking hyphens. Not sure how to yet.
+ * +++ entity names (from a file -- ideally supply normal SGML files)
+ * +++ configuration directive to file split where the current layout
+ * code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
+ * perhaps others.
+ *
+ * Limitations:
+ *
+ * +++ biblio/index references target the nearest section marker, rather
+ * than having a dedicated target themselves. In large bibliographies
+ * this will cause problems. (The solution is to fake up a response
+ * from xhtml_find_section(), probably linking it into the sections
+ * chain just in case we need it again, and to make freeing it up
+ * easier.) docsrc.pl used to work as we do, however, and SGT agrees that
+ * this is acceptable for now.
+ * +++ can't cope with leaf-level == 0. It's all to do with the
+ * top-level file not being normal, probably not even having a valid
+ * section level, and stuff like that. I question whether this is an
+ * issue, frankly; small manuals that fit on one page should probably
+ * not be written in halibut at all.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "halibut.h"
+
+/*
+ * One section of the document as seen by the XHTML backend.
+ * xhtml_ponder_layout() creates one of these for every paragraph
+ * whose xhtml_para_level() is greater than zero, plus one initial
+ * placeholder with para == NULL and level == -1.
+ */
+struct xhtmlsection_Struct {
+ struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */
+ struct xhtmlsection_Struct *child; /* NULL if split across files */
+ struct xhtmlsection_Struct *parent; /* NULL if split across files */
+ struct xhtmlsection_Struct *chain; /* single structure independent of weird trees: points at the previously created section */
+ paragraph *para; /* the heading paragraph that starts this section */
+ struct xhtmlfile_Struct *file; /* which file is this a part of? */
+ char *fragment; /* fragment id within the file */
+ int level; /* value of xhtml_para_level(para); -1 marks the end of the chain */
+};
+
+/*
+ * One output file. Files form a tree (child / next / parent) built
+ * by xhtml_ponder_layout(); the root is the top-level contents file,
+ * which is created with sections == NULL.
+ */
+struct xhtmlfile_Struct {
+ struct xhtmlfile_Struct *next; /* next sibling file */
+ struct xhtmlfile_Struct *child; /* first file nested beneath this one */
+ struct xhtmlfile_Struct *parent; /* enclosing file (NULL for the top file) */
+ char *filename; /* allocated name of the output file; freed by xhtml_free_file() */
+ struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */
+ int is_leaf; /* is this file a leaf file, ie does it not have any children? */
+};
+
+/* Short names for the backend's structures. */
+typedef struct xhtmlsection_Struct xhtmlsection;
+typedef struct xhtmlfile_Struct xhtmlfile;
+typedef struct xhtmlindex_Struct xhtmlindex;
+
+/*
+ * Per-index-entry data, stored in an indexentry's backend_data
+ * pointer: the list of sections that reference the entry.
+ */
+struct xhtmlindex_Struct {
+ int nsection; /* number of entries in use in `sections' */
+ int size; /* presumably the allocated capacity of `sections' -- TODO confirm against the code that fills it */
+ xhtmlsection **sections; /* array of referencing sections; individual slots may be NULL */
+};
+
+/*
+ * Backend configuration, filled in by xhtml_configure() from the
+ * document's \cfg paragraphs (keywords starting "xhtml-").
+ */
+typedef struct {
+ int contents_depth[6]; /* contents depth limit, indexed by the level of the file's first section (capped at 5) */
+ int leaf_contains_contents; /* boolean: may leaf files carry their own contents list? */
+ int leaf_level; /* section level at which files stop being split; zero is rejected */
+ int leaf_smallest_contents; /* minimum number of entries before a leaf file gets a contents list */
+ int include_version_id; /* boolean, set from "xhtml-versionid" */
+ wchar_t *author, *description; /* NULL unless configured */
+ wchar_t *head_end, *body, *body_start, *body_end, *address_start, *address_end, *nav_attrs; /* NULL unless configured */
+ int suppress_address; /* boolean, set from "xhtml-suppress-address" */
+} xhtmlconfig;
+
+/*static void xhtml_level(paragraph *, int);
+static void xhtml_level_0(paragraph *);
+static void xhtml_docontents(FILE *, paragraph *, int);
+static void xhtml_dosections(FILE *, paragraph *, int);
+static void xhtml_dobody(FILE *, paragraph *, int);*/
+
+/* Forward declarations: page furniture written around each file's content. */
+static void xhtml_doheader(FILE *, word *);
+static void xhtml_dofooter(FILE *);
+static void xhtml_versionid(FILE *, word *, int);
+
+/* Forward declarations: small utilities. */
+static void xhtml_utostr(wchar_t *, char **);
+static int xhtml_para_level(paragraph *);
+static int xhtml_reservedchar(int);
+
+/* Forward declarations: text conversion and paragraph output. */
+static int xhtml_convert(wchar_t *, char **, int);
+static void xhtml_rdaddwc(rdstringc *, word *, word *);
+static void xhtml_para(FILE *, word *);
+static void xhtml_codepara(FILE *, word *);
+static void xhtml_heading(FILE *, paragraph *);
+
+/* File-global variables are much easier than passing these things
+ * all over the place. Evil, but easier. We can replace this with a single
+ * structure at some point.
+ */
+static xhtmlconfig conf; /* set by xhtml_backend() from xhtml_configure() */
+static keywordlist *keywords; /* keyword list passed to xhtml_backend() */
+static indexdata *idx; /* index data passed to xhtml_backend() */
+static xhtmlfile *topfile; /* root of the file tree, built by xhtml_ponder_layout() */
+static xhtmlsection *topsection; /* head of the section chain (the most recently created section once layout is done) */
+static paragraph *sourceparas; /* the whole source paragraph list */
+static xhtmlfile *lastfile; /* reset to NULL by xhtml_ponder_layout() */
+static xhtmlfile *xhtml_last_file = NULL; /* file most recently written by xhtml_do_file(); target of the "Previous" nav link */
+static int last_level=-1; /* current <ul> nesting level while writing contents lists */
+static xhtmlsection *currentsection;
+
+/*
+ * Build the backend configuration: start from the built-in defaults
+ * and then apply every para_Config paragraph in `source' whose
+ * keyword is one of the "xhtml-..." directives. Unrecognised
+ * keywords are ignored. A leaf level of zero is a fatal error.
+ */
+static xhtmlconfig xhtml_configure(paragraph *source)
+{
+    static wchar_t *depth_keys[6] = {
+        L"xhtml-contents-depth-0",
+        L"xhtml-contents-depth-1",
+        L"xhtml-contents-depth-2",
+        L"xhtml-contents-depth-3",
+        L"xhtml-contents-depth-4",
+        L"xhtml-contents-depth-5"
+    };
+    xhtmlconfig ret;
+    int i;
+
+    /*
+     * Defaults: contents depth for level N is N+2.
+     */
+    for (i = 0; i < 6; i++)
+        ret.contents_depth[i] = i + 2;
+    ret.leaf_level = 2;
+    ret.leaf_smallest_contents = 4;
+    ret.leaf_contains_contents = FALSE;
+    ret.include_version_id = TRUE;
+    ret.suppress_address = FALSE;
+    ret.author = NULL;
+    ret.description = NULL;
+    ret.head_end = NULL;
+    ret.body = NULL;
+    ret.body_start = NULL;
+    ret.body_end = NULL;
+    ret.address_start = NULL;
+    ret.address_end = NULL;
+    ret.nav_attrs = NULL;
+
+    for (; source; source = source->next) {
+        if (source->type != para_Config)
+            continue;
+
+        /* The six per-level contents depth directives. */
+        for (i = 0; i < 6; i++) {
+            if (!ustricmp(source->keyword, depth_keys[i]))
+                break;
+        }
+        if (i < 6) {
+            ret.contents_depth[i] = utoi(uadv(source->keyword));
+            continue;
+        }
+
+        /* Numeric and boolean settings. */
+        if (!ustricmp(source->keyword, L"xhtml-leaf-level")) {
+            ret.leaf_level = utoi(uadv(source->keyword));
+            if (ret.leaf_level==0) {
+                fatal(err_whatever, "xhtml-leaf-level cannot be zero");
+            }
+        } else if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents")) {
+            ret.leaf_smallest_contents = utoi(uadv(source->keyword));
+        } else if (!ustricmp(source->keyword, L"xhtml-versionid")) {
+            ret.include_version_id = utob(uadv(source->keyword));
+        } else if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents")) {
+            ret.leaf_contains_contents = utob(uadv(source->keyword));
+        } else if (!ustricmp(source->keyword, L"xhtml-suppress-address")) {
+            ret.suppress_address = utob(uadv(source->keyword));
+        }
+        /* String settings: the value is used in place, not copied. */
+        else if (!ustricmp(source->keyword, L"xhtml-author")) {
+            ret.author = uadv(source->keyword);
+        } else if (!ustricmp(source->keyword, L"xhtml-description")) {
+            ret.description = uadv(source->keyword);
+        } else if (!ustricmp(source->keyword, L"xhtml-head-end")) {
+            ret.head_end = uadv(source->keyword);
+        } else if (!ustricmp(source->keyword, L"xhtml-body-start")) {
+            ret.body_start = uadv(source->keyword);
+        } else if (!ustricmp(source->keyword, L"xhtml-body-tag")) {
+            ret.body = uadv(source->keyword);
+        } else if (!ustricmp(source->keyword, L"xhtml-body-end")) {
+            ret.body_end = uadv(source->keyword);
+        } else if (!ustricmp(source->keyword, L"xhtml-address-start")) {
+            ret.address_start = uadv(source->keyword);
+        } else if (!ustricmp(source->keyword, L"xhtml-address-end")) {
+            ret.address_end = uadv(source->keyword);
+        } else if (!ustricmp(source->keyword, L"xhtml-navigation-attributes")) {
+            ret.nav_attrs = uadv(source->keyword);
+        }
+    }
+
+    return ret;
+}
+
+/*
+ * Allocate a blank section record and link it onto the section
+ * chain in front of `last' (which may be NULL for the first one).
+ * The caller fills in para, fragment and level afterwards.
+ */
+static xhtmlsection *xhtml_new_section(xhtmlsection *last)
+{
+    xhtmlsection *sect = mknew(xhtmlsection);
+
+    /* Chain link first; the tree links start out empty. */
+    sect->chain = last;
+    sect->parent = NULL;
+    sect->child = NULL;
+    sect->next = NULL;
+
+    sect->file = NULL;
+    sect->para = NULL;
+    sect->fragment = NULL;
+    sect->level = -1;                  /* marker: end of chain */
+
+    return sect;
+}
+
+/*
+ * Find the section record for paragraph `p'. If `p' is not itself a
+ * section heading, the nearest heading preceding it in the source
+ * is used instead. Returns NULL if there is no such heading (e.g. a
+ * cross-reference to something before the first chapter) or if no
+ * section record matches.
+ */
+static xhtmlsection *xhtml_find_section(paragraph *p)
+{
+    xhtmlsection *sect;
+
+    if (xhtml_para_level(p) == -1) {
+        /* Not a heading: scan from the start for the last heading before p. */
+        paragraph *scan;
+        paragraph *heading = NULL;
+
+        for (scan = sourceparas; scan != NULL && scan != p; scan = scan->next) {
+            if (xhtml_para_level(scan) != -1)
+                heading = scan;
+        }
+        if (heading == NULL)
+            return NULL;
+        p = heading;
+    }
+
+    /* Walk the chain of all sections looking for the one owning p. */
+    for (sect = topsection; sect != NULL; sect = sect->chain) {
+        if (sect->para == p)
+            break;
+    }
+    return sect;
+}
+
+/*
+ * Allocate a file record whose first section is `sect'.
+ *
+ * With sect == NULL this is the top-level file and gets a fixed
+ * name. Otherwise the name is built from the plain (word_Normal)
+ * words of the section's keyword text, or of its heading words when
+ * there is no keyword text, followed by ".html". The file is marked
+ * as a leaf when the section sits exactly at the configured leaf
+ * level.
+ */
+static xhtmlfile *xhtml_new_file(xhtmlsection *sect)
+{
+#define FILENAME_MANUAL "Manual.html"
+#define FILENAME_CONTENTS "Contents.html"
+    xhtmlfile *file = mknew(xhtmlfile);
+    rdstringc name = { 0, 0, NULL };
+    word *w;
+
+    file->next = NULL;
+    file->child = NULL;
+    file->parent = NULL;
+    file->filename = NULL;
+    file->sections = sect;
+    file->is_leaf = (sect != NULL && sect->level == conf.leaf_level);
+
+    if (sect == NULL) {
+        if (conf.leaf_level == 0) {    /* currently unused */
+            file->filename = smalloc(strlen(FILENAME_MANUAL)+1);
+            sprintf(file->filename, FILENAME_MANUAL);
+        } else {
+            file->filename = smalloc(strlen(FILENAME_CONTENTS)+1);
+            sprintf(file->filename, FILENAME_CONTENTS);
+        }
+        return file;
+    }
+
+    w = sect->para->kwtext ? sect->para->kwtext : sect->para->words;
+    for (; w; w = w->next) {
+        char *ascii;
+
+        /* Only plain words contribute; spaces and quotes are dropped. */
+        if (removeattr(w->type) != word_Normal)
+            continue;
+        xhtml_utostr(w->text, &ascii);
+        rdaddsc(&name, ascii);
+        sfree(ascii);
+    }
+    rdaddsc(&name, ".html");
+    file->filename = rdtrimc(&name);
+
+    return file;
+}
+
+/*
+ * Walk the file tree from `file' across its siblings and down
+ * through their children, setting the leaf flag on every file that
+ * has no children, whatever level it sits at.
+ */
+void xhtml_fixup_layout(xhtmlfile* file)
+{
+    do {
+        if (file->child != NULL)
+            xhtml_fixup_layout(file->child);
+        else
+            file->is_leaf = TRUE;
+        file = file->next;
+    } while (file);
+}
+
+/*
+ * Create the tree structure so we know where everything goes.
+ * Method:
+ *
+ * Ignoring file splitting, we have three choices with each new section:
+ *
+ * +-----------------+-----------------+
+ * | | |
+ * X +----X----+ (1)
+ * | |
+ * Y (2)
+ * |
+ * (3)
+ *
+ * Y is the last section we added (currentsect).
+ * If sect is the section we want to add, then:
+ *
+ * (1) if sect->level < currentsect->level
+ * (2) if sect->level == currentsect->level
+ * (3) if sect->level > currentsect->level
+ *
+ * This requires the constraint that you never skip section numbers
+ * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
+ *
+ * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
+ * more than one level at a time. Lots of asserts, and probably part of
+ * the algorithm here, rely on this being true. (It currently isn't
+ * enforced by halibut, however.)
+ *
+ * File splitting makes this harder. For instance, say we added at (3)
+ * above and now need to add another section. We are splitting at level
+ * 2, ie the level of Y. Z is the last section we added:
+ *
+ * +-----------------+-----------------+
+ * | | |
+ * X +----X----+ (1)
+ * | |
+ * +----Y----+ (1)
+ * | |
+ * Z (2)
+ * |
+ * (3)
+ *
+ * The (1) case is now split; we need to search upwards to find where
+ * to actually link in. The other two cases remain the same (and will
+ * always be like this).
+ *
+ * The decision of whether to split to a new file is always made on
+ * the same condition, though (is the level of this section higher
+ * than the leaf_level configuration value or not).
+ *
+ * Treating the cases backwards:
+ *
+ * (3) same file if sect->level > conf.leaf_level, otherwise new file
+ *
+ * if in the same file, currentsect->child points to sect
+ * otherwise the linking is done through the file tree (which works
+ * in more or less the same way, ie currentfile->child points to
+ * the new file)
+ *
+ * (2) same file if sect->level > conf.leaf_level, otherwise new file
+ *
+ * if in the same file, currentsect->next points to sect
+ * otherwise file linking and currentfile->next points to the new
+ * file (we know that Z must have caused a new file to be created)
+ *
+ * (1) same file if sect->level > conf.leaf_level, otherwise new file
+ *
+ * this is actually effectively the same case as (2) here,
+ * except that we first have to travel up the sections to figure
+ * out which section this new one will be a sibling of. In doing
+ * so, we may disappear off the top of a file and have to go up
+ * to its parent in the file tree.
+ *
+ */
+static void xhtml_ponder_layout(paragraph *p)
+{
+ /*
+ * Builds the section chain (topsection) and the file tree (topfile)
+ * from the paragraph list `p'. Every paragraph with a positive
+ * xhtml_para_level() becomes a section and is linked either into
+ * the current file's section tree or into a newly created file.
+ */
+ xhtmlsection *lastsection; /* most recently created section (head of the chain) */
+ xhtmlsection *currentsect; /* section the next one is linked relative to */
+ xhtmlfile *currentfile; /* file that currentsect lives in */
+
+ lastfile = NULL;
+ topsection = xhtml_new_section(NULL);
+ topfile = xhtml_new_file(NULL);
+ lastsection = topsection;
+ currentfile = topfile;
+ currentsect = topsection;
+
+ for (; p; p=p->next)
+ {
+ int level = xhtml_para_level(p);
+ if (level>0) /* actually a section */
+ {
+ xhtmlsection *sect;
+ word *w;
+ char *c;
+ rdstringc fname_c = { 0, 0, NULL };
+
+ sect = xhtml_new_section(lastsection);
+ lastsection = sect;
+ sect->para = p;
+ /* Build the fragment id from the plain words of the heading's number text. */
+ for (w=(p->kwtext2)?(p->kwtext2):(p->words); w; w=w->next) /* kwtext2 because we want numbers only! */
+ {
+ switch (removeattr(w->type))
+ {
+ case word_Normal:
+ /*case word_Emph:
+ case word_Code:
+ case word_WeakCode:*/
+ xhtml_utostr(w->text, &c);
+ rdaddsc(&fname_c,c);
+ sfree(c);
+ break;
+ }
+ }
+/* rdaddsc(&fname_c, ".html");*/
+ sect->fragment = rdtrimc(&fname_c);
+ sect->level = level;
+ /* printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/
+
+ /*
+ * The initial placeholder section has level -1, so the very first
+ * real section always takes this branch.
+ */
+ if (level>currentsect->level) { /* case (3) */
+ if (level>conf.leaf_level) { /* same file */
+ assert(currentfile->is_leaf);
+ currentsect->child = sect;
+ sect->parent=currentsect;
+ sect->file=currentfile;
+ /* printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/
+ currentsect=sect;
+ } else { /* new file */
+ xhtmlfile *file = xhtml_new_file(sect);
+ assert(!currentfile->is_leaf);
+ currentfile->child=file;
+ sect->file=file;
+ file->parent=currentfile;
+ /* printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/
+ currentfile=file;
+ currentsect=sect;
+ }
+ } else if (level >= currentsect->file->sections->level) {
+ /* Case (1) or (2) *AND* still under the section that starts
+ * the current file.
+ *
+ * I'm not convinced that this couldn't be rolled in with the
+ * final else {} leg further down. It seems a lot of effort
+ * this way.
+ */
+ if (level>conf.leaf_level) { /* stick within the same file */
+ assert(currentfile->is_leaf);
+ sect->file = currentfile;
+ /*
+ * NOTE(review): this dereferences currentsect->parent without a
+ * NULL check; presumably a section deeper than `level' always has
+ * a parent here -- confirm.
+ */
+ while (currentsect && currentsect->level > level &&
+ currentsect->file==currentsect->parent->file) {
+ currentsect = currentsect->parent;
+ }
+ assert(currentsect);
+ currentsect->next = sect;
+ assert(currentsect->level == sect->level);
+ sect->parent = currentsect->parent;
+ currentsect = sect;
+ /* printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/
+ } else { /* new file */
+ xhtmlfile *file = xhtml_new_file(sect);
+ sect->file=file;
+ currentfile->next=file;
+ file->parent=currentfile->parent;
+ file->is_leaf=(level==conf.leaf_level);
+ file->sections=sect;
+ /* printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/
+ currentfile=file;
+ currentsect=sect;
+ }
+ } else { /* Case (1) or (2) and we must move up the file tree first */
+ /* this loop is now probably irrelevant - we know we can't connect
+ * to anything in the current file */
+ while (currentsect && level<currentsect->level) {
+ currentsect=currentsect->parent;
+ if (currentsect) {
+ /* printf(" * up one level to '%s'\n", currentsect->fragment);*/
+ } else {
+ /* printf(" * up one level (off top of current file)\n");*/
+ }
+ }
+ if (currentsect) {
+ /* I'm pretty sure this can now never fire */
+ assert(currentfile->is_leaf);
+ /* printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/
+ sect->file = currentfile;
+ currentsect->next=sect;
+ currentsect=sect;
+ } else { /* find a file we can attach to */
+ /* Climb the file tree until the file's first section is no deeper than `level'. */
+ while (currentfile && currentfile->sections && level<currentfile->sections->level) {
+ currentfile=currentfile->parent;
+ if (currentfile) {
+ /* printf(" * up one file level to '%s'\n", currentfile->filename);*/
+ } else {
+ /* printf(" * up one file level (off top of tree)\n");*/
+ }
+ }
+ if (currentfile) { /* new file (we had to skip up a file to
+ get here, so we must be dealing with a
+ level no lower than the configured
+ leaf_level */
+ xhtmlfile *file = xhtml_new_file(sect);
+ currentfile->next=file;
+ sect->file=file;
+ file->parent=currentfile->parent;
+ file->is_leaf=(level==conf.leaf_level);
+ file->sections=sect;
+ /* printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/
+ currentfile=file;
+ currentsect=sect;
+ } else {
+ fatal(err_whatever, "Ran off the top trying to connect sibling: strange document.");
+ }
+ }
+ }
+ }
+ }
+ topsection = lastsection; /* get correct end of the chain */
+ xhtml_fixup_layout(topfile); /* leaf files not at leaf level marked as such */
+}
+
+/* Forward declarations for the output routines (file, contents and section writers). */
+static void xhtml_do_index();
+static void xhtml_do_file(xhtmlfile *file);
+static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform);
+static void xhtml_do_paras(FILE *fp, paragraph *p);
+/* The contents writers return the number of contents entries produced. */
+static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit);
+static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit);
+static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit);
+static int xhtml_do_contents(FILE *fp, xhtmlfile *file);
+static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file);
+static void xhtml_do_sections(FILE *fp, xhtmlsection *sections);
+
+/*
+ * Write out `file', all of its descendants and all of its following
+ * siblings, in document order (each file before its children, and
+ * its children before its next sibling).
+ *
+ * `file' may be NULL, in which case nothing is written. This
+ * happens when the caller passes the child of a top-level file that
+ * has none, i.e. the document contains no sections at all.
+ */
+static void xhtml_do_files(xhtmlfile *file)
+{
+    for (; file != NULL; file = file->next) {
+        xhtml_do_file(file);
+        xhtml_do_files(file->child);
+    }
+}
+
+/*
+ * Release the file tree rooted at `xfile': the file itself, every
+ * following sibling and all of their descendants, together with
+ * their filenames. A NULL argument is accepted and ignored.
+ */
+static void xhtml_free_file(xhtmlfile* xfile)
+{
+    while (xfile != NULL) {
+        /* Remember the sibling before this record goes away. */
+        xhtmlfile *sibling = xfile->next;
+
+        if (xfile->filename) {
+            sfree(xfile->filename);
+        }
+        xhtml_free_file(xfile->child);
+        sfree(xfile);
+
+        xfile = sibling;
+    }
+}
+
+/*
+ * Entry point of the XHTML backend.
+ *
+ * Stores its arguments and the parsed configuration in the
+ * file-level globals, works out the file/section layout, writes the
+ * top-level file, every other file and the index page, and finally
+ * frees the section chain, the file tree and the per-index-entry
+ * backend data.
+ */
+void xhtml_backend(paragraph *sourceform, keywordlist *in_keywords,
+                   indexdata *in_idx)
+{
+    indexentry *entry;
+    xhtmlsection *sect, *older;
+    int n;
+
+    sourceparas = sourceform;
+    conf = xhtml_configure(sourceform);
+    keywords = in_keywords;
+    idx = in_idx;
+
+    /* Clear up the index entries backend data pointers */
+    n = 0;
+    while ((entry = (indexentry *)index234(idx->entries, n)) != NULL) {
+        entry->backend_data = NULL;
+        n++;
+    }
+
+    xhtml_ponder_layout(sourceform);
+
+    /* Write everything out: top file, the rest of the tree, the index. */
+    xhtml_do_top_file(topfile, sourceform);
+    assert(!topfile->next);            /* shouldn't have a sibling at all */
+    xhtml_do_files(topfile->child);
+    xhtml_do_index();
+
+    /* release file, section, index data structures */
+    for (sect = topsection; sect != NULL; sect = older) {
+        older = sect->chain;
+        if (sect->fragment) {
+            sfree(sect->fragment);
+        }
+        sfree(sect);
+    }
+    xhtml_free_file(topfile);
+
+    n = 0;
+    while ((entry = (indexentry *)index234(idx->entries, n)) != NULL) {
+        xhtmlindex *xi = (xhtmlindex*) entry->backend_data;
+        if (xi != NULL) {
+            if (xi->sections != NULL) {
+                sfree(xi->sections);
+            }
+            sfree(xi);
+        }
+        entry->backend_data = NULL;
+        n++;
+    }
+}
+
+/*
+ * Return the section level of paragraph `p': 1 for chapters,
+ * unnumbered chapters and appendices, aux+2 for headings and
+ * subsections, and -1 for anything that is not a section heading.
+ */
+static int xhtml_para_level(paragraph *p)
+{
+    if (p->type == para_UnnumberedChapter ||
+        p->type == para_Chapter ||
+        p->type == para_Appendix)
+        return 1;
+
+    if (p->type == para_Heading || p->type == para_Subsect)
+        return p->aux+2;
+
+    return -1;
+}
+
+/* Name of the index page: opened by xhtml_do_index() and used as the final "Next" link target. */
+static char* xhtml_index_filename = "IndexPage.html";
+
+/*
+ * Output the navigation links (Previous | Contents | Next) for the
+ * current file. file == NULL means we're doing the index.
+ *
+ * "Previous" points at the file most recently written by
+ * xhtml_do_file(), if any. "Next" points at the file that follows
+ * this one in output order: its first child, else its next sibling,
+ * else the next sibling of its nearest ancestor that has one. When
+ * no such file exists, a normal file links to the index page and the
+ * index page itself gets a plain, unlinked "Next".
+ */
+static void xhtml_donavlinks(FILE *fp, xhtmlfile *file)
+{
+    xhtmlfile *xhtml_next_file = NULL;
+
+    fprintf(fp, "<p");
+    if (conf.nav_attrs!=NULL) {
+        fprintf(fp, " %ls>", conf.nav_attrs);
+    } else {
+        fprintf(fp, ">");
+    }
+
+    if (xhtml_last_file==NULL) {
+        fprintf(fp, "Previous | ");
+    } else {
+        fprintf(fp, "<a href='%s'>Previous</a> | ", xhtml_last_file->filename);
+    }
+    fprintf(fp, "<a href='Contents.html'>Contents</a> | ");
+
+    if (file != NULL) {   /* otherwise we're doing nav links for the index */
+        xhtml_next_file = file->child;
+        if (xhtml_next_file==NULL) {
+            /*
+             * Climb through the ancestors until one of them has a
+             * following sibling. Looking only at the immediate parent
+             * would skip straight to the index from the last file of
+             * a subtree nested more than one level deep, and would
+             * dereference a NULL parent at the top of the tree.
+             */
+            xhtmlfile *up;
+            for (up = file; up != NULL && xhtml_next_file == NULL; up = up->parent)
+                xhtml_next_file = up->next;
+        }
+    }
+
+    if (xhtml_next_file==NULL) {
+        if (file==NULL) {              /* index, so no next file */
+            fprintf(fp, "Next ");
+        } else {
+            fprintf(fp, "<a href='%s'>Next</a>", xhtml_index_filename);
+        }
+    } else {
+        fprintf(fp, "<a href='%s'>Next</a>", xhtml_next_file->filename);
+    }
+    fprintf(fp, "</p>\n");
+}
+
+/*
+ * Write out the index file: header, navigation links, then a <dl>
+ * holding one <dt>/<dd> pair for every index entry that has backend
+ * data. Each <dd> lists links to the sections that reference the
+ * entry, separated by ", ".
+ */
+static void xhtml_do_index(void)
+{
+    word temp_word = { NULL, NULL, word_Normal, 0, 0, L"Index", { NULL, 0, 0} };
+    indexentry *y;
+    int ti;
+    FILE *fp = fopen(xhtml_index_filename, "w");
+
+    if (fp==NULL)
+        fatal(err_cantopenw, xhtml_index_filename);
+    xhtml_doheader(fp, &temp_word);
+    xhtml_donavlinks(fp, NULL);
+
+    fprintf(fp, "<dl>\n");
+    /* iterate over idx->entries using the tree functions and display everything */
+    for (ti = 0; (y = (indexentry *)index234(idx->entries, ti)) != NULL; ti++) {
+        xhtmlindex *xi;
+        int i;
+        int printed = 0;               /* number of links written so far */
+
+        if (!y->backend_data)
+            continue;
+
+        fprintf(fp, "<dt>");
+        xhtml_para(fp, y->text);
+        fprintf(fp, "</dt>\n<dd>");
+
+        xi = (xhtmlindex*) y->backend_data;
+        for (i=0; i<xi->nsection; i++) {
+            xhtmlsection *sect = xi->sections[i];
+            if (!sect)
+                continue;
+            /*
+             * Separate from the previously written link, rather than
+             * from the next array slot: slots may be NULL, and keying
+             * on the position would leave a dangling ", ".
+             */
+            if (printed++ > 0)
+                fprintf(fp, ", ");
+            fprintf(fp, "<a href='%s#%s'>", sect->file->filename, sect->fragment);
+            if (sect->para->kwtext) {
+                xhtml_para(fp, sect->para->kwtext);
+            } else if (sect->para->words) {
+                xhtml_para(fp, sect->para->words);
+            }
+            fprintf(fp, "</a>");
+        }
+        fprintf(fp, "</dd>\n");
+    }
+    fprintf(fp, "</dl>\n");
+
+    xhtml_donavlinks(fp, NULL);
+    xhtml_dofooter(fp);
+    fclose(fp);
+}
+
+/*
+ * Output one file of the tree: header (titled with the first
+ * section's words, or its keyword text if it has no words),
+ * navigation links, an optional contents list, the sections
+ * themselves, a list of sub-files for non-leaf files, navigation
+ * links again and the footer. Afterwards the file is recorded as
+ * the most recently written one.
+ */
+static void xhtml_do_file(xhtmlfile *file)
+{
+    paragraph *heading;
+    word *title;
+    FILE *fp;
+
+    fp = fopen(file->filename, "w");
+    if (fp==NULL)
+        fatal(err_cantopenw, file->filename);
+
+    heading = file->sections->para;
+    title = heading->words ? heading->words : heading->kwtext;
+    xhtml_doheader(fp, title);
+
+    xhtml_donavlinks(fp, file);
+
+    /*
+     * A leaf file only gets its own contents list if that is enabled
+     * and a dry run (fp == NULL) counts enough entries.
+     */
+    if (file->is_leaf && conf.leaf_contains_contents) {
+        if (xhtml_do_contents(NULL, file) >= conf.leaf_smallest_contents)
+            xhtml_do_contents(fp, file);
+    }
+    xhtml_do_sections(fp, file->sections);
+    if (!file->is_leaf)
+        xhtml_do_naked_contents(fp, file);
+
+    xhtml_donavlinks(fp, file);
+
+    xhtml_dofooter(fp);
+    fclose(fp);
+
+    xhtml_last_file = file;
+}
+
+/*
+ * Output the top-level file: a header titled with the first
+ * para_Title paragraph (or no title if there is none), then every
+ * preamble paragraph, then every copyright paragraph, the contents
+ * list, the file's sections and the footer.
+ */
+static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform)
+{
+    int intro_types[2];
+    paragraph *p;
+    int pass;
+    FILE *fp = fopen(file->filename, "w");
+
+    if (fp==NULL)
+        fatal(err_cantopenw, file->filename);
+
+    /* Do the title -- only one allowed */
+    for (p = sourceform; p; p = p->next) {
+        if (p->type == para_Title)
+            break;
+    }
+    if (p)
+        xhtml_doheader(fp, p->words);
+    else
+        xhtml_doheader(fp, NULL /* Eek! */);
+
+    /* Do the preamble and copyright: one full pass over the source for each */
+    intro_types[0] = para_Preamble;
+    intro_types[1] = para_Copyright;
+    for (pass = 0; pass < 2; pass++) {
+        for (p = sourceform; p; p = p->next) {
+            if (p->type != intro_types[pass])
+                continue;
+            fprintf(fp, "<p>");
+            xhtml_para(fp, p->words);
+            fprintf(fp, "</p>\n");
+        }
+    }
+
+    xhtml_do_contents(fp, file);
+    xhtml_do_sections(fp, file->sections);
+    xhtml_dofooter(fp);
+    fclose(fp);
+}
+
+/*
+ * Convert a Unicode string to an ASCII one, stored in a newly
+ * allocated buffer returned through `*out'. Characters outside the
+ * range 32..126 are replaced by '?'.
+ */
+static void xhtml_utostr(wchar_t *in, char **out)
+{
+    int len = ustrlen(in);
+    char *buf = smalloc(len+1);
+    int pos;
+
+    for (pos = 0; pos < len; pos++) {
+        wchar_t wc = in[pos];
+        buf[pos] = (wc >= 32 && wc <= 126) ? (char)wc : '?';
+    }
+    buf[pos] = 0;
+
+    *out = buf;
+}
+
+/*
+ * Write contents for the given file, and subfiles, down to the
+ * contents depth configured for the level of the file's first
+ * section. Returns the number of entries written. With fp == NULL
+ * nothing is output; the entries are only counted.
+ */
+static int xhtml_do_contents(FILE *fp, xhtmlfile *file)
+{
+    int top_level = 0;
+    int depth_limit, base_level, entries;
+
+    if (file == NULL)
+        return 0;
+
+    if (file->sections)
+        top_level = file->sections->level;
+    depth_limit = conf.contents_depth[top_level > 5 ? 5 : top_level];
+    base_level = file->is_leaf ? top_level - 1 : top_level;
+    last_level = base_level;
+
+    entries = xhtml_do_contents_section_limit(fp, file->sections, depth_limit);
+    entries += xhtml_do_contents_limit(fp, file->child, depth_limit);
+
+    /* Close whatever lists the entries left open. */
+    if (fp != NULL) {
+        for (; last_level > base_level; last_level--)
+            fprintf(fp, "</ul>\n");
+    }
+    return entries;
+}
+
+/*
+ * Like xhtml_do_contents(), but lists only the file's child files
+ * and nothing from the file itself. Returns the number of entries
+ * written.
+ */
+static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file)
+{
+    int top_level = 0;
+    int depth_limit, base_level, entries;
+
+    if (file == NULL)
+        return 0;
+
+    if (file->sections)
+        top_level = file->sections->level;
+    depth_limit = conf.contents_depth[top_level > 5 ? 5 : top_level];
+    base_level = file->is_leaf ? top_level - 1 : top_level;
+    last_level = base_level;
+
+    entries = xhtml_do_contents_limit(fp, file->child, depth_limit);
+
+    /* Close whatever lists the entries left open. */
+    if (fp != NULL) {
+        for (; last_level > base_level; last_level--)
+            fprintf(fp, "</ul>\n");
+    }
+    return entries;
+}
+
+/*
+ * Write contents for the given file, children, and siblings, down to
+ * given limit contents depth.
+ */
+static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit)
+{
+ int count = 0;
+ while (file) {
+ count += xhtml_do_contents_section_limit(fp, file->sections, limit);
+ count += xhtml_do_contents_limit(fp, file->child, limit);
+ file = file->next;
+ }
+ return count;
+}
+
+/*
+ * Write contents entries for the given section tree, down to the
+ * limit contents depth.
+ */
+static int xhtml_do_contents_section_deep_limit(FILE *fp, xhtmlsection *section, int limit)
+{
+ int count = 0;
+ while (section) {
+ if (!xhtml_add_contents_entry(fp, section, limit))
+ return 0;
+ else
+ count++;
+ count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
+ section = section->next;
+ }
+ return count;
+}
+
+/*
+ * Write contents entries for the given section tree, down to the
+ * limit contents depth.
+ */
+static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit)
+{
+ int count = 0;
+ if (!section)
+ return 0;
+ xhtml_add_contents_entry(fp, section, limit);
+ count=1;
+ count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
+ /* section=section->child;
+ while (section && xhtml_add_contents_entry(fp, section, limit)) {
+ section = section->next;
+ }*/
+ return count;
+}
+
+/*
+ * Add a section entry, unless we're exceeding the limit, in which
+ * case return FALSE (otherwise return TRUE).
+ */
+static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit)
+{
+ if (!section || section->level > limit)
+ return FALSE;
+ if (fp==NULL)
+ return TRUE;
+ while (last_level > section->level) {
+ last_level--;
+ fprintf(fp, "</ul>\n");
+ }
+ while (last_level < section->level) {
+ last_level++;
+ fprintf(fp, "<ul>\n");
+ }
+ fprintf(fp, "<li><a href=\"%s#%s\">", section->file->filename, section->fragment);
+ if (section->para->kwtext) {
+ xhtml_para(fp, section->para->kwtext);
+ if (section->para->words) {
+ fprintf(fp, ": ");
+ }
+ }
+ if (section->para->words) {
+ xhtml_para(fp, section->para->words);
+ }
+ fprintf(fp, "</a></li>\n");
+ return TRUE;
+}
+
+/*
+ * Write all the sections in this file. Do all paragraphs in this section, then all
+ * children (recursively), then go on to the next one (tail recursively).
+ */
+static void xhtml_do_sections(FILE *fp, xhtmlsection *sections)
+{
+ while (sections) {
+ currentsection = sections;
+ xhtml_do_paras(fp, sections->para);
+ xhtml_do_sections(fp, sections->child);
+ sections = sections->next;
+ }
+}
+
+ /*
+  * Write this list of paragraphs. Close off all lists at the end.
+  *
+  * The first paragraph is always processed; after that the loop runs
+  * only while xhtml_para_level() returns -1 for the paragraph, so it
+  * stops at the next paragraph for which a level is reported.
+  *
+  * Consecutive bullet, numbered-list and cited-bibliography paragraphs
+  * are wrapped in a single <ul>, <ol> or <dl> respectively: the list is
+  * opened when the paragraph type differs from the previous one and
+  * closed when the following paragraph is of another type, reports a
+  * level other than -1, or does not exist.
+  */
+ static void xhtml_do_paras(FILE *fp, paragraph *p)
+ {
+ /* last_type: type of the previously visited paragraph (-1 before the first) */
+ int last_type = -1, first=TRUE;
+ if (!p)
+ return;
+
+/* for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/
+ for (; p && (xhtml_para_level(p)==-1 || first); p=p->next) {
+ first=FALSE;
+ switch (p->type)
+ {
+ /*
+ * Things we ignore because we've already processed them or
+ * aren't going to touch them in this pass.
+ */
+ case para_IM:
+ case para_BR:
+ case para_Biblio: /* only touch BiblioCited */
+ case para_VersionID:
+ case para_Copyright:
+ case para_Preamble:
+ case para_NoCite:
+ case para_Title:
+ break;
+
+ /*
+ * Chapter titles.
+ */
+ case para_Chapter:
+ case para_Appendix:
+ case para_UnnumberedChapter:
+ xhtml_heading(fp, p);
+ break;
+
+ /* Section and subsection headings are written the same way. */
+ case para_Heading:
+ case para_Subsect:
+ xhtml_heading(fp, p);
+ break;
+
+ case para_Rule:
+ fprintf(fp, "\n<hr />\n");
+ break;
+
+ case para_Normal:
+ fprintf(fp, "\n<p>");
+ xhtml_para(fp, p->words);
+ fprintf(fp, "</p>\n");
+ break;
+
+ case para_Bullet:
+ case para_NumberedList:
+ case para_BiblioCited:
+ if (last_type!=p->type) {
+ /* start up list if necessary */
+ if (p->type == para_Bullet) {
+ fprintf(fp, "<ul>\n");
+ } else if (p->type == para_NumberedList) {
+ fprintf(fp, "<ol>\n");
+ } else if (p->type == para_BiblioCited) {
+ fprintf(fp, "<dl>\n");
+ }
+ }
+ /* open the item; a bibliography item gets its keyword text as <dt> */
+ if (p->type == para_Bullet || p->type == para_NumberedList)
+ fprintf(fp, "<li>");
+ else if (p->type == para_BiblioCited) {
+ fprintf(fp, "<dt>");
+ xhtml_para(fp, p->kwtext);
+ fprintf(fp, "</dt>\n<dd>");
+ }
+ xhtml_para(fp, p->words);
+ if (p->type == para_BiblioCited) {
+ fprintf(fp, "</dd>\n");
+ } else if (p->type == para_Bullet || p->type == para_NumberedList) {
+ fprintf(fp, "</li>");
+ }
+ if (p->type == para_Bullet || p->type == para_NumberedList || p->type == para_BiblioCited)
+ /* close off list if necessary */
+ {
+ paragraph *p2 = p->next;
+ int close_off=FALSE;
+/* if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/
+ if (p2 && xhtml_para_level(p2)==-1) {
+ /* the loop will visit p2: keep the list open only if it continues it */
+ if (p2->type != p->type)
+ close_off=TRUE;
+ } else {
+ /* no next paragraph, or the loop will stop before it */
+ close_off=TRUE;
+ }
+ if (close_off) {
+ if (p->type == para_Bullet) {
+ fprintf(fp, "</ul>\n");
+ } else if (p->type == para_NumberedList) {
+ fprintf(fp, "</ol>\n");
+ } else if (p->type == para_BiblioCited) {
+ fprintf(fp, "</dl>\n");
+ }
+ }
+ }
+ break;
+
+ case para_Code:
+ xhtml_codepara(fp, p->words);
+ break;
+ }
+ last_type = p->type;
+ }
+ }
+
+/*
+ * Output a header for this XHTML file.
+ */
+static void xhtml_doheader(FILE *fp, word *title)
+{
+ fprintf(fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n");
+ fprintf(fp, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
+ fprintf(fp, "<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>");
+ if (title==NULL)
+ fprintf(fp, "The thing with no name!");
+ else
+ xhtml_para(fp, title);
+ fprintf(fp, "</title>\n");
+ fprintf(fp, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version);
+ if (conf.author)
+ fprintf(fp, "<meta name=\"author\" content=\"%ls\" />\n", conf.author);
+ if (conf.description)
+ fprintf(fp, "<meta name=\"description\" content=\"%ls\" />\n", conf.description);
+ if (conf.head_end)
+ fprintf(fp, "%ls\n", conf.head_end);
+ fprintf(fp, "</head>\n\n");
+ if (conf.body)
+ fprintf(fp, "%ls\n", conf.body);
+ else
+ fprintf(fp, "<body>\n");
+ if (conf.body_start)
+ fprintf(fp, "%ls\n", conf.body_start);
+}
+
+/*
+ * Output a footer for this XHTML file.
+ */
+static void xhtml_dofooter(FILE *fp)
+{
+ fprintf(fp, "\n<hr />\n\n");
+ if (conf.body_end)
+ fprintf(fp, "%ls\n", conf.body_end);
+ if (!conf.suppress_address) {
+ fprintf(fp,"<address>\n");
+ if (conf.address_start)
+ fprintf(fp, "%ls\n", conf.address_start);
+ /* Do the version ID */
+ if (conf.include_version_id) {
+ paragraph *p;
+ int started = 0;
+ for (p = sourceparas; p; p = p->next)
+ if (p->type == para_VersionID) {
+ xhtml_versionid(fp, p->words, started);
+ started = 1;
+ }
+ }
+ if (conf.address_end)
+ fprintf(fp, "%ls\n", conf.address_end);
+ fprintf(fp, "</address>\n");
+ }
+ fprintf(fp, "</body>\n\n</html>\n");
+}
+
+/*
+ * Output the versionid paragraph. Typically this is a version control
+ * ID string (such as $Id...$ in RCS).
+ */
+static void xhtml_versionid(FILE *fp, word *text, int started)
+{
+ rdstringc t = { 0, 0, NULL };
+
+ rdaddc(&t, '['); /* FIXME: configurability */
+ xhtml_rdaddwc(&t, text, NULL);
+ rdaddc(&t, ']'); /* FIXME: configurability */
+
+ if (started)
+ fprintf(fp, "<br>\n");
+ fprintf(fp, "%s\n", t.text);
+ sfree(t.text);
+}
+
+ /*
+  * Report whether `c' is one of the characters that cannot appear
+  * literally in XHTML text: ampersand, less-than, greater-than or
+  * double quote. Returns TRUE for those and FALSE for anything else.
+  */
+ static int xhtml_reservedchar(int c)
+ {
+     switch (c) {
+       case '&':
+       case '<':
+       case '>':
+       case '"':
+         return TRUE;
+       default:
+         return FALSE;
+     }
+ }
+
+/*
+ * Convert a wide string into valid XHTML: Anything outside ASCII will
+ * be fixed up as an entity. Currently we don't worry about constraining the
+ * encoded character set, which we should probably do at some point (we can
+ * still fix up and return FALSE - see the last comment here). We also don't
+ * currently
+ *
+ * Because this is only used for words, spaces are HARD spaces (any other
+ * spaces will be word_Whitespace not word_Normal). So they become
+ * Unless hard_spaces is FALSE, of course (code paragraphs break the above
+ * rule).
+ *
+ * If `result' is non-NULL, mallocs the resulting string and stores a pointer to
+ * it in `*result'. If `result' is NULL, merely checks whether all
+ * characters in the string are feasible.
+ *
+ * Return is nonzero if all characters are OK. If not all
+ * characters are OK but `result' is non-NULL, a result _will_
+ * still be generated!
+ */
+static int xhtml_convert(wchar_t *s, char **result, int hard_spaces) {
+ int doing = (result != 0);
+ int ok = TRUE;
+ char *p = NULL;
+ int plen = 0, psize = 0;
+
+ for (; *s; s++) {
+ wchar_t c = *s;
+
+#define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
+
+ if (((c == 32 && !hard_spaces) || (c > 32 && c <= 126 && !xhtml_reservedchar(c)))) {
+ /* Char is OK. */
+ if (doing)
+ {
+ ensure_size(plen);
+ p[plen++] = (char)c;
+ }
+ } else {
+ /* Char needs fixing up. */
+ /* ok = FALSE; -- currently we never return FALSE; we
+ * might want to when considering a character set for the
+ * encoded document.
+ */
+ if (doing)
+ {
+ if (c==32) { /* a space in a word is a hard space */
+ ensure_size(plen+6); /* includes space for the NUL, which is subsequently stomped on */
+ sprintf(p+plen, " ");
+ plen+=6;
+ } else {
+ /* FIXME: entity names! */
+ ensure_size(plen+8); /* includes space for the NUL, which is subsequently stomped on */
+ plen+=sprintf(p+plen, "&#%04i;", (int)c);
+ }
+ }
+ }
+ }
+ if (doing) {
+ p = resize(p, plen+1);
+ p[plen] = '\0';
+ *result = p;
+ }
+ return ok;
+}
+
+/*
+ * This formats the given words as XHTML.
+ */
+static void xhtml_rdaddwc(rdstringc *rs, word *text, word *end) {
+ char *c;
+ keyword *kwl;
+ xhtmlsection *sect;
+ indextag *itag;
+ int ti;
+
+ for (; text && text != end; text = text->next) {
+ switch (text->type) {
+ case word_HyperLink:
+ xhtml_utostr(text->text, &c);
+ rdaddsc(rs, "<a href=\"");
+ rdaddsc(rs, c);
+ rdaddsc(rs, "\">");
+ sfree(c);
+ break;
+
+ case word_UpperXref:
+ case word_LowerXref:
+ kwl = kw_lookup(keywords, text->text);
+ if (kwl) {
+ sect=xhtml_find_section(kwl->para);
+ if (sect) {
+ rdaddsc(rs, "<a href=\"");
+ rdaddsc(rs, sect->file->filename);
+ rdaddc(rs, '#');
+ rdaddsc(rs, sect->fragment);
+ rdaddsc(rs, "\">");
+ } else {
+ rdaddsc(rs, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
+ error(err_whatever, "Couldn't locate cross-reference! (Probably a bibliography entry.)");
+ }
+ } else {
+ rdaddsc(rs, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
+ error(err_whatever, "Couldn't locate cross-reference! (Wasn't in source file.)");
+ }
+ break;
+
+ case word_IndexRef: /* in theory we could make an index target here */
+/* rdaddsc(rs, "<a name=\"idx-");
+ xhtml_utostr(text->text, &c);
+ rdaddsc(rs, c);
+ sfree(c);
+ rdaddsc(rs, "\"></a>");*/
+ /* what we _do_ need to do is to fix up the backend data
+ * for any indexentry this points to.
+ */
+ for (ti=0; (itag = (indextag *)index234(idx->tags, ti))!=NULL; ti++) {
+ /* FIXME: really ustricmp() and not ustrcmp()? */
+ if (ustricmp(itag->name, text->text)==0) {
+ break;
+ }
+ }
+ if (itag!=NULL) {
+ if (itag->refs!=NULL) {
+ int i;
+ for (i=0; i<itag->nrefs; i++) {
+ xhtmlindex *idx_ref;
+ indexentry *ientry;
+
+ ientry = itag->refs[i];
+ if (ientry->backend_data==NULL) {
+ idx_ref = (xhtmlindex*) smalloc(sizeof(xhtmlindex));
+ if (idx_ref==NULL)
+ fatal(err_nomemory);
+ idx_ref->nsection = 0;
+ idx_ref->size = 4;
+ idx_ref->sections = (xhtmlsection**) smalloc(idx_ref->size * sizeof(xhtmlsection*));
+ if (idx_ref->sections==NULL)
+ fatal(err_nomemory);
+ ientry->backend_data = idx_ref;
+ } else {
+ idx_ref = ientry->backend_data;
+ if (idx_ref->nsection+1 > idx_ref->size) {
+ int new_size = idx_ref->size * 2;
+ idx_ref->sections = srealloc(idx_ref->sections, new_size * sizeof(xhtmlsection));
+ if (idx_ref->sections==NULL) {
+ fatal(err_nomemory);
+ }
+ idx_ref->size = new_size;
+ }
+ }
+ idx_ref->sections[idx_ref->nsection++] = currentsection;
+#if 0
+#endif
+ }
+ } else {
+ fatal(err_whatever, "Index tag had no entries!");
+ }
+ } else {
+ fprintf(stderr, "Looking for index entry '%ls'\n", text->text);
+ fatal(err_whatever, "Couldn't locate index entry! (Wasn't in index.)");
+ }
+ break;
+
+ case word_HyperEnd:
+ case word_XrefEnd:
+ rdaddsc(rs, "</a>");
+ break;
+
+ case word_Normal:
+ case word_Emph:
+ case word_Code:
+ case word_WeakCode:
+ case word_WhiteSpace:
+ case word_EmphSpace:
+ case word_CodeSpace:
+ case word_WkCodeSpace:
+ case word_Quote:
+ case word_EmphQuote:
+ case word_CodeQuote:
+ case word_WkCodeQuote:
+ assert(text->type != word_CodeQuote &&
+ text->type != word_WkCodeQuote);
+ if (towordstyle(text->type) == word_Emph &&
+ (attraux(text->aux) == attr_First ||
+ attraux(text->aux) == attr_Only))
+ rdaddsc(rs, "<em>");
+ else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
+ (attraux(text->aux) == attr_First ||
+ attraux(text->aux) == attr_Only))
+ rdaddsc(rs, "<code>");
+
+ if (removeattr(text->type) == word_Normal) {
+ if (xhtml_convert(text->text, &c, TRUE)) /* spaces in the word are hard */
+ rdaddsc(rs, c);
+ else
+ xhtml_rdaddwc(rs, text->alt, NULL);
+ sfree(c);
+ } else if (removeattr(text->type) == word_WhiteSpace) {
+ rdaddc(rs, ' ');
+ } else if (removeattr(text->type) == word_Quote) {
+ rdaddsc(rs, """);
+ }
+
+ if (towordstyle(text->type) == word_Emph &&
+ (attraux(text->aux) == attr_Last ||
+ attraux(text->aux) == attr_Only))
+ rdaddsc(rs, "</em>");
+ else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) &&
+ (attraux(text->aux) == attr_Last ||
+ attraux(text->aux) == attr_Only))
+ rdaddsc(rs, "</code>");
+ break;
+ }
+ }
+}
+
+ /*
+  * Output a heading, formatted as XHTML: a named anchor carrying the
+  * section's fragment, followed by an <hN> element where N is the
+  * paragraph's level.
+  *
+  * For levels above 2 the numeric prefix (kwtext2) is used when there
+  * is one; otherwise the textual prefix (kwtext) is used, followed by a
+  * colon. If no section can be found for the paragraph an error is
+  * reported and an empty anchor name is written.
+  */
+ static void xhtml_heading(FILE *fp, paragraph *p)
+ {
+     rdstringc t = { 0, 0, NULL };
+     word *tprefix = p->kwtext;
+     word *nprefix = p->kwtext2;
+     word *text = p->words;
+     int level = xhtml_para_level(p);
+     xhtmlsection *sect = xhtml_find_section(p);
+     char *fragment;
+     if (sect) {
+         fragment = sect->fragment;
+     } else {
+         fragment = ""; /* FIXME: what else can we do? */
+         error(err_whatever, "Couldn't locate heading cross-reference!");
+     }
+
+     if (level>2 && nprefix) { /* FIXME: configurability on the level thing */
+         xhtml_rdaddwc(&t, nprefix, NULL);
+         rdaddc(&t, ' '); /* FIXME: as below */
+     } else if (tprefix) {
+         xhtml_rdaddwc(&t, tprefix, NULL);
+         rdaddsc(&t, ": "); /* FIXME: configurability */
+     }
+     xhtml_rdaddwc(&t, text, NULL);
+     /*
+      * t.text is still NULL if nothing at all was appended (no prefix
+      * and no visible heading text); never hand a null pointer to %s.
+      */
+     fprintf(fp, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment, level,
+             t.text ? t.text : "", level);
+     sfree(t.text);
+ }
+
+ /*
+  * Output a paragraph: the word list `text' is rendered as XHTML and
+  * written to `fp'. Styles are handled by xhtml_rdaddwc().
+  *
+  * Nothing is written when the word list produces no output at all
+  * (for example a NULL list, or one holding only index references): in
+  * that case the string buffer has never been allocated and its text
+  * pointer is still NULL, which must not be passed to %s.
+  */
+ static void xhtml_para(FILE *fp, word *text)
+ {
+     rdstringc out = { 0, 0, NULL };
+     xhtml_rdaddwc(&out, text, NULL);
+     if (out.text)
+         fprintf(fp, "%s", out.text);
+     sfree(out.text);
+ }
+
+ /*
+  * Output a code paragraph as a <pre> block. Every word of type
+  * word_WeakCode in the list becomes one output line, converted with
+  * ordinary (non-hard) spaces; words of any other type are skipped.
+  * Treating the paragraph as preformatted may not be entirely correct,
+  * but most of the complexity in the text backend came from word
+  * wrapping, which is not needed here.
+  */
+ static void xhtml_codepara(FILE *fp, word *text)
+ {
+     word *w;
+
+     fprintf(fp, "<pre>");
+     for (w = text; w != NULL; w = w->next) {
+         char *line;
+
+         if (w->type != word_WeakCode)
+             continue;
+         xhtml_convert(w->text, &line, FALSE);
+         fprintf(fp, "%s\n", line);
+         sfree(line);
+     }
+     fprintf(fp, "</pre>\n");
+ }
--- /dev/null
+/*
+ * contents.c: build a table of contents
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <limits.h>
+#include "halibut.h"
+
+ /*
+  * Running state of the chapter/section/list numbering, created by
+  * number_init() and updated by number_mktext() as each paragraph is
+  * numbered.
+  */
+ struct numberstate_Tag {
+ int chapternum; /* number of the current chapter; starts at 0 */
+ int appendixnum; /* index of the current appendix; starts at -1 */
+ int ischapter; /* 1 inside a chapter, 0 inside an appendix */
+ int *sectionlevels; /* per-level section counters, maxsectlevel entries */
+ paragraph **currentsects; /* latest heading at each level, indexed by level+1 */
+ paragraph *lastsect; /* most recent heading paragraph of any level */
+ int oklevel; /* deepest heading level currently allowed; -1 before any chapter */
+ int maxsectlevel; /* allocated size of sectionlevels (currentsects has one more) */
+ int listitem; /* counter for the current numbered list */
+ wchar_t *chaptertext; /* the word for a chapter */
+ wchar_t *sectiontext; /* the word for a section */
+ wchar_t *apptext; /* the word for an appendix */
+ };
+
+ /*
+  * Allocate and initialise a numbering state: no chapter or appendix
+  * seen yet, no heading level permitted yet, and room for 32 section
+  * levels. The caller releases it with number_free().
+  *
+  * The level arrays are cleared with explicit loops rather than
+  * memset(): that stores real null pointers and needs no declaration
+  * from <string.h>, which this file does not include. The three text
+  * pointers start out NULL and are given their values by number_cfg().
+  */
+ numberstate *number_init(void) {
+     numberstate *ret = mknew(numberstate);
+     int i;
+
+     ret->chapternum = 0;
+     ret->appendixnum = -1;
+     ret->ischapter = 1;
+     ret->oklevel = -1; /* not even in a chapter yet */
+     ret->maxsectlevel = 32;
+     ret->sectionlevels = mknewa(int, ret->maxsectlevel);
+     for (i = 0; i < ret->maxsectlevel; i++)
+         ret->sectionlevels[i] = 0;
+     ret->currentsects = mknewa(paragraph *, ret->maxsectlevel+1);
+     for (i = 0; i < ret->maxsectlevel+1; i++)
+         ret->currentsects[i] = NULL;
+     ret->lastsect = NULL;
+     ret->listitem = -1;
+     ret->chaptertext = NULL;
+     ret->sectiontext = NULL;
+     ret->apptext = NULL;
+     return ret;
+ }
+
+ /*
+  * Release a numbering state obtained from number_init(): both
+  * per-level arrays, then the structure itself.
+  */
+ void number_free(numberstate *state) {
+     sfree(state->currentsects);
+     sfree(state->sectionlevels);
+     sfree(state);
+ }
+
+ /*
+  * Append a new word_Normal word holding a copy of `text' to the word
+  * list being built. `*wret' points at the link that should receive
+  * the new word; on return it points at the new word's `next' link, so
+  * that successive calls keep appending.
+  *
+  * The aux and breaks fields are given defined values, so that later
+  * readers of the word never see indeterminate data.
+  */
+ static void dotext(word ***wret, wchar_t *text) {
+     word *mnewword = mknew(word);
+     mnewword->text = ustrdup(text);
+     mnewword->type = word_Normal;
+     mnewword->alt = NULL;
+     mnewword->next = NULL;
+     mnewword->aux = 0;
+     mnewword->breaks = FALSE;
+     **wret = mnewword;
+     *wret = &mnewword->next;
+ }
+
+ /*
+  * Append a new word_WhiteSpace word (with no text) to the word list
+  * being built. `*wret' points at the link that should receive the new
+  * word; on return it points at the new word's `next' link.
+  *
+  * The aux and breaks fields are given defined values, so that later
+  * readers of the word never see indeterminate data.
+  */
+ static void dospace(word ***wret) {
+     word *mnewword = mknew(word);
+     mnewword->text = NULL;
+     mnewword->type = word_WhiteSpace;
+     mnewword->alt = NULL;
+     mnewword->next = NULL;
+     mnewword->aux = 0;
+     mnewword->breaks = FALSE;
+     **wret = mnewword;
+     *wret = &mnewword->next;
+ }
+
+ /*
+  * Append the decimal representation of `num' to the word list as a
+  * single word (see dotext() for the meaning of `wret').
+  *
+  * The digits are generated backwards from the end of a local buffer.
+  * The end of the buffer is computed in elements, not bytes: sizeof
+  * alone counts bytes, which for a wchar_t array points far beyond the
+  * array. Zero is rendered as "0", and a negative value gets a leading
+  * minus sign instead of indexing outside the digit table.
+  */
+ static void donumber(word ***wret, int num) {
+     wchar_t text[20];
+     wchar_t *p = text + sizeof(text) / sizeof(*text);
+     unsigned int mag;
+
+     /* magnitude as unsigned, so that INT_MIN is handled too */
+     mag = (num < 0) ? 0u - (unsigned int)num : (unsigned int)num;
+
+     *--p = L'\0';
+     do {
+         assert(p > text);
+         *--p = L"0123456789"[mag % 10];
+         mag /= 10;
+     } while (mag != 0);
+     if (num < 0) {
+         assert(p > text);
+         *--p = L'-';
+     }
+     dotext(wret, p);
+ }
+
+ /*
+  * Append `num' to the word list written in letters, as used for
+  * appendices: 0 is "A", 25 is "Z", 26 is "AA", and so on (see
+  * dotext() for the meaning of `wret'). `num' must not be negative.
+  *
+  * The first loop works out how many letters are needed, reducing
+  * `num' to its offset among the strings of that length; the second
+  * writes the letters backwards from the end of a local buffer. The
+  * end of the buffer is computed in elements, not bytes: sizeof alone
+  * counts bytes, which for a wchar_t array points far beyond the array.
+  */
+ static void doanumber(word ***wret, int num) {
+     wchar_t text[20];
+     wchar_t *p;
+     int nletters, aton;
+
+     assert(num >= 0);   /* a negative value would index outside the alphabet */
+
+     nletters = 1;
+     aton = 25;          /* largest value expressible in nletters letters */
+     while (num > aton) {
+         nletters++;
+         num -= aton+1;
+         if (aton < INT_MAX/26)
+             aton = (aton+1) * 26 - 1;
+         else
+             aton = INT_MAX;
+     }
+     p = text + sizeof(text) / sizeof(*text);
+     *--p = L'\0';
+     while (nletters--) {
+         assert(p > text);
+         *--p = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"[num % 26];
+         num /= 26;
+     }
+     dotext(wret, p);
+ }
+
+ /*
+  * Set the words used for "Chapter", "Section" and "Appendix" in
+  * `state'. The built-in English defaults are installed first; then
+  * every para_Config paragraph in `source' whose keyword is "chapter",
+  * "section" or "appendix" (compared with ustricmp) overrides the
+  * corresponding word with the value obtained by uadv() on that
+  * keyword.
+  */
+ void number_cfg(numberstate *state, paragraph *source) {
+     paragraph *cfg;
+
+     /*
+      * Defaults
+      */
+     state->chaptertext = L"Chapter";
+     state->sectiontext = L"Section";
+     state->apptext = L"Appendix";
+
+     for (cfg = source; cfg != NULL; cfg = cfg->next) {
+         wchar_t *kw;
+
+         if (cfg->type != para_Config)
+             continue;
+
+         kw = cfg->keyword;
+         if (ustricmp(kw, L"chapter") == 0)
+             state->chaptertext = uadv(kw);
+         else if (ustricmp(kw, L"section") == 0)
+             state->sectiontext = uadv(kw);
+         else if (ustricmp(kw, L"appendix") == 0)
+             state->apptext = uadv(kw);
+     }
+ }
+
+ /*
+  * Make sure the per-level arrays in `state' are large enough for
+  * sectionlevels[level] and currentsects[level+1] to be valid. Both
+  * arrays are grown together and every new slot is cleared, so a newly
+  * reachable counter starts at zero and a newly reachable heading slot
+  * is NULL.
+  */
+ static void number_grow_levels(numberstate *state, int level) {
+     int oldmax = state->maxsectlevel;
+     int i;
+
+     if (level < oldmax)
+         return;
+
+     state->maxsectlevel = level + 32;
+     state->sectionlevels = resize(state->sectionlevels,
+                                   state->maxsectlevel);
+     state->currentsects = resize(state->currentsects,
+                                  state->maxsectlevel+1);
+     for (i = oldmax; i < state->maxsectlevel; i++)
+         state->sectionlevels[i] = 0;
+     for (i = oldmax+1; i < state->maxsectlevel+1; i++)
+         state->currentsects[i] = NULL;
+ }
+
+ /*
+  * Number paragraph `p' and build the text used to refer to it.
+  *
+  * For chapters, appendices, headings and subsections the result is a
+  * word list of the form "<category> <number>", where the category is
+  * `category' if non-NULL and otherwise the configured word for that
+  * kind of paragraph. For numbered-list items the result is just the
+  * item number; `prev' is the type of the preceding paragraph and
+  * restarts the count when it was not a list item. Other paragraph
+  * types yield NULL.
+  *
+  * The numeric-only part of the result is also stored in p->kwtext2,
+  * and the parent/child/sibling links of `p' are set up from the
+  * headings seen so far.
+  *
+  * A heading more than one level deeper than its predecessor allows is
+  * reported as err_sectjump; `*errflag' is then set to TRUE and NULL is
+  * returned.
+  */
+ word *number_mktext(numberstate *state, paragraph *p, wchar_t *category,
+                     int prev, int *errflag) {
+     word *ret = NULL;
+     word **ret2 = &ret;     /* where the numeric-only part starts */
+     word **pret = &ret;     /* tail link of the list being built */
+     int i, level;
+
+     level = -2; /* default for non-section-heading */
+     switch (p->type) {
+       case para_Chapter:
+         state->chapternum++;
+         for (i = 0; i < state->maxsectlevel; i++)
+             state->sectionlevels[i] = 0;
+         dotext(&pret, category ? category : state->chaptertext);
+         dospace(&pret);
+         ret2 = pret;
+         donumber(&pret, state->chapternum);
+         state->ischapter = 1;
+         state->oklevel = 0;
+         level = -1;
+         break;
+       case para_Heading:
+       case para_Subsect:
+         level = (p->type == para_Heading ? 0 : p->aux);
+         if (level > state->oklevel) {
+             error(err_sectjump, &p->fpos);
+             *errflag = TRUE;
+             ret = NULL;
+             break;
+         }
+         state->oklevel = level+1;
+         number_grow_levels(state, level);
+         state->sectionlevels[level]++;
+         for (i = level+1; i < state->maxsectlevel; i++)
+             state->sectionlevels[i] = 0;
+         dotext(&pret, category ? category : state->sectiontext);
+         dospace(&pret);
+         ret2 = pret;
+         if (state->ischapter)
+             donumber(&pret, state->chapternum);
+         else
+             doanumber(&pret, state->appendixnum);
+         for (i = 0; i <= level; i++) {
+             dotext(&pret, L".");
+             if (state->sectionlevels[i] == 0)
+                 state->sectionlevels[i] = 1;
+             donumber(&pret, state->sectionlevels[i]);
+         }
+         break;
+       case para_Appendix:
+         state->appendixnum++;
+         for (i = 0; i < state->maxsectlevel; i++)
+             state->sectionlevels[i] = 0;
+         dotext(&pret, category ? category : state->apptext);
+         dospace(&pret);
+         ret2 = pret;
+         doanumber(&pret, state->appendixnum);
+         state->ischapter = 0;
+         state->oklevel = 0;
+         level = -1;
+         break;
+       case para_UnnumberedChapter:
+         level = -1;
+         break;
+       case para_NumberedList:
+         ret2 = pret;
+         if (prev != para_NumberedList)
+             state->listitem = 0;
+         state->listitem++;
+         donumber(&pret, state->listitem);
+         break;
+     }
+
+     /*
+      * Now set up parent, child and sibling links.
+      */
+     p->parent = p->child = p->sibling = NULL;
+     if (level != -2) {
+         /*
+          * After a section-jump error `level' can be arbitrarily large,
+          * so make sure the heading table covers it before indexing.
+          */
+         number_grow_levels(state, level);
+         if (state->currentsects[level+1])
+             state->currentsects[level+1]->sibling = p;
+         if (level >= 0 && state->currentsects[level]) {
+             p->parent = state->currentsects[level];
+             if (!state->currentsects[level]->child)
+                 state->currentsects[level]->child = p;
+         }
+         state->currentsects[level+1] = state->lastsect = p;
+         for (i = level+2; i < state->maxsectlevel+1; i++)
+             state->currentsects[i] = NULL;
+     } else {
+         p->parent = state->lastsect;
+     }
+
+     p->kwtext2 = *ret2;
+     return ret;
+ }
--- /dev/null
+/*
+ * error.c: Halibut error handling
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include "halibut.h"
+
+/*
+ * Error flags
+ */
+#define PREFIX 0x0001 /* give `halibut:' prefix */
+#define FILEPOS 0x0002 /* give file position prefix */
+
+static void do_error(int code, va_list ap) {
+ char error[1024];
+ char auxbuf[256];
+ char *sp, *sp2;
+ wchar_t *wsp;
+ filepos fpos, fpos2;
+ int flags;
+
+ switch(code) {
+ case err_nomemory: /* no arguments */
+ sprintf(error, "out of memory");
+ flags = PREFIX;
+ break;
+ case err_optnoarg:
+ sp = va_arg(ap, char *);
+ sprintf(error, "option `-%.200s' requires an argument", sp);
+ flags = PREFIX;
+ break;
+ case err_nosuchopt:
+ sp = va_arg(ap, char *);
+ sprintf(error, "unrecognised option `-%.200s'", sp);
+ flags = PREFIX;
+ break;
+ case err_noinput: /* no arguments */
+ sprintf(error, "no input files");
+ flags = PREFIX;
+ break;
+ case err_cantopen:
+ sp = va_arg(ap, char *);
+ sprintf(error, "unable to open input file `%.200s'", sp);
+ flags = PREFIX;
+ break;
+ case err_nodata: /* no arguments */
+ sprintf(error, "no data in input files");
+ flags = PREFIX;
+ break;
+ case err_brokencodepara:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "every line of a code paragraph should begin `\\c'");
+ flags = FILEPOS;
+ break;
+ case err_kwunclosed:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "expected `}' after paragraph keyword");
+ flags = FILEPOS;
+ break;
+ case err_kwexpected:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "expected a paragraph keyword");
+ flags = FILEPOS;
+ break;
+ case err_kwillegal:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "expected no paragraph keyword");
+ flags = FILEPOS;
+ break;
+ case err_kwtoomany:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "expected only one paragraph keyword");
+ flags = FILEPOS;
+ break;
+ case err_bodyillegal:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "expected no text after paragraph keyword");
+ flags = FILEPOS;
+ break;
+ case err_badparatype:
+ wsp = va_arg(ap, wchar_t *);
+ sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "command `%.200s' unrecognised at start of"
+ " paragraph", sp);
+ flags = FILEPOS;
+ break;
+ case err_badmidcmd:
+ wsp = va_arg(ap, wchar_t *);
+ sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "command `%.200s' unexpected in mid-paragraph", sp);
+ flags = FILEPOS;
+ break;
+ case err_unexbrace:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "brace character unexpected in mid-paragraph");
+ flags = FILEPOS;
+ break;
+ case err_explbr:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "expected `{' after command");
+ flags = FILEPOS;
+ break;
+ case err_commenteof:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "end of file unexpected inside `\\#{...}' comment");
+ flags = FILEPOS;
+ break;
+ case err_kwexprbr:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "expected `}' after cross-reference");
+ flags = FILEPOS;
+ break;
+ case err_missingrbrace:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "unclosed braces at end of paragraph");
+ flags = FILEPOS;
+ break;
+ case err_nestedstyles:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "unable to nest text styles");
+ flags = FILEPOS;
+ break;
+ case err_nestedindex:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "unable to nest index markings");
+ flags = FILEPOS;
+ break;
+ case err_nosuchkw:
+ fpos = *va_arg(ap, filepos *);
+ wsp = va_arg(ap, wchar_t *);
+ sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+ sprintf(error, "unable to resolve cross-reference to `%.200s'", sp);
+ flags = FILEPOS;
+ break;
+ case err_multiBR:
+ fpos = *va_arg(ap, filepos *);
+ wsp = va_arg(ap, wchar_t *);
+ sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+ sprintf(error, "multiple `\\BR' entries given for `%.200s'", sp);
+ flags = FILEPOS;
+ break;
+ case err_nosuchidxtag:
+ wsp = va_arg(ap, wchar_t *);
+ sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+ sprintf(error, "`\\IM' on unknown index tag `%.200s'", sp);
+ flags = 0;
+ /* FIXME: need to get a filepos to here somehow */
+ break;
+ case err_cantopenw:
+ sp = va_arg(ap, char *);
+ sprintf(error, "unable to open output file `%.200s'", sp);
+ flags = PREFIX;
+ break;
+ case err_macroexists:
+ fpos = *va_arg(ap, filepos *);
+ wsp = va_arg(ap, wchar_t *);
+ sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+ sprintf(error, "macro `%.200s' already defined", sp);
+ flags = FILEPOS;
+ break;
+ case err_sectjump:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "expected higher heading levels before this one");
+ flags = FILEPOS;
+ break;
+ case err_winhelp_ctxclash:
+ fpos = *va_arg(ap, filepos *);
+ sp = va_arg(ap, char *);
+ sp2 = va_arg(ap, char *);
+ sprintf(error, "Windows Help context id `%.200s' clashes with "
+ "previously defined `%.200s'", sp, sp2);
+ flags = FILEPOS;
+ break;
+ case err_multikw:
+ fpos = *va_arg(ap, filepos *);
+ fpos2 = *va_arg(ap, filepos *);
+ wsp = va_arg(ap, wchar_t *);
+ sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+ sprintf(error, "paragraph keyword `%.200s' already defined at ", sp);
+ sprintf(error + strlen(error), "%s:%d", fpos2.filename, fpos2.line);
+ flags = FILEPOS;
+ break;
+ case err_whatever:
+ sp = va_arg(ap, char *);
+ vsprintf(error, sp, ap);
+ flags = PREFIX;
+ break;
+ }
+
+ if (flags & PREFIX)
+ fputs("halibut: ", stderr);
+ if (flags & FILEPOS) {
+ fprintf(stderr, "%s:%d:", fpos.filename, fpos.line);
+ if (fpos.col > 0)
+ fprintf(stderr, "%d:", fpos.col);
+ fputc(' ', stderr);
+ }
+ fputs(error, stderr);
+ fputc('\n', stderr);
+}
+
+ /*
+  * Report the error identified by `code' (with its code-specific
+  * arguments) through do_error(), then terminate the program with
+  * EXIT_FAILURE. Does not return.
+  */
+ void fatal(int code, ...) {
+     va_list args;
+
+     va_start(args, code);
+     do_error(code, args);
+     va_end(args);
+
+     exit(EXIT_FAILURE);
+ }
+
+ /*
+  * Report the error identified by `code' (with its code-specific
+  * arguments) through do_error() and return to the caller.
+  */
+ void error(int code, ...) {
+     va_list args;
+
+     va_start(args, code);
+     do_error(code, args);
+     va_end(args);
+ }
--- /dev/null
+#ifndef HALIBUT_HALIBUT_H
+#define HALIBUT_HALIBUT_H
+
+#include <stdio.h>
+#include <wchar.h>
+#include <time.h>
+
+ /*
+  * NORETURN marks a function that never returns to its caller (it is
+  * applied to fatal() below). It expands to the GNU C attribute when
+  * __GNUC__ is defined and to nothing otherwise.
+  */
+ #ifdef __GNUC__
+ #define NORETURN __attribute__((__noreturn__))
+ #else
+ #define NORETURN /* nothing */
+ #endif
+
+ /* Boolean constants, defined only if nothing has defined them already. */
+ #ifndef TRUE
+ #define TRUE 1
+ #endif
+ #ifndef FALSE
+ #define FALSE 0
+ #endif
+
+ /* For suppressing unused-parameter warnings */
+ /* (assigns x to itself, so x must be a modifiable lvalue) */
+ #define IGNORE(x) ( (x) = (x) )
+
+#include "tree234.h"
+
+/*
+ * Structure tags
+ */
+typedef struct input_Tag input;
+typedef struct filepos_Tag filepos;
+typedef struct paragraph_Tag paragraph;
+typedef struct word_Tag word;
+typedef struct keywordlist_Tag keywordlist;
+typedef struct keyword_Tag keyword;
+typedef struct userstyle_Tag userstyle;
+typedef struct numberstate_Tag numberstate;
+typedef struct indexdata_Tag indexdata;
+typedef struct indextag_Tag indextag;
+typedef struct indexentry_Tag indexentry;
+typedef struct macrostack_Tag macrostack;
+
+/*
+ * Data structure to hold a file name and index, a line and a
+ * column number, for reporting errors
+ */
+struct filepos_Tag {
+ char *filename;
+ int line, col;
+};
+
+/*
+ * Data structure to hold all the file names etc for input
+ */
+typedef struct pushback_Tag {
+ int chr;
+ filepos pos;
+} pushback;
+struct input_Tag {
+ char **filenames; /* complete list of input files */
+ int nfiles; /* how many in the list */
+ FILE *currfp; /* the currently open one */
+ int currindex; /* which one is that in the list */
+ pushback *pushback; /* pushed-back input characters */
+ int npushback, pushbacksize;
+ filepos pos;
+ int reportcols; /* report column numbers in errors */
+ macrostack *stack; /* macro expansions in force */
+};
+
+/*
+ * Data structure to hold the input form of the source, ie a linked
+ * list of paragraphs
+ */
+struct paragraph_Tag {
+ paragraph *next;
+ int type;
+ wchar_t *keyword; /* for most special paragraphs */
+ word *words; /* list of words in paragraph */
+ int aux; /* number, in a numbered paragraph
+ * or subsection level
+ */
+ word *kwtext; /* chapter/section indication */
+ word *kwtext2; /* numeric-only form of kwtext */
+ filepos fpos;
+
+ paragraph *parent, *child, *sibling; /* for hierarchy navigation */
+
+ void *private_data; /* for temp use in backends */
+};
+enum {
+ para_IM, /* index merge */
+ para_BR, /* bibliography rewrite */
+ para_Rule, /* random horizontal rule */
+ para_Chapter,
+ para_Appendix,
+ para_UnnumberedChapter,
+ para_Heading,
+ para_Subsect,
+ para_Normal,
+ para_Biblio, /* causes no output unless turned ... */
+ para_BiblioCited, /* ... into this paragraph type */
+ para_Bullet,
+ para_NumberedList,
+ para_Code,
+ para_Copyright,
+ para_Preamble,
+ para_NoCite,
+ para_Title,
+ para_VersionID,
+ para_Config, /* configuration directive */
+ para_NotParaType /* placeholder value */
+};
+
+/*
+ * Data structure to hold an individual word
+ */
+struct word_Tag {
+ word *next, *alt;
+ int type;
+ int aux;
+ int breaks; /* can a line break after it? */
+ wchar_t *text;
+ filepos fpos;
+};
+enum {
+ /* ORDERING CONSTRAINT: these normal-word types ... */
+ word_Normal,
+ word_Emph,
+ word_Code, /* monospaced; `quoted' in text */
+ word_WeakCode, /* monospaced, normal in text */
+ /* ... must be in the same order as these space types ... */
+ word_WhiteSpace, /* text is NULL or ignorable */
+ word_EmphSpace, /* WhiteSpace when emphasised */
+ word_CodeSpace, /* WhiteSpace when code */
+ word_WkCodeSpace, /* WhiteSpace when weak code */
+ /* ... and must be in the same order as these quote types ... */
+ word_Quote, /* text is NULL or ignorable */
+ word_EmphQuote, /* Quote when emphasised */
+ word_CodeQuote, /* (can't happen) */
+ word_WkCodeQuote, /* (can't happen) */
+ /* END ORDERING CONSTRAINT */
+ word_internal_endattrs,
+ word_UpperXref, /* \K */
+ word_LowerXref, /* \k */
+ word_XrefEnd, /* (invisible; no text) */
+ word_IndexRef, /* (always an invisible one) */
+ word_HyperLink, /* (invisible) */
+ word_HyperEnd /* (also invisible; no text) */
+};
+/* aux values for attributed words */
+enum {
+ attr_Only = 0x0000, /* a lone word with the attribute */
+ attr_First = 0x0001, /* the first of a series */
+ attr_Last = 0x0002, /* the last of a series */
+ attr_Always = 0x0003, /* any other part of a series */
+ attr_mask = 0x0003,
+};
+/* aux values for quote-type words */
+enum {
+ quote_Open = 0x0010,
+ quote_Close = 0x0020,
+ quote_mask = 0x0030,
+};
+#define isattr(x) ( ( (x) > word_Normal && (x) < word_WhiteSpace ) || \
+ ( (x) > word_WhiteSpace && (x) < word_internal_endattrs ) )
+#define sameattr(x,y) ( (((x)-(y)) & 3) == 0 )
+#define towordstyle(x) ( word_Normal + ((x) & 3) )
+#define tospacestyle(x) ( word_WhiteSpace + ((x) & 3) )
+#define toquotestyle(x) ( word_Quote + ((x) & 3) )
+#define removeattr(x) ( word_Normal + ((x) &~ 3) )
+
+#define attraux(x) ( (x) & attr_mask )
+#define quoteaux(x) ( (x) & quote_mask )
+
+/*
+ * error.c
+ */
+void fatal(int code, ...) NORETURN;
+void error(int code, ...);
+enum {
+ err_nomemory, /* out of memory */
+ err_optnoarg, /* option `-%s' requires an argument */
+ err_nosuchopt, /* unrecognised option `-%s' */
+ err_noinput, /* no input files */
+ err_cantopen, /* unable to open input file `%s' */
+ err_nodata, /* no data in input files */
+ err_brokencodepara, /* line in codepara didn't begin `\c' */
+ err_kwunclosed, /* expected `}' after keyword */
+ err_kwillegal, /* paragraph type expects no keyword */
+ err_kwexpected, /* paragraph type expects a keyword */
+ err_kwtoomany, /* paragraph type expects only 1 */
+ err_bodyillegal, /* paragraph type expects only kws! */
+ err_badparatype, /* invalid command at start of para */
+ err_badmidcmd, /* invalid command in mid-para */
+ err_unexbrace, /* unexpected brace */
+ err_explbr, /* expected `{' after command */
+ err_commenteof, /* EOF inside braced comment */
+ err_kwexprbr, /* expected `}' after cross-ref */
+ err_missingrbrace, /* unclosed braces at end of para */
+ err_nestedstyles, /* unable to nest text styles */
+ err_nestedindex, /* unable to nest `\i' thingys */
+ err_nosuchkw, /* unresolved cross-reference */
+ err_multiBR, /* multiple \BRs on same keyword */
+ err_nosuchidxtag, /* \IM on unknown index tag (warning) */
+ err_cantopenw, /* can't open output file for write */
+ err_macroexists, /* this macro already exists */
+ err_sectjump, /* jump a heading level, eg \C -> \S */
+ err_winhelp_ctxclash, /* WinHelp context ID hash clash */
+ err_multikw, /* keyword clash in sections */
+ err_whatever /* random error of another type */
+};
+
+/*
+ * malloc.c
+ *
+ * When LOGALLOC is defined the allocator functions take the caller's
+ * file and line as extra leading arguments, and same-named macros
+ * supply __FILE__/__LINE__ so that call sites read the same either
+ * way.
+ */
+#ifdef LOGALLOC
+void *smalloc(char *file, int line, int size);
+void *srealloc(char *file, int line, void *p, int size);
+void sfree(char *file, int line, void *p);
+#define smalloc(x) smalloc(__FILE__, __LINE__, x)
+#define srealloc(x, y) srealloc(__FILE__, __LINE__, x, y)
+#define sfree(x) sfree(__FILE__, __LINE__, x)
+#else
+void *smalloc(int size);
+void *srealloc(void *p, int size);
+void sfree(void *p);
+#endif
+void free_word_list(word *w);
+void free_para_list(paragraph *p);
+word *dup_word_list(word *w);
+char *dupstr(char *s);
+
+#define mknew(type) ( (type *) smalloc (sizeof (type)) ) /* allocate one `type' */
+#define mknewa(type, number) ( (type *) smalloc ((number) * sizeof (type)) ) /* allocate `number' of them */
+#define resize(array, len) ( srealloc ((array), (len) * sizeof (*(array))) ) /* result must be assigned back */
+#define lenof(array) ( sizeof(array) / sizeof(*(array)) ) /* element count; true arrays only, not pointers */
+
+/*
+ * ustring.c
+ *
+ * Helpers for wchar_t strings. The notes below come from call sites
+ * in index.c and input.c; ustring.c holds the definitive behaviour.
+ */
+wchar_t *ustrdup(wchar_t *s); /* NOTE(review): input.c can pass NULL (an empty rdstring); presumably handled -- confirm */
+char *ustrtoa(wchar_t *s, char *outbuf, int size);
+int ustrlen(wchar_t *s);
+wchar_t *uadv(wchar_t *s); /* index_merge uses this to step along a "a\0b\0\0" chain */
+wchar_t *ustrcpy(wchar_t *dest, wchar_t *source);
+wchar_t utolower(wchar_t);
+int ustrcmp(wchar_t *lhs, wchar_t *rhs); /* ordering used for macro names */
+int ustricmp(wchar_t *lhs, wchar_t *rhs); /* ordering used for index tags */
+int utoi(wchar_t *);
+int utob(wchar_t *);
+int uisdigit(wchar_t);
+wchar_t *ustrlow(wchar_t *s); /* input.c ignores the result, so presumably works in place -- confirm */
+wchar_t *ustrftime(wchar_t *fmt, struct tm *timespec);
+
+/*
+ * help.c
+ */
+void help(void); /* print the help text, one line at a time, to stdout */
+void usage(void); /* print the usage text, one line at a time, to stdout */
+void showversion(void); /* print "Halibut, " followed by the version string */
+
+/*
+ * licence.c
+ */
+void licence(void);
+
+/*
+ * version.c
+ *
+ * Declaration only: the string itself lives in version.c. Without
+ * `extern' this line would be a tentative definition in every file
+ * that includes this header, which fails to link on toolchains that
+ * do not merge common symbols.
+ */
+extern const char *const version;
+
+/*
+ * misc.c
+ */
+typedef struct stackTag *stack; /* opaque handle; note that the typedef hides a pointer */
+stack stk_new(void);
+void stk_free(stack);
+void stk_push(stack, void *);
+void *stk_pop(stack); /* input.c treats a NULL result as "nothing left to pop" */
+
+typedef struct tagRdstring rdstring; /* wchar_t string built up by rdadd/rdadds */
+struct tagRdstring {
+ int pos, size; /* input.c tests pos > 0 for "non-empty"; size is presumably the allocation -- confirm in misc.c */
+ wchar_t *text; /* NULL in a fresh { 0, 0, NULL } string */
+};
+typedef struct tagRdstringc rdstringc; /* the same thing for plain char */
+struct tagRdstringc {
+ int pos, size;
+ char *text;
+};
+extern const rdstring empty_rdstring;
+extern const rdstringc empty_rdstringc;
+void rdadd(rdstring *rs, wchar_t c); /* append one character (input.c also appends 0 as a separator) */
+void rdadds(rdstring *rs, wchar_t *p); /* append a string */
+wchar_t *rdtrim(rdstring *rs); /* hand back the built text; input.c stores the result as a paragraph keyword */
+void rdaddc(rdstringc *rs, char c);
+void rdaddsc(rdstringc *rs, char *p);
+char *rdtrimc(rdstringc *rs);
+
+int compare_wordlists(word *a, word *b);
+
+void mark_attr_ends(paragraph *sourceform);
+
+typedef struct tagWrappedLine wrappedline; /* one output line produced by wrap_para */
+struct tagWrappedLine {
+ wrappedline *next; /* following line in the list */
+ word *begin, *end; /* first & last words of line */
+ int nspaces; /* number of whitespaces in line */
+ int shortfall; /* how much shorter than max width */
+};
+wrappedline *wrap_para(word *, int, int, int (*)(word *)); /* NOTE(review): parameters are unnamed; see misc.c for their meaning */
+void wrap_free(wrappedline *);
+
+/*
+ * input.c
+ */
+paragraph *read_input(input *in, indexdata *idx);
+
+/*
+ * keywords.c
+ */
+struct keywordlist_Tag {
+ int nkeywords; /* NOTE(review): presumably the number of entries in keys -- confirm */
+ int size;
+ tree234 *keys; /* sorted by `key' field */
+ word **looseends; /* non-keyword list element numbers */
+ int nlooseends; /* NOTE(review): presumably used length of looseends -- confirm */
+ int looseendssize; /* NOTE(review): presumably allocated length of looseends -- confirm */
+};
+struct keyword_Tag {
+ wchar_t *key; /* the keyword itself */
+ word *text; /* "Chapter 2", "Appendix Q"... */
+ /* (NB: filepos are not set) */
+ paragraph *para; /* the paragraph referenced */
+};
+keyword *kw_lookup(keywordlist *, wchar_t *);
+keywordlist *get_keywords(paragraph *);
+void free_keywords(keywordlist *);
+void subst_keywords(paragraph *, keywordlist *);
+
+/*
+ * index.c
+ */
+
+/*
+ * Data structure to hold both sides of the index.
+ */
+struct indexdata_Tag {
+ tree234 *tags; /* holds type `indextag'; ordered by ustricmp on the tag name */
+ tree234 *entries; /* holds type `indexentry'; ordered by compare_wordlists on the text */
+};
+
+/*
+ * Data structure to hold an index tag (LHS of index).
+ */
+struct indextag_Tag {
+ wchar_t *name; /* the tag's own copy of its name */
+ word *implicit_text; /* text of the implicit \IM, or NULL; freed by cleanup_index */
+ word **explicit_texts; /* texts of explicit \IMs; cleanup_index frees the array only */
+ int nexplicit, explicit_size; /* used and allocated lengths of explicit_texts */
+ int nrefs; /* length of refs; set by build_index */
+ indexentry **refs; /* array of entries referenced by tag */
+};
+
+/*
+ * Data structure to hold an index entry (RHS of index).
+ */
+struct indexentry_Tag {
+ word *text; /* points at a tag's implicit/explicit text; not freed with the entry */
+ void *backend_data; /* private to back end */
+};
+
+indexdata *make_index(void);
+void cleanup_index(indexdata *);
+/* index_merge takes responsibility for freeing its final argument (the
+ * word list) iff the merge is implicit; it never takes responsibility
+ * for the tag string before it, which it copies when it needs to keep
+ * it. (The original wording counted arguments from zero.) */
+void index_merge(indexdata *, int is_explicit, wchar_t *, word *);
+void build_index(indexdata *);
+void index_debug(indexdata *);
+indextag *index_findtag(indexdata *idx, wchar_t *name);
+
+/*
+ * contents.c
+ */
+numberstate *number_init(void);
+void number_cfg(numberstate *, paragraph *);
+word *number_mktext(numberstate *, paragraph *, wchar_t *, int , int *);
+void number_free(numberstate *);
+
+/*
+ * biblio.c
+ */
+void gen_citations(paragraph *, keywordlist *);
+
+/*
+ * style.c
+ *
+ * Nothing is stored here yet. ISO C does not allow a struct with no
+ * members (an empty one is a compiler extension), so a placeholder
+ * keeps the header portable until real fields are added.
+ */
+struct userstyle_Tag {
+    int placeholder; /* unused; remove once real members exist */
+};
+
+/*
+ * bk_text.c
+ */
+void text_backend(paragraph *, keywordlist *, indexdata *);
+
+/*
+ * bk_xhtml.c
+ */
+void xhtml_backend(paragraph *, keywordlist *, indexdata *);
+
+/*
+ * bk_whlp.c
+ */
+void whlp_backend(paragraph *, keywordlist *, indexdata *);
+
+#endif
--- /dev/null
+/*
+ * help.c: usage instructions
+ */
+
+#include <stdio.h>
+#include "halibut.h"
+
+static char *helptext[] = { /* lines printed by help(); NULL marks the end */
+ "FIXME: help text goes here",
+ NULL
+};
+
+static char *usagetext[] = { /* lines printed by usage(); NULL marks the end */
+ "FIXME: usage text goes here",
+ NULL
+};
+
+/* Write every line of helptext to stdout, stopping at the NULL sentinel. */
+void help(void) {
+    int line = 0;
+
+    while (helptext[line] != NULL) {
+        puts(helptext[line]);
+        line++;
+    }
+}
+
+/* Write every line of usagetext to stdout, stopping at the NULL sentinel. */
+void usage(void) {
+    int line = 0;
+
+    while (usagetext[line] != NULL) {
+        puts(usagetext[line]);
+        line++;
+    }
+}
+
+void showversion(void) { /* one line on stdout: program name, then the global version string */
+ printf("Halibut, %s\n", version);
+}
--- /dev/null
+/*
+ * index.c: create and collate index data structures
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "halibut.h"
+
+static int compare_tags(void *av, void *bv);
+static int compare_entries(void *av, void *bv);
+
+/*
+ * Allocate an empty index: one tree for the tags and one for the
+ * entries, each with its own ordering function.
+ */
+indexdata *make_index(void) {
+    indexdata *idx;
+
+    idx = mknew(indexdata);
+    idx->tags = newtree234(compare_tags);
+    idx->entries = newtree234(compare_entries);
+
+    return idx;
+}
+
+/*
+ * Allocate an index tag with every field cleared: no name, no texts,
+ * no references.
+ */
+static indextag *make_indextag(void) {
+    indextag *tag = mknew(indextag);
+
+    tag->name = NULL;
+
+    tag->implicit_text = NULL;
+
+    tag->explicit_texts = NULL;
+    tag->nexplicit = 0;
+    tag->explicit_size = 0;
+
+    tag->refs = NULL;
+    tag->nrefs = 0;
+
+    return tag;
+}
+
+/* Tree ordering for index tags: ustricmp on the two tag names. */
+static int compare_tags(void *av, void *bv) {
+    indextag *lhs = av;
+    indextag *rhs = bv;
+
+    return ustricmp(lhs->name, rhs->name);
+}
+
+/*
+ * Search comparison for the tag tree: `av' is a bare tag name and
+ * `bv' is a stored indextag.
+ */
+static int compare_to_find_tag(void *av, void *bv) {
+    wchar_t *wanted = av;
+    indextag *candidate = bv;
+
+    return ustricmp(wanted, candidate->name);
+}
+
+/* Tree ordering for index entries: compare_wordlists on their texts. */
+static int compare_entries(void *av, void *bv) {
+    indexentry *lhs = av;
+    indexentry *rhs = bv;
+
+    return compare_wordlists(lhs->text, rhs->text);
+}
+
+/*
+ * Back-end utility: look up an index tag by name. Returns whatever
+ * find234 returns for the tag tree, searched with compare_to_find_tag.
+ */
+indextag *index_findtag(indexdata *idx, wchar_t *name) {
+    indextag *found;
+
+    found = find234(idx->tags, name, compare_to_find_tag);
+    return found;
+}
+
+/*
+ * Add a \IM. `tags' points to a zero-terminated chain of
+ * zero-terminated strings ("first\0second\0thirdandlast\0\0").
+ * `text' points to a word list.
+ *
+ * Guarantee on calling sequence: all implicit merges are given
+ * before the explicit ones.
+ *
+ * `is_explicit' is nonzero for an explicit \IM and zero for the
+ * implicit merge that input.c performs for an index mark.
+ *
+ * What happens to each tag in the chain:
+ * - unknown tag, implicit: a new tag is stored with its own copy of
+ * the name, and `text' becomes its implicit text.
+ * - unknown tag, explicit: err_nosuchidxtag is reported and `text' is
+ * not attached.
+ * - known tag, implicit: nothing is recorded.
+ * - known tag, explicit: any implicit text is freed and `text' is
+ * appended to the tag's explicit texts.
+ *
+ * The tag strings are never kept; the caller still owns `tags'.
+ */
+void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text) {
+ indextag *t, *existing;
+
+ /*
+ * FIXME: want to warn on overlapping source sets.
+ */
+ for (; *tags; tags = uadv(tags)) {
+ t = make_indextag();
+ t->name = tags; /* borrowed just long enough for the tree comparison */
+ existing = add234(idx->tags, t);
+ if (existing == t) {
+ /*
+ * Duplicate this so we can free it independently.
+ */
+ t->name = ustrdup(tags);
+
+ /*
+ * Every tag has an implicit \IM. So if this tag
+ * doesn't exist and we're explicit, then we should
+ * warn (and drop it, since it won't be referenced).
+ *
+ * NOTE(review): the tag has already been added to the
+ * tree at this point and is not removed again; it stays
+ * there with no text attached.
+ */
+ if (is_explicit) {
+ error(err_nosuchidxtag, tags);
+ continue;
+ }
+
+ /*
+ * Otherwise, this is a new tag with an implicit \IM.
+ */
+ t->implicit_text = text;
+ } else {
+ sfree(t); /* the tree kept the earlier tag, so discard the probe */
+ t = existing;
+ if (!is_explicit) {
+ /*
+ * An implicit \IM for a tag that's had an implicit
+ * \IM before. FIXME: we should check the text
+ * against the existing text and warn on
+ * differences. And check the tag for case match
+ * against the existing tag, likewise.
+ *
+ * NOTE(review): `text' is neither stored nor freed
+ * on this path.
+ */
+ } else {
+ /*
+ * An explicit \IM added to a valid tag. In
+ * particular, this removes the implicit \IM if
+ * present.
+ */
+ if (t->implicit_text) {
+ free_word_list(t->implicit_text);
+ t->implicit_text = NULL;
+ }
+ if (t->nexplicit >= t->explicit_size) {
+ t->explicit_size = t->nexplicit + 8; /* grow in steps of 8 */
+ t->explicit_texts = resize(t->explicit_texts,
+ t->explicit_size);
+ }
+ t->explicit_texts[t->nexplicit++] = text;
+ }
+ }
+ }
+}
+
+/*
+ * Build the final-form index. We now have every tag, with every
+ * \IM, set up in a 2-3-4 tree indexed by tag. We now want to collate
+ * the RHSes of the \IMs, and sort by final form, and decorate the
+ * entries in the original tree with pointers to the RHS entries.
+ *
+ * For each tag, the texts considered are its implicit text if it has
+ * one, otherwise all of its explicit texts. Each text is looked up in
+ * (or added to) i->entries, and the tag's `refs' array is filled with
+ * the resulting entries; texts that compare equal share one entry.
+ * A tag with no texts keeps nrefs == 0 and is given no array.
+ *
+ * New entries start with backend_data set to NULL so that a back end
+ * never sees an indeterminate pointer.
+ */
+void build_index(indexdata *i) {
+    indextag *t;
+    word **ta;
+    int ti;
+    int j;
+
+    for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) {
+        if (t->implicit_text) {
+            t->nrefs = 1;
+            ta = &t->implicit_text;
+        } else {
+            t->nrefs = t->nexplicit;
+            ta = t->explicit_texts;
+        }
+        if (t->nrefs) {
+            t->refs = mknewa(indexentry *, t->nrefs);
+            for (j = 0; j < t->nrefs; j++) {
+                indexentry *ent = mknew(indexentry);
+                ent->text = *ta++;
+                ent->backend_data = NULL;
+                t->refs[j] = add234(i->entries, ent);
+                if (t->refs[j] != ent)     /* duplicate */
+                    sfree(ent);
+            }
+        }
+    }
+}
+
+/*
+ * Free an index: every tag (its name, implicit text, and the arrays
+ * of explicit texts and references), then every entry, then both
+ * trees and the indexdata itself. The word lists in explicit_texts
+ * and the entries' texts are not freed here.
+ */
+void cleanup_index(indexdata *i) {
+    indextag *tag;
+    indexentry *entry;
+    int n;
+
+    n = 0;
+    while ((tag = (indextag *)index234(i->tags, n)) != NULL) {
+        sfree(tag->name);
+        free_word_list(tag->implicit_text);
+        sfree(tag->explicit_texts);
+        sfree(tag->refs);
+        sfree(tag);
+        n++;
+    }
+    freetree234(i->tags);
+
+    n = 0;
+    while ((entry = (indexentry *)index234(i->entries, n)) != NULL) {
+        sfree(entry);
+        n++;
+    }
+    freetree234(i->entries);
+
+    sfree(i);
+}
+
+static void dbg_prtwordlist(int level, word *w);
+static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text);
+
+/*
+ * Dump the whole index to stdout: every tag with its implicit and
+ * explicit \IM texts, then every collated entry.
+ */
+void index_debug(indexdata *i) {
+    indextag *tag;
+    indexentry *entry;
+    int pos, k;
+
+    printf("\nINDEX TAGS\n==========\n\n");
+    pos = 0;
+    while ((tag = (indextag *)index234(i->tags, pos)) != NULL) {
+        printf("\n");
+        if (tag->implicit_text)
+            dbg_prtmerge(0, tag->name, tag->implicit_text);
+        k = 0;
+        while (k < tag->nexplicit) {
+            dbg_prtmerge(1, tag->name, tag->explicit_texts[k]);
+            k++;
+        }
+        pos++;
+    }
+
+    printf("\nINDEX ENTRIES\n=============\n\n");
+    pos = 0;
+    while ((entry = (indexentry *)index234(i->entries, pos)) != NULL) {
+        printf("\n");
+        printf("{\n");
+        dbg_prtwordlist(1, entry->text);
+        printf("}\n");
+        pos++;
+    }
+}
+
+/*
+ * Print one \IM for debugging: whether it is explicit or implicit,
+ * the tag name (one putchar per wide character), and the word list.
+ */
+static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text) {
+    const char *prefix;
+    wchar_t *ch;
+
+    if (is_explicit)
+        prefix = "ex";
+    else
+        prefix = "im";
+    printf("\\IM: %splicit: \"", prefix);
+
+    ch = tag;
+    while (*ch) {
+        putchar(*ch);
+        ch++;
+    }
+
+    printf("\" {\n");
+    dbg_prtwordlist(1, text);
+    printf("}\n");
+}
+
+/*
+ * Print a word list for debugging, one word per line, indented four
+ * columns per `level'. A word's alternative list is printed nested
+ * one level deeper.
+ */
+static void dbg_prtwordlist(int level, word *w) {
+    int indent = level * 4;
+    word *cur = w;
+
+    while (cur) {
+        printf("%*sword %d ", indent, "", cur->type);
+
+        if (!cur->text) {
+            printf("(no text)");
+        } else {
+            wchar_t *ch = cur->text;
+            printf("\"");
+            while (*ch) {
+                putchar(*ch);
+                ch++;
+            }
+            printf("\"");
+        }
+
+        if (cur->alt) {
+            printf(" alt = {\n");
+            dbg_prtwordlist(level + 1, cur->alt);
+            printf("%*s}", indent, "");
+        }
+
+        printf("\n");
+        cur = cur->next;
+    }
+}
--- /dev/null
+/*
+ * input.c: read the source form
+ */
+
+#include <stdio.h>
+#include <assert.h>
+#include <time.h>
+#include "halibut.h"
+
+#define TAB_STOP 8 /* for column number tracking */
+
+/*
+ * Reset the input's position to the start of file `fname': line 1,
+ * and column 1 when columns are being reported, otherwise -1.
+ */
+static void setpos(input *in, char *fname) {
+    in->pos.filename = fname;
+    in->pos.line = 1;
+    if (in->reportcols)
+        in->pos.col = 1;
+    else
+        in->pos.col = -1;
+}
+
+/*
+ * Push character `c', with the position it was read from, back onto
+ * the input. The pushback array grows to 16 slots beyond the current
+ * count whenever it is full.
+ */
+static void unget(input *in, int c, filepos *pos) {
+    int slot = in->npushback;
+
+    if (slot >= in->pushbacksize) {
+        in->pushbacksize = slot + 16;
+        in->pushback = resize(in->pushback, in->pushbacksize);
+    }
+
+    in->pushback[slot].pos = *pos;     /* structure copy */
+    in->pushback[slot].chr = c;
+    in->npushback = slot + 1;
+}
+
+/* ---------------------------------------------------------------------- */
+/*
+ * Macro subsystem
+ *
+ * A `macro' is one definition, kept in a tree ordered by name. A
+ * `macrostack' is one expansion in progress; in->stack points at the
+ * innermost one and get() reads characters from it.
+ */
+typedef struct macro_Tag macro;
+struct macro_Tag {
+ wchar_t *name, *text; /* both freed by macrocleanup */
+};
+struct macrostack_Tag {
+ macrostack *next; /* the expansion this one interrupted, if any */
+ wchar_t *text; /* borrowed from the macro definition */
+ int ptr, npushback; /* read offset in text; pushback depth when the expansion began */
+ filepos pos; /* position of the macro invocation */
+};
+/* Tree ordering for macros: ustrcmp on the two names. */
+static int macrocmp(void *av, void *bv) {
+    macro *lhs = av;
+    macro *rhs = bv;
+
+    return ustrcmp(lhs->name, rhs->name);
+}
+/*
+ * Define macro `name' with replacement `text'. Both strings become
+ * the property of this function: on success they are kept in the
+ * tree (and later freed by macrocleanup); if a macro of that name
+ * already exists, err_macroexists is reported at `fpos' and both
+ * strings are freed, along with the node that was allocated for them.
+ */
+static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text,
+                     filepos fpos) {
+    macro *m = mknew(macro);
+    m->name = name;
+    m->text = text;
+    if (add234(macros, m) != m) {
+        error(err_macroexists, &fpos, name);
+        sfree(name);
+        sfree(text);
+        sfree(m);                      /* the tree did not take it */
+    }
+}
+/*
+ * If `name' is a defined macro, start expanding it: push a new
+ * macrostack entry onto in->stack, recording the invocation position
+ * and the current pushback depth, and return TRUE. Return FALSE,
+ * changing nothing, if no such macro exists.
+ */
+static int macrolookup(tree234 *macros, input *in, wchar_t *name,
+                       filepos *pos) {
+    macro key;
+    macro *def;
+    macrostack *frame;
+
+    key.name = name;
+    def = find234(macros, &key, NULL);
+    if (!def)
+        return FALSE;
+
+    frame = mknew(macrostack);
+    frame->next = in->stack;
+    frame->text = def->text;
+    frame->pos = *pos;                 /* structure copy */
+    frame->ptr = 0;
+    frame->npushback = in->npushback;
+    in->stack = frame;
+    return TRUE;
+}
+/* Free every macro (name, text and node), then the tree itself. */
+static void macrocleanup(tree234 *macros) {
+    macro *def;
+    int n = 0;
+
+    while ((def = (macro *)index234(macros, n)) != NULL) {
+        sfree(def->name);
+        sfree(def->text);
+        sfree(def);
+        n++;
+    }
+    freetree234(macros);
+}
+
+/*
+ * Read the next input character. Can return EOF.
+ *
+ * Sources are tried in this order:
+ *  1. characters pushed back by unget() since the innermost macro
+ *     expansion began (or all of them, if no expansion is active);
+ *  2. the innermost macro expansion, which is popped as soon as its
+ *     last character has been handed out;
+ *  3. the current file, which is closed on reaching EOF.
+ * With none of those available the result is EOF.
+ *
+ * If `pos' is non-NULL it receives the position of the character
+ * returned: the saved position for a pushed-back character, the
+ * position of the macro invocation for a character from an expansion,
+ * and the file position otherwise. It is left untouched when EOF is
+ * returned because no file is open.
+ */
+static int get(input *in, filepos *pos) {
+    int pushbackpt = in->stack ? in->stack->npushback : 0;
+    if (in->npushback > pushbackpt) {
+        --in->npushback;
+        if (pos)
+            *pos = in->pushback[in->npushback].pos;   /* structure copy */
+        return in->pushback[in->npushback].chr;
+    }
+    else if (in->stack) {
+        wchar_t c = in->stack->text[in->stack->ptr];
+        /* Report expanded text at the place the macro was invoked. */
+        if (pos)
+            *pos = in->stack->pos;                    /* structure copy */
+        if (in->stack->text[++in->stack->ptr] == L'\0') {
+            macrostack *tmp = in->stack;
+            in->stack = tmp->next;
+            sfree(tmp);
+        }
+        return c;
+    }
+    else if (in->currfp) {
+        int c = getc(in->currfp);
+
+        if (c == EOF) {
+            fclose(in->currfp);
+            in->currfp = NULL;
+        }
+        /* Track line numbers, for error reporting */
+        if (pos)
+            *pos = in->pos;
+        if (in->reportcols) {
+            switch (c) {
+              case '\t':
+                /*
+                 * Columns are 1-based, so tab stops sit at columns
+                 * 1, 1+TAB_STOP, 1+2*TAB_STOP, ...; move to the next
+                 * one after the tab's own column.
+                 */
+                in->pos.col = 1 + ((in->pos.col - 1) / TAB_STOP + 1) * TAB_STOP;
+                break;
+              case '\n':
+                in->pos.col = 1;
+                in->pos.line++;
+                break;
+              default:
+                in->pos.col++;
+                break;
+            }
+        } else {
+            in->pos.col = -1;
+            if (c == '\n')
+                in->pos.line++;
+        }
+        /* FIXME: do input charmap translation. We should be returning
+         * Unicode here. */
+        return c;
+    } else
+        return EOF;
+}
+
+/*
+ * Lexical analysis of source files.
+ */
+typedef struct token_Tag token;
+struct token_Tag {
+ int type; /* one of the tok_* values */
+ int cmd, aux; /* for tok_cmd: command id and its parameter; for tok_word: aux is TRUE if it ends in a hyphen */
+ wchar_t *text; /* allocated text for tok_cmd/tok_word, else NULL; released with dtor() */
+ filepos pos; /* where the token started */
+};
+enum { /* values for token.type */
+ tok_eof, /* end of file */
+ tok_eop, /* end of paragraph */
+ tok_white, /* whitespace */
+ tok_word, /* a word or word fragment */
+ tok_cmd, /* \command */
+ tok_lbrace, /* { */
+ tok_rbrace /* } */
+};
+
+/* Halibut command keywords: values for token.cmd, assigned by match_kw. */
+enum {
+ c__invalid, /* invalid command */
+ c__comment, /* comment command (\#) */
+ c__escaped, /* escaped character */
+ c__nbsp, /* nonbreaking space */
+ c_A, /* appendix heading */
+ c_B, /* bibliography entry */
+ c_BR, /* bibliography rewrite */
+ c_C, /* chapter heading */
+ c_H, /* heading */
+ c_I, /* invisible index mark */
+ c_IM, /* index merge/rewrite */
+ c_K, /* capitalised cross-reference */
+ c_S, /* \S, \S2, ...: aux is the number given (1 if none) */
+ c_U, /* unnumbered-chapter heading */
+ c_W, /* Web hyperlink */
+ c_b, /* bulletted list */
+ c_c, /* code */
+ c_cfg, /* configuration directive */
+ c_copyright, /* copyright statement */
+ c_cw, /* weak code */
+ c_date, /* document processing date */
+ c_define, /* macro definition */
+ c_e, /* emphasis */
+ c_i, /* visible index mark */
+ c_ii, /* uncapitalised visible index mark */
+ c_k, /* uncapitalised cross-reference */
+ c_n, /* numbered list */
+ c_nocite, /* bibliography trickery */
+ c_preamble, /* document preamble text */
+ c_q, /* quote marks */
+ c_rule, /* horizontal rule */
+ c_title, /* document title */
+ c_u, /* \uXXXX: aux is the char code parsed from the hex digits */
+ c_versionid /* document RCS id */
+};
+
+/*
+ * Character classification on raw character codes (no locale).
+ * Perhaps whitespace should be defined in a more Unicode-friendly way?
+ * Arguments are evaluated more than once, so pass plain variables.
+ */
+#define isnl(c)    ( (c) == 0x0A )
+#define iswhite(c) ( (c) == 0x20 || (c) == 0x09 || (c) == 0x0D || (c) == 0x0A )
+#define isdec(c)   ( (c) >= '0' && (c) <= '9' )
+#define fromdec(c) ( (c) - '0' )
+#define ishex(c)   ( isdec(c) || ((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f') )
+#define fromhex(c) ( (c) > '9' ? ((c) & 0xDF) - 'A' + 10 : (c) - '0' )
+#define iscmd(c)   ( isdec(c) || ((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') )
+
+/*
+ * Keyword comparison function. Like strcmp, but between a wchar_t *
+ * and a char *: returns the difference at the first position where
+ * the strings differ, or 0 if they are equal.
+ */
+static int kwcmp(wchar_t const *p, char const *q) {
+    for (;;) {
+        int diff = *p - *q;
+        if (diff != 0 || *p == 0 || *q == 0)
+            return diff;
+        p++;
+        q++;
+    }
+}
+
+/*
+ * Match a keyword: set tok->cmd (and, for \S and \u, tok->aux) from
+ * the command name in tok->text. Unrecognised names get c__invalid.
+ */
+static void match_kw(token *tok) {
+    /*
+     * FIXME. The ids are explicit in here so as to allow long-name
+     * equivalents to the various very short keywords.
+     *
+     * The table is binary-searched with kwcmp, so it must stay in
+     * ascending character-code order.
+     */
+    static const struct { char const *name; int id; } keywords[] = {
+        {"#", c__comment},             /* comment command (\#) */
+        {"-", c__escaped},             /* nonbreaking hyphen */
+        {"A", c_A},                    /* appendix heading */
+        {"B", c_B},                    /* bibliography entry */
+        {"BR", c_BR},                  /* bibliography rewrite */
+        {"C", c_C},                    /* chapter heading */
+        {"H", c_H},                    /* heading */
+        {"I", c_I},                    /* invisible index mark */
+        {"IM", c_IM},                  /* index merge/rewrite */
+        {"K", c_K},                    /* capitalised cross-reference */
+        {"U", c_U},                    /* unnumbered-chapter heading */
+        {"W", c_W},                    /* Web hyperlink */
+        {"\\", c__escaped},            /* escaped backslash (\\) */
+        {"_", c__nbsp},                /* nonbreaking space (\_) */
+        {"b", c_b},                    /* bulletted list */
+        {"c", c_c},                    /* code */
+        {"cfg", c_cfg},                /* configuration directive */
+        {"copyright", c_copyright},    /* copyright statement */
+        {"cw", c_cw},                  /* weak code */
+        {"date", c_date},              /* document processing date */
+        {"define", c_define},          /* macro definition */
+        {"e", c_e},                    /* emphasis */
+        {"i", c_i},                    /* visible index mark */
+        {"ii", c_ii},                  /* uncapitalised visible index mark */
+        {"k", c_k},                    /* uncapitalised cross-reference */
+        {"n", c_n},                    /* numbered list */
+        {"nocite", c_nocite},          /* bibliography trickery */
+        {"preamble", c_preamble},      /* document preamble text */
+        {"q", c_q},                    /* quote marks */
+        {"rule", c_rule},              /* horizontal rule */
+        {"title", c_title},            /* document title */
+        {"versionid", c_versionid},    /* document RCS id */
+        {"{", c__escaped},             /* escaped lbrace (\{) */
+        {"}", c__escaped},             /* escaped rbrace (\}) */
+    };
+    int lo, hi;
+    wchar_t first = tok->text[0];
+
+    /*
+     * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
+     * doesn't match correctly, we just fall through to the
+     * binary-search phase.
+     */
+    if (first == 'S') {
+        /* Decimal digits should make up the rest; bare \S counts as 1. */
+        wchar_t *rest = tok->text + 1;
+        int level;
+
+        if (*rest == 0) {
+            level = 1;
+        } else {
+            for (level = 0; *rest && isdec(*rest); rest++)
+                level = 10 * level + fromdec(*rest);
+        }
+        if (*rest == 0) {
+            tok->cmd = c_S;
+            tok->aux = level;
+            return;
+        }
+    } else if (first == 'u') {
+        /* Hex digits should make up the rest. */
+        wchar_t *rest = tok->text + 1;
+        int code;
+
+        for (code = 0; *rest && ishex(*rest); rest++)
+            code = 16 * code + fromhex(*rest);
+        if (*rest == 0) {
+            tok->cmd = c_u;
+            tok->aux = code;
+            return;
+        }
+    }
+
+    /* Binary search; lo and hi are exclusive bounds. */
+    lo = -1;
+    hi = sizeof(keywords)/sizeof(*keywords);
+    while (hi - lo > 1) {
+        int mid = (lo + hi) / 2;
+        int cmp = kwcmp(tok->text, keywords[mid].name);
+
+        if (cmp == 0) {
+            tok->cmd = keywords[mid].id;
+            return;
+        }
+        if (cmp < 0)
+            hi = mid;
+        else
+            lo = mid;
+    }
+
+    tok->cmd = c__invalid;
+}
+
+
+/*
+ * Read a token from the input file, in the normal way (`normal' in
+ * the sense that code paragraphs work a different way).
+ *
+ * The token is returned by value. Its `text' is freshly allocated
+ * for tok_cmd and tok_word and NULL for every other type; the caller
+ * releases it (see dtor). `pos' is the position of the token's first
+ * character. `cmd' and `aux' are only assigned for the token types
+ * that use them.
+ *
+ * - A run of whitespace gives tok_eop if it held more than one
+ * newline, otherwise tok_white; a run that reaches end of input
+ * gives tok_eof.
+ * - A backslash starts a command: one of the single characters
+ * - \ _ # { }, or `u' plus up to four hex digits, or a run of
+ * letters and digits. match_kw then fills in cmd/aux.
+ * NOTE(review): if the character after the backslash is none of
+ * these, it has been consumed and is not pushed back, and the
+ * command text is whatever ustrdup makes of an empty rdstring.
+ * - Braces are tokens by themselves.
+ * - Anything else starts a word, described in the comment on that
+ * branch below.
+ */
+token get_token(input *in) {
+ int c;
+ int nls;
+ token ret;
+ rdstring rs = { 0, 0, NULL };
+ filepos cpos;
+
+ ret.text = NULL; /* default */
+ c = get(in, &cpos);
+ ret.pos = cpos;
+ if (iswhite(c)) { /* tok_white or tok_eop */
+ nls = 0;
+ do {
+ if (isnl(c))
+ nls++;
+ } while ((c = get(in, &cpos)) != EOF && iswhite(c));
+ if (c == EOF) {
+ ret.type = tok_eof;
+ return ret;
+ }
+ unget(in, c, &cpos); /* first non-white character belongs to the next token */
+ ret.type = (nls > 1 ? tok_eop : tok_white);
+ return ret;
+ } else if (c == EOF) { /* tok_eof */
+ ret.type = tok_eof;
+ return ret;
+ } else if (c == '\\') { /* tok_cmd */
+ c = get(in, &cpos);
+ if (c == '-' || c == '\\' || c == '_' ||
+ c == '#' || c == '{' || c == '}') {
+ /* single-char command */
+ rdadd(&rs, c);
+ } else if (c == 'u') {
+ int len = 0; /* counts the `u' itself, hence the limit of 5 */
+ do {
+ rdadd(&rs, c);
+ len++;
+ c = get(in, &cpos);
+ } while (ishex(c) && len < 5);
+ unget(in, c, &cpos);
+ } else if (iscmd(c)) {
+ do {
+ rdadd(&rs, c);
+ c = get(in, &cpos);
+ } while (iscmd(c));
+ unget(in, c, &cpos);
+ }
+ /*
+ * Now match the command against the list of available
+ * ones.
+ */
+ ret.type = tok_cmd;
+ ret.text = ustrdup(rs.text);
+ match_kw(&ret);
+ sfree(rs.text);
+ return ret;
+ } else if (c == '{') { /* tok_lbrace */
+ ret.type = tok_lbrace;
+ return ret;
+ } else if (c == '}') { /* tok_rbrace */
+ ret.type = tok_rbrace;
+ return ret;
+ } else { /* tok_word */
+ /*
+ * Read a word: the longest possible contiguous sequence of
+ * things other than whitespace, backslash, braces and
+ * hyphen. A hyphen terminates the word but is returned as
+ * part of it; everything else is pushed back for the next
+ * token. The `aux' field contains TRUE if the word ends in
+ * a hyphen.
+ */
+ ret.aux = FALSE; /* assumed for now */
+ while (1) {
+ if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) {
+ /* Put back the character that caused termination */
+ unget(in, c, &cpos);
+ break;
+ } else {
+ rdadd(&rs, c);
+ if (c == '-') {
+ ret.aux = TRUE;
+ break; /* hyphen terminates word */
+ }
+ }
+ c = get(in, &cpos);
+ }
+ ret.type = tok_word;
+ ret.text = ustrdup(rs.text);
+ sfree(rs.text);
+ return ret;
+ }
+}
+
+/*
+ * Peek at the next input character and report whether it is an open
+ * brace (for telling code paragraphs from paragraphs which merely
+ * start with code). The character is pushed straight back, so the
+ * input is left as it was found.
+ */
+int isbrace(input *in) {
+    filepos where;
+    int next;
+
+    next = get(in, &where);
+    unget(in, next, &where);
+
+    if (next == '{')
+        return 1;
+    return 0;
+}
+
+/*
+ * Read the rest of a line that starts `\c'. Including nothing at
+ * all (tok_word with empty text).
+ *
+ * One leading space, if present, is discarded. The line's newline
+ * (or EOF) is pushed back for the next reader, and a carriage return
+ * immediately before the newline is dropped.
+ */
+token get_codepar_token(input *in) {
+    token tk;
+    rdstring line = { 0, 0, NULL };
+    filepos where;
+    int cur, prev;
+
+    tk.type = tok_word;
+
+    cur = get(in, &where);             /* expect (and discard) one space */
+    if (cur == ' ')
+        cur = get(in, &where);
+    tk.pos = where;
+
+    for (;;) {
+        if (cur == EOF || isnl(cur))
+            break;
+        prev = cur;
+        cur = get(in, &where);
+        /* Discard \r just before \n. */
+        if (!(prev == 13 && isnl(cur)))
+            rdadd(&line, prev);
+    }
+    unget(in, cur, &where);
+
+    tk.text = ustrdup(line.text);
+    sfree(line.text);
+    return tk;
+}
+
+/*
+ * Adds a new word to a linked list.
+ *
+ * `*hptrptr' is the list's tail pointer: the location where the next
+ * word's address must be stored. A heap copy of `newword' is linked
+ * in there and the tail pointer is advanced to the copy's `next'
+ * field. Returns the copy, or NULL (doing nothing) if `hptrptr' is
+ * itself NULL.
+ */
+static word *addword(word newword, word ***hptrptr) {
+    word *node;
+    word **tail;
+
+    if (hptrptr == NULL)
+        return NULL;
+
+    node = mknew(word);
+    *node = newword;                   /* structure copy */
+    node->next = NULL;
+
+    tail = *hptrptr;
+    *tail = node;
+    *hptrptr = &node->next;
+
+    return node;
+}
+
+/*
+ * Adds a new paragraph to a linked list.
+ *
+ * `*hptrptr' is the list's tail pointer. A heap copy of `newpara' is
+ * linked in there, the tail pointer is advanced to the copy's `next'
+ * field, and the copy is returned.
+ */
+static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
+    paragraph *node;
+    paragraph **tail;
+
+    node = mknew(paragraph);
+    *node = newpara;                   /* structure copy */
+    node->next = NULL;
+
+    tail = *hptrptr;
+    *tail = node;
+    *hptrptr = &node->next;
+
+    return node;
+}
+
+/*
+ * Destructor before token is reassigned; should catch most memory
+ * leaks. Only the token's text is freed. The argument is
+ * parenthesised so that any token-valued expression may be passed.
+ */
+#define dtor(t) ( sfree((t).text) )
+
+/*
+ * Reads a single file (ie until get() returns EOF)
+ */
+static void read_file(paragraph ***ret, input *in, indexdata *idx) {
+ token t;
+ paragraph par;
+ word wd, **whptr, **idximplicit;
+ tree234 *macros;
+ wchar_t utext[2], *wdtext;
+ int style, spcstyle;
+ int already;
+ int iswhite, seenwhite;
+ int type;
+ struct stack_item {
+ enum {
+ stack_nop = 0, /* do nothing (for error recovery) */
+ stack_ualt = 1, /* \u alternative */
+ stack_style = 2, /* \e, \c, \cw */
+ stack_idx = 4, /* \I, \i, \ii */
+ stack_hyper = 8, /* \W */
+ stack_quote = 16, /* \q */
+ } type;
+ word **whptr; /* to restore from \u alternatives */
+ word **idximplicit; /* to restore from \u alternatives */
+ } *sitem;
+ stack parsestk;
+ word *indexword, *uword, *iword;
+ word *idxwordlist;
+ rdstring indexstr;
+ int index_downcase, index_visible, indexing;
+ const rdstring nullrs = { 0, 0, NULL };
+ wchar_t uchr;
+
+ t.text = NULL;
+ macros = newtree234(macrocmp);
+
+ /*
+ * Loop on each paragraph.
+ */
+ while (1) {
+ par.words = NULL;
+ par.keyword = NULL;
+ whptr = &par.words;
+
+ /*
+ * Get a token.
+ */
+ dtor(t), t = get_token(in);
+ if (t.type == tok_eof)
+ return;
+
+ /*
+ * Parse code paragraphs separately.
+ */
+ if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
+ par.type = para_Code;
+ par.fpos = t.pos;
+ while (1) {
+ dtor(t), t = get_codepar_token(in);
+ wd.type = word_WeakCode;
+ wd.breaks = FALSE; /* shouldn't need this... */
+ wd.text = ustrdup(t.text);
+ wd.alt = NULL;
+ wd.fpos = t.pos;
+ addword(wd, &whptr);
+ dtor(t), t = get_token(in);
+ if (t.type == tok_white) {
+ /*
+ * The newline after a code-paragraph line
+ */
+ dtor(t), t = get_token(in);
+ }
+ if (t.type == tok_eop || t.type == tok_eof)
+ break;
+ else if (t.type != tok_cmd || t.cmd != c_c) {
+ error(err_brokencodepara, &t.pos);
+ addpara(par, ret);
+ while (t.type != tok_eop) /* error recovery: */
+ dtor(t), t = get_token(in); /* eat rest of paragraph */
+ goto codeparabroken; /* ick, but such is life */
+ }
+ }
+ addpara(par, ret);
+ codeparabroken:
+ continue;
+ }
+
+ /*
+ * This token begins a paragraph. See if it's one of the
+ * special commands that define a paragraph type.
+ *
+ * (note that \# is special in a way, and \nocite takes no
+ * text)
+ */
+ par.type = para_Normal;
+ if (t.type == tok_cmd) {
+ int needkw;
+ int is_macro = FALSE;
+
+ par.fpos = t.pos;
+ switch (t.cmd) {
+ default:
+ needkw = -1;
+ break;
+ case c__invalid:
+ error(err_badparatype, t.text, &t.pos);
+ needkw = 4;
+ break;
+ case c__comment:
+ if (isbrace(in))
+ break; /* `\#{': isn't a comment para */
+ do {
+ dtor(t), t = get_token(in);
+ } while (t.type != tok_eop && t.type != tok_eof);
+ continue; /* next paragraph */
+ /*
+ * `needkw' values:
+ *
+ * 1 -- exactly one keyword
+ * 2 -- at least one keyword
+ * 4 -- any number of keywords including zero
+ * 8 -- at least one keyword and then nothing else
+ * 16 -- nothing at all! no keywords, no body
+ * 32 -- no keywords at all
+ */
+ case c_A: needkw = 2; par.type = para_Appendix; break;
+ case c_B: needkw = 2; par.type = para_Biblio; break;
+ case c_BR: needkw = 1; par.type = para_BR; break;
+ case c_C: needkw = 2; par.type = para_Chapter; break;
+ case c_H: needkw = 2; par.type = para_Heading;
+ par.aux = 0;
+ break;
+ case c_IM: needkw = 2; par.type = para_IM; break;
+ case c_S: needkw = 2; par.type = para_Subsect;
+ par.aux = t.aux; break;
+ case c_U: needkw = 32; par.type = para_UnnumberedChapter; break;
+ /* For \b and \n the keyword is optional */
+ case c_b: needkw = 4; par.type = para_Bullet; break;
+ case c_n: needkw = 4; par.type = para_NumberedList; break;
+ case c_cfg: needkw = 8; par.type = para_Config; break;
+ case c_copyright: needkw = 32; par.type = para_Copyright; break;
+ case c_define: is_macro = TRUE; needkw = 1; break;
+ /* For \nocite the keyword is _everything_ */
+ case c_nocite: needkw = 8; par.type = para_NoCite; break;
+ case c_preamble: needkw = 32; par.type = para_Preamble; break;
+ case c_rule: needkw = 16; par.type = para_Rule; break;
+ case c_title: needkw = 32; par.type = para_Title; break;
+ case c_versionid: needkw = 32; par.type = para_VersionID; break;
+ }
+
+ if (needkw > 0) {
+ rdstring rs = { 0, 0, NULL };
+ int nkeys = 0;
+ filepos fp;
+
+ /* Get keywords. */
+ dtor(t), t = get_token(in);
+ fp = t.pos;
+ while (t.type == tok_lbrace) {
+ /* This is a keyword. */
+ nkeys++;
+ /* FIXME: there will be bugs if anyone specifies an
+ * empty keyword (\foo{}), so trap this case. */
+ while (dtor(t), t = get_token(in),
+ t.type == tok_word ||
+ t.type == tok_white ||
+ (t.type == tok_cmd && t.cmd == c__nbsp) ||
+ (t.type == tok_cmd && t.cmd == c__escaped)) {
+ if (t.type == tok_white ||
+ (t.type == tok_cmd && t.cmd == c__nbsp))
+ rdadd(&rs, ' ');
+ else
+ rdadds(&rs, t.text);
+ }
+ if (t.type != tok_rbrace) {
+ error(err_kwunclosed, &t.pos);
+ continue;
+ }
+ rdadd(&rs, 0); /* add string terminator */
+ dtor(t), t = get_token(in); /* eat right brace */
+ }
+
+ rdadd(&rs, 0); /* add string terminator */
+
+ /* See whether we have the right number of keywords. */
+ if ((needkw & 48) && nkeys > 0)
+ error(err_kwillegal, &fp);
+ if ((needkw & 11) && nkeys == 0)
+ error(err_kwexpected, &fp);
+ if ((needkw & 5) && nkeys > 1)
+ error(err_kwtoomany, &fp);
+
+ if (is_macro) {
+ /*
+ * Macro definition. Get the rest of the line
+ * as a code-paragraph token, repeatedly until
+ * there's nothing more left of it. Separate
+ * with newlines.
+ */
+ rdstring macrotext = { 0, 0, NULL };
+ while (1) {
+ dtor(t), t = get_codepar_token(in);
+ if (macrotext.pos > 0)
+ rdadd(&macrotext, L'\n');
+ rdadds(&macrotext, t.text);
+ dtor(t), t = get_token(in);
+ if (t.type == tok_eop) break;
+ }
+ macrodef(macros, rs.text, macrotext.text, fp);
+ continue; /* next paragraph */
+ }
+
+ par.keyword = rdtrim(&rs);
+
+ /* Move to EOP in case of needkw==8 or 16 (no body) */
+ if (needkw & 24) {
+ if (t.type != tok_eop && t.type != tok_eof) {
+ error(err_bodyillegal, &t.pos);
+ /* Error recovery: eat the rest of the paragraph */
+ while (t.type != tok_eop && t.type != tok_eof)
+ dtor(t), t = get_token(in);
+ }
+ addpara(par, ret);
+ continue; /* next paragraph */
+ }
+ }
+ }
+
+ /*
+ * Now read the actual paragraph, word by word, adding to
+ * the paragraph list.
+ *
+ * Mid-paragraph commands:
+ *
+ * \K \k
+ * \c \cw
+ * \e
+ * \i \ii
+ * \I
+ * \u
+ * \W
+ * \date
+ * \\ \{ \}
+ */
+ parsestk = stk_new();
+ style = word_Normal;
+ spcstyle = word_WhiteSpace;
+ indexing = FALSE;
+ seenwhite = TRUE;
+ while (t.type != tok_eop && t.type != tok_eof) {
+ iswhite = FALSE;
+ already = FALSE;
+ if (t.type == tok_cmd && t.cmd == c__escaped) {
+ t.type = tok_word; /* nice and simple */
+ t.aux = 0; /* even if `\-' - nonbreaking! */
+ }
+ if (t.type == tok_cmd && t.cmd == c__nbsp) {
+ t.type = tok_word; /* nice and simple */
+ sfree(t.text);
+ t.text = ustrdup(L" "); /* text is ` ' not `_' */
+ t.aux = 0; /* (nonbreaking) */
+ }
+ switch (t.type) {
+ case tok_white:
+ if (whptr == &par.words)
+ break; /* strip whitespace at start of para */
+ wd.text = NULL;
+ wd.type = spcstyle;
+ wd.alt = NULL;
+ wd.aux = 0;
+ wd.fpos = t.pos;
+ wd.breaks = FALSE;
+ if (indexing)
+ rdadd(&indexstr, ' ');
+ if (!indexing || index_visible)
+ addword(wd, &whptr);
+ if (indexing)
+ addword(wd, &idximplicit);
+ iswhite = TRUE;
+ break;
+ case tok_word:
+ if (indexing)
+ rdadds(&indexstr, t.text);
+ wd.type = style;
+ wd.alt = NULL;
+ wd.aux = 0;
+ wd.fpos = t.pos;
+ wd.breaks = t.aux;
+ if (!indexing || index_visible) {
+ wd.text = ustrdup(t.text);
+ addword(wd, &whptr);
+ }
+ if (indexing) {
+ wd.text = ustrdup(t.text);
+ addword(wd, &idximplicit);
+ }
+ break;
+ case tok_lbrace:
+ error(err_unexbrace, &t.pos);
+ /* Error recovery: push nop */
+ sitem = mknew(struct stack_item);
+ sitem->type = stack_nop;
+ stk_push(parsestk, sitem);
+ break;
+ case tok_rbrace:
+ sitem = stk_pop(parsestk);
+ if (!sitem)
+ error(err_unexbrace, &t.pos);
+ else {
+ if (sitem->type & stack_ualt) {
+ whptr = sitem->whptr;
+ idximplicit = sitem->idximplicit;
+ }
+ if (sitem->type & stack_style) {
+ style = word_Normal;
+ spcstyle = word_WhiteSpace;
+ }
+ if (sitem->type & stack_idx) {
+ indexword->text = ustrdup(indexstr.text);
+ if (index_downcase)
+ ustrlow(indexword->text);
+ indexing = FALSE;
+ rdadd(&indexstr, L'\0');
+ index_merge(idx, FALSE, indexstr.text, idxwordlist);
+ sfree(indexstr.text);
+ }
+ if (sitem->type & stack_hyper) {
+ wd.text = NULL;
+ wd.type = word_HyperEnd;
+ wd.alt = NULL;
+ wd.aux = 0;
+ wd.fpos = t.pos;
+ wd.breaks = FALSE;
+ if (!indexing || index_visible)
+ addword(wd, &whptr);
+ if (indexing)
+ addword(wd, &idximplicit);
+ }
+ if (sitem->type & stack_quote) {
+ wd.text = NULL;
+ wd.type = toquotestyle(style);
+ wd.alt = NULL;
+ wd.aux = quote_Close;
+ wd.fpos = t.pos;
+ wd.breaks = FALSE;
+ if (!indexing || index_visible)
+ addword(wd, &whptr);
+ if (indexing) {
+ rdadd(&indexstr, L'"');
+ addword(wd, &idximplicit);
+ }
+ }
+ }
+ sfree(sitem);
+ break;
+ case tok_cmd:
+ switch (t.cmd) {
+ case c__comment:
+ /*
+ * In-paragraph comment: \#{ balanced braces }
+ *
+ * Anything goes here; even tok_eop. We should
+ * eat whitespace after the close brace _if_
+ * there was whitespace before the \#.
+ */
+ dtor(t), t = get_token(in);
+ if (t.type != tok_lbrace) {
+ error(err_explbr, &t.pos);
+ } else {
+ int braces = 1;
+ while (braces > 0) {
+ dtor(t), t = get_token(in);
+ if (t.type == tok_lbrace)
+ braces++;
+ else if (t.type == tok_rbrace)
+ braces--;
+ else if (t.type == tok_eof) {
+ error(err_commenteof, &t.pos);
+ break;
+ }
+ }
+ }
+ if (seenwhite) {
+ already = TRUE;
+ dtor(t), t = get_token(in);
+ if (t.type == tok_white) {
+ iswhite = TRUE;
+ already = FALSE;
+ }
+ }
+ break;
+ case c_q:
+ dtor(t), t = get_token(in);
+ if (t.type != tok_lbrace) {
+ error(err_explbr, &t.pos);
+ } else {
+ wd.text = NULL;
+ wd.type = toquotestyle(style);
+ wd.alt = NULL;
+ wd.aux = quote_Open;
+ wd.fpos = t.pos;
+ wd.breaks = FALSE;
+ if (!indexing || index_visible)
+ addword(wd, &whptr);
+ if (indexing) {
+ rdadd(&indexstr, L'"');
+ addword(wd, &idximplicit);
+ }
+ sitem = mknew(struct stack_item);
+ sitem->type = stack_quote;
+ stk_push(parsestk, sitem);
+ }
+ break;
+ case c_K:
+ case c_k:
+ case c_W:
+ case c_date:
+ /*
+ * Keyword, hyperlink, or \date. We expect a
+ * left brace, some text, and then a right
+ * brace. No nesting; no arguments.
+ */
+ wd.fpos = t.pos;
+ wd.breaks = FALSE;
+ if (t.cmd == c_K)
+ wd.type = word_UpperXref;
+ else if (t.cmd == c_k)
+ wd.type = word_LowerXref;
+ else if (t.cmd == c_W)
+ wd.type = word_HyperLink;
+ else
+ wd.type = word_Normal;
+ dtor(t), t = get_token(in);
+ if (t.type != tok_lbrace) {
+ if (wd.type == word_Normal) {
+ time_t thetime = time(NULL);
+ struct tm *broken = localtime(&thetime);
+ already = TRUE;
+ wdtext = ustrftime(NULL, broken);
+ wd.type = style;
+ } else {
+ error(err_explbr, &t.pos);
+ wdtext = NULL;
+ }
+ } else {
+ rdstring rs = { 0, 0, NULL };
+ while (dtor(t), t = get_token(in),
+ t.type == tok_word || t.type == tok_white) {
+ if (t.type == tok_white)
+ rdadd(&rs, ' ');
+ else
+ rdadds(&rs, t.text);
+ }
+ if (wd.type == word_Normal) {
+ time_t thetime = time(NULL);
+ struct tm *broken = localtime(&thetime);
+ wdtext = ustrftime(rs.text, broken);
+ wd.type = style;
+ } else {
+ wdtext = ustrdup(rs.text);
+ }
+ sfree(rs.text);
+ if (t.type != tok_rbrace) {
+ error(err_kwexprbr, &t.pos);
+ }
+ }
+ wd.alt = NULL;
+ wd.aux = 0;
+ if (!indexing || index_visible) {
+ wd.text = ustrdup(wdtext);
+ addword(wd, &whptr);
+ }
+ if (indexing) {
+ wd.text = ustrdup(wdtext);
+ addword(wd, &idximplicit);
+ }
+ sfree(wdtext);
+ if (wd.type == word_HyperLink) {
+ /*
+ * Hyperlinks are different: they then
+ * expect another left brace, to begin
+ * delimiting the text marked by the link.
+ */
+ dtor(t), t = get_token(in);
+ /*
+ * Special cases: \W{}\c, \W{}\e, \W{}\cw
+ */
+ sitem = mknew(struct stack_item);
+ sitem->type = stack_hyper;
+ if (t.type == tok_cmd &&
+ (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
+ if (style != word_Normal)
+ error(err_nestedstyles, &t.pos);
+ else {
+ style = (t.cmd == c_c ? word_Code :
+ t.cmd == c_cw ? word_WeakCode :
+ word_Emph);
+ spcstyle = tospacestyle(style);
+ sitem->type |= stack_style;
+ }
+ dtor(t), t = get_token(in);
+ }
+ if (t.type != tok_lbrace) {
+ error(err_explbr, &t.pos);
+ sfree(sitem);
+ } else {
+ stk_push(parsestk, sitem);
+ }
+ }
+ break;
+ case c_c:
+ case c_cw:
+ case c_e:
+ type = t.cmd;
+ if (style != word_Normal) {
+ error(err_nestedstyles, &t.pos);
+ /* Error recovery: eat lbrace, push nop. */
+ dtor(t), t = get_token(in);
+ sitem = mknew(struct stack_item);
+ sitem->type = stack_nop;
+ stk_push(parsestk, sitem);
+ }
+ dtor(t), t = get_token(in);
+ if (t.type != tok_lbrace) {
+ error(err_explbr, &t.pos);
+ } else {
+ style = (type == c_c ? word_Code :
+ type == c_cw ? word_WeakCode :
+ word_Emph);
+ spcstyle = tospacestyle(style);
+ sitem = mknew(struct stack_item);
+ sitem->type = stack_style;
+ stk_push(parsestk, sitem);
+ }
+ break;
+ case c_i:
+ case c_ii:
+ case c_I:
+ type = t.cmd;
+ if (indexing) {
+ error(err_nestedindex, &t.pos);
+ /* Error recovery: eat lbrace, push nop. */
+ dtor(t), t = get_token(in);
+ sitem = mknew(struct stack_item);
+ sitem->type = stack_nop;
+ stk_push(parsestk, sitem);
+ }
+ sitem = mknew(struct stack_item);
+ sitem->type = stack_idx;
+ dtor(t), t = get_token(in);
+ /*
+ * Special cases: \i\c, \i\e, \i\cw
+ */
+ wd.fpos = t.pos;
+ if (t.type == tok_cmd &&
+ (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
+ if (style != word_Normal)
+ error(err_nestedstyles, &t.pos);
+ else {
+ style = (t.cmd == c_c ? word_Code :
+ t.cmd == c_cw ? word_WeakCode :
+ word_Emph);
+ spcstyle = tospacestyle(style);
+ sitem->type |= stack_style;
+ }
+ dtor(t), t = get_token(in);
+ }
+ if (t.type != tok_lbrace) {
+ sfree(sitem);
+ error(err_explbr, &t.pos);
+ } else {
+ /* Add an index-reference word with no text as yet */
+ wd.type = word_IndexRef;
+ wd.text = NULL;
+ wd.alt = NULL;
+ wd.aux = 0;
+ wd.breaks = FALSE;
+ indexword = addword(wd, &whptr);
+ /* Set up a rdstring to read the index text */
+ indexstr = nullrs;
+ /* Flags so that we do the Right Things with text */
+ index_visible = (type != c_I);
+ index_downcase = (type == c_ii);
+ indexing = TRUE;
+ idxwordlist = NULL;
+ idximplicit = &idxwordlist;
+ /* Stack item to close the indexing on exit */
+ stk_push(parsestk, sitem);
+ }
+ break;
+ case c_u:
+ uchr = t.aux;
+ utext[0] = uchr; utext[1] = 0;
+ wd.type = style;
+ wd.breaks = FALSE;
+ wd.alt = NULL;
+ wd.aux = 0;
+ wd.fpos = t.pos;
+ if (!indexing || index_visible) {
+ wd.text = ustrdup(utext);
+ uword = addword(wd, &whptr);
+ } else
+ uword = NULL;
+ if (indexing) {
+ wd.text = ustrdup(utext);
+ iword = addword(wd, &idximplicit);
+ } else
+ iword = NULL;
+ dtor(t), t = get_token(in);
+ if (t.type == tok_lbrace) {
+ /*
+ * \u with a left brace. Until the brace
+ * closes, all further words go on a
+ * sidetrack from the main thread of the
+ * paragraph.
+ */
+ sitem = mknew(struct stack_item);
+ sitem->type = stack_ualt;
+ sitem->whptr = whptr;
+ sitem->idximplicit = idximplicit;
+ stk_push(parsestk, sitem);
+ whptr = uword ? &uword->alt : NULL;
+ idximplicit = iword ? &iword->alt : NULL;
+ } else {
+ if (indexing)
+ rdadd(&indexstr, uchr);
+ already = TRUE;
+ }
+ break;
+ default:
+ if (!macrolookup(macros, in, t.text, &t.pos))
+ error(err_badmidcmd, t.text, &t.pos);
+ break;
+ }
+ }
+ if (!already)
+ dtor(t), t = get_token(in);
+ seenwhite = iswhite;
+ }
+ /* Check the stack is empty */
+ if (NULL != (sitem = stk_pop(parsestk))) {
+ do {
+ sfree(sitem);
+ sitem = stk_pop(parsestk);
+ } while (sitem);
+ error(err_missingrbrace, &t.pos);
+ }
+ stk_free(parsestk);
+ addpara(par, ret);
+ }
+ dtor(t);
+ macrocleanup(macros);
+}
+
+/*
+ * Read every input file named in `in', in order, appending the
+ * paragraphs produced by read_file() to a single list.
+ *
+ * Returns the head of that list, or NULL if nothing was read.
+ * A file that cannot be opened is skipped without any diagnostic.
+ * NOTE(review): the stream opened here is not closed in this
+ * function; presumably the reader closes it at end of file - confirm.
+ */
+paragraph *read_input(input *in, indexdata *idx) {
+    paragraph *first = NULL;
+    paragraph **tail = &first;
+
+    for (; in->currindex < in->nfiles; in->currindex++) {
+        in->currfp = fopen(in->filenames[in->currindex], "r");
+        if (!in->currfp)
+            continue;                  /* unreadable file: move on */
+        setpos(in, in->filenames[in->currindex]);
+        read_file(&tail, in, idx);
+    }
+
+    return first;
+}
--- /dev/null
+Bogus keyword: \k{nonexist}
+
+\BR{nonexist2} [Foogle]
+
+\B{book} Foo McBar, "A Lot Of Rubbish", 1992.
+
+\B{book} The same book again. Isn't that odd?
+
+\IM{nonexist3} Logical impossibilities
+
+\define{macro} macro definition
+
+\define{macro} same macro again
+
+\c Foo
+\c Bar
+Bombadillo. Now get out of _that_.
+
+\unngh This is a bit of a bizarre paragraph, now isn't it?
+
+\C{unfinished-symphony
+
+\U{thisshouldn'tbehere} Unnumbered chapter. Or is it?
+
+\C Numbered chapter. Or _is_ it?
+
+\BR{two}{too}{many} [Which One?]
+
+\nocite{ooh} With some illegal text.
+
+Paragraph with {bizarre braces}.
+
+Another one } .
+
+Bogus in-para comment: \# foobar.
+
+Bogus xrefs: \k and \k{foo{}}.
+
+Nest those styles! \c{foo\e{bar}}. And
+\c{foo\W{file:/dev/null}\e{bar}}. And \e{foo\i\c{bar}}.
+And one without brace: \e.
+
+Bogus hyperlinks: \W and \W{file:/dev/null}bar.
+
+Nested index: \i{foo\i{bar}}. Broken styled index: \i\c.
+
+Complete twaddle: \twaddle.
+
+Unclosed brace: \c{foo.
+
+Comment to EOF: \#{ and here we go.
--- /dev/null
+\H{outofplace} Heading out of place.
+
+\C{chap} Chapter is fine
+
+\S{subsect} Subsection should have heading before it.
--- /dev/null
+\title Halibut: A Test Document With A Stupidly Long Title Just To
+See If Wrapping Titles Works OK. In Fact This Title Will Span Three
+Lines, Not Just Two. How's That For Ludicrous?
+
+\cfg{xhtml-leaf-smallest-contents}{2}
+
+\cfg{xhtml-leaf-contains-contents}{true}
+
+\preamble This manual is a small joke effort, designed to use every
+feature \#{ comment } that Halibut's input format supports. Creation
+date \date{%Y.%m.%d} (default format is \date).
+
+\copyright Copyright 1999 Simon \#{second comment}Tatham. All rights
+reserved.
+
+\define{metacoopt} [this is a nested,
+multi-line macro, talking about \coopt
+a bit]
+
+\define{coopt} co\u00F6{-o}pt
+
+\versionid $Id: test.but,v 1.18 2002/08/05 10:31:33 simon Exp $
+
+\C{ch\\ap} First chapter title; for similar wrapping reasons this
+chapter title will be ludicrously long. I wonder how much more
+chapter title I can write before feeling silly.
+
+This is a para\#{another{} comment}graph of text. It
+has line\#{yet another one} breaks in between words, multiple
+ spaces (ignored), and \e{emphasised text} as well as \c{code
+fragments}.
+
+\cw{This} is weak code. And \k{head} contains some other stuff.
+\K{subhead} does too.
+
+\H{head} First section title (very long again, no prizes for
+guessing the reason why this time, and here's yet more text to pad
+it out to three lines of output)
+
+\cfg{winhelp-topic}{M359HPEHGW}
+
+Here's a code paragraph:
+
+\c No leading spaces
+\c One leading space
+\c Two blank lines follow this one.
+\c
+\c
+\c Two blank lines precede this one.
+\c Two leading spaces
+\c We can use \ { and } with impunity here.
+
+This is a list:
+
+\b Ooh.
+
+\b Aah.
+
+\b Eek.
+
+This is a horizontal rule:
+
+\rule
+
+This is a numbered list:
+
+\n Ooh.
+
+\n{keyword} Aah.
+
+\n Eek. \q{Aah} is point \k{keyword}.
+
+A-paragraph-full-of-hyphens-to-test-the-idea-that-word-wrapping-can-happen-somewhere-in-all-this-hyphenatory-nonsense.
+
+A\-paragraph\-full\-of\-nonbreaking\-hyphens\-to\-test\-the\-idea\-that\-word\-wrapping\-misses\-them.
+
+A\_paragraph\_full\_of\_nonbreaking\_spaces\_to\_test\_the\_idea\_that\_word\_wrapping\_misses\_them\_too.
+
+Use of macros: let's talk about \coopt. And about \coopt some more.
+And a nested macro: \metacoopt.
+
+Oh, while I'm here: some special characters. The \\, \{ and \}
+characters, to be precise. And their code equivalents, \c{\\},
+\i\c{\{}, \c{\}}.
+
+\S{subhead} First subheading
+
+So here's a \I{subheading}\I{subsection}subsection. Just
+incidentally, \q{this} is in quotes. \ii{Those} quotes had better work
+in all formats.
+
+We'll try for some Unicode here: \i{Schr\u00F6{oe}dinger}.
+
+An index tag containing non-alternatived Unicode: \i{\u00BFChe?}
+
+An invisible index tag: \I{she seems to have an invisible tag}yeah.
+
+\S2{sub-sub}{Florble} Smaller heading still
+
+A tiny section. Awww. How cute. Actually, this one's a \e{florble},
+and here's a reference to it: \k{sub-sub}.
+
+\A{app} Needless appendix
+
+\# \cfg{winhelp-topic}{Y5VQEXZQVJ} (uncomment this and it clashes)
+
+Here's an \i{appendix}, for no terribly good reason at all. See
+\k{book}.
+
+It also contains a \W{http://www.tartarus.org/}{hyperlink}.
+
+\U Bibliography
+
+\B{book} Some text describing a book.
+
+\B{nocite} Some text describing a book. This text should appear in
+the document even though there is no \cw{\\k} citing it.
+
+\BR{book} [SillyCitation]
+
+\nocite{nocite}
+
+\B{uncited} If this text appears, there's an actual error.
+
+\# This is a comment.
+
+\# Now for the index section.
+
+\IM{she seems to have an invisible tag}{appendix} Invisible tags
+and/or appendices
--- /dev/null
+/*
+ * keywords.c: keep track of all cross-reference keywords
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "halibut.h"
+
+/*
+ * Tree ordering function: compare two keyword records by their key
+ * strings using ustrcmp().
+ */
+static int kwcmp(void *av, void *bv)
+{
+    const keyword *left = av;
+    const keyword *right = bv;
+
+    return ustrcmp(left->key, right->key);
+}
+
+/*
+ * Tree search function: compare a bare key string (av) against the
+ * key of a keyword record (bv) using ustrcmp().
+ */
+static int kwfind(void *av, void *bv)
+{
+    const keyword *entry = bv;
+    wchar_t *wanted = av;
+
+    return ustrcmp(wanted, entry->key);
+}
+
+/*
+ * Look up the keyword record whose key equals `str'. Returns
+ * whatever find234() returns for the search (the record, or its
+ * not-found value).
+ */
+keyword *kw_lookup(keywordlist *kl, wchar_t *str) {
+    keyword *hit = find234(kl->keys, str, kwfind);
+
+    return hit;
+}
+
+/*
+ * Park one keyword text in the list's `loose ends' array: texts that
+ * are not owned by any record in the keyword tree but must still be
+ * released by free_keywords(). The array grows in steps of 32. A
+ * NULL text is stored as given.
+ */
+static void kw_add_looseend(keywordlist *kl, word *text) {
+    if (kl->nlooseends >= kl->looseendssize) {
+        kl->looseendssize = kl->nlooseends + 32;
+        kl->looseends = resize(kl->looseends, kl->looseendssize);
+    }
+    kl->looseends[kl->nlooseends++] = text;
+}
+
+/*
+ * This function reads through source form and collects the
+ * keywords. They are stored in a 2-3-4 tree ordered by ustrcmp()
+ * on the keyword string, so that kw_lookup() can find them.
+ *
+ * As a side effect every paragraph gets its `kwtext' filled in by
+ * number_mktext(). Each kwtext ends up owned either by a record in
+ * the tree or by the loose-ends array, so free_keywords() releases
+ * all of them.
+ *
+ * Returns the new keyword list, or NULL (after freeing the partial
+ * list) if number_mktext() flagged any errors.
+ */
+keywordlist *get_keywords(paragraph *source) {
+    int errors = FALSE;
+    keywordlist *kl = mknew(keywordlist);
+    numberstate *n = number_init();
+    int prevpara = para_NotParaType;
+
+    number_cfg(n, source);
+
+    kl->size = 0;
+    kl->keys = newtree234(kwcmp);
+    kl->nlooseends = kl->looseendssize = 0;
+    kl->looseends = NULL;
+    for (; source; source = source->next) {
+        wchar_t *p, *q;
+        p = q = source->keyword;
+
+        /*
+         * Look for the section type override (`example',
+         * `question' or whatever - to replace `chapter' or
+         * `section' on a per-section basis).
+         */
+        if (q) {
+            q = uadv(q);               /* point q at the word beyond */
+            if (!*q) q = NULL;
+        }
+
+        /*
+         * Number the chapter / section / list-item / whatever.
+         * This also sets up the `parent', `child' and `sibling'
+         * links.
+         */
+        source->kwtext = number_mktext(n, source, q, prevpara, &errors);
+        prevpara = source->type;
+
+        if (p && *p) {
+            if (source->kwtext || source->type == para_Biblio) {
+                keyword *kw, *ret;
+
+                kw = mknew(keyword);
+                kw->key = p;
+                kw->text = source->kwtext;
+                kw->para = source;
+                ret = add234(kl->keys, kw);
+                if (ret != kw) {
+                    error(err_multikw, &source->fpos, &ret->para->fpos, p);
+                    sfree(kw);
+                    /*
+                     * The rejected record never reached the tree,
+                     * so nothing there owns its text. Park the text
+                     * with the loose ends so that it is still freed
+                     * rather than leaked.
+                     */
+                    kw_add_looseend(kl, source->kwtext);
+                }
+            }
+        } else {
+            /* No keyword at all: the text is a loose end. */
+            kw_add_looseend(kl, source->kwtext);
+        }
+    }
+
+    number_free(n);
+
+    if (errors) {
+        free_keywords(kl);
+        return NULL;
+    }
+
+    return kl;
+}
+
+/*
+ * Destroy a keyword list: free every loose-end text (last stored
+ * first), then every record in the tree together with its text,
+ * then the tree and the list structure itself.
+ */
+void free_keywords(keywordlist *kl) {
+    keyword *entry;
+    int i;
+
+    for (i = kl->nlooseends; i-- > 0 ;)
+        free_word_list(kl->looseends[i]);
+    kl->nlooseends = 0;
+    sfree(kl->looseends);
+
+    /* Repeatedly peel off the first record until the tree is empty. */
+    for (;;) {
+        entry = index234(kl->keys, 0);
+        if (entry == NULL)
+            break;
+        delpos234(kl->keys, 0);
+        free_word_list(entry->text);
+        sfree(entry);
+    }
+
+    freetree234(kl->keys);
+    sfree(kl);
+}
+
+/*
+ * Expand cross-references. For every word_UpperXref / word_LowerXref
+ * word in every paragraph, look its text up as a keyword and splice
+ * a copy of that keyword's text into the word list directly after
+ * it, followed by a word_XrefEnd marker. All spliced words take the
+ * file position of the referencing word.
+ *
+ * An unknown keyword is reported with err_nosuchkw; the end marker
+ * is still inserted, with nothing in between.
+ *
+ * For a word_LowerXref whose target is not a bibliography entry,
+ * the first substituted word is passed through ustrlow().
+ */
+void subst_keywords(paragraph *source, keywordlist *kl) {
+    for (; source; source = source->next) {
+        word *ptr;
+        for (ptr = source->words; ptr; ptr = ptr->next) {
+            keyword *kw;
+            word **endptr, *close, *subst;
+
+            if (ptr->type != word_UpperXref &&
+                ptr->type != word_LowerXref)
+                continue;
+
+            kw = kw_lookup(kl, ptr->text);
+            if (!kw) {
+                error(err_nosuchkw, &ptr->fpos, ptr->text);
+                subst = NULL;
+            } else
+                subst = dup_word_list(kw->text);
+
+            if (subst && ptr->type == word_LowerXref &&
+                kw->para->type != para_Biblio &&
+                kw->para->type != para_BiblioCited)
+                ustrlow(subst->text);
+
+            close = mknew(word);
+            close->text = NULL;
+            close->alt = NULL;
+            close->type = word_XrefEnd;
+            close->fpos = ptr->fpos;
+            /*
+             * Initialise the remaining fields too: wrap_para() reads
+             * `breaks' on every word it walks over, so leaving it
+             * indeterminate could invent line-break points.
+             */
+            close->aux = 0;
+            close->breaks = FALSE;
+
+            close->next = ptr->next;
+            ptr->next = subst;
+
+            /* Walk the inserted words, stamping positions, to find the end. */
+            for (endptr = &ptr->next; *endptr; endptr = &(*endptr)->next)
+                (*endptr)->fpos = ptr->fpos;
+
+            *endptr = close;
+            ptr = close;               /* resume after the end marker */
+        }
+    }
+}
--- /dev/null
+/*
+ * licence.c: licence text
+ */
+
+#include <stdio.h>
+
+/*
+ * The licence text, one output line per element, terminated by a
+ * NULL entry. This mirrors the LICENCE file distributed with the
+ * source.
+ */
+static const char *const licencetext[] = {
+    "Halibut is copyright (c) 1999-2001 Simon Tatham and James Aylett.",
+    "",
+    "Permission is hereby granted, free of charge, to any person",
+    "obtaining a copy of this software and associated documentation files",
+    "(the \"Software\"), to deal in the Software without restriction,",
+    "including without limitation the rights to use, copy, modify, merge,",
+    "publish, distribute, sublicense, and/or sell copies of the Software,",
+    "and to permit persons to whom the Software is furnished to do so,",
+    "subject to the following conditions:",
+    "",
+    "The above copyright notice and this permission notice shall be",
+    "included in all copies or substantial portions of the Software.",
+    "",
+    "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,",
+    "EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF",
+    "MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND",
+    "NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS",
+    "BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN",
+    "ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN",
+    "CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE",
+    "SOFTWARE.",
+    NULL
+};
+
+/*
+ * Print the licence text to standard output, one line at a time.
+ */
+void licence(void) {
+    const char *const *p;
+    for (p = licencetext; *p; p++)
+        puts(*p);
+}
--- /dev/null
+/*
+ * main.c: command line parsing and top level
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "halibut.h"
+
+static void dbg_prtsource(paragraph *sourceform);
+static void dbg_prtwordlist(int level, word *w);
+static void dbg_prtkws(keywordlist *kws);
+
+/*
+ * Program entry point.
+ *
+ * Parses the command line, then reads all input files, collects
+ * keywords, generates citations, substitutes cross-references,
+ * builds the index and runs the text, XHTML and Windows Help
+ * back ends in turn.
+ *
+ * Options handled here: -h / --help, -V / --version, -L / --licence
+ * (or --license), -P / --precise, -d (debug dumps), and -o FILE /
+ * --output=FILE. The output file name is recorded in `outfile' but
+ * is not otherwise used within this function.
+ *
+ * Returns 0 on success. Exits with EXIT_FAILURE on a command-line
+ * error, when no input file is given, or when reading the input or
+ * collecting keywords fails; exits with EXIT_SUCCESS after usage,
+ * help, version or licence output.
+ */
+int main(int argc, char **argv) {
+ char **infiles;
+ char *outfile;
+ int nfiles;
+ int nogo;
+ int errs;
+ int reportcols;
+ int debug;
+
+ /*
+ * Set up initial (default) parameters.
+ */
+ infiles = mknewa(char *, argc);
+ outfile = NULL;
+ nfiles = 0;
+ nogo = errs = FALSE;
+ reportcols = 0;
+ debug = 0;
+
+ /* No arguments at all: show usage and stop. */
+ if (argc == 1) {
+ usage();
+ exit(EXIT_SUCCESS);
+ }
+
+ /*
+ * Parse command line arguments.
+ */
+ while (--argc) {
+ char *p = *++argv;
+ if (*p == '-') {
+ /*
+ * An option.
+ */
+ /*
+ * Step through the characters after the `-', so that
+ * single-letter options can be clustered. Setting p to
+ * NULL ends processing of this argument.
+ */
+ while (p && *++p) {
+ char c = *p;
+ switch (c) {
+ case '-':
+ /*
+ * Long option.
+ */
+ {
+ char *opt, *val;
+ opt = p++; /* opt will have _one_ leading - */
+ while (*p && *p != '=')
+ p++; /* find end of option */
+ /* Split `name=value' in place at the `='. */
+ if (*p == '=') {
+ *p++ = '\0';
+ val = p;
+ } else
+ val = NULL;
+ if (!strcmp(opt, "-help")) {
+ help();
+ nogo = TRUE;
+ } else if (!strcmp(opt, "-version")) {
+ showversion();
+ nogo = TRUE;
+ } else if (!strcmp(opt, "-licence") ||
+ !strcmp(opt, "-license")) {
+ licence();
+ nogo = TRUE;
+ } else if (!strcmp(opt, "-output")) {
+ if (!val)
+ errs = TRUE, error(err_optnoarg, opt);
+ else
+ outfile = val;
+ } else if (!strcmp(opt, "-precise")) {
+ reportcols = 1;
+ } else {
+ errs = TRUE, error(err_nosuchopt, opt);
+ }
+ }
+ p = NULL;
+ break;
+ case 'h':
+ case 'V':
+ case 'L':
+ case 'P':
+ case 'd':
+ /*
+ * Option requiring no parameter.
+ */
+ switch (c) {
+ case 'h':
+ help();
+ nogo = TRUE;
+ break;
+ case 'V':
+ showversion();
+ nogo = TRUE;
+ break;
+ case 'L':
+ licence();
+ nogo = TRUE;
+ break;
+ case 'P':
+ reportcols = 1;
+ break;
+ case 'd':
+ debug = TRUE;
+ break;
+ }
+ break;
+ case 'o':
+ /*
+ * Option requiring parameter.
+ */
+ /*
+ * The parameter is the rest of this argument if there
+ * is any, otherwise the whole of the next argument.
+ */
+ p++;
+ if (!*p && argc > 1)
+ --argc, p = *++argv;
+ else if (!*p) {
+ char opt[2];
+ opt[0] = c;
+ opt[1] = '\0';
+ errs = TRUE, error(err_optnoarg, opt);
+ }
+ /*
+ * Now c is the option and p is the parameter.
+ */
+ switch (c) {
+ case 'o':
+ outfile = p;
+ break;
+ }
+ p = NULL; /* prevent continued processing */
+ break;
+ default:
+ /*
+ * Unrecognised option.
+ */
+ {
+ char opt[2];
+ opt[0] = c;
+ opt[1] = '\0';
+ errs = TRUE, error(err_nosuchopt, opt);
+ }
+ }
+ }
+ } else {
+ /*
+ * A non-option argument.
+ */
+ infiles[nfiles++] = p;
+ }
+ }
+
+ /* Option errors take precedence over help/version/licence exits. */
+ if (errs)
+ exit(EXIT_FAILURE);
+ if (nogo)
+ exit(EXIT_SUCCESS);
+
+ /*
+ * Do the work.
+ */
+ if (nfiles == 0) {
+ error(err_noinput);
+ usage();
+ exit(EXIT_FAILURE);
+ }
+
+ {
+ input in;
+ paragraph *sourceform, *p;
+ indexdata *idx;
+ keywordlist *keywords;
+
+ /* Start the reader at the first file with nothing pushed back. */
+ in.filenames = infiles;
+ in.nfiles = nfiles;
+ in.currfp = NULL;
+ in.currindex = 0;
+ in.npushback = in.pushbacksize = 0;
+ in.pushback = NULL;
+ in.reportcols = reportcols;
+ in.stack = NULL;
+
+ idx = make_index();
+
+ sourceform = read_input(&in, idx);
+ if (!sourceform)
+ exit(EXIT_FAILURE);
+
+ sfree(in.pushback);
+
+ mark_attr_ends(sourceform);
+
+ sfree(infiles);
+
+ keywords = get_keywords(sourceform);
+ if (!keywords)
+ exit(EXIT_FAILURE);
+ gen_citations(sourceform, keywords);
+ subst_keywords(sourceform, keywords);
+
+ /* Feed every para_IM paragraph into the index. */
+ for (p = sourceform; p; p = p->next)
+ if (p->type == para_IM)
+ index_merge(idx, TRUE, p->keyword, p->words);
+
+ build_index(idx);
+
+ if (debug) {
+ index_debug(idx);
+ dbg_prtkws(keywords);
+ dbg_prtsource(sourceform);
+ }
+
+ /* Every back end is run unconditionally. */
+ text_backend(sourceform, keywords, idx);
+ xhtml_backend(sourceform, keywords, idx);
+ whlp_backend(sourceform, keywords, idx);
+
+ free_para_list(sourceform);
+ free_keywords(keywords);
+ cleanup_index(idx);
+ }
+
+ return 0;
+}
+
+/*
+ * Output source form in debugging format: for each paragraph, its
+ * type number, its keyword strings (each in double quotes, separated
+ * by commas) and then its word list inside braces.
+ *
+ * The keyword field is walked as a run of NUL-terminated strings
+ * that ends at an empty string.
+ */
+static void dbg_prtsource(paragraph *sourceform) {
+    paragraph *para = sourceform;
+
+    while (para) {
+        printf("para %d ", para->type);
+        if (!para->keyword) {
+            printf("(no keyword)");
+        } else {
+            wchar_t *ch = para->keyword;
+            while (*ch) {
+                putchar('\"');
+                while (*ch) {
+                    putchar(*ch);
+                    ch++;
+                }
+                putchar('\"');
+                ch++;                  /* step over this string's NUL */
+                if (*ch)
+                    printf(", ");
+            }
+        }
+        printf(" {\n");
+        dbg_prtwordlist(1, para->words);
+        printf("}\n");
+        para = para->next;
+    }
+}
+
+/*
+ * Output keywords in debugging format: for each record in the
+ * keyword tree, in index order, its key strings (each in double
+ * quotes, separated by commas) and then its text inside braces.
+ */
+static void dbg_prtkws(keywordlist *kws) {
+    keyword *entry;
+    int pos = 0;
+
+    while ((entry = index234(kws->keys, pos)) != NULL) {
+        wchar_t *ch = entry->key;
+
+        printf("keyword ");
+        while (*ch) {
+            putchar('\"');
+            while (*ch) {
+                putchar(*ch);
+                ch++;
+            }
+            putchar('\"');
+            ch++;                      /* step over this string's NUL */
+            if (*ch)
+                printf(", ");
+        }
+        printf(" {\n");
+        dbg_prtwordlist(1, entry->text);
+        printf("}\n");
+        pos++;
+    }
+}
+
+/*
+ * Dump a word list in debugging format, one word per line, indented
+ * by four spaces per `level'. Alternative text (`alt') is dumped
+ * recursively inside braces, one level deeper.
+ */
+static void dbg_prtwordlist(int level, word *w) {
+    while (w) {
+        printf("%*sword %d ", level*4, "", w->type);
+        if (!w->text) {
+            printf("(no text)");
+        } else {
+            wchar_t *ch = w->text;
+            printf("\"");
+            while (*ch) {
+                putchar(*ch);
+                ch++;
+            }
+            printf("\"");
+        }
+        if (w->alt) {
+            printf(" alt = {\n");
+            dbg_prtwordlist(level+1, w->alt);
+            printf("%*s}", level*4, "");
+        }
+        printf("\n");
+        w = w->next;
+    }
+}
--- /dev/null
+/*
+ * malloc.c: safe wrappers around malloc, realloc, free, strdup
+ */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include "halibut.h"
+
+/*
+ * Optional allocation logging. When LOGALLOC is defined, each
+ * allocator below takes an extra (file, line) pair and writes a
+ * record of every call to "malloc.log". Otherwise the LOG* macros
+ * expand to nothing.
+ */
+#ifdef LOGALLOC
+#define LOGPARAMS char *file, int line,
+static FILE *logallocfp = NULL;
+static int logline = 2;                /* off by 1: `null pointer is' */
+/* Empty function called once per logged operation.
+ * NOTE(review): presumably a convenient debugger breakpoint - confirm. */
+static void loginc(void) { }
+/* Open the log file on first use; exits with status 10 on failure. */
+static void logallocinit(void) {
+    if (!logallocfp) {
+        logallocfp = fopen("malloc.log", "w");
+        if (!logallocfp) {
+            fprintf(stderr, "panic: unable to open malloc.log\n");
+            exit(10);
+        }
+        setvbuf (logallocfp, NULL, _IOLBF, BUFSIZ);
+        /* %p requires a void *; a bare NULL may be an integer constant. */
+        fprintf(logallocfp, "null pointer is %p\n", (void *)NULL);
+    }
+}
+/* printf-style write to the log file (which must already be open). */
+static void logprintf(char *fmt, ...) {
+    va_list ap;
+    va_start(ap, fmt);
+    vfprintf(logallocfp, fmt, ap);
+    va_end(ap);
+}
+#define LOGPRINT(x) ( logallocinit(), logprintf x )
+#define LOGINC do { loginc(); logline++; } while (0)
+#else
+#define LOGPARAMS
+#define LOGPRINT(x)
+#define LOGINC ((void)0)
+#endif
+
+/*
+ * smalloc should guarantee to return a useful pointer - Halibut
+ * can do nothing except die when it's out of memory anyway.
+ */
+/*
+ * Allocate `size' bytes with malloc(). Never returns NULL: if the
+ * allocation fails, fatal(err_nomemory) is called instead.
+ */
+void *(smalloc)(LOGPARAMS int size) {
+    void *block;
+
+    LOGINC;
+    LOGPRINT(("%s %d malloc(%ld)",
+              file, line, (long)size));
+    block = malloc(size);
+    if (block == NULL)
+        fatal(err_nomemory);
+    LOGPRINT((" returns %p\n", block));
+    return block;
+}
+
+/*
+ * sfree should guaranteeably deal gracefully with freeing NULL
+ */
+/*
+ * Free a block obtained from smalloc/srealloc. A NULL pointer is
+ * ignored (and not logged).
+ */
+void (sfree)(LOGPARAMS void *p) {
+    if (!p)
+        return;
+    LOGINC;
+    LOGPRINT(("%s %d free(%p)\n",
+              file, line, p));
+    free(p);
+}
+
+/*
+ * srealloc should guaranteeably be able to realloc NULL
+ */
+/*
+ * Resize block `p' to `size' bytes. A NULL `p' is handled by calling
+ * malloc() rather than realloc(). Never returns NULL: on failure,
+ * fatal(err_nomemory) is called instead.
+ */
+void *(srealloc)(LOGPARAMS void *p, int size) {
+    void *result;
+
+    LOGINC;
+    if (p) {
+        LOGPRINT(("%s %d realloc(%p,%ld)",
+                  file, line, p, (long)size));
+        result = realloc(p, size);
+    } else {
+        LOGPRINT(("%s %d malloc(%ld)",
+                  file, line, (long)size));
+        result = malloc(size);
+    }
+    LOGPRINT((" returns %p\n", result));
+    if (!result)
+        fatal(err_nomemory);
+    return result;
+}
+
+/*
+ * dupstr is like strdup, but with the never-return-NULL property
+ * of smalloc (and also reliably defined in all environments :-)
+ */
+/*
+ * Return a freshly allocated copy of the NUL-terminated string `s'.
+ * The storage comes from smalloc(), so the result is never NULL.
+ */
+char *dupstr(char *s) {
+    char *copy;
+
+    copy = smalloc(strlen(s) + 1);
+    strcpy(copy, s);
+    return copy;
+}
+
+/*
+ * Duplicate a linked list of words
+ */
+/*
+ * Duplicate a linked list of words, including each word's text and
+ * (recursively) its `alt' list. All other fields are copied as they
+ * are. Returns the head of the new list, or NULL if `w' is NULL.
+ */
+word *dup_word_list(word *w) {
+    /*
+     * head must start out NULL: with an empty input list the loop
+     * below never stores through eptr, and we would otherwise hand
+     * back an indeterminate pointer.
+     */
+    word *head = NULL, **eptr = &head;
+
+    while (w) {
+        word *newwd = mknew(word);
+        *newwd = *w;                   /* structure copy */
+        newwd->text = ustrdup(w->text);
+        if (w->alt)
+            newwd->alt = dup_word_list(w->alt);
+        *eptr = newwd;
+        newwd->next = NULL;
+        eptr = &newwd->next;
+
+        w = w->next;
+    }
+
+    return head;
+}
+
+/*
+ * Free a linked list of words
+ */
+/*
+ * Free a linked list of words: each word's text, its `alt' list
+ * (recursively) and the word itself. A NULL list is a no-op.
+ */
+void free_word_list(word *w) {
+    while (w) {
+        word *rest = w->next;
+
+        sfree(w->text);
+        if (w->alt)
+            free_word_list(w->alt);
+        sfree(w);
+        w = rest;
+    }
+}
+
+/*
+ * Free a linked list of paragraphs
+ */
+/*
+ * Free a linked list of paragraphs: each paragraph's keyword string,
+ * its word list and the paragraph itself. A NULL list is a no-op.
+ */
+void free_para_list(paragraph *p) {
+    while (p) {
+        paragraph *rest = p->next;
+
+        sfree(p->keyword);
+        free_word_list(p->words);
+        sfree(p);
+        p = rest;
+    }
+}
--- /dev/null
+/*
+ * misc.c: miscellaneous useful items
+ */
+
+#include "halibut.h"
+
+/* Growable stack of untyped item pointers, used by the stk_* routines. */
+struct stackTag {
+ /* array of stored item pointers; NULL until the first push */
+ void **data;
+ /* number of items currently held; the next push goes to data[sp] */
+ int sp;
+ /* number of slots currently allocated in data */
+ int size;
+};
+
+/*
+ * Create a new, empty stack with no storage allocated yet.
+ */
+stack stk_new(void) {
+    stack fresh = mknew(struct stackTag);
+
+    fresh->data = NULL;
+    fresh->size = 0;
+    fresh->sp = 0;
+    return fresh;
+}
+
+/*
+ * Destroy a stack: release its item array and then the stack
+ * structure. Items still on the stack are not freed.
+ */
+void stk_free(stack s) {
+    void **items = s->data;
+
+    sfree(items);
+    sfree(s);
+}
+
+/*
+ * Push `item' on to the stack, growing the item array by 32 slots
+ * beyond the current depth whenever it is full.
+ */
+void stk_push(stack s, void *item) {
+    if (s->sp >= s->size) {
+        s->size = s->sp + 32;
+        s->data = resize(s->data, s->size);
+    }
+    s->data[s->sp] = item;
+    s->sp++;
+}
+
+/*
+ * Pop and return the most recently pushed item, or NULL if the
+ * stack is empty.
+ */
+void *stk_pop(stack s) {
+    if (s->sp <= 0)
+        return NULL;
+    s->sp--;
+    return s->data[s->sp];
+}
+
+/*
+ * Small routines to amalgamate a string from an input source.
+ *
+ * The two constants below are empty wide-character and narrow
+ * string accumulators: both counters zero and no buffer.
+ */
+const rdstring empty_rdstring = {0, 0, NULL};
+const rdstringc empty_rdstringc = {0, 0, NULL};
+
+/*
+ * Append one wide character to the accumulator, keeping the buffer
+ * zero-terminated. The buffer grows by 128 characters beyond the
+ * current position when there is no room for the character plus its
+ * terminator.
+ */
+void rdadd(rdstring *rs, wchar_t c) {
+    if (rs->size - 1 <= rs->pos) {
+        rs->size = rs->pos + 128;
+        rs->text = resize(rs->text, rs->size);
+    }
+    rs->text[rs->pos] = c;
+    rs->pos++;
+    rs->text[rs->pos] = 0;
+}
+/*
+ * Append a zero-terminated wide string to the accumulator. The
+ * terminator is copied too but not counted in `pos'.
+ */
+void rdadds(rdstring *rs, wchar_t *p) {
+    int n = ustrlen(p);
+    wchar_t *dest;
+
+    if (rs->size - n <= rs->pos) {
+        rs->size = rs->pos + n + 128;
+        rs->text = resize(rs->text, rs->size);
+    }
+    dest = rs->text + rs->pos;
+    ustrcpy(dest, p);
+    rs->pos += n;
+}
+/*
+ * Shrink the accumulator's buffer to exactly fit its contents plus
+ * the terminator, and return the buffer.
+ *
+ * The terminator is written explicitly, so an accumulator that was
+ * never added to yields a valid empty string rather than one
+ * uninitialised character. `size' is updated to match the new
+ * allocation, so further additions will grow the buffer first.
+ */
+wchar_t *rdtrim(rdstring *rs) {
+    rs->size = rs->pos + 1;
+    rs->text = resize(rs->text, rs->size);
+    rs->text[rs->pos] = 0;
+    return rs->text;
+}
+
+/*
+ * Append one char to the narrow-string accumulator, keeping the
+ * buffer zero-terminated. The buffer grows by 128 bytes beyond the
+ * current position when there is no room for the char plus its
+ * terminator.
+ */
+void rdaddc(rdstringc *rs, char c) {
+    if (rs->size - 1 <= rs->pos) {
+        rs->size = rs->pos + 128;
+        rs->text = resize(rs->text, rs->size);
+    }
+    rs->text[rs->pos] = c;
+    rs->pos++;
+    rs->text[rs->pos] = 0;
+}
+/*
+ * Append a zero-terminated string to the narrow-string accumulator.
+ * The terminator is copied too but not counted in `pos'.
+ */
+void rdaddsc(rdstringc *rs, char *p) {
+    int n = strlen(p);
+    char *dest;
+
+    if (rs->size - n <= rs->pos) {
+        rs->size = rs->pos + n + 128;
+        rs->text = resize(rs->text, rs->size);
+    }
+    dest = rs->text + rs->pos;
+    strcpy(dest, p);
+    rs->pos += n;
+}
+/*
+ * Shrink the narrow-string accumulator's buffer to exactly fit its
+ * contents plus the terminator, and return the buffer.
+ *
+ * The terminator is written explicitly, so an accumulator that was
+ * never added to yields a valid empty string rather than one
+ * uninitialised byte. `size' is updated to match the new
+ * allocation, so further additions will grow the buffer first.
+ */
+char *rdtrimc(rdstringc *rs) {
+    rs->size = rs->pos + 1;
+    rs->text = resize(rs->text, rs->size);
+    rs->text[rs->pos] = 0;
+    return rs->text;
+}
+
+/*
+ * Compare two word lists, returning a negative, zero or positive
+ * value in the manner of a sort comparison function.
+ *
+ * Words are compared pairwise. Words of different type order by
+ * their type value. Plain text words (Normal, Code, WeakCode, Emph)
+ * without alternative text are compared character by character via
+ * utolower(), and a run of adjacent words of the same type is
+ * treated as one continuous string. Any other word is compared by
+ * ustricmp() on its text (when both have text) and then by its
+ * `alt' list. If one list is a prefix of the other, the shorter one
+ * sorts first.
+ * NOTE(review): utolower/ustricmp are presumably case-folding
+ * helpers, making the comparison case-insensitive - confirm.
+ */
+int compare_wordlists(word *a, word *b) {
+ int t;
+ while (a && b) {
+ if (a->type != b->type)
+ return (a->type < b->type ? -1 : +1); /* FIXME? */
+ t = a->type;
+ if ((t != word_Normal && t != word_Code &&
+ t != word_WeakCode && t != word_Emph) ||
+ a->alt || b->alt) {
+ /* Not plain text, or has alt text: compare word against word. */
+ int c;
+ if (a->text && b->text) {
+ c = ustricmp(a->text, b->text);
+ if (c)
+ return c;
+ }
+ c = compare_wordlists(a->alt, b->alt);
+ if (c)
+ return c;
+ a = a->next;
+ b = b->next;
+ } else {
+ /*
+ * Plain text on both sides: walk the characters, hopping
+ * to the following word when one is exhausted, provided
+ * that word has the same type and no alt text.
+ */
+ wchar_t *ap = a->text, *bp = b->text;
+ while (*ap && *bp) {
+ wchar_t ac = utolower(*ap), bc = utolower(*bp);
+ if (ac != bc)
+ return (ac < bc ? -1 : +1);
+ if (!*++ap && a->next && a->next->type == t && !a->next->alt)
+ a = a->next, ap = a->text;
+ if (!*++bp && b->next && b->next->type == t && !b->next->alt)
+ b = b->next, bp = b->text;
+ }
+ /* One side ran out of text first: it sorts earlier. */
+ if (*ap || *bp)
+ return (*ap ? +1 : -1);
+ a = a->next;
+ b = b->next;
+ }
+ }
+
+ /* Whichever list still has words left sorts later. */
+ if (a || b)
+ return (a ? +1 : -1);
+ else
+ return 0;
+}
+
+/*
+ * For every word that isattr() accepts, OR into its `aux' field a
+ * flag saying where it sits within a run of neighbouring words that
+ * sameattr() matches with it: attr_Only (no matching neighbour),
+ * attr_First, attr_Last or attr_Always (matching neighbours on both
+ * sides).
+ */
+void mark_attr_ends(paragraph *sourceform) {
+    paragraph *para;
+
+    for (para = sourceform; para; para = para->next) {
+        word *prev = NULL;
+        word *cur;
+
+        for (cur = para->words; cur; prev = cur, cur = cur->next) {
+            int joins_prev, joins_next, flag;
+
+            if (!isattr(cur->type))
+                continue;
+
+            joins_prev = (prev && isattr(prev->type) &&
+                          sameattr(prev->type, cur->type));
+            joins_next = (cur->next && isattr(cur->next->type) &&
+                          sameattr(cur->next->type, cur->type));
+
+            if (joins_prev)
+                flag = joins_next ? attr_Always : attr_Last;
+            else
+                flag = joins_next ? attr_First : attr_Only;
+            cur->aux |= flag;
+        }
+    }
+}
+
+/*
+ * Wrap a paragraph's word list into lines.
+ *
+ * text: the word list to wrap
+ * width: width available on the first line
+ * subsequentwidth: width available on every later line
+ * widthfn: callback giving the width of a single word
+ *
+ * The list is first cut into wrappable chunks at white space and
+ * after any word whose `breaks' flag is set. A dynamic programming
+ * pass then chooses line breaks minimising the sum of squared
+ * unused widths; the last line is not penalised.
+ *
+ * Returns a newly allocated list of wrappedline records (NULL for
+ * an empty word list), to be released with wrap_free().
+ *
+ * NOTE(review): `shortfall' is always measured against `width',
+ * even for lines after the first, which were wrapped to
+ * `subsequentwidth' - confirm that this is intended.
+ */
+wrappedline *wrap_para(word *text, int width, int subsequentwidth,
+ int (*widthfn)(word *)) {
+ wrappedline *head = NULL, **ptr = &head;
+ int nwords, wordsize;
+ struct wrapword {
+ /* first word of the chunk, and the word just after its last */
+ word *begin, *end;
+ /* total width of the chunk's words */
+ int width;
+ /* width of the white space following the chunk, or 0 */
+ int spacewidth;
+ /* best cost of wrapping from this chunk to the end */
+ int cost;
+ /* number of chunks on the line that starts here */
+ int nwords;
+ } *wrapwords;
+ int i, j, n;
+
+ /*
+ * Break the line up into wrappable components.
+ */
+ nwords = wordsize = 0;
+ wrapwords = NULL;
+ while (text) {
+ if (nwords >= wordsize) {
+ wordsize = nwords + 64;
+ wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
+ }
+ wrapwords[nwords].width = 0;
+ wrapwords[nwords].begin = text;
+ /* Gather words until white space follows or a break is allowed. */
+ while (text) {
+ wrapwords[nwords].width += widthfn(text);
+ wrapwords[nwords].end = text->next;
+ if (text->next && (text->next->type == word_WhiteSpace ||
+ text->next->type == word_EmphSpace ||
+ text->breaks))
+ break;
+ text = text->next;
+ }
+ /* Record the trailing space, if any, and step over it. */
+ if (text && text->next && (text->next->type == word_WhiteSpace ||
+ text->next->type == word_EmphSpace)) {
+ wrapwords[nwords].spacewidth = widthfn(text->next);
+ text = text->next;
+ } else {
+ wrapwords[nwords].spacewidth = 0;
+ }
+ nwords++;
+ if (text)
+ text = text->next;
+ }
+
+ /*
+ * Perform the dynamic wrapping algorithm: work backwards from
+ * nwords-1, determining the optimal wrapping for each terminal
+ * subsequence of the paragraph.
+ */
+ for (i = nwords; i-- ;) {
+ int best = -1;
+ int bestcost = 0;
+ int cost;
+ int linelen = 0, spacewidth = 0;
+ int seenspace;
+ int thiswidth = (i == 0 ? width : subsequentwidth);
+
+ j = 0;
+ seenspace = 0;
+ while (i+j < nwords) {
+ /*
+ * See what happens if we put j+1 words on this line.
+ */
+ if (spacewidth)
+ seenspace = 1;
+ linelen += spacewidth + wrapwords[i+j].width;
+ spacewidth = wrapwords[i+j].spacewidth;
+ j++;
+ if (linelen > thiswidth) {
+ /*
+ * If we're over the width limit, abandon ship,
+ * _unless_ there is no best-effort yet (which will
+ * only happen if the first word is too long all by
+ * itself).
+ */
+ if (best > 0)
+ break;
+ }
+ if (i+j == nwords) {
+ /*
+ * Special case: if we're at the very end of the
+ * paragraph, we don't score penalty points for the
+ * white space left on the line.
+ */
+ cost = 0;
+ } else {
+ cost = (thiswidth-linelen) * (thiswidth-linelen);
+ cost += wrapwords[i+j].cost;
+ }
+ /*
+ * We compare bestcost >= cost, not bestcost > cost,
+ * because in cases where the costs are identical we
+ * want to try to look like the greedy algorithm,
+ * because readers are likely to have spent a lot of
+ * time looking at greedy-wrapped paragraphs and
+ * there's no point violating the Principle of Least
+ * Surprise if it doesn't actually gain anything.
+ */
+ if (best < 0 || bestcost >= cost) {
+ bestcost = cost;
+ best = j;
+ }
+ }
+ /*
+ * Now we know the optimal answer for this terminal
+ * subsequence, so put it in wrapwords.
+ */
+ wrapwords[i].cost = bestcost;
+ wrapwords[i].nwords = best;
+ }
+
+ /*
+ * We've wrapped the paragraph. Now build the output
+ * `wrappedline' list.
+ */
+ i = 0;
+ while (i < nwords) {
+ wrappedline *w = mknew(wrappedline);
+ *ptr = w;
+ ptr = &w->next;
+ w->next = NULL;
+
+ n = wrapwords[i].nwords;
+ w->begin = wrapwords[i].begin;
+ w->end = wrapwords[i+n-1].end;
+
+ /*
+ * Count along the words to find nspaces and shortfall.
+ */
+ w->nspaces = 0;
+ w->shortfall = width;
+ for (j = 0; j < n; j++) {
+ w->shortfall -= wrapwords[i+j].width;
+ /* The space after the line's last chunk is not counted. */
+ if (j < n-1 && wrapwords[i+j].spacewidth) {
+ w->nspaces++;
+ w->shortfall -= wrapwords[i+j].spacewidth;
+ }
+ }
+ i += n;
+ }
+
+ sfree(wrapwords);
+
+ return head;
+}
+
+/*
+ * Release a list of wrapped lines: walk the `next' chain starting at
+ * w and hand every node to sfree(). Passing NULL is harmless.
+ */
+void wrap_free(wrappedline *w) {
+    wrappedline *following;
+
+    for (; w != NULL; w = following) {
+        /* Read the link before the node itself goes away. */
+        following = w->next;
+        sfree(w);
+    }
+}
--- /dev/null
+% Halibut mode for Jed.
+
+$1 = "Halibut"; % name of the syntax table; every call below passes $1
+create_syntax_table ($1);
+
+define_syntax ("\#", "", '%', $1); % Comment syntax: opener is \#, closer is the empty string
+define_syntax ('\\', '\\', $1); % Quote character: backslash
+define_syntax ("{", "}", '(', $1); % brace pair as matching delimiters (original note: are all these needed?)
+define_syntax ("a-zA-Z0-9", 'w', $1); % characters that make up a word
+set_syntax_flags ($1, 8); % NOTE(review): the meaning of flag value 8 is not stated in this file -- confirm against the Jed docs
+
+#ifdef HAS_DFA_SYNTAX
+%enable_highlight_cache ("halibut.dfa", $1);
+
+% A braced comment in Halibut is \#{ ... }, where ... may contain
+% any correctly nested sequence of braces. Of course we can't match
+% that in a DFA rule, so we'll go down to a reasonable depth of 3
+% instead. The same rule list appears twice below: through the dfa_-prefixed functions when they exist, otherwise through the unprefixed names.
+#ifexists dfa_define_highlight_rule
+dfa_define_highlight_rule ("\\\\#{[^{}]*({[^{}]*({[^}]*}[^{}]*)*}[^{}]*)*}",
+ "Qcomment", $1); % braced comment, nesting handled to a fixed depth
+
+dfa_define_highlight_rule ("\\\\#.*$", "comment", $1); % \# through end of line
+dfa_define_highlight_rule ("^\\\\c([ \t].*)?$", "string", $1); % a whole \c line, coloured as a string
+dfa_define_highlight_rule ("\\\\[\\\\{}\\-_]", "keyword0", $1); % backslash followed by one of \ { } - _
+dfa_define_highlight_rule ("\\\\[A-Za-tv-z][A-Za-z0-9]*", "keyword0", $1); % \command whose first letter is not lowercase u
+dfa_define_highlight_rule ("\\\\u[A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9]",
+ "keyword0", $1); % \u followed by exactly four hex digits
+dfa_define_highlight_rule ("\\\\u[A-Fa-f0-9]?[A-Fa-f0-9]?[A-Fa-f0-9]?[A-Fa-f0-9]",
+ "keyword1", $1); % \u followed by one to four hex digits, in a different colour from the rule above
+dfa_define_highlight_rule ("[{}]", "delimiter", $1); % bare braces
+dfa_define_highlight_rule (".", "normal", $1); % any other single character
+dfa_build_highlight_table ($1);
+#else
+define_highlight_rule ("\\\\#{[^{}]*({[^{}]*({[^}]*}[^{}]*)*}[^{}]*)*}",
+ "Qcomment", $1);
+
+define_highlight_rule ("\\\\#.*$", "comment", $1);
+define_highlight_rule ("^\\\\c([ \t].*)?$", "string", $1);
+define_highlight_rule ("\\\\[\\\\{}\\-_]", "keyword0", $1);
+define_highlight_rule ("\\\\[A-Za-tv-z][A-Za-z0-9]*", "keyword0", $1);
+define_highlight_rule ("\\\\u[A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9]",
+ "keyword0", $1);
+define_highlight_rule ("\\\\u[A-Fa-f0-9]?[A-Fa-f0-9]?[A-Fa-f0-9]?[A-Fa-f0-9]",
+ "keyword1", $1);
+define_highlight_rule ("[{}]", "delimiter", $1);
+define_highlight_rule (".", "normal", $1);
+build_highlight_table ($1); % same patterns and colours as the dfa_ branch
+#endif
+#endif
+
+% Report whether the current line holds a \# comment introducer.
+% Moves point: first to the start of the line, then past every
+% occurrence of the three-character sequence \\# (an escaped
+% backslash followed by #), and finally searches forward from there
+% for \#. The result of that last search is the return value.
+define halibut_is_comment() {
+    variable found;
+    bol ();
+    while (ffind ("\\\\#"))
+    {
+        go_right (3);
+    }
+    found = ffind ("\\#");
+    return found;
+}
+
+% When non-zero, a line that contains a comment does not delimit a
+% paragraph.
+variable Halibut_Ignore_Comment = 0;
+
+% Paragraph-separator test for the current line (installed as the
+% "par_sep" buffer hook by halibut_mode). Returns 1 for a line that is
+% empty after leading whitespace, or that starts with \c followed by a
+% space, tab or newline. Otherwise the answer is the comment test,
+% unless Halibut_Ignore_Comment is set. halibut_is_comment is called
+% on that last path whatever the flag says, so point is moved by it.
+define halibut_paragraph_separator() {
+    variable starts_with_c;
+    bol();
+    skip_white();
+    if (eolp())
+        return 1;
+    starts_with_c = looking_at("\\c ") or looking_at("\\c\t") or
+        looking_at("\\c\n");
+    if (starts_with_c)
+        return 1;
+    return not (Halibut_Ignore_Comment) and halibut_is_comment();
+}
+
+% Wrap hook (installed as the "wrap_hook" buffer hook by
+% halibut_mode). Checks, without disturbing point, whether the line
+% above contains a comment; if it does, the current line is given a
+% `\# ' prefix after its leading whitespace so the comment carries on.
+define halibut_wrap_hook() {
+    variable prev_is_comment;
+
+    push_spot ();
+    prev_is_comment = up_1 () and halibut_is_comment ();
+    pop_spot ();
+
+    if (prev_is_comment == 0)
+        return;
+
+    push_spot ();
+    bol_skip_white ();
+    insert ("\\# ");
+    pop_spot ();
+}
+
+#ifexists mode_set_mode_info
+mode_set_mode_info("Halibut", "fold_info", "\\# {{{\r\\# }}}\r\r"); % fold markers {{{ and }}} written behind the \# comment introducer; only registered when mode_set_mode_info exists
+#endif
+
+% Mode entry point: names the mode, installs the paragraph-separator
+% and wrap hooks defined above, selects the "Halibut" syntax table and
+% finally runs the user's halibut_mode_hook.
+define halibut_mode() {
+    variable name = "Halibut";
+    % use_keymap (name);
+    % NOTE(review): the meaning of the two flag bits is not stated in
+    % this file -- confirm against the Jed set_mode documentation.
+    set_mode (name, 0x1 | 0x20);
+    set_buffer_hook ("par_sep", "halibut_paragraph_separator");
+    set_buffer_hook ("wrap_hook", "halibut_wrap_hook");
+    use_syntax_table (name);
+    runhooks ("halibut_mode_hook");
+}
--- /dev/null
+#!/usr/bin/perl
+#
+# Reads a log file, containing lines of the five types
+# <file> <line> malloc(<number>) returns <pointer>
+# <file> <line> strdup(<number>) returns <pointer>
+# <file> <line> calloc(<number>*<number>) returns <pointer>
+# <file> <line> realloc(<pointer>,<number>) returns <pointer>
+# <file> <line> free(<pointer>)
+#
+# with optional line on the front saying
+# null pointer is <pointer>
+#
+# and produces a list of free()s and realloc()s of wrong pointers,
+# and also of malloc()s, calloc()s and realloc()s that never get free()d.
+
+$errors=0;
+
+# Per-pointer state, keyed by the pointer text exactly as logged:
+#   %record    - "(lineno) file:line: call()" while the pointer is
+#                live, "" once it has been released
+#   %lastalloc - where the pointer was most recently handed out
+#   %lastfree  - where the pointer was most recently released
+while (<>) {
+    # $in is a pointer being given up (free, realloc argument); $out is
+    # a pointer being handed out. Either stays "" when not applicable.
+    $in=$out="";
+    ($file, $line, $call, $in, $out)=($1,$2,$3,"",$4)
+        if /^(\S+) (\S+) (malloc|strdup)\(\S+\) returns (\S+)$/;
+    # The calloc pattern has exactly three capture groups, so the
+    # returned pointer is $3. Reading a higher-numbered group left $out
+    # empty, which meant calloc results were never recorded and their
+    # later free() was reported as a bad pointer.
+    ($file, $line, $call, $in, $out)=($1,$2,"calloc","",$3)
+        if /^(\S+) (\S+) calloc\(\S+\*\S+\) returns (\S+)$/;
+    ($file, $line, $call, $in, $out)=($1,$2,"realloc",$3,$4)
+        if /^(\S+) (\S+) realloc\((\S+),\S+\) returns (\S+)$/;
+    ($file, $line, $call, $in, $out)=($1,$2,"free",$3,"")
+        if /^(\S+) (\S+) free\((\S+)\)$/;
+    $null = $1, next if /^null pointer is (\S+)$/;
+    if ($in ne "") {
+        # Work out how to describe the pointer in case it turns out not
+        # to be live.
+        if (&null($in)) {
+            $bad = "null pointer";
+        } elsif (defined $lastalloc{$in}) {
+            $bad = "already-freed pointer (last alloc $lastalloc{$in}, last free $lastfree{$in})";
+        } else {
+            $bad = "bad pointer";
+        }
+        $errors=1, print "($.) $file:$line: $call() $bad\n"
+            if $record{$in} eq "";
+        $lastfree{$in}="($.) $file:$line";
+        $record{$in}="";
+    }
+    if ($out ne "" && !&null($out)) {
+        $errors=1, print "($.) $file:$line: $call() returned already ".
+            "allocated pointer\n" if $record{$out} ne "";
+        $record{$out}="($.) $file:$line: $call()";
+        $lastalloc{$out}="($.) $file:$line";
+    }
+}
+
+# Anything still recorded as live at end of input was never released.
+foreach $i (keys %record) {
+    $errors=1, print "$record{$i} never got freed\n"
+        if $record{$i} ne "";
+}
+
+print "no problems\n" if !$errors;
+
+# Decide whether a pointer string denotes the null pointer. If the log
+# supplied a "null pointer is ..." line, compare against that text;
+# otherwise accept all-zero numbers (with or without 0x) and "(nil)".
+sub null {
+    local ($_) = @_;
+    return $_ eq $null if $null;
+    return /^((0x)?0+|\(nil\))$/;
+}
--- /dev/null
+/*
+ * style.c: load and keep track of user style preferences
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "halibut.h"
+
--- /dev/null
+/*
+ * tree234.c: reasonably generic counted 2-3-4 tree routines.
+ *
+ * This file is copyright 1999-2001 Simon Tatham.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "tree234.h"
+
+#define smalloc malloc /* in this file the allocator is plain malloc, so it can return NULL */
+#define sfree free
+
+#define mknew(typ) ( (typ *) smalloc (sizeof (typ)) ) /* one uninitialised object of type typ */
+
+#ifdef TEST
+#define LOG(x) (printf x) /* x is a parenthesised printf argument list, hence LOG((...)) at call sites */
+#else
+#define LOG(x) /* tracing expands to nothing unless TEST is defined */
+#endif
+
+typedef struct node234_Tag node234;
+
+struct tree234_Tag {
+ node234 *root; /* top node; NULL while the tree is empty */
+ cmpfn234 cmp; /* ordering function; NULL marks an unsorted tree addressed by index */
+};
+
+struct node234_Tag {
+ node234 *parent; /* NULL in the root node */
+ node234 *kids[4]; /* child subtrees; kids[0] == NULL identifies a leaf */
+ int counts[4]; /* counts[i] is the number of elements stored beneath kids[i] */
+ void *elems[3]; /* elements, filled from index 0 upwards; unused slots are NULL */
+};
+
+/*
+ * Create an empty 2-3-4 tree.
+ *
+ * cmp is the ordering function for a sorted tree; the rest of this
+ * file treats a NULL cmp as marking an unsorted tree that is
+ * addressed by index only.
+ *
+ * Returns the new tree, or NULL if the allocation fails. smalloc is
+ * plain malloc in this file, so the result has to be tested before
+ * anything is written through it.
+ */
+tree234 *newtree234(cmpfn234 cmp) {
+    tree234 *ret = mknew(tree234);
+    if (!ret)
+        return NULL;
+    LOG(("created tree %p\n", ret));
+    ret->root = NULL;
+    ret->cmp = cmp;
+    return ret;
+}
+
+/*
+ * Release a node together with every node beneath it. The elements
+ * the nodes point at are left alone. A NULL node is ignored, which is
+ * what ends the recursion at absent children.
+ */
+static void freenode234(node234 *n) {
+    int child;
+
+    if (n == NULL)
+        return;
+    for (child = 0; child < 4; child++)
+        freenode234(n->kids[child]);
+    sfree(n);
+}
+/*
+ * Free a 2-3-4 tree: all of its nodes, then the tree header itself.
+ * The stored elements are not freed.
+ */
+void freetree234(tree234 *t) {
+    node234 *top = t->root;
+
+    freenode234(top);
+    sfree(t);
+}
+
+/*
+ * Number of elements in the subtree rooted at n: the recorded sizes
+ * of its four child subtrees plus however many of its own three
+ * element slots are occupied. A NULL node counts as zero.
+ */
+static int countnode234(node234 *n) {
+    int total, slot;
+
+    if (n == NULL)
+        return 0;
+
+    total = n->counts[0] + n->counts[1] + n->counts[2] + n->counts[3];
+    for (slot = 0; slot < 3; slot++) {
+        if (n->elems[slot] != NULL)
+            total++;
+    }
+    return total;
+}
+
+/*
+ * Number of elements currently held in tree t (0 for an empty tree).
+ */
+int count234(tree234 *t) {
+    return t->root ? countnode234(t->root) : 0;
+}
+
+/*
+ * Propagate a node overflow up a tree until it stops. Returns 0 or
+ * 1, depending on whether the root had to be split or not: 0 means
+ * *root is untouched, 1 means a new root node was allocated and
+ * stored in *root.
+ *
+ * left/e/right is the subtree-element-subtree triple to insert; it
+ * takes the place of child slot ki of node n. left and right are
+ * NULL when the insertion happens in a leaf. A 2-node or 3-node
+ * absorbs the triple directly; a 4-node is split and its median
+ * element, with the two halves, becomes the triple to insert one
+ * level further up. After a non-splitting insert the counts[]
+ * entries in all ancestors of n are refreshed.
+ */
+static int add234_insert(node234 *left, void *e, node234 *right,
+ node234 **root, node234 *n, int ki) {
+ int lcount, rcount;
+ /*
+ * We need to insert the new left/element/right set in n at
+ * child position ki.
+ */
+ lcount = countnode234(left); /* countnode234 gives 0 for a NULL subtree */
+ rcount = countnode234(right);
+ while (n) {
+ LOG((" at %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ n,
+ n->kids[0], n->counts[0], n->elems[0],
+ n->kids[1], n->counts[1], n->elems[1],
+ n->kids[2], n->counts[2], n->elems[2],
+ n->kids[3], n->counts[3]));
+ LOG((" need to insert %p/%d \"%s\" %p/%d at position %d\n",
+ left, lcount, e, right, rcount, ki));
+ if (n->elems[1] == NULL) {
+ /*
+ * Insert in a 2-node; simple.
+ */
+ if (ki == 0) {
+ LOG((" inserting on left of 2-node\n"));
+ n->kids[2] = n->kids[1]; n->counts[2] = n->counts[1];
+ n->elems[1] = n->elems[0];
+ n->kids[1] = right; n->counts[1] = rcount;
+ n->elems[0] = e;
+ n->kids[0] = left; n->counts[0] = lcount;
+ } else { /* ki == 1 */
+ LOG((" inserting on right of 2-node\n"));
+ n->kids[2] = right; n->counts[2] = rcount;
+ n->elems[1] = e;
+ n->kids[1] = left; n->counts[1] = lcount;
+ }
+ if (n->kids[0]) n->kids[0]->parent = n;
+ if (n->kids[1]) n->kids[1]->parent = n;
+ if (n->kids[2]) n->kids[2]->parent = n;
+ LOG((" done\n"));
+ break;
+ } else if (n->elems[2] == NULL) {
+ /*
+ * Insert in a 3-node; simple.
+ */
+ if (ki == 0) {
+ LOG((" inserting on left of 3-node\n"));
+ n->kids[3] = n->kids[2]; n->counts[3] = n->counts[2];
+ n->elems[2] = n->elems[1];
+ n->kids[2] = n->kids[1]; n->counts[2] = n->counts[1];
+ n->elems[1] = n->elems[0];
+ n->kids[1] = right; n->counts[1] = rcount;
+ n->elems[0] = e;
+ n->kids[0] = left; n->counts[0] = lcount;
+ } else if (ki == 1) {
+ LOG((" inserting in middle of 3-node\n"));
+ n->kids[3] = n->kids[2]; n->counts[3] = n->counts[2];
+ n->elems[2] = n->elems[1];
+ n->kids[2] = right; n->counts[2] = rcount;
+ n->elems[1] = e;
+ n->kids[1] = left; n->counts[1] = lcount;
+ } else { /* ki == 2 */
+ LOG((" inserting on right of 3-node\n"));
+ n->kids[3] = right; n->counts[3] = rcount;
+ n->elems[2] = e;
+ n->kids[2] = left; n->counts[2] = lcount;
+ }
+ if (n->kids[0]) n->kids[0]->parent = n;
+ if (n->kids[1]) n->kids[1]->parent = n;
+ if (n->kids[2]) n->kids[2]->parent = n;
+ if (n->kids[3]) n->kids[3]->parent = n;
+ LOG((" done\n"));
+ break;
+ } else {
+ node234 *m = mknew(node234);
+ m->parent = n->parent;
+ LOG((" splitting a 4-node; created new node %p\n", m));
+ /*
+ * Insert in a 4-node; split into a 2-node and a
+ * 3-node, and move focus up a level.
+ *
+ * I don't think it matters which way round we put the
+ * 2 and the 3. For simplicity, we'll put the 3 first
+ * always.
+ *
+ * In every case below the new node m receives the
+ * left-hand part, n keeps the right-hand part, and e is
+ * reassigned to the element that moves up to the parent.
+ */
+ if (ki == 0) {
+ m->kids[0] = left; m->counts[0] = lcount;
+ m->elems[0] = e;
+ m->kids[1] = right; m->counts[1] = rcount;
+ m->elems[1] = n->elems[0];
+ m->kids[2] = n->kids[1]; m->counts[2] = n->counts[1];
+ e = n->elems[1];
+ n->kids[0] = n->kids[2]; n->counts[0] = n->counts[2];
+ n->elems[0] = n->elems[2];
+ n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3];
+ } else if (ki == 1) {
+ m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0];
+ m->elems[0] = n->elems[0];
+ m->kids[1] = left; m->counts[1] = lcount;
+ m->elems[1] = e;
+ m->kids[2] = right; m->counts[2] = rcount;
+ e = n->elems[1];
+ n->kids[0] = n->kids[2]; n->counts[0] = n->counts[2];
+ n->elems[0] = n->elems[2];
+ n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3];
+ } else if (ki == 2) {
+ m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0];
+ m->elems[0] = n->elems[0];
+ m->kids[1] = n->kids[1]; m->counts[1] = n->counts[1];
+ m->elems[1] = n->elems[1];
+ m->kids[2] = left; m->counts[2] = lcount;
+ /* e = e; */
+ n->kids[0] = right; n->counts[0] = rcount;
+ n->elems[0] = n->elems[2];
+ n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3];
+ } else { /* ki == 3 */
+ m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0];
+ m->elems[0] = n->elems[0];
+ m->kids[1] = n->kids[1]; m->counts[1] = n->counts[1];
+ m->elems[1] = n->elems[1];
+ m->kids[2] = n->kids[2]; m->counts[2] = n->counts[2];
+ n->kids[0] = left; n->counts[0] = lcount;
+ n->elems[0] = e;
+ n->kids[1] = right; n->counts[1] = rcount;
+ e = n->elems[2];
+ }
+ m->kids[3] = n->kids[3] = n->kids[2] = NULL; /* m is left as a 3-node, n as a 2-node */
+ m->counts[3] = n->counts[3] = n->counts[2] = 0;
+ m->elems[2] = n->elems[2] = n->elems[1] = NULL;
+ if (m->kids[0]) m->kids[0]->parent = m;
+ if (m->kids[1]) m->kids[1]->parent = m;
+ if (m->kids[2]) m->kids[2]->parent = m;
+ if (n->kids[0]) n->kids[0]->parent = n;
+ if (n->kids[1]) n->kids[1]->parent = n;
+ LOG((" left (%p): %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", m,
+ m->kids[0], m->counts[0], m->elems[0],
+ m->kids[1], m->counts[1], m->elems[1],
+ m->kids[2], m->counts[2]));
+ LOG((" right (%p): %p/%d \"%s\" %p/%d\n", n,
+ n->kids[0], n->counts[0], n->elems[0],
+ n->kids[1], n->counts[1]));
+ left = m; lcount = countnode234(left);
+ right = n; rcount = countnode234(right);
+ }
+ if (n->parent)
+ ki = (n->parent->kids[0] == n ? 0 :
+ n->parent->kids[1] == n ? 1 :
+ n->parent->kids[2] == n ? 2 : 3);
+ n = n->parent; /* reached only after a split: retry the insert one level up */
+ }
+
+ /*
+ * If we've come out of here by `break', n will still be
+ * non-NULL and all we need to do is go back up the tree
+ * updating counts. If we've come here because n is NULL, we
+ * need to create a new root for the tree because the old one
+ * has just split into two. */
+ if (n) {
+ while (n->parent) {
+ int count = countnode234(n);
+ int childnum;
+ childnum = (n->parent->kids[0] == n ? 0 :
+ n->parent->kids[1] == n ? 1 :
+ n->parent->kids[2] == n ? 2 : 3);
+ n->parent->counts[childnum] = count;
+ n = n->parent;
+ }
+ return 0; /* root unchanged */
+ } else {
+ LOG((" root is overloaded, split into two\n"));
+ (*root) = mknew(node234);
+ (*root)->kids[0] = left; (*root)->counts[0] = lcount;
+ (*root)->elems[0] = e;
+ (*root)->kids[1] = right; (*root)->counts[1] = rcount;
+ (*root)->elems[1] = NULL;
+ (*root)->kids[2] = NULL; (*root)->counts[2] = 0;
+ (*root)->elems[2] = NULL;
+ (*root)->kids[3] = NULL; (*root)->counts[3] = 0;
+ (*root)->parent = NULL;
+ if ((*root)->kids[0]) (*root)->kids[0]->parent = (*root);
+ if ((*root)->kids[1]) (*root)->kids[1]->parent = (*root);
+ LOG((" new root is %p/%d \"%s\" %p/%d\n",
+ (*root)->kids[0], (*root)->counts[0],
+ (*root)->elems[0],
+ (*root)->kids[1], (*root)->counts[1]));
+ return 1; /* root moved */
+ }
+}
+
+/*
+ * Add an element e to a 2-3-4 tree t. Returns e on success, or if
+ * an existing element compares equal, returns that.
+ *
+ * index selects the mode. A negative index means placement is
+ * decided by t->cmp. A non-negative index is the position the new
+ * element should occupy; the descent then follows the subtree
+ * counts instead of comparing.
+ *
+ * In positional mode NULL is returned when the index runs past the
+ * counts of an internal node, but that is the only range test made
+ * here: an empty tree accepts any index, and a leaf uses the
+ * remaining index directly as the slot number.
+ */
+static void *add234_internal(tree234 *t, void *e, int index) {
+ node234 *n;
+ int ki;
+ void *orig_e = e;
+ int c;
+
+ LOG(("adding element \"%s\" to tree %p\n", e, t));
+ if (t->root == NULL) {
+ t->root = mknew(node234);
+ t->root->elems[1] = t->root->elems[2] = NULL;
+ t->root->kids[0] = t->root->kids[1] = NULL;
+ t->root->kids[2] = t->root->kids[3] = NULL;
+ t->root->counts[0] = t->root->counts[1] = 0;
+ t->root->counts[2] = t->root->counts[3] = 0;
+ t->root->parent = NULL;
+ t->root->elems[0] = e;
+ LOG((" created root %p\n", t->root));
+ return orig_e;
+ }
+
+ n = t->root;
+ while (n) {
+ LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ n,
+ n->kids[0], n->counts[0], n->elems[0],
+ n->kids[1], n->counts[1], n->elems[1],
+ n->kids[2], n->counts[2], n->elems[2],
+ n->kids[3], n->counts[3]));
+ if (index >= 0) {
+ if (!n->kids[0]) {
+ /*
+ * Leaf node. We want to insert at kid position
+ * equal to the index:
+ *
+ * 0 A 1 B 2 C 3
+ */
+ ki = index; /* taken as-is: not compared with the number of elements in the leaf */
+ } else {
+ /*
+ * Internal node. We always descend through it (add
+ * always starts at the bottom, never in the
+ * middle).
+ *
+ * Each failed test subtracts the size of the child
+ * just skipped plus one for the element after it.
+ */
+ if (index <= n->counts[0]) {
+ ki = 0;
+ } else if (index -= n->counts[0] + 1, index <= n->counts[1]) {
+ ki = 1;
+ } else if (index -= n->counts[1] + 1, index <= n->counts[2]) {
+ ki = 2;
+ } else if (index -= n->counts[2] + 1, index <= n->counts[3]) {
+ ki = 3;
+ } else
+ return NULL; /* error: index out of range */
+ }
+ } else {
+ if ((c = t->cmp(e, n->elems[0])) < 0)
+ ki = 0;
+ else if (c == 0)
+ return n->elems[0]; /* already exists */
+ else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1])) < 0)
+ ki = 1;
+ else if (c == 0)
+ return n->elems[1]; /* already exists */
+ else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2])) < 0)
+ ki = 2;
+ else if (c == 0)
+ return n->elems[2]; /* already exists */
+ else
+ ki = 3;
+ }
+ LOG((" moving to child %d (%p)\n", ki, n->kids[ki]));
+ if (!n->kids[ki])
+ break;
+ n = n->kids[ki];
+ }
+
+ add234_insert(NULL, e, NULL, &t->root, n, ki); /* n is the node where the descent stopped; result (root moved or not) is not needed */
+
+ return orig_e;
+}
+
+/*
+ * Add e to a sorted tree. Returns NULL when the tree has no compare
+ * function (it is unsorted); otherwise whatever add234_internal
+ * returns when asked to place the element by comparison.
+ */
+void *add234(tree234 *t, void *e) {
+    /* A negative index tells add234_internal to place e using t->cmp. */
+    return t->cmp ? add234_internal(t, e, -1) : NULL;
+}
+/*
+ * Add e to an unsorted tree so that it ends up at position `index'.
+ * Valid positions run from 0 to count234(t) inclusive, the last one
+ * meaning "append".
+ *
+ * Returns NULL if the tree is sorted (it has a compare function) or
+ * if index is out of range; otherwise returns what add234_internal
+ * returns.
+ *
+ * The upper bound has to be tested here. add234_internal only
+ * compares the index against subtree counts while passing through
+ * internal nodes, and an absent child has a count of 0, so an index
+ * one past the end can select a child slot that does not exist. In a
+ * leaf (including a root that is a leaf) the index is used directly
+ * as a slot number, so an oversized value indexes beyond kids[].
+ */
+void *addpos234(tree234 *t, void *e, int index) {
+    if (t->cmp)                        /* tree is sorted */
+        return NULL;                   /* return failure */
+    if (index < 0 || index > count234(t))   /* index out of range */
+        return NULL;                   /* return failure */
+
+    return add234_internal(t, e, index);
+}
+
+/*
+ * Fetch the element at position `index' (counting from 0) in tree t.
+ * Returns NULL when the tree is empty or the index lies outside
+ * 0 .. count-1.
+ *
+ * The descent treats a node as the sequence
+ *   kids[0] elems[0] kids[1] elems[1] kids[2] elems[2] kids[3]
+ * and strips the size of each part it steps over from index.
+ */
+void *index234(tree234 *t, int index) {
+    node234 *node = t->root;
+    int slot;
+
+    if (node == NULL)
+        return NULL;                   /* tree is empty */
+    if (index < 0 || index >= countnode234(node))
+        return NULL;                   /* out of range */
+
+    while (node != NULL) {
+        for (slot = 0; slot < 3; slot++) {
+            if (index < node->counts[slot])
+                break;                 /* target lies inside kids[slot] */
+            index -= node->counts[slot] + 1;
+            if (index < 0)
+                return node->elems[slot];
+        }
+        /* slot is 3 here if all three elements were stepped over. */
+        node = node->kids[slot];
+    }
+
+    /* Not expected to be reached once the range check has passed. */
+    return NULL;
+}
+
+/*
+ * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not
+ * found. e is always passed as the first argument to cmp, so cmp
+ * can be an asymmetric function if desired. cmp can also be passed
+ * as NULL, in which case the compare function from the tree proper
+ * will be used.
+ *
+ * relation chooses which element counts as the answer: REL234_EQ,
+ * REL234_LT, REL234_LE or REL234_GT are tested explicitly below and
+ * any other value is handled as described for GE in the comments.
+ * e may be NULL only with REL234_LT or REL234_GT (asserted); it
+ * then behaves as a value greater than, respectively smaller than,
+ * every element. If index is non-NULL and an element is returned,
+ * *index receives that element's position in the tree.
+ */
+void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp,
+ int relation, int *index) {
+ node234 *n;
+ void *ret;
+ int c;
+ int idx, ecount, kcount, cmpret;
+
+ if (t->root == NULL)
+ return NULL;
+
+ if (cmp == NULL)
+ cmp = t->cmp;
+
+ n = t->root;
+ /*
+ * Attempt to find the element itself.
+ */
+ idx = 0; /* running count of elements known to sort before the search position */
+ ecount = -1; /* slot of an exact match within n, or -1 while none has been found */
+ /*
+ * Prepare a fake `cmp' result if e is NULL.
+ */
+ cmpret = 0;
+ if (e == NULL) {
+ assert(relation == REL234_LT || relation == REL234_GT);
+ if (relation == REL234_LT)
+ cmpret = +1; /* e is a max: always greater */
+ else if (relation == REL234_GT)
+ cmpret = -1; /* e is a min: always smaller */
+ }
+ while (1) {
+ for (kcount = 0; kcount < 4; kcount++) {
+ if (kcount >= 3 || n->elems[kcount] == NULL ||
+ (c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0) {
+ break; /* e belongs in or before child kcount */
+ }
+ if (n->kids[kcount]) idx += n->counts[kcount];
+ if (c == 0) {
+ ecount = kcount;
+ break;
+ }
+ idx++;
+ }
+ if (ecount >= 0)
+ break;
+ if (n->kids[kcount])
+ n = n->kids[kcount];
+ else
+ break;
+ }
+
+ if (ecount >= 0) {
+ /*
+ * We have found the element we're looking for. It's
+ * n->elems[ecount], at tree index idx. If our search
+ * relation is EQ, LE or GE we can now go home.
+ */
+ if (relation != REL234_LT && relation != REL234_GT) {
+ if (index) *index = idx;
+ return n->elems[ecount];
+ }
+
+ /*
+ * Otherwise, we'll do an indexed lookup for the previous
+ * or next element. (It would be perfectly possible to
+ * implement these search types in a non-counted tree by
+ * going back up from where we are, but far more fiddly.)
+ */
+ if (relation == REL234_LT)
+ idx--;
+ else
+ idx++;
+ } else {
+ /*
+ * We've found our way to the bottom of the tree and we
+ * know where we would insert this node if we wanted to:
+ * we'd put it in in place of the (empty) subtree
+ * n->kids[kcount], and it would have index idx
+ *
+ * But the actual element isn't there. So if our search
+ * relation is EQ, we're doomed.
+ */
+ if (relation == REL234_EQ)
+ return NULL;
+
+ /*
+ * Otherwise, we must do an index lookup for index idx-1
+ * (if we're going left - LE or LT) or index idx (if we're
+ * going right - GE or GT).
+ */
+ if (relation == REL234_LT || relation == REL234_LE) {
+ idx--;
+ }
+ }
+
+ /*
+ * We know the index of the element we want; just call index234
+ * to do the rest. This will return NULL if the index is out of
+ * bounds, which is exactly what we want.
+ */
+ ret = index234(t, idx);
+ if (ret && index) *index = idx;
+ return ret;
+}
+void *find234(tree234 *t, void *e, cmpfn234 cmp) { /* exact-match lookup; the element's position is not reported */
+ return findrelpos234(t, e, cmp, REL234_EQ, NULL);
+}
+void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation) { /* lookup under the given relation; the element's position is not reported */
+ return findrelpos234(t, e, cmp, relation, NULL);
+}
+void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index) { /* exact-match lookup that also stores the element's position through index when non-NULL */
+ return findrelpos234(t, e, cmp, REL234_EQ, index);
+}
+
+/*
+ * Tree transformation used in delete and split: move a subtree
+ * right, from child ki of a node to the next child. Update k and
+ * index so that they still point to the same place in the
+ * transformed tree. Assumes the destination child is not full, and
+ * that the source child does have a subtree to spare. Can cope if
+ * the destination child is undersized.
+ *
+ *                . C .                         . B .
+ *               /     \           ->          /     \
+ * [more] a A b B c     d D e      [more] a A b       c C d D e
+ *
+ *                . C .                         . B .
+ *               /     \           ->          /     \
+ * [more] a A b B c     d          [more] a A b       c C d
+ *
+ * k may be NULL, in which case neither k nor index is touched.
+ */
+static void trans234_subtree_right(node234 *n, int ki, int *k, int *index) {
+ node234 *src, *dest;
+ int i, srclen, adjust;
+
+ src = n->kids[ki];
+ dest = n->kids[ki+1];
+
+ LOG((" trans234_subtree_right(%p, %d):\n", n, ki));
+ LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ n,
+ n->kids[0], n->counts[0], n->elems[0],
+ n->kids[1], n->counts[1], n->elems[1],
+ n->kids[2], n->counts[2], n->elems[2],
+ n->kids[3], n->counts[3]));
+ LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ src,
+ src->kids[0], src->counts[0], src->elems[0],
+ src->kids[1], src->counts[1], src->elems[1],
+ src->kids[2], src->counts[2], src->elems[2],
+ src->kids[3], src->counts[3]));
+ LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ dest,
+ dest->kids[0], dest->counts[0], dest->elems[0],
+ dest->kids[1], dest->counts[1], dest->elems[1],
+ dest->kids[2], dest->counts[2], dest->elems[2],
+ dest->kids[3], dest->counts[3]));
+ /*
+ * Move over the rest of the destination node to make space.
+ */
+ dest->kids[3] = dest->kids[2]; dest->counts[3] = dest->counts[2];
+ dest->elems[2] = dest->elems[1];
+ dest->kids[2] = dest->kids[1]; dest->counts[2] = dest->counts[1];
+ dest->elems[1] = dest->elems[0];
+ dest->kids[1] = dest->kids[0]; dest->counts[1] = dest->counts[0];
+
+ /* which element to move over */
+ i = (src->elems[2] ? 2 : src->elems[1] ? 1 : 0);
+
+ dest->elems[0] = n->elems[ki]; /* old separator drops into dest ... */
+ n->elems[ki] = src->elems[i]; /* ... and src's last element becomes the new separator */
+ src->elems[i] = NULL;
+
+ dest->kids[0] = src->kids[i+1]; dest->counts[0] = src->counts[i+1];
+ src->kids[i+1] = NULL; src->counts[i+1] = 0;
+
+ if (dest->kids[0]) dest->kids[0]->parent = dest;
+
+ adjust = dest->counts[0] + 1; /* what changed sides: the moved subtree plus one element */
+
+ n->counts[ki] -= adjust;
+ n->counts[ki+1] += adjust;
+
+ srclen = n->counts[ki]; /* size of src after the move */
+
+ if (k) {
+ LOG((" before: k,index = %d,%d\n", (*k), (*index)));
+ if ((*k) == ki && (*index) > srclen) {
+ (*index) -= srclen + 1;
+ (*k)++;
+ } else if ((*k) == ki+1) {
+ (*index) += adjust;
+ }
+ LOG((" after: k,index = %d,%d\n", (*k), (*index)));
+ }
+
+ LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ n,
+ n->kids[0], n->counts[0], n->elems[0],
+ n->kids[1], n->counts[1], n->elems[1],
+ n->kids[2], n->counts[2], n->elems[2],
+ n->kids[3], n->counts[3]));
+ LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ src,
+ src->kids[0], src->counts[0], src->elems[0],
+ src->kids[1], src->counts[1], src->elems[1],
+ src->kids[2], src->counts[2], src->elems[2],
+ src->kids[3], src->counts[3]));
+ LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ dest,
+ dest->kids[0], dest->counts[0], dest->elems[0],
+ dest->kids[1], dest->counts[1], dest->elems[1],
+ dest->kids[2], dest->counts[2], dest->elems[2],
+ dest->kids[3], dest->counts[3]));
+}
+
+/*
+ * Tree transformation used in delete and split: move a subtree
+ * left, from child ki of a node to the previous child. Update k
+ * and index so that they still point to the same place in the
+ * transformed tree. Assumes the destination child is not full, and
+ * that the source child does have a subtree to spare. Can cope if
+ * the destination child is undersized.
+ *
+ *        . B .                               . C .
+ *       /     \                 ->          /     \
+ *  a A b       c C d D e [more]      a A b B c     d D e [more]
+ *
+ *        . A .                               . B .
+ *       /     \                 ->          /     \
+ *      a       b B c C d [more]        a A b       c C d [more]
+ *
+ * k may be NULL, in which case neither k nor index is touched.
+ */
+static void trans234_subtree_left(node234 *n, int ki, int *k, int *index) {
+ node234 *src, *dest;
+ int i, adjust;
+
+ src = n->kids[ki];
+ dest = n->kids[ki-1];
+
+ LOG((" trans234_subtree_left(%p, %d):\n", n, ki));
+ LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ n,
+ n->kids[0], n->counts[0], n->elems[0],
+ n->kids[1], n->counts[1], n->elems[1],
+ n->kids[2], n->counts[2], n->elems[2],
+ n->kids[3], n->counts[3]));
+ LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ dest,
+ dest->kids[0], dest->counts[0], dest->elems[0],
+ dest->kids[1], dest->counts[1], dest->elems[1],
+ dest->kids[2], dest->counts[2], dest->elems[2],
+ dest->kids[3], dest->counts[3]));
+ LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ src,
+ src->kids[0], src->counts[0], src->elems[0],
+ src->kids[1], src->counts[1], src->elems[1],
+ src->kids[2], src->counts[2], src->elems[2],
+ src->kids[3], src->counts[3]));
+
+ /* where in dest to put it */
+ i = (dest->elems[1] ? 2 : dest->elems[0] ? 1 : 0);
+ dest->elems[i] = n->elems[ki-1]; /* old separator is appended to dest ... */
+ n->elems[ki-1] = src->elems[0]; /* ... and src's first element becomes the new separator */
+
+ dest->kids[i+1] = src->kids[0]; dest->counts[i+1] = src->counts[0];
+
+ if (dest->kids[i+1]) dest->kids[i+1]->parent = dest;
+
+ /*
+ * Move over the rest of the source node.
+ */
+ src->kids[0] = src->kids[1]; src->counts[0] = src->counts[1];
+ src->elems[0] = src->elems[1];
+ src->kids[1] = src->kids[2]; src->counts[1] = src->counts[2];
+ src->elems[1] = src->elems[2];
+ src->kids[2] = src->kids[3]; src->counts[2] = src->counts[3];
+ src->elems[2] = NULL;
+ src->kids[3] = NULL; src->counts[3] = 0;
+
+ adjust = dest->counts[i+1] + 1; /* what changed sides: the moved subtree plus one element */
+
+ n->counts[ki] -= adjust;
+ n->counts[ki-1] += adjust;
+
+ if (k) {
+ LOG((" before: k,index = %d,%d\n", (*k), (*index)));
+ if ((*k) == ki) {
+ (*index) -= adjust;
+ if ((*index) < 0) { /* the tracked position was in the part that moved */
+ (*index) += n->counts[ki-1] + 1;
+ (*k)--;
+ }
+ }
+ LOG((" after: k,index = %d,%d\n", (*k), (*index)));
+ }
+
+ LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ n,
+ n->kids[0], n->counts[0], n->elems[0],
+ n->kids[1], n->counts[1], n->elems[1],
+ n->kids[2], n->counts[2], n->elems[2],
+ n->kids[3], n->counts[3]));
+ LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ dest,
+ dest->kids[0], dest->counts[0], dest->elems[0],
+ dest->kids[1], dest->counts[1], dest->elems[1],
+ dest->kids[2], dest->counts[2], dest->elems[2],
+ dest->kids[3], dest->counts[3]));
+ LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ src,
+ src->kids[0], src->counts[0], src->elems[0],
+ src->kids[1], src->counts[1], src->elems[1],
+ src->kids[2], src->counts[2], src->elems[2],
+ src->kids[3], src->counts[3]));
+}
+
+/*
+ * Tree transformation used in delete and split: merge child nodes
+ * ki and ki+1 of a node. Update k and index so that they still
+ * point to the same place in the transformed tree. Assumes both
+ * children _are_ sufficiently small.
+ *
+ *        . B .                       .
+ *       /     \         ->           |
+ *  a A b       c C d           a A b B c C d
+ *
+ * This routine can also cope with either child being undersized:
+ *
+ *        . A .                       .
+ *       /     \         ->           |
+ *      a       b B c             a A b B c
+ *
+ *        . A .                       .
+ *       /     \         ->           |
+ *      a       b B c C d       a A b B c C d
+ *
+ * The right-hand child node is freed; the merged node is the old
+ * left-hand child. k may be NULL, in which case neither k nor index
+ * is touched.
+ */
+static void trans234_subtree_merge(node234 *n, int ki, int *k, int *index) {
+ node234 *left, *right;
+ int i, leftlen, rightlen, lsize, rsize;
+
+ left = n->kids[ki]; leftlen = n->counts[ki];
+ right = n->kids[ki+1]; rightlen = n->counts[ki+1];
+
+ LOG((" trans234_subtree_merge(%p, %d):\n", n, ki));
+ LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ n,
+ n->kids[0], n->counts[0], n->elems[0],
+ n->kids[1], n->counts[1], n->elems[1],
+ n->kids[2], n->counts[2], n->elems[2],
+ n->kids[3], n->counts[3]));
+ LOG((" left %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ left,
+ left->kids[0], left->counts[0], left->elems[0],
+ left->kids[1], left->counts[1], left->elems[1],
+ left->kids[2], left->counts[2], left->elems[2],
+ left->kids[3], left->counts[3]));
+ LOG((" right %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ right,
+ right->kids[0], right->counts[0], right->elems[0],
+ right->kids[1], right->counts[1], right->elems[1],
+ right->kids[2], right->counts[2], right->elems[2],
+ right->kids[3], right->counts[3]));
+
+ assert(!left->elems[2] && !right->elems[2]); /* neither is large! */
+ lsize = (left->elems[1] ? 2 : left->elems[0] ? 1 : 0); /* elements currently in left */
+ rsize = (right->elems[1] ? 2 : right->elems[0] ? 1 : 0); /* elements currently in right */
+
+ left->elems[lsize] = n->elems[ki]; /* separator comes down from n */
+
+ for (i = 0; i < rsize+1; i++) { /* rsize elements but rsize+1 child links */
+ left->kids[lsize+1+i] = right->kids[i];
+ left->counts[lsize+1+i] = right->counts[i];
+ if (left->kids[lsize+1+i])
+ left->kids[lsize+1+i]->parent = left;
+ if (i < rsize)
+ left->elems[lsize+1+i] = right->elems[i];
+ }
+
+ n->counts[ki] += rightlen + 1; /* right's contents plus the separator */
+
+ sfree(right); /* everything it held is now reachable through left */
+
+ /*
+ * Move the rest of n up by one.
+ */
+ for (i = ki+1; i < 3; i++) {
+ n->kids[i] = n->kids[i+1];
+ n->counts[i] = n->counts[i+1];
+ }
+ for (i = ki; i < 2; i++) {
+ n->elems[i] = n->elems[i+1];
+ }
+ n->kids[3] = NULL;
+ n->counts[3] = 0;
+ n->elems[2] = NULL;
+
+ if (k) {
+ LOG((" before: k,index = %d,%d\n", (*k), (*index)));
+ if ((*k) == ki+1) {
+ (*k)--;
+ (*index) += leftlen + 1;
+ } else if ((*k) > ki+1) {
+ (*k)--;
+ }
+ LOG((" after: k,index = %d,%d\n", (*k), (*index)));
+ }
+
+ LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ n,
+ n->kids[0], n->counts[0], n->elems[0],
+ n->kids[1], n->counts[1], n->elems[1],
+ n->kids[2], n->counts[2], n->elems[2],
+ n->kids[3], n->counts[3]));
+ LOG((" merged %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ left,
+ left->kids[0], left->counts[0], left->elems[0],
+ left->kids[1], left->counts[1], left->elems[1],
+ left->kids[2], left->counts[2], left->elems[2],
+ left->kids[3], left->counts[3]));
+
+}
+
+/*
+ * Delete an element e in a 2-3-4 tree. Does not free the element,
+ * merely removes all links to it from the tree nodes.
+ *
+ * `index' is the zero-based position of the element to remove. No
+ * range checking is done here: t->root is dereferenced straight
+ * away, so the tree must be non-empty and the callers are expected
+ * to have validated the index.
+ *
+ * Returns the element pointer that was unlinked from the tree.
+ *
+ * The work is done in one pass from the root downwards. Before
+ * stepping into a kid that holds only one element, that kid is
+ * enlarged (by moving a subtree across from a neighbour, or by
+ * merging with one), so the leaf eventually reached can lose an
+ * element without any fix-up back up the tree. The one exception,
+ * a root leaf that ends up empty, is dealt with at the very end.
+ */
+static void *delpos234_internal(tree234 *t, int index) {
+ node234 *n;
+ void *retval;
+ int ki, i;
+
+ retval = NULL;
+
+ n = t->root; /* by assumption this is non-NULL */
+ LOG(("deleting item %d from tree %p\n", index, t));
+ while (1) {
+ node234 *sub;
+
+ LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n",
+ n,
+ n->kids[0], n->counts[0], n->elems[0],
+ n->kids[1], n->counts[1], n->elems[1],
+ n->kids[2], n->counts[2], n->elems[2],
+ n->kids[3], n->counts[3],
+ index));
+ if (index <= n->counts[0]) { /* choose kid ki; index is made relative to that kid as we go */
+ ki = 0;
+ } else if (index -= n->counts[0]+1, index <= n->counts[1]) {
+ ki = 1;
+ } else if (index -= n->counts[1]+1, index <= n->counts[2]) {
+ ki = 2;
+ } else if (index -= n->counts[2]+1, index <= n->counts[3]) {
+ ki = 3;
+ } else {
+ assert(0); /* can't happen */
+ }
+
+ if (!n->kids[0])
+ break; /* n is a leaf node; we're here! */
+
+ /*
+ * Check to see if we've found our target element. If so,
+ * we must choose a new target (we'll use the old target's
+ * successor, which will be in a leaf), move it into the
+ * place of the old one, continue down to the leaf and
+ * delete the old copy of the new target.
+ *
+ * (index == counts[ki] means the target is the element
+ * sitting immediately after kid ki, i.e. elems[ki].)
+ */
+ if (index == n->counts[ki]) {
+ node234 *m;
+ LOG((" found element in internal node, index %d\n", ki));
+ assert(n->elems[ki]); /* must be a kid _before_ an element */
+ ki++; index = 0; /* the successor is the first element under the next kid */
+ for (m = n->kids[ki]; m->kids[0]; m = m->kids[0])
+ continue;
+ LOG((" replacing with element \"%s\" from leaf node %p\n",
+ m->elems[0], m));
+ retval = n->elems[ki-1]; /* remember the real target for the caller */
+ n->elems[ki-1] = m->elems[0];
+ }
+
+ /*
+ * Recurse down to subtree ki. If it has only one element,
+ * we have to do some transformation to start with.
+ *
+ * The transformation routines are handed &ki and &index so
+ * that they can adjust both to keep pointing at the same
+ * target after the kids have been rearranged.
+ */
+ LOG((" moving to subtree %d\n", ki));
+ sub = n->kids[ki];
+ if (!sub->elems[1]) {
+ LOG((" subtree has only one element!\n"));
+ if (ki > 0 && n->kids[ki-1]->elems[1]) {
+ /*
+ * Child ki has only one element, but child
+ * ki-1 has two or more. So we need to move a
+ * subtree from ki-1 to ki.
+ */
+ trans234_subtree_right(n, ki-1, &ki, &index);
+ } else if (ki < 3 && n->kids[ki+1] &&
+ n->kids[ki+1]->elems[1]) {
+ /*
+ * Child ki has only one element, but ki+1 has
+ * two or more. Move a subtree from ki+1 to ki.
+ */
+ trans234_subtree_left(n, ki+1, &ki, &index);
+ } else {
+ /*
+ * ki is small with only small neighbours. Pick a
+ * neighbour and merge with it.
+ *
+ * The left neighbour is used whenever there is
+ * one; only kid 0 merges with the kid on its
+ * right. sub is re-read afterwards because ki
+ * may have been changed by the merge.
+ */
+ trans234_subtree_merge(n, ki>0 ? ki-1 : ki, &ki, &index);
+ sub = n->kids[ki];
+
+ if (!n->elems[0]) {
+ /*
+ * The root is empty and needs to be
+ * removed.
+ */
+ LOG((" shifting root!\n"));
+ t->root = sub;
+ sub->parent = NULL;
+ sfree(n);
+ n = NULL; /* stops the count adjustment below touching freed memory */
+ }
+ }
+ }
+
+ if (n)
+ n->counts[ki]--; /* the subtree we are entering is about to lose one element */
+ n = sub;
+ }
+
+ /*
+ * Now n is a leaf node, and ki marks the element number we
+ * want to delete. We've already arranged for the leaf to be
+ * bigger than minimum size, so let's just go to it.
+ *
+ * If retval is already set, the real target lived in an
+ * internal node and elems[ki] here is the old copy of its
+ * successor, which is what gets removed from the leaf.
+ */
+ assert(!n->kids[0]);
+ if (!retval)
+ retval = n->elems[ki];
+
+ for (i = ki; i < 2 && n->elems[i+1]; i++) /* close the gap left by the removed element */
+ n->elems[i] = n->elems[i+1];
+ n->elems[i] = NULL;
+
+ /*
+ * It's just possible that we have reduced the leaf to zero
+ * size. This can only happen if it was the root - so destroy
+ * it and make the tree empty.
+ */
+ if (!n->elems[0]) {
+ LOG((" removed last element in tree, destroying empty root\n"));
+ assert(n == t->root);
+ sfree(n);
+ t->root = NULL;
+ }
+
+ return retval; /* finished! */
+}
+/*
+ * Remove the element at position `index' and hand it back to the
+ * caller. A position outside the tree yields NULL and leaves the
+ * tree untouched.
+ */
+void *delpos234(tree234 *t, int index) {
+    if (index >= 0 && index < countnode234(t->root))
+        return delpos234_internal(t, index);
+    return NULL;
+}
+/*
+ * Remove the element that compares equal to `e'. The position is
+ * looked up with an exact-match search; when nothing matches, NULL
+ * is returned and the tree is not modified.
+ */
+void *del234(tree234 *t, void *e) {
+    int pos;
+
+    return findrelpos234(t, e, NULL, REL234_EQ, &pos)
+        ? delpos234_internal(t, pos)   /* present: unlink it */
+        : NULL;                        /* absent: nothing to do */
+}
+
+/*
+ * Glue two subtrees together around a separator element.
+ *
+ * On entry *height holds the height of the left tree minus the
+ * height of the right tree: negative when the left tree is the
+ * shorter one, positive when the right tree is, zero when they
+ * match. The caller must already have verified any ordering
+ * requirement; none is checked here.
+ *
+ * On exit *height is 0 if the result is as tall as the taller of
+ * the two inputs, or 1 if it has grown by one level. The return
+ * value is the root node of the combined tree.
+ */
+static node234 *join234_internal(node234 *left, void *sep,
+                                 node234 *right, int *height) {
+    node234 *top, *at;
+    int diff = *height;
+    int slot;
+
+    LOG((" join: joining %p \"%s\" %p, relative height is %d\n",
+         left, sep, right, diff));
+
+    if (diff == 0) {
+        /*
+         * Equal heights: a fresh single-element root holding the
+         * separator, with the two inputs as its only kids.
+         */
+        node234 *fresh = mknew(node234);
+
+        fresh->kids[0] = left;
+        fresh->counts[0] = countnode234(left);
+        fresh->elems[0] = sep;
+        fresh->kids[1] = right;
+        fresh->counts[1] = countnode234(right);
+        fresh->elems[1] = NULL;
+        fresh->kids[2] = NULL;
+        fresh->counts[2] = 0;
+        fresh->elems[2] = NULL;
+        fresh->kids[3] = NULL;
+        fresh->counts[3] = 0;
+        fresh->parent = NULL;
+        if (left)
+            left->parent = fresh;
+        if (right)
+            right->parent = fresh;
+        *height = 1;
+        LOG((" join: same height, brand new root\n"));
+        return fresh;
+    }
+
+    /*
+     * Unequal heights: this is the insertion algorithm applied to
+     * the taller tree. One kid pointer on its edge is replaced by
+     * two kid pointers with the separator between them; overflow is
+     * handled by add234_insert.
+     */
+    if (diff < 0) {
+        /* Left tree is shorter: walk down the left edge of the right tree. */
+        top = at = right;
+        for (diff++; diff < 0; diff++)
+            at = at->kids[0];
+        slot = 0;
+        right = at->kids[slot];
+    } else {
+        /* Right tree is shorter: walk down the right edge of the left tree. */
+        top = at = left;
+        for (diff--; diff > 0; diff--)
+            at = at->kids[at->elems[2] ? 3 : at->elems[1] ? 2 : 1];
+        slot = at->elems[2] ? 3 : at->elems[1] ? 2 : 1;
+        left = at->kids[slot];
+    }
+
+    /* From here on it is an ordinary insertion. */
+    *height = add234_insert(left, sep, right, &top, at, slot);
+
+    return top;
+}
+/*
+ * Number of node levels in the tree, found by following the first
+ * kid pointer from the root until it runs out. An empty tree has
+ * height zero.
+ */
+static int height234(tree234 *t) {
+    node234 *cur;
+    int depth = 0;
+
+    for (cur = t->root; cur != NULL; cur = cur->kids[0])
+        depth++;
+    return depth;
+}
+/*
+ * Append every element of t2 to the end of t1. On success t2 is
+ * left empty and t1 is returned. If t1 is sorted and already holds
+ * an element >= the first element of t2, nothing is changed and
+ * NULL is returned. An empty t2 is a no-op.
+ */
+tree234 *join234(tree234 *t1, tree234 *t2) {
+    void *sep;
+    int relht;
+
+    if (countnode234(t2->root) <= 0)
+        return t1;                     /* nothing to append */
+
+    if (t1->cmp) {
+        /* Ordering check: t1 must lie wholly below t2's first element. */
+        void *first = index234(t2, 0);
+        if (findrelpos234(t1, first, NULL, REL234_GE, NULL))
+            return NULL;
+    }
+
+    /* t2's first element becomes the separator between the two trees. */
+    sep = delpos234(t2, 0);
+    relht = height234(t1) - height234(t2);
+    t1->root = join234_internal(t1->root, sep, t2->root, &relht);
+    t2->root = NULL;
+
+    return t1;
+}
+/*
+ * Prepend every element of t1 to the front of t2. On success t1 is
+ * left empty and t2 is returned. If t2 is sorted and already holds
+ * an element <= the last element of t1, nothing is changed and NULL
+ * is returned. An empty t1 is a no-op.
+ */
+tree234 *join234r(tree234 *t1, tree234 *t2) {
+    int n1 = countnode234(t1->root);
+    void *sep;
+    int relht;
+
+    if (n1 <= 0)
+        return t2;                     /* nothing to prepend */
+
+    if (t2->cmp) {
+        /* Ordering check: t2 must lie wholly above t1's last element. */
+        void *last = index234(t1, n1-1);
+        if (findrelpos234(t2, last, NULL, REL234_LE, NULL))
+            return NULL;
+    }
+
+    /* t1's last element becomes the separator between the two trees. */
+    sep = delpos234(t1, n1-1);
+    relht = height234(t1) - height234(t2);
+    t2->root = join234_internal(t1->root, sep, t2->root, &relht);
+    t1->root = NULL;
+
+    return t2;
+}
+
+/*
+ * Split out the first <index> elements in a tree and return a
+ * pointer to the root node. Leave the root node of the remainder
+ * in t.
+ *
+ * No range check is made on index here. index == 0 returns NULL
+ * and leaves t alone; index equal to the element count returns the
+ * whole tree and sets t->root to NULL.
+ *
+ * Otherwise the function works in two phases: first it cuts every
+ * node on the path down to the split point into a left part and a
+ * newly allocated right part, then it walks back down each of the
+ * two resulting trees along the cut, repairing nodes that were left
+ * smaller than a valid 2-3-4 node.
+ */
+static node234 *split234_internal(tree234 *t, int index) {
+ node234 *halves[2], *n, *sib, *sub;
+ node234 *lparent, *rparent;
+ int ki, pki, i, half, lcount, rcount;
+
+ n = t->root;
+ LOG(("splitting tree %p at point %d\n", t, index));
+
+ /*
+ * Easy special cases. After this we have also dealt completely
+ * with the empty-tree case and we can assume the root exists.
+ */
+ if (index == 0) /* return nothing */
+ return NULL;
+ if (index == countnode234(t->root)) { /* return the whole tree */
+ node234 *ret = t->root;
+ t->root = NULL;
+ return ret;
+ }
+
+ /*
+ * Search down the tree to find the split point.
+ *
+ * lparent/rparent are the left and right parts of the node cut
+ * on the previous level (NULL on the first pass), and pki is
+ * the kid slot in lparent through which we descended.
+ */
+ lparent = rparent = NULL;
+ while (n) {
+ LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n",
+ n,
+ n->kids[0], n->counts[0], n->elems[0],
+ n->kids[1], n->counts[1], n->elems[1],
+ n->kids[2], n->counts[2], n->elems[2],
+ n->kids[3], n->counts[3],
+ index));
+ lcount = index; /* elements of this subtree that end up on the left */
+ rcount = countnode234(n) - lcount; /* ...and on the right */
+ if (index <= n->counts[0]) {
+ ki = 0;
+ } else if (index -= n->counts[0]+1, index <= n->counts[1]) {
+ ki = 1;
+ } else if (index -= n->counts[1]+1, index <= n->counts[2]) {
+ ki = 2;
+ } else {
+ index -= n->counts[2]+1;
+ ki = 3;
+ }
+
+ LOG((" splitting at subtree %d\n", ki));
+ sub = n->kids[ki];
+
+ LOG((" splitting at child index %d\n", ki));
+
+ /*
+ * Split the node, put halves[0] on the right of the left
+ * one and halves[1] on the left of the right one, put the
+ * new node pointers in halves[0] and halves[1], and go up
+ * a level.
+ *
+ * Concretely: elements ki.. of n, and the kids to the
+ * right of each of them, move into the new node sib. Kid
+ * ki itself stays in n for now; sib's kid 0 is filled in
+ * on the next pass or cleared after the loop.
+ */
+ sib = mknew(node234);
+ for (i = 0; i < 3; i++) {
+ if (i+ki < 3 && n->elems[i+ki]) {
+ sib->elems[i] = n->elems[i+ki];
+ sib->kids[i+1] = n->kids[i+ki+1];
+ if (sib->kids[i+1]) sib->kids[i+1]->parent = sib;
+ sib->counts[i+1] = n->counts[i+ki+1];
+ n->elems[i+ki] = NULL;
+ n->kids[i+ki+1] = NULL;
+ n->counts[i+ki+1] = 0;
+ } else {
+ sib->elems[i] = NULL;
+ sib->kids[i+1] = NULL;
+ sib->counts[i+1] = 0;
+ }
+ }
+ if (lparent) {
+ lparent->kids[pki] = n; /* left part hangs off the left parent's last kid slot */
+ lparent->counts[pki] = lcount;
+ n->parent = lparent;
+ rparent->kids[0] = sib; /* right part hangs off the right parent's first kid slot */
+ rparent->counts[0] = rcount;
+ sib->parent = rparent;
+ } else {
+ halves[0] = n; /* first level: these are the roots of the two results */
+ n->parent = NULL;
+ halves[1] = sib;
+ sib->parent = NULL;
+ }
+ lparent = n;
+ rparent = sib;
+ pki = ki;
+ LOG((" left node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ n,
+ n->kids[0], n->counts[0], n->elems[0],
+ n->kids[1], n->counts[1], n->elems[1],
+ n->kids[2], n->counts[2], n->elems[2],
+ n->kids[3], n->counts[3]));
+ LOG((" right node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ sib,
+ sib->kids[0], sib->counts[0], sib->elems[0],
+ sib->kids[1], sib->counts[1], sib->elems[1],
+ sib->kids[2], sib->counts[2], sib->elems[2],
+ sib->kids[3], sib->counts[3]));
+
+ n = sub;
+ }
+
+ /*
+ * We've come off the bottom here, so we've successfully split
+ * the tree into two equally high subtrees. The only problem is
+ * that some of the nodes down the fault line will be smaller
+ * than the minimum permitted size. (Since this is a 2-3-4
+ * tree, that means they'll be zero-element one-child nodes.)
+ *
+ * The kid slots on either side of the cut in the two leaves
+ * are cleared first, since nothing hangs below a leaf.
+ */
+ LOG((" fell off bottom, lroot is %p, rroot is %p\n",
+ halves[0], halves[1]));
+ lparent->counts[pki] = rparent->counts[0] = 0;
+ lparent->kids[pki] = rparent->kids[0] = NULL;
+
+ /*
+ * So now we go back down the tree from each of the two roots,
+ * fixing up undersize nodes.
+ */
+ for (half = 0; half < 2; half++) {
+ /*
+ * Remove the root if it's undersize (it will contain only
+ * one child pointer, so just throw it away and replace it
+ * with its child). This might happen several times.
+ */
+ while (halves[half] && !halves[half]->elems[0]) {
+ LOG((" root %p is undersize, throwing away\n", halves[half]));
+ halves[half] = halves[half]->kids[0];
+ sfree(halves[half]->parent);
+ halves[half]->parent = NULL;
+ LOG((" new root is %p\n", halves[half]));
+ }
+
+ n = halves[half];
+ while (n) {
+ void (*toward)(node234 *n, int ki, int *k, int *index);
+ int ni, merge;
+
+ /*
+ * Now we have a potentially undersize node on the
+ * right (if half==0) or left (if half==1). Sort it
+ * out, by merging with a neighbour or by transferring
+ * subtrees over. At this time we must also ensure that
+ * nodes are bigger than minimum, in case we need an
+ * element to merge two nodes below.
+ */
+ LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
+ n,
+ n->kids[0], n->counts[0], n->elems[0],
+ n->kids[1], n->counts[1], n->elems[1],
+ n->kids[2], n->counts[2], n->elems[2],
+ n->kids[3], n->counts[3]));
+ if (half == 1) {
+ ki = 0; /* the kid we're interested in */
+ ni = 1; /* the neighbour */
+ merge = 0; /* for merge: leftmost of the two */
+ toward = trans234_subtree_left;
+ } else {
+ ki = (n->kids[3] ? 3 : n->kids[2] ? 2 : 1); /* rightmost kid present */
+ ni = ki-1;
+ merge = ni;
+ toward = trans234_subtree_right;
+ }
+
+ sub = n->kids[ki];
+ if (sub && !sub->elems[1]) {
+ /*
+ * This node is undersized or minimum-size. If we
+ * can merge it with its neighbour, we do so;
+ * otherwise we must be able to transfer subtrees
+ * over to it until it is greater than minimum
+ * size.
+ */
+ int undersized = (!sub->elems[0]);
+ LOG((" child %d is %ssize\n", ki,
+ undersized ? "under" : "minimum-"));
+ LOG((" neighbour is %s\n",
+ n->kids[ni]->elems[2] ? "large" :
+ n->kids[ni]->elems[1] ? "medium" : "small"));
+ if (!n->kids[ni]->elems[1] ||
+ (undersized && !n->kids[ni]->elems[2])) {
+ /*
+ * Neighbour is small, or possibly neighbour is
+ * medium and we are undersize.
+ */
+ trans234_subtree_merge(n, merge, NULL, NULL);
+ sub = n->kids[merge]; /* the merged node now lives in slot `merge' */
+ if (!n->elems[0]) {
+ /*
+ * n is empty, and hence must have been the
+ * root and needs to be removed.
+ */
+ assert(!n->parent);
+ LOG((" shifting root!\n"));
+ halves[half] = sub;
+ halves[half]->parent = NULL;
+ sfree(n);
+ }
+ } else {
+ /* Neighbour is big enough to move trees over. */
+ toward(n, ni, NULL, NULL);
+ if (undersized)
+ toward(n, ni, NULL, NULL); /* second transfer: an empty node needs two to exceed minimum size */
+ }
+ }
+ n = sub;
+ }
+ }
+
+ t->root = halves[1];
+ return halves[0];
+}
+/*
+ * Split a tree in two at position `index'.
+ *
+ * A new tree is allocated and returned. If `before' is non-zero it
+ * receives the first `index' elements and t keeps the rest;
+ * otherwise t keeps the first `index' elements and the new tree
+ * receives the rest. An index below zero or above the element
+ * count is an error and yields NULL without modifying t.
+ */
+tree234 *splitpos234(tree234 *t, int index, int before) {
+    tree234 *other;
+    node234 *head;
+    int total = countnode234(t->root);
+
+    if (index < 0 || index > total)
+        return NULL;                   /* error */
+
+    other = newtree234(t->cmp);
+    head = split234_internal(t, index);   /* head = first part, t = remainder */
+
+    if (!before) {
+        /* Hand back the tail; the head stays with the caller's tree. */
+        other->root = t->root;
+        t->root = head;
+    } else {
+        /* Hand back the head; the tail is already in t. */
+        other->root = head;
+    }
+    return other;
+}
+/*
+ * Split a tree at a point described by an element and a relation.
+ *
+ * `rel' must not be REL234_EQ. For REL234_LT / REL234_LE the tree
+ * is cut just after the last element found by that search; t keeps
+ * the part up to and including it, and the newly allocated tree
+ * that is returned holds everything after it. For REL234_GT /
+ * REL234_GE the search is inverted (to LE / LT respectively) to
+ * locate the same kind of cut point, and the returned tree is the
+ * part before the cut while t keeps the part after it.
+ *
+ * `cmp' is passed straight through to findrelpos234.
+ *
+ * If the search finds no element at all, the cut point is position
+ * zero, i.e. one of the two resulting trees is empty. (Previously
+ * this case cut at position one, which misplaced the first element
+ * and made the call fail outright on an empty tree.)
+ */
+tree234 *split234(tree234 *t, void *e, cmpfn234 cmp, int rel) {
+    int before;
+    int index;
+
+    assert(rel != REL234_EQ);
+
+    if (rel == REL234_GT || rel == REL234_GE) {
+        before = 1;
+        rel = (rel == REL234_GT ? REL234_LE : REL234_LT);
+    } else {
+        before = 0;
+    }
+
+    /*
+     * index is the position of the last element on the low side of
+     * the cut, so the cut itself is at index+1. With no such
+     * element the cut must be at 0, hence -1 here.
+     */
+    if (!findrelpos234(t, e, cmp, rel, &index))
+        index = -1;
+
+    return splitpos234(t, index+1, before);
+}
+
+/*
+ * Recursively duplicate node n and everything beneath it.
+ *
+ * Each non-NULL element is passed through copyfn (together with
+ * copyfnstate) when a copy function is supplied; otherwise the
+ * element pointer itself is shared between the two trees. The
+ * parent pointer of the returned node is not filled in here; that
+ * is left to the caller.
+ */
+static node234 *copynode234(node234 *n, copyfn234 copyfn, void *copyfnstate) {
+    node234 *dup = mknew(node234);
+    int slot;
+
+    /* Elements of this node are copied before any kid is visited. */
+    for (slot = 0; slot < 3; slot++) {
+        void *e = n->elems[slot];
+        if (e && copyfn)
+            e = copyfn(copyfnstate, e);
+        dup->elems[slot] = e;
+    }
+
+    for (slot = 0; slot < 4; slot++) {
+        node234 *kid = n->kids[slot];
+
+        if (kid) {
+            dup->kids[slot] = copynode234(kid, copyfn, copyfnstate);
+            dup->kids[slot]->parent = dup;
+        } else {
+            dup->kids[slot] = NULL;
+        }
+        dup->counts[slot] = n->counts[slot];
+    }
+
+    return dup;
+}
+/*
+ * Make a copy of a whole tree.
+ *
+ * The new tree uses the same comparison function as t. Elements
+ * are duplicated with copyfn(copyfnstate, element) if copyfn is
+ * non-NULL, and shared by pointer otherwise. Copying an empty tree
+ * gives another empty tree.
+ */
+tree234 *copytree234(tree234 *t, copyfn234 copyfn, void *copyfnstate) {
+    tree234 *t2;
+
+    t2 = newtree234(t->cmp);
+    if (t->root) {
+        t2->root = copynode234(t->root, copyfn, copyfnstate);
+        t2->root->parent = NULL;
+    } else {
+        /* copynode234 dereferences its argument, so never hand it NULL. */
+        t2->root = NULL;
+    }
+
+    return t2;
+}
+
+#ifdef TEST
+
+/*
+ * Test code for the 2-3-4 tree. This code maintains an alternative
+ * representation of the data in the tree, in an array (using the
+ * obvious and slow insert and delete functions). After each tree
+ * operation, the verify() function is called, which ensures all
+ * the tree properties are preserved:
+ * - node->child->parent always equals node
+ * - tree->root->parent always equals NULL
+ * - number of kids == 0 or number of elements + 1;
+ * - tree has the same depth everywhere
+ * - every node has at least one element
+ * - subtree element counts are accurate
+ * - any NULL kid pointer is accompanied by a zero count
+ * - in a sorted tree: ordering property between elements of a
+ * node and elements of its children is preserved
+ * and also ensures the list represented by the tree is the same
+ * list it should be. (This last check also doubly verifies the
+ * ordering properties, because the `same list it should be' is by
+ * definition correctly ordered. It also ensures all nodes are
+ * distinct, because the enum functions would get caught in a loop
+ * if not.)
+ */
+
+#include <stdarg.h>
+
+#define srealloc realloc
+
+/*
+ * Report a test failure: writes "ERROR: ", the printf-style
+ * message and a newline to standard output. Execution continues
+ * afterwards.
+ */
+void error(char *fmt, ...) {
+    va_list args;
+
+    fputs("ERROR: ", stdout);
+    va_start(args, fmt);
+    vprintf(fmt, args);
+    va_end(args);
+    putchar('\n');
+}
+
+/* The array representation of the data. */
+void **array; /* element pointers in list order; grown on demand by internal_addtest */
+int arraylen, arraysize; /* slots in use / slots allocated */
+cmpfn234 cmp; /* ordering used by the checks; set to NULL while an unsorted tree is under test */
+
+/* The tree representation of the same data. */
+tree234 *tree;
+
+/*
+ * Routines to provide a diagnostic printout of a tree. Currently
+ * relies on every element in the tree being a one-character string
+ * :-)
+ *
+ * dispctx holds one text buffer per output row; the buffers are
+ * allocated by disptree. Row 0 is the leaf row, and each further
+ * tree level occupies three more rows (two rows of connecting
+ * lines, then the row carrying the node's own elements).
+ *
+ * dispnode draws the subtree rooted at n, whose own elements
+ * belong on row `level', by appending to the row buffers. It
+ * returns the column at which the node was centred so that the
+ * caller can attach its connecting line there.
+ */
+typedef struct {
+ char **levels;
+} dispctx;
+
+int dispnode(node234 *n, int level, dispctx *ctx) {
+ if (level == 0) { /* leaf row: append a space and then the elements */
+ int xpos = strlen(ctx->levels[0]);
+ int len;
+
+ if (n->elems[2])
+ len = sprintf(ctx->levels[0]+xpos, " %s%s%s",
+ n->elems[0], n->elems[1], n->elems[2]);
+ else if (n->elems[1])
+ len = sprintf(ctx->levels[0]+xpos, " %s%s",
+ n->elems[0], n->elems[1]);
+ else
+ len = sprintf(ctx->levels[0]+xpos, " %s",
+ n->elems[0]);
+ return xpos + 1 + (len-1) / 2; /* column of the middle of the text just written */
+ } else {
+ int xpos[4], nkids;
+ int nodelen, mypos, myleft, x, i;
+
+ xpos[0] = dispnode(n->kids[0], level-3, ctx); /* kids are drawn first, three rows further down */
+ xpos[1] = dispnode(n->kids[1], level-3, ctx);
+ nkids = 2;
+ if (n->kids[2]) {
+ xpos[2] = dispnode(n->kids[2], level-3, ctx);
+ nkids = 3;
+ }
+ if (n->kids[3]) {
+ xpos[3] = dispnode(n->kids[3], level-3, ctx);
+ nkids = 4;
+ }
+
+ if (nkids == 4) /* centre this node over its kids */
+ mypos = (xpos[1] + xpos[2]) / 2;
+ else if (nkids == 3)
+ mypos = xpos[1];
+ else
+ mypos = (xpos[0] + xpos[1]) / 2;
+ nodelen = nkids * 2 - 1; /* width of ".a.b." style text, given one-character elements */
+ myleft = mypos - ((nodelen-1)/2);
+ assert(myleft >= xpos[0]);
+ assert(myleft + nodelen-1 <= xpos[nkids-1]);
+
+ x = strlen(ctx->levels[level]); /* row `level': padding, then the node text between '_' runs */
+ while (x <= xpos[0] && x < myleft)
+ ctx->levels[level][x++] = ' ';
+ while (x < myleft)
+ ctx->levels[level][x++] = '_';
+ if (nkids==4)
+ x += sprintf(ctx->levels[level]+x, ".%s.%s.%s.",
+ n->elems[0], n->elems[1], n->elems[2]);
+ else if (nkids==3)
+ x += sprintf(ctx->levels[level]+x, ".%s.%s.",
+ n->elems[0], n->elems[1]);
+ else
+ x += sprintf(ctx->levels[level]+x, ".%s.",
+ n->elems[0]);
+ while (x < xpos[nkids-1])
+ ctx->levels[level][x++] = '_';
+ ctx->levels[level][x] = '\0';
+
+ x = strlen(ctx->levels[level-1]); /* row level-1: '|' and '_' links from the node text out towards each kid's column */
+ for (i = 0; i < nkids; i++) {
+ int rpos, pos;
+ rpos = xpos[i];
+ if (i > 0 && i < nkids-1)
+ pos = myleft + 2*i; /* inner kids hang from the '.' separators in the node text */
+ else
+ pos = rpos;
+ if (rpos < pos)
+ rpos++;
+ while (x < pos && x < rpos)
+ ctx->levels[level-1][x++] = ' ';
+ if (x == pos)
+ ctx->levels[level-1][x++] = '|';
+ while (x < pos || x < rpos)
+ ctx->levels[level-1][x++] = '_';
+ if (x == pos)
+ ctx->levels[level-1][x++] = '|';
+ }
+ ctx->levels[level-1][x] = '\0';
+
+ x = strlen(ctx->levels[level-2]); /* row level-2: a '|' directly above each kid */
+ for (i = 0; i < nkids; i++) {
+ int rpos = xpos[i];
+
+ while (x < rpos)
+ ctx->levels[level-2][x++] = ' ';
+ ctx->levels[level-2][x++] = '|';
+ }
+ ctx->levels[level-2][x] = '\0';
+
+ return mypos;
+ }
+}
+
+/*
+ * Print a text picture of the tree on standard output.
+ *
+ * An empty tree is reported as "[empty tree]". Otherwise one text
+ * buffer of width count+2 is set up per output row (three rows per
+ * tree level, less two), dispnode fills them in, and they are
+ * printed from the root's row down to the leaf row.
+ */
+void disptree(tree234 *t) {
+    dispctx ctx;
+    char *leveldata;
+    int width = count234(t);
+    int ht = height234(t) * 3 - 2;
+    int i;
+
+    if (!t->root) {
+        printf("[empty tree]\n");
+        /*
+         * Nothing to draw. Carrying on would request a negative
+         * allocation size (ht is -2 here) and make dispnode
+         * dereference a NULL root.
+         */
+        return;
+    }
+
+    leveldata = smalloc(ht * (width+2));
+    ctx.levels = smalloc(ht * sizeof(char *));
+    for (i = 0; i < ht; i++) {
+        ctx.levels[i] = leveldata + i * (width+2);
+        ctx.levels[i][0] = '\0';
+    }
+
+    (void) dispnode(t->root, ht-1, &ctx);
+
+    /* Row ht-1 holds the root, so print from the highest row down. */
+    for (i = ht; i-- ;)
+        printf("%s\n", ctx.levels[i]);
+
+    sfree(ctx.levels);
+    sfree(leveldata);
+}
+
+typedef struct { /* state accumulated by chknode over one verification pass */
+ int treedepth; /* level of the first leaf seen; negative until one has been seen */
+ int elemcount; /* running total of elements found in all nodes visited */
+} chkctx;
+
+/*
+ * Check the structural invariants of one node and, recursively, of
+ * the subtree below it. Every violation is reported through
+ * error(); checking then carries on.
+ *
+ * ctx       accumulates the leaf depth and total element count
+ * level     depth of this node (the root is checked at level 0)
+ * node      the node to check; must not be NULL
+ * lowbound  element that everything in this subtree must exceed,
+ *           or NULL for no bound
+ * highbound element that everything in this subtree must be below,
+ *           or NULL for no bound
+ *
+ * Returns the number of elements in the subtree rooted at node.
+ */
+int chknode(chkctx *ctx, int level, node234 *node,
+            void *lowbound, void *highbound) {
+    int nkids, nelems;
+    int i;
+    int count;
+
+    /* Count the non-NULL kids. */
+    for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++)
+        continue;
+    /* Ensure no kids beyond the first NULL are non-NULL. */
+    for (i = nkids; i < 4; i++) {
+        if (node->kids[i]) {
+            error("node %p: nkids=%d but kids[%d] non-NULL",
+                  node, nkids, i);
+        } else if (node->counts[i]) {
+            error("node %p: kids[%d] NULL but count[%d]=%d nonzero",
+                  node, i, i, node->counts[i]);
+        }
+    }
+
+    /* Count the non-NULL elements. */
+    for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++)
+        continue;
+    /* Ensure no elements beyond the first NULL are non-NULL. */
+    for (i = nelems; i < 3; i++) {
+        if (node->elems[i]) {
+            error("node %p: nelems=%d but elems[%d] non-NULL",
+                  node, nelems, i);
+        }
+    }
+
+    if (nkids == 0) {
+        /*
+         * If nkids==0, this is a leaf node; verify that the tree
+         * depth is the same everywhere.
+         */
+        if (ctx->treedepth < 0)
+            ctx->treedepth = level;    /* we didn't know the depth yet */
+        else if (ctx->treedepth != level)
+            error("node %p: leaf at depth %d, previously seen depth %d",
+                  node, level, ctx->treedepth);
+    } else {
+        /*
+         * If nkids != 0, then it should be nelems+1, unless nelems
+         * is 0 in which case nkids should also be 0 (and so we
+         * shouldn't be in this condition at all).
+         */
+        int shouldkids = (nelems ? nelems+1 : 0);
+        if (nkids != shouldkids) {
+            error("node %p: %d elems should mean %d kids but has %d",
+                  node, nelems, shouldkids, nkids);
+        }
+    }
+
+    /*
+     * nelems should be at least 1. (The format has a single
+     * conversion, so only the node pointer is passed.)
+     */
+    if (nelems == 0) {
+        error("node %p: no elems", node);
+    }
+
+    /*
+     * Add nelems to the running element count of the whole tree.
+     */
+    ctx->elemcount += nelems;
+
+    /*
+     * Check ordering property: all elements should be strictly >
+     * lowbound, strictly < highbound, and strictly < each other in
+     * sequence. (lowbound and highbound are NULL at edges of tree
+     * - both NULL at root node - and NULL is considered to be <
+     * everything and > everything. IYSWIM.)
+     *
+     * This is skipped entirely when the global cmp is NULL, i.e.
+     * while an unsorted tree is being tested.
+     */
+    if (cmp) {
+        for (i = -1; i < nelems; i++) {
+            void *lower = (i == -1 ? lowbound : node->elems[i]);
+            void *higher = (i+1 == nelems ? highbound : node->elems[i+1]);
+            if (lower && higher && cmp(lower, higher) >= 0) {
+                error("node %p: kid comparison [%d=%s,%d=%s] failed",
+                      node, i, lower, i+1, higher);
+            }
+        }
+    }
+
+    /*
+     * Check parent pointers: all non-NULL kids should have a
+     * parent pointer coming back to this node.
+     */
+    for (i = 0; i < nkids; i++) {
+        if (node->kids[i]->parent != node) {
+            error("node %p kid %d: parent ptr is %p not %p",
+                  node, i, node->kids[i]->parent, node);
+        }
+    }
+
+    /*
+     * Now (finally!) recurse into subtrees. Kid i is bounded below
+     * by the element before it and above by the element after it,
+     * falling back to this node's own bounds at either end.
+     */
+    count = nelems;
+
+    for (i = 0; i < nkids; i++) {
+        void *lower = (i == 0 ? lowbound : node->elems[i-1]);
+        void *higher = (i >= nelems ? highbound : node->elems[i]);
+        int subcount = chknode(ctx, level+1, node->kids[i], lower, higher);
+        if (node->counts[i] != subcount) {
+            error("node %p kid %d: count says %d, subtree really has %d",
+                  node, i, node->counts[i], subcount);
+        }
+        count += subcount;
+    }
+
+    return count;
+}
+
+/*
+ * Check a tree against the reference array it is supposed to
+ * mirror.
+ *
+ * First the structural invariants are checked with chknode, then
+ * the tree is enumerated by index and each element is compared by
+ * pointer with the corresponding array entry, and finally the
+ * element totals obtained three different ways (node walk,
+ * enumeration, count234) are cross-checked. Every discrepancy is
+ * reported through error(). The leaf depth found is printed as
+ * "tree depth: N" (-1 for an empty tree).
+ */
+void verifytree(tree234 *tree, void **array, int arraylen) {
+    chkctx ctx;
+    int i;
+    void *p;
+
+    ctx.treedepth = -1;                /* depth unknown yet */
+    ctx.elemcount = 0;                 /* no elements seen yet */
+    /*
+     * Verify validity of tree properties.
+     */
+    if (tree->root) {
+        if (tree->root->parent != NULL)
+            error("root->parent is %p should be null", tree->root->parent);
+        chknode(&ctx, 0, tree->root, NULL, NULL);
+    }
+    printf("tree depth: %d\n", ctx.treedepth);
+    /*
+     * Enumerate the tree and ensure it matches up to the array.
+     */
+    for (i = 0; NULL != (p = index234(tree, i)); i++) {
+        if (i >= arraylen) {
+            /*
+             * The tree is longer than the array. Report it, but do
+             * not go on to read array[i], which lies outside the
+             * reference data.
+             */
+            error("tree contains more than %d elements", arraylen);
+        } else if (array[i] != p) {
+            error("enum at position %d: array says %s, tree says %s",
+                  i, array[i], p);
+        }
+    }
+    if (ctx.elemcount != i) {
+        error("tree really contains %d elements, enum gave %d",
+              ctx.elemcount, i);
+    }
+    if (i < arraylen) {
+        error("enum gave only %d elements, array has %d", i, arraylen);
+    }
+    i = count234(tree);
+    if (ctx.elemcount != i) {
+        error("tree really contains %d elements, count234 gave %d",
+              ctx.elemcount, i);
+    }
+}
+/* Check the global test tree against the global reference array. */
+void verify(void) {
+    verifytree(tree, array, arraylen);
+}
+
+/*
+ * Record in the reference array an insertion that has already been
+ * made in the tree, then verify the two still agree.
+ *
+ * elem    the element that was inserted
+ * index   position at which it belongs in the array
+ * realret what the tree's add function returned; a successful add
+ *         is expected to return elem itself
+ */
+void internal_addtest(void *elem, int index, void *realret) {
+    int pos;
+
+    /* Make room for one more entry, with some slack. */
+    if (arraylen + 1 > arraysize) {
+        arraysize = arraylen+1+256;
+        if (array == NULL)
+            array = smalloc(arraysize*sizeof(*array));
+        else
+            array = srealloc(array, arraysize*sizeof(*array));
+    }
+
+    /* Shift the tail up by one and drop the new element in. */
+    for (pos = arraylen; pos > index; pos--)
+        array[pos] = array[pos-1];
+    array[index] = elem;
+    arraylen++;
+
+    if (realret != elem) {
+        error("add: retval was %p expected %p", realret, elem);
+    }
+
+    verify();
+}
+
+/*
+ * Add elem to the sorted test tree and check the outcome against
+ * the reference array. If an equal element is already present the
+ * add is expected to return that existing element and the array is
+ * left alone; otherwise the insertion is mirrored in the array.
+ */
+void addtest(void *elem) {
+    void *realret = add234(tree, elem);
+    int pos;
+
+    /* Find the first array entry that is not below elem. */
+    for (pos = 0; pos < arraylen; pos++) {
+        if (cmp(elem, array[pos]) <= 0)
+            break;
+    }
+
+    if (pos >= arraylen || cmp(elem, array[pos]) != 0) {
+        /* Not there yet: a genuine insertion at pos. */
+        internal_addtest(elem, pos, realret);
+        return;
+    }
+
+    /* Duplicate: expect the existing element back, not elem. */
+    if (realret != array[pos]) {
+        error("add: retval was %p expected %p", realret, array[pos]);
+    }
+}
+
+/*
+ * Insert elem at position i of the unsorted test tree and mirror
+ * the insertion in the reference array.
+ */
+void addpostest(void *elem, int i) {
+    void *got = addpos234(tree, elem, i);
+    internal_addtest(elem, i, got);
+}
+
+/*
+ * Delete the element at position i from both the reference array
+ * and the test tree, check that the tree returned the right
+ * element, and verify. A sorted tree is exercised through del234
+ * (by element); an unsorted one through delpos234 (by position).
+ */
+void delpostest(int i) {
+    void *victim = array[i];
+    void *got;
+    int pos;
+
+    /* Close the gap in the array. */
+    for (pos = i; pos < arraylen-1; pos++)
+        array[pos] = array[pos+1];
+    arraylen--;
+
+    got = tree->cmp ? del234(tree, victim) : delpos234(tree, i);
+
+    if (got != victim) {
+        error("del returned %p, expected %p", got, victim);
+    }
+
+    verify();
+}
+
+/*
+ * Delete elem from the sorted test tree, provided the reference
+ * array says it is present; otherwise do nothing at all.
+ */
+void deltest(void *elem) {
+    int pos = 0;
+
+    while (pos < arraylen && cmp(elem, array[pos]) > 0)
+        pos++;
+
+    if (pos < arraylen && cmp(elem, array[pos]) == 0)
+        delpostest(pos);
+}
+
+/* A sample data set and test utility. Designed for pseudo-randomness,
+ * and yet repeatability. */
+
+/*
+ * Linear congruential generator, following the `portable
+ * implementation' given in ANSI C99 draft N869. It assumes
+ * `unsigned' is 32 bits; change it if not.
+ *
+ * Advances *seed by one step and returns a value in the range
+ * 0..32767 taken from the upper bits of the new state.
+ */
+int randomnumber(unsigned *seed) {
+    unsigned state = *seed * 1103515245 + 12345;
+
+    *seed = state;
+    return (state / 65536) % 32768;
+}
+
+/*
+ * Comparison function for the test trees: both arguments are
+ * treated as C strings and ordered with strcmp.
+ */
+int mycmp(void *av, void *bv) {
+    return strcmp((char const *)av, (char const *)bv);
+}
+
+#define lenof(x) ( sizeof((x)) / sizeof(*(x)) ) /* element count; x must be an actual array, not a pointer */
+
+char *strings[] = { /* pool of test elements: sixty distinct one-character strings, deliberately not in sorted order */
+ "0", "2", "3", "I", "K", "d", "H", "J", "Q", "N", "n", "q", "j", "i",
+ "7", "G", "F", "D", "b", "x", "g", "B", "e", "v", "V", "T", "f", "E",
+ "S", "8", "A", "k", "X", "p", "C", "R", "a", "o", "r", "O", "Z", "u",
+ "6", "1", "w", "L", "P", "M", "c", "U", "h", "9", "t", "5", "W", "Y",
+ "m", "s", "l", "4",
+#if 0 /* disabled word list; presumably left out because the tree printout expects one-character elements */
+ "a", "ab", "absque", "coram", "de",
+ "palam", "clam", "cum", "ex", "e",
+ "sine", "tenus", "pro", "prae",
+ "banana", "carrot", "cabbage", "broccoli", "onion", "zebra",
+ "penguin", "blancmange", "pangolin", "whale", "hedgehog",
+ "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux",
+ "murfl", "spoo", "breen", "flarn", "octothorpe",
+ "snail", "tiger", "elephant", "octopus", "warthog", "armadillo",
+ "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin",
+ "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper",
+ "wand", "ring", "amulet"
+#endif
+};
+
+#define NSTR lenof(strings) /* number of entries in strings[]; an unsigned size, hence the (int) casts where it is compared */
+
+/*
+ * Exercise findrelpos234 on the global sorted test tree.
+ *
+ * For every string in strings[] and every relation (EQ, GE, LE,
+ * LT, GT) the expected answer is worked out independently by a
+ * binary search of the reference array, and compared with both the
+ * element and the index that findrelpos234 reports. For EQ hits
+ * the index is additionally cross-checked through index234.
+ * Finally the NULL-element forms are checked: (NULL, GT) should
+ * give the first element and (NULL, LT) the last, or NULL for an
+ * empty tree.
+ *
+ * All mismatches are reported through error().
+ */
+void findtest(void) {
+    static const int rels[] = {
+        REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT
+    };
+    static const char *const relnames[] = {
+        "EQ", "GE", "LE", "LT", "GT"
+    };
+    int i, j, rel, index;
+    char *p, *ret, *realret, *realret2;
+    int lo, hi, mid, c;
+
+    for (i = 0; i < (int)NSTR; i++) {
+        p = strings[i];
+        for (j = 0; j < (int)(sizeof(rels)/sizeof(*rels)); j++) {
+            rel = rels[j];
+
+            /*
+             * Binary search of the reference array. c and mid are
+             * given values up front because the loop body never
+             * runs when the array is empty, and both are read
+             * afterwards; "no match" is the right answer then.
+             */
+            lo = 0; hi = arraylen-1;
+            c = -1;
+            mid = -1;
+            while (lo <= hi) {
+                mid = (lo + hi) / 2;
+                c = strcmp(p, array[mid]);
+                if (c < 0)
+                    hi = mid-1;
+                else if (c > 0)
+                    lo = mid+1;
+                else
+                    break;
+            }
+
+            if (c == 0) {
+                /* Exact match at mid: step off it for the strict relations. */
+                if (rel == REL234_LT)
+                    ret = (mid > 0 ? array[--mid] : NULL);
+                else if (rel == REL234_GT)
+                    ret = (mid < arraylen-1 ? array[++mid] : NULL);
+                else
+                    ret = array[mid];
+            } else {
+                /* No match: hi is the element just below p, lo the one just above. */
+                assert(lo == hi+1);
+                if (rel == REL234_LT || rel == REL234_LE) {
+                    mid = hi;
+                    ret = (hi >= 0 ? array[hi] : NULL);
+                } else if (rel == REL234_GT || rel == REL234_GE) {
+                    mid = lo;
+                    ret = (lo < arraylen ? array[lo] : NULL);
+                } else
+                    ret = NULL;
+            }
+
+            realret = findrelpos234(tree, p, NULL, rel, &index);
+            if (realret != ret) {
+                error("find(\"%s\",%s) gave %s should be %s",
+                      p, relnames[j], realret, ret);
+            }
+            if (realret && index != mid) {
+                error("find(\"%s\",%s) gave %d should be %d",
+                      p, relnames[j], index, mid);
+            }
+            if (realret && rel == REL234_EQ) {
+                realret2 = index234(tree, index);
+                if (realret2 != realret) {
+                    error("find(\"%s\",%s) gave %s(%d) but %d -> %s",
+                          p, relnames[j], realret, index, index, realret2);
+                }
+            }
+#if 0
+            printf("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j],
+                   realret, index);
+#endif
+        }
+    }
+
+    realret = findrelpos234(tree, NULL, NULL, REL234_GT, &index);
+    if (arraylen && (realret != array[0] || index != 0)) {
+        error("find(NULL,GT) gave %s(%d) should be %s(0)",
+              realret, index, array[0]);
+    } else if (!arraylen && (realret != NULL)) {
+        error("find(NULL,GT) gave %s(%d) should be NULL",
+              realret, index);
+    }
+
+    realret = findrelpos234(tree, NULL, NULL, REL234_LT, &index);
+    if (arraylen && (realret != array[arraylen-1] || index != arraylen-1)) {
+        /* The expected index here is the last one, not 0. */
+        error("find(NULL,LT) gave %s(%d) should be %s(%d)",
+              realret, index, array[arraylen-1], arraylen-1);
+    } else if (!arraylen && (realret != NULL)) {
+        error("find(NULL,LT) gave %s(%d) should be NULL",
+              realret, index);
+    }
+}
+
+/*
+ * Split-and-rejoin test. For every position from 0 to arraylen
+ * inclusive, a copy of the tree is split there, both pieces are
+ * verified against the matching slices of the array, the pieces
+ * are joined back together and the result is verified against the
+ * whole array. The tree passed in is not modified.
+ */
+void splittest(tree234 *tree, void **array, int arraylen) {
+    int cut = 0;
+
+    while (cut <= arraylen) {
+        tree234 *head = copytree234(tree, NULL, NULL);
+        tree234 *tail = splitpos234(head, cut, 0);
+
+        verifytree(head, array, cut);
+        verifytree(tail, array+cut, arraylen-cut);
+
+        join234(head, tail);
+        freetree234(tail);             /* left empty by join */
+        verifytree(head, array, arraylen);
+        freetree234(head);
+
+        cut++;
+    }
+}
+
+/*
+ * Test driver. In order:
+ *   1. 10000 pseudo-random add/delete trials on a sorted tree, with
+ *      a printout and a find test after each, then delete
+ *      everything.
+ *   2. 1000 positional insertions into an unsorted tree.
+ *   3. Split/join tests on copies of that unsorted tree, once with
+ *      a one-element root and once without.
+ *   4. Positional deletion of everything from the unsorted tree.
+ *   5. Split/join tests on small sorted trees of 1 to 16 elements.
+ *   6. Corner cases of join234/join234r: empty operands and
+ *      operands that violate the ordering constraint.
+ *
+ * Failures are reported on standard output by error(); the return
+ * value is always 0.
+ */
+int main(void) {
+    int in[NSTR];
+    int i, j, k;
+    int tworoot, tmplen;
+    unsigned seed = 0;
+    tree234 *tree2, *tree3, *tree4;
+    tree234 *joined;
+
+    setvbuf(stdout, NULL, _IOLBF, 0);
+
+    for (i = 0; i < (int)NSTR; i++) in[i] = 0;
+    array = NULL;
+    arraylen = arraysize = 0;
+    tree = newtree234(mycmp);
+    cmp = mycmp;
+
+    verify();
+    for (i = 0; i < 10000; i++) {
+        j = randomnumber(&seed);
+        j %= NSTR;
+        printf("trial: %d\n", i);
+        if (in[j]) {
+            printf("deleting %s (%d)\n", strings[j], j);
+            deltest(strings[j]);
+            in[j] = 0;
+        } else {
+            printf("adding %s (%d)\n", strings[j], j);
+            addtest(strings[j]);
+            in[j] = 1;
+        }
+        disptree(tree);
+        findtest();
+    }
+
+    while (arraylen > 0) {
+        j = randomnumber(&seed);
+        j %= arraylen;
+        deltest(array[j]);
+    }
+
+    freetree234(tree);
+
+    /*
+     * Now try an unsorted tree. We don't really need to test
+     * delpos234 because we know del234 is based on it, so it's
+     * already been tested in the above sorted-tree code; but for
+     * completeness we'll use it to tear down our unsorted tree
+     * once we've built it.
+     */
+    tree = newtree234(NULL);
+    cmp = NULL;
+    verify();
+    for (i = 0; i < 1000; i++) {
+        printf("trial: %d\n", i);
+        j = randomnumber(&seed);
+        j %= NSTR;
+        k = randomnumber(&seed);
+        k %= count234(tree)+1;
+        printf("adding string %s at index %d\n", strings[j], k);
+        addpostest(strings[j], k);
+    }
+
+    /*
+     * While we have this tree in its full form, we'll take a copy
+     * of it to use in split and join testing.
+     */
+    tree2 = copytree234(tree, NULL, NULL);
+    verifytree(tree2, array, arraylen);/* check the copy is accurate */
+    /*
+     * Split tests. Split the tree at every possible point and
+     * check the resulting subtrees.
+     */
+    tworoot = (!tree2->root->elems[1]);/* see if it has a 2-root */
+    splittest(tree2, array, arraylen);
+    /*
+     * Now do the split test again, but on a tree that has a 2-root
+     * (if the previous one didn't) or doesn't (if the previous one
+     * did).
+     */
+    tmplen = arraylen;
+    while ((!tree2->root->elems[1]) == tworoot) {
+        delpos234(tree2, --tmplen);
+    }
+    printf("now trying splits on second tree\n");
+    splittest(tree2, array, tmplen);
+    freetree234(tree2);
+
+    /*
+     * Back to the main testing of unsorted trees.
+     */
+    while (count234(tree) > 0) {
+        printf("cleanup: tree size %d\n", count234(tree));
+        j = randomnumber(&seed);
+        j %= count234(tree);
+        printf("deleting string %s from index %d\n", (char *)array[j], j);
+        delpostest(j);
+    }
+    freetree234(tree);
+
+    /*
+     * Finally, do some testing on split/join on _sorted_ trees. At
+     * the same time, we'll be testing split on very small trees.
+     */
+    tree = newtree234(mycmp);
+    cmp = mycmp;
+    arraylen = 0;
+    for (i = 0; i < 16; i++) {
+        addtest(strings[i]);
+        tree2 = copytree234(tree, NULL, NULL);
+        splittest(tree2, array, arraylen);
+        freetree234(tree2);
+    }
+    freetree234(tree);
+
+    /*
+     * Test silly cases of join: join(emptytree, emptytree), and
+     * also ensure join correctly spots when sorted trees fail the
+     * ordering constraint.
+     */
+    tree = newtree234(mycmp);
+    tree2 = newtree234(mycmp);
+    tree3 = newtree234(mycmp);
+    tree4 = newtree234(mycmp);
+    assert(mycmp(strings[0], strings[1]) < 0);   /* just in case :-) */
+    add234(tree2, strings[1]);
+    add234(tree4, strings[0]);
+    array[0] = strings[0];
+    array[1] = strings[1];
+    verifytree(tree, array, 0);
+    verifytree(tree2, array+1, 1);
+    verifytree(tree3, array, 0);
+    verifytree(tree4, array, 1);
+
+    /*
+     * So:
+     *  - join(tree,tree3) should leave both tree and tree3 unchanged.
+     *  - joinr(tree,tree2) should leave both tree and tree2 unchanged.
+     *  - join(tree4,tree3) should leave both tree3 and tree4 unchanged.
+     *  - join(tree, tree2) should move the element from tree2 to tree.
+     *  - joinr(tree4, tree3) should move the element from tree4 to tree3.
+     *  - join(tree,tree3) should return NULL and leave both unchanged.
+     *  - join(tree3,tree) should work and create a bigger tree in tree3.
+     *
+     * Each join is performed outside assert() so that the call
+     * under test still happens when assertions are compiled out
+     * with NDEBUG.
+     */
+    joined = join234(tree, tree3);
+    assert(joined == tree);
+    verifytree(tree, array, 0);
+    verifytree(tree3, array, 0);
+
+    joined = join234r(tree, tree2);
+    assert(joined == tree2);
+    verifytree(tree, array, 0);
+    verifytree(tree2, array+1, 1);
+
+    joined = join234(tree4, tree3);
+    assert(joined == tree4);
+    verifytree(tree3, array, 0);
+    verifytree(tree4, array, 1);
+
+    joined = join234(tree, tree2);
+    assert(joined == tree);
+    verifytree(tree, array+1, 1);
+    verifytree(tree2, array, 0);
+
+    joined = join234r(tree4, tree3);
+    assert(joined == tree3);
+    verifytree(tree3, array, 1);
+    verifytree(tree4, array, 0);
+
+    joined = join234(tree, tree3);
+    assert(joined == NULL);
+    verifytree(tree, array+1, 1);
+    verifytree(tree3, array, 1);
+
+    joined = join234(tree3, tree);
+    assert(joined == tree3);
+    verifytree(tree3, array, 2);
+    verifytree(tree, array, 0);
+
+    (void) joined;                     /* only read by assert() */
+
+    return 0;
+}
+
+#endif
+
+#if 0 /* sorted list of strings might be useful */
+{
+ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x",
+}
+#endif
--- /dev/null
+/*
+ * tree234.h: header defining functions in tree234.c.
+ *
+ * This file is copyright 1999-2001 Simon Tatham.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef TREE234_H
+#define TREE234_H
+
+/*
+ * This typedef is opaque outside tree234.c itself.
+ */
+typedef struct tree234_Tag tree234;
+
+typedef int (*cmpfn234)(void *, void *);
+
+typedef void *(*copyfn234)(void *state, void *element);
+
+/*
+ * Create a 2-3-4 tree. If `cmp' is NULL, the tree is unsorted, and
+ * lookups by key will fail: you can only look things up by numeric
+ * index, and you have to use addpos234() and delpos234().
+ */
+tree234 *newtree234(cmpfn234 cmp);
+
+/*
+ * Free a 2-3-4 tree (not including freeing the elements).
+ */
+void freetree234(tree234 *t);
+
+/*
+ * Add an element e to a sorted 2-3-4 tree t. Returns e on success,
+ * or if an existing element compares equal, returns that.
+ */
+void *add234(tree234 *t, void *e);
+
+/*
+ * Add an element e to an unsorted 2-3-4 tree t. Returns e on
+ * success, NULL on failure. (Failure should only occur if the
+ * index is out of range or the tree is sorted.)
+ *
+ * Index range can be from 0 to the tree's current element count,
+ * inclusive.
+ */
+void *addpos234(tree234 *t, void *e, int index);
+
+/*
+ * Look up the element at a given numeric index in a 2-3-4 tree.
+ * Returns NULL if the index is out of range.
+ *
+ * One obvious use for this function is in iterating over the whole
+ * of a tree (sorted or unsorted):
+ *
+ * for (i = 0; (p = index234(tree, i)) != NULL; i++) consume(p);
+ *
+ * or
+ *
+ * int maxcount = count234(tree);
+ * for (i = 0; i < maxcount; i++) {
+ * p = index234(tree, i);
+ * assert(p != NULL);
+ * consume(p);
+ * }
+ */
+void *index234(tree234 *t, int index);
+
+/*
+ * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not
+ * found. e is always passed as the first argument to cmp, so cmp
+ * can be an asymmetric function if desired. cmp can also be passed
+ * as NULL, in which case the compare function from the tree proper
+ * will be used.
+ *
+ * Three of these functions are special cases of findrelpos234. The
+ * non-`pos' variants lack the `index' parameter: if the parameter
+ * is present and non-NULL, it must point to an integer variable
+ * which will be filled with the numeric index of the returned
+ * element.
+ *
+ * The non-`rel' variants lack the `relation' parameter. This
+ * parameter allows you to specify what relation the element you
+ * provide has to the element you're looking for. This parameter
+ * can be:
+ *
+ * REL234_EQ - find only an element that compares equal to e
+ * REL234_LT - find the greatest element that compares < e
+ * REL234_LE - find the greatest element that compares <= e
+ * REL234_GT - find the smallest element that compares > e
+ * REL234_GE - find the smallest element that compares >= e
+ *
+ * Non-`rel' variants assume REL234_EQ.
+ *
+ * If `rel' is REL234_GT or REL234_LT, the `e' parameter may be
+ * NULL. In this case, REL234_GT will return the smallest element
+ * in the tree, and REL234_LT will return the greatest. This gives
+ * an alternative means of iterating over a sorted tree, instead of
+ * using index234:
+ *
+ * // to loop forwards
+ * for (p = NULL; (p = findrel234(tree, p, NULL, REL234_GT)) != NULL ;)
+ * consume(p);
+ *
+ * // to loop backwards
+ * for (p = NULL; (p = findrel234(tree, p, NULL, REL234_LT)) != NULL ;)
+ * consume(p);
+ */
+/* Relation codes for the `relation'/`rel' parameter of findrel234, findrelpos234 and split234. */
+enum {
+ REL234_EQ, REL234_LT, REL234_LE, REL234_GT, REL234_GE
+};
+void *find234(tree234 *t, void *e, cmpfn234 cmp);
+void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation);
+void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index);
+void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp, int relation,
+ int *index);
+
+/*
+ * Delete an element e in a 2-3-4 tree. Does not free the element,
+ * merely removes all links to it from the tree nodes.
+ *
+ * delpos234 deletes the element at a particular tree index: it
+ * works on both sorted and unsorted trees.
+ *
+ * del234 deletes the element passed to it, so it only works on
+ * sorted trees. (It's equivalent to using findpos234 to determine
+ * the index of an element, and then passing that index to
+ * delpos234.)
+ *
+ * Both functions return a pointer to the element they delete, for
+ * the user to free or pass on elsewhere or whatever. If the index
+ * is out of range (delpos234) or the element is already not in the
+ * tree (del234) then they return NULL.
+ */
+void *del234(tree234 *t, void *e);
+void *delpos234(tree234 *t, int index);
+
+/*
+ * Return the total element count of a tree234.
+ */
+int count234(tree234 *t);
+
+/*
+ * Split a tree234 into two valid tree234s.
+ *
+ * splitpos234 splits at a given index. If `before' is TRUE, the
+ * items at and after that index are left in t and the ones before
+ * are returned; if `before' is FALSE, the items before that index
+ * are left in t and the rest are returned.
+ *
+ * split234 splits at a given key. You can pass any of the
+ * relations used with findrel234, except for REL234_EQ. The items
+ * in the tree that satisfy the relation are returned; the
+ * remainder are left.
+ */
+tree234 *splitpos234(tree234 *t, int index, int before);
+tree234 *split234(tree234 *t, void *e, cmpfn234 cmp, int rel);
+
+/*
+ * Join two tree234s together into a single one.
+ *
+ * All the elements in t1 are placed to the left of all the
+ * elements in t2. If the trees are sorted, there will be a test to
+ * ensure that this satisfies the ordering criterion, and NULL will
+ * be returned otherwise. If the trees are unsorted, there is no
+ * restriction on the use of join234.
+ *
+ * The tree returned is t1 (join234) or t2 (join234r), if the
+ * operation is successful.
+ */
+tree234 *join234(tree234 *t1, tree234 *t2);
+tree234 *join234r(tree234 *t1, tree234 *t2);
+
+/*
+ * Make a complete copy of a tree234. Element pointers will be
+ * reused unless copyfn is non-NULL, in which case it will be used
+ * to copy each element. (copyfn takes two `void *' parameters; the
+ * first is private state and the second is the element. A simple
+ * copy routine probably won't need private state.)
+ */
+tree234 *copytree234(tree234 *t, copyfn234 copyfn, void *copyfnstate);
+
+#endif /* TREE234_H */
--- /dev/null
+/*
+ * ustring.c: Unicode string routines
+ */
+
+#include <wchar.h>
+#include <time.h>
+#include "halibut.h"
+
+/*
+ * Duplicate a wide string into freshly allocated storage. A NULL
+ * input yields a newly allocated empty string rather than NULL, so
+ * the return value is always a distinct, zero-terminated buffer.
+ */
+wchar_t *ustrdup(wchar_t *s) {
+    wchar_t *copy;
+
+    if (!s) {
+        /* No source string: hand back an allocated "" */
+        copy = mknew(wchar_t);
+        *copy = 0;
+        return copy;
+    }
+
+    copy = mknewa(wchar_t, 1+ustrlen(s));
+    ustrcpy(copy, s);
+    return copy;
+}
+
+/*
+ * Narrow a wide string into a caller-supplied char buffer.
+ *
+ * s       - zero-terminated wide string, or NULL (treated as "").
+ * outbuf  - destination buffer.
+ * size    - capacity of outbuf in chars, including the terminator.
+ *
+ * At most size-1 characters are copied and the result is always
+ * zero-terminated when size > 0. If size <= 0 there is no room even
+ * for the terminator, so outbuf is left completely untouched.
+ * Each wide character is simply truncated to char.
+ *
+ * Returns outbuf.
+ */
+char *ustrtoa(wchar_t *s, char *outbuf, int size) {
+    char *p, *limit;
+
+    if (size <= 0)
+        return outbuf;              /* nowhere to write, not even a NUL */
+
+    if (!s) {
+        *outbuf = '\0';
+        return outbuf;
+    }
+
+    limit = outbuf + size - 1;      /* last slot is reserved for the NUL */
+    for (p = outbuf; *s && p < limit; p++, s++)
+        *p = (char)*s;
+    *p = '\0';
+    return outbuf;
+}
+
+/* Count the wide characters in s, not including the terminating zero. */
+int ustrlen(wchar_t *s) {
+    wchar_t *end = s;
+
+    while (*end)
+        end++;
+    return (int)(end - s);
+}
+
+/*
+ * Step past the string starting at s: returns a pointer to the
+ * character immediately after its terminating zero.
+ */
+wchar_t *uadv(wchar_t *s) {
+    while (*s)
+        s++;
+    return s + 1;
+}
+
+/*
+ * Copy the wide string `source', including its terminating zero,
+ * into `dest'. Returns dest.
+ */
+wchar_t *ustrcpy(wchar_t *dest, wchar_t *source) {
+    wchar_t *out = dest;
+
+    while ((*out++ = *source++) != 0)
+        ;                           /* the copy happens in the condition */
+    return dest;
+}
+
+/*
+ * Compare two wide strings character by character. Returns -1, 0
+ * or +1. NULL pointers are accepted: NULL equals NULL and sorts
+ * before any non-NULL string.
+ */
+int ustrcmp(wchar_t *lhs, wchar_t *rhs) {
+    if (!lhs || !rhs) {
+        if (lhs)
+            return +1;              /* only rhs is NULL */
+        if (rhs)
+            return -1;              /* only lhs is NULL */
+        return 0;
+    }
+
+    /* Skip the common prefix; stop at the first difference or at the end. */
+    for (; *lhs && *lhs == *rhs; lhs++, rhs++)
+        ;
+
+    if (*lhs == *rhs)
+        return 0;
+    return (*lhs < *rhs) ? -1 : 1;
+}
+
+/*
+ * Lower-case a single wide character. Zero always maps to zero
+ * (ustricmp relies on that).
+ */
+wchar_t utolower(wchar_t c) {
+    /* FIXME: this doesn't even come close (only 'A'..'Z' are handled) */
+    if (c >= 'A' && c <= 'Z')
+        return c + ('a'-'A');
+    return c;
+}
+
+/*
+ * Case-insensitive comparison of two wide strings, folding each
+ * character through utolower. Returns -1, 0 or +1.
+ */
+int ustricmp(wchar_t *lhs, wchar_t *rhs) {
+    for (;;) {
+        wchar_t lc = utolower(*lhs++);
+        wchar_t rc = utolower(*rhs++);
+
+        if (lc != rc)
+            return (lc < rc) ? -1 : 1;
+        if (!lc)
+            return 0;               /* both strings ended together */
+    }
+}
+
+/* Lower-case the wide string s in place (via utolower). Returns s. */
+wchar_t *ustrlow(wchar_t *s) {
+    wchar_t *cursor;
+
+    for (cursor = s; *cursor; cursor++)
+        *cursor = utolower(*cursor);
+    return s;
+}
+
+/*
+ * Convert the leading decimal digits of a wide string to an int.
+ *
+ * An optional leading '-' negates the result. Conversion stops at
+ * the first non-digit; a string with no digits yields 0. No
+ * overflow checking is performed.
+ */
+int utoi(wchar_t *s) {
+    int sign = +1;
+    int n = 0;
+
+    if (*s == L'-') {
+        s++;
+        sign = -1;
+    }
+
+    while (*s >= L'0' && *s <= L'9') {
+        n = n * 10 + (int)(*s - L'0');
+        s++;
+    }
+
+    /* Apply the sign parsed above (it used to be silently dropped). */
+    return sign * n;
+}
+
+/*
+ * Interpret a wide string as a boolean: "yes", "y", "true" and "t"
+ * (compared with ustricmp) give TRUE, anything else FALSE.
+ */
+int utob(wchar_t *s) {
+    static wchar_t *const truths[] = { L"yes", L"y", L"true", L"t" };
+    size_t i;
+
+    for (i = 0; i < sizeof(truths) / sizeof(*truths); i++) {
+        if (!ustricmp(s, truths[i]))
+            return TRUE;
+    }
+    return FALSE;
+}
+
+/* Return 1 if c is one of the ASCII digits '0'..'9', otherwise 0. */
+int uisdigit(wchar_t c) {
+    if (c < L'0')
+        return 0;
+    if (c > L'9')
+        return 0;
+    return 1;
+}
+
+#define USTRFTIME_DELTA 128
+/*
+ * Format a time as a freshly allocated wide string.
+ *
+ * wfmt     - strftime-style format as a wide string, or NULL to use
+ *            "%c".
+ * timespec - the broken-down time to format.
+ *
+ * The return value is allocated here and handed to the caller.
+ */
+wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) {
+    char *fmt;                      /* narrow format, with a leading space */
+    char *probe = NULL;             /* scratch buffer grown until output fits */
+    char *narrow;                   /* the real output, as chars */
+    wchar_t *result;
+    size_t cap = 0;
+    size_t outlen;
+    size_t i;
+
+    /*
+     * strftime returns 0 both when it runs out of space and when it
+     * successfully produces the empty string. To tell the two apart
+     * we format with a space stuck on the front, so that a
+     * successful result is never empty.
+     */
+    if (!wfmt) {
+        fmt = " %c";
+    } else {
+        size_t fmtlen = ustrlen(wfmt);
+        fmt = mknewa(char, 2+fmtlen);
+        ustrtoa(wfmt, fmt+1, fmtlen+1);
+        fmt[0] = ' ';
+    }
+
+    /* Enlarge the scratch buffer until the space-prefixed output fits. */
+    do {
+        cap += USTRFTIME_DELTA;
+        probe = resize(probe, cap);
+        outlen = strftime(probe, cap-1, fmt, timespec);
+    } while (outlen == 0);
+
+    /*
+     * outlen counts the leading space but not the terminator, so it
+     * is exactly the element count needed for the real output plus
+     * its terminating zero. Reuse the scratch block for the result.
+     */
+    result = resize((wchar_t *)probe, outlen);
+    narrow = mknewa(char, outlen);
+    strftime(narrow, outlen, fmt+1, timespec);
+
+    /*
+     * We operate in the C locale, so this all ought to be kosher
+     * ASCII. If we ever move outside ASCII machines, we may need
+     * to make this more portable...
+     */
+    for (i = 0; narrow[i]; i++)
+        result[i] = narrow[i];
+    result[i] = 0;
+
+    if (wfmt)
+        sfree(fmt);
+    sfree(narrow);
+    return result;
+}
--- /dev/null
+/*
+ * version.c: version string
+ */
+
+#include <stdio.h>
+
+/*
+ * VERSION may be supplied by the build; without it the string
+ * records the compilation date and time instead.
+ */
+#ifdef VERSION
+#define VER "version " VERSION
+#else
+#define VER "anonymous build (" __DATE__ " " __TIME__ ")"
+#endif
+
+/* The version string itself. */
+const char *const version = VER;
--- /dev/null
+/*
+ * winhelp.c: a module to generate Windows .HLP files
+ *
+ * Documentation of the .HLP file format comes from the excellent
+ * HELPFILE.TXT, published alongside the Help decompiler HELPDECO
+ * by Manfred Winterhoff. This code would not have been possible
+ * without his efforts. Many thanks.
+ */
+
+/*
+ * Potential future features:
+ *
+ * - perhaps LZ77 compression? This appears to cause a phase order
+ * problem: it's hard to do the compression until the data to be
+ * compressed is finalised, and yet you can't finalise the data
+ * to be compressed until you know how much of it is going into
+ * which TOPICBLOCK in order to work out the offsets in the
+ * topic headers - for which you have to have already done the
+ * compression. Perhaps the thing to do is to implement an LZ77
+ * compressor that can guarantee to leave particular bytes in
+ * the stream as literals, and then go back and fix the offsets
+ * up later. Not pleasant.
+ *
+ * - It would be good to find out what relation (if any) the LCID
+ * record in the |SYSTEM section bears to the codepage used in
+ * the actual help text, so as to be able to vary that if the
+ * user needs it. For the moment I suspect we're stuck with
+ * Win1252.
+ *
+ * - tables might be nice.
+ *
+ * Unlikely future features:
+ *
+ * - Phrase compression sounds harder. It's reasonably easy
+ * (though space-costly) to analyse all the text in the file to
+ * determine the one key phrase which would save most space if
+ * replaced by a reference everywhere it appears; but finding
+ * the _1024_ most effective phrases seems much harder since a
+ * naive analysis might find lots of phrases that all overlap
+ * (so you wouldn't get the saving you expected, as after taking
+ * out the first phrase the rest would never crop up). In
+ * addition, MS hold US patent number 4955066 which may cover
+ * phrase compression, so perhaps it's best just to leave it.
+ *
+ * Cleanup work:
+ *
+ * - sort out begin_topic. Ideally we should have a separate
+ * topic_macro function that adds to the existing linkdata for
+ * the topic, because that's more flexible than a variadic
+ * function. This will be fiddly, though: if it's called before
+ * whlp_begin_topic then we must buffer macros, and if it's
+ * called afterwards then we must be able to go back and modify
+ * the linkdata2 of the topic start block. Foo.
+ *
+ * - find out what should happen if a single topiclink crosses
+ * _two_ topicblock boundaries.
+ *
+ * - What is the BlockSize in a topic header (first 4 bytes of
+ * LinkData1 in a type 2 record) supposed to mean? How on earth
+ * is it measured? The help file doesn't become perceptibly
+ * corrupt if I frob it randomly; and on some occasions taking a
+ * bit _out_ of the help file _increases_ that value. I have a
+ * feeling it's completely made up and/or vestigial, so for the
+ * moment I'm just making up a plausible value as I go along.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <time.h>
+#include <stdarg.h>
+
+#include "halibut.h"
+#include "winhelp.h"
+#include "tree234.h"
+
+#ifdef TESTMODE
+/*
+ * This lot is useful for testing. Something like it will also be
+ * needed to use this module standalone.
+ */
+/* Map the allocation wrappers straight onto the C library. */
+#define smalloc malloc
+#define srealloc realloc
+#define sfree free
+/* Typed allocation helpers: one object, an array, and array resize. */
+#define mknew(type) ( (type *) smalloc (sizeof (type)) )
+#define mknewa(type, number) ( (type *) smalloc ((number) * sizeof (type)) )
+#define resize(array, len) ( srealloc ((array), (len) * sizeof (*(array))) )
+/* Element count of a true array (not a pointer). */
+#define lenof(array) ( sizeof(array) / sizeof(*(array)) )
+/* Return a newly allocated copy of the zero-terminated string s. */
+char *dupstr(char *s) {
+    size_t nbytes = strlen(s) + 1;  /* include the terminator */
+    char *copy = mknewa(char, nbytes);
+
+    memcpy(copy, s, nbytes);
+    return copy;
+}
+#endif
+
+#define UNUSEDARG(x) ( (x) = (x) )
+
+/*
+ * Byte-order helpers: read and write 16- and 32-bit quantities
+ * stored least significant byte first at the address `cp'.
+ * Note that each macro evaluates its arguments more than once.
+ */
+
+/* Assemble four bytes at cp into an unsigned long, cp[0] being the low byte. */
+#define GET_32BIT_LSB_FIRST(cp) \
+ (((unsigned long)(unsigned char)(cp)[0]) | \
+ ((unsigned long)(unsigned char)(cp)[1] << 8) | \
+ ((unsigned long)(unsigned char)(cp)[2] << 16) | \
+ ((unsigned long)(unsigned char)(cp)[3] << 24))
+
+/* Store the low 32 bits of value into cp[0..3], low byte first. */
+#define PUT_32BIT_LSB_FIRST(cp, value) do { \
+ (cp)[0] = 0xFF & (value); \
+ (cp)[1] = 0xFF & ((value) >> 8); \
+ (cp)[2] = 0xFF & ((value) >> 16); \
+ (cp)[3] = 0xFF & ((value) >> 24); } while (0)
+
+/* Assemble two bytes at cp into an unsigned long, cp[0] being the low byte. */
+#define GET_16BIT_LSB_FIRST(cp) \
+ (((unsigned long)(unsigned char)(cp)[0]) | \
+ ((unsigned long)(unsigned char)(cp)[1] << 8))
+
+/* Store the low 16 bits of value into cp[0..1], low byte first. */
+#define PUT_16BIT_LSB_FIRST(cp, value) do { \
+ (cp)[0] = 0xFF & (value); \
+ (cp)[1] = 0xFF & ((value) >> 8); } while (0)
+
+#define MAX_PAGE_SIZE 0x800 /* max page size in any B-tree */
+#define TOPIC_BLKSIZE 4096 /* implied by version/flags combo */
+
+typedef struct WHLP_TOPIC_tag context;
+
+/*
+ * One named internal file, built up in memory. Instances are kept
+ * in the `files' tree of struct WHLP_tag, ordered by name (see
+ * filecmp).
+ */
+struct file {
+ char *name; /* file name, will need freeing */
+ unsigned char *data; /* file data, will need freeing */
+ int pos; /* position for adding data */
+ int len; /* # of meaningful bytes in data */
+ int size; /* # of allocated bytes in data */
+ int fileoffset; /* offset in the real .HLP file */
+};
+
+/*
+ * One index entry: an index term and the topic it refers to.
+ * Ordered by term and then by the topic's `index' number (see
+ * idxcmp).
+ */
+struct indexrec {
+ char *term; /* index term, will need freeing */
+ context *topic; /* topic it links to */
+ int count, offset; /* used when building |KWDATA */
+};
+
+/*
+ * One record destined for the |TOPIC section. recordtype is 2 for
+ * a topic header (set in whlp_begin_topic) and 32 for a text
+ * record (set in whlp_begin_para). data1/data2 are the record's
+ * two data areas, with lengths len1/len2.
+ */
+struct topiclink {
+ int topicoffset, topicpos; /* for referencing from elsewhere */
+ int recordtype;
+ int len1, len2;
+ unsigned char *data1, *data2;
+ context *context; /* owning topic for a topic header; NULL in text records */
+ struct topiclink *nonscroll, *scroll, *nexttopic; /* first nonscroll/scroll paragraph of the topic; next topic header */
+ int block_size; /* for the topic header - *boggle* */
+};
+
+/*
+ * A help topic / context (typedef'd as `context' in this file).
+ * Created by whlp_register_topic; title and link are filled in by
+ * whlp_begin_topic.
+ */
+struct WHLP_TOPIC_tag {
+ char *name; /* needs freeing */
+ unsigned long hash; /* context_hash() of name */
+ struct topiclink *link; /* this provides TOPICOFFSET */
+ context *browse_next, *browse_prev; /* browse-sequence neighbours, set by whlp_browse_link */
+ char *title; /* needs freeing */
+ int index; /* arbitrary number */
+};
+
+/*
+ * Description of one font; instances are stored in the
+ * `fontdescs' tree of struct WHLP_tag.
+ * NOTE(review): field meanings are inferred from their names
+ * (size in half-points, RGB colour components) - confirm against
+ * the code that fills them in.
+ */
+struct fontdesc {
+ char *font;
+ int family, rendition, halfpoints;
+ int r, g, b;
+};
+
+/*
+ * All the state for one help file under construction: the trees of
+ * files, contexts, text records, index entries, tab stops and
+ * fonts, plus scratch state used while building records.
+ */
+struct WHLP_tag {
+ tree234 *files; /* stores `struct file' */
+ tree234 *pre_contexts; /* stores `context' */
+ tree234 *contexts; /* also stores `context' */
+ tree234 *titles; /* _also_ stores `context' */
+ tree234 *text; /* stores `struct topiclink' */
+ tree234 *index; /* stores `struct indexrec' */
+ tree234 *tabstops; /* stores `int' */
+ tree234 *fontnames; /* stores `char *' */
+ tree234 *fontdescs; /* stores `struct fontdesc' */
+ struct file *systemfile; /* the |SYSTEM internal file */
+ context *ptopic; /* primary topic */
+ struct topiclink *prevtopic; /* to link type-2 records together */
+ struct topiclink *link; /* while building a topiclink */
+ unsigned char linkdata1[TOPIC_BLKSIZE]; /* while building a topiclink */
+ unsigned char linkdata2[TOPIC_BLKSIZE]; /* while building a topiclink */
+ int topicblock_remaining; /* while building |TOPIC section */
+ int lasttopiclink; /* while building |TOPIC section */
+ int firsttopiclink_offset; /* while building |TOPIC section */
+ int lasttopicstart; /* while building |TOPIC section */
+ int para_flags; /* paragraph attribute/alignment/tab-stop bits; cleared by whlp_para_reset */
+ int para_attrs[7]; /* attribute values, indexed by attribute id (see whlp_para_attr) */
+ int ncontexts; /* number of topics registered; source of context `index' values */
+};
+
+/* Functions to return the index and leaf data for B-tree contents. */
+typedef int (*bt_index_fn)(const void *item, unsigned char *outbuf);
+typedef int (*bt_leaf_fn)(const void *item, unsigned char *outbuf);
+
+/* Forward references. */
+static void whlp_para_reset(WHLP h);
+static struct file *whlp_new_file(WHLP h, char *name);
+static void whlp_file_add(struct file *f, const void *data, int len);
+static void whlp_file_add_char(struct file *f, int data);
+static void whlp_file_add_short(struct file *f, int data);
+static void whlp_file_add_long(struct file *f, int data);
+static void whlp_file_fill(struct file *f, int len);
+static void whlp_file_seek(struct file *f, int pos, int whence);
+static int whlp_file_offset(struct file *f);
+
+/* ----------------------------------------------------------------------
+ * Fiddly little functions: B-tree compare, index and leaf functions.
+ */
+
+/* The master index maps file names to help-file offsets. */
+
+/* Tree ordering for `struct file': plain strcmp on the file names. */
+static int filecmp(void *av, void *bv)
+{
+    const char *aname = ((const struct file *)av)->name;
+    const char *bname = ((const struct file *)bv)->name;
+
+    return strcmp(aname, bname);
+}
+
+/*
+ * B-tree index entry for a file: its name including the
+ * terminating zero. Returns the number of bytes written.
+ */
+static int fileindex(const void *av, unsigned char *outbuf)
+{
+    const struct file *f = av;
+    int nbytes = strlen(f->name) + 1;   /* name plus its terminator */
+
+    memcpy(outbuf, f->name, nbytes);
+    return nbytes;
+}
+
+/*
+ * B-tree leaf entry for a file: the zero-terminated name followed
+ * by the 32-bit little-endian file offset. Returns the number of
+ * bytes written.
+ */
+static int fileleaf(const void *av, unsigned char *outbuf)
+{
+    const struct file *f = av;
+    int namelen = strlen(f->name) + 1;  /* name plus its terminator */
+    unsigned char *tail = outbuf + namelen;
+
+    memcpy(outbuf, f->name, namelen);
+    PUT_32BIT_LSB_FIRST(tail, f->fileoffset);
+    return namelen + 4;
+}
+
+/* The |CONTEXT internal file maps help context hashes to TOPICOFFSETs. */
+
+/*
+ * Tree ordering for contexts: by hash value, treated as a signed
+ * 32-bit quantity.
+ *
+ * The comparison is done on the low 32 bits only, with the sign
+ * bit flipped so that an unsigned comparison yields two's
+ * complement signed order. This gives the same ordering as a cast
+ * to a 32-bit signed long, but does not depend on the width of
+ * `long' or on implementation-defined unsigned-to-signed
+ * conversion.
+ */
+static int ctxcmp(void *av, void *bv)
+{
+    const context *a = (const context *)av;
+    const context *b = (const context *)bv;
+    unsigned long ah = (a->hash & 0xFFFFFFFFUL) ^ 0x80000000UL;
+    unsigned long bh = (b->hash & 0xFFFFFFFFUL) ^ 0x80000000UL;
+
+    if (ah < bh)
+        return -1;
+    if (ah > bh)
+        return +1;
+    return 0;
+}
+
+/*
+ * B-tree index entry for a context: its hash as four little-endian
+ * bytes. Returns the number of bytes written.
+ */
+static int ctxindex(const void *av, unsigned char *outbuf)
+{
+    const context *ctx = av;
+
+    PUT_32BIT_LSB_FIRST(outbuf, ctx->hash);
+    return 4;
+}
+
+/*
+ * B-tree leaf entry for a context: the hash followed by the
+ * topic offset of its link, each as four little-endian bytes.
+ * Returns the number of bytes written.
+ */
+static int ctxleaf(const void *av, unsigned char *outbuf)
+{
+    const context *ctx = av;
+    unsigned char *hashpos = outbuf;
+    unsigned char *offpos = outbuf + 4;
+
+    PUT_32BIT_LSB_FIRST(hashpos, ctx->hash);
+    PUT_32BIT_LSB_FIRST(offpos, ctx->link->topicoffset);
+    return 8;
+}
+
+/* The |TTLBTREE internal file maps TOPICOFFSETs to title strings. */
+
+/* Tree ordering for the titles tree: by the topic offset of each context's link. */
+static int ttlcmp(void *av, void *bv)
+{
+    int aoff = ((const context *)av)->link->topicoffset;
+    int boff = ((const context *)bv)->link->topicoffset;
+
+    /* Yields -1, 0 or +1 without risking overflow from a subtraction. */
+    return (aoff > boff) - (aoff < boff);
+}
+
+/*
+ * B-tree index entry for a title: the topic offset as four
+ * little-endian bytes. Returns the number of bytes written.
+ */
+static int ttlindex(const void *av, unsigned char *outbuf)
+{
+    const context *ctx = av;
+
+    PUT_32BIT_LSB_FIRST(outbuf, ctx->link->topicoffset);
+    return 4;
+}
+
+/*
+ * B-tree leaf entry for a title: the topic offset (four
+ * little-endian bytes) followed by the zero-terminated title.
+ * Returns the number of bytes written.
+ */
+static int ttlleaf(const void *av, unsigned char *outbuf)
+{
+    const context *ctx = av;
+    int titlelen;
+
+    PUT_32BIT_LSB_FIRST(outbuf, ctx->link->topicoffset);
+    titlelen = strlen(ctx->title) + 1;  /* title plus its terminator */
+    memcpy(outbuf + 4, ctx->title, titlelen);
+    return titlelen + 4;
+}
+
+/* The |KWBTREE internal file maps index strings to TOPICOFFSETs. */
+
+/*
+ * Tree ordering for index records: by term (strcmp), then by the
+ * `index' number of the topic each record points at.
+ */
+static int idxcmp(void *av, void *bv)
+{
+    const struct indexrec *a = av;
+    const struct indexrec *b = bv;
+    int order = strcmp(a->term, b->term);
+
+    if (order == 0) {
+        /* Same term: fall back to the topics' index numbers. */
+        int ai = a->topic->index;
+        int bi = b->topic->index;
+        order = (ai > bi) - (ai < bi);
+    }
+    return order;
+}
+
+/*
+ * B-tree index entry for an index record: the term including its
+ * terminating zero. Returns the number of bytes written.
+ */
+static int idxindex(const void *av, unsigned char *outbuf)
+{
+    const struct indexrec *rec = av;
+    int nbytes = strlen(rec->term) + 1; /* term plus its terminator */
+
+    memcpy(outbuf, rec->term, nbytes);
+    return nbytes;
+}
+
+/*
+ * B-tree leaf entry for an index record: the zero-terminated term,
+ * then `count' as 16 bits and `offset' as 32 bits, both
+ * little-endian. Returns the number of bytes written.
+ */
+static int idxleaf(const void *av, unsigned char *outbuf)
+{
+    const struct indexrec *rec = av;
+    int termlen = strlen(rec->term) + 1;    /* term plus its terminator */
+    unsigned char *countpos = outbuf + termlen;
+    unsigned char *offsetpos = countpos + 2;
+
+    memcpy(outbuf, rec->term, termlen);
+    PUT_16BIT_LSB_FIRST(countpos, rec->count);
+    PUT_32BIT_LSB_FIRST(offsetpos, rec->offset);
+    return termlen + 6;
+}
+
+/*
+ * The internal `tabstops' B-tree stores pointers-to-int. Sorting
+ * is by the low 16 bits of the number (above that is flags).
+ */
+
+/*
+ * Tree ordering for tab stops (pointers to int): only the low 16
+ * bits take part, so two stops differing only in their flag bits
+ * compare equal.
+ */
+static int tabcmp(void *av, void *bv)
+{
+    int akey = *(const int *)av & 0xFFFF;
+    int bkey = *(const int *)bv & 0xFFFF;
+
+    return (akey > bkey) - (akey < bkey);
+}
+
+/* The internal `fontnames' B-tree stores strings. */
+/* Tree ordering for font names: the elements are C strings, compared with strcmp. */
+static int fontcmp(void *av, void *bv)
+{
+    return strcmp((const char *)av, (const char *)bv);
+}
+
+/* ----------------------------------------------------------------------
+ * Manage help contexts and topics.
+ */
+
+/*
+ * This is the code to compute the hash of a context name. Copied
+ * straight from Winterhoff's documentation.
+ */
+/*
+ * Compute the hash of a context name, following the algorithm
+ * given in Winterhoff's documentation.
+ *
+ * The hash is a 32-bit quantity. It is explicitly reduced modulo
+ * 2^32 after every step, so the result is the same whether
+ * `unsigned long' is 32 bits wide or wider; the return value is
+ * always in the range 0..0xFFFFFFFF.
+ *
+ * context - zero-terminated context name.
+ */
+static unsigned long context_hash(char *context)
+{
+    /* Per-byte contribution to the hash; entries are signed on purpose. */
+    static const signed char bytemapping[256] =
+        "\x00\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF"
+        "\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"
+        "\xF0\x0B\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\x0C\xFF"
+        "\x0A\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
+        "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
+        "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x0B\x0C\x0D\x0E\x0D"
+        "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
+        "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F"
+        "\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F"
+        "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F"
+        "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F"
+        "\x80\x81\x82\x83\x0B\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
+        "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
+        "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
+        "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"
+        "\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF";
+    unsigned long hash;
+
+    /*
+     * The hash algorithm starts the hash at 0 and updates it with
+     * each character. Therefore, logically, the hash of an empty
+     * string should be 0 (it starts at 0 and is never updated);
+     * but Winterhoff says it is in fact 1. Shouldn't matter, since
+     * I never plan to use empty context names, but I'll stick the
+     * special case in here anyway.
+     */
+    if (!*context)
+        return 1;
+
+    /*
+     * Now compute the hash in the normal way. A negative table
+     * entry converts to unsigned long modulo 2^N; masking to 32
+     * bits afterwards gives the correct value modulo 2^32 for any
+     * N >= 32.
+     */
+    hash = 0;
+    for (; *context; context++) {
+        hash = hash * 43 + bytemapping[(unsigned char)*context];
+        hash &= 0xFFFFFFFFUL;
+    }
+    return hash;
+}
+
+/*
+ * Register a new topic with the help file.
+ *
+ * h            - the help file being built.
+ * context_name - the topic's context name, or NULL for an
+ *                anonymous topic (which is given a name later, in
+ *                whlp_prepare).
+ * clash        - if non-NULL and the name's hash collides with an
+ *                existing context, receives the name of that
+ *                existing context. The string still belongs to the
+ *                existing context.
+ *
+ * Returns the new topic, or NULL on a hash clash.
+ */
+WHLP_TOPIC whlp_register_topic(WHLP h, char *context_name, char **clash)
+{
+    context *ctx = mknew(context);
+    context *otherctx;
+
+    /*
+     * Index contexts in order of creation, just so there's some
+     * sort of non-arbitrary ordering in the index B-tree. Call me
+     * fussy, but I don't like indexing on pointer values because I
+     * prefer the code to be deterministic when run under different
+     * C libraries.
+     */
+    ctx->index = h->ncontexts++;
+    ctx->browse_prev = ctx->browse_next = NULL;
+
+    /*
+     * Start every remaining field from a known state. title and
+     * link are only filled in by whlp_begin_topic, and hash is
+     * only computed once a name exists; leaving them uninitialised
+     * would make a registered-but-never-begun topic unsafe to
+     * inspect.
+     */
+    ctx->title = NULL;
+    ctx->link = NULL;
+    ctx->hash = 0;
+
+    if (context_name) {
+        /*
+         * We have a context name, which means we can put this
+         * context straight into the `contexts' tree.
+         */
+        ctx->name = dupstr(context_name);
+        ctx->hash = context_hash(context_name);
+        otherctx = add234(h->contexts, ctx);
+        if (otherctx != ctx) {
+            /*
+             * Hash clash. Destroy the new context and return NULL,
+             * providing the clashing string.
+             */
+            sfree(ctx->name);
+            sfree(ctx);
+            if (clash) *clash = otherctx->name;
+            return NULL;
+        }
+    } else {
+        /*
+         * We have no context name yet. Enter this into the
+         * pre_contexts tree of anonymous topics, which we will go
+         * through later and allocate unique context names and hash
+         * values.
+         */
+        ctx->name = NULL;
+        addpos234(h->pre_contexts, ctx, count234(h->pre_contexts));
+    }
+    return ctx;
+}
+
+/*
+ * Give every anonymous topic a context name and hash, then reset
+ * the paragraph attributes ready for text output.
+ */
+void whlp_prepare(WHLP h)
+{
+    /*
+     * Generated context IDs have the form `t00000001'. The counter
+     * keeps going up across topics, and any ID whose hash is
+     * already present in the `contexts' tree is simply skipped.
+     */
+    int next_id = 0;
+    context *ctx;
+
+    for (;;) {
+        /* Drain pre_contexts from the front until it is empty. */
+        ctx = index234(h->pre_contexts, 0);
+        if (ctx == NULL)
+            break;
+        delpos234(h->pre_contexts, 0);
+
+        ctx->name = mknewa(char, 20);
+        for (;;) {
+            sprintf(ctx->name, "t%08d", next_id++);
+            ctx->hash = context_hash(ctx->name);
+            /* add234 returns our own pointer only if nothing clashed */
+            if (add234(h->contexts, ctx) == ctx)
+                break;
+        }
+    }
+
+    /*
+     * Ensure paragraph attributes are clear for the start of text
+     * output.
+     */
+    whlp_para_reset(h);
+}
+
+/*
+ * Return the context name of a topic. The string is the topic's
+ * own `name' field, not a copy.
+ */
+char *whlp_topic_id(WHLP_TOPIC topic)
+{
+    char *id = topic->name;
+
+    return id;
+}
+
+/*
+ * Start a new topic: create its topic-header record (type 2) and
+ * append it to the text tree.
+ *
+ * h     - the help file being built.
+ * topic - the topic being started; its title and link are set here.
+ * title - the topic title.
+ * ...   - zero or more macro strings (char *), terminated by a
+ *         NULL pointer.
+ *
+ * The header's second data area is the title followed by each
+ * macro, every string zero-terminated except the last.
+ */
+void whlp_begin_topic(WHLP h, WHLP_TOPIC topic, char *title, ...)
+{
+    struct topiclink *hdr = mknew(struct topiclink);
+    int used;                       /* bytes of h->linkdata2 filled so far */
+    int chunk;                      /* size of one string plus terminator */
+    char *arg;
+    va_list args;
+
+    /* Chain this header onto the previous topic header, if any. */
+    hdr->nexttopic = NULL;
+    if (h->prevtopic)
+        h->prevtopic->nexttopic = hdr;
+    h->prevtopic = hdr;
+
+    hdr->recordtype = 2;            /* topic header */
+    hdr->context = topic;
+    hdr->nonscroll = NULL;
+    hdr->scroll = NULL;
+    hdr->block_size = 0;
+
+    hdr->len1 = 4*7;                /* standard linkdata1 size */
+    hdr->data1 = mknewa(unsigned char, hdr->len1);
+
+    /* Assemble title and macros in the scratch buffer first. */
+    chunk = strlen(title) + 1;
+    assert(chunk <= TOPIC_BLKSIZE);
+    memcpy(h->linkdata2, title, chunk);
+    used = chunk;
+
+    va_start(args, title);
+    for (arg = va_arg(args, char *); arg != NULL;
+         arg = va_arg(args, char *)) {
+        chunk = strlen(arg) + 1;
+        assert(used+chunk <= TOPIC_BLKSIZE);
+        memcpy(h->linkdata2+used, arg, chunk);
+        used += chunk;
+    }
+    va_end(args);
+
+    /* The final string's terminator is not stored. */
+    hdr->len2 = used - 1;
+    hdr->data2 = mknewa(unsigned char, hdr->len2);
+    memcpy(hdr->data2, h->linkdata2, hdr->len2);
+
+    topic->title = dupstr(title);
+    topic->link = hdr;
+
+    addpos234(h->text, hdr, count234(h->text));
+}
+
+/*
+ * Make `after' the browse-sequence successor of `before'. Any link
+ * `before' already had to a successor, or `after' already had to a
+ * predecessor, is broken first.
+ */
+void whlp_browse_link(WHLP h, WHLP_TOPIC before, WHLP_TOPIC after)
+{
+    UNUSEDARG(h);
+
+    /* Detach whatever used to follow `before'. */
+    if (before->browse_next != NULL) {
+        context *displaced = before->browse_next;
+        displaced->browse_prev = NULL;
+    }
+
+    /* Detach whatever used to precede `after'. */
+    if (after->browse_prev != NULL) {
+        context *displaced = after->browse_prev;
+        displaced->browse_next = NULL;
+    }
+
+    before->browse_next = after;
+    after->browse_prev = before;
+}
+
+/* ----------------------------------------------------------------------
+ * Manage the actual generation of paragraph and text records.
+ */
+
+/*
+ * Append one byte to the topiclink currently being built.
+ *
+ * which - 1 selects linkdata1 / len1, anything else selects
+ *         linkdata2 / len2.
+ * c     - the byte to append (stored as an unsigned char).
+ *
+ * Asserts that the chosen buffer is not already full.
+ */
+static void whlp_linkdata(WHLP h, int which, int c)
+{
+    int *len;
+    unsigned char *data;            /* matches the buffers' element type */
+
+    if (which == 1) {
+        len = &h->link->len1;
+        data = h->linkdata1;
+    } else {
+        len = &h->link->len2;
+        data = h->linkdata2;
+    }
+
+    assert(*len < TOPIC_BLKSIZE);
+    data[(*len)++] = (unsigned char)c;
+}
+
+/* Append the low 16 bits of data to the chosen link buffer, low byte first. */
+static void whlp_linkdata_short(WHLP h, int which, int data)
+{
+    int shift;
+
+    for (shift = 0; shift < 16; shift += 8)
+        whlp_linkdata(h, which, (data >> shift) & 0xFF);
+}
+
+/* Append the low 32 bits of data to the chosen link buffer, low byte first. */
+static void whlp_linkdata_long(WHLP h, int which, int data)
+{
+    int shift;
+
+    for (shift = 0; shift < 32; shift += 8)
+        whlp_linkdata(h, which, (data >> shift) & 0xFF);
+}
+
+/*
+ * Append a compressed unsigned short. Values up to 0x7F take one
+ * byte (value*2, low bit clear); larger values take two bytes, the
+ * first having its low bit set.
+ */
+static void whlp_linkdata_cushort(WHLP h, int which, int data)
+{
+    if (data > 0x7F) {
+        /* Two-byte form: low 7 bits doubled plus marker bit, then the rest. */
+        whlp_linkdata(h, which, 1 + (data%128 * 2));
+        whlp_linkdata(h, which, data/128);
+        return;
+    }
+    whlp_linkdata(h, which, data*2);
+}
+
+/*
+ * Append a compressed signed short: the value is biased by 64 if
+ * it lies in -0x40..0x3F, otherwise by 16384, and then written as
+ * a compressed unsigned short.
+ */
+static void whlp_linkdata_csshort(WHLP h, int which, int data)
+{
+    int bias;
+
+    if (data < -0x40 || data > 0x3F)
+        bias = 16384;
+    else
+        bias = 64;
+    whlp_linkdata_cushort(h, which, data+bias);
+}
+
+/*
+ * Append a compressed unsigned long. Values up to 0x7FFF take one
+ * 16-bit word (value*2, low bit clear); larger values take two
+ * words, the first having its low bit set.
+ */
+static void whlp_linkdata_culong(WHLP h, int which, int data)
+{
+    if (data > 0x7FFF) {
+        /* Two-word form: low 15 bits doubled plus marker bit, then the rest. */
+        whlp_linkdata_short(h, which, 1 + (data%32768 * 2));
+        whlp_linkdata_short(h, which, data/32768);
+        return;
+    }
+    whlp_linkdata_short(h, which, data*2);
+}
+
+/*
+ * Append `data' in the compressed-signed-long encoding: the value is
+ * biased to make it non-negative and then written as a compressed
+ * unsigned long. The bias is 16384 when the value fits the one-word
+ * form (-0x4000..0x3FFF) and 67108864 otherwise.
+ */
+static void whlp_linkdata_cslong(WHLP h, int which, int data)
+{
+    int bias = (data >= -0x4000 && data <= 0x3FFF) ? 16384 : 67108864;
+
+    whlp_linkdata_culong(h, which, data + bias);
+}
+
+/*
+ * Return the paragraph attributes to their defaults: clear every
+ * flag and discard (freeing) all pending tab stops.
+ */
+static void whlp_para_reset(WHLP h)
+{
+    int *stop;
+
+    h->para_flags = 0;
+
+    for (;;) {
+        stop = index234(h->tabstops, 0);
+        if (stop == NULL)
+            break;
+        delpos234(h->tabstops, 0);
+        sfree(stop);
+    }
+}
+
+/*
+ * Set one attribute of the next paragraph.
+ *
+ * Attribute ids from WHLP_PARA_SPACEABOVE to
+ * WHLP_PARA_FIRSTLINEINDENT store `attr_param' and raise the flag
+ * bit numbered by the id. WHLP_PARA_ALIGNMENT instead rewrites the
+ * two alignment bits (0x400 for right, 0x800 for centre, neither for
+ * anything else). Any other id is ignored.
+ */
+void whlp_para_attr(WHLP h, int attr_id, int attr_param)
+{
+    if (attr_id >= WHLP_PARA_SPACEABOVE &&
+        attr_id <= WHLP_PARA_FIRSTLINEINDENT) {
+        h->para_attrs[attr_id] = attr_param;
+        h->para_flags |= 1 << attr_id;
+        return;
+    }
+
+    if (attr_id != WHLP_PARA_ALIGNMENT)
+        return;
+
+    h->para_flags &= ~0xC00;
+    if (attr_param == WHLP_ALIGN_RIGHT) {
+        h->para_flags |= 0x400;
+    } else if (attr_param == WHLP_ALIGN_CENTRE) {
+        h->para_flags |= 0x800;
+    }
+}
+
+/*
+ * Register a tab stop for the next paragraph.
+ *
+ * `tabstop' is the position; `alignment' adds a marker bit above the
+ * low 16 bits (0x20000 for centre, 0x10000 for right). The combined
+ * value is stored in a freshly allocated int in the sorted
+ * `tabstops' tree, and the 0x0200 paragraph flag is raised so that
+ * whlp_begin_para() writes the tab table.
+ */
+void whlp_set_tabstop(WHLP h, int tabstop, int alignment)
+{
+    int *p;
+
+    if (alignment == WHLP_ALIGN_CENTRE)
+        tabstop |= 0x20000;
+    if (alignment == WHLP_ALIGN_RIGHT)
+        tabstop |= 0x10000;
+
+    p = mknew(int);
+    *p = tabstop;
+    /*
+     * add234() hands back the element already in the tree when an
+     * equal one exists, in which case ours was not inserted and
+     * nobody else will ever free it.
+     */
+    if (add234(h->tabstops, p) != p)
+        sfree(p);
+    h->para_flags |= 0x0200;
+}
+
+/*
+ * Start a new paragraph (a type-32 text record).
+ *
+ * A fresh topiclink becomes h->link. Until whlp_end_para() it
+ * accumulates into the scratch buffers h->linkdata1 and
+ * h->linkdata2, which its data1/data2 pointers temporarily alias.
+ * `para_type' is WHLP_PARA_NONSCROLL or WHLP_PARA_SCROLL; the first
+ * paragraph of each kind after a topic header is recorded in that
+ * header.
+ */
+void whlp_begin_para(WHLP h, int para_type)
+{
+    struct topiclink *rec = mknew(struct topiclink);
+    int attr;
+
+    rec->recordtype = 32;              /* text record */
+
+    /*
+     * Only type-2 records use these, but keep them well defined
+     * anyway.
+     */
+    rec->nexttopic = NULL;
+    rec->context = NULL;
+    rec->nonscroll = NULL;
+    rec->scroll = NULL;
+
+    rec->len1 = 0;
+    rec->len2 = 0;
+    rec->data1 = h->linkdata1;
+    rec->data2 = h->linkdata2;
+    h->link = rec;
+
+    if (h->prevtopic) {
+        if (para_type == WHLP_PARA_NONSCROLL && !h->prevtopic->nonscroll)
+            h->prevtopic->nonscroll = rec;
+        if (para_type == WHLP_PARA_SCROLL && !h->prevtopic->scroll)
+            h->prevtopic->scroll = rec;
+    }
+
+    /*
+     * Paragraph info at the front of linkdata1. The leading
+     * TopicSize (cslong) and TopicLength (cushort) fields are not
+     * written here: their values are only known at the end, so
+     * whlp_end_para() produces them and moves them to the front.
+     */
+    whlp_linkdata(h, 1, 0);            /* must-be-0x00 */
+    whlp_linkdata(h, 1, 0x80);         /* must-be-0x80 */
+    whlp_linkdata_short(h, 1, 0);      /* Winterhoff's `id'; always 0 */
+    whlp_linkdata_short(h, 1, h->para_flags);
+
+    /* One csshort for every simple attribute whose flag is set. */
+    for (attr = WHLP_PARA_SPACEABOVE;
+         attr <= WHLP_PARA_FIRSTLINEINDENT; attr++) {
+        if (h->para_flags & (1<<attr))
+            whlp_linkdata_csshort(h, 1, h->para_attrs[attr]);
+    }
+
+    /* The tab stop table: a count, then each stop in tree order. */
+    if (h->para_flags & 0x0200) {
+        int ntabs = count234(h->tabstops);
+        int t;
+
+        whlp_linkdata_csshort(h, 1, ntabs);
+        for (t = 0; t < ntabs; t++) {
+            int *tabp = index234(h->tabstops, t);
+            int tab = *tabp;
+
+            /*
+             * A stop with an alignment marker gets bit 0x4000 set in
+             * its position word and is followed by the marker
+             * itself.
+             */
+            if (tab & 0x30000)
+                tab |= 0x4000;
+            whlp_linkdata_cushort(h, 1, tab & 0xFFFF);
+            if (tab & 0x4000)
+                whlp_linkdata_cushort(h, 1, tab >> 16);
+        }
+    }
+
+    /*
+     * From here on the caller supplies text and formatting commands.
+     */
+}
+
+/*
+ * Switch the current paragraph to font descriptor `font_id'.
+ */
+void whlp_set_font(WHLP h, int font_id)
+{
+    /*
+     * A NUL in linkdata2 makes the reader go and fetch a formatting
+     * command from linkdata1.
+     */
+    whlp_linkdata(h, 2, 0);
+
+    /* The command itself: 0x80, then the font id as a short. */
+    whlp_linkdata(h, 1, 0x80);
+    whlp_linkdata_short(h, 1, font_id);
+}
+
+/*
+ * Begin a hyperlink to `target' in the current paragraph.
+ */
+void whlp_start_hyperlink(WHLP h, WHLP_TOPIC target)
+{
+    /* NUL in linkdata2 to announce a formatting command. */
+    whlp_linkdata(h, 2, 0);
+
+    /* The command: 0xE3, then the target's context hash as a long. */
+    whlp_linkdata(h, 1, 0xE3);
+    whlp_linkdata_long(h, 1, target->hash);
+}
+
+/*
+ * End the hyperlink most recently started in the current paragraph.
+ */
+void whlp_end_hyperlink(WHLP h)
+{
+    /* NUL in linkdata2 to announce a formatting command. */
+    whlp_linkdata(h, 2, 0);
+
+    /* The command is the single byte 0x89. */
+    whlp_linkdata(h, 1, 0x89);
+}
+
+/*
+ * Insert a tab into the current paragraph.
+ */
+void whlp_tab(WHLP h)
+{
+    /* NUL in linkdata2 to announce a formatting command. */
+    whlp_linkdata(h, 2, 0);
+
+    /* The command is the single byte 0x83. */
+    whlp_linkdata(h, 1, 0x83);
+}
+
+/*
+ * Append the NUL-terminated string `text' (terminator excluded) to
+ * the text stream of the current paragraph, i.e. to linkdata2.
+ */
+void whlp_text(WHLP h, char *text)
+{
+    char *p;
+
+    for (p = text; *p != '\0'; p++)
+        whlp_linkdata(h, 2, *p);
+}
+
+/*
+ * Finish the current paragraph: terminate its command stream, build
+ * the size header, copy both data streams out of the scratch buffers
+ * into memory owned by the topiclink, append the topiclink to the
+ * `text' tree, and reset the paragraph attributes.
+ */
+void whlp_end_para(WHLP h)
+{
+    struct topiclink *rec = h->link;
+    int body_len, header_len;
+
+    /*
+     * Closing commands 0x82 and 0xFF, each announced by a NUL in
+     * linkdata2.
+     */
+    whlp_linkdata(h, 2, 0);
+    whlp_linkdata(h, 1, 0x82);
+    whlp_linkdata(h, 2, 0);
+    whlp_linkdata(h, 1, 0xFF);
+
+    /*
+     * The TopicSize and TopicLength fields belong at the front of
+     * linkdata1, but they can only be generated now. So they are
+     * appended after the body in the scratch buffer and then rotated
+     * to the front while copying out.
+     */
+    body_len = rec->len1;
+    whlp_linkdata_cslong(h, 1, body_len);
+    whlp_linkdata_cushort(h, 1, rec->len2);
+    header_len = rec->len1 - body_len;
+
+    rec->data1 = mknewa(unsigned char, rec->len1);
+    memcpy(rec->data1, h->linkdata1 + body_len, header_len);
+    memcpy(rec->data1 + header_len, h->linkdata1, body_len);
+
+    rec->data2 = mknewa(unsigned char, rec->len2);
+    memcpy(rec->data2, h->linkdata2, rec->len2);
+
+    addpos234(h->text, rec, count234(h->text));
+
+    /* Hack: accumulate the `blocksize' parameter in the topic header. */
+    if (h->prevtopic)
+        h->prevtopic->block_size += 21 + rec->len1 + rec->len2;
+
+    h->link = NULL;                    /* the tree owns it now */
+
+    whlp_para_reset(h);
+}
+
+/* ----------------------------------------------------------------------
+ * Manage the layout and generation of the |TOPIC section.
+ */
+
+/*
+ * Emit the 12-byte header of a new topic block: the last topiclink
+ * position, a placeholder for the first topiclink position (its
+ * file offset is remembered in h->firsttopiclink_offset so that it
+ * can be patched later), and the last topic start position. Then
+ * reset the count of bytes remaining in the block.
+ */
+static void whlp_topicblock_begin(WHLP h, struct file *f)
+{
+    whlp_file_add_long(f, h->lasttopiclink);
+    h->firsttopiclink_offset = whlp_file_offset(f);
+    whlp_file_add_long(f, -1L);        /* this will be filled in later */
+    whlp_file_add_long(f, h->lasttopicstart);
+    h->topicblock_remaining = TOPIC_BLKSIZE - 12;
+}
+
+/*
+ * Write `len' bytes from `data' to the |TOPIC file `f', inserting
+ * block headers wherever the data runs over a block boundary.
+ *
+ * `can_break' is the number of bytes that must fit in the current
+ * block: if fewer remain (or the block is already full), the block
+ * is padded out and a new one is started before anything is
+ * written.
+ */
+static void whlp_topicsect_write(WHLP h, struct file *f, void *data, int len,
+                                 int can_break)
+{
+    unsigned char *src = (unsigned char *)data;
+
+    if (h->topicblock_remaining <= 0 ||
+        h->topicblock_remaining < can_break) {
+        /* Pad out what is left of the old block, if anything. */
+        if (h->topicblock_remaining > 0)
+            whlp_file_fill(f, h->topicblock_remaining);
+        whlp_topicblock_begin(h, f);
+    }
+
+    while (len > 0) {
+        int chunk = len;
+
+        if (h->topicblock_remaining < chunk)
+            chunk = h->topicblock_remaining;
+
+        whlp_file_add(f, src, chunk);
+        src += chunk;
+        len -= chunk;
+        h->topicblock_remaining -= chunk;
+
+        /* More to come but the block is full: open another. */
+        if (len > 0 && h->topicblock_remaining <= 0)
+            whlp_topicblock_begin(h, f);
+    }
+}
+
+/*
+ * Lay out and generate the |TOPIC section.
+ *
+ * Three passes are made over the `text' tree after a terminating
+ * topic header has been appended to it:
+ *  1. assign every TOPICLINK its TOPICOFFSET and TOPICPOS by
+ *     simulating how the records pack into topic blocks;
+ *  2. fill in the 0x1c-byte LinkData1 of every type-2 (topic
+ *     header) record, which needs the positions from pass 1;
+ *  3. write every record out through whlp_topicsect_write().
+ */
+static void whlp_topic_layout(WHLP h)
+{
+    int block, offset, pos;
+    int i, nlinks, size;
+    int topicnum;
+    struct topiclink *link;
+    struct file *f;
+
+    /*
+     * Append a final topic header carrying no usable data, and
+     * chain it on to the previous topic header if there is one.
+     */
+    link = mknew(struct topiclink);
+    link->recordtype = 2;
+    link->len1 = 0x1c;
+    link->data1 = mknewa(unsigned char, 0x1c);
+    link->len2 = 0;
+    link->data2 = NULL;
+    link->block_size = 0;
+    link->context = NULL;
+    link->nonscroll = link->scroll = NULL;
+    link->nexttopic = NULL;
+    if (h->prevtopic)
+        h->prevtopic->nexttopic = link;
+    h->prevtopic = link;
+    addpos234(h->text, link, count234(h->text));
+
+    /*
+     * Pass 1. A TOPICBLOCK holds TOPIC_BLKSIZE-12 bytes of payload
+     * after its header, and a TOPICLINK occupies 21 bytes plus the
+     * lengths of LinkData1 and LinkData2.
+     */
+    block = 0;
+    offset = 0;
+    pos = 12;
+    nlinks = count234(h->text);
+    for (i = 0; i < nlinks; i++) {
+        link = index234(h->text, i);
+        size = 21 + link->len1 + link->len2;
+
+        /*
+         * Neither the 21-byte header nor LinkData1 may straddle a
+         * block boundary, so if they would, move to a new block
+         * before placing this record.
+         */
+        if (TOPIC_BLKSIZE - pos < 21 + link->len1) {
+            block++;
+            offset = 0;
+            pos = 12;
+        }
+
+        link->topicoffset = block * 0x8000 + offset;
+        link->topicpos = block * 0x4000 + pos;
+
+        pos += size;
+        if (link->recordtype != 2)     /* TOPICOFFSET doesn't count titles */
+            offset += link->len2;
+
+        /* LinkData2 may spill over into following blocks. */
+        while (pos > TOPIC_BLKSIZE) {
+            block++;
+            offset = 0;
+            pos -= TOPIC_BLKSIZE - 12;
+        }
+    }
+
+    /*
+     * Pass 2. With all positions known, write the seven longs of
+     * each topic header: block size, previous and next browse
+     * topic, topic number, first non-scrolling and first scrolling
+     * paragraph, and next topic. Absent links are written as
+     * 0xFFFFFFFF.
+     */
+    topicnum = 0;
+    for (i = 0; i < nlinks; i++) {
+        unsigned char *hdr;
+
+        link = index234(h->text, i);
+        if (link->recordtype != 2)
+            continue;
+        hdr = link->data1;
+
+        PUT_32BIT_LSB_FIRST(hdr + 0, link->block_size);
+
+        if (link->context && link->context->browse_prev) {
+            PUT_32BIT_LSB_FIRST(hdr + 4,
+                                link->context->browse_prev->link->topicoffset);
+        } else {
+            PUT_32BIT_LSB_FIRST(hdr + 4, 0xFFFFFFFFL);
+        }
+
+        if (link->context && link->context->browse_next) {
+            PUT_32BIT_LSB_FIRST(hdr + 8,
+                                link->context->browse_next->link->topicoffset);
+        } else {
+            PUT_32BIT_LSB_FIRST(hdr + 8, 0xFFFFFFFFL);
+        }
+
+        PUT_32BIT_LSB_FIRST(hdr + 12, topicnum);
+        topicnum++;
+
+        if (link->nonscroll) {
+            PUT_32BIT_LSB_FIRST(hdr + 16, link->nonscroll->topicpos);
+        } else {
+            PUT_32BIT_LSB_FIRST(hdr + 16, 0xFFFFFFFFL);
+        }
+
+        if (link->scroll) {
+            PUT_32BIT_LSB_FIRST(hdr + 20, link->scroll->topicpos);
+        } else {
+            PUT_32BIT_LSB_FIRST(hdr + 20, 0xFFFFFFFFL);
+        }
+
+        if (link->nexttopic) {
+            PUT_32BIT_LSB_FIRST(hdr + 24, link->nexttopic->topicpos);
+        } else {
+            PUT_32BIT_LSB_FIRST(hdr + 24, 0xFFFFFFFFL);
+        }
+    }
+
+    /*
+     * Pass 3. Generate the |TOPIC file itself.
+     */
+    h->lasttopiclink = -1L;
+    h->lasttopicstart = 0L;
+    f = whlp_new_file(h, "|TOPIC");
+    h->topicblock_remaining = -1;
+    whlp_topicsect_write(h, f, NULL, 0, 0);   /* start the first block */
+
+    for (i = 0; i < nlinks; i++) {
+        unsigned char header[21];
+        struct topiclink *neighbour;
+
+        link = index234(h->text, i);
+
+        /*
+         * The 21-byte TOPICLINK header: total size, LinkData2
+         * length, previous and next record positions, offset of
+         * LinkData2 within the record, and the record type.
+         */
+        PUT_32BIT_LSB_FIRST(header + 0, 21 + link->len1 + link->len2);
+        PUT_32BIT_LSB_FIRST(header + 4, link->len2);
+        if (i > 0) {
+            neighbour = index234(h->text, i-1);
+            PUT_32BIT_LSB_FIRST(header + 8, neighbour->topicpos);
+        } else {
+            PUT_32BIT_LSB_FIRST(header + 8, 0xFFFFFFFFL);
+        }
+        if (i+1 < nlinks) {
+            neighbour = index234(h->text, i+1);
+            PUT_32BIT_LSB_FIRST(header + 12, neighbour->topicpos);
+        } else {
+            PUT_32BIT_LSB_FIRST(header + 12, 0xFFFFFFFFL);
+        }
+        PUT_32BIT_LSB_FIRST(header + 16, 21 + link->len1);
+        header[20] = link->recordtype;
+        whlp_topicsect_write(h, f, header, 21, 21 + link->len1);
+
+        /*
+         * If that write opened a new block, this record is the
+         * block's first topiclink: patch the placeholder in the
+         * block header. Doing it after the write guarantees we know
+         * which block the record really landed in.
+         */
+        if (h->firsttopiclink_offset > 0) {
+            whlp_file_seek(f, h->firsttopiclink_offset, 0);
+            whlp_file_add_long(f, link->topicpos);
+            h->firsttopiclink_offset = 0;
+            whlp_file_seek(f, 0, 2);
+        }
+
+        /*
+         * Remember this record as the most recent topiclink and,
+         * if it is a topic header, as the most recent topic start;
+         * the next block header will quote both.
+         */
+        h->lasttopiclink = link->topicpos;
+        if (link->recordtype == 2)
+            h->lasttopicstart = link->topicpos;
+
+        /*
+         * LinkData1 must stay in one block; LinkData2 may be split.
+         */
+        whlp_topicsect_write(h, f, link->data1, link->len1, link->len1);
+        whlp_topicsect_write(h, f, link->data2, link->len2, 0);
+    }
+}
+
+/* ----------------------------------------------------------------------
+ * Manage the index sections (|KWDATA, |KWMAP, |KWBTREE).
+ */
+
+/*
+ * Record that the index term `index' refers to `topic'. The term
+ * string is copied. A (term, topic) pair the index tree regards as
+ * already present is silently discarded.
+ */
+void whlp_index_term(WHLP h, char *index, WHLP_TOPIC topic)
+{
+    struct indexrec *rec = mknew(struct indexrec);
+
+    rec->topic = topic;
+    rec->term = dupstr(index);
+
+    /* add234() returns our own pointer iff it was really inserted. */
+    if (add234(h->index, rec) == rec)
+        return;
+
+    sfree(rec->term);
+    sfree(rec);
+}
+
+/*
+ * Build the |KWDATA section and condense the index tree.
+ *
+ * Each run of consecutive index records sharing a term is collapsed
+ * into its first record, which receives the run length in `count'
+ * and the |KWDATA offset of the run's topic offsets in `offset'.
+ * The other records of the run are removed from the tree and freed.
+ * Afterwards the tree is in the form |KWBTREE is built from.
+ */
+static void whlp_build_kwdata(WHLP h)
+{
+    struct file *f = whlp_new_file(h, "|KWDATA");
+    struct indexrec *head, *dup;
+    int pos = 0;
+
+    for (;;) {
+        head = index234(h->index, pos);
+        if (head == NULL)
+            break;
+
+        head->count = 1;
+        head->offset = whlp_file_offset(f);
+        whlp_file_add_long(f, head->topic->link->topicoffset);
+        pos++;
+
+        /*
+         * Swallow every following record with the same term. `pos'
+         * stays put because each deletion pulls the next candidate
+         * down into that slot.
+         */
+        for (;;) {
+            dup = index234(h->index, pos);
+            if (dup == NULL || strcmp(head->term, dup->term) != 0)
+                break;
+
+            whlp_file_add_long(f, dup->topic->link->topicoffset);
+            head->count++;
+            delpos234(h->index, pos);
+            sfree(dup->term);
+            sfree(dup);
+        }
+    }
+}
+
+/* ----------------------------------------------------------------------
+ * Standard chunks of data for the |SYSTEM and |FONT sections.
+ */
+
+/*
+ * Append one record to a |SYSTEM file: a 16-bit record id, a 16-bit
+ * payload length, then `length' bytes taken from `data'.
+ */
+static void whlp_system_record(struct file *f, int id,
+                               const void *data, int length)
+{
+    whlp_file_add_short(f, id);        /* record type */
+    whlp_file_add_short(f, length);    /* payload size */
+    whlp_file_add(f, data, length);    /* payload */
+}
+
+/*
+ * Write the fixed opening of the |SYSTEM section: the header
+ * (magic number, version, generation time, flags) followed by the
+ * standard records 9 and 11.
+ */
+static void whlp_standard_systemsection(struct file *f)
+{
+    static const char lcid[] = { 0, 0, 0, 0, 0, 0, 0, 0, 9, 4 };
+    static const char charset[] = { 0, 0, 0, 2, 0 };
+
+    whlp_file_add_short(f, 0x36C);     /* magic number */
+    whlp_file_add_short(f, 33);        /* minor version: HCW 4.00 Win95+ */
+    whlp_file_add_short(f, 1);         /* major version */
+    whlp_file_add_long(f, time(NULL)); /* generation date */
+    whlp_file_add_short(f, 0);         /* flags=0 means no compression */
+
+    /*
+     * Magic locale identifier data. What it all means is yet to be
+     * found out; see the TODO list at the top of the file.
+     */
+    whlp_system_record(f, 9, lcid, sizeof(lcid));
+    whlp_system_record(f, 11, charset, sizeof(charset));
+}
+
+/*
+ * Add a |SYSTEM record of type 1 carrying `title', including its
+ * terminating NUL.
+ */
+void whlp_title(WHLP h, char *title)
+{
+    int nbytes = strlen(title) + 1;
+
+    whlp_system_record(h->systemfile, 1, title, nbytes);
+}
+
+/*
+ * Add a |SYSTEM record of type 2 carrying `copyright', including
+ * its terminating NUL.
+ */
+void whlp_copyright(WHLP h, char *copyright)
+{
+    int nbytes = strlen(copyright) + 1;
+
+    whlp_system_record(h->systemfile, 2, copyright, nbytes);
+}
+
+/*
+ * Add a |SYSTEM record of type 4 carrying `macro', including its
+ * terminating NUL. It may be called repeatedly; every call appends
+ * a further record.
+ */
+void whlp_start_macro(WHLP h, char *macro)
+{
+    int nbytes = strlen(macro) + 1;
+
+    whlp_system_record(h->systemfile, 4, macro, nbytes);
+}
+
+/*
+ * Nominate `t' as the primary topic. It is only remembered here;
+ * the corresponding |SYSTEM record is written by
+ * whlp_do_primary_topic().
+ */
+void whlp_primary_topic(WHLP h, WHLP_TOPIC t)
+{
+    h->ptopic = t;
+}
+
+/*
+ * Add a |SYSTEM record of type 3 holding the topic offset of the
+ * primary topic as a 32-bit little-endian value. The offset must
+ * already have been assigned, and h->ptopic must be set.
+ */
+static void whlp_do_primary_topic(WHLP h)
+{
+    unsigned char encoded[4];
+
+    PUT_32BIT_LSB_FIRST(encoded, h->ptopic->link->topicoffset);
+    whlp_system_record(h->systemfile, 3, encoded, sizeof(encoded));
+}
+
+/*
+ * Define a font descriptor and return its index, which is the value
+ * to pass to whlp_set_font().
+ *
+ * `font' is the face name; it is copied, and descriptors sharing a
+ * name share one entry in the `fontnames' tree. The remaining
+ * arguments are stored as given. Descriptors are numbered in the
+ * order they are created, starting from zero.
+ */
+int whlp_create_font(WHLP h, char *font, int family, int halfpoints,
+                     int rendition, int r, int g, int b)
+{
+    char *copy = dupstr(font);
+    char *canonical = add234(h->fontnames, copy);
+    struct fontdesc *desc;
+    int slot;
+
+    /* The name was already known: our duplicate is redundant. */
+    if (canonical != copy)
+        sfree(copy);
+
+    desc = mknew(struct fontdesc);
+    desc->font = canonical;
+    desc->family = family;
+    desc->halfpoints = halfpoints;
+    desc->rendition = rendition;
+    desc->r = r;
+    desc->g = g;
+    desc->b = b;
+
+    slot = count234(h->fontdescs);
+    addpos234(h->fontdescs, desc, slot);
+    return slot;
+}
+
+/*
+ * Write the |FONT section to `f': an 8-byte header, a table of
+ * 32-byte font names, and a table of 11-byte font descriptors.
+ */
+static void whlp_make_fontsection(WHLP h, struct file *f)
+{
+    int i;
+    int nnames = count234(h->fontnames);
+    char *fontname;
+    struct fontdesc *fontdesc;
+
+    /*
+     * Header: name count, descriptor count, offset of the name
+     * table, offset of the descriptor table.
+     */
+    whlp_file_add_short(f, nnames);
+    whlp_file_add_short(f, count234(h->fontdescs));
+    whlp_file_add_short(f, 8);
+    whlp_file_add_short(f, 8 + 32 * nnames);
+
+    /*
+     * Names, in tree order, each in a fixed 32-byte field. strncpy
+     * NUL-pads a short name up to the full field width, so nothing
+     * of the preceding memset survives; a name of 32 characters or
+     * more is truncated with no terminator.
+     */
+    i = 0;
+    while ((fontname = index234(h->fontnames, i)) != NULL) {
+        char field[32];
+
+        memset(field, i, sizeof(field));
+        strncpy(field, fontname, sizeof(field));
+        whlp_file_add(f, field, sizeof(field));
+        i++;
+    }
+
+    /*
+     * Descriptors, in creation order. Each refers to its name by
+     * that name's position in the name table.
+     */
+    i = 0;
+    while ((fontdesc = index234(h->fontdescs, i)) != NULL) {
+        int fontpos;
+        void *found;
+
+        found = findpos234(h->fontnames, fontdesc->font, NULL, &fontpos);
+        assert(found != NULL);
+
+        whlp_file_add_char(f, fontdesc->rendition);
+        whlp_file_add_char(f, fontdesc->halfpoints);
+        whlp_file_add_char(f, fontdesc->family);
+        whlp_file_add_short(f, fontpos);
+
+        /* Foreground RGB */
+        whlp_file_add_char(f, fontdesc->r);
+        whlp_file_add_char(f, fontdesc->g);
+        whlp_file_add_char(f, fontdesc->b);
+
+        /* Background RGB is apparently unused and always set to zero */
+        whlp_file_add_char(f, 0);
+        whlp_file_add_char(f, 0);
+        whlp_file_add_char(f, 0);
+
+        i++;
+    }
+}
+
+/* ----------------------------------------------------------------------
+ * Routines to manage a B-tree type file.
+ */
+
+/*
+ * Write a complete B-tree file to `f' from the elements of `tree',
+ * taken in tree order.
+ *
+ *  - `flags', `pagesize' and `dataformat' are copied into the B-tree
+ *    header; `pagesize' is also the size of every page written and
+ *    must not exceed MAX_PAGE_SIZE.
+ *  - `leaffn' formats one element as a leaf entry into a buffer and
+ *    returns the entry's length; `indexfn' does likewise for an
+ *    index-page key.
+ *  - `map', if not NULL, receives a map section: a 16-bit count of
+ *    leaf pages followed, for each leaf page, by the number of leaf
+ *    entries preceding it (long) and its page number (short).
+ *
+ * Leaf pages are written first, then successive levels of index
+ * pages until a level consists of a single page, which is the root.
+ */
+static void whlp_make_btree(struct file *f, int flags, int pagesize,
+                            char *dataformat, tree234 *tree,
+                            struct file *map,
+                            bt_index_fn indexfn, bt_leaf_fn leaffn)
+{
+    void **page_elements = NULL;       /* first element under each page */
+    int npages = 0, pages_alloc = 0;
+    int npages_this_level, nentries, nlevels;
+    int total_leaf_entries;
+    char btdata[MAX_PAGE_SIZE];
+    int btlen;
+    int page_start, fixups_offset, unused_bytes;
+    void *element;
+    int next_index;
+
+    assert(pagesize <= MAX_PAGE_SIZE);
+
+    /*
+     * B-tree header. Several fields are not known yet; zeros go in
+     * for now and are overwritten at the end.
+     */
+    whlp_file_add_short(f, 0x293B);    /* magic number */
+    whlp_file_add_short(f, flags);
+    whlp_file_add_short(f, pagesize);
+    {
+        char format[16];
+        size_t fmtlen = strlen(dataformat);
+
+        memset(format, 0, sizeof(format));
+        assert(fmtlen <= sizeof(format));
+        memcpy(format, dataformat, fmtlen);
+        whlp_file_add(f, format, sizeof(format));
+    }
+    whlp_file_add_short(f, 0);         /* must-be-zero */
+    fixups_offset = whlp_file_offset(f);
+    whlp_file_add_short(f, 0);         /* page splits; fix up later */
+    whlp_file_add_short(f, 0);         /* root page index; fix up later */
+    whlp_file_add_short(f, -1);        /* must-be-minus-one */
+    whlp_file_add_short(f, 0);         /* total number of pages; fix later */
+    whlp_file_add_short(f, 0);         /* number of levels; fix later */
+    whlp_file_add_long(f, count234(tree)); /* total B-tree entries */
+
+    /*
+     * Reserve room at the start of the map section for its entry
+     * count.
+     */
+    if (map) {
+        whlp_file_add_short(map, 0);
+    }
+
+    /*
+     * Leaf level: one iteration of the outer loop per leaf page.
+     */
+    next_index = 0;
+    npages_this_level = 0;
+    total_leaf_entries = 0;
+
+    element = index234(tree, next_index);
+    while (element) {
+        npages_this_level++;
+        if (npages >= pages_alloc) {
+            pages_alloc = npages + 32;
+            page_elements = resize(page_elements, pages_alloc);
+        }
+        page_elements[npages++] = element;
+
+        /*
+         * Placeholder for the 8-byte page header, overwritten once
+         * the page contents are known.
+         */
+        page_start = whlp_file_offset(f);
+        whlp_file_add(f, "12345678", 8);
+        unused_bytes = pagesize - 8;
+
+        /*
+         * Pack in entries until the next one would not fit or the
+         * tree is exhausted.
+         */
+        for (nentries = 0; element != NULL; nentries++) {
+            btlen = leaffn(element, btdata);
+            if (btlen > unused_bytes)
+                break;
+            whlp_file_add(f, btdata, btlen);
+            unused_bytes -= btlen;
+            next_index++;
+            element = index234(tree, next_index);
+        }
+
+        /*
+         * Pad the page to full size, then go back and write the
+         * real header.
+         */
+        whlp_file_fill(f, unused_bytes);
+        whlp_file_seek(f, page_start, 0);
+        whlp_file_add_short(f, unused_bytes);
+        whlp_file_add_short(f, nentries);
+        /* Previous page: comes out as -1 by itself on the first page. */
+        whlp_file_add_short(f, npages-2);
+        /* Next page: must be -1 on the last page. */
+        whlp_file_add_short(f, element ? npages : -1);
+        whlp_file_seek(f, 0, 2);
+
+        if (map) {
+            whlp_file_add_long(map, total_leaf_entries);
+            whlp_file_add_short(map, npages_this_level-1);
+        }
+        total_leaf_entries += nentries;
+    }
+
+    /*
+     * Now the number of map entries is known; patch it in.
+     */
+    if (map) {
+        whlp_file_seek(map, 0, 0);
+        whlp_file_add_short(map, npages_this_level);
+        whlp_file_seek(map, 0, 2);
+    }
+
+    /*
+     * Index levels: keep building a level of index pages over the
+     * previous level until a level has only one page.
+     */
+    nlevels = 1;
+    while (npages_this_level > 1) {
+        int first = npages - npages_this_level;
+        int last = npages - 1;
+        int current;
+
+        nlevels++;
+        npages_this_level = 0;
+
+        for (current = first; current <= last; ) {
+            /*
+             * A new index page; its first element is that of the
+             * first page it covers.
+             */
+            npages_this_level++;
+            if (npages >= pages_alloc) {
+                pages_alloc = npages + 32;
+                page_elements = resize(page_elements, pages_alloc);
+            }
+            page_elements[npages++] = page_elements[current];
+
+            /*
+             * Placeholder for the first four header bytes; the
+             * PreviousPage field, which takes the number of the
+             * first page covered, can be written straight away.
+             */
+            page_start = whlp_file_offset(f);
+            whlp_file_add(f, "1234", 4);
+            whlp_file_add_short(f, current);
+            unused_bytes = pagesize - 6;
+            current++;
+
+            /*
+             * Each further page covered contributes a key followed
+             * by its page number, for as long as there is room.
+             */
+            for (nentries = 0; current <= last; nentries++, current++) {
+                btlen = indexfn(page_elements[current], btdata);
+                if (btlen + 2 > unused_bytes)
+                    break;
+                whlp_file_add(f, btdata, btlen);
+                whlp_file_add_short(f, current);
+                unused_bytes -= btlen+2;
+            }
+
+            /*
+             * Pad the page, then go back and complete the header.
+             */
+            whlp_file_fill(f, unused_bytes);
+            whlp_file_seek(f, page_start, 0);
+            whlp_file_add_short(f, unused_bytes);
+            whlp_file_add_short(f, nentries);
+            whlp_file_seek(f, 0, 2);
+        }
+    }
+
+    /*
+     * Everything is written and the root is the last page. Patch
+     * the fields left open in the main header. Every page except
+     * the first of each level came about through a split, hence
+     * page splits = pages - levels.
+     */
+    whlp_file_seek(f, fixups_offset, 0);
+    whlp_file_add_short(f, npages - nlevels);
+    whlp_file_add_short(f, npages-1);  /* root page index */
+    whlp_file_add_short(f, -1);        /* must-be-minus-one */
+    whlp_file_add_short(f, npages);    /* total number of pages */
+    whlp_file_add_short(f, nlevels);   /* number of levels */
+
+    /* Leave the file positioned at its end, for tidiness. */
+    whlp_file_seek(f, 0, 2);
+
+    sfree(page_elements);
+}
+
+
+/* ----------------------------------------------------------------------
+ * Routines to manage the `internal file' structure.
+ */
+
+/*
+ * Create an empty internal file. If `name' is non-NULL it is copied
+ * and the file is entered in h->files; with a NULL name the file is
+ * anonymous and belongs to the caller alone.
+ */
+static struct file *whlp_new_file(WHLP h, char *name)
+{
+    struct file *f = mknew(struct file);
+
+    f->data = NULL;
+    f->pos = 0;
+    f->len = 0;
+    f->size = 0;
+    f->name = NULL;
+
+    if (name) {
+        f->name = dupstr(name);
+        add234(h->files, f);
+    }
+
+    return f;
+}
+
+/*
+ * Free an internal file along with its name and contents. The file
+ * is not removed from any tree; that is the caller's business.
+ */
+static void whlp_free_file(struct file *f)
+{
+    sfree(f->name);                    /* may be NULL */
+    sfree(f->data);
+    sfree(f);
+}
+
+/*
+ * Write `len' bytes from `data' into `f' at the current position,
+ * enlarging the buffer as needed. The position moves past the data
+ * written, and the file length grows if the write went beyond it.
+ */
+static void whlp_file_add(struct file *f, const void *data, int len)
+{
+    int end = f->pos + len;
+
+    if (end > f->size) {
+        /* Over-allocate by 1K so that small writes rarely reallocate. */
+        f->size = end + 1024;
+        f->data = resize(f->data, f->size);
+    }
+
+    memcpy(f->data + f->pos, data, len);
+    f->pos = end;
+
+    if (f->pos > f->len)
+        f->len = f->pos;
+}
+
+/*
+ * Write the low 8 bits of `data' to `f' as one byte.
+ */
+static void whlp_file_add_char(struct file *f, int data)
+{
+    unsigned char byte = data & 0xFF;
+
+    whlp_file_add(f, &byte, 1);
+}
+
+/*
+ * Write `data' to `f' as a 16-bit value, least significant byte
+ * first.
+ */
+static void whlp_file_add_short(struct file *f, int data)
+{
+    unsigned char bytes[2];
+
+    PUT_16BIT_LSB_FIRST(bytes, data);
+    whlp_file_add(f, bytes, sizeof(bytes));
+}
+
+/*
+ * Write `data' to `f' as a 32-bit value, least significant byte
+ * first.
+ */
+static void whlp_file_add_long(struct file *f, int data)
+{
+    unsigned char bytes[4];
+
+    PUT_32BIT_LSB_FIRST(bytes, data);
+    whlp_file_add(f, bytes, sizeof(bytes));
+}
+
+/*
+ * Write `len' zero bytes into `f' at the current position, with the
+ * same buffer growth and position/length bookkeeping as
+ * whlp_file_add().
+ */
+static void whlp_file_fill(struct file *f, int len)
+{
+    int end = f->pos + len;
+
+    if (end > f->size) {
+        f->size = end + 1024;
+        f->data = resize(f->data, f->size);
+    }
+
+    memset(f->data + f->pos, 0, len);
+    f->pos = end;
+
+    if (f->pos > f->len)
+        f->len = f->pos;
+}
+
+/*
+ * Move the position of `f' to `pos' bytes from a base chosen by
+ * `whence': 0 is the start of the file, 1 the current position, and
+ * any other value the end of the file. No range check is made.
+ */
+static void whlp_file_seek(struct file *f, int pos, int whence)
+{
+    int base;
+
+    if (whence == 0)
+        base = 0;
+    else if (whence == 1)
+        base = f->pos;
+    else
+        base = f->len;
+
+    f->pos = base + pos;
+}
+
+/*
+ * Report the current position within `f', suitable for handing back
+ * to whlp_file_seek() with a `whence' of 0.
+ */
+static int whlp_file_offset(struct file *f)
+{
+    return f->pos;
+}
+
+/* ----------------------------------------------------------------------
+ * Open and close routines; final wrapper around everything.
+ */
+
+/*
+ * Create a new, empty help file object.
+ *
+ * All the internal trees are allocated, and the |CTXOMAP and
+ * |SYSTEM internal files are created with their standard initial
+ * contents. The object must eventually be passed to whlp_close() or
+ * whlp_abandon(), either of which frees it.
+ */
+WHLP whlp_new(void)
+{
+    WHLP ret;
+    struct file *f;
+
+    ret = mknew(struct WHLP_tag);
+
+    /*
+     * Internal B-trees. Those created with a NULL comparison
+     * function are unsorted and are filled by position.
+     */
+    ret->files = newtree234(filecmp);
+    ret->pre_contexts = newtree234(NULL);
+    ret->contexts = newtree234(ctxcmp);
+    ret->titles = newtree234(ttlcmp);
+    ret->text = newtree234(NULL);
+    ret->index = newtree234(idxcmp);
+    ret->tabstops = newtree234(tabcmp);
+    ret->fontnames = newtree234(fontcmp);
+    ret->fontdescs = newtree234(NULL);
+
+    /*
+     * Some standard files.
+     */
+    f = whlp_new_file(ret, "|CTXOMAP");
+    whlp_file_add_short(f, 0);         /* dummy section */
+    f = whlp_new_file(ret, "|SYSTEM");
+    whlp_standard_systemsection(f);
+    ret->systemfile = f;
+
+    /*
+     * Other variables.
+     */
+    ret->prevtopic = NULL;
+    ret->ncontexts = 0;
+    ret->link = NULL;
+
+    /*
+     * whlp_para_attr() and whlp_set_tabstop() OR bits into
+     * para_flags and whlp_begin_para() reads it, all potentially
+     * before the first whlp_para_reset(); so it needs a defined
+     * starting value. Likewise ptopic is dereferenced by
+     * whlp_do_primary_topic(); starting it at NULL at least makes a
+     * missing whlp_primary_topic() call fail predictably.
+     * NOTE(review): assumes mknew() does not zero the structure and
+     * that no routine outside this section initialises these
+     * fields -- confirm.
+     */
+    ret->para_flags = 0;
+    ret->ptopic = NULL;
+
+    return ret;
+}
+
+/*
+ * Finish the help file and write it to disk as `filename'.
+ *
+ * The remaining internal files (|TOPIC, |FONT, the index files if
+ * any index terms exist, |CONTEXT and |TTLBTREE) are generated. The
+ * container is then written: a 16-byte header, every internal file
+ * preceded by a 9-byte file header, and finally the master
+ * directory.
+ *
+ * `h' is always freed, including when the output file cannot be
+ * opened, in which case nothing is written. Write errors are not
+ * detected.
+ */
+void whlp_close(WHLP h, char *filename)
+{
+    FILE *fp;
+    int filecount, offset, index, filelen;
+    struct file *file, *map, *md;
+    context *ctx;
+    int has_index;
+
+    /* Lay out the topic section. */
+    whlp_topic_layout(h);
+
+    /* Finish off the system section. */
+    whlp_do_primary_topic(h);
+
+    /* Assemble the font section. */
+    file = whlp_new_file(h, "|FONT");
+    whlp_make_fontsection(h, file);
+
+    /* The index sections are only produced if there are index terms. */
+    has_index = (count234(h->index) != 0);
+    if (has_index)
+        whlp_build_kwdata(h);
+
+    /*
+     * Enter every context in the `titles' tree as well, ready for
+     * |TTLBTREE.
+     */
+    index = 0;
+    while ((ctx = index234(h->contexts, index)) != NULL) {
+        add234(h->titles, ctx);
+        index++;
+    }
+
+    /*
+     * Construct the various B-trees.
+     */
+    file = whlp_new_file(h, "|CONTEXT");
+    whlp_make_btree(file, 0x0002, 0x0800, "L4",
+                    h->contexts, NULL, ctxindex, ctxleaf);
+
+    file = whlp_new_file(h, "|TTLBTREE");
+    whlp_make_btree(file, 0x0002, 0x0800, "Lz",
+                    h->titles, NULL, ttlindex, ttlleaf);
+
+    if (has_index) {
+        file = whlp_new_file(h, "|KWBTREE");
+        map = whlp_new_file(h, "|KWMAP");
+        whlp_make_btree(file, 0x0002, 0x0800, "F24",
+                        h->index, map, idxindex, idxleaf);
+    }
+
+    /*
+     * Open the output file; if that fails, give everything up.
+     */
+    fp = fopen(filename, "wb");
+    if (!fp) {
+        whlp_abandon(h);
+        return;
+    }
+
+    /*
+     * Assign each internal file its offset in the container. The
+     * first comes straight after the 16-byte header, and each takes
+     * up its own length plus a 9-byte file header.
+     */
+    filecount = count234(h->files);
+    offset = 16;
+    for (index = 0; index < filecount; index++) {
+        file = index234(h->files, index);
+        file->fileoffset = offset;
+        offset += 9 + file->len;
+    }
+    /* `offset' has now arrived at the master directory's offset. */
+
+    /*
+     * The master directory is a B-tree over the internal files. It
+     * is created without a name so that it does not list itself.
+     */
+    md = whlp_new_file(h, NULL);
+    whlp_make_btree(md, 0x0402, 0x0400, "z4",
+                    h->files, NULL, fileindex, fileleaf);
+
+    filelen = offset + 9 + md->len;
+
+    /*
+     * Write out the file header.
+     */
+    {
+        unsigned char header[16];
+
+        PUT_32BIT_LSB_FIRST(header+0, 0x00035F3FL);  /* magic */
+        PUT_32BIT_LSB_FIRST(header+4, offset);       /* offset to directory */
+        PUT_32BIT_LSB_FIRST(header+8, 0xFFFFFFFFL);  /* first free block */
+        PUT_32BIT_LSB_FIRST(header+12, filelen);     /* total file length */
+        fwrite(header, 1, sizeof(header), fp);
+    }
+
+    /*
+     * Write out each file in tree order, which is the order the
+     * offsets were assigned in, and the master directory last.
+     */
+    for (index = 0; index <= filecount; index++) {
+        int used, reserved;
+        unsigned char header[9];
+
+        file = (index == filecount) ? md : index234(h->files, index);
+
+        used = file->len;
+        reserved = used + 9;
+
+        /* File header: reserved space, used space, flags. */
+        PUT_32BIT_LSB_FIRST(header+0, reserved);
+        PUT_32BIT_LSB_FIRST(header+4, used);
+        header[8] = 0;
+        fwrite(header, 1, sizeof(header), fp);
+
+        /* File data. */
+        fwrite(file->data, 1, file->len, fp);
+    }
+
+    fclose(fp);
+
+    /* md is in no tree, so whlp_abandon() will not find it. */
+    whlp_free_file(md);
+
+    whlp_abandon(h);                   /* now free everything */
+}
+
+/*
+ * Free a help file object and everything it owns without writing
+ * any output. whlp_close() also ends by calling this, so it is the
+ * one place where all the internal trees are dismantled. `h' must
+ * not be used again afterwards.
+ */
+void whlp_abandon(WHLP h)
+{
+    struct file *f;
+    struct indexrec *idx;
+    struct topiclink *link;
+    struct fontdesc *fontdesc;
+    char *fontname;
+    context *ctx;
+
+    /* Get rid of any lingering tab stops. */
+    whlp_para_reset(h);
+
+    /* Delete the (now empty) tabstops tree. */
+    freetree234(h->tabstops);
+
+    /* Delete the index tree and all its entries. */
+    while ( (idx = index234(h->index, 0)) != NULL) {
+        delpos234(h->index, 0);
+        sfree(idx->term);
+        sfree(idx);
+    }
+    freetree234(h->index);
+
+    /* Delete the text tree and all its topiclinks. */
+    while ( (link = index234(h->text, 0)) != NULL) {
+        delpos234(h->text, 0);
+        sfree(link->data1);            /* may be NULL */
+        sfree(link->data2);            /* may be NULL */
+        sfree(link);
+    }
+    freetree234(h->text);
+
+    /* Delete the fontdescs tree and all its entries. */
+    while ( (fontdesc = index234(h->fontdescs, 0)) != NULL) {
+        delpos234(h->fontdescs, 0);
+        sfree(fontdesc);
+    }
+    freetree234(h->fontdescs);
+
+    /*
+     * Delete the fontnames tree and all its entries. The fontdescs
+     * only borrowed these strings, so this is the one place they
+     * are freed.
+     */
+    while ( (fontname = index234(h->fontnames, 0)) != NULL) {
+        delpos234(h->fontnames, 0);
+        sfree(fontname);
+    }
+    freetree234(h->fontnames);
+
+    /*
+     * There might be an unclosed paragraph in h->link. If so, its
+     * data1 and data2 still point into the scratch buffers in `h'
+     * rather than at allocated memory, so only the structure itself
+     * is freed.
+     */
+    if (h->link)
+        sfree(h->link);
+
+    /*
+     * `titles' contains copies of the `contexts' entries, so we
+     * don't need to free them here.
+     */
+    freetree234(h->titles);
+
+    /*
+     * `contexts' and `pre_contexts' _both_ contain contexts that
+     * need freeing. (pre_contexts shouldn't contain any, unless
+     * the help generation was abandoned half-way through.)
+     *
+     * Each entry must be removed from the tree it was found in.
+     * Removing from any other tree would leave the same entry at
+     * the head of pre_contexts for ever, to be freed again and
+     * again; and h->index in particular no longer exists by now.
+     */
+    while ( (ctx = index234(h->pre_contexts, 0)) != NULL) {
+        delpos234(h->pre_contexts, 0);
+        sfree(ctx->name);
+        sfree(ctx->title);
+        sfree(ctx);
+    }
+    freetree234(h->pre_contexts);
+    while ( (ctx = index234(h->contexts, 0)) != NULL) {
+        delpos234(h->contexts, 0);
+        sfree(ctx->name);
+        sfree(ctx->title);
+        sfree(ctx);
+    }
+    freetree234(h->contexts);
+
+    /*
+     * Free all the internal files.
+     */
+    while ( (f = index234(h->files, 0)) != NULL ) {
+        delpos234(h->files, 0);
+        whlp_free_file(f);
+    }
+    freetree234(h->files);
+
+    sfree(h);
+}
+
+#ifdef TESTMODE
+
+/*
+ * Number of identical filler paragraphs written into the first topic.
+ */
+enum { TEST_WAFFLE_PARAS = 22 };
+
+/*
+ * Format into `buf' (which holds `buflen' bytes) the per-topic macro
+ * string used by the second and third test topics, with the context
+ * id `id' substituted in. Asserts that the result fits instead of
+ * letting sprintf run off the end of the buffer.
+ */
+static void test_up_macro(char *buf, size_t buflen, char *id)
+{
+    static const char fmt[] =
+        "CBB(\"btn_up\",\"JI(`',`%s')\");EB(\"btn_up\")";
+
+    /* strlen(fmt) also counts the two bytes of "%s", so this is
+     * slightly conservative. */
+    assert(strlen(fmt) + strlen(id) < buflen);
+    sprintf(buf, fmt, id);
+}
+
+/*
+ * Stand-alone test driver (built only when TESTMODE is defined).
+ *
+ * Builds a small three-topic help file exercising fonts, paragraph
+ * attributes, tab stops, a hyperlink, a browse sequence and index
+ * terms, and writes it to "test.hlp". Always returns 0; failures
+ * show up as assertion failures.
+ */
+int main(void)
+{
+    /* Filler text, repeated to force line wrapping and to make the
+     * topic text spill over a topic block boundary. */
+    static char waffle[] =
+        "Now I'm going to waffle on indefinitely, in a vague attempt"
+        " to make some wrapping happen, and also to make the topicblock"
+        " go across its boundaries. This is going to take a fair amount"
+        " of text, so I'll just have to cheat and c'n'p a lot of it.";
+    WHLP h;
+    WHLP_TOPIC t1, t2, t3;
+    char *e;
+    char mymacro[100];
+    int i;
+
+    h = whlp_new();
+
+    whlp_title(h, "Test Help File");
+    whlp_copyright(h, "This manual is copyright \251 2001 Simon Tatham."
+                   " All rights reversed.");
+    whlp_start_macro(h, "CB(\"btn_about\",\"&About\",\"About()\")");
+    whlp_start_macro(h, "CB(\"btn_up\",\"&Up\",\"Contents()\")");
+    whlp_start_macro(h, "BrowseButtons()");
+
+    /*
+     * Font descriptors; whlp_set_font() below refers to them as
+     * 0, 1, 2 and 3, in the order they are created here.
+     */
+    whlp_create_font(h, "Arial", WHLP_FONTFAM_SANS, 30,
+                     0, 0, 0, 0);
+    whlp_create_font(h, "Times New Roman", WHLP_FONTFAM_SERIF, 24,
+                     WHLP_FONT_STRIKEOUT, 0, 0, 0);
+    whlp_create_font(h, "Times New Roman", WHLP_FONTFAM_SERIF, 24,
+                     WHLP_FONT_ITALIC, 0, 0, 0);
+    whlp_create_font(h, "Courier New", WHLP_FONTFAM_FIXED, 24,
+                     0, 0, 0, 0);
+
+    /*
+     * Topic registration. The third context name is expected to be
+     * rejected as clashing with the second; t3 is then registered
+     * anonymously instead.
+     */
+    t1 = whlp_register_topic(h, "foobar", &e);
+    assert(t1 != NULL);
+    t2 = whlp_register_topic(h, "M359HPEHGW", &e);
+    assert(t2 != NULL);
+    t3 = whlp_register_topic(h, "Y5VQEXZQVJ", &e);
+    assert(t3 == NULL && !strcmp(e, "M359HPEHGW"));
+    t3 = whlp_register_topic(h, NULL, NULL);
+    assert(t3 != NULL);
+
+    whlp_primary_topic(h, t2);
+
+    whlp_prepare(h);
+
+    /*
+     * First topic. The macro list is passed through `...', so the
+     * terminator must be an explicitly typed null pointer: a bare
+     * NULL may be a plain integer 0 there.
+     */
+    whlp_begin_topic(h, t1, "First Topic", "DB(\"btn_up\")", (char *)NULL);
+
+    whlp_begin_para(h, WHLP_PARA_NONSCROLL);
+    whlp_set_font(h, 0);
+    whlp_text(h, "Foobar");
+    whlp_end_para(h);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "This is a silly paragraph with ");
+    whlp_set_font(h, 3);
+    whlp_text(h, "code");
+    whlp_set_font(h, 1);
+    whlp_text(h, " in it.");
+    whlp_end_para(h);
+
+    whlp_para_attr(h, WHLP_PARA_SPACEABOVE, 12);
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "This second, equally silly, paragraph has ");
+    whlp_set_font(h, 2);
+    whlp_text(h, "emphasis");
+    whlp_set_font(h, 1);
+    whlp_text(h, " just to prove we can do it.");
+    whlp_end_para(h);
+
+    for (i = 0; i < TEST_WAFFLE_PARAS; i++) {
+        whlp_begin_para(h, WHLP_PARA_SCROLL);
+        whlp_set_font(h, 1);
+        whlp_text(h, waffle);
+        whlp_end_para(h);
+    }
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Have a ");
+    whlp_start_hyperlink(h, t2);
+    whlp_text(h, "hyperlink");
+    whlp_end_hyperlink(h);
+    whlp_text(h, " to another topic.");
+    whlp_end_para(h);
+
+    /*
+     * Second topic.
+     */
+    test_up_macro(mymacro, sizeof(mymacro), whlp_topic_id(t3));
+
+    whlp_begin_topic(h, t2, "Second Topic", mymacro, (char *)NULL);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "This topic contains no non-scrolling region. I would"
+              " illustrate this with a ludicrously long paragraph, but that"
+              " would get very tedious very quickly. Instead I'll just waffle"
+              " on pointlessly for a little bit and then shut up.");
+    whlp_end_para(h);
+
+    /* Bulleted paragraph: hanging indent with a tab after the bullet. */
+    whlp_set_tabstop(h, 36, WHLP_ALIGN_LEFT);
+    whlp_para_attr(h, WHLP_PARA_LEFTINDENT, 36);
+    whlp_para_attr(h, WHLP_PARA_FIRSTLINEINDENT, -36);
+    whlp_para_attr(h, WHLP_PARA_SPACEABOVE, 12);
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "\225");           /* bullet */
+    whlp_tab(h);
+    whlp_text(h, "This is a paragraph with a bullet. With any luck it should"
+              " work exactly like it used to in the old NASM help file.");
+    whlp_end_para(h);
+
+    /* Two rows using right-, centre- and left-aligned tab stops. */
+    whlp_set_tabstop(h, 128, WHLP_ALIGN_RIGHT);
+    whlp_set_tabstop(h, 256, WHLP_ALIGN_CENTRE);
+    whlp_set_tabstop(h, 384, WHLP_ALIGN_LEFT);
+    whlp_para_attr(h, WHLP_PARA_SPACEABOVE, 12);
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Ooh:");
+    whlp_tab(h);
+    whlp_text(h, "Right?");
+    whlp_tab(h);
+    whlp_text(h, "Centre?");
+    whlp_tab(h);
+    whlp_text(h, "Left?");
+    whlp_end_para(h);
+
+    whlp_set_tabstop(h, 128, WHLP_ALIGN_RIGHT);
+    whlp_set_tabstop(h, 256, WHLP_ALIGN_CENTRE);
+    whlp_set_tabstop(h, 384, WHLP_ALIGN_LEFT);
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "Aah:");
+    whlp_tab(h);
+    whlp_text(h, "R?");
+    whlp_tab(h);
+    whlp_text(h, "C?");
+    whlp_tab(h);
+    whlp_text(h, "L?");
+    whlp_end_para(h);
+
+    /*
+     * Third topic.
+     */
+    test_up_macro(mymacro, sizeof(mymacro), whlp_topic_id(t1));
+
+    whlp_begin_topic(h, t3, "Third Topic", mymacro, (char *)NULL);
+
+    whlp_begin_para(h, WHLP_PARA_SCROLL);
+    whlp_set_font(h, 1);
+    whlp_text(h, "This third topic is almost as boring as the first. Woo!");
+    whlp_end_para(h);
+
+    /*
+     * Browse sequence.
+     */
+    whlp_browse_link(h, t1, t2);
+    whlp_browse_link(h, t2, t3);
+
+    /*
+     * Index terms.
+     */
+    whlp_index_term(h, "foobarbaz", t1);
+    whlp_index_term(h, "foobarbaz", t2);
+    whlp_index_term(h, "foobarbaz", t3);
+    whlp_index_term(h, "foobar", t1);
+    whlp_index_term(h, "foobar", t2);
+    whlp_index_term(h, "foobaz", t1);
+    whlp_index_term(h, "foobaz", t3);
+    whlp_index_term(h, "barbaz", t2);
+    whlp_index_term(h, "barbaz", t3);
+    whlp_index_term(h, "foo", t1);
+    whlp_index_term(h, "bar", t2);
+    whlp_index_term(h, "baz", t3);
+
+    whlp_close(h, "test.hlp");
+    return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * winhelp.h header file for winhelp.c
+ */
+
+/*
+ * Handle for a help file under construction. whlp_new() returns
+ * one, and it is the first argument of most of the functions below.
+ */
+typedef struct WHLP_tag *WHLP;
+
+/*
+ * Handle for a single help topic, as returned by
+ * whlp_register_topic().
+ */
+typedef struct WHLP_TOPIC_tag *WHLP_TOPIC;
+
+/*
+ * Initialise a new WHlp context and begin accumulating data in it.
+ * The returned handle is passed to the other whlp_* functions.
+ */
+WHLP whlp_new(void);
+
+/*
+ * Close a WHlp context and write out the help file it has created.
+ * `filename' names the file to write (the test driver in winhelp.c
+ * uses "test.hlp").
+ *
+ * NOTE(review): the comment on whlp_topic_id() says topic id strings
+ * are freed when the context is closed, so presumably `h' must not
+ * be used after this call - confirm against the implementation.
+ */
+void whlp_close(WHLP h, char *filename);
+
+/*
+ * Abandon and free a WHlp context without writing out anything.
+ */
+void whlp_abandon(WHLP h);
+
+/*
+ * Specify the title and copyright notice of a help file. Also
+ * specify Help macros to be run on loading.
+ *
+ * The test driver in winhelp.c calls whlp_start_macro() several
+ * times, once per macro string.
+ */
+void whlp_title(WHLP h, char *title);
+void whlp_copyright(WHLP h, char *copyright);
+void whlp_start_macro(WHLP h, char *macro);
+
+/*
+ * Register a help topic. Irritatingly, due to weird phase-order
+ * issues with the whole file format, you have to register all your
+ * topics _before_ actually outputting your text. This seems likely
+ * to require two passes over the source document.
+ *
+ * If you want to specify a particular context string (for
+ * reference from other programs, to provide context-sensitive
+ * help), you can supply it here. Otherwise, just pass NULL and a
+ * nondescript one will be allocated automatically.
+ *
+ * If you specify two context strings which clash under the Windows
+ * help file hash algorithm, this function will return NULL and
+ * provide a pointer to the other context string that this one
+ * clashed with, and you must tell your user to fix the clash.
+ * Sadly this is the only way to do it; despite HLP files having a
+ * perfectly good method of mapping arbitrary strings to things,
+ * they didn't see fit to use that method for help contexts, so
+ * instead they hash the context names and expect the hashes to be
+ * unique. Sigh.
+ *
+ * On success (i.e. in any circumstance other than a hash clash), a
+ * valid WHLP_TOPIC is returned for later use.
+ *
+ * Summary of the arguments:
+ *
+ *  - `context_name' is the context string for the topic, or NULL
+ *    to have one allocated automatically.
+ *  - `clash' is where the pointer to the clashing context string is
+ *    stored when NULL is returned. The test driver in winhelp.c
+ *    passes NULL here when `context_name' is also NULL; whether a
+ *    NULL `clash' is safe alongside a real context name is not
+ *    visible from this header - confirm before relying on it.
+ */
+WHLP_TOPIC whlp_register_topic(WHLP h, char *context_name, char **clash);
+
+/*
+ * Link two topics together in a browse sequence. Automatically
+ * takes care of the forward and reverse links: `before' is the
+ * topic that precedes `after' in the sequence.
+ */
+void whlp_browse_link(WHLP h, WHLP_TOPIC before, WHLP_TOPIC after);
+
+/*
+ * After calling whlp_register_topic for all topics, you should
+ * call this, which will sort out all loose ends and allocate
+ * context names for all anonymous topics. Then you can start
+ * writing actual text.
+ */
+void whlp_prepare(WHLP h);
+
+/*
+ * Create a link from an index term to a topic. `index' is the text
+ * of the index term. The same term may be linked to several topics
+ * by calling this once per topic (the test driver in winhelp.c does
+ * exactly that).
+ */
+void whlp_index_term(WHLP h, char *index, WHLP_TOPIC topic);
+
+/*
+ * Call this if you need the id of a topic and you don't already
+ * know it (for example, if whlp_prepare has allocated it
+ * anonymously for you). You might need this, for example, in
+ * creating macros for button-bar bindings.
+ *
+ * The string returned will be freed when the WHLP context is
+ * closed. You should not free it yourself.
+ *
+ * Do not call this before calling whlp_prepare().
+ */
+char *whlp_topic_id(WHLP_TOPIC topic);
+
+/*
+ * Call this to specify which help topic will be the first one
+ * displayed when the help file is loaded.
+ */
+void whlp_primary_topic(WHLP h, WHLP_TOPIC topic);
+
+/*
+ * Call this when about to begin writing out the text for a topic.
+ *
+ * Any additional arguments are Help macros, terminated with a
+ * null pointer. So the minimum call sequence is
+ *
+ *     whlp_begin_topic(helpfile, mytopic, "Title", (char *)NULL);
+ *
+ * The cast matters: the terminator is passed through `...', where
+ * an uncast NULL is not guaranteed to have pointer type. (This
+ * assumes the implementation fetches each macro as a char *, which
+ * is how callers pass them - confirm in winhelp.c.)
+ */
+void whlp_begin_topic(WHLP h, WHLP_TOPIC topic, char *title, ...);
+
+/*
+ * Call this to set up a font descriptor. You supply the font name,
+ * the font size (in half-points), the graphic rendition flags
+ * (bold, italic etc), and the general font family (for Windows to
+ * select a fallback font if yours is unavailable). You can also
+ * specify a foreground colour for the text (but unfortunately not
+ * a background).
+ *
+ * Font descriptors are identified in whlp_set_font() by small
+ * integers, which are allocated from 0 upwards in the order you
+ * call whlp_create_font(). For your convenience,
+ * whlp_create_font() returns the integer allocated to each font
+ * descriptor you create, but you could work this out just as
+ * easily yourself by counting.
+ *
+ * In parameter order:
+ *
+ *  - `font' is the font name.
+ *  - `family' is one of the WHLP_FONTFAM_* values.
+ *  - `halfpoints' is the font size in half-points.
+ *  - `rendition' is built from the WHLP_FONT_* flags, or 0 for
+ *    none. The flags are distinct powers of two, so presumably
+ *    several can be ORed together - confirm in winhelp.c.
+ *  - `r', `g', `b' give the foreground colour. The valid range is
+ *    not stated here (presumably 0-255 each - confirm).
+ */
+
+/* Graphic rendition flags, for the `rendition' parameter. */
+enum {
+ WHLP_FONT_BOLD = 1,
+ WHLP_FONT_ITALIC = 2,
+ WHLP_FONT_UNDERLINE = 4,
+ WHLP_FONT_STRIKEOUT = 8,
+ WHLP_FONT_DOUBLEUND = 16,
+ WHLP_FONT_SMALLCAPS = 32
+};
+/* General font families, for the `family' parameter. */
+enum {
+ WHLP_FONTFAM_FIXED = 1,
+ WHLP_FONTFAM_SERIF = 2,
+ WHLP_FONTFAM_SANS = 3,
+ WHLP_FONTFAM_SCRIPT = 4,
+ WHLP_FONTFAM_DECOR = 5
+};
+int whlp_create_font(WHLP h, char *font, int family, int halfpoints,
+ int rendition, int r, int g, int b);
+
+/*
+ * Routines to output paragraphs and actual text (at last).
+ *
+ * You should start by calling whlp_para_attr() to set any
+ * paragraph attributes that differ from the standard settings.
+ * Next call whlp_begin_para() to start the paragraph. Then call
+ * the various in-paragraph functions until you have output the
+ * whole paragraph, and finally call whlp_end_para() to finish it
+ * off.
+ *
+ * NOTE(review): the test driver in winhelp.c re-issues its
+ * whlp_para_attr() and whlp_set_tabstop() calls before every
+ * paragraph that needs them, which suggests the settings apply only
+ * to the next paragraph - confirm against the implementation. The
+ * units of the attribute values and tab stop positions are not
+ * stated here either.
+ */
+
+/* Attribute ids, for the `attr_id' parameter of whlp_para_attr(). */
+enum {
+ WHLP_PARA_SPACEABOVE=1, WHLP_PARA_SPACEBELOW, WHLP_PARA_SPACELINES,
+ WHLP_PARA_LEFTINDENT, WHLP_PARA_RIGHTINDENT, WHLP_PARA_FIRSTLINEINDENT,
+ WHLP_PARA_ALIGNMENT
+};
+/*
+ * Alignments, for the `alignment' parameter of whlp_set_tabstop()
+ * (and presumably as the value of WHLP_PARA_ALIGNMENT - confirm).
+ */
+enum {
+ WHLP_ALIGN_LEFT, WHLP_ALIGN_RIGHT, WHLP_ALIGN_CENTRE
+};
+/* Paragraph types, for the `para_type' parameter of whlp_begin_para(). */
+enum {
+ WHLP_PARA_SCROLL, WHLP_PARA_NONSCROLL
+};
+/* Set one paragraph attribute (a WHLP_PARA_* id) to `attr_param'. */
+void whlp_para_attr(WHLP h, int attr_id, int attr_param);
+/* Add a tab stop at position `tabstop' with a WHLP_ALIGN_* alignment. */
+void whlp_set_tabstop(WHLP h, int tabstop, int alignment);
+/* Start a paragraph of type WHLP_PARA_SCROLL or WHLP_PARA_NONSCROLL. */
+void whlp_begin_para(WHLP h, int para_type);
+/* Finish the paragraph started by whlp_begin_para(). */
+void whlp_end_para(WHLP h);
+/* Select a font by the id whlp_create_font() allocated for it. */
+void whlp_set_font(WHLP h, int font_id);
+/* Output a run of text within the current paragraph. */
+void whlp_text(WHLP h, char *text);
+/*
+ * Bracket a run of in-paragraph output that should act as a
+ * hyperlink to the topic `target'.
+ */
+void whlp_start_hyperlink(WHLP h, WHLP_TOPIC target);
+void whlp_end_hyperlink(WHLP h);
+/* Output a tab within the current paragraph. */
+void whlp_tab(WHLP h);