From d7482997dd1ca71b70df43c15dd5956f435a1a7e Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 5 Aug 2002 10:31:35 +0000 Subject: [PATCH] Rename Buttress to Halibut. I _think_ I've caught everything in this pass. git-svn-id: svn://svn.tartarus.org/sgt/halibut@1800 cda61777-01e9-0310-a592-d414129be87e --- LICENCE | 21 + MODULE | 5 + Makefile | 81 ++ biblio.c | 105 +++ bk_text.c | 587 ++++++++++++++ bk_whlp.c | 639 +++++++++++++++ bk_xhtml.c | 1446 ++++++++++++++++++++++++++++++++++ contents.c | 222 ++++++ error.c | 219 ++++++ halibut.h | 416 ++++++++++ help.c | 32 + index.c | 230 ++++++ input.c | 1164 ++++++++++++++++++++++++++++ inputs/errors.but | 51 ++ inputs/errors2.but | 5 + inputs/test.but | 130 ++++ keywords.c | 154 ++++ licence.c | 16 + main.c | 300 +++++++ malloc.c | 149 ++++ misc.c | 312 ++++++++ misc/halibut.sl | 98 +++ misc/logalloc | 61 ++ style.c | 8 + tree234.c | 2193 ++++++++++++++++++++++++++++++++++++++++++++++++++++ tree234.h | 202 +++++ ustring.c | 174 +++++ version.c | 13 + winhelp.c | 2127 ++++++++++++++++++++++++++++++++++++++++++++++++++ winhelp.h | 168 ++++ 30 files changed, 11328 insertions(+) create mode 100644 LICENCE create mode 100644 MODULE create mode 100644 Makefile create mode 100644 biblio.c create mode 100644 bk_text.c create mode 100644 bk_whlp.c create mode 100644 bk_xhtml.c create mode 100644 contents.c create mode 100644 error.c create mode 100644 halibut.h create mode 100644 help.c create mode 100644 index.c create mode 100644 input.c create mode 100644 inputs/errors.but create mode 100644 inputs/errors2.but create mode 100644 inputs/test.but create mode 100644 keywords.c create mode 100644 licence.c create mode 100644 main.c create mode 100644 malloc.c create mode 100644 misc.c create mode 100644 misc/halibut.sl create mode 100755 misc/logalloc create mode 100644 style.c create mode 100644 tree234.c create mode 100644 tree234.h create mode 100644 ustring.c create mode 100644 version.c create mode 100644 winhelp.c create mode 
100644 winhelp.h diff --git a/LICENCE b/LICENCE new file mode 100644 index 0000000..b1bfa8b --- /dev/null +++ b/LICENCE @@ -0,0 +1,21 @@ +Halibut is copyright (c) 1999-2001 Simon Tatham and James Aylett. + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation files +(the "Software"), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of the Software, +and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/MODULE b/MODULE new file mode 100644 index 0000000..260f8ab --- /dev/null +++ b/MODULE @@ -0,0 +1,5 @@ +Module: halibut +Author: Simon Tatham +Description: Halibut is yet another text formatting system, intended primarily for writing software documentation. It accepts a single source format and outputs a variety of formats, planned to include text, HTML, Texinfo, Windows Help, Windows HTMLHelp, PostScript and PDF. It has comprehensive indexing and cross-referencing support, and generates hyperlinks within output documents wherever possible. 
+Homepage: http://www.chiark.greenend.org.uk/~sgtatham/halibut.html +Release: http://www.chiark.greenend.org.uk/~sgtatham/halibut.html diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..b82ba23 --- /dev/null +++ b/Makefile @@ -0,0 +1,81 @@ +# Halibut master makefile + +# Requires a compiler with -MD support, currently + +# `make' from top level will build in directory `build' +# `make BUILDDIR=foo' from top level will build in directory foo +ifndef REALBUILD +ifndef BUILDDIR +ifdef TEST +BUILDDIR := test +else +BUILDDIR := build +endif +endif +all: + @test -d $(BUILDDIR) || mkdir $(BUILDDIR) + @make -C $(BUILDDIR) -f ../Makefile REALBUILD=yes +spotless: + @test -d $(BUILDDIR) || mkdir $(BUILDDIR) + @make -C $(BUILDDIR) -f ../Makefile spotless REALBUILD=yes +clean: + @test -d $(BUILDDIR) || mkdir $(BUILDDIR) + @make -C $(BUILDDIR) -f ../Makefile clean REALBUILD=yes +else + +# The `real' makefile part. + +CFLAGS += -Wall -W + +ifdef TEST +CFLAGS += -DLOGALLOC +LIBS += -lefence +endif + +ifdef RELEASE +ifndef VERSION +VERSION := $(RELEASE) +endif +else +CFLAGS += -g +endif + +ifndef VER +ifdef VERSION +VER := $(VERSION) +endif +endif +ifdef VER +VDEF := -DVERSION=\"$(VER)\" +endif + +SRC := ../ + +MODULES := main malloc ustring error help licence version misc tree234 +MODULES += input keywords contents index style biblio +MODULES += bk_text bk_xhtml bk_whlp +MODULES += winhelp + +OBJECTS := $(addsuffix .o,$(MODULES)) +DEPS := $(addsuffix .d,$(MODULES)) + +halibut: $(OBJECTS) + $(CC) $(LFLAGS) -o halibut $(OBJECTS) $(LIBS) + +%.o: $(SRC)%.c + $(CC) $(CFLAGS) -MD -c $< + +version.o: FORCE + $(CC) $(VDEF) -MD -c $(SRC)version.c + +spotless:: clean + rm -f *.d + +clean:: + rm -f *.o halibut core + +FORCE: # phony target to force version.o to be rebuilt every time + +-include $(DEPS) + +endif diff --git a/biblio.c b/biblio.c new file mode 100644 index 0000000..de0b925 --- /dev/null +++ b/biblio.c @@ -0,0 +1,105 @@ +/* + * biblio.c: process the 
bibliography + */ + +#include +#include "halibut.h" + +static wchar_t *gentext(int num) { + wchar_t text[22]; + wchar_t *p = text + sizeof(text); + *--p = L'\0'; + *--p = L']'; + while (num != 0) { + assert(p > text); + *--p = L"0123456789"[num % 10]; + num /= 10; + } + assert(p > text); + *--p = L'['; + return ustrdup(p); +} + +static void cite_biblio(keywordlist *kl, wchar_t *key, filepos fpos) { + keyword *kw = kw_lookup(kl, key); + if (!kw) + error(err_nosuchkw, &fpos, key); + else { + /* + * We've found a \k reference. If it's a + * bibliography entry ... + */ + if (kw->para->type == para_Biblio) { + /* + * ... then mark the paragraph as cited. + */ + kw->para->type = para_BiblioCited; + } + } +} + +/* + * Make a pass through the source form, generating citation formats + * for bibliography entries and also marking which bibliography + * entries are actually cited (or \nocite-ed). + */ + +void gen_citations(paragraph *source, keywordlist *kl) { + paragraph *para; + int bibnum = 0; + + for (para = source; para; para = para->next) { + word *ptr; + + /* + * \BR and \nocite paragraphs get special processing here. + */ + if (para->type == para_BR) { + keyword *kw = kw_lookup(kl, para->keyword); + if (!kw) { + error(err_nosuchkw, ¶->fpos, para->keyword); + } else if (kw->text) { + error(err_multiBR, ¶->fpos, para->keyword); + } else { + kw->text = dup_word_list(para->words); + } + } else if (para->type == para_NoCite) { + wchar_t *wp = para->keyword; + while (*wp) { + cite_biblio(kl, wp, para->fpos); + wp = uadv(wp); + } + } + + /* + * Scan for keyword references. + */ + for (ptr = para->words; ptr; ptr = ptr->next) { + if (ptr->type == word_UpperXref || + ptr->type == word_LowerXref) + cite_biblio(kl, ptr->text, ptr->fpos); + } + } + + /* + * We're now almost done; all that remains is to scan through + * the cited bibliography entries and invent default citation + * texts for the ones that don't already have explicitly + * provided \BR text. 
+ */ + for (para = source; para; para = para->next) { + if (para->type == para_BiblioCited) { + keyword *kw = kw_lookup(kl, para->keyword); + assert(kw != NULL); + if (!kw->text) { + word *wd = smalloc(sizeof(word)); + wd->text = gentext(++bibnum); + wd->type = word_Normal; + wd->alt = NULL; + wd->next = NULL; + kw->text = wd; + } + para->kwtext = kw->text; + } + } +} diff --git a/bk_text.c b/bk_text.c new file mode 100644 index 0000000..1583cbd --- /dev/null +++ b/bk_text.c @@ -0,0 +1,587 @@ +/* + * text backend for Halibut + */ + +#include +#include +#include +#include "halibut.h" + +typedef enum { LEFT, LEFTPLUS, CENTRE } alignment; +typedef struct { + alignment align; + int just_numbers; + wchar_t underline; +} alignstruct; + +typedef struct { + int indent, indent_code; + int listindentbefore, listindentafter; + int width; + alignstruct atitle, achapter, *asect; + int nasect; + int include_version_id; + int indent_preambles; + word bullet; +} textconfig; + +static int text_convert(wchar_t *, char **); + +static void text_heading(FILE *, word *, word *, word *, alignstruct, int,int); +static void text_rule(FILE *, int, int); +static void text_para(FILE *, word *, char *, word *, int, int, int); +static void text_codepara(FILE *, word *, int, int); +static void text_versionid(FILE *, word *); + +static alignment utoalign(wchar_t *p) { + if (!ustricmp(p, L"centre") || !ustricmp(p, L"center")) + return CENTRE; + if (!ustricmp(p, L"leftplus")) + return LEFTPLUS; + return LEFT; +} + +static textconfig text_configure(paragraph *source) { + textconfig ret; + + /* + * Non-negotiables. + */ + ret.bullet.next = NULL; + ret.bullet.alt = NULL; + ret.bullet.type = word_Normal; + ret.atitle.just_numbers = FALSE; /* ignored */ + + /* + * Defaults. 
+ */ + ret.indent = 7; + ret.indent_code = 2; + ret.listindentbefore = 1; + ret.listindentafter = 3; + ret.width = 68; + ret.atitle.align = CENTRE; + ret.atitle.underline = L'='; + ret.achapter.align = LEFT; + ret.achapter.just_numbers = FALSE; + ret.achapter.underline = L'-'; + ret.nasect = 1; + ret.asect = mknewa(alignstruct, ret.nasect); + ret.asect[0].align = LEFTPLUS; + ret.asect[0].just_numbers = TRUE; + ret.asect[0].underline = L'\0'; + ret.include_version_id = TRUE; + ret.indent_preambles = FALSE; + ret.bullet.text = ustrdup(L"-"); + + for (; source; source = source->next) { + if (source->type == para_Config) { + if (!ustricmp(source->keyword, L"text-indent")) { + ret.indent = utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"text-indent-code")) { + ret.indent_code = utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"text-width")) { + ret.width = utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"text-list-indent")) { + ret.listindentbefore = utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"text-listitem-indent")) { + ret.listindentafter = utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"text-chapter-align")) { + ret.achapter.align = utoalign(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"text-chapter-underline")) { + ret.achapter.underline = *uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"text-chapter-numeric")) { + ret.achapter.underline = utob(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"text-section-align")) { + wchar_t *p = uadv(source->keyword); + int n = 0; + if (uisdigit(*p)) { + n = utoi(p); + p = uadv(p); + } + if (n >= ret.nasect) { + int i; + ret.asect = resize(ret.asect, n+1); + for (i = ret.nasect; i <= n; i++) + ret.asect[i] = ret.asect[ret.nasect-1]; + ret.nasect = n+1; + } + ret.asect[n].align = utoalign(p); + } else if (!ustricmp(source->keyword, L"text-section-underline")) { + 
wchar_t *p = uadv(source->keyword); + int n = 0; + if (uisdigit(*p)) { + n = utoi(p); + p = uadv(p); + } + if (n >= ret.nasect) { + int i; + ret.asect = resize(ret.asect, n+1); + for (i = ret.nasect; i <= n; i++) + ret.asect[i] = ret.asect[ret.nasect-1]; + ret.nasect = n+1; + } + ret.asect[n].underline = *p; + } else if (!ustricmp(source->keyword, L"text-section-numeric")) { + wchar_t *p = uadv(source->keyword); + int n = 0; + if (uisdigit(*p)) { + n = utoi(p); + p = uadv(p); + } + if (n >= ret.nasect) { + int i; + ret.asect = resize(ret.asect, n+1); + for (i = ret.nasect; i <= n; i++) + ret.asect[i] = ret.asect[ret.nasect-1]; + ret.nasect = n+1; + } + ret.asect[n].just_numbers = utob(p); + } else if (!ustricmp(source->keyword, L"text-title-align")) { + ret.atitle.align = utoalign(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"text-title-underline")) { + ret.atitle.underline = *uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"text-versionid")) { + ret.include_version_id = utob(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"text-indent-preamble")) { + ret.indent_preambles = utob(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"text-bullet")) { + ret.bullet.text = uadv(source->keyword); + } + } + } + + return ret; +} + +void text_backend(paragraph *sourceform, keywordlist *keywords, + indexdata *idx) { + paragraph *p; + textconfig conf; + word *prefix, *body, *wp; + word spaceword; + FILE *fp; + char *prefixextra; + int indentb, indenta; + + IGNORE(keywords); /* we don't happen to need this */ + IGNORE(idx); /* or this */ + + conf = text_configure(sourceform); + + /* + * Determine the output file name, and open the output file + * + * FIXME: want configurable output file names here. For the + * moment, we'll just call it `output.txt'. 
+ */ + fp = fopen("output.txt", "w"); + if (!fp) { + error(err_cantopenw, "output.txt"); + return; + } + + /* Do the title */ + for (p = sourceform; p; p = p->next) + if (p->type == para_Title) + text_heading(fp, NULL, NULL, p->words, + conf.atitle, conf.indent, conf.width); + + /* Do the preamble and copyright */ + for (p = sourceform; p; p = p->next) + if (p->type == para_Preamble) + text_para(fp, NULL, NULL, p->words, + conf.indent_preambles ? conf.indent : 0, 0, + conf.width + (conf.indent_preambles ? 0 : conf.indent)); + for (p = sourceform; p; p = p->next) + if (p->type == para_Copyright) + text_para(fp, NULL, NULL, p->words, + conf.indent_preambles ? conf.indent : 0, 0, + conf.width + (conf.indent_preambles ? 0 : conf.indent)); + + /* Do the main document */ + for (p = sourceform; p; p = p->next) switch (p->type) { + + /* + * Things we ignore because we've already processed them or + * aren't going to touch them in this pass. + */ + case para_IM: + case para_BR: + case para_Biblio: /* only touch BiblioCited */ + case para_VersionID: + case para_Copyright: + case para_Preamble: + case para_NoCite: + case para_Title: + break; + + /* + * Chapter titles. + */ + case para_Chapter: + case para_Appendix: + case para_UnnumberedChapter: + text_heading(fp, p->kwtext, p->kwtext2, p->words, + conf.achapter, conf.indent, conf.width); + break; + + case para_Heading: + case para_Subsect: + text_heading(fp, p->kwtext, p->kwtext2, p->words, + conf.asect[p->aux>=conf.nasect ? 
conf.nasect-1 : p->aux], + conf.indent, conf.width); + break; + + case para_Rule: + text_rule(fp, conf.indent, conf.width); + break; + + case para_Normal: + case para_BiblioCited: + case para_Bullet: + case para_NumberedList: + if (p->type == para_Bullet) { + prefix = &conf.bullet; + prefixextra = NULL; + indentb = conf.listindentbefore; + indenta = conf.listindentafter; + } else if (p->type == para_NumberedList) { + prefix = p->kwtext; + prefixextra = "."; /* FIXME: configurability */ + indentb = conf.listindentbefore; + indenta = conf.listindentafter; + } else { + prefix = NULL; + prefixextra = NULL; + indentb = indenta = 0; + } + if (p->type == para_BiblioCited) { + body = dup_word_list(p->kwtext); + for (wp = body; wp->next; wp = wp->next); + wp->next = &spaceword; + spaceword.next = p->words; + spaceword.alt = NULL; + spaceword.type = word_WhiteSpace; + spaceword.text = NULL; + } else { + wp = NULL; + body = p->words; + } + text_para(fp, prefix, prefixextra, body, + conf.indent + indentb, indenta, conf.width); + if (wp) { + wp->next = NULL; + free_word_list(body); + } + break; + + case para_Code: + text_codepara(fp, p->words, conf.indent + conf.indent_code, conf.width - 2 * conf.indent_code); + break; + } + + /* Do the version ID */ + if (conf.include_version_id) { + for (p = sourceform; p; p = p->next) + if (p->type == para_VersionID) + text_versionid(fp, p->words); + } + + /* + * Tidy up + */ + fclose(fp); + sfree(conf.bullet.text); +} + +/* + * Convert a wide string into a string of chars. If `result' is + * non-NULL, mallocs the resulting string and stores a pointer to + * it in `*result'. If `result' is NULL, merely checks whether all + * characters in the string are feasible for the output character + * set. + * + * Return is nonzero if all characters are OK. If not all + * characters are OK but `result' is non-NULL, a result _will_ + * still be generated! + */ +static int text_convert(wchar_t *s, char **result) { + /* + * FIXME. 
Currently this is ISO8859-1 only. + */ + int doing = (result != 0); + int ok = TRUE; + char *p = NULL; + int plen = 0, psize = 0; + + for (; *s; s++) { + wchar_t c = *s; + char outc; + + if ((c >= 32 && c <= 126) || + (c >= 160 && c <= 255)) { + /* Char is OK. */ + outc = (char)c; + } else { + /* Char is not OK. */ + ok = FALSE; + outc = 0xBF; /* approximate the good old DEC `uh?' */ + } + if (doing) { + if (plen >= psize) { + psize = plen + 256; + p = resize(p, psize); + } + p[plen++] = outc; + } + } + if (doing) { + p = resize(p, plen+1); + p[plen] = '\0'; + *result = p; + } + return ok; +} + +static void text_rdaddwc(rdstringc *rs, word *text, word *end) { + char *c; + + for (; text && text != end; text = text->next) switch (text->type) { + case word_HyperLink: + case word_HyperEnd: + case word_UpperXref: + case word_LowerXref: + case word_XrefEnd: + case word_IndexRef: + break; + + case word_Normal: + case word_Emph: + case word_Code: + case word_WeakCode: + case word_WhiteSpace: + case word_EmphSpace: + case word_CodeSpace: + case word_WkCodeSpace: + case word_Quote: + case word_EmphQuote: + case word_CodeQuote: + case word_WkCodeQuote: + assert(text->type != word_CodeQuote && + text->type != word_WkCodeQuote); + if (towordstyle(text->type) == word_Emph && + (attraux(text->aux) == attr_First || + attraux(text->aux) == attr_Only)) + rdaddc(rs, '_'); /* FIXME: configurability */ + else if (towordstyle(text->type) == word_Code && + (attraux(text->aux) == attr_First || + attraux(text->aux) == attr_Only)) + rdaddc(rs, '`'); /* FIXME: configurability */ + if (removeattr(text->type) == word_Normal) { + if (text_convert(text->text, &c)) + rdaddsc(rs, c); + else + text_rdaddwc(rs, text->alt, NULL); + sfree(c); + } else if (removeattr(text->type) == word_WhiteSpace) { + rdaddc(rs, ' '); + } else if (removeattr(text->type) == word_Quote) { + rdaddc(rs, quoteaux(text->aux) == quote_Open ? 
'`' : '\''); + /* FIXME: configurability */ + } + if (towordstyle(text->type) == word_Emph && + (attraux(text->aux) == attr_Last || + attraux(text->aux) == attr_Only)) + rdaddc(rs, '_'); /* FIXME: configurability */ + else if (towordstyle(text->type) == word_Code && + (attraux(text->aux) == attr_Last || + attraux(text->aux) == attr_Only)) + rdaddc(rs, '\''); /* FIXME: configurability */ + break; + } +} + +static int text_width(word *); + +static int text_width_list(word *text) { + int w = 0; + while (text) { + w += text_width(text); + text = text->next; + } + return w; +} + +static int text_width(word *text) { + switch (text->type) { + case word_HyperLink: + case word_HyperEnd: + case word_UpperXref: + case word_LowerXref: + case word_XrefEnd: + case word_IndexRef: + return 0; + + case word_Normal: + case word_Emph: + case word_Code: + case word_WeakCode: + return (((text->type == word_Emph || + text->type == word_Code) + ? (attraux(text->aux) == attr_Only ? 2 : + attraux(text->aux) == attr_Always ? 0 : 1) + : 0) + + (text_convert(text->text, NULL) ? + ustrlen(text->text) : + text_width_list(text->alt))); + + case word_WhiteSpace: + case word_EmphSpace: + case word_CodeSpace: + case word_WkCodeSpace: + case word_Quote: + case word_EmphQuote: + case word_CodeQuote: + case word_WkCodeQuote: + assert(text->type != word_CodeQuote && + text->type != word_WkCodeQuote); + return (((towordstyle(text->type) == word_Emph || + towordstyle(text->type) == word_Code) + ? (attraux(text->aux) == attr_Only ? 2 : + attraux(text->aux) == attr_Always ? 
0 : 1) + : 0) + 1); + } + return 0; /* should never happen */ +} + +static void text_heading(FILE *fp, word *tprefix, word *nprefix, word *text, + alignstruct align, int indent, int width) { + rdstringc t = { 0, 0, NULL }; + int margin, length; + int firstlinewidth, wrapwidth; + wrappedline *wrapping, *p; + + if (align.just_numbers && nprefix) { + text_rdaddwc(&t, nprefix, NULL); + rdaddc(&t, ' '); /* FIXME: as below */ + } else if (!align.just_numbers && tprefix) { + text_rdaddwc(&t, tprefix, NULL); + rdaddsc(&t, ": "); /* FIXME: configurability */ + } + margin = length = (t.text ? strlen(t.text) : 0); + + if (align.align == LEFTPLUS) { + margin = indent - margin; + if (margin < 0) margin = 0; + firstlinewidth = indent + width - margin - length; + wrapwidth = width; + } else if (align.align == LEFT || align.align == CENTRE) { + margin = 0; + firstlinewidth = indent + width - length; + wrapwidth = indent + width; + } + + wrapping = wrap_para(text, firstlinewidth, wrapwidth, text_width); + for (p = wrapping; p; p = p->next) { + text_rdaddwc(&t, p->begin, p->end); + length = (t.text ? strlen(t.text) : 0); + if (align.align == CENTRE) { + margin = (indent + width - length)/2; + if (margin < 0) margin = 0; + } + fprintf(fp, "%*s%s\n", margin, "", t.text); + if (align.underline != L'\0') { + char *u, uc; + wchar_t uw[2]; + uw[0] = align.underline; uw[1] = L'\0'; + text_convert(uw, &u); + uc = u[0]; + sfree(u); + fprintf(fp, "%*s", margin, ""); + while (length--) + putc(uc, fp); + putc('\n', fp); + } + if (align.align == LEFTPLUS) + margin = indent; + else + margin = 0; + sfree(t.text); + t = empty_rdstringc; + } + wrap_free(wrapping); + putc('\n', fp); + + sfree(t.text); +} + +static void text_rule(FILE *fp, int indent, int width) { + while (indent--) putc(' ', fp); + while (width--) putc('-', fp); /* FIXME: configurability! 
*/ + putc('\n', fp); + putc('\n', fp); +} + +static void text_para(FILE *fp, word *prefix, char *prefixextra, word *text, + int indent, int extraindent, int width) { + wrappedline *wrapping, *p; + rdstringc pfx = { 0, 0, NULL }; + int e; + int firstlinewidth = width; + + if (prefix) { + text_rdaddwc(&pfx, prefix, NULL); + if (prefixextra) + rdaddsc(&pfx, prefixextra); + fprintf(fp, "%*s%s", indent, "", pfx.text); + e = extraindent - strlen(pfx.text); + if (e < 0) { + e = 0; + firstlinewidth -= e; + if (firstlinewidth < 0) { + e = indent + extraindent; + firstlinewidth = width; + fprintf(fp, "\n"); + } + } + sfree(pfx.text); + } else + e = indent + extraindent; + + wrapping = wrap_para(text, firstlinewidth, width, text_width); + for (p = wrapping; p; p = p->next) { + rdstringc t = { 0, 0, NULL }; + text_rdaddwc(&t, p->begin, p->end); + fprintf(fp, "%*s%s\n", e, "", t.text); + e = indent + extraindent; + sfree(t.text); + } + wrap_free(wrapping); + putc('\n', fp); +} + +static void text_codepara(FILE *fp, word *text, int indent, int width) { + for (; text; text = text->next) if (text->type == word_WeakCode) { + char *c; + text_convert(text->text, &c); + if (strlen(c) > (size_t)width) { + /* FIXME: warn */ + } + fprintf(fp, "%*s%s\n", indent, "", c); + sfree(c); + } + + putc('\n', fp); +} + +static void text_versionid(FILE *fp, word *text) { + rdstringc t = { 0, 0, NULL }; + + rdaddc(&t, '['); /* FIXME: configurability */ + text_rdaddwc(&t, text, NULL); + rdaddc(&t, ']'); /* FIXME: configurability */ + + fprintf(fp, "%s\n", t.text); + sfree(t.text); +} diff --git a/bk_whlp.c b/bk_whlp.c new file mode 100644 index 0000000..a8cb99e --- /dev/null +++ b/bk_whlp.c @@ -0,0 +1,639 @@ +/* + * Windows Help backend for Halibut + * + * TODO: + * - allow user to specify section contexts. 
+ */ + +#include +#include +#include + +#include "halibut.h" +#include "winhelp.h" + +struct bk_whlp_state { + WHLP h; + indexdata *idx; + keywordlist *keywords; + WHLP_TOPIC curr_topic; + FILE *cntfp; + int cnt_last_level, cnt_workaround; +}; + +/* + * Indexes of fonts in our standard font descriptor set. + */ +enum { + FONT_NORMAL, + FONT_EMPH, + FONT_CODE, + FONT_TITLE, + FONT_TITLE_EMPH, + FONT_TITLE_CODE, + FONT_RULE +}; + +static void whlp_rdaddwc(rdstringc *rs, word *text); +static int whlp_convert(wchar_t *s, char **result, int hard_spaces); +static void whlp_mkparagraph(struct bk_whlp_state *state, + int font, word *text, int subsidiary); +static void whlp_navmenu(struct bk_whlp_state *state, paragraph *p); +static void whlp_contents_write(struct bk_whlp_state *state, + int level, char *text, WHLP_TOPIC topic); + +void whlp_backend(paragraph *sourceform, keywordlist *keywords, + indexdata *idx) { + WHLP h; + char *filename, *cntname; + paragraph *p, *lastsect; + struct bk_whlp_state state; + WHLP_TOPIC contents_topic; + int i; + indexentry *ie; + + filename = "output.hlp"; /* FIXME: configurability */ + cntname = "output.cnt"; /* corresponding contents file */ + + state.cntfp = fopen(cntname, "wb"); + state.cnt_last_level = -1; state.cnt_workaround = 0; + + h = state.h = whlp_new(); + state.keywords = keywords; + state.idx = idx; + + whlp_start_macro(h, "CB(\"btn_about\",\"&About\",\"About()\")"); + whlp_start_macro(h, "CB(\"btn_up\",\"&Up\",\"Contents()\")"); + whlp_start_macro(h, "BrowseButtons()"); + + whlp_create_font(h, "Times New Roman", WHLP_FONTFAM_SERIF, 24, + 0, 0, 0, 0); + whlp_create_font(h, "Times New Roman", WHLP_FONTFAM_SERIF, 24, + WHLP_FONT_ITALIC, 0, 0, 0); + whlp_create_font(h, "Courier New", WHLP_FONTFAM_FIXED, 24, + 0, 0, 0, 0); + whlp_create_font(h, "Arial", WHLP_FONTFAM_SERIF, 30, + WHLP_FONT_BOLD, 0, 0, 0); + whlp_create_font(h, "Arial", WHLP_FONTFAM_SERIF, 30, + WHLP_FONT_BOLD|WHLP_FONT_ITALIC, 0, 0, 0); + whlp_create_font(h, 
"Courier New", WHLP_FONTFAM_FIXED, 30, + WHLP_FONT_BOLD, 0, 0, 0); + whlp_create_font(h, "Courier New", WHLP_FONTFAM_SANS, 18, + WHLP_FONT_STRIKEOUT, 0, 0, 0); + + /* + * Loop over the source form finding out whether the user has + * specified particular help topic names for anything. + */ + for (p = sourceform; p; p = p->next) { + p->private_data = NULL; + if (p->type == para_Config && p->parent) { + if (!ustricmp(p->keyword, L"winhelp-topic")) { + char *topicname; + whlp_convert(uadv(p->keyword), &topicname, 0); + /* Store the topic name in the private_data field of the + * containing section. */ + p->parent->private_data = topicname; + } + } + } + + /* + * Loop over the source form registering WHLP_TOPICs for + * everything. + */ + + contents_topic = whlp_register_topic(h, "Top", NULL); + whlp_primary_topic(h, contents_topic); + for (p = sourceform; p; p = p->next) { + if (p->type == para_Chapter || + p->type == para_Appendix || + p->type == para_UnnumberedChapter || + p->type == para_Heading || + p->type == para_Subsect) { + char *topicid = p->private_data; + char *errstr; + + p->private_data = whlp_register_topic(h, topicid, &errstr); + if (!p->private_data) { + p->private_data = whlp_register_topic(h, NULL, NULL); + error(err_winhelp_ctxclash, &p->fpos, topicid, errstr); + } + sfree(topicid); + } + } + + /* + * Loop over the index entries, preparing final text forms for + * each one. + */ + for (i = 0; (ie = index234(idx->entries, i)) != NULL; i++) { + rdstringc rs = {0, 0, NULL}; + whlp_rdaddwc(&rs, ie->text); + ie->backend_data = rs.text; + } + + whlp_prepare(h); + + /* ------------------------------------------------------------------ + * Do the contents page, containing title, preamble and + * copyright. + */ + + whlp_begin_topic(h, contents_topic, "Contents", "DB(\"btn_up\")", NULL); + + /* + * The manual title goes in the non-scroll region, and also + * goes into the system title slot. 
+ */ + { + rdstringc rs = {0, 0, NULL}; + for (p = sourceform; p; p = p->next) { + if (p->type == para_Title) { + whlp_begin_para(h, WHLP_PARA_NONSCROLL); + whlp_mkparagraph(&state, FONT_TITLE, p->words, FALSE); + whlp_rdaddwc(&rs, p->words); + whlp_end_para(h); + } + } + if (rs.text) { + whlp_title(h, rs.text); + fprintf(state.cntfp, ":Title %s\r\n", rs.text); + sfree(rs.text); + } + whlp_contents_write(&state, 1, "Title page", contents_topic); + /* FIXME: configurability in that string */ + } + + /* + * Next comes the preamble, which just goes into the ordinary + * scrolling region. + */ + for (p = sourceform; p; p = p->next) { + if (p->type == para_Preamble) { + whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12); + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_mkparagraph(&state, FONT_NORMAL, p->words, FALSE); + whlp_end_para(h); + } + } + + /* + * The copyright goes to two places, again: into the contents + * page and also into the system section. + */ + { + rdstringc rs = {0, 0, NULL}; + for (p = sourceform; p; p = p->next) { + if (p->type == para_Copyright) { + whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12); + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_mkparagraph(&state, FONT_NORMAL, p->words, FALSE); + whlp_end_para(h); + whlp_rdaddwc(&rs, p->words); + } + } + if (rs.text) { + whlp_copyright(h, rs.text); + sfree(rs.text); + } + } + + /* + * Now do the primary navigation menu. + */ + for (p = sourceform; p; p = p->next) { + if (p->type == para_Chapter || + p->type == para_Appendix || + p->type == para_UnnumberedChapter) + whlp_navmenu(&state, p); + } + + state.curr_topic = contents_topic; + lastsect = NULL; + + /* ------------------------------------------------------------------ + * Now we've done the contents page, we're ready to go through + * and do the main manual text. Ooh. + */ + for (p = sourceform; p; p = p->next) switch (p->type) { + /* + * Things we ignore because we've already processed them or + * aren't going to touch them in this pass. 
+ */ + case para_IM: + case para_BR: + case para_Biblio: /* only touch BiblioCited */ + case para_VersionID: + case para_Copyright: + case para_Preamble: + case para_NoCite: + case para_Title: + break; + + /* + * Chapter and section titles: start a new Help topic. + */ + case para_Chapter: + case para_Appendix: + case para_UnnumberedChapter: + case para_Heading: + case para_Subsect: + if (lastsect && lastsect->child) { + paragraph *q; + /* + * Do a navigation menu for the previous section we + * were in. + */ + for (q = lastsect->child; q; q = q->sibling) + whlp_navmenu(&state, q); + } + { + rdstringc rs = {0, 0, NULL}; + WHLP_TOPIC new_topic, parent_topic; + char *macro, *topicid; + + new_topic = p->private_data; + whlp_browse_link(h, state.curr_topic, new_topic); + state.curr_topic = new_topic; + + if (p->kwtext) { + whlp_rdaddwc(&rs, p->kwtext); + rdaddsc(&rs, ": "); /* FIXME: configurability */ + } + whlp_rdaddwc(&rs, p->words); + if (p->parent == NULL) + parent_topic = contents_topic; + else + parent_topic = (WHLP_TOPIC)p->parent->private_data; + topicid = whlp_topic_id(parent_topic); + macro = smalloc(100+strlen(topicid)); + sprintf(macro, + "CBB(\"btn_up\",\"JI(`',`%s')\");EB(\"btn_up\")", + topicid); + whlp_begin_topic(h, new_topic, + rs.text ? rs.text : "", + macro, NULL); + sfree(macro); + + { + /* + * Output the .cnt entry. + * + * WinHelp has a bug involving having an internal + * node followed by a leaf at the same level: the + * leaf is output at the wrong level. We can mostly + * work around this by modifying the leaf level + * itself (see whlp_contents_write), but this + * doesn't work for top-level sections since we + * can't turn a level-1 leaf into a level-0 one. So + * for top-level leaf sections (Bibliography + * springs to mind), we output an internal node + * containing only the leaf for that section. + */ + int i; + paragraph *q; + + /* Count up the level. 
*/ + i = 1; + for (q = p; q->parent; q = q->parent) i++; + + if (p->child || !p->parent) { + /* + * If p has children then it needs to be a + * folder; if it has no parent then it needs to + * be a folder to work around the bug. + */ + whlp_contents_write(&state, i, rs.text, NULL); + i++; + } + whlp_contents_write(&state, i, rs.text, new_topic); + } + + sfree(rs.text); + + whlp_begin_para(h, WHLP_PARA_NONSCROLL); + if (p->kwtext) { + whlp_mkparagraph(&state, FONT_TITLE, p->kwtext, FALSE); + whlp_set_font(h, FONT_TITLE); + whlp_text(h, ": "); /* FIXME: configurability */ + } + whlp_mkparagraph(&state, FONT_TITLE, p->words, FALSE); + whlp_end_para(h); + + lastsect = p; + } + break; + + case para_Rule: + whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12); + whlp_para_attr(h, WHLP_PARA_ALIGNMENT, WHLP_ALIGN_CENTRE); + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, FONT_RULE); +#define TEN "\xA0\xA0\xA0\xA0\xA0\xA0\xA0\xA0\xA0\xA0" +#define TWENTY TEN TEN +#define FORTY TWENTY TWENTY +#define EIGHTY FORTY FORTY + whlp_text(h, EIGHTY); +#undef TEN +#undef TWENTY +#undef FORTY +#undef EIGHTY + whlp_end_para(h); + break; + + case para_Normal: + case para_BiblioCited: + case para_Bullet: + case para_NumberedList: + whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12); + if (p->type == para_Bullet || p->type == para_NumberedList) { + whlp_para_attr(h, WHLP_PARA_LEFTINDENT, 72); + whlp_para_attr(h, WHLP_PARA_FIRSTLINEINDENT, -36); + whlp_set_tabstop(h, 72, WHLP_ALIGN_LEFT); + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, FONT_NORMAL); + if (p->type == para_Bullet) { + whlp_text(h, "\x95"); + } else { + whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, FALSE); + whlp_text(h, "."); + } + whlp_tab(h); + } else { + whlp_begin_para(h, WHLP_PARA_SCROLL); + } + + if (p->type == para_BiblioCited) { + whlp_mkparagraph(&state, FONT_NORMAL, p->kwtext, FALSE); + whlp_text(h, " "); + } + + whlp_mkparagraph(&state, FONT_NORMAL, p->words, FALSE); + whlp_end_para(h); + break; + + case 
para_Code: + /* + * In a code paragraph, each individual word is a line. For + * Help files, we will have to output this as a set of + * paragraphs, all but the last of which don't set + * SPACEBELOW. + */ + { + word *w; + char *c; + for (w = p->words; w; w = w->next) { + if (!w->next) + whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12); /* only the final line gets space below */ + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, FONT_CODE); + whlp_convert(w->text, &c, FALSE); /* return value ignored: c is output as converted */ + whlp_text(h, c); + sfree(c); + whlp_end_para(h); + } + } + break; + } + + fclose(state.cntfp); + whlp_close(h, filename); + + /* + * Loop over the index entries, cleaning up our final text + * forms. + */ + for (i = 0; (ie = index234(idx->entries, i)) != NULL; i++) { + sfree(ie->backend_data); + } +} + /* + * Write one line to the contents file state->cntfp: the level + * (adjusted by state->cnt_workaround), a space, then text with + * every = preceded by a backslash and, when topic is non-NULL, + * an = followed by whlp_topic_id(topic). Callers pass a NULL + * topic for folder-type entries. + */ +static void whlp_contents_write(struct bk_whlp_state *state, + int level, char *text, WHLP_TOPIC topic) { + /* + * Horrifying bug in WinHelp. When dropping a section level or + * more without using a folder-type entry, WinHelp accidentally + * adds one to the section level. So we correct for that here. 
+ */ + if (state->cnt_last_level > level && topic) + state->cnt_workaround = -1; /* leaf entry after a drop in level: compensate */ + else if (!topic) + state->cnt_workaround = 0; /* a folder entry cancels the compensation; otherwise the previous value persists */ + state->cnt_last_level = level; + + fprintf(state->cntfp, "%d ", level + state->cnt_workaround); + while (*text) { + if (*text == '=') + fputc('\\', state->cntfp); /* presumably so it is not taken for the = that introduces the topic id */ + fputc(*text, state->cntfp); + text++; + } + if (topic) + fprintf(state->cntfp, "=%s", whlp_topic_id(topic)); + fputc('\n', state->cntfp); +} + /* + * Output one navigation menu entry for section p: a non-scrolling + * paragraph holding a hyperlink to the topic in p->private_data. + * The link text is the kwtext, a colon and the words, or just the + * words when p has no kwtext. Both are rendered in subsidiary + * mode. + */ +static void whlp_navmenu(struct bk_whlp_state *state, paragraph *p) { + whlp_begin_para(state->h, WHLP_PARA_NONSCROLL); + whlp_start_hyperlink(state->h, (WHLP_TOPIC)p->private_data); + if (p->kwtext) { + whlp_mkparagraph(state, FONT_NORMAL, p->kwtext, TRUE); + whlp_set_font(state->h, FONT_NORMAL); + whlp_text(state->h, ": "); /* FIXME: configurability */ + } + whlp_mkparagraph(state, FONT_NORMAL, p->words, TRUE); + whlp_end_hyperlink(state->h); + whlp_end_para(state->h); + +} + /* + * Output the word list text into the paragraph currently open on + * state->h. font is the base font; emphasised words use + * font + FONT_EMPH and code words font + FONT_CODE. When + * subsidiary is nonzero, index references and cross-reference + * hyperlinks are not generated. A cross-reference whose keyword + * cannot be looked up is output as plain text, without a + * hyperlink. + */ +static void whlp_mkparagraph(struct bk_whlp_state *state, + int font, word *text, int subsidiary) { + keyword *kwl; + int deffont = font; + int currfont = -1; /* no font selected yet, so the first text word always sets one */ + int newfont; + char *c; + paragraph *xref_target = NULL; /* non-NULL while a cross-reference hyperlink is open */ + + for (; text; text = text->next) switch (text->type) { + case word_HyperLink: + case word_HyperEnd: + break; + + case word_IndexRef: + if (subsidiary) break; /* disabled in subsidiary bits */ + { + indextag *tag = index_findtag(state->idx, text->text); + int i; + if (!tag) + break; + for (i = 0; i < tag->nrefs; i++) + whlp_index_term(state->h, tag->refs[i]->backend_data, + state->curr_topic); + } + break; + + case word_UpperXref: + case word_LowerXref: + if (subsidiary) break; /* disabled in subsidiary bits */ + kwl = kw_lookup(state->keywords, text->text); + assert(xref_target == NULL); + if (!kwl) + break; /* no such keyword: xref_target stays NULL, so word_XrefEnd ends no hyperlink */ + if (kwl->para->type == para_NumberedList) { + break; /* don't xref to numbered list items */ + } else if (kwl->para->type == para_BiblioCited) { + /* + * An xref to a bibliography item jumps to the section + * containing it. 
+ */ + if (kwl->para->parent) + xref_target = kwl->para->parent; + else + break; /* bibliography entry with no parent section: no link */ + } else { + xref_target = kwl->para; + } + whlp_start_hyperlink(state->h, (WHLP_TOPIC)xref_target->private_data); + break; + + case word_XrefEnd: + if (subsidiary) break; /* disabled in subsidiary bits */ + if (xref_target) + whlp_end_hyperlink(state->h); /* only close a link that was actually opened */ + xref_target = NULL; + break; + + case word_Normal: + case word_Emph: + case word_Code: + case word_WeakCode: + case word_WhiteSpace: + case word_EmphSpace: + case word_CodeSpace: + case word_WkCodeSpace: + case word_Quote: + case word_EmphQuote: + case word_CodeQuote: + case word_WkCodeQuote: + if (towordstyle(text->type) == word_Emph) + newfont = deffont + FONT_EMPH; + else if (towordstyle(text->type) == word_Code || + towordstyle(text->type) == word_WeakCode) + newfont = deffont + FONT_CODE; + else + newfont = deffont; + if (newfont != currfont) { /* emit a font change only when the style differs from the previous word */ + currfont = newfont; + whlp_set_font(state->h, newfont); + } + if (removeattr(text->type) == word_Normal) { + if (whlp_convert(text->text, &c, TRUE)) + whlp_text(state->h, c); + else + whlp_mkparagraph(state, deffont, text->alt, FALSE); /* not fully representable: render the alternative word list instead */ + sfree(c); /* whlp_convert stores a string in c even when it returns zero */ + } else if (removeattr(text->type) == word_WhiteSpace) { + whlp_text(state->h, " "); + } else if (removeattr(text->type) == word_Quote) { + whlp_text(state->h, + quoteaux(text->aux) == quote_Open ? 
"\x91" : "\x92"); + /* FIXME: configurability */ + } + break; + } +} + /* + * Append the text of the word list text to the string rs. Markup + * words (hyperlinks, cross-references, index references) add + * nothing. A word that whlp_convert cannot fully represent is + * replaced by the text of its alt list. Code-styled quote words + * are not expected here (see the assertion). + */ +static void whlp_rdaddwc(rdstringc *rs, word *text) { + char *c; + + for (; text; text = text->next) switch (text->type) { + case word_HyperLink: + case word_HyperEnd: + case word_UpperXref: + case word_LowerXref: + case word_XrefEnd: + case word_IndexRef: + break; + + case word_Normal: + case word_Emph: + case word_Code: + case word_WeakCode: + case word_WhiteSpace: + case word_EmphSpace: + case word_CodeSpace: + case word_WkCodeSpace: + case word_Quote: + case word_EmphQuote: + case word_CodeQuote: + case word_WkCodeQuote: + assert(text->type != word_CodeQuote && + text->type != word_WkCodeQuote); + if (removeattr(text->type) == word_Normal) { + if (whlp_convert(text->text, &c, FALSE)) + rdaddsc(rs, c); + else + whlp_rdaddwc(rs, text->alt); /* fall back to the alternative text */ + sfree(c); + } else if (removeattr(text->type) == word_WhiteSpace) { + rdaddc(rs, ' '); + } else if (removeattr(text->type) == word_Quote) { + rdaddc(rs, quoteaux(text->aux) == quote_Open ? '\x91' : '\x92'); + /* FIXME: configurability */ + } + break; + } +} + +/* + * Convert a wide string into a string of chars. If `result' is + * non-NULL, mallocs the resulting string and stores a pointer to + * it in `*result'. If `result' is NULL, merely checks whether all + * characters in the string are feasible for the output character + * set. + * + * Return is nonzero if all characters are OK. If not all + * characters are OK but `result' is non-NULL, a result _will_ + * still be generated! + * + * Characters 32-126 and 160-255 are OK and are copied unchanged, + * except that when hard_spaces is nonzero a space (32) is output + * as the character with octal code 240. Every other character is + * output as 0xBF. + */ +static int whlp_convert(wchar_t *s, char **result, int hard_spaces) { + /* + * FIXME. Currently this is ISO8859-1 only. + */ + int doing = (result != 0); /* nonzero when an output string is wanted */ + int ok = TRUE; + char *p = NULL; + int plen = 0, psize = 0; /* bytes used and bytes allocated in p */ + + for (; *s; s++) { + wchar_t c = *s; + char outc; + + if ((c >= 32 && c <= 126) || + (c >= 160 && c <= 255)) { + /* Char is OK. */ + if (c == 32 && hard_spaces) + outc = '\240'; + else + outc = (char)c; + } else { + /* Char is not OK. 
*/ + ok = FALSE; + outc = 0xBF; /* approximate the good old DEC `uh?' */ + } + if (doing) { + if (plen >= psize) { + psize = plen + 256; /* grow the buffer 256 bytes at a time */ + p = resize(p, psize); + } + p[plen++] = outc; + } + } + if (doing) { + p = resize(p, plen+1); /* exact final size including the terminator; presumably also allocates when p is still NULL (empty input) */ + p[plen] = '\0'; + *result = p; + } + return ok; +} diff --git a/bk_xhtml.c b/bk_xhtml.c new file mode 100644 index 0000000..2016e10 --- /dev/null +++ b/bk_xhtml.c @@ -0,0 +1,1446 @@ +/* + * xhtml backend for Halibut + * (initial implementation by James Aylett) + * + * Still to do: + * + * +++ doesn't handle non-breaking hyphens. Not sure how to yet. + * +++ entity names (from a file -- ideally supply normal SGML files) + * +++ configuration directive to file split where the current layout + * code wouldn't. Needs changes to _ponder_layout() and _do_paras(), + * perhaps others. + * + * Limitations: + * + * +++ biblio/index references target the nearest section marker, rather + * than having a dedicated target themselves. In large bibliographies + * this will cause problems. (The solution is to fake up a response + * from xhtml_find_section(), probably linking it into the sections + * chain just in case we need it again, and to make freeing it up + * easier.) docsrc.pl used to work as we do, however, and SGT agrees that + * this is acceptable for now. + * +++ can't cope with leaf-level == 0. It's all to do with the + * top-level file not being normal, probably not even having a valid + * section level, and stuff like that. I question whether this is an + * issue, frankly; small manuals that fit on one page should probably + * not be written in halibut at all. 
+ */ + +#include +#include +#include +#include "halibut.h" + +struct xhtmlsection_Struct { + struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */ + struct xhtmlsection_Struct *child; /* NULL if split across files */ + struct xhtmlsection_Struct *parent; /* NULL if split across files */ + struct xhtmlsection_Struct *chain; /* single structure independent of weird trees */ + paragraph *para; + struct xhtmlfile_Struct *file; /* which file is this a part of? */ + char *fragment; /* fragment id within the file */ + int level; +}; + +struct xhtmlfile_Struct { + struct xhtmlfile_Struct *next; + struct xhtmlfile_Struct *child; + struct xhtmlfile_Struct *parent; + char *filename; + struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */ + int is_leaf; /* is this file a leaf file, ie does it not have any children? */ +}; + +typedef struct xhtmlsection_Struct xhtmlsection; +typedef struct xhtmlfile_Struct xhtmlfile; +typedef struct xhtmlindex_Struct xhtmlindex; + +struct xhtmlindex_Struct { + int nsection; + int size; + xhtmlsection **sections; +}; + +typedef struct { + int contents_depth[6]; + int leaf_contains_contents; + int leaf_level; + int leaf_smallest_contents; + int include_version_id; + wchar_t *author, *description; + wchar_t *head_end, *body, *body_start, *body_end, *address_start, *address_end, *nav_attrs; + int suppress_address; +} xhtmlconfig; + +/*static void xhtml_level(paragraph *, int); +static void xhtml_level_0(paragraph *); +static void xhtml_docontents(FILE *, paragraph *, int); +static void xhtml_dosections(FILE *, paragraph *, int); +static void xhtml_dobody(FILE *, paragraph *, int);*/ + +static void xhtml_doheader(FILE *, word *); +static void xhtml_dofooter(FILE *); +static void xhtml_versionid(FILE *, word *, int); + +static void xhtml_utostr(wchar_t *, char **); +static int xhtml_para_level(paragraph *); +static int xhtml_reservedchar(int); + +static int xhtml_convert(wchar_t *, 
char **, int); +static void xhtml_rdaddwc(rdstringc *, word *, word *); +static void xhtml_para(FILE *, word *); +static void xhtml_codepara(FILE *, word *); +static void xhtml_heading(FILE *, paragraph *); + +/* File-global variables are much easier than passing these things + * all over the place. Evil, but easier. We can replace this with a single + * structure at some point. + */ +static xhtmlconfig conf; /* filled in by xhtml_configure at the start of xhtml_backend */ +static keywordlist *keywords; +static indexdata *idx; +static xhtmlfile *topfile; /* root of the file tree built by xhtml_ponder_layout */ +static xhtmlsection *topsection; /* head of the section chain searched by xhtml_find_section */ +static paragraph *sourceparas; /* the source paragraph list passed to xhtml_backend */ +static xhtmlfile *lastfile; +static xhtmlfile *xhtml_last_file = NULL; /* file most recently written by xhtml_do_file */ +static int last_level=-1; /* nesting level reached while writing contents entries */ +static xhtmlsection *currentsection; + /* + * Build the backend configuration. Starts from the defaults + * below, then scans source for para_Config paragraphs and applies + * every recognised xhtml-* keyword (compared with ustricmp). The + * value is whatever uadv(source->keyword) yields: converted with + * utoi for numbers and utob for booleans, and stored as the + * returned pointer for string options. A leaf level of zero is a + * fatal error. Unrecognised keywords are ignored. + */ +static xhtmlconfig xhtml_configure(paragraph *source) +{ + xhtmlconfig ret; + + /* + * Defaults. + */ + ret.contents_depth[0] = 2; + ret.contents_depth[1] = 3; + ret.contents_depth[2] = 4; + ret.contents_depth[3] = 5; + ret.contents_depth[4] = 6; + ret.contents_depth[5] = 7; + ret.leaf_level = 2; + ret.leaf_smallest_contents = 4; + ret.leaf_contains_contents = FALSE; + ret.include_version_id = TRUE; + ret.author = NULL; + ret.description = NULL; + ret.head_end = NULL; + ret.body = NULL; + ret.body_start = NULL; + ret.body_end = NULL; + ret.address_start = NULL; + ret.address_end = NULL; + ret.nav_attrs = NULL; + ret.suppress_address = FALSE; + + for (; source; source = source->next) + { + if (source->type == para_Config) + { + if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) { + ret.contents_depth[0] = utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) { + ret.contents_depth[1] = utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2")) { + ret.contents_depth[2] = utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3")) { + ret.contents_depth[3] = utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4")) { 
+ ret.contents_depth[4] = utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5")) { + ret.contents_depth[5] = utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-leaf-level")) { + ret.leaf_level = utoi(uadv(source->keyword)); + if (ret.leaf_level==0) { /* see the file header: leaf-level 0 is not supported */ + fatal(err_whatever, "xhtml-leaf-level cannot be zero"); + } + } else if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents")) { + ret.leaf_smallest_contents = utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-versionid")) { + ret.include_version_id = utob(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents")) { + ret.leaf_contains_contents = utob(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-suppress-address")) { + ret.suppress_address = utob(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-author")) { + ret.author = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"xhtml-description")) { + ret.description = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"xhtml-head-end")) { + ret.head_end = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"xhtml-body-start")) { + ret.body_start = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"xhtml-body-tag")) { + ret.body = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"xhtml-body-end")) { + ret.body_end = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"xhtml-address-start")) { + ret.address_start = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"xhtml-address-end")) { + ret.address_end = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"xhtml-navigation-attributes")) { + ret.nav_attrs = uadv(source->keyword); + } + } + } + + /* printf(" !!! leaf_level = %i\n", ret.leaf_level); + printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]); + printf(" !!! 
contentdepth-1 = %i\n", ret.contents_depth[1]); + printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]); + printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]); + printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]); + printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]); + printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/ + return ret; +} + /* + * Allocate a section record with every link NULL except chain, + * which is set to last (the previously created section). The + * level starts at -1 until the caller fills it in. + */ +static xhtmlsection *xhtml_new_section(xhtmlsection *last) +{ + xhtmlsection *ret = mknew(xhtmlsection); + ret->next=NULL; + ret->child=NULL; + ret->parent=NULL; + ret->chain=last; + ret->para=NULL; + ret->file=NULL; + ret->fragment=NULL; + ret->level=-1; /* marker: end of chain */ + return ret; +} + +/* Returns NULL or the section that marks that paragraph + * (for a paragraph that is not itself a section heading, the last + * section heading that precedes it in sourceparas is used). */ +static xhtmlsection *xhtml_find_section(paragraph *p) +{ + xhtmlsection *ret = topsection; + if (xhtml_para_level(p)==-1) { /* first, we back-track to a section paragraph */ + paragraph *p2 = sourceparas; + paragraph *p3 = NULL; /* most recent section paragraph seen before p */ + while (p2 && p2!=p) { + if (xhtml_para_level(p2)!=-1) { + p3 = p2; + } + p2=p2->next; + } + if (p3==NULL) { /* for some reason, we couldn't find a section before this paragraph ... ? */ + /* Note that this can happen, if you have a cross-reference to before the first chapter starts. + * So don't do that, then. 
+ */ + return NULL; + } + p=p3; + } + while (ret && ret->para != p) { /* linear search along the chain links */ +/* printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/ + ret=ret->chain; + } + return ret; +} + /* + * Allocate a file record whose first section is sect. With a NULL + * sect the file is named Contents.html (Manual.html if the leaf + * level is 0). Otherwise the name is built from the section + * paragraph: each word of its kwtext (or of its words, when it has + * no kwtext) whose removeattr() type is word_Normal is converted + * with xhtml_utostr and appended, followed by .html. is_leaf is + * set only when sect sits exactly at conf.leaf_level. + */ +static xhtmlfile *xhtml_new_file(xhtmlsection *sect) +{ + xhtmlfile *ret = mknew(xhtmlfile); + + ret->next=NULL; + ret->child=NULL; + ret->parent=NULL; + ret->filename=NULL; + ret->sections=sect; + ret->is_leaf=(sect!=NULL && sect->level==conf.leaf_level); + if (sect==NULL) { + if (conf.leaf_level==0) { /* currently unused */ +#define FILENAME_MANUAL "Manual.html" +#define FILENAME_CONTENTS "Contents.html" + ret->filename = smalloc(strlen(FILENAME_MANUAL)+1); + sprintf(ret->filename, FILENAME_MANUAL); + } else { + ret->filename = smalloc(strlen(FILENAME_CONTENTS)+1); + sprintf(ret->filename, FILENAME_CONTENTS); + } + } else { + paragraph *p = sect->para; + rdstringc fname_c = { 0, 0, NULL }; + char *c; + word *w; + for (w=(p->kwtext)?(p->kwtext):(p->words); w; w=w->next) + { + switch (removeattr(w->type)) + { + case word_Normal: + /*case word_Emph: + case word_Code: + case word_WeakCode:*/ + xhtml_utostr(w->text, &c); + rdaddsc(&fname_c,c); + sfree(c); + break; + } + } + rdaddsc(&fname_c, ".html"); + ret->filename = rdtrimc(&fname_c); + } + /* printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/ + return ret; +} + +/* + * Walk the tree fixing up files which are actually leaf (ie + * have no children) but aren't at leaf level, so they have the + * leaf flag set. + * + * Recurses into the children of a file that has any, and then + * into its following sibling. file must not be NULL. + */ +void xhtml_fixup_layout(xhtmlfile* file) +{ + if (file->child==NULL) { + file->is_leaf = TRUE; + } else { + xhtml_fixup_layout(file->child); + } + if (file->next) + xhtml_fixup_layout(file->next); +} + +/* + * Create the tree structure so we know where everything goes. 
+ * Method: + * + * Ignoring file splitting, we have three choices with each new section: + * + * +-----------------+-----------------+ + * | | | + * X +----X----+ (1) + * | | + * Y (3) + * | + * (3) + * + * Y is the last section we added (currentsect). + * If sect is the section we want to add, then: + * + * (1) if sect->level < currentsect->level + * (2) if sect->level == currentsect->level + * (3) if sect->level > currentsect->level + * + * This requires the constraint that you never skip section numbers + * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing). + * + * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change + * more than one level at a time. Lots of asserts, and probably part of + * the algorithm here, rely on this being true. (It currently isn't + * enforced by halibut, however.) + * + * File splitting makes this harder. For instance, say we added at (3) + * above and now need to add another section. We are splitting at level + * 2, ie the level of Y. Z is the last section we added: + * + * +-----------------+-----------------+ + * | | | + * X +----X----+ (1) + * | | + * +----Y----+ (1) + * | | + * Z (2) + * | + * (3) + * + * The (1) case is now split; we need to search upwards to find where + * to actually link in. The other two cases remain the same (and will + * always be like this). + * + * File splitting makes this harder, however. The decision of whether + * to split to a new file is always on the same condition, however (is + * the level of this section higher than the leaf_level configuration + * value or not). 
+ * + * Treating the cases backwards: + * + * (3) same file if sect->level > conf.leaf_level, otherwise new file + * + * if in the same file, currentsect->child points to sect + * otherwise the linking is done through the file tree (which works + * in more or less the same way, ie currentfile->child points to + * the new file) + * + * (2) same file if sect->level > conf.leaf_level, otherwise new file + * + * if in the same file, currentsect->next points to sect + * otherwise file linking and currentfile->next points to the new + * file (we know that Z must have caused a new file to be created) + * + * (1) same file if sect->level > conf.leaf_level, otherwise new file + * + * this is actually effectively the same case as (2) here, + * except that we first have to travel up the sections to figure + * out which section this new one will be a sibling of. In doing + * so, we may disappear off the top of a file and have to go up + * to its parent in the file tree. + * + */ +static void xhtml_ponder_layout(paragraph *p) +{ + xhtmlsection *lastsection; + xhtmlsection *currentsect; + xhtmlfile *currentfile; + + lastfile = NULL; + topsection = xhtml_new_section(NULL); + topfile = xhtml_new_file(NULL); + lastsection = topsection; + currentfile = topfile; + currentsect = topsection; + + for (; p; p=p->next) + { + int level = xhtml_para_level(p); + if (level>0) /* actually a section */ + { + xhtmlsection *sect; + word *w; + char *c; + rdstringc fname_c = { 0, 0, NULL }; + + sect = xhtml_new_section(lastsection); + lastsection = sect; + sect->para = p; + for (w=(p->kwtext2)?(p->kwtext2):(p->words); w; w=w->next) /* kwtext2 because we want numbers only! */ + { + switch (removeattr(w->type)) + { + case word_Normal: + /*case word_Emph: + case word_Code: + case word_WeakCode:*/ + xhtml_utostr(w->text, &c); + rdaddsc(&fname_c,c); + sfree(c); + break; + } + } +/* rdaddsc(&fname_c, ".html");*/ + sect->fragment = rdtrimc(&fname_c); + sect->level = level; + /* printf(" ! 
adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/ + + if (level>currentsect->level) { /* case (3) */ + if (level>conf.leaf_level) { /* same file */ + assert(currentfile->is_leaf); + currentsect->child = sect; + sect->parent=currentsect; + sect->file=currentfile; + /* printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/ + currentsect=sect; + } else { /* new file */ + xhtmlfile *file = xhtml_new_file(sect); + assert(!currentfile->is_leaf); + currentfile->child=file; + sect->file=file; + file->parent=currentfile; + /* printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/ + currentfile=file; + currentsect=sect; + } + } else if (level >= currentsect->file->sections->level) { + /* Case (1) or (2) *AND* still under the section that starts + * the current file. + * + * I'm not convinced that this couldn't be rolled in with the + * final else {} leg further down. It seems a lot of effort + * this way. 
+ */ + if (level>conf.leaf_level) { /* stick within the same file */ + assert(currentfile->is_leaf); + sect->file = currentfile; + while (currentsect && currentsect->level > level && + currentsect->file==currentsect->parent->file) { + currentsect = currentsect->parent; + } + assert(currentsect); + currentsect->next = sect; + assert(currentsect->level == sect->level); + sect->parent = currentsect->parent; + currentsect = sect; + /* printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/ + } else { /* new file */ + xhtmlfile *file = xhtml_new_file(sect); + sect->file=file; + currentfile->next=file; + file->parent=currentfile->parent; + file->is_leaf=(level==conf.leaf_level); + file->sections=sect; + /* printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/ + currentfile=file; + currentsect=sect; + } + } else { /* Case (1) or (2) and we must move up the file tree first */ + /* this loop is now probably irrelevant - we know we can't connect + * to anything in the current file */ + while (currentsect && levellevel) { + currentsect=currentsect->parent; + if (currentsect) { + /* printf(" * up one level to '%s'\n", currentsect->fragment);*/ + } else { + /* printf(" * up one level (off top of current file)\n");*/ + } + } + if (currentsect) { + /* I'm pretty sure this can now never fire */ + assert(currentfile->is_leaf); + /* printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/ + sect->file = currentfile; + currentsect->next=sect; + currentsect=sect; + } else { /* find a file we can attach to */ + while (currentfile && currentfile->sections && levelsections->level) { + currentfile=currentfile->parent; + if (currentfile) { + /* printf(" * up one file level to '%s'\n", currentfile->filename);*/ + } else { + /* printf(" * up one file level (off top of tree)\n");*/ + } + } + if (currentfile) { /* new file (we had to skip up a file to + get here, so we must be dealing 
with a + level no lower than the configured + leaf_level */ + xhtmlfile *file = xhtml_new_file(sect); + currentfile->next=file; + sect->file=file; + file->parent=currentfile->parent; + file->is_leaf=(level==conf.leaf_level); + file->sections=sect; + /* printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/ + currentfile=file; + currentsect=sect; + } else { + fatal(err_whatever, "Ran off the top trying to connect sibling: strange document."); + } + } + } + } + } + topsection = lastsection; /* get correct end of the chain */ + xhtml_fixup_layout(topfile); /* leaf files not at leaf level marked as such */ +} + +static void xhtml_do_index(); +static void xhtml_do_file(xhtmlfile *file); +static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform); +static void xhtml_do_paras(FILE *fp, paragraph *p); +static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit); +static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit); +static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit); +static int xhtml_do_contents(FILE *fp, xhtmlfile *file); +static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file); +static void xhtml_do_sections(FILE *fp, xhtmlsection *sections); + +/* + * Do all the files in this structure. + * + * Writes file itself, then its children, then its following + * siblings. A NULL file is ignored, so the caller may pass + * topfile->child even when the document produced no section + * files at all. + */ +static void xhtml_do_files(xhtmlfile *file) +{ + if (file==NULL) { /* nothing to write: empty subtree */ + return; + } + xhtml_do_file(file); + if (file->child) + xhtml_do_files(file->child); + if (file->next) + xhtml_do_files(file->next); +} + +/* + * Free up all memory used by the file tree from 'xfile' downwards + * + * Frees the filename, then the children and following siblings, + * then the record itself. A NULL xfile is ignored. + */ +static void xhtml_free_file(xhtmlfile* xfile) +{ + if (xfile==NULL) { + return; + } + + if (xfile->filename) { + sfree(xfile->filename); + } + xhtml_free_file(xfile->child); + xhtml_free_file(xfile->next); + sfree(xfile); +} + +/* + * Main function. 
+ */ +void xhtml_backend(paragraph *sourceform, keywordlist *in_keywords, + indexdata *in_idx) +{ +/* int i;*/ + indexentry *ientry; + int ti; + xhtmlsection *xsect; + + sourceparas = sourceform; /* publish the inputs through the file-scope variables */ + conf = xhtml_configure(sourceform); + keywords = in_keywords; + idx = in_idx; + + /* Clear up the index entries backend data pointers */ + for (ti=0; (ientry = (indexentry *)index234(idx->entries, ti))!=NULL; ti++) { + ientry->backend_data=NULL; + } + + xhtml_ponder_layout(sourceform); /* builds topfile and the section chain */ + + /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */ +/* xhtml_level_0(sourceform); + for (i=1; i<=conf.leaf_level; i++) + { + xhtml_level(sourceform, i); + }*/ + + /* new system ... (writes to *.html, but isn't fully trusted) */ + xhtml_do_top_file(topfile, sourceform); + assert(!topfile->next); /* shouldn't have a sibling at all */ + xhtml_do_files(topfile->child); + xhtml_do_index(); + + /* release file, section, index data structures */ + xsect = topsection; + while (xsect) { /* walk the chain links, saving the next pointer before freeing */ + xhtmlsection *tmp = xsect->chain; + if (xsect->fragment) { + sfree(xsect->fragment); + } + sfree(xsect); + xsect = tmp; + } + xhtml_free_file(topfile); /* NOTE(review): topfile, topsection and xhtml_last_file are not reset here and still point at freed memory */ + for (ti = 0; (ientry=(indexentry *)index234(idx->entries, ti))!=NULL; ti++) { + if (ientry->backend_data!=NULL) { + xhtmlindex *xi = (xhtmlindex*) ientry->backend_data; + if (xi->sections!=NULL) { + sfree(xi->sections); + } + sfree(xi); + } + ientry->backend_data = NULL; + } +} + /* + * Section depth of paragraph p: 1 for chapters, appendices and + * unnumbered chapters, p->aux+2 for headings and subsections, and + * -1 for every other paragraph type (not a section heading). + */ +static int xhtml_para_level(paragraph *p) +{ + switch (p->type) + { + case para_UnnumberedChapter: + case para_Chapter: + case para_Appendix: + return 1; + break; +/* case para_BiblioCited: + return 2; + break;*/ + case para_Heading: + case para_Subsect: + return p->aux+2; + break; + default: + return -1; + break; + } +} + +static char* xhtml_index_filename = "IndexPage.html"; + +/* Output the nav links for the current file. 
+ * file == NULL means we're doing the index + */ +static void xhtml_donavlinks(FILE *fp, xhtmlfile *file) +{ + xhtmlfile *xhtml_next_file = NULL; + fprintf(fp, "", conf.nav_attrs); + } else { + fprintf(fp, ">"); + } + if (xhtml_last_file==NULL) { + fprintf(fp, "Previous | "); + } else { + fprintf(fp, "Previous | ", xhtml_last_file->filename); + } + fprintf(fp, "Contents | "); + if (file != NULL) { /* otherwise we're doing nav links for the index */ + if (xhtml_next_file==NULL) + xhtml_next_file = file->child; + if (xhtml_next_file==NULL) + xhtml_next_file = file->next; + if (xhtml_next_file==NULL) + xhtml_next_file = file->parent->next; + } + if (xhtml_next_file==NULL) { + if (file==NULL) { /* index, so no next file */ + fprintf(fp, "Next "); + } else { + fprintf(fp, "Next", xhtml_index_filename); + } + } else { + fprintf(fp, "Next", xhtml_next_file->filename); + } + fprintf(fp, "

\n"); +} + +/* Write out the index file */ +static void xhtml_do_index() +{ + word temp_word = { NULL, NULL, word_Normal, 0, 0, L"Index", { NULL, 0, 0} }; + indexentry *y; + int ti; + FILE *fp = fopen(xhtml_index_filename, "w"); + + if (fp==NULL) + fatal(err_cantopenw, xhtml_index_filename); + xhtml_doheader(fp, &temp_word); + xhtml_donavlinks(fp, NULL); + + fprintf(fp, "
\n"); + /* iterate over idx->entries using the tree functions and display everything */ + for (ti = 0; (y = (indexentry *)index234(idx->entries, ti)) != NULL; ti++) { + if (y->backend_data) { + int i; + xhtmlindex *xi; + + fprintf(fp, "
"); + xhtml_para(fp, y->text); + fprintf(fp, "
\n
"); + + xi = (xhtmlindex*) y->backend_data; + for (i=0; insection; i++) { + xhtmlsection *sect = xi->sections[i]; + if (sect) { + fprintf(fp, "", sect->file->filename, sect->fragment); + if (sect->para->kwtext) { + xhtml_para(fp, sect->para->kwtext); + } else if (sect->para->words) { + xhtml_para(fp, sect->para->words); + } + fprintf(fp, ""); + if (i+1nsection) { + fprintf(fp, ", "); + } + } + } + fprintf(fp, "
\n"); + } + } + fprintf(fp, "
\n"); + + xhtml_donavlinks(fp, NULL); + xhtml_dofooter(fp); + fclose(fp); +} + +/* Output the given file. This includes whatever contents at beginning and end, etc. etc. */ +static void xhtml_do_file(xhtmlfile *file) +{ + FILE *fp = fopen(file->filename, "w"); + if (fp==NULL) + fatal(err_cantopenw, file->filename); + + if (file->sections->para->words) { + xhtml_doheader(fp, file->sections->para->words); + } else if (file->sections->para->kwtext) { + xhtml_doheader(fp, file->sections->para->kwtext); + } else { + xhtml_doheader(fp, NULL); + } + + xhtml_donavlinks(fp, file); + + if (file->is_leaf && conf.leaf_contains_contents && xhtml_do_contents(NULL, file)>=conf.leaf_smallest_contents) + xhtml_do_contents(fp, file); + xhtml_do_sections(fp, file->sections); + if (!file->is_leaf) + xhtml_do_naked_contents(fp, file); + + xhtml_donavlinks(fp, file); + + xhtml_dofooter(fp); + fclose(fp); + + xhtml_last_file = file; +} + +/* Output the top-level file. */ +static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform) +{ + paragraph *p; + int done=FALSE; + FILE *fp = fopen(file->filename, "w"); + if (fp==NULL) + fatal(err_cantopenw, file->filename); + + /* Do the title -- only one allowed */ + for (p = sourceform; p && !done; p = p->next) + { + if (p->type == para_Title) + { + xhtml_doheader(fp, p->words); + done=TRUE; + } + } + if (!done) + xhtml_doheader(fp, NULL /* Eek! */); + + /* Do the preamble and copyright */ + for (p = sourceform; p; p = p->next) + { + if (p->type == para_Preamble) + { + fprintf(fp, "

"); + xhtml_para(fp, p->words); + fprintf(fp, "

\n"); + } + } + for (p = sourceform; p; p = p->next) + { + if (p->type == para_Copyright) + { + fprintf(fp, "

"); + xhtml_para(fp, p->words); + fprintf(fp, "

\n"); + } + } + + xhtml_do_contents(fp, file); + xhtml_do_sections(fp, file->sections); + xhtml_dofooter(fp); + fclose(fp); +} + +/* Convert a Unicode string to an ASCII one. '?' is + * used for unmappable characters. + */ +static void xhtml_utostr(wchar_t *in, char **out) +{ + int l = ustrlen(in); + int i; + *out = smalloc(l+1); + for (i=0; i=32 && in[i]<=126) + (*out)[i]=(char)in[i]; + else + (*out)[i]='?'; + } + (*out)[i]=0; +} + +/* + * Write contents for the given file, and subfiles, down to + * the appropriate contents depth. Returns the number of + * entries written. + */ +static int xhtml_do_contents(FILE *fp, xhtmlfile *file) +{ + int level, limit, start_level, count = 0; + if (!file) + return 0; + + level = (file->sections)?(file->sections->level):(0); + limit = conf.contents_depth[(level>5)?(5):(level)]; + start_level = (file->is_leaf) ? (level-1) : (level); + last_level = start_level; + + count += xhtml_do_contents_section_limit(fp, file->sections, limit); + count += xhtml_do_contents_limit(fp, file->child, limit); + if (fp!=NULL) { + while (last_level > start_level) { + last_level--; + fprintf(fp, "\n"); + } + } + return count; +} + +/* As above, but doesn't do anything in the current file */ +static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file) +{ + int level, limit, start_level, count = 0; + if (!file) + return 0; + + level = (file->sections)?(file->sections->level):(0); + limit = conf.contents_depth[(level>5)?(5):(level)]; + start_level = (file->is_leaf) ? (level-1) : (level); + last_level = start_level; + + count = xhtml_do_contents_limit(fp, file->child, limit); + if (fp!=NULL) { + while (last_level > start_level) { + last_level--; + fprintf(fp, "\n"); + } + } + return count; +} + +/* + * Write contents for the given file, children, and siblings, down to + * given limit contents depth. 
+ */ +static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit) +{ + int count = 0; + while (file) { + count += xhtml_do_contents_section_limit(fp, file->sections, limit); + count += xhtml_do_contents_limit(fp, file->child, limit); + file = file->next; + } + return count; +} + +/* + * Write contents entries for the given section tree, down to the + * limit contents depth. + */ +static int xhtml_do_contents_section_deep_limit(FILE *fp, xhtmlsection *section, int limit) +{ + int count = 0; + while (section) { + if (!xhtml_add_contents_entry(fp, section, limit)) + return 0; + else + count++; + count += xhtml_do_contents_section_deep_limit(fp, section->child, limit); + section = section->next; + } + return count; +} + +/* + * Write contents entries for the given section tree, down to the + * limit contents depth. + */ +static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit) +{ + int count = 0; + if (!section) + return 0; + xhtml_add_contents_entry(fp, section, limit); + count=1; + count += xhtml_do_contents_section_deep_limit(fp, section->child, limit); + /* section=section->child; + while (section && xhtml_add_contents_entry(fp, section, limit)) { + section = section->next; + }*/ + return count; +} + +/* + * Add a section entry, unless we're exceeding the limit, in which + * case return FALSE (otherwise return TRUE). + */ +static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit) +{ + if (!section || section->level > limit) + return FALSE; + if (fp==NULL) + return TRUE; + while (last_level > section->level) { + last_level--; + fprintf(fp, "\n"); + } + while (last_level < section->level) { + last_level++; + fprintf(fp, "
    \n"); + } + fprintf(fp, "
  • ", section->file->filename, section->fragment); + if (section->para->kwtext) { + xhtml_para(fp, section->para->kwtext); + if (section->para->words) { + fprintf(fp, ": "); + } + } + if (section->para->words) { + xhtml_para(fp, section->para->words); + } + fprintf(fp, "
  • \n"); + return TRUE; +} + +/* + * Write all the sections in this file. Do all paragraphs in this section, then all + * children (recursively), then go on to the next one (tail recursively). + */ +static void xhtml_do_sections(FILE *fp, xhtmlsection *sections) +{ + while (sections) { + currentsection = sections; + xhtml_do_paras(fp, sections->para); + xhtml_do_sections(fp, sections->child); + sections = sections->next; + } +} + +/* Write this list of paragraphs. Close off all lists at the end. */ +static void xhtml_do_paras(FILE *fp, paragraph *p) +{ + int last_type = -1, first=TRUE; + if (!p) + return; + +/* for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/ + for (; p && (xhtml_para_level(p)==-1 || first); p=p->next) { + first=FALSE; + switch (p->type) + { + /* + * Things we ignore because we've already processed them or + * aren't going to touch them in this pass. + */ + case para_IM: + case para_BR: + case para_Biblio: /* only touch BiblioCited */ + case para_VersionID: + case para_Copyright: + case para_Preamble: + case para_NoCite: + case para_Title: + break; + + /* + * Chapter titles. + */ + case para_Chapter: + case para_Appendix: + case para_UnnumberedChapter: + xhtml_heading(fp, p); + break; + + case para_Heading: + case para_Subsect: + xhtml_heading(fp, p); + break; + + case para_Rule: + fprintf(fp, "\n
    \n"); + break; + + case para_Normal: + fprintf(fp, "\n

    "); + xhtml_para(fp, p->words); + fprintf(fp, "

    \n"); + break; + + case para_Bullet: + case para_NumberedList: + case para_BiblioCited: + if (last_type!=p->type) { + /* start up list if necessary */ + if (p->type == para_Bullet) { + fprintf(fp, "
      \n"); + } else if (p->type == para_NumberedList) { + fprintf(fp, "
        \n"); + } else if (p->type == para_BiblioCited) { + fprintf(fp, "
        \n"); + } + } + if (p->type == para_Bullet || p->type == para_NumberedList) + fprintf(fp, "
      1. "); + else if (p->type == para_BiblioCited) { + fprintf(fp, "
        "); + xhtml_para(fp, p->kwtext); + fprintf(fp, "
        \n
        "); + } + xhtml_para(fp, p->words); + if (p->type == para_BiblioCited) { + fprintf(fp, "
        \n"); + } else if (p->type == para_Bullet || p->type == para_NumberedList) { + fprintf(fp, "
      2. "); + } + if (p->type == para_Bullet || p->type == para_NumberedList || p->type == para_BiblioCited) + /* close off list if necessary */ + { + paragraph *p2 = p->next; + int close_off=FALSE; +/* if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/ + if (p2 && xhtml_para_level(p2)==-1) { + if (p2->type != p->type) + close_off=TRUE; + } else { + close_off=TRUE; + } + if (close_off) { + if (p->type == para_Bullet) { + fprintf(fp, "
    \n"); + } else if (p->type == para_NumberedList) { + fprintf(fp, "\n"); + } else if (p->type == para_BiblioCited) { + fprintf(fp, "\n"); + } + } + } + break; + + case para_Code: + xhtml_codepara(fp, p->words); + break; + } + last_type = p->type; + } +} + +/* + * Output a header for this XHTML file. + */ +static void xhtml_doheader(FILE *fp, word *title) +{ + fprintf(fp, "\n"); + fprintf(fp, "\n\n\n"); + if (title==NULL) + fprintf(fp, "The thing with no name!"); + else + xhtml_para(fp, title); + fprintf(fp, "\n"); + fprintf(fp, "\n", version); + if (conf.author) + fprintf(fp, "\n", conf.author); + if (conf.description) + fprintf(fp, "\n", conf.description); + if (conf.head_end) + fprintf(fp, "%ls\n", conf.head_end); + fprintf(fp, "\n\n"); + if (conf.body) + fprintf(fp, "%ls\n", conf.body); + else + fprintf(fp, "\n"); + if (conf.body_start) + fprintf(fp, "%ls\n", conf.body_start); +} + +/* + * Output a footer for this XHTML file. + */ +static void xhtml_dofooter(FILE *fp) +{ + fprintf(fp, "\n
    \n\n"); + if (conf.body_end) + fprintf(fp, "%ls\n", conf.body_end); + if (!conf.suppress_address) { + fprintf(fp,"
    \n"); + if (conf.address_start) + fprintf(fp, "%ls\n", conf.address_start); + /* Do the version ID */ + if (conf.include_version_id) { + paragraph *p; + int started = 0; + for (p = sourceparas; p; p = p->next) + if (p->type == para_VersionID) { + xhtml_versionid(fp, p->words, started); + started = 1; + } + } + if (conf.address_end) + fprintf(fp, "%ls\n", conf.address_end); + fprintf(fp, "
    \n"); + } + fprintf(fp, "\n\n\n"); +} + +/* + * Output the versionid paragraph. Typically this is a version control + * ID string (such as $Id...$ in RCS). + */ +static void xhtml_versionid(FILE *fp, word *text, int started) +{ + rdstringc t = { 0, 0, NULL }; + + rdaddc(&t, '['); /* FIXME: configurability */ + xhtml_rdaddwc(&t, text, NULL); + rdaddc(&t, ']'); /* FIXME: configurability */ + + if (started) + fprintf(fp, "
    \n"); + fprintf(fp, "%s\n", t.text); + sfree(t.text); +} + +/* Is this an XHTML reserved character? */ +static int xhtml_reservedchar(int c) +{ + if (c=='&' || c=='<' || c=='>' || c=='"') + return TRUE; + else + return FALSE; +} + +/* + * Convert a wide string into valid XHTML: Anything outside ASCII will + * be fixed up as an entity. Currently we don't worry about constraining the + * encoded character set, which we should probably do at some point (we can + * still fix up and return FALSE - see the last comment here). We also don't + * currently + * + * Because this is only used for words, spaces are HARD spaces (any other + * spaces will be word_Whitespace not word_Normal). So they become   + * Unless hard_spaces is FALSE, of course (code paragraphs break the above + * rule). + * + * If `result' is non-NULL, mallocs the resulting string and stores a pointer to + * it in `*result'. If `result' is NULL, merely checks whether all + * characters in the string are feasible. + * + * Return is nonzero if all characters are OK. If not all + * characters are OK but `result' is non-NULL, a result _will_ + * still be generated! + */ +static int xhtml_convert(wchar_t *s, char **result, int hard_spaces) { + int doing = (result != 0); + int ok = TRUE; + char *p = NULL; + int plen = 0, psize = 0; + + for (; *s; s++) { + wchar_t c = *s; + +#define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); } + + if (((c == 32 && !hard_spaces) || (c > 32 && c <= 126 && !xhtml_reservedchar(c)))) { + /* Char is OK. */ + if (doing) + { + ensure_size(plen); + p[plen++] = (char)c; + } + } else { + /* Char needs fixing up. */ + /* ok = FALSE; -- currently we never return FALSE; we + * might want to when considering a character set for the + * encoded document. 
+ */ + if (doing) + { + if (c==32) { /* a space in a word is a hard space */ + ensure_size(plen+6); /* includes space for the NUL, which is subsequently stomped on */ + sprintf(p+plen, " "); + plen+=6; + } else { + /* FIXME: entity names! */ + ensure_size(plen+8); /* includes space for the NUL, which is subsequently stomped on */ + plen+=sprintf(p+plen, "&#%04i;", (int)c); + } + } + } + } + if (doing) { + p = resize(p, plen+1); + p[plen] = '\0'; + *result = p; + } + return ok; +} + +/* + * This formats the given words as XHTML. + */ +static void xhtml_rdaddwc(rdstringc *rs, word *text, word *end) { + char *c; + keyword *kwl; + xhtmlsection *sect; + indextag *itag; + int ti; + + for (; text && text != end; text = text->next) { + switch (text->type) { + case word_HyperLink: + xhtml_utostr(text->text, &c); + rdaddsc(rs, ""); + sfree(c); + break; + + case word_UpperXref: + case word_LowerXref: + kwl = kw_lookup(keywords, text->text); + if (kwl) { + sect=xhtml_find_section(kwl->para); + if (sect) { + rdaddsc(rs, "file->filename); + rdaddc(rs, '#'); + rdaddsc(rs, sect->fragment); + rdaddsc(rs, "\">"); + } else { + rdaddsc(rs, ""); + error(err_whatever, "Couldn't locate cross-reference! (Probably a bibliography entry.)"); + } + } else { + rdaddsc(rs, ""); + error(err_whatever, "Couldn't locate cross-reference! (Wasn't in source file.)"); + } + break; + + case word_IndexRef: /* in theory we could make an index target here */ +/* rdaddsc(rs, "text, &c); + rdaddsc(rs, c); + sfree(c); + rdaddsc(rs, "\">");*/ + /* what we _do_ need to do is to fix up the backend data + * for any indexentry this points to. + */ + for (ti=0; (itag = (indextag *)index234(idx->tags, ti))!=NULL; ti++) { + /* FIXME: really ustricmp() and not ustrcmp()? 
*/ + if (ustricmp(itag->name, text->text)==0) { + break; + } + } + if (itag!=NULL) { + if (itag->refs!=NULL) { + int i; + for (i=0; inrefs; i++) { + xhtmlindex *idx_ref; + indexentry *ientry; + + ientry = itag->refs[i]; + if (ientry->backend_data==NULL) { + idx_ref = (xhtmlindex*) smalloc(sizeof(xhtmlindex)); + if (idx_ref==NULL) + fatal(err_nomemory); + idx_ref->nsection = 0; + idx_ref->size = 4; + idx_ref->sections = (xhtmlsection**) smalloc(idx_ref->size * sizeof(xhtmlsection*)); + if (idx_ref->sections==NULL) + fatal(err_nomemory); + ientry->backend_data = idx_ref; + } else { + idx_ref = ientry->backend_data; + if (idx_ref->nsection+1 > idx_ref->size) { + int new_size = idx_ref->size * 2; + idx_ref->sections = srealloc(idx_ref->sections, new_size * sizeof(xhtmlsection)); + if (idx_ref->sections==NULL) { + fatal(err_nomemory); + } + idx_ref->size = new_size; + } + } + idx_ref->sections[idx_ref->nsection++] = currentsection; +#if 0 +#endif + } + } else { + fatal(err_whatever, "Index tag had no entries!"); + } + } else { + fprintf(stderr, "Looking for index entry '%ls'\n", text->text); + fatal(err_whatever, "Couldn't locate index entry! 
(Wasn't in index.)"); + } + break; + + case word_HyperEnd: + case word_XrefEnd: + rdaddsc(rs, ""); + break; + + case word_Normal: + case word_Emph: + case word_Code: + case word_WeakCode: + case word_WhiteSpace: + case word_EmphSpace: + case word_CodeSpace: + case word_WkCodeSpace: + case word_Quote: + case word_EmphQuote: + case word_CodeQuote: + case word_WkCodeQuote: + assert(text->type != word_CodeQuote && + text->type != word_WkCodeQuote); + if (towordstyle(text->type) == word_Emph && + (attraux(text->aux) == attr_First || + attraux(text->aux) == attr_Only)) + rdaddsc(rs, ""); + else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) && + (attraux(text->aux) == attr_First || + attraux(text->aux) == attr_Only)) + rdaddsc(rs, ""); + + if (removeattr(text->type) == word_Normal) { + if (xhtml_convert(text->text, &c, TRUE)) /* spaces in the word are hard */ + rdaddsc(rs, c); + else + xhtml_rdaddwc(rs, text->alt, NULL); + sfree(c); + } else if (removeattr(text->type) == word_WhiteSpace) { + rdaddc(rs, ' '); + } else if (removeattr(text->type) == word_Quote) { + rdaddsc(rs, """); + } + + if (towordstyle(text->type) == word_Emph && + (attraux(text->aux) == attr_Last || + attraux(text->aux) == attr_Only)) + rdaddsc(rs, ""); + else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) && + (attraux(text->aux) == attr_Last || + attraux(text->aux) == attr_Only)) + rdaddsc(rs, ""); + break; + } + } +} + +/* Output a heading, formatted as XHTML. + */ +static void xhtml_heading(FILE *fp, paragraph *p) +{ + rdstringc t = { 0, 0, NULL }; + word *tprefix = p->kwtext; + word *nprefix = p->kwtext2; + word *text = p->words; + int level = xhtml_para_level(p); + xhtmlsection *sect = xhtml_find_section(p); + char *fragment; + if (sect) { + fragment = sect->fragment; + } else { + fragment = ""; /* FIXME: what else can we do? 
*/ + error(err_whatever, "Couldn't locate heading cross-reference!"); + } + + if (level>2 && nprefix) { /* FIXME: configurability on the level thing */ + xhtml_rdaddwc(&t, nprefix, NULL); + rdaddc(&t, ' '); /* FIXME: as below */ + } else if (tprefix) { + xhtml_rdaddwc(&t, tprefix, NULL); + rdaddsc(&t, ": "); /* FIXME: configurability */ + } + xhtml_rdaddwc(&t, text, NULL); + fprintf(fp, "%s\n", fragment, level, t.text, level); + sfree(t.text); +} + +/* Output a paragraph. Styles are handled by xhtml_rdaddwc(). + * This looks pretty simple; I may have missed something ... + */ +static void xhtml_para(FILE *fp, word *text) +{ + rdstringc out = { 0, 0, NULL }; + xhtml_rdaddwc(&out, text, NULL); + fprintf(fp, "%s", out.text); + sfree(out.text); +} + +/* Output a code paragraph. I'm treating this as preformatted, which + * may not be entirely correct. See xhtml_para() for my worries about + * this being overly-simple; however I think that most of the complexity + * of the text backend came entirely out of word wrapping anyway. + */ +static void xhtml_codepara(FILE *fp, word *text) +{ + fprintf(fp, "
    ");
    +    for (; text; text = text->next) if (text->type == word_WeakCode) {
    +	char *c;
    +	xhtml_convert(text->text, &c, FALSE);
    +	fprintf(fp, "%s\n", c);
    +	sfree(c);
    +    }
    +  fprintf(fp, "
    \n"); +} diff --git a/contents.c b/contents.c new file mode 100644 index 0000000..de45433 --- /dev/null +++ b/contents.c @@ -0,0 +1,222 @@ +/* + * contents.c: build a table of contents + */ + +#include +#include +#include +#include +#include "halibut.h" + +struct numberstate_Tag { + int chapternum; + int appendixnum; + int ischapter; + int *sectionlevels; + paragraph **currentsects; + paragraph *lastsect; + int oklevel; + int maxsectlevel; + int listitem; + wchar_t *chaptertext; /* the word for a chapter */ + wchar_t *sectiontext; /* the word for a section */ + wchar_t *apptext; /* the word for an appendix */ +}; + +numberstate *number_init(void) { + numberstate *ret = mknew(numberstate); + ret->chapternum = 0; + ret->appendixnum = -1; + ret->ischapter = 1; + ret->oklevel = -1; /* not even in a chapter yet */ + ret->maxsectlevel = 32; + ret->sectionlevels = mknewa(int, ret->maxsectlevel); + ret->currentsects = mknewa(paragraph *, ret->maxsectlevel+1); + memset(ret->currentsects, 0, (ret->maxsectlevel+1)*sizeof(paragraph *)); + ret->lastsect = NULL; + ret->listitem = -1; + return ret; +} + +void number_free(numberstate *state) { + sfree(state->sectionlevels); + sfree(state->currentsects); + sfree(state); +} + +static void dotext(word ***wret, wchar_t *text) { + word *mnewword = mknew(word); + mnewword->text = ustrdup(text); + mnewword->type = word_Normal; + mnewword->alt = NULL; + mnewword->next = NULL; + **wret = mnewword; + *wret = &mnewword->next; +} + +static void dospace(word ***wret) { + word *mnewword = mknew(word); + mnewword->text = NULL; + mnewword->type = word_WhiteSpace; + mnewword->alt = NULL; + mnewword->next = NULL; + **wret = mnewword; + *wret = &mnewword->next; +} + +static void donumber(word ***wret, int num) { + wchar_t text[20]; + wchar_t *p = text + sizeof(text); + *--p = L'\0'; + while (num != 0) { + assert(p > text); + *--p = L"0123456789"[num % 10]; + num /= 10; + } + dotext(wret, p); +} + +static void doanumber(word ***wret, int num) { + 
wchar_t text[20]; + wchar_t *p; + int nletters, aton; + nletters = 1; + aton = 25; + while (num > aton) { + nletters++; + num -= aton+1; + if (aton < INT_MAX/26) + aton = (aton+1) * 26 - 1; + else + aton = INT_MAX; + } + p = text + sizeof(text); + *--p = L'\0'; + while (nletters--) { + assert(p > text); + *--p = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"[num % 26]; + num /= 26; + } + dotext(wret, p); +} + +void number_cfg(numberstate *state, paragraph *source) { + /* + * Defaults + */ + state->chaptertext = L"Chapter"; + state->sectiontext = L"Section"; + state->apptext = L"Appendix"; + + for (; source; source = source->next) { + if (source->type == para_Config) { + if (!ustricmp(source->keyword, L"chapter")) { + state->chaptertext = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"section")) { + state->sectiontext = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"appendix")) { + state->apptext = uadv(source->keyword); + } + } + } +} + +word *number_mktext(numberstate *state, paragraph *p, wchar_t *category, + int prev, int *errflag) { + word *ret = NULL; + word **ret2 = &ret; + word **pret = &ret; + int i, level; + + level = -2; /* default for non-section-heading */ + switch (p->type) { + case para_Chapter: + state->chapternum++; + for (i = 0; i < state->maxsectlevel; i++) + state->sectionlevels[i] = 0; + dotext(&pret, category ? category : state->chaptertext); + dospace(&pret); + ret2 = pret; + donumber(&pret, state->chapternum); + state->ischapter = 1; + state->oklevel = 0; + level = -1; + break; + case para_Heading: + case para_Subsect: + level = (p->type == para_Heading ? 
0 : p->aux); + if (level > state->oklevel) { + error(err_sectjump, &p->fpos); + *errflag = TRUE; + ret = NULL; + break; + } + state->oklevel = level+1; + if (state->maxsectlevel <= level) { + state->maxsectlevel = level + 32; + state->sectionlevels = resize(state->sectionlevels, + state->maxsectlevel); + } + state->sectionlevels[level]++; + for (i = level+1; i < state->maxsectlevel; i++) + state->sectionlevels[i] = 0; + dotext(&pret, category ? category : state->sectiontext); + dospace(&pret); + ret2 = pret; + if (state->ischapter) + donumber(&pret, state->chapternum); + else + doanumber(&pret, state->appendixnum); + for (i = 0; i <= level; i++) { + dotext(&pret, L"."); + if (state->sectionlevels[i] == 0) + state->sectionlevels[i] = 1; + donumber(&pret, state->sectionlevels[i]); + } + break; + case para_Appendix: + state->appendixnum++; + for (i = 0; i < state->maxsectlevel; i++) + state->sectionlevels[i] = 0; + dotext(&pret, category ? category : state->apptext); + dospace(&pret); + ret2 = pret; + doanumber(&pret, state->appendixnum); + state->ischapter = 0; + state->oklevel = 0; + level = -1; + break; + case para_UnnumberedChapter: + level = -1; + break; + case para_NumberedList: + ret2 = pret; + if (prev != para_NumberedList) + state->listitem = 0; + state->listitem++; + donumber(&pret, state->listitem); + break; + } + + /* + * Now set up parent, child and sibling links. 
+ */ + p->parent = p->child = p->sibling = NULL; + if (level != -2) { + if (state->currentsects[level+1]) + state->currentsects[level+1]->sibling = p; + if (level >= 0 && state->currentsects[level]) { + p->parent = state->currentsects[level]; + if (!state->currentsects[level]->child) + state->currentsects[level]->child = p; + } + state->currentsects[level+1] = state->lastsect = p; + for (i = level+2; i < state->maxsectlevel+1; i++) + state->currentsects[i] = NULL; + } else { + p->parent = state->lastsect; + } + + p->kwtext2 = *ret2; + return ret; +} diff --git a/error.c b/error.c new file mode 100644 index 0000000..6d8dd13 --- /dev/null +++ b/error.c @@ -0,0 +1,219 @@ +/* + * error.c: Halibut error handling + */ + +#include +#include +#include +#include "halibut.h" + +/* + * Error flags + */ +#define PREFIX 0x0001 /* give `halibut:' prefix */ +#define FILEPOS 0x0002 /* give file position prefix */ + +static void do_error(int code, va_list ap) { + char error[1024]; + char auxbuf[256]; + char *sp, *sp2; + wchar_t *wsp; + filepos fpos, fpos2; + int flags; + + switch(code) { + case err_nomemory: /* no arguments */ + sprintf(error, "out of memory"); + flags = PREFIX; + break; + case err_optnoarg: + sp = va_arg(ap, char *); + sprintf(error, "option `-%.200s' requires an argument", sp); + flags = PREFIX; + break; + case err_nosuchopt: + sp = va_arg(ap, char *); + sprintf(error, "unrecognised option `-%.200s'", sp); + flags = PREFIX; + break; + case err_noinput: /* no arguments */ + sprintf(error, "no input files"); + flags = PREFIX; + break; + case err_cantopen: + sp = va_arg(ap, char *); + sprintf(error, "unable to open input file `%.200s'", sp); + flags = PREFIX; + break; + case err_nodata: /* no arguments */ + sprintf(error, "no data in input files"); + flags = PREFIX; + break; + case err_brokencodepara: + fpos = *va_arg(ap, filepos *); + sprintf(error, "every line of a code paragraph should begin `\\c'"); + flags = FILEPOS; + break; + case err_kwunclosed: + fpos = 
*va_arg(ap, filepos *); + sprintf(error, "expected `}' after paragraph keyword"); + flags = FILEPOS; + break; + case err_kwexpected: + fpos = *va_arg(ap, filepos *); + sprintf(error, "expected a paragraph keyword"); + flags = FILEPOS; + break; + case err_kwillegal: + fpos = *va_arg(ap, filepos *); + sprintf(error, "expected no paragraph keyword"); + flags = FILEPOS; + break; + case err_kwtoomany: + fpos = *va_arg(ap, filepos *); + sprintf(error, "expected only one paragraph keyword"); + flags = FILEPOS; + break; + case err_bodyillegal: + fpos = *va_arg(ap, filepos *); + sprintf(error, "expected no text after paragraph keyword"); + flags = FILEPOS; + break; + case err_badparatype: + wsp = va_arg(ap, wchar_t *); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + fpos = *va_arg(ap, filepos *); + sprintf(error, "command `%.200s' unrecognised at start of" + " paragraph", sp); + flags = FILEPOS; + break; + case err_badmidcmd: + wsp = va_arg(ap, wchar_t *); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + fpos = *va_arg(ap, filepos *); + sprintf(error, "command `%.200s' unexpected in mid-paragraph", sp); + flags = FILEPOS; + break; + case err_unexbrace: + fpos = *va_arg(ap, filepos *); + sprintf(error, "brace character unexpected in mid-paragraph"); + flags = FILEPOS; + break; + case err_explbr: + fpos = *va_arg(ap, filepos *); + sprintf(error, "expected `{' after command"); + flags = FILEPOS; + break; + case err_commenteof: + fpos = *va_arg(ap, filepos *); + sprintf(error, "end of file unexpected inside `\\#{...}' comment"); + flags = FILEPOS; + break; + case err_kwexprbr: + fpos = *va_arg(ap, filepos *); + sprintf(error, "expected `}' after cross-reference"); + flags = FILEPOS; + break; + case err_missingrbrace: + fpos = *va_arg(ap, filepos *); + sprintf(error, "unclosed braces at end of paragraph"); + flags = FILEPOS; + break; + case err_nestedstyles: + fpos = *va_arg(ap, filepos *); + sprintf(error, "unable to nest text styles"); + flags = FILEPOS; + break; + case 
err_nestedindex: + fpos = *va_arg(ap, filepos *); + sprintf(error, "unable to nest index markings"); + flags = FILEPOS; + break; + case err_nosuchkw: + fpos = *va_arg(ap, filepos *); + wsp = va_arg(ap, wchar_t *); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sprintf(error, "unable to resolve cross-reference to `%.200s'", sp); + flags = FILEPOS; + break; + case err_multiBR: + fpos = *va_arg(ap, filepos *); + wsp = va_arg(ap, wchar_t *); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sprintf(error, "multiple `\\BR' entries given for `%.200s'", sp); + flags = FILEPOS; + break; + case err_nosuchidxtag: + wsp = va_arg(ap, wchar_t *); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sprintf(error, "`\\IM' on unknown index tag `%.200s'", sp); + flags = 0; + /* FIXME: need to get a filepos to here somehow */ + break; + case err_cantopenw: + sp = va_arg(ap, char *); + sprintf(error, "unable to open output file `%.200s'", sp); + flags = PREFIX; + break; + case err_macroexists: + fpos = *va_arg(ap, filepos *); + wsp = va_arg(ap, wchar_t *); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sprintf(error, "macro `%.200s' already defined", sp); + flags = FILEPOS; + break; + case err_sectjump: + fpos = *va_arg(ap, filepos *); + sprintf(error, "expected higher heading levels before this one"); + flags = FILEPOS; + break; + case err_winhelp_ctxclash: + fpos = *va_arg(ap, filepos *); + sp = va_arg(ap, char *); + sp2 = va_arg(ap, char *); + sprintf(error, "Windows Help context id `%.200s' clashes with " + "previously defined `%.200s'", sp, sp2); + flags = FILEPOS; + break; + case err_multikw: + fpos = *va_arg(ap, filepos *); + fpos2 = *va_arg(ap, filepos *); + wsp = va_arg(ap, wchar_t *); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sprintf(error, "paragraph keyword `%.200s' already defined at ", sp); + sprintf(error + strlen(error), "%s:%d", fpos2.filename, fpos2.line); + flags = FILEPOS; + break; + case err_whatever: + sp = va_arg(ap, char *); + vsprintf(error, sp, ap); + flags = 
PREFIX; + break; + } + + if (flags & PREFIX) + fputs("halibut: ", stderr); + if (flags & FILEPOS) { + fprintf(stderr, "%s:%d:", fpos.filename, fpos.line); + if (fpos.col > 0) + fprintf(stderr, "%d:", fpos.col); + fputc(' ', stderr); + } + fputs(error, stderr); + fputc('\n', stderr); +} + +void fatal(int code, ...) { + va_list ap; + va_start(ap, code); + do_error(code, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +void error(int code, ...) { + va_list ap; + va_start(ap, code); + do_error(code, ap); + va_end(ap); +} diff --git a/halibut.h b/halibut.h new file mode 100644 index 0000000..c948624 --- /dev/null +++ b/halibut.h @@ -0,0 +1,416 @@ +#ifndef HALIBUT_HALIBUT_H +#define HALIBUT_HALIBUT_H + +#include +#include +#include + +#ifdef __GNUC__ +#define NORETURN __attribute__((__noreturn__)) +#else +#define NORETURN /* nothing */ +#endif + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +/* For suppressing unused-parameter warnings */ +#define IGNORE(x) ( (x) = (x) ) + +#include "tree234.h" + +/* + * Structure tags + */ +typedef struct input_Tag input; +typedef struct filepos_Tag filepos; +typedef struct paragraph_Tag paragraph; +typedef struct word_Tag word; +typedef struct keywordlist_Tag keywordlist; +typedef struct keyword_Tag keyword; +typedef struct userstyle_Tag userstyle; +typedef struct numberstate_Tag numberstate; +typedef struct indexdata_Tag indexdata; +typedef struct indextag_Tag indextag; +typedef struct indexentry_Tag indexentry; +typedef struct macrostack_Tag macrostack; + +/* + * Data structure to hold a file name and index, a line and a + * column number, for reporting errors + */ +struct filepos_Tag { + char *filename; + int line, col; +}; + +/* + * Data structure to hold all the file names etc for input + */ +typedef struct pushback_Tag { + int chr; + filepos pos; +} pushback; +struct input_Tag { + char **filenames; /* complete list of input files */ + int nfiles; /* how many in the list */ + FILE *currfp; /* the 
currently open one */ + int currindex; /* which one is that in the list */ + pushback *pushback; /* pushed-back input characters */ + int npushback, pushbacksize; + filepos pos; + int reportcols; /* report column numbers in errors */ + macrostack *stack; /* macro expansions in force */ +}; + +/* + * Data structure to hold the input form of the source, ie a linked + * list of paragraphs + */ +struct paragraph_Tag { + paragraph *next; + int type; + wchar_t *keyword; /* for most special paragraphs */ + word *words; /* list of words in paragraph */ + int aux; /* number, in a numbered paragraph + * or subsection level + */ + word *kwtext; /* chapter/section indication */ + word *kwtext2; /* numeric-only form of kwtext */ + filepos fpos; + + paragraph *parent, *child, *sibling; /* for hierarchy navigation */ + + void *private_data; /* for temp use in backends */ +}; +enum { + para_IM, /* index merge */ + para_BR, /* bibliography rewrite */ + para_Rule, /* random horizontal rule */ + para_Chapter, + para_Appendix, + para_UnnumberedChapter, + para_Heading, + para_Subsect, + para_Normal, + para_Biblio, /* causes no output unless turned ... */ + para_BiblioCited, /* ... into this paragraph type */ + para_Bullet, + para_NumberedList, + para_Code, + para_Copyright, + para_Preamble, + para_NoCite, + para_Title, + para_VersionID, + para_Config, /* configuration directive */ + para_NotParaType /* placeholder value */ +}; + +/* + * Data structure to hold an individual word + */ +struct word_Tag { + word *next, *alt; + int type; + int aux; + int breaks; /* can a line break after it? */ + wchar_t *text; + filepos fpos; +}; +enum { + /* ORDERING CONSTRAINT: these normal-word types ... */ + word_Normal, + word_Emph, + word_Code, /* monospaced; `quoted' in text */ + word_WeakCode, /* monospaced, normal in text */ + /* ... must be in the same order as these space types ... 
*/ + word_WhiteSpace, /* text is NULL or ignorable */ + word_EmphSpace, /* WhiteSpace when emphasised */ + word_CodeSpace, /* WhiteSpace when code */ + word_WkCodeSpace, /* WhiteSpace when weak code */ + /* ... and must be in the same order as these quote types ... */ + word_Quote, /* text is NULL or ignorable */ + word_EmphQuote, /* Quote when emphasised */ + word_CodeQuote, /* (can't happen) */ + word_WkCodeQuote, /* (can't happen) */ + /* END ORDERING CONSTRAINT */ + word_internal_endattrs, + word_UpperXref, /* \K */ + word_LowerXref, /* \k */ + word_XrefEnd, /* (invisible; no text) */ + word_IndexRef, /* (always an invisible one) */ + word_HyperLink, /* (invisible) */ + word_HyperEnd /* (also invisible; no text) */ +}; +/* aux values for attributed words */ +enum { + attr_Only = 0x0000, /* a lone word with the attribute */ + attr_First = 0x0001, /* the first of a series */ + attr_Last = 0x0002, /* the last of a series */ + attr_Always = 0x0003, /* any other part of a series */ + attr_mask = 0x0003, +}; +/* aux values for quote-type words */ +enum { + quote_Open = 0x0010, + quote_Close = 0x0020, + quote_mask = 0x0030, +}; +#define isattr(x) ( ( (x) > word_Normal && (x) < word_WhiteSpace ) || \ + ( (x) > word_WhiteSpace && (x) < word_internal_endattrs ) ) +#define sameattr(x,y) ( (((x)-(y)) & 3) == 0 ) +#define towordstyle(x) ( word_Normal + ((x) & 3) ) +#define tospacestyle(x) ( word_WhiteSpace + ((x) & 3) ) +#define toquotestyle(x) ( word_Quote + ((x) & 3) ) +#define removeattr(x) ( word_Normal + ((x) &~ 3) ) + +#define attraux(x) ( (x) & attr_mask ) +#define quoteaux(x) ( (x) & quote_mask ) + +/* + * error.c + */ +void fatal(int code, ...) 
NORETURN; +void error(int code, ...); +enum { + err_nomemory, /* out of memory */ + err_optnoarg, /* option `-%s' requires an argument */ + err_nosuchopt, /* unrecognised option `-%s' */ + err_noinput, /* no input files */ + err_cantopen, /* unable to open input file `%s' */ + err_nodata, /* no data in input files */ + err_brokencodepara, /* line in codepara didn't begin `\c' */ + err_kwunclosed, /* expected `}' after keyword */ + err_kwillegal, /* paragraph type expects no keyword */ + err_kwexpected, /* paragraph type expects a keyword */ + err_kwtoomany, /* paragraph type expects only 1 */ + err_bodyillegal, /* paragraph type expects only kws! */ + err_badparatype, /* invalid command at start of para */ + err_badmidcmd, /* invalid command in mid-para */ + err_unexbrace, /* unexpected brace */ + err_explbr, /* expected `{' after command */ + err_commenteof, /* EOF inside braced comment */ + err_kwexprbr, /* expected `}' after cross-ref */ + err_missingrbrace, /* unclosed braces at end of para */ + err_nestedstyles, /* unable to nest text styles */ + err_nestedindex, /* unable to nest `\i' thingys */ + err_nosuchkw, /* unresolved cross-reference */ + err_multiBR, /* multiple \BRs on same keyword */ + err_nosuchidxtag, /* \IM on unknown index tag (warning) */ + err_cantopenw, /* can't open output file for write */ + err_macroexists, /* this macro already exists */ + err_sectjump, /* jump a heading level, eg \C -> \S */ + err_winhelp_ctxclash, /* WinHelp context ID hash clash */ + err_multikw, /* keyword clash in sections */ + err_whatever /* random error of another type */ +}; + +/* + * malloc.c + */ +#ifdef LOGALLOC +void *smalloc(char *file, int line, int size); +void *srealloc(char *file, int line, void *p, int size); +void sfree(char *file, int line, void *p); +#define smalloc(x) smalloc(__FILE__, __LINE__, x) +#define srealloc(x, y) srealloc(__FILE__, __LINE__, x, y) +#define sfree(x) sfree(__FILE__, __LINE__, x) +#else +void *smalloc(int size); +void 
*srealloc(void *p, int size); +void sfree(void *p); +#endif +void free_word_list(word *w); +void free_para_list(paragraph *p); +word *dup_word_list(word *w); +char *dupstr(char *s); + +#define mknew(type) ( (type *) smalloc (sizeof (type)) ) /* allocate one object of `type' via smalloc */ +#define mknewa(type, number) ( (type *) smalloc ((number) * sizeof (type)) ) /* allocate an array of `number' objects of `type' */ +#define resize(array, len) ( srealloc ((array), (len) * sizeof (*(array))) ) /* srealloc `array' to `len' elements; the result must be assigned back by the caller */ +#define lenof(array) ( sizeof(array) / sizeof(*(array)) ) /* element count; only meaningful for true arrays, not pointers */ + +/* + * ustring.c + */ +wchar_t *ustrdup(wchar_t *s); +char *ustrtoa(wchar_t *s, char *outbuf, int size); +int ustrlen(wchar_t *s); +wchar_t *uadv(wchar_t *s); +wchar_t *ustrcpy(wchar_t *dest, wchar_t *source); +wchar_t utolower(wchar_t); +int ustrcmp(wchar_t *lhs, wchar_t *rhs); +int ustricmp(wchar_t *lhs, wchar_t *rhs); +int utoi(wchar_t *); +int utob(wchar_t *); +int uisdigit(wchar_t); +wchar_t *ustrlow(wchar_t *s); +wchar_t *ustrftime(wchar_t *fmt, struct tm *timespec); + +/* + * help.c + */ +void help(void); +void usage(void); +void showversion(void); + +/* + * licence.c + */ +void licence(void); + +/* + * version.c + */ +extern const char *const version; /* declaration only: without `extern' every file including this header would emit its own tentative definition; the one real definition is expected in version.c */ + +/* + * misc.c + */ +typedef struct stackTag *stack; +stack stk_new(void); +void stk_free(stack); +void stk_push(stack, void *); +void *stk_pop(stack); + +typedef struct tagRdstring rdstring; +struct tagRdstring { + int pos, size; + wchar_t *text; +}; +typedef struct tagRdstringc rdstringc; +struct tagRdstringc { + int pos, size; + char *text; +}; +extern const rdstring empty_rdstring; +extern const rdstringc empty_rdstringc; +void rdadd(rdstring *rs, wchar_t c); +void rdadds(rdstring *rs, wchar_t *p); +wchar_t *rdtrim(rdstring *rs); +void rdaddc(rdstringc *rs, char c); +void rdaddsc(rdstringc *rs, char *p); +char *rdtrimc(rdstringc *rs); + +int compare_wordlists(word *a, word *b); + +void mark_attr_ends(paragraph *sourceform); + +typedef struct tagWrappedLine wrappedline; +struct tagWrappedLine { + wrappedline *next; + word *begin, *end; /* first & last words of 
line */ + int nspaces; /* number of whitespaces in line */ + int shortfall; /* how much shorter than max width */ +}; +wrappedline *wrap_para(word *, int, int, int (*)(word *)); /* NOTE(review): the meaning of the two ints and of the callback is not visible from this header - see misc.c */ +void wrap_free(wrappedline *); + +/* + * input.c + */ +paragraph *read_input(input *in, indexdata *idx); + +/* + * keywords.c + */ +struct keywordlist_Tag { + int nkeywords; + int size; + tree234 *keys; /* sorted by `key' field */ + word **looseends; /* non-keyword list element numbers */ + int nlooseends; + int looseendssize; +}; +struct keyword_Tag { + wchar_t *key; /* the keyword itself */ + word *text; /* "Chapter 2", "Appendix Q"... */ + /* (NB: filepos are not set) */ + paragraph *para; /* the paragraph referenced */ +}; +keyword *kw_lookup(keywordlist *, wchar_t *); +keywordlist *get_keywords(paragraph *); +void free_keywords(keywordlist *); +void subst_keywords(paragraph *, keywordlist *); + +/* + * index.c + */ + +/* + * Data structure to hold both sides of the index. + */ +struct indexdata_Tag { + tree234 *tags; /* holds type `indextag' */ + tree234 *entries; /* holds type `indexentry' */ +}; + +/* + * Data structure to hold an index tag (LHS of index). + */ +struct indextag_Tag { + wchar_t *name; /* tag name; the tag tree is ordered on this */ + word *implicit_text; /* text of the implicit \IM; index_merge frees and clears it when an explicit \IM arrives */ + word **explicit_texts; /* growable array of explicit \IM texts */ + int nexplicit, explicit_size; /* slots used / slots allocated in explicit_texts */ + int nrefs; /* number of elements in refs; filled in by build_index */ + indexentry **refs; /* array of entries referenced by tag */ +}; + +/* + * Data structure to hold an index entry (RHS of index). 
+ */ +struct indexentry_Tag { + word *text; + void *backend_data; /* private to back end */ +}; + +indexdata *make_index(void); +void cleanup_index(indexdata *); +/* index_merge takes responsibility for freeing the word list (its last + * argument) iff implicit; it never takes ownership of the tag string */ +void index_merge(indexdata *, int is_explicit, wchar_t *, word *); +void build_index(indexdata *); +void index_debug(indexdata *); +indextag *index_findtag(indexdata *idx, wchar_t *name); + +/* + * contents.c + */ +numberstate *number_init(void); +void number_cfg(numberstate *, paragraph *); +word *number_mktext(numberstate *, paragraph *, wchar_t *, int , int *); +void number_free(numberstate *); + +/* + * biblio.c + */ +void gen_citations(paragraph *, keywordlist *); + +/* + * style.c + */ +struct userstyle_Tag { +}; + +/* + * bk_text.c + */ +void text_backend(paragraph *, keywordlist *, indexdata *); + +/* + * bk_xhtml.c + */ +void xhtml_backend(paragraph *, keywordlist *, indexdata *); + +/* + * bk_whlp.c + */ +void whlp_backend(paragraph *, keywordlist *, indexdata *); + +#endif diff --git a/help.c b/help.c new file mode 100644 index 0000000..3fd957f --- /dev/null +++ b/help.c @@ -0,0 +1,32 @@ +/* + * help.c: usage instructions + */ + +#include <stdio.h> +#include "halibut.h" + +static char *helptext[] = { + "FIXME: help text goes here", + NULL +}; + +static char *usagetext[] = { + "FIXME: usage text goes here", + NULL +}; + +void help(void) { /* print each line of helptext (a NULL-terminated array) to stdout */ + char **p; + for (p = helptext; *p; p++) + puts(*p); +} + +void usage(void) { /* print each line of usagetext (a NULL-terminated array) to stdout */ + char **p; + for (p = usagetext; *p; p++) + puts(*p); +} + +void showversion(void) { /* print the program name followed by the global `version' string */ + printf("Halibut, %s\n", version); +} diff --git a/index.c b/index.c new file mode 100644 index 0000000..3b1df51 --- /dev/null +++ b/index.c @@ -0,0 +1,230 @@ +/* + * index.c: create and collate index data structures + */ + +#include <stdio.h> +#include <stdlib.h> +#include "halibut.h" + +static int compare_tags(void *av, void *bv); +static int compare_entries(void *av, void *bv); + +indexdata *make_index(void) { + 
indexdata *ret = mknew(indexdata); /* both trees start empty; tags are ordered by compare_tags, entries by compare_entries */ + ret->tags = newtree234(compare_tags); + ret->entries = newtree234(compare_entries); + return ret; +} + +static indextag *make_indextag(void) { /* allocate a tag with every pointer NULL and every count zero */ + indextag *ret = mknew(indextag); + ret->name = NULL; + ret->implicit_text = NULL; + ret->explicit_texts = NULL; + ret->nexplicit = ret->explicit_size = ret->nrefs = 0; + ret->refs = NULL; + return ret; +} + +static int compare_tags(void *av, void *bv) { /* tree ordering for tags: ustricmp on the two names */ + indextag *a = (indextag *)av, *b = (indextag *)bv; + return ustricmp(a->name, b->name); +} + +static int compare_to_find_tag(void *av, void *bv) { /* lookup comparator: av is a bare name string, bv a stored indextag */ + wchar_t *a = (wchar_t *)av; + indextag *b = (indextag *)bv; + return ustricmp(a, b->name); +} + +static int compare_entries(void *av, void *bv) { /* tree ordering for entries: compare_wordlists on their text */ + indexentry *a = (indexentry *)av, *b = (indexentry *)bv; + return compare_wordlists(a->text, b->text); +} + +/* + * Back-end utility: find the indextag with a given name. + */ +indextag *index_findtag(indexdata *idx, wchar_t *name) { + return find234(idx->tags, name, compare_to_find_tag); +} + +/* + * Add a \IM. `tags' points to a zero-terminated chain of + * zero-terminated strings ("first\0second\0thirdandlast\0\0"). + * `text' points to a word list. + * + * Guarantee on calling sequence: all implicit merges are given + * before the explicit ones. + */ +void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text) { + indextag *t, *existing; + + /* + * FIXME: want to warn on overlapping source sets. + */ + for (; *tags; tags = uadv(tags)) { + t = make_indextag(); + t->name = tags; /* borrowed pointer, used only for the tree lookup below */ + existing = add234(idx->tags, t); + if (existing == t) { + /* + * Duplicate this so we can free it independently. + */ + t->name = ustrdup(tags); + + /* + * Every tag has an implicit \IM. So if this tag + * doesn't exist and we're explicit, then we should + * warn (and drop it, since it won't be referenced). NOTE(review): the freshly added tag is left in the tree here, with no texts attached. + */ + if (is_explicit) { + error(err_nosuchidxtag, tags); + continue; + } + + /* + * Otherwise, this is a new tag with an implicit \IM. 
+ */ + t->implicit_text = text; + } else { + sfree(t); /* the tag already existed: discard the probe object and work on the stored one */ + t = existing; + if (!is_explicit) { + /* + * An implicit \IM for a tag that's had an implicit + * \IM before. FIXME: we should check the text + * against the existing text and warn on + * differences. And check the tag for case match + * against the existing tag, likewise. NOTE(review): `text' is neither stored nor freed on this path. + */ + } else { + /* + * An explicit \IM added to a valid tag. In + * particular, this removes the implicit \IM if + * present. + */ + if (t->implicit_text) { + free_word_list(t->implicit_text); + t->implicit_text = NULL; + } + if (t->nexplicit >= t->explicit_size) { /* array full: grow it by eight slots */ + t->explicit_size = t->nexplicit + 8; + t->explicit_texts = resize(t->explicit_texts, + t->explicit_size); + } + t->explicit_texts[t->nexplicit++] = text; + } + } + } +} + +/* + * Build the final-form index. We now have every tag, with every + * \IM, set up in a 2-3 tree indexed by tag. We now want to collate + * the RHSes of the \IMs, and sort by final form, and decorate the + * entries in the original 2-3 tree with pointers to the RHS + * entries. 
+ */ +void build_index(indexdata *i) { + indextag *t; + word **ta; + int ti; + int j; + + for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) { + if (t->implicit_text) { /* a surviving implicit text is the tag's single reference */ + t->nrefs = 1; + ta = &t->implicit_text; + } else { + t->nrefs = t->nexplicit; + ta = t->explicit_texts; + } + if (t->nrefs) { + t->refs = mknewa(indexentry *, t->nrefs); + for (j = 0; j < t->nrefs; j++) { + indexentry *ent = mknew(indexentry); /* NOTE(review): backend_data is left uninitialised here */ + ent->text = *ta++; + t->refs[j] = add234(i->entries, ent); /* add234 hands back the already-stored entry when an equal one exists */ + if (t->refs[j] != ent) /* duplicate */ + sfree(ent); + } + } + } +} + +void cleanup_index(indexdata *i) { /* free every tag and entry, both trees, and the indexdata itself; explicit text word lists are not freed here */ + indextag *t; + indexentry *ent; + int ti; + + for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) { + sfree(t->name); + free_word_list(t->implicit_text); + sfree(t->explicit_texts); + sfree(t->refs); + sfree(t); + } + freetree234(i->tags); + for (ti = 0; (ent = (indexentry *)index234(i->entries, ti))!=NULL; ti++) { + sfree(ent); + } + freetree234(i->entries); + sfree(i); +} + +static void dbg_prtwordlist(int level, word *w); +static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text); + +void index_debug(indexdata *i) { /* dump every tag with its \IM texts, then every collated entry, to stdout */ + indextag *t; + indexentry *y; + int ti; + int j; + + printf("\nINDEX TAGS\n==========\n\n"); + for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) { + printf("\n"); + if (t->implicit_text) + dbg_prtmerge(0, t->name, t->implicit_text); + for (j = 0; j < t->nexplicit; j++) + dbg_prtmerge(1, t->name, t->explicit_texts[j]); + } + + printf("\nINDEX ENTRIES\n=============\n\n"); + for (ti = 0; (y = (indexentry *)index234(i->entries, ti)) != NULL; ti++) { + printf("\n"); + printf("{\n"); + dbg_prtwordlist(1, y->text); + printf("}\n"); + } +} + +static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text) { + printf("\\IM: %splicit: \"", is_explicit ? 
"ex" : "im"); + for (; *tag; tag++) + putchar(*tag); + printf("\" {\n"); + dbg_prtwordlist(1, text); + printf("}\n"); +} + +static void dbg_prtwordlist(int level, word *w) { + for (; w; w = w->next) { + wchar_t *wp; + printf("%*sword %d ", level*4, "", w->type); + if (w->text) { + printf("\""); + for (wp = w->text; *wp; wp++) + putchar(*wp); + printf("\""); + } else + printf("(no text)"); + if (w->alt) { + printf(" alt = {\n"); + dbg_prtwordlist(level+1, w->alt); + printf("%*s}", level*4, ""); + } + printf("\n"); + } +} diff --git a/input.c b/input.c new file mode 100644 index 0000000..15e0a97 --- /dev/null +++ b/input.c @@ -0,0 +1,1164 @@ +/* + * input.c: read the source form + */ + +#include +#include +#include +#include "halibut.h" + +#define TAB_STOP 8 /* for column number tracking */ + +static void setpos(input *in, char *fname) { + in->pos.filename = fname; + in->pos.line = 1; + in->pos.col = (in->reportcols ? 1 : -1); +} + +static void unget(input *in, int c, filepos *pos) { + if (in->npushback >= in->pushbacksize) { + in->pushbacksize = in->npushback + 16; + in->pushback = resize(in->pushback, in->pushbacksize); + } + in->pushback[in->npushback].chr = c; + in->pushback[in->npushback].pos = *pos; /* structure copy */ + in->npushback++; +} + +/* ---------------------------------------------------------------------- */ +/* + * Macro subsystem + */ +typedef struct macro_Tag macro; +struct macro_Tag { + wchar_t *name, *text; +}; +struct macrostack_Tag { + macrostack *next; + wchar_t *text; + int ptr, npushback; + filepos pos; +}; +static int macrocmp(void *av, void *bv) { + macro *a = (macro *)av, *b = (macro *)bv; + return ustrcmp(a->name, b->name); +} +static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text, + filepos fpos) { + macro *m = mknew(macro); + m->name = name; + m->text = text; + if (add234(macros, m) != m) { + error(err_macroexists, &fpos, name); + sfree(name); + sfree(text); + } +} +static int macrolookup(tree234 *macros, input *in, 
wchar_t *name, + filepos *pos) { + macro m, *gotit; + m.name = name; + gotit = find234(macros, &m, NULL); + if (gotit) { + macrostack *expansion = mknew(macrostack); + expansion->next = in->stack; + expansion->text = gotit->text; + expansion->pos = *pos; /* structure copy */ + expansion->ptr = 0; + expansion->npushback = in->npushback; + in->stack = expansion; + return TRUE; + } else + return FALSE; +} +static void macrocleanup(tree234 *macros) { + int ti; + macro *m; + for (ti = 0; (m = (macro *)index234(macros, ti)) != NULL; ti++) { + sfree(m->name); + sfree(m->text); + sfree(m); + } + freetree234(macros); +} + +/* + * Can return EOF + */ +static int get(input *in, filepos *pos) { + int pushbackpt = in->stack ? in->stack->npushback : 0; + if (in->npushback > pushbackpt) { + --in->npushback; + if (pos) + *pos = in->pushback[in->npushback].pos; /* structure copy */ + return in->pushback[in->npushback].chr; + } + else if (in->stack) { + wchar_t c = in->stack->text[in->stack->ptr]; + if (in->stack->text[++in->stack->ptr] == L'\0') { + macrostack *tmp = in->stack; + in->stack = tmp->next; + sfree(tmp); + } + return c; + } + else if (in->currfp) { + int c = getc(in->currfp); + + if (c == EOF) { + fclose(in->currfp); + in->currfp = NULL; + } + /* Track line numbers, for error reporting */ + if (pos) + *pos = in->pos; + if (in->reportcols) { + switch (c) { + case '\t': + in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP; + break; + case '\n': + in->pos.col = 1; + in->pos.line++; + break; + default: + in->pos.col++; + break; + } + } else { + in->pos.col = -1; + if (c == '\n') + in->pos.line++; + } + /* FIXME: do input charmap translation. We should be returning + * Unicode here. */ + return c; + } else + return EOF; +} + +/* + * Lexical analysis of source files. 
+ */ +typedef struct token_Tag token; +struct token_Tag { + int type; /* one of the tok_* values below */ + int cmd, aux; /* cmd: c_* id filled in by match_kw for tok_cmd; aux: numeric argument of \S and \u, or the ends-in-hyphen flag of a tok_word */ + wchar_t *text; /* text of the token, or NULL for token types that carry none */ + filepos pos; /* position of the token's first character */ +}; +enum { + tok_eof, /* end of file */ + tok_eop, /* end of paragraph */ + tok_white, /* whitespace */ + tok_word, /* a word or word fragment */ + tok_cmd, /* \command */ + tok_lbrace, /* { */ + tok_rbrace /* } */ +}; + +/* Halibut command keywords. */ +enum { + c__invalid, /* invalid command */ + c__comment, /* comment command (\#) */ + c__escaped, /* escaped character */ + c__nbsp, /* nonbreaking space */ + c_A, /* appendix heading */ + c_B, /* bibliography entry */ + c_BR, /* bibliography rewrite */ + c_C, /* chapter heading */ + c_H, /* heading */ + c_I, /* invisible index mark */ + c_IM, /* index merge/rewrite */ + c_K, /* capitalised cross-reference */ + c_S, /* aux field is 0, 1, 2, ... */ + c_U, /* unnumbered-chapter heading */ + c_W, /* Web hyperlink */ + c_b, /* bulletted list */ + c_c, /* code */ + c_cfg, /* configuration directive */ + c_copyright, /* copyright statement */ + c_cw, /* weak code */ + c_date, /* document processing date */ + c_define, /* macro definition */ + c_e, /* emphasis */ + c_i, /* visible index mark */ + c_ii, /* uncapitalised visible index mark */ + c_k, /* uncapitalised cross-reference */ + c_n, /* numbered list */ + c_nocite, /* bibliography trickery */ + c_preamble, /* document preamble text */ + c_q, /* quote marks */ + c_rule, /* horizontal rule */ + c_title, /* document title */ + c_u, /* aux field is char code */ + c_versionid /* document RCS id */ +}; + +/* Perhaps whitespace should be defined in a more Unicode-friendly way? Each macro below evaluates its argument more than once, so the argument must be free of side effects. */ +#define iswhite(c) ( (c)==32 || (c)==9 || (c)==13 || (c)==10 ) +#define isnl(c) ( (c)==10 ) +#define isdec(c) ( ((c)>='0'&&(c)<='9') ) +#define fromdec(c) ( (c)-'0' ) +#define ishex(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f')) +#define fromhex(c) ( (c)<='9' ? 
(c)-'0' : ((c)&0xDF) - ('A'-10) ) +#define iscmd(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z')) + +/* + * Keyword comparison function. Like strcmp, but between a wchar_t * + * and a char *. Returns the difference of the first pair of characters that differ, or 0 when both strings end together. + */ +static int kwcmp(wchar_t const *p, char const *q) { + int i; + do { + i = *p - *q; + } while (*p++ && *q++ && !i); + return i; +} + +/* + * Match a keyword: set tok->cmd (plus tok->aux for \S and \u) from tok->text, or c__invalid when nothing matches. The table is binary-searched with kwcmp, so its entries must stay sorted in that order. + */ +static void match_kw(token *tok) { + /* + * FIXME. The ids are explicit in here so as to allow long-name + * equivalents to the various very short keywords. + */ + static const struct { char const *name; int id; } keywords[] = { + {"#", c__comment}, /* comment command (\#) */ + {"-", c__escaped}, /* nonbreaking hyphen */ + {"A", c_A}, /* appendix heading */ + {"B", c_B}, /* bibliography entry */ + {"BR", c_BR}, /* bibliography rewrite */ + {"C", c_C}, /* chapter heading */ + {"H", c_H}, /* heading */ + {"I", c_I}, /* invisible index mark */ + {"IM", c_IM}, /* index merge/rewrite */ + {"K", c_K}, /* capitalised cross-reference */ + {"U", c_U}, /* unnumbered-chapter heading */ + {"W", c_W}, /* Web hyperlink */ + {"\\", c__escaped}, /* escaped backslash (\\) */ + {"_", c__nbsp}, /* nonbreaking space (\_) */ + {"b", c_b}, /* bulletted list */ + {"c", c_c}, /* code */ + {"cfg", c_cfg}, /* configuration directive */ + {"copyright", c_copyright}, /* copyright statement */ + {"cw", c_cw}, /* weak code */ + {"date", c_date}, /* document processing date */ + {"define", c_define}, /* macro definition */ + {"e", c_e}, /* emphasis */ + {"i", c_i}, /* visible index mark */ + {"ii", c_ii}, /* uncapitalised visible index mark */ + {"k", c_k}, /* uncapitalised cross-reference */ + {"n", c_n}, /* numbered list */ + {"nocite", c_nocite}, /* bibliography trickery */ + {"preamble", c_preamble}, /* document preamble text */ + {"q", c_q}, /* quote marks */ + {"rule", c_rule}, /* horizontal rule */ + {"title", c_title}, /* document title */ + {"versionid", c_versionid}, /* document RCS id */ + {"{", 
c__escaped}, /* escaped lbrace (\{) */ + {"}", c__escaped}, /* escaped rbrace (\}) */ + }; + int i, j, k, c; + + /* + * Special cases: \S{0,1,2,...} and \uABCD. If the syntax + * doesn't match correctly, we just fall through to the + * binary-search phase. + */ + if (tok->text[0] == 'S') { + /* We expect numeric characters thereafter. */ + wchar_t *p = tok->text+1; + int n; + if (!*p) + n = 1; /* a bare \S gets aux 1 */ + else { + n = 0; + while (*p && isdec(*p)) { + n = 10 * n + fromdec(*p); + p++; + } + } + if (!*p) { /* the whole of the text was consumed, so it really was \S<digits> */ + tok->cmd = c_S; + tok->aux = n; + return; + } + } else if (tok->text[0] == 'u') { + /* We expect hex characters thereafter. */ + wchar_t *p = tok->text+1; + int n = 0; + while (*p && ishex(*p)) { + n = 16 * n + fromhex(*p); + p++; + } + if (!*p) { /* also taken for a bare \u, which yields aux 0 */ + tok->cmd = c_u; + tok->aux = n; + return; + } + } + + i = -1; /* binary search; any match lies strictly between indices i and j */ + j = sizeof(keywords)/sizeof(*keywords); + while (j-i > 1) { + k = (i+j)/2; + c = kwcmp(tok->text, keywords[k].name); + if (c < 0) + j = k; + else if (c > 0) + i = k; + else /* c == 0 */ { + tok->cmd = keywords[k].id; + return; + } + } + + tok->cmd = c__invalid; +} + + +/* + * Read a token from the input file, in the normal way (`normal' in + * the sense that code paragraphs work a different way). + */ +token get_token(input *in) { + int c; + int nls; + token ret; + rdstring rs = { 0, 0, NULL }; + filepos cpos; + + ret.text = NULL; /* default */ + c = get(in, &cpos); + ret.pos = cpos; + if (iswhite(c)) { /* tok_white or tok_eop */ + nls = 0; + do { + if (isnl(c)) + nls++; + } while ((c = get(in, &cpos)) != EOF && iswhite(c)); + if (c == EOF) { /* whitespace running up to end of input is reported as tok_eof */ + ret.type = tok_eof; + return ret; + } + unget(in, c, &cpos); + ret.type = (nls > 1 ? 
tok_eop : tok_white); + return ret; + } else if (c == EOF) { /* tok_eof */ + ret.type = tok_eof; + return ret; + } else if (c == '\\') { /* tok_cmd */ + c = get(in, &cpos); + if (c == '-' || c == '\\' || c == '_' || + c == '#' || c == '{' || c == '}') { + /* single-char command */ + rdadd(&rs, c); + } else if (c == 'u') { /* \u followed by at most four hex digits; the first character after them is pushed back */ + int len = 0; + do { + rdadd(&rs, c); + len++; + c = get(in, &cpos); + } while (ishex(c) && len < 5); + unget(in, c, &cpos); + } else if (iscmd(c)) { /* alphanumeric command name; the character that ends it is pushed back */ + do { + rdadd(&rs, c); + c = get(in, &cpos); + } while (iscmd(c)); + unget(in, c, &cpos); + } + /* + * Now match the command against the list of available + * ones. NOTE(review): when the backslash is followed by any other character, that character has been consumed and rs is still empty at this point. + */ + ret.type = tok_cmd; + ret.text = ustrdup(rs.text); + match_kw(&ret); + sfree(rs.text); + return ret; + } else if (c == '{') { /* tok_lbrace */ + ret.type = tok_lbrace; + return ret; + } else if (c == '}') { /* tok_rbrace */ + ret.type = tok_rbrace; + return ret; + } else { /* tok_word */ + /* + * Read a word: the longest possible contiguous sequence of + * things other than whitespace, backslash, braces and + * hyphen. A hyphen terminates the word but is returned as + * part of it; everything else is pushed back for the next + * token. The `aux' field contains TRUE if the word ends in + * a hyphen. + */ + ret.aux = FALSE; /* assumed for now */ + while (1) { + if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) { + /* Put back the character that caused termination */ + unget(in, c, &cpos); + break; + } else { + rdadd(&rs, c); + if (c == '-') { + ret.aux = TRUE; + break; /* hyphen terminates word */ + } + } + c = get(in, &cpos); + } + ret.type = tok_word; + ret.text = ustrdup(rs.text); + sfree(rs.text); + return ret; + } +} + +/* + * Determine whether the next input character is an open brace (for + * telling code paragraphs from paragraphs which merely start with + * code). 
+ */ +int isbrace(input *in) { /* peeks only: the character read is pushed straight back */ + int c; + filepos cpos; + + c = get(in, &cpos); + unget(in, c, &cpos); + return (c == '{'); +} + +/* + * Read the rest of a line that starts `\c'. Including nothing at + * all (tok_word with empty text). The terminating newline or EOF is pushed back, not consumed. + */ +token get_codepar_token(input *in) { + int c; + token ret; + rdstring rs = { 0, 0, NULL }; + filepos cpos; + + ret.type = tok_word; + c = get(in, &cpos); /* expect (and discard) one space */ + ret.pos = cpos; + if (c == ' ') { + c = get(in, &cpos); + ret.pos = cpos; + } + while (!isnl(c) && c != EOF) { + int c2 = c; /* one character of lookahead, so that a \r can be dropped when a \n follows */ + c = get(in, &cpos); + /* Discard \r just before \n. */ + if (c2 != 13 || !isnl(c)) + rdadd(&rs, c2); + } + unget(in, c, &cpos); + ret.text = ustrdup(rs.text); + sfree(rs.text); + return ret; +} + +/* + * Adds a new word to a linked list: copies `newword' to the heap, stores it at **hptrptr and advances *hptrptr to the copy's `next' field. Returns the copy, or NULL (adding nothing) when hptrptr is NULL. + */ +static word *addword(word newword, word ***hptrptr) { + word *mnewword; + if (!hptrptr) + return NULL; + mnewword = mknew(word); + *mnewword = newword; /* structure copy */ + mnewword->next = NULL; + **hptrptr = mnewword; + *hptrptr = &mnewword->next; + return mnewword; +} + +/* + * Adds a new paragraph to a linked list, in the same way as addword but without the NULL check on hptrptr. + */ +static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) { + paragraph *mnewpara = mknew(paragraph); + *mnewpara = newpara; /* structure copy */ + mnewpara->next = NULL; + **hptrptr = mnewpara; + *hptrptr = &mnewpara->next; + return mnewpara; +} + +/* + * Destructor before token is reassigned; should catch most memory + * leaks + */ +#define dtor(t) ( sfree(t.text) ) + +/* + * Reads a single file (ie until get() returns EOF) + */ +static void read_file(paragraph ***ret, input *in, indexdata *idx) { + token t; + paragraph par; + word wd, **whptr, **idximplicit; + tree234 *macros; + wchar_t utext[2], *wdtext; + int style, spcstyle; + int already; + int iswhite, seenwhite; + int type; + struct stack_item { + enum { + stack_nop = 0, /* do nothing (for error recovery) */ + stack_ualt = 1, /* \u alternative */ + stack_style = 2, /* \e, \c, \cw */ + 
stack_idx = 4, /* \I, \i, \ii */ + stack_hyper = 8, /* \W */ + stack_quote = 16, /* \q */ + } type; + word **whptr; /* to restore from \u alternatives */ + word **idximplicit; /* to restore from \u alternatives */ + } *sitem; + stack parsestk; + word *indexword, *uword, *iword; + word *idxwordlist; + rdstring indexstr; + int index_downcase, index_visible, indexing; + const rdstring nullrs = { 0, 0, NULL }; + wchar_t uchr; + + t.text = NULL; + macros = newtree234(macrocmp); + + /* + * Loop on each paragraph. + */ + while (1) { + par.words = NULL; + par.keyword = NULL; + whptr = &par.words; + + /* + * Get a token. + */ + dtor(t), t = get_token(in); + if (t.type == tok_eof) + return; + + /* + * Parse code paragraphs separately. + */ + if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) { + par.type = para_Code; + par.fpos = t.pos; + while (1) { + dtor(t), t = get_codepar_token(in); + wd.type = word_WeakCode; + wd.breaks = FALSE; /* shouldn't need this... */ + wd.text = ustrdup(t.text); + wd.alt = NULL; + wd.fpos = t.pos; + addword(wd, &whptr); + dtor(t), t = get_token(in); + if (t.type == tok_white) { + /* + * The newline after a code-paragraph line + */ + dtor(t), t = get_token(in); + } + if (t.type == tok_eop || t.type == tok_eof) + break; + else if (t.type != tok_cmd || t.cmd != c_c) { + error(err_brokencodepara, &t.pos); + addpara(par, ret); + while (t.type != tok_eop) /* error recovery: */ + dtor(t), t = get_token(in); /* eat rest of paragraph */ + goto codeparabroken; /* ick, but such is life */ + } + } + addpara(par, ret); + codeparabroken: + continue; + } + + /* + * This token begins a paragraph. See if it's one of the + * special commands that define a paragraph type. 
+ * + * (note that \# is special in a way, and \nocite takes no + * text) + */ + par.type = para_Normal; + if (t.type == tok_cmd) { + int needkw; + int is_macro = FALSE; + + par.fpos = t.pos; + switch (t.cmd) { + default: + needkw = -1; + break; + case c__invalid: + error(err_badparatype, t.text, &t.pos); + needkw = 4; + break; + case c__comment: + if (isbrace(in)) + break; /* `\#{': isn't a comment para */ + do { + dtor(t), t = get_token(in); + } while (t.type != tok_eop && t.type != tok_eof); + continue; /* next paragraph */ + /* + * `needkw' values: + * + * 1 -- exactly one keyword + * 2 -- at least one keyword + * 4 -- any number of keywords including zero + * 8 -- at least one keyword and then nothing else + * 16 -- nothing at all! no keywords, no body + * 32 -- no keywords at all + */ + case c_A: needkw = 2; par.type = para_Appendix; break; + case c_B: needkw = 2; par.type = para_Biblio; break; + case c_BR: needkw = 1; par.type = para_BR; break; + case c_C: needkw = 2; par.type = para_Chapter; break; + case c_H: needkw = 2; par.type = para_Heading; + par.aux = 0; + break; + case c_IM: needkw = 2; par.type = para_IM; break; + case c_S: needkw = 2; par.type = para_Subsect; + par.aux = t.aux; break; + case c_U: needkw = 32; par.type = para_UnnumberedChapter; break; + /* For \b and \n the keyword is optional */ + case c_b: needkw = 4; par.type = para_Bullet; break; + case c_n: needkw = 4; par.type = para_NumberedList; break; + case c_cfg: needkw = 8; par.type = para_Config; break; + case c_copyright: needkw = 32; par.type = para_Copyright; break; + case c_define: is_macro = TRUE; needkw = 1; break; + /* For \nocite the keyword is _everything_ */ + case c_nocite: needkw = 8; par.type = para_NoCite; break; + case c_preamble: needkw = 32; par.type = para_Preamble; break; + case c_rule: needkw = 16; par.type = para_Rule; break; + case c_title: needkw = 32; par.type = para_Title; break; + case c_versionid: needkw = 32; par.type = para_VersionID; break; + } + + if 
(needkw > 0) { + rdstring rs = { 0, 0, NULL }; + int nkeys = 0; + filepos fp; + + /* Get keywords. */ + dtor(t), t = get_token(in); + fp = t.pos; + while (t.type == tok_lbrace) { + /* This is a keyword. */ + nkeys++; + /* FIXME: there will be bugs if anyone specifies an + * empty keyword (\foo{}), so trap this case. */ + while (dtor(t), t = get_token(in), + t.type == tok_word || + t.type == tok_white || + (t.type == tok_cmd && t.cmd == c__nbsp) || + (t.type == tok_cmd && t.cmd == c__escaped)) { + if (t.type == tok_white || + (t.type == tok_cmd && t.cmd == c__nbsp)) + rdadd(&rs, ' '); + else + rdadds(&rs, t.text); + } + if (t.type != tok_rbrace) { + error(err_kwunclosed, &t.pos); + continue; + } + rdadd(&rs, 0); /* add string terminator */ + dtor(t), t = get_token(in); /* eat right brace */ + } + + rdadd(&rs, 0); /* add string terminator */ + + /* See whether we have the right number of keywords. */ + if ((needkw & 48) && nkeys > 0) + error(err_kwillegal, &fp); + if ((needkw & 11) && nkeys == 0) + error(err_kwexpected, &fp); + if ((needkw & 5) && nkeys > 1) + error(err_kwtoomany, &fp); + + if (is_macro) { + /* + * Macro definition. Get the rest of the line + * as a code-paragraph token, repeatedly until + * there's nothing more left of it. Separate + * with newlines. 
+ */ + rdstring macrotext = { 0, 0, NULL }; + while (1) { + dtor(t), t = get_codepar_token(in); + if (macrotext.pos > 0) + rdadd(&macrotext, L'\n'); + rdadds(&macrotext, t.text); + dtor(t), t = get_token(in); + if (t.type == tok_eop) break; + } + macrodef(macros, rs.text, macrotext.text, fp); + continue; /* next paragraph */ + } + + par.keyword = rdtrim(&rs); + + /* Move to EOP in case of needkw==8 or 16 (no body) */ + if (needkw & 24) { + if (t.type != tok_eop && t.type != tok_eof) { + error(err_bodyillegal, &t.pos); + /* Error recovery: eat the rest of the paragraph */ + while (t.type != tok_eop && t.type != tok_eof) + dtor(t), t = get_token(in); + } + addpara(par, ret); + continue; /* next paragraph */ + } + } + } + + /* + * Now read the actual paragraph, word by word, adding to + * the paragraph list. + * + * Mid-paragraph commands: + * + * \K \k + * \c \cw + * \e + * \i \ii + * \I + * \u + * \W + * \date + * \\ \{ \} + */ + parsestk = stk_new(); + style = word_Normal; + spcstyle = word_WhiteSpace; + indexing = FALSE; + seenwhite = TRUE; + while (t.type != tok_eop && t.type != tok_eof) { + iswhite = FALSE; + already = FALSE; + if (t.type == tok_cmd && t.cmd == c__escaped) { + t.type = tok_word; /* nice and simple */ + t.aux = 0; /* even if `\-' - nonbreaking! 
*/ + } + if (t.type == tok_cmd && t.cmd == c__nbsp) { + t.type = tok_word; /* nice and simple */ + sfree(t.text); + t.text = ustrdup(L" "); /* text is ` ' not `_' */ + t.aux = 0; /* (nonbreaking) */ + } + switch (t.type) { + case tok_white: + if (whptr == &par.words) + break; /* strip whitespace at start of para */ + wd.text = NULL; + wd.type = spcstyle; + wd.alt = NULL; + wd.aux = 0; + wd.fpos = t.pos; + wd.breaks = FALSE; + if (indexing) + rdadd(&indexstr, ' '); + if (!indexing || index_visible) + addword(wd, &whptr); + if (indexing) + addword(wd, &idximplicit); + iswhite = TRUE; + break; + case tok_word: + if (indexing) + rdadds(&indexstr, t.text); + wd.type = style; + wd.alt = NULL; + wd.aux = 0; + wd.fpos = t.pos; + wd.breaks = t.aux; + if (!indexing || index_visible) { + wd.text = ustrdup(t.text); + addword(wd, &whptr); + } + if (indexing) { + wd.text = ustrdup(t.text); + addword(wd, &idximplicit); + } + break; + case tok_lbrace: + error(err_unexbrace, &t.pos); + /* Error recovery: push nop */ + sitem = mknew(struct stack_item); + sitem->type = stack_nop; + stk_push(parsestk, sitem); + break; + case tok_rbrace: + sitem = stk_pop(parsestk); + if (!sitem) + error(err_unexbrace, &t.pos); + else { + if (sitem->type & stack_ualt) { + whptr = sitem->whptr; + idximplicit = sitem->idximplicit; + } + if (sitem->type & stack_style) { + style = word_Normal; + spcstyle = word_WhiteSpace; + } + if (sitem->type & stack_idx) { + indexword->text = ustrdup(indexstr.text); + if (index_downcase) + ustrlow(indexword->text); + indexing = FALSE; + rdadd(&indexstr, L'\0'); + index_merge(idx, FALSE, indexstr.text, idxwordlist); + sfree(indexstr.text); + } + if (sitem->type & stack_hyper) { + wd.text = NULL; + wd.type = word_HyperEnd; + wd.alt = NULL; + wd.aux = 0; + wd.fpos = t.pos; + wd.breaks = FALSE; + if (!indexing || index_visible) + addword(wd, &whptr); + if (indexing) + addword(wd, &idximplicit); + } + if (sitem->type & stack_quote) { + wd.text = NULL; + wd.type = 
toquotestyle(style); + wd.alt = NULL; + wd.aux = quote_Close; + wd.fpos = t.pos; + wd.breaks = FALSE; + if (!indexing || index_visible) + addword(wd, &whptr); + if (indexing) { + rdadd(&indexstr, L'"'); + addword(wd, &idximplicit); + } + } + } + sfree(sitem); + break; + case tok_cmd: + switch (t.cmd) { + case c__comment: + /* + * In-paragraph comment: \#{ balanced braces } + * + * Anything goes here; even tok_eop. We should + * eat whitespace after the close brace _if_ + * there was whitespace before the \#. + */ + dtor(t), t = get_token(in); + if (t.type != tok_lbrace) { + error(err_explbr, &t.pos); + } else { + int braces = 1; + while (braces > 0) { + dtor(t), t = get_token(in); + if (t.type == tok_lbrace) + braces++; + else if (t.type == tok_rbrace) + braces--; + else if (t.type == tok_eof) { + error(err_commenteof, &t.pos); + break; + } + } + } + if (seenwhite) { + already = TRUE; + dtor(t), t = get_token(in); + if (t.type == tok_white) { + iswhite = TRUE; + already = FALSE; + } + } + break; + case c_q: + dtor(t), t = get_token(in); + if (t.type != tok_lbrace) { + error(err_explbr, &t.pos); + } else { + wd.text = NULL; + wd.type = toquotestyle(style); + wd.alt = NULL; + wd.aux = quote_Open; + wd.fpos = t.pos; + wd.breaks = FALSE; + if (!indexing || index_visible) + addword(wd, &whptr); + if (indexing) { + rdadd(&indexstr, L'"'); + addword(wd, &idximplicit); + } + sitem = mknew(struct stack_item); + sitem->type = stack_quote; + stk_push(parsestk, sitem); + } + break; + case c_K: + case c_k: + case c_W: + case c_date: + /* + * Keyword, hyperlink, or \date. We expect a + * left brace, some text, and then a right + * brace. No nesting; no arguments. 
+ */ + wd.fpos = t.pos; + wd.breaks = FALSE; + if (t.cmd == c_K) + wd.type = word_UpperXref; + else if (t.cmd == c_k) + wd.type = word_LowerXref; + else if (t.cmd == c_W) + wd.type = word_HyperLink; + else + wd.type = word_Normal; + dtor(t), t = get_token(in); + if (t.type != tok_lbrace) { + if (wd.type == word_Normal) { + time_t thetime = time(NULL); + struct tm *broken = localtime(&thetime); + already = TRUE; + wdtext = ustrftime(NULL, broken); + wd.type = style; + } else { + error(err_explbr, &t.pos); + wdtext = NULL; + } + } else { + rdstring rs = { 0, 0, NULL }; + while (dtor(t), t = get_token(in), + t.type == tok_word || t.type == tok_white) { + if (t.type == tok_white) + rdadd(&rs, ' '); + else + rdadds(&rs, t.text); + } + if (wd.type == word_Normal) { + time_t thetime = time(NULL); + struct tm *broken = localtime(&thetime); + wdtext = ustrftime(rs.text, broken); + wd.type = style; + } else { + wdtext = ustrdup(rs.text); + } + sfree(rs.text); + if (t.type != tok_rbrace) { + error(err_kwexprbr, &t.pos); + } + } + wd.alt = NULL; + wd.aux = 0; + if (!indexing || index_visible) { + wd.text = ustrdup(wdtext); + addword(wd, &whptr); + } + if (indexing) { + wd.text = ustrdup(wdtext); + addword(wd, &idximplicit); + } + sfree(wdtext); + if (wd.type == word_HyperLink) { + /* + * Hyperlinks are different: they then + * expect another left brace, to begin + * delimiting the text marked by the link. + */ + dtor(t), t = get_token(in); + /* + * Special cases: \W{}\c, \W{}\e, \W{}\cw + */ + sitem = mknew(struct stack_item); + sitem->type = stack_hyper; + if (t.type == tok_cmd && + (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) { + if (style != word_Normal) + error(err_nestedstyles, &t.pos); + else { + style = (t.cmd == c_c ? word_Code : + t.cmd == c_cw ? 
word_WeakCode : + word_Emph); + spcstyle = tospacestyle(style); + sitem->type |= stack_style; + } + dtor(t), t = get_token(in); + } + if (t.type != tok_lbrace) { + error(err_explbr, &t.pos); + sfree(sitem); + } else { + stk_push(parsestk, sitem); + } + } + break; + case c_c: + case c_cw: + case c_e: + type = t.cmd; + if (style != word_Normal) { + error(err_nestedstyles, &t.pos); + /* Error recovery: eat lbrace, push nop. */ + dtor(t), t = get_token(in); + sitem = mknew(struct stack_item); + sitem->type = stack_nop; + stk_push(parsestk, sitem); + } + dtor(t), t = get_token(in); + if (t.type != tok_lbrace) { + error(err_explbr, &t.pos); + } else { + style = (type == c_c ? word_Code : + type == c_cw ? word_WeakCode : + word_Emph); + spcstyle = tospacestyle(style); + sitem = mknew(struct stack_item); + sitem->type = stack_style; + stk_push(parsestk, sitem); + } + break; + case c_i: + case c_ii: + case c_I: + type = t.cmd; + if (indexing) { + error(err_nestedindex, &t.pos); + /* Error recovery: eat lbrace, push nop. */ + dtor(t), t = get_token(in); + sitem = mknew(struct stack_item); + sitem->type = stack_nop; + stk_push(parsestk, sitem); + } + sitem = mknew(struct stack_item); + sitem->type = stack_idx; + dtor(t), t = get_token(in); + /* + * Special cases: \i\c, \i\e, \i\cw + */ + wd.fpos = t.pos; + if (t.type == tok_cmd && + (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) { + if (style != word_Normal) + error(err_nestedstyles, &t.pos); + else { + style = (t.cmd == c_c ? word_Code : + t.cmd == c_cw ? 
word_WeakCode : + word_Emph); + spcstyle = tospacestyle(style); + sitem->type |= stack_style; + } + dtor(t), t = get_token(in); + } + if (t.type != tok_lbrace) { + sfree(sitem); + error(err_explbr, &t.pos); + } else { + /* Add an index-reference word with no text as yet */ + wd.type = word_IndexRef; + wd.text = NULL; + wd.alt = NULL; + wd.aux = 0; + wd.breaks = FALSE; + indexword = addword(wd, &whptr); + /* Set up a rdstring to read the index text */ + indexstr = nullrs; + /* Flags so that we do the Right Things with text */ + index_visible = (type != c_I); + index_downcase = (type == c_ii); + indexing = TRUE; + idxwordlist = NULL; + idximplicit = &idxwordlist; + /* Stack item to close the indexing on exit */ + stk_push(parsestk, sitem); + } + break; + case c_u: + uchr = t.aux; + utext[0] = uchr; utext[1] = 0; + wd.type = style; + wd.breaks = FALSE; + wd.alt = NULL; + wd.aux = 0; + wd.fpos = t.pos; + if (!indexing || index_visible) { + wd.text = ustrdup(utext); + uword = addword(wd, &whptr); + } else + uword = NULL; + if (indexing) { + wd.text = ustrdup(utext); + iword = addword(wd, &idximplicit); + } else + iword = NULL; + dtor(t), t = get_token(in); + if (t.type == tok_lbrace) { + /* + * \u with a left brace. Until the brace + * closes, all further words go on a + * sidetrack from the main thread of the + * paragraph. + */ + sitem = mknew(struct stack_item); + sitem->type = stack_ualt; + sitem->whptr = whptr; + sitem->idximplicit = idximplicit; + stk_push(parsestk, sitem); + whptr = uword ? &uword->alt : NULL; + idximplicit = iword ? 
&iword->alt : NULL; + } else { + if (indexing) + rdadd(&indexstr, uchr); + already = TRUE; + } + break; + default: + if (!macrolookup(macros, in, t.text, &t.pos)) + error(err_badmidcmd, t.text, &t.pos); + break; + } + } + if (!already) + dtor(t), t = get_token(in); + seenwhite = iswhite; + } + /* Check the stack is empty */ + if (NULL != (sitem = stk_pop(parsestk))) { + do { + sfree(sitem); + sitem = stk_pop(parsestk); + } while (sitem); + error(err_missingrbrace, &t.pos); + } + stk_free(parsestk); + addpara(par, ret); + } + dtor(t); + macrocleanup(macros); +} + +paragraph *read_input(input *in, indexdata *idx) { + paragraph *head = NULL; + paragraph **hptr = &head; + + while (in->currindex < in->nfiles) { + in->currfp = fopen(in->filenames[in->currindex], "r"); + if (in->currfp) { + setpos(in, in->filenames[in->currindex]); + read_file(&hptr, in, idx); + } + in->currindex++; + } + + return head; +} diff --git a/inputs/errors.but b/inputs/errors.but new file mode 100644 index 0000000..1befc74 --- /dev/null +++ b/inputs/errors.but @@ -0,0 +1,51 @@ +Bogus keyword: \k{nonexist} + +\BR{nonexist2} [Foogle] + +\B{book} Foo McBar, "A Lot Of Rubbish", 1992. + +\B{book} The same book again. Isn't that odd? + +\IM{nonexist3} Logical impossibilities + +\define{macro} macro definition + +\define{macro} same macro again + +\c Foo +\c Bar +Bombadillo. Now get out of _that_. + +\unngh This is a bit of a bizarre paragraph, now isn't it? + +\C{unfinished-symphony + +\U{thisshouldn'tbehere} Unnumbered chapter. Or is it? + +\C Numbered chapter. Or _is_ it? + +\BR{two}{too}{many} [Which One?] + +\nocite{ooh} With some illegal text. + +Paragraph with {bizarre braces}. + +Another one } . + +Bogus in-para comment: \# foobar. + +Bogus xrefs: \k and \k{foo{}}. + +Nest those styles! \c{foo\e{bar}}. And +\c{foo\W{file:/dev/null}\e{bar}}. And \e{foo\i\c{bar}}. +And one without brace: \e. + +Bogus hyperlinks: \W and \W{file:/dev/null}bar. + +Nested index: \i{foo\i{bar}}. 
Broken styled index: \i\c. + +Complete twaddle: \twaddle. + +Unclosed brace: \c{foo. + +Comment to EOF: \#{ and here we go. diff --git a/inputs/errors2.but b/inputs/errors2.but new file mode 100644 index 0000000..b1a3303 --- /dev/null +++ b/inputs/errors2.but @@ -0,0 +1,5 @@ +\H{outofplace} Heading out of place. + +\C{chap} Chapter is fine + +\S{subsect} Subsection should have heading before it. diff --git a/inputs/test.but b/inputs/test.but new file mode 100644 index 0000000..5849e52 --- /dev/null +++ b/inputs/test.but @@ -0,0 +1,130 @@ +\title Halibut: A Test Document With A Stupidly Long Title Just To +See If Wrapping Titles Works OK. In Fact This Title Will Span Three +Lines, Not Just Two. How's That For Ludicrous? + +\cfg{xhtml-leaf-smallest-contents}{2} + +\cfg{xhtml-leaf-contains-contents}{true} + +\preamble This manual is a small joke effort, designed to use every +feature \#{ comment } that Halibut's input format supports. Creation +date \date{%Y.%m.%d} (default format is \date). + +\copyright Copyright 1999 Simon \#{second comment}Tatham. All rights +reserved. + +\define{metacoopt} [this is a nested, +multi-line macro, talking about \coopt +a bit] + +\define{coopt} co\u00F6{-o}pt + +\versionid $Id: test.but,v 1.18 2002/08/05 10:31:33 simon Exp $ + +\C{ch\\ap} First chapter title; for similar wrapping reasons this +chapter title will be ludicrously long. I wonder how much more +chapter title I can write before feeling silly. + +This is a para\#{another{} comment}graph of text. It +has line\#{yet another one} breaks in between words, multiple + spaces (ignored), and \e{emphasised text} as well as \c{code +fragments}. + +\cw{This} is weak code. And \k{head} contains some other stuff. +\K{subhead} does too. 
+ +\H{head} First section title (very long again, no prizes for +guessing the reason why this time, and here's yet more text to pad +it out to three lines of output) + +\cfg{winhelp-topic}{M359HPEHGW} + +Here's a code paragraph: + +\c No leading spaces +\c One leading space +\c Two blank lines follow this one. +\c +\c +\c Two blank lines precede this one. +\c Two leading spaces +\c We can use \ { and } with impunity here. + +This is a list: + +\b Ooh. + +\b Aah. + +\b Eek. + +This is a horizontal rule: + +\rule + +This is a numbered list: + +\n Ooh. + +\n{keyword} Aah. + +\n Eek. \q{Aah} is point \k{keyword}. + +A-paragraph-full-of-hyphens-to-test-the-idea-that-word-wrapping-can-happen-somewhere-in-all-this-hyphenatory-nonsense. + +A\-paragraph\-full\-of\-nonbreaking\-hyphens\-to\-test\-the\-idea\-that\-word\-wrapping\-misses\-them. + +A\_paragraph\_full\_of\_nonbreaking\_spaces\_to\_test\_the\_idea\_that\_word\_wrapping\_misses\_them\_too. + +Use of macros: let's talk about \coopt. And about \coopt some more. +And a nested macro: \metacoopt. + +Oh, while I'm here: some special characters. The \\, \{ and \} +characters, to be precise. And their code equivalents, \c{\\}, +\i\c{\{}, \c{\}}. + +\S{subhead} First subheading + +So here's a \I{subheading}\I{subsection}subsection. Just +incidentally, \q{this} is in quotes. \ii{Those} quotes had better work +in all formats. + +We'll try for some Unicode here: \i{Schr\u00F6{oe}dinger}. + +An index tag containing non-alternatived Unicode: \i{\u00BFChe?} + +An invisible index tag: \I{she seems to have an invisible tag}yeah. + +\S2{sub-sub}{Florble} Smaller heading still + +A tiny section. Awww. How cute. Actually, this one's a \e{florble}, +and here's a reference to it: \k{sub-sub}. + +\A{app} Needless appendix + +\# \cfg{winhelp-topic}{Y5VQEXZQVJ} (uncomment this and it clashes) + +Here's an \i{appendix}, for no terribly good reason at all. See +\k{book}. + +It also contains a \W{http://www.tartarus.org/}{hyperlink}. 
+ +\U Bibliography + +\B{book} Some text describing a book. + +\B{nocite} Some text describing a book. This text should appear in +the document even though there is no \cw{\\k} citing it. + +\BR{book} [SillyCitation] + +\nocite{nocite} + +\B{uncited} If this text appears, there's an actual error. + +\# This is a comment. + +\# Now for the index section. + +\IM{she seems to have an invisible tag}{appendix} Invisible tags +and/or appendices diff --git a/keywords.c b/keywords.c new file mode 100644 index 0000000..6e8a230 --- /dev/null +++ b/keywords.c @@ -0,0 +1,154 @@ +/* + * keywords.c: keep track of all cross-reference keywords + */ + +#include +#include +#include +#include "halibut.h" + +static int kwcmp(void *av, void *bv) +{ + const keyword *a = (const keyword *)av; + const keyword *b = (const keyword *)bv; + return ustrcmp(a->key, b->key); +} + +static int kwfind(void *av, void *bv) +{ + wchar_t *a = (wchar_t *)av; + const keyword *b = (const keyword *)bv; + return ustrcmp(a, b->key); +} + +keyword *kw_lookup(keywordlist *kl, wchar_t *str) { + return find234(kl->keys, str, kwfind); +} + +/* + * This function reads through source form and collects the + * keywords. They get collected in a heap, sorted by Unicode + * collation, last at the top (so that we can Heapsort them when we + * finish). + */ +keywordlist *get_keywords(paragraph *source) { + int errors = FALSE; + keywordlist *kl = mknew(keywordlist); + numberstate *n = number_init(); + int prevpara = para_NotParaType; + + number_cfg(n, source); + + kl->size = 0; + kl->keys = newtree234(kwcmp); + kl->nlooseends = kl->looseendssize = 0; + kl->looseends = NULL; + for (; source; source = source->next) { + wchar_t *p, *q; + p = q = source->keyword; + + /* + * Look for the section type override (`example', + * `question' or whatever - to replace `chapter' or + * `section' on a per-section basis). 
+ */ + if (q) { + q = uadv(q); /* point q at the word beyond */ + if (!*q) q = NULL; + } + + /* + * Number the chapter / section / list-item / whatever. + * This also sets up the `parent', `child' and `sibling' + * links. + */ + source->kwtext = number_mktext(n, source, q, prevpara, &errors); + prevpara = source->type; + + if (p && *p) { + if (source->kwtext || source->type == para_Biblio) { + keyword *kw, *ret; + + kw = mknew(keyword); + kw->key = p; + kw->text = source->kwtext; + kw->para = source; + ret = add234(kl->keys, kw); + if (ret != kw) { + error(err_multikw, &source->fpos, &ret->para->fpos, p); + sfree(kw); + /* FIXME: what happens to kw->text? Does it leak? */ + } + } + } else { + if (kl->nlooseends >= kl->looseendssize) { + kl->looseendssize = kl->nlooseends + 32; + kl->looseends = resize(kl->looseends, kl->looseendssize); + } + kl->looseends[kl->nlooseends++] = source->kwtext; + } + } + + number_free(n); + + if (errors) { + free_keywords(kl); + return NULL; + } + + return kl; +} + +void free_keywords(keywordlist *kl) { + keyword *kw; + while (kl->nlooseends) + free_word_list(kl->looseends[--kl->nlooseends]); + sfree(kl->looseends); + while ( (kw = index234(kl->keys, 0)) != NULL) { + delpos234(kl->keys, 0); + free_word_list(kw->text); + sfree(kw); + } + freetree234(kl->keys); + sfree(kl); +} + +void subst_keywords(paragraph *source, keywordlist *kl) { + for (; source; source = source->next) { + word *ptr; + for (ptr = source->words; ptr; ptr = ptr->next) { + if (ptr->type == word_UpperXref || + ptr->type == word_LowerXref) { + keyword *kw; + word **endptr, *close, *subst; + + kw = kw_lookup(kl, ptr->text); + if (!kw) { + error(err_nosuchkw, &ptr->fpos, ptr->text); + subst = NULL; + } else + subst = dup_word_list(kw->text); + + if (subst && ptr->type == word_LowerXref && + kw->para->type != para_Biblio && + kw->para->type != para_BiblioCited) + ustrlow(subst->text); + + close = mknew(word); + close->text = NULL; + close->alt = NULL; + close->type = 
word_XrefEnd; + close->fpos = ptr->fpos; + + close->next = ptr->next; + ptr->next = subst; + + for (endptr = &ptr->next; *endptr; endptr = &(*endptr)->next) + (*endptr)->fpos = ptr->fpos; + + *endptr = close; + ptr = close; + } + } + } +} diff --git a/licence.c b/licence.c new file mode 100644 index 0000000..0c4338a --- /dev/null +++ b/licence.c @@ -0,0 +1,16 @@ +/* + * licence.c: licence text + */ + +#include <stdio.h> + +static char *licencetext[] = { + "FIXME: licence text goes here", + NULL +}; + +void licence(void) { + char **p; + for (p = licencetext; *p; p++) + puts(*p); +} diff --git a/main.c b/main.c new file mode 100644 index 0000000..64f1869 --- /dev/null +++ b/main.c @@ -0,0 +1,300 @@ +/* + * main.c: command line parsing and top level + */ + +#include <stdio.h> +#include <stdlib.h> +#include "halibut.h" + +static void dbg_prtsource(paragraph *sourceform); +static void dbg_prtwordlist(int level, word *w); +static void dbg_prtkws(keywordlist *kws); + +int main(int argc, char **argv) { + char **infiles; + char *outfile; + int nfiles; + int nogo; + int errs; + int reportcols; + int debug; + + /* + * Set up initial (default) parameters. + */ + infiles = mknewa(char *, argc); + outfile = NULL; + nfiles = 0; + nogo = errs = FALSE; + reportcols = 0; + debug = 0; + + if (argc == 1) { + usage(); + exit(EXIT_SUCCESS); + } + + /* + * Parse command line arguments. + */ + while (--argc) { + char *p = *++argv; + if (*p == '-') { + /* + * An option. + */ + while (p && *++p) { + char c = *p; + switch (c) { + case '-': + /* + * Long option. 
+ */ + { + char *opt, *val; + opt = p++; /* opt will have _one_ leading - */ + while (*p && *p != '=') + p++; /* find end of option */ + if (*p == '=') { + *p++ = '\0'; + val = p; + } else + val = NULL; + if (!strcmp(opt, "-help")) { + help(); + nogo = TRUE; + } else if (!strcmp(opt, "-version")) { + showversion(); + nogo = TRUE; + } else if (!strcmp(opt, "-licence") || + !strcmp(opt, "-license")) { + licence(); + nogo = TRUE; + } else if (!strcmp(opt, "-output")) { + if (!val) + errs = TRUE, error(err_optnoarg, opt); + else + outfile = val; + } else if (!strcmp(opt, "-precise")) { + reportcols = 1; + } else { + errs = TRUE, error(err_nosuchopt, opt); + } + } + p = NULL; + break; + case 'h': + case 'V': + case 'L': + case 'P': + case 'd': + /* + * Option requiring no parameter. + */ + switch (c) { + case 'h': + help(); + nogo = TRUE; + break; + case 'V': + showversion(); + nogo = TRUE; + break; + case 'L': + licence(); + nogo = TRUE; + break; + case 'P': + reportcols = 1; + break; + case 'd': + debug = TRUE; + break; + } + break; + case 'o': + /* + * Option requiring parameter. + */ + p++; + if (!*p && argc > 1) + --argc, p = *++argv; + else if (!*p) { + char opt[2]; + opt[0] = c; + opt[1] = '\0'; + errs = TRUE, error(err_optnoarg, opt); + } + /* + * Now c is the option and p is the parameter. + */ + switch (c) { + case 'o': + outfile = p; + break; + } + p = NULL; /* prevent continued processing */ + break; + default: + /* + * Unrecognised option. + */ + { + char opt[2]; + opt[0] = c; + opt[1] = '\0'; + errs = TRUE, error(err_nosuchopt, opt); + } + } + } + } else { + /* + * A non-option argument. + */ + infiles[nfiles++] = p; + } + } + + if (errs) + exit(EXIT_FAILURE); + if (nogo) + exit(EXIT_SUCCESS); + + /* + * Do the work. 
+ */ + if (nfiles == 0) { + error(err_noinput); + usage(); + exit(EXIT_FAILURE); + } + + { + input in; + paragraph *sourceform, *p; + indexdata *idx; + keywordlist *keywords; + + in.filenames = infiles; + in.nfiles = nfiles; + in.currfp = NULL; + in.currindex = 0; + in.npushback = in.pushbacksize = 0; + in.pushback = NULL; + in.reportcols = reportcols; + in.stack = NULL; + + idx = make_index(); + + sourceform = read_input(&in, idx); + if (!sourceform) + exit(EXIT_FAILURE); + + sfree(in.pushback); + + mark_attr_ends(sourceform); + + sfree(infiles); + + keywords = get_keywords(sourceform); + if (!keywords) + exit(EXIT_FAILURE); + gen_citations(sourceform, keywords); + subst_keywords(sourceform, keywords); + + for (p = sourceform; p; p = p->next) + if (p->type == para_IM) + index_merge(idx, TRUE, p->keyword, p->words); + + build_index(idx); + + if (debug) { + index_debug(idx); + dbg_prtkws(keywords); + dbg_prtsource(sourceform); + } + + text_backend(sourceform, keywords, idx); + xhtml_backend(sourceform, keywords, idx); + whlp_backend(sourceform, keywords, idx); + + free_para_list(sourceform); + free_keywords(keywords); + cleanup_index(idx); + } + + return 0; +} + +static void dbg_prtsource(paragraph *sourceform) { + /* + * Output source form in debugging format. + */ + + paragraph *p; + for (p = sourceform; p; p = p->next) { + wchar_t *wp; + printf("para %d ", p->type); + if (p->keyword) { + wp = p->keyword; + while (*wp) { + putchar('\"'); + for (; *wp; wp++) + putchar(*wp); + putchar('\"'); + if (*++wp) + printf(", "); + } + } else + printf("(no keyword)"); + printf(" {\n"); + dbg_prtwordlist(1, p->words); + printf("}\n"); + } +} + +static void dbg_prtkws(keywordlist *kws) { + /* + * Output keywords in debugging format. 
+ */ + + int i; + keyword *kw; + + for (i = 0; (kw = index234(kws->keys, i)) != NULL; i++) { + wchar_t *wp; + printf("keyword "); + wp = kw->key; + while (*wp) { + putchar('\"'); + for (; *wp; wp++) + putchar(*wp); + putchar('\"'); + if (*++wp) + printf(", "); + } + printf(" {\n"); + dbg_prtwordlist(1, kw->text); + printf("}\n"); + } +} + +static void dbg_prtwordlist(int level, word *w) { + for (; w; w = w->next) { + wchar_t *wp; + printf("%*sword %d ", level*4, "", w->type); + if (w->text) { + printf("\""); + for (wp = w->text; *wp; wp++) + putchar(*wp); + printf("\""); + } else + printf("(no text)"); + if (w->alt) { + printf(" alt = {\n"); + dbg_prtwordlist(level+1, w->alt); + printf("%*s}", level*4, ""); + } + printf("\n"); + } +} diff --git a/malloc.c b/malloc.c new file mode 100644 index 0000000..1635b47 --- /dev/null +++ b/malloc.c @@ -0,0 +1,149 @@ +/* + * malloc.c: safe wrappers around malloc, realloc, free, strdup + */ + +#include <stdlib.h> +#include <stdarg.h> +#include "halibut.h" + +#ifdef LOGALLOC +#define LOGPARAMS char *file, int line, +static FILE *logallocfp = NULL; +static int logline = 2; /* off by 1: `null pointer is' */ +static void loginc(void) { } +static void logallocinit(void) { + if (!logallocfp) { + logallocfp = fopen("malloc.log", "w"); + if (!logallocfp) { + fprintf(stderr, "panic: unable to open malloc.log\n"); + exit(10); + } + setvbuf (logallocfp, NULL, _IOLBF, BUFSIZ); + fprintf(logallocfp, "null pointer is %p\n", NULL); + } +} +static void logprintf(char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + vfprintf(logallocfp, fmt, ap); + va_end(ap); +} +#define LOGPRINT(x) ( logallocinit(), logprintf x ) +#define LOGINC do { loginc(); logline++; } while (0) +#else +#define LOGPARAMS +#define LOGPRINT(x) +#define LOGINC ((void)0) +#endif + +/* + * smalloc should guarantee to return a useful pointer - Halibut + * can do nothing except die when it's out of memory anyway. 
+ */ +void *(smalloc)(LOGPARAMS int size) { + void *p; + LOGINC; + LOGPRINT(("%s %d malloc(%ld)", + file, line, (long)size)); + p = malloc(size); + if (!p) + fatal(err_nomemory); + LOGPRINT((" returns %p\n", p)); + return p; +} + +/* + * sfree should guaranteeably deal gracefully with freeing NULL + */ +void (sfree)(LOGPARAMS void *p) { + if (p) { + LOGINC; + LOGPRINT(("%s %d free(%p)\n", + file, line, p)); + free(p); + } +} + +/* + * srealloc should guaranteeably be able to realloc NULL + */ +void *(srealloc)(LOGPARAMS void *p, int size) { + void *q; + if (p) { + LOGINC; + LOGPRINT(("%s %d realloc(%p,%ld)", + file, line, p, (long)size)); + q = realloc(p, size); + LOGPRINT((" returns %p\n", q)); + } else { + LOGINC; + LOGPRINT(("%s %d malloc(%ld)", + file, line, (long)size)); + q = malloc(size); + LOGPRINT((" returns %p\n", q)); + } + if (!q) + fatal(err_nomemory); + return q; +} + +/* + * dupstr is like strdup, but with the never-return-NULL property + * of smalloc (and also reliably defined in all environments :-) + */ +char *dupstr(char *s) { + char *r = smalloc(1+strlen(s)); + strcpy(r,s); + return r; +} + +/* + * Duplicate a linked list of words (deep copy; an empty list gives NULL) + */ +word *dup_word_list(word *w) { + word *head = NULL, **eptr = &head; /* NULL: loop may never assign it */ + + while (w) { + word *newwd = mknew(word); + *newwd = *w; /* structure copy */ + newwd->text = ustrdup(w->text); + if (w->alt) + newwd->alt = dup_word_list(w->alt); + *eptr = newwd; + newwd->next = NULL; + eptr = &newwd->next; + + w = w->next; + } + + return head; +} + +/* + * Free a linked list of words + */ +void free_word_list(word *w) { + word *t; + while (w) { + t = w; + w = w->next; + sfree(t->text); + if (t->alt) + free_word_list(t->alt); + sfree(t); + } +} + +/* + * Free a linked list of paragraphs + */ +void free_para_list(paragraph *p) { + paragraph *t; + while (p) { + t = p; + p = p->next; + sfree(t->keyword); + free_word_list(t->words); + sfree(t); + } +} diff --git a/misc.c b/misc.c new file mode 100644 index 0000000..c4ac72f --- 
/dev/null +++ b/misc.c @@ -0,0 +1,312 @@ +/* + * misc.c: miscellaneous useful items + */ + +#include "halibut.h" + +struct stackTag { + void **data; + int sp; + int size; +}; + +stack stk_new(void) { + stack s; + + s = mknew(struct stackTag); + s->sp = 0; + s->size = 0; + s->data = NULL; + + return s; +} + +void stk_free(stack s) { + sfree(s->data); + sfree(s); +} + +void stk_push(stack s, void *item) { + if (s->size <= s->sp) { + s->size = s->sp + 32; + s->data = resize(s->data, s->size); + } + s->data[s->sp++] = item; +} + +void *stk_pop(stack s) { + if (s->sp > 0) + return s->data[--s->sp]; + else + return NULL; +} + +/* + * Small routines to amalgamate a string from an input source. + */ +const rdstring empty_rdstring = {0, 0, NULL}; +const rdstringc empty_rdstringc = {0, 0, NULL}; + +void rdadd(rdstring *rs, wchar_t c) { + if (rs->pos >= rs->size-1) { + rs->size = rs->pos + 128; + rs->text = resize(rs->text, rs->size); + } + rs->text[rs->pos++] = c; + rs->text[rs->pos] = 0; +} +void rdadds(rdstring *rs, wchar_t *p) { + int len = ustrlen(p); + if (rs->pos >= rs->size - len) { + rs->size = rs->pos + len + 128; + rs->text = resize(rs->text, rs->size); + } + ustrcpy(rs->text + rs->pos, p); + rs->pos += len; +} +wchar_t *rdtrim(rdstring *rs) { + rs->text = resize(rs->text, rs->pos + 1); + return rs->text; +} + +void rdaddc(rdstringc *rs, char c) { + if (rs->pos >= rs->size-1) { + rs->size = rs->pos + 128; + rs->text = resize(rs->text, rs->size); + } + rs->text[rs->pos++] = c; + rs->text[rs->pos] = 0; +} +void rdaddsc(rdstringc *rs, char *p) { + int len = strlen(p); + if (rs->pos >= rs->size - len) { + rs->size = rs->pos + len + 128; + rs->text = resize(rs->text, rs->size); + } + strcpy(rs->text + rs->pos, p); + rs->pos += len; +} +char *rdtrimc(rdstringc *rs) { + rs->text = resize(rs->text, rs->pos + 1); + return rs->text; +} + +int compare_wordlists(word *a, word *b) { + int t; + while (a && b) { + if (a->type != b->type) + return (a->type < b->type ? 
-1 : +1); /* FIXME? */ + t = a->type; + if ((t != word_Normal && t != word_Code && + t != word_WeakCode && t != word_Emph) || + a->alt || b->alt) { + int c; + if (a->text && b->text) { + c = ustricmp(a->text, b->text); + if (c) + return c; + } + c = compare_wordlists(a->alt, b->alt); + if (c) + return c; + a = a->next; + b = b->next; + } else { + wchar_t *ap = a->text, *bp = b->text; + while (*ap && *bp) { + wchar_t ac = utolower(*ap), bc = utolower(*bp); + if (ac != bc) + return (ac < bc ? -1 : +1); + if (!*++ap && a->next && a->next->type == t && !a->next->alt) + a = a->next, ap = a->text; + if (!*++bp && b->next && b->next->type == t && !b->next->alt) + b = b->next, bp = b->text; + } + if (*ap || *bp) + return (*ap ? +1 : -1); + a = a->next; + b = b->next; + } + } + + if (a || b) + return (a ? +1 : -1); + else + return 0; +} + +void mark_attr_ends(paragraph *sourceform) { + paragraph *p; + word *w, *wp; + for (p = sourceform; p; p = p->next) { + wp = NULL; + for (w = p->words; w; w = w->next) { + if (isattr(w->type)) { + int before = (wp && isattr(wp->type) && + sameattr(wp->type, w->type)); + int after = (w->next && isattr(w->next->type) && + sameattr(w->next->type, w->type)); + w->aux |= (before ? + (after ? attr_Always : attr_Last) : + (after ? attr_First : attr_Only)); + } + wp = w; + } + } +} + +wrappedline *wrap_para(word *text, int width, int subsequentwidth, + int (*widthfn)(word *)) { + wrappedline *head = NULL, **ptr = &head; + int nwords, wordsize; + struct wrapword { + word *begin, *end; + int width; + int spacewidth; + int cost; + int nwords; + } *wrapwords; + int i, j, n; + + /* + * Break the line up into wrappable components. 
+ */ + nwords = wordsize = 0; + wrapwords = NULL; + while (text) { + if (nwords >= wordsize) { + wordsize = nwords + 64; + wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords)); + } + wrapwords[nwords].width = 0; + wrapwords[nwords].begin = text; + while (text) { + wrapwords[nwords].width += widthfn(text); + wrapwords[nwords].end = text->next; + if (text->next && (text->next->type == word_WhiteSpace || + text->next->type == word_EmphSpace || + text->breaks)) + break; + text = text->next; + } + if (text && text->next && (text->next->type == word_WhiteSpace || + text->next->type == word_EmphSpace)) { + wrapwords[nwords].spacewidth = widthfn(text->next); + text = text->next; + } else { + wrapwords[nwords].spacewidth = 0; + } + nwords++; + if (text) + text = text->next; + } + + /* + * Perform the dynamic wrapping algorithm: work backwards from + * nwords-1, determining the optimal wrapping for each terminal + * subsequence of the paragraph. + */ + for (i = nwords; i-- ;) { + int best = -1; + int bestcost = 0; + int cost; + int linelen = 0, spacewidth = 0; + int seenspace; + int thiswidth = (i == 0 ? width : subsequentwidth); + + j = 0; + seenspace = 0; + while (i+j < nwords) { + /* + * See what happens if we put j+1 words on this line. + */ + if (spacewidth) + seenspace = 1; + linelen += spacewidth + wrapwords[i+j].width; + spacewidth = wrapwords[i+j].spacewidth; + j++; + if (linelen > thiswidth) { + /* + * If we're over the width limit, abandon ship, + * _unless_ there is no best-effort yet (which will + * only happen if the first word is too long all by + * itself). + */ + if (best > 0) + break; + } + if (i+j == nwords) { + /* + * Special case: if we're at the very end of the + * paragraph, we don't score penalty points for the + * white space left on the line. 
+ */ + cost = 0; + } else { + cost = (thiswidth-linelen) * (thiswidth-linelen); + cost += wrapwords[i+j].cost; + } + /* + * We compare bestcost >= cost, not bestcost > cost, + * because in cases where the costs are identical we + * want to try to look like the greedy algorithm, + * because readers are likely to have spent a lot of + * time looking at greedy-wrapped paragraphs and + * there's no point violating the Principle of Least + * Surprise if it doesn't actually gain anything. + */ + if (best < 0 || bestcost >= cost) { + bestcost = cost; + best = j; + } + } + /* + * Now we know the optimal answer for this terminal + * subsequence, so put it in wrapwords. + */ + wrapwords[i].cost = bestcost; + wrapwords[i].nwords = best; + } + + /* + * We've wrapped the paragraph. Now build the output + * `wrappedline' list. + */ + i = 0; + while (i < nwords) { + wrappedline *w = mknew(wrappedline); + *ptr = w; + ptr = &w->next; + w->next = NULL; + + n = wrapwords[i].nwords; + w->begin = wrapwords[i].begin; + w->end = wrapwords[i+n-1].end; + + /* + * Count along the words to find nspaces and shortfall. + */ + w->nspaces = 0; + w->shortfall = width; + for (j = 0; j < n; j++) { + w->shortfall -= wrapwords[i+j].width; + if (j < n-1 && wrapwords[i+j].spacewidth) { + w->nspaces++; + w->shortfall -= wrapwords[i+j].spacewidth; + } + } + i += n; + } + + sfree(wrapwords); + + return head; +} + +void wrap_free(wrappedline *w) { + while (w) { + wrappedline *t = w->next; + sfree(w); + w = t; + } +} diff --git a/misc/halibut.sl b/misc/halibut.sl new file mode 100644 index 0000000..e88b1ed --- /dev/null +++ b/misc/halibut.sl @@ -0,0 +1,98 @@ +% Halibut mode for Jed. + +$1 = "Halibut"; +create_syntax_table ($1); + +define_syntax ("\#", "", '%', $1); % Comment Syntax +define_syntax ('\\', '\\', $1); % Quote character +define_syntax ("{", "}", '(', $1); % are all these needed? 
+define_syntax ("a-zA-Z0-9", 'w', $1); +set_syntax_flags ($1, 8); + +#ifdef HAS_DFA_SYNTAX +%enable_highlight_cache ("halibut.dfa", $1); + +% A braced comment in Halibut is \#{ ... }, where ... may contain +% any correctly nested sequence of braces. Of course we can't match +% that in a DFA rule, so we'll go down to a reasonable depth of 3 +% instead. +#ifexists dfa_define_highlight_rule +dfa_define_highlight_rule ("\\\\#{[^{}]*({[^{}]*({[^}]*}[^{}]*)*}[^{}]*)*}", + "Qcomment", $1); + +dfa_define_highlight_rule ("\\\\#.*$", "comment", $1); +dfa_define_highlight_rule ("^\\\\c([ \t].*)?$", "string", $1); +dfa_define_highlight_rule ("\\\\[\\\\{}\\-_]", "keyword0", $1); +dfa_define_highlight_rule ("\\\\[A-Za-tv-z][A-Za-z0-9]*", "keyword0", $1); +dfa_define_highlight_rule ("\\\\u[A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9]", + "keyword0", $1); +dfa_define_highlight_rule ("\\\\u[A-Fa-f0-9]?[A-Fa-f0-9]?[A-Fa-f0-9]?[A-Fa-f0-9]", + "keyword1", $1); +dfa_define_highlight_rule ("[{}]", "delimiter", $1); +dfa_define_highlight_rule (".", "normal", $1); +dfa_build_highlight_table ($1); +#else +define_highlight_rule ("\\\\#{[^{}]*({[^{}]*({[^}]*}[^{}]*)*}[^{}]*)*}", + "Qcomment", $1); + +define_highlight_rule ("\\\\#.*$", "comment", $1); +define_highlight_rule ("^\\\\c([ \t].*)?$", "string", $1); +define_highlight_rule ("\\\\[\\\\{}\\-_]", "keyword0", $1); +define_highlight_rule ("\\\\[A-Za-tv-z][A-Za-z0-9]*", "keyword0", $1); +define_highlight_rule ("\\\\u[A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9][A-Fa-f0-9]", + "keyword0", $1); +define_highlight_rule ("\\\\u[A-Fa-f0-9]?[A-Fa-f0-9]?[A-Fa-f0-9]?[A-Fa-f0-9]", + "keyword1", $1); +define_highlight_rule ("[{}]", "delimiter", $1); +define_highlight_rule (".", "normal", $1); +build_highlight_table ($1); +#endif +#endif + +% This hook identifies lines containing comments as paragraph separator +define halibut_is_comment() { + bol (); + while (ffind ("\\\\#")) go_right (3); + ffind ("\\#"); % return value on stack +} + +variable 
Halibut_Ignore_Comment = 0; % if true, line containing a comment + % does not delimit a paragraph + +define halibut_paragraph_separator() { + bol(); + skip_white(); + if (eolp()) + return 1; + if (looking_at("\\c ") or looking_at("\\c\t") or + looking_at("\\c\n")) + return 1; + return not (Halibut_Ignore_Comment) and halibut_is_comment(); +} + +define halibut_wrap_hook() { + variable yep; + push_spot (); + yep = up_1 () and halibut_is_comment (); + pop_spot (); + if (yep) { + push_spot (); + bol_skip_white (); + insert ("\\# "); + pop_spot (); + } +} + +#ifexists mode_set_mode_info +mode_set_mode_info("Halibut", "fold_info", "\\# {{{\r\\# }}}\r\r"); +#endif + +define halibut_mode() { + variable mode = "Halibut"; + % use_keymap (mode); + set_mode (mode, 0x1 | 0x20); + set_buffer_hook ("par_sep", "halibut_paragraph_separator"); + set_buffer_hook ("wrap_hook", "halibut_wrap_hook"); + use_syntax_table (mode); + runhooks ("halibut_mode_hook"); +} diff --git a/misc/logalloc b/misc/logalloc new file mode 100755 index 0000000..cdc4504 --- /dev/null +++ b/misc/logalloc @@ -0,0 +1,61 @@ +#!/usr/bin/perl +# +# Reads a log file, containing lines of the four types +# malloc() returns +# strdup() returns +# calloc(*) returns +# realloc(,) returns +# free() +# +# with optional line on the front saying +# null pointer is +# +# and produces a list of free()s and realloc()s of wrong pointers, +# and also of malloc()s, calloc()s and realloc()s that never get free()d. 
+ +$errors=0; + +while (<>) { + $in=$out=""; + ($file, $line, $call, $in, $out)=($1,$2,$3,"",$4) + if /^(\S+) (\S+) (malloc|strdup)\(\S+\) returns (\S+)$/; + ($file, $line, $call, $in, $out)=($1,$2,"calloc","",$5) + if /^(\S+) (\S+) calloc\(\S+\*\S+\) returns (\S+)$/; + ($file, $line, $call, $in, $out)=($1,$2,"realloc",$3,$4) + if /^(\S+) (\S+) realloc\((\S+),\S+\) returns (\S+)$/; + ($file, $line, $call, $in, $out)=($1,$2,"free",$3,"") + if /^(\S+) (\S+) free\((\S+)\)$/; + $null = $1, next if /^null pointer is (\S+)$/; + if ($in ne "") { + if (&null($in)) { + $bad = "null pointer"; + } elsif (defined $lastalloc{$in}) { + $bad = "already-freed pointer (last alloc $lastalloc{$in}, last free $lastfree{$in})"; + } else { + $bad = "bad pointer"; + } + $errors=1, print "($.) $file:$line: $call() $bad\n" + if $record{$in} eq ""; + $lastfree{$in}="($.) $file:$line"; + $record{$in}=""; + } + if ($out ne "" && !&null($out)) { + $errors=1, print "($.) $file:$line: $call() returned already ". + "allocated pointer\n" if $record{$out} ne ""; + $record{$out}="($.) $file:$line: $call()"; + $lastalloc{$out}="($.) $file:$line"; + } +} + +foreach $i (keys %record) { + $errors=1, print "$record{$i} never got freed\n" + if $record{$i} ne ""; +} + +print "no problems\n" if !$errors; + +# determine if a string refers to a null pointer +sub null { + local ($_) = @_; + $null ? $_ eq $null : /^((0x)?0+|\(nil\))$/; +} diff --git a/style.c b/style.c new file mode 100644 index 0000000..562c594 --- /dev/null +++ b/style.c @@ -0,0 +1,8 @@ +/* + * style.c: load and keep track of user style preferences + */ + +#include +#include +#include "halibut.h" + diff --git a/tree234.c b/tree234.c new file mode 100644 index 0000000..bc88039 --- /dev/null +++ b/tree234.c @@ -0,0 +1,2193 @@ +/* + * tree234.c: reasonably generic counted 2-3-4 tree routines. + * + * This file is copyright 1999-2001 Simon Tatham. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "tree234.h" + +#define smalloc malloc +#define sfree free + +#define mknew(typ) ( (typ *) smalloc (sizeof (typ)) ) + +#ifdef TEST +#define LOG(x) (printf x) +#else +#define LOG(x) +#endif + +typedef struct node234_Tag node234; + +struct tree234_Tag { + node234 *root; + cmpfn234 cmp; +}; + +struct node234_Tag { + node234 *parent; + node234 *kids[4]; + int counts[4]; + void *elems[3]; +}; + +/* + * Create a 2-3-4 tree. + */ +tree234 *newtree234(cmpfn234 cmp) { + tree234 *ret = mknew(tree234); + LOG(("created tree %p\n", ret)); + ret->root = NULL; + ret->cmp = cmp; + return ret; +} + +/* + * Free a 2-3-4 tree (not including freeing the elements). 
+ */ +static void freenode234(node234 *n) { + if (!n) + return; + freenode234(n->kids[0]); + freenode234(n->kids[1]); + freenode234(n->kids[2]); + freenode234(n->kids[3]); + sfree(n); +} +void freetree234(tree234 *t) { + freenode234(t->root); + sfree(t); +} + +/* + * Internal function to count a node. + */ +static int countnode234(node234 *n) { + int count = 0; + int i; + if (!n) + return 0; + for (i = 0; i < 4; i++) + count += n->counts[i]; + for (i = 0; i < 3; i++) + if (n->elems[i]) + count++; + return count; +} + +/* + * Count the elements in a tree. + */ +int count234(tree234 *t) { + if (t->root) + return countnode234(t->root); + else + return 0; +} + +/* + * Propagate a node overflow up a tree until it stops. Returns 0 or + * 1, depending on whether the root had to be split or not. + */ +static int add234_insert(node234 *left, void *e, node234 *right, + node234 **root, node234 *n, int ki) { + int lcount, rcount; + /* + * We need to insert the new left/element/right set in n at + * child position ki. + */ + lcount = countnode234(left); + rcount = countnode234(right); + while (n) { + LOG((" at %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], + n->kids[3], n->counts[3])); + LOG((" need to insert %p/%d \"%s\" %p/%d at position %d\n", + left, lcount, e, right, rcount, ki)); + if (n->elems[1] == NULL) { + /* + * Insert in a 2-node; simple. 
+ */ + if (ki == 0) { + LOG((" inserting on left of 2-node\n")); + n->kids[2] = n->kids[1]; n->counts[2] = n->counts[1]; + n->elems[1] = n->elems[0]; + n->kids[1] = right; n->counts[1] = rcount; + n->elems[0] = e; + n->kids[0] = left; n->counts[0] = lcount; + } else { /* ki == 1 */ + LOG((" inserting on right of 2-node\n")); + n->kids[2] = right; n->counts[2] = rcount; + n->elems[1] = e; + n->kids[1] = left; n->counts[1] = lcount; + } + if (n->kids[0]) n->kids[0]->parent = n; + if (n->kids[1]) n->kids[1]->parent = n; + if (n->kids[2]) n->kids[2]->parent = n; + LOG((" done\n")); + break; + } else if (n->elems[2] == NULL) { + /* + * Insert in a 3-node; simple. + */ + if (ki == 0) { + LOG((" inserting on left of 3-node\n")); + n->kids[3] = n->kids[2]; n->counts[3] = n->counts[2]; + n->elems[2] = n->elems[1]; + n->kids[2] = n->kids[1]; n->counts[2] = n->counts[1]; + n->elems[1] = n->elems[0]; + n->kids[1] = right; n->counts[1] = rcount; + n->elems[0] = e; + n->kids[0] = left; n->counts[0] = lcount; + } else if (ki == 1) { + LOG((" inserting in middle of 3-node\n")); + n->kids[3] = n->kids[2]; n->counts[3] = n->counts[2]; + n->elems[2] = n->elems[1]; + n->kids[2] = right; n->counts[2] = rcount; + n->elems[1] = e; + n->kids[1] = left; n->counts[1] = lcount; + } else { /* ki == 2 */ + LOG((" inserting on right of 3-node\n")); + n->kids[3] = right; n->counts[3] = rcount; + n->elems[2] = e; + n->kids[2] = left; n->counts[2] = lcount; + } + if (n->kids[0]) n->kids[0]->parent = n; + if (n->kids[1]) n->kids[1]->parent = n; + if (n->kids[2]) n->kids[2]->parent = n; + if (n->kids[3]) n->kids[3]->parent = n; + LOG((" done\n")); + break; + } else { + node234 *m = mknew(node234); + m->parent = n->parent; + LOG((" splitting a 4-node; created new node %p\n", m)); + /* + * Insert in a 4-node; split into a 2-node and a + * 3-node, and move focus up a level. + * + * I don't think it matters which way round we put the + * 2 and the 3. For simplicity, we'll put the 3 first + * always. 
+ */ + if (ki == 0) { + m->kids[0] = left; m->counts[0] = lcount; + m->elems[0] = e; + m->kids[1] = right; m->counts[1] = rcount; + m->elems[1] = n->elems[0]; + m->kids[2] = n->kids[1]; m->counts[2] = n->counts[1]; + e = n->elems[1]; + n->kids[0] = n->kids[2]; n->counts[0] = n->counts[2]; + n->elems[0] = n->elems[2]; + n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3]; + } else if (ki == 1) { + m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0]; + m->elems[0] = n->elems[0]; + m->kids[1] = left; m->counts[1] = lcount; + m->elems[1] = e; + m->kids[2] = right; m->counts[2] = rcount; + e = n->elems[1]; + n->kids[0] = n->kids[2]; n->counts[0] = n->counts[2]; + n->elems[0] = n->elems[2]; + n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3]; + } else if (ki == 2) { + m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0]; + m->elems[0] = n->elems[0]; + m->kids[1] = n->kids[1]; m->counts[1] = n->counts[1]; + m->elems[1] = n->elems[1]; + m->kids[2] = left; m->counts[2] = lcount; + /* e = e; */ + n->kids[0] = right; n->counts[0] = rcount; + n->elems[0] = n->elems[2]; + n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3]; + } else { /* ki == 3 */ + m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0]; + m->elems[0] = n->elems[0]; + m->kids[1] = n->kids[1]; m->counts[1] = n->counts[1]; + m->elems[1] = n->elems[1]; + m->kids[2] = n->kids[2]; m->counts[2] = n->counts[2]; + n->kids[0] = left; n->counts[0] = lcount; + n->elems[0] = e; + n->kids[1] = right; n->counts[1] = rcount; + e = n->elems[2]; + } + m->kids[3] = n->kids[3] = n->kids[2] = NULL; + m->counts[3] = n->counts[3] = n->counts[2] = 0; + m->elems[2] = n->elems[2] = n->elems[1] = NULL; + if (m->kids[0]) m->kids[0]->parent = m; + if (m->kids[1]) m->kids[1]->parent = m; + if (m->kids[2]) m->kids[2]->parent = m; + if (n->kids[0]) n->kids[0]->parent = n; + if (n->kids[1]) n->kids[1]->parent = n; + LOG((" left (%p): %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", m, + m->kids[0], m->counts[0], m->elems[0], + m->kids[1], 
m->counts[1], m->elems[1], + m->kids[2], m->counts[2])); + LOG((" right (%p): %p/%d \"%s\" %p/%d\n", n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1])); + left = m; lcount = countnode234(left); + right = n; rcount = countnode234(right); + } + if (n->parent) + ki = (n->parent->kids[0] == n ? 0 : + n->parent->kids[1] == n ? 1 : + n->parent->kids[2] == n ? 2 : 3); + n = n->parent; + } + + /* + * If we've come out of here by `break', n will still be + * non-NULL and all we need to do is go back up the tree + * updating counts. If we've come here because n is NULL, we + * need to create a new root for the tree because the old one + * has just split into two. */ + if (n) { + while (n->parent) { + int count = countnode234(n); + int childnum; + childnum = (n->parent->kids[0] == n ? 0 : + n->parent->kids[1] == n ? 1 : + n->parent->kids[2] == n ? 2 : 3); + n->parent->counts[childnum] = count; + n = n->parent; + } + return 0; /* root unchanged */ + } else { + LOG((" root is overloaded, split into two\n")); + (*root) = mknew(node234); + (*root)->kids[0] = left; (*root)->counts[0] = lcount; + (*root)->elems[0] = e; + (*root)->kids[1] = right; (*root)->counts[1] = rcount; + (*root)->elems[1] = NULL; + (*root)->kids[2] = NULL; (*root)->counts[2] = 0; + (*root)->elems[2] = NULL; + (*root)->kids[3] = NULL; (*root)->counts[3] = 0; + (*root)->parent = NULL; + if ((*root)->kids[0]) (*root)->kids[0]->parent = (*root); + if ((*root)->kids[1]) (*root)->kids[1]->parent = (*root); + LOG((" new root is %p/%d \"%s\" %p/%d\n", + (*root)->kids[0], (*root)->counts[0], + (*root)->elems[0], + (*root)->kids[1], (*root)->counts[1])); + return 1; /* root moved */ + } +} + +/* + * Add an element e to a 2-3-4 tree t. Returns e on success, or if + * an existing element compares equal, returns that. 
+ */ +static void *add234_internal(tree234 *t, void *e, int index) { + node234 *n; + int ki; + void *orig_e = e; + int c; + + LOG(("adding element \"%s\" to tree %p\n", e, t)); + if (t->root == NULL) { + t->root = mknew(node234); + t->root->elems[1] = t->root->elems[2] = NULL; + t->root->kids[0] = t->root->kids[1] = NULL; + t->root->kids[2] = t->root->kids[3] = NULL; + t->root->counts[0] = t->root->counts[1] = 0; + t->root->counts[2] = t->root->counts[3] = 0; + t->root->parent = NULL; + t->root->elems[0] = e; + LOG((" created root %p\n", t->root)); + return orig_e; + } + + n = t->root; + while (n) { + LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], + n->kids[3], n->counts[3])); + if (index >= 0) { + if (!n->kids[0]) { + /* + * Leaf node. We want to insert at kid position + * equal to the index: + * + * 0 A 1 B 2 C 3 + */ + ki = index; + } else { + /* + * Internal node. We always descend through it (add + * always starts at the bottom, never in the + * middle). 
+ */ + if (index <= n->counts[0]) { + ki = 0; + } else if (index -= n->counts[0] + 1, index <= n->counts[1]) { + ki = 1; + } else if (index -= n->counts[1] + 1, index <= n->counts[2]) { + ki = 2; + } else if (index -= n->counts[2] + 1, index <= n->counts[3]) { + ki = 3; + } else + return NULL; /* error: index out of range */ + } + } else { + if ((c = t->cmp(e, n->elems[0])) < 0) + ki = 0; + else if (c == 0) + return n->elems[0]; /* already exists */ + else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1])) < 0) + ki = 1; + else if (c == 0) + return n->elems[1]; /* already exists */ + else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2])) < 0) + ki = 2; + else if (c == 0) + return n->elems[2]; /* already exists */ + else + ki = 3; + } + LOG((" moving to child %d (%p)\n", ki, n->kids[ki])); + if (!n->kids[ki]) + break; + n = n->kids[ki]; + } + + add234_insert(NULL, e, NULL, &t->root, n, ki); + + return orig_e; +} + +void *add234(tree234 *t, void *e) { + if (!t->cmp) /* tree is unsorted */ + return NULL; + + return add234_internal(t, e, -1); +} +void *addpos234(tree234 *t, void *e, int index) { + if (index < 0 || /* index out of range */ + t->cmp) /* tree is sorted */ + return NULL; /* return failure */ + + return add234_internal(t, e, index); /* this checks the upper bound */ +} + +/* + * Look up the element at a given numeric index in a 2-3-4 tree. + * Returns NULL if the index is out of range. 
+ */ +void *index234(tree234 *t, int index) { + node234 *n; + + if (!t->root) + return NULL; /* tree is empty */ + + if (index < 0 || index >= countnode234(t->root)) + return NULL; /* out of range */ + + n = t->root; + + while (n) { + if (index < n->counts[0]) + n = n->kids[0]; + else if (index -= n->counts[0] + 1, index < 0) + return n->elems[0]; + else if (index < n->counts[1]) + n = n->kids[1]; + else if (index -= n->counts[1] + 1, index < 0) + return n->elems[1]; + else if (index < n->counts[2]) + n = n->kids[2]; + else if (index -= n->counts[2] + 1, index < 0) + return n->elems[2]; + else + n = n->kids[3]; + } + + /* We shouldn't ever get here. I wonder how we did. */ + return NULL; +} + +/* + * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not + * found. e is always passed as the first argument to cmp, so cmp + * can be an asymmetric function if desired. cmp can also be passed + * as NULL, in which case the compare function from the tree proper + * will be used. + */ +void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp, + int relation, int *index) { + node234 *n; + void *ret; + int c; + int idx, ecount, kcount, cmpret; + + if (t->root == NULL) + return NULL; + + if (cmp == NULL) + cmp = t->cmp; + + n = t->root; + /* + * Attempt to find the element itself. + */ + idx = 0; + ecount = -1; + /* + * Prepare a fake `cmp' result if e is NULL. + */ + cmpret = 0; + if (e == NULL) { + assert(relation == REL234_LT || relation == REL234_GT); + if (relation == REL234_LT) + cmpret = +1; /* e is a max: always greater */ + else if (relation == REL234_GT) + cmpret = -1; /* e is a min: always smaller */ + } + while (1) { + for (kcount = 0; kcount < 4; kcount++) { + if (kcount >= 3 || n->elems[kcount] == NULL || + (c = cmpret ? 
cmpret : cmp(e, n->elems[kcount])) < 0) { + break; + } + if (n->kids[kcount]) idx += n->counts[kcount]; + if (c == 0) { + ecount = kcount; + break; + } + idx++; + } + if (ecount >= 0) + break; + if (n->kids[kcount]) + n = n->kids[kcount]; + else + break; + } + + if (ecount >= 0) { + /* + * We have found the element we're looking for. It's + * n->elems[ecount], at tree index idx. If our search + * relation is EQ, LE or GE we can now go home. + */ + if (relation != REL234_LT && relation != REL234_GT) { + if (index) *index = idx; + return n->elems[ecount]; + } + + /* + * Otherwise, we'll do an indexed lookup for the previous + * or next element. (It would be perfectly possible to + * implement these search types in a non-counted tree by + * going back up from where we are, but far more fiddly.) + */ + if (relation == REL234_LT) + idx--; + else + idx++; + } else { + /* + * We've found our way to the bottom of the tree and we + * know where we would insert this node if we wanted to: + * we'd put it in in place of the (empty) subtree + * n->kids[kcount], and it would have index idx + * + * But the actual element isn't there. So if our search + * relation is EQ, we're doomed. + */ + if (relation == REL234_EQ) + return NULL; + + /* + * Otherwise, we must do an index lookup for index idx-1 + * (if we're going left - LE or LT) or index idx (if we're + * going right - GE or GT). + */ + if (relation == REL234_LT || relation == REL234_LE) { + idx--; + } + } + + /* + * We know the index of the element we want; just call index234 + * to do the rest. This will return NULL if the index is out of + * bounds, which is exactly what we want. 
+ */ + ret = index234(t, idx); + if (ret && index) *index = idx; + return ret; +} +void *find234(tree234 *t, void *e, cmpfn234 cmp) { + return findrelpos234(t, e, cmp, REL234_EQ, NULL); +} +void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation) { + return findrelpos234(t, e, cmp, relation, NULL); +} +void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index) { + return findrelpos234(t, e, cmp, REL234_EQ, index); +} + +/* + * Tree transformation used in delete and split: move a subtree + * right, from child ki of a node to the next child. Update k and + * index so that they still point to the same place in the + * transformed tree. Assumes the destination child is not full, and + * that the source child does have a subtree to spare. Can cope if + * the destination child is undersized. + * + * . C . . B . + * / \ -> / \ + * [more] a A b B c d D e [more] a A b c C d D e + * + * . C . . B . + * / \ -> / \ + * [more] a A b B c d [more] a A b c C d + */ +static void trans234_subtree_right(node234 *n, int ki, int *k, int *index) { + node234 *src, *dest; + int i, srclen, adjust; + + src = n->kids[ki]; + dest = n->kids[ki+1]; + + LOG((" trans234_subtree_right(%p, %d):\n", n, ki)); + LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], + n->kids[3], n->counts[3])); + LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + src, + src->kids[0], src->counts[0], src->elems[0], + src->kids[1], src->counts[1], src->elems[1], + src->kids[2], src->counts[2], src->elems[2], + src->kids[3], src->counts[3])); + LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + dest, + dest->kids[0], dest->counts[0], dest->elems[0], + dest->kids[1], dest->counts[1], dest->elems[1], + dest->kids[2], dest->counts[2], dest->elems[2], + dest->kids[3], dest->counts[3])); + /* + * Move over the rest of the destination node to 
make space. + */ + dest->kids[3] = dest->kids[2]; dest->counts[3] = dest->counts[2]; + dest->elems[2] = dest->elems[1]; + dest->kids[2] = dest->kids[1]; dest->counts[2] = dest->counts[1]; + dest->elems[1] = dest->elems[0]; + dest->kids[1] = dest->kids[0]; dest->counts[1] = dest->counts[0]; + + /* which element to move over */ + i = (src->elems[2] ? 2 : src->elems[1] ? 1 : 0); + + dest->elems[0] = n->elems[ki]; + n->elems[ki] = src->elems[i]; + src->elems[i] = NULL; + + dest->kids[0] = src->kids[i+1]; dest->counts[0] = src->counts[i+1]; + src->kids[i+1] = NULL; src->counts[i+1] = 0; + + if (dest->kids[0]) dest->kids[0]->parent = dest; + + adjust = dest->counts[0] + 1; + + n->counts[ki] -= adjust; + n->counts[ki+1] += adjust; + + srclen = n->counts[ki]; + + if (k) { + LOG((" before: k,index = %d,%d\n", (*k), (*index))); + if ((*k) == ki && (*index) > srclen) { + (*index) -= srclen + 1; + (*k)++; + } else if ((*k) == ki+1) { + (*index) += adjust; + } + LOG((" after: k,index = %d,%d\n", (*k), (*index))); + } + + LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], + n->kids[3], n->counts[3])); + LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + src, + src->kids[0], src->counts[0], src->elems[0], + src->kids[1], src->counts[1], src->elems[1], + src->kids[2], src->counts[2], src->elems[2], + src->kids[3], src->counts[3])); + LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + dest, + dest->kids[0], dest->counts[0], dest->elems[0], + dest->kids[1], dest->counts[1], dest->elems[1], + dest->kids[2], dest->counts[2], dest->elems[2], + dest->kids[3], dest->counts[3])); +} + +/* + * Tree transformation used in delete and split: move a subtree + * left, from child ki of a node to the previous child. Update k + * and index so that they still point to the same place in the + * transformed tree. 
Assumes the destination child is not full, and + * that the source child does have a subtree to spare. Can cope if + * the destination child is undersized. + * + * . B . . C . + * / \ -> / \ + * a A b c C d D e [more] a A b B c d D e [more] + * + * . A . . B . + * / \ -> / \ + * a b B c C d [more] a A b c C d [more] + */ +static void trans234_subtree_left(node234 *n, int ki, int *k, int *index) { + node234 *src, *dest; + int i, adjust; + + src = n->kids[ki]; + dest = n->kids[ki-1]; + + LOG((" trans234_subtree_left(%p, %d):\n", n, ki)); + LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], + n->kids[3], n->counts[3])); + LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + dest, + dest->kids[0], dest->counts[0], dest->elems[0], + dest->kids[1], dest->counts[1], dest->elems[1], + dest->kids[2], dest->counts[2], dest->elems[2], + dest->kids[3], dest->counts[3])); + LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + src, + src->kids[0], src->counts[0], src->elems[0], + src->kids[1], src->counts[1], src->elems[1], + src->kids[2], src->counts[2], src->elems[2], + src->kids[3], src->counts[3])); + + /* where in dest to put it */ + i = (dest->elems[1] ? 2 : dest->elems[0] ? 1 : 0); + dest->elems[i] = n->elems[ki-1]; + n->elems[ki-1] = src->elems[0]; + + dest->kids[i+1] = src->kids[0]; dest->counts[i+1] = src->counts[0]; + + if (dest->kids[i+1]) dest->kids[i+1]->parent = dest; + + /* + * Move over the rest of the source node. 
+ */ + src->kids[0] = src->kids[1]; src->counts[0] = src->counts[1]; + src->elems[0] = src->elems[1]; + src->kids[1] = src->kids[2]; src->counts[1] = src->counts[2]; + src->elems[1] = src->elems[2]; + src->kids[2] = src->kids[3]; src->counts[2] = src->counts[3]; + src->elems[2] = NULL; + src->kids[3] = NULL; src->counts[3] = 0; + + adjust = dest->counts[i+1] + 1; + + n->counts[ki] -= adjust; + n->counts[ki-1] += adjust; + + if (k) { + LOG((" before: k,index = %d,%d\n", (*k), (*index))); + if ((*k) == ki) { + (*index) -= adjust; + if ((*index) < 0) { + (*index) += n->counts[ki-1] + 1; + (*k)--; + } + } + LOG((" after: k,index = %d,%d\n", (*k), (*index))); + } + + LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], + n->kids[3], n->counts[3])); + LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + dest, + dest->kids[0], dest->counts[0], dest->elems[0], + dest->kids[1], dest->counts[1], dest->elems[1], + dest->kids[2], dest->counts[2], dest->elems[2], + dest->kids[3], dest->counts[3])); + LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + src, + src->kids[0], src->counts[0], src->elems[0], + src->kids[1], src->counts[1], src->elems[1], + src->kids[2], src->counts[2], src->elems[2], + src->kids[3], src->counts[3])); +} + +/* + * Tree transformation used in delete and split: merge child nodes + * ki and ki+1 of a node. Update k and index so that they still + * point to the same place in the transformed tree. Assumes both + * children _are_ sufficiently small. + * + * . B . . + * / \ -> | + * a A b c C d a A b B c C d + * + * This routine can also cope with either child being undersized: + * + * . A . . + * / \ -> | + * a b B c a A b B c + * + * . A . . 
+ * / \ -> | + * a b B c C d a A b B c C d + */ +static void trans234_subtree_merge(node234 *n, int ki, int *k, int *index) { + node234 *left, *right; + int i, leftlen, rightlen, lsize, rsize; + + left = n->kids[ki]; leftlen = n->counts[ki]; + right = n->kids[ki+1]; rightlen = n->counts[ki+1]; + + LOG((" trans234_subtree_merge(%p, %d):\n", n, ki)); + LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], + n->kids[3], n->counts[3])); + LOG((" left %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + left, + left->kids[0], left->counts[0], left->elems[0], + left->kids[1], left->counts[1], left->elems[1], + left->kids[2], left->counts[2], left->elems[2], + left->kids[3], left->counts[3])); + LOG((" right %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + right, + right->kids[0], right->counts[0], right->elems[0], + right->kids[1], right->counts[1], right->elems[1], + right->kids[2], right->counts[2], right->elems[2], + right->kids[3], right->counts[3])); + + assert(!left->elems[2] && !right->elems[2]); /* neither is large! */ + lsize = (left->elems[1] ? 2 : left->elems[0] ? 1 : 0); + rsize = (right->elems[1] ? 2 : right->elems[0] ? 1 : 0); + + left->elems[lsize] = n->elems[ki]; + + for (i = 0; i < rsize+1; i++) { + left->kids[lsize+1+i] = right->kids[i]; + left->counts[lsize+1+i] = right->counts[i]; + if (left->kids[lsize+1+i]) + left->kids[lsize+1+i]->parent = left; + if (i < rsize) + left->elems[lsize+1+i] = right->elems[i]; + } + + n->counts[ki] += rightlen + 1; + + sfree(right); + + /* + * Move the rest of n up by one. 
+ */ + for (i = ki+1; i < 3; i++) { + n->kids[i] = n->kids[i+1]; + n->counts[i] = n->counts[i+1]; + } + for (i = ki; i < 2; i++) { + n->elems[i] = n->elems[i+1]; + } + n->kids[3] = NULL; + n->counts[3] = 0; + n->elems[2] = NULL; + + if (k) { + LOG((" before: k,index = %d,%d\n", (*k), (*index))); + if ((*k) == ki+1) { + (*k)--; + (*index) += leftlen + 1; + } else if ((*k) > ki+1) { + (*k)--; + } + LOG((" after: k,index = %d,%d\n", (*k), (*index))); + } + + LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], + n->kids[3], n->counts[3])); + LOG((" merged %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + left, + left->kids[0], left->counts[0], left->elems[0], + left->kids[1], left->counts[1], left->elems[1], + left->kids[2], left->counts[2], left->elems[2], + left->kids[3], left->counts[3])); + +} + +/* + * Delete an element e in a 2-3-4 tree. Does not free the element, + * merely removes all links to it from the tree nodes. + */ +static void *delpos234_internal(tree234 *t, int index) { + node234 *n; + void *retval; + int ki, i; + + retval = NULL; + + n = t->root; /* by assumption this is non-NULL */ + LOG(("deleting item %d from tree %p\n", index, t)); + while (1) { + node234 *sub; + + LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], + n->kids[3], n->counts[3], + index)); + if (index <= n->counts[0]) { + ki = 0; + } else if (index -= n->counts[0]+1, index <= n->counts[1]) { + ki = 1; + } else if (index -= n->counts[1]+1, index <= n->counts[2]) { + ki = 2; + } else if (index -= n->counts[2]+1, index <= n->counts[3]) { + ki = 3; + } else { + assert(0); /* can't happen */ + } + + if (!n->kids[0]) + break; /* n is a leaf node; we're here! 
*/ + + /* + * Check to see if we've found our target element. If so, + * we must choose a new target (we'll use the old target's + * successor, which will be in a leaf), move it into the + * place of the old one, continue down to the leaf and + * delete the old copy of the new target. + */ + if (index == n->counts[ki]) { + node234 *m; + LOG((" found element in internal node, index %d\n", ki)); + assert(n->elems[ki]); /* must be a kid _before_ an element */ + ki++; index = 0; + for (m = n->kids[ki]; m->kids[0]; m = m->kids[0]) + continue; + LOG((" replacing with element \"%s\" from leaf node %p\n", + m->elems[0], m)); + retval = n->elems[ki-1]; + n->elems[ki-1] = m->elems[0]; + } + + /* + * Recurse down to subtree ki. If it has only one element, + * we have to do some transformation to start with. + */ + LOG((" moving to subtree %d\n", ki)); + sub = n->kids[ki]; + if (!sub->elems[1]) { + LOG((" subtree has only one element!\n")); + if (ki > 0 && n->kids[ki-1]->elems[1]) { + /* + * Child ki has only one element, but child + * ki-1 has two or more. So we need to move a + * subtree from ki-1 to ki. + */ + trans234_subtree_right(n, ki-1, &ki, &index); + } else if (ki < 3 && n->kids[ki+1] && + n->kids[ki+1]->elems[1]) { + /* + * Child ki has only one element, but ki+1 has + * two or more. Move a subtree from ki+1 to ki. + */ + trans234_subtree_left(n, ki+1, &ki, &index); + } else { + /* + * ki is small with only small neighbours. Pick a + * neighbour and merge with it. + */ + trans234_subtree_merge(n, ki>0 ? ki-1 : ki, &ki, &index); + sub = n->kids[ki]; + + if (!n->elems[0]) { + /* + * The root is empty and needs to be + * removed. + */ + LOG((" shifting root!\n")); + t->root = sub; + sub->parent = NULL; + sfree(n); + n = NULL; + } + } + } + + if (n) + n->counts[ki]--; + n = sub; + } + + /* + * Now n is a leaf node, and ki marks the element number we + * want to delete. We've already arranged for the leaf to be + * bigger than minimum size, so let's just go to it. 
+ */ + assert(!n->kids[0]); + if (!retval) + retval = n->elems[ki]; + + for (i = ki; i < 2 && n->elems[i+1]; i++) + n->elems[i] = n->elems[i+1]; + n->elems[i] = NULL; + + /* + * It's just possible that we have reduced the leaf to zero + * size. This can only happen if it was the root - so destroy + * it and make the tree empty. + */ + if (!n->elems[0]) { + LOG((" removed last element in tree, destroying empty root\n")); + assert(n == t->root); + sfree(n); + t->root = NULL; + } + + return retval; /* finished! */ +} +void *delpos234(tree234 *t, int index) { + if (index < 0 || index >= countnode234(t->root)) + return NULL; + return delpos234_internal(t, index); +} +void *del234(tree234 *t, void *e) { + int index; + if (!findrelpos234(t, e, NULL, REL234_EQ, &index)) + return NULL; /* it wasn't in there anyway */ + return delpos234_internal(t, index); /* it's there; delete it. */ +} + +/* + * Join two subtrees together with a separator element between + * them, given their relative height. + * + * (Height<0 means the left tree is shorter, >0 means the right + * tree is shorter, =0 means (duh) they're equal.) + * + * It is assumed that any checks needed on the ordering criterion + * have _already_ been done. + * + * The value returned in `height' is 0 or 1 depending on whether the + * resulting tree is the same height as the original larger one, or + * one higher. + */ +static node234 *join234_internal(node234 *left, void *sep, + node234 *right, int *height) { + node234 *root, *node; + int relht = *height; + int ki; + + LOG((" join: joining %p \"%s\" %p, relative height is %d\n", + left, sep, right, relht)); + if (relht == 0) { + /* + * The trees are the same height. Create a new one-element + * root containing the separator and pointers to the two + * nodes. 
+ */ + node234 *newroot; + newroot = mknew(node234); + newroot->kids[0] = left; newroot->counts[0] = countnode234(left); + newroot->elems[0] = sep; + newroot->kids[1] = right; newroot->counts[1] = countnode234(right); + newroot->elems[1] = NULL; + newroot->kids[2] = NULL; newroot->counts[2] = 0; + newroot->elems[2] = NULL; + newroot->kids[3] = NULL; newroot->counts[3] = 0; + newroot->parent = NULL; + if (left) left->parent = newroot; + if (right) right->parent = newroot; + *height = 1; + LOG((" join: same height, brand new root\n")); + return newroot; + } + + /* + * This now works like the addition algorithm on the larger + * tree. We're replacing a single kid pointer with two kid + * pointers separated by an element; if that causes the node to + * overload, we split it in two, move a separator element up to + * the next node, and repeat. + */ + if (relht < 0) { + /* + * Left tree is shorter. Search down the right tree to find + * the pointer we're inserting at. + */ + node = root = right; + while (++relht < 0) { + node = node->kids[0]; + } + ki = 0; + right = node->kids[ki]; + } else { + /* + * Right tree is shorter; search down the left to find the + * pointer we're inserting at. + */ + node = root = left; + while (--relht > 0) { + if (node->elems[2]) + node = node->kids[3]; + else if (node->elems[1]) + node = node->kids[2]; + else + node = node->kids[1]; + } + if (node->elems[2]) + ki = 3; + else if (node->elems[1]) + ki = 2; + else + ki = 1; + left = node->kids[ki]; + } + + /* + * Now proceed as for addition. 
+ */ + *height = add234_insert(left, sep, right, &root, node, ki); + + return root; +} +static int height234(tree234 *t) { + int level = 0; + node234 *n = t->root; + while (n) { + level++; + n = n->kids[0]; + } + return level; +} +tree234 *join234(tree234 *t1, tree234 *t2) { + int size2 = countnode234(t2->root); + if (size2 > 0) { + void *element; + int relht; + + if (t1->cmp) { + element = index234(t2, 0); + element = findrelpos234(t1, element, NULL, REL234_GE, NULL); + if (element) + return NULL; + } + + element = delpos234(t2, 0); + relht = height234(t1) - height234(t2); + t1->root = join234_internal(t1->root, element, t2->root, &relht); + t2->root = NULL; + } + return t1; +} +tree234 *join234r(tree234 *t1, tree234 *t2) { + int size1 = countnode234(t1->root); + if (size1 > 0) { + void *element; + int relht; + + if (t2->cmp) { + element = index234(t1, size1-1); + element = findrelpos234(t2, element, NULL, REL234_LE, NULL); + if (element) + return NULL; + } + + element = delpos234(t1, size1-1); + relht = height234(t1) - height234(t2); + t2->root = join234_internal(t1->root, element, t2->root, &relht); + t1->root = NULL; + } + return t2; +} + +/* + * Split out the first elements in a tree and return a + * pointer to the root node. Leave the root node of the remainder + * in t. + */ +static node234 *split234_internal(tree234 *t, int index) { + node234 *halves[2], *n, *sib, *sub; + node234 *lparent, *rparent; + int ki, pki, i, half, lcount, rcount; + + n = t->root; + LOG(("splitting tree %p at point %d\n", t, index)); + + /* + * Easy special cases. After this we have also dealt completely + * with the empty-tree case and we can assume the root exists. + */ + if (index == 0) /* return nothing */ + return NULL; + if (index == countnode234(t->root)) { /* return the whole tree */ + node234 *ret = t->root; + t->root = NULL; + return ret; + } + + /* + * Search down the tree to find the split point. 
+ */ + lparent = rparent = NULL; + while (n) { + LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], + n->kids[3], n->counts[3], + index)); + lcount = index; + rcount = countnode234(n) - lcount; + if (index <= n->counts[0]) { + ki = 0; + } else if (index -= n->counts[0]+1, index <= n->counts[1]) { + ki = 1; + } else if (index -= n->counts[1]+1, index <= n->counts[2]) { + ki = 2; + } else { + index -= n->counts[2]+1; + ki = 3; + } + + LOG((" splitting at subtree %d\n", ki)); + sub = n->kids[ki]; + + LOG((" splitting at child index %d\n", ki)); + + /* + * Split the node, put halves[0] on the right of the left + * one and halves[1] on the left of the right one, put the + * new node pointers in halves[0] and halves[1], and go up + * a level. + */ + sib = mknew(node234); + for (i = 0; i < 3; i++) { + if (i+ki < 3 && n->elems[i+ki]) { + sib->elems[i] = n->elems[i+ki]; + sib->kids[i+1] = n->kids[i+ki+1]; + if (sib->kids[i+1]) sib->kids[i+1]->parent = sib; + sib->counts[i+1] = n->counts[i+ki+1]; + n->elems[i+ki] = NULL; + n->kids[i+ki+1] = NULL; + n->counts[i+ki+1] = 0; + } else { + sib->elems[i] = NULL; + sib->kids[i+1] = NULL; + sib->counts[i+1] = 0; + } + } + if (lparent) { + lparent->kids[pki] = n; + lparent->counts[pki] = lcount; + n->parent = lparent; + rparent->kids[0] = sib; + rparent->counts[0] = rcount; + sib->parent = rparent; + } else { + halves[0] = n; + n->parent = NULL; + halves[1] = sib; + sib->parent = NULL; + } + lparent = n; + rparent = sib; + pki = ki; + LOG((" left node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], + n->kids[3], n->counts[3])); + LOG((" right node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + sib, + sib->kids[0], sib->counts[0], sib->elems[0], + 
sib->kids[1], sib->counts[1], sib->elems[1], + sib->kids[2], sib->counts[2], sib->elems[2], + sib->kids[3], sib->counts[3])); + + n = sub; + } + + /* + * We've come off the bottom here, so we've successfully split + * the tree into two equally high subtrees. The only problem is + * that some of the nodes down the fault line will be smaller + * than the minimum permitted size. (Since this is a 2-3-4 + * tree, that means they'll be zero-element one-child nodes.) + */ + LOG((" fell off bottom, lroot is %p, rroot is %p\n", + halves[0], halves[1])); + lparent->counts[pki] = rparent->counts[0] = 0; + lparent->kids[pki] = rparent->kids[0] = NULL; + + /* + * So now we go back down the tree from each of the two roots, + * fixing up undersize nodes. + */ + for (half = 0; half < 2; half++) { + /* + * Remove the root if it's undersize (it will contain only + * one child pointer, so just throw it away and replace it + * with its child). This might happen several times. + */ + while (halves[half] && !halves[half]->elems[0]) { + LOG((" root %p is undersize, throwing away\n", halves[half])); + halves[half] = halves[half]->kids[0]; + sfree(halves[half]->parent); + halves[half]->parent = NULL; + LOG((" new root is %p\n", halves[half])); + } + + n = halves[half]; + while (n) { + void (*toward)(node234 *n, int ki, int *k, int *index); + int ni, merge; + + /* + * Now we have a potentially undersize node on the + * right (if half==0) or left (if half==1). Sort it + * out, by merging with a neighbour or by transferring + * subtrees over. At this time we must also ensure that + * nodes are bigger than minimum, in case we need an + * element to merge two nodes below. 
+ */ + LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], + n->kids[3], n->counts[3])); + if (half == 1) { + ki = 0; /* the kid we're interested in */ + ni = 1; /* the neighbour */ + merge = 0; /* for merge: leftmost of the two */ + toward = trans234_subtree_left; + } else { + ki = (n->kids[3] ? 3 : n->kids[2] ? 2 : 1); + ni = ki-1; + merge = ni; + toward = trans234_subtree_right; + } + + sub = n->kids[ki]; + if (sub && !sub->elems[1]) { + /* + * This node is undersized or minimum-size. If we + * can merge it with its neighbour, we do so; + * otherwise we must be able to transfer subtrees + * over to it until it is greater than minimum + * size. + */ + int undersized = (!sub->elems[0]); + LOG((" child %d is %ssize\n", ki, + undersized ? "under" : "minimum-")); + LOG((" neighbour is %s\n", + n->kids[ni]->elems[2] ? "large" : + n->kids[ni]->elems[1] ? "medium" : "small")); + if (!n->kids[ni]->elems[1] || + (undersized && !n->kids[ni]->elems[2])) { + /* + * Neighbour is small, or possibly neighbour is + * medium and we are undersize. + */ + trans234_subtree_merge(n, merge, NULL, NULL); + sub = n->kids[merge]; + if (!n->elems[0]) { + /* + * n is empty, and hence must have been the + * root and needs to be removed. + */ + assert(!n->parent); + LOG((" shifting root!\n")); + halves[half] = sub; + halves[half]->parent = NULL; + sfree(n); + } + } else { + /* Neighbour is big enough to move trees over. 
*/ + toward(n, ni, NULL, NULL); + if (undersized) + toward(n, ni, NULL, NULL); + } + } + n = sub; + } + } + + t->root = halves[1]; + return halves[0]; +} +tree234 *splitpos234(tree234 *t, int index, int before) { + tree234 *ret; + node234 *n; + int count; + + count = countnode234(t->root); + if (index < 0 || index > count) + return NULL; /* error */ + ret = newtree234(t->cmp); + n = split234_internal(t, index); + if (before) { + /* We want to return the ones before the index. */ + ret->root = n; + } else { + /* + * We want to keep the ones before the index and return the + * ones after. + */ + ret->root = t->root; + t->root = n; + } + return ret; +} +tree234 *split234(tree234 *t, void *e, cmpfn234 cmp, int rel) { + int before; + int index; + + assert(rel != REL234_EQ); + + if (rel == REL234_GT || rel == REL234_GE) { + before = 1; + rel = (rel == REL234_GT ? REL234_LE : REL234_LT); + } else { + before = 0; + } + if (!findrelpos234(t, e, cmp, rel, &index)) + index = 0; + + return splitpos234(t, index+1, before); +} + +static node234 *copynode234(node234 *n, copyfn234 copyfn, void *copyfnstate) { + int i; + node234 *n2 = mknew(node234); + + for (i = 0; i < 3; i++) { + if (n->elems[i] && copyfn) + n2->elems[i] = copyfn(copyfnstate, n->elems[i]); + else + n2->elems[i] = n->elems[i]; + } + + for (i = 0; i < 4; i++) { + if (n->kids[i]) { + n2->kids[i] = copynode234(n->kids[i], copyfn, copyfnstate); + n2->kids[i]->parent = n2; + } else { + n2->kids[i] = NULL; + } + n2->counts[i] = n->counts[i]; + } + + return n2; +} +tree234 *copytree234(tree234 *t, copyfn234 copyfn, void *copyfnstate) { + tree234 *t2; + + t2 = newtree234(t->cmp); + t2->root = copynode234(t->root, copyfn, copyfnstate); + t2->root->parent = NULL; + + return t2; +} + +#ifdef TEST + +/* + * Test code for the 2-3-4 tree. This code maintains an alternative + * representation of the data in the tree, in an array (using the + * obvious and slow insert and delete functions). 
After each tree + * operation, the verify() function is called, which ensures all + * the tree properties are preserved: + * - node->child->parent always equals node + * - tree->root->parent always equals NULL + * - number of kids == 0 or number of elements + 1; + * - tree has the same depth everywhere + * - every node has at least one element + * - subtree element counts are accurate + * - any NULL kid pointer is accompanied by a zero count + * - in a sorted tree: ordering property between elements of a + * node and elements of its children is preserved + * and also ensures the list represented by the tree is the same + * list it should be. (This last check also doubly verifies the + * ordering properties, because the `same list it should be' is by + * definition correctly ordered. It also ensures all nodes are + * distinct, because the enum functions would get caught in a loop + * if not.) + */ + +#include + +#define srealloc realloc + +/* + * Error reporting function. + */ +void error(char *fmt, ...) { + va_list ap; + printf("ERROR: "); + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + va_end(ap); + printf("\n"); +} + +/* The array representation of the data. */ +void **array; +int arraylen, arraysize; +cmpfn234 cmp; + +/* The tree representation of the same data. */ +tree234 *tree; + +/* + * Routines to provide a diagnostic printout of a tree. 
Currently + * relies on every element in the tree being a one-character string + * :-) + */ +typedef struct { + char **levels; +} dispctx; + +int dispnode(node234 *n, int level, dispctx *ctx) { + if (level == 0) { + int xpos = strlen(ctx->levels[0]); + int len; + + if (n->elems[2]) + len = sprintf(ctx->levels[0]+xpos, " %s%s%s", + n->elems[0], n->elems[1], n->elems[2]); + else if (n->elems[1]) + len = sprintf(ctx->levels[0]+xpos, " %s%s", + n->elems[0], n->elems[1]); + else + len = sprintf(ctx->levels[0]+xpos, " %s", + n->elems[0]); + return xpos + 1 + (len-1) / 2; + } else { + int xpos[4], nkids; + int nodelen, mypos, myleft, x, i; + + xpos[0] = dispnode(n->kids[0], level-3, ctx); + xpos[1] = dispnode(n->kids[1], level-3, ctx); + nkids = 2; + if (n->kids[2]) { + xpos[2] = dispnode(n->kids[2], level-3, ctx); + nkids = 3; + } + if (n->kids[3]) { + xpos[3] = dispnode(n->kids[3], level-3, ctx); + nkids = 4; + } + + if (nkids == 4) + mypos = (xpos[1] + xpos[2]) / 2; + else if (nkids == 3) + mypos = xpos[1]; + else + mypos = (xpos[0] + xpos[1]) / 2; + nodelen = nkids * 2 - 1; + myleft = mypos - ((nodelen-1)/2); + assert(myleft >= xpos[0]); + assert(myleft + nodelen-1 <= xpos[nkids-1]); + + x = strlen(ctx->levels[level]); + while (x <= xpos[0] && x < myleft) + ctx->levels[level][x++] = ' '; + while (x < myleft) + ctx->levels[level][x++] = '_'; + if (nkids==4) + x += sprintf(ctx->levels[level]+x, ".%s.%s.%s.", + n->elems[0], n->elems[1], n->elems[2]); + else if (nkids==3) + x += sprintf(ctx->levels[level]+x, ".%s.%s.", + n->elems[0], n->elems[1]); + else + x += sprintf(ctx->levels[level]+x, ".%s.", + n->elems[0]); + while (x < xpos[nkids-1]) + ctx->levels[level][x++] = '_'; + ctx->levels[level][x] = '\0'; + + x = strlen(ctx->levels[level-1]); + for (i = 0; i < nkids; i++) { + int rpos, pos; + rpos = xpos[i]; + if (i > 0 && i < nkids-1) + pos = myleft + 2*i; + else + pos = rpos; + if (rpos < pos) + rpos++; + while (x < pos && x < rpos) + ctx->levels[level-1][x++] = ' '; + 
if (x == pos) + ctx->levels[level-1][x++] = '|'; + while (x < pos || x < rpos) + ctx->levels[level-1][x++] = '_'; + if (x == pos) + ctx->levels[level-1][x++] = '|'; + } + ctx->levels[level-1][x] = '\0'; + + x = strlen(ctx->levels[level-2]); + for (i = 0; i < nkids; i++) { + int rpos = xpos[i]; + + while (x < rpos) + ctx->levels[level-2][x++] = ' '; + ctx->levels[level-2][x++] = '|'; + } + ctx->levels[level-2][x] = '\0'; + + return mypos; + } +} + +void disptree(tree234 *t) { + dispctx ctx; + char *leveldata; + int width = count234(t); + int ht = height234(t) * 3 - 2; + int i; + + if (!t->root) { + printf("[empty tree]\n"); + } + + leveldata = smalloc(ht * (width+2)); + ctx.levels = smalloc(ht * sizeof(char *)); + for (i = 0; i < ht; i++) { + ctx.levels[i] = leveldata + i * (width+2); + ctx.levels[i][0] = '\0'; + } + + (void) dispnode(t->root, ht-1, &ctx); + + for (i = ht; i-- ;) + printf("%s\n", ctx.levels[i]); + + sfree(ctx.levels); + sfree(leveldata); +} + +typedef struct { + int treedepth; + int elemcount; +} chkctx; + +int chknode(chkctx *ctx, int level, node234 *node, + void *lowbound, void *highbound) { + int nkids, nelems; + int i; + int count; + + /* Count the non-NULL kids. */ + for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++); + /* Ensure no kids beyond the first NULL are non-NULL. */ + for (i = nkids; i < 4; i++) + if (node->kids[i]) { + error("node %p: nkids=%d but kids[%d] non-NULL", + node, nkids, i); + } else if (node->counts[i]) { + error("node %p: kids[%d] NULL but count[%d]=%d nonzero", + node, i, i, node->counts[i]); + } + + /* Count the non-NULL elements. */ + for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++); + /* Ensure no elements beyond the first NULL are non-NULL. */ + for (i = nelems; i < 3; i++) + if (node->elems[i]) { + error("node %p: nelems=%d but elems[%d] non-NULL", + node, nelems, i); + } + + if (nkids == 0) { + /* + * If nkids==0, this is a leaf node; verify that the tree + * depth is the same everywhere. 
+ */ + if (ctx->treedepth < 0) + ctx->treedepth = level; /* we didn't know the depth yet */ + else if (ctx->treedepth != level) + error("node %p: leaf at depth %d, previously seen depth %d", + node, level, ctx->treedepth); + } else { + /* + * If nkids != 0, then it should be nelems+1, unless nelems + * is 0 in which case nkids should also be 0 (and so we + * shouldn't be in this condition at all). + */ + int shouldkids = (nelems ? nelems+1 : 0); + if (nkids != shouldkids) { + error("node %p: %d elems should mean %d kids but has %d", + node, nelems, shouldkids, nkids); + } + } + + /* + * nelems should be at least 1. + */ + if (nelems == 0) { + error("node %p: no elems", node, nkids); + } + + /* + * Add nelems to the running element count of the whole tree. + */ + ctx->elemcount += nelems; + + /* + * Check ordering property: all elements should be strictly > + * lowbound, strictly < highbound, and strictly < each other in + * sequence. (lowbound and highbound are NULL at edges of tree + * - both NULL at root node - and NULL is considered to be < + * everything and > everything. IYSWIM.) + */ + if (cmp) { + for (i = -1; i < nelems; i++) { + void *lower = (i == -1 ? lowbound : node->elems[i]); + void *higher = (i+1 == nelems ? highbound : node->elems[i+1]); + if (lower && higher && cmp(lower, higher) >= 0) { + error("node %p: kid comparison [%d=%s,%d=%s] failed", + node, i, lower, i+1, higher); + } + } + } + + /* + * Check parent pointers: all non-NULL kids should have a + * parent pointer coming back to this node. + */ + for (i = 0; i < nkids; i++) + if (node->kids[i]->parent != node) { + error("node %p kid %d: parent ptr is %p not %p", + node, i, node->kids[i]->parent, node); + } + + + /* + * Now (finally!) recurse into subtrees. + */ + count = nelems; + + for (i = 0; i < nkids; i++) { + void *lower = (i == 0 ? lowbound : node->elems[i-1]); + void *higher = (i >= nelems ? 
highbound : node->elems[i]); + int subcount = chknode(ctx, level+1, node->kids[i], lower, higher); + if (node->counts[i] != subcount) { + error("node %p kid %d: count says %d, subtree really has %d", + node, i, node->counts[i], subcount); + } + count += subcount; + } + + return count; +} + +void verifytree(tree234 *tree, void **array, int arraylen) { + chkctx ctx; + int i; + void *p; + + ctx.treedepth = -1; /* depth unknown yet */ + ctx.elemcount = 0; /* no elements seen yet */ + /* + * Verify validity of tree properties. + */ + if (tree->root) { + if (tree->root->parent != NULL) + error("root->parent is %p should be null", tree->root->parent); + chknode(&ctx, 0, tree->root, NULL, NULL); + } + printf("tree depth: %d\n", ctx.treedepth); + /* + * Enumerate the tree and ensure it matches up to the array. + */ + for (i = 0; NULL != (p = index234(tree, i)); i++) { + if (i >= arraylen) + error("tree contains more than %d elements", arraylen); + if (array[i] != p) + error("enum at position %d: array says %s, tree says %s", + i, array[i], p); + } + if (ctx.elemcount != i) { + error("tree really contains %d elements, enum gave %d", + ctx.elemcount, i); + } + if (i < arraylen) { + error("enum gave only %d elements, array has %d", i, arraylen); + } + i = count234(tree); + if (ctx.elemcount != i) { + error("tree really contains %d elements, count234 gave %d", + ctx.elemcount, i); + } +} +void verify(void) { verifytree(tree, array, arraylen); } + +void internal_addtest(void *elem, int index, void *realret) { + int i, j; + void *retval; + + if (arraysize < arraylen+1) { + arraysize = arraylen+1+256; + array = (array == NULL ? 
smalloc(arraysize*sizeof(*array)) : + srealloc(array, arraysize*sizeof(*array))); + } + + i = index; + /* now i points to the first element >= elem */ + retval = elem; /* expect elem returned (success) */ + for (j = arraylen; j > i; j--) + array[j] = array[j-1]; + array[i] = elem; /* add elem to array */ + arraylen++; + + if (realret != retval) { + error("add: retval was %p expected %p", realret, retval); + } + + verify(); +} + +void addtest(void *elem) { + int i; + void *realret; + + realret = add234(tree, elem); + + i = 0; + while (i < arraylen && cmp(elem, array[i]) > 0) + i++; + if (i < arraylen && !cmp(elem, array[i])) { + void *retval = array[i]; /* expect that returned not elem */ + if (realret != retval) { + error("add: retval was %p expected %p", realret, retval); + } + } else + internal_addtest(elem, i, realret); +} + +void addpostest(void *elem, int i) { + void *realret; + + realret = addpos234(tree, elem, i); + + internal_addtest(elem, i, realret); +} + +void delpostest(int i) { + int index = i; + void *elem = array[i], *ret; + + /* i points to the right element */ + while (i < arraylen-1) { + array[i] = array[i+1]; + i++; + } + arraylen--; /* delete elem from array */ + + if (tree->cmp) + ret = del234(tree, elem); + else + ret = delpos234(tree, index); + + if (ret != elem) { + error("del returned %p, expected %p", ret, elem); + } + + verify(); +} + +void deltest(void *elem) { + int i; + + i = 0; + while (i < arraylen && cmp(elem, array[i]) > 0) + i++; + if (i >= arraylen || cmp(elem, array[i]) != 0) + return; /* don't do it! */ + delpostest(i); +} + +/* A sample data set and test utility. Designed for pseudo-randomness, + * and yet repeatability. */ + +/* + * This random number generator uses the `portable implementation' + * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits; + * change it if not. 
+ */ +int randomnumber(unsigned *seed) { + *seed *= 1103515245; + *seed += 12345; + return ((*seed) / 65536) % 32768; +} + +int mycmp(void *av, void *bv) { + char const *a = (char const *)av; + char const *b = (char const *)bv; + return strcmp(a, b); +} + +#define lenof(x) ( sizeof((x)) / sizeof(*(x)) ) + +char *strings[] = { + "0", "2", "3", "I", "K", "d", "H", "J", "Q", "N", "n", "q", "j", "i", + "7", "G", "F", "D", "b", "x", "g", "B", "e", "v", "V", "T", "f", "E", + "S", "8", "A", "k", "X", "p", "C", "R", "a", "o", "r", "O", "Z", "u", + "6", "1", "w", "L", "P", "M", "c", "U", "h", "9", "t", "5", "W", "Y", + "m", "s", "l", "4", +#if 0 + "a", "ab", "absque", "coram", "de", + "palam", "clam", "cum", "ex", "e", + "sine", "tenus", "pro", "prae", + "banana", "carrot", "cabbage", "broccoli", "onion", "zebra", + "penguin", "blancmange", "pangolin", "whale", "hedgehog", + "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux", + "murfl", "spoo", "breen", "flarn", "octothorpe", + "snail", "tiger", "elephant", "octopus", "warthog", "armadillo", + "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin", + "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper", + "wand", "ring", "amulet" +#endif +}; + +#define NSTR lenof(strings) + +void findtest(void) { + static const int rels[] = { + REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT + }; + static const char *const relnames[] = { + "EQ", "GE", "LE", "LT", "GT" + }; + int i, j, rel, index; + char *p, *ret, *realret, *realret2; + int lo, hi, mid, c; + + for (i = 0; i < (int)NSTR; i++) { + p = strings[i]; + for (j = 0; j < (int)(sizeof(rels)/sizeof(*rels)); j++) { + rel = rels[j]; + + lo = 0; hi = arraylen-1; + while (lo <= hi) { + mid = (lo + hi) / 2; + c = strcmp(p, array[mid]); + if (c < 0) + hi = mid-1; + else if (c > 0) + lo = mid+1; + else + break; + } + + if (c == 0) { + if (rel == REL234_LT) + ret = (mid > 0 ? array[--mid] : NULL); + else if (rel == REL234_GT) + ret = (mid < arraylen-1 ? 
array[++mid] : NULL); + else + ret = array[mid]; + } else { + assert(lo == hi+1); + if (rel == REL234_LT || rel == REL234_LE) { + mid = hi; + ret = (hi >= 0 ? array[hi] : NULL); + } else if (rel == REL234_GT || rel == REL234_GE) { + mid = lo; + ret = (lo < arraylen ? array[lo] : NULL); + } else + ret = NULL; + } + + realret = findrelpos234(tree, p, NULL, rel, &index); + if (realret != ret) { + error("find(\"%s\",%s) gave %s should be %s", + p, relnames[j], realret, ret); + } + if (realret && index != mid) { + error("find(\"%s\",%s) gave %d should be %d", + p, relnames[j], index, mid); + } + if (realret && rel == REL234_EQ) { + realret2 = index234(tree, index); + if (realret2 != realret) { + error("find(\"%s\",%s) gave %s(%d) but %d -> %s", + p, relnames[j], realret, index, index, realret2); + } + } +#if 0 + printf("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j], + realret, index); +#endif + } + } + + realret = findrelpos234(tree, NULL, NULL, REL234_GT, &index); + if (arraylen && (realret != array[0] || index != 0)) { + error("find(NULL,GT) gave %s(%d) should be %s(0)", + realret, index, array[0]); + } else if (!arraylen && (realret != NULL)) { + error("find(NULL,GT) gave %s(%d) should be NULL", + realret, index); + } + + realret = findrelpos234(tree, NULL, NULL, REL234_LT, &index); + if (arraylen && (realret != array[arraylen-1] || index != arraylen-1)) { + error("find(NULL,LT) gave %s(%d) should be %s(0)", + realret, index, array[arraylen-1]); + } else if (!arraylen && (realret != NULL)) { + error("find(NULL,LT) gave %s(%d) should be NULL", + realret, index); + } +} + +void splittest(tree234 *tree, void **array, int arraylen) { + int i; + tree234 *tree3, *tree4; + for (i = 0; i <= arraylen; i++) { + tree3 = copytree234(tree, NULL, NULL); + tree4 = splitpos234(tree3, i, 0); + verifytree(tree3, array, i); + verifytree(tree4, array+i, arraylen-i); + join234(tree3, tree4); + freetree234(tree4); /* left empty by join */ + verifytree(tree3, array, arraylen); + 
freetree234(tree3); + } +} + +int main(void) { + int in[NSTR]; + int i, j, k; + int tworoot, tmplen; + unsigned seed = 0; + tree234 *tree2, *tree3, *tree4; + int c; + + setvbuf(stdout, NULL, _IOLBF, 0); + + for (i = 0; i < (int)NSTR; i++) in[i] = 0; + array = NULL; + arraylen = arraysize = 0; + tree = newtree234(mycmp); + cmp = mycmp; + + verify(); + for (i = 0; i < 10000; i++) { + j = randomnumber(&seed); + j %= NSTR; + printf("trial: %d\n", i); + if (in[j]) { + printf("deleting %s (%d)\n", strings[j], j); + deltest(strings[j]); + in[j] = 0; + } else { + printf("adding %s (%d)\n", strings[j], j); + addtest(strings[j]); + in[j] = 1; + } + disptree(tree); + findtest(); + } + + while (arraylen > 0) { + j = randomnumber(&seed); + j %= arraylen; + deltest(array[j]); + } + + freetree234(tree); + + /* + * Now try an unsorted tree. We don't really need to test + * delpos234 because we know del234 is based on it, so it's + * already been tested in the above sorted-tree code; but for + * completeness we'll use it to tear down our unsorted tree + * once we've built it. + */ + tree = newtree234(NULL); + cmp = NULL; + verify(); + for (i = 0; i < 1000; i++) { + printf("trial: %d\n", i); + j = randomnumber(&seed); + j %= NSTR; + k = randomnumber(&seed); + k %= count234(tree)+1; + printf("adding string %s at index %d\n", strings[j], k); + addpostest(strings[j], k); + } + + /* + * While we have this tree in its full form, we'll take a copy + * of it to use in split and join testing. + */ + tree2 = copytree234(tree, NULL, NULL); + verifytree(tree2, array, arraylen);/* check the copy is accurate */ + /* + * Split tests. Split the tree at every possible point and + * check the resulting subtrees. + */ + tworoot = (!tree2->root->elems[1]);/* see if it has a 2-root */ + splittest(tree2, array, arraylen); + /* + * Now do the split test again, but on a tree that has a 2-root + * (if the previous one didn't) or doesn't (if the previous one + * did). 
+ */ + tmplen = arraylen; + while ((!tree2->root->elems[1]) == tworoot) { + delpos234(tree2, --tmplen); + } + printf("now trying splits on second tree\n"); + splittest(tree2, array, tmplen); + freetree234(tree2); + + /* + * Back to the main testing of uncounted trees. + */ + while (count234(tree) > 0) { + printf("cleanup: tree size %d\n", count234(tree)); + j = randomnumber(&seed); + j %= count234(tree); + printf("deleting string %s from index %d\n", (char *)array[j], j); + delpostest(j); + } + freetree234(tree); + + /* + * Finally, do some testing on split/join on _sorted_ trees. At + * the same time, we'll be testing split on very small trees. + */ + tree = newtree234(mycmp); + cmp = mycmp; + arraylen = 0; + for (i = 0; i < 16; i++) { + addtest(strings[i]); + tree2 = copytree234(tree, NULL, NULL); + splittest(tree2, array, arraylen); + freetree234(tree2); + } + freetree234(tree); + + /* + * Test silly cases of join: join(emptytree, emptytree), and + * also ensure join correctly spots when sorted trees fail the + * ordering constraint. + */ + tree = newtree234(mycmp); + tree2 = newtree234(mycmp); + tree3 = newtree234(mycmp); + tree4 = newtree234(mycmp); + assert(mycmp(strings[0], strings[1]) < 0); /* just in case :-) */ + add234(tree2, strings[1]); + add234(tree4, strings[0]); + array[0] = strings[0]; + array[1] = strings[1]; + verifytree(tree, array, 0); + verifytree(tree2, array+1, 1); + verifytree(tree3, array, 0); + verifytree(tree4, array, 1); + + /* + * So: + * - join(tree,tree3) should leave both tree and tree3 unchanged. + * - joinr(tree,tree2) should leave both tree and tree2 unchanged. + * - join(tree4,tree3) should leave both tree3 and tree4 unchanged. + * - join(tree, tree2) should move the element from tree2 to tree. + * - joinr(tree4, tree3) should move the element from tree4 to tree3. + * - join(tree,tree3) should return NULL and leave both unchanged. + * - join(tree3,tree) should work and create a bigger tree in tree3. 
+ */ + assert(tree == join234(tree, tree3)); + verifytree(tree, array, 0); + verifytree(tree3, array, 0); + assert(tree2 == join234r(tree, tree2)); + verifytree(tree, array, 0); + verifytree(tree2, array+1, 1); + assert(tree4 == join234(tree4, tree3)); + verifytree(tree3, array, 0); + verifytree(tree4, array, 1); + assert(tree == join234(tree, tree2)); + verifytree(tree, array+1, 1); + verifytree(tree2, array, 0); + assert(tree3 == join234r(tree4, tree3)); + verifytree(tree3, array, 1); + verifytree(tree4, array, 0); + assert(NULL == join234(tree, tree3)); + verifytree(tree, array+1, 1); + verifytree(tree3, array, 1); + assert(tree3 == join234(tree3, tree)); + verifytree(tree3, array, 2); + verifytree(tree, array, 0); + + return 0; +} + +#endif + +#if 0 /* sorted list of strings might be useful */ +{ + "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", +} +#endif diff --git a/tree234.h b/tree234.h new file mode 100644 index 0000000..f75c8f7 --- /dev/null +++ b/tree234.h @@ -0,0 +1,202 @@ +/* + * tree234.h: header defining functions in tree234.c. + * + * This file is copyright 1999-2001 Simon Tatham. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef TREE234_H +#define TREE234_H + +/* + * This typedef is opaque outside tree234.c itself. + */ +typedef struct tree234_Tag tree234; + +typedef int (*cmpfn234)(void *, void *); + +typedef void *(*copyfn234)(void *state, void *element); + +/* + * Create a 2-3-4 tree. If `cmp' is NULL, the tree is unsorted, and + * lookups by key will fail: you can only look things up by numeric + * index, and you have to use addpos234() and delpos234(). + */ +tree234 *newtree234(cmpfn234 cmp); + +/* + * Free a 2-3-4 tree (not including freeing the elements). + */ +void freetree234(tree234 *t); + +/* + * Add an element e to a sorted 2-3-4 tree t. Returns e on success, + * or if an existing element compares equal, returns that. + */ +void *add234(tree234 *t, void *e); + +/* + * Add an element e to an unsorted 2-3-4 tree t. Returns e on + * success, NULL on failure. (Failure should only occur if the + * index is out of range or the tree is sorted.) + * + * Index range can be from 0 to the tree's current element count, + * inclusive. + */ +void *addpos234(tree234 *t, void *e, int index); + +/* + * Look up the element at a given numeric index in a 2-3-4 tree. + * Returns NULL if the index is out of range. 
+ * + * One obvious use for this function is in iterating over the whole + * of a tree (sorted or unsorted): + * + * for (i = 0; (p = index234(tree, i)) != NULL; i++) consume(p); + * + * or + * + * int maxcount = count234(tree); + * for (i = 0; i < maxcount; i++) { + * p = index234(tree, i); + * assert(p != NULL); + * consume(p); + * } + */ +void *index234(tree234 *t, int index); + +/* + * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not + * found. e is always passed as the first argument to cmp, so cmp + * can be an asymmetric function if desired. cmp can also be passed + * as NULL, in which case the compare function from the tree proper + * will be used. + * + * Three of these functions are special cases of findrelpos234. The + * non-`pos' variants lack the `index' parameter: if the parameter + * is present and non-NULL, it must point to an integer variable + * which will be filled with the numeric index of the returned + * element. + * + * The non-`rel' variants lack the `relation' parameter. This + * parameter allows you to specify what relation the element you + * provide has to the element you're looking for. This parameter + * can be: + * + * REL234_EQ - find only an element that compares equal to e + * REL234_LT - find the greatest element that compares < e + * REL234_LE - find the greatest element that compares <= e + * REL234_GT - find the smallest element that compares > e + * REL234_GE - find the smallest element that compares >= e + * + * Non-`rel' variants assume REL234_EQ. + * + * If `rel' is REL234_GT or REL234_LT, the `e' parameter may be + * NULL. In this case, REL234_GT will return the smallest element + * in the tree, and REL234_LT will return the greatest. 
This gives + * an alternative means of iterating over a sorted tree, instead of + * using index234: + * + * // to loop forwards + * for (p = NULL; (p = findrel234(tree, p, NULL, REL234_GT)) != NULL ;) + * consume(p); + * + * // to loop backwards + * for (p = NULL; (p = findrel234(tree, p, NULL, REL234_LT)) != NULL ;) + * consume(p); + */ +enum { + REL234_EQ, REL234_LT, REL234_LE, REL234_GT, REL234_GE +}; +void *find234(tree234 *t, void *e, cmpfn234 cmp); +void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation); +void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index); +void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp, int relation, + int *index); + +/* + * Delete an element e in a 2-3-4 tree. Does not free the element, + * merely removes all links to it from the tree nodes. + * + * delpos234 deletes the element at a particular tree index: it + * works on both sorted and unsorted trees. + * + * del234 deletes the element passed to it, so it only works on + * sorted trees. (It's equivalent to using findpos234 to determine + * the index of an element, and then passing that index to + * delpos234.) + * + * Both functions return a pointer to the element they delete, for + * the user to free or pass on elsewhere or whatever. If the index + * is out of range (delpos234) or the element is already not in the + * tree (del234) then they return NULL. + */ +void *del234(tree234 *t, void *e); +void *delpos234(tree234 *t, int index); + +/* + * Return the total element count of a tree234. + */ +int count234(tree234 *t); + +/* + * Split a tree234 into two valid tree234s. + * + * splitpos234 splits at a given index. If `before' is TRUE, the + * items at and after that index are left in t and the ones before + * are returned; if `before' is FALSE, the items before that index + * are left in t and the rest are returned. + * + * split234 splits at a given key. You can pass any of the + * relations used with findrel234, except for REL234_EQ. 
The items + * in the tree that satisfy the relation are returned; the + * remainder are left. + */ +tree234 *splitpos234(tree234 *t, int index, int before); +tree234 *split234(tree234 *t, void *e, cmpfn234 cmp, int rel); + +/* + * Join two tree234s together into a single one. + * + * All the elements in t1 are placed to the left of all the + * elements in t2. If the trees are sorted, there will be a test to + * ensure that this satisfies the ordering criterion, and NULL will + * be returned otherwise. If the trees are unsorted, there is no + * restriction on the use of join234. + * + * The tree returned is t1 (join234) or t2 (join234r), if the + * operation is successful. + */ +tree234 *join234(tree234 *t1, tree234 *t2); +tree234 *join234r(tree234 *t1, tree234 *t2); + +/* + * Make a complete copy of a tree234. Element pointers will be + * reused unless copyfn is non-NULL, in which case it will be used + * to copy each element. (copyfn takes two `void *' parameters; the + * first is private state and the second is the element. A simple + * copy routine probably won't need private state.) 
+ */
+tree234 *copytree234(tree234 *t, copyfn234 copyfn, void *copyfnstate);
+
+#endif /* TREE234_H */
diff --git a/ustring.c b/ustring.c
new file mode 100644
index 0000000..1573a19
--- /dev/null
+++ b/ustring.c
@@ -0,0 +1,217 @@
+/*
+ * ustring.c: Unicode string routines
+ */
+
+#include <wchar.h>
+#include <time.h>
+#include "halibut.h"
+
+/*
+ * Duplicate a wide string into newly allocated memory. A NULL
+ * argument yields a newly allocated empty string, never NULL.
+ */
+wchar_t *ustrdup(wchar_t *s) {
+    wchar_t *r;
+    if (s) {
+        r = mknewa(wchar_t, 1+ustrlen(s));
+        ustrcpy(r, s);
+    } else {
+        r = mknew(wchar_t);
+        *r = 0;
+    }
+    return r;
+}
+
+/*
+ * Narrow a wide string into the caller's buffer `outbuf' of `size'
+ * chars, by plain assignment of each character. The result is
+ * NUL-terminated, and truncated to size-1 characters if it would
+ * not otherwise fit; a NULL `s' gives the empty string. If `size'
+ * is not positive there is no room even for the terminator, so
+ * the buffer is left untouched. Returns outbuf.
+ */
+char *ustrtoa(wchar_t *s, char *outbuf, int size) {
+    char *p;
+    if (size <= 0)
+        return outbuf;                 /* nowhere to put even a NUL */
+    if (!s) {
+        *outbuf = '\0';
+        return outbuf;
+    }
+    for (p = outbuf; *s && p < outbuf+size; p++,s++)
+        *p = *s;
+    if (p < outbuf+size)
+        *p = '\0';
+    else
+        outbuf[size-1] = '\0';
+    return outbuf;
+}
+
+/* Length of a wide string, not counting the terminator. */
+int ustrlen(wchar_t *s) {
+    int len = 0;
+    while (*s++) len++;
+    return len;
+}
+
+/* Step past a wide string and its terminator. */
+wchar_t *uadv(wchar_t *s) {
+    return s + 1 + ustrlen(s);
+}
+
+/* Copy a wide string including its terminator; returns dest. */
+wchar_t *ustrcpy(wchar_t *dest, wchar_t *source) {
+    wchar_t *ret = dest;
+    do {
+        *dest++ = *source;
+    } while (*source++);
+    return ret;
+}
+
+/*
+ * Compare two wide strings, returning -1, 0 or +1. NULL is
+ * accepted, and sorts before any non-NULL string.
+ */
+int ustrcmp(wchar_t *lhs, wchar_t *rhs) {
+    if (!lhs && !rhs) return 0;
+    if (!lhs) return -1;
+    if (!rhs) return +1;
+    while (*lhs && *rhs && *lhs==*rhs)
+        lhs++, rhs++;
+    if (*lhs < *rhs)
+        return -1;
+    else if (*lhs > *rhs)
+        return 1;
+    return 0;
+}
+
+/* Lower-case a single character; only 'A'..'Z' are mapped. */
+wchar_t utolower(wchar_t c) {
+    if (c == L'\0')
+        return c;                      /* this property needed by ustricmp */
+    /* FIXME: this doesn't even come close */
+    if (c >= 'A' && c <= 'Z')
+        c += 'a'-'A';
+    return c;
+}
+
+/*
+ * Compare two wide strings after passing each character through
+ * utolower, returning -1, 0 or +1. Unlike ustrcmp, neither
+ * argument may be NULL.
+ */
+int ustricmp(wchar_t *lhs, wchar_t *rhs) {
+    wchar_t lc, rc;
+    while ((lc = utolower(*lhs)) == (rc = utolower(*rhs)) && lc && rc)
+        lhs++, rhs++;
+    if (!lc && !rc)
+        return 0;
+    if (lc < rc)
+        return -1;
+    else
+        return 1;
+}
+
+/* Lower-case a wide string in place (via utolower); returns s. */
+wchar_t *ustrlow(wchar_t *s) {
+    wchar_t *p = s;
+    while (*p) {
+        *p = utolower(*p);
+        p++;
+    }
+    return s;
+}
+
+/*
+ * Parse a decimal integer, with optional leading `-', from the
+ * start of a wide string. Parsing stops at the first non-digit,
+ * so a string with no digits gives 0. Overflow is not checked.
+ */
+int utoi(wchar_t *s) {
+    int sign = +1;
+    int n;
+
+    if (*s == L'-') {
+        s++;
+        sign = -1;
+    }
+
+    n = 0;
+    while (*s && *s >= L'0' && *s <= L'9') {
+        n *= 10;
+        n += (*s - '0');
+        s++;
+    }
+
+    return sign * n;                   /* apply the sign parsed above */
+}
+
+/*
+ * Parse a boolean: TRUE for "yes", "y", "true" or "t" (compared
+ * with ustricmp), FALSE for everything else.
+ */
+int utob(wchar_t *s) {
+    if (!ustricmp(s, L"yes") || !ustricmp(s, L"y") ||
+        !ustricmp(s, L"true") || !ustricmp(s, L"t"))
+        return TRUE;
+    return FALSE;
+}
+
+/* Nonzero if c is one of the digits '0'..'9'. */
+int uisdigit(wchar_t c) {
+    return c >= L'0' && c <= L'9';
+}
+
+/*
+ * Format a time with strftime, returning a newly allocated wide
+ * string. `wfmt' is the strftime format, narrowed with ustrtoa;
+ * if it is NULL, "%c" is used.
+ */
+#define USTRFTIME_DELTA 128
+wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) {
+    void *blk = NULL;
+    wchar_t *wblk, *wp;
+    char *fmt, *text, *p;
+    size_t size = 0;
+    size_t len;
+
+    /*
+     * strftime has the entertaining property that it returns 0
+     * _either_ on out-of-space _or_ on successful generation of
+     * the empty string. Hence we must ensure our format can never
+     * generate the empty string. Somebody throw a custard pie at
+     * whoever was responsible for that. Please?
+     */
+    if (wfmt) {
+        len = ustrlen(wfmt);
+        fmt = mknewa(char, 2+len);
+        ustrtoa(wfmt, fmt+1, len+1);
+        fmt[0] = ' ';
+    } else
+        fmt = " %c";
+
+    while (1) {
+        size += USTRFTIME_DELTA;
+        blk = resize((char *)blk, size);
+        len = strftime((char *)blk, size-1, fmt, timespec);
+        if (len > 0)
+            break;
+    }
+
+    /* Note: +1 for the terminating 0, -1 for the initial space in fmt */
+    wblk = resize((wchar_t *)blk, len);
+    text = mknewa(char, len);
+    strftime(text, len, fmt+1, timespec);
+    /*
+     * We operate in the C locale, so this all ought to be kosher
+     * ASCII. If we ever move outside ASCII machines, we may need
+     * to make this more portable...
+     */
+    for (wp = wblk, p = text; *p; p++, wp++)
+        *wp = *p;
+    *wp = 0;
+    if (wfmt)
+        sfree(fmt);
+    sfree(text);
+    return wblk;
+}
diff --git a/version.c b/version.c
new file mode 100644
index 0000000..38fbca1
--- /dev/null
+++ b/version.c
@@ -0,0 +1,13 @@
+/*
+ * version.c: version string
+ */
+
+#include <stdio.h>
+
+#ifndef VERSION
+#define VER "anonymous build (" __DATE__ " " __TIME__ ")"
+#else
+#define VER "version " VERSION
+#endif
+
+const char *const version = VER;
diff --git a/winhelp.c b/winhelp.c
new file mode 100644
index 0000000..005409e
--- /dev/null
+++ b/winhelp.c
@@ -0,0 +1,2127 @@
+/*
+ * winhelp.c a module to generate Windows .HLP files
+ *
+ * Documentation of the .HLP file format comes from the excellent
+ * HELPFILE.TXT, published alongside the Help decompiler HELPDECO
+ * by Manfred Winterhoff. This code would not have been possible
+ * without his efforts. Many thanks.
+ */
+
+/*
+ * Potential future features:
+ *
+ *  - perhaps LZ77 compression? This appears to cause a phase order
+ *    problem: it's hard to do the compression until the data to be
+ *    compressed is finalised, and yet you can't finalise the data
+ *    to be compressed until you know how much of it is going into
+ *    which TOPICBLOCK in order to work out the offsets in the
+ *    topic headers - for which you have to have already done the
+ *    compression. Perhaps the thing to do is to implement an LZ77
+ *    compressor that can guarantee to leave particular bytes in
+ *    the stream as literals, and then go back and fix the offsets
+ *    up later. Not pleasant.
+ *
+ *  - It would be good to find out what relation (if any) the LCID
+ *    record in the |SYSTEM section bears to the codepage used in
+ *    the actual help text, so as to be able to vary that if the
+ *    user needs it. For the moment I suspect we're stuck with
+ *    Win1252.
+ *
+ *  - tables might be nice.
+ *
+ * Unlikely future features:
+ *
+ *  - Phrase compression sounds harder.
It's reasonably easy + * (though space-costly) to analyse all the text in the file to + * determine the one key phrase which would save most space if + * replaced by a reference everywhere it appears; but finding + * the _1024_ most effective phrases seems much harder since a + * naive analysis might find lots of phrases that all overlap + * (so you wouldn't get the saving you expected, as after taking + * out the first phrase the rest would never crop up). In + * addition, MS hold US patent number 4955066 which may cover + * phrase compression, so perhaps it's best just to leave it. + * + * Cleanup work: + * + * - sort out begin_topic. Ideally we should have a separate + * topic_macro function that adds to the existing linkdata for + * the topic, because that's more flexible than a variadic + * function. This will be fiddly, though: if it's called before + * whlp_begin_topic then we must buffer macros, and if it's + * called afterwards then we must be able to go back and modify + * the linkdata2 of the topic start block. Foo. + * + * - find out what should happen if a single topiclink crosses + * _two_ topicblock boundaries. + * + * - What is the BlockSize in a topic header (first 4 bytes of + * LinkData1 in a type 2 record) supposed to mean? How on earth + * is it measured? The help file doesn't become perceptibly + * corrupt if I frob it randomly; and on some occasions taking a + * bit _out_ of the help file _increases_ that value. I have a + * feeling it's completely made up and/or vestigial, so for the + * moment I'm just making up a plausible value as I go along. + */ + +#include +#include +#include +#include +#include +#include + +#include "halibut.h" +#include "winhelp.h" +#include "tree234.h" + +#ifdef TESTMODE +/* + * This lot is useful for testing. Something like it will also be + * needed to use this module standalone. 
+ */ +#define smalloc malloc +#define srealloc realloc +#define sfree free +#define mknew(type) ( (type *) smalloc (sizeof (type)) ) +#define mknewa(type, number) ( (type *) smalloc ((number) * sizeof (type)) ) +#define resize(array, len) ( srealloc ((array), (len) * sizeof (*(array))) ) +#define lenof(array) ( sizeof(array) / sizeof(*(array)) ) +char *dupstr(char *s) { + char *r = mknewa(char, 1+strlen(s)); strcpy(r,s); return r; +} +#endif + +#define UNUSEDARG(x) ( (x) = (x) ) + +#define GET_32BIT_LSB_FIRST(cp) \ + (((unsigned long)(unsigned char)(cp)[0]) | \ + ((unsigned long)(unsigned char)(cp)[1] << 8) | \ + ((unsigned long)(unsigned char)(cp)[2] << 16) | \ + ((unsigned long)(unsigned char)(cp)[3] << 24)) + +#define PUT_32BIT_LSB_FIRST(cp, value) do { \ + (cp)[0] = 0xFF & (value); \ + (cp)[1] = 0xFF & ((value) >> 8); \ + (cp)[2] = 0xFF & ((value) >> 16); \ + (cp)[3] = 0xFF & ((value) >> 24); } while (0) + +#define GET_16BIT_LSB_FIRST(cp) \ + (((unsigned long)(unsigned char)(cp)[0]) | \ + ((unsigned long)(unsigned char)(cp)[1] << 8)) + +#define PUT_16BIT_LSB_FIRST(cp, value) do { \ + (cp)[0] = 0xFF & (value); \ + (cp)[1] = 0xFF & ((value) >> 8); } while (0) + +#define MAX_PAGE_SIZE 0x800 /* max page size in any B-tree */ +#define TOPIC_BLKSIZE 4096 /* implied by version/flags combo */ + +typedef struct WHLP_TOPIC_tag context; + +struct file { + char *name; /* file name, will need freeing */ + unsigned char *data; /* file data, will need freeing */ + int pos; /* position for adding data */ + int len; /* # of meaningful bytes in data */ + int size; /* # of allocated bytes in data */ + int fileoffset; /* offset in the real .HLP file */ +}; + +struct indexrec { + char *term; /* index term, will need freeing */ + context *topic; /* topic it links to */ + int count, offset; /* used when building |KWDATA */ +}; + +struct topiclink { + int topicoffset, topicpos; /* for referencing from elsewhere */ + int recordtype; + int len1, len2; + unsigned char *data1, *data2; + 
context *context; + struct topiclink *nonscroll, *scroll, *nexttopic; + int block_size; /* for the topic header - *boggle* */ +}; + +struct WHLP_TOPIC_tag { + char *name; /* needs freeing */ + unsigned long hash; + struct topiclink *link; /* this provides TOPICOFFSET */ + context *browse_next, *browse_prev; + char *title; /* needs freeing */ + int index; /* arbitrary number */ +}; + +struct fontdesc { + char *font; + int family, rendition, halfpoints; + int r, g, b; +}; + +struct WHLP_tag { + tree234 *files; /* stores `struct file' */ + tree234 *pre_contexts; /* stores `context' */ + tree234 *contexts; /* also stores `context' */ + tree234 *titles; /* _also_ stores `context' */ + tree234 *text; /* stores `struct topiclink' */ + tree234 *index; /* stores `struct indexrec' */ + tree234 *tabstops; /* stores `int' */ + tree234 *fontnames; /* stores `char *' */ + tree234 *fontdescs; /* stores `struct fontdesc' */ + struct file *systemfile; /* the |SYSTEM internal file */ + context *ptopic; /* primary topic */ + struct topiclink *prevtopic; /* to link type-2 records together */ + struct topiclink *link; /* while building a topiclink */ + unsigned char linkdata1[TOPIC_BLKSIZE]; /* while building a topiclink */ + unsigned char linkdata2[TOPIC_BLKSIZE]; /* while building a topiclink */ + int topicblock_remaining; /* while building |TOPIC section */ + int lasttopiclink; /* while building |TOPIC section */ + int firsttopiclink_offset; /* while building |TOPIC section */ + int lasttopicstart; /* while building |TOPIC section */ + int para_flags; + int para_attrs[7]; + int ncontexts; +}; + +/* Functions to return the index and leaf data for B-tree contents. */ +typedef int (*bt_index_fn)(const void *item, unsigned char *outbuf); +typedef int (*bt_leaf_fn)(const void *item, unsigned char *outbuf); + +/* Forward references. 
*/ +static void whlp_para_reset(WHLP h); +static struct file *whlp_new_file(WHLP h, char *name); +static void whlp_file_add(struct file *f, const void *data, int len); +static void whlp_file_add_char(struct file *f, int data); +static void whlp_file_add_short(struct file *f, int data); +static void whlp_file_add_long(struct file *f, int data); +static void whlp_file_fill(struct file *f, int len); +static void whlp_file_seek(struct file *f, int pos, int whence); +static int whlp_file_offset(struct file *f); + +/* ---------------------------------------------------------------------- + * Fiddly little functions: B-tree compare, index and leaf functions. + */ + +/* The master index maps file names to help-file offsets. */ + +static int filecmp(void *av, void *bv) +{ + const struct file *a = (const struct file *)av; + const struct file *b = (const struct file *)bv; + return strcmp(a->name, b->name); +} + +static int fileindex(const void *av, unsigned char *outbuf) +{ + const struct file *a = (const struct file *)av; + int len = 1+strlen(a->name); + memcpy(outbuf, a->name, len); + return len; +} + +static int fileleaf(const void *av, unsigned char *outbuf) +{ + const struct file *a = (const struct file *)av; + int len = 1+strlen(a->name); + memcpy(outbuf, a->name, len); + PUT_32BIT_LSB_FIRST(outbuf+len, a->fileoffset); + return len+4; +} + +/* The |CONTEXT internal file maps help context hashes to TOPICOFFSETs. 
*/ + +static int ctxcmp(void *av, void *bv) +{ + const context *a = (const context *)av; + const context *b = (const context *)bv; + if ((signed long)a->hash < (signed long)b->hash) + return -1; + if ((signed long)a->hash > (signed long)b->hash) + return +1; + return 0; +} + +static int ctxindex(const void *av, unsigned char *outbuf) +{ + const context *a = (const context *)av; + PUT_32BIT_LSB_FIRST(outbuf, a->hash); + return 4; +} + +static int ctxleaf(const void *av, unsigned char *outbuf) +{ + const context *a = (const context *)av; + PUT_32BIT_LSB_FIRST(outbuf, a->hash); + PUT_32BIT_LSB_FIRST(outbuf+4, a->link->topicoffset); + return 8; +} + +/* The |TTLBTREE internal file maps TOPICOFFSETs to title strings. */ + +static int ttlcmp(void *av, void *bv) +{ + const context *a = (const context *)av; + const context *b = (const context *)bv; + if (a->link->topicoffset < b->link->topicoffset) + return -1; + if (a->link->topicoffset > b->link->topicoffset) + return +1; + return 0; +} + +static int ttlindex(const void *av, unsigned char *outbuf) +{ + const context *a = (const context *)av; + PUT_32BIT_LSB_FIRST(outbuf, a->link->topicoffset); + return 4; +} + +static int ttlleaf(const void *av, unsigned char *outbuf) +{ + const context *a = (const context *)av; + int slen; + PUT_32BIT_LSB_FIRST(outbuf, a->link->topicoffset); + slen = 1+strlen(a->title); + memcpy(outbuf+4, a->title, slen); + return 4+slen; +} + +/* The |KWBTREE internal file maps index strings to TOPICOFFSETs. */ + +static int idxcmp(void *av, void *bv) +{ + const struct indexrec *a = (const struct indexrec *)av; + const struct indexrec *b = (const struct indexrec *)bv; + int cmp; + if ( (cmp = strcmp(a->term, b->term)) != 0) + return cmp; + /* Now sort on the index field of the topics. 
*/ + if (a->topic->index < b->topic->index) + return -1; + if (a->topic->index > b->topic->index) + return +1; + return 0; +} + +static int idxindex(const void *av, unsigned char *outbuf) +{ + const struct indexrec *a = (const struct indexrec *)av; + int len = 1+strlen(a->term); + memcpy(outbuf, a->term, len); + return len; +} + +static int idxleaf(const void *av, unsigned char *outbuf) +{ + const struct indexrec *a = (const struct indexrec *)av; + int len = 1+strlen(a->term); + memcpy(outbuf, a->term, len); + PUT_16BIT_LSB_FIRST(outbuf+len, a->count); + PUT_32BIT_LSB_FIRST(outbuf+len+2, a->offset); + return len+6; +} + +/* + * The internal `tabstops' B-tree stores pointers-to-int. Sorting + * is by the low 16 bits of the number (above that is flags). + */ + +static int tabcmp(void *av, void *bv) +{ + const int *a = (const int *)av; + const int *b = (const int *)bv; + if ((*a & 0xFFFF) < (*b & 0xFFFF)) + return -1; + if ((*a & 0xFFFF) > (*b & 0xFFFF)) + return +1; + return 0; +} + +/* The internal `fontnames' B-tree stores strings. */ +static int fontcmp(void *av, void *bv) +{ + const char *a = (const char *)av; + const char *b = (const char *)bv; + return strcmp(a,b); +} + +/* ---------------------------------------------------------------------- + * Manage help contexts and topics. + */ + +/* + * This is the code to compute the hash of a context name. Copied + * straight from Winterhoff's documentation. 
+ */ +static unsigned long context_hash(char *context) +{ + signed char bytemapping[256] = + "\x00\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF" + "\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF" + "\xF0\x0B\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\x0C\xFF" + "\x0A\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F" + "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x0B\x0C\x0D\x0E\x0D" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F" + "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F" + "\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F" + "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F" + "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F" + "\x80\x81\x82\x83\x0B\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" + "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F" + "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" + "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF" + "\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF"; + unsigned long hash; + + /* Sanity check the size of unsigned long */ + enum { assertion = 1 / + (((unsigned long)0xFFFFFFFF) + 2 == (unsigned long)1) }; + + /* + * The hash algorithm starts the hash at 0 and updates it with + * each character. Therefore, logically, the hash of an empty + * string should be 0 (it starts at 0 and is never updated); + * but Winterhoff says it is in fact 1. Shouldn't matter, since + * I never plan to use empty context names, but I'll stick the + * special case in here anyway. + */ + if (!*context) + return 1; + + /* + * Now compute the hash in the normal way. 
+     */
+    hash = 0;
+    while (*context) {
+        hash = hash * 43 + bytemapping[(unsigned char)*context];
+        context++;
+    }
+    return hash;
+}
+
+/*
+ * Create a new help topic and return a handle to it.
+ *
+ * If `context_name' is non-NULL, the topic is hashed and entered
+ * into the `contexts' tree at once. Should add234 report that an
+ * equal element is already there, the new topic is destroyed, NULL
+ * is returned, and (if `clash' is non-NULL) *clash is pointed at
+ * the existing topic's name, which that topic continues to own.
+ *
+ * If `context_name' is NULL, the topic is appended to the
+ * `pre_contexts' tree and whlp_prepare() will name it later.
+ */
+WHLP_TOPIC whlp_register_topic(WHLP h, char *context_name, char **clash)
+{
+    context *ctx = mknew(context);
+    context *otherctx;
+
+    /*
+     * Index contexts in order of creation, just so there's some
+     * sort of non-arbitrary ordering in the index B-tree. Call me
+     * fussy, but I don't like indexing on pointer values because I
+     * prefer the code to be deterministic when run under different
+     * C libraries.
+     */
+    ctx->index = h->ncontexts++;
+    ctx->browse_prev = ctx->browse_next = NULL;
+
+    /*
+     * The title and topiclink are only filled in by
+     * whlp_begin_topic, and the hash of an anonymous topic only by
+     * whlp_prepare. Give them definite values in the meantime
+     * rather than leaving them indeterminate.
+     */
+    ctx->title = NULL;
+    ctx->link = NULL;
+    ctx->hash = 0;
+
+    if (context_name) {
+        /*
+         * We have a context name, which means we can put this
+         * context straight into the `contexts' tree.
+         */
+        ctx->name = dupstr(context_name);
+        ctx->hash = context_hash(context_name);
+        otherctx = add234(h->contexts, ctx);
+        if (otherctx != ctx) {
+            /*
+             * Hash clash. Destroy the new context and return NULL,
+             * providing the clashing string.
+             */
+            sfree(ctx->name);
+            sfree(ctx);
+            if (clash) *clash = otherctx->name;
+            return NULL;
+        }
+    } else {
+        /*
+         * We have no context name yet. Enter this into the
+         * pre_contexts tree of anonymous topics, which we will go
+         * through later and allocate unique context names and hash
+         * values.
+         */
+        ctx->name = NULL;
+        addpos234(h->pre_contexts, ctx, count234(h->pre_contexts));
+    }
+    return ctx;
+}
+
+/*
+ * Name every anonymous topic and move it from `pre_contexts' into
+ * `contexts', then reset the paragraph attributes ready for text
+ * output.
+ */
+void whlp_prepare(WHLP h)
+{
+    /*
+     * We must go through pre_contexts and allocate a context ID to
+     * each anonymous context, making sure it doesn't clash with
+     * the existing contexts.
+     *
+     * Our own context IDs will just be of the form `t00000001',
+     * and we'll increment the number each time and skip over any
+     * IDs that clash with existing context names.
+     */
+    int ctx_num = 0;
+    context *ctx, *otherctx;
+
+    while ( (ctx = index234(h->pre_contexts, 0)) != NULL ) {
+        delpos234(h->pre_contexts, 0);
+        ctx->name = mknewa(char, 20);
+        do {
+            sprintf(ctx->name, "t%08d", ctx_num++);
+            ctx->hash = context_hash(ctx->name);
+            otherctx = add234(h->contexts, ctx);
+        } while (otherctx != ctx);     /* retry until actually inserted */
+    }
+
+    /*
+     * Ensure paragraph attributes are clear for the start of text
+     * output.
+     */
+    whlp_para_reset(h);
+}
+
+/*
+ * Return the context name of a topic. This is NULL for a topic
+ * registered without a name until whlp_prepare has named it.
+ */
+char *whlp_topic_id(WHLP_TOPIC topic)
+{
+    return topic->name;
+}
+
+/*
+ * Start a topic: build its type-2 (topic header) record and append
+ * it to the `text' tree.
+ *
+ * The variadic arguments are macro strings (char *), terminated by
+ * a NULL pointer. LinkData2 of the record holds the title followed
+ * by each macro, NUL-separated, without a NUL after the last
+ * string. The 28 bytes of LinkData1 are allocated here but not
+ * filled in.
+ */
+void whlp_begin_topic(WHLP h, WHLP_TOPIC topic, char *title, ...)
+{
+    struct topiclink *link = mknew(struct topiclink);
+    int len, slen;
+    char *macro;
+    va_list ap;
+
+    link->nexttopic = NULL;
+    if (h->prevtopic)
+        h->prevtopic->nexttopic = link;
+    h->prevtopic = link;
+
+    link->nonscroll = link->scroll = NULL;
+    link->context = topic;
+    link->block_size = 0;
+
+    link->recordtype = 2;              /* topic header */
+    link->len1 = 4*7;                  /* standard linkdata1 size */
+    link->data1 = mknewa(unsigned char, link->len1);
+
+    slen = strlen(title);
+    assert(slen+1 <= TOPIC_BLKSIZE);
+    memcpy(h->linkdata2, title, slen+1);
+    len = slen+1;
+
+    va_start(ap, title);
+    while ( (macro = va_arg(ap, char *)) != NULL) {
+        slen = strlen(macro);
+        assert(len+slen+1 <= TOPIC_BLKSIZE);
+        memcpy(h->linkdata2+len, macro, slen+1);
+        len += slen+1;
+    }
+    va_end(ap);
+    len--;                             /* lose the last \0 on the last macro */
+
+    link->len2 = len;
+    link->data2 = mknewa(unsigned char, link->len2);
+    memcpy(link->data2, h->linkdata2, link->len2);
+
+    topic->title = dupstr(title);
+    topic->link = link;
+
+    addpos234(h->text, link, count234(h->text));
+}
+
+void whlp_browse_link(WHLP h, WHLP_TOPIC before, WHLP_TOPIC after)
+{
+    UNUSEDARG(h);
+
+    /*
+     * See if the `before' topic is already linked to another one,
+     * and break the link to that if so. Likewise the `after'
+     * topic.
+     */
+    if (before->browse_next)
+        before->browse_next->browse_prev = NULL;
+    if (after->browse_prev)
+        after->browse_prev->browse_next = NULL;
+    before->browse_next = after;
+    after->browse_prev = before;
+}
+
+/* ----------------------------------------------------------------------
+ * Manage the actual generation of paragraph and text records.
+ */
+
+/*
+ * Append one byte to the record under construction (h->link):
+ * to the linkdata1 buffer if `which' is 1, otherwise to linkdata2.
+ */
+static void whlp_linkdata(WHLP h, int which, int c)
+{
+    int *len = (which == 1 ? &h->link->len1 : &h->link->len2);
+    unsigned char *data = (which == 1 ? h->linkdata1 : h->linkdata2);
+    assert(*len < TOPIC_BLKSIZE);
+    data[(*len)++] = c;
+}
+
+/* Append a 16-bit value, least significant byte first. */
+static void whlp_linkdata_short(WHLP h, int which, int data)
+{
+    whlp_linkdata(h, which, data & 0xFF);
+    whlp_linkdata(h, which, (data >> 8) & 0xFF);
+}
+
+/* Append a 32-bit value, least significant byte first. */
+static void whlp_linkdata_long(WHLP h, int which, int data)
+{
+    whlp_linkdata(h, which, data & 0xFF);
+    whlp_linkdata(h, which, (data >> 8) & 0xFF);
+    whlp_linkdata(h, which, (data >> 16) & 0xFF);
+    whlp_linkdata(h, which, (data >> 24) & 0xFF);
+}
+
+/*
+ * Append a compressed unsigned short: values up to 0x7F take one
+ * byte (the value doubled, low bit clear); larger values take two
+ * bytes, the first having its low bit set.
+ */
+static void whlp_linkdata_cushort(WHLP h, int which, int data)
+{
+    if (data <= 0x7F) {
+        whlp_linkdata(h, which, data*2);
+    } else {
+        whlp_linkdata(h, which, 1 + (data%128 * 2));
+        whlp_linkdata(h, which, data/128);
+    }
+}
+
+/*
+ * Append a compressed signed short: the value is biased by 64 if
+ * it lies in -0x40..0x3F, otherwise by 16384, and then written as
+ * a compressed unsigned short.
+ */
+static void whlp_linkdata_csshort(WHLP h, int which, int data)
+{
+    if (data >= -0x40 && data <= 0x3F)
+        whlp_linkdata_cushort(h, which, data+64);
+    else
+        whlp_linkdata_cushort(h, which, data+16384);
+}
+
+/*
+ * Append a compressed unsigned long: values up to 0x7FFF take one
+ * short (the value doubled); larger values take two shorts, the
+ * first having its low bit set.
+ */
+static void whlp_linkdata_culong(WHLP h, int which, int data)
+{
+    if (data <= 0x7FFF) {
+        whlp_linkdata_short(h, which, data*2);
+    } else {
+        whlp_linkdata_short(h, which, 1 + (data%32768 * 2));
+        whlp_linkdata_short(h, which, data/32768);
+    }
+}
+
+/*
+ * Append a compressed signed long: the value is biased by 16384 if
+ * it lies in -0x4000..0x3FFF, otherwise by 67108864, and then
+ * written as a compressed unsigned long.
+ */
+static void whlp_linkdata_cslong(WHLP h, int which, int data)
+{
+    if (data >= -0x4000 && data <= 0x3FFF)
+        whlp_linkdata_culong(h, which, data+16384);
+    else
+        whlp_linkdata_culong(h, which, data+67108864);
+}
+
+/* Clear the paragraph flags and free every stored tab stop. */
+static void whlp_para_reset(WHLP h)
+{
+    int *p;
+
+    h->para_flags = 0;
+
+    while ( (p = index234(h->tabstops, 0)) != NULL) {
+        delpos234(h->tabstops, 0);
+        sfree(p);
+    }
+}
+
+/*
+ * Set a paragraph attribute for the next paragraph.
+ *
+ * Attribute ids from WHLP_PARA_SPACEABOVE to
+ * WHLP_PARA_FIRSTLINEINDENT set flag bit (1 << attr_id) and store
+ * attr_param. WHLP_PARA_ALIGNMENT replaces the alignment bits
+ * (mask 0xC00) with 0x400 for right or 0x800 for centre, or
+ * neither for any other alignment. Other ids are ignored.
+ */
+void whlp_para_attr(WHLP h, int attr_id, int attr_param)
+{
+    if (attr_id >= WHLP_PARA_SPACEABOVE &&
+        attr_id <= WHLP_PARA_FIRSTLINEINDENT) {
+        h->para_flags |= 1 << attr_id;
+        h->para_attrs[attr_id] = attr_param;
+    } else if (attr_id == WHLP_PARA_ALIGNMENT) {
+        h->para_flags &= ~0xC00;
+        if (attr_param == WHLP_ALIGN_RIGHT)
+            h->para_flags |= 0x400;
+        else if (attr_param == WHLP_ALIGN_CENTRE)
+            h->para_flags |= 0x800;
+    }
+}
+
+/*
+ * Add a tab stop for the next paragraph. The alignment is kept in
+ * the bits above the low 16 (0x20000 centre, 0x10000 right), and
+ * flag 0x0200 records that tab stops are present.
+ */
+void whlp_set_tabstop(WHLP h, int tabstop, int alignment)
+{
+    int *p;
+
+    if (alignment == WHLP_ALIGN_CENTRE)
+        tabstop |= 0x20000;
+    if (alignment == WHLP_ALIGN_RIGHT)
+        tabstop |= 0x10000;
+
+    p = mknew(int);
+    *p = tabstop;
+    /*
+     * add234 hands back the element already in the tree if one
+     * compares equal. In that case ours was not inserted, so
+     * free it rather than leak it.
+     */
+    if (add234(h->tabstops, p) != p)
+        sfree(p);
+    h->para_flags |= 0x0200;
+}
+
+/*
+ * Start a paragraph (a type-32 text record). The record is built
+ * up in h->linkdata1 and h->linkdata2 until whlp_end_para. The
+ * first WHLP_PARA_NONSCROLL / WHLP_PARA_SCROLL paragraph after a
+ * topic header is remembered in that header's topiclink.
+ */
+void whlp_begin_para(WHLP h, int para_type)
+{
+    struct topiclink *link = mknew(struct topiclink);
+    int i;
+
+    /*
+     * Clear these to NULL out of paranoia, although in records
+     * that aren't type 2 they should never actually be needed.
+     */
+    link->nexttopic = NULL;
+    link->context = NULL;
+    link->nonscroll = link->scroll = NULL;
+
+    link->recordtype = 32;             /* text record */
+
+    h->link = link;
+    link->len1 = link->len2 = 0;
+    link->data1 = h->linkdata1;
+    link->data2 = h->linkdata2;
+
+    if (para_type == WHLP_PARA_NONSCROLL && h->prevtopic &&
+        !h->prevtopic->nonscroll)
+        h->prevtopic->nonscroll = link;
+    if (para_type == WHLP_PARA_SCROLL && h->prevtopic &&
+        !h->prevtopic->scroll)
+        h->prevtopic->scroll = link;
+
+    /*
+     * Now we're ready to start accumulating stuff in linkdata1 and
+     * linkdata2. Next we build up the paragraph info. Note that
+     * the TopicSize (cslong: size of LinkData1 minus the topicsize
+     * and topiclength fields) and TopicLength (cushort: size of
+     * LinkData2) fields are missing; we will put those on when we
+     * end the paragraph.
+     */
+    whlp_linkdata(h, 1, 0);            /* must-be-0x00 */
+    whlp_linkdata(h, 1, 0x80);         /* must-be-0x80 */
+    whlp_linkdata_short(h, 1, 0);      /* Winterhoff says `id'; always 0 AFAICT */
+    whlp_linkdata_short(h, 1, h->para_flags);
+    /*
+     * NOTE(review): the body of this loop was damaged in this copy
+     * of the patch and has been reconstructed; the call is presumed
+     * to be whlp_linkdata_csshort - confirm against upstream.
+     */
+    for (i = WHLP_PARA_SPACEABOVE; i <= WHLP_PARA_FIRSTLINEINDENT; i++) {
+        if (h->para_flags & (1<<i))
+            whlp_linkdata_csshort(h, 1, h->para_attrs[i]);
+    }
+    if (h->para_flags & 0x0200) {
+        int ntabs;
+        /*
+         * Write out tab stop data.
+         */
+        ntabs = count234(h->tabstops);
+        whlp_linkdata_csshort(h, 1, ntabs);
+        for (i = 0; i < ntabs; i++) {
+            int tab, *tabp;
+            tabp = index234(h->tabstops, i);
+            tab = *tabp;
+            if (tab & 0x30000)
+                tab |= 0x4000;
+            whlp_linkdata_cushort(h, 1, tab & 0xFFFF);
+            if (tab & 0x4000)
+                whlp_linkdata_cushort(h, 1, tab >> 16);
+        }
+    }
+
+    /*
+     * Fine. Now we're ready to start writing actual text and
+     * formatting commands.
+     */
+}
+
+/* Emit a font-change formatting command in the current paragraph. */
+void whlp_set_font(WHLP h, int font_id)
+{
+    /*
+     * Write a NUL into linkdata2 to cause the reader to flip over
+     * to linkdata1 to see the formatting command.
+     */
+    whlp_linkdata(h, 2, 0);
+    /*
+     * Now the formatting command is 0x80 followed by a short.
+     */
+    whlp_linkdata(h, 1, 0x80);
+    whlp_linkdata_short(h, 1, font_id);
+}
+
+/* Begin a hyperlink to `target' in the current paragraph. */
+void whlp_start_hyperlink(WHLP h, WHLP_TOPIC target)
+{
+    /*
+     * Write a NUL into linkdata2.
+     */
+    whlp_linkdata(h, 2, 0);
+    /*
+     * Now the formatting command is 0xE3 followed by the context
+     * hash.
+     */
+    whlp_linkdata(h, 1, 0xE3);
+    whlp_linkdata_long(h, 1, target->hash);
+}
+
+/* End the hyperlink begun by whlp_start_hyperlink. */
+void whlp_end_hyperlink(WHLP h)
+{
+    /*
+     * Write a NUL into linkdata2.
+     */
+    whlp_linkdata(h, 2, 0);
+    /*
+     * Now the formatting command is 0x89.
+     */
+    whlp_linkdata(h, 1, 0x89);
+}
+
+void whlp_tab(WHLP h)
+{
+    /*
+     * Write a NUL into linkdata2.
+     */
+    whlp_linkdata(h, 2, 0);
+    /*
+     * Now the formatting command is 0x83.
+ */ + whlp_linkdata(h, 1, 0x83); +} + +void whlp_text(WHLP h, char *text) +{ + while (*text) { + whlp_linkdata(h, 2, *text++); + } +} + +void whlp_end_para(WHLP h) +{ + int data1cut; + + /* + * Round off the paragraph with 0x82 and 0xFF formatting + * commands. Each requires a NUL in linkdata2. + */ + whlp_linkdata(h, 2, 0); + whlp_linkdata(h, 1, 0x82); + whlp_linkdata(h, 2, 0); + whlp_linkdata(h, 1, 0xFF); + + /* + * Now finish up: create the header of linkdata1 (TopicLength + * and TopicSize fields), allocate the real linkdata1 and + * linkdata2 fields, and copy them out of the buffers in h. + * Then insert the finished topiclink into the `text' tree, and + * clean up. + */ + data1cut = h->link->len1; + whlp_linkdata_cslong(h, 1, data1cut); + whlp_linkdata_cushort(h, 1, h->link->len2); + + h->link->data1 = mknewa(unsigned char, h->link->len1); + memcpy(h->link->data1, h->linkdata1 + data1cut, h->link->len1 - data1cut); + memcpy(h->link->data1 + h->link->len1 - data1cut, h->linkdata1, data1cut); + h->link->data2 = mknewa(unsigned char, h->link->len2); + memcpy(h->link->data2, h->linkdata2, h->link->len2); + + addpos234(h->text, h->link, count234(h->text)); + + /* Hack: accumulate the `blocksize' parameter in the topic header. */ + if (h->prevtopic) + h->prevtopic->block_size += 21 + h->link->len1 + h->link->len2; + + h->link = NULL; /* this is now in the tree */ + + whlp_para_reset(h); +} + +/* ---------------------------------------------------------------------- + * Manage the layout and generation of the |TOPIC section. + */ + +static void whlp_topicsect_write(WHLP h, struct file *f, void *data, int len, + int can_break) +{ + unsigned char *p = (unsigned char *)data; + + if (h->topicblock_remaining <= 0 || + h->topicblock_remaining < can_break) { + /* + * Start a new block. 
+ */ + if (h->topicblock_remaining > 0) + whlp_file_fill(f, h->topicblock_remaining); + whlp_file_add_long(f, h->lasttopiclink); + h->firsttopiclink_offset = whlp_file_offset(f); + whlp_file_add_long(f, -1L); /* this will be filled in later */ + whlp_file_add_long(f, h->lasttopicstart); + h->topicblock_remaining = TOPIC_BLKSIZE - 12; + } + while (len > 0) { + int thislen = (h->topicblock_remaining < len ? + h->topicblock_remaining : len); + whlp_file_add(f, p, thislen); + p += thislen; + len -= thislen; + h->topicblock_remaining -= thislen; + if (len > 0 && h->topicblock_remaining <= 0) { + /* + * Start a new block. + */ + whlp_file_add_long(f, h->lasttopiclink); + h->firsttopiclink_offset = whlp_file_offset(f); + whlp_file_add_long(f, -1L); /* this will be filled in later */ + whlp_file_add_long(f, h->lasttopicstart); + h->topicblock_remaining = TOPIC_BLKSIZE - 12; + } + } +} + +static void whlp_topic_layout(WHLP h) +{ + int block, offset, pos; + int i, nlinks, size; + int topicnum; + struct topiclink *link; + struct file *f; + + /* + * Create a final TOPICLINK containing no usable data. + */ + link = mknew(struct topiclink); + link->nexttopic = NULL; + if (h->prevtopic) + h->prevtopic->nexttopic = link; + h->prevtopic = link; + link->data1 = mknewa(unsigned char, 0x1c); + link->block_size = 0; + link->data2 = NULL; + link->len1 = 0x1c; + link->len2 = 0; + link->nexttopic = NULL; + link->recordtype = 2; + link->nonscroll = link->scroll = NULL; + link->context = NULL; + addpos234(h->text, link, count234(h->text)); + + /* + * Each TOPICBLOCK has space for TOPIC_BLKSIZE-12 bytes. The + * size of each TOPICLINK is 21 bytes plus the combined lengths + * of LinkData1 and LinkData2. So we can now go through and + * break up the TOPICLINKs into TOPICBLOCKs, and also set up + * the TOPICOFFSET and TOPICPOS of each one while we do so. 
+ */ + + block = 0; + offset = 0; + pos = 12; + nlinks = count234(h->text); + for (i = 0; i < nlinks; i++) { + link = index234(h->text, i); + size = 21 + link->len1 + link->len2; + /* + * We can't split within the topicblock header or within + * linkdata1. So if the split would fall in that area, + * start a new block _now_. + */ + if (TOPIC_BLKSIZE - pos < 21 + link->len1) { + block++; + offset = 0; + pos = 12; + } + link->topicoffset = block * 0x8000 + offset; + link->topicpos = block * 0x4000 + pos; + pos += size; + if (link->recordtype != 2) /* TOPICOFFSET doesn't count titles */ + offset += link->len2; + while (pos > TOPIC_BLKSIZE) { + block++; + offset = 0; + pos -= TOPIC_BLKSIZE - 12; + } + } + + /* + * Now we have laid out the TOPICLINKs into blocks, and + * determined the final TOPICOFFSET and TOPICPOS of each one. + * So now we can go through and write the headers of the type-2 + * records. + */ + + topicnum = 0; + for (i = 0; i < nlinks; i++) { + link = index234(h->text, i); + if (link->recordtype != 2) + continue; + + PUT_32BIT_LSB_FIRST(link->data1 + 0, link->block_size); + if (link->context && link->context->browse_prev) + PUT_32BIT_LSB_FIRST(link->data1 + 4, + link->context->browse_prev->link->topicoffset); + else + PUT_32BIT_LSB_FIRST(link->data1 + 4, 0xFFFFFFFFL); + if (link->context && link->context->browse_next) + PUT_32BIT_LSB_FIRST(link->data1 + 8, + link->context->browse_next->link->topicoffset); + else + PUT_32BIT_LSB_FIRST(link->data1 + 8, 0xFFFFFFFFL); + PUT_32BIT_LSB_FIRST(link->data1 + 12, topicnum); + topicnum++; + if (link->nonscroll) + PUT_32BIT_LSB_FIRST(link->data1 + 16, link->nonscroll->topicpos); + else + PUT_32BIT_LSB_FIRST(link->data1 + 16, 0xFFFFFFFFL); + if (link->scroll) + PUT_32BIT_LSB_FIRST(link->data1 + 20, link->scroll->topicpos); + else + PUT_32BIT_LSB_FIRST(link->data1 + 20, 0xFFFFFFFFL); + if (link->nexttopic) + PUT_32BIT_LSB_FIRST(link->data1 + 24, link->nexttopic->topicpos); + else + PUT_32BIT_LSB_FIRST(link->data1 + 
24, 0xFFFFFFFFL); + } + + /* + * Having done all _that_, we're now finally ready to go + * through and create the |TOPIC section in its final form. + */ + + h->lasttopiclink = -1L; + h->lasttopicstart = 0L; + f = whlp_new_file(h, "|TOPIC"); + h->topicblock_remaining = -1; + whlp_topicsect_write(h, f, NULL, 0, 0); /* start the first block */ + for (i = 0; i < nlinks; i++) { + unsigned char header[21]; + struct topiclink *otherlink; + + link = index234(h->text, i); + + /* + * Create and output the TOPICLINK header. + */ + PUT_32BIT_LSB_FIRST(header + 0, 21 + link->len1 + link->len2); + PUT_32BIT_LSB_FIRST(header + 4, link->len2); + if (i == 0) { + PUT_32BIT_LSB_FIRST(header + 8, 0xFFFFFFFFL); + } else { + otherlink = index234(h->text, i-1); + PUT_32BIT_LSB_FIRST(header + 8, otherlink->topicpos); + } + if (i+1 >= nlinks) { + PUT_32BIT_LSB_FIRST(header + 12, 0xFFFFFFFFL); + } else { + otherlink = index234(h->text, i+1); + PUT_32BIT_LSB_FIRST(header + 12, otherlink->topicpos); + } + PUT_32BIT_LSB_FIRST(header + 16, 21 + link->len1); + header[20] = link->recordtype; + whlp_topicsect_write(h, f, header, 21, 21 + link->len1); + + /* + * Fill in the `first topiclink' pointer in the block + * header if appropriate. (We do this _after_ outputting + * the header because then we can be sure we'll be in the + * same block as we think we are.) + */ + if (h->firsttopiclink_offset > 0) { + whlp_file_seek(f, h->firsttopiclink_offset, 0); + whlp_file_add_long(f, link->topicpos); + h->firsttopiclink_offset = 0; + whlp_file_seek(f, 0, 2); + } + + /* + * Update the `last topiclink', and possibly `last + * topicstart', pointers. + */ + h->lasttopiclink = link->topicpos; + if (link->recordtype == 2) + h->lasttopicstart = link->topicpos; + + + /* + * Output LinkData1 and LinkData2. + */ + whlp_topicsect_write(h, f, link->data1, link->len1, link->len1); + whlp_topicsect_write(h, f, link->data2, link->len2, 0); + + /* + * Output the block header. 
+ */ + + link = index234(h->text, i); + + } +} + +/* ---------------------------------------------------------------------- + * Manage the index sections (|KWDATA, |KWMAP, |KWBTREE). + */ + +void whlp_index_term(WHLP h, char *index, WHLP_TOPIC topic) +{ + struct indexrec *idx = mknew(struct indexrec); + + idx->term = dupstr(index); + idx->topic = topic; + /* + * If this reference is already in the tree, just silently drop + * the duplicate. + */ + if (add234(h->index, idx) != idx) { + sfree(idx->term); + sfree(idx); + } +} + +static void whlp_build_kwdata(WHLP h) +{ + struct file *f; + int i; + struct indexrec *first, *next; + + f = whlp_new_file(h, "|KWDATA"); + + /* + * Go through the index B-tree, condensing all sequences of + * records with the same term into a single one with a valid + * (count,offset) pair, and building up the KWDATA section. + */ + i = 0; + while ( (first = index234(h->index, i)) != NULL) { + first->count = 1; + first->offset = whlp_file_offset(f); + whlp_file_add_long(f, first->topic->link->topicoffset); + i++; + while ( (next = index234(h->index, i)) != NULL && + !strcmp(first->term, next->term)) { + /* + * The next index record has the same term. Fold it + * into this one and remove from the tree. + */ + whlp_file_add_long(f, next->topic->link->topicoffset); + first->count++; + delpos234(h->index, i); + sfree(next->term); + sfree(next); + } + } + + /* + * Now we should have `index' in a form that's ready to + * construct |KWBTREE. So we can return. + */ +} + +/* ---------------------------------------------------------------------- + * Standard chunks of data for the |SYSTEM and |FONT sections. 
+ */ + +static void whlp_system_record(struct file *f, int id, + const void *data, int length) +{ + whlp_file_add_short(f, id); + whlp_file_add_short(f, length); + whlp_file_add(f, data, length); +} + +static void whlp_standard_systemsection(struct file *f) +{ + const char lcid[] = { 0, 0, 0, 0, 0, 0, 0, 0, 9, 4 }; + const char charset[] = { 0, 0, 0, 2, 0 }; + + whlp_file_add_short(f, 0x36C); /* magic number */ + whlp_file_add_short(f, 33); /* minor version: HCW 4.00 Win95+ */ + whlp_file_add_short(f, 1); /* major version */ + whlp_file_add_long(f, time(NULL)); /* generation date */ + whlp_file_add_short(f, 0); /* flags=0 means no compression */ + + /* + * Add some magic locale identifier information. (We ought to + * find out something about what all this means; see the TODO + * list at the top of the file.) + */ + whlp_system_record(f, 9, lcid, sizeof(lcid)); + whlp_system_record(f, 11, charset, sizeof(charset)); +} + +void whlp_title(WHLP h, char *title) +{ + whlp_system_record(h->systemfile, 1, title, 1+strlen(title)); +} + +void whlp_copyright(WHLP h, char *copyright) +{ + whlp_system_record(h->systemfile, 2, copyright, 1+strlen(copyright)); +} + +void whlp_start_macro(WHLP h, char *macro) +{ + whlp_system_record(h->systemfile, 4, macro, 1+strlen(macro)); +} + +void whlp_primary_topic(WHLP h, WHLP_TOPIC t) +{ + h->ptopic = t; +} + +static void whlp_do_primary_topic(WHLP h) +{ + unsigned char firsttopic[4]; + PUT_32BIT_LSB_FIRST(firsttopic, h->ptopic->link->topicoffset); + whlp_system_record(h->systemfile, 3, firsttopic, sizeof(firsttopic)); +} + +int whlp_create_font(WHLP h, char *font, int family, int halfpoints, + int rendition, int r, int g, int b) +{ + char *fontname = dupstr(font); + struct fontdesc *fontdesc; + int index; + + font = add234(h->fontnames, fontname); + if (font != fontname) { + /* The font name was already present. Free the new copy. 
*/ + sfree(fontname); + } + + fontdesc = mknew(struct fontdesc); + fontdesc->font = font; + fontdesc->family = family; + fontdesc->halfpoints = halfpoints; + fontdesc->rendition = rendition; + fontdesc->r = r; + fontdesc->g = g; + fontdesc->b = b; + + index = count234(h->fontdescs); + addpos234(h->fontdescs, fontdesc, index); + return index; +} + +static void whlp_make_fontsection(WHLP h, struct file *f) +{ + int i; + char *fontname; + struct fontdesc *fontdesc; + + /* + * Header block: number of font names, number of font + * descriptors, offset to font names, and offset to font + * descriptors. + */ + whlp_file_add_short(f, count234(h->fontnames)); + whlp_file_add_short(f, count234(h->fontdescs)); + whlp_file_add_short(f, 8); + whlp_file_add_short(f, 8 + 32 * count234(h->fontnames)); + + /* + * Font names. + */ + for (i = 0; (fontname = index234(h->fontnames, i)) != NULL; i++) { + char data[32]; + memset(data, i, sizeof(data)); + strncpy(data, fontname, sizeof(data)); + whlp_file_add(f, data, sizeof(data)); + } + + /* + * Font descriptors. + */ + for (i = 0; (fontdesc = index234(h->fontdescs, i)) != NULL; i++) { + int fontpos; + void *ret; + + ret = findpos234(h->fontnames, fontdesc->font, NULL, &fontpos); + assert(ret != NULL); + + whlp_file_add_char(f, fontdesc->rendition); + whlp_file_add_char(f, fontdesc->halfpoints); + whlp_file_add_char(f, fontdesc->family); + whlp_file_add_short(f, fontpos); + /* Foreground RGB */ + whlp_file_add_char(f, fontdesc->r); + whlp_file_add_char(f, fontdesc->g); + whlp_file_add_char(f, fontdesc->b); + /* Background RGB is apparently unused and always set to zero */ + whlp_file_add_char(f, 0); + whlp_file_add_char(f, 0); + whlp_file_add_char(f, 0); + } + +} + +/* ---------------------------------------------------------------------- + * Routines to manage a B-tree type file. 
+ */ + +static void whlp_make_btree(struct file *f, int flags, int pagesize, + char *dataformat, tree234 *tree, + struct file *map, + bt_index_fn indexfn, bt_leaf_fn leaffn) +{ + void **page_elements = NULL; + int npages = 0, pagessize = 0; + int npages_this_level, nentries, nlevels; + int total_leaf_entries; + char btdata[MAX_PAGE_SIZE]; + int btlen; + int page_start, fixups_offset, unused_bytes; + void *element; + int index; + + assert(pagesize <= MAX_PAGE_SIZE); + + /* + * Start with the B-tree header. We'll have to come back and + * fill in a few bits later. + */ + whlp_file_add_short(f, 0x293B); /* magic number */ + whlp_file_add_short(f, flags); + whlp_file_add_short(f, pagesize); + { + char data[16]; + memset(data, 0, sizeof(data)); + assert(strlen(dataformat) <= sizeof(data)); + memcpy(data, dataformat, strlen(dataformat)); + whlp_file_add(f, data, sizeof(data)); + } + whlp_file_add_short(f, 0); /* must-be-zero */ + fixups_offset = whlp_file_offset(f); + whlp_file_add_short(f, 0); /* page splits; fix up later */ + whlp_file_add_short(f, 0); /* root page index; fix up later */ + whlp_file_add_short(f, -1); /* must-be-minus-one */ + whlp_file_add_short(f, 0); /* total number of pages; fix later */ + whlp_file_add_short(f, 0); /* number of levels; fix later */ + whlp_file_add_long(f, count234(tree));/* total B-tree entries */ + + /* + * If we have a map section, leave space at the start for its + * element count. + */ + if (map) { + whlp_file_add_short(map, 0); + } + + /* + * Now create the leaf pages. + */ + index = 0; + + npages_this_level = 0; + total_leaf_entries = 0; + + element = index234(tree, index); + while (element) { + /* + * Make a new leaf page. + */ + npages_this_level++; + if (npages >= pagessize) { + pagessize = npages + 32; + page_elements = resize(page_elements, pagessize); + } + page_elements[npages++] = element; + + /* + * Leave space in the leaf page for the header. We'll + * come back and add it later. 
+ */ + page_start = whlp_file_offset(f); + whlp_file_add(f, "12345678", 8); + unused_bytes = pagesize - 8; + nentries = 0; + + /* + * Now add leaf entries until we run out of room, or out of + * elements. + */ + while (element) { + btlen = leaffn(element, btdata); + if (btlen > unused_bytes) + break; + whlp_file_add(f, btdata, btlen); + unused_bytes -= btlen; + nentries++; + index++; + element = index234(tree, index); + } + + /* + * Now add the unused bytes, and then go back and put + * in the header. + */ + whlp_file_fill(f, unused_bytes); + whlp_file_seek(f, page_start, 0); + whlp_file_add_short(f, unused_bytes); + whlp_file_add_short(f, nentries); + /* Previous-page indicator will automatically go to -1 when + * absent. */ + whlp_file_add_short(f, npages-2); + /* Next-page indicator must be -1 if we're at the end. */ + if (!element) + whlp_file_add_short(f, -1); + else + whlp_file_add_short(f, npages); + whlp_file_seek(f, 0, 2); + + /* + * If we have a map section, add a map entry. + */ + if (map) { + whlp_file_add_long(map, total_leaf_entries); + whlp_file_add_short(map, npages_this_level-1); + } + total_leaf_entries += nentries; + } + + /* + * If we have a map section, write the total number of map + * entries into it. + */ + if (map) { + whlp_file_seek(map, 0, 0); + whlp_file_add_short(map, npages_this_level); + whlp_file_seek(map, 0, 2); + } + + /* + * Now create further levels until we're down to one page. + */ + nlevels = 1; + while (npages_this_level > 1) { + int first = npages - npages_this_level; + int last = npages - 1; + int current; + + nlevels++; + npages_this_level = 0; + + current = first; + while (current <= last) { + /* + * Make a new index page. + */ + npages_this_level++; + if (npages >= pagessize) { + pagessize = npages + 32; + page_elements = resize(page_elements, pagessize); + } + page_elements[npages++] = page_elements[current]; + + /* + * Leave space for some of the header, but we can put + * in the PreviousPage link already. 
+ */ + page_start = whlp_file_offset(f); + whlp_file_add(f, "1234", 4); + whlp_file_add_short(f, current); + unused_bytes = pagesize - 6; + + /* + * Now add index entries until we run out of either + * space or pages. + */ + current++; + nentries = 0; + while (current <= last) { + btlen = indexfn(page_elements[current], btdata); + if (btlen + 2 > unused_bytes) + break; + whlp_file_add(f, btdata, btlen); + whlp_file_add_short(f, current); + unused_bytes -= btlen+2; + nentries++; + current++; + } + + /* + * Now add the unused bytes, and then go back and put + * in the header. + */ + whlp_file_fill(f, unused_bytes); + whlp_file_seek(f, page_start, 0); + whlp_file_add_short(f, unused_bytes); + whlp_file_add_short(f, nentries); + whlp_file_seek(f, 0, 2); + } + } + + /* + * Now we have all our pages ready, and we know where our root + * page is. Fix up the main B-tree header. + */ + whlp_file_seek(f, fixups_offset, 0); + /* Creation of every page requires a split unless it's the first in + * a new level. Hence, page splits equals pages minus levels. */ + whlp_file_add_short(f, npages - nlevels); + whlp_file_add_short(f, npages-1); /* root page index */ + whlp_file_add_short(f, -1); /* must-be-minus-one */ + whlp_file_add_short(f, npages); /* total number of pages */ + whlp_file_add_short(f, nlevels); /* number of levels */ + + /* Just for tidiness, seek to the end of the file :-) */ + whlp_file_seek(f, 0, 2); + + /* Clean up. */ + sfree(page_elements); +} + + +/* ---------------------------------------------------------------------- + * Routines to manage the `internal file' structure. 
+ */ + +static struct file *whlp_new_file(WHLP h, char *name) +{ + struct file *f; + f = mknew(struct file); + f->data = NULL; + f->pos = f->len = f->size = 0; + if (name) { + f->name = dupstr(name); + add234(h->files, f); + } else { + f->name = NULL; + } + return f; +} + +static void whlp_free_file(struct file *f) +{ + sfree(f->data); + sfree(f->name); /* may be NULL */ + sfree(f); +} + +static void whlp_file_add(struct file *f, const void *data, int len) +{ + if (f->pos + len > f->size) { + f->size = f->pos + len + 1024; + f->data = resize(f->data, f->size); + } + memcpy(f->data + f->pos, data, len); + f->pos += len; + if (f->len < f->pos) + f->len = f->pos; +} + +static void whlp_file_add_char(struct file *f, int data) +{ + unsigned char s; + s = data & 0xFF; + whlp_file_add(f, &s, 1); +} + +static void whlp_file_add_short(struct file *f, int data) +{ + unsigned char s[2]; + PUT_16BIT_LSB_FIRST(s, data); + whlp_file_add(f, s, 2); +} + +static void whlp_file_add_long(struct file *f, int data) +{ + unsigned char s[4]; + PUT_32BIT_LSB_FIRST(s, data); + whlp_file_add(f, s, 4); +} + +static void whlp_file_fill(struct file *f, int len) +{ + if (f->pos + len > f->size) { + f->size = f->pos + len + 1024; + f->data = resize(f->data, f->size); + } + memset(f->data + f->pos, 0, len); + f->pos += len; + if (f->len < f->pos) + f->len = f->pos; +} + +static void whlp_file_seek(struct file *f, int pos, int whence) +{ + f->pos = (whence == 0 ? 0 : whence == 1 ? f->pos : f->len) + pos; +} + +static int whlp_file_offset(struct file *f) +{ + return f->pos; +} + +/* ---------------------------------------------------------------------- + * Open and close routines; final wrapper around everything. + */ + +WHLP whlp_new(void) +{ + WHLP ret; + struct file *f; + + ret = mknew(struct WHLP_tag); + + /* + * Internal B-trees. 
+ */ + ret->files = newtree234(filecmp); + ret->pre_contexts = newtree234(NULL); + ret->contexts = newtree234(ctxcmp); + ret->titles = newtree234(ttlcmp); + ret->text = newtree234(NULL); + ret->index = newtree234(idxcmp); + ret->tabstops = newtree234(tabcmp); + ret->fontnames = newtree234(fontcmp); + ret->fontdescs = newtree234(NULL); + + /* + * Some standard files. + */ + f = whlp_new_file(ret, "|CTXOMAP"); + whlp_file_add_short(f, 0); /* dummy section */ + f = whlp_new_file(ret, "|SYSTEM"); + whlp_standard_systemsection(f); + ret->systemfile = f; + + /* + * Other variables. + */ + ret->prevtopic = NULL; + ret->ncontexts = 0; + ret->link = NULL; + + return ret; +} + +void whlp_close(WHLP h, char *filename) +{ + FILE *fp; + int filecount, offset, index, filelen; + struct file *file, *map, *md; + context *ctx; + int has_index; + + /* + * Lay out the topic section. + */ + whlp_topic_layout(h); + + /* + * Finish off the system section. + */ + whlp_do_primary_topic(h); + + /* + * Assemble the font section. + */ + file = whlp_new_file(h, "|FONT"); + whlp_make_fontsection(h, file); + + /* + * Set up the index. + */ + has_index = (count234(h->index) != 0); + if (has_index) + whlp_build_kwdata(h); + + /* + * Set up the `titles' B-tree for the |TTLBTREE section. + */ + for (index = 0; (ctx = index234(h->contexts, index)) != NULL; index++) + add234(h->titles, ctx); + + /* + * Construct the various B-trees. + */ + file = whlp_new_file(h, "|CONTEXT"); + whlp_make_btree(file, 0x0002, 0x0800, "L4", + h->contexts, NULL, ctxindex, ctxleaf); + + file = whlp_new_file(h, "|TTLBTREE"); + whlp_make_btree(file, 0x0002, 0x0800, "Lz", + h->titles, NULL, ttlindex, ttlleaf); + + if (has_index) { + file = whlp_new_file(h, "|KWBTREE"); + map = whlp_new_file(h, "|KWMAP"); + whlp_make_btree(file, 0x0002, 0x0800, "F24", + h->index, map, idxindex, idxleaf); + } + + /* + * Open the output file. 
+ */ + fp = fopen(filename, "wb"); + if (!fp) { + whlp_abandon(h); + return; + } + + /* + * Work out all the file offsets. + */ + filecount = count234(h->files); + offset = 16; /* just after header */ + for (index = 0; index < filecount; index++) { + file = index234(h->files, index); + file->fileoffset = offset; + offset += 9 + file->len; /* 9 is size of file header */ + } + /* Now `offset' holds what will be the offset of the master directory. */ + + md = whlp_new_file(h, NULL); /* master directory file */ + whlp_make_btree(md, 0x0402, 0x0400, "z4", + h->files, NULL, fileindex, fileleaf); + + filelen = offset + 9 + md->len; + + /* + * Write out the file header. + */ + { + unsigned char header[16]; + PUT_32BIT_LSB_FIRST(header+0, 0x00035F3FL); /* magic */ + PUT_32BIT_LSB_FIRST(header+4, offset); /* offset to directory */ + PUT_32BIT_LSB_FIRST(header+8, 0xFFFFFFFFL); /* first free block */ + PUT_32BIT_LSB_FIRST(header+12, filelen); /* total file length */ + fwrite(header, 1, 16, fp); + } + + /* + * Now write out each file. + */ + for (index = 0; index <= filecount; index++) { + int used, reserved; + unsigned char header[9]; + + if (index == filecount) + file = md; /* master directory comes last */ + else + file = index234(h->files, index); + + used = file->len; + reserved = used + 9; + + /* File header. */ + PUT_32BIT_LSB_FIRST(header+0, reserved); + PUT_32BIT_LSB_FIRST(header+4, used); + header[8] = 0; /* flags */ + fwrite(header, 1, 9, fp); + + /* File data. */ + fwrite(file->data, 1, file->len, fp); + } + + fclose(fp); + + whlp_free_file(md); + + whlp_abandon(h); /* now free everything */ +} + +void whlp_abandon(WHLP h) +{ + struct file *f; + struct indexrec *idx; + struct topiclink *link; + struct fontdesc *fontdesc; + char *fontname; + context *ctx; + + /* Get rid of any lingering tab stops. */ + whlp_para_reset(h); + + /* Delete the (now empty) tabstops tree. */ + freetree234(h->tabstops); + + /* Delete the index tree and all its entries. 
*/ + while ( (idx = index234(h->index, 0)) != NULL) { + delpos234(h->index, 0); + sfree(idx->term); + sfree(idx); + } + freetree234(h->index); + + /* Delete the text tree and all its topiclinks. */ + while ( (link = index234(h->text, 0)) != NULL) { + delpos234(h->text, 0); + sfree(link->data1); /* may be NULL */ + sfree(link->data2); /* may be NULL */ + sfree(link); + } + freetree234(h->text); + + /* Delete the fontdescs tree and all its entries. */ + while ( (fontdesc = index234(h->fontdescs, 0)) != NULL) { + delpos234(h->fontdescs, 0); + sfree(fontdesc); + } + freetree234(h->fontdescs); + + /* Delete the fontnames tree and all its entries. */ + while ( (fontname = index234(h->fontnames, 0)) != NULL) { + delpos234(h->fontnames, 0); + sfree(fontname); + } + freetree234(h->fontnames); + + /* There might be an unclosed paragraph in h->link. */ + if (h->link) + sfree(h->link); /* if so it won't have data1 or data2 */ + + /* + * `titles' contains copies of the `contexts' entries, so we + * don't need to free them here. + */ + freetree234(h->titles); + + /* + * `contexts' and `pre_contexts' _both_ contain contexts that + * need freeing. (pre_contexts shouldn't contain any, unless + * the help generation was abandoned half-way through.) + */ + while ( (ctx = index234(h->pre_contexts, 0)) != NULL) { + delpos234(h->index, 0); + sfree(ctx->name); + sfree(ctx->title); + sfree(ctx); + } + freetree234(h->pre_contexts); + while ( (ctx = index234(h->contexts, 0)) != NULL) { + delpos234(h->contexts, 0); + sfree(ctx->name); + sfree(ctx->title); + sfree(ctx); + } + freetree234(h->contexts); + + /* + * Free all the internal files. 
+ */ + while ( (f = index234(h->files, 0)) != NULL ) { + delpos234(h->files, 0); + whlp_free_file(f); + } + freetree234(h->files); + + sfree(h); +} + +#ifdef TESTMODE + +int main(void) +{ + WHLP h; + WHLP_TOPIC t1, t2, t3; + char *e; + char mymacro[100]; + + h = whlp_new(); + + whlp_title(h, "Test Help File"); + whlp_copyright(h, "This manual is copyright \251 2001 Simon Tatham." + " All rights reversed."); + whlp_start_macro(h, "CB(\"btn_about\",\"&About\",\"About()\")"); + whlp_start_macro(h, "CB(\"btn_up\",\"&Up\",\"Contents()\")"); + whlp_start_macro(h, "BrowseButtons()"); + + whlp_create_font(h, "Arial", WHLP_FONTFAM_SANS, 30, + 0, 0, 0, 0); + whlp_create_font(h, "Times New Roman", WHLP_FONTFAM_SERIF, 24, + WHLP_FONT_STRIKEOUT, 0, 0, 0); + whlp_create_font(h, "Times New Roman", WHLP_FONTFAM_SERIF, 24, + WHLP_FONT_ITALIC, 0, 0, 0); + whlp_create_font(h, "Courier New", WHLP_FONTFAM_FIXED, 24, + 0, 0, 0, 0); + + t1 = whlp_register_topic(h, "foobar", &e); + assert(t1 != NULL); + t2 = whlp_register_topic(h, "M359HPEHGW", &e); + assert(t2 != NULL); + t3 = whlp_register_topic(h, "Y5VQEXZQVJ", &e); + assert(t3 == NULL && !strcmp(e, "M359HPEHGW")); + t3 = whlp_register_topic(h, NULL, NULL); + assert(t3 != NULL); + + whlp_primary_topic(h, t2); + + whlp_prepare(h); + + whlp_begin_topic(h, t1, "First Topic", "DB(\"btn_up\")", NULL); + + whlp_begin_para(h, WHLP_PARA_NONSCROLL); + whlp_set_font(h, 0); + whlp_text(h, "Foobar"); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "This is a silly paragraph with "); + whlp_set_font(h, 3); + whlp_text(h, "code"); + whlp_set_font(h, 1); + whlp_text(h, " in it."); + whlp_end_para(h); + + whlp_para_attr(h, WHLP_PARA_SPACEABOVE, 12); + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "This second, equally silly, paragraph has "); + whlp_set_font(h, 2); + whlp_text(h, "emphasis"); + whlp_set_font(h, 1); + whlp_text(h, " just to prove we can do it."); + 
whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. 
This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. 
This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. 
This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. 
This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Now I'm going to waffle on indefinitely, in a vague attempt" + " to make some wrapping happen, and also to make the topicblock" + " go across its boundaries. This is going to take a fair amount" + " of text, so I'll just have to cheat and c'n'p a lot of it."); + whlp_end_para(h); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Have a "); + whlp_start_hyperlink(h, t2); + whlp_text(h, "hyperlink"); + whlp_end_hyperlink(h); + whlp_text(h, " to another topic."); + whlp_end_para(h); + + sprintf(mymacro, "CBB(\"btn_up\",\"JI(`',`%s')\");EB(\"btn_up\")", + whlp_topic_id(t3)); + + whlp_begin_topic(h, t2, "Second Topic", mymacro, NULL); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "This topic contains no non-scrolling region. I would" + " illustrate this with a ludicrously long paragraph, but that" + " would get very tedious very quickly. Instead I'll just waffle" + " on pointlessly for a little bit and then shut up."); + whlp_end_para(h); + + whlp_set_tabstop(h, 36, WHLP_ALIGN_LEFT); + whlp_para_attr(h, WHLP_PARA_LEFTINDENT, 36); + whlp_para_attr(h, WHLP_PARA_FIRSTLINEINDENT, -36); + whlp_para_attr(h, WHLP_PARA_SPACEABOVE, 12); + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "\225"); /* bullet */ + whlp_tab(h); + whlp_text(h, "This is a paragraph with a bullet. 
With any luck it should" + " work exactly like it used to in the old NASM help file."); + whlp_end_para(h); + + whlp_set_tabstop(h, 128, WHLP_ALIGN_RIGHT); + whlp_set_tabstop(h, 256, WHLP_ALIGN_CENTRE); + whlp_set_tabstop(h, 384, WHLP_ALIGN_LEFT); + whlp_para_attr(h, WHLP_PARA_SPACEABOVE, 12); + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Ooh:"); whlp_tab(h); + whlp_text(h, "Right?"); whlp_tab(h); + whlp_text(h, "Centre?"); whlp_tab(h); + whlp_text(h, "Left?"); + whlp_end_para(h); + + whlp_set_tabstop(h, 128, WHLP_ALIGN_RIGHT); + whlp_set_tabstop(h, 256, WHLP_ALIGN_CENTRE); + whlp_set_tabstop(h, 384, WHLP_ALIGN_LEFT); + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "Aah:"); whlp_tab(h); + whlp_text(h, "R?"); whlp_tab(h); + whlp_text(h, "C?"); whlp_tab(h); + whlp_text(h, "L?"); + whlp_end_para(h); + + sprintf(mymacro, "CBB(\"btn_up\",\"JI(`',`%s')\");EB(\"btn_up\")", + whlp_topic_id(t1)); + + whlp_begin_topic(h, t3, "Third Topic", mymacro, NULL); + + whlp_begin_para(h, WHLP_PARA_SCROLL); + whlp_set_font(h, 1); + whlp_text(h, "This third topic is almost as boring as the first. Woo!"); + whlp_end_para(h); + + /* + * Browse sequence. + */ + whlp_browse_link(h, t1, t2); + whlp_browse_link(h, t2, t3); + + /* + * Index terms. 
+ */ + whlp_index_term(h, "foobarbaz", t1); + whlp_index_term(h, "foobarbaz", t2); + whlp_index_term(h, "foobarbaz", t3); + whlp_index_term(h, "foobar", t1); + whlp_index_term(h, "foobar", t2); + whlp_index_term(h, "foobaz", t1); + whlp_index_term(h, "foobaz", t3); + whlp_index_term(h, "barbaz", t2); + whlp_index_term(h, "barbaz", t3); + whlp_index_term(h, "foo", t1); + whlp_index_term(h, "bar", t2); + whlp_index_term(h, "baz", t3); + + whlp_close(h, "test.hlp"); + return 0; +} + +#endif diff --git a/winhelp.h b/winhelp.h new file mode 100644 index 0000000..d0c81d5 --- /dev/null +++ b/winhelp.h @@ -0,0 +1,168 @@ +/* + * winhelp.h header file for winhelp.c + */ + +typedef struct WHLP_tag *WHLP; + +typedef struct WHLP_TOPIC_tag *WHLP_TOPIC; + +/* + * Initialise a new WHlp context and begin accumulating data in it. + */ +WHLP whlp_new(void); + +/* + * Close a WHlp context and write out the help file it has created. + */ +void whlp_close(WHLP h, char *filename); + +/* + * Abandon and free a WHlp context without writing out anything. + */ +void whlp_abandon(WHLP h); + +/* + * Specify the title and copyright notice of a help file. Also + * specify Help macros to be run on loading. + */ +void whlp_title(WHLP h, char *title); +void whlp_copyright(WHLP h, char *copyright); +void whlp_start_macro(WHLP h, char *macro); + +/* + * Register a help topic. Irritatingly, due to weird phase-order + * issues with the whole file format, you have to register all your + * topics _before_ actually outputting your text. This seems likely + * to require two passes over the source document. + * + * If you want to specify a particular context string (for + * reference from other programs, to provide context-sensitive + * help), you can supply it here. Otherwise, just pass NULL and a + * nondescript one will be allocated automatically. 
+ * + * If you specify two context strings which clash under the Windows + * help file hash algorithm, this function will return NULL and + * provide a pointer to the other context string that this one + * clashed with, and you must tell your user to fix the clash. + * Sadly this is the only way to do it; despite HLP files having a + * perfectly good method of mapping arbitrary strings to things, + * they didn't see fit to use that method for help contexts, so + * instead they hash the context names and expect the hashes to be + * unique. Sigh. + * + * On success (i.e. in any circumstance other than a hash clash), a + * valid WHLP_TOPIC is returned for later use. + */ +WHLP_TOPIC whlp_register_topic(WHLP h, char *context_name, char **clash); + +/* + * Link two topics together in a browse sequence. Automatically + * takes care of the forward and reverse links. + */ +void whlp_browse_link(WHLP h, WHLP_TOPIC before, WHLP_TOPIC after); + +/* + * After calling whlp_register_topic for all topics, you should + * call this, which will sort out all loose ends and allocate + * context names for all anonymous topics. Then you can start + * writing actual text. + */ +void whlp_prepare(WHLP h); + +/* + * Create a link from an index term to a topic. + */ +void whlp_index_term(WHLP h, char *index, WHLP_TOPIC topic); + +/* + * Call this if you need the id of a topic and you don't already + * know it (for example, if whlp_prepare has allocated it + * anonymously for you). You might need this, for example, in + * creating macros for button-bar bindings. + * + * The string returned will be freed when the WHLP context is + * closed. You should not free it yourself. + * + * Do not call this before calling whlp_prepare(). + */ +char *whlp_topic_id(WHLP_TOPIC topic); /* usage example: the test program in winhelp.c formats this id into a JI(...) macro string with sprintf() and passes that string to whlp_begin_topic() */ + +/* + * Call this to specify which help topic will be the first one + * displayed when the help file is loaded. 
+ */ +void whlp_primary_topic(WHLP h, WHLP_TOPIC topic); + +/* + * Call this when about to begin writing out the text for a topic. + * + * Any additional arguments are Help macros, terminated with a + * NULL. So the minimum call sequence is + * + * whlp_begin_topic(helpfile, mytopic, "Title", NULL); + */ +void whlp_begin_topic(WHLP h, WHLP_TOPIC topic, char *title, ...); /* variadic tail: macro strings, then a terminating NULL; the winhelp.c test program passes a single macro built with sprintf() */ + +/* + * Call this to set up a font descriptor. You supply the font name, + * the font size (in half-points), the graphic rendition flags + * (bold, italic etc), and the general font family (for Windows to + * select a fallback font if yours is unavailable). You can also + * specify a foreground colour for the text (but unfortunately not + * a background). + * + * Font descriptors are identified in whlp_set_font() by small + * integers, which are allocated from 0 upwards in the order you + * call whlp_create_font(). For your convenience, + * whlp_create_font() returns the integer allocated to each font + * descriptor you create, but you could work this out just as + * easily yourself by counting. + */ +enum { /* rendition flags: each value is a distinct single bit (presumably so that several can be OR-ed together -- confirm against winhelp.c) */ + WHLP_FONT_BOLD = 1, + WHLP_FONT_ITALIC = 2, + WHLP_FONT_UNDERLINE = 4, + WHLP_FONT_STRIKEOUT = 8, + WHLP_FONT_DOUBLEUND = 16, + WHLP_FONT_SMALLCAPS = 32 +}; +enum { /* font family ids: consecutive values 1 to 5, not bit flags */ + WHLP_FONTFAM_FIXED = 1, + WHLP_FONTFAM_SERIF = 2, + WHLP_FONTFAM_SANS = 3, + WHLP_FONTFAM_SCRIPT = 4, + WHLP_FONTFAM_DECOR = 5 +}; +int whlp_create_font(WHLP h, char *font, int family, int halfpoints, + int rendition, int r, int g, int b); /* NB: the parameter order is font, family, halfpoints, rendition, which differs from the order in which the comment above describes them; r, g, b presumably carry the foreground colour (value range not stated in this header -- confirm) */ + +/* + * Routines to output paragraphs and actual text (at last). + * + * You should start by calling whlp_para_attr() to set any + * paragraph attributes that differ from the standard settings. + * Next call whlp_begin_para() to start the paragraph. Then call + * the various in-paragraph functions until you have output the + * whole paragraph, and finally call whlp_end_para() to finish it + * off. 
+ */ +enum { /* attr_id values for whlp_para_attr(); numbered from 1 */ + WHLP_PARA_SPACEABOVE=1, WHLP_PARA_SPACEBELOW, WHLP_PARA_SPACELINES, + WHLP_PARA_LEFTINDENT, WHLP_PARA_RIGHTINDENT, WHLP_PARA_FIRSTLINEINDENT, + WHLP_PARA_ALIGNMENT +}; +enum { /* alignment values as passed to whlp_set_tabstop(), numbered from 0 (presumably also the attr_param for WHLP_PARA_ALIGNMENT -- confirm) */ + WHLP_ALIGN_LEFT, WHLP_ALIGN_RIGHT, WHLP_ALIGN_CENTRE +}; +enum { /* para_type values for whlp_begin_para() */ + WHLP_PARA_SCROLL, WHLP_PARA_NONSCROLL +}; +void whlp_para_attr(WHLP h, int attr_id, int attr_param); /* attr_param may be negative: the winhelp.c test program passes -36 for WHLP_PARA_FIRSTLINEINDENT; units are not stated in this header */ +void whlp_set_tabstop(WHLP h, int tabstop, int alignment); /* the test program calls this once per tab stop before whlp_begin_para(), and repeats the calls for the following paragraph */ +void whlp_begin_para(WHLP h, int para_type); /* para_type: WHLP_PARA_SCROLL or WHLP_PARA_NONSCROLL */ +void whlp_end_para(WHLP h); /* finishes the paragraph started by whlp_begin_para() */ +void whlp_set_font(WHLP h, int font_id); /* font_id: integer returned by whlp_create_font(); the test program calls this after whlp_begin_para() */ +void whlp_text(WHLP h, char *text); /* may be called several times within one paragraph, as the test program does */ +void whlp_start_hyperlink(WHLP h, WHLP_TOPIC target); /* in the test program the whlp_text() output between this call and whlp_end_hyperlink() presumably forms the link text -- confirm */ +void whlp_end_hyperlink(WHLP h); /* closes the span opened by whlp_start_hyperlink() */ +void whlp_tab(WHLP h); /* presumably advances to the next tab stop defined with whlp_set_tabstop() -- confirm */ -- 2.11.0