X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/halibut/blobdiff_plain/04781c84bdab91b4a1a05115be75830841359bc9..a10f193fa848901b2f1eb0ea9553fcca968c62f5:/bk_html.c

diff --git a/bk_html.c b/bk_html.c
index 5cd145e..41a9f76 100644
--- a/bk_html.c
+++ b/bk_html.c
@@ -10,31 +10,6 @@
  *    sensible. Perhaps for the topmost section in the file, no
  *    fragment should be used? (Though it should probably still be
  *    _there_ even if unused.)
- * 
- *  - new configurability:
- *     * a few new things explicitly labelled as `FIXME:
- * 	 configurable' or similar.
- *     * HTML flavour.
- *     * Some means of specifying the distinction between
- * 	 restrict-charset and output-charset. It seems to me that
- * 	 `html-charset' is output-charset, and that
- * 	 restrict-charset usually wants to be either output-charset
- * 	 or UTF-8 (the latter indicating that any Unicode character
- * 	 is fair game and it will be specified using &#foo; if it
- * 	 isn't in output-charset). However, since XHTML defaults to
- * 	 UTF-8 and it's fiddly to tell it otherwise, it's just
- * 	 possible that some user may need to set restrict-charset
- * 	 to their charset of choice while leaving _output_-charset
- * 	 at UTF-8. Figure out some configuration, and apply it.
- *
- *  - test all HTML flavours and ensure they validate sensibly. Fix
- *    remaining confusion issues such as <?xml?> and obsoleteness
- *    of <a name>.
- * 
- *  - nonbreaking spaces.
- * 
- *  - free up all the data we have allocated while running this
- *    backend.
  */
 
 #include <stdio.h>
@@ -70,13 +45,18 @@ typedef struct {
     char *index_filename;
     char *template_filename;
     char *single_filename;
-    char *template_fragment;
+    char **template_fragments;
+    int ntfragments;
     char *head_end, *body_start, *body_end, *addr_start, *addr_end;
     char *body_tag, *nav_attr;
     wchar_t *author, *description;
+    wchar_t *index_text, *contents_text, *preamble_text, *title_separator;
+    wchar_t *nav_prev_text, *nav_next_text, *nav_separator;
+    wchar_t *index_main_sep, *index_multi_sep;
+    wchar_t *pre_versionid, *post_versionid;
     int restrict_charset, output_charset;
     enum {
-	HTML_3_2, HTML_4,
+	HTML_3_2, HTML_4, ISO_HTML,
 	XHTML_1_0_TRANSITIONAL, XHTML_1_0_STRICT
     } htmlver;
     wchar_t *lquote, *rquote;
@@ -105,7 +85,7 @@ struct htmlsect {
     paragraph *title, *text;
     enum { NORMAL, TOP, INDEX } type;
     int contents_depth;
-    char *fragment;
+    char **fragments;
 };
 
 typedef struct {
@@ -141,7 +121,7 @@ typedef struct {
      * level.
      */
     FILE *fp;
-    int charset;
+    int charset, restrict_charset;
     charset_state cstate;
     int ver;
     enum {
@@ -171,7 +151,8 @@ static void html_file_section(htmlconfig *cfg, htmlfilelist *files,
 			      htmlsect *sect, int depth);
 
 static htmlfile *html_new_file(htmlfilelist *list, char *filename);
-static htmlsect *html_new_sect(htmlsectlist *list, paragraph *title);
+static htmlsect *html_new_sect(htmlsectlist *list, paragraph *title,
+			       htmlconfig *cfg);
 
 /* Flags for html_words() flags parameter */
 #define NOTHING 0x00
@@ -190,9 +171,10 @@ static void element_attr(htmloutput *ho, char const *name, char const *value);
 static void element_attr_w(htmloutput *ho, char const *name,
 			   wchar_t const *value);
 static void html_text(htmloutput *ho, wchar_t const *str);
+static void html_text_nbsp(htmloutput *ho, wchar_t const *str);
 static void html_text_limit(htmloutput *ho, wchar_t const *str, int maxlen);
 static void html_text_limit_internal(htmloutput *ho, wchar_t const *text,
-				     int maxlen, int quote_quotes);
+				     int maxlen, int quote_quotes, int nbsp);
 static void html_nl(htmloutput *ho);
 static void html_raw(htmloutput *ho, char *text);
 static void html_raw_as_attr(htmloutput *ho, char *text);
@@ -200,6 +182,7 @@ static void cleanup(htmloutput *ho);
 
 static void html_href(htmloutput *ho, htmlfile *thisfile,
 		      htmlfile *targetfile, char *targetfrag);
+static void html_fragment(htmloutput *ho, char const *fragment);
 
 static char *html_format(paragraph *p, char *template_string);
 static char *html_sanitise_fragment(htmlfilelist *files, htmlfile *file,
@@ -236,13 +219,26 @@ static htmlconfig html_configure(paragraph *source) {
     ret.contents_filename = dupstr("Contents.html");
     ret.index_filename = dupstr("IndexPage.html");
     ret.template_filename = dupstr("%n.html");
-    ret.template_fragment = dupstr("%b");
+    ret.ntfragments = 1;
+    ret.template_fragments = snewn(ret.ntfragments, char *);
+    ret.template_fragments[0] = dupstr("%b");
     ret.head_end = ret.body_tag = ret.body_start = ret.body_end =
 	ret.addr_start = ret.addr_end = ret.nav_attr = NULL;
     ret.author = ret.description = NULL;
-    ret.restrict_charset = CS_ASCII;
+    ret.restrict_charset = CS_UTF8;
     ret.output_charset = CS_ASCII;
     ret.htmlver = HTML_4;
+    ret.index_text = L"Index";
+    ret.contents_text = L"Contents";
+    ret.preamble_text = L"Preamble";
+    ret.title_separator = L" - ";
+    ret.nav_prev_text = L"Previous";
+    ret.nav_next_text = L"Next";
+    ret.nav_separator = L" | ";
+    ret.index_main_sep = L": ";
+    ret.index_multi_sep = L", ";
+    ret.pre_versionid = L"[";
+    ret.post_versionid = L"]";
     /*
      * Default quote characters are Unicode matched single quotes,
      * falling back to ordinary ASCII ".
@@ -274,11 +270,32 @@ static htmlconfig html_configure(paragraph *source) {
 	    if (!ustrnicmp(k, L"xhtml-", 6))
 		k++;		    /* treat `xhtml-' and `html-' the same */
 
-	    if (!ustricmp(k, L"html-charset")) {
-		char *csname = utoa_dup(uadv(k), CS_ASCII);
-		ret.restrict_charset = ret.output_charset =
-		    charset_from_localenc(csname);
-		sfree(csname);
+	    if (!ustricmp(k, L"html-restrict-charset")) {
+		ret.restrict_charset = charset_from_ustr(&p->fpos, uadv(k));
+	    } else if (!ustricmp(k, L"html-output-charset")) {
+		ret.output_charset = charset_from_ustr(&p->fpos, uadv(k));
+	    } else if (!ustricmp(k, L"html-version")) {
+		wchar_t *vername = uadv(k);
+		static const struct {
+		    const wchar_t *name;
+		    int ver;
+		} versions[] = {
+		    {L"html3.2", HTML_3_2},
+		    {L"html4", HTML_4},
+		    {L"iso-html", ISO_HTML},
+		    {L"xhtml1.0transitional", XHTML_1_0_TRANSITIONAL},
+		    {L"xhtml1.0strict", XHTML_1_0_STRICT}
+		};
+		int i;
+
+		for (i = 0; i < (int)lenof(versions); i++)
+		    if (!ustricmp(versions[i].name, vername))
+			break;
+
+		if (i == lenof(versions))
+		    error(err_htmlver, &p->fpos, vername);
+		else
+		    ret.htmlver = versions[i].ver;
 	    } else if (!ustricmp(k, L"html-single-filename")) {
 		sfree(ret.single_filename);
 		ret.single_filename = dupstr(adv(p->origkeyword));
@@ -292,8 +309,24 @@ static htmlconfig html_configure(paragraph *source) {
 		sfree(ret.template_filename);
 		ret.template_filename = dupstr(adv(p->origkeyword));
 	    } else if (!ustricmp(k, L"html-template-fragment")) {
-		sfree(ret.template_fragment);
-		ret.template_fragment = dupstr(adv(p->origkeyword));
+		char *frag = adv(p->origkeyword);
+		if (*frag) {
+		    while (ret.ntfragments--)
+			sfree(ret.template_fragments[ret.ntfragments]);
+		    sfree(ret.template_fragments);
+		    ret.template_fragments = NULL;
+		    ret.ntfragments = 0;
+		    while (*frag) {
+			ret.ntfragments++;
+			ret.template_fragments =
+			    sresize(ret.template_fragments,
+				    ret.ntfragments, char *);
+			ret.template_fragments[ret.ntfragments-1] =
+			    dupstr(frag);
+			frag = adv(frag);
+		    }
+		} else
+		    error(err_cfginsufarg, &p->fpos, p->origkeyword, 1);
 	    } else if (!ustricmp(k, L"html-chapter-numeric")) {
 		ret.achapter.just_numbers = utob(uadv(k));
 	    } else if (!ustricmp(k, L"html-chapter-suffix")) {
@@ -387,6 +420,28 @@ static htmlconfig html_configure(paragraph *source) {
 		ret.leaf_contains_contents = utob(uadv(k));
 	    } else if (!ustricmp(k, L"html-leaf-smallest-contents")) {
 		ret.leaf_smallest_contents = utoi(uadv(k));
+	    } else if (!ustricmp(k, L"html-index-text")) {
+		ret.index_text = uadv(k);
+	    } else if (!ustricmp(k, L"html-contents-text")) {
+		ret.contents_text = uadv(k);
+	    } else if (!ustricmp(k, L"html-preamble-text")) {
+		ret.preamble_text = uadv(k);
+	    } else if (!ustricmp(k, L"html-title-separator")) {
+		ret.title_separator = uadv(k);
+	    } else if (!ustricmp(k, L"html-nav-prev-text")) {
+		ret.nav_prev_text = uadv(k);
+	    } else if (!ustricmp(k, L"html-nav-next-text")) {
+		ret.nav_next_text = uadv(k);
+	    } else if (!ustricmp(k, L"html-nav-separator")) {
+		ret.nav_separator = uadv(k);
+	    } else if (!ustricmp(k, L"html-index-main-separator")) {
+		ret.index_main_sep = uadv(k);
+	    } else if (!ustricmp(k, L"html-index-multiple-separator")) {
+		ret.index_multi_sep = uadv(k);
+	    } else if (!ustricmp(k, L"html-pre-versionid")) {
+		ret.pre_versionid = uadv(k);
+	    } else if (!ustricmp(k, L"html-post-versionid")) {
+		ret.post_versionid = uadv(k);
 	    }
 	}
     }
@@ -423,11 +478,13 @@ paragraph *html_config_filename(char *filename)
 }
 
 void html_backend(paragraph *sourceform, keywordlist *keywords,
-		  indexdata *idx, void *unused) {
+		  indexdata *idx, void *unused)
+{
     paragraph *p;
     htmlconfig conf;
     htmlfilelist files = { NULL, NULL, NULL, NULL, NULL };
     htmlsectlist sects = { NULL, NULL }, nonsects = { NULL, NULL };
+    int has_index;
 
     IGNORE(unused);
 
@@ -453,20 +510,19 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
      * source form but needs to be consistently mentioned in
      * contents links.
      * 
-     * While we're here, we'll also invent the HTML fragment name
+     * While we're here, we'll also invent the HTML fragment name(s)
      * for each section.
      */
     {
 	htmlsect *topsect, *sect;
 	int d;
 
-	topsect = html_new_sect(&sects, p);
+	topsect = html_new_sect(&sects, NULL, &conf);
 	topsect->type = TOP;
 	topsect->title = NULL;
 	topsect->text = sourceform;
 	topsect->contents_depth = contents_depth(conf, 0);
 	html_file_section(&conf, &files, topsect, -1);
-	topsect->fragment = NULL;
 
 	for (p = sourceform; p; p = p->next)
 	    if (is_heading_type(p->type)) {
@@ -477,7 +533,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 		    continue;
 		}
 
-		sect = html_new_sect(&sects, p);
+		sect = html_new_sect(&sects, p, &conf);
 		sect->text = p->next;
 
 		sect->contents_depth = contents_depth(conf, d+1) - (d+1);
@@ -491,21 +547,32 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 
 		html_file_section(&conf, &files, sect, d);
 
-		sect->fragment = html_format(p, conf.template_fragment);
-		sect->fragment = html_sanitise_fragment(&files, sect->file,
-							sect->fragment);
+		{
+		    int i;
+		    for (i=0; i < conf.ntfragments; i++) {
+			sect->fragments[i] =
+			    html_format(p, conf.template_fragments[i]);
+			sect->fragments[i] =
+			    html_sanitise_fragment(&files, sect->file,
+						   sect->fragments[i]);
+		    }
+		}
 	    }
 
-	/* And the index. */
-	sect = html_new_sect(&sects, NULL);
-	sect->text = NULL;
-	sect->type = INDEX;
-	sect->parent = topsect;
-	html_file_section(&conf, &files, sect, 0);   /* peer of chapters */
-	sect->fragment = dupstr("Index");   /* FIXME: this _can't_ be right */
-	sect->fragment = html_sanitise_fragment(&files, sect->file,
-						sect->fragment);
-	files.index = sect->file;
+	/* And the index, if we have one. */
+	has_index = (count234(idx->entries) > 0);
+	if (has_index) {
+	    sect = html_new_sect(&sects, NULL, &conf);
+	    sect->text = NULL;
+	    sect->type = INDEX;
+	    sect->parent = topsect;
+            sect->contents_depth = 0;
+	    html_file_section(&conf, &files, sect, 0);   /* peer of chapters */
+	    sect->fragments[0] = utoa_dup(conf.index_text, CS_ASCII);
+	    sect->fragments[0] = html_sanitise_fragment(&files, sect->file,
+							sect->fragments[0]);
+	    files.index = sect->file;
+	}
     }
 
     /*
@@ -550,7 +617,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 		 * won't attempt to add it to the contents or
 		 * anything weird like that).
 		 */
-		sect = html_new_sect(&nonsects, p);
+		sect = html_new_sect(&nonsects, p, &conf);
 		sect->file = parent->file;
 		sect->parent = parent;
 		p->private_data = sect;
@@ -559,11 +626,11 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 		 * Fragment IDs for these paragraphs will simply be
 		 * `p' followed by an integer.
 		 */
-		sect->fragment = snewn(40, char);
-		sprintf(sect->fragment, "p%d",
+		sect->fragments[0] = snewn(40, char);
+		sprintf(sect->fragments[0], "p%d",
 			sect->file->last_fragment_number++);
-		sect->fragment = html_sanitise_fragment(&files, sect->file,
-							sect->fragment);
+		sect->fragments[0] = html_sanitise_fragment(&files, sect->file,
+							    sect->fragments[0]);
 	    }
 	}
     }
@@ -617,9 +684,9 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 	 * Run over the document inventing fragments. Each fragment
 	 * is of the form `i' followed by an integer.
 	 */
-	lastsect = NULL;
+	lastsect = sects.head;	       /* this is always the top section */
 	for (p = sourceform; p; p = p->next) {
-	    if (is_heading_type(p->type))
+	    if (is_heading_type(p->type) && p->type != para_Title)
 		lastsect = (htmlsect *)p->private_data;
 
 	    for (w = p->words; w; w = w->next)
@@ -697,6 +764,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 
 	    ho.fp = fopen(f->filename, "w");
 	    ho.charset = conf.output_charset;
+	    ho.restrict_charset = conf.restrict_charset;
 	    ho.cstate = charset_init_state;
 	    ho.ver = conf.htmlver;
 	    ho.state = HO_NEUTRAL;
@@ -713,16 +781,20 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 			" 4.01//EN\"\n\"http://www.w3.org/TR/html4/"
 			"strict.dtd\">\n");
 		break;
+	      case ISO_HTML:
+		fprintf(ho.fp, "<!DOCTYPE HTML PUBLIC \"ISO/IEC "
+			"15445:2000//DTD HTML//EN\">\n");
+		break;
 	      case XHTML_1_0_TRANSITIONAL:
-		/* FIXME: <?xml?> to specify character encoding.
-		 * This breaks HTML backwards compat, so perhaps avoid, or
-		 * perhaps only emit when not using the default UTF-8? */
+		fprintf(ho.fp, "<?xml version=\"1.0\" encoding=\"%s\"?>\n",
+			charset_to_mimeenc(conf.output_charset));
 		fprintf(ho.fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML"
 			" 1.0 Transitional//EN\"\n\"http://www.w3.org/TR/"
 			"xhtml1/DTD/xhtml1-transitional.dtd\">\n");
 		break;
 	      case XHTML_1_0_STRICT:
-		/* FIXME: <?xml?> to specify character encoding. */
+		fprintf(ho.fp, "<?xml version=\"1.0\" encoding=\"%s\"?>\n",
+			charset_to_mimeenc(conf.output_charset));
 		fprintf(ho.fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML"
 			" 1.0 Strict//EN\"\n\"http://www.w3.org/TR/xhtml1/"
 			"DTD/xhtml1-strict.dtd\">\n");
@@ -769,7 +841,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 
 		assert(f->last);
 		if (f->last != f->first && f->last->title) {
-		    html_text(&ho, L" - ");   /* FIXME: configurable? */
+		    html_text(&ho, conf.title_separator);
 		    html_words(&ho, f->last->title->words, NOTHING,
 			       f, keywords, &conf);
 		}
@@ -780,10 +852,29 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 	    if (conf.head_end)
 		html_raw(&ho, conf.head_end);
 
+	    /*
+	     * Add any <head> data defined in specific sections
+	     * that go in this file. (This is mostly to allow <meta
+	     * name="AppleTitle"> tags for Mac online help.)
+	     */
+	    for (s = sects.head; s; s = s->next) {
+		if (s->file == f && s->text) {
+		    for (p = s->text;
+			 p && (p == s->text || p->type == para_Title ||
+			       !is_heading_type(p->type));
+			 p = p->next) {
+			if (p->type == para_Config) {
+			    if (!ustricmp(p->keyword, L"html-local-head")) {
+				html_raw(&ho, adv(p->origkeyword));
+			    }
+			}
+		    }
+		}
+	    }
+
 	    element_close(&ho, "head");
 	    html_nl(&ho);
 
-	    /* FIXME: need to be able to specify replacement for this */
 	    if (conf.body_tag)
 		html_raw(&ho, conf.body_tag);
 	    else
@@ -806,37 +897,38 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 		    element_open(&ho, "a");
 		    element_attr(&ho, "href", prevf->filename);
 		}
-		html_text(&ho, L"Previous");/* FIXME: conf? */
+		html_text(&ho, conf.nav_prev_text);
 		if (prevf)
 		    element_close(&ho, "a");
 
-		html_text(&ho, L" | ");     /* FIXME: conf? */
+		html_text(&ho, conf.nav_separator);
 
 		if (f != files.head) {
 		    element_open(&ho, "a");
 		    element_attr(&ho, "href", files.head->filename);
 		}
-		html_text(&ho, L"Contents");/* FIXME: conf? */
+		html_text(&ho, conf.contents_text);
 		if (f != files.head)
 		    element_close(&ho, "a");
 
-		html_text(&ho, L" | ");     /* FIXME: conf? */
-
-		if (f != files.index) {
-		    element_open(&ho, "a");
-		    element_attr(&ho, "href", files.index->filename);
+		if (has_index) {
+		    html_text(&ho, conf.nav_separator);
+		    if (f != files.index) {
+			element_open(&ho, "a");
+			element_attr(&ho, "href", files.index->filename);
+		    }
+		    html_text(&ho, conf.index_text);
+		    if (f != files.index)
+			element_close(&ho, "a");
 		}
-		html_text(&ho, L"Index");/* FIXME: conf? */
-		if (f != files.index)
-		    element_close(&ho, "a");
 
-		html_text(&ho, L" | ");     /* FIXME: conf? */
+		html_text(&ho, conf.nav_separator);
 
 		if (f->next) {
 		    element_open(&ho, "a");
 		    element_attr(&ho, "href", f->next->filename);
 		}
-		html_text(&ho, L"Next");    /* FIXME: conf? */
+		html_text(&ho, conf.nav_next_text);
 		if (f->next)
 		    element_close(&ho, "a");
 
@@ -846,7 +938,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 	    prevf = f;
 
 	    /*
-	     * Write out a prefix TOC for the file.
+	     * Write out a prefix TOC for the file (if a leaf file).
 	     * 
 	     * We start by going through the section list and
 	     * collecting the sections which need to be added to
@@ -929,6 +1021,12 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 		     */
 		    displaying = TRUE;
 		} else {
+		    /*
+		     * Doesn't belong in this file, but it may be
+		     * a descendant of a section which does, in
+		     * which case we should consider it for the
+		     * main TOC of this file (for non-leaf files).
+		     */
 		    htmlsect *a, *ac;
 		    int depth, adepth;
 
@@ -988,19 +1086,18 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 		    element_open(&ho, htag);
 
 		    /*
-		     * Provide anchor for cross-links to target.
-		     * 
-		     * FIXME: AIcurrentlyUI, this needs to be done
-		     * differently in XHTML because <a name> is
-		     * deprecated or obsolete.
+		     * Provide anchor(s) for cross-links to target.
 		     * 
 		     * (Also we'll have to do this separately in
 		     * other paragraph types - NumberedList and
 		     * BiblioCited.)
 		     */
-		    element_open(&ho, "a");
-		    element_attr(&ho, "name", s->fragment);
-		    element_close(&ho, "a");
+		    {
+			int i;
+			for (i=0; i < conf.ntfragments; i++)
+			    if (s->fragments[i])
+				html_fragment(&ho, s->fragments[i]);
+		    }
 
 		    html_section_title(&ho, s, f, keywords, &conf, TRUE);
 
@@ -1132,9 +1229,10 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 				element_open(&ho, "p");
 				if (p->private_data) {
 				    htmlsect *s = (htmlsect *)p->private_data;
-				    element_open(&ho, "a");
-				    element_attr(&ho, "name", s->fragment);
-				    element_close(&ho, "a");
+				    int i;
+				    for (i=0; i < conf.ntfragments; i++)
+					if (s->fragments[i])
+					    html_fragment(&ho, s->fragments[i]);
 				}
 				html_nl(&ho);
 				html_words(&ho, p->kwtext, ALL,
@@ -1150,9 +1248,10 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 				element_open(&ho, "li");
 				if (p->private_data) {
 				    htmlsect *s = (htmlsect *)p->private_data;
-				    element_open(&ho, "a");
-				    element_attr(&ho, "name", s->fragment);
-				    element_close(&ho, "a");
+				    int i;
+				    for (i=0; i < conf.ntfragments; i++)
+					if (s->fragments[i])
+					    html_fragment(&ho, s->fragments[i]);
 				}
 				html_nl(&ho);
 				stackhead->itemtype = LI;
@@ -1219,7 +1318,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 			    html_words(&ho, entry->text, MARKUP|LINKS,
 				       f, keywords, &conf);
 
-			    html_text(&ho, L": ");/* FIXME: configurable */
+			    html_text(&ho, conf.index_main_sep);
 
 			    for (j = 0; j < hi->nrefs; j++) {
 				htmlindexref *hr =
@@ -1227,7 +1326,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 				paragraph *p = hr->section->title;
 
 				if (j > 0)
-				    html_text(&ho, L", "); /* FIXME: conf */
+				    html_text(&ho, conf.index_multi_sep);
 
 				html_href(&ho, f, hr->section->file,
 					  hr->fragment);
@@ -1238,8 +1337,17 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 				else if (p && p->words)
 				    html_words(&ho, p->words, MARKUP|LINKS,
 					       f, keywords, &conf);
-				else
-				    html_text(&ho, L"FIXME");
+				else {
+				    /*
+				     * If there is no title at all,
+				     * this must be because our
+				     * target section is the
+				     * preamble section and there
+				     * is no title. So we use the
+				     * preamble_text.
+				     */
+				    html_text(&ho, conf.preamble_text);
+				}
 				element_close(&ho, "a");
 			    }
 			}
@@ -1263,33 +1371,48 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 		    html_raw(&ho, conf.body_end);
 
 		if (conf.address_section) {
+		    int started = FALSE;
+		    if (conf.htmlver == ISO_HTML) {
+			/*
+			 * The ISO-HTML validator complains if
+			 * there isn't a <div> tag surrounding the
+			 * <address> tag. I'm uncertain of why this
+			 * should be - there appears to be no
+			 * mention of this in the ISO-HTML spec,
+			 * suggesting that it doesn't represent a
+			 * change from HTML 4, but nonetheless the
+			 * HTML 4 validator doesn't seem to mind.
+			 */
+			element_open(&ho, "div");
+		    }
 		    element_open(&ho, "address");
 		    if (conf.addr_start) {
 			html_raw(&ho, conf.addr_start);
 			html_nl(&ho);
+			started = TRUE;
 		    }
 		    if (conf.visible_version_id) {
-			int started = FALSE;
 			for (p = sourceform; p; p = p->next)
 			    if (p->type == para_VersionID) {
-				if (!started)
-				    element_open(&ho, "p");
-				else
+				if (started)
 				    element_empty(&ho, "br");
 				html_nl(&ho);
-				html_text(&ho, L"[");   /* FIXME: conf? */
+				html_text(&ho, conf.pre_versionid);
 				html_words(&ho, p->words, NOTHING,
 					   f, keywords, &conf);
-				html_text(&ho, L"]");   /* FIXME: conf? */
+				html_text(&ho, conf.post_versionid);
 				started = TRUE;
 			    }
-			if (started)
-			    element_close(&ho, "p");
 			done_version_ids = TRUE;
 		    }
-		    if (conf.addr_end)
+		    if (conf.addr_end) {
+			if (started)
+			    element_empty(&ho, "br");
 			html_raw(&ho, conf.addr_end);
+		    }
 		    element_close(&ho, "address");
+		    if (conf.htmlver == ISO_HTML)
+			element_close(&ho, "div");
 		}
 
 		if (!done_version_ids) {
@@ -1340,8 +1463,81 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
     }
 
     /*
-     * FIXME: Free all the working data.
+     * Free all the working data.
      */
+    {
+	htmlfragment *frag;
+	while ( (frag = (htmlfragment *)delpos234(files.frags, 0)) != NULL ) {
+	    /*
+	     * frag->fragment is dynamically allocated, but will be
+	     * freed when we process the htmlsect structure which
+	     * it is attached to.
+	     */
+	    sfree(frag);
+	}
+	freetree234(files.frags);
+    }
+    {
+	htmlsect *sect, *tmp;
+	sect = sects.head;
+	while (sect) {
+	    int i;
+	    tmp = sect->next;
+	    for (i=0; i < conf.ntfragments; i++)
+		sfree(sect->fragments[i]);
+	    sfree(sect->fragments);
+	    sfree(sect);
+	    sect = tmp;
+	}
+	sect = nonsects.head;
+	while (sect) {
+	    int i;
+	    tmp = sect->next;
+	    for (i=0; i < conf.ntfragments; i++)
+		sfree(sect->fragments[i]);
+	    sfree(sect->fragments);
+	    sfree(sect);
+	    sect = tmp;
+	}
+    }
+    {
+	htmlfile *file, *tmp;
+	file = files.head;
+	while (file) {
+	    tmp = file->next;
+	    sfree(file->filename);
+	    sfree(file);
+	    file = tmp;
+	}
+    }
+    {
+	int i;
+	indexentry *entry;
+	for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
+	    htmlindex *hi = (htmlindex *)entry->backend_data;
+	    sfree(hi);
+	}
+    }
+    {
+	paragraph *p;
+	word *w;
+	for (p = sourceform; p; p = p->next)
+	    for (w = p->words; w; w = w->next)
+		if (w->type == word_IndexRef) {
+		    htmlindexref *hr = (htmlindexref *)w->private_data;
+		    assert(hr != NULL);
+		    sfree(hr->fragment);
+		    sfree(hr);
+		}
+    }
+    sfree(conf.asect);
+    sfree(conf.single_filename);
+    sfree(conf.contents_filename);
+    sfree(conf.index_filename);
+    sfree(conf.template_filename);
+    while (conf.ntfragments--)
+	sfree(conf.template_fragments[conf.ntfragments]);
+    sfree(conf.template_fragments);
 }
 
 static void html_file_section(htmlconfig *cfg, htmlfilelist *files,
@@ -1439,7 +1635,8 @@ static htmlfile *html_new_file(htmlfilelist *list, char *filename)
     return ret;
 }
 
-static htmlsect *html_new_sect(htmlsectlist *list, paragraph *title)
+static htmlsect *html_new_sect(htmlsectlist *list, paragraph *title,
+			       htmlconfig *cfg)
 {
     htmlsect *ret = snew(htmlsect);
 
@@ -1455,6 +1652,13 @@ static htmlsect *html_new_sect(htmlsectlist *list, paragraph *title)
     ret->parent = NULL;
     ret->type = NORMAL;
 
+    ret->fragments = snewn(cfg->ntfragments, char *);
+    {
+	int i;
+	for (i=0; i < cfg->ntfragments; i++)
+	    ret->fragments[i] = NULL;
+    }
+
     return ret;
 }
 
@@ -1478,12 +1682,16 @@ static void html_words(htmloutput *ho, word *words, int flags,
       case word_LowerXref:
 	if (flags & LINKS) {
 	    keyword *kwl = kw_lookup(keywords, w->text);
-	    paragraph *p = kwl->para;
-	    htmlsect *s = (htmlsect *)p->private_data;
+	    paragraph *p;
+	    htmlsect *s;
+
+	    assert(kwl);
+	    p = kwl->para;
+	    s = (htmlsect *)p->private_data;
 
 	    assert(s);
 
-	    html_href(ho, file, s->file, s->fragment);
+	    html_href(ho, file, s->file, s->fragments[0]);
 	}
 	break;
       case word_HyperEnd:
@@ -1494,9 +1702,7 @@ static void html_words(htmloutput *ho, word *words, int flags,
       case word_IndexRef:
 	if (flags & INDEXENTS) {
 	    htmlindexref *hr = (htmlindexref *)w->private_data;
-	    element_open(ho, "a");
-	    element_attr(ho, "name", hr->fragment);
-	    element_close(ho, "a");
+	    html_fragment(ho, hr->fragment);
 	    hr->generated = TRUE;
 	}
 	break;
@@ -1533,8 +1739,8 @@ static void html_words(htmloutput *ho, word *words, int flags,
 	    else
 		html_text(ho, cfg->rquote);
 	} else {
-	    if (cvt_ok(ho->charset, w->text) || !w->alt)
-		html_text(ho, w->text);
+	    if (!w->alt || cvt_ok(ho->restrict_charset, w->text))
+		html_text_nbsp(ho, w->text);
 	    else
 		html_words(ho, w->alt, flags, file, keywords, cfg);
 	}
@@ -1609,11 +1815,9 @@ static void html_charset_cleanup(htmloutput *ho)
 	fwrite(outbuf, 1, bytes, ho->fp);
 }
 
-static void return_to_neutral(htmloutput *ho)
+static void return_mostly_to_neutral(htmloutput *ho)
 {
-    if (ho->state == HO_IN_TEXT) {
-	html_charset_cleanup(ho);
-    } else if (ho->state == HO_IN_EMPTY_TAG && is_xhtml(ho->ver)) {
+    if (ho->state == HO_IN_EMPTY_TAG && is_xhtml(ho->ver)) {
 	fprintf(ho->fp, " />");
     } else if (ho->state == HO_IN_EMPTY_TAG || ho->state == HO_IN_TAG) {
 	fprintf(ho->fp, ">");
@@ -1622,6 +1826,15 @@ static void return_to_neutral(htmloutput *ho)
     ho->state = HO_NEUTRAL;
 }
 
+static void return_to_neutral(htmloutput *ho)
+{
+    if (ho->state == HO_IN_TEXT) {
+	html_charset_cleanup(ho);
+    }
+
+    return_mostly_to_neutral(ho);
+}
+
 static void element_open(htmloutput *ho, char const *name)
 {
     return_to_neutral(ho);
@@ -1674,24 +1887,31 @@ static void element_attr_w(htmloutput *ho, char const *name,
 {
     html_charset_cleanup(ho);
     fprintf(ho->fp, " %s=\"", name);
-    html_text_limit_internal(ho, value, 0, TRUE);
+    html_text_limit_internal(ho, value, 0, TRUE, FALSE);
     html_charset_cleanup(ho);
     fputc('"', ho->fp);
 }
 
 static void html_text(htmloutput *ho, wchar_t const *text)
 {
-    html_text_limit(ho, text, 0);
+    return_mostly_to_neutral(ho);
+    html_text_limit_internal(ho, text, 0, FALSE, FALSE);
+}
+
+static void html_text_nbsp(htmloutput *ho, wchar_t const *text)
+{
+    return_mostly_to_neutral(ho);
+    html_text_limit_internal(ho, text, 0, FALSE, TRUE);
 }
 
 static void html_text_limit(htmloutput *ho, wchar_t const *text, int maxlen)
 {
-    return_to_neutral(ho);
-    html_text_limit_internal(ho, text, maxlen, FALSE);
+    return_mostly_to_neutral(ho);
+    html_text_limit_internal(ho, text, maxlen, FALSE, FALSE);
 }
 
 static void html_text_limit_internal(htmloutput *ho, wchar_t const *text,
-				     int maxlen, int quote_quotes)
+				     int maxlen, int quote_quotes, int nbsp)
 {
     int textlen = ustrlen(text);
     char outbuf[256];
@@ -1707,7 +1927,8 @@ static void html_text_limit_internal(htmloutput *ho, wchar_t const *text,
 	    if (text[lenbefore] == L'<' ||
 		text[lenbefore] == L'>' ||
 		text[lenbefore] == L'&' ||
-		(text[lenbefore] == L'"' && quote_quotes))
+		(text[lenbefore] == L'"' && quote_quotes) ||
+		(text[lenbefore] == L' ' && nbsp))
 		break;
 	lenafter = lenbefore;
 	bytes = charset_from_unicode(&text, &lenafter, outbuf, lenof(outbuf),
@@ -1737,7 +1958,10 @@ static void html_text_limit_internal(htmloutput *ho, wchar_t const *text,
 		fprintf(ho->fp, "&amp;");
 	    else if (*text == L'"')
 		fprintf(ho->fp, "&quot;");
-	    else
+	    else if (*text == L' ') {
+		assert(nbsp);
+		fprintf(ho->fp, "&nbsp;");
+	    } else
 		assert(!"Can't happen");
 	    text++, textlen--;
 	}
@@ -1769,6 +1993,15 @@ static void html_href(htmloutput *ho, htmlfile *thisfile,
     sfree(url);
 }
 
+static void html_fragment(htmloutput *ho, char const *fragment)
+{
+    element_open(ho, "a");
+    element_attr(ho, "name", fragment);
+    if (is_xhtml(ho->ver))
+	element_attr(ho, "id", fragment);
+    element_close(ho, "a");
+}
+
 static char *html_format(paragraph *p, char *template_string)
 {
     char *c, *t;
@@ -1812,6 +2045,7 @@ static char *html_format(paragraph *p, char *template_string)
 	    } else if (p->keyword && *p->keyword && fmt == 'k')
 		ws = p->keyword;
 	    else
+		/* %N comes here; also failure cases of other fmts */
 		w = p->words;
 
 	    if (ws) {
@@ -1865,6 +2099,13 @@ static char *html_sanitise_fragment(htmlfilelist *files, htmlfile *file,
 	*q = '\0';
     }
 
+    /* If there's nothing left, make something valid up */
+    if (!*text) {
+	static const char anonfrag[] = "anon";
+	text = sresize(text, lenof(anonfrag), char);
+	strcpy(text, anonfrag);
+    }
+
     /*
      * Now we check for clashes with other fragment names, and
      * adjust this one if necessary by appending a hyphen followed
@@ -1895,13 +2136,24 @@ static void html_contents_entry(htmloutput *ho, int depth, htmlsect *s,
 				htmlfile *thisfile, keywordlist *keywords,
 				htmlconfig *cfg)
 {
+    if (ho->contents_level >= depth && ho->contents_level > 0) {
+	element_close(ho, "li");
+	html_nl(ho);
+    }
+
     while (ho->contents_level > depth) {
 	element_close(ho, "ul");
 	ho->contents_level--;
+	if (ho->contents_level > 0) {
+	    element_close(ho, "li");
+	}
+	html_nl(ho);
     }
 
     while (ho->contents_level < depth) {
+	html_nl(ho);
 	element_open(ho, "ul");
+	html_nl(ho);
 	ho->contents_level++;
     }
 
@@ -1909,10 +2161,10 @@ static void html_contents_entry(htmloutput *ho, int depth, htmlsect *s,
 	return;
 
     element_open(ho, "li");
-    html_href(ho, thisfile, s->file, s->fragment);
+    html_href(ho, thisfile, s->file, s->fragments[0]);
     html_section_title(ho, s, thisfile, keywords, cfg, FALSE);
     element_close(ho, "a");
-    element_close(ho, "li");
+    /* <li> will be closed by a later invocation */
 }
 
 static void html_section_title(htmloutput *ho, htmlsect *s, htmlfile *thisfile,
@@ -1950,9 +2202,16 @@ static void html_section_title(htmloutput *ho, htmlsect *s, htmlfile *thisfile,
 		   thisfile, keywords, cfg);
     } else {
 	assert(s->type != NORMAL);
-	if (s->type == TOP)
-	    html_text(ho, L"Preamble");/* FIXME: configure */
+	/*
+	 * If we're printing the full document title for _real_ and
+	 * there isn't one, we don't want to print `Preamble' at
+	 * the top of what ought to just be some text. If we need
+	 * it in any other context such as TOCs, we need to print
+	 * `Preamble'.
+	 */
+	if (s->type == TOP && !real)
+	    html_text(ho, cfg->preamble_text);
 	else if (s->type == INDEX)
-	    html_text(ho, L"Index");/* FIXME: configure */
+	    html_text(ho, cfg->index_text);
     }
 }