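Added a new config directive `html-local-head', which permits me to
add raw data to the <head> of the specific output file containing the
section in which the directive appears (mostly to allow
<meta name="AppleTitle"> tags for Mac online help).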

[sgt/halibut] / bk_html.c
diff --git a/bk_html.c b/bk_html.c

index 936126d..96a8982 100644 (file)
--- a/bk_html.c
+++ b/bk_html.c
@@ -10,35 +10,6 @@
   *    sensible. Perhaps for the topmost section in the file, no
   *    fragment should be used? (Though it should probably still be
   *    _there_ even if unused.)
- * 
- *  - new configurability:
- *     * a few new things explicitly labelled as `FIXME:
- *      configurable' or similar.
- *     * HTML flavour.
- *     * Some means of specifying the distinction between
- *      restrict-charset and output-charset. It seems to me that
- *      `html-charset' is output-charset, and that
- *      restrict-charset usually wants to be either output-charset
- *      or UTF-8 (the latter indicating that any Unicode character
- *      is fair game and it will be specified using &#foo; if it
- *      isn't in output-charset). However, since XHTML defaults to
- *      UTF-8 and it's fiddly to tell it otherwise, it's just
- *      possible that some user may need to set restrict-charset
- *      to their charset of choice while leaving _output_-charset
- *      at UTF-8. Figure out some configuration, and apply it.
- *
- *  - test all HTML flavours and ensure they validate sensibly. Fix
- *    remaining confusion issues such as <?xml?> and obsoleteness
- *    of <a name>.
- * 
- *  - proper naming of all fragment IDs. The ones for sections are
- *    fine; the ones for numbered list and bibliociteds are utter
- *    crap; the ones for indexes _might_ do but it might be worth
- *    giving some thought to how to do them better.
- *     + also set up a mechanism for ensuring that fragment IDs
- *      never clash.
- * 
- *  - nonbreaking spaces?
   */
  
  #include <stdio.h>
@@ -78,9 +49,13 @@ typedef struct {
      char *head_end, *body_start, *body_end, *addr_start, *addr_end;
      char *body_tag, *nav_attr;
      wchar_t *author, *description;
+    wchar_t *index_text, *contents_text, *preamble_text, *title_separator;
+    wchar_t *nav_prev_text, *nav_next_text, *nav_separator;
+    wchar_t *index_main_sep, *index_multi_sep;
+    wchar_t *pre_versionid, *post_versionid;
      int restrict_charset, output_charset;
      enum {
-       HTML_3_2, HTML_4,
+       HTML_3_2, HTML_4, ISO_HTML,
         XHTML_1_0_TRANSITIONAL, XHTML_1_0_STRICT
      } htmlver;
      wchar_t *lquote, *rquote;
@@ -115,6 +90,7 @@ struct htmlsect {
  typedef struct {
      htmlfile *head, *tail;
      htmlfile *single, *index;
+    tree234 *frags;
  } htmlfilelist;
  
  typedef struct {
@@ -122,6 +98,11 @@ typedef struct {
  } htmlsectlist;
  
  typedef struct {
+    htmlfile *file;
+    char *fragment;
+} htmlfragment;
+
+typedef struct {
      int nrefs, refsize;
      word **refs;
  } htmlindex;
@@ -129,6 +110,7 @@ typedef struct {
  typedef struct {
      htmlsect *section;
      char *fragment;
+    int generated, referenced;
  } htmlindexref;
  
  typedef struct {
@@ -138,7 +120,7 @@ typedef struct {
       * level.
       */
      FILE *fp;
-    int charset;
+    int charset, restrict_charset;
      charset_state cstate;
      int ver;
      enum {
@@ -152,6 +134,18 @@ typedef struct {
      int contents_level;
  } htmloutput;
  
+static int html_fragment_compare(void *av, void *bv)
+{
+    htmlfragment *a = (htmlfragment *)av;
+    htmlfragment *b = (htmlfragment *)bv;
+    int cmp;
+
+    if ((cmp = strcmp(a->file->filename, b->file->filename)) != 0)
+       return cmp;
+    else
+       return strcmp(a->fragment, b->fragment);
+}
+
  static void html_file_section(htmlconfig *cfg, htmlfilelist *files,
                               htmlsect *sect, int depth);
  
@@ -175,9 +169,10 @@ static void element_attr(htmloutput *ho, char const *name, char const *value);
  static void element_attr_w(htmloutput *ho, char const *name,
                            wchar_t const *value);
  static void html_text(htmloutput *ho, wchar_t const *str);
+static void html_text_nbsp(htmloutput *ho, wchar_t const *str);
  static void html_text_limit(htmloutput *ho, wchar_t const *str, int maxlen);
  static void html_text_limit_internal(htmloutput *ho, wchar_t const *text,
-                                    int maxlen, int quote_quotes);
+                                    int maxlen, int quote_quotes, int nbsp);
  static void html_nl(htmloutput *ho);
  static void html_raw(htmloutput *ho, char *text);
  static void html_raw_as_attr(htmloutput *ho, char *text);
@@ -185,16 +180,18 @@ static void cleanup(htmloutput *ho);
  
  static void html_href(htmloutput *ho, htmlfile *thisfile,
                       htmlfile *targetfile, char *targetfrag);
+static void html_fragment(htmloutput *ho, char const *fragment);
  
  static char *html_format(paragraph *p, char *template_string);
-static void html_sanitise_fragment(char *text);
+static char *html_sanitise_fragment(htmlfilelist *files, htmlfile *file,
+                                   char *text);
  
  static void html_contents_entry(htmloutput *ho, int depth, htmlsect *s,
                                 htmlfile *thisfile, keywordlist *keywords,
                                 htmlconfig *cfg);
  static void html_section_title(htmloutput *ho, htmlsect *s,
                                htmlfile *thisfile, keywordlist *keywords,
-                              htmlconfig *cfg);
+                              htmlconfig *cfg, int real);
  
  static htmlconfig html_configure(paragraph *source) {
      htmlconfig ret;
@@ -207,7 +204,7 @@ static htmlconfig html_configure(paragraph *source) {
      ret.achapter.just_numbers = FALSE;
      ret.achapter.number_suffix = L": ";
      ret.nasect = 1;
-    ret.asect = mknewa(sectlevel, ret.nasect);
+    ret.asect = snewn(ret.nasect, sectlevel);
      ret.asect[0].just_numbers = TRUE;
      ret.asect[0].number_suffix = L" ";
      ret.ncdepths = 0;
@@ -224,9 +221,20 @@ static htmlconfig html_configure(paragraph *source) {
      ret.head_end = ret.body_tag = ret.body_start = ret.body_end =
         ret.addr_start = ret.addr_end = ret.nav_attr = NULL;
      ret.author = ret.description = NULL;
-    ret.restrict_charset = CS_ASCII;
+    ret.restrict_charset = CS_UTF8;
      ret.output_charset = CS_ASCII;
      ret.htmlver = HTML_4;
+    ret.index_text = L"Index";
+    ret.contents_text = L"Contents";
+    ret.preamble_text = L"Preamble";
+    ret.title_separator = L" - ";
+    ret.nav_prev_text = L"Previous";
+    ret.nav_next_text = L"Next";
+    ret.nav_separator = L" | ";
+    ret.index_main_sep = L": ";
+    ret.index_multi_sep = L", ";
+    ret.pre_versionid = L"[";
+    ret.post_versionid = L"]";
      /*
       * Default quote characters are Unicode matched single quotes,
       * falling back to ordinary ASCII ".
@@ -258,11 +266,32 @@ static htmlconfig html_configure(paragraph *source) {
             if (!ustrnicmp(k, L"xhtml-", 6))
                 k++;                /* treat `xhtml-' and `html-' the same */
  
-           if (!ustricmp(k, L"html-charset")) {
-               char *csname = utoa_dup(uadv(k), CS_ASCII);
-               ret.restrict_charset = ret.output_charset =
-                   charset_from_localenc(csname);
-               sfree(csname);
+           if (!ustricmp(k, L"html-restrict-charset")) {
+               ret.restrict_charset = charset_from_ustr(&p->fpos, uadv(k));
+           } else if (!ustricmp(k, L"html-output-charset")) {
+               ret.output_charset = charset_from_ustr(&p->fpos, uadv(k));
+           } else if (!ustricmp(k, L"html-version")) {
+               wchar_t *vername = uadv(k);
+               static const struct {
+                   const wchar_t *name;
+                   int ver;
+               } versions[] = {
+                   {L"html3.2", HTML_3_2},
+                   {L"html4", HTML_4},
+                   {L"iso-html", ISO_HTML},
+                   {L"xhtml1.0transitional", XHTML_1_0_TRANSITIONAL},
+                   {L"xhtml1.0strict", XHTML_1_0_STRICT}
+               };
+               int i;
+
+               for (i = 0; i < (int)lenof(versions); i++)
+                   if (!ustricmp(versions[i].name, vername))
+                       break;
+
+               if (i == lenof(versions))
+                   error(err_htmlver, &p->fpos, vername);
+               else
+                   ret.htmlver = versions[i].ver;
             } else if (!ustricmp(k, L"html-single-filename")) {
                 sfree(ret.single_filename);
                 ret.single_filename = dupstr(adv(p->origkeyword));
@@ -293,7 +322,7 @@ static htmlconfig html_configure(paragraph *source) {
                 }
                 if (n >= ret.nasect) {
                     int i;
-                   ret.asect = resize(ret.asect, n+1);
+                   ret.asect = sresize(ret.asect, n+1, sectlevel);
                     for (i = ret.nasect; i <= n; i++)
                         ret.asect[i] = ret.asect[ret.nasect-1];
                     ret.nasect = n+1;
@@ -308,7 +337,7 @@ static htmlconfig html_configure(paragraph *source) {
                 }
                 if (n >= ret.nasect) {
                     int i;
-                   ret.asect = resize(ret.asect, n+1);
+                   ret.asect = sresize(ret.asect, n+1, sectlevel);
                     for (i = ret.nasect; i <= n; i++) {
                         ret.asect[i] = ret.asect[ret.nasect-1];
                     }
@@ -332,7 +361,8 @@ static htmlconfig html_configure(paragraph *source) {
                 }
                 if (n >= ret.ncdepths) {
                     int i;
-                   ret.contents_depths = resize(ret.contents_depths, n+1);
+                   ret.contents_depths =
+                       sresize(ret.contents_depths, n+1, int);
                     for (i = ret.ncdepths; i <= n; i++) {
                         ret.contents_depths[i] = i+2;
                     }
@@ -370,6 +400,28 @@ static htmlconfig html_configure(paragraph *source) {
                 ret.leaf_contains_contents = utob(uadv(k));
             } else if (!ustricmp(k, L"html-leaf-smallest-contents")) {
                 ret.leaf_smallest_contents = utoi(uadv(k));
+           } else if (!ustricmp(k, L"html-index-text")) {
+               ret.index_text = uadv(k);
+           } else if (!ustricmp(k, L"html-contents-text")) {
+               ret.contents_text = uadv(k);
+           } else if (!ustricmp(k, L"html-preamble-text")) {
+               ret.preamble_text = uadv(k);
+           } else if (!ustricmp(k, L"html-title-separator")) {
+               ret.title_separator = uadv(k);
+           } else if (!ustricmp(k, L"html-nav-prev-text")) {
+               ret.nav_prev_text = uadv(k);
+           } else if (!ustricmp(k, L"html-nav-next-text")) {
+               ret.nav_next_text = uadv(k);
+           } else if (!ustricmp(k, L"html-nav-separator")) {
+               ret.nav_separator = uadv(k);
+           } else if (!ustricmp(k, L"html-index-main-separator")) {
+               ret.index_main_sep = uadv(k);
+           } else if (!ustricmp(k, L"html-index-multiple-separator")) {
+               ret.index_multi_sep = uadv(k);
+           } else if (!ustricmp(k, L"html-pre-versionid")) {
+               ret.pre_versionid = uadv(k);
+           } else if (!ustricmp(k, L"html-post-versionid")) {
+               ret.post_versionid = uadv(k);
             }
         }
      }
@@ -406,11 +458,13 @@ paragraph *html_config_filename(char *filename)
  }
  
  void html_backend(paragraph *sourceform, keywordlist *keywords,
-                 indexdata *idx, void *unused) {
+                 indexdata *idx, void *unused)
+{
      paragraph *p;
      htmlconfig conf;
-    htmlfilelist files = { NULL, NULL, NULL, NULL };
+    htmlfilelist files = { NULL, NULL, NULL, NULL, NULL };
      htmlsectlist sects = { NULL, NULL }, nonsects = { NULL, NULL };
+    int has_index;
  
      IGNORE(unused);
  
@@ -425,6 +479,8 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
      for (p = sourceform; p; p = p->next)
         p->private_data = NULL;
  
+    files.frags = newtree234(html_fragment_compare);
+
      /*
       * Start by figuring out into which file each piece of the
       * document should be put. We'll do this by inventing an
@@ -441,7 +497,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
         htmlsect *topsect, *sect;
         int d;
  
-       topsect = html_new_sect(&sects, p);
+       topsect = html_new_sect(&sects, NULL);
         topsect->type = TOP;
         topsect->title = NULL;
         topsect->text = sourceform;
@@ -473,18 +529,24 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                 html_file_section(&conf, &files, sect, d);
  
                 sect->fragment = html_format(p, conf.template_fragment);
-               html_sanitise_fragment(sect->fragment);
-               /* FIXME: clash checking? add to a tree of (file,frag)? */
+               sect->fragment = html_sanitise_fragment(&files, sect->file,
+                                                       sect->fragment);
             }
  
-       /* And the index. */
-       sect = html_new_sect(&sects, NULL);
-       sect->fragment = dupstr("Index");   /* FIXME: this _can't_ be right */
-       sect->text = NULL;
-       sect->type = INDEX;
-       sect->parent = topsect;
-       html_file_section(&conf, &files, sect, 0);   /* peer of chapters */
-       files.index = sect->file;
+       /* And the index, if we have one. */
+       has_index = (count234(idx->entries) > 0);
+       if (has_index) {
+           sect = html_new_sect(&sects, NULL);
+           sect->text = NULL;
+           sect->type = INDEX;
+           sect->parent = topsect;
+            sect->contents_depth = 0;
+           html_file_section(&conf, &files, sect, 0);   /* peer of chapters */
+           sect->fragment = utoa_dup(conf.index_text, CS_ASCII);
+           sect->fragment = html_sanitise_fragment(&files, sect->file,
+                                                   sect->fragment);
+           files.index = sect->file;
+       }
      }
  
      /*
@@ -535,18 +597,31 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                 p->private_data = sect;
  
                 /*
-                * FIXME: We need a much better means of naming
-                * these, possibly involving an additional
-                * configuration template. For the moment I'll just
-                * invent something completely stupid.
+                * Fragment IDs for these paragraphs will simply be
+                * `p' followed by an integer.
                  */
-               sect->fragment = mknewa(char, 40);
-               sprintf(sect->fragment, "frag%p", sect);
+               sect->fragment = snewn(40, char);
+               sprintf(sect->fragment, "p%d",
+                       sect->file->last_fragment_number++);
+               sect->fragment = html_sanitise_fragment(&files, sect->file,
+                                                       sect->fragment);
             }
         }
      }
  
      /*
+     * Reset the fragment numbers in each file. I've just used them
+     * to generate `p' fragment IDs for non-section paragraphs
+     * (numbered list elements, bibliocited), and now I want to use
+     * them for `i' fragment IDs for index entries.
+     */
+    {
+       htmlfile *file;
+       for (file = files.head; file; file = file->next)
+           file->last_fragment_number = 0;
+    }
+
+    /*
       * Now sort out the index. This involves:
       * 
       *         - For each index term, we set up an htmlindex structure to
@@ -571,7 +646,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
          */
  
         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
-           htmlindex *hi = mknew(htmlindex);
+           htmlindex *hi = snew(htmlindex);
  
             hi->nrefs = hi->refsize = 0;
             hi->refs = NULL;
@@ -582,30 +657,28 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
         /*
          * Run over the document inventing fragments. Each fragment
          * is of the form `i' followed by an integer.
-        * 
-        * FIXME: Probably in the file-organisation pass we should
-        * work out the fragment names of every section, so that we
-        * could load them all into a tree and hence ensure these
-        * index fragments don't clash with them.
          */
-       lastsect = NULL;
+       lastsect = sects.head;         /* this is always the top section */
         for (p = sourceform; p; p = p->next) {
-           if (is_heading_type(p->type))
+           if (is_heading_type(p->type) && p->type != para_Title)
                 lastsect = (htmlsect *)p->private_data;
  
             for (w = p->words; w; w = w->next)
                 if (w->type == word_IndexRef) {
-                   htmlindexref *hr = mknew(htmlindexref);
+                   htmlindexref *hr = snew(htmlindexref);
                     indextag *tag;
                     int i;
  
+                   hr->referenced = hr->generated = FALSE;
                     hr->section = lastsect;
-                   /* FIXME: clash checking */
                     {
                         char buf[40];
                         sprintf(buf, "i%d",
                                 lastsect->file->last_fragment_number++);
                         hr->fragment = dupstr(buf);
+                       hr->fragment =
+                           html_sanitise_fragment(&files, hr->section->file,
+                                                  hr->fragment);
                     }
                     w->private_data = hr;
  
@@ -619,7 +692,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
  
                         if (hi->nrefs >= hi->refsize) {
                             hi->refsize += 32;
-                           hi->refs = resize(hi->refs, hi->refsize);
+                           hi->refs = sresize(hi->refs, hi->refsize, word *);
                         }
  
                         hi->refs[hi->nrefs++] = w;
@@ -665,6 +738,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
  
             ho.fp = fopen(f->filename, "w");
             ho.charset = conf.output_charset;
+           ho.restrict_charset = conf.restrict_charset;
             ho.cstate = charset_init_state;
             ho.ver = conf.htmlver;
             ho.state = HO_NEUTRAL;
@@ -681,16 +755,20 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                         " 4.01//EN\"\n\"http://www.w3.org/TR/html4/"
                         "strict.dtd\">\n");
                 break;
+             case ISO_HTML:
+               fprintf(ho.fp, "<!DOCTYPE HTML PUBLIC \"ISO/IEC "
+                       "15445:2000//DTD HTML//EN\">\n");
+               break;
               case XHTML_1_0_TRANSITIONAL:
-               /* FIXME: <?xml?> to specify character encoding.
-                * This breaks HTML backwards compat, so perhaps avoid, or
-                * perhaps only emit when not using the default UTF-8? */
+               fprintf(ho.fp, "<?xml version=\"1.0\" encoding=\"%s\"?>\n",
+                       charset_to_mimeenc(conf.output_charset));
                 fprintf(ho.fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML"
                         " 1.0 Transitional//EN\"\n\"http://www.w3.org/TR/"
                         "xhtml1/DTD/xhtml1-transitional.dtd\">\n");
                 break;
               case XHTML_1_0_STRICT:
-               /* FIXME: <?xml?> to specify character encoding. */
+               fprintf(ho.fp, "<?xml version=\"1.0\" encoding=\"%s\"?>\n",
+                       charset_to_mimeenc(conf.output_charset));
                 fprintf(ho.fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML"
                         " 1.0 Strict//EN\"\n\"http://www.w3.org/TR/xhtml1/"
                         "DTD/xhtml1-strict.dtd\">\n");
@@ -737,7 +815,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
  
                 assert(f->last);
                 if (f->last != f->first && f->last->title) {
-                   html_text(&ho, L" - ");   /* FIXME: configurable? */
+                   html_text(&ho, conf.title_separator);
                     html_words(&ho, f->last->title->words, NOTHING,
                                f, keywords, &conf);
                 }
@@ -748,10 +826,28 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
             if (conf.head_end)
                 html_raw(&ho, conf.head_end);
  
+           /*
+            * Add any <head> data defined in specific sections
+            * that go in this file. (This is mostly to allow <meta
+            * name="AppleTitle"> tags for Mac online help.)
+            */
+           for (s = sects.head; s; s = s->next) {
+               if (s->file == f && s->text) {
+                   for (p = s->text;
+                        p && (p == s->text || !is_heading_type(p->type));
+                        p = p->next) {
+                       if (p->type == para_Config) {
+                           if (!ustricmp(p->keyword, L"html-local-head")) {
+                               html_raw(&ho, adv(p->origkeyword));
+                           }
+                       }
+                   }
+               }
+           }
+
             element_close(&ho, "head");
             html_nl(&ho);
  
-           /* FIXME: need to be able to specify replacement for this */
             if (conf.body_tag)
                 html_raw(&ho, conf.body_tag);
             else
@@ -774,37 +870,39 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                     element_open(&ho, "a");
                     element_attr(&ho, "href", prevf->filename);
                 }
-               html_text(&ho, L"Previous");/* FIXME: conf? */
+               html_text(&ho, conf.nav_prev_text);
                 if (prevf)
                     element_close(&ho, "a");
  
-               html_text(&ho, L" | ");     /* FIXME: conf? */
+               html_text(&ho, conf.nav_separator);
  
                 if (f != files.head) {
                     element_open(&ho, "a");
                     element_attr(&ho, "href", files.head->filename);
                 }
-               html_text(&ho, L"Contents");/* FIXME: conf? */
+               html_text(&ho, conf.contents_text);
                 if (f != files.head)
                     element_close(&ho, "a");
  
-               html_text(&ho, L" | ");     /* FIXME: conf? */
+               html_text(&ho, conf.nav_separator);
  
-               if (f != files.index) {
-                   element_open(&ho, "a");
-                   element_attr(&ho, "href", files.index->filename);
+               if (has_index) {
+                   if (f != files.index) {
+                       element_open(&ho, "a");
+                       element_attr(&ho, "href", files.index->filename);
+                   }
+                   html_text(&ho, conf.index_text);
+                   if (f != files.index)
+                       element_close(&ho, "a");
                 }
-               html_text(&ho, L"Index");/* FIXME: conf? */
-               if (f != files.index)
-                   element_close(&ho, "a");
  
-               html_text(&ho, L" | ");     /* FIXME: conf? */
+               html_text(&ho, conf.nav_separator);
  
                 if (f->next) {
                     element_open(&ho, "a");
                     element_attr(&ho, "href", f->next->filename);
                 }
-               html_text(&ho, L"Next");    /* FIXME: conf? */
+               html_text(&ho, conf.nav_next_text);
                 if (f->next)
                     element_close(&ho, "a");
  
@@ -858,7 +956,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                         if (adepth <= a->contents_depth) {
                             if (ntoc >= tocsize) {
                                 tocsize += 64;
-                               toc = resize(toc, tocsize);
+                               toc = sresize(toc, tocsize, htmlsect *);
                             }
                             toc[ntoc++] = s;
                         }
@@ -958,19 +1056,14 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                     /*
                      * Provide anchor for cross-links to target.
                      * 
-                    * FIXME: AIcurrentlyUI, this needs to be done
-                    * differently in XHTML because <a name> is
-                    * deprecated or obsolete.
-                    * 
                      * (Also we'll have to do this separately in
                      * other paragraph types - NumberedList and
                      * BiblioCited.)
                      */
-                   element_open(&ho, "a");
-                   element_attr(&ho, "name", s->fragment);
-                   element_close(&ho, "a");
+                   if (s->fragment)
+                       html_fragment(&ho, s->fragment);
  
-                   html_section_title(&ho, s, f, keywords, &conf);
+                   html_section_title(&ho, s, f, keywords, &conf, TRUE);
  
                     element_close(&ho, htag);
  
@@ -978,7 +1071,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                      * Now display the section text.
                      */
                     if (s->text) {
-                       stackhead = mknew(struct stackelement);
+                       stackhead = snew(struct stackelement);
                         stackhead->next = NULL;
                         stackhead->listtype = NOLIST;
                         stackhead->itemtype = NOITEM;
@@ -1027,7 +1120,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                                 break;
  
                               case para_LcontPush:
-                               se = mknew(struct stackelement);
+                               se = snew(struct stackelement);
                                 se->next = stackhead;
                                 se->listtype = NOLIST;
                                 se->itemtype = NOITEM;
@@ -1100,9 +1193,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                                 element_open(&ho, "p");
                                 if (p->private_data) {
                                     htmlsect *s = (htmlsect *)p->private_data;
-                                   element_open(&ho, "a");
-                                   element_attr(&ho, "name", s->fragment);
-                                   element_close(&ho, "a");
+                                   html_fragment(&ho, s->fragment);
                                 }
                                 html_nl(&ho);
                                 html_words(&ho, p->kwtext, ALL,
@@ -1118,9 +1209,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                                 element_open(&ho, "li");
                                 if (p->private_data) {
                                     htmlsect *s = (htmlsect *)p->private_data;
-                                   element_open(&ho, "a");
-                                   element_attr(&ho, "name", s->fragment);
-                                   element_close(&ho, "a");
+                                   html_fragment(&ho, s->fragment);
                                 }
                                 html_nl(&ho);
                                 stackhead->itemtype = LI;
@@ -1187,7 +1276,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                             html_words(&ho, entry->text, MARKUP|LINKS,
                                        f, keywords, &conf);
  
-                           html_text(&ho, L": ");/* FIXME: configurable */
+                           html_text(&ho, conf.index_main_sep);
  
                             for (j = 0; j < hi->nrefs; j++) {
                                 htmlindexref *hr =
@@ -1195,18 +1284,28 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                                 paragraph *p = hr->section->title;
  
                                 if (j > 0)
-                                   html_text(&ho, L", "); /* FIXME: conf */
+                                   html_text(&ho, conf.index_multi_sep);
  
                                 html_href(&ho, f, hr->section->file,
                                           hr->fragment);
+                               hr->referenced = TRUE;
                                 if (p && p->kwtext)
                                     html_words(&ho, p->kwtext, MARKUP|LINKS,
                                                f, keywords, &conf);
                                 else if (p && p->words)
                                     html_words(&ho, p->words, MARKUP|LINKS,
                                                f, keywords, &conf);
-                               else
-                                   html_text(&ho, L"FIXME");
+                               else {
+                                   /*
+                                    * If there is no title at all,
+                                    * this must be because our
+                                    * target section is the
+                                    * preamble section and there
+                                    * is no title. So we use the
+                                    * preamble_text.
+                                    */
+                                   html_text(&ho, conf.preamble_text);
+                               }
                                 element_close(&ho, "a");
                             }
                         }
@@ -1230,33 +1329,48 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                     html_raw(&ho, conf.body_end);
  
                 if (conf.address_section) {
+                   int started = FALSE;
+                   if (conf.htmlver == ISO_HTML) {
+                       /*
+                        * The ISO-HTML validator complains if
+                        * there isn't a <div> tag surrounding the
+                        * <address> tag. I'm uncertain of why this
+                        * should be - there appears to be no
+                        * mention of this in the ISO-HTML spec,
+                        * suggesting that it doesn't represent a
+                        * change from HTML 4, but nonetheless the
+                        * HTML 4 validator doesn't seem to mind.
+                        */
+                       element_open(&ho, "div");
+                   }
                     element_open(&ho, "address");
                     if (conf.addr_start) {
                         html_raw(&ho, conf.addr_start);
                         html_nl(&ho);
+                       started = TRUE;
                     }
                     if (conf.visible_version_id) {
-                       int started = FALSE;
                         for (p = sourceform; p; p = p->next)
                             if (p->type == para_VersionID) {
-                               if (!started)
-                                   element_open(&ho, "p");
-                               else
+                               if (started)
                                     element_empty(&ho, "br");
                                 html_nl(&ho);
-                               html_text(&ho, L"[");   /* FIXME: conf? */
+                               html_text(&ho, conf.pre_versionid);
                                 html_words(&ho, p->words, NOTHING,
                                            f, keywords, &conf);
-                               html_text(&ho, L"]");   /* FIXME: conf? */
+                               html_text(&ho, conf.post_versionid);
                                 started = TRUE;
                             }
-                       if (started)
-                           element_close(&ho, "p");
                         done_version_ids = TRUE;
                     }
-                   if (conf.addr_end)
+                   if (conf.addr_end) {
+                       if (started)
+                           element_empty(&ho, "br");
                         html_raw(&ho, conf.addr_end);
+                   }
                     element_close(&ho, "address");
+                   if (conf.htmlver == ISO_HTML)
+                       element_close(&ho, "div");
                 }
  
                 if (!done_version_ids) {
@@ -1290,9 +1404,90 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
      }
  
      /*
-     * FIXME: Figure out a way to free the htmlindex and
-     * htmlindexref structures.
+     * Go through and check that no index fragments were referenced
+     * without being generated, or indeed vice versa.
+     * 
+     * (The htmlindexref structures themselves are freed by a
+     * separate, similar loop further down.)
       */
+    for (p = sourceform; p; p = p->next) {
+       word *w;
+       for (w = p->words; w; w = w->next)
+           if (w->type == word_IndexRef) {
+               htmlindexref *hr = (htmlindexref *)w->private_data;
+
+               assert(!hr->referenced == !hr->generated);
+           }
+    }
+
+    /*
+     * Free all the working data.
+     */
+    sfree(conf.asect);
+    sfree(conf.single_filename);
+    sfree(conf.contents_filename);
+    sfree(conf.index_filename);
+    sfree(conf.template_filename);
+    sfree(conf.template_fragment);
+    {
+       htmlfragment *frag;
+       while ( (frag = (htmlfragment *)delpos234(files.frags, 0)) != NULL ) {
+           /*
+            * frag->fragment is dynamically allocated, but will be
+            * freed when we process the htmlsect structure which
+            * it is attached to.
+            */
+           sfree(frag);
+       }
+       freetree234(files.frags);
+    }
+    {
+       htmlsect *sect, *tmp;
+       sect = sects.head;
+       while (sect) {
+           tmp = sect->next;
+           sfree(sect->fragment);
+           sfree(sect);
+           sect = tmp;
+       }
+       sect = nonsects.head;
+       while (sect) {
+           tmp = sect->next;
+           sfree(sect->fragment);
+           sfree(sect);
+           sect = tmp;
+       }
+    }
+    {
+       htmlfile *file, *tmp;
+       file = files.head;
+       while (file) {
+           tmp = file->next;
+           sfree(file->filename);
+           sfree(file);
+           file = tmp;
+       }
+    }
+    {
+       int i;
+       indexentry *entry;
+       for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
+           htmlindex *hi = (htmlindex *)entry->backend_data;
+           sfree(hi);
+       }
+    }
+    {
+       paragraph *p;
+       word *w;
+       for (p = sourceform; p; p = p->next)
+           for (w = p->words; w; w = w->next)
+               if (w->type == word_IndexRef) {
+                   htmlindexref *hr = (htmlindexref *)w->private_data;
+                   assert(hr != NULL);
+                   sfree(hr->fragment);
+                   sfree(hr);
+               }
+    }
  }
  
  static void html_file_section(htmlconfig *cfg, htmlfilelist *files,
@@ -1373,7 +1568,7 @@ static void html_file_section(htmlconfig *cfg, htmlfilelist *files,
  
  static htmlfile *html_new_file(htmlfilelist *list, char *filename)
  {
-    htmlfile *ret = mknew(htmlfile);
+    htmlfile *ret = snew(htmlfile);
  
      ret->next = NULL;
      if (list->tail)
@@ -1392,7 +1587,7 @@ static htmlfile *html_new_file(htmlfilelist *list, char *filename)
  
  static htmlsect *html_new_sect(htmlsectlist *list, paragraph *title)
  {
-    htmlsect *ret = mknew(htmlsect);
+    htmlsect *ret = snew(htmlsect);
  
      ret->next = NULL;
      if (list->tail)
@@ -1445,9 +1640,8 @@ static void html_words(htmloutput *ho, word *words, int flags,
        case word_IndexRef:
         if (flags & INDEXENTS) {
             htmlindexref *hr = (htmlindexref *)w->private_data;
-           element_open(ho, "a");
-           element_attr(ho, "name", hr->fragment);
-           element_close(ho, "a");
+           html_fragment(ho, hr->fragment);
+           hr->generated = TRUE;
         }
         break;
        case word_Normal:
@@ -1483,8 +1677,8 @@ static void html_words(htmloutput *ho, word *words, int flags,
             else
                 html_text(ho, cfg->rquote);
         } else {
-           if (cvt_ok(ho->charset, w->text) || !w->alt)
-               html_text(ho, w->text);
+           if (!w->alt || cvt_ok(ho->restrict_charset, w->text))
+               html_text_nbsp(ho, w->text);
             else
                 html_words(ho, w->alt, flags, file, keywords, cfg);
         }
@@ -1559,11 +1753,9 @@ static void html_charset_cleanup(htmloutput *ho)
         fwrite(outbuf, 1, bytes, ho->fp);
  }
  
-static void return_to_neutral(htmloutput *ho)
+static void return_mostly_to_neutral(htmloutput *ho)
  {
-    if (ho->state == HO_IN_TEXT) {
-       html_charset_cleanup(ho);
-    } else if (ho->state == HO_IN_EMPTY_TAG && is_xhtml(ho->ver)) {
+    if (ho->state == HO_IN_EMPTY_TAG && is_xhtml(ho->ver)) {
         fprintf(ho->fp, " />");
      } else if (ho->state == HO_IN_EMPTY_TAG || ho->state == HO_IN_TAG) {
         fprintf(ho->fp, ">");
@@ -1572,6 +1764,15 @@ static void return_to_neutral(htmloutput *ho)
      ho->state = HO_NEUTRAL;
  }
  
+static void return_to_neutral(htmloutput *ho)
+{   /* Leave whatever output state we are in and end up in HO_NEUTRAL. */
+    if (ho->state == HO_IN_TEXT) {
+       html_charset_cleanup(ho);   /* only done when we were in the middle of text */
+    }
+    /* Tag closing and the state reset are shared with the html_text*() writers. */
+    return_mostly_to_neutral(ho);
+}
+
  static void element_open(htmloutput *ho, char const *name)
  {
      return_to_neutral(ho);
@@ -1624,24 +1825,31 @@ static void element_attr_w(htmloutput *ho, char const *name,
  {
      html_charset_cleanup(ho);
      fprintf(ho->fp, " %s=\"", name);
-    html_text_limit_internal(ho, value, 0, TRUE);
+    html_text_limit_internal(ho, value, 0, TRUE, FALSE);
      html_charset_cleanup(ho);
      fputc('"', ho->fp);
  }
  
  static void html_text(htmloutput *ho, wchar_t const *text)
  {
-    html_text_limit(ho, text, 0);
+    return_mostly_to_neutral(ho);   /* finish any open tag; skips html_charset_cleanup() */
+    html_text_limit_internal(ho, text, 0, FALSE, FALSE);   /* maxlen=0, quote_quotes=FALSE, nbsp=FALSE */
+}
+
+static void html_text_nbsp(htmloutput *ho, wchar_t const *text)
+{   /* Same as html_text(), but passes nbsp=TRUE so each L' ' is written as "&nbsp;". */
+    return_mostly_to_neutral(ho);   /* finish any open tag; skips html_charset_cleanup() */
+    html_text_limit_internal(ho, text, 0, FALSE, TRUE);   /* maxlen=0, quote_quotes=FALSE, nbsp=TRUE */
  }
  
  static void html_text_limit(htmloutput *ho, wchar_t const *text, int maxlen)
  {
-    return_to_neutral(ho);
-    html_text_limit_internal(ho, text, maxlen, FALSE);
+    return_mostly_to_neutral(ho);   /* unlike return_to_neutral(), does not call html_charset_cleanup() */
+    html_text_limit_internal(ho, text, maxlen, FALSE, FALSE);   /* quote_quotes=FALSE, nbsp=FALSE */
  }
  
  static void html_text_limit_internal(htmloutput *ho, wchar_t const *text,
-                                    int maxlen, int quote_quotes)
+                                    int maxlen, int quote_quotes, int nbsp)
  {
      int textlen = ustrlen(text);
      char outbuf[256];
@@ -1657,7 +1865,8 @@ static void html_text_limit_internal(htmloutput *ho, wchar_t const *text,
             if (text[lenbefore] == L'<' ||
                 text[lenbefore] == L'>' ||
                 text[lenbefore] == L'&' ||
-               (text[lenbefore] == L'"' && quote_quotes))
+               (text[lenbefore] == L'"' && quote_quotes) ||
+               (text[lenbefore] == L' ' && nbsp))
                 break;
         lenafter = lenbefore;
         bytes = charset_from_unicode(&text, &lenafter, outbuf, lenof(outbuf),
@@ -1687,7 +1896,10 @@ static void html_text_limit_internal(htmloutput *ho, wchar_t const *text,
                 fprintf(ho->fp, "&amp;");
             else if (*text == L'"')
                 fprintf(ho->fp, "&quot;");
-           else
+           else if (*text == L' ') {
+               assert(nbsp);
+               fprintf(ho->fp, "&nbsp;");
+           } else
                 assert(!"Can't happen");
             text++, textlen--;
         }
@@ -1719,6 +1931,15 @@ static void html_href(htmloutput *ho, htmlfile *thisfile,
      sfree(url);
  }
  
+static void html_fragment(htmloutput *ho, char const *fragment)
+{   /* Emit an empty <a> element whose name attribute is `fragment'. */
+    element_open(ho, "a");
+    element_attr(ho, "name", fragment);
+    if (is_xhtml(ho->ver))
+       element_attr(ho, "id", fragment);   /* XHTML output gets id= in addition to name= */
+    element_close(ho, "a");
+}
+
  static char *html_format(paragraph *p, char *template_string)
  {
      char *c, *t;
@@ -1786,7 +2007,8 @@ static char *html_format(paragraph *p, char *template_string)
      return rdtrimc(&rs);
  }
  
-static void html_sanitise_fragment(char *text)
+static char *html_sanitise_fragment(htmlfilelist *files, htmlfile *file,
+                                   char *text)
  {
      /*
       * The HTML 4 spec's strictest definition of fragment names (<a
@@ -1801,18 +2023,43 @@ static void html_sanitise_fragment(char *text)
  
      while (*p && !((*p>='A' && *p<='Z') || (*p>='a' && *p<='z')))
         p++;
-    if (!(*q++ = *p++))
-       return;
-    while (*p) {
-       if ((*p>='A' && *p<='Z') ||
-           (*p>='a' && *p<='z') ||
-           (*p>='0' && *p<='9') ||
-           *p=='-' || *p=='_' || *p==':' || *p=='.')
-           *q++ = *p;
-       p++;
+    if ((*q++ = *p++) != '\0') {
+       while (*p) {
+           if ((*p>='A' && *p<='Z') ||
+               (*p>='a' && *p<='z') ||
+               (*p>='0' && *p<='9') ||
+               *p=='-' || *p=='_' || *p==':' || *p=='.')
+               *q++ = *p;
+           p++;
+       }
+
+       *q = '\0';
      }
  
-    *q = '\0';
+    /*
+     * Now we check for clashes with other fragment names, and
+     * adjust this one if necessary by appending a hyphen followed
+     * by a number.
+     */
+    {
+       htmlfragment *frag = snew(htmlfragment);
+       int len = 0;                   /* >0 indicates we have resized */
+       int suffix = 1;
+
+       frag->file = file;
+       frag->fragment = text;
+
+       while (add234(files->frags, frag) != frag) {
+           if (!len) {
+               len = strlen(text);
+               frag->fragment = text = sresize(text, len+20, char);
+           }
+
+           sprintf(text + len, "-%d", ++suffix);
+       }
+    }
+
+    return text;
  }
  
  static void html_contents_entry(htmloutput *ho, int depth, htmlsect *s,
@@ -1834,13 +2081,14 @@ static void html_contents_entry(htmloutput *ho, int depth, htmlsect *s,
  
      element_open(ho, "li");
      html_href(ho, thisfile, s->file, s->fragment);
-    html_section_title(ho, s, thisfile, keywords, cfg);
+    html_section_title(ho, s, thisfile, keywords, cfg, FALSE);
      element_close(ho, "a");
      element_close(ho, "li");
  }
  
  static void html_section_title(htmloutput *ho, htmlsect *s, htmlfile *thisfile,
-                              keywordlist *keywords, htmlconfig *cfg)
+                              keywordlist *keywords, htmlconfig *cfg,
+                              int real)
  {
      if (s->title) {
         sectlevel *sl;
@@ -1869,13 +2117,20 @@ static void html_section_title(htmloutput *ho, htmlsect *s, htmlfile *thisfile,
             html_text(ho, sl->number_suffix);
         }
  
-       html_words(ho, s->title->words, MARKUP,
+       html_words(ho, s->title->words, real ? ALL : MARKUP,
                    thisfile, keywords, cfg);
      } else {
         assert(s->type != NORMAL);
-       if (s->type == TOP)
-           html_text(ho, L"Preamble");/* FIXME: configure */
+       /*
+        * If we're printing the full document title for _real_ and
+        * there isn't one, we don't want to print `Preamble' at
+        * the top of what ought to just be some text. If we need
+        * it in any other context such as TOCs, we need to print
+        * `Preamble'.
+        */
+       if (s->type == TOP && !real)
+           html_text(ho, cfg->preamble_text);
         else if (s->type == INDEX)
-           html_text(ho, L"Index");/* FIXME: configure */
+           html_text(ho, cfg->index_text);
      }
  }