The Unicode OVERLINE in the text backend documentation was probably supposed
[sgt/halibut] / bk_html.c
index 936126d..c58ecf1 100644 (file)
--- a/bk_html.c
+++ b/bk_html.c
  *    fine; the ones for numbered list and bibliociteds are utter
  *    crap; the ones for indexes _might_ do but it might be worth
  *    giving some thought to how to do them better.
- *     + also set up a mechanism for ensuring that fragment IDs
- *      never clash.
  * 
- *  - nonbreaking spaces?
+ *  - nonbreaking spaces.
+ * 
+ *  - free up all the data we have allocated while running this
+ *    backend.
  */
 
 #include <stdio.h>
@@ -115,6 +116,7 @@ struct htmlsect {
 typedef struct {
     htmlfile *head, *tail;
     htmlfile *single, *index;
+    tree234 *frags;   /* htmlfragment records, ordered by html_fragment_compare; used to detect fragment-name clashes */
 } htmlfilelist;
 
 typedef struct {
@@ -122,6 +124,11 @@ typedef struct {
 } htmlsectlist;
 
 typedef struct {
+    htmlfile *file;   /* file the fragment belongs to; its filename is the primary sort key */
+    char *fragment;   /* fragment name within that file; the secondary sort key */
+} htmlfragment;
+
+typedef struct {
     int nrefs, refsize;
     word **refs;
 } htmlindex;
@@ -129,6 +136,7 @@ typedef struct {
 typedef struct {
     htmlsect *section;
     char *fragment;
+    int generated, referenced;   /* generated: anchor for this fragment was emitted; referenced: a link to it was emitted. Both start FALSE and are asserted to agree at the end of html_backend. */
 } htmlindexref;
 
 typedef struct {
@@ -152,6 +160,18 @@ typedef struct {
     int contents_level;
 } htmloutput;
 
+static int html_fragment_compare(void *av, void *bv)
+{   /* Ordering callback passed to newtree234() for the frags tree: compares two htmlfragment records. */
+    htmlfragment *a = (htmlfragment *)av;
+    htmlfragment *b = (htmlfragment *)bv;
+    int cmp;
+
+    if ((cmp = strcmp(a->file->filename, b->file->filename)) != 0)   /* primary key: owning file's name */
+       return cmp;
+    else
+       return strcmp(a->fragment, b->fragment);   /* same file: fall back to the fragment name, so 0 means a clash */
+}
+
 static void html_file_section(htmlconfig *cfg, htmlfilelist *files,
                              htmlsect *sect, int depth);
 
@@ -187,14 +207,15 @@ static void html_href(htmloutput *ho, htmlfile *thisfile,
                      htmlfile *targetfile, char *targetfrag);
 
 static char *html_format(paragraph *p, char *template_string);
-static void html_sanitise_fragment(char *text);
+static char *html_sanitise_fragment(htmlfilelist *files, htmlfile *file,
+                                   char *text);
 
 static void html_contents_entry(htmloutput *ho, int depth, htmlsect *s,
                                htmlfile *thisfile, keywordlist *keywords,
                                htmlconfig *cfg);
 static void html_section_title(htmloutput *ho, htmlsect *s,
                               htmlfile *thisfile, keywordlist *keywords,
-                              htmlconfig *cfg);
+                              htmlconfig *cfg, int real);
 
 static htmlconfig html_configure(paragraph *source) {
     htmlconfig ret;
@@ -207,7 +228,7 @@ static htmlconfig html_configure(paragraph *source) {
     ret.achapter.just_numbers = FALSE;
     ret.achapter.number_suffix = L": ";
     ret.nasect = 1;
-    ret.asect = mknewa(sectlevel, ret.nasect);
+    ret.asect = snewn(ret.nasect, sectlevel);
     ret.asect[0].just_numbers = TRUE;
     ret.asect[0].number_suffix = L" ";
     ret.ncdepths = 0;
@@ -293,7 +314,7 @@ static htmlconfig html_configure(paragraph *source) {
                }
                if (n >= ret.nasect) {
                    int i;
-                   ret.asect = resize(ret.asect, n+1);
+                   ret.asect = sresize(ret.asect, n+1, sectlevel);
                    for (i = ret.nasect; i <= n; i++)
                        ret.asect[i] = ret.asect[ret.nasect-1];
                    ret.nasect = n+1;
@@ -308,7 +329,7 @@ static htmlconfig html_configure(paragraph *source) {
                }
                if (n >= ret.nasect) {
                    int i;
-                   ret.asect = resize(ret.asect, n+1);
+                   ret.asect = sresize(ret.asect, n+1, sectlevel);
                    for (i = ret.nasect; i <= n; i++) {
                        ret.asect[i] = ret.asect[ret.nasect-1];
                    }
@@ -332,7 +353,8 @@ static htmlconfig html_configure(paragraph *source) {
                }
                if (n >= ret.ncdepths) {
                    int i;
-                   ret.contents_depths = resize(ret.contents_depths, n+1);
+                   ret.contents_depths =
+                       sresize(ret.contents_depths, n+1, int);
                    for (i = ret.ncdepths; i <= n; i++) {
                        ret.contents_depths[i] = i+2;
                    }
@@ -409,7 +431,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                  indexdata *idx, void *unused) {
     paragraph *p;
     htmlconfig conf;
-    htmlfilelist files = { NULL, NULL, NULL, NULL };
+    htmlfilelist files = { NULL, NULL, NULL, NULL, NULL };
     htmlsectlist sects = { NULL, NULL }, nonsects = { NULL, NULL };
 
     IGNORE(unused);
@@ -425,6 +447,8 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
     for (p = sourceform; p; p = p->next)
        p->private_data = NULL;
 
+    files.frags = newtree234(html_fragment_compare);
+
     /*
      * Start by figuring out into which file each piece of the
      * document should be put. We'll do this by inventing an
@@ -473,17 +497,19 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                html_file_section(&conf, &files, sect, d);
 
                sect->fragment = html_format(p, conf.template_fragment);
-               html_sanitise_fragment(sect->fragment);
-               /* FIXME: clash checking? add to a tree of (file,frag)? */
+               sect->fragment = html_sanitise_fragment(&files, sect->file,
+                                                       sect->fragment);
            }
 
        /* And the index. */
        sect = html_new_sect(&sects, NULL);
-       sect->fragment = dupstr("Index");   /* FIXME: this _can't_ be right */
        sect->text = NULL;
        sect->type = INDEX;
        sect->parent = topsect;
        html_file_section(&conf, &files, sect, 0);   /* peer of chapters */
+       sect->fragment = dupstr("Index");   /* FIXME: this _can't_ be right */
+       sect->fragment = html_sanitise_fragment(&files, sect->file,
+                                               sect->fragment);
        files.index = sect->file;
     }
 
@@ -540,8 +566,10 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                 * configuration template. For the moment I'll just
                 * invent something completely stupid.
                 */
-               sect->fragment = mknewa(char, 40);
+               sect->fragment = snewn(40, char);
                sprintf(sect->fragment, "frag%p", sect);
+               sect->fragment = html_sanitise_fragment(&files, sect->file,
+                                                       sect->fragment);
            }
        }
     }
@@ -571,7 +599,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
         */
 
        for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
-           htmlindex *hi = mknew(htmlindex);
+           htmlindex *hi = snew(htmlindex);
 
            hi->nrefs = hi->refsize = 0;
            hi->refs = NULL;
@@ -582,11 +610,6 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
        /*
         * Run over the document inventing fragments. Each fragment
         * is of the form `i' followed by an integer.
-        * 
-        * FIXME: Probably in the file-organisation pass we should
-        * work out the fragment names of every section, so that we
-        * could load them all into a tree and hence ensure these
-        * index fragments don't clash with them.
         */
        lastsect = NULL;
        for (p = sourceform; p; p = p->next) {
@@ -595,17 +618,20 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 
            for (w = p->words; w; w = w->next)
                if (w->type == word_IndexRef) {
-                   htmlindexref *hr = mknew(htmlindexref);
+                   htmlindexref *hr = snew(htmlindexref);
                    indextag *tag;
                    int i;
 
+                   hr->referenced = hr->generated = FALSE;
                    hr->section = lastsect;
-                   /* FIXME: clash checking */
                    {
                        char buf[40];
                        sprintf(buf, "i%d",
                                lastsect->file->last_fragment_number++);
                        hr->fragment = dupstr(buf);
+                       hr->fragment =
+                           html_sanitise_fragment(&files, hr->section->file,
+                                                  hr->fragment);
                    }
                    w->private_data = hr;
 
@@ -619,7 +645,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 
                        if (hi->nrefs >= hi->refsize) {
                            hi->refsize += 32;
-                           hi->refs = resize(hi->refs, hi->refsize);
+                           hi->refs = sresize(hi->refs, hi->refsize, word *);
                        }
 
                        hi->refs[hi->nrefs++] = w;
@@ -858,7 +884,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                        if (adepth <= a->contents_depth) {
                            if (ntoc >= tocsize) {
                                tocsize += 64;
-                               toc = resize(toc, tocsize);
+                               toc = sresize(toc, tocsize, htmlsect *);
                            }
                            toc[ntoc++] = s;
                        }
@@ -970,7 +996,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                    element_attr(&ho, "name", s->fragment);
                    element_close(&ho, "a");
 
-                   html_section_title(&ho, s, f, keywords, &conf);
+                   html_section_title(&ho, s, f, keywords, &conf, TRUE);
 
                    element_close(&ho, htag);
 
@@ -978,7 +1004,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                     * Now display the section text.
                     */
                    if (s->text) {
-                       stackhead = mknew(struct stackelement);
+                       stackhead = snew(struct stackelement);
                        stackhead->next = NULL;
                        stackhead->listtype = NOLIST;
                        stackhead->itemtype = NOITEM;
@@ -1027,7 +1053,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
                                break;
 
                              case para_LcontPush:
-                               se = mknew(struct stackelement);
+                               se = snew(struct stackelement);
                                se->next = stackhead;
                                se->listtype = NOLIST;
                                se->itemtype = NOITEM;
@@ -1199,6 +1225,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 
                                html_href(&ho, f, hr->section->file,
                                          hr->fragment);
+                               hr->referenced = TRUE;
                                if (p && p->kwtext)
                                    html_words(&ho, p->kwtext, MARKUP|LINKS,
                                               f, keywords, &conf);
@@ -1290,8 +1317,24 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
     }
 
     /*
-     * FIXME: Figure out a way to free the htmlindex and
-     * htmlindexref structures.
+     * Go through and check that no index fragments were referenced
+     * without being generated, or indeed vice versa.
+     * 
+     * (When I actually get round to freeing everything, this can
+     * probably be the freeing loop as well.)
+     */
+    for (p = sourceform; p; p = p->next) {
+       word *w;
+       for (w = p->words; w; w = w->next)
+           if (w->type == word_IndexRef) {
+               htmlindexref *hr = (htmlindexref *)w->private_data;
+
+               assert(!hr->referenced == !hr->generated);
+           }
+    }
+
+    /*
+     * FIXME: Free all the working data.
      */
 }
 
@@ -1373,7 +1416,7 @@ static void html_file_section(htmlconfig *cfg, htmlfilelist *files,
 
 static htmlfile *html_new_file(htmlfilelist *list, char *filename)
 {
-    htmlfile *ret = mknew(htmlfile);
+    htmlfile *ret = snew(htmlfile);
 
     ret->next = NULL;
     if (list->tail)
@@ -1392,7 +1435,7 @@ static htmlfile *html_new_file(htmlfilelist *list, char *filename)
 
 static htmlsect *html_new_sect(htmlsectlist *list, paragraph *title)
 {
-    htmlsect *ret = mknew(htmlsect);
+    htmlsect *ret = snew(htmlsect);
 
     ret->next = NULL;
     if (list->tail)
@@ -1448,6 +1491,7 @@ static void html_words(htmloutput *ho, word *words, int flags,
            element_open(ho, "a");
            element_attr(ho, "name", hr->fragment);
            element_close(ho, "a");
+           hr->generated = TRUE;
        }
        break;
       case word_Normal:
@@ -1786,7 +1830,8 @@ static char *html_format(paragraph *p, char *template_string)
     return rdtrimc(&rs);
 }
 
-static void html_sanitise_fragment(char *text)
+static char *html_sanitise_fragment(htmlfilelist *files, htmlfile *file,
+                                   char *text)
 {
     /*
      * The HTML 4 spec's strictest definition of fragment names (<a
@@ -1801,18 +1846,43 @@ static void html_sanitise_fragment(char *text)
 
     while (*p && !((*p>='A' && *p<='Z') || (*p>='a' && *p<='z')))
        p++;
-    if (!(*q++ = *p++))
-       return;
-    while (*p) {
-       if ((*p>='A' && *p<='Z') ||
-           (*p>='a' && *p<='z') ||
-           (*p>='0' && *p<='9') ||
-           *p=='-' || *p=='_' || *p==':' || *p=='.')
-           *q++ = *p;
-       p++;
+    if ((*q++ = *p++) != '\0') {
+       while (*p) {
+           if ((*p>='A' && *p<='Z') ||
+               (*p>='a' && *p<='z') ||
+               (*p>='0' && *p<='9') ||
+               *p=='-' || *p=='_' || *p==':' || *p=='.')
+               *q++ = *p;
+           p++;
+       }
+
+       *q = '\0';
+    }
+
+    /*
+     * Now we check for clashes with other fragment names, and
+     * adjust this one if necessary by appending a hyphen followed
+     * by a number.
+     */
+    {
+       htmlfragment *frag = snew(htmlfragment);
+       int len = 0;                   /* >0 indicates we have resized */
+       int suffix = 1;
+
+       frag->file = file;
+       frag->fragment = text;
+
+       while (add234(files->frags, frag) != frag) {
+           if (!len) {
+               len = strlen(text);
+               frag->fragment = text = sresize(text, len+20, char);
+           }
+
+           sprintf(text + len, "-%d", ++suffix);
+       }
     }
 
-    *q = '\0';
+    return text;
 }
 
 static void html_contents_entry(htmloutput *ho, int depth, htmlsect *s,
@@ -1834,13 +1904,14 @@ static void html_contents_entry(htmloutput *ho, int depth, htmlsect *s,
 
     element_open(ho, "li");
     html_href(ho, thisfile, s->file, s->fragment);
-    html_section_title(ho, s, thisfile, keywords, cfg);
+    html_section_title(ho, s, thisfile, keywords, cfg, FALSE);
     element_close(ho, "a");
     element_close(ho, "li");
 }
 
 static void html_section_title(htmloutput *ho, htmlsect *s, htmlfile *thisfile,
-                              keywordlist *keywords, htmlconfig *cfg)
+                              keywordlist *keywords, htmlconfig *cfg,
+                              int real)
 {
     if (s->title) {
        sectlevel *sl;
@@ -1869,7 +1940,7 @@ static void html_section_title(htmloutput *ho, htmlsect *s, htmlfile *thisfile,
            html_text(ho, sl->number_suffix);
        }
 
-       html_words(ho, s->title->words, MARKUP,
+       html_words(ho, s->title->words, real ? ALL : MARKUP,
                   thisfile, keywords, cfg);
     } else {
        assert(s->type != NORMAL);