X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/halibut/blobdiff_plain/b73094945f8d726ff61b0fc12774ecc0f51804aa..3038ce7acdf6ea5f0af267138287b2ae829d1261:/bk_html.c

diff --git a/bk_html.c b/bk_html.c
index a185d70..1bd573a 100644
--- a/bk_html.c
+++ b/bk_html.c
@@ -10,17 +10,6 @@
  *    sensible. Perhaps for the topmost section in the file, no
  *    fragment should be used? (Though it should probably still be
  *    _there_ even if unused.)
- * 
- *  - new configurability:
- *     * index_text, contents_text, preamble_text, title_separator,
- * 	 nav_prev_text, nav_next_text, nav_separator,
- * 	 index_main_sep, index_multi_sep, pre_versionid,
- * 	 post_versionid
- * 
- *  - nonbreaking spaces.
- * 
- *  - free up all the data we have allocated while running this
- *    backend.
  */
 
 #include <stdio.h>
@@ -180,9 +169,10 @@ static void element_attr(htmloutput *ho, char const *name, char const *value);
 static void element_attr_w(htmloutput *ho, char const *name,
 			   wchar_t const *value);
 static void html_text(htmloutput *ho, wchar_t const *str);
+static void html_text_nbsp(htmloutput *ho, wchar_t const *str);
 static void html_text_limit(htmloutput *ho, wchar_t const *str, int maxlen);
 static void html_text_limit_internal(htmloutput *ho, wchar_t const *text,
-				     int maxlen, int quote_quotes);
+				     int maxlen, int quote_quotes, int nbsp);
 static void html_nl(htmloutput *ho);
 static void html_raw(htmloutput *ho, char *text);
 static void html_raw_as_attr(htmloutput *ho, char *text);
@@ -277,13 +267,9 @@ static htmlconfig html_configure(paragraph *source) {
 		k++;		    /* treat `xhtml-' and `html-' the same */
 
 	    if (!ustricmp(k, L"html-restrict-charset")) {
-		char *csname = utoa_dup(uadv(k), CS_ASCII);
-		ret.restrict_charset = charset_from_localenc(csname);
-		sfree(csname);
+		ret.restrict_charset = charset_from_ustr(&p->fpos, uadv(k));
 	    } else if (!ustricmp(k, L"html-output-charset")) {
-		char *csname = utoa_dup(uadv(k), CS_ASCII);
-		ret.output_charset = charset_from_localenc(csname);
-		sfree(csname);
+		ret.output_charset = charset_from_ustr(&p->fpos, uadv(k));
 	    } else if (!ustricmp(k, L"html-version")) {
 		wchar_t *vername = uadv(k);
 		static const struct {
@@ -414,6 +400,28 @@ static htmlconfig html_configure(paragraph *source) {
 		ret.leaf_contains_contents = utob(uadv(k));
 	    } else if (!ustricmp(k, L"html-leaf-smallest-contents")) {
 		ret.leaf_smallest_contents = utoi(uadv(k));
+	    } else if (!ustricmp(k, L"html-index-text")) {
+		ret.index_text = uadv(k);
+	    } else if (!ustricmp(k, L"html-contents-text")) {
+		ret.contents_text = uadv(k);
+	    } else if (!ustricmp(k, L"html-preamble-text")) {
+		ret.preamble_text = uadv(k);
+	    } else if (!ustricmp(k, L"html-title-separator")) {
+		ret.title_separator = uadv(k);
+	    } else if (!ustricmp(k, L"html-nav-prev-text")) {
+		ret.nav_prev_text = uadv(k);
+	    } else if (!ustricmp(k, L"html-nav-next-text")) {
+		ret.nav_next_text = uadv(k);
+	    } else if (!ustricmp(k, L"html-nav-separator")) {
+		ret.nav_separator = uadv(k);
+	    } else if (!ustricmp(k, L"html-index-main-separator")) {
+		ret.index_main_sep = uadv(k);
+	    } else if (!ustricmp(k, L"html-index-multiple-separator")) {
+		ret.index_multi_sep = uadv(k);
+	    } else if (!ustricmp(k, L"html-pre-versionid")) {
+		ret.pre_versionid = uadv(k);
+	    } else if (!ustricmp(k, L"html-post-versionid")) {
+		ret.post_versionid = uadv(k);
 	    }
 	}
     }
@@ -450,11 +458,13 @@ paragraph *html_config_filename(char *filename)
 }
 
 void html_backend(paragraph *sourceform, keywordlist *keywords,
-		  indexdata *idx, void *unused) {
+		  indexdata *idx, void *unused)
+{
     paragraph *p;
     htmlconfig conf;
     htmlfilelist files = { NULL, NULL, NULL, NULL, NULL };
     htmlsectlist sects = { NULL, NULL }, nonsects = { NULL, NULL };
+    int has_index;
 
     IGNORE(unused);
 
@@ -523,16 +533,20 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 							sect->fragment);
 	    }
 
-	/* And the index. */
-	sect = html_new_sect(&sects, NULL);
-	sect->text = NULL;
-	sect->type = INDEX;
-	sect->parent = topsect;
-	html_file_section(&conf, &files, sect, 0);   /* peer of chapters */
-	sect->fragment = utoa_dup(conf.index_text, CS_ASCII);
-	sect->fragment = html_sanitise_fragment(&files, sect->file,
-						sect->fragment);
-	files.index = sect->file;
+	/* And the index, if we have one. */
+	has_index = (count234(idx->entries) > 0);
+	if (has_index) {
+	    sect = html_new_sect(&sects, NULL);
+	    sect->text = NULL;
+	    sect->type = INDEX;
+	    sect->parent = topsect;
+	    sect->contents_depth = 0;
+	    html_file_section(&conf, &files, sect, 0);   /* peer of chapters */
+	    sect->fragment = utoa_dup(conf.index_text, CS_ASCII);
+	    sect->fragment = html_sanitise_fragment(&files, sect->file,
+						    sect->fragment);
+	    files.index = sect->file;
+	}
     }
 
     /*
@@ -853,13 +867,15 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 
 		html_text(&ho, conf.nav_separator);
 
-		if (f != files.index) {
-		    element_open(&ho, "a");
-		    element_attr(&ho, "href", files.index->filename);
+		if (has_index) {
+		    if (f != files.index) {
+			element_open(&ho, "a");
+			element_attr(&ho, "href", files.index->filename);
+		    }
+		    html_text(&ho, conf.index_text);
+		    if (f != files.index)
+			element_close(&ho, "a");
+		    /* Tied to the index entry: avoids a doubled separator. */
+		    html_text(&ho, conf.nav_separator);
 		}
-		html_text(&ho, conf.index_text);
-		if (f != files.index)
-		    element_close(&ho, "a");
-
-		html_text(&ho, conf.nav_separator);
 
@@ -1386,8 +1402,73 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
     }
 
     /*
-     * FIXME: Free all the working data.
+     * Free all the working data.
      */
+    sfree(conf.asect);
+    sfree(conf.single_filename);
+    sfree(conf.contents_filename);
+    sfree(conf.index_filename);
+    sfree(conf.template_filename);
+    sfree(conf.template_fragment);
+    {
+	htmlfragment *frag;
+	while ( (frag = (htmlfragment *)delpos234(files.frags, 0)) != NULL ) {
+	    /*
+	     * frag->fragment is dynamically allocated, but will be
+	     * freed when we process the htmlsect structure which
+	     * it is attached to.
+	     */
+	    sfree(frag);
+	}
+	freetree234(files.frags);
+    }
+    {
+	htmlsect *sect, *tmp;
+	sect = sects.head;
+	while (sect) {
+	    tmp = sect->next;
+	    sfree(sect->fragment);
+	    sfree(sect);
+	    sect = tmp;
+	}
+	sect = nonsects.head;
+	while (sect) {
+	    tmp = sect->next;
+	    sfree(sect->fragment);
+	    sfree(sect);
+	    sect = tmp;
+	}
+    }
+    {
+	htmlfile *file, *tmp;
+	file = files.head;
+	while (file) {
+	    tmp = file->next;
+	    sfree(file->filename);
+	    sfree(file);
+	    file = tmp;
+	}
+    }
+    {
+	int i;
+	indexentry *entry;
+	for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
+	    htmlindex *hi = (htmlindex *)entry->backend_data;
+	    sfree(hi);
+	}
+    }
+    {
+	paragraph *p;
+	word *w;
+	for (p = sourceform; p; p = p->next)
+	    for (w = p->words; w; w = w->next)
+		if (w->type == word_IndexRef) {
+		    htmlindexref *hr = (htmlindexref *)w->private_data;
+		    assert(hr != NULL);
+		    sfree(hr->fragment);
+		    sfree(hr);
+		}
+    }
 }
 
 static void html_file_section(htmlconfig *cfg, htmlfilelist *files,
@@ -1577,8 +1658,8 @@ static void html_words(htmloutput *ho, word *words, int flags,
 	    else
 		html_text(ho, cfg->rquote);
 	} else {
-	    if (cvt_ok(ho->restrict_charset, w->text) || !w->alt)
-		html_text(ho, w->text);
+	    if (!w->alt || cvt_ok(ho->restrict_charset, w->text))
+		html_text_nbsp(ho, w->text);
 	    else
 		html_words(ho, w->alt, flags, file, keywords, cfg);
 	}
@@ -1653,11 +1734,9 @@ static void html_charset_cleanup(htmloutput *ho)
 	fwrite(outbuf, 1, bytes, ho->fp);
 }
 
-static void return_to_neutral(htmloutput *ho)
+static void return_mostly_to_neutral(htmloutput *ho)
 {
-    if (ho->state == HO_IN_TEXT) {
-	html_charset_cleanup(ho);
-    } else if (ho->state == HO_IN_EMPTY_TAG && is_xhtml(ho->ver)) {
+    if (ho->state == HO_IN_EMPTY_TAG && is_xhtml(ho->ver)) {
 	fprintf(ho->fp, " />");
     } else if (ho->state == HO_IN_EMPTY_TAG || ho->state == HO_IN_TAG) {
 	fprintf(ho->fp, ">");
@@ -1666,6 +1745,15 @@ static void return_to_neutral(htmloutput *ho)
     ho->state = HO_NEUTRAL;
 }
 
+static void return_to_neutral(htmloutput *ho)
+{
+    if (ho->state == HO_IN_TEXT) {
+	html_charset_cleanup(ho);   /* write out pending charset bytes */
+    }
+    /* Then close any half-open tag and reset state to HO_NEUTRAL. */
+    return_mostly_to_neutral(ho);
+}
+
 static void element_open(htmloutput *ho, char const *name)
 {
     return_to_neutral(ho);
@@ -1718,24 +1806,31 @@ static void element_attr_w(htmloutput *ho, char const *name,
 {
     html_charset_cleanup(ho);
     fprintf(ho->fp, " %s=\"", name);
-    html_text_limit_internal(ho, value, 0, TRUE);
+    html_text_limit_internal(ho, value, 0, TRUE, FALSE);
     html_charset_cleanup(ho);
     fputc('"', ho->fp);
 }
 
 static void html_text(htmloutput *ho, wchar_t const *text)
 {
-    html_text_limit(ho, text, 0);
+    return_mostly_to_neutral(ho);
+    html_text_limit_internal(ho, text, 0, FALSE, FALSE);
+}
+
+static void html_text_nbsp(htmloutput *ho, wchar_t const *text)
+{
+    return_mostly_to_neutral(ho);
+    html_text_limit_internal(ho, text, 0, FALSE, TRUE);
 }
 
 static void html_text_limit(htmloutput *ho, wchar_t const *text, int maxlen)
 {
-    return_to_neutral(ho);
-    html_text_limit_internal(ho, text, maxlen, FALSE);
+    return_mostly_to_neutral(ho);
+    html_text_limit_internal(ho, text, maxlen, FALSE, FALSE);
 }
 
 static void html_text_limit_internal(htmloutput *ho, wchar_t const *text,
-				     int maxlen, int quote_quotes)
+				     int maxlen, int quote_quotes, int nbsp)
 {
     int textlen = ustrlen(text);
     char outbuf[256];
@@ -1751,7 +1846,8 @@ static void html_text_limit_internal(htmloutput *ho, wchar_t const *text,
 	    if (text[lenbefore] == L'<' ||
 		text[lenbefore] == L'>' ||
 		text[lenbefore] == L'&' ||
-		(text[lenbefore] == L'"' && quote_quotes))
+		(text[lenbefore] == L'"' && quote_quotes) ||
+		(text[lenbefore] == L' ' && nbsp))
 		break;
 	lenafter = lenbefore;
 	bytes = charset_from_unicode(&text, &lenafter, outbuf, lenof(outbuf),
@@ -1781,7 +1877,10 @@ static void html_text_limit_internal(htmloutput *ho, wchar_t const *text,
 		fprintf(ho->fp, "&amp;");
 	    else if (*text == L'"')
 		fprintf(ho->fp, "&quot;");
-	    else
+	    else if (*text == L' ') {
+		assert(nbsp);
+		fprintf(ho->fp, "&nbsp;");
+	    } else
 		assert(!"Can't happen");
 	    text++, textlen--;
 	}