X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/halibut/blobdiff_plain/56a99eb648cb255aed42d929823a07913e0d743a..24c1a072a6b3e9bf0f9fba6385ddf61e63b91154:/bk_html.c diff --git a/bk_html.c b/bk_html.c index f6091a3..1bd573a 100644 --- a/bk_html.c +++ b/bk_html.c @@ -10,28 +10,6 @@ * sensible. Perhaps for the topmost section in the file, no * fragment should be used? (Though it should probably still be * _there_ even if unused.) - * - * - new configurability: - * * index_text, contents_text, preamble_text, title_separator, - * nav_prev_text, nav_next_text, nav_separator, - * index_main_sep, index_multi_sep, pre_versionid, - * post_versionid - * * Some means of specifying the distinction between - * restrict-charset and output-charset. It seems to me that - * `html-charset' is output-charset, and that - * restrict-charset usually wants to be either output-charset - * or UTF-8 (the latter indicating that any Unicode character - * is fair game and it will be specified using &#foo; if it - * isn't in output-charset). However, since XHTML defaults to - * UTF-8 and it's fiddly to tell it otherwise, it's just - * possible that some user may need to set restrict-charset - * to their charset of choice while leaving _output_-charset - * at UTF-8. Figure out some configuration, and apply it. - * - * - nonbreaking spaces. - * - * - free up all the data we have allocated while running this - * backend. */ #include @@ -142,7 +120,7 @@ typedef struct { * level. */ FILE *fp; - int charset; + int charset, restrict_charset; charset_state cstate; int ver; enum { @@ -191,9 +169,10 @@ static void element_attr(htmloutput *ho, char const *name, char const *value); static void element_attr_w(htmloutput *ho, char const *name, wchar_t const *value); static void html_text(htmloutput *ho, wchar_t const *str); +static void html_text_nbsp(htmloutput *ho, wchar_t const *str); static void html_text_limit(htmloutput *ho, wchar_t const *str, int maxlen); static void html_text_limit_internal(htmloutput *ho, wchar_t const *text, - int maxlen, int quote_quotes); + int maxlen, int quote_quotes, int nbsp); static void html_nl(htmloutput *ho); static void html_raw(htmloutput *ho, char *text); static void html_raw_as_attr(htmloutput *ho, char *text); @@ -242,7 +221,7 @@ static htmlconfig html_configure(paragraph *source) { ret.head_end = ret.body_tag = ret.body_start = ret.body_end = ret.addr_start = ret.addr_end = ret.nav_attr = NULL; ret.author = ret.description = NULL; - ret.restrict_charset = CS_ASCII; + ret.restrict_charset = CS_UTF8; ret.output_charset = CS_ASCII; ret.htmlver = HTML_4; ret.index_text = L"Index"; @@ -287,11 +266,10 @@ static htmlconfig html_configure(paragraph *source) { if (!ustrnicmp(k, L"xhtml-", 6)) k++; /* treat `xhtml-' and `html-' the same */ - if (!ustricmp(k, L"html-charset")) { - char *csname = utoa_dup(uadv(k), CS_ASCII); - ret.restrict_charset = ret.output_charset = - charset_from_localenc(csname); - sfree(csname); + if (!ustricmp(k, L"html-restrict-charset")) { + ret.restrict_charset = charset_from_ustr(&p->fpos, uadv(k)); + } else if (!ustricmp(k, L"html-output-charset")) { + ret.output_charset = charset_from_ustr(&p->fpos, uadv(k)); } else if (!ustricmp(k, L"html-version")) { wchar_t *vername = uadv(k); static const struct { @@ -422,6 +400,28 @@ static htmlconfig html_configure(paragraph *source) { ret.leaf_contains_contents = utob(uadv(k)); } else if (!ustricmp(k, L"html-leaf-smallest-contents")) { ret.leaf_smallest_contents = utoi(uadv(k)); + } else if (!ustricmp(k, L"html-index-text")) { + ret.index_text = uadv(k); + } else if (!ustricmp(k, L"html-contents-text")) { + ret.contents_text = uadv(k); + } else if (!ustricmp(k, L"html-preamble-text")) { + ret.preamble_text = uadv(k); + } else if (!ustricmp(k, L"html-title-separator")) { + ret.title_separator = uadv(k); + } else if (!ustricmp(k, L"html-nav-prev-text")) { + ret.nav_prev_text = uadv(k); + } else if (!ustricmp(k, L"html-nav-next-text")) { + ret.nav_next_text = uadv(k); + } else if (!ustricmp(k, L"html-nav-separator")) { + ret.nav_separator = uadv(k); + } else if (!ustricmp(k, L"html-index-main-separator")) { + ret.index_main_sep = uadv(k); + } else if (!ustricmp(k, L"html-index-multiple-separator")) { + ret.index_multi_sep = uadv(k); + } else if (!ustricmp(k, L"html-pre-versionid")) { + ret.pre_versionid = uadv(k); + } else if (!ustricmp(k, L"html-post-versionid")) { + ret.post_versionid = uadv(k); } } } @@ -458,11 +458,13 @@ paragraph *html_config_filename(char *filename) } void html_backend(paragraph *sourceform, keywordlist *keywords, - indexdata *idx, void *unused) { + indexdata *idx, void *unused) +{ paragraph *p; htmlconfig conf; htmlfilelist files = { NULL, NULL, NULL, NULL, NULL }; htmlsectlist sects = { NULL, NULL }, nonsects = { NULL, NULL }; + int has_index; IGNORE(unused); @@ -531,16 +533,20 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, sect->fragment); } - /* And the index. */ - sect = html_new_sect(§s, NULL); - sect->text = NULL; - sect->type = INDEX; - sect->parent = topsect; - html_file_section(&conf, &files, sect, 0); /* peer of chapters */ - sect->fragment = utoa_dup(conf.index_text, CS_ASCII); - sect->fragment = html_sanitise_fragment(&files, sect->file, - sect->fragment); - files.index = sect->file; + /* And the index, if we have one. */ + has_index = (count234(idx->entries) > 0); + if (has_index) { + sect = html_new_sect(§s, NULL); + sect->text = NULL; + sect->type = INDEX; + sect->parent = topsect; + sect->contents_depth = 0; + html_file_section(&conf, &files, sect, 0); /* peer of chapters */ + sect->fragment = utoa_dup(conf.index_text, CS_ASCII); + sect->fragment = html_sanitise_fragment(&files, sect->file, + sect->fragment); + files.index = sect->file; + } } /* @@ -732,6 +738,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, ho.fp = fopen(f->filename, "w"); ho.charset = conf.output_charset; + ho.restrict_charset = conf.restrict_charset; ho.cstate = charset_init_state; ho.ver = conf.htmlver; ho.state = HO_NEUTRAL; @@ -860,13 +867,15 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, html_text(&ho, conf.nav_separator); - if (f != files.index) { - element_open(&ho, "a"); - element_attr(&ho, "href", files.index->filename); + if (has_index) { + if (f != files.index) { + element_open(&ho, "a"); + element_attr(&ho, "href", files.index->filename); + } + html_text(&ho, conf.index_text); + if (f != files.index) + element_close(&ho, "a"); } - html_text(&ho, conf.index_text); - if (f != files.index) - element_close(&ho, "a"); html_text(&ho, conf.nav_separator); @@ -1393,8 +1402,73 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, } /* - * FIXME: Free all the working data. + * Free all the working data. */ + sfree(conf.asect); + sfree(conf.single_filename); + sfree(conf.contents_filename); + sfree(conf.index_filename); + sfree(conf.template_filename); + sfree(conf.template_fragment); + { + htmlfragment *frag; + while ( (frag = (htmlfragment *)delpos234(files.frags, 0)) != NULL ) { + /* + * frag->fragment is dynamically allocated, but will be + * freed when we process the htmlsect structure which + * it is attached to. + */ + sfree(frag); + } + freetree234(files.frags); + } + { + htmlsect *sect, *tmp; + sect = sects.head; + while (sect) { + tmp = sect->next; + sfree(sect->fragment); + sfree(sect); + sect = tmp; + } + sect = nonsects.head; + while (sect) { + tmp = sect->next; + sfree(sect->fragment); + sfree(sect); + sect = tmp; + } + } + { + htmlfile *file, *tmp; + file = files.head; + while (file) { + tmp = file->next; + sfree(file->filename); + sfree(file); + file = tmp; + } + } + { + int i; + indexentry *entry; + for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) { + htmlindex *hi = (htmlindex *)entry->backend_data; + sfree(hi); + } + } + { + paragraph *p; + word *w; + for (p = sourceform; p; p = p->next) + for (w = p->words; w; w = w->next) + if (w->type == word_IndexRef) { + htmlindexref *hr = (htmlindexref *)w->private_data; + assert(hr != NULL); + sfree(hr->fragment); + sfree(hr); + } + } } static void html_file_section(htmlconfig *cfg, htmlfilelist *files, @@ -1584,8 +1658,8 @@ static void html_words(htmloutput *ho, word *words, int flags, else html_text(ho, cfg->rquote); } else { - if (cvt_ok(ho->charset, w->text) || !w->alt) - html_text(ho, w->text); + if (!w->alt || cvt_ok(ho->restrict_charset, w->text)) + html_text_nbsp(ho, w->text); else html_words(ho, w->alt, flags, file, keywords, cfg); } @@ -1660,11 +1734,9 @@ static void html_charset_cleanup(htmloutput *ho) fwrite(outbuf, 1, bytes, ho->fp); } -static void return_to_neutral(htmloutput *ho) +static void return_mostly_to_neutral(htmloutput *ho) { - if (ho->state == HO_IN_TEXT) { - html_charset_cleanup(ho); - } else if (ho->state == HO_IN_EMPTY_TAG && is_xhtml(ho->ver)) { + if (ho->state == HO_IN_EMPTY_TAG && is_xhtml(ho->ver)) { fprintf(ho->fp, " />"); } else if (ho->state == HO_IN_EMPTY_TAG || ho->state == HO_IN_TAG) { fprintf(ho->fp, ">"); @@ -1673,6 +1745,15 @@ static void return_to_neutral(htmloutput *ho) ho->state = HO_NEUTRAL; } +static void return_to_neutral(htmloutput *ho) +{ + if (ho->state == HO_IN_TEXT) { + html_charset_cleanup(ho); + } + + return_mostly_to_neutral(ho); +} + static void element_open(htmloutput *ho, char const *name) { return_to_neutral(ho); @@ -1725,24 +1806,31 @@ static void element_attr_w(htmloutput *ho, char const *name, { html_charset_cleanup(ho); fprintf(ho->fp, " %s=\"", name); - html_text_limit_internal(ho, value, 0, TRUE); + html_text_limit_internal(ho, value, 0, TRUE, FALSE); html_charset_cleanup(ho); fputc('"', ho->fp); } static void html_text(htmloutput *ho, wchar_t const *text) { - html_text_limit(ho, text, 0); + return_mostly_to_neutral(ho); + html_text_limit_internal(ho, text, 0, FALSE, FALSE); +} + +static void html_text_nbsp(htmloutput *ho, wchar_t const *text) +{ + return_mostly_to_neutral(ho); + html_text_limit_internal(ho, text, 0, FALSE, TRUE); } static void html_text_limit(htmloutput *ho, wchar_t const *text, int maxlen) { - return_to_neutral(ho); - html_text_limit_internal(ho, text, maxlen, FALSE); + return_mostly_to_neutral(ho); + html_text_limit_internal(ho, text, maxlen, FALSE, FALSE); } static void html_text_limit_internal(htmloutput *ho, wchar_t const *text, - int maxlen, int quote_quotes) + int maxlen, int quote_quotes, int nbsp) { int textlen = ustrlen(text); char outbuf[256]; @@ -1758,7 +1846,8 @@ static void html_text_limit_internal(htmloutput *ho, wchar_t const *text, if (text[lenbefore] == L'<' || text[lenbefore] == L'>' || text[lenbefore] == L'&' || - (text[lenbefore] == L'"' && quote_quotes)) + (text[lenbefore] == L'"' && quote_quotes) || + (text[lenbefore] == L' ' && nbsp)) break; lenafter = lenbefore; bytes = charset_from_unicode(&text, &lenafter, outbuf, lenof(outbuf), @@ -1788,7 +1877,10 @@ static void html_text_limit_internal(htmloutput *ho, wchar_t const *text, fprintf(ho->fp, "&"); else if (*text == L'"') fprintf(ho->fp, """); - else + else if (*text == L' ') { + assert(nbsp); + fprintf(ho->fp, " "); + } else assert(!"Can't happen"); text++, textlen--; }