X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/halibut/blobdiff_plain/04781c84bdab91b4a1a05115be75830841359bc9..a10f193fa848901b2f1eb0ea9553fcca968c62f5:/bk_html.c diff --git a/bk_html.c b/bk_html.c index 5cd145e..41a9f76 100644 --- a/bk_html.c +++ b/bk_html.c @@ -10,31 +10,6 @@ * sensible. Perhaps for the topmost section in the file, no * fragment should be used? (Though it should probably still be * _there_ even if unused.) - * - * - new configurability: - * * a few new things explicitly labelled as `FIXME: - * configurable' or similar. - * * HTML flavour. - * * Some means of specifying the distinction between - * restrict-charset and output-charset. It seems to me that - * `html-charset' is output-charset, and that - * restrict-charset usually wants to be either output-charset - * or UTF-8 (the latter indicating that any Unicode character - * is fair game and it will be specified using &#foo; if it - * isn't in output-charset). However, since XHTML defaults to - * UTF-8 and it's fiddly to tell it otherwise, it's just - * possible that some user may need to set restrict-charset - * to their charset of choice while leaving _output_-charset - * at UTF-8. Figure out some configuration, and apply it. - * - * - test all HTML flavours and ensure they validate sensibly. Fix - * remaining confusion issues such as and obsoleteness - * of . - * - * - nonbreaking spaces. - * - * - free up all the data we have allocated while running this - * backend. */ #include @@ -70,13 +45,18 @@ typedef struct { char *index_filename; char *template_filename; char *single_filename; - char *template_fragment; + char **template_fragments; + int ntfragments; char *head_end, *body_start, *body_end, *addr_start, *addr_end; char *body_tag, *nav_attr; wchar_t *author, *description; + wchar_t *index_text, *contents_text, *preamble_text, *title_separator; + wchar_t *nav_prev_text, *nav_next_text, *nav_separator; + wchar_t *index_main_sep, *index_multi_sep; + wchar_t *pre_versionid, *post_versionid; int restrict_charset, output_charset; enum { - HTML_3_2, HTML_4, + HTML_3_2, HTML_4, ISO_HTML, XHTML_1_0_TRANSITIONAL, XHTML_1_0_STRICT } htmlver; wchar_t *lquote, *rquote; @@ -105,7 +85,7 @@ struct htmlsect { paragraph *title, *text; enum { NORMAL, TOP, INDEX } type; int contents_depth; - char *fragment; + char **fragments; }; typedef struct { @@ -141,7 +121,7 @@ typedef struct { * level. */ FILE *fp; - int charset; + int charset, restrict_charset; charset_state cstate; int ver; enum { @@ -171,7 +151,8 @@ static void html_file_section(htmlconfig *cfg, htmlfilelist *files, htmlsect *sect, int depth); static htmlfile *html_new_file(htmlfilelist *list, char *filename); -static htmlsect *html_new_sect(htmlsectlist *list, paragraph *title); +static htmlsect *html_new_sect(htmlsectlist *list, paragraph *title, + htmlconfig *cfg); /* Flags for html_words() flags parameter */ #define NOTHING 0x00 @@ -190,9 +171,10 @@ static void element_attr(htmloutput *ho, char const *name, char const *value); static void element_attr_w(htmloutput *ho, char const *name, wchar_t const *value); static void html_text(htmloutput *ho, wchar_t const *str); +static void html_text_nbsp(htmloutput *ho, wchar_t const *str); static void html_text_limit(htmloutput *ho, wchar_t const *str, int maxlen); static void html_text_limit_internal(htmloutput *ho, wchar_t const *text, - int maxlen, int quote_quotes); + int maxlen, int quote_quotes, int nbsp); static void html_nl(htmloutput *ho); static void html_raw(htmloutput *ho, char *text); static void html_raw_as_attr(htmloutput *ho, char *text); @@ -200,6 +182,7 @@ static void cleanup(htmloutput *ho); static void html_href(htmloutput *ho, htmlfile *thisfile, htmlfile *targetfile, char *targetfrag); +static void html_fragment(htmloutput *ho, char const *fragment); static char *html_format(paragraph *p, char *template_string); static char *html_sanitise_fragment(htmlfilelist *files, htmlfile *file, @@ -236,13 +219,26 @@ static htmlconfig html_configure(paragraph *source) { ret.contents_filename = dupstr("Contents.html"); ret.index_filename = dupstr("IndexPage.html"); ret.template_filename = dupstr("%n.html"); - ret.template_fragment = dupstr("%b"); + ret.ntfragments = 1; + ret.template_fragments = snewn(ret.ntfragments, char *); + ret.template_fragments[0] = dupstr("%b"); ret.head_end = ret.body_tag = ret.body_start = ret.body_end = ret.addr_start = ret.addr_end = ret.nav_attr = NULL; ret.author = ret.description = NULL; - ret.restrict_charset = CS_ASCII; + ret.restrict_charset = CS_UTF8; ret.output_charset = CS_ASCII; ret.htmlver = HTML_4; + ret.index_text = L"Index"; + ret.contents_text = L"Contents"; + ret.preamble_text = L"Preamble"; + ret.title_separator = L" - "; + ret.nav_prev_text = L"Previous"; + ret.nav_next_text = L"Next"; + ret.nav_separator = L" | "; + ret.index_main_sep = L": "; + ret.index_multi_sep = L", "; + ret.pre_versionid = L"["; + ret.post_versionid = L"]"; /* * Default quote characters are Unicode matched single quotes, * falling back to ordinary ASCII ". @@ -274,11 +270,32 @@ static htmlconfig html_configure(paragraph *source) { if (!ustrnicmp(k, L"xhtml-", 6)) k++; /* treat `xhtml-' and `html-' the same */ - if (!ustricmp(k, L"html-charset")) { - char *csname = utoa_dup(uadv(k), CS_ASCII); - ret.restrict_charset = ret.output_charset = - charset_from_localenc(csname); - sfree(csname); + if (!ustricmp(k, L"html-restrict-charset")) { + ret.restrict_charset = charset_from_ustr(&p->fpos, uadv(k)); + } else if (!ustricmp(k, L"html-output-charset")) { + ret.output_charset = charset_from_ustr(&p->fpos, uadv(k)); + } else if (!ustricmp(k, L"html-version")) { + wchar_t *vername = uadv(k); + static const struct { + const wchar_t *name; + int ver; + } versions[] = { + {L"html3.2", HTML_3_2}, + {L"html4", HTML_4}, + {L"iso-html", ISO_HTML}, + {L"xhtml1.0transitional", XHTML_1_0_TRANSITIONAL}, + {L"xhtml1.0strict", XHTML_1_0_STRICT} + }; + int i; + + for (i = 0; i < (int)lenof(versions); i++) + if (!ustricmp(versions[i].name, vername)) + break; + + if (i == lenof(versions)) + error(err_htmlver, &p->fpos, vername); + else + ret.htmlver = versions[i].ver; } else if (!ustricmp(k, L"html-single-filename")) { sfree(ret.single_filename); ret.single_filename = dupstr(adv(p->origkeyword)); @@ -292,8 +309,24 @@ static htmlconfig html_configure(paragraph *source) { sfree(ret.template_filename); ret.template_filename = dupstr(adv(p->origkeyword)); } else if (!ustricmp(k, L"html-template-fragment")) { - sfree(ret.template_fragment); - ret.template_fragment = dupstr(adv(p->origkeyword)); + char *frag = adv(p->origkeyword); + if (*frag) { + while (ret.ntfragments--) + sfree(ret.template_fragments[ret.ntfragments]); + sfree(ret.template_fragments); + ret.template_fragments = NULL; + ret.ntfragments = 0; + while (*frag) { + ret.ntfragments++; + ret.template_fragments = + sresize(ret.template_fragments, + ret.ntfragments, char *); + ret.template_fragments[ret.ntfragments-1] = + dupstr(frag); + frag = adv(frag); + } + } else + error(err_cfginsufarg, &p->fpos, p->origkeyword, 1); } else if (!ustricmp(k, L"html-chapter-numeric")) { ret.achapter.just_numbers = utob(uadv(k)); } else if (!ustricmp(k, L"html-chapter-suffix")) { @@ -387,6 +420,28 @@ static htmlconfig html_configure(paragraph *source) { ret.leaf_contains_contents = utob(uadv(k)); } else if (!ustricmp(k, L"html-leaf-smallest-contents")) { ret.leaf_smallest_contents = utoi(uadv(k)); + } else if (!ustricmp(k, L"html-index-text")) { + ret.index_text = uadv(k); + } else if (!ustricmp(k, L"html-contents-text")) { + ret.contents_text = uadv(k); + } else if (!ustricmp(k, L"html-preamble-text")) { + ret.preamble_text = uadv(k); + } else if (!ustricmp(k, L"html-title-separator")) { + ret.title_separator = uadv(k); + } else if (!ustricmp(k, L"html-nav-prev-text")) { + ret.nav_prev_text = uadv(k); + } else if (!ustricmp(k, L"html-nav-next-text")) { + ret.nav_next_text = uadv(k); + } else if (!ustricmp(k, L"html-nav-separator")) { + ret.nav_separator = uadv(k); + } else if (!ustricmp(k, L"html-index-main-separator")) { + ret.index_main_sep = uadv(k); + } else if (!ustricmp(k, L"html-index-multiple-separator")) { + ret.index_multi_sep = uadv(k); + } else if (!ustricmp(k, L"html-pre-versionid")) { + ret.pre_versionid = uadv(k); + } else if (!ustricmp(k, L"html-post-versionid")) { + ret.post_versionid = uadv(k); } } } @@ -423,11 +478,13 @@ paragraph *html_config_filename(char *filename) } void html_backend(paragraph *sourceform, keywordlist *keywords, - indexdata *idx, void *unused) { + indexdata *idx, void *unused) +{ paragraph *p; htmlconfig conf; htmlfilelist files = { NULL, NULL, NULL, NULL, NULL }; htmlsectlist sects = { NULL, NULL }, nonsects = { NULL, NULL }; + int has_index; IGNORE(unused); @@ -453,20 +510,19 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, * source form but needs to be consistently mentioned in * contents links. * - * While we're here, we'll also invent the HTML fragment name + * While we're here, we'll also invent the HTML fragment name(s) * for each section. */ { htmlsect *topsect, *sect; int d; - topsect = html_new_sect(§s, p); + topsect = html_new_sect(§s, NULL, &conf); topsect->type = TOP; topsect->title = NULL; topsect->text = sourceform; topsect->contents_depth = contents_depth(conf, 0); html_file_section(&conf, &files, topsect, -1); - topsect->fragment = NULL; for (p = sourceform; p; p = p->next) if (is_heading_type(p->type)) { @@ -477,7 +533,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, continue; } - sect = html_new_sect(§s, p); + sect = html_new_sect(§s, p, &conf); sect->text = p->next; sect->contents_depth = contents_depth(conf, d+1) - (d+1); @@ -491,21 +547,32 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, html_file_section(&conf, &files, sect, d); - sect->fragment = html_format(p, conf.template_fragment); - sect->fragment = html_sanitise_fragment(&files, sect->file, - sect->fragment); + { + int i; + for (i=0; i < conf.ntfragments; i++) { + sect->fragments[i] = + html_format(p, conf.template_fragments[i]); + sect->fragments[i] = + html_sanitise_fragment(&files, sect->file, + sect->fragments[i]); + } + } } - /* And the index. */ - sect = html_new_sect(§s, NULL); - sect->text = NULL; - sect->type = INDEX; - sect->parent = topsect; - html_file_section(&conf, &files, sect, 0); /* peer of chapters */ - sect->fragment = dupstr("Index"); /* FIXME: this _can't_ be right */ - sect->fragment = html_sanitise_fragment(&files, sect->file, - sect->fragment); - files.index = sect->file; + /* And the index, if we have one. */ + has_index = (count234(idx->entries) > 0); + if (has_index) { + sect = html_new_sect(§s, NULL, &conf); + sect->text = NULL; + sect->type = INDEX; + sect->parent = topsect; + sect->contents_depth = 0; + html_file_section(&conf, &files, sect, 0); /* peer of chapters */ + sect->fragments[0] = utoa_dup(conf.index_text, CS_ASCII); + sect->fragments[0] = html_sanitise_fragment(&files, sect->file, + sect->fragments[0]); + files.index = sect->file; + } } /* @@ -550,7 +617,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, * won't attempt to add it to the contents or * anything weird like that). */ - sect = html_new_sect(&nonsects, p); + sect = html_new_sect(&nonsects, p, &conf); sect->file = parent->file; sect->parent = parent; p->private_data = sect; @@ -559,11 +626,11 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, * Fragment IDs for these paragraphs will simply be * `p' followed by an integer. */ - sect->fragment = snewn(40, char); - sprintf(sect->fragment, "p%d", + sect->fragments[0] = snewn(40, char); + sprintf(sect->fragments[0], "p%d", sect->file->last_fragment_number++); - sect->fragment = html_sanitise_fragment(&files, sect->file, - sect->fragment); + sect->fragments[0] = html_sanitise_fragment(&files, sect->file, + sect->fragments[0]); } } } @@ -617,9 +684,9 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, * Run over the document inventing fragments. Each fragment * is of the form `i' followed by an integer. */ - lastsect = NULL; + lastsect = sects.head; /* this is always the top section */ for (p = sourceform; p; p = p->next) { - if (is_heading_type(p->type)) + if (is_heading_type(p->type) && p->type != para_Title) lastsect = (htmlsect *)p->private_data; for (w = p->words; w; w = w->next) @@ -697,6 +764,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, ho.fp = fopen(f->filename, "w"); ho.charset = conf.output_charset; + ho.restrict_charset = conf.restrict_charset; ho.cstate = charset_init_state; ho.ver = conf.htmlver; ho.state = HO_NEUTRAL; @@ -713,16 +781,20 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, " 4.01//EN\"\n\"http://www.w3.org/TR/html4/" "strict.dtd\">\n"); break; + case ISO_HTML: + fprintf(ho.fp, "\n"); + break; case XHTML_1_0_TRANSITIONAL: - /* FIXME: to specify character encoding. - * This breaks HTML backwards compat, so perhaps avoid, or - * perhaps only emit when not using the default UTF-8? */ + fprintf(ho.fp, "\n", + charset_to_mimeenc(conf.output_charset)); fprintf(ho.fp, "\n"); break; case XHTML_1_0_STRICT: - /* FIXME: to specify character encoding. */ + fprintf(ho.fp, "\n", + charset_to_mimeenc(conf.output_charset)); fprintf(ho.fp, "\n"); @@ -769,7 +841,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, assert(f->last); if (f->last != f->first && f->last->title) { - html_text(&ho, L" - "); /* FIXME: configurable? */ + html_text(&ho, conf.title_separator); html_words(&ho, f->last->title->words, NOTHING, f, keywords, &conf); } @@ -780,10 +852,29 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, if (conf.head_end) html_raw(&ho, conf.head_end); + /* + * Add any data defined in specific sections + * that go in this file. (This is mostly to allow tags for Mac online help.) + */ + for (s = sects.head; s; s = s->next) { + if (s->file == f && s->text) { + for (p = s->text; + p && (p == s->text || p->type == para_Title || + !is_heading_type(p->type)); + p = p->next) { + if (p->type == para_Config) { + if (!ustricmp(p->keyword, L"html-local-head")) { + html_raw(&ho, adv(p->origkeyword)); + } + } + } + } + } + element_close(&ho, "head"); html_nl(&ho); - /* FIXME: need to be able to specify replacement for this */ if (conf.body_tag) html_raw(&ho, conf.body_tag); else @@ -806,37 +897,38 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, element_open(&ho, "a"); element_attr(&ho, "href", prevf->filename); } - html_text(&ho, L"Previous");/* FIXME: conf? */ + html_text(&ho, conf.nav_prev_text); if (prevf) element_close(&ho, "a"); - html_text(&ho, L" | "); /* FIXME: conf? */ + html_text(&ho, conf.nav_separator); if (f != files.head) { element_open(&ho, "a"); element_attr(&ho, "href", files.head->filename); } - html_text(&ho, L"Contents");/* FIXME: conf? */ + html_text(&ho, conf.contents_text); if (f != files.head) element_close(&ho, "a"); - html_text(&ho, L" | "); /* FIXME: conf? */ - - if (f != files.index) { - element_open(&ho, "a"); - element_attr(&ho, "href", files.index->filename); + if (has_index) { + html_text(&ho, conf.nav_separator); + if (f != files.index) { + element_open(&ho, "a"); + element_attr(&ho, "href", files.index->filename); + } + html_text(&ho, conf.index_text); + if (f != files.index) + element_close(&ho, "a"); } - html_text(&ho, L"Index");/* FIXME: conf? */ - if (f != files.index) - element_close(&ho, "a"); - html_text(&ho, L" | "); /* FIXME: conf? */ + html_text(&ho, conf.nav_separator); if (f->next) { element_open(&ho, "a"); element_attr(&ho, "href", f->next->filename); } - html_text(&ho, L"Next"); /* FIXME: conf? */ + html_text(&ho, conf.nav_next_text); if (f->next) element_close(&ho, "a"); @@ -846,7 +938,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, prevf = f; /* - * Write out a prefix TOC for the file. + * Write out a prefix TOC for the file (if a leaf file). * * We start by going through the section list and * collecting the sections which need to be added to @@ -929,6 +1021,12 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, */ displaying = TRUE; } else { + /* + * Doesn't belong in this file, but it may be + * a descendant of a section which does, in + * which case we should consider it for the + * main TOC of this file (for non-leaf files). + */ htmlsect *a, *ac; int depth, adepth; @@ -988,19 +1086,18 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, element_open(&ho, htag); /* - * Provide anchor for cross-links to target. - * - * FIXME: AIcurrentlyUI, this needs to be done - * differently in XHTML because is - * deprecated or obsolete. + * Provide anchor(s) for cross-links to target. * * (Also we'll have to do this separately in * other paragraph types - NumberedList and * BiblioCited.) */ - element_open(&ho, "a"); - element_attr(&ho, "name", s->fragment); - element_close(&ho, "a"); + { + int i; + for (i=0; i < conf.ntfragments; i++) + if (s->fragments[i]) + html_fragment(&ho, s->fragments[i]); + } html_section_title(&ho, s, f, keywords, &conf, TRUE); @@ -1132,9 +1229,10 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, element_open(&ho, "p"); if (p->private_data) { htmlsect *s = (htmlsect *)p->private_data; - element_open(&ho, "a"); - element_attr(&ho, "name", s->fragment); - element_close(&ho, "a"); + int i; + for (i=0; i < conf.ntfragments; i++) + if (s->fragments[i]) + html_fragment(&ho, s->fragments[i]); } html_nl(&ho); html_words(&ho, p->kwtext, ALL, @@ -1150,9 +1248,10 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, element_open(&ho, "li"); if (p->private_data) { htmlsect *s = (htmlsect *)p->private_data; - element_open(&ho, "a"); - element_attr(&ho, "name", s->fragment); - element_close(&ho, "a"); + int i; + for (i=0; i < conf.ntfragments; i++) + if (s->fragments[i]) + html_fragment(&ho, s->fragments[i]); } html_nl(&ho); stackhead->itemtype = LI; @@ -1219,7 +1318,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, html_words(&ho, entry->text, MARKUP|LINKS, f, keywords, &conf); - html_text(&ho, L": ");/* FIXME: configurable */ + html_text(&ho, conf.index_main_sep); for (j = 0; j < hi->nrefs; j++) { htmlindexref *hr = @@ -1227,7 +1326,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, paragraph *p = hr->section->title; if (j > 0) - html_text(&ho, L", "); /* FIXME: conf */ + html_text(&ho, conf.index_multi_sep); html_href(&ho, f, hr->section->file, hr->fragment); @@ -1238,8 +1337,17 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, else if (p && p->words) html_words(&ho, p->words, MARKUP|LINKS, f, keywords, &conf); - else - html_text(&ho, L"FIXME"); + else { + /* + * If there is no title at all, + * this must be because our + * target section is the + * preamble section and there + * is no title. So we use the + * preamble_text. + */ + html_text(&ho, conf.preamble_text); + } element_close(&ho, "a"); } } @@ -1263,33 +1371,48 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, html_raw(&ho, conf.body_end); if (conf.address_section) { + int started = FALSE; + if (conf.htmlver == ISO_HTML) { + /* + * The ISO-HTML validator complains if + * there isn't a
tag surrounding the + *
tag. I'm uncertain of why this + * should be - there appears to be no + * mention of this in the ISO-HTML spec, + * suggesting that it doesn't represent a + * change from HTML 4, but nonetheless the + * HTML 4 validator doesn't seem to mind. + */ + element_open(&ho, "div"); + } element_open(&ho, "address"); if (conf.addr_start) { html_raw(&ho, conf.addr_start); html_nl(&ho); + started = TRUE; } if (conf.visible_version_id) { - int started = FALSE; for (p = sourceform; p; p = p->next) if (p->type == para_VersionID) { - if (!started) - element_open(&ho, "p"); - else + if (started) element_empty(&ho, "br"); html_nl(&ho); - html_text(&ho, L"["); /* FIXME: conf? */ + html_text(&ho, conf.pre_versionid); html_words(&ho, p->words, NOTHING, f, keywords, &conf); - html_text(&ho, L"]"); /* FIXME: conf? */ + html_text(&ho, conf.post_versionid); started = TRUE; } - if (started) - element_close(&ho, "p"); done_version_ids = TRUE; } - if (conf.addr_end) + if (conf.addr_end) { + if (started) + element_empty(&ho, "br"); html_raw(&ho, conf.addr_end); + } element_close(&ho, "address"); + if (conf.htmlver == ISO_HTML) + element_close(&ho, "div"); } if (!done_version_ids) { @@ -1340,8 +1463,81 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, } /* - * FIXME: Free all the working data. + * Free all the working data. */ + { + htmlfragment *frag; + while ( (frag = (htmlfragment *)delpos234(files.frags, 0)) != NULL ) { + /* + * frag->fragment is dynamically allocated, but will be + * freed when we process the htmlsect structure which + * it is attached to. + */ + sfree(frag); + } + freetree234(files.frags); + } + { + htmlsect *sect, *tmp; + sect = sects.head; + while (sect) { + int i; + tmp = sect->next; + for (i=0; i < conf.ntfragments; i++) + sfree(sect->fragments[i]); + sfree(sect->fragments); + sfree(sect); + sect = tmp; + } + sect = nonsects.head; + while (sect) { + int i; + tmp = sect->next; + for (i=0; i < conf.ntfragments; i++) + sfree(sect->fragments[i]); + sfree(sect->fragments); + sfree(sect); + sect = tmp; + } + } + { + htmlfile *file, *tmp; + file = files.head; + while (file) { + tmp = file->next; + sfree(file->filename); + sfree(file); + file = tmp; + } + } + { + int i; + indexentry *entry; + for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) { + htmlindex *hi = (htmlindex *)entry->backend_data; + sfree(hi); + } + } + { + paragraph *p; + word *w; + for (p = sourceform; p; p = p->next) + for (w = p->words; w; w = w->next) + if (w->type == word_IndexRef) { + htmlindexref *hr = (htmlindexref *)w->private_data; + assert(hr != NULL); + sfree(hr->fragment); + sfree(hr); + } + } + sfree(conf.asect); + sfree(conf.single_filename); + sfree(conf.contents_filename); + sfree(conf.index_filename); + sfree(conf.template_filename); + while (conf.ntfragments--) + sfree(conf.template_fragments[conf.ntfragments]); + sfree(conf.template_fragments); } static void html_file_section(htmlconfig *cfg, htmlfilelist *files, @@ -1439,7 +1635,8 @@ static htmlfile *html_new_file(htmlfilelist *list, char *filename) return ret; } -static htmlsect *html_new_sect(htmlsectlist *list, paragraph *title) +static htmlsect *html_new_sect(htmlsectlist *list, paragraph *title, + htmlconfig *cfg) { htmlsect *ret = snew(htmlsect); @@ -1455,6 +1652,13 @@ static htmlsect *html_new_sect(htmlsectlist *list, paragraph *title) ret->parent = NULL; ret->type = NORMAL; + ret->fragments = snewn(cfg->ntfragments, char *); + { + int i; + for (i=0; i < cfg->ntfragments; i++) + ret->fragments[i] = NULL; + } + return ret; } @@ -1478,12 +1682,16 @@ static void html_words(htmloutput *ho, word *words, int flags, case word_LowerXref: if (flags & LINKS) { keyword *kwl = kw_lookup(keywords, w->text); - paragraph *p = kwl->para; - htmlsect *s = (htmlsect *)p->private_data; + paragraph *p; + htmlsect *s; + + assert(kwl); + p = kwl->para; + s = (htmlsect *)p->private_data; assert(s); - html_href(ho, file, s->file, s->fragment); + html_href(ho, file, s->file, s->fragments[0]); } break; case word_HyperEnd: @@ -1494,9 +1702,7 @@ static void html_words(htmloutput *ho, word *words, int flags, case word_IndexRef: if (flags & INDEXENTS) { htmlindexref *hr = (htmlindexref *)w->private_data; - element_open(ho, "a"); - element_attr(ho, "name", hr->fragment); - element_close(ho, "a"); + html_fragment(ho, hr->fragment); hr->generated = TRUE; } break; @@ -1533,8 +1739,8 @@ static void html_words(htmloutput *ho, word *words, int flags, else html_text(ho, cfg->rquote); } else { - if (cvt_ok(ho->charset, w->text) || !w->alt) - html_text(ho, w->text); + if (!w->alt || cvt_ok(ho->restrict_charset, w->text)) + html_text_nbsp(ho, w->text); else html_words(ho, w->alt, flags, file, keywords, cfg); } @@ -1609,11 +1815,9 @@ static void html_charset_cleanup(htmloutput *ho) fwrite(outbuf, 1, bytes, ho->fp); } -static void return_to_neutral(htmloutput *ho) +static void return_mostly_to_neutral(htmloutput *ho) { - if (ho->state == HO_IN_TEXT) { - html_charset_cleanup(ho); - } else if (ho->state == HO_IN_EMPTY_TAG && is_xhtml(ho->ver)) { + if (ho->state == HO_IN_EMPTY_TAG && is_xhtml(ho->ver)) { fprintf(ho->fp, " />"); } else if (ho->state == HO_IN_EMPTY_TAG || ho->state == HO_IN_TAG) { fprintf(ho->fp, ">"); @@ -1622,6 +1826,15 @@ static void return_to_neutral(htmloutput *ho) ho->state = HO_NEUTRAL; } +static void return_to_neutral(htmloutput *ho) +{ + if (ho->state == HO_IN_TEXT) { + html_charset_cleanup(ho); + } + + return_mostly_to_neutral(ho); +} + static void element_open(htmloutput *ho, char const *name) { return_to_neutral(ho); @@ -1674,24 +1887,31 @@ static void element_attr_w(htmloutput *ho, char const *name, { html_charset_cleanup(ho); fprintf(ho->fp, " %s=\"", name); - html_text_limit_internal(ho, value, 0, TRUE); + html_text_limit_internal(ho, value, 0, TRUE, FALSE); html_charset_cleanup(ho); fputc('"', ho->fp); } static void html_text(htmloutput *ho, wchar_t const *text) { - html_text_limit(ho, text, 0); + return_mostly_to_neutral(ho); + html_text_limit_internal(ho, text, 0, FALSE, FALSE); +} + +static void html_text_nbsp(htmloutput *ho, wchar_t const *text) +{ + return_mostly_to_neutral(ho); + html_text_limit_internal(ho, text, 0, FALSE, TRUE); } static void html_text_limit(htmloutput *ho, wchar_t const *text, int maxlen) { - return_to_neutral(ho); - html_text_limit_internal(ho, text, maxlen, FALSE); + return_mostly_to_neutral(ho); + html_text_limit_internal(ho, text, maxlen, FALSE, FALSE); } static void html_text_limit_internal(htmloutput *ho, wchar_t const *text, - int maxlen, int quote_quotes) + int maxlen, int quote_quotes, int nbsp) { int textlen = ustrlen(text); char outbuf[256]; @@ -1707,7 +1927,8 @@ static void html_text_limit_internal(htmloutput *ho, wchar_t const *text, if (text[lenbefore] == L'<' || text[lenbefore] == L'>' || text[lenbefore] == L'&' || - (text[lenbefore] == L'"' && quote_quotes)) + (text[lenbefore] == L'"' && quote_quotes) || + (text[lenbefore] == L' ' && nbsp)) break; lenafter = lenbefore; bytes = charset_from_unicode(&text, &lenafter, outbuf, lenof(outbuf), @@ -1737,7 +1958,10 @@ static void html_text_limit_internal(htmloutput *ho, wchar_t const *text, fprintf(ho->fp, "&"); else if (*text == L'"') fprintf(ho->fp, """); - else + else if (*text == L' ') { + assert(nbsp); + fprintf(ho->fp, " "); + } else assert(!"Can't happen"); text++, textlen--; } @@ -1769,6 +1993,15 @@ static void html_href(htmloutput *ho, htmlfile *thisfile, sfree(url); } +static void html_fragment(htmloutput *ho, char const *fragment) +{ + element_open(ho, "a"); + element_attr(ho, "name", fragment); + if (is_xhtml(ho->ver)) + element_attr(ho, "id", fragment); + element_close(ho, "a"); +} + static char *html_format(paragraph *p, char *template_string) { char *c, *t; @@ -1812,6 +2045,7 @@ static char *html_format(paragraph *p, char *template_string) } else if (p->keyword && *p->keyword && fmt == 'k') ws = p->keyword; else + /* %N comes here; also failure cases of other fmts */ w = p->words; if (ws) { @@ -1865,6 +2099,13 @@ static char *html_sanitise_fragment(htmlfilelist *files, htmlfile *file, *q = '\0'; } + /* If there's nothing left, make something valid up */ + if (!*text) { + static const char anonfrag[] = "anon"; + text = sresize(text, lenof(anonfrag), char); + strcpy(text, anonfrag); + } + /* * Now we check for clashes with other fragment names, and * adjust this one if necessary by appending a hyphen followed @@ -1895,13 +2136,24 @@ static void html_contents_entry(htmloutput *ho, int depth, htmlsect *s, htmlfile *thisfile, keywordlist *keywords, htmlconfig *cfg) { + if (ho->contents_level >= depth && ho->contents_level > 0) { + element_close(ho, "li"); + html_nl(ho); + } + while (ho->contents_level > depth) { element_close(ho, "ul"); ho->contents_level--; + if (ho->contents_level > 0) { + element_close(ho, "li"); + } + html_nl(ho); } while (ho->contents_level < depth) { + html_nl(ho); element_open(ho, "ul"); + html_nl(ho); ho->contents_level++; } @@ -1909,10 +2161,10 @@ static void html_contents_entry(htmloutput *ho, int depth, htmlsect *s, return; element_open(ho, "li"); - html_href(ho, thisfile, s->file, s->fragment); + html_href(ho, thisfile, s->file, s->fragments[0]); html_section_title(ho, s, thisfile, keywords, cfg, FALSE); element_close(ho, "a"); - element_close(ho, "li"); + /*
  • will be closed by a later invocation */ } static void html_section_title(htmloutput *ho, htmlsect *s, htmlfile *thisfile, @@ -1950,9 +2202,16 @@ static void html_section_title(htmloutput *ho, htmlsect *s, htmlfile *thisfile, thisfile, keywords, cfg); } else { assert(s->type != NORMAL); - if (s->type == TOP) - html_text(ho, L"Preamble");/* FIXME: configure */ + /* + * If we're printing the full document title for _real_ and + * there isn't one, we don't want to print `Preamble' at + * the top of what ought to just be some text. If we need + * it in any other context such as TOCs, we need to print + * `Preamble'. + */ + if (s->type == TOP && !real) + html_text(ho, cfg->preamble_text); else if (s->type == INDEX) - html_text(ho, L"Index");/* FIXME: configure */ + html_text(ho, cfg->index_text); } }