From 27bdc5ab5b1f2a90b06fc924346f014e1b272c65 Mon Sep 17 00:00:00 2001 From: simon Date: Sun, 20 Jun 2004 13:11:34 +0000 Subject: [PATCH] Introduce a configurable option to select the HTML flavour. Also fiddle with various small aspects of the output so that it actually validates in all supported flavours. git-svn-id: svn://svn.tartarus.org/sgt/halibut@4307 cda61777-01e9-0310-a592-d414129be87e --- bk_html.c | 101 ++++++++++++++++++++++++++++++++++++++++++-------------------- error.c | 8 +++++ halibut.h | 1 + 3 files changed, 77 insertions(+), 33 deletions(-) diff --git a/bk_html.c b/bk_html.c index 5cd145e..e67ca3b 100644 --- a/bk_html.c +++ b/bk_html.c @@ -14,7 +14,6 @@ * - new configurability: * * a few new things explicitly labelled as `FIXME: * configurable' or similar. - * * HTML flavour. * * Some means of specifying the distinction between * restrict-charset and output-charset. It seems to me that * `html-charset' is output-charset, and that @@ -26,10 +25,6 @@ * possible that some user may need to set restrict-charset * to their charset of choice while leaving _output_-charset * at UTF-8. Figure out some configuration, and apply it. - * - * - test all HTML flavours and ensure they validate sensibly. Fix - * remaining confusion issues such as and obsoleteness - * of . * * - nonbreaking spaces. 
* @@ -76,7 +71,7 @@ typedef struct { wchar_t *author, *description; int restrict_charset, output_charset; enum { - HTML_3_2, HTML_4, + HTML_3_2, HTML_4, ISO_HTML, XHTML_1_0_TRANSITIONAL, XHTML_1_0_STRICT } htmlver; wchar_t *lquote, *rquote; @@ -200,6 +195,7 @@ static void cleanup(htmloutput *ho); static void html_href(htmloutput *ho, htmlfile *thisfile, htmlfile *targetfile, char *targetfrag); +static void html_fragment(htmloutput *ho, char const *fragment); static char *html_format(paragraph *p, char *template_string); static char *html_sanitise_fragment(htmlfilelist *files, htmlfile *file, @@ -279,6 +275,28 @@ static htmlconfig html_configure(paragraph *source) { ret.restrict_charset = ret.output_charset = charset_from_localenc(csname); sfree(csname); + } else if (!ustricmp(k, L"html-version")) { + wchar_t *vername = uadv(k); + static const struct { + const wchar_t *name; + int ver; + } versions[] = { + {L"html3.2", HTML_3_2}, + {L"html4", HTML_4}, + {L"iso-html", ISO_HTML}, + {L"xhtml1.0transitional", XHTML_1_0_TRANSITIONAL}, + {L"xhtml1.0strict", XHTML_1_0_STRICT} + }; + int i; + + for (i = 0; i < (int)lenof(versions); i++) + if (!ustricmp(versions[i].name, vername)) + break; + + if (i == lenof(versions)) + error(err_htmlver, &p->fpos, vername); + else + ret.htmlver = versions[i].ver; } else if (!ustricmp(k, L"html-single-filename")) { sfree(ret.single_filename); ret.single_filename = dupstr(adv(p->origkeyword)); @@ -713,16 +731,20 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, " 4.01//EN\"\n\"http://www.w3.org/TR/html4/" "strict.dtd\">\n"); break; + case ISO_HTML: + fprintf(ho.fp, "\n"); + break; case XHTML_1_0_TRANSITIONAL: - /* FIXME: to specify character encoding. - * This breaks HTML backwards compat, so perhaps avoid, or - * perhaps only emit when not using the default UTF-8? 
*/ + fprintf(ho.fp, "\n", + charset_to_mimeenc(conf.output_charset)); fprintf(ho.fp, "\n"); break; case XHTML_1_0_STRICT: - /* FIXME: to specify character encoding. */ + fprintf(ho.fp, "\n", + charset_to_mimeenc(conf.output_charset)); fprintf(ho.fp, "\n"); @@ -990,17 +1012,12 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, /* * Provide anchor for cross-links to target. * - * FIXME: AIcurrentlyUI, this needs to be done - * differently in XHTML because is - * deprecated or obsolete. - * * (Also we'll have to do this separately in * other paragraph types - NumberedList and * BiblioCited.) */ - element_open(&ho, "a"); - element_attr(&ho, "name", s->fragment); - element_close(&ho, "a"); + if (s->fragment) + html_fragment(&ho, s->fragment); html_section_title(&ho, s, f, keywords, &conf, TRUE); @@ -1132,9 +1149,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, element_open(&ho, "p"); if (p->private_data) { htmlsect *s = (htmlsect *)p->private_data; - element_open(&ho, "a"); - element_attr(&ho, "name", s->fragment); - element_close(&ho, "a"); + html_fragment(&ho, s->fragment); } html_nl(&ho); html_words(&ho, p->kwtext, ALL, @@ -1150,9 +1165,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, element_open(&ho, "li"); if (p->private_data) { htmlsect *s = (htmlsect *)p->private_data; - element_open(&ho, "a"); - element_attr(&ho, "name", s->fragment); - element_close(&ho, "a"); + html_fragment(&ho, s->fragment); } html_nl(&ho); stackhead->itemtype = LI; @@ -1263,18 +1276,30 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, html_raw(&ho, conf.body_end); if (conf.address_section) { + int started = FALSE; + if (conf.htmlver == ISO_HTML) { + /* + * The ISO-HTML validator complains if + * there isn't a
<div> tag surrounding the + * <address>
tag. I'm uncertain of why this + * should be - there appears to be no + * mention of this in the ISO-HTML spec, + * suggesting that it doesn't represent a + * change from HTML 4, but nonetheless the + * HTML 4 validator doesn't seem to mind. + */ + element_open(&ho, "div"); + } element_open(&ho, "address"); if (conf.addr_start) { html_raw(&ho, conf.addr_start); html_nl(&ho); + started = TRUE; } if (conf.visible_version_id) { - int started = FALSE; for (p = sourceform; p; p = p->next) if (p->type == para_VersionID) { - if (!started) - element_open(&ho, "p"); - else + if (started) element_empty(&ho, "br"); html_nl(&ho); html_text(&ho, L"["); /* FIXME: conf? */ @@ -1283,13 +1308,16 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, html_text(&ho, L"]"); /* FIXME: conf? */ started = TRUE; } - if (started) - element_close(&ho, "p"); done_version_ids = TRUE; } - if (conf.addr_end) + if (conf.addr_end) { + if (started) + element_empty(&ho, "br"); html_raw(&ho, conf.addr_end); + } element_close(&ho, "address"); + if (conf.htmlver == ISO_HTML) + element_close(&ho, "div"); } if (!done_version_ids) { @@ -1494,9 +1522,7 @@ static void html_words(htmloutput *ho, word *words, int flags, case word_IndexRef: if (flags & INDEXENTS) { htmlindexref *hr = (htmlindexref *)w->private_data; - element_open(ho, "a"); - element_attr(ho, "name", hr->fragment); - element_close(ho, "a"); + html_fragment(ho, hr->fragment); hr->generated = TRUE; } break; @@ -1769,6 +1795,15 @@ static void html_href(htmloutput *ho, htmlfile *thisfile, sfree(url); } +static void html_fragment(htmloutput *ho, char const *fragment) +{ + element_open(ho, "a"); + element_attr(ho, "name", fragment); + if (is_xhtml(ho->ver)) + element_attr(ho, "id", fragment); + element_close(ho, "a"); +} + static char *html_format(paragraph *p, char *template_string) { char *c, *t; diff --git a/error.c b/error.c index 879d081..0330e76 100644 --- a/error.c +++ b/error.c @@ -249,6 +249,14 @@ static void do_error(int code, 
va_list ap) { " than body width %d", i, j); flags = FILEPOS; break; + case err_htmlver: + fpos = *va_arg(ap, filepos *); + wsp = va_arg(ap, wchar_t *); + sp = utoa_locale_dup(wsp); + sprintf(error, "unrecognised HTML version keyword `%.200s'", sp); + sfree(sp); + flags = FILEPOS; + break; case err_whatever: sp = va_arg(ap, char *); vsprintf(error, sp, ap); diff --git a/halibut.h b/halibut.h index 01cbf3e..88b39f2 100644 --- a/halibut.h +++ b/halibut.h @@ -240,6 +240,7 @@ enum { err_infodirentry, /* \cfg{info-dir-entry} missing param */ err_infonodechar, /* colon/comma in node name in info */ err_text_codeline, /* \c line too long in text backend */ + err_htmlver, /* unrecognised HTML version keyword */ err_whatever /* random error of another type */ }; -- 2.11.0