* - new configurability:
* * a few new things explicitly labelled as `FIXME:
* configurable' or similar.
- * * HTML flavour.
* * Some means of specifying the distinction between
* restrict-charset and output-charset. It seems to me that
* `html-charset' is output-charset, and that
* possible that some user may need to set restrict-charset
* to their charset of choice while leaving _output_-charset
* at UTF-8. Figure out some configuration, and apply it.
- *
- * - test all HTML flavours and ensure they validate sensibly. Fix
- * remaining confusion issues such as <?xml?> and obsoleteness
- * of <a name>.
*
- * - proper naming of all fragment IDs. The ones for sections are
- * fine; the ones for numbered list and bibliociteds are utter
- * crap; the ones for indexes _might_ do but it might be worth
- * giving some thought to how to do them better.
- * + also set up a mechanism for ensuring that fragment IDs
- * never clash.
+ * - nonbreaking spaces.
*
- * - nonbreaking spaces?
+ * - free up all the data we have allocated while running this
+ * backend.
*/
#include <stdio.h>
wchar_t *author, *description;
int restrict_charset, output_charset;
enum {
- HTML_3_2, HTML_4,
+ HTML_3_2, HTML_4, ISO_HTML,
XHTML_1_0_TRANSITIONAL, XHTML_1_0_STRICT
} htmlver;
wchar_t *lquote, *rquote;
typedef struct {
htmlfile *head, *tail;
htmlfile *single, *index;
+ tree234 *frags;
} htmlfilelist;
typedef struct {
} htmlsectlist;
typedef struct {
+ htmlfile *file;
+ char *fragment;
+} htmlfragment;
+
+typedef struct {
int nrefs, refsize;
word **refs;
} htmlindex;
typedef struct {
htmlsect *section;
char *fragment;
+ int generated, referenced;
} htmlindexref;
typedef struct {
int contents_level;
} htmloutput;
+/*
+ * Ordering function passed to newtree234() for the tree of
+ * fragment names. Compares two htmlfragment records first by the
+ * filename of the file each one points at, and then, if those are
+ * equal, by the fragment string itself (both using strcmp). Two
+ * records therefore compare equal only when they carry the same
+ * fragment string under the same filename.
+ */
+static int html_fragment_compare(void *av, void *bv)
+{
+ htmlfragment *a = (htmlfragment *)av;
+ htmlfragment *b = (htmlfragment *)bv;
+ int cmp;
+
+ if ((cmp = strcmp(a->file->filename, b->file->filename)) != 0)
+ return cmp;
+ else
+ return strcmp(a->fragment, b->fragment);
+}
+
static void html_file_section(htmlconfig *cfg, htmlfilelist *files,
htmlsect *sect, int depth);
static void html_href(htmloutput *ho, htmlfile *thisfile,
htmlfile *targetfile, char *targetfrag);
+static void html_fragment(htmloutput *ho, char const *fragment);
static char *html_format(paragraph *p, char *template_string);
-static void html_sanitise_fragment(char *text);
+static char *html_sanitise_fragment(htmlfilelist *files, htmlfile *file,
+ char *text);
static void html_contents_entry(htmloutput *ho, int depth, htmlsect *s,
htmlfile *thisfile, keywordlist *keywords,
htmlconfig *cfg);
static void html_section_title(htmloutput *ho, htmlsect *s,
htmlfile *thisfile, keywordlist *keywords,
- htmlconfig *cfg);
+ htmlconfig *cfg, int real);
static htmlconfig html_configure(paragraph *source) {
htmlconfig ret;
ret.achapter.just_numbers = FALSE;
ret.achapter.number_suffix = L": ";
ret.nasect = 1;
- ret.asect = mknewa(sectlevel, ret.nasect);
+ ret.asect = snewn(ret.nasect, sectlevel);
ret.asect[0].just_numbers = TRUE;
ret.asect[0].number_suffix = L" ";
ret.ncdepths = 0;
ret.restrict_charset = ret.output_charset =
charset_from_localenc(csname);
sfree(csname);
+ } else if (!ustricmp(k, L"html-version")) {
+ wchar_t *vername = uadv(k);
+ static const struct {
+ const wchar_t *name;
+ int ver;
+ } versions[] = {
+ {L"html3.2", HTML_3_2},
+ {L"html4", HTML_4},
+ {L"iso-html", ISO_HTML},
+ {L"xhtml1.0transitional", XHTML_1_0_TRANSITIONAL},
+ {L"xhtml1.0strict", XHTML_1_0_STRICT}
+ };
+ int i;
+
+ for (i = 0; i < (int)lenof(versions); i++)
+ if (!ustricmp(versions[i].name, vername))
+ break;
+
+ if (i == lenof(versions))
+ error(err_htmlver, &p->fpos, vername);
+ else
+ ret.htmlver = versions[i].ver;
} else if (!ustricmp(k, L"html-single-filename")) {
sfree(ret.single_filename);
ret.single_filename = dupstr(adv(p->origkeyword));
}
if (n >= ret.nasect) {
int i;
- ret.asect = resize(ret.asect, n+1);
+ ret.asect = sresize(ret.asect, n+1, sectlevel);
for (i = ret.nasect; i <= n; i++)
ret.asect[i] = ret.asect[ret.nasect-1];
ret.nasect = n+1;
}
if (n >= ret.nasect) {
int i;
- ret.asect = resize(ret.asect, n+1);
+ ret.asect = sresize(ret.asect, n+1, sectlevel);
for (i = ret.nasect; i <= n; i++) {
ret.asect[i] = ret.asect[ret.nasect-1];
}
}
if (n >= ret.ncdepths) {
int i;
- ret.contents_depths = resize(ret.contents_depths, n+1);
+ ret.contents_depths =
+ sresize(ret.contents_depths, n+1, int);
for (i = ret.ncdepths; i <= n; i++) {
ret.contents_depths[i] = i+2;
}
indexdata *idx, void *unused) {
paragraph *p;
htmlconfig conf;
- htmlfilelist files = { NULL, NULL, NULL, NULL };
+ htmlfilelist files = { NULL, NULL, NULL, NULL, NULL };
htmlsectlist sects = { NULL, NULL }, nonsects = { NULL, NULL };
IGNORE(unused);
for (p = sourceform; p; p = p->next)
p->private_data = NULL;
+ files.frags = newtree234(html_fragment_compare);
+
/*
* Start by figuring out into which file each piece of the
* document should be put. We'll do this by inventing an
html_file_section(&conf, &files, sect, d);
sect->fragment = html_format(p, conf.template_fragment);
- html_sanitise_fragment(sect->fragment);
- /* FIXME: clash checking? add to a tree of (file,frag)? */
+ sect->fragment = html_sanitise_fragment(&files, sect->file,
+ sect->fragment);
}
/* And the index. */
sect = html_new_sect(&sects, NULL);
- sect->fragment = dupstr("Index"); /* FIXME: this _can't_ be right */
sect->text = NULL;
sect->type = INDEX;
sect->parent = topsect;
html_file_section(&conf, &files, sect, 0); /* peer of chapters */
+ sect->fragment = dupstr("Index"); /* FIXME: this _can't_ be right */
+ sect->fragment = html_sanitise_fragment(&files, sect->file,
+ sect->fragment);
files.index = sect->file;
}
p->private_data = sect;
/*
- * FIXME: We need a much better means of naming
- * these, possibly involving an additional
- * configuration template. For the moment I'll just
- * invent something completely stupid.
+ * Fragment IDs for these paragraphs will simply be
+ * `p' followed by an integer.
*/
- sect->fragment = mknewa(char, 40);
- sprintf(sect->fragment, "frag%p", sect);
+ sect->fragment = snewn(40, char);
+ sprintf(sect->fragment, "p%d",
+ sect->file->last_fragment_number++);
+ sect->fragment = html_sanitise_fragment(&files, sect->file,
+ sect->fragment);
}
}
}
/*
+ * Reset the fragment numbers in each file. I've just used them
+ * to generate `p' fragment IDs for non-section paragraphs
+ * (numbered list elements, bibliocited), and now I want to use
+ * them for `i' fragment IDs for index entries.
+ */
+ {
+ htmlfile *file;
+ for (file = files.head; file; file = file->next)
+ file->last_fragment_number = 0;
+ }
+
+ /*
* Now sort out the index. This involves:
*
* - For each index term, we set up an htmlindex structure to
*/
for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
- htmlindex *hi = mknew(htmlindex);
+ htmlindex *hi = snew(htmlindex);
hi->nrefs = hi->refsize = 0;
hi->refs = NULL;
/*
* Run over the document inventing fragments. Each fragment
* is of the form `i' followed by an integer.
- *
- * FIXME: Probably in the file-organisation pass we should
- * work out the fragment names of every section, so that we
- * could load them all into a tree and hence ensure these
- * index fragments don't clash with them.
*/
lastsect = NULL;
for (p = sourceform; p; p = p->next) {
for (w = p->words; w; w = w->next)
if (w->type == word_IndexRef) {
- htmlindexref *hr = mknew(htmlindexref);
+ htmlindexref *hr = snew(htmlindexref);
indextag *tag;
int i;
+ hr->referenced = hr->generated = FALSE;
hr->section = lastsect;
- /* FIXME: clash checking */
{
char buf[40];
sprintf(buf, "i%d",
lastsect->file->last_fragment_number++);
hr->fragment = dupstr(buf);
+ hr->fragment =
+ html_sanitise_fragment(&files, hr->section->file,
+ hr->fragment);
}
w->private_data = hr;
if (hi->nrefs >= hi->refsize) {
hi->refsize += 32;
- hi->refs = resize(hi->refs, hi->refsize);
+ hi->refs = sresize(hi->refs, hi->refsize, word *);
}
hi->refs[hi->nrefs++] = w;
" 4.01//EN\"\n\"http://www.w3.org/TR/html4/"
"strict.dtd\">\n");
break;
+ case ISO_HTML:
+ fprintf(ho.fp, "<!DOCTYPE HTML PUBLIC \"ISO/IEC "
+ "15445:2000//DTD HTML//EN\">\n");
+ break;
case XHTML_1_0_TRANSITIONAL:
- /* FIXME: <?xml?> to specify character encoding.
- * This breaks HTML backwards compat, so perhaps avoid, or
- * perhaps only emit when not using the default UTF-8? */
+ fprintf(ho.fp, "<?xml version=\"1.0\" encoding=\"%s\"?>\n",
+ charset_to_mimeenc(conf.output_charset));
fprintf(ho.fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML"
" 1.0 Transitional//EN\"\n\"http://www.w3.org/TR/"
"xhtml1/DTD/xhtml1-transitional.dtd\">\n");
break;
case XHTML_1_0_STRICT:
- /* FIXME: <?xml?> to specify character encoding. */
+ fprintf(ho.fp, "<?xml version=\"1.0\" encoding=\"%s\"?>\n",
+ charset_to_mimeenc(conf.output_charset));
fprintf(ho.fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML"
" 1.0 Strict//EN\"\n\"http://www.w3.org/TR/xhtml1/"
"DTD/xhtml1-strict.dtd\">\n");
if (adepth <= a->contents_depth) {
if (ntoc >= tocsize) {
tocsize += 64;
- toc = resize(toc, tocsize);
+ toc = sresize(toc, tocsize, htmlsect *);
}
toc[ntoc++] = s;
}
/*
* Provide anchor for cross-links to target.
*
- * FIXME: AIcurrentlyUI, this needs to be done
- * differently in XHTML because <a name> is
- * deprecated or obsolete.
- *
* (Also we'll have to do this separately in
* other paragraph types - NumberedList and
* BiblioCited.)
*/
- element_open(&ho, "a");
- element_attr(&ho, "name", s->fragment);
- element_close(&ho, "a");
+ if (s->fragment)
+ html_fragment(&ho, s->fragment);
- html_section_title(&ho, s, f, keywords, &conf);
+ html_section_title(&ho, s, f, keywords, &conf, TRUE);
element_close(&ho, htag);
* Now display the section text.
*/
if (s->text) {
- stackhead = mknew(struct stackelement);
+ stackhead = snew(struct stackelement);
stackhead->next = NULL;
stackhead->listtype = NOLIST;
stackhead->itemtype = NOITEM;
break;
case para_LcontPush:
- se = mknew(struct stackelement);
+ se = snew(struct stackelement);
se->next = stackhead;
se->listtype = NOLIST;
se->itemtype = NOITEM;
element_open(&ho, "p");
if (p->private_data) {
htmlsect *s = (htmlsect *)p->private_data;
- element_open(&ho, "a");
- element_attr(&ho, "name", s->fragment);
- element_close(&ho, "a");
+ html_fragment(&ho, s->fragment);
}
html_nl(&ho);
html_words(&ho, p->kwtext, ALL,
element_open(&ho, "li");
if (p->private_data) {
htmlsect *s = (htmlsect *)p->private_data;
- element_open(&ho, "a");
- element_attr(&ho, "name", s->fragment);
- element_close(&ho, "a");
+ html_fragment(&ho, s->fragment);
}
html_nl(&ho);
stackhead->itemtype = LI;
html_href(&ho, f, hr->section->file,
hr->fragment);
+ hr->referenced = TRUE;
if (p && p->kwtext)
html_words(&ho, p->kwtext, MARKUP|LINKS,
f, keywords, &conf);
html_raw(&ho, conf.body_end);
if (conf.address_section) {
+ int started = FALSE;
+ if (conf.htmlver == ISO_HTML) {
+ /*
+ * The ISO-HTML validator complains if
+ * there isn't a <div> tag surrounding the
+ * <address> tag. I'm uncertain of why this
+ * should be - there appears to be no
+ * mention of this in the ISO-HTML spec,
+ * suggesting that it doesn't represent a
+ * change from HTML 4, but nonetheless the
+ * HTML 4 validator doesn't seem to mind.
+ */
+ element_open(&ho, "div");
+ }
element_open(&ho, "address");
if (conf.addr_start) {
html_raw(&ho, conf.addr_start);
html_nl(&ho);
+ started = TRUE;
}
if (conf.visible_version_id) {
- int started = FALSE;
for (p = sourceform; p; p = p->next)
if (p->type == para_VersionID) {
- if (!started)
- element_open(&ho, "p");
- else
+ if (started)
element_empty(&ho, "br");
html_nl(&ho);
html_text(&ho, L"["); /* FIXME: conf? */
html_text(&ho, L"]"); /* FIXME: conf? */
started = TRUE;
}
- if (started)
- element_close(&ho, "p");
done_version_ids = TRUE;
}
- if (conf.addr_end)
+ if (conf.addr_end) {
+ if (started)
+ element_empty(&ho, "br");
html_raw(&ho, conf.addr_end);
+ }
element_close(&ho, "address");
+ if (conf.htmlver == ISO_HTML)
+ element_close(&ho, "div");
}
if (!done_version_ids) {
}
/*
- * FIXME: Figure out a way to free the htmlindex and
- * htmlindexref structures.
+ * Go through and check that no index fragments were referenced
+ * without being generated, or indeed vice versa.
+ *
+ * (When I actually get round to freeing everything, this can
+ * probably be the freeing loop as well.)
+ */
+ for (p = sourceform; p; p = p->next) {
+ word *w;
+ for (w = p->words; w; w = w->next)
+ if (w->type == word_IndexRef) {
+ htmlindexref *hr = (htmlindexref *)w->private_data;
+
+ assert(!hr->referenced == !hr->generated);
+ }
+ }
+
+ /*
+ * FIXME: Free all the working data.
*/
}
static htmlfile *html_new_file(htmlfilelist *list, char *filename)
{
- htmlfile *ret = mknew(htmlfile);
+ htmlfile *ret = snew(htmlfile);
ret->next = NULL;
if (list->tail)
static htmlsect *html_new_sect(htmlsectlist *list, paragraph *title)
{
- htmlsect *ret = mknew(htmlsect);
+ htmlsect *ret = snew(htmlsect);
ret->next = NULL;
if (list->tail)
case word_IndexRef:
if (flags & INDEXENTS) {
htmlindexref *hr = (htmlindexref *)w->private_data;
- element_open(ho, "a");
- element_attr(ho, "name", hr->fragment);
- element_close(ho, "a");
+ html_fragment(ho, hr->fragment);
+ hr->generated = TRUE;
}
break;
case word_Normal:
sfree(url);
}
+/*
+ * Write an anchor for `fragment': an <a> element that is opened
+ * and immediately closed, carrying the fragment string as its
+ * "name" attribute. When is_xhtml(ho->ver) is true, the same
+ * string is also written as an "id" attribute.
+ */
+static void html_fragment(htmloutput *ho, char const *fragment)
+{
+ element_open(ho, "a");
+ element_attr(ho, "name", fragment);
+ if (is_xhtml(ho->ver))
+ element_attr(ho, "id", fragment);
+ element_close(ho, "a");
+}
+
static char *html_format(paragraph *p, char *template_string)
{
char *c, *t;
return rdtrimc(&rs);
}
-static void html_sanitise_fragment(char *text)
+static char *html_sanitise_fragment(htmlfilelist *files, htmlfile *file,
+ char *text)
{
/*
* The HTML 4 spec's strictest definition of fragment names (<a
while (*p && !((*p>='A' && *p<='Z') || (*p>='a' && *p<='z')))
p++;
- if (!(*q++ = *p++))
- return;
- while (*p) {
- if ((*p>='A' && *p<='Z') ||
- (*p>='a' && *p<='z') ||
- (*p>='0' && *p<='9') ||
- *p=='-' || *p=='_' || *p==':' || *p=='.')
- *q++ = *p;
- p++;
+ if ((*q++ = *p++) != '\0') {
+ while (*p) {
+ if ((*p>='A' && *p<='Z') ||
+ (*p>='a' && *p<='z') ||
+ (*p>='0' && *p<='9') ||
+ *p=='-' || *p=='_' || *p==':' || *p=='.')
+ *q++ = *p;
+ p++;
+ }
+
+ *q = '\0';
+ }
+
+ /*
+ * Now we check for clashes with other fragment names, and
+ * adjust this one if necessary by appending a hyphen followed
+ * by a number.
+ */
+ {
+ htmlfragment *frag = snew(htmlfragment);
+ int len = 0; /* >0 indicates we have resized */
+ int suffix = 1;
+
+ frag->file = file;
+ frag->fragment = text;
+
+ while (add234(files->frags, frag) != frag) {
+ if (!len) {
+ len = strlen(text);
+ frag->fragment = text = sresize(text, len+20, char);
+ }
+
+ sprintf(text + len, "-%d", ++suffix);
+ }
}
- *q = '\0';
+ return text;
}
static void html_contents_entry(htmloutput *ho, int depth, htmlsect *s,
element_open(ho, "li");
html_href(ho, thisfile, s->file, s->fragment);
- html_section_title(ho, s, thisfile, keywords, cfg);
+ html_section_title(ho, s, thisfile, keywords, cfg, FALSE);
element_close(ho, "a");
element_close(ho, "li");
}
static void html_section_title(htmloutput *ho, htmlsect *s, htmlfile *thisfile,
- keywordlist *keywords, htmlconfig *cfg)
+ keywordlist *keywords, htmlconfig *cfg,
+ int real)
{
if (s->title) {
sectlevel *sl;
html_text(ho, sl->number_suffix);
}
- html_words(ho, s->title->words, MARKUP,
+ html_words(ho, s->title->words, real ? ALL : MARKUP,
thisfile, keywords, cfg);
} else {
assert(s->type != NORMAL);