mdw@git.distorted.org.uk Git - sgt/halibut/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* HTML backend for Halibut
	3	*/
	4
	5	/*
	6	* TODO:
	7	*
	8	* - I'm never entirely convinced that having a fragment link to
	9	* come in at the start of the real text in the file is
	10	* sensible. Perhaps for the topmost section in the file, no
	11	* fragment should be used? (Though it should probably still be
	12	* _there_ even if unused.)
	13	*
	14	* - new configurability:
	15	* * a few new things explicitly labelled as `FIXME:
	16	* configurable' or similar.
	17	* * HTML flavour.
	18	* * Some means of specifying the distinction between
	19	* restrict-charset and output-charset. It seems to me that
	20	* `html-charset' is output-charset, and that
	21	* restrict-charset usually wants to be either output-charset
	22	* or UTF-8 (the latter indicating that any Unicode character
	23	* is fair game and it will be specified using &#foo; if it
	24	* isn't in output-charset). However, since XHTML defaults to
	25	* UTF-8 and it's fiddly to tell it otherwise, it's just
	26	* possible that some user may need to set restrict-charset
	27	* to their charset of choice while leaving _output_-charset
	28	* at UTF-8. Figure out some configuration, and apply it.
	29	*
	30	* - test all HTML flavours and ensure they validate sensibly. Fix
	31	* remaining confusion issues such as <?xml?> and obsoleteness
	32	* of <a name>.
	33	*
	34	* - proper naming of all fragment IDs. The ones for sections are
	35	* fine; the ones for numbered list and bibliociteds are utter
	36	* crap; the ones for indexes _might_ do but it might be worth
	37	* giving some thought to how to do them better.
	38	* + also set up a mechanism for ensuring that fragment IDs
	39	* never clash.
	40	*
	41	* - nonbreaking spaces?
	42	*/
	43
	44	#include <stdio.h>
	45	#include <stdlib.h>
	46	#include <assert.h>
	47	#include <limits.h>
	48	#include "halibut.h"
	49
	/*
	 * True if `type' is one of the paragraph types that introduces a
	 * section: the document title, a chapter of any flavour, a heading
	 * or a subsection.
	 */
	#define is_heading_type(type) ( (type) == para_Title || \
	                                (type) == para_Chapter || \
	                                (type) == para_Appendix || \
	                                (type) == para_UnnumberedChapter || \
	                                (type) == para_Heading || \
	                                (type) == para_Subsect)
	56
	/*
	 * Nesting depth of the heading paragraph `p': -1 for the title, 1
	 * for a heading, aux+1 for a subsection, and 0 for anything else
	 * (which, for heading paragraphs, means the chapter-level types).
	 */
	#define heading_depth(p) ( (p)->type == para_Title ? -1 : \
	                           (p)->type == para_Heading ? 1 : \
	                           (p)->type == para_Subsect ? (p)->aux + 1 : 0 )
	60
	/*
	 * How section numbers are presented at one level of the section
	 * hierarchy.
	 */
	typedef struct {
	    /* boolean, set from the `html-*-numeric' directives */
	    int just_numbers;
	    /* text set from the `html-*-suffix' directives */
	    wchar_t *number_suffix;
	} sectlevel;
	65
	66	typedef struct {
	67	int nasect;
	68	sectlevel achapter, *asect;
	69	int contents_depths; / 0=main, 1=chapter, 2=sect etc */
	70	int ncdepths;
	71	int address_section, visible_version_id;
	72	int leaf_contains_contents, leaf_smallest_contents;
	73	char *contents_filename;
	74	char *index_filename;
	75	char *template_filename;
	76	char *single_filename;
	77	char *template_fragment;
	78	char head_end, body_start, body_end, addr_start, *addr_end;
	79	char body_tag, nav_attr;
	80	wchar_t author, description;
	81	int restrict_charset, output_charset;
	82	enum {
	83	HTML_3_2, HTML_4,
	84	XHTML_1_0_TRANSITIONAL, XHTML_1_0_STRICT
	85	} htmlver;
	86	wchar_t lquote, rquote;
	87	int leaf_level;
	88	} htmlconfig;
	89
	/*
	 * Contents depth configured for `level' in `conf' (an htmlconfig
	 * lvalue), or the default of level+2 when no depth has been
	 * configured that far.
	 */
	#define contents_depth(conf, level) \
	    ( (level) < (conf).ncdepths ? (conf).contents_depths[level] : (level)+2 )
	92
	/* True for the XHTML members of the htmlver enumeration. */
	#define is_xhtml(ver) ( XHTML_1_0_TRANSITIONAL <= (ver) )
	94
	typedef struct htmlfile htmlfile;
	typedef struct htmlsect htmlsect;

	/*
	 * One output file. Files are chained through `next'; `first' and
	 * `last' point at sections (htmlsect is still an incomplete type
	 * here, so they can only be pointers).
	 */
	struct htmlfile {
	    htmlfile *next;
	    char *filename;
	    int last_fragment_number;      /* counter used to name index fragments */
	    int min_heading_depth;
	    htmlsect *first, *last;        /* first/last highest-level sections */
	};
	105
	106	struct htmlsect {
	107	htmlsect next, parent;
	108	htmlfile *file;
	109	paragraph title, text;
	110	enum { NORMAL, TOP, INDEX } type;
	111	int contents_depth;
	112	char *fragment;
	113	};
	114
	115	typedef struct {
	116	htmlfile head, tail;
	117	htmlfile single, index;
	118	} htmlfilelist;
	119
	120	typedef struct {
	121	htmlsect head, tail;
	122	} htmlsectlist;
	123
	124	typedef struct {
	125	int nrefs, refsize;
	126	word **refs;
	127	} htmlindex;
	128
	129	typedef struct {
	130	htmlsect *section;
	131	char *fragment;
	132	} htmlindexref;
	133
	134	typedef struct {
	135	/*
	136	* This level deals with charset conversion, starting and
	137	* ending tags, and writing to the file. It's the lexical
	138	* level.
	139	*/
	140	FILE *fp;
	141	int charset;
	142	charset_state cstate;
	143	int ver;
	144	enum {
	145	HO_NEUTRAL, HO_IN_TAG, HO_IN_EMPTY_TAG, HO_IN_TEXT
	146	} state;
	147	/*
	148	* Stuff beyond here deals with the higher syntactic level: it
	149	* tracks how many levels of <ul> are currently open when
	150	* producing a contents list, for example.
	151	*/
	152	int contents_level;
	153	} htmloutput;
	154
	155	static void html_file_section(htmlconfig cfg, htmlfilelist files,
	156	htmlsect *sect, int depth);
	157
	158	static htmlfile html_new_file(htmlfilelist list, char *filename);
	159	static htmlsect html_new_sect(htmlsectlist list, paragraph *title);
	160
	161	/* Flags for html_words() flags parameter */
	162	#define NOTHING 0x00
	163	#define MARKUP 0x01
	164	#define LINKS 0x02
	165	#define INDEXENTS 0x04
	166	#define ALL 0x07
	167	static void html_words(htmloutput ho, word words, int flags,
	168	htmlfile file, keywordlist keywords, htmlconfig *cfg);
	169	static void html_codepara(htmloutput ho, word words);
	170
	171	static void element_open(htmloutput ho, char const name);
	172	static void element_close(htmloutput ho, char const name);
	173	static void element_empty(htmloutput ho, char const name);
	174	static void element_attr(htmloutput ho, char const name, char const *value);
	175	static void element_attr_w(htmloutput ho, char const name,
	176	wchar_t const *value);
	177	static void html_text(htmloutput ho, wchar_t const str);
	178	static void html_text_limit(htmloutput ho, wchar_t const str, int maxlen);
	179	static void html_text_limit_internal(htmloutput ho, wchar_t const text,
	180	int maxlen, int quote_quotes);
	181	static void html_nl(htmloutput *ho);
	182	static void html_raw(htmloutput ho, char text);
	183	static void html_raw_as_attr(htmloutput ho, char text);
	184	static void cleanup(htmloutput *ho);
	185
	186	static void html_href(htmloutput ho, htmlfile thisfile,
	187	htmlfile targetfile, char targetfrag);
	188
	189	static char html_format(paragraph p, char *template_string);
	190	static void html_sanitise_fragment(char *text);
	191
	192	static void html_contents_entry(htmloutput ho, int depth, htmlsect s,
	193	htmlfile thisfile, keywordlist keywords,
	194	htmlconfig *cfg);
	195	static void html_section_title(htmloutput ho, htmlsect s,
	196	htmlfile thisfile, keywordlist keywords,
	197	htmlconfig *cfg);
	198
	199	static htmlconfig html_configure(paragraph *source) {
	200	htmlconfig ret;
	201	paragraph *p;
	202
	203	/*
	204	* Defaults.
	205	*/
	206	ret.leaf_level = 2;
	207	ret.achapter.just_numbers = FALSE;
	208	ret.achapter.number_suffix = L": ";
	209	ret.nasect = 1;
	210	ret.asect = mknewa(sectlevel, ret.nasect);
	211	ret.asect[0].just_numbers = TRUE;
	212	ret.asect[0].number_suffix = L" ";
	213	ret.ncdepths = 0;
	214	ret.contents_depths = 0;
	215	ret.visible_version_id = TRUE;
	216	ret.address_section = TRUE;
	217	ret.leaf_contains_contents = FALSE;
	218	ret.leaf_smallest_contents = 4;
	219	ret.single_filename = dupstr("Manual.html");
	220	ret.contents_filename = dupstr("Contents.html");
	221	ret.index_filename = dupstr("IndexPage.html");
	222	ret.template_filename = dupstr("%n.html");
	223	ret.template_fragment = dupstr("%b");
	224	ret.head_end = ret.body_tag = ret.body_start = ret.body_end =
	225	ret.addr_start = ret.addr_end = ret.nav_attr = NULL;
	226	ret.author = ret.description = NULL;
	227	ret.restrict_charset = CS_ASCII;
	228	ret.output_charset = CS_ASCII;
	229	ret.htmlver = HTML_4;
	230	/*
	231	* Default quote characters are Unicode matched single quotes,
	232	* falling back to ordinary ASCII ".
	233	*/
	234	ret.lquote = L"\x2018\0\x2019\0\"\0\"\0\0";
	235	ret.rquote = uadv(ret.lquote);
	236
	237	/*
	238	* Two-pass configuration so that we can pick up global config
	239	* (e.g. `quotes') before having it overridden by specific
	240	* config (`html-quotes'), irrespective of the order in which
	241	* they occur.
	242	*/
	243	for (p = source; p; p = p->next) {
	244	if (p->type == para_Config) {
	245	if (!ustricmp(p->keyword, L"quotes")) {
	246	if (uadv(p->keyword) && uadv(uadv(p->keyword))) {
	247	ret.lquote = uadv(p->keyword);
	248	ret.rquote = uadv(ret.lquote);
	249	}
	250	}
	251	}
	252	}
	253
	254	for (p = source; p; p = p->next) {
	255	if (p->type == para_Config) {
	256	wchar_t *k = p->keyword;
	257
	258	if (!ustrnicmp(k, L"xhtml-", 6))
	259	k++; /* treat `xhtml-' and `html-' the same */
	260
	261	if (!ustricmp(k, L"html-charset")) {
	262	char *csname = utoa_dup(uadv(k), CS_ASCII);
	263	ret.restrict_charset = ret.output_charset =
	264	charset_from_localenc(csname);
	265	sfree(csname);
	266	} else if (!ustricmp(k, L"html-single-filename")) {
	267	sfree(ret.single_filename);
	268	ret.single_filename = dupstr(adv(p->origkeyword));
	269	} else if (!ustricmp(k, L"html-contents-filename")) {
	270	sfree(ret.contents_filename);
	271	ret.contents_filename = dupstr(adv(p->origkeyword));
	272	} else if (!ustricmp(k, L"html-index-filename")) {
	273	sfree(ret.index_filename);
	274	ret.index_filename = dupstr(adv(p->origkeyword));
	275	} else if (!ustricmp(k, L"html-template-filename")) {
	276	sfree(ret.template_filename);
	277	ret.template_filename = dupstr(adv(p->origkeyword));
	278	} else if (!ustricmp(k, L"html-template-fragment")) {
	279	sfree(ret.template_fragment);
	280	ret.template_fragment = dupstr(adv(p->origkeyword));
	281	} else if (!ustricmp(k, L"html-chapter-numeric")) {
	282	ret.achapter.just_numbers = utob(uadv(k));
	283	} else if (!ustricmp(k, L"html-chapter-suffix")) {
	284	ret.achapter.number_suffix = uadv(k);
	285	} else if (!ustricmp(k, L"html-leaf-level")) {
	286	ret.leaf_level = utoi(uadv(k));
	287	} else if (!ustricmp(k, L"html-section-numeric")) {
	288	wchar_t *q = uadv(k);
	289	int n = 0;
	290	if (uisdigit(*q)) {
	291	n = utoi(q);
	292	q = uadv(q);
	293	}
	294	if (n >= ret.nasect) {
	295	int i;
	296	ret.asect = resize(ret.asect, n+1);
	297	for (i = ret.nasect; i <= n; i++)
	298	ret.asect[i] = ret.asect[ret.nasect-1];
	299	ret.nasect = n+1;
	300	}
	301	ret.asect[n].just_numbers = utob(q);
	302	} else if (!ustricmp(k, L"html-section-suffix")) {
	303	wchar_t *q = uadv(k);
	304	int n = 0;
	305	if (uisdigit(*q)) {
	306	n = utoi(q);
	307	q = uadv(q);
	308	}
	309	if (n >= ret.nasect) {
	310	int i;
	311	ret.asect = resize(ret.asect, n+1);
	312	for (i = ret.nasect; i <= n; i++) {
	313	ret.asect[i] = ret.asect[ret.nasect-1];
	314	}
	315	ret.nasect = n+1;
	316	}
	317	ret.asect[n].number_suffix = q;
	318	} else if (!ustricmp(k, L"html-contents-depth") \|\|
	319	!ustrnicmp(k, L"html-contents-depth-", 20)) {
	320	/*
	321	* Relic of old implementation: this directive used
	322	* to be written as \cfg{html-contents-depth-3}{2}
	323	* rather than the usual Halibut convention of
	324	* \cfg{html-contents-depth}{3}{2}. We therefore
	325	* support both.
	326	*/
	327	wchar_t *q = k[19] ? k+20 : uadv(k);
	328	int n = 0;
	329	if (uisdigit(*q)) {
	330	n = utoi(q);
	331	q = uadv(q);
	332	}
	333	if (n >= ret.ncdepths) {
	334	int i;
	335	ret.contents_depths = resize(ret.contents_depths, n+1);
	336	for (i = ret.ncdepths; i <= n; i++) {
	337	ret.contents_depths[i] = i+2;
	338	}
	339	ret.ncdepths = n+1;
	340	}
	341	ret.contents_depths[n] = utoi(q);
	342	} else if (!ustricmp(k, L"html-head-end")) {
	343	ret.head_end = adv(p->origkeyword);
	344	} else if (!ustricmp(k, L"html-body-tag")) {
	345	ret.body_tag = adv(p->origkeyword);
	346	} else if (!ustricmp(k, L"html-body-start")) {
	347	ret.body_start = adv(p->origkeyword);
	348	} else if (!ustricmp(k, L"html-body-end")) {
	349	ret.body_end = adv(p->origkeyword);
	350	} else if (!ustricmp(k, L"html-address-start")) {
	351	ret.addr_start = adv(p->origkeyword);
	352	} else if (!ustricmp(k, L"html-address-end")) {
	353	ret.addr_end = adv(p->origkeyword);
	354	} else if (!ustricmp(k, L"html-navigation-attributes")) {
	355	ret.nav_attr = adv(p->origkeyword);
	356	} else if (!ustricmp(k, L"html-author")) {
	357	ret.author = uadv(k);
	358	} else if (!ustricmp(k, L"html-description")) {
	359	ret.description = uadv(k);
	360	} else if (!ustricmp(k, L"html-suppress-address")) {
	361	ret.address_section = !utob(uadv(k));
	362	} else if (!ustricmp(k, L"html-versionid")) {
	363	ret.visible_version_id = utob(uadv(k));
	364	} else if (!ustricmp(k, L"html-quotes")) {
	365	if (uadv(k) && uadv(uadv(k))) {
	366	ret.lquote = uadv(k);
	367	ret.rquote = uadv(ret.lquote);
	368	}
	369	} else if (!ustricmp(k, L"html-leaf-contains-contents")) {
	370	ret.leaf_contains_contents = utob(uadv(k));
	371	} else if (!ustricmp(k, L"html-leaf-smallest-contents")) {
	372	ret.leaf_smallest_contents = utoi(uadv(k));
	373	}
	374	}
	375	}
	376
	377	/*
	378	* Now process fallbacks on quote characters.
	379	*/
	380	while (uadv(ret.rquote) && uadv(uadv(ret.rquote)) &&
	381	(!cvt_ok(ret.restrict_charset, ret.lquote) \|\|
	382	!cvt_ok(ret.restrict_charset, ret.rquote))) {
	383	ret.lquote = uadv(ret.rquote);
	384	ret.rquote = uadv(ret.lquote);
	385	}
	386
	387	return ret;
	388	}
	389
	390	paragraph html_config_filename(char filename)
	391	{
	392	/*
	393	* If the user passes in a single filename as a parameter to
	394	* the `--html' command-line option, then we should assume it
	395	* to imply _two_ config directives:
	396	* \cfg{html-single-filename}{whatever} and
	397	* \cfg{html-leaf-level}{0}; the rationale being that the user
	398	* wants their output _in that file_.
	399	*/
	400	paragraph p, q;
	401
	402	p = cmdline_cfg_simple("html-single-filename", filename, NULL);
	403	q = cmdline_cfg_simple("html-leaf-level", "0", NULL);
	404	p->next = q;
	405	return p;
	406	}
	407
	408	void html_backend(paragraph sourceform, keywordlist keywords,
	409	indexdata idx, void unused) {
	410	paragraph *p;
	411	htmlconfig conf;
	412	htmlfilelist files = { NULL, NULL, NULL, NULL };
	413	htmlsectlist sects = { NULL, NULL }, nonsects = { NULL, NULL };
	414
	415	IGNORE(unused);
	416
	417	conf = html_configure(sourceform);
	418
	419	/*
	420	* We're going to make heavy use of paragraphs' private data
	421	* fields in the forthcoming code. Clear them first, so we can
	422	* reliably tell whether we have auxiliary data for a
	423	* particular paragraph.
	424	*/
	425	for (p = sourceform; p; p = p->next)
	426	p->private_data = NULL;
	427
	428	/*
	429	* Start by figuring out into which file each piece of the
	430	* document should be put. We'll do this by inventing an
	431	* `htmlsect' structure and stashing it in the private_data
	432	* field of each section paragraph; we also need one additional
	433	* htmlsect for the document index, which won't show up in the
	434	* source form but needs to be consistently mentioned in
	435	* contents links.
	436	*
	437	* While we're here, we'll also invent the HTML fragment name
	438	* for each section.
	439	*/
	440	{
	441	htmlsect topsect, sect;
	442	int d;
	443
	444	topsect = html_new_sect(&sects, p);
	445	topsect->type = TOP;
	446	topsect->title = NULL;
	447	topsect->text = sourceform;
	448	topsect->contents_depth = contents_depth(conf, 0);
	449	html_file_section(&conf, &files, topsect, -1);
	450	topsect->fragment = NULL;
	451
	452	for (p = sourceform; p; p = p->next)
	453	if (is_heading_type(p->type)) {
	454	d = heading_depth(p);
	455
	456	if (p->type == para_Title) {
	457	topsect->title = p;
	458	continue;
	459	}
	460
	461	sect = html_new_sect(&sects, p);
	462	sect->text = p->next;
	463
	464	sect->contents_depth = contents_depth(conf, d+1) - (d+1);
	465
	466	if (p->parent) {
	467	sect->parent = (htmlsect *)p->parent->private_data;
	468	assert(sect->parent != NULL);
	469	} else
	470	sect->parent = topsect;
	471	p->private_data = sect;
	472
	473	html_file_section(&conf, &files, sect, d);
	474
	475	sect->fragment = html_format(p, conf.template_fragment);
	476	html_sanitise_fragment(sect->fragment);
	477	/* FIXME: clash checking? add to a tree of (file,frag)? */
	478	}
	479
	480	/* And the index. */
	481	sect = html_new_sect(&sects, NULL);
	482	sect->fragment = dupstr("Index"); /* FIXME: this _can't_ be right */
	483	sect->text = NULL;
	484	sect->type = INDEX;
	485	sect->parent = topsect;
	486	html_file_section(&conf, &files, sect, 0); /* peer of chapters */
	487	files.index = sect->file;
	488	}
	489
	490	/*
	491	* Go through the keyword list and sort out fragment IDs for
	492	* all the potentially referenced paragraphs which _aren't_
	493	* headings.
	494	*/
	495	{
	496	int i;
	497	keyword *kw;
	498	htmlsect *sect;
	499
	500	for (i = 0; (kw = index234(keywords->keys, i)) != NULL; i++) {
	501	paragraph q, p = kw->para;
	502
	503	if (!is_heading_type(p->type)) {
	504	htmlsect *parent;
	505
	506	/*
	507	* Find the paragraph's parent htmlsect, to
	508	* determine which file it will end up in.
	509	*/
	510	q = p->parent;
	511	if (!q) {
	512	/*
	513	* Preamble paragraphs have no parent. So if we
	514	* have a non-heading with no parent, it must
	515	* be preamble, and therefore its parent
	516	* htmlsect must be the preamble one.
	517	*/
	518	assert(sects.head &&
	519	sects.head->type == TOP);
	520	parent = sects.head;
	521	} else
	522	parent = (htmlsect *)q->private_data;
	523
	524	/*
	525	* Now we can construct an htmlsect for this
	526	* paragraph itself, taking care to put it in the
	527	* list of non-sections rather than the list of
	528	* sections (so that traverses of the `sects' list
	529	* won't attempt to add it to the contents or
	530	* anything weird like that).
	531	*/
	532	sect = html_new_sect(&nonsects, p);
	533	sect->file = parent->file;
	534	sect->parent = parent;
	535	p->private_data = sect;
	536
	537	/*
	538	* FIXME: We need a much better means of naming
	539	* these, possibly involving an additional
	540	* configuration template. For the moment I'll just
	541	* invent something completely stupid.
	542	*/
	543	sect->fragment = mknewa(char, 40);
	544	sprintf(sect->fragment, "frag%p", sect);
	545	}
	546	}
	547	}
	548
	549	/*
	550	* Now sort out the index. This involves:
	551	*
	552	* - For each index term, we set up an htmlindex structure to
	553	* store all the references to that term.
	554	*
	555	* - Then we make a pass over the actual document, finding
	556	* every word_IndexRef; for each one, we actually figure out
	557	* the HTML filename/fragment pair we will use to reference
	558	* it, store that information in the private data field of
	559	* the word_IndexRef itself (so we can recreate it when the
	560	* time comes to output our HTML), and add a reference to it
	561	* to the index term in question.
	562	*/
	563	{
	564	int i;
	565	indexentry *entry;
	566	htmlsect *lastsect;
	567	word *w;
	568
	569	/*
	570	* Set up the htmlindex structures.
	571	*/
	572
	573	for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
	574	htmlindex *hi = mknew(htmlindex);
	575
	576	hi->nrefs = hi->refsize = 0;
	577	hi->refs = NULL;
	578
	579	entry->backend_data = hi;
	580	}
	581
	582	/*
	583	* Run over the document inventing fragments. Each fragment
	584	* is of the form `i' followed by an integer.
	585	*
	586	* FIXME: Probably in the file-organisation pass we should
	587	* work out the fragment names of every section, so that we
	588	* could load them all into a tree and hence ensure these
	589	* index fragments don't clash with them.
	590	*/
	591	lastsect = NULL;
	592	for (p = sourceform; p; p = p->next) {
	593	if (is_heading_type(p->type))
	594	lastsect = (htmlsect *)p->private_data;
	595
	596	for (w = p->words; w; w = w->next)
	597	if (w->type == word_IndexRef) {
	598	htmlindexref *hr = mknew(htmlindexref);
	599	indextag *tag;
	600	int i;
	601
	602	hr->section = lastsect;
	603	/* FIXME: clash checking */
	604	{
	605	char buf[40];
	606	sprintf(buf, "i%d",
	607	lastsect->file->last_fragment_number++);
	608	hr->fragment = dupstr(buf);
	609	}
	610	w->private_data = hr;
	611
	612	tag = index_findtag(idx, w->text);
	613	if (!tag)
	614	break;
	615
	616	for (i = 0; i < tag->nrefs; i++) {
	617	indexentry *entry = tag->refs[i];
	618	htmlindex hi = (htmlindex )entry->backend_data;
	619
	620	if (hi->nrefs >= hi->refsize) {
	621	hi->refsize += 32;
	622	hi->refs = resize(hi->refs, hi->refsize);
	623	}
	624
	625	hi->refs[hi->nrefs++] = w;
	626	}
	627	}
	628	}
	629	}
	630
	631	/*
	632	* Now we're ready to write out the actual HTML files.
	633	*
	634	* For each file:
	635	*
	636	* - we open that file and write its header
	637	* - we run down the list of sections
	638	* - for each section directly contained within that file, we
	639	* output the section text
	640	* - for each section which is not in the file but which has a
	641	* parent that is, we output a contents entry for the
	642	* section if appropriate
	643	* - finally, we output the file trailer and close the file.
	644	*/
	645	{
	646	htmlfile f, prevf;
	647	htmlsect *s;
	648	paragraph *p;
	649
	650	prevf = NULL;
	651
	652	for (f = files.head; f; f = f->next) {
	653	htmloutput ho;
	654	int displaying;
	655	enum LISTTYPE { NOLIST, UL, OL, DL };
	656	enum ITEMTYPE { NOITEM, LI, DT, DD };
	657	struct stackelement {
	658	struct stackelement *next;
	659	enum LISTTYPE listtype;
	660	enum ITEMTYPE itemtype;
	661	} *stackhead;
	662
	663	#define listname(lt) ( (lt)==UL ? "ul" : (lt)==OL ? "ol" : "dl" )
	664	#define itemname(lt) ( (lt)==LI ? "li" : (lt)==DT ? "dt" : "dd" )
	665
	666	ho.fp = fopen(f->filename, "w");
	667	ho.charset = conf.output_charset;
	668	ho.cstate = charset_init_state;
	669	ho.ver = conf.htmlver;
	670	ho.state = HO_NEUTRAL;
	671	ho.contents_level = 0;
	672
	673	/* <!DOCTYPE>. */
	674	switch (conf.htmlver) {
	675	case HTML_3_2:
	676	fprintf(ho.fp, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD "
	677	"HTML 3.2 Final//EN\">\n");
	678	break;
	679	case HTML_4:
	680	fprintf(ho.fp, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML"
	681	" 4.01//EN\"\n\"http://www.w3.org/TR/html4/"
	682	"strict.dtd\">\n");
	683	break;
	684	case XHTML_1_0_TRANSITIONAL:
	685	/* FIXME: <?xml?> to specify character encoding.
	686	* This breaks HTML backwards compat, so perhaps avoid, or
	687	* perhaps only emit when not using the default UTF-8? */
	688	fprintf(ho.fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML"
	689	" 1.0 Transitional//EN\"\n\"http://www.w3.org/TR/"
	690	"xhtml1/DTD/xhtml1-transitional.dtd\">\n");
	691	break;
	692	case XHTML_1_0_STRICT:
	693	/* FIXME: <?xml?> to specify character encoding. */
	694	fprintf(ho.fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML"
	695	" 1.0 Strict//EN\"\n\"http://www.w3.org/TR/xhtml1/"
	696	"DTD/xhtml1-strict.dtd\">\n");
	697	break;
	698	}
	699
	700	element_open(&ho, "html");
	701	if (is_xhtml(conf.htmlver)) {
	702	element_attr(&ho, "xmlns", "http://www.w3.org/1999/xhtml");
	703	}
	704	html_nl(&ho);
	705
	706	element_open(&ho, "head");
	707	html_nl(&ho);
	708
	709	element_empty(&ho, "meta");
	710	element_attr(&ho, "http-equiv", "content-type");
	711	{
	712	char buf[200];
	713	sprintf(buf, "text/html; charset=%.150s",
	714	charset_to_mimeenc(conf.output_charset));
	715	element_attr(&ho, "content", buf);
	716	}
	717	html_nl(&ho);
	718
	719	if (conf.author) {
	720	element_empty(&ho, "meta");
	721	element_attr(&ho, "name", "author");
	722	element_attr_w(&ho, "content", conf.author);
	723	html_nl(&ho);
	724	}
	725
	726	if (conf.description) {
	727	element_empty(&ho, "meta");
	728	element_attr(&ho, "name", "description");
	729	element_attr_w(&ho, "content", conf.description);
	730	html_nl(&ho);
	731	}
	732
	733	element_open(&ho, "title");
	734	if (f->first && f->first->title) {
	735	html_words(&ho, f->first->title->words, NOTHING,
	736	f, keywords, &conf);
	737
	738	assert(f->last);
	739	if (f->last != f->first && f->last->title) {
	740	html_text(&ho, L" - "); /* FIXME: configurable? */
	741	html_words(&ho, f->last->title->words, NOTHING,
	742	f, keywords, &conf);
	743	}
	744	}
	745	element_close(&ho, "title");
	746	html_nl(&ho);
	747
	748	if (conf.head_end)
	749	html_raw(&ho, conf.head_end);
	750
	751	element_close(&ho, "head");
	752	html_nl(&ho);
	753
	754	/* FIXME: need to be able to specify replacement for this */
	755	if (conf.body_tag)
	756	html_raw(&ho, conf.body_tag);
	757	else
	758	element_open(&ho, "body");
	759	html_nl(&ho);
	760
	761	if (conf.body_start)
	762	html_raw(&ho, conf.body_start);
	763
	764	/*
	765	* Write out a nav bar. Special case: we don't do this
	766	* if there is only one file.
	767	*/
	768	if (files.head != files.tail) {
	769	element_open(&ho, "p");
	770	if (conf.nav_attr)
	771	html_raw_as_attr(&ho, conf.nav_attr);
	772
	773	if (prevf) {
	774	element_open(&ho, "a");
	775	element_attr(&ho, "href", prevf->filename);
	776	}
	777	html_text(&ho, L"Previous");/* FIXME: conf? */
	778	if (prevf)
	779	element_close(&ho, "a");
	780
	781	html_text(&ho, L" \| "); /* FIXME: conf? */
	782
	783	if (f != files.head) {
	784	element_open(&ho, "a");
	785	element_attr(&ho, "href", files.head->filename);
	786	}
	787	html_text(&ho, L"Contents");/* FIXME: conf? */
	788	if (f != files.head)
	789	element_close(&ho, "a");
	790
	791	html_text(&ho, L" \| "); /* FIXME: conf? */
	792
	793	if (f != files.index) {
	794	element_open(&ho, "a");
	795	element_attr(&ho, "href", files.index->filename);
	796	}
	797	html_text(&ho, L"Index");/* FIXME: conf? */
	798	if (f != files.index)
	799	element_close(&ho, "a");
	800
	801	html_text(&ho, L" \| "); /* FIXME: conf? */
	802
	803	if (f->next) {
	804	element_open(&ho, "a");
	805	element_attr(&ho, "href", f->next->filename);
	806	}
	807	html_text(&ho, L"Next"); /* FIXME: conf? */
	808	if (f->next)
	809	element_close(&ho, "a");
	810
	811	element_close(&ho, "p");
	812	html_nl(&ho);
	813	}
	814	prevf = f;
	815
	816	/*
	817	* Write out a prefix TOC for the file.
	818	*
	819	* We start by going through the section list and
	820	* collecting the sections which need to be added to
	821	* the contents. On the way, we also test to see if
	822	* this file is a leaf file (defined as one which
	823	* contains all descendants of any section it
	824	* contains), because this will play a part in our
	825	* decision on whether or not to _output_ the TOC.
	826	*
	827	* Special case: we absolutely do not do this if we're
	828	* in single-file mode.
	829	*/
	830	if (files.head != files.tail) {
	831	int ntoc = 0, tocsize = 0;
	832	htmlsect **toc = NULL;
	833	int leaf = TRUE;
	834
	835	for (s = sects.head; s; s = s->next) {
	836	htmlsect a, ac;
	837	int depth, adepth;
	838
	839	/*
	840	* Search up from this section until we find
	841	* the highest-level one which belongs in this
	842	* file.
	843	*/
	844	depth = adepth = 0;
	845	a = NULL;
	846	for (ac = s; ac; ac = ac->parent) {
	847	if (ac->file == f) {
	848	a = ac;
	849	adepth = depth;
	850	}
	851	depth++;
	852	}
	853
	854	if (s->file != f && a != NULL)
	855	leaf = FALSE;
	856
	857	if (a) {
	858	if (adepth <= a->contents_depth) {
	859	if (ntoc >= tocsize) {
	860	tocsize += 64;
	861	toc = resize(toc, tocsize);
	862	}
	863	toc[ntoc++] = s;
	864	}
	865	}
	866	}
	867
	868	if (leaf && conf.leaf_contains_contents &&
	869	ntoc >= conf.leaf_smallest_contents) {
	870	int i;
	871
	872	for (i = 0; i < ntoc; i++) {
	873	htmlsect *s = toc[i];
	874	int hlevel = (s->type == TOP ? -1 :
	875	s->type == INDEX ? 0 :
	876	heading_depth(s->title))
	877	- f->min_heading_depth + 1;
	878
	879	assert(hlevel >= 1);
	880	html_contents_entry(&ho, hlevel, s,
	881	f, keywords, &conf);
	882	}
	883	html_contents_entry(&ho, 0, NULL, f, keywords, &conf);
	884	}
	885	}
	886
	887	/*
	888	* Now go through the document and output some real
	889	* text.
	890	*/
	891	displaying = FALSE;
	892	for (s = sects.head; s; s = s->next) {
	893	if (s->file == f) {
	894	/*
	895	* This section belongs in this file.
	896	* Display it.
	897	*/
	898	displaying = TRUE;
	899	} else {
	900	htmlsect a, ac;
	901	int depth, adepth;
	902
	903	displaying = FALSE;
	904
	905	/*
	906	* Search up from this section until we find
	907	* the highest-level one which belongs in this
	908	* file.
	909	*/
	910	depth = adepth = 0;
	911	a = NULL;
	912	for (ac = s; ac; ac = ac->parent) {
	913	if (ac->file == f) {
	914	a = ac;
	915	adepth = depth;
	916	}
	917	depth++;
	918	}
	919
	920	if (a != NULL) {
	921	/*
	922	* This section does not belong in this
	923	* file, but an ancestor of it does. Write
	924	* out a contents table entry, if the depth
	925	* doesn't exceed the maximum contents
	926	* depth for the ancestor section.
	927	*/
	928	if (adepth <= a->contents_depth) {
	929	html_contents_entry(&ho, adepth, s,
	930	f, keywords, &conf);
	931	}
	932	}
	933	}
	934
	935	if (displaying) {
	936	int hlevel;
	937	char htag[3];
	938
	939	html_contents_entry(&ho, 0, NULL, f, keywords, &conf);
	940
	941	/*
	942	* Display the section heading.
	943	*/
	944
	945	hlevel = (s->type == TOP ? -1 :
	946	s->type == INDEX ? 0 :
	947	heading_depth(s->title))
	948	- f->min_heading_depth + 1;
	949	assert(hlevel >= 1);
	950	/* HTML headings only go up to <h6> */
	951	if (hlevel > 6)
	952	hlevel = 6;
	953	htag[0] = 'h';
	954	htag[1] = '0' + hlevel;
	955	htag[2] = '\0';
	956	element_open(&ho, htag);
	957
	958	/*
	959	* Provide anchor for cross-links to target.
	960	*
	961	* FIXME: AIcurrentlyUI, this needs to be done
	962	* differently in XHTML because <a name> is
	963	* deprecated or obsolete.
	964	*
	965	* (Also we'll have to do this separately in
	966	* other paragraph types - NumberedList and
	967	* BiblioCited.)
	968	*/
	969	element_open(&ho, "a");
	970	element_attr(&ho, "name", s->fragment);
	971	element_close(&ho, "a");
	972
	973	html_section_title(&ho, s, f, keywords, &conf);
	974
	975	element_close(&ho, htag);
	976
	977	/*
	978	* Now display the section text.
	979	*/
	980	if (s->text) {
	981	stackhead = mknew(struct stackelement);
	982	stackhead->next = NULL;
	983	stackhead->listtype = NOLIST;
	984	stackhead->itemtype = NOITEM;
	985
	986	for (p = s->text;; p = p->next) {
	987	enum LISTTYPE listtype;
	988	struct stackelement *se;
	989
	990	/*
	991	* Preliminary switch to figure out what
	992	* sort of list we expect to be inside at
	993	* this stage.
	994	*
	995	* Since p may still be NULL at this point,
	996	* I invent a harmless paragraph type for
	997	* it if it is.
	998	*/
	999	switch (p ? p->type : para_Normal) {
	1000	case para_Rule:
	1001	case para_Normal:
	1002	case para_Copyright:
	1003	case para_BiblioCited:
	1004	case para_Code:
	1005	case para_QuotePush:
	1006	case para_QuotePop:
	1007	case para_Chapter:
	1008	case para_Appendix:
	1009	case para_UnnumberedChapter:
	1010	case para_Heading:
	1011	case para_Subsect:
	1012	case para_LcontPop:
	1013	listtype = NOLIST;
	1014	break;
	1015
	1016	case para_Bullet:
	1017	listtype = UL;
	1018	break;
	1019
	1020	case para_NumberedList:
	1021	listtype = OL;
	1022	break;
	1023
	1024	case para_DescribedThing:
	1025	case para_Description:
	1026	listtype = DL;
	1027	break;
	1028
	1029	case para_LcontPush:
	1030	se = mknew(struct stackelement);
	1031	se->next = stackhead;
	1032	se->listtype = NOLIST;
	1033	se->itemtype = NOITEM;
	1034	stackhead = se;
	1035	continue;
	1036
	1037	default: /* some totally non-printing para */
	1038	continue;
	1039	}
	1040
	1041	html_nl(&ho);
	1042
	1043	/*
	1044	* Terminate the most recent list item, if
	1045	* any. (We left this until after
	1046	* processing LcontPush, since in that case
	1047	* the list item won't want to be
	1048	* terminated until after the corresponding
	1049	* LcontPop.)
	1050	*/
	1051	if (stackhead->itemtype != NOITEM) {
	1052	element_close(&ho, itemname(stackhead->itemtype));
	1053	html_nl(&ho);
	1054	}
	1055	stackhead->itemtype = NOITEM;
	1056
	1057	/*
	1058	* Terminate the current list, if it's not
	1059	* the one we want to be in.
	1060	*/
	1061	if (listtype != stackhead->listtype &&
	1062	stackhead->listtype != NOLIST) {
	1063	element_close(&ho, listname(stackhead->listtype));
	1064	html_nl(&ho);
	1065	}
	1066
	1067	/*
	1068	* Leave the loop if our time has come.
	1069	*/
	1070	if (!p \|\| (is_heading_type(p->type) &&
	1071	p->type != para_Title))
	1072	break; /* end of section text */
	1073
	1074	/*
	1075	* Start a fresh list if necessary.
	1076	*/
	1077	if (listtype != stackhead->listtype &&
	1078	listtype != NOLIST)
	1079	element_open(&ho, listname(listtype));
	1080
	1081	stackhead->listtype = listtype;
	1082
	1083	switch (p->type) {
	1084	case para_Rule:
	1085	element_empty(&ho, "hr");
	1086	break;
	1087	case para_Code:
	1088	html_codepara(&ho, p->words);
	1089	break;
	1090	case para_Normal:
	1091	case para_Copyright:
	1092	element_open(&ho, "p");
	1093	html_nl(&ho);
	1094	html_words(&ho, p->words, ALL,
	1095	f, keywords, &conf);
	1096	html_nl(&ho);
	1097	element_close(&ho, "p");
	1098	break;
	1099	case para_BiblioCited:
	1100	element_open(&ho, "p");
	1101	if (p->private_data) {
	1102	htmlsect s = (htmlsect )p->private_data;
	1103	element_open(&ho, "a");
	1104	element_attr(&ho, "name", s->fragment);
	1105	element_close(&ho, "a");
	1106	}
	1107	html_nl(&ho);
	1108	html_words(&ho, p->kwtext, ALL,
	1109	f, keywords, &conf);
	1110	html_text(&ho, L" ");
	1111	html_words(&ho, p->words, ALL,
	1112	f, keywords, &conf);
	1113	html_nl(&ho);
	1114	element_close(&ho, "p");
	1115	break;
	1116	case para_Bullet:
	1117	case para_NumberedList:
	1118	element_open(&ho, "li");
	1119	if (p->private_data) {
	1120	htmlsect s = (htmlsect )p->private_data;
	1121	element_open(&ho, "a");
	1122	element_attr(&ho, "name", s->fragment);
	1123	element_close(&ho, "a");
	1124	}
	1125	html_nl(&ho);
	1126	stackhead->itemtype = LI;
	1127	html_words(&ho, p->words, ALL,
	1128	f, keywords, &conf);
	1129	break;
	1130	case para_DescribedThing:
	1131	element_open(&ho, "dt");
	1132	html_nl(&ho);
	1133	stackhead->itemtype = DT;
	1134	html_words(&ho, p->words, ALL,
	1135	f, keywords, &conf);
	1136	break;
	1137	case para_Description:
	1138	element_open(&ho, "dd");
	1139	html_nl(&ho);
	1140	stackhead->itemtype = DD;
	1141	html_words(&ho, p->words, ALL,
	1142	f, keywords, &conf);
	1143	break;
	1144
	1145	case para_QuotePush:
	1146	element_open(&ho, "blockquote");
	1147	break;
	1148	case para_QuotePop:
	1149	element_close(&ho, "blockquote");
	1150	break;
	1151
	1152	case para_LcontPop:
	1153	se = stackhead;
	1154	stackhead = stackhead->next;
	1155	assert(stackhead);
	1156	sfree(se);
	1157	break;
	1158	}
	1159	}
	1160
	1161	assert(stackhead && !stackhead->next);
	1162	sfree(stackhead);
	1163	}
	1164
	1165	if (s->type == INDEX) {
	1166	indexentry *entry;
	1167	int i;
	1168
	1169	/*
	1170	* This section is the index. I'll just
	1171	* render it as a single paragraph, with a
	1172	* colon between the index term and the
	1173	* references, and <br> in between each
	1174	* entry.
	1175	*/
	1176	element_open(&ho, "p");
	1177
	1178	for (i = 0; (entry =
	1179	index234(idx->entries, i)) != NULL; i++) {
	1180	htmlindex hi = (htmlindex )entry->backend_data;
	1181	int j;
	1182
	1183	if (i > 0)
	1184	element_empty(&ho, "br");
	1185	html_nl(&ho);
	1186
	1187	html_words(&ho, entry->text, MARKUP\|LINKS,
	1188	f, keywords, &conf);
	1189
	1190	html_text(&ho, L": ");/* FIXME: configurable */
	1191
	1192	for (j = 0; j < hi->nrefs; j++) {
	1193	htmlindexref *hr =
	1194	(htmlindexref *)hi->refs[j]->private_data;
	1195	paragraph *p = hr->section->title;
	1196
	1197	if (j > 0)
	1198	html_text(&ho, L", "); /* FIXME: conf */
	1199
	1200	html_href(&ho, f, hr->section->file,
	1201	hr->fragment);
	1202	if (p && p->kwtext)
	1203	html_words(&ho, p->kwtext, MARKUP\|LINKS,
	1204	f, keywords, &conf);
	1205	else if (p && p->words)
	1206	html_words(&ho, p->words, MARKUP\|LINKS,
	1207	f, keywords, &conf);
	1208	else
	1209	html_text(&ho, L"FIXME");
	1210	element_close(&ho, "a");
	1211	}
	1212	}
	1213	element_close(&ho, "p");
	1214	}
	1215	}
	1216	}
	1217
	1218	html_contents_entry(&ho, 0, NULL, f, keywords, &conf);
	1219	html_nl(&ho);
	1220
	1221	{
	1222	/*
	1223	* Footer.
	1224	*/
	1225	int done_version_ids = FALSE;
	1226
	1227	element_empty(&ho, "hr");
	1228
	1229	if (conf.body_end)
	1230	html_raw(&ho, conf.body_end);
	1231
	1232	if (conf.address_section) {
	1233	element_open(&ho, "address");
	1234	if (conf.addr_start) {
	1235	html_raw(&ho, conf.addr_start);
	1236	html_nl(&ho);
	1237	}
	1238	if (conf.visible_version_id) {
	1239	int started = FALSE;
	1240	for (p = sourceform; p; p = p->next)
	1241	if (p->type == para_VersionID) {
	1242	if (!started)
	1243	element_open(&ho, "p");
	1244	else
	1245	element_empty(&ho, "br");
	1246	html_nl(&ho);
	1247	html_text(&ho, L"["); /* FIXME: conf? */
	1248	html_words(&ho, p->words, NOTHING,
	1249	f, keywords, &conf);
	1250	html_text(&ho, L"]"); /* FIXME: conf? */
	1251	started = TRUE;
	1252	}
	1253	if (started)
	1254	element_close(&ho, "p");
	1255	done_version_ids = TRUE;
	1256	}
	1257	if (conf.addr_end)
	1258	html_raw(&ho, conf.addr_end);
	1259	element_close(&ho, "address");
	1260	}
	1261
	1262	if (!done_version_ids) {
	1263	/*
	1264	* If the user didn't want the version IDs
	1265	* visible, I think we still have a duty to put
	1266	* them in an HTML comment.
	1267	*/
	1268	int started = FALSE;
	1269	for (p = sourceform; p; p = p->next)
	1270	if (p->type == para_VersionID) {
	1271	if (!started) {
	1272	html_raw(&ho, "<!-- version IDs:\n");
	1273	started = TRUE;
	1274	}
	1275	html_words(&ho, p->words, NOTHING,
	1276	f, keywords, &conf);
	1277	html_nl(&ho);
	1278	}
	1279	if (started)
	1280	html_raw(&ho, "-->\n");
	1281	}
	1282	}
	1283
	1284	element_close(&ho, "body");
	1285	html_nl(&ho);
	1286	element_close(&ho, "html");
	1287	html_nl(&ho);
	1288	cleanup(&ho);
	1289	}
	1290	}
	1291
	1292	/*
	1293	* FIXME: Figure out a way to free the htmlindex and
	1294	* htmlindexref structures.
	1295	*/
	1296	}
	1297
	1298	static void html_file_section(htmlconfig cfg, htmlfilelist files,
	1299	htmlsect *sect, int depth)
	1300	{
	1301	htmlfile *file;
	1302	int ldepth;
	1303
	1304	/*
	1305	* `depth' is derived from the heading_depth() macro at the top
	1306	* of this file, which counts title as -1, chapter as 0,
	1307	* heading as 1 and subsection as 2. However, the semantics of
	1308	* cfg->leaf_level are defined to count chapter as 1, heading
	1309	* as 2 etc. So first I increment depth :-(
	1310	*/
	1311	ldepth = depth + 1;
	1312
	1313	if (cfg->leaf_level == 0) {
	1314	/*
	1315	* leaf_level==0 is a special case, in which everything is
	1316	* put into a single file.
	1317	*/
	1318	if (!files->single)
	1319	files->single = html_new_file(files, cfg->single_filename);
	1320
	1321	file = files->single;
	1322	} else {
	1323	/*
	1324	* If the depth of this section is at or above leaf_level,
	1325	* we invent a fresh file and put this section at its head.
	1326	* Otherwise, we put it in the same file as its parent
	1327	* section.
	1328	*/
	1329	if (ldepth > cfg->leaf_level) {
	1330	/*
	1331	* We know that sect->parent cannot be NULL. The only
	1332	* circumstance in which it can be is if sect is at
	1333	* chapter or appendix level, i.e. ldepth==1; and if
	1334	* that's the case, then we cannot have entered this
	1335	* branch unless cfg->leaf_level==0, in which case we
	1336	* would be in the single-file case above and not here
	1337	* at all.
	1338	*/
	1339	assert(sect->parent);
	1340
	1341	file = sect->parent->file;
	1342	} else {
	1343	if (sect->type == TOP) {
	1344	file = html_new_file(files, cfg->contents_filename);
	1345	} else if (sect->type == INDEX) {
	1346	file = html_new_file(files, cfg->index_filename);
	1347	} else {
	1348	char *title;
	1349
	1350	assert(ldepth > 0 && sect->title);
	1351	title = html_format(sect->title, cfg->template_filename);
	1352	file = html_new_file(files, title);
	1353	sfree(title);
	1354	}
	1355	}
	1356	}
	1357
	1358	sect->file = file;
	1359
	1360	if (file->min_heading_depth > depth) {
	1361	/*
	1362	* This heading is at a higher level than any heading we
	1363	* have so far placed in this file; so we set the `first'
	1364	* pointer.
	1365	*/
	1366	file->min_heading_depth = depth;
	1367	file->first = sect;
	1368	}
	1369
	1370	if (file->min_heading_depth == depth)
	1371	file->last = sect;
	1372	}
	1373
	1374	static htmlfile html_new_file(htmlfilelist list, char *filename)
	1375	{
	1376	htmlfile *ret = mknew(htmlfile);
	1377
	1378	ret->next = NULL;
	1379	if (list->tail)
	1380	list->tail->next = ret;
	1381	else
	1382	list->head = ret;
	1383	list->tail = ret;
	1384
	1385	ret->filename = dupstr(filename);
	1386	ret->last_fragment_number = 0;
	1387	ret->min_heading_depth = INT_MAX;
	1388	ret->first = ret->last = NULL;
	1389
	1390	return ret;
	1391	}
	1392
	1393	static htmlsect html_new_sect(htmlsectlist list, paragraph *title)
	1394	{
	1395	htmlsect *ret = mknew(htmlsect);
	1396
	1397	ret->next = NULL;
	1398	if (list->tail)
	1399	list->tail->next = ret;
	1400	else
	1401	list->head = ret;
	1402	list->tail = ret;
	1403
	1404	ret->title = title;
	1405	ret->file = NULL;
	1406	ret->parent = NULL;
	1407	ret->type = NORMAL;
	1408
	1409	return ret;
	1410	}
	1411
	1412	static void html_words(htmloutput ho, word words, int flags,
	1413	htmlfile file, keywordlist keywords, htmlconfig *cfg)
	1414	{
	1415	word *w;
	1416	char *c;
	1417	int style, type;
	1418
	1419	for (w = words; w; w = w->next) switch (w->type) {
	1420	case word_HyperLink:
	1421	if (flags & LINKS) {
	1422	element_open(ho, "a");
	1423	c = utoa_dup(w->text, CS_ASCII);
	1424	element_attr(ho, "href", c);
	1425	sfree(c);
	1426	}
	1427	break;
	1428	case word_UpperXref:
	1429	case word_LowerXref:
	1430	if (flags & LINKS) {
	1431	keyword *kwl = kw_lookup(keywords, w->text);
	1432	paragraph *p = kwl->para;
	1433	htmlsect s = (htmlsect )p->private_data;
	1434
	1435	assert(s);
	1436
	1437	html_href(ho, file, s->file, s->fragment);
	1438	}
	1439	break;
	1440	case word_HyperEnd:
	1441	case word_XrefEnd:
	1442	if (flags & LINKS)
	1443	element_close(ho, "a");
	1444	break;
	1445	case word_IndexRef:
	1446	if (flags & INDEXENTS) {
	1447	htmlindexref hr = (htmlindexref )w->private_data;
	1448	element_open(ho, "a");
	1449	element_attr(ho, "name", hr->fragment);
	1450	element_close(ho, "a");
	1451	}
	1452	break;
	1453	case word_Normal:
	1454	case word_Emph:
	1455	case word_Code:
	1456	case word_WeakCode:
	1457	case word_WhiteSpace:
	1458	case word_EmphSpace:
	1459	case word_CodeSpace:
	1460	case word_WkCodeSpace:
	1461	case word_Quote:
	1462	case word_EmphQuote:
	1463	case word_CodeQuote:
	1464	case word_WkCodeQuote:
	1465	style = towordstyle(w->type);
	1466	type = removeattr(w->type);
	1467	if (style == word_Emph &&
	1468	(attraux(w->aux) == attr_First \|\|
	1469	attraux(w->aux) == attr_Only) &&
	1470	(flags & MARKUP))
	1471	element_open(ho, "em");
	1472	else if ((style == word_Code \|\| style == word_WeakCode) &&
	1473	(attraux(w->aux) == attr_First \|\|
	1474	attraux(w->aux) == attr_Only) &&
	1475	(flags & MARKUP))
	1476	element_open(ho, "code");
	1477
	1478	if (type == word_WhiteSpace)
	1479	html_text(ho, L" ");
	1480	else if (type == word_Quote) {
	1481	if (quoteaux(w->aux) == quote_Open)
	1482	html_text(ho, cfg->lquote);
	1483	else
	1484	html_text(ho, cfg->rquote);
	1485	} else {
	1486	if (cvt_ok(ho->charset, w->text) \|\| !w->alt)
	1487	html_text(ho, w->text);
	1488	else
	1489	html_words(ho, w->alt, flags, file, keywords, cfg);
	1490	}
	1491
	1492	if (style == word_Emph &&
	1493	(attraux(w->aux) == attr_Last \|\|
	1494	attraux(w->aux) == attr_Only) &&
	1495	(flags & MARKUP))
	1496	element_close(ho, "em");
	1497	else if ((style == word_Code \|\| style == word_WeakCode) &&
	1498	(attraux(w->aux) == attr_Last \|\|
	1499	attraux(w->aux) == attr_Only) &&
	1500	(flags & MARKUP))
	1501	element_close(ho, "code");
	1502
	1503	break;
	1504	}
	1505	}
	1506
	1507	static void html_codepara(htmloutput ho, word words)
	1508	{
	1509	element_open(ho, "pre");
	1510	element_open(ho, "code");
	1511	for (; words; words = words->next) if (words->type == word_WeakCode) {
	1512	char *open_tag;
	1513	wchar_t t, e;
	1514
	1515	t = words->text;
	1516	if (words->next && words->next->type == word_Emph) {
	1517	e = words->next->text;
	1518	words = words->next;
	1519	} else
	1520	e = NULL;
	1521
	1522	while (e && e && t) {
	1523	int n;
	1524	int ec = *e;
	1525
	1526	for (n = 0; t[n] && e[n] && e[n] == ec; n++);
	1527
	1528	open_tag = NULL;
	1529	if (ec == 'i')
	1530	open_tag = "em";
	1531	else if (ec == 'b')
	1532	open_tag = "b";
	1533	if (open_tag)
	1534	element_open(ho, open_tag);
	1535
	1536	html_text_limit(ho, t, n);
	1537
	1538	if (open_tag)
	1539	element_close(ho, open_tag);
	1540
	1541	t += n;
	1542	e += n;
	1543	}
	1544	html_text(ho, t);
	1545	html_nl(ho);
	1546	}
	1547	element_close(ho, "code");
	1548	element_close(ho, "pre");
	1549	}
	1550
	1551	static void html_charset_cleanup(htmloutput *ho)
	1552	{
	1553	char outbuf[256];
	1554	int bytes;
	1555
	1556	bytes = charset_from_unicode(NULL, NULL, outbuf, lenof(outbuf),
	1557	ho->charset, &ho->cstate, NULL);
	1558	if (bytes > 0)
	1559	fwrite(outbuf, 1, bytes, ho->fp);
	1560	}
	1561
	1562	static void return_to_neutral(htmloutput *ho)
	1563	{
	1564	if (ho->state == HO_IN_TEXT) {
	1565	html_charset_cleanup(ho);
	1566	} else if (ho->state == HO_IN_EMPTY_TAG && is_xhtml(ho->ver)) {
	1567	fprintf(ho->fp, " />");
	1568	} else if (ho->state == HO_IN_EMPTY_TAG \|\| ho->state == HO_IN_TAG) {
	1569	fprintf(ho->fp, ">");
	1570	}
	1571
	1572	ho->state = HO_NEUTRAL;
	1573	}
	1574
	1575	static void element_open(htmloutput ho, char const name)
	1576	{
	1577	return_to_neutral(ho);
	1578	fprintf(ho->fp, "<%s", name);
	1579	ho->state = HO_IN_TAG;
	1580	}
	1581
	1582	static void element_close(htmloutput ho, char const name)
	1583	{
	1584	return_to_neutral(ho);
	1585	fprintf(ho->fp, "</%s>", name);
	1586	ho->state = HO_NEUTRAL;
	1587	}
	1588
	1589	static void element_empty(htmloutput ho, char const name)
	1590	{
	1591	return_to_neutral(ho);
	1592	fprintf(ho->fp, "<%s", name);
	1593	ho->state = HO_IN_EMPTY_TAG;
	1594	}
	1595
	1596	static void html_nl(htmloutput *ho)
	1597	{
	1598	return_to_neutral(ho);
	1599	fputc('\n', ho->fp);
	1600	}
	1601
	1602	static void html_raw(htmloutput ho, char text)
	1603	{
	1604	return_to_neutral(ho);
	1605	fputs(text, ho->fp);
	1606	}
	1607
	1608	static void html_raw_as_attr(htmloutput ho, char text)
	1609	{
	1610	assert(ho->state == HO_IN_TAG \|\| ho->state == HO_IN_EMPTY_TAG);
	1611	fputc(' ', ho->fp);
	1612	fputs(text, ho->fp);
	1613	}
	1614
	1615	static void element_attr(htmloutput ho, char const name, char const *value)
	1616	{
	1617	html_charset_cleanup(ho);
	1618	assert(ho->state == HO_IN_TAG \|\| ho->state == HO_IN_EMPTY_TAG);
	1619	fprintf(ho->fp, " %s=\"%s\"", name, value);
	1620	}
	1621
	1622	static void element_attr_w(htmloutput ho, char const name,
	1623	wchar_t const *value)
	1624	{
	1625	html_charset_cleanup(ho);
	1626	fprintf(ho->fp, " %s=\"", name);
	1627	html_text_limit_internal(ho, value, 0, TRUE);
	1628	html_charset_cleanup(ho);
	1629	fputc('"', ho->fp);
	1630	}
	1631
	1632	static void html_text(htmloutput ho, wchar_t const text)
	1633	{
	1634	html_text_limit(ho, text, 0);
	1635	}
	1636
	1637	static void html_text_limit(htmloutput ho, wchar_t const text, int maxlen)
	1638	{
	1639	return_to_neutral(ho);
	1640	html_text_limit_internal(ho, text, maxlen, FALSE);
	1641	}
	1642
	1643	static void html_text_limit_internal(htmloutput ho, wchar_t const text,
	1644	int maxlen, int quote_quotes)
	1645	{
	1646	int textlen = ustrlen(text);
	1647	char outbuf[256];
	1648	int bytes, err;
	1649
	1650	if (maxlen > 0 && textlen > maxlen)
	1651	textlen = maxlen;
	1652
	1653	while (textlen > 0) {
	1654	/* Scan ahead for characters we really can't display in HTML. */
	1655	int lenbefore, lenafter;
	1656	for (lenbefore = 0; lenbefore < textlen; lenbefore++)
	1657	if (text[lenbefore] == L'<' \|\|
	1658	text[lenbefore] == L'>' \|\|
	1659	text[lenbefore] == L'&' \|\|
	1660	(text[lenbefore] == L'"' && quote_quotes))
	1661	break;
	1662	lenafter = lenbefore;
	1663	bytes = charset_from_unicode(&text, &lenafter, outbuf, lenof(outbuf),
	1664	ho->charset, &ho->cstate, &err);
	1665	textlen -= (lenbefore - lenafter);
	1666	if (bytes > 0)
	1667	fwrite(outbuf, 1, bytes, ho->fp);
	1668	if (err) {
	1669	/*
	1670	* We have encountered a character that cannot be
	1671	* displayed in the selected output charset. Therefore,
	1672	* we use an HTML numeric entity reference.
	1673	*/
	1674	assert(textlen > 0);
	1675	fprintf(ho->fp, "&#%ld;", (long int)*text);
	1676	text++, textlen--;
	1677	} else if (lenafter == 0 && textlen > 0) {
	1678	/*
	1679	* We have encountered a character which is special to
	1680	* HTML.
	1681	*/
	1682	if (*text == L'<')
	1683	fprintf(ho->fp, "<");
	1684	else if (*text == L'>')
	1685	fprintf(ho->fp, ">");
	1686	else if (*text == L'&')
	1687	fprintf(ho->fp, "&");
	1688	else if (*text == L'"')
	1689	fprintf(ho->fp, """);
	1690	else
	1691	assert(!"Can't happen");
	1692	text++, textlen--;
	1693	}
	1694	}
	1695	}
	1696
	1697	static void cleanup(htmloutput *ho)
	1698	{
	1699	return_to_neutral(ho);
	1700	fclose(ho->fp);
	1701	}
	1702
	1703	static void html_href(htmloutput ho, htmlfile thisfile,
	1704	htmlfile targetfile, char targetfrag)
	1705	{
	1706	rdstringc rs = { 0, 0, NULL };
	1707	char *url;
	1708
	1709	if (targetfile != thisfile)
	1710	rdaddsc(&rs, targetfile->filename);
	1711	if (targetfrag) {
	1712	rdaddc(&rs, '#');
	1713	rdaddsc(&rs, targetfrag);
	1714	}
	1715	url = rs.text;
	1716
	1717	element_open(ho, "a");
	1718	element_attr(ho, "href", url);
	1719	sfree(url);
	1720	}
	1721
	1722	static char html_format(paragraph p, char *template_string)
	1723	{
	1724	char c, t;
	1725	word *w;
	1726	wchar_t *ws, wsbuf[2];
	1727	rdstringc rs = { 0, 0, NULL };
	1728
	1729	t = template_string;
	1730	while (*t) {
	1731	if (*t == '%' && t[1]) {
	1732	int fmt;
	1733
	1734	t++;
	1735	fmt = *t++;
	1736
	1737	if (fmt == '%') {
	1738	rdaddc(&rs, fmt);
	1739	continue;
	1740	}
	1741
	1742	w = NULL;
	1743	ws = NULL;
	1744
	1745	if (p->kwtext && fmt == 'n')
	1746	w = p->kwtext;
	1747	else if (p->kwtext2 && fmt == 'b') {
	1748	/*
	1749	* HTML fragment names must start with a letter, so
	1750	* simply `1.2.3' is not adequate. In this case I'm
	1751	* going to cheat slightly by prepending the first
	1752	* character of the first word of kwtext, so that
	1753	* we get `C1' for chapter 1, `S2.3' for section
	1754	* 2.3 etc.
	1755	*/
	1756	if (p->kwtext && p->kwtext->text[0]) {
	1757	ws = wsbuf;
	1758	wsbuf[1] = '\0';
	1759	wsbuf[0] = p->kwtext->text[0];
	1760	}
	1761	w = p->kwtext2;
	1762	} else if (p->keyword && *p->keyword && fmt == 'k')
	1763	ws = p->keyword;
	1764	else
	1765	w = p->words;
	1766
	1767	if (ws) {
	1768	c = utoa_dup(ws, CS_ASCII);
	1769	rdaddsc(&rs,c);
	1770	sfree(c);
	1771	}
	1772
	1773	while (w) {
	1774	if (removeattr(w->type) == word_Normal) {
	1775	c = utoa_dup(w->text, CS_ASCII);
	1776	rdaddsc(&rs,c);
	1777	sfree(c);
	1778	}
	1779	w = w->next;
	1780	}
	1781	} else {
	1782	rdaddc(&rs, *t++);
	1783	}
	1784	}
	1785
	1786	return rdtrimc(&rs);
	1787	}
	1788
	1789	static void html_sanitise_fragment(char *text)
	1790	{
	1791	/*
	1792	* The HTML 4 spec's strictest definition of fragment names (<a
	1793	* name> and "id" attributes) says that they `must begin with a
	1794	* letter and may be followed by any number of letters, digits,
	1795	* hyphens, underscores, colons, and periods'.
	1796	*
	1797	* So here we unceremoniously rip out any characters not
	1798	* conforming to this limitation.
	1799	*/
	1800	char p = text, q = text;
	1801
	1802	while (p && !((p>='A' && p<='Z') \|\| (p>='a' && *p<='z')))
	1803	p++;
	1804	if (!(q++ = p++))
	1805	return;
	1806	while (*p) {
	1807	if ((p>='A' && p<='Z') \|\|
	1808	(p>='a' && p<='z') \|\|
	1809	(p>='0' && p<='9') \|\|
	1810	p=='-' \|\| p=='_' \|\| p==':' \|\| p=='.')
	1811	q++ = p;
	1812	p++;
	1813	}
	1814
	1815	*q = '\0';
	1816	}
	1817
	1818	static void html_contents_entry(htmloutput ho, int depth, htmlsect s,
	1819	htmlfile thisfile, keywordlist keywords,
	1820	htmlconfig *cfg)
	1821	{
	1822	while (ho->contents_level > depth) {
	1823	element_close(ho, "ul");
	1824	ho->contents_level--;
	1825	}
	1826
	1827	while (ho->contents_level < depth) {
	1828	element_open(ho, "ul");
	1829	ho->contents_level++;
	1830	}
	1831
	1832	if (!s)
	1833	return;
	1834
	1835	element_open(ho, "li");
	1836	html_href(ho, thisfile, s->file, s->fragment);
	1837	html_section_title(ho, s, thisfile, keywords, cfg);
	1838	element_close(ho, "a");
	1839	element_close(ho, "li");
	1840	}
	1841
	1842	static void html_section_title(htmloutput ho, htmlsect s, htmlfile *thisfile,
	1843	keywordlist keywords, htmlconfig cfg)
	1844	{
	1845	if (s->title) {
	1846	sectlevel *sl;
	1847	word *number;
	1848	int depth = heading_depth(s->title);
	1849
	1850	if (depth < 0)
	1851	sl = NULL;
	1852	else if (depth == 0)
	1853	sl = &cfg->achapter;
	1854	else if (depth <= cfg->nasect)
	1855	sl = &cfg->asect[depth-1];
	1856	else
	1857	sl = &cfg->asect[cfg->nasect-1];
	1858
	1859	if (!sl)
	1860	number = NULL;
	1861	else if (sl->just_numbers)
	1862	number = s->title->kwtext2;
	1863	else
	1864	number = s->title->kwtext;
	1865
	1866	if (number) {
	1867	html_words(ho, number, MARKUP,
	1868	thisfile, keywords, cfg);
	1869	html_text(ho, sl->number_suffix);
	1870	}
	1871
	1872	html_words(ho, s->title->words, MARKUP,
	1873	thisfile, keywords, cfg);
	1874	} else {
	1875	assert(s->type != NORMAL);
	1876	if (s->type == TOP)
	1877	html_text(ho, L"Preamble");/* FIXME: configure */
	1878	else if (s->type == INDEX)
	1879	html_text(ho, L"Index");/* FIXME: configure */
	1880	}
	1881	}