mdw@git.distorted.org.uk Git - sgt/halibut/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* HTML backend for Halibut
	3	*/
	4
	5	/*
	6	* TODO:
	7	*
	8	* - I'm never entirely convinced that having a fragment link to
	9	* come in at the start of the real text in the file is
	10	* sensible. Perhaps for the topmost section in the file, no
	11	* fragment should be used? (Though it should probably still be
	12	* _there_ even if unused.)
	13	*
	14	* - new configurability:
	15	* * a few new things explicitly labelled as `FIXME:
	16	* configurable' or similar.
	17	* * HTML flavour.
	18	* * Some means of specifying the distinction between
	19	* restrict-charset and output-charset. It seems to me that
	20	* `html-charset' is output-charset, and that
	21	* restrict-charset usually wants to be either output-charset
	22	* or UTF-8 (the latter indicating that any Unicode character
	23	* is fair game and it will be specified using &#foo; if it
	24	* isn't in output-charset). However, since XHTML defaults to
	25	* UTF-8 and it's fiddly to tell it otherwise, it's just
	26	* possible that some user may need to set restrict-charset
	27	* to their charset of choice while leaving _output_-charset
	28	* at UTF-8. Figure out some configuration, and apply it.
	29	*
	30	* - test all HTML flavours and ensure they validate sensibly. Fix
	31	* remaining confusion issues such as <?xml?> and obsoleteness
	32	* of <a name>.
	33	*
	34	* - proper naming of all fragment IDs. The ones for sections are
	35	* fine; the ones for numbered list and bibliociteds are utter
	36	* crap; the ones for indexes _might_ do but it might be worth
	37	* giving some thought to how to do them better.
	38	*
	39	* - nonbreaking spaces.
	40	*
	41	* - free up all the data we have allocated while running this
	42	* backend.
	43	*/
	44
	45	#include <stdio.h>
	46	#include <stdlib.h>
	47	#include <assert.h>
	48	#include <limits.h>
	49	#include "halibut.h"
	50
	51	#define is_heading_type(type) ( (type) == para_Title \|\| \
	52	(type) == para_Chapter \|\| \
	53	(type) == para_Appendix \|\| \
	54	(type) == para_UnnumberedChapter \|\| \
	55	(type) == para_Heading \|\| \
	56	(type) == para_Subsect)
	57
	58	#define heading_depth(p) ( (p)->type == para_Subsect ? (p)->aux + 1 : \
	59	(p)->type == para_Heading ? 1 : \
	60	(p)->type == para_Title ? -1 : 0 )
	61
	62	typedef struct {
	63	int just_numbers;
	64	wchar_t *number_suffix;
	65	} sectlevel;
	66
	67	typedef struct {
	68	int nasect;
	69	sectlevel achapter, *asect;
	70	int contents_depths; / 0=main, 1=chapter, 2=sect etc */
	71	int ncdepths;
	72	int address_section, visible_version_id;
	73	int leaf_contains_contents, leaf_smallest_contents;
	74	char *contents_filename;
	75	char *index_filename;
	76	char *template_filename;
	77	char *single_filename;
	78	char *template_fragment;
	79	char head_end, body_start, body_end, addr_start, *addr_end;
	80	char body_tag, nav_attr;
	81	wchar_t author, description;
	82	int restrict_charset, output_charset;
	83	enum {
	84	HTML_3_2, HTML_4,
	85	XHTML_1_0_TRANSITIONAL, XHTML_1_0_STRICT
	86	} htmlver;
	87	wchar_t lquote, rquote;
	88	int leaf_level;
	89	} htmlconfig;
	90
	91	#define contents_depth(conf, level) \
	92	( (conf).ncdepths > (level) ? (conf).contents_depths[level] : (level)+2 )
	93
	94	#define is_xhtml(ver) ((ver) >= XHTML_1_0_TRANSITIONAL)
	95
	96	typedef struct htmlfile htmlfile;
	97	typedef struct htmlsect htmlsect;
	98
	99	struct htmlfile {
	100	htmlfile *next;
	101	char *filename;
	102	int last_fragment_number;
	103	int min_heading_depth;
	104	htmlsect first, last; /* first/last highest-level sections */
	105	};
	106
	107	struct htmlsect {
	108	htmlsect next, parent;
	109	htmlfile *file;
	110	paragraph title, text;
	111	enum { NORMAL, TOP, INDEX } type;
	112	int contents_depth;
	113	char *fragment;
	114	};
	115
	116	typedef struct {
	117	htmlfile head, tail;
	118	htmlfile single, index;
	119	tree234 *frags;
	120	} htmlfilelist;
	121
	122	typedef struct {
	123	htmlsect head, tail;
	124	} htmlsectlist;
	125
	126	typedef struct {
	127	htmlfile *file;
	128	char *fragment;
	129	} htmlfragment;
	130
	131	typedef struct {
	132	int nrefs, refsize;
	133	word **refs;
	134	} htmlindex;
	135
	136	typedef struct {
	137	htmlsect *section;
	138	char *fragment;
	139	int generated, referenced;
	140	} htmlindexref;
	141
	142	typedef struct {
	143	/*
	144	* This level deals with charset conversion, starting and
	145	* ending tags, and writing to the file. It's the lexical
	146	* level.
	147	*/
	148	FILE *fp;
	149	int charset;
	150	charset_state cstate;
	151	int ver;
	152	enum {
	153	HO_NEUTRAL, HO_IN_TAG, HO_IN_EMPTY_TAG, HO_IN_TEXT
	154	} state;
	155	/*
	156	* Stuff beyond here deals with the higher syntactic level: it
	157	* tracks how many levels of <ul> are currently open when
	158	* producing a contents list, for example.
	159	*/
	160	int contents_level;
	161	} htmloutput;
	162
	163	static int html_fragment_compare(void av, void bv)
	164	{
	165	htmlfragment a = (htmlfragment )av;
	166	htmlfragment b = (htmlfragment )bv;
	167	int cmp;
	168
	169	if ((cmp = strcmp(a->file->filename, b->file->filename)) != 0)
	170	return cmp;
	171	else
	172	return strcmp(a->fragment, b->fragment);
	173	}
	174
	175	static void html_file_section(htmlconfig cfg, htmlfilelist files,
	176	htmlsect *sect, int depth);
	177
	178	static htmlfile html_new_file(htmlfilelist list, char *filename);
	179	static htmlsect html_new_sect(htmlsectlist list, paragraph *title);
	180
	181	/* Flags for html_words() flags parameter */
	182	#define NOTHING 0x00
	183	#define MARKUP 0x01
	184	#define LINKS 0x02
	185	#define INDEXENTS 0x04
	186	#define ALL 0x07
	187	static void html_words(htmloutput ho, word words, int flags,
	188	htmlfile file, keywordlist keywords, htmlconfig *cfg);
	189	static void html_codepara(htmloutput ho, word words);
	190
	191	static void element_open(htmloutput ho, char const name);
	192	static void element_close(htmloutput ho, char const name);
	193	static void element_empty(htmloutput ho, char const name);
	194	static void element_attr(htmloutput ho, char const name, char const *value);
	195	static void element_attr_w(htmloutput ho, char const name,
	196	wchar_t const *value);
	197	static void html_text(htmloutput ho, wchar_t const str);
	198	static void html_text_limit(htmloutput ho, wchar_t const str, int maxlen);
	199	static void html_text_limit_internal(htmloutput ho, wchar_t const text,
	200	int maxlen, int quote_quotes);
	201	static void html_nl(htmloutput *ho);
	202	static void html_raw(htmloutput ho, char text);
	203	static void html_raw_as_attr(htmloutput ho, char text);
	204	static void cleanup(htmloutput *ho);
	205
	206	static void html_href(htmloutput ho, htmlfile thisfile,
	207	htmlfile targetfile, char targetfrag);
	208
	209	static char html_format(paragraph p, char *template_string);
	210	static char html_sanitise_fragment(htmlfilelist files, htmlfile *file,
	211	char *text);
	212
	213	static void html_contents_entry(htmloutput ho, int depth, htmlsect s,
	214	htmlfile thisfile, keywordlist keywords,
	215	htmlconfig *cfg);
	216	static void html_section_title(htmloutput ho, htmlsect s,
	217	htmlfile thisfile, keywordlist keywords,
	218	htmlconfig *cfg, int real);
	219
	220	static htmlconfig html_configure(paragraph *source) {
	221	htmlconfig ret;
	222	paragraph *p;
	223
	224	/*
	225	* Defaults.
	226	*/
	227	ret.leaf_level = 2;
	228	ret.achapter.just_numbers = FALSE;
	229	ret.achapter.number_suffix = L": ";
	230	ret.nasect = 1;
	231	ret.asect = snewn(ret.nasect, sectlevel);
	232	ret.asect[0].just_numbers = TRUE;
	233	ret.asect[0].number_suffix = L" ";
	234	ret.ncdepths = 0;
	235	ret.contents_depths = 0;
	236	ret.visible_version_id = TRUE;
	237	ret.address_section = TRUE;
	238	ret.leaf_contains_contents = FALSE;
	239	ret.leaf_smallest_contents = 4;
	240	ret.single_filename = dupstr("Manual.html");
	241	ret.contents_filename = dupstr("Contents.html");
	242	ret.index_filename = dupstr("IndexPage.html");
	243	ret.template_filename = dupstr("%n.html");
	244	ret.template_fragment = dupstr("%b");
	245	ret.head_end = ret.body_tag = ret.body_start = ret.body_end =
	246	ret.addr_start = ret.addr_end = ret.nav_attr = NULL;
	247	ret.author = ret.description = NULL;
	248	ret.restrict_charset = CS_ASCII;
	249	ret.output_charset = CS_ASCII;
	250	ret.htmlver = HTML_4;
	251	/*
	252	* Default quote characters are Unicode matched single quotes,
	253	* falling back to ordinary ASCII ".
	254	*/
	255	ret.lquote = L"\x2018\0\x2019\0\"\0\"\0\0";
	256	ret.rquote = uadv(ret.lquote);
	257
	258	/*
	259	* Two-pass configuration so that we can pick up global config
	260	* (e.g. `quotes') before having it overridden by specific
	261	* config (`html-quotes'), irrespective of the order in which
	262	* they occur.
	263	*/
	264	for (p = source; p; p = p->next) {
	265	if (p->type == para_Config) {
	266	if (!ustricmp(p->keyword, L"quotes")) {
	267	if (uadv(p->keyword) && uadv(uadv(p->keyword))) {
	268	ret.lquote = uadv(p->keyword);
	269	ret.rquote = uadv(ret.lquote);
	270	}
	271	}
	272	}
	273	}
	274
	275	for (p = source; p; p = p->next) {
	276	if (p->type == para_Config) {
	277	wchar_t *k = p->keyword;
	278
	279	if (!ustrnicmp(k, L"xhtml-", 6))
	280	k++; /* treat `xhtml-' and `html-' the same */
	281
	282	if (!ustricmp(k, L"html-charset")) {
	283	char *csname = utoa_dup(uadv(k), CS_ASCII);
	284	ret.restrict_charset = ret.output_charset =
	285	charset_from_localenc(csname);
	286	sfree(csname);
	287	} else if (!ustricmp(k, L"html-single-filename")) {
	288	sfree(ret.single_filename);
	289	ret.single_filename = dupstr(adv(p->origkeyword));
	290	} else if (!ustricmp(k, L"html-contents-filename")) {
	291	sfree(ret.contents_filename);
	292	ret.contents_filename = dupstr(adv(p->origkeyword));
	293	} else if (!ustricmp(k, L"html-index-filename")) {
	294	sfree(ret.index_filename);
	295	ret.index_filename = dupstr(adv(p->origkeyword));
	296	} else if (!ustricmp(k, L"html-template-filename")) {
	297	sfree(ret.template_filename);
	298	ret.template_filename = dupstr(adv(p->origkeyword));
	299	} else if (!ustricmp(k, L"html-template-fragment")) {
	300	sfree(ret.template_fragment);
	301	ret.template_fragment = dupstr(adv(p->origkeyword));
	302	} else if (!ustricmp(k, L"html-chapter-numeric")) {
	303	ret.achapter.just_numbers = utob(uadv(k));
	304	} else if (!ustricmp(k, L"html-chapter-suffix")) {
	305	ret.achapter.number_suffix = uadv(k);
	306	} else if (!ustricmp(k, L"html-leaf-level")) {
	307	ret.leaf_level = utoi(uadv(k));
	308	} else if (!ustricmp(k, L"html-section-numeric")) {
	309	wchar_t *q = uadv(k);
	310	int n = 0;
	311	if (uisdigit(*q)) {
	312	n = utoi(q);
	313	q = uadv(q);
	314	}
	315	if (n >= ret.nasect) {
	316	int i;
	317	ret.asect = sresize(ret.asect, n+1, sectlevel);
	318	for (i = ret.nasect; i <= n; i++)
	319	ret.asect[i] = ret.asect[ret.nasect-1];
	320	ret.nasect = n+1;
	321	}
	322	ret.asect[n].just_numbers = utob(q);
	323	} else if (!ustricmp(k, L"html-section-suffix")) {
	324	wchar_t *q = uadv(k);
	325	int n = 0;
	326	if (uisdigit(*q)) {
	327	n = utoi(q);
	328	q = uadv(q);
	329	}
	330	if (n >= ret.nasect) {
	331	int i;
	332	ret.asect = sresize(ret.asect, n+1, sectlevel);
	333	for (i = ret.nasect; i <= n; i++) {
	334	ret.asect[i] = ret.asect[ret.nasect-1];
	335	}
	336	ret.nasect = n+1;
	337	}
	338	ret.asect[n].number_suffix = q;
	339	} else if (!ustricmp(k, L"html-contents-depth") \|\|
	340	!ustrnicmp(k, L"html-contents-depth-", 20)) {
	341	/*
	342	* Relic of old implementation: this directive used
	343	* to be written as \cfg{html-contents-depth-3}{2}
	344	* rather than the usual Halibut convention of
	345	* \cfg{html-contents-depth}{3}{2}. We therefore
	346	* support both.
	347	*/
	348	wchar_t *q = k[19] ? k+20 : uadv(k);
	349	int n = 0;
	350	if (uisdigit(*q)) {
	351	n = utoi(q);
	352	q = uadv(q);
	353	}
	354	if (n >= ret.ncdepths) {
	355	int i;
	356	ret.contents_depths =
	357	sresize(ret.contents_depths, n+1, int);
	358	for (i = ret.ncdepths; i <= n; i++) {
	359	ret.contents_depths[i] = i+2;
	360	}
	361	ret.ncdepths = n+1;
	362	}
	363	ret.contents_depths[n] = utoi(q);
	364	} else if (!ustricmp(k, L"html-head-end")) {
	365	ret.head_end = adv(p->origkeyword);
	366	} else if (!ustricmp(k, L"html-body-tag")) {
	367	ret.body_tag = adv(p->origkeyword);
	368	} else if (!ustricmp(k, L"html-body-start")) {
	369	ret.body_start = adv(p->origkeyword);
	370	} else if (!ustricmp(k, L"html-body-end")) {
	371	ret.body_end = adv(p->origkeyword);
	372	} else if (!ustricmp(k, L"html-address-start")) {
	373	ret.addr_start = adv(p->origkeyword);
	374	} else if (!ustricmp(k, L"html-address-end")) {
	375	ret.addr_end = adv(p->origkeyword);
	376	} else if (!ustricmp(k, L"html-navigation-attributes")) {
	377	ret.nav_attr = adv(p->origkeyword);
	378	} else if (!ustricmp(k, L"html-author")) {
	379	ret.author = uadv(k);
	380	} else if (!ustricmp(k, L"html-description")) {
	381	ret.description = uadv(k);
	382	} else if (!ustricmp(k, L"html-suppress-address")) {
	383	ret.address_section = !utob(uadv(k));
	384	} else if (!ustricmp(k, L"html-versionid")) {
	385	ret.visible_version_id = utob(uadv(k));
	386	} else if (!ustricmp(k, L"html-quotes")) {
	387	if (uadv(k) && uadv(uadv(k))) {
	388	ret.lquote = uadv(k);
	389	ret.rquote = uadv(ret.lquote);
	390	}
	391	} else if (!ustricmp(k, L"html-leaf-contains-contents")) {
	392	ret.leaf_contains_contents = utob(uadv(k));
	393	} else if (!ustricmp(k, L"html-leaf-smallest-contents")) {
	394	ret.leaf_smallest_contents = utoi(uadv(k));
	395	}
	396	}
	397	}
	398
	399	/*
	400	* Now process fallbacks on quote characters.
	401	*/
	402	while (uadv(ret.rquote) && uadv(uadv(ret.rquote)) &&
	403	(!cvt_ok(ret.restrict_charset, ret.lquote) \|\|
	404	!cvt_ok(ret.restrict_charset, ret.rquote))) {
	405	ret.lquote = uadv(ret.rquote);
	406	ret.rquote = uadv(ret.lquote);
	407	}
	408
	409	return ret;
	410	}
	411
	412	paragraph html_config_filename(char filename)
	413	{
	414	/*
	415	* If the user passes in a single filename as a parameter to
	416	* the `--html' command-line option, then we should assume it
	417	* to imply _two_ config directives:
	418	* \cfg{html-single-filename}{whatever} and
	419	* \cfg{html-leaf-level}{0}; the rationale being that the user
	420	* wants their output _in that file_.
	421	*/
	422	paragraph p, q;
	423
	424	p = cmdline_cfg_simple("html-single-filename", filename, NULL);
	425	q = cmdline_cfg_simple("html-leaf-level", "0", NULL);
	426	p->next = q;
	427	return p;
	428	}
	429
	430	void html_backend(paragraph sourceform, keywordlist keywords,
	431	indexdata idx, void unused) {
	432	paragraph *p;
	433	htmlconfig conf;
	434	htmlfilelist files = { NULL, NULL, NULL, NULL, NULL };
	435	htmlsectlist sects = { NULL, NULL }, nonsects = { NULL, NULL };
	436
	437	IGNORE(unused);
	438
	439	conf = html_configure(sourceform);
	440
	441	/*
	442	* We're going to make heavy use of paragraphs' private data
	443	* fields in the forthcoming code. Clear them first, so we can
	444	* reliably tell whether we have auxiliary data for a
	445	* particular paragraph.
	446	*/
	447	for (p = sourceform; p; p = p->next)
	448	p->private_data = NULL;
	449
	450	files.frags = newtree234(html_fragment_compare);
	451
	452	/*
	453	* Start by figuring out into which file each piece of the
	454	* document should be put. We'll do this by inventing an
	455	* `htmlsect' structure and stashing it in the private_data
	456	* field of each section paragraph; we also need one additional
	457	* htmlsect for the document index, which won't show up in the
	458	* source form but needs to be consistently mentioned in
	459	* contents links.
	460	*
	461	* While we're here, we'll also invent the HTML fragment name
	462	* for each section.
	463	*/
	464	{
	465	htmlsect topsect, sect;
	466	int d;
	467
	468	topsect = html_new_sect(&sects, p);
	469	topsect->type = TOP;
	470	topsect->title = NULL;
	471	topsect->text = sourceform;
	472	topsect->contents_depth = contents_depth(conf, 0);
	473	html_file_section(&conf, &files, topsect, -1);
	474	topsect->fragment = NULL;
	475
	476	for (p = sourceform; p; p = p->next)
	477	if (is_heading_type(p->type)) {
	478	d = heading_depth(p);
	479
	480	if (p->type == para_Title) {
	481	topsect->title = p;
	482	continue;
	483	}
	484
	485	sect = html_new_sect(&sects, p);
	486	sect->text = p->next;
	487
	488	sect->contents_depth = contents_depth(conf, d+1) - (d+1);
	489
	490	if (p->parent) {
	491	sect->parent = (htmlsect *)p->parent->private_data;
	492	assert(sect->parent != NULL);
	493	} else
	494	sect->parent = topsect;
	495	p->private_data = sect;
	496
	497	html_file_section(&conf, &files, sect, d);
	498
	499	sect->fragment = html_format(p, conf.template_fragment);
	500	sect->fragment = html_sanitise_fragment(&files, sect->file,
	501	sect->fragment);
	502	}
	503
	504	/* And the index. */
	505	sect = html_new_sect(&sects, NULL);
	506	sect->text = NULL;
	507	sect->type = INDEX;
	508	sect->parent = topsect;
	509	html_file_section(&conf, &files, sect, 0); /* peer of chapters */
	510	sect->fragment = dupstr("Index"); /* FIXME: this _can't_ be right */
	511	sect->fragment = html_sanitise_fragment(&files, sect->file,
	512	sect->fragment);
	513	files.index = sect->file;
	514	}
	515
	516	/*
	517	* Go through the keyword list and sort out fragment IDs for
	518	* all the potentially referenced paragraphs which _aren't_
	519	* headings.
	520	*/
	521	{
	522	int i;
	523	keyword *kw;
	524	htmlsect *sect;
	525
	526	for (i = 0; (kw = index234(keywords->keys, i)) != NULL; i++) {
	527	paragraph q, p = kw->para;
	528
	529	if (!is_heading_type(p->type)) {
	530	htmlsect *parent;
	531
	532	/*
	533	* Find the paragraph's parent htmlsect, to
	534	* determine which file it will end up in.
	535	*/
	536	q = p->parent;
	537	if (!q) {
	538	/*
	539	* Preamble paragraphs have no parent. So if we
	540	* have a non-heading with no parent, it must
	541	* be preamble, and therefore its parent
	542	* htmlsect must be the preamble one.
	543	*/
	544	assert(sects.head &&
	545	sects.head->type == TOP);
	546	parent = sects.head;
	547	} else
	548	parent = (htmlsect *)q->private_data;
	549
	550	/*
	551	* Now we can construct an htmlsect for this
	552	* paragraph itself, taking care to put it in the
	553	* list of non-sections rather than the list of
	554	* sections (so that traverses of the `sects' list
	555	* won't attempt to add it to the contents or
	556	* anything weird like that).
	557	*/
	558	sect = html_new_sect(&nonsects, p);
	559	sect->file = parent->file;
	560	sect->parent = parent;
	561	p->private_data = sect;
	562
	563	/*
	564	* FIXME: We need a much better means of naming
	565	* these, possibly involving an additional
	566	* configuration template. For the moment I'll just
	567	* invent something completely stupid.
	568	*/
	569	sect->fragment = snewn(40, char);
	570	sprintf(sect->fragment, "frag%p", sect);
	571	sect->fragment = html_sanitise_fragment(&files, sect->file,
	572	sect->fragment);
	573	}
	574	}
	575	}
	576
	577	/*
	578	* Now sort out the index. This involves:
	579	*
	580	* - For each index term, we set up an htmlindex structure to
	581	* store all the references to that term.
	582	*
	583	* - Then we make a pass over the actual document, finding
	584	* every word_IndexRef; for each one, we actually figure out
	585	* the HTML filename/fragment pair we will use to reference
	586	* it, store that information in the private data field of
	587	* the word_IndexRef itself (so we can recreate it when the
	588	* time comes to output our HTML), and add a reference to it
	589	* to the index term in question.
	590	*/
	591	{
	592	int i;
	593	indexentry *entry;
	594	htmlsect *lastsect;
	595	word *w;
	596
	597	/*
	598	* Set up the htmlindex structures.
	599	*/
	600
	601	for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
	602	htmlindex *hi = snew(htmlindex);
	603
	604	hi->nrefs = hi->refsize = 0;
	605	hi->refs = NULL;
	606
	607	entry->backend_data = hi;
	608	}
	609
	610	/*
	611	* Run over the document inventing fragments. Each fragment
	612	* is of the form `i' followed by an integer.
	613	*/
	614	lastsect = NULL;
	615	for (p = sourceform; p; p = p->next) {
	616	if (is_heading_type(p->type))
	617	lastsect = (htmlsect *)p->private_data;
	618
	619	for (w = p->words; w; w = w->next)
	620	if (w->type == word_IndexRef) {
	621	htmlindexref *hr = snew(htmlindexref);
	622	indextag *tag;
	623	int i;
	624
	625	hr->referenced = hr->generated = FALSE;
	626	hr->section = lastsect;
	627	{
	628	char buf[40];
	629	sprintf(buf, "i%d",
	630	lastsect->file->last_fragment_number++);
	631	hr->fragment = dupstr(buf);
	632	hr->fragment =
	633	html_sanitise_fragment(&files, hr->section->file,
	634	hr->fragment);
	635	}
	636	w->private_data = hr;
	637
	638	tag = index_findtag(idx, w->text);
	639	if (!tag)
	640	break;
	641
	642	for (i = 0; i < tag->nrefs; i++) {
	643	indexentry *entry = tag->refs[i];
	644	htmlindex hi = (htmlindex )entry->backend_data;
	645
	646	if (hi->nrefs >= hi->refsize) {
	647	hi->refsize += 32;
	648	hi->refs = sresize(hi->refs, hi->refsize, word *);
	649	}
	650
	651	hi->refs[hi->nrefs++] = w;
	652	}
	653	}
	654	}
	655	}
	656
	657	/*
	658	* Now we're ready to write out the actual HTML files.
	659	*
	660	* For each file:
	661	*
	662	* - we open that file and write its header
	663	* - we run down the list of sections
	664	* - for each section directly contained within that file, we
	665	* output the section text
	666	* - for each section which is not in the file but which has a
	667	* parent that is, we output a contents entry for the
	668	* section if appropriate
	669	* - finally, we output the file trailer and close the file.
	670	*/
	671	{
	672	htmlfile f, prevf;
	673	htmlsect *s;
	674	paragraph *p;
	675
	676	prevf = NULL;
	677
	678	for (f = files.head; f; f = f->next) {
	679	htmloutput ho;
	680	int displaying;
	681	enum LISTTYPE { NOLIST, UL, OL, DL };
	682	enum ITEMTYPE { NOITEM, LI, DT, DD };
	683	struct stackelement {
	684	struct stackelement *next;
	685	enum LISTTYPE listtype;
	686	enum ITEMTYPE itemtype;
	687	} *stackhead;
	688
	689	#define listname(lt) ( (lt)==UL ? "ul" : (lt)==OL ? "ol" : "dl" )
	690	#define itemname(lt) ( (lt)==LI ? "li" : (lt)==DT ? "dt" : "dd" )
	691
	692	ho.fp = fopen(f->filename, "w");
	693	ho.charset = conf.output_charset;
	694	ho.cstate = charset_init_state;
	695	ho.ver = conf.htmlver;
	696	ho.state = HO_NEUTRAL;
	697	ho.contents_level = 0;
	698
	699	/* <!DOCTYPE>. */
	700	switch (conf.htmlver) {
	701	case HTML_3_2:
	702	fprintf(ho.fp, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD "
	703	"HTML 3.2 Final//EN\">\n");
	704	break;
	705	case HTML_4:
	706	fprintf(ho.fp, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML"
	707	" 4.01//EN\"\n\"http://www.w3.org/TR/html4/"
	708	"strict.dtd\">\n");
	709	break;
	710	case XHTML_1_0_TRANSITIONAL:
	711	/* FIXME: <?xml?> to specify character encoding.
	712	* This breaks HTML backwards compat, so perhaps avoid, or
	713	* perhaps only emit when not using the default UTF-8? */
	714	fprintf(ho.fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML"
	715	" 1.0 Transitional//EN\"\n\"http://www.w3.org/TR/"
	716	"xhtml1/DTD/xhtml1-transitional.dtd\">\n");
	717	break;
	718	case XHTML_1_0_STRICT:
	719	/* FIXME: <?xml?> to specify character encoding. */
	720	fprintf(ho.fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML"
	721	" 1.0 Strict//EN\"\n\"http://www.w3.org/TR/xhtml1/"
	722	"DTD/xhtml1-strict.dtd\">\n");
	723	break;
	724	}
	725
	726	element_open(&ho, "html");
	727	if (is_xhtml(conf.htmlver)) {
	728	element_attr(&ho, "xmlns", "http://www.w3.org/1999/xhtml");
	729	}
	730	html_nl(&ho);
	731
	732	element_open(&ho, "head");
	733	html_nl(&ho);
	734
	735	element_empty(&ho, "meta");
	736	element_attr(&ho, "http-equiv", "content-type");
	737	{
	738	char buf[200];
	739	sprintf(buf, "text/html; charset=%.150s",
	740	charset_to_mimeenc(conf.output_charset));
	741	element_attr(&ho, "content", buf);
	742	}
	743	html_nl(&ho);
	744
	745	if (conf.author) {
	746	element_empty(&ho, "meta");
	747	element_attr(&ho, "name", "author");
	748	element_attr_w(&ho, "content", conf.author);
	749	html_nl(&ho);
	750	}
	751
	752	if (conf.description) {
	753	element_empty(&ho, "meta");
	754	element_attr(&ho, "name", "description");
	755	element_attr_w(&ho, "content", conf.description);
	756	html_nl(&ho);
	757	}
	758
	759	element_open(&ho, "title");
	760	if (f->first && f->first->title) {
	761	html_words(&ho, f->first->title->words, NOTHING,
	762	f, keywords, &conf);
	763
	764	assert(f->last);
	765	if (f->last != f->first && f->last->title) {
	766	html_text(&ho, L" - "); /* FIXME: configurable? */
	767	html_words(&ho, f->last->title->words, NOTHING,
	768	f, keywords, &conf);
	769	}
	770	}
	771	element_close(&ho, "title");
	772	html_nl(&ho);
	773
	774	if (conf.head_end)
	775	html_raw(&ho, conf.head_end);
	776
	777	element_close(&ho, "head");
	778	html_nl(&ho);
	779
	780	/* FIXME: need to be able to specify replacement for this */
	781	if (conf.body_tag)
	782	html_raw(&ho, conf.body_tag);
	783	else
	784	element_open(&ho, "body");
	785	html_nl(&ho);
	786
	787	if (conf.body_start)
	788	html_raw(&ho, conf.body_start);
	789
	790	/*
	791	* Write out a nav bar. Special case: we don't do this
	792	* if there is only one file.
	793	*/
	794	if (files.head != files.tail) {
	795	element_open(&ho, "p");
	796	if (conf.nav_attr)
	797	html_raw_as_attr(&ho, conf.nav_attr);
	798
	799	if (prevf) {
	800	element_open(&ho, "a");
	801	element_attr(&ho, "href", prevf->filename);
	802	}
	803	html_text(&ho, L"Previous");/* FIXME: conf? */
	804	if (prevf)
	805	element_close(&ho, "a");
	806
	807	html_text(&ho, L" \| "); /* FIXME: conf? */
	808
	809	if (f != files.head) {
	810	element_open(&ho, "a");
	811	element_attr(&ho, "href", files.head->filename);
	812	}
	813	html_text(&ho, L"Contents");/* FIXME: conf? */
	814	if (f != files.head)
	815	element_close(&ho, "a");
	816
	817	html_text(&ho, L" \| "); /* FIXME: conf? */
	818
	819	if (f != files.index) {
	820	element_open(&ho, "a");
	821	element_attr(&ho, "href", files.index->filename);
	822	}
	823	html_text(&ho, L"Index");/* FIXME: conf? */
	824	if (f != files.index)
	825	element_close(&ho, "a");
	826
	827	html_text(&ho, L" \| "); /* FIXME: conf? */
	828
	829	if (f->next) {
	830	element_open(&ho, "a");
	831	element_attr(&ho, "href", f->next->filename);
	832	}
	833	html_text(&ho, L"Next"); /* FIXME: conf? */
	834	if (f->next)
	835	element_close(&ho, "a");
	836
	837	element_close(&ho, "p");
	838	html_nl(&ho);
	839	}
	840	prevf = f;
	841
	842	/*
	843	* Write out a prefix TOC for the file.
	844	*
	845	* We start by going through the section list and
	846	* collecting the sections which need to be added to
	847	* the contents. On the way, we also test to see if
	848	* this file is a leaf file (defined as one which
	849	* contains all descendants of any section it
	850	* contains), because this will play a part in our
	851	* decision on whether or not to _output_ the TOC.
	852	*
	853	* Special case: we absolutely do not do this if we're
	854	* in single-file mode.
	855	*/
	856	if (files.head != files.tail) {
	857	int ntoc = 0, tocsize = 0;
	858	htmlsect **toc = NULL;
	859	int leaf = TRUE;
	860
	861	for (s = sects.head; s; s = s->next) {
	862	htmlsect a, ac;
	863	int depth, adepth;
	864
	865	/*
	866	* Search up from this section until we find
	867	* the highest-level one which belongs in this
	868	* file.
	869	*/
	870	depth = adepth = 0;
	871	a = NULL;
	872	for (ac = s; ac; ac = ac->parent) {
	873	if (ac->file == f) {
	874	a = ac;
	875	adepth = depth;
	876	}
	877	depth++;
	878	}
	879
	880	if (s->file != f && a != NULL)
	881	leaf = FALSE;
	882
	883	if (a) {
	884	if (adepth <= a->contents_depth) {
	885	if (ntoc >= tocsize) {
	886	tocsize += 64;
	887	toc = sresize(toc, tocsize, htmlsect *);
	888	}
	889	toc[ntoc++] = s;
	890	}
	891	}
	892	}
	893
	894	if (leaf && conf.leaf_contains_contents &&
	895	ntoc >= conf.leaf_smallest_contents) {
	896	int i;
	897
	898	for (i = 0; i < ntoc; i++) {
	899	htmlsect *s = toc[i];
	900	int hlevel = (s->type == TOP ? -1 :
	901	s->type == INDEX ? 0 :
	902	heading_depth(s->title))
	903	- f->min_heading_depth + 1;
	904
	905	assert(hlevel >= 1);
	906	html_contents_entry(&ho, hlevel, s,
	907	f, keywords, &conf);
	908	}
	909	html_contents_entry(&ho, 0, NULL, f, keywords, &conf);
	910	}
	911	}
	912
	913	/*
	914	* Now go through the document and output some real
	915	* text.
	916	*/
	917	displaying = FALSE;
	918	for (s = sects.head; s; s = s->next) {
	919	if (s->file == f) {
	920	/*
	921	* This section belongs in this file.
	922	* Display it.
	923	*/
	924	displaying = TRUE;
	925	} else {
	926	htmlsect a, ac;
	927	int depth, adepth;
	928
	929	displaying = FALSE;
	930
	931	/*
	932	* Search up from this section until we find
	933	* the highest-level one which belongs in this
	934	* file.
	935	*/
	936	depth = adepth = 0;
	937	a = NULL;
	938	for (ac = s; ac; ac = ac->parent) {
	939	if (ac->file == f) {
	940	a = ac;
	941	adepth = depth;
	942	}
	943	depth++;
	944	}
	945
	946	if (a != NULL) {
	947	/*
	948	* This section does not belong in this
	949	* file, but an ancestor of it does. Write
	950	* out a contents table entry, if the depth
	951	* doesn't exceed the maximum contents
	952	* depth for the ancestor section.
	953	*/
	954	if (adepth <= a->contents_depth) {
	955	html_contents_entry(&ho, adepth, s,
	956	f, keywords, &conf);
	957	}
	958	}
	959	}
	960
	961	if (displaying) {
	962	int hlevel;
	963	char htag[3];
	964
	965	html_contents_entry(&ho, 0, NULL, f, keywords, &conf);
	966
	967	/*
	968	* Display the section heading.
	969	*/
	970
	971	hlevel = (s->type == TOP ? -1 :
	972	s->type == INDEX ? 0 :
	973	heading_depth(s->title))
	974	- f->min_heading_depth + 1;
	975	assert(hlevel >= 1);
	976	/* HTML headings only go up to <h6> */
	977	if (hlevel > 6)
	978	hlevel = 6;
	979	htag[0] = 'h';
	980	htag[1] = '0' + hlevel;
	981	htag[2] = '\0';
	982	element_open(&ho, htag);
	983
	984	/*
	985	* Provide anchor for cross-links to target.
	986	*
	987	* FIXME: AIcurrentlyUI, this needs to be done
	988	* differently in XHTML because <a name> is
	989	* deprecated or obsolete.
	990	*
	991	* (Also we'll have to do this separately in
	992	* other paragraph types - NumberedList and
	993	* BiblioCited.)
	994	*/
	995	element_open(&ho, "a");
	996	element_attr(&ho, "name", s->fragment);
	997	element_close(&ho, "a");
	998
	999	html_section_title(&ho, s, f, keywords, &conf, TRUE);
	1000
	1001	element_close(&ho, htag);
	1002
	1003	/*
	1004	* Now display the section text.
	1005	*/
	1006	if (s->text) {
	1007	stackhead = snew(struct stackelement);
	1008	stackhead->next = NULL;
	1009	stackhead->listtype = NOLIST;
	1010	stackhead->itemtype = NOITEM;
	1011
	1012	for (p = s->text;; p = p->next) {
	1013	enum LISTTYPE listtype;
	1014	struct stackelement *se;
	1015
	1016	/*
	1017	* Preliminary switch to figure out what
	1018	* sort of list we expect to be inside at
	1019	* this stage.
	1020	*
	1021	* Since p may still be NULL at this point,
	1022	* I invent a harmless paragraph type for
	1023	* it if it is.
	1024	*/
	1025	switch (p ? p->type : para_Normal) {
	1026	case para_Rule:
	1027	case para_Normal:
	1028	case para_Copyright:
	1029	case para_BiblioCited:
	1030	case para_Code:
	1031	case para_QuotePush:
	1032	case para_QuotePop:
	1033	case para_Chapter:
	1034	case para_Appendix:
	1035	case para_UnnumberedChapter:
	1036	case para_Heading:
	1037	case para_Subsect:
	1038	case para_LcontPop:
	1039	listtype = NOLIST;
	1040	break;
	1041
	1042	case para_Bullet:
	1043	listtype = UL;
	1044	break;
	1045
	1046	case para_NumberedList:
	1047	listtype = OL;
	1048	break;
	1049
	1050	case para_DescribedThing:
	1051	case para_Description:
	1052	listtype = DL;
	1053	break;
	1054
	1055	case para_LcontPush:
	1056	se = snew(struct stackelement);
	1057	se->next = stackhead;
	1058	se->listtype = NOLIST;
	1059	se->itemtype = NOITEM;
	1060	stackhead = se;
	1061	continue;
	1062
	1063	default: /* some totally non-printing para */
	1064	continue;
	1065	}
	1066
	1067	html_nl(&ho);
	1068
	1069	/*
	1070	* Terminate the most recent list item, if
	1071	* any. (We left this until after
	1072	* processing LcontPush, since in that case
	1073	* the list item won't want to be
	1074	* terminated until after the corresponding
	1075	* LcontPop.)
	1076	*/
	1077	if (stackhead->itemtype != NOITEM) {
	1078	element_close(&ho, itemname(stackhead->itemtype));
	1079	html_nl(&ho);
	1080	}
	1081	stackhead->itemtype = NOITEM;
	1082
	1083	/*
	1084	* Terminate the current list, if it's not
	1085	* the one we want to be in.
	1086	*/
	1087	if (listtype != stackhead->listtype &&
	1088	stackhead->listtype != NOLIST) {
	1089	element_close(&ho, listname(stackhead->listtype));
	1090	html_nl(&ho);
	1091	}
	1092
	1093	/*
	1094	* Leave the loop if our time has come.
	1095	*/
	1096	if (!p \|\| (is_heading_type(p->type) &&
	1097	p->type != para_Title))
	1098	break; /* end of section text */
	1099
	1100	/*
	1101	* Start a fresh list if necessary.
	1102	*/
	1103	if (listtype != stackhead->listtype &&
	1104	listtype != NOLIST)
	1105	element_open(&ho, listname(listtype));
	1106
	1107	stackhead->listtype = listtype;
	1108
	1109	switch (p->type) {
	1110	case para_Rule:
	1111	element_empty(&ho, "hr");
	1112	break;
	1113	case para_Code:
	1114	html_codepara(&ho, p->words);
	1115	break;
	1116	case para_Normal:
	1117	case para_Copyright:
	1118	element_open(&ho, "p");
	1119	html_nl(&ho);
	1120	html_words(&ho, p->words, ALL,
	1121	f, keywords, &conf);
	1122	html_nl(&ho);
	1123	element_close(&ho, "p");
	1124	break;
	1125	case para_BiblioCited:
	1126	element_open(&ho, "p");
	1127	if (p->private_data) {
	1128	htmlsect s = (htmlsect )p->private_data;
	1129	element_open(&ho, "a");
	1130	element_attr(&ho, "name", s->fragment);
	1131	element_close(&ho, "a");
	1132	}
	1133	html_nl(&ho);
	1134	html_words(&ho, p->kwtext, ALL,
	1135	f, keywords, &conf);
	1136	html_text(&ho, L" ");
	1137	html_words(&ho, p->words, ALL,
	1138	f, keywords, &conf);
	1139	html_nl(&ho);
	1140	element_close(&ho, "p");
	1141	break;
	1142	case para_Bullet:
	1143	case para_NumberedList:
	1144	element_open(&ho, "li");
	1145	if (p->private_data) {
	1146	htmlsect s = (htmlsect )p->private_data;
	1147	element_open(&ho, "a");
	1148	element_attr(&ho, "name", s->fragment);
	1149	element_close(&ho, "a");
	1150	}
	1151	html_nl(&ho);
	1152	stackhead->itemtype = LI;
	1153	html_words(&ho, p->words, ALL,
	1154	f, keywords, &conf);
	1155	break;
	1156	case para_DescribedThing:
	1157	element_open(&ho, "dt");
	1158	html_nl(&ho);
	1159	stackhead->itemtype = DT;
	1160	html_words(&ho, p->words, ALL,
	1161	f, keywords, &conf);
	1162	break;
	1163	case para_Description:
	1164	element_open(&ho, "dd");
	1165	html_nl(&ho);
	1166	stackhead->itemtype = DD;
	1167	html_words(&ho, p->words, ALL,
	1168	f, keywords, &conf);
	1169	break;
	1170
	1171	case para_QuotePush:
	1172	element_open(&ho, "blockquote");
	1173	break;
	1174	case para_QuotePop:
	1175	element_close(&ho, "blockquote");
	1176	break;
	1177
	1178	case para_LcontPop:
	1179	se = stackhead;
	1180	stackhead = stackhead->next;
	1181	assert(stackhead);
	1182	sfree(se);
	1183	break;
	1184	}
	1185	}
	1186
	1187	assert(stackhead && !stackhead->next);
	1188	sfree(stackhead);
	1189	}
	1190
	1191	if (s->type == INDEX) {
	1192	indexentry *entry;
	1193	int i;
	1194
	1195	/*
	1196	* This section is the index. I'll just
	1197	* render it as a single paragraph, with a
	1198	* colon between the index term and the
	1199	* references, and <br> in between each
	1200	* entry.
	1201	*/
	1202	element_open(&ho, "p");
	1203
	1204	for (i = 0; (entry =
	1205	index234(idx->entries, i)) != NULL; i++) {
	1206	htmlindex hi = (htmlindex )entry->backend_data;
	1207	int j;
	1208
	1209	if (i > 0)
	1210	element_empty(&ho, "br");
	1211	html_nl(&ho);
	1212
	1213	html_words(&ho, entry->text, MARKUP\|LINKS,
	1214	f, keywords, &conf);
	1215
	1216	html_text(&ho, L": ");/* FIXME: configurable */
	1217
	1218	for (j = 0; j < hi->nrefs; j++) {
	1219	htmlindexref *hr =
	1220	(htmlindexref *)hi->refs[j]->private_data;
	1221	paragraph *p = hr->section->title;
	1222
	1223	if (j > 0)
	1224	html_text(&ho, L", "); /* FIXME: conf */
	1225
	1226	html_href(&ho, f, hr->section->file,
	1227	hr->fragment);
	1228	hr->referenced = TRUE;
	1229	if (p && p->kwtext)
	1230	html_words(&ho, p->kwtext, MARKUP\|LINKS,
	1231	f, keywords, &conf);
	1232	else if (p && p->words)
	1233	html_words(&ho, p->words, MARKUP\|LINKS,
	1234	f, keywords, &conf);
	1235	else
	1236	html_text(&ho, L"FIXME");
	1237	element_close(&ho, "a");
	1238	}
	1239	}
	1240	element_close(&ho, "p");
	1241	}
	1242	}
	1243	}
	1244
	1245	html_contents_entry(&ho, 0, NULL, f, keywords, &conf);
	1246	html_nl(&ho);
	1247
	1248	{
	1249	/*
	1250	* Footer.
	1251	*/
	1252	int done_version_ids = FALSE;
	1253
	1254	element_empty(&ho, "hr");
	1255
	1256	if (conf.body_end)
	1257	html_raw(&ho, conf.body_end);
	1258
	1259	if (conf.address_section) {
	1260	element_open(&ho, "address");
	1261	if (conf.addr_start) {
	1262	html_raw(&ho, conf.addr_start);
	1263	html_nl(&ho);
	1264	}
	1265	if (conf.visible_version_id) {
	1266	int started = FALSE;
	1267	for (p = sourceform; p; p = p->next)
	1268	if (p->type == para_VersionID) {
	1269	if (!started)
	1270	element_open(&ho, "p");
	1271	else
	1272	element_empty(&ho, "br");
	1273	html_nl(&ho);
	1274	html_text(&ho, L"["); /* FIXME: conf? */
	1275	html_words(&ho, p->words, NOTHING,
	1276	f, keywords, &conf);
	1277	html_text(&ho, L"]"); /* FIXME: conf? */
	1278	started = TRUE;
	1279	}
	1280	if (started)
	1281	element_close(&ho, "p");
	1282	done_version_ids = TRUE;
	1283	}
	1284	if (conf.addr_end)
	1285	html_raw(&ho, conf.addr_end);
	1286	element_close(&ho, "address");
	1287	}
	1288
	1289	if (!done_version_ids) {
	1290	/*
	1291	* If the user didn't want the version IDs
	1292	* visible, I think we still have a duty to put
	1293	* them in an HTML comment.
	1294	*/
	1295	int started = FALSE;
	1296	for (p = sourceform; p; p = p->next)
	1297	if (p->type == para_VersionID) {
	1298	if (!started) {
	1299	html_raw(&ho, "<!-- version IDs:\n");
	1300	started = TRUE;
	1301	}
	1302	html_words(&ho, p->words, NOTHING,
	1303	f, keywords, &conf);
	1304	html_nl(&ho);
	1305	}
	1306	if (started)
	1307	html_raw(&ho, "-->\n");
	1308	}
	1309	}
	1310
	1311	element_close(&ho, "body");
	1312	html_nl(&ho);
	1313	element_close(&ho, "html");
	1314	html_nl(&ho);
	1315	cleanup(&ho);
	1316	}
	1317	}
	1318
	1319	/*
	1320	* Go through and check that no index fragments were referenced
	1321	* without being generated, or indeed vice versa.
	1322	*
	1323	* (When I actually get round to freeing everything, this can
	1324	* probably be the freeing loop as well.)
	1325	*/
	1326	for (p = sourceform; p; p = p->next) {
	1327	word *w;
	1328	for (w = p->words; w; w = w->next)
	1329	if (w->type == word_IndexRef) {
	1330	htmlindexref hr = (htmlindexref )w->private_data;
	1331
	1332	assert(!hr->referenced == !hr->generated);
	1333	}
	1334	}
	1335
	1336	/*
	1337	* FIXME: Free all the working data.
	1338	*/
	1339	}
	1340
	1341	static void html_file_section(htmlconfig cfg, htmlfilelist files,
	1342	htmlsect *sect, int depth)
	1343	{
	1344	htmlfile *file;
	1345	int ldepth;
	1346
	1347	/*
	1348	* `depth' is derived from the heading_depth() macro at the top
	1349	* of this file, which counts title as -1, chapter as 0,
	1350	* heading as 1 and subsection as 2. However, the semantics of
	1351	* cfg->leaf_level are defined to count chapter as 1, heading
	1352	* as 2 etc. So first I increment depth :-(
	1353	*/
	1354	ldepth = depth + 1;
	1355
	1356	if (cfg->leaf_level == 0) {
	1357	/*
	1358	* leaf_level==0 is a special case, in which everything is
	1359	* put into a single file.
	1360	*/
	1361	if (!files->single)
	1362	files->single = html_new_file(files, cfg->single_filename);
	1363
	1364	file = files->single;
	1365	} else {
	1366	/*
	1367	* If the depth of this section is at or above leaf_level,
	1368	* we invent a fresh file and put this section at its head.
	1369	* Otherwise, we put it in the same file as its parent
	1370	* section.
	1371	*/
	1372	if (ldepth > cfg->leaf_level) {
	1373	/*
	1374	* We know that sect->parent cannot be NULL. The only
	1375	* circumstance in which it can be is if sect is at
	1376	* chapter or appendix level, i.e. ldepth==1; and if
	1377	* that's the case, then we cannot have entered this
	1378	* branch unless cfg->leaf_level==0, in which case we
	1379	* would be in the single-file case above and not here
	1380	* at all.
	1381	*/
	1382	assert(sect->parent);
	1383
	1384	file = sect->parent->file;
	1385	} else {
	1386	if (sect->type == TOP) {
	1387	file = html_new_file(files, cfg->contents_filename);
	1388	} else if (sect->type == INDEX) {
	1389	file = html_new_file(files, cfg->index_filename);
	1390	} else {
	1391	char *title;
	1392
	1393	assert(ldepth > 0 && sect->title);
	1394	title = html_format(sect->title, cfg->template_filename);
	1395	file = html_new_file(files, title);
	1396	sfree(title);
	1397	}
	1398	}
	1399	}
	1400
	1401	sect->file = file;
	1402
	1403	if (file->min_heading_depth > depth) {
	1404	/*
	1405	* This heading is at a higher level than any heading we
	1406	* have so far placed in this file; so we set the `first'
	1407	* pointer.
	1408	*/
	1409	file->min_heading_depth = depth;
	1410	file->first = sect;
	1411	}
	1412
	1413	if (file->min_heading_depth == depth)
	1414	file->last = sect;
	1415	}
	1416
	1417	static htmlfile html_new_file(htmlfilelist list, char *filename)
	1418	{
	1419	htmlfile *ret = snew(htmlfile);
	1420
	1421	ret->next = NULL;
	1422	if (list->tail)
	1423	list->tail->next = ret;
	1424	else
	1425	list->head = ret;
	1426	list->tail = ret;
	1427
	1428	ret->filename = dupstr(filename);
	1429	ret->last_fragment_number = 0;
	1430	ret->min_heading_depth = INT_MAX;
	1431	ret->first = ret->last = NULL;
	1432
	1433	return ret;
	1434	}
	1435
	1436	static htmlsect html_new_sect(htmlsectlist list, paragraph *title)
	1437	{
	1438	htmlsect *ret = snew(htmlsect);
	1439
	1440	ret->next = NULL;
	1441	if (list->tail)
	1442	list->tail->next = ret;
	1443	else
	1444	list->head = ret;
	1445	list->tail = ret;
	1446
	1447	ret->title = title;
	1448	ret->file = NULL;
	1449	ret->parent = NULL;
	1450	ret->type = NORMAL;
	1451
	1452	return ret;
	1453	}
	1454
	1455	static void html_words(htmloutput ho, word words, int flags,
	1456	htmlfile file, keywordlist keywords, htmlconfig *cfg)
	1457	{
	1458	word *w;
	1459	char *c;
	1460	int style, type;
	1461
	1462	for (w = words; w; w = w->next) switch (w->type) {
	1463	case word_HyperLink:
	1464	if (flags & LINKS) {
	1465	element_open(ho, "a");
	1466	c = utoa_dup(w->text, CS_ASCII);
	1467	element_attr(ho, "href", c);
	1468	sfree(c);
	1469	}
	1470	break;
	1471	case word_UpperXref:
	1472	case word_LowerXref:
	1473	if (flags & LINKS) {
	1474	keyword *kwl = kw_lookup(keywords, w->text);
	1475	paragraph *p = kwl->para;
	1476	htmlsect s = (htmlsect )p->private_data;
	1477
	1478	assert(s);
	1479
	1480	html_href(ho, file, s->file, s->fragment);
	1481	}
	1482	break;
	1483	case word_HyperEnd:
	1484	case word_XrefEnd:
	1485	if (flags & LINKS)
	1486	element_close(ho, "a");
	1487	break;
	1488	case word_IndexRef:
	1489	if (flags & INDEXENTS) {
	1490	htmlindexref hr = (htmlindexref )w->private_data;
	1491	element_open(ho, "a");
	1492	element_attr(ho, "name", hr->fragment);
	1493	element_close(ho, "a");
	1494	hr->generated = TRUE;
	1495	}
	1496	break;
	1497	case word_Normal:
	1498	case word_Emph:
	1499	case word_Code:
	1500	case word_WeakCode:
	1501	case word_WhiteSpace:
	1502	case word_EmphSpace:
	1503	case word_CodeSpace:
	1504	case word_WkCodeSpace:
	1505	case word_Quote:
	1506	case word_EmphQuote:
	1507	case word_CodeQuote:
	1508	case word_WkCodeQuote:
	1509	style = towordstyle(w->type);
	1510	type = removeattr(w->type);
	1511	if (style == word_Emph &&
	1512	(attraux(w->aux) == attr_First \|\|
	1513	attraux(w->aux) == attr_Only) &&
	1514	(flags & MARKUP))
	1515	element_open(ho, "em");
	1516	else if ((style == word_Code \|\| style == word_WeakCode) &&
	1517	(attraux(w->aux) == attr_First \|\|
	1518	attraux(w->aux) == attr_Only) &&
	1519	(flags & MARKUP))
	1520	element_open(ho, "code");
	1521
	1522	if (type == word_WhiteSpace)
	1523	html_text(ho, L" ");
	1524	else if (type == word_Quote) {
	1525	if (quoteaux(w->aux) == quote_Open)
	1526	html_text(ho, cfg->lquote);
	1527	else
	1528	html_text(ho, cfg->rquote);
	1529	} else {
	1530	if (cvt_ok(ho->charset, w->text) \|\| !w->alt)
	1531	html_text(ho, w->text);
	1532	else
	1533	html_words(ho, w->alt, flags, file, keywords, cfg);
	1534	}
	1535
	1536	if (style == word_Emph &&
	1537	(attraux(w->aux) == attr_Last \|\|
	1538	attraux(w->aux) == attr_Only) &&
	1539	(flags & MARKUP))
	1540	element_close(ho, "em");
	1541	else if ((style == word_Code \|\| style == word_WeakCode) &&
	1542	(attraux(w->aux) == attr_Last \|\|
	1543	attraux(w->aux) == attr_Only) &&
	1544	(flags & MARKUP))
	1545	element_close(ho, "code");
	1546
	1547	break;
	1548	}
	1549	}
	1550
	1551	static void html_codepara(htmloutput ho, word words)
	1552	{
	1553	element_open(ho, "pre");
	1554	element_open(ho, "code");
	1555	for (; words; words = words->next) if (words->type == word_WeakCode) {
	1556	char *open_tag;
	1557	wchar_t t, e;
	1558
	1559	t = words->text;
	1560	if (words->next && words->next->type == word_Emph) {
	1561	e = words->next->text;
	1562	words = words->next;
	1563	} else
	1564	e = NULL;
	1565
	1566	while (e && e && t) {
	1567	int n;
	1568	int ec = *e;
	1569
	1570	for (n = 0; t[n] && e[n] && e[n] == ec; n++);
	1571
	1572	open_tag = NULL;
	1573	if (ec == 'i')
	1574	open_tag = "em";
	1575	else if (ec == 'b')
	1576	open_tag = "b";
	1577	if (open_tag)
	1578	element_open(ho, open_tag);
	1579
	1580	html_text_limit(ho, t, n);
	1581
	1582	if (open_tag)
	1583	element_close(ho, open_tag);
	1584
	1585	t += n;
	1586	e += n;
	1587	}
	1588	html_text(ho, t);
	1589	html_nl(ho);
	1590	}
	1591	element_close(ho, "code");
	1592	element_close(ho, "pre");
	1593	}
	1594
	1595	static void html_charset_cleanup(htmloutput *ho)
	1596	{
	1597	char outbuf[256];
	1598	int bytes;
	1599
	1600	bytes = charset_from_unicode(NULL, NULL, outbuf, lenof(outbuf),
	1601	ho->charset, &ho->cstate, NULL);
	1602	if (bytes > 0)
	1603	fwrite(outbuf, 1, bytes, ho->fp);
	1604	}
	1605
	1606	static void return_to_neutral(htmloutput *ho)
	1607	{
	1608	if (ho->state == HO_IN_TEXT) {
	1609	html_charset_cleanup(ho);
	1610	} else if (ho->state == HO_IN_EMPTY_TAG && is_xhtml(ho->ver)) {
	1611	fprintf(ho->fp, " />");
	1612	} else if (ho->state == HO_IN_EMPTY_TAG \|\| ho->state == HO_IN_TAG) {
	1613	fprintf(ho->fp, ">");
	1614	}
	1615
	1616	ho->state = HO_NEUTRAL;
	1617	}
	1618
	1619	static void element_open(htmloutput ho, char const name)
	1620	{
	1621	return_to_neutral(ho);
	1622	fprintf(ho->fp, "<%s", name);
	1623	ho->state = HO_IN_TAG;
	1624	}
	1625
	1626	static void element_close(htmloutput ho, char const name)
	1627	{
	1628	return_to_neutral(ho);
	1629	fprintf(ho->fp, "</%s>", name);
	1630	ho->state = HO_NEUTRAL;
	1631	}
	1632
	1633	static void element_empty(htmloutput ho, char const name)
	1634	{
	1635	return_to_neutral(ho);
	1636	fprintf(ho->fp, "<%s", name);
	1637	ho->state = HO_IN_EMPTY_TAG;
	1638	}
	1639
	1640	static void html_nl(htmloutput *ho)
	1641	{
	1642	return_to_neutral(ho);
	1643	fputc('\n', ho->fp);
	1644	}
	1645
	1646	static void html_raw(htmloutput ho, char text)
	1647	{
	1648	return_to_neutral(ho);
	1649	fputs(text, ho->fp);
	1650	}
	1651
	1652	static void html_raw_as_attr(htmloutput ho, char text)
	1653	{
	1654	assert(ho->state == HO_IN_TAG \|\| ho->state == HO_IN_EMPTY_TAG);
	1655	fputc(' ', ho->fp);
	1656	fputs(text, ho->fp);
	1657	}
	1658
	1659	static void element_attr(htmloutput ho, char const name, char const *value)
	1660	{
	1661	html_charset_cleanup(ho);
	1662	assert(ho->state == HO_IN_TAG \|\| ho->state == HO_IN_EMPTY_TAG);
	1663	fprintf(ho->fp, " %s=\"%s\"", name, value);
	1664	}
	1665
	1666	static void element_attr_w(htmloutput ho, char const name,
	1667	wchar_t const *value)
	1668	{
	1669	html_charset_cleanup(ho);
	1670	fprintf(ho->fp, " %s=\"", name);
	1671	html_text_limit_internal(ho, value, 0, TRUE);
	1672	html_charset_cleanup(ho);
	1673	fputc('"', ho->fp);
	1674	}
	1675
	1676	static void html_text(htmloutput ho, wchar_t const text)
	1677	{
	1678	html_text_limit(ho, text, 0);
	1679	}
	1680
	1681	static void html_text_limit(htmloutput ho, wchar_t const text, int maxlen)
	1682	{
	1683	return_to_neutral(ho);
	1684	html_text_limit_internal(ho, text, maxlen, FALSE);
	1685	}
	1686
	1687	static void html_text_limit_internal(htmloutput ho, wchar_t const text,
	1688	int maxlen, int quote_quotes)
	1689	{
	1690	int textlen = ustrlen(text);
	1691	char outbuf[256];
	1692	int bytes, err;
	1693
	1694	if (maxlen > 0 && textlen > maxlen)
	1695	textlen = maxlen;
	1696
	1697	while (textlen > 0) {
	1698	/* Scan ahead for characters we really can't display in HTML. */
	1699	int lenbefore, lenafter;
	1700	for (lenbefore = 0; lenbefore < textlen; lenbefore++)
	1701	if (text[lenbefore] == L'<' \|\|
	1702	text[lenbefore] == L'>' \|\|
	1703	text[lenbefore] == L'&' \|\|
	1704	(text[lenbefore] == L'"' && quote_quotes))
	1705	break;
	1706	lenafter = lenbefore;
	1707	bytes = charset_from_unicode(&text, &lenafter, outbuf, lenof(outbuf),
	1708	ho->charset, &ho->cstate, &err);
	1709	textlen -= (lenbefore - lenafter);
	1710	if (bytes > 0)
	1711	fwrite(outbuf, 1, bytes, ho->fp);
	1712	if (err) {
	1713	/*
	1714	* We have encountered a character that cannot be
	1715	* displayed in the selected output charset. Therefore,
	1716	* we use an HTML numeric entity reference.
	1717	*/
	1718	assert(textlen > 0);
	1719	fprintf(ho->fp, "&#%ld;", (long int)*text);
	1720	text++, textlen--;
	1721	} else if (lenafter == 0 && textlen > 0) {
	1722	/*
	1723	* We have encountered a character which is special to
	1724	* HTML.
	1725	*/
	1726	if (*text == L'<')
	1727	fprintf(ho->fp, "<");
	1728	else if (*text == L'>')
	1729	fprintf(ho->fp, ">");
	1730	else if (*text == L'&')
	1731	fprintf(ho->fp, "&");
	1732	else if (*text == L'"')
	1733	fprintf(ho->fp, """);
	1734	else
	1735	assert(!"Can't happen");
	1736	text++, textlen--;
	1737	}
	1738	}
	1739	}
	1740
	1741	static void cleanup(htmloutput *ho)
	1742	{
	1743	return_to_neutral(ho);
	1744	fclose(ho->fp);
	1745	}
	1746
	1747	static void html_href(htmloutput ho, htmlfile thisfile,
	1748	htmlfile targetfile, char targetfrag)
	1749	{
	1750	rdstringc rs = { 0, 0, NULL };
	1751	char *url;
	1752
	1753	if (targetfile != thisfile)
	1754	rdaddsc(&rs, targetfile->filename);
	1755	if (targetfrag) {
	1756	rdaddc(&rs, '#');
	1757	rdaddsc(&rs, targetfrag);
	1758	}
	1759	url = rs.text;
	1760
	1761	element_open(ho, "a");
	1762	element_attr(ho, "href", url);
	1763	sfree(url);
	1764	}
	1765
	1766	static char html_format(paragraph p, char *template_string)
	1767	{
	1768	char c, t;
	1769	word *w;
	1770	wchar_t *ws, wsbuf[2];
	1771	rdstringc rs = { 0, 0, NULL };
	1772
	1773	t = template_string;
	1774	while (*t) {
	1775	if (*t == '%' && t[1]) {
	1776	int fmt;
	1777
	1778	t++;
	1779	fmt = *t++;
	1780
	1781	if (fmt == '%') {
	1782	rdaddc(&rs, fmt);
	1783	continue;
	1784	}
	1785
	1786	w = NULL;
	1787	ws = NULL;
	1788
	1789	if (p->kwtext && fmt == 'n')
	1790	w = p->kwtext;
	1791	else if (p->kwtext2 && fmt == 'b') {
	1792	/*
	1793	* HTML fragment names must start with a letter, so
	1794	* simply `1.2.3' is not adequate. In this case I'm
	1795	* going to cheat slightly by prepending the first
	1796	* character of the first word of kwtext, so that
	1797	* we get `C1' for chapter 1, `S2.3' for section
	1798	* 2.3 etc.
	1799	*/
	1800	if (p->kwtext && p->kwtext->text[0]) {
	1801	ws = wsbuf;
	1802	wsbuf[1] = '\0';
	1803	wsbuf[0] = p->kwtext->text[0];
	1804	}
	1805	w = p->kwtext2;
	1806	} else if (p->keyword && *p->keyword && fmt == 'k')
	1807	ws = p->keyword;
	1808	else
	1809	w = p->words;
	1810
	1811	if (ws) {
	1812	c = utoa_dup(ws, CS_ASCII);
	1813	rdaddsc(&rs,c);
	1814	sfree(c);
	1815	}
	1816
	1817	while (w) {
	1818	if (removeattr(w->type) == word_Normal) {
	1819	c = utoa_dup(w->text, CS_ASCII);
	1820	rdaddsc(&rs,c);
	1821	sfree(c);
	1822	}
	1823	w = w->next;
	1824	}
	1825	} else {
	1826	rdaddc(&rs, *t++);
	1827	}
	1828	}
	1829
	1830	return rdtrimc(&rs);
	1831	}
	1832
	1833	static char html_sanitise_fragment(htmlfilelist files, htmlfile *file,
	1834	char *text)
	1835	{
	1836	/*
	1837	* The HTML 4 spec's strictest definition of fragment names (<a
	1838	* name> and "id" attributes) says that they `must begin with a
	1839	* letter and may be followed by any number of letters, digits,
	1840	* hyphens, underscores, colons, and periods'.
	1841	*
	1842	* So here we unceremoniously rip out any characters not
	1843	* conforming to this limitation.
	1844	*/
	1845	char p = text, q = text;
	1846
	1847	while (p && !((p>='A' && p<='Z') \|\| (p>='a' && *p<='z')))
	1848	p++;
	1849	if ((q++ = p++) != '\0') {
	1850	while (*p) {
	1851	if ((p>='A' && p<='Z') \|\|
	1852	(p>='a' && p<='z') \|\|
	1853	(p>='0' && p<='9') \|\|
	1854	p=='-' \|\| p=='_' \|\| p==':' \|\| p=='.')
	1855	q++ = p;
	1856	p++;
	1857	}
	1858
	1859	*q = '\0';
	1860	}
	1861
	1862	/*
	1863	* Now we check for clashes with other fragment names, and
	1864	* adjust this one if necessary by appending a hyphen followed
	1865	* by a number.
	1866	*/
	1867	{
	1868	htmlfragment *frag = snew(htmlfragment);
	1869	int len = 0; /* >0 indicates we have resized */
	1870	int suffix = 1;
	1871
	1872	frag->file = file;
	1873	frag->fragment = text;
	1874
	1875	while (add234(files->frags, frag) != frag) {
	1876	if (!len) {
	1877	len = strlen(text);
	1878	frag->fragment = text = sresize(text, len+20, char);
	1879	}
	1880
	1881	sprintf(text + len, "-%d", ++suffix);
	1882	}
	1883	}
	1884
	1885	return text;
	1886	}
	1887
	1888	static void html_contents_entry(htmloutput ho, int depth, htmlsect s,
	1889	htmlfile thisfile, keywordlist keywords,
	1890	htmlconfig *cfg)
	1891	{
	1892	while (ho->contents_level > depth) {
	1893	element_close(ho, "ul");
	1894	ho->contents_level--;
	1895	}
	1896
	1897	while (ho->contents_level < depth) {
	1898	element_open(ho, "ul");
	1899	ho->contents_level++;
	1900	}
	1901
	1902	if (!s)
	1903	return;
	1904
	1905	element_open(ho, "li");
	1906	html_href(ho, thisfile, s->file, s->fragment);
	1907	html_section_title(ho, s, thisfile, keywords, cfg, FALSE);
	1908	element_close(ho, "a");
	1909	element_close(ho, "li");
	1910	}
	1911
	1912	static void html_section_title(htmloutput ho, htmlsect s, htmlfile *thisfile,
	1913	keywordlist keywords, htmlconfig cfg,
	1914	int real)
	1915	{
	1916	if (s->title) {
	1917	sectlevel *sl;
	1918	word *number;
	1919	int depth = heading_depth(s->title);
	1920
	1921	if (depth < 0)
	1922	sl = NULL;
	1923	else if (depth == 0)
	1924	sl = &cfg->achapter;
	1925	else if (depth <= cfg->nasect)
	1926	sl = &cfg->asect[depth-1];
	1927	else
	1928	sl = &cfg->asect[cfg->nasect-1];
	1929
	1930	if (!sl)
	1931	number = NULL;
	1932	else if (sl->just_numbers)
	1933	number = s->title->kwtext2;
	1934	else
	1935	number = s->title->kwtext;
	1936
	1937	if (number) {
	1938	html_words(ho, number, MARKUP,
	1939	thisfile, keywords, cfg);
	1940	html_text(ho, sl->number_suffix);
	1941	}
	1942
	1943	html_words(ho, s->title->words, real ? ALL : MARKUP,
	1944	thisfile, keywords, cfg);
	1945	} else {
	1946	assert(s->type != NORMAL);
	1947	if (s->type == TOP)
	1948	html_text(ho, L"Preamble");/* FIXME: configure */
	1949	else if (s->type == INDEX)
	1950	html_text(ho, L"Index");/* FIXME: configure */
	1951	}
	1952	}