mdw@git.distorted.org.uk Git - sgt/halibut/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* xhtml backend for Halibut
	3	* (initial implementation by James Aylett)
	4	*
	5	* Still to do:
	6	*
	7	* +++ doesn't handle non-breaking hyphens. Not sure how to yet.
	8	* +++ entity names (from a file -- ideally supply normal SGML files)
	9	* +++ configuration directive to file split where the current layout
	10	* code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
	11	* perhaps others.
	12	*
	13	* Limitations:
	14	*
	15	* +++ biblio/index references target the nearest section marker, rather
	16	* than having a dedicated target themselves. In large bibliographies
	17	* this will cause problems. (The solution is to fake up a response
	18	* from xhtml_find_section(), probably linking it into the sections
	19	* chain just in case we need it again, and to make freeing it up
	20	* easier.) docsrc.pl used to work as we do, however, and SGT agrees that
	21	* this is acceptable for now.
	22	* +++ can't cope with leaf-level == 0. It's all to do with the
	23	* top-level file not being normal, probably not even having a valid
	24	* section level, and stuff like that. I question whether this is an
	25	* issue, frankly; small manuals that fit on one page should probably
	26	* not be written in halibut at all.
	27	*/
	28
	29	#include <stdio.h>
	30	#include <stdlib.h>
	31	#include <string.h>
	32	#include <assert.h>
	33	#include "halibut.h"
	34
	/*
	 * Default output file names and name templates. Each of these can
	 * be overridden by configuration directives in the input.
	 *
	 * FILENAME_TEMPLATE gives the general form of the per-section HTML
	 * file names. It is loosely printf-styled: `%' followed by a
	 * single character is a formatting directive, and `%%' is a
	 * literal `%'. The directives are:
	 *
	 *  - %n  the section type-plus-number, minus whitespace
	 *        (`Chapter1.2').
	 *  - %b  the section number on its own (`1.2').
	 *  - %k  the section's _internal_ keyword.
	 *  - %N  the section's visible title in the output, again minus
	 *        whitespace.
	 *
	 * %n, %b and %k all fall back to %N if the section is unnumbered
	 * (`Bibliography' is often a good example).
	 *
	 * FRAGMENT_TEMPLATE uses the same syntax, but defines the
	 * <a name="foo"> markers used to cross-reference particular
	 * subsections of a file.
	 */

	#define FILENAME_SINGLE   "Manual.html"
	#define FILENAME_CONTENTS "Contents.html"
	#define FILENAME_INDEX    "IndexPage.html"
	#define FILENAME_TEMPLATE "%n.html"
	#define FRAGMENT_TEMPLATE "%b"
	61
	62	struct xhtmlsection_Struct {
	63	struct xhtmlsection_Struct next; / next sibling (NULL if split across files) */
	64	struct xhtmlsection_Struct child; / NULL if split across files */
	65	struct xhtmlsection_Struct parent; / NULL if split across files */
	66	struct xhtmlsection_Struct chain; / single structure independent of weird trees */
	67	paragraph *para;
	68	struct xhtmlfile_Struct file; / which file is this a part of? */
	69	char fragment; / fragment id within the file */
	70	int level;
	71	};
	72
	/*
	 * One output file. Files form their own tree, linked in the same
	 * next/child/parent fashion as sections.
	 */
	struct xhtmlfile_Struct {
	    struct xhtmlfile_Struct *next;        /* next sibling file */
	    struct xhtmlfile_Struct *child;       /* first file nested beneath this one */
	    struct xhtmlfile_Struct *parent;      /* enclosing file, NULL at the top */
	    char *filename;                       /* name the file is written under */
	    struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */
	    int is_leaf; /* is this file a leaf file, ie does it not have any children? */
	};
	81
	/* Short names for the backend's structures. */
	typedef struct xhtmlsection_Struct xhtmlsection;
	typedef struct xhtmlfile_Struct xhtmlfile;
	typedef struct xhtmlindex_Struct xhtmlindex;

	/*
	 * Hung off an index entry's backend_data pointer: the list of
	 * sections that the index page links to for that entry.
	 */
	struct xhtmlindex_Struct {
	    int nsection;            /* number of entries of sections[] that are walked */
	    int size;                /* presumably the allocated length of sections[] -- TODO confirm */
	    xhtmlsection **sections; /* array of section pointers; entries may be NULL */
	};
	91
	/*
	 * How the number part of a heading at one level is presented.
	 */
	typedef struct {
	    int just_numbers;       /* boolean, set from the xhtml-*-numeric directives */
	    wchar_t *number_suffix; /* text set from the xhtml-*-suffix directives */
	} xhtmlheadfmt;
	96
	97	typedef struct {
	98	int contents_depth[6];
	99	int leaf_contains_contents;
	100	int leaf_level;
	101	int leaf_smallest_contents;
	102	int include_version_id;
	103	wchar_t author, description;
	104	wchar_t head_end, body, body_start, body_end, address_start, address_end, *nav_attrs;
	105	int suppress_address;
	106	xhtmlheadfmt fchapter, *fsect;
	107	int nfsect;
	108	char contents_filename, index_filename;
	109	char single_filename, template_filename, *template_fragment;
	110	} xhtmlconfig;
	111
	112	/static void xhtml_level(paragraph , int);
	113	static void xhtml_level_0(paragraph *);
	114	static void xhtml_docontents(FILE , paragraph , int);
	115	static void xhtml_dosections(FILE , paragraph , int);
	116	static void xhtml_dobody(FILE , paragraph , int);*/
	117
	118	static void xhtml_doheader(FILE , word );
	119	static void xhtml_dofooter(FILE *);
	120	static void xhtml_versionid(FILE , word , int);
	121
	122	static void xhtml_utostr(wchar_t , char *);
	123	static int xhtml_para_level(paragraph *);
	124	static int xhtml_reservedchar(int);
	125
	126	static int xhtml_convert(wchar_t , int, char *, int);
	127	static void xhtml_rdaddwc(rdstringc , word , word *, int);
	128	static void xhtml_para(FILE , word , int);
	129	static void xhtml_codepara(FILE , word );
	130	static void xhtml_heading(FILE , paragraph , int);
	131
	132	/* File-global variables are much easier than passing these things
	133	* all over the place. Evil, but easier. We can replace this with a single
	134	* structure at some point.
	135	*/
	136	static xhtmlconfig conf;
	137	static keywordlist *keywords;
	138	static indexdata *idx;
	139	static xhtmlfile *topfile;
	140	static xhtmlsection *topsection;
	141	static paragraph *sourceparas;
	142	static xhtmlfile *lastfile;
	143	static xhtmlfile *xhtml_last_file = NULL;
	144	static int last_level=-1, start_level;
	145	static xhtmlsection *currentsection;
	146
	147	static xhtmlconfig xhtml_configure(paragraph *source)
	148	{
	149	xhtmlconfig ret;
	150
	151	/*
	152	* Defaults.
	153	*/
	154	ret.contents_depth[0] = 2;
	155	ret.contents_depth[1] = 3;
	156	ret.contents_depth[2] = 4;
	157	ret.contents_depth[3] = 5;
	158	ret.contents_depth[4] = 6;
	159	ret.contents_depth[5] = 7;
	160	ret.leaf_level = 2;
	161	ret.leaf_smallest_contents = 4;
	162	ret.leaf_contains_contents = FALSE;
	163	ret.include_version_id = TRUE;
	164	ret.author = NULL;
	165	ret.description = NULL;
	166	ret.head_end = NULL;
	167	ret.body = NULL;
	168	ret.body_start = NULL;
	169	ret.body_end = NULL;
	170	ret.address_start = NULL;
	171	ret.address_end = NULL;
	172	ret.nav_attrs = NULL;
	173	ret.suppress_address = FALSE;
	174
	175	ret.fchapter.just_numbers = FALSE;
	176	ret.fchapter.number_suffix = L": ";
	177	ret.nfsect = 2;
	178	ret.fsect = mknewa(xhtmlheadfmt, ret.nfsect);
	179	ret.fsect[0].just_numbers = FALSE;
	180	ret.fsect[0].number_suffix = L": ";
	181	ret.fsect[1].just_numbers = TRUE;
	182	ret.fsect[1].number_suffix = L" ";
	183	ret.contents_filename = strdup(FILENAME_CONTENTS);
	184	ret.single_filename = strdup(FILENAME_SINGLE);
	185	ret.index_filename = strdup(FILENAME_INDEX);
	186	ret.template_filename = strdup(FILENAME_TEMPLATE);
	187	ret.template_fragment = strdup(FRAGMENT_TEMPLATE);
	188
	189	for (; source; source = source->next)
	190	{
	191	if (source->type == para_Config)
	192	{
	193	if (!ustricmp(source->keyword, L"xhtml-contents-filename")) {
	194	sfree(ret.contents_filename);
	195	ret.contents_filename = utoa_dup(uadv(source->keyword));
	196	} else if (!ustricmp(source->keyword, L"xhtml-single-filename")) {
	197	sfree(ret.single_filename);
	198	ret.single_filename = utoa_dup(uadv(source->keyword));
	199	} else if (!ustricmp(source->keyword, L"xhtml-index-filename")) {
	200	sfree(ret.index_filename);
	201	ret.index_filename = utoa_dup(uadv(source->keyword));
	202	} else if (!ustricmp(source->keyword, L"xhtml-template-filename")) {
	203	sfree(ret.template_filename);
	204	ret.template_filename = utoa_dup(uadv(source->keyword));
	205	} else if (!ustricmp(source->keyword, L"xhtml-template-fragment")) {
	206	sfree(ret.template_fragment);
	207	ret.template_fragment = utoa_dup(uadv(source->keyword));
	208	} else if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) {
	209	ret.contents_depth[0] = utoi(uadv(source->keyword));
	210	} else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) {
	211	ret.contents_depth[1] = utoi(uadv(source->keyword));
	212	} else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2")) {
	213	ret.contents_depth[2] = utoi(uadv(source->keyword));
	214	} else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3")) {
	215	ret.contents_depth[3] = utoi(uadv(source->keyword));
	216	} else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4")) {
	217	ret.contents_depth[4] = utoi(uadv(source->keyword));
	218	} else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5")) {
	219	ret.contents_depth[5] = utoi(uadv(source->keyword));
	220	} else if (!ustricmp(source->keyword, L"xhtml-leaf-level")) {
	221	ret.leaf_level = utoi(uadv(source->keyword));
	222	} else if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents")) {
	223	ret.leaf_smallest_contents = utoi(uadv(source->keyword));
	224	} else if (!ustricmp(source->keyword, L"xhtml-versionid")) {
	225	ret.include_version_id = utob(uadv(source->keyword));
	226	} else if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents")) {
	227	ret.leaf_contains_contents = utob(uadv(source->keyword));
	228	} else if (!ustricmp(source->keyword, L"xhtml-suppress-address")) {
	229	ret.suppress_address = utob(uadv(source->keyword));
	230	} else if (!ustricmp(source->keyword, L"xhtml-author")) {
	231	ret.author = uadv(source->keyword);
	232	} else if (!ustricmp(source->keyword, L"xhtml-description")) {
	233	ret.description = uadv(source->keyword);
	234	} else if (!ustricmp(source->keyword, L"xhtml-head-end")) {
	235	ret.head_end = uadv(source->keyword);
	236	} else if (!ustricmp(source->keyword, L"xhtml-body-start")) {
	237	ret.body_start = uadv(source->keyword);
	238	} else if (!ustricmp(source->keyword, L"xhtml-body-tag")) {
	239	ret.body = uadv(source->keyword);
	240	} else if (!ustricmp(source->keyword, L"xhtml-body-end")) {
	241	ret.body_end = uadv(source->keyword);
	242	} else if (!ustricmp(source->keyword, L"xhtml-address-start")) {
	243	ret.address_start = uadv(source->keyword);
	244	} else if (!ustricmp(source->keyword, L"xhtml-address-end")) {
	245	ret.address_end = uadv(source->keyword);
	246	} else if (!ustricmp(source->keyword, L"xhtml-navigation-attributes")) {
	247	ret.nav_attrs = uadv(source->keyword);
	248	} else if (!ustricmp(source->keyword, L"xhtml-chapter-numeric")) {
	249	ret.fchapter.just_numbers = utob(uadv(source->keyword));
	250	} else if (!ustricmp(source->keyword, L"xhtml-chapter-suffix")) {
	251	ret.fchapter.number_suffix = uadv(source->keyword);
	252	} else if (!ustricmp(source->keyword, L"xhtml-section-numeric")) {
	253	wchar_t *p = uadv(source->keyword);
	254	int n = 0;
	255	if (uisdigit(*p)) {
	256	n = utoi(p);
	257	p = uadv(p);
	258	}
	259	if (n >= ret.nfsect) {
	260	int i;
	261	ret.fsect = resize(ret.fsect, n+1);
	262	for (i = ret.nfsect; i <= n; i++)
	263	ret.fsect[i] = ret.fsect[ret.nfsect-1];
	264	ret.nfsect = n+1;
	265	}
	266	ret.fsect[n].just_numbers = utob(p);
	267	} else if (!ustricmp(source->keyword, L"xhtml-section-suffix")) {
	268	wchar_t *p = uadv(source->keyword);
	269	int n = 0;
	270	if (uisdigit(*p)) {
	271	n = utoi(p);
	272	p = uadv(p);
	273	}
	274	if (n >= ret.nfsect) {
	275	int i;
	276	ret.fsect = resize(ret.fsect, n+1);
	277	for (i = ret.nfsect; i <= n; i++)
	278	ret.fsect[i] = ret.fsect[ret.nfsect-1];
	279	ret.nfsect = n+1;
	280	}
	281	ret.fsect[n].number_suffix = p;
	282	}
	283	}
	284	}
	285
	286	/* printf(" !!! leaf_level = %i\n", ret.leaf_level);
	287	printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
	288	printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
	289	printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
	290	printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
	291	printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
	292	printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
	293	printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/
	294	return ret;
	295	}
	296
	297	paragraph xhtml_config_filename(char filename)
	298	{
	299	/*
	300	* If the user passes in a single filename as a parameter to
	301	* the `--html' command-line option, then we should assume it
	302	* to imply _two_ config directives:
	303	* \cfg{xhtml-single-filename}{whatever} and
	304	* \cfg{xhtml-leaf-level}{0}; the rationale being that the user
	305	* wants their output _in that file_.
	306	*/
	307
	308	paragraph *p[2];
	309	int i, len;
	310	wchar_t ufilename, up;
	311
	312	for (i = 0; i < 2; i++) {
	313	p[i] = mknew(paragraph);
	314	memset(p[i], 0, sizeof(*p[i]));
	315	p[i]->type = para_Config;
	316	p[i]->next = NULL;
	317	p[i]->fpos.filename = "<command line>";
	318	p[i]->fpos.line = p[i]->fpos.col = -1;
	319	}
	320
	321	ufilename = ufroma_dup(filename);
	322	len = ustrlen(ufilename) + 2 + lenof(L"xhtml-single-filename");
	323	p[0]->keyword = mknewa(wchar_t, len);
	324	up = p[0]->keyword;
	325	ustrcpy(up, L"xhtml-single-filename");
	326	up = uadv(up);
	327	ustrcpy(up, ufilename);
	328	up = uadv(up);
	329	*up = L'\0';
	330	assert(up - p[0]->keyword < len);
	331	sfree(ufilename);
	332
	333	len = lenof(L"xhtml-leaf-level") + lenof(L"0") + 1;
	334	p[1]->keyword = mknewa(wchar_t, len);
	335	up = p[1]->keyword;
	336	ustrcpy(up, L"xhtml-leaf-level");
	337	up = uadv(up);
	338	ustrcpy(up, L"0");
	339	up = uadv(up);
	340	*up = L'\0';
	341	assert(up - p[1]->keyword < len);
	342
	343	p[0]->next = p[1];
	344
	345	return p[0];
	346	}
	347
	348	static xhtmlsection xhtml_new_section(xhtmlsection last)
	349	{
	350	xhtmlsection *ret = mknew(xhtmlsection);
	351	ret->next=NULL;
	352	ret->child=NULL;
	353	ret->parent=NULL;
	354	ret->chain=last;
	355	ret->para=NULL;
	356	ret->file=NULL;
	357	ret->fragment=NULL;
	358	ret->level=-1; /* marker: end of chain */
	359	return ret;
	360	}
	361
	362	/* Returns NULL or the section that marks that paragraph */
	363	static xhtmlsection xhtml_find_section(paragraph p)
	364	{
	365	xhtmlsection *ret = topsection;
	366	if (xhtml_para_level(p)==-1) { /* first, we back-track to a section paragraph */
	367	paragraph *p2 = sourceparas;
	368	paragraph *p3 = NULL;
	369	while (p2 && p2!=p) {
	370	if (xhtml_para_level(p2)!=-1) {
	371	p3 = p2;
	372	}
	373	p2=p2->next;
	374	}
	375	if (p3==NULL) { /* for some reason, we couldn't find a section before this paragraph ... ? */
	376	/* Note that this can happen, if you have a cross-reference to before the first chapter starts.
	377	* So don't do that, then.
	378	*/
	379	return NULL;
	380	}
	381	p=p3;
	382	}
	383	while (ret && ret->para != p) {
	384	/* printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/
	385	ret=ret->chain;
	386	}
	387	return ret;
	388	}
	389
	390	static void xhtml_format(paragraph p, char template_string, rdstringc *r)
	391	{
	392	char c, t;
	393	word *w;
	394	wchar_t *ws;
	395
	396	t = template_string;
	397	while (*t) {
	398	if (*t == '%' && t[1]) {
	399	int fmt;
	400
	401	t++;
	402	fmt = *t++;
	403
	404	if (fmt == '%') {
	405	rdaddc(r, fmt);
	406	continue;
	407	}
	408
	409	w = NULL;
	410	ws = NULL;
	411
	412	if (p->kwtext && fmt == 'n')
	413	w = p->kwtext;
	414	else if (p->kwtext2 && fmt == 'b')
	415	w = p->kwtext2;
	416	else if (p->keyword && *p->keyword && fmt == 'k')
	417	ws = p->keyword;
	418	else
	419	w = p->words;
	420
	421	while (w) {
	422	switch (removeattr(w->type))
	423	{
	424	case word_Normal:
	425	/*case word_Emph:
	426	case word_Code:
	427	case word_WeakCode:*/
	428	xhtml_utostr(w->text, &c);
	429	rdaddsc(r,c);
	430	sfree(c);
	431	break;
	432	}
	433	w = w->next;
	434	}
	435	if (ws) {
	436	xhtml_utostr(ws, &c);
	437	rdaddsc(r,c);
	438	sfree(c);
	439	}
	440	} else {
	441	rdaddc(r, *t++);
	442	}
	443	}
	444	}
	445
	446	static xhtmlfile xhtml_new_file(xhtmlsection sect)
	447	{
	448	xhtmlfile *ret = mknew(xhtmlfile);
	449
	450	ret->next=NULL;
	451	ret->child=NULL;
	452	ret->parent=NULL;
	453	ret->filename=NULL;
	454	ret->sections=sect;
	455	ret->is_leaf=(sect!=NULL && sect->level==conf.leaf_level);
	456	if (sect==NULL) {
	457	if (conf.leaf_level==0) { /* currently unused */
	458	ret->filename = smalloc(strlen(conf.single_filename)+1);
	459	sprintf(ret->filename, conf.single_filename);
	460	} else {
	461	ret->filename = smalloc(strlen(conf.contents_filename)+1);
	462	sprintf(ret->filename, conf.contents_filename);
	463	}
	464	} else {
	465	paragraph *p = sect->para;
	466	rdstringc fname_c = { 0, 0, NULL };
	467	xhtml_format(p, conf.template_filename, &fname_c);
	468	ret->filename = rdtrimc(&fname_c);
	469	}
	470	/* printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/
	471	return ret;
	472	}
	473
	474	/*
	475	* Walk the tree fixing up files which are actually leaf (ie
	476	* have no children) but aren't at leaf level, so they have the
	477	* leaf flag set.
	478	*/
	479	void xhtml_fixup_layout(xhtmlfile* file)
	480	{
	481	if (file->child==NULL) {
	482	file->is_leaf = TRUE;
	483	} else {
	484	xhtml_fixup_layout(file->child);
	485	}
	486	if (file->next)
	487	xhtml_fixup_layout(file->next);
	488	}
	489
	490	/*
	491	* Create the tree structure so we know where everything goes.
	492	* Method:
	493	*
	494	* Ignoring file splitting, we have three choices with each new section:
	495	*
	496	* +-----------------+-----------------+
	497	* \| \| \|
	498	* X +----X----+ (1)
	499	* \| \|
	500	* Y (2)
	501	* \|
	502	* (3)
	503	*
	504	* Y is the last section we added (currentsect).
	505	* If sect is the section we want to add, then:
	506	*
	507	* (1) if sect->level < currentsect->level
	508	* (2) if sect->level == currentsect->level
	509	* (3) if sect->level > currentsect->level
	510	*
	511	* This requires the constraint that you never skip section numbers
	512	* (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
	513	*
	514	* Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
	515	* more than one level at a time. Lots of asserts, and probably part of
	516	* the algorithm here, rely on this being true. (It currently isn't
	517	* enforced by halibut, however.)
	518	*
	519	* File splitting makes this harder. For instance, say we added at (3)
	520	* above and now need to add another section. We are splitting at level
	521	* 2, ie the level of Y. Z is the last section we added:
	522	*
	523	* +-----------------+-----------------+
	524	* \| \| \|
	525	* X +----X----+ (1)
	526	* \| \|
	527	* +----Y----+ (1)
	528	* \| \|
	529	* Z (2)
	530	* \|
	531	* (3)
	532	*
	533	* The (1) case is now split; we need to search upwards to find where
	534	* to actually link in. The other two cases remain the same (and will
	535	* always be like this).
	536	*
	537	* File splitting makes this harder, however. The decision of whether
	538	* to split to a new file is always on the same condition, however (is
	539	* the level of this section higher than the leaf_level configuration
	540	* value or not).
	541	*
	542	* Treating the cases backwards:
	543	*
	544	* (3) same file if sect->level > conf.leaf_level, otherwise new file
	545	*
	546	* if in the same file, currentsect->child points to sect
	547	* otherwise the linking is done through the file tree (which works
	548	* in more or less the same way, ie currentfile->child points to
	549	* the new file)
	550	*
	551	* (2) same file if sect->level > conf.leaf_level, otherwise new file
	552	*
	553	* if in the same file, currentsect->next points to sect
	554	* otherwise file linking and currentfile->next points to the new
	555	* file (we know that Z must have caused a new file to be created)
	556	*
	557	* (1) same file if sect->level > conf.leaf_level, otherwise new file
	558	*
	559	* this is actually effectively the same case as (2) here,
	560	* except that we first have to travel up the sections to figure
	561	* out which section this new one will be a sibling of. In doing
	562	* so, we may disappear off the top of a file and have to go up
	563	* to its parent in the file tree.
	564	*
	565	*/
	566	static void xhtml_ponder_layout(paragraph *p)
	567	{
	568	xhtmlsection *lastsection;
	569	xhtmlsection *currentsect;
	570	xhtmlfile *currentfile;
	571
	572	lastfile = NULL;
	573	topsection = xhtml_new_section(NULL);
	574	topfile = xhtml_new_file(NULL);
	575	lastsection = topsection;
	576	currentfile = topfile;
	577	currentsect = topsection;
	578
	579	if (conf.leaf_level == 0) {
	580	topfile->is_leaf = 1;
	581	topfile->sections = topsection;
	582	topsection->file = topfile;
	583	}
	584
	585	for (; p; p=p->next)
	586	{
	587	int level = xhtml_para_level(p);
	588	if (level>0) /* actually a section */
	589	{
	590	xhtmlsection *sect;
	591	rdstringc frag_c = { 0, 0, NULL };
	592
	593	sect = xhtml_new_section(lastsection);
	594	lastsection = sect;
	595	sect->para = p;
	596
	597	xhtml_format(p, conf.template_fragment, &frag_c);
	598	sect->fragment = rdtrimc(&frag_c);
	599	sect->level = level;
	600	/* printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/
	601
	602	if (level>currentsect->level) { /* case (3) */
	603	if (level>conf.leaf_level) { /* same file */
	604	assert(currentfile->is_leaf);
	605	currentsect->child = sect;
	606	sect->parent=currentsect;
	607	sect->file=currentfile;
	608	/* printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/
	609	currentsect=sect;
	610	} else { /* new file */
	611	xhtmlfile *file = xhtml_new_file(sect);
	612	assert(!currentfile->is_leaf);
	613	currentfile->child=file;
	614	sect->file=file;
	615	file->parent=currentfile;
	616	/* printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/
	617	currentfile=file;
	618	currentsect=sect;
	619	}
	620	} else if (level >= currentsect->file->sections->level) {
	621	/* Case (1) or (2) AND still under the section that starts
	622	* the current file.
	623	*
	624	* I'm not convinced that this couldn't be rolled in with the
	625	* final else {} leg further down. It seems a lot of effort
	626	* this way.
	627	*/
	628	if (level>conf.leaf_level) { /* stick within the same file */
	629	assert(currentfile->is_leaf);
	630	sect->file = currentfile;
	631	while (currentsect && currentsect->level > level &&
	632	currentsect->file==currentsect->parent->file) {
	633	currentsect = currentsect->parent;
	634	}
	635	assert(currentsect);
	636	currentsect->next = sect;
	637	assert(currentsect->level == sect->level);
	638	sect->parent = currentsect->parent;
	639	currentsect = sect;
	640	/* printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/
	641	} else { /* new file */
	642	xhtmlfile *file = xhtml_new_file(sect);
	643	sect->file=file;
	644	currentfile->next=file;
	645	file->parent=currentfile->parent;
	646	file->is_leaf=(level==conf.leaf_level);
	647	file->sections=sect;
	648	/* printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/
	649	currentfile=file;
	650	currentsect=sect;
	651	}
	652	} else { /* Case (1) or (2) and we must move up the file tree first */
	653	/* this loop is now probably irrelevant - we know we can't connect
	654	* to anything in the current file */
	655	while (currentsect && level<currentsect->level) {
	656	currentsect=currentsect->parent;
	657	if (currentsect) {
	658	/* printf(" * up one level to '%s'\n", currentsect->fragment);*/
	659	} else {
	660	/* printf(" * up one level (off top of current file)\n");*/
	661	}
	662	}
	663	if (currentsect) {
	664	/* I'm pretty sure this can now never fire */
	665	assert(currentfile->is_leaf);
	666	/* printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/
	667	sect->file = currentfile;
	668	currentsect->next=sect;
	669	currentsect=sect;
	670	} else { /* find a file we can attach to */
	671	while (currentfile && currentfile->sections && level<currentfile->sections->level) {
	672	currentfile=currentfile->parent;
	673	if (currentfile) {
	674	/* printf(" * up one file level to '%s'\n", currentfile->filename);*/
	675	} else {
	676	/* printf(" * up one file level (off top of tree)\n");*/
	677	}
	678	}
	679	if (currentfile) { /* new file (we had to skip up a file to
	680	get here, so we must be dealing with a
	681	level no lower than the configured
	682	leaf_level */
	683	xhtmlfile *file = xhtml_new_file(sect);
	684	currentfile->next=file;
	685	sect->file=file;
	686	file->parent=currentfile->parent;
	687	file->is_leaf=(level==conf.leaf_level);
	688	file->sections=sect;
	689	/* printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/
	690	currentfile=file;
	691	currentsect=sect;
	692	} else {
	693	fatal(err_whatever, "Ran off the top trying to connect sibling: strange document.");
	694	}
	695	}
	696	}
	697	}
	698	}
	699	topsection = lastsection; /* get correct end of the chain */
	700	xhtml_fixup_layout(topfile); /* leaf files not at leaf level marked as such */
	701	}
	702
	703	static void xhtml_do_index();
	704	static void xhtml_do_file(xhtmlfile *file);
	705	static void xhtml_do_top_file(xhtmlfile file, paragraph sourceform);
	706	static void xhtml_do_paras(FILE fp, paragraph p, paragraph *end, int indexable);
	707	static int xhtml_do_contents_limit(FILE fp, xhtmlfile file, int limit);
	708	static int xhtml_do_contents_section_limit(FILE fp, xhtmlsection section, int limit);
	709	static int xhtml_add_contents_entry(FILE fp, xhtmlsection section, int limit);
	710	static int xhtml_do_contents(FILE fp, xhtmlfile file);
	711	static int xhtml_do_naked_contents(FILE fp, xhtmlfile file);
	712	static void xhtml_do_sections(FILE fp, xhtmlsection sections);
	713
	714	/*
	715	* Do all the files in this structure.
	716	*/
	717	static void xhtml_do_files(xhtmlfile *file)
	718	{
	719	xhtml_do_file(file);
	720	if (file->child)
	721	xhtml_do_files(file->child);
	722	if (file->next)
	723	xhtml_do_files(file->next);
	724	}
	725
	726	/*
	727	* Free up all memory used by the file tree from 'xfile' downwards
	728	*/
	729	static void xhtml_free_file(xhtmlfile* xfile)
	730	{
	731	if (xfile==NULL) {
	732	return;
	733	}
	734
	735	if (xfile->filename) {
	736	sfree(xfile->filename);
	737	}
	738	xhtml_free_file(xfile->child);
	739	xhtml_free_file(xfile->next);
	740	sfree(xfile);
	741	}
	742
	743	/*
	744	* Main function.
	745	*/
	746	void xhtml_backend(paragraph sourceform, keywordlist in_keywords,
	747	indexdata *in_idx)
	748	{
	749	/* int i;*/
	750	indexentry *ientry;
	751	int ti;
	752	xhtmlsection *xsect;
	753
	754	sourceparas = sourceform;
	755	conf = xhtml_configure(sourceform);
	756	keywords = in_keywords;
	757	idx = in_idx;
	758
	759	/* Clear up the index entries backend data pointers */
	760	for (ti=0; (ientry = (indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
	761	ientry->backend_data=NULL;
	762	}
	763
	764	xhtml_ponder_layout(sourceform);
	765
	766	/* old system ... (writes to .alt, but gets some stuff wrong and is ugly) /
	767	/* xhtml_level_0(sourceform);
	768	for (i=1; i<=conf.leaf_level; i++)
	769	{
	770	xhtml_level(sourceform, i);
	771	}*/
	772
	773	/* new system ... (writes to .html, but isn't fully trusted) /
	774	xhtml_do_top_file(topfile, sourceform);
	775	assert(!topfile->next); /* shouldn't have a sibling at all */
	776	if (topfile->child) {
	777	xhtml_do_files(topfile->child);
	778	xhtml_do_index();
	779	}
	780
	781	/* release file, section, index data structures */
	782	xsect = topsection;
	783	while (xsect) {
	784	xhtmlsection *tmp = xsect->chain;
	785	if (xsect->fragment) {
	786	sfree(xsect->fragment);
	787	}
	788	sfree(xsect);
	789	xsect = tmp;
	790	}
	791	xhtml_free_file(topfile);
	792	for (ti = 0; (ientry=(indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
	793	if (ientry->backend_data!=NULL) {
	794	xhtmlindex xi = (xhtmlindex) ientry->backend_data;
	795	if (xi->sections!=NULL) {
	796	sfree(xi->sections);
	797	}
	798	sfree(xi);
	799	}
	800	ientry->backend_data = NULL;
	801	}
	802	sfree(conf.fsect);
	803	}
	804
	805	static int xhtml_para_level(paragraph *p)
	806	{
	807	switch (p->type)
	808	{
	809	case para_Title:
	810	return 0;
	811	break;
	812	case para_UnnumberedChapter:
	813	case para_Chapter:
	814	case para_Appendix:
	815	return 1;
	816	break;
	817	/* case para_BiblioCited:
	818	return 2;
	819	break;*/
	820	case para_Heading:
	821	case para_Subsect:
	822	return p->aux+2;
	823	break;
	824	default:
	825	return -1;
	826	break;
	827	}
	828	}
	829
	830	/* Output the nav links for the current file.
	831	* file == NULL means we're doing the index
	832	*/
	833	static void xhtml_donavlinks(FILE fp, xhtmlfile file)
	834	{
	835	xhtmlfile *xhtml_next_file = NULL;
	836	fprintf(fp, "<p");
	837	if (conf.nav_attrs!=NULL) {
	838	fprintf(fp, " %ls>", conf.nav_attrs);
	839	} else {
	840	fprintf(fp, ">");
	841	}
	842	if (xhtml_last_file==NULL) {
	843	fprintf(fp, "Previous \| ");
	844	} else {
	845	fprintf(fp, "<a href='%s'>Previous</a> \| ", xhtml_last_file->filename);
	846	}
	847	fprintf(fp, "<a href='%s'>Contents</a> \| ", conf.contents_filename);
	848	if (file == NULL) {
	849	fprintf(fp, "Index \| ");
	850	} else {
	851	fprintf(fp, "<a href='%s'>Index</a> \| ", conf.index_filename);
	852	}
	853	if (file != NULL) { /* otherwise we're doing nav links for the index */
	854	if (xhtml_next_file==NULL)
	855	xhtml_next_file = file->child;
	856	if (xhtml_next_file==NULL)
	857	xhtml_next_file = file->next;
	858	if (xhtml_next_file==NULL)
	859	xhtml_next_file = file->parent->next;
	860	}
	861	if (xhtml_next_file==NULL) {
	862	if (file==NULL) { /* index, so no next file */
	863	fprintf(fp, "Next ");
	864	} else {
	865	fprintf(fp, "<a href='%s'>Next</a>", conf.index_filename);
	866	}
	867	} else {
	868	fprintf(fp, "<a href='%s'>Next</a>", xhtml_next_file->filename);
	869	}
	870	fprintf(fp, "</p>\n");
	871	}
	872
	873	/* Write out the index file */
	874	static void xhtml_do_index_body(FILE *fp)
	875	{
	876	indexentry *y;
	877	int ti;
	878
	879	if (count234(idx->entries) == 0)
	880	return; /* don't write anything at all */
	881
	882	fprintf(fp, "<dl>\n");
	883	/* iterate over idx->entries using the tree functions and display everything */
	884	for (ti = 0; (y = (indexentry *)index234(idx->entries, ti)) != NULL; ti++) {
	885	if (y->backend_data) {
	886	int i;
	887	xhtmlindex *xi;
	888
	889	fprintf(fp, "<dt>");
	890	xhtml_para(fp, y->text, FALSE);
	891	fprintf(fp, "</dt>\n<dd>");
	892
	893	xi = (xhtmlindex*) y->backend_data;
	894	for (i=0; i<xi->nsection; i++) {
	895	xhtmlsection *sect = xi->sections[i];
	896	if (sect) {
	897	fprintf(fp, "<a href='%s#%s'>", sect->file->filename, sect->fragment);
	898	if (sect->para->kwtext) {
	899	xhtml_para(fp, sect->para->kwtext, FALSE);
	900	} else if (sect->para->words) {
	901	xhtml_para(fp, sect->para->words, FALSE);
	902	}
	903	fprintf(fp, "</a>");
	904	if (i+1<xi->nsection) {
	905	fprintf(fp, ", ");
	906	}
	907	}
	908	}
	909	fprintf(fp, "</dd>\n");
	910	}
	911	}
	912	fprintf(fp, "</dl>\n");
	913	}
	914	static void xhtml_do_index()
	915	{
	916	word temp_word = { NULL, NULL, word_Normal, 0, 0, L"Index", { NULL, 0, 0} };
	917	FILE *fp = fopen(conf.index_filename, "w");
	918
	919	if (fp==NULL)
	920	fatal(err_cantopenw, conf.index_filename);
	921	xhtml_doheader(fp, &temp_word);
	922	xhtml_donavlinks(fp, NULL);
	923
	924	xhtml_do_index_body(fp);
	925
	926	xhtml_donavlinks(fp, NULL);
	927	xhtml_dofooter(fp);
	928	fclose(fp);
	929	}
	930
	931	/* Output the given file. This includes whatever contents at beginning and end, etc. etc. */
	932	static void xhtml_do_file(xhtmlfile *file)
	933	{
	934	FILE *fp = fopen(file->filename, "w");
	935	if (fp==NULL)
	936	fatal(err_cantopenw, file->filename);
	937
	938	if (file->sections->para->words) {
	939	xhtml_doheader(fp, file->sections->para->words);
	940	} else if (file->sections->para->kwtext) {
	941	xhtml_doheader(fp, file->sections->para->kwtext);
	942	} else {
	943	xhtml_doheader(fp, NULL);
	944	}
	945
	946	xhtml_donavlinks(fp, file);
	947
	948	if (file->is_leaf && conf.leaf_contains_contents &&
	949	xhtml_do_contents(NULL, file)>=conf.leaf_smallest_contents)
	950	xhtml_do_contents(fp, file);
	951	xhtml_do_sections(fp, file->sections);
	952	if (!file->is_leaf)
	953	xhtml_do_naked_contents(fp, file);
	954
	955	xhtml_donavlinks(fp, file);
	956
	957	xhtml_dofooter(fp);
	958	fclose(fp);
	959
	960	xhtml_last_file = file;
	961	}
	962
	963	/* Output the top-level file. */
	964	static void xhtml_do_top_file(xhtmlfile file, paragraph sourceform)
	965	{
	966	paragraph *p;
	967	int done=FALSE;
	968	FILE *fp = fopen(file->filename, "w");
	969	if (fp==NULL)
	970	fatal(err_cantopenw, file->filename);
	971
	972	/* Do the title -- only one allowed */
	973	for (p = sourceform; p && !done; p = p->next)
	974	{
	975	if (p->type == para_Title)
	976	{
	977	xhtml_doheader(fp, p->words);
	978	done=TRUE;
	979	}
	980	}
	981	if (!done)
	982	xhtml_doheader(fp, NULL /* Eek! */);
	983
	984	/*
	985	* Display the title.
	986	*/
	987	for (p = sourceform; p; p = p->next)
	988	{
	989	if (p->type == para_Title) {
	990	xhtml_heading(fp, p, FALSE);
	991	break;
	992	}
	993	}
	994
	995	/* Do the preamble */
	996	for (p = sourceform; p; p = p->next)
	997	{
	998	if (p->type == para_Chapter \|\| p->type == para_Heading \|\|
	999	p->type == para_Subsect \|\| p->type == para_Appendix \|\|
	1000	p->type == para_UnnumberedChapter) {
	1001	/*
	1002	* We've found the end of the preamble. Do every normal
	1003	* paragraph up to there.
	1004	*/
	1005	xhtml_do_paras(fp, sourceform, p, FALSE);
	1006	break;
	1007	}
	1008	}
	1009
	1010	xhtml_do_contents(fp, file);
	1011	xhtml_do_sections(fp, file->sections);
	1012
	1013	/*
	1014	* Put the index in the top file if we're in single-file mode
	1015	* (leaf-level 0).
	1016	*/
	1017	if (conf.leaf_level == 0 && count234(idx->entries) > 0) {
	1018	fprintf(fp, "<a name=\"index\"></a><h1>Index</h1>\n");
	1019	xhtml_do_index_body(fp);
	1020	}
	1021
	1022	xhtml_dofooter(fp);
	1023	fclose(fp);
	1024	}
	1025
	/*
	 * Convert a wide string to a newly allocated ASCII one, stored in
	 * `*out'. Characters outside the printable range 32..126 are
	 * replaced by '?'. The buffer comes from smalloc(); the callers in
	 * this file release it with sfree().
	 */
	static void xhtml_utostr(wchar_t *in, char **out)
	{
		int l = ustrlen(in);
		int i;

		*out = smalloc(l+1);
		for (i=0; i<l; i++) {
			if (in[i]>=32 && in[i]<=126)
				(*out)[i]=(char)in[i];
			else
				(*out)[i]='?';
		}
		(*out)[i]=0;
	}
	1043
	1044	/*
	1045	* Write contents for the given file, and subfiles, down to
	1046	* the appropriate contents depth. Returns the number of
	1047	* entries written.
	1048	*/
	1049	static int xhtml_do_contents(FILE fp, xhtmlfile file)
	1050	{
	1051	int level, limit, count = 0;
	1052	if (!file)
	1053	return 0;
	1054
	1055	level = (file->sections)?(file->sections->level):(0);
	1056	limit = conf.contents_depth[(level>5)?(5):(level)];
	1057	start_level = (file->is_leaf) ? (level-1) : (level);
	1058	last_level = start_level;
	1059
	1060	count += xhtml_do_contents_section_limit(fp, file->sections, limit);
	1061	count += xhtml_do_contents_limit(fp, file->child, limit);
	1062	if (fp!=NULL) {
	1063	while (last_level > start_level) {
	1064	last_level--;
	1065	fprintf(fp, "</li></ul>\n");
	1066	}
	1067	}
	1068	return count;
	1069	}
	1070
	1071	/* As above, but doesn't do anything in the current file */
	1072	static int xhtml_do_naked_contents(FILE fp, xhtmlfile file)
	1073	{
	1074	int level, limit, start_level, count = 0;
	1075	if (!file)
	1076	return 0;
	1077
	1078	level = (file->sections)?(file->sections->level):(0);
	1079	limit = conf.contents_depth[(level>5)?(5):(level)];
	1080	start_level = (file->is_leaf) ? (level-1) : (level);
	1081	last_level = start_level;
	1082
	1083	count = xhtml_do_contents_limit(fp, file->child, limit);
	1084	if (fp!=NULL) {
	1085	while (last_level > start_level) {
	1086	last_level--;
	1087	fprintf(fp, "</li></ul>\n");
	1088	}
	1089	}
	1090	return count;
	1091	}
	1092
	1093	/*
	1094	* Write contents for the given file, children, and siblings, down to
	1095	* given limit contents depth.
	1096	*/
	1097	static int xhtml_do_contents_limit(FILE fp, xhtmlfile file, int limit)
	1098	{
	1099	int count = 0;
	1100	while (file) {
	1101	count += xhtml_do_contents_section_limit(fp, file->sections, limit);
	1102	count += xhtml_do_contents_limit(fp, file->child, limit);
	1103	file = file->next;
	1104	}
	1105	return count;
	1106	}
	1107
	1108	/*
	1109	* Write contents entries for the given section tree, down to the
	1110	* limit contents depth.
	1111	*/
	1112	static int xhtml_do_contents_section_deep_limit(FILE fp, xhtmlsection section, int limit)
	1113	{
	1114	int count = 0;
	1115	while (section) {
	1116	if (!xhtml_add_contents_entry(fp, section, limit))
	1117	return 0;
	1118	else
	1119	count++;
	1120	count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
	1121	section = section->next;
	1122	}
	1123	return count;
	1124	}
	1125
	1126	/*
	1127	* Write contents entries for the given section tree, down to the
	1128	* limit contents depth.
	1129	*/
	1130	static int xhtml_do_contents_section_limit(FILE fp, xhtmlsection section, int limit)
	1131	{
	1132	int count = 0;
	1133	if (!section)
	1134	return 0;
	1135	xhtml_add_contents_entry(fp, section, limit);
	1136	count=1;
	1137	count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
	1138	/* section=section->child;
	1139	while (section && xhtml_add_contents_entry(fp, section, limit)) {
	1140	section = section->next;
	1141	}*/
	1142	return count;
	1143	}
	1144
	1145	/*
	1146	* Add a section entry, unless we're exceeding the limit, in which
	1147	* case return FALSE (otherwise return TRUE).
	1148	*/
	1149	static int xhtml_add_contents_entry(FILE fp, xhtmlsection section, int limit)
	1150	{
	1151	if (!section \|\| section->level > limit)
	1152	return FALSE;
	1153	if (fp==NULL \|\| section->level < 0)
	1154	return TRUE;
	1155	if (last_level > section->level) {
	1156	while (last_level > section->level) {
	1157	last_level--;
	1158	fprintf(fp, "</li></ul>\n");
	1159	}
	1160	fprintf(fp, "</li>\n");
	1161	} else if (last_level < section->level) {
	1162	assert(last_level == section->level - 1);
	1163	last_level++;
	1164	fprintf(fp, "<ul>\n");
	1165	} else {
	1166	fprintf(fp, "</li>\n");
	1167	}
	1168	fprintf(fp, "<li><a href=\"%s#%s\">", section->file->filename, section->fragment);
	1169	if (section->para->kwtext) {
	1170	xhtml_para(fp, section->para->kwtext, FALSE);
	1171	if (section->para->words) {
	1172	fprintf(fp, ": ");
	1173	}
	1174	}
	1175	if (section->para->words) {
	1176	xhtml_para(fp, section->para->words, FALSE);
	1177	}
	1178	fprintf(fp, "</a>\n");
	1179	return TRUE;
	1180	}
	1181
	1182	/*
	1183	* Write all the sections in this file. Do all paragraphs in this section, then all
	1184	* children (recursively), then go on to the next one (tail recursively).
	1185	*/
	1186	static void xhtml_do_sections(FILE fp, xhtmlsection sections)
	1187	{
	1188	while (sections) {
	1189	currentsection = sections;
	1190	xhtml_do_paras(fp, sections->para, NULL, TRUE);
	1191	xhtml_do_sections(fp, sections->child);
	1192	sections = sections->next;
	1193	}
	1194	}
	1195
	1196	/* Write this list of paragraphs. Close off all lists at the end. */
	1197	static void xhtml_do_paras(FILE fp, paragraph p, paragraph *end,
	1198	int indexable)
	1199	{
	1200	int last_type = -1, ptype, first=TRUE;
	1201	stack lcont_stack = stk_new();
	1202	if (!p)
	1203	return;
	1204
	1205	/* for (; p && (xhtml_para_level(p)>limit \|\| xhtml_para_level(p)==-1 \|\| first); p=p->next) {*/
	1206	for (; p && p != end && (xhtml_para_level(p)==-1 \|\| first); p=p->next) {
	1207	first=FALSE;
	1208	switch (ptype = p->type)
	1209	{
	1210	/*
	1211	* Things we ignore because we've already processed them or
	1212	* aren't going to touch them in this pass.
	1213	*/
	1214	case para_IM:
	1215	case para_BR:
	1216	case para_Biblio: /* only touch BiblioCited */
	1217	case para_VersionID:
	1218	case para_NoCite:
	1219	case para_Title:
	1220	break;
	1221
	1222	/*
	1223	* Chapter titles.
	1224	*/
	1225	case para_Chapter:
	1226	case para_Appendix:
	1227	case para_UnnumberedChapter:
	1228	xhtml_heading(fp, p, indexable);
	1229	break;
	1230
	1231	case para_Heading:
	1232	case para_Subsect:
	1233	xhtml_heading(fp, p, indexable);
	1234	break;
	1235
	1236	case para_Rule:
	1237	fprintf(fp, "\n<hr />\n");
	1238	break;
	1239
	1240	case para_Normal:
	1241	case para_Copyright:
	1242	fprintf(fp, "\n<p>");
	1243	xhtml_para(fp, p->words, indexable);
	1244	fprintf(fp, "</p>\n");
	1245	break;
	1246
	1247	case para_LcontPush:
	1248	{
	1249	int *p;
	1250	p = mknew(int);
	1251	*p = last_type;
	1252	stk_push(lcont_stack, p);
	1253	last_type = para_Normal;
	1254	}
	1255	break;
	1256	case para_LcontPop:
	1257	{
	1258	int *p = stk_pop(lcont_stack);
	1259	assert(p);
	1260	ptype = last_type = *p;
	1261	sfree(p);
	1262	goto closeofflist; /* ick */
	1263	}
	1264	break;
	1265	case para_QuotePush:
	1266	fprintf(fp, "<blockquote>\n");
	1267	break;
	1268	case para_QuotePop:
	1269	fprintf(fp, "</blockquote>\n");
	1270	break;
	1271
	1272	case para_Bullet:
	1273	case para_NumberedList:
	1274	case para_Description:
	1275	case para_DescribedThing:
	1276	case para_BiblioCited:
	1277	if (last_type!=p->type &&
	1278	!(last_type==para_DescribedThing && p->type==para_Description) &&
	1279	!(last_type==para_Description && p->type==para_DescribedThing)) {
	1280	/* start up list if necessary */
	1281	if (p->type == para_Bullet) {
	1282	fprintf(fp, "<ul>\n");
	1283	} else if (p->type == para_NumberedList) {
	1284	fprintf(fp, "<ol>\n");
	1285	} else if (p->type == para_BiblioCited \|\|
	1286	p->type == para_DescribedThing \|\|
	1287	p->type == para_Description) {
	1288	fprintf(fp, "<dl>\n");
	1289	}
	1290	}
	1291	if (p->type == para_Bullet \|\| p->type == para_NumberedList) {
	1292	fprintf(fp, "<li>");
	1293	} else if (p->type == para_DescribedThing) {
	1294	fprintf(fp, "<dt>");
	1295	} else if (p->type == para_Description) {
	1296	fprintf(fp, "<dd>");
	1297	} else if (p->type == para_BiblioCited) {
	1298	fprintf(fp, "<dt>");
	1299	xhtml_para(fp, p->kwtext, indexable);
	1300	fprintf(fp, "</dt>\n<dd>");
	1301	}
	1302	xhtml_para(fp, p->words, indexable);
	1303	{
	1304	paragraph *p2 = p->next;
	1305	if (p2 && xhtml_para_level(p2)==-1 && p2->type == para_LcontPush)
	1306	break;
	1307	}
	1308
	1309	closeofflist:
	1310	if (ptype == para_BiblioCited) {
	1311	fprintf(fp, "</dd>\n");
	1312	} else if (ptype == para_DescribedThing) {
	1313	fprintf(fp, "</dt>");
	1314	} else if (ptype == para_Description) {
	1315	fprintf(fp, "</dd>");
	1316	} else if (ptype == para_Bullet \|\| ptype == para_NumberedList) {
	1317	fprintf(fp, "</li>");
	1318	}
	1319	if (ptype == para_Bullet \|\| ptype == para_NumberedList \|\|
	1320	ptype == para_BiblioCited \|\| ptype == para_Description \|\|
	1321	ptype == para_DescribedThing)
	1322	/* close off list if necessary */
	1323	{
	1324	paragraph *p2 = p->next;
	1325	int close_off=FALSE;
	1326	/* if (p2 && (xhtml_para_level(p2)>limit \|\| xhtml_para_level(p2)==-1)) {*/
	1327	if (p2 && xhtml_para_level(p2)==-1) {
	1328	if (p2->type != ptype &&
	1329	!(p2->type==para_DescribedThing && ptype==para_Description) &&
	1330	!(p2->type==para_Description && ptype==para_DescribedThing) &&
	1331	p2->type != para_LcontPush)
	1332	close_off=TRUE;
	1333	} else {
	1334	close_off=TRUE;
	1335	}
	1336	if (close_off) {
	1337	if (ptype == para_Bullet) {
	1338	fprintf(fp, "</ul>\n");
	1339	} else if (ptype == para_NumberedList) {
	1340	fprintf(fp, "</ol>\n");
	1341	} else if (ptype == para_BiblioCited \|\|
	1342	ptype == para_Description \|\|
	1343	ptype == para_DescribedThing) {
	1344	fprintf(fp, "</dl>\n");
	1345	}
	1346	}
	1347	}
	1348	break;
	1349
	1350	case para_Code:
	1351	xhtml_codepara(fp, p->words);
	1352	break;
	1353	}
	1354	last_type = ptype;
	1355	}
	1356
	1357	stk_free(lcont_stack);
	1358	}
	1359
	1360	/*
	1361	* Output a header for this XHTML file.
	1362	*/
	1363	static void xhtml_doheader(FILE fp, word title)
	1364	{
	1365	fprintf(fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n");
	1366	fprintf(fp, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
	1367	fprintf(fp, "<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>");
	1368	if (title==NULL)
	1369	fprintf(fp, "The thing with no name!");
	1370	else
	1371	xhtml_para(fp, title, FALSE);
	1372	fprintf(fp, "</title>\n");
	1373	fprintf(fp, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version);
	1374	if (conf.author)
	1375	fprintf(fp, "<meta name=\"author\" content=\"%ls\" />\n", conf.author);
	1376	if (conf.description)
	1377	fprintf(fp, "<meta name=\"description\" content=\"%ls\" />\n", conf.description);
	1378	if (conf.head_end)
	1379	fprintf(fp, "%ls\n", conf.head_end);
	1380	fprintf(fp, "</head>\n\n");
	1381	if (conf.body)
	1382	fprintf(fp, "%ls\n", conf.body);
	1383	else
	1384	fprintf(fp, "<body>\n");
	1385	if (conf.body_start)
	1386	fprintf(fp, "%ls\n", conf.body_start);
	1387	}
	1388
	1389	/*
	1390	* Output a footer for this XHTML file.
	1391	*/
	1392	static void xhtml_dofooter(FILE *fp)
	1393	{
	1394	fprintf(fp, "\n<hr />\n\n");
	1395	if (conf.body_end)
	1396	fprintf(fp, "%ls\n", conf.body_end);
	1397	if (!conf.suppress_address) {
	1398	fprintf(fp,"<address>\n");
	1399	if (conf.address_start)
	1400	fprintf(fp, "%ls\n", conf.address_start);
	1401	/* Do the version ID */
	1402	if (conf.include_version_id) {
	1403	paragraph *p;
	1404	int started = 0;
	1405	for (p = sourceparas; p; p = p->next)
	1406	if (p->type == para_VersionID) {
	1407	xhtml_versionid(fp, p->words, started);
	1408	started = 1;
	1409	}
	1410	}
	1411	if (conf.address_end)
	1412	fprintf(fp, "%ls\n", conf.address_end);
	1413	fprintf(fp, "</address>\n");
	1414	}
	1415	fprintf(fp, "</body>\n\n</html>\n");
	1416	}
	1417
	1418	/*
	1419	* Output the versionid paragraph. Typically this is a version control
	1420	* ID string (such as $Id...$ in RCS).
	1421	*/
	1422	static void xhtml_versionid(FILE fp, word text, int started)
	1423	{
	1424	rdstringc t = { 0, 0, NULL };
	1425
	1426	rdaddc(&t, '['); /* FIXME: configurability */
	1427	xhtml_rdaddwc(&t, text, NULL, FALSE);
	1428	rdaddc(&t, ']'); /* FIXME: configurability */
	1429
	1430	if (started)
	1431	fprintf(fp, "<br />\n");
	1432	fprintf(fp, "%s\n", t.text);
	1433	sfree(t.text);
	1434	}
	1435
	/*
	 * Is this an XHTML reserved character? Returns nonzero for '&',
	 * '<', '>' and '"', and zero for everything else.
	 */
	static int xhtml_reservedchar(int c)
	{
		return c=='&' || c=='<' || c=='>' || c=='"';
	}
	1444
	1445	/*
	1446	* Convert a wide string into valid XHTML: Anything outside ASCII will
	1447	* be fixed up as an entity. Currently we don't worry about constraining the
	1448	* encoded character set, which we should probably do at some point (we can
	1449	* still fix up and return FALSE - see the last comment here). We also don't
	1450	* currently
	1451	*
	1452	* Because this is only used for words, spaces are HARD spaces (any other
	1453	* spaces will be word_Whitespace not word_Normal). So they become
	1454	* Unless hard_spaces is FALSE, of course (code paragraphs break the above
	1455	* rule).
	1456	*
	1457	* If `result' is non-NULL, mallocs the resulting string and stores a pointer to
	1458	* it in `*result'. If `result' is NULL, merely checks whether all
	1459	* characters in the string are feasible.
	1460	*
	1461	* Return is nonzero if all characters are OK. If not all
	1462	* characters are OK but `result' is non-NULL, a result _will_
	1463	* still be generated!
	1464	*/
	1465	static int xhtml_convert(wchar_t s, int maxlen, char *result,
	1466	int hard_spaces) {
	1467	int doing = (result != 0);
	1468	int ok = TRUE;
	1469	char *p = NULL;
	1470	int plen = 0, psize = 0;
	1471
	1472	if (maxlen <= 0)
	1473	maxlen = -1;
	1474
	1475	for (; *s && maxlen != 0; s++, maxlen--) {
	1476	wchar_t c = *s;
	1477
	1478	#define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
	1479
	1480	if (((c == 32 && !hard_spaces) \|\| (c > 32 && c <= 126 && !xhtml_reservedchar(c)))) {
	1481	/* Char is OK. */
	1482	if (doing)
	1483	{
	1484	ensure_size(plen);
	1485	p[plen++] = (char)c;
	1486	}
	1487	} else {
	1488	/* Char needs fixing up. */
	1489	/* ok = FALSE; -- currently we never return FALSE; we
	1490	* might want to when considering a character set for the
	1491	* encoded document.
	1492	*/
	1493	if (doing)
	1494	{
	1495	if (c==32) { /* a space in a word is a hard space */
	1496	ensure_size(plen+6); /* includes space for the NUL, which is subsequently stomped on */
	1497	sprintf(p+plen, " ");
	1498	plen+=6;
	1499	} else {
	1500	/* FIXME: entity names! */
	1501	ensure_size(plen+8); /* includes space for the NUL, which is subsequently stomped on */
	1502	plen+=sprintf(p+plen, "&#%04i;", (int)c);
	1503	}
	1504	}
	1505	}
	1506	}
	1507	if (doing) {
	1508	p = resize(p, plen+1);
	1509	p[plen] = '\0';
	1510	*result = p;
	1511	}
	1512	return ok;
	1513	}
	1514
	1515	/*
	1516	* This formats the given words as XHTML.
	1517	*
	1518	* `indexable', if FALSE, prohibits adding any index references.
	1519	* You might use this, for example, if an index reference occurred
	1520	* in a section title, to prevent phony index references when the
	1521	* section title is processed in strange places such as contents
	1522	* sections.
	1523	*/
	1524	static void xhtml_rdaddwc(rdstringc rs, word text, word *end, int indexable) {
	1525	char *c;
	1526	keyword *kwl;
	1527	xhtmlsection *sect;
	1528	indextag *itag;
	1529	int ti;
	1530
	1531	for (; text && text != end; text = text->next) {
	1532	switch (text->type) {
	1533	case word_HyperLink:
	1534	xhtml_utostr(text->text, &c);
	1535	rdaddsc(rs, "<a href=\"");
	1536	rdaddsc(rs, c);
	1537	rdaddsc(rs, "\">");
	1538	sfree(c);
	1539	break;
	1540
	1541	case word_UpperXref:
	1542	case word_LowerXref:
	1543	kwl = kw_lookup(keywords, text->text);
	1544	if (kwl) {
	1545	sect=xhtml_find_section(kwl->para);
	1546	if (sect) {
	1547	rdaddsc(rs, "<a href=\"");
	1548	rdaddsc(rs, sect->file->filename);
	1549	rdaddc(rs, '#');
	1550	rdaddsc(rs, sect->fragment);
	1551	rdaddsc(rs, "\">");
	1552	} else {
	1553	rdaddsc(rs, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
	1554	error(err_whatever, "Couldn't locate cross-reference! (Probably a bibliography entry.)");
	1555	}
	1556	} else {
	1557	rdaddsc(rs, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
	1558	error(err_whatever, "Couldn't locate cross-reference! (Wasn't in source file.)");
	1559	}
	1560	break;
	1561
	1562	case word_IndexRef: /* in theory we could make an index target here */
	1563	/* rdaddsc(rs, "<a name=\"idx-");
	1564	xhtml_utostr(text->text, &c);
	1565	rdaddsc(rs, c);
	1566	sfree(c);
	1567	rdaddsc(rs, "\"></a>");*/
	1568	/* what we _do_ need to do is to fix up the backend data
	1569	* for any indexentry this points to.
	1570	*/
	1571	if (!indexable)
	1572	break;
	1573
	1574	for (ti=0; (itag = (indextag *)index234(idx->tags, ti))!=NULL; ti++) {
	1575	/* FIXME: really ustricmp() and not ustrcmp()? */
	1576	if (ustricmp(itag->name, text->text)==0) {
	1577	break;
	1578	}
	1579	}
	1580	if (itag!=NULL) {
	1581	if (itag->refs!=NULL) {
	1582	int i;
	1583	for (i=0; i<itag->nrefs; i++) {
	1584	xhtmlindex *idx_ref;
	1585	indexentry *ientry;
	1586
	1587	ientry = itag->refs[i];
	1588	if (ientry->backend_data==NULL) {
	1589	idx_ref = (xhtmlindex*) smalloc(sizeof(xhtmlindex));
	1590	if (idx_ref==NULL)
	1591	fatal(err_nomemory);
	1592	idx_ref->nsection = 0;
	1593	idx_ref->size = 4;
	1594	idx_ref->sections = (xhtmlsection*) smalloc(idx_ref->size sizeof(xhtmlsection*));
	1595	if (idx_ref->sections==NULL)
	1596	fatal(err_nomemory);
	1597	ientry->backend_data = idx_ref;
	1598	} else {
	1599	idx_ref = ientry->backend_data;
	1600	if (idx_ref->nsection+1 > idx_ref->size) {
	1601	int new_size = idx_ref->size * 2;
	1602	idx_ref->sections = srealloc(idx_ref->sections, new_size * sizeof(xhtmlsection));
	1603	if (idx_ref->sections==NULL) {
	1604	fatal(err_nomemory);
	1605	}
	1606	idx_ref->size = new_size;
	1607	}
	1608	}
	1609	idx_ref->sections[idx_ref->nsection++] = currentsection;
	1610	#if 0
	1611	#endif
	1612	}
	1613	} else {
	1614	fatal(err_whatever, "Index tag had no entries!");
	1615	}
	1616	} else {
	1617	fprintf(stderr, "Looking for index entry '%ls'\n", text->text);
	1618	fatal(err_whatever, "Couldn't locate index entry! (Wasn't in index.)");
	1619	}
	1620	break;
	1621
	1622	case word_HyperEnd:
	1623	case word_XrefEnd:
	1624	rdaddsc(rs, "</a>");
	1625	break;
	1626
	1627	case word_Normal:
	1628	case word_Emph:
	1629	case word_Code:
	1630	case word_WeakCode:
	1631	case word_WhiteSpace:
	1632	case word_EmphSpace:
	1633	case word_CodeSpace:
	1634	case word_WkCodeSpace:
	1635	case word_Quote:
	1636	case word_EmphQuote:
	1637	case word_CodeQuote:
	1638	case word_WkCodeQuote:
	1639	assert(text->type != word_CodeQuote &&
	1640	text->type != word_WkCodeQuote);
	1641	if (towordstyle(text->type) == word_Emph &&
	1642	(attraux(text->aux) == attr_First \|\|
	1643	attraux(text->aux) == attr_Only))
	1644	rdaddsc(rs, "<em>");
	1645	else if ((towordstyle(text->type) == word_Code \|\| towordstyle(text->type) == word_WeakCode) &&
	1646	(attraux(text->aux) == attr_First \|\|
	1647	attraux(text->aux) == attr_Only))
	1648	rdaddsc(rs, "<code>");
	1649
	1650	if (removeattr(text->type) == word_Normal) {
	1651	if (xhtml_convert(text->text, 0, &c, TRUE)) /* spaces in the word are hard */
	1652	rdaddsc(rs, c);
	1653	else
	1654	xhtml_rdaddwc(rs, text->alt, NULL, indexable);
	1655	sfree(c);
	1656	} else if (removeattr(text->type) == word_WhiteSpace) {
	1657	rdaddc(rs, ' ');
	1658	} else if (removeattr(text->type) == word_Quote) {
	1659	rdaddsc(rs, """);
	1660	}
	1661
	1662	if (towordstyle(text->type) == word_Emph &&
	1663	(attraux(text->aux) == attr_Last \|\|
	1664	attraux(text->aux) == attr_Only))
	1665	rdaddsc(rs, "</em>");
	1666	else if ((towordstyle(text->type) == word_Code \|\| towordstyle(text->type) == word_WeakCode) &&
	1667	(attraux(text->aux) == attr_Last \|\|
	1668	attraux(text->aux) == attr_Only))
	1669	rdaddsc(rs, "</code>");
	1670	break;
	1671	}
	1672	}
	1673	}
	1674
	1675	/* Output a heading, formatted as XHTML.
	1676	*/
	1677	static void xhtml_heading(FILE fp, paragraph p, int indexable)
	1678	{
	1679	rdstringc t = { 0, 0, NULL };
	1680	word *tprefix = p->kwtext;
	1681	word *nprefix = p->kwtext2;
	1682	word *text = p->words;
	1683	int level = xhtml_para_level(p);
	1684	xhtmlsection *sect = xhtml_find_section(p);
	1685	xhtmlheadfmt *fmt;
	1686	char *fragment;
	1687	if (sect) {
	1688	fragment = sect->fragment;
	1689	} else {
	1690	if (p->type == para_Title)
	1691	fragment = "title";
	1692	else {
	1693	fragment = ""; /* FIXME: what else can we do? */
	1694	error(err_whatever, "Couldn't locate heading cross-reference!");
	1695	}
	1696	}
	1697
	1698	if (p->type == para_Title)
	1699	fmt = NULL;
	1700	else if (level == 1)
	1701	fmt = &conf.fchapter;
	1702	else if (level-1 < conf.nfsect)
	1703	fmt = &conf.fsect[level-1];
	1704	else
	1705	fmt = &conf.fsect[conf.nfsect-1];
	1706
	1707	if (fmt && fmt->just_numbers && nprefix) {
	1708	xhtml_rdaddwc(&t, nprefix, NULL, indexable);
	1709	if (fmt) {
	1710	char *c;
	1711	if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
	1712	rdaddsc(&t, c);
	1713	sfree(c);
	1714	}
	1715	}
	1716	} else if (fmt && !fmt->just_numbers && tprefix) {
	1717	xhtml_rdaddwc(&t, tprefix, NULL, indexable);
	1718	if (fmt) {
	1719	char *c;
	1720	if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
	1721	rdaddsc(&t, c);
	1722	sfree(c);
	1723	}
	1724	}
	1725	}
	1726	xhtml_rdaddwc(&t, text, NULL, indexable);
	1727	/*
	1728	* If we're outputting in single-file mode, we need to lower
	1729	* the level of each heading by one, because the overall
	1730	* document title will be sitting right at the top as an <h1>
	1731	* and so chapters and sections should start at <h2>.
	1732	*
	1733	* Even if not, the document title will come back from
	1734	* xhtml_para_level() as level zero, so we must increment that
	1735	* no matter what leaf_level is set to.
	1736	*/
	1737	if (conf.leaf_level == 0 \|\| level == 0)
	1738	level++;
	1739	fprintf(fp, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment, level, t.text, level);
	1740	sfree(t.text);
	1741	}
	1742
	1743	/* Output a paragraph. Styles are handled by xhtml_rdaddwc().
	1744	* This looks pretty simple; I may have missed something ...
	1745	*/
	1746	static void xhtml_para(FILE fp, word text, int indexable)
	1747	{
	1748	rdstringc out = { 0, 0, NULL };
	1749	xhtml_rdaddwc(&out, text, NULL, indexable);
	1750	fprintf(fp, "%s", out.text);
	1751	sfree(out.text);
	1752	}
	1753
	1754	/* Output a code paragraph. I'm treating this as preformatted, which
	1755	* may not be entirely correct. See xhtml_para() for my worries about
	1756	* this being overly-simple; however I think that most of the complexity
	1757	* of the text backend came entirely out of word wrapping anyway.
	1758	*/
	1759	static void xhtml_codepara(FILE fp, word text)
	1760	{
	1761	fprintf(fp, "<pre>");
	1762	for (; text; text = text->next) if (text->type == word_WeakCode) {
	1763	word here, next;
	1764	char *c;
	1765
	1766	/*
	1767	* See if this WeakCode is followed by an Emph to indicate
	1768	* emphasis.
	1769	*/
	1770	here = text;
	1771	if (text->next && text->next->type == word_Emph) {
	1772	next = text = text->next;
	1773	} else
	1774	next = NULL;
	1775
	1776	if (next) {
	1777	wchar_t t, e;
	1778	int n;
	1779
	1780	t = here->text;
	1781	e = next->text;
	1782
	1783	while (*e) {
	1784	int ec = *e;
	1785
	1786	for (n = 0; t[n] && e[n] && e[n] == ec; n++);
	1787	xhtml_convert(t, n, &c, FALSE);
	1788	fprintf(fp, "%s%s%s",
	1789	(ec == 'i' ? "<em>" : ec == 'b' ? "<b>" : ""),
	1790	c,
	1791	(ec == 'i' ? "</em>" : ec == 'b' ? "</b>" : ""));
	1792	sfree(c);
	1793
	1794	t += n;
	1795	e += n;
	1796	}
	1797
	1798	xhtml_convert(t, 0, &c, FALSE);
	1799	fprintf(fp, "%s\n", c);
	1800	sfree(c);
	1801	} else {
	1802	xhtml_convert(here->text, 0, &c, FALSE);
	1803	fprintf(fp, "%s\n", c);
	1804	sfree(c);
	1805	}
	1806	}
	1807	fprintf(fp, "</pre>\n");
	1808	}