mdw@git.distorted.org.uk Git - sgt/halibut/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* xhtml backend for Halibut
	3	* (initial implementation by James Aylett)
	4	*
	5	* Still to do:
	6	*
	7	* +++ doesn't handle non-breaking hyphens. Not sure how to yet.
	8	* +++ entity names (from a file -- ideally supply normal SGML files)
	9	* +++ configuration directive to file split where the current layout
	10	* code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
	11	* perhaps others.
	12	*
	13	* Limitations:
	14	*
	15	* +++ biblio/index references target the nearest section marker, rather
	16	* than having a dedicated target themselves. In large bibliographies
	17	* this will cause problems. (The solution is to fake up a response
	18	* from xhtml_find_section(), probably linking it into the sections
	19	* chain just in case we need it again, and to make freeing it up
	20	* easier.) docsrc.pl used to work as we do, however, and SGT agrees that
	21	* this is acceptable for now.
	22	* +++ can't cope with leaf-level == 0. It's all to do with the
	23	* top-level file not being normal, probably not even having a valid
	24	* section level, and stuff like that. I question whether this is an
	25	* issue, frankly; small manuals that fit on one page should probably
	26	* not be written in halibut at all.
	27	*/
	28
	29	#include <stdio.h>
	30	#include <stdlib.h>
	31	#include <string.h>
	32	#include <assert.h>
	33	#include "halibut.h"
	34
	35	/*
	36	* FILENAME_TEMPLATE (overridable in config of course) allows you
	37	* to choose the general form for your HTML file names. It is
	38	* slightly printf-styled (% followed by a single character is a
	39	* formatting directive, %% is a literal %). Formatting directives
	40	* are:
	41	*
	42	* - %n is the section type-plus-number, minus whitespace (`Chapter1.2').
	43	* - %b is the section number on its own (`1.2').
	44	* - %k is the section's _internal_ keyword.
	45	* - %N is the section's visible title in the output, again minus
	46	* whitespace.
	47	*
	48	* %n, %b and %k will all default to %N if the section is
	49	* unnumbered (`Bibliography' is often a good example).
	50	*
	51	* FRAGMENT_TEMPLATE is the same, but defines the <a name="foo">
	52	* markers used to cross-reference to particular subsections of a
	53	* file.
	54	*/
	55
	56	#define FILENAME_SINGLE "Manual.html"
	57	#define FILENAME_CONTENTS "Contents.html"
	58	#define FILENAME_INDEX "IndexPage.html"
	59	#define FILENAME_TEMPLATE "%n.html"
	60	#define FRAGMENT_TEMPLATE "%b"
	61
	62	struct xhtmlsection_Struct {
	63	struct xhtmlsection_Struct next; / next sibling (NULL if split across files) */
	64	struct xhtmlsection_Struct child; / NULL if split across files */
	65	struct xhtmlsection_Struct parent; / NULL if split across files */
	66	struct xhtmlsection_Struct chain; / single structure independent of weird trees */
	67	paragraph *para;
	68	struct xhtmlfile_Struct file; / which file is this a part of? */
	69	char fragment; / fragment id within the file */
	70	int level;
	71	};
	72
	73	struct xhtmlfile_Struct {
	74	struct xhtmlfile_Struct *next;
	75	struct xhtmlfile_Struct *child;
	76	struct xhtmlfile_Struct *parent;
	77	char *filename;
	78	struct xhtmlsection_Struct sections; / sections within this file (only one for non-leaf) */
	79	int is_leaf; /* is this file a leaf file, ie does it not have any children? */
	80	};
	81
	82	typedef struct xhtmlsection_Struct xhtmlsection;
	83	typedef struct xhtmlfile_Struct xhtmlfile;
	84	typedef struct xhtmlindex_Struct xhtmlindex;
	85
	86	struct xhtmlindex_Struct {
	87	int nsection;
	88	int size;
	89	xhtmlsection **sections;
	90	};
	91
	92	typedef struct {
	93	int just_numbers;
	94	wchar_t *number_suffix;
	95	} xhtmlheadfmt;
	96
	97	typedef struct {
	98	int contents_depth[6];
	99	int leaf_contains_contents;
	100	int leaf_level;
	101	int leaf_smallest_contents;
	102	int include_version_id;
	103	wchar_t author, description;
	104	wchar_t head_end, body, body_start, body_end, address_start, address_end, *nav_attrs;
	105	int suppress_address;
	106	xhtmlheadfmt fchapter, *fsect;
	107	int nfsect;
	108	char contents_filename, index_filename;
	109	char single_filename, template_filename, *template_fragment;
	110	} xhtmlconfig;
	111
	112	/static void xhtml_level(paragraph , int);
	113	static void xhtml_level_0(paragraph *);
	114	static void xhtml_docontents(FILE , paragraph , int);
	115	static void xhtml_dosections(FILE , paragraph , int);
	116	static void xhtml_dobody(FILE , paragraph , int);*/
	117
	118	static void xhtml_doheader(FILE , word );
	119	static void xhtml_dofooter(FILE *);
	120	static void xhtml_versionid(FILE , word , int);
	121
	122	static void xhtml_utostr(wchar_t , char *);
	123	static int xhtml_para_level(paragraph *);
	124	static int xhtml_reservedchar(int);
	125
	126	static int xhtml_convert(wchar_t , int, char *, int);
	127	static void xhtml_rdaddwc(rdstringc , word , word *, int);
	128	static void xhtml_para(FILE , word , int);
	129	static void xhtml_codepara(FILE , word );
	130	static void xhtml_heading(FILE , paragraph , int);
	131
	132	/* File-global variables are much easier than passing these things
	133	* all over the place. Evil, but easier. We can replace this with a single
	134	* structure at some point.
	135	*/
	136	static xhtmlconfig conf;
	137	static keywordlist *keywords;
	138	static indexdata *idx;
	139	static xhtmlfile *topfile;
	140	static xhtmlsection *topsection;
	141	static paragraph *sourceparas;
	142	static xhtmlfile *lastfile;
	143	static xhtmlfile *xhtml_last_file = NULL;
	144	static int last_level=-1, start_level;
	145	static xhtmlsection *currentsection;
	146
	147	static xhtmlconfig xhtml_configure(paragraph *source)
	148	{
	149	xhtmlconfig ret;
	150
	151	/*
	152	* Defaults.
	153	*/
	154	ret.contents_depth[0] = 2;
	155	ret.contents_depth[1] = 3;
	156	ret.contents_depth[2] = 4;
	157	ret.contents_depth[3] = 5;
	158	ret.contents_depth[4] = 6;
	159	ret.contents_depth[5] = 7;
	160	ret.leaf_level = 2;
	161	ret.leaf_smallest_contents = 4;
	162	ret.leaf_contains_contents = FALSE;
	163	ret.include_version_id = TRUE;
	164	ret.author = NULL;
	165	ret.description = NULL;
	166	ret.head_end = NULL;
	167	ret.body = NULL;
	168	ret.body_start = NULL;
	169	ret.body_end = NULL;
	170	ret.address_start = NULL;
	171	ret.address_end = NULL;
	172	ret.nav_attrs = NULL;
	173	ret.suppress_address = FALSE;
	174
	175	ret.fchapter.just_numbers = FALSE;
	176	ret.fchapter.number_suffix = L": ";
	177	ret.nfsect = 2;
	178	ret.fsect = mknewa(xhtmlheadfmt, ret.nfsect);
	179	ret.fsect[0].just_numbers = FALSE;
	180	ret.fsect[0].number_suffix = L": ";
	181	ret.fsect[1].just_numbers = TRUE;
	182	ret.fsect[1].number_suffix = L" ";
	183	ret.contents_filename = strdup(FILENAME_CONTENTS);
	184	ret.single_filename = strdup(FILENAME_SINGLE);
	185	ret.index_filename = strdup(FILENAME_INDEX);
	186	ret.template_filename = strdup(FILENAME_TEMPLATE);
	187	ret.template_fragment = strdup(FRAGMENT_TEMPLATE);
	188
	189	for (; source; source = source->next)
	190	{
	191	if (source->type == para_Config)
	192	{
	193	if (!ustricmp(source->keyword, L"xhtml-contents-filename")) {
	194	sfree(ret.contents_filename);
	195	ret.contents_filename = dupstr(adv(source->origkeyword));
	196	} else if (!ustricmp(source->keyword, L"xhtml-single-filename")) {
	197	sfree(ret.single_filename);
	198	ret.single_filename = dupstr(adv(source->origkeyword));
	199	} else if (!ustricmp(source->keyword, L"xhtml-index-filename")) {
	200	sfree(ret.index_filename);
	201	ret.index_filename = dupstr(adv(source->origkeyword));
	202	} else if (!ustricmp(source->keyword, L"xhtml-template-filename")) {
	203	sfree(ret.template_filename);
	204	ret.template_filename = dupstr(adv(source->origkeyword));
	205	} else if (!ustricmp(source->keyword, L"xhtml-template-fragment")) {
	206	sfree(ret.template_fragment);
	207	ret.template_fragment = utoa_dup(uadv(source->keyword), CS_ASCII);
	208	} else if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) {
	209	ret.contents_depth[0] = utoi(uadv(source->keyword));
	210	} else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) {
	211	ret.contents_depth[1] = utoi(uadv(source->keyword));
	212	} else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2")) {
	213	ret.contents_depth[2] = utoi(uadv(source->keyword));
	214	} else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3")) {
	215	ret.contents_depth[3] = utoi(uadv(source->keyword));
	216	} else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4")) {
	217	ret.contents_depth[4] = utoi(uadv(source->keyword));
	218	} else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5")) {
	219	ret.contents_depth[5] = utoi(uadv(source->keyword));
	220	} else if (!ustricmp(source->keyword, L"xhtml-leaf-level")) {
	221	ret.leaf_level = utoi(uadv(source->keyword));
	222	} else if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents")) {
	223	ret.leaf_smallest_contents = utoi(uadv(source->keyword));
	224	} else if (!ustricmp(source->keyword, L"xhtml-versionid")) {
	225	ret.include_version_id = utob(uadv(source->keyword));
	226	} else if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents")) {
	227	ret.leaf_contains_contents = utob(uadv(source->keyword));
	228	} else if (!ustricmp(source->keyword, L"xhtml-suppress-address")) {
	229	ret.suppress_address = utob(uadv(source->keyword));
	230	} else if (!ustricmp(source->keyword, L"xhtml-author")) {
	231	ret.author = uadv(source->keyword);
	232	} else if (!ustricmp(source->keyword, L"xhtml-description")) {
	233	ret.description = uadv(source->keyword);
	234	} else if (!ustricmp(source->keyword, L"xhtml-head-end")) {
	235	ret.head_end = uadv(source->keyword);
	236	} else if (!ustricmp(source->keyword, L"xhtml-body-start")) {
	237	ret.body_start = uadv(source->keyword);
	238	} else if (!ustricmp(source->keyword, L"xhtml-body-tag")) {
	239	ret.body = uadv(source->keyword);
	240	} else if (!ustricmp(source->keyword, L"xhtml-body-end")) {
	241	ret.body_end = uadv(source->keyword);
	242	} else if (!ustricmp(source->keyword, L"xhtml-address-start")) {
	243	ret.address_start = uadv(source->keyword);
	244	} else if (!ustricmp(source->keyword, L"xhtml-address-end")) {
	245	ret.address_end = uadv(source->keyword);
	246	} else if (!ustricmp(source->keyword, L"xhtml-navigation-attributes")) {
	247	ret.nav_attrs = uadv(source->keyword);
	248	} else if (!ustricmp(source->keyword, L"xhtml-chapter-numeric")) {
	249	ret.fchapter.just_numbers = utob(uadv(source->keyword));
	250	} else if (!ustricmp(source->keyword, L"xhtml-chapter-suffix")) {
	251	ret.fchapter.number_suffix = uadv(source->keyword);
	252	} else if (!ustricmp(source->keyword, L"xhtml-section-numeric")) {
	253	wchar_t *p = uadv(source->keyword);
	254	int n = 0;
	255	if (uisdigit(*p)) {
	256	n = utoi(p);
	257	p = uadv(p);
	258	}
	259	if (n >= ret.nfsect) {
	260	int i;
	261	ret.fsect = resize(ret.fsect, n+1);
	262	for (i = ret.nfsect; i <= n; i++)
	263	ret.fsect[i] = ret.fsect[ret.nfsect-1];
	264	ret.nfsect = n+1;
	265	}
	266	ret.fsect[n].just_numbers = utob(p);
	267	} else if (!ustricmp(source->keyword, L"xhtml-section-suffix")) {
	268	wchar_t *p = uadv(source->keyword);
	269	int n = 0;
	270	if (uisdigit(*p)) {
	271	n = utoi(p);
	272	p = uadv(p);
	273	}
	274	if (n >= ret.nfsect) {
	275	int i;
	276	ret.fsect = resize(ret.fsect, n+1);
	277	for (i = ret.nfsect; i <= n; i++)
	278	ret.fsect[i] = ret.fsect[ret.nfsect-1];
	279	ret.nfsect = n+1;
	280	}
	281	ret.fsect[n].number_suffix = p;
	282	}
	283	}
	284	}
	285
	286	/* printf(" !!! leaf_level = %i\n", ret.leaf_level);
	287	printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
	288	printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
	289	printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
	290	printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
	291	printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
	292	printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
	293	printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/
	294	return ret;
	295	}
	296
	297	paragraph xhtml_config_filename(char filename)
	298	{
	299	/*
	300	* If the user passes in a single filename as a parameter to
	301	* the `--html' command-line option, then we should assume it
	302	* to imply _two_ config directives:
	303	* \cfg{xhtml-single-filename}{whatever} and
	304	* \cfg{xhtml-leaf-level}{0}; the rationale being that the user
	305	* wants their output _in that file_.
	306	*/
	307	paragraph p, q;
	308
	309	p = cmdline_cfg_simple("xhtml-single-filename", filename, NULL);
	310	q = cmdline_cfg_simple("xhtml-leaf-level", "0", NULL);
	311	p->next = q;
	312	return p;
	313	}
	314
	315	static xhtmlsection xhtml_new_section(xhtmlsection last)
	316	{
	317	xhtmlsection *ret = mknew(xhtmlsection);
	318	ret->next=NULL;
	319	ret->child=NULL;
	320	ret->parent=NULL;
	321	ret->chain=last;
	322	ret->para=NULL;
	323	ret->file=NULL;
	324	ret->fragment=NULL;
	325	ret->level=-1; /* marker: end of chain */
	326	return ret;
	327	}
	328
	329	/* Returns NULL or the section that marks that paragraph */
	330	static xhtmlsection xhtml_find_section(paragraph p)
	331	{
	332	xhtmlsection *ret = topsection;
	333	if (xhtml_para_level(p)==-1) { /* first, we back-track to a section paragraph */
	334	paragraph *p2 = sourceparas;
	335	paragraph *p3 = NULL;
	336	while (p2 && p2!=p) {
	337	if (xhtml_para_level(p2)!=-1) {
	338	p3 = p2;
	339	}
	340	p2=p2->next;
	341	}
	342	if (p3==NULL) { /* for some reason, we couldn't find a section before this paragraph ... ? */
	343	/* Note that this can happen, if you have a cross-reference to before the first chapter starts.
	344	* So don't do that, then.
	345	*/
	346	return NULL;
	347	}
	348	p=p3;
	349	}
	350	while (ret && ret->para != p) {
	351	/* printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/
	352	ret=ret->chain;
	353	}
	354	return ret;
	355	}
	356
	357	static void xhtml_format(paragraph p, char template_string, rdstringc *r)
	358	{
	359	char c, t;
	360	word *w;
	361	wchar_t *ws;
	362
	363	t = template_string;
	364	while (*t) {
	365	if (*t == '%' && t[1]) {
	366	int fmt;
	367
	368	t++;
	369	fmt = *t++;
	370
	371	if (fmt == '%') {
	372	rdaddc(r, fmt);
	373	continue;
	374	}
	375
	376	w = NULL;
	377	ws = NULL;
	378
	379	if (p->kwtext && fmt == 'n')
	380	w = p->kwtext;
	381	else if (p->kwtext2 && fmt == 'b')
	382	w = p->kwtext2;
	383	else if (p->keyword && *p->keyword && fmt == 'k')
	384	ws = p->keyword;
	385	else
	386	w = p->words;
	387
	388	while (w) {
	389	switch (removeattr(w->type))
	390	{
	391	case word_Normal:
	392	/*case word_Emph:
	393	case word_Code:
	394	case word_WeakCode:*/
	395	xhtml_utostr(w->text, &c);
	396	rdaddsc(r,c);
	397	sfree(c);
	398	break;
	399	}
	400	w = w->next;
	401	}
	402	if (ws) {
	403	xhtml_utostr(ws, &c);
	404	rdaddsc(r,c);
	405	sfree(c);
	406	}
	407	} else {
	408	rdaddc(r, *t++);
	409	}
	410	}
	411	}
	412
	413	static xhtmlfile xhtml_new_file(xhtmlsection sect)
	414	{
	415	xhtmlfile *ret = mknew(xhtmlfile);
	416
	417	ret->next=NULL;
	418	ret->child=NULL;
	419	ret->parent=NULL;
	420	ret->filename=NULL;
	421	ret->sections=sect;
	422	ret->is_leaf=(sect!=NULL && sect->level==conf.leaf_level);
	423	if (sect==NULL) {
	424	if (conf.leaf_level==0) { /* currently unused */
	425	ret->filename = smalloc(strlen(conf.single_filename)+1);
	426	sprintf(ret->filename, conf.single_filename);
	427	} else {
	428	ret->filename = smalloc(strlen(conf.contents_filename)+1);
	429	sprintf(ret->filename, conf.contents_filename);
	430	}
	431	} else {
	432	paragraph *p = sect->para;
	433	rdstringc fname_c = { 0, 0, NULL };
	434	xhtml_format(p, conf.template_filename, &fname_c);
	435	ret->filename = rdtrimc(&fname_c);
	436	}
	437	/* printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/
	438	return ret;
	439	}
	440
	441	/*
	442	* Walk the tree fixing up files which are actually leaf (ie
	443	* have no children) but aren't at leaf level, so they have the
	444	* leaf flag set.
	445	*/
	446	void xhtml_fixup_layout(xhtmlfile* file)
	447	{
	448	if (file->child==NULL) {
	449	file->is_leaf = TRUE;
	450	} else {
	451	xhtml_fixup_layout(file->child);
	452	}
	453	if (file->next)
	454	xhtml_fixup_layout(file->next);
	455	}
	456
	457	/*
	458	* Create the tree structure so we know where everything goes.
	459	* Method:
	460	*
	461	* Ignoring file splitting, we have three choices with each new section:
	462	*
 *       +-----------------+-----------------+
 *       |                 |                 |
 *       X            +----X----+           (1)
 *                    |         |
 *                    Y        (2)
 *                    |
 *                   (3)
	470	*
	471	* Y is the last section we added (currentsect).
	472	* If sect is the section we want to add, then:
	473	*
	474	* (1) if sect->level < currentsect->level
	475	* (2) if sect->level == currentsect->level
	476	* (3) if sect->level > currentsect->level
	477	*
	478	* This requires the constraint that you never skip section numbers
	479	* (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
	480	*
	481	* Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
	482	* more than one level at a time. Lots of asserts, and probably part of
	483	* the algorithm here, rely on this being true. (It currently isn't
	484	* enforced by halibut, however.)
	485	*
	486	* File splitting makes this harder. For instance, say we added at (3)
	487	* above and now need to add another section. We are splitting at level
	488	* 2, ie the level of Y. Z is the last section we added:
	489	*
 *       +-----------------+-----------------+
 *       |                 |                 |
 *       X            +----X----+           (1)
 *                    |         |
 *               +----Y----+   (1)
 *               |         |
 *               Z        (2)
 *               |
 *              (3)
	499	*
	500	* The (1) case is now split; we need to search upwards to find where
	501	* to actually link in. The other two cases remain the same (and will
	502	* always be like this).
	503	*
 * File splitting complicates the linking, but the decision of
 * whether to start a new file always rests on the same condition:
 * is the level of this section higher than the leaf_level
 * configuration value or not?
	508	*
	509	* Treating the cases backwards:
	510	*
	511	* (3) same file if sect->level > conf.leaf_level, otherwise new file
	512	*
	513	* if in the same file, currentsect->child points to sect
	514	* otherwise the linking is done through the file tree (which works
	515	* in more or less the same way, ie currentfile->child points to
	516	* the new file)
	517	*
	518	* (2) same file if sect->level > conf.leaf_level, otherwise new file
	519	*
	520	* if in the same file, currentsect->next points to sect
	521	* otherwise file linking and currentfile->next points to the new
	522	* file (we know that Z must have caused a new file to be created)
	523	*
	524	* (1) same file if sect->level > conf.leaf_level, otherwise new file
	525	*
	526	* this is actually effectively the same case as (2) here,
	527	* except that we first have to travel up the sections to figure
	528	* out which section this new one will be a sibling of. In doing
	529	* so, we may disappear off the top of a file and have to go up
	530	* to its parent in the file tree.
	531	*
	532	*/
	533	static void xhtml_ponder_layout(paragraph *p)
	534	{
	535	xhtmlsection *lastsection;
	536	xhtmlsection *currentsect;
	537	xhtmlfile *currentfile;
	538
	539	lastfile = NULL;
	540	topsection = xhtml_new_section(NULL);
	541	topfile = xhtml_new_file(NULL);
	542	lastsection = topsection;
	543	currentfile = topfile;
	544	currentsect = topsection;
	545
	546	if (conf.leaf_level == 0) {
	547	topfile->is_leaf = 1;
	548	topfile->sections = topsection;
	549	topsection->file = topfile;
	550	}
	551
	552	for (; p; p=p->next)
	553	{
	554	int level = xhtml_para_level(p);
	555	if (level>0) /* actually a section */
	556	{
	557	xhtmlsection *sect;
	558	rdstringc frag_c = { 0, 0, NULL };
	559
	560	sect = xhtml_new_section(lastsection);
	561	lastsection = sect;
	562	sect->para = p;
	563
	564	xhtml_format(p, conf.template_fragment, &frag_c);
	565	sect->fragment = rdtrimc(&frag_c);
	566	sect->level = level;
	567	/* printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/
	568
	569	if (level>currentsect->level) { /* case (3) */
	570	if (level>conf.leaf_level) { /* same file */
	571	assert(currentfile->is_leaf);
	572	currentsect->child = sect;
	573	sect->parent=currentsect;
	574	sect->file=currentfile;
	575	/* printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/
	576	currentsect=sect;
	577	} else { /* new file */
	578	xhtmlfile *file = xhtml_new_file(sect);
	579	assert(!currentfile->is_leaf);
	580	currentfile->child=file;
	581	sect->file=file;
	582	file->parent=currentfile;
	583	/* printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/
	584	currentfile=file;
	585	currentsect=sect;
	586	}
	587	} else if (level >= currentsect->file->sections->level) {
	588	/* Case (1) or (2) AND still under the section that starts
	589	* the current file.
	590	*
	591	* I'm not convinced that this couldn't be rolled in with the
	592	* final else {} leg further down. It seems a lot of effort
	593	* this way.
	594	*/
	595	if (level>conf.leaf_level) { /* stick within the same file */
	596	assert(currentfile->is_leaf);
	597	sect->file = currentfile;
	598	while (currentsect && currentsect->level > level &&
	599	currentsect->file==currentsect->parent->file) {
	600	currentsect = currentsect->parent;
	601	}
	602	assert(currentsect);
	603	currentsect->next = sect;
	604	assert(currentsect->level == sect->level);
	605	sect->parent = currentsect->parent;
	606	currentsect = sect;
	607	/* printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/
	608	} else { /* new file */
	609	xhtmlfile *file = xhtml_new_file(sect);
	610	sect->file=file;
	611	currentfile->next=file;
	612	file->parent=currentfile->parent;
	613	file->is_leaf=(level==conf.leaf_level);
	614	file->sections=sect;
	615	/* printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/
	616	currentfile=file;
	617	currentsect=sect;
	618	}
	619	} else { /* Case (1) or (2) and we must move up the file tree first */
	620	/* this loop is now probably irrelevant - we know we can't connect
	621	* to anything in the current file */
	622	while (currentsect && level<currentsect->level) {
	623	currentsect=currentsect->parent;
	624	if (currentsect) {
	625	/* printf(" * up one level to '%s'\n", currentsect->fragment);*/
	626	} else {
	627	/* printf(" * up one level (off top of current file)\n");*/
	628	}
	629	}
	630	if (currentsect) {
	631	/* I'm pretty sure this can now never fire */
	632	assert(currentfile->is_leaf);
	633	/* printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/
	634	sect->file = currentfile;
	635	currentsect->next=sect;
	636	currentsect=sect;
	637	} else { /* find a file we can attach to */
	638	while (currentfile && currentfile->sections && level<currentfile->sections->level) {
	639	currentfile=currentfile->parent;
	640	if (currentfile) {
	641	/* printf(" * up one file level to '%s'\n", currentfile->filename);*/
	642	} else {
	643	/* printf(" * up one file level (off top of tree)\n");*/
	644	}
	645	}
	646	if (currentfile) { /* new file (we had to skip up a file to
	647	get here, so we must be dealing with a
	648	level no lower than the configured
	649	leaf_level */
	650	xhtmlfile *file = xhtml_new_file(sect);
	651	currentfile->next=file;
	652	sect->file=file;
	653	file->parent=currentfile->parent;
	654	file->is_leaf=(level==conf.leaf_level);
	655	file->sections=sect;
	656	/* printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/
	657	currentfile=file;
	658	currentsect=sect;
	659	} else {
	660	fatal(err_whatever, "Ran off the top trying to connect sibling: strange document.");
	661	}
	662	}
	663	}
	664	}
	665	}
	666	topsection = lastsection; /* get correct end of the chain */
	667	xhtml_fixup_layout(topfile); /* leaf files not at leaf level marked as such */
	668	}
	669
	670	static void xhtml_do_index();
	671	static void xhtml_do_file(xhtmlfile *file);
	672	static void xhtml_do_top_file(xhtmlfile file, paragraph sourceform);
	673	static void xhtml_do_paras(FILE fp, paragraph p, paragraph *end, int indexable);
	674	static int xhtml_do_contents_limit(FILE fp, xhtmlfile file, int limit);
	675	static int xhtml_do_contents_section_limit(FILE fp, xhtmlsection section, int limit);
	676	static int xhtml_add_contents_entry(FILE fp, xhtmlsection section, int limit);
	677	static int xhtml_do_contents(FILE fp, xhtmlfile file);
	678	static int xhtml_do_naked_contents(FILE fp, xhtmlfile file);
	679	static void xhtml_do_sections(FILE fp, xhtmlsection sections);
	680
	681	/*
	682	* Do all the files in this structure.
	683	*/
	684	static void xhtml_do_files(xhtmlfile *file)
	685	{
	686	xhtml_do_file(file);
	687	if (file->child)
	688	xhtml_do_files(file->child);
	689	if (file->next)
	690	xhtml_do_files(file->next);
	691	}
	692
	693	/*
	694	* Free up all memory used by the file tree from 'xfile' downwards
	695	*/
	696	static void xhtml_free_file(xhtmlfile* xfile)
	697	{
	698	if (xfile==NULL) {
	699	return;
	700	}
	701
	702	if (xfile->filename) {
	703	sfree(xfile->filename);
	704	}
	705	xhtml_free_file(xfile->child);
	706	xhtml_free_file(xfile->next);
	707	sfree(xfile);
	708	}
	709
	710	/*
	711	* Main function.
	712	*/
	713	void xhtml_backend(paragraph sourceform, keywordlist in_keywords,
	714	indexdata in_idx, void unused)
	715	{
	716	/* int i;*/
	717	indexentry *ientry;
	718	int ti;
	719	xhtmlsection *xsect;
	720
	721	IGNORE(unused);
	722
	723	sourceparas = sourceform;
	724	conf = xhtml_configure(sourceform);
	725	keywords = in_keywords;
	726	idx = in_idx;
	727
	728	/* Clear up the index entries backend data pointers */
	729	for (ti=0; (ientry = (indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
	730	ientry->backend_data=NULL;
	731	}
	732
	733	xhtml_ponder_layout(sourceform);
	734
	735	/* old system ... (writes to .alt, but gets some stuff wrong and is ugly) /
	736	/* xhtml_level_0(sourceform);
	737	for (i=1; i<=conf.leaf_level; i++)
	738	{
	739	xhtml_level(sourceform, i);
	740	}*/
	741
	742	/* new system ... (writes to .html, but isn't fully trusted) /
	743	xhtml_do_top_file(topfile, sourceform);
	744	assert(!topfile->next); /* shouldn't have a sibling at all */
	745	if (topfile->child) {
	746	xhtml_do_files(topfile->child);
	747	xhtml_do_index();
	748	}
	749
	750	/* release file, section, index data structures */
	751	xsect = topsection;
	752	while (xsect) {
	753	xhtmlsection *tmp = xsect->chain;
	754	if (xsect->fragment) {
	755	sfree(xsect->fragment);
	756	}
	757	sfree(xsect);
	758	xsect = tmp;
	759	}
	760	xhtml_free_file(topfile);
	761	for (ti = 0; (ientry=(indexentry *)index234(idx->entries, ti))!=NULL; ti++) {
	762	if (ientry->backend_data!=NULL) {
	763	xhtmlindex xi = (xhtmlindex) ientry->backend_data;
	764	if (xi->sections!=NULL) {
	765	sfree(xi->sections);
	766	}
	767	sfree(xi);
	768	}
	769	ientry->backend_data = NULL;
	770	}
	771	sfree(conf.fsect);
	772	}
	773
	774	static int xhtml_para_level(paragraph *p)
	775	{
	776	switch (p->type)
	777	{
	778	case para_Title:
	779	return 0;
	780	break;
	781	case para_UnnumberedChapter:
	782	case para_Chapter:
	783	case para_Appendix:
	784	return 1;
	785	break;
	786	/* case para_BiblioCited:
	787	return 2;
	788	break;*/
	789	case para_Heading:
	790	case para_Subsect:
	791	return p->aux+2;
	792	break;
	793	default:
	794	return -1;
	795	break;
	796	}
	797	}
	798
	799	/* Output the nav links for the current file.
	800	* file == NULL means we're doing the index
	801	*/
	802	static void xhtml_donavlinks(FILE fp, xhtmlfile file)
	803	{
	804	xhtmlfile *xhtml_next_file = NULL;
	805	fprintf(fp, "<p");
	806	if (conf.nav_attrs!=NULL) {
	807	fprintf(fp, " %ls>", conf.nav_attrs);
	808	} else {
	809	fprintf(fp, ">");
	810	}
	811	if (xhtml_last_file==NULL) {
	812	fprintf(fp, "Previous \| ");
	813	} else {
	814	fprintf(fp, "<a href=\"%s\">Previous</a> \| ", xhtml_last_file->filename);
	815	}
	816	fprintf(fp, "<a href=\"%s\">Contents</a> \| ", conf.contents_filename);
	817	if (file == NULL) {
	818	fprintf(fp, "Index \| ");
	819	} else {
	820	fprintf(fp, "<a href=\"%s\">Index</a> \| ", conf.index_filename);
	821	}
	822	if (file != NULL) { /* otherwise we're doing nav links for the index */
	823	if (xhtml_next_file==NULL)
	824	xhtml_next_file = file->child;
	825	if (xhtml_next_file==NULL)
	826	xhtml_next_file = file->next;
	827	if (xhtml_next_file==NULL)
	828	xhtml_next_file = file->parent->next;
	829	}
	830	if (xhtml_next_file==NULL) {
	831	if (file==NULL) { /* index, so no next file */
	832	fprintf(fp, "Next ");
	833	} else {
	834	fprintf(fp, "<a href=\"%s\">Next</a>", conf.index_filename);
	835	}
	836	} else {
	837	fprintf(fp, "<a href=\"%s\">Next</a>", xhtml_next_file->filename);
	838	}
	839	fprintf(fp, "</p>\n");
	840	}
	841
	842	/* Write out the index file */
	843	static void xhtml_do_index_body(FILE *fp)
	844	{
	845	indexentry *y;
	846	int ti;
	847
	848	if (count234(idx->entries) == 0)
	849	return; /* don't write anything at all */
	850
	851	fprintf(fp, "<dl>\n");
	852	/* iterate over idx->entries using the tree functions and display everything */
	853	for (ti = 0; (y = (indexentry *)index234(idx->entries, ti)) != NULL; ti++) {
	854	if (y->backend_data) {
	855	int i;
	856	xhtmlindex *xi;
	857
	858	fprintf(fp, "<dt>");
	859	xhtml_para(fp, y->text, FALSE);
	860	fprintf(fp, "</dt>\n<dd>");
	861
	862	xi = (xhtmlindex*) y->backend_data;
	863	for (i=0; i<xi->nsection; i++) {
	864	xhtmlsection *sect = xi->sections[i];
	865	if (sect) {
	866	fprintf(fp, "<a href=\"%s#%s\">", sect->file->filename, sect->fragment);
	867	if (sect->para->kwtext) {
	868	xhtml_para(fp, sect->para->kwtext, FALSE);
	869	} else if (sect->para->words) {
	870	xhtml_para(fp, sect->para->words, FALSE);
	871	}
	872	fprintf(fp, "</a>");
	873	if (i+1<xi->nsection) {
	874	fprintf(fp, ", ");
	875	}
	876	}
	877	}
	878	fprintf(fp, "</dd>\n");
	879	}
	880	}
	881	fprintf(fp, "</dl>\n");
	882	}
	883	static void xhtml_do_index()
	884	{
	885	word temp_word = { NULL, NULL, word_Normal, 0, 0, L"Index",
	886	{ NULL, 0, 0}, NULL };
	887	FILE *fp = fopen(conf.index_filename, "w");
	888
	889	if (fp==NULL)
	890	fatal(err_cantopenw, conf.index_filename);
	891	xhtml_doheader(fp, &temp_word);
	892	xhtml_donavlinks(fp, NULL);
	893
	894	xhtml_do_index_body(fp);
	895
	896	xhtml_donavlinks(fp, NULL);
	897	xhtml_dofooter(fp);
	898	fclose(fp);
	899	}
	900
	901	/* Output the given file. This includes whatever contents at beginning and end, etc. etc. */
	902	static void xhtml_do_file(xhtmlfile *file)
	903	{
	904	FILE *fp = fopen(file->filename, "w");
	905	if (fp==NULL)
	906	fatal(err_cantopenw, file->filename);
	907
	908	if (file->sections->para->words) {
	909	xhtml_doheader(fp, file->sections->para->words);
	910	} else if (file->sections->para->kwtext) {
	911	xhtml_doheader(fp, file->sections->para->kwtext);
	912	} else {
	913	xhtml_doheader(fp, NULL);
	914	}
	915
	916	xhtml_donavlinks(fp, file);
	917
	918	if (file->is_leaf && conf.leaf_contains_contents &&
	919	xhtml_do_contents(NULL, file)>=conf.leaf_smallest_contents)
	920	xhtml_do_contents(fp, file);
	921	xhtml_do_sections(fp, file->sections);
	922	if (!file->is_leaf)
	923	xhtml_do_naked_contents(fp, file);
	924
	925	xhtml_donavlinks(fp, file);
	926
	927	xhtml_dofooter(fp);
	928	fclose(fp);
	929
	930	xhtml_last_file = file;
	931	}
	932
	933	/* Output the top-level file. */
	934	static void xhtml_do_top_file(xhtmlfile file, paragraph sourceform)
	935	{
	936	paragraph *p;
	937	int done=FALSE;
	938	FILE *fp = fopen(file->filename, "w");
	939	if (fp==NULL)
	940	fatal(err_cantopenw, file->filename);
	941
	942	/* Do the title -- only one allowed */
	943	for (p = sourceform; p && !done; p = p->next)
	944	{
	945	if (p->type == para_Title)
	946	{
	947	xhtml_doheader(fp, p->words);
	948	done=TRUE;
	949	}
	950	}
	951	if (!done)
	952	xhtml_doheader(fp, NULL /* Eek! */);
	953
	954	/*
	955	* Display the title.
	956	*/
	957	for (p = sourceform; p; p = p->next)
	958	{
	959	if (p->type == para_Title) {
	960	xhtml_heading(fp, p, FALSE);
	961	break;
	962	}
	963	}
	964
	965	/* Do the preamble */
	966	for (p = sourceform; p; p = p->next)
	967	{
	968	if (p->type == para_Chapter \|\| p->type == para_Heading \|\|
	969	p->type == para_Subsect \|\| p->type == para_Appendix \|\|
	970	p->type == para_UnnumberedChapter) {
	971	/*
	972	* We've found the end of the preamble. Do every normal
	973	* paragraph up to there.
	974	*/
	975	xhtml_do_paras(fp, sourceform, p, FALSE);
	976	break;
	977	}
	978	}
	979
	980	xhtml_do_contents(fp, file);
	981	xhtml_do_sections(fp, file->sections);
	982
	983	/*
	984	* Put the index in the top file if we're in single-file mode
	985	* (leaf-level 0).
	986	*/
	987	if (conf.leaf_level == 0 && count234(idx->entries) > 0) {
	988	fprintf(fp, "<a name=\"index\"></a><h1>Index</h1>\n");
	989	xhtml_do_index_body(fp);
	990	}
	991
	992	xhtml_dofooter(fp);
	993	fclose(fp);
	994	}
	995
	/*
	 * Convert a Unicode string to an ASCII one. '?' is used for
	 * unmappable characters (anything outside 32..126).
	 *
	 * The result is allocated with smalloc() and stored in `*out'; the
	 * callers in this file release it with sfree().
	 */
	static void xhtml_utostr(wchar_t *in, char **out)
	{
	    int l = ustrlen(in);
	    int i;

	    *out = smalloc(l+1);
	    for (i = 0; i < l; i++) {
	        if (in[i] >= 32 && in[i] <= 126)
	            (*out)[i] = (char)in[i];
	        else
	            (*out)[i] = '?';
	    }
	    (*out)[i] = 0;
	}
	1013
	1014	/*
	1015	* Write contents for the given file, and subfiles, down to
	1016	* the appropriate contents depth. Returns the number of
	1017	* entries written.
	1018	*/
	1019	static int xhtml_do_contents(FILE fp, xhtmlfile file)
	1020	{
	1021	int level, limit, count = 0;
	1022	if (!file)
	1023	return 0;
	1024
	1025	level = (file->sections)?(file->sections->level):(0);
	1026	limit = conf.contents_depth[(level>5)?(5):(level)];
	1027	start_level = (file->is_leaf) ? (level-1) : (level);
	1028	last_level = start_level;
	1029
	1030	count += xhtml_do_contents_section_limit(fp, file->sections, limit);
	1031	count += xhtml_do_contents_limit(fp, file->child, limit);
	1032	if (fp!=NULL) {
	1033	while (last_level > start_level) {
	1034	last_level--;
	1035	fprintf(fp, "</li></ul>\n");
	1036	}
	1037	}
	1038	return count;
	1039	}
	1040
	1041	/* As above, but doesn't do anything in the current file */
	1042	static int xhtml_do_naked_contents(FILE fp, xhtmlfile file)
	1043	{
	1044	int level, limit, start_level, count = 0;
	1045	if (!file)
	1046	return 0;
	1047
	1048	level = (file->sections)?(file->sections->level):(0);
	1049	limit = conf.contents_depth[(level>5)?(5):(level)];
	1050	start_level = (file->is_leaf) ? (level-1) : (level);
	1051	last_level = start_level;
	1052
	1053	count = xhtml_do_contents_limit(fp, file->child, limit);
	1054	if (fp!=NULL) {
	1055	while (last_level > start_level) {
	1056	last_level--;
	1057	fprintf(fp, "</li></ul>\n");
	1058	}
	1059	}
	1060	return count;
	1061	}
	1062
	1063	/*
	1064	* Write contents for the given file, children, and siblings, down to
	1065	* given limit contents depth.
	1066	*/
	1067	static int xhtml_do_contents_limit(FILE fp, xhtmlfile file, int limit)
	1068	{
	1069	int count = 0;
	1070	while (file) {
	1071	count += xhtml_do_contents_section_limit(fp, file->sections, limit);
	1072	count += xhtml_do_contents_limit(fp, file->child, limit);
	1073	file = file->next;
	1074	}
	1075	return count;
	1076	}
	1077
	1078	/*
	1079	* Write contents entries for the given section tree, down to the
	1080	* limit contents depth.
	1081	*/
	1082	static int xhtml_do_contents_section_deep_limit(FILE fp, xhtmlsection section, int limit)
	1083	{
	1084	int count = 0;
	1085	while (section) {
	1086	if (!xhtml_add_contents_entry(fp, section, limit))
	1087	return 0;
	1088	else
	1089	count++;
	1090	count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
	1091	section = section->next;
	1092	}
	1093	return count;
	1094	}
	1095
	1096	/*
	1097	* Write contents entries for the given section tree, down to the
	1098	* limit contents depth.
	1099	*/
	1100	static int xhtml_do_contents_section_limit(FILE fp, xhtmlsection section, int limit)
	1101	{
	1102	int count = 0;
	1103	if (!section)
	1104	return 0;
	1105	xhtml_add_contents_entry(fp, section, limit);
	1106	count=1;
	1107	count += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
	1108	/* section=section->child;
	1109	while (section && xhtml_add_contents_entry(fp, section, limit)) {
	1110	section = section->next;
	1111	}*/
	1112	return count;
	1113	}
	1114
	1115	/*
	1116	* Add a section entry, unless we're exceeding the limit, in which
	1117	* case return FALSE (otherwise return TRUE).
	1118	*/
	1119	static int xhtml_add_contents_entry(FILE fp, xhtmlsection section, int limit)
	1120	{
	1121	if (!section \|\| section->level > limit)
	1122	return FALSE;
	1123	if (fp==NULL \|\| section->level < 0)
	1124	return TRUE;
	1125	if (last_level > section->level) {
	1126	while (last_level > section->level) {
	1127	last_level--;
	1128	fprintf(fp, "</li></ul>\n");
	1129	}
	1130	fprintf(fp, "</li>\n");
	1131	} else if (last_level < section->level) {
	1132	assert(last_level == section->level - 1);
	1133	last_level++;
	1134	fprintf(fp, "<ul>\n");
	1135	} else {
	1136	fprintf(fp, "</li>\n");
	1137	}
	1138	fprintf(fp, "<li><a href=\"%s#%s\">", section->file->filename, section->fragment);
	1139	if (section->para->kwtext) {
	1140	xhtml_para(fp, section->para->kwtext, FALSE);
	1141	if (section->para->words) {
	1142	fprintf(fp, ": ");
	1143	}
	1144	}
	1145	if (section->para->words) {
	1146	xhtml_para(fp, section->para->words, FALSE);
	1147	}
	1148	fprintf(fp, "</a>\n");
	1149	return TRUE;
	1150	}
	1151
	1152	/*
	1153	* Write all the sections in this file. Do all paragraphs in this section, then all
	1154	* children (recursively), then go on to the next one (tail recursively).
	1155	*/
	1156	static void xhtml_do_sections(FILE fp, xhtmlsection sections)
	1157	{
	1158	while (sections) {
	1159	currentsection = sections;
	1160	xhtml_do_paras(fp, sections->para, NULL, TRUE);
	1161	xhtml_do_sections(fp, sections->child);
	1162	sections = sections->next;
	1163	}
	1164	}
	1165
	1166	/* Write this list of paragraphs. Close off all lists at the end. */
	1167	static void xhtml_do_paras(FILE fp, paragraph p, paragraph *end,
	1168	int indexable)
	1169	{
	1170	int last_type = -1, ptype, first=TRUE;
	1171	stack lcont_stack = stk_new();
	1172	if (!p)
	1173	return;
	1174
	1175	/* for (; p && (xhtml_para_level(p)>limit \|\| xhtml_para_level(p)==-1 \|\| first); p=p->next) {*/
	1176	for (; p && p != end && (xhtml_para_level(p)==-1 \|\| first); p=p->next) {
	1177	first=FALSE;
	1178	switch (ptype = p->type)
	1179	{
	1180	/*
	1181	* Things we ignore because we've already processed them or
	1182	* aren't going to touch them in this pass.
	1183	*/
	1184	case para_IM:
	1185	case para_BR:
	1186	case para_Biblio: /* only touch BiblioCited */
	1187	case para_VersionID:
	1188	case para_NoCite:
	1189	case para_Title:
	1190	break;
	1191
	1192	/*
	1193	* Chapter titles.
	1194	*/
	1195	case para_Chapter:
	1196	case para_Appendix:
	1197	case para_UnnumberedChapter:
	1198	xhtml_heading(fp, p, indexable);
	1199	break;
	1200
	1201	case para_Heading:
	1202	case para_Subsect:
	1203	xhtml_heading(fp, p, indexable);
	1204	break;
	1205
	1206	case para_Rule:
	1207	fprintf(fp, "\n<hr />\n");
	1208	break;
	1209
	1210	case para_Normal:
	1211	case para_Copyright:
	1212	fprintf(fp, "\n<p>");
	1213	xhtml_para(fp, p->words, indexable);
	1214	fprintf(fp, "</p>\n");
	1215	break;
	1216
	1217	case para_LcontPush:
	1218	{
	1219	int *p;
	1220	p = mknew(int);
	1221	*p = last_type;
	1222	stk_push(lcont_stack, p);
	1223	last_type = para_Normal;
	1224	}
	1225	break;
	1226	case para_LcontPop:
	1227	{
	1228	int *p = stk_pop(lcont_stack);
	1229	assert(p);
	1230	ptype = last_type = *p;
	1231	sfree(p);
	1232	goto closeofflist; /* ick */
	1233	}
	1234	break;
	1235	case para_QuotePush:
	1236	fprintf(fp, "<blockquote>\n");
	1237	break;
	1238	case para_QuotePop:
	1239	fprintf(fp, "</blockquote>\n");
	1240	break;
	1241
	1242	case para_Bullet:
	1243	case para_NumberedList:
	1244	case para_Description:
	1245	case para_DescribedThing:
	1246	case para_BiblioCited:
	1247	if (last_type!=p->type &&
	1248	!(last_type==para_DescribedThing && p->type==para_Description) &&
	1249	!(last_type==para_Description && p->type==para_DescribedThing)) {
	1250	/* start up list if necessary */
	1251	if (p->type == para_Bullet) {
	1252	fprintf(fp, "<ul>\n");
	1253	} else if (p->type == para_NumberedList) {
	1254	fprintf(fp, "<ol>\n");
	1255	} else if (p->type == para_BiblioCited \|\|
	1256	p->type == para_DescribedThing \|\|
	1257	p->type == para_Description) {
	1258	fprintf(fp, "<dl>\n");
	1259	}
	1260	}
	1261	if (p->type == para_Bullet \|\| p->type == para_NumberedList) {
	1262	fprintf(fp, "<li>");
	1263	} else if (p->type == para_DescribedThing) {
	1264	fprintf(fp, "<dt>");
	1265	} else if (p->type == para_Description) {
	1266	fprintf(fp, "<dd>");
	1267	} else if (p->type == para_BiblioCited) {
	1268	fprintf(fp, "<dt>");
	1269	xhtml_para(fp, p->kwtext, indexable);
	1270	fprintf(fp, "</dt>\n<dd>");
	1271	}
	1272	xhtml_para(fp, p->words, indexable);
	1273	{
	1274	paragraph *p2 = p->next;
	1275	if (p2 && xhtml_para_level(p2)==-1 && p2->type == para_LcontPush)
	1276	break;
	1277	}
	1278
	1279	closeofflist:
	1280	if (ptype == para_BiblioCited) {
	1281	fprintf(fp, "</dd>\n");
	1282	} else if (ptype == para_DescribedThing) {
	1283	fprintf(fp, "</dt>");
	1284	} else if (ptype == para_Description) {
	1285	fprintf(fp, "</dd>");
	1286	} else if (ptype == para_Bullet \|\| ptype == para_NumberedList) {
	1287	fprintf(fp, "</li>");
	1288	}
	1289	if (ptype == para_Bullet \|\| ptype == para_NumberedList \|\|
	1290	ptype == para_BiblioCited \|\| ptype == para_Description \|\|
	1291	ptype == para_DescribedThing)
	1292	/* close off list if necessary */
	1293	{
	1294	paragraph *p2 = p->next;
	1295	int close_off=FALSE;
	1296	/* if (p2 && (xhtml_para_level(p2)>limit \|\| xhtml_para_level(p2)==-1)) {*/
	1297	if (p2 && xhtml_para_level(p2)==-1) {
	1298	if (p2->type != ptype &&
	1299	!(p2->type==para_DescribedThing && ptype==para_Description) &&
	1300	!(p2->type==para_Description && ptype==para_DescribedThing) &&
	1301	p2->type != para_LcontPush)
	1302	close_off=TRUE;
	1303	} else {
	1304	close_off=TRUE;
	1305	}
	1306	if (close_off) {
	1307	if (ptype == para_Bullet) {
	1308	fprintf(fp, "</ul>\n");
	1309	} else if (ptype == para_NumberedList) {
	1310	fprintf(fp, "</ol>\n");
	1311	} else if (ptype == para_BiblioCited \|\|
	1312	ptype == para_Description \|\|
	1313	ptype == para_DescribedThing) {
	1314	fprintf(fp, "</dl>\n");
	1315	}
	1316	}
	1317	}
	1318	break;
	1319
	1320	case para_Code:
	1321	xhtml_codepara(fp, p->words);
	1322	break;
	1323	}
	1324	last_type = ptype;
	1325	}
	1326
	1327	stk_free(lcont_stack);
	1328	}
	1329
	1330	/*
	1331	* Output a header for this XHTML file.
	1332	*/
	1333	static void xhtml_doheader(FILE fp, word title)
	1334	{
	1335	fprintf(fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n");
	1336	fprintf(fp, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
	1337	fprintf(fp, "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n\n<head>\n<title>");
	1338	if (title==NULL)
	1339	fprintf(fp, "The thing with no name!");
	1340	else
	1341	xhtml_para(fp, title, FALSE);
	1342	fprintf(fp, "</title>\n");
	1343	fprintf(fp, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version);
	1344	if (conf.author)
	1345	fprintf(fp, "<meta name=\"author\" content=\"%ls\" />\n", conf.author);
	1346	if (conf.description)
	1347	fprintf(fp, "<meta name=\"description\" content=\"%ls\" />\n", conf.description);
	1348	if (conf.head_end)
	1349	fprintf(fp, "%ls\n", conf.head_end);
	1350	fprintf(fp, "</head>\n\n");
	1351	if (conf.body)
	1352	fprintf(fp, "%ls\n", conf.body);
	1353	else
	1354	fprintf(fp, "<body>\n");
	1355	if (conf.body_start)
	1356	fprintf(fp, "%ls\n", conf.body_start);
	1357	}
	1358
	1359	/*
	1360	* Output a footer for this XHTML file.
	1361	*/
	1362	static void xhtml_dofooter(FILE *fp)
	1363	{
	1364	fprintf(fp, "\n<hr />\n\n");
	1365	if (conf.body_end)
	1366	fprintf(fp, "%ls\n", conf.body_end);
	1367	if (!conf.suppress_address) {
	1368	fprintf(fp,"<address>\n");
	1369	if (conf.address_start)
	1370	fprintf(fp, "%ls\n", conf.address_start);
	1371	/* Do the version ID */
	1372	if (conf.include_version_id) {
	1373	paragraph *p;
	1374	int started = 0;
	1375	for (p = sourceparas; p; p = p->next)
	1376	if (p->type == para_VersionID) {
	1377	xhtml_versionid(fp, p->words, started);
	1378	started = 1;
	1379	}
	1380	}
	1381	if (conf.address_end)
	1382	fprintf(fp, "%ls\n", conf.address_end);
	1383	fprintf(fp, "</address>\n");
	1384	}
	1385	fprintf(fp, "</body>\n\n</html>\n");
	1386	}
	1387
	1388	/*
	1389	* Output the versionid paragraph. Typically this is a version control
	1390	* ID string (such as $Id...$ in RCS).
	1391	*/
	1392	static void xhtml_versionid(FILE fp, word text, int started)
	1393	{
	1394	rdstringc t = { 0, 0, NULL };
	1395
	1396	rdaddc(&t, '['); /* FIXME: configurability */
	1397	xhtml_rdaddwc(&t, text, NULL, FALSE);
	1398	rdaddc(&t, ']'); /* FIXME: configurability */
	1399
	1400	if (started)
	1401	fprintf(fp, "<br />\n");
	1402	fprintf(fp, "%s\n", t.text);
	1403	sfree(t.text);
	1404	}
	1405
	/*
	 * Is this an XHTML reserved character? Returns nonzero for
	 * '&', '<', '>' and '"', zero for anything else.
	 */
	static int xhtml_reservedchar(int c)
	{
	    return c == '&' || c == '<' || c == '>' || c == '"';
	}
	1414
	1415	/*
	1416	* Convert a wide string into valid XHTML: Anything outside ASCII will
	1417	* be fixed up as an entity. Currently we don't worry about constraining the
	1418	* encoded character set, which we should probably do at some point (we can
	1419	* still fix up and return FALSE - see the last comment here). We also don't
	1420	* currently
	1421	*
	1422	* Because this is only used for words, spaces are HARD spaces (any other
	1423	* spaces will be word_Whitespace not word_Normal). So they become
	1424	* Unless hard_spaces is FALSE, of course (code paragraphs break the above
	1425	* rule).
	1426	*
	1427	* If `result' is non-NULL, mallocs the resulting string and stores a pointer to
	1428	* it in `*result'. If `result' is NULL, merely checks whether all
	1429	* characters in the string are feasible.
	1430	*
	1431	* Return is nonzero if all characters are OK. If not all
	1432	* characters are OK but `result' is non-NULL, a result _will_
	1433	* still be generated!
	1434	*/
	1435	static int xhtml_convert(wchar_t s, int maxlen, char *result,
	1436	int hard_spaces) {
	1437	int doing = (result != 0);
	1438	int ok = TRUE;
	1439	char *p = NULL;
	1440	int plen = 0, psize = 0;
	1441
	1442	if (maxlen <= 0)
	1443	maxlen = -1;
	1444
	1445	for (; *s && maxlen != 0; s++, maxlen--) {
	1446	wchar_t c = *s;
	1447
	1448	#define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
	1449
	1450	if (((c == 32 && !hard_spaces) \|\| (c > 32 && c <= 126 && !xhtml_reservedchar(c)))) {
	1451	/* Char is OK. */
	1452	if (doing)
	1453	{
	1454	ensure_size(plen);
	1455	p[plen++] = (char)c;
	1456	}
	1457	} else {
	1458	/* Char needs fixing up. */
	1459	/* ok = FALSE; -- currently we never return FALSE; we
	1460	* might want to when considering a character set for the
	1461	* encoded document.
	1462	*/
	1463	if (doing)
	1464	{
	1465	if (c==32) { /* a space in a word is a hard space */
	1466	ensure_size(plen+6); /* includes space for the NUL, which is subsequently stomped on */
	1467	sprintf(p+plen, " ");
	1468	plen+=6;
	1469	} else {
	1470	/* FIXME: entity names! */
	1471	ensure_size(plen+8); /* includes space for the NUL, which is subsequently stomped on */
	1472	plen+=sprintf(p+plen, "&#%04i;", (int)c);
	1473	}
	1474	}
	1475	}
	1476	}
	1477	if (doing) {
	1478	p = resize(p, plen+1);
	1479	p[plen] = '\0';
	1480	*result = p;
	1481	}
	1482	return ok;
	1483	}
	1484
	1485	/*
	1486	* This formats the given words as XHTML.
	1487	*
	1488	* `indexable', if FALSE, prohibits adding any index references.
	1489	* You might use this, for example, if an index reference occurred
	1490	* in a section title, to prevent phony index references when the
	1491	* section title is processed in strange places such as contents
	1492	* sections.
	1493	*/
	1494	static void xhtml_rdaddwc(rdstringc rs, word text, word *end, int indexable) {
	1495	char *c;
	1496	keyword *kwl;
	1497	xhtmlsection *sect;
	1498	indextag *itag;
	1499	int ti;
	1500
	1501	for (; text && text != end; text = text->next) {
	1502	switch (text->type) {
	1503	case word_HyperLink:
	1504	xhtml_utostr(text->text, &c);
	1505	rdaddsc(rs, "<a href=\"");
	1506	rdaddsc(rs, c);
	1507	rdaddsc(rs, "\">");
	1508	sfree(c);
	1509	break;
	1510
	1511	case word_UpperXref:
	1512	case word_LowerXref:
	1513	kwl = kw_lookup(keywords, text->text);
	1514	if (kwl) {
	1515	sect=xhtml_find_section(kwl->para);
	1516	if (sect) {
	1517	rdaddsc(rs, "<a href=\"");
	1518	rdaddsc(rs, sect->file->filename);
	1519	rdaddc(rs, '#');
	1520	rdaddsc(rs, sect->fragment);
	1521	rdaddsc(rs, "\">");
	1522	} else {
	1523	rdaddsc(rs, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
	1524	error(err_whatever, "Couldn't locate cross-reference! (Probably a bibliography entry.)");
	1525	}
	1526	} else {
	1527	rdaddsc(rs, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
	1528	error(err_whatever, "Couldn't locate cross-reference! (Wasn't in source file.)");
	1529	}
	1530	break;
	1531
	1532	case word_IndexRef: /* in theory we could make an index target here */
	1533	/* rdaddsc(rs, "<a name=\"idx-");
	1534	xhtml_utostr(text->text, &c);
	1535	rdaddsc(rs, c);
	1536	sfree(c);
	1537	rdaddsc(rs, "\"></a>");*/
	1538	/* what we _do_ need to do is to fix up the backend data
	1539	* for any indexentry this points to.
	1540	*/
	1541	if (!indexable)
	1542	break;
	1543
	1544	for (ti=0; (itag = (indextag *)index234(idx->tags, ti))!=NULL; ti++) {
	1545	/* FIXME: really ustricmp() and not ustrcmp()? */
	1546	if (ustricmp(itag->name, text->text)==0) {
	1547	break;
	1548	}
	1549	}
	1550	if (itag!=NULL) {
	1551	if (itag->refs!=NULL) {
	1552	int i;
	1553	for (i=0; i<itag->nrefs; i++) {
	1554	xhtmlindex *idx_ref;
	1555	indexentry *ientry;
	1556
	1557	ientry = itag->refs[i];
	1558	if (ientry->backend_data==NULL) {
	1559	idx_ref = (xhtmlindex*) smalloc(sizeof(xhtmlindex));
	1560	if (idx_ref==NULL)
	1561	fatal(err_nomemory);
	1562	idx_ref->nsection = 0;
	1563	idx_ref->size = 4;
	1564	idx_ref->sections = (xhtmlsection*) smalloc(idx_ref->size sizeof(xhtmlsection*));
	1565	if (idx_ref->sections==NULL)
	1566	fatal(err_nomemory);
	1567	ientry->backend_data = idx_ref;
	1568	} else {
	1569	idx_ref = ientry->backend_data;
	1570	if (idx_ref->nsection+1 > idx_ref->size) {
	1571	int new_size = idx_ref->size * 2;
	1572	idx_ref->sections = srealloc(idx_ref->sections, new_size * sizeof(xhtmlsection));
	1573	if (idx_ref->sections==NULL) {
	1574	fatal(err_nomemory);
	1575	}
	1576	idx_ref->size = new_size;
	1577	}
	1578	}
	1579	idx_ref->sections[idx_ref->nsection++] = currentsection;
	1580	#if 0
	1581	#endif
	1582	}
	1583	} else {
	1584	fatal(err_whatever, "Index tag had no entries!");
	1585	}
	1586	} else {
	1587	fprintf(stderr, "Looking for index entry '%ls'\n", text->text);
	1588	fatal(err_whatever, "Couldn't locate index entry! (Wasn't in index.)");
	1589	}
	1590	break;
	1591
	1592	case word_HyperEnd:
	1593	case word_XrefEnd:
	1594	rdaddsc(rs, "</a>");
	1595	break;
	1596
	1597	case word_Normal:
	1598	case word_Emph:
	1599	case word_Code:
	1600	case word_WeakCode:
	1601	case word_WhiteSpace:
	1602	case word_EmphSpace:
	1603	case word_CodeSpace:
	1604	case word_WkCodeSpace:
	1605	case word_Quote:
	1606	case word_EmphQuote:
	1607	case word_CodeQuote:
	1608	case word_WkCodeQuote:
	1609	assert(text->type != word_CodeQuote &&
	1610	text->type != word_WkCodeQuote);
	1611	if (towordstyle(text->type) == word_Emph &&
	1612	(attraux(text->aux) == attr_First \|\|
	1613	attraux(text->aux) == attr_Only))
	1614	rdaddsc(rs, "<em>");
	1615	else if ((towordstyle(text->type) == word_Code \|\| towordstyle(text->type) == word_WeakCode) &&
	1616	(attraux(text->aux) == attr_First \|\|
	1617	attraux(text->aux) == attr_Only))
	1618	rdaddsc(rs, "<code>");
	1619
	1620	if (removeattr(text->type) == word_Normal) {
	1621	if (xhtml_convert(text->text, 0, &c, TRUE) \|\| !text->alt)
	1622	/* spaces in the word are hard */
	1623	rdaddsc(rs, c);
	1624	else
	1625	xhtml_rdaddwc(rs, text->alt, NULL, indexable);
	1626	sfree(c);
	1627	} else if (removeattr(text->type) == word_WhiteSpace) {
	1628	rdaddc(rs, ' ');
	1629	} else if (removeattr(text->type) == word_Quote) {
	1630	rdaddsc(rs, """);
	1631	}
	1632
	1633	if (towordstyle(text->type) == word_Emph &&
	1634	(attraux(text->aux) == attr_Last \|\|
	1635	attraux(text->aux) == attr_Only))
	1636	rdaddsc(rs, "</em>");
	1637	else if ((towordstyle(text->type) == word_Code \|\| towordstyle(text->type) == word_WeakCode) &&
	1638	(attraux(text->aux) == attr_Last \|\|
	1639	attraux(text->aux) == attr_Only))
	1640	rdaddsc(rs, "</code>");
	1641	break;
	1642	}
	1643	}
	1644	}
	1645
	1646	/* Output a heading, formatted as XHTML.
	1647	*/
	1648	static void xhtml_heading(FILE fp, paragraph p, int indexable)
	1649	{
	1650	rdstringc t = { 0, 0, NULL };
	1651	word *tprefix = p->kwtext;
	1652	word *nprefix = p->kwtext2;
	1653	word *text = p->words;
	1654	int level = xhtml_para_level(p);
	1655	xhtmlsection *sect = xhtml_find_section(p);
	1656	xhtmlheadfmt *fmt;
	1657	char *fragment;
	1658	if (sect) {
	1659	fragment = sect->fragment;
	1660	} else {
	1661	if (p->type == para_Title)
	1662	fragment = "title";
	1663	else {
	1664	fragment = ""; /* FIXME: what else can we do? */
	1665	error(err_whatever, "Couldn't locate heading cross-reference!");
	1666	}
	1667	}
	1668
	1669	if (p->type == para_Title)
	1670	fmt = NULL;
	1671	else if (level == 1)
	1672	fmt = &conf.fchapter;
	1673	else if (level-1 < conf.nfsect)
	1674	fmt = &conf.fsect[level-1];
	1675	else
	1676	fmt = &conf.fsect[conf.nfsect-1];
	1677
	1678	if (fmt && fmt->just_numbers && nprefix) {
	1679	xhtml_rdaddwc(&t, nprefix, NULL, indexable);
	1680	if (fmt) {
	1681	char *c;
	1682	if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
	1683	rdaddsc(&t, c);
	1684	sfree(c);
	1685	}
	1686	}
	1687	} else if (fmt && !fmt->just_numbers && tprefix) {
	1688	xhtml_rdaddwc(&t, tprefix, NULL, indexable);
	1689	if (fmt) {
	1690	char *c;
	1691	if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) {
	1692	rdaddsc(&t, c);
	1693	sfree(c);
	1694	}
	1695	}
	1696	}
	1697	xhtml_rdaddwc(&t, text, NULL, indexable);
	1698	/*
	1699	* If we're outputting in single-file mode, we need to lower
	1700	* the level of each heading by one, because the overall
	1701	* document title will be sitting right at the top as an <h1>
	1702	* and so chapters and sections should start at <h2>.
	1703	*
	1704	* Even if not, the document title will come back from
	1705	* xhtml_para_level() as level zero, so we must increment that
	1706	* no matter what leaf_level is set to.
	1707	*/
	1708	if (conf.leaf_level == 0 \|\| level == 0)
	1709	level++;
	1710	fprintf(fp, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment, level, t.text, level);
	1711	sfree(t.text);
	1712	}
	1713
	1714	/* Output a paragraph. Styles are handled by xhtml_rdaddwc().
	1715	* This looks pretty simple; I may have missed something ...
	1716	*/
	1717	static void xhtml_para(FILE fp, word text, int indexable)
	1718	{
	1719	rdstringc out = { 0, 0, NULL };
	1720	xhtml_rdaddwc(&out, text, NULL, indexable);
	1721	fprintf(fp, "%s", out.text);
	1722	sfree(out.text);
	1723	}
	1724
	1725	/* Output a code paragraph. I'm treating this as preformatted, which
	1726	* may not be entirely correct. See xhtml_para() for my worries about
	1727	* this being overly-simple; however I think that most of the complexity
	1728	* of the text backend came entirely out of word wrapping anyway.
	1729	*/
	1730	static void xhtml_codepara(FILE fp, word text)
	1731	{
	1732	fprintf(fp, "<pre>");
	1733	for (; text; text = text->next) if (text->type == word_WeakCode) {
	1734	word here, next;
	1735	char *c;
	1736
	1737	/*
	1738	* See if this WeakCode is followed by an Emph to indicate
	1739	* emphasis.
	1740	*/
	1741	here = text;
	1742	if (text->next && text->next->type == word_Emph) {
	1743	next = text = text->next;
	1744	} else
	1745	next = NULL;
	1746
	1747	if (next) {
	1748	wchar_t t, e;
	1749	int n;
	1750
	1751	t = here->text;
	1752	e = next->text;
	1753
	1754	while (*e) {
	1755	int ec = *e;
	1756
	1757	for (n = 0; t[n] && e[n] && e[n] == ec; n++);
	1758	xhtml_convert(t, n, &c, FALSE);
	1759	fprintf(fp, "%s%s%s",
	1760	(ec == 'i' ? "<em>" : ec == 'b' ? "<b>" : ""),
	1761	c,
	1762	(ec == 'i' ? "</em>" : ec == 'b' ? "</b>" : ""));
	1763	sfree(c);
	1764
	1765	t += n;
	1766	e += n;
	1767	}
	1768
	1769	xhtml_convert(t, 0, &c, FALSE);
	1770	fprintf(fp, "%s\n", c);
	1771	sfree(c);
	1772	} else {
	1773	xhtml_convert(here->text, 0, &c, FALSE);
	1774	fprintf(fp, "%s\n", c);
	1775	sfree(c);
	1776	}
	1777	}
	1778	fprintf(fp, "</pre>\n");
	1779	}