mdw@git.distorted.org.uk Git - sgt/halibut/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* input.c: read the source form
	3	*/
	4
	5	#include <stdio.h>
	6	#include <assert.h>
	7	#include <time.h>
	8	#include "halibut.h"
	9
	#define TAB_STOP 8 /* tab stop width, used when tracking column numbers */
	11
	12	static void setpos(input in, char fname) {
	13	in->pos.filename = fname;
	14	in->pos.line = 1;
	15	in->pos.col = (in->reportcols ? 1 : -1);
	16	}
	17
	18	static void unget(input in, int c, filepos pos) {
	19	if (in->npushback >= in->pushbacksize) {
	20	in->pushbacksize = in->npushback + 16;
	21	in->pushback = sresize(in->pushback, in->pushbacksize, pushback);
	22	}
	23	in->pushback[in->npushback].chr = c;
	24	in->pushback[in->npushback].pos = pos; / structure copy */
	25	in->npushback++;
	26	}
	27
	28	/* ---------------------------------------------------------------------- */
	29	/*
	30	* Macro subsystem
	31	*/
	32	typedef struct macro_Tag macro;
	33	struct macro_Tag {
	34	wchar_t name, text;
	35	};
	36	struct macrostack_Tag {
	37	macrostack *next;
	38	wchar_t *text;
	39	int ptr, npushback;
	40	filepos pos;
	41	};
	42	static int macrocmp(void av, void bv) {
	43	macro a = (macro )av, b = (macro )bv;
	44	return ustrcmp(a->name, b->name);
	45	}
	46	static void macrodef(tree234 macros, wchar_t name, wchar_t *text,
	47	filepos fpos) {
	48	macro *m = snew(macro);
	49	m->name = name;
	50	m->text = text;
	51	if (add234(macros, m) != m) {
	52	error(err_macroexists, &fpos, name);
	53	sfree(name);
	54	sfree(text);
	55	}
	56	}
	57	static int macrolookup(tree234 macros, input in, wchar_t *name,
	58	filepos *pos) {
	59	macro m, *gotit;
	60	m.name = name;
	61	gotit = find234(macros, &m, NULL);
	62	if (gotit) {
	63	macrostack *expansion = snew(macrostack);
	64	expansion->next = in->stack;
	65	expansion->text = gotit->text;
	66	expansion->pos = pos; / structure copy */
	67	expansion->ptr = 0;
	68	expansion->npushback = in->npushback;
	69	in->stack = expansion;
	70	return TRUE;
	71	} else
	72	return FALSE;
	73	}
	74	static void macrocleanup(tree234 *macros) {
	75	int ti;
	76	macro *m;
	77	for (ti = 0; (m = (macro *)index234(macros, ti)) != NULL; ti++) {
	78	sfree(m->name);
	79	sfree(m->text);
	80	sfree(m);
	81	}
	82	freetree234(macros);
	83	}
	84
	85	static void input_configure(input in, paragraph cfg) {
	86	assert(cfg->type == para_Config);
	87
	88	if (!ustricmp(cfg->keyword, L"input-charset")) {
	89	char *csname = utoa_dup(uadv(cfg->keyword), CS_ASCII);
	90	in->charset = charset_from_localenc(csname);
	91	sfree(csname);
	92	}
	93	}
	94
	95	/*
	96	* Can return EOF
	97	*/
	98	static int get(input in, filepos pos, rdstringc *rsc) {
	99	int pushbackpt = in->stack ? in->stack->npushback : 0;
	100	if (in->npushback > pushbackpt) {
	101	--in->npushback;
	102	if (pos)
	103	pos = in->pushback[in->npushback].pos; / structure copy */
	104	return in->pushback[in->npushback].chr;
	105	}
	106	else if (in->stack) {
	107	wchar_t c = in->stack->text[in->stack->ptr];
	108	if (in->stack->text[++in->stack->ptr] == L'\0') {
	109	macrostack *tmp = in->stack;
	110	in->stack = tmp->next;
	111	sfree(tmp);
	112	}
	113	return c;
	114	}
	115	else if (in->currfp) {
	116
	117	while (in->wcpos >= in->nwc) {
	118
	119	int c = getc(in->currfp);
	120
	121	if (c == EOF) {
	122	fclose(in->currfp);
	123	in->currfp = NULL;
	124	return EOF;
	125	}
	126
	127	if (rsc)
	128	rdaddc(rsc, c);
	129
	130	/* Track line numbers, for error reporting */
	131	if (pos)
	132	*pos = in->pos;
	133	if (in->reportcols) {
	134	switch (c) {
	135	case '\t':
	136	in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP;
	137	break;
	138	case '\n':
	139	in->pos.col = 1;
	140	in->pos.line++;
	141	break;
	142	default:
	143	in->pos.col++;
	144	break;
	145	}
	146	} else {
	147	in->pos.col = -1;
	148	if (c == '\n')
	149	in->pos.line++;
	150	}
	151
	152	/*
	153	* Do input character set translation, so that we return
	154	* Unicode.
	155	*/
	156	{
	157	char buf[1];
	158	char const *p;
	159	int inlen;
	160
	161	buf[0] = (char)c;
	162	p = buf;
	163	inlen = 1;
	164
	165	in->nwc = charset_to_unicode(&p, &inlen,
	166	in->wc, lenof(in->wc),
	167	in->charset, &in->csstate,
	168	NULL, 0);
	169	assert(p == buf+1 && inlen == 0);
	170
	171	in->wcpos = 0;
	172	}
	173	}
	174
	175	return in->wc[in->wcpos++];
	176
	177	} else
	178	return EOF;
	179	}
	180
	181	/*
	182	* Lexical analysis of source files.
	183	*/
	184	typedef struct token_Tag token;
	185	struct token_Tag {
	186	int type;
	187	int cmd, aux;
	188	wchar_t *text;
	189	char *origtext;
	190	filepos pos;
	191	};
	/* Token types (the `type' field of a token). */
	enum {
	    tok_eof,                   /* end of file */
	    tok_eop,                   /* end of paragraph */
	    tok_white,                 /* whitespace */
	    tok_word,                  /* a word or word fragment */
	    tok_cmd,                   /* \command */
	    tok_lbrace,                /* { */
	    tok_rbrace                 /* } */
	};
	201
	/* Halibut command keywords (the `cmd' field of a tok_cmd token). */
	enum {
	    c__invalid,                /* invalid command */
	    c__comment,                /* comment command (\#) */
	    c__escaped,                /* escaped character */
	    c__nop,                    /* no-op */
	    c__nbsp,                   /* nonbreaking space */
	    c_A,                       /* appendix heading */
	    c_B,                       /* bibliography entry */
	    c_BR,                      /* bibliography rewrite */
	    c_C,                       /* chapter heading */
	    c_H,                       /* heading */
	    c_I,                       /* invisible index mark */
	    c_IM,                      /* index merge/rewrite */
	    c_K,                       /* capitalised cross-reference */
	    c_S,                       /* aux field is 0, 1, 2, ... */
	    c_U,                       /* unnumbered-chapter heading */
	    c_W,                       /* Web hyperlink */
	    c_b,                       /* bulletted list */
	    c_c,                       /* code */
	    c_cfg,                     /* configuration directive */
	    c_copyright,               /* copyright statement */
	    c_cw,                      /* weak code */
	    c_date,                    /* document processing date */
	    c_dd,                      /* description list: description */
	    c_define,                  /* macro definition */
	    c_dt,                      /* description list: described thing */
	    c_e,                       /* emphasis */
	    c_i,                       /* visible index mark */
	    c_ii,                      /* uncapitalised visible index mark */
	    c_k,                       /* uncapitalised cross-reference */
	    c_lcont,                   /* continuation para(s) for list item */
	    c_n,                       /* numbered list */
	    c_nocite,                  /* bibliography trickery */
	    c_preamble,                /* (obsolete) preamble text */
	    c_q,                       /* quote marks */
	    c_quote,                   /* block-quoted paragraphs */
	    c_rule,                    /* horizontal rule */
	    c_title,                   /* document title */
	    c_u,                       /* aux field is char code */
	    c_versionid                /* document RCS id */
	};
	244
	/*
	 * Character classification helpers. These are function-like macros
	 * and evaluate their argument more than once, so pass only
	 * side-effect-free expressions.
	 */
	/* Perhaps whitespace should be defined in a more Unicode-friendly way? */
	#define iswhite(c) ( (c)==32 || (c)==9 || (c)==13 || (c)==10 )
	#define isnl(c) ( (c)==10 )
	#define isdec(c) ( ((c)>='0'&&(c)<='9') )
	#define fromdec(c) ( (c)-'0' )
	#define ishex(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f'))
	/* Clearing bit 0x20 folds 'a'..'f' onto 'A'..'F' before subtracting. */
	#define fromhex(c) ( (c)<='9' ? (c)-'0' : ((c)&0xDF) - ('A'-10) )
	#define iscmd(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z'))
	253
	/*
	 * Keyword comparison function. Like strcmp, but between a wchar_t *
	 * and a char *.
	 *
	 * Returns the difference between the first pair of characters that
	 * differ (the terminating NUL counts as a character), or 0 if the two
	 * strings are equal.
	 */
	static int kwcmp(wchar_t const *p, char const *q) {
	    int i;
	    do {
	        i = *p - *q;
	    } while (*p++ && *q++ && !i);
	    return i;
	}
	265
	266	/*
	267	* Match a keyword.
	268	*/
	269	static void match_kw(token *tok) {
	270	/*
	271	* FIXME. The ids are explicit in here so as to allow long-name
	272	* equivalents to the various very short keywords.
	273	*/
	274	static const struct { char const *name; int id; } keywords[] = {
	275	{"#", c__comment}, /* comment command (\#) */
	276	{"-", c__escaped}, /* nonbreaking hyphen */
	277	{".", c__nop}, /* no-op */
	278	{"A", c_A}, /* appendix heading */
	279	{"B", c_B}, /* bibliography entry */
	280	{"BR", c_BR}, /* bibliography rewrite */
	281	{"C", c_C}, /* chapter heading */
	282	{"H", c_H}, /* heading */
	283	{"I", c_I}, /* invisible index mark */
	284	{"IM", c_IM}, /* index merge/rewrite */
	285	{"K", c_K}, /* capitalised cross-reference */
	286	{"U", c_U}, /* unnumbered-chapter heading */
	287	{"W", c_W}, /* Web hyperlink */
	288	{"\\", c__escaped}, /* escaped backslash (\\) */
	289	{"_", c__nbsp}, /* nonbreaking space (\_) */
	290	{"b", c_b}, /* bulletted list */
	291	{"c", c_c}, /* code */
	292	{"cfg", c_cfg}, /* configuration directive */
	293	{"copyright", c_copyright}, /* copyright statement */
	294	{"cw", c_cw}, /* weak code */
	295	{"date", c_date}, /* document processing date */
	296	{"dd", c_dd}, /* description list: description */
	297	{"define", c_define}, /* macro definition */
	298	{"dt", c_dt}, /* description list: described thing */
	299	{"e", c_e}, /* emphasis */
	300	{"i", c_i}, /* visible index mark */
	301	{"ii", c_ii}, /* uncapitalised visible index mark */
	302	{"k", c_k}, /* uncapitalised cross-reference */
	303	{"lcont", c_lcont}, /* continuation para(s) for list item */
	304	{"n", c_n}, /* numbered list */
	305	{"nocite", c_nocite}, /* bibliography trickery */
	306	{"preamble", c_preamble}, /* (obsolete) preamble text */
	307	{"q", c_q}, /* quote marks */
	308	{"quote", c_quote}, /* block-quoted paragraphs */
	309	{"rule", c_rule}, /* horizontal rule */
	310	{"title", c_title}, /* document title */
	311	{"versionid", c_versionid}, /* document RCS id */
	312	{"{", c__escaped}, /* escaped lbrace (\{) */
	313	{"}", c__escaped}, /* escaped rbrace (\}) */
	314	};
	315	int i, j, k, c;
	316
	317	/*
	318	* Special cases: \S{0,1,2,...} and \uABCD. If the syntax
	319	* doesn't match correctly, we just fall through to the
	320	* binary-search phase.
	321	*/
	322	if (tok->text[0] == 'S') {
	323	/* We expect numeric characters thereafter. */
	324	wchar_t *p = tok->text+1;
	325	int n;
	326	if (!*p)
	327	n = 1;
	328	else {
	329	n = 0;
	330	while (p && isdec(p)) {
	331	n = 10 * n + fromdec(*p);
	332	p++;
	333	}
	334	}
	335	if (!*p) {
	336	tok->cmd = c_S;
	337	tok->aux = n;
	338	return;
	339	}
	340	} else if (tok->text[0] == 'u') {
	341	/* We expect hex characters thereafter. */
	342	wchar_t *p = tok->text+1;
	343	int n = 0;
	344	while (p && ishex(p)) {
	345	n = 16 * n + fromhex(*p);
	346	p++;
	347	}
	348	if (!*p) {
	349	tok->cmd = c_u;
	350	tok->aux = n;
	351	return;
	352	}
	353	}
	354
	355	i = -1;
	356	j = sizeof(keywords)/sizeof(*keywords);
	357	while (j-i > 1) {
	358	k = (i+j)/2;
	359	c = kwcmp(tok->text, keywords[k].name);
	360	if (c < 0)
	361	j = k;
	362	else if (c > 0)
	363	i = k;
	364	else /* c == 0 */ {
	365	tok->cmd = keywords[k].id;
	366	return;
	367	}
	368	}
	369
	370	tok->cmd = c__invalid;
	371	}
	372
	373
	374	/*
	375	* Read a token from the input file, in the normal way (`normal' in
	376	* the sense that code paragraphs work a different way).
	377	*/
	378	token get_token(input *in) {
	379	int c;
	380	int nls;
	381	int prevpos;
	382	token ret;
	383	rdstring rs = { 0, 0, NULL };
	384	rdstringc rsc = { 0, 0, NULL };
	385	filepos cpos;
	386
	387	ret.text = NULL; /* default */
	388	ret.origtext = NULL; /* default */
	389	if (in->pushback_chars) {
	390	rdaddsc(&rsc, in->pushback_chars);
	391	sfree(in->pushback_chars);
	392	in->pushback_chars = NULL;
	393	}
	394	c = get(in, &cpos, &rsc);
	395	ret.pos = cpos;
	396	if (iswhite(c)) { /* tok_white or tok_eop */
	397	nls = 0;
	398	prevpos = 0;
	399	do {
	400	if (isnl(c))
	401	nls++;
	402	prevpos = rsc.pos;
	403	} while ((c = get(in, &cpos, &rsc)) != EOF && iswhite(c));
	404	if (c == EOF) {
	405	ret.type = tok_eof;
	406	sfree(rsc.text);
	407	return ret;
	408	}
	409	if (rsc.text) {
	410	in->pushback_chars = dupstr(rsc.text + prevpos);
	411	sfree(rsc.text);
	412	}
	413	unget(in, c, &cpos);
	414	ret.type = (nls > 1 ? tok_eop : tok_white);
	415	return ret;
	416	} else if (c == EOF) { /* tok_eof */
	417	ret.type = tok_eof;
	418	sfree(rsc.text);
	419	return ret;
	420	} else if (c == '\\') { /* tok_cmd */
	421	rsc.pos = prevpos = 0;
	422	c = get(in, &cpos, &rsc);
	423	if (c == '-' \|\| c == '\\' \|\| c == '_' \|\|
	424	c == '#' \|\| c == '{' \|\| c == '}' \|\| c == '.') {
	425	/* single-char command */
	426	rdadd(&rs, c);
	427	} else if (c == 'u') {
	428	int len = 0;
	429	do {
	430	rdadd(&rs, c);
	431	len++;
	432	prevpos = rsc.pos;
	433	c = get(in, &cpos, &rsc);
	434	} while (ishex(c) && len < 5);
	435	unget(in, c, &cpos);
	436	} else if (iscmd(c)) {
	437	do {
	438	rdadd(&rs, c);
	439	prevpos = rsc.pos;
	440	c = get(in, &cpos, &rsc);
	441	} while (iscmd(c));
	442	unget(in, c, &cpos);
	443	}
	444	/*
	445	* Now match the command against the list of available
	446	* ones.
	447	*/
	448	ret.type = tok_cmd;
	449	ret.text = ustrdup(rs.text);
	450	if (rsc.text) {
	451	in->pushback_chars = dupstr(rsc.text + prevpos);
	452	rsc.text[prevpos] = '\0';
	453	ret.origtext = dupstr(rsc.text);
	454	} else {
	455	ret.origtext = dupstr("");
	456	}
	457	match_kw(&ret);
	458	sfree(rs.text);
	459	sfree(rsc.text);
	460	return ret;
	461	} else if (c == '{') { /* tok_lbrace */
	462	ret.type = tok_lbrace;
	463	sfree(rsc.text);
	464	return ret;
	465	} else if (c == '}') { /* tok_rbrace */
	466	ret.type = tok_rbrace;
	467	sfree(rsc.text);
	468	return ret;
	469	} else { /* tok_word */
	470	/*
	471	* Read a word: the longest possible contiguous sequence of
	472	* things other than whitespace, backslash, braces and
	473	* hyphen. A hyphen terminates the word but is returned as
	474	* part of it; everything else is pushed back for the next
	475	* token. The `aux' field contains TRUE if the word ends in
	476	* a hyphen.
	477	*/
	478	ret.aux = FALSE; /* assumed for now */
	479	prevpos = 0;
	480	while (1) {
	481	if (iswhite(c) \|\| c=='{' \|\| c=='}' \|\| c=='\\' \|\| c==EOF) {
	482	/* Put back the character that caused termination */
	483	unget(in, c, &cpos);
	484	break;
	485	} else {
	486	rdadd(&rs, c);
	487	if (c == '-') {
	488	prevpos = rsc.pos;
	489	ret.aux = TRUE;
	490	break; /* hyphen terminates word */
	491	}
	492	}
	493	prevpos = rsc.pos;
	494	c = get(in, &cpos, &rsc);
	495	}
	496	ret.type = tok_word;
	497	ret.text = ustrdup(rs.text);
	498	if (rsc.text) {
	499	in->pushback_chars = dupstr(rsc.text + prevpos);
	500	rsc.text[prevpos] = '\0';
	501	ret.origtext = dupstr(rsc.text);
	502	} else {
	503	ret.origtext = dupstr("");
	504	}
	505	sfree(rs.text);
	506	sfree(rsc.text);
	507	return ret;
	508	}
	509	}
	510
	511	/*
	512	* Determine whether the next input character is an open brace (for
	513	* telling code paragraphs from paragraphs which merely start with
	514	* code).
	515	*/
	516	int isbrace(input *in) {
	517	int c;
	518	filepos cpos;
	519
	520	c = get(in, &cpos, NULL);
	521	unget(in, c, &cpos);
	522	return (c == '{');
	523	}
	524
	525	/*
	526	* Read the rest of a line that starts `\c'. Including nothing at
	527	* all (tok_word with empty text).
	528	*/
	529	token get_codepar_token(input *in) {
	530	int c;
	531	token ret;
	532	rdstring rs = { 0, 0, NULL };
	533	filepos cpos;
	534
	535	ret.type = tok_word;
	536	ret.origtext = NULL;
	537	c = get(in, &cpos, NULL); /* expect (and discard) one space */
	538	ret.pos = cpos;
	539	if (c == ' ') {
	540	c = get(in, &cpos, NULL);
	541	ret.pos = cpos;
	542	}
	543	while (!isnl(c) && c != EOF) {
	544	int c2 = c;
	545	c = get(in, &cpos, NULL);
	546	/* Discard \r just before \n. */
	547	if (c2 != 13 \|\| !isnl(c))
	548	rdadd(&rs, c2);
	549	}
	550	unget(in, c, &cpos);
	551	ret.text = ustrdup(rs.text);
	552	sfree(rs.text);
	553	return ret;
	554	}
	555
	556	/*
	557	* Adds a new word to a linked list
	558	*/
	559	static word addword(word newword, word **hptrptr) {
	560	word *mnewword;
	561	if (!hptrptr)
	562	return NULL;
	563	mnewword = snew(word);
	564	mnewword = newword; / structure copy */
	565	mnewword->next = NULL;
	566	**hptrptr = mnewword;
	567	*hptrptr = &mnewword->next;
	568	return mnewword;
	569	}
	570
	571	/*
	572	* Adds a new paragraph to a linked list
	573	*/
	574	static paragraph addpara(paragraph newpara, paragraph **hptrptr) {
	575	paragraph *mnewpara = snew(paragraph);
	576	mnewpara = newpara; / structure copy */
	577	mnewpara->next = NULL;
	578	**hptrptr = mnewpara;
	579	*hptrptr = &mnewpara->next;
	580	return mnewpara;
	581	}
	582
	/*
	 * Destructor before token is reassigned; should catch most memory
	 * leaks
	 *
	 * Frees the token's `text' and `origtext' strings. It is an
	 * expression, so it can be used as `dtor(t), t = get_token(in)'.
	 */
	#define dtor(t) ( sfree((t).text), sfree((t).origtext) )
	588
	589	/*
	590	* Reads a single file (ie until get() returns EOF)
	591	*/
	592	static void read_file(paragraph **ret, input in, indexdata *idx) {
	593	token t;
	594	paragraph par;
	595	word wd, whptr, idximplicit;
	596	tree234 *macros;
	597	wchar_t utext[2], *wdtext;
	598	int style, spcstyle;
	599	int already;
	600	int iswhite, seenwhite;
	601	int type;
	602	int prev_para_type;
	603	struct stack_item {
	604	enum {
	605	stack_nop = 0, /* do nothing (for error recovery) */
	606	stack_ualt = 1, /* \u alternative */
	607	stack_style = 2, /* \e, \c, \cw */
	608	stack_idx = 4, /* \I, \i, \ii */
	609	stack_hyper = 8, /* \W */
	610	stack_quote = 16, /* \q */
	611	} type;
	612	word *whptr; / to restore from \u alternatives */
	613	word *idximplicit; / to restore from \u alternatives */
	614	filepos fpos;
	615	int in_code;
	616	} *sitem;
	617	stack parsestk;
	618	struct crossparaitem {
	619	int type; /* currently c_lcont, c_quote or -1 */
	620	int seen_lcont, seen_quote;
	621	};
	622	stack crossparastk;
	623	word indexword, uword, *iword;
	624	word *idxwordlist;
	625	rdstring indexstr;
	626	int index_downcase, index_visible, indexing;
	627	const rdstring nullrs = { 0, 0, NULL };
	628	wchar_t uchr;
	629
	630	t.text = NULL;
	631	t.origtext = NULL;
	632	macros = newtree234(macrocmp);
	633	already = FALSE;
	634
	635	crossparastk = stk_new();
	636
	637	/*
	638	* Loop on each paragraph.
	639	*/
	640	while (1) {
	641	int start_cmd = c__invalid;
	642	par.words = NULL;
	643	par.keyword = NULL;
	644	par.origkeyword = NULL;
	645	whptr = &par.words;
	646
	647	/*
	648	* Get a token.
	649	*/
	650	do {
	651	if (!already) {
	652	dtor(t), t = get_token(in);
	653	}
	654	already = FALSE;
	655	} while (t.type == tok_eop);
	656	if (t.type == tok_eof)
	657	break;
	658
	659	/*
	660	* Parse code paragraphs separately.
	661	*/
	662	if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
	663	int wtype = word_WeakCode;
	664
	665	par.type = para_Code;
	666	par.fpos = t.pos;
	667	while (1) {
	668	dtor(t), t = get_codepar_token(in);
	669	wd.type = wtype;
	670	wd.breaks = FALSE; /* shouldn't need this... */
	671	wd.text = ustrdup(t.text);
	672	wd.alt = NULL;
	673	wd.fpos = t.pos;
	674	addword(wd, &whptr);
	675	dtor(t), t = get_token(in);
	676	if (t.type == tok_white) {
	677	/*
	678	* The newline after a code-paragraph line
	679	*/
	680	dtor(t), t = get_token(in);
	681	}
	682	if (t.type == tok_eop \|\| t.type == tok_eof \|\|
	683	t.type == tok_rbrace) { /* might be } terminating \lcont */
	684	if (t.type == tok_rbrace)
	685	already = TRUE;
	686	break;
	687	} else if (t.type == tok_cmd && t.cmd == c_c) {
	688	wtype = word_WeakCode;
	689	} else if (t.type == tok_cmd && t.cmd == c_e &&
	690	wtype == word_WeakCode) {
	691	wtype = word_Emph;
	692	} else {
	693	error(err_brokencodepara, &t.pos);
	694	prev_para_type = par.type;
	695	addpara(par, ret);
	696	while (t.type != tok_eop) /* error recovery: */
	697	dtor(t), t = get_token(in); /* eat rest of paragraph */
	698	goto codeparabroken; /* ick, but such is life */
	699	}
	700	}
	701	prev_para_type = par.type;
	702	addpara(par, ret);
	703	codeparabroken:
	704	continue;
	705	}
	706
	707	/*
	708	* Spot the special commands that define a grouping of more
	709	* than one paragraph, and also the closing braces that
	710	* finish them.
	711	*/
	712	if (t.type == tok_cmd &&
	713	(t.cmd == c_lcont \|\| t.cmd == c_quote)) {
	714	struct crossparaitem sitem, stop;
	715	int cmd = t.cmd;
	716
	717	/*
	718	* Expect, and swallow, an open brace.
	719	*/
	720	dtor(t), t = get_token(in);
	721	if (t.type != tok_lbrace) {
	722	error(err_explbr, &t.pos);
	723	continue;
	724	}
	725
	726	/*
	727	* Also expect, and swallow, any whitespace after that
	728	* (a newline before a code paragraph wouldn't be
	729	* surprising).
	730	*/
	731	do {
	732	dtor(t), t = get_token(in);
	733	} while (t.type == tok_white);
	734	already = TRUE;
	735
	736	if (cmd == c_lcont) {
	737	/*
	738	* \lcont causes a continuation of a list item into
	739	* multiple paragraphs (which may in turn contain
	740	* nested lists, code paras etc). Hence, the previous
	741	* paragraph must be of a list type.
	742	*/
	743	sitem = snew(struct crossparaitem);
	744	stop = (struct crossparaitem *)stk_top(crossparastk);
	745	if (stop)
	746	sitem = stop;
	747	else
	748	sitem->seen_quote = sitem->seen_lcont = 0;
	749
	750	if (prev_para_type == para_Bullet \|\|
	751	prev_para_type == para_NumberedList \|\|
	752	prev_para_type == para_Description) {
	753	sitem->type = c_lcont;
	754	sitem->seen_lcont = 1;
	755	par.type = para_LcontPush;
	756	prev_para_type = par.type;
	757	addpara(par, ret);
	758	} else {
	759	/*
	760	* Push a null item on the cross-para stack so that
	761	* when we see the corresponding closing brace we
	762	* don't give a cascade error.
	763	*/
	764	sitem->type = -1;
	765	error(err_misplacedlcont, &t.pos);
	766	}
	767	} else {
	768	/*
	769	* \quote causes a group of paragraphs to be
	770	* block-quoted (typically they will be indented a
	771	* bit).
	772	*/
	773	sitem = snew(struct crossparaitem);
	774	stop = (struct crossparaitem *)stk_top(crossparastk);
	775	if (stop)
	776	sitem = stop;
	777	else
	778	sitem->seen_quote = sitem->seen_lcont = 0;
	779	sitem->type = c_quote;
	780	sitem->seen_quote = 1;
	781	par.type = para_QuotePush;
	782	prev_para_type = par.type;
	783	addpara(par, ret);
	784	}
	785	stk_push(crossparastk, sitem);
	786	continue;
	787	} else if (t.type == tok_rbrace) {
	788	struct crossparaitem *sitem = stk_pop(crossparastk);
	789	if (!sitem)
	790	error(err_unexbrace, &t.pos);
	791	else {
	792	switch (sitem->type) {
	793	case c_lcont:
	794	par.type = para_LcontPop;
	795	prev_para_type = par.type;
	796	addpara(par, ret);
	797	break;
	798	case c_quote:
	799	par.type = para_QuotePop;
	800	prev_para_type = par.type;
	801	addpara(par, ret);
	802	break;
	803	}
	804	sfree(sitem);
	805	}
	806	continue;
	807	}
	808
	809	/*
	810	* This token begins a paragraph. See if it's one of the
	811	* special commands that define a paragraph type.
	812	*
	813	* (note that \# is special in a way, and \nocite takes no
	814	* text)
	815	*/
	816	par.type = para_Normal;
	817	if (t.type == tok_cmd) {
	818	int needkw;
	819	int is_macro = FALSE;
	820
	821	par.fpos = t.pos;
	822	switch (t.cmd) {
	823	default:
	824	needkw = -1;
	825	break;
	826	case c__invalid:
	827	error(err_badparatype, t.text, &t.pos);
	828	needkw = 4;
	829	break;
	830	case c__comment:
	831	if (isbrace(in))
	832	break; /* `\#{': isn't a comment para */
	833	do {
	834	dtor(t), t = get_token(in);
	835	} while (t.type != tok_eop && t.type != tok_eof);
	836	continue; /* next paragraph */
	837	/*
	838	* `needkw' values:
	839	*
	840	* 1 -- exactly one keyword
	841	* 2 -- at least one keyword
	842	* 4 -- any number of keywords including zero
	843	* 8 -- at least one keyword and then nothing else
	844	* 16 -- nothing at all! no keywords, no body
	845	* 32 -- no keywords at all
	846	*/
	847	case c_A: needkw = 2; par.type = para_Appendix; break;
	848	case c_B: needkw = 2; par.type = para_Biblio; break;
	849	case c_BR: needkw = 1; par.type = para_BR;
	850	start_cmd = c_BR; break;
	851	case c_C: needkw = 2; par.type = para_Chapter; break;
	852	case c_H: needkw = 2; par.type = para_Heading;
	853	par.aux = 0;
	854	break;
	855	case c_IM: needkw = 2; par.type = para_IM;
	856	start_cmd = c_IM; break;
	857	case c_S: needkw = 2; par.type = para_Subsect;
	858	par.aux = t.aux; break;
	859	case c_U: needkw = 32; par.type = para_UnnumberedChapter; break;
	860	/* For \b and \n the keyword is optional */
	861	case c_b: needkw = 4; par.type = para_Bullet; break;
	862	case c_dt: needkw = 4; par.type = para_DescribedThing; break;
	863	case c_dd: needkw = 4; par.type = para_Description; break;
	864	case c_n: needkw = 4; par.type = para_NumberedList; break;
	865	case c_cfg: needkw = 8; par.type = para_Config;
	866	start_cmd = c_cfg; break;
	867	case c_copyright: needkw = 32; par.type = para_Copyright; break;
	868	case c_define: is_macro = TRUE; needkw = 1; break;
	869	/* For \nocite the keyword is _everything_ */
	870	case c_nocite: needkw = 8; par.type = para_NoCite; break;
	871	case c_preamble: needkw = 32; par.type = para_Normal; break;
	872	case c_rule: needkw = 16; par.type = para_Rule; break;
	873	case c_title: needkw = 32; par.type = para_Title; break;
	874	case c_versionid: needkw = 32; par.type = para_VersionID; break;
	875	}
	876
	877	if (par.type == para_Chapter \|\|
	878	par.type == para_Heading \|\|
	879	par.type == para_Subsect \|\|
	880	par.type == para_Appendix \|\|
	881	par.type == para_UnnumberedChapter) {
	882	struct crossparaitem *sitem = stk_top(crossparastk);
	883	if (sitem && (sitem->seen_lcont \|\| sitem->seen_quote)) {
	884	error(err_sectmarkerinblock,
	885	&t.pos,
	886	(sitem->seen_lcont ? "lcont" : "quote"));
	887	}
	888	}
	889
	890	if (needkw > 0) {
	891	rdstring rs = { 0, 0, NULL };
	892	rdstringc rsc = { 0, 0, NULL };
	893	int nkeys = 0;
	894	filepos fp;
	895
	896	/* Get keywords. */
	897	dtor(t), t = get_token(in);
	898	fp = t.pos;
	899	while (t.type == tok_lbrace) {
	900	/* This is a keyword. */
	901	nkeys++;
	902	/* FIXME: there will be bugs if anyone specifies an
	903	* empty keyword (\foo{}), so trap this case. */
	904	while (dtor(t), t = get_token(in),
	905	t.type == tok_word \|\|
	906	t.type == tok_white \|\|
	907	(t.type == tok_cmd && t.cmd == c__nbsp) \|\|
	908	(t.type == tok_cmd && t.cmd == c__escaped) \|\|
	909	(t.type == tok_cmd && t.cmd == c_u)) {
	910	if (t.type == tok_white \|\|
	911	(t.type == tok_cmd && t.cmd == c__nbsp)) {
	912	rdadd(&rs, ' ');
	913	rdaddc(&rsc, ' ');
	914	} else if (t.type == tok_cmd && t.cmd == c_u) {
	915	rdadd(&rs, t.aux);
	916	rdaddc(&rsc, '\\');
	917	rdaddsc(&rsc, t.origtext);
	918	} else {
	919	rdadds(&rs, t.text);
	920	rdaddsc(&rsc, t.origtext);
	921	}
	922	}
	923	if (t.type != tok_rbrace) {
	924	error(err_kwunclosed, &t.pos);
	925	continue;
	926	}
	927	rdadd(&rs, 0); /* add string terminator */
	928	rdaddc(&rsc, 0); /* add string terminator */
	929	dtor(t), t = get_token(in); /* eat right brace */
	930	}
	931
	932	rdadd(&rs, 0); /* add string terminator */
	933	rdaddc(&rsc, 0); /* add string terminator */
	934
	935	/* See whether we have the right number of keywords. */
	936	if ((needkw & 48) && nkeys > 0)
	937	error(err_kwillegal, &fp);
	938	if ((needkw & 11) && nkeys == 0)
	939	error(err_kwexpected, &fp);
	940	if ((needkw & 5) && nkeys > 1)
	941	error(err_kwtoomany, &fp);
	942
	943	if (is_macro) {
	944	/*
	945	* Macro definition. Get the rest of the line
	946	* as a code-paragraph token, repeatedly until
	947	* there's nothing more left of it. Separate
	948	* with newlines.
	949	*/
	950	rdstring macrotext = { 0, 0, NULL };
	951	while (1) {
	952	dtor(t), t = get_codepar_token(in);
	953	if (macrotext.pos > 0)
	954	rdadd(&macrotext, L'\n');
	955	rdadds(&macrotext, t.text);
	956	dtor(t), t = get_token(in);
	957	if (t.type == tok_eop) break;
	958	}
	959	macrodef(macros, rs.text, macrotext.text, fp);
	960	continue; /* next paragraph */
	961	}
	962
	963	par.keyword = rdtrim(&rs);
	964	par.origkeyword = rdtrimc(&rsc);
	965
	966	/* Move to EOP in case of needkw==8 or 16 (no body) */
	967	if (needkw & 24) {
	968	/* We allow whitespace even when we expect no para body */
	969	while (t.type == tok_white)
	970	dtor(t), t = get_token(in);
	971	if (t.type != tok_eop && t.type != tok_eof &&
	972	(start_cmd == c__invalid \|\|
	973	t.type != tok_cmd \|\| t.cmd != start_cmd)) {
	974	error(err_bodyillegal, &t.pos);
	975	/* Error recovery: eat the rest of the paragraph */
	976	while (t.type != tok_eop && t.type != tok_eof &&
	977	(start_cmd == c__invalid \|\|
	978	t.type != tok_cmd \|\| t.cmd != start_cmd))
	979	dtor(t), t = get_token(in);
	980	}
	981	if (t.type == tok_cmd)
	982	already = TRUE;/* inhibit get_token at top of loop */
	983	prev_para_type = par.type;
	984	addpara(par, ret);
	985
	986	if (par.type == para_Config) {
	987	input_configure(in, &par);
	988	}
	989	continue; /* next paragraph */
	990	}
	991	}
	992	}
	993
	994	/*
	995	* Now read the actual paragraph, word by word, adding to
	996	* the paragraph list.
	997	*
	998	* Mid-paragraph commands:
	999	*
	1000	* \K \k
	1001	* \c \cw
	1002	* \e
	1003	* \i \ii
	1004	* \I
	1005	* \u
	1006	* \W
	1007	* \date
	1008	* \\ \{ \}
	1009	*/
	1010	parsestk = stk_new();
	1011	style = word_Normal;
	1012	spcstyle = word_WhiteSpace;
	1013	indexing = FALSE;
	1014	seenwhite = TRUE;
	1015	while (t.type != tok_eop && t.type != tok_eof) {
	1016	iswhite = FALSE;
	1017	already = FALSE;
	1018
	1019	/* Handle implicit paragraph breaks after \IM, \BR etc */
	1020	if (start_cmd != c__invalid &&
	1021	t.type == tok_cmd && t.cmd == start_cmd) {
	1022	already = TRUE; /* inhibit get_token at top of loop */
	1023	break;
	1024	}
	1025
	1026	if (t.type == tok_cmd && t.cmd == c__nop) {
	1027	dtor(t), t = get_token(in);
	1028	continue; /* do nothing! */
	1029	}
	1030
	1031	if (t.type == tok_cmd && t.cmd == c__escaped) {
	1032	t.type = tok_word; /* nice and simple */
	1033	t.aux = 0; /* even if `\-' - nonbreaking! */
	1034	}
	1035	if (t.type == tok_cmd && t.cmd == c__nbsp) {
	1036	t.type = tok_word; /* nice and simple */
	1037	sfree(t.text);
	1038	t.text = ustrdup(L" "); /* text is ` ' not `_' */
	1039	t.aux = 0; /* (nonbreaking) */
	1040	}
	1041	switch (t.type) {
	1042	case tok_white:
	1043	if (whptr == &par.words)
	1044	break; /* strip whitespace at start of para */
	1045	wd.text = NULL;
	1046	wd.type = spcstyle;
	1047	wd.alt = NULL;
	1048	wd.aux = 0;
	1049	wd.fpos = t.pos;
	1050	wd.breaks = FALSE;
	1051
	1052	/*
	1053	* Inhibit use of whitespace if it's (probably the
	1054	* newline) before a repeat \IM / \BR type
	1055	* directive.
	1056	*/
	1057	if (start_cmd != c__invalid) {
	1058	dtor(t), t = get_token(in);
	1059	already = TRUE;
	1060	if (t.type == tok_cmd && t.cmd == start_cmd)
	1061	break;
	1062	}
	1063
	1064	if (indexing)
	1065	rdadd(&indexstr, ' ');
	1066	if (!indexing \|\| index_visible)
	1067	addword(wd, &whptr);
	1068	if (indexing)
	1069	addword(wd, &idximplicit);
	1070	iswhite = TRUE;
	1071	break;
	1072	case tok_word:
	1073	if (indexing)
	1074	rdadds(&indexstr, t.text);
	1075	wd.type = style;
	1076	wd.alt = NULL;
	1077	wd.aux = 0;
	1078	wd.fpos = t.pos;
	1079	wd.breaks = t.aux;
	1080	if (!indexing \|\| index_visible) {
	1081	wd.text = ustrdup(t.text);
	1082	addword(wd, &whptr);
	1083	}
	1084	if (indexing) {
	1085	wd.text = ustrdup(t.text);
	1086	addword(wd, &idximplicit);
	1087	}
	1088	break;
	1089	case tok_lbrace:
	1090	error(err_unexbrace, &t.pos);
	1091	/* Error recovery: push nop */
	1092	sitem = snew(struct stack_item);
	1093	sitem->type = stack_nop;
	1094	sitem->fpos = t.pos;
	1095	stk_push(parsestk, sitem);
	1096	break;
	1097	case tok_rbrace:
	1098	sitem = stk_pop(parsestk);
	1099	if (!sitem) {
	1100	/*
	1101	* This closing brace could have been an
	1102	* indication that the cross-paragraph stack
	1103	* wants popping. Accordingly, we treat it here
	1104	* as an indication that the paragraph is over.
	1105	*/
	1106	already = TRUE;
	1107	goto finished_para;
	1108	} else {
	1109	if (sitem->type & stack_ualt) {
	1110	whptr = sitem->whptr;
	1111	idximplicit = sitem->idximplicit;
	1112	}
	1113	if (sitem->type & stack_style) {
	1114	style = word_Normal;
	1115	spcstyle = word_WhiteSpace;
	1116	}
	1117	if (sitem->type & stack_idx) {
	1118	indexword->text = ustrdup(indexstr.text);
	1119	if (index_downcase) {
	1120	word *w;
	1121
	1122	ustrlow(indexword->text);
	1123	ustrlow(indexstr.text);
	1124
	1125	for (w = idxwordlist; w; w = w->next)
	1126	if (w->text)
	1127	ustrlow(w->text);
	1128	}
	1129	indexing = FALSE;
	1130	rdadd(&indexstr, L'\0');
	1131	index_merge(idx, FALSE, indexstr.text,
	1132	idxwordlist, &sitem->fpos);
	1133	sfree(indexstr.text);
	1134	}
	1135	if (sitem->type & stack_hyper) {
	1136	wd.text = NULL;
	1137	wd.type = word_HyperEnd;
	1138	wd.alt = NULL;
	1139	wd.aux = 0;
	1140	wd.fpos = t.pos;
	1141	wd.breaks = FALSE;
	1142	if (!indexing \|\| index_visible)
	1143	addword(wd, &whptr);
	1144	if (indexing)
	1145	addword(wd, &idximplicit);
	1146	}
	1147	if (sitem->type & stack_quote) {
	1148	wd.text = NULL;
	1149	wd.type = toquotestyle(style);
	1150	wd.alt = NULL;
	1151	wd.aux = quote_Close;
	1152	wd.fpos = t.pos;
	1153	wd.breaks = FALSE;
	1154	if (!indexing \|\| index_visible)
	1155	addword(wd, &whptr);
	1156	if (indexing) {
	1157	rdadd(&indexstr, L'"');
	1158	addword(wd, &idximplicit);
	1159	}
	1160	}
	1161	}
	1162	sfree(sitem);
	1163	break;
	1164	case tok_cmd:
	1165	switch (t.cmd) {
	1166	case c__comment:
	1167	/*
	1168	* In-paragraph comment: \#{ balanced braces }
	1169	*
	1170	* Anything goes here; even tok_eop. We should
	1171	* eat whitespace after the close brace _if_
	1172	* there was whitespace before the \#.
	1173	*/
	1174	dtor(t), t = get_token(in);
	1175	if (t.type != tok_lbrace) {
	1176	error(err_explbr, &t.pos);
	1177	} else {
	1178	int braces = 1;
	1179	while (braces > 0) {
	1180	dtor(t), t = get_token(in);
	1181	if (t.type == tok_lbrace)
	1182	braces++;
	1183	else if (t.type == tok_rbrace)
	1184	braces--;
	1185	else if (t.type == tok_eof) {
	1186	error(err_commenteof, &t.pos);
	1187	break;
	1188	}
	1189	}
	1190	}
	1191	if (seenwhite) {
	1192	already = TRUE;
	1193	dtor(t), t = get_token(in);
	1194	if (t.type == tok_white) {
	1195	iswhite = TRUE;
	1196	already = FALSE;
	1197	}
	1198	}
	1199	break;
	1200	case c_q:
	1201	dtor(t), t = get_token(in);
	1202	if (t.type != tok_lbrace) {
	1203	error(err_explbr, &t.pos);
	1204	} else {
	1205	/*
	1206	* Enforce that \q may not be used anywhere
	1207	* within \c. (It shouldn't be necessary
	1208	* since the whole point of \c should be
	1209	* that the user wants to exercise exact
	1210	* control over the glyphs used, and
	1211	* forbidding it has the useful effect of
	1212	* relieving some backends of having to
	1213	* make difficult decisions.)
	1214	*/
	1215	int stype;
	1216
	1217	if (style != word_Code && style != word_WeakCode) {
	1218	wd.text = NULL;
	1219	wd.type = toquotestyle(style);
	1220	wd.alt = NULL;
	1221	wd.aux = quote_Open;
	1222	wd.fpos = t.pos;
	1223	wd.breaks = FALSE;
	1224	if (!indexing \|\| index_visible)
	1225	addword(wd, &whptr);
	1226	if (indexing) {
	1227	rdadd(&indexstr, L'"');
	1228	addword(wd, &idximplicit);
	1229	}
	1230	stype = stack_quote;
	1231	} else {
	1232	error(err_codequote, &t.pos);
	1233	stype = stack_nop;
	1234	}
	1235	sitem = snew(struct stack_item);
	1236	sitem->fpos = t.pos;
	1237	sitem->type = stype;
	1238	stk_push(parsestk, sitem);
	1239	}
	1240	break;
	1241	case c_K:
	1242	case c_k:
	1243	case c_W:
	1244	case c_date:
	1245	/*
	1246	* Keyword, hyperlink, or \date. We expect a
	1247	* left brace, some text, and then a right
	1248	* brace. No nesting; no arguments.
	1249	*/
	1250	wd.fpos = t.pos;
	1251	wd.breaks = FALSE;
	1252	if (t.cmd == c_K)
	1253	wd.type = word_UpperXref;
	1254	else if (t.cmd == c_k)
	1255	wd.type = word_LowerXref;
	1256	else if (t.cmd == c_W)
	1257	wd.type = word_HyperLink;
	1258	else
	1259	wd.type = word_Normal;
	1260	dtor(t), t = get_token(in);
	1261	if (t.type != tok_lbrace) {
	1262	if (wd.type == word_Normal) {
	1263	time_t thetime = time(NULL);
	1264	struct tm *broken = localtime(&thetime);
	1265	already = TRUE;
	1266	wdtext = ustrftime(NULL, broken);
	1267	wd.type = style;
	1268	} else {
	1269	error(err_explbr, &t.pos);
	1270	wdtext = NULL;
	1271	}
	1272	} else {
	1273	rdstring rs = { 0, 0, NULL };
	1274	while (dtor(t), t = get_token(in),
	1275	t.type == tok_word \|\| t.type == tok_white) {
	1276	if (t.type == tok_white)
	1277	rdadd(&rs, ' ');
	1278	else
	1279	rdadds(&rs, t.text);
	1280	}
	1281	if (wd.type == word_Normal) {
	1282	time_t thetime = time(NULL);
	1283	struct tm *broken = localtime(&thetime);
	1284	wdtext = ustrftime(rs.text, broken);
	1285	wd.type = style;
	1286	} else {
	1287	wdtext = ustrdup(rs.text);
	1288	}
	1289	sfree(rs.text);
	1290	if (t.type != tok_rbrace) {
	1291	error(err_kwexprbr, &t.pos);
	1292	}
	1293	}
	1294	wd.alt = NULL;
	1295	wd.aux = 0;
	1296	if (!indexing \|\| index_visible) {
	1297	wd.text = ustrdup(wdtext);
	1298	addword(wd, &whptr);
	1299	}
	1300	if (indexing) {
	1301	wd.text = ustrdup(wdtext);
	1302	addword(wd, &idximplicit);
	1303	}
	1304	sfree(wdtext);
	1305	if (wd.type == word_HyperLink) {
	1306	/*
	1307	* Hyperlinks are different: they then
	1308	* expect another left brace, to begin
	1309	* delimiting the text marked by the link.
	1310	*/
	1311	dtor(t), t = get_token(in);
	1312	sitem = snew(struct stack_item);
	1313	sitem->fpos = wd.fpos;
	1314	sitem->type = stack_hyper;
	1315	/*
	1316	* Special cases: \W{}\i, \W{}\ii
	1317	*/
	1318	if (t.type == tok_cmd &&
	1319	(t.cmd == c_i \|\| t.cmd == c_ii)) {
	1320	if (indexing) {
	1321	error(err_nestedindex, &t.pos);
	1322	} else {
	1323	/* Add an index-reference word with no
	1324	* text as yet */
	1325	wd.type = word_IndexRef;
	1326	wd.text = NULL;
	1327	wd.alt = NULL;
	1328	wd.aux = 0;
	1329	wd.breaks = FALSE;
	1330	indexword = addword(wd, &whptr);
	1331	/* Set up a rdstring to read the
	1332	* index text */
	1333	indexstr = nullrs;
	1334	/* Flags so that we do the Right
	1335	* Things with text */
	1336	index_visible = (type != c_I);
	1337	index_downcase = (type == c_ii);
	1338	indexing = TRUE;
	1339	idxwordlist = NULL;
	1340	idximplicit = &idxwordlist;
	1341
	1342	sitem->type \|= stack_idx;
	1343	}
	1344	dtor(t), t = get_token(in);
	1345	}
	1346	/*
	1347	* Special cases: \W{}\c, \W{}\e, \W{}\cw
	1348	*/
	1349	if (t.type == tok_cmd &&
	1350	(t.cmd == c_e \|\| t.cmd == c_c \|\| t.cmd == c_cw)) {
	1351	if (style != word_Normal)
	1352	error(err_nestedstyles, &t.pos);
	1353	else {
	1354	style = (t.cmd == c_c ? word_Code :
	1355	t.cmd == c_cw ? word_WeakCode :
	1356	word_Emph);
	1357	spcstyle = tospacestyle(style);
	1358	sitem->type \|= stack_style;
	1359	}
	1360	dtor(t), t = get_token(in);
	1361	}
	1362	if (t.type != tok_lbrace) {
	1363	error(err_explbr, &t.pos);
	1364	sfree(sitem);
	1365	} else {
	1366	stk_push(parsestk, sitem);
	1367	}
	1368	}
	1369	break;
	1370	case c_c:
	1371	case c_cw:
	1372	case c_e:
	1373	type = t.cmd;
	1374	if (style != word_Normal) {
	1375	error(err_nestedstyles, &t.pos);
	1376	/* Error recovery: eat lbrace, push nop. */
	1377	dtor(t), t = get_token(in);
	1378	sitem = snew(struct stack_item);
	1379	sitem->fpos = t.pos;
	1380	sitem->type = stack_nop;
	1381	stk_push(parsestk, sitem);
	1382	}
	1383	dtor(t), t = get_token(in);
	1384	if (t.type != tok_lbrace) {
	1385	error(err_explbr, &t.pos);
	1386	} else {
	1387	style = (type == c_c ? word_Code :
	1388	type == c_cw ? word_WeakCode :
	1389	word_Emph);
	1390	spcstyle = tospacestyle(style);
	1391	sitem = snew(struct stack_item);
	1392	sitem->fpos = t.pos;
	1393	sitem->type = stack_style;
	1394	stk_push(parsestk, sitem);
	1395	}
	1396	break;
	1397	case c_i:
	1398	case c_ii:
	1399	case c_I:
	1400	type = t.cmd;
	1401	if (indexing) {
	1402	error(err_nestedindex, &t.pos);
	1403	/* Error recovery: eat lbrace, push nop. */
	1404	dtor(t), t = get_token(in);
	1405	sitem = snew(struct stack_item);
	1406	sitem->fpos = t.pos;
	1407	sitem->type = stack_nop;
	1408	stk_push(parsestk, sitem);
	1409	}
	1410	sitem = snew(struct stack_item);
	1411	sitem->fpos = t.pos;
	1412	sitem->type = stack_idx;
	1413	dtor(t), t = get_token(in);
	1414	/*
	1415	* Special cases: \i\c, \i\e, \i\cw
	1416	*/
	1417	wd.fpos = t.pos;
	1418	if (t.type == tok_cmd &&
	1419	(t.cmd == c_e \|\| t.cmd == c_c \|\| t.cmd == c_cw)) {
	1420	if (style != word_Normal)
	1421	error(err_nestedstyles, &t.pos);
	1422	else {
	1423	style = (t.cmd == c_c ? word_Code :
	1424	t.cmd == c_cw ? word_WeakCode :
	1425	word_Emph);
	1426	spcstyle = tospacestyle(style);
	1427	sitem->type \|= stack_style;
	1428	}
	1429	dtor(t), t = get_token(in);
	1430	}
	1431	if (t.type != tok_lbrace) {
	1432	sfree(sitem);
	1433	error(err_explbr, &t.pos);
	1434	} else {
	1435	/* Add an index-reference word with no text as yet */
	1436	wd.type = word_IndexRef;
	1437	wd.text = NULL;
	1438	wd.alt = NULL;
	1439	wd.aux = 0;
	1440	wd.breaks = FALSE;
	1441	indexword = addword(wd, &whptr);
	1442	/* Set up a rdstring to read the index text */
	1443	indexstr = nullrs;
	1444	/* Flags so that we do the Right Things with text */
	1445	index_visible = (type != c_I);
	1446	index_downcase = (type == c_ii);
	1447	indexing = TRUE;
	1448	idxwordlist = NULL;
	1449	idximplicit = &idxwordlist;
	1450	/* Stack item to close the indexing on exit */
	1451	stk_push(parsestk, sitem);
	1452	}
	1453	break;
	1454	case c_u:
	1455	uchr = t.aux;
	1456	utext[0] = uchr; utext[1] = 0;
	1457	wd.type = style;
	1458	wd.breaks = FALSE;
	1459	wd.alt = NULL;
	1460	wd.aux = 0;
	1461	wd.fpos = t.pos;
	1462	if (!indexing \|\| index_visible) {
	1463	wd.text = ustrdup(utext);
	1464	uword = addword(wd, &whptr);
	1465	} else
	1466	uword = NULL;
	1467	if (indexing) {
	1468	wd.text = ustrdup(utext);
	1469	iword = addword(wd, &idximplicit);
	1470	} else
	1471	iword = NULL;
	1472	dtor(t), t = get_token(in);
	1473	if (t.type == tok_lbrace) {
	1474	/*
	1475	* \u with a left brace. Until the brace
	1476	* closes, all further words go on a
	1477	* sidetrack from the main thread of the
	1478	* paragraph.
	1479	*/
	1480	sitem = snew(struct stack_item);
	1481	sitem->fpos = t.pos;
	1482	sitem->type = stack_ualt;
	1483	sitem->whptr = whptr;
	1484	sitem->idximplicit = idximplicit;
	1485	stk_push(parsestk, sitem);
	1486	whptr = uword ? &uword->alt : NULL;
	1487	idximplicit = iword ? &iword->alt : NULL;
	1488	} else {
	1489	if (indexing)
	1490	rdadd(&indexstr, uchr);
	1491	already = TRUE;
	1492	}
	1493	break;
	1494	default:
	1495	if (!macrolookup(macros, in, t.text, &t.pos))
	1496	error(err_badmidcmd, t.text, &t.pos);
	1497	break;
	1498	}
	1499	}
	1500	if (!already)
	1501	dtor(t), t = get_token(in);
	1502	seenwhite = iswhite;
	1503	}
	1504	finished_para:
	1505	/* Check the stack is empty */
	1506	if (stk_top(parsestk)) {
	1507	while ((sitem = stk_pop(parsestk)))
	1508	sfree(sitem);
	1509	error(err_missingrbrace, &t.pos);
	1510	}
	1511	stk_free(parsestk);
	1512	prev_para_type = par.type;
	1513	addpara(par, ret);
	1514	if (t.type == tok_eof)
	1515	already = TRUE;
	1516	}
	1517
	1518	if (stk_top(crossparastk)) {
	1519	void *p;
	1520
	1521	error(err_missingrbrace2, &t.pos);
	1522	while ((p = stk_pop(crossparastk)))
	1523	sfree(p);
	1524	}
	1525
	1526	/*
	1527	* We break to here rather than returning, because otherwise
	1528	* this cleanup doesn't happen.
	1529	*/
	1530	dtor(t);
	1531	macrocleanup(macros);
	1532
	1533	stk_free(crossparastk);
	1534	}
	1535
	1536	paragraph read_input(input in, indexdata *idx) {
	1537	paragraph *head = NULL;
	1538	paragraph **hptr = &head;
	1539
	1540	while (in->currindex < in->nfiles) {
	1541	in->currfp = fopen(in->filenames[in->currindex], "r");
	1542	if (in->currfp) {
	1543	setpos(in, in->filenames[in->currindex]);
	1544	in->charset = in->defcharset;
	1545	in->csstate = charset_init_state;
	1546	in->wcpos = in->nwc = 0;
	1547	in->pushback_chars = NULL;
	1548	read_file(&hptr, in, idx);
	1549	}
	1550	in->currindex++;
	1551	}
	1552
	1553	return head;
	1554	}