mdw@git.distorted.org.uk Git - sgt/halibut/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* input.c: read the source form
	3	*/
	4
	5	#include <stdio.h>
	6	#include <assert.h>
	7	#include <time.h>
	8	#include "halibut.h"
	9
	10	#define TAB_STOP 8 /* for column number tracking */
	11
	12	static void setpos(input in, char fname) {
	13	in->pos.filename = fname;
	14	in->pos.line = 1;
	15	in->pos.col = (in->reportcols ? 1 : -1);
	16	}
	17
	18	static void unget(input in, int c, filepos pos) {
	19	if (in->npushback >= in->pushbacksize) {
	20	in->pushbacksize = in->npushback + 16;
	21	in->pushback = sresize(in->pushback, in->pushbacksize, pushback);
	22	}
	23	in->pushback[in->npushback].chr = c;
	24	in->pushback[in->npushback].pos = pos; / structure copy */
	25	in->npushback++;
	26	}
	27
	28	/* ---------------------------------------------------------------------- */
	29	/*
	30	* Macro subsystem
	31	*/
	32	typedef struct macro_Tag macro;
	33	struct macro_Tag {
	34	wchar_t name, text;
	35	};
	36	struct macrostack_Tag {
	37	macrostack *next;
	38	wchar_t *text;
	39	int ptr, npushback;
	40	filepos pos;
	41	};
	42	static int macrocmp(void av, void bv) {
	43	macro a = (macro )av, b = (macro )bv;
	44	return ustrcmp(a->name, b->name);
	45	}
	46	static void macrodef(tree234 macros, wchar_t name, wchar_t *text,
	47	filepos fpos) {
	48	macro *m = snew(macro);
	49	m->name = name;
	50	m->text = text;
	51	if (add234(macros, m) != m) {
	52	error(err_macroexists, &fpos, name);
	53	sfree(name);
	54	sfree(text);
	55	}
	56	}
	57	static int macrolookup(tree234 macros, input in, wchar_t *name,
	58	filepos *pos) {
	59	macro m, *gotit;
	60	m.name = name;
	61	gotit = find234(macros, &m, NULL);
	62	if (gotit) {
	63	macrostack *expansion = snew(macrostack);
	64	expansion->next = in->stack;
	65	expansion->text = gotit->text;
	66	expansion->pos = pos; / structure copy */
	67	expansion->ptr = 0;
	68	expansion->npushback = in->npushback;
	69	in->stack = expansion;
	70	return TRUE;
	71	} else
	72	return FALSE;
	73	}
	74	static void macrocleanup(tree234 *macros) {
	75	int ti;
	76	macro *m;
	77	for (ti = 0; (m = (macro *)index234(macros, ti)) != NULL; ti++) {
	78	sfree(m->name);
	79	sfree(m->text);
	80	sfree(m);
	81	}
	82	freetree234(macros);
	83	}
	84
	85	static void input_configure(input in, paragraph cfg) {
	86	assert(cfg->type == para_Config);
	87
	88	if (!ustricmp(cfg->keyword, L"input-charset")) {
	89	in->charset = charset_from_ustr(&cfg->fpos, uadv(cfg->keyword));
	90	}
	91	}
	92
	93	/*
	94	* Can return EOF
	95	*/
	96	static int get(input in, filepos pos, rdstringc *rsc) {
	97	int pushbackpt = in->stack ? in->stack->npushback : 0;
	98	if (in->npushback > pushbackpt) {
	99	--in->npushback;
	100	if (pos)
	101	pos = in->pushback[in->npushback].pos; / structure copy */
	102	return in->pushback[in->npushback].chr;
	103	}
	104	else if (in->stack) {
	105	wchar_t c = in->stack->text[in->stack->ptr];
	106	if (pos)
	107	*pos = in->stack->pos;
	108	if (in->stack->text[++in->stack->ptr] == L'\0') {
	109	macrostack *tmp = in->stack;
	110	in->stack = tmp->next;
	111	sfree(tmp);
	112	}
	113	return c;
	114	}
	115	else if (in->currfp) {
	116
	117	while (in->wcpos >= in->nwc) {
	118
	119	int c = getc(in->currfp);
	120
	121	if (c == EOF) {
	122	fclose(in->currfp);
	123	in->currfp = NULL;
	124	return EOF;
	125	}
	126
	127	if (rsc)
	128	rdaddc(rsc, c);
	129
	130	/* Track line numbers, for error reporting */
	131	if (pos)
	132	*pos = in->pos;
	133	if (in->reportcols) {
	134	switch (c) {
	135	case '\t':
	136	in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP;
	137	break;
	138	case '\n':
	139	in->pos.col = 1;
	140	in->pos.line++;
	141	break;
	142	default:
	143	in->pos.col++;
	144	break;
	145	}
	146	} else {
	147	in->pos.col = -1;
	148	if (c == '\n')
	149	in->pos.line++;
	150	}
	151
	152	/*
	153	* Do input character set translation, so that we return
	154	* Unicode.
	155	*/
	156	{
	157	char buf[1];
	158	char const *p;
	159	int inlen;
	160
	161	buf[0] = (char)c;
	162	p = buf;
	163	inlen = 1;
	164
	165	in->nwc = charset_to_unicode(&p, &inlen,
	166	in->wc, lenof(in->wc),
	167	in->charset, &in->csstate,
	168	NULL, 0);
	169	assert(p == buf+1 && inlen == 0);
	170
	171	in->wcpos = 0;
	172	}
	173	}
	174
	175	return in->wc[in->wcpos++];
	176
	177	} else
	178	return EOF;
	179	}
	180
	181	/*
	182	* Lexical analysis of source files.
	183	*/
	184	typedef struct token_Tag token;
	185	struct token_Tag {
	186	int type;
	187	int cmd, aux;
	188	wchar_t *text;
	189	char *origtext;
	190	filepos pos;
	191	};
	192	enum {
	193	tok_eof, /* end of file */
	194	tok_eop, /* end of paragraph */
	195	tok_white, /* whitespace */
	196	tok_word, /* a word or word fragment */
	197	tok_cmd, /* \command */
	198	tok_lbrace, /* { */
	199	tok_rbrace /* } */
	200	};
	201
	202	/* Halibut command keywords. */
	203	enum {
	204	c__invalid, /* invalid command */
	205	c__comment, /* comment command (\#) */
	206	c__escaped, /* escaped character */
	207	c__nop, /* no-op */
	208	c__nbsp, /* nonbreaking space */
	209	c_A, /* appendix heading */
	210	c_B, /* bibliography entry */
	211	c_BR, /* bibliography rewrite */
	212	c_C, /* chapter heading */
	213	c_H, /* heading */
	214	c_I, /* invisible index mark */
	215	c_IM, /* index merge/rewrite */
	216	c_K, /* capitalised cross-reference */
	217	c_S, /* aux field is 0, 1, 2, ... */
	218	c_U, /* unnumbered-chapter heading */
	219	c_W, /* Web hyperlink */
	220	c_b, /* bulletted list */
	221	c_c, /* code */
	222	c_cfg, /* configuration directive */
	223	c_copyright, /* copyright statement */
	224	c_cq, /* quoted code (sugar for \q{\cw{x}}) */
	225	c_cw, /* weak code */
	226	c_date, /* document processing date */
	227	c_dd, /* description list: description */
	228	c_define, /* macro definition */
	229	c_dt, /* description list: described thing */
	230	c_e, /* emphasis */
	231	c_i, /* visible index mark */
	232	c_ii, /* uncapitalised visible index mark */
	233	c_k, /* uncapitalised cross-reference */
	234	c_lcont, /* continuation para(s) for list item */
	235	c_n, /* numbered list */
	236	c_nocite, /* bibliography trickery */
	237	c_preamble, /* (obsolete) preamble text */
	238	c_q, /* quote marks */
	239	c_quote, /* block-quoted paragraphs */
	240	c_rule, /* horizontal rule */
	241	c_title, /* document title */
	242	c_u, /* aux field is char code */
	243	c_versionid /* document RCS id */
	244	};
	245
	246	/* Perhaps whitespace should be defined in a more Unicode-friendly way? */
	247	#define iswhite(c) ( (c)==32 \|\| (c)==9 \|\| (c)==13 \|\| (c)==10 )
	248	#define isnl(c) ( (c)==10 )
	249	#define isdec(c) ( ((c)>='0'&&(c)<='9') )
	250	#define fromdec(c) ( (c)-'0' )
	251	#define ishex(c) ( ((c)>='0'&&(c)<='9') \|\| ((c)>='A'&&(c)<='F') \|\| ((c)>='a'&&(c)<='f'))
	252	#define fromhex(c) ( (c)<='9' ? (c)-'0' : ((c)&0xDF) - ('A'-10) )
	253	#define iscmd(c) ( ((c)>='0'&&(c)<='9') \|\| ((c)>='A'&&(c)<='Z') \|\| ((c)>='a'&&(c)<='z'))
	254
	255	/*
	256	* Keyword comparison function. Like strcmp, but between a wchar_t *
	257	* and a char *.
	258	*/
	259	static int kwcmp(wchar_t const p, char const q) {
	260	int i;
	261	do {
	262	i = p - q;
	263	} while (p++ && q++ && !i);
	264	return i;
	265	}
	266
	267	/*
	268	* Match a keyword.
	269	*/
	270	static void match_kw(token *tok) {
	271	/*
	272	* FIXME. The ids are explicit in here so as to allow long-name
	273	* equivalents to the various very short keywords.
	274	*/
	275	static const struct { char const *name; int id; } keywords[] = {
	276	{"#", c__comment}, /* comment command (\#) */
	277	{"-", c__escaped}, /* nonbreaking hyphen */
	278	{".", c__nop}, /* no-op */
	279	{"A", c_A}, /* appendix heading */
	280	{"B", c_B}, /* bibliography entry */
	281	{"BR", c_BR}, /* bibliography rewrite */
	282	{"C", c_C}, /* chapter heading */
	283	{"H", c_H}, /* heading */
	284	{"I", c_I}, /* invisible index mark */
	285	{"IM", c_IM}, /* index merge/rewrite */
	286	{"K", c_K}, /* capitalised cross-reference */
	287	{"U", c_U}, /* unnumbered-chapter heading */
	288	{"W", c_W}, /* Web hyperlink */
	289	{"\\", c__escaped}, /* escaped backslash (\\) */
	290	{"_", c__nbsp}, /* nonbreaking space (\_) */
	291	{"b", c_b}, /* bulletted list */
	292	{"c", c_c}, /* code */
	293	{"cfg", c_cfg}, /* configuration directive */
	294	{"copyright", c_copyright}, /* copyright statement */
	295	{"cq", c_cq}, /* quoted code (sugar for \q{\cw{x}}) */
	296	{"cw", c_cw}, /* weak code */
	297	{"date", c_date}, /* document processing date */
	298	{"dd", c_dd}, /* description list: description */
	299	{"define", c_define}, /* macro definition */
	300	{"dt", c_dt}, /* description list: described thing */
	301	{"e", c_e}, /* emphasis */
	302	{"i", c_i}, /* visible index mark */
	303	{"ii", c_ii}, /* uncapitalised visible index mark */
	304	{"k", c_k}, /* uncapitalised cross-reference */
	305	{"lcont", c_lcont}, /* continuation para(s) for list item */
	306	{"n", c_n}, /* numbered list */
	307	{"nocite", c_nocite}, /* bibliography trickery */
	308	{"preamble", c_preamble}, /* (obsolete) preamble text */
	309	{"q", c_q}, /* quote marks */
	310	{"quote", c_quote}, /* block-quoted paragraphs */
	311	{"rule", c_rule}, /* horizontal rule */
	312	{"title", c_title}, /* document title */
	313	{"versionid", c_versionid}, /* document RCS id */
	314	{"{", c__escaped}, /* escaped lbrace (\{) */
	315	{"}", c__escaped}, /* escaped rbrace (\}) */
	316	};
	317	int i, j, k, c;
	318
	319	/*
	320	* Special cases: \S{0,1,2,...} and \uABCD. If the syntax
	321	* doesn't match correctly, we just fall through to the
	322	* binary-search phase.
	323	*/
	324	if (tok->text[0] == 'S') {
	325	/* We expect numeric characters thereafter. */
	326	wchar_t *p = tok->text+1;
	327	int n;
	328	if (!*p)
	329	n = 1;
	330	else {
	331	n = 0;
	332	while (p && isdec(p)) {
	333	n = 10 * n + fromdec(*p);
	334	p++;
	335	}
	336	}
	337	if (!*p) {
	338	tok->cmd = c_S;
	339	tok->aux = n;
	340	return;
	341	}
	342	} else if (tok->text[0] == 'u') {
	343	/* We expect hex characters thereafter. */
	344	wchar_t *p = tok->text+1;
	345	int n = 0;
	346	while (p && ishex(p)) {
	347	n = 16 * n + fromhex(*p);
	348	p++;
	349	}
	350	if (!*p) {
	351	tok->cmd = c_u;
	352	tok->aux = n;
	353	return;
	354	}
	355	}
	356
	357	i = -1;
	358	j = sizeof(keywords)/sizeof(*keywords);
	359	while (j-i > 1) {
	360	k = (i+j)/2;
	361	c = kwcmp(tok->text, keywords[k].name);
	362	if (c < 0)
	363	j = k;
	364	else if (c > 0)
	365	i = k;
	366	else /* c == 0 */ {
	367	tok->cmd = keywords[k].id;
	368	return;
	369	}
	370	}
	371
	372	tok->cmd = c__invalid;
	373	}
	374
	375
	376	/*
	377	* Read a token from the input file, in the normal way (`normal' in
	378	* the sense that code paragraphs work a different way).
	379	*/
	380	token get_token(input *in) {
	381	int c;
	382	int nls;
	383	int prevpos;
	384	token ret;
	385	rdstring rs = { 0, 0, NULL };
	386	rdstringc rsc = { 0, 0, NULL };
	387	filepos cpos;
	388
	389	ret.text = NULL; /* default */
	390	ret.origtext = NULL; /* default */
	391	if (in->pushback_chars) {
	392	rdaddsc(&rsc, in->pushback_chars);
	393	sfree(in->pushback_chars);
	394	in->pushback_chars = NULL;
	395	}
	396	c = get(in, &cpos, &rsc);
	397	ret.pos = cpos;
	398	if (iswhite(c)) { /* tok_white or tok_eop */
	399	nls = 0;
	400	prevpos = 0;
	401	do {
	402	if (isnl(c))
	403	nls++;
	404	prevpos = rsc.pos;
	405	} while ((c = get(in, &cpos, &rsc)) != EOF && iswhite(c));
	406	if (c == EOF) {
	407	ret.type = tok_eof;
	408	sfree(rsc.text);
	409	return ret;
	410	}
	411	if (rsc.text) {
	412	in->pushback_chars = dupstr(rsc.text + prevpos);
	413	sfree(rsc.text);
	414	}
	415	unget(in, c, &cpos);
	416	ret.type = (nls > 1 ? tok_eop : tok_white);
	417	return ret;
	418	} else if (c == EOF) { /* tok_eof */
	419	ret.type = tok_eof;
	420	sfree(rsc.text);
	421	return ret;
	422	} else if (c == '\\') { /* tok_cmd */
	423	rsc.pos = prevpos = 0;
	424	c = get(in, &cpos, &rsc);
	425	if (c == '-' \|\| c == '\\' \|\| c == '_' \|\|
	426	c == '#' \|\| c == '{' \|\| c == '}' \|\| c == '.') {
	427	/* single-char command */
	428	rdadd(&rs, c);
	429	prevpos = rsc.pos;
	430	} else if (c == 'u') {
	431	int len = 0;
	432	do {
	433	rdadd(&rs, c);
	434	len++;
	435	prevpos = rsc.pos;
	436	c = get(in, &cpos, &rsc);
	437	} while (ishex(c) && len < 5);
	438	unget(in, c, &cpos);
	439	} else if (iscmd(c)) {
	440	do {
	441	rdadd(&rs, c);
	442	prevpos = rsc.pos;
	443	c = get(in, &cpos, &rsc);
	444	} while (iscmd(c));
	445	unget(in, c, &cpos);
	446	}
	447	/*
	448	* Now match the command against the list of available
	449	* ones.
	450	*/
	451	ret.type = tok_cmd;
	452	ret.text = ustrdup(rs.text);
	453	if (rsc.text) {
	454	in->pushback_chars = dupstr(rsc.text + prevpos);
	455	rsc.text[prevpos] = '\0';
	456	ret.origtext = dupstr(rsc.text);
	457	} else {
	458	ret.origtext = dupstr("");
	459	}
	460	match_kw(&ret);
	461	sfree(rs.text);
	462	sfree(rsc.text);
	463	return ret;
	464	} else if (c == '{') { /* tok_lbrace */
	465	ret.type = tok_lbrace;
	466	sfree(rsc.text);
	467	return ret;
	468	} else if (c == '}') { /* tok_rbrace */
	469	ret.type = tok_rbrace;
	470	sfree(rsc.text);
	471	return ret;
	472	} else { /* tok_word */
	473	/*
	474	* Read a word: the longest possible contiguous sequence of
	475	* things other than whitespace, backslash, braces and
	476	* hyphen. A hyphen terminates the word but is returned as
	477	* part of it; everything else is pushed back for the next
	478	* token. The `aux' field contains TRUE if the word ends in
	479	* a hyphen.
	480	*/
	481	ret.aux = FALSE; /* assumed for now */
	482	prevpos = 0;
	483	while (1) {
	484	if (iswhite(c) \|\| c=='{' \|\| c=='}' \|\| c=='\\' \|\| c==EOF) {
	485	/* Put back the character that caused termination */
	486	unget(in, c, &cpos);
	487	break;
	488	} else {
	489	rdadd(&rs, c);
	490	if (c == '-') {
	491	prevpos = rsc.pos;
	492	ret.aux = TRUE;
	493	break; /* hyphen terminates word */
	494	}
	495	}
	496	prevpos = rsc.pos;
	497	c = get(in, &cpos, &rsc);
	498	}
	499	ret.type = tok_word;
	500	ret.text = ustrdup(rs.text);
	501	if (rsc.text) {
	502	in->pushback_chars = dupstr(rsc.text + prevpos);
	503	rsc.text[prevpos] = '\0';
	504	ret.origtext = dupstr(rsc.text);
	505	} else {
	506	ret.origtext = dupstr("");
	507	}
	508	sfree(rs.text);
	509	sfree(rsc.text);
	510	return ret;
	511	}
	512	}
	513
	514	/*
	515	* Determine whether the next input character is an open brace (for
	516	* telling code paragraphs from paragraphs which merely start with
	517	* code).
	518	*/
	519	int isbrace(input *in) {
	520	int c;
	521	filepos cpos;
	522
	523	c = get(in, &cpos, NULL);
	524	unget(in, c, &cpos);
	525	return (c == '{');
	526	}
	527
	528	/*
	529	* Read the rest of a line that starts `\c'. Including nothing at
	530	* all (tok_word with empty text).
	531	*/
	532	token get_codepar_token(input *in) {
	533	int c;
	534	token ret;
	535	rdstring rs = { 0, 0, NULL };
	536	filepos cpos;
	537
	538	ret.type = tok_word;
	539	ret.origtext = NULL;
	540	c = get(in, &cpos, NULL); /* expect (and discard) one space */
	541	ret.pos = cpos;
	542	if (c == ' ') {
	543	c = get(in, &cpos, NULL);
	544	ret.pos = cpos;
	545	}
	546	while (!isnl(c) && c != EOF) {
	547	int c2 = c;
	548	c = get(in, &cpos, NULL);
	549	/* Discard \r just before \n. */
	550	if (c2 != 13 \|\| !isnl(c))
	551	rdadd(&rs, c2);
	552	}
	553	unget(in, c, &cpos);
	554	ret.text = ustrdup(rs.text);
	555	sfree(rs.text);
	556	return ret;
	557	}
	558
	559	/*
	560	* Adds a new word to a linked list
	561	*/
	562	static word addword(word newword, word **hptrptr) {
	563	word *mnewword;
	564	if (!hptrptr)
	565	return NULL;
	566	mnewword = snew(word);
	567	mnewword = newword; / structure copy */
	568	mnewword->next = NULL;
	569	**hptrptr = mnewword;
	570	*hptrptr = &mnewword->next;
	571	return mnewword;
	572	}
	573
	574	/*
	575	* Adds a new paragraph to a linked list
	576	*/
	577	static paragraph addpara(paragraph newpara, paragraph **hptrptr) {
	578	paragraph *mnewpara = snew(paragraph);
	579	mnewpara = newpara; / structure copy */
	580	mnewpara->next = NULL;
	581	**hptrptr = mnewpara;
	582	*hptrptr = &mnewpara->next;
	583	return mnewpara;
	584	}
	585
	586	/*
	587	* Destructor before token is reassigned; should catch most memory
	588	* leaks
	589	*/
	590	#define dtor(t) ( sfree(t.text), sfree(t.origtext) )
	591
	592	/*
	593	* Reads a single file (ie until get() returns EOF)
	594	*/
	595	static void read_file(paragraph **ret, input in, indexdata *idx,
	596	tree234 *macros) {
	597	token t;
	598	paragraph par;
	599	word wd, whptr, idximplicit;
	600	wchar_t utext[2], *wdtext;
	601	int style, spcstyle;
	602	int already;
	603	int iswhite, seenwhite;
	604	int type;
	605	int prev_para_type;
	606	struct stack_item {
	607	enum {
	608	stack_nop = 0, /* do nothing (for error recovery) */
	609	stack_ualt = 1, /* \u alternative */
	610	stack_style = 2, /* \e, \c, \cw */
	611	stack_idx = 4, /* \I, \i, \ii */
	612	stack_hyper = 8, /* \W */
	613	stack_quote = 16 /* \q */
	614	} type;
	615	word *whptr; / to restore from \u alternatives */
	616	word *idximplicit; / to restore from \u alternatives */
	617	filepos fpos;
	618	int in_code;
	619	} *sitem;
	620	stack parsestk;
	621	struct crossparaitem {
	622	int type; /* currently c_lcont, c_quote or -1 */
	623	int seen_lcont, seen_quote;
	624	};
	625	stack crossparastk;
	626	word indexword, uword, *iword;
	627	word *idxwordlist;
	628	rdstring indexstr;
	629	int index_downcase, index_visible, indexing;
	630	const rdstring nullrs = { 0, 0, NULL };
	631	wchar_t uchr;
	632
	633	t.text = NULL;
	634	t.origtext = NULL;
	635	already = FALSE;
	636
	637	crossparastk = stk_new();
	638
	639	/*
	640	* Loop on each paragraph.
	641	*/
	642	while (1) {
	643	int start_cmd = c__invalid;
	644	par.words = NULL;
	645	par.keyword = NULL;
	646	par.origkeyword = NULL;
	647	whptr = &par.words;
	648
	649	/*
	650	* Get a token.
	651	*/
	652	do {
	653	if (!already) {
	654	dtor(t), t = get_token(in);
	655	}
	656	already = FALSE;
	657	} while (t.type == tok_eop);
	658	if (t.type == tok_eof)
	659	break;
	660
	661	/*
	662	* Parse code paragraphs separately.
	663	*/
	664	if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
	665	int wtype = word_WeakCode;
	666
	667	par.type = para_Code;
	668	par.fpos = t.pos;
	669	while (1) {
	670	dtor(t), t = get_codepar_token(in);
	671	wd.type = wtype;
	672	wd.breaks = FALSE; /* shouldn't need this... */
	673	wd.text = ustrdup(t.text);
	674	wd.alt = NULL;
	675	wd.fpos = t.pos;
	676	addword(wd, &whptr);
	677	dtor(t), t = get_token(in);
	678	if (t.type == tok_white) {
	679	/*
	680	* The newline after a code-paragraph line
	681	*/
	682	dtor(t), t = get_token(in);
	683	}
	684	if (t.type == tok_eop \|\| t.type == tok_eof \|\|
	685	t.type == tok_rbrace) { /* might be } terminating \lcont */
	686	if (t.type == tok_rbrace)
	687	already = TRUE;
	688	break;
	689	} else if (t.type == tok_cmd && t.cmd == c_c) {
	690	wtype = word_WeakCode;
	691	} else if (t.type == tok_cmd && t.cmd == c_e &&
	692	wtype == word_WeakCode) {
	693	wtype = word_Emph;
	694	} else {
	695	error(err_brokencodepara, &t.pos);
	696	prev_para_type = par.type;
	697	addpara(par, ret);
	698	while (t.type != tok_eop) /* error recovery: */
	699	dtor(t), t = get_token(in); /* eat rest of paragraph */
	700	goto codeparabroken; /* ick, but such is life */
	701	}
	702	}
	703	prev_para_type = par.type;
	704	addpara(par, ret);
	705	codeparabroken:
	706	continue;
	707	}
	708
	709	/*
	710	* Spot the special commands that define a grouping of more
	711	* than one paragraph, and also the closing braces that
	712	* finish them.
	713	*/
	714	if (t.type == tok_cmd &&
	715	(t.cmd == c_lcont \|\| t.cmd == c_quote)) {
	716	struct crossparaitem sitem, stop;
	717	int cmd = t.cmd;
	718
	719	/*
	720	* Expect, and swallow, an open brace.
	721	*/
	722	dtor(t), t = get_token(in);
	723	if (t.type != tok_lbrace) {
	724	error(err_explbr, &t.pos);
	725	continue;
	726	}
	727
	728	/*
	729	* Also expect, and swallow, any whitespace after that
	730	* (a newline before a code paragraph wouldn't be
	731	* surprising).
	732	*/
	733	do {
	734	dtor(t), t = get_token(in);
	735	} while (t.type == tok_white);
	736	already = TRUE;
	737
	738	if (cmd == c_lcont) {
	739	/*
	740	* \lcont causes a continuation of a list item into
	741	* multiple paragraphs (which may in turn contain
	742	* nested lists, code paras etc). Hence, the previous
	743	* paragraph must be of a list type.
	744	*/
	745	sitem = snew(struct crossparaitem);
	746	stop = (struct crossparaitem *)stk_top(crossparastk);
	747	if (stop)
	748	sitem = stop;
	749	else
	750	sitem->seen_quote = sitem->seen_lcont = 0;
	751
	752	if (prev_para_type == para_Bullet \|\|
	753	prev_para_type == para_NumberedList \|\|
	754	prev_para_type == para_Description) {
	755	sitem->type = c_lcont;
	756	sitem->seen_lcont = 1;
	757	par.type = para_LcontPush;
	758	prev_para_type = par.type;
	759	addpara(par, ret);
	760	} else {
	761	/*
	762	* Push a null item on the cross-para stack so that
	763	* when we see the corresponding closing brace we
	764	* don't give a cascade error.
	765	*/
	766	sitem->type = -1;
	767	error(err_misplacedlcont, &t.pos);
	768	}
	769	} else {
	770	/*
	771	* \quote causes a group of paragraphs to be
	772	* block-quoted (typically they will be indented a
	773	* bit).
	774	*/
	775	sitem = snew(struct crossparaitem);
	776	stop = (struct crossparaitem *)stk_top(crossparastk);
	777	if (stop)
	778	sitem = stop;
	779	else
	780	sitem->seen_quote = sitem->seen_lcont = 0;
	781	sitem->type = c_quote;
	782	sitem->seen_quote = 1;
	783	par.type = para_QuotePush;
	784	prev_para_type = par.type;
	785	addpara(par, ret);
	786	}
	787	stk_push(crossparastk, sitem);
	788	continue;
	789	} else if (t.type == tok_rbrace) {
	790	struct crossparaitem *sitem = stk_pop(crossparastk);
	791	if (!sitem)
	792	error(err_unexbrace, &t.pos);
	793	else {
	794	switch (sitem->type) {
	795	case c_lcont:
	796	par.type = para_LcontPop;
	797	prev_para_type = par.type;
	798	addpara(par, ret);
	799	break;
	800	case c_quote:
	801	par.type = para_QuotePop;
	802	prev_para_type = par.type;
	803	addpara(par, ret);
	804	break;
	805	}
	806	sfree(sitem);
	807	}
	808	continue;
	809	}
	810
	811	while (t.type == tok_cmd &&
	812	macrolookup(macros, in, t.text, &t.pos)) {
	813	dtor(t), t = get_token(in);
	814	}
	815
	816	/*
	817	* This token begins a paragraph. See if it's one of the
	818	* special commands that define a paragraph type.
	819	*
	820	* (note that \# is special in a way, and \nocite takes no
	821	* text)
	822	*/
	823	par.type = para_Normal;
	824	if (t.type == tok_cmd) {
	825	int needkw;
	826	int is_macro = FALSE;
	827
	828	par.fpos = t.pos;
	829	switch (t.cmd) {
	830	default:
	831	needkw = -1;
	832	break;
	833	case c__invalid:
	834	error(err_badparatype, t.text, &t.pos);
	835	needkw = 4;
	836	break;
	837	case c__comment:
	838	if (isbrace(in))
	839	break; /* `\#{': isn't a comment para */
	840	do {
	841	dtor(t), t = get_token(in);
	842	} while (t.type != tok_eop && t.type != tok_eof);
	843	continue; /* next paragraph */
	844	/*
	845	* `needkw' values:
	846	*
	847	* 1 -- exactly one keyword
	848	* 2 -- at least one keyword
	849	* 4 -- any number of keywords including zero
	850	* 8 -- at least one keyword and then nothing else
	851	* 16 -- nothing at all! no keywords, no body
	852	* 32 -- no keywords at all
	853	*/
	854	case c_A: needkw = 2; par.type = para_Appendix; break;
	855	case c_B: needkw = 2; par.type = para_Biblio; break;
	856	case c_BR: needkw = 1; par.type = para_BR;
	857	start_cmd = c_BR; break;
	858	case c_C: needkw = 2; par.type = para_Chapter; break;
	859	case c_H: needkw = 2; par.type = para_Heading;
	860	par.aux = 0;
	861	break;
	862	case c_IM: needkw = 2; par.type = para_IM;
	863	start_cmd = c_IM; break;
	864	case c_S: needkw = 2; par.type = para_Subsect;
	865	par.aux = t.aux; break;
	866	case c_U: needkw = 32; par.type = para_UnnumberedChapter; break;
	867	/* For \b and \n the keyword is optional */
	868	case c_b: needkw = 4; par.type = para_Bullet; break;
	869	case c_dt: needkw = 4; par.type = para_DescribedThing; break;
	870	case c_dd: needkw = 4; par.type = para_Description; break;
	871	case c_n: needkw = 4; par.type = para_NumberedList; break;
	872	case c_cfg: needkw = 8; par.type = para_Config;
	873	start_cmd = c_cfg; break;
	874	case c_copyright: needkw = 32; par.type = para_Copyright; break;
	875	case c_define: is_macro = TRUE; needkw = 1; break;
	876	/* For \nocite the keyword is _everything_ */
	877	case c_nocite: needkw = 8; par.type = para_NoCite; break;
	878	case c_preamble: needkw = 32; par.type = para_Normal; break;
	879	case c_rule: needkw = 16; par.type = para_Rule; break;
	880	case c_title: needkw = 32; par.type = para_Title; break;
	881	case c_versionid: needkw = 32; par.type = para_VersionID; break;
	882	}
	883
	884	if (par.type == para_Chapter \|\|
	885	par.type == para_Heading \|\|
	886	par.type == para_Subsect \|\|
	887	par.type == para_Appendix \|\|
	888	par.type == para_UnnumberedChapter) {
	889	struct crossparaitem *sitem = stk_top(crossparastk);
	890	if (sitem && (sitem->seen_lcont \|\| sitem->seen_quote)) {
	891	error(err_sectmarkerinblock,
	892	&t.pos,
	893	(sitem->seen_lcont ? "lcont" : "quote"));
	894	}
	895	}
	896
	897	if (needkw > 0) {
	898	rdstring rs = { 0, 0, NULL };
	899	rdstringc rsc = { 0, 0, NULL };
	900	int nkeys = 0;
	901	filepos fp;
	902
	903	/* Get keywords. */
	904	dtor(t), t = get_token(in);
	905	fp = t.pos;
	906	while (t.type == tok_lbrace \|\|
	907	(t.type == tok_white && (needkw & 24))) {
	908	/*
	909	* In paragraph types which can't accept any
	910	* body text (such as \cfg), we are lenient
	911	* about whitespace between keywords. This is
	912	* important for \cfg in particular since it
	913	* can often have many keywords which are long
	914	* pieces of text, so it's useful to permit the
	915	* user to wrap the line between them.
	916	*/
	917	if (t.type == tok_white) {
	918	dtor(t), t = get_token(in); /* eat the space */
	919	continue;
	920	}
	921	/* This is a keyword. */
	922	nkeys++;
	923	/* FIXME: there will be bugs if anyone specifies an
	924	* empty keyword (\foo{}), so trap this case. */
	925	while (dtor(t), t = get_token(in),
	926	t.type == tok_word \|\|
	927	t.type == tok_white \|\|
	928	(t.type == tok_cmd && t.cmd == c__nbsp) \|\|
	929	(t.type == tok_cmd && t.cmd == c__escaped) \|\|
	930	(t.type == tok_cmd && t.cmd == c_u)) {
	931	if (t.type == tok_white \|\|
	932	(t.type == tok_cmd && t.cmd == c__nbsp)) {
	933	rdadd(&rs, ' ');
	934	rdaddc(&rsc, ' ');
	935	} else if (t.type == tok_cmd && t.cmd == c_u) {
	936	rdadd(&rs, t.aux);
	937	rdaddc(&rsc, '\\');
	938	rdaddsc(&rsc, t.origtext);
	939	} else {
	940	rdadds(&rs, t.text);
	941	rdaddsc(&rsc, t.origtext);
	942	}
	943	}
	944	if (t.type != tok_rbrace) {
	945	error(err_kwunclosed, &t.pos);
	946	continue;
	947	}
	948	rdadd(&rs, 0); /* add string terminator */
	949	rdaddc(&rsc, 0); /* add string terminator */
	950	dtor(t), t = get_token(in); /* eat right brace */
	951	}
	952
	953	rdadd(&rs, 0); /* add string terminator */
	954	rdaddc(&rsc, 0); /* add string terminator */
	955
	956	/* See whether we have the right number of keywords. */
	957	if ((needkw & 48) && nkeys > 0)
	958	error(err_kwillegal, &fp);
	959	if ((needkw & 11) && nkeys == 0)
	960	error(err_kwexpected, &fp);
	961	if ((needkw & 5) && nkeys > 1)
	962	error(err_kwtoomany, &fp);
	963
	964	if (is_macro) {
	965	/*
	966	* Macro definition. Get the rest of the line
	967	* as a code-paragraph token, repeatedly until
	968	* there's nothing more left of it. Separate
	969	* with newlines.
	970	*/
	971	rdstring macrotext = { 0, 0, NULL };
	972	while (1) {
	973	dtor(t), t = get_codepar_token(in);
	974	if (macrotext.pos > 0)
	975	rdadd(&macrotext, L'\n');
	976	rdadds(&macrotext, t.text);
	977	dtor(t), t = get_token(in);
	978	if (t.type == tok_eop) break;
	979	}
	980	macrodef(macros, rs.text, macrotext.text, fp);
	981	continue; /* next paragraph */
	982	}
	983
	984	par.keyword = rdtrim(&rs);
	985	par.origkeyword = rdtrimc(&rsc);
	986
	987	/* Move to EOP in case of needkw==8 or 16 (no body) */
	988	if (needkw & 24) {
	989	/* We allow whitespace even when we expect no para body */
	990	while (t.type == tok_white)
	991	dtor(t), t = get_token(in);
	992	if (t.type != tok_eop && t.type != tok_eof &&
	993	(start_cmd == c__invalid \|\|
	994	t.type != tok_cmd \|\| t.cmd != start_cmd)) {
	995	error(err_bodyillegal, &t.pos);
	996	/* Error recovery: eat the rest of the paragraph */
	997	while (t.type != tok_eop && t.type != tok_eof &&
	998	(start_cmd == c__invalid \|\|
	999	t.type != tok_cmd \|\| t.cmd != start_cmd))
	1000	dtor(t), t = get_token(in);
	1001	}
	1002	if (t.type == tok_cmd)
	1003	already = TRUE;/* inhibit get_token at top of loop */
	1004	prev_para_type = par.type;
	1005	addpara(par, ret);
	1006
	1007	if (par.type == para_Config) {
	1008	input_configure(in, &par);
	1009	}
	1010	continue; /* next paragraph */
	1011	}
	1012	}
	1013	}
	1014
	1015	/*
	1016	* Now read the actual paragraph, word by word, adding to
	1017	* the paragraph list.
	1018	*
	1019	* Mid-paragraph commands:
	1020	*
	1021	* \K \k
	1022	* \c \cw \cq
	1023	* \e
	1024	* \i \ii
	1025	* \I
	1026	* \q
	1027	* \u
	1028	* \W
	1029	* \date
	1030	* \\ \{ \}
	1031	*/
	1032	parsestk = stk_new();
	1033	style = word_Normal;
	1034	spcstyle = word_WhiteSpace;
	1035	indexing = FALSE;
	1036	seenwhite = TRUE;
	1037	while (t.type != tok_eop && t.type != tok_eof) {
	1038	iswhite = FALSE;
	1039	already = FALSE;
	1040
	1041	/* Handle implicit paragraph breaks after \IM, \BR etc */
	1042	if (start_cmd != c__invalid &&
	1043	t.type == tok_cmd && t.cmd == start_cmd) {
	1044	already = TRUE; /* inhibit get_token at top of loop */
	1045	break;
	1046	}
	1047
	1048	if (t.type == tok_cmd && t.cmd == c__nop) {
	1049	dtor(t), t = get_token(in);
	1050	continue; /* do nothing! */
	1051	}
	1052
	1053	if (t.type == tok_cmd && t.cmd == c__escaped) {
	1054	t.type = tok_word; /* nice and simple */
	1055	t.aux = 0; /* even if `\-' - nonbreaking! */
	1056	}
	1057	if (t.type == tok_cmd && t.cmd == c__nbsp) {
	1058	t.type = tok_word; /* nice and simple */
	1059	sfree(t.text);
	1060	t.text = ustrdup(L" "); /* text is ` ' not `_' */
	1061	t.aux = 0; /* (nonbreaking) */
	1062	}
	1063	switch (t.type) {
	1064	case tok_white:
	1065	if (whptr == &par.words)
	1066	break; /* strip whitespace at start of para */
	1067	wd.text = NULL;
	1068	wd.type = spcstyle;
	1069	wd.alt = NULL;
	1070	wd.aux = 0;
	1071	wd.fpos = t.pos;
	1072	wd.breaks = FALSE;
	1073
	1074	/*
	1075	* Inhibit use of whitespace if it's (probably the
	1076	* newline) before a repeat \IM / \BR type
	1077	* directive.
	1078	*/
	1079	if (start_cmd != c__invalid) {
	1080	dtor(t), t = get_token(in);
	1081	already = TRUE;
	1082	if (t.type == tok_cmd && t.cmd == start_cmd)
	1083	break;
	1084	}
	1085
	1086	if (indexing)
	1087	rdadd(&indexstr, ' ');
	1088	if (!indexing \|\| index_visible)
	1089	addword(wd, &whptr);
	1090	if (indexing)
	1091	addword(wd, &idximplicit);
	1092	iswhite = TRUE;
	1093	break;
	1094	case tok_word:
	1095	if (indexing)
	1096	rdadds(&indexstr, t.text);
	1097	wd.type = style;
	1098	wd.alt = NULL;
	1099	wd.aux = 0;
	1100	wd.fpos = t.pos;
	1101	wd.breaks = t.aux;
	1102	if (!indexing \|\| index_visible) {
	1103	wd.text = ustrdup(t.text);
	1104	addword(wd, &whptr);
	1105	}
	1106	if (indexing) {
	1107	wd.text = ustrdup(t.text);
	1108	addword(wd, &idximplicit);
	1109	}
	1110	break;
	1111	case tok_lbrace:
	1112	error(err_unexbrace, &t.pos);
	1113	/* Error recovery: push nop */
	1114	sitem = snew(struct stack_item);
	1115	sitem->type = stack_nop;
	1116	sitem->fpos = t.pos;
	1117	stk_push(parsestk, sitem);
	1118	break;
	1119	case tok_rbrace:
	1120	sitem = stk_pop(parsestk);
	1121	if (!sitem) {
	1122	/*
	1123	* This closing brace could have been an
	1124	* indication that the cross-paragraph stack
	1125	* wants popping. Accordingly, we treat it here
	1126	* as an indication that the paragraph is over.
	1127	*/
	1128	already = TRUE;
	1129	goto finished_para;
	1130	} else {
	1131	if (sitem->type & stack_ualt) {
	1132	whptr = sitem->whptr;
	1133	idximplicit = sitem->idximplicit;
	1134	}
	1135	if (sitem->type & stack_style) {
	1136	style = word_Normal;
	1137	spcstyle = word_WhiteSpace;
	1138	}
	1139	if (sitem->type & stack_idx) {
	1140	indexword->text = ustrdup(indexstr.text);
	1141	if (index_downcase) {
	1142	word *w;
	1143
	1144	ustrlow(indexword->text);
	1145	ustrlow(indexstr.text);
	1146
	1147	for (w = idxwordlist; w; w = w->next)
	1148	if (w->text)
	1149	ustrlow(w->text);
	1150	}
	1151	indexing = FALSE;
	1152	rdadd(&indexstr, L'\0');
	1153	index_merge(idx, FALSE, indexstr.text,
	1154	idxwordlist, &sitem->fpos);
	1155	sfree(indexstr.text);
	1156	}
	1157	if (sitem->type & stack_hyper) {
	1158	wd.text = NULL;
	1159	wd.type = word_HyperEnd;
	1160	wd.alt = NULL;
	1161	wd.aux = 0;
	1162	wd.fpos = t.pos;
	1163	wd.breaks = FALSE;
	1164	if (!indexing \|\| index_visible)
	1165	addword(wd, &whptr);
	1166	if (indexing)
	1167	addword(wd, &idximplicit);
	1168	}
	1169	if (sitem->type & stack_quote) {
	1170	wd.text = NULL;
	1171	wd.type = toquotestyle(style);
	1172	wd.alt = NULL;
	1173	wd.aux = quote_Close;
	1174	wd.fpos = t.pos;
	1175	wd.breaks = FALSE;
	1176	if (!indexing \|\| index_visible)
	1177	addword(wd, &whptr);
	1178	if (indexing) {
	1179	rdadd(&indexstr, L'"');
	1180	addword(wd, &idximplicit);
	1181	}
	1182	}
	1183	}
	1184	sfree(sitem);
	1185	break;
	1186	case tok_cmd:
	1187	switch (t.cmd) {
	1188	case c__comment:
	1189	/*
	1190	* In-paragraph comment: \#{ balanced braces }
	1191	*
	1192	* Anything goes here; even tok_eop. We should
	1193	* eat whitespace after the close brace _if_
	1194	* there was whitespace before the \#.
	1195	*/
	1196	dtor(t), t = get_token(in);
	1197	if (t.type != tok_lbrace) {
	1198	error(err_explbr, &t.pos);
	1199	} else {
	1200	int braces = 1;
	1201	while (braces > 0) {
	1202	dtor(t), t = get_token(in);
	1203	if (t.type == tok_lbrace)
	1204	braces++;
	1205	else if (t.type == tok_rbrace)
	1206	braces--;
	1207	else if (t.type == tok_eof) {
	1208	error(err_commenteof, &t.pos);
	1209	break;
	1210	}
	1211	}
	1212	}
	1213	if (seenwhite) {
	1214	already = TRUE;
	1215	dtor(t), t = get_token(in);
	1216	if (t.type == tok_white) {
	1217	iswhite = TRUE;
	1218	already = FALSE;
	1219	}
	1220	}
	1221	break;
	1222	case c_q:
	1223	case c_cq:
	1224	type = t.cmd;
	1225	dtor(t), t = get_token(in);
	1226	if (t.type != tok_lbrace) {
	1227	error(err_explbr, &t.pos);
	1228	} else {
	1229	/*
	1230	* Enforce that \q may not be used anywhere
	1231	* within \c. (It shouldn't be necessary
	1232	* since the whole point of \c should be
	1233	* that the user wants to exercise exact
	1234	* control over the glyphs used, and
	1235	* forbidding it has the useful effect of
	1236	* relieving some backends of having to
	1237	* make difficult decisions.)
	1238	*/
	1239	int stype;
	1240
	1241	if (style != word_Code && style != word_WeakCode) {
	1242	wd.text = NULL;
	1243	wd.type = toquotestyle(style);
	1244	wd.alt = NULL;
	1245	wd.aux = quote_Open;
	1246	wd.fpos = t.pos;
	1247	wd.breaks = FALSE;
	1248	if (!indexing \|\| index_visible)
	1249	addword(wd, &whptr);
	1250	if (indexing) {
	1251	rdadd(&indexstr, L'"');
	1252	addword(wd, &idximplicit);
	1253	}
	1254	stype = stack_quote;
	1255	} else {
	1256	error(err_codequote, &t.pos);
	1257	stype = stack_nop;
	1258	}
	1259	sitem = snew(struct stack_item);
	1260	sitem->fpos = t.pos;
	1261	sitem->type = stype;
	1262	if (type == c_cq) {
	1263	if (style != word_Normal) {
	1264	error(err_nestedstyles, &t.pos);
	1265	} else {
	1266	style = word_WeakCode;
	1267	spcstyle = tospacestyle(style);
	1268	sitem->type \|= stack_style;
	1269	}
	1270	}
	1271	stk_push(parsestk, sitem);
	1272	}
	1273	break;
	1274	case c_K:
	1275	case c_k:
	1276	case c_W:
	1277	case c_date:
	1278	/*
	1279	* Keyword, hyperlink, or \date. We expect a
	1280	* left brace, some text, and then a right
	1281	* brace. No nesting; no arguments.
	1282	*/
	1283	wd.fpos = t.pos;
	1284	wd.breaks = FALSE;
	1285	if (t.cmd == c_K)
	1286	wd.type = word_UpperXref;
	1287	else if (t.cmd == c_k)
	1288	wd.type = word_LowerXref;
	1289	else if (t.cmd == c_W)
	1290	wd.type = word_HyperLink;
	1291	else
	1292	wd.type = word_Normal;
	1293	dtor(t), t = get_token(in);
	1294	if (t.type != tok_lbrace) {
	1295	if (wd.type == word_Normal) {
	1296	time_t thetime = time(NULL);
	1297	struct tm *broken = localtime(&thetime);
	1298	already = TRUE;
	1299	wdtext = ustrftime(NULL, broken);
	1300	wd.type = style;
	1301	} else {
	1302	error(err_explbr, &t.pos);
	1303	wdtext = NULL;
	1304	}
	1305	} else {
	1306	rdstring rs = { 0, 0, NULL };
	1307	while (dtor(t), t = get_token(in),
	1308	t.type == tok_word \|\| t.type == tok_white) {
	1309	if (t.type == tok_white)
	1310	rdadd(&rs, ' ');
	1311	else
	1312	rdadds(&rs, t.text);
	1313	}
	1314	if (wd.type == word_Normal) {
	1315	time_t thetime = time(NULL);
	1316	struct tm *broken = localtime(&thetime);
	1317	wdtext = ustrftime(rs.text, broken);
	1318	wd.type = style;
	1319	} else {
	1320	wdtext = ustrdup(rs.text);
	1321	}
	1322	sfree(rs.text);
	1323	if (t.type != tok_rbrace) {
	1324	error(err_kwexprbr, &t.pos);
	1325	}
	1326	}
	1327	wd.alt = NULL;
	1328	wd.aux = 0;
	1329	if (!indexing \|\| index_visible) {
	1330	wd.text = ustrdup(wdtext);
	1331	addword(wd, &whptr);
	1332	}
	1333	if (indexing) {
	1334	wd.text = ustrdup(wdtext);
	1335	addword(wd, &idximplicit);
	1336	}
	1337	sfree(wdtext);
	1338	if (wd.type == word_HyperLink) {
	1339	/*
	1340	* Hyperlinks are different: they then
	1341	* expect another left brace, to begin
	1342	* delimiting the text marked by the link.
	1343	*/
	1344	dtor(t), t = get_token(in);
	1345	sitem = snew(struct stack_item);
	1346	sitem->fpos = wd.fpos;
	1347	sitem->type = stack_hyper;
	1348	/*
	1349	* Special cases: \W{}\i, \W{}\ii
	1350	*/
	1351	if (t.type == tok_cmd &&
	1352	(t.cmd == c_i \|\| t.cmd == c_ii)) {
	1353	if (indexing) {
	1354	error(err_nestedindex, &t.pos);
	1355	} else {
	1356	/* Add an index-reference word with no
	1357	* text as yet */
	1358	wd.type = word_IndexRef;
	1359	wd.text = NULL;
	1360	wd.alt = NULL;
	1361	wd.aux = 0;
	1362	wd.breaks = FALSE;
	1363	indexword = addword(wd, &whptr);
	1364	/* Set up a rdstring to read the
	1365	* index text */
	1366	indexstr = nullrs;
	1367	/* Flags so that we do the Right
	1368	* Things with text */
	1369	index_visible = (type != c_I);
	1370	index_downcase = (type == c_ii);
	1371	indexing = TRUE;
	1372	idxwordlist = NULL;
	1373	idximplicit = &idxwordlist;
	1374
	1375	sitem->type \|= stack_idx;
	1376	}
	1377	dtor(t), t = get_token(in);
	1378	}
	1379	/*
	1380	* Special cases: \W{}\c, \W{}\e, \W{}\cw
	1381	*/
	1382	if (t.type == tok_cmd &&
	1383	(t.cmd == c_e \|\| t.cmd == c_c \|\| t.cmd == c_cw)) {
	1384	if (style != word_Normal)
	1385	error(err_nestedstyles, &t.pos);
	1386	else {
	1387	style = (t.cmd == c_c ? word_Code :
	1388	t.cmd == c_cw ? word_WeakCode :
	1389	word_Emph);
	1390	spcstyle = tospacestyle(style);
	1391	sitem->type \|= stack_style;
	1392	}
	1393	dtor(t), t = get_token(in);
	1394	}
	1395	if (t.type != tok_lbrace) {
	1396	error(err_explbr, &t.pos);
	1397	sfree(sitem);
	1398	} else {
	1399	stk_push(parsestk, sitem);
	1400	}
	1401	}
	1402	break;
	1403	case c_c:
	1404	case c_cw:
	1405	case c_e:
	1406	type = t.cmd;
	1407	if (style != word_Normal) {
	1408	error(err_nestedstyles, &t.pos);
	1409	/* Error recovery: eat lbrace, push nop. */
	1410	dtor(t), t = get_token(in);
	1411	sitem = snew(struct stack_item);
	1412	sitem->fpos = t.pos;
	1413	sitem->type = stack_nop;
	1414	stk_push(parsestk, sitem);
	1415	}
	1416	dtor(t), t = get_token(in);
	1417	if (t.type != tok_lbrace) {
	1418	error(err_explbr, &t.pos);
	1419	} else {
	1420	style = (type == c_c ? word_Code :
	1421	type == c_cw ? word_WeakCode :
	1422	word_Emph);
	1423	spcstyle = tospacestyle(style);
	1424	sitem = snew(struct stack_item);
	1425	sitem->fpos = t.pos;
	1426	sitem->type = stack_style;
	1427	stk_push(parsestk, sitem);
	1428	}
	1429	break;
	1430	case c_i:
	1431	case c_ii:
	1432	case c_I:
	1433	type = t.cmd;
	1434	if (indexing) {
	1435	error(err_nestedindex, &t.pos);
	1436	/* Error recovery: eat lbrace, push nop. */
	1437	dtor(t), t = get_token(in);
	1438	sitem = snew(struct stack_item);
	1439	sitem->fpos = t.pos;
	1440	sitem->type = stack_nop;
	1441	stk_push(parsestk, sitem);
	1442	}
	1443	sitem = snew(struct stack_item);
	1444	sitem->fpos = t.pos;
	1445	sitem->type = stack_idx;
	1446	dtor(t), t = get_token(in);
	1447	/*
	1448	* Special cases: \i\c, \i\e, \i\cw
	1449	*/
	1450	wd.fpos = t.pos;
	1451	if (t.type == tok_cmd &&
	1452	(t.cmd == c_e \|\| t.cmd == c_c \|\| t.cmd == c_cw)) {
	1453	if (style != word_Normal)
	1454	error(err_nestedstyles, &t.pos);
	1455	else {
	1456	style = (t.cmd == c_c ? word_Code :
	1457	t.cmd == c_cw ? word_WeakCode :
	1458	word_Emph);
	1459	spcstyle = tospacestyle(style);
	1460	sitem->type \|= stack_style;
	1461	}
	1462	dtor(t), t = get_token(in);
	1463	}
	1464	if (t.type != tok_lbrace) {
	1465	sfree(sitem);
	1466	error(err_explbr, &t.pos);
	1467	} else {
	1468	/* Add an index-reference word with no text as yet */
	1469	wd.type = word_IndexRef;
	1470	wd.text = NULL;
	1471	wd.alt = NULL;
	1472	wd.aux = 0;
	1473	wd.breaks = FALSE;
	1474	indexword = addword(wd, &whptr);
	1475	/* Set up a rdstring to read the index text */
	1476	indexstr = nullrs;
	1477	/* Flags so that we do the Right Things with text */
	1478	index_visible = (type != c_I);
	1479	index_downcase = (type == c_ii);
	1480	indexing = TRUE;
	1481	idxwordlist = NULL;
	1482	idximplicit = &idxwordlist;
	1483	/* Stack item to close the indexing on exit */
	1484	stk_push(parsestk, sitem);
	1485	}
	1486	break;
	1487	case c_u:
	1488	uchr = t.aux;
	1489	utext[0] = uchr; utext[1] = 0;
	1490	wd.type = style;
	1491	wd.breaks = FALSE;
	1492	wd.alt = NULL;
	1493	wd.aux = 0;
	1494	wd.fpos = t.pos;
	1495	if (!indexing \|\| index_visible) {
	1496	wd.text = ustrdup(utext);
	1497	uword = addword(wd, &whptr);
	1498	} else
	1499	uword = NULL;
	1500	if (indexing) {
	1501	wd.text = ustrdup(utext);
	1502	iword = addword(wd, &idximplicit);
	1503	} else
	1504	iword = NULL;
	1505	dtor(t), t = get_token(in);
	1506	if (t.type == tok_lbrace) {
	1507	/*
	1508	* \u with a left brace. Until the brace
	1509	* closes, all further words go on a
	1510	* sidetrack from the main thread of the
	1511	* paragraph.
	1512	*/
	1513	sitem = snew(struct stack_item);
	1514	sitem->fpos = t.pos;
	1515	sitem->type = stack_ualt;
	1516	sitem->whptr = whptr;
	1517	sitem->idximplicit = idximplicit;
	1518	stk_push(parsestk, sitem);
	1519	whptr = uword ? &uword->alt : NULL;
	1520	idximplicit = iword ? &iword->alt : NULL;
	1521	} else {
	1522	if (indexing)
	1523	rdadd(&indexstr, uchr);
	1524	already = TRUE;
	1525	}
	1526	break;
	1527	default:
	1528	if (!macrolookup(macros, in, t.text, &t.pos))
	1529	error(err_badmidcmd, t.text, &t.pos);
	1530	break;
	1531	}
	1532	}
	1533	if (!already)
	1534	dtor(t), t = get_token(in);
	1535	seenwhite = iswhite;
	1536	}
	1537	finished_para:
	1538	/* Check the stack is empty */
	1539	if (stk_top(parsestk)) {
	1540	while ((sitem = stk_pop(parsestk)))
	1541	sfree(sitem);
	1542	error(err_missingrbrace, &t.pos);
	1543	}
	1544	stk_free(parsestk);
	1545	prev_para_type = par.type;
	1546	/*
	1547	* Before we add the paragraph to the output list, we
	1548	* should check that there was any text in it at all; there
	1549	* might not be if (for example) the paragraph contained
	1550	* nothing but an unrecognised command sequence, and if we
	1551	* put an empty paragraph on the list it may confuse the
	1552	* back ends later on.
	1553	*/
	1554	if (par.words) {
	1555	addpara(par, ret);
	1556	}
	1557	if (t.type == tok_eof)
	1558	already = TRUE;
	1559	}
	1560
	1561	if (stk_top(crossparastk)) {
	1562	void *p;
	1563
	1564	error(err_missingrbrace2, &t.pos);
	1565	while ((p = stk_pop(crossparastk)))
	1566	sfree(p);
	1567	}
	1568
	1569	/*
	1570	* We break to here rather than returning, because otherwise
	1571	* this cleanup doesn't happen.
	1572	*/
	1573	dtor(t);
	1574
	1575	stk_free(crossparastk);
	1576	}
	1577
	1578	struct {
		    /*
		     * Table of leading-byte signatures consulted by read_input().
		     * A file whose first `nmagic' bytes equal `magic' is handed to
		     * `reader' instead of being parsed by read_file(). read_input()
		     * only examines the first 16 bytes of a file, so an entry
		     * with nmagic > 16 could never match.
		     *
		     * NOTE(review): judging by the strings and the reader names,
		     * these look like Type 1 font (PFA) and font metrics (AFM)
		     * headers -- confirm against the font-reading code.
		     */
	1579	    char const *magic;          /* signature bytes at start of file */
	1580	    size_t nmagic;              /* how many bytes of `magic' to compare */
	1581	    void (*reader)(input *);    /* routine called for a matching file */
	1582	} magics[] = {
	1583	    { "%!FontType1-", 12, &read_pfa_file },
	1584	    { "%!PS-AdobeFont-", 15, &read_pfa_file },
	1585	    { "StartFontMetrics", 16, &read_afm_file },
	1586	};
	1587
	1588	paragraph *read_input(input *in, indexdata *idx) {
		    /*
		     * Work through the files listed in in->filenames, from
		     * in->currindex up to in->nfiles, and return `head', the start
		     * of the paragraph list (presumably appended to by read_file()
		     * through `hptr').
		     *
		     * For each file that opens successfully, the per-file input
		     * state is reset and the first bytes are compared against the
		     * magics[] table. A match passes the file to that entry's
		     * reader; anything else goes to read_file(). A file that
		     * fails to open is skipped here with no error reported.
		     *
		     * A single macro tree is shared across all files and is handed
		     * to macrocleanup() before returning. `idx' is only passed on
		     * to read_file().
		     *
		     * NOTE(review): in->currfp is not closed in this function;
		     * presumably the lower-level reading code closes it at end of
		     * file -- confirm.
		     */
	1589	    paragraph *head = NULL;
	1590	    paragraph **hptr = &head;
	1591	    tree234 *macros;
	1592	    char mag[16];           /* must hold the longest magics[] signature */
	1593	    size_t len, i;
	1594	    void (*reader)(input *);
	1595
	1596	    macros = newtree234(macrocmp);
	1597
	1598	    while (in->currindex < in->nfiles) {
	1599	        in->currfp = fopen(in->filenames[in->currindex], "r");
	1600	        if (in->currfp) {
		            /* Reset position tracking and decoding state for the new file. */
	1601	            setpos(in, in->filenames[in->currindex]);
	1602	            in->charset = in->defcharset;
	1603	            in->csstate = charset_init_state;
	1604	            in->wcpos = in->nwc = 0;
	1605	            in->pushback_chars = NULL;
		            /* Sniff the start of the file for a known signature. */
	1606	            reader = NULL;
	1607	            len = fread(mag, 1, sizeof(mag), in->currfp);
	1608	            for (i = 0; i < lenof(magics); i++) {
	1609	                if (len >= magics[i].nmagic &&
	1610	                    memcmp(mag, magics[i].magic, magics[i].nmagic) == 0) {
	1611	                    reader = magics[i].reader;
	1612	                    break;
	1613	                }
	1614	            }
		            /* Undo the sniffing read so the chosen reader starts at byte 0. */
	1615	            rewind(in->currfp);
	1616	            if (reader == NULL)
	1617	                read_file(&hptr, in, idx, macros);
	1618	            else
	1619	                (*reader)(in);
	1620	        }
	1621	        in->currindex++;
	1622	    }
	1623
	1624	    macrocleanup(macros);
	1625
	1626	    return head;
	1627	}