mdw@git.distorted.org.uk Git - sgt/halibut/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* input.c: read the source form
	3	*/
	4
	5	#include <stdio.h>
	6	#include <assert.h>
	7	#include <time.h>
	8	#include "halibut.h"
	9
	10	#define TAB_STOP 8 /* for column number tracking */
	11
	12	static void setpos(input in, char fname) {
	13	in->pos.filename = fname;
	14	in->pos.line = 1;
	15	in->pos.col = (in->reportcols ? 1 : -1);
	16	}
	17
	18	static void unget(input in, int c, filepos pos) {
	19	if (in->npushback >= in->pushbacksize) {
	20	in->pushbacksize = in->npushback + 16;
	21	in->pushback = resize(in->pushback, in->pushbacksize);
	22	}
	23	in->pushback[in->npushback].chr = c;
	24	in->pushback[in->npushback].pos = pos; / structure copy */
	25	in->npushback++;
	26	}
	27
	28	/* ---------------------------------------------------------------------- */
	29	/*
	30	* Macro subsystem
	31	*/
	32	typedef struct macro_Tag macro;
	33	struct macro_Tag {
	34	wchar_t name, text;
	35	};
	36	struct macrostack_Tag {
	37	macrostack *next;
	38	wchar_t *text;
	39	int ptr, npushback;
	40	filepos pos;
	41	};
	42	static int macrocmp(void av, void bv) {
	43	macro a = (macro )av, b = (macro )bv;
	44	return ustrcmp(a->name, b->name);
	45	}
	46	static void macrodef(tree234 macros, wchar_t name, wchar_t *text,
	47	filepos fpos) {
	48	macro *m = mknew(macro);
	49	m->name = name;
	50	m->text = text;
	51	if (add234(macros, m) != m) {
	52	error(err_macroexists, &fpos, name);
	53	sfree(name);
	54	sfree(text);
	55	}
	56	}
	57	static int macrolookup(tree234 macros, input in, wchar_t *name,
	58	filepos *pos) {
	59	macro m, *gotit;
	60	m.name = name;
	61	gotit = find234(macros, &m, NULL);
	62	if (gotit) {
	63	macrostack *expansion = mknew(macrostack);
	64	expansion->next = in->stack;
	65	expansion->text = gotit->text;
	66	expansion->pos = pos; / structure copy */
	67	expansion->ptr = 0;
	68	expansion->npushback = in->npushback;
	69	in->stack = expansion;
	70	return TRUE;
	71	} else
	72	return FALSE;
	73	}
	74	static void macrocleanup(tree234 *macros) {
	75	int ti;
	76	macro *m;
	77	for (ti = 0; (m = (macro *)index234(macros, ti)) != NULL; ti++) {
	78	sfree(m->name);
	79	sfree(m->text);
	80	sfree(m);
	81	}
	82	freetree234(macros);
	83	}
	84
	85	/*
	86	* Can return EOF
	87	*/
	88	static int get(input in, filepos pos) {
	89	int pushbackpt = in->stack ? in->stack->npushback : 0;
	90	if (in->npushback > pushbackpt) {
	91	--in->npushback;
	92	if (pos)
	93	pos = in->pushback[in->npushback].pos; / structure copy */
	94	return in->pushback[in->npushback].chr;
	95	}
	96	else if (in->stack) {
	97	wchar_t c = in->stack->text[in->stack->ptr];
	98	if (in->stack->text[++in->stack->ptr] == L'\0') {
	99	macrostack *tmp = in->stack;
	100	in->stack = tmp->next;
	101	sfree(tmp);
	102	}
	103	return c;
	104	}
	105	else if (in->currfp) {
	106	int c = getc(in->currfp);
	107
	108	if (c == EOF) {
	109	fclose(in->currfp);
	110	in->currfp = NULL;
	111	}
	112	/* Track line numbers, for error reporting */
	113	if (pos)
	114	*pos = in->pos;
	115	if (in->reportcols) {
	116	switch (c) {
	117	case '\t':
	118	in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP;
	119	break;
	120	case '\n':
	121	in->pos.col = 1;
	122	in->pos.line++;
	123	break;
	124	default:
	125	in->pos.col++;
	126	break;
	127	}
	128	} else {
	129	in->pos.col = -1;
	130	if (c == '\n')
	131	in->pos.line++;
	132	}
	133	/* FIXME: do input charmap translation. We should be returning
	134	* Unicode here. */
	135	return c;
	136	} else
	137	return EOF;
	138	}
	139
	140	/*
	141	* Lexical analysis of source files.
	142	*/
	143	typedef struct token_Tag token;
	144	struct token_Tag {
	145	int type;
	146	int cmd, aux;
	147	wchar_t *text;
	148	filepos pos;
	149	};
	150	enum {
	151	tok_eof, /* end of file */
	152	tok_eop, /* end of paragraph */
	153	tok_white, /* whitespace */
	154	tok_word, /* a word or word fragment */
	155	tok_cmd, /* \command */
	156	tok_lbrace, /* { */
	157	tok_rbrace /* } */
	158	};
	159
	160	/* Halibut command keywords. */
	161	enum {
	162	c__invalid, /* invalid command */
	163	c__comment, /* comment command (\#) */
	164	c__escaped, /* escaped character */
	165	c__nbsp, /* nonbreaking space */
	166	c_A, /* appendix heading */
	167	c_B, /* bibliography entry */
	168	c_BR, /* bibliography rewrite */
	169	c_C, /* chapter heading */
	170	c_H, /* heading */
	171	c_I, /* invisible index mark */
	172	c_IM, /* index merge/rewrite */
	173	c_K, /* capitalised cross-reference */
	174	c_S, /* aux field is 0, 1, 2, ... */
	175	c_U, /* unnumbered-chapter heading */
	176	c_W, /* Web hyperlink */
	177	c_b, /* bulletted list */
	178	c_c, /* code */
	179	c_cfg, /* configuration directive */
	180	c_copyright, /* copyright statement */
	181	c_cw, /* weak code */
	182	c_date, /* document processing date */
	183	c_define, /* macro definition */
	184	c_e, /* emphasis */
	185	c_i, /* visible index mark */
	186	c_ii, /* uncapitalised visible index mark */
	187	c_k, /* uncapitalised cross-reference */
	188	c_n, /* numbered list */
	189	c_nocite, /* bibliography trickery */
	190	c_preamble, /* document preamble text */
	191	c_q, /* quote marks */
	192	c_rule, /* horizontal rule */
	193	c_title, /* document title */
	194	c_u, /* aux field is char code */
	195	c_versionid /* document RCS id */
	196	};
	197
	198	/* Perhaps whitespace should be defined in a more Unicode-friendly way? */
	199	#define iswhite(c) ( (c)==32 \|\| (c)==9 \|\| (c)==13 \|\| (c)==10 )
	200	#define isnl(c) ( (c)==10 )
	201	#define isdec(c) ( ((c)>='0'&&(c)<='9') )
	202	#define fromdec(c) ( (c)-'0' )
	203	#define ishex(c) ( ((c)>='0'&&(c)<='9') \|\| ((c)>='A'&&(c)<='F') \|\| ((c)>='a'&&(c)<='f'))
	204	#define fromhex(c) ( (c)<='9' ? (c)-'0' : ((c)&0xDF) - ('A'-10) )
	205	#define iscmd(c) ( ((c)>='0'&&(c)<='9') \|\| ((c)>='A'&&(c)<='Z') \|\| ((c)>='a'&&(c)<='z'))
	206
	207	/*
	208	* Keyword comparison function. Like strcmp, but between a wchar_t *
	209	* and a char *.
	210	*/
	211	static int kwcmp(wchar_t const p, char const q) {
	212	int i;
	213	do {
	214	i = p - q;
	215	} while (p++ && q++ && !i);
	216	return i;
	217	}
	218
	219	/*
	220	* Match a keyword.
	221	*/
	222	static void match_kw(token *tok) {
	223	/*
	224	* FIXME. The ids are explicit in here so as to allow long-name
	225	* equivalents to the various very short keywords.
	226	*/
	227	static const struct { char const *name; int id; } keywords[] = {
	228	{"#", c__comment}, /* comment command (\#) */
	229	{"-", c__escaped}, /* nonbreaking hyphen */
	230	{"A", c_A}, /* appendix heading */
	231	{"B", c_B}, /* bibliography entry */
	232	{"BR", c_BR}, /* bibliography rewrite */
	233	{"C", c_C}, /* chapter heading */
	234	{"H", c_H}, /* heading */
	235	{"I", c_I}, /* invisible index mark */
	236	{"IM", c_IM}, /* index merge/rewrite */
	237	{"K", c_K}, /* capitalised cross-reference */
	238	{"U", c_U}, /* unnumbered-chapter heading */
	239	{"W", c_W}, /* Web hyperlink */
	240	{"\\", c__escaped}, /* escaped backslash (\\) */
	241	{"_", c__nbsp}, /* nonbreaking space (\_) */
	242	{"b", c_b}, /* bulletted list */
	243	{"c", c_c}, /* code */
	244	{"cfg", c_cfg}, /* configuration directive */
	245	{"copyright", c_copyright}, /* copyright statement */
	246	{"cw", c_cw}, /* weak code */
	247	{"date", c_date}, /* document processing date */
	248	{"define", c_define}, /* macro definition */
	249	{"e", c_e}, /* emphasis */
	250	{"i", c_i}, /* visible index mark */
	251	{"ii", c_ii}, /* uncapitalised visible index mark */
	252	{"k", c_k}, /* uncapitalised cross-reference */
	253	{"n", c_n}, /* numbered list */
	254	{"nocite", c_nocite}, /* bibliography trickery */
	255	{"preamble", c_preamble}, /* document preamble text */
	256	{"q", c_q}, /* quote marks */
	257	{"rule", c_rule}, /* horizontal rule */
	258	{"title", c_title}, /* document title */
	259	{"versionid", c_versionid}, /* document RCS id */
	260	{"{", c__escaped}, /* escaped lbrace (\{) */
	261	{"}", c__escaped}, /* escaped rbrace (\}) */
	262	};
	263	int i, j, k, c;
	264
	265	/*
	266	* Special cases: \S{0,1,2,...} and \uABCD. If the syntax
	267	* doesn't match correctly, we just fall through to the
	268	* binary-search phase.
	269	*/
	270	if (tok->text[0] == 'S') {
	271	/* We expect numeric characters thereafter. */
	272	wchar_t *p = tok->text+1;
	273	int n;
	274	if (!*p)
	275	n = 1;
	276	else {
	277	n = 0;
	278	while (p && isdec(p)) {
	279	n = 10 * n + fromdec(*p);
	280	p++;
	281	}
	282	}
	283	if (!*p) {
	284	tok->cmd = c_S;
	285	tok->aux = n;
	286	return;
	287	}
	288	} else if (tok->text[0] == 'u') {
	289	/* We expect hex characters thereafter. */
	290	wchar_t *p = tok->text+1;
	291	int n = 0;
	292	while (p && ishex(p)) {
	293	n = 16 * n + fromhex(*p);
	294	p++;
	295	}
	296	if (!*p) {
	297	tok->cmd = c_u;
	298	tok->aux = n;
	299	return;
	300	}
	301	}
	302
	303	i = -1;
	304	j = sizeof(keywords)/sizeof(*keywords);
	305	while (j-i > 1) {
	306	k = (i+j)/2;
	307	c = kwcmp(tok->text, keywords[k].name);
	308	if (c < 0)
	309	j = k;
	310	else if (c > 0)
	311	i = k;
	312	else /* c == 0 */ {
	313	tok->cmd = keywords[k].id;
	314	return;
	315	}
	316	}
	317
	318	tok->cmd = c__invalid;
	319	}
	320
	321
	322	/*
	323	* Read a token from the input file, in the normal way (`normal' in
	324	* the sense that code paragraphs work a different way).
	325	*/
	326	token get_token(input *in) {
	327	int c;
	328	int nls;
	329	token ret;
	330	rdstring rs = { 0, 0, NULL };
	331	filepos cpos;
	332
	333	ret.text = NULL; /* default */
	334	c = get(in, &cpos);
	335	ret.pos = cpos;
	336	if (iswhite(c)) { /* tok_white or tok_eop */
	337	nls = 0;
	338	do {
	339	if (isnl(c))
	340	nls++;
	341	} while ((c = get(in, &cpos)) != EOF && iswhite(c));
	342	if (c == EOF) {
	343	ret.type = tok_eof;
	344	return ret;
	345	}
	346	unget(in, c, &cpos);
	347	ret.type = (nls > 1 ? tok_eop : tok_white);
	348	return ret;
	349	} else if (c == EOF) { /* tok_eof */
	350	ret.type = tok_eof;
	351	return ret;
	352	} else if (c == '\\') { /* tok_cmd */
	353	c = get(in, &cpos);
	354	if (c == '-' \|\| c == '\\' \|\| c == '_' \|\|
	355	c == '#' \|\| c == '{' \|\| c == '}') {
	356	/* single-char command */
	357	rdadd(&rs, c);
	358	} else if (c == 'u') {
	359	int len = 0;
	360	do {
	361	rdadd(&rs, c);
	362	len++;
	363	c = get(in, &cpos);
	364	} while (ishex(c) && len < 5);
	365	unget(in, c, &cpos);
	366	} else if (iscmd(c)) {
	367	do {
	368	rdadd(&rs, c);
	369	c = get(in, &cpos);
	370	} while (iscmd(c));
	371	unget(in, c, &cpos);
	372	}
	373	/*
	374	* Now match the command against the list of available
	375	* ones.
	376	*/
	377	ret.type = tok_cmd;
	378	ret.text = ustrdup(rs.text);
	379	match_kw(&ret);
	380	sfree(rs.text);
	381	return ret;
	382	} else if (c == '{') { /* tok_lbrace */
	383	ret.type = tok_lbrace;
	384	return ret;
	385	} else if (c == '}') { /* tok_rbrace */
	386	ret.type = tok_rbrace;
	387	return ret;
	388	} else { /* tok_word */
	389	/*
	390	* Read a word: the longest possible contiguous sequence of
	391	* things other than whitespace, backslash, braces and
	392	* hyphen. A hyphen terminates the word but is returned as
	393	* part of it; everything else is pushed back for the next
	394	* token. The `aux' field contains TRUE if the word ends in
	395	* a hyphen.
	396	*/
	397	ret.aux = FALSE; /* assumed for now */
	398	while (1) {
	399	if (iswhite(c) \|\| c=='{' \|\| c=='}' \|\| c=='\\' \|\| c==EOF) {
	400	/* Put back the character that caused termination */
	401	unget(in, c, &cpos);
	402	break;
	403	} else {
	404	rdadd(&rs, c);
	405	if (c == '-') {
	406	ret.aux = TRUE;
	407	break; /* hyphen terminates word */
	408	}
	409	}
	410	c = get(in, &cpos);
	411	}
	412	ret.type = tok_word;
	413	ret.text = ustrdup(rs.text);
	414	sfree(rs.text);
	415	return ret;
	416	}
	417	}
	418
	419	/*
	420	* Determine whether the next input character is an open brace (for
	421	* telling code paragraphs from paragraphs which merely start with
	422	* code).
	423	*/
	424	int isbrace(input *in) {
	425	int c;
	426	filepos cpos;
	427
	428	c = get(in, &cpos);
	429	unget(in, c, &cpos);
	430	return (c == '{');
	431	}
	432
	433	/*
	434	* Read the rest of a line that starts `\c'. Including nothing at
	435	* all (tok_word with empty text).
	436	*/
	437	token get_codepar_token(input *in) {
	438	int c;
	439	token ret;
	440	rdstring rs = { 0, 0, NULL };
	441	filepos cpos;
	442
	443	ret.type = tok_word;
	444	c = get(in, &cpos); /* expect (and discard) one space */
	445	ret.pos = cpos;
	446	if (c == ' ') {
	447	c = get(in, &cpos);
	448	ret.pos = cpos;
	449	}
	450	while (!isnl(c) && c != EOF) {
	451	int c2 = c;
	452	c = get(in, &cpos);
	453	/* Discard \r just before \n. */
	454	if (c2 != 13 \|\| !isnl(c))
	455	rdadd(&rs, c2);
	456	}
	457	unget(in, c, &cpos);
	458	ret.text = ustrdup(rs.text);
	459	sfree(rs.text);
	460	return ret;
	461	}
	462
	463	/*
	464	* Adds a new word to a linked list
	465	*/
	466	static word addword(word newword, word **hptrptr) {
	467	word *mnewword;
	468	if (!hptrptr)
	469	return NULL;
	470	mnewword = mknew(word);
	471	mnewword = newword; / structure copy */
	472	mnewword->next = NULL;
	473	**hptrptr = mnewword;
	474	*hptrptr = &mnewword->next;
	475	return mnewword;
	476	}
	477
	478	/*
	479	* Adds a new paragraph to a linked list
	480	*/
	481	static paragraph addpara(paragraph newpara, paragraph **hptrptr) {
	482	paragraph *mnewpara = mknew(paragraph);
	483	mnewpara = newpara; / structure copy */
	484	mnewpara->next = NULL;
	485	**hptrptr = mnewpara;
	486	*hptrptr = &mnewpara->next;
	487	return mnewpara;
	488	}
	489
	490	/*
	491	* Destructor before token is reassigned; should catch most memory
	492	* leaks
	493	*/
	494	#define dtor(t) ( sfree(t.text) )
	495
	496	/*
	497	* Reads a single file (ie until get() returns EOF)
	498	*/
	499	static void read_file(paragraph **ret, input in, indexdata *idx) {
	500	token t;
	501	paragraph par;
	502	word wd, whptr, idximplicit;
	503	tree234 *macros;
	504	wchar_t utext[2], *wdtext;
	505	int style, spcstyle;
	506	int already;
	507	int iswhite, seenwhite;
	508	int type;
	509	struct stack_item {
	510	enum {
	511	stack_nop = 0, /* do nothing (for error recovery) */
	512	stack_ualt = 1, /* \u alternative */
	513	stack_style = 2, /* \e, \c, \cw */
	514	stack_idx = 4, /* \I, \i, \ii */
	515	stack_hyper = 8, /* \W */
	516	stack_quote = 16, /* \q */
	517	} type;
	518	word *whptr; / to restore from \u alternatives */
	519	word *idximplicit; / to restore from \u alternatives */
	520	} *sitem;
	521	stack parsestk;
	522	word indexword, uword, *iword;
	523	word *idxwordlist;
	524	rdstring indexstr;
	525	int index_downcase, index_visible, indexing;
	526	const rdstring nullrs = { 0, 0, NULL };
	527	wchar_t uchr;
	528
	529	t.text = NULL;
	530	macros = newtree234(macrocmp);
	531
	532	/*
	533	* Loop on each paragraph.
	534	*/
	535	while (1) {
	536	par.words = NULL;
	537	par.keyword = NULL;
	538	whptr = &par.words;
	539
	540	/*
	541	* Get a token.
	542	*/
	543	dtor(t), t = get_token(in);
	544	if (t.type == tok_eof)
	545	return;
	546
	547	/*
	548	* Parse code paragraphs separately.
	549	*/
	550	if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
	551	par.type = para_Code;
	552	par.fpos = t.pos;
	553	while (1) {
	554	dtor(t), t = get_codepar_token(in);
	555	wd.type = word_WeakCode;
	556	wd.breaks = FALSE; /* shouldn't need this... */
	557	wd.text = ustrdup(t.text);
	558	wd.alt = NULL;
	559	wd.fpos = t.pos;
	560	addword(wd, &whptr);
	561	dtor(t), t = get_token(in);
	562	if (t.type == tok_white) {
	563	/*
	564	* The newline after a code-paragraph line
	565	*/
	566	dtor(t), t = get_token(in);
	567	}
	568	if (t.type == tok_eop \|\| t.type == tok_eof)
	569	break;
	570	else if (t.type != tok_cmd \|\| t.cmd != c_c) {
	571	error(err_brokencodepara, &t.pos);
	572	addpara(par, ret);
	573	while (t.type != tok_eop) /* error recovery: */
	574	dtor(t), t = get_token(in); /* eat rest of paragraph */
	575	goto codeparabroken; /* ick, but such is life */
	576	}
	577	}
	578	addpara(par, ret);
	579	codeparabroken:
	580	continue;
	581	}
	582
	583	/*
	584	* This token begins a paragraph. See if it's one of the
	585	* special commands that define a paragraph type.
	586	*
	587	* (note that \# is special in a way, and \nocite takes no
	588	* text)
	589	*/
	590	par.type = para_Normal;
	591	if (t.type == tok_cmd) {
	592	int needkw;
	593	int is_macro = FALSE;
	594
	595	par.fpos = t.pos;
	596	switch (t.cmd) {
	597	default:
	598	needkw = -1;
	599	break;
	600	case c__invalid:
	601	error(err_badparatype, t.text, &t.pos);
	602	needkw = 4;
	603	break;
	604	case c__comment:
	605	if (isbrace(in))
	606	break; /* `\#{': isn't a comment para */
	607	do {
	608	dtor(t), t = get_token(in);
	609	} while (t.type != tok_eop && t.type != tok_eof);
	610	continue; /* next paragraph */
	611	/*
	612	* `needkw' values:
	613	*
	614	* 1 -- exactly one keyword
	615	* 2 -- at least one keyword
	616	* 4 -- any number of keywords including zero
	617	* 8 -- at least one keyword and then nothing else
	618	* 16 -- nothing at all! no keywords, no body
	619	* 32 -- no keywords at all
	620	*/
	621	case c_A: needkw = 2; par.type = para_Appendix; break;
	622	case c_B: needkw = 2; par.type = para_Biblio; break;
	623	case c_BR: needkw = 1; par.type = para_BR; break;
	624	case c_C: needkw = 2; par.type = para_Chapter; break;
	625	case c_H: needkw = 2; par.type = para_Heading;
	626	par.aux = 0;
	627	break;
	628	case c_IM: needkw = 2; par.type = para_IM; break;
	629	case c_S: needkw = 2; par.type = para_Subsect;
	630	par.aux = t.aux; break;
	631	case c_U: needkw = 32; par.type = para_UnnumberedChapter; break;
	632	/* For \b and \n the keyword is optional */
	633	case c_b: needkw = 4; par.type = para_Bullet; break;
	634	case c_n: needkw = 4; par.type = para_NumberedList; break;
	635	case c_cfg: needkw = 8; par.type = para_Config; break;
	636	case c_copyright: needkw = 32; par.type = para_Copyright; break;
	637	case c_define: is_macro = TRUE; needkw = 1; break;
	638	/* For \nocite the keyword is _everything_ */
	639	case c_nocite: needkw = 8; par.type = para_NoCite; break;
	640	case c_preamble: needkw = 32; par.type = para_Preamble; break;
	641	case c_rule: needkw = 16; par.type = para_Rule; break;
	642	case c_title: needkw = 32; par.type = para_Title; break;
	643	case c_versionid: needkw = 32; par.type = para_VersionID; break;
	644	}
	645
	646	if (needkw > 0) {
	647	rdstring rs = { 0, 0, NULL };
	648	int nkeys = 0;
	649	filepos fp;
	650
	651	/* Get keywords. */
	652	dtor(t), t = get_token(in);
	653	fp = t.pos;
	654	while (t.type == tok_lbrace) {
	655	/* This is a keyword. */
	656	nkeys++;
	657	/* FIXME: there will be bugs if anyone specifies an
	658	* empty keyword (\foo{}), so trap this case. */
	659	while (dtor(t), t = get_token(in),
	660	t.type == tok_word \|\|
	661	t.type == tok_white \|\|
	662	(t.type == tok_cmd && t.cmd == c__nbsp) \|\|
	663	(t.type == tok_cmd && t.cmd == c__escaped)) {
	664	if (t.type == tok_white \|\|
	665	(t.type == tok_cmd && t.cmd == c__nbsp))
	666	rdadd(&rs, ' ');
	667	else
	668	rdadds(&rs, t.text);
	669	}
	670	if (t.type != tok_rbrace) {
	671	error(err_kwunclosed, &t.pos);
	672	continue;
	673	}
	674	rdadd(&rs, 0); /* add string terminator */
	675	dtor(t), t = get_token(in); /* eat right brace */
	676	}
	677
	678	rdadd(&rs, 0); /* add string terminator */
	679
	680	/* See whether we have the right number of keywords. */
	681	if ((needkw & 48) && nkeys > 0)
	682	error(err_kwillegal, &fp);
	683	if ((needkw & 11) && nkeys == 0)
	684	error(err_kwexpected, &fp);
	685	if ((needkw & 5) && nkeys > 1)
	686	error(err_kwtoomany, &fp);
	687
	688	if (is_macro) {
	689	/*
	690	* Macro definition. Get the rest of the line
	691	* as a code-paragraph token, repeatedly until
	692	* there's nothing more left of it. Separate
	693	* with newlines.
	694	*/
	695	rdstring macrotext = { 0, 0, NULL };
	696	while (1) {
	697	dtor(t), t = get_codepar_token(in);
	698	if (macrotext.pos > 0)
	699	rdadd(&macrotext, L'\n');
	700	rdadds(&macrotext, t.text);
	701	dtor(t), t = get_token(in);
	702	if (t.type == tok_eop) break;
	703	}
	704	macrodef(macros, rs.text, macrotext.text, fp);
	705	continue; /* next paragraph */
	706	}
	707
	708	par.keyword = rdtrim(&rs);
	709
	710	/* Move to EOP in case of needkw==8 or 16 (no body) */
	711	if (needkw & 24) {
	712	if (t.type != tok_eop && t.type != tok_eof) {
	713	error(err_bodyillegal, &t.pos);
	714	/* Error recovery: eat the rest of the paragraph */
	715	while (t.type != tok_eop && t.type != tok_eof)
	716	dtor(t), t = get_token(in);
	717	}
	718	addpara(par, ret);
	719	continue; /* next paragraph */
	720	}
	721	}
	722	}
	723
	724	/*
	725	* Now read the actual paragraph, word by word, adding to
	726	* the paragraph list.
	727	*
	728	* Mid-paragraph commands:
	729	*
	730	* \K \k
	731	* \c \cw
	732	* \e
	733	* \i \ii
	734	* \I
	735	* \u
	736	* \W
	737	* \date
	738	* \\ \{ \}
	739	*/
	740	parsestk = stk_new();
	741	style = word_Normal;
	742	spcstyle = word_WhiteSpace;
	743	indexing = FALSE;
	744	seenwhite = TRUE;
	745	while (t.type != tok_eop && t.type != tok_eof) {
	746	iswhite = FALSE;
	747	already = FALSE;
	748	if (t.type == tok_cmd && t.cmd == c__escaped) {
	749	t.type = tok_word; /* nice and simple */
	750	t.aux = 0; /* even if `\-' - nonbreaking! */
	751	}
	752	if (t.type == tok_cmd && t.cmd == c__nbsp) {
	753	t.type = tok_word; /* nice and simple */
	754	sfree(t.text);
	755	t.text = ustrdup(L" "); /* text is ` ' not `_' */
	756	t.aux = 0; /* (nonbreaking) */
	757	}
	758	switch (t.type) {
	759	case tok_white:
	760	if (whptr == &par.words)
	761	break; /* strip whitespace at start of para */
	762	wd.text = NULL;
	763	wd.type = spcstyle;
	764	wd.alt = NULL;
	765	wd.aux = 0;
	766	wd.fpos = t.pos;
	767	wd.breaks = FALSE;
	768	if (indexing)
	769	rdadd(&indexstr, ' ');
	770	if (!indexing \|\| index_visible)
	771	addword(wd, &whptr);
	772	if (indexing)
	773	addword(wd, &idximplicit);
	774	iswhite = TRUE;
	775	break;
	776	case tok_word:
	777	if (indexing)
	778	rdadds(&indexstr, t.text);
	779	wd.type = style;
	780	wd.alt = NULL;
	781	wd.aux = 0;
	782	wd.fpos = t.pos;
	783	wd.breaks = t.aux;
	784	if (!indexing \|\| index_visible) {
	785	wd.text = ustrdup(t.text);
	786	addword(wd, &whptr);
	787	}
	788	if (indexing) {
	789	wd.text = ustrdup(t.text);
	790	addword(wd, &idximplicit);
	791	}
	792	break;
	793	case tok_lbrace:
	794	error(err_unexbrace, &t.pos);
	795	/* Error recovery: push nop */
	796	sitem = mknew(struct stack_item);
	797	sitem->type = stack_nop;
	798	stk_push(parsestk, sitem);
	799	break;
	800	case tok_rbrace:
	801	sitem = stk_pop(parsestk);
	802	if (!sitem)
	803	error(err_unexbrace, &t.pos);
	804	else {
	805	if (sitem->type & stack_ualt) {
	806	whptr = sitem->whptr;
	807	idximplicit = sitem->idximplicit;
	808	}
	809	if (sitem->type & stack_style) {
	810	style = word_Normal;
	811	spcstyle = word_WhiteSpace;
	812	}
	813	if (sitem->type & stack_idx) {
	814	indexword->text = ustrdup(indexstr.text);
	815	if (index_downcase)
	816	ustrlow(indexword->text);
	817	indexing = FALSE;
	818	rdadd(&indexstr, L'\0');
	819	index_merge(idx, FALSE, indexstr.text, idxwordlist);
	820	sfree(indexstr.text);
	821	}
	822	if (sitem->type & stack_hyper) {
	823	wd.text = NULL;
	824	wd.type = word_HyperEnd;
	825	wd.alt = NULL;
	826	wd.aux = 0;
	827	wd.fpos = t.pos;
	828	wd.breaks = FALSE;
	829	if (!indexing \|\| index_visible)
	830	addword(wd, &whptr);
	831	if (indexing)
	832	addword(wd, &idximplicit);
	833	}
	834	if (sitem->type & stack_quote) {
	835	wd.text = NULL;
	836	wd.type = toquotestyle(style);
	837	wd.alt = NULL;
	838	wd.aux = quote_Close;
	839	wd.fpos = t.pos;
	840	wd.breaks = FALSE;
	841	if (!indexing \|\| index_visible)
	842	addword(wd, &whptr);
	843	if (indexing) {
	844	rdadd(&indexstr, L'"');
	845	addword(wd, &idximplicit);
	846	}
	847	}
	848	}
	849	sfree(sitem);
	850	break;
	851	case tok_cmd:
	852	switch (t.cmd) {
	853	case c__comment:
	854	/*
	855	* In-paragraph comment: \#{ balanced braces }
	856	*
	857	* Anything goes here; even tok_eop. We should
	858	* eat whitespace after the close brace _if_
	859	* there was whitespace before the \#.
	860	*/
	861	dtor(t), t = get_token(in);
	862	if (t.type != tok_lbrace) {
	863	error(err_explbr, &t.pos);
	864	} else {
	865	int braces = 1;
	866	while (braces > 0) {
	867	dtor(t), t = get_token(in);
	868	if (t.type == tok_lbrace)
	869	braces++;
	870	else if (t.type == tok_rbrace)
	871	braces--;
	872	else if (t.type == tok_eof) {
	873	error(err_commenteof, &t.pos);
	874	break;
	875	}
	876	}
	877	}
	878	if (seenwhite) {
	879	already = TRUE;
	880	dtor(t), t = get_token(in);
	881	if (t.type == tok_white) {
	882	iswhite = TRUE;
	883	already = FALSE;
	884	}
	885	}
	886	break;
	887	case c_q:
	888	dtor(t), t = get_token(in);
	889	if (t.type != tok_lbrace) {
	890	error(err_explbr, &t.pos);
	891	} else {
	892	wd.text = NULL;
	893	wd.type = toquotestyle(style);
	894	wd.alt = NULL;
	895	wd.aux = quote_Open;
	896	wd.fpos = t.pos;
	897	wd.breaks = FALSE;
	898	if (!indexing \|\| index_visible)
	899	addword(wd, &whptr);
	900	if (indexing) {
	901	rdadd(&indexstr, L'"');
	902	addword(wd, &idximplicit);
	903	}
	904	sitem = mknew(struct stack_item);
	905	sitem->type = stack_quote;
	906	stk_push(parsestk, sitem);
	907	}
	908	break;
	909	case c_K:
	910	case c_k:
	911	case c_W:
	912	case c_date:
	913	/*
	914	* Keyword, hyperlink, or \date. We expect a
	915	* left brace, some text, and then a right
	916	* brace. No nesting; no arguments.
	917	*/
	918	wd.fpos = t.pos;
	919	wd.breaks = FALSE;
	920	if (t.cmd == c_K)
	921	wd.type = word_UpperXref;
	922	else if (t.cmd == c_k)
	923	wd.type = word_LowerXref;
	924	else if (t.cmd == c_W)
	925	wd.type = word_HyperLink;
	926	else
	927	wd.type = word_Normal;
	928	dtor(t), t = get_token(in);
	929	if (t.type != tok_lbrace) {
	930	if (wd.type == word_Normal) {
	931	time_t thetime = time(NULL);
	932	struct tm *broken = localtime(&thetime);
	933	already = TRUE;
	934	wdtext = ustrftime(NULL, broken);
	935	wd.type = style;
	936	} else {
	937	error(err_explbr, &t.pos);
	938	wdtext = NULL;
	939	}
	940	} else {
	941	rdstring rs = { 0, 0, NULL };
	942	while (dtor(t), t = get_token(in),
	943	t.type == tok_word \|\| t.type == tok_white) {
	944	if (t.type == tok_white)
	945	rdadd(&rs, ' ');
	946	else
	947	rdadds(&rs, t.text);
	948	}
	949	if (wd.type == word_Normal) {
	950	time_t thetime = time(NULL);
	951	struct tm *broken = localtime(&thetime);
	952	wdtext = ustrftime(rs.text, broken);
	953	wd.type = style;
	954	} else {
	955	wdtext = ustrdup(rs.text);
	956	}
	957	sfree(rs.text);
	958	if (t.type != tok_rbrace) {
	959	error(err_kwexprbr, &t.pos);
	960	}
	961	}
	962	wd.alt = NULL;
	963	wd.aux = 0;
	964	if (!indexing \|\| index_visible) {
	965	wd.text = ustrdup(wdtext);
	966	addword(wd, &whptr);
	967	}
	968	if (indexing) {
	969	wd.text = ustrdup(wdtext);
	970	addword(wd, &idximplicit);
	971	}
	972	sfree(wdtext);
	973	if (wd.type == word_HyperLink) {
	974	/*
	975	* Hyperlinks are different: they then
	976	* expect another left brace, to begin
	977	* delimiting the text marked by the link.
	978	*/
	979	dtor(t), t = get_token(in);
	980	/*
	981	* Special cases: \W{}\c, \W{}\e, \W{}\cw
	982	*/
	983	sitem = mknew(struct stack_item);
	984	sitem->type = stack_hyper;
	985	if (t.type == tok_cmd &&
	986	(t.cmd == c_e \|\| t.cmd == c_c \|\| t.cmd == c_cw)) {
	987	if (style != word_Normal)
	988	error(err_nestedstyles, &t.pos);
	989	else {
	990	style = (t.cmd == c_c ? word_Code :
	991	t.cmd == c_cw ? word_WeakCode :
	992	word_Emph);
	993	spcstyle = tospacestyle(style);
	994	sitem->type \|= stack_style;
	995	}
	996	dtor(t), t = get_token(in);
	997	}
	998	if (t.type != tok_lbrace) {
	999	error(err_explbr, &t.pos);
	1000	sfree(sitem);
	1001	} else {
	1002	stk_push(parsestk, sitem);
	1003	}
	1004	}
	1005	break;
	1006	case c_c:
	1007	case c_cw:
	1008	case c_e:
	1009	type = t.cmd;
	1010	if (style != word_Normal) {
	1011	error(err_nestedstyles, &t.pos);
	1012	/* Error recovery: eat lbrace, push nop. */
	1013	dtor(t), t = get_token(in);
	1014	sitem = mknew(struct stack_item);
	1015	sitem->type = stack_nop;
	1016	stk_push(parsestk, sitem);
	1017	}
	1018	dtor(t), t = get_token(in);
	1019	if (t.type != tok_lbrace) {
	1020	error(err_explbr, &t.pos);
	1021	} else {
	1022	style = (type == c_c ? word_Code :
	1023	type == c_cw ? word_WeakCode :
	1024	word_Emph);
	1025	spcstyle = tospacestyle(style);
	1026	sitem = mknew(struct stack_item);
	1027	sitem->type = stack_style;
	1028	stk_push(parsestk, sitem);
	1029	}
	1030	break;
	1031	case c_i:
	1032	case c_ii:
	1033	case c_I:
	1034	type = t.cmd;
	1035	if (indexing) {
	1036	error(err_nestedindex, &t.pos);
	1037	/* Error recovery: eat lbrace, push nop. */
	1038	dtor(t), t = get_token(in);
	1039	sitem = mknew(struct stack_item);
	1040	sitem->type = stack_nop;
	1041	stk_push(parsestk, sitem);
	1042	}
	1043	sitem = mknew(struct stack_item);
	1044	sitem->type = stack_idx;
	1045	dtor(t), t = get_token(in);
	1046	/*
	1047	* Special cases: \i\c, \i\e, \i\cw
	1048	*/
	1049	wd.fpos = t.pos;
	1050	if (t.type == tok_cmd &&
	1051	(t.cmd == c_e \|\| t.cmd == c_c \|\| t.cmd == c_cw)) {
	1052	if (style != word_Normal)
	1053	error(err_nestedstyles, &t.pos);
	1054	else {
	1055	style = (t.cmd == c_c ? word_Code :
	1056	t.cmd == c_cw ? word_WeakCode :
	1057	word_Emph);
	1058	spcstyle = tospacestyle(style);
	1059	sitem->type \|= stack_style;
	1060	}
	1061	dtor(t), t = get_token(in);
	1062	}
	1063	if (t.type != tok_lbrace) {
	1064	sfree(sitem);
	1065	error(err_explbr, &t.pos);
	1066	} else {
	1067	/* Add an index-reference word with no text as yet */
	1068	wd.type = word_IndexRef;
	1069	wd.text = NULL;
	1070	wd.alt = NULL;
	1071	wd.aux = 0;
	1072	wd.breaks = FALSE;
	1073	indexword = addword(wd, &whptr);
	1074	/* Set up a rdstring to read the index text */
	1075	indexstr = nullrs;
	1076	/* Flags so that we do the Right Things with text */
	1077	index_visible = (type != c_I);
	1078	index_downcase = (type == c_ii);
	1079	indexing = TRUE;
	1080	idxwordlist = NULL;
	1081	idximplicit = &idxwordlist;
	1082	/* Stack item to close the indexing on exit */
	1083	stk_push(parsestk, sitem);
	1084	}
	1085	break;
	1086	case c_u:
	1087	uchr = t.aux;
	1088	utext[0] = uchr; utext[1] = 0;
	1089	wd.type = style;
	1090	wd.breaks = FALSE;
	1091	wd.alt = NULL;
	1092	wd.aux = 0;
	1093	wd.fpos = t.pos;
	1094	if (!indexing \|\| index_visible) {
	1095	wd.text = ustrdup(utext);
	1096	uword = addword(wd, &whptr);
	1097	} else
	1098	uword = NULL;
	1099	if (indexing) {
	1100	wd.text = ustrdup(utext);
	1101	iword = addword(wd, &idximplicit);
	1102	} else
	1103	iword = NULL;
	1104	dtor(t), t = get_token(in);
	1105	if (t.type == tok_lbrace) {
	1106	/*
	1107	* \u with a left brace. Until the brace
	1108	* closes, all further words go on a
	1109	* sidetrack from the main thread of the
	1110	* paragraph.
	1111	*/
	1112	sitem = mknew(struct stack_item);
	1113	sitem->type = stack_ualt;
	1114	sitem->whptr = whptr;
	1115	sitem->idximplicit = idximplicit;
	1116	stk_push(parsestk, sitem);
	1117	whptr = uword ? &uword->alt : NULL;
	1118	idximplicit = iword ? &iword->alt : NULL;
	1119	} else {
	1120	if (indexing)
	1121	rdadd(&indexstr, uchr);
	1122	already = TRUE;
	1123	}
	1124	break;
	1125	default:
	1126	if (!macrolookup(macros, in, t.text, &t.pos))
	1127	error(err_badmidcmd, t.text, &t.pos);
	1128	break;
	1129	}
	1130	}
	1131	if (!already)
	1132	dtor(t), t = get_token(in);
	1133	seenwhite = iswhite;
	1134	}
	1135	/* Check the stack is empty */
	1136	if (NULL != (sitem = stk_pop(parsestk))) {
	1137	do {
	1138	sfree(sitem);
	1139	sitem = stk_pop(parsestk);
	1140	} while (sitem);
	1141	error(err_missingrbrace, &t.pos);
	1142	}
	1143	stk_free(parsestk);
	1144	addpara(par, ret);
	1145	}
	1146	dtor(t);
	1147	macrocleanup(macros);
	1148	}
	1149
	1150	paragraph read_input(input in, indexdata *idx) {
	1151	paragraph *head = NULL;
	1152	paragraph **hptr = &head;
	1153
	1154	while (in->currindex < in->nfiles) {
	1155	in->currfp = fopen(in->filenames[in->currindex], "r");
	1156	if (in->currfp) {
	1157	setpos(in, in->filenames[in->currindex]);
	1158	read_file(&hptr, in, idx);
	1159	}
	1160	in->currindex++;
	1161	}
	1162
	1163	return head;
	1164	}