Sort out error handling everywhere a charset name is converted into
[sgt/halibut] / input.c
1 /*
2 * input.c: read the source form
3 */
4
5 #include <stdio.h>
6 #include <assert.h>
7 #include <time.h>
8 #include "halibut.h"
9
10 #define TAB_STOP 8 /* for column number tracking */
11
12 static void setpos(input *in, char *fname) {
13 in->pos.filename = fname;
14 in->pos.line = 1;
15 in->pos.col = (in->reportcols ? 1 : -1);
16 }
17
18 static void unget(input *in, int c, filepos *pos) {
19 if (in->npushback >= in->pushbacksize) {
20 in->pushbacksize = in->npushback + 16;
21 in->pushback = sresize(in->pushback, in->pushbacksize, pushback);
22 }
23 in->pushback[in->npushback].chr = c;
24 in->pushback[in->npushback].pos = *pos; /* structure copy */
25 in->npushback++;
26 }
27
28 /* ---------------------------------------------------------------------- */
29 /*
30 * Macro subsystem
31 */
32 typedef struct macro_Tag macro;
33 struct macro_Tag {
34 wchar_t *name, *text;
35 };
36 struct macrostack_Tag {
37 macrostack *next;
38 wchar_t *text;
39 int ptr, npushback;
40 filepos pos;
41 };
42 static int macrocmp(void *av, void *bv) {
43 macro *a = (macro *)av, *b = (macro *)bv;
44 return ustrcmp(a->name, b->name);
45 }
46 static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text,
47 filepos fpos) {
48 macro *m = snew(macro);
49 m->name = name;
50 m->text = text;
51 if (add234(macros, m) != m) {
52 error(err_macroexists, &fpos, name);
53 sfree(name);
54 sfree(text);
55 }
56 }
57 static int macrolookup(tree234 *macros, input *in, wchar_t *name,
58 filepos *pos) {
59 macro m, *gotit;
60 m.name = name;
61 gotit = find234(macros, &m, NULL);
62 if (gotit) {
63 macrostack *expansion = snew(macrostack);
64 expansion->next = in->stack;
65 expansion->text = gotit->text;
66 expansion->pos = *pos; /* structure copy */
67 expansion->ptr = 0;
68 expansion->npushback = in->npushback;
69 in->stack = expansion;
70 return TRUE;
71 } else
72 return FALSE;
73 }
74 static void macrocleanup(tree234 *macros) {
75 int ti;
76 macro *m;
77 for (ti = 0; (m = (macro *)index234(macros, ti)) != NULL; ti++) {
78 sfree(m->name);
79 sfree(m->text);
80 sfree(m);
81 }
82 freetree234(macros);
83 }
84
85 static void input_configure(input *in, paragraph *cfg) {
86 assert(cfg->type == para_Config);
87
88 if (!ustricmp(cfg->keyword, L"input-charset")) {
89 in->charset = charset_from_ustr(&cfg->fpos, uadv(cfg->keyword));
90 }
91 }
92
93 /*
94 * Can return EOF
95 */
96 static int get(input *in, filepos *pos, rdstringc *rsc) {
97 int pushbackpt = in->stack ? in->stack->npushback : 0;
98 if (in->npushback > pushbackpt) {
99 --in->npushback;
100 if (pos)
101 *pos = in->pushback[in->npushback].pos; /* structure copy */
102 return in->pushback[in->npushback].chr;
103 }
104 else if (in->stack) {
105 wchar_t c = in->stack->text[in->stack->ptr];
106 if (in->stack->text[++in->stack->ptr] == L'\0') {
107 macrostack *tmp = in->stack;
108 in->stack = tmp->next;
109 sfree(tmp);
110 }
111 return c;
112 }
113 else if (in->currfp) {
114
115 while (in->wcpos >= in->nwc) {
116
117 int c = getc(in->currfp);
118
119 if (c == EOF) {
120 fclose(in->currfp);
121 in->currfp = NULL;
122 return EOF;
123 }
124
125 if (rsc)
126 rdaddc(rsc, c);
127
128 /* Track line numbers, for error reporting */
129 if (pos)
130 *pos = in->pos;
131 if (in->reportcols) {
132 switch (c) {
133 case '\t':
134 in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP;
135 break;
136 case '\n':
137 in->pos.col = 1;
138 in->pos.line++;
139 break;
140 default:
141 in->pos.col++;
142 break;
143 }
144 } else {
145 in->pos.col = -1;
146 if (c == '\n')
147 in->pos.line++;
148 }
149
150 /*
151 * Do input character set translation, so that we return
152 * Unicode.
153 */
154 {
155 char buf[1];
156 char const *p;
157 int inlen;
158
159 buf[0] = (char)c;
160 p = buf;
161 inlen = 1;
162
163 in->nwc = charset_to_unicode(&p, &inlen,
164 in->wc, lenof(in->wc),
165 in->charset, &in->csstate,
166 NULL, 0);
167 assert(p == buf+1 && inlen == 0);
168
169 in->wcpos = 0;
170 }
171 }
172
173 return in->wc[in->wcpos++];
174
175 } else
176 return EOF;
177 }
178
179 /*
180 * Lexical analysis of source files.
181 */
182 typedef struct token_Tag token;
183 struct token_Tag {
184 int type;
185 int cmd, aux;
186 wchar_t *text;
187 char *origtext;
188 filepos pos;
189 };
/* Token types (the `type' field of a token). */
enum {
    tok_eof = 0,                       /* end of file */
    tok_eop,                           /* end of paragraph */
    tok_white,                         /* whitespace */
    tok_word,                          /* a word or word fragment */
    tok_cmd,                           /* \command */
    tok_lbrace,                        /* { */
    tok_rbrace                         /* } */
};
199
200 /* Halibut command keywords. */
/*
 * Command identifiers (the `cmd' field of a tok_cmd token). The
 * c__* entries are internal categories rather than spelled-out
 * command names.
 */
enum {
    c__invalid = 0,                    /* invalid command */
    c__comment,                        /* comment command (\#) */
    c__escaped,                        /* escaped character */
    c__nop,                            /* no-op */
    c__nbsp,                           /* nonbreaking space */
    c_A,                               /* appendix heading */
    c_B,                               /* bibliography entry */
    c_BR,                              /* bibliography rewrite */
    c_C,                               /* chapter heading */
    c_H,                               /* heading */
    c_I,                               /* invisible index mark */
    c_IM,                              /* index merge/rewrite */
    c_K,                               /* capitalised cross-reference */
    c_S,                               /* aux field is 0, 1, 2, ... */
    c_U,                               /* unnumbered-chapter heading */
    c_W,                               /* Web hyperlink */
    c_b,                               /* bulletted list */
    c_c,                               /* code */
    c_cfg,                             /* configuration directive */
    c_copyright,                       /* copyright statement */
    c_cw,                              /* weak code */
    c_date,                            /* document processing date */
    c_dd,                              /* description list: description */
    c_define,                          /* macro definition */
    c_dt,                              /* description list: described thing */
    c_e,                               /* emphasis */
    c_i,                               /* visible index mark */
    c_ii,                              /* uncapitalised visible index mark */
    c_k,                               /* uncapitalised cross-reference */
    c_lcont,                           /* continuation para(s) for list item */
    c_n,                               /* numbered list */
    c_nocite,                          /* bibliography trickery */
    c_preamble,                        /* (obsolete) preamble text */
    c_q,                               /* quote marks */
    c_quote,                           /* block-quoted paragraphs */
    c_rule,                            /* horizontal rule */
    c_title,                           /* document title */
    c_u,                               /* aux field is char code */
    c_versionid                        /* document RCS id */
};
242
243 /* Perhaps whitespace should be defined in a more Unicode-friendly way? */
/*
 * Character classification on code points. These are macros and may
 * evaluate their argument more than once, so pass only
 * side-effect-free expressions.
 */
#define isnl(c) ( (c) == 10 )
#define iswhite(c) ( (c) == 32 || (c) == 9 || (c) == 13 || isnl(c) )
#define isdec(c) ( (c) >= '0' && (c) <= '9' )
#define fromdec(c) ( (c) - '0' )
#define ishex(c) ( isdec(c) || ((c) >= 'A' && (c) <= 'F') || \
                   ((c) >= 'a' && (c) <= 'f') )
/* Only meaningful when ishex(c); masking with 0xDF folds a-f to A-F. */
#define fromhex(c) ( (c) > '9' ? ((c) & 0xDF) - 'A' + 10 : (c) - '0' )
#define iscmd(c) ( isdec(c) || ((c) >= 'A' && (c) <= 'Z') || \
                   ((c) >= 'a' && (c) <= 'z') )
251
252 /*
253 * Keyword comparison function. Like strcmp, but between a wchar_t *
254 * and a char *.
255 */
/*
 * Compare wide string `p' with narrow string `q', strcmp-fashion.
 * Returns zero if they are equal; otherwise the difference between
 * the first pair of differing characters (p's minus q's).
 */
static int kwcmp(wchar_t const *p, char const *q) {
    for (;;) {
        int diff = *p - *q;

        /* Stop at the first mismatch or at the end of either string. */
        if (diff != 0 || *p == L'\0' || *q == '\0')
            return diff;

        p++;
        q++;
    }
}
263
264 /*
265 * Match a keyword.
266 */
267 static void match_kw(token *tok) {
268 /*
269 * FIXME. The ids are explicit in here so as to allow long-name
270 * equivalents to the various very short keywords.
271 */
272 static const struct { char const *name; int id; } keywords[] = {
273 {"#", c__comment}, /* comment command (\#) */
274 {"-", c__escaped}, /* nonbreaking hyphen */
275 {".", c__nop}, /* no-op */
276 {"A", c_A}, /* appendix heading */
277 {"B", c_B}, /* bibliography entry */
278 {"BR", c_BR}, /* bibliography rewrite */
279 {"C", c_C}, /* chapter heading */
280 {"H", c_H}, /* heading */
281 {"I", c_I}, /* invisible index mark */
282 {"IM", c_IM}, /* index merge/rewrite */
283 {"K", c_K}, /* capitalised cross-reference */
284 {"U", c_U}, /* unnumbered-chapter heading */
285 {"W", c_W}, /* Web hyperlink */
286 {"\\", c__escaped}, /* escaped backslash (\\) */
287 {"_", c__nbsp}, /* nonbreaking space (\_) */
288 {"b", c_b}, /* bulletted list */
289 {"c", c_c}, /* code */
290 {"cfg", c_cfg}, /* configuration directive */
291 {"copyright", c_copyright}, /* copyright statement */
292 {"cw", c_cw}, /* weak code */
293 {"date", c_date}, /* document processing date */
294 {"dd", c_dd}, /* description list: description */
295 {"define", c_define}, /* macro definition */
296 {"dt", c_dt}, /* description list: described thing */
297 {"e", c_e}, /* emphasis */
298 {"i", c_i}, /* visible index mark */
299 {"ii", c_ii}, /* uncapitalised visible index mark */
300 {"k", c_k}, /* uncapitalised cross-reference */
301 {"lcont", c_lcont}, /* continuation para(s) for list item */
302 {"n", c_n}, /* numbered list */
303 {"nocite", c_nocite}, /* bibliography trickery */
304 {"preamble", c_preamble}, /* (obsolete) preamble text */
305 {"q", c_q}, /* quote marks */
306 {"quote", c_quote}, /* block-quoted paragraphs */
307 {"rule", c_rule}, /* horizontal rule */
308 {"title", c_title}, /* document title */
309 {"versionid", c_versionid}, /* document RCS id */
310 {"{", c__escaped}, /* escaped lbrace (\{) */
311 {"}", c__escaped}, /* escaped rbrace (\}) */
312 };
313 int i, j, k, c;
314
315 /*
316 * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
317 * doesn't match correctly, we just fall through to the
318 * binary-search phase.
319 */
320 if (tok->text[0] == 'S') {
321 /* We expect numeric characters thereafter. */
322 wchar_t *p = tok->text+1;
323 int n;
324 if (!*p)
325 n = 1;
326 else {
327 n = 0;
328 while (*p && isdec(*p)) {
329 n = 10 * n + fromdec(*p);
330 p++;
331 }
332 }
333 if (!*p) {
334 tok->cmd = c_S;
335 tok->aux = n;
336 return;
337 }
338 } else if (tok->text[0] == 'u') {
339 /* We expect hex characters thereafter. */
340 wchar_t *p = tok->text+1;
341 int n = 0;
342 while (*p && ishex(*p)) {
343 n = 16 * n + fromhex(*p);
344 p++;
345 }
346 if (!*p) {
347 tok->cmd = c_u;
348 tok->aux = n;
349 return;
350 }
351 }
352
353 i = -1;
354 j = sizeof(keywords)/sizeof(*keywords);
355 while (j-i > 1) {
356 k = (i+j)/2;
357 c = kwcmp(tok->text, keywords[k].name);
358 if (c < 0)
359 j = k;
360 else if (c > 0)
361 i = k;
362 else /* c == 0 */ {
363 tok->cmd = keywords[k].id;
364 return;
365 }
366 }
367
368 tok->cmd = c__invalid;
369 }
370
371
372 /*
373 * Read a token from the input file, in the normal way (`normal' in
374 * the sense that code paragraphs work a different way).
375 */
376 token get_token(input *in) {
377 int c;
378 int nls;
379 int prevpos;
380 token ret;
381 rdstring rs = { 0, 0, NULL };
382 rdstringc rsc = { 0, 0, NULL };
383 filepos cpos;
384
385 ret.text = NULL; /* default */
386 ret.origtext = NULL; /* default */
387 if (in->pushback_chars) {
388 rdaddsc(&rsc, in->pushback_chars);
389 sfree(in->pushback_chars);
390 in->pushback_chars = NULL;
391 }
392 c = get(in, &cpos, &rsc);
393 ret.pos = cpos;
394 if (iswhite(c)) { /* tok_white or tok_eop */
395 nls = 0;
396 prevpos = 0;
397 do {
398 if (isnl(c))
399 nls++;
400 prevpos = rsc.pos;
401 } while ((c = get(in, &cpos, &rsc)) != EOF && iswhite(c));
402 if (c == EOF) {
403 ret.type = tok_eof;
404 sfree(rsc.text);
405 return ret;
406 }
407 if (rsc.text) {
408 in->pushback_chars = dupstr(rsc.text + prevpos);
409 sfree(rsc.text);
410 }
411 unget(in, c, &cpos);
412 ret.type = (nls > 1 ? tok_eop : tok_white);
413 return ret;
414 } else if (c == EOF) { /* tok_eof */
415 ret.type = tok_eof;
416 sfree(rsc.text);
417 return ret;
418 } else if (c == '\\') { /* tok_cmd */
419 rsc.pos = prevpos = 0;
420 c = get(in, &cpos, &rsc);
421 if (c == '-' || c == '\\' || c == '_' ||
422 c == '#' || c == '{' || c == '}' || c == '.') {
423 /* single-char command */
424 rdadd(&rs, c);
425 } else if (c == 'u') {
426 int len = 0;
427 do {
428 rdadd(&rs, c);
429 len++;
430 prevpos = rsc.pos;
431 c = get(in, &cpos, &rsc);
432 } while (ishex(c) && len < 5);
433 unget(in, c, &cpos);
434 } else if (iscmd(c)) {
435 do {
436 rdadd(&rs, c);
437 prevpos = rsc.pos;
438 c = get(in, &cpos, &rsc);
439 } while (iscmd(c));
440 unget(in, c, &cpos);
441 }
442 /*
443 * Now match the command against the list of available
444 * ones.
445 */
446 ret.type = tok_cmd;
447 ret.text = ustrdup(rs.text);
448 if (rsc.text) {
449 in->pushback_chars = dupstr(rsc.text + prevpos);
450 rsc.text[prevpos] = '\0';
451 ret.origtext = dupstr(rsc.text);
452 } else {
453 ret.origtext = dupstr("");
454 }
455 match_kw(&ret);
456 sfree(rs.text);
457 sfree(rsc.text);
458 return ret;
459 } else if (c == '{') { /* tok_lbrace */
460 ret.type = tok_lbrace;
461 sfree(rsc.text);
462 return ret;
463 } else if (c == '}') { /* tok_rbrace */
464 ret.type = tok_rbrace;
465 sfree(rsc.text);
466 return ret;
467 } else { /* tok_word */
468 /*
469 * Read a word: the longest possible contiguous sequence of
470 * things other than whitespace, backslash, braces and
471 * hyphen. A hyphen terminates the word but is returned as
472 * part of it; everything else is pushed back for the next
473 * token. The `aux' field contains TRUE if the word ends in
474 * a hyphen.
475 */
476 ret.aux = FALSE; /* assumed for now */
477 prevpos = 0;
478 while (1) {
479 if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) {
480 /* Put back the character that caused termination */
481 unget(in, c, &cpos);
482 break;
483 } else {
484 rdadd(&rs, c);
485 if (c == '-') {
486 prevpos = rsc.pos;
487 ret.aux = TRUE;
488 break; /* hyphen terminates word */
489 }
490 }
491 prevpos = rsc.pos;
492 c = get(in, &cpos, &rsc);
493 }
494 ret.type = tok_word;
495 ret.text = ustrdup(rs.text);
496 if (rsc.text) {
497 in->pushback_chars = dupstr(rsc.text + prevpos);
498 rsc.text[prevpos] = '\0';
499 ret.origtext = dupstr(rsc.text);
500 } else {
501 ret.origtext = dupstr("");
502 }
503 sfree(rs.text);
504 sfree(rsc.text);
505 return ret;
506 }
507 }
508
509 /*
510 * Determine whether the next input character is an open brace (for
511 * telling code paragraphs from paragraphs which merely start with
512 * code).
513 */
514 int isbrace(input *in) {
515 int c;
516 filepos cpos;
517
518 c = get(in, &cpos, NULL);
519 unget(in, c, &cpos);
520 return (c == '{');
521 }
522
523 /*
524 * Read the rest of a line that starts `\c'. Including nothing at
525 * all (tok_word with empty text).
526 */
527 token get_codepar_token(input *in) {
528 int c;
529 token ret;
530 rdstring rs = { 0, 0, NULL };
531 filepos cpos;
532
533 ret.type = tok_word;
534 ret.origtext = NULL;
535 c = get(in, &cpos, NULL); /* expect (and discard) one space */
536 ret.pos = cpos;
537 if (c == ' ') {
538 c = get(in, &cpos, NULL);
539 ret.pos = cpos;
540 }
541 while (!isnl(c) && c != EOF) {
542 int c2 = c;
543 c = get(in, &cpos, NULL);
544 /* Discard \r just before \n. */
545 if (c2 != 13 || !isnl(c))
546 rdadd(&rs, c2);
547 }
548 unget(in, c, &cpos);
549 ret.text = ustrdup(rs.text);
550 sfree(rs.text);
551 return ret;
552 }
553
554 /*
555 * Adds a new word to a linked list
556 */
557 static word *addword(word newword, word ***hptrptr) {
558 word *mnewword;
559 if (!hptrptr)
560 return NULL;
561 mnewword = snew(word);
562 *mnewword = newword; /* structure copy */
563 mnewword->next = NULL;
564 **hptrptr = mnewword;
565 *hptrptr = &mnewword->next;
566 return mnewword;
567 }
568
569 /*
570 * Adds a new paragraph to a linked list
571 */
572 static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
573 paragraph *mnewpara = snew(paragraph);
574 *mnewpara = newpara; /* structure copy */
575 mnewpara->next = NULL;
576 **hptrptr = mnewpara;
577 *hptrptr = &mnewpara->next;
578 return mnewpara;
579 }
580
581 /*
582 * Destructor before token is reassigned; should catch most memory
583 * leaks
584 */
/*
 * Frees both strings owned by token `t'. The argument is evaluated
 * twice, and the pointers are not cleared, so `t' should be
 * reassigned straight afterwards.
 */
#define dtor(t) ( sfree((t).text), sfree((t).origtext) )
586
587 /*
588 * Reads a single file (ie until get() returns EOF)
589 */
590 static void read_file(paragraph ***ret, input *in, indexdata *idx) {
591 token t;
592 paragraph par;
593 word wd, **whptr, **idximplicit;
594 tree234 *macros;
595 wchar_t utext[2], *wdtext;
596 int style, spcstyle;
597 int already;
598 int iswhite, seenwhite;
599 int type;
600 int prev_para_type;
601 struct stack_item {
602 enum {
603 stack_nop = 0, /* do nothing (for error recovery) */
604 stack_ualt = 1, /* \u alternative */
605 stack_style = 2, /* \e, \c, \cw */
606 stack_idx = 4, /* \I, \i, \ii */
607 stack_hyper = 8, /* \W */
608 stack_quote = 16, /* \q */
609 } type;
610 word **whptr; /* to restore from \u alternatives */
611 word **idximplicit; /* to restore from \u alternatives */
612 filepos fpos;
613 int in_code;
614 } *sitem;
615 stack parsestk;
616 struct crossparaitem {
617 int type; /* currently c_lcont, c_quote or -1 */
618 int seen_lcont, seen_quote;
619 };
620 stack crossparastk;
621 word *indexword, *uword, *iword;
622 word *idxwordlist;
623 rdstring indexstr;
624 int index_downcase, index_visible, indexing;
625 const rdstring nullrs = { 0, 0, NULL };
626 wchar_t uchr;
627
628 t.text = NULL;
629 t.origtext = NULL;
630 macros = newtree234(macrocmp);
631 already = FALSE;
632
633 crossparastk = stk_new();
634
635 /*
636 * Loop on each paragraph.
637 */
638 while (1) {
639 int start_cmd = c__invalid;
640 par.words = NULL;
641 par.keyword = NULL;
642 par.origkeyword = NULL;
643 whptr = &par.words;
644
645 /*
646 * Get a token.
647 */
648 do {
649 if (!already) {
650 dtor(t), t = get_token(in);
651 }
652 already = FALSE;
653 } while (t.type == tok_eop);
654 if (t.type == tok_eof)
655 break;
656
657 /*
658 * Parse code paragraphs separately.
659 */
660 if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
661 int wtype = word_WeakCode;
662
663 par.type = para_Code;
664 par.fpos = t.pos;
665 while (1) {
666 dtor(t), t = get_codepar_token(in);
667 wd.type = wtype;
668 wd.breaks = FALSE; /* shouldn't need this... */
669 wd.text = ustrdup(t.text);
670 wd.alt = NULL;
671 wd.fpos = t.pos;
672 addword(wd, &whptr);
673 dtor(t), t = get_token(in);
674 if (t.type == tok_white) {
675 /*
676 * The newline after a code-paragraph line
677 */
678 dtor(t), t = get_token(in);
679 }
680 if (t.type == tok_eop || t.type == tok_eof ||
681 t.type == tok_rbrace) { /* might be } terminating \lcont */
682 if (t.type == tok_rbrace)
683 already = TRUE;
684 break;
685 } else if (t.type == tok_cmd && t.cmd == c_c) {
686 wtype = word_WeakCode;
687 } else if (t.type == tok_cmd && t.cmd == c_e &&
688 wtype == word_WeakCode) {
689 wtype = word_Emph;
690 } else {
691 error(err_brokencodepara, &t.pos);
692 prev_para_type = par.type;
693 addpara(par, ret);
694 while (t.type != tok_eop) /* error recovery: */
695 dtor(t), t = get_token(in); /* eat rest of paragraph */
696 goto codeparabroken; /* ick, but such is life */
697 }
698 }
699 prev_para_type = par.type;
700 addpara(par, ret);
701 codeparabroken:
702 continue;
703 }
704
705 /*
706 * Spot the special commands that define a grouping of more
707 * than one paragraph, and also the closing braces that
708 * finish them.
709 */
710 if (t.type == tok_cmd &&
711 (t.cmd == c_lcont || t.cmd == c_quote)) {
712 struct crossparaitem *sitem, *stop;
713 int cmd = t.cmd;
714
715 /*
716 * Expect, and swallow, an open brace.
717 */
718 dtor(t), t = get_token(in);
719 if (t.type != tok_lbrace) {
720 error(err_explbr, &t.pos);
721 continue;
722 }
723
724 /*
725 * Also expect, and swallow, any whitespace after that
726 * (a newline before a code paragraph wouldn't be
727 * surprising).
728 */
729 do {
730 dtor(t), t = get_token(in);
731 } while (t.type == tok_white);
732 already = TRUE;
733
734 if (cmd == c_lcont) {
735 /*
736 * \lcont causes a continuation of a list item into
737 * multiple paragraphs (which may in turn contain
738 * nested lists, code paras etc). Hence, the previous
739 * paragraph must be of a list type.
740 */
741 sitem = snew(struct crossparaitem);
742 stop = (struct crossparaitem *)stk_top(crossparastk);
743 if (stop)
744 *sitem = *stop;
745 else
746 sitem->seen_quote = sitem->seen_lcont = 0;
747
748 if (prev_para_type == para_Bullet ||
749 prev_para_type == para_NumberedList ||
750 prev_para_type == para_Description) {
751 sitem->type = c_lcont;
752 sitem->seen_lcont = 1;
753 par.type = para_LcontPush;
754 prev_para_type = par.type;
755 addpara(par, ret);
756 } else {
757 /*
758 * Push a null item on the cross-para stack so that
759 * when we see the corresponding closing brace we
760 * don't give a cascade error.
761 */
762 sitem->type = -1;
763 error(err_misplacedlcont, &t.pos);
764 }
765 } else {
766 /*
767 * \quote causes a group of paragraphs to be
768 * block-quoted (typically they will be indented a
769 * bit).
770 */
771 sitem = snew(struct crossparaitem);
772 stop = (struct crossparaitem *)stk_top(crossparastk);
773 if (stop)
774 *sitem = *stop;
775 else
776 sitem->seen_quote = sitem->seen_lcont = 0;
777 sitem->type = c_quote;
778 sitem->seen_quote = 1;
779 par.type = para_QuotePush;
780 prev_para_type = par.type;
781 addpara(par, ret);
782 }
783 stk_push(crossparastk, sitem);
784 continue;
785 } else if (t.type == tok_rbrace) {
786 struct crossparaitem *sitem = stk_pop(crossparastk);
787 if (!sitem)
788 error(err_unexbrace, &t.pos);
789 else {
790 switch (sitem->type) {
791 case c_lcont:
792 par.type = para_LcontPop;
793 prev_para_type = par.type;
794 addpara(par, ret);
795 break;
796 case c_quote:
797 par.type = para_QuotePop;
798 prev_para_type = par.type;
799 addpara(par, ret);
800 break;
801 }
802 sfree(sitem);
803 }
804 continue;
805 }
806
807 /*
808 * This token begins a paragraph. See if it's one of the
809 * special commands that define a paragraph type.
810 *
811 * (note that \# is special in a way, and \nocite takes no
812 * text)
813 */
814 par.type = para_Normal;
815 if (t.type == tok_cmd) {
816 int needkw;
817 int is_macro = FALSE;
818
819 par.fpos = t.pos;
820 switch (t.cmd) {
821 default:
822 needkw = -1;
823 break;
824 case c__invalid:
825 error(err_badparatype, t.text, &t.pos);
826 needkw = 4;
827 break;
828 case c__comment:
829 if (isbrace(in))
830 break; /* `\#{': isn't a comment para */
831 do {
832 dtor(t), t = get_token(in);
833 } while (t.type != tok_eop && t.type != tok_eof);
834 continue; /* next paragraph */
835 /*
836 * `needkw' values:
837 *
838 * 1 -- exactly one keyword
839 * 2 -- at least one keyword
840 * 4 -- any number of keywords including zero
841 * 8 -- at least one keyword and then nothing else
842 * 16 -- nothing at all! no keywords, no body
843 * 32 -- no keywords at all
844 */
845 case c_A: needkw = 2; par.type = para_Appendix; break;
846 case c_B: needkw = 2; par.type = para_Biblio; break;
847 case c_BR: needkw = 1; par.type = para_BR;
848 start_cmd = c_BR; break;
849 case c_C: needkw = 2; par.type = para_Chapter; break;
850 case c_H: needkw = 2; par.type = para_Heading;
851 par.aux = 0;
852 break;
853 case c_IM: needkw = 2; par.type = para_IM;
854 start_cmd = c_IM; break;
855 case c_S: needkw = 2; par.type = para_Subsect;
856 par.aux = t.aux; break;
857 case c_U: needkw = 32; par.type = para_UnnumberedChapter; break;
858 /* For \b and \n the keyword is optional */
859 case c_b: needkw = 4; par.type = para_Bullet; break;
860 case c_dt: needkw = 4; par.type = para_DescribedThing; break;
861 case c_dd: needkw = 4; par.type = para_Description; break;
862 case c_n: needkw = 4; par.type = para_NumberedList; break;
863 case c_cfg: needkw = 8; par.type = para_Config;
864 start_cmd = c_cfg; break;
865 case c_copyright: needkw = 32; par.type = para_Copyright; break;
866 case c_define: is_macro = TRUE; needkw = 1; break;
867 /* For \nocite the keyword is _everything_ */
868 case c_nocite: needkw = 8; par.type = para_NoCite; break;
869 case c_preamble: needkw = 32; par.type = para_Normal; break;
870 case c_rule: needkw = 16; par.type = para_Rule; break;
871 case c_title: needkw = 32; par.type = para_Title; break;
872 case c_versionid: needkw = 32; par.type = para_VersionID; break;
873 }
874
875 if (par.type == para_Chapter ||
876 par.type == para_Heading ||
877 par.type == para_Subsect ||
878 par.type == para_Appendix ||
879 par.type == para_UnnumberedChapter) {
880 struct crossparaitem *sitem = stk_top(crossparastk);
881 if (sitem && (sitem->seen_lcont || sitem->seen_quote)) {
882 error(err_sectmarkerinblock,
883 &t.pos,
884 (sitem->seen_lcont ? "lcont" : "quote"));
885 }
886 }
887
888 if (needkw > 0) {
889 rdstring rs = { 0, 0, NULL };
890 rdstringc rsc = { 0, 0, NULL };
891 int nkeys = 0;
892 filepos fp;
893
894 /* Get keywords. */
895 dtor(t), t = get_token(in);
896 fp = t.pos;
897 while (t.type == tok_lbrace) {
898 /* This is a keyword. */
899 nkeys++;
900 /* FIXME: there will be bugs if anyone specifies an
901 * empty keyword (\foo{}), so trap this case. */
902 while (dtor(t), t = get_token(in),
903 t.type == tok_word ||
904 t.type == tok_white ||
905 (t.type == tok_cmd && t.cmd == c__nbsp) ||
906 (t.type == tok_cmd && t.cmd == c__escaped) ||
907 (t.type == tok_cmd && t.cmd == c_u)) {
908 if (t.type == tok_white ||
909 (t.type == tok_cmd && t.cmd == c__nbsp)) {
910 rdadd(&rs, ' ');
911 rdaddc(&rsc, ' ');
912 } else if (t.type == tok_cmd && t.cmd == c_u) {
913 rdadd(&rs, t.aux);
914 rdaddc(&rsc, '\\');
915 rdaddsc(&rsc, t.origtext);
916 } else {
917 rdadds(&rs, t.text);
918 rdaddsc(&rsc, t.origtext);
919 }
920 }
921 if (t.type != tok_rbrace) {
922 error(err_kwunclosed, &t.pos);
923 continue;
924 }
925 rdadd(&rs, 0); /* add string terminator */
926 rdaddc(&rsc, 0); /* add string terminator */
927 dtor(t), t = get_token(in); /* eat right brace */
928 }
929
930 rdadd(&rs, 0); /* add string terminator */
931 rdaddc(&rsc, 0); /* add string terminator */
932
933 /* See whether we have the right number of keywords. */
934 if ((needkw & 48) && nkeys > 0)
935 error(err_kwillegal, &fp);
936 if ((needkw & 11) && nkeys == 0)
937 error(err_kwexpected, &fp);
938 if ((needkw & 5) && nkeys > 1)
939 error(err_kwtoomany, &fp);
940
941 if (is_macro) {
942 /*
943 * Macro definition. Get the rest of the line
944 * as a code-paragraph token, repeatedly until
945 * there's nothing more left of it. Separate
946 * with newlines.
947 */
948 rdstring macrotext = { 0, 0, NULL };
949 while (1) {
950 dtor(t), t = get_codepar_token(in);
951 if (macrotext.pos > 0)
952 rdadd(&macrotext, L'\n');
953 rdadds(&macrotext, t.text);
954 dtor(t), t = get_token(in);
955 if (t.type == tok_eop) break;
956 }
957 macrodef(macros, rs.text, macrotext.text, fp);
958 continue; /* next paragraph */
959 }
960
961 par.keyword = rdtrim(&rs);
962 par.origkeyword = rdtrimc(&rsc);
963
964 /* Move to EOP in case of needkw==8 or 16 (no body) */
965 if (needkw & 24) {
966 /* We allow whitespace even when we expect no para body */
967 while (t.type == tok_white)
968 dtor(t), t = get_token(in);
969 if (t.type != tok_eop && t.type != tok_eof &&
970 (start_cmd == c__invalid ||
971 t.type != tok_cmd || t.cmd != start_cmd)) {
972 error(err_bodyillegal, &t.pos);
973 /* Error recovery: eat the rest of the paragraph */
974 while (t.type != tok_eop && t.type != tok_eof &&
975 (start_cmd == c__invalid ||
976 t.type != tok_cmd || t.cmd != start_cmd))
977 dtor(t), t = get_token(in);
978 }
979 if (t.type == tok_cmd)
980 already = TRUE;/* inhibit get_token at top of loop */
981 prev_para_type = par.type;
982 addpara(par, ret);
983
984 if (par.type == para_Config) {
985 input_configure(in, &par);
986 }
987 continue; /* next paragraph */
988 }
989 }
990 }
991
992 /*
993 * Now read the actual paragraph, word by word, adding to
994 * the paragraph list.
995 *
996 * Mid-paragraph commands:
997 *
998 * \K \k
999 * \c \cw
1000 * \e
1001 * \i \ii
1002 * \I
1003 * \u
1004 * \W
1005 * \date
1006 * \\ \{ \}
1007 */
1008 parsestk = stk_new();
1009 style = word_Normal;
1010 spcstyle = word_WhiteSpace;
1011 indexing = FALSE;
1012 seenwhite = TRUE;
1013 while (t.type != tok_eop && t.type != tok_eof) {
1014 iswhite = FALSE;
1015 already = FALSE;
1016
1017 /* Handle implicit paragraph breaks after \IM, \BR etc */
1018 if (start_cmd != c__invalid &&
1019 t.type == tok_cmd && t.cmd == start_cmd) {
1020 already = TRUE; /* inhibit get_token at top of loop */
1021 break;
1022 }
1023
1024 if (t.type == tok_cmd && t.cmd == c__nop) {
1025 dtor(t), t = get_token(in);
1026 continue; /* do nothing! */
1027 }
1028
1029 if (t.type == tok_cmd && t.cmd == c__escaped) {
1030 t.type = tok_word; /* nice and simple */
1031 t.aux = 0; /* even if `\-' - nonbreaking! */
1032 }
1033 if (t.type == tok_cmd && t.cmd == c__nbsp) {
1034 t.type = tok_word; /* nice and simple */
1035 sfree(t.text);
1036 t.text = ustrdup(L" "); /* text is ` ' not `_' */
1037 t.aux = 0; /* (nonbreaking) */
1038 }
1039 switch (t.type) {
1040 case tok_white:
1041 if (whptr == &par.words)
1042 break; /* strip whitespace at start of para */
1043 wd.text = NULL;
1044 wd.type = spcstyle;
1045 wd.alt = NULL;
1046 wd.aux = 0;
1047 wd.fpos = t.pos;
1048 wd.breaks = FALSE;
1049
1050 /*
1051 * Inhibit use of whitespace if it's (probably the
1052 * newline) before a repeat \IM / \BR type
1053 * directive.
1054 */
1055 if (start_cmd != c__invalid) {
1056 dtor(t), t = get_token(in);
1057 already = TRUE;
1058 if (t.type == tok_cmd && t.cmd == start_cmd)
1059 break;
1060 }
1061
1062 if (indexing)
1063 rdadd(&indexstr, ' ');
1064 if (!indexing || index_visible)
1065 addword(wd, &whptr);
1066 if (indexing)
1067 addword(wd, &idximplicit);
1068 iswhite = TRUE;
1069 break;
1070 case tok_word:
1071 if (indexing)
1072 rdadds(&indexstr, t.text);
1073 wd.type = style;
1074 wd.alt = NULL;
1075 wd.aux = 0;
1076 wd.fpos = t.pos;
1077 wd.breaks = t.aux;
1078 if (!indexing || index_visible) {
1079 wd.text = ustrdup(t.text);
1080 addword(wd, &whptr);
1081 }
1082 if (indexing) {
1083 wd.text = ustrdup(t.text);
1084 addword(wd, &idximplicit);
1085 }
1086 break;
1087 case tok_lbrace:
1088 error(err_unexbrace, &t.pos);
1089 /* Error recovery: push nop */
1090 sitem = snew(struct stack_item);
1091 sitem->type = stack_nop;
1092 sitem->fpos = t.pos;
1093 stk_push(parsestk, sitem);
1094 break;
1095 case tok_rbrace:
1096 sitem = stk_pop(parsestk);
1097 if (!sitem) {
1098 /*
1099 * This closing brace could have been an
1100 * indication that the cross-paragraph stack
1101 * wants popping. Accordingly, we treat it here
1102 * as an indication that the paragraph is over.
1103 */
1104 already = TRUE;
1105 goto finished_para;
1106 } else {
1107 if (sitem->type & stack_ualt) {
1108 whptr = sitem->whptr;
1109 idximplicit = sitem->idximplicit;
1110 }
1111 if (sitem->type & stack_style) {
1112 style = word_Normal;
1113 spcstyle = word_WhiteSpace;
1114 }
1115 if (sitem->type & stack_idx) {
1116 indexword->text = ustrdup(indexstr.text);
1117 if (index_downcase) {
1118 word *w;
1119
1120 ustrlow(indexword->text);
1121 ustrlow(indexstr.text);
1122
1123 for (w = idxwordlist; w; w = w->next)
1124 if (w->text)
1125 ustrlow(w->text);
1126 }
1127 indexing = FALSE;
1128 rdadd(&indexstr, L'\0');
1129 index_merge(idx, FALSE, indexstr.text,
1130 idxwordlist, &sitem->fpos);
1131 sfree(indexstr.text);
1132 }
1133 if (sitem->type & stack_hyper) {
1134 wd.text = NULL;
1135 wd.type = word_HyperEnd;
1136 wd.alt = NULL;
1137 wd.aux = 0;
1138 wd.fpos = t.pos;
1139 wd.breaks = FALSE;
1140 if (!indexing || index_visible)
1141 addword(wd, &whptr);
1142 if (indexing)
1143 addword(wd, &idximplicit);
1144 }
1145 if (sitem->type & stack_quote) {
1146 wd.text = NULL;
1147 wd.type = toquotestyle(style);
1148 wd.alt = NULL;
1149 wd.aux = quote_Close;
1150 wd.fpos = t.pos;
1151 wd.breaks = FALSE;
1152 if (!indexing || index_visible)
1153 addword(wd, &whptr);
1154 if (indexing) {
1155 rdadd(&indexstr, L'"');
1156 addword(wd, &idximplicit);
1157 }
1158 }
1159 }
1160 sfree(sitem);
1161 break;
1162 case tok_cmd:
1163 switch (t.cmd) {
1164 case c__comment:
1165 /*
1166 * In-paragraph comment: \#{ balanced braces }
1167 *
1168 * Anything goes here; even tok_eop. We should
1169 * eat whitespace after the close brace _if_
1170 * there was whitespace before the \#.
1171 */
1172 dtor(t), t = get_token(in);
1173 if (t.type != tok_lbrace) {
1174 error(err_explbr, &t.pos);
1175 } else {
1176 int braces = 1;
1177 while (braces > 0) {
1178 dtor(t), t = get_token(in);
1179 if (t.type == tok_lbrace)
1180 braces++;
1181 else if (t.type == tok_rbrace)
1182 braces--;
1183 else if (t.type == tok_eof) {
1184 error(err_commenteof, &t.pos);
1185 break;
1186 }
1187 }
1188 }
1189 if (seenwhite) {
1190 already = TRUE;
1191 dtor(t), t = get_token(in);
1192 if (t.type == tok_white) {
1193 iswhite = TRUE;
1194 already = FALSE;
1195 }
1196 }
1197 break;
1198 case c_q:
1199 dtor(t), t = get_token(in);
1200 if (t.type != tok_lbrace) {
1201 error(err_explbr, &t.pos);
1202 } else {
1203 /*
1204 * Enforce that \q may not be used anywhere
1205 * within \c. (It shouldn't be necessary
1206 * since the whole point of \c should be
1207 * that the user wants to exercise exact
1208 * control over the glyphs used, and
1209 * forbidding it has the useful effect of
1210 * relieving some backends of having to
1211 * make difficult decisions.)
1212 */
1213 int stype;
1214
1215 if (style != word_Code && style != word_WeakCode) {
1216 wd.text = NULL;
1217 wd.type = toquotestyle(style);
1218 wd.alt = NULL;
1219 wd.aux = quote_Open;
1220 wd.fpos = t.pos;
1221 wd.breaks = FALSE;
1222 if (!indexing || index_visible)
1223 addword(wd, &whptr);
1224 if (indexing) {
1225 rdadd(&indexstr, L'"');
1226 addword(wd, &idximplicit);
1227 }
1228 stype = stack_quote;
1229 } else {
1230 error(err_codequote, &t.pos);
1231 stype = stack_nop;
1232 }
1233 sitem = snew(struct stack_item);
1234 sitem->fpos = t.pos;
1235 sitem->type = stype;
1236 stk_push(parsestk, sitem);
1237 }
1238 break;
1239 case c_K:
1240 case c_k:
1241 case c_W:
1242 case c_date:
1243 /*
1244 * Keyword, hyperlink, or \date. We expect a
1245 * left brace, some text, and then a right
1246 * brace. No nesting; no arguments.
1247 */
1248 wd.fpos = t.pos;
1249 wd.breaks = FALSE;
1250 if (t.cmd == c_K)
1251 wd.type = word_UpperXref;
1252 else if (t.cmd == c_k)
1253 wd.type = word_LowerXref;
1254 else if (t.cmd == c_W)
1255 wd.type = word_HyperLink;
1256 else
1257 wd.type = word_Normal;
1258 dtor(t), t = get_token(in);
1259 if (t.type != tok_lbrace) {
1260 if (wd.type == word_Normal) {
1261 time_t thetime = time(NULL);
1262 struct tm *broken = localtime(&thetime);
1263 already = TRUE;
1264 wdtext = ustrftime(NULL, broken);
1265 wd.type = style;
1266 } else {
1267 error(err_explbr, &t.pos);
1268 wdtext = NULL;
1269 }
1270 } else {
1271 rdstring rs = { 0, 0, NULL };
1272 while (dtor(t), t = get_token(in),
1273 t.type == tok_word || t.type == tok_white) {
1274 if (t.type == tok_white)
1275 rdadd(&rs, ' ');
1276 else
1277 rdadds(&rs, t.text);
1278 }
1279 if (wd.type == word_Normal) {
1280 time_t thetime = time(NULL);
1281 struct tm *broken = localtime(&thetime);
1282 wdtext = ustrftime(rs.text, broken);
1283 wd.type = style;
1284 } else {
1285 wdtext = ustrdup(rs.text);
1286 }
1287 sfree(rs.text);
1288 if (t.type != tok_rbrace) {
1289 error(err_kwexprbr, &t.pos);
1290 }
1291 }
1292 wd.alt = NULL;
1293 wd.aux = 0;
1294 if (!indexing || index_visible) {
1295 wd.text = ustrdup(wdtext);
1296 addword(wd, &whptr);
1297 }
1298 if (indexing) {
1299 wd.text = ustrdup(wdtext);
1300 addword(wd, &idximplicit);
1301 }
1302 sfree(wdtext);
1303 if (wd.type == word_HyperLink) {
1304 /*
1305 * Hyperlinks are different: they then
1306 * expect another left brace, to begin
1307 * delimiting the text marked by the link.
1308 */
1309 dtor(t), t = get_token(in);
1310 sitem = snew(struct stack_item);
1311 sitem->fpos = wd.fpos;
1312 sitem->type = stack_hyper;
1313 /*
1314 * Special cases: \W{}\i, \W{}\ii
1315 */
1316 if (t.type == tok_cmd &&
1317 (t.cmd == c_i || t.cmd == c_ii)) {
1318 if (indexing) {
1319 error(err_nestedindex, &t.pos);
1320 } else {
1321 /* Add an index-reference word with no
1322 * text as yet */
1323 wd.type = word_IndexRef;
1324 wd.text = NULL;
1325 wd.alt = NULL;
1326 wd.aux = 0;
1327 wd.breaks = FALSE;
1328 indexword = addword(wd, &whptr);
1329 /* Set up a rdstring to read the
1330 * index text */
1331 indexstr = nullrs;
1332 /* Flags so that we do the Right
1333 * Things with text */
1334 index_visible = (type != c_I);
1335 index_downcase = (type == c_ii);
1336 indexing = TRUE;
1337 idxwordlist = NULL;
1338 idximplicit = &idxwordlist;
1339
1340 sitem->type |= stack_idx;
1341 }
1342 dtor(t), t = get_token(in);
1343 }
1344 /*
1345 * Special cases: \W{}\c, \W{}\e, \W{}\cw
1346 */
1347 if (t.type == tok_cmd &&
1348 (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
1349 if (style != word_Normal)
1350 error(err_nestedstyles, &t.pos);
1351 else {
1352 style = (t.cmd == c_c ? word_Code :
1353 t.cmd == c_cw ? word_WeakCode :
1354 word_Emph);
1355 spcstyle = tospacestyle(style);
1356 sitem->type |= stack_style;
1357 }
1358 dtor(t), t = get_token(in);
1359 }
1360 if (t.type != tok_lbrace) {
1361 error(err_explbr, &t.pos);
1362 sfree(sitem);
1363 } else {
1364 stk_push(parsestk, sitem);
1365 }
1366 }
1367 break;
1368 case c_c:
1369 case c_cw:
1370 case c_e:
1371 type = t.cmd;
1372 if (style != word_Normal) {
1373 error(err_nestedstyles, &t.pos);
1374 /* Error recovery: eat lbrace, push nop. */
1375 dtor(t), t = get_token(in);
1376 sitem = snew(struct stack_item);
1377 sitem->fpos = t.pos;
1378 sitem->type = stack_nop;
1379 stk_push(parsestk, sitem);
1380 }
1381 dtor(t), t = get_token(in);
1382 if (t.type != tok_lbrace) {
1383 error(err_explbr, &t.pos);
1384 } else {
1385 style = (type == c_c ? word_Code :
1386 type == c_cw ? word_WeakCode :
1387 word_Emph);
1388 spcstyle = tospacestyle(style);
1389 sitem = snew(struct stack_item);
1390 sitem->fpos = t.pos;
1391 sitem->type = stack_style;
1392 stk_push(parsestk, sitem);
1393 }
1394 break;
1395 case c_i:
1396 case c_ii:
1397 case c_I:
1398 type = t.cmd;
1399 if (indexing) {
1400 error(err_nestedindex, &t.pos);
1401 /* Error recovery: eat lbrace, push nop. */
1402 dtor(t), t = get_token(in);
1403 sitem = snew(struct stack_item);
1404 sitem->fpos = t.pos;
1405 sitem->type = stack_nop;
1406 stk_push(parsestk, sitem);
1407 }
1408 sitem = snew(struct stack_item);
1409 sitem->fpos = t.pos;
1410 sitem->type = stack_idx;
1411 dtor(t), t = get_token(in);
1412 /*
1413 * Special cases: \i\c, \i\e, \i\cw
1414 */
1415 wd.fpos = t.pos;
1416 if (t.type == tok_cmd &&
1417 (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
1418 if (style != word_Normal)
1419 error(err_nestedstyles, &t.pos);
1420 else {
1421 style = (t.cmd == c_c ? word_Code :
1422 t.cmd == c_cw ? word_WeakCode :
1423 word_Emph);
1424 spcstyle = tospacestyle(style);
1425 sitem->type |= stack_style;
1426 }
1427 dtor(t), t = get_token(in);
1428 }
1429 if (t.type != tok_lbrace) {
1430 sfree(sitem);
1431 error(err_explbr, &t.pos);
1432 } else {
1433 /* Add an index-reference word with no text as yet */
1434 wd.type = word_IndexRef;
1435 wd.text = NULL;
1436 wd.alt = NULL;
1437 wd.aux = 0;
1438 wd.breaks = FALSE;
1439 indexword = addword(wd, &whptr);
1440 /* Set up a rdstring to read the index text */
1441 indexstr = nullrs;
1442 /* Flags so that we do the Right Things with text */
1443 index_visible = (type != c_I);
1444 index_downcase = (type == c_ii);
1445 indexing = TRUE;
1446 idxwordlist = NULL;
1447 idximplicit = &idxwordlist;
1448 /* Stack item to close the indexing on exit */
1449 stk_push(parsestk, sitem);
1450 }
1451 break;
1452 case c_u:
1453 uchr = t.aux;
1454 utext[0] = uchr; utext[1] = 0;
1455 wd.type = style;
1456 wd.breaks = FALSE;
1457 wd.alt = NULL;
1458 wd.aux = 0;
1459 wd.fpos = t.pos;
1460 if (!indexing || index_visible) {
1461 wd.text = ustrdup(utext);
1462 uword = addword(wd, &whptr);
1463 } else
1464 uword = NULL;
1465 if (indexing) {
1466 wd.text = ustrdup(utext);
1467 iword = addword(wd, &idximplicit);
1468 } else
1469 iword = NULL;
1470 dtor(t), t = get_token(in);
1471 if (t.type == tok_lbrace) {
1472 /*
1473 * \u with a left brace. Until the brace
1474 * closes, all further words go on a
1475 * sidetrack from the main thread of the
1476 * paragraph.
1477 */
1478 sitem = snew(struct stack_item);
1479 sitem->fpos = t.pos;
1480 sitem->type = stack_ualt;
1481 sitem->whptr = whptr;
1482 sitem->idximplicit = idximplicit;
1483 stk_push(parsestk, sitem);
1484 whptr = uword ? &uword->alt : NULL;
1485 idximplicit = iword ? &iword->alt : NULL;
1486 } else {
1487 if (indexing)
1488 rdadd(&indexstr, uchr);
1489 already = TRUE;
1490 }
1491 break;
1492 default:
1493 if (!macrolookup(macros, in, t.text, &t.pos))
1494 error(err_badmidcmd, t.text, &t.pos);
1495 break;
1496 }
1497 }
1498 if (!already)
1499 dtor(t), t = get_token(in);
1500 seenwhite = iswhite;
1501 }
1502 finished_para:
1503 /* Check the stack is empty */
1504 if (stk_top(parsestk)) {
1505 while ((sitem = stk_pop(parsestk)))
1506 sfree(sitem);
1507 error(err_missingrbrace, &t.pos);
1508 }
1509 stk_free(parsestk);
1510 prev_para_type = par.type;
1511 addpara(par, ret);
1512 if (t.type == tok_eof)
1513 already = TRUE;
1514 }
1515
1516 if (stk_top(crossparastk)) {
1517 void *p;
1518
1519 error(err_missingrbrace2, &t.pos);
1520 while ((p = stk_pop(crossparastk)))
1521 sfree(p);
1522 }
1523
1524 /*
1525 * We break to here rather than returning, because otherwise
1526 * this cleanup doesn't happen.
1527 */
1528 dtor(t);
1529 macrocleanup(macros);
1530
1531 stk_free(crossparastk);
1532 }
1533
1534 paragraph *read_input(input *in, indexdata *idx) {
1535 paragraph *head = NULL;
1536 paragraph **hptr = &head;
1537
1538 while (in->currindex < in->nfiles) {
1539 in->currfp = fopen(in->filenames[in->currindex], "r");
1540 if (in->currfp) {
1541 setpos(in, in->filenames[in->currindex]);
1542 in->charset = in->defcharset;
1543 in->csstate = charset_init_state;
1544 in->wcpos = in->nwc = 0;
1545 in->pushback_chars = NULL;
1546 read_file(&hptr, in, idx);
1547 }
1548 in->currindex++;
1549 }
1550
1551 return head;
1552 }