Switch the memory allocation macros from the Halibut ones
[sgt/halibut] / input.c
1 /*
2 * input.c: read the source form
3 */
4
5 #include <stdio.h>
6 #include <assert.h>
7 #include <time.h>
8 #include "halibut.h"
9
10 #define TAB_STOP 8 /* for column number tracking */
11
12 static void setpos(input *in, char *fname) {
13 in->pos.filename = fname;
14 in->pos.line = 1;
15 in->pos.col = (in->reportcols ? 1 : -1);
16 }
17
18 static void unget(input *in, int c, filepos *pos) {
19 if (in->npushback >= in->pushbacksize) {
20 in->pushbacksize = in->npushback + 16;
21 in->pushback = sresize(in->pushback, in->pushbacksize, pushback);
22 }
23 in->pushback[in->npushback].chr = c;
24 in->pushback[in->npushback].pos = *pos; /* structure copy */
25 in->npushback++;
26 }
27
28 /* ---------------------------------------------------------------------- */
29 /*
30 * Macro subsystem
31 */
32 typedef struct macro_Tag macro;
33 struct macro_Tag {
34 wchar_t *name, *text;
35 };
36 struct macrostack_Tag {
37 macrostack *next;
38 wchar_t *text;
39 int ptr, npushback;
40 filepos pos;
41 };
42 static int macrocmp(void *av, void *bv) {
43 macro *a = (macro *)av, *b = (macro *)bv;
44 return ustrcmp(a->name, b->name);
45 }
46 static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text,
47 filepos fpos) {
48 macro *m = snew(macro);
49 m->name = name;
50 m->text = text;
51 if (add234(macros, m) != m) {
52 error(err_macroexists, &fpos, name);
53 sfree(name);
54 sfree(text);
55 }
56 }
57 static int macrolookup(tree234 *macros, input *in, wchar_t *name,
58 filepos *pos) {
59 macro m, *gotit;
60 m.name = name;
61 gotit = find234(macros, &m, NULL);
62 if (gotit) {
63 macrostack *expansion = snew(macrostack);
64 expansion->next = in->stack;
65 expansion->text = gotit->text;
66 expansion->pos = *pos; /* structure copy */
67 expansion->ptr = 0;
68 expansion->npushback = in->npushback;
69 in->stack = expansion;
70 return TRUE;
71 } else
72 return FALSE;
73 }
74 static void macrocleanup(tree234 *macros) {
75 int ti;
76 macro *m;
77 for (ti = 0; (m = (macro *)index234(macros, ti)) != NULL; ti++) {
78 sfree(m->name);
79 sfree(m->text);
80 sfree(m);
81 }
82 freetree234(macros);
83 }
84
85 static void input_configure(input *in, paragraph *cfg) {
86 assert(cfg->type == para_Config);
87
88 if (!ustricmp(cfg->keyword, L"input-charset")) {
89 char *csname = utoa_dup(uadv(cfg->keyword), CS_ASCII);
90 in->charset = charset_from_localenc(csname);
91 sfree(csname);
92 }
93 }
94
95 /*
96 * Can return EOF
97 */
98 static int get(input *in, filepos *pos, rdstringc *rsc) {
99 int pushbackpt = in->stack ? in->stack->npushback : 0;
100 if (in->npushback > pushbackpt) {
101 --in->npushback;
102 if (pos)
103 *pos = in->pushback[in->npushback].pos; /* structure copy */
104 return in->pushback[in->npushback].chr;
105 }
106 else if (in->stack) {
107 wchar_t c = in->stack->text[in->stack->ptr];
108 if (in->stack->text[++in->stack->ptr] == L'\0') {
109 macrostack *tmp = in->stack;
110 in->stack = tmp->next;
111 sfree(tmp);
112 }
113 return c;
114 }
115 else if (in->currfp) {
116
117 while (in->wcpos >= in->nwc) {
118
119 int c = getc(in->currfp);
120
121 if (c == EOF) {
122 fclose(in->currfp);
123 in->currfp = NULL;
124 return EOF;
125 }
126
127 if (rsc)
128 rdaddc(rsc, c);
129
130 /* Track line numbers, for error reporting */
131 if (pos)
132 *pos = in->pos;
133 if (in->reportcols) {
134 switch (c) {
135 case '\t':
136 in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP;
137 break;
138 case '\n':
139 in->pos.col = 1;
140 in->pos.line++;
141 break;
142 default:
143 in->pos.col++;
144 break;
145 }
146 } else {
147 in->pos.col = -1;
148 if (c == '\n')
149 in->pos.line++;
150 }
151
152 /*
153 * Do input character set translation, so that we return
154 * Unicode.
155 */
156 {
157 char buf[1];
158 char const *p;
159 int inlen;
160
161 buf[0] = (char)c;
162 p = buf;
163 inlen = 1;
164
165 in->nwc = charset_to_unicode(&p, &inlen,
166 in->wc, lenof(in->wc),
167 in->charset, &in->csstate,
168 NULL, 0);
169 assert(p == buf+1 && inlen == 0);
170
171 in->wcpos = 0;
172 }
173 }
174
175 return in->wc[in->wcpos++];
176
177 } else
178 return EOF;
179 }
180
181 /*
182 * Lexical analysis of source files.
183 */
184 typedef struct token_Tag token;
185 struct token_Tag {
186 int type;
187 int cmd, aux;
188 wchar_t *text;
189 char *origtext;
190 filepos pos;
191 };
/* Values for the `type' field of a token. */
enum {
    tok_eof,                           /* end of file */
    tok_eop,                           /* end of paragraph */
    tok_white,                         /* whitespace */
    tok_word,                          /* a word or word fragment */
    tok_cmd,                           /* \command */
    tok_lbrace,                        /* { */
    tok_rbrace                         /* } */
};
201
/*
 * Halibut command keywords: the values stored in the `cmd' field of
 * a tok_cmd token by match_kw().
 */
enum {
    /* Pseudo-commands and single-character commands. */
    c__invalid,                        /* invalid command */
    c__comment,                        /* comment command (\#) */
    c__escaped,                        /* escaped character */
    c__nop,                            /* no-op */
    c__nbsp,                           /* nonbreaking space */

    /* Named commands. */
    c_A,                               /* appendix heading */
    c_B,                               /* bibliography entry */
    c_BR,                              /* bibliography rewrite */
    c_C,                               /* chapter heading */
    c_H,                               /* heading */
    c_I,                               /* invisible index mark */
    c_IM,                              /* index merge/rewrite */
    c_K,                               /* capitalised cross-reference */
    c_S,                               /* aux field is 0, 1, 2, ... */
    c_U,                               /* unnumbered-chapter heading */
    c_W,                               /* Web hyperlink */
    c_b,                               /* bulleted list */
    c_c,                               /* code */
    c_cfg,                             /* configuration directive */
    c_copyright,                       /* copyright statement */
    c_cw,                              /* weak code */
    c_date,                            /* document processing date */
    c_dd,                              /* description list: description */
    c_define,                          /* macro definition */
    c_dt,                              /* description list: described thing */
    c_e,                               /* emphasis */
    c_i,                               /* visible index mark */
    c_ii,                              /* uncapitalised visible index mark */
    c_k,                               /* uncapitalised cross-reference */
    c_lcont,                           /* continuation para(s) for list item */
    c_n,                               /* numbered list */
    c_nocite,                          /* bibliography trickery */
    c_preamble,                        /* (obsolete) preamble text */
    c_q,                               /* quote marks */
    c_quote,                           /* block-quoted paragraphs */
    c_rule,                            /* horizontal rule */
    c_title,                           /* document title */
    c_u,                               /* aux field is char code */
    c_versionid                        /* document RCS id */
};
244
245 /* Perhaps whitespace should be defined in a more Unicode-friendly way? */
/*
 * Character classification and digit conversion. These are macros,
 * so the argument may be evaluated more than once: pass only
 * side-effect-free expressions.
 */
#define isnl(c)    ( (c) == 10 )
#define iswhite(c) ( (c) == 32 || (c) == 9 || (c) == 13 || isnl(c) )
#define isdec(c)   ( (c) >= '0' && (c) <= '9' )
#define fromdec(c) ( (c) - '0' )
#define ishex(c)   ( isdec(c) || ((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f') )
/* Masking with 0xDF folds 'a'..'f' onto 'A'..'F' before subtracting. */
#define fromhex(c) ( (c) <= '9' ? fromdec(c) : ((c) & 0xDF) - 'A' + 10 )
#define iscmd(c)   ( isdec(c) || ((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') )
253
/*
 * Keyword comparison function. Like strcmp, but comparing a wide
 * string `p' against a narrow string `q': returns zero if they are
 * equal, and otherwise the difference between the first pair of
 * characters that differ.
 */
static int kwcmp(wchar_t const *p, char const *q) {
    for (;;) {
        int diff = *p - *q;

        /* Stop at the first mismatch or at the end of either string. */
        if (diff != 0 || *p == 0 || *q == 0)
            return diff;
        p++;
        q++;
    }
}
265
266 /*
267 * Match a keyword.
268 */
269 static void match_kw(token *tok) {
270 /*
271 * FIXME. The ids are explicit in here so as to allow long-name
272 * equivalents to the various very short keywords.
273 */
274 static const struct { char const *name; int id; } keywords[] = {
275 {"#", c__comment}, /* comment command (\#) */
276 {"-", c__escaped}, /* nonbreaking hyphen */
277 {".", c__nop}, /* no-op */
278 {"A", c_A}, /* appendix heading */
279 {"B", c_B}, /* bibliography entry */
280 {"BR", c_BR}, /* bibliography rewrite */
281 {"C", c_C}, /* chapter heading */
282 {"H", c_H}, /* heading */
283 {"I", c_I}, /* invisible index mark */
284 {"IM", c_IM}, /* index merge/rewrite */
285 {"K", c_K}, /* capitalised cross-reference */
286 {"U", c_U}, /* unnumbered-chapter heading */
287 {"W", c_W}, /* Web hyperlink */
288 {"\\", c__escaped}, /* escaped backslash (\\) */
289 {"_", c__nbsp}, /* nonbreaking space (\_) */
290 {"b", c_b}, /* bulletted list */
291 {"c", c_c}, /* code */
292 {"cfg", c_cfg}, /* configuration directive */
293 {"copyright", c_copyright}, /* copyright statement */
294 {"cw", c_cw}, /* weak code */
295 {"date", c_date}, /* document processing date */
296 {"dd", c_dd}, /* description list: description */
297 {"define", c_define}, /* macro definition */
298 {"dt", c_dt}, /* description list: described thing */
299 {"e", c_e}, /* emphasis */
300 {"i", c_i}, /* visible index mark */
301 {"ii", c_ii}, /* uncapitalised visible index mark */
302 {"k", c_k}, /* uncapitalised cross-reference */
303 {"lcont", c_lcont}, /* continuation para(s) for list item */
304 {"n", c_n}, /* numbered list */
305 {"nocite", c_nocite}, /* bibliography trickery */
306 {"preamble", c_preamble}, /* (obsolete) preamble text */
307 {"q", c_q}, /* quote marks */
308 {"quote", c_quote}, /* block-quoted paragraphs */
309 {"rule", c_rule}, /* horizontal rule */
310 {"title", c_title}, /* document title */
311 {"versionid", c_versionid}, /* document RCS id */
312 {"{", c__escaped}, /* escaped lbrace (\{) */
313 {"}", c__escaped}, /* escaped rbrace (\}) */
314 };
315 int i, j, k, c;
316
317 /*
318 * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
319 * doesn't match correctly, we just fall through to the
320 * binary-search phase.
321 */
322 if (tok->text[0] == 'S') {
323 /* We expect numeric characters thereafter. */
324 wchar_t *p = tok->text+1;
325 int n;
326 if (!*p)
327 n = 1;
328 else {
329 n = 0;
330 while (*p && isdec(*p)) {
331 n = 10 * n + fromdec(*p);
332 p++;
333 }
334 }
335 if (!*p) {
336 tok->cmd = c_S;
337 tok->aux = n;
338 return;
339 }
340 } else if (tok->text[0] == 'u') {
341 /* We expect hex characters thereafter. */
342 wchar_t *p = tok->text+1;
343 int n = 0;
344 while (*p && ishex(*p)) {
345 n = 16 * n + fromhex(*p);
346 p++;
347 }
348 if (!*p) {
349 tok->cmd = c_u;
350 tok->aux = n;
351 return;
352 }
353 }
354
355 i = -1;
356 j = sizeof(keywords)/sizeof(*keywords);
357 while (j-i > 1) {
358 k = (i+j)/2;
359 c = kwcmp(tok->text, keywords[k].name);
360 if (c < 0)
361 j = k;
362 else if (c > 0)
363 i = k;
364 else /* c == 0 */ {
365 tok->cmd = keywords[k].id;
366 return;
367 }
368 }
369
370 tok->cmd = c__invalid;
371 }
372
373
374 /*
375 * Read a token from the input file, in the normal way (`normal' in
376 * the sense that code paragraphs work a different way).
377 */
378 token get_token(input *in) {
379 int c;
380 int nls;
381 int prevpos;
382 token ret;
383 rdstring rs = { 0, 0, NULL };
384 rdstringc rsc = { 0, 0, NULL };
385 filepos cpos;
386
387 ret.text = NULL; /* default */
388 ret.origtext = NULL; /* default */
389 if (in->pushback_chars) {
390 rdaddsc(&rsc, in->pushback_chars);
391 sfree(in->pushback_chars);
392 in->pushback_chars = NULL;
393 }
394 c = get(in, &cpos, &rsc);
395 ret.pos = cpos;
396 if (iswhite(c)) { /* tok_white or tok_eop */
397 nls = 0;
398 prevpos = 0;
399 do {
400 if (isnl(c))
401 nls++;
402 prevpos = rsc.pos;
403 } while ((c = get(in, &cpos, &rsc)) != EOF && iswhite(c));
404 if (c == EOF) {
405 ret.type = tok_eof;
406 sfree(rsc.text);
407 return ret;
408 }
409 if (rsc.text) {
410 in->pushback_chars = dupstr(rsc.text + prevpos);
411 sfree(rsc.text);
412 }
413 unget(in, c, &cpos);
414 ret.type = (nls > 1 ? tok_eop : tok_white);
415 return ret;
416 } else if (c == EOF) { /* tok_eof */
417 ret.type = tok_eof;
418 sfree(rsc.text);
419 return ret;
420 } else if (c == '\\') { /* tok_cmd */
421 rsc.pos = prevpos = 0;
422 c = get(in, &cpos, &rsc);
423 if (c == '-' || c == '\\' || c == '_' ||
424 c == '#' || c == '{' || c == '}' || c == '.') {
425 /* single-char command */
426 rdadd(&rs, c);
427 } else if (c == 'u') {
428 int len = 0;
429 do {
430 rdadd(&rs, c);
431 len++;
432 prevpos = rsc.pos;
433 c = get(in, &cpos, &rsc);
434 } while (ishex(c) && len < 5);
435 unget(in, c, &cpos);
436 } else if (iscmd(c)) {
437 do {
438 rdadd(&rs, c);
439 prevpos = rsc.pos;
440 c = get(in, &cpos, &rsc);
441 } while (iscmd(c));
442 unget(in, c, &cpos);
443 }
444 /*
445 * Now match the command against the list of available
446 * ones.
447 */
448 ret.type = tok_cmd;
449 ret.text = ustrdup(rs.text);
450 if (rsc.text) {
451 in->pushback_chars = dupstr(rsc.text + prevpos);
452 rsc.text[prevpos] = '\0';
453 ret.origtext = dupstr(rsc.text);
454 } else {
455 ret.origtext = dupstr("");
456 }
457 match_kw(&ret);
458 sfree(rs.text);
459 sfree(rsc.text);
460 return ret;
461 } else if (c == '{') { /* tok_lbrace */
462 ret.type = tok_lbrace;
463 sfree(rsc.text);
464 return ret;
465 } else if (c == '}') { /* tok_rbrace */
466 ret.type = tok_rbrace;
467 sfree(rsc.text);
468 return ret;
469 } else { /* tok_word */
470 /*
471 * Read a word: the longest possible contiguous sequence of
472 * things other than whitespace, backslash, braces and
473 * hyphen. A hyphen terminates the word but is returned as
474 * part of it; everything else is pushed back for the next
475 * token. The `aux' field contains TRUE if the word ends in
476 * a hyphen.
477 */
478 ret.aux = FALSE; /* assumed for now */
479 prevpos = 0;
480 while (1) {
481 if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) {
482 /* Put back the character that caused termination */
483 unget(in, c, &cpos);
484 break;
485 } else {
486 rdadd(&rs, c);
487 if (c == '-') {
488 prevpos = rsc.pos;
489 ret.aux = TRUE;
490 break; /* hyphen terminates word */
491 }
492 }
493 prevpos = rsc.pos;
494 c = get(in, &cpos, &rsc);
495 }
496 ret.type = tok_word;
497 ret.text = ustrdup(rs.text);
498 if (rsc.text) {
499 in->pushback_chars = dupstr(rsc.text + prevpos);
500 rsc.text[prevpos] = '\0';
501 ret.origtext = dupstr(rsc.text);
502 } else {
503 ret.origtext = dupstr("");
504 }
505 sfree(rs.text);
506 sfree(rsc.text);
507 return ret;
508 }
509 }
510
511 /*
512 * Determine whether the next input character is an open brace (for
513 * telling code paragraphs from paragraphs which merely start with
514 * code).
515 */
516 int isbrace(input *in) {
517 int c;
518 filepos cpos;
519
520 c = get(in, &cpos, NULL);
521 unget(in, c, &cpos);
522 return (c == '{');
523 }
524
525 /*
526 * Read the rest of a line that starts `\c'. Including nothing at
527 * all (tok_word with empty text).
528 */
529 token get_codepar_token(input *in) {
530 int c;
531 token ret;
532 rdstring rs = { 0, 0, NULL };
533 filepos cpos;
534
535 ret.type = tok_word;
536 ret.origtext = NULL;
537 c = get(in, &cpos, NULL); /* expect (and discard) one space */
538 ret.pos = cpos;
539 if (c == ' ') {
540 c = get(in, &cpos, NULL);
541 ret.pos = cpos;
542 }
543 while (!isnl(c) && c != EOF) {
544 int c2 = c;
545 c = get(in, &cpos, NULL);
546 /* Discard \r just before \n. */
547 if (c2 != 13 || !isnl(c))
548 rdadd(&rs, c2);
549 }
550 unget(in, c, &cpos);
551 ret.text = ustrdup(rs.text);
552 sfree(rs.text);
553 return ret;
554 }
555
556 /*
557 * Adds a new word to a linked list
558 */
559 static word *addword(word newword, word ***hptrptr) {
560 word *mnewword;
561 if (!hptrptr)
562 return NULL;
563 mnewword = snew(word);
564 *mnewword = newword; /* structure copy */
565 mnewword->next = NULL;
566 **hptrptr = mnewword;
567 *hptrptr = &mnewword->next;
568 return mnewword;
569 }
570
571 /*
572 * Adds a new paragraph to a linked list
573 */
574 static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
575 paragraph *mnewpara = snew(paragraph);
576 *mnewpara = newpara; /* structure copy */
577 mnewpara->next = NULL;
578 **hptrptr = mnewpara;
579 *hptrptr = &mnewpara->next;
580 return mnewpara;
581 }
582
/*
 * Token destructor: frees the two strings a token owns. Call it
 * before a token variable is reassigned; this should catch most
 * memory leaks.
 */
#define dtor(t) ( sfree((t).text), sfree((t).origtext) )
588
589 /*
590 * Reads a single file (ie until get() returns EOF)
591 */
592 static void read_file(paragraph ***ret, input *in, indexdata *idx) {
593 token t;
594 paragraph par;
595 word wd, **whptr, **idximplicit;
596 tree234 *macros;
597 wchar_t utext[2], *wdtext;
598 int style, spcstyle;
599 int already;
600 int iswhite, seenwhite;
601 int type;
602 int prev_para_type;
603 struct stack_item {
604 enum {
605 stack_nop = 0, /* do nothing (for error recovery) */
606 stack_ualt = 1, /* \u alternative */
607 stack_style = 2, /* \e, \c, \cw */
608 stack_idx = 4, /* \I, \i, \ii */
609 stack_hyper = 8, /* \W */
610 stack_quote = 16, /* \q */
611 } type;
612 word **whptr; /* to restore from \u alternatives */
613 word **idximplicit; /* to restore from \u alternatives */
614 filepos fpos;
615 } *sitem;
616 stack parsestk;
617 struct crossparaitem {
618 int type; /* currently c_lcont, c_quote or -1 */
619 int seen_lcont, seen_quote;
620 };
621 stack crossparastk;
622 word *indexword, *uword, *iword;
623 word *idxwordlist;
624 rdstring indexstr;
625 int index_downcase, index_visible, indexing;
626 const rdstring nullrs = { 0, 0, NULL };
627 wchar_t uchr;
628
629 t.text = NULL;
630 t.origtext = NULL;
631 macros = newtree234(macrocmp);
632 already = FALSE;
633
634 crossparastk = stk_new();
635
636 /*
637 * Loop on each paragraph.
638 */
639 while (1) {
640 int start_cmd = c__invalid;
641 par.words = NULL;
642 par.keyword = NULL;
643 par.origkeyword = NULL;
644 whptr = &par.words;
645
646 /*
647 * Get a token.
648 */
649 do {
650 if (!already) {
651 dtor(t), t = get_token(in);
652 }
653 already = FALSE;
654 } while (t.type == tok_eop);
655 if (t.type == tok_eof)
656 break;
657
658 /*
659 * Parse code paragraphs separately.
660 */
661 if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
662 int wtype = word_WeakCode;
663
664 par.type = para_Code;
665 par.fpos = t.pos;
666 while (1) {
667 dtor(t), t = get_codepar_token(in);
668 wd.type = wtype;
669 wd.breaks = FALSE; /* shouldn't need this... */
670 wd.text = ustrdup(t.text);
671 wd.alt = NULL;
672 wd.fpos = t.pos;
673 addword(wd, &whptr);
674 dtor(t), t = get_token(in);
675 if (t.type == tok_white) {
676 /*
677 * The newline after a code-paragraph line
678 */
679 dtor(t), t = get_token(in);
680 }
681 if (t.type == tok_eop || t.type == tok_eof ||
682 t.type == tok_rbrace) { /* might be } terminating \lcont */
683 if (t.type == tok_rbrace)
684 already = TRUE;
685 break;
686 } else if (t.type == tok_cmd && t.cmd == c_c) {
687 wtype = word_WeakCode;
688 } else if (t.type == tok_cmd && t.cmd == c_e &&
689 wtype == word_WeakCode) {
690 wtype = word_Emph;
691 } else {
692 error(err_brokencodepara, &t.pos);
693 prev_para_type = par.type;
694 addpara(par, ret);
695 while (t.type != tok_eop) /* error recovery: */
696 dtor(t), t = get_token(in); /* eat rest of paragraph */
697 goto codeparabroken; /* ick, but such is life */
698 }
699 }
700 prev_para_type = par.type;
701 addpara(par, ret);
702 codeparabroken:
703 continue;
704 }
705
706 /*
707 * Spot the special commands that define a grouping of more
708 * than one paragraph, and also the closing braces that
709 * finish them.
710 */
711 if (t.type == tok_cmd &&
712 (t.cmd == c_lcont || t.cmd == c_quote)) {
713 struct crossparaitem *sitem, *stop;
714 int cmd = t.cmd;
715
716 /*
717 * Expect, and swallow, an open brace.
718 */
719 dtor(t), t = get_token(in);
720 if (t.type != tok_lbrace) {
721 error(err_explbr, &t.pos);
722 continue;
723 }
724
725 /*
726 * Also expect, and swallow, any whitespace after that
727 * (a newline before a code paragraph wouldn't be
728 * surprising).
729 */
730 do {
731 dtor(t), t = get_token(in);
732 } while (t.type == tok_white);
733 already = TRUE;
734
735 if (cmd == c_lcont) {
736 /*
737 * \lcont causes a continuation of a list item into
738 * multiple paragraphs (which may in turn contain
739 * nested lists, code paras etc). Hence, the previous
740 * paragraph must be of a list type.
741 */
742 sitem = snew(struct crossparaitem);
743 stop = (struct crossparaitem *)stk_top(crossparastk);
744 if (stop)
745 *sitem = *stop;
746 else
747 sitem->seen_quote = sitem->seen_lcont = 0;
748
749 if (prev_para_type == para_Bullet ||
750 prev_para_type == para_NumberedList ||
751 prev_para_type == para_Description) {
752 sitem->type = c_lcont;
753 sitem->seen_lcont = 1;
754 par.type = para_LcontPush;
755 prev_para_type = par.type;
756 addpara(par, ret);
757 } else {
758 /*
759 * Push a null item on the cross-para stack so that
760 * when we see the corresponding closing brace we
761 * don't give a cascade error.
762 */
763 sitem->type = -1;
764 error(err_misplacedlcont, &t.pos);
765 }
766 } else {
767 /*
768 * \quote causes a group of paragraphs to be
769 * block-quoted (typically they will be indented a
770 * bit).
771 */
772 sitem = snew(struct crossparaitem);
773 stop = (struct crossparaitem *)stk_top(crossparastk);
774 if (stop)
775 *sitem = *stop;
776 else
777 sitem->seen_quote = sitem->seen_lcont = 0;
778 sitem->type = c_quote;
779 sitem->seen_quote = 1;
780 par.type = para_QuotePush;
781 prev_para_type = par.type;
782 addpara(par, ret);
783 }
784 stk_push(crossparastk, sitem);
785 continue;
786 } else if (t.type == tok_rbrace) {
787 struct crossparaitem *sitem = stk_pop(crossparastk);
788 if (!sitem)
789 error(err_unexbrace, &t.pos);
790 else {
791 switch (sitem->type) {
792 case c_lcont:
793 par.type = para_LcontPop;
794 prev_para_type = par.type;
795 addpara(par, ret);
796 break;
797 case c_quote:
798 par.type = para_QuotePop;
799 prev_para_type = par.type;
800 addpara(par, ret);
801 break;
802 }
803 sfree(sitem);
804 }
805 continue;
806 }
807
808 /*
809 * This token begins a paragraph. See if it's one of the
810 * special commands that define a paragraph type.
811 *
812 * (note that \# is special in a way, and \nocite takes no
813 * text)
814 */
815 par.type = para_Normal;
816 if (t.type == tok_cmd) {
817 int needkw;
818 int is_macro = FALSE;
819
820 par.fpos = t.pos;
821 switch (t.cmd) {
822 default:
823 needkw = -1;
824 break;
825 case c__invalid:
826 error(err_badparatype, t.text, &t.pos);
827 needkw = 4;
828 break;
829 case c__comment:
830 if (isbrace(in))
831 break; /* `\#{': isn't a comment para */
832 do {
833 dtor(t), t = get_token(in);
834 } while (t.type != tok_eop && t.type != tok_eof);
835 continue; /* next paragraph */
836 /*
837 * `needkw' values:
838 *
839 * 1 -- exactly one keyword
840 * 2 -- at least one keyword
841 * 4 -- any number of keywords including zero
842 * 8 -- at least one keyword and then nothing else
843 * 16 -- nothing at all! no keywords, no body
844 * 32 -- no keywords at all
845 */
846 case c_A: needkw = 2; par.type = para_Appendix; break;
847 case c_B: needkw = 2; par.type = para_Biblio; break;
848 case c_BR: needkw = 1; par.type = para_BR;
849 start_cmd = c_BR; break;
850 case c_C: needkw = 2; par.type = para_Chapter; break;
851 case c_H: needkw = 2; par.type = para_Heading;
852 par.aux = 0;
853 break;
854 case c_IM: needkw = 2; par.type = para_IM;
855 start_cmd = c_IM; break;
856 case c_S: needkw = 2; par.type = para_Subsect;
857 par.aux = t.aux; break;
858 case c_U: needkw = 32; par.type = para_UnnumberedChapter; break;
859 /* For \b and \n the keyword is optional */
860 case c_b: needkw = 4; par.type = para_Bullet; break;
861 case c_dt: needkw = 4; par.type = para_DescribedThing; break;
862 case c_dd: needkw = 4; par.type = para_Description; break;
863 case c_n: needkw = 4; par.type = para_NumberedList; break;
864 case c_cfg: needkw = 8; par.type = para_Config;
865 start_cmd = c_cfg; break;
866 case c_copyright: needkw = 32; par.type = para_Copyright; break;
867 case c_define: is_macro = TRUE; needkw = 1; break;
868 /* For \nocite the keyword is _everything_ */
869 case c_nocite: needkw = 8; par.type = para_NoCite; break;
870 case c_preamble: needkw = 32; par.type = para_Normal; break;
871 case c_rule: needkw = 16; par.type = para_Rule; break;
872 case c_title: needkw = 32; par.type = para_Title; break;
873 case c_versionid: needkw = 32; par.type = para_VersionID; break;
874 }
875
876 if (par.type == para_Chapter ||
877 par.type == para_Heading ||
878 par.type == para_Subsect ||
879 par.type == para_Appendix ||
880 par.type == para_UnnumberedChapter) {
881 struct crossparaitem *sitem = stk_top(crossparastk);
882 if (sitem && (sitem->seen_lcont || sitem->seen_quote)) {
883 error(err_sectmarkerinblock,
884 &t.pos,
885 (sitem->seen_lcont ? "lcont" : "quote"));
886 }
887 }
888
889 if (needkw > 0) {
890 rdstring rs = { 0, 0, NULL };
891 rdstringc rsc = { 0, 0, NULL };
892 int nkeys = 0;
893 filepos fp;
894
895 /* Get keywords. */
896 dtor(t), t = get_token(in);
897 fp = t.pos;
898 while (t.type == tok_lbrace) {
899 /* This is a keyword. */
900 nkeys++;
901 /* FIXME: there will be bugs if anyone specifies an
902 * empty keyword (\foo{}), so trap this case. */
903 while (dtor(t), t = get_token(in),
904 t.type == tok_word ||
905 t.type == tok_white ||
906 (t.type == tok_cmd && t.cmd == c__nbsp) ||
907 (t.type == tok_cmd && t.cmd == c__escaped) ||
908 (t.type == tok_cmd && t.cmd == c_u)) {
909 if (t.type == tok_white ||
910 (t.type == tok_cmd && t.cmd == c__nbsp)) {
911 rdadd(&rs, ' ');
912 rdaddc(&rsc, ' ');
913 } else if (t.type == tok_cmd && t.cmd == c_u) {
914 rdadd(&rs, t.aux);
915 rdaddc(&rsc, '\\');
916 rdaddsc(&rsc, t.origtext);
917 } else {
918 rdadds(&rs, t.text);
919 rdaddsc(&rsc, t.origtext);
920 }
921 }
922 if (t.type != tok_rbrace) {
923 error(err_kwunclosed, &t.pos);
924 continue;
925 }
926 rdadd(&rs, 0); /* add string terminator */
927 rdaddc(&rsc, 0); /* add string terminator */
928 dtor(t), t = get_token(in); /* eat right brace */
929 }
930
931 rdadd(&rs, 0); /* add string terminator */
932 rdaddc(&rsc, 0); /* add string terminator */
933
934 /* See whether we have the right number of keywords. */
935 if ((needkw & 48) && nkeys > 0)
936 error(err_kwillegal, &fp);
937 if ((needkw & 11) && nkeys == 0)
938 error(err_kwexpected, &fp);
939 if ((needkw & 5) && nkeys > 1)
940 error(err_kwtoomany, &fp);
941
942 if (is_macro) {
943 /*
944 * Macro definition. Get the rest of the line
945 * as a code-paragraph token, repeatedly until
946 * there's nothing more left of it. Separate
947 * with newlines.
948 */
949 rdstring macrotext = { 0, 0, NULL };
950 while (1) {
951 dtor(t), t = get_codepar_token(in);
952 if (macrotext.pos > 0)
953 rdadd(&macrotext, L'\n');
954 rdadds(&macrotext, t.text);
955 dtor(t), t = get_token(in);
956 if (t.type == tok_eop) break;
957 }
958 macrodef(macros, rs.text, macrotext.text, fp);
959 continue; /* next paragraph */
960 }
961
962 par.keyword = rdtrim(&rs);
963 par.origkeyword = rdtrimc(&rsc);
964
965 /* Move to EOP in case of needkw==8 or 16 (no body) */
966 if (needkw & 24) {
967 /* We allow whitespace even when we expect no para body */
968 while (t.type == tok_white)
969 dtor(t), t = get_token(in);
970 if (t.type != tok_eop && t.type != tok_eof &&
971 (start_cmd == c__invalid ||
972 t.type != tok_cmd || t.cmd != start_cmd)) {
973 error(err_bodyillegal, &t.pos);
974 /* Error recovery: eat the rest of the paragraph */
975 while (t.type != tok_eop && t.type != tok_eof &&
976 (start_cmd == c__invalid ||
977 t.type != tok_cmd || t.cmd != start_cmd))
978 dtor(t), t = get_token(in);
979 }
980 if (t.type == tok_cmd)
981 already = TRUE;/* inhibit get_token at top of loop */
982 prev_para_type = par.type;
983 addpara(par, ret);
984
985 if (par.type == para_Config) {
986 input_configure(in, &par);
987 }
988 continue; /* next paragraph */
989 }
990 }
991 }
992
993 /*
994 * Now read the actual paragraph, word by word, adding to
995 * the paragraph list.
996 *
997 * Mid-paragraph commands:
998 *
999 * \K \k
1000 * \c \cw
1001 * \e
1002 * \i \ii
1003 * \I
1004 * \u
1005 * \W
1006 * \date
1007 * \\ \{ \}
1008 */
1009 parsestk = stk_new();
1010 style = word_Normal;
1011 spcstyle = word_WhiteSpace;
1012 indexing = FALSE;
1013 seenwhite = TRUE;
1014 while (t.type != tok_eop && t.type != tok_eof) {
1015 iswhite = FALSE;
1016 already = FALSE;
1017
1018 /* Handle implicit paragraph breaks after \IM, \BR etc */
1019 if (start_cmd != c__invalid &&
1020 t.type == tok_cmd && t.cmd == start_cmd) {
1021 already = TRUE; /* inhibit get_token at top of loop */
1022 break;
1023 }
1024
1025 if (t.type == tok_cmd && t.cmd == c__nop) {
1026 dtor(t), t = get_token(in);
1027 continue; /* do nothing! */
1028 }
1029
1030 if (t.type == tok_cmd && t.cmd == c__escaped) {
1031 t.type = tok_word; /* nice and simple */
1032 t.aux = 0; /* even if `\-' - nonbreaking! */
1033 }
1034 if (t.type == tok_cmd && t.cmd == c__nbsp) {
1035 t.type = tok_word; /* nice and simple */
1036 sfree(t.text);
1037 t.text = ustrdup(L" "); /* text is ` ' not `_' */
1038 t.aux = 0; /* (nonbreaking) */
1039 }
1040 switch (t.type) {
1041 case tok_white:
1042 if (whptr == &par.words)
1043 break; /* strip whitespace at start of para */
1044 wd.text = NULL;
1045 wd.type = spcstyle;
1046 wd.alt = NULL;
1047 wd.aux = 0;
1048 wd.fpos = t.pos;
1049 wd.breaks = FALSE;
1050
1051 /*
1052 * Inhibit use of whitespace if it's (probably the
1053 * newline) before a repeat \IM / \BR type
1054 * directive.
1055 */
1056 if (start_cmd != c__invalid) {
1057 dtor(t), t = get_token(in);
1058 already = TRUE;
1059 if (t.type == tok_cmd && t.cmd == start_cmd)
1060 break;
1061 }
1062
1063 if (indexing)
1064 rdadd(&indexstr, ' ');
1065 if (!indexing || index_visible)
1066 addword(wd, &whptr);
1067 if (indexing)
1068 addword(wd, &idximplicit);
1069 iswhite = TRUE;
1070 break;
1071 case tok_word:
1072 if (indexing)
1073 rdadds(&indexstr, t.text);
1074 wd.type = style;
1075 wd.alt = NULL;
1076 wd.aux = 0;
1077 wd.fpos = t.pos;
1078 wd.breaks = t.aux;
1079 if (!indexing || index_visible) {
1080 wd.text = ustrdup(t.text);
1081 addword(wd, &whptr);
1082 }
1083 if (indexing) {
1084 wd.text = ustrdup(t.text);
1085 addword(wd, &idximplicit);
1086 }
1087 break;
1088 case tok_lbrace:
1089 error(err_unexbrace, &t.pos);
1090 /* Error recovery: push nop */
1091 sitem = snew(struct stack_item);
1092 sitem->type = stack_nop;
1093 sitem->fpos = t.pos;
1094 stk_push(parsestk, sitem);
1095 break;
1096 case tok_rbrace:
1097 sitem = stk_pop(parsestk);
1098 if (!sitem) {
1099 /*
1100 * This closing brace could have been an
1101 * indication that the cross-paragraph stack
1102 * wants popping. Accordingly, we treat it here
1103 * as an indication that the paragraph is over.
1104 */
1105 already = TRUE;
1106 goto finished_para;
1107 } else {
1108 if (sitem->type & stack_ualt) {
1109 whptr = sitem->whptr;
1110 idximplicit = sitem->idximplicit;
1111 }
1112 if (sitem->type & stack_style) {
1113 style = word_Normal;
1114 spcstyle = word_WhiteSpace;
1115 }
1116 if (sitem->type & stack_idx) {
1117 indexword->text = ustrdup(indexstr.text);
1118 if (index_downcase) {
1119 word *w;
1120
1121 ustrlow(indexword->text);
1122 ustrlow(indexstr.text);
1123
1124 for (w = idxwordlist; w; w = w->next)
1125 if (w->text)
1126 ustrlow(w->text);
1127 }
1128 indexing = FALSE;
1129 rdadd(&indexstr, L'\0');
1130 index_merge(idx, FALSE, indexstr.text,
1131 idxwordlist, &sitem->fpos);
1132 sfree(indexstr.text);
1133 }
1134 if (sitem->type & stack_hyper) {
1135 wd.text = NULL;
1136 wd.type = word_HyperEnd;
1137 wd.alt = NULL;
1138 wd.aux = 0;
1139 wd.fpos = t.pos;
1140 wd.breaks = FALSE;
1141 if (!indexing || index_visible)
1142 addword(wd, &whptr);
1143 if (indexing)
1144 addword(wd, &idximplicit);
1145 }
1146 if (sitem->type & stack_quote) {
1147 wd.text = NULL;
1148 wd.type = toquotestyle(style);
1149 wd.alt = NULL;
1150 wd.aux = quote_Close;
1151 wd.fpos = t.pos;
1152 wd.breaks = FALSE;
1153 if (!indexing || index_visible)
1154 addword(wd, &whptr);
1155 if (indexing) {
1156 rdadd(&indexstr, L'"');
1157 addword(wd, &idximplicit);
1158 }
1159 }
1160 }
1161 sfree(sitem);
1162 break;
1163 case tok_cmd:
1164 switch (t.cmd) {
1165 case c__comment:
1166 /*
1167 * In-paragraph comment: \#{ balanced braces }
1168 *
1169 * Anything goes here; even tok_eop. We should
1170 * eat whitespace after the close brace _if_
1171 * there was whitespace before the \#.
1172 */
1173 dtor(t), t = get_token(in);
1174 if (t.type != tok_lbrace) {
1175 error(err_explbr, &t.pos);
1176 } else {
1177 int braces = 1;
1178 while (braces > 0) {
1179 dtor(t), t = get_token(in);
1180 if (t.type == tok_lbrace)
1181 braces++;
1182 else if (t.type == tok_rbrace)
1183 braces--;
1184 else if (t.type == tok_eof) {
1185 error(err_commenteof, &t.pos);
1186 break;
1187 }
1188 }
1189 }
1190 if (seenwhite) {
1191 already = TRUE;
1192 dtor(t), t = get_token(in);
1193 if (t.type == tok_white) {
1194 iswhite = TRUE;
1195 already = FALSE;
1196 }
1197 }
1198 break;
1199 case c_q:
1200 dtor(t), t = get_token(in);
1201 if (t.type != tok_lbrace) {
1202 error(err_explbr, &t.pos);
1203 } else {
1204 wd.text = NULL;
1205 wd.type = toquotestyle(style);
1206 wd.alt = NULL;
1207 wd.aux = quote_Open;
1208 wd.fpos = t.pos;
1209 wd.breaks = FALSE;
1210 if (!indexing || index_visible)
1211 addword(wd, &whptr);
1212 if (indexing) {
1213 rdadd(&indexstr, L'"');
1214 addword(wd, &idximplicit);
1215 }
1216 sitem = snew(struct stack_item);
1217 sitem->fpos = t.pos;
1218 sitem->type = stack_quote;
1219 stk_push(parsestk, sitem);
1220 }
1221 break;
1222 case c_K:
1223 case c_k:
1224 case c_W:
1225 case c_date:
1226 /*
1227 * Keyword, hyperlink, or \date. We expect a
1228 * left brace, some text, and then a right
1229 * brace. No nesting; no arguments.
1230 */
1231 wd.fpos = t.pos;
1232 wd.breaks = FALSE;
1233 if (t.cmd == c_K)
1234 wd.type = word_UpperXref;
1235 else if (t.cmd == c_k)
1236 wd.type = word_LowerXref;
1237 else if (t.cmd == c_W)
1238 wd.type = word_HyperLink;
1239 else
1240 wd.type = word_Normal;
1241 dtor(t), t = get_token(in);
1242 if (t.type != tok_lbrace) {
1243 if (wd.type == word_Normal) {
1244 time_t thetime = time(NULL);
1245 struct tm *broken = localtime(&thetime);
1246 already = TRUE;
1247 wdtext = ustrftime(NULL, broken);
1248 wd.type = style;
1249 } else {
1250 error(err_explbr, &t.pos);
1251 wdtext = NULL;
1252 }
1253 } else {
1254 rdstring rs = { 0, 0, NULL };
1255 while (dtor(t), t = get_token(in),
1256 t.type == tok_word || t.type == tok_white) {
1257 if (t.type == tok_white)
1258 rdadd(&rs, ' ');
1259 else
1260 rdadds(&rs, t.text);
1261 }
1262 if (wd.type == word_Normal) {
1263 time_t thetime = time(NULL);
1264 struct tm *broken = localtime(&thetime);
1265 wdtext = ustrftime(rs.text, broken);
1266 wd.type = style;
1267 } else {
1268 wdtext = ustrdup(rs.text);
1269 }
1270 sfree(rs.text);
1271 if (t.type != tok_rbrace) {
1272 error(err_kwexprbr, &t.pos);
1273 }
1274 }
1275 wd.alt = NULL;
1276 wd.aux = 0;
1277 if (!indexing || index_visible) {
1278 wd.text = ustrdup(wdtext);
1279 addword(wd, &whptr);
1280 }
1281 if (indexing) {
1282 wd.text = ustrdup(wdtext);
1283 addword(wd, &idximplicit);
1284 }
1285 sfree(wdtext);
1286 if (wd.type == word_HyperLink) {
1287 /*
1288 * Hyperlinks are different: they then
1289 * expect another left brace, to begin
1290 * delimiting the text marked by the link.
1291 */
1292 dtor(t), t = get_token(in);
1293 sitem = snew(struct stack_item);
1294 sitem->fpos = wd.fpos;
1295 sitem->type = stack_hyper;
1296 /*
1297 * Special cases: \W{}\i, \W{}\ii
1298 */
1299 if (t.type == tok_cmd &&
1300 (t.cmd == c_i || t.cmd == c_ii)) {
1301 if (indexing) {
1302 error(err_nestedindex, &t.pos);
1303 } else {
1304 /* Add an index-reference word with no
1305 * text as yet */
1306 wd.type = word_IndexRef;
1307 wd.text = NULL;
1308 wd.alt = NULL;
1309 wd.aux = 0;
1310 wd.breaks = FALSE;
1311 indexword = addword(wd, &whptr);
1312 /* Set up a rdstring to read the
1313 * index text */
1314 indexstr = nullrs;
1315 /* Flags so that we do the Right
1316 * Things with text */
1317 index_visible = (type != c_I);
1318 index_downcase = (type == c_ii);
1319 indexing = TRUE;
1320 idxwordlist = NULL;
1321 idximplicit = &idxwordlist;
1322
1323 sitem->type |= stack_idx;
1324 }
1325 dtor(t), t = get_token(in);
1326 }
1327 /*
1328 * Special cases: \W{}\c, \W{}\e, \W{}\cw
1329 */
1330 if (t.type == tok_cmd &&
1331 (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
1332 if (style != word_Normal)
1333 error(err_nestedstyles, &t.pos);
1334 else {
1335 style = (t.cmd == c_c ? word_Code :
1336 t.cmd == c_cw ? word_WeakCode :
1337 word_Emph);
1338 spcstyle = tospacestyle(style);
1339 sitem->type |= stack_style;
1340 }
1341 dtor(t), t = get_token(in);
1342 }
1343 if (t.type != tok_lbrace) {
1344 error(err_explbr, &t.pos);
1345 sfree(sitem);
1346 } else {
1347 stk_push(parsestk, sitem);
1348 }
1349 }
1350 break;
1351 case c_c:
1352 case c_cw:
1353 case c_e:
1354 type = t.cmd;
1355 if (style != word_Normal) {
1356 error(err_nestedstyles, &t.pos);
1357 /* Error recovery: eat lbrace, push nop. */
1358 dtor(t), t = get_token(in);
1359 sitem = snew(struct stack_item);
1360 sitem->fpos = t.pos;
1361 sitem->type = stack_nop;
1362 stk_push(parsestk, sitem);
1363 }
1364 dtor(t), t = get_token(in);
1365 if (t.type != tok_lbrace) {
1366 error(err_explbr, &t.pos);
1367 } else {
1368 style = (type == c_c ? word_Code :
1369 type == c_cw ? word_WeakCode :
1370 word_Emph);
1371 spcstyle = tospacestyle(style);
1372 sitem = snew(struct stack_item);
1373 sitem->fpos = t.pos;
1374 sitem->type = stack_style;
1375 stk_push(parsestk, sitem);
1376 }
1377 break;
1378 case c_i:
1379 case c_ii:
1380 case c_I:
1381 type = t.cmd;
1382 if (indexing) {
1383 error(err_nestedindex, &t.pos);
1384 /* Error recovery: eat lbrace, push nop. */
1385 dtor(t), t = get_token(in);
1386 sitem = snew(struct stack_item);
1387 sitem->fpos = t.pos;
1388 sitem->type = stack_nop;
1389 stk_push(parsestk, sitem);
1390 }
1391 sitem = snew(struct stack_item);
1392 sitem->fpos = t.pos;
1393 sitem->type = stack_idx;
1394 dtor(t), t = get_token(in);
1395 /*
1396 * Special cases: \i\c, \i\e, \i\cw
1397 */
1398 wd.fpos = t.pos;
1399 if (t.type == tok_cmd &&
1400 (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
1401 if (style != word_Normal)
1402 error(err_nestedstyles, &t.pos);
1403 else {
1404 style = (t.cmd == c_c ? word_Code :
1405 t.cmd == c_cw ? word_WeakCode :
1406 word_Emph);
1407 spcstyle = tospacestyle(style);
1408 sitem->type |= stack_style;
1409 }
1410 dtor(t), t = get_token(in);
1411 }
1412 if (t.type != tok_lbrace) {
1413 sfree(sitem);
1414 error(err_explbr, &t.pos);
1415 } else {
1416 /* Add an index-reference word with no text as yet */
1417 wd.type = word_IndexRef;
1418 wd.text = NULL;
1419 wd.alt = NULL;
1420 wd.aux = 0;
1421 wd.breaks = FALSE;
1422 indexword = addword(wd, &whptr);
1423 /* Set up a rdstring to read the index text */
1424 indexstr = nullrs;
1425 /* Flags so that we do the Right Things with text */
1426 index_visible = (type != c_I);
1427 index_downcase = (type == c_ii);
1428 indexing = TRUE;
1429 idxwordlist = NULL;
1430 idximplicit = &idxwordlist;
1431 /* Stack item to close the indexing on exit */
1432 stk_push(parsestk, sitem);
1433 }
1434 break;
1435 case c_u:
1436 uchr = t.aux;
1437 utext[0] = uchr; utext[1] = 0;
1438 wd.type = style;
1439 wd.breaks = FALSE;
1440 wd.alt = NULL;
1441 wd.aux = 0;
1442 wd.fpos = t.pos;
1443 if (!indexing || index_visible) {
1444 wd.text = ustrdup(utext);
1445 uword = addword(wd, &whptr);
1446 } else
1447 uword = NULL;
1448 if (indexing) {
1449 wd.text = ustrdup(utext);
1450 iword = addword(wd, &idximplicit);
1451 } else
1452 iword = NULL;
1453 dtor(t), t = get_token(in);
1454 if (t.type == tok_lbrace) {
1455 /*
1456 * \u with a left brace. Until the brace
1457 * closes, all further words go on a
1458 * sidetrack from the main thread of the
1459 * paragraph.
1460 */
1461 sitem = snew(struct stack_item);
1462 sitem->fpos = t.pos;
1463 sitem->type = stack_ualt;
1464 sitem->whptr = whptr;
1465 sitem->idximplicit = idximplicit;
1466 stk_push(parsestk, sitem);
1467 whptr = uword ? &uword->alt : NULL;
1468 idximplicit = iword ? &iword->alt : NULL;
1469 } else {
1470 if (indexing)
1471 rdadd(&indexstr, uchr);
1472 already = TRUE;
1473 }
1474 break;
1475 default:
1476 if (!macrolookup(macros, in, t.text, &t.pos))
1477 error(err_badmidcmd, t.text, &t.pos);
1478 break;
1479 }
1480 }
1481 if (!already)
1482 dtor(t), t = get_token(in);
1483 seenwhite = iswhite;
1484 }
1485 finished_para:
1486 /* Check the stack is empty */
1487 if (stk_top(parsestk)) {
1488 while ((sitem = stk_pop(parsestk)))
1489 sfree(sitem);
1490 error(err_missingrbrace, &t.pos);
1491 }
1492 stk_free(parsestk);
1493 prev_para_type = par.type;
1494 addpara(par, ret);
1495 if (t.type == tok_eof)
1496 already = TRUE;
1497 }
1498
1499 if (stk_top(crossparastk)) {
1500 void *p;
1501
1502 error(err_missingrbrace2, &t.pos);
1503 while ((p = stk_pop(crossparastk)))
1504 sfree(p);
1505 }
1506
1507 /*
1508 * We break to here rather than returning, because otherwise
1509 * this cleanup doesn't happen.
1510 */
1511 dtor(t);
1512 macrocleanup(macros);
1513
1514 stk_free(crossparastk);
1515 }
1516
1517 paragraph *read_input(input *in, indexdata *idx) {
1518 paragraph *head = NULL;
1519 paragraph **hptr = &head;
1520
1521 while (in->currindex < in->nfiles) {
1522 in->currfp = fopen(in->filenames[in->currindex], "r");
1523 if (in->currfp) {
1524 setpos(in, in->filenames[in->currindex]);
1525 in->charset = in->defcharset;
1526 in->csstate = charset_init_state;
1527 in->wcpos = in->nwc = 0;
1528 in->pushback_chars = NULL;
1529 read_file(&hptr, in, idx);
1530 }
1531 in->currindex++;
1532 }
1533
1534 return head;
1535 }