Infrastructure changes for character set support. ustrtoa,
[sgt/halibut] / input.c
1 /*
2 * input.c: read the source form
3 */
4
5 #include <stdio.h>
6 #include <assert.h>
7 #include <time.h>
8 #include "halibut.h"
9
10 #define TAB_STOP 8 /* for column number tracking */
11
12 static void setpos(input *in, char *fname) {
13 in->pos.filename = fname;
14 in->pos.line = 1;
15 in->pos.col = (in->reportcols ? 1 : -1);
16 }
17
18 static void unget(input *in, int c, filepos *pos) {
19 if (in->npushback >= in->pushbacksize) {
20 in->pushbacksize = in->npushback + 16;
21 in->pushback = resize(in->pushback, in->pushbacksize);
22 }
23 in->pushback[in->npushback].chr = c;
24 in->pushback[in->npushback].pos = *pos; /* structure copy */
25 in->npushback++;
26 }
27
28 /* ---------------------------------------------------------------------- */
29 /*
30 * Macro subsystem
31 */
32 typedef struct macro_Tag macro;
33 struct macro_Tag {
34 wchar_t *name, *text;
35 };
36 struct macrostack_Tag {
37 macrostack *next;
38 wchar_t *text;
39 int ptr, npushback;
40 filepos pos;
41 };
42 static int macrocmp(void *av, void *bv) {
43 macro *a = (macro *)av, *b = (macro *)bv;
44 return ustrcmp(a->name, b->name);
45 }
46 static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text,
47 filepos fpos) {
48 macro *m = mknew(macro);
49 m->name = name;
50 m->text = text;
51 if (add234(macros, m) != m) {
52 error(err_macroexists, &fpos, name);
53 sfree(name);
54 sfree(text);
55 }
56 }
57 static int macrolookup(tree234 *macros, input *in, wchar_t *name,
58 filepos *pos) {
59 macro m, *gotit;
60 m.name = name;
61 gotit = find234(macros, &m, NULL);
62 if (gotit) {
63 macrostack *expansion = mknew(macrostack);
64 expansion->next = in->stack;
65 expansion->text = gotit->text;
66 expansion->pos = *pos; /* structure copy */
67 expansion->ptr = 0;
68 expansion->npushback = in->npushback;
69 in->stack = expansion;
70 return TRUE;
71 } else
72 return FALSE;
73 }
74 static void macrocleanup(tree234 *macros) {
75 int ti;
76 macro *m;
77 for (ti = 0; (m = (macro *)index234(macros, ti)) != NULL; ti++) {
78 sfree(m->name);
79 sfree(m->text);
80 sfree(m);
81 }
82 freetree234(macros);
83 }
84
85 static void input_configure(input *in, paragraph *cfg) {
86 assert(cfg->type == para_Config);
87
88 if (!ustricmp(cfg->keyword, L"input-charset")) {
89 char *csname = utoa_dup(uadv(cfg->keyword), CS_ASCII);
90 in->charset = charset_from_localenc(csname);
91 sfree(csname);
92 }
93 }
94
95 /*
96 * Can return EOF
97 */
98 static int get(input *in, filepos *pos, rdstringc *rsc) {
99 int pushbackpt = in->stack ? in->stack->npushback : 0;
100 if (in->npushback > pushbackpt) {
101 --in->npushback;
102 if (pos)
103 *pos = in->pushback[in->npushback].pos; /* structure copy */
104 return in->pushback[in->npushback].chr;
105 }
106 else if (in->stack) {
107 wchar_t c = in->stack->text[in->stack->ptr];
108 if (in->stack->text[++in->stack->ptr] == L'\0') {
109 macrostack *tmp = in->stack;
110 in->stack = tmp->next;
111 sfree(tmp);
112 }
113 return c;
114 }
115 else if (in->currfp) {
116
117 while (in->wcpos >= in->nwc) {
118
119 int c = getc(in->currfp);
120
121 if (c == EOF) {
122 fclose(in->currfp);
123 in->currfp = NULL;
124 return EOF;
125 }
126
127 if (rsc)
128 rdaddc(rsc, c);
129
130 /* Track line numbers, for error reporting */
131 if (pos)
132 *pos = in->pos;
133 if (in->reportcols) {
134 switch (c) {
135 case '\t':
136 in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP;
137 break;
138 case '\n':
139 in->pos.col = 1;
140 in->pos.line++;
141 break;
142 default:
143 in->pos.col++;
144 break;
145 }
146 } else {
147 in->pos.col = -1;
148 if (c == '\n')
149 in->pos.line++;
150 }
151
152 /*
153 * Do input character set translation, so that we return
154 * Unicode.
155 */
156 {
157 char buf[1];
158 char const *p;
159 int inlen;
160
161 buf[0] = (char)c;
162 p = buf;
163 inlen = 1;
164
165 in->nwc = charset_to_unicode(&p, &inlen,
166 in->wc, lenof(in->wc),
167 in->charset, &in->csstate,
168 NULL, 0);
169 assert(p == buf+1 && inlen == 0);
170
171 in->wcpos = 0;
172 }
173 }
174
175 return in->wc[in->wcpos++];
176
177 } else
178 return EOF;
179 }
180
181 /*
182 * Lexical analysis of source files.
183 */
184 typedef struct token_Tag token;
185 struct token_Tag {
186 int type;
187 int cmd, aux;
188 wchar_t *text;
189 char *origtext;
190 filepos pos;
191 };
/* Token types, stored in the `type' field of a token. */
enum {
    tok_eof,                           /* end of file */
    tok_eop,                           /* end of paragraph */
    tok_white,                         /* whitespace */
    tok_word,                          /* a word or word fragment */
    tok_cmd,                           /* \command */
    tok_lbrace,                        /* { */
    tok_rbrace                         /* } */
};
201
/*
 * Halibut command keywords. These are the values match_kw stores in
 * the `cmd' field of a tok_cmd token.
 */
enum {
    c__invalid,                        /* invalid command */
    c__comment,                        /* comment command (\#) */
    c__escaped,                        /* escaped character */
    c__nop,                            /* no-op */
    c__nbsp,                           /* nonbreaking space */
    c_A,                               /* appendix heading */
    c_B,                               /* bibliography entry */
    c_BR,                              /* bibliography rewrite */
    c_C,                               /* chapter heading */
    c_H,                               /* heading */
    c_I,                               /* invisible index mark */
    c_IM,                              /* index merge/rewrite */
    c_K,                               /* capitalised cross-reference */
    c_S,                               /* aux field is 0, 1, 2, ... */
    c_U,                               /* unnumbered-chapter heading */
    c_W,                               /* Web hyperlink */
    c_b,                               /* bulletted list */
    c_c,                               /* code */
    c_cfg,                             /* configuration directive */
    c_copyright,                       /* copyright statement */
    c_cw,                              /* weak code */
    c_date,                            /* document processing date */
    c_dd,                              /* description list: description */
    c_define,                          /* macro definition */
    c_dt,                              /* description list: described thing */
    c_e,                               /* emphasis */
    c_i,                               /* visible index mark */
    c_ii,                              /* uncapitalised visible index mark */
    c_k,                               /* uncapitalised cross-reference */
    c_lcont,                           /* continuation para(s) for list item */
    c_n,                               /* numbered list */
    c_nocite,                          /* bibliography trickery */
    c_preamble,                        /* (obsolete) preamble text */
    c_q,                               /* quote marks */
    c_quote,                           /* block-quoted paragraphs */
    c_rule,                            /* horizontal rule */
    c_title,                           /* document title */
    c_u,                               /* aux field is char code */
    c_versionid                        /* document RCS id */
};
244
/*
 * Character classification helpers for the lexer. Each macro may
 * evaluate its argument more than once, so pass only side-effect-free
 * expressions.
 *
 * Perhaps whitespace should be defined in a more Unicode-friendly way?
 */
#define iswhite(c) ( (c) == 9 || (c) == 10 || (c) == 13 || (c) == 32 )
#define isnl(c)    ( (c) == 10 )
#define isdec(c)   ( (c) >= '0' && (c) <= '9' )
#define fromdec(c) ( (c) - '0' )
#define ishex(c)   ( ((c) >= '0' && (c) <= '9') || \
                     ((c) >= 'A' && (c) <= 'F') || \
                     ((c) >= 'a' && (c) <= 'f') )
/* Clearing bit 0x20 folds 'a'..'f' onto 'A'..'F' before subtracting. */
#define fromhex(c) ( (c) <= '9' ? (c) - '0' : ((c) & 0xDF) - ('A' - 10) )
#define iscmd(c)   ( ((c) >= '0' && (c) <= '9') || \
                     ((c) >= 'A' && (c) <= 'Z') || \
                     ((c) >= 'a' && (c) <= 'z') )
253
/*
 * Compare a wide-character string against a narrow one, in the manner
 * of strcmp: the result is negative, zero or positive according to
 * the difference between the first pair of characters that differ
 * (or zero if both strings end together).
 */
static int kwcmp(wchar_t const *p, char const *q) {
    int diff;

    for (;;) {
        diff = *p - *q;
        if (*p++ == 0)
            break;                     /* wide string ended */
        if (*q++ == 0)
            break;                     /* narrow string ended */
        if (diff != 0)
            break;                     /* found a mismatch */
    }
    return diff;
}
265
266 /*
267 * Match a keyword.
268 */
269 static void match_kw(token *tok) {
270 /*
271 * FIXME. The ids are explicit in here so as to allow long-name
272 * equivalents to the various very short keywords.
273 */
274 static const struct { char const *name; int id; } keywords[] = {
275 {"#", c__comment}, /* comment command (\#) */
276 {"-", c__escaped}, /* nonbreaking hyphen */
277 {".", c__nop}, /* no-op */
278 {"A", c_A}, /* appendix heading */
279 {"B", c_B}, /* bibliography entry */
280 {"BR", c_BR}, /* bibliography rewrite */
281 {"C", c_C}, /* chapter heading */
282 {"H", c_H}, /* heading */
283 {"I", c_I}, /* invisible index mark */
284 {"IM", c_IM}, /* index merge/rewrite */
285 {"K", c_K}, /* capitalised cross-reference */
286 {"U", c_U}, /* unnumbered-chapter heading */
287 {"W", c_W}, /* Web hyperlink */
288 {"\\", c__escaped}, /* escaped backslash (\\) */
289 {"_", c__nbsp}, /* nonbreaking space (\_) */
290 {"b", c_b}, /* bulletted list */
291 {"c", c_c}, /* code */
292 {"cfg", c_cfg}, /* configuration directive */
293 {"copyright", c_copyright}, /* copyright statement */
294 {"cw", c_cw}, /* weak code */
295 {"date", c_date}, /* document processing date */
296 {"dd", c_dd}, /* description list: description */
297 {"define", c_define}, /* macro definition */
298 {"dt", c_dt}, /* description list: described thing */
299 {"e", c_e}, /* emphasis */
300 {"i", c_i}, /* visible index mark */
301 {"ii", c_ii}, /* uncapitalised visible index mark */
302 {"k", c_k}, /* uncapitalised cross-reference */
303 {"lcont", c_lcont}, /* continuation para(s) for list item */
304 {"n", c_n}, /* numbered list */
305 {"nocite", c_nocite}, /* bibliography trickery */
306 {"preamble", c_preamble}, /* (obsolete) preamble text */
307 {"q", c_q}, /* quote marks */
308 {"quote", c_quote}, /* block-quoted paragraphs */
309 {"rule", c_rule}, /* horizontal rule */
310 {"title", c_title}, /* document title */
311 {"versionid", c_versionid}, /* document RCS id */
312 {"{", c__escaped}, /* escaped lbrace (\{) */
313 {"}", c__escaped}, /* escaped rbrace (\}) */
314 };
315 int i, j, k, c;
316
317 /*
318 * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
319 * doesn't match correctly, we just fall through to the
320 * binary-search phase.
321 */
322 if (tok->text[0] == 'S') {
323 /* We expect numeric characters thereafter. */
324 wchar_t *p = tok->text+1;
325 int n;
326 if (!*p)
327 n = 1;
328 else {
329 n = 0;
330 while (*p && isdec(*p)) {
331 n = 10 * n + fromdec(*p);
332 p++;
333 }
334 }
335 if (!*p) {
336 tok->cmd = c_S;
337 tok->aux = n;
338 return;
339 }
340 } else if (tok->text[0] == 'u') {
341 /* We expect hex characters thereafter. */
342 wchar_t *p = tok->text+1;
343 int n = 0;
344 while (*p && ishex(*p)) {
345 n = 16 * n + fromhex(*p);
346 p++;
347 }
348 if (!*p) {
349 tok->cmd = c_u;
350 tok->aux = n;
351 return;
352 }
353 }
354
355 i = -1;
356 j = sizeof(keywords)/sizeof(*keywords);
357 while (j-i > 1) {
358 k = (i+j)/2;
359 c = kwcmp(tok->text, keywords[k].name);
360 if (c < 0)
361 j = k;
362 else if (c > 0)
363 i = k;
364 else /* c == 0 */ {
365 tok->cmd = keywords[k].id;
366 return;
367 }
368 }
369
370 tok->cmd = c__invalid;
371 }
372
373
374 /*
375 * Read a token from the input file, in the normal way (`normal' in
376 * the sense that code paragraphs work a different way).
377 */
378 token get_token(input *in) {
379 int c;
380 int nls;
381 int prevpos;
382 token ret;
383 rdstring rs = { 0, 0, NULL };
384 rdstringc rsc = { 0, 0, NULL };
385 filepos cpos;
386
387 ret.text = NULL; /* default */
388 ret.origtext = NULL; /* default */
389 if (in->pushback_chars) {
390 rdaddsc(&rsc, in->pushback_chars);
391 sfree(in->pushback_chars);
392 in->pushback_chars = NULL;
393 }
394 c = get(in, &cpos, &rsc);
395 ret.pos = cpos;
396 if (iswhite(c)) { /* tok_white or tok_eop */
397 nls = 0;
398 prevpos = 0;
399 do {
400 if (isnl(c))
401 nls++;
402 prevpos = rsc.pos;
403 } while ((c = get(in, &cpos, &rsc)) != EOF && iswhite(c));
404 if (c == EOF) {
405 ret.type = tok_eof;
406 sfree(rsc.text);
407 return ret;
408 }
409 if (rsc.text) {
410 in->pushback_chars = dupstr(rsc.text + prevpos);
411 sfree(rsc.text);
412 }
413 unget(in, c, &cpos);
414 ret.type = (nls > 1 ? tok_eop : tok_white);
415 return ret;
416 } else if (c == EOF) { /* tok_eof */
417 ret.type = tok_eof;
418 sfree(rsc.text);
419 return ret;
420 } else if (c == '\\') { /* tok_cmd */
421 rsc.pos = prevpos = 0;
422 c = get(in, &cpos, &rsc);
423 if (c == '-' || c == '\\' || c == '_' ||
424 c == '#' || c == '{' || c == '}' || c == '.') {
425 /* single-char command */
426 rdadd(&rs, c);
427 } else if (c == 'u') {
428 int len = 0;
429 do {
430 rdadd(&rs, c);
431 len++;
432 prevpos = rsc.pos;
433 c = get(in, &cpos, &rsc);
434 } while (ishex(c) && len < 5);
435 unget(in, c, &cpos);
436 } else if (iscmd(c)) {
437 do {
438 rdadd(&rs, c);
439 prevpos = rsc.pos;
440 c = get(in, &cpos, &rsc);
441 } while (iscmd(c));
442 unget(in, c, &cpos);
443 }
444 /*
445 * Now match the command against the list of available
446 * ones.
447 */
448 ret.type = tok_cmd;
449 ret.text = ustrdup(rs.text);
450 if (rsc.text) {
451 in->pushback_chars = dupstr(rsc.text + prevpos);
452 rsc.text[prevpos] = '\0';
453 ret.origtext = dupstr(rsc.text);
454 } else {
455 ret.origtext = dupstr("");
456 }
457 match_kw(&ret);
458 sfree(rs.text);
459 sfree(rsc.text);
460 return ret;
461 } else if (c == '{') { /* tok_lbrace */
462 ret.type = tok_lbrace;
463 sfree(rsc.text);
464 return ret;
465 } else if (c == '}') { /* tok_rbrace */
466 ret.type = tok_rbrace;
467 sfree(rsc.text);
468 return ret;
469 } else { /* tok_word */
470 /*
471 * Read a word: the longest possible contiguous sequence of
472 * things other than whitespace, backslash, braces and
473 * hyphen. A hyphen terminates the word but is returned as
474 * part of it; everything else is pushed back for the next
475 * token. The `aux' field contains TRUE if the word ends in
476 * a hyphen.
477 */
478 ret.aux = FALSE; /* assumed for now */
479 prevpos = 0;
480 while (1) {
481 if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) {
482 /* Put back the character that caused termination */
483 unget(in, c, &cpos);
484 break;
485 } else {
486 rdadd(&rs, c);
487 if (c == '-') {
488 prevpos = rsc.pos;
489 ret.aux = TRUE;
490 break; /* hyphen terminates word */
491 }
492 }
493 prevpos = rsc.pos;
494 c = get(in, &cpos, &rsc);
495 }
496 ret.type = tok_word;
497 ret.text = ustrdup(rs.text);
498 if (rsc.text) {
499 in->pushback_chars = dupstr(rsc.text + prevpos);
500 rsc.text[prevpos] = '\0';
501 ret.origtext = dupstr(rsc.text);
502 } else {
503 ret.origtext = dupstr("");
504 }
505 sfree(rs.text);
506 sfree(rsc.text);
507 return ret;
508 }
509 }
510
511 /*
512 * Determine whether the next input character is an open brace (for
513 * telling code paragraphs from paragraphs which merely start with
514 * code).
515 */
516 int isbrace(input *in) {
517 int c;
518 filepos cpos;
519
520 c = get(in, &cpos, NULL);
521 unget(in, c, &cpos);
522 return (c == '{');
523 }
524
525 /*
526 * Read the rest of a line that starts `\c'. Including nothing at
527 * all (tok_word with empty text).
528 */
529 token get_codepar_token(input *in) {
530 int c;
531 token ret;
532 rdstring rs = { 0, 0, NULL };
533 filepos cpos;
534
535 ret.type = tok_word;
536 ret.origtext = NULL;
537 c = get(in, &cpos, NULL); /* expect (and discard) one space */
538 ret.pos = cpos;
539 if (c == ' ') {
540 c = get(in, &cpos, NULL);
541 ret.pos = cpos;
542 }
543 while (!isnl(c) && c != EOF) {
544 int c2 = c;
545 c = get(in, &cpos, NULL);
546 /* Discard \r just before \n. */
547 if (c2 != 13 || !isnl(c))
548 rdadd(&rs, c2);
549 }
550 unget(in, c, &cpos);
551 ret.text = ustrdup(rs.text);
552 sfree(rs.text);
553 return ret;
554 }
555
556 /*
557 * Adds a new word to a linked list
558 */
559 static word *addword(word newword, word ***hptrptr) {
560 word *mnewword;
561 if (!hptrptr)
562 return NULL;
563 mnewword = mknew(word);
564 *mnewword = newword; /* structure copy */
565 mnewword->next = NULL;
566 **hptrptr = mnewword;
567 *hptrptr = &mnewword->next;
568 return mnewword;
569 }
570
571 /*
572 * Adds a new paragraph to a linked list
573 */
574 static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
575 paragraph *mnewpara = mknew(paragraph);
576 *mnewpara = newpara; /* structure copy */
577 mnewpara->next = NULL;
578 **hptrptr = mnewpara;
579 *hptrptr = &mnewpara->next;
580 return mnewpara;
581 }
582
/*
 * Destructor before token is reassigned; should catch most memory
 * leaks. Frees the token's `text' and `origtext' strings.
 *
 * The argument is parenthesised in the expansion so that any
 * expression of type token can be passed, not only a plain
 * identifier. It is evaluated twice.
 */
#define dtor(t) ( sfree((t).text), sfree((t).origtext) )
588
589 /*
590 * Reads a single file (ie until get() returns EOF)
591 */
592 static void read_file(paragraph ***ret, input *in, indexdata *idx) {
593 token t;
594 paragraph par;
595 word wd, **whptr, **idximplicit;
596 tree234 *macros;
597 wchar_t utext[2], *wdtext;
598 int style, spcstyle;
599 int already;
600 int iswhite, seenwhite;
601 int type;
602 int prev_para_type;
603 struct stack_item {
604 enum {
605 stack_nop = 0, /* do nothing (for error recovery) */
606 stack_ualt = 1, /* \u alternative */
607 stack_style = 2, /* \e, \c, \cw */
608 stack_idx = 4, /* \I, \i, \ii */
609 stack_hyper = 8, /* \W */
610 stack_quote = 16, /* \q */
611 } type;
612 word **whptr; /* to restore from \u alternatives */
613 word **idximplicit; /* to restore from \u alternatives */
614 filepos fpos;
615 } *sitem;
616 stack parsestk;
617 struct crossparaitem {
618 int type; /* currently c_lcont, c_quote or -1 */
619 int seen_lcont, seen_quote;
620 };
621 stack crossparastk;
622 word *indexword, *uword, *iword;
623 word *idxwordlist;
624 rdstring indexstr;
625 int index_downcase, index_visible, indexing;
626 const rdstring nullrs = { 0, 0, NULL };
627 wchar_t uchr;
628
629 t.text = NULL;
630 t.origtext = NULL;
631 macros = newtree234(macrocmp);
632 already = FALSE;
633
634 crossparastk = stk_new();
635
636 /*
637 * Loop on each paragraph.
638 */
639 while (1) {
640 int start_cmd = c__invalid;
641 par.words = NULL;
642 par.keyword = NULL;
643 par.origkeyword = NULL;
644 whptr = &par.words;
645
646 /*
647 * Get a token.
648 */
649 do {
650 if (!already) {
651 dtor(t), t = get_token(in);
652 }
653 already = FALSE;
654 } while (t.type == tok_eop);
655 if (t.type == tok_eof)
656 break;
657
658 /*
659 * Parse code paragraphs separately.
660 */
661 if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
662 int wtype = word_WeakCode;
663
664 par.type = para_Code;
665 par.fpos = t.pos;
666 while (1) {
667 dtor(t), t = get_codepar_token(in);
668 wd.type = wtype;
669 wd.breaks = FALSE; /* shouldn't need this... */
670 wd.text = ustrdup(t.text);
671 wd.alt = NULL;
672 wd.fpos = t.pos;
673 addword(wd, &whptr);
674 dtor(t), t = get_token(in);
675 if (t.type == tok_white) {
676 /*
677 * The newline after a code-paragraph line
678 */
679 dtor(t), t = get_token(in);
680 }
681 if (t.type == tok_eop || t.type == tok_eof ||
682 t.type == tok_rbrace) { /* might be } terminating \lcont */
683 if (t.type == tok_rbrace)
684 already = TRUE;
685 break;
686 } else if (t.type == tok_cmd && t.cmd == c_c) {
687 wtype = word_WeakCode;
688 } else if (t.type == tok_cmd && t.cmd == c_e &&
689 wtype == word_WeakCode) {
690 wtype = word_Emph;
691 } else {
692 error(err_brokencodepara, &t.pos);
693 prev_para_type = par.type;
694 addpara(par, ret);
695 while (t.type != tok_eop) /* error recovery: */
696 dtor(t), t = get_token(in); /* eat rest of paragraph */
697 goto codeparabroken; /* ick, but such is life */
698 }
699 }
700 prev_para_type = par.type;
701 addpara(par, ret);
702 codeparabroken:
703 continue;
704 }
705
706 /*
707 * Spot the special commands that define a grouping of more
708 * than one paragraph, and also the closing braces that
709 * finish them.
710 */
711 if (t.type == tok_cmd &&
712 (t.cmd == c_lcont || t.cmd == c_quote)) {
713 struct crossparaitem *sitem, *stop;
714 int cmd = t.cmd;
715
716 /*
717 * Expect, and swallow, an open brace.
718 */
719 dtor(t), t = get_token(in);
720 if (t.type != tok_lbrace) {
721 error(err_explbr, &t.pos);
722 continue;
723 }
724
725 /*
726 * Also expect, and swallow, any whitespace after that
727 * (a newline before a code paragraph wouldn't be
728 * surprising).
729 */
730 do {
731 dtor(t), t = get_token(in);
732 } while (t.type == tok_white);
733 already = TRUE;
734
735 if (cmd == c_lcont) {
736 /*
737 * \lcont causes a continuation of a list item into
738 * multiple paragraphs (which may in turn contain
739 * nested lists, code paras etc). Hence, the previous
740 * paragraph must be of a list type.
741 */
742 sitem = mknew(struct crossparaitem);
743 stop = (struct crossparaitem *)stk_top(crossparastk);
744 if (stop)
745 *sitem = *stop;
746 else
747 sitem->seen_quote = sitem->seen_lcont = 0;
748
749 if (prev_para_type == para_Bullet ||
750 prev_para_type == para_NumberedList ||
751 prev_para_type == para_Description) {
752 sitem->type = c_lcont;
753 sitem->seen_lcont = 1;
754 par.type = para_LcontPush;
755 prev_para_type = par.type;
756 addpara(par, ret);
757 } else {
758 /*
759 * Push a null item on the cross-para stack so that
760 * when we see the corresponding closing brace we
761 * don't give a cascade error.
762 */
763 sitem->type = -1;
764 error(err_misplacedlcont, &t.pos);
765 }
766 } else {
767 /*
768 * \quote causes a group of paragraphs to be
769 * block-quoted (typically they will be indented a
770 * bit).
771 */
772 sitem = mknew(struct crossparaitem);
773 stop = (struct crossparaitem *)stk_top(crossparastk);
774 if (stop)
775 *sitem = *stop;
776 else
777 sitem->seen_quote = sitem->seen_lcont = 0;
778 sitem->type = c_quote;
779 sitem->seen_quote = 1;
780 par.type = para_QuotePush;
781 prev_para_type = par.type;
782 addpara(par, ret);
783 }
784 stk_push(crossparastk, sitem);
785 continue;
786 } else if (t.type == tok_rbrace) {
787 struct crossparaitem *sitem = stk_pop(crossparastk);
788 if (!sitem)
789 error(err_unexbrace, &t.pos);
790 else {
791 switch (sitem->type) {
792 case c_lcont:
793 par.type = para_LcontPop;
794 prev_para_type = par.type;
795 addpara(par, ret);
796 break;
797 case c_quote:
798 par.type = para_QuotePop;
799 prev_para_type = par.type;
800 addpara(par, ret);
801 break;
802 }
803 sfree(sitem);
804 }
805 continue;
806 }
807
808 /*
809 * This token begins a paragraph. See if it's one of the
810 * special commands that define a paragraph type.
811 *
812 * (note that \# is special in a way, and \nocite takes no
813 * text)
814 */
815 par.type = para_Normal;
816 if (t.type == tok_cmd) {
817 int needkw;
818 int is_macro = FALSE;
819
820 par.fpos = t.pos;
821 switch (t.cmd) {
822 default:
823 needkw = -1;
824 break;
825 case c__invalid:
826 error(err_badparatype, t.text, &t.pos);
827 needkw = 4;
828 break;
829 case c__comment:
830 if (isbrace(in))
831 break; /* `\#{': isn't a comment para */
832 do {
833 dtor(t), t = get_token(in);
834 } while (t.type != tok_eop && t.type != tok_eof);
835 continue; /* next paragraph */
836 /*
837 * `needkw' values:
838 *
839 * 1 -- exactly one keyword
840 * 2 -- at least one keyword
841 * 4 -- any number of keywords including zero
842 * 8 -- at least one keyword and then nothing else
843 * 16 -- nothing at all! no keywords, no body
844 * 32 -- no keywords at all
845 */
846 case c_A: needkw = 2; par.type = para_Appendix; break;
847 case c_B: needkw = 2; par.type = para_Biblio; break;
848 case c_BR: needkw = 1; par.type = para_BR;
849 start_cmd = c_BR; break;
850 case c_C: needkw = 2; par.type = para_Chapter; break;
851 case c_H: needkw = 2; par.type = para_Heading;
852 par.aux = 0;
853 break;
854 case c_IM: needkw = 2; par.type = para_IM;
855 start_cmd = c_IM; break;
856 case c_S: needkw = 2; par.type = para_Subsect;
857 par.aux = t.aux; break;
858 case c_U: needkw = 32; par.type = para_UnnumberedChapter; break;
859 /* For \b and \n the keyword is optional */
860 case c_b: needkw = 4; par.type = para_Bullet; break;
861 case c_dt: needkw = 4; par.type = para_DescribedThing; break;
862 case c_dd: needkw = 4; par.type = para_Description; break;
863 case c_n: needkw = 4; par.type = para_NumberedList; break;
864 case c_cfg: needkw = 8; par.type = para_Config;
865 start_cmd = c_cfg; break;
866 case c_copyright: needkw = 32; par.type = para_Copyright; break;
867 case c_define: is_macro = TRUE; needkw = 1; break;
868 /* For \nocite the keyword is _everything_ */
869 case c_nocite: needkw = 8; par.type = para_NoCite; break;
870 case c_preamble: needkw = 32; par.type = para_Normal; break;
871 case c_rule: needkw = 16; par.type = para_Rule; break;
872 case c_title: needkw = 32; par.type = para_Title; break;
873 case c_versionid: needkw = 32; par.type = para_VersionID; break;
874 }
875
876 if (par.type == para_Chapter ||
877 par.type == para_Heading ||
878 par.type == para_Subsect ||
879 par.type == para_Appendix ||
880 par.type == para_UnnumberedChapter) {
881 struct crossparaitem *sitem = stk_top(crossparastk);
882 if (sitem && (sitem->seen_lcont || sitem->seen_quote)) {
883 error(err_sectmarkerinblock,
884 &t.pos,
885 (sitem->seen_lcont ? "lcont" : "quote"));
886 }
887 }
888
889 if (needkw > 0) {
890 rdstring rs = { 0, 0, NULL };
891 rdstringc rsc = { 0, 0, NULL };
892 int nkeys = 0;
893 filepos fp;
894
895 /* Get keywords. */
896 dtor(t), t = get_token(in);
897 fp = t.pos;
898 while (t.type == tok_lbrace) {
899 /* This is a keyword. */
900 nkeys++;
901 /* FIXME: there will be bugs if anyone specifies an
902 * empty keyword (\foo{}), so trap this case. */
903 while (dtor(t), t = get_token(in),
904 t.type == tok_word ||
905 t.type == tok_white ||
906 (t.type == tok_cmd && t.cmd == c__nbsp) ||
907 (t.type == tok_cmd && t.cmd == c__escaped)) {
908 if (t.type == tok_white ||
909 (t.type == tok_cmd && t.cmd == c__nbsp)) {
910 rdadd(&rs, ' ');
911 rdaddc(&rsc, ' ');
912 } else {
913 rdadds(&rs, t.text);
914 rdaddsc(&rsc, t.origtext);
915 }
916 }
917 if (t.type != tok_rbrace) {
918 error(err_kwunclosed, &t.pos);
919 continue;
920 }
921 rdadd(&rs, 0); /* add string terminator */
922 rdaddc(&rsc, 0); /* add string terminator */
923 dtor(t), t = get_token(in); /* eat right brace */
924 }
925
926 rdadd(&rs, 0); /* add string terminator */
927 rdaddc(&rsc, 0); /* add string terminator */
928
929 /* See whether we have the right number of keywords. */
930 if ((needkw & 48) && nkeys > 0)
931 error(err_kwillegal, &fp);
932 if ((needkw & 11) && nkeys == 0)
933 error(err_kwexpected, &fp);
934 if ((needkw & 5) && nkeys > 1)
935 error(err_kwtoomany, &fp);
936
937 if (is_macro) {
938 /*
939 * Macro definition. Get the rest of the line
940 * as a code-paragraph token, repeatedly until
941 * there's nothing more left of it. Separate
942 * with newlines.
943 */
944 rdstring macrotext = { 0, 0, NULL };
945 while (1) {
946 dtor(t), t = get_codepar_token(in);
947 if (macrotext.pos > 0)
948 rdadd(&macrotext, L'\n');
949 rdadds(&macrotext, t.text);
950 dtor(t), t = get_token(in);
951 if (t.type == tok_eop) break;
952 }
953 macrodef(macros, rs.text, macrotext.text, fp);
954 continue; /* next paragraph */
955 }
956
957 par.keyword = rdtrim(&rs);
958 par.origkeyword = rdtrimc(&rsc);
959
960 /* Move to EOP in case of needkw==8 or 16 (no body) */
961 if (needkw & 24) {
962 /* We allow whitespace even when we expect no para body */
963 while (t.type == tok_white)
964 dtor(t), t = get_token(in);
965 if (t.type != tok_eop && t.type != tok_eof &&
966 (start_cmd == c__invalid ||
967 t.type != tok_cmd || t.cmd != start_cmd)) {
968 error(err_bodyillegal, &t.pos);
969 /* Error recovery: eat the rest of the paragraph */
970 while (t.type != tok_eop && t.type != tok_eof &&
971 (start_cmd == c__invalid ||
972 t.type != tok_cmd || t.cmd != start_cmd))
973 dtor(t), t = get_token(in);
974 }
975 if (t.type == tok_cmd)
976 already = TRUE;/* inhibit get_token at top of loop */
977 prev_para_type = par.type;
978 addpara(par, ret);
979
980 if (par.type == para_Config) {
981 input_configure(in, &par);
982 }
983 continue; /* next paragraph */
984 }
985 }
986 }
987
988 /*
989 * Now read the actual paragraph, word by word, adding to
990 * the paragraph list.
991 *
992 * Mid-paragraph commands:
993 *
994 * \K \k
995 * \c \cw
996 * \e
997 * \i \ii
998 * \I
999 * \u
1000 * \W
1001 * \date
1002 * \\ \{ \}
1003 */
1004 parsestk = stk_new();
1005 style = word_Normal;
1006 spcstyle = word_WhiteSpace;
1007 indexing = FALSE;
1008 seenwhite = TRUE;
1009 while (t.type != tok_eop && t.type != tok_eof) {
1010 iswhite = FALSE;
1011 already = FALSE;
1012
1013 /* Handle implicit paragraph breaks after \IM, \BR etc */
1014 if (start_cmd != c__invalid &&
1015 t.type == tok_cmd && t.cmd == start_cmd) {
1016 already = TRUE; /* inhibit get_token at top of loop */
1017 break;
1018 }
1019
1020 if (t.type == tok_cmd && t.cmd == c__nop) {
1021 dtor(t), t = get_token(in);
1022 continue; /* do nothing! */
1023 }
1024
1025 if (t.type == tok_cmd && t.cmd == c__escaped) {
1026 t.type = tok_word; /* nice and simple */
1027 t.aux = 0; /* even if `\-' - nonbreaking! */
1028 }
1029 if (t.type == tok_cmd && t.cmd == c__nbsp) {
1030 t.type = tok_word; /* nice and simple */
1031 sfree(t.text);
1032 t.text = ustrdup(L" "); /* text is ` ' not `_' */
1033 t.aux = 0; /* (nonbreaking) */
1034 }
1035 switch (t.type) {
1036 case tok_white:
1037 if (whptr == &par.words)
1038 break; /* strip whitespace at start of para */
1039 wd.text = NULL;
1040 wd.type = spcstyle;
1041 wd.alt = NULL;
1042 wd.aux = 0;
1043 wd.fpos = t.pos;
1044 wd.breaks = FALSE;
1045
1046 /*
1047 * Inhibit use of whitespace if it's (probably the
1048 * newline) before a repeat \IM / \BR type
1049 * directive.
1050 */
1051 if (start_cmd != c__invalid) {
1052 dtor(t), t = get_token(in);
1053 already = TRUE;
1054 if (t.type == tok_cmd && t.cmd == start_cmd)
1055 break;
1056 }
1057
1058 if (indexing)
1059 rdadd(&indexstr, ' ');
1060 if (!indexing || index_visible)
1061 addword(wd, &whptr);
1062 if (indexing)
1063 addword(wd, &idximplicit);
1064 iswhite = TRUE;
1065 break;
1066 case tok_word:
1067 if (indexing)
1068 rdadds(&indexstr, t.text);
1069 wd.type = style;
1070 wd.alt = NULL;
1071 wd.aux = 0;
1072 wd.fpos = t.pos;
1073 wd.breaks = t.aux;
1074 if (!indexing || index_visible) {
1075 wd.text = ustrdup(t.text);
1076 addword(wd, &whptr);
1077 }
1078 if (indexing) {
1079 wd.text = ustrdup(t.text);
1080 addword(wd, &idximplicit);
1081 }
1082 break;
1083 case tok_lbrace:
1084 error(err_unexbrace, &t.pos);
1085 /* Error recovery: push nop */
1086 sitem = mknew(struct stack_item);
1087 sitem->type = stack_nop;
1088 sitem->fpos = t.pos;
1089 stk_push(parsestk, sitem);
1090 break;
1091 case tok_rbrace:
1092 sitem = stk_pop(parsestk);
1093 if (!sitem) {
1094 /*
1095 * This closing brace could have been an
1096 * indication that the cross-paragraph stack
1097 * wants popping. Accordingly, we treat it here
1098 * as an indication that the paragraph is over.
1099 */
1100 already = TRUE;
1101 goto finished_para;
1102 } else {
1103 if (sitem->type & stack_ualt) {
1104 whptr = sitem->whptr;
1105 idximplicit = sitem->idximplicit;
1106 }
1107 if (sitem->type & stack_style) {
1108 style = word_Normal;
1109 spcstyle = word_WhiteSpace;
1110 }
1111 if (sitem->type & stack_idx) {
1112 indexword->text = ustrdup(indexstr.text);
1113 if (index_downcase) {
1114 word *w;
1115
1116 ustrlow(indexword->text);
1117 ustrlow(indexstr.text);
1118
1119 for (w = idxwordlist; w; w = w->next)
1120 if (w->text)
1121 ustrlow(w->text);
1122 }
1123 indexing = FALSE;
1124 rdadd(&indexstr, L'\0');
1125 index_merge(idx, FALSE, indexstr.text,
1126 idxwordlist, &sitem->fpos);
1127 sfree(indexstr.text);
1128 }
1129 if (sitem->type & stack_hyper) {
1130 wd.text = NULL;
1131 wd.type = word_HyperEnd;
1132 wd.alt = NULL;
1133 wd.aux = 0;
1134 wd.fpos = t.pos;
1135 wd.breaks = FALSE;
1136 if (!indexing || index_visible)
1137 addword(wd, &whptr);
1138 if (indexing)
1139 addword(wd, &idximplicit);
1140 }
1141 if (sitem->type & stack_quote) {
1142 wd.text = NULL;
1143 wd.type = toquotestyle(style);
1144 wd.alt = NULL;
1145 wd.aux = quote_Close;
1146 wd.fpos = t.pos;
1147 wd.breaks = FALSE;
1148 if (!indexing || index_visible)
1149 addword(wd, &whptr);
1150 if (indexing) {
1151 rdadd(&indexstr, L'"');
1152 addword(wd, &idximplicit);
1153 }
1154 }
1155 }
1156 sfree(sitem);
1157 break;
1158 case tok_cmd:
1159 switch (t.cmd) {
1160 case c__comment:
1161 /*
1162 * In-paragraph comment: \#{ balanced braces }
1163 *
1164 * Anything goes here; even tok_eop. We should
1165 * eat whitespace after the close brace _if_
1166 * there was whitespace before the \#.
1167 */
1168 dtor(t), t = get_token(in);
1169 if (t.type != tok_lbrace) {
1170 error(err_explbr, &t.pos);
1171 } else {
1172 int braces = 1;
1173 while (braces > 0) {
1174 dtor(t), t = get_token(in);
1175 if (t.type == tok_lbrace)
1176 braces++;
1177 else if (t.type == tok_rbrace)
1178 braces--;
1179 else if (t.type == tok_eof) {
1180 error(err_commenteof, &t.pos);
1181 break;
1182 }
1183 }
1184 }
1185 if (seenwhite) {
1186 already = TRUE;
1187 dtor(t), t = get_token(in);
1188 if (t.type == tok_white) {
1189 iswhite = TRUE;
1190 already = FALSE;
1191 }
1192 }
1193 break;
1194 case c_q:
1195 dtor(t), t = get_token(in);
1196 if (t.type != tok_lbrace) {
1197 error(err_explbr, &t.pos);
1198 } else {
1199 wd.text = NULL;
1200 wd.type = toquotestyle(style);
1201 wd.alt = NULL;
1202 wd.aux = quote_Open;
1203 wd.fpos = t.pos;
1204 wd.breaks = FALSE;
1205 if (!indexing || index_visible)
1206 addword(wd, &whptr);
1207 if (indexing) {
1208 rdadd(&indexstr, L'"');
1209 addword(wd, &idximplicit);
1210 }
1211 sitem = mknew(struct stack_item);
1212 sitem->fpos = t.pos;
1213 sitem->type = stack_quote;
1214 stk_push(parsestk, sitem);
1215 }
1216 break;
1217 case c_K:
1218 case c_k:
1219 case c_W:
1220 case c_date:
1221 /*
1222 * Keyword, hyperlink, or \date. We expect a
1223 * left brace, some text, and then a right
1224 * brace. No nesting; no arguments.
1225 */
1226 wd.fpos = t.pos;
1227 wd.breaks = FALSE;
1228 if (t.cmd == c_K)
1229 wd.type = word_UpperXref;
1230 else if (t.cmd == c_k)
1231 wd.type = word_LowerXref;
1232 else if (t.cmd == c_W)
1233 wd.type = word_HyperLink;
1234 else
1235 wd.type = word_Normal;
1236 dtor(t), t = get_token(in);
1237 if (t.type != tok_lbrace) {
1238 if (wd.type == word_Normal) {
1239 time_t thetime = time(NULL);
1240 struct tm *broken = localtime(&thetime);
1241 already = TRUE;
1242 wdtext = ustrftime(NULL, broken);
1243 wd.type = style;
1244 } else {
1245 error(err_explbr, &t.pos);
1246 wdtext = NULL;
1247 }
1248 } else {
1249 rdstring rs = { 0, 0, NULL };
1250 while (dtor(t), t = get_token(in),
1251 t.type == tok_word || t.type == tok_white) {
1252 if (t.type == tok_white)
1253 rdadd(&rs, ' ');
1254 else
1255 rdadds(&rs, t.text);
1256 }
1257 if (wd.type == word_Normal) {
1258 time_t thetime = time(NULL);
1259 struct tm *broken = localtime(&thetime);
1260 wdtext = ustrftime(rs.text, broken);
1261 wd.type = style;
1262 } else {
1263 wdtext = ustrdup(rs.text);
1264 }
1265 sfree(rs.text);
1266 if (t.type != tok_rbrace) {
1267 error(err_kwexprbr, &t.pos);
1268 }
1269 }
1270 wd.alt = NULL;
1271 wd.aux = 0;
1272 if (!indexing || index_visible) {
1273 wd.text = ustrdup(wdtext);
1274 addword(wd, &whptr);
1275 }
1276 if (indexing) {
1277 wd.text = ustrdup(wdtext);
1278 addword(wd, &idximplicit);
1279 }
1280 sfree(wdtext);
1281 if (wd.type == word_HyperLink) {
1282 /*
1283 * Hyperlinks are different: they then
1284 * expect another left brace, to begin
1285 * delimiting the text marked by the link.
1286 */
1287 dtor(t), t = get_token(in);
1288 sitem = mknew(struct stack_item);
1289 sitem->fpos = wd.fpos;
1290 sitem->type = stack_hyper;
1291 /*
1292 * Special cases: \W{}\i, \W{}\ii
1293 */
1294 if (t.type == tok_cmd &&
1295 (t.cmd == c_i || t.cmd == c_ii)) {
1296 if (indexing) {
1297 error(err_nestedindex, &t.pos);
1298 } else {
1299 /* Add an index-reference word with no
1300 * text as yet */
1301 wd.type = word_IndexRef;
1302 wd.text = NULL;
1303 wd.alt = NULL;
1304 wd.aux = 0;
1305 wd.breaks = FALSE;
1306 indexword = addword(wd, &whptr);
1307 /* Set up a rdstring to read the
1308 * index text */
1309 indexstr = nullrs;
1310 /* Flags so that we do the Right
1311 * Things with text */
1312 index_visible = (type != c_I);
1313 index_downcase = (type == c_ii);
1314 indexing = TRUE;
1315 idxwordlist = NULL;
1316 idximplicit = &idxwordlist;
1317
1318 sitem->type |= stack_idx;
1319 }
1320 dtor(t), t = get_token(in);
1321 }
1322 /*
1323 * Special cases: \W{}\c, \W{}\e, \W{}\cw
1324 */
1325 if (t.type == tok_cmd &&
1326 (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
1327 if (style != word_Normal)
1328 error(err_nestedstyles, &t.pos);
1329 else {
1330 style = (t.cmd == c_c ? word_Code :
1331 t.cmd == c_cw ? word_WeakCode :
1332 word_Emph);
1333 spcstyle = tospacestyle(style);
1334 sitem->type |= stack_style;
1335 }
1336 dtor(t), t = get_token(in);
1337 }
1338 if (t.type != tok_lbrace) {
1339 error(err_explbr, &t.pos);
1340 sfree(sitem);
1341 } else {
1342 stk_push(parsestk, sitem);
1343 }
1344 }
1345 break;
1346 case c_c:
1347 case c_cw:
1348 case c_e:
1349 type = t.cmd;
1350 if (style != word_Normal) {
1351 error(err_nestedstyles, &t.pos);
1352 /* Error recovery: eat lbrace, push nop. */
1353 dtor(t), t = get_token(in);
1354 sitem = mknew(struct stack_item);
1355 sitem->fpos = t.pos;
1356 sitem->type = stack_nop;
1357 stk_push(parsestk, sitem);
1358 }
1359 dtor(t), t = get_token(in);
1360 if (t.type != tok_lbrace) {
1361 error(err_explbr, &t.pos);
1362 } else {
1363 style = (type == c_c ? word_Code :
1364 type == c_cw ? word_WeakCode :
1365 word_Emph);
1366 spcstyle = tospacestyle(style);
1367 sitem = mknew(struct stack_item);
1368 sitem->fpos = t.pos;
1369 sitem->type = stack_style;
1370 stk_push(parsestk, sitem);
1371 }
1372 break;
1373 case c_i:
1374 case c_ii:
1375 case c_I:
1376 type = t.cmd;
1377 if (indexing) {
1378 error(err_nestedindex, &t.pos);
1379 /* Error recovery: eat lbrace, push nop. */
1380 dtor(t), t = get_token(in);
1381 sitem = mknew(struct stack_item);
1382 sitem->fpos = t.pos;
1383 sitem->type = stack_nop;
1384 stk_push(parsestk, sitem);
1385 }
1386 sitem = mknew(struct stack_item);
1387 sitem->fpos = t.pos;
1388 sitem->type = stack_idx;
1389 dtor(t), t = get_token(in);
1390 /*
1391 * Special cases: \i\c, \i\e, \i\cw
1392 */
1393 wd.fpos = t.pos;
1394 if (t.type == tok_cmd &&
1395 (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
1396 if (style != word_Normal)
1397 error(err_nestedstyles, &t.pos);
1398 else {
1399 style = (t.cmd == c_c ? word_Code :
1400 t.cmd == c_cw ? word_WeakCode :
1401 word_Emph);
1402 spcstyle = tospacestyle(style);
1403 sitem->type |= stack_style;
1404 }
1405 dtor(t), t = get_token(in);
1406 }
1407 if (t.type != tok_lbrace) {
1408 sfree(sitem);
1409 error(err_explbr, &t.pos);
1410 } else {
1411 /* Add an index-reference word with no text as yet */
1412 wd.type = word_IndexRef;
1413 wd.text = NULL;
1414 wd.alt = NULL;
1415 wd.aux = 0;
1416 wd.breaks = FALSE;
1417 indexword = addword(wd, &whptr);
1418 /* Set up a rdstring to read the index text */
1419 indexstr = nullrs;
1420 /* Flags so that we do the Right Things with text */
1421 index_visible = (type != c_I);
1422 index_downcase = (type == c_ii);
1423 indexing = TRUE;
1424 idxwordlist = NULL;
1425 idximplicit = &idxwordlist;
1426 /* Stack item to close the indexing on exit */
1427 stk_push(parsestk, sitem);
1428 }
1429 break;
1430 case c_u:
1431 uchr = t.aux;
1432 utext[0] = uchr; utext[1] = 0;
1433 wd.type = style;
1434 wd.breaks = FALSE;
1435 wd.alt = NULL;
1436 wd.aux = 0;
1437 wd.fpos = t.pos;
1438 if (!indexing || index_visible) {
1439 wd.text = ustrdup(utext);
1440 uword = addword(wd, &whptr);
1441 } else
1442 uword = NULL;
1443 if (indexing) {
1444 wd.text = ustrdup(utext);
1445 iword = addword(wd, &idximplicit);
1446 } else
1447 iword = NULL;
1448 dtor(t), t = get_token(in);
1449 if (t.type == tok_lbrace) {
1450 /*
1451 * \u with a left brace. Until the brace
1452 * closes, all further words go on a
1453 * sidetrack from the main thread of the
1454 * paragraph.
1455 */
1456 sitem = mknew(struct stack_item);
1457 sitem->fpos = t.pos;
1458 sitem->type = stack_ualt;
1459 sitem->whptr = whptr;
1460 sitem->idximplicit = idximplicit;
1461 stk_push(parsestk, sitem);
1462 whptr = uword ? &uword->alt : NULL;
1463 idximplicit = iword ? &iword->alt : NULL;
1464 } else {
1465 if (indexing)
1466 rdadd(&indexstr, uchr);
1467 already = TRUE;
1468 }
1469 break;
1470 default:
1471 if (!macrolookup(macros, in, t.text, &t.pos))
1472 error(err_badmidcmd, t.text, &t.pos);
1473 break;
1474 }
1475 }
1476 if (!already)
1477 dtor(t), t = get_token(in);
1478 seenwhite = iswhite;
1479 }
1480 finished_para:
1481 /* Check the stack is empty */
1482 if (stk_top(parsestk)) {
1483 while ((sitem = stk_pop(parsestk)))
1484 sfree(sitem);
1485 error(err_missingrbrace, &t.pos);
1486 }
1487 stk_free(parsestk);
1488 prev_para_type = par.type;
1489 addpara(par, ret);
1490 if (t.type == tok_eof)
1491 already = TRUE;
1492 }
1493
1494 if (stk_top(crossparastk)) {
1495 void *p;
1496
1497 error(err_missingrbrace2, &t.pos);
1498 while ((p = stk_pop(crossparastk)))
1499 sfree(p);
1500 }
1501
1502 /*
1503 * We break to here rather than returning, because otherwise
1504 * this cleanup doesn't happen.
1505 */
1506 dtor(t);
1507 macrocleanup(macros);
1508
1509 stk_free(crossparastk);
1510 }
1511
1512 paragraph *read_input(input *in, indexdata *idx) {
1513 paragraph *head = NULL;
1514 paragraph **hptr = &head;
1515
1516 while (in->currindex < in->nfiles) {
1517 in->currfp = fopen(in->filenames[in->currindex], "r");
1518 if (in->currfp) {
1519 setpos(in, in->filenames[in->currindex]);
1520 in->charset = in->defcharset;
1521 in->csstate = charset_init_state;
1522 in->wcpos = in->nwc = 0;
1523 in->pushback_chars = NULL;
1524 read_file(&hptr, in, idx);
1525 }
1526 in->currindex++;
1527 }
1528
1529 return head;
1530 }