Enforce that \q may not be used anywhere within \c. It shouldn't be
[sgt/halibut] / input.c
1 /*
2 * input.c: read the source form
3 */
4
5 #include <stdio.h>
6 #include <assert.h>
7 #include <time.h>
8 #include "halibut.h"
9
10 #define TAB_STOP 8 /* for column number tracking */
11
12 static void setpos(input *in, char *fname) {
13 in->pos.filename = fname;
14 in->pos.line = 1;
15 in->pos.col = (in->reportcols ? 1 : -1);
16 }
17
18 static void unget(input *in, int c, filepos *pos) {
19 if (in->npushback >= in->pushbacksize) {
20 in->pushbacksize = in->npushback + 16;
21 in->pushback = sresize(in->pushback, in->pushbacksize, pushback);
22 }
23 in->pushback[in->npushback].chr = c;
24 in->pushback[in->npushback].pos = *pos; /* structure copy */
25 in->npushback++;
26 }
27
28 /* ---------------------------------------------------------------------- */
29 /*
30 * Macro subsystem
31 */
32 typedef struct macro_Tag macro;
33 struct macro_Tag {
34 wchar_t *name, *text;
35 };
36 struct macrostack_Tag {
37 macrostack *next;
38 wchar_t *text;
39 int ptr, npushback;
40 filepos pos;
41 };
42 static int macrocmp(void *av, void *bv) {
43 macro *a = (macro *)av, *b = (macro *)bv;
44 return ustrcmp(a->name, b->name);
45 }
46 static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text,
47 filepos fpos) {
48 macro *m = snew(macro);
49 m->name = name;
50 m->text = text;
51 if (add234(macros, m) != m) {
52 error(err_macroexists, &fpos, name);
53 sfree(name);
54 sfree(text);
55 }
56 }
57 static int macrolookup(tree234 *macros, input *in, wchar_t *name,
58 filepos *pos) {
59 macro m, *gotit;
60 m.name = name;
61 gotit = find234(macros, &m, NULL);
62 if (gotit) {
63 macrostack *expansion = snew(macrostack);
64 expansion->next = in->stack;
65 expansion->text = gotit->text;
66 expansion->pos = *pos; /* structure copy */
67 expansion->ptr = 0;
68 expansion->npushback = in->npushback;
69 in->stack = expansion;
70 return TRUE;
71 } else
72 return FALSE;
73 }
74 static void macrocleanup(tree234 *macros) {
75 int ti;
76 macro *m;
77 for (ti = 0; (m = (macro *)index234(macros, ti)) != NULL; ti++) {
78 sfree(m->name);
79 sfree(m->text);
80 sfree(m);
81 }
82 freetree234(macros);
83 }
84
85 static void input_configure(input *in, paragraph *cfg) {
86 assert(cfg->type == para_Config);
87
88 if (!ustricmp(cfg->keyword, L"input-charset")) {
89 char *csname = utoa_dup(uadv(cfg->keyword), CS_ASCII);
90 in->charset = charset_from_localenc(csname);
91 sfree(csname);
92 }
93 }
94
95 /*
96 * Can return EOF
97 */
98 static int get(input *in, filepos *pos, rdstringc *rsc) {
99 int pushbackpt = in->stack ? in->stack->npushback : 0;
100 if (in->npushback > pushbackpt) {
101 --in->npushback;
102 if (pos)
103 *pos = in->pushback[in->npushback].pos; /* structure copy */
104 return in->pushback[in->npushback].chr;
105 }
106 else if (in->stack) {
107 wchar_t c = in->stack->text[in->stack->ptr];
108 if (in->stack->text[++in->stack->ptr] == L'\0') {
109 macrostack *tmp = in->stack;
110 in->stack = tmp->next;
111 sfree(tmp);
112 }
113 return c;
114 }
115 else if (in->currfp) {
116
117 while (in->wcpos >= in->nwc) {
118
119 int c = getc(in->currfp);
120
121 if (c == EOF) {
122 fclose(in->currfp);
123 in->currfp = NULL;
124 return EOF;
125 }
126
127 if (rsc)
128 rdaddc(rsc, c);
129
130 /* Track line numbers, for error reporting */
131 if (pos)
132 *pos = in->pos;
133 if (in->reportcols) {
134 switch (c) {
135 case '\t':
136 in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP;
137 break;
138 case '\n':
139 in->pos.col = 1;
140 in->pos.line++;
141 break;
142 default:
143 in->pos.col++;
144 break;
145 }
146 } else {
147 in->pos.col = -1;
148 if (c == '\n')
149 in->pos.line++;
150 }
151
152 /*
153 * Do input character set translation, so that we return
154 * Unicode.
155 */
156 {
157 char buf[1];
158 char const *p;
159 int inlen;
160
161 buf[0] = (char)c;
162 p = buf;
163 inlen = 1;
164
165 in->nwc = charset_to_unicode(&p, &inlen,
166 in->wc, lenof(in->wc),
167 in->charset, &in->csstate,
168 NULL, 0);
169 assert(p == buf+1 && inlen == 0);
170
171 in->wcpos = 0;
172 }
173 }
174
175 return in->wc[in->wcpos++];
176
177 } else
178 return EOF;
179 }
180
181 /*
182 * Lexical analysis of source files.
183 */
184 typedef struct token_Tag token;
185 struct token_Tag {
186 int type;
187 int cmd, aux;
188 wchar_t *text;
189 char *origtext;
190 filepos pos;
191 };
/* Token types. */
enum {
    tok_eof = 0,                       /* end of file */
    tok_eop,                           /* end of paragraph */
    tok_white,                         /* whitespace */
    tok_word,                          /* a word or word fragment */
    tok_cmd,                           /* \command */
    tok_lbrace,                        /* { */
    tok_rbrace                         /* } */
};
201
/*
 * Halibut command keywords. The internal pseudo-commands come
 * first; the real commands follow in the same order as their names
 * sort.
 */
enum {
    c__invalid = 0,                    /* invalid command */
    c__comment,                        /* comment command (\#) */
    c__escaped,                        /* escaped character */
    c__nop,                            /* no-op */
    c__nbsp,                           /* nonbreaking space */

    c_A,                               /* appendix heading */
    c_B,                               /* bibliography entry */
    c_BR,                              /* bibliography rewrite */
    c_C,                               /* chapter heading */
    c_H,                               /* heading */
    c_I,                               /* invisible index mark */
    c_IM,                              /* index merge/rewrite */
    c_K,                               /* capitalised cross-reference */
    c_S,                               /* aux field is 0, 1, 2, ... */
    c_U,                               /* unnumbered-chapter heading */
    c_W,                               /* Web hyperlink */

    c_b,                               /* bulletted list */
    c_c,                               /* code */
    c_cfg,                             /* configuration directive */
    c_copyright,                       /* copyright statement */
    c_cw,                              /* weak code */
    c_date,                            /* document processing date */
    c_dd,                              /* description list: description */
    c_define,                          /* macro definition */
    c_dt,                              /* description list: described thing */
    c_e,                               /* emphasis */
    c_i,                               /* visible index mark */
    c_ii,                              /* uncapitalised visible index mark */
    c_k,                               /* uncapitalised cross-reference */
    c_lcont,                           /* continuation para(s) for list item */
    c_n,                               /* numbered list */
    c_nocite,                          /* bibliography trickery */
    c_preamble,                        /* (obsolete) preamble text */
    c_q,                               /* quote marks */
    c_quote,                           /* block-quoted paragraphs */
    c_rule,                            /* horizontal rule */
    c_title,                           /* document title */
    c_u,                               /* aux field is char code */
    c_versionid                        /* document RCS id */
};
244
/*
 * Character classification helpers. These are macros which may
 * evaluate their argument more than once, so only pass expressions
 * without side effects.
 *
 * Perhaps whitespace should be defined in a more Unicode-friendly way?
 */
#define iswhite(c) ( (c)==9 || (c)==10 || (c)==13 || (c)==32 )
#define isnl(c)    ( (c)==10 )
#define isdec(c)   ( ((c)>='0'&&(c)<='9') )
#define fromdec(c) ( (c)-'0' )
#define ishex(c)   ( isdec(c) || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f') )
#define fromhex(c) ( (c)<='9' ? fromdec(c) : ((c)&0xDF) - ('A'-10) )
#define iscmd(c)   ( isdec(c) || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z') )
253
/*
 * Keyword comparison function. Like strcmp, but between a wchar_t *
 * and a char *: returns the difference between the first pair of
 * characters that differ, or zero if the strings are equal.
 */
static int kwcmp(wchar_t const *p, char const *q) {
    for (;;) {
        int diff = *p - *q;

        /* Stop at the first mismatch, or when both strings have ended. */
        if (diff != 0 || *p == 0)
            return diff;

        p++;
        q++;
    }
}
265
266 /*
267 * Match a keyword.
268 */
269 static void match_kw(token *tok) {
270 /*
271 * FIXME. The ids are explicit in here so as to allow long-name
272 * equivalents to the various very short keywords.
273 */
274 static const struct { char const *name; int id; } keywords[] = {
275 {"#", c__comment}, /* comment command (\#) */
276 {"-", c__escaped}, /* nonbreaking hyphen */
277 {".", c__nop}, /* no-op */
278 {"A", c_A}, /* appendix heading */
279 {"B", c_B}, /* bibliography entry */
280 {"BR", c_BR}, /* bibliography rewrite */
281 {"C", c_C}, /* chapter heading */
282 {"H", c_H}, /* heading */
283 {"I", c_I}, /* invisible index mark */
284 {"IM", c_IM}, /* index merge/rewrite */
285 {"K", c_K}, /* capitalised cross-reference */
286 {"U", c_U}, /* unnumbered-chapter heading */
287 {"W", c_W}, /* Web hyperlink */
288 {"\\", c__escaped}, /* escaped backslash (\\) */
289 {"_", c__nbsp}, /* nonbreaking space (\_) */
290 {"b", c_b}, /* bulletted list */
291 {"c", c_c}, /* code */
292 {"cfg", c_cfg}, /* configuration directive */
293 {"copyright", c_copyright}, /* copyright statement */
294 {"cw", c_cw}, /* weak code */
295 {"date", c_date}, /* document processing date */
296 {"dd", c_dd}, /* description list: description */
297 {"define", c_define}, /* macro definition */
298 {"dt", c_dt}, /* description list: described thing */
299 {"e", c_e}, /* emphasis */
300 {"i", c_i}, /* visible index mark */
301 {"ii", c_ii}, /* uncapitalised visible index mark */
302 {"k", c_k}, /* uncapitalised cross-reference */
303 {"lcont", c_lcont}, /* continuation para(s) for list item */
304 {"n", c_n}, /* numbered list */
305 {"nocite", c_nocite}, /* bibliography trickery */
306 {"preamble", c_preamble}, /* (obsolete) preamble text */
307 {"q", c_q}, /* quote marks */
308 {"quote", c_quote}, /* block-quoted paragraphs */
309 {"rule", c_rule}, /* horizontal rule */
310 {"title", c_title}, /* document title */
311 {"versionid", c_versionid}, /* document RCS id */
312 {"{", c__escaped}, /* escaped lbrace (\{) */
313 {"}", c__escaped}, /* escaped rbrace (\}) */
314 };
315 int i, j, k, c;
316
317 /*
318 * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
319 * doesn't match correctly, we just fall through to the
320 * binary-search phase.
321 */
322 if (tok->text[0] == 'S') {
323 /* We expect numeric characters thereafter. */
324 wchar_t *p = tok->text+1;
325 int n;
326 if (!*p)
327 n = 1;
328 else {
329 n = 0;
330 while (*p && isdec(*p)) {
331 n = 10 * n + fromdec(*p);
332 p++;
333 }
334 }
335 if (!*p) {
336 tok->cmd = c_S;
337 tok->aux = n;
338 return;
339 }
340 } else if (tok->text[0] == 'u') {
341 /* We expect hex characters thereafter. */
342 wchar_t *p = tok->text+1;
343 int n = 0;
344 while (*p && ishex(*p)) {
345 n = 16 * n + fromhex(*p);
346 p++;
347 }
348 if (!*p) {
349 tok->cmd = c_u;
350 tok->aux = n;
351 return;
352 }
353 }
354
355 i = -1;
356 j = sizeof(keywords)/sizeof(*keywords);
357 while (j-i > 1) {
358 k = (i+j)/2;
359 c = kwcmp(tok->text, keywords[k].name);
360 if (c < 0)
361 j = k;
362 else if (c > 0)
363 i = k;
364 else /* c == 0 */ {
365 tok->cmd = keywords[k].id;
366 return;
367 }
368 }
369
370 tok->cmd = c__invalid;
371 }
372
373
374 /*
375 * Read a token from the input file, in the normal way (`normal' in
376 * the sense that code paragraphs work a different way).
377 */
378 token get_token(input *in) {
379 int c;
380 int nls;
381 int prevpos;
382 token ret;
383 rdstring rs = { 0, 0, NULL };
384 rdstringc rsc = { 0, 0, NULL };
385 filepos cpos;
386
387 ret.text = NULL; /* default */
388 ret.origtext = NULL; /* default */
389 if (in->pushback_chars) {
390 rdaddsc(&rsc, in->pushback_chars);
391 sfree(in->pushback_chars);
392 in->pushback_chars = NULL;
393 }
394 c = get(in, &cpos, &rsc);
395 ret.pos = cpos;
396 if (iswhite(c)) { /* tok_white or tok_eop */
397 nls = 0;
398 prevpos = 0;
399 do {
400 if (isnl(c))
401 nls++;
402 prevpos = rsc.pos;
403 } while ((c = get(in, &cpos, &rsc)) != EOF && iswhite(c));
404 if (c == EOF) {
405 ret.type = tok_eof;
406 sfree(rsc.text);
407 return ret;
408 }
409 if (rsc.text) {
410 in->pushback_chars = dupstr(rsc.text + prevpos);
411 sfree(rsc.text);
412 }
413 unget(in, c, &cpos);
414 ret.type = (nls > 1 ? tok_eop : tok_white);
415 return ret;
416 } else if (c == EOF) { /* tok_eof */
417 ret.type = tok_eof;
418 sfree(rsc.text);
419 return ret;
420 } else if (c == '\\') { /* tok_cmd */
421 rsc.pos = prevpos = 0;
422 c = get(in, &cpos, &rsc);
423 if (c == '-' || c == '\\' || c == '_' ||
424 c == '#' || c == '{' || c == '}' || c == '.') {
425 /* single-char command */
426 rdadd(&rs, c);
427 } else if (c == 'u') {
428 int len = 0;
429 do {
430 rdadd(&rs, c);
431 len++;
432 prevpos = rsc.pos;
433 c = get(in, &cpos, &rsc);
434 } while (ishex(c) && len < 5);
435 unget(in, c, &cpos);
436 } else if (iscmd(c)) {
437 do {
438 rdadd(&rs, c);
439 prevpos = rsc.pos;
440 c = get(in, &cpos, &rsc);
441 } while (iscmd(c));
442 unget(in, c, &cpos);
443 }
444 /*
445 * Now match the command against the list of available
446 * ones.
447 */
448 ret.type = tok_cmd;
449 ret.text = ustrdup(rs.text);
450 if (rsc.text) {
451 in->pushback_chars = dupstr(rsc.text + prevpos);
452 rsc.text[prevpos] = '\0';
453 ret.origtext = dupstr(rsc.text);
454 } else {
455 ret.origtext = dupstr("");
456 }
457 match_kw(&ret);
458 sfree(rs.text);
459 sfree(rsc.text);
460 return ret;
461 } else if (c == '{') { /* tok_lbrace */
462 ret.type = tok_lbrace;
463 sfree(rsc.text);
464 return ret;
465 } else if (c == '}') { /* tok_rbrace */
466 ret.type = tok_rbrace;
467 sfree(rsc.text);
468 return ret;
469 } else { /* tok_word */
470 /*
471 * Read a word: the longest possible contiguous sequence of
472 * things other than whitespace, backslash, braces and
473 * hyphen. A hyphen terminates the word but is returned as
474 * part of it; everything else is pushed back for the next
475 * token. The `aux' field contains TRUE if the word ends in
476 * a hyphen.
477 */
478 ret.aux = FALSE; /* assumed for now */
479 prevpos = 0;
480 while (1) {
481 if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) {
482 /* Put back the character that caused termination */
483 unget(in, c, &cpos);
484 break;
485 } else {
486 rdadd(&rs, c);
487 if (c == '-') {
488 prevpos = rsc.pos;
489 ret.aux = TRUE;
490 break; /* hyphen terminates word */
491 }
492 }
493 prevpos = rsc.pos;
494 c = get(in, &cpos, &rsc);
495 }
496 ret.type = tok_word;
497 ret.text = ustrdup(rs.text);
498 if (rsc.text) {
499 in->pushback_chars = dupstr(rsc.text + prevpos);
500 rsc.text[prevpos] = '\0';
501 ret.origtext = dupstr(rsc.text);
502 } else {
503 ret.origtext = dupstr("");
504 }
505 sfree(rs.text);
506 sfree(rsc.text);
507 return ret;
508 }
509 }
510
511 /*
512 * Determine whether the next input character is an open brace (for
513 * telling code paragraphs from paragraphs which merely start with
514 * code).
515 */
516 int isbrace(input *in) {
517 int c;
518 filepos cpos;
519
520 c = get(in, &cpos, NULL);
521 unget(in, c, &cpos);
522 return (c == '{');
523 }
524
525 /*
526 * Read the rest of a line that starts `\c'. Including nothing at
527 * all (tok_word with empty text).
528 */
529 token get_codepar_token(input *in) {
530 int c;
531 token ret;
532 rdstring rs = { 0, 0, NULL };
533 filepos cpos;
534
535 ret.type = tok_word;
536 ret.origtext = NULL;
537 c = get(in, &cpos, NULL); /* expect (and discard) one space */
538 ret.pos = cpos;
539 if (c == ' ') {
540 c = get(in, &cpos, NULL);
541 ret.pos = cpos;
542 }
543 while (!isnl(c) && c != EOF) {
544 int c2 = c;
545 c = get(in, &cpos, NULL);
546 /* Discard \r just before \n. */
547 if (c2 != 13 || !isnl(c))
548 rdadd(&rs, c2);
549 }
550 unget(in, c, &cpos);
551 ret.text = ustrdup(rs.text);
552 sfree(rs.text);
553 return ret;
554 }
555
556 /*
557 * Adds a new word to a linked list
558 */
559 static word *addword(word newword, word ***hptrptr) {
560 word *mnewword;
561 if (!hptrptr)
562 return NULL;
563 mnewword = snew(word);
564 *mnewword = newword; /* structure copy */
565 mnewword->next = NULL;
566 **hptrptr = mnewword;
567 *hptrptr = &mnewword->next;
568 return mnewword;
569 }
570
571 /*
572 * Adds a new paragraph to a linked list
573 */
574 static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
575 paragraph *mnewpara = snew(paragraph);
576 *mnewpara = newpara; /* structure copy */
577 mnewpara->next = NULL;
578 **hptrptr = mnewpara;
579 *hptrptr = &mnewpara->next;
580 return mnewpara;
581 }
582
/*
 * Destructor before token is reassigned; should catch most memory
 * leaks. Frees the token's `text' and `origtext'. The argument is
 * expanded twice, so it must have no side effects; it is
 * parenthesised so that any token-valued expression can be passed.
 */
#define dtor(t) ( sfree((t).text), sfree((t).origtext) )
588
589 /*
590 * Reads a single file (ie until get() returns EOF)
591 */
592 static void read_file(paragraph ***ret, input *in, indexdata *idx) {
593 token t;
594 paragraph par;
595 word wd, **whptr, **idximplicit;
596 tree234 *macros;
597 wchar_t utext[2], *wdtext;
598 int style, spcstyle;
599 int already;
600 int iswhite, seenwhite;
601 int type;
602 int prev_para_type;
603 struct stack_item {
604 enum {
605 stack_nop = 0, /* do nothing (for error recovery) */
606 stack_ualt = 1, /* \u alternative */
607 stack_style = 2, /* \e, \c, \cw */
608 stack_idx = 4, /* \I, \i, \ii */
609 stack_hyper = 8, /* \W */
610 stack_quote = 16, /* \q */
611 } type;
612 word **whptr; /* to restore from \u alternatives */
613 word **idximplicit; /* to restore from \u alternatives */
614 filepos fpos;
615 int in_code;
616 } *sitem;
617 stack parsestk;
618 struct crossparaitem {
619 int type; /* currently c_lcont, c_quote or -1 */
620 int seen_lcont, seen_quote;
621 };
622 stack crossparastk;
623 word *indexword, *uword, *iword;
624 word *idxwordlist;
625 rdstring indexstr;
626 int index_downcase, index_visible, indexing;
627 const rdstring nullrs = { 0, 0, NULL };
628 wchar_t uchr;
629
630 t.text = NULL;
631 t.origtext = NULL;
632 macros = newtree234(macrocmp);
633 already = FALSE;
634
635 crossparastk = stk_new();
636
637 /*
638 * Loop on each paragraph.
639 */
640 while (1) {
641 int start_cmd = c__invalid;
642 par.words = NULL;
643 par.keyword = NULL;
644 par.origkeyword = NULL;
645 whptr = &par.words;
646
647 /*
648 * Get a token.
649 */
650 do {
651 if (!already) {
652 dtor(t), t = get_token(in);
653 }
654 already = FALSE;
655 } while (t.type == tok_eop);
656 if (t.type == tok_eof)
657 break;
658
659 /*
660 * Parse code paragraphs separately.
661 */
662 if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
663 int wtype = word_WeakCode;
664
665 par.type = para_Code;
666 par.fpos = t.pos;
667 while (1) {
668 dtor(t), t = get_codepar_token(in);
669 wd.type = wtype;
670 wd.breaks = FALSE; /* shouldn't need this... */
671 wd.text = ustrdup(t.text);
672 wd.alt = NULL;
673 wd.fpos = t.pos;
674 addword(wd, &whptr);
675 dtor(t), t = get_token(in);
676 if (t.type == tok_white) {
677 /*
678 * The newline after a code-paragraph line
679 */
680 dtor(t), t = get_token(in);
681 }
682 if (t.type == tok_eop || t.type == tok_eof ||
683 t.type == tok_rbrace) { /* might be } terminating \lcont */
684 if (t.type == tok_rbrace)
685 already = TRUE;
686 break;
687 } else if (t.type == tok_cmd && t.cmd == c_c) {
688 wtype = word_WeakCode;
689 } else if (t.type == tok_cmd && t.cmd == c_e &&
690 wtype == word_WeakCode) {
691 wtype = word_Emph;
692 } else {
693 error(err_brokencodepara, &t.pos);
694 prev_para_type = par.type;
695 addpara(par, ret);
696 while (t.type != tok_eop) /* error recovery: */
697 dtor(t), t = get_token(in); /* eat rest of paragraph */
698 goto codeparabroken; /* ick, but such is life */
699 }
700 }
701 prev_para_type = par.type;
702 addpara(par, ret);
703 codeparabroken:
704 continue;
705 }
706
707 /*
708 * Spot the special commands that define a grouping of more
709 * than one paragraph, and also the closing braces that
710 * finish them.
711 */
712 if (t.type == tok_cmd &&
713 (t.cmd == c_lcont || t.cmd == c_quote)) {
714 struct crossparaitem *sitem, *stop;
715 int cmd = t.cmd;
716
717 /*
718 * Expect, and swallow, an open brace.
719 */
720 dtor(t), t = get_token(in);
721 if (t.type != tok_lbrace) {
722 error(err_explbr, &t.pos);
723 continue;
724 }
725
726 /*
727 * Also expect, and swallow, any whitespace after that
728 * (a newline before a code paragraph wouldn't be
729 * surprising).
730 */
731 do {
732 dtor(t), t = get_token(in);
733 } while (t.type == tok_white);
734 already = TRUE;
735
736 if (cmd == c_lcont) {
737 /*
738 * \lcont causes a continuation of a list item into
739 * multiple paragraphs (which may in turn contain
740 * nested lists, code paras etc). Hence, the previous
741 * paragraph must be of a list type.
742 */
743 sitem = snew(struct crossparaitem);
744 stop = (struct crossparaitem *)stk_top(crossparastk);
745 if (stop)
746 *sitem = *stop;
747 else
748 sitem->seen_quote = sitem->seen_lcont = 0;
749
750 if (prev_para_type == para_Bullet ||
751 prev_para_type == para_NumberedList ||
752 prev_para_type == para_Description) {
753 sitem->type = c_lcont;
754 sitem->seen_lcont = 1;
755 par.type = para_LcontPush;
756 prev_para_type = par.type;
757 addpara(par, ret);
758 } else {
759 /*
760 * Push a null item on the cross-para stack so that
761 * when we see the corresponding closing brace we
762 * don't give a cascade error.
763 */
764 sitem->type = -1;
765 error(err_misplacedlcont, &t.pos);
766 }
767 } else {
768 /*
769 * \quote causes a group of paragraphs to be
770 * block-quoted (typically they will be indented a
771 * bit).
772 */
773 sitem = snew(struct crossparaitem);
774 stop = (struct crossparaitem *)stk_top(crossparastk);
775 if (stop)
776 *sitem = *stop;
777 else
778 sitem->seen_quote = sitem->seen_lcont = 0;
779 sitem->type = c_quote;
780 sitem->seen_quote = 1;
781 par.type = para_QuotePush;
782 prev_para_type = par.type;
783 addpara(par, ret);
784 }
785 stk_push(crossparastk, sitem);
786 continue;
787 } else if (t.type == tok_rbrace) {
788 struct crossparaitem *sitem = stk_pop(crossparastk);
789 if (!sitem)
790 error(err_unexbrace, &t.pos);
791 else {
792 switch (sitem->type) {
793 case c_lcont:
794 par.type = para_LcontPop;
795 prev_para_type = par.type;
796 addpara(par, ret);
797 break;
798 case c_quote:
799 par.type = para_QuotePop;
800 prev_para_type = par.type;
801 addpara(par, ret);
802 break;
803 }
804 sfree(sitem);
805 }
806 continue;
807 }
808
809 /*
810 * This token begins a paragraph. See if it's one of the
811 * special commands that define a paragraph type.
812 *
813 * (note that \# is special in a way, and \nocite takes no
814 * text)
815 */
816 par.type = para_Normal;
817 if (t.type == tok_cmd) {
818 int needkw;
819 int is_macro = FALSE;
820
821 par.fpos = t.pos;
822 switch (t.cmd) {
823 default:
824 needkw = -1;
825 break;
826 case c__invalid:
827 error(err_badparatype, t.text, &t.pos);
828 needkw = 4;
829 break;
830 case c__comment:
831 if (isbrace(in))
832 break; /* `\#{': isn't a comment para */
833 do {
834 dtor(t), t = get_token(in);
835 } while (t.type != tok_eop && t.type != tok_eof);
836 continue; /* next paragraph */
837 /*
838 * `needkw' values:
839 *
840 * 1 -- exactly one keyword
841 * 2 -- at least one keyword
842 * 4 -- any number of keywords including zero
843 * 8 -- at least one keyword and then nothing else
844 * 16 -- nothing at all! no keywords, no body
845 * 32 -- no keywords at all
846 */
847 case c_A: needkw = 2; par.type = para_Appendix; break;
848 case c_B: needkw = 2; par.type = para_Biblio; break;
849 case c_BR: needkw = 1; par.type = para_BR;
850 start_cmd = c_BR; break;
851 case c_C: needkw = 2; par.type = para_Chapter; break;
852 case c_H: needkw = 2; par.type = para_Heading;
853 par.aux = 0;
854 break;
855 case c_IM: needkw = 2; par.type = para_IM;
856 start_cmd = c_IM; break;
857 case c_S: needkw = 2; par.type = para_Subsect;
858 par.aux = t.aux; break;
859 case c_U: needkw = 32; par.type = para_UnnumberedChapter; break;
860 /* For \b and \n the keyword is optional */
861 case c_b: needkw = 4; par.type = para_Bullet; break;
862 case c_dt: needkw = 4; par.type = para_DescribedThing; break;
863 case c_dd: needkw = 4; par.type = para_Description; break;
864 case c_n: needkw = 4; par.type = para_NumberedList; break;
865 case c_cfg: needkw = 8; par.type = para_Config;
866 start_cmd = c_cfg; break;
867 case c_copyright: needkw = 32; par.type = para_Copyright; break;
868 case c_define: is_macro = TRUE; needkw = 1; break;
869 /* For \nocite the keyword is _everything_ */
870 case c_nocite: needkw = 8; par.type = para_NoCite; break;
871 case c_preamble: needkw = 32; par.type = para_Normal; break;
872 case c_rule: needkw = 16; par.type = para_Rule; break;
873 case c_title: needkw = 32; par.type = para_Title; break;
874 case c_versionid: needkw = 32; par.type = para_VersionID; break;
875 }
876
877 if (par.type == para_Chapter ||
878 par.type == para_Heading ||
879 par.type == para_Subsect ||
880 par.type == para_Appendix ||
881 par.type == para_UnnumberedChapter) {
882 struct crossparaitem *sitem = stk_top(crossparastk);
883 if (sitem && (sitem->seen_lcont || sitem->seen_quote)) {
884 error(err_sectmarkerinblock,
885 &t.pos,
886 (sitem->seen_lcont ? "lcont" : "quote"));
887 }
888 }
889
890 if (needkw > 0) {
891 rdstring rs = { 0, 0, NULL };
892 rdstringc rsc = { 0, 0, NULL };
893 int nkeys = 0;
894 filepos fp;
895
896 /* Get keywords. */
897 dtor(t), t = get_token(in);
898 fp = t.pos;
899 while (t.type == tok_lbrace) {
900 /* This is a keyword. */
901 nkeys++;
902 /* FIXME: there will be bugs if anyone specifies an
903 * empty keyword (\foo{}), so trap this case. */
904 while (dtor(t), t = get_token(in),
905 t.type == tok_word ||
906 t.type == tok_white ||
907 (t.type == tok_cmd && t.cmd == c__nbsp) ||
908 (t.type == tok_cmd && t.cmd == c__escaped) ||
909 (t.type == tok_cmd && t.cmd == c_u)) {
910 if (t.type == tok_white ||
911 (t.type == tok_cmd && t.cmd == c__nbsp)) {
912 rdadd(&rs, ' ');
913 rdaddc(&rsc, ' ');
914 } else if (t.type == tok_cmd && t.cmd == c_u) {
915 rdadd(&rs, t.aux);
916 rdaddc(&rsc, '\\');
917 rdaddsc(&rsc, t.origtext);
918 } else {
919 rdadds(&rs, t.text);
920 rdaddsc(&rsc, t.origtext);
921 }
922 }
923 if (t.type != tok_rbrace) {
924 error(err_kwunclosed, &t.pos);
925 continue;
926 }
927 rdadd(&rs, 0); /* add string terminator */
928 rdaddc(&rsc, 0); /* add string terminator */
929 dtor(t), t = get_token(in); /* eat right brace */
930 }
931
932 rdadd(&rs, 0); /* add string terminator */
933 rdaddc(&rsc, 0); /* add string terminator */
934
935 /* See whether we have the right number of keywords. */
936 if ((needkw & 48) && nkeys > 0)
937 error(err_kwillegal, &fp);
938 if ((needkw & 11) && nkeys == 0)
939 error(err_kwexpected, &fp);
940 if ((needkw & 5) && nkeys > 1)
941 error(err_kwtoomany, &fp);
942
943 if (is_macro) {
944 /*
945 * Macro definition. Get the rest of the line
946 * as a code-paragraph token, repeatedly until
947 * there's nothing more left of it. Separate
948 * with newlines.
949 */
950 rdstring macrotext = { 0, 0, NULL };
951 while (1) {
952 dtor(t), t = get_codepar_token(in);
953 if (macrotext.pos > 0)
954 rdadd(&macrotext, L'\n');
955 rdadds(&macrotext, t.text);
956 dtor(t), t = get_token(in);
957 if (t.type == tok_eop) break;
958 }
959 macrodef(macros, rs.text, macrotext.text, fp);
960 continue; /* next paragraph */
961 }
962
963 par.keyword = rdtrim(&rs);
964 par.origkeyword = rdtrimc(&rsc);
965
966 /* Move to EOP in case of needkw==8 or 16 (no body) */
967 if (needkw & 24) {
968 /* We allow whitespace even when we expect no para body */
969 while (t.type == tok_white)
970 dtor(t), t = get_token(in);
971 if (t.type != tok_eop && t.type != tok_eof &&
972 (start_cmd == c__invalid ||
973 t.type != tok_cmd || t.cmd != start_cmd)) {
974 error(err_bodyillegal, &t.pos);
975 /* Error recovery: eat the rest of the paragraph */
976 while (t.type != tok_eop && t.type != tok_eof &&
977 (start_cmd == c__invalid ||
978 t.type != tok_cmd || t.cmd != start_cmd))
979 dtor(t), t = get_token(in);
980 }
981 if (t.type == tok_cmd)
982 already = TRUE;/* inhibit get_token at top of loop */
983 prev_para_type = par.type;
984 addpara(par, ret);
985
986 if (par.type == para_Config) {
987 input_configure(in, &par);
988 }
989 continue; /* next paragraph */
990 }
991 }
992 }
993
994 /*
995 * Now read the actual paragraph, word by word, adding to
996 * the paragraph list.
997 *
998 * Mid-paragraph commands:
999 *
1000 * \K \k
1001 * \c \cw
1002 * \e
1003 * \i \ii
1004 * \I
1005 * \u
1006 * \W
1007 * \date
1008 * \\ \{ \}
1009 */
1010 parsestk = stk_new();
1011 style = word_Normal;
1012 spcstyle = word_WhiteSpace;
1013 indexing = FALSE;
1014 seenwhite = TRUE;
1015 while (t.type != tok_eop && t.type != tok_eof) {
1016 iswhite = FALSE;
1017 already = FALSE;
1018
1019 /* Handle implicit paragraph breaks after \IM, \BR etc */
1020 if (start_cmd != c__invalid &&
1021 t.type == tok_cmd && t.cmd == start_cmd) {
1022 already = TRUE; /* inhibit get_token at top of loop */
1023 break;
1024 }
1025
1026 if (t.type == tok_cmd && t.cmd == c__nop) {
1027 dtor(t), t = get_token(in);
1028 continue; /* do nothing! */
1029 }
1030
1031 if (t.type == tok_cmd && t.cmd == c__escaped) {
1032 t.type = tok_word; /* nice and simple */
1033 t.aux = 0; /* even if `\-' - nonbreaking! */
1034 }
1035 if (t.type == tok_cmd && t.cmd == c__nbsp) {
1036 t.type = tok_word; /* nice and simple */
1037 sfree(t.text);
1038 t.text = ustrdup(L" "); /* text is ` ' not `_' */
1039 t.aux = 0; /* (nonbreaking) */
1040 }
1041 switch (t.type) {
1042 case tok_white:
1043 if (whptr == &par.words)
1044 break; /* strip whitespace at start of para */
1045 wd.text = NULL;
1046 wd.type = spcstyle;
1047 wd.alt = NULL;
1048 wd.aux = 0;
1049 wd.fpos = t.pos;
1050 wd.breaks = FALSE;
1051
1052 /*
1053 * Inhibit use of whitespace if it's (probably the
1054 * newline) before a repeat \IM / \BR type
1055 * directive.
1056 */
1057 if (start_cmd != c__invalid) {
1058 dtor(t), t = get_token(in);
1059 already = TRUE;
1060 if (t.type == tok_cmd && t.cmd == start_cmd)
1061 break;
1062 }
1063
1064 if (indexing)
1065 rdadd(&indexstr, ' ');
1066 if (!indexing || index_visible)
1067 addword(wd, &whptr);
1068 if (indexing)
1069 addword(wd, &idximplicit);
1070 iswhite = TRUE;
1071 break;
1072 case tok_word:
1073 if (indexing)
1074 rdadds(&indexstr, t.text);
1075 wd.type = style;
1076 wd.alt = NULL;
1077 wd.aux = 0;
1078 wd.fpos = t.pos;
1079 wd.breaks = t.aux;
1080 if (!indexing || index_visible) {
1081 wd.text = ustrdup(t.text);
1082 addword(wd, &whptr);
1083 }
1084 if (indexing) {
1085 wd.text = ustrdup(t.text);
1086 addword(wd, &idximplicit);
1087 }
1088 break;
1089 case tok_lbrace:
1090 error(err_unexbrace, &t.pos);
1091 /* Error recovery: push nop */
1092 sitem = snew(struct stack_item);
1093 sitem->type = stack_nop;
1094 sitem->fpos = t.pos;
1095 stk_push(parsestk, sitem);
1096 break;
1097 case tok_rbrace:
1098 sitem = stk_pop(parsestk);
1099 if (!sitem) {
1100 /*
1101 * This closing brace could have been an
1102 * indication that the cross-paragraph stack
1103 * wants popping. Accordingly, we treat it here
1104 * as an indication that the paragraph is over.
1105 */
1106 already = TRUE;
1107 goto finished_para;
1108 } else {
1109 if (sitem->type & stack_ualt) {
1110 whptr = sitem->whptr;
1111 idximplicit = sitem->idximplicit;
1112 }
1113 if (sitem->type & stack_style) {
1114 style = word_Normal;
1115 spcstyle = word_WhiteSpace;
1116 }
1117 if (sitem->type & stack_idx) {
1118 indexword->text = ustrdup(indexstr.text);
1119 if (index_downcase) {
1120 word *w;
1121
1122 ustrlow(indexword->text);
1123 ustrlow(indexstr.text);
1124
1125 for (w = idxwordlist; w; w = w->next)
1126 if (w->text)
1127 ustrlow(w->text);
1128 }
1129 indexing = FALSE;
1130 rdadd(&indexstr, L'\0');
1131 index_merge(idx, FALSE, indexstr.text,
1132 idxwordlist, &sitem->fpos);
1133 sfree(indexstr.text);
1134 }
1135 if (sitem->type & stack_hyper) {
1136 wd.text = NULL;
1137 wd.type = word_HyperEnd;
1138 wd.alt = NULL;
1139 wd.aux = 0;
1140 wd.fpos = t.pos;
1141 wd.breaks = FALSE;
1142 if (!indexing || index_visible)
1143 addword(wd, &whptr);
1144 if (indexing)
1145 addword(wd, &idximplicit);
1146 }
1147 if (sitem->type & stack_quote) {
1148 wd.text = NULL;
1149 wd.type = toquotestyle(style);
1150 wd.alt = NULL;
1151 wd.aux = quote_Close;
1152 wd.fpos = t.pos;
1153 wd.breaks = FALSE;
1154 if (!indexing || index_visible)
1155 addword(wd, &whptr);
1156 if (indexing) {
1157 rdadd(&indexstr, L'"');
1158 addword(wd, &idximplicit);
1159 }
1160 }
1161 }
1162 sfree(sitem);
1163 break;
1164 case tok_cmd:
1165 switch (t.cmd) {
1166 case c__comment:
1167 /*
1168 * In-paragraph comment: \#{ balanced braces }
1169 *
1170 * Anything goes here; even tok_eop. We should
1171 * eat whitespace after the close brace _if_
1172 * there was whitespace before the \#.
1173 */
1174 dtor(t), t = get_token(in);
1175 if (t.type != tok_lbrace) {
1176 error(err_explbr, &t.pos);
1177 } else {
1178 int braces = 1;
1179 while (braces > 0) {
1180 dtor(t), t = get_token(in);
1181 if (t.type == tok_lbrace)
1182 braces++;
1183 else if (t.type == tok_rbrace)
1184 braces--;
1185 else if (t.type == tok_eof) {
1186 error(err_commenteof, &t.pos);
1187 break;
1188 }
1189 }
1190 }
1191 if (seenwhite) {
1192 already = TRUE;
1193 dtor(t), t = get_token(in);
1194 if (t.type == tok_white) {
1195 iswhite = TRUE;
1196 already = FALSE;
1197 }
1198 }
1199 break;
1200 case c_q:
1201 dtor(t), t = get_token(in);
1202 if (t.type != tok_lbrace) {
1203 error(err_explbr, &t.pos);
1204 } else {
1205 /*
1206 * Enforce that \q may not be used anywhere
1207 * within \c. (It shouldn't be necessary
1208 * since the whole point of \c should be
1209 * that the user wants to exercise exact
1210 * control over the glyphs used, and
1211 * forbidding it has the useful effect of
1212 * relieving some backends of having to
1213 * make difficult decisions.)
1214 */
1215 int stype;
1216
1217 if (style != word_Code && style != word_WeakCode) {
1218 wd.text = NULL;
1219 wd.type = toquotestyle(style);
1220 wd.alt = NULL;
1221 wd.aux = quote_Open;
1222 wd.fpos = t.pos;
1223 wd.breaks = FALSE;
1224 if (!indexing || index_visible)
1225 addword(wd, &whptr);
1226 if (indexing) {
1227 rdadd(&indexstr, L'"');
1228 addword(wd, &idximplicit);
1229 }
1230 stype = stack_quote;
1231 } else {
1232 error(err_codequote, &t.pos);
1233 stype = stack_nop;
1234 }
1235 sitem = snew(struct stack_item);
1236 sitem->fpos = t.pos;
1237 sitem->type = stype;
1238 stk_push(parsestk, sitem);
1239 }
1240 break;
1241 case c_K:
1242 case c_k:
1243 case c_W:
1244 case c_date:
1245 /*
1246 * Keyword, hyperlink, or \date. We expect a
1247 * left brace, some text, and then a right
1248 * brace. No nesting; no arguments.
1249 */
1250 wd.fpos = t.pos;
1251 wd.breaks = FALSE;
1252 if (t.cmd == c_K)
1253 wd.type = word_UpperXref;
1254 else if (t.cmd == c_k)
1255 wd.type = word_LowerXref;
1256 else if (t.cmd == c_W)
1257 wd.type = word_HyperLink;
1258 else
1259 wd.type = word_Normal;
1260 dtor(t), t = get_token(in);
1261 if (t.type != tok_lbrace) {
1262 if (wd.type == word_Normal) {
1263 time_t thetime = time(NULL);
1264 struct tm *broken = localtime(&thetime);
1265 already = TRUE;
1266 wdtext = ustrftime(NULL, broken);
1267 wd.type = style;
1268 } else {
1269 error(err_explbr, &t.pos);
1270 wdtext = NULL;
1271 }
1272 } else {
1273 rdstring rs = { 0, 0, NULL };
1274 while (dtor(t), t = get_token(in),
1275 t.type == tok_word || t.type == tok_white) {
1276 if (t.type == tok_white)
1277 rdadd(&rs, ' ');
1278 else
1279 rdadds(&rs, t.text);
1280 }
1281 if (wd.type == word_Normal) {
1282 time_t thetime = time(NULL);
1283 struct tm *broken = localtime(&thetime);
1284 wdtext = ustrftime(rs.text, broken);
1285 wd.type = style;
1286 } else {
1287 wdtext = ustrdup(rs.text);
1288 }
1289 sfree(rs.text);
1290 if (t.type != tok_rbrace) {
1291 error(err_kwexprbr, &t.pos);
1292 }
1293 }
1294 wd.alt = NULL;
1295 wd.aux = 0;
1296 if (!indexing || index_visible) {
1297 wd.text = ustrdup(wdtext);
1298 addword(wd, &whptr);
1299 }
1300 if (indexing) {
1301 wd.text = ustrdup(wdtext);
1302 addword(wd, &idximplicit);
1303 }
1304 sfree(wdtext);
1305 if (wd.type == word_HyperLink) {
1306 /*
1307 * Hyperlinks are different: they then
1308 * expect another left brace, to begin
1309 * delimiting the text marked by the link.
1310 */
1311 dtor(t), t = get_token(in);
1312 sitem = snew(struct stack_item);
1313 sitem->fpos = wd.fpos;
1314 sitem->type = stack_hyper;
1315 /*
1316 * Special cases: \W{}\i, \W{}\ii
1317 */
1318 if (t.type == tok_cmd &&
1319 (t.cmd == c_i || t.cmd == c_ii)) {
1320 if (indexing) {
1321 error(err_nestedindex, &t.pos);
1322 } else {
1323 /* Add an index-reference word with no
1324 * text as yet */
1325 wd.type = word_IndexRef;
1326 wd.text = NULL;
1327 wd.alt = NULL;
1328 wd.aux = 0;
1329 wd.breaks = FALSE;
1330 indexword = addword(wd, &whptr);
1331 /* Set up a rdstring to read the
1332 * index text */
1333 indexstr = nullrs;
1334 /* Flags so that we do the Right
1335 * Things with text */
1336 index_visible = (type != c_I);
1337 index_downcase = (type == c_ii);
1338 indexing = TRUE;
1339 idxwordlist = NULL;
1340 idximplicit = &idxwordlist;
1341
1342 sitem->type |= stack_idx;
1343 }
1344 dtor(t), t = get_token(in);
1345 }
1346 /*
1347 * Special cases: \W{}\c, \W{}\e, \W{}\cw
1348 */
1349 if (t.type == tok_cmd &&
1350 (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
1351 if (style != word_Normal)
1352 error(err_nestedstyles, &t.pos);
1353 else {
1354 style = (t.cmd == c_c ? word_Code :
1355 t.cmd == c_cw ? word_WeakCode :
1356 word_Emph);
1357 spcstyle = tospacestyle(style);
1358 sitem->type |= stack_style;
1359 }
1360 dtor(t), t = get_token(in);
1361 }
1362 if (t.type != tok_lbrace) {
1363 error(err_explbr, &t.pos);
1364 sfree(sitem);
1365 } else {
1366 stk_push(parsestk, sitem);
1367 }
1368 }
1369 break;
1370 case c_c:
1371 case c_cw:
1372 case c_e:
1373 type = t.cmd;
1374 if (style != word_Normal) {
1375 error(err_nestedstyles, &t.pos);
1376 /* Error recovery: eat lbrace, push nop. */
1377 dtor(t), t = get_token(in);
1378 sitem = snew(struct stack_item);
1379 sitem->fpos = t.pos;
1380 sitem->type = stack_nop;
1381 stk_push(parsestk, sitem);
1382 }
1383 dtor(t), t = get_token(in);
1384 if (t.type != tok_lbrace) {
1385 error(err_explbr, &t.pos);
1386 } else {
1387 style = (type == c_c ? word_Code :
1388 type == c_cw ? word_WeakCode :
1389 word_Emph);
1390 spcstyle = tospacestyle(style);
1391 sitem = snew(struct stack_item);
1392 sitem->fpos = t.pos;
1393 sitem->type = stack_style;
1394 stk_push(parsestk, sitem);
1395 }
1396 break;
1397 case c_i:
1398 case c_ii:
1399 case c_I:
1400 type = t.cmd;
1401 if (indexing) {
1402 error(err_nestedindex, &t.pos);
1403 /* Error recovery: eat lbrace, push nop. */
1404 dtor(t), t = get_token(in);
1405 sitem = snew(struct stack_item);
1406 sitem->fpos = t.pos;
1407 sitem->type = stack_nop;
1408 stk_push(parsestk, sitem);
1409 }
1410 sitem = snew(struct stack_item);
1411 sitem->fpos = t.pos;
1412 sitem->type = stack_idx;
1413 dtor(t), t = get_token(in);
1414 /*
1415 * Special cases: \i\c, \i\e, \i\cw
1416 */
1417 wd.fpos = t.pos;
1418 if (t.type == tok_cmd &&
1419 (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
1420 if (style != word_Normal)
1421 error(err_nestedstyles, &t.pos);
1422 else {
1423 style = (t.cmd == c_c ? word_Code :
1424 t.cmd == c_cw ? word_WeakCode :
1425 word_Emph);
1426 spcstyle = tospacestyle(style);
1427 sitem->type |= stack_style;
1428 }
1429 dtor(t), t = get_token(in);
1430 }
1431 if (t.type != tok_lbrace) {
1432 sfree(sitem);
1433 error(err_explbr, &t.pos);
1434 } else {
1435 /* Add an index-reference word with no text as yet */
1436 wd.type = word_IndexRef;
1437 wd.text = NULL;
1438 wd.alt = NULL;
1439 wd.aux = 0;
1440 wd.breaks = FALSE;
1441 indexword = addword(wd, &whptr);
1442 /* Set up a rdstring to read the index text */
1443 indexstr = nullrs;
1444 /* Flags so that we do the Right Things with text */
1445 index_visible = (type != c_I);
1446 index_downcase = (type == c_ii);
1447 indexing = TRUE;
1448 idxwordlist = NULL;
1449 idximplicit = &idxwordlist;
1450 /* Stack item to close the indexing on exit */
1451 stk_push(parsestk, sitem);
1452 }
1453 break;
1454 case c_u:
1455 uchr = t.aux;
1456 utext[0] = uchr; utext[1] = 0;
1457 wd.type = style;
1458 wd.breaks = FALSE;
1459 wd.alt = NULL;
1460 wd.aux = 0;
1461 wd.fpos = t.pos;
1462 if (!indexing || index_visible) {
1463 wd.text = ustrdup(utext);
1464 uword = addword(wd, &whptr);
1465 } else
1466 uword = NULL;
1467 if (indexing) {
1468 wd.text = ustrdup(utext);
1469 iword = addword(wd, &idximplicit);
1470 } else
1471 iword = NULL;
1472 dtor(t), t = get_token(in);
1473 if (t.type == tok_lbrace) {
1474 /*
1475 * \u with a left brace. Until the brace
1476 * closes, all further words go on a
1477 * sidetrack from the main thread of the
1478 * paragraph.
1479 */
1480 sitem = snew(struct stack_item);
1481 sitem->fpos = t.pos;
1482 sitem->type = stack_ualt;
1483 sitem->whptr = whptr;
1484 sitem->idximplicit = idximplicit;
1485 stk_push(parsestk, sitem);
1486 whptr = uword ? &uword->alt : NULL;
1487 idximplicit = iword ? &iword->alt : NULL;
1488 } else {
1489 if (indexing)
1490 rdadd(&indexstr, uchr);
1491 already = TRUE;
1492 }
1493 break;
1494 default:
1495 if (!macrolookup(macros, in, t.text, &t.pos))
1496 error(err_badmidcmd, t.text, &t.pos);
1497 break;
1498 }
1499 }
1500 if (!already)
1501 dtor(t), t = get_token(in);
1502 seenwhite = iswhite;
1503 }
1504 finished_para:
1505 /* Check the stack is empty */
1506 if (stk_top(parsestk)) {
1507 while ((sitem = stk_pop(parsestk)))
1508 sfree(sitem);
1509 error(err_missingrbrace, &t.pos);
1510 }
1511 stk_free(parsestk);
1512 prev_para_type = par.type;
1513 addpara(par, ret);
1514 if (t.type == tok_eof)
1515 already = TRUE;
1516 }
1517
1518 if (stk_top(crossparastk)) {
1519 void *p;
1520
1521 error(err_missingrbrace2, &t.pos);
1522 while ((p = stk_pop(crossparastk)))
1523 sfree(p);
1524 }
1525
1526 /*
1527 * We break to here rather than returning, because otherwise
1528 * this cleanup doesn't happen.
1529 */
1530 dtor(t);
1531 macrocleanup(macros);
1532
1533 stk_free(crossparastk);
1534 }
1535
1536 paragraph *read_input(input *in, indexdata *idx) {
1537 paragraph *head = NULL;
1538 paragraph **hptr = &head;
1539
1540 while (in->currindex < in->nfiles) {
1541 in->currfp = fopen(in->filenames[in->currindex], "r");
1542 if (in->currfp) {
1543 setpos(in, in->filenames[in->currindex]);
1544 in->charset = in->defcharset;
1545 in->csstate = charset_init_state;
1546 in->wcpos = in->nwc = 0;
1547 in->pushback_chars = NULL;
1548 read_file(&hptr, in, idx);
1549 }
1550 in->currindex++;
1551 }
1552
1553 return head;
1554 }