Right; I'm finally sick of typing \q{\cw{foo}}, so I've invented a
[sgt/halibut] / input.c
1 /*
2 * input.c: read the source form
3 */
4
#include <stdio.h>
#include <assert.h>
#include <limits.h>
#include <time.h>
#include "halibut.h"
9
10 #define TAB_STOP 8 /* for column number tracking */
11
12 static void setpos(input *in, char *fname) {
13 in->pos.filename = fname;
14 in->pos.line = 1;
15 in->pos.col = (in->reportcols ? 1 : -1);
16 }
17
18 static void unget(input *in, int c, filepos *pos) {
19 if (in->npushback >= in->pushbacksize) {
20 in->pushbacksize = in->npushback + 16;
21 in->pushback = sresize(in->pushback, in->pushbacksize, pushback);
22 }
23 in->pushback[in->npushback].chr = c;
24 in->pushback[in->npushback].pos = *pos; /* structure copy */
25 in->npushback++;
26 }
27
28 /* ---------------------------------------------------------------------- */
29 /*
30 * Macro subsystem
31 */
32 typedef struct macro_Tag macro;
33 struct macro_Tag {
34 wchar_t *name, *text;
35 };
36 struct macrostack_Tag {
37 macrostack *next;
38 wchar_t *text;
39 int ptr, npushback;
40 filepos pos;
41 };
42 static int macrocmp(void *av, void *bv) {
43 macro *a = (macro *)av, *b = (macro *)bv;
44 return ustrcmp(a->name, b->name);
45 }
46 static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text,
47 filepos fpos) {
48 macro *m = snew(macro);
49 m->name = name;
50 m->text = text;
51 if (add234(macros, m) != m) {
52 error(err_macroexists, &fpos, name);
53 sfree(name);
54 sfree(text);
55 }
56 }
57 static int macrolookup(tree234 *macros, input *in, wchar_t *name,
58 filepos *pos) {
59 macro m, *gotit;
60 m.name = name;
61 gotit = find234(macros, &m, NULL);
62 if (gotit) {
63 macrostack *expansion = snew(macrostack);
64 expansion->next = in->stack;
65 expansion->text = gotit->text;
66 expansion->pos = *pos; /* structure copy */
67 expansion->ptr = 0;
68 expansion->npushback = in->npushback;
69 in->stack = expansion;
70 return TRUE;
71 } else
72 return FALSE;
73 }
74 static void macrocleanup(tree234 *macros) {
75 int ti;
76 macro *m;
77 for (ti = 0; (m = (macro *)index234(macros, ti)) != NULL; ti++) {
78 sfree(m->name);
79 sfree(m->text);
80 sfree(m);
81 }
82 freetree234(macros);
83 }
84
85 static void input_configure(input *in, paragraph *cfg) {
86 assert(cfg->type == para_Config);
87
88 if (!ustricmp(cfg->keyword, L"input-charset")) {
89 in->charset = charset_from_ustr(&cfg->fpos, uadv(cfg->keyword));
90 }
91 }
92
93 /*
94 * Can return EOF
95 */
96 static int get(input *in, filepos *pos, rdstringc *rsc) {
97 int pushbackpt = in->stack ? in->stack->npushback : 0;
98 if (in->npushback > pushbackpt) {
99 --in->npushback;
100 if (pos)
101 *pos = in->pushback[in->npushback].pos; /* structure copy */
102 return in->pushback[in->npushback].chr;
103 }
104 else if (in->stack) {
105 wchar_t c = in->stack->text[in->stack->ptr];
106 if (in->stack->text[++in->stack->ptr] == L'\0') {
107 macrostack *tmp = in->stack;
108 in->stack = tmp->next;
109 sfree(tmp);
110 }
111 return c;
112 }
113 else if (in->currfp) {
114
115 while (in->wcpos >= in->nwc) {
116
117 int c = getc(in->currfp);
118
119 if (c == EOF) {
120 fclose(in->currfp);
121 in->currfp = NULL;
122 return EOF;
123 }
124
125 if (rsc)
126 rdaddc(rsc, c);
127
128 /* Track line numbers, for error reporting */
129 if (pos)
130 *pos = in->pos;
131 if (in->reportcols) {
132 switch (c) {
133 case '\t':
134 in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP;
135 break;
136 case '\n':
137 in->pos.col = 1;
138 in->pos.line++;
139 break;
140 default:
141 in->pos.col++;
142 break;
143 }
144 } else {
145 in->pos.col = -1;
146 if (c == '\n')
147 in->pos.line++;
148 }
149
150 /*
151 * Do input character set translation, so that we return
152 * Unicode.
153 */
154 {
155 char buf[1];
156 char const *p;
157 int inlen;
158
159 buf[0] = (char)c;
160 p = buf;
161 inlen = 1;
162
163 in->nwc = charset_to_unicode(&p, &inlen,
164 in->wc, lenof(in->wc),
165 in->charset, &in->csstate,
166 NULL, 0);
167 assert(p == buf+1 && inlen == 0);
168
169 in->wcpos = 0;
170 }
171 }
172
173 return in->wc[in->wcpos++];
174
175 } else
176 return EOF;
177 }
178
179 /*
180 * Lexical analysis of source files.
181 */
182 typedef struct token_Tag token;
183 struct token_Tag {
184 int type;
185 int cmd, aux;
186 wchar_t *text;
187 char *origtext;
188 filepos pos;
189 };
/* Token types, stored in token.type. */
enum {
    tok_eof = 0,                       /* end of file */
    tok_eop,                           /* end of paragraph */
    tok_white,                         /* whitespace */
    tok_word,                          /* a word or word fragment */
    tok_cmd,                           /* \command */
    tok_lbrace,                        /* { */
    tok_rbrace                         /* } */
};
199
/* Halibut command keywords, stored in token.cmd. */
enum {
    c__invalid = 0,                    /* invalid command */
    c__comment,                        /* comment command (\#) */
    c__escaped,                        /* escaped character */
    c__nop,                            /* no-op */
    c__nbsp,                           /* nonbreaking space */
    c_A,                               /* appendix heading */
    c_B,                               /* bibliography entry */
    c_BR,                              /* bibliography rewrite */
    c_C,                               /* chapter heading */
    c_H,                               /* heading */
    c_I,                               /* invisible index mark */
    c_IM,                              /* index merge/rewrite */
    c_K,                               /* capitalised cross-reference */
    c_S,                               /* aux field is 0, 1, 2, ... */
    c_U,                               /* unnumbered-chapter heading */
    c_W,                               /* Web hyperlink */
    c_b,                               /* bulletted list */
    c_c,                               /* code */
    c_cfg,                             /* configuration directive */
    c_copyright,                       /* copyright statement */
    c_cq,                              /* quoted code (sugar for \q{\cw{x}}) */
    c_cw,                              /* weak code */
    c_date,                            /* document processing date */
    c_dd,                              /* description list: description */
    c_define,                          /* macro definition */
    c_dt,                              /* description list: described thing */
    c_e,                               /* emphasis */
    c_i,                               /* visible index mark */
    c_ii,                              /* uncapitalised visible index mark */
    c_k,                               /* uncapitalised cross-reference */
    c_lcont,                           /* continuation para(s) for list item */
    c_n,                               /* numbered list */
    c_nocite,                          /* bibliography trickery */
    c_preamble,                        /* (obsolete) preamble text */
    c_q,                               /* quote marks */
    c_quote,                           /* block-quoted paragraphs */
    c_rule,                            /* horizontal rule */
    c_title,                           /* document title */
    c_u,                               /* aux field is char code */
    c_versionid                        /* document RCS id */
};
243
/*
 * Character classification on Unicode values. (Perhaps whitespace
 * should be defined in a more Unicode-friendly way?)
 */
#define iswhite(c) ( (c)==0x20 || (c)==0x09 || (c)==0x0D || (c)==0x0A )
#define isnl(c) ( (c)==0x0A )
#define isdec(c) ( !((c)<'0' || (c)>'9') )
#define fromdec(c) ( (c)-'0' )
#define ishex(c) ( isdec(c) || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f') )
/* Clearing bit 0x20 folds a-f onto A-F before subtracting. */
#define fromhex(c) ( (c)>'9' ? ((c)&0xDF) - ('A'-10) : (c)-'0' )
#define iscmd(c) ( isdec(c) || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z') )
252
/*
 * Keyword comparison function. Like strcmp, but between a wchar_t *
 * and a char *: returns the difference of the first pair of
 * characters that differ, or 0 if both strings end together.
 */
static int kwcmp(wchar_t const *p, char const *q) {
    for (;; p++, q++) {
        int diff = *p - *q;
        if (diff != 0 || *p == 0 || *q == 0)
            return diff;
    }
}
264
265 /*
266 * Match a keyword.
267 */
268 static void match_kw(token *tok) {
269 /*
270 * FIXME. The ids are explicit in here so as to allow long-name
271 * equivalents to the various very short keywords.
272 */
273 static const struct { char const *name; int id; } keywords[] = {
274 {"#", c__comment}, /* comment command (\#) */
275 {"-", c__escaped}, /* nonbreaking hyphen */
276 {".", c__nop}, /* no-op */
277 {"A", c_A}, /* appendix heading */
278 {"B", c_B}, /* bibliography entry */
279 {"BR", c_BR}, /* bibliography rewrite */
280 {"C", c_C}, /* chapter heading */
281 {"H", c_H}, /* heading */
282 {"I", c_I}, /* invisible index mark */
283 {"IM", c_IM}, /* index merge/rewrite */
284 {"K", c_K}, /* capitalised cross-reference */
285 {"U", c_U}, /* unnumbered-chapter heading */
286 {"W", c_W}, /* Web hyperlink */
287 {"\\", c__escaped}, /* escaped backslash (\\) */
288 {"_", c__nbsp}, /* nonbreaking space (\_) */
289 {"b", c_b}, /* bulletted list */
290 {"c", c_c}, /* code */
291 {"cfg", c_cfg}, /* configuration directive */
292 {"copyright", c_copyright}, /* copyright statement */
293 {"cq", c_cq}, /* quoted code (sugar for \q{\cw{x}}) */
294 {"cw", c_cw}, /* weak code */
295 {"date", c_date}, /* document processing date */
296 {"dd", c_dd}, /* description list: description */
297 {"define", c_define}, /* macro definition */
298 {"dt", c_dt}, /* description list: described thing */
299 {"e", c_e}, /* emphasis */
300 {"i", c_i}, /* visible index mark */
301 {"ii", c_ii}, /* uncapitalised visible index mark */
302 {"k", c_k}, /* uncapitalised cross-reference */
303 {"lcont", c_lcont}, /* continuation para(s) for list item */
304 {"n", c_n}, /* numbered list */
305 {"nocite", c_nocite}, /* bibliography trickery */
306 {"preamble", c_preamble}, /* (obsolete) preamble text */
307 {"q", c_q}, /* quote marks */
308 {"quote", c_quote}, /* block-quoted paragraphs */
309 {"rule", c_rule}, /* horizontal rule */
310 {"title", c_title}, /* document title */
311 {"versionid", c_versionid}, /* document RCS id */
312 {"{", c__escaped}, /* escaped lbrace (\{) */
313 {"}", c__escaped}, /* escaped rbrace (\}) */
314 };
315 int i, j, k, c;
316
317 /*
318 * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
319 * doesn't match correctly, we just fall through to the
320 * binary-search phase.
321 */
322 if (tok->text[0] == 'S') {
323 /* We expect numeric characters thereafter. */
324 wchar_t *p = tok->text+1;
325 int n;
326 if (!*p)
327 n = 1;
328 else {
329 n = 0;
330 while (*p && isdec(*p)) {
331 n = 10 * n + fromdec(*p);
332 p++;
333 }
334 }
335 if (!*p) {
336 tok->cmd = c_S;
337 tok->aux = n;
338 return;
339 }
340 } else if (tok->text[0] == 'u') {
341 /* We expect hex characters thereafter. */
342 wchar_t *p = tok->text+1;
343 int n = 0;
344 while (*p && ishex(*p)) {
345 n = 16 * n + fromhex(*p);
346 p++;
347 }
348 if (!*p) {
349 tok->cmd = c_u;
350 tok->aux = n;
351 return;
352 }
353 }
354
355 i = -1;
356 j = sizeof(keywords)/sizeof(*keywords);
357 while (j-i > 1) {
358 k = (i+j)/2;
359 c = kwcmp(tok->text, keywords[k].name);
360 if (c < 0)
361 j = k;
362 else if (c > 0)
363 i = k;
364 else /* c == 0 */ {
365 tok->cmd = keywords[k].id;
366 return;
367 }
368 }
369
370 tok->cmd = c__invalid;
371 }
372
373
374 /*
375 * Read a token from the input file, in the normal way (`normal' in
376 * the sense that code paragraphs work a different way).
377 */
378 token get_token(input *in) {
379 int c;
380 int nls;
381 int prevpos;
382 token ret;
383 rdstring rs = { 0, 0, NULL };
384 rdstringc rsc = { 0, 0, NULL };
385 filepos cpos;
386
387 ret.text = NULL; /* default */
388 ret.origtext = NULL; /* default */
389 if (in->pushback_chars) {
390 rdaddsc(&rsc, in->pushback_chars);
391 sfree(in->pushback_chars);
392 in->pushback_chars = NULL;
393 }
394 c = get(in, &cpos, &rsc);
395 ret.pos = cpos;
396 if (iswhite(c)) { /* tok_white or tok_eop */
397 nls = 0;
398 prevpos = 0;
399 do {
400 if (isnl(c))
401 nls++;
402 prevpos = rsc.pos;
403 } while ((c = get(in, &cpos, &rsc)) != EOF && iswhite(c));
404 if (c == EOF) {
405 ret.type = tok_eof;
406 sfree(rsc.text);
407 return ret;
408 }
409 if (rsc.text) {
410 in->pushback_chars = dupstr(rsc.text + prevpos);
411 sfree(rsc.text);
412 }
413 unget(in, c, &cpos);
414 ret.type = (nls > 1 ? tok_eop : tok_white);
415 return ret;
416 } else if (c == EOF) { /* tok_eof */
417 ret.type = tok_eof;
418 sfree(rsc.text);
419 return ret;
420 } else if (c == '\\') { /* tok_cmd */
421 rsc.pos = prevpos = 0;
422 c = get(in, &cpos, &rsc);
423 if (c == '-' || c == '\\' || c == '_' ||
424 c == '#' || c == '{' || c == '}' || c == '.') {
425 /* single-char command */
426 rdadd(&rs, c);
427 } else if (c == 'u') {
428 int len = 0;
429 do {
430 rdadd(&rs, c);
431 len++;
432 prevpos = rsc.pos;
433 c = get(in, &cpos, &rsc);
434 } while (ishex(c) && len < 5);
435 unget(in, c, &cpos);
436 } else if (iscmd(c)) {
437 do {
438 rdadd(&rs, c);
439 prevpos = rsc.pos;
440 c = get(in, &cpos, &rsc);
441 } while (iscmd(c));
442 unget(in, c, &cpos);
443 }
444 /*
445 * Now match the command against the list of available
446 * ones.
447 */
448 ret.type = tok_cmd;
449 ret.text = ustrdup(rs.text);
450 if (rsc.text) {
451 in->pushback_chars = dupstr(rsc.text + prevpos);
452 rsc.text[prevpos] = '\0';
453 ret.origtext = dupstr(rsc.text);
454 } else {
455 ret.origtext = dupstr("");
456 }
457 match_kw(&ret);
458 sfree(rs.text);
459 sfree(rsc.text);
460 return ret;
461 } else if (c == '{') { /* tok_lbrace */
462 ret.type = tok_lbrace;
463 sfree(rsc.text);
464 return ret;
465 } else if (c == '}') { /* tok_rbrace */
466 ret.type = tok_rbrace;
467 sfree(rsc.text);
468 return ret;
469 } else { /* tok_word */
470 /*
471 * Read a word: the longest possible contiguous sequence of
472 * things other than whitespace, backslash, braces and
473 * hyphen. A hyphen terminates the word but is returned as
474 * part of it; everything else is pushed back for the next
475 * token. The `aux' field contains TRUE if the word ends in
476 * a hyphen.
477 */
478 ret.aux = FALSE; /* assumed for now */
479 prevpos = 0;
480 while (1) {
481 if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) {
482 /* Put back the character that caused termination */
483 unget(in, c, &cpos);
484 break;
485 } else {
486 rdadd(&rs, c);
487 if (c == '-') {
488 prevpos = rsc.pos;
489 ret.aux = TRUE;
490 break; /* hyphen terminates word */
491 }
492 }
493 prevpos = rsc.pos;
494 c = get(in, &cpos, &rsc);
495 }
496 ret.type = tok_word;
497 ret.text = ustrdup(rs.text);
498 if (rsc.text) {
499 in->pushback_chars = dupstr(rsc.text + prevpos);
500 rsc.text[prevpos] = '\0';
501 ret.origtext = dupstr(rsc.text);
502 } else {
503 ret.origtext = dupstr("");
504 }
505 sfree(rs.text);
506 sfree(rsc.text);
507 return ret;
508 }
509 }
510
511 /*
512 * Determine whether the next input character is an open brace (for
513 * telling code paragraphs from paragraphs which merely start with
514 * code).
515 */
516 int isbrace(input *in) {
517 int c;
518 filepos cpos;
519
520 c = get(in, &cpos, NULL);
521 unget(in, c, &cpos);
522 return (c == '{');
523 }
524
525 /*
526 * Read the rest of a line that starts `\c'. Including nothing at
527 * all (tok_word with empty text).
528 */
529 token get_codepar_token(input *in) {
530 int c;
531 token ret;
532 rdstring rs = { 0, 0, NULL };
533 filepos cpos;
534
535 ret.type = tok_word;
536 ret.origtext = NULL;
537 c = get(in, &cpos, NULL); /* expect (and discard) one space */
538 ret.pos = cpos;
539 if (c == ' ') {
540 c = get(in, &cpos, NULL);
541 ret.pos = cpos;
542 }
543 while (!isnl(c) && c != EOF) {
544 int c2 = c;
545 c = get(in, &cpos, NULL);
546 /* Discard \r just before \n. */
547 if (c2 != 13 || !isnl(c))
548 rdadd(&rs, c2);
549 }
550 unget(in, c, &cpos);
551 ret.text = ustrdup(rs.text);
552 sfree(rs.text);
553 return ret;
554 }
555
556 /*
557 * Adds a new word to a linked list
558 */
559 static word *addword(word newword, word ***hptrptr) {
560 word *mnewword;
561 if (!hptrptr)
562 return NULL;
563 mnewword = snew(word);
564 *mnewword = newword; /* structure copy */
565 mnewword->next = NULL;
566 **hptrptr = mnewword;
567 *hptrptr = &mnewword->next;
568 return mnewword;
569 }
570
571 /*
572 * Adds a new paragraph to a linked list
573 */
574 static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
575 paragraph *mnewpara = snew(paragraph);
576 *mnewpara = newpara; /* structure copy */
577 mnewpara->next = NULL;
578 **hptrptr = mnewpara;
579 *hptrptr = &mnewpara->next;
580 return mnewpara;
581 }
582
/*
 * Destructor before token is reassigned; should catch most memory
 * leaks. Frees the token's `text' and `origtext'. This is a comma
 * expression so that it can be written as `dtor(t), t = ...'. The
 * argument is parenthesised so that any token-valued expression can
 * be passed; note that it is evaluated twice.
 */
#define dtor(t) ( sfree((t).text), sfree((t).origtext) )
588
589 /*
590 * Reads a single file (ie until get() returns EOF)
591 */
592 static void read_file(paragraph ***ret, input *in, indexdata *idx) {
593 token t;
594 paragraph par;
595 word wd, **whptr, **idximplicit;
596 tree234 *macros;
597 wchar_t utext[2], *wdtext;
598 int style, spcstyle;
599 int already;
600 int iswhite, seenwhite;
601 int type;
602 int prev_para_type;
603 struct stack_item {
604 enum {
605 stack_nop = 0, /* do nothing (for error recovery) */
606 stack_ualt = 1, /* \u alternative */
607 stack_style = 2, /* \e, \c, \cw */
608 stack_idx = 4, /* \I, \i, \ii */
609 stack_hyper = 8, /* \W */
610 stack_quote = 16, /* \q */
611 } type;
612 word **whptr; /* to restore from \u alternatives */
613 word **idximplicit; /* to restore from \u alternatives */
614 filepos fpos;
615 int in_code;
616 } *sitem;
617 stack parsestk;
618 struct crossparaitem {
619 int type; /* currently c_lcont, c_quote or -1 */
620 int seen_lcont, seen_quote;
621 };
622 stack crossparastk;
623 word *indexword, *uword, *iword;
624 word *idxwordlist;
625 rdstring indexstr;
626 int index_downcase, index_visible, indexing;
627 const rdstring nullrs = { 0, 0, NULL };
628 wchar_t uchr;
629
630 t.text = NULL;
631 t.origtext = NULL;
632 macros = newtree234(macrocmp);
633 already = FALSE;
634
635 crossparastk = stk_new();
636
637 /*
638 * Loop on each paragraph.
639 */
640 while (1) {
641 int start_cmd = c__invalid;
642 par.words = NULL;
643 par.keyword = NULL;
644 par.origkeyword = NULL;
645 whptr = &par.words;
646
647 /*
648 * Get a token.
649 */
650 do {
651 if (!already) {
652 dtor(t), t = get_token(in);
653 }
654 already = FALSE;
655 } while (t.type == tok_eop);
656 if (t.type == tok_eof)
657 break;
658
659 /*
660 * Parse code paragraphs separately.
661 */
662 if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
663 int wtype = word_WeakCode;
664
665 par.type = para_Code;
666 par.fpos = t.pos;
667 while (1) {
668 dtor(t), t = get_codepar_token(in);
669 wd.type = wtype;
670 wd.breaks = FALSE; /* shouldn't need this... */
671 wd.text = ustrdup(t.text);
672 wd.alt = NULL;
673 wd.fpos = t.pos;
674 addword(wd, &whptr);
675 dtor(t), t = get_token(in);
676 if (t.type == tok_white) {
677 /*
678 * The newline after a code-paragraph line
679 */
680 dtor(t), t = get_token(in);
681 }
682 if (t.type == tok_eop || t.type == tok_eof ||
683 t.type == tok_rbrace) { /* might be } terminating \lcont */
684 if (t.type == tok_rbrace)
685 already = TRUE;
686 break;
687 } else if (t.type == tok_cmd && t.cmd == c_c) {
688 wtype = word_WeakCode;
689 } else if (t.type == tok_cmd && t.cmd == c_e &&
690 wtype == word_WeakCode) {
691 wtype = word_Emph;
692 } else {
693 error(err_brokencodepara, &t.pos);
694 prev_para_type = par.type;
695 addpara(par, ret);
696 while (t.type != tok_eop) /* error recovery: */
697 dtor(t), t = get_token(in); /* eat rest of paragraph */
698 goto codeparabroken; /* ick, but such is life */
699 }
700 }
701 prev_para_type = par.type;
702 addpara(par, ret);
703 codeparabroken:
704 continue;
705 }
706
707 /*
708 * Spot the special commands that define a grouping of more
709 * than one paragraph, and also the closing braces that
710 * finish them.
711 */
712 if (t.type == tok_cmd &&
713 (t.cmd == c_lcont || t.cmd == c_quote)) {
714 struct crossparaitem *sitem, *stop;
715 int cmd = t.cmd;
716
717 /*
718 * Expect, and swallow, an open brace.
719 */
720 dtor(t), t = get_token(in);
721 if (t.type != tok_lbrace) {
722 error(err_explbr, &t.pos);
723 continue;
724 }
725
726 /*
727 * Also expect, and swallow, any whitespace after that
728 * (a newline before a code paragraph wouldn't be
729 * surprising).
730 */
731 do {
732 dtor(t), t = get_token(in);
733 } while (t.type == tok_white);
734 already = TRUE;
735
736 if (cmd == c_lcont) {
737 /*
738 * \lcont causes a continuation of a list item into
739 * multiple paragraphs (which may in turn contain
740 * nested lists, code paras etc). Hence, the previous
741 * paragraph must be of a list type.
742 */
743 sitem = snew(struct crossparaitem);
744 stop = (struct crossparaitem *)stk_top(crossparastk);
745 if (stop)
746 *sitem = *stop;
747 else
748 sitem->seen_quote = sitem->seen_lcont = 0;
749
750 if (prev_para_type == para_Bullet ||
751 prev_para_type == para_NumberedList ||
752 prev_para_type == para_Description) {
753 sitem->type = c_lcont;
754 sitem->seen_lcont = 1;
755 par.type = para_LcontPush;
756 prev_para_type = par.type;
757 addpara(par, ret);
758 } else {
759 /*
760 * Push a null item on the cross-para stack so that
761 * when we see the corresponding closing brace we
762 * don't give a cascade error.
763 */
764 sitem->type = -1;
765 error(err_misplacedlcont, &t.pos);
766 }
767 } else {
768 /*
769 * \quote causes a group of paragraphs to be
770 * block-quoted (typically they will be indented a
771 * bit).
772 */
773 sitem = snew(struct crossparaitem);
774 stop = (struct crossparaitem *)stk_top(crossparastk);
775 if (stop)
776 *sitem = *stop;
777 else
778 sitem->seen_quote = sitem->seen_lcont = 0;
779 sitem->type = c_quote;
780 sitem->seen_quote = 1;
781 par.type = para_QuotePush;
782 prev_para_type = par.type;
783 addpara(par, ret);
784 }
785 stk_push(crossparastk, sitem);
786 continue;
787 } else if (t.type == tok_rbrace) {
788 struct crossparaitem *sitem = stk_pop(crossparastk);
789 if (!sitem)
790 error(err_unexbrace, &t.pos);
791 else {
792 switch (sitem->type) {
793 case c_lcont:
794 par.type = para_LcontPop;
795 prev_para_type = par.type;
796 addpara(par, ret);
797 break;
798 case c_quote:
799 par.type = para_QuotePop;
800 prev_para_type = par.type;
801 addpara(par, ret);
802 break;
803 }
804 sfree(sitem);
805 }
806 continue;
807 }
808
809 /*
810 * This token begins a paragraph. See if it's one of the
811 * special commands that define a paragraph type.
812 *
813 * (note that \# is special in a way, and \nocite takes no
814 * text)
815 */
816 par.type = para_Normal;
817 if (t.type == tok_cmd) {
818 int needkw;
819 int is_macro = FALSE;
820
821 par.fpos = t.pos;
822 switch (t.cmd) {
823 default:
824 needkw = -1;
825 break;
826 case c__invalid:
827 error(err_badparatype, t.text, &t.pos);
828 needkw = 4;
829 break;
830 case c__comment:
831 if (isbrace(in))
832 break; /* `\#{': isn't a comment para */
833 do {
834 dtor(t), t = get_token(in);
835 } while (t.type != tok_eop && t.type != tok_eof);
836 continue; /* next paragraph */
837 /*
838 * `needkw' values:
839 *
840 * 1 -- exactly one keyword
841 * 2 -- at least one keyword
842 * 4 -- any number of keywords including zero
843 * 8 -- at least one keyword and then nothing else
844 * 16 -- nothing at all! no keywords, no body
845 * 32 -- no keywords at all
846 */
847 case c_A: needkw = 2; par.type = para_Appendix; break;
848 case c_B: needkw = 2; par.type = para_Biblio; break;
849 case c_BR: needkw = 1; par.type = para_BR;
850 start_cmd = c_BR; break;
851 case c_C: needkw = 2; par.type = para_Chapter; break;
852 case c_H: needkw = 2; par.type = para_Heading;
853 par.aux = 0;
854 break;
855 case c_IM: needkw = 2; par.type = para_IM;
856 start_cmd = c_IM; break;
857 case c_S: needkw = 2; par.type = para_Subsect;
858 par.aux = t.aux; break;
859 case c_U: needkw = 32; par.type = para_UnnumberedChapter; break;
860 /* For \b and \n the keyword is optional */
861 case c_b: needkw = 4; par.type = para_Bullet; break;
862 case c_dt: needkw = 4; par.type = para_DescribedThing; break;
863 case c_dd: needkw = 4; par.type = para_Description; break;
864 case c_n: needkw = 4; par.type = para_NumberedList; break;
865 case c_cfg: needkw = 8; par.type = para_Config;
866 start_cmd = c_cfg; break;
867 case c_copyright: needkw = 32; par.type = para_Copyright; break;
868 case c_define: is_macro = TRUE; needkw = 1; break;
869 /* For \nocite the keyword is _everything_ */
870 case c_nocite: needkw = 8; par.type = para_NoCite; break;
871 case c_preamble: needkw = 32; par.type = para_Normal; break;
872 case c_rule: needkw = 16; par.type = para_Rule; break;
873 case c_title: needkw = 32; par.type = para_Title; break;
874 case c_versionid: needkw = 32; par.type = para_VersionID; break;
875 }
876
877 if (par.type == para_Chapter ||
878 par.type == para_Heading ||
879 par.type == para_Subsect ||
880 par.type == para_Appendix ||
881 par.type == para_UnnumberedChapter) {
882 struct crossparaitem *sitem = stk_top(crossparastk);
883 if (sitem && (sitem->seen_lcont || sitem->seen_quote)) {
884 error(err_sectmarkerinblock,
885 &t.pos,
886 (sitem->seen_lcont ? "lcont" : "quote"));
887 }
888 }
889
890 if (needkw > 0) {
891 rdstring rs = { 0, 0, NULL };
892 rdstringc rsc = { 0, 0, NULL };
893 int nkeys = 0;
894 filepos fp;
895
896 /* Get keywords. */
897 dtor(t), t = get_token(in);
898 fp = t.pos;
899 while (t.type == tok_lbrace) {
900 /* This is a keyword. */
901 nkeys++;
902 /* FIXME: there will be bugs if anyone specifies an
903 * empty keyword (\foo{}), so trap this case. */
904 while (dtor(t), t = get_token(in),
905 t.type == tok_word ||
906 t.type == tok_white ||
907 (t.type == tok_cmd && t.cmd == c__nbsp) ||
908 (t.type == tok_cmd && t.cmd == c__escaped) ||
909 (t.type == tok_cmd && t.cmd == c_u)) {
910 if (t.type == tok_white ||
911 (t.type == tok_cmd && t.cmd == c__nbsp)) {
912 rdadd(&rs, ' ');
913 rdaddc(&rsc, ' ');
914 } else if (t.type == tok_cmd && t.cmd == c_u) {
915 rdadd(&rs, t.aux);
916 rdaddc(&rsc, '\\');
917 rdaddsc(&rsc, t.origtext);
918 } else {
919 rdadds(&rs, t.text);
920 rdaddsc(&rsc, t.origtext);
921 }
922 }
923 if (t.type != tok_rbrace) {
924 error(err_kwunclosed, &t.pos);
925 continue;
926 }
927 rdadd(&rs, 0); /* add string terminator */
928 rdaddc(&rsc, 0); /* add string terminator */
929 dtor(t), t = get_token(in); /* eat right brace */
930 }
931
932 rdadd(&rs, 0); /* add string terminator */
933 rdaddc(&rsc, 0); /* add string terminator */
934
935 /* See whether we have the right number of keywords. */
936 if ((needkw & 48) && nkeys > 0)
937 error(err_kwillegal, &fp);
938 if ((needkw & 11) && nkeys == 0)
939 error(err_kwexpected, &fp);
940 if ((needkw & 5) && nkeys > 1)
941 error(err_kwtoomany, &fp);
942
943 if (is_macro) {
944 /*
945 * Macro definition. Get the rest of the line
946 * as a code-paragraph token, repeatedly until
947 * there's nothing more left of it. Separate
948 * with newlines.
949 */
950 rdstring macrotext = { 0, 0, NULL };
951 while (1) {
952 dtor(t), t = get_codepar_token(in);
953 if (macrotext.pos > 0)
954 rdadd(&macrotext, L'\n');
955 rdadds(&macrotext, t.text);
956 dtor(t), t = get_token(in);
957 if (t.type == tok_eop) break;
958 }
959 macrodef(macros, rs.text, macrotext.text, fp);
960 continue; /* next paragraph */
961 }
962
963 par.keyword = rdtrim(&rs);
964 par.origkeyword = rdtrimc(&rsc);
965
966 /* Move to EOP in case of needkw==8 or 16 (no body) */
967 if (needkw & 24) {
968 /* We allow whitespace even when we expect no para body */
969 while (t.type == tok_white)
970 dtor(t), t = get_token(in);
971 if (t.type != tok_eop && t.type != tok_eof &&
972 (start_cmd == c__invalid ||
973 t.type != tok_cmd || t.cmd != start_cmd)) {
974 error(err_bodyillegal, &t.pos);
975 /* Error recovery: eat the rest of the paragraph */
976 while (t.type != tok_eop && t.type != tok_eof &&
977 (start_cmd == c__invalid ||
978 t.type != tok_cmd || t.cmd != start_cmd))
979 dtor(t), t = get_token(in);
980 }
981 if (t.type == tok_cmd)
982 already = TRUE;/* inhibit get_token at top of loop */
983 prev_para_type = par.type;
984 addpara(par, ret);
985
986 if (par.type == para_Config) {
987 input_configure(in, &par);
988 }
989 continue; /* next paragraph */
990 }
991 }
992 }
993
994 /*
995 * Now read the actual paragraph, word by word, adding to
996 * the paragraph list.
997 *
998 * Mid-paragraph commands:
999 *
1000 * \K \k
1001 * \c \cw \cq
1002 * \e
1003 * \i \ii
1004 * \I
1005 * \q
1006 * \u
1007 * \W
1008 * \date
1009 * \\ \{ \}
1010 */
1011 parsestk = stk_new();
1012 style = word_Normal;
1013 spcstyle = word_WhiteSpace;
1014 indexing = FALSE;
1015 seenwhite = TRUE;
1016 while (t.type != tok_eop && t.type != tok_eof) {
1017 iswhite = FALSE;
1018 already = FALSE;
1019
1020 /* Handle implicit paragraph breaks after \IM, \BR etc */
1021 if (start_cmd != c__invalid &&
1022 t.type == tok_cmd && t.cmd == start_cmd) {
1023 already = TRUE; /* inhibit get_token at top of loop */
1024 break;
1025 }
1026
1027 if (t.type == tok_cmd && t.cmd == c__nop) {
1028 dtor(t), t = get_token(in);
1029 continue; /* do nothing! */
1030 }
1031
1032 if (t.type == tok_cmd && t.cmd == c__escaped) {
1033 t.type = tok_word; /* nice and simple */
1034 t.aux = 0; /* even if `\-' - nonbreaking! */
1035 }
1036 if (t.type == tok_cmd && t.cmd == c__nbsp) {
1037 t.type = tok_word; /* nice and simple */
1038 sfree(t.text);
1039 t.text = ustrdup(L" "); /* text is ` ' not `_' */
1040 t.aux = 0; /* (nonbreaking) */
1041 }
1042 switch (t.type) {
1043 case tok_white:
1044 if (whptr == &par.words)
1045 break; /* strip whitespace at start of para */
1046 wd.text = NULL;
1047 wd.type = spcstyle;
1048 wd.alt = NULL;
1049 wd.aux = 0;
1050 wd.fpos = t.pos;
1051 wd.breaks = FALSE;
1052
1053 /*
1054 * Inhibit use of whitespace if it's (probably the
1055 * newline) before a repeat \IM / \BR type
1056 * directive.
1057 */
1058 if (start_cmd != c__invalid) {
1059 dtor(t), t = get_token(in);
1060 already = TRUE;
1061 if (t.type == tok_cmd && t.cmd == start_cmd)
1062 break;
1063 }
1064
1065 if (indexing)
1066 rdadd(&indexstr, ' ');
1067 if (!indexing || index_visible)
1068 addword(wd, &whptr);
1069 if (indexing)
1070 addword(wd, &idximplicit);
1071 iswhite = TRUE;
1072 break;
1073 case tok_word:
1074 if (indexing)
1075 rdadds(&indexstr, t.text);
1076 wd.type = style;
1077 wd.alt = NULL;
1078 wd.aux = 0;
1079 wd.fpos = t.pos;
1080 wd.breaks = t.aux;
1081 if (!indexing || index_visible) {
1082 wd.text = ustrdup(t.text);
1083 addword(wd, &whptr);
1084 }
1085 if (indexing) {
1086 wd.text = ustrdup(t.text);
1087 addword(wd, &idximplicit);
1088 }
1089 break;
1090 case tok_lbrace:
1091 error(err_unexbrace, &t.pos);
1092 /* Error recovery: push nop */
1093 sitem = snew(struct stack_item);
1094 sitem->type = stack_nop;
1095 sitem->fpos = t.pos;
1096 stk_push(parsestk, sitem);
1097 break;
1098 case tok_rbrace:
1099 sitem = stk_pop(parsestk);
1100 if (!sitem) {
1101 /*
1102 * This closing brace could have been an
1103 * indication that the cross-paragraph stack
1104 * wants popping. Accordingly, we treat it here
1105 * as an indication that the paragraph is over.
1106 */
1107 already = TRUE;
1108 goto finished_para;
1109 } else {
1110 if (sitem->type & stack_ualt) {
1111 whptr = sitem->whptr;
1112 idximplicit = sitem->idximplicit;
1113 }
1114 if (sitem->type & stack_style) {
1115 style = word_Normal;
1116 spcstyle = word_WhiteSpace;
1117 }
1118 if (sitem->type & stack_idx) {
1119 indexword->text = ustrdup(indexstr.text);
1120 if (index_downcase) {
1121 word *w;
1122
1123 ustrlow(indexword->text);
1124 ustrlow(indexstr.text);
1125
1126 for (w = idxwordlist; w; w = w->next)
1127 if (w->text)
1128 ustrlow(w->text);
1129 }
1130 indexing = FALSE;
1131 rdadd(&indexstr, L'\0');
1132 index_merge(idx, FALSE, indexstr.text,
1133 idxwordlist, &sitem->fpos);
1134 sfree(indexstr.text);
1135 }
1136 if (sitem->type & stack_hyper) {
1137 wd.text = NULL;
1138 wd.type = word_HyperEnd;
1139 wd.alt = NULL;
1140 wd.aux = 0;
1141 wd.fpos = t.pos;
1142 wd.breaks = FALSE;
1143 if (!indexing || index_visible)
1144 addword(wd, &whptr);
1145 if (indexing)
1146 addword(wd, &idximplicit);
1147 }
1148 if (sitem->type & stack_quote) {
1149 wd.text = NULL;
1150 wd.type = toquotestyle(style);
1151 wd.alt = NULL;
1152 wd.aux = quote_Close;
1153 wd.fpos = t.pos;
1154 wd.breaks = FALSE;
1155 if (!indexing || index_visible)
1156 addword(wd, &whptr);
1157 if (indexing) {
1158 rdadd(&indexstr, L'"');
1159 addword(wd, &idximplicit);
1160 }
1161 }
1162 }
1163 sfree(sitem);
1164 break;
1165 case tok_cmd:
1166 switch (t.cmd) {
1167 case c__comment:
1168 /*
1169 * In-paragraph comment: \#{ balanced braces }
1170 *
1171 * Anything goes here; even tok_eop. We should
1172 * eat whitespace after the close brace _if_
1173 * there was whitespace before the \#.
1174 */
1175 dtor(t), t = get_token(in);
1176 if (t.type != tok_lbrace) {
1177 error(err_explbr, &t.pos);
1178 } else {
1179 int braces = 1;
1180 while (braces > 0) {
1181 dtor(t), t = get_token(in);
1182 if (t.type == tok_lbrace)
1183 braces++;
1184 else if (t.type == tok_rbrace)
1185 braces--;
1186 else if (t.type == tok_eof) {
1187 error(err_commenteof, &t.pos);
1188 break;
1189 }
1190 }
1191 }
1192 if (seenwhite) {
1193 already = TRUE;
1194 dtor(t), t = get_token(in);
1195 if (t.type == tok_white) {
1196 iswhite = TRUE;
1197 already = FALSE;
1198 }
1199 }
1200 break;
1201 case c_q:
1202 case c_cq:
1203 type = t.cmd;
1204 dtor(t), t = get_token(in);
1205 if (t.type != tok_lbrace) {
1206 error(err_explbr, &t.pos);
1207 } else {
1208 /*
1209 * Enforce that \q may not be used anywhere
1210 * within \c. (It shouldn't be necessary
1211 * since the whole point of \c should be
1212 * that the user wants to exercise exact
1213 * control over the glyphs used, and
1214 * forbidding it has the useful effect of
1215 * relieving some backends of having to
1216 * make difficult decisions.)
1217 */
1218 int stype;
1219
1220 if (style != word_Code && style != word_WeakCode) {
1221 wd.text = NULL;
1222 wd.type = toquotestyle(style);
1223 wd.alt = NULL;
1224 wd.aux = quote_Open;
1225 wd.fpos = t.pos;
1226 wd.breaks = FALSE;
1227 if (!indexing || index_visible)
1228 addword(wd, &whptr);
1229 if (indexing) {
1230 rdadd(&indexstr, L'"');
1231 addword(wd, &idximplicit);
1232 }
1233 stype = stack_quote;
1234 } else {
1235 error(err_codequote, &t.pos);
1236 stype = stack_nop;
1237 }
1238 sitem = snew(struct stack_item);
1239 sitem->fpos = t.pos;
1240 sitem->type = stype;
1241 if (type == c_cq) {
1242 if (style != word_Normal) {
1243 error(err_nestedstyles, &t.pos);
1244 } else {
1245 style = word_WeakCode;
1246 spcstyle = tospacestyle(style);
1247 sitem->type |= stack_style;
1248 }
1249 }
1250 stk_push(parsestk, sitem);
1251 }
1252 break;
1253 case c_K:
1254 case c_k:
1255 case c_W:
1256 case c_date:
1257 /*
1258 * Keyword, hyperlink, or \date. We expect a
1259 * left brace, some text, and then a right
1260 * brace. No nesting; no arguments.
1261 */
1262 wd.fpos = t.pos;
1263 wd.breaks = FALSE;
1264 if (t.cmd == c_K)
1265 wd.type = word_UpperXref;
1266 else if (t.cmd == c_k)
1267 wd.type = word_LowerXref;
1268 else if (t.cmd == c_W)
1269 wd.type = word_HyperLink;
1270 else
1271 wd.type = word_Normal;
1272 dtor(t), t = get_token(in);
1273 if (t.type != tok_lbrace) {
1274 if (wd.type == word_Normal) {
1275 time_t thetime = time(NULL);
1276 struct tm *broken = localtime(&thetime);
1277 already = TRUE;
1278 wdtext = ustrftime(NULL, broken);
1279 wd.type = style;
1280 } else {
1281 error(err_explbr, &t.pos);
1282 wdtext = NULL;
1283 }
1284 } else {
1285 rdstring rs = { 0, 0, NULL };
1286 while (dtor(t), t = get_token(in),
1287 t.type == tok_word || t.type == tok_white) {
1288 if (t.type == tok_white)
1289 rdadd(&rs, ' ');
1290 else
1291 rdadds(&rs, t.text);
1292 }
1293 if (wd.type == word_Normal) {
1294 time_t thetime = time(NULL);
1295 struct tm *broken = localtime(&thetime);
1296 wdtext = ustrftime(rs.text, broken);
1297 wd.type = style;
1298 } else {
1299 wdtext = ustrdup(rs.text);
1300 }
1301 sfree(rs.text);
1302 if (t.type != tok_rbrace) {
1303 error(err_kwexprbr, &t.pos);
1304 }
1305 }
1306 wd.alt = NULL;
1307 wd.aux = 0;
1308 if (!indexing || index_visible) {
1309 wd.text = ustrdup(wdtext);
1310 addword(wd, &whptr);
1311 }
1312 if (indexing) {
1313 wd.text = ustrdup(wdtext);
1314 addword(wd, &idximplicit);
1315 }
1316 sfree(wdtext);
1317 if (wd.type == word_HyperLink) {
1318 /*
1319 * Hyperlinks are different: they then
1320 * expect another left brace, to begin
1321 * delimiting the text marked by the link.
1322 */
1323 dtor(t), t = get_token(in);
1324 sitem = snew(struct stack_item);
1325 sitem->fpos = wd.fpos;
1326 sitem->type = stack_hyper;
1327 /*
1328 * Special cases: \W{}\i, \W{}\ii
1329 */
1330 if (t.type == tok_cmd &&
1331 (t.cmd == c_i || t.cmd == c_ii)) {
1332 if (indexing) {
1333 error(err_nestedindex, &t.pos);
1334 } else {
1335 /* Add an index-reference word with no
1336 * text as yet */
1337 wd.type = word_IndexRef;
1338 wd.text = NULL;
1339 wd.alt = NULL;
1340 wd.aux = 0;
1341 wd.breaks = FALSE;
1342 indexword = addword(wd, &whptr);
1343 /* Set up a rdstring to read the
1344 * index text */
1345 indexstr = nullrs;
1346 /* Flags so that we do the Right
1347 * Things with text */
1348 index_visible = (type != c_I);
1349 index_downcase = (type == c_ii);
1350 indexing = TRUE;
1351 idxwordlist = NULL;
1352 idximplicit = &idxwordlist;
1353
1354 sitem->type |= stack_idx;
1355 }
1356 dtor(t), t = get_token(in);
1357 }
1358 /*
1359 * Special cases: \W{}\c, \W{}\e, \W{}\cw
1360 */
1361 if (t.type == tok_cmd &&
1362 (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
1363 if (style != word_Normal)
1364 error(err_nestedstyles, &t.pos);
1365 else {
1366 style = (t.cmd == c_c ? word_Code :
1367 t.cmd == c_cw ? word_WeakCode :
1368 word_Emph);
1369 spcstyle = tospacestyle(style);
1370 sitem->type |= stack_style;
1371 }
1372 dtor(t), t = get_token(in);
1373 }
1374 if (t.type != tok_lbrace) {
1375 error(err_explbr, &t.pos);
1376 sfree(sitem);
1377 } else {
1378 stk_push(parsestk, sitem);
1379 }
1380 }
1381 break;
1382 case c_c:
1383 case c_cw:
1384 case c_e:
1385 type = t.cmd;
1386 if (style != word_Normal) {
1387 error(err_nestedstyles, &t.pos);
1388 /* Error recovery: eat lbrace, push nop. */
1389 dtor(t), t = get_token(in);
1390 sitem = snew(struct stack_item);
1391 sitem->fpos = t.pos;
1392 sitem->type = stack_nop;
1393 stk_push(parsestk, sitem);
1394 }
1395 dtor(t), t = get_token(in);
1396 if (t.type != tok_lbrace) {
1397 error(err_explbr, &t.pos);
1398 } else {
1399 style = (type == c_c ? word_Code :
1400 type == c_cw ? word_WeakCode :
1401 word_Emph);
1402 spcstyle = tospacestyle(style);
1403 sitem = snew(struct stack_item);
1404 sitem->fpos = t.pos;
1405 sitem->type = stack_style;
1406 stk_push(parsestk, sitem);
1407 }
1408 break;
1409 case c_i:
1410 case c_ii:
1411 case c_I:
1412 type = t.cmd;
1413 if (indexing) {
1414 error(err_nestedindex, &t.pos);
1415 /* Error recovery: eat lbrace, push nop. */
1416 dtor(t), t = get_token(in);
1417 sitem = snew(struct stack_item);
1418 sitem->fpos = t.pos;
1419 sitem->type = stack_nop;
1420 stk_push(parsestk, sitem);
1421 }
1422 sitem = snew(struct stack_item);
1423 sitem->fpos = t.pos;
1424 sitem->type = stack_idx;
1425 dtor(t), t = get_token(in);
1426 /*
1427 * Special cases: \i\c, \i\e, \i\cw
1428 */
1429 wd.fpos = t.pos;
1430 if (t.type == tok_cmd &&
1431 (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
1432 if (style != word_Normal)
1433 error(err_nestedstyles, &t.pos);
1434 else {
1435 style = (t.cmd == c_c ? word_Code :
1436 t.cmd == c_cw ? word_WeakCode :
1437 word_Emph);
1438 spcstyle = tospacestyle(style);
1439 sitem->type |= stack_style;
1440 }
1441 dtor(t), t = get_token(in);
1442 }
1443 if (t.type != tok_lbrace) {
1444 sfree(sitem);
1445 error(err_explbr, &t.pos);
1446 } else {
1447 /* Add an index-reference word with no text as yet */
1448 wd.type = word_IndexRef;
1449 wd.text = NULL;
1450 wd.alt = NULL;
1451 wd.aux = 0;
1452 wd.breaks = FALSE;
1453 indexword = addword(wd, &whptr);
1454 /* Set up a rdstring to read the index text */
1455 indexstr = nullrs;
1456 /* Flags so that we do the Right Things with text */
1457 index_visible = (type != c_I);
1458 index_downcase = (type == c_ii);
1459 indexing = TRUE;
1460 idxwordlist = NULL;
1461 idximplicit = &idxwordlist;
1462 /* Stack item to close the indexing on exit */
1463 stk_push(parsestk, sitem);
1464 }
1465 break;
1466 case c_u:
1467 uchr = t.aux;
1468 utext[0] = uchr; utext[1] = 0;
1469 wd.type = style;
1470 wd.breaks = FALSE;
1471 wd.alt = NULL;
1472 wd.aux = 0;
1473 wd.fpos = t.pos;
1474 if (!indexing || index_visible) {
1475 wd.text = ustrdup(utext);
1476 uword = addword(wd, &whptr);
1477 } else
1478 uword = NULL;
1479 if (indexing) {
1480 wd.text = ustrdup(utext);
1481 iword = addword(wd, &idximplicit);
1482 } else
1483 iword = NULL;
1484 dtor(t), t = get_token(in);
1485 if (t.type == tok_lbrace) {
1486 /*
1487 * \u with a left brace. Until the brace
1488 * closes, all further words go on a
1489 * sidetrack from the main thread of the
1490 * paragraph.
1491 */
1492 sitem = snew(struct stack_item);
1493 sitem->fpos = t.pos;
1494 sitem->type = stack_ualt;
1495 sitem->whptr = whptr;
1496 sitem->idximplicit = idximplicit;
1497 stk_push(parsestk, sitem);
1498 whptr = uword ? &uword->alt : NULL;
1499 idximplicit = iword ? &iword->alt : NULL;
1500 } else {
1501 if (indexing)
1502 rdadd(&indexstr, uchr);
1503 already = TRUE;
1504 }
1505 break;
1506 default:
1507 if (!macrolookup(macros, in, t.text, &t.pos))
1508 error(err_badmidcmd, t.text, &t.pos);
1509 break;
1510 }
1511 }
1512 if (!already)
1513 dtor(t), t = get_token(in);
1514 seenwhite = iswhite;
1515 }
1516 finished_para:
1517 /* Check the stack is empty */
1518 if (stk_top(parsestk)) {
1519 while ((sitem = stk_pop(parsestk)))
1520 sfree(sitem);
1521 error(err_missingrbrace, &t.pos);
1522 }
1523 stk_free(parsestk);
1524 prev_para_type = par.type;
1525 addpara(par, ret);
1526 if (t.type == tok_eof)
1527 already = TRUE;
1528 }
1529
1530 if (stk_top(crossparastk)) {
1531 void *p;
1532
1533 error(err_missingrbrace2, &t.pos);
1534 while ((p = stk_pop(crossparastk)))
1535 sfree(p);
1536 }
1537
1538 /*
1539 * We break to here rather than returning, because otherwise
1540 * this cleanup doesn't happen.
1541 */
1542 dtor(t);
1543 macrocleanup(macros);
1544
1545 stk_free(crossparastk);
1546 }
1547
1548 paragraph *read_input(input *in, indexdata *idx) {
1549 paragraph *head = NULL;
1550 paragraph **hptr = &head;
1551
1552 while (in->currindex < in->nfiles) {
1553 in->currfp = fopen(in->filenames[in->currindex], "r");
1554 if (in->currfp) {
1555 setpos(in, in->filenames[in->currindex]);
1556 in->charset = in->defcharset;
1557 in->csstate = charset_init_state;
1558 in->wcpos = in->nwc = 0;
1559 in->pushback_chars = NULL;
1560 read_file(&hptr, in, idx);
1561 }
1562 in->currindex++;
1563 }
1564
1565 return head;
1566 }