mdw@git.distorted.org.uk Git - sgt/halibut/blob - input.c

   1 /*
   2  * input.c: read the source form
   3  */
   4
   #include <assert.h>
   #include <limits.h>
   #include <stdio.h>
   #include <time.h>
   #include "halibut.h"
   9
  /* Distance between tab stops, in columns; used for column tracking. */
  #define TAB_STOP 8
  11
  12 static void setpos(input *in, char *fname) {
  13     in->pos.filename = fname;
  14     in->pos.line = 1;
  15     in->pos.col = (in->reportcols ? 1 : -1);
  16 }
  17
  18 static void unget(input *in, int c, filepos *pos) {
  19     if (in->npushback >= in->pushbacksize) {
  20         in->pushbacksize = in->npushback + 16;
  21         in->pushback = resize(in->pushback, in->pushbacksize);
  22     }
  23     in->pushback[in->npushback].chr = c;
  24     in->pushback[in->npushback].pos = *pos;   /* structure copy */
  25     in->npushback++;
  26 }
  27
  28 /* ---------------------------------------------------------------------- */
  29 /*
  30  * Macro subsystem
  31  */
  32 typedef struct macro_Tag macro;
  33 struct macro_Tag {
  34     wchar_t *name, *text;
  35 };
  36 struct macrostack_Tag {
  37     macrostack *next;
  38     wchar_t *text;
  39     int ptr, npushback;
  40     filepos pos;
  41 };
  42 static int macrocmp(void *av, void *bv) {
  43     macro *a = (macro *)av, *b = (macro *)bv;
  44     return ustrcmp(a->name, b->name);
  45 }
  46 static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text,
  47                      filepos fpos) {
  48     macro *m = mknew(macro);
  49     m->name = name;
  50     m->text = text;
  51     if (add234(macros, m) != m) {
  52         error(err_macroexists, &fpos, name);
  53         sfree(name);
  54         sfree(text);
  55     }
  56 }
  57 static int macrolookup(tree234 *macros, input *in, wchar_t *name,
  58                        filepos *pos) {
  59     macro m, *gotit;
  60     m.name = name;
  61     gotit = find234(macros, &m, NULL);
  62     if (gotit) {
  63         macrostack *expansion = mknew(macrostack);
  64         expansion->next = in->stack;
  65         expansion->text = gotit->text;
  66         expansion->pos = *pos;         /* structure copy */
  67         expansion->ptr = 0;
  68         expansion->npushback = in->npushback;
  69         in->stack = expansion;
  70         return TRUE;
  71     } else
  72         return FALSE;
  73 }
  74 static void macrocleanup(tree234 *macros) {
  75     int ti;
  76     macro *m;
  77     for (ti = 0; (m = (macro *)index234(macros, ti)) != NULL; ti++) {
  78         sfree(m->name);
  79         sfree(m->text);
  80         sfree(m);
  81     }
  82     freetree234(macros);
  83 }
  84
  85 /*
  86  * Can return EOF
  87  */
  88 static int get(input *in, filepos *pos) {
  89     int pushbackpt = in->stack ? in->stack->npushback : 0;
  90     if (in->npushback > pushbackpt) {
  91         --in->npushback;
  92         if (pos)
  93             *pos = in->pushback[in->npushback].pos;   /* structure copy */
  94         return in->pushback[in->npushback].chr;
  95     }
  96     else if (in->stack) {
  97         wchar_t c = in->stack->text[in->stack->ptr];
  98         if (in->stack->text[++in->stack->ptr] == L'\0') {
  99             macrostack *tmp = in->stack;
 100             in->stack = tmp->next;
 101             sfree(tmp);
 102         }
 103         return c;
 104     }
 105     else if (in->currfp) {
 106         int c = getc(in->currfp);
 107
 108         if (c == EOF) {
 109             fclose(in->currfp);
 110             in->currfp = NULL;
 111         }
 112         /* Track line numbers, for error reporting */
 113         if (pos)
 114             *pos = in->pos;
 115         if (in->reportcols) {
 116             switch (c) {
 117               case '\t':
 118                 in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP;
 119                 break;
 120               case '\n':
 121                 in->pos.col = 1;
 122                 in->pos.line++;
 123                 break;
 124               default:
 125                 in->pos.col++;
 126                 break;
 127             }
 128         } else {
 129             in->pos.col = -1;
 130             if (c == '\n')
 131                 in->pos.line++;
 132         }
 133         /* FIXME: do input charmap translation. We should be returning
 134          * Unicode here. */
 135         return c;
 136     } else
 137         return EOF;
 138 }
 139
 140 /*
 141  * Lexical analysis of source files.
 142  */
 143 typedef struct token_Tag token;
 144 struct token_Tag {
 145     int type;
 146     int cmd, aux;
 147     wchar_t *text;
 148     filepos pos;
 149 };
 /* Token types. */
 enum {
     tok_eof = 0,                       /* end of file */
     tok_eop,                           /* end of paragraph */
     tok_white,                         /* whitespace */
     tok_word,                          /* a word or word fragment */
     tok_cmd,                           /* \command */
     tok_lbrace,                        /* { */
     tok_rbrace                         /* } */
 };
 159
 /*
  * Halibut command keywords: the values stored in a token's `cmd'
  * field by match_kw().
  */
 enum {
     /* Pseudo-commands. */
     c__invalid = 0,                    /* invalid command */
     c__comment,                        /* comment command (\#) */
     c__escaped,                        /* escaped character */
     c__nop,                            /* no-op */
     c__nbsp,                           /* nonbreaking space */

     /* Commands whose names begin with a capital letter. */
     c_A,                               /* appendix heading */
     c_B,                               /* bibliography entry */
     c_BR,                              /* bibliography rewrite */
     c_C,                               /* chapter heading */
     c_H,                               /* heading */
     c_I,                               /* invisible index mark */
     c_IM,                              /* index merge/rewrite */
     c_K,                               /* capitalised cross-reference */
     c_S,                               /* aux field is 0, 1, 2, ... */
     c_U,                               /* unnumbered-chapter heading */
     c_W,                               /* Web hyperlink */

     /* Commands whose names begin with a lower-case letter. */
     c_b,                               /* bulleted list */
     c_c,                               /* code */
     c_cfg,                             /* configuration directive */
     c_copyright,                       /* copyright statement */
     c_cw,                              /* weak code */
     c_date,                            /* document processing date */
     c_dd,                              /* description list: description */
     c_define,                          /* macro definition */
     c_dt,                              /* description list: described thing */
     c_e,                               /* emphasis */
     c_i,                               /* visible index mark */
     c_ii,                              /* uncapitalised visible index mark */
     c_k,                               /* uncapitalised cross-reference */
     c_lcont,                           /* continuation para(s) for list item */
     c_n,                               /* numbered list */
     c_nocite,                          /* bibliography trickery */
     c_preamble,                        /* (obsolete) preamble text */
     c_q,                               /* quote marks */
     c_quote,                           /* block-quoted paragraphs */
     c_rule,                            /* horizontal rule */
     c_title,                           /* document title */
     c_u,                               /* aux field is char code */
     c_versionid                        /* document RCS id */
 };
 202
 /*
  * Character classification helpers. Each one may evaluate its argument
  * more than once, so do not pass expressions with side effects.
  *
  * Perhaps whitespace should be defined in a more Unicode-friendly way?
  */
 #define iswhite(c) ((c) == 32 || (c) == 9 || (c) == 13 || (c) == 10)
 #define isnl(c)    ((c) == 10)
 #define isdec(c)   ((c) >= '0' && (c) <= '9')
 #define fromdec(c) ((c) - '0')
 #define ishex(c)   (((c) >= '0' && (c) <= '9') || \
                     ((c) >= 'A' && (c) <= 'F') || \
                     ((c) >= 'a' && (c) <= 'f'))
 #define fromhex(c) ((c) <= '9' ? (c) - '0' : ((c) & 0xDF) - ('A' - 10))
 #define iscmd(c)   (((c) >= '0' && (c) <= '9') || \
                     ((c) >= 'A' && (c) <= 'Z') || \
                     ((c) >= 'a' && (c) <= 'z'))
 211
 /*
  * Keyword comparison function: strcmp-like ordering between a wide
  * string `p' and a narrow string `q'. Returns the difference between
  * the first pair of characters that differ, or zero if the strings
  * are equal.
  */
 static int kwcmp(wchar_t const *p, char const *q) {
     for (;;) {
         int diff = *p - *q;

         /* Stop at the first mismatch, or when either string ends. */
         if (diff != 0 || *p == 0 || *q == 0)
             return diff;
         p++;
         q++;
     }
 }
 223
 224 /*
 225  * Match a keyword.
 226  */
 227 static void match_kw(token *tok) {
 228     /*
 229      * FIXME. The ids are explicit in here so as to allow long-name
 230      * equivalents to the various very short keywords.
 231      */
 232     static const struct { char const *name; int id; } keywords[] = {
 233         {"#", c__comment},             /* comment command (\#) */
 234         {"-", c__escaped},             /* nonbreaking hyphen */
 235         {".", c__nop},                 /* no-op */
 236         {"A", c_A},                    /* appendix heading */
 237         {"B", c_B},                    /* bibliography entry */
 238         {"BR", c_BR},                  /* bibliography rewrite */
 239         {"C", c_C},                    /* chapter heading */
 240         {"H", c_H},                    /* heading */
 241         {"I", c_I},                    /* invisible index mark */
 242         {"IM", c_IM},                  /* index merge/rewrite */
 243         {"K", c_K},                    /* capitalised cross-reference */
 244         {"U", c_U},                    /* unnumbered-chapter heading */
 245         {"W", c_W},                    /* Web hyperlink */
 246         {"\\", c__escaped},            /* escaped backslash (\\) */
 247         {"_", c__nbsp},                /* nonbreaking space (\_) */
 248         {"b", c_b},                    /* bulletted list */
 249         {"c", c_c},                    /* code */
 250         {"cfg", c_cfg},                /* configuration directive */
 251         {"copyright", c_copyright},    /* copyright statement */
 252         {"cw", c_cw},                  /* weak code */
 253         {"date", c_date},              /* document processing date */
 254         {"dd", c_dd},                  /* description list: description */
 255         {"define", c_define},          /* macro definition */
 256         {"dt", c_dt},                  /* description list: described thing */
 257         {"e", c_e},                    /* emphasis */
 258         {"i", c_i},                    /* visible index mark */
 259         {"ii", c_ii},                  /* uncapitalised visible index mark */
 260         {"k", c_k},                    /* uncapitalised cross-reference */
 261         {"lcont", c_lcont},            /* continuation para(s) for list item */
 262         {"n", c_n},                    /* numbered list */
 263         {"nocite", c_nocite},          /* bibliography trickery */
 264         {"preamble", c_preamble},      /* (obsolete) preamble text */
 265         {"q", c_q},                    /* quote marks */
 266         {"quote", c_quote},            /* block-quoted paragraphs */
 267         {"rule", c_rule},              /* horizontal rule */
 268         {"title", c_title},            /* document title */
 269         {"versionid", c_versionid},    /* document RCS id */
 270         {"{", c__escaped},             /* escaped lbrace (\{) */
 271         {"}", c__escaped},             /* escaped rbrace (\}) */
 272     };
 273     int i, j, k, c;
 274
 275     /*
 276      * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
 277      * doesn't match correctly, we just fall through to the
 278      * binary-search phase.
 279      */
 280     if (tok->text[0] == 'S') {
 281         /* We expect numeric characters thereafter. */
 282         wchar_t *p = tok->text+1;
 283         int n;
 284         if (!*p)
 285             n = 1;
 286         else {
 287             n = 0;
 288             while (*p && isdec(*p)) {
 289                 n = 10 * n + fromdec(*p);
 290                 p++;
 291             }
 292         }
 293         if (!*p) {
 294             tok->cmd = c_S;
 295             tok->aux = n;
 296             return;
 297         }
 298     } else if (tok->text[0] == 'u') {
 299         /* We expect hex characters thereafter. */
 300         wchar_t *p = tok->text+1;
 301         int n = 0;
 302         while (*p && ishex(*p)) {
 303             n = 16 * n + fromhex(*p);
 304             p++;
 305         }
 306         if (!*p) {
 307             tok->cmd = c_u;
 308             tok->aux = n;
 309             return;
 310         }
 311     }
 312
 313     i = -1;
 314     j = sizeof(keywords)/sizeof(*keywords);
 315     while (j-i > 1) {
 316         k = (i+j)/2;
 317         c = kwcmp(tok->text, keywords[k].name);
 318         if (c < 0)
 319             j = k;
 320         else if (c > 0)
 321             i = k;
 322         else /* c == 0 */ {
 323             tok->cmd = keywords[k].id;
 324             return;
 325         }
 326     }
 327
 328     tok->cmd = c__invalid;
 329 }
 330
 331
 332 /*
 333  * Read a token from the input file, in the normal way (`normal' in
 334  * the sense that code paragraphs work a different way).
 335  */
 336 token get_token(input *in) {
 337     int c;
 338     int nls;
 339     token ret;
 340     rdstring rs = { 0, 0, NULL };
 341     filepos cpos;
 342
 343     ret.text = NULL;                   /* default */
 344     c = get(in, &cpos);
 345     ret.pos = cpos;
 346     if (iswhite(c)) {                  /* tok_white or tok_eop */
 347         nls = 0;
 348         do {
 349             if (isnl(c))
 350                 nls++;
 351         } while ((c = get(in, &cpos)) != EOF && iswhite(c));
 352         if (c == EOF) {
 353             ret.type = tok_eof;
 354             return ret;
 355         }
 356         unget(in, c, &cpos);
 357         ret.type = (nls > 1 ? tok_eop : tok_white);
 358         return ret;
 359     } else if (c == EOF) {             /* tok_eof */
 360         ret.type = tok_eof;
 361         return ret;
 362     } else if (c == '\\') {            /* tok_cmd */
 363         c = get(in, &cpos);
 364         if (c == '-' || c == '\\' || c == '_' ||
 365             c == '#' || c == '{' || c == '}' || c == '.') {
 366             /* single-char command */
 367             rdadd(&rs, c);
 368         } else if (c == 'u') {
 369             int len = 0;
 370             do {
 371                 rdadd(&rs, c);
 372                 len++;
 373                 c = get(in, &cpos);
 374             } while (ishex(c) && len < 5);
 375             unget(in, c, &cpos);
 376         } else if (iscmd(c)) {
 377             do {
 378                 rdadd(&rs, c);
 379                 c = get(in, &cpos);
 380             } while (iscmd(c));
 381             unget(in, c, &cpos);
 382         }
 383         /*
 384          * Now match the command against the list of available
 385          * ones.
 386          */
 387         ret.type = tok_cmd;
 388         ret.text = ustrdup(rs.text);
 389         match_kw(&ret);
 390         sfree(rs.text);
 391         return ret;
 392     } else if (c == '{') {             /* tok_lbrace */
 393         ret.type = tok_lbrace;
 394         return ret;
 395     } else if (c == '}') {             /* tok_rbrace */
 396         ret.type = tok_rbrace;
 397         return ret;
 398     } else {                           /* tok_word */
 399         /*
 400          * Read a word: the longest possible contiguous sequence of
 401          * things other than whitespace, backslash, braces and
 402          * hyphen. A hyphen terminates the word but is returned as
 403          * part of it; everything else is pushed back for the next
 404          * token. The `aux' field contains TRUE if the word ends in
 405          * a hyphen.
 406          */
 407         ret.aux = FALSE;               /* assumed for now */
 408         while (1) {
 409             if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) {
 410                 /* Put back the character that caused termination */
 411                 unget(in, c, &cpos);
 412                 break;
 413             } else {
 414                 rdadd(&rs, c);
 415                 if (c == '-') {
 416                     ret.aux = TRUE;
 417                     break;             /* hyphen terminates word */
 418                 }
 419             }
 420             c = get(in, &cpos);
 421         }
 422         ret.type = tok_word;
 423         ret.text = ustrdup(rs.text);
 424         sfree(rs.text);
 425         return ret;
 426     }
 427 }
 428
 429 /*
 430  * Determine whether the next input character is an open brace (for
 431  * telling code paragraphs from paragraphs which merely start with
 432  * code).
 433  */
 434 int isbrace(input *in) {
 435     int c;
 436     filepos cpos;
 437
 438     c = get(in, &cpos);
 439     unget(in, c, &cpos);
 440     return (c == '{');
 441 }
 442
 443 /*
 444  * Read the rest of a line that starts `\c'. Including nothing at
 445  * all (tok_word with empty text).
 446  */
 447 token get_codepar_token(input *in) {
 448     int c;
 449     token ret;
 450     rdstring rs = { 0, 0, NULL };
 451     filepos cpos;
 452
 453     ret.type = tok_word;
 454     c = get(in, &cpos);                /* expect (and discard) one space */
 455     ret.pos = cpos;
 456     if (c == ' ') {
 457         c = get(in, &cpos);
 458         ret.pos = cpos;
 459     }
 460     while (!isnl(c) && c != EOF) {
 461         int c2 = c;
 462         c = get(in, &cpos);
 463         /* Discard \r just before \n. */
 464         if (c2 != 13 || !isnl(c))
 465             rdadd(&rs, c2);
 466     }
 467     unget(in, c, &cpos);
 468     ret.text = ustrdup(rs.text);
 469     sfree(rs.text);
 470     return ret;
 471 }
 472
 473 /*
 474  * Adds a new word to a linked list
 475  */
 476 static word *addword(word newword, word ***hptrptr) {
 477     word *mnewword;
 478     if (!hptrptr)
 479         return NULL;
 480     mnewword = mknew(word);
 481     *mnewword = newword;               /* structure copy */
 482     mnewword->next = NULL;
 483     **hptrptr = mnewword;
 484     *hptrptr = &mnewword->next;
 485     return mnewword;
 486 }
 487
 488 /*
 489  * Adds a new paragraph to a linked list
 490  */
 491 static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
 492     paragraph *mnewpara = mknew(paragraph);
 493     *mnewpara = newpara;               /* structure copy */
 494     mnewpara->next = NULL;
 495     **hptrptr = mnewpara;
 496     *hptrptr = &mnewpara->next;
 497     return mnewpara;
 498 }
 499
 /*
  * Destructor before token is reassigned; should catch most memory
  * leaks.
  *
  * Frees the token's text. It is an expression (not a statement) so it
  * can be used as `dtor(t), t = get_token(in)'. The argument is
  * parenthesised so that non-trivial expressions such as `*tp' expand
  * correctly.
  */
 #define dtor(t) ( sfree((t).text) )
 505
 506 /*
 507  * Reads a single file (ie until get() returns EOF)
 508  */
 509 static void read_file(paragraph ***ret, input *in, indexdata *idx) {
 510     token t;
 511     paragraph par;
 512     word wd, **whptr, **idximplicit;
 513     tree234 *macros;
 514     wchar_t utext[2], *wdtext;
 515     int style, spcstyle;
 516     int already;
 517     int iswhite, seenwhite;
 518     int type;
 519     int prev_para_type;
 520     struct stack_item {
 521         enum {
 522             stack_nop = 0,             /* do nothing (for error recovery) */
 523             stack_ualt = 1,            /* \u alternative */
 524             stack_style = 2,           /* \e, \c, \cw */
 525             stack_idx = 4,             /* \I, \i, \ii */
 526             stack_hyper = 8,           /* \W */
 527             stack_quote = 16,          /* \q */
 528         } type;
 529         word **whptr;                  /* to restore from \u alternatives */
 530         word **idximplicit;            /* to restore from \u alternatives */
 531         filepos fpos;
 532     } *sitem;
 533     stack parsestk;
 534     struct crossparaitem {
 535         int type;                      /* currently c_lcont, c_quote or -1 */
 536         int seen_lcont, seen_quote;
 537     };
 538     stack crossparastk;
 539     word *indexword, *uword, *iword;
 540     word *idxwordlist;
 541     rdstring indexstr;
 542     int index_downcase, index_visible, indexing;
 543     const rdstring nullrs = { 0, 0, NULL };
 544     wchar_t uchr;
 545
 546     t.text = NULL;
 547     macros = newtree234(macrocmp);
 548     already = FALSE;
 549
 550     crossparastk = stk_new();
 551
 552     /*
 553      * Loop on each paragraph.
 554      */
 555     while (1) {
 556         int start_cmd = c__invalid;
 557         par.words = NULL;
 558         par.keyword = NULL;
 559         whptr = &par.words;
 560
 561         /*
 562          * Get a token.
 563          */
 564         do {
 565             if (!already) {
 566                 dtor(t), t = get_token(in);
 567             }
 568             already = FALSE;
 569         } while (t.type == tok_eop);
 570         if (t.type == tok_eof)
 571             break;
 572
 573         /*
 574          * Parse code paragraphs separately.
 575          */
 576         if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
 577             int wtype = word_WeakCode;
 578
 579             par.type = para_Code;
 580             par.fpos = t.pos;
 581             while (1) {
 582                 dtor(t), t = get_codepar_token(in);
 583                 wd.type = wtype;
 584                 wd.breaks = FALSE;     /* shouldn't need this... */
 585                 wd.text = ustrdup(t.text);
 586                 wd.alt = NULL;
 587                 wd.fpos = t.pos;
 588                 addword(wd, &whptr);
 589                 dtor(t), t = get_token(in);
 590                 if (t.type == tok_white) {
 591                     /*
 592                      * The newline after a code-paragraph line
 593                      */
 594                     dtor(t), t = get_token(in);
 595                 }
 596                 if (t.type == tok_eop || t.type == tok_eof ||
 597                     t.type == tok_rbrace) { /* might be } terminating \lcont */
 598                     if (t.type == tok_rbrace)
 599                         already = TRUE;
 600                     break;
 601                 } else if (t.type == tok_cmd && t.cmd == c_c) {
 602                     wtype = word_WeakCode;
 603                 } else if (t.type == tok_cmd && t.cmd == c_e &&
 604                            wtype == word_WeakCode) {
 605                     wtype = word_Emph;
 606                 } else {
 607                     error(err_brokencodepara, &t.pos);
 608                     prev_para_type = par.type;
 609                     addpara(par, ret);
 610                     while (t.type != tok_eop)   /* error recovery: */
 611                         dtor(t), t = get_token(in);   /* eat rest of paragraph */
 612                     goto codeparabroken;   /* ick, but such is life */
 613                 }
 614             }
 615             prev_para_type = par.type;
 616             addpara(par, ret);
 617             codeparabroken:
 618             continue;
 619         }
 620
 621         /*
 622          * Spot the special commands that define a grouping of more
 623          * than one paragraph, and also the closing braces that
 624          * finish them.
 625          */
 626         if (t.type == tok_cmd &&
 627             (t.cmd == c_lcont || t.cmd == c_quote)) {
 628             struct crossparaitem *sitem, *stop;
 629             int cmd = t.cmd;
 630
 631             /*
 632              * Expect, and swallow, an open brace.
 633              */
 634             dtor(t), t = get_token(in);
 635             if (t.type != tok_lbrace) {
 636                 error(err_explbr, &t.pos);
 637                 continue;
 638             }
 639
 640             /*
 641              * Also expect, and swallow, any whitespace after that
 642              * (a newline before a code paragraph wouldn't be
 643              * surprising).
 644              */
 645             do {
 646                 dtor(t), t = get_token(in);
 647             } while (t.type == tok_white);
 648             already = TRUE;
 649
 650             if (cmd == c_lcont) {
 651                 /*
 652                  * \lcont causes a continuation of a list item into
 653                  * multiple paragraphs (which may in turn contain
 654                  * nested lists, code paras etc). Hence, the previous
 655                  * paragraph must be of a list type.
 656                  */
 657                 sitem = mknew(struct crossparaitem);
 658                 stop = (struct crossparaitem *)stk_top(crossparastk);
 659                 if (stop)
 660                     *sitem = *stop;
 661                 else
 662                     sitem->seen_quote = sitem->seen_lcont = 0;
 663
 664                 if (prev_para_type == para_Bullet ||
 665                     prev_para_type == para_NumberedList ||
 666                     prev_para_type == para_Description) {
 667                     sitem->type = c_lcont;
 668                     sitem->seen_lcont = 1;
 669                     par.type = para_LcontPush;
 670                     prev_para_type = par.type;
 671                     addpara(par, ret);
 672                 } else {
 673                     /*
 674                      * Push a null item on the cross-para stack so that
 675                      * when we see the corresponding closing brace we
 676                      * don't give a cascade error.
 677                      */
 678                     sitem->type = -1;
 679                     error(err_misplacedlcont, &t.pos);
 680                 }
 681             } else {
 682                 /*
 683                  * \quote causes a group of paragraphs to be
 684                  * block-quoted (typically they will be indented a
 685                  * bit).
 686                  */
 687                 sitem = mknew(struct crossparaitem);
 688                 stop = (struct crossparaitem *)stk_top(crossparastk);
 689                 if (stop)
 690                     *sitem = *stop;
 691                 else
 692                     sitem->seen_quote = sitem->seen_lcont = 0;
 693                 sitem->type = c_quote;
 694                 sitem->seen_quote = 1;
 695                 par.type = para_QuotePush;
 696                 prev_para_type = par.type;
 697                 addpara(par, ret);
 698             }
 699             stk_push(crossparastk, sitem);
 700             continue;
 701         } else if (t.type == tok_rbrace) {
 702             struct crossparaitem *sitem = stk_pop(crossparastk);
 703             if (!sitem)
 704                 error(err_unexbrace, &t.pos);
 705             else {
 706                 switch (sitem->type) {
 707                   case c_lcont:
 708                     par.type = para_LcontPop;
 709                     prev_para_type = par.type;
 710                     addpara(par, ret);
 711                     break;
 712                   case c_quote:
 713                     par.type = para_QuotePop;
 714                     prev_para_type = par.type;
 715                     addpara(par, ret);
 716                     break;
 717                 }
 718                 sfree(sitem);
 719             }
 720             continue;
 721         }
 722
 723         /*
 724          * This token begins a paragraph. See if it's one of the
 725          * special commands that define a paragraph type.
 726          *
 727          * (note that \# is special in a way, and \nocite takes no
 728          * text)
 729          */
 730         par.type = para_Normal;
 731         if (t.type == tok_cmd) {
 732             int needkw;
 733             int is_macro = FALSE;
 734
 735             par.fpos = t.pos;
 736             switch (t.cmd) {
 737               default:
 738                 needkw = -1;
 739                 break;
 740               case c__invalid:
 741                 error(err_badparatype, t.text, &t.pos);
 742                 needkw = 4;
 743                 break;
 744               case c__comment:
 745                 if (isbrace(in))
 746                     break;             /* `\#{': isn't a comment para */
 747                 do {
 748                     dtor(t), t = get_token(in);
 749                 } while (t.type != tok_eop && t.type != tok_eof);
 750                 continue;              /* next paragraph */
 751                 /*
 752                  * `needkw' values:
 753                  *
 754                  *   1 -- exactly one keyword
 755                  *   2 -- at least one keyword
 756                  *   4 -- any number of keywords including zero
 757                  *   8 -- at least one keyword and then nothing else
 758                  *  16 -- nothing at all! no keywords, no body
 759                  *  32 -- no keywords at all
 760                  */
 761               case c_A: needkw = 2; par.type = para_Appendix; break;
 762               case c_B: needkw = 2; par.type = para_Biblio; break;
 763               case c_BR: needkw = 1; par.type = para_BR;
 764                 start_cmd = c_BR; break;
 765               case c_C: needkw = 2; par.type = para_Chapter; break;
 766               case c_H: needkw = 2; par.type = para_Heading;
 767                 par.aux = 0;
 768                 break;
 769               case c_IM: needkw = 2; par.type = para_IM;
 770                 start_cmd = c_IM; break;
 771               case c_S: needkw = 2; par.type = para_Subsect;
 772                 par.aux = t.aux; break;
 773               case c_U: needkw = 32; par.type = para_UnnumberedChapter; break;
 774                 /* For \b and \n the keyword is optional */
 775               case c_b: needkw = 4; par.type = para_Bullet; break;
 776               case c_dt: needkw = 4; par.type = para_DescribedThing; break;
 777               case c_dd: needkw = 4; par.type = para_Description; break;
 778               case c_n: needkw = 4; par.type = para_NumberedList; break;
 779               case c_cfg: needkw = 8; par.type = para_Config;
 780                 start_cmd = c_cfg; break;
 781               case c_copyright: needkw = 32; par.type = para_Copyright; break;
 782               case c_define: is_macro = TRUE; needkw = 1; break;
 783                 /* For \nocite the keyword is _everything_ */
 784               case c_nocite: needkw = 8; par.type = para_NoCite; break;
 785               case c_preamble: needkw = 32; par.type = para_Normal; break;
 786               case c_rule: needkw = 16; par.type = para_Rule; break;
 787               case c_title: needkw = 32; par.type = para_Title; break;
 788               case c_versionid: needkw = 32; par.type = para_VersionID; break;
 789             }
 790
 791             if (par.type == para_Chapter ||
 792                 par.type == para_Heading ||
 793                 par.type == para_Subsect ||
 794                 par.type == para_Appendix ||
 795                 par.type == para_UnnumberedChapter) {
 796                 struct crossparaitem *sitem = stk_top(crossparastk);
 797                 if (sitem && (sitem->seen_lcont || sitem->seen_quote)) {
 798                     error(err_sectmarkerinblock,
 799                           &t.pos,
 800                           (sitem->seen_lcont ? "lcont" : "quote"));
 801                 }
 802             }
 803
 804             if (needkw > 0) {
 805                 rdstring rs = { 0, 0, NULL };
 806                 int nkeys = 0;
 807                 filepos fp;
 808
 809                 /* Get keywords. */
 810                 dtor(t), t = get_token(in);
 811                 fp = t.pos;
 812                 while (t.type == tok_lbrace) {
 813                     /* This is a keyword. */
 814                     nkeys++;
 815                     /* FIXME: there will be bugs if anyone specifies an
 816                      * empty keyword (\foo{}), so trap this case. */
 817                     while (dtor(t), t = get_token(in),
 818                            t.type == tok_word ||
 819                            t.type == tok_white ||
 820                            (t.type == tok_cmd && t.cmd == c__nbsp) ||
 821                            (t.type == tok_cmd && t.cmd == c__escaped)) {
 822                         if (t.type == tok_white ||
 823                             (t.type == tok_cmd && t.cmd == c__nbsp))
 824                             rdadd(&rs, ' ');
 825                         else
 826                             rdadds(&rs, t.text);
 827                     }
 828                     if (t.type != tok_rbrace) {
 829                         error(err_kwunclosed, &t.pos);
 830                         continue;
 831                     }
 832                     rdadd(&rs, 0);     /* add string terminator */
 833                     dtor(t), t = get_token(in); /* eat right brace */
 834                 }
 835
 836                 rdadd(&rs, 0);     /* add string terminator */
 837
 838                 /* See whether we have the right number of keywords. */
 839                 if ((needkw & 48) && nkeys > 0)
 840                     error(err_kwillegal, &fp);
 841                 if ((needkw & 11) && nkeys == 0)
 842                     error(err_kwexpected, &fp);
 843                 if ((needkw & 5) && nkeys > 1)
 844                     error(err_kwtoomany, &fp);
 845
 846                 if (is_macro) {
 847                     /*
 848                      * Macro definition. Get the rest of the line
 849                      * as a code-paragraph token, repeatedly until
 850                      * there's nothing more left of it. Separate
 851                      * with newlines.
 852                      */
 853                     rdstring macrotext = { 0, 0, NULL };
 854                     while (1) {
 855                         dtor(t), t = get_codepar_token(in);
 856                         if (macrotext.pos > 0)
 857                             rdadd(&macrotext, L'\n');
 858                         rdadds(&macrotext, t.text);
 859                         dtor(t), t = get_token(in);
 860                         if (t.type == tok_eop) break;
 861                     }
 862                     macrodef(macros, rs.text, macrotext.text, fp);
 863                     continue;          /* next paragraph */
 864                 }
 865
 866                 par.keyword = rdtrim(&rs);
 867
 868                 /* Move to EOP in case of needkw==8 or 16 (no body) */
 869                 if (needkw & 24) {
 870                     /* We allow whitespace even when we expect no para body */
 871                     while (t.type == tok_white)
 872                         dtor(t), t = get_token(in);
 873                     if (t.type != tok_eop && t.type != tok_eof &&
 874                         (start_cmd == c__invalid ||
 875                          t.type != tok_cmd || t.cmd != start_cmd)) {
 876                         error(err_bodyillegal, &t.pos);
 877                         /* Error recovery: eat the rest of the paragraph */
 878                         while (t.type != tok_eop && t.type != tok_eof &&
 879                                (start_cmd == c__invalid ||
 880                                 t.type != tok_cmd || t.cmd != start_cmd))
 881                             dtor(t), t = get_token(in);
 882                     }
 883                     if (t.type == tok_cmd)
 884                         already = TRUE;/* inhibit get_token at top of loop */
 885                     prev_para_type = par.type;
 886                     addpara(par, ret);
 887                     continue;          /* next paragraph */
 888                 }
 889             }
 890         }
 891
 892         /*
 893          * Now read the actual paragraph, word by word, adding to
 894          * the paragraph list.
 895          *
 896          * Mid-paragraph commands:
 897          *
 898          *  \K \k
 899          *  \c \cw
 900          *  \e
 901          *  \i \ii
 902          *  \I
 903          *  \u
 904          *  \W
 905          *  \date
 906          *  \\ \{ \}
 907          */
 908         parsestk = stk_new();
 909         style = word_Normal;
 910         spcstyle = word_WhiteSpace;
 911         indexing = FALSE;
 912         seenwhite = TRUE;
 913         while (t.type != tok_eop && t.type != tok_eof) {
 914             iswhite = FALSE;
 915             already = FALSE;
 916
 917             /* Handle implicit paragraph breaks after \IM, \BR etc */
 918             if (start_cmd != c__invalid &&
 919                 t.type == tok_cmd && t.cmd == start_cmd) {
 920                 already = TRUE;        /* inhibit get_token at top of loop */
 921                 break;
 922             }
 923
 924             if (t.type == tok_cmd && t.cmd == c__nop) {
 925                 dtor(t), t = get_token(in);
 926                 continue;              /* do nothing! */
 927             }
 928
 929             if (t.type == tok_cmd && t.cmd == c__escaped) {
 930                 t.type = tok_word;     /* nice and simple */
 931                 t.aux = 0;             /* even if `\-' - nonbreaking! */
 932             }
 933             if (t.type == tok_cmd && t.cmd == c__nbsp) {
 934                 t.type = tok_word;     /* nice and simple */
 935                 sfree(t.text);
 936                 t.text = ustrdup(L" ");  /* text is ` ' not `_' */
 937                 t.aux = 0;             /* (nonbreaking) */
 938             }
 939             switch (t.type) {
 940               case tok_white:
 941                 if (whptr == &par.words)
 942                     break;             /* strip whitespace at start of para */
 943                 wd.text = NULL;
 944                 wd.type = spcstyle;
 945                 wd.alt = NULL;
 946                 wd.aux = 0;
 947                 wd.fpos = t.pos;
 948                 wd.breaks = FALSE;
 949
 950                 /*
 951                  * Inhibit use of whitespace if it's (probably the
 952                  * newline) before a repeat \IM / \BR type
 953                  * directive.
 954                  */
 955                 if (start_cmd != c__invalid) {
 956                     dtor(t), t = get_token(in);
 957                     already = TRUE;
 958                     if (t.type == tok_cmd && t.cmd == start_cmd)
 959                         break;
 960                 }
 961
 962                 if (indexing)
 963                     rdadd(&indexstr, ' ');
 964                 if (!indexing || index_visible)
 965                     addword(wd, &whptr);
 966                 if (indexing)
 967                     addword(wd, &idximplicit);
 968                 iswhite = TRUE;
 969                 break;
 970               case tok_word:
 971                 if (indexing)
 972                     rdadds(&indexstr, t.text);
 973                 wd.type = style;
 974                 wd.alt = NULL;
 975                 wd.aux = 0;
 976                 wd.fpos = t.pos;
 977                 wd.breaks = t.aux;
 978                 if (!indexing || index_visible) {
 979                     wd.text = ustrdup(t.text);
 980                     addword(wd, &whptr);
 981                 }
 982                 if (indexing) {
 983                     wd.text = ustrdup(t.text);
 984                     addword(wd, &idximplicit);
 985                 }
 986                 break;
 987               case tok_lbrace:
 988                 error(err_unexbrace, &t.pos);
 989                 /* Error recovery: push nop */
 990                 sitem = mknew(struct stack_item);
 991                 sitem->type = stack_nop;
 992                 sitem->fpos = t.pos;
 993                 stk_push(parsestk, sitem);
 994                 break;
 995               case tok_rbrace:
 996                 sitem = stk_pop(parsestk);
 997                 if (!sitem) {
 998                     /*
 999                      * This closing brace could have been an
1000                      * indication that the cross-paragraph stack
1001                      * wants popping. Accordingly, we treat it here
1002                      * as an indication that the paragraph is over.
1003                      */
1004                     already = TRUE;
1005                     goto finished_para;
1006                 } else {
1007                     if (sitem->type & stack_ualt) {
1008                         whptr = sitem->whptr;
1009                         idximplicit = sitem->idximplicit;
1010                     }
1011                     if (sitem->type & stack_style) {
1012                         style = word_Normal;
1013                         spcstyle = word_WhiteSpace;
1014                     }
1015                     if (sitem->type & stack_idx) {
1016                         indexword->text = ustrdup(indexstr.text);
1017                         if (index_downcase) {
1018                             word *w;
1019
1020                             ustrlow(indexword->text);
1021                             ustrlow(indexstr.text);
1022
1023                             for (w = idxwordlist; w; w = w->next)
1024                                 if (w->text)
1025                                     ustrlow(w->text);
1026                         }
1027                         indexing = FALSE;
1028                         rdadd(&indexstr, L'\0');
1029                         index_merge(idx, FALSE, indexstr.text,
1030                                     idxwordlist, &sitem->fpos);
1031                         sfree(indexstr.text);
1032                     }
1033                     if (sitem->type & stack_hyper) {
1034                         wd.text = NULL;
1035                         wd.type = word_HyperEnd;
1036                         wd.alt = NULL;
1037                         wd.aux = 0;
1038                         wd.fpos = t.pos;
1039                         wd.breaks = FALSE;
1040                         if (!indexing || index_visible)
1041                             addword(wd, &whptr);
1042                         if (indexing)
1043                             addword(wd, &idximplicit);
1044                     }
1045                     if (sitem->type & stack_quote) {
1046                         wd.text = NULL;
1047                         wd.type = toquotestyle(style);
1048                         wd.alt = NULL;
1049                         wd.aux = quote_Close;
1050                         wd.fpos = t.pos;
1051                         wd.breaks = FALSE;
1052                         if (!indexing || index_visible)
1053                             addword(wd, &whptr);
1054                         if (indexing) {
1055                             rdadd(&indexstr, L'"');
1056                             addword(wd, &idximplicit);
1057                         }
1058                     }
1059                 }
1060                 sfree(sitem);
1061                 break;
1062               case tok_cmd:
1063                 switch (t.cmd) {
1064                   case c__comment:
1065                     /*
1066                      * In-paragraph comment: \#{ balanced braces }
1067                      *
1068                      * Anything goes here; even tok_eop. We should
1069                      * eat whitespace after the close brace _if_
1070                      * there was whitespace before the \#.
1071                      */
1072                     dtor(t), t = get_token(in);
1073                     if (t.type != tok_lbrace) {
1074                         error(err_explbr, &t.pos);
1075                     } else {
1076                         int braces = 1;
1077                         while (braces > 0) {
1078                             dtor(t), t = get_token(in);
1079                             if (t.type == tok_lbrace)
1080                                 braces++;
1081                             else if (t.type == tok_rbrace)
1082                                 braces--;
1083                             else if (t.type == tok_eof) {
1084                                 error(err_commenteof, &t.pos);
1085                                 break;
1086                             }
1087                         }
1088                     }
1089                     if (seenwhite) {
1090                         already = TRUE;
1091                         dtor(t), t = get_token(in);
1092                         if (t.type == tok_white) {
1093                             iswhite = TRUE;
1094                             already = FALSE;
1095                         }
1096                     }
1097                     break;
1098                   case c_q:
1099                     dtor(t), t = get_token(in);
1100                     if (t.type != tok_lbrace) {
1101                         error(err_explbr, &t.pos);
1102                     } else {
1103                         wd.text = NULL;
1104                         wd.type = toquotestyle(style);
1105                         wd.alt = NULL;
1106                         wd.aux = quote_Open;
1107                         wd.fpos = t.pos;
1108                         wd.breaks = FALSE;
1109                         if (!indexing || index_visible)
1110                             addword(wd, &whptr);
1111                         if (indexing) {
1112                             rdadd(&indexstr, L'"');
1113                             addword(wd, &idximplicit);
1114                         }
1115                         sitem = mknew(struct stack_item);
1116                         sitem->fpos = t.pos;
1117                         sitem->type = stack_quote;
1118                         stk_push(parsestk, sitem);
1119                     }
1120                     break;
1121                   case c_K:
1122                   case c_k:
1123                   case c_W:
1124                   case c_date:
1125                     /*
1126                      * Keyword, hyperlink, or \date. We expect a
1127                      * left brace, some text, and then a right
1128                      * brace. No nesting; no arguments.
1129                      */
1130                     wd.fpos = t.pos;
1131                     wd.breaks = FALSE;
1132                     if (t.cmd == c_K)
1133                         wd.type = word_UpperXref;
1134                     else if (t.cmd == c_k)
1135                         wd.type = word_LowerXref;
1136                     else if (t.cmd == c_W)
1137                         wd.type = word_HyperLink;
1138                     else
1139                         wd.type = word_Normal;
1140                     dtor(t), t = get_token(in);
1141                     if (t.type != tok_lbrace) {
1142                         if (wd.type == word_Normal) {
1143                             time_t thetime = time(NULL);
1144                             struct tm *broken = localtime(&thetime);
1145                             already = TRUE;
1146                             wdtext = ustrftime(NULL, broken);
1147                             wd.type = style;
1148                         } else {
1149                             error(err_explbr, &t.pos);
1150                             wdtext = NULL;
1151                         }
1152                     } else {
1153                         rdstring rs = { 0, 0, NULL };
1154                         while (dtor(t), t = get_token(in),
1155                                t.type == tok_word || t.type == tok_white) {
1156                             if (t.type == tok_white)
1157                                 rdadd(&rs, ' ');
1158                             else
1159                                 rdadds(&rs, t.text);
1160                         }
1161                         if (wd.type == word_Normal) {
1162                             time_t thetime = time(NULL);
1163                             struct tm *broken = localtime(&thetime);
1164                             wdtext = ustrftime(rs.text, broken);
1165                             wd.type = style;
1166                         } else {
1167                             wdtext = ustrdup(rs.text);
1168                         }
1169                         sfree(rs.text);
1170                         if (t.type != tok_rbrace) {
1171                             error(err_kwexprbr, &t.pos);
1172                         }
1173                     }
1174                     wd.alt = NULL;
1175                     wd.aux = 0;
1176                     if (!indexing || index_visible) {
1177                         wd.text = ustrdup(wdtext);
1178                         addword(wd, &whptr);
1179                     }
1180                     if (indexing) {
1181                         wd.text = ustrdup(wdtext);
1182                         addword(wd, &idximplicit);
1183                     }
1184                     sfree(wdtext);
1185                     if (wd.type == word_HyperLink) {
1186                         /*
1187                          * Hyperlinks are different: they then
1188                          * expect another left brace, to begin
1189                          * delimiting the text marked by the link.
1190                          */
1191                         dtor(t), t = get_token(in);
1192                         sitem = mknew(struct stack_item);
1193                         sitem->fpos = wd.fpos;
1194                         sitem->type = stack_hyper;
1195                         /*
1196                          * Special cases: \W{}\i, \W{}\ii
1197                          */
1198                         if (t.type == tok_cmd &&
1199                             (t.cmd == c_i || t.cmd == c_ii)) {
1200                             if (indexing) {
1201                                 error(err_nestedindex, &t.pos);
1202                             } else {
1203                                 /* Add an index-reference word with no
1204                                  * text as yet */
1205                                 wd.type = word_IndexRef;
1206                                 wd.text = NULL;
1207                                 wd.alt = NULL;
1208                                 wd.aux = 0;
1209                                 wd.breaks = FALSE;
1210                                 indexword = addword(wd, &whptr);
1211                                 /* Set up a rdstring to read the
1212                                  * index text */
1213                                 indexstr = nullrs;
1214                                 /* Flags so that we do the Right
1215                                  * Things with text */
1216                                 index_visible = (type != c_I);
1217                                 index_downcase = (type == c_ii);
1218                                 indexing = TRUE;
1219                                 idxwordlist = NULL;
1220                                 idximplicit = &idxwordlist;
1221
1222                                 sitem->type |= stack_idx;
1223                             }
1224                             dtor(t), t = get_token(in);
1225                         }
1226                         /*
1227                          * Special cases: \W{}\c, \W{}\e, \W{}\cw
1228                          */
1229                         if (t.type == tok_cmd &&
1230                             (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
1231                             if (style != word_Normal)
1232                                 error(err_nestedstyles, &t.pos);
1233                             else {
1234                                 style = (t.cmd == c_c ? word_Code :
1235                                          t.cmd == c_cw ? word_WeakCode :
1236                                          word_Emph);
1237                                 spcstyle = tospacestyle(style);
1238                                 sitem->type |= stack_style;
1239                             }
1240                             dtor(t), t = get_token(in);
1241                         }
1242                         if (t.type != tok_lbrace) {
1243                             error(err_explbr, &t.pos);
1244                             sfree(sitem);
1245                         } else {
1246                             stk_push(parsestk, sitem);
1247                         }
1248                     }
1249                     break;
1250                   case c_c:
1251                   case c_cw:
1252                   case c_e:
1253                     type = t.cmd;
1254                     if (style != word_Normal) {
1255                         error(err_nestedstyles, &t.pos);
1256                         /* Error recovery: eat lbrace, push nop. */
1257                         dtor(t), t = get_token(in);
1258                         sitem = mknew(struct stack_item);
1259                         sitem->fpos = t.pos;
1260                         sitem->type = stack_nop;
1261                         stk_push(parsestk, sitem);
1262                     }
1263                     dtor(t), t = get_token(in);
1264                     if (t.type != tok_lbrace) {
1265                         error(err_explbr, &t.pos);
1266                     } else {
1267                         style = (type == c_c ? word_Code :
1268                                  type == c_cw ? word_WeakCode :
1269                                  word_Emph);
1270                         spcstyle = tospacestyle(style);
1271                         sitem = mknew(struct stack_item);
1272                         sitem->fpos = t.pos;
1273                         sitem->type = stack_style;
1274                         stk_push(parsestk, sitem);
1275                     }
1276                     break;
1277                   case c_i:
1278                   case c_ii:
1279                   case c_I:
1280                     type = t.cmd;
1281                     if (indexing) {
1282                         error(err_nestedindex, &t.pos);
1283                         /* Error recovery: eat lbrace, push nop. */
1284                         dtor(t), t = get_token(in);
1285                         sitem = mknew(struct stack_item);
1286                         sitem->fpos = t.pos;
1287                         sitem->type = stack_nop;
1288                         stk_push(parsestk, sitem);
1289                     }
1290                     sitem = mknew(struct stack_item);
1291                     sitem->fpos = t.pos;
1292                     sitem->type = stack_idx;
1293                     dtor(t), t = get_token(in);
1294                     /*
1295                      * Special cases: \i\c, \i\e, \i\cw
1296                      */
1297                     wd.fpos = t.pos;
1298                     if (t.type == tok_cmd &&
1299                         (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
1300                         if (style != word_Normal)
1301                             error(err_nestedstyles, &t.pos);
1302                         else {
1303                             style = (t.cmd == c_c ? word_Code :
1304                                      t.cmd == c_cw ? word_WeakCode :
1305                                      word_Emph);
1306                             spcstyle = tospacestyle(style);
1307                             sitem->type |= stack_style;
1308                         }
1309                         dtor(t), t = get_token(in);
1310                     }
1311                     if (t.type != tok_lbrace) {
1312                         sfree(sitem);
1313                         error(err_explbr, &t.pos);
1314                     } else {
1315                         /* Add an index-reference word with no text as yet */
1316                         wd.type = word_IndexRef;
1317                         wd.text = NULL;
1318                         wd.alt = NULL;
1319                         wd.aux = 0;
1320                         wd.breaks = FALSE;
1321                         indexword = addword(wd, &whptr);
1322                         /* Set up a rdstring to read the index text */
1323                         indexstr = nullrs;
1324                         /* Flags so that we do the Right Things with text */
1325                         index_visible = (type != c_I);
1326                         index_downcase = (type == c_ii);
1327                         indexing = TRUE;
1328                         idxwordlist = NULL;
1329                         idximplicit = &idxwordlist;
1330                         /* Stack item to close the indexing on exit */
1331                         stk_push(parsestk, sitem);
1332                     }
1333                     break;
1334                   case c_u:
1335                     uchr = t.aux;
1336                     utext[0] = uchr; utext[1] = 0;
1337                     wd.type = style;
1338                     wd.breaks = FALSE;
1339                     wd.alt = NULL;
1340                     wd.aux = 0;
1341                     wd.fpos = t.pos;
1342                     if (!indexing || index_visible) {
1343                         wd.text = ustrdup(utext);
1344                         uword = addword(wd, &whptr);
1345                     } else
1346                         uword = NULL;
1347                     if (indexing) {
1348                         wd.text = ustrdup(utext);
1349                         iword = addword(wd, &idximplicit);
1350                     } else
1351                         iword = NULL;
1352                     dtor(t), t = get_token(in);
1353                     if (t.type == tok_lbrace) {
1354                         /*
1355                          * \u with a left brace. Until the brace
1356                          * closes, all further words go on a
1357                          * sidetrack from the main thread of the
1358                          * paragraph.
1359                          */
1360                         sitem = mknew(struct stack_item);
1361                         sitem->fpos = t.pos;
1362                         sitem->type = stack_ualt;
1363                         sitem->whptr = whptr;
1364                         sitem->idximplicit = idximplicit;
1365                         stk_push(parsestk, sitem);
1366                         whptr = uword ? &uword->alt : NULL;
1367                         idximplicit = iword ? &iword->alt : NULL;
1368                     } else {
1369                         if (indexing)
1370                             rdadd(&indexstr, uchr);
1371                         already = TRUE;
1372                     }
1373                     break;
1374                   default:
1375                     if (!macrolookup(macros, in, t.text, &t.pos))
1376                         error(err_badmidcmd, t.text, &t.pos);
1377                     break;
1378                 }
1379             }
1380             if (!already)
1381                 dtor(t), t = get_token(in);
1382             seenwhite = iswhite;
1383         }
1384         finished_para:
1385         /* Check the stack is empty */
1386         if (stk_top(parsestk)) {
1387             while ((sitem = stk_pop(parsestk)))
1388                 sfree(sitem);
1389             error(err_missingrbrace, &t.pos);
1390         }
1391         stk_free(parsestk);
1392         prev_para_type = par.type;
1393         addpara(par, ret);
1394         if (t.type == tok_eof)
1395             already = TRUE;
1396     }
1397
1398     if (stk_top(crossparastk)) {
1399         void *p;
1400
1401         error(err_missingrbrace2, &t.pos);
1402         while ((p = stk_pop(crossparastk)))
1403             sfree(p);
1404     }
1405
1406     /*
1407      * We break to here rather than returning, because otherwise
1408      * this cleanup doesn't happen.
1409      */
1410     dtor(t);
1411     macrocleanup(macros);
1412
1413     stk_free(crossparastk);
1414 }
1415
1416 paragraph *read_input(input *in, indexdata *idx) {
1417     paragraph *head = NULL;
1418     paragraph **hptr = &head;
1419
1420     while (in->currindex < in->nfiles) {
1421         in->currfp = fopen(in->filenames[in->currindex], "r");
1422         if (in->currfp) {
1423             setpos(in, in->filenames[in->currindex]);
1424             read_file(&hptr, in, idx);
1425         }
1426         in->currindex++;
1427     }
1428
1429     return head;
1430 }