mdw@git.distorted.org.uk Git - sgt/halibut/blob - input.c

   1 /*
   2  * input.c: read the source form
   3  */
   4
#include <stdio.h>
#include <assert.h>
#include <limits.h>
#include <time.h>
#include "halibut.h"
   9
  10 #define TAB_STOP 8                     /* for column number tracking */
  11
  12 static void setpos(input *in, char *fname) {
  13     in->pos.filename = fname;
  14     in->pos.line = 1;
  15     in->pos.col = (in->reportcols ? 1 : -1);
  16 }
  17
  18 static void unget(input *in, int c, filepos *pos) {
  19     if (in->npushback >= in->pushbacksize) {
  20         in->pushbacksize = in->npushback + 16;
  21         in->pushback = resize(in->pushback, in->pushbacksize);
  22     }
  23     in->pushback[in->npushback].chr = c;
  24     in->pushback[in->npushback].pos = *pos;   /* structure copy */
  25     in->npushback++;
  26 }
  27
  28 /* ---------------------------------------------------------------------- */
  29 /*
  30  * Macro subsystem
  31  */
  32 typedef struct macro_Tag macro;
  33 struct macro_Tag {
  34     wchar_t *name, *text;
  35 };
  36 struct macrostack_Tag {
  37     macrostack *next;
  38     wchar_t *text;
  39     int ptr, npushback;
  40     filepos pos;
  41 };
  42 static int macrocmp(void *av, void *bv) {
  43     macro *a = (macro *)av, *b = (macro *)bv;
  44     return ustrcmp(a->name, b->name);
  45 }
  46 static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text,
  47                      filepos fpos) {
  48     macro *m = mknew(macro);
  49     m->name = name;
  50     m->text = text;
  51     if (add234(macros, m) != m) {
  52         error(err_macroexists, &fpos, name);
  53         sfree(name);
  54         sfree(text);
  55     }
  56 }
  57 static int macrolookup(tree234 *macros, input *in, wchar_t *name,
  58                        filepos *pos) {
  59     macro m, *gotit;
  60     m.name = name;
  61     gotit = find234(macros, &m, NULL);
  62     if (gotit) {
  63         macrostack *expansion = mknew(macrostack);
  64         expansion->next = in->stack;
  65         expansion->text = gotit->text;
  66         expansion->pos = *pos;         /* structure copy */
  67         expansion->ptr = 0;
  68         expansion->npushback = in->npushback;
  69         in->stack = expansion;
  70         return TRUE;
  71     } else
  72         return FALSE;
  73 }
  74 static void macrocleanup(tree234 *macros) {
  75     int ti;
  76     macro *m;
  77     for (ti = 0; (m = (macro *)index234(macros, ti)) != NULL; ti++) {
  78         sfree(m->name);
  79         sfree(m->text);
  80         sfree(m);
  81     }
  82     freetree234(macros);
  83 }
  84
  85 static void input_configure(input *in, paragraph *cfg) {
  86     assert(cfg->type == para_Config);
  87
  88     if (!ustricmp(cfg->keyword, L"input-charset")) {
  89         char *csname = utoa_dup(uadv(cfg->keyword));
  90         in->charset = charset_from_localenc(csname);
  91         sfree(csname);
  92     }
  93 }
  94
  95 /*
  96  * Can return EOF
  97  */
  98 static int get(input *in, filepos *pos) {
  99     int pushbackpt = in->stack ? in->stack->npushback : 0;
 100     if (in->npushback > pushbackpt) {
 101         --in->npushback;
 102         if (pos)
 103             *pos = in->pushback[in->npushback].pos;   /* structure copy */
 104         return in->pushback[in->npushback].chr;
 105     }
 106     else if (in->stack) {
 107         wchar_t c = in->stack->text[in->stack->ptr];
 108         if (in->stack->text[++in->stack->ptr] == L'\0') {
 109             macrostack *tmp = in->stack;
 110             in->stack = tmp->next;
 111             sfree(tmp);
 112         }
 113         return c;
 114     }
 115     else if (in->currfp) {
 116
 117         while (in->wcpos >= in->nwc) {
 118
 119             int c = getc(in->currfp);
 120
 121             if (c == EOF) {
 122                 fclose(in->currfp);
 123                 in->currfp = NULL;
 124                 return EOF;
 125             }
 126             /* Track line numbers, for error reporting */
 127             if (pos)
 128                 *pos = in->pos;
 129             if (in->reportcols) {
 130                 switch (c) {
 131                   case '\t':
 132                     in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP;
 133                     break;
 134                   case '\n':
 135                     in->pos.col = 1;
 136                     in->pos.line++;
 137                     break;
 138                   default:
 139                     in->pos.col++;
 140                     break;
 141                 }
 142             } else {
 143                 in->pos.col = -1;
 144                 if (c == '\n')
 145                     in->pos.line++;
 146             }
 147
 148             /*
 149              * Do input character set translation, so that we return
 150              * Unicode.
 151              */
 152             {
 153                 char buf[1];
 154                 char const *p;
 155                 int inlen;
 156
 157                 buf[0] = (char)c;
 158                 p = buf;
 159                 inlen = 1;
 160
 161                 in->nwc = charset_to_unicode(&p, &inlen,
 162                                              in->wc, lenof(in->wc),
 163                                              in->charset, &in->csstate,
 164                                              NULL, 0);
 165                 assert(p == buf+1 && inlen == 0);
 166
 167                 in->wcpos = 0;
 168             }
 169         }
 170
 171         return in->wc[in->wcpos++];
 172
 173     } else
 174         return EOF;
 175 }
 176
 177 /*
 178  * Lexical analysis of source files.
 179  */
 180 typedef struct token_Tag token;
 181 struct token_Tag {
 182     int type;
 183     int cmd, aux;
 184     wchar_t *text;
 185     filepos pos;
 186 };
 187 enum {
 188     tok_eof,                           /* end of file */
 189     tok_eop,                           /* end of paragraph */
 190     tok_white,                         /* whitespace */
 191     tok_word,                          /* a word or word fragment */
 192     tok_cmd,                           /* \command */
 193     tok_lbrace,                        /* { */
 194     tok_rbrace                         /* } */
 195 };
 196
 197 /* Halibut command keywords. */
 198 enum {
 199     c__invalid,                        /* invalid command */
 200     c__comment,                        /* comment command (\#) */
 201     c__escaped,                        /* escaped character */
 202     c__nop,                            /* no-op */
 203     c__nbsp,                           /* nonbreaking space */
 204     c_A,                               /* appendix heading */
 205     c_B,                               /* bibliography entry */
 206     c_BR,                              /* bibliography rewrite */
 207     c_C,                               /* chapter heading */
 208     c_H,                               /* heading */
 209     c_I,                               /* invisible index mark */
 210     c_IM,                              /* index merge/rewrite */
 211     c_K,                               /* capitalised cross-reference */
 212     c_S,                               /* aux field is 0, 1, 2, ... */
 213     c_U,                               /* unnumbered-chapter heading */
 214     c_W,                               /* Web hyperlink */
 215     c_b,                               /* bulletted list */
 216     c_c,                               /* code */
 217     c_cfg,                             /* configuration directive */
 218     c_copyright,                       /* copyright statement */
 219     c_cw,                              /* weak code */
 220     c_date,                            /* document processing date */
 221     c_dd,                              /* description list: description */
 222     c_define,                          /* macro definition */
 223     c_dt,                              /* description list: described thing */
 224     c_e,                               /* emphasis */
 225     c_i,                               /* visible index mark */
 226     c_ii,                              /* uncapitalised visible index mark */
 227     c_k,                               /* uncapitalised cross-reference */
 228     c_lcont,                           /* continuation para(s) for list item */
 229     c_n,                               /* numbered list */
 230     c_nocite,                          /* bibliography trickery */
 231     c_preamble,                        /* (obsolete) preamble text */
 232     c_q,                               /* quote marks */
 233     c_quote,                           /* block-quoted paragraphs */
 234     c_rule,                            /* horizontal rule */
 235     c_title,                           /* document title */
 236     c_u,                               /* aux field is char code */
 237     c_versionid                        /* document RCS id */
 238 };
 239
 240 /* Perhaps whitespace should be defined in a more Unicode-friendly way? */
 241 #define iswhite(c) ( (c)==32 || (c)==9 || (c)==13 || (c)==10 )
 242 #define isnl(c) ( (c)==10 )
 243 #define isdec(c) ( ((c)>='0'&&(c)<='9') )
 244 #define fromdec(c) ( (c)-'0' )
 245 #define ishex(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f'))
 246 #define fromhex(c) ( (c)<='9' ? (c)-'0' : ((c)&0xDF) - ('A'-10) )
 247 #define iscmd(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z'))
 248
 249 /*
 250  * Keyword comparison function. Like strcmp, but between a wchar_t *
 251  * and a char *.
 252  */
 253 static int kwcmp(wchar_t const *p, char const *q) {
 254     int i;
 255     do {
 256         i = *p - *q;
 257     } while (*p++ && *q++ && !i);
 258     return i;
 259 }
 260
 261 /*
 262  * Match a keyword.
 263  */
 264 static void match_kw(token *tok) {
 265     /*
 266      * FIXME. The ids are explicit in here so as to allow long-name
 267      * equivalents to the various very short keywords.
 268      */
 269     static const struct { char const *name; int id; } keywords[] = {
 270         {"#", c__comment},             /* comment command (\#) */
 271         {"-", c__escaped},             /* nonbreaking hyphen */
 272         {".", c__nop},                 /* no-op */
 273         {"A", c_A},                    /* appendix heading */
 274         {"B", c_B},                    /* bibliography entry */
 275         {"BR", c_BR},                  /* bibliography rewrite */
 276         {"C", c_C},                    /* chapter heading */
 277         {"H", c_H},                    /* heading */
 278         {"I", c_I},                    /* invisible index mark */
 279         {"IM", c_IM},                  /* index merge/rewrite */
 280         {"K", c_K},                    /* capitalised cross-reference */
 281         {"U", c_U},                    /* unnumbered-chapter heading */
 282         {"W", c_W},                    /* Web hyperlink */
 283         {"\\", c__escaped},            /* escaped backslash (\\) */
 284         {"_", c__nbsp},                /* nonbreaking space (\_) */
 285         {"b", c_b},                    /* bulletted list */
 286         {"c", c_c},                    /* code */
 287         {"cfg", c_cfg},                /* configuration directive */
 288         {"copyright", c_copyright},    /* copyright statement */
 289         {"cw", c_cw},                  /* weak code */
 290         {"date", c_date},              /* document processing date */
 291         {"dd", c_dd},                  /* description list: description */
 292         {"define", c_define},          /* macro definition */
 293         {"dt", c_dt},                  /* description list: described thing */
 294         {"e", c_e},                    /* emphasis */
 295         {"i", c_i},                    /* visible index mark */
 296         {"ii", c_ii},                  /* uncapitalised visible index mark */
 297         {"k", c_k},                    /* uncapitalised cross-reference */
 298         {"lcont", c_lcont},            /* continuation para(s) for list item */
 299         {"n", c_n},                    /* numbered list */
 300         {"nocite", c_nocite},          /* bibliography trickery */
 301         {"preamble", c_preamble},      /* (obsolete) preamble text */
 302         {"q", c_q},                    /* quote marks */
 303         {"quote", c_quote},            /* block-quoted paragraphs */
 304         {"rule", c_rule},              /* horizontal rule */
 305         {"title", c_title},            /* document title */
 306         {"versionid", c_versionid},    /* document RCS id */
 307         {"{", c__escaped},             /* escaped lbrace (\{) */
 308         {"}", c__escaped},             /* escaped rbrace (\}) */
 309     };
 310     int i, j, k, c;
 311
 312     /*
 313      * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
 314      * doesn't match correctly, we just fall through to the
 315      * binary-search phase.
 316      */
 317     if (tok->text[0] == 'S') {
 318         /* We expect numeric characters thereafter. */
 319         wchar_t *p = tok->text+1;
 320         int n;
 321         if (!*p)
 322             n = 1;
 323         else {
 324             n = 0;
 325             while (*p && isdec(*p)) {
 326                 n = 10 * n + fromdec(*p);
 327                 p++;
 328             }
 329         }
 330         if (!*p) {
 331             tok->cmd = c_S;
 332             tok->aux = n;
 333             return;
 334         }
 335     } else if (tok->text[0] == 'u') {
 336         /* We expect hex characters thereafter. */
 337         wchar_t *p = tok->text+1;
 338         int n = 0;
 339         while (*p && ishex(*p)) {
 340             n = 16 * n + fromhex(*p);
 341             p++;
 342         }
 343         if (!*p) {
 344             tok->cmd = c_u;
 345             tok->aux = n;
 346             return;
 347         }
 348     }
 349
 350     i = -1;
 351     j = sizeof(keywords)/sizeof(*keywords);
 352     while (j-i > 1) {
 353         k = (i+j)/2;
 354         c = kwcmp(tok->text, keywords[k].name);
 355         if (c < 0)
 356             j = k;
 357         else if (c > 0)
 358             i = k;
 359         else /* c == 0 */ {
 360             tok->cmd = keywords[k].id;
 361             return;
 362         }
 363     }
 364
 365     tok->cmd = c__invalid;
 366 }
 367
 368
 369 /*
 370  * Read a token from the input file, in the normal way (`normal' in
 371  * the sense that code paragraphs work a different way).
 372  */
 373 token get_token(input *in) {
 374     int c;
 375     int nls;
 376     token ret;
 377     rdstring rs = { 0, 0, NULL };
 378     filepos cpos;
 379
 380     ret.text = NULL;                   /* default */
 381     c = get(in, &cpos);
 382     ret.pos = cpos;
 383     if (iswhite(c)) {                  /* tok_white or tok_eop */
 384         nls = 0;
 385         do {
 386             if (isnl(c))
 387                 nls++;
 388         } while ((c = get(in, &cpos)) != EOF && iswhite(c));
 389         if (c == EOF) {
 390             ret.type = tok_eof;
 391             return ret;
 392         }
 393         unget(in, c, &cpos);
 394         ret.type = (nls > 1 ? tok_eop : tok_white);
 395         return ret;
 396     } else if (c == EOF) {             /* tok_eof */
 397         ret.type = tok_eof;
 398         return ret;
 399     } else if (c == '\\') {            /* tok_cmd */
 400         c = get(in, &cpos);
 401         if (c == '-' || c == '\\' || c == '_' ||
 402             c == '#' || c == '{' || c == '}' || c == '.') {
 403             /* single-char command */
 404             rdadd(&rs, c);
 405         } else if (c == 'u') {
 406             int len = 0;
 407             do {
 408                 rdadd(&rs, c);
 409                 len++;
 410                 c = get(in, &cpos);
 411             } while (ishex(c) && len < 5);
 412             unget(in, c, &cpos);
 413         } else if (iscmd(c)) {
 414             do {
 415                 rdadd(&rs, c);
 416                 c = get(in, &cpos);
 417             } while (iscmd(c));
 418             unget(in, c, &cpos);
 419         }
 420         /*
 421          * Now match the command against the list of available
 422          * ones.
 423          */
 424         ret.type = tok_cmd;
 425         ret.text = ustrdup(rs.text);
 426         match_kw(&ret);
 427         sfree(rs.text);
 428         return ret;
 429     } else if (c == '{') {             /* tok_lbrace */
 430         ret.type = tok_lbrace;
 431         return ret;
 432     } else if (c == '}') {             /* tok_rbrace */
 433         ret.type = tok_rbrace;
 434         return ret;
 435     } else {                           /* tok_word */
 436         /*
 437          * Read a word: the longest possible contiguous sequence of
 438          * things other than whitespace, backslash, braces and
 439          * hyphen. A hyphen terminates the word but is returned as
 440          * part of it; everything else is pushed back for the next
 441          * token. The `aux' field contains TRUE if the word ends in
 442          * a hyphen.
 443          */
 444         ret.aux = FALSE;               /* assumed for now */
 445         while (1) {
 446             if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) {
 447                 /* Put back the character that caused termination */
 448                 unget(in, c, &cpos);
 449                 break;
 450             } else {
 451                 rdadd(&rs, c);
 452                 if (c == '-') {
 453                     ret.aux = TRUE;
 454                     break;             /* hyphen terminates word */
 455                 }
 456             }
 457             c = get(in, &cpos);
 458         }
 459         ret.type = tok_word;
 460         ret.text = ustrdup(rs.text);
 461         sfree(rs.text);
 462         return ret;
 463     }
 464 }
 465
 466 /*
 467  * Determine whether the next input character is an open brace (for
 468  * telling code paragraphs from paragraphs which merely start with
 469  * code).
 470  */
 471 int isbrace(input *in) {
 472     int c;
 473     filepos cpos;
 474
 475     c = get(in, &cpos);
 476     unget(in, c, &cpos);
 477     return (c == '{');
 478 }
 479
 480 /*
 481  * Read the rest of a line that starts `\c'. Including nothing at
 482  * all (tok_word with empty text).
 483  */
 484 token get_codepar_token(input *in) {
 485     int c;
 486     token ret;
 487     rdstring rs = { 0, 0, NULL };
 488     filepos cpos;
 489
 490     ret.type = tok_word;
 491     c = get(in, &cpos);                /* expect (and discard) one space */
 492     ret.pos = cpos;
 493     if (c == ' ') {
 494         c = get(in, &cpos);
 495         ret.pos = cpos;
 496     }
 497     while (!isnl(c) && c != EOF) {
 498         int c2 = c;
 499         c = get(in, &cpos);
 500         /* Discard \r just before \n. */
 501         if (c2 != 13 || !isnl(c))
 502             rdadd(&rs, c2);
 503     }
 504     unget(in, c, &cpos);
 505     ret.text = ustrdup(rs.text);
 506     sfree(rs.text);
 507     return ret;
 508 }
 509
 510 /*
 511  * Adds a new word to a linked list
 512  */
 513 static word *addword(word newword, word ***hptrptr) {
 514     word *mnewword;
 515     if (!hptrptr)
 516         return NULL;
 517     mnewword = mknew(word);
 518     *mnewword = newword;               /* structure copy */
 519     mnewword->next = NULL;
 520     **hptrptr = mnewword;
 521     *hptrptr = &mnewword->next;
 522     return mnewword;
 523 }
 524
 525 /*
 526  * Adds a new paragraph to a linked list
 527  */
 528 static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
 529     paragraph *mnewpara = mknew(paragraph);
 530     *mnewpara = newpara;               /* structure copy */
 531     mnewpara->next = NULL;
 532     **hptrptr = mnewpara;
 533     *hptrptr = &mnewpara->next;
 534     return mnewpara;
 535 }
 536
 537 /*
 538  * Destructor before token is reassigned; should catch most memory
 539  * leaks
 540  */
 541 #define dtor(t) ( sfree(t.text) )
 542
 543 /*
 544  * Reads a single file (ie until get() returns EOF)
 545  */
 546 static void read_file(paragraph ***ret, input *in, indexdata *idx) {
 547     token t;
 548     paragraph par;
 549     word wd, **whptr, **idximplicit;
 550     tree234 *macros;
 551     wchar_t utext[2], *wdtext;
 552     int style, spcstyle;
 553     int already;
 554     int iswhite, seenwhite;
 555     int type;
 556     int prev_para_type;
 557     struct stack_item {
 558         enum {
 559             stack_nop = 0,             /* do nothing (for error recovery) */
 560             stack_ualt = 1,            /* \u alternative */
 561             stack_style = 2,           /* \e, \c, \cw */
 562             stack_idx = 4,             /* \I, \i, \ii */
 563             stack_hyper = 8,           /* \W */
 564             stack_quote = 16,          /* \q */
 565         } type;
 566         word **whptr;                  /* to restore from \u alternatives */
 567         word **idximplicit;            /* to restore from \u alternatives */
 568         filepos fpos;
 569     } *sitem;
 570     stack parsestk;
 571     struct crossparaitem {
 572         int type;                      /* currently c_lcont, c_quote or -1 */
 573         int seen_lcont, seen_quote;
 574     };
 575     stack crossparastk;
 576     word *indexword, *uword, *iword;
 577     word *idxwordlist;
 578     rdstring indexstr;
 579     int index_downcase, index_visible, indexing;
 580     const rdstring nullrs = { 0, 0, NULL };
 581     wchar_t uchr;
 582
 583     t.text = NULL;
 584     macros = newtree234(macrocmp);
 585     already = FALSE;
 586
 587     crossparastk = stk_new();
 588
 589     /*
 590      * Loop on each paragraph.
 591      */
 592     while (1) {
 593         int start_cmd = c__invalid;
 594         par.words = NULL;
 595         par.keyword = NULL;
 596         whptr = &par.words;
 597
 598         /*
 599          * Get a token.
 600          */
 601         do {
 602             if (!already) {
 603                 dtor(t), t = get_token(in);
 604             }
 605             already = FALSE;
 606         } while (t.type == tok_eop);
 607         if (t.type == tok_eof)
 608             break;
 609
 610         /*
 611          * Parse code paragraphs separately.
 612          */
 613         if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
 614             int wtype = word_WeakCode;
 615
 616             par.type = para_Code;
 617             par.fpos = t.pos;
 618             while (1) {
 619                 dtor(t), t = get_codepar_token(in);
 620                 wd.type = wtype;
 621                 wd.breaks = FALSE;     /* shouldn't need this... */
 622                 wd.text = ustrdup(t.text);
 623                 wd.alt = NULL;
 624                 wd.fpos = t.pos;
 625                 addword(wd, &whptr);
 626                 dtor(t), t = get_token(in);
 627                 if (t.type == tok_white) {
 628                     /*
 629                      * The newline after a code-paragraph line
 630                      */
 631                     dtor(t), t = get_token(in);
 632                 }
 633                 if (t.type == tok_eop || t.type == tok_eof ||
 634                     t.type == tok_rbrace) { /* might be } terminating \lcont */
 635                     if (t.type == tok_rbrace)
 636                         already = TRUE;
 637                     break;
 638                 } else if (t.type == tok_cmd && t.cmd == c_c) {
 639                     wtype = word_WeakCode;
 640                 } else if (t.type == tok_cmd && t.cmd == c_e &&
 641                            wtype == word_WeakCode) {
 642                     wtype = word_Emph;
 643                 } else {
 644                     error(err_brokencodepara, &t.pos);
 645                     prev_para_type = par.type;
 646                     addpara(par, ret);
 647                     while (t.type != tok_eop)   /* error recovery: */
 648                         dtor(t), t = get_token(in);   /* eat rest of paragraph */
 649                     goto codeparabroken;   /* ick, but such is life */
 650                 }
 651             }
 652             prev_para_type = par.type;
 653             addpara(par, ret);
 654             codeparabroken:
 655             continue;
 656         }
 657
 658         /*
 659          * Spot the special commands that define a grouping of more
 660          * than one paragraph, and also the closing braces that
 661          * finish them.
 662          */
 663         if (t.type == tok_cmd &&
 664             (t.cmd == c_lcont || t.cmd == c_quote)) {
 665             struct crossparaitem *sitem, *stop;
 666             int cmd = t.cmd;
 667
 668             /*
 669              * Expect, and swallow, an open brace.
 670              */
 671             dtor(t), t = get_token(in);
 672             if (t.type != tok_lbrace) {
 673                 error(err_explbr, &t.pos);
 674                 continue;
 675             }
 676
 677             /*
 678              * Also expect, and swallow, any whitespace after that
 679              * (a newline before a code paragraph wouldn't be
 680              * surprising).
 681              */
 682             do {
 683                 dtor(t), t = get_token(in);
 684             } while (t.type == tok_white);
 685             already = TRUE;
 686
 687             if (cmd == c_lcont) {
 688                 /*
 689                  * \lcont causes a continuation of a list item into
 690                  * multiple paragraphs (which may in turn contain
 691                  * nested lists, code paras etc). Hence, the previous
 692                  * paragraph must be of a list type.
 693                  */
 694                 sitem = mknew(struct crossparaitem);
 695                 stop = (struct crossparaitem *)stk_top(crossparastk);
 696                 if (stop)
 697                     *sitem = *stop;
 698                 else
 699                     sitem->seen_quote = sitem->seen_lcont = 0;
 700
 701                 if (prev_para_type == para_Bullet ||
 702                     prev_para_type == para_NumberedList ||
 703                     prev_para_type == para_Description) {
 704                     sitem->type = c_lcont;
 705                     sitem->seen_lcont = 1;
 706                     par.type = para_LcontPush;
 707                     prev_para_type = par.type;
 708                     addpara(par, ret);
 709                 } else {
 710                     /*
 711                      * Push a null item on the cross-para stack so that
 712                      * when we see the corresponding closing brace we
 713                      * don't give a cascade error.
 714                      */
 715                     sitem->type = -1;
 716                     error(err_misplacedlcont, &t.pos);
 717                 }
 718             } else {
 719                 /*
 720                  * \quote causes a group of paragraphs to be
 721                  * block-quoted (typically they will be indented a
 722                  * bit).
 723                  */
 724                 sitem = mknew(struct crossparaitem);
 725                 stop = (struct crossparaitem *)stk_top(crossparastk);
 726                 if (stop)
 727                     *sitem = *stop;
 728                 else
 729                     sitem->seen_quote = sitem->seen_lcont = 0;
 730                 sitem->type = c_quote;
 731                 sitem->seen_quote = 1;
 732                 par.type = para_QuotePush;
 733                 prev_para_type = par.type;
 734                 addpara(par, ret);
 735             }
 736             stk_push(crossparastk, sitem);
 737             continue;
 738         } else if (t.type == tok_rbrace) {
 739             struct crossparaitem *sitem = stk_pop(crossparastk);
 740             if (!sitem)
 741                 error(err_unexbrace, &t.pos);
 742             else {
 743                 switch (sitem->type) {
 744                   case c_lcont:
 745                     par.type = para_LcontPop;
 746                     prev_para_type = par.type;
 747                     addpara(par, ret);
 748                     break;
 749                   case c_quote:
 750                     par.type = para_QuotePop;
 751                     prev_para_type = par.type;
 752                     addpara(par, ret);
 753                     break;
 754                 }
 755                 sfree(sitem);
 756             }
 757             continue;
 758         }
 759
 760         /*
 761          * This token begins a paragraph. See if it's one of the
 762          * special commands that define a paragraph type.
 763          *
 764          * (note that \# is special in a way, and \nocite takes no
 765          * text)
 766          */
 767         par.type = para_Normal;
 768         if (t.type == tok_cmd) {
 769             int needkw;
 770             int is_macro = FALSE;
 771
 772             par.fpos = t.pos;
 773             switch (t.cmd) {
 774               default:
 775                 needkw = -1;
 776                 break;
 777               case c__invalid:
 778                 error(err_badparatype, t.text, &t.pos);
 779                 needkw = 4;
 780                 break;
 781               case c__comment:
 782                 if (isbrace(in))
 783                     break;             /* `\#{': isn't a comment para */
 784                 do {
 785                     dtor(t), t = get_token(in);
 786                 } while (t.type != tok_eop && t.type != tok_eof);
 787                 continue;              /* next paragraph */
 788                 /*
 789                  * `needkw' values:
 790                  *
 791                  *   1 -- exactly one keyword
 792                  *   2 -- at least one keyword
 793                  *   4 -- any number of keywords including zero
 794                  *   8 -- at least one keyword and then nothing else
 795                  *  16 -- nothing at all! no keywords, no body
 796                  *  32 -- no keywords at all
 797                  */
 798               case c_A: needkw = 2; par.type = para_Appendix; break;
 799               case c_B: needkw = 2; par.type = para_Biblio; break;
 800               case c_BR: needkw = 1; par.type = para_BR;
 801                 start_cmd = c_BR; break;
 802               case c_C: needkw = 2; par.type = para_Chapter; break;
 803               case c_H: needkw = 2; par.type = para_Heading;
 804                 par.aux = 0;
 805                 break;
 806               case c_IM: needkw = 2; par.type = para_IM;
 807                 start_cmd = c_IM; break;
 808               case c_S: needkw = 2; par.type = para_Subsect;
 809                 par.aux = t.aux; break;
 810               case c_U: needkw = 32; par.type = para_UnnumberedChapter; break;
 811                 /* For \b and \n the keyword is optional */
 812               case c_b: needkw = 4; par.type = para_Bullet; break;
 813               case c_dt: needkw = 4; par.type = para_DescribedThing; break;
 814               case c_dd: needkw = 4; par.type = para_Description; break;
 815               case c_n: needkw = 4; par.type = para_NumberedList; break;
 816               case c_cfg: needkw = 8; par.type = para_Config;
 817                 start_cmd = c_cfg; break;
 818               case c_copyright: needkw = 32; par.type = para_Copyright; break;
 819               case c_define: is_macro = TRUE; needkw = 1; break;
 820                 /* For \nocite the keyword is _everything_ */
 821               case c_nocite: needkw = 8; par.type = para_NoCite; break;
 822               case c_preamble: needkw = 32; par.type = para_Normal; break;
 823               case c_rule: needkw = 16; par.type = para_Rule; break;
 824               case c_title: needkw = 32; par.type = para_Title; break;
 825               case c_versionid: needkw = 32; par.type = para_VersionID; break;
 826             }
 827
 828             if (par.type == para_Chapter ||
 829                 par.type == para_Heading ||
 830                 par.type == para_Subsect ||
 831                 par.type == para_Appendix ||
 832                 par.type == para_UnnumberedChapter) {
 833                 struct crossparaitem *sitem = stk_top(crossparastk);
 834                 if (sitem && (sitem->seen_lcont || sitem->seen_quote)) {
 835                     error(err_sectmarkerinblock,
 836                           &t.pos,
 837                           (sitem->seen_lcont ? "lcont" : "quote"));
 838                 }
 839             }
 840
 841             if (needkw > 0) {
 842                 rdstring rs = { 0, 0, NULL };
 843                 int nkeys = 0;
 844                 filepos fp;
 845
 846                 /* Get keywords. */
 847                 dtor(t), t = get_token(in);
 848                 fp = t.pos;
 849                 while (t.type == tok_lbrace) {
 850                     /* This is a keyword. */
 851                     nkeys++;
 852                     /* FIXME: there will be bugs if anyone specifies an
 853                      * empty keyword (\foo{}), so trap this case. */
 854                     while (dtor(t), t = get_token(in),
 855                            t.type == tok_word ||
 856                            t.type == tok_white ||
 857                            (t.type == tok_cmd && t.cmd == c__nbsp) ||
 858                            (t.type == tok_cmd && t.cmd == c__escaped)) {
 859                         if (t.type == tok_white ||
 860                             (t.type == tok_cmd && t.cmd == c__nbsp))
 861                             rdadd(&rs, ' ');
 862                         else
 863                             rdadds(&rs, t.text);
 864                     }
 865                     if (t.type != tok_rbrace) {
 866                         error(err_kwunclosed, &t.pos);
 867                         continue;
 868                     }
 869                     rdadd(&rs, 0);     /* add string terminator */
 870                     dtor(t), t = get_token(in); /* eat right brace */
 871                 }
 872
 873                 rdadd(&rs, 0);     /* add string terminator */
 874
 875                 /* See whether we have the right number of keywords. */
 876                 if ((needkw & 48) && nkeys > 0)
 877                     error(err_kwillegal, &fp);
 878                 if ((needkw & 11) && nkeys == 0)
 879                     error(err_kwexpected, &fp);
 880                 if ((needkw & 5) && nkeys > 1)
 881                     error(err_kwtoomany, &fp);
 882
 883                 if (is_macro) {
 884                     /*
 885                      * Macro definition. Get the rest of the line
 886                      * as a code-paragraph token, repeatedly until
 887                      * there's nothing more left of it. Separate
 888                      * with newlines.
 889                      */
 890                     rdstring macrotext = { 0, 0, NULL };
 891                     while (1) {
 892                         dtor(t), t = get_codepar_token(in);
 893                         if (macrotext.pos > 0)
 894                             rdadd(&macrotext, L'\n');
 895                         rdadds(&macrotext, t.text);
 896                         dtor(t), t = get_token(in);
 897                         if (t.type == tok_eop) break;
 898                     }
 899                     macrodef(macros, rs.text, macrotext.text, fp);
 900                     continue;          /* next paragraph */
 901                 }
 902
 903                 par.keyword = rdtrim(&rs);
 904
 905                 /* Move to EOP in case of needkw==8 or 16 (no body) */
 906                 if (needkw & 24) {
 907                     /* We allow whitespace even when we expect no para body */
 908                     while (t.type == tok_white)
 909                         dtor(t), t = get_token(in);
 910                     if (t.type != tok_eop && t.type != tok_eof &&
 911                         (start_cmd == c__invalid ||
 912                          t.type != tok_cmd || t.cmd != start_cmd)) {
 913                         error(err_bodyillegal, &t.pos);
 914                         /* Error recovery: eat the rest of the paragraph */
 915                         while (t.type != tok_eop && t.type != tok_eof &&
 916                                (start_cmd == c__invalid ||
 917                                 t.type != tok_cmd || t.cmd != start_cmd))
 918                             dtor(t), t = get_token(in);
 919                     }
 920                     if (t.type == tok_cmd)
 921                         already = TRUE;/* inhibit get_token at top of loop */
 922                     prev_para_type = par.type;
 923                     addpara(par, ret);
 924
 925                     if (par.type == para_Config) {
 926                         input_configure(in, &par);
 927                     }
 928                     continue;          /* next paragraph */
 929                 }
 930             }
 931         }
 932
 933         /*
 934          * Now read the actual paragraph, word by word, adding to
 935          * the paragraph list.
 936          *
 937          * Mid-paragraph commands:
 938          *
 939          *  \K \k
 940          *  \c \cw
 941          *  \e
 942          *  \i \ii
 943          *  \I
 944          *  \u
 945          *  \W
 946          *  \date
 947          *  \\ \{ \}
 948          */
 949         parsestk = stk_new();
 950         style = word_Normal;
 951         spcstyle = word_WhiteSpace;
 952         indexing = FALSE;
 953         seenwhite = TRUE;
 954         while (t.type != tok_eop && t.type != tok_eof) {
 955             iswhite = FALSE;
 956             already = FALSE;
 957
 958             /* Handle implicit paragraph breaks after \IM, \BR etc */
 959             if (start_cmd != c__invalid &&
 960                 t.type == tok_cmd && t.cmd == start_cmd) {
 961                 already = TRUE;        /* inhibit get_token at top of loop */
 962                 break;
 963             }
 964
 965             if (t.type == tok_cmd && t.cmd == c__nop) {
 966                 dtor(t), t = get_token(in);
 967                 continue;              /* do nothing! */
 968             }
 969
 970             if (t.type == tok_cmd && t.cmd == c__escaped) {
 971                 t.type = tok_word;     /* nice and simple */
 972                 t.aux = 0;             /* even if `\-' - nonbreaking! */
 973             }
 974             if (t.type == tok_cmd && t.cmd == c__nbsp) {
 975                 t.type = tok_word;     /* nice and simple */
 976                 sfree(t.text);
 977                 t.text = ustrdup(L" ");  /* text is ` ' not `_' */
 978                 t.aux = 0;             /* (nonbreaking) */
 979             }
 980             switch (t.type) {
 981               case tok_white:
 982                 if (whptr == &par.words)
 983                     break;             /* strip whitespace at start of para */
 984                 wd.text = NULL;
 985                 wd.type = spcstyle;
 986                 wd.alt = NULL;
 987                 wd.aux = 0;
 988                 wd.fpos = t.pos;
 989                 wd.breaks = FALSE;
 990
 991                 /*
 992                  * Inhibit use of whitespace if it's (probably the
 993                  * newline) before a repeat \IM / \BR type
 994                  * directive.
 995                  */
 996                 if (start_cmd != c__invalid) {
 997                     dtor(t), t = get_token(in);
 998                     already = TRUE;
 999                     if (t.type == tok_cmd && t.cmd == start_cmd)
1000                         break;
1001                 }
1002
1003                 if (indexing)
1004                     rdadd(&indexstr, ' ');
1005                 if (!indexing || index_visible)
1006                     addword(wd, &whptr);
1007                 if (indexing)
1008                     addword(wd, &idximplicit);
1009                 iswhite = TRUE;
1010                 break;
1011               case tok_word:
1012                 if (indexing)
1013                     rdadds(&indexstr, t.text);
1014                 wd.type = style;
1015                 wd.alt = NULL;
1016                 wd.aux = 0;
1017                 wd.fpos = t.pos;
1018                 wd.breaks = t.aux;
1019                 if (!indexing || index_visible) {
1020                     wd.text = ustrdup(t.text);
1021                     addword(wd, &whptr);
1022                 }
1023                 if (indexing) {
1024                     wd.text = ustrdup(t.text);
1025                     addword(wd, &idximplicit);
1026                 }
1027                 break;
1028               case tok_lbrace:
1029                 error(err_unexbrace, &t.pos);
1030                 /* Error recovery: push nop */
1031                 sitem = mknew(struct stack_item);
1032                 sitem->type = stack_nop;
1033                 sitem->fpos = t.pos;
1034                 stk_push(parsestk, sitem);
1035                 break;
1036               case tok_rbrace:
1037                 sitem = stk_pop(parsestk);
1038                 if (!sitem) {
1039                     /*
1040                      * This closing brace could have been an
1041                      * indication that the cross-paragraph stack
1042                      * wants popping. Accordingly, we treat it here
1043                      * as an indication that the paragraph is over.
1044                      */
1045                     already = TRUE;
1046                     goto finished_para;
1047                 } else {
1048                     if (sitem->type & stack_ualt) {
1049                         whptr = sitem->whptr;
1050                         idximplicit = sitem->idximplicit;
1051                     }
1052                     if (sitem->type & stack_style) {
1053                         style = word_Normal;
1054                         spcstyle = word_WhiteSpace;
1055                     }
1056                     if (sitem->type & stack_idx) {
1057                         indexword->text = ustrdup(indexstr.text);
1058                         if (index_downcase) {
1059                             word *w;
1060
1061                             ustrlow(indexword->text);
1062                             ustrlow(indexstr.text);
1063
1064                             for (w = idxwordlist; w; w = w->next)
1065                                 if (w->text)
1066                                     ustrlow(w->text);
1067                         }
1068                         indexing = FALSE;
1069                         rdadd(&indexstr, L'\0');
1070                         index_merge(idx, FALSE, indexstr.text,
1071                                     idxwordlist, &sitem->fpos);
1072                         sfree(indexstr.text);
1073                     }
1074                     if (sitem->type & stack_hyper) {
1075                         wd.text = NULL;
1076                         wd.type = word_HyperEnd;
1077                         wd.alt = NULL;
1078                         wd.aux = 0;
1079                         wd.fpos = t.pos;
1080                         wd.breaks = FALSE;
1081                         if (!indexing || index_visible)
1082                             addword(wd, &whptr);
1083                         if (indexing)
1084                             addword(wd, &idximplicit);
1085                     }
1086                     if (sitem->type & stack_quote) {
1087                         wd.text = NULL;
1088                         wd.type = toquotestyle(style);
1089                         wd.alt = NULL;
1090                         wd.aux = quote_Close;
1091                         wd.fpos = t.pos;
1092                         wd.breaks = FALSE;
1093                         if (!indexing || index_visible)
1094                             addword(wd, &whptr);
1095                         if (indexing) {
1096                             rdadd(&indexstr, L'"');
1097                             addword(wd, &idximplicit);
1098                         }
1099                     }
1100                 }
1101                 sfree(sitem);
1102                 break;
1103               case tok_cmd:
1104                 switch (t.cmd) {
1105                   case c__comment:
1106                     /*
1107                      * In-paragraph comment: \#{ balanced braces }
1108                      *
1109                      * Anything goes here; even tok_eop. We should
1110                      * eat whitespace after the close brace _if_
1111                      * there was whitespace before the \#.
1112                      */
1113                     dtor(t), t = get_token(in);
1114                     if (t.type != tok_lbrace) {
1115                         error(err_explbr, &t.pos);
1116                     } else {
1117                         int braces = 1;
1118                         while (braces > 0) {
1119                             dtor(t), t = get_token(in);
1120                             if (t.type == tok_lbrace)
1121                                 braces++;
1122                             else if (t.type == tok_rbrace)
1123                                 braces--;
1124                             else if (t.type == tok_eof) {
1125                                 error(err_commenteof, &t.pos);
1126                                 break;
1127                             }
1128                         }
1129                     }
1130                     if (seenwhite) {
1131                         already = TRUE;
1132                         dtor(t), t = get_token(in);
1133                         if (t.type == tok_white) {
1134                             iswhite = TRUE;
1135                             already = FALSE;
1136                         }
1137                     }
1138                     break;
1139                   case c_q:
1140                     dtor(t), t = get_token(in);
1141                     if (t.type != tok_lbrace) {
1142                         error(err_explbr, &t.pos);
1143                     } else {
1144                         wd.text = NULL;
1145                         wd.type = toquotestyle(style);
1146                         wd.alt = NULL;
1147                         wd.aux = quote_Open;
1148                         wd.fpos = t.pos;
1149                         wd.breaks = FALSE;
1150                         if (!indexing || index_visible)
1151                             addword(wd, &whptr);
1152                         if (indexing) {
1153                             rdadd(&indexstr, L'"');
1154                             addword(wd, &idximplicit);
1155                         }
1156                         sitem = mknew(struct stack_item);
1157                         sitem->fpos = t.pos;
1158                         sitem->type = stack_quote;
1159                         stk_push(parsestk, sitem);
1160                     }
1161                     break;
1162                   case c_K:
1163                   case c_k:
1164                   case c_W:
1165                   case c_date:
1166                     /*
1167                      * Keyword, hyperlink, or \date. We expect a
1168                      * left brace, some text, and then a right
1169                      * brace. No nesting; no arguments.
1170                      */
1171                     wd.fpos = t.pos;
1172                     wd.breaks = FALSE;
1173                     if (t.cmd == c_K)
1174                         wd.type = word_UpperXref;
1175                     else if (t.cmd == c_k)
1176                         wd.type = word_LowerXref;
1177                     else if (t.cmd == c_W)
1178                         wd.type = word_HyperLink;
1179                     else
1180                         wd.type = word_Normal;
1181                     dtor(t), t = get_token(in);
1182                     if (t.type != tok_lbrace) {
1183                         if (wd.type == word_Normal) {
1184                             time_t thetime = time(NULL);
1185                             struct tm *broken = localtime(&thetime);
1186                             already = TRUE;
1187                             wdtext = ustrftime(NULL, broken);
1188                             wd.type = style;
1189                         } else {
1190                             error(err_explbr, &t.pos);
1191                             wdtext = NULL;
1192                         }
1193                     } else {
1194                         rdstring rs = { 0, 0, NULL };
1195                         while (dtor(t), t = get_token(in),
1196                                t.type == tok_word || t.type == tok_white) {
1197                             if (t.type == tok_white)
1198                                 rdadd(&rs, ' ');
1199                             else
1200                                 rdadds(&rs, t.text);
1201                         }
1202                         if (wd.type == word_Normal) {
1203                             time_t thetime = time(NULL);
1204                             struct tm *broken = localtime(&thetime);
1205                             wdtext = ustrftime(rs.text, broken);
1206                             wd.type = style;
1207                         } else {
1208                             wdtext = ustrdup(rs.text);
1209                         }
1210                         sfree(rs.text);
1211                         if (t.type != tok_rbrace) {
1212                             error(err_kwexprbr, &t.pos);
1213                         }
1214                     }
1215                     wd.alt = NULL;
1216                     wd.aux = 0;
1217                     if (!indexing || index_visible) {
1218                         wd.text = ustrdup(wdtext);
1219                         addword(wd, &whptr);
1220                     }
1221                     if (indexing) {
1222                         wd.text = ustrdup(wdtext);
1223                         addword(wd, &idximplicit);
1224                     }
1225                     sfree(wdtext);
1226                     if (wd.type == word_HyperLink) {
1227                         /*
1228                          * Hyperlinks are different: they then
1229                          * expect another left brace, to begin
1230                          * delimiting the text marked by the link.
1231                          */
1232                         dtor(t), t = get_token(in);
1233                         sitem = mknew(struct stack_item);
1234                         sitem->fpos = wd.fpos;
1235                         sitem->type = stack_hyper;
1236                         /*
1237                          * Special cases: \W{}\i, \W{}\ii
1238                          */
1239                         if (t.type == tok_cmd &&
1240                             (t.cmd == c_i || t.cmd == c_ii)) {
1241                             if (indexing) {
1242                                 error(err_nestedindex, &t.pos);
1243                             } else {
1244                                 /* Add an index-reference word with no
1245                                  * text as yet */
1246                                 wd.type = word_IndexRef;
1247                                 wd.text = NULL;
1248                                 wd.alt = NULL;
1249                                 wd.aux = 0;
1250                                 wd.breaks = FALSE;
1251                                 indexword = addword(wd, &whptr);
1252                                 /* Set up a rdstring to read the
1253                                  * index text */
1254                                 indexstr = nullrs;
1255                                 /* Flags so that we do the Right
1256                                  * Things with text */
1257                                 index_visible = (type != c_I);
1258                                 index_downcase = (type == c_ii);
1259                                 indexing = TRUE;
1260                                 idxwordlist = NULL;
1261                                 idximplicit = &idxwordlist;
1262
1263                                 sitem->type |= stack_idx;
1264                             }
1265                             dtor(t), t = get_token(in);
1266                         }
1267                         /*
1268                          * Special cases: \W{}\c, \W{}\e, \W{}\cw
1269                          */
1270                         if (t.type == tok_cmd &&
1271                             (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
1272                             if (style != word_Normal)
1273                                 error(err_nestedstyles, &t.pos);
1274                             else {
1275                                 style = (t.cmd == c_c ? word_Code :
1276                                          t.cmd == c_cw ? word_WeakCode :
1277                                          word_Emph);
1278                                 spcstyle = tospacestyle(style);
1279                                 sitem->type |= stack_style;
1280                             }
1281                             dtor(t), t = get_token(in);
1282                         }
1283                         if (t.type != tok_lbrace) {
1284                             error(err_explbr, &t.pos);
1285                             sfree(sitem);
1286                         } else {
1287                             stk_push(parsestk, sitem);
1288                         }
1289                     }
1290                     break;
1291                   case c_c:
1292                   case c_cw:
1293                   case c_e:
1294                     type = t.cmd;
1295                     if (style != word_Normal) {
1296                         error(err_nestedstyles, &t.pos);
1297                         /* Error recovery: eat lbrace, push nop. */
1298                         dtor(t), t = get_token(in);
1299                         sitem = mknew(struct stack_item);
1300                         sitem->fpos = t.pos;
1301                         sitem->type = stack_nop;
1302                         stk_push(parsestk, sitem);
1303                     }
1304                     dtor(t), t = get_token(in);
1305                     if (t.type != tok_lbrace) {
1306                         error(err_explbr, &t.pos);
1307                     } else {
1308                         style = (type == c_c ? word_Code :
1309                                  type == c_cw ? word_WeakCode :
1310                                  word_Emph);
1311                         spcstyle = tospacestyle(style);
1312                         sitem = mknew(struct stack_item);
1313                         sitem->fpos = t.pos;
1314                         sitem->type = stack_style;
1315                         stk_push(parsestk, sitem);
1316                     }
1317                     break;
1318                   case c_i:
1319                   case c_ii:
1320                   case c_I:
1321                     type = t.cmd;
1322                     if (indexing) {
1323                         error(err_nestedindex, &t.pos);
1324                         /* Error recovery: eat lbrace, push nop. */
1325                         dtor(t), t = get_token(in);
1326                         sitem = mknew(struct stack_item);
1327                         sitem->fpos = t.pos;
1328                         sitem->type = stack_nop;
1329                         stk_push(parsestk, sitem);
1330                     }
1331                     sitem = mknew(struct stack_item);
1332                     sitem->fpos = t.pos;
1333                     sitem->type = stack_idx;
1334                     dtor(t), t = get_token(in);
1335                     /*
1336                      * Special cases: \i\c, \i\e, \i\cw
1337                      */
1338                     wd.fpos = t.pos;
1339                     if (t.type == tok_cmd &&
1340                         (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
1341                         if (style != word_Normal)
1342                             error(err_nestedstyles, &t.pos);
1343                         else {
1344                             style = (t.cmd == c_c ? word_Code :
1345                                      t.cmd == c_cw ? word_WeakCode :
1346                                      word_Emph);
1347                             spcstyle = tospacestyle(style);
1348                             sitem->type |= stack_style;
1349                         }
1350                         dtor(t), t = get_token(in);
1351                     }
1352                     if (t.type != tok_lbrace) {
1353                         sfree(sitem);
1354                         error(err_explbr, &t.pos);
1355                     } else {
1356                         /* Add an index-reference word with no text as yet */
1357                         wd.type = word_IndexRef;
1358                         wd.text = NULL;
1359                         wd.alt = NULL;
1360                         wd.aux = 0;
1361                         wd.breaks = FALSE;
1362                         indexword = addword(wd, &whptr);
1363                         /* Set up a rdstring to read the index text */
1364                         indexstr = nullrs;
1365                         /* Flags so that we do the Right Things with text */
1366                         index_visible = (type != c_I);
1367                         index_downcase = (type == c_ii);
1368                         indexing = TRUE;
1369                         idxwordlist = NULL;
1370                         idximplicit = &idxwordlist;
1371                         /* Stack item to close the indexing on exit */
1372                         stk_push(parsestk, sitem);
1373                     }
1374                     break;
1375                   case c_u:
1376                     uchr = t.aux;
1377                     utext[0] = uchr; utext[1] = 0;
1378                     wd.type = style;
1379                     wd.breaks = FALSE;
1380                     wd.alt = NULL;
1381                     wd.aux = 0;
1382                     wd.fpos = t.pos;
1383                     if (!indexing || index_visible) {
1384                         wd.text = ustrdup(utext);
1385                         uword = addword(wd, &whptr);
1386                     } else
1387                         uword = NULL;
1388                     if (indexing) {
1389                         wd.text = ustrdup(utext);
1390                         iword = addword(wd, &idximplicit);
1391                     } else
1392                         iword = NULL;
1393                     dtor(t), t = get_token(in);
1394                     if (t.type == tok_lbrace) {
1395                         /*
1396                          * \u with a left brace. Until the brace
1397                          * closes, all further words go on a
1398                          * sidetrack from the main thread of the
1399                          * paragraph.
1400                          */
1401                         sitem = mknew(struct stack_item);
1402                         sitem->fpos = t.pos;
1403                         sitem->type = stack_ualt;
1404                         sitem->whptr = whptr;
1405                         sitem->idximplicit = idximplicit;
1406                         stk_push(parsestk, sitem);
1407                         whptr = uword ? &uword->alt : NULL;
1408                         idximplicit = iword ? &iword->alt : NULL;
1409                     } else {
1410                         if (indexing)
1411                             rdadd(&indexstr, uchr);
1412                         already = TRUE;
1413                     }
1414                     break;
1415                   default:
1416                     if (!macrolookup(macros, in, t.text, &t.pos))
1417                         error(err_badmidcmd, t.text, &t.pos);
1418                     break;
1419                 }
1420             }
1421             if (!already)
1422                 dtor(t), t = get_token(in);
1423             seenwhite = iswhite;
1424         }
1425         finished_para:
1426         /* Check the stack is empty */
1427         if (stk_top(parsestk)) {
1428             while ((sitem = stk_pop(parsestk)))
1429                 sfree(sitem);
1430             error(err_missingrbrace, &t.pos);
1431         }
1432         stk_free(parsestk);
1433         prev_para_type = par.type;
1434         addpara(par, ret);
1435         if (t.type == tok_eof)
1436             already = TRUE;
1437     }
1438
1439     if (stk_top(crossparastk)) {
1440         void *p;
1441
1442         error(err_missingrbrace2, &t.pos);
1443         while ((p = stk_pop(crossparastk)))
1444             sfree(p);
1445     }
1446
1447     /*
1448      * We break to here rather than returning, because otherwise
1449      * this cleanup doesn't happen.
1450      */
1451     dtor(t);
1452     macrocleanup(macros);
1453
1454     stk_free(crossparastk);
1455 }
1456
1457 paragraph *read_input(input *in, indexdata *idx) {
1458     paragraph *head = NULL;
1459     paragraph **hptr = &head;
1460
1461     while (in->currindex < in->nfiles) {
1462         in->currfp = fopen(in->filenames[in->currindex], "r");
1463         if (in->currfp) {
1464             setpos(in, in->filenames[in->currindex]);
1465             in->charset = in->defcharset;
1466             in->csstate = charset_init_state;
1467             read_file(&hptr, in, idx);
1468         }
1469         in->currindex++;
1470     }
1471
1472     return head;
1473 }