mdw@git.distorted.org.uk Git - sgt/halibut/blob - input.c

   1 /*
   2  * input.c: read the source form
   3  */
   4
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <time.h>
#include "halibut.h"
   9
  10 #define TAB_STOP 8                     /* for column number tracking */
  11
  12 static void setpos(input *in, char *fname) {
  13     in->pos.filename = fname;
  14     in->pos.line = 1;
  15     in->pos.col = (in->reportcols ? 1 : -1);
  16 }
  17
  18 static void unget(input *in, int c, filepos *pos) {
  19     if (in->npushback >= in->pushbacksize) {
  20         in->pushbacksize = in->npushback + 16;
  21         in->pushback = resize(in->pushback, in->pushbacksize);
  22     }
  23     in->pushback[in->npushback].chr = c;
  24     in->pushback[in->npushback].pos = *pos;   /* structure copy */
  25     in->npushback++;
  26 }
  27
  28 /* ---------------------------------------------------------------------- */
  29 /*
  30  * Macro subsystem
  31  */
  32 typedef struct macro_Tag macro;
  33 struct macro_Tag {
  34     wchar_t *name, *text;
  35 };
  36 struct macrostack_Tag {
  37     macrostack *next;
  38     wchar_t *text;
  39     int ptr, npushback;
  40     filepos pos;
  41 };
  42 static int macrocmp(void *av, void *bv) {
  43     macro *a = (macro *)av, *b = (macro *)bv;
  44     return ustrcmp(a->name, b->name);
  45 }
  46 static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text,
  47                      filepos fpos) {
  48     macro *m = mknew(macro);
  49     m->name = name;
  50     m->text = text;
  51     if (add234(macros, m) != m) {
  52         error(err_macroexists, &fpos, name);
  53         sfree(name);
  54         sfree(text);
  55     }
  56 }
  57 static int macrolookup(tree234 *macros, input *in, wchar_t *name,
  58                        filepos *pos) {
  59     macro m, *gotit;
  60     m.name = name;
  61     gotit = find234(macros, &m, NULL);
  62     if (gotit) {
  63         macrostack *expansion = mknew(macrostack);
  64         expansion->next = in->stack;
  65         expansion->text = gotit->text;
  66         expansion->pos = *pos;         /* structure copy */
  67         expansion->ptr = 0;
  68         expansion->npushback = in->npushback;
  69         in->stack = expansion;
  70         return TRUE;
  71     } else
  72         return FALSE;
  73 }
  74 static void macrocleanup(tree234 *macros) {
  75     int ti;
  76     macro *m;
  77     for (ti = 0; (m = (macro *)index234(macros, ti)) != NULL; ti++) {
  78         sfree(m->name);
  79         sfree(m->text);
  80         sfree(m);
  81     }
  82     freetree234(macros);
  83 }
  84
  85 static void input_configure(input *in, paragraph *cfg) {
  86     assert(cfg->type == para_Config);
  87
  88     if (!ustricmp(cfg->keyword, L"input-charset")) {
  89         char *csname = utoa_dup(uadv(cfg->keyword), CS_ASCII);
  90         in->charset = charset_from_localenc(csname);
  91         sfree(csname);
  92     }
  93 }
  94
  95 /*
  96  * Can return EOF
  97  */
  98 static int get(input *in, filepos *pos, rdstringc *rsc) {
  99     int pushbackpt = in->stack ? in->stack->npushback : 0;
 100     if (in->npushback > pushbackpt) {
 101         --in->npushback;
 102         if (pos)
 103             *pos = in->pushback[in->npushback].pos;   /* structure copy */
 104         return in->pushback[in->npushback].chr;
 105     }
 106     else if (in->stack) {
 107         wchar_t c = in->stack->text[in->stack->ptr];
 108         if (in->stack->text[++in->stack->ptr] == L'\0') {
 109             macrostack *tmp = in->stack;
 110             in->stack = tmp->next;
 111             sfree(tmp);
 112         }
 113         return c;
 114     }
 115     else if (in->currfp) {
 116
 117         while (in->wcpos >= in->nwc) {
 118
 119             int c = getc(in->currfp);
 120
 121             if (c == EOF) {
 122                 fclose(in->currfp);
 123                 in->currfp = NULL;
 124                 return EOF;
 125             }
 126
 127             if (rsc)
 128                 rdaddc(rsc, c);
 129
 130             /* Track line numbers, for error reporting */
 131             if (pos)
 132                 *pos = in->pos;
 133             if (in->reportcols) {
 134                 switch (c) {
 135                   case '\t':
 136                     in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP;
 137                     break;
 138                   case '\n':
 139                     in->pos.col = 1;
 140                     in->pos.line++;
 141                     break;
 142                   default:
 143                     in->pos.col++;
 144                     break;
 145                 }
 146             } else {
 147                 in->pos.col = -1;
 148                 if (c == '\n')
 149                     in->pos.line++;
 150             }
 151
 152             /*
 153              * Do input character set translation, so that we return
 154              * Unicode.
 155              */
 156             {
 157                 char buf[1];
 158                 char const *p;
 159                 int inlen;
 160
 161                 buf[0] = (char)c;
 162                 p = buf;
 163                 inlen = 1;
 164
 165                 in->nwc = charset_to_unicode(&p, &inlen,
 166                                              in->wc, lenof(in->wc),
 167                                              in->charset, &in->csstate,
 168                                              NULL, 0);
 169                 assert(p == buf+1 && inlen == 0);
 170
 171                 in->wcpos = 0;
 172             }
 173         }
 174
 175         return in->wc[in->wcpos++];
 176
 177     } else
 178         return EOF;
 179 }
 180
 181 /*
 182  * Lexical analysis of source files.
 183  */
 184 typedef struct token_Tag token;
 185 struct token_Tag {
 186     int type;
 187     int cmd, aux;
 188     wchar_t *text;
 189     char *origtext;
 190     filepos pos;
 191 };
 192 enum {
 193     tok_eof,                           /* end of file */
 194     tok_eop,                           /* end of paragraph */
 195     tok_white,                         /* whitespace */
 196     tok_word,                          /* a word or word fragment */
 197     tok_cmd,                           /* \command */
 198     tok_lbrace,                        /* { */
 199     tok_rbrace                         /* } */
 200 };
 201
/*
 * Halibut command keywords.
 *
 * These are the values match_kw stores in a token's `cmd' field.
 * Most are selected by looking the command name up in match_kw's
 * keyword table; c_S and c_u are recognised by special-case parsing
 * there and carry a number in the token's `aux' field. c__invalid
 * is what an unrecognised command name produces.
 */
enum {
    c__invalid,                        /* invalid command */
    c__comment,                        /* comment command (\#) */
    c__escaped,                        /* escaped character */
    c__nop,                            /* no-op */
    c__nbsp,                           /* nonbreaking space */
    c_A,                               /* appendix heading */
    c_B,                               /* bibliography entry */
    c_BR,                              /* bibliography rewrite */
    c_C,                               /* chapter heading */
    c_H,                               /* heading */
    c_I,                               /* invisible index mark */
    c_IM,                              /* index merge/rewrite */
    c_K,                               /* capitalised cross-reference */
    c_S,                               /* aux field is 0, 1, 2, ... */
    c_U,                               /* unnumbered-chapter heading */
    c_W,                               /* Web hyperlink */
    c_b,                               /* bulletted list */
    c_c,                               /* code */
    c_cfg,                             /* configuration directive */
    c_copyright,                       /* copyright statement */
    c_cw,                              /* weak code */
    c_date,                            /* document processing date */
    c_dd,                              /* description list: description */
    c_define,                          /* macro definition */
    c_dt,                              /* description list: described thing */
    c_e,                               /* emphasis */
    c_i,                               /* visible index mark */
    c_ii,                              /* uncapitalised visible index mark */
    c_k,                               /* uncapitalised cross-reference */
    c_lcont,                           /* continuation para(s) for list item */
    c_n,                               /* numbered list */
    c_nocite,                          /* bibliography trickery */
    c_preamble,                        /* (obsolete) preamble text */
    c_q,                               /* quote marks */
    c_quote,                           /* block-quoted paragraphs */
    c_rule,                            /* horizontal rule */
    c_title,                           /* document title */
    c_u,                               /* aux field is char code */
    c_versionid                        /* document RCS id */
};
 244
 245 /* Perhaps whitespace should be defined in a more Unicode-friendly way? */
 246 #define iswhite(c) ( (c)==32 || (c)==9 || (c)==13 || (c)==10 )
 247 #define isnl(c) ( (c)==10 )
 248 #define isdec(c) ( ((c)>='0'&&(c)<='9') )
 249 #define fromdec(c) ( (c)-'0' )
 250 #define ishex(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f'))
 251 #define fromhex(c) ( (c)<='9' ? (c)-'0' : ((c)&0xDF) - ('A'-10) )
 252 #define iscmd(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z'))
 253
 254 /*
 255  * Keyword comparison function. Like strcmp, but between a wchar_t *
 256  * and a char *.
 257  */
 258 static int kwcmp(wchar_t const *p, char const *q) {
 259     int i;
 260     do {
 261         i = *p - *q;
 262     } while (*p++ && *q++ && !i);
 263     return i;
 264 }
 265
 266 /*
 267  * Match a keyword.
 268  */
 269 static void match_kw(token *tok) {
 270     /*
 271      * FIXME. The ids are explicit in here so as to allow long-name
 272      * equivalents to the various very short keywords.
 273      */
 274     static const struct { char const *name; int id; } keywords[] = {
 275         {"#", c__comment},             /* comment command (\#) */
 276         {"-", c__escaped},             /* nonbreaking hyphen */
 277         {".", c__nop},                 /* no-op */
 278         {"A", c_A},                    /* appendix heading */
 279         {"B", c_B},                    /* bibliography entry */
 280         {"BR", c_BR},                  /* bibliography rewrite */
 281         {"C", c_C},                    /* chapter heading */
 282         {"H", c_H},                    /* heading */
 283         {"I", c_I},                    /* invisible index mark */
 284         {"IM", c_IM},                  /* index merge/rewrite */
 285         {"K", c_K},                    /* capitalised cross-reference */
 286         {"U", c_U},                    /* unnumbered-chapter heading */
 287         {"W", c_W},                    /* Web hyperlink */
 288         {"\\", c__escaped},            /* escaped backslash (\\) */
 289         {"_", c__nbsp},                /* nonbreaking space (\_) */
 290         {"b", c_b},                    /* bulletted list */
 291         {"c", c_c},                    /* code */
 292         {"cfg", c_cfg},                /* configuration directive */
 293         {"copyright", c_copyright},    /* copyright statement */
 294         {"cw", c_cw},                  /* weak code */
 295         {"date", c_date},              /* document processing date */
 296         {"dd", c_dd},                  /* description list: description */
 297         {"define", c_define},          /* macro definition */
 298         {"dt", c_dt},                  /* description list: described thing */
 299         {"e", c_e},                    /* emphasis */
 300         {"i", c_i},                    /* visible index mark */
 301         {"ii", c_ii},                  /* uncapitalised visible index mark */
 302         {"k", c_k},                    /* uncapitalised cross-reference */
 303         {"lcont", c_lcont},            /* continuation para(s) for list item */
 304         {"n", c_n},                    /* numbered list */
 305         {"nocite", c_nocite},          /* bibliography trickery */
 306         {"preamble", c_preamble},      /* (obsolete) preamble text */
 307         {"q", c_q},                    /* quote marks */
 308         {"quote", c_quote},            /* block-quoted paragraphs */
 309         {"rule", c_rule},              /* horizontal rule */
 310         {"title", c_title},            /* document title */
 311         {"versionid", c_versionid},    /* document RCS id */
 312         {"{", c__escaped},             /* escaped lbrace (\{) */
 313         {"}", c__escaped},             /* escaped rbrace (\}) */
 314     };
 315     int i, j, k, c;
 316
 317     /*
 318      * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
 319      * doesn't match correctly, we just fall through to the
 320      * binary-search phase.
 321      */
 322     if (tok->text[0] == 'S') {
 323         /* We expect numeric characters thereafter. */
 324         wchar_t *p = tok->text+1;
 325         int n;
 326         if (!*p)
 327             n = 1;
 328         else {
 329             n = 0;
 330             while (*p && isdec(*p)) {
 331                 n = 10 * n + fromdec(*p);
 332                 p++;
 333             }
 334         }
 335         if (!*p) {
 336             tok->cmd = c_S;
 337             tok->aux = n;
 338             return;
 339         }
 340     } else if (tok->text[0] == 'u') {
 341         /* We expect hex characters thereafter. */
 342         wchar_t *p = tok->text+1;
 343         int n = 0;
 344         while (*p && ishex(*p)) {
 345             n = 16 * n + fromhex(*p);
 346             p++;
 347         }
 348         if (!*p) {
 349             tok->cmd = c_u;
 350             tok->aux = n;
 351             return;
 352         }
 353     }
 354
 355     i = -1;
 356     j = sizeof(keywords)/sizeof(*keywords);
 357     while (j-i > 1) {
 358         k = (i+j)/2;
 359         c = kwcmp(tok->text, keywords[k].name);
 360         if (c < 0)
 361             j = k;
 362         else if (c > 0)
 363             i = k;
 364         else /* c == 0 */ {
 365             tok->cmd = keywords[k].id;
 366             return;
 367         }
 368     }
 369
 370     tok->cmd = c__invalid;
 371 }
 372
 373
/*
 * Read a token from the input file, in the normal way (`normal' in
 * the sense that code paragraphs work a different way).
 *
 * The returned token's `text' and `origtext' fields are either NULL
 * or freshly allocated; the caller is responsible for freeing them
 * (see the dtor macro).
 *
 * Bookkeeping for `origtext': `rsc' accumulates the raw bytes that
 * get() reads from the file, and `prevpos' records how long `rsc'
 * was before the most recent get(). When a token ends by reading
 * one character too many, that character is pushed back with
 * unget(), and its raw bytes (rsc.text + prevpos) are stashed in
 * in->pushback_chars so that the next call can start its own `rsc'
 * with them.
 */
token get_token(input *in) {
    int c;
    int nls;
    int prevpos;
    token ret;
    rdstring rs = { 0, 0, NULL };
    rdstringc rsc = { 0, 0, NULL };
    filepos cpos;

    ret.text = NULL;                   /* default */
    ret.origtext = NULL;               /* default */
    /* Reclaim the raw text of the character pushed back last time. */
    if (in->pushback_chars) {
        rdaddsc(&rsc, in->pushback_chars);
        sfree(in->pushback_chars);
        in->pushback_chars = NULL;
    }
    c = get(in, &cpos, &rsc);
    ret.pos = cpos;
    if (iswhite(c)) {                  /* tok_white or tok_eop */
        /*
         * Swallow the whole run of whitespace, counting newlines:
         * more than one newline makes it a paragraph break.
         */
        nls = 0;
        prevpos = 0;
        do {
            if (isnl(c))
                nls++;
            prevpos = rsc.pos;
        } while ((c = get(in, &cpos, &rsc)) != EOF && iswhite(c));
        if (c == EOF) {
            /* Whitespace running up to end of file is reported as EOF. */
            ret.type = tok_eof;
            sfree(rsc.text);
            return ret;
        }
        if (rsc.text) {
            in->pushback_chars = dupstr(rsc.text + prevpos);
            sfree(rsc.text);
        }
        unget(in, c, &cpos);
        ret.type = (nls > 1 ? tok_eop : tok_white);
        return ret;
    } else if (c == EOF) {             /* tok_eof */
        ret.type = tok_eof;
        sfree(rsc.text);
        return ret;
    } else if (c == '\\') {            /* tok_cmd */
        /* Restart raw-text collection so origtext omits the backslash. */
        rsc.pos = prevpos = 0;
        c = get(in, &cpos, &rsc);
        if (c == '-' || c == '\\' || c == '_' ||
            c == '#' || c == '{' || c == '}' || c == '.') {
            /* single-char command */
            rdadd(&rs, c);
        } else if (c == 'u') {
            /* \u followed by at most four hex digits */
            int len = 0;
            do {
                rdadd(&rs, c);
                len++;
                prevpos = rsc.pos;
                c = get(in, &cpos, &rsc);
            } while (ishex(c) && len < 5);
            unget(in, c, &cpos);
        } else if (iscmd(c)) {
            /* ordinary command: a run of letters and digits */
            do {
                rdadd(&rs, c);
                prevpos = rsc.pos;
                c = get(in, &cpos, &rsc);
            } while (iscmd(c));
            unget(in, c, &cpos);
        }
        /*
         * Now match the command against the list of available
         * ones.
         */
        ret.type = tok_cmd;
        ret.text = ustrdup(rs.text);
        if (rsc.text) {
            in->pushback_chars = dupstr(rsc.text + prevpos);
            rsc.text[prevpos] = '\0';
            ret.origtext = dupstr(rsc.text);
        } else {
            ret.origtext = dupstr("");
        }
        match_kw(&ret);
        sfree(rs.text);
        sfree(rsc.text);
        return ret;
    } else if (c == '{') {             /* tok_lbrace */
        ret.type = tok_lbrace;
        sfree(rsc.text);
        return ret;
    } else if (c == '}') {             /* tok_rbrace */
        ret.type = tok_rbrace;
        sfree(rsc.text);
        return ret;
    } else {                           /* tok_word */
        /*
         * Read a word: the longest possible contiguous sequence of
         * things other than whitespace, backslash, braces and
         * hyphen. A hyphen terminates the word but is returned as
         * part of it; everything else is pushed back for the next
         * token. The `aux' field contains TRUE if the word ends in
         * a hyphen.
         */
        ret.aux = FALSE;               /* assumed for now */
        prevpos = 0;
        while (1) {
            if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) {
                /* Put back the character that caused termination */
                unget(in, c, &cpos);
                break;
            } else {
                rdadd(&rs, c);
                if (c == '-') {
                    /* Nothing is pushed back here, so keep all raw text. */
                    prevpos = rsc.pos;
                    ret.aux = TRUE;
                    break;             /* hyphen terminates word */
                }
            }
            prevpos = rsc.pos;
            c = get(in, &cpos, &rsc);
        }
        ret.type = tok_word;
        ret.text = ustrdup(rs.text);
        if (rsc.text) {
            in->pushback_chars = dupstr(rsc.text + prevpos);
            rsc.text[prevpos] = '\0';
            ret.origtext = dupstr(rsc.text);
        } else {
            ret.origtext = dupstr("");
        }
        sfree(rs.text);
        sfree(rsc.text);
        return ret;
    }
}
 510
 511 /*
 512  * Determine whether the next input character is an open brace (for
 513  * telling code paragraphs from paragraphs which merely start with
 514  * code).
 515  */
 516 int isbrace(input *in) {
 517     int c;
 518     filepos cpos;
 519
 520     c = get(in, &cpos, NULL);
 521     unget(in, c, &cpos);
 522     return (c == '{');
 523 }
 524
 525 /*
 526  * Read the rest of a line that starts `\c'. Including nothing at
 527  * all (tok_word with empty text).
 528  */
 529 token get_codepar_token(input *in) {
 530     int c;
 531     token ret;
 532     rdstring rs = { 0, 0, NULL };
 533     filepos cpos;
 534
 535     ret.type = tok_word;
 536     ret.origtext = NULL;
 537     c = get(in, &cpos, NULL);          /* expect (and discard) one space */
 538     ret.pos = cpos;
 539     if (c == ' ') {
 540         c = get(in, &cpos, NULL);
 541         ret.pos = cpos;
 542     }
 543     while (!isnl(c) && c != EOF) {
 544         int c2 = c;
 545         c = get(in, &cpos, NULL);
 546         /* Discard \r just before \n. */
 547         if (c2 != 13 || !isnl(c))
 548             rdadd(&rs, c2);
 549     }
 550     unget(in, c, &cpos);
 551     ret.text = ustrdup(rs.text);
 552     sfree(rs.text);
 553     return ret;
 554 }
 555
 556 /*
 557  * Adds a new word to a linked list
 558  */
 559 static word *addword(word newword, word ***hptrptr) {
 560     word *mnewword;
 561     if (!hptrptr)
 562         return NULL;
 563     mnewword = mknew(word);
 564     *mnewword = newword;               /* structure copy */
 565     mnewword->next = NULL;
 566     **hptrptr = mnewword;
 567     *hptrptr = &mnewword->next;
 568     return mnewword;
 569 }
 570
 571 /*
 572  * Adds a new paragraph to a linked list
 573  */
 574 static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
 575     paragraph *mnewpara = mknew(paragraph);
 576     *mnewpara = newpara;               /* structure copy */
 577     mnewpara->next = NULL;
 578     **hptrptr = mnewpara;
 579     *hptrptr = &mnewpara->next;
 580     return mnewpara;
 581 }
 582
 583 /*
 584  * Destructor before token is reassigned; should catch most memory
 585  * leaks
 586  */
 587 #define dtor(t) ( sfree(t.text), sfree(t.origtext) )
 588
 589 /*
 590  * Reads a single file (ie until get() returns EOF)
 591  */
 592 static void read_file(paragraph ***ret, input *in, indexdata *idx) {
 593     token t;
 594     paragraph par;
 595     word wd, **whptr, **idximplicit;
 596     tree234 *macros;
 597     wchar_t utext[2], *wdtext;
 598     int style, spcstyle;
 599     int already;
 600     int iswhite, seenwhite;
 601     int type;
 602     int prev_para_type;
 603     struct stack_item {
 604         enum {
 605             stack_nop = 0,             /* do nothing (for error recovery) */
 606             stack_ualt = 1,            /* \u alternative */
 607             stack_style = 2,           /* \e, \c, \cw */
 608             stack_idx = 4,             /* \I, \i, \ii */
 609             stack_hyper = 8,           /* \W */
 610             stack_quote = 16,          /* \q */
 611         } type;
 612         word **whptr;                  /* to restore from \u alternatives */
 613         word **idximplicit;            /* to restore from \u alternatives */
 614         filepos fpos;
 615     } *sitem;
 616     stack parsestk;
 617     struct crossparaitem {
 618         int type;                      /* currently c_lcont, c_quote or -1 */
 619         int seen_lcont, seen_quote;
 620     };
 621     stack crossparastk;
 622     word *indexword, *uword, *iword;
 623     word *idxwordlist;
 624     rdstring indexstr;
 625     int index_downcase, index_visible, indexing;
 626     const rdstring nullrs = { 0, 0, NULL };
 627     wchar_t uchr;
 628
 629     t.text = NULL;
 630     t.origtext = NULL;
 631     macros = newtree234(macrocmp);
 632     already = FALSE;
 633
 634     crossparastk = stk_new();
 635
 636     /*
 637      * Loop on each paragraph.
 638      */
 639     while (1) {
 640         int start_cmd = c__invalid;
 641         par.words = NULL;
 642         par.keyword = NULL;
 643         par.origkeyword = NULL;
 644         whptr = &par.words;
 645
 646         /*
 647          * Get a token.
 648          */
 649         do {
 650             if (!already) {
 651                 dtor(t), t = get_token(in);
 652             }
 653             already = FALSE;
 654         } while (t.type == tok_eop);
 655         if (t.type == tok_eof)
 656             break;
 657
 658         /*
 659          * Parse code paragraphs separately.
 660          */
 661         if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
 662             int wtype = word_WeakCode;
 663
 664             par.type = para_Code;
 665             par.fpos = t.pos;
 666             while (1) {
 667                 dtor(t), t = get_codepar_token(in);
 668                 wd.type = wtype;
 669                 wd.breaks = FALSE;     /* shouldn't need this... */
 670                 wd.text = ustrdup(t.text);
 671                 wd.alt = NULL;
 672                 wd.fpos = t.pos;
 673                 addword(wd, &whptr);
 674                 dtor(t), t = get_token(in);
 675                 if (t.type == tok_white) {
 676                     /*
 677                      * The newline after a code-paragraph line
 678                      */
 679                     dtor(t), t = get_token(in);
 680                 }
 681                 if (t.type == tok_eop || t.type == tok_eof ||
 682                     t.type == tok_rbrace) { /* might be } terminating \lcont */
 683                     if (t.type == tok_rbrace)
 684                         already = TRUE;
 685                     break;
 686                 } else if (t.type == tok_cmd && t.cmd == c_c) {
 687                     wtype = word_WeakCode;
 688                 } else if (t.type == tok_cmd && t.cmd == c_e &&
 689                            wtype == word_WeakCode) {
 690                     wtype = word_Emph;
 691                 } else {
 692                     error(err_brokencodepara, &t.pos);
 693                     prev_para_type = par.type;
 694                     addpara(par, ret);
 695                     while (t.type != tok_eop)   /* error recovery: */
 696                         dtor(t), t = get_token(in);   /* eat rest of paragraph */
 697                     goto codeparabroken;   /* ick, but such is life */
 698                 }
 699             }
 700             prev_para_type = par.type;
 701             addpara(par, ret);
 702             codeparabroken:
 703             continue;
 704         }
 705
 706         /*
 707          * Spot the special commands that define a grouping of more
 708          * than one paragraph, and also the closing braces that
 709          * finish them.
 710          */
 711         if (t.type == tok_cmd &&
 712             (t.cmd == c_lcont || t.cmd == c_quote)) {
 713             struct crossparaitem *sitem, *stop;
 714             int cmd = t.cmd;
 715
 716             /*
 717              * Expect, and swallow, an open brace.
 718              */
 719             dtor(t), t = get_token(in);
 720             if (t.type != tok_lbrace) {
 721                 error(err_explbr, &t.pos);
 722                 continue;
 723             }
 724
 725             /*
 726              * Also expect, and swallow, any whitespace after that
 727              * (a newline before a code paragraph wouldn't be
 728              * surprising).
 729              */
 730             do {
 731                 dtor(t), t = get_token(in);
 732             } while (t.type == tok_white);
 733             already = TRUE;
 734
 735             if (cmd == c_lcont) {
 736                 /*
 737                  * \lcont causes a continuation of a list item into
 738                  * multiple paragraphs (which may in turn contain
 739                  * nested lists, code paras etc). Hence, the previous
 740                  * paragraph must be of a list type.
 741                  */
 742                 sitem = mknew(struct crossparaitem);
 743                 stop = (struct crossparaitem *)stk_top(crossparastk);
 744                 if (stop)
 745                     *sitem = *stop;
 746                 else
 747                     sitem->seen_quote = sitem->seen_lcont = 0;
 748
 749                 if (prev_para_type == para_Bullet ||
 750                     prev_para_type == para_NumberedList ||
 751                     prev_para_type == para_Description) {
 752                     sitem->type = c_lcont;
 753                     sitem->seen_lcont = 1;
 754                     par.type = para_LcontPush;
 755                     prev_para_type = par.type;
 756                     addpara(par, ret);
 757                 } else {
 758                     /*
 759                      * Push a null item on the cross-para stack so that
 760                      * when we see the corresponding closing brace we
 761                      * don't give a cascade error.
 762                      */
 763                     sitem->type = -1;
 764                     error(err_misplacedlcont, &t.pos);
 765                 }
 766             } else {
 767                 /*
 768                  * \quote causes a group of paragraphs to be
 769                  * block-quoted (typically they will be indented a
 770                  * bit).
 771                  */
 772                 sitem = mknew(struct crossparaitem);
 773                 stop = (struct crossparaitem *)stk_top(crossparastk);
 774                 if (stop)
 775                     *sitem = *stop;
 776                 else
 777                     sitem->seen_quote = sitem->seen_lcont = 0;
 778                 sitem->type = c_quote;
 779                 sitem->seen_quote = 1;
 780                 par.type = para_QuotePush;
 781                 prev_para_type = par.type;
 782                 addpara(par, ret);
 783             }
 784             stk_push(crossparastk, sitem);
 785             continue;
 786         } else if (t.type == tok_rbrace) {
 787             struct crossparaitem *sitem = stk_pop(crossparastk);
 788             if (!sitem)
 789                 error(err_unexbrace, &t.pos);
 790             else {
 791                 switch (sitem->type) {
 792                   case c_lcont:
 793                     par.type = para_LcontPop;
 794                     prev_para_type = par.type;
 795                     addpara(par, ret);
 796                     break;
 797                   case c_quote:
 798                     par.type = para_QuotePop;
 799                     prev_para_type = par.type;
 800                     addpara(par, ret);
 801                     break;
 802                 }
 803                 sfree(sitem);
 804             }
 805             continue;
 806         }
 807
 808         /*
 809          * This token begins a paragraph. See if it's one of the
 810          * special commands that define a paragraph type.
 811          *
 812          * (note that \# is special in a way, and \nocite takes no
 813          * text)
 814          */
 815         par.type = para_Normal;
 816         if (t.type == tok_cmd) {
 817             int needkw;
 818             int is_macro = FALSE;
 819
 820             par.fpos = t.pos;
 821             switch (t.cmd) {
 822               default:
 823                 needkw = -1;
 824                 break;
 825               case c__invalid:
 826                 error(err_badparatype, t.text, &t.pos);
 827                 needkw = 4;
 828                 break;
 829               case c__comment:
 830                 if (isbrace(in))
 831                     break;             /* `\#{': isn't a comment para */
 832                 do {
 833                     dtor(t), t = get_token(in);
 834                 } while (t.type != tok_eop && t.type != tok_eof);
 835                 continue;              /* next paragraph */
 836                 /*
 837                  * `needkw' values:
 838                  *
 839                  *   1 -- exactly one keyword
 840                  *   2 -- at least one keyword
 841                  *   4 -- any number of keywords including zero
 842                  *   8 -- at least one keyword and then nothing else
 843                  *  16 -- nothing at all! no keywords, no body
 844                  *  32 -- no keywords at all
 845                  */
 846               case c_A: needkw = 2; par.type = para_Appendix; break;
 847               case c_B: needkw = 2; par.type = para_Biblio; break;
 848               case c_BR: needkw = 1; par.type = para_BR;
 849                 start_cmd = c_BR; break;
 850               case c_C: needkw = 2; par.type = para_Chapter; break;
 851               case c_H: needkw = 2; par.type = para_Heading;
 852                 par.aux = 0;
 853                 break;
 854               case c_IM: needkw = 2; par.type = para_IM;
 855                 start_cmd = c_IM; break;
 856               case c_S: needkw = 2; par.type = para_Subsect;
 857                 par.aux = t.aux; break;
 858               case c_U: needkw = 32; par.type = para_UnnumberedChapter; break;
 859                 /* For \b and \n the keyword is optional */
 860               case c_b: needkw = 4; par.type = para_Bullet; break;
 861               case c_dt: needkw = 4; par.type = para_DescribedThing; break;
 862               case c_dd: needkw = 4; par.type = para_Description; break;
 863               case c_n: needkw = 4; par.type = para_NumberedList; break;
 864               case c_cfg: needkw = 8; par.type = para_Config;
 865                 start_cmd = c_cfg; break;
 866               case c_copyright: needkw = 32; par.type = para_Copyright; break;
 867               case c_define: is_macro = TRUE; needkw = 1; break;
 868                 /* For \nocite the keyword is _everything_ */
 869               case c_nocite: needkw = 8; par.type = para_NoCite; break;
 870               case c_preamble: needkw = 32; par.type = para_Normal; break;
 871               case c_rule: needkw = 16; par.type = para_Rule; break;
 872               case c_title: needkw = 32; par.type = para_Title; break;
 873               case c_versionid: needkw = 32; par.type = para_VersionID; break;
 874             }
 875
 876             if (par.type == para_Chapter ||
 877                 par.type == para_Heading ||
 878                 par.type == para_Subsect ||
 879                 par.type == para_Appendix ||
 880                 par.type == para_UnnumberedChapter) {
 881                 struct crossparaitem *sitem = stk_top(crossparastk);
 882                 if (sitem && (sitem->seen_lcont || sitem->seen_quote)) {
 883                     error(err_sectmarkerinblock,
 884                           &t.pos,
 885                           (sitem->seen_lcont ? "lcont" : "quote"));
 886                 }
 887             }
 888
 889             if (needkw > 0) {
 890                 rdstring rs = { 0, 0, NULL };
 891                 rdstringc rsc = { 0, 0, NULL };
 892                 int nkeys = 0;
 893                 filepos fp;
 894
 895                 /* Get keywords. */
 896                 dtor(t), t = get_token(in);
 897                 fp = t.pos;
 898                 while (t.type == tok_lbrace) {
 899                     /* This is a keyword. */
 900                     nkeys++;
 901                     /* FIXME: there will be bugs if anyone specifies an
 902                      * empty keyword (\foo{}), so trap this case. */
 903                     while (dtor(t), t = get_token(in),
 904                            t.type == tok_word ||
 905                            t.type == tok_white ||
 906                            (t.type == tok_cmd && t.cmd == c__nbsp) ||
 907                            (t.type == tok_cmd && t.cmd == c__escaped)) {
 908                         if (t.type == tok_white ||
 909                             (t.type == tok_cmd && t.cmd == c__nbsp)) {
 910                             rdadd(&rs, ' ');
 911                             rdaddc(&rsc, ' ');
 912                         } else {
 913                             rdadds(&rs, t.text);
 914                             rdaddsc(&rsc, t.origtext);
 915                         }
 916                     }
 917                     if (t.type != tok_rbrace) {
 918                         error(err_kwunclosed, &t.pos);
 919                         continue;
 920                     }
 921                     rdadd(&rs, 0);     /* add string terminator */
 922                     rdaddc(&rsc, 0);   /* add string terminator */
 923                     dtor(t), t = get_token(in); /* eat right brace */
 924                 }
 925
 926                 rdadd(&rs, 0);         /* add string terminator */
 927                 rdaddc(&rsc, 0);       /* add string terminator */
 928
 929                 /* See whether we have the right number of keywords. */
 930                 if ((needkw & 48) && nkeys > 0)
 931                     error(err_kwillegal, &fp);
 932                 if ((needkw & 11) && nkeys == 0)
 933                     error(err_kwexpected, &fp);
 934                 if ((needkw & 5) && nkeys > 1)
 935                     error(err_kwtoomany, &fp);
 936
 937                 if (is_macro) {
 938                     /*
 939                      * Macro definition. Get the rest of the line
 940                      * as a code-paragraph token, repeatedly until
 941                      * there's nothing more left of it. Separate
 942                      * with newlines.
 943                      */
 944                     rdstring macrotext = { 0, 0, NULL };
 945                     while (1) {
 946                         dtor(t), t = get_codepar_token(in);
 947                         if (macrotext.pos > 0)
 948                             rdadd(&macrotext, L'\n');
 949                         rdadds(&macrotext, t.text);
 950                         dtor(t), t = get_token(in);
 951                         if (t.type == tok_eop) break;
 952                     }
 953                     macrodef(macros, rs.text, macrotext.text, fp);
 954                     continue;          /* next paragraph */
 955                 }
 956
 957                 par.keyword = rdtrim(&rs);
 958                 par.origkeyword = rdtrimc(&rsc);
 959
 960                 /* Move to EOP in case of needkw==8 or 16 (no body) */
 961                 if (needkw & 24) {
 962                     /* We allow whitespace even when we expect no para body */
 963                     while (t.type == tok_white)
 964                         dtor(t), t = get_token(in);
 965                     if (t.type != tok_eop && t.type != tok_eof &&
 966                         (start_cmd == c__invalid ||
 967                          t.type != tok_cmd || t.cmd != start_cmd)) {
 968                         error(err_bodyillegal, &t.pos);
 969                         /* Error recovery: eat the rest of the paragraph */
 970                         while (t.type != tok_eop && t.type != tok_eof &&
 971                                (start_cmd == c__invalid ||
 972                                 t.type != tok_cmd || t.cmd != start_cmd))
 973                             dtor(t), t = get_token(in);
 974                     }
 975                     if (t.type == tok_cmd)
 976                         already = TRUE;/* inhibit get_token at top of loop */
 977                     prev_para_type = par.type;
 978                     addpara(par, ret);
 979
 980                     if (par.type == para_Config) {
 981                         input_configure(in, &par);
 982                     }
 983                     continue;          /* next paragraph */
 984                 }
 985             }
 986         }
 987
 988         /*
 989          * Now read the actual paragraph, word by word, adding to
 990          * the paragraph list.
 991          *
 992          * Mid-paragraph commands:
 993          *
 994          *  \K \k
 995          *  \c \cw
 996          *  \e
 997          *  \i \ii
 998          *  \I
 999          *  \u
1000          *  \W
1001          *  \date
1002          *  \\ \{ \}
1003          */
1004         parsestk = stk_new();
1005         style = word_Normal;
1006         spcstyle = word_WhiteSpace;
1007         indexing = FALSE;
1008         seenwhite = TRUE;
1009         while (t.type != tok_eop && t.type != tok_eof) {
1010             iswhite = FALSE;
1011             already = FALSE;
1012
1013             /* Handle implicit paragraph breaks after \IM, \BR etc */
1014             if (start_cmd != c__invalid &&
1015                 t.type == tok_cmd && t.cmd == start_cmd) {
1016                 already = TRUE;        /* inhibit get_token at top of loop */
1017                 break;
1018             }
1019
1020             if (t.type == tok_cmd && t.cmd == c__nop) {
1021                 dtor(t), t = get_token(in);
1022                 continue;              /* do nothing! */
1023             }
1024
1025             if (t.type == tok_cmd && t.cmd == c__escaped) {
1026                 t.type = tok_word;     /* nice and simple */
1027                 t.aux = 0;             /* even if `\-' - nonbreaking! */
1028             }
1029             if (t.type == tok_cmd && t.cmd == c__nbsp) {
1030                 t.type = tok_word;     /* nice and simple */
1031                 sfree(t.text);
1032                 t.text = ustrdup(L" ");  /* text is ` ' not `_' */
1033                 t.aux = 0;             /* (nonbreaking) */
1034             }
1035             switch (t.type) {
1036               case tok_white:
1037                 if (whptr == &par.words)
1038                     break;             /* strip whitespace at start of para */
1039                 wd.text = NULL;
1040                 wd.type = spcstyle;
1041                 wd.alt = NULL;
1042                 wd.aux = 0;
1043                 wd.fpos = t.pos;
1044                 wd.breaks = FALSE;
1045
1046                 /*
1047                  * Inhibit use of whitespace if it's (probably the
1048                  * newline) before a repeat \IM / \BR type
1049                  * directive.
1050                  */
1051                 if (start_cmd != c__invalid) {
1052                     dtor(t), t = get_token(in);
1053                     already = TRUE;
1054                     if (t.type == tok_cmd && t.cmd == start_cmd)
1055                         break;
1056                 }
1057
1058                 if (indexing)
1059                     rdadd(&indexstr, ' ');
1060                 if (!indexing || index_visible)
1061                     addword(wd, &whptr);
1062                 if (indexing)
1063                     addword(wd, &idximplicit);
1064                 iswhite = TRUE;
1065                 break;
1066               case tok_word:
1067                 if (indexing)
1068                     rdadds(&indexstr, t.text);
1069                 wd.type = style;
1070                 wd.alt = NULL;
1071                 wd.aux = 0;
1072                 wd.fpos = t.pos;
1073                 wd.breaks = t.aux;
1074                 if (!indexing || index_visible) {
1075                     wd.text = ustrdup(t.text);
1076                     addword(wd, &whptr);
1077                 }
1078                 if (indexing) {
1079                     wd.text = ustrdup(t.text);
1080                     addword(wd, &idximplicit);
1081                 }
1082                 break;
1083               case tok_lbrace:
1084                 error(err_unexbrace, &t.pos);
1085                 /* Error recovery: push nop */
1086                 sitem = mknew(struct stack_item);
1087                 sitem->type = stack_nop;
1088                 sitem->fpos = t.pos;
1089                 stk_push(parsestk, sitem);
1090                 break;
1091               case tok_rbrace:
1092                 sitem = stk_pop(parsestk);
1093                 if (!sitem) {
1094                     /*
1095                      * This closing brace could have been an
1096                      * indication that the cross-paragraph stack
1097                      * wants popping. Accordingly, we treat it here
1098                      * as an indication that the paragraph is over.
1099                      */
1100                     already = TRUE;
1101                     goto finished_para;
1102                 } else {
1103                     if (sitem->type & stack_ualt) {
1104                         whptr = sitem->whptr;
1105                         idximplicit = sitem->idximplicit;
1106                     }
1107                     if (sitem->type & stack_style) {
1108                         style = word_Normal;
1109                         spcstyle = word_WhiteSpace;
1110                     }
1111                     if (sitem->type & stack_idx) {
1112                         indexword->text = ustrdup(indexstr.text);
1113                         if (index_downcase) {
1114                             word *w;
1115
1116                             ustrlow(indexword->text);
1117                             ustrlow(indexstr.text);
1118
1119                             for (w = idxwordlist; w; w = w->next)
1120                                 if (w->text)
1121                                     ustrlow(w->text);
1122                         }
1123                         indexing = FALSE;
1124                         rdadd(&indexstr, L'\0');
1125                         index_merge(idx, FALSE, indexstr.text,
1126                                     idxwordlist, &sitem->fpos);
1127                         sfree(indexstr.text);
1128                     }
1129                     if (sitem->type & stack_hyper) {
1130                         wd.text = NULL;
1131                         wd.type = word_HyperEnd;
1132                         wd.alt = NULL;
1133                         wd.aux = 0;
1134                         wd.fpos = t.pos;
1135                         wd.breaks = FALSE;
1136                         if (!indexing || index_visible)
1137                             addword(wd, &whptr);
1138                         if (indexing)
1139                             addword(wd, &idximplicit);
1140                     }
1141                     if (sitem->type & stack_quote) {
1142                         wd.text = NULL;
1143                         wd.type = toquotestyle(style);
1144                         wd.alt = NULL;
1145                         wd.aux = quote_Close;
1146                         wd.fpos = t.pos;
1147                         wd.breaks = FALSE;
1148                         if (!indexing || index_visible)
1149                             addword(wd, &whptr);
1150                         if (indexing) {
1151                             rdadd(&indexstr, L'"');
1152                             addword(wd, &idximplicit);
1153                         }
1154                     }
1155                 }
1156                 sfree(sitem);
1157                 break;
1158               case tok_cmd:
1159                 switch (t.cmd) {
1160                   case c__comment:
1161                     /*
1162                      * In-paragraph comment: \#{ balanced braces }
1163                      *
1164                      * Anything goes here; even tok_eop. We should
1165                      * eat whitespace after the close brace _if_
1166                      * there was whitespace before the \#.
1167                      */
1168                     dtor(t), t = get_token(in);
1169                     if (t.type != tok_lbrace) {
1170                         error(err_explbr, &t.pos);
1171                     } else {
1172                         int braces = 1;
1173                         while (braces > 0) {
1174                             dtor(t), t = get_token(in);
1175                             if (t.type == tok_lbrace)
1176                                 braces++;
1177                             else if (t.type == tok_rbrace)
1178                                 braces--;
1179                             else if (t.type == tok_eof) {
1180                                 error(err_commenteof, &t.pos);
1181                                 break;
1182                             }
1183                         }
1184                     }
1185                     if (seenwhite) {
1186                         already = TRUE;
1187                         dtor(t), t = get_token(in);
1188                         if (t.type == tok_white) {
1189                             iswhite = TRUE;
1190                             already = FALSE;
1191                         }
1192                     }
1193                     break;
1194                   case c_q:
1195                     dtor(t), t = get_token(in);
1196                     if (t.type != tok_lbrace) {
1197                         error(err_explbr, &t.pos);
1198                     } else {
1199                         wd.text = NULL;
1200                         wd.type = toquotestyle(style);
1201                         wd.alt = NULL;
1202                         wd.aux = quote_Open;
1203                         wd.fpos = t.pos;
1204                         wd.breaks = FALSE;
1205                         if (!indexing || index_visible)
1206                             addword(wd, &whptr);
1207                         if (indexing) {
1208                             rdadd(&indexstr, L'"');
1209                             addword(wd, &idximplicit);
1210                         }
1211                         sitem = mknew(struct stack_item);
1212                         sitem->fpos = t.pos;
1213                         sitem->type = stack_quote;
1214                         stk_push(parsestk, sitem);
1215                     }
1216                     break;
1217                   case c_K:
1218                   case c_k:
1219                   case c_W:
1220                   case c_date:
1221                     /*
1222                      * Keyword, hyperlink, or \date. We expect a
1223                      * left brace, some text, and then a right
1224                      * brace. No nesting; no arguments.
1225                      */
1226                     wd.fpos = t.pos;
1227                     wd.breaks = FALSE;
1228                     if (t.cmd == c_K)
1229                         wd.type = word_UpperXref;
1230                     else if (t.cmd == c_k)
1231                         wd.type = word_LowerXref;
1232                     else if (t.cmd == c_W)
1233                         wd.type = word_HyperLink;
1234                     else
1235                         wd.type = word_Normal;
1236                     dtor(t), t = get_token(in);
1237                     if (t.type != tok_lbrace) {
1238                         if (wd.type == word_Normal) {
1239                             time_t thetime = time(NULL);
1240                             struct tm *broken = localtime(&thetime);
1241                             already = TRUE;
1242                             wdtext = ustrftime(NULL, broken);
1243                             wd.type = style;
1244                         } else {
1245                             error(err_explbr, &t.pos);
1246                             wdtext = NULL;
1247                         }
1248                     } else {
1249                         rdstring rs = { 0, 0, NULL };
1250                         while (dtor(t), t = get_token(in),
1251                                t.type == tok_word || t.type == tok_white) {
1252                             if (t.type == tok_white)
1253                                 rdadd(&rs, ' ');
1254                             else
1255                                 rdadds(&rs, t.text);
1256                         }
1257                         if (wd.type == word_Normal) {
1258                             time_t thetime = time(NULL);
1259                             struct tm *broken = localtime(&thetime);
1260                             wdtext = ustrftime(rs.text, broken);
1261                             wd.type = style;
1262                         } else {
1263                             wdtext = ustrdup(rs.text);
1264                         }
1265                         sfree(rs.text);
1266                         if (t.type != tok_rbrace) {
1267                             error(err_kwexprbr, &t.pos);
1268                         }
1269                     }
1270                     wd.alt = NULL;
1271                     wd.aux = 0;
1272                     if (!indexing || index_visible) {
1273                         wd.text = ustrdup(wdtext);
1274                         addword(wd, &whptr);
1275                     }
1276                     if (indexing) {
1277                         wd.text = ustrdup(wdtext);
1278                         addword(wd, &idximplicit);
1279                     }
1280                     sfree(wdtext);
1281                     if (wd.type == word_HyperLink) {
1282                         /*
1283                          * Hyperlinks are different: they then
1284                          * expect another left brace, to begin
1285                          * delimiting the text marked by the link.
1286                          */
1287                         dtor(t), t = get_token(in);
1288                         sitem = mknew(struct stack_item);
1289                         sitem->fpos = wd.fpos;
1290                         sitem->type = stack_hyper;
1291                         /*
1292                          * Special cases: \W{}\i, \W{}\ii
1293                          */
1294                         if (t.type == tok_cmd &&
1295                             (t.cmd == c_i || t.cmd == c_ii)) {
1296                             if (indexing) {
1297                                 error(err_nestedindex, &t.pos);
1298                             } else {
1299                                 /* Add an index-reference word with no
1300                                  * text as yet */
1301                                 wd.type = word_IndexRef;
1302                                 wd.text = NULL;
1303                                 wd.alt = NULL;
1304                                 wd.aux = 0;
1305                                 wd.breaks = FALSE;
1306                                 indexword = addword(wd, &whptr);
1307                                 /* Set up a rdstring to read the
1308                                  * index text */
1309                                 indexstr = nullrs;
1310                                 /* Flags so that we do the Right
1311                                  * Things with text */
1312                                 index_visible = (type != c_I);
1313                                 index_downcase = (type == c_ii);
1314                                 indexing = TRUE;
1315                                 idxwordlist = NULL;
1316                                 idximplicit = &idxwordlist;
1317
1318                                 sitem->type |= stack_idx;
1319                             }
1320                             dtor(t), t = get_token(in);
1321                         }
1322                         /*
1323                          * Special cases: \W{}\c, \W{}\e, \W{}\cw
1324                          */
1325                         if (t.type == tok_cmd &&
1326                             (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
1327                             if (style != word_Normal)
1328                                 error(err_nestedstyles, &t.pos);
1329                             else {
1330                                 style = (t.cmd == c_c ? word_Code :
1331                                          t.cmd == c_cw ? word_WeakCode :
1332                                          word_Emph);
1333                                 spcstyle = tospacestyle(style);
1334                                 sitem->type |= stack_style;
1335                             }
1336                             dtor(t), t = get_token(in);
1337                         }
1338                         if (t.type != tok_lbrace) {
1339                             error(err_explbr, &t.pos);
1340                             sfree(sitem);
1341                         } else {
1342                             stk_push(parsestk, sitem);
1343                         }
1344                     }
1345                     break;
1346                   case c_c:
1347                   case c_cw:
1348                   case c_e:
1349                     type = t.cmd;
1350                     if (style != word_Normal) {
1351                         error(err_nestedstyles, &t.pos);
1352                         /* Error recovery: eat lbrace, push nop. */
1353                         dtor(t), t = get_token(in);
1354                         sitem = mknew(struct stack_item);
1355                         sitem->fpos = t.pos;
1356                         sitem->type = stack_nop;
1357                         stk_push(parsestk, sitem);
1358                     }
1359                     dtor(t), t = get_token(in);
1360                     if (t.type != tok_lbrace) {
1361                         error(err_explbr, &t.pos);
1362                     } else {
1363                         style = (type == c_c ? word_Code :
1364                                  type == c_cw ? word_WeakCode :
1365                                  word_Emph);
1366                         spcstyle = tospacestyle(style);
1367                         sitem = mknew(struct stack_item);
1368                         sitem->fpos = t.pos;
1369                         sitem->type = stack_style;
1370                         stk_push(parsestk, sitem);
1371                     }
1372                     break;
1373                   case c_i:
1374                   case c_ii:
1375                   case c_I:
1376                     type = t.cmd;
1377                     if (indexing) {
1378                         error(err_nestedindex, &t.pos);
1379                         /* Error recovery: eat lbrace, push nop. */
1380                         dtor(t), t = get_token(in);
1381                         sitem = mknew(struct stack_item);
1382                         sitem->fpos = t.pos;
1383                         sitem->type = stack_nop;
1384                         stk_push(parsestk, sitem);
1385                     }
1386                     sitem = mknew(struct stack_item);
1387                     sitem->fpos = t.pos;
1388                     sitem->type = stack_idx;
1389                     dtor(t), t = get_token(in);
1390                     /*
1391                      * Special cases: \i\c, \i\e, \i\cw
1392                      */
1393                     wd.fpos = t.pos;
1394                     if (t.type == tok_cmd &&
1395                         (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
1396                         if (style != word_Normal)
1397                             error(err_nestedstyles, &t.pos);
1398                         else {
1399                             style = (t.cmd == c_c ? word_Code :
1400                                      t.cmd == c_cw ? word_WeakCode :
1401                                      word_Emph);
1402                             spcstyle = tospacestyle(style);
1403                             sitem->type |= stack_style;
1404                         }
1405                         dtor(t), t = get_token(in);
1406                     }
1407                     if (t.type != tok_lbrace) {
1408                         sfree(sitem);
1409                         error(err_explbr, &t.pos);
1410                     } else {
1411                         /* Add an index-reference word with no text as yet */
1412                         wd.type = word_IndexRef;
1413                         wd.text = NULL;
1414                         wd.alt = NULL;
1415                         wd.aux = 0;
1416                         wd.breaks = FALSE;
1417                         indexword = addword(wd, &whptr);
1418                         /* Set up a rdstring to read the index text */
1419                         indexstr = nullrs;
1420                         /* Flags so that we do the Right Things with text */
1421                         index_visible = (type != c_I);
1422                         index_downcase = (type == c_ii);
1423                         indexing = TRUE;
1424                         idxwordlist = NULL;
1425                         idximplicit = &idxwordlist;
1426                         /* Stack item to close the indexing on exit */
1427                         stk_push(parsestk, sitem);
1428                     }
1429                     break;
1430                   case c_u:
1431                     uchr = t.aux;
1432                     utext[0] = uchr; utext[1] = 0;
1433                     wd.type = style;
1434                     wd.breaks = FALSE;
1435                     wd.alt = NULL;
1436                     wd.aux = 0;
1437                     wd.fpos = t.pos;
1438                     if (!indexing || index_visible) {
1439                         wd.text = ustrdup(utext);
1440                         uword = addword(wd, &whptr);
1441                     } else
1442                         uword = NULL;
1443                     if (indexing) {
1444                         wd.text = ustrdup(utext);
1445                         iword = addword(wd, &idximplicit);
1446                     } else
1447                         iword = NULL;
1448                     dtor(t), t = get_token(in);
1449                     if (t.type == tok_lbrace) {
1450                         /*
1451                          * \u with a left brace. Until the brace
1452                          * closes, all further words go on a
1453                          * sidetrack from the main thread of the
1454                          * paragraph.
1455                          */
1456                         sitem = mknew(struct stack_item);
1457                         sitem->fpos = t.pos;
1458                         sitem->type = stack_ualt;
1459                         sitem->whptr = whptr;
1460                         sitem->idximplicit = idximplicit;
1461                         stk_push(parsestk, sitem);
1462                         whptr = uword ? &uword->alt : NULL;
1463                         idximplicit = iword ? &iword->alt : NULL;
1464                     } else {
1465                         if (indexing)
1466                             rdadd(&indexstr, uchr);
1467                         already = TRUE;
1468                     }
1469                     break;
1470                   default:
1471                     if (!macrolookup(macros, in, t.text, &t.pos))
1472                         error(err_badmidcmd, t.text, &t.pos);
1473                     break;
1474                 }
1475             }
1476             if (!already)
1477                 dtor(t), t = get_token(in);
1478             seenwhite = iswhite;
1479         }
1480         finished_para:
1481         /* Check the stack is empty */
1482         if (stk_top(parsestk)) {
1483             while ((sitem = stk_pop(parsestk)))
1484                 sfree(sitem);
1485             error(err_missingrbrace, &t.pos);
1486         }
1487         stk_free(parsestk);
1488         prev_para_type = par.type;
1489         addpara(par, ret);
1490         if (t.type == tok_eof)
1491             already = TRUE;
1492     }
1493
1494     if (stk_top(crossparastk)) {
1495         void *p;
1496
1497         error(err_missingrbrace2, &t.pos);
1498         while ((p = stk_pop(crossparastk)))
1499             sfree(p);
1500     }
1501
1502     /*
1503      * We break to here rather than returning, because otherwise
1504      * this cleanup doesn't happen.
1505      */
1506     dtor(t);
1507     macrocleanup(macros);
1508
1509     stk_free(crossparastk);
1510 }
1511
1512 paragraph *read_input(input *in, indexdata *idx) {
1513     paragraph *head = NULL;
1514     paragraph **hptr = &head;
1515
1516     while (in->currindex < in->nfiles) {
1517         in->currfp = fopen(in->filenames[in->currindex], "r");
1518         if (in->currfp) {
1519             setpos(in, in->filenames[in->currindex]);
1520             in->charset = in->defcharset;
1521             in->csstate = charset_init_state;
1522             in->wcpos = in->nwc = 0;
1523             in->pushback_chars = NULL;
1524             read_file(&hptr, in, idx);
1525         }
1526         in->currindex++;
1527     }
1528
1529     return head;
1530 }