mdw@git.distorted.org.uk Git - sgt/halibut/blob - input.c

   1 /*
   2  * input.c: read the source form
   3  */
   4
   5 #include <stdio.h>
   6 #include <assert.h>
   7 #include <time.h>
   8 #include "halibut.h"
   9
#define TAB_STOP 8                     /* tab width assumed when tracking column numbers */
  11
  12 static void setpos(input *in, char *fname) {
  13     in->pos.filename = fname;
  14     in->pos.line = 1;
  15     in->pos.col = (in->reportcols ? 1 : -1);
  16 }
  17
  18 static void unget(input *in, int c, filepos *pos) {
  19     if (in->npushback >= in->pushbacksize) {
  20         in->pushbacksize = in->npushback + 16;
  21         in->pushback = sresize(in->pushback, in->pushbacksize, pushback);
  22     }
  23     in->pushback[in->npushback].chr = c;
  24     in->pushback[in->npushback].pos = *pos;   /* structure copy */
  25     in->npushback++;
  26 }
  27
  28 /* ---------------------------------------------------------------------- */
  29 /*
  30  * Macro subsystem
  31  */
/*
 * A macro definition, as stored in the tree of macros. Entries are
 * ordered by `name' (see macrocmp); both strings are freed by
 * macrocleanup().
 */
typedef struct macro_Tag macro;
struct macro_Tag {
    wchar_t *name, *text;              /* macro name; its expansion text */
};
/*
 * One level of macro expansion currently in progress. get() reads
 * characters from the entry at the head of in->stack.
 */
struct macrostack_Tag {
    macrostack *next;                  /* enclosing expansion, or NULL */
    wchar_t *text;                     /* expansion text; points into the macro
                                        * record, and is not freed when this
                                        * entry is popped */
    int ptr, npushback;                /* ptr: index of next char of `text';
                                        * npushback: in->npushback at the moment
                                        * the expansion was started */
    filepos pos;                       /* position passed to macrolookup() */
};
  42 static int macrocmp(void *av, void *bv) {
  43     macro *a = (macro *)av, *b = (macro *)bv;
  44     return ustrcmp(a->name, b->name);
  45 }
  46 static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text,
  47                      filepos fpos) {
  48     macro *m = snew(macro);
  49     m->name = name;
  50     m->text = text;
  51     if (add234(macros, m) != m) {
  52         error(err_macroexists, &fpos, name);
  53         sfree(name);
  54         sfree(text);
  55     }
  56 }
  57 static int macrolookup(tree234 *macros, input *in, wchar_t *name,
  58                        filepos *pos) {
  59     macro m, *gotit;
  60     m.name = name;
  61     gotit = find234(macros, &m, NULL);
  62     if (gotit) {
  63         macrostack *expansion = snew(macrostack);
  64         expansion->next = in->stack;
  65         expansion->text = gotit->text;
  66         expansion->pos = *pos;         /* structure copy */
  67         expansion->ptr = 0;
  68         expansion->npushback = in->npushback;
  69         in->stack = expansion;
  70         return TRUE;
  71     } else
  72         return FALSE;
  73 }
  74 static void macrocleanup(tree234 *macros) {
  75     int ti;
  76     macro *m;
  77     for (ti = 0; (m = (macro *)index234(macros, ti)) != NULL; ti++) {
  78         sfree(m->name);
  79         sfree(m->text);
  80         sfree(m);
  81     }
  82     freetree234(macros);
  83 }
  84
  85 static void input_configure(input *in, paragraph *cfg) {
  86     assert(cfg->type == para_Config);
  87
  88     if (!ustricmp(cfg->keyword, L"input-charset")) {
  89         in->charset = charset_from_ustr(&cfg->fpos, uadv(cfg->keyword));
  90     }
  91 }
  92
  93 /*
  94  * Can return EOF
  95  */
  96 static int get(input *in, filepos *pos, rdstringc *rsc) {
  97     int pushbackpt = in->stack ? in->stack->npushback : 0;
  98     if (in->npushback > pushbackpt) {
  99         --in->npushback;
 100         if (pos)
 101             *pos = in->pushback[in->npushback].pos;   /* structure copy */
 102         return in->pushback[in->npushback].chr;
 103     }
 104     else if (in->stack) {
 105         wchar_t c = in->stack->text[in->stack->ptr];
 106         if (in->stack->text[++in->stack->ptr] == L'\0') {
 107             macrostack *tmp = in->stack;
 108             in->stack = tmp->next;
 109             sfree(tmp);
 110         }
 111         return c;
 112     }
 113     else if (in->currfp) {
 114
 115         while (in->wcpos >= in->nwc) {
 116
 117             int c = getc(in->currfp);
 118
 119             if (c == EOF) {
 120                 fclose(in->currfp);
 121                 in->currfp = NULL;
 122                 return EOF;
 123             }
 124
 125             if (rsc)
 126                 rdaddc(rsc, c);
 127
 128             /* Track line numbers, for error reporting */
 129             if (pos)
 130                 *pos = in->pos;
 131             if (in->reportcols) {
 132                 switch (c) {
 133                   case '\t':
 134                     in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP;
 135                     break;
 136                   case '\n':
 137                     in->pos.col = 1;
 138                     in->pos.line++;
 139                     break;
 140                   default:
 141                     in->pos.col++;
 142                     break;
 143                 }
 144             } else {
 145                 in->pos.col = -1;
 146                 if (c == '\n')
 147                     in->pos.line++;
 148             }
 149
 150             /*
 151              * Do input character set translation, so that we return
 152              * Unicode.
 153              */
 154             {
 155                 char buf[1];
 156                 char const *p;
 157                 int inlen;
 158
 159                 buf[0] = (char)c;
 160                 p = buf;
 161                 inlen = 1;
 162
 163                 in->nwc = charset_to_unicode(&p, &inlen,
 164                                              in->wc, lenof(in->wc),
 165                                              in->charset, &in->csstate,
 166                                              NULL, 0);
 167                 assert(p == buf+1 && inlen == 0);
 168
 169                 in->wcpos = 0;
 170             }
 171         }
 172
 173         return in->wc[in->wcpos++];
 174
 175     } else
 176         return EOF;
 177 }
 178
 179 /*
 180  * Lexical analysis of source files.
 181  */
/*
 * A lexical token, returned by value from get_token() and
 * get_codepar_token(). `text' and `origtext' are either NULL or
 * dynamically allocated; the dtor() macro frees both.
 */
typedef struct token_Tag token;
struct token_Tag {
    int type;                          /* one of the tok_* values below */
    int cmd, aux;                      /* cmd: c_* id filled in by match_kw()
                                        * for tok_cmd. aux: number for \S,
                                        * char code for \u; for tok_word, TRUE
                                        * if the word ends in a hyphen */
    wchar_t *text;                     /* command name or word text */
    char *origtext;                    /* raw input bytes recorded for the
                                        * token (set by get_token() for
                                        * tok_cmd and tok_word) */
    filepos pos;                       /* position reported by get() for the
                                        * token's first character */
};
enum {
    tok_eof,                           /* end of file */
    tok_eop,                           /* end of paragraph */
    tok_white,                         /* whitespace */
    tok_word,                          /* a word or word fragment */
    tok_cmd,                           /* \command */
    tok_lbrace,                        /* { */
    tok_rbrace                         /* } */
};
 199
/*
 * Halibut command keywords: the values stored in a token's `cmd'
 * field by match_kw(). c__invalid is used when the command name
 * matches nothing.
 */
enum {
    c__invalid,                        /* invalid command */
    c__comment,                        /* comment command (\#) */
    c__escaped,                        /* escaped character */
    c__nop,                            /* no-op */
    c__nbsp,                           /* nonbreaking space */
    c_A,                               /* appendix heading */
    c_B,                               /* bibliography entry */
    c_BR,                              /* bibliography rewrite */
    c_C,                               /* chapter heading */
    c_H,                               /* heading */
    c_I,                               /* invisible index mark */
    c_IM,                              /* index merge/rewrite */
    c_K,                               /* capitalised cross-reference */
    c_S,                               /* aux field is 0, 1, 2, ... */
    c_U,                               /* unnumbered-chapter heading */
    c_W,                               /* Web hyperlink */
    c_b,                               /* bulletted list */
    c_c,                               /* code */
    c_cfg,                             /* configuration directive */
    c_copyright,                       /* copyright statement */
    c_cw,                              /* weak code */
    c_date,                            /* document processing date */
    c_dd,                              /* description list: description */
    c_define,                          /* macro definition */
    c_dt,                              /* description list: described thing */
    c_e,                               /* emphasis */
    c_i,                               /* visible index mark */
    c_ii,                              /* uncapitalised visible index mark */
    c_k,                               /* uncapitalised cross-reference */
    c_lcont,                           /* continuation para(s) for list item */
    c_n,                               /* numbered list */
    c_nocite,                          /* bibliography trickery */
    c_preamble,                        /* (obsolete) preamble text */
    c_q,                               /* quote marks */
    c_quote,                           /* block-quoted paragraphs */
    c_rule,                            /* horizontal rule */
    c_title,                           /* document title */
    c_u,                               /* aux field is char code */
    c_versionid                        /* document RCS id */
};
 242
 243 /* Perhaps whitespace should be defined in a more Unicode-friendly way? */
 244 #define iswhite(c) ( (c)==32 || (c)==9 || (c)==13 || (c)==10 )
 245 #define isnl(c) ( (c)==10 )
 246 #define isdec(c) ( ((c)>='0'&&(c)<='9') )
 247 #define fromdec(c) ( (c)-'0' )
 248 #define ishex(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f'))
 249 #define fromhex(c) ( (c)<='9' ? (c)-'0' : ((c)&0xDF) - ('A'-10) )
 250 #define iscmd(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z'))
 251
 252 /*
 253  * Keyword comparison function. Like strcmp, but between a wchar_t *
 254  * and a char *.
 255  */
 256 static int kwcmp(wchar_t const *p, char const *q) {
 257     int i;
 258     do {
 259         i = *p - *q;
 260     } while (*p++ && *q++ && !i);
 261     return i;
 262 }
 263
 264 /*
 265  * Match a keyword.
 266  */
 267 static void match_kw(token *tok) {
 268     /*
 269      * FIXME. The ids are explicit in here so as to allow long-name
 270      * equivalents to the various very short keywords.
 271      */
 272     static const struct { char const *name; int id; } keywords[] = {
 273         {"#", c__comment},             /* comment command (\#) */
 274         {"-", c__escaped},             /* nonbreaking hyphen */
 275         {".", c__nop},                 /* no-op */
 276         {"A", c_A},                    /* appendix heading */
 277         {"B", c_B},                    /* bibliography entry */
 278         {"BR", c_BR},                  /* bibliography rewrite */
 279         {"C", c_C},                    /* chapter heading */
 280         {"H", c_H},                    /* heading */
 281         {"I", c_I},                    /* invisible index mark */
 282         {"IM", c_IM},                  /* index merge/rewrite */
 283         {"K", c_K},                    /* capitalised cross-reference */
 284         {"U", c_U},                    /* unnumbered-chapter heading */
 285         {"W", c_W},                    /* Web hyperlink */
 286         {"\\", c__escaped},            /* escaped backslash (\\) */
 287         {"_", c__nbsp},                /* nonbreaking space (\_) */
 288         {"b", c_b},                    /* bulletted list */
 289         {"c", c_c},                    /* code */
 290         {"cfg", c_cfg},                /* configuration directive */
 291         {"copyright", c_copyright},    /* copyright statement */
 292         {"cw", c_cw},                  /* weak code */
 293         {"date", c_date},              /* document processing date */
 294         {"dd", c_dd},                  /* description list: description */
 295         {"define", c_define},          /* macro definition */
 296         {"dt", c_dt},                  /* description list: described thing */
 297         {"e", c_e},                    /* emphasis */
 298         {"i", c_i},                    /* visible index mark */
 299         {"ii", c_ii},                  /* uncapitalised visible index mark */
 300         {"k", c_k},                    /* uncapitalised cross-reference */
 301         {"lcont", c_lcont},            /* continuation para(s) for list item */
 302         {"n", c_n},                    /* numbered list */
 303         {"nocite", c_nocite},          /* bibliography trickery */
 304         {"preamble", c_preamble},      /* (obsolete) preamble text */
 305         {"q", c_q},                    /* quote marks */
 306         {"quote", c_quote},            /* block-quoted paragraphs */
 307         {"rule", c_rule},              /* horizontal rule */
 308         {"title", c_title},            /* document title */
 309         {"versionid", c_versionid},    /* document RCS id */
 310         {"{", c__escaped},             /* escaped lbrace (\{) */
 311         {"}", c__escaped},             /* escaped rbrace (\}) */
 312     };
 313     int i, j, k, c;
 314
 315     /*
 316      * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
 317      * doesn't match correctly, we just fall through to the
 318      * binary-search phase.
 319      */
 320     if (tok->text[0] == 'S') {
 321         /* We expect numeric characters thereafter. */
 322         wchar_t *p = tok->text+1;
 323         int n;
 324         if (!*p)
 325             n = 1;
 326         else {
 327             n = 0;
 328             while (*p && isdec(*p)) {
 329                 n = 10 * n + fromdec(*p);
 330                 p++;
 331             }
 332         }
 333         if (!*p) {
 334             tok->cmd = c_S;
 335             tok->aux = n;
 336             return;
 337         }
 338     } else if (tok->text[0] == 'u') {
 339         /* We expect hex characters thereafter. */
 340         wchar_t *p = tok->text+1;
 341         int n = 0;
 342         while (*p && ishex(*p)) {
 343             n = 16 * n + fromhex(*p);
 344             p++;
 345         }
 346         if (!*p) {
 347             tok->cmd = c_u;
 348             tok->aux = n;
 349             return;
 350         }
 351     }
 352
 353     i = -1;
 354     j = sizeof(keywords)/sizeof(*keywords);
 355     while (j-i > 1) {
 356         k = (i+j)/2;
 357         c = kwcmp(tok->text, keywords[k].name);
 358         if (c < 0)
 359             j = k;
 360         else if (c > 0)
 361             i = k;
 362         else /* c == 0 */ {
 363             tok->cmd = keywords[k].id;
 364             return;
 365         }
 366     }
 367
 368     tok->cmd = c__invalid;
 369 }
 370
 371
/*
 * Read a token from the input file, in the normal way (`normal' in
 * the sense that code paragraphs work a different way).
 *
 * The returned token has `type' and `pos' set. For tok_cmd and
 * tok_word, `text' and `origtext' are freshly allocated strings which
 * the caller must free; for every other type they are NULL. `cmd'
 * (and, for \S and \u, `aux') is filled in by match_kw() for tok_cmd;
 * `aux' is set to TRUE/FALSE for tok_word.
 *
 * Two buffers are built up while reading: `rs' holds the Unicode text
 * of the token, and `rsc' holds the raw bytes get() read from the
 * file. `prevpos' remembers rsc.pos from just before the most recent
 * character was read, so that the raw bytes of a character that is
 * pushed back with unget() can be split off and saved in
 * in->pushback_chars, to be put at the front of the next token's raw
 * text.
 */
token get_token(input *in) {
    int c;
    int nls;
    int prevpos;
    token ret;
    rdstring rs = { 0, 0, NULL };
    rdstringc rsc = { 0, 0, NULL };
    filepos cpos;

    ret.text = NULL;                   /* default */
    ret.origtext = NULL;               /* default */
    /* Start the raw text with any bytes left over from the last call. */
    if (in->pushback_chars) {
        rdaddsc(&rsc, in->pushback_chars);
        sfree(in->pushback_chars);
        in->pushback_chars = NULL;
    }
    c = get(in, &cpos, &rsc);
    ret.pos = cpos;
    if (iswhite(c)) {                  /* tok_white or tok_eop */
        /*
         * Swallow the whole run of whitespace, counting newlines:
         * more than one newline makes it a paragraph break.
         */
        nls = 0;
        prevpos = 0;
        do {
            if (isnl(c))
                nls++;
            prevpos = rsc.pos;
        } while ((c = get(in, &cpos, &rsc)) != EOF && iswhite(c));
        /* Whitespace running up to end of file is reported as EOF. */
        if (c == EOF) {
            ret.type = tok_eof;
            sfree(rsc.text);
            return ret;
        }
        if (rsc.text) {
            in->pushback_chars = dupstr(rsc.text + prevpos);
            sfree(rsc.text);
        }
        unget(in, c, &cpos);
        ret.type = (nls > 1 ? tok_eop : tok_white);
        return ret;
    } else if (c == EOF) {             /* tok_eof */
        ret.type = tok_eof;
        sfree(rsc.text);
        return ret;
    } else if (c == '\\') {            /* tok_cmd */
        /* Restart the raw text, so it excludes the backslash. */
        rsc.pos = prevpos = 0;
        c = get(in, &cpos, &rsc);
        if (c == '-' || c == '\\' || c == '_' ||
            c == '#' || c == '{' || c == '}' || c == '.') {
            /* single-char command */
            /*
             * NOTE(review): prevpos is still 0 here, so below the
             * whole of rsc goes into in->pushback_chars and origtext
             * comes out empty -- confirm that this is intended.
             */
            rdadd(&rs, c);
        } else if (c == 'u') {
            /* \u followed by at most four hex digits */
            int len = 0;
            do {
                rdadd(&rs, c);
                len++;
                prevpos = rsc.pos;
                c = get(in, &cpos, &rsc);
            } while (ishex(c) && len < 5);
            unget(in, c, &cpos);
        } else if (iscmd(c)) {
            /* a command name: a run of letters and digits */
            do {
                rdadd(&rs, c);
                prevpos = rsc.pos;
                c = get(in, &cpos, &rsc);
            } while (iscmd(c));
            unget(in, c, &cpos);
        }
        /*
         * NOTE(review): if the character after the backslash matched
         * none of the cases above, nothing has been added to rs and
         * that character is not pushed back.
         */
        /*
         * Now match the command against the list of available
         * ones.
         */
        ret.type = tok_cmd;
        ret.text = ustrdup(rs.text);
        if (rsc.text) {
            /* Split the raw text: token's own bytes vs. the leftover. */
            in->pushback_chars = dupstr(rsc.text + prevpos);
            rsc.text[prevpos] = '\0';
            ret.origtext = dupstr(rsc.text);
        } else {
            ret.origtext = dupstr("");
        }
        match_kw(&ret);
        sfree(rs.text);
        sfree(rsc.text);
        return ret;
    } else if (c == '{') {             /* tok_lbrace */
        ret.type = tok_lbrace;
        sfree(rsc.text);
        return ret;
    } else if (c == '}') {             /* tok_rbrace */
        ret.type = tok_rbrace;
        sfree(rsc.text);
        return ret;
    } else {                           /* tok_word */
        /*
         * Read a word: the longest possible contiguous sequence of
         * things other than whitespace, backslash, braces and
         * hyphen. A hyphen terminates the word but is returned as
         * part of it; everything else is pushed back for the next
         * token. The `aux' field contains TRUE if the word ends in
         * a hyphen.
         */
        ret.aux = FALSE;               /* assumed for now */
        prevpos = 0;
        while (1) {
            if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) {
                /* Put back the character that caused termination */
                unget(in, c, &cpos);
                break;
            } else {
                rdadd(&rs, c);
                if (c == '-') {
                    /* the hyphen's raw bytes belong to this word */
                    prevpos = rsc.pos;
                    ret.aux = TRUE;
                    break;             /* hyphen terminates word */
                }
            }
            prevpos = rsc.pos;
            c = get(in, &cpos, &rsc);
        }
        ret.type = tok_word;
        ret.text = ustrdup(rs.text);
        if (rsc.text) {
            /* Split the raw text: token's own bytes vs. the leftover. */
            in->pushback_chars = dupstr(rsc.text + prevpos);
            rsc.text[prevpos] = '\0';
            ret.origtext = dupstr(rsc.text);
        } else {
            ret.origtext = dupstr("");
        }
        sfree(rs.text);
        sfree(rsc.text);
        return ret;
    }
}
 508
 509 /*
 510  * Determine whether the next input character is an open brace (for
 511  * telling code paragraphs from paragraphs which merely start with
 512  * code).
 513  */
 514 int isbrace(input *in) {
 515     int c;
 516     filepos cpos;
 517
 518     c = get(in, &cpos, NULL);
 519     unget(in, c, &cpos);
 520     return (c == '{');
 521 }
 522
 523 /*
 524  * Read the rest of a line that starts `\c'. Including nothing at
 525  * all (tok_word with empty text).
 526  */
 527 token get_codepar_token(input *in) {
 528     int c;
 529     token ret;
 530     rdstring rs = { 0, 0, NULL };
 531     filepos cpos;
 532
 533     ret.type = tok_word;
 534     ret.origtext = NULL;
 535     c = get(in, &cpos, NULL);          /* expect (and discard) one space */
 536     ret.pos = cpos;
 537     if (c == ' ') {
 538         c = get(in, &cpos, NULL);
 539         ret.pos = cpos;
 540     }
 541     while (!isnl(c) && c != EOF) {
 542         int c2 = c;
 543         c = get(in, &cpos, NULL);
 544         /* Discard \r just before \n. */
 545         if (c2 != 13 || !isnl(c))
 546             rdadd(&rs, c2);
 547     }
 548     unget(in, c, &cpos);
 549     ret.text = ustrdup(rs.text);
 550     sfree(rs.text);
 551     return ret;
 552 }
 553
 554 /*
 555  * Adds a new word to a linked list
 556  */
 557 static word *addword(word newword, word ***hptrptr) {
 558     word *mnewword;
 559     if (!hptrptr)
 560         return NULL;
 561     mnewword = snew(word);
 562     *mnewword = newword;               /* structure copy */
 563     mnewword->next = NULL;
 564     **hptrptr = mnewword;
 565     *hptrptr = &mnewword->next;
 566     return mnewword;
 567 }
 568
 569 /*
 570  * Adds a new paragraph to a linked list
 571  */
 572 static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
 573     paragraph *mnewpara = snew(paragraph);
 574     *mnewpara = newpara;               /* structure copy */
 575     mnewpara->next = NULL;
 576     **hptrptr = mnewpara;
 577     *hptrptr = &mnewpara->next;
 578     return mnewpara;
 579 }
 580
 581 /*
 582  * Destructor before token is reassigned; should catch most memory
 583  * leaks
 584  */
 585 #define dtor(t) ( sfree(t.text), sfree(t.origtext) )
 586
 587 /*
 588  * Reads a single file (ie until get() returns EOF)
 589  */
 590 static void read_file(paragraph ***ret, input *in, indexdata *idx) {
 591     token t;
 592     paragraph par;
 593     word wd, **whptr, **idximplicit;
 594     tree234 *macros;
 595     wchar_t utext[2], *wdtext;
 596     int style, spcstyle;
 597     int already;
 598     int iswhite, seenwhite;
 599     int type;
 600     int prev_para_type;
 601     struct stack_item {
 602         enum {
 603             stack_nop = 0,             /* do nothing (for error recovery) */
 604             stack_ualt = 1,            /* \u alternative */
 605             stack_style = 2,           /* \e, \c, \cw */
 606             stack_idx = 4,             /* \I, \i, \ii */
 607             stack_hyper = 8,           /* \W */
 608             stack_quote = 16,          /* \q */
 609         } type;
 610         word **whptr;                  /* to restore from \u alternatives */
 611         word **idximplicit;            /* to restore from \u alternatives */
 612         filepos fpos;
 613         int in_code;
 614     } *sitem;
 615     stack parsestk;
 616     struct crossparaitem {
 617         int type;                      /* currently c_lcont, c_quote or -1 */
 618         int seen_lcont, seen_quote;
 619     };
 620     stack crossparastk;
 621     word *indexword, *uword, *iword;
 622     word *idxwordlist;
 623     rdstring indexstr;
 624     int index_downcase, index_visible, indexing;
 625     const rdstring nullrs = { 0, 0, NULL };
 626     wchar_t uchr;
 627
 628     t.text = NULL;
 629     t.origtext = NULL;
 630     macros = newtree234(macrocmp);
 631     already = FALSE;
 632
 633     crossparastk = stk_new();
 634
 635     /*
 636      * Loop on each paragraph.
 637      */
 638     while (1) {
 639         int start_cmd = c__invalid;
 640         par.words = NULL;
 641         par.keyword = NULL;
 642         par.origkeyword = NULL;
 643         whptr = &par.words;
 644
 645         /*
 646          * Get a token.
 647          */
 648         do {
 649             if (!already) {
 650                 dtor(t), t = get_token(in);
 651             }
 652             already = FALSE;
 653         } while (t.type == tok_eop);
 654         if (t.type == tok_eof)
 655             break;
 656
 657         /*
 658          * Parse code paragraphs separately.
 659          */
 660         if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
 661             int wtype = word_WeakCode;
 662
 663             par.type = para_Code;
 664             par.fpos = t.pos;
 665             while (1) {
 666                 dtor(t), t = get_codepar_token(in);
 667                 wd.type = wtype;
 668                 wd.breaks = FALSE;     /* shouldn't need this... */
 669                 wd.text = ustrdup(t.text);
 670                 wd.alt = NULL;
 671                 wd.fpos = t.pos;
 672                 addword(wd, &whptr);
 673                 dtor(t), t = get_token(in);
 674                 if (t.type == tok_white) {
 675                     /*
 676                      * The newline after a code-paragraph line
 677                      */
 678                     dtor(t), t = get_token(in);
 679                 }
 680                 if (t.type == tok_eop || t.type == tok_eof ||
 681                     t.type == tok_rbrace) { /* might be } terminating \lcont */
 682                     if (t.type == tok_rbrace)
 683                         already = TRUE;
 684                     break;
 685                 } else if (t.type == tok_cmd && t.cmd == c_c) {
 686                     wtype = word_WeakCode;
 687                 } else if (t.type == tok_cmd && t.cmd == c_e &&
 688                            wtype == word_WeakCode) {
 689                     wtype = word_Emph;
 690                 } else {
 691                     error(err_brokencodepara, &t.pos);
 692                     prev_para_type = par.type;
 693                     addpara(par, ret);
 694                     while (t.type != tok_eop)   /* error recovery: */
 695                         dtor(t), t = get_token(in);   /* eat rest of paragraph */
 696                     goto codeparabroken;   /* ick, but such is life */
 697                 }
 698             }
 699             prev_para_type = par.type;
 700             addpara(par, ret);
 701             codeparabroken:
 702             continue;
 703         }
 704
 705         /*
 706          * Spot the special commands that define a grouping of more
 707          * than one paragraph, and also the closing braces that
 708          * finish them.
 709          */
 710         if (t.type == tok_cmd &&
 711             (t.cmd == c_lcont || t.cmd == c_quote)) {
 712             struct crossparaitem *sitem, *stop;
 713             int cmd = t.cmd;
 714
 715             /*
 716              * Expect, and swallow, an open brace.
 717              */
 718             dtor(t), t = get_token(in);
 719             if (t.type != tok_lbrace) {
 720                 error(err_explbr, &t.pos);
 721                 continue;
 722             }
 723
 724             /*
 725              * Also expect, and swallow, any whitespace after that
 726              * (a newline before a code paragraph wouldn't be
 727              * surprising).
 728              */
 729             do {
 730                 dtor(t), t = get_token(in);
 731             } while (t.type == tok_white);
 732             already = TRUE;
 733
 734             if (cmd == c_lcont) {
 735                 /*
 736                  * \lcont causes a continuation of a list item into
 737                  * multiple paragraphs (which may in turn contain
 738                  * nested lists, code paras etc). Hence, the previous
 739                  * paragraph must be of a list type.
 740                  */
 741                 sitem = snew(struct crossparaitem);
 742                 stop = (struct crossparaitem *)stk_top(crossparastk);
 743                 if (stop)
 744                     *sitem = *stop;
 745                 else
 746                     sitem->seen_quote = sitem->seen_lcont = 0;
 747
 748                 if (prev_para_type == para_Bullet ||
 749                     prev_para_type == para_NumberedList ||
 750                     prev_para_type == para_Description) {
 751                     sitem->type = c_lcont;
 752                     sitem->seen_lcont = 1;
 753                     par.type = para_LcontPush;
 754                     prev_para_type = par.type;
 755                     addpara(par, ret);
 756                 } else {
 757                     /*
 758                      * Push a null item on the cross-para stack so that
 759                      * when we see the corresponding closing brace we
 760                      * don't give a cascade error.
 761                      */
 762                     sitem->type = -1;
 763                     error(err_misplacedlcont, &t.pos);
 764                 }
 765             } else {
 766                 /*
 767                  * \quote causes a group of paragraphs to be
 768                  * block-quoted (typically they will be indented a
 769                  * bit).
 770                  */
 771                 sitem = snew(struct crossparaitem);
 772                 stop = (struct crossparaitem *)stk_top(crossparastk);
 773                 if (stop)
 774                     *sitem = *stop;
 775                 else
 776                     sitem->seen_quote = sitem->seen_lcont = 0;
 777                 sitem->type = c_quote;
 778                 sitem->seen_quote = 1;
 779                 par.type = para_QuotePush;
 780                 prev_para_type = par.type;
 781                 addpara(par, ret);
 782             }
 783             stk_push(crossparastk, sitem);
 784             continue;
 785         } else if (t.type == tok_rbrace) {
 786             struct crossparaitem *sitem = stk_pop(crossparastk);
 787             if (!sitem)
 788                 error(err_unexbrace, &t.pos);
 789             else {
 790                 switch (sitem->type) {
 791                   case c_lcont:
 792                     par.type = para_LcontPop;
 793                     prev_para_type = par.type;
 794                     addpara(par, ret);
 795                     break;
 796                   case c_quote:
 797                     par.type = para_QuotePop;
 798                     prev_para_type = par.type;
 799                     addpara(par, ret);
 800                     break;
 801                 }
 802                 sfree(sitem);
 803             }
 804             continue;
 805         }
 806
 807         /*
 808          * This token begins a paragraph. See if it's one of the
 809          * special commands that define a paragraph type.
 810          *
 811          * (note that \# is special in a way, and \nocite takes no
 812          * text)
 813          */
 814         par.type = para_Normal;
 815         if (t.type == tok_cmd) {
 816             int needkw;
 817             int is_macro = FALSE;
 818
 819             par.fpos = t.pos;
 820             switch (t.cmd) {
 821               default:
 822                 needkw = -1;
 823                 break;
 824               case c__invalid:
 825                 error(err_badparatype, t.text, &t.pos);
 826                 needkw = 4;
 827                 break;
 828               case c__comment:
 829                 if (isbrace(in))
 830                     break;             /* `\#{': isn't a comment para */
 831                 do {
 832                     dtor(t), t = get_token(in);
 833                 } while (t.type != tok_eop && t.type != tok_eof);
 834                 continue;              /* next paragraph */
 835                 /*
 836                  * `needkw' values:
 837                  *
 838                  *   1 -- exactly one keyword
 839                  *   2 -- at least one keyword
 840                  *   4 -- any number of keywords including zero
 841                  *   8 -- at least one keyword and then nothing else
 842                  *  16 -- nothing at all! no keywords, no body
 843                  *  32 -- no keywords at all
 844                  */
 845               case c_A: needkw = 2; par.type = para_Appendix; break;
 846               case c_B: needkw = 2; par.type = para_Biblio; break;
 847               case c_BR: needkw = 1; par.type = para_BR;
 848                 start_cmd = c_BR; break;
 849               case c_C: needkw = 2; par.type = para_Chapter; break;
 850               case c_H: needkw = 2; par.type = para_Heading;
 851                 par.aux = 0;
 852                 break;
 853               case c_IM: needkw = 2; par.type = para_IM;
 854                 start_cmd = c_IM; break;
 855               case c_S: needkw = 2; par.type = para_Subsect;
 856                 par.aux = t.aux; break;
 857               case c_U: needkw = 32; par.type = para_UnnumberedChapter; break;
 858                 /* For \b and \n the keyword is optional */
 859               case c_b: needkw = 4; par.type = para_Bullet; break;
 860               case c_dt: needkw = 4; par.type = para_DescribedThing; break;
 861               case c_dd: needkw = 4; par.type = para_Description; break;
 862               case c_n: needkw = 4; par.type = para_NumberedList; break;
 863               case c_cfg: needkw = 8; par.type = para_Config;
 864                 start_cmd = c_cfg; break;
 865               case c_copyright: needkw = 32; par.type = para_Copyright; break;
 866               case c_define: is_macro = TRUE; needkw = 1; break;
 867                 /* For \nocite the keyword is _everything_ */
 868               case c_nocite: needkw = 8; par.type = para_NoCite; break;
 869               case c_preamble: needkw = 32; par.type = para_Normal; break;
 870               case c_rule: needkw = 16; par.type = para_Rule; break;
 871               case c_title: needkw = 32; par.type = para_Title; break;
 872               case c_versionid: needkw = 32; par.type = para_VersionID; break;
 873             }
 874
 875             if (par.type == para_Chapter ||
 876                 par.type == para_Heading ||
 877                 par.type == para_Subsect ||
 878                 par.type == para_Appendix ||
 879                 par.type == para_UnnumberedChapter) {
 880                 struct crossparaitem *sitem = stk_top(crossparastk);
 881                 if (sitem && (sitem->seen_lcont || sitem->seen_quote)) {
 882                     error(err_sectmarkerinblock,
 883                           &t.pos,
 884                           (sitem->seen_lcont ? "lcont" : "quote"));
 885                 }
 886             }
 887
 888             if (needkw > 0) {
 889                 rdstring rs = { 0, 0, NULL };
 890                 rdstringc rsc = { 0, 0, NULL };
 891                 int nkeys = 0;
 892                 filepos fp;
 893
 894                 /* Get keywords. */
 895                 dtor(t), t = get_token(in);
 896                 fp = t.pos;
 897                 while (t.type == tok_lbrace) {
 898                     /* This is a keyword. */
 899                     nkeys++;
 900                     /* FIXME: there will be bugs if anyone specifies an
 901                      * empty keyword (\foo{}), so trap this case. */
 902                     while (dtor(t), t = get_token(in),
 903                            t.type == tok_word ||
 904                            t.type == tok_white ||
 905                            (t.type == tok_cmd && t.cmd == c__nbsp) ||
 906                            (t.type == tok_cmd && t.cmd == c__escaped) ||
 907                            (t.type == tok_cmd && t.cmd == c_u)) {
 908                         if (t.type == tok_white ||
 909                             (t.type == tok_cmd && t.cmd == c__nbsp)) {
 910                             rdadd(&rs, ' ');
 911                             rdaddc(&rsc, ' ');
 912                         } else if (t.type == tok_cmd && t.cmd == c_u) {
 913                             rdadd(&rs, t.aux);
 914                             rdaddc(&rsc, '\\');
 915                             rdaddsc(&rsc, t.origtext);
 916                         } else {
 917                             rdadds(&rs, t.text);
 918                             rdaddsc(&rsc, t.origtext);
 919                         }
 920                     }
 921                     if (t.type != tok_rbrace) {
 922                         error(err_kwunclosed, &t.pos);
 923                         continue;
 924                     }
 925                     rdadd(&rs, 0);     /* add string terminator */
 926                     rdaddc(&rsc, 0);   /* add string terminator */
 927                     dtor(t), t = get_token(in); /* eat right brace */
 928                 }
 929
 930                 rdadd(&rs, 0);         /* add string terminator */
 931                 rdaddc(&rsc, 0);       /* add string terminator */
 932
 933                 /* See whether we have the right number of keywords. */
 934                 if ((needkw & 48) && nkeys > 0)
 935                     error(err_kwillegal, &fp);
 936                 if ((needkw & 11) && nkeys == 0)
 937                     error(err_kwexpected, &fp);
 938                 if ((needkw & 5) && nkeys > 1)
 939                     error(err_kwtoomany, &fp);
 940
 941                 if (is_macro) {
 942                     /*
 943                      * Macro definition. Get the rest of the line
 944                      * as a code-paragraph token, repeatedly until
 945                      * there's nothing more left of it. Separate
 946                      * with newlines.
 947                      */
 948                     rdstring macrotext = { 0, 0, NULL };
 949                     while (1) {
 950                         dtor(t), t = get_codepar_token(in);
 951                         if (macrotext.pos > 0)
 952                             rdadd(&macrotext, L'\n');
 953                         rdadds(&macrotext, t.text);
 954                         dtor(t), t = get_token(in);
 955                         if (t.type == tok_eop) break;
 956                     }
 957                     macrodef(macros, rs.text, macrotext.text, fp);
 958                     continue;          /* next paragraph */
 959                 }
 960
 961                 par.keyword = rdtrim(&rs);
 962                 par.origkeyword = rdtrimc(&rsc);
 963
 964                 /* Move to EOP in case of needkw==8 or 16 (no body) */
 965                 if (needkw & 24) {
 966                     /* We allow whitespace even when we expect no para body */
 967                     while (t.type == tok_white)
 968                         dtor(t), t = get_token(in);
 969                     if (t.type != tok_eop && t.type != tok_eof &&
 970                         (start_cmd == c__invalid ||
 971                          t.type != tok_cmd || t.cmd != start_cmd)) {
 972                         error(err_bodyillegal, &t.pos);
 973                         /* Error recovery: eat the rest of the paragraph */
 974                         while (t.type != tok_eop && t.type != tok_eof &&
 975                                (start_cmd == c__invalid ||
 976                                 t.type != tok_cmd || t.cmd != start_cmd))
 977                             dtor(t), t = get_token(in);
 978                     }
 979                     if (t.type == tok_cmd)
 980                         already = TRUE;/* inhibit get_token at top of loop */
 981                     prev_para_type = par.type;
 982                     addpara(par, ret);
 983
 984                     if (par.type == para_Config) {
 985                         input_configure(in, &par);
 986                     }
 987                     continue;          /* next paragraph */
 988                 }
 989             }
 990         }
 991
 992         /*
 993          * Now read the actual paragraph, word by word, adding to
 994          * the paragraph list.
 995          *
 996          * Mid-paragraph commands:
 997          *
 998          *  \K \k
 999          *  \c \cw
1000          *  \e
1001          *  \i \ii
1002          *  \I
1003          *  \u
1004          *  \W
1005          *  \date
1006          *  \\ \{ \}
1007          */
1008         parsestk = stk_new();
1009         style = word_Normal;
1010         spcstyle = word_WhiteSpace;
1011         indexing = FALSE;
1012         seenwhite = TRUE;
1013         while (t.type != tok_eop && t.type != tok_eof) {
1014             iswhite = FALSE;
1015             already = FALSE;
1016
1017             /* Handle implicit paragraph breaks after \IM, \BR etc */
1018             if (start_cmd != c__invalid &&
1019                 t.type == tok_cmd && t.cmd == start_cmd) {
1020                 already = TRUE;        /* inhibit get_token at top of loop */
1021                 break;
1022             }
1023
1024             if (t.type == tok_cmd && t.cmd == c__nop) {
1025                 dtor(t), t = get_token(in);
1026                 continue;              /* do nothing! */
1027             }
1028
1029             if (t.type == tok_cmd && t.cmd == c__escaped) {
1030                 t.type = tok_word;     /* nice and simple */
1031                 t.aux = 0;             /* even if `\-' - nonbreaking! */
1032             }
1033             if (t.type == tok_cmd && t.cmd == c__nbsp) {
1034                 t.type = tok_word;     /* nice and simple */
1035                 sfree(t.text);
1036                 t.text = ustrdup(L" ");  /* text is ` ' not `_' */
1037                 t.aux = 0;             /* (nonbreaking) */
1038             }
1039             switch (t.type) {
1040               case tok_white:
1041                 if (whptr == &par.words)
1042                     break;             /* strip whitespace at start of para */
1043                 wd.text = NULL;
1044                 wd.type = spcstyle;
1045                 wd.alt = NULL;
1046                 wd.aux = 0;
1047                 wd.fpos = t.pos;
1048                 wd.breaks = FALSE;
1049
1050                 /*
1051                  * Inhibit use of whitespace if it's (probably the
1052                  * newline) before a repeat \IM / \BR type
1053                  * directive.
1054                  */
1055                 if (start_cmd != c__invalid) {
1056                     dtor(t), t = get_token(in);
1057                     already = TRUE;
1058                     if (t.type == tok_cmd && t.cmd == start_cmd)
1059                         break;
1060                 }
1061
1062                 if (indexing)
1063                     rdadd(&indexstr, ' ');
1064                 if (!indexing || index_visible)
1065                     addword(wd, &whptr);
1066                 if (indexing)
1067                     addword(wd, &idximplicit);
1068                 iswhite = TRUE;
1069                 break;
1070               case tok_word:
1071                 if (indexing)
1072                     rdadds(&indexstr, t.text);
1073                 wd.type = style;
1074                 wd.alt = NULL;
1075                 wd.aux = 0;
1076                 wd.fpos = t.pos;
1077                 wd.breaks = t.aux;
1078                 if (!indexing || index_visible) {
1079                     wd.text = ustrdup(t.text);
1080                     addword(wd, &whptr);
1081                 }
1082                 if (indexing) {
1083                     wd.text = ustrdup(t.text);
1084                     addword(wd, &idximplicit);
1085                 }
1086                 break;
1087               case tok_lbrace:
1088                 error(err_unexbrace, &t.pos);
1089                 /* Error recovery: push nop */
1090                 sitem = snew(struct stack_item);
1091                 sitem->type = stack_nop;
1092                 sitem->fpos = t.pos;
1093                 stk_push(parsestk, sitem);
1094                 break;
1095               case tok_rbrace:
1096                 sitem = stk_pop(parsestk);
1097                 if (!sitem) {
1098                     /*
1099                      * This closing brace could have been an
1100                      * indication that the cross-paragraph stack
1101                      * wants popping. Accordingly, we treat it here
1102                      * as an indication that the paragraph is over.
1103                      */
1104                     already = TRUE;
1105                     goto finished_para;
1106                 } else {
1107                     if (sitem->type & stack_ualt) {
1108                         whptr = sitem->whptr;
1109                         idximplicit = sitem->idximplicit;
1110                     }
1111                     if (sitem->type & stack_style) {
1112                         style = word_Normal;
1113                         spcstyle = word_WhiteSpace;
1114                     }
1115                     if (sitem->type & stack_idx) {
1116                         indexword->text = ustrdup(indexstr.text);
1117                         if (index_downcase) {
1118                             word *w;
1119
1120                             ustrlow(indexword->text);
1121                             ustrlow(indexstr.text);
1122
1123                             for (w = idxwordlist; w; w = w->next)
1124                                 if (w->text)
1125                                     ustrlow(w->text);
1126                         }
1127                         indexing = FALSE;
1128                         rdadd(&indexstr, L'\0');
1129                         index_merge(idx, FALSE, indexstr.text,
1130                                     idxwordlist, &sitem->fpos);
1131                         sfree(indexstr.text);
1132                     }
1133                     if (sitem->type & stack_hyper) {
1134                         wd.text = NULL;
1135                         wd.type = word_HyperEnd;
1136                         wd.alt = NULL;
1137                         wd.aux = 0;
1138                         wd.fpos = t.pos;
1139                         wd.breaks = FALSE;
1140                         if (!indexing || index_visible)
1141                             addword(wd, &whptr);
1142                         if (indexing)
1143                             addword(wd, &idximplicit);
1144                     }
1145                     if (sitem->type & stack_quote) {
1146                         wd.text = NULL;
1147                         wd.type = toquotestyle(style);
1148                         wd.alt = NULL;
1149                         wd.aux = quote_Close;
1150                         wd.fpos = t.pos;
1151                         wd.breaks = FALSE;
1152                         if (!indexing || index_visible)
1153                             addword(wd, &whptr);
1154                         if (indexing) {
1155                             rdadd(&indexstr, L'"');
1156                             addword(wd, &idximplicit);
1157                         }
1158                     }
1159                 }
1160                 sfree(sitem);
1161                 break;
1162               case tok_cmd:
1163                 switch (t.cmd) {
1164                   case c__comment:
1165                     /*
1166                      * In-paragraph comment: \#{ balanced braces }
1167                      *
1168                      * Anything goes here; even tok_eop. We should
1169                      * eat whitespace after the close brace _if_
1170                      * there was whitespace before the \#.
1171                      */
1172                     dtor(t), t = get_token(in);
1173                     if (t.type != tok_lbrace) {
1174                         error(err_explbr, &t.pos);
1175                     } else {
1176                         int braces = 1;
1177                         while (braces > 0) {
1178                             dtor(t), t = get_token(in);
1179                             if (t.type == tok_lbrace)
1180                                 braces++;
1181                             else if (t.type == tok_rbrace)
1182                                 braces--;
1183                             else if (t.type == tok_eof) {
1184                                 error(err_commenteof, &t.pos);
1185                                 break;
1186                             }
1187                         }
1188                     }
1189                     if (seenwhite) {
1190                         already = TRUE;
1191                         dtor(t), t = get_token(in);
1192                         if (t.type == tok_white) {
1193                             iswhite = TRUE;
1194                             already = FALSE;
1195                         }
1196                     }
1197                     break;
1198                   case c_q:
1199                     dtor(t), t = get_token(in);
1200                     if (t.type != tok_lbrace) {
1201                         error(err_explbr, &t.pos);
1202                     } else {
1203                         /*
1204                          * Enforce that \q may not be used anywhere
1205                          * within \c. (It shouldn't be necessary
1206                          * since the whole point of \c should be
1207                          * that the user wants to exercise exact
1208                          * control over the glyphs used, and
1209                          * forbidding it has the useful effect of
1210                          * relieving some backends of having to
1211                          * make difficult decisions.)
1212                          */
1213                         int stype;
1214
1215                         if (style != word_Code && style != word_WeakCode) {
1216                             wd.text = NULL;
1217                             wd.type = toquotestyle(style);
1218                             wd.alt = NULL;
1219                             wd.aux = quote_Open;
1220                             wd.fpos = t.pos;
1221                             wd.breaks = FALSE;
1222                             if (!indexing || index_visible)
1223                                 addword(wd, &whptr);
1224                             if (indexing) {
1225                                 rdadd(&indexstr, L'"');
1226                                 addword(wd, &idximplicit);
1227                             }
1228                             stype = stack_quote;
1229                         } else {
1230                             error(err_codequote, &t.pos);
1231                             stype = stack_nop;
1232                         }
1233                         sitem = snew(struct stack_item);
1234                         sitem->fpos = t.pos;
1235                         sitem->type = stype;
1236                         stk_push(parsestk, sitem);
1237                     }
1238                     break;
1239                   case c_K:
1240                   case c_k:
1241                   case c_W:
1242                   case c_date:
1243                     /*
1244                      * Keyword, hyperlink, or \date. We expect a
1245                      * left brace, some text, and then a right
1246                      * brace. No nesting; no arguments.
1247                      */
1248                     wd.fpos = t.pos;
1249                     wd.breaks = FALSE;
1250                     if (t.cmd == c_K)
1251                         wd.type = word_UpperXref;
1252                     else if (t.cmd == c_k)
1253                         wd.type = word_LowerXref;
1254                     else if (t.cmd == c_W)
1255                         wd.type = word_HyperLink;
1256                     else
1257                         wd.type = word_Normal;
1258                     dtor(t), t = get_token(in);
1259                     if (t.type != tok_lbrace) {
1260                         if (wd.type == word_Normal) {
1261                             time_t thetime = time(NULL);
1262                             struct tm *broken = localtime(&thetime);
1263                             already = TRUE;
1264                             wdtext = ustrftime(NULL, broken);
1265                             wd.type = style;
1266                         } else {
1267                             error(err_explbr, &t.pos);
1268                             wdtext = NULL;
1269                         }
1270                     } else {
1271                         rdstring rs = { 0, 0, NULL };
1272                         while (dtor(t), t = get_token(in),
1273                                t.type == tok_word || t.type == tok_white) {
1274                             if (t.type == tok_white)
1275                                 rdadd(&rs, ' ');
1276                             else
1277                                 rdadds(&rs, t.text);
1278                         }
1279                         if (wd.type == word_Normal) {
1280                             time_t thetime = time(NULL);
1281                             struct tm *broken = localtime(&thetime);
1282                             wdtext = ustrftime(rs.text, broken);
1283                             wd.type = style;
1284                         } else {
1285                             wdtext = ustrdup(rs.text);
1286                         }
1287                         sfree(rs.text);
1288                         if (t.type != tok_rbrace) {
1289                             error(err_kwexprbr, &t.pos);
1290                         }
1291                     }
1292                     wd.alt = NULL;
1293                     wd.aux = 0;
1294                     if (!indexing || index_visible) {
1295                         wd.text = ustrdup(wdtext);
1296                         addword(wd, &whptr);
1297                     }
1298                     if (indexing) {
1299                         wd.text = ustrdup(wdtext);
1300                         addword(wd, &idximplicit);
1301                     }
1302                     sfree(wdtext);
1303                     if (wd.type == word_HyperLink) {
1304                         /*
1305                          * Hyperlinks are different: they then
1306                          * expect another left brace, to begin
1307                          * delimiting the text marked by the link.
1308                          */
1309                         dtor(t), t = get_token(in);
1310                         sitem = snew(struct stack_item);
1311                         sitem->fpos = wd.fpos;
1312                         sitem->type = stack_hyper;
1313                         /*
1314                          * Special cases: \W{}\i, \W{}\ii
1315                          */
1316                         if (t.type == tok_cmd &&
1317                             (t.cmd == c_i || t.cmd == c_ii)) {
1318                             if (indexing) {
1319                                 error(err_nestedindex, &t.pos);
1320                             } else {
1321                                 /* Add an index-reference word with no
1322                                  * text as yet */
1323                                 wd.type = word_IndexRef;
1324                                 wd.text = NULL;
1325                                 wd.alt = NULL;
1326                                 wd.aux = 0;
1327                                 wd.breaks = FALSE;
1328                                 indexword = addword(wd, &whptr);
1329                                 /* Set up a rdstring to read the
1330                                  * index text */
1331                                 indexstr = nullrs;
1332                                 /* Flags so that we do the Right
1333                                  * Things with text */
1334                                 index_visible = (type != c_I);
1335                                 index_downcase = (type == c_ii);
1336                                 indexing = TRUE;
1337                                 idxwordlist = NULL;
1338                                 idximplicit = &idxwordlist;
1339
1340                                 sitem->type |= stack_idx;
1341                             }
1342                             dtor(t), t = get_token(in);
1343                         }
1344                         /*
1345                          * Special cases: \W{}\c, \W{}\e, \W{}\cw
1346                          */
1347                         if (t.type == tok_cmd &&
1348                             (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
1349                             if (style != word_Normal)
1350                                 error(err_nestedstyles, &t.pos);
1351                             else {
1352                                 style = (t.cmd == c_c ? word_Code :
1353                                          t.cmd == c_cw ? word_WeakCode :
1354                                          word_Emph);
1355                                 spcstyle = tospacestyle(style);
1356                                 sitem->type |= stack_style;
1357                             }
1358                             dtor(t), t = get_token(in);
1359                         }
1360                         if (t.type != tok_lbrace) {
1361                             error(err_explbr, &t.pos);
1362                             sfree(sitem);
1363                         } else {
1364                             stk_push(parsestk, sitem);
1365                         }
1366                     }
1367                     break;
1368                   case c_c:
1369                   case c_cw:
1370                   case c_e:
1371                     type = t.cmd;
1372                     if (style != word_Normal) {
1373                         error(err_nestedstyles, &t.pos);
1374                         /* Error recovery: eat lbrace, push nop. */
1375                         dtor(t), t = get_token(in);
1376                         sitem = snew(struct stack_item);
1377                         sitem->fpos = t.pos;
1378                         sitem->type = stack_nop;
1379                         stk_push(parsestk, sitem);
1380                     }
1381                     dtor(t), t = get_token(in);
1382                     if (t.type != tok_lbrace) {
1383                         error(err_explbr, &t.pos);
1384                     } else {
1385                         style = (type == c_c ? word_Code :
1386                                  type == c_cw ? word_WeakCode :
1387                                  word_Emph);
1388                         spcstyle = tospacestyle(style);
1389                         sitem = snew(struct stack_item);
1390                         sitem->fpos = t.pos;
1391                         sitem->type = stack_style;
1392                         stk_push(parsestk, sitem);
1393                     }
1394                     break;
1395                   case c_i:
1396                   case c_ii:
1397                   case c_I:
1398                     type = t.cmd;
1399                     if (indexing) {
1400                         error(err_nestedindex, &t.pos);
1401                         /* Error recovery: eat lbrace, push nop. */
1402                         dtor(t), t = get_token(in);
1403                         sitem = snew(struct stack_item);
1404                         sitem->fpos = t.pos;
1405                         sitem->type = stack_nop;
1406                         stk_push(parsestk, sitem);
1407                     }
1408                     sitem = snew(struct stack_item);
1409                     sitem->fpos = t.pos;
1410                     sitem->type = stack_idx;
1411                     dtor(t), t = get_token(in);
1412                     /*
1413                      * Special cases: \i\c, \i\e, \i\cw
1414                      */
1415                     wd.fpos = t.pos;
1416                     if (t.type == tok_cmd &&
1417                         (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
1418                         if (style != word_Normal)
1419                             error(err_nestedstyles, &t.pos);
1420                         else {
1421                             style = (t.cmd == c_c ? word_Code :
1422                                      t.cmd == c_cw ? word_WeakCode :
1423                                      word_Emph);
1424                             spcstyle = tospacestyle(style);
1425                             sitem->type |= stack_style;
1426                         }
1427                         dtor(t), t = get_token(in);
1428                     }
1429                     if (t.type != tok_lbrace) {
1430                         sfree(sitem);
1431                         error(err_explbr, &t.pos);
1432                     } else {
1433                         /* Add an index-reference word with no text as yet */
1434                         wd.type = word_IndexRef;
1435                         wd.text = NULL;
1436                         wd.alt = NULL;
1437                         wd.aux = 0;
1438                         wd.breaks = FALSE;
1439                         indexword = addword(wd, &whptr);
1440                         /* Set up a rdstring to read the index text */
1441                         indexstr = nullrs;
1442                         /* Flags so that we do the Right Things with text */
1443                         index_visible = (type != c_I);
1444                         index_downcase = (type == c_ii);
1445                         indexing = TRUE;
1446                         idxwordlist = NULL;
1447                         idximplicit = &idxwordlist;
1448                         /* Stack item to close the indexing on exit */
1449                         stk_push(parsestk, sitem);
1450                     }
1451                     break;
1452                   case c_u:
1453                     uchr = t.aux;
1454                     utext[0] = uchr; utext[1] = 0;
1455                     wd.type = style;
1456                     wd.breaks = FALSE;
1457                     wd.alt = NULL;
1458                     wd.aux = 0;
1459                     wd.fpos = t.pos;
1460                     if (!indexing || index_visible) {
1461                         wd.text = ustrdup(utext);
1462                         uword = addword(wd, &whptr);
1463                     } else
1464                         uword = NULL;
1465                     if (indexing) {
1466                         wd.text = ustrdup(utext);
1467                         iword = addword(wd, &idximplicit);
1468                     } else
1469                         iword = NULL;
1470                     dtor(t), t = get_token(in);
1471                     if (t.type == tok_lbrace) {
1472                         /*
1473                          * \u with a left brace. Until the brace
1474                          * closes, all further words go on a
1475                          * sidetrack from the main thread of the
1476                          * paragraph.
1477                          */
1478                         sitem = snew(struct stack_item);
1479                         sitem->fpos = t.pos;
1480                         sitem->type = stack_ualt;
1481                         sitem->whptr = whptr;
1482                         sitem->idximplicit = idximplicit;
1483                         stk_push(parsestk, sitem);
1484                         whptr = uword ? &uword->alt : NULL;
1485                         idximplicit = iword ? &iword->alt : NULL;
1486                     } else {
1487                         if (indexing)
1488                             rdadd(&indexstr, uchr);
1489                         already = TRUE;
1490                     }
1491                     break;
1492                   default:
1493                     if (!macrolookup(macros, in, t.text, &t.pos))
1494                         error(err_badmidcmd, t.text, &t.pos);
1495                     break;
1496                 }
1497             }
1498             if (!already)
1499                 dtor(t), t = get_token(in);
1500             seenwhite = iswhite;
1501         }
1502         finished_para:
1503         /* Check the stack is empty */
1504         if (stk_top(parsestk)) {
1505             while ((sitem = stk_pop(parsestk)))
1506                 sfree(sitem);
1507             error(err_missingrbrace, &t.pos);
1508         }
1509         stk_free(parsestk);
1510         prev_para_type = par.type;
1511         addpara(par, ret);
1512         if (t.type == tok_eof)
1513             already = TRUE;
1514     }
1515
1516     if (stk_top(crossparastk)) {
1517         void *p;
1518
1519         error(err_missingrbrace2, &t.pos);
1520         while ((p = stk_pop(crossparastk)))
1521             sfree(p);
1522     }
1523
1524     /*
1525      * We break to here rather than returning, because otherwise
1526      * this cleanup doesn't happen.
1527      */
1528     dtor(t);
1529     macrocleanup(macros);
1530
1531     stk_free(crossparastk);
1532 }
1533
1534 paragraph *read_input(input *in, indexdata *idx) {
1535     paragraph *head = NULL;
1536     paragraph **hptr = &head;
1537
1538     while (in->currindex < in->nfiles) {
1539         in->currfp = fopen(in->filenames[in->currindex], "r");
1540         if (in->currfp) {
1541             setpos(in, in->filenames[in->currindex]);
1542             in->charset = in->defcharset;
1543             in->csstate = charset_init_state;
1544             in->wcpos = in->nwc = 0;
1545             in->pushback_chars = NULL;
1546             read_file(&hptr, in, idx);
1547         }
1548         in->currindex++;
1549     }
1550
1551     return head;
1552 }