X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/halibut/blobdiff_plain/d7482997dd1ca71b70df43c15dd5956f435a1a7e..4b3c5afb39849b3d0e738248daec9ab7dd8aac6d:/input.c?ds=sidebyside diff --git a/input.c b/input.c index 15e0a97..0535495 100644 --- a/input.c +++ b/input.c @@ -180,11 +180,14 @@ enum { c_copyright, /* copyright statement */ c_cw, /* weak code */ c_date, /* document processing date */ + c_dd, /* description list: description */ c_define, /* macro definition */ + c_dt, /* description list: described thing */ c_e, /* emphasis */ c_i, /* visible index mark */ c_ii, /* uncapitalised visible index mark */ c_k, /* uncapitalised cross-reference */ + c_lcont, /* continuation para(s) for list item */ c_n, /* numbered list */ c_nocite, /* bibliography trickery */ c_preamble, /* document preamble text */ @@ -245,11 +248,14 @@ static void match_kw(token *tok) { {"copyright", c_copyright}, /* copyright statement */ {"cw", c_cw}, /* weak code */ {"date", c_date}, /* document processing date */ + {"dd", c_dd}, /* description list: description */ {"define", c_define}, /* macro definition */ + {"dt", c_dt}, /* description list: described thing */ {"e", c_e}, /* emphasis */ {"i", c_i}, /* visible index mark */ {"ii", c_ii}, /* uncapitalised visible index mark */ {"k", c_k}, /* uncapitalised cross-reference */ + {"lcont", c_lcont}, /* continuation para(s) for list item */ {"n", c_n}, /* numbered list */ {"nocite", c_nocite}, /* bibliography trickery */ {"preamble", c_preamble}, /* document preamble text */ @@ -506,6 +512,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { int already; int iswhite, seenwhite; int type; + int prev_para_type; struct stack_item { enum { stack_nop = 0, /* do nothing (for error recovery) */ @@ -519,6 +526,11 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { word **idximplicit; /* to restore from \u alternatives */ } *sitem; stack parsestk; + struct crossparaitem { + int type; /* currently c_lcont or -1 */ + int seen_lcont; + }; + stack crossparastk; word *indexword, *uword, *iword; word *idxwordlist; rdstring indexstr; @@ -528,11 +540,15 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { t.text = NULL; macros = newtree234(macrocmp); + already = FALSE; + + crossparastk = stk_new(); /* * Loop on each paragraph. */ while (1) { + int start_cmd = c__invalid; par.words = NULL; par.keyword = NULL; whptr = &par.words; @@ -540,19 +556,26 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { /* * Get a token. */ - dtor(t), t = get_token(in); + do { + if (!already) { + dtor(t), t = get_token(in); + } + already = FALSE; + } while (t.type == tok_eop); if (t.type == tok_eof) - return; + break; /* * Parse code paragraphs separately. */ if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) { + int wtype = word_WeakCode; + par.type = para_Code; par.fpos = t.pos; while (1) { dtor(t), t = get_codepar_token(in); - wd.type = word_WeakCode; + wd.type = wtype; wd.breaks = FALSE; /* shouldn't need this... */ wd.text = ustrdup(t.text); wd.alt = NULL; @@ -567,20 +590,90 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { } if (t.type == tok_eop || t.type == tok_eof) break; - else if (t.type != tok_cmd || t.cmd != c_c) { + else if (t.type == tok_cmd && t.cmd == c_c) + wtype = word_WeakCode; + else if (t.type == tok_cmd && t.cmd == c_e && + wtype == word_WeakCode) + wtype = word_Emph; + else { error(err_brokencodepara, &t.pos); + prev_para_type = par.type; addpara(par, ret); while (t.type != tok_eop) /* error recovery: */ dtor(t), t = get_token(in); /* eat rest of paragraph */ goto codeparabroken; /* ick, but such is life */ } } + prev_para_type = par.type; addpara(par, ret); codeparabroken: continue; } /* + * Spot the special commands that define a grouping of more + * than one paragraph, and also the closing braces that + * finish them. + */ + if (t.type == tok_cmd && + t.cmd == c_lcont) { + struct crossparaitem *sitem, *stop; + + /* + * Expect, and swallow, an open brace. + */ + dtor(t), t = get_token(in); + if (t.type != tok_lbrace) { + error(err_explbr, &t.pos); + continue; + } + + /* + * \lcont causes a continuation of a list item into + * multiple paragraphs (which may in turn contain + * nested lists, code paras etc). Hence, the previous + * paragraph must be of a list type. + */ + sitem = mknew(struct crossparaitem); + stop = (struct crossparaitem *)stk_top(crossparastk); + if (prev_para_type == para_Bullet || + prev_para_type == para_NumberedList || + prev_para_type == para_Description) { + sitem->type = c_lcont; + sitem->seen_lcont = 1; + par.type = para_LcontPush; + prev_para_type = par.type; + addpara(par, ret); + } else { + /* + * Push a null item on the cross-para stack so that + * when we see the corresponding closing brace we + * don't give a cascade error. + */ + sitem->type = -1; + sitem->seen_lcont = (stop ? stop->seen_lcont : 0); + error(err_misplacedlcont, &t.pos); + } + stk_push(crossparastk, sitem); + continue; + } else if (t.type == tok_rbrace) { + struct crossparaitem *sitem = stk_pop(crossparastk); + if (!sitem) + error(err_unexbrace, &t.pos); + else { + switch (sitem->type) { + case c_lcont: + par.type = para_LcontPop; + prev_para_type = par.type; + addpara(par, ret); + break; + } + sfree(sitem); + } + continue; + } + + /* * This token begins a paragraph. See if it's one of the * special commands that define a paragraph type. * @@ -620,19 +713,24 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { */ case c_A: needkw = 2; par.type = para_Appendix; break; case c_B: needkw = 2; par.type = para_Biblio; break; - case c_BR: needkw = 1; par.type = para_BR; break; + case c_BR: needkw = 1; par.type = para_BR; + start_cmd = c_BR; break; case c_C: needkw = 2; par.type = para_Chapter; break; case c_H: needkw = 2; par.type = para_Heading; par.aux = 0; break; - case c_IM: needkw = 2; par.type = para_IM; break; + case c_IM: needkw = 2; par.type = para_IM; + start_cmd = c_IM; break; case c_S: needkw = 2; par.type = para_Subsect; par.aux = t.aux; break; case c_U: needkw = 32; par.type = para_UnnumberedChapter; break; /* For \b and \n the keyword is optional */ case c_b: needkw = 4; par.type = para_Bullet; break; + case c_dt: needkw = 4; par.type = para_DescribedThing; break; + case c_dd: needkw = 4; par.type = para_Description; break; case c_n: needkw = 4; par.type = para_NumberedList; break; - case c_cfg: needkw = 8; par.type = para_Config; break; + case c_cfg: needkw = 8; par.type = para_Config; + start_cmd = c_cfg; break; case c_copyright: needkw = 32; par.type = para_Copyright; break; case c_define: is_macro = TRUE; needkw = 1; break; /* For \nocite the keyword is _everything_ */ @@ -643,6 +741,17 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { case c_versionid: needkw = 32; par.type = para_VersionID; break; } + if (par.type == para_Chapter || + par.type == para_Heading || + par.type == para_Subsect || + par.type == para_Appendix || + par.type == para_UnnumberedChapter) { + struct crossparaitem *sitem = stk_top(crossparastk); + if (sitem && sitem->seen_lcont) { + error(err_sectmarkerinlcont, &t.pos); + } + } + if (needkw > 0) { rdstring rs = { 0, 0, NULL }; int nkeys = 0; @@ -709,12 +818,22 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { /* Move to EOP in case of needkw==8 or 16 (no body) */ if (needkw & 24) { - if (t.type != tok_eop && t.type != tok_eof) { + /* We allow whitespace even when we expect no para body */ + while (t.type == tok_white) + dtor(t), t = get_token(in); + if (t.type != tok_eop && t.type != tok_eof && + (start_cmd == c__invalid || + t.type != tok_cmd || t.cmd != start_cmd)) { error(err_bodyillegal, &t.pos); /* Error recovery: eat the rest of the paragraph */ - while (t.type != tok_eop && t.type != tok_eof) + while (t.type != tok_eop && t.type != tok_eof && + (start_cmd == c__invalid || + t.type != tok_cmd || t.cmd != start_cmd)) dtor(t), t = get_token(in); } + if (t.type == tok_cmd) + already = TRUE;/* inhibit get_token at top of loop */ + prev_para_type = par.type; addpara(par, ret); continue; /* next paragraph */ } @@ -745,6 +864,14 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { while (t.type != tok_eop && t.type != tok_eof) { iswhite = FALSE; already = FALSE; + + /* Handle implicit paragraph breaks after \IM, \BR etc */ + if (start_cmd != c__invalid && + t.type == tok_cmd && t.cmd == start_cmd) { + already = TRUE; /* inhibit get_token at top of loop */ + break; + } + if (t.type == tok_cmd && t.cmd == c__escaped) { t.type = tok_word; /* nice and simple */ t.aux = 0; /* even if `\-' - nonbreaking! */ @@ -765,6 +892,19 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { wd.aux = 0; wd.fpos = t.pos; wd.breaks = FALSE; + + /* + * Inhibit use of whitespace if it's (probably the + * newline) before a repeat \IM / \BR type + * directive. + */ + if (start_cmd != c__invalid) { + dtor(t), t = get_token(in); + already = TRUE; + if (t.type == tok_cmd && t.cmd == start_cmd) + break; + } + if (indexing) rdadd(&indexstr, ' '); if (!indexing || index_visible) @@ -799,9 +939,16 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { break; case tok_rbrace: sitem = stk_pop(parsestk); - if (!sitem) - error(err_unexbrace, &t.pos); - else { + if (!sitem) { + /* + * This closing brace could have been an + * indication that the cross-paragraph stack + * wants popping. Accordingly, we treat it here + * as an indication that the paragraph is over. + */ + already = TRUE; + goto finished_para; + } else { if (sitem->type & stack_ualt) { whptr = sitem->whptr; idximplicit = sitem->idximplicit; @@ -1132,19 +1279,36 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { dtor(t), t = get_token(in); seenwhite = iswhite; } + finished_para: /* Check the stack is empty */ - if (NULL != (sitem = stk_pop(parsestk))) { - do { + if (stk_top(parsestk)) { + while ((sitem = stk_pop(parsestk))) sfree(sitem); - sitem = stk_pop(parsestk); - } while (sitem); error(err_missingrbrace, &t.pos); } stk_free(parsestk); + prev_para_type = par.type; addpara(par, ret); + if (t.type == tok_eof) + already = TRUE; } + + if (stk_top(crossparastk)) { + void *p; + + error(err_missingrbrace2, &t.pos); + while ((p = stk_pop(crossparastk))) + sfree(p); + } + + /* + * We break to here rather than returning, because otherwise + * this cleanup doesn't happen. + */ dtor(t); macrocleanup(macros); + + stk_free(crossparastk); } paragraph *read_input(input *in, indexdata *idx) {