freetree234(macros);
}
+/*
+ * Apply one configuration paragraph to the input reader's state.
+ * The caller must pass a paragraph of type para_Config (asserted).
+ * Only the "input-charset" keyword (compared with ustricmp) is acted
+ * on here; any other keyword leaves `in' unchanged.
+ */
+static void input_configure(input *in, paragraph *cfg) {
+    char *csname;
+
+    assert(cfg->type == para_Config);
+
+    if (ustricmp(cfg->keyword, L"input-charset") != 0) {
+        return;
+    }
+
+    /*
+     * uadv() presumably steps past the keyword name to the value
+     * that follows it (TODO confirm); that value is converted to a
+     * char string so the charset can be looked up by name, and the
+     * temporary string is freed straight afterwards.
+     */
+    csname = utoa_dup(uadv(cfg->keyword), CS_ASCII);
+    in->charset = charset_from_localenc(csname);
+    sfree(csname);
+}
+
/*
* Can return EOF
*/
-static int get(input *in, filepos *pos) {
+static int get(input *in, filepos *pos, rdstringc *rsc) {
+ /*
+ * Return the next character of input as a wide character, or EOF.
+ *
+ * pos, if non-NULL, receives the position held in in->pos before
+ * the byte just read is accounted for. rsc, if non-NULL, has every
+ * byte read from the file appended to it, untranslated.
+ *
+ * Bytes are fed one at a time to charset_to_unicode(); the loop
+ * keeps reading until that yields at least one character in
+ * in->wc, and later calls drain in->wc before reading again.
+ * On EOF the current file is closed and in->currfp is cleared.
+ */
int pushbackpt = in->stack ? in->stack->npushback : 0;
if (in->npushback > pushbackpt) {
--in->npushback;
return c;
}
else if (in->currfp) {
- int c = getc(in->currfp);
- if (c == EOF) {
- fclose(in->currfp);
- in->currfp = NULL;
- }
- /* Track line numbers, for error reporting */
- if (pos)
- *pos = in->pos;
- if (in->reportcols) {
- switch (c) {
- case '\t':
- in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP;
- break;
- case '\n':
- in->pos.col = 1;
- in->pos.line++;
- break;
- default:
- in->pos.col++;
- break;
+ while (in->wcpos >= in->nwc) {
+
+ int c = getc(in->currfp);
+
+ if (c == EOF) {
+ fclose(in->currfp);
+ in->currfp = NULL;
+ return EOF;
+ }
+
+ if (rsc)
+ rdaddc(rsc, c);
+
+ /* Track line numbers, for error reporting */
+ if (pos)
+ *pos = in->pos;
+ if (in->reportcols) {
+ switch (c) {
+ case '\t':
+ /*
+ * Move to the next tab stop. Columns are 1-based, so
+ * the stops are at 1, TAB_STOP+1, 2*TAB_STOP+1, ...
+ * (Using `%' here would wrap col into 1..TAB_STOP
+ * instead of advancing it.)
+ */
+ in->pos.col = 1 + (in->pos.col + TAB_STOP-1) / TAB_STOP * TAB_STOP;
+ break;
+ case '\n':
+ in->pos.col = 1;
+ in->pos.line++;
+ break;
+ default:
+ in->pos.col++;
+ break;
+ }
+ } else {
+ in->pos.col = -1;
+ if (c == '\n')
+ in->pos.line++;
+ }
+
+ /*
+ * Do input character set translation, so that we return
+ * Unicode.
+ */
+ {
+ char buf[1];
+ char const *p;
+ int inlen;
+
+ buf[0] = (char)c;
+ p = buf;
+ inlen = 1;
+
+ in->nwc = charset_to_unicode(&p, &inlen,
+ in->wc, lenof(in->wc),
+ in->charset, &in->csstate,
+ NULL, 0);
+ assert(p == buf+1 && inlen == 0);
+
+ in->wcpos = 0;
}
- } else {
- in->pos.col = -1;
- if (c == '\n')
- in->pos.line++;
}
- /* FIXME: do input charmap translation. We should be returning
- * Unicode here. */
- return c;
+
+ return in->wc[in->wcpos++];
+
} else
return EOF;
}
int type;
int cmd, aux;
wchar_t *text;
+ char *origtext;
filepos pos;
};
enum {
c__invalid, /* invalid command */
c__comment, /* comment command (\#) */
c__escaped, /* escaped character */
+ c__nop, /* no-op */
c__nbsp, /* nonbreaking space */
c_A, /* appendix heading */
c_B, /* bibliography entry */
c_copyright, /* copyright statement */
c_cw, /* weak code */
c_date, /* document processing date */
+ c_dd, /* description list: description */
c_define, /* macro definition */
+ c_dt, /* description list: described thing */
c_e, /* emphasis */
c_i, /* visible index mark */
c_ii, /* uncapitalised visible index mark */
c_k, /* uncapitalised cross-reference */
+ c_lcont, /* continuation para(s) for list item */
c_n, /* numbered list */
c_nocite, /* bibliography trickery */
- c_preamble, /* document preamble text */
+ c_preamble, /* (obsolete) preamble text */
c_q, /* quote marks */
+ c_quote, /* block-quoted paragraphs */
c_rule, /* horizontal rule */
c_title, /* document title */
c_u, /* aux field is char code */
static const struct { char const *name; int id; } keywords[] = {
{"#", c__comment}, /* comment command (\#) */
{"-", c__escaped}, /* nonbreaking hyphen */
+ {".", c__nop}, /* no-op */
{"A", c_A}, /* appendix heading */
{"B", c_B}, /* bibliography entry */
{"BR", c_BR}, /* bibliography rewrite */
{"copyright", c_copyright}, /* copyright statement */
{"cw", c_cw}, /* weak code */
{"date", c_date}, /* document processing date */
+ {"dd", c_dd}, /* description list: description */
{"define", c_define}, /* macro definition */
+ {"dt", c_dt}, /* description list: described thing */
{"e", c_e}, /* emphasis */
{"i", c_i}, /* visible index mark */
{"ii", c_ii}, /* uncapitalised visible index mark */
{"k", c_k}, /* uncapitalised cross-reference */
+ {"lcont", c_lcont}, /* continuation para(s) for list item */
{"n", c_n}, /* numbered list */
{"nocite", c_nocite}, /* bibliography trickery */
- {"preamble", c_preamble}, /* document preamble text */
+ {"preamble", c_preamble}, /* (obsolete) preamble text */
{"q", c_q}, /* quote marks */
+ {"quote", c_quote}, /* block-quoted paragraphs */
{"rule", c_rule}, /* horizontal rule */
{"title", c_title}, /* document title */
{"versionid", c_versionid}, /* document RCS id */
token get_token(input *in) {
+ /*
+ * Read one token from `in'. Command and word tokens carry, besides
+ * the translated text in ret.text, a copy of the raw bytes that
+ * get() recorded in rsc while the token was read (ret.origtext);
+ * for the other token types visible here origtext stays NULL.
+ *
+ * prevpos marks how much of rsc belongs to this token. Anything
+ * after it is read-ahead: it is saved in in->pushback_chars and
+ * re-seeded into rsc at the start of the next call. Both strings
+ * in the returned token are heap-allocated and owned by the caller.
+ */
int c;
int nls;
+ int prevpos;
token ret;
rdstring rs = { 0, 0, NULL };
+ rdstringc rsc = { 0, 0, NULL };
filepos cpos;
ret.text = NULL; /* default */
- c = get(in, &cpos);
+ ret.origtext = NULL; /* default */
+ if (in->pushback_chars) {
+ rdaddsc(&rsc, in->pushback_chars);
+ sfree(in->pushback_chars);
+ in->pushback_chars = NULL;
+ }
+ c = get(in, &cpos, &rsc);
ret.pos = cpos;
if (iswhite(c)) { /* tok_white or tok_eop */
nls = 0;
+ prevpos = 0;
do {
if (isnl(c))
nls++;
- } while ((c = get(in, &cpos)) != EOF && iswhite(c));
+ prevpos = rsc.pos;
+ } while ((c = get(in, &cpos, &rsc)) != EOF && iswhite(c));
if (c == EOF) {
ret.type = tok_eof;
+ sfree(rsc.text);
return ret;
}
+ if (rsc.text) {
+ in->pushback_chars = dupstr(rsc.text + prevpos);
+ sfree(rsc.text);
+ }
unget(in, c, &cpos);
ret.type = (nls > 1 ? tok_eop : tok_white);
return ret;
} else if (c == EOF) { /* tok_eof */
ret.type = tok_eof;
+ sfree(rsc.text);
return ret;
} else if (c == '\\') { /* tok_cmd */
- c = get(in, &cpos);
+ rsc.pos = prevpos = 0;
+ c = get(in, &cpos, &rsc);
if (c == '-' || c == '\\' || c == '_' ||
- c == '#' || c == '{' || c == '}') {
+ c == '#' || c == '{' || c == '}' || c == '.') {
/* single-char command */
rdadd(&rs, c);
+ /*
+ * No look-ahead character is read in this branch, so all of
+ * rsc belongs to this token. Without this, prevpos stays 0,
+ * origtext comes out empty and the command character is
+ * carried over into the next token's raw text.
+ */
+ prevpos = rsc.pos;
} else if (c == 'u') {
do {
rdadd(&rs, c);
len++;
- c = get(in, &cpos);
+ prevpos = rsc.pos;
+ c = get(in, &cpos, &rsc);
} while (ishex(c) && len < 5);
unget(in, c, &cpos);
} else if (iscmd(c)) {
do {
rdadd(&rs, c);
- c = get(in, &cpos);
+ prevpos = rsc.pos;
+ c = get(in, &cpos, &rsc);
} while (iscmd(c));
unget(in, c, &cpos);
}
*/
ret.type = tok_cmd;
ret.text = ustrdup(rs.text);
+ if (rsc.text) {
+ in->pushback_chars = dupstr(rsc.text + prevpos);
+ rsc.text[prevpos] = '\0';
+ ret.origtext = dupstr(rsc.text);
+ } else {
+ ret.origtext = dupstr("");
+ }
match_kw(&ret);
sfree(rs.text);
+ sfree(rsc.text);
return ret;
} else if (c == '{') { /* tok_lbrace */
ret.type = tok_lbrace;
+ sfree(rsc.text);
return ret;
} else if (c == '}') { /* tok_rbrace */
ret.type = tok_rbrace;
+ sfree(rsc.text);
return ret;
} else { /* tok_word */
/*
* a hyphen.
*/
ret.aux = FALSE; /* assumed for now */
+ prevpos = 0;
while (1) {
if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) {
/* Put back the character that caused termination */
} else {
rdadd(&rs, c);
if (c == '-') {
+ prevpos = rsc.pos;
ret.aux = TRUE;
break; /* hyphen terminates word */
}
}
- c = get(in, &cpos);
+ prevpos = rsc.pos;
+ c = get(in, &cpos, &rsc);
}
ret.type = tok_word;
ret.text = ustrdup(rs.text);
+ if (rsc.text) {
+ in->pushback_chars = dupstr(rsc.text + prevpos);
+ rsc.text[prevpos] = '\0';
+ ret.origtext = dupstr(rsc.text);
+ } else {
+ ret.origtext = dupstr("");
+ }
sfree(rs.text);
+ sfree(rsc.text);
return ret;
}
}
int c;
filepos cpos;
- c = get(in, &cpos);
+ c = get(in, &cpos, NULL);
unget(in, c, &cpos);
return (c == '{');
}
filepos cpos;
ret.type = tok_word;
- c = get(in, &cpos); /* expect (and discard) one space */
+ ret.origtext = NULL;
+ c = get(in, &cpos, NULL); /* expect (and discard) one space */
ret.pos = cpos;
if (c == ' ') {
- c = get(in, &cpos);
+ c = get(in, &cpos, NULL);
ret.pos = cpos;
}
while (!isnl(c) && c != EOF) {
int c2 = c;
- c = get(in, &cpos);
+ c = get(in, &cpos, NULL);
/* Discard \r just before \n. */
if (c2 != 13 || !isnl(c))
rdadd(&rs, c2);
* Destructor before token is reassigned; should catch most memory
* leaks
*/
-#define dtor(t) ( sfree(t.text) )
+/* Frees both heap strings a token owns; the argument is parenthesised
+ * so that any lvalue expression, not just a plain name, expands
+ * correctly. Note that the argument is evaluated twice. */
+#define dtor(t) ( sfree((t).text), sfree((t).origtext) )
/*
* Reads a single file (ie until get() returns EOF)
int already;
int iswhite, seenwhite;
int type;
+ int prev_para_type;
struct stack_item {
enum {
stack_nop = 0, /* do nothing (for error recovery) */
} type;
word **whptr; /* to restore from \u alternatives */
word **idximplicit; /* to restore from \u alternatives */
+ filepos fpos;
} *sitem;
stack parsestk;
+ struct crossparaitem {
+ int type; /* currently c_lcont, c_quote or -1 */
+ int seen_lcont, seen_quote;
+ };
+ stack crossparastk;
word *indexword, *uword, *iword;
word *idxwordlist;
rdstring indexstr;
wchar_t uchr;
t.text = NULL;
+ t.origtext = NULL;
macros = newtree234(macrocmp);
+ already = FALSE;
+
+ crossparastk = stk_new();
/*
* Loop on each paragraph.
*/
while (1) {
+ int start_cmd = c__invalid;
par.words = NULL;
par.keyword = NULL;
+ par.origkeyword = NULL;
whptr = &par.words;
/*
* Get a token.
*/
- dtor(t), t = get_token(in);
+ do {
+ if (!already) {
+ dtor(t), t = get_token(in);
+ }
+ already = FALSE;
+ } while (t.type == tok_eop);
if (t.type == tok_eof)
- return;
+ break;
/*
* Parse code paragraphs separately.
*/
if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
+ int wtype = word_WeakCode;
+
par.type = para_Code;
par.fpos = t.pos;
while (1) {
dtor(t), t = get_codepar_token(in);
- wd.type = word_WeakCode;
+ wd.type = wtype;
wd.breaks = FALSE; /* shouldn't need this... */
wd.text = ustrdup(t.text);
wd.alt = NULL;
*/
dtor(t), t = get_token(in);
}
- if (t.type == tok_eop || t.type == tok_eof)
+ if (t.type == tok_eop || t.type == tok_eof ||
+ t.type == tok_rbrace) { /* might be } terminating \lcont */
+ if (t.type == tok_rbrace)
+ already = TRUE;
break;
- else if (t.type != tok_cmd || t.cmd != c_c) {
+ } else if (t.type == tok_cmd && t.cmd == c_c) {
+ wtype = word_WeakCode;
+ } else if (t.type == tok_cmd && t.cmd == c_e &&
+ wtype == word_WeakCode) {
+ wtype = word_Emph;
+ } else {
error(err_brokencodepara, &t.pos);
+ prev_para_type = par.type;
addpara(par, ret);
while (t.type != tok_eop) /* error recovery: */
dtor(t), t = get_token(in); /* eat rest of paragraph */
goto codeparabroken; /* ick, but such is life */
}
}
+ prev_para_type = par.type;
addpara(par, ret);
codeparabroken:
continue;
}
/*
+ * Spot the special commands that define a grouping of more
+ * than one paragraph, and also the closing braces that
+ * finish them.
+ */
+ if (t.type == tok_cmd &&
+ (t.cmd == c_lcont || t.cmd == c_quote)) {
+ struct crossparaitem *sitem, *stop;
+ int cmd = t.cmd;
+
+ /*
+ * Expect, and swallow, an open brace.
+ */
+ dtor(t), t = get_token(in);
+ if (t.type != tok_lbrace) {
+ error(err_explbr, &t.pos);
+ continue;
+ }
+
+ /*
+ * Also expect, and swallow, any whitespace after that
+ * (a newline before a code paragraph wouldn't be
+ * surprising).
+ */
+ do {
+ dtor(t), t = get_token(in);
+ } while (t.type == tok_white);
+ already = TRUE;
+
+ if (cmd == c_lcont) {
+ /*
+ * \lcont causes a continuation of a list item into
+ * multiple paragraphs (which may in turn contain
+ * nested lists, code paras etc). Hence, the previous
+ * paragraph must be of a list type.
+ */
+ sitem = mknew(struct crossparaitem);
+ stop = (struct crossparaitem *)stk_top(crossparastk);
+ if (stop)
+ *sitem = *stop;
+ else
+ sitem->seen_quote = sitem->seen_lcont = 0;
+
+ if (prev_para_type == para_Bullet ||
+ prev_para_type == para_NumberedList ||
+ prev_para_type == para_Description) {
+ sitem->type = c_lcont;
+ sitem->seen_lcont = 1;
+ par.type = para_LcontPush;
+ prev_para_type = par.type;
+ addpara(par, ret);
+ } else {
+ /*
+ * Push a null item on the cross-para stack so that
+ * when we see the corresponding closing brace we
+ * don't give a cascade error.
+ */
+ sitem->type = -1;
+ error(err_misplacedlcont, &t.pos);
+ }
+ } else {
+ /*
+ * \quote causes a group of paragraphs to be
+ * block-quoted (typically they will be indented a
+ * bit).
+ */
+ sitem = mknew(struct crossparaitem);
+ stop = (struct crossparaitem *)stk_top(crossparastk);
+ if (stop)
+ *sitem = *stop;
+ else
+ sitem->seen_quote = sitem->seen_lcont = 0;
+ sitem->type = c_quote;
+ sitem->seen_quote = 1;
+ par.type = para_QuotePush;
+ prev_para_type = par.type;
+ addpara(par, ret);
+ }
+ stk_push(crossparastk, sitem);
+ continue;
+ } else if (t.type == tok_rbrace) {
+ struct crossparaitem *sitem = stk_pop(crossparastk);
+ if (!sitem)
+ error(err_unexbrace, &t.pos);
+ else {
+ switch (sitem->type) {
+ case c_lcont:
+ par.type = para_LcontPop;
+ prev_para_type = par.type;
+ addpara(par, ret);
+ break;
+ case c_quote:
+ par.type = para_QuotePop;
+ prev_para_type = par.type;
+ addpara(par, ret);
+ break;
+ }
+ sfree(sitem);
+ }
+ continue;
+ }
+
+ /*
* This token begins a paragraph. See if it's one of the
* special commands that define a paragraph type.
*
*/
case c_A: needkw = 2; par.type = para_Appendix; break;
case c_B: needkw = 2; par.type = para_Biblio; break;
- case c_BR: needkw = 1; par.type = para_BR; break;
+ case c_BR: needkw = 1; par.type = para_BR;
+ start_cmd = c_BR; break;
case c_C: needkw = 2; par.type = para_Chapter; break;
case c_H: needkw = 2; par.type = para_Heading;
par.aux = 0;
break;
- case c_IM: needkw = 2; par.type = para_IM; break;
+ case c_IM: needkw = 2; par.type = para_IM;
+ start_cmd = c_IM; break;
case c_S: needkw = 2; par.type = para_Subsect;
par.aux = t.aux; break;
case c_U: needkw = 32; par.type = para_UnnumberedChapter; break;
/* For \b and \n the keyword is optional */
case c_b: needkw = 4; par.type = para_Bullet; break;
+ case c_dt: needkw = 4; par.type = para_DescribedThing; break;
+ case c_dd: needkw = 4; par.type = para_Description; break;
case c_n: needkw = 4; par.type = para_NumberedList; break;
- case c_cfg: needkw = 8; par.type = para_Config; break;
+ case c_cfg: needkw = 8; par.type = para_Config;
+ start_cmd = c_cfg; break;
case c_copyright: needkw = 32; par.type = para_Copyright; break;
case c_define: is_macro = TRUE; needkw = 1; break;
/* For \nocite the keyword is _everything_ */
case c_nocite: needkw = 8; par.type = para_NoCite; break;
- case c_preamble: needkw = 32; par.type = para_Preamble; break;
+ case c_preamble: needkw = 32; par.type = para_Normal; break;
case c_rule: needkw = 16; par.type = para_Rule; break;
case c_title: needkw = 32; par.type = para_Title; break;
case c_versionid: needkw = 32; par.type = para_VersionID; break;
}
+ if (par.type == para_Chapter ||
+ par.type == para_Heading ||
+ par.type == para_Subsect ||
+ par.type == para_Appendix ||
+ par.type == para_UnnumberedChapter) {
+ struct crossparaitem *sitem = stk_top(crossparastk);
+ if (sitem && (sitem->seen_lcont || sitem->seen_quote)) {
+ error(err_sectmarkerinblock,
+ &t.pos,
+ (sitem->seen_lcont ? "lcont" : "quote"));
+ }
+ }
+
if (needkw > 0) {
rdstring rs = { 0, 0, NULL };
+ rdstringc rsc = { 0, 0, NULL };
int nkeys = 0;
filepos fp;
t.type == tok_word ||
t.type == tok_white ||
(t.type == tok_cmd && t.cmd == c__nbsp) ||
- (t.type == tok_cmd && t.cmd == c__escaped)) {
+ (t.type == tok_cmd && t.cmd == c__escaped) ||
+ (t.type == tok_cmd && t.cmd == c_u)) {
if (t.type == tok_white ||
- (t.type == tok_cmd && t.cmd == c__nbsp))
+ (t.type == tok_cmd && t.cmd == c__nbsp)) {
rdadd(&rs, ' ');
- else
+ rdaddc(&rsc, ' ');
+ } else if (t.type == tok_cmd && t.cmd == c_u) {
+ rdadd(&rs, t.aux);
+ rdaddc(&rsc, '\\');
+ rdaddsc(&rsc, t.origtext);
+ } else {
rdadds(&rs, t.text);
+ rdaddsc(&rsc, t.origtext);
+ }
}
if (t.type != tok_rbrace) {
error(err_kwunclosed, &t.pos);
continue;
}
rdadd(&rs, 0); /* add string terminator */
+ rdaddc(&rsc, 0); /* add string terminator */
dtor(t), t = get_token(in); /* eat right brace */
}
- rdadd(&rs, 0); /* add string terminator */
+ rdadd(&rs, 0); /* add string terminator */
+ rdaddc(&rsc, 0); /* add string terminator */
/* See whether we have the right number of keywords. */
if ((needkw & 48) && nkeys > 0)
}
par.keyword = rdtrim(&rs);
+ par.origkeyword = rdtrimc(&rsc);
/* Move to EOP in case of needkw==8 or 16 (no body) */
if (needkw & 24) {
- if (t.type != tok_eop && t.type != tok_eof) {
+ /* We allow whitespace even when we expect no para body */
+ while (t.type == tok_white)
+ dtor(t), t = get_token(in);
+ if (t.type != tok_eop && t.type != tok_eof &&
+ (start_cmd == c__invalid ||
+ t.type != tok_cmd || t.cmd != start_cmd)) {
error(err_bodyillegal, &t.pos);
/* Error recovery: eat the rest of the paragraph */
- while (t.type != tok_eop && t.type != tok_eof)
+ while (t.type != tok_eop && t.type != tok_eof &&
+ (start_cmd == c__invalid ||
+ t.type != tok_cmd || t.cmd != start_cmd))
dtor(t), t = get_token(in);
}
+ if (t.type == tok_cmd)
+ already = TRUE;/* inhibit get_token at top of loop */
+ prev_para_type = par.type;
addpara(par, ret);
+
+ if (par.type == para_Config) {
+ input_configure(in, &par);
+ }
continue; /* next paragraph */
}
}
while (t.type != tok_eop && t.type != tok_eof) {
iswhite = FALSE;
already = FALSE;
+
+ /* Handle implicit paragraph breaks after \IM, \BR etc */
+ if (start_cmd != c__invalid &&
+ t.type == tok_cmd && t.cmd == start_cmd) {
+ already = TRUE; /* inhibit get_token at top of loop */
+ break;
+ }
+
+ if (t.type == tok_cmd && t.cmd == c__nop) {
+ dtor(t), t = get_token(in);
+ continue; /* do nothing! */
+ }
+
if (t.type == tok_cmd && t.cmd == c__escaped) {
t.type = tok_word; /* nice and simple */
t.aux = 0; /* even if `\-' - nonbreaking! */
wd.aux = 0;
wd.fpos = t.pos;
wd.breaks = FALSE;
+
+ /*
+ * Inhibit use of whitespace if it's (probably the
+ * newline) before a repeat \IM / \BR type
+ * directive.
+ */
+ if (start_cmd != c__invalid) {
+ dtor(t), t = get_token(in);
+ already = TRUE;
+ if (t.type == tok_cmd && t.cmd == start_cmd)
+ break;
+ }
+
if (indexing)
rdadd(&indexstr, ' ');
if (!indexing || index_visible)
/* Error recovery: push nop */
sitem = mknew(struct stack_item);
sitem->type = stack_nop;
+ sitem->fpos = t.pos;
stk_push(parsestk, sitem);
break;
case tok_rbrace:
sitem = stk_pop(parsestk);
- if (!sitem)
- error(err_unexbrace, &t.pos);
- else {
+ if (!sitem) {
+ /*
+ * This closing brace could have been an
+ * indication that the cross-paragraph stack
+ * wants popping. Accordingly, we treat it here
+ * as an indication that the paragraph is over.
+ */
+ already = TRUE;
+ goto finished_para;
+ } else {
if (sitem->type & stack_ualt) {
whptr = sitem->whptr;
idximplicit = sitem->idximplicit;
}
if (sitem->type & stack_idx) {
indexword->text = ustrdup(indexstr.text);
- if (index_downcase)
+ if (index_downcase) {
+ word *w;
+
ustrlow(indexword->text);
+ ustrlow(indexstr.text);
+
+ for (w = idxwordlist; w; w = w->next)
+ if (w->text)
+ ustrlow(w->text);
+ }
indexing = FALSE;
rdadd(&indexstr, L'\0');
- index_merge(idx, FALSE, indexstr.text, idxwordlist);
+ index_merge(idx, FALSE, indexstr.text,
+ idxwordlist, &sitem->fpos);
sfree(indexstr.text);
}
if (sitem->type & stack_hyper) {
addword(wd, &idximplicit);
}
sitem = mknew(struct stack_item);
+ sitem->fpos = t.pos;
sitem->type = stack_quote;
stk_push(parsestk, sitem);
}
* delimiting the text marked by the link.
*/
dtor(t), t = get_token(in);
+ sitem = mknew(struct stack_item);
+ sitem->fpos = wd.fpos;
+ sitem->type = stack_hyper;
+ /*
+ * Special cases: \W{}\i, \W{}\ii
+ */
+ if (t.type == tok_cmd &&
+ (t.cmd == c_i || t.cmd == c_ii)) {
+ if (indexing) {
+ error(err_nestedindex, &t.pos);
+ } else {
+ /* Add an index-reference word with no
+ * text as yet */
+ wd.type = word_IndexRef;
+ wd.text = NULL;
+ wd.alt = NULL;
+ wd.aux = 0;
+ wd.breaks = FALSE;
+ indexword = addword(wd, &whptr);
+ /* Set up a rdstring to read the
+ * index text */
+ indexstr = nullrs;
+ /* Flags so that we do the Right
+ * Things with text */
+ index_visible = (type != c_I);
+ index_downcase = (type == c_ii);
+ indexing = TRUE;
+ idxwordlist = NULL;
+ idximplicit = &idxwordlist;
+
+ sitem->type |= stack_idx;
+ }
+ dtor(t), t = get_token(in);
+ }
/*
* Special cases: \W{}\c, \W{}\e, \W{}\cw
*/
- sitem = mknew(struct stack_item);
- sitem->type = stack_hyper;
if (t.type == tok_cmd &&
(t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
if (style != word_Normal)
/* Error recovery: eat lbrace, push nop. */
dtor(t), t = get_token(in);
sitem = mknew(struct stack_item);
+ sitem->fpos = t.pos;
sitem->type = stack_nop;
stk_push(parsestk, sitem);
}
word_Emph);
spcstyle = tospacestyle(style);
sitem = mknew(struct stack_item);
+ sitem->fpos = t.pos;
sitem->type = stack_style;
stk_push(parsestk, sitem);
}
/* Error recovery: eat lbrace, push nop. */
dtor(t), t = get_token(in);
sitem = mknew(struct stack_item);
+ sitem->fpos = t.pos;
sitem->type = stack_nop;
stk_push(parsestk, sitem);
}
sitem = mknew(struct stack_item);
+ sitem->fpos = t.pos;
sitem->type = stack_idx;
dtor(t), t = get_token(in);
/*
* paragraph.
*/
sitem = mknew(struct stack_item);
+ sitem->fpos = t.pos;
sitem->type = stack_ualt;
sitem->whptr = whptr;
sitem->idximplicit = idximplicit;
dtor(t), t = get_token(in);
seenwhite = iswhite;
}
+ finished_para:
/* Check the stack is empty */
- if (NULL != (sitem = stk_pop(parsestk))) {
- do {
+ if (stk_top(parsestk)) {
+ while ((sitem = stk_pop(parsestk)))
sfree(sitem);
- sitem = stk_pop(parsestk);
- } while (sitem);
error(err_missingrbrace, &t.pos);
}
stk_free(parsestk);
+ prev_para_type = par.type;
addpara(par, ret);
+ if (t.type == tok_eof)
+ already = TRUE;
}
+
+ if (stk_top(crossparastk)) {
+ void *p;
+
+ error(err_missingrbrace2, &t.pos);
+ while ((p = stk_pop(crossparastk)))
+ sfree(p);
+ }
+
+ /*
+ * We break to here rather than returning, because otherwise
+ * this cleanup doesn't happen.
+ */
dtor(t);
macrocleanup(macros);
+
+ stk_free(crossparastk);
}
paragraph *read_input(input *in, indexdata *idx) {
in->currfp = fopen(in->filenames[in->currindex], "r");
if (in->currfp) {
setpos(in, in->filenames[in->currindex]);
+ in->charset = in->defcharset;
+ in->csstate = charset_init_state;
+ in->wcpos = in->nwc = 0;
+ in->pushback_chars = NULL;
read_file(&hptr, in, idx);
}
in->currindex++;