static void unget(input *in, int c, filepos *pos) {
if (in->npushback >= in->pushbacksize) {
in->pushbacksize = in->npushback + 16;
- in->pushback = resize(in->pushback, in->pushbacksize);
+ in->pushback = sresize(in->pushback, in->pushbacksize, pushback);
}
in->pushback[in->npushback].chr = c;
in->pushback[in->npushback].pos = *pos; /* structure copy */
}
static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text,
filepos fpos) {
- macro *m = mknew(macro);
+ macro *m = snew(macro);
m->name = name;
m->text = text;
if (add234(macros, m) != m) {
m.name = name;
gotit = find234(macros, &m, NULL);
if (gotit) {
- macrostack *expansion = mknew(macrostack);
+ macrostack *expansion = snew(macrostack);
expansion->next = in->stack;
expansion->text = gotit->text;
expansion->pos = *pos; /* structure copy */
assert(cfg->type == para_Config);
if (!ustricmp(cfg->keyword, L"input-charset")) {
- char *csname = utoa_dup(uadv(cfg->keyword));
- in->charset = charset_from_localenc(csname);
- sfree(csname);
+ in->charset = charset_from_ustr(&cfg->fpos, uadv(cfg->keyword));
}
}
/*
* Can return EOF
*/
-static int get(input *in, filepos *pos) {
+static int get(input *in, filepos *pos, rdstringc *rsc) {
int pushbackpt = in->stack ? in->stack->npushback : 0;
if (in->npushback > pushbackpt) {
--in->npushback;
}
else if (in->stack) {
wchar_t c = in->stack->text[in->stack->ptr];
+ if (pos)
+ *pos = in->stack->pos;
if (in->stack->text[++in->stack->ptr] == L'\0') {
macrostack *tmp = in->stack;
in->stack = tmp->next;
in->currfp = NULL;
return EOF;
}
+
+ if (rsc)
+ rdaddc(rsc, c);
+
/* Track line numbers, for error reporting */
if (pos)
*pos = in->pos;
int type;
int cmd, aux;
wchar_t *text;
+ char *origtext;
filepos pos;
};
enum {
c_c, /* code */
c_cfg, /* configuration directive */
c_copyright, /* copyright statement */
+ c_cq, /* quoted code (sugar for \q{\cw{x}}) */
c_cw, /* weak code */
c_date, /* document processing date */
c_dd, /* description list: description */
{"c", c_c}, /* code */
{"cfg", c_cfg}, /* configuration directive */
{"copyright", c_copyright}, /* copyright statement */
+ {"cq", c_cq}, /* quoted code (sugar for \q{\cw{x}}) */
{"cw", c_cw}, /* weak code */
{"date", c_date}, /* document processing date */
{"dd", c_dd}, /* description list: description */
token get_token(input *in) {
int c;
int nls;
+ int prevpos;
token ret;
rdstring rs = { 0, 0, NULL };
+ rdstringc rsc = { 0, 0, NULL };
filepos cpos;
ret.text = NULL; /* default */
- c = get(in, &cpos);
+ ret.origtext = NULL; /* default */
+ if (in->pushback_chars) {
+ rdaddsc(&rsc, in->pushback_chars);
+ sfree(in->pushback_chars);
+ in->pushback_chars = NULL;
+ }
+ c = get(in, &cpos, &rsc);
ret.pos = cpos;
if (iswhite(c)) { /* tok_white or tok_eop */
nls = 0;
+ prevpos = 0;
do {
if (isnl(c))
nls++;
- } while ((c = get(in, &cpos)) != EOF && iswhite(c));
+ prevpos = rsc.pos;
+ } while ((c = get(in, &cpos, &rsc)) != EOF && iswhite(c));
if (c == EOF) {
ret.type = tok_eof;
+ sfree(rsc.text);
return ret;
}
+ if (rsc.text) {
+ in->pushback_chars = dupstr(rsc.text + prevpos);
+ sfree(rsc.text);
+ }
unget(in, c, &cpos);
ret.type = (nls > 1 ? tok_eop : tok_white);
return ret;
} else if (c == EOF) { /* tok_eof */
ret.type = tok_eof;
+ sfree(rsc.text);
return ret;
} else if (c == '\\') { /* tok_cmd */
- c = get(in, &cpos);
+ rsc.pos = prevpos = 0;
+ c = get(in, &cpos, &rsc);
if (c == '-' || c == '\\' || c == '_' ||
c == '#' || c == '{' || c == '}' || c == '.') {
/* single-char command */
rdadd(&rs, c);
+ prevpos = rsc.pos;
} else if (c == 'u') {
int len = 0;
do {
rdadd(&rs, c);
len++;
- c = get(in, &cpos);
+ prevpos = rsc.pos;
+ c = get(in, &cpos, &rsc);
} while (ishex(c) && len < 5);
unget(in, c, &cpos);
} else if (iscmd(c)) {
do {
rdadd(&rs, c);
- c = get(in, &cpos);
+ prevpos = rsc.pos;
+ c = get(in, &cpos, &rsc);
} while (iscmd(c));
unget(in, c, &cpos);
}
*/
ret.type = tok_cmd;
ret.text = ustrdup(rs.text);
+ if (rsc.text) {
+ in->pushback_chars = dupstr(rsc.text + prevpos);
+ rsc.text[prevpos] = '\0';
+ ret.origtext = dupstr(rsc.text);
+ } else {
+ ret.origtext = dupstr("");
+ }
match_kw(&ret);
sfree(rs.text);
+ sfree(rsc.text);
return ret;
} else if (c == '{') { /* tok_lbrace */
ret.type = tok_lbrace;
+ sfree(rsc.text);
return ret;
} else if (c == '}') { /* tok_rbrace */
ret.type = tok_rbrace;
+ sfree(rsc.text);
return ret;
} else { /* tok_word */
/*
* a hyphen.
*/
ret.aux = FALSE; /* assumed for now */
+ prevpos = 0;
while (1) {
if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) {
/* Put back the character that caused termination */
} else {
rdadd(&rs, c);
if (c == '-') {
+ prevpos = rsc.pos;
ret.aux = TRUE;
break; /* hyphen terminates word */
}
}
- c = get(in, &cpos);
+ prevpos = rsc.pos;
+ c = get(in, &cpos, &rsc);
}
ret.type = tok_word;
ret.text = ustrdup(rs.text);
+ if (rsc.text) {
+ in->pushback_chars = dupstr(rsc.text + prevpos);
+ rsc.text[prevpos] = '\0';
+ ret.origtext = dupstr(rsc.text);
+ } else {
+ ret.origtext = dupstr("");
+ }
sfree(rs.text);
+ sfree(rsc.text);
return ret;
}
}
int c;
filepos cpos;
- c = get(in, &cpos);
+ c = get(in, &cpos, NULL);
unget(in, c, &cpos);
return (c == '{');
}
filepos cpos;
ret.type = tok_word;
- c = get(in, &cpos); /* expect (and discard) one space */
+ ret.origtext = NULL;
+ c = get(in, &cpos, NULL); /* expect (and discard) one space */
ret.pos = cpos;
if (c == ' ') {
- c = get(in, &cpos);
+ c = get(in, &cpos, NULL);
ret.pos = cpos;
}
while (!isnl(c) && c != EOF) {
int c2 = c;
- c = get(in, &cpos);
+ c = get(in, &cpos, NULL);
/* Discard \r just before \n. */
if (c2 != 13 || !isnl(c))
rdadd(&rs, c2);
word *mnewword;
if (!hptrptr)
return NULL;
- mnewword = mknew(word);
+ mnewword = snew(word);
*mnewword = newword; /* structure copy */
mnewword->next = NULL;
**hptrptr = mnewword;
* Adds a new paragraph to a linked list
*/
static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
- paragraph *mnewpara = mknew(paragraph);
+ paragraph *mnewpara = snew(paragraph);
*mnewpara = newpara; /* structure copy */
mnewpara->next = NULL;
**hptrptr = mnewpara;
* Destructor before token is reassigned; should catch most memory
* leaks
*/
-#define dtor(t) ( sfree(t.text) )
+#define dtor(t) ( sfree(t.text), sfree(t.origtext) )
/*
* Reads a single file (ie until get() returns EOF)
*/
-static void read_file(paragraph ***ret, input *in, indexdata *idx) {
+static void read_file(paragraph ***ret, input *in, indexdata *idx,
+ tree234 *macros) {
token t;
paragraph par;
word wd, **whptr, **idximplicit;
- tree234 *macros;
wchar_t utext[2], *wdtext;
int style, spcstyle;
int already;
word **whptr; /* to restore from \u alternatives */
word **idximplicit; /* to restore from \u alternatives */
filepos fpos;
+ int in_code;
} *sitem;
stack parsestk;
struct crossparaitem {
wchar_t uchr;
t.text = NULL;
- macros = newtree234(macrocmp);
+ t.origtext = NULL;
already = FALSE;
crossparastk = stk_new();
int start_cmd = c__invalid;
par.words = NULL;
par.keyword = NULL;
+ par.origkeyword = NULL;
whptr = &par.words;
/*
* nested lists, code paras etc). Hence, the previous
* paragraph must be of a list type.
*/
- sitem = mknew(struct crossparaitem);
+ sitem = snew(struct crossparaitem);
stop = (struct crossparaitem *)stk_top(crossparastk);
if (stop)
*sitem = *stop;
* block-quoted (typically they will be indented a
* bit).
*/
- sitem = mknew(struct crossparaitem);
+ sitem = snew(struct crossparaitem);
stop = (struct crossparaitem *)stk_top(crossparastk);
if (stop)
*sitem = *stop;
continue;
}
+ while (t.type == tok_cmd &&
+ macrolookup(macros, in, t.text, &t.pos)) {
+ dtor(t), t = get_token(in);
+ }
+
/*
* This token begins a paragraph. See if it's one of the
* special commands that define a paragraph type.
if (needkw > 0) {
rdstring rs = { 0, 0, NULL };
+ rdstringc rsc = { 0, 0, NULL };
int nkeys = 0;
filepos fp;
t.type == tok_word ||
t.type == tok_white ||
(t.type == tok_cmd && t.cmd == c__nbsp) ||
- (t.type == tok_cmd && t.cmd == c__escaped)) {
+ (t.type == tok_cmd && t.cmd == c__escaped) ||
+ (t.type == tok_cmd && t.cmd == c_u)) {
if (t.type == tok_white ||
- (t.type == tok_cmd && t.cmd == c__nbsp))
+ (t.type == tok_cmd && t.cmd == c__nbsp)) {
rdadd(&rs, ' ');
- else
+ rdaddc(&rsc, ' ');
+ } else if (t.type == tok_cmd && t.cmd == c_u) {
+ rdadd(&rs, t.aux);
+ rdaddc(&rsc, '\\');
+ rdaddsc(&rsc, t.origtext);
+ } else {
rdadds(&rs, t.text);
+ rdaddsc(&rsc, t.origtext);
+ }
}
if (t.type != tok_rbrace) {
error(err_kwunclosed, &t.pos);
continue;
}
rdadd(&rs, 0); /* add string terminator */
+ rdaddc(&rsc, 0); /* add string terminator */
dtor(t), t = get_token(in); /* eat right brace */
}
- rdadd(&rs, 0); /* add string terminator */
+ rdadd(&rs, 0); /* add string terminator */
+ rdaddc(&rsc, 0); /* add string terminator */
/* See whether we have the right number of keywords. */
if ((needkw & 48) && nkeys > 0)
}
par.keyword = rdtrim(&rs);
+ par.origkeyword = rdtrimc(&rsc);
/* Move to EOP in case of needkw==8 or 16 (no body) */
if (needkw & 24) {
* Mid-paragraph commands:
*
* \K \k
- * \c \cw
+ * \c \cw \cq
* \e
* \i \ii
* \I
+ * \q
* \u
* \W
* \date
case tok_lbrace:
error(err_unexbrace, &t.pos);
/* Error recovery: push nop */
- sitem = mknew(struct stack_item);
+ sitem = snew(struct stack_item);
sitem->type = stack_nop;
sitem->fpos = t.pos;
stk_push(parsestk, sitem);
}
break;
case c_q:
+ case c_cq:
+ type = t.cmd;
dtor(t), t = get_token(in);
if (t.type != tok_lbrace) {
error(err_explbr, &t.pos);
} else {
- wd.text = NULL;
- wd.type = toquotestyle(style);
- wd.alt = NULL;
- wd.aux = quote_Open;
- wd.fpos = t.pos;
- wd.breaks = FALSE;
- if (!indexing || index_visible)
- addword(wd, &whptr);
- if (indexing) {
- rdadd(&indexstr, L'"');
- addword(wd, &idximplicit);
+ /*
+ * Enforce that \q may not be used anywhere
+ * within \c. (It shouldn't be necessary
+ * since the whole point of \c should be
+ * that the user wants to exercise exact
+ * control over the glyphs used, and
+ * forbidding it has the useful effect of
+ * relieving some backends of having to
+ * make difficult decisions.)
+ */
+ int stype;
+
+ if (style != word_Code && style != word_WeakCode) {
+ wd.text = NULL;
+ wd.type = toquotestyle(style);
+ wd.alt = NULL;
+ wd.aux = quote_Open;
+ wd.fpos = t.pos;
+ wd.breaks = FALSE;
+ if (!indexing || index_visible)
+ addword(wd, &whptr);
+ if (indexing) {
+ rdadd(&indexstr, L'"');
+ addword(wd, &idximplicit);
+ }
+ stype = stack_quote;
+ } else {
+ error(err_codequote, &t.pos);
+ stype = stack_nop;
}
- sitem = mknew(struct stack_item);
+ sitem = snew(struct stack_item);
sitem->fpos = t.pos;
- sitem->type = stack_quote;
+ sitem->type = stype;
+ if (type == c_cq) {
+ if (style != word_Normal) {
+ error(err_nestedstyles, &t.pos);
+ } else {
+ style = word_WeakCode;
+ spcstyle = tospacestyle(style);
+ sitem->type |= stack_style;
+ }
+ }
stk_push(parsestk, sitem);
}
break;
* delimiting the text marked by the link.
*/
dtor(t), t = get_token(in);
- sitem = mknew(struct stack_item);
+ sitem = snew(struct stack_item);
sitem->fpos = wd.fpos;
sitem->type = stack_hyper;
/*
error(err_nestedstyles, &t.pos);
/* Error recovery: eat lbrace, push nop. */
dtor(t), t = get_token(in);
- sitem = mknew(struct stack_item);
+ sitem = snew(struct stack_item);
sitem->fpos = t.pos;
sitem->type = stack_nop;
stk_push(parsestk, sitem);
type == c_cw ? word_WeakCode :
word_Emph);
spcstyle = tospacestyle(style);
- sitem = mknew(struct stack_item);
+ sitem = snew(struct stack_item);
sitem->fpos = t.pos;
sitem->type = stack_style;
stk_push(parsestk, sitem);
error(err_nestedindex, &t.pos);
/* Error recovery: eat lbrace, push nop. */
dtor(t), t = get_token(in);
- sitem = mknew(struct stack_item);
+ sitem = snew(struct stack_item);
sitem->fpos = t.pos;
sitem->type = stack_nop;
stk_push(parsestk, sitem);
}
- sitem = mknew(struct stack_item);
+ sitem = snew(struct stack_item);
sitem->fpos = t.pos;
sitem->type = stack_idx;
dtor(t), t = get_token(in);
* sidetrack from the main thread of the
* paragraph.
*/
- sitem = mknew(struct stack_item);
+ sitem = snew(struct stack_item);
sitem->fpos = t.pos;
sitem->type = stack_ualt;
sitem->whptr = whptr;
}
stk_free(parsestk);
prev_para_type = par.type;
- addpara(par, ret);
+ /*
+ * Before we add the paragraph to the output list, we
+ * should check that there was any text in it at all; there
+ * might not be if (for example) the paragraph contained
+ * nothing but an unrecognised command sequence, and if we
+ * put an empty paragraph on the list it may confuse the
+ * back ends later on.
+ */
+ if (par.words) {
+ addpara(par, ret);
+ }
if (t.type == tok_eof)
already = TRUE;
}
* this cleanup doesn't happen.
*/
dtor(t);
- macrocleanup(macros);
stk_free(crossparastk);
}
paragraph *read_input(input *in, indexdata *idx) {
paragraph *head = NULL;
paragraph **hptr = &head;
+ tree234 *macros;
+
+ macros = newtree234(macrocmp);
while (in->currindex < in->nfiles) {
in->currfp = fopen(in->filenames[in->currindex], "r");
setpos(in, in->filenames[in->currindex]);
in->charset = in->defcharset;
in->csstate = charset_init_state;
- read_file(&hptr, in, idx);
+ in->wcpos = in->nwc = 0;
+ in->pushback_chars = NULL;
+ read_file(&hptr, in, idx, macros);
}
in->currindex++;
}
+ macrocleanup(macros);
+
return head;
}