X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/halibut/blobdiff_plain/e34ba5c3b8a7bcb8fceb437125da3a6a6f6d2dba..7c570bf386b055563f689fb3c424af29227db8e6:/input.c diff --git a/input.c b/input.c index d607e86..4eb1d8c 100644 --- a/input.c +++ b/input.c @@ -18,7 +18,7 @@ static void setpos(input *in, char *fname) { static void unget(input *in, int c, filepos *pos) { if (in->npushback >= in->pushbacksize) { in->pushbacksize = in->npushback + 16; - in->pushback = resize(in->pushback, in->pushbacksize); + in->pushback = sresize(in->pushback, in->pushbacksize, pushback); } in->pushback[in->npushback].chr = c; in->pushback[in->npushback].pos = *pos; /* structure copy */ @@ -45,7 +45,7 @@ static int macrocmp(void *av, void *bv) { } static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text, filepos fpos) { - macro *m = mknew(macro); + macro *m = snew(macro); m->name = name; m->text = text; if (add234(macros, m) != m) { @@ -60,7 +60,7 @@ static int macrolookup(tree234 *macros, input *in, wchar_t *name, m.name = name; gotit = find234(macros, &m, NULL); if (gotit) { - macrostack *expansion = mknew(macrostack); + macrostack *expansion = snew(macrostack); expansion->next = in->stack; expansion->text = gotit->text; expansion->pos = *pos; /* structure copy */ @@ -86,7 +86,7 @@ static void input_configure(input *in, paragraph *cfg) { assert(cfg->type == para_Config); if (!ustricmp(cfg->keyword, L"input-charset")) { - char *csname = utoa_dup(uadv(cfg->keyword)); + char *csname = utoa_dup(uadv(cfg->keyword), CS_ASCII); in->charset = charset_from_localenc(csname); sfree(csname); } @@ -95,7 +95,7 @@ static void input_configure(input *in, paragraph *cfg) { /* * Can return EOF */ -static int get(input *in, filepos *pos) { +static int get(input *in, filepos *pos, rdstringc *rsc) { int pushbackpt = in->stack ? in->stack->npushback : 0; if (in->npushback > pushbackpt) { --in->npushback; @@ -123,6 +123,10 @@ static int get(input *in, filepos *pos) { in->currfp = NULL; return EOF; } + + if (rsc) + rdaddc(rsc, c); + /* Track line numbers, for error reporting */ if (pos) *pos = in->pos; @@ -182,6 +186,7 @@ struct token_Tag { int type; int cmd, aux; wchar_t *text; + char *origtext; filepos pos; }; enum { @@ -373,31 +378,48 @@ static void match_kw(token *tok) { token get_token(input *in) { int c; int nls; + int prevpos; token ret; rdstring rs = { 0, 0, NULL }; + rdstringc rsc = { 0, 0, NULL }; filepos cpos; ret.text = NULL; /* default */ - c = get(in, &cpos); + ret.origtext = NULL; /* default */ + if (in->pushback_chars) { + rdaddsc(&rsc, in->pushback_chars); + sfree(in->pushback_chars); + in->pushback_chars = NULL; + } + c = get(in, &cpos, &rsc); ret.pos = cpos; if (iswhite(c)) { /* tok_white or tok_eop */ nls = 0; + prevpos = 0; do { if (isnl(c)) nls++; - } while ((c = get(in, &cpos)) != EOF && iswhite(c)); + prevpos = rsc.pos; + } while ((c = get(in, &cpos, &rsc)) != EOF && iswhite(c)); if (c == EOF) { ret.type = tok_eof; + sfree(rsc.text); return ret; } + if (rsc.text) { + in->pushback_chars = dupstr(rsc.text + prevpos); + sfree(rsc.text); + } unget(in, c, &cpos); ret.type = (nls > 1 ? tok_eop : tok_white); return ret; } else if (c == EOF) { /* tok_eof */ ret.type = tok_eof; + sfree(rsc.text); return ret; } else if (c == '\\') { /* tok_cmd */ - c = get(in, &cpos); + rsc.pos = prevpos = 0; + c = get(in, &cpos, &rsc); if (c == '-' || c == '\\' || c == '_' || c == '#' || c == '{' || c == '}' || c == '.') { /* single-char command */ @@ -407,13 +429,15 @@ token get_token(input *in) { do { rdadd(&rs, c); len++; - c = get(in, &cpos); + prevpos = rsc.pos; + c = get(in, &cpos, &rsc); } while (ishex(c) && len < 5); unget(in, c, &cpos); } else if (iscmd(c)) { do { rdadd(&rs, c); - c = get(in, &cpos); + prevpos = rsc.pos; + c = get(in, &cpos, &rsc); } while (iscmd(c)); unget(in, c, &cpos); } @@ -423,14 +447,24 @@ token get_token(input *in) { */ ret.type = tok_cmd; ret.text = ustrdup(rs.text); + if (rsc.text) { + in->pushback_chars = dupstr(rsc.text + prevpos); + rsc.text[prevpos] = '\0'; + ret.origtext = dupstr(rsc.text); + } else { + ret.origtext = dupstr(""); + } match_kw(&ret); sfree(rs.text); + sfree(rsc.text); return ret; } else if (c == '{') { /* tok_lbrace */ ret.type = tok_lbrace; + sfree(rsc.text); return ret; } else if (c == '}') { /* tok_rbrace */ ret.type = tok_rbrace; + sfree(rsc.text); return ret; } else { /* tok_word */ /* @@ -442,6 +476,7 @@ token get_token(input *in) { * a hyphen. */ ret.aux = FALSE; /* assumed for now */ + prevpos = 0; while (1) { if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) { /* Put back the character that caused termination */ @@ -450,15 +485,25 @@ token get_token(input *in) { } else { rdadd(&rs, c); if (c == '-') { + prevpos = rsc.pos; ret.aux = TRUE; break; /* hyphen terminates word */ } } - c = get(in, &cpos); + prevpos = rsc.pos; + c = get(in, &cpos, &rsc); } ret.type = tok_word; ret.text = ustrdup(rs.text); + if (rsc.text) { + in->pushback_chars = dupstr(rsc.text + prevpos); + rsc.text[prevpos] = '\0'; + ret.origtext = dupstr(rsc.text); + } else { + ret.origtext = dupstr(""); + } sfree(rs.text); + sfree(rsc.text); return ret; } } @@ -472,7 +517,7 @@ int isbrace(input *in) { int c; filepos cpos; - c = get(in, &cpos); + c = get(in, &cpos, NULL); unget(in, c, &cpos); return (c == '{'); } @@ -488,15 +533,16 @@ token get_codepar_token(input *in) { filepos cpos; ret.type = tok_word; - c = get(in, &cpos); /* expect (and discard) one space */ + ret.origtext = NULL; + c = get(in, &cpos, NULL); /* expect (and discard) one space */ ret.pos = cpos; if (c == ' ') { - c = get(in, &cpos); + c = get(in, &cpos, NULL); ret.pos = cpos; } while (!isnl(c) && c != EOF) { int c2 = c; - c = get(in, &cpos); + c = get(in, &cpos, NULL); /* Discard \r just before \n. */ if (c2 != 13 || !isnl(c)) rdadd(&rs, c2); @@ -514,7 +560,7 @@ static word *addword(word newword, word ***hptrptr) { word *mnewword; if (!hptrptr) return NULL; - mnewword = mknew(word); + mnewword = snew(word); *mnewword = newword; /* structure copy */ mnewword->next = NULL; **hptrptr = mnewword; @@ -526,7 +572,7 @@ static word *addword(word newword, word ***hptrptr) { * Adds a new paragraph to a linked list */ static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) { - paragraph *mnewpara = mknew(paragraph); + paragraph *mnewpara = snew(paragraph); *mnewpara = newpara; /* structure copy */ mnewpara->next = NULL; **hptrptr = mnewpara; @@ -538,7 +584,7 @@ static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) { * Destructor before token is reassigned; should catch most memory * leaks */ -#define dtor(t) ( sfree(t.text) ) +#define dtor(t) ( sfree(t.text), sfree(t.origtext) ) /* * Reads a single file (ie until get() returns EOF) @@ -566,6 +612,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { word **whptr; /* to restore from \u alternatives */ word **idximplicit; /* to restore from \u alternatives */ filepos fpos; + int in_code; } *sitem; stack parsestk; struct crossparaitem { @@ -581,6 +628,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { wchar_t uchr; t.text = NULL; + t.origtext = NULL; macros = newtree234(macrocmp); already = FALSE; @@ -593,6 +641,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { int start_cmd = c__invalid; par.words = NULL; par.keyword = NULL; + par.origkeyword = NULL; whptr = &par.words; /* @@ -691,7 +740,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { * nested lists, code paras etc). Hence, the previous * paragraph must be of a list type. */ - sitem = mknew(struct crossparaitem); + sitem = snew(struct crossparaitem); stop = (struct crossparaitem *)stk_top(crossparastk); if (stop) *sitem = *stop; @@ -721,7 +770,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { * block-quoted (typically they will be indented a * bit). */ - sitem = mknew(struct crossparaitem); + sitem = snew(struct crossparaitem); stop = (struct crossparaitem *)stk_top(crossparastk); if (stop) *sitem = *stop; @@ -840,6 +889,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { if (needkw > 0) { rdstring rs = { 0, 0, NULL }; + rdstringc rsc = { 0, 0, NULL }; int nkeys = 0; filepos fp; @@ -855,22 +905,32 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { t.type == tok_word || t.type == tok_white || (t.type == tok_cmd && t.cmd == c__nbsp) || - (t.type == tok_cmd && t.cmd == c__escaped)) { + (t.type == tok_cmd && t.cmd == c__escaped) || + (t.type == tok_cmd && t.cmd == c_u)) { if (t.type == tok_white || - (t.type == tok_cmd && t.cmd == c__nbsp)) + (t.type == tok_cmd && t.cmd == c__nbsp)) { rdadd(&rs, ' '); - else + rdaddc(&rsc, ' '); + } else if (t.type == tok_cmd && t.cmd == c_u) { + rdadd(&rs, t.aux); + rdaddc(&rsc, '\\'); + rdaddsc(&rsc, t.origtext); + } else { rdadds(&rs, t.text); + rdaddsc(&rsc, t.origtext); + } } if (t.type != tok_rbrace) { error(err_kwunclosed, &t.pos); continue; } rdadd(&rs, 0); /* add string terminator */ + rdaddc(&rsc, 0); /* add string terminator */ dtor(t), t = get_token(in); /* eat right brace */ } - rdadd(&rs, 0); /* add string terminator */ + rdadd(&rs, 0); /* add string terminator */ + rdaddc(&rsc, 0); /* add string terminator */ /* See whether we have the right number of keywords. */ if ((needkw & 48) && nkeys > 0) @@ -901,6 +961,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { } par.keyword = rdtrim(&rs); + par.origkeyword = rdtrimc(&rsc); /* Move to EOP in case of needkw==8 or 16 (no body) */ if (needkw & 24) { @@ -1028,7 +1089,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { case tok_lbrace: error(err_unexbrace, &t.pos); /* Error recovery: push nop */ - sitem = mknew(struct stack_item); + sitem = snew(struct stack_item); sitem->type = stack_nop; sitem->fpos = t.pos; stk_push(parsestk, sitem); @@ -1141,21 +1202,39 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { if (t.type != tok_lbrace) { error(err_explbr, &t.pos); } else { - wd.text = NULL; - wd.type = toquotestyle(style); - wd.alt = NULL; - wd.aux = quote_Open; - wd.fpos = t.pos; - wd.breaks = FALSE; - if (!indexing || index_visible) - addword(wd, &whptr); - if (indexing) { - rdadd(&indexstr, L'"'); - addword(wd, &idximplicit); + /* + * Enforce that \q may not be used anywhere + * within \c. (It shouldn't be necessary + * since the whole point of \c should be + * that the user wants to exercise exact + * control over the glyphs used, and + * forbidding it has the useful effect of + * relieving some backends of having to + * make difficult decisions.) + */ + int stype; + + if (style != word_Code && style != word_WeakCode) { + wd.text = NULL; + wd.type = toquotestyle(style); + wd.alt = NULL; + wd.aux = quote_Open; + wd.fpos = t.pos; + wd.breaks = FALSE; + if (!indexing || index_visible) + addword(wd, &whptr); + if (indexing) { + rdadd(&indexstr, L'"'); + addword(wd, &idximplicit); + } + stype = stack_quote; + } else { + error(err_codequote, &t.pos); + stype = stack_nop; } - sitem = mknew(struct stack_item); + sitem = snew(struct stack_item); sitem->fpos = t.pos; - sitem->type = stack_quote; + sitem->type = stype; stk_push(parsestk, sitem); } break; @@ -1230,7 +1309,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { * delimiting the text marked by the link. */ dtor(t), t = get_token(in); - sitem = mknew(struct stack_item); + sitem = snew(struct stack_item); sitem->fpos = wd.fpos; sitem->type = stack_hyper; /* @@ -1296,7 +1375,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { error(err_nestedstyles, &t.pos); /* Error recovery: eat lbrace, push nop. */ dtor(t), t = get_token(in); - sitem = mknew(struct stack_item); + sitem = snew(struct stack_item); sitem->fpos = t.pos; sitem->type = stack_nop; stk_push(parsestk, sitem); @@ -1309,7 +1388,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { type == c_cw ? word_WeakCode : word_Emph); spcstyle = tospacestyle(style); - sitem = mknew(struct stack_item); + sitem = snew(struct stack_item); sitem->fpos = t.pos; sitem->type = stack_style; stk_push(parsestk, sitem); @@ -1323,12 +1402,12 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { error(err_nestedindex, &t.pos); /* Error recovery: eat lbrace, push nop. */ dtor(t), t = get_token(in); - sitem = mknew(struct stack_item); + sitem = snew(struct stack_item); sitem->fpos = t.pos; sitem->type = stack_nop; stk_push(parsestk, sitem); } - sitem = mknew(struct stack_item); + sitem = snew(struct stack_item); sitem->fpos = t.pos; sitem->type = stack_idx; dtor(t), t = get_token(in); @@ -1398,7 +1477,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { * sidetrack from the main thread of the * paragraph. */ - sitem = mknew(struct stack_item); + sitem = snew(struct stack_item); sitem->fpos = t.pos; sitem->type = stack_ualt; sitem->whptr = whptr; @@ -1464,6 +1543,8 @@ paragraph *read_input(input *in, indexdata *idx) { setpos(in, in->filenames[in->currindex]); in->charset = in->defcharset; in->csstate = charset_init_state; + in->wcpos = in->nwc = 0; + in->pushback_chars = NULL; read_file(&hptr, in, idx); } in->currindex++;