X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/halibut/blobdiff_plain/8b7d993d70c05f4d68bad30cdc08d8af771d0707..e34ba5c3b8a7bcb8fceb437125da3a6a6f6d2dba:/input.c diff --git a/input.c b/input.c index 4c3b10d..d607e86 100644 --- a/input.c +++ b/input.c @@ -82,6 +82,16 @@ static void macrocleanup(tree234 *macros) { freetree234(macros); } +static void input_configure(input *in, paragraph *cfg) { + assert(cfg->type == para_Config); + + if (!ustricmp(cfg->keyword, L"input-charset")) { + char *csname = utoa_dup(uadv(cfg->keyword)); + in->charset = charset_from_localenc(csname); + sfree(csname); + } +} + /* * Can return EOF */ @@ -103,36 +113,63 @@ static int get(input *in, filepos *pos) { return c; } else if (in->currfp) { - int c = getc(in->currfp); - if (c == EOF) { - fclose(in->currfp); - in->currfp = NULL; - } - /* Track line numbers, for error reporting */ - if (pos) - *pos = in->pos; - if (in->reportcols) { - switch (c) { - case '\t': - in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP; - break; - case '\n': - in->pos.col = 1; - in->pos.line++; - break; - default: - in->pos.col++; - break; + while (in->wcpos >= in->nwc) { + + int c = getc(in->currfp); + + if (c == EOF) { + fclose(in->currfp); + in->currfp = NULL; + return EOF; + } + /* Track line numbers, for error reporting */ + if (pos) + *pos = in->pos; + if (in->reportcols) { + switch (c) { + case '\t': + in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP; + break; + case '\n': + in->pos.col = 1; + in->pos.line++; + break; + default: + in->pos.col++; + break; + } + } else { + in->pos.col = -1; + if (c == '\n') + in->pos.line++; + } + + /* + * Do input character set translation, so that we return + * Unicode. + */ + { + char buf[1]; + char const *p; + int inlen; + + buf[0] = (char)c; + p = buf; + inlen = 1; + + in->nwc = charset_to_unicode(&p, &inlen, + in->wc, lenof(in->wc), + in->charset, &in->csstate, + NULL, 0); + assert(p == buf+1 && inlen == 0); + + in->wcpos = 0; } - } else { - in->pos.col = -1; - if (c == '\n') - in->pos.line++; } - /* FIXME: do input charmap translation. We should be returning - * Unicode here. */ - return c; + + return in->wc[in->wcpos++]; + } else return EOF; } @@ -528,6 +565,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { } type; word **whptr; /* to restore from \u alternatives */ word **idximplicit; /* to restore from \u alternatives */ + filepos fpos; } *sitem; stack parsestk; struct crossparaitem { @@ -592,14 +630,17 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { */ dtor(t), t = get_token(in); } - if (t.type == tok_eop || t.type == tok_eof) + if (t.type == tok_eop || t.type == tok_eof || + t.type == tok_rbrace) { /* might be } terminating \lcont */ + if (t.type == tok_rbrace) + already = TRUE; break; - else if (t.type == tok_cmd && t.cmd == c_c) + } else if (t.type == tok_cmd && t.cmd == c_c) { wtype = word_WeakCode; - else if (t.type == tok_cmd && t.cmd == c_e && - wtype == word_WeakCode) + } else if (t.type == tok_cmd && t.cmd == c_e && + wtype == word_WeakCode) { wtype = word_Emph; - else { + } else { error(err_brokencodepara, &t.pos); prev_para_type = par.type; addpara(par, ret); @@ -633,6 +674,16 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { continue; } + /* + * Also expect, and swallow, any whitespace after that + * (a newline before a code paragraph wouldn't be + * surprising). + */ + do { + dtor(t), t = get_token(in); + } while (t.type == tok_white); + already = TRUE; + if (cmd == c_lcont) { /* * \lcont causes a continuation of a list item into @@ -870,6 +921,10 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { already = TRUE;/* inhibit get_token at top of loop */ prev_para_type = par.type; addpara(par, ret); + + if (par.type == para_Config) { + input_configure(in, &par); + } continue; /* next paragraph */ } } @@ -975,6 +1030,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { /* Error recovery: push nop */ sitem = mknew(struct stack_item); sitem->type = stack_nop; + sitem->fpos = t.pos; stk_push(parsestk, sitem); break; case tok_rbrace: @@ -1011,7 +1067,8 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { } indexing = FALSE; rdadd(&indexstr, L'\0'); - index_merge(idx, FALSE, indexstr.text, idxwordlist); + index_merge(idx, FALSE, indexstr.text, + idxwordlist, &sitem->fpos); sfree(indexstr.text); } if (sitem->type & stack_hyper) { @@ -1097,6 +1154,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { addword(wd, &idximplicit); } sitem = mknew(struct stack_item); + sitem->fpos = t.pos; sitem->type = stack_quote; stk_push(parsestk, sitem); } @@ -1172,11 +1230,43 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { * delimiting the text marked by the link. */ dtor(t), t = get_token(in); + sitem = mknew(struct stack_item); + sitem->fpos = wd.fpos; + sitem->type = stack_hyper; + /* + * Special cases: \W{}\i, \W{}\ii + */ + if (t.type == tok_cmd && + (t.cmd == c_i || t.cmd == c_ii)) { + if (indexing) { + error(err_nestedindex, &t.pos); + } else { + /* Add an index-reference word with no + * text as yet */ + wd.type = word_IndexRef; + wd.text = NULL; + wd.alt = NULL; + wd.aux = 0; + wd.breaks = FALSE; + indexword = addword(wd, &whptr); + /* Set up a rdstring to read the + * index text */ + indexstr = nullrs; + /* Flags so that we do the Right + * Things with text */ + index_visible = (type != c_I); + index_downcase = (type == c_ii); + indexing = TRUE; + idxwordlist = NULL; + idximplicit = &idxwordlist; + + sitem->type |= stack_idx; + } + dtor(t), t = get_token(in); + } /* * Special cases: \W{}\c, \W{}\e, \W{}\cw */ - sitem = mknew(struct stack_item); - sitem->type = stack_hyper; if (t.type == tok_cmd && (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) { if (style != word_Normal) @@ -1207,6 +1297,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { /* Error recovery: eat lbrace, push nop. */ dtor(t), t = get_token(in); sitem = mknew(struct stack_item); + sitem->fpos = t.pos; sitem->type = stack_nop; stk_push(parsestk, sitem); } @@ -1219,6 +1310,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { word_Emph); spcstyle = tospacestyle(style); sitem = mknew(struct stack_item); + sitem->fpos = t.pos; sitem->type = stack_style; stk_push(parsestk, sitem); } @@ -1232,10 +1324,12 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { /* Error recovery: eat lbrace, push nop. */ dtor(t), t = get_token(in); sitem = mknew(struct stack_item); + sitem->fpos = t.pos; sitem->type = stack_nop; stk_push(parsestk, sitem); } sitem = mknew(struct stack_item); + sitem->fpos = t.pos; sitem->type = stack_idx; dtor(t), t = get_token(in); /* @@ -1305,6 +1399,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { * paragraph. */ sitem = mknew(struct stack_item); + sitem->fpos = t.pos; sitem->type = stack_ualt; sitem->whptr = whptr; sitem->idximplicit = idximplicit; @@ -1367,6 +1462,8 @@ paragraph *read_input(input *in, indexdata *idx) { in->currfp = fopen(in->filenames[in->currindex], "r"); if (in->currfp) { setpos(in, in->filenames[in->currindex]); + in->charset = in->defcharset; + in->csstate = charset_init_state; read_file(&hptr, in, idx); } in->currindex++;