X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/halibut/blobdiff_plain/b774705014b73abf1052a937deef534d34ca259d..refs/heads/master:/input.c diff --git a/input.c b/input.c index 365fec0..1ec733c 100644 --- a/input.c +++ b/input.c @@ -49,7 +49,7 @@ static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text, m->name = name; m->text = text; if (add234(macros, m) != m) { - error(err_macroexists, &fpos, name); + err_macroexists(&fpos, name); sfree(name); sfree(text); } @@ -103,6 +103,8 @@ static int get(input *in, filepos *pos, rdstringc *rsc) { } else if (in->stack) { wchar_t c = in->stack->text[in->stack->ptr]; + if (pos) + *pos = in->stack->pos; if (in->stack->text[++in->stack->ptr] == L'\0') { macrostack *tmp = in->stack; in->stack = tmp->next; @@ -117,7 +119,8 @@ static int get(input *in, filepos *pos, rdstringc *rsc) { int c = getc(in->currfp); if (c == EOF) { - fclose(in->currfp); + if (in->wantclose) + fclose(in->currfp); in->currfp = NULL; return EOF; } @@ -424,6 +427,7 @@ token get_token(input *in) { c == '#' || c == '{' || c == '}' || c == '.') { /* single-char command */ rdadd(&rs, c); + prevpos = rsc.pos; } else if (c == 'u') { int len = 0; do { @@ -589,11 +593,11 @@ static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) { /* * Reads a single file (ie until get() returns EOF) */ -static void read_file(paragraph ***ret, input *in, indexdata *idx) { +static void read_file(paragraph ***ret, input *in, indexdata *idx, + tree234 *macros) { token t; paragraph par; word wd, **whptr, **idximplicit; - tree234 *macros; wchar_t utext[2], *wdtext; int style, spcstyle; int already; @@ -607,7 +611,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { stack_style = 2, /* \e, \c, \cw */ stack_idx = 4, /* \I, \i, \ii */ stack_hyper = 8, /* \W */ - stack_quote = 16, /* \q */ + stack_quote = 16 /* \q */ } type; word **whptr; /* to restore from \u alternatives */ word **idximplicit; /* to restore from \u alternatives */ @@ -629,7 +633,6 
@@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { t.text = NULL; t.origtext = NULL; - macros = newtree234(macrocmp); already = FALSE; crossparastk = stk_new(); @@ -690,7 +693,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { wtype == word_WeakCode) { wtype = word_Emph; } else { - error(err_brokencodepara, &t.pos); + err_brokencodepara(&t.pos); prev_para_type = par.type; addpara(par, ret); while (t.type != tok_eop) /* error recovery: */ @@ -719,7 +722,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { */ dtor(t), t = get_token(in); if (t.type != tok_lbrace) { - error(err_explbr, &t.pos); + err_explbr(&t.pos); continue; } @@ -762,7 +765,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { * don't give a cascade error. */ sitem->type = -1; - error(err_misplacedlcont, &t.pos); + err_misplacedlcont(&t.pos); } } else { /* @@ -787,7 +790,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { } else if (t.type == tok_rbrace) { struct crossparaitem *sitem = stk_pop(crossparastk); if (!sitem) - error(err_unexbrace, &t.pos); + err_unexbrace(&t.pos); else { switch (sitem->type) { case c_lcont: @@ -806,6 +809,11 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { continue; } + while (t.type == tok_cmd && + macrolookup(macros, in, t.text, &t.pos)) { + dtor(t), t = get_token(in); + } + /* * This token begins a paragraph. See if it's one of the * special commands that define a paragraph type. 
@@ -824,12 +832,14 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { needkw = -1; break; case c__invalid: - error(err_badparatype, t.text, &t.pos); + err_badparatype(t.text, &t.pos); needkw = 4; break; case c__comment: - if (isbrace(in)) + if (isbrace(in)) { + needkw = -1; break; /* `\#{': isn't a comment para */ + } do { dtor(t), t = get_token(in); } while (t.type != tok_eop && t.type != tok_eof); @@ -881,8 +891,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { par.type == para_UnnumberedChapter) { struct crossparaitem *sitem = stk_top(crossparastk); if (sitem && (sitem->seen_lcont || sitem->seen_quote)) { - error(err_sectmarkerinblock, - &t.pos, + err_sectmarkerinblock( &t.pos, (sitem->seen_lcont ? "lcont" : "quote")); } } @@ -896,7 +905,21 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { /* Get keywords. */ dtor(t), t = get_token(in); fp = t.pos; - while (t.type == tok_lbrace) { + while (t.type == tok_lbrace || + (t.type == tok_white && (needkw & 24))) { + /* + * In paragraph types which can't accept any + * body text (such as \cfg), we are lenient + * about whitespace between keywords. This is + * important for \cfg in particular since it + * can often have many keywords which are long + * pieces of text, so it's useful to permit the + * user to wrap the line between them. + */ + if (t.type == tok_white) { + dtor(t), t = get_token(in); /* eat the space */ + continue; + } /* This is a keyword. */ nkeys++; /* FIXME: there will be bugs if anyone specifies an @@ -921,7 +944,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { } } if (t.type != tok_rbrace) { - error(err_kwunclosed, &t.pos); + err_kwunclosed(&t.pos); continue; } rdadd(&rs, 0); /* add string terminator */ @@ -934,11 +957,11 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { /* See whether we have the right number of keywords. 
*/ if ((needkw & 48) && nkeys > 0) - error(err_kwillegal, &fp); + err_kwillegal(&fp); if ((needkw & 11) && nkeys == 0) - error(err_kwexpected, &fp); + err_kwexpected(&fp); if ((needkw & 5) && nkeys > 1) - error(err_kwtoomany, &fp); + err_kwtoomany(&fp); if (is_macro) { /* @@ -954,7 +977,8 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { rdadd(&macrotext, L'\n'); rdadds(&macrotext, t.text); dtor(t), t = get_token(in); - if (t.type == tok_eop) break; + if (t.type == tok_eop || t.type == tok_eof) + break; } macrodef(macros, rs.text, macrotext.text, fp); continue; /* next paragraph */ @@ -971,7 +995,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { if (t.type != tok_eop && t.type != tok_eof && (start_cmd == c__invalid || t.type != tok_cmd || t.cmd != start_cmd)) { - error(err_bodyillegal, &t.pos); + err_bodyillegal(&t.pos); /* Error recovery: eat the rest of the paragraph */ while (t.type != tok_eop && t.type != tok_eof && (start_cmd == c__invalid || @@ -1088,7 +1112,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { } break; case tok_lbrace: - error(err_unexbrace, &t.pos); + err_unexbrace(&t.pos); /* Error recovery: push nop */ sitem = snew(struct stack_item); sitem->type = stack_nop; @@ -1174,7 +1198,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { */ dtor(t), t = get_token(in); if (t.type != tok_lbrace) { - error(err_explbr, &t.pos); + err_explbr(&t.pos); } else { int braces = 1; while (braces > 0) { @@ -1184,7 +1208,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { else if (t.type == tok_rbrace) braces--; else if (t.type == tok_eof) { - error(err_commenteof, &t.pos); + err_commenteof(&t.pos); break; } } @@ -1203,7 +1227,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { type = t.cmd; dtor(t), t = get_token(in); if (t.type != tok_lbrace) { - error(err_explbr, &t.pos); + err_explbr(&t.pos); } else { /* * Enforce that \q may not be used 
anywhere @@ -1232,7 +1256,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { } stype = stack_quote; } else { - error(err_codequote, &t.pos); + err_codequote(&t.pos); stype = stack_nop; } sitem = snew(struct stack_item); @@ -1240,7 +1264,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { sitem->type = stype; if (type == c_cq) { if (style != word_Normal) { - error(err_nestedstyles, &t.pos); + err_nestedstyles(&t.pos); } else { style = word_WeakCode; spcstyle = tospacestyle(style); @@ -1278,7 +1302,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { wdtext = ustrftime(NULL, broken); wd.type = style; } else { - error(err_explbr, &t.pos); + err_explbr(&t.pos); wdtext = NULL; } } else { @@ -1300,7 +1324,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { } sfree(rs.text); if (t.type != tok_rbrace) { - error(err_kwexprbr, &t.pos); + err_kwexprbr(&t.pos); } } wd.alt = NULL; @@ -1330,7 +1354,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { if (t.type == tok_cmd && (t.cmd == c_i || t.cmd == c_ii)) { if (indexing) { - error(err_nestedindex, &t.pos); + err_nestedindex(&t.pos); } else { /* Add an index-reference word with no * text as yet */ @@ -1361,7 +1385,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { if (t.type == tok_cmd && (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) { if (style != word_Normal) - error(err_nestedstyles, &t.pos); + err_nestedstyles(&t.pos); else { style = (t.cmd == c_c ? word_Code : t.cmd == c_cw ? 
word_WeakCode : @@ -1372,7 +1396,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { dtor(t), t = get_token(in); } if (t.type != tok_lbrace) { - error(err_explbr, &t.pos); + err_explbr(&t.pos); sfree(sitem); } else { stk_push(parsestk, sitem); @@ -1384,7 +1408,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { case c_e: type = t.cmd; if (style != word_Normal) { - error(err_nestedstyles, &t.pos); + err_nestedstyles(&t.pos); /* Error recovery: eat lbrace, push nop. */ dtor(t), t = get_token(in); sitem = snew(struct stack_item); @@ -1394,7 +1418,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { } dtor(t), t = get_token(in); if (t.type != tok_lbrace) { - error(err_explbr, &t.pos); + err_explbr(&t.pos); } else { style = (type == c_c ? word_Code : type == c_cw ? word_WeakCode : @@ -1411,7 +1435,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { case c_I: type = t.cmd; if (indexing) { - error(err_nestedindex, &t.pos); + err_nestedindex(&t.pos); /* Error recovery: eat lbrace, push nop. */ dtor(t), t = get_token(in); sitem = snew(struct stack_item); @@ -1430,7 +1454,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { if (t.type == tok_cmd && (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) { if (style != word_Normal) - error(err_nestedstyles, &t.pos); + err_nestedstyles(&t.pos); else { style = (t.cmd == c_c ? word_Code : t.cmd == c_cw ? 
word_WeakCode : @@ -1442,7 +1466,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { } if (t.type != tok_lbrace) { sfree(sitem); - error(err_explbr, &t.pos); + err_explbr(&t.pos); } else { /* Add an index-reference word with no text as yet */ wd.type = word_IndexRef; @@ -1505,7 +1529,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { break; default: if (!macrolookup(macros, in, t.text, &t.pos)) - error(err_badmidcmd, t.text, &t.pos); + err_badmidcmd(t.text, &t.pos); break; } } @@ -1518,11 +1542,21 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { if (stk_top(parsestk)) { while ((sitem = stk_pop(parsestk))) sfree(sitem); - error(err_missingrbrace, &t.pos); + err_missingrbrace(&t.pos); } stk_free(parsestk); prev_para_type = par.type; - addpara(par, ret); + /* + * Before we add the paragraph to the output list, we + * should check that there was any text in it at all; there + * might not be if (for example) the paragraph contained + * nothing but an unrecognised command sequence, and if we + * put an empty paragraph on the list it may confuse the + * back ends later on. + */ + if (par.words) { + addpara(par, ret); + } if (t.type == tok_eof) already = TRUE; } @@ -1530,7 +1564,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { if (stk_top(crossparastk)) { void *p; - error(err_missingrbrace2, &t.pos); + err_missingrbrace2(&t.pos); while ((p = stk_pop(crossparastk))) sfree(p); } @@ -1540,27 +1574,89 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { * this cleanup doesn't happen. 
*/ dtor(t); - macrocleanup(macros); stk_free(crossparastk); } +struct { + char const *magic; + size_t nmagic; + int binary; + void (*reader)(input *); +} magics[] = { + { "%!FontType1-", 12, FALSE, &read_pfa_file }, + { "%!PS-AdobeFont-", 15, FALSE, &read_pfa_file }, + { "\x80\x01", 2, TRUE, &read_pfb_file }, + { "StartFontMetrics", 16, FALSE, &read_afm_file }, + { "\x00\x01\x00\x00", 4, TRUE, &read_sfnt_file }, + { "true", 4, TRUE, &read_sfnt_file }, +}; + paragraph *read_input(input *in, indexdata *idx) { paragraph *head = NULL; paragraph **hptr = &head; + tree234 *macros; + char mag[16]; + size_t len, i; + int binary; + void (*reader)(input *); + + macros = newtree234(macrocmp); while (in->currindex < in->nfiles) { - in->currfp = fopen(in->filenames[in->currindex], "r"); + setpos(in, in->filenames[in->currindex]); + in->charset = in->defcharset; + in->csstate = charset_init_state; + in->wcpos = in->nwc = 0; + in->pushback_chars = NULL; + + if (!in->filenames[in->currindex]) { + in->currfp = stdin; + in->wantclose = FALSE; /* don't fclose stdin */ + /* + * When reading standard input, we always expect to see + * an actual Halibut file and not any of the unusual + * input types like fonts. + */ + reader = NULL; + } else { + /* + * Open the file in binary mode to look for magic + * numbers. We'll switch to text mode if we find we're + * looking at a text file type. 
+ */ + in->currfp = fopen(in->filenames[in->currindex], "rb"); + binary = FALSE; /* default to Halibut source, which is text */ + if (in->currfp) { + in->wantclose = TRUE; + reader = NULL; + len = fread(mag, 1, sizeof(mag), in->currfp); + for (i = 0; i < lenof(magics); i++) { + if (len >= magics[i].nmagic && + memcmp(mag, magics[i].magic, magics[i].nmagic) == 0) { + reader = magics[i].reader; + binary = magics[i].binary; + break; + } + } + rewind(in->currfp); + } + if (!binary) { + fclose(in->currfp); + in->currfp = fopen(in->filenames[in->currindex], "r"); + } + } if (in->currfp) { - setpos(in, in->filenames[in->currindex]); - in->charset = in->defcharset; - in->csstate = charset_init_state; - in->wcpos = in->nwc = 0; - in->pushback_chars = NULL; - read_file(&hptr, in, idx); + if (reader == NULL) { + read_file(&hptr, in, idx, macros); + } else { + (*reader)(in); + } } in->currindex++; } + macrocleanup(macros); + return head; }