X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/halibut/blobdiff_plain/176f95481102941f215f5b36a17e8b43f6bd513e..refs/heads/master:/input.c diff --git a/input.c b/input.c index d8d44f9..1ec733c 100644 --- a/input.c +++ b/input.c @@ -49,7 +49,7 @@ static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text, m->name = name; m->text = text; if (add234(macros, m) != m) { - error(err_macroexists, &fpos, name); + err_macroexists(&fpos, name); sfree(name); sfree(text); } @@ -119,7 +119,8 @@ static int get(input *in, filepos *pos, rdstringc *rsc) { int c = getc(in->currfp); if (c == EOF) { - fclose(in->currfp); + if (in->wantclose) + fclose(in->currfp); in->currfp = NULL; return EOF; } @@ -610,7 +611,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, stack_style = 2, /* \e, \c, \cw */ stack_idx = 4, /* \I, \i, \ii */ stack_hyper = 8, /* \W */ - stack_quote = 16, /* \q */ + stack_quote = 16 /* \q */ } type; word **whptr; /* to restore from \u alternatives */ word **idximplicit; /* to restore from \u alternatives */ @@ -692,7 +693,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, wtype == word_WeakCode) { wtype = word_Emph; } else { - error(err_brokencodepara, &t.pos); + err_brokencodepara(&t.pos); prev_para_type = par.type; addpara(par, ret); while (t.type != tok_eop) /* error recovery: */ @@ -721,7 +722,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, */ dtor(t), t = get_token(in); if (t.type != tok_lbrace) { - error(err_explbr, &t.pos); + err_explbr(&t.pos); continue; } @@ -764,7 +765,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, * don't give a cascade error. */ sitem->type = -1; - error(err_misplacedlcont, &t.pos); + err_misplacedlcont(&t.pos); } } else { /* @@ -789,7 +790,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, } else if (t.type == tok_rbrace) { struct crossparaitem *sitem = stk_pop(crossparastk); if (!sitem) - error(err_unexbrace, &t.pos); + err_unexbrace(&t.pos); else { switch (sitem->type) { case c_lcont: @@ -831,12 +832,14 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, needkw = -1; break; case c__invalid: - error(err_badparatype, t.text, &t.pos); + err_badparatype(t.text, &t.pos); needkw = 4; break; case c__comment: - if (isbrace(in)) + if (isbrace(in)) { + needkw = -1; break; /* `\#{': isn't a comment para */ + } do { dtor(t), t = get_token(in); } while (t.type != tok_eop && t.type != tok_eof); @@ -888,8 +891,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, par.type == para_UnnumberedChapter) { struct crossparaitem *sitem = stk_top(crossparastk); if (sitem && (sitem->seen_lcont || sitem->seen_quote)) { - error(err_sectmarkerinblock, - &t.pos, + err_sectmarkerinblock( &t.pos, (sitem->seen_lcont ? "lcont" : "quote")); } } @@ -903,7 +905,21 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, /* Get keywords. */ dtor(t), t = get_token(in); fp = t.pos; - while (t.type == tok_lbrace) { + while (t.type == tok_lbrace || + (t.type == tok_white && (needkw & 24))) { + /* + * In paragraph types which can't accept any + * body text (such as \cfg), we are lenient + * about whitespace between keywords. This is + * important for \cfg in particular since it + * can often have many keywords which are long + * pieces of text, so it's useful to permit the + * user to wrap the line between them. + */ + if (t.type == tok_white) { + dtor(t), t = get_token(in); /* eat the space */ + continue; + } /* This is a keyword. */ nkeys++; /* FIXME: there will be bugs if anyone specifies an @@ -928,7 +944,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, } } if (t.type != tok_rbrace) { - error(err_kwunclosed, &t.pos); + err_kwunclosed(&t.pos); continue; } rdadd(&rs, 0); /* add string terminator */ @@ -941,11 +957,11 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, /* See whether we have the right number of keywords. */ if ((needkw & 48) && nkeys > 0) - error(err_kwillegal, &fp); + err_kwillegal(&fp); if ((needkw & 11) && nkeys == 0) - error(err_kwexpected, &fp); + err_kwexpected(&fp); if ((needkw & 5) && nkeys > 1) - error(err_kwtoomany, &fp); + err_kwtoomany(&fp); if (is_macro) { /* @@ -961,7 +977,8 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, rdadd(¯otext, L'\n'); rdadds(¯otext, t.text); dtor(t), t = get_token(in); - if (t.type == tok_eop) break; + if (t.type == tok_eop || t.type == tok_eof) + break; } macrodef(macros, rs.text, macrotext.text, fp); continue; /* next paragraph */ @@ -978,7 +995,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, if (t.type != tok_eop && t.type != tok_eof && (start_cmd == c__invalid || t.type != tok_cmd || t.cmd != start_cmd)) { - error(err_bodyillegal, &t.pos); + err_bodyillegal(&t.pos); /* Error recovery: eat the rest of the paragraph */ while (t.type != tok_eop && t.type != tok_eof && (start_cmd == c__invalid || @@ -1095,7 +1112,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, } break; case tok_lbrace: - error(err_unexbrace, &t.pos); + err_unexbrace(&t.pos); /* Error recovery: push nop */ sitem = snew(struct stack_item); sitem->type = stack_nop; @@ -1181,7 +1198,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, */ dtor(t), t = get_token(in); if (t.type != tok_lbrace) { - error(err_explbr, &t.pos); + err_explbr(&t.pos); } else { int braces = 1; while (braces > 0) { @@ -1191,7 +1208,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, else if (t.type == tok_rbrace) braces--; else if (t.type == tok_eof) { - error(err_commenteof, &t.pos); + err_commenteof(&t.pos); break; } } @@ -1210,7 +1227,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, type = t.cmd; dtor(t), t = get_token(in); if (t.type != tok_lbrace) { - error(err_explbr, &t.pos); + err_explbr(&t.pos); } else { /* * Enforce that \q may not be used anywhere @@ -1239,7 +1256,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, } stype = stack_quote; } else { - error(err_codequote, &t.pos); + err_codequote(&t.pos); stype = stack_nop; } sitem = snew(struct stack_item); @@ -1247,7 +1264,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, sitem->type = stype; if (type == c_cq) { if (style != word_Normal) { - error(err_nestedstyles, &t.pos); + err_nestedstyles(&t.pos); } else { style = word_WeakCode; spcstyle = tospacestyle(style); @@ -1285,7 +1302,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, wdtext = ustrftime(NULL, broken); wd.type = style; } else { - error(err_explbr, &t.pos); + err_explbr(&t.pos); wdtext = NULL; } } else { @@ -1307,7 +1324,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, } sfree(rs.text); if (t.type != tok_rbrace) { - error(err_kwexprbr, &t.pos); + err_kwexprbr(&t.pos); } } wd.alt = NULL; @@ -1337,7 +1354,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, if (t.type == tok_cmd && (t.cmd == c_i || t.cmd == c_ii)) { if (indexing) { - error(err_nestedindex, &t.pos); + err_nestedindex(&t.pos); } else { /* Add an index-reference word with no * text as yet */ @@ -1368,7 +1385,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, if (t.type == tok_cmd && (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) { if (style != word_Normal) - error(err_nestedstyles, &t.pos); + err_nestedstyles(&t.pos); else { style = (t.cmd == c_c ? word_Code : t.cmd == c_cw ? word_WeakCode : @@ -1379,7 +1396,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, dtor(t), t = get_token(in); } if (t.type != tok_lbrace) { - error(err_explbr, &t.pos); + err_explbr(&t.pos); sfree(sitem); } else { stk_push(parsestk, sitem); @@ -1391,7 +1408,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, case c_e: type = t.cmd; if (style != word_Normal) { - error(err_nestedstyles, &t.pos); + err_nestedstyles(&t.pos); /* Error recovery: eat lbrace, push nop. */ dtor(t), t = get_token(in); sitem = snew(struct stack_item); @@ -1401,7 +1418,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, } dtor(t), t = get_token(in); if (t.type != tok_lbrace) { - error(err_explbr, &t.pos); + err_explbr(&t.pos); } else { style = (type == c_c ? word_Code : type == c_cw ? word_WeakCode : @@ -1418,7 +1435,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, case c_I: type = t.cmd; if (indexing) { - error(err_nestedindex, &t.pos); + err_nestedindex(&t.pos); /* Error recovery: eat lbrace, push nop. */ dtor(t), t = get_token(in); sitem = snew(struct stack_item); @@ -1437,7 +1454,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, if (t.type == tok_cmd && (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) { if (style != word_Normal) - error(err_nestedstyles, &t.pos); + err_nestedstyles(&t.pos); else { style = (t.cmd == c_c ? word_Code : t.cmd == c_cw ? word_WeakCode : @@ -1449,7 +1466,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, } if (t.type != tok_lbrace) { sfree(sitem); - error(err_explbr, &t.pos); + err_explbr(&t.pos); } else { /* Add an index-reference word with no text as yet */ wd.type = word_IndexRef; @@ -1512,7 +1529,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, break; default: if (!macrolookup(macros, in, t.text, &t.pos)) - error(err_badmidcmd, t.text, &t.pos); + err_badmidcmd(t.text, &t.pos); break; } } @@ -1525,7 +1542,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, if (stk_top(parsestk)) { while ((sitem = stk_pop(parsestk))) sfree(sitem); - error(err_missingrbrace, &t.pos); + err_missingrbrace(&t.pos); } stk_free(parsestk); prev_para_type = par.type; @@ -1539,8 +1556,6 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, */ if (par.words) { addpara(par, ret); - } else { - error(err_emptypara, &par.fpos); } if (t.type == tok_eof) already = TRUE; @@ -1549,7 +1564,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, if (stk_top(crossparastk)) { void *p; - error(err_missingrbrace2, &t.pos); + err_missingrbrace2(&t.pos); while ((p = stk_pop(crossparastk))) sfree(p); } @@ -1563,22 +1578,80 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx, stk_free(crossparastk); } +struct { + char const *magic; + size_t nmagic; + int binary; + void (*reader)(input *); +} magics[] = { + { "%!FontType1-", 12, FALSE, &read_pfa_file }, + { "%!PS-AdobeFont-", 15, FALSE, &read_pfa_file }, + { "\x80\x01", 2, TRUE, &read_pfb_file }, + { "StartFontMetrics", 16, FALSE, &read_afm_file }, + { "\x00\x01\x00\x00", 4, TRUE, &read_sfnt_file }, + { "true", 4, TRUE, &read_sfnt_file }, +}; + paragraph *read_input(input *in, indexdata *idx) { paragraph *head = NULL; paragraph **hptr = &head; tree234 *macros; + char mag[16]; + size_t len, i; + int binary; + void (*reader)(input *); macros = newtree234(macrocmp); while (in->currindex < in->nfiles) { - in->currfp = fopen(in->filenames[in->currindex], "r"); + setpos(in, in->filenames[in->currindex]); + in->charset = in->defcharset; + in->csstate = charset_init_state; + in->wcpos = in->nwc = 0; + in->pushback_chars = NULL; + + if (!in->filenames[in->currindex]) { + in->currfp = stdin; + in->wantclose = FALSE; /* don't fclose stdin */ + /* + * When reading standard input, we always expect to see + * an actual Halibut file and not any of the unusual + * input types like fonts. + */ + reader = NULL; + } else { + /* + * Open the file in binary mode to look for magic + * numbers. We'll switch to text mode if we find we're + * looking at a text file type. + */ + in->currfp = fopen(in->filenames[in->currindex], "rb"); + binary = FALSE; /* default to Halibut source, which is text */ + if (in->currfp) { + in->wantclose = TRUE; + reader = NULL; + len = fread(mag, 1, sizeof(mag), in->currfp); + for (i = 0; i < lenof(magics); i++) { + if (len >= magics[i].nmagic && + memcmp(mag, magics[i].magic, magics[i].nmagic) == 0) { + reader = magics[i].reader; + binary = magics[i].binary; + break; + } + } + rewind(in->currfp); + } + if (!binary) { + fclose(in->currfp); + in->currfp = fopen(in->filenames[in->currindex], "r"); + } + } if (in->currfp) { - setpos(in, in->filenames[in->currindex]); - in->charset = in->defcharset; - in->csstate = charset_init_state; - in->wcpos = in->nwc = 0; - in->pushback_chars = NULL; - read_file(&hptr, in, idx, macros); + if (reader == NULL) { + read_file(&hptr, in, idx, macros); + } else { + (*reader)(in); + } } in->currindex++; }