m->name = name;
m->text = text;
if (add234(macros, m) != m) {
- error(err_macroexists, &fpos, name);
+ err_macroexists(&fpos, name);
sfree(name);
sfree(text);
}
int c = getc(in->currfp);
if (c == EOF) {
- fclose(in->currfp);
+ if (in->wantclose)
+ fclose(in->currfp);
in->currfp = NULL;
return EOF;
}
c == '#' || c == '{' || c == '}' || c == '.') {
/* single-char command */
rdadd(&rs, c);
+ prevpos = rsc.pos;
} else if (c == 'u') {
int len = 0;
do {
stack_style = 2, /* \e, \c, \cw */
stack_idx = 4, /* \I, \i, \ii */
stack_hyper = 8, /* \W */
- stack_quote = 16, /* \q */
+ stack_quote = 16 /* \q */
} type;
word **whptr; /* to restore from \u alternatives */
word **idximplicit; /* to restore from \u alternatives */
wtype == word_WeakCode) {
wtype = word_Emph;
} else {
- error(err_brokencodepara, &t.pos);
+ err_brokencodepara(&t.pos);
prev_para_type = par.type;
addpara(par, ret);
while (t.type != tok_eop) /* error recovery: */
*/
dtor(t), t = get_token(in);
if (t.type != tok_lbrace) {
- error(err_explbr, &t.pos);
+ err_explbr(&t.pos);
continue;
}
* don't give a cascade error.
*/
sitem->type = -1;
- error(err_misplacedlcont, &t.pos);
+ err_misplacedlcont(&t.pos);
}
} else {
/*
} else if (t.type == tok_rbrace) {
struct crossparaitem *sitem = stk_pop(crossparastk);
if (!sitem)
- error(err_unexbrace, &t.pos);
+ err_unexbrace(&t.pos);
else {
switch (sitem->type) {
case c_lcont:
needkw = -1;
break;
case c__invalid:
- error(err_badparatype, t.text, &t.pos);
+ err_badparatype(t.text, &t.pos);
needkw = 4;
break;
case c__comment:
- if (isbrace(in))
+ if (isbrace(in)) {
+ needkw = -1;
break; /* `\#{': isn't a comment para */
+ }
do {
dtor(t), t = get_token(in);
} while (t.type != tok_eop && t.type != tok_eof);
par.type == para_UnnumberedChapter) {
struct crossparaitem *sitem = stk_top(crossparastk);
if (sitem && (sitem->seen_lcont || sitem->seen_quote)) {
- error(err_sectmarkerinblock,
- &t.pos,
+ err_sectmarkerinblock( &t.pos,
(sitem->seen_lcont ? "lcont" : "quote"));
}
}
/* Get keywords. */
dtor(t), t = get_token(in);
fp = t.pos;
- while (t.type == tok_lbrace) {
+ while (t.type == tok_lbrace ||
+ (t.type == tok_white && (needkw & 24))) {
+ /*
+ * In paragraph types which can't accept any
+ * body text (such as \cfg), we are lenient
+ * about whitespace between keywords. This is
+ * important for \cfg in particular since it
+ * can often have many keywords which are long
+ * pieces of text, so it's useful to permit the
+ * user to wrap the line between them.
+ */
+ if (t.type == tok_white) {
+ dtor(t), t = get_token(in); /* eat the space */
+ continue;
+ }
/* This is a keyword. */
nkeys++;
/* FIXME: there will be bugs if anyone specifies an
}
}
if (t.type != tok_rbrace) {
- error(err_kwunclosed, &t.pos);
+ err_kwunclosed(&t.pos);
continue;
}
rdadd(&rs, 0); /* add string terminator */
/* See whether we have the right number of keywords. */
if ((needkw & 48) && nkeys > 0)
- error(err_kwillegal, &fp);
+ err_kwillegal(&fp);
if ((needkw & 11) && nkeys == 0)
- error(err_kwexpected, &fp);
+ err_kwexpected(&fp);
if ((needkw & 5) && nkeys > 1)
- error(err_kwtoomany, &fp);
+ err_kwtoomany(&fp);
if (is_macro) {
/*
rdadd(¯otext, L'\n');
rdadds(¯otext, t.text);
dtor(t), t = get_token(in);
- if (t.type == tok_eop) break;
+ if (t.type == tok_eop || t.type == tok_eof)
+ break;
}
macrodef(macros, rs.text, macrotext.text, fp);
continue; /* next paragraph */
if (t.type != tok_eop && t.type != tok_eof &&
(start_cmd == c__invalid ||
t.type != tok_cmd || t.cmd != start_cmd)) {
- error(err_bodyillegal, &t.pos);
+ err_bodyillegal(&t.pos);
/* Error recovery: eat the rest of the paragraph */
while (t.type != tok_eop && t.type != tok_eof &&
(start_cmd == c__invalid ||
}
break;
case tok_lbrace:
- error(err_unexbrace, &t.pos);
+ err_unexbrace(&t.pos);
/* Error recovery: push nop */
sitem = snew(struct stack_item);
sitem->type = stack_nop;
*/
dtor(t), t = get_token(in);
if (t.type != tok_lbrace) {
- error(err_explbr, &t.pos);
+ err_explbr(&t.pos);
} else {
int braces = 1;
while (braces > 0) {
else if (t.type == tok_rbrace)
braces--;
else if (t.type == tok_eof) {
- error(err_commenteof, &t.pos);
+ err_commenteof(&t.pos);
break;
}
}
type = t.cmd;
dtor(t), t = get_token(in);
if (t.type != tok_lbrace) {
- error(err_explbr, &t.pos);
+ err_explbr(&t.pos);
} else {
/*
* Enforce that \q may not be used anywhere
}
stype = stack_quote;
} else {
- error(err_codequote, &t.pos);
+ err_codequote(&t.pos);
stype = stack_nop;
}
sitem = snew(struct stack_item);
sitem->type = stype;
if (type == c_cq) {
if (style != word_Normal) {
- error(err_nestedstyles, &t.pos);
+ err_nestedstyles(&t.pos);
} else {
style = word_WeakCode;
spcstyle = tospacestyle(style);
wdtext = ustrftime(NULL, broken);
wd.type = style;
} else {
- error(err_explbr, &t.pos);
+ err_explbr(&t.pos);
wdtext = NULL;
}
} else {
}
sfree(rs.text);
if (t.type != tok_rbrace) {
- error(err_kwexprbr, &t.pos);
+ err_kwexprbr(&t.pos);
}
}
wd.alt = NULL;
if (t.type == tok_cmd &&
(t.cmd == c_i || t.cmd == c_ii)) {
if (indexing) {
- error(err_nestedindex, &t.pos);
+ err_nestedindex(&t.pos);
} else {
/* Add an index-reference word with no
* text as yet */
if (t.type == tok_cmd &&
(t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
if (style != word_Normal)
- error(err_nestedstyles, &t.pos);
+ err_nestedstyles(&t.pos);
else {
style = (t.cmd == c_c ? word_Code :
t.cmd == c_cw ? word_WeakCode :
dtor(t), t = get_token(in);
}
if (t.type != tok_lbrace) {
- error(err_explbr, &t.pos);
+ err_explbr(&t.pos);
sfree(sitem);
} else {
stk_push(parsestk, sitem);
case c_e:
type = t.cmd;
if (style != word_Normal) {
- error(err_nestedstyles, &t.pos);
+ err_nestedstyles(&t.pos);
/* Error recovery: eat lbrace, push nop. */
dtor(t), t = get_token(in);
sitem = snew(struct stack_item);
}
dtor(t), t = get_token(in);
if (t.type != tok_lbrace) {
- error(err_explbr, &t.pos);
+ err_explbr(&t.pos);
} else {
style = (type == c_c ? word_Code :
type == c_cw ? word_WeakCode :
case c_I:
type = t.cmd;
if (indexing) {
- error(err_nestedindex, &t.pos);
+ err_nestedindex(&t.pos);
/* Error recovery: eat lbrace, push nop. */
dtor(t), t = get_token(in);
sitem = snew(struct stack_item);
if (t.type == tok_cmd &&
(t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
if (style != word_Normal)
- error(err_nestedstyles, &t.pos);
+ err_nestedstyles(&t.pos);
else {
style = (t.cmd == c_c ? word_Code :
t.cmd == c_cw ? word_WeakCode :
}
if (t.type != tok_lbrace) {
sfree(sitem);
- error(err_explbr, &t.pos);
+ err_explbr(&t.pos);
} else {
/* Add an index-reference word with no text as yet */
wd.type = word_IndexRef;
break;
default:
if (!macrolookup(macros, in, t.text, &t.pos))
- error(err_badmidcmd, t.text, &t.pos);
+ err_badmidcmd(t.text, &t.pos);
break;
}
}
if (stk_top(parsestk)) {
while ((sitem = stk_pop(parsestk)))
sfree(sitem);
- error(err_missingrbrace, &t.pos);
+ err_missingrbrace(&t.pos);
}
stk_free(parsestk);
prev_para_type = par.type;
- addpara(par, ret);
+ /*
+ * Before we add the paragraph to the output list, we
+ * should check that there was any text in it at all; there
+ * might not be if (for example) the paragraph contained
+ * nothing but an unrecognised command sequence, and if we
+ * put an empty paragraph on the list it may confuse the
+ * back ends later on.
+ */
+ if (par.words) {
+ addpara(par, ret);
+ }
if (t.type == tok_eof)
already = TRUE;
}
if (stk_top(crossparastk)) {
void *p;
- error(err_missingrbrace2, &t.pos);
+ err_missingrbrace2(&t.pos);
while ((p = stk_pop(crossparastk)))
sfree(p);
}
stk_free(crossparastk);
}
+struct { /* one entry per recognised leading-byte signature; scanned in order by read_input(), first match wins */
+ char const *magic; /* signature bytes expected at the very start of the file */
+ size_t nmagic; /* how many bytes of `magic` to compare (explicit, since a signature may contain NUL bytes) */
+ int binary; /* TRUE: keep the stream as opened in "rb" mode; FALSE: read_input() reopens it in text mode */
+ void (*reader)(input *); /* called by read_input() in place of read_file() when this entry matches */
+} magics[] = {
+ { "%!FontType1-", 12, FALSE, &read_pfa_file },
+ { "%!PS-AdobeFont-", 15, FALSE, &read_pfa_file },
+ { "\x80\x01", 2, TRUE, &read_pfb_file },
+ { "StartFontMetrics", 16, FALSE, &read_afm_file },
+ { "\x00\x01\x00\x00", 4, TRUE, &read_sfnt_file }, /* embedded NULs: length cannot be derived with strlen() */
+ { "true", 4, TRUE, &read_sfnt_file },
+}; /* longest signature is 16 bytes, matching the 16-byte `mag` buffer that read_input() fills */
+
paragraph *read_input(input *in, indexdata *idx) {
paragraph *head = NULL;
paragraph **hptr = &head;
tree234 *macros;
+ char mag[16];
+ size_t len, i;
+ int binary;
+ void (*reader)(input *);
macros = newtree234(macrocmp);
while (in->currindex < in->nfiles) {
- in->currfp = fopen(in->filenames[in->currindex], "r");
+ setpos(in, in->filenames[in->currindex]);
+ in->charset = in->defcharset;
+ in->csstate = charset_init_state;
+ in->wcpos = in->nwc = 0;
+ in->pushback_chars = NULL;
+
+ if (!in->filenames[in->currindex]) {
+ in->currfp = stdin;
+ in->wantclose = FALSE; /* don't fclose stdin */
+ /*
+ * When reading standard input, we always expect to see
+ * an actual Halibut file and not any of the unusual
+ * input types like fonts.
+ */
+ reader = NULL;
+ } else {
+ /*
+ * Open the file in binary mode to look for magic
+ * numbers. We'll switch to text mode if we find we're
+ * looking at a text file type.
+ */
+ in->currfp = fopen(in->filenames[in->currindex], "rb");
+ binary = FALSE; /* default to Halibut source, which is text */
+ if (in->currfp) {
+ in->wantclose = TRUE;
+ reader = NULL;
+ len = fread(mag, 1, sizeof(mag), in->currfp);
+ for (i = 0; i < lenof(magics); i++) {
+ if (len >= magics[i].nmagic &&
+ memcmp(mag, magics[i].magic, magics[i].nmagic) == 0) {
+ reader = magics[i].reader;
+ binary = magics[i].binary;
+ break;
+ }
+ }
+ rewind(in->currfp);
+ }
+ if (!binary) {
+ fclose(in->currfp);
+ in->currfp = fopen(in->filenames[in->currindex], "r");
+ }
+ }
if (in->currfp) {
- setpos(in, in->filenames[in->currindex]);
- in->charset = in->defcharset;
- in->csstate = charset_init_state;
- in->wcpos = in->nwc = 0;
- in->pushback_chars = NULL;
- read_file(&hptr, in, idx, macros);
+ if (reader == NULL) {
+ read_file(&hptr, in, idx, macros);
+ } else {
+ (*reader)(in);
+ }
}
in->currindex++;
}