c == '#' || c == '{' || c == '}' || c == '.') {
/* single-char command */
rdadd(&rs, c);
+ prevpos = rsc.pos;
} else if (c == 'u') {
int len = 0;
do {
/*
* Reads a single file (ie until get() returns EOF)
*/
-static void read_file(paragraph ***ret, input *in, indexdata *idx) {
+static void read_file(paragraph ***ret, input *in, indexdata *idx,
+ tree234 *macros) {
token t;
paragraph par;
word wd, **whptr, **idximplicit;
- tree234 *macros;
wchar_t utext[2], *wdtext;
int style, spcstyle;
int already;
stack_style = 2, /* \e, \c, \cw */
stack_idx = 4, /* \I, \i, \ii */
stack_hyper = 8, /* \W */
- stack_quote = 16, /* \q */
+ stack_quote = 16 /* \q */
} type;
word **whptr; /* to restore from \u alternatives */
word **idximplicit; /* to restore from \u alternatives */
t.text = NULL;
t.origtext = NULL;
- macros = newtree234(macrocmp);
already = FALSE;
crossparastk = stk_new();
continue;
}
+ while (t.type == tok_cmd &&
+ macrolookup(macros, in, t.text, &t.pos)) {
+ dtor(t), t = get_token(in);
+ }
+
/*
* This token begins a paragraph. See if it's one of the
* special commands that define a paragraph type.
needkw = 4;
break;
case c__comment:
- if (isbrace(in))
+ if (isbrace(in)) {
+ needkw = -1;
break; /* `\#{': isn't a comment para */
+ }
do {
dtor(t), t = get_token(in);
} while (t.type != tok_eop && t.type != tok_eof);
/* Get keywords. */
dtor(t), t = get_token(in);
fp = t.pos;
- while (t.type == tok_lbrace) {
+ while (t.type == tok_lbrace ||
+ (t.type == tok_white && (needkw & 24))) {
+ /*
+ * In paragraph types which can't accept any
+ * body text (such as \cfg), we are lenient
+ * about whitespace between keywords. This is
+ * important for \cfg in particular since it
+ * can often have many keywords which are long
+ * pieces of text, so it's useful to permit the
+ * user to wrap the line between them.
+ */
+ if (t.type == tok_white) {
+ dtor(t), t = get_token(in); /* eat the space */
+ continue;
+ }
/* This is a keyword. */
nkeys++;
/* FIXME: there will be bugs if anyone specifies an
}
stk_free(parsestk);
prev_para_type = par.type;
- addpara(par, ret);
+ /*
+ * Before we add the paragraph to the output list, we
+ * should check that there was any text in it at all; there
+ * might not be if (for example) the paragraph contained
+ * nothing but an unrecognised command sequence, and if we
+ * put an empty paragraph on the list it may confuse the
+ * back ends later on.
+ */
+ if (par.words) {
+ addpara(par, ret);
+ }
if (t.type == tok_eof)
already = TRUE;
}
* this cleanup doesn't happen.
*/
dtor(t);
- macrocleanup(macros);
stk_free(crossparastk);
}
+struct { /*
+ * Table mapping the leading bytes of an input file to a dedicated
+ * reader function. read_input() scans the entries in order and uses
+ * the first one whose magic matches the start of the file.
+ */
+ char const *magic; /* bytes expected at the very start of the file */
+ size_t nmagic; /* how many bytes of magic to compare; kept explicitly because a magic may contain NUL bytes */
+ void (*reader)(input *); /* called with the input when this entry matches */
+} magics[] = {
+ { "%!FontType1-", 12, &read_pfa_file },
+ { "%!PS-AdobeFont-", 15, &read_pfa_file },
+ { "\x80\x01", 2, &read_pfb_file },
+ { "StartFontMetrics", 16, &read_afm_file }, /* longest magic: 16 bytes, the size of the peek buffer in read_input() */
+ { "\x00\x01\x00\x00", 4, &read_sfnt_file }, /* embedded NULs: length cannot be derived with strlen() */
+ { "true", 4, &read_sfnt_file },
+};
+
paragraph *read_input(input *in, indexdata *idx) {
paragraph *head = NULL;
paragraph **hptr = &head;
+ tree234 *macros; /* macro table passed to every read_file() call below */
+ char mag[16]; /* leading bytes of the current file; must hold the longest magic in magics[] */
+ size_t len, i; /* len: bytes actually read into mag; i: index into magics[] */
+ void (*reader)(input *); /* reader of the matching magics[] entry, or NULL if none matched */
+
+ macros = newtree234(macrocmp); /* created once, before the per-file loop, so it is shared across files */
while (in->currindex < in->nfiles) {
in->currfp = fopen(in->filenames[in->currindex], "r");
in->csstate = charset_init_state;
in->wcpos = in->nwc = 0;
in->pushback_chars = NULL;
- read_file(&hptr, in, idx);
+ reader = NULL; /* reset for each file */
+ len = fread(mag, 1, sizeof(mag), in->currfp); /* peek at the start of the file to sniff its format.
+  * NOTE(review): the stream is opened with mode "r" above, yet some
+  * magics are binary; text-mode translation may matter on some
+  * platforms - confirm whether "rb" is needed.
+  * NOTE(review): a NULL check on in->currfp is not visible in this
+  * excerpt - confirm this call is guarded. */
+ for (i = 0; i < lenof(magics); i++) { /* first matching entry wins */
+ if (len >= magics[i].nmagic && /* the file may be shorter than this magic */
+ memcmp(mag, magics[i].magic, magics[i].nmagic) == 0) {
+ reader = magics[i].reader;
+ break;
+ }
+ }
+ rewind(in->currfp); /* undo the peek: reposition the stream at the start of the file */
+ if (reader == NULL)
+ read_file(&hptr, in, idx, macros); /* no magic recognised: parse it with read_file() */
+ else
+ (*reader)(in); /* recognised format: its reader receives only the input, not hptr/idx/macros */
}
in->currindex++;
}
+ macrocleanup(macros); /* released only after all files have been processed */
+
return head;
}