static void unget(input *in, int c, filepos *pos) {
if (in->npushback >= in->pushbacksize) {
in->pushbacksize = in->npushback + 16;
- in->pushback = resize(in->pushback, in->pushbacksize);
+ in->pushback = sresize(in->pushback, in->pushbacksize, pushback);
}
in->pushback[in->npushback].chr = c;
in->pushback[in->npushback].pos = *pos; /* structure copy */
}
static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text,
filepos fpos) {
- macro *m = mknew(macro);
+ macro *m = snew(macro);
m->name = name;
m->text = text;
if (add234(macros, m) != m) {
- error(err_macroexists, &fpos, name);
+ err_macroexists(&fpos, name);
sfree(name);
sfree(text);
}
m.name = name;
gotit = find234(macros, &m, NULL);
if (gotit) {
- macrostack *expansion = mknew(macrostack);
+ macrostack *expansion = snew(macrostack);
expansion->next = in->stack;
expansion->text = gotit->text;
expansion->pos = *pos; /* structure copy */
assert(cfg->type == para_Config);
if (!ustricmp(cfg->keyword, L"input-charset")) {
- char *csname = utoa_dup(uadv(cfg->keyword), CS_ASCII);
- in->charset = charset_from_localenc(csname);
- sfree(csname);
+ in->charset = charset_from_ustr(&cfg->fpos, uadv(cfg->keyword));
}
}
}
else if (in->stack) {
wchar_t c = in->stack->text[in->stack->ptr];
+ if (pos)
+ *pos = in->stack->pos;
if (in->stack->text[++in->stack->ptr] == L'\0') {
macrostack *tmp = in->stack;
in->stack = tmp->next;
int c = getc(in->currfp);
if (c == EOF) {
- fclose(in->currfp);
+ if (in->wantclose)
+ fclose(in->currfp);
in->currfp = NULL;
return EOF;
}
c_c, /* code */
c_cfg, /* configuration directive */
c_copyright, /* copyright statement */
+ c_cq, /* quoted code (sugar for \q{\cw{x}}) */
c_cw, /* weak code */
c_date, /* document processing date */
c_dd, /* description list: description */
{"c", c_c}, /* code */
{"cfg", c_cfg}, /* configuration directive */
{"copyright", c_copyright}, /* copyright statement */
+ {"cq", c_cq}, /* quoted code (sugar for \q{\cw{x}}) */
{"cw", c_cw}, /* weak code */
{"date", c_date}, /* document processing date */
{"dd", c_dd}, /* description list: description */
c == '#' || c == '{' || c == '}' || c == '.') {
/* single-char command */
rdadd(&rs, c);
+ prevpos = rsc.pos;
} else if (c == 'u') {
int len = 0;
do {
word *mnewword;
if (!hptrptr)
return NULL;
- mnewword = mknew(word);
+ mnewword = snew(word);
*mnewword = newword; /* structure copy */
mnewword->next = NULL;
**hptrptr = mnewword;
* Adds a new paragraph to a linked list
*/
static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
- paragraph *mnewpara = mknew(paragraph);
+ paragraph *mnewpara = snew(paragraph);
*mnewpara = newpara; /* structure copy */
mnewpara->next = NULL;
**hptrptr = mnewpara;
/*
* Reads a single file (ie until get() returns EOF)
*/
-static void read_file(paragraph ***ret, input *in, indexdata *idx) {
+static void read_file(paragraph ***ret, input *in, indexdata *idx,
+ tree234 *macros) {
token t;
paragraph par;
word wd, **whptr, **idximplicit;
- tree234 *macros;
wchar_t utext[2], *wdtext;
int style, spcstyle;
int already;
stack_style = 2, /* \e, \c, \cw */
stack_idx = 4, /* \I, \i, \ii */
stack_hyper = 8, /* \W */
- stack_quote = 16, /* \q */
+ stack_quote = 16 /* \q */
} type;
word **whptr; /* to restore from \u alternatives */
word **idximplicit; /* to restore from \u alternatives */
filepos fpos;
+ int in_code;
} *sitem;
stack parsestk;
struct crossparaitem {
t.text = NULL;
t.origtext = NULL;
- macros = newtree234(macrocmp);
already = FALSE;
crossparastk = stk_new();
wtype == word_WeakCode) {
wtype = word_Emph;
} else {
- error(err_brokencodepara, &t.pos);
+ err_brokencodepara(&t.pos);
prev_para_type = par.type;
addpara(par, ret);
while (t.type != tok_eop) /* error recovery: */
*/
dtor(t), t = get_token(in);
if (t.type != tok_lbrace) {
- error(err_explbr, &t.pos);
+ err_explbr(&t.pos);
continue;
}
* nested lists, code paras etc). Hence, the previous
* paragraph must be of a list type.
*/
- sitem = mknew(struct crossparaitem);
+ sitem = snew(struct crossparaitem);
stop = (struct crossparaitem *)stk_top(crossparastk);
if (stop)
*sitem = *stop;
* don't give a cascade error.
*/
sitem->type = -1;
- error(err_misplacedlcont, &t.pos);
+ err_misplacedlcont(&t.pos);
}
} else {
/*
* block-quoted (typically they will be indented a
* bit).
*/
- sitem = mknew(struct crossparaitem);
+ sitem = snew(struct crossparaitem);
stop = (struct crossparaitem *)stk_top(crossparastk);
if (stop)
*sitem = *stop;
} else if (t.type == tok_rbrace) {
struct crossparaitem *sitem = stk_pop(crossparastk);
if (!sitem)
- error(err_unexbrace, &t.pos);
+ err_unexbrace(&t.pos);
else {
switch (sitem->type) {
case c_lcont:
continue;
}
+ while (t.type == tok_cmd &&
+ macrolookup(macros, in, t.text, &t.pos)) {
+ dtor(t), t = get_token(in);
+ }
+
/*
* This token begins a paragraph. See if it's one of the
* special commands that define a paragraph type.
needkw = -1;
break;
case c__invalid:
- error(err_badparatype, t.text, &t.pos);
+ err_badparatype(t.text, &t.pos);
needkw = 4;
break;
case c__comment:
- if (isbrace(in))
+ if (isbrace(in)) {
+ needkw = -1;
break; /* `\#{': isn't a comment para */
+ }
do {
dtor(t), t = get_token(in);
} while (t.type != tok_eop && t.type != tok_eof);
par.type == para_UnnumberedChapter) {
struct crossparaitem *sitem = stk_top(crossparastk);
if (sitem && (sitem->seen_lcont || sitem->seen_quote)) {
- error(err_sectmarkerinblock,
- &t.pos,
+ err_sectmarkerinblock( &t.pos,
(sitem->seen_lcont ? "lcont" : "quote"));
}
}
/* Get keywords. */
dtor(t), t = get_token(in);
fp = t.pos;
- while (t.type == tok_lbrace) {
+ while (t.type == tok_lbrace ||
+ (t.type == tok_white && (needkw & 24))) {
+ /*
+ * In paragraph types which can't accept any
+ * body text (such as \cfg), we are lenient
+ * about whitespace between keywords. This is
+ * important for \cfg in particular since it
+ * can often have many keywords which are long
+ * pieces of text, so it's useful to permit the
+ * user to wrap the line between them.
+ */
+ if (t.type == tok_white) {
+ dtor(t), t = get_token(in); /* eat the space */
+ continue;
+ }
/* This is a keyword. */
nkeys++;
/* FIXME: there will be bugs if anyone specifies an
}
}
if (t.type != tok_rbrace) {
- error(err_kwunclosed, &t.pos);
+ err_kwunclosed(&t.pos);
continue;
}
rdadd(&rs, 0); /* add string terminator */
/* See whether we have the right number of keywords. */
if ((needkw & 48) && nkeys > 0)
- error(err_kwillegal, &fp);
+ err_kwillegal(&fp);
if ((needkw & 11) && nkeys == 0)
- error(err_kwexpected, &fp);
+ err_kwexpected(&fp);
if ((needkw & 5) && nkeys > 1)
- error(err_kwtoomany, &fp);
+ err_kwtoomany(&fp);
if (is_macro) {
/*
rdadd(&macrotext, L'\n');
rdadds(&macrotext, t.text);
dtor(t), t = get_token(in);
- if (t.type == tok_eop) break;
+ if (t.type == tok_eop || t.type == tok_eof)
+ break;
}
macrodef(macros, rs.text, macrotext.text, fp);
continue; /* next paragraph */
if (t.type != tok_eop && t.type != tok_eof &&
(start_cmd == c__invalid ||
t.type != tok_cmd || t.cmd != start_cmd)) {
- error(err_bodyillegal, &t.pos);
+ err_bodyillegal(&t.pos);
/* Error recovery: eat the rest of the paragraph */
while (t.type != tok_eop && t.type != tok_eof &&
(start_cmd == c__invalid ||
* Mid-paragraph commands:
*
* \K \k
- * \c \cw
+ * \c \cw \cq
* \e
* \i \ii
* \I
+ * \q
* \u
* \W
* \date
}
break;
case tok_lbrace:
- error(err_unexbrace, &t.pos);
+ err_unexbrace(&t.pos);
/* Error recovery: push nop */
- sitem = mknew(struct stack_item);
+ sitem = snew(struct stack_item);
sitem->type = stack_nop;
sitem->fpos = t.pos;
stk_push(parsestk, sitem);
*/
dtor(t), t = get_token(in);
if (t.type != tok_lbrace) {
- error(err_explbr, &t.pos);
+ err_explbr(&t.pos);
} else {
int braces = 1;
while (braces > 0) {
else if (t.type == tok_rbrace)
braces--;
else if (t.type == tok_eof) {
- error(err_commenteof, &t.pos);
+ err_commenteof(&t.pos);
break;
}
}
}
break;
case c_q:
+ case c_cq:
+ type = t.cmd;
dtor(t), t = get_token(in);
if (t.type != tok_lbrace) {
- error(err_explbr, &t.pos);
+ err_explbr(&t.pos);
} else {
- wd.text = NULL;
- wd.type = toquotestyle(style);
- wd.alt = NULL;
- wd.aux = quote_Open;
- wd.fpos = t.pos;
- wd.breaks = FALSE;
- if (!indexing || index_visible)
- addword(wd, &whptr);
- if (indexing) {
- rdadd(&indexstr, L'"');
- addword(wd, &idximplicit);
+ /*
+ * Enforce that \q may not be used anywhere
+ * within \c. (It shouldn't be necessary
+ * since the whole point of \c should be
+ * that the user wants to exercise exact
+ * control over the glyphs used, and
+ * forbidding it has the useful effect of
+ * relieving some backends of having to
+ * make difficult decisions.)
+ */
+ int stype;
+
+ if (style != word_Code && style != word_WeakCode) {
+ wd.text = NULL;
+ wd.type = toquotestyle(style);
+ wd.alt = NULL;
+ wd.aux = quote_Open;
+ wd.fpos = t.pos;
+ wd.breaks = FALSE;
+ if (!indexing || index_visible)
+ addword(wd, &whptr);
+ if (indexing) {
+ rdadd(&indexstr, L'"');
+ addword(wd, &idximplicit);
+ }
+ stype = stack_quote;
+ } else {
+ err_codequote(&t.pos);
+ stype = stack_nop;
}
- sitem = mknew(struct stack_item);
+ sitem = snew(struct stack_item);
sitem->fpos = t.pos;
- sitem->type = stack_quote;
+ sitem->type = stype;
+ if (type == c_cq) {
+ if (style != word_Normal) {
+ err_nestedstyles(&t.pos);
+ } else {
+ style = word_WeakCode;
+ spcstyle = tospacestyle(style);
+ sitem->type |= stack_style;
+ }
+ }
stk_push(parsestk, sitem);
}
break;
wdtext = ustrftime(NULL, broken);
wd.type = style;
} else {
- error(err_explbr, &t.pos);
+ err_explbr(&t.pos);
wdtext = NULL;
}
} else {
}
sfree(rs.text);
if (t.type != tok_rbrace) {
- error(err_kwexprbr, &t.pos);
+ err_kwexprbr(&t.pos);
}
}
wd.alt = NULL;
* delimiting the text marked by the link.
*/
dtor(t), t = get_token(in);
- sitem = mknew(struct stack_item);
+ sitem = snew(struct stack_item);
sitem->fpos = wd.fpos;
sitem->type = stack_hyper;
/*
if (t.type == tok_cmd &&
(t.cmd == c_i || t.cmd == c_ii)) {
if (indexing) {
- error(err_nestedindex, &t.pos);
+ err_nestedindex(&t.pos);
} else {
/* Add an index-reference word with no
* text as yet */
if (t.type == tok_cmd &&
(t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
if (style != word_Normal)
- error(err_nestedstyles, &t.pos);
+ err_nestedstyles(&t.pos);
else {
style = (t.cmd == c_c ? word_Code :
t.cmd == c_cw ? word_WeakCode :
dtor(t), t = get_token(in);
}
if (t.type != tok_lbrace) {
- error(err_explbr, &t.pos);
+ err_explbr(&t.pos);
sfree(sitem);
} else {
stk_push(parsestk, sitem);
case c_e:
type = t.cmd;
if (style != word_Normal) {
- error(err_nestedstyles, &t.pos);
+ err_nestedstyles(&t.pos);
/* Error recovery: eat lbrace, push nop. */
dtor(t), t = get_token(in);
- sitem = mknew(struct stack_item);
+ sitem = snew(struct stack_item);
sitem->fpos = t.pos;
sitem->type = stack_nop;
stk_push(parsestk, sitem);
}
dtor(t), t = get_token(in);
if (t.type != tok_lbrace) {
- error(err_explbr, &t.pos);
+ err_explbr(&t.pos);
} else {
style = (type == c_c ? word_Code :
type == c_cw ? word_WeakCode :
word_Emph);
spcstyle = tospacestyle(style);
- sitem = mknew(struct stack_item);
+ sitem = snew(struct stack_item);
sitem->fpos = t.pos;
sitem->type = stack_style;
stk_push(parsestk, sitem);
case c_I:
type = t.cmd;
if (indexing) {
- error(err_nestedindex, &t.pos);
+ err_nestedindex(&t.pos);
/* Error recovery: eat lbrace, push nop. */
dtor(t), t = get_token(in);
- sitem = mknew(struct stack_item);
+ sitem = snew(struct stack_item);
sitem->fpos = t.pos;
sitem->type = stack_nop;
stk_push(parsestk, sitem);
}
- sitem = mknew(struct stack_item);
+ sitem = snew(struct stack_item);
sitem->fpos = t.pos;
sitem->type = stack_idx;
dtor(t), t = get_token(in);
if (t.type == tok_cmd &&
(t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
if (style != word_Normal)
- error(err_nestedstyles, &t.pos);
+ err_nestedstyles(&t.pos);
else {
style = (t.cmd == c_c ? word_Code :
t.cmd == c_cw ? word_WeakCode :
}
if (t.type != tok_lbrace) {
sfree(sitem);
- error(err_explbr, &t.pos);
+ err_explbr(&t.pos);
} else {
/* Add an index-reference word with no text as yet */
wd.type = word_IndexRef;
* sidetrack from the main thread of the
* paragraph.
*/
- sitem = mknew(struct stack_item);
+ sitem = snew(struct stack_item);
sitem->fpos = t.pos;
sitem->type = stack_ualt;
sitem->whptr = whptr;
break;
default:
if (!macrolookup(macros, in, t.text, &t.pos))
- error(err_badmidcmd, t.text, &t.pos);
+ err_badmidcmd(t.text, &t.pos);
break;
}
}
if (stk_top(parsestk)) {
while ((sitem = stk_pop(parsestk)))
sfree(sitem);
- error(err_missingrbrace, &t.pos);
+ err_missingrbrace(&t.pos);
}
stk_free(parsestk);
prev_para_type = par.type;
- addpara(par, ret);
+ /*
+ * Before we add the paragraph to the output list, we
+ * should check that there was any text in it at all; there
+ * might not be if (for example) the paragraph contained
+ * nothing but an unrecognised command sequence, and if we
+ * put an empty paragraph on the list it may confuse the
+ * back ends later on.
+ */
+ if (par.words) {
+ addpara(par, ret);
+ }
if (t.type == tok_eof)
already = TRUE;
}
if (stk_top(crossparastk)) {
void *p;
- error(err_missingrbrace2, &t.pos);
+ err_missingrbrace2(&t.pos);
while ((p = stk_pop(crossparastk)))
sfree(p);
}
* this cleanup doesn't happen.
*/
dtor(t);
- macrocleanup(macros);
stk_free(crossparastk);
}
+struct { /* signatures used by read_input to recognise non-Halibut input files */
+ char const *magic; /* bytes compared (memcmp) against the start of the file */
+ size_t nmagic; /* number of bytes of `magic' to compare; read_input probes only 16 bytes, so larger values can never match */
+ int binary; /* if FALSE, read_input closes the file and reopens it in text mode ("r") */
+ void (*reader)(input *); /* called by read_input in place of read_file when the signature matches */
+} magics[] = {
+ { "%!FontType1-", 12, FALSE, &read_pfa_file },
+ { "%!PS-AdobeFont-", 15, FALSE, &read_pfa_file },
+ { "\x80\x01", 2, TRUE, &read_pfb_file },
+ { "StartFontMetrics", 16, FALSE, &read_afm_file },
+ { "\x00\x01\x00\x00", 4, TRUE, &read_sfnt_file }, /* embedded NULs: matched via nmagic, not strlen */
+ { "true", 4, TRUE, &read_sfnt_file },
+}; /* entries are tried in order; the first match wins */
+
paragraph *read_input(input *in, indexdata *idx) {
paragraph *head = NULL;
paragraph **hptr = &head;
+ tree234 *macros;
+ char mag[16];
+ size_t len, i;
+ int binary;
+ void (*reader)(input *);
+
+ macros = newtree234(macrocmp);
while (in->currindex < in->nfiles) {
- in->currfp = fopen(in->filenames[in->currindex], "r");
+ setpos(in, in->filenames[in->currindex]);
+ in->charset = in->defcharset;
+ in->csstate = charset_init_state;
+ in->wcpos = in->nwc = 0;
+ in->pushback_chars = NULL;
+
+ if (!in->filenames[in->currindex]) {
+ in->currfp = stdin;
+ in->wantclose = FALSE; /* don't fclose stdin */
+ /*
+ * When reading standard input, we always expect to see
+ * an actual Halibut file and not any of the unusual
+ * input types like fonts.
+ */
+ reader = NULL;
+ } else {
+ /*
+ * Open the file in binary mode to look for magic
+ * numbers. We'll switch to text mode if we find we're
+ * looking at a text file type.
+ */
+ in->currfp = fopen(in->filenames[in->currindex], "rb");
+ binary = FALSE; /* default to Halibut source, which is text */
+ if (in->currfp) {
+ in->wantclose = TRUE;
+ reader = NULL;
+ len = fread(mag, 1, sizeof(mag), in->currfp);
+ for (i = 0; i < lenof(magics); i++) {
+ if (len >= magics[i].nmagic &&
+ memcmp(mag, magics[i].magic, magics[i].nmagic) == 0) {
+ reader = magics[i].reader;
+ binary = magics[i].binary;
+ break;
+ }
+ }
+ rewind(in->currfp);
+ }
+ if (!binary) {
+ fclose(in->currfp);
+ in->currfp = fopen(in->filenames[in->currindex], "r");
+ }
+ }
if (in->currfp) {
- setpos(in, in->filenames[in->currindex]);
- in->charset = in->defcharset;
- in->csstate = charset_init_state;
- in->wcpos = in->nwc = 0;
- in->pushback_chars = NULL;
- read_file(&hptr, in, idx);
+ if (reader == NULL) {
+ read_file(&hptr, in, idx, macros);
+ } else {
+ (*reader)(in);
+ }
}
in->currindex++;
}
+ macrocleanup(macros);
+
return head;
}