More careful context hash calculation which doesn't depend on size
[sgt/halibut] / input.c
diff --git a/input.c b/input.c
index 1187e94..365fec0 100644 (file)
--- a/input.c
+++ b/input.c
@@ -18,7 +18,7 @@ static void setpos(input *in, char *fname) {
 static void unget(input *in, int c, filepos *pos) {
     if (in->npushback >= in->pushbacksize) {
        in->pushbacksize = in->npushback + 16;
-       in->pushback = resize(in->pushback, in->pushbacksize);
+       in->pushback = sresize(in->pushback, in->pushbacksize, pushback);
     }
     in->pushback[in->npushback].chr = c;
     in->pushback[in->npushback].pos = *pos;   /* structure copy */
@@ -45,7 +45,7 @@ static int macrocmp(void *av, void *bv) {
 }
 static void macrodef(tree234 *macros, wchar_t *name, wchar_t *text,
                     filepos fpos) {
-    macro *m = mknew(macro);
+    macro *m = snew(macro);
     m->name = name;
     m->text = text;
     if (add234(macros, m) != m) {
@@ -60,7 +60,7 @@ static int macrolookup(tree234 *macros, input *in, wchar_t *name,
     m.name = name;
     gotit = find234(macros, &m, NULL);
     if (gotit) {
-       macrostack *expansion = mknew(macrostack);
+       macrostack *expansion = snew(macrostack);
        expansion->next = in->stack;
        expansion->text = gotit->text;
        expansion->pos = *pos;         /* structure copy */
@@ -86,9 +86,7 @@ static void input_configure(input *in, paragraph *cfg) {
     assert(cfg->type == para_Config);
 
     if (!ustricmp(cfg->keyword, L"input-charset")) {
-       char *csname = utoa_dup(uadv(cfg->keyword), CS_ASCII);
-       in->charset = charset_from_localenc(csname);
-       sfree(csname);
+       in->charset = charset_from_ustr(&cfg->fpos, uadv(cfg->keyword));
     }
 }
 
@@ -221,6 +219,7 @@ enum {
     c_c,                              /* code */
     c_cfg,                            /* configuration directive */
     c_copyright,                      /* copyright statement */
+    c_cq,                             /* quoted code (sugar for \q{\cw{x}}) */
     c_cw,                             /* weak code */
     c_date,                           /* document processing date */
     c_dd,                             /* description list: description */
@@ -291,6 +290,7 @@ static void match_kw(token *tok) {
        {"c", c_c},                    /* code */
        {"cfg", c_cfg},                /* configuration directive */
        {"copyright", c_copyright},    /* copyright statement */
+       {"cq", c_cq},                  /* quoted code (sugar for \q{\cw{x}}) */
        {"cw", c_cw},                  /* weak code */
        {"date", c_date},              /* document processing date */
        {"dd", c_dd},                  /* description list: description */
@@ -560,7 +560,7 @@ static word *addword(word newword, word ***hptrptr) {
     word *mnewword;
     if (!hptrptr)
        return NULL;
-    mnewword = mknew(word);
+    mnewword = snew(word);
     *mnewword = newword;              /* structure copy */
     mnewword->next = NULL;
     **hptrptr = mnewword;
@@ -572,7 +572,7 @@ static word *addword(word newword, word ***hptrptr) {
  * Adds a new paragraph to a linked list
  */
 static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
-    paragraph *mnewpara = mknew(paragraph);
+    paragraph *mnewpara = snew(paragraph);
     *mnewpara = newpara;              /* structure copy */
     mnewpara->next = NULL;
     **hptrptr = mnewpara;
@@ -612,6 +612,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
        word **whptr;                  /* to restore from \u alternatives */
        word **idximplicit;            /* to restore from \u alternatives */
        filepos fpos;
+       int in_code;
     } *sitem;
     stack parsestk;
     struct crossparaitem {
@@ -739,7 +740,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
                 * nested lists, code paras etc). Hence, the previous
                 * paragraph must be of a list type.
                 */
-               sitem = mknew(struct crossparaitem);
+               sitem = snew(struct crossparaitem);
                stop = (struct crossparaitem *)stk_top(crossparastk);
                if (stop)
                    *sitem = *stop;
@@ -769,7 +770,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
                 * block-quoted (typically they will be indented a
                 * bit).
                 */
-               sitem = mknew(struct crossparaitem);
+               sitem = snew(struct crossparaitem);
                stop = (struct crossparaitem *)stk_top(crossparastk);
                if (stop)
                    *sitem = *stop;
@@ -904,11 +905,16 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
                           t.type == tok_word || 
                           t.type == tok_white ||
                           (t.type == tok_cmd && t.cmd == c__nbsp) ||
-                          (t.type == tok_cmd && t.cmd == c__escaped)) {
+                          (t.type == tok_cmd && t.cmd == c__escaped) ||
+                          (t.type == tok_cmd && t.cmd == c_u)) {
                        if (t.type == tok_white ||
                            (t.type == tok_cmd && t.cmd == c__nbsp)) {
                            rdadd(&rs, ' ');
                            rdaddc(&rsc, ' ');
+                       } else if (t.type == tok_cmd && t.cmd == c_u) {
+                           rdadd(&rs, t.aux);
+                           rdaddc(&rsc, '\\');
+                           rdaddsc(&rsc, t.origtext);
                        } else {
                            rdadds(&rs, t.text);
                            rdaddsc(&rsc, t.origtext);
@@ -992,10 +998,11 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
         * Mid-paragraph commands:
         *
         *  \K \k
-        *  \c \cw
+        *  \c \cw \cq
         *  \e
         *  \i \ii
         *  \I
+         *  \q
         *  \u
         *  \W
         *  \date
@@ -1083,7 +1090,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
              case tok_lbrace:
                error(err_unexbrace, &t.pos);
                /* Error recovery: push nop */
-               sitem = mknew(struct stack_item);
+               sitem = snew(struct stack_item);
                sitem->type = stack_nop;
                sitem->fpos = t.pos;
                stk_push(parsestk, sitem);
@@ -1192,25 +1199,54 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
                    }
                    break;
                  case c_q:
+                  case c_cq:
+                    type = t.cmd;
                    dtor(t), t = get_token(in);
                    if (t.type != tok_lbrace) {
                        error(err_explbr, &t.pos);
                    } else {
-                       wd.text = NULL;
-                       wd.type = toquotestyle(style);
-                       wd.alt = NULL;
-                       wd.aux = quote_Open;
-                       wd.fpos = t.pos;
-                       wd.breaks = FALSE;
-                       if (!indexing || index_visible)
-                           addword(wd, &whptr);
-                       if (indexing) {
-                           rdadd(&indexstr, L'"');
-                           addword(wd, &idximplicit);
+                       /*
+                        * Enforce that \q may not be used anywhere
+                        * within \c or \cw. (Quoting inside code
+                        * shouldn't be needed, since the point of
+                        * \c is that the user wants exact control
+                        * over the glyphs used; and forbidding it
+                        * has the useful effect of relieving some
+                        * backends of having to make difficult
+                        * decisions.)
+                        */
+                       int stype;
+
+                       if (style != word_Code && style != word_WeakCode) {
+                           wd.text = NULL;
+                           wd.type = toquotestyle(style);
+                           wd.alt = NULL;
+                           wd.aux = quote_Open;
+                           wd.fpos = t.pos;
+                           wd.breaks = FALSE;
+                           if (!indexing || index_visible)
+                               addword(wd, &whptr);
+                           if (indexing) {
+                               rdadd(&indexstr, L'"');
+                               addword(wd, &idximplicit);
+                           }
+                           stype = stack_quote;
+                       } else {
+                           error(err_codequote, &t.pos);
+                           stype = stack_nop;
                        }
-                       sitem = mknew(struct stack_item);
+                       sitem = snew(struct stack_item);
                        sitem->fpos = t.pos;
-                       sitem->type = stack_quote;
+                       sitem->type = stype;
+                        if (type == c_cq) {
+                            if (style != word_Normal) {
+                                error(err_nestedstyles, &t.pos);
+                            } else {
+                                style = word_WeakCode;
+                                spcstyle = tospacestyle(style);
+                                sitem->type |= stack_style;
+                            }
+                        }
                        stk_push(parsestk, sitem);
                    }
                    break;
@@ -1285,7 +1321,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
                         * delimiting the text marked by the link.
                         */
                        dtor(t), t = get_token(in);
-                       sitem = mknew(struct stack_item);
+                       sitem = snew(struct stack_item);
                        sitem->fpos = wd.fpos;
                        sitem->type = stack_hyper;
                        /*
@@ -1351,7 +1387,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
                        error(err_nestedstyles, &t.pos);
                        /* Error recovery: eat lbrace, push nop. */
                        dtor(t), t = get_token(in);
-                       sitem = mknew(struct stack_item);
+                       sitem = snew(struct stack_item);
                        sitem->fpos = t.pos;
                        sitem->type = stack_nop;
                        stk_push(parsestk, sitem);
@@ -1364,7 +1400,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
                                 type == c_cw ? word_WeakCode :
                                 word_Emph);
                        spcstyle = tospacestyle(style);
-                       sitem = mknew(struct stack_item);
+                       sitem = snew(struct stack_item);
                        sitem->fpos = t.pos;
                        sitem->type = stack_style;
                        stk_push(parsestk, sitem);
@@ -1378,12 +1414,12 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
                        error(err_nestedindex, &t.pos);
                        /* Error recovery: eat lbrace, push nop. */
                        dtor(t), t = get_token(in);
-                       sitem = mknew(struct stack_item);
+                       sitem = snew(struct stack_item);
                        sitem->fpos = t.pos;
                        sitem->type = stack_nop;
                        stk_push(parsestk, sitem);
                    }
-                   sitem = mknew(struct stack_item);
+                   sitem = snew(struct stack_item);
                    sitem->fpos = t.pos;
                    sitem->type = stack_idx;
                    dtor(t), t = get_token(in);
@@ -1453,7 +1489,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
                         * sidetrack from the main thread of the
                         * paragraph.
                         */
-                       sitem = mknew(struct stack_item);
+                       sitem = snew(struct stack_item);
                        sitem->fpos = t.pos;
                        sitem->type = stack_ualt;
                        sitem->whptr = whptr;