Explicit bounds checking on the bmp[] array.
[sgt/halibut] / input.c
diff --git a/input.c b/input.c
index 59ea326..365fec0 100644 (file)
--- a/input.c
+++ b/input.c
@@ -86,9 +86,7 @@ static void input_configure(input *in, paragraph *cfg) {
     assert(cfg->type == para_Config);
 
     if (!ustricmp(cfg->keyword, L"input-charset")) {
-       char *csname = utoa_dup(uadv(cfg->keyword), CS_ASCII);
-       in->charset = charset_from_localenc(csname);
-       sfree(csname);
+       in->charset = charset_from_ustr(&cfg->fpos, uadv(cfg->keyword));
     }
 }
 
@@ -221,6 +219,7 @@ enum {
     c_c,                              /* code */
     c_cfg,                            /* configuration directive */
     c_copyright,                      /* copyright statement */
+    c_cq,                             /* quoted code (sugar for \q{\cw{x}}) */
     c_cw,                             /* weak code */
     c_date,                           /* document processing date */
     c_dd,                             /* description list: description */
@@ -291,6 +290,7 @@ static void match_kw(token *tok) {
        {"c", c_c},                    /* code */
        {"cfg", c_cfg},                /* configuration directive */
        {"copyright", c_copyright},    /* copyright statement */
+       {"cq", c_cq},                  /* quoted code (sugar for \q{\cw{x}}) */
        {"cw", c_cw},                  /* weak code */
        {"date", c_date},              /* document processing date */
        {"dd", c_dd},                  /* description list: description */
@@ -612,6 +612,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
        word **whptr;                  /* to restore from \u alternatives */
        word **idximplicit;            /* to restore from \u alternatives */
        filepos fpos;
+       int in_code;
     } *sitem;
     stack parsestk;
     struct crossparaitem {
@@ -997,10 +998,11 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
         * Mid-paragraph commands:
         *
         *  \K \k
-        *  \c \cw
+        *  \c \cw \cq
         *  \e
         *  \i \ii
         *  \I
+         *  \q
         *  \u
         *  \W
         *  \date
@@ -1197,25 +1199,54 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
                    }
                    break;
                  case c_q:
+                  case c_cq:
+                    type = t.cmd;
                    dtor(t), t = get_token(in);
                    if (t.type != tok_lbrace) {
                        error(err_explbr, &t.pos);
                    } else {
-                       wd.text = NULL;
-                       wd.type = toquotestyle(style);
-                       wd.alt = NULL;
-                       wd.aux = quote_Open;
-                       wd.fpos = t.pos;
-                       wd.breaks = FALSE;
-                       if (!indexing || index_visible)
-                           addword(wd, &whptr);
-                       if (indexing) {
-                           rdadd(&indexstr, L'"');
-                           addword(wd, &idximplicit);
+                       /*
+                        * Enforce that \q and \cq may not be used
+                        * anywhere within \c or \cw. (Quoting there
+                        * shouldn't be necessary, since the whole
+                        * point of code style is that the user wants
+                        * exact control over the glyphs used; and
+                        * forbidding it has the useful effect of
+                        * relieving some backends of having to
+                        * make difficult decisions.)
+                        */
+                       int stype;
+
+                       if (style != word_Code && style != word_WeakCode) {
+                           wd.text = NULL;
+                           wd.type = toquotestyle(style);
+                           wd.alt = NULL;
+                           wd.aux = quote_Open;
+                           wd.fpos = t.pos;
+                           wd.breaks = FALSE;
+                           if (!indexing || index_visible)
+                               addword(wd, &whptr);
+                           if (indexing) {
+                               rdadd(&indexstr, L'"');
+                               addword(wd, &idximplicit);
+                           }
+                           stype = stack_quote;
+                       } else {
+                           error(err_codequote, &t.pos);
+                           stype = stack_nop;
                        }
                        sitem = snew(struct stack_item);
                        sitem->fpos = t.pos;
-                       sitem->type = stack_quote;
+                       sitem->type = stype;
+                        if (type == c_cq) {
+                            if (style != word_Normal) {
+                                error(err_nestedstyles, &t.pos);
+                            } else {
+                                style = word_WeakCode;
+                                spcstyle = tospacestyle(style);
+                                sitem->type |= stack_style;
+                            }
+                        }
                        stk_push(parsestk, sitem);
                    }
                    break;