Explicit bounds checking on the bmp[] array.

[sgt/halibut] / input.c
diff --git a/input.c b/input.c

index 59ea326..365fec0 100644 (file)
--- a/input.c
+++ b/input.c
@@ -86,9 +86,7 @@ static void input_configure(input *in, paragraph *cfg) {
      assert(cfg->type == para_Config);
  
      if (!ustricmp(cfg->keyword, L"input-charset")) {
-       char *csname = utoa_dup(uadv(cfg->keyword), CS_ASCII);
-       in->charset = charset_from_localenc(csname);
-       sfree(csname);
+       in->charset = charset_from_ustr(&cfg->fpos, uadv(cfg->keyword));
      }
  }
  
@@ -221,6 +219,7 @@ enum {
      c_c,                              /* code */
      c_cfg,                            /* configuration directive */
      c_copyright,                      /* copyright statement */
+    c_cq,                             /* quoted code (sugar for \q{\cw{x}}) */
      c_cw,                             /* weak code */
      c_date,                           /* document processing date */
      c_dd,                             /* description list: description */
@@ -291,6 +290,7 @@ static void match_kw(token *tok) {
         {"c", c_c},                    /* code */
         {"cfg", c_cfg},                /* configuration directive */
         {"copyright", c_copyright},    /* copyright statement */
+       {"cq", c_cq},                  /* quoted code (sugar for \q{\cw{x}}) */
         {"cw", c_cw},                  /* weak code */
         {"date", c_date},              /* document processing date */
         {"dd", c_dd},                  /* description list: description */
@@ -612,6 +612,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
         word **whptr;                  /* to restore from \u alternatives */
         word **idximplicit;            /* to restore from \u alternatives */
         filepos fpos;
+       int in_code;
      } *sitem;
      stack parsestk;
      struct crossparaitem {
@@ -997,10 +998,11 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
          * Mid-paragraph commands:
          *
          *  \K \k
-        *  \c \cw
+        *  \c \cw \cq
          *  \e
          *  \i \ii
          *  \I
+         *  \q
          *  \u
          *  \W
          *  \date
@@ -1197,25 +1199,54 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
                     }
                     break;
                   case c_q:
+                  case c_cq:
+                    type = t.cmd;
                     dtor(t), t = get_token(in);
                     if (t.type != tok_lbrace) {
                         error(err_explbr, &t.pos);
                     } else {
-                       wd.text = NULL;
-                       wd.type = toquotestyle(style);
-                       wd.alt = NULL;
-                       wd.aux = quote_Open;
-                       wd.fpos = t.pos;
-                       wd.breaks = FALSE;
-                       if (!indexing || index_visible)
-                           addword(wd, &whptr);
-                       if (indexing) {
-                           rdadd(&indexstr, L'"');
-                           addword(wd, &idximplicit);
+                       /*
+                        * Enforce that \q may not be used anywhere
+                        * within \c or \cw. (Quoting there shouldn't
+                        * be necessary, since the whole point of \c
+                        * is that the user wants to exercise exact
+                        * control over the glyphs used; forbidding
+                        * it also has the useful effect of
+                        * relieving some backends of having to
+                        * make difficult decisions.)
+                        */
+                       int stype;
+
+                       if (style != word_Code && style != word_WeakCode) {
+                           wd.text = NULL;
+                           wd.type = toquotestyle(style);
+                           wd.alt = NULL;
+                           wd.aux = quote_Open;
+                           wd.fpos = t.pos;
+                           wd.breaks = FALSE;
+                           if (!indexing || index_visible)
+                               addword(wd, &whptr);
+                           if (indexing) {
+                               rdadd(&indexstr, L'"');
+                               addword(wd, &idximplicit);
+                           }
+                           stype = stack_quote;
+                       } else {
+                           error(err_codequote, &t.pos);
+                           stype = stack_nop;
                         }
                         sitem = snew(struct stack_item);
                         sitem->fpos = t.pos;
-                       sitem->type = stack_quote;
+                       sitem->type = stype;
+                        if (type == c_cq) {
+                            if (style != word_Normal) {
+                                error(err_nestedstyles, &t.pos);
+                            } else {
+                                style = word_WeakCode;
+                                spcstyle = tospacestyle(style);
+                                sitem->type |= stack_style;
+                            }
+                        }
                         stk_push(parsestk, sitem);
                     }
                     break;