Add an error check for correct formatting in Deflate uncompressed

[sgt/halibut] / ustring.c
diff --git a/ustring.c b/ustring.c

index 50c02d4..3c5698c 100644 (file)
--- a/ustring.c
+++ b/ustring.c
@@ -11,10 +11,10 @@
  wchar_t *ustrdup(wchar_t const *s) {
      wchar_t *r;
      if (s) {
-       r = mknewa(wchar_t, 1+ustrlen(s));
+       r = snewn(1+ustrlen(s), wchar_t);
         ustrcpy(r, s);
      } else {
-       r = mknew(wchar_t);
+       r = snew(wchar_t);
         *r = 0;
      }
      return r;
@@ -100,7 +100,7 @@ char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, int careful)
      len = ustrlen(s);
  
      outlen = len + 10;
-    outbuf = mknewa(char, outlen);
+    outbuf = snewn(outlen, char);
  
      outpos = 0;
      outbuf[outpos] = '\0';
@@ -116,7 +116,7 @@ char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, int careful)
         }
         if (!ret) {
             outlen = outlen * 3 / 2;
-           outbuf = resize(outbuf, outlen);
+           outbuf = sresize(outbuf, outlen, char);
         }
         outpos += ret;
         outbuf[outpos] = '\0';
@@ -125,7 +125,7 @@ char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, int careful)
       * Clean up
       */
      outlen = outpos + 32;
-    outbuf = resize(outbuf, outlen);
+    outbuf = sresize(outbuf, outlen, char);
      ret = charset_from_unicode(NULL, 0,
                                outbuf + outpos, outlen - outpos + 1,
                                charset, &state, NULL);
@@ -157,12 +157,12 @@ wchar_t *ufroma_dup(char const *s, int charset) {
  
      len = strlen(s) + 1;
      do {
-       buf = resize(buf, len);
+       buf = sresize(buf, len, wchar_t);
         ustrfroma(s, buf, len, charset);
         len = (3 * len) / 2 + 1;       /* this guarantees a strict increase */
      } while (ustrlen(buf) >= len-1);
  
-    buf = resize(buf, ustrlen(buf)+1);
+    buf = sresize(buf, ustrlen(buf)+1, wchar_t);
      return buf;
  }
  
@@ -172,19 +172,20 @@ char *utoa_locale_dup(wchar_t const *s)
       * This variant uses the C library locale.
       */
      char *ret;
-    int len;
+    int len, outlen;
      size_t siz;
  
      len = ustrlen(s);
  
-    ret = mknewa(char, 1 + MB_CUR_MAX * len);
+    outlen = 1 + MB_CUR_MAX * len;
+    ret = snewn(outlen+1, char);
  
-    siz = wcstombs(ret, s, len);
+    siz = wcstombs(ret, s, outlen);
  
      if (siz) {
-       assert(siz <= MB_CUR_MAX * len);
+       assert(siz <= (size_t)(outlen));
         ret[siz] = '\0';
-       ret = resize(ret, siz+1);
+       ret = sresize(ret, siz+1, char);
         return ret;
      }
  
@@ -203,19 +204,20 @@ wchar_t *ufroma_locale_dup(char const *s)
       * This variant uses the C library locale.
       */
      wchar_t *ret;
-    int len;
+    int len, outlen;
      size_t siz;
  
      len = strlen(s);
  
-    ret = mknewa(wchar_t, 1 + 2*len);  /* be conservative */
+    outlen = 1 + 2*len;
+    ret = snewn(outlen+1, wchar_t);  /* be conservative */
  
-    siz = mbstowcs(ret, s, len);
+    siz = mbstowcs(ret, s, outlen);
  
      if (siz) {
-       assert(siz <= (size_t)(2 * len));
+       assert(siz <= (size_t)(outlen));
         ret[siz] = L'\0';
-       ret = resize(ret, siz+1);
+       ret = sresize(ret, siz+1, wchar_t);
         return ret;
      }
  
@@ -246,6 +248,15 @@ wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source) {
      return ret;
  }
  
+/*
+ * Copy at most n wide characters from source into dest, in the
+ * manner of strncpy: once the terminating NUL of source has been
+ * copied, the remainder of the n-element destination is filled
+ * with further NULs. If source has n or more characters before
+ * its NUL, dest is left unterminated. Exactly n elements of dest
+ * are written (none at all when n <= 0). Returns dest.
+ */
+wchar_t *ustrncpy(wchar_t *dest, wchar_t const *source, int n) {
+    wchar_t *ret = dest;
+    /*
+     * Test the count _before_ each store, so that we never write
+     * more than n elements. (A do-while here would store n+1.)
+     */
+    while (n-- > 0) {
+       *dest++ = *source;
+       /* Stay on the NUL once reached, so that it pads the rest. */
+       if (*source) source++;
+    }
+    return ret;
+}
+
  int ustrcmp(wchar_t *lhs, wchar_t *rhs) {
      if (!lhs && !rhs) return 0;
      if (!lhs) return -1;
@@ -279,7 +290,7 @@ int uisalpha(wchar_t c) {
  #endif
  }
  
-int ustricmp(wchar_t *lhs, wchar_t *rhs) {
+int ustricmp(wchar_t const *lhs, wchar_t const *rhs) {
      wchar_t lc, rc;
      while ((lc = utolower(*lhs)) == (rc = utolower(*rhs)) && lc && rc)
         lhs++, rhs++;
@@ -291,6 +302,19 @@ int ustricmp(wchar_t *lhs, wchar_t *rhs) {
         return 1;
  }
  
+/*
+ * Compare at most maxlen wide characters of lhs and rhs, folding
+ * each character through utolower first. Returns -1, 0 or +1
+ * according to whether lhs sorts before, equal to or after rhs
+ * within that prefix. A maxlen of zero or less compares equal.
+ */
+int ustrnicmp(wchar_t const *lhs, wchar_t const *rhs, int maxlen) {
+    wchar_t a = 0, b = 0;
+    for (; maxlen > 0; maxlen--, lhs++, rhs++) {
+       a = utolower(*lhs);
+       b = utolower(*rhs);
+       /* Stop at the first difference, or when both strings end. */
+       if (a != b || !a)
+           break;
+    }
+    return (a > b) - (a < b);
+}
+
  wchar_t *ustrlow(wchar_t *s) {
      wchar_t *p = s;
      while (*p) {
@@ -300,7 +324,7 @@ wchar_t *ustrlow(wchar_t *s) {
      return s;
  }
  
-int utoi(wchar_t *s) {
+int utoi(wchar_t const *s) {
      int sign = +1;
      int n;
  
@@ -316,10 +340,18 @@ int utoi(wchar_t *s) {
         s++;
      }
  
-    return n;
+    return n * sign;
+}
+
+/*
+ * Parse a wide string as a floating-point number. The string is
+ * first converted to a char string with utoa_dup using CS_ASCII,
+ * and that copy is then parsed and freed. Returns the parsed
+ * value, or zero if no number could be read.
+ */
+double utof(wchar_t const *s)
+{
+    char *cs = utoa_dup(s, CS_ASCII);
+    /*
+     * strtod rather than atof: the two agree on every valid input,
+     * but atof has undefined behaviour when the value cannot be
+     * represented, whereas strtod is well defined in that case.
+     */
+    double ret = strtod(cs, NULL);
+    sfree(cs);
+    return ret;
  }
  
-int utob(wchar_t *s) {
+int utob(wchar_t const *s) {
      if (!ustricmp(s, L"yes") || !ustricmp(s, L"y") ||
         !ustricmp(s, L"true") || !ustricmp(s, L"t"))
         return TRUE;
@@ -331,19 +363,9 @@ int uisdigit(wchar_t c) {
  }
  
  #define USTRFTIME_DELTA 128
-wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) {
-    void *blk = NULL;
-    wchar_t *wblk, *wp;
-    char *fmt, *text, *p;
-    size_t size = 0;
-    size_t len;
-
-    /*
-     * FIXME: really we ought to copy non-% parts of the format
-     * ourselves, and only resort to strftime for % parts. Also we
-     * should use wcsftime if it's present.
-     */
-
+/*
+ * Expand a single strftime-style conversion -- '%' followed by
+ * formatchr -- for the time in timespec, and pass the resulting
+ * wide string to rdadds together with rs.
+ *
+ * The format actually handed to the library is " %<formatchr>".
+ * Because of the leading space the output is never empty, so a
+ * zero return is taken to mean that the buffer was too small; the
+ * loop then enlarges it by USTRFTIME_DELTA and tries again. The
+ * space is skipped again (buf+1) when the text is handed on.
+ *
+ * If HAS_WCSFTIME is defined the expansion is done directly in
+ * wide characters by wcsftime; otherwise strftime is used and its
+ * output is converted with ufroma_locale_dup.
+ */
+static void ustrftime_internal(rdstring *rs, char formatchr,
+                              const struct tm *timespec)
+{
      /*
       * strftime has the entertaining property that it returns 0
       * _either_ on out-of-space _or_ on successful generation of
@@ -351,38 +373,73 @@ wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) {
       * generate the empty string. Somebody throw a custard pie at
       * whoever was responsible for that. Please?
       */
-    if (wfmt) {
-       len = ustrlen(wfmt);
-       fmt = mknewa(char, 2+len);
-       ustrtoa(wfmt, fmt+1, len+1, CS_ASCII);   /* CS_FIXME? */
-       fmt[0] = ' ';
-    } else
-       fmt = " %c";
-
-    while (1) {
+
+#ifdef HAS_WCSFTIME
+    wchar_t *buf = NULL;
+    wchar_t fmt[4];
+    int size, ret;
+
+    fmt[0] = L' ';
+    fmt[1] = L'%';
+    /* Format chars are all ASCII, so conversion to Unicode is no problem */
+    fmt[2] = formatchr;
+    fmt[3] = L'\0';
+
+    size = 0;
+    do {
         size += USTRFTIME_DELTA;
-       blk = resize((char *)blk, size);
-       len = strftime((char *)blk, size-1, fmt, timespec);
-       if (len > 0)
-           break;
+       buf = sresize(buf, size, wchar_t);
+       ret = (int) wcsftime(buf, size, fmt, timespec);
+    } while (ret == 0);
+
+    /* buf+1 skips the leading space generated by fmt[0]. */
+    rdadds(rs, buf+1);
+    sfree(buf);
+#else
+    char *buf = NULL;
+    wchar_t *cvtbuf;
+    char fmt[4];
+    int size, ret;
+
+    fmt[0] = ' ';
+    fmt[1] = '%';
+    fmt[2] = formatchr;
+    fmt[3] = '\0';
+
+    size = 0;
+    do {
+       size += USTRFTIME_DELTA;
+       buf = sresize(buf, size, char);
+       ret = (int) strftime(buf, size, fmt, timespec);
+    } while (ret == 0);
+
+    /* buf+1 skips the leading space generated by fmt[0]. */
+    cvtbuf = ufroma_locale_dup(buf+1);
+    rdadds(rs, cvtbuf);
+    sfree(cvtbuf);
+    sfree(buf);
+#endif
+}
+
+/*
+ * Format the time in timespec according to the wide-character
+ * format string wfmt, and return whatever rdtrim makes of the
+ * accumulated text. A NULL wfmt is treated as L"%c".
+ *
+ * Ordinary characters are passed through to rdadd unchanged, "%%"
+ * yields a single '%', and any other '%' together with the one
+ * character after it is expanded by ustrftime_internal. A lone
+ * '%' at the very end of the format is passed through literally.
+ *
+ * NOTE(review): the result is presumably a dynamically allocated
+ * string which the caller must free -- confirm against rdtrim.
+ */
+wchar_t *ustrftime(const wchar_t *wfmt, const struct tm *timespec)
+{
+    rdstring rs = { 0, 0, NULL };
+
+    if (!wfmt)
+       wfmt = L"%c";
+
+    while (*wfmt) {
+       if (wfmt[0] == L'%' && wfmt[1] == L'%') {
+           rdadd(&rs, L'%');
+           wfmt += 2;
+       } else if (wfmt[0] == L'%' && wfmt[1]) {
+           /*
+            * NOTE(review): wfmt[1] is narrowed from wchar_t to the
+            * char parameter of ustrftime_internal here.
+            */
+           ustrftime_internal(&rs, wfmt[1], timespec);
+           wfmt += 2;
+       } else {
+           rdadd(&rs, wfmt[0]);
+           wfmt++;
+       }
      }
  
-    /* Note: +1 for the terminating 0, -1 for the initial space in fmt */
-    wblk = resize((wchar_t *)blk, len);
-    text = mknewa(char, len);
-    strftime(text, len, fmt+1, timespec);
-    /*
-     * We operate in the C locale, so this all ought to be kosher
-     * ASCII. If we ever move outside ASCII machines, we may need
-     * to make this more portable...
-     */
-    for (wp = wblk, p = text; *p; p++, wp++)
-       *wp = *p;
-    *wp = 0;
-    if (wfmt)
-       sfree(fmt);
-    sfree(text);
-    return wblk;
+    return rdtrim(&rs);
  }
  
  /*
@@ -404,3 +461,28 @@ int cvt_ok(int charset, const wchar_t *s)
      }
      return TRUE;
  }
+
+/*
+ * Look up a character set by name, where the name is supplied as
+ * a wide string (which tends to be what callers have to hand).
+ * The name is narrowed to ASCII and given to
+ * charset_from_localenc. If that yields CS_NONE, err_charset is
+ * reported against fpos and CS_ASCII is returned in its place, so
+ * that callers can rely on getting a usable charset id back.
+ */
+int charset_from_ustr(filepos *fpos, const wchar_t *name)
+{
+    char *narrow = utoa_dup(name, CS_ASCII);
+    int id = charset_from_localenc(narrow);
+
+    /* The narrow copy is needed only for the lookup itself. */
+    sfree(narrow);
+
+    if (id != CS_NONE)
+       return id;
+
+    error(err_charset, fpos, name);
+    return CS_ASCII;
+}