*/
#include <wchar.h>
+#include <stdlib.h>
+#include <assert.h>
#include <time.h>
#include "halibut.h"
/*
 * Duplicate a wide string into storage obtained from snewn()/snew().
 * A NULL input does not produce NULL: it yields a newly allocated
 * one-element string holding just the terminator.
 */
-wchar_t *ustrdup(wchar_t *s) {
+wchar_t *ustrdup(wchar_t const *s) {
wchar_t *r;
if (s) {
- r = mknewa(wchar_t, 1+ustrlen(s));
+ r = snewn(1+ustrlen(s), wchar_t);
ustrcpy(r, s);
} else {
- r = mknew(wchar_t);
+ r = snew(wchar_t);
*r = 0;
}
return r;
}
-char *ustrtoa(wchar_t *s, char *outbuf, int size) {
- char *p;
+/*
+ * Convert the wide string `s' to `charset', storing at most size-1
+ * bytes plus a terminating NUL in `outbuf'.
+ *
+ * Returns the start of `outbuf', as the NULL-input path does, or
+ * NULL when `careful' is nonzero and charset_from_unicode() sets its
+ * error flag. If a conversion call produces no output, the text
+ * converted so far is left NUL-terminated in the buffer.
+ */
+static char *ustrtoa_internal(wchar_t const *s, char *outbuf, int size,
+                              int charset, int careful) {
+    char *p = outbuf;                  /* write cursor; outbuf stays fixed */
+    int len, ret, err;
+    charset_state state = CHARSET_INIT_STATE;
+
if (!s) {
*outbuf = '\0';
return outbuf;
}
- for (p = outbuf; *s && p < outbuf+size; p++,s++)
- *p = *s;
- if (p < outbuf+size)
- *p = '\0';
- else
- outbuf[size-1] = '\0';
+
+    len = ustrlen(s);
+    size--;                            /* leave room for terminating NUL */
+    *p = '\0';
+    while (len > 0) {
+        err = 0;
+        ret = charset_from_unicode(&s, &len, p, size, charset, &state,
+                                   (careful ? &err : NULL));
+        if (err)
+            return NULL;
+        if (!ret)
+            return outbuf;             /* no progress: keep what we have */
+        size -= ret;
+        p += ret;
+        *p = '\0';
+    }
+    /*
+     * Clean up: a call with no input lets the converter emit any
+     * pending output for `state'.
+     */
+    ret = charset_from_unicode(NULL, 0, p, size, charset, &state, NULL);
+    p += ret;
+    *p = '\0';
return outbuf;
}
-wchar_t *ustrfroma(char *s, wchar_t *outbuf, int size) {
- wchar_t *p;
+/*
+ * Fixed-buffer wide-to-multibyte conversion. Passes careful = FALSE,
+ * so no error flag is requested from the converter.
+ */
+char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset) {
+    char *converted;
+
+    converted = ustrtoa_internal(s, outbuf, size, charset, FALSE);
+    return converted;
+}
+
+/*
+ * As ustrtoa(), but passes careful = TRUE, so the internal routine
+ * returns NULL when the converter flags an error.
+ */
+char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset) {
+    char *converted;
+
+    converted = ustrtoa_internal(s, outbuf, size, charset, TRUE);
+    return converted;
+}
+
+/*
+ * Convert the `charset'-encoded string `s' to wide characters,
+ * storing at most size-1 of them plus a terminator in `outbuf'.
+ *
+ * Returns the start of `outbuf', as the NULL-input path does. If a
+ * conversion call produces no output, the text converted so far is
+ * left terminated in the buffer.
+ */
+wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset) {
+    wchar_t *p = outbuf;               /* write cursor; outbuf stays fixed */
+    int len, ret;
+    charset_state state = CHARSET_INIT_STATE;
+
if (!s) {
*outbuf = L'\0';
return outbuf;
}
- for (p = outbuf; *s && p < outbuf+size; p++,s++)
- *p = *s;
- if (p < outbuf+size)
- *p = '\0';
- else
- outbuf[size-1] = '\0';
+
+    len = strlen(s);
+    size--;                            /* allow for terminating NUL */
+    *p = L'\0';
+    while (len > 0) {
+        ret = charset_to_unicode(&s, &len, p, size,
+                                 charset, &state, NULL, 0);
+        if (!ret)
+            return outbuf;             /* no progress: keep what we have */
+        p += ret;
+        size -= ret;
+        *p = L'\0';
+    }
return outbuf;
}
-char *utoa_dup(wchar_t *s) {
- int len;
- char *buf = NULL;
+/*
+ * Convert the wide string `s' to `charset' in a dynamically sized
+ * buffer, which is returned.
+ *
+ * If `lenp' is non-NULL it receives the number of bytes written,
+ * excluding the terminating NUL (0 for a NULL input). If `careful'
+ * is nonzero and the converter flags an error, the buffer is freed
+ * and NULL is returned.
+ */
+char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, int careful)
+{
+    char *outbuf;
+    int outpos, outlen, len, ret, err;
+    charset_state state = CHARSET_INIT_STATE;
- len = ustrlen(s) + 1;
- do {
- buf = resize(buf, len);
- ustrtoa(s, buf, len);
- len = (3 * len) / 2 + 1; /* this guarantees a strict increase */
- } while ((int)strlen(buf) >= len-1);
+    if (!s) {
+        if (lenp)
+            *lenp = 0;                 /* don't leave the caller's length unset */
+        return dupstr("");
+    }
- buf = resize(buf, strlen(buf)+1);
- return buf;
+    len = ustrlen(s);
+
+    outlen = len + 10;
+    outbuf = snewn(outlen, char);
+
+    outpos = 0;
+    outbuf[outpos] = '\0';
+
+    while (len > 0) {
+        err = 0;
+        /* The "- 1" reserves the byte for the NUL stored below. */
+        ret = charset_from_unicode(&s, &len,
+                                   outbuf + outpos, outlen - outpos - 1,
+                                   charset, &state, (careful ? &err : NULL));
+        if (err) {
+            sfree(outbuf);
+            return NULL;
+        }
+        if (!ret) {
+            /* Nothing was written: grow the buffer and try again. */
+            outlen = outlen * 3 / 2;
+            outbuf = sresize(outbuf, outlen, char);
+        }
+        outpos += ret;
+        outbuf[outpos] = '\0';
+    }
+    /*
+     * Clean up: make room for any pending output, then call the
+     * converter with no input. As in the loop above, one byte is held
+     * back for the terminator so the write stays inside the buffer.
+     */
+    outlen = outpos + 32;
+    outbuf = sresize(outbuf, outlen, char);
+    ret = charset_from_unicode(NULL, 0,
+                               outbuf + outpos, outlen - outpos - 1,
+                               charset, &state, NULL);
+    outpos += ret;
+    outbuf[outpos] = '\0';
+    if (lenp)
+        *lenp = outpos;
+    return outbuf;
+}
+
+/*
+ * Allocating wide-to-multibyte conversion: no length is reported
+ * back and careful mode is off.
+ */
+char *utoa_dup(wchar_t const *s, int charset)
+{
+    char *converted;
+
+    converted = utoa_internal_dup(s, charset, NULL, FALSE);
+    return converted;
+}
+
+/*
+ * As utoa_dup(), but additionally hands `len' to the internal
+ * routine so it can report the output length.
+ */
+char *utoa_dup_len(wchar_t const *s, int charset, int *len)
+{
+    char *converted;
+
+    converted = utoa_internal_dup(s, charset, len, FALSE);
+    return converted;
+}
+
+/*
+ * As utoa_dup(), but with careful mode on: the internal routine
+ * returns NULL when the converter flags an error.
+ */
+char *utoa_careful_dup(wchar_t const *s, int charset)
+{
+    char *converted;
+
+    converted = utoa_internal_dup(s, charset, NULL, TRUE);
+    return converted;
}
/*
 * Convert the `charset'-encoded string `s' to a wide string in a
 * buffer sized by sresize(), and return that buffer trimmed to fit.
 * `s' must not be NULL: strlen() is applied to it directly.
 */
-wchar_t *ufroma_dup(char *s) {
+wchar_t *ufroma_dup(char const *s, int charset) {
int len;
wchar_t *buf = NULL;
len = strlen(s) + 1;
do {
- buf = resize(buf, len);
- ustrfroma(s, buf, len);
+ buf = sresize(buf, len, wchar_t);
+ ustrfroma(s, buf, len, charset);
len = (3 * len) / 2 + 1; /* this guarantees a strict increase */
/*
 * NOTE(review): `len' has already been enlarged by the time the test
 * below runs, so the converted length is compared against the next
 * buffer size, not the one just used. Confirm that truncation is
 * detected as intended.
 */
} while (ustrlen(buf) >= len-1);
- buf = resize(buf, ustrlen(buf)+1);
+ buf = sresize(buf, ustrlen(buf)+1, wchar_t);
return buf;
}
-int ustrlen(wchar_t *s) {
+/*
+ * Convert the wide string `s' to a newly allocated multibyte string
+ * using the C library locale. If wcstombs() fails or produces no
+ * output, fall back to utoa_dup() with the charset reported by
+ * charset_from_locale().
+ */
+char *utoa_locale_dup(wchar_t const *s)
+{
+    /*
+     * This variant uses the C library locale.
+     */
+    char *ret;
+    int len, outlen;
+    size_t siz;
+
+    len = ustrlen(s);
+
+    outlen = 1 + MB_CUR_MAX * len;
+    ret = snewn(outlen+1, char);
+
+    siz = wcstombs(ret, s, outlen);
+
+    /*
+     * wcstombs() signals an unconvertible character by returning
+     * (size_t)-1. That is nonzero, so it must be excluded explicitly
+     * before `siz' is used as an index.
+     */
+    if (siz != (size_t)-1 && siz != 0) {
+        assert(siz <= (size_t)(outlen));
+        ret[siz] = '\0';
+        ret = sresize(ret, siz+1, char);
+        return ret;
+    }
+
+    /*
+     * If that failed, try a different strategy (which we will also
+     * attempt in the total absence of wcstombs). Retrieve the
+     * locale's charset from nl_langinfo or equivalent, and use
+     * normal utoa_dup. The scratch buffer is no longer needed.
+     */
+    sfree(ret);
+    return utoa_dup(s, charset_from_locale());
+}
+
+/*
+ * Convert the multibyte string `s' to a newly allocated wide string
+ * using the C library locale. If mbstowcs() fails or produces no
+ * output, fall back to ufroma_dup() with the charset reported by
+ * charset_from_locale().
+ */
+wchar_t *ufroma_locale_dup(char const *s)
+{
+    /*
+     * This variant uses the C library locale.
+     */
+    wchar_t *ret;
+    int len, outlen;
+    size_t siz;
+
+    len = strlen(s);
+
+    outlen = 1 + 2*len;
+    ret = snewn(outlen+1, wchar_t);  /* be conservative */
+
+    siz = mbstowcs(ret, s, outlen);
+
+    /*
+     * mbstowcs() signals an invalid multibyte sequence by returning
+     * (size_t)-1. That is nonzero, so it must be excluded explicitly
+     * before `siz' is used as an index.
+     */
+    if (siz != (size_t)-1 && siz != 0) {
+        assert(siz <= (size_t)(outlen));
+        ret[siz] = L'\0';
+        ret = sresize(ret, siz+1, wchar_t);
+        return ret;
+    }
+
+    /*
+     * If that failed, try a different strategy (which we will also
+     * attempt in the total absence of mbstowcs). Retrieve the
+     * locale's charset from nl_langinfo or equivalent, and use
+     * normal ufroma_dup. The scratch buffer is no longer needed.
+     */
+    sfree(ret);
+    return ufroma_dup(s, charset_from_locale());
+}
+
+int ustrlen(wchar_t const *s) {
int len = 0;
while (*s++) len++;
return len;
return s + 1 + ustrlen(s);
}
-wchar_t *ustrcpy(wchar_t *dest, wchar_t *source) {
+wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source) {
wchar_t *ret = dest;
do {
*dest++ = *source;
return ret;
}
+/*
+ * Copy from `source' into `dest', repeating the terminator once the
+ * end of `source' has been reached. The element is stored before
+ * the counter is tested, so n+1 elements are written for n >= 0
+ * (and one element for n < 0). Returns `dest'.
+ */
+wchar_t *ustrncpy(wchar_t *dest, wchar_t const *source, int n) {
+    wchar_t *out = dest;
+
+    for (;;) {
+        *out++ = *source;
+        if (*source != L'\0')
+            source++;
+        if (n-- <= 0)
+            break;
+    }
+    return dest;
+}
+
int ustrcmp(wchar_t *lhs, wchar_t *rhs) {
if (!lhs && !rhs) return 0;
if (!lhs) return -1;
/*
 * Lower-case a single wide character. L'\0' always maps to itself.
 * With HAS_TOWLOWER defined this defers to towlower(); otherwise only
 * the ASCII range 'A'..'Z' is folded and everything else is returned
 * unchanged.
 */
wchar_t utolower(wchar_t c) {
if (c == L'\0')
return c; /* this property needed by ustricmp */
- /* FIXME: this doesn't even come close */
+#ifdef HAS_TOWLOWER
+ return towlower(c);
+#else
if (c >= 'A' && c <= 'Z')
c += 'a'-'A';
return c;
+#endif
}
/*
 * Report whether `c' is alphabetic. With HAS_ISWALPHA defined this
 * defers to iswalpha(); otherwise only the ASCII letters A-Z and a-z
 * count.
 */
int uisalpha(wchar_t c) {
- /* FIXME: this doesn't even come close */
+#ifdef HAS_ISWALPHA
+ return iswalpha(c);
+#else
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+#endif
}
-int ustricmp(wchar_t *lhs, wchar_t *rhs) {
+int ustricmp(wchar_t const *lhs, wchar_t const *rhs) {
wchar_t lc, rc;
while ((lc = utolower(*lhs)) == (rc = utolower(*rhs)) && lc && rc)
lhs++, rhs++;
return 1;
}
+/*
+ * Compare at most `maxlen' characters of two wide strings after
+ * folding each through utolower(). Returns -1, 0 or +1 according to
+ * the last pair of characters examined; if no pair is examined
+ * (maxlen <= 0) the result is 0.
+ */
+int ustrnicmp(wchar_t const *lhs, wchar_t const *rhs, int maxlen) {
+    wchar_t a = 0, b = 0;
+
+    for (; maxlen > 0; maxlen--, lhs++, rhs++) {
+        a = utolower(*lhs);
+        b = utolower(*rhs);
+        /* Stop on the first difference, or when both strings end. */
+        if (a != b || a == 0)
+            break;
+    }
+    return (a > b) - (a < b);
+}
+
wchar_t *ustrlow(wchar_t *s) {
wchar_t *p = s;
while (*p) {
return s;
}
-int utoi(wchar_t *s) {
+int utoi(wchar_t const *s) {
int sign = +1;
int n;
return n;
}
-int utob(wchar_t *s) {
+/*
+ * Parse a wide string as a double: convert it with utoa_dup() using
+ * CS_ASCII, run atof() on the result, then free the temporary copy.
+ */
+double utof(wchar_t const *s)
+{
+    char *ascii = utoa_dup(s, CS_ASCII);
+    double value;
+
+    value = atof(ascii);
+    sfree(ascii);
+    return value;
+}
+
+int utob(wchar_t const *s) {
if (!ustricmp(s, L"yes") || !ustricmp(s, L"y") ||
!ustricmp(s, L"true") || !ustricmp(s, L"t"))
return TRUE;
}
#define USTRFTIME_DELTA 128
-wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) {
- void *blk = NULL;
- wchar_t *wblk, *wp;
- char *fmt, *text, *p;
- size_t size = 0;
- size_t len;
-
/*
 * Append to `rs' the expansion of the single conversion
 * "%<formatchr>" for `timespec'. With HAS_WCSFTIME defined the
 * expansion comes straight from wcsftime(); otherwise strftime() is
 * used and its output is converted with ufroma_locale_dup().
 */
+static void ustrftime_internal(rdstring *rs, char formatchr,
+ const struct tm *timespec)
+{
/*
 * strftime has the entertaining property that it returns 0
 * _either_ on out-of-space _or_ on successful generation of
 * the empty string. The format built below therefore begins
 * with a space, so that a successful call can never
 * generate the empty string. Somebody throw a custard pie at
 * whoever was responsible for that. Please?
 */
- if (wfmt) {
- len = ustrlen(wfmt);
- fmt = mknewa(char, 2+len);
- ustrtoa(wfmt, fmt+1, len+1);
- fmt[0] = ' ';
- } else
- fmt = " %c";
-
- while (1) {
+
+#ifdef HAS_WCSFTIME
+ wchar_t *buf = NULL;
+ wchar_t fmt[4];
+ int size, ret;
+
+ fmt[0] = L' ';
+ fmt[1] = L'%';
+ /* Format chars are all ASCII, so conversion to Unicode is no problem */
+ fmt[2] = formatchr;
+ fmt[3] = L'\0';
+
+ size = 0;
/* Grow the buffer by USTRFTIME_DELTA until the call returns nonzero. */
+ do {
size += USTRFTIME_DELTA;
- blk = resize((char *)blk, size);
- len = strftime((char *)blk, size-1, fmt, timespec);
- if (len > 0)
- break;
+ buf = sresize(buf, size, wchar_t);
+ ret = (int) wcsftime(buf, size, fmt, timespec);
+ } while (ret == 0);
+
/* buf+1 skips the leading space that was put in the format. */
+ rdadds(rs, buf+1);
+ sfree(buf);
+#else
+ char *buf = NULL;
+ wchar_t *cvtbuf;
+ char fmt[4];
+ int size, ret;
+
+ fmt[0] = ' ';
+ fmt[1] = '%';
+ fmt[2] = formatchr;
+ fmt[3] = '\0';
+
+ size = 0;
/* Grow the buffer by USTRFTIME_DELTA until the call returns nonzero. */
+ do {
+ size += USTRFTIME_DELTA;
+ buf = sresize(buf, size, char);
+ ret = (int) strftime(buf, size, fmt, timespec);
+ } while (ret == 0);
+
/* buf+1 skips the leading space that was put in the format. */
+ cvtbuf = ufroma_locale_dup(buf+1);
+ rdadds(rs, cvtbuf);
+ sfree(cvtbuf);
+ sfree(buf);
+#endif
+}
+
/*
 * Expand a wide strftime-style format for `timespec' and return the
 * result of rdtrim() on the accumulated string. A NULL format is
 * treated as L"%c". "%%" yields a literal '%'; any other '%' that is
 * followed by a character is expanded one conversion at a time by
 * ustrftime_internal(); everything else, including a lone '%' at the
 * end of the format, is copied through unchanged.
 */
+wchar_t *ustrftime(const wchar_t *wfmt, const struct tm *timespec)
+{
+ rdstring rs = { 0, 0, NULL };
+
+ if (!wfmt)
+ wfmt = L"%c";
+
+ while (*wfmt) {
+ if (wfmt[0] == L'%' && wfmt[1] == L'%') {
+ rdadd(&rs, L'%');
+ wfmt += 2;
+ } else if (wfmt[0] == L'%' && wfmt[1]) {
+ ustrftime_internal(&rs, wfmt[1], timespec);
+ wfmt += 2;
+ } else {
+ rdadd(&rs, wfmt[0]);
+ wfmt++;
+ }
}
- /* Note: +1 for the terminating 0, -1 for the initial space in fmt */
- wblk = resize((wchar_t *)blk, len);
- text = mknewa(char, len);
- strftime(text, len, fmt+1, timespec);
- /*
- * We operate in the C locale, so this all ought to be kosher
- * ASCII. If we ever move outside ASCII machines, we may need
- * to make this more portable...
- */
- for (wp = wblk, p = text; *p; p++, wp++)
- *wp = *p;
- *wp = 0;
- if (wfmt)
- sfree(fmt);
- sfree(text);
- return wblk;
+ return rdtrim(&rs);
+}
+
+/*
+ * Check whether every character of a Unicode string can be
+ * represented in the given charset. The string is converted chunk
+ * by chunk into a scratch buffer whose contents are discarded; only
+ * the converter's error flag matters. Returns TRUE if no error was
+ * flagged, FALSE otherwise.
+ */
+int cvt_ok(int charset, const wchar_t *s)
+{
+    char scratch[256];
+    charset_state state = CHARSET_INIT_STATE;
+    int failed = 0;
+    int remaining;
+
+    for (remaining = ustrlen(s); remaining > 0; ) {
+        (void)charset_from_unicode(&s, &remaining, scratch, lenof(scratch),
+                                   charset, &state, &failed);
+        if (failed)
+            return FALSE;
+    }
+    return TRUE;
+}
+
+/*
+ * Look up a charset by a name supplied as a wide string: the name
+ * is converted with utoa_dup() using CS_ASCII and passed to
+ * charset_from_localenc(). If that yields CS_NONE, err_charset is
+ * reported through error() against `fpos' and CS_ASCII is returned
+ * instead, so this function never returns CS_NONE.
+ */
+int charset_from_ustr(filepos *fpos, const wchar_t *name)
+{
+    char *ascii_name = utoa_dup(name, CS_ASCII);
+    int result = charset_from_localenc(ascii_name);
+
+    if (result == CS_NONE) {
+        result = CS_ASCII;
+        error(err_charset, fpos, name);
+    }
+
+    sfree(ascii_name);
+    return result;
}