X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/halibut/blobdiff_plain/9badd775749c8ac319b44d37c7514bc7e1a1804d..8f664e7e91c918cd13248f6b684580c4dd2cdb31:/ustring.c diff --git a/ustring.c b/ustring.c index 50c02d4..3c5698c 100644 --- a/ustring.c +++ b/ustring.c @@ -11,10 +11,10 @@ wchar_t *ustrdup(wchar_t const *s) { wchar_t *r; if (s) { - r = mknewa(wchar_t, 1+ustrlen(s)); + r = snewn(1+ustrlen(s), wchar_t); ustrcpy(r, s); } else { - r = mknew(wchar_t); + r = snew(wchar_t); *r = 0; } return r; @@ -100,7 +100,7 @@ char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, int careful) len = ustrlen(s); outlen = len + 10; - outbuf = mknewa(char, outlen); + outbuf = snewn(outlen, char); outpos = 0; outbuf[outpos] = '\0'; @@ -116,7 +116,7 @@ char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, int careful) } if (!ret) { outlen = outlen * 3 / 2; - outbuf = resize(outbuf, outlen); + outbuf = sresize(outbuf, outlen, char); } outpos += ret; outbuf[outpos] = '\0'; @@ -125,7 +125,7 @@ char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, int careful) * Clean up */ outlen = outpos + 32; - outbuf = resize(outbuf, outlen); + outbuf = sresize(outbuf, outlen, char); ret = charset_from_unicode(NULL, 0, outbuf + outpos, outlen - outpos + 1, charset, &state, NULL); @@ -157,12 +157,12 @@ wchar_t *ufroma_dup(char const *s, int charset) { len = strlen(s) + 1; do { - buf = resize(buf, len); + buf = sresize(buf, len, wchar_t); ustrfroma(s, buf, len, charset); len = (3 * len) / 2 + 1; /* this guarantees a strict increase */ } while (ustrlen(buf) >= len-1); - buf = resize(buf, ustrlen(buf)+1); + buf = sresize(buf, ustrlen(buf)+1, wchar_t); return buf; } @@ -172,19 +172,20 @@ char *utoa_locale_dup(wchar_t const *s) * This variant uses the C library locale. */ char *ret; - int len; + int len, outlen; size_t siz; len = ustrlen(s); - ret = mknewa(char, 1 + MB_CUR_MAX * len); + outlen = 1 + MB_CUR_MAX * len; + ret = snewn(outlen+1, char); - siz = wcstombs(ret, s, len); + siz = wcstombs(ret, s, outlen); if (siz) { - assert(siz <= MB_CUR_MAX * len); + assert(siz <= (size_t)(outlen)); ret[siz] = '\0'; - ret = resize(ret, siz+1); + ret = sresize(ret, siz+1, char); return ret; } @@ -203,19 +204,20 @@ wchar_t *ufroma_locale_dup(char const *s) * This variant uses the C library locale. */ wchar_t *ret; - int len; + int len, outlen; size_t siz; len = strlen(s); - ret = mknewa(wchar_t, 1 + 2*len); /* be conservative */ + outlen = 1 + 2*len; + ret = snewn(outlen+1, wchar_t); /* be conservative */ - siz = mbstowcs(ret, s, len); + siz = mbstowcs(ret, s, outlen); if (siz) { - assert(siz <= (size_t)(2 * len)); + assert(siz <= (size_t)(outlen)); ret[siz] = L'\0'; - ret = resize(ret, siz+1); + ret = sresize(ret, siz+1, wchar_t); return ret; } @@ -246,6 +248,15 @@ wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source) { return ret; } +wchar_t *ustrncpy(wchar_t *dest, wchar_t const *source, int n) { + wchar_t *ret = dest; + do { + *dest++ = *source; + if (*source) source++; + } while (n-- > 0); + return ret; +} + int ustrcmp(wchar_t *lhs, wchar_t *rhs) { if (!lhs && !rhs) return 0; if (!lhs) return -1; @@ -279,7 +290,7 @@ int uisalpha(wchar_t c) { #endif } -int ustricmp(wchar_t *lhs, wchar_t *rhs) { +int ustricmp(wchar_t const *lhs, wchar_t const *rhs) { wchar_t lc, rc; while ((lc = utolower(*lhs)) == (rc = utolower(*rhs)) && lc && rc) lhs++, rhs++; @@ -291,6 +302,19 @@ int ustricmp(wchar_t *lhs, wchar_t *rhs) { return 1; } +int ustrnicmp(wchar_t const *lhs, wchar_t const *rhs, int maxlen) { + wchar_t lc = 0, rc = 0; + while (maxlen-- > 0 && + (lc = utolower(*lhs)) == (rc = utolower(*rhs)) && lc && rc) + lhs++, rhs++; + if (lc < rc) + return -1; + else if (lc > rc) + return 1; + else + return 0; +} + wchar_t *ustrlow(wchar_t *s) { wchar_t *p = s; while (*p) { @@ -300,7 +324,7 @@ wchar_t *ustrlow(wchar_t *s) { return s; } -int utoi(wchar_t *s) { +int utoi(wchar_t const *s) { int sign = +1; int n; @@ -316,10 +340,18 @@ int utoi(wchar_t *s) { s++; } - return n; + return n * sign; +} + +double utof(wchar_t const *s) +{ + char *cs = utoa_dup(s, CS_ASCII); + double ret = atof(cs); + sfree(cs); + return ret; } -int utob(wchar_t *s) { +int utob(wchar_t const *s) { if (!ustricmp(s, L"yes") || !ustricmp(s, L"y") || !ustricmp(s, L"true") || !ustricmp(s, L"t")) return TRUE; @@ -331,19 +363,9 @@ int uisdigit(wchar_t c) { } #define USTRFTIME_DELTA 128 -wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) { - void *blk = NULL; - wchar_t *wblk, *wp; - char *fmt, *text, *p; - size_t size = 0; - size_t len; - - /* - * FIXME: really we ought to copy non-% parts of the format - * ourselves, and only resort to strftime for % parts. Also we - * should use wcsftime if it's present. - */ - +static void ustrftime_internal(rdstring *rs, char formatchr, + const struct tm *timespec) +{ /* * strftime has the entertaining property that it returns 0 * _either_ on out-of-space _or_ on successful generation of @@ -351,38 +373,73 @@ wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) { * generate the empty string. Somebody throw a custard pie at * whoever was responsible for that. Please? */ - if (wfmt) { - len = ustrlen(wfmt); - fmt = mknewa(char, 2+len); - ustrtoa(wfmt, fmt+1, len+1, CS_ASCII); /* CS_FIXME? */ - fmt[0] = ' '; - } else - fmt = " %c"; - - while (1) { + +#ifdef HAS_WCSFTIME + wchar_t *buf = NULL; + wchar_t fmt[4]; + int size, ret; + + fmt[0] = L' '; + fmt[1] = L'%'; + /* Format chars are all ASCII, so conversion to Unicode is no problem */ + fmt[2] = formatchr; + fmt[3] = L'\0'; + + size = 0; + do { size += USTRFTIME_DELTA; - blk = resize((char *)blk, size); - len = strftime((char *)blk, size-1, fmt, timespec); - if (len > 0) - break; + buf = sresize(buf, size, wchar_t); + ret = (int) wcsftime(buf, size, fmt, timespec); + } while (ret == 0); + + rdadds(rs, buf+1); + sfree(buf); +#else + char *buf = NULL; + wchar_t *cvtbuf; + char fmt[4]; + int size, ret; + + fmt[0] = ' '; + fmt[1] = '%'; + fmt[2] = formatchr; + fmt[3] = '\0'; + + size = 0; + do { + size += USTRFTIME_DELTA; + buf = sresize(buf, size, char); + ret = (int) strftime(buf, size, fmt, timespec); + } while (ret == 0); + + cvtbuf = ufroma_locale_dup(buf+1); + rdadds(rs, cvtbuf); + sfree(cvtbuf); + sfree(buf); +#endif +} + +wchar_t *ustrftime(const wchar_t *wfmt, const struct tm *timespec) +{ + rdstring rs = { 0, 0, NULL }; + + if (!wfmt) + wfmt = L"%c"; + + while (*wfmt) { + if (wfmt[0] == L'%' && wfmt[1] == L'%') { + rdadd(&rs, L'%'); + wfmt += 2; + } else if (wfmt[0] == L'%' && wfmt[1]) { + ustrftime_internal(&rs, wfmt[1], timespec); + wfmt += 2; + } else { + rdadd(&rs, wfmt[0]); + wfmt++; + } } - /* Note: +1 for the terminating 0, -1 for the initial space in fmt */ - wblk = resize((wchar_t *)blk, len); - text = mknewa(char, len); - strftime(text, len, fmt+1, timespec); - /* - * We operate in the C locale, so this all ought to be kosher - * ASCII. If we ever move outside ASCII machines, we may need - * to make this more portable... - */ - for (wp = wblk, p = text; *p; p++, wp++) - *wp = *p; - *wp = 0; - if (wfmt) - sfree(fmt); - sfree(text); - return wblk; + return rdtrim(&rs); } /* @@ -404,3 +461,28 @@ int cvt_ok(int charset, const wchar_t *s) } return TRUE; } + +/* + * Wrapper around charset_from_localenc which accepts the charset + * name as a wide string (since that happens to be more useful). + * Also throws a Halibut error and falls back to CS_ASCII if the + * charset is unrecognised, meaning the rest of the program can + * rely on always getting a valid charset id back from this + * function. + */ +int charset_from_ustr(filepos *fpos, const wchar_t *name) +{ + char *csname; + int charset; + + csname = utoa_dup(name, CS_ASCII); + charset = charset_from_localenc(csname); + + if (charset == CS_NONE) { + charset = CS_ASCII; + error(err_charset, fpos, name); + } + + sfree(csname); + return charset; +}