[sgt/halibut] / ustring.c

/*
 * ustring.c: Unicode string routines
 */

#include <wchar.h>
#include <stdlib.h>
#include <assert.h>
#include <limits.h>
#include <time.h>
#include "halibut.h"

/*
 * Return a freshly allocated copy of the wide string `s'. A NULL
 * input yields a freshly allocated empty string rather than NULL.
 */
wchar_t *ustrdup(wchar_t const *s) {
    wchar_t *copy;

    if (s == NULL) {
	/* No source string: hand back a one-element buffer holding NUL. */
	copy = snew(wchar_t);
	*copy = 0;
	return copy;
    }

    copy = snewn(1+ustrlen(s), wchar_t);
    ustrcpy(copy, s);
    return copy;
}

/*
 * Convert the wide string `s' into `charset', writing into the
 * caller-supplied buffer `outbuf' of `size' bytes (one byte is
 * reserved for the terminating NUL).
 *
 * If `careful' is non-zero an error flag is passed to
 * charset_from_unicode, and NULL is returned as soon as that flag
 * is set. Otherwise no error flag is passed at all.
 *
 * On every other path the return value is the position of the
 * terminating NUL that was written last, i.e. the END of the
 * output, not the start of `outbuf'.
 * NOTE(review): callers wanting the converted text must keep their
 * own pointer to the buffer start - confirm that is intended.
 */
static char *ustrtoa_internal(wchar_t const *s, char *outbuf, int size,
			      int charset, int careful) {
    charset_state cs_state = CHARSET_INIT_STATE;
    char *out = outbuf;
    int remaining, room, produced, failed;

    *out = '\0';
    if (s == NULL)
	return out;

    room = size - 1;		       /* keep one byte for the NUL */
    remaining = ustrlen(s);
    while (remaining > 0) {
	failed = 0;
	produced = charset_from_unicode(&s, &remaining, out, room, charset,
					&cs_state, (careful ? &failed : NULL));
	if (failed)
	    return NULL;
	if (produced == 0)
	    return out;		       /* nothing emitted: stop here */
	room -= produced;
	out += produced;
	*out = '\0';
    }

    /*
     * Final call with no input, so the converter can emit whatever
     * it still has pending in its state.
     */
    produced = charset_from_unicode(NULL, 0, out, room, charset,
				    &cs_state, NULL);
    out += produced;
    *out = '\0';
    return out;
}

/*
 * Convert `s' into `charset' inside the caller's buffer, without
 * asking for conversion errors to be reported (the "careful" flag
 * of ustrtoa_internal is FALSE).
 */
char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset) {
    char *result = ustrtoa_internal(s, outbuf, size, charset, FALSE);
    return result;
}

/*
 * As ustrtoa, but with the "careful" flag of ustrtoa_internal set
 * to TRUE, so NULL comes back if the conversion reports an error.
 */
char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset) {
    char *result = ustrtoa_internal(s, outbuf, size, charset, TRUE);
    return result;
}

/*
 * Convert the NUL-terminated byte string `s', encoded in `charset',
 * into wide characters stored in the caller-supplied buffer
 * `outbuf' of `size' elements (one element is reserved for the
 * terminating NUL). A NULL `s' produces an empty string.
 *
 * The return value is the position of the terminating NUL written
 * last, i.e. the end of the output rather than the start of
 * `outbuf'.
 */
wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset) {
    charset_state cs_state = CHARSET_INIT_STATE;
    wchar_t *out = outbuf;
    int remaining, room, produced;

    *out = L'\0';
    if (s == NULL)
	return out;

    room = size - 1;		       /* keep one element for the NUL */
    remaining = strlen(s);
    while (remaining > 0) {
	produced = charset_to_unicode(&s, &remaining, out, room,
				      charset, &cs_state, NULL, 0);
	if (produced == 0)
	    break;		       /* nothing emitted: give up */
	out += produced;
	room -= produced;
	*out = L'\0';
    }
    return out;
}

/*
 * Convert the wide string `s' into `charset', returning a newly
 * allocated NUL-terminated byte string which the caller must free.
 *
 *  - A NULL `s' yields a newly allocated empty string.
 *  - If `lenp' is non-NULL it receives the number of bytes written,
 *    not counting the terminating NUL (0 for a NULL `s').
 *  - If `careful' is non-zero, an error flag is passed to
 *    charset_from_unicode; when it is set the partial output is
 *    freed and NULL is returned.
 */
char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, int careful)
{
    char *outbuf;
    int outpos, outlen, len, ret, err;
    charset_state state = CHARSET_INIT_STATE;

    if (!s) {
	/* Keep the length output defined on this path too. */
	if (lenp)
	    *lenp = 0;
	return dupstr("");
    }

    len = ustrlen(s);

    outlen = len + 10;
    outbuf = snewn(outlen, char);

    outpos = 0;
    outbuf[outpos] = '\0';

    while (len > 0) {
	err = 0;
	/* The "- 1" keeps the last byte free for the NUL. */
	ret = charset_from_unicode(&s, &len,
				   outbuf + outpos, outlen - outpos - 1,
				   charset, &state, (careful ? &err : NULL));
	if (err) {
	    sfree(outbuf);
	    return NULL;
	}
	if (!ret) {
	    /* Nothing was emitted: enlarge the buffer and try again. */
	    outlen = outlen * 3 / 2;
	    outbuf = sresize(outbuf, outlen, char);
	}
	outpos += ret;
	outbuf[outpos] = '\0';
    }
    /*
     * Clean up: call once more with no input so the converter can
     * emit anything still pending in its state. The buffer has
     * exactly outlen - outpos bytes left from outbuf + outpos, and
     * one of those must stay free for the terminating NUL, so only
     * outlen - outpos - 1 bytes may be advertised.
     */
    outlen = outpos + 32;
    outbuf = sresize(outbuf, outlen, char);
    ret = charset_from_unicode(NULL, 0,
			       outbuf + outpos, outlen - outpos - 1,
			       charset, &state, NULL);
    outpos += ret;
    outbuf[outpos] = '\0';
    if (lenp)
	*lenp = outpos;
    return outbuf;
}

/*
 * Allocate and return the conversion of `s' into `charset'. No
 * length is reported and the "careful" flag of utoa_internal_dup
 * is FALSE.
 */
char *utoa_dup(wchar_t const *s, int charset)
{
    char *converted = utoa_internal_dup(s, charset, NULL, FALSE);
    return converted;
}

/*
 * As utoa_dup, but additionally passes `len' through to
 * utoa_internal_dup so that the output length can be reported.
 */
char *utoa_dup_len(wchar_t const *s, int charset, int *len)
{
    char *converted = utoa_internal_dup(s, charset, len, FALSE);
    return converted;
}

/*
 * As utoa_dup, but with the "careful" flag of utoa_internal_dup
 * set to TRUE, so NULL comes back if the conversion reports an
 * error.
 */
char *utoa_careful_dup(wchar_t const *s, int charset)
{
    char *converted = utoa_internal_dup(s, charset, NULL, TRUE);
    return converted;
}

/*
 * Convert the byte string `s', encoded in `charset', into a newly
 * allocated wide string which the caller must free. A NULL `s' is
 * treated as the empty string (ustrfroma accepts NULL).
 *
 * ustrfroma writes into a fixed-size buffer, so the conversion is
 * retried with a larger buffer whenever the output fills it
 * completely and may therefore have been truncated.
 */
wchar_t *ufroma_dup(char const *s, int charset) {
    int size, used;
    wchar_t *buf = NULL;

    /*
     * One element more than "one wide char per input byte plus NUL",
     * so that the common one-to-one case does not look like a full
     * buffer and trigger a pointless second pass.
     */
    size = (s ? (int)strlen(s) : 0) + 2;
    for (;;) {
	buf = sresize(buf, size, wchar_t);
	ustrfroma(s, buf, size, charset);
	used = ustrlen(buf);
	/*
	 * Compare against the size that was actually used for this
	 * attempt; ustrfroma can store at most size-1 characters.
	 */
	if (used < size - 1)
	    break;
	size = (3 * size) / 2 + 1;     /* this guarantees a strict increase */
    }

    /* Trim the allocation down to what is really needed. */
    buf = sresize(buf, used + 1, wchar_t);
    return buf;
}

/*
 * Convert the wide string `s' into a newly allocated multibyte
 * string in the C library locale's encoding. The caller frees the
 * result.
 */
char *utoa_locale_dup(wchar_t const *s)
{
    /*
     * This variant uses the C library locale.
     */
    char *ret;
    int len, outlen;
    size_t siz;

    len = ustrlen(s);

    outlen = 1 + MB_CUR_MAX * len;
    ret = snewn(outlen+1, char);

    siz = wcstombs(ret, s, outlen);

    /*
     * wcstombs reports failure as (size_t)-1, which is non-zero, so
     * it has to be tested for explicitly. A result of 0 keeps
     * taking the fallback path below, as it always has.
     */
    if (siz != (size_t)-1 && siz != 0) {
	assert(siz <= (size_t)(outlen));
	ret[siz] = '\0';
	ret = sresize(ret, siz+1, char);
	return ret;
    }

    /* The fallback allocates its own result; do not leak this one. */
    sfree(ret);

    /*
     * If that failed, try a different strategy (which we will also
     * attempt in the total absence of wcstombs). Retrieve the
     * locale's charset from nl_langinfo or equivalent, and use
     * normal utoa_dup.
     */
    return utoa_dup(s, charset_from_locale());
}

/*
 * Convert the multibyte string `s', in the C library locale's
 * encoding, into a newly allocated wide string. The caller frees
 * the result.
 */
wchar_t *ufroma_locale_dup(char const *s)
{
    /*
     * This variant uses the C library locale.
     */
    wchar_t *ret;
    int len, outlen;
    size_t siz;

    len = strlen(s);

    outlen = 1 + 2*len;
    ret = snewn(outlen+1, wchar_t);  /* be conservative */

    siz = mbstowcs(ret, s, outlen);

    /*
     * mbstowcs reports failure as (size_t)-1, which is non-zero, so
     * it has to be tested for explicitly. A result of 0 keeps
     * taking the fallback path below, as it always has.
     */
    if (siz != (size_t)-1 && siz != 0) {
	assert(siz <= (size_t)(outlen));
	ret[siz] = L'\0';
	ret = sresize(ret, siz+1, wchar_t);
	return ret;
    }

    /* The fallback allocates its own result; do not leak this one. */
    sfree(ret);

    /*
     * If that failed, try a different strategy (which we will also
     * attempt in the total absence of mbstowcs). Retrieve the
     * locale's charset from nl_langinfo or equivalent, and use
     * normal ufroma_dup.
     */
    return ufroma_dup(s, charset_from_locale());
}

/*
 * Count the wide characters in `s' before its terminating NUL.
 * `s' must not be NULL.
 */
int ustrlen(wchar_t const *s) {
    wchar_t const *end = s;

    while (*end != L'\0')
	end++;
    return (int)(end - s);
}

/*
 * Step past the wide string starting at `s' and its terminating
 * NUL, returning the address of the element that follows.
 */
wchar_t *uadv(wchar_t *s) {
    int skip = 1 + ustrlen(s);	       /* characters plus the NUL */
    return s + skip;
}

/*
 * Copy the wide string `source', including its terminating NUL,
 * into `dest'. Returns `dest'.
 */
wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source) {
    wchar_t *out = dest;

    /* The NUL is copied too, and copying it ends the loop. */
    while ((*out++ = *source++) != L'\0')
	continue;
    return dest;
}

/*
 * Bounded wide-string copy. Returns `dest'.
 *
 * The source pointer stops advancing once it reaches the
 * terminating NUL, so after a short source the rest of the output
 * is filled with NULs.
 *
 * Note that the loop body always runs once before `n' is examined,
 * so max(n, 0) + 1 elements are written in total, and no NUL is
 * written if `source' is longer than that.
 * NOTE(review): that is one element more than strncpy would write;
 * confirm callers size `dest' for n+1 elements.
 */
wchar_t *ustrncpy(wchar_t *dest, wchar_t const *source, int n) {
    wchar_t *out = dest;
    int remaining = n;

    for (;;) {
	wchar_t ch = *source;

	*out++ = ch;
	if (ch != L'\0')
	    source++;
	if (remaining <= 0)
	    break;
	remaining--;
    }
    return dest;
}

/*
 * Compare two wide strings element by element.
 *
 * Returns -1, 0 or +1. NULL pointers are accepted: two NULLs
 * compare equal, and a NULL sorts before any non-NULL string.
 */
int ustrcmp(wchar_t *lhs, wchar_t *rhs) {
    if (lhs == NULL || rhs == NULL) {
	if (lhs == rhs)
	    return 0;		       /* both NULL */
	return (lhs == NULL) ? -1 : +1;
    }

    /* Skip the common prefix; equal non-NUL elements on both sides. */
    for (; *lhs != L'\0' && *lhs == *rhs; lhs++, rhs++)
	continue;

    if (*lhs == *rhs)
	return 0;
    return (*lhs < *rhs) ? -1 : 1;
}

/*
 * Lower-case a single wide character. Uses towlower when
 * HAS_TOWLOWER is defined; otherwise only the letters 'A'..'Z' are
 * converted.
 */
wchar_t utolower(wchar_t c) {
    /* NUL must always map to NUL: ustricmp depends on that. */
    if (c != L'\0') {
#ifdef HAS_TOWLOWER
	c = towlower(c);
#else
	if (c >= 'A' && c <= 'Z')
	    c = c - 'A' + 'a';
#endif
    }
    return c;
}

/*
 * Return non-zero if `c' is alphabetic. Uses iswalpha when
 * HAS_ISWALPHA is defined; otherwise only 'A'..'Z' and 'a'..'z'
 * count.
 */
int uisalpha(wchar_t c) {
#ifdef HAS_ISWALPHA
    return iswalpha(c);
#else
    int upper = (c >= 'A' && c <= 'Z');
    int lower = (c >= 'a' && c <= 'z');

    return upper || lower;
#endif
}

/*
 * Compare two wide strings after passing each character through
 * utolower. Returns -1, 0 or 1.
 */
int ustricmp(wchar_t const *lhs, wchar_t const *rhs) {
    wchar_t a, b;

    for (;;) {
	a = utolower(*lhs);
	b = utolower(*rhs);
	/* Stop at the first difference, or when both strings end. */
	if (a != b || a == L'\0')
	    break;
	lhs++;
	rhs++;
    }

    if (a == b)
	return 0;		       /* both reached their NUL */
    return (a < b) ? -1 : 1;
}

/*
 * As ustricmp, but examines at most `maxlen' characters of each
 * string. Returns -1, 0 or 1; a `maxlen' of zero or less always
 * gives 0.
 */
int ustrnicmp(wchar_t const *lhs, wchar_t const *rhs, int maxlen) {
    wchar_t a = 0, b = 0;
    int left;

    for (left = maxlen; left > 0; left--) {
	a = utolower(*lhs);
	b = utolower(*rhs);
	/* Stop at the first difference, or when both strings end. */
	if (a != b || a == L'\0')
	    break;
	lhs++;
	rhs++;
    }

    if (a == b)
	return 0;
    return (a < b) ? -1 : 1;
}

/*
 * Lower-case the wide string `s' in place, one character at a time
 * via utolower. Returns `s'.
 */
wchar_t *ustrlow(wchar_t *s) {
    wchar_t *cursor;

    for (cursor = s; *cursor != L'\0'; cursor++)
	*cursor = utolower(*cursor);
    return s;
}

/*
 * Parse a decimal integer from the start of the wide string `s'.
 *
 * An optional leading '-' is accepted (no '+', no leading
 * whitespace), followed by the digits '0'..'9'. Parsing stops at
 * the first non-digit; if there are no digits the result is 0.
 *
 * Values that do not fit in an int saturate at INT_MAX or INT_MIN
 * instead of overflowing, which would be undefined behaviour.
 */
int utoi(wchar_t const *s) {
    int negative = 0;
    int n = 0;

    if (*s == L'-') {
	s++;
	negative = 1;
    }

    /*
     * Accumulate as a non-positive number: the negative range of
     * int is at least as large as the positive one, so INT_MIN
     * itself can be represented along the way.
     */
    while (*s >= L'0' && *s <= L'9') {
	int digit = (int)(*s - L'0');

	/* Would n * 10 - digit drop below INT_MIN? */
	if (n < (INT_MIN + digit) / 10)
	    return negative ? INT_MIN : INT_MAX;
	n = n * 10 - digit;
	s++;
    }

    if (negative)
	return n;
    if (n < -INT_MAX)
	return INT_MAX;		       /* magnitude exceeds INT_MAX */
    return -n;
}

/*
 * Parse a floating-point number from the wide string `s': the
 * string is converted with utoa_dup using CS_ASCII and the result
 * is handed to atof.
 */
double utof(wchar_t const *s)
{
    double value;
    char *ascii = utoa_dup(s, CS_ASCII);

    value = atof(ascii);
    sfree(ascii);
    return value;
}

/*
 * Interpret the wide string `s' as a boolean. Returns TRUE if it
 * matches "yes", "y", "true" or "t" under ustricmp, and FALSE for
 * anything else.
 */
int utob(wchar_t const *s) {
    static const wchar_t *const truthy[] = {
	L"yes", L"y", L"true", L"t"
    };
    size_t i;

    for (i = 0; i < sizeof(truthy) / sizeof(truthy[0]); i++) {
	if (ustricmp(s, truthy[i]) == 0)
	    return TRUE;
    }
    return FALSE;
}

/*
 * Return 1 if `c' is one of the digits '0'..'9', and 0 otherwise.
 */
int uisdigit(wchar_t c) {
    if (c < L'0')
	return 0;
    if (c > L'9')
	return 0;
    return 1;
}

#define USTRFTIME_DELTA 128
/*
 * Expand the single conversion "%<formatchr>" for `timespec' and
 * append the resulting text to the string being built in `rs'.
 *
 * strftime returns 0 both when the buffer is too small and when it
 * has successfully produced an empty string. To tell those apart,
 * the format always begins with a space, so that a successful call
 * can never produce empty output; that leading space is skipped
 * again when the result is appended. The buffer grows by
 * USTRFTIME_DELTA elements for each attempt until a call returns
 * non-zero.
 */
static void ustrftime_internal(rdstring *rs, char formatchr,
			       const struct tm *timespec)
{
#ifdef HAS_WCSFTIME
    wchar_t pattern[4] = { L' ', L'%', L'\0', L'\0' };
    wchar_t *wide = NULL;
    int capacity = 0, got;

    /* Format chars are all ASCII, so widening them is no problem */
    pattern[2] = formatchr;

    for (;;) {
	capacity += USTRFTIME_DELTA;
	wide = sresize(wide, capacity, wchar_t);
	got = (int) wcsftime(wide, capacity, pattern, timespec);
	if (got != 0)
	    break;
    }

    rdadds(rs, wide+1);		       /* +1 skips the guard space */
    sfree(wide);
#else
    char pattern[4] = { ' ', '%', '\0', '\0' };
    char *narrow = NULL;
    wchar_t *widened;
    int capacity = 0, got;

    pattern[2] = formatchr;

    for (;;) {
	capacity += USTRFTIME_DELTA;
	narrow = sresize(narrow, capacity, char);
	got = (int) strftime(narrow, capacity, pattern, timespec);
	if (got != 0)
	    break;
    }

    /* +1 skips the guard space; then convert from the locale. */
    widened = ufroma_locale_dup(narrow+1);
    rdadds(rs, widened);
    sfree(widened);
    sfree(narrow);
#endif
}

/*
 * Wide-string analogue of strftime: expand the format `wfmt' for
 * `timespec' and return the result of rdtrim on the string built.
 * A NULL `wfmt' means "%c".
 *
 * "%%" produces a literal '%'; '%' followed by any other character
 * is expanded via ustrftime_internal; everything else, including a
 * lone '%' at the very end of the format, is copied through as is.
 */
wchar_t *ustrftime(const wchar_t *wfmt, const struct tm *timespec)
{
    rdstring rs = { 0, 0, NULL };

    if (wfmt == NULL)
	wfmt = L"%c";

    while (*wfmt != L'\0') {
	if (wfmt[0] != L'%' || wfmt[1] == L'\0') {
	    /* Ordinary character, or a trailing '%'. */
	    rdadd(&rs, wfmt[0]);
	    wfmt++;
	    continue;
	}

	/* A '%' followed by at least one more character. */
	if (wfmt[1] == L'%')
	    rdadd(&rs, L'%');
	else
	    ustrftime_internal(&rs, wfmt[1], timespec);
	wfmt += 2;
    }

    return rdtrim(&rs);
}

/*
 * Report whether the Unicode string `s' can be translated into
 * `charset' with no missing characters: returns TRUE if the whole
 * string converts without the error flag being raised, and FALSE
 * as soon as it is. The converted text itself is discarded.
 */
int cvt_ok(int charset, const wchar_t *s)
{
    charset_state state = CHARSET_INIT_STATE;
    char scratch[256];		       /* output is thrown away */
    int remaining;
    int failed = 0;

    for (remaining = ustrlen(s); remaining > 0; ) {
	(void)charset_from_unicode(&s, &remaining, scratch, lenof(scratch),
				   charset, &state, &failed);
	if (failed)
	    return FALSE;
    }
    return TRUE;
}

/*
 * Look up a charset by name via charset_from_localenc, taking the
 * name as a wide string (since that happens to be more useful).
 *
 * If the name is not recognised, a Halibut error is reported
 * against `fpos' and CS_ASCII is returned instead, so the rest of
 * the program can rely on always getting a valid charset id back
 * from this function.
 */
int charset_from_ustr(filepos *fpos, const wchar_t *name)
{
    char *ascii_name = utoa_dup(name, CS_ASCII);
    int found = charset_from_localenc(ascii_name);

    if (found != CS_NONE) {
	sfree(ascii_name);
	return found;
    }

    /* Unknown charset: complain, then fall back to plain ASCII. */
    error(err_charset, fpos, name);
    sfree(ascii_name);
    return CS_ASCII;
}
Commit	Line	Data
d7482997	1	/*
	2	* ustring.c: Unicode string routines
	3	*/
	4
	5	#include <wchar.h>
7e976207	6	#include <stdlib.h>
7e976207	7	#include <assert.h>
d7482997	8	#include <time.h>
	9	#include "halibut.h"
	10
e4ea58f8	11	wchar_t ustrdup(wchar_t const s) {
d7482997	12	wchar_t *r;
d7482997	13	if (s) {
f1530049	14	r = snewn(1+ustrlen(s), wchar_t);
d7482997	15	ustrcpy(r, s);
d7482997	16	} else {
f1530049	17	r = snew(wchar_t);
d7482997	18	*r = 0;
	19	}
	20	return r;
	21	}
	22
e4ea58f8	23	static char ustrtoa_internal(wchar_t const s, char *outbuf, int size,
	24	int charset, int careful) {
	25	int len, ret, err;
	26	charset_state state = CHARSET_INIT_STATE;
	27
d7482997	28	if (!s) {
	29	*outbuf = '\0';
	30	return outbuf;
	31	}
e4ea58f8	32
	33	len = ustrlen(s);
	34	size--; /* leave room for terminating NUL */
	35	*outbuf = '\0';
	36	while (len > 0) {
	37	err = 0;
	38	ret = charset_from_unicode(&s, &len, outbuf, size, charset, &state,
	39	(careful ? &err : NULL));
	40	if (err)
	41	return NULL;
	42	if (!ret)
	43	return outbuf;
	44	size -= ret;
	45	outbuf += ret;
	46	*outbuf = '\0';
	47	}
	48	/*
	49	* Clean up
	50	*/
	51	ret = charset_from_unicode(NULL, 0, outbuf, size, charset, &state, NULL);
	52	size -= ret;
	53	outbuf += ret;
	54	*outbuf = '\0';
d7482997	55	return outbuf;
	56	}
	57
e4ea58f8	58	char ustrtoa(wchar_t const s, char *outbuf, int size, int charset) {
	59	return ustrtoa_internal(s, outbuf, size, charset, FALSE);
	60	}
	61
	62	char ustrtoa_careful(wchar_t const s, char *outbuf, int size, int charset) {
	63	return ustrtoa_internal(s, outbuf, size, charset, TRUE);
	64	}
	65
	66	wchar_t ustrfroma(char const s, wchar_t *outbuf, int size, int charset) {
	67	int len, ret;
	68	charset_state state = CHARSET_INIT_STATE;
	69
ba9c1487	70	if (!s) {
	71	*outbuf = L'\0';
	72	return outbuf;
	73	}
e4ea58f8	74
	75	len = strlen(s);
	76	size--; /* allow for terminating NUL */
	77	*outbuf = L'\0';
	78	while (len > 0) {
	79	ret = charset_to_unicode(&s, &len, outbuf, size,
	80	charset, &state, NULL, 0);
	81	if (!ret)
	82	return outbuf;
	83	outbuf += ret;
	84	size -= ret;
	85	*outbuf = L'\0';
	86	}
ba9c1487	87	return outbuf;
	88	}
	89
e4ea58f8	90	char utoa_internal_dup(wchar_t const s, int charset, int *lenp, int careful)
	91	{
	92	char *outbuf;
	93	int outpos, outlen, len, ret, err;
	94	charset_state state = CHARSET_INIT_STATE;
50d6b4bd	95
e4ea58f8	96	if (!s) {
	97	return dupstr("");
	98	}
50d6b4bd	99
e4ea58f8	100	len = ustrlen(s);
	101
	102	outlen = len + 10;
f1530049	103	outbuf = snewn(outlen, char);
e4ea58f8	104
	105	outpos = 0;
	106	outbuf[outpos] = '\0';
	107
	108	while (len > 0) {
	109	err = 0;
	110	ret = charset_from_unicode(&s, &len,
	111	outbuf + outpos, outlen - outpos - 1,
	112	charset, &state, (careful ? &err : NULL));
	113	if (err) {
	114	sfree(outbuf);
	115	return NULL;
	116	}
	117	if (!ret) {
	118	outlen = outlen * 3 / 2;
f1530049	119	outbuf = sresize(outbuf, outlen, char);
e4ea58f8	120	}
	121	outpos += ret;
	122	outbuf[outpos] = '\0';
	123	}
	124	/*
	125	* Clean up
	126	*/
	127	outlen = outpos + 32;
f1530049	128	outbuf = sresize(outbuf, outlen, char);
e4ea58f8	129	ret = charset_from_unicode(NULL, 0,
	130	outbuf + outpos, outlen - outpos + 1,
	131	charset, &state, NULL);
	132	outpos += ret;
	133	outbuf[outpos] = '\0';
	134	if (lenp)
	135	*lenp = outpos;
	136	return outbuf;
50d6b4bd	137	}
50d6b4bd	138
e4ea58f8	139	char utoa_dup(wchar_t const s, int charset)
	140	{
	141	return utoa_internal_dup(s, charset, NULL, FALSE);
	142	}
	143
	144	char utoa_dup_len(wchar_t const s, int charset, int *len)
	145	{
	146	return utoa_internal_dup(s, charset, len, FALSE);
	147	}
	148
	149	char utoa_careful_dup(wchar_t const s, int charset)
	150	{
	151	return utoa_internal_dup(s, charset, NULL, TRUE);
	152	}
	153
	154	wchar_t ufroma_dup(char const s, int charset) {
ba9c1487	155	int len;
	156	wchar_t *buf = NULL;
	157
	158	len = strlen(s) + 1;
	159	do {
f1530049	160	buf = sresize(buf, len, wchar_t);
e4ea58f8	161	ustrfroma(s, buf, len, charset);
ba9c1487	162	len = (3 * len) / 2 + 1; /* this guarantees a strict increase */
	163	} while (ustrlen(buf) >= len-1);
	164
f1530049	165	buf = sresize(buf, ustrlen(buf)+1, wchar_t);
ba9c1487	166	return buf;
	167	}
	168
7e976207	169	char utoa_locale_dup(wchar_t const s)
	170	{
	171	/*
	172	* This variant uses the C library locale.
	173	*/
	174	char *ret;
8281de1b	175	int len, outlen;
7e976207	176	size_t siz;
	177
	178	len = ustrlen(s);
	179
8281de1b	180	outlen = 1 + MB_CUR_MAX * len;
8281de1b	181	ret = snewn(outlen+1, char);
7e976207	182
8281de1b	183	siz = wcstombs(ret, s, outlen);
7e976207	184
7e976207	185	if (siz) {
8281de1b	186	assert(siz <= (size_t)(outlen));
7e976207	187	ret[siz] = '\0';
f1530049	188	ret = sresize(ret, siz+1, char);
7e976207	189	return ret;
	190	}
	191
	192	/*
	193	* If that failed, try a different strategy (which we will also
	194	* attempt in the total absence of wcstombs). Retrieve the
	195	* locale's charset from nl_langinfo or equivalent, and use
	196	* normal utoa_dup.
	197	*/
	198	return utoa_dup(s, charset_from_locale());
	199	}
	200
	201	wchar_t ufroma_locale_dup(char const s)
	202	{
	203	/*
	204	* This variant uses the C library locale.
	205	*/
	206	wchar_t *ret;
8281de1b	207	int len, outlen;
7e976207	208	size_t siz;
	209
	210	len = strlen(s);
	211
8281de1b	212	outlen = 1 + 2*len;
8281de1b	213	ret = snewn(outlen+1, wchar_t); /* be conservative */
7e976207	214
8281de1b	215	siz = mbstowcs(ret, s, outlen);
7e976207	216
7e976207	217	if (siz) {
8281de1b	218	assert(siz <= (size_t)(outlen));
7e976207	219	ret[siz] = L'\0';
f1530049	220	ret = sresize(ret, siz+1, wchar_t);
7e976207	221	return ret;
	222	}
	223
	224	/*
	225	* If that failed, try a different strategy (which we will also
	226	* attempt in the total absence of wcstombs). Retrieve the
	227	* locale's charset from nl_langinfo or equivalent, and use
	228	* normal ufroma_dup.
	229	*/
	230	return ufroma_dup(s, charset_from_locale());
	231	}
	232
5dd44dce	233	int ustrlen(wchar_t const *s) {
d7482997	234	int len = 0;
	235	while (*s++) len++;
	236	return len;
	237	}
	238
	239	wchar_t uadv(wchar_t s) {
	240	return s + 1 + ustrlen(s);
	241	}
	242
5dd44dce	243	wchar_t ustrcpy(wchar_t dest, wchar_t const *source) {
d7482997	244	wchar_t *ret = dest;
	245	do {
	246	dest++ = source;
	247	} while (*source++);
	248	return ret;
	249	}
	250
08e78486	251	wchar_t ustrncpy(wchar_t dest, wchar_t const *source, int n) {
	252	wchar_t *ret = dest;
	253	do {
	254	dest++ = source;
	255	if (*source) source++;
	256	} while (n-- > 0);
	257	return ret;
	258	}
	259
d7482997	260	int ustrcmp(wchar_t lhs, wchar_t rhs) {
	261	if (!lhs && !rhs) return 0;
	262	if (!lhs) return -1;
	263	if (!rhs) return +1;
	264	while (lhs && rhs && lhs==rhs)
	265	lhs++, rhs++;
	266	if (lhs < rhs)
	267	return -1;
	268	else if (lhs > rhs)
	269	return 1;
	270	return 0;
	271	}
	272
	273	wchar_t utolower(wchar_t c) {
	274	if (c == L'\0')
	275	return c; /* this property needed by ustricmp */
9badd775	276	#ifdef HAS_TOWLOWER
	277	return towlower(c);
	278	#else
d7482997	279	if (c >= 'A' && c <= 'Z')
	280	c += 'a'-'A';
	281	return c;
9badd775	282	#endif
d7482997	283	}
d7482997	284
831da32e	285	int uisalpha(wchar_t c) {
9badd775	286	#ifdef HAS_ISWALPHA
	287	return iswalpha(c);
	288	#else
831da32e	289	return (c >= 'A' && c <= 'Z') \|\| (c >= 'a' && c <= 'z');
9badd775	290	#endif
831da32e	291	}
831da32e	292
78c73085	293	int ustricmp(wchar_t const lhs, wchar_t const rhs) {
d7482997	294	wchar_t lc, rc;
	295	while ((lc = utolower(lhs)) == (rc = utolower(rhs)) && lc && rc)
	296	lhs++, rhs++;
	297	if (!lc && !rc)
	298	return 0;
	299	if (lc < rc)
	300	return -1;
	301	else
	302	return 1;
	303	}
	304
78c73085	305	int ustrnicmp(wchar_t const lhs, wchar_t const rhs, int maxlen) {
	306	wchar_t lc = 0, rc = 0;
	307	while (maxlen-- > 0 &&
	308	(lc = utolower(lhs)) == (rc = utolower(rhs)) && lc && rc)
	309	lhs++, rhs++;
	310	if (lc < rc)
	311	return -1;
	312	else if (lc > rc)
	313	return 1;
	314	else
	315	return 0;
	316	}
	317
d7482997	318	wchar_t ustrlow(wchar_t s) {
	319	wchar_t *p = s;
	320	while (*p) {
	321	p = utolower(p);
	322	p++;
	323	}
	324	return s;
	325	}
	326
dd567011	327	int utoi(wchar_t const *s) {
d7482997	328	int sign = +1;
	329	int n;
	330
	331	if (*s == L'-') {
	332	s++;
	333	sign = -1;
	334	}
	335
	336	n = 0;
	337	while (s && s >= L'0' && *s <= L'9') {
	338	n *= 10;
	339	n += (*s - '0');
	340	s++;
	341	}
	342
26c8c119	343	return n * sign;
d7482997	344	}
d7482997	345
dd567011	346	double utof(wchar_t const *s)
	347	{
	348	char *cs = utoa_dup(s, CS_ASCII);
	349	double ret = atof(cs);
	350	sfree(cs);
	351	return ret;
	352	}
	353
	354	int utob(wchar_t const *s) {
d7482997	355	if (!ustricmp(s, L"yes") \|\| !ustricmp(s, L"y") \|\|
	356	!ustricmp(s, L"true") \|\| !ustricmp(s, L"t"))
	357	return TRUE;
	358	return FALSE;
	359	}
	360
	361	int uisdigit(wchar_t c) {
	362	return c >= L'0' && c <= L'9';
	363	}
	364
	365	#define USTRFTIME_DELTA 128
c8422236	366	static void ustrftime_internal(rdstring *rs, char formatchr,
	367	const struct tm *timespec)
	368	{
e4ea58f8	369	/*
d7482997	370	* strftime has the entertaining property that it returns 0
	371	* _either_ on out-of-space _or_ on successful generation of
	372	* the empty string. Hence we must ensure our format can never
	373	* generate the empty string. Somebody throw a custard pie at
	374	* whoever was responsible for that. Please?
	375	*/
c8422236	376
	377	#ifdef HAS_WCSFTIME
	378	wchar_t *buf = NULL;
	379	wchar_t fmt[4];
	380	int size, ret;
	381
	382	fmt[0] = L' ';
	383	fmt[1] = L'%';
	384	/* Format chars are all ASCII, so conversion to Unicode is no problem */
	385	fmt[2] = formatchr;
	386	fmt[3] = L'\0';
	387
	388	size = 0;
	389	do {
d7482997	390	size += USTRFTIME_DELTA;
f1530049	391	buf = sresize(buf, size, wchar_t);
c8422236	392	ret = (int) wcsftime(buf, size, fmt, timespec);
	393	} while (ret == 0);
	394
	395	rdadds(rs, buf+1);
	396	sfree(buf);
	397	#else
	398	char *buf = NULL;
	399	wchar_t *cvtbuf;
	400	char fmt[4];
	401	int size, ret;
	402
	403	fmt[0] = ' ';
	404	fmt[1] = '%';
	405	fmt[2] = formatchr;
	406	fmt[3] = '\0';
	407
	408	size = 0;
	409	do {
	410	size += USTRFTIME_DELTA;
f1530049	411	buf = sresize(buf, size, char);
c8422236	412	ret = (int) strftime(buf, size, fmt, timespec);
	413	} while (ret == 0);
	414
	415	cvtbuf = ufroma_locale_dup(buf+1);
	416	rdadds(rs, cvtbuf);
	417	sfree(cvtbuf);
	418	sfree(buf);
	419	#endif
	420	}
	421
	422	wchar_t ustrftime(const wchar_t wfmt, const struct tm *timespec)
	423	{
	424	rdstring rs = { 0, 0, NULL };
	425
	426	if (!wfmt)
	427	wfmt = L"%c";
	428
	429	while (*wfmt) {
	430	if (wfmt[0] == L'%' && wfmt[1] == L'%') {
	431	rdadd(&rs, L'%');
	432	wfmt += 2;
	433	} else if (wfmt[0] == L'%' && wfmt[1]) {
	434	ustrftime_internal(&rs, wfmt[1], timespec);
	435	wfmt += 2;
	436	} else {
	437	rdadd(&rs, wfmt[0]);
	438	wfmt++;
	439	}
d7482997	440	}
d7482997	441
c8422236	442	return rdtrim(&rs);
d7482997	443	}
91f93b94	444
	445	/*
	446	* Determine whether a Unicode string can be translated into a
	447	* given charset without any missing characters.
	448	*/
	449	int cvt_ok(int charset, const wchar_t *s)
	450	{
	451	char buf[256];
	452	charset_state state = CHARSET_INIT_STATE;
	453	int err, len = ustrlen(s);
	454
	455	err = 0;
	456	while (len > 0) {
	457	(void)charset_from_unicode(&s, &len, buf, lenof(buf),
	458	charset, &state, &err);
	459	if (err)
	460	return FALSE;
	461	}
	462	return TRUE;
	463	}
0960a3d8	464
	465	/*
	466	* Wrapper around charset_from_localenc which accepts the charset
	467	* name as a wide string (since that happens to be more useful).
	468	* Also throws a Halibut error and falls back to CS_ASCII if the
	469	* charset is unrecognised, meaning the rest of the program can
	470	* rely on always getting a valid charset id back from this
	471	* function.
	472	*/
	473	int charset_from_ustr(filepos fpos, const wchar_t name)
	474	{
	475	char *csname;
	476	int charset;
	477
	478	csname = utoa_dup(name, CS_ASCII);
	479	charset = charset_from_localenc(csname);
	480
	481	if (charset == CS_NONE) {
	482	charset = CS_ASCII;
	483	error(err_charset, fpos, name);
	484	}
	485
	486	sfree(csname);
	487	return charset;
	488	}