[sgt/halibut] / ustring.c

/*
 * ustring.c: Unicode string routines
 */

#include <wchar.h>
#include <time.h>
#include "halibut.h"

/*
 * Duplicate the wide string `s' into storage obtained from the
 * mknew/mknewa macros and return the copy. A NULL `s' does not
 * yield NULL: it yields a one-element buffer holding just the
 * terminator, i.e. an empty string.
 */
wchar_t *ustrdup(wchar_t *s) {
    wchar_t *copy;

    if (!s) {
        /* No source string: hand back an empty one instead. */
        copy = mknew(wchar_t);
        *copy = 0;
        return copy;
    }

    /* Room for every character plus the terminating zero. */
    copy = mknewa(wchar_t, 1+ustrlen(s));
    ustrcpy(copy, s);
    return copy;
}

/*
 * Convert the wide string `s' into the char buffer `outbuf', which
 * has room for `size' chars including the terminating NUL.
 *
 * Each wide character is narrowed by plain assignment, so only
 * values that fit in a char survive intact. If the text does not
 * fit it is truncated; whenever size > 0 the result is
 * NUL-terminated. A NULL `s' produces the empty string.
 *
 * If `size' is zero or negative there is no room even for the
 * terminator, so `outbuf' is left completely untouched (previously
 * this case wrote to outbuf[size-1], i.e. before the buffer).
 *
 * Returns `outbuf'.
 */
char *ustrtoa(wchar_t *s, char *outbuf, int size) {
    char *p;

    if (size <= 0)
        return outbuf;                 /* nowhere to put even a NUL */

    if (!s) {
        *outbuf = '\0';
        return outbuf;
    }

    for (p = outbuf; *s && p < outbuf+size; p++,s++)
        *p = *s;

    if (p < outbuf+size)
        *p = '\0';                     /* everything fitted */
    else
        outbuf[size-1] = '\0';         /* truncated: overwrite last char */

    return outbuf;
}

/*
 * Convert the char string `s' into the wide buffer `outbuf', which
 * has room for `size' wide characters including the terminator.
 *
 * Each char is widened by plain assignment. If the text does not
 * fit it is truncated; whenever size > 0 the result is
 * zero-terminated. A NULL `s' produces the empty string.
 *
 * If `size' is zero or negative there is no room even for the
 * terminator, so `outbuf' is left completely untouched (previously
 * this case wrote to outbuf[size-1], i.e. before the buffer).
 *
 * Returns `outbuf'.
 */
wchar_t *ustrfroma(char *s, wchar_t *outbuf, int size) {
    wchar_t *p;

    if (size <= 0)
        return outbuf;                 /* nowhere to put even a NUL */

    if (!s) {
        *outbuf = L'\0';
        return outbuf;
    }

    for (p = outbuf; *s && p < outbuf+size; p++,s++)
        *p = *s;

    if (p < outbuf+size)
        *p = L'\0';                    /* everything fitted */
    else
        outbuf[size-1] = L'\0';        /* truncated: overwrite last char */

    return outbuf;
}

/*
 * Convert the wide string `s' to a char string held in a buffer
 * managed through resize(), and return that buffer trimmed to the
 * exact length of the converted text plus its terminator.
 *
 * The conversion itself is done by ustrtoa.
 */
char *utoa_dup(wchar_t *s) {
    char *out = NULL;
    int cap = ustrlen(s) + 1;

    for (;;) {
        out = resize(out, cap);
        ustrtoa(s, out, cap);
        cap = (3 * cap) / 2 + 1;       /* this guarantees a strict increase */
        /*
         * Go round again only if the text is long enough to reach
         * the end of the (already enlarged) capacity.
         * NOTE(review): the test uses the enlarged capacity, while
         * ustrtoa was given the previous one, so this looks like
         * it always stops after one pass - confirm.
         */
        if ((int)strlen(out) < cap-1)
            break;
    }

    /* Shrink to fit. */
    out = resize(out, strlen(out)+1);
    return out;
}

/*
 * Convert the char string `s' to a wide string held in a buffer
 * managed through resize(), and return that buffer trimmed to the
 * exact length of the converted text plus its terminator.
 *
 * The conversion itself is done by ustrfroma.
 */
wchar_t *ufroma_dup(char *s) {
    wchar_t *out = NULL;
    int cap = strlen(s) + 1;

    for (;;) {
        out = resize(out, cap);
        ustrfroma(s, out, cap);
        cap = (3 * cap) / 2 + 1;       /* this guarantees a strict increase */
        /*
         * Go round again only if the text is long enough to reach
         * the end of the (already enlarged) capacity.
         * NOTE(review): the test uses the enlarged capacity, while
         * ustrfroma was given the previous one, so this looks like
         * it always stops after one pass - confirm.
         */
        if (ustrlen(out) < cap-1)
            break;
    }

    /* Shrink to fit. */
    out = resize(out, ustrlen(out)+1);
    return out;
}

/*
 * Return the number of wide characters in `s', not counting the
 * terminating zero. `s' must not be NULL.
 */
int ustrlen(wchar_t const *s) {
    wchar_t const *end = s;

    /* Walk to the terminator, then measure the distance covered. */
    while (*end)
        end++;
    return (int)(end - s);
}

/*
 * Step over the string starting at `s' together with its
 * terminating zero, returning a pointer to the element that
 * immediately follows the terminator.
 */
wchar_t *uadv(wchar_t *s) {
    int skip = ustrlen(s) + 1;         /* characters plus terminator */
    return s + skip;
}

/*
 * Copy the wide string `source', terminator included, into `dest'.
 * Returns `dest'. No bounds checking is done here.
 */
wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source) {
    wchar_t *out = dest;

    /* Copy first, test afterwards, so the terminator is copied too. */
    while ((*out++ = *source++) != 0)
        ;
    return dest;
}

/*
 * Compare two wide strings element by element.
 *
 * Returns 0 if they are equal, -1 if `lhs' sorts first and 1 if
 * `rhs' sorts first. NULL is accepted for either argument: two
 * NULLs compare equal, and a NULL sorts before any non-NULL
 * string.
 */
int ustrcmp(wchar_t *lhs, wchar_t *rhs) {
    if (!lhs || !rhs) {
        if (lhs)
            return +1;                 /* only rhs is NULL */
        if (rhs)
            return -1;                 /* only lhs is NULL */
        return 0;                      /* both NULL */
    }

    /*
     * Skip the common prefix. If *lhs is nonzero and equal to
     * *rhs then *rhs is nonzero as well, so one test suffices.
     */
    for (; *lhs && *lhs == *rhs; lhs++, rhs++)
        ;

    if (*lhs == *rhs)
        return 0;
    return (*lhs < *rhs) ? -1 : 1;
}

/*
 * Map an upper-case letter 'A'..'Z' to its lower-case counterpart;
 * every other value is returned unchanged. In particular zero maps
 * to zero, which is a property ustricmp depends on.
 */
wchar_t utolower(wchar_t c) {
    /* FIXME: this doesn't even come close */
    if (c >= 'A' && c <= 'Z')
        return c + ('a'-'A');
    return c;
}

/*
 * Return 1 if `c' is one of the letters 'A'..'Z' or 'a'..'z', and
 * 0 otherwise.
 */
int uisalpha(wchar_t c) {
    /* FIXME: this doesn't even come close */
    if (c >= 'A' && c <= 'Z')
        return 1;
    if (c >= 'a' && c <= 'z')
        return 1;
    return 0;
}

/*
 * Compare two wide strings, passing every element through
 * utolower first so that letter case is ignored to the extent
 * utolower ignores it.
 *
 * Returns 0 if the strings match, -1 if `lhs' sorts first and 1 if
 * `rhs' sorts first. Unlike ustrcmp, neither argument may be NULL.
 */
int ustricmp(wchar_t *lhs, wchar_t *rhs) {
    wchar_t lc, rc;

    for (;;) {
        lc = utolower(*lhs);
        rc = utolower(*rhs);
        /*
         * Stop at the first difference, or when both strings have
         * ended (lc == rc == 0; utolower maps zero to zero).
         */
        if (lc != rc || !lc)
            break;
        lhs++, rhs++;
    }

    if (lc == rc)
        return 0;                      /* both hit the terminator */
    return (lc < rc) ? -1 : 1;
}

/*
 * Convert the wide string `s' in place by replacing each element
 * with utolower of itself. Returns `s'.
 */
wchar_t *ustrlow(wchar_t *s) {
    wchar_t *cursor;

    for (cursor = s; *cursor; cursor++)
        *cursor = utolower(*cursor);
    return s;
}

/*
 * Parse a decimal integer from the start of the wide string `s'.
 *
 * An optional leading '-' is accepted, followed by zero or more
 * digits '0'..'9'. Parsing stops at the first non-digit, and
 * whatever follows is ignored. A string with no digits yields 0.
 *
 * The sign is applied to the result; previously it was recorded
 * but then dropped, so "-42" came back as 42.
 *
 * No overflow checking is done: the digit string must represent a
 * value that fits in an int.
 */
int utoi(wchar_t *s) {
    int sign = +1;
    int n = 0;

    if (*s == L'-') {
        s++;
        sign = -1;
    }

    /* The terminating zero is not a digit, so it ends the loop too. */
    while (*s >= L'0' && *s <= L'9') {
        n = n * 10 + (*s - L'0');
        s++;
    }

    return sign * n;
}

/*
 * Interpret the wide string `s' as a boolean. Returns TRUE if it
 * matches "yes", "y", "true" or "t" under ustricmp, and FALSE for
 * anything else.
 */
int utob(wchar_t *s) {
    if (ustricmp(s, L"yes") == 0)
        return TRUE;
    if (ustricmp(s, L"y") == 0)
        return TRUE;
    if (ustricmp(s, L"true") == 0)
        return TRUE;
    if (ustricmp(s, L"t") == 0)
        return TRUE;
    return FALSE;
}

/*
 * Return 1 if `c' is one of the decimal digits '0'..'9', and 0
 * otherwise.
 */
int uisdigit(wchar_t c) {
    if (c < L'0' || c > L'9')
        return 0;
    return 1;
}

/* Amount by which the strftime scratch buffer grows on each retry. */
#define USTRFTIME_DELTA 128

/*
 * Format `timespec' according to the wide format string `wfmt' and
 * return the result as a wide string in a buffer managed through
 * resize(). If `wfmt' is NULL the format "%c" is used.
 *
 * The format is narrowed with ustrtoa and the formatting itself is
 * done by the ordinary char-based strftime; the output is then
 * widened one character at a time.
 */
wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) {
    char *narrowfmt;
    char *scratch = NULL;
    char *plain, *src;
    wchar_t *result, *dst;
    size_t capacity = 0;
    size_t produced;

    /*
     * strftime returns 0 both when it runs out of space and when
     * it successfully generates the empty string, so the two
     * cannot be told apart. To avoid the ambiguity, the format
     * used for the sizing pass is given a leading space, which
     * means it can never legitimately produce empty output.
     */
    if (!wfmt) {
        narrowfmt = " %c";
    } else {
        size_t fmtlen = ustrlen(wfmt);
        narrowfmt = mknewa(char, 2+fmtlen);
        narrowfmt[0] = ' ';
        ustrtoa(wfmt, narrowfmt+1, fmtlen+1);
    }

    /* Sizing pass: enlarge the scratch buffer until the text fits. */
    do {
        capacity += USTRFTIME_DELTA;
        scratch = resize(scratch, capacity);
        produced = strftime(scratch, capacity-1, narrowfmt, timespec);
    } while (produced == 0);

    /*
     * `produced' counts the leading space but not the terminator.
     * The real output drops the space and gains a terminator, so
     * it needs exactly `produced' elements.
     */
    result = resize((wchar_t *)scratch, produced);

    /*
     * The scratch buffer has just been reused as the wide result,
     * so format again, this time without the leading space, into
     * a separate char buffer to widen from.
     */
    plain = mknewa(char, produced);
    strftime(plain, produced, narrowfmt+1, timespec);

    /*
     * We operate in the C locale, so this all ought to be plain
     * ASCII. If we ever move outside ASCII machines, this may need
     * to be made more portable...
     */
    for (src = plain, dst = result; *src != '\0'; src++, dst++)
        *dst = *src;
    *dst = 0;

    sfree(plain);
    if (wfmt)
        sfree(narrowfmt);              /* the " %c" literal is not ours to free */
    return result;
}
Commit	Line	Data
d7482997	1	/*
	2	* ustring.c: Unicode string routines
	3	*/
	4
	5	#include <wchar.h>
	6	#include <time.h>
	7	#include "halibut.h"
	8
	9	wchar_t ustrdup(wchar_t s) {
	10	wchar_t *r;
	11	if (s) {
	12	r = mknewa(wchar_t, 1+ustrlen(s));
	13	ustrcpy(r, s);
	14	} else {
	15	r = mknew(wchar_t);
	16	*r = 0;
	17	}
	18	return r;
	19	}
	20
	21	char ustrtoa(wchar_t s, char *outbuf, int size) {
	22	char *p;
	23	if (!s) {
	24	*outbuf = '\0';
	25	return outbuf;
	26	}
	27	for (p = outbuf; *s && p < outbuf+size; p++,s++)
	28	p = s;
	29	if (p < outbuf+size)
	30	*p = '\0';
	31	else
	32	outbuf[size-1] = '\0';
	33	return outbuf;
	34	}
	35
ba9c1487	36	wchar_t ustrfroma(char s, wchar_t *outbuf, int size) {
	37	wchar_t *p;
	38	if (!s) {
	39	*outbuf = L'\0';
	40	return outbuf;
	41	}
	42	for (p = outbuf; *s && p < outbuf+size; p++,s++)
	43	p = s;
	44	if (p < outbuf+size)
	45	*p = '\0';
	46	else
	47	outbuf[size-1] = '\0';
	48	return outbuf;
	49	}
	50
50d6b4bd	51	char utoa_dup(wchar_t s) {
	52	int len;
	53	char *buf = NULL;
	54
	55	len = ustrlen(s) + 1;
	56	do {
	57	buf = resize(buf, len);
	58	ustrtoa(s, buf, len);
	59	len = (3 * len) / 2 + 1; /* this guarantees a strict increase */
	60	} while ((int)strlen(buf) >= len-1);
	61
	62	buf = resize(buf, strlen(buf)+1);
	63	return buf;
	64	}
	65
ba9c1487	66	wchar_t ufroma_dup(char s) {
	67	int len;
	68	wchar_t *buf = NULL;
	69
	70	len = strlen(s) + 1;
	71	do {
	72	buf = resize(buf, len);
	73	ustrfroma(s, buf, len);
	74	len = (3 * len) / 2 + 1; /* this guarantees a strict increase */
	75	} while (ustrlen(buf) >= len-1);
	76
	77	buf = resize(buf, ustrlen(buf)+1);
	78	return buf;
	79	}
	80
5dd44dce	81	int ustrlen(wchar_t const *s) {
d7482997	82	int len = 0;
	83	while (*s++) len++;
	84	return len;
	85	}
	86
	87	wchar_t uadv(wchar_t s) {
	88	return s + 1 + ustrlen(s);
	89	}
	90
5dd44dce	91	wchar_t ustrcpy(wchar_t dest, wchar_t const *source) {
d7482997	92	wchar_t *ret = dest;
	93	do {
	94	dest++ = source;
	95	} while (*source++);
	96	return ret;
	97	}
	98
	99	int ustrcmp(wchar_t lhs, wchar_t rhs) {
	100	if (!lhs && !rhs) return 0;
	101	if (!lhs) return -1;
	102	if (!rhs) return +1;
	103	while (lhs && rhs && lhs==rhs)
	104	lhs++, rhs++;
	105	if (lhs < rhs)
	106	return -1;
	107	else if (lhs > rhs)
	108	return 1;
	109	return 0;
	110	}
	111
	112	wchar_t utolower(wchar_t c) {
	113	if (c == L'\0')
	114	return c; /* this property needed by ustricmp */
	115	/* FIXME: this doesn't even come close */
	116	if (c >= 'A' && c <= 'Z')
	117	c += 'a'-'A';
	118	return c;
	119	}
	120
831da32e	121	int uisalpha(wchar_t c) {
	122	/* FIXME: this doesn't even come close */
	123	return (c >= 'A' && c <= 'Z') \|\| (c >= 'a' && c <= 'z');
	124	}
	125
d7482997	126	int ustricmp(wchar_t lhs, wchar_t rhs) {
	127	wchar_t lc, rc;
	128	while ((lc = utolower(lhs)) == (rc = utolower(rhs)) && lc && rc)
	129	lhs++, rhs++;
	130	if (!lc && !rc)
	131	return 0;
	132	if (lc < rc)
	133	return -1;
	134	else
	135	return 1;
	136	}
	137
	138	wchar_t ustrlow(wchar_t s) {
	139	wchar_t *p = s;
	140	while (*p) {
	141	p = utolower(p);
	142	p++;
	143	}
	144	return s;
	145	}
	146
	147	int utoi(wchar_t *s) {
	148	int sign = +1;
	149	int n;
	150
	151	if (*s == L'-') {
	152	s++;
	153	sign = -1;
	154	}
	155
	156	n = 0;
	157	while (s && s >= L'0' && *s <= L'9') {
	158	n *= 10;
	159	n += (*s - '0');
	160	s++;
	161	}
	162
	163	return n;
	164	}
	165
	166	int utob(wchar_t *s) {
	167	if (!ustricmp(s, L"yes") \|\| !ustricmp(s, L"y") \|\|
	168	!ustricmp(s, L"true") \|\| !ustricmp(s, L"t"))
	169	return TRUE;
	170	return FALSE;
	171	}
	172
	173	int uisdigit(wchar_t c) {
	174	return c >= L'0' && c <= L'9';
	175	}
	176
	177	#define USTRFTIME_DELTA 128
	178	wchar_t ustrftime(wchar_t wfmt, struct tm *timespec) {
	179	void *blk = NULL;
	180	wchar_t wblk, wp;
	181	char fmt, text, *p;
	182	size_t size = 0;
	183	size_t len;
	184
	185	/*
	186	* strftime has the entertaining property that it returns 0
	187	* _either_ on out-of-space _or_ on successful generation of
	188	* the empty string. Hence we must ensure our format can never
	189	* generate the empty string. Somebody throw a custard pie at
190	* whoever was responsible for that. Please?
191	*/
192	if (wfmt) {
193	len = ustrlen(wfmt);
194	fmt = mknewa(char, 2+len);
195	ustrtoa(wfmt, fmt+1, len+1);
196	fmt[0] = ' ';
197	} else
198	fmt = " %c";
199
200	while (1) {
201	size += USTRFTIME_DELTA;
202	blk = resize((char *)blk, size);
203	len = strftime((char *)blk, size-1, fmt, timespec);
204	if (len > 0)
205	break;
206	}
207
208	/* Note: +1 for the terminating 0, -1 for the initial space in fmt */
209	wblk = resize((wchar_t *)blk, len);
210	text = mknewa(char, len);
211	strftime(text, len, fmt+1, timespec);
212	/*
213	* We operate in the C locale, so this all ought to be kosher
214	* ASCII. If we ever move outside ASCII machines, we may need
215	* to make this more portable...
216	*/
217	for (wp = wblk, p = text; *p; p++, wp++)
218	wp = p;
219	*wp = 0;
220	if (wfmt)
221	sfree(fmt);
222	sfree(text);
223	return wblk;
224	}