2 * ustring.c: Unicode string routines
9 wchar_t *ustrdup(wchar_t const *s
) {
12 r
= mknewa(wchar_t, 1+ustrlen(s
));
21 static char *ustrtoa_internal(wchar_t const *s
, char *outbuf
, int size
,
22 int charset
, int careful
) {
24 charset_state state
= CHARSET_INIT_STATE
;
32 size
--; /* leave room for terminating NUL */
36 ret
= charset_from_unicode(&s
, &len
, outbuf
, size
, charset
, &state
,
37 (careful ?
&err
: NULL
));
49 ret
= charset_from_unicode(NULL
, 0, outbuf
, size
, charset
, &state
, NULL
);
56 char *ustrtoa(wchar_t const *s
, char *outbuf
, int size
, int charset
) {
57 return ustrtoa_internal(s
, outbuf
, size
, charset
, FALSE
);
60 char *ustrtoa_careful(wchar_t const *s
, char *outbuf
, int size
, int charset
) {
61 return ustrtoa_internal(s
, outbuf
, size
, charset
, TRUE
);
64 wchar_t *ustrfroma(char const *s
, wchar_t *outbuf
, int size
, int charset
) {
66 charset_state state
= CHARSET_INIT_STATE
;
74 size
--; /* allow for terminating NUL */
77 ret
= charset_to_unicode(&s
, &len
, outbuf
, size
,
78 charset
, &state
, NULL
, 0);
88 char *utoa_internal_dup(wchar_t const *s
, int charset
, int *lenp
, int careful
)
91 int outpos
, outlen
, len
, ret
, err
;
92 charset_state state
= CHARSET_INIT_STATE
;
101 outbuf
= mknewa(char, outlen
);
104 outbuf
[outpos
] = '\0';
108 ret
= charset_from_unicode(&s
, &len
,
109 outbuf
+ outpos
, outlen
- outpos
- 1,
110 charset
, &state
, (careful ?
&err
: NULL
));
116 outlen
= outlen
* 3 / 2;
117 outbuf
= resize(outbuf
, outlen
);
120 outbuf
[outpos
] = '\0';
125 outlen
= outpos
+ 32;
126 outbuf
= resize(outbuf
, outlen
);
127 ret
= charset_from_unicode(NULL
, 0,
128 outbuf
+ outpos
, outlen
- outpos
+ 1,
129 charset
, &state
, NULL
);
131 outbuf
[outpos
] = '\0';
137 char *utoa_dup(wchar_t const *s
, int charset
)
139 return utoa_internal_dup(s
, charset
, NULL
, FALSE
);
142 char *utoa_dup_len(wchar_t const *s
, int charset
, int *len
)
144 return utoa_internal_dup(s
, charset
, len
, FALSE
);
147 char *utoa_careful_dup(wchar_t const *s
, int charset
)
149 return utoa_internal_dup(s
, charset
, NULL
, TRUE
);
152 wchar_t *ufroma_dup(char const *s
, int charset
) {
158 buf
= resize(buf
, len
);
159 ustrfroma(s
, buf
, len
, charset
);
160 len
= (3 * len
) / 2 + 1; /* this guarantees a strict increase */
161 } while (ustrlen(buf
) >= len
-1);
163 buf
= resize(buf
, ustrlen(buf
)+1);
167 int ustrlen(wchar_t const *s
) {
173 wchar_t *uadv(wchar_t *s
) {
174 return s
+ 1 + ustrlen(s
);
177 wchar_t *ustrcpy(wchar_t *dest
, wchar_t const *source
) {
185 int ustrcmp(wchar_t *lhs
, wchar_t *rhs
) {
186 if (!lhs
&& !rhs
) return 0;
189 while (*lhs
&& *rhs
&& *lhs
==*rhs
)
193 else if (*lhs
> *rhs
)
198 wchar_t utolower(wchar_t c
) {
200 return c
; /* this property needed by ustricmp */
201 /* FIXME: this doesn't even come close */
202 if (c
>= 'A' && c
<= 'Z')
207 int uisalpha(wchar_t c
) {
208 /* FIXME: this doesn't even come close */
209 return (c
>= 'A' && c
<= 'Z') || (c
>= 'a' && c
<= 'z');
212 int ustricmp(wchar_t *lhs
, wchar_t *rhs
) {
214 while ((lc
= utolower(*lhs
)) == (rc
= utolower(*rhs
)) && lc
&& rc
)
224 wchar_t *ustrlow(wchar_t *s
) {
233 int utoi(wchar_t *s
) {
243 while (*s
&& *s
>= L
'0' && *s
<= L
'9') {
252 int utob(wchar_t *s
) {
253 if (!ustricmp(s
, L
"yes") || !ustricmp(s
, L
"y") ||
254 !ustricmp(s
, L
"true") || !ustricmp(s
, L
"t"))
259 int uisdigit(wchar_t c
) {
260 return c
>= L
'0' && c
<= L
'9';
263 #define USTRFTIME_DELTA 128
264 wchar_t *ustrftime(wchar_t *wfmt
, struct tm
*timespec
) {
267 char *fmt
, *text
, *p
;
272 * FIXME: really we ought to copy non-% parts of the format
273 * ourselves, and only resort to strftime for % parts. Also we
274 * should use wcsftime if it's present.
278 * strftime has the entertaining property that it returns 0
279 * _either_ on out-of-space _or_ on successful generation of
280 * the empty string. Hence we must ensure our format can never
281 * generate the empty string. Somebody throw a custard pie at
282 * whoever was responsible for that. Please?
286 fmt
= mknewa(char, 2+len
);
287 ustrtoa(wfmt
, fmt
+1, len
+1, CS_ASCII
); /* CS_FIXME? */
293 size
+= USTRFTIME_DELTA
;
294 blk
= resize((char *)blk
, size
);
295 len
= strftime((char *)blk
, size
-1, fmt
, timespec
);
300 /* Note: +1 for the terminating 0, -1 for the initial space in fmt */
301 wblk
= resize((wchar_t *)blk
, len
);
302 text
= mknewa(char, len
);
303 strftime(text
, len
, fmt
+1, timespec
);
305 * We operate in the C locale, so this all ought to be kosher
306 * ASCII. If we ever move outside ASCII machines, we may need
307 * to make this more portable...
309 for (wp
= wblk
, p
= text
; *p
; p
++, wp
++)