2 * This file is part of DisOrder
3 * Copyright (C) 2004, 2007 Richard Kettlewell
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
37 const char *casefold(const char *ptr
) {
44 /* Convert UTF-8 to UCS-4 */
45 PARSE_UTF8(s
, c
, return ptr
);
47 if(c
< UNICODE_NCHARS
) {
48 /* If this is a known character, convert it to lower case */
49 const struct unidata
*const ud
= &unidata
[c
/ 256][c
% 256];
50 c
+= ud
->lower_offset
;
52 /* Convert UCS-4 back to UTF-8 */
59 static enum unicode_gc_cat
cat(uint32_t c
) {
60 if(c
< UNICODE_NCHARS
) {
61 /* If this is a known character, look up its entry in the unidata table */
62 const struct unidata
*const ud
= &unidata
[c
/ 256][c
% 256];
68 /* XXX this is a bit kludgy */
70 char **words(const char *s
, int *nvecp
) {
80 PARSE_UTF8(s
, c
, return 0);
81 /* special cases first */
92 /* do the rest on category */
106 /* letters, digits and symbols are considered to be part of
112 dynstr_append_bytes(&d
, start
, s
- start
);
126 dynstr_terminate(&d
);
127 vector_append(&v
, d
.vec
);
141 /* control and punctuation are completely ignored */
147 /* pick up the final word */
148 dynstr_terminate(&d
);
149 vector_append(&v
, d
.vec
);
151 vector_terminate(&v
);