/*
 * This file is part of DisOrder
 * Copyright (C) 2004, 2007 Richard Kettlewell
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 */
/** @brief Case-fold a NUL-terminated string
 * @param ptr String to fold; must not be NULL, because its length is
 *            measured with strlen()
 * @return Whatever utf8_casefold_canon() returns for the whole of @p ptr
 *
 * Thin convenience wrapper: measures @p ptr and hands the pointer and
 * length to utf8_casefold_canon().  The final argument is passed as 0;
 * presumably it is an optional output that is not wanted here -- TODO
 * confirm against utf8_casefold_canon()'s declaration.
 */
const char *casefold(const char *ptr) {
  return utf8_casefold_canon(ptr, strlen(ptr), 0);
}
42 static enum unicode_General_Category
cat(uint32_t c
) {
43 if(c
< UNICODE_NCHARS
) {
44 const struct unidata
*const ud
= &unidata
[c
/ UNICODE_MODULUS
][c
% UNICODE_MODULUS
];
45 return ud
->general_category
;
47 return unicode_General_Category_Cn
;
/* XXX this is a bit kludgy */
52 char **words(const char *s
, int *nvecp
) {
62 PARSE_UTF8(s
, c
, return 0);
63 /* special cases first */
74 /* do the rest on category */
76 case unicode_General_Category_Ll
:
77 case unicode_General_Category_Lm
:
78 case unicode_General_Category_Lo
:
79 case unicode_General_Category_Lt
:
80 case unicode_General_Category_Lu
:
81 case unicode_General_Category_Nd
:
82 case unicode_General_Category_Nl
:
83 case unicode_General_Category_No
:
84 case unicode_General_Category_Sc
:
85 case unicode_General_Category_Sk
:
86 case unicode_General_Category_Sm
:
87 case unicode_General_Category_So
:
88 /* letters, digits and symbols are considered to be part of
94 dynstr_append_bytes(&d
, start
, s
- start
);
97 case unicode_General_Category_Cc
:
98 case unicode_General_Category_Cf
:
99 case unicode_General_Category_Co
:
100 case unicode_General_Category_Cs
:
101 case unicode_General_Category_Zl
:
102 case unicode_General_Category_Zp
:
103 case unicode_General_Category_Zs
:
104 case unicode_General_Category_Pe
:
105 case unicode_General_Category_Ps
:
108 dynstr_terminate(&d
);
109 vector_append(&v
, d
.vec
);
114 case unicode_General_Category_Mc
:
115 case unicode_General_Category_Me
:
116 case unicode_General_Category_Mn
:
117 case unicode_General_Category_Pc
:
118 case unicode_General_Category_Pd
:
119 case unicode_General_Category_Pf
:
120 case unicode_General_Category_Pi
:
121 case unicode_General_Category_Po
:
122 case unicode_General_Category_Cn
:
123 /* control and punctuation is completely ignored */
129 /* pick up the final word */
130 dynstr_terminate(&d
);
131 vector_append(&v
, d
.vec
);
133 vector_terminate(&v
);