2 * This file is part of DisOrder.
3 * Copyright (C) 2005, 2007, 2008 Richard Kettlewell
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
22 /** @brief Open a Unicode test file
 *
 * @param path Name of the test file; it is also appended to the Unicode
 * 5.0.0 UCD download URL when the file has to be fetched
 *
 * Tries to fopen() @c base for reading.  If that fails, a "wget" command for
 * @p path is formatted into @c buffer and run with system(); a nonzero status
 * is reported through fatal() together with wstat().  The file is then
 * chmod()ed to 0444 and opened again, and fatal() is called if either of
 * those steps fails.
 *
 * NOTE(review): this listing has lost lines.  The local declarations, both
 * outcomes of the strrchr() test and the end of the function are not
 * visible.  Presumably @c base ends up as the last component of @p path and
 * the open stream is returned; confirm against the complete source.
 */
23 static FILE *open_unicode_test(const char *path
) {
29 if((base
= strrchr(path
, '/')))
/* Try the file as it stands first; download only if it cannot be opened. */
33 if(!(fp
= fopen(base
, "r"))) {
34 snprintf(buffer
, sizeof buffer
,
35 "wget http://www.unicode.org/Public/5.0.0/ucd/%s", path
);
36 if((w
= system(buffer
)))
37 fatal(0, "%s: %s", buffer
, wstat(w
));
/* 0444: readable by everyone, writable by no-one. */
38 if(chmod(base
, 0444) < 0)
39 fatal(errno
, "chmod %s", base
);
/* Second attempt, after the download; failure here is fatal. */
40 if(!(fp
= fopen(base
, "r")))
41 fatal(errno
, "%s", base
);
46 /** @brief Run breaking tests for utf32_is_grapheme_boundary() etc
 *
 * @param path Test file name; passed to open_unicode_test() and inputline()
 * and used as the prefix of diagnostics
 * @param breakfn Boundary predicate under test; it is called as
 * breakfn(buffer, bn, n) and its result is compared with break_allowed[n]
 *
 * Lines are read with inputline() until it returns nonzero, and lines
 * starting with '#' are skipped.  Within a line, the UTF-8 encoding of
 * U+00F7 sets break_allowed[bn] to 1, the UTF-8 encoding of U+00D7 sets it
 * to 0, and a run of hex digits is converted with strtoul() and appended to
 * buffer[].  Every offset from 0 to bn inclusive is then checked against
 * @p breakfn.
 *
 * NOTE(review): this listing has lost lines.  The declarations of l, lp, bn,
 * n and lineno, the handling of spaces and tabs, the conditions leading to
 * the "evil line" fatal() and most of the mismatch report are not visible.
 * Presumably the fatal() is the fallback for unrecognised input; confirm
 * against the complete source.
 */
47 static void breaktest(const char *path
,
48 int (*breakfn
)(const uint32_t *, size_t, size_t)) {
49 FILE *fp
= open_unicode_test(path
);
53 char break_allowed
[1024];
54 uint32_t buffer
[1024];
56 while(!inputline(path
, fp
, &l
, '\n')) {
/* '#' in the first column marks a line to ignore. */
58 if(l
[0] == '#') continue;
62 if(*lp
== ' ' || *lp
== '\t') {
68 if((unsigned char)*lp
== 0xC3 && (unsigned char)lp
[1] == 0xB7) {
69 /* C3 B7 is UTF-8 for U+00F7 DIVISION SIGN: a break is expected here */
70 break_allowed
[bn
] = 1;
74 if((unsigned char)*lp
== 0xC3 && (unsigned char)lp
[1] == 0x97) {
75 /* C3 97 is UTF-8 for U+00D7 MULTIPLICATION SIGN: no break expected here */
76 break_allowed
[bn
] = 0;
/* Cast to unsigned char so isxdigit() never sees a negative value. */
80 if(isxdigit((unsigned char)*lp
)) {
/* Parse one hexadecimal code point; strtoul() advances lp past it. */
81 buffer
[bn
++] = strtoul(lp
, &lp
, 16);
84 fatal(0, "%s:%d: evil line: %s", path
, lineno
, l
);
/* n runs up to bn inclusive, so the position after the last code point is
 * checked as well. */
86 for(n
= 0; n
<= bn
; ++n
) {
87 if(breakfn(buffer
, bn
, n
) != break_allowed
[n
]) {
89 "%s:%d: offset %zu: mismatch\n"
102 /** @brief Tests for @ref lib/unicode.h */
103 static void test_unicode(void) {
107 uint32_t buffer
[1024];
108 uint32_t *c
[6], *NFD_c
[6], *NFKD_c
[6], *NFC_c
[6], *NFKC_c
[6]; /* 1-indexed */
111 fp
= open_unicode_test("NormalizationTest.txt");
112 while(!inputline("NormalizationTest.txt", fp
, &l
, '\n')) {
114 if(*l
== '#' || *l
== '@')
119 c
[cn
++] = &buffer
[bn
];
120 while(*lp
&& *lp
!= '#') {
129 c
[cn
++] = &buffer
[bn
];
133 buffer
[bn
++] = strtoul(lp
, &lp
, 16);
137 for(cn
= 1; cn
<= 5; ++cn
) {
138 NFD_c
[cn
] = utf32_decompose_canon(c
[cn
], utf32_len(c
[cn
]), 0);
139 NFKD_c
[cn
] = utf32_decompose_compat(c
[cn
], utf32_len(c
[cn
]), 0);
140 NFC_c
[cn
] = utf32_compose_canon(c
[cn
], utf32_len(c
[cn
]), 0);
141 NFKC_c
[cn
] = utf32_compose_compat(c
[cn
], utf32_len(c
[cn
]), 0);
143 #define unt_check(T, A, B) do { \
145 if(utf32_cmp(c[A], T##_c[B])) { \
147 "NormalizationTest.txt:%d: c%d != "#T"(c%d)\n", \
149 fprintf(stderr, " c%d:%s\n", \
150 A, format_utf32(c[A])); \
151 fprintf(stderr, " c%d:%s\n", \
152 B, format_utf32(c[B])); \
153 fprintf(stderr, "%4s(c%d):%s\n", \
154 #T, B, format_utf32(T##_c[B])); \
158 unt_check(NFD
, 3, 1);
159 unt_check(NFD
, 3, 2);
160 unt_check(NFD
, 3, 3);
161 unt_check(NFD
, 5, 4);
162 unt_check(NFD
, 5, 5);
163 unt_check(NFKD
, 5, 1);
164 unt_check(NFKD
, 5, 2);
165 unt_check(NFKD
, 5, 3);
166 unt_check(NFKD
, 5, 4);
167 unt_check(NFKD
, 5, 5);
168 unt_check(NFC
, 2, 1);
169 unt_check(NFC
, 2, 2);
170 unt_check(NFC
, 2, 3);
171 unt_check(NFC
, 4, 4);
172 unt_check(NFC
, 4, 5);
173 unt_check(NFKC
, 4, 1);
174 unt_check(NFKC
, 4, 2);
175 unt_check(NFKC
, 4, 3);
176 unt_check(NFKC
, 4, 4);
177 unt_check(NFKC
, 4, 5);
178 for(cn
= 1; cn
<= 5; ++cn
) {
185 breaktest("auxiliary/GraphemeBreakTest.txt", utf32_is_grapheme_boundary
);
186 breaktest("auxiliary/WordBreakTest.txt", utf32_is_word_boundary
);
187 insist(utf32_combining_class(0x40000) == 0);
188 insist(utf32_combining_class(0xE0000) == 0);