2 * This file is part of DisOrder.
3 * Copyright (C) 2005, 2007, 2008 Richard Kettlewell
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 /** @brief Open a Unicode test file
 *
 * @param path Name of the test file relative to the Unicode 5.1.0 UCD
 *             directory; it is appended verbatim to the download URL
 *
 * Tries to open @c base for reading.  If that fails, the file is fetched
 * with wget, made read-only with chmod() and opened again.  Each failure
 * on the download path is reported through disorder_fatal().
 *
 * NOTE(review): this view of the file is missing lines (the embedded
 * original line numbers skip), so the local declarations, the statements
 * controlled by the strrchr() test and the return statement are not
 * visible here.  Presumably @c base ends up naming the final path
 * component and the open FILE is returned - confirm against the complete
 * source.
 */
21 static FILE *open_unicode_test(const char *path
) {
/* Find the last '/' in path, if there is one. */
27 if((base
= strrchr(path
, '/')))
/* Try a local copy first; the download path below only runs when this
 * open fails. */
31 if(!(fp
= fopen(base
, "r"))) {
/* Build the wget command line.  Note that path, not base, goes into the
 * URL. */
32 snprintf(buffer
, sizeof buffer
,
33 "wget http://www.unicode.org/Public/5.1.0/ucd/%s", path
);
/* Any nonzero status from system() is fatal; wstat(w) supplies the
 * second %s of the message. */
34 if((w
= system(buffer
)))
35 disorder_fatal(0, "%s: %s", buffer
, wstat(w
));
/* Mode 0444: make the fetched file read-only for everyone. */
36 if(chmod(base
, 0444) < 0)
37 disorder_fatal(errno
, "chmod %s", base
);
/* Second attempt at opening, now that wget has run. */
38 if(!(fp
= fopen(base
, "r")))
39 disorder_fatal(errno
, "%s", base
);
44 /** @brief Run breaking tests for utf32_is_grapheme_boundary() etc
 *
 * @param path Test file name; passed to open_unicode_test() and
 *             inputline(), and used as the prefix of diagnostics
 * @param breakfn Boundary predicate under test, called as
 *                breakfn(buffer, bn, n) for each offset n
 *
 * Each non-comment line of the file is scanned byte by byte.  The UTF-8
 * sequence 0xC3 0xB7 (DIVISION SIGN, U+00F7) records "break allowed" at
 * the current offset, 0xC3 0x97 (MULTIPLICATION SIGN, U+00D7) records
 * "break not allowed", and a run of hex digits is stored as the next code
 * point.  The recorded expectations are then compared with breakfn() at
 * every offset.
 *
 * NOTE(review): several original lines are missing from this view (the
 * embedded line numbers skip): the declarations of l, lp, bn, n and
 * lineno, the statements that advance lp, and the head of the mismatch
 * report.  No check of bn against the 1024-element arrays is visible
 * here - confirm against the complete source.
 */
45 static void breaktest(const char *path
,
46 int (*breakfn
)(const uint32_t *, size_t, size_t)) {
47 FILE *fp
= open_unicode_test(path
);
/* break_allowed[i] is the expected answer for offset i; buffer holds the
 * code points of the current line. */
51 char break_allowed
[1024];
52 uint32_t buffer
[1024];
/* Keep reading for as long as inputline() returns zero. */
54 while(!inputline(path
, fp
, &l
, '\n')) {
/* Lines starting with '#' are skipped. */
56 if(l
[0] == '#') continue;
/* Spaces and tabs between fields (the action taken is not visible in
 * this view). */
60 if(*lp
== ' ' || *lp
== '\t') {
/* Marker bytes are compared as unsigned char so that values >= 0x80
 * match regardless of the signedness of plain char. */
66 if((unsigned char)*lp
== 0xC3 && (unsigned char)lp
[1] == 0xB7) {
67 /* 00F7 DIVISION SIGN */
68 break_allowed
[bn
] = 1;
72 if((unsigned char)*lp
== 0xC3 && (unsigned char)lp
[1] == 0x97) {
73 /* 00D7 MULTIPLICATION SIGN */
74 break_allowed
[bn
] = 0;
/* A hex code point: strtoul() converts it and moves lp past the digits;
 * bn counts the code points stored so far. */
78 if(isxdigit((unsigned char)*lp
)) {
79 buffer
[bn
++] = strtoul(lp
, &lp
, 16);
/* Reported with the file name, line number and the offending line. */
82 disorder_fatal(0, "%s:%d: evil line: %s", path
, lineno
, l
);
/* The bound is inclusive: a marker is written at index bn before the
 * following code point increments bn, so a line with bn code points has
 * expectations for offsets 0..bn. */
84 for(n
= 0; n
<= bn
; ++n
) {
85 if(breakfn(buffer
, bn
, n
) != break_allowed
[n
]) {
87 "%s:%d: offset %zu: mismatch\n"
100 /** @brief Tests for @ref lib/unicode.h
 *
 * Reads NormalizationTest.txt, computes the four normalized forms of the
 * columns c[1]..c[5] of each data line and checks them against the
 * columns with unt_check().  Afterwards it runs breaktest() on the
 * grapheme and word break test files and checks the combining class of
 * two code points.
 *
 * NOTE(review): many original lines are missing from this view (the
 * embedded line numbers skip), including the declarations of fp, l, lp,
 * cn and bn, the logic that separates columns, the tail of the
 * unt_check() macro and the body of the final loop.  The comments below
 * describe only what is visible.
 */
101 static void test_unicode(void) {
/* buffer holds the code points of the current line; c[] points at the
 * start of each column inside it, and the other arrays hold the computed
 * forms of the matching column. */
105 uint32_t buffer
[1024];
106 uint32_t *c
[6], *NFD_c
[6], *NFKD_c
[6], *NFC_c
[6], *NFKC_c
[6]; /* 1-indexed */
109 fp
= open_unicode_test("NormalizationTest.txt");
/* Keep reading for as long as inputline() returns zero. */
110 while(!inputline("NormalizationTest.txt", fp
, &l
, '\n')) {
/* Lines starting with '#' or '@' are singled out here; the action taken
 * is not visible in this view (presumably they are skipped - confirm). */
112 if(*l
== '#' || *l
== '@')
/* Record where the next column starts within buffer. */
117 c
[cn
++] = &buffer
[bn
];
/* Scan to the end of the line or to a '#', whichever comes first. */
118 while(*lp
&& *lp
!= '#') {
/* A further column starts here (the condition guarding this is not
 * visible in this view). */
127 c
[cn
++] = &buffer
[bn
];
/* Hex code point: strtoul() converts it and moves lp past the digits. */
131 buffer
[bn
++] = strtoul(lp
, &lp
, 16);
/* Compute all four forms of each of the five columns. */
135 for(cn
= 1; cn
<= 5; ++cn
) {
136 NFD_c
[cn
] = utf32_decompose_canon(c
[cn
], utf32_len(c
[cn
]), 0);
137 NFKD_c
[cn
] = utf32_decompose_compat(c
[cn
], utf32_len(c
[cn
]), 0);
138 NFC_c
[cn
] = utf32_compose_canon(c
[cn
], utf32_len(c
[cn
]), 0);
139 NFKC_c
[cn
] = utf32_compose_compat(c
[cn
], utf32_len(c
[cn
]), 0);
/* unt_check(T, A, B) compares column c[A] with T_c[B], i.e. form T of
 * column B, using utf32_cmp().  On a nonzero result it prints c[A], c[B]
 * and T(c[B]) to stderr.
 *
 * The invocations that follow assert:
 *   c3 == NFD(c1)  == NFD(c2)  == NFD(c3),  c5 == NFD(c4) == NFD(c5)
 *   c5 == NFKD(c1) == ... == NFKD(c5)
 *   c2 == NFC(c1)  == NFC(c2)  == NFC(c3),  c4 == NFC(c4) == NFC(c5)
 *   c4 == NFKC(c1) == ... == NFKC(c5)
 * NOTE(review): these look like the conformance invariants stated in the
 * header of NormalizationTest.txt - confirm against that file. */
141 #define unt_check(T, A, B) do { \
143 if(utf32_cmp(c[A], T##_c[B])) { \
145 "NormalizationTest.txt:%d: c%d != "#T"(c%d)\n", \
147 fprintf(stderr, " c%d:%s\n", \
148 A, format_utf32(c[A])); \
149 fprintf(stderr, " c%d:%s\n", \
150 B, format_utf32(c[B])); \
151 fprintf(stderr, "%4s(c%d):%s\n", \
152 #T, B, format_utf32(T##_c[B])); \
156 unt_check(NFD
, 3, 1);
157 unt_check(NFD
, 3, 2);
158 unt_check(NFD
, 3, 3);
159 unt_check(NFD
, 5, 4);
160 unt_check(NFD
, 5, 5);
161 unt_check(NFKD
, 5, 1);
162 unt_check(NFKD
, 5, 2);
163 unt_check(NFKD
, 5, 3);
164 unt_check(NFKD
, 5, 4);
165 unt_check(NFKD
, 5, 5);
166 unt_check(NFC
, 2, 1);
167 unt_check(NFC
, 2, 2);
168 unt_check(NFC
, 2, 3);
169 unt_check(NFC
, 4, 4);
170 unt_check(NFC
, 4, 5);
171 unt_check(NFKC
, 4, 1);
172 unt_check(NFKC
, 4, 2);
173 unt_check(NFKC
, 4, 3);
174 unt_check(NFKC
, 4, 4);
175 unt_check(NFKC
, 4, 5);
/* Second pass over the five columns; the loop body is not visible in
 * this view (presumably it releases the computed forms - confirm). */
176 for(cn
= 1; cn
<= 5; ++cn
) {
/* Boundary tests, driven by the Unicode break test files. */
183 breaktest("auxiliary/GraphemeBreakTest.txt", utf32_is_grapheme_boundary
);
184 breaktest("auxiliary/WordBreakTest.txt", utf32_is_word_boundary
);
/* Both of these code points are expected to have combining class 0. */
185 insist(utf32_combining_class(0x40000) == 0);
186 insist(utf32_combining_class(0xE0000) == 0);