mdw@git.distorted.org.uk Git - disorder/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* This file is part of DisOrder.
	3	* Copyright (C) 2005, 2007 Richard Kettlewell
	4	*
	5	* This program is free software; you can redistribute it and/or modify
	6	* it under the terms of the GNU General Public License as published by
	7	* the Free Software Foundation; either version 2 of the License, or
	8	* (at your option) any later version.
	9	*
	10	* This program is distributed in the hope that it will be useful, but
	11	* WITHOUT ANY WARRANTY; without even the implied warranty of
	12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	* General Public License for more details.
	14	*
	15	* You should have received a copy of the GNU General Public License
	16	* along with this program; if not, write to the Free Software
	17	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
	18	* USA
	19	*/
	20	/** @file lib/test.c @brief Library tests */
	21
	22	#include <config.h>
	23	#include "types.h"
	24
	25	#include <stdio.h>
	26	#include <string.h>
	27	#include <stdlib.h>
	28	#include <errno.h>
	29	#include <ctype.h>
	30	#include <assert.h>
	31	#include <sys/types.h>
	32	#include <sys/stat.h>
	33
	34	#include "utf8.h"
	35	#include "mem.h"
	36	#include "log.h"
	37	#include "vector.h"
	38	#include "charset.h"
	39	#include "mime.h"
	40	#include "hex.h"
	41	#include "heap.h"
	42	#include "unicode.h"
	43	#include "inputline.h"
	44	#include "wstat.h"
	45
	/* Test bookkeeping: @c tests counts the checks that have been run and
	 * @c errors counts the checks that failed; main() prints both at the end.
	 * @c fail_first is set by main() when FAIL_FIRST is in the environment. */
	static int tests, errors;
	static int fail_first;

	/** @brief Record one failed check
	 *
	 * Increments @c errors.  If @c fail_first is nonzero the process is
	 * aborted immediately, so the run stops at the first failure.
	 */
	static void count_error(void) {
	  ++errors;
	  if(fail_first)
	    abort();
	}
	54
	/** @brief Checks that @p expr is nonzero
	 *
	 * On failure a diagnostic naming the file, line and expression text is
	 * written to stderr and the failure is recorded with count_error().  The
	 * diagnostic is printed @b before count_error() is called because
	 * count_error() may abort(), and the message would otherwise be lost.
	 * @c tests is incremented whether or not the check passed.
	 */
	#define insist(expr) do { \
	  if(!(expr)) { \
	    fprintf(stderr, "%s:%d: error checking %s\n", \
	            __FILE__, __LINE__, #expr); \
	    count_error(); \
	  } \
	  ++tests; \
	} while(0)
	64
	65	static const char format(const char s) {
	66	struct dynstr d;
	67	int c;
	68	char buf[10];
	69
	70	dynstr_init(&d);
	71	while((c = (unsigned char)*s++)) {
	72	if(c >= ' ' && c <= '~')
	73	dynstr_append(&d, c);
	74	else {
	75	sprintf(buf, "\\x%02X", (unsigned)c);
	76	dynstr_append_string(&d, buf);
	77	}
	78	}
	79	dynstr_terminate(&d);
	80	return d.vec;
	81	}
	82
	83	static const char format_utf32(const uint32_t s) {
	84	struct dynstr d;
	85	uint32_t c;
	86	char buf[64];
	87
	88	dynstr_init(&d);
	89	while((c = *s++)) {
	90	sprintf(buf, " %04lX", (long)c);
	91	dynstr_append_string(&d, buf);
	92	}
	93	dynstr_terminate(&d);
	94	return d.vec;
	95	}
	96
	/** @brief Check that string @p GOT equals string @p WANT
	 *
	 * @p GOT and @p WANT are each evaluated once.  If @p GOT is a null
	 * pointer that is reported as "returned 0" and counted as an error
	 * without calling strcmp() on it.  Otherwise the two strings are compared
	 * and, if they differ, both are printed via format() and an error is
	 * counted.  @c tests is incremented in every case.
	 */
	#define check_string(GOT, WANT) do { \
	  const char *g = GOT; \
	  const char *w = WANT; \
	  \
	  if(g == 0) { \
	    fprintf(stderr, "%s:%d: %s returned 0\n", \
	            __FILE__, __LINE__, #GOT); \
	    count_error(); \
	  } else if(strcmp(w, g)) { \
	    fprintf(stderr, "%s:%d: %s returned:\n%s\nexpected:\n%s\n", \
	            __FILE__, __LINE__, #GOT, format(g), format(w)); \
	    count_error(); \
	  } \
	  ++tests; \
	} while(0)
	112
	113	static uint32_t ucs4parse(const char s) {
	114	struct dynstr_ucs4 d;
	115	char *e;
	116
	117	dynstr_ucs4_init(&d);
	118	while(*s) {
	119	errno = 0;
	120	dynstr_ucs4_append(&d, strtoul(s, &e, 0));
	121	if(errno) fatal(errno, "strtoul (%s)", s);
	122	s = e;
	123	}
	124	dynstr_ucs4_terminate(&d);
	125	return d.vec;
	126	}
	127
	/** @brief Tests for UTF-8 validation and UTF-8/UTF-32 conversion
	 *
	 * Valid sequences are checked with U8(), which round-trips them through
	 * utf8_to_utf32() and utf32_to_utf8(); invalid sequences are checked to be
	 * rejected by utf8_valid().  The boundary cases follow the grammar quoted
	 * from RFC3629 in the comments below.
	 */
	static void test_utf8(void) {
	  /* Test validutf8, convert to UCS-4, check the answer is right,
	   * convert back to UTF-8, check we got to where we started */
	#define U8(CHARS, WORDS) do { \
	  uint32_t *w = ucs4parse(WORDS); \
	  uint32_t *ucs; \
	  char *u8; \
	  \
	  insist(validutf8(CHARS)); \
	  ucs = utf8_to_utf32(CHARS, strlen(CHARS), 0); \
	  insist(ucs != 0); \
	  insist(!utf32_cmp(w, ucs)); \
	  u8 = utf32_to_utf8(ucs, utf32_len(ucs), 0); \
	  insist(u8 != 0); \
	  insist(!strcmp(u8, CHARS)); \
	} while(0)

	  fprintf(stderr, "test_utf8\n");
	#define validutf8(S) utf8_valid((S), strlen(S))

	  /* empty string */

	  U8("", "");

	  /* ASCII characters */

	  U8(" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~",
	     "0x20 0x21 0x22 0x23 0x24 0x25 0x26 0x27 0x28 0x29 0x2a 0x2b 0x2c 0x2d "
	     "0x2e 0x2f 0x30 0x31 0x32 0x33 0x34 0x35 0x36 0x37 0x38 0x39 0x3a "
	     "0x3b 0x3c 0x3d 0x3e 0x3f 0x40 0x41 0x42 0x43 0x44 0x45 0x46 0x47 "
	     "0x48 0x49 0x4a 0x4b 0x4c 0x4d 0x4e 0x4f 0x50 0x51 0x52 0x53 0x54 "
	     "0x55 0x56 0x57 0x58 0x59 0x5a 0x5b 0x5c 0x5d 0x5e 0x5f 0x60 0x61 "
	     "0x62 0x63 0x64 0x65 0x66 0x67 0x68 0x69 0x6a 0x6b 0x6c 0x6d 0x6e "
	     "0x6f 0x70 0x71 0x72 0x73 0x74 0x75 0x76 0x77 0x78 0x79 0x7a 0x7b "
	     "0x7c 0x7d 0x7e");
	  U8("\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037\177",
	     "0x1 0x2 0x3 0x4 0x5 0x6 0x7 0x8 0x9 0xa 0xb 0xc 0xd 0xe 0xf 0x10 "
	     "0x11 0x12 0x13 0x14 0x15 0x16 0x17 0x18 0x19 0x1a 0x1b 0x1c 0x1d "
	     "0x1e 0x1f 0x7f");

	  /* from RFC3629 */

	  /* UTF8-2 = %xC2-DF UTF8-tail */
	  insist(!validutf8("\xC0\x80"));
	  insist(!validutf8("\xC1\x80"));
	  insist(!validutf8("\xC2\x7F"));
	  U8("\xC2\x80", "0x80");
	  U8("\xDF\xBF", "0x7FF");
	  insist(!validutf8("\xDF\xC0"));

	  /* UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
	   * %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
	   */
	  insist(!validutf8("\xE0\x9F\x80"));
	  U8("\xE0\xA0\x80", "0x800");
	  U8("\xE0\xBF\xBF", "0xFFF");
	  insist(!validutf8("\xE0\xC0\xBF"));

	  insist(!validutf8("\xE1\x80\x7F"));
	  U8("\xE1\x80\x80", "0x1000");
	  U8("\xEC\xBF\xBF", "0xCFFF");
	  insist(!validutf8("\xEC\xC0\xBF"));

	  U8("\xED\x80\x80", "0xD000");
	  U8("\xED\x9F\xBF", "0xD7FF");
	  insist(!validutf8("\xED\xA0\xBF"));

	  insist(!validutf8("\xEE\x7f\x80"));
	  U8("\xEE\x80\x80", "0xE000");
	  U8("\xEF\xBF\xBF", "0xFFFF");
	  insist(!validutf8("\xEF\xC0\xBF"));

	  /* UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
	   * %xF4 %x80-8F 2( UTF8-tail )
	   */
	  insist(!validutf8("\xF0\x8F\x80\x80"));
	  U8("\xF0\x90\x80\x80", "0x10000");
	  U8("\xF0\xBF\xBF\xBF", "0x3FFFF");
	  insist(!validutf8("\xF0\xC0\x80\x80"));

	  insist(!validutf8("\xF1\x80\x80\x7F"));
	  U8("\xF1\x80\x80\x80", "0x40000");
	  U8("\xF3\xBF\xBF\xBF", "0xFFFFF");
	  insist(!validutf8("\xF3\xC0\x80\x80"));

	  insist(!validutf8("\xF4\x80\x80\x7F"));
	  U8("\xF4\x80\x80\x80", "0x100000");
	  U8("\xF4\x8F\xBF\xBF", "0x10FFFF");
	  insist(!validutf8("\xF4\x90\x80\x80"));

	  /* miscellaneous non-UTF-8 rubbish */
	  insist(!validutf8("\x80"));
	  insist(!validutf8("\xBF"));
	  insist(!validutf8("\xC0"));
	  insist(!validutf8("\xC0\x7F"));
	  insist(!validutf8("\xC0\xC0"));
	  insist(!validutf8("\xE0"));
	  insist(!validutf8("\xE0\x7F"));
	  insist(!validutf8("\xE0\xC0"));
	  insist(!validutf8("\xE0\x80"));
	  insist(!validutf8("\xE0\x80\x7f"));
	  insist(!validutf8("\xE0\x80\xC0"));
	  insist(!validutf8("\xF0"));
	  insist(!validutf8("\xF0\x7F"));
	  insist(!validutf8("\xF0\xC0"));
	  insist(!validutf8("\xF0\x80"));
	  insist(!validutf8("\xF0\x80\x7f"));
	  insist(!validutf8("\xF0\x80\xC0"));
	  insist(!validutf8("\xF0\x80\x80\x7f"));
	  insist(!validutf8("\xF0\x80\x80\xC0"));
	  insist(!validutf8("\xF5\x80\x80\x80"));
	  insist(!validutf8("\xF8"));
	}
	241
	/** @brief Tests for MIME parsing and decoding
	 *
	 * Covers mime_content_type() (type, parameter name and parameter value),
	 * mime_qp() and mime_base64().  The functions marked XXX below have no
	 * tests yet.
	 */
	static void test_mime(void) {
	  char *t, *n, *v;

	  fprintf(stderr, "test_mime\n");

	  /* The outputs are cleared before each call so that a value left over
	   * from the previous case cannot satisfy the checks */
	  t = n = v = 0;
	  insist(!mime_content_type("text/plain", &t, &n, &v));
	  insist(!strcmp(t, "text/plain"));
	  insist(n == 0);
	  insist(v == 0);

	  t = n = v = 0;
	  insist(!mime_content_type("TEXT ((nested) comment) /plain", &t, &n, &v));
	  insist(!strcmp(t, "text/plain"));
	  insist(n == 0);
	  insist(v == 0);

	  t = n = v = 0;
	  insist(!mime_content_type(" text/plain ; Charset=utf-8", &t, &n, &v));
	  insist(!strcmp(t, "text/plain"));
	  insist(!strcmp(n, "charset"));
	  insist(!strcmp(v, "utf-8"));

	  t = n = v = 0;
	  insist(!mime_content_type("text/plain;charset = ISO-8859-1 ", &t, &n, &v));
	  insist(!strcmp(t, "text/plain"));
	  insist(!strcmp(n, "charset"));
	  insist(!strcmp(v, "ISO-8859-1"));

	  /* XXX mime_parse */
	  /* XXX mime_multipart */
	  /* XXX mime_rfc2388_content_disposition */

	  check_string(mime_qp(""), "");
	  check_string(mime_qp("foobar"), "foobar");
	  check_string(mime_qp("foo=20bar"), "foo bar");
	  check_string(mime_qp("x \r\ny"), "x\r\ny");
	  check_string(mime_qp("x=\r\ny"), "xy");
	  check_string(mime_qp("x= \r\ny"), "xy");
	  check_string(mime_qp("x =\r\ny"), "x y");
	  check_string(mime_qp("x = \r\ny"), "x y");

	  /* from RFC2045 */
	  check_string(mime_qp("Now's the time =\r\n"
	                       "for all folk to come=\r\n"
	                       " to the aid of their country."),
	               "Now's the time for all folk to come to the aid of their country.");

	  check_string(mime_base64(""), "");
	  check_string(mime_base64("BBBB"), "\x04\x10\x41");
	  check_string(mime_base64("////"), "\xFF\xFF\xFF");
	  check_string(mime_base64("//BB"), "\xFF\xF0\x41");
	  check_string(mime_base64("BBBB//BB////"),
	               "\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
	  check_string(mime_base64("B B B B / / B B / / / /"),
	               "\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
	  check_string(mime_base64("B\r\nBBB.// B-B//~//"),
	               "\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
	  check_string(mime_base64("BBBB="),
	               "\x04\x10\x41");
	  check_string(mime_base64("BBBBx="), /* not actually valid base64 */
	               "\x04\x10\x41");
	  check_string(mime_base64("BBBB BB=="),
	               "\x04\x10\x41" "\x04");
	  check_string(mime_base64("BBBB BBB="),
	               "\x04\x10\x41" "\x04\x10");
	}
	309
	310	static void test_cookies(void) {
	311	struct cookiedata cd[1];
	312
	313	fprintf(stderr, "test_cookies\n");
	314
	315	/* These are the examples from RFC2109 */
	316	insist(!parse_cookie("$Version=\"1\"; Customer=\"WILE_E_COYOTE\"; $Path=\"/acme\"", cd));
	317	insist(!strcmp(cd->version, "1"));
	318	insist(cd->ncookies = 1);
	319	insist(find_cookie(cd, "Customer") == &cd->cookies[0]);
	320	check_string(cd->cookies[0].value, "WILE_E_COYOTE");
	321	check_string(cd->cookies[0].path, "/acme");
	322	insist(cd->cookies[0].domain == 0);
	323	insist(!parse_cookie("$Version=\"1\";\n"
	324	"Customer=\"WILE_E_COYOTE\"; $Path=\"/acme\";\n"
	325	"Part_Number=\"Rocket_Launcher_0001\"; $Path=\"/acme\"",
	326	cd));
	327	insist(cd->ncookies = 2);
	328	insist(find_cookie(cd, "Customer") == &cd->cookies[0]);
	329	insist(find_cookie(cd, "Part_Number") == &cd->cookies[1]);
	330	check_string(cd->cookies[0].value, "WILE_E_COYOTE");
	331	check_string(cd->cookies[0].path, "/acme");
	332	insist(cd->cookies[0].domain == 0);
	333	check_string(cd->cookies[1].value, "Rocket_Launcher_0001");
	334	check_string(cd->cookies[1].path, "/acme");
	335	insist(cd->cookies[1].domain == 0);
	336	insist(!parse_cookie("$Version=\"1\";\n"
	337	"Customer=\"WILE_E_COYOTE\"; $Path=\"/acme\";\n"
	338	"Part_Number=\"Rocket_Launcher_0001\"; $Path=\"/acme\";\n"
	339	"Shipping=\"FedEx\"; $Path=\"/acme\"",
	340	cd));
	341	insist(cd->ncookies = 3);
	342	insist(find_cookie(cd, "Customer") == &cd->cookies[0]);
	343	insist(find_cookie(cd, "Part_Number") == &cd->cookies[1]);
	344	insist(find_cookie(cd, "Shipping") == &cd->cookies[2]);
	345	check_string(cd->cookies[0].value, "WILE_E_COYOTE");
	346	check_string(cd->cookies[0].path, "/acme");
	347	insist(cd->cookies[0].domain == 0);
	348	check_string(cd->cookies[1].value, "Rocket_Launcher_0001");
	349	check_string(cd->cookies[1].path, "/acme");
	350	insist(cd->cookies[1].domain == 0);
	351	check_string(cd->cookies[2].value, "FedEx");
	352	check_string(cd->cookies[2].path, "/acme");
	353	insist(cd->cookies[2].domain == 0);
	354	}
	355
	/** @brief Tests for the hex conversion functions
	 *
	 * Checks unhexdigitq() for every unsigned char value, hex() on a small
	 * buffer and on empty input, and unhex() on lower-case, upper-case, empty
	 * and malformed input.
	 */
	static void test_hex(void) {
	  unsigned n;
	  static const unsigned char h[] = { 0x00, 0xFF, 0x80, 0x7F };
	  uint8_t *u;
	  size_t ul;

	  fprintf(stderr, "test_hex\n");

	  /* Everything that is not a hex digit must be rejected */
	  for(n = 0; n <= UCHAR_MAX; ++n) {
	    if(!isxdigit(n))
	      insist(unhexdigitq(n) == -1);
	  }
	  insist(unhexdigitq('0') == 0);
	  insist(unhexdigitq('1') == 1);
	  insist(unhexdigitq('2') == 2);
	  insist(unhexdigitq('3') == 3);
	  insist(unhexdigitq('4') == 4);
	  insist(unhexdigitq('5') == 5);
	  insist(unhexdigitq('6') == 6);
	  insist(unhexdigitq('7') == 7);
	  insist(unhexdigitq('8') == 8);
	  insist(unhexdigitq('9') == 9);
	  insist(unhexdigitq('a') == 10);
	  insist(unhexdigitq('b') == 11);
	  insist(unhexdigitq('c') == 12);
	  insist(unhexdigitq('d') == 13);
	  insist(unhexdigitq('e') == 14);
	  insist(unhexdigitq('f') == 15);
	  insist(unhexdigitq('A') == 10);
	  insist(unhexdigitq('B') == 11);
	  insist(unhexdigitq('C') == 12);
	  insist(unhexdigitq('D') == 13);
	  insist(unhexdigitq('E') == 14);
	  insist(unhexdigitq('F') == 15);
	  check_string(hex(h, sizeof h), "00ff807f");
	  check_string(hex(0, 0), "");
	  /* unhex() returns 0 for bad input (see the last two checks), so test
	   * the pointer first: that keeps memcmp() away from a null pointer and
	   * avoids reading ul when it may not have been set */
	  u = unhex("00ff807f", &ul);
	  insist(u != 0 && ul == 4);
	  insist(u != 0 && memcmp(u, h, 4) == 0);
	  u = unhex("00FF807F", &ul);
	  insist(u != 0 && ul == 4);
	  insist(u != 0 && memcmp(u, h, 4) == 0);
	  u = unhex("", &ul);
	  insist(u != 0 && ul == 0);
	  fprintf(stderr, "2 ERROR reports expected {\n");
	  insist(unhex("F", 0) == 0);
	  insist(unhex("az", 0) == 0);
	  fprintf(stderr, "}\n");
	}
	405
	406	static void test_casefold(void) {
	407	uint32_t c, l;
	408	const char input, canon_folded, compat_folded, canon_expected, *compat_expected;
	409
	410	fprintf(stderr, "test_casefold\n");
	411
	412	/* This isn't a very exhaustive test. Unlike for normalization, there don't
	413	* seem to be any public test vectors for these algorithms. */
	414
	415	for(c = 1; c < 256; ++c) {
	416	input = utf32_to_utf8(&c, 1, 0);
	417	canon_folded = utf8_casefold_canon(input, strlen(input), 0);
	418	compat_folded = utf8_casefold_compat(input, strlen(input), 0);
	419	switch(c) {
	420	default:
	421	if((c >= 'A' && c <= 'Z')
	422	\|\| (c >= 0xC0 && c <= 0xDE && c != 0xD7))
	423	l = c ^ 0x20;
	424	else
	425	l = c;
	426	break;
	427	case 0xB5: /* MICRO SIGN */
	428	l = 0x3BC; /* GREEK SMALL LETTER MU */
	429	break;
	430	case 0xDF: /* LATIN SMALL LETTER SHARP S */
	431	insist(!strcmp(canon_folded, "ss"));
	432	insist(!strcmp(compat_folded, "ss"));
	433	l = 0;
	434	break;
	435	}
	436	if(l) {
	437	uint32_t *d;
	438	/* Case-folded data is now normalized */
	439	d = utf32_decompose_canon(&l, 1, 0);
	440	canon_expected = utf32_to_utf8(d, utf32_len(d), 0);
	441	if(strcmp(canon_folded, canon_expected)) {
	442	fprintf(stderr, "%s:%d: canon-casefolding %#lx got '%s', expected '%s'\n",
	443	__FILE__, __LINE__, (unsigned long)c,
	444	format(canon_folded), format(canon_expected));
	445	count_error();
	446	}
	447	++tests;
	448	d = utf32_decompose_compat(&l, 1, 0);
	449	compat_expected = utf32_to_utf8(d, utf32_len(d), 0);
	450	if(strcmp(compat_folded, compat_expected)) {
	451	fprintf(stderr, "%s:%d: compat-casefolding %#lx got '%s', expected '%s'\n",
	452	__FILE__, __LINE__, (unsigned long)c,
	453	format(compat_folded), format(compat_expected));
	454	count_error();
	455	}
	456	++tests;
	457	}
	458	}
	459	check_string(utf8_casefold_canon("", 0, 0), "");
	460	}
	461
	/* Test vectors for test_words(): @c in is the input string and @c expect
	 * is the 0-terminated list of words it should be split into. */
	struct {
	  const char *in;
	  const char *expect[10];
	} wtest[] = {
	  /* Empty string */
	  { "", { 0 } },
	  /* Only whitespace and punctuation */
	  { " ", { 0 } },
	  { " ' ", { 0 } },
	  { " ! ", { 0 } },
	  { " \"\" ", { 0 } },
	  { " @ ", { 0 } },
	  /* Basics */
	  { "wibble", { "wibble", 0 } },
	  { " wibble", { "wibble", 0 } },
	  { " wibble ", { "wibble", 0 } },
	  { "wibble ", { "wibble", 0 } },
	  { "wibble spong", { "wibble", "spong", 0 } },
	  { " wibble spong", { "wibble", "spong", 0 } },
	  { " wibble spong ", { "wibble", "spong", 0 } },
	  { "wibble spong ", { "wibble", "spong", 0 } },
	  { "wibble spong splat foo zot ", { "wibble", "spong", "splat", "foo", "zot", 0 } },
	  /* Apostrophes */
	  { "wibble 'spong", { "wibble", "spong", 0 } },
	  { " wibble's", { "wibble's", 0 } },
	  { " wibblespong' ", { "wibblespong", 0 } },
	  { "wibble sp''ong ", { "wibble", "sp", "ong", 0 } },
	};
	/* Number of entries in wtest[] */
	#define NWTEST (sizeof wtest / sizeof *wtest)
	491
	492	static void test_words(void) {
	493	size_t t, nexpect, ngot, i;
	494	int right;
	495
	496	fprintf(stderr, "test_words\n");
	497	for(t = 0; t < NWTEST; ++t) {
	498	char **got = utf8_word_split(wtest[t].in, strlen(wtest[t].in), &ngot, 0);
	499
	500	for(nexpect = 0; wtest[t].expect[nexpect]; ++nexpect)
	501	;
	502	if(nexpect == ngot) {
	503	for(i = 0; i < ngot; ++i)
	504	if(strcmp(wtest[t].expect[i], got[i]))
	505	break;
	506	right = i == ngot;
	507	} else
	508	right = 0;
	509	if(!right) {
	510	fprintf(stderr, "word split %zu failed\n", t);
	511	fprintf(stderr, "input: %s\n", wtest[t].in);
	512	fprintf(stderr, " \| %-30s \| %-30s\n",
	513	"expected", "got");
	514	for(i = 0; i < nexpect \|\| i < ngot; ++i) {
	515	const char *e = i < nexpect ? wtest[t].expect[i] : "<none>";
	516	const char *g = i < ngot ? got[i] : "<none>";
	517	fprintf(stderr, " %2zu \| %-30s \| %-30s\n", i, e, g);
	518	}
	519	count_error();
	520	}
	521	++tests;
	522	}
	523	}
	524
	/** @brief Less-than comparison function for integer heap
	 * @param a First value
	 * @param b Second value
	 * @return Nonzero if @p a is less than @p b, otherwise 0
	 */
	static inline int int_lt(int a, int b) { return a < b; }
	527
	528	/** @struct iheap
	529	* @brief A heap with @c int elements */
	530	HEAP_TYPE(iheap, int, int_lt);
	531	HEAP_DEFINE(iheap, int, int_lt);
	532
	533	/** @brief Tests for @ref heap.h */
	534	static void test_heap(void) {
	535	struct iheap h[1];
	536	int n;
	537	int last = -1;
	538
	539	fprintf(stderr, "test_heap\n");
	540
	541	iheap_init(h);
	542	for(n = 0; n < 1000; ++n)
	543	iheap_insert(h, random() % 100);
	544	for(n = 0; n < 1000; ++n) {
	545	const int latest = iheap_remove(h);
	546	if(last > latest)
	547	fprintf(stderr, "should have %d <= %d\n", last, latest);
	548	insist(last <= latest);
	549	last = latest;
	550	}
	551	putchar('\n');
	552	}
	553
	/** @brief Open a Unicode test file
	 * @param path Path of the file relative to the UCD directory on unicode.org
	 * @return Stream open for reading
	 *
	 * The file is looked for in the current directory under the last
	 * component of @p path.  If it cannot be opened it is fetched with wget,
	 * made read-only and opened again.  Any failure in that sequence is fatal.
	 */
	static FILE *open_unicode_test(const char *path) {
	  const char *base;
	  FILE *fp;
	  char buffer[1024];
	  int w;

	  /* base is the part of path after the last '/', or all of it */
	  if((base = strrchr(path, '/')))
	    ++base;
	  else
	    base = path;
	  if(!(fp = fopen(base, "r"))) {
	    snprintf(buffer, sizeof buffer,
	             "wget http://www.unicode.org/Public/5.0.0/ucd/%s", path);
	    if((w = system(buffer)))
	      fatal(0, "%s: %s", buffer, wstat(w));
	    if(chmod(base, 0444) < 0)
	      fatal(errno, "chmod %s", base);
	    if(!(fp = fopen(base, "r")))
	      fatal(errno, "%s", base);
	  }
	  return fp;
	}
	577
	578	/** @brief Run breaking tests for utf32_grapheme_boundary() etc */
	579	static void breaktest(const char *path,
	580	int (breakfn)(const uint32_t , size_t, size_t)) {
	581	FILE *fp = open_unicode_test(path);
	582	int lineno = 0;
	583	char l, lp;
	584	size_t bn, n;
	585	char break_allowed[1024];
	586	uint32_t buffer[1024];
	587
	588	while(!inputline(path, fp, &l, '\n')) {
	589	++lineno;
	590	if(l[0] == '#') continue;
	591	bn = 0;
	592	lp = l;
	593	while(*lp) {
	594	if(lp == ' ' \|\| lp == '\t') {
	595	++lp;
	596	continue;
	597	}
	598	if(*lp == '#')
	599	break;
	600	if((unsigned char)*lp == 0xC3 && (unsigned char)lp[1] == 0xB7) {
	601	/* 00F7 DIVISION SIGN */
	602	break_allowed[bn] = 1;
	603	lp += 2;
	604	continue;
	605	}
	606	if((unsigned char)*lp == 0xC3 && (unsigned char)lp[1] == 0x97) {
	607	/* 00D7 MULTIPLICATION SIGN */
	608	break_allowed[bn] = 0;
	609	lp += 2;
	610	continue;
	611	}
	612	if(isxdigit((unsigned char)*lp)) {
	613	buffer[bn++] = strtoul(lp, &lp, 16);
	614	continue;
	615	}
	616	fatal(0, "%s:%d: evil line: %s", path, lineno, l);
	617	}
	618	for(n = 0; n <= bn; ++n) {
	619	if(breakfn(buffer, bn, n) != break_allowed[n]) {
	620	fprintf(stderr,
	621	"%s:%d: offset %zu: mismatch\n"
	622	"%s\n"
	623	"\n",
	624	path, lineno, n, l);
	625	count_error();
	626	}
	627	++tests;
	628	}
	629	xfree(l);
	630	}
	631	fclose(fp);
	632	}
	633
	634	/** @brief Tests for @ref lib/unicode.h */
	635	static void test_unicode(void) {
	636	FILE *fp;
	637	int lineno = 0;
	638	char l, lp;
	639	uint32_t buffer[1024];
	640	uint32_t c[6], NFD_c[6], NFKD_c[6], NFC_c[6], NFKC_c[6]; / 1-indexed */
	641	int cn, bn;
	642
	643	fprintf(stderr, "test_unicode\n");
	644	fp = open_unicode_test("NormalizationTest.txt");
	645	while(!inputline("NormalizationTest.txt", fp, &l, '\n')) {
	646	++lineno;
	647	if(l == '#' \|\| l == '@')
	648	continue;
	649	bn = 0;
	650	cn = 1;
	651	lp = l;
	652	c[cn++] = &buffer[bn];
	653	while(lp && lp != '#') {
	654	if(*lp == ' ') {
	655	++lp;
	656	continue;
	657	}
	658	if(*lp == ';') {
	659	buffer[bn++] = 0;
	660	if(cn == 6)
	661	break;
	662	c[cn++] = &buffer[bn];
	663	++lp;
	664	continue;
	665	}
	666	buffer[bn++] = strtoul(lp, &lp, 16);
	667	}
	668	buffer[bn] = 0;
	669	assert(cn == 6);
	670	for(cn = 1; cn <= 5; ++cn) {
	671	NFD_c[cn] = utf32_decompose_canon(c[cn], utf32_len(c[cn]), 0);
	672	NFKD_c[cn] = utf32_decompose_compat(c[cn], utf32_len(c[cn]), 0);
	673	NFC_c[cn] = utf32_compose_canon(c[cn], utf32_len(c[cn]), 0);
	674	NFKC_c[cn] = utf32_compose_compat(c[cn], utf32_len(c[cn]), 0);
	675	}
	676	#define unt_check(T, A, B) do { \
	677	++tests; \
	678	if(utf32_cmp(c[A], T##_c[B])) { \
	679	fprintf(stderr, \
	680	"NormalizationTest.txt:%d: c%d != "#T"(c%d)\n", \
	681	lineno, A, B); \
	682	fprintf(stderr, " c%d:%s\n", \
	683	A, format_utf32(c[A])); \
	684	fprintf(stderr, " c%d:%s\n", \
	685	B, format_utf32(c[B])); \
	686	fprintf(stderr, "%4s(c%d):%s\n", \
	687	#T, B, format_utf32(T##_c[B])); \
	688	count_error(); \
	689	} \
	690	} while(0)
	691	unt_check(NFD, 3, 1);
	692	unt_check(NFD, 3, 2);
	693	unt_check(NFD, 3, 3);
	694	unt_check(NFD, 5, 4);
	695	unt_check(NFD, 5, 5);
	696	unt_check(NFKD, 5, 1);
	697	unt_check(NFKD, 5, 2);
	698	unt_check(NFKD, 5, 3);
	699	unt_check(NFKD, 5, 4);
	700	unt_check(NFKD, 5, 5);
	701	unt_check(NFC, 2, 1);
	702	unt_check(NFC, 2, 2);
	703	unt_check(NFC, 2, 3);
	704	unt_check(NFC, 4, 4);
	705	unt_check(NFC, 4, 5);
	706	unt_check(NFKC, 4, 1);
	707	unt_check(NFKC, 4, 2);
	708	unt_check(NFKC, 4, 3);
	709	unt_check(NFKC, 4, 4);
	710	unt_check(NFKC, 4, 5);
	711	for(cn = 1; cn <= 5; ++cn) {
	712	xfree(NFD_c[cn]);
	713	xfree(NFKD_c[cn]);
	714	}
	715	xfree(l);
	716	}
	717	fclose(fp);
	718	breaktest("auxiliary/GraphemeBreakTest.txt", utf32_is_grapheme_boundary);
	719	breaktest("auxiliary/WordBreakTest.txt", utf32_is_word_boundary);
	720	}
	721
	722	int main(void) {
	723	fail_first = !!getenv("FAIL_FIRST");
	724	insist('\n' == 0x0A);
	725	insist('\r' == 0x0D);
	726	insist(' ' == 0x20);
	727	insist('0' == 0x30);
	728	insist('9' == 0x39);
	729	insist('A' == 0x41);
	730	insist('Z' == 0x5A);
	731	insist('a' == 0x61);
	732	insist('z' == 0x7A);
	733	/* addr.c */
	734	/* asprintf.c */
	735	/* authhash.c */
	736	/* basen.c */
	737	/* charset.c */
	738	/* client.c */
	739	/* configuration.c */
	740	/* event.c */
	741	/* fprintf.c */
	742	/* heap.c */
	743	test_heap();
	744	/* hex.c */
	745	test_hex();
	746	/* inputline.c */
	747	/* kvp.c */
	748	/* log.c */
	749	/* mem.c */
	750	/* mime.c */
	751	test_mime();
	752	test_cookies();
	753	/* mixer.c */
	754	/* plugin.c */
	755	/* printf.c */
	756	/* queue.c */
	757	/* sink.c */
	758	/* snprintf.c */
	759	/* split.c */
	760	/* syscalls.c */
	761	/* table.c */
	762	/* unicode.c */
	763	test_unicode();
	764	/* utf8.c */
	765	test_utf8();
	766	/* vector.c */
	767	/* words.c */
	768	test_casefold();
	769	test_words();
	770	/* XXX words() */
	771	/* wstat.c */
	772	fprintf(stderr, "%d errors out of %d tests\n", errors, tests);
	773	return !!errors;
	774	}
	775
	776	/*
	777	Local Variables:
	778	c-basic-offset:2
	779	comment-column:40
	780	fill-column:79
	781	indent-tabs-mode:nil
	782	End:
	783	*/