X-Git-Url: https://git.distorted.org.uk/~mdw/disorder/blobdiff_plain/763d5e6ad88ef3ba1cd1d7742d060e4f1e54c6b8..35b651f0cc0999deae42c92f2cbca3ecf88fe157:/lib/words.c diff --git a/lib/words.c b/lib/words.c index eaad8ab..01c9db2 100644 --- a/lib/words.c +++ b/lib/words.c @@ -1,6 +1,6 @@ /* * This file is part of DisOrder - * Copyright (C) 2004 Richard Kettlewell + * Copyright (C) 2004, 2007 Richard Kettlewell * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -29,54 +29,22 @@ #include "table.h" #include "words.h" #include "utf8.h" +#include "log.h" +#include "charset.h" -#include "casefold.h" -#include "unicodegc.h" +#include "unidata.h" +#include "unicode.h" const char *casefold(const char *ptr) { - struct dynstr d; - int l, r, m; - uint32_t c; - const struct cm *t; - const char *start, *s = ptr; - - dynstr_init(&d); - while(*s) { - start = s; - PARSE_UTF8(s, c, return ptr); - /* seek the folded equivalent */ - t = cm[c & CM_MASK]; - l = 0; - r = cmn[c & CM_MASK] - 1; - while(l <= r && c != t[m = (l + r) / 2].ch) - if(c < t[m].ch) - r = m - 1; - else - l = m + 1; - if(l <= r) - dynstr_append_string(&d, t[m].tr); - else - dynstr_append_bytes(&d, start, s - start); - } - dynstr_terminate(&d); - return d.vec; + return utf8_casefold_canon(ptr, strlen(ptr), 0); } static enum unicode_gc_cat cat(uint32_t c) { - int l, r, m; - - l = 0; - r = sizeof gcs / sizeof *gcs; - while(l <= r) { - m = (l + r) / 2; - if(c < gcs[m].l) - r = m - 1; - else if(c > gcs[m].h) - l = m + 1; - else - return gcs[m].cat; - } - return unicode_gc_none; + if(c < UNICODE_NCHARS) { + const struct unidata *const ud = &unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS]; + return ud->gc; + } else + return unicode_gc_Cn; } /* XXX this is a bit kludgy */ @@ -151,7 +119,7 @@ char **words(const char *s, int *nvecp) { case unicode_gc_Pf: case unicode_gc_Pi: case unicode_gc_Po: - case unicode_gc_none: + case unicode_gc_Cn: /* control and punctuation is completely ignored */ break;