+/** @brief Normalize a tag
+ * @param s Tag
+ * @param ns Length of tag
+ * @return Normalized string or NULL on error
+ *
+ * The return value will be:
+ * - case-folded
+ * - have no leading or trailing space
+ * - have no combining characters
+ * - all spacing between words will be a single U+0020 SPACE
+ *
+ * Every conversion step is checked: if any of them yields a null
+ * pointer the function gives up and returns NULL rather than passing
+ * the null pointer on to the next step.
+ */
+static char *normalize_tag(const char *s, size_t ns) {
+  uint32_t *s32, **w32;
+  size_t ns32, nw32, i;
+  struct dynstr d[1];
+  const char *w8;
+
+  if(!(s32 = utf8_to_utf32(s, ns, &ns32)))
+    return 0;
+  if(!(s32 = utf32_casefold_compat(s32, ns32, &ns32))) /* ->NFKD */
+    return 0;
+  ns32 = remove_combining_chars(s32, ns32);
+  /* Split into words, no Word_Break tailoring */
+  if(!(w32 = utf32_word_split(s32, ns32, &nw32, 0)))
+    return 0;                   /* never index a missing word array */
+  /* Compose back into a string */
+  dynstr_init(d);
+  for(i = 0; i < nw32; ++i) {
+    /* Convert before touching the output so that a failed conversion
+     * is reported as an error instead of being appended */
+    if(!(w8 = utf32_to_utf8(w32[i], utf32_len(w32[i]), 0)))
+      return 0;
+    if(i)
+      dynstr_append(d, ' ');    /* exactly one space between words */
+    dynstr_append_string(d, w8);
+  }
+  dynstr_terminate(d);
+  return d->vec;
+}
+