-/** @brief Test whether everything before position @p n is Extend or Format
- * @param s Start of string
- * @param ns Length of string (not used by this function)
- * @param n Position to test; only the code points before it are examined
- * @return 1 if every code point at an index below @p n is classified as
- * Extend or Format by utf32__word_break(), otherwise 0
- *
- * With @p n equal to 0 there is nothing to examine and the result is 1.
- *
- * This rescans from @p n back to the start of the string on every call, so
- * the whole stack is not very efficient; we assume we don't see many of the
- * problematic characters.
- */
-static int utf32__is_initial_sequence(const uint32_t *s,
-                                      size_t attribute((unused)) ns,
-                                      size_t n) {
-  size_t i;
-
-  /* Walk backwards over s[n-1] ... s[0]; the post-decrement in the loop
-   * condition stops the loop cleanly once index 0 has been inspected. */
-  for(i = n; i-- > 0; ) {
-    const enum unicode_Word_Break cls = utf32__word_break(s[i]);
-
-    if(!(cls == unicode_Word_Break_Extend
-         || cls == unicode_Word_Break_Format))
-      return 0;                 /* found something that is not ignorable */
-  }
-  return 1;
-}
-
-/** @brief Find the first position at or after @p n that should not be skipped
- * @param s Start of string
- * @param ns Length of string
- * @param n Start position
- * @return Index of first suitable character or @p ns
- *
- * A position is suitable if its code point is not classified as Extend or
- * Format by utf32__word_break(), or if it is Extend/Format but
- * utf32__is_initial_sequence() reports that only Extend/Format code points
- * come before it.
- */
-static size_t utf32__first_not_ignorable(const uint32_t *s, size_t ns,
-                                         size_t n) {
-  size_t pos;
-
-  for(pos = n; pos < ns; ++pos) {
-    const enum unicode_Word_Break cls = utf32__word_break(s[pos]);
-
-    /* Anything other than Extend/Format is accepted straight away */
-    if(cls != unicode_Word_Break_Extend
-       && cls != unicode_Word_Break_Format)
-      return pos;
-    /* Extend/Format is accepted only as part of the initial sequence */
-    if(utf32__is_initial_sequence(s, ns, pos))
-      return pos;
-  }
-  /* Nothing suitable found before the end of the string */
-  return ns;
-}
-
-/** @brief Return the index of the last non-Extend/Format character from n
- * @param s Start of string
- * @param ns Length of string
- * @param n Start position
- * @return Index of last suitable character or (size_t)-1
- */
-static size_t utf32__last_not_ignorable(const uint32_t *s, size_t ns,
- size_t n) {
- do {
- const enum unicode_Word_Break wb = utf32__word_break(s[n]);
- if((wb != unicode_Word_Break_Extend
- && wb != unicode_Word_Break_Format)
- || utf32__is_initial_sequence(s, ns, n))
- return n;
- } while(n--);
- return n; /* will be (size_t)-1 */