+/*
+ * Pass one input character to the read routine of the CS_UTF8
+ * charset spec, on behalf of a caller that shares state->s0 with it.
+ *
+ * Only the low 26 bits of state->s0 are shown to that routine, via a
+ * scratch charset_state whose s1 is zeroed on every call. Afterwards
+ * those same 26 bits are copied back into state->s0, and every higher
+ * bit of state->s0 is left exactly as it was. state->s1 is neither
+ * read nor written here.
+ */
+static void do_utf8(long int input_chr,
+                    charset_state *state,
+                    void (*emit)(void *ctx, long int output),
+                    void *emitctx)
+{
+    const long utf8_bits = 0x03ffffffL;
+    charset_spec const *spec = charset_find_spec(CS_UTF8);
+    charset_state scratch;
+
+    /* Build the private view of the state that the reader will see. */
+    scratch.s0 = state->s0 & utf8_bits;
+    scratch.s1 = 0;
+
+    spec->read(spec, input_chr, &scratch, emit, emitctx);
+
+    /* Merge the reader's updated bits back, keeping the caller's own. */
+    state->s0 &= ~utf8_bits;
+    state->s0 |= scratch.s0 & utf8_bits;
+}
+
+/*
+ * Process one input character, handing it to do_utf8() unless it
+ * turns out to be part of the three-character sequence ESC % @.
+ *
+ * An ESC is held back rather than passed on, and so is a '%' that
+ * immediately follows a held ESC. If '@' then arrives, the sequence
+ * is complete: ERROR is emitted if the low 26 bits of state->s0 are
+ * nonzero (a partial UTF-8 sequence), state->s0 is cleared to 0, and
+ * the function returns without emitting anything else. If any other
+ * character arrives instead, the held ESC (and '%', if any) are
+ * replayed through do_utf8() before the new character is dealt with.
+ *
+ * NOTE(review): ESC % @ is presumably the ISO 2022 "return from other
+ * coding system" sequence -- confirm against the caller.
+ */
+static void docs_utf8(long int input_chr,
+                      charset_state *state,
+                      void (*emit)(void *ctx, long int output),
+                      void *emitctx)
+{
+    int retstate;
+
+    /*
+     * Bits [25:0] of s0 are reserved for read_utf8().
+     * Bits [27:26] are a tiny state machine to recognise ESC % @:
+     *   0 = nothing held, 1 = ESC held, 2 = ESC % held.
+     */
+    retstate = (state->s0 & 0x0c000000L) >> 26;
+    if (retstate == 1 && input_chr == '%') {
+        /* ESC followed by '%': keep holding both. */
+        retstate = 2;
+    } else if (retstate == 2 && input_chr == '@') {
+        /* If we've got a partial UTF-8 sequence, complain. */
+        if (state->s0 & 0x03ffffffL)
+            emit(emitctx, ERROR);
+        state->s0 = 0;
+        return;
+    } else {
+        /* Not the sequence after all: replay whatever was held back. */
+        if (retstate >= 1)
+            do_utf8(ESC, state, emit, emitctx);
+        if (retstate >= 2)
+            do_utf8('%', state, emit, emitctx);
+        retstate = 0;
+        if (input_chr == ESC) {
+            /* Hold this ESC; it may be the start of ESC % @. */
+            retstate = 1;
+        } else {
+            do_utf8(input_chr, state, emit, emitctx);
+        }
+    }
+
+    /*
+     * Shift in long, matching the long masks used above: shifting a
+     * plain int left by 26 is undefined where int has fewer than 27
+     * value bits.
+     */
+    state->s0 = (state->s0 & ~0x0c000000L) | ((long)retstate << 26);
+}
+
+