+struct gen {
+ unsigned f; /* Flags: bitwise OR of the @f_...@ values below */
+#define f_lsr 1u /* Overflow from previous word; cleared by @emit_right_shifts@ */
+#define f_load 2u /* Outstanding @LOAD@; set by @emit_load@ */
+#define f_fip 4u /* Final-pass offset @fip@ is set */
+ instr_v iv; /* Instruction vector being built */
+ size_t fip; /* Offset for final-pass reduction: an index into @iv@ */
+ size_t w; /* Currently loaded target word */
+ size_t wi; /* Index in @iv@ where the current word's left-shifts start */
+ gfreduce *r; /* Reduction context pointer */
+};
+
+/* --- @INSTR@ --- *
+ *
+ * Arguments:   @g_@ = pointer to the generator state (@struct gen *@)
+ *              @op_@ = opcode for the new instruction
+ *              @arg_@ = argument for the new instruction
+ *
+ * Use:         Appends one instruction to the generator's instruction
+ *              vector.  Each macro argument is evaluated exactly once.
+ *
+ *              The operands are evaluated %%\emph{before}%% the vector is
+ *              grown: @DA_ENSURE@ may move the vector's storage, so an
+ *              @op_@ or @arg_@ expression which reads an existing
+ *              instruction through a pointer would otherwise be reading
+ *              storage that has already been released.
+ */
+
+#define INSTR(g_, op_, arg_) do { \
+  struct gen *_g = (g_); \
+  instr_v *_iv = &_g->iv; \
+  gfreduce_instr _ins; \
+  size_t _i; \
+ \
+  _ins.op = (op_); \
+  _ins.arg = (arg_); \
+  DA_ENSURE(_iv, 1); \
+  _i = DA_LEN(_iv); \
+  DA(_iv)[_i].op = _ins.op; \
+  DA(_iv)[_i].arg = _ins.arg; \
+  DA_EXTEND(_iv, 1); \
+} while (0)
+
+/* --- @emit_load@ --- *
+ *
+ * Arguments:   @struct gen *g@ = pointer to generator state
+ *              @size_t w@ = index of the word to load
+ *
+ * Returns:     ---
+ *
+ * Use:         Appends a @GFRI_LOAD@ instruction for word @w@, marks a
+ *              load as outstanding, and records @w@ as the currently
+ *              loaded word.  The first time a word below the reduction
+ *              context's @lim@ is loaded, the position of that
+ *              instruction is remembered in @g->fip@.
+ */
+
+static void emit_load(struct gen *g, size_t w)
+{
+  /* --- Remember where the final pass starts --- *
+   *
+   * After the whole high-degree words have been eliminated, a few
+   * high-degree bits may be left over.  They can be reduced by subtracting
+   * a suitable multiple of %$p'$%, but doing that naively would address
+   * `low-order' words below the bottom of the input.  So the first load of
+   * a word below the limit marks an alternative starting position for the
+   * final pass; this works because bits are scanned right-to-left.
+   */
+
+  if ((g->f & f_fip) == 0 && w < g->r->lim) {
+    g->f |= f_fip;
+    g->fip = DA_LEN(&g->iv);
+  }
+
+  /* --- Note the newly loaded word and append the instruction --- */
+
+  g->w = w;
+  g->f |= f_load;
+  INSTR(g, GFRI_LOAD, w);
+}
+
+/* --- @emit_right_shifts@ --- *
+ *
+ * Arguments:   @struct gen *g@ = pointer to generator state
+ *
+ * Returns:     ---
+ *
+ * Use:         If the @f_lsr@ flag is set, stores the current word, loads
+ *              word @g->w - 1@, and emits a @GFRI_LSR@ by
+ *              @MPW_BITS - n@ for every nonzero @GFRI_LSL@ by @n@ that
+ *              was emitted for the current word (the instructions from
+ *              index @g->wi@ up to the point of the store); then clears
+ *              @f_lsr@.  Does nothing if @f_lsr@ is clear.
+ */
+
+static void emit_right_shifts(struct gen *g)
+{
+  size_t i, wl, sh;
+
+  /* --- Close off the current word --- *
+   *
+   * If we shifted into this current word with a nonzero bit offset, then
+   * we'll also need to arrange to perform a sequence of right shifts into
+   * the following word, which we might as well do by scanning the
+   * instruction sequence (which starts at @wi@).
+   *
+   * Either way, we leave a @LOAD@ unmatched if there was one before, in the
+   * hope that callers have an easier time; @g->w@ is updated to reflect the
+   * currently open word.
+   */
+
+  if (!(g->f & f_lsr))
+    return;
+
+  wl = DA_LEN(&g->iv);
+  INSTR(g, GFRI_STORE, g->w);
+  emit_load(g, g->w - 1);
+  for (i = g->wi; i < wl; i++) {
+
+    /* --- Copy the shift count out before appending --- *
+     *
+     * @INSTR@ may have to grow the instruction vector, which can move its
+     * storage.  So fetch what we need by index, into a local, rather than
+     * holding a pointer into the vector across the append.  The count is
+     * kept in a @size_t@ on the assumption that @arg@ has a compatible
+     * type: @emit_load@ stores a @size_t@ there.
+     */
+
+    assert(DA(&g->iv)[i].op == GFRI_LSL);
+    sh = DA(&g->iv)[i].arg;
+    if (sh)
+      INSTR(g, GFRI_LSR, MPW_BITS - sh);
+  }
+  g->f &= ~f_lsr;
+}
+
+/* --- @ensure_loaded@ --- *
+ *
+ * Arguments:   @struct gen *g@ = pointer to generator state
+ *              @size_t w@ = index of the word wanted
+ *
+ * Returns:     ---
+ *
+ * Use:         Makes @w@ the currently loaded word.  If nothing is loaded,
+ *              simply loads @w@.  If a different word is loaded, emits any
+ *              pending right shifts first, then (unless that already left
+ *              @w@ loaded) stores the current word and loads @w@.  Whenever
+ *              anything is emitted, @g->wi@ is reset to the end of the
+ *              instruction vector.  If @w@ is already loaded, nothing
+ *              happens at all.
+ */
+
+static void ensure_loaded(struct gen *g, size_t w)
+{
+  if (g->f & f_load) {
+
+    /* --- Something is loaded already: is it the right word? --- */
+
+    if (w == g->w)
+      return;
+
+    /* --- Flush pending shifts; this may move us on to another word --- */
+
+    emit_right_shifts(g);
+    if (w != g->w) {
+      INSTR(g, GFRI_STORE, g->w);
+      emit_load(g, w);
+    }
+  } else {
+
+    /* --- Nothing outstanding, so just fetch the word --- */
+
+    emit_load(g, w);
+  }
+
+  /* --- Shifts for the new word start from here --- */
+
+  g->wi = DA_LEN(&g->iv);
+}
+