+ a = MP_COPY(a);
+ b = MP_COPY(b);
+ MP_MODIFY(d, 2 * n + 1);
+ dv = d->v; dvl = d->vl;
+ MPX_ZERO(dv, dvl);
+ av = a->v; avl = a->vl;
+ bv = b->v; bvl = b->vl;
+ mv = mm->m->v; mvl = mm->m->vl;
+ y = *bv;
+
+ /* --- Montgomery multiplication phase --- */
+
+ i = 0;
+ mi = mm->mi->v[0];
+ while (i < n && av < avl) {
+ mpw x = *av++;
+ mpw u = MPW((*dv + x * y) * mi);
+ MPX_UMLAN(dv, dvl, bv, bvl, x);
+ MPX_UMLAN(dv, dvl, mv, mvl, u);
+ dv++;
+ i++;
+ }
+
+ /* --- Simpler Montgomery reduction phase --- */
+
+ while (i < n) {
+ mpw u = MPW(*dv * mi);
+ MPX_UMLAN(dv, dvl, mv, mvl, u);
+ dv++;
+ i++;
+ }
+
+ /* --- Done --- */
+
+ memmove(d->v, dv, MPWS(dvl - dv));
+ d->vl -= dv - d->v;
+ MP_SHRINK(d);
+ d->f = (a->f | b->f) & MP_BURN;
+ if (MP_CMP(d, >=, mm->m))
+ d = mp_sub(d, d, mm->m);
+ MP_DROP(a);
+ MP_DROP(b);
+ }