(mpmont_expr): Use sliding window exponentiation, with a drop-through
[u/mdw/catacomb] / mpmont.c
1 /* -*-c-*-
2 *
3 * $Id: mpmont.c,v 1.10 2000/07/29 17:05:43 mdw Exp $
4 *
5 * Montgomery reduction
6 *
7 * (c) 1999 Straylight/Edgeware
8 */
9
10 /*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30 /*----- Revision history --------------------------------------------------*
31 *
32 * $Log: mpmont.c,v $
33 * Revision 1.10 2000/07/29 17:05:43 mdw
34 * (mpmont_expr): Use sliding window exponentiation, with a drop-through
35 * for small exponents to use a simple left-to-right bitwise routine. This
36 * can reduce modexp times by up to a quarter.
37 *
38 * Revision 1.9 2000/06/17 11:45:09 mdw
39 * Major memory management overhaul. Added arena support. Use the secure
40 * arena for secret integers. Replace and improve the MP management macros
41 * (e.g., replace MP_MODIFY by MP_DEST).
42 *
43 * Revision 1.8 1999/12/22 15:55:00 mdw
44 * Adjust Karatsuba parameters.
45 *
46 * Revision 1.7 1999/12/11 01:51:14 mdw
47 * Use a Karatsuba-based reduction for large moduli.
48 *
49 * Revision 1.6 1999/12/10 23:18:39 mdw
50 * Change interface for suggested destinations.
51 *
52 * Revision 1.5 1999/11/22 13:58:40 mdw
53 * Add an option to disable Montgomery reduction, so that performance
54 * comparisons can be done.
55 *
56 * Revision 1.4 1999/11/21 12:27:06 mdw
57 * Remove a division from the Montgomery setup by calculating
58 * %$R^2 \bmod m$% first and then %$R \bmod m$% by Montgomery reduction of
59 * %$R^2$%.
60 *
61 * Revision 1.3 1999/11/21 11:35:10 mdw
62 * Performance improvement: use @mp_sqr@ and @mpmont_reduce@ instead of
63 * @mpmont_mul@ for squaring in exponentiation.
64 *
65 * Revision 1.2 1999/11/19 13:17:26 mdw
66 * Add extra interface to exponentiation which returns a Montgomerized
67 * result.
68 *
69 * Revision 1.1 1999/11/17 18:02:16 mdw
70 * New multiprecision integer arithmetic suite.
71 *
72 */
73
74 /*----- Header files ------------------------------------------------------*/
75
76 #include "mp.h"
77 #include "mpmont.h"
78
79 /*----- Tweakables --------------------------------------------------------*/
80
81 /* --- @MPMONT_DISABLE@ --- *
82 *
83 * Replace all the clever Montgomery reduction with good old-fashioned long
84 * division.
85 */
86
87 /* #define MPMONT_DISABLE */
88
89 /*----- Main code ---------------------------------------------------------*/
90
91 /* --- @mpmont_create@ --- *
92 *
93 * Arguments: @mpmont *mm@ = pointer to Montgomery reduction context
94 * @mp *m@ = modulus to use
95 *
96 * Returns: ---
97 *
98 * Use: Initializes a Montgomery reduction context ready for use.
99 * The argument @m@ must be a positive odd integer.
100 */
101
102 #ifdef MPMONT_DISABLE
103
104 void mpmont_create(mpmont *mm, mp *m)
105 {
106 mp_shrink(m);
107 mm->m = MP_COPY(m);
108 mm->r = MP_ONE;
109 mm->r2 = MP_ONE;
110 mm->mi = MP_ONE;
111 }
112
113 #else
114
115 void mpmont_create(mpmont *mm, mp *m)
116 {
117 size_t n = MP_LEN(m);
118 mp *r2 = mp_new(2 * n + 1, 0);
119 mp r;
120
121 /* --- Validate the arguments --- */
122
123 assert(((void)"Montgomery modulus must be positive",
124 (m->f & MP_NEG) == 0));
125 assert(((void)"Montgomery modulus must be odd", m->v[0] & 1));
126
127 /* --- Take a copy of the modulus --- */
128
129 mp_shrink(m);
130 mm->m = MP_COPY(m);
131
132 /* --- Determine %$R^2$% --- */
133
134 mm->n = n;
135 MPX_ZERO(r2->v, r2->vl - 1);
136 r2->vl[-1] = 1;
137
138 /* --- Find the magic value @mi@ --- */
139
140 mp_build(&r, r2->v + n, r2->vl);
141 mm->mi = MP_NEW;
142 mp_gcd(0, 0, &mm->mi, &r, m);
143 mm->mi = mp_sub(mm->mi, &r, mm->mi);
144
145 /* --- Discover the values %$R \bmod m$% and %$R^2 \bmod m$% --- */
146
147 mm->r2 = MP_NEW;
148 mp_div(0, &mm->r2, r2, m);
149 mm->r = mpmont_reduce(mm, MP_NEW, mm->r2);
150 MP_DROP(r2);
151 }
152
153 #endif
154
155 /* --- @mpmont_destroy@ --- *
156 *
157 * Arguments: @mpmont *mm@ = pointer to a Montgomery reduction context
158 *
159 * Returns: ---
160 *
161 * Use: Disposes of a context when it's no longer of any use to
162 * anyone.
163 */
164
165 void mpmont_destroy(mpmont *mm)
166 {
167 MP_DROP(mm->m);
168 MP_DROP(mm->r);
169 MP_DROP(mm->r2);
170 MP_DROP(mm->mi);
171 }
172
173 /* --- @mpmont_reduce@ --- *
174 *
175 * Arguments: @mpmont *mm@ = pointer to Montgomery reduction context
176 * @mp *d@ = destination
177 * @mp *a@ = source, assumed positive
178 *
179 * Returns: Result, %$a R^{-1} \bmod m$%.
180 */
181
182 #ifdef MPMONT_DISABLE
183
184 mp *mpmont_reduce(mpmont *mm, mp *d, mp *a)
185 {
186 mp_div(0, &d, a, mm->m);
187 return (d);
188 }
189
190 #else
191
192 mp *mpmont_reduce(mpmont *mm, mp *d, mp *a)
193 {
194 size_t n = mm->n;
195
196 /* --- Check for serious Karatsuba reduction --- */
197
198 if (n > KARATSUBA_CUTOFF * 3) {
199 mp al;
200 mpw *vl;
201 mp *u;
202
203 if (MP_LEN(a) >= n)
204 vl = a->v + n;
205 else
206 vl = a->vl;
207 mp_build(&al, a->v, vl);
208 u = mp_mul(MP_NEW, &al, mm->mi);
209 if (MP_LEN(u) > n)
210 u->vl = u->v + n;
211 u = mp_mul(u, u, mm->m);
212 d = mp_add(d, a, u);
213 mp_drop(u);
214 }
215
216 /* --- Otherwise do it the hard way --- */
217
218 else {
219 mpw *dv, *dvl;
220 mpw *mv, *mvl;
221 mpw mi;
222 size_t k = n;
223
224 /* --- Initial conditioning of the arguments --- */
225
226 a = MP_COPY(a);
227 if (d)
228 MP_DROP(d);
229 d = a;
230 MP_DEST(d, 2 * n + 1, a->f);
231
232 dv = d->v; dvl = d->vl;
233 mv = mm->m->v; mvl = mm->m->vl;
234
235 /* --- Let's go to work --- */
236
237 mi = mm->mi->v[0];
238 while (k--) {
239 mpw u = MPW(*dv * mi);
240 MPX_UMLAN(dv, dvl, mv, mvl, u);
241 dv++;
242 }
243 }
244
245 /* --- Wrap everything up --- */
246
247 memmove(d->v, d->v + n, MPWS(MP_LEN(d) - n));
248 d->vl -= n;
249 if (MP_CMP(d, >=, mm->m))
250 d = mp_sub(d, d, mm->m);
251 MP_SHRINK(d);
252 return (d);
253 }
254
255 #endif
256
257 /* --- @mpmont_mul@ --- *
258 *
259 * Arguments: @mpmont *mm@ = pointer to Montgomery reduction context
260 * @mp *d@ = destination
261 * @mp *a, *b@ = sources, assumed positive
262 *
263 * Returns: Result, %$a b R^{-1} \bmod m$%.
264 */
265
266 #ifdef MPMONT_DISABLE
267
268 mp *mpmont_mul(mpmont *mm, mp *d, mp *a, mp *b)
269 {
270 d = mp_mul(d, a, b);
271 mp_div(0, &d, d, mm->m);
272 return (d);
273 }
274
275 #else
276
277 mp *mpmont_mul(mpmont *mm, mp *d, mp *a, mp *b)
278 {
279 if (mm->n > KARATSUBA_CUTOFF * 3) {
280 d = mp_mul(d, a, b);
281 d = mpmont_reduce(mm, d, d);
282 } else {
283 mpw *dv, *dvl;
284 mpw *av, *avl;
285 mpw *bv, *bvl;
286 mpw *mv, *mvl;
287 mpw y;
288 size_t n, i;
289 mpw mi;
290
291 /* --- Initial conditioning of the arguments --- */
292
293 if (MP_LEN(a) > MP_LEN(b)) {
294 mp *t = a; a = b; b = t;
295 }
296 n = MP_LEN(mm->m);
297
298 a = MP_COPY(a);
299 b = MP_COPY(b);
300 MP_DEST(d, 2 * n + 1, a->f | b->f | MP_UNDEF);
301 dv = d->v; dvl = d->vl;
302 MPX_ZERO(dv, dvl);
303 av = a->v; avl = a->vl;
304 bv = b->v; bvl = b->vl;
305 mv = mm->m->v; mvl = mm->m->vl;
306 y = *bv;
307
308 /* --- Montgomery multiplication phase --- */
309
310 i = 0;
311 mi = mm->mi->v[0];
312 while (i < n && av < avl) {
313 mpw x = *av++;
314 mpw u = MPW((*dv + x * y) * mi);
315 MPX_UMLAN(dv, dvl, bv, bvl, x);
316 MPX_UMLAN(dv, dvl, mv, mvl, u);
317 dv++;
318 i++;
319 }
320
321 /* --- Simpler Montgomery reduction phase --- */
322
323 while (i < n) {
324 mpw u = MPW(*dv * mi);
325 MPX_UMLAN(dv, dvl, mv, mvl, u);
326 dv++;
327 i++;
328 }
329
330 /* --- Done --- */
331
332 memmove(d->v, dv, MPWS(dvl - dv));
333 d->vl -= dv - d->v;
334 MP_SHRINK(d);
335 d->f = (a->f | b->f) & MP_BURN;
336 if (MP_CMP(d, >=, mm->m))
337 d = mp_sub(d, d, mm->m);
338 MP_DROP(a);
339 MP_DROP(b);
340 }
341
342 return (d);
343 }
344
345 #endif
346
347 /* --- @mpmont_expr@ --- *
348 *
349 * Arguments: @mpmont *mm@ = pointer to Montgomery reduction context
350 * @mp *d@ = fake destination
351 * @mp *a@ = base
352 * @mp *e@ = exponent
353 *
354 * Returns: Result, %$a^e R \bmod m$%.
355 */
356
/* Sliding-window parameters for @mpmont_expr@.
 *
 * @WINSZ@ is the largest number of exponent bits consumed by one window,
 * and @TABSZ@ is the number of entries in the window table.
 */
#define WINSZ 5
#define TABSZ (1 << (WINSZ - 1))

/* Exponents whose @MP_LEN@ is below @THRESH@ are handed to @exp_simple@
 * rather than going through the windowed code.
 */
#define THRESH (((MPW_BITS / WINSZ) << 2) + 1)
361
362 static mp *exp_simple(mpmont *mm, mp *d, mp *a, mp *e)
363 {
364 mpscan sc;
365 mp *ar;
366 mp *x = MP_COPY(mm->r);
367 mp *spare = (e->f & MP_BURN) ? MP_NEWSEC : MP_NEW;
368 unsigned sq = 0;
369
370 mp_rscan(&sc, e);
371 if (!MP_RSTEP(&sc))
372 goto exit;
373 while (!MP_RBIT(&sc))
374 MP_RSTEP(&sc);
375
376 /* --- Do the main body of the work --- */
377
378 ar = mpmont_mul(mm, MP_NEW, a, mm->r2);
379 for (;;) {
380 sq++;
381 while (sq) {
382 mp *y;
383 y = mp_sqr(spare, x);
384 y = mpmont_reduce(mm, y, y);
385 spare = x; x = y;
386 sq--;
387 }
388 { mp *y = mpmont_mul(mm, spare, x, ar); spare = x; x = y; }
389 sq = 0;
390 for (;;) {
391 if (!MP_RSTEP(&sc))
392 goto done;
393 if (MP_RBIT(&sc))
394 break;
395 sq++;
396 }
397 }
398
399 /* --- Do a final round of squaring --- */
400
401 done:
402 while (sq) {
403 mp *y;
404 y = mp_sqr(spare, x);
405 y = mpmont_reduce(mm, y, y);
406 spare = x; x = y;
407 sq--;
408 }
409
410 /* --- Done --- */
411
412 MP_DROP(ar);
413 exit:
414 if (spare != MP_NEW)
415 MP_DROP(spare);
416 if (d != MP_NEW)
417 MP_DROP(d);
418 return (x);
419 }
420
421 mp *mpmont_expr(mpmont *mm, mp *d, mp *a, mp *e)
422 {
423 mp **tab;
424 mp *ar, *a2;
425 mp *spare = (e->f & MP_BURN) ? MP_NEWSEC : MP_NEW;
426 mp *x = MP_COPY(mm->r);
427 unsigned i, sq = 0;
428 mpscan sc;
429
430 /* --- Do we bother? --- */
431
432 MP_SHRINK(e);
433 if (MP_LEN(e) == 0)
434 goto exit;
435 if (MP_LEN(e) < THRESH) {
436 x->ref--;
437 return (exp_simple(mm, d, a, e));
438 }
439
440 /* --- Do the precomputation --- */
441
442 ar = mpmont_mul(mm, MP_NEW, a, mm->r2);
443 a2 = mp_sqr(MP_NEW, ar);
444 a2 = mpmont_reduce(mm, a2, a2);
445 tab = xmalloc(TABSZ * sizeof(mp *));
446 tab[0] = ar;
447 for (i = 1; i < TABSZ; i++)
448 tab[i] = mpmont_mul(mm, MP_NEW, tab[i - 1], a2);
449 mp_drop(a2);
450 mp_rscan(&sc, e);
451
452 /* --- Skip top-end zero bits --- *
453 *
454 * If the initial step worked, there must be a set bit somewhere, so keep
455 * stepping until I find it.
456 */
457
458 MP_RSTEP(&sc);
459 while (!MP_RBIT(&sc)) {
460 MP_RSTEP(&sc);
461 }
462
463 /* --- Now for the main work --- */
464
465 for (;;) {
466 unsigned l = 0;
467 unsigned z = 0;
468
469 /* --- The next bit is set, so read a window index --- *
470 *
471 * Reset @i@ to zero and increment @sq@. Then, until either I read
472 * @WINSZ@ bits or I run out of bits, scan in a bit: if it's clear, bump
473 * the @z@ counter; if it's set, push a set bit into @i@, shift it over
474 * by @z@ bits, bump @sq@ by @z + 1@ and clear @z@. By the end of this
475 * palaver, @i@ is an index to the precomputed value in @tab@.
476 */
477
478 i = 0;
479 sq++;
480 for (;;) {
481 l++;
482 if (l >= WINSZ || !MP_RSTEP(&sc))
483 break;
484 if (!MP_RBIT(&sc))
485 z++;
486 else {
487 i = ((i << 1) | 1) << z;
488 sq += z + 1;
489 z = 0;
490 }
491 }
492
493 /* --- Do the squaring --- *
494 *
495 * Remember that @sq@ carries over from the zero-skipping stuff below.
496 */
497
498 while (sq) {
499 mp *y;
500 y = mp_sqr(spare, x);
501 y = mpmont_reduce(mm, y, y);
502 spare = x; x = y;
503 sq--;
504 }
505
506 /* --- Do the multiply --- */
507
508 { mp *y = mpmont_mul(mm, spare, x, tab[i]); spare = x; x = y; }
509
510 /* --- Now grind along through the rest of the bits --- */
511
512 sq = z;
513 for (;;) {
514 if (!MP_RSTEP(&sc))
515 goto done;
516 if (MP_RBIT(&sc))
517 break;
518 sq++;
519 }
520 }
521
522 /* --- Do a final round of squaring --- */
523
524 done:
525 while (sq) {
526 mp *y;
527 y = mp_sqr(spare, x);
528 y = mpmont_reduce(mm, y, y);
529 spare = x; x = y;
530 sq--;
531 }
532
533 /* --- Done --- */
534
535 for (i = 0; i < TABSZ; i++)
536 mp_drop(tab[i]);
537 xfree(tab);
538 exit:
539 if (d != MP_NEW)
540 mp_drop(d);
541 if (spare)
542 mp_drop(spare);
543 return (x);
544 }
545
546 /* --- @mpmont_exp@ --- *
547 *
548 * Arguments: @mpmont *mm@ = pointer to Montgomery reduction context
549 * @mp *d@ = fake destination
550 * @mp *a@ = base
551 * @mp *e@ = exponent
552 *
553 * Returns: Result, %$a^e \bmod m$%.
554 */
555
556 mp *mpmont_exp(mpmont *mm, mp *d, mp *a, mp *e)
557 {
558 d = mpmont_expr(mm, d, a, e);
559 d = mpmont_reduce(mm, d, d);
560 return (d);
561 }
562
563 /*----- Test rig ----------------------------------------------------------*/
564
565 #ifdef TEST_RIG
566
567 static int tcreate(dstr *v)
568 {
569 mp *m = *(mp **)v[0].buf;
570 mp *mi = *(mp **)v[1].buf;
571 mp *r = *(mp **)v[2].buf;
572 mp *r2 = *(mp **)v[3].buf;
573
574 mpmont mm;
575 int ok = 1;
576
577 mpmont_create(&mm, m);
578
579 if (mm.mi->v[0] != mi->v[0]) {
580 fprintf(stderr, "\n*** bad mi: found %lu, expected %lu",
581 (unsigned long)mm.mi->v[0], (unsigned long)mi->v[0]);
582 fputs("\nm = ", stderr); mp_writefile(m, stderr, 10);
583 fputc('\n', stderr);
584 ok = 0;
585 }
586
587 if (MP_CMP(mm.r, !=, r)) {
588 fputs("\n*** bad r", stderr);
589 fputs("\nm = ", stderr); mp_writefile(m, stderr, 10);
590 fputs("\nexpected ", stderr); mp_writefile(r, stderr, 10);
591 fputs("\n found ", stderr); mp_writefile(mm.r, stderr, 10);
592 fputc('\n', stderr);
593 ok = 0;
594 }
595
596 if (MP_CMP(mm.r2, !=, r2)) {
597 fputs("\n*** bad r2", stderr);
598 fputs("\nm = ", stderr); mp_writefile(m, stderr, 10);
599 fputs("\nexpected ", stderr); mp_writefile(r2, stderr, 10);
600 fputs("\n found ", stderr); mp_writefile(mm.r2, stderr, 10);
601 fputc('\n', stderr);
602 ok = 0;
603 }
604
605 MP_DROP(m);
606 MP_DROP(mi);
607 MP_DROP(r);
608 MP_DROP(r2);
609 mpmont_destroy(&mm);
610 assert(mparena_count(MPARENA_GLOBAL) == 0);
611 return (ok);
612 }
613
614 static int tmul(dstr *v)
615 {
616 mp *m = *(mp **)v[0].buf;
617 mp *a = *(mp **)v[1].buf;
618 mp *b = *(mp **)v[2].buf;
619 mp *r = *(mp **)v[3].buf;
620 int ok = 1;
621
622 mpmont mm;
623 mpmont_create(&mm, m);
624
625 {
626 mp *qr = mp_mul(MP_NEW, a, b);
627 mp_div(0, &qr, qr, m);
628
629 if (MP_CMP(qr, !=, r)) {
630 fputs("\n*** classical modmul failed", stderr);
631 fputs("\n m = ", stderr); mp_writefile(m, stderr, 10);
632 fputs("\n a = ", stderr); mp_writefile(a, stderr, 10);
633 fputs("\n b = ", stderr); mp_writefile(b, stderr, 10);
634 fputs("\n r = ", stderr); mp_writefile(r, stderr, 10);
635 fputs("\nqr = ", stderr); mp_writefile(qr, stderr, 10);
636 fputc('\n', stderr);
637 ok = 0;
638 }
639
640 mp_drop(qr);
641 }
642
643 {
644 mp *ar = mpmont_mul(&mm, MP_NEW, a, mm.r2);
645 mp *br = mpmont_mul(&mm, MP_NEW, b, mm.r2);
646 mp *mr = mpmont_mul(&mm, MP_NEW, ar, br);
647 mr = mpmont_reduce(&mm, mr, mr);
648 if (MP_CMP(mr, !=, r)) {
649 fputs("\n*** montgomery modmul failed", stderr);
650 fputs("\n m = ", stderr); mp_writefile(m, stderr, 10);
651 fputs("\n a = ", stderr); mp_writefile(a, stderr, 10);
652 fputs("\n b = ", stderr); mp_writefile(b, stderr, 10);
653 fputs("\n r = ", stderr); mp_writefile(r, stderr, 10);
654 fputs("\nmr = ", stderr); mp_writefile(mr, stderr, 10);
655 fputc('\n', stderr);
656 ok = 0;
657 }
658 MP_DROP(ar); MP_DROP(br);
659 mp_drop(mr);
660 }
661
662
663 MP_DROP(m);
664 MP_DROP(a);
665 MP_DROP(b);
666 MP_DROP(r);
667 mpmont_destroy(&mm);
668 assert(mparena_count(MPARENA_GLOBAL) == 0);
669 return ok;
670 }
671
672 static int texp(dstr *v)
673 {
674 mp *m = *(mp **)v[0].buf;
675 mp *a = *(mp **)v[1].buf;
676 mp *b = *(mp **)v[2].buf;
677 mp *r = *(mp **)v[3].buf;
678 mp *mr;
679 int ok = 1;
680
681 mpmont mm;
682 mpmont_create(&mm, m);
683
684 mr = mpmont_exp(&mm, MP_NEW, a, b);
685
686 if (MP_CMP(mr, !=, r)) {
687 fputs("\n*** montgomery modexp failed", stderr);
688 fputs("\n m = ", stderr); mp_writefile(m, stderr, 10);
689 fputs("\n a = ", stderr); mp_writefile(a, stderr, 10);
690 fputs("\n e = ", stderr); mp_writefile(b, stderr, 10);
691 fputs("\n r = ", stderr); mp_writefile(r, stderr, 10);
692 fputs("\nmr = ", stderr); mp_writefile(mr, stderr, 10);
693 fputc('\n', stderr);
694 ok = 0;
695 }
696
697 MP_DROP(m);
698 MP_DROP(a);
699 MP_DROP(b);
700 MP_DROP(r);
701 MP_DROP(mr);
702 mpmont_destroy(&mm);
703 assert(mparena_count(MPARENA_GLOBAL) == 0);
704 return ok;
705 }
706
707
708 static test_chunk tests[] = {
709 { "create", tcreate, { &type_mp, &type_mp, &type_mp, &type_mp, 0 } },
710 { "mul", tmul, { &type_mp, &type_mp, &type_mp, &type_mp, 0 } },
711 { "exp", texp, { &type_mp, &type_mp, &type_mp, &type_mp, 0 } },
712 { 0, 0, { 0 } },
713 };
714
715 int main(int argc, char *argv[])
716 {
717 sub_init();
718 test_run(argc, argv, tests, SRCDIR "/tests/mpmont");
719 return (0);
720 }
721
722 #endif
723
724 /*----- That's all, folks -------------------------------------------------*/