Random field-element selection.
[u/mdw/catacomb] / gfreduce.c
CommitLineData
ceb3f0c0 1/* -*-c-*-
2 *
c3caa2fa 3 * $Id: gfreduce.c,v 1.2 2004/03/21 22:52:06 mdw Exp $
ceb3f0c0 4 *
5 * Efficient reduction modulo sparse binary polynomials
6 *
7 * (c) 2004 Straylight/Edgeware
8 */
9
10/*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30/*----- Revision history --------------------------------------------------*
31 *
32 * $Log: gfreduce.c,v $
c3caa2fa 33 * Revision 1.2 2004/03/21 22:52:06 mdw
34 * Merge and close elliptic curve branch.
35 *
ceb3f0c0 36 * Revision 1.1.2.1 2004/03/21 22:39:46 mdw
37 * Elliptic curves on binary fields work.
38 *
39 */
40
41/*----- Header files ------------------------------------------------------*/
42
43#include <mLib/alloc.h>
44#include <mLib/darray.h>
45#include <mLib/macros.h>
46
47#include "gf.h"
48#include "gfreduce.h"
49#include "gfreduce-exp.h"
50#include "fibrand.h"
51#include "mprand.h"
52
53/*----- Data structures ---------------------------------------------------*/
54
55DA_DECL(instr_v, gfreduce_instr);
56
57/*----- Main code ---------------------------------------------------------*/
58
59/* --- What's going on here? --- *
60 *
61 * Let's face it, @gfx_div@ sucks. It works (I hope), but it's not in any
62 * sense fast. Here, we do efficient reduction modulo sparse polynomials.
63 *
64 * Suppose we have a polynomial @X@ we're trying to reduce mod @P@. If we
65 * take the topmost nonzero word of @X@, call it @w@, then we can eliminate
66 * it by subtracting off @w P x^{k}@ for an appropriate value of @k@. The
67 * trick is in observing that if @P@ is sparse we can do this multiplication
68 * and subtraction efficiently, just by XORing appropriate shifts of @w@ into
69 * @X@.
70 *
71 * The first tricky bit is in working out when to stop. I'll use eight-bit
72 * words to demonstrate what I'm talking about.
73 *
74 * xxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
75 * 001ppppp pppppppp pppppppp pppppppp
76 * |<rp>|
77 * |<------------ bp ------------->|
78 * |<------------ nw --------------->|
79 *
80 * The trick of taking whole words off of @X@ stops working when there are
81 * only @nw@ words left. Then we have to mask off the bottom bits of @w@
82 * before continuing.
83 */
84
85/* --- @gfreduce_create@ --- *
86 *
87 * Arguments: @gfreduce *r@ = structure to fill in
88 * @mp *x@ = a (hopefully sparse) polynomial
89 *
90 * Returns: ---
91 *
92 * Use: Initializes a context structure for reduction.
93 */
94
95void gfreduce_create(gfreduce *r, mp *p)
96{
97 instr_v iv = DA_INIT;
98 unsigned long d, dw;
99 mpscan sc;
100 unsigned long i;
101 gfreduce_instr *ip;
102 unsigned f = 0;
103 size_t w, ww, wi, wl, ll;
104
105 /* --- Sort out the easy stuff --- */
106
107 d = mp_bits(p); assert(d); d--;
108 r->lim = d/MPW_BITS;
109 dw = d%MPW_BITS;
110 if (!dw)
111 r->mask = 0;
112 else {
113 r->mask = MPW(((mpw)-1) << dw);
114 r->lim++;
115 }
116 r->p = mp_copy(p);
117
118 /* --- Stash a new instruction --- */
119
120#define INSTR(op_, arg_) do { \
121 DA_ENSURE(&iv, 1); \
122 DA(&iv)[DA_LEN(&iv)].op = (op_); \
123 DA(&iv)[DA_LEN(&iv)].arg = (arg_); \
124 DA_EXTEND(&iv, 1); \
125} while (0)
126
127#define f_lsr 1u
128
129 w = (d + MPW_BITS - 1)/MPW_BITS;
130 INSTR(GFRI_LOAD, w);
131 wi = DA_LEN(&iv);
132 f = 0;
133 ll = 0;
134 for (i = 0, mp_scan(&sc, p); mp_step(&sc) && i < d; i++) {
135 if (!mp_bit(&sc))
136 continue;
137 ww = (d - i + MPW_BITS - 1)/MPW_BITS;
138 if (ww != w) {
139 wl = DA_LEN(&iv);
140 INSTR(GFRI_STORE, w);
141 if (!ll)
142 ll = DA_LEN(&iv);
143 if (!(f & f_lsr))
144 INSTR(GFRI_LOAD, ww);
145 else {
146 INSTR(GFRI_LOAD, w - 1);
147 for (; wi < wl; wi++) {
148 ip = &DA(&iv)[wi];
149 assert(ip->op == GFRI_LSL);
150 if (ip->arg)
151 INSTR(GFRI_LSR, MPW_BITS - ip->arg);
152 }
153 if (w - 1 != ww) {
154 INSTR(GFRI_STORE, w - 1);
155 INSTR(GFRI_LOAD, ww);
156 }
157 f &= ~f_lsr;
158 }
159 w = ww;
160 wi = DA_LEN(&iv);
161 }
162 INSTR(GFRI_LSL, (i - d)%MPW_BITS);
163 if ((i - d)%MPW_BITS)
164 f |= f_lsr;
165 }
166 wl = DA_LEN(&iv);
167 INSTR(GFRI_STORE, w);
168 if (!ll)
169 ll = DA_LEN(&iv);
170 if (f & f_lsr) {
171 INSTR(GFRI_LOAD, w - 1);
172 for (; wi < wl; wi++) {
173 ip = &DA(&iv)[wi];
174 assert(ip->op == GFRI_LSL);
175 if (ip->arg)
176 INSTR(GFRI_LSR, MPW_BITS - ip->arg);
177 }
178 INSTR(GFRI_STORE, w - 1);
179 }
180
181#undef INSTR
182
183 r->in = DA_LEN(&iv);
184 r->iv = xmalloc(r->in * sizeof(gfreduce_instr));
185 r->liv = r->iv + ll;
186 memcpy(r->iv, DA(&iv), r->in * sizeof(gfreduce_instr));
187 DA_DESTROY(&iv);
188}
189
190/* --- @gfreduce_destroy@ --- *
191 *
192 * Arguments: @gfreduce *r@ = structure to free
193 *
194 * Returns: ---
195 *
196 * Use: Reclaims the resources from a reduction context.
197 */
198
199void gfreduce_destroy(gfreduce *r)
200{
201 mp_drop(r->p);
202 xfree(r->iv);
203}
204
205/* --- @gfreduce_dump@ --- *
206 *
207 * Arguments: @gfreduce *r@ = structure to dump
208 * @FILE *fp@ = file to dump on
209 *
210 * Returns: ---
211 *
212 * Use: Dumps a reduction context.
213 */
214
215void gfreduce_dump(gfreduce *r, FILE *fp)
216{
217 size_t i;
218
219 fprintf(fp, "poly = "); mp_writefile(r->p, fp, 16);
220 fprintf(fp, "\n lim = %lu; mask = %lx\n",
221 (unsigned long)r->lim, (unsigned long)r->mask);
222 for (i = 0; i < r->in; i++) {
223 static const char *opname[] = { "load", "lsl", "lsr", "store" };
224 assert(r->iv[i].op < N(opname));
225 fprintf(fp, " %s %lu\n",
226 opname[r->iv[i].op],
227 (unsigned long)r->iv[i].arg);
228 }
229}
230
231/* --- @gfreduce_do@ --- *
232 *
233 * Arguments: @gfreduce *r@ = reduction context
234 * @mp *d@ = destination
235 * @mp *x@ = source
236 *
237 * Returns: Destination, @x@ reduced modulo the reduction poly.
238 */
239
240static void run(const gfreduce_instr *i, const gfreduce_instr *il,
241 mpw *v, mpw z)
242{
243 mpw w = 0;
244
245 for (; i < il; i++) {
246 switch (i->op) {
247 case GFRI_LOAD: w = *(v - i->arg); break;
248 case GFRI_LSL: w ^= z << i->arg; break;
249 case GFRI_LSR: w ^= z >> i->arg; break;
250 case GFRI_STORE: *(v - i->arg) = MPW(w); break;
251 default: abort();
252 }
253 }
254}
255
256mp *gfreduce_do(gfreduce *r, mp *d, mp *x)
257{
258 mpw *v, *vl;
259 const gfreduce_instr *il;
260 mpw z;
261
262 /* --- Try to reuse the source's space --- */
263
264 MP_COPY(x);
265 if (d) MP_DROP(d);
266 MP_DEST(x, MP_LEN(x), x->f);
267
268 /* --- Do the reduction --- */
269
270 il = r->iv + r->in;
271 if (MP_LEN(x) >= r->lim) {
272 v = x->v + r->lim;
273 vl = x->vl;
274 while (vl-- > v) {
275 while (*vl) {
276 z = *vl;
277 *vl = 0;
278 run(r->iv, il, vl, z);
279 }
280 }
281 if (r->mask) {
282 while (*vl & r->mask) {
283 z = *vl & r->mask;
284 *vl &= ~r->mask;
285 run(r->iv, il, vl, z);
286 }
287 }
288 }
289
290 /* --- Done --- */
291
292 MP_SHRINK(x);
293 return (x);
294}
295
296/* --- @gfreduce_sqrt@ --- *
297 *
298 * Arguments: @gfreduce *r@ = pointer to reduction context
299 * @mp *d@ = destination
300 * @mp *x@ = some polynomial
301 *
302 * Returns: The square root of @x@ modulo @r->p@, or null.
303 */
304
305mp *gfreduce_sqrt(gfreduce *r, mp *d, mp *x)
306{
307 mp *y = MP_COPY(x);
308 mp *z, *spare = MP_NEW;
309 unsigned long m = mp_bits(r->p) - 1;
310 unsigned long i;
311
312 for (i = 0; i < m - 1; i++) {
313 mp *t = gf_sqr(spare, y);
314 spare = y;
315 y = gfreduce_do(r, t, t);
316 }
317 z = gf_sqr(spare, y);
318 z = gfreduce_do(r, z, z);
319 if (!MP_EQ(x, z)) {
320 mp_drop(y);
321 y = 0;
322 }
323 mp_drop(z);
324 mp_drop(d);
325 return (y);
326}
327
328/* --- @gfreduce_trace@ --- *
329 *
330 * Arguments: @gfreduce *r@ = pointer to reduction context
331 * @mp *x@ = some polynomial
332 *
333 * Returns: The trace of @x@. (%$\Tr(x)=x + x^2 + \cdots + x^{2^{m-1}}$%
334 * if %$x \in \gf{2^m}$%).
335 */
336
337int gfreduce_trace(gfreduce *r, mp *x)
338{
339 mp *y = MP_COPY(x);
340 mp *spare = MP_NEW;
341 unsigned long m = mp_bits(r->p) - 1;
342 unsigned long i;
343 int rc;
344
345 for (i = 0; i < m - 1; i++) {
346 mp *t = gf_sqr(spare, y);
347 spare = y;
348 y = gfreduce_do(r, t, t);
349 y = gf_add(y, y, x);
350 }
351 rc = !MP_ISZERO(y);
352 mp_drop(spare);
353 mp_drop(y);
354 return (rc);
355}
356
357/* --- @gfreduce_halftrace@ --- *
358 *
359 * Arguments: @gfreduce *r@ = pointer to reduction context
360 * @mp *d@ = destination
361 * @mp *x@ = some polynomial
362 *
363 * Returns: The half-trace of @x@.
364 * (%$\HfTr(x)= x + x^{2^2} + \cdots + x^{2^{m-1}}$%
365 * if %$x \in \gf{2^m}$% with %$m$% odd).
366 */
367
368mp *gfreduce_halftrace(gfreduce *r, mp *d, mp *x)
369{
370 mp *y = MP_COPY(x);
371 mp *spare = MP_NEW;
372 unsigned long m = mp_bits(r->p) - 1;
373 unsigned long i;
374
375 mp_drop(d);
376 for (i = 0; i < m - 1; i += 2) {
377 mp *t = gf_sqr(spare, y);
378 spare = y;
379 y = gfreduce_do(r, t, t);
380 t = gf_sqr(spare, y);
381 spare = y;
382 y = gfreduce_do(r, t, t);
383 y = gf_add(y, y, x);
384 }
385 mp_drop(spare);
386 return (y);
387}
388
389/* --- @gfreduce_quadsolve@ --- *
390 *
391 * Arguments: @gfreduce *r@ = pointer to reduction context
392 * @mp *d@ = destination
393 * @mp *x@ = some polynomial
394 *
395 * Returns: A polynomial @y@ such that %$y^2 + y = x$%, or null.
396 */
397
398mp *gfreduce_quadsolve(gfreduce *r, mp *d, mp *x)
399{
400 unsigned long m = mp_bits(r->p) - 1;
401 mp *t;
402
403 MP_COPY(x);
404 if (m & 1)
405 d = gfreduce_halftrace(r, d, x);
406 else {
407 mp *z, *w, *rho = MP_NEW;
408 mp *spare = MP_NEW;
409 grand *fr = fibrand_create(0);
410 unsigned long i;
411
412 for (;;) {
413 rho = mprand(rho, m, fr, 0);
414 z = MP_ZERO;
415 w = MP_COPY(rho);
416 for (i = 0; i < m - 1; i++) {
417 t = gf_sqr(spare, z); spare = z; z = gfreduce_do(r, t, t);
418 t = gf_sqr(spare, w); spare = w; w = gfreduce_do(r, t, t);
419 t = gf_mul(spare, w, x); t = gfreduce_do(r, t, t); spare = t;
420 z = gf_add(z, z, t);
421 w = gf_add(w, w, rho);
422 }
423 if (!MP_ISZERO(w))
424 break;
425 MP_DROP(z);
426 MP_DROP(w);
427 }
428 if (d) MP_DROP(d);
429 MP_DROP(w);
430 MP_DROP(spare);
431 MP_DROP(rho);
432 fr->ops->destroy(fr);
433 d = z;
434 }
435
436 t = gf_sqr(MP_NEW, d); t = gfreduce_do(r, t, t); t = gf_add(t, t, d);
437 if (!MP_EQ(t, x)) {
438 MP_DROP(d);
439 d = 0;
440 }
441 MP_DROP(t);
442 MP_DROP(x);
443 d->v[0] &= ~(mpw)1;
444 return (d);
445}
446
447/* --- @gfreduce_exp@ --- *
448 *
449 * Arguments: @gfreduce *gr@ = pointer to reduction context
450 * @mp *d@ = fake destination
451 * @mp *a@ = base
452 * @mp *e@ = exponent
453 *
454 * Returns: Result, %$a^e \bmod m$%.
455 */
456
457mp *gfreduce_exp(gfreduce *gr, mp *d, mp *a, mp *e)
458{
459 mp *x = MP_ONE;
460 mp *spare = (e->f & MP_BURN) ? MP_NEWSEC : MP_NEW;
461
462 MP_SHRINK(e);
463 if (!MP_LEN(e))
464 ;
465 else if (MP_LEN(e) < EXP_THRESH)
466 EXP_SIMPLE(x, a, e);
467 else
468 EXP_WINDOW(x, a, e);
469 mp_drop(d);
470 mp_drop(spare);
471 return (x);
472}
473
474/*----- Test rig ----------------------------------------------------------*/
475
476#ifdef TEST_RIG
477
478#define MP(x) mp_readstring(MP_NEW, #x, 0, 0)
479
480static int vreduce(dstr *v)
481{
482 mp *d = *(mp **)v[0].buf;
483 mp *n = *(mp **)v[1].buf;
484 mp *r = *(mp **)v[2].buf;
485 mp *c;
486 int ok = 1;
487 gfreduce rr;
488
489 gfreduce_create(&rr, d);
490 c = gfreduce_do(&rr, MP_NEW, n);
491 if (!MP_EQ(c, r)) {
492 fprintf(stderr, "\n*** reduction failed\n*** ");
493 gfreduce_dump(&rr, stderr);
494 fprintf(stderr, "\n*** n = "); mp_writefile(n, stderr, 16);
495 fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
496 fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
497 fprintf(stderr, "\n");
498 ok = 0;
499 }
500 gfreduce_destroy(&rr);
501 mp_drop(n); mp_drop(d); mp_drop(r); mp_drop(c);
502 assert(mparena_count(MPARENA_GLOBAL) == 0);
503 return (ok);
504}
505
506static int vmodexp(dstr *v)
507{
508 mp *p = *(mp **)v[0].buf;
509 mp *g = *(mp **)v[1].buf;
510 mp *x = *(mp **)v[2].buf;
511 mp *r = *(mp **)v[3].buf;
512 mp *c;
513 int ok = 1;
514 gfreduce rr;
515
516 gfreduce_create(&rr, p);
517 c = gfreduce_exp(&rr, MP_NEW, g, x);
518 if (!MP_EQ(c, r)) {
519 fprintf(stderr, "\n*** modexp failed\n*** ");
520 fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
521 fprintf(stderr, "\n*** g = "); mp_writefile(g, stderr, 16);
522 fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
523 fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
524 fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
525 fprintf(stderr, "\n");
526 ok = 0;
527 }
528 gfreduce_destroy(&rr);
529 mp_drop(p); mp_drop(g); mp_drop(r); mp_drop(x); mp_drop(c);
530 assert(mparena_count(MPARENA_GLOBAL) == 0);
531 return (ok);
532}
533
534static int vsqrt(dstr *v)
535{
536 mp *p = *(mp **)v[0].buf;
537 mp *x = *(mp **)v[1].buf;
538 mp *r = *(mp **)v[2].buf;
539 mp *c;
540 int ok = 1;
541 gfreduce rr;
542
543 gfreduce_create(&rr, p);
544 c = gfreduce_sqrt(&rr, MP_NEW, x);
545 if (!MP_EQ(c, r)) {
546 fprintf(stderr, "\n*** sqrt failed\n*** ");
547 fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
548 fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
549 fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
550 fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
551 fprintf(stderr, "\n");
552 ok = 0;
553 }
554 gfreduce_destroy(&rr);
555 mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c);
556 assert(mparena_count(MPARENA_GLOBAL) == 0);
557 return (ok);
558}
559
560static int vtr(dstr *v)
561{
562 mp *p = *(mp **)v[0].buf;
563 mp *x = *(mp **)v[1].buf;
564 int r = *(int *)v[2].buf, c;
565 int ok = 1;
566 gfreduce rr;
567
568 gfreduce_create(&rr, p);
569 c = gfreduce_trace(&rr, x);
570 if (c != r) {
571 fprintf(stderr, "\n*** trace failed\n*** ");
572 fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
573 fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
574 fprintf(stderr, "\n*** c = %d", c);
575 fprintf(stderr, "\n*** r = %d", r);
576 fprintf(stderr, "\n");
577 ok = 0;
578 }
579 gfreduce_destroy(&rr);
580 mp_drop(p); mp_drop(x);
581 assert(mparena_count(MPARENA_GLOBAL) == 0);
582 return (ok);
583}
584
585static int vhftr(dstr *v)
586{
587 mp *p = *(mp **)v[0].buf;
588 mp *x = *(mp **)v[1].buf;
589 mp *r = *(mp **)v[2].buf;
590 mp *c;
591 int ok = 1;
592 gfreduce rr;
593
594 gfreduce_create(&rr, p);
595 c = gfreduce_halftrace(&rr, MP_NEW, x);
596 if (!MP_EQ(c, r)) {
597 fprintf(stderr, "\n*** halftrace failed\n*** ");
598 fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
599 fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
600 fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
601 fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
602 fprintf(stderr, "\n");
603 ok = 0;
604 }
605 gfreduce_destroy(&rr);
606 mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c);
607 assert(mparena_count(MPARENA_GLOBAL) == 0);
608 return (ok);
609}
610
611static int vquad(dstr *v)
612{
613 mp *p = *(mp **)v[0].buf;
614 mp *x = *(mp **)v[1].buf;
615 mp *r = *(mp **)v[2].buf;
616 mp *c;
617 int ok = 1;
618 gfreduce rr;
619
620 gfreduce_create(&rr, p);
621 c = gfreduce_quadsolve(&rr, MP_NEW, x);
622 if (!MP_EQ(c, r)) {
623 fprintf(stderr, "\n*** quadsolve failed\n*** ");
624 fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
625 fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
626 fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
627 fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
628 fprintf(stderr, "\n");
629 ok = 0;
630 }
631 gfreduce_destroy(&rr);
632 mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c);
633 assert(mparena_count(MPARENA_GLOBAL) == 0);
634 return (ok);
635}
636
637static test_chunk defs[] = {
638 { "reduce", vreduce, { &type_mp, &type_mp, &type_mp, 0 } },
639 { "modexp", vmodexp, { &type_mp, &type_mp, &type_mp, &type_mp, 0 } },
640 { "sqrt", vsqrt, { &type_mp, &type_mp, &type_mp, 0 } },
641 { "trace", vtr, { &type_mp, &type_mp, &type_int, 0 } },
642 { "halftrace", vhftr, { &type_mp, &type_mp, &type_mp, 0 } },
643 { "quadsolve", vquad, { &type_mp, &type_mp, &type_mp, 0 } },
644 { 0, 0, { 0 } }
645};
646
647int main(int argc, char *argv[])
648{
649 test_run(argc, argv, defs, SRCDIR"/tests/gfreduce");
650 return (0);
651}
652
653#endif
654
655/*----- That's all, folks -------------------------------------------------*/