164122d8bceb014632568f52b318a3c89526b529
[u/mdw/catacomb] / math / gfreduce.c
1 /* -*-c-*-
2 *
3 * Efficient reduction modulo sparse binary polynomials
4 *
5 * (c) 2004 Straylight/Edgeware
6 */
7
8 /*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of Catacomb.
11 *
12 * Catacomb is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Library General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
16 *
17 * Catacomb is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Library General Public License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with Catacomb; if not, write to the Free
24 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 * MA 02111-1307, USA.
26 */
27
28 /*----- Header files ------------------------------------------------------*/
29
30 #include <mLib/alloc.h>
31 #include <mLib/darray.h>
32 #include <mLib/macros.h>
33
34 #include "gf.h"
35 #include "gfreduce.h"
36 #include "gfreduce-exp.h"
37 #include "fibrand.h"
38 #include "mprand.h"
39
40 /*----- Data structures ---------------------------------------------------*/
41
42 DA_DECL(instr_v, gfreduce_instr);
43
44 /*----- Main code ---------------------------------------------------------*/
45
46 /* --- What's going on here? --- *
47 *
48 * Let's face it, @gfx_div@ sucks. It works (I hope), but it's not in any
49 * sense fast. Here, we do efficient reduction modulo sparse polynomials.
50 *
51 * Suppose we have a polynomial @X@ we're trying to reduce mod @P@. If we
52 * take the topmost nonzero word of @X@, call it @w@, then we can eliminate
53 * it by subtracting off @w P x^{k}@ for an appropriate value of @k@. The
54 * trick is in observing that if @P@ is sparse we can do this multiplication
55 * and subtraction efficiently, just by XORing appropriate shifts of @w@ into
56 * @X@.
57 *
58 * The first tricky bit is in working out when to stop. I'll use eight-bit
59 * words to demonstrate what I'm talking about.
60 *
61 * xxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
62 * 001ppppp pppppppp pppppppp pppppppp
63 * |<rp>|
64 * |<------------ bp ------------->|
65 * |<------------ nw --------------->|
66 *
67 * The trick of taking whole words off of @X@ stops working when there are
68 * only @nw@ words left. Then we have to mask off the bottom bits of @w@
69 * before continuing.
70 */
71
72 /* --- @gfreduce_create@ --- *
73 *
74 * Arguments: @gfreduce *r@ = structure to fill in
75 * @mp *x@ = a (hopefully sparse) polynomial
76 *
77 * Returns: ---
78 *
79 * Use: Initializes a context structure for reduction.
80 */
81
82 void gfreduce_create(gfreduce *r, mp *p)
83 {
84 instr_v iv = DA_INIT;
85 unsigned long d;
86 unsigned dw;
87 mpscan sc;
88 unsigned long i;
89 gfreduce_instr *ip;
90 unsigned f = 0;
91 size_t w, ww, wi, wl, ll, bb;
92
93 /* --- Sort out the easy stuff --- */
94
95 d = mp_bits(p); assert(d); d--;
96 r->lim = d/MPW_BITS;
97 dw = d%MPW_BITS;
98 if (!dw)
99 r->mask = 0;
100 else {
101 r->mask = MPW(((mpw)-1) << dw);
102 r->lim++;
103 }
104 r->p = mp_copy(p);
105
106 /* --- Stash a new instruction --- */
107
108 #define INSTR(op_, arg_) do { \
109 DA_ENSURE(&iv, 1); \
110 DA(&iv)[DA_LEN(&iv)].op = (op_); \
111 DA(&iv)[DA_LEN(&iv)].arg = (arg_); \
112 DA_EXTEND(&iv, 1); \
113 } while (0)
114
115 #define f_lsr 1u
116
117 w = (d + MPW_BITS - 1)/MPW_BITS;
118 INSTR(GFRI_LOAD, w);
119 wi = DA_LEN(&iv);
120 f = 0;
121 ll = 0;
122 bb = MPW_BITS - dw;
123 for (i = 0, mp_scan(&sc, p); mp_step(&sc) && i < d; i++) {
124 if (!mp_bit(&sc))
125 continue;
126 ww = (d - i + MPW_BITS - 1)/MPW_BITS;
127 if (ww != w) {
128 wl = DA_LEN(&iv);
129 INSTR(GFRI_STORE, w);
130 if (!ll)
131 ll = DA_LEN(&iv);
132 if (!(f & f_lsr))
133 INSTR(GFRI_LOAD, ww);
134 else {
135 INSTR(GFRI_LOAD, w - 1);
136 for (; wi < wl; wi++) {
137 ip = &DA(&iv)[wi];
138 assert(ip->op == GFRI_LSL);
139 if (ip->arg)
140 INSTR(GFRI_LSR, MPW_BITS - ip->arg);
141 }
142 if (w - 1 != ww) {
143 INSTR(GFRI_STORE, w - 1);
144 INSTR(GFRI_LOAD, ww);
145 }
146 f &= ~f_lsr;
147 }
148 w = ww;
149 wi = DA_LEN(&iv);
150 }
151 INSTR(GFRI_LSL, (bb + i)%MPW_BITS);
152 if ((bb + i)%MPW_BITS)
153 f |= f_lsr;
154 }
155 wl = DA_LEN(&iv);
156 INSTR(GFRI_STORE, w);
157 if (!ll)
158 ll = DA_LEN(&iv);
159 if (f & f_lsr) {
160 INSTR(GFRI_LOAD, w - 1);
161 for (; wi < wl; wi++) {
162 ip = &DA(&iv)[wi];
163 assert(ip->op == GFRI_LSL);
164 if (ip->arg)
165 INSTR(GFRI_LSR, MPW_BITS - ip->arg);
166 }
167 INSTR(GFRI_STORE, w - 1);
168 }
169
170 #undef INSTR
171
172 r->in = DA_LEN(&iv);
173 r->iv = xmalloc(r->in * sizeof(gfreduce_instr));
174 r->liv = r->iv + ll;
175 memcpy(r->iv, DA(&iv), r->in * sizeof(gfreduce_instr));
176 DA_DESTROY(&iv);
177 }
178
179 /* --- @gfreduce_destroy@ --- *
180 *
181 * Arguments: @gfreduce *r@ = structure to free
182 *
183 * Returns: ---
184 *
185 * Use: Reclaims the resources from a reduction context.
186 */
187
188 void gfreduce_destroy(gfreduce *r)
189 {
190 mp_drop(r->p);
191 xfree(r->iv);
192 }
193
194 /* --- @gfreduce_dump@ --- *
195 *
196 * Arguments: @gfreduce *r@ = structure to dump
197 * @FILE *fp@ = file to dump on
198 *
199 * Returns: ---
200 *
201 * Use: Dumps a reduction context.
202 */
203
204 void gfreduce_dump(gfreduce *r, FILE *fp)
205 {
206 size_t i;
207
208 fprintf(fp, "poly = "); mp_writefile(r->p, fp, 16);
209 fprintf(fp, "\n lim = %lu; mask = %lx\n",
210 (unsigned long)r->lim, (unsigned long)r->mask);
211 for (i = 0; i < r->in; i++) {
212 static const char *opname[] = { "load", "lsl", "lsr", "store" };
213 assert(r->iv[i].op < N(opname));
214 fprintf(fp, " %s %lu\n",
215 opname[r->iv[i].op],
216 (unsigned long)r->iv[i].arg);
217 }
218 }
219
220 /* --- @gfreduce_do@ --- *
221 *
222 * Arguments: @gfreduce *r@ = reduction context
223 * @mp *d@ = destination
224 * @mp *x@ = source
225 *
226 * Returns: Destination, @x@ reduced modulo the reduction poly.
227 */
228
229 static void run(const gfreduce_instr *i, const gfreduce_instr *il,
230 mpw *v, mpw z)
231 {
232 mpw w = 0;
233
234 for (; i < il; i++) {
235 switch (i->op) {
236 case GFRI_LOAD: w = *(v - i->arg); break;
237 case GFRI_LSL: w ^= z << i->arg; break;
238 case GFRI_LSR: w ^= z >> i->arg; break;
239 case GFRI_STORE: *(v - i->arg) = MPW(w); break;
240 default: abort();
241 }
242 }
243 }
244
245 mp *gfreduce_do(gfreduce *r, mp *d, mp *x)
246 {
247 mpw *v, *vl;
248 const gfreduce_instr *il;
249 mpw z;
250
251 /* --- Try to reuse the source's space --- */
252
253 MP_COPY(x);
254 if (d) MP_DROP(d);
255 MP_DEST(x, MP_LEN(x), x->f);
256
257 /* --- Do the reduction --- */
258
259 il = r->iv + r->in;
260 if (MP_LEN(x) >= r->lim) {
261 v = x->v + r->lim;
262 vl = x->vl;
263 while (vl-- > v) {
264 while (*vl) {
265 z = *vl;
266 *vl = 0;
267 run(r->iv, il, vl, z);
268 }
269 }
270 if (r->mask) {
271 while (*vl & r->mask) {
272 z = *vl & r->mask;
273 *vl &= ~r->mask;
274 run(r->iv, il, vl, z);
275 }
276 }
277 }
278
279 /* --- Done --- */
280
281 MP_SHRINK(x);
282 return (x);
283 }
284
285 /* --- @gfreduce_sqrt@ --- *
286 *
287 * Arguments: @gfreduce *r@ = pointer to reduction context
288 * @mp *d@ = destination
289 * @mp *x@ = some polynomial
290 *
291 * Returns: The square root of @x@ modulo @r->p@, or null.
292 */
293
294 mp *gfreduce_sqrt(gfreduce *r, mp *d, mp *x)
295 {
296 mp *y = MP_COPY(x);
297 mp *z, *spare = MP_NEW;
298 unsigned long m = mp_bits(r->p) - 1;
299 unsigned long i;
300
301 for (i = 0; i < m - 1; i++) {
302 mp *t = gf_sqr(spare, y);
303 spare = y;
304 y = gfreduce_do(r, t, t);
305 }
306 z = gf_sqr(spare, y);
307 z = gfreduce_do(r, z, z);
308 if (!MP_EQ(x, z)) {
309 mp_drop(y);
310 y = 0;
311 }
312 mp_drop(z);
313 mp_drop(d);
314 return (y);
315 }
316
317 /* --- @gfreduce_trace@ --- *
318 *
319 * Arguments: @gfreduce *r@ = pointer to reduction context
320 * @mp *x@ = some polynomial
321 *
322 * Returns: The trace of @x@. (%$\Tr(x)=x + x^2 + \cdots + x^{2^{m-1}}$%
323 * if %$x \in \gf{2^m}$%).
324 */
325
326 int gfreduce_trace(gfreduce *r, mp *x)
327 {
328 mp *y = MP_COPY(x);
329 mp *spare = MP_NEW;
330 unsigned long m = mp_bits(r->p) - 1;
331 unsigned long i;
332 int rc;
333
334 for (i = 0; i < m - 1; i++) {
335 mp *t = gf_sqr(spare, y);
336 spare = y;
337 y = gfreduce_do(r, t, t);
338 y = gf_add(y, y, x);
339 }
340 rc = !MP_ZEROP(y);
341 mp_drop(spare);
342 mp_drop(y);
343 return (rc);
344 }
345
346 /* --- @gfreduce_halftrace@ --- *
347 *
348 * Arguments: @gfreduce *r@ = pointer to reduction context
349 * @mp *d@ = destination
350 * @mp *x@ = some polynomial
351 *
352 * Returns: The half-trace of @x@.
353 * (%$\HfTr(x)= x + x^{2^2} + \cdots + x^{2^{m-1}}$%
354 * if %$x \in \gf{2^m}$% with %$m$% odd).
355 */
356
357 mp *gfreduce_halftrace(gfreduce *r, mp *d, mp *x)
358 {
359 mp *y = MP_COPY(x);
360 mp *spare = MP_NEW;
361 unsigned long m = mp_bits(r->p) - 1;
362 unsigned long i;
363
364 mp_drop(d);
365 for (i = 0; i < m - 1; i += 2) {
366 mp *t = gf_sqr(spare, y);
367 spare = y;
368 y = gfreduce_do(r, t, t);
369 t = gf_sqr(spare, y);
370 spare = y;
371 y = gfreduce_do(r, t, t);
372 y = gf_add(y, y, x);
373 }
374 mp_drop(spare);
375 return (y);
376 }
377
378 /* --- @gfreduce_quadsolve@ --- *
379 *
380 * Arguments: @gfreduce *r@ = pointer to reduction context
381 * @mp *d@ = destination
382 * @mp *x@ = some polynomial
383 *
384 * Returns: A polynomial @y@ such that %$y^2 + y = x$%, or null.
385 */
386
387 mp *gfreduce_quadsolve(gfreduce *r, mp *d, mp *x)
388 {
389 unsigned long m = mp_bits(r->p) - 1;
390 mp *t;
391
392 MP_COPY(x);
393 if (m & 1)
394 d = gfreduce_halftrace(r, d, x);
395 else {
396 mp *z, *w, *rho = MP_NEW;
397 mp *spare = MP_NEW;
398 grand *fr = fibrand_create(0);
399 unsigned long i;
400
401 for (;;) {
402 rho = mprand(rho, m, fr, 0);
403 z = MP_ZERO;
404 w = MP_COPY(rho);
405 for (i = 0; i < m - 1; i++) {
406 t = gf_sqr(spare, z); spare = z; z = gfreduce_do(r, t, t);
407 t = gf_sqr(spare, w); spare = w; w = gfreduce_do(r, t, t);
408 t = gf_mul(spare, w, x); t = gfreduce_do(r, t, t); spare = t;
409 z = gf_add(z, z, t);
410 w = gf_add(w, w, rho);
411 }
412 if (!MP_ZEROP(w))
413 break;
414 MP_DROP(z);
415 MP_DROP(w);
416 }
417 if (d) MP_DROP(d);
418 MP_DROP(w);
419 MP_DROP(spare);
420 MP_DROP(rho);
421 fr->ops->destroy(fr);
422 d = z;
423 }
424
425 t = gf_sqr(MP_NEW, d); t = gfreduce_do(r, t, t); t = gf_add(t, t, d);
426 if (!MP_EQ(t, x)) {
427 MP_DROP(d);
428 d = 0;
429 }
430 MP_DROP(t);
431 MP_DROP(x);
432 if (d) d->v[0] &= ~(mpw)1;
433 return (d);
434 }
435
436 /* --- @gfreduce_exp@ --- *
437 *
438 * Arguments: @gfreduce *gr@ = pointer to reduction context
439 * @mp *d@ = fake destination
440 * @mp *a@ = base
441 * @mp *e@ = exponent
442 *
443 * Returns: Result, %$a^e \bmod m$%.
444 */
445
446 mp *gfreduce_exp(gfreduce *gr, mp *d, mp *a, mp *e)
447 {
448 mp *x = MP_ONE;
449 mp *spare = (e->f & MP_BURN) ? MP_NEWSEC : MP_NEW;
450
451 MP_SHRINK(e);
452 MP_COPY(a);
453 if (MP_ZEROP(e))
454 ;
455 else {
456 if (MP_NEGP(e))
457 a = gf_modinv(a, a, gr->p);
458 if (MP_LEN(e) < EXP_THRESH)
459 EXP_SIMPLE(x, a, e);
460 else
461 EXP_WINDOW(x, a, e);
462 }
463 mp_drop(d);
464 mp_drop(a);
465 mp_drop(spare);
466 return (x);
467 }
468
469 /*----- Test rig ----------------------------------------------------------*/
470
471 #ifdef TEST_RIG
472
473 #define MP(x) mp_readstring(MP_NEW, #x, 0, 0)
474
475 static int vreduce(dstr *v)
476 {
477 mp *d = *(mp **)v[0].buf;
478 mp *n = *(mp **)v[1].buf;
479 mp *r = *(mp **)v[2].buf;
480 mp *c;
481 int ok = 1;
482 gfreduce rr;
483
484 gfreduce_create(&rr, d);
485 c = gfreduce_do(&rr, MP_NEW, n);
486 if (!MP_EQ(c, r)) {
487 fprintf(stderr, "\n*** reduction failed\n*** ");
488 gfreduce_dump(&rr, stderr);
489 fprintf(stderr, "\n*** n = "); mp_writefile(n, stderr, 16);
490 fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
491 fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
492 fprintf(stderr, "\n");
493 ok = 0;
494 }
495 gfreduce_destroy(&rr);
496 mp_drop(n); mp_drop(d); mp_drop(r); mp_drop(c);
497 assert(mparena_count(MPARENA_GLOBAL) == 0);
498 return (ok);
499 }
500
501 static int vmodexp(dstr *v)
502 {
503 mp *p = *(mp **)v[0].buf;
504 mp *g = *(mp **)v[1].buf;
505 mp *x = *(mp **)v[2].buf;
506 mp *r = *(mp **)v[3].buf;
507 mp *c;
508 int ok = 1;
509 gfreduce rr;
510
511 gfreduce_create(&rr, p);
512 c = gfreduce_exp(&rr, MP_NEW, g, x);
513 if (!MP_EQ(c, r)) {
514 fprintf(stderr, "\n*** modexp failed\n*** ");
515 fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
516 fprintf(stderr, "\n*** g = "); mp_writefile(g, stderr, 16);
517 fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
518 fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
519 fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
520 fprintf(stderr, "\n");
521 ok = 0;
522 }
523 gfreduce_destroy(&rr);
524 mp_drop(p); mp_drop(g); mp_drop(r); mp_drop(x); mp_drop(c);
525 assert(mparena_count(MPARENA_GLOBAL) == 0);
526 return (ok);
527 }
528
529 static int vsqrt(dstr *v)
530 {
531 mp *p = *(mp **)v[0].buf;
532 mp *x = *(mp **)v[1].buf;
533 mp *r = *(mp **)v[2].buf;
534 mp *c;
535 int ok = 1;
536 gfreduce rr;
537
538 gfreduce_create(&rr, p);
539 c = gfreduce_sqrt(&rr, MP_NEW, x);
540 if (!MP_EQ(c, r)) {
541 fprintf(stderr, "\n*** sqrt failed\n*** ");
542 fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
543 fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
544 fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
545 fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
546 fprintf(stderr, "\n");
547 ok = 0;
548 }
549 gfreduce_destroy(&rr);
550 mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c);
551 assert(mparena_count(MPARENA_GLOBAL) == 0);
552 return (ok);
553 }
554
555 static int vtr(dstr *v)
556 {
557 mp *p = *(mp **)v[0].buf;
558 mp *x = *(mp **)v[1].buf;
559 int r = *(int *)v[2].buf, c;
560 int ok = 1;
561 gfreduce rr;
562
563 gfreduce_create(&rr, p);
564 c = gfreduce_trace(&rr, x);
565 if (c != r) {
566 fprintf(stderr, "\n*** trace failed\n*** ");
567 fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
568 fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
569 fprintf(stderr, "\n*** c = %d", c);
570 fprintf(stderr, "\n*** r = %d", r);
571 fprintf(stderr, "\n");
572 ok = 0;
573 }
574 gfreduce_destroy(&rr);
575 mp_drop(p); mp_drop(x);
576 assert(mparena_count(MPARENA_GLOBAL) == 0);
577 return (ok);
578 }
579
580 static int vhftr(dstr *v)
581 {
582 mp *p = *(mp **)v[0].buf;
583 mp *x = *(mp **)v[1].buf;
584 mp *r = *(mp **)v[2].buf;
585 mp *c;
586 int ok = 1;
587 gfreduce rr;
588
589 gfreduce_create(&rr, p);
590 c = gfreduce_halftrace(&rr, MP_NEW, x);
591 if (!MP_EQ(c, r)) {
592 fprintf(stderr, "\n*** halftrace failed\n*** ");
593 fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
594 fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
595 fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
596 fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
597 fprintf(stderr, "\n");
598 ok = 0;
599 }
600 gfreduce_destroy(&rr);
601 mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c);
602 assert(mparena_count(MPARENA_GLOBAL) == 0);
603 return (ok);
604 }
605
606 static int vquad(dstr *v)
607 {
608 mp *p = *(mp **)v[0].buf;
609 mp *x = *(mp **)v[1].buf;
610 mp *r = *(mp **)v[2].buf;
611 mp *c;
612 int ok = 1;
613 gfreduce rr;
614
615 gfreduce_create(&rr, p);
616 c = gfreduce_quadsolve(&rr, MP_NEW, x);
617 if (!MP_EQ(c, r)) {
618 fprintf(stderr, "\n*** quadsolve failed\n*** ");
619 fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
620 fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
621 fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
622 fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
623 fprintf(stderr, "\n");
624 ok = 0;
625 }
626 gfreduce_destroy(&rr);
627 mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c);
628 assert(mparena_count(MPARENA_GLOBAL) == 0);
629 return (ok);
630 }
631
632 static test_chunk defs[] = {
633 { "reduce", vreduce, { &type_mp, &type_mp, &type_mp, 0 } },
634 { "modexp", vmodexp, { &type_mp, &type_mp, &type_mp, &type_mp, 0 } },
635 { "sqrt", vsqrt, { &type_mp, &type_mp, &type_mp, 0 } },
636 { "trace", vtr, { &type_mp, &type_mp, &type_int, 0 } },
637 { "halftrace", vhftr, { &type_mp, &type_mp, &type_mp, 0 } },
638 { "quadsolve", vquad, { &type_mp, &type_mp, &type_mp, 0 } },
639 { 0, 0, { 0 } }
640 };
641
642 int main(int argc, char *argv[])
643 {
644 test_run(argc, argv, defs, SRCDIR"/t/gfreduce");
645 return (0);
646 }
647
648 #endif
649
650 /*----- That's all, folks -------------------------------------------------*/