Commit | Line | Data |
---|---|---|
ceb3f0c0 | 1 | /* -*-c-*- |
2 | * | |
ceb3f0c0 | 3 | * Efficient reduction modulo sparse binary polynomials |
4 | * | |
5 | * (c) 2004 Straylight/Edgeware | |
6 | */ | |
7 | ||
45c0fd36 | 8 | /*----- Licensing notice --------------------------------------------------* |
ceb3f0c0 | 9 | * |
10 | * This file is part of Catacomb. | |
11 | * | |
12 | * Catacomb is free software; you can redistribute it and/or modify | |
13 | * it under the terms of the GNU Library General Public License as | |
14 | * published by the Free Software Foundation; either version 2 of the | |
15 | * License, or (at your option) any later version. | |
45c0fd36 | 16 | * |
ceb3f0c0 | 17 | * Catacomb is distributed in the hope that it will be useful, |
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | * GNU Library General Public License for more details. | |
45c0fd36 | 21 | * |
ceb3f0c0 | 22 | * You should have received a copy of the GNU Library General Public |
23 | * License along with Catacomb; if not, write to the Free | |
24 | * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, | |
25 | * MA 02111-1307, USA. | |
26 | */ | |
27 | ||
ceb3f0c0 | 28 | /*----- Header files ------------------------------------------------------*/ |
29 | ||
30 | #include <mLib/alloc.h> | |
31 | #include <mLib/darray.h> | |
32 | #include <mLib/macros.h> | |
33 | ||
34 | #include "gf.h" | |
35 | #include "gfreduce.h" | |
36 | #include "gfreduce-exp.h" | |
37 | #include "fibrand.h" | |
38 | #include "mprand.h" | |
39 | ||
40 | /*----- Data structures ---------------------------------------------------*/ | |
41 | ||
/* Dynamic-array type holding @gfreduce_instr@ elements; the code generator
 * below accumulates the instruction program in one of these before it is
 * copied into the reduction context.
 */
DA_DECL(instr_v, gfreduce_instr);
43 | ||
44 | /*----- Main code ---------------------------------------------------------*/ | |
45 | ||
46 | /* --- What's going on here? --- * | |
47 | * | |
48 | * Let's face it, @gfx_div@ sucks. It works (I hope), but it's not in any | |
49 | * sense fast. Here, we do efficient reduction modulo sparse polynomials. | |
51f5bbe0 | 50 | * (It works for arbitrary polynomials, but isn't efficient for dense ones.) |
ceb3f0c0 | 51 | * |
51f5bbe0 MW |
52 | * Suppose that %$p(x) = x^n + p'(x) = \sum_{0\le i\le n} p_i x^i$%, hopefully |
53 | * with only a few other %$p_i \ne 0$%. We're going to compile %$p$% into a | |
54 | * sequence of instructions which can be used to perform reduction modulo | |
55 | * %$p$%. The important observation is that %$x^n \equiv p' \pmod p$%. | |
56 | * | |
57 | * Suppose we're working with %$w$%-bit words; let %$n = N w + n'$% with | |
58 | * %$0 \le n' < w$%. Let %$u(x)$% be some arbitrary polynomial. Write | |
59 | * %$u = z x^k + u'$% with %$\deg u' < k \ge n$%; then a reduction step uses | |
60 | * that %$u \equiv u' + z p' x^{k-n} \pmod p$%: the right hand side has | |
61 | * degree %$\max \{ \deg u', k + \deg p' - n + \deg z \} < \deg u$%, so this | |
62 | * makes progress towards a complete reduction. | |
63 | * | |
64 | * The compiled instruction sequence computes | |
65 | * %$u' + z p' x^{k-n} = u' + \sum_{0\le i<n} p_i z x^{k-n+i}$%. |
ceb3f0c0 | 66 | */ |
67 | ||
68 | /* --- @gfreduce_create@ --- * | |
69 | * | |
70 | * Arguments: @gfreduce *r@ = structure to fill in | |
71 | * @mp *x@ = a (hopefully sparse) polynomial | |
72 | * | |
73 | * Returns: --- | |
74 | * | |
75 | * Use: Initializes a context structure for reduction. | |
76 | */ | |
77 | ||
51f5bbe0 MW |
/* Code-generator state, threaded through the @emit_...@ helpers while
 * @gfreduce_create@ compiles a modulus into an instruction sequence.
 */
struct gen {
  unsigned f;                           /* Flags */
#define f_lsr 1u                        /*   Overflow from previous word */
#define f_load 2u                       /*   Outstanding @LOAD@ */
#define f_fip 4u                        /*   Final-pass offset is set */
  instr_v iv;                           /* Instruction vector */
  size_t fip;                           /* Offset for final-pass reduction */
  size_t w;                             /* Currently loaded target word */
  size_t wi;                            /* Index in @iv@ where the @LSL@
                                         * instructions for the current
                                         * word begin */
  gfreduce *r;                          /* Reduction context pointer */
};
89 | ||
/* --- @INSTR@ --- *
 *
 * Arguments:   @struct gen *g_@ = generator state
 *              @op_@ = opcode for the new instruction
 *              @arg_@ = argument for the new instruction
 *
 * Use:         Appends one instruction to the end of the generator's
 *              instruction vector.
 *
 *              The operands are evaluated %%\emph{before}%% space is
 *              reserved in the vector.  Reserving space may move the
 *              vector's storage, and callers are entitled to compute an
 *              operand from an instruction already in the vector (as
 *              @emit_right_shifts@ does); evaluating the operand afterwards
 *              would read through a stale pointer.
 */

#define INSTR(g_, op_, arg_) do {                                       \
  struct gen *_g = (g_);                                                \
  instr_v *_iv = &_g->iv;                                               \
  gfreduce_instr _in;                                                   \
                                                                        \
  _in.op = (op_);                                                       \
  _in.arg = (arg_);                                                     \
  DA_ENSURE(_iv, 1);                                                    \
  DA(_iv)[DA_LEN(_iv)] = _in;                                           \
  DA_EXTEND(_iv, 1);                                                    \
} while (0)
100 | ||
101 | static void emit_load(struct gen *g, size_t w) | |
102 | { | |
5937940f MW |
103 | /* --- If this is not the low-order word then note final-pass start --- * |
104 | * | |
105 | * Once we've eliminated the whole high-degree words, there will possibly | |
106 | * remain a few high-degree bits. We can further reduce the subject | |
107 | * polynomial by subtracting an appropriate multiple of %$p'$%, but if we | |
108 | * do this naively we'll end up addressing `low-order' words beyond the | |
109 | * bottom of our input. We solve this problem by storing an alternative | |
110 | * start position for this final pass (which works because we scan bits | |
111 | * right-to-left). | |
112 | */ | |
113 | ||
114 | if (!(g->f & f_fip) && w < g->r->lim) { | |
115 | g->fip = DA_LEN(&g->iv); | |
116 | g->f |= f_fip; | |
117 | } | |
118 | ||
119 | /* --- Actually emit the instruction --- */ | |
120 | ||
51f5bbe0 MW |
121 | INSTR(g, GFRI_LOAD, w); |
122 | g->f |= f_load; | |
123 | g->w = w; | |
124 | } | |
125 | ||
126 | static void emit_right_shifts(struct gen *g) | |
127 | { | |
128 | gfreduce_instr *ip; | |
129 | size_t i, wl; | |
130 | ||
131 | /* --- Close off the current word --- * | |
132 | * | |
133 | * If we shifted into this current word with a nonzero bit offset, then | |
134 | * we'll also need to arrange to perform a sequence of right shifts into | |
135 | * the following word, which we might as well do by scanning the | |
136 | * instruction sequence (which starts at @wi@). | |
137 | * | |
138 | * Either way, we leave a @LOAD@ unmatched if there was one before, in the | |
139 | * hope that callers have an easier time; @g->w@ is updated to reflect the | |
140 | * currently open word. | |
141 | */ | |
142 | ||
143 | if (!(g->f & f_lsr)) | |
144 | return; | |
145 | ||
146 | wl = DA_LEN(&g->iv); | |
147 | INSTR(g, GFRI_STORE, g->w); | |
148 | emit_load(g, g->w - 1); | |
149 | for (i = g->wi; i < wl; i++) { | |
150 | ip = &DA(&g->iv)[i]; | |
151 | assert(ip->op == GFRI_LSL); | |
152 | if (ip->arg) | |
153 | INSTR(g, GFRI_LSR, MPW_BITS - ip->arg); | |
154 | } | |
155 | g->f &= ~f_lsr; | |
156 | } | |
157 | ||
158 | static void ensure_loaded(struct gen *g, size_t w) | |
159 | { | |
160 | if (!(g->f & f_load)) { | |
161 | emit_load(g, w); | |
162 | g->wi = DA_LEN(&g->iv); | |
163 | } else if (w != g->w) { | |
164 | emit_right_shifts(g); | |
165 | if (w != g->w) { | |
166 | INSTR(g, GFRI_STORE, g->w); | |
167 | emit_load(g, w); | |
168 | } | |
169 | g->wi = DA_LEN(&g->iv); | |
170 | } | |
171 | } | |
172 | ||
/* Compile the modulus @p@ into the context @r@: set the word limit
 * @r->lim@ and top-word mask @r->mask@, keep a copy of @p@ in @r->p@, and
 * generate the instruction program @r->iv@ (of @r->in@ entries) together
 * with the final-pass entry point @r->fiv@.
 */
void gfreduce_create(gfreduce *r, mp *p)
{
  struct gen g = { 0, DA_INIT };
  unsigned long d;
  unsigned dw;
  mpscan sc;
  unsigned long i;
  size_t w, bb;

  /* --- Sort out the easy stuff --- *
   *
   * Here @d@ is one less than @mp_bits(p)@ (presumably the degree of
   * %$p$%), and @dw@ is its bit position within a word.  If the top bit
   * isn't word-aligned then @r->mask@ selects bit @dw@ and everything above
   * it in a word, and @r->lim@ is rounded up to count that partial word.
   */

  g.r = r;
  d = mp_bits(p); assert(d); d--;
  r->lim = d/MPW_BITS;
  dw = d%MPW_BITS;
  if (!dw)
    r->mask = 0;
  else {
    r->mask = MPW(((mpw)-1) << dw);
    r->lim++;
  }
  r->p = mp_copy(p);

  /* --- How this works --- *
   *
   * The instruction sequence is run with two ambient parameters: a pointer
   * (usually) just past the most significant word of the polynomial to be
   * reduced; and a word %$z$% which is the multiple of %$p'$% we are meant
   * to add.
   *
   * The sequence visits each word of the polynomial at most once.  Suppose
   * %$u = z x^{w N} + u'$%; our pointer points just past the end of %$u'$%.
   * Word %$I$% of %$u'$% will be affected by modulus bits %$p_i$% where
   * %$(N - I - 1) w + 1 \le i \le (N - I + 1) w - 1$%, so %$p_i$% affects
   * word %$I = \lceil (n - i + 1)/w \rceil$% and (if %$i$% is not a multiple
   * of %$w$%) also word %$I - 1$%.
   *
   * We have four instructions: @LOAD@ reads a specified word of %$u$% into an
   * accumulator, and @STORE@ stores it back (we'll always store back to the
   * same word we most recently read, but this isn't a requirement); and
   * @LSL@ and @LSR@, which XOR in appropriately shifted copies of %$z$% into
   * the accumulator.  So a typical program will contain sequences of @LSR@
   * and @LSL@ instructions sandwiched between @LOAD@/@STORE@ pairs.
   *
   * We do a single right-to-left pass across %$p$%.
   */

  /* Bias such that the left-shift count for modulus bit @i@ is
   * @(bb + i)%MPW_BITS@.
   */
  bb = MPW_BITS - dw;

  /* The top bit (index @d@) is deliberately excluded by the @i < d@ test. */
  for (i = 0, mp_scan(&sc, p); mp_step(&sc) && i < d; i++) {
    if (!mp_bit(&sc))
      continue;

    /* --- We've found a set bit, so work out which word it affects --- *
     *
     * In general, a bit affects two words: it needs to be shifted left into
     * one, and shifted right into the next.  We find the former here.
     */

    w = (d - i + MPW_BITS - 1)/MPW_BITS;

    /* --- Concentrate on the appropriate word --- */

    ensure_loaded(&g, w);

    /* --- Accumulate a new @LSL@ instruction --- *
     *
     * If this was a nonzero shift, then we'll need to arrange to do right
     * shifts into the following word.
     */

    INSTR(&g, GFRI_LSL, (bb + i)%MPW_BITS);
    if ((bb + i)%MPW_BITS)
      g.f |= f_lsr;
  }

  /* --- Wrapping up --- *
   *
   * We probably need a final @STORE@, and maybe a sequence of right shifts.
   */

  if (g.f & f_load) {
    emit_right_shifts(&g);
    INSTR(&g, GFRI_STORE, g.w);
  }

  /* --- Copy the instruction vector --- *
   *
   * If we've not set a final-pass offset yet then now would be an excellent
   * time.  Obviously it should be right at the end, because there's nothing
   * for a final pass to do.
   */

  r->in = DA_LEN(&g.iv);
  r->iv = xmalloc(r->in * sizeof(gfreduce_instr));
  memcpy(r->iv, DA(&g.iv), r->in * sizeof(gfreduce_instr));

  if (!(g.f & f_fip)) g.fip = DA_LEN(&g.iv);
  r->fiv = r->iv + g.fip;

  DA_DESTROY(&g.iv);
}
275 | ||
51f5bbe0 MW |
276 | #undef INSTR |
277 | ||
278 | #undef f_lsr | |
279 | #undef f_load | |
5937940f | 280 | #undef f_fip |
51f5bbe0 | 281 | |
ceb3f0c0 | 282 | /* --- @gfreduce_destroy@ --- * |
283 | * | |
284 | * Arguments: @gfreduce *r@ = structure to free | |
285 | * | |
286 | * Returns: --- | |
287 | * | |
288 | * Use: Reclaims the resources from a reduction context. | |
289 | */ | |
290 | ||
291 | void gfreduce_destroy(gfreduce *r) | |
292 | { | |
293 | mp_drop(r->p); | |
294 | xfree(r->iv); | |
295 | } | |
296 | ||
297 | /* --- @gfreduce_dump@ --- * | |
298 | * | |
299 | * Arguments: @gfreduce *r@ = structure to dump | |
300 | * @FILE *fp@ = file to dump on | |
301 | * | |
302 | * Returns: --- | |
303 | * | |
304 | * Use: Dumps a reduction context. | |
305 | */ | |
306 | ||
307 | void gfreduce_dump(gfreduce *r, FILE *fp) | |
308 | { | |
309 | size_t i; | |
310 | ||
311 | fprintf(fp, "poly = "); mp_writefile(r->p, fp, 16); | |
312 | fprintf(fp, "\n lim = %lu; mask = %lx\n", | |
313 | (unsigned long)r->lim, (unsigned long)r->mask); | |
314 | for (i = 0; i < r->in; i++) { | |
315 | static const char *opname[] = { "load", "lsl", "lsr", "store" }; | |
5937940f MW |
316 | if (&r->iv[i] == r->fiv) |
317 | fputs("final:\n", fp); | |
ceb3f0c0 | 318 | assert(r->iv[i].op < N(opname)); |
319 | fprintf(fp, " %s %lu\n", | |
320 | opname[r->iv[i].op], | |
321 | (unsigned long)r->iv[i].arg); | |
322 | } | |
5937940f MW |
323 | if (&r->iv[i] == r->fiv) |
324 | fputs("final:\n", fp); | |
ceb3f0c0 | 325 | } |
326 | ||
327 | /* --- @gfreduce_do@ --- * | |
328 | * | |
329 | * Arguments: @gfreduce *r@ = reduction context | |
330 | * @mp *d@ = destination | |
331 | * @mp *x@ = source | |
332 | * | |
333 | * Returns: Destination, @x@ reduced modulo the reduction poly. | |
334 | */ | |
335 | ||
336 | static void run(const gfreduce_instr *i, const gfreduce_instr *il, | |
337 | mpw *v, mpw z) | |
338 | { | |
339 | mpw w = 0; | |
340 | ||
341 | for (; i < il; i++) { | |
342 | switch (i->op) { | |
343 | case GFRI_LOAD: w = *(v - i->arg); break; | |
344 | case GFRI_LSL: w ^= z << i->arg; break; | |
345 | case GFRI_LSR: w ^= z >> i->arg; break; | |
346 | case GFRI_STORE: *(v - i->arg) = MPW(w); break; | |
347 | default: abort(); | |
348 | } | |
349 | } | |
350 | } | |
351 | ||
352 | mp *gfreduce_do(gfreduce *r, mp *d, mp *x) | |
353 | { | |
354 | mpw *v, *vl; | |
355 | const gfreduce_instr *il; | |
356 | mpw z; | |
357 | ||
358 | /* --- Try to reuse the source's space --- */ | |
359 | ||
360 | MP_COPY(x); | |
361 | if (d) MP_DROP(d); | |
362 | MP_DEST(x, MP_LEN(x), x->f); | |
363 | ||
364 | /* --- Do the reduction --- */ | |
365 | ||
366 | il = r->iv + r->in; | |
367 | if (MP_LEN(x) >= r->lim) { | |
368 | v = x->v + r->lim; | |
369 | vl = x->vl; | |
370 | while (vl-- > v) { | |
371 | while (*vl) { | |
372 | z = *vl; | |
373 | *vl = 0; | |
374 | run(r->iv, il, vl, z); | |
375 | } | |
376 | } | |
377 | if (r->mask) { | |
378 | while (*vl & r->mask) { | |
379 | z = *vl & r->mask; | |
380 | *vl &= ~r->mask; | |
5937940f | 381 | run(r->fiv, il, vl, z); |
ceb3f0c0 | 382 | } |
383 | } | |
384 | } | |
385 | ||
386 | /* --- Done --- */ | |
387 | ||
388 | MP_SHRINK(x); | |
389 | return (x); | |
390 | } | |
391 | ||
392 | /* --- @gfreduce_sqrt@ --- * | |
393 | * | |
394 | * Arguments: @gfreduce *r@ = pointer to reduction context | |
395 | * @mp *d@ = destination | |
396 | * @mp *x@ = some polynomial | |
397 | * | |
398 | * Returns: The square root of @x@ modulo @r->p@, or null. | |
399 | */ | |
400 | ||
401 | mp *gfreduce_sqrt(gfreduce *r, mp *d, mp *x) | |
402 | { | |
403 | mp *y = MP_COPY(x); | |
404 | mp *z, *spare = MP_NEW; | |
405 | unsigned long m = mp_bits(r->p) - 1; | |
406 | unsigned long i; | |
407 | ||
408 | for (i = 0; i < m - 1; i++) { | |
409 | mp *t = gf_sqr(spare, y); | |
410 | spare = y; | |
411 | y = gfreduce_do(r, t, t); | |
412 | } | |
413 | z = gf_sqr(spare, y); | |
414 | z = gfreduce_do(r, z, z); | |
415 | if (!MP_EQ(x, z)) { | |
416 | mp_drop(y); | |
417 | y = 0; | |
418 | } | |
419 | mp_drop(z); | |
420 | mp_drop(d); | |
421 | return (y); | |
422 | } | |
423 | ||
424 | /* --- @gfreduce_trace@ --- * | |
425 | * | |
426 | * Arguments: @gfreduce *r@ = pointer to reduction context | |
427 | * @mp *x@ = some polynomial | |
428 | * | |
429 | * Returns: The trace of @x@. (%$\Tr(x)=x + x^2 + \cdots + x^{2^{m-1}}$% | |
430 | * if %$x \in \gf{2^m}$%). | |
431 | */ | |
432 | ||
433 | int gfreduce_trace(gfreduce *r, mp *x) | |
434 | { | |
435 | mp *y = MP_COPY(x); | |
436 | mp *spare = MP_NEW; | |
437 | unsigned long m = mp_bits(r->p) - 1; | |
438 | unsigned long i; | |
439 | int rc; | |
440 | ||
441 | for (i = 0; i < m - 1; i++) { | |
442 | mp *t = gf_sqr(spare, y); | |
443 | spare = y; | |
444 | y = gfreduce_do(r, t, t); | |
445 | y = gf_add(y, y, x); | |
446 | } | |
a69a3efd | 447 | rc = !MP_ZEROP(y); |
ceb3f0c0 | 448 | mp_drop(spare); |
449 | mp_drop(y); | |
450 | return (rc); | |
451 | } | |
452 | ||
453 | /* --- @gfreduce_halftrace@ --- * | |
454 | * | |
455 | * Arguments: @gfreduce *r@ = pointer to reduction context | |
456 | * @mp *d@ = destination | |
457 | * @mp *x@ = some polynomial | |
458 | * | |
459 | * Returns: The half-trace of @x@. | |
460 | * (%$\HfTr(x)= x + x^{2^2} + \cdots + x^{2^{m-1}}$% | |
461 | * if %$x \in \gf{2^m}$% with %$m$% odd). | |
462 | */ | |
463 | ||
464 | mp *gfreduce_halftrace(gfreduce *r, mp *d, mp *x) | |
465 | { | |
466 | mp *y = MP_COPY(x); | |
467 | mp *spare = MP_NEW; | |
468 | unsigned long m = mp_bits(r->p) - 1; | |
469 | unsigned long i; | |
470 | ||
471 | mp_drop(d); | |
472 | for (i = 0; i < m - 1; i += 2) { | |
473 | mp *t = gf_sqr(spare, y); | |
474 | spare = y; | |
475 | y = gfreduce_do(r, t, t); | |
476 | t = gf_sqr(spare, y); | |
477 | spare = y; | |
478 | y = gfreduce_do(r, t, t); | |
479 | y = gf_add(y, y, x); | |
480 | } | |
481 | mp_drop(spare); | |
482 | return (y); | |
483 | } | |
484 | ||
485 | /* --- @gfreduce_quadsolve@ --- * | |
486 | * | |
487 | * Arguments: @gfreduce *r@ = pointer to reduction context | |
488 | * @mp *d@ = destination | |
489 | * @mp *x@ = some polynomial | |
490 | * | |
491 | * Returns: A polynomial @y@ such that %$y^2 + y = x$%, or null. | |
492 | */ | |
493 | ||
494 | mp *gfreduce_quadsolve(gfreduce *r, mp *d, mp *x) | |
495 | { | |
496 | unsigned long m = mp_bits(r->p) - 1; | |
497 | mp *t; | |
498 | ||
499 | MP_COPY(x); | |
500 | if (m & 1) | |
501 | d = gfreduce_halftrace(r, d, x); | |
502 | else { | |
503 | mp *z, *w, *rho = MP_NEW; | |
504 | mp *spare = MP_NEW; | |
505 | grand *fr = fibrand_create(0); | |
506 | unsigned long i; | |
507 | ||
508 | for (;;) { | |
509 | rho = mprand(rho, m, fr, 0); | |
510 | z = MP_ZERO; | |
511 | w = MP_COPY(rho); | |
512 | for (i = 0; i < m - 1; i++) { | |
513 | t = gf_sqr(spare, z); spare = z; z = gfreduce_do(r, t, t); | |
514 | t = gf_sqr(spare, w); spare = w; w = gfreduce_do(r, t, t); | |
515 | t = gf_mul(spare, w, x); t = gfreduce_do(r, t, t); spare = t; | |
516 | z = gf_add(z, z, t); | |
517 | w = gf_add(w, w, rho); | |
518 | } | |
a69a3efd | 519 | if (!MP_ZEROP(w)) |
ceb3f0c0 | 520 | break; |
521 | MP_DROP(z); | |
522 | MP_DROP(w); | |
523 | } | |
524 | if (d) MP_DROP(d); | |
525 | MP_DROP(w); | |
526 | MP_DROP(spare); | |
527 | MP_DROP(rho); | |
528 | fr->ops->destroy(fr); | |
529 | d = z; | |
530 | } | |
531 | ||
532 | t = gf_sqr(MP_NEW, d); t = gfreduce_do(r, t, t); t = gf_add(t, t, d); | |
533 | if (!MP_EQ(t, x)) { | |
534 | MP_DROP(d); | |
535 | d = 0; | |
536 | } | |
537 | MP_DROP(t); | |
538 | MP_DROP(x); | |
bc985cef | 539 | if (d) d->v[0] &= ~(mpw)1; |
ceb3f0c0 | 540 | return (d); |
541 | } | |
542 | ||
543 | /* --- @gfreduce_exp@ --- * | |
544 | * | |
545 | * Arguments: @gfreduce *gr@ = pointer to reduction context | |
45c0fd36 MW |
546 | * @mp *d@ = fake destination |
547 | * @mp *a@ = base | |
548 | * @mp *e@ = exponent | |
ceb3f0c0 | 549 | * |
45c0fd36 | 550 | * Returns: Result, %$a^e \bmod m$%. |
ceb3f0c0 | 551 | */ |
552 | ||
/* Raise @a@ to the power @e@ modulo the context's polynomial.  A zero
 * exponent gives @MP_ONE@; a negative exponent inverts @a@ modulo @gr->p@
 * first.  The (fake) destination @d@ is simply dropped.
 */
mp *gfreduce_exp(gfreduce *gr, mp *d, mp *a, mp *e)
{
  mp *x = MP_ONE;
  /* Scratch destination: a secure one if the exponent is marked @MP_BURN@. */
  mp *spare = (e->f & MP_BURN) ? MP_NEWSEC : MP_NEW;

  MP_SHRINK(e);
  /* Take our own reference to @a@; it is dropped again before returning. */
  MP_COPY(a);
  if (MP_ZEROP(e))
    ;
  else {
    if (MP_NEGP(e))
      a = gf_modinv(a, a, gr->p);
    /* NOTE(review): @EXP_SIMPLE@, @EXP_WINDOW@ and @EXP_THRESH@ are not
     * defined in this file -- presumably they come from "gfreduce-exp.h"
     * and may refer to the locals here (@spare@, @gr@) by name, so don't
     * rename them without checking.
     */
    if (MP_LEN(e) < EXP_THRESH)
      EXP_SIMPLE(x, a, e);
    else
      EXP_WINDOW(x, a, e);
  }
  mp_drop(d);
  mp_drop(a);
  mp_drop(spare);
  return (x);
}
575 | ||
576 | /*----- Test rig ----------------------------------------------------------*/ | |
577 | ||
578 | #ifdef TEST_RIG | |
579 | ||
/* Read the stringified token @x@ as a new multiprecision integer via
 * @mp_readstring@.
 */
#define MP(x) mp_readstring(MP_NEW, #x, 0, 0)
581 | ||
582 | static int vreduce(dstr *v) | |
583 | { | |
584 | mp *d = *(mp **)v[0].buf; | |
585 | mp *n = *(mp **)v[1].buf; | |
586 | mp *r = *(mp **)v[2].buf; | |
587 | mp *c; | |
588 | int ok = 1; | |
589 | gfreduce rr; | |
590 | ||
591 | gfreduce_create(&rr, d); | |
592 | c = gfreduce_do(&rr, MP_NEW, n); | |
593 | if (!MP_EQ(c, r)) { | |
594 | fprintf(stderr, "\n*** reduction failed\n*** "); | |
595 | gfreduce_dump(&rr, stderr); | |
596 | fprintf(stderr, "\n*** n = "); mp_writefile(n, stderr, 16); | |
597 | fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); | |
598 | fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); | |
599 | fprintf(stderr, "\n"); | |
600 | ok = 0; | |
601 | } | |
602 | gfreduce_destroy(&rr); | |
603 | mp_drop(n); mp_drop(d); mp_drop(r); mp_drop(c); | |
604 | assert(mparena_count(MPARENA_GLOBAL) == 0); | |
605 | return (ok); | |
606 | } | |
607 | ||
608 | static int vmodexp(dstr *v) | |
609 | { | |
610 | mp *p = *(mp **)v[0].buf; | |
611 | mp *g = *(mp **)v[1].buf; | |
612 | mp *x = *(mp **)v[2].buf; | |
613 | mp *r = *(mp **)v[3].buf; | |
614 | mp *c; | |
615 | int ok = 1; | |
616 | gfreduce rr; | |
617 | ||
618 | gfreduce_create(&rr, p); | |
619 | c = gfreduce_exp(&rr, MP_NEW, g, x); | |
620 | if (!MP_EQ(c, r)) { | |
621 | fprintf(stderr, "\n*** modexp failed\n*** "); | |
622 | fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); | |
623 | fprintf(stderr, "\n*** g = "); mp_writefile(g, stderr, 16); | |
624 | fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); | |
625 | fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); | |
626 | fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); | |
627 | fprintf(stderr, "\n"); | |
628 | ok = 0; | |
629 | } | |
630 | gfreduce_destroy(&rr); | |
631 | mp_drop(p); mp_drop(g); mp_drop(r); mp_drop(x); mp_drop(c); | |
632 | assert(mparena_count(MPARENA_GLOBAL) == 0); | |
633 | return (ok); | |
634 | } | |
635 | ||
636 | static int vsqrt(dstr *v) | |
637 | { | |
638 | mp *p = *(mp **)v[0].buf; | |
639 | mp *x = *(mp **)v[1].buf; | |
640 | mp *r = *(mp **)v[2].buf; | |
641 | mp *c; | |
642 | int ok = 1; | |
643 | gfreduce rr; | |
644 | ||
645 | gfreduce_create(&rr, p); | |
646 | c = gfreduce_sqrt(&rr, MP_NEW, x); | |
647 | if (!MP_EQ(c, r)) { | |
648 | fprintf(stderr, "\n*** sqrt failed\n*** "); | |
649 | fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); | |
650 | fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); | |
651 | fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); | |
652 | fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); | |
653 | fprintf(stderr, "\n"); | |
654 | ok = 0; | |
655 | } | |
656 | gfreduce_destroy(&rr); | |
657 | mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c); | |
658 | assert(mparena_count(MPARENA_GLOBAL) == 0); | |
659 | return (ok); | |
660 | } | |
661 | ||
662 | static int vtr(dstr *v) | |
663 | { | |
664 | mp *p = *(mp **)v[0].buf; | |
665 | mp *x = *(mp **)v[1].buf; | |
666 | int r = *(int *)v[2].buf, c; | |
667 | int ok = 1; | |
668 | gfreduce rr; | |
669 | ||
670 | gfreduce_create(&rr, p); | |
671 | c = gfreduce_trace(&rr, x); | |
672 | if (c != r) { | |
673 | fprintf(stderr, "\n*** trace failed\n*** "); | |
674 | fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); | |
675 | fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); | |
676 | fprintf(stderr, "\n*** c = %d", c); | |
677 | fprintf(stderr, "\n*** r = %d", r); | |
678 | fprintf(stderr, "\n"); | |
679 | ok = 0; | |
680 | } | |
681 | gfreduce_destroy(&rr); | |
45c0fd36 | 682 | mp_drop(p); mp_drop(x); |
ceb3f0c0 | 683 | assert(mparena_count(MPARENA_GLOBAL) == 0); |
684 | return (ok); | |
685 | } | |
686 | ||
687 | static int vhftr(dstr *v) | |
688 | { | |
689 | mp *p = *(mp **)v[0].buf; | |
690 | mp *x = *(mp **)v[1].buf; | |
691 | mp *r = *(mp **)v[2].buf; | |
692 | mp *c; | |
693 | int ok = 1; | |
694 | gfreduce rr; | |
695 | ||
696 | gfreduce_create(&rr, p); | |
697 | c = gfreduce_halftrace(&rr, MP_NEW, x); | |
698 | if (!MP_EQ(c, r)) { | |
699 | fprintf(stderr, "\n*** halftrace failed\n*** "); | |
700 | fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); | |
701 | fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); | |
702 | fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); | |
703 | fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); | |
704 | fprintf(stderr, "\n"); | |
705 | ok = 0; | |
706 | } | |
707 | gfreduce_destroy(&rr); | |
708 | mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c); | |
709 | assert(mparena_count(MPARENA_GLOBAL) == 0); | |
710 | return (ok); | |
711 | } | |
712 | ||
713 | static int vquad(dstr *v) | |
714 | { | |
715 | mp *p = *(mp **)v[0].buf; | |
716 | mp *x = *(mp **)v[1].buf; | |
717 | mp *r = *(mp **)v[2].buf; | |
718 | mp *c; | |
719 | int ok = 1; | |
720 | gfreduce rr; | |
721 | ||
722 | gfreduce_create(&rr, p); | |
723 | c = gfreduce_quadsolve(&rr, MP_NEW, x); | |
724 | if (!MP_EQ(c, r)) { | |
725 | fprintf(stderr, "\n*** quadsolve failed\n*** "); | |
726 | fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); | |
727 | fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); | |
728 | fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); | |
729 | fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); | |
730 | fprintf(stderr, "\n"); | |
731 | ok = 0; | |
732 | } | |
733 | gfreduce_destroy(&rr); | |
734 | mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c); | |
735 | assert(mparena_count(MPARENA_GLOBAL) == 0); | |
736 | return (ok); | |
737 | } | |
738 | ||
/* Table of test chunks handed to @test_run@: each entry gives a chunk
 * name, the function which checks one vector, and the zero-terminated list
 * of field types making up a vector.  The all-zero entry ends the table.
 */
static test_chunk defs[] = {
  { "reduce", vreduce, { &type_mp, &type_mp, &type_mp, 0 } },
  { "modexp", vmodexp, { &type_mp, &type_mp, &type_mp, &type_mp, 0 } },
  { "sqrt", vsqrt, { &type_mp, &type_mp, &type_mp, 0 } },
  { "trace", vtr, { &type_mp, &type_mp, &type_int, 0 } },
  { "halftrace", vhftr, { &type_mp, &type_mp, &type_mp, 0 } },
  { "quadsolve", vquad, { &type_mp, &type_mp, &type_mp, 0 } },
  { 0, 0, { 0 } }
};
748 | ||
749 | int main(int argc, char *argv[]) | |
750 | { | |
0f00dc4c | 751 | test_run(argc, argv, defs, SRCDIR"/t/gfreduce"); |
ceb3f0c0 | 752 | return (0); |
753 | } | |
754 | ||
755 | #endif | |
756 | ||
757 | /*----- That's all, folks -------------------------------------------------*/ |