| 1 | /* -*-c-*- |
| 2 | * |
| 3 | * Efficient reduction modulo sparse binary polynomials |
| 4 | * |
| 5 | * (c) 2004 Straylight/Edgeware |
| 6 | */ |
| 7 | |
| 8 | /*----- Licensing notice --------------------------------------------------* |
| 9 | * |
| 10 | * This file is part of Catacomb. |
| 11 | * |
| 12 | * Catacomb is free software; you can redistribute it and/or modify |
| 13 | * it under the terms of the GNU Library General Public License as |
| 14 | * published by the Free Software Foundation; either version 2 of the |
| 15 | * License, or (at your option) any later version. |
| 16 | * |
| 17 | * Catacomb is distributed in the hope that it will be useful, |
| 18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | * GNU Library General Public License for more details. |
| 21 | * |
| 22 | * You should have received a copy of the GNU Library General Public |
| 23 | * License along with Catacomb; if not, write to the Free |
| 24 | * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
| 25 | * MA 02111-1307, USA. |
| 26 | */ |
| 27 | |
| 28 | /*----- Header files ------------------------------------------------------*/ |
| 29 | |
| 30 | #include <mLib/alloc.h> |
| 31 | #include <mLib/darray.h> |
| 32 | #include <mLib/macros.h> |
| 33 | |
| 34 | #include "gf.h" |
| 35 | #include "gfreduce.h" |
| 36 | #include "gfreduce-exp.h" |
| 37 | #include "fibrand.h" |
| 38 | #include "mprand.h" |
| 39 | |
| 40 | /*----- Data structures ---------------------------------------------------*/ |
| 41 | |
| 42 | DA_DECL(instr_v, gfreduce_instr); |
| 43 | |
| 44 | /*----- Main code ---------------------------------------------------------*/ |
| 45 | |
| 46 | /* --- What's going on here? --- * |
| 47 | * |
| 48 | * Let's face it, @gfx_div@ sucks. It works (I hope), but it's not in any |
| 49 | * sense fast. Here, we do efficient reduction modulo sparse polynomials. |
| 50 | * (It works for arbitrary polynomials, but isn't efficient for dense ones.) |
| 51 | * |
| 52 | * Suppose that %$p = x^n + p'$% where %$p' = \sum_{0\le i<n} p_i x^i$%, |
| 53 | * hopefully with only a few %$p_i \ne 0$%. We're going to compile %$p$% |
| 54 | * into a sequence of instructions which can be used to perform reduction |
| 55 | * modulo %$p$%. The important observation is that |
| 56 | * %$x^n \equiv p' \pmod p$%. |
| 57 | * |
| 58 | * Suppose we're working with %$w$%-bit words; let %$n = N w + n'$% with |
| 59 | * %$0 \le n' < w$%. Let %$u(x)$% be some arbitrary polynomial. Write |
| 60 | * %$u = z x^k + u'$% with %$\deg u' < k \ge n$%. Then a reduction step uses |
| 61 | * that %$u \equiv u' + z p' x^{k-n} \pmod p$%: the right hand side has |
| 62 | * degree %$\max \{ \deg u', k + \deg p' - n + \deg z \} < \deg u$%, so this |
| 63 | * makes progress towards a complete reduction. |
| 64 | * |
| 65 | * The compiled instruction sequence computes |
| 66 | * %$u' + z p' x^{k-n} = u' + \sum_{0\le i<n} z x^{k-n+i}$%. |
| 67 | */ |
| 68 | |
| 69 | /* --- @gfreduce_create@ --- * |
| 70 | * |
| 71 | * Arguments: @gfreduce *r@ = structure to fill in |
| 72 | * @mp *x@ = a (hopefully sparse) polynomial |
| 73 | * |
| 74 | * Returns: --- |
| 75 | * |
| 76 | * Use: Initializes a context structure for reduction. |
| 77 | */ |
| 78 | |
| 79 | struct gen { |
| 80 | unsigned f; /* Flags */ |
| 81 | #define f_lsr 1u /* Overflow from previous word */ |
| 82 | #define f_load 2u /* Outstanding @LOAD@ */ |
| 83 | #define f_fip 4u /* Final-pass offset is set */ |
| 84 | instr_v iv; /* Instruction vector */ |
| 85 | size_t fip; /* Offset for final-pass reduction */ |
| 86 | size_t w; /* Currently loaded target word */ |
| 87 | size_t wi; /* Left-shifts for current word */ |
| 88 | gfreduce *r; /* Reduction context pointer */ |
| 89 | }; |
| 90 | |
| 91 | #define INSTR(g_, op_, arg_) do { \ |
| 92 | struct gen *_g = (g_); \ |
| 93 | instr_v *_iv = &_g->iv; \ |
| 94 | size_t _i = DA_LEN(_iv); \ |
| 95 | \ |
| 96 | DA_ENSURE(_iv, 1); \ |
| 97 | DA(_iv)[_i].op = (op_); \ |
| 98 | DA(_iv)[_i].arg = (arg_); \ |
| 99 | DA_EXTEND(_iv, 1); \ |
| 100 | } while (0) |
| 101 | |
| 102 | static void emit_load(struct gen *g, size_t w) |
| 103 | { |
| 104 | /* --- If this is not the low-order word then note final-pass start --- * |
| 105 | * |
| 106 | * Once we've eliminated the whole high-degree words, there will possibly |
| 107 | * remain a few high-degree bits. We can further reduce the subject |
| 108 | * polynomial by subtracting an appropriate multiple of %$p'$%, but if we |
| 109 | * do this naively we'll end up addressing `low-order' words beyond the |
| 110 | * bottom of our input. We solve this problem by storing an alternative |
| 111 | * start position for this final pass (which works because we scan bits |
| 112 | * right-to-left). |
| 113 | */ |
| 114 | |
| 115 | if (!(g->f & f_fip) && w < g->r->lim) { |
| 116 | g->fip = DA_LEN(&g->iv); |
| 117 | g->f |= f_fip; |
| 118 | } |
| 119 | |
| 120 | /* --- Actually emit the instruction --- */ |
| 121 | |
| 122 | INSTR(g, GFRI_LOAD, w); |
| 123 | g->f |= f_load; |
| 124 | g->w = w; |
| 125 | } |
| 126 | |
| 127 | static void emit_right_shifts(struct gen *g) |
| 128 | { |
| 129 | gfreduce_instr *ip; |
| 130 | size_t i, wl; |
| 131 | |
| 132 | /* --- Close off the current word --- * |
| 133 | * |
| 134 | * If we shifted into this current word with a nonzero bit offset, then |
| 135 | * we'll also need to arrange to perform a sequence of right shifts into |
| 136 | * the following word, which we might as well do by scanning the |
| 137 | * instruction sequence (which starts at @wi@). |
| 138 | * |
| 139 | * Either way, we leave a @LOAD@ unmatched if there was one before, in the |
| 140 | * hope that callers have an easier time; @g->w@ is updated to reflect the |
| 141 | * currently open word. |
| 142 | */ |
| 143 | |
| 144 | if (!(g->f & f_lsr)) |
| 145 | return; |
| 146 | |
| 147 | wl = DA_LEN(&g->iv); |
| 148 | INSTR(g, GFRI_STORE, g->w); |
| 149 | emit_load(g, g->w - 1); |
| 150 | for (i = g->wi; i < wl; i++) { |
| 151 | ip = &DA(&g->iv)[i]; |
| 152 | assert(ip->op == GFRI_LSL); |
| 153 | if (ip->arg) |
| 154 | INSTR(g, GFRI_LSR, MPW_BITS - ip->arg); |
| 155 | } |
| 156 | g->f &= ~f_lsr; |
| 157 | } |
| 158 | |
| 159 | static void ensure_loaded(struct gen *g, size_t w) |
| 160 | { |
| 161 | if (!(g->f & f_load)) { |
| 162 | emit_load(g, w); |
| 163 | g->wi = DA_LEN(&g->iv); |
| 164 | } else if (w != g->w) { |
| 165 | emit_right_shifts(g); |
| 166 | if (w != g->w) { |
| 167 | INSTR(g, GFRI_STORE, g->w); |
| 168 | emit_load(g, w); |
| 169 | } |
| 170 | g->wi = DA_LEN(&g->iv); |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | void gfreduce_create(gfreduce *r, mp *p) |
| 175 | { |
| 176 | struct gen g = { 0, DA_INIT }; |
| 177 | unsigned long d; |
| 178 | unsigned dw; |
| 179 | mpscan sc; |
| 180 | unsigned long i; |
| 181 | size_t w, bb; |
| 182 | |
| 183 | /* --- Sort out the easy stuff --- */ |
| 184 | |
| 185 | g.r = r; |
| 186 | d = mp_bits(p); assert(d); d--; |
| 187 | r->lim = d/MPW_BITS; |
| 188 | dw = d%MPW_BITS; |
| 189 | if (!dw) |
| 190 | r->mask = 0; |
| 191 | else { |
| 192 | r->mask = MPW(((mpw)-1) << dw); |
| 193 | r->lim++; |
| 194 | } |
| 195 | r->p = mp_copy(p); |
| 196 | |
| 197 | /* --- How this works --- * |
| 198 | * |
| 199 | * The instruction sequence is run with two ambient parameters: a pointer |
| 200 | * (usually) just past the most significant word of the polynomial to be |
| 201 | * reduced; and a word %$z$% which is the multiple of %$p'$% we are meant |
| 202 | * to add. |
| 203 | * |
| 204 | * The sequence visits each word of the polynomial at most once. Suppose |
| 205 | * %$u = z x^{w N} + u'$%; our pointer points just past the end of %$u'$%. |
| 206 | * Word %$I$% of %$u'$% will be affected by modulus bits %$p_i$% where |
| 207 | * %$(N - I - 1) w + 1 \le i \le (N - I + 1) w - 1$%, so %$p_i$% affects |
| 208 | * word %$I = \lceil (n - i + 1)/w \rceil$% and (if %$i$% is not a multiple |
| 209 | * of %$w$%) also word %$I - 1$%. |
| 210 | * |
| 211 | * We have four instructions: @LOAD@ reads a specified word of %$u$% into an |
| 212 | * accumulator, and @STORE@ stores it back (we'll always store back to the |
| 213 | * same word we most recently read, but this isn't a requirement); and |
| 214 | * @LSL@ and @LSR@, which XOR in appropriately shifted copies of %$z$% into |
| 215 | * the accumulator. So a typical program will contain sequences of @LSR@ |
| 216 | * and @LSL@ instructions sandwiched between @LOAD@/@STORE@ pairs. |
| 217 | * |
| 218 | * We do a single right-to-left pass across %$p$%. |
| 219 | */ |
| 220 | |
| 221 | bb = MPW_BITS - dw; |
| 222 | |
| 223 | for (i = 0, mp_scan(&sc, p); mp_step(&sc) && i < d; i++) { |
| 224 | if (!mp_bit(&sc)) |
| 225 | continue; |
| 226 | |
| 227 | /* --- We've found a set bit, so work out which word it affects --- * |
| 228 | * |
| 229 | * In general, a bit affects two words: it needs to be shifted left into |
| 230 | * one, and shifted right into the next. We find the former here. |
| 231 | */ |
| 232 | |
| 233 | w = (d - i + MPW_BITS - 1)/MPW_BITS; |
| 234 | |
| 235 | /* --- Concentrate on the appropriate word --- */ |
| 236 | |
| 237 | ensure_loaded(&g, w); |
| 238 | |
| 239 | /* --- Accumulate a new @LSL@ instruction --- * |
| 240 | * |
| 241 | * If this was a nonzero shift, then we'll need to arrange to do right |
| 242 | * shifts into the following word. |
| 243 | */ |
| 244 | |
| 245 | INSTR(&g, GFRI_LSL, (bb + i)%MPW_BITS); |
| 246 | if ((bb + i)%MPW_BITS) |
| 247 | g.f |= f_lsr; |
| 248 | } |
| 249 | |
| 250 | /* --- Wrapping up --- * |
| 251 | * |
| 252 | * We probably need a final @STORE@, and maybe a sequence of right shifts. |
| 253 | */ |
| 254 | |
| 255 | if (g.f & f_load) { |
| 256 | emit_right_shifts(&g); |
| 257 | INSTR(&g, GFRI_STORE, g.w); |
| 258 | } |
| 259 | |
| 260 | /* --- Copy the instruction vector. |
| 261 | * |
| 262 | * If we've not set a final-pass offset yet then now would be an excellent |
| 263 | * time. Obviously it should be right at the end, because there's nothing |
| 264 | * for a final pass to do. |
| 265 | */ |
| 266 | |
| 267 | r->in = DA_LEN(&g.iv); |
| 268 | r->iv = xmalloc(r->in * sizeof(gfreduce_instr)); |
| 269 | memcpy(r->iv, DA(&g.iv), r->in * sizeof(gfreduce_instr)); |
| 270 | |
| 271 | if (!(g.f & f_fip)) g.fip = DA_LEN(&g.iv); |
| 272 | r->fiv = r->iv + g.fip; |
| 273 | |
| 274 | DA_DESTROY(&g.iv); |
| 275 | } |
| 276 | |
| 277 | #undef INSTR |
| 278 | |
| 279 | #undef f_lsr |
| 280 | #undef f_load |
| 281 | #undef f_fip |
| 282 | |
| 283 | /* --- @gfreduce_destroy@ --- * |
| 284 | * |
| 285 | * Arguments: @gfreduce *r@ = structure to free |
| 286 | * |
| 287 | * Returns: --- |
| 288 | * |
| 289 | * Use: Reclaims the resources from a reduction context. |
| 290 | */ |
| 291 | |
| 292 | void gfreduce_destroy(gfreduce *r) |
| 293 | { |
| 294 | mp_drop(r->p); |
| 295 | xfree(r->iv); |
| 296 | } |
| 297 | |
| 298 | /* --- @gfreduce_dump@ --- * |
| 299 | * |
| 300 | * Arguments: @const gfreduce *r@ = structure to dump |
| 301 | * @FILE *fp@ = file to dump on |
| 302 | * |
| 303 | * Returns: --- |
| 304 | * |
| 305 | * Use: Dumps a reduction context. |
| 306 | */ |
| 307 | |
| 308 | void gfreduce_dump(const gfreduce *r, FILE *fp) |
| 309 | { |
| 310 | size_t i; |
| 311 | |
| 312 | fprintf(fp, "poly = "); mp_writefile(r->p, fp, 16); |
| 313 | fprintf(fp, "\n lim = %lu; mask = %lx\n", |
| 314 | (unsigned long)r->lim, (unsigned long)r->mask); |
| 315 | for (i = 0; i < r->in; i++) { |
| 316 | static const char *opname[] = { "load", "lsl", "lsr", "store" }; |
| 317 | if (&r->iv[i] == r->fiv) |
| 318 | fputs("final:\n", fp); |
| 319 | assert(r->iv[i].op < N(opname)); |
| 320 | fprintf(fp, " %s %lu\n", |
| 321 | opname[r->iv[i].op], |
| 322 | (unsigned long)r->iv[i].arg); |
| 323 | } |
| 324 | if (&r->iv[i] == r->fiv) |
| 325 | fputs("final:\n", fp); |
| 326 | } |
| 327 | |
| 328 | /* --- @gfreduce_do@ --- * |
| 329 | * |
| 330 | * Arguments: @const gfreduce *r@ = reduction context |
| 331 | * @mp *d@ = destination |
| 332 | * @mp *x@ = source |
| 333 | * |
| 334 | * Returns: Destination, @x@ reduced modulo the reduction poly. |
| 335 | */ |
| 336 | |
| 337 | static void run(const gfreduce_instr *i, const gfreduce_instr *il, |
| 338 | mpw *v, mpw z) |
| 339 | { |
| 340 | mpw w = 0; |
| 341 | |
| 342 | for (; i < il; i++) { |
| 343 | switch (i->op) { |
| 344 | case GFRI_LOAD: w = *(v - i->arg); break; |
| 345 | case GFRI_LSL: w ^= z << i->arg; break; |
| 346 | case GFRI_LSR: w ^= z >> i->arg; break; |
| 347 | case GFRI_STORE: *(v - i->arg) = MPW(w); break; |
| 348 | default: abort(); |
| 349 | } |
| 350 | } |
| 351 | } |
| 352 | |
| 353 | mp *gfreduce_do(const gfreduce *r, mp *d, mp *x) |
| 354 | { |
| 355 | mpw *v, *vl; |
| 356 | const gfreduce_instr *il; |
| 357 | mpw z; |
| 358 | |
| 359 | /* --- Try to reuse the source's space --- */ |
| 360 | |
| 361 | MP_COPY(x); |
| 362 | if (d) MP_DROP(d); |
| 363 | MP_DEST(x, MP_LEN(x), x->f); |
| 364 | |
| 365 | /* --- Do the reduction --- */ |
| 366 | |
| 367 | il = r->iv + r->in; |
| 368 | if (MP_LEN(x) >= r->lim) { |
| 369 | v = x->v + r->lim; |
| 370 | vl = x->vl; |
| 371 | while (vl-- > v) { |
| 372 | while (*vl) { |
| 373 | z = *vl; |
| 374 | *vl = 0; |
| 375 | run(r->iv, il, vl, z); |
| 376 | } |
| 377 | } |
| 378 | if (r->mask) { |
| 379 | while (*vl & r->mask) { |
| 380 | z = *vl & r->mask; |
| 381 | *vl &= ~r->mask; |
| 382 | run(r->fiv, il, vl, z); |
| 383 | } |
| 384 | } |
| 385 | } |
| 386 | |
| 387 | /* --- Done --- */ |
| 388 | |
| 389 | MP_SHRINK(x); |
| 390 | return (x); |
| 391 | } |
| 392 | |
| 393 | /* --- @gfreduce_sqrt@ --- * |
| 394 | * |
| 395 | * Arguments: @const gfreduce *r@ = pointer to reduction context |
| 396 | * @mp *d@ = destination |
| 397 | * @mp *x@ = some polynomial |
| 398 | * |
| 399 | * Returns: The square root of @x@ modulo @r->p@, or null. |
| 400 | */ |
| 401 | |
| 402 | mp *gfreduce_sqrt(const gfreduce *r, mp *d, mp *x) |
| 403 | { |
| 404 | mp *y = MP_COPY(x); |
| 405 | mp *z, *spare = MP_NEW; |
| 406 | unsigned long m = mp_bits(r->p) - 1; |
| 407 | unsigned long i; |
| 408 | |
| 409 | /* --- This is pretty easy --- * |
| 410 | * |
| 411 | * Note that %$x = x^{2^m}$%; therefore %$(x^{2^{m-1}})^2 = x^{2^m} = x$%, |
| 412 | * so %$x^{2^{m-1}}$% is the square root we seek. |
| 413 | */ |
| 414 | |
| 415 | for (i = 0; i < m - 1; i++) { |
| 416 | mp *t = gf_sqr(spare, y); |
| 417 | spare = y; |
| 418 | y = gfreduce_do(r, t, t); |
| 419 | } |
| 420 | z = gf_sqr(spare, y); |
| 421 | z = gfreduce_do(r, z, z); |
| 422 | if (!MP_EQ(x, z)) { |
| 423 | mp_drop(y); |
| 424 | y = 0; |
| 425 | } |
| 426 | mp_drop(z); |
| 427 | mp_drop(d); |
| 428 | return (y); |
| 429 | } |
| 430 | |
| 431 | /* --- @gfreduce_trace@ --- * |
| 432 | * |
| 433 | * Arguments: @const gfreduce *r@ = pointer to reduction context |
| 434 | * @mp *x@ = some polynomial |
| 435 | * |
| 436 | * Returns: The trace of @x@. (%$\Tr(x)=x + x^2 + \cdots + x^{2^{m-1}}$% |
| 437 | * if %$x \in \gf{2^m}$%). Since the trace is invariant under |
| 438 | * the Frobenius automorphism (i.e., %$\Tr(x)^2 = \Tr(x)$%), it |
| 439 | * must be an element of the base field, i.e., %$\gf{2}$%, and |
| 440 | * we only need a single bit to represent it. |
| 441 | */ |
| 442 | |
| 443 | int gfreduce_trace(const gfreduce *r, mp *x) |
| 444 | { |
| 445 | mp *y = MP_COPY(x); |
| 446 | mp *spare = MP_NEW; |
| 447 | unsigned long m = mp_bits(r->p) - 1; |
| 448 | unsigned long i; |
| 449 | int rc; |
| 450 | |
| 451 | for (i = 0; i < m - 1; i++) { |
| 452 | mp *t = gf_sqr(spare, y); |
| 453 | spare = y; |
| 454 | y = gfreduce_do(r, t, t); |
| 455 | y = gf_add(y, y, x); |
| 456 | } |
| 457 | rc = !MP_ZEROP(y); |
| 458 | mp_drop(spare); |
| 459 | mp_drop(y); |
| 460 | return (rc); |
| 461 | } |
| 462 | |
| 463 | /* --- @gfreduce_halftrace@ --- * |
| 464 | * |
| 465 | * Arguments: @const gfreduce *r@ = pointer to reduction context |
| 466 | * @mp *d@ = destination |
| 467 | * @mp *x@ = some polynomial |
| 468 | * |
| 469 | * Returns: The half-trace of @x@. |
| 470 | * (%$\HfTr(x)= x + x^{2^2} + \cdots + x^{2^{m-1}}$% |
| 471 | * if %$x \in \gf{2^m}$% with %$m$% odd). |
| 472 | */ |
| 473 | |
| 474 | mp *gfreduce_halftrace(const gfreduce *r, mp *d, mp *x) |
| 475 | { |
| 476 | mp *y = MP_COPY(x); |
| 477 | mp *spare = MP_NEW; |
| 478 | unsigned long m = mp_bits(r->p) - 1; |
| 479 | unsigned long i; |
| 480 | |
| 481 | mp_drop(d); |
| 482 | for (i = 0; i < m - 1; i += 2) { |
| 483 | mp *t = gf_sqr(spare, y); |
| 484 | spare = y; |
| 485 | y = gfreduce_do(r, t, t); |
| 486 | t = gf_sqr(spare, y); |
| 487 | spare = y; |
| 488 | y = gfreduce_do(r, t, t); |
| 489 | y = gf_add(y, y, x); |
| 490 | } |
| 491 | mp_drop(spare); |
| 492 | return (y); |
| 493 | } |
| 494 | |
| 495 | /* --- @gfreduce_quadsolve@ --- * |
| 496 | * |
| 497 | * Arguments: @const gfreduce *r@ = pointer to reduction context |
| 498 | * @mp *d@ = destination |
| 499 | * @mp *x@ = some polynomial |
| 500 | * |
| 501 | * Returns: A polynomial @z@ such that %$z^2 + z = x$%, or null. |
| 502 | * |
| 503 | * Use: Solves quadratic equations in a field with characteristic 2. |
| 504 | * Suppose we have an equation %$y^2 + A y + B = 0$% where |
| 505 | * %$A \ne 0$%. (If %$A = 0$% then %$y = \sqrt{B}$% and you |
| 506 | * want @gfreduce_sqrt@ instead.) Use this function to solve |
| 507 | * %$z^2 + z = B/A^2$%; then set %$y = A z$%, since |
| 508 | * %$y^2 + y = A^2 z^2 + A^2 z = A^2 (z^2 + z) = B$% as |
| 509 | * required. |
| 510 | * |
| 511 | * The two roots are %$z$% and %$z + 1$%; this function always |
| 512 | * returns the one with zero scalar coefficient. |
| 513 | */ |
| 514 | |
| 515 | mp *gfreduce_quadsolve(const gfreduce *r, mp *d, mp *x) |
| 516 | { |
| 517 | unsigned long m = mp_bits(r->p) - 1; |
| 518 | mp *t; |
| 519 | |
| 520 | /* --- About the solutions --- * |
| 521 | * |
| 522 | * Factor %$z^2 + z = z (z + 1)$%. Therefore, if %$z^2 + z = x$% and |
| 523 | * %$z' = z + 1$% then %$z'^2 + z' = z^2 + 1 + z + 1 = z^2 + z$%, so |
| 524 | * %$z + 1$% is the other solution. |
| 525 | * |
| 526 | * A solution exists if and only if %$\Tr(x) = 0$%. To see the `only if' |
| 527 | * implication, recall that the trace function is linear, and hence |
| 528 | * $%\Tr(z^2 + z) = \Tr(z)^2 + \Tr(z) = \Tr(z) + \Tr(z) = 0$%. The `if' |
| 529 | * direction will be proven using explicit constructions captured in the |
| 530 | * code below. |
| 531 | */ |
| 532 | |
| 533 | MP_COPY(x); |
| 534 | if (m & 1) { |
| 535 | |
| 536 | /* --- A short-cut for fields with odd degree --- |
| 537 | * |
| 538 | * The method below works in all binary fields, but there's a quicker way |
| 539 | * which works whenever the degree is odd. The half-trace is |
| 540 | * %$z = \sum_{0\le i\le (m-1)/2} x^{2^{2i}}$%. Then %$z^2 + z = {}$% |
| 541 | * %$\sum_{0\le i\le (m-1)/2} (x^{2^{2i}} + x^{2^{2i+1}}) = {}$% |
| 542 | * %$\Tr(x) + x^{2^m} = \Tr(x) + x$%. This therefore gives us the |
| 543 | * solution we want whenever %$\Tr(x) = 0$%. |
| 544 | */ |
| 545 | |
| 546 | d = gfreduce_halftrace(r, d, x); |
| 547 | } else { |
| 548 | mp *z, *w, *rho = MP_NEW; |
| 549 | mp *spare = MP_NEW; |
| 550 | grand *fr = fibrand_create(0); |
| 551 | unsigned long i; |
| 552 | |
| 553 | /* --- Unpicking the magic --- * |
| 554 | * |
| 555 | * Choose %$\rho \inr \gf{2^m}$% with %$\Tr(\rho) = 1$%. Let |
| 556 | * %$z = \sum_{0\le i<m} \rho^{2^i} \sum_{0\le j<i} x^{2^j} = {}$% |
| 557 | * %$\rho^2 x + \rho^4 (x + x^2) + \rho^8 (x + x^2 + x^4) + \cdots + {}$% |
| 558 | * %$\rho^{2^{m-1}} (x + x^2 + x^{2^{m-2}})$%. Then %$z^2 = {}$% |
| 559 | * %$\sum_{0\le i<m} \rho^{2^{i+1}} \sum_{0\le j<i} x^{2^{j+1}} = {}$% |
| 560 | * %$\sum_{1\le i\le m} \rho^{2^i} \sum_{1\le j\le i} x^{2^j}$% and, |
| 561 | * somewhat miraculously, %$z^2 + z = \sum_{0\le i<m} \rho^{2^i} x + {}$% |
| 562 | * %$\rho \sum_{1\le i<m} x^{2^i} = x \Tr(\rho) + \rho \Tr(x)$%. Again, |
| 563 | * this gives us the root we want whenever %$\Tr(x) = 0$%. |
| 564 | * |
| 565 | * The loop below calculates %$w = \Tr(\rho)$% and %$z$% simultaneously, |
| 566 | * since the same powers of %$\rho$% are wanted in both calculations. |
| 567 | */ |
| 568 | |
| 569 | for (;;) { |
| 570 | rho = mprand(rho, m, fr, 0); |
| 571 | z = MP_ZERO; |
| 572 | w = MP_COPY(rho); |
| 573 | for (i = 0; i < m - 1; i++) { |
| 574 | t = gf_sqr(spare, z); spare = z; z = gfreduce_do(r, t, t); |
| 575 | t = gf_sqr(spare, w); spare = w; w = gfreduce_do(r, t, t); |
| 576 | t = gf_mul(spare, w, x); t = gfreduce_do(r, t, t); spare = t; |
| 577 | z = gf_add(z, z, t); |
| 578 | w = gf_add(w, w, rho); |
| 579 | } |
| 580 | if (!MP_ZEROP(w)) |
| 581 | break; |
| 582 | MP_DROP(z); |
| 583 | MP_DROP(w); |
| 584 | } |
| 585 | if (d) MP_DROP(d); |
| 586 | MP_DROP(w); |
| 587 | MP_DROP(spare); |
| 588 | MP_DROP(rho); |
| 589 | fr->ops->destroy(fr); |
| 590 | d = z; |
| 591 | } |
| 592 | |
| 593 | /* --- Check that we calculated the right answer --- * |
| 594 | * |
| 595 | * It should be correct; if it's not then maybe the ring we're working in |
| 596 | * isn't really a field. |
| 597 | */ |
| 598 | |
| 599 | t = gf_sqr(MP_NEW, d); t = gfreduce_do(r, t, t); t = gf_add(t, t, d); |
| 600 | if (!MP_EQ(t, x)) { |
| 601 | MP_DROP(d); |
| 602 | d = 0; |
| 603 | } |
| 604 | MP_DROP(t); |
| 605 | MP_DROP(x); |
| 606 | |
| 607 | /* --- Pick a canonical root --- * |
| 608 | * |
| 609 | * The two roots are %$z$% and %$z + 1$%; pick the one with a zero |
| 610 | * scalar coefficient just for consistency's sake. |
| 611 | */ |
| 612 | |
| 613 | if (d) d->v[0] &= ~(mpw)1; |
| 614 | return (d); |
| 615 | } |
| 616 | |
| 617 | /* --- @gfreduce_exp@ --- * |
| 618 | * |
| 619 | * Arguments: @const gfreduce *gr@ = pointer to reduction context |
| 620 | * @mp *d@ = fake destination |
| 621 | * @mp *a@ = base |
| 622 | * @mp *e@ = exponent |
| 623 | * |
| 624 | * Returns: Result, %$a^e \bmod m$%. |
| 625 | */ |
| 626 | |
| 627 | mp *gfreduce_exp(const gfreduce *gr, mp *d, mp *a, mp *e) |
| 628 | { |
| 629 | mp *x = MP_ONE; |
| 630 | mp *spare = (e->f & MP_BURN) ? MP_NEWSEC : MP_NEW; |
| 631 | |
| 632 | MP_SHRINK(e); |
| 633 | MP_COPY(a); |
| 634 | if (MP_ZEROP(e)) |
| 635 | ; |
| 636 | else { |
| 637 | if (MP_NEGP(e)) |
| 638 | a = gf_modinv(a, a, gr->p); |
| 639 | if (MP_LEN(e) < EXP_THRESH) |
| 640 | EXP_SIMPLE(x, a, e); |
| 641 | else |
| 642 | EXP_WINDOW(x, a, e); |
| 643 | } |
| 644 | mp_drop(d); |
| 645 | mp_drop(a); |
| 646 | mp_drop(spare); |
| 647 | return (x); |
| 648 | } |
| 649 | |
| 650 | /*----- Test rig ----------------------------------------------------------*/ |
| 651 | |
| 652 | #ifdef TEST_RIG |
| 653 | |
| 654 | static int vreduce(dstr *v) |
| 655 | { |
| 656 | mp *d = *(mp **)v[0].buf; |
| 657 | mp *n = *(mp **)v[1].buf; |
| 658 | mp *r = *(mp **)v[2].buf; |
| 659 | mp *c; |
| 660 | int ok = 1; |
| 661 | gfreduce rr; |
| 662 | |
| 663 | gfreduce_create(&rr, d); |
| 664 | c = gfreduce_do(&rr, MP_NEW, n); |
| 665 | if (!MP_EQ(c, r)) { |
| 666 | fprintf(stderr, "\n*** reduction failed\n*** "); |
| 667 | gfreduce_dump(&rr, stderr); |
| 668 | fprintf(stderr, "\n*** n = "); mp_writefile(n, stderr, 16); |
| 669 | fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); |
| 670 | fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); |
| 671 | fprintf(stderr, "\n"); |
| 672 | ok = 0; |
| 673 | } |
| 674 | gfreduce_destroy(&rr); |
| 675 | mp_drop(n); mp_drop(d); mp_drop(r); mp_drop(c); |
| 676 | assert(mparena_count(MPARENA_GLOBAL) == 0); |
| 677 | return (ok); |
| 678 | } |
| 679 | |
| 680 | static int vmodexp(dstr *v) |
| 681 | { |
| 682 | mp *p = *(mp **)v[0].buf; |
| 683 | mp *g = *(mp **)v[1].buf; |
| 684 | mp *x = *(mp **)v[2].buf; |
| 685 | mp *r = *(mp **)v[3].buf; |
| 686 | mp *c; |
| 687 | int ok = 1; |
| 688 | gfreduce rr; |
| 689 | |
| 690 | gfreduce_create(&rr, p); |
| 691 | c = gfreduce_exp(&rr, MP_NEW, g, x); |
| 692 | if (!MP_EQ(c, r)) { |
| 693 | fprintf(stderr, "\n*** modexp failed\n*** "); |
| 694 | fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); |
| 695 | fprintf(stderr, "\n*** g = "); mp_writefile(g, stderr, 16); |
| 696 | fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); |
| 697 | fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); |
| 698 | fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); |
| 699 | fprintf(stderr, "\n"); |
| 700 | ok = 0; |
| 701 | } |
| 702 | gfreduce_destroy(&rr); |
| 703 | mp_drop(p); mp_drop(g); mp_drop(r); mp_drop(x); mp_drop(c); |
| 704 | assert(mparena_count(MPARENA_GLOBAL) == 0); |
| 705 | return (ok); |
| 706 | } |
| 707 | |
| 708 | static int vsqrt(dstr *v) |
| 709 | { |
| 710 | mp *p = *(mp **)v[0].buf; |
| 711 | mp *x = *(mp **)v[1].buf; |
| 712 | mp *r = *(mp **)v[2].buf; |
| 713 | mp *c; |
| 714 | int ok = 1; |
| 715 | gfreduce rr; |
| 716 | |
| 717 | gfreduce_create(&rr, p); |
| 718 | c = gfreduce_sqrt(&rr, MP_NEW, x); |
| 719 | if (!MP_EQ(c, r)) { |
| 720 | fprintf(stderr, "\n*** sqrt failed\n*** "); |
| 721 | fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); |
| 722 | fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); |
| 723 | fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); |
| 724 | fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); |
| 725 | fprintf(stderr, "\n"); |
| 726 | ok = 0; |
| 727 | } |
| 728 | gfreduce_destroy(&rr); |
| 729 | mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c); |
| 730 | assert(mparena_count(MPARENA_GLOBAL) == 0); |
| 731 | return (ok); |
| 732 | } |
| 733 | |
| 734 | static int vtr(dstr *v) |
| 735 | { |
| 736 | mp *p = *(mp **)v[0].buf; |
| 737 | mp *x = *(mp **)v[1].buf; |
| 738 | int r = *(int *)v[2].buf, c; |
| 739 | int ok = 1; |
| 740 | gfreduce rr; |
| 741 | |
| 742 | gfreduce_create(&rr, p); |
| 743 | c = gfreduce_trace(&rr, x); |
| 744 | if (c != r) { |
| 745 | fprintf(stderr, "\n*** trace failed\n*** "); |
| 746 | fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); |
| 747 | fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); |
| 748 | fprintf(stderr, "\n*** c = %d", c); |
| 749 | fprintf(stderr, "\n*** r = %d", r); |
| 750 | fprintf(stderr, "\n"); |
| 751 | ok = 0; |
| 752 | } |
| 753 | gfreduce_destroy(&rr); |
| 754 | mp_drop(p); mp_drop(x); |
| 755 | assert(mparena_count(MPARENA_GLOBAL) == 0); |
| 756 | return (ok); |
| 757 | } |
| 758 | |
| 759 | static int vhftr(dstr *v) |
| 760 | { |
| 761 | mp *p = *(mp **)v[0].buf; |
| 762 | mp *x = *(mp **)v[1].buf; |
| 763 | mp *r = *(mp **)v[2].buf; |
| 764 | mp *c; |
| 765 | int ok = 1; |
| 766 | gfreduce rr; |
| 767 | |
| 768 | gfreduce_create(&rr, p); |
| 769 | c = gfreduce_halftrace(&rr, MP_NEW, x); |
| 770 | if (!MP_EQ(c, r)) { |
| 771 | fprintf(stderr, "\n*** halftrace failed\n*** "); |
| 772 | fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); |
| 773 | fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); |
| 774 | fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); |
| 775 | fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); |
| 776 | fprintf(stderr, "\n"); |
| 777 | ok = 0; |
| 778 | } |
| 779 | gfreduce_destroy(&rr); |
| 780 | mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c); |
| 781 | assert(mparena_count(MPARENA_GLOBAL) == 0); |
| 782 | return (ok); |
| 783 | } |
| 784 | |
| 785 | static int vquad(dstr *v) |
| 786 | { |
| 787 | mp *p = *(mp **)v[0].buf; |
| 788 | mp *x = *(mp **)v[1].buf; |
| 789 | mp *r = *(mp **)v[2].buf; |
| 790 | mp *c; |
| 791 | int ok = 1; |
| 792 | gfreduce rr; |
| 793 | |
| 794 | gfreduce_create(&rr, p); |
| 795 | c = gfreduce_quadsolve(&rr, MP_NEW, x); |
| 796 | if (!MP_EQ(c, r)) { |
| 797 | fprintf(stderr, "\n*** quadsolve failed\n*** "); |
| 798 | fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); |
| 799 | fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); |
| 800 | fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); |
| 801 | fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); |
| 802 | fprintf(stderr, "\n"); |
| 803 | ok = 0; |
| 804 | } |
| 805 | gfreduce_destroy(&rr); |
| 806 | mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c); |
| 807 | assert(mparena_count(MPARENA_GLOBAL) == 0); |
| 808 | return (ok); |
| 809 | } |
| 810 | |
| 811 | static test_chunk defs[] = { |
| 812 | { "reduce", vreduce, { &type_mp, &type_mp, &type_mp, 0 } }, |
| 813 | { "modexp", vmodexp, { &type_mp, &type_mp, &type_mp, &type_mp, 0 } }, |
| 814 | { "sqrt", vsqrt, { &type_mp, &type_mp, &type_mp, 0 } }, |
| 815 | { "trace", vtr, { &type_mp, &type_mp, &type_int, 0 } }, |
| 816 | { "halftrace", vhftr, { &type_mp, &type_mp, &type_mp, 0 } }, |
| 817 | { "quadsolve", vquad, { &type_mp, &type_mp, &type_mp, 0 } }, |
| 818 | { 0, 0, { 0 } } |
| 819 | }; |
| 820 | |
| 821 | int main(int argc, char *argv[]) |
| 822 | { |
| 823 | test_run(argc, argv, defs, SRCDIR"/t/gfreduce"); |
| 824 | return (0); |
| 825 | } |
| 826 | |
| 827 | #endif |
| 828 | |
| 829 | /*----- That's all, folks -------------------------------------------------*/ |