X-Git-Url: https://git.distorted.org.uk/u/mdw/catacomb/blobdiff_plain/ba6e6b64033b1f9de49feccb5c9cd438354481f7..0f00dc4c8eb47e67bc0f148c2dd109f73a451e0a:/math/gfx-kmul.c diff --git a/math/gfx-kmul.c b/math/gfx-kmul.c new file mode 100644 index 0000000..5a5838a --- /dev/null +++ b/math/gfx-kmul.c @@ -0,0 +1,247 @@ +/* -*-c-*- + * + * Karatsuba's multiplication algorithm on binary polynomials + * + * (c) 2000 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of Catacomb. + * + * Catacomb is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * Catacomb is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with Catacomb; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +/*----- Header files ------------------------------------------------------*/ + +#include +#include + +#include "gfx.h" +#include "karatsuba.h" + +/*----- Tweakables --------------------------------------------------------*/ + +#ifdef TEST_RIG +# undef GFK_THRESH +# define GFK_THRESH 1 +#endif + +/*----- Main code ---------------------------------------------------------*/ + +/* --- @gfx_kmul@ --- * + * + * Arguments: @mpw *dv, *dvl@ = pointer to destination buffer + * @const mpw *av, *avl@ = pointer to first argument + * @const mpw *bv, *bvl@ = pointer to second argument + * @mpw *sv, *svl@ = pointer to scratch workspace + * + * Returns: --- + * + * Use: Multiplies two binary polynomials using Karatsuba's + * algorithm. This is rather faster than traditional long + * multiplication (e.g., @gfx_umul@) on polynomials with large + * degree, although more expensive on small ones. + * + * The destination must be twice as large as the larger + * argument. The scratch space must be twice as large as the + * larger argument. + */ + +void gfx_kmul(mpw *dv, mpw *dvl, + const mpw *av, const mpw *avl, + const mpw *bv, const mpw *bvl, + mpw *sv, mpw *svl) +{ + const mpw *avm, *bvm; + size_t m; + + /* --- Dispose of easy cases to @mpx_umul@ --- * + * + * Karatsuba is only a win on large numbers, because of all the + * recursiveness and bookkeeping. The recursive calls make a quick check + * to see whether to bottom out to @gfx_umul@ which should help quite a + * lot, but sometimes the only way to know is to make sure... + */ + + MPX_SHRINK(av, avl); + MPX_SHRINK(bv, bvl); + + if (avl - av <= GFK_THRESH || bvl - bv <= GFK_THRESH) { + gfx_mul(dv, dvl, av, avl, bv, bvl); + return; + } + + /* --- How the algorithm works --- * + * + * Let %$A = xb + y$% and %$B = ub + v$%. Then, simply by expanding, + * %$AB = x u b^2 + b(x v + y u) + y v$%. That's not helped any, because + * I've got four multiplications, each four times easier than the one I + * started with. However, note that I can rewrite the coefficient of %$b$% + * as %$xv + yu = (x + y)(u + v) - xu - yv$%. The terms %$xu$% and %$yv$% + * I've already calculated, and that leaves only one more multiplication to + * do. So now I have three multiplications, each four times easier, and + * that's a win. + */ + + /* --- First things --- * + * + * Sort out where to break the factors in half. I'll choose the midpoint + * of the larger one, since this minimizes the amount of work I have to do + * most effectively. + */ + + if (avl - av > bvl - bv) { + m = (avl - av + 1) >> 1; + avm = av + m; + if (bvl - bv > m) + bvm = bv + m; + else + bvm = bvl; + } else { + m = (bvl - bv + 1) >> 1; + bvm = bv + m; + if (avl - av > m) + avm = av + m; + else + avm = avl; + } + + /* --- Sort out the middle term --- */ + + { + mpw *bsv = sv + m, *ssv = bsv + m; + mpw *rdv = dv + m, *rdvl = rdv + 2 * m; + + assert(rdvl <= dvl); + assert(ssv <= svl); + UXOR2(sv, bsv, av, avm, avm, avl); + UXOR2(bsv, ssv, bv, bvm, bvm, bvl); + if (m > GFK_THRESH) + gfx_kmul(rdv, rdvl, sv, bsv, bsv, ssv, ssv, svl); + else + gfx_mul(rdv, rdvl, sv, bsv, bsv, ssv); + } + + /* --- Sort out the other two terms --- */ + + { + mpw *svm = sv + m, *ssv = svm + m; + mpw *tdv = dv + m; + mpw *rdv = tdv + m; + + if (avl == avm || bvl == bvm) + MPX_ZERO(rdv + m, dvl); + else { + if (m > GFK_THRESH) + gfx_kmul(sv, ssv, avm, avl, bvm, bvl, ssv, svl); + else + gfx_mul(sv, ssv, avm, avl, bvm, bvl); + MPX_COPY(rdv + m, dvl, svm, ssv); + UXOR(rdv, sv, svm); + UXOR(tdv, sv, ssv); + } + + if (m > GFK_THRESH) + gfx_kmul(sv, ssv, av, avm, bv, bvm, ssv, svl); + else + gfx_mul(sv, ssv, av, avm, bv, bvm); + MPX_COPY(dv, tdv, sv, svm); + UXOR(tdv, sv, ssv); + UXOR(tdv, svm, ssv); + } +} + +/*----- Test rig ----------------------------------------------------------*/ + +#ifdef TEST_RIG + +#include +#include + +#define ALLOC(v, vl, sz) do { \ + size_t _sz = (sz); \ + mpw *_vv = xmalloc(MPWS(_sz)); \ + mpw *_vvl = _vv + _sz; \ + (v) = _vv; \ + (vl) = _vvl; \ +} while (0) + +#define LOAD(v, vl, d) do { \ + const dstr *_d = (d); \ + mpw *_v, *_vl; \ + ALLOC(_v, _vl, MPW_RQ(_d->len)); \ + mpx_loadb(_v, _vl, _d->buf, _d->len); \ + (v) = _v; \ + (vl) = _vl; \ +} while (0) + +#define MAX(x, y) ((x) > (y) ? (x) : (y)) + +static void dumpmp(const char *msg, const mpw *v, const mpw *vl) +{ + fputs(msg, stderr); + MPX_SHRINK(v, vl); + while (v < vl) + fprintf(stderr, " %08lx", (unsigned long)*--vl); + fputc('\n', stderr); +} + +static int mul(dstr *v) +{ + mpw *a, *al; + mpw *b, *bl; + mpw *c, *cl; + mpw *d, *dl; + mpw *s, *sl; + size_t m; + int ok = 1; + + LOAD(a, al, &v[0]); + LOAD(b, bl, &v[1]); + LOAD(c, cl, &v[2]); + m = MAX(al - a, bl - b) + 1; + ALLOC(d, dl, 2 * m); + ALLOC(s, sl, 2 * m); + + gfx_kmul(d, dl, a, al, b, bl, s, sl); + if (!mpx_ueq(d, dl, c, cl)) { + fprintf(stderr, "\n*** mul failed\n"); + dumpmp(" a", a, al); + dumpmp(" b", b, bl); + dumpmp("expected", c, cl); + dumpmp(" result", d, dl); + ok = 0; + } + + xfree(a); xfree(b); xfree(c); xfree(d); xfree(s); + return (ok); +} + +static test_chunk defs[] = { + { "mul", mul, { &type_hex, &type_hex, &type_hex, 0 } }, + { 0, 0, { 0 } } +}; + +int main(int argc, char *argv[]) +{ + test_run(argc, argv, defs, SRCDIR"/t/gfx"); + return (0); +} + +#endif + +/*----- That's all, folks -------------------------------------------------*/