X-Git-Url: https://git.distorted.org.uk/u/mdw/catacomb/blobdiff_plain/ba6e6b64033b1f9de49feccb5c9cd438354481f7..0f00dc4c8eb47e67bc0f148c2dd109f73a451e0a:/math/mpx-ksqr.c diff --git a/math/mpx-ksqr.c b/math/mpx-ksqr.c new file mode 100644 index 0000000..e96040b --- /dev/null +++ b/math/mpx-ksqr.c @@ -0,0 +1,211 @@ +/* -*-c-*- + * + * Karatsuba-based squaring algorithm + * + * (c) 1999 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of Catacomb. + * + * Catacomb is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * Catacomb is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with Catacomb; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +/*----- Header files ------------------------------------------------------*/ + +#include +#include + +#include "mpx.h" +#include "karatsuba.h" + +/*----- Tweakables --------------------------------------------------------*/ + +#ifdef TEST_RIG +# undef MPK_THRESH +# define MPK_THRESH 4 +#endif + +/*----- Main code ---------------------------------------------------------*/ + +/* --- @mpx_ksqr@ --- * + * + * Arguments: @mpw *dv, *dvl@ = pointer to destination buffer + * @const mpw *av, *avl@ = pointer to first argument + * @mpw *sv, *svl@ = pointer to scratch workspace + * + * Returns: --- + * + * Use: Squares a multiprecision integers using something similar to + * Karatsuba's multiplication algorithm. This is rather faster + * than traditional long multiplication (e.g., @mpx_umul@) on + * large numbers, although more expensive on small ones, and + * rather simpler than full-blown Karatsuba multiplication. + * + * The destination must be three times as large as the larger + * argument. The scratch space must be five times as large as + * the larger argument. + */ + +void mpx_ksqr(mpw *dv, mpw *dvl, + const mpw *av, const mpw *avl, + mpw *sv, mpw *svl) +{ + const mpw *avm; + size_t m; + + /* --- Dispose of easy cases to @mpx_usqr@ --- * + * + * Karatsuba is only a win on large numbers, because of all the + * recursiveness and bookkeeping. The recursive calls make a quick check + * to see whether to bottom out to @mpx_usqr@ which should help quite a + * lot, but sometimes the only way to know is to make sure... + */ + + MPX_SHRINK(av, avl); + + if (avl - av <= MPK_THRESH) { + mpx_usqr(dv, dvl, av, avl); + return; + } + + /* --- How the algorithm works --- * + * + * The identity for squaring is known to all schoolchildren. + * Let %$A = xb + y$%. Then %$A^2 = x^2 b^2 + 2 x y b + y^2$%. Now, + * %$(x + y)^2 - x^2 - y^2 = 2 x y$%, which means I only need to do three + * squarings. + */ + + /* --- First things --- * + * + * Sort out where to break the factor in half. + */ + + m = (avl - av + 1) >> 1; + avm = av + m; + + /* --- Sort out everything --- */ + + { + mpw *svm = sv + m, *svn = svm + m, *ssv = svn + 4; + mpw *tdv = dv + m; + mpw *rdv = tdv + m; + + assert(rdv + m + 4 < dvl); + assert(ssv < svl); + UADD2(sv, svm, av, avm, avm, avl); + if (m > MPK_THRESH) + mpx_ksqr(tdv, rdv + m + 4, sv, svm + 1, ssv, svl); + else + mpx_usqr(tdv, rdv + m + 4, sv, svm + 1); + + if (m > MPK_THRESH) + mpx_ksqr(sv, ssv, avm, avl, ssv, svl); + else + mpx_usqr(sv, ssv, avm, avl); + MPX_COPY(rdv + m + 1, dvl, svm + 1, svn); + UADD(rdv, sv, svm + 1); + USUB(tdv, sv, svn); + + if (m > MPK_THRESH) + mpx_ksqr(sv, ssv, av, avm, ssv, svl); + else + mpx_usqr(sv, ssv, av, avm); + MPX_COPY(dv, tdv, sv, svm); + UADD(tdv, svm, svn); + USUB(tdv, sv, svn); + } +} + +/*----- Test rig ----------------------------------------------------------*/ + +#ifdef TEST_RIG + +#include +#include + +#define ALLOC(v, vl, sz) do { \ + size_t _sz = (sz); \ + mpw *_vv = xmalloc(MPWS(_sz)); \ + mpw *_vvl = _vv + _sz; \ + (v) = _vv; \ + (vl) = _vvl; \ +} while (0) + +#define LOAD(v, vl, d) do { \ + const dstr *_d = (d); \ + mpw *_v, *_vl; \ + ALLOC(_v, _vl, MPW_RQ(_d->len)); \ + mpx_loadb(_v, _vl, _d->buf, _d->len); \ + (v) = _v; \ + (vl) = _vl; \ +} while (0) + +#define MAX(x, y) ((x) > (y) ? (x) : (y)) + +static void dumpmp(const char *msg, const mpw *v, const mpw *vl) +{ + fputs(msg, stderr); + MPX_SHRINK(v, vl); + while (v < vl) + fprintf(stderr, " %08lx", (unsigned long)*--vl); + fputc('\n', stderr); +} + +static int usqr(dstr *v) +{ + mpw *a, *al; + mpw *c, *cl; + mpw *d, *dl; + mpw *s, *sl; + size_t m; + int ok = 1; + + LOAD(a, al, &v[0]); + LOAD(c, cl, &v[1]); + m = al - a + 1; + ALLOC(d, dl, 3 * m); + ALLOC(s, sl, 5 * m); + + mpx_ksqr(d, dl, a, al, s, sl); + if (!mpx_ueq(d, dl, c, cl)) { + fprintf(stderr, "\n*** usqr failed\n"); + dumpmp(" a", a, al); + dumpmp("expected", c, cl); + dumpmp(" result", d, dl); + ok = 0; + } + + xfree(a); xfree(c); xfree(d); xfree(s); + return (ok); +} + +static test_chunk defs[] = { + { "usqr", usqr, { &type_hex, &type_hex, 0 } }, + { 0, 0, { 0 } } +}; + +int main(int argc, char *argv[]) +{ + test_run(argc, argv, defs, SRCDIR"/t/mpx"); + return (0); +} + +#endif + +/*----- That's all, folks -------------------------------------------------*/