X-Git-Url: https://git.distorted.org.uk/u/mdw/catacomb/blobdiff_plain/d1c733526062c5e1a2fb87edc35ebbaa4f9aa0fc..dd22938ef0d9b0131dad9171a8a95866ceec9607:/mpx-ksqr.c diff --git a/mpx-ksqr.c b/mpx-ksqr.c index 25dbb70..8a0ad64 100644 --- a/mpx-ksqr.c +++ b/mpx-ksqr.c @@ -1,6 +1,6 @@ /* -*-c-*- * - * $Id: mpx-ksqr.c,v 1.6 2000/10/08 15:48:35 mdw Exp $ + * $Id: mpx-ksqr.c,v 1.7 2002/10/09 00:36:03 mdw Exp $ * * Karatsuba-based squaring algorithm * @@ -30,6 +30,9 @@ /*----- Revision history --------------------------------------------------* * * $Log: mpx-ksqr.c,v $ + * Revision 1.7 2002/10/09 00:36:03 mdw + * Fix bounds on workspace for Karatsuba operations. + * * Revision 1.6 2000/10/08 15:48:35 mdw * Rename Karatsuba constants now that we have @gfx_kmul@ too. * @@ -64,7 +67,7 @@ #ifdef TEST_RIG # undef MPK_THRESH -# define MPK_THRESH 2 +# define MPK_THRESH 4 #endif /*----- Main code ---------------------------------------------------------*/ @@ -83,9 +86,9 @@ * large numbers, although more expensive on small ones, and * rather simpler than full-blown Karatsuba multiplication. * - * The destination must be twice as large as the argument. The - * scratch space must be twice as large as the argument, plus - * the magic number @MPK_SLOP@. + * The destination must be three times as large as the larger + * argument. The scratch space must be five times as large as + * the larger argument. */ void mpx_ksqr(mpw *dv, mpw *dvl, @@ -126,11 +129,6 @@ void mpx_ksqr(mpw *dv, mpw *dvl, m = (avl - av + 1) >> 1; avm = av + m; - assert(((void)"Destination too small for Karatsuba square", - dvl - dv >= 4 * m)); - assert(((void)"Not enough workspace for Karatsuba square", - svl - sv >= 4 * m)); - /* --- Sort out everything --- */ { @@ -138,6 +136,8 @@ void mpx_ksqr(mpw *dv, mpw *dvl, mpw *tdv = dv + m; mpw *rdv = tdv + m; + assert(rdv + m + 4 < dvl); + assert(ssv < svl); UADD2(sv, svm, av, avm, avm, avl); if (m > MPK_THRESH) mpx_ksqr(tdv, rdv + m + 4, sv, svm + 1, ssv, svl); @@ -209,8 +209,8 @@ static int usqr(dstr *v) LOAD(a, al, &v[0]); LOAD(c, cl, &v[1]); m = al - a + 1; - ALLOC(d, dl, 2 * m); - ALLOC(s, sl, 2 * m + 32); + ALLOC(d, dl, 3 * m); + ALLOC(s, sl, 5 * m); mpx_ksqr(d, dl, a, al, s, sl); if (!mpx_ueq(d, dl, c, cl)) {