From: Mark Wooding Date: Thu, 1 Feb 2024 14:32:42 +0000 (+0000) Subject: math/gfx-sqr.c: Use bithacking rather than a table for squaring. X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/commitdiff_plain/d9d9e6456f3f6d03a0ae1d1eb78e6796ecdb7fab math/gfx-sqr.c: Use bithacking rather than a table for squaring. This gives a modest performance improvement to binary-curve scalar multiplication. --- diff --git a/math/Makefile.am b/math/Makefile.am index 8c53940c..26e6d91b 100644 --- a/math/Makefile.am +++ b/math/Makefile.am @@ -318,17 +318,6 @@ TESTS += gfx.t$(EXEEXT) libmath_la_SOURCES += gfx-kmul.c TESTS += gfx-kmul.t$(EXEEXT) libmath_la_SOURCES += gfx-sqr.c -nodist_libmath_la_SOURCES += ../precomp/math/gfx-sqrtab.c -PRECOMPS += $(precomp)/math/gfx-sqrtab.c -PRECOMP_PROGS += gfx-sqr-mktab -if !CROSS_COMPILING -$(precomp)/math/gfx-sqrtab.c: - $(AM_V_at)$(MKDIR_P) $(precomp)/math - $(AM_V_at)$(MAKE) gfx-sqr-mktab$(EXEEXT) - $(AM_V_GEN)./gfx-sqr-mktab >$(precomp)/math/gfx-sqrtab.c.new && \ - mv $(precomp)/math/gfx-sqrtab.c.new \ - $(precomp)/math/gfx-sqrtab.c -endif TESTS += gfx-sqr.t$(EXEEXT) EXTRA_DIST += t/gfx diff --git a/math/gfx-sqr-mktab.c b/math/gfx-sqr-mktab.c deleted file mode 100644 index efc610ac..00000000 --- a/math/gfx-sqr-mktab.c +++ /dev/null @@ -1,86 +0,0 @@ -/* -*-c-*- - * - * Build table for squaring of binary polynomials - * - * (c) 2000 Straylight/Edgeware - */ - -/*----- Licensing notice --------------------------------------------------* - * - * This file is part of Catacomb. - * - * Catacomb is free software; you can redistribute it and/or modify - * it under the terms of the GNU Library General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * Catacomb is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public - * License along with Catacomb; if not, write to the Free - * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - */ - -/*----- Header files ------------------------------------------------------*/ - -#include -#include - -#include - -/*----- Main code ---------------------------------------------------------*/ - -static void mktab(uint16 *t) -{ - unsigned i, j, x; - - for (i = 0; i < 256; i++) { - x = 0; - for (j = 0; j < 8; j++) { - if (i & (1 << j)) - x |= 1 << (2 * j); - } - t[i] = x; - } -} - -int main(void) -{ - uint16 t[256]; - unsigned i; - - mktab(t); -fputs("\ -/* -*-c-*-\n\ - *\n\ - * Bit spacing table for binary polynomial squaring\n\ - */\n\ -\n\ -#include \n\ -\n\ -const uint16 gfx_sqrtab[256] = {\n\ - ", stdout); - - for (i = 0; i < 256; i++) { - printf("0x%04x", t[i]); - if (i == 255) - puts("\n};"); - else if (i % 8 == 7) - fputs(",\n ", stdout); - else - fputs(", ", stdout); - } - - if (fclose(stdout)) { - fprintf(stderr, "error writing data\n"); - exit(EXIT_FAILURE); - } - - return (0); -} - -/*----- That's all, folks -------------------------------------------------*/ diff --git a/math/gfx-sqr.c b/math/gfx-sqr.c index ba54da88..0192738b 100644 --- a/math/gfx-sqr.c +++ b/math/gfx-sqr.c @@ -29,10 +29,7 @@ #include "mpx.h" #include "gfx.h" - -/*----- Static variables --------------------------------------------------*/ - -extern const uint16 gfx_sqrtab[256]; +#include "permute.h" /*----- Main code ---------------------------------------------------------*/ @@ -48,8 +45,25 @@ extern const uint16 gfx_sqrtab[256]; void gfx_sqr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl) { - mpd a = 0, aa = 0; - unsigned b = 0, bb = 0; +#if MPW_BITS <= 16 +# define REGWD 16 +#elif MPW_BITS <= 32 +# define REGWD 32 +#elif MPW_BITS <= 64 +# define REGWD 64 +#elif MPW_BITS <= 128 +# define REGWD 128 +#else +# error "unsupported limb width: extend `gfx-sqr.c'" +#endif + +#if MPW_BITS == REGWD + typedef mpw regty; +#else + typedef mpd regty; +#endif + + regty a, t; /* --- Simple stuff --- */ @@ -62,76 +76,51 @@ void gfx_sqr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl) * Our method depends on the fact that, in a field of characteristic 2, we * have that %$(a + b)^2 = a^2 + b^2$%. Thus, to square a polynomial, it's * sufficient just to put a zero bit between each of the bits of the - * original argument. We use a precomputed table for this, and work on - * entire octets at a time. Life is more complicated because we've got to - * be careful of bizarre architectures which don't have words with a - * multiple of 8 bits in them. + * original argument. */ - for (;;) { - - /* --- Input buffering --- */ - - if (b < 8) { - if (av >= avl) - break; - a |= *av++ << b; - b += MPW_BITS; - } - - /* --- Do the work in the middle --- */ - - aa |= (mpd)(gfx_sqrtab[U8(a)]) << bb; - bb += 16; - a >>= 8; - b -= 8; - - /* --- Output buffering --- */ - - if (bb >= MPW_BITS) { - *dv++ = MPW(aa); - if (dv >= dvl) - return; - aa >>= MPW_BITS; - bb -= MPW_BITS; - } - } - - /* --- Flush the input buffer --- */ - - if (b) for (;;) { - aa |= (mpd)(gfx_sqrtab[U8(a)]) << bb; - bb += 16; - if (bb > MPW_BITS) { - *dv++ = MPW(aa); - if (dv >= dvl) - return; - aa >>= MPW_BITS; - bb -= MPW_BITS; - } - a >>= 8; - if (b <= 8) - break; - else - b -= 8; - } + while (av < avl) { + a = *av++; + /* ..., 7, 6, 5, 4, 3, 2, 1, 0 */ + SWIZZLE_EXCH(a, 0, 1); /* ..., 7, 6, 5, 4, 3, 2, 0, 1 */ + SWIZZLE_EXCH(a, 0, 2); /* ..., 7, 6, 5, 4, 3, 1, 0, 2 */ + SWIZZLE_EXCH(a, 0, 3); /* ..., 7, 6, 5, 4, 2, 1, 0, 3 */ +#if MPW_BITS > 16 + SWIZZLE_EXCH(a, 0, 4); /* ..., 7, 6, 5, 3, 2, 1, 0, 4 */ +#endif +#if MPW_BITS > 32 + SWIZZLE_EXCH(a, 0, 5); /* ..., 7, 6, 4, 3, 2, 1, 0, 5 */ +#endif +#if MPW_BITS > 64 + SWIZZLE_EXCH(a, 0, 6); /* ..., 7, 5, 4, 3, 2, 1, 0, 6 */ +#endif +#if MPW_BITS > 128 +# error "unsupported limb width: extend `gfx-sqr.c'" +#endif - /* --- Flush the output buffer --- */ - - if (bb) for (;;) { - *dv++ = MPW(aa); - if (dv >= dvl) - return; - aa >>= MPW_BITS; - if (bb <= MPW_BITS) - break; - else - bb -= MPW_BITS; + /* Write out the low half, which consists of the low-order bits in + * even-numbered positions. + */ + *dv++ = MPW(a&IXMASK(0)); if (dv >= dvl) break; + + /* Write the high half. This is trickier, since, in general, the bits + * are split across even-numbered bits in the high part of the wider + * register and odd-numbered bits in the low part. Alas, we can't + * resolve this mess without special cases because shifts are broken in + * C. + */ + t = ((a >> 1)&IXMASK(0)) << (REGWD - MPW_BITS); +#if MPW_BITS < REGWD + t |= (a&IXMASK(0)) >> MPW_BITS; +#endif + *dv++ = MPW(t); if (dv >= dvl) break; } /* --- Zero the rest of everything --- */ MPX_ZERO(dv, dvl); + +#undef REGWD } /*----- Test rig ----------------------------------------------------------*/