From 7ba379a677058c8f48bf4f98631db4cf63cc56a2 Mon Sep 17 00:00:00 2001 From: Mark Wooding Date: Thu, 26 May 2016 09:26:09 +0100 Subject: [PATCH] math/: Add low-level testing for accelerated `mpx-mul4' multiplier. --- math/Makefile.am | 15 ++- math/mpx-mul4-test.c | 291 +++++++++++++++++++++++++++++++++++++++++++++++++++ math/t/mul4 | 69 ++++++++++++ 3 files changed, 373 insertions(+), 2 deletions(-) create mode 100644 math/mpx-mul4-test.c create mode 100644 math/t/mul4 diff --git a/math/Makefile.am b/math/Makefile.am index 0afee1f1..804ffbb0 100644 --- a/math/Makefile.am +++ b/math/Makefile.am @@ -182,11 +182,22 @@ noinst_PROGRAMS += bittest TESTS += bittest EXTRA_DIST += t/mpx if CPUFAM_X86 -libmath_la_SOURCES += mpx-mul4-x86-sse2.S +MPX_MUL4_SOURCES = mpx-mul4-x86-sse2.S +check_PROGRAMS += mpx-mul4.t +TESTS += mpx-mul4.t$(EXEEXT) endif if CPUFAM_AMD64 -libmath_la_SOURCES += mpx-mul4-amd64-sse2.S +MPX_MUL4_SOURCES = mpx-mul4-amd64-sse2.S +check_PROGRAMS += mpx-mul4.t +TESTS += mpx-mul4.t$(EXEEXT) endif +libmath_la_SOURCES += $(MPX_MUL4_SOURCES) + +mpx_mul4_t_SOURCES = mpx-mul4-test.c $(MPX_MUL4_SOURCES) +mpx_mul4_t_CPPFLAGS = \ + $(AM_CPPFLAGS) \ + -DTEST_MUL4 -DSRCDIR="\"$(srcdir)\"" +mpx_mul4_t_LDADD = $(top_builddir)/libcatacomb.la $(mLib_LIBS) ## A quick-and-dirty parser, used for parsing descriptions of groups, fields, ## etc. diff --git a/math/mpx-mul4-test.c b/math/mpx-mul4-test.c new file mode 100644 index 00000000..883c4fcc --- /dev/null +++ b/math/mpx-mul4-test.c @@ -0,0 +1,291 @@ +/* -*-c-*- + * + * Testing optimized 128-bit multipliers + * + * (c) 2017 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of Catacomb. + * + * Catacomb is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. 
+ * + * Catacomb is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with Catacomb; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +/*----- Header files ------------------------------------------------------*/ + +#include "config.h" + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <mLib/macros.h> +#include <mLib/report.h> +#include <mLib/testrig.h> + +#include "dispatch.h" +#include "mp.h" +#include "mpmont.h" +#include "mptext.h" + +/*----- CPU feature detection ---------------------------------------------*/ + +#if CPUFAM_X86 +# define VARIANT _x86_sse2 +# define REPR_32 +static int cpu_features_p(void) { return (cpu_feature_p(CPUFEAT_X86_SSE2)); } +#endif + +#if CPUFAM_AMD64 +# define VARIANT _amd64_sse2 +# define REPR_32 +static int cpu_features_p(void) { return (cpu_feature_p(CPUFEAT_X86_SSE2)); } +#endif + +#ifndef VARIANT +# error "Unsupported CPU family."
+#endif + +#ifdef REPR_32 +# define NWBY 4 +# define NDBY 8 +# define LDW LOAD32 +# define LDD LOAD64 +# define STW STORE32 +# define STD STORE64 +typedef struct { mpw w[4]; } p128; +typedef struct { mpw w[8]; } x128; +typedef struct { mpd w[6]; } carry; +#endif + +/*----- Test operation table ----------------------------------------------*/ + +#define TESTOPS(_) \ + /* a c u x v y z' y' c' */ \ + _(dmul4, NIL, CARRY, P128, P128, P128, P128, P128, NIL, CARRY) \ + _(dmla4, P128, CARRY, P128, P128, P128, P128, P128, NIL, CARRY) \ + _(mul4, NIL, CARRY, NIL, P128, NIL, P128, P128, NIL, CARRY) \ + _(mla4, P128, CARRY, NIL, P128, NIL, P128, P128, NIL, CARRY) \ + _(mmul4, NIL, NIL, P128, P128, P128, P128, P128, X128, CARRY) \ + _(mmla4, P128, NIL, P128, P128, P128, P128, P128, X128, CARRY) \ + _(mont4, P128, NIL, NIL, P128, NIL, P128, P128, X128, CARRY) + +/*----- Assembler test interface ------------------------------------------*/ + +#define EMPTY + +#define PARAM(v, ty) ty *v, +#define PARAM_NIL(v, q) +#define PARAM_P128(v, q) PARAM(v, q p128) +#define PARAM_X128(v, q) PARAM(v, q x128) +#define PARAM_CARRY(v, q) PARAM(v, q carry) + +#define DECLSTUB(fn, tya, tyc, tyu, tyx, tyv, tyy, tyzz, tyyy, tycc) \ + extern void test_##fn(PARAM_##tyzz(zz, EMPTY) PARAM_##tycc(cc, EMPTY) \ + PARAM_##tyyy(yy, EMPTY) \ + PARAM_##tyu(u, const) PARAM_##tyx(x, const) \ + PARAM_##tyv(v, const) PARAM_##tyy(y, const) \ + unsigned n, unsigned long long *cyv); +TESTOPS(DECLSTUB) + +/*----- Conversion functions ----------------------------------------------*/ + +#define DEFTYPE(ty, ld, st, nby) \ + \ + static void cvt_##ty(const char *buf, dstr *d) \ + { \ + dstr dd = DSTR_INIT; \ + int i; \ + ty *x; \ + const octet *p; \ + \ + type_hex.cvt(buf, &dd); \ + if (dd.len != N(x->w)*nby) die(1, "invalid length for " #ty); \ + dstr_ensure(d, sizeof(*x)); \ + x = (ty *)d->buf; p = (const octet *)dd.buf; \ + for (i = 0; i < N(x->w); i++) { x->w[i] = ld(p); p += nby; } \ + dstr_destroy(&dd); \ + } \ + \ 
+ static void dump_##ty(dstr *d, FILE *fp) \ + { \ + dstr dd = DSTR_INIT; \ + int i; \ + const ty *x = (const ty *)d->buf; \ + octet *p; \ + \ + dstr_ensure(&dd, N(x->w)*nby); p = (octet *)dd.buf; \ + for (i = 0; i < N(x->w); i++) { st(p, x->w[i]); p += nby; } \ + dd.len = N(x->w)*nby; \ + type_hex.dump(&dd, fp); \ + dstr_destroy(&dd); \ + } \ + \ + static int eq_##ty(const ty *x, const ty *y) \ + { \ + int i; \ + \ + for (i = 0; i < N(x->w); i++) \ + if (x->w[i] != y->w[i]) return (0); \ + return (1); \ + } \ + \ + static const struct test_type type_##ty = { cvt_##ty, dump_##ty }; + +DEFTYPE(p128, LDW, STW, NWBY) +DEFTYPE(x128, LDW, STW, NWBY) +DEFTYPE(carry, LDD, STD, NDBY) + +/*----- Test functions ----------------------------------------------------*/ + +#define DECL_IN(v, ty) \ + dstr *d_##v = dp++; const ty *v = (const ty *)d_##v->buf; +#define DECL_IN_NIL(v) +#define DECL_IN_P128(v) DECL_IN(v, p128) +#define DECL_IN_X128(v) DECL_IN(v, x128) +#define DECL_IN_CARRY(v) DECL_IN(v, carry) + +#define DECL_OUT(v, ty) \ + dstr dd_##v = DSTR_INIT, *d_##v = &dd_##v; ty *v; +#define DECL_OUT_NIL(v) +#define DECL_OUT_P128(v) DECL_OUT(v, p128) +#define DECL_OUT_X128(v) DECL_OUT(v, x128) +#define DECL_OUT_CARRY(v) DECL_OUT(v, carry) + +#define INIT_OUT(v, ty) \ + dstr_ensure(d_##v, sizeof(ty)); v = (ty *)d_##v->buf; +#define INIT_OUT_NIL(v) +#define INIT_OUT_P128(v) INIT_OUT(v, p128) +#define INIT_OUT_X128(v) INIT_OUT(v, x128) +#define INIT_OUT_CARRY(v) INIT_OUT(v, carry) + +#define ARG(v) , v +#define ARG_NIL(v) +#define ARG_P128(v) ARG(v) +#define ARG_X128(v) ARG(v) +#define ARG_CARRY(v) ARG(v) + +#define CHECK(v, vv, ty) if (!eq_##ty(v, vv)) ok = 0; +#define CHECK_NIL(v, vv) +#define CHECK_P128(v, vv) CHECK(v, vv, p128) +#define CHECK_X128(v, vv) CHECK(v, vv, x128) +#define CHECK_CARRY(v, vv) CHECK(v, vv, carry) + +#define DUMP(v, ty) \ + fprintf(stderr, "\n\t%-6s = ", #v); dump_##ty(d_##v, stderr); +#define DUMP_NIL(v) +#define DUMP_P128(v) DUMP(v, p128) +#define 
DUMP_X128(v) DUMP(v, x128) +#define DUMP_CARRY(v) DUMP(v, carry) + +#define COPY(v, vv, ty) *v = *vv; +#define COPY_NIL(v, vv) +#define COPY_P128(v, vv) COPY(v, vv, p128) +#define COPY_X128(v, vv) COPY(v, vv, x128) +#define COPY_CARRY(v, vv) COPY(v, vv, carry) + +#define FREE_OUT(v, ty) dstr_destroy(d_##v); +#define FREE_OUT_NIL(v) +#define FREE_OUT_P128(v) FREE_OUT(v, p128) +#define FREE_OUT_X128(v) FREE_OUT(v, x128) +#define FREE_OUT_CARRY(v) FREE_OUT(v, carry) + +#define DEFTESTFN(fn, tya, tyc, tyu, tyx, tyv, tyy, tyzz, tyyy, tycc) \ + \ + static int v##fn(dstr dv[]) \ + { \ + dstr *dp = dv; \ + DECL_IN_##tya(a) \ + DECL_IN_##tyc(c) \ + DECL_IN_##tyu(u) \ + DECL_IN_##tyx(x) \ + DECL_IN_##tyv(v) \ + DECL_IN_##tyy(y) \ + DECL_IN_##tyzz(zz_exp) \ + DECL_IN_##tyyy(yy_exp) \ + DECL_IN_##tycc(cc_exp) \ + DECL_OUT_##tyzz(zz_out) \ + DECL_OUT_##tyyy(yy_out) \ + DECL_OUT_##tycc(cc_out) \ + unsigned long long cyv[1]; \ + int ok = 1; \ + \ + INIT_OUT_##tyzz(zz_out) \ + INIT_OUT_##tyyy(yy_out) \ + INIT_OUT_##tycc(cc_out) \ + \ + COPY_##tya(zz_out, a); \ + COPY_##tyc(cc_out, c); \ + test_##fn(zz_out ARG_##tycc(cc_out) ARG_##tyyy(yy_out) \ + ARG_##tyu(u) ARG_##tyx(x) ARG_##tyv(v) ARG_##tyy(y), \ + 1, cyv); \ + CHECK_##tyzz(zz_exp, zz_out) \ + CHECK_##tyyy(yy_exp, yy_out) \ + CHECK_##tycc(cc_exp, cc_out) \ + \ + if (!ok) { \ + fputs(#fn " failed", stderr); \ + DUMP_##tya(a) \ + DUMP_##tyc(c) \ + DUMP_##tyu(u) \ + DUMP_##tyx(x) \ + DUMP_##tyv(v) \ + DUMP_##tyy(y) \ + DUMP_##tyzz(zz_exp) \ + DUMP_##tyzz(zz_out) \ + DUMP_##tyyy(yy_exp) \ + DUMP_##tyyy(yy_out) \ + DUMP_##tycc(cc_exp) \ + DUMP_##tycc(cc_out) \ + fputc('\n', stderr); \ + } \ + \ + FREE_OUT_##tyzz(zz_out); \ + FREE_OUT_##tyyy(yy_out); \ + FREE_OUT_##tycc(cc_out); \ + \ + return (ok); \ + } + +TESTOPS(DEFTESTFN) + +/*----- Main code ---------------------------------------------------------*/ + +#define NIL +#define P128 &type_p128, +#define X128 &type_x128, +#define CARRY &type_carry, + +static test_chunk tests[] = { 
+#define DEFCHUNK(fn, tya, tyc, tyu, tyx, tyv, tyy, tyzz, tyyy, tycc) \ + { #fn, v##fn, { tya tyc tyu tyx tyv tyy tyzz tyyy tycc } }, + TESTOPS(DEFCHUNK) +#undef DEFCHUNK + { 0, 0, { 0 } } +}; + +int main(int argc, char *argv[]) +{ + sub_init(); + if (!cpu_features_p()) + { fprintf(stderr, "required cpu feature not available\n"); exit(77); } + test_run(argc, argv, tests, SRCDIR "/t/mul4"); + return (0); +} + +/*----- That's all, folks -------------------------------------------------*/ diff --git a/math/t/mul4 b/math/t/mul4 new file mode 100644 index 00000000..1373215a --- /dev/null +++ b/math/t/mul4 @@ -0,0 +1,69 @@ +# Test vectors for accelerated multiplication + +dmul4 { + 0000a5a4a3a2a1a000009594939291900000abaaa9a8a7a600009b9a999897960000afaeadacabaa00009f9e9d9c9b9a # c + c3c2c1c0c7c6c5c4cbcac9c8cfcecdcc # u + e3e2e1e0e7e6e5e4ebeae9e8efeeedec # x + d3d2d1d0d7d6d5d4dbdad9d8dfdedddc # v + f3f2f1f0f7f6f5f4fbfaf9f8fffefdfc # y + d4356fa018c7f681e0be24efecdaf6e0 # zz + 0004bb142333e4e00004c56cb3ac322d000335ca0eb0310000033cbfe475dfd00001a2236db667a00001a5a668a94f10; ## cc +} + +dmla4 { + b3b2b1b0b7b6b5b4bbbab9b8bfbebdbc # a + 0000a5a4a3a2a1a000009594939291900000abaaa9a8a7a600009b9a999897960000afaeadacabaa00009f9e9d9c9b9a # c + c3c2c1c0c7c6c5c4cbcac9c8cfcecdcc # u + e3e2e1e0e7e6e5e4ebeae9e8efeeedec # x + d3d2d1d0d7d6d5d4dbdad9d8dfdedddc # v + f3f2f1f0f7f6f5f4fbfaf9f8fffefdfc # y + 87e82150d07eac369c78dea7ac99b49d # zz + 0004bb142333e4e10004c56cb3ac322d000335ca0eb0310000033cbfe475dfd00001a2236db667a00001a5a668a94f10; # cc +} + +mul4 { + 0000a5a4a3a2a1a000009594939291900000abaaa9a8a7a600009b9a999897960000afaeadacabaa00009f9e9d9c9b9a # c + e3e2e1e0e7e6e5e4ebeae9e8efeeedec # x + f3f2f1f0f7f6f5f4fbfaf9f8fffefdfc # y + 964a43a0b812545cd3c4a34a69e3ec23 # zz + 0002b2f3db03f8310002b880e3fffed70001d457394991000001d812a4ace8a80000ee0b505470500000efed0e0e2428; ## cc +} + +mla4 { + b3b2b1b0b7b6b5b4bbbab9b8bfbebdbc # a + 
0000a5a4a3a2a1a000009594939291900000abaaa9a8a7a600009b9a999897960000afaeadacabaa00009f9e9d9c9b9a # c + e3e2e1e0e7e6e5e4ebeae9e8efeeedec # x + f3f2f1f0f7f6f5f4fbfaf9f8fffefdfc # y + 49fcf5506fc90a118f7f5d0329a2a9e0 # zz + 0002b2f3db03f8320002b880e3fffed70001d457394991000001d812a4ace8a80000ee0b505470500000efed0e0e2428; ## cc +} + +mmul4 { + c3c2c1c0c7c6c5c4cbcac9c8cfcecdcc # u + acadaeafa8a9aaaba4a5a6a7a0a1a2a3 # n + d3d2d1d0d7d6d5d4dbdad9d8dfdedddc # v + 546f97b132b6ca1d10d519b5ca6ab8a9 # m + 00000000000000000000000000000000 # zz + 00006c00000012ad00009a8d0000630c0000f0840000979d000077a400000caa # yy + 0003126be83bdbf40002a05c4867918e000259dfe01b01770001b7e463bf6b7a00011339f770da470000bdab9990cf26; # cc +} + +mmla4 { + b3b2b1b0b7b6b5b4bbbab9b8bfbebdbc # a + c3c2c1c0c7c6c5c4cbcac9c8cfcecdcc # u + acadaeafa8a9aaaba4a5a6a7a0a1a2a3 # n + d3d2d1d0d7d6d5d4dbdad9d8dfdedddc # v + 546f97b132b6ca1d10d519b5ca6ab8a9 # m + 00000000000000000000000000000000 # zz + 000016b00000d85500000b390000507000008de20000754b000057700000c5db # yy + 000338658ad352110002f9fbc6cd85d5000205e99c5e20d300021acac7b997550000fdb10c111c11000131df2708bb59; # cc +} + +mont4 { + b3b2b1b0b7b6b5b4bbbab9b8bfbebdbc # a + acadaeafa8a9aaaba4a5a6a7a0a1a2a3 # n + 546f97b132b6ca1d10d519b5ca6ab8a9 # m + 00000000000000000000000000000000 # zz + 0000aab00000c5a7000070ab0000ed6400009d5d0000ddad0000dfcb0000b930 # yy + 0001734705fa761d00019ee57a6290e40000f14fc045d61200010386c155e29100008b1816a19f2700007432ecd64990; # cc +} -- 2.11.0