math/: Add low-level testing for accelerated `mpx-mul4' multiplier.
author Mark Wooding <mdw@distorted.org.uk>
Thu, 26 May 2016 08:26:09 +0000 (09:26 +0100)
committer Mark Wooding <mdw@distorted.org.uk>
Tue, 4 Apr 2017 11:30:55 +0000 (12:30 +0100)
math/Makefile.am
math/mpx-mul4-test.c [new file with mode: 0644]
math/t/mul4 [new file with mode: 0644]

index 0afee1f..804ffbb 100644 (file)
@@ -182,11 +182,22 @@ noinst_PROGRAMS           += bittest
 TESTS                  += bittest
 EXTRA_DIST             += t/mpx
 if CPUFAM_X86
-libmath_la_SOURCES     += mpx-mul4-x86-sse2.S
+MPX_MUL4_SOURCES        = mpx-mul4-x86-sse2.S
+check_PROGRAMS         += mpx-mul4.t
+TESTS                  += mpx-mul4.t$(EXEEXT)
 endif
 if CPUFAM_AMD64
-libmath_la_SOURCES     += mpx-mul4-amd64-sse2.S
+MPX_MUL4_SOURCES        = mpx-mul4-amd64-sse2.S
+check_PROGRAMS         += mpx-mul4.t
+TESTS                  += mpx-mul4.t$(EXEEXT)
 endif
+libmath_la_SOURCES     += $(MPX_MUL4_SOURCES)
+
+mpx_mul4_t_SOURCES      = mpx-mul4-test.c $(MPX_MUL4_SOURCES)
+mpx_mul4_t_CPPFLAGS     = \
+       $(AM_CPPFLAGS) \
+       -DTEST_MUL4 -DSRCDIR="\"$(srcdir)\""
+mpx_mul4_t_LDADD        = $(top_builddir)/libcatacomb.la $(mLib_LIBS)
 
 ## A quick-and-dirty parser, used for parsing descriptions of groups, fields,
 ## etc.
diff --git a/math/mpx-mul4-test.c b/math/mpx-mul4-test.c
new file mode 100644 (file)
index 0000000..883c4fc
--- /dev/null
@@ -0,0 +1,291 @@
+/* -*-c-*-
+ *
+ * Testing optimized 128-bit multipliers
+ *
+ * (c) 2017 Straylight/Edgeware
+ */
+
+/*----- Licensing notice --------------------------------------------------*
+ *
+ * This file is part of Catacomb.
+ *
+ * Catacomb is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * Catacomb is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with Catacomb; if not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ */
+
+/*----- Header files ------------------------------------------------------*/
+
+#include "config.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <mLib/macros.h>
+#include <mLib/report.h>
+#include <mLib/testrig.h>
+
+#include "dispatch.h"
+#include "mp.h"
+#include "mpmont.h"
+#include "mptext.h"
+
+/*----- CPU feature detection ---------------------------------------------*/
+
+#if CPUFAM_X86 /* x86 build: the stubs under test need SSE2 at run time */
+#  define VARIANT _x86_sse2 /* also marks this CPU family as supported */
+#  define REPR_32 /* selects the representation block below */
+static int cpu_features_p(void) { return (cpu_feature_p(CPUFEAT_X86_SSE2)); }
+#endif
+
+#if CPUFAM_AMD64 /* amd64 build: same SSE2 run-time requirement */
+#  define VARIANT _amd64_sse2 /* also marks this CPU family as supported */
+#  define REPR_32 /* selects the representation block below */
+static int cpu_features_p(void) { return (cpu_feature_p(CPUFEAT_X86_SSE2)); }
+#endif
+
+#ifndef VARIANT /* neither family above matched: refuse to build */
+#  error "Unsupported CPU family."
+#endif
+
+#ifdef REPR_32
+#  define NWBY 4 /* test-vector bytes per `w' element of p128 and x128 */
+#  define NDBY 8 /* test-vector bytes per `w' element of carry */
+#  define LDW LOAD32 /* element load used for p128 and x128 */
+#  define LDD LOAD64 /* element load used for carry */
+#  define STW STORE32 /* element store used for p128 and x128 */
+#  define STD STORE64 /* element store used for carry */
+typedef struct { mpw w[4]; } p128; /* four mpw elements */
+typedef struct { mpw w[8]; } x128; /* eight mpw elements */
+typedef struct { mpd w[6]; } carry; /* six mpd elements */
+#endif
+
+/*----- Test operation table ----------------------------------------------*/
+
+#define TESTOPS(_) /* one row per routine; NIL = operand absent */     \
+        /*  a     c      u     x     v     y    z'    y'     c' */     \
+  _(dmul4, NIL,  CARRY, P128, P128, P128, P128, P128, NIL,  CARRY)     \
+  _(dmla4, P128, CARRY, P128, P128, P128, P128, P128, NIL,  CARRY)     \
+  _(mul4,  NIL,         CARRY, NIL,  P128, NIL,  P128, P128, NIL,  CARRY)      \
+  _(mla4,  P128, CARRY, NIL,  P128, NIL,  P128, P128, NIL,  CARRY)     \
+  _(mmul4, NIL,         NIL,   P128, P128, P128, P128, P128, X128, CARRY)      \
+  _(mmla4, P128, NIL,  P128, P128, P128, P128, P128, X128, CARRY)      \
+  _(mont4, P128, NIL,  NIL,  P128, NIL,  P128, P128, X128, CARRY)
+
+/*----- Assembler test interface ------------------------------------------*/
+
+#define EMPTY /* expands to nothing: passed as q when no qualifier is wanted */
+
+#define PARAM(v, ty) ty *v, /* one pointer parameter plus its trailing comma */
+#define PARAM_NIL(v, q) /* absent operand: declares no parameter */
+#define PARAM_P128(v, q) PARAM(v, q p128)
+#define PARAM_X128(v, q) PARAM(v, q x128)
+#define PARAM_CARRY(v, q) PARAM(v, q carry)
+
+#define DECLSTUB(fn, tya, tyc, tyu, tyx, tyv, tyy, tyzz, tyyy, tycc)   \
+  extern void test_##fn(PARAM_##tyzz(zz, EMPTY) PARAM_##tycc(cc, EMPTY)        \
+                       PARAM_##tyyy(yy, EMPTY) /* writable outputs */  \
+                       PARAM_##tyu(u, const) PARAM_##tyx(x, const)     \
+                       PARAM_##tyv(v, const) PARAM_##tyy(y, const)     \
+                       unsigned n, unsigned long long *cyv);
+TESTOPS(DECLSTUB) /* prototypes for every test_<fn>; tya and tyc add no parameters */
+
+/*----- Conversion functions ----------------------------------------------*/
+
+#define DEFTYPE(ty, ld, st, nby)                                       \
+  /* cvt_<ty>: parse hex text `buf' into a <ty> held in d's buffer. */ \
+  static void cvt_##ty(const char *buf, dstr *d)                       \
+  {                                                                    \
+    dstr dd = DSTR_INIT;                                               \
+    int i;                                                             \
+    ty *x;                                                             \
+    const octet *p;                                                    \
+    /* Decode hex; die unless the byte count matches; d->len unset. */ \
+    type_hex.cvt(buf, &dd);                                            \
+    if (dd.len != N(x->w)*nby) die(1, "invalid length for " #ty);      \
+    dstr_ensure(d, sizeof(*x));                                                \
+    x = (ty *)d->buf; p = (const octet *)dd.buf;                       \
+    for (i = 0; i < N(x->w); i++) { x->w[i] = ld(p); p += nby; }       \
+    dstr_destroy(&dd);                                                 \
+  }                                                                    \
+  /* dump_<ty>: write the <ty> in d's buffer to fp as hex. */          \
+  static void dump_##ty(dstr *d, FILE *fp)                             \
+  {                                                                    \
+    dstr dd = DSTR_INIT;                                               \
+    int i;                                                             \
+    const ty *x = (const ty *)d->buf;                                  \
+    octet *p;                                                          \
+    /* Store each element as nby bytes, then let type_hex print. */    \
+    dstr_ensure(&dd, N(x->w)*nby); p = (octet *)dd.buf;                        \
+    for (i = 0; i < N(x->w); i++) { st(p, x->w[i]); p += nby; }                \
+    dd.len = N(x->w)*nby;                                              \
+    type_hex.dump(&dd, fp);                                            \
+    dstr_destroy(&dd);                                                 \
+  }                                                                    \
+  /* eq_<ty>: return 1 if every element of x and y matches, else 0. */ \
+  static int eq_##ty(const ty *x, const ty *y)                         \
+  {                                                                    \
+    int i;                                                             \
+                                                                       \
+    for (i = 0; i < N(x->w); i++)                                      \
+      if (x->w[i] != y->w[i]) return (0);                              \
+    return (1);                                                                \
+  }                                                                    \
+  /* Type descriptor built from the cvt and dump functions above. */   \
+  static const struct test_type type_##ty = { cvt_##ty, dump_##ty };
+
+DEFTYPE(p128, LDW, STW, NWBY) /* cvt_p128, dump_p128, eq_p128, type_p128 */
+DEFTYPE(x128, LDW, STW, NWBY) /* cvt_x128, dump_x128, eq_x128, type_x128 */
+DEFTYPE(carry, LDD, STD, NDBY) /* cvt_carry, dump_carry, eq_carry, type_carry */
+
+/*----- Test functions ----------------------------------------------------*/
+
+#define DECL_IN(v, ty) /* take the next input dstr; view its buffer as ty */ \
+  dstr *d_##v = dp++; const ty *v = (const ty *)d_##v->buf;
+#define DECL_IN_NIL(v) /* absent operand: consumes no input slot */
+#define DECL_IN_P128(v) DECL_IN(v, p128)
+#define DECL_IN_X128(v) DECL_IN(v, x128)
+#define DECL_IN_CARRY(v) DECL_IN(v, carry)
+
+#define DECL_OUT(v, ty) /* local dstr for an output; v is set by INIT_OUT */ \
+  dstr dd_##v = DSTR_INIT, *d_##v = &dd_##v; ty *v;
+#define DECL_OUT_NIL(v) /* absent operand: declares nothing */
+#define DECL_OUT_P128(v) DECL_OUT(v, p128)
+#define DECL_OUT_X128(v) DECL_OUT(v, x128)
+#define DECL_OUT_CARRY(v) DECL_OUT(v, carry)
+
+#define INIT_OUT(v, ty) /* dstr_ensure sizeof(ty) bytes; point v at the buffer */ \
+  dstr_ensure(d_##v, sizeof(ty)); v = (ty *)d_##v->buf;
+#define INIT_OUT_NIL(v) /* absent operand: nothing to initialise */
+#define INIT_OUT_P128(v) INIT_OUT(v, p128)
+#define INIT_OUT_X128(v) INIT_OUT(v, x128)
+#define INIT_OUT_CARRY(v) INIT_OUT(v, carry)
+
+#define ARG(v) , v /* one further call argument, with its leading comma */
+#define ARG_NIL(v) /* absent operand: passes nothing */
+#define ARG_P128(v) ARG(v)
+#define ARG_X128(v) ARG(v)
+#define ARG_CARRY(v) ARG(v)
+
+#define CHECK(v, vv, ty) if (!eq_##ty(v, vv)) ok = 0; /* clear `ok' on mismatch */
+#define CHECK_NIL(v, vv) /* absent operand: nothing to compare */
+#define CHECK_P128(v, vv) CHECK(v, vv, p128)
+#define CHECK_X128(v, vv) CHECK(v, vv, x128)
+#define CHECK_CARRY(v, vv) CHECK(v, vv, carry)
+
+#define DUMP(v, ty) /* print a newline, tab, the name and the value to stderr */ \
+  fprintf(stderr, "\n\t%-6s = ", #v); dump_##ty(d_##v, stderr);
+#define DUMP_NIL(v) /* absent operand: prints nothing */
+#define DUMP_P128(v) DUMP(v, p128)
+#define DUMP_X128(v) DUMP(v, x128)
+#define DUMP_CARRY(v) DUMP(v, carry)
+
+#define COPY(v, vv, ty) *v = *vv; /* struct copy of vv into v; ty is unused */
+#define COPY_NIL(v, vv) /* absent operand: nothing to copy */
+#define COPY_P128(v, vv) COPY(v, vv, p128)
+#define COPY_X128(v, vv) COPY(v, vv, x128)
+#define COPY_CARRY(v, vv) COPY(v, vv, carry)
+
+#define FREE_OUT(v, ty) dstr_destroy(d_##v); /* release an output dstr; ty is unused */
+#define FREE_OUT_NIL(v) /* absent operand: nothing to release */
+#define FREE_OUT_P128(v) FREE_OUT(v, p128)
+#define FREE_OUT_X128(v) FREE_OUT(v, x128)
+#define FREE_OUT_CARRY(v) FREE_OUT(v, carry)
+
+#define DEFTESTFN(fn, tya, tyc, tyu, tyx, tyv, tyy, tyzz, tyyy, tycc)  \
+  /* v<fn>: run test_<fn> on one vector; return 1 on match, else 0. */ \
+  static int v##fn(dstr dv[])                                          \
+  {                                                                    \
+    dstr *dp = dv; /* inputs are taken from dv in column order */      \
+    DECL_IN_##tya(a)                                                   \
+    DECL_IN_##tyc(c)                                                   \
+    DECL_IN_##tyu(u)                                                   \
+    DECL_IN_##tyx(x)                                                   \
+    DECL_IN_##tyv(v)                                                   \
+    DECL_IN_##tyy(y)                                                   \
+    DECL_IN_##tyzz(zz_exp)                                             \
+    DECL_IN_##tyyy(yy_exp)                                             \
+    DECL_IN_##tycc(cc_exp)                                             \
+    DECL_OUT_##tyzz(zz_out)                                            \
+    DECL_OUT_##tyyy(yy_out)                                            \
+    DECL_OUT_##tycc(cc_out)                                            \
+    unsigned long long cyv[1]; /* handed to the stub; not read here */ \
+    int ok = 1;                                                                \
+    /* Give each output a buffer of the right size. */                 \
+    INIT_OUT_##tyzz(zz_out)                                            \
+    INIT_OUT_##tyyy(yy_out)                                            \
+    INIT_OUT_##tycc(cc_out)                                            \
+    /* a and c, when present, are passed in via zz_out and cc_out. */  \
+    COPY_##tya(zz_out, a);                                             \
+    COPY_##tyc(cc_out, c);                                             \
+    test_##fn(zz_out ARG_##tycc(cc_out) ARG_##tyyy(yy_out)             \
+             ARG_##tyu(u) ARG_##tyx(x) ARG_##tyv(v) ARG_##tyy(y),      \
+             1, cyv); /* n = 1 */                                      \
+    CHECK_##tyzz(zz_exp, zz_out)                                       \
+    CHECK_##tyyy(yy_exp, yy_out)                                       \
+    CHECK_##tycc(cc_exp, cc_out)                                       \
+    /* On mismatch, print inputs and expected/actual outputs. */       \
+    if (!ok) {                                                         \
+      fputs(#fn " failed", stderr);                                    \
+      DUMP_##tya(a)                                                    \
+      DUMP_##tyc(c)                                                    \
+      DUMP_##tyu(u)                                                    \
+      DUMP_##tyx(x)                                                    \
+      DUMP_##tyv(v)                                                    \
+      DUMP_##tyy(y)                                                    \
+      DUMP_##tyzz(zz_exp)                                              \
+      DUMP_##tyzz(zz_out)                                              \
+      DUMP_##tyyy(yy_exp)                                              \
+      DUMP_##tyyy(yy_out)                                              \
+      DUMP_##tycc(cc_exp)                                              \
+      DUMP_##tycc(cc_out)                                              \
+      fputc('\n', stderr);                                             \
+    }                                                                  \
+    /* Release the output buffers. */                                  \
+    FREE_OUT_##tyzz(zz_out);                                           \
+    FREE_OUT_##tyyy(yy_out);                                           \
+    FREE_OUT_##tycc(cc_out);                                           \
+                                                                       \
+    return (ok);                                                       \
+  }
+
+TESTOPS(DEFTESTFN) /* defines vdmul4, vdmla4, vmul4, vmla4, vmmul4, vmmla4, vmont4 */
+
+/*----- Main code ---------------------------------------------------------*/
+
+#define NIL /* absent operand: contributes no entry to the type list */
+#define P128 &type_p128, /* from here on, column names expand to descriptor pointers */
+#define X128 &type_x128,
+#define CARRY &type_carry,
+
+static test_chunk tests[] = { /* one chunk per TESTOPS row */
+#define DEFCHUNK(fn, tya, tyc, tyu, tyx, tyv, tyy, tyzz, tyyy, tycc)   \
+  { #fn, v##fn, { tya tyc tyu tyx tyv tyy tyzz tyyy tycc } },
+  TESTOPS(DEFCHUNK)
+#undef DEFCHUNK
+  { 0, 0, { 0 } } /* final all-zero entry (presumably the rig's end marker -- confirm) */
+};
+
+int main(int argc, char *argv[]) /* run the t/mul4 vectors, or exit 77 without the CPU feature */
+{
+  sub_init();
+  if (!cpu_features_p()) /* 77: presumably Automake's "skipped" status -- confirm */
+    { fprintf(stderr, "required cpu feature not available\n"); exit(77); }
+  test_run(argc, argv, tests, SRCDIR "/t/mul4"); /* SRCDIR is set by -DSRCDIR in Makefile.am */
+  return (0);
+}
+
+/*----- That's all, folks -------------------------------------------------*/
diff --git a/math/t/mul4 b/math/t/mul4
new file mode 100644 (file)
index 0000000..1373215
--- /dev/null
@@ -0,0 +1,69 @@
+# Test vectors for accelerated multiplication
+
+dmul4 {
+  0000a5a4a3a2a1a000009594939291900000abaaa9a8a7a600009b9a999897960000afaeadacabaa00009f9e9d9c9b9a # c
+    c3c2c1c0c7c6c5c4cbcac9c8cfcecdcc # u
+    e3e2e1e0e7e6e5e4ebeae9e8efeeedec # x
+    d3d2d1d0d7d6d5d4dbdad9d8dfdedddc # v
+    f3f2f1f0f7f6f5f4fbfaf9f8fffefdfc # y
+    d4356fa018c7f681e0be24efecdaf6e0 # zz
+    0004bb142333e4e00004c56cb3ac322d000335ca0eb0310000033cbfe475dfd00001a2236db667a00001a5a668a94f10; ## cc
+}
+
+dmla4 {
+  b3b2b1b0b7b6b5b4bbbab9b8bfbebdbc # a
+    0000a5a4a3a2a1a000009594939291900000abaaa9a8a7a600009b9a999897960000afaeadacabaa00009f9e9d9c9b9a # c
+    c3c2c1c0c7c6c5c4cbcac9c8cfcecdcc # u
+    e3e2e1e0e7e6e5e4ebeae9e8efeeedec # x
+    d3d2d1d0d7d6d5d4dbdad9d8dfdedddc # v
+    f3f2f1f0f7f6f5f4fbfaf9f8fffefdfc # y
+    87e82150d07eac369c78dea7ac99b49d # zz
+    0004bb142333e4e10004c56cb3ac322d000335ca0eb0310000033cbfe475dfd00001a2236db667a00001a5a668a94f10; # cc
+}
+
+mul4 {
+  0000a5a4a3a2a1a000009594939291900000abaaa9a8a7a600009b9a999897960000afaeadacabaa00009f9e9d9c9b9a # c
+    e3e2e1e0e7e6e5e4ebeae9e8efeeedec # x
+    f3f2f1f0f7f6f5f4fbfaf9f8fffefdfc # y
+    964a43a0b812545cd3c4a34a69e3ec23 # zz
+    0002b2f3db03f8310002b880e3fffed70001d457394991000001d812a4ace8a80000ee0b505470500000efed0e0e2428; ## cc
+}
+
+mla4 {
+  b3b2b1b0b7b6b5b4bbbab9b8bfbebdbc # a
+    0000a5a4a3a2a1a000009594939291900000abaaa9a8a7a600009b9a999897960000afaeadacabaa00009f9e9d9c9b9a # c
+    e3e2e1e0e7e6e5e4ebeae9e8efeeedec # x
+    f3f2f1f0f7f6f5f4fbfaf9f8fffefdfc # y
+    49fcf5506fc90a118f7f5d0329a2a9e0 # zz
+    0002b2f3db03f8320002b880e3fffed70001d457394991000001d812a4ace8a80000ee0b505470500000efed0e0e2428; ## cc
+}
+
+mmul4 {
+  c3c2c1c0c7c6c5c4cbcac9c8cfcecdcc # u
+    acadaeafa8a9aaaba4a5a6a7a0a1a2a3 # n
+    d3d2d1d0d7d6d5d4dbdad9d8dfdedddc # v
+    546f97b132b6ca1d10d519b5ca6ab8a9 # m
+    00000000000000000000000000000000 # zz
+    00006c00000012ad00009a8d0000630c0000f0840000979d000077a400000caa # yy
+    0003126be83bdbf40002a05c4867918e000259dfe01b01770001b7e463bf6b7a00011339f770da470000bdab9990cf26; # cc
+}
+
+mmla4 {
+  b3b2b1b0b7b6b5b4bbbab9b8bfbebdbc # a
+    c3c2c1c0c7c6c5c4cbcac9c8cfcecdcc # u
+    acadaeafa8a9aaaba4a5a6a7a0a1a2a3 # n
+    d3d2d1d0d7d6d5d4dbdad9d8dfdedddc # v
+    546f97b132b6ca1d10d519b5ca6ab8a9 # m
+    00000000000000000000000000000000 # zz
+    000016b00000d85500000b390000507000008de20000754b000057700000c5db # yy
+    000338658ad352110002f9fbc6cd85d5000205e99c5e20d300021acac7b997550000fdb10c111c11000131df2708bb59; # cc
+}
+
+mont4 {
+  b3b2b1b0b7b6b5b4bbbab9b8bfbebdbc # a
+    acadaeafa8a9aaaba4a5a6a7a0a1a2a3 # n
+    546f97b132b6ca1d10d519b5ca6ab8a9 # m
+    00000000000000000000000000000000 # zz
+    0000aab00000c5a7000070ab0000ed6400009d5d0000ddad0000dfcb0000b930 # yy
+    0001734705fa761d00019ee57a6290e40000f14fc045d61200010386c155e29100008b1816a19f2700007432ecd64990; # cc
+}