| 1 | /* -*-c-*- |
| 2 | * |
| 3 | * Testing optimized 128-bit multipliers |
| 4 | * |
| 5 | * (c) 2017 Straylight/Edgeware |
| 6 | */ |
| 7 | |
| 8 | /*----- Licensing notice --------------------------------------------------* |
| 9 | * |
| 10 | * This file is part of Catacomb. |
| 11 | * |
| 12 | * Catacomb is free software; you can redistribute it and/or modify |
| 13 | * it under the terms of the GNU Library General Public License as |
| 14 | * published by the Free Software Foundation; either version 2 of the |
| 15 | * License, or (at your option) any later version. |
| 16 | * |
| 17 | * Catacomb is distributed in the hope that it will be useful, |
| 18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | * GNU Library General Public License for more details. |
| 21 | * |
| 22 | * You should have received a copy of the GNU Library General Public |
| 23 | * License along with Catacomb; if not, write to the Free |
| 24 | * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
| 25 | * MA 02111-1307, USA. |
| 26 | */ |
| 27 | |
| 28 | /*----- Header files ------------------------------------------------------*/ |
| 29 | |
| 30 | #include "config.h" |
| 31 | |
| 32 | #include <assert.h> |
| 33 | #include <stdio.h> |
| 34 | #include <stdlib.h> |
| 35 | #include <string.h> |
| 36 | |
| 37 | #include <mLib/macros.h> |
| 38 | #include <mLib/report.h> |
| 39 | #include <mLib/testrig.h> |
| 40 | |
| 41 | #include "dispatch.h" |
| 42 | #include "mp.h" |
| 43 | #include "mpmont.h" |
| 44 | #include "mptext.h" |
| 45 | |
| 46 | /*----- CPU feature detection ---------------------------------------------*/ |
| 47 | |
| 48 | #if CPUFAM_X86 |
| 49 | # define VARIANT _x86_sse2 |
| 50 | # define REPR_32 |
| 51 | static int cpu_features_p(void) { return (cpu_feature_p(CPUFEAT_X86_SSE2)); } |
| 52 | #endif |
| 53 | |
| 54 | #if CPUFAM_AMD64 |
| 55 | # define VARIANT _amd64_sse2 |
| 56 | # define REPR_32 |
| 57 | static int cpu_features_p(void) { return (cpu_feature_p(CPUFEAT_X86_SSE2)); } |
| 58 | #endif |
| 59 | |
| 60 | #ifndef VARIANT |
| 61 | # error "Unsupported CPU family." |
| 62 | #endif |
| 63 | |
/* Definitions for the `REPR_32' representation.
 *
 * `NWBY' and `NDBY' are the byte strides used when converting `mpw' and
 * `mpd' elements to and from test-vector bytes; `LDW'/`STW' and `LDD'/`STD'
 * are the matching load and store macros.  The three structures are the
 * operand types named by the `P128', `X128' and `CARRY' tags in the test
 * table.
 */
#ifdef REPR_32
#  define NWBY 4
#  define NDBY 8
#  define LDW LOAD32
#  define LDD LOAD64
#  define STW STORE32
#  define STD STORE64

/* Four `mpw' words. */
typedef struct {
  mpw w[4];
} p128;

/* Eight `mpw' words. */
typedef struct {
  mpw w[8];
} x128;

/* Six `mpd' double-width values. */
typedef struct {
  mpd w[6];
} carry;
#endif
| 75 | |
| 76 | /*----- Test operation table ----------------------------------------------*/ |
| 77 | |
/* TESTOPS(_) -- the table of operations under test
 *
 * Applies `_' once per operation, passing the operation name followed by
 * nine type tags, each one of `NIL' (operand absent), `P128', `X128' or
 * `CARRY'.  As used by `DEFTESTFN', the columns are:
 *
 *   a, c           inputs copied into the z and carry output buffers before
 *                  the stub is called;
 *   u, x, v, y     read-only inputs passed straight to the stub;
 *   z', y', c'     expected results, compared with what the stub produced.
 *
 * The tags are consumed by token pasting, so they must be spelled exactly
 * as above.
 */
#define TESTOPS(_)                                                      \
  /*        a     c      u     x     v     y     z'    y'    c'     */  \
  _(dmul4,  NIL,  CARRY, P128, P128, P128, P128, P128, NIL,  CARRY)     \
  _(dmla4,  P128, CARRY, P128, P128, P128, P128, P128, NIL,  CARRY)     \
  _(mul4,   NIL,  CARRY, NIL,  P128, NIL,  P128, P128, NIL,  CARRY)     \
  _(mul4zc, NIL,  NIL,   NIL,  P128, NIL,  P128, P128, NIL,  CARRY)     \
  _(mla4,   P128, CARRY, NIL,  P128, NIL,  P128, P128, NIL,  CARRY)     \
  _(mla4zc, P128, NIL,   NIL,  P128, NIL,  P128, P128, NIL,  CARRY)     \
  _(mmul4,  NIL,  NIL,   P128, P128, P128, P128, P128, X128, CARRY)     \
  _(mmla4,  P128, NIL,   P128, P128, P128, P128, P128, X128, CARRY)     \
  _(mont4,  P128, NIL,   NIL,  P128, NIL,  P128, P128, X128, CARRY)
| 89 | |
| 90 | /*----- Assembler test interface ------------------------------------------*/ |
| 91 | |
| 92 | #define EMPTY |
| 93 | |
| 94 | #define PARAM(v, ty) ty *v, |
| 95 | #define PARAM_NIL(v, q) |
| 96 | #define PARAM_P128(v, q) PARAM(v, q p128) |
| 97 | #define PARAM_X128(v, q) PARAM(v, q x128) |
| 98 | #define PARAM_CARRY(v, q) PARAM(v, q carry) |
| 99 | |
| 100 | #define DECLSTUB(fn, tya, tyc, tyu, tyx, tyv, tyy, tyzz, tyyy, tycc) \ |
| 101 | extern void test_##fn(PARAM_##tyzz(zz, EMPTY) PARAM_##tycc(cc, EMPTY) \ |
| 102 | PARAM_##tyyy(yy, EMPTY) \ |
| 103 | PARAM_##tyu(u, const) PARAM_##tyx(x, const) \ |
| 104 | PARAM_##tyv(v, const) PARAM_##tyy(y, const) \ |
| 105 | unsigned n, unsigned long long *cyv); |
| 106 | TESTOPS(DECLSTUB) |
| 107 | |
| 108 | /*----- Conversion functions ----------------------------------------------*/ |
| 109 | |
| 110 | #define DEFTYPE(ty, ld, st, nby) \ |
| 111 | \ |
| 112 | static void cvt_##ty(const char *buf, dstr *d) \ |
| 113 | { \ |
| 114 | dstr dd = DSTR_INIT; \ |
| 115 | int i; \ |
| 116 | ty *x; \ |
| 117 | const octet *p; \ |
| 118 | \ |
| 119 | type_hex.cvt(buf, &dd); \ |
| 120 | if (dd.len != N(x->w)*nby) die(1, "invalid length for " #ty); \ |
| 121 | dstr_ensure(d, sizeof(*x)); d->len = sizeof(*x); \ |
| 122 | x = (ty *)d->buf; p = (const octet *)dd.buf; \ |
| 123 | for (i = 0; i < N(x->w); i++) { x->w[i] = ld(p); p += nby; } \ |
| 124 | dstr_destroy(&dd); \ |
| 125 | } \ |
| 126 | \ |
| 127 | static void dump_##ty(dstr *d, FILE *fp) \ |
| 128 | { \ |
| 129 | dstr dd = DSTR_INIT; \ |
| 130 | int i; \ |
| 131 | const ty *x = (const ty *)d->buf; \ |
| 132 | octet *p; \ |
| 133 | \ |
| 134 | dstr_ensure(&dd, N(x->w)*nby); p = (octet *)dd.buf; \ |
| 135 | for (i = 0; i < N(x->w); i++) { st(p, x->w[i]); p += nby; } \ |
| 136 | dd.len = N(x->w)*nby; \ |
| 137 | type_hex.dump(&dd, fp); \ |
| 138 | dstr_destroy(&dd); \ |
| 139 | } \ |
| 140 | \ |
| 141 | static int eq_##ty(const ty *x, const ty *y) \ |
| 142 | { \ |
| 143 | int i; \ |
| 144 | \ |
| 145 | for (i = 0; i < N(x->w); i++) \ |
| 146 | if (x->w[i] != y->w[i]) return (0); \ |
| 147 | return (1); \ |
| 148 | } \ |
| 149 | \ |
| 150 | static const struct test_type type_##ty = { cvt_##ty, dump_##ty }; |
| 151 | |
| 152 | DEFTYPE(p128, LDW, STW, NWBY) |
| 153 | DEFTYPE(x128, LDW, STW, NWBY) |
| 154 | DEFTYPE(carry, LDD, STD, NDBY) |
| 155 | |
| 156 | /*----- Test functions ----------------------------------------------------*/ |
| 157 | |
/* Per-type helper macros for `DEFTESTFN'.
 *
 * Each family consists of a generic worker plus `_NIL', `_P128', `_X128'
 * and `_CARRY' forms which are selected by pasting on a type tag from
 * `TESTOPS'.  The `_NIL' form always expands to nothing, so an absent
 * operand simply disappears from the generated function.  The workers
 * expand to complete statements or declarations, including their own
 * terminating semicolons, and rely on two names from the enclosing
 * function: `dp', the cursor over the input vector, and `ok', the result
 * flag.
 */

/* Declare an input: claim the next dstr from `dp' and view its buffer. */
#define DECL_IN(var, type)                                              \
  dstr *d_##var = dp++;                                                 \
  const type *var = (const type *)d_##var->buf;
#define DECL_IN_NIL(var)
#define DECL_IN_P128(var) DECL_IN(var, p128)
#define DECL_IN_X128(var) DECL_IN(var, x128)
#define DECL_IN_CARRY(var) DECL_IN(var, carry)

/* Declare an output: a fresh local dstr, a pointer to it, and a typed
 * pointer which `INIT_OUT' will aim at its buffer.
 */
#define DECL_OUT(var, type)                                             \
  dstr dd_##var = DSTR_INIT;                                            \
  dstr *d_##var = &dd_##var;                                            \
  type *var;
#define DECL_OUT_NIL(var)
#define DECL_OUT_P128(var) DECL_OUT(var, p128)
#define DECL_OUT_X128(var) DECL_OUT(var, x128)
#define DECL_OUT_CARRY(var) DECL_OUT(var, carry)

/* Give an output room for one object and point the typed pointer at it. */
#define INIT_OUT(var, type)                                             \
  dstr_ensure(d_##var, sizeof(type));                                   \
  var = (type *)d_##var->buf;
#define INIT_OUT_NIL(var)
#define INIT_OUT_P128(var) INIT_OUT(var, p128)
#define INIT_OUT_X128(var) INIT_OUT(var, x128)
#define INIT_OUT_CARRY(var) INIT_OUT(var, carry)

/* Append an operand to an argument list, with its leading comma. */
#define ARG(var) , var
#define ARG_NIL(var)
#define ARG_P128(var) ARG(var)
#define ARG_X128(var) ARG(var)
#define ARG_CARRY(var) ARG(var)

/* Clear `ok' if the two objects differ; `ok' is never set back to 1. */
#define CHECK(lhs, rhs, type)                                           \
  if (!eq_##type(lhs, rhs)) ok = 0;
#define CHECK_NIL(lhs, rhs)
#define CHECK_P128(lhs, rhs) CHECK(lhs, rhs, p128)
#define CHECK_X128(lhs, rhs) CHECK(lhs, rhs, x128)
#define CHECK_CARRY(lhs, rhs) CHECK(lhs, rhs, carry)

/* Print `<name> = <hex>' on a new line of stderr for a failure report. */
#define DUMP(var, type)                                                 \
  fprintf(stderr, "\n\t%-6s = ", #var);                                 \
  dump_##type(d_##var, stderr);
#define DUMP_NIL(var)
#define DUMP_P128(var) DUMP(var, p128)
#define DUMP_X128(var) DUMP(var, x128)
#define DUMP_CARRY(var) DUMP(var, carry)

/* Structure-copy `*src' into `*dst'; the type argument is not needed. */
#define COPY(dst, src, type) *dst = *src;
#define COPY_NIL(dst, src)
#define COPY_P128(dst, src) COPY(dst, src, p128)
#define COPY_X128(dst, src) COPY(dst, src, x128)
#define COPY_CARRY(dst, src) COPY(dst, src, carry)

/* Release the dstr behind an output; the type argument is not needed. */
#define FREE_OUT(var, type) dstr_destroy(d_##var);
#define FREE_OUT_NIL(var)
#define FREE_OUT_P128(var) FREE_OUT(var, p128)
#define FREE_OUT_X128(var) FREE_OUT(var, x128)
#define FREE_OUT_CARRY(var) FREE_OUT(var, carry)
| 209 | |
| 210 | #define DEFTESTFN(fn, tya, tyc, tyu, tyx, tyv, tyy, tyzz, tyyy, tycc) \ |
| 211 | \ |
| 212 | static int v##fn(dstr dv[]) \ |
| 213 | { \ |
| 214 | dstr *dp = dv; \ |
| 215 | DECL_IN_##tya(a) \ |
| 216 | DECL_IN_##tyc(c) \ |
| 217 | DECL_IN_##tyu(u) \ |
| 218 | DECL_IN_##tyx(x) \ |
| 219 | DECL_IN_##tyv(v) \ |
| 220 | DECL_IN_##tyy(y) \ |
| 221 | DECL_IN_##tyzz(zz_exp) \ |
| 222 | DECL_IN_##tyyy(yy_exp) \ |
| 223 | DECL_IN_##tycc(cc_exp) \ |
| 224 | DECL_OUT_##tyzz(zz_out) \ |
| 225 | DECL_OUT_##tyyy(yy_out) \ |
| 226 | DECL_OUT_##tycc(cc_out) \ |
| 227 | unsigned long long cyv[1]; \ |
| 228 | int ok = 1; \ |
| 229 | \ |
| 230 | INIT_OUT_##tyzz(zz_out) \ |
| 231 | INIT_OUT_##tyyy(yy_out) \ |
| 232 | INIT_OUT_##tycc(cc_out) \ |
| 233 | \ |
| 234 | COPY_##tya(zz_out, a); \ |
| 235 | COPY_##tyc(cc_out, c); \ |
| 236 | test_##fn(zz_out ARG_##tycc(cc_out) ARG_##tyyy(yy_out) \ |
| 237 | ARG_##tyu(u) ARG_##tyx(x) ARG_##tyv(v) ARG_##tyy(y), \ |
| 238 | 1, cyv); \ |
| 239 | CHECK_##tyzz(zz_exp, zz_out) \ |
| 240 | CHECK_##tyyy(yy_exp, yy_out) \ |
| 241 | CHECK_##tycc(cc_exp, cc_out) \ |
| 242 | \ |
| 243 | if (!ok) { \ |
| 244 | fputs(#fn " failed", stderr); \ |
| 245 | DUMP_##tya(a) \ |
| 246 | DUMP_##tyc(c) \ |
| 247 | DUMP_##tyu(u) \ |
| 248 | DUMP_##tyx(x) \ |
| 249 | DUMP_##tyv(v) \ |
| 250 | DUMP_##tyy(y) \ |
| 251 | DUMP_##tyzz(zz_exp) \ |
| 252 | DUMP_##tyzz(zz_out) \ |
| 253 | DUMP_##tyyy(yy_exp) \ |
| 254 | DUMP_##tyyy(yy_out) \ |
| 255 | DUMP_##tycc(cc_exp) \ |
| 256 | DUMP_##tycc(cc_out) \ |
| 257 | fputc('\n', stderr); \ |
| 258 | } \ |
| 259 | \ |
| 260 | FREE_OUT_##tyzz(zz_out); \ |
| 261 | FREE_OUT_##tyyy(yy_out); \ |
| 262 | FREE_OUT_##tycc(cc_out); \ |
| 263 | \ |
| 264 | return (ok); \ |
| 265 | } |
| 266 | |
| 267 | TESTOPS(DEFTESTFN) |
| 268 | |
| 269 | /*----- Main code ---------------------------------------------------------*/ |
| 270 | |
| 271 | #define NIL |
| 272 | #define P128 &type_p128, |
| 273 | #define X128 &type_x128, |
| 274 | #define CARRY &type_carry, |
| 275 | |
| 276 | static test_chunk tests[] = { |
| 277 | #define DEFCHUNK(fn, tya, tyc, tyu, tyx, tyv, tyy, tyzz, tyyy, tycc) \ |
| 278 | { #fn, v##fn, { tya tyc tyu tyx tyv tyy tyzz tyyy tycc } }, |
| 279 | TESTOPS(DEFCHUNK) |
| 280 | #undef DEFCHUNK |
| 281 | { 0, 0, { 0 } } |
| 282 | }; |
| 283 | |
| 284 | int main(int argc, char *argv[]) |
| 285 | { |
| 286 | sub_init(); |
| 287 | if (!cpu_features_p()) |
| 288 | { fprintf(stderr, "required cpu feature not available\n"); exit(77); } |
| 289 | test_run(argc, argv, tests, SRCDIR "/t/mpx-mul4"); |
| 290 | return (0); |
| 291 | } |
| 292 | |
| 293 | /*----- That's all, folks -------------------------------------------------*/ |