| 1 | /* -*-c-*- |
| 2 | * |
| 3 | * Testing optimized 128-bit multipliers |
| 4 | * |
| 5 | * (c) 2017 Straylight/Edgeware |
| 6 | */ |
| 7 | |
| 8 | /*----- Licensing notice --------------------------------------------------* |
| 9 | * |
| 10 | * This file is part of Catacomb. |
| 11 | * |
| 12 | * Catacomb is free software; you can redistribute it and/or modify |
| 13 | * it under the terms of the GNU Library General Public License as |
| 14 | * published by the Free Software Foundation; either version 2 of the |
| 15 | * License, or (at your option) any later version. |
| 16 | * |
| 17 | * Catacomb is distributed in the hope that it will be useful, |
| 18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | * GNU Library General Public License for more details. |
| 21 | * |
| 22 | * You should have received a copy of the GNU Library General Public |
| 23 | * License along with Catacomb; if not, write to the Free |
| 24 | * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
| 25 | * MA 02111-1307, USA. |
| 26 | */ |
| 27 | |
| 28 | /*----- Header files ------------------------------------------------------*/ |
| 29 | |
| 30 | #include "config.h" |
| 31 | |
| 32 | #ifdef ENABLE_ASM_DEBUG |
| 33 | # include "regdump.h" |
| 34 | #endif |
| 35 | |
| 36 | #include <assert.h> |
| 37 | #include <stdio.h> |
| 38 | #include <stdlib.h> |
| 39 | #include <string.h> |
| 40 | |
| 41 | #include <mLib/macros.h> |
| 42 | #include <mLib/report.h> |
| 43 | #include <mLib/testrig.h> |
| 44 | |
| 45 | #include "dispatch.h" |
| 46 | #include "mp.h" |
| 47 | #include "mpmont.h" |
| 48 | #include "mptext.h" |
| 49 | |
| 50 | /*----- CPU feature detection ---------------------------------------------*/ |
| 51 | |
/* Choose the implementation to test from the CPU-family macros.  Each
 * supported family defines `VARIANT' and a representation tag (`REPR_32'
 * for both families here) and supplies `cpu_features_p', which `main'
 * consults before running any tests.
 *
 * NOTE(review): `VARIANT' is only inspected by the `#ifndef' check below in
 * the part of the file visible here; presumably it names the assembler
 * variant under test -- confirm against the stub source.
 */

#if CPUFAM_X86
# define VARIANT _x86_sse2
# define REPR_32
/* Return whatever `cpu_feature_p' reports for `CPUFEAT_X86_SSE2'. */
static int cpu_features_p(void) { return (cpu_feature_p(CPUFEAT_X86_SSE2)); }
#endif

#if CPUFAM_AMD64
# define VARIANT _amd64_sse2
# define REPR_32
/* Return whatever `cpu_feature_p' reports for `CPUFEAT_X86_SSE2'. */
static int cpu_features_p(void) { return (cpu_feature_p(CPUFEAT_X86_SSE2)); }
#endif

/* No family above matched: there is nothing to test, so refuse to build. */
#ifndef VARIANT
# error "Unsupported CPU family."
#endif
| 67 | |
/* Definitions for the `REPR_32' representation.
 *
 * `NWBY' and `NDBY' are the numbers of bytes which one `mpw'-sized and one
 * `mpd'-sized element occupy in the hex test-vector form; `LDW'/`STW' and
 * `LDD'/`STD' are the matching load and store macros.  They are handed to
 * `DEFTYPE' below to build the converters for each structure.
 *
 * NOTE(review): the 4- and 8-byte sizes presumably assume a 32-bit `mpw'
 * and a 64-bit `mpd' -- confirm against the build configuration.
 */
#ifdef REPR_32
# define NWBY 4
# define NDBY 8
# define LDW LOAD32
# define LDD LOAD64
# define STW STORE32
# define STD STORE64
/* Four `mpw' words; combined at a spacing of `MPW_BITS' bits per word. */
typedef struct { mpw w[4]; } p128;
/* Eight `mpw' words; combined at a spacing of `MPW_BITS/2' bits per word. */
typedef struct { mpw w[8]; } x128;
/* Six `mpd' values; combined at a spacing of `MPW_BITS/2' bits per value. */
typedef struct { mpd w[6]; } carry;
#endif
| 79 | |
| 80 | /*----- Test operation table ----------------------------------------------*/ |
| 81 | |
/* The table of operations to test, written as an X-macro: `_' is invoked
 * once per operation, with the operation name followed by nine type tags,
 * each one of `NIL', `P128', `X128' or `CARRY'.
 *
 * The columns, in order, are the inputs a, c, u, x, v, y and then the
 * outputs z', y', c'.  A `NIL' tag means that the operation has no such
 * operand: the macros which consume this table expand to nothing for it.
 */
#define TESTOPS(_) \
  /* a c u x v y z' y' c' */ \
  _(dmul4, NIL, CARRY, P128, P128, P128, P128, P128, NIL, CARRY) \
  _(dmla4, P128, CARRY, P128, P128, P128, P128, P128, NIL, CARRY) \
  _(mul4, NIL, CARRY, NIL, P128, NIL, P128, P128, NIL, CARRY) \
  _(mul4zc,NIL, NIL, NIL, P128, NIL, P128, P128, NIL, CARRY) \
  _(mla4, P128, CARRY, NIL, P128, NIL, P128, P128, NIL, CARRY) \
  _(mla4zc,P128, NIL, NIL, P128, NIL, P128, P128, NIL, CARRY) \
  _(mmul4, NIL, NIL, P128, P128, P128, P128, P128, X128, CARRY) \
  _(mmla4, P128, NIL, P128, P128, P128, P128, P128, X128, CARRY) \
  _(mont4, P128, NIL, NIL, P128, NIL, P128, P128, X128, CARRY)
| 93 | |
| 94 | /*----- Assembler test interface ------------------------------------------*/ |
| 95 | |
| 96 | #define EMPTY |
| 97 | |
| 98 | #define PARAM(v, ty) ty *v, |
| 99 | #define PARAM_NIL(v, q) |
| 100 | #define PARAM_P128(v, q) PARAM(v, q p128) |
| 101 | #define PARAM_X128(v, q) PARAM(v, q x128) |
| 102 | #define PARAM_CARRY(v, q) PARAM(v, q carry) |
| 103 | |
| 104 | #define DECLSTUB(fn, tya, tyc, tyu, tyx, tyv, tyy, tyzz, tyyy, tycc) \ |
| 105 | extern void test_##fn(PARAM_##tyzz(zz, EMPTY) PARAM_##tycc(cc, EMPTY) \ |
| 106 | PARAM_##tyyy(yy, EMPTY) \ |
| 107 | PARAM_##tyu(u, const) PARAM_##tyx(x, const) \ |
| 108 | PARAM_##tyv(v, const) PARAM_##tyy(y, const) \ |
| 109 | unsigned n, unsigned long long *cyv); |
| 110 | TESTOPS(DECLSTUB) |
| 111 | |
| 112 | /*----- Conversion functions ----------------------------------------------*/ |
| 113 | |
| 114 | static mp *combine_mpw(mp *d, const mpw *v, size_t n, unsigned off) |
| 115 | { |
| 116 | size_t i; |
| 117 | unsigned o; |
| 118 | mp m, *t = d; |
| 119 | mpw w[1]; |
| 120 | |
| 121 | d = MP_ZERO; |
| 122 | for (i = 0, o = 0; i < n; i++, o += off) { |
| 123 | w[0] = v[i]; mp_build(&m, w, w + 1); |
| 124 | t = mp_lsl(t, &m, o); d = mp_add(d, d, t); |
| 125 | } |
| 126 | mp_drop(t); return (d); |
| 127 | } |
| 128 | |
| 129 | static mp *combine_mpd(mp *d, const mpd *v, size_t n, unsigned off) |
| 130 | { |
| 131 | size_t i; |
| 132 | unsigned o; |
| 133 | mp m, *t = d; |
| 134 | mpw w[2]; |
| 135 | |
| 136 | d = MP_ZERO; |
| 137 | for (i = 0, o = 0; i < n; i++, o += off) { |
| 138 | w[0] = MPW(v[i]); w[1] = MPW(v[i] >> MPW_BITS); mp_build(&m, w, w + 2); |
| 139 | t = mp_lsl(t, &m, o); d = mp_add(d, d, t); |
| 140 | } |
| 141 | mp_drop(t); return (d); |
| 142 | } |
| 143 | |
| 144 | #define DEFTYPE(ty, ld, st, nby, combfn, off) \ |
| 145 | \ |
| 146 | static void cvt_##ty(const char *buf, dstr *d) \ |
| 147 | { \ |
| 148 | dstr dd = DSTR_INIT; \ |
| 149 | int i; \ |
| 150 | ty *x; \ |
| 151 | const octet *p; \ |
| 152 | \ |
| 153 | type_hex.cvt(buf, &dd); \ |
| 154 | if (dd.len != N(x->w)*nby) die(1, "invalid length for " #ty); \ |
| 155 | dstr_ensure(d, sizeof(*x)); d->len = sizeof(*x); \ |
| 156 | x = (ty *)d->buf; p = (const octet *)dd.buf; \ |
| 157 | for (i = 0; i < N(x->w); i++) { x->w[i] = ld(p); p += nby; } \ |
| 158 | dstr_destroy(&dd); \ |
| 159 | } \ |
| 160 | \ |
| 161 | static void dump_##ty(dstr *d, FILE *fp) \ |
| 162 | { \ |
| 163 | dstr dd = DSTR_INIT; \ |
| 164 | int i; \ |
| 165 | const ty *x = (const ty *)d->buf; \ |
| 166 | mp *xx = combfn(MP_NEW, x->w, N(x->w), off); \ |
| 167 | octet *p; \ |
| 168 | \ |
| 169 | dstr_ensure(&dd, N(x->w)*nby); p = (octet *)dd.buf; \ |
| 170 | for (i = 0; i < N(x->w); i++) { st(p, x->w[i]); p += nby; } \ |
| 171 | dd.len = N(x->w)*nby; \ |
| 172 | type_hex.dump(&dd, fp); \ |
| 173 | dstr_destroy(&dd); \ |
| 174 | \ |
| 175 | fputs(" = 0x", fp); mp_writefile(xx, fp, 16); \ |
| 176 | fputs(" = ", fp); mp_writefile(xx, fp, 10); \ |
| 177 | MP_DROP(xx); \ |
| 178 | } \ |
| 179 | \ |
| 180 | static int eq_##ty(const ty *x, const ty *y) \ |
| 181 | { \ |
| 182 | mp *xx = combfn(MP_NEW, x->w, N(x->w), off), \ |
| 183 | *yy = combfn(MP_NEW, y->w, N(y->w), off); \ |
| 184 | int rc = MP_EQ(xx, yy); \ |
| 185 | MP_DROP(xx); MP_DROP(yy); \ |
| 186 | return (rc); \ |
| 187 | } \ |
| 188 | \ |
| 189 | static const struct test_type type_##ty = { cvt_##ty, dump_##ty }; |
| 190 | |
| 191 | DEFTYPE(p128, LDW, STW, NWBY, combine_mpw, MPW_BITS) |
| 192 | DEFTYPE(x128, LDW, STW, NWBY, combine_mpw, MPW_BITS/2) |
| 193 | DEFTYPE(carry, LDD, STD, NDBY, combine_mpd, MPW_BITS/2) |
| 194 | |
| 195 | /*----- Test functions ----------------------------------------------------*/ |
| 196 | |
/* The macro families below are the building blocks of `DEFTESTFN'.  Each
 * family has one member per type tag, selected by token pasting; the `NIL'
 * member always expands to nothing, so that an operand which an operation
 * lacks simply disappears from the generated function.
 */

/* Declare an input: take the next string from the vector cursor `dp'
 * (advancing it), call it `d_<v>', and declare `v' as a pointer to `const'
 * `ty' aimed at its buffer.
 */
#define DECL_IN(v, ty) \
  dstr *d_##v = dp++; const ty *v = (const ty *)d_##v->buf;
#define DECL_IN_NIL(v)
#define DECL_IN_P128(v) DECL_IN(v, p128)
#define DECL_IN_X128(v) DECL_IN(v, x128)
#define DECL_IN_CARRY(v) DECL_IN(v, carry)

/* Declare an output: a fresh dynamic string `dd_<v>', a pointer `d_<v>' to
 * it, and a (not yet initialized) pointer `v' to `ty'.
 */
#define DECL_OUT(v, ty) \
  dstr dd_##v = DSTR_INIT, *d_##v = &dd_##v; ty *v;
#define DECL_OUT_NIL(v)
#define DECL_OUT_P128(v) DECL_OUT(v, p128)
#define DECL_OUT_X128(v) DECL_OUT(v, x128)
#define DECL_OUT_CARRY(v) DECL_OUT(v, carry)

/* Initialize an output: make sure that `d_<v>' has room for a `ty', and
 * aim `v' at its buffer.  The string's length is left alone.
 */
#define INIT_OUT(v, ty) \
  dstr_ensure(d_##v, sizeof(ty)); v = (ty *)d_##v->buf;
#define INIT_OUT_NIL(v)
#define INIT_OUT_P128(v) INIT_OUT(v, p128)
#define INIT_OUT_X128(v) INIT_OUT(v, x128)
#define INIT_OUT_CARRY(v) INIT_OUT(v, carry)

/* Pass an argument: note the LEADING comma, which means that these can
 * only follow an argument which is always present.
 */
#define ARG(v) , v
#define ARG_NIL(v)
#define ARG_P128(v) ARG(v)
#define ARG_X128(v) ARG(v)
#define ARG_CARRY(v) ARG(v)

/* Check a result: clear the enclosing function's `ok' flag if `v' and `vv'
 * differ according to `eq_<ty>'.
 */
#define CHECK(v, vv, ty) if (!eq_##ty(v, vv)) ok = 0;
#define CHECK_NIL(v, vv)
#define CHECK_P128(v, vv) CHECK(v, vv, p128)
#define CHECK_X128(v, vv) CHECK(v, vv, x128)
#define CHECK_CARRY(v, vv) CHECK(v, vv, carry)

/* Dump a value to stderr: a newline and a tab, the variable's name padded
 * to six columns, and then the contents of `d_<v>' as written by
 * `dump_<ty>'.
 */
#define DUMP(v, ty) \
  fprintf(stderr, "\n\t%-6s = ", #v); dump_##ty(d_##v, stderr);
#define DUMP_NIL(v)
#define DUMP_P128(v) DUMP(v, p128)
#define DUMP_X128(v) DUMP(v, x128)
#define DUMP_CARRY(v) DUMP(v, carry)

/* Copy a value: plain structure assignment from `*vv' to `*v'.  The `ty'
 * argument is unused.
 */
#define COPY(v, vv, ty) *v = *vv;
#define COPY_NIL(v, vv)
#define COPY_P128(v, vv) COPY(v, vv, p128)
#define COPY_X128(v, vv) COPY(v, vv, x128)
#define COPY_CARRY(v, vv) COPY(v, vv, carry)

/* Release an output: destroy the string `d_<v>'.  The `ty' argument is
 * unused.
 */
#define FREE_OUT(v, ty) dstr_destroy(d_##v);
#define FREE_OUT_NIL(v)
#define FREE_OUT_P128(v) FREE_OUT(v, p128)
#define FREE_OUT_X128(v) FREE_OUT(v, x128)
#define FREE_OUT_CARRY(v) FREE_OUT(v, carry)
| 248 | |
/* --- @DEFTESTFN@ --- *
 *
 * Arguments:   @fn@ = operation name
 *              @tya@, ..., @tycc@ = type tags, as in a row of @TESTOPS@
 *
 * Use:         Defines the test function @v<fn>@ for one operation.  The
 *              function takes the vector of converted test-vector fields
 *              @dv@, consuming one field for each operand whose tag is not
 *              @NIL@: first the inputs a, c, u, x, v, y, and then the
 *              expected outputs zz, yy, cc.  It calls the stub @test_<fn>@
 *              and compares the results against the expected values.
 *
 *              The function returns 1 if every checked output matched, or
 *              0 otherwise; on a mismatch, it dumps all of the operands,
 *              and the expected and actual outputs, to stderr.
 *
 *              The zz output is passed to the stub unconditionally: every
 *              row of @TESTOPS@ has a @P128@ tag in that column.
 */

#define DEFTESTFN(fn, tya, tyc, tyu, tyx, tyv, tyy, tyzz, tyyy, tycc) \
  \
  static int v##fn(dstr dv[]) \
  { \
    dstr *dp = dv; \
    /* The order of these declarations fixes the order in which the */ \
    /* fields are taken from the vector. */ \
    DECL_IN_##tya(a) \
    DECL_IN_##tyc(c) \
    DECL_IN_##tyu(u) \
    DECL_IN_##tyx(x) \
    DECL_IN_##tyv(v) \
    DECL_IN_##tyy(y) \
    DECL_IN_##tyzz(zz_exp) \
    DECL_IN_##tyyy(yy_exp) \
    DECL_IN_##tycc(cc_exp) \
    DECL_OUT_##tyzz(zz_out) \
    DECL_OUT_##tyyy(yy_out) \
    DECL_OUT_##tycc(cc_out) \
    /* Handed to the stub below, but never examined afterwards. */ \
    unsigned long long cyv[1]; \
    int ok = 1; \
    \
    INIT_OUT_##tyzz(zz_out) \
    INIT_OUT_##tyyy(yy_out) \
    INIT_OUT_##tycc(cc_out) \
    \
    /* Preload the outputs with the a and c inputs, where the operation */ \
    /* has them.  NOTE(review): presumably the stub updates these in */ \
    /* place -- confirm against the stub source. */ \
    COPY_##tya(zz_out, a); \
    COPY_##tyc(cc_out, c); \
    /* NOTE(review): the meaning of the count (here 1) isn't visible in */ \
    /* this file -- confirm against the stub source. */ \
    test_##fn(zz_out ARG_##tycc(cc_out) ARG_##tyyy(yy_out) \
              ARG_##tyu(u) ARG_##tyx(x) ARG_##tyv(v) ARG_##tyy(y), \
              1, cyv); \
    CHECK_##tyzz(zz_exp, zz_out) \
    CHECK_##tyyy(yy_exp, yy_out) \
    CHECK_##tycc(cc_exp, cc_out) \
    \
    /* On failure, show the inputs, then each output as expected/actual. */ \
    if (!ok) { \
      fputs(#fn " failed", stderr); \
      DUMP_##tya(a) \
      DUMP_##tyc(c) \
      DUMP_##tyu(u) \
      DUMP_##tyx(x) \
      DUMP_##tyv(v) \
      DUMP_##tyy(y) \
      DUMP_##tyzz(zz_exp) \
      DUMP_##tyzz(zz_out) \
      DUMP_##tyyy(yy_exp) \
      DUMP_##tyyy(yy_out) \
      DUMP_##tycc(cc_exp) \
      DUMP_##tycc(cc_out) \
      fputc('\n', stderr); \
    } \
    \
    FREE_OUT_##tyzz(zz_out); \
    FREE_OUT_##tyyy(yy_out); \
    FREE_OUT_##tycc(cc_out); \
    \
    return (ok); \
  }

TESTOPS(DEFTESTFN)
| 307 | |
| 308 | /*----- Main code ---------------------------------------------------------*/ |
| 309 | |
/* From here on, the type tags are object-like macros: each expands to the
 * address of the matching `test_type' descriptor followed by a comma, and
 * `NIL' expands to nothing.  `DEFCHUNK' uses the tags bare, so a row of
 * `TESTOPS' turns into the list of field types for that operation, with
 * absent operands omitted.  (The earlier users of `TESTOPS' only ever
 * paste the tags with `##'.)
 */
#define NIL
#define P128 &type_p128,
#define X128 &type_x128,
#define CARRY &type_carry,

/* The test chunks: for each operation, its name, its test function
 * `v<fn>', and its field types in the order a, c, u, x, v, y, z', y', c'.
 * The table ends with an all-zero entry.
 *
 * NOTE(review): the all-zero entry is presumably the end marker which
 * `test_run' looks for -- confirm against the mLib test-rig documentation.
 */
static test_chunk tests[] = {
#define DEFCHUNK(fn, tya, tyc, tyu, tyx, tyv, tyy, tyzz, tyyy, tycc) \
  { #fn, v##fn, { tya tyc tyu tyx tyv tyy tyzz tyyy tycc } },
  TESTOPS(DEFCHUNK)
#undef DEFCHUNK
  { 0, 0, { 0 } }
};
| 322 | |
| 323 | int main(int argc, char *argv[]) |
| 324 | { |
| 325 | sub_init(); |
| 326 | #ifdef ENABLE_ASM_DEBUG |
| 327 | regdump_init(); |
| 328 | #endif |
| 329 | if (!cpu_features_p()) |
| 330 | { fprintf(stderr, "required cpu feature not available\n"); exit(77); } |
| 331 | test_run(argc, argv, tests, SRCDIR "/t/mpx-mul4"); |
| 332 | return (0); |
| 333 | } |
| 334 | |
| 335 | /*----- That's all, folks -------------------------------------------------*/ |