| 1 | /* -*-c-*- |
| 2 | * |
| 3 | * Testing optimized 128-bit multipliers |
| 4 | * |
| 5 | * (c) 2017 Straylight/Edgeware |
| 6 | */ |
| 7 | |
| 8 | /*----- Licensing notice --------------------------------------------------* |
| 9 | * |
| 10 | * This file is part of Catacomb. |
| 11 | * |
| 12 | * Catacomb is free software; you can redistribute it and/or modify |
| 13 | * it under the terms of the GNU Library General Public License as |
| 14 | * published by the Free Software Foundation; either version 2 of the |
| 15 | * License, or (at your option) any later version. |
| 16 | * |
| 17 | * Catacomb is distributed in the hope that it will be useful, |
| 18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | * GNU Library General Public License for more details. |
| 21 | * |
| 22 | * You should have received a copy of the GNU Library General Public |
| 23 | * License along with Catacomb; if not, write to the Free |
| 24 | * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
| 25 | * MA 02111-1307, USA. |
| 26 | */ |
| 27 | |
| 28 | /*----- Header files ------------------------------------------------------*/ |
| 29 | |
| 30 | #include "config.h" |
| 31 | |
| 32 | #ifdef ENABLE_ASM_DEBUG |
| 33 | # include "regdump.h" |
| 34 | #endif |
| 35 | |
| 36 | #include <assert.h> |
| 37 | #include <stdio.h> |
| 38 | #include <stdlib.h> |
| 39 | #include <string.h> |
| 40 | |
| 41 | #include <mLib/macros.h> |
| 42 | #include <mLib/report.h> |
| 43 | #include <mLib/testrig.h> |
| 44 | |
| 45 | #include "dispatch.h" |
| 46 | #include "mp.h" |
| 47 | #include "mpmont.h" |
| 48 | #include "mptext.h" |
| 49 | |
| 50 | /*----- CPU feature detection ---------------------------------------------*/ |
| 51 | |
| 52 | #if CPUFAM_X86 |
| 53 | # define VARIANT _x86_sse2 |
| 54 | # define REPR_32 |
| 55 | static int cpu_features_p(void) { return (cpu_feature_p(CPUFEAT_X86_SSE2)); } |
| 56 | #endif |
| 57 | |
| 58 | #if CPUFAM_AMD64 |
| 59 | # define VARIANT _amd64_sse2 |
| 60 | # define REPR_32 |
| 61 | static int cpu_features_p(void) { return (cpu_feature_p(CPUFEAT_X86_SSE2)); } |
| 62 | #endif |
| 63 | |
| 64 | #if CPUFAM_ARMEL |
| 65 | # define VARIANT _arm_neon |
| 66 | # define REPR_32 |
| 67 | static int cpu_features_p(void) { return (cpu_feature_p(CPUFEAT_ARM_NEON)); } |
| 68 | #endif |
| 69 | |
| 70 | #if CPUFAM_ARM64 |
| 71 | # define VARIANT _arm64_simd |
| 72 | # define REPR_32 |
| 73 | static int cpu_features_p(void) { return (1); } |
| 74 | #endif |
| 75 | |
| 76 | #ifndef VARIANT |
| 77 | # error "Unsupported CPU family." |
| 78 | #endif |
| 79 | |
#ifdef REPR_32

/* Byte widths of a word and of a double-word as they appear in the test
 * vectors, paired with the load/store macros used to convert them (the
 * `LOAD32' family is defined outside this file).
 */
#  define NWBY 4
#  define LDW LOAD32
#  define STW STORE32

#  define NDBY 8
#  define LDD LOAD64
#  define STD STORE64

/* Operand containers exchanged with the assembler stubs: four words, eight
 * words, and six double-words respectively.
 */
typedef struct {
  mpw w[4];
} p128;

typedef struct {
  mpw w[8];
} x128;

typedef struct {
  mpd w[6];
} carry;

#endif
| 91 | |
| 92 | /*----- Test operation table ----------------------------------------------*/ |
| 93 | |
/* Table of the operations under test.
 *
 * `TESTOPS(OP)' expands to one invocation
 *
 *      OP(fn, a, c, u, x, v, y, zz, yy, cc)
 *
 * per operation.  FN is the operation's name; every other argument is a
 * type tag -- `NIL', `P128', `X128' or `CARRY' -- describing one operand:
 * A, C, U, X, V and Y are the inputs, and ZZ, YY and CC are the outputs
 * (z', y' and c').  `NIL' marks an operand which the operation lacks.
 */
#define TESTOPS(OP)                                                     \
  OP(dmul4,  NIL,  CARRY, P128, P128, P128, P128, P128, NIL,  CARRY)    \
  OP(dmla4,  P128, CARRY, P128, P128, P128, P128, P128, NIL,  CARRY)    \
  OP(mul4,   NIL,  CARRY, NIL,  P128, NIL,  P128, P128, NIL,  CARRY)    \
  OP(mul4zc, NIL,  NIL,   NIL,  P128, NIL,  P128, P128, NIL,  CARRY)    \
  OP(mla4,   P128, CARRY, NIL,  P128, NIL,  P128, P128, NIL,  CARRY)    \
  OP(mla4zc, P128, NIL,   NIL,  P128, NIL,  P128, P128, NIL,  CARRY)    \
  OP(mmul4,  NIL,  NIL,   P128, P128, P128, P128, P128, X128, CARRY)    \
  OP(mmla4,  P128, NIL,   P128, P128, P128, P128, P128, X128, CARRY)    \
  OP(mont4,  P128, NIL,   NIL,  P128, NIL,  P128, P128, X128, CARRY)
| 105 | |
| 106 | /*----- Assembler test interface ------------------------------------------*/ |
| 107 | |
| 108 | #define EMPTY |
| 109 | |
| 110 | #define PARAM(v, ty) ty *v, |
| 111 | #define PARAM_NIL(v, q) |
| 112 | #define PARAM_P128(v, q) PARAM(v, q p128) |
| 113 | #define PARAM_X128(v, q) PARAM(v, q x128) |
| 114 | #define PARAM_CARRY(v, q) PARAM(v, q carry) |
| 115 | |
| 116 | #define DECLSTUB(fn, tya, tyc, tyu, tyx, tyv, tyy, tyzz, tyyy, tycc) \ |
| 117 | extern void test_##fn(PARAM_##tyzz(zz, EMPTY) PARAM_##tycc(cc, EMPTY) \ |
| 118 | PARAM_##tyyy(yy, EMPTY) \ |
| 119 | PARAM_##tyu(u, const) PARAM_##tyx(x, const) \ |
| 120 | PARAM_##tyv(v, const) PARAM_##tyy(y, const) \ |
| 121 | unsigned n, unsigned long long *cyv); |
| 122 | TESTOPS(DECLSTUB) |
| 123 | |
| 124 | /*----- Conversion functions ----------------------------------------------*/ |
| 125 | |
| 126 | static mp *combine_mpw(mp *d, const mpw *v, size_t n, unsigned off) |
| 127 | { |
| 128 | size_t i; |
| 129 | unsigned o; |
| 130 | mp m, *t = d; |
| 131 | mpw w[1]; |
| 132 | |
| 133 | d = MP_ZERO; |
| 134 | for (i = 0, o = 0; i < n; i++, o += off) { |
| 135 | w[0] = v[i]; mp_build(&m, w, w + 1); |
| 136 | t = mp_lsl(t, &m, o); d = mp_add(d, d, t); |
| 137 | } |
| 138 | mp_drop(t); return (d); |
| 139 | } |
| 140 | |
| 141 | static mp *combine_mpd(mp *d, const mpd *v, size_t n, unsigned off) |
| 142 | { |
| 143 | size_t i; |
| 144 | unsigned o; |
| 145 | mp m, *t = d; |
| 146 | mpw w[2]; |
| 147 | |
| 148 | d = MP_ZERO; |
| 149 | for (i = 0, o = 0; i < n; i++, o += off) { |
| 150 | w[0] = MPW(v[i]); w[1] = MPW(v[i] >> MPW_BITS); mp_build(&m, w, w + 2); |
| 151 | t = mp_lsl(t, &m, o); d = mp_add(d, d, t); |
| 152 | } |
| 153 | mp_drop(t); return (d); |
| 154 | } |
| 155 | |
| 156 | #define DEFTYPE(ty, ld, st, nby, combfn, off) \ |
| 157 | \ |
| 158 | static void cvt_##ty(const char *buf, dstr *d) \ |
| 159 | { \ |
| 160 | dstr dd = DSTR_INIT; \ |
| 161 | int i; \ |
| 162 | ty *x; \ |
| 163 | const octet *p; \ |
| 164 | \ |
| 165 | type_hex.cvt(buf, &dd); \ |
| 166 | if (dd.len != N(x->w)*nby) die(1, "invalid length for " #ty); \ |
| 167 | dstr_ensure(d, sizeof(*x)); d->len = sizeof(*x); \ |
| 168 | x = (ty *)d->buf; p = (const octet *)dd.buf; \ |
| 169 | for (i = 0; i < N(x->w); i++) { x->w[i] = ld(p); p += nby; } \ |
| 170 | dstr_destroy(&dd); \ |
| 171 | } \ |
| 172 | \ |
| 173 | static void dump_##ty(dstr *d, FILE *fp) \ |
| 174 | { \ |
| 175 | dstr dd = DSTR_INIT; \ |
| 176 | int i; \ |
| 177 | const ty *x = (const ty *)d->buf; \ |
| 178 | mp *xx = combfn(MP_NEW, x->w, N(x->w), off); \ |
| 179 | octet *p; \ |
| 180 | \ |
| 181 | dstr_ensure(&dd, N(x->w)*nby); p = (octet *)dd.buf; \ |
| 182 | for (i = 0; i < N(x->w); i++) { st(p, x->w[i]); p += nby; } \ |
| 183 | dd.len = N(x->w)*nby; \ |
| 184 | type_hex.dump(&dd, fp); \ |
| 185 | dstr_destroy(&dd); \ |
| 186 | \ |
| 187 | fputs(" = 0x", fp); mp_writefile(xx, fp, 16); \ |
| 188 | fputs(" = ", fp); mp_writefile(xx, fp, 10); \ |
| 189 | MP_DROP(xx); \ |
| 190 | } \ |
| 191 | \ |
| 192 | static int eq_##ty(const ty *x, const ty *y) \ |
| 193 | { \ |
| 194 | mp *xx = combfn(MP_NEW, x->w, N(x->w), off), \ |
| 195 | *yy = combfn(MP_NEW, y->w, N(y->w), off); \ |
| 196 | int rc = MP_EQ(xx, yy); \ |
| 197 | MP_DROP(xx); MP_DROP(yy); \ |
| 198 | return (rc); \ |
| 199 | } \ |
| 200 | \ |
| 201 | static const struct test_type type_##ty = { cvt_##ty, dump_##ty }; |
| 202 | |
| 203 | DEFTYPE(p128, LDW, STW, NWBY, combine_mpw, MPW_BITS) |
| 204 | DEFTYPE(x128, LDW, STW, NWBY, combine_mpw, MPW_BITS/2) |
| 205 | DEFTYPE(carry, LDD, STD, NDBY, combine_mpd, MPW_BITS/2) |
| 206 | |
| 207 | /*----- Test functions ----------------------------------------------------*/ |
| 208 | |
/* Helper macro families for `DEFTESTFN'.
 *
 * Every family consists of a generic worker plus one wrapper per type tag.
 * The `NIL' wrapper always expands to nothing, so operands which an
 * operation lacks leave no trace in the generated function.  Several of
 * the workers rely on names declared by the generated function: `dp' (the
 * cursor over the input dstrs), `ok' (the running verdict) and the
 * `d_NAME' dstr pointers.
 */

/* Declare input VAR: take the next dstr from `dp' as `d_VAR', and point
 * VAR at its contents.
 */
#define DECL_IN(var, type)                                              \
  dstr *d_##var = dp++; const type *var = (const type *)d_##var->buf;
#define DECL_IN_NIL(var)
#define DECL_IN_P128(var) DECL_IN(var, p128)
#define DECL_IN_X128(var) DECL_IN(var, x128)
#define DECL_IN_CARRY(var) DECL_IN(var, carry)

/* Declare output VAR: a fresh dstr `dd_VAR', a pointer `d_VAR' to it, and
 * the (as yet unset) typed pointer VAR.
 */
#define DECL_OUT(var, type)                                             \
  dstr dd_##var = DSTR_INIT, *d_##var = &dd_##var; type *var;
#define DECL_OUT_NIL(var)
#define DECL_OUT_P128(var) DECL_OUT(var, p128)
#define DECL_OUT_X128(var) DECL_OUT(var, x128)
#define DECL_OUT_CARRY(var) DECL_OUT(var, carry)

/* Give output VAR some storage: make room in its dstr and aim VAR at it. */
#define INIT_OUT(var, type)                                             \
  dstr_ensure(d_##var, sizeof(type)); var = (type *)d_##var->buf;
#define INIT_OUT_NIL(var)
#define INIT_OUT_P128(var) INIT_OUT(var, p128)
#define INIT_OUT_X128(var) INIT_OUT(var, x128)
#define INIT_OUT_CARRY(var) INIT_OUT(var, carry)

/* Append VAR to an argument list (note the leading comma). */
#define ARG(var) , var
#define ARG_NIL(var)
#define ARG_P128(var) ARG(var)
#define ARG_X128(var) ARG(var)
#define ARG_CARRY(var) ARG(var)

/* Clear `ok' if LHS and RHS differ according to the type's `eq_...'. */
#define CHECK(lhs, rhs, type) if (!eq_##type(lhs, rhs)) ok = 0;
#define CHECK_NIL(lhs, rhs)
#define CHECK_P128(lhs, rhs) CHECK(lhs, rhs, p128)
#define CHECK_X128(lhs, rhs) CHECK(lhs, rhs, x128)
#define CHECK_CARRY(lhs, rhs) CHECK(lhs, rhs, carry)

/* Print VAR's name and value on standard error, on a line of its own. */
#define DUMP(var, type)                                                 \
  fprintf(stderr, "\n\t%-6s = ", #var); dump_##type(d_##var, stderr);
#define DUMP_NIL(var)
#define DUMP_P128(var) DUMP(var, p128)
#define DUMP_X128(var) DUMP(var, x128)
#define DUMP_CARRY(var) DUMP(var, carry)

/* Copy the object at SRC over the object at DST. */
#define COPY(dst, src, type) *dst = *src;
#define COPY_NIL(dst, src)
#define COPY_P128(dst, src) COPY(dst, src, p128)
#define COPY_X128(dst, src) COPY(dst, src, x128)
#define COPY_CARRY(dst, src) COPY(dst, src, carry)

/* Release the storage behind output VAR. */
#define FREE_OUT(var, type) dstr_destroy(d_##var);
#define FREE_OUT_NIL(var)
#define FREE_OUT_P128(var) FREE_OUT(var, p128)
#define FREE_OUT_X128(var) FREE_OUT(var, x128)
#define FREE_OUT_CARRY(var) FREE_OUT(var, carry)
| 260 | |
| 261 | #define DEFTESTFN(fn, tya, tyc, tyu, tyx, tyv, tyy, tyzz, tyyy, tycc) \ |
| 262 | \ |
| 263 | static int v##fn(dstr dv[]) \ |
| 264 | { \ |
| 265 | dstr *dp = dv; \ |
| 266 | DECL_IN_##tya(a) \ |
| 267 | DECL_IN_##tyc(c) \ |
| 268 | DECL_IN_##tyu(u) \ |
| 269 | DECL_IN_##tyx(x) \ |
| 270 | DECL_IN_##tyv(v) \ |
| 271 | DECL_IN_##tyy(y) \ |
| 272 | DECL_IN_##tyzz(zz_exp) \ |
| 273 | DECL_IN_##tyyy(yy_exp) \ |
| 274 | DECL_IN_##tycc(cc_exp) \ |
| 275 | DECL_OUT_##tyzz(zz_out) \ |
| 276 | DECL_OUT_##tyyy(yy_out) \ |
| 277 | DECL_OUT_##tycc(cc_out) \ |
| 278 | unsigned long long cyv[1]; \ |
| 279 | int ok = 1; \ |
| 280 | \ |
| 281 | INIT_OUT_##tyzz(zz_out) \ |
| 282 | INIT_OUT_##tyyy(yy_out) \ |
| 283 | INIT_OUT_##tycc(cc_out) \ |
| 284 | \ |
| 285 | COPY_##tya(zz_out, a); \ |
| 286 | COPY_##tyc(cc_out, c); \ |
| 287 | test_##fn(zz_out ARG_##tycc(cc_out) ARG_##tyyy(yy_out) \ |
| 288 | ARG_##tyu(u) ARG_##tyx(x) ARG_##tyv(v) ARG_##tyy(y), \ |
| 289 | 1, cyv); \ |
| 290 | CHECK_##tyzz(zz_exp, zz_out) \ |
| 291 | CHECK_##tyyy(yy_exp, yy_out) \ |
| 292 | CHECK_##tycc(cc_exp, cc_out) \ |
| 293 | \ |
| 294 | if (!ok) { \ |
| 295 | fputs(#fn " failed", stderr); \ |
| 296 | DUMP_##tya(a) \ |
| 297 | DUMP_##tyc(c) \ |
| 298 | DUMP_##tyu(u) \ |
| 299 | DUMP_##tyx(x) \ |
| 300 | DUMP_##tyv(v) \ |
| 301 | DUMP_##tyy(y) \ |
| 302 | DUMP_##tyzz(zz_exp) \ |
| 303 | DUMP_##tyzz(zz_out) \ |
| 304 | DUMP_##tyyy(yy_exp) \ |
| 305 | DUMP_##tyyy(yy_out) \ |
| 306 | DUMP_##tycc(cc_exp) \ |
| 307 | DUMP_##tycc(cc_out) \ |
| 308 | fputc('\n', stderr); \ |
| 309 | } \ |
| 310 | \ |
| 311 | FREE_OUT_##tyzz(zz_out); \ |
| 312 | FREE_OUT_##tyyy(yy_out); \ |
| 313 | FREE_OUT_##tycc(cc_out); \ |
| 314 | \ |
| 315 | return (ok); \ |
| 316 | } |
| 317 | |
| 318 | TESTOPS(DEFTESTFN) |
| 319 | |
| 320 | /*----- Main code ---------------------------------------------------------*/ |
| 321 | |
| 322 | #define NIL |
| 323 | #define P128 &type_p128, |
| 324 | #define X128 &type_x128, |
| 325 | #define CARRY &type_carry, |
| 326 | |
| 327 | static test_chunk tests[] = { |
| 328 | #define DEFCHUNK(fn, tya, tyc, tyu, tyx, tyv, tyy, tyzz, tyyy, tycc) \ |
| 329 | { #fn, v##fn, { tya tyc tyu tyx tyv tyy tyzz tyyy tycc } }, |
| 330 | TESTOPS(DEFCHUNK) |
| 331 | #undef DEFCHUNK |
| 332 | { 0, 0, { 0 } } |
| 333 | }; |
| 334 | |
| 335 | int main(int argc, char *argv[]) |
| 336 | { |
| 337 | sub_init(); |
| 338 | #ifdef ENABLE_ASM_DEBUG |
| 339 | regdump_init(); |
| 340 | #endif |
| 341 | if (!cpu_features_p()) |
| 342 | { fprintf(stderr, "required cpu feature not available\n"); exit(77); } |
| 343 | test_run(argc, argv, tests, SRCDIR "/t/mpx-mul4"); |
| 344 | return (0); |
| 345 | } |
| 346 | |
| 347 | /*----- That's all, folks -------------------------------------------------*/ |