* @obits@ = width of output units, in bits
* @oavail@ = condition expression: is output space available?
* @putbits@ = function or macro: write its argument to output
+ * @fixfinal@ = statements to fix shift register at the end
* @clear@ = statements to clear remainder of output
*
* Use: Generates a function to convert between a sequence of
#define MPX_LOADSTORE(name, wconst, oconst, decls, \
ibits, iavail, getbits, obits, oavail, putbits, \
- clear) \
+ fixfinal, clear) \
\
void mpx_##name(wconst mpw *v, wconst mpw *vl, \
oconst void *pp, size_t sz) \
} \
\
flush: \
- while (bits > 0) MPX_PUTBITS(ibits, obits, oavail, putbits); \
+ if (bits) { \
+ fixfinal; \
+ while (bits > 0) MPX_PUTBITS(ibits, obits, oavail, putbits); \
+ } \
clear; \
}
*v++ = _t; \
} while (0)
/*
 * Patch hunk: the `-' lines remove the old FLUSHW_2CN macro and the `+'
 * lines introduce FIXFINALW_2CN in its place; the two share the closing
 * `} while (0)' line.
 *
 * FLUSHW_2CN (removed) called MPX_ONE(v, vl) when c was nonzero and
 * MPX_ZERO(v, vl) otherwise.
 *
 * FIXFINALW_2CN (added) is passed as the @fixfinal@ argument of
 * MPX_LOADSTORE by the loadl2cn and loadb2cn instantiations, so it runs
 * in the flush path only when bits is nonzero, just before the remaining
 * bits are written out.  It adjusts t as follows:
 *
 *   * if c is nonzero and both w and t are zero: t is left alone;
 *   * otherwise, if bits == 8: every bit of t above the low eight is
 *     inverted (XOR with ~(mpw)0xffu);
 *   * otherwise: t is XORed with 2^(MPW_BITS - bits + 8) - 2^8, i.e. the
 *     bits from position 8 up to position MPW_BITS - bits + 7 are
 *     inverted.
 *
 * NOTE(review): c, w, t and bits are not declared in this macro; they
 * presumably are the two's complement carry and the shift-register state
 * of the enclosing MPX_LOADSTORE function -- confirm against the full
 * definition.
 * NOTE(review): bits == 8 is presumably handled separately because the
 * general formula would then shift by MPW_BITS -- confirm.
 */
-#define FLUSHW_2CN do { \
- if (c) MPX_ONE(v, vl); \
- else MPX_ZERO(v, vl); \
+#define FIXFINALW_2CN do { \
+ if (c && !w && !t); \
+ else if (bits == 8) t ^= ~(mpw)0xffu; \
+ else t ^= ((mpw)1 << (MPW_BITS - bits + 8)) - 256u; \
} while (0)
/*
 * FLUSHO_2CN: fill the bytes from p up to (not including) q with a single
 * constant selected by c.  It is passed as the @clear@ argument of
 * MPX_LOADSTORE by the storel2cn and storeb2cn instantiations.
 *
 * This hunk inverts the selection: the removed line wrote 0xff when c was
 * nonzero and 0 otherwise; the added line writes 0 when c is nonzero and
 * 0xff otherwise.
 *
 * NOTE(review): c is presumably the pending two's complement carry set up
 * by DECL_2CN; p and q presumably delimit the unwritten part of the output
 * buffer -- neither is declared here, confirm against the full macro.
 */
#define FLUSHO_2CN do { \
- memset(p, c ? 0xff : 0, q - p); \
+ memset(p, c ? 0 : 0xff, q - p); \
} while (0)
/* --- @mpx_storel@ --- *
MPX_LOADSTORE(storel, const, EMPTY, EMPTY,
MPW_BITS, (v < vl), GETMPW,
8, (p < q), PUTOCTETI,
- { memset(p, 0, q - p); })
+ EMPTY, { memset(p, 0, q - p); })
/* --- @mpx_loadl@ --- *
*
MPX_LOADSTORE(loadl, EMPTY, const, EMPTY,
8, (p < q), GETOCTETI,
MPW_BITS, (v < vl), PUTMPW,
- { MPX_ZERO(v, vl); })
+ EMPTY, { MPX_ZERO(v, vl); })
/* --- @mpx_storeb@ --- *
MPX_LOADSTORE(storeb, const, EMPTY, EMPTY,
MPW_BITS, (v < vl), GETMPW,
8, (p < q), PUTOCTETD,
- { memset(p, 0, q - p); })
+ EMPTY, { memset(p, 0, q - p); })
/* --- @mpx_loadb@ --- *
*
MPX_LOADSTORE(loadb, EMPTY, const, EMPTY,
8, (p < q), GETOCTETD,
MPW_BITS, (v < vl), PUTMPW,
- { MPX_ZERO(v, vl); })
+ EMPTY, { MPX_ZERO(v, vl); })
/* --- @mpx_storel2cn@ --- *
*
MPX_LOADSTORE(storel2cn, const, EMPTY, DECL_2CN,
MPW_BITS, (v < vl), GETMPW_2CN,
8, (p < q), PUTOCTETI,
- { FLUSHO_2CN; })
+ EMPTY, { FLUSHO_2CN; })
/* --- @mpx_loadl2cn@ --- *
*
MPX_LOADSTORE(loadl2cn, EMPTY, const, DECL_2CN,
8, (p < q), GETOCTETI,
MPW_BITS, (v < vl), PUTMPW_2CN,
- { FLUSHW_2CN; })
+ { FIXFINALW_2CN; }, { MPX_ZERO(v, vl); })
/* --- @mpx_storeb2cn@ --- *
*
MPX_LOADSTORE(storeb2cn, const, EMPTY, DECL_2CN,
MPW_BITS, (v < vl), GETMPW_2CN,
8, (p < q), PUTOCTETD,
- { FLUSHO_2CN; })
+ EMPTY, { FLUSHO_2CN; })
/* --- @mpx_loadb2cn@ --- *
*
MPX_LOADSTORE(loadb2cn, EMPTY, const, DECL_2CN,
8, (p < q), GETOCTETD,
MPW_BITS, (v < vl), PUTMPW_2CN,
- { FLUSHW_2CN; })
+ { FIXFINALW_2CN; }, { MPX_ZERO(v, vl); })
/*----- Logical shifting --------------------------------------------------*/
size_t nr = MPW_BITS - nb;
mpw w;
- av += nw;
- w = av < avl ? *av++ : 0;
- while (av < avl) {
- mpw t;
- if (dv >= dvl) goto done;
- t = *av++;
- *dv++ = MPW((w >> nb) | (t << nr));
- w = t;
+ if (nw >= avl - av)
+ w = 0;
+ else {
+ av += nw;
+ w = *av++;
+
+ while (av < avl) {
+ mpw t;
+ if (dv >= dvl) goto done;
+ t = *av++;
+ *dv++ = MPW((w >> nb) | (t << nr));
+ w = t;
+ }
}
+
if (dv < dvl) {
*dv++ = MPW(w >> nb);
MPX_ZERO(dv, dvl);
MAYBE_UMUL4(x86_sse2)
#endif
+#if CPUFAM_AMD64
+ MAYBE_UMUL4(amd64_sse2)
+#endif
+
/*
 * pick_umul: select the implementation to use for mpx_umul.
 *
 * Returns a pointer to an mpx_umul__functype.  Candidates are listed in
 * order:
 *
 *   * when CPUFAM_X86 is set, maybe_umul4_x86_sse2, conditional on
 *     cpu_feature_p(CPUFEAT_X86_SSE2);
 *   * when CPUFAM_AMD64 is set (the `+' lines added by this patch),
 *     maybe_umul4_amd64_sse2, conditional on the same CPUFEAT_X86_SSE2
 *     feature test;
 *   * finally simple_umul, named via DISPATCH_PICK_FALLBACK.
 *
 * NOTE(review): DISPATCH_PICK_COND and DISPATCH_PICK_FALLBACK are defined
 * elsewhere; presumably the former returns its candidate when the
 * condition holds and the latter returns unconditionally -- confirm.
 */
static mpx_umul__functype *pick_umul(void)
{
#if CPUFAM_X86
DISPATCH_PICK_COND(mpx_umul, maybe_umul4_x86_sse2,
cpu_feature_p(CPUFEAT_X86_SSE2));
#endif
+#if CPUFAM_AMD64
+ DISPATCH_PICK_COND(mpx_umul, maybe_umul4_amd64_sse2,
+ cpu_feature_p(CPUFEAT_X86_SSE2));
+#endif
DISPATCH_PICK_FALLBACK(mpx_umul, simple_umul);
}
static int twocl(dstr *v)
{
dstr d = DSTR_INIT;
- mpw *m, *ml;
- size_t sz;
+ mpw *m, *ml0, *ml1;
+ size_t sz0, sz1, szmax;
int ok = 1;
+ int i;
- sz = v[0].len; if (v[1].len > sz) sz = v[1].len;
- dstr_ensure(&d, sz);
+ sz0 = MPW_RQ(v[0].len); sz1 = MPW_RQ(v[1].len);
+ dstr_ensure(&d, v[0].len > v[1].len ? v[0].len : v[1].len);
- sz = MPW_RQ(sz);
- m = xmalloc(MPWS(sz));
- ml = m + sz;
+ szmax = sz0 > sz1 ? sz0 : sz1;
+ m = xmalloc(MPWS(szmax));
+ ml0 = m + sz0; ml1 = m + sz1;
- mpx_loadl(m, ml, v[0].buf, v[0].len);
- mpx_storel2cn(m, ml, d.buf, v[1].len);
- if (memcmp(d.buf, v[1].buf, v[1].len)) {
- dumpbits("\n*** storel2cn failed", d.buf, v[1].len);
- ok = 0;
- }
+ for (i = 0; i < 2; i++) {
+ if (i) ml0 = ml1 = m + szmax;
- mpx_loadl2cn(m, ml, v[1].buf, v[1].len);
- mpx_storel(m, ml, d.buf, v[0].len);
- if (memcmp(d.buf, v[0].buf, v[0].len)) {
- dumpbits("\n*** loadl2cn failed", d.buf, v[0].len);
- ok = 0;
+ mpx_loadl(m, ml0, v[0].buf, v[0].len);
+ mpx_storel2cn(m, ml0, d.buf, v[1].len);
+ if (memcmp(d.buf, v[1].buf, v[1].len)) {
+ dumpbits("\n*** storel2cn failed", d.buf, v[1].len);
+ ok = 0;
+ }
+
+ mpx_loadl2cn(m, ml1, v[1].buf, v[1].len);
+ mpx_storel(m, ml1, d.buf, v[0].len);
+ if (memcmp(d.buf, v[0].buf, v[0].len)) {
+ dumpbits("\n*** loadl2cn failed", d.buf, v[0].len);
+ ok = 0;
+ }
}
if (!ok) {
static int twocb(dstr *v)
{
dstr d = DSTR_INIT;
- mpw *m, *ml;
- size_t sz;
+ mpw *m, *ml0, *ml1;
+ size_t sz0, sz1, szmax;
int ok = 1;
+ int i;
- sz = v[0].len; if (v[1].len > sz) sz = v[1].len;
- dstr_ensure(&d, sz);
+ sz0 = MPW_RQ(v[0].len); sz1 = MPW_RQ(v[1].len);
+ dstr_ensure(&d, v[0].len > v[1].len ? v[0].len : v[1].len);
- sz = MPW_RQ(sz);
- m = xmalloc(MPWS(sz));
- ml = m + sz;
+ szmax = sz0 > sz1 ? sz0 : sz1;
+ m = xmalloc(MPWS(szmax));
+ ml0 = m + sz0; ml1 = m + sz1;
- mpx_loadb(m, ml, v[0].buf, v[0].len);
- mpx_storeb2cn(m, ml, d.buf, v[1].len);
- if (memcmp(d.buf, v[1].buf, v[1].len)) {
- dumpbits("\n*** storeb2cn failed", d.buf, v[1].len);
- ok = 0;
- }
+ for (i = 0; i < 2; i++) {
+ if (i) ml0 = ml1 = m + szmax;
- mpx_loadb2cn(m, ml, v[1].buf, v[1].len);
- mpx_storeb(m, ml, d.buf, v[0].len);
- if (memcmp(d.buf, v[0].buf, v[0].len)) {
- dumpbits("\n*** loadb2cn failed", d.buf, v[0].len);
- ok = 0;
+ mpx_loadb(m, ml0, v[0].buf, v[0].len);
+ mpx_storeb2cn(m, ml0, d.buf, v[1].len);
+ if (memcmp(d.buf, v[1].buf, v[1].len)) {
+ dumpbits("\n*** storeb2cn failed", d.buf, v[1].len);
+ ok = 0;
+ }
+
+ mpx_loadb2cn(m, ml1, v[1].buf, v[1].len);
+ mpx_storeb(m, ml1, d.buf, v[0].len);
+ if (memcmp(d.buf, v[0].buf, v[0].len)) {
+ dumpbits("\n*** loadb2cn failed", d.buf, v[0].len);
+ ok = 0;
+ }
}
if (!ok) {