From: Mark Wooding Date: Wed, 2 Apr 2014 23:04:24 +0000 (+0100) Subject: math/mpx.c: Eliminate clone-and-hack from `mpx_{load,store}{l,b}{,2cn}. X-Git-Tag: 2.1.7~10 X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/commitdiff_plain/0c9ebe471cfa8343f2ac5d8bd206870f82e87837 math/mpx.c: Eliminate clone-and-hack from `mpx_{load,store}{l,b}{,2cn}. Introduce another pile of macros. The implementations probably aren't as efficient as the hand-coded versions, but I don't think they were ever time-critical. --- diff --git a/math/mpx.c b/math/mpx.c index 5f7ffab0..2745fe0f 100644 --- a/math/mpx.c +++ b/math/mpx.c @@ -41,6 +41,161 @@ /*----- Loading and storing -----------------------------------------------*/ +/* --- These are all variations on a theme --- * + * + * Essentially we want to feed bits into a shift register, @ibits@ bits at a + * time, and extract them @obits@ bits at a time whenever there are enough. + * Of course, @i@ and @o@ will, in general, be different sizes, and we don't + * necessarily know which is larger. + * + * During an operation, we have a shift register @w@ and a most-recent input + * @t@. Together, these hold @bits@ significant bits of input. We arrange + * that @bits < ibits + obits <= 2*MPW_BITS@, so we can get away with using + * an @mpw@ for both of these quantities. + */ + +/* --- @MPX_GETBITS@ --- * + * + * Arguments: @ibits@ = width of input units, in bits + * @obits@ = width of output units, in bits + * @iavail@ = condition expression: is input data available? + * @getbits@ = function or macro: set argument to next input + * + * Use: Read an input unit into @t@ and update the necessary + * variables. + * + * It is assumed on entry that @bits < obits@. On exit, we have + * @bits < ibits + obits@, and @t@ is live. 
+ */ + +#define MPX_GETBITS(ibits, obits, iavail, getbits) do { \ + if (!iavail) goto flush; \ + if (bits >= ibits) w |= t << (bits - ibits); \ + getbits(t); \ + bits += ibits; \ +} while (0) + +/* --- @MPX_PUTBITS@ --- * + * + * Arguments: @ibits@ = width of input units, in bits + * @obits@ = width of output units, in bits + * @oavail@ = condition expression: is output space available? + * @putbits@ = function or macro: write its argument to output + * + * Use: Emit an output unit, and update the necessary variables. If + * the output buffer is full, then force an immediate return. + * + * We assume that @bits < ibits + obits@, and that @t@ is only + * relevant if @bits >= ibits@. (The @MPX_GETBITS@ macro + * ensures that this is true.) + */ + +#define SHRW(w, b) ((b) < MPW_BITS ? (w) >> (b) : 0) + +#define MPX_PUTBITS(ibits, obits, oavail, putbits) do { \ + if (!oavail) return; \ + if (bits < ibits) { \ + putbits(w); \ + bits -= obits; \ + w = SHRW(w, obits); \ + } else { \ + putbits(w | (t << (bits - ibits))); \ + bits -= obits; \ + if (bits >= ibits) w = SHRW(w, obits) | (t << (bits - ibits)); \ + else w = SHRW(w, obits) | (t >> (ibits - bits)); \ + t = 0; \ + } \ +} while (0) + +/* --- @MPX_LOADSTORE@ --- * + * + * Arguments: @name@ = name of function to create, without @mpx_@ prefix + * @wconst@ = qualifiers for @mpw *@ arguments + * @oconst@ = qualifiers for octet pointers + * @decls@ = additional declarations needed + * @ibits@ = width of input units, in bits + * @iavail@ = condition expression: is input data available? + * @getbits@ = function or macro: set argument to next input + * @obits@ = width of output units, in bits + * @oavail@ = condition expression: is output space available? + * @putbits@ = function or macro: write its argument to output + * @clear@ = statements to clear remainder of output + * + * Use: Generates a function to convert between a sequence of + * multiprecision words and a vector of octets. 
+ * + * The arguments @ibits@, @iavail@ and @getbits@ are passed on + * to @MPX_GETBITS@; similarly, @obits@, @oavail@, and @putbits@ + * are passed on to @MPX_PUTBITS@. + * + * The following variables are in scope: @v@ and @vl@ are the + * current base and limit of the word vector; @p@ and @q@ are + * the base and limit of the octet vector; @w@ and @t@ form the + * shift register used during the conversion (see commentary + * above); and @bits@ tracks the number of live bits in the + * shift register. + */ + +#define MPX_LOADSTORE(name, wconst, oconst, decls, \ + ibits, iavail, getbits, obits, oavail, putbits, \ + clear) \ + \ +void mpx_##name(wconst mpw *v, wconst mpw *vl, \ + oconst void *pp, size_t sz) \ +{ \ + mpw t = 0, w = 0; \ + oconst octet *p = pp, *q = p + sz; \ + int bits = 0; \ + decls \ + \ + for (;;) { \ + while (bits < obits) MPX_GETBITS(ibits, obits, iavail, getbits); \ + while (bits >= obits) MPX_PUTBITS(ibits, obits, oavail, putbits); \ + } \ + \ +flush: \ + while (bits > 0) MPX_PUTBITS(ibits, obits, oavail, putbits); \ + clear; \ +} + +#define EMPTY + +/* --- Macros for @getbits@ and @putbits@ --- */ + +#define GETMPW(t) do { t = *v++; } while (0) +#define PUTMPW(x) do { *v++ = MPW(x); } while (0) + +#define GETOCTETI(t) do { t = *p++; } while (0) +#define PUTOCTETD(x) do { *--q = U8(x); } while (0) + +#define PUTOCTETI(x) do { *p++ = U8(x); } while (0) +#define GETOCTETD(t) do { t = *--q; } while (0) + +/* --- Machinery for two's complement I/O --- */ + +#define DECL_2CN \ + unsigned c = 1; + +#define GETMPW_2CN(t) do { \ + t = MPW(~*v++ + c); \ + c = c && !t; \ +} while (0) + +#define PUTMPW_2CN(t) do { \ + mpw _t = MPW(~(t) + c); \ + c = c && !_t; \ + *v++ = _t; \ +} while (0) + +#define FLUSHW_2CN do { \ + if (c) MPX_ONE(v, vl); \ + else MPX_ZERO(v, vl); \ +} while (0) + +#define FLUSHO_2CN do { \ + memset(p, c ? 
0xff : 0, q - p); \ +} while (0) + /* --- @mpx_storel@ --- * * * Arguments: @const mpw *v, *vl@ = base and limit of source vector @@ -54,30 +209,10 @@ * isn't enough space for them. */ -void mpx_storel(const mpw *v, const mpw *vl, void *pp, size_t sz) -{ - mpw n, w = 0; - octet *p = pp, *q = p + sz; - unsigned bits = 0; - - while (p < q) { - if (bits < 8) { - if (v >= vl) { - *p++ = U8(w); - break; - } - n = *v++; - *p++ = U8(w | n << bits); - w = n >> (8 - bits); - bits += MPW_BITS - 8; - } else { - *p++ = U8(w); - w >>= 8; - bits -= 8; - } - } - memset(p, 0, q - p); -} +MPX_LOADSTORE(storel, const, EMPTY, EMPTY, + MPW_BITS, (v < vl), GETMPW, + 8, (p < q), PUTOCTETI, + { memset(p, 0, q - p); }) /* --- @mpx_loadl@ --- * * @@ -92,30 +227,11 @@ void mpx_storel(const mpw *v, const mpw *vl, void *pp, size_t sz) * space for them. */ -void mpx_loadl(mpw *v, mpw *vl, const void *pp, size_t sz) -{ - unsigned n; - mpw w = 0; - const octet *p = pp, *q = p + sz; - unsigned bits = 0; +MPX_LOADSTORE(loadl, EMPTY, const, EMPTY, + 8, (p < q), GETOCTETI, + MPW_BITS, (v < vl), PUTMPW, + { MPX_ZERO(v, vl); }) - if (v >= vl) - return; - while (p < q) { - n = U8(*p++); - w |= n << bits; - bits += 8; - if (bits >= MPW_BITS) { - *v++ = MPW(w); - w = n >> (MPW_BITS - bits + 8); - bits -= MPW_BITS; - if (v >= vl) - return; - } - } - *v++ = w; - MPX_ZERO(v, vl); -} /* --- @mpx_storeb@ --- * * @@ -130,30 +246,10 @@ void mpx_loadl(mpw *v, mpw *vl, const void *pp, size_t sz) * isn't enough space for them. 
*/ -void mpx_storeb(const mpw *v, const mpw *vl, void *pp, size_t sz) -{ - mpw n, w = 0; - octet *p = pp, *q = p + sz; - unsigned bits = 0; - - while (q > p) { - if (bits < 8) { - if (v >= vl) { - *--q = U8(w); - break; - } - n = *v++; - *--q = U8(w | n << bits); - w = n >> (8 - bits); - bits += MPW_BITS - 8; - } else { - *--q = U8(w); - w >>= 8; - bits -= 8; - } - } - memset(p, 0, q - p); -} +MPX_LOADSTORE(storeb, const, EMPTY, EMPTY, + MPW_BITS, (v < vl), GETMPW, + 8, (p < q), PUTOCTETD, + { memset(p, 0, q - p); }) /* --- @mpx_loadb@ --- * * @@ -168,30 +264,10 @@ void mpx_storeb(const mpw *v, const mpw *vl, void *pp, size_t sz) * space for them. */ -void mpx_loadb(mpw *v, mpw *vl, const void *pp, size_t sz) -{ - unsigned n; - mpw w = 0; - const octet *p = pp, *q = p + sz; - unsigned bits = 0; - - if (v >= vl) - return; - while (q > p) { - n = U8(*--q); - w |= n << bits; - bits += 8; - if (bits >= MPW_BITS) { - *v++ = MPW(w); - w = n >> (MPW_BITS - bits + 8); - bits -= MPW_BITS; - if (v >= vl) - return; - } - } - *v++ = w; - MPX_ZERO(v, vl); -} +MPX_LOADSTORE(loadb, EMPTY, const, EMPTY, + 8, (p < q), GETOCTETD, + MPW_BITS, (v < vl), PUTMPW, + { MPX_ZERO(v, vl); }) /* --- @mpx_storel2cn@ --- * * @@ -207,40 +283,10 @@ void mpx_loadb(mpw *v, mpw *vl, const void *pp, size_t sz) * This obviously makes the output bad. 
*/ -void mpx_storel2cn(const mpw *v, const mpw *vl, void *pp, size_t sz) -{ - unsigned c = 1; - unsigned b = 0; - mpw n, w = 0; - octet *p = pp, *q = p + sz; - unsigned bits = 0; - - while (p < q) { - if (bits < 8) { - if (v >= vl) { - b = w; - break; - } - n = *v++; - b = w | n << bits; - w = n >> (8 - bits); - bits += MPW_BITS - 8; - } else { - b = w; - w >>= 8; - bits -= 8; - } - b = U8(~b + c); - c = c && !b; - *p++ = b; - } - while (p < q) { - b = U8(~b + c); - c = c && !b; - *p++ = b; - b = 0; - } -} +MPX_LOADSTORE(storel2cn, const, EMPTY, DECL_2CN, + MPW_BITS, (v < vl), GETMPW_2CN, + 8, (p < q), PUTOCTETI, + { FLUSHO_2CN; }) /* --- @mpx_loadl2cn@ --- * * @@ -256,32 +302,10 @@ void mpx_storel2cn(const mpw *v, const mpw *vl, void *pp, size_t sz) * means you made the wrong choice coming here. */ -void mpx_loadl2cn(mpw *v, mpw *vl, const void *pp, size_t sz) -{ - unsigned n; - unsigned c = 1; - mpw w = 0; - const octet *p = pp, *q = p + sz; - unsigned bits = 0; - - if (v >= vl) - return; - while (p < q) { - n = U8(~(*p++) + c); - c = c && !n; - w |= n << bits; - bits += 8; - if (bits >= MPW_BITS) { - *v++ = MPW(w); - w = n >> (MPW_BITS - bits + 8); - bits -= MPW_BITS; - if (v >= vl) - return; - } - } - *v++ = w; - MPX_ZERO(v, vl); -} +MPX_LOADSTORE(loadl2cn, EMPTY, const, DECL_2CN, + 8, (p < q), GETOCTETI, + MPW_BITS, (v < vl), PUTMPW_2CN, + { FLUSHW_2CN; }) /* --- @mpx_storeb2cn@ --- * * @@ -297,40 +321,10 @@ void mpx_loadl2cn(mpw *v, mpw *vl, const void *pp, size_t sz) * which probably isn't what you meant. 
*/ -void mpx_storeb2cn(const mpw *v, const mpw *vl, void *pp, size_t sz) -{ - mpw n, w = 0; - unsigned b = 0; - unsigned c = 1; - octet *p = pp, *q = p + sz; - unsigned bits = 0; - - while (q > p) { - if (bits < 8) { - if (v >= vl) { - b = w; - break; - } - n = *v++; - b = w | n << bits; - w = n >> (8 - bits); - bits += MPW_BITS - 8; - } else { - b = w; - w >>= 8; - bits -= 8; - } - b = U8(~b + c); - c = c && !b; - *--q = b; - } - while (q > p) { - b = ~b + c; - c = c && !(b & 0xff); - *--q = b; - b = 0; - } -} +MPX_LOADSTORE(storeb2cn, const, EMPTY, DECL_2CN, + MPW_BITS, (v < vl), GETMPW_2CN, + 8, (p < q), PUTOCTETD, + { FLUSHO_2CN; }) /* --- @mpx_loadb2cn@ --- * * @@ -346,32 +340,10 @@ void mpx_storeb2cn(const mpw *v, const mpw *vl, void *pp, size_t sz) * chose this function wrongly. */ -void mpx_loadb2cn(mpw *v, mpw *vl, const void *pp, size_t sz) -{ - unsigned n; - unsigned c = 1; - mpw w = 0; - const octet *p = pp, *q = p + sz; - unsigned bits = 0; - - if (v >= vl) - return; - while (q > p) { - n = U8(~(*--q) + c); - c = c && !n; - w |= n << bits; - bits += 8; - if (bits >= MPW_BITS) { - *v++ = MPW(w); - w = n >> (MPW_BITS - bits + 8); - bits -= MPW_BITS; - if (v >= vl) - return; - } - } - *v++ = w; - MPX_ZERO(v, vl); -} +MPX_LOADSTORE(loadb2cn, EMPTY, const, DECL_2CN, + 8, (p < q), GETOCTETD, + MPW_BITS, (v < vl), PUTMPW_2CN, + { FLUSHW_2CN; }) /*----- Logical shifting --------------------------------------------------*/