From: Mark Wooding <mdw@distorted.org.uk>
Date: Wed, 2 Apr 2014 23:04:24 +0000 (+0100)
Subject: math/mpx.c: Eliminate clone-and-hack from `mpx_{load,store}{l,b}{,2cn}'.
X-Git-Tag: 2.1.7~10
X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/commitdiff_plain/0c9ebe471cfa8343f2ac5d8bd206870f82e87837

math/mpx.c: Eliminate clone-and-hack from `mpx_{load,store}{l,b}{,2cn}'.

Introduce another pile of macros.  The implementations probably aren't
as efficient as the hand-coded versions, but I don't think they were
ever time-critical.
---

diff --git a/math/mpx.c b/math/mpx.c
index 5f7ffab0..2745fe0f 100644
--- a/math/mpx.c
+++ b/math/mpx.c
@@ -41,6 +41,161 @@
 
 /*----- Loading and storing -----------------------------------------------*/
 
+/* --- These are all variations on a theme --- *
+ *
+ * Essentially we want to feed bits into a shift register, @ibits@ bits at a
+ * time, and extract them @obits@ bits at a time whenever there are enough.
+ * Of course, @i@ and @o@ will, in general, be different sizes, and we don't
+ * necessarily know which is larger.
+ *
+ * During an operation, we have a shift register @w@ and a most-recent input
+ * @t@.  Together, these hold @bits@ significant bits of input.  We arrange
+ * that @bits < ibits + obits <= 2*MPW_BITS@, so we can get away with using
+ * an @mpw@ for both of these quantities.
+ */
+
+/* --- @MPX_GETBITS@ --- *
+ *
+ * Arguments:	@ibits@ = width of input units, in bits
+ *		@obits@ = width of output units, in bits
+ *		@iavail@ = condition expression: is input data available?
+ *		@getbits@ = function or macro: set argument to next input
+ *
+ * Use:		Read an input unit into @t@ and update the necessary
+ *		variables.
+ *
+ *		It is assumed on entry that @bits < obits@.  On exit, we have
+ *		@bits < ibits + obits@, and @t@ is live.
+ */
+
+#define MPX_GETBITS(ibits, obits, iavail, getbits) do {			\
+  if (!iavail) goto flush;						\
+  if (bits >= ibits) w |= t << (bits - ibits);				\
+  getbits(t);								\
+  bits += ibits;							\
+} while (0)
+
+/* --- @MPX_PUTBITS@ --- *
+ *
+ * Arguments:	@ibits@ = width of input units, in bits
+ *		@obits@ = width of output units, in bits
+ *		@oavail@ = condition expression: is output space available?
+ *		@putbits@ = function or macro: write its argument to output
+ *
+ * Use:		Emit an output unit, and update the necessary variables.  If
+ *		the output buffer is full, then force an immediate return.
+ *
+ *		We assume that @bits < ibits + obits@, and that @t@ is only
+ *		relevant if @bits >= ibits@.  (The @MPX_GETBITS@ macro
+ *		ensures that this is true.)
+ */
+
+#define SHRW(w, b) ((b) < MPW_BITS ? (w) >> (b) : 0)
+
+#define MPX_PUTBITS(ibits, obits, oavail, putbits) do {			\
+  if (!oavail) return;							\
+  if (bits < ibits) {							\
+    putbits(w);								\
+    bits -= obits;							\
+    w = SHRW(w, obits);							\
+  } else {								\
+    putbits(w | (t << (bits - ibits)));					\
+    bits -= obits;							\
+    if (bits >= ibits) w = SHRW(w, obits) | (t << (bits - ibits));	\
+    else w = SHRW(w, obits) | (t >> (ibits - bits));			\
+    t = 0;								\
+  }									\
+} while (0)
+
+/* --- @MPX_LOADSTORE@ --- *
+ *
+ * Arguments:	@name@ = name of function to create, without @mpx_@ prefix
+ *		@wconst@ = qualifiers for @mpw *@ arguments
+ *		@oconst@ = qualifiers for octet pointers
+ *		@decls@ = additional declarations needed
+ *		@ibits@ = width of input units, in bits
+ *		@iavail@ = condition expression: is input data available?
+ *		@getbits@ = function or macro: set argument to next input
+ *		@obits@ = width of output units, in bits
+ *		@oavail@ = condition expression: is output space available?
+ *		@putbits@ = function or macro: write its argument to output
+ *		@clear@ = statements to clear remainder of output
+ *
+ * Use:		Generates a function to convert between a sequence of
+ *		multiprecision words and a vector of octets.
+ *
+ *		The arguments @ibits@, @iavail@ and @getbits@ are passed on
+ *		to @MPX_GETBITS@; similarly, @obits@, @oavail@, and @putbits@
+ *		are passed on to @MPX_PUTBITS@.
+ *
+ *		The following variables are in scope: @v@ and @vl@ are the
+ *		current base and limit of the word vector; @p@ and @q@ are
+ *		the base and limit of the octet vector; @w@ and @t@ form the
+ *		shift register used during the conversion (see commentary
+ *		above); and @bits@ tracks the number of live bits in the
+ *		shift register.
+ */
+
+#define MPX_LOADSTORE(name, wconst, oconst, decls,			\
+		      ibits, iavail, getbits, obits, oavail, putbits,	\
+		      clear)						\
+									\
+void mpx_##name(wconst mpw *v, wconst mpw *vl,				\
+		oconst void *pp, size_t sz)				\
+{									\
+  mpw t = 0, w = 0;							\
+  oconst octet *p = pp, *q = p + sz;					\
+  int bits = 0;								\
+  decls									\
+									\
+  for (;;) {								\
+    while (bits < obits) MPX_GETBITS(ibits, obits, iavail, getbits);	\
+    while (bits >= obits) MPX_PUTBITS(ibits, obits, oavail, putbits);	\
+  }									\
+									\
+flush:									\
+  while (bits > 0) MPX_PUTBITS(ibits, obits, oavail, putbits);		\
+  clear;								\
+}
+
+#define EMPTY
+
+/* --- Macros for @getbits@ and @putbits@ --- */
+
+#define GETMPW(t) do { t = *v++; } while (0)
+#define PUTMPW(x) do { *v++ = MPW(x); } while (0)
+
+#define GETOCTETI(t) do { t = *p++; } while (0)
+#define PUTOCTETD(x) do { *--q = U8(x); } while (0)
+
+#define PUTOCTETI(x) do { *p++ = U8(x); } while (0)
+#define GETOCTETD(t) do { t = *--q; } while (0)
+
+/* --- Machinery for two's complement I/O --- */
+
+#define DECL_2CN							\
+  unsigned c = 1;
+
+#define GETMPW_2CN(t) do {						\
+  t = MPW(~*v++ + c);							\
+  c = c && !t;								\
+} while (0)
+
+#define PUTMPW_2CN(t) do {						\
+  mpw _t = MPW(~(t) + c);						\
+  c = c && !_t;								\
+  *v++ = _t;								\
+} while (0)
+
+#define FLUSHW_2CN do {							\
+  if (c) MPX_ONE(v, vl);						\
+  else MPX_ZERO(v, vl);							\
+} while (0)
+
+#define FLUSHO_2CN do {							\
+  memset(p, c ? 0xff : 0, q - p);					\
+} while (0)
+
 /* --- @mpx_storel@ --- *
  *
  * Arguments:	@const mpw *v, *vl@ = base and limit of source vector
@@ -54,30 +209,10 @@
  *		isn't enough space for them.
  */
 
-void mpx_storel(const mpw *v, const mpw *vl, void *pp, size_t sz)
-{
-  mpw n, w = 0;
-  octet *p = pp, *q = p + sz;
-  unsigned bits = 0;
-
-  while (p < q) {
-    if (bits < 8) {
-      if (v >= vl) {
-	*p++ = U8(w);
-	break;
-      }
-      n = *v++;
-      *p++ = U8(w | n << bits);
-      w = n >> (8 - bits);
-      bits += MPW_BITS - 8;
-    } else {
-      *p++ = U8(w);
-      w >>= 8;
-      bits -= 8;
-    }
-  }
-  memset(p, 0, q - p);
-}
+MPX_LOADSTORE(storel, const, EMPTY, EMPTY,
+	      MPW_BITS, (v < vl), GETMPW,
+	      8, (p < q), PUTOCTETI,
+	      { memset(p, 0, q - p); })
 
 /* --- @mpx_loadl@ --- *
  *
@@ -92,30 +227,11 @@ void mpx_storel(const mpw *v, const mpw *vl, void *pp, size_t sz)
  *		space for them.
  */
 
-void mpx_loadl(mpw *v, mpw *vl, const void *pp, size_t sz)
-{
-  unsigned n;
-  mpw w = 0;
-  const octet *p = pp, *q = p + sz;
-  unsigned bits = 0;
+MPX_LOADSTORE(loadl, EMPTY, const, EMPTY,
+	      8, (p < q), GETOCTETI,
+	      MPW_BITS, (v < vl), PUTMPW,
+	      { MPX_ZERO(v, vl); })
 
-  if (v >= vl)
-    return;
-  while (p < q) {
-    n = U8(*p++);
-    w |= n << bits;
-    bits += 8;
-    if (bits >= MPW_BITS) {
-      *v++ = MPW(w);
-      w = n >> (MPW_BITS - bits + 8);
-      bits -= MPW_BITS;
-      if (v >= vl)
-	return;
-    }
-  }
-  *v++ = w;
-  MPX_ZERO(v, vl);
-}
 
 /* --- @mpx_storeb@ --- *
  *
@@ -130,30 +246,10 @@ void mpx_loadl(mpw *v, mpw *vl, const void *pp, size_t sz)
  *		isn't enough space for them.
  */
 
-void mpx_storeb(const mpw *v, const mpw *vl, void *pp, size_t sz)
-{
-  mpw n, w = 0;
-  octet *p = pp, *q = p + sz;
-  unsigned bits = 0;
-
-  while (q > p) {
-    if (bits < 8) {
-      if (v >= vl) {
-	*--q = U8(w);
-	break;
-      }
-      n = *v++;
-      *--q = U8(w | n << bits);
-      w = n >> (8 - bits);
-      bits += MPW_BITS - 8;
-    } else {
-      *--q = U8(w);
-      w >>= 8;
-      bits -= 8;
-    }
-  }
-  memset(p, 0, q - p);
-}
+MPX_LOADSTORE(storeb, const, EMPTY, EMPTY,
+	      MPW_BITS, (v < vl), GETMPW,
+	      8, (p < q), PUTOCTETD,
+	      { memset(p, 0, q - p); })
 
 /* --- @mpx_loadb@ --- *
  *
@@ -168,30 +264,10 @@ void mpx_storeb(const mpw *v, const mpw *vl, void *pp, size_t sz)
  *		space for them.
  */
 
-void mpx_loadb(mpw *v, mpw *vl, const void *pp, size_t sz)
-{
-  unsigned n;
-  mpw w = 0;
-  const octet *p = pp, *q = p + sz;
-  unsigned bits = 0;
-
-  if (v >= vl)
-    return;
-  while (q > p) {
-    n = U8(*--q);
-    w |= n << bits;
-    bits += 8;
-    if (bits >= MPW_BITS) {
-      *v++ = MPW(w);
-      w = n >> (MPW_BITS - bits + 8);
-      bits -= MPW_BITS;
-      if (v >= vl)
-	return;
-    }
-  }
-  *v++ = w;
-  MPX_ZERO(v, vl);
-}
+MPX_LOADSTORE(loadb, EMPTY, const, EMPTY,
+	      8, (p < q), GETOCTETD,
+	      MPW_BITS, (v < vl), PUTMPW,
+	      { MPX_ZERO(v, vl); })
 
 /* --- @mpx_storel2cn@ --- *
  *
@@ -207,40 +283,10 @@ void mpx_loadb(mpw *v, mpw *vl, const void *pp, size_t sz)
  *		This obviously makes the output bad.
  */
 
-void mpx_storel2cn(const mpw *v, const mpw *vl, void *pp, size_t sz)
-{
-  unsigned c = 1;
-  unsigned b = 0;
-  mpw n, w = 0;
-  octet *p = pp, *q = p + sz;
-  unsigned bits = 0;
-
-  while (p < q) {
-    if (bits < 8) {
-      if (v >= vl) {
-	b = w;
-	break;
-      }
-      n = *v++;
-      b = w | n << bits;
-      w = n >> (8 - bits);
-      bits += MPW_BITS - 8;
-    } else {
-      b = w;
-      w >>= 8;
-      bits -= 8;
-    }
-    b = U8(~b + c);
-    c = c && !b;
-    *p++ = b;
-  }
-  while (p < q) {
-    b = U8(~b + c);
-    c = c && !b;
-    *p++ = b;
-    b = 0;
-  }
-}
+MPX_LOADSTORE(storel2cn, const, EMPTY, DECL_2CN,
+	      MPW_BITS, (v < vl), GETMPW_2CN,
+	      8, (p < q), PUTOCTETI,
+	      { FLUSHO_2CN; })
 
 /* --- @mpx_loadl2cn@ --- *
  *
@@ -256,32 +302,10 @@ void mpx_storel2cn(const mpw *v, const mpw *vl, void *pp, size_t sz)
  *		means you made the wrong choice coming here.
  */
 
-void mpx_loadl2cn(mpw *v, mpw *vl, const void *pp, size_t sz)
-{
-  unsigned n;
-  unsigned c = 1;
-  mpw w = 0;
-  const octet *p = pp, *q = p + sz;
-  unsigned bits = 0;
-
-  if (v >= vl)
-    return;
-  while (p < q) {
-    n = U8(~(*p++) + c);
-    c = c && !n;
-    w |= n << bits;
-    bits += 8;
-    if (bits >= MPW_BITS) {
-      *v++ = MPW(w);
-      w = n >> (MPW_BITS - bits + 8);
-      bits -= MPW_BITS;
-      if (v >= vl)
-	return;
-    }
-  }
-  *v++ = w;
-  MPX_ZERO(v, vl);
-}
+MPX_LOADSTORE(loadl2cn, EMPTY, const, DECL_2CN,
+	      8, (p < q), GETOCTETI,
+	      MPW_BITS, (v < vl), PUTMPW_2CN,
+	      { FLUSHW_2CN; })
 
 /* --- @mpx_storeb2cn@ --- *
  *
@@ -297,40 +321,10 @@ void mpx_loadl2cn(mpw *v, mpw *vl, const void *pp, size_t sz)
  *		which probably isn't what you meant.
  */
 
-void mpx_storeb2cn(const mpw *v, const mpw *vl, void *pp, size_t sz)
-{
-  mpw n, w = 0;
-  unsigned b = 0;
-  unsigned c = 1;
-  octet *p = pp, *q = p + sz;
-  unsigned bits = 0;
-
-  while (q > p) {
-    if (bits < 8) {
-      if (v >= vl) {
-	b = w;
-	break;
-      }
-      n = *v++;
-      b = w | n << bits;
-      w = n >> (8 - bits);
-      bits += MPW_BITS - 8;
-    } else {
-      b = w;
-      w >>= 8;
-      bits -= 8;
-    }
-    b = U8(~b + c);
-    c = c && !b;
-    *--q = b;
-  }
-  while (q > p) {
-    b = ~b + c;
-    c = c && !(b & 0xff);
-    *--q = b;
-    b = 0;
-  }
-}
+MPX_LOADSTORE(storeb2cn, const, EMPTY, DECL_2CN,
+	      MPW_BITS, (v < vl), GETMPW_2CN,
+	      8, (p < q), PUTOCTETD,
+	      { FLUSHO_2CN; })
 
 /* --- @mpx_loadb2cn@ --- *
  *
@@ -346,32 +340,10 @@ void mpx_storeb2cn(const mpw *v, const mpw *vl, void *pp, size_t sz)
  *		chose this function wrongly.
  */
 
-void mpx_loadb2cn(mpw *v, mpw *vl, const void *pp, size_t sz)
-{
-  unsigned n;
-  unsigned c = 1;
-  mpw w = 0;
-  const octet *p = pp, *q = p + sz;
-  unsigned bits = 0;
-
-  if (v >= vl)
-    return;
-  while (q > p) {
-    n = U8(~(*--q) + c);
-    c = c && !n;
-    w |= n << bits;
-    bits += 8;
-    if (bits >= MPW_BITS) {
-      *v++ = MPW(w);
-      w = n >> (MPW_BITS - bits + 8);
-      bits -= MPW_BITS;
-      if (v >= vl)
-	return;
-    }
-  }
-  *v++ = w;
-  MPX_ZERO(v, vl);
-}
+MPX_LOADSTORE(loadb2cn, EMPTY, const, DECL_2CN,
+	      8, (p < q), GETOCTETD,
+	      MPW_BITS, (v < vl), PUTMPW_2CN,
+	      { FLUSHW_2CN; })
 
 /*----- Logical shifting --------------------------------------------------*/