From 30d09778225eda3093d3ab77921c2e6c4c7f845a Mon Sep 17 00:00:00 2001
From: Mark Wooding <mdw@distorted.org.uk>
Date: Tue, 6 Aug 2013 19:49:08 +0100
Subject: [PATCH] math/mpreduce.[ch]: Extend the domain to all positive
 integers.

Integers of the form (100...)_2 are now acceptable, at last, now that
I've got a grip on the underlying theory.  (It's somewhat embarrassing
that it's taken so long, given that the algorithm was my own work to
begin with, but it was all rather trial-and-error.)

Negative numbers still don't work, and probably never will.
---
 math/mpreduce.c | 32 ++++++++++++++++++++++----------
 math/mpreduce.h |  5 ++++-
 math/t/mpreduce |  5 +++++
 3 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/math/mpreduce.c b/math/mpreduce.c
index cc74bc2..b148dd5 100644
--- a/math/mpreduce.c
+++ b/math/mpreduce.c
@@ -81,7 +81,10 @@ DA_DECL(instr_v, mpreduce_instr);
  * Arguments:	@gfreduce *r@ = structure to fill in
  *		@mp *x@ = an integer
  *
- * Returns:	Zero if successful; nonzero on failure.
+ * Returns:	Zero if successful; nonzero on failure.  The current
+ *		algorithm always succeeds when given positive @x@.  Earlier
+ *		versions used to fail on particular kinds of integers, but
+ *		this is guaranteed not to happen any more.
  *
  * Use:		Initializes a context structure for reduction.
  */
@@ -196,18 +199,27 @@ int mpreduce_create(mpreduce *r, mp *p)
     }
   }
 
-  /* --- This doesn't always work --- *
+  /* --- Fix up wrong-sided decompositions --- *
    *
-   * If %$d \ge 2^{n-1}$% then the above recurrence will output a subtraction
-   * as the final instruction, which may sometimes underflow.  (It interprets
-   * such numbers as being in the form %$2^{n-1} + d$%.)  This is clearly
-   * bad, so detect the situation and fail gracefully.
+   * At this point, we haven't actually finished up the state machine
+   * properly.  We stopped scanning just after bit %$n - 1$% -- the most
+   * significant one, which we know in advance must be set (since @x@ is
+   * strictly positive).  Therefore we are either in state @X@ or @Z1@.  In
+   * the former case, we have nothing to do.  In the latter, there are two
+   * subcases to deal with.  If there are no other instructions, then @x@ is
+   * a perfect power of two, and %$d = 0$%, so again there is nothing to do.
+   *
+   * In the remaining case, we have decomposed @x@ as %$2^{n-1} + d$%, for
+   * some positive %$d$%, which is unfortunate: if we're asked to reduce
+   * %$2^n$%, say, we'll end up with %$-d$% (or would do, if we weren't
+   * sticking to unsigned arithmetic for good performance).  So instead, we
+   * rewrite this as %$2^n - 2^{n-1} + d$% and everything will be good.
    */
 
-  if (DA_LEN(&iv) && (DA(&iv)[DA_LEN(&iv) - 1].op & ~1u) == MPRI_SUB) {
-    mp_drop(r->p);
-    DA_DESTROY(&iv);
-    return (-1);
+  if (st == Z1 && DA_LEN(&iv)) {
+    w = 1;
+    b = (bb + d)%MPW_BITS;
+    INSTR(MPRI_ADD | !!b, w, b);
   }
 
 #undef INSTR
diff --git a/math/mpreduce.h b/math/mpreduce.h
index 80fb113..efac88f 100644
--- a/math/mpreduce.h
+++ b/math/mpreduce.h
@@ -70,7 +70,10 @@ typedef struct mpreduce {
  * Arguments:	@gfreduce *r@ = structure to fill in
  *		@mp *x@ = an integer
  *
- * Returns:	Zero for success, nonzero on error.
+ * Returns:	Zero if successful; nonzero on failure.  The current
+ *		algorithm always succeeds when given positive @x@.  Earlier
+ *		versions used to fail on particular kinds of integers, but
+ *		this is guaranteed not to happen any more.
  *
  * Use:		Initializes a context structure for reduction.
  */
diff --git a/math/t/mpreduce b/math/t/mpreduce
index 2fb8f09..4b2ee9d 100644
--- a/math/t/mpreduce
+++ b/math/t/mpreduce
@@ -3,6 +3,7 @@
 reduce {
   0xc000 0x16cb3 0xacb3;
   0x8000 0x345545 0x5545;
+  0x8001 0x345545 0x54dd;
 
   0xfffef 0x100000 0x11;
 
@@ -14,6 +15,10 @@ reduce {
   0x367aa8f5ba9ac4e8e2ea198b8af2c3b3081deab392ffc05715783b245a62a6fa
   0x08e8c03ebf398c63d71d8fd7ca4ece12367a8dde180ca650afb6;
 
+  0x52e2c37447f8bca34c4a39b130ea8e5c9a7d8b54564aa88ea773
+  0x367aa8f5ba9ac4e8e2ea198b8af2c3b3081deab392ffc05715783b245a62a6fa
+  0x4b6bd8300540dbbd767fe9e64ad2cbde52a9ae2299e3c516152d;
+
   0xfffffffdffffffffffffffffffffffff
   0x7fb838a8a0a95046b9d9d9fb4440f7bbc1a7bd3b
   0xa019c198b9d9d9fb4440f7bc415ff5e4;
-- 
2.11.0