/* -*-c-*-
*
- * $Id: mptext.c,v 1.3 1999/12/10 23:23:26 mdw Exp $
+ * $Id: mptext.c,v 1.7 2000/07/15 10:01:08 mdw Exp $
*
* Textual representation of multiprecision numbers
*
/*----- Revision history --------------------------------------------------*
*
* $Log: mptext.c,v $
+ * Revision 1.7 2000/07/15 10:01:08 mdw
+ * Bug fix in binary input.
+ *
+ * Revision 1.6 2000/06/25 12:58:23 mdw
+ * Fix the derivation of `depth' commentary.
+ *
+ * Revision 1.5 2000/06/17 11:46:19 mdw
+ * New and much faster stack-based algorithm for reading integers. Support
+ * reading and writing binary integers in bases between 2 and 256.
+ *
+ * Revision 1.4 1999/12/22 15:56:56 mdw
+ * Use clever recursive algorithm for writing numbers out.
+ *
* Revision 1.3 1999/12/10 23:23:26 mdw
* Allocate slightly less memory.
*
/*----- Header files ------------------------------------------------------*/
#include <ctype.h>
+#include <limits.h>
#include <stdio.h>
-#include <mLib/darray.h>
-
#include "mp.h"
#include "mptext.h"
+#include "paranoia.h"
-/*----- Data structures ---------------------------------------------------*/
+/*----- Magical numbers ---------------------------------------------------*/
-#ifndef CHARV
- DA_DECL(charv, char);
-# define CHARV
-#endif
+/* --- Maximum recursion depth --- *
+ *
+ * This is the number of bits in a @size_t@ object. Why?
+ *
+ * To see this, let %$b = \mathit{MPW\_MAX} + 1$% and let %$Z$% be the
+ * largest @size_t@ value. Then the largest possible @mp@ is %$M - 1$% where
+ * %$M = b^Z$%. Let %$r$% be a radix to read or write. Since the recursion
+ * squares the radix at each step, the highest number reached by the
+ * recursion is %$d$%, where:
+ *
+ * %$r^{2^d} = b^Z$%.
+ *
+ * Solving gives that %$d = \lg \log_r b^Z$%. If %$r = 2$%, this is maximum,
+ * so choosing %$d = \lg \lg b^Z = \lg (Z \lg b) = \lg Z + \lg \lg b$%.
+ *
+ * Expressing %$\lg Z$% as @CHAR_BIT * sizeof(size_t)@ yields an
+ * overestimate, since a @size_t@ representation may contain `holes'.
+ * Choosing to represent %$\lg \lg b$% by 10 is almost certainly sufficient
+ * for `some time to come'.
+ */
+
+#define DEPTH (CHAR_BIT * sizeof(size_t) + 10)
/*----- Main code ---------------------------------------------------------*/
* before the number is ignored.
*/
+/* --- About the algorithm --- *
+ *
+ * The algorithm here is rather aggressive. I maintain an array of
+ * successive squarings of the radix, and a stack of partial results, each
+ * with a counter attached indicating which radix square to multiply by.
+ * Once the item at the top of the stack reaches the same counter level as
+ * the next item down, they are combined together and the result is given a
+ * counter level one higher than either of the results.
+ *
+ * Gluing the results together at the end is slightly tricky. Pay attention
+ * to the code.
+ *
+ * This is more complicated because of the need to handle the slightly
+ * bizarre syntax.
+ */
+
mp *mp_read(mp *m, int radix, const mptext_ops *ops, void *p)
{
- int r;
- int ch;
- unsigned f = 0;
+ int ch; /* Current char being considered */
+ unsigned f = 0; /* Flags about the current number */
+ int r; /* Radix to switch over to */
+ mpw rd; /* Radix as an @mp@ digit */
+ mp rr; /* The @mp@ for the radix */
+ unsigned nf = m ? m->f & MP_BURN : 0; /* New @mp@ flags */
+
+ /* --- Stacks --- */
+
+ mp *pow[DEPTH]; /* List of powers */
+ unsigned pows; /* Next index to fill */
+ struct { unsigned i; mp *m; } s[DEPTH]; /* Main stack */
+ unsigned sp; /* Current stack pointer */
+
+ /* --- Flags --- */
enum {
f_neg = 1u,
f_ok = 2u
};
+ /* --- Initialize the stacks --- */
+
+ mp_build(&rr, &rd, &rd + 1);
+ pow[0] = &rr;
+ pows = 1;
+
+ sp = 0;
+
/* --- Initialize the destination number --- */
- MP_MODIFY(m, 4);
- m->vl = m->v;
- m->f &= ~MP_UNDEF;
+ if (m)
+ MP_DROP(m);
/* --- Read an initial character --- */
/* --- Handle an initial sign --- */
- if (ch == '-') {
+ if (radix >= 0 && ch == '-') {
f |= f_neg;
ch = ops->get(p);
while (isspace(ch))
/* --- If the radix is zero, look for leading zeros --- */
- if (radix)
+ if (radix > 0) {
+ assert(((void)"ascii radix must be <= 36", radix <= 36));
+ rd = radix;
+ r = -1;
+ } else if (radix < 0) {
+ rd = -radix;
+ assert(((void)"binary radix must fit in a byte ", rd < UCHAR_MAX));
r = -1;
- else if (ch != '0') {
- radix = 10;
+ } else if (ch != '0') {
+ rd = 10;
r = 0;
} else {
ch = ops->get(p);
if (ch == 'x') {
ch = ops->get(p);
- radix = 16;
+ rd = 16;
} else {
- radix = 8;
+ rd = 8;
f |= f_ok;
}
r = -1;
for (;; ch = ops->get(p)) {
int x;
+ if (ch < 0)
+ break;
+
/* --- An underscore indicates a numbered base --- */
if (ch == '_' && r > 0 && r <= 36) {
- radix = r;
- m->vl = m->v;
+ unsigned i;
+
+ /* --- Clear out the stacks --- */
+
+ for (i = 1; i < pows; i++)
+ MP_DROP(pow[i]);
+ pows = 1;
+ for (i = 0; i < sp; i++)
+ MP_DROP(s[i].m);
+ sp = 0;
+
+ /* --- Restart the search --- */
+
+ rd = r;
r = -1;
f &= ~f_ok;
continue;
/* --- Check that the character is a digit and in range --- */
- if (!isalnum(ch))
- break;
- if (ch >= '0' && ch <= '9')
- x = ch - '0';
+ if (radix < 0)
+ x = ch;
else {
- ch = tolower(ch);
- if (ch >= 'a' && ch <= 'z') /* ASCII dependent! */
- x = ch - 'a' + 10;
- else
+ if (!isalnum(ch))
break;
+ if (ch >= '0' && ch <= '9')
+ x = ch - '0';
+ else {
+ ch = tolower(ch);
+ if (ch >= 'a' && ch <= 'z') /* ASCII dependent! */
+ x = ch - 'a' + 10;
+ else
+ break;
+ }
}
/* --- Sort out what to do with the character --- */
if (x >= 10 && r >= 0)
r = -1;
- if (x >= radix)
+ if (x >= rd)
break;
if (r >= 0)
/* --- Stick the character on the end of my integer --- */
- mp_ensure(m, MP_LEN(m) + 1);
- mpx_umuln(m->v, m->vl, m->v, m->vl - 1, radix);
- mpx_uaddn(m->v, m->vl, x);
- mp_shrink(m);
+ assert(((void)"Number is too unimaginably huge", sp < DEPTH));
+ s[sp].m = m = mp_new(1, nf);
+ m->v[0] = x;
+ s[sp].i = 0;
+
+ /* --- Now grind through the stack --- */
+
+ while (sp > 0 && s[sp - 1].i == s[sp].i) {
+
+ /* --- Combine the top two items --- */
+
+ sp--;
+ m = s[sp].m;
+ m = mp_mul(m, m, pow[s[sp].i]);
+ m = mp_add(m, m, s[sp + 1].m);
+ s[sp].m = m;
+ MP_DROP(s[sp + 1].m);
+ s[sp].i++;
+
+ /* --- Make a new radix power if necessary --- */
+
+ if (s[sp].i >= pows) {
+ assert(((void)"Number is too unimaginably huge", pows < DEPTH));
+ pow[pows] = mp_sqr(MP_NEW, pow[pows - 1]);
+ pows++;
+ }
+ }
f |= f_ok;
+ sp++;
}
ops->unget(ch, p);
+ /* --- If we're done, compute the rest of the number --- */
+
+ if (f & f_ok) {
+ if (!sp)
+ return (MP_ZERO);
+ else {
+ mp *z = MP_ONE;
+ sp--;
+
+ while (sp > 0) {
+
+ /* --- Combine the top two items --- */
+
+ sp--;
+ m = s[sp].m;
+ z = mp_mul(z, z, pow[s[sp + 1].i]);
+ m = mp_mul(m, m, z);
+ m = mp_add(m, m, s[sp + 1].m);
+ s[sp].m = m;
+ MP_DROP(s[sp + 1].m);
+
+ /* --- Make a new radix power if necessary --- */
+
+ if (s[sp].i >= pows) {
+ assert(((void)"Number is too unimaginably huge", pows < DEPTH));
+ pow[pows] = mp_sqr(MP_NEW, pow[pows - 1]);
+ pows++;
+ }
+ }
+ MP_DROP(z);
+ m = s[0].m;
+ }
+ } else {
+ unsigned i;
+ for (i = 0; i < sp; i++)
+ MP_DROP(s[i].m);
+ }
+
+ /* --- Clear the radix power list --- */
+
+ {
+ unsigned i;
+ for (i = 1; i < pows; i++)
+ MP_DROP(pow[i]);
+ }
+
/* --- Bail out if the number was bad --- */
- if (!(f & f_ok)) {
- MP_DROP(m);
+ if (!(f & f_ok))
return (0);
- }
/* --- Set the sign and return --- */
- m->f = 0;
if (f & f_neg)
m->f |= MP_NEG;
return (m);
* Use: Writes a large integer in textual form.
*/
+/* --- Simple case --- *
+ *
+ * Use a fixed-sized buffer and the simple single-precision division
+ * algorithm to pick off low-order digits. Put each digit in a buffer,
+ * working backwards from the end. If the buffer becomes full, recurse to
+ * get another one. Ensure that there are at least @z@ digits by writing
+ * leading zeroes if there aren't enough real digits.
+ */
+
+static int simple(mp *m, int radix, unsigned z,
+ const mptext_ops *ops, void *p)
+{
+ int rc = 0;
+ char buf[64];
+ unsigned i = sizeof(buf);
+ int rd = radix > 0 ? radix : -radix;
+
+ do {
+ int ch;
+ mpw x;
+
+ x = mpx_udivn(m->v, m->vl, m->v, m->vl, rd);
+ MP_SHRINK(m);
+ if (radix < 0)
+ ch = x;
+ else {
+ if (x < 10)
+ ch = '0' + x;
+ else
+ ch = 'a' + x - 10;
+ }
+ buf[--i] = ch;
+ if (z)
+ z--;
+ } while (i && MP_LEN(m));
+
+ if (MP_LEN(m))
+ rc = simple(m, radix, z, ops, p);
+ else {
+ static const char zero[32] = "00000000000000000000000000000000";
+ while (!rc && z >= sizeof(zero)) {
+ rc = ops->put(zero, sizeof(zero), p);
+ z -= sizeof(zero);
+ }
+ if (!rc && z)
+ rc = ops->put(zero, z, p);
+ }
+ if (!rc)
+ ops->put(buf + i, sizeof(buf) - i, p);
+ if (m->f & MP_BURN)
+ BURN(buf);
+ return (rc);
+}
+
+/* --- Complicated case --- *
+ *
+ * If the number is small, fall back to the simple case above. Otherwise
+ * divide and take remainder by current large power of the radix, and emit
+ * each separately. Don't emit a zero quotient. Be very careful about
+ * leading zeroes on the remainder part, because they're deeply significant.
+ */
+
+static int complicated(mp *m, int radix, mp **pr, unsigned i, unsigned z,
+ const mptext_ops *ops, void *p)
+{
+ int rc = 0;
+ mp *q = MP_NEW;
+ unsigned d = 1 << i;
+
+ if (MP_LEN(m) < 8)
+ return (simple(m, radix, z, ops, p));
+
+ mp_div(&q, &m, m, pr[i]);
+ if (!MP_LEN(q))
+ d = z;
+ else {
+ if (z > d)
+ z -= d;
+ else
+ z = 0;
+ rc = complicated(q, radix, pr, i - 1, z, ops, p);
+ }
+ if (!rc)
+ rc = complicated(m, radix, pr, i - 1, d, ops, p);
+ mp_drop(q);
+ return (rc);
+}
+
+/* --- Main driver code --- */
+
int mp_write(mp *m, int radix, const mptext_ops *ops, void *p)
{
- charv v = DA_INIT;
- mpw b = radix;
- mp bb;
+ int rc;
/* --- Set various things up --- */
m = MP_COPY(m);
- mp_build(&bb, &b, &b + 1);
+ MP_SPLIT(m);
+
+ /* --- Check the radix for sensibleness --- */
+
+ if (radix > 0)
+ assert(((void)"ascii radix must be <= 36", radix <= 36));
+ else if (radix < 0)
+ assert(((void)"binary radix must fit in a byte", -radix < UCHAR_MAX));
+ else
+ assert(((void)"radix can't be zero in mp_write", 0));
/* --- If the number is negative, sort that out --- */
if (m->f & MP_NEG) {
if (ops->put("-", 1, p))
return (EOF);
- MP_SPLIT(m);
m->f &= ~MP_NEG;
}
- /* --- Write digits to a temporary array --- */
-
- do {
- mp *q = MP_NEW, *r = MP_NEW;
- int ch;
- mpw x;
+ /* --- If the number is small, do it the easy way --- */
+
+ if (MP_LEN(m) < 8)
+ rc = simple(m, radix, 0, ops, p);
+
+ /* --- Use a clever algorithm --- *
+ *
+ * Square the radix repeatedly, remembering old results, until I get
+ * something more than half the size of the number @m@. Use this to divide
+ * the number: the quotient and remainder will be approximately the same
+ * size, and I'll have split them on a digit boundary, so I can just emit
+ * the quotient and remainder recursively, in order.
+ */
+
+ else {
+ mp *pr[DEPTH];
+ size_t target = MP_LEN(m) / 2;
+ unsigned i = 0;
+ mp *z = mp_new(1, 0);
+
+ /* --- Set up the exponent table --- */
+
+ z->v[0] = (radix > 0 ? radix : -radix);
+ z->f = 0;
+ for (;;) {
+ assert(((void)"Number is too unimaginably huge", i < DEPTH));
+ pr[i++] = z;
+ if (MP_LEN(z) > target)
+ break;
+ z = mp_sqr(MP_NEW, z);
+ }
- mp_div(&q, &r, m, &bb);
- x = r->v[0];
- if (x < 10)
- ch = '0' + x;
- else
- ch = 'a' + x - 10;
- DA_UNSHIFT(&v, ch);
- MP_DROP(m);
- MP_DROP(r);
- m = q;
- } while (MP_CMP(m, !=, MP_ZERO));
+ /* --- Write out the answer --- */
- /* --- Finished that --- */
+ rc = complicated(m, radix, pr, i - 1, 0, ops, p);
- MP_DROP(m);
+ /* --- Tidy away the array --- */
- {
- int rc = ops->put(DA(&v), DA_LEN(&v), p);
- DA_DESTROY(&v);
- return (rc ? EOF : 0);
+ while (i > 0)
+ mp_drop(pr[--i]);
}
+
+ /* --- Tidying up code --- */
+
+ MP_DROP(m);
+ return (rc);
}
/*----- Test rig ----------------------------------------------------------*/
if (m) {
if (!ob) {
fprintf(stderr, "*** unexpected successful parse\n"
- "*** input [%i] = %s\n",
- ib, v[1].buf);
+ "*** input [%i] = ", ib);
+ if (ib < 0)
+ type_hex.dump(&v[1], stderr);
+ else
+ fputs(v[1].buf, stderr);
mp_writedstr(m, &d, 10);
- fprintf(stderr, "*** (value = %s)\n", d.buf);
+ fprintf(stderr, "\n*** (value = %s)\n", d.buf);
ok = 0;
} else {
mp_writedstr(m, &d, ob);
if (d.len != v[3].len || memcmp(d.buf, v[3].buf, d.len) != 0) {
fprintf(stderr, "*** failed read or write\n"
- "*** input [%i] = %s\n"
- "*** output [%i] = %s\n"
- "*** expected [%i] = %s\n",
- ib, v[1].buf, ob, d.buf, ob, v[3].buf);
+ "*** input [%i] = ", ib);
+ if (ib < 0)
+ type_hex.dump(&v[1], stderr);
+ else
+ fputs(v[1].buf, stderr);
+ fprintf(stderr, "\n*** output [%i] = ", ob);
+ if (ob < 0)
+ type_hex.dump(&d, stderr);
+ else
+ fputs(d.buf, stderr);
+ fprintf(stderr, "\n*** expected [%i] = ", ob);
+ if (ob < 0)
+ type_hex.dump(&v[3], stderr);
+ else
+ fputs(v[3].buf, stderr);
+ fputc('\n', stderr);
ok = 0;
}
}
} else {
if (ob) {
fprintf(stderr, "*** unexpected parse failure\n"
- "*** input [%i] = %s\n"
- "*** expected [%i] = %s\n",
- ib, v[1].buf, ob, v[3].buf);
+ "*** input [%i] = ", ib);
+ if (ib < 0)
+ type_hex.dump(&v[1], stderr);
+ else
+ fputs(v[1].buf, stderr);
+ fprintf(stderr, "\n*** expected [%i] = ", ob);
+ if (ob < 0)
+ type_hex.dump(&v[3], stderr);
+ else
+ fputs(v[3].buf, stderr);
+ fputc('\n', stderr);
ok = 0;
}
}
}
static test_chunk tests[] = {
- { "mptext", verify,
+ { "mptext-ascii", verify,
{ &type_int, &type_string, &type_int, &type_string, 0 } },
+ { "mptext-bin-in", verify,
+ { &type_int, &type_hex, &type_int, &type_string, 0 } },
+ { "mptext-bin-out", verify,
+ { &type_int, &type_string, &type_int, &type_hex, 0 } },
{ 0, 0, { 0 } }
};