Add an internal-representation no-op function.
[u/mdw/catacomb] / mptext.c
CommitLineData
d3409d5e 1/* -*-c-*-
2 *
3bc9cb53 3 * $Id: mptext.c,v 1.9 2001/02/03 16:05:17 mdw Exp $
d3409d5e 4 *
5 * Textual representation of multiprecision numbers
6 *
7 * (c) 1999 Straylight/Edgeware
8 */
9
10/*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30/*----- Revision history --------------------------------------------------*
31 *
32 * $Log: mptext.c,v $
3bc9cb53 33 * Revision 1.9 2001/02/03 16:05:17 mdw
34 * Make flags be unsigned. Improve the write algorithm: recurse until the
35 * parts are one word long and use single-precision arithmetic from there.
36 * Fix off-by-one bug when breaking the number apart.
37 *
9d3838a0 38 * Revision 1.8 2000/12/06 20:32:42 mdw
39 * Reduce binary bytes (to allow marker bits to be ignored). Fix error
40 * message string a bit. Allow leading `+' signs.
41 *
7d45ed6c 42 * Revision 1.7 2000/07/15 10:01:08 mdw
43 * Bug fix in binary input.
44 *
dd9199f0 45 * Revision 1.6 2000/06/25 12:58:23 mdw
46 * Fix the derivation of `depth' commentary.
47 *
2b26f2d7 48 * Revision 1.5 2000/06/17 11:46:19 mdw
49 * New and much faster stack-based algorithm for reading integers. Support
50 * reading and writing binary integers in bases between 2 and 256.
51 *
e360a4f2 52 * Revision 1.4 1999/12/22 15:56:56 mdw
53 * Use clever recursive algorithm for writing numbers out.
54 *
9c3df6c0 55 * Revision 1.3 1999/12/10 23:23:26 mdw
56 * Allocate slightly less memory.
57 *
90b6f0be 58 * Revision 1.2 1999/11/20 22:24:15 mdw
59 * Use function versions of MPX_UMULN and MPX_UADDN.
60 *
d3409d5e 61 * Revision 1.1 1999/11/17 18:02:16 mdw
62 * New multiprecision integer arithmetic suite.
63 *
64 */
65
66/*----- Header files ------------------------------------------------------*/
67
68#include <ctype.h>
2b26f2d7 69#include <limits.h>
d3409d5e 70#include <stdio.h>
71
d3409d5e 72#include "mp.h"
73#include "mptext.h"
e360a4f2 74#include "paranoia.h"
d3409d5e 75
2b26f2d7 76/*----- Magical numbers ---------------------------------------------------*/
77
78/* --- Maximum recursion depth --- *
79 *
80 * This is the number of bits in a @size_t@ object. Why?
81 *
dd9199f0 82 * To see this, let %$b = \mathit{MPW\_MAX} + 1$% and let %$Z$% be the
83 * largest @size_t@ value. Then the largest possible @mp@ is %$M - 1$% where
84 * %$M = b^Z$%. Let %$r$% be a radix to read or write. Since the recursion
85 * squares the radix at each step, the highest number reached by the
86 * recursion is %$d$%, where:
2b26f2d7 87 *
dd9199f0 88 * %$r^{2^d} = b^Z$%.
2b26f2d7 89 *
90 * Solving gives that %$d = \lg \log_r b^Z$%. If %$r = 2$%, this is maximum,
91 * so choosing %$d = \lg \lg b^Z = \lg (Z \lg b) = \lg Z + \lg \lg b$%.
92 *
93 * Expressing %$\lg Z$% as @CHAR_BIT * sizeof(size_t)@ yields an
94 * overestimate, since a @size_t@ representation may contain `holes'.
95 * Choosing to represent %$\lg \lg b$% by 10 is almost certainly sufficient
96 * for `some time to come'.
97 */
98
99#define DEPTH (CHAR_BIT * sizeof(size_t) + 10)
100
d3409d5e 101/*----- Main code ---------------------------------------------------------*/
102
103/* --- @mp_read@ --- *
104 *
105 * Arguments: @mp *m@ = destination multiprecision number
106 * @int radix@ = base to assume for data (or zero to guess)
107 * @const mptext_ops *ops@ = pointer to operations block
108 * @void *p@ = data for the operations block
109 *
110 * Returns: The integer read, or zero if it didn't work.
111 *
112 * Use: Reads an integer from some source. If the @radix@ is
113 * specified, the number is assumed to be given in that radix,
114 * with the letters `a' (either upper- or lower-case) upwards
115 * standing for digits greater than 9. Otherwise, base 10 is
116 * assumed unless the number starts with `0' (octal), `0x' (hex)
117 * or `nnn_' (base `nnn'). An arbitrary amount of whitespace
118 * before the number is ignored.
119 */
120
2b26f2d7 121/* --- About the algorithm --- *
122 *
123 * The algorithm here is rather aggressive. I maintain an array of
124 * successive squarings of the radix, and a stack of partial results, each
125 * with a counter attached indicating which radix square to multiply by.
126 * Once the item at the top of the stack reaches the same counter level as
127 * the next item down, they are combined together and the result is given a
128 * counter level one higher than either of the results.
129 *
130 * Gluing the results together at the end is slightly tricky. Pay attention
131 * to the code.
132 *
133 * This is more complicated because of the need to handle the slightly
134 * bizarre syntax.
135 */
136
d3409d5e 137mp *mp_read(mp *m, int radix, const mptext_ops *ops, void *p)
138{
2b26f2d7 139 int ch; /* Current char being considered */
140 unsigned f = 0; /* Flags about the current number */
141 int r; /* Radix to switch over to */
142 mpw rd; /* Radix as an @mp@ digit */
143 mp rr; /* The @mp@ for the radix */
144 unsigned nf = m ? m->f & MP_BURN : 0; /* New @mp@ flags */
145
146 /* --- Stacks --- */
147
148 mp *pow[DEPTH]; /* List of powers */
149 unsigned pows; /* Next index to fill */
150 struct { unsigned i; mp *m; } s[DEPTH]; /* Main stack */
151 unsigned sp; /* Current stack pointer */
152
153 /* --- Flags --- */
d3409d5e 154
3bc9cb53 155#define f_neg 1u
156#define f_ok 2u
d3409d5e 157
2b26f2d7 158 /* --- Initialize the stacks --- */
159
160 mp_build(&rr, &rd, &rd + 1);
161 pow[0] = &rr;
162 pows = 1;
163
164 sp = 0;
165
d3409d5e 166 /* --- Initialize the destination number --- */
167
2b26f2d7 168 if (m)
169 MP_DROP(m);
d3409d5e 170
171 /* --- Read an initial character --- */
172
173 ch = ops->get(p);
174 while (isspace(ch))
175 ch = ops->get(p);
176
177 /* --- Handle an initial sign --- */
178
9d3838a0 179 if (radix >= 0 && (ch == '-' || ch == '+')) {
180 if (ch == '-')
181 f |= f_neg;
182 do ch = ops->get(p); while isspace(ch);
d3409d5e 183 }
184
185 /* --- If the radix is zero, look for leading zeros --- */
186
2b26f2d7 187 if (radix > 0) {
188 assert(((void)"ascii radix must be <= 36", radix <= 36));
189 rd = radix;
190 r = -1;
191 } else if (radix < 0) {
192 rd = -radix;
9d3838a0 193 assert(((void)"binary radix must fit in a byte", rd < UCHAR_MAX));
d3409d5e 194 r = -1;
2b26f2d7 195 } else if (ch != '0') {
196 rd = 10;
d3409d5e 197 r = 0;
198 } else {
199 ch = ops->get(p);
200 if (ch == 'x') {
201 ch = ops->get(p);
2b26f2d7 202 rd = 16;
d3409d5e 203 } else {
2b26f2d7 204 rd = 8;
d3409d5e 205 f |= f_ok;
206 }
207 r = -1;
208 }
209
210 /* --- Time to start --- */
211
212 for (;; ch = ops->get(p)) {
213 int x;
214
7d45ed6c 215 if (ch < 0)
216 break;
217
d3409d5e 218 /* --- An underscore indicates a numbered base --- */
219
220 if (ch == '_' && r > 0 && r <= 36) {
2b26f2d7 221 unsigned i;
222
223 /* --- Clear out the stacks --- */
224
225 for (i = 1; i < pows; i++)
226 MP_DROP(pow[i]);
227 pows = 1;
228 for (i = 0; i < sp; i++)
229 MP_DROP(s[i].m);
230 sp = 0;
231
232 /* --- Restart the search --- */
233
234 rd = r;
d3409d5e 235 r = -1;
236 f &= ~f_ok;
237 continue;
238 }
239
240 /* --- Check that the character is a digit and in range --- */
241
2b26f2d7 242 if (radix < 0)
9d3838a0 243 x = ch % rd;
d3409d5e 244 else {
2b26f2d7 245 if (!isalnum(ch))
d3409d5e 246 break;
2b26f2d7 247 if (ch >= '0' && ch <= '9')
248 x = ch - '0';
249 else {
250 ch = tolower(ch);
251 if (ch >= 'a' && ch <= 'z') /* ASCII dependent! */
252 x = ch - 'a' + 10;
253 else
254 break;
255 }
d3409d5e 256 }
257
258 /* --- Sort out what to do with the character --- */
259
260 if (x >= 10 && r >= 0)
261 r = -1;
2b26f2d7 262 if (x >= rd)
d3409d5e 263 break;
264
265 if (r >= 0)
266 r = r * 10 + x;
267
268 /* --- Stick the character on the end of my integer --- */
269
2b26f2d7 270 assert(((void)"Number is too unimaginably huge", sp < DEPTH));
271 s[sp].m = m = mp_new(1, nf);
272 m->v[0] = x;
273 s[sp].i = 0;
274
275 /* --- Now grind through the stack --- */
276
277 while (sp > 0 && s[sp - 1].i == s[sp].i) {
278
279 /* --- Combine the top two items --- */
280
281 sp--;
282 m = s[sp].m;
283 m = mp_mul(m, m, pow[s[sp].i]);
284 m = mp_add(m, m, s[sp + 1].m);
285 s[sp].m = m;
286 MP_DROP(s[sp + 1].m);
287 s[sp].i++;
288
289 /* --- Make a new radix power if necessary --- */
290
291 if (s[sp].i >= pows) {
292 assert(((void)"Number is too unimaginably huge", pows < DEPTH));
293 pow[pows] = mp_sqr(MP_NEW, pow[pows - 1]);
294 pows++;
295 }
296 }
d3409d5e 297 f |= f_ok;
2b26f2d7 298 sp++;
d3409d5e 299 }
300
301 ops->unget(ch, p);
302
2b26f2d7 303 /* --- If we're done, compute the rest of the number --- */
304
305 if (f & f_ok) {
306 if (!sp)
307 return (MP_ZERO);
308 else {
309 mp *z = MP_ONE;
310 sp--;
311
312 while (sp > 0) {
313
314 /* --- Combine the top two items --- */
315
316 sp--;
317 m = s[sp].m;
318 z = mp_mul(z, z, pow[s[sp + 1].i]);
319 m = mp_mul(m, m, z);
320 m = mp_add(m, m, s[sp + 1].m);
321 s[sp].m = m;
322 MP_DROP(s[sp + 1].m);
323
324 /* --- Make a new radix power if necessary --- */
325
326 if (s[sp].i >= pows) {
327 assert(((void)"Number is too unimaginably huge", pows < DEPTH));
328 pow[pows] = mp_sqr(MP_NEW, pow[pows - 1]);
329 pows++;
330 }
331 }
332 MP_DROP(z);
333 m = s[0].m;
334 }
335 } else {
336 unsigned i;
337 for (i = 0; i < sp; i++)
338 MP_DROP(s[i].m);
339 }
340
341 /* --- Clear the radix power list --- */
342
343 {
344 unsigned i;
345 for (i = 1; i < pows; i++)
346 MP_DROP(pow[i]);
347 }
348
d3409d5e 349 /* --- Bail out if the number was bad --- */
350
2b26f2d7 351 if (!(f & f_ok))
d3409d5e 352 return (0);
d3409d5e 353
354 /* --- Set the sign and return --- */
355
d3409d5e 356 if (f & f_neg)
357 m->f |= MP_NEG;
358 return (m);
3bc9cb53 359
360#undef f_neg
361#undef f_ok
d3409d5e 362}
363
364/* --- @mp_write@ --- *
365 *
366 * Arguments: @mp *m@ = pointer to a multi-precision integer
367 * @int radix@ = radix to use when writing the number out
368 * @const mptext_ops *ops@ = pointer to an operations block
369 * @void *p@ = data for the operations block
370 *
371 * Returns: Zero if it worked, nonzero otherwise.
372 *
373 * Use: Writes a large integer in textual form.
374 */
375
e360a4f2 376/* --- Simple case --- *
377 *
3bc9cb53 378 * Use a fixed-sized buffer and single-precision arithmetic to pick off
379 * low-order digits. Put each digit in a buffer, working backwards from the
380 * end. If the buffer becomes full, recurse to get another one. Ensure that
381 * there are at least @z@ digits by writing leading zeroes if there aren't
382 * enough real digits.
e360a4f2 383 */
384
3bc9cb53 385static int simple(mpw n, int radix, unsigned z,
e360a4f2 386 const mptext_ops *ops, void *p)
387{
388 int rc = 0;
389 char buf[64];
390 unsigned i = sizeof(buf);
2b26f2d7 391 int rd = radix > 0 ? radix : -radix;
e360a4f2 392
393 do {
394 int ch;
395 mpw x;
396
3bc9cb53 397 x = n % rd;
398 n /= rd;
2b26f2d7 399 if (radix < 0)
400 ch = x;
3bc9cb53 401 else if (x < 10)
402 ch = '0' + x;
403 else
404 ch = 'a' + x - 10;
e360a4f2 405 buf[--i] = ch;
406 if (z)
407 z--;
3bc9cb53 408 } while (i && n);
e360a4f2 409
3bc9cb53 410 if (n)
411 rc = simple(n, radix, z, ops, p);
e360a4f2 412 else {
413 static const char zero[32] = "00000000000000000000000000000000";
414 while (!rc && z >= sizeof(zero)) {
415 rc = ops->put(zero, sizeof(zero), p);
416 z -= sizeof(zero);
417 }
418 if (!rc && z)
419 rc = ops->put(zero, z, p);
420 }
421 if (!rc)
3bc9cb53 422 rc = ops->put(buf + i, sizeof(buf) - i, p);
423 BURN(buf);
e360a4f2 424 return (rc);
425}
426
427/* --- Complicated case --- *
428 *
429 * If the number is small, fall back to the simple case above. Otherwise
430 * divide and take remainder by current large power of the radix, and emit
431 * each separately. Don't emit a zero quotient. Be very careful about
432 * leading zeroes on the remainder part, because they're deeply significant.
433 */
434
435static int complicated(mp *m, int radix, mp **pr, unsigned i, unsigned z,
436 const mptext_ops *ops, void *p)
437{
438 int rc = 0;
439 mp *q = MP_NEW;
440 unsigned d = 1 << i;
441
3bc9cb53 442 if (MP_LEN(m) < 2)
443 return (simple(MP_LEN(m) ? m->v[0] : 0, radix, z, ops, p));
e360a4f2 444
3bc9cb53 445 assert(i);
e360a4f2 446 mp_div(&q, &m, m, pr[i]);
447 if (!MP_LEN(q))
448 d = z;
449 else {
450 if (z > d)
451 z -= d;
452 else
453 z = 0;
454 rc = complicated(q, radix, pr, i - 1, z, ops, p);
455 }
456 if (!rc)
457 rc = complicated(m, radix, pr, i - 1, d, ops, p);
458 mp_drop(q);
459 return (rc);
460}
461
462/* --- Main driver code --- */
463
d3409d5e 464int mp_write(mp *m, int radix, const mptext_ops *ops, void *p)
465{
e360a4f2 466 int rc;
d3409d5e 467
468 /* --- Set various things up --- */
469
470 m = MP_COPY(m);
e360a4f2 471 MP_SPLIT(m);
d3409d5e 472
2b26f2d7 473 /* --- Check the radix for sensibleness --- */
474
475 if (radix > 0)
476 assert(((void)"ascii radix must be <= 36", radix <= 36));
477 else if (radix < 0)
478 assert(((void)"binary radix must fit in a byte", -radix < UCHAR_MAX));
479 else
480 assert(((void)"radix can't be zero in mp_write", 0));
481
d3409d5e 482 /* --- If the number is negative, sort that out --- */
483
484 if (m->f & MP_NEG) {
485 if (ops->put("-", 1, p))
486 return (EOF);
2b26f2d7 487 m->f &= ~MP_NEG;
d3409d5e 488 }
489
e360a4f2 490 /* --- If the number is small, do it the easy way --- */
491
3bc9cb53 492 if (MP_LEN(m) < 2)
493 rc = simple(MP_LEN(m) ? m->v[0] : 0, radix, 0, ops, p);
e360a4f2 494
495 /* --- Use a clever algorithm --- *
496 *
497 * Square the radix repeatedly, remembering old results, until I get
498 * something more than half the size of the number @m@. Use this to divide
499 * the number: the quotient and remainder will be approximately the same
500 * size, and I'll have split them on a digit boundary, so I can just emit
501 * the quotient and remainder recursively, in order.
e360a4f2 502 */
503
504 else {
2b26f2d7 505 mp *pr[DEPTH];
3bc9cb53 506 size_t target = (MP_LEN(m) + 1) / 2;
e360a4f2 507 unsigned i = 0;
2b26f2d7 508 mp *z = mp_new(1, 0);
e360a4f2 509
510 /* --- Set up the exponent table --- */
511
2b26f2d7 512 z->v[0] = (radix > 0 ? radix : -radix);
e360a4f2 513 z->f = 0;
514 for (;;) {
2b26f2d7 515 assert(((void)"Number is too unimaginably huge", i < DEPTH));
e360a4f2 516 pr[i++] = z;
517 if (MP_LEN(z) > target)
518 break;
519 z = mp_sqr(MP_NEW, z);
520 }
d3409d5e 521
e360a4f2 522 /* --- Write out the answer --- */
d3409d5e 523
e360a4f2 524 rc = complicated(m, radix, pr, i - 1, 0, ops, p);
d3409d5e 525
e360a4f2 526 /* --- Tidy away the array --- */
d3409d5e 527
e360a4f2 528 while (i > 0)
529 mp_drop(pr[--i]);
d3409d5e 530 }
e360a4f2 531
532 /* --- Tidying up code --- */
533
534 MP_DROP(m);
535 return (rc);
d3409d5e 536}
537
538/*----- Test rig ----------------------------------------------------------*/
539
540#ifdef TEST_RIG
541
542#include <mLib/testrig.h>
543
544static int verify(dstr *v)
545{
546 int ok = 1;
547 int ib = *(int *)v[0].buf, ob = *(int *)v[2].buf;
548 dstr d = DSTR_INIT;
549 mp *m = mp_readdstr(MP_NEW, &v[1], 0, ib);
550 if (m) {
551 if (!ob) {
552 fprintf(stderr, "*** unexpected successful parse\n"
2b26f2d7 553 "*** input [%i] = ", ib);
554 if (ib < 0)
555 type_hex.dump(&v[1], stderr);
556 else
557 fputs(v[1].buf, stderr);
d3409d5e 558 mp_writedstr(m, &d, 10);
2b26f2d7 559 fprintf(stderr, "\n*** (value = %s)\n", d.buf);
d3409d5e 560 ok = 0;
561 } else {
562 mp_writedstr(m, &d, ob);
563 if (d.len != v[3].len || memcmp(d.buf, v[3].buf, d.len) != 0) {
564 fprintf(stderr, "*** failed read or write\n"
2b26f2d7 565 "*** input [%i] = ", ib);
566 if (ib < 0)
567 type_hex.dump(&v[1], stderr);
568 else
569 fputs(v[1].buf, stderr);
570 fprintf(stderr, "\n*** output [%i] = ", ob);
571 if (ob < 0)
572 type_hex.dump(&d, stderr);
573 else
574 fputs(d.buf, stderr);
575 fprintf(stderr, "\n*** expected [%i] = ", ob);
576 if (ob < 0)
577 type_hex.dump(&v[3], stderr);
578 else
579 fputs(v[3].buf, stderr);
580 fputc('\n', stderr);
d3409d5e 581 ok = 0;
582 }
583 }
584 mp_drop(m);
585 } else {
586 if (ob) {
587 fprintf(stderr, "*** unexpected parse failure\n"
2b26f2d7 588 "*** input [%i] = ", ib);
589 if (ib < 0)
590 type_hex.dump(&v[1], stderr);
591 else
592 fputs(v[1].buf, stderr);
593 fprintf(stderr, "\n*** expected [%i] = ", ob);
594 if (ob < 0)
595 type_hex.dump(&v[3], stderr);
596 else
597 fputs(v[3].buf, stderr);
598 fputc('\n', stderr);
d3409d5e 599 ok = 0;
600 }
601 }
602
603 dstr_destroy(&d);
9c3df6c0 604 assert(mparena_count(MPARENA_GLOBAL) == 0);
d3409d5e 605 return (ok);
606}
607
608static test_chunk tests[] = {
2b26f2d7 609 { "mptext-ascii", verify,
d3409d5e 610 { &type_int, &type_string, &type_int, &type_string, 0 } },
2b26f2d7 611 { "mptext-bin-in", verify,
612 { &type_int, &type_hex, &type_int, &type_string, 0 } },
613 { "mptext-bin-out", verify,
614 { &type_int, &type_string, &type_int, &type_hex, 0 } },
d3409d5e 615 { 0, 0, { 0 } }
616};
617
618int main(int argc, char *argv[])
619{
620 sub_init();
621 test_run(argc, argv, tests, SRCDIR "/tests/mptext");
622 return (0);
623}
624
625#endif
626
627/*----- That's all, folks -------------------------------------------------*/