d03ab969 |
1 | /* -*-c-*- |
2 | * |
2bd53494 |
3 | * $Id: mpx.c,v 1.15 2002/10/20 01:12:31 mdw Exp $ |
d03ab969 |
4 | * |
5 | * Low-level multiprecision arithmetic |
6 | * |
7 | * (c) 1999 Straylight/Edgeware |
8 | */ |
9 | |
10 | /*----- Licensing notice --------------------------------------------------* |
11 | * |
12 | * This file is part of Catacomb. |
13 | * |
14 | * Catacomb is free software; you can redistribute it and/or modify |
15 | * it under the terms of the GNU Library General Public License as |
16 | * published by the Free Software Foundation; either version 2 of the |
17 | * License, or (at your option) any later version. |
18 | * |
19 | * Catacomb is distributed in the hope that it will be useful, |
20 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
21 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
22 | * GNU Library General Public License for more details. |
23 | * |
24 | * You should have received a copy of the GNU Library General Public |
25 | * License along with Catacomb; if not, write to the Free |
26 | * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
27 | * MA 02111-1307, USA. |
28 | */ |
29 | |
30 | /*----- Revision history --------------------------------------------------* |
31 | * |
32 | * $Log: mpx.c,v $ |
2bd53494 |
33 | * Revision 1.15 2002/10/20 01:12:31 mdw |
34 | * Two's complement I/O fixes. |
35 | * |
4f29a732 |
36 | * Revision 1.14 2002/10/19 18:55:08 mdw |
37 | * Fix overflows in shift primitives. |
38 | * |
75263f25 |
39 | * Revision 1.13 2002/10/19 17:56:50 mdw |
40 | * Fix bit operations. Test them (a bit) better. |
41 | * |
f09e814a |
42 | * Revision 1.12 2002/10/06 22:52:50 mdw |
43 | * Pile of changes for supporting two's complement properly. |
44 | * |
0f32e0f8 |
45 | * Revision 1.11 2001/04/03 19:36:05 mdw |
46 | * Add some simple bitwise operations so that Perl can use them. |
47 | * |
1a05a8ef |
48 | * Revision 1.10 2000/10/08 12:06:12 mdw |
49 | * Provide @mpx_ueq@ for rapidly testing equality of two integers. |
50 | * |
99b30c23 |
51 | * Revision 1.9 2000/06/26 07:52:50 mdw |
52 | * Portability fix for the bug fix. |
53 | * |
ce76ff16 |
54 | * Revision 1.8 2000/06/25 12:59:02 mdw |
55 | * (mpx_udiv): Fix bug in quotient digit estimation. |
56 | * |
698bd937 |
57 | * Revision 1.7 1999/12/22 15:49:07 mdw |
58 | * New function for division by a small integer. |
59 | * |
42684bdb |
60 | * Revision 1.6 1999/11/20 22:43:44 mdw |
61 | * Integrate testing for MPX routines. |
62 | * |
dd517851 |
63 | * Revision 1.5 1999/11/20 22:23:27 mdw |
64 | * Add function versions of some low-level macros with wider use. |
65 | * |
f45a00c6 |
66 | * Revision 1.4 1999/11/17 18:04:09 mdw |
67 | * Add two's-complement functionality. Improve mpx_udiv a little by |
68 | * performing the multiplication of the divisor by q with the subtraction |
69 | * from r. |
70 | * |
501da53c |
71 | * Revision 1.3 1999/11/13 01:57:31 mdw |
72 | * Remove stray debugging code. |
73 | * |
c8a2f9ef |
74 | * Revision 1.2 1999/11/13 01:50:59 mdw |
75 | * Multiprecision routines finished and tested. |
76 | * |
d03ab969 |
77 | * Revision 1.1 1999/09/03 08:41:12 mdw |
78 | * Initial import. |
79 | * |
80 | */ |
81 | |
82 | /*----- Header files ------------------------------------------------------*/ |
83 | |
c8a2f9ef |
84 | #include <assert.h> |
d03ab969 |
85 | #include <stdio.h> |
86 | #include <stdlib.h> |
87 | #include <string.h> |
88 | |
89 | #include <mLib/bits.h> |
90 | |
91 | #include "mptypes.h" |
92 | #include "mpx.h" |
75263f25 |
93 | #include "bitops.h" |
d03ab969 |
94 | |
95 | /*----- Loading and storing -----------------------------------------------*/ |
96 | |
97 | /* --- @mpx_storel@ --- * |
98 | * |
99 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector |
c8a2f9ef |
100 | * @void *pp@ = pointer to octet array |
d03ab969 |
101 | * @size_t sz@ = size of octet array |
102 | * |
103 | * Returns: --- |
104 | * |
105 | * Use: Stores an MP in an octet array, least significant octet |
106 | * first. High-end octets are silently discarded if there |
107 | * isn't enough space for them. |
108 | */ |
109 | |
c8a2f9ef |
110 | void mpx_storel(const mpw *v, const mpw *vl, void *pp, size_t sz) |
d03ab969 |
111 | { |
112 | mpw n, w = 0; |
c8a2f9ef |
113 | octet *p = pp, *q = p + sz; |
d03ab969 |
114 | unsigned bits = 0; |
115 | |
116 | while (p < q) { |
117 | if (bits < 8) { |
118 | if (v >= vl) { |
119 | *p++ = U8(w); |
120 | break; |
121 | } |
122 | n = *v++; |
123 | *p++ = U8(w | n << bits); |
124 | w = n >> (8 - bits); |
125 | bits += MPW_BITS - 8; |
126 | } else { |
127 | *p++ = U8(w); |
128 | w >>= 8; |
129 | bits -= 8; |
130 | } |
131 | } |
132 | memset(p, 0, q - p); |
133 | } |
134 | |
135 | /* --- @mpx_loadl@ --- * |
136 | * |
137 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector |
c8a2f9ef |
138 | * @const void *pp@ = pointer to octet array |
d03ab969 |
139 | * @size_t sz@ = size of octet array |
140 | * |
141 | * Returns: --- |
142 | * |
143 | * Use: Loads an MP in an octet array, least significant octet |
144 | * first. High-end octets are ignored if there isn't enough |
145 | * space for them. |
146 | */ |
147 | |
c8a2f9ef |
148 | void mpx_loadl(mpw *v, mpw *vl, const void *pp, size_t sz) |
d03ab969 |
149 | { |
150 | unsigned n; |
c8a2f9ef |
151 | mpw w = 0; |
152 | const octet *p = pp, *q = p + sz; |
d03ab969 |
153 | unsigned bits = 0; |
154 | |
155 | if (v >= vl) |
156 | return; |
157 | while (p < q) { |
158 | n = U8(*p++); |
159 | w |= n << bits; |
160 | bits += 8; |
161 | if (bits >= MPW_BITS) { |
162 | *v++ = MPW(w); |
163 | w = n >> (MPW_BITS - bits + 8); |
164 | bits -= MPW_BITS; |
165 | if (v >= vl) |
166 | return; |
167 | } |
168 | } |
169 | *v++ = w; |
170 | MPX_ZERO(v, vl); |
171 | } |
172 | |
173 | /* --- @mpx_storeb@ --- * |
174 | * |
175 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector |
c8a2f9ef |
176 | * @void *pp@ = pointer to octet array |
d03ab969 |
177 | * @size_t sz@ = size of octet array |
178 | * |
179 | * Returns: --- |
180 | * |
181 | * Use: Stores an MP in an octet array, most significant octet |
182 | * first. High-end octets are silently discarded if there |
183 | * isn't enough space for them. |
184 | */ |
185 | |
c8a2f9ef |
186 | void mpx_storeb(const mpw *v, const mpw *vl, void *pp, size_t sz) |
d03ab969 |
187 | { |
188 | mpw n, w = 0; |
c8a2f9ef |
189 | octet *p = pp, *q = p + sz; |
d03ab969 |
190 | unsigned bits = 0; |
191 | |
192 | while (q > p) { |
193 | if (bits < 8) { |
194 | if (v >= vl) { |
195 | *--q = U8(w); |
196 | break; |
197 | } |
198 | n = *v++; |
199 | *--q = U8(w | n << bits); |
200 | w = n >> (8 - bits); |
201 | bits += MPW_BITS - 8; |
202 | } else { |
203 | *--q = U8(w); |
204 | w >>= 8; |
205 | bits -= 8; |
206 | } |
207 | } |
208 | memset(p, 0, q - p); |
209 | } |
210 | |
211 | /* --- @mpx_loadb@ --- * |
212 | * |
213 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector |
c8a2f9ef |
214 | * @const void *pp@ = pointer to octet array |
d03ab969 |
215 | * @size_t sz@ = size of octet array |
216 | * |
217 | * Returns: --- |
218 | * |
219 | * Use: Loads an MP in an octet array, most significant octet |
220 | * first. High-end octets are ignored if there isn't enough |
221 | * space for them. |
222 | */ |
223 | |
c8a2f9ef |
224 | void mpx_loadb(mpw *v, mpw *vl, const void *pp, size_t sz) |
d03ab969 |
225 | { |
226 | unsigned n; |
c8a2f9ef |
227 | mpw w = 0; |
228 | const octet *p = pp, *q = p + sz; |
d03ab969 |
229 | unsigned bits = 0; |
230 | |
231 | if (v >= vl) |
232 | return; |
233 | while (q > p) { |
234 | n = U8(*--q); |
235 | w |= n << bits; |
236 | bits += 8; |
237 | if (bits >= MPW_BITS) { |
238 | *v++ = MPW(w); |
239 | w = n >> (MPW_BITS - bits + 8); |
240 | bits -= MPW_BITS; |
241 | if (v >= vl) |
242 | return; |
243 | } |
244 | } |
245 | *v++ = w; |
246 | MPX_ZERO(v, vl); |
247 | } |
248 | |
f09e814a |
249 | /* --- @mpx_storel2cn@ --- * |
250 | * |
251 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector |
252 | * @void *pp@ = pointer to octet array |
253 | * @size_t sz@ = size of octet array |
254 | * |
255 | * Returns: --- |
256 | * |
257 | * Use: Stores a negative MP in an octet array, least significant |
258 | * octet first, as two's complement. High-end octets are |
259 | * silently discarded if there isn't enough space for them. |
260 | * This obviously makes the output bad. |
261 | */ |
262 | |
263 | void mpx_storel2cn(const mpw *v, const mpw *vl, void *pp, size_t sz) |
264 | { |
265 | unsigned c = 1; |
266 | unsigned b = 0; |
267 | mpw n, w = 0; |
268 | octet *p = pp, *q = p + sz; |
269 | unsigned bits = 0; |
270 | |
271 | while (p < q) { |
272 | if (bits < 8) { |
273 | if (v >= vl) { |
274 | b = w; |
275 | break; |
276 | } |
277 | n = *v++; |
278 | b = w | n << bits; |
279 | w = n >> (8 - bits); |
280 | bits += MPW_BITS - 8; |
281 | } else { |
282 | b = w; |
283 | w >>= 8; |
284 | bits -= 8; |
285 | } |
286 | b = U8(~b + c); |
2bd53494 |
287 | c = c && !b; |
f09e814a |
288 | *p++ = b; |
289 | } |
290 | while (p < q) { |
291 | b = U8(~b + c); |
2bd53494 |
292 | c = c && !b; |
f09e814a |
293 | *p++ = b; |
294 | b = 0; |
295 | } |
296 | } |
297 | |
298 | /* --- @mpx_loadl2cn@ --- * |
299 | * |
300 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector |
301 | * @const void *pp@ = pointer to octet array |
302 | * @size_t sz@ = size of octet array |
303 | * |
304 | * Returns: --- |
305 | * |
306 | * Use: Loads a negative MP in an octet array, least significant |
307 | * octet first, as two's complement. High-end octets are |
308 | * ignored if there isn't enough space for them. This probably |
309 | * means you made the wrong choice coming here. |
310 | */ |
311 | |
312 | void mpx_loadl2cn(mpw *v, mpw *vl, const void *pp, size_t sz) |
313 | { |
314 | unsigned n; |
315 | unsigned c = 1; |
316 | mpw w = 0; |
317 | const octet *p = pp, *q = p + sz; |
318 | unsigned bits = 0; |
319 | |
320 | if (v >= vl) |
321 | return; |
322 | while (p < q) { |
323 | n = U8(~(*p++) + c); |
2bd53494 |
324 | c = c && !n; |
f09e814a |
325 | w |= n << bits; |
326 | bits += 8; |
327 | if (bits >= MPW_BITS) { |
328 | *v++ = MPW(w); |
329 | w = n >> (MPW_BITS - bits + 8); |
330 | bits -= MPW_BITS; |
331 | if (v >= vl) |
332 | return; |
333 | } |
334 | } |
335 | *v++ = w; |
336 | MPX_ZERO(v, vl); |
337 | } |
338 | |
339 | /* --- @mpx_storeb2cn@ --- * |
340 | * |
341 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector |
342 | * @void *pp@ = pointer to octet array |
343 | * @size_t sz@ = size of octet array |
344 | * |
345 | * Returns: --- |
346 | * |
347 | * Use: Stores a negative MP in an octet array, most significant |
348 | * octet first, as two's complement. High-end octets are |
349 | * silently discarded if there isn't enough space for them, |
350 | * which probably isn't what you meant. |
351 | */ |
352 | |
353 | void mpx_storeb2cn(const mpw *v, const mpw *vl, void *pp, size_t sz) |
354 | { |
355 | mpw n, w = 0; |
356 | unsigned b = 0; |
357 | unsigned c = 1; |
358 | octet *p = pp, *q = p + sz; |
359 | unsigned bits = 0; |
360 | |
361 | while (q > p) { |
362 | if (bits < 8) { |
363 | if (v >= vl) { |
364 | b = w; |
365 | break; |
366 | } |
367 | n = *v++; |
368 | b = w | n << bits; |
369 | w = n >> (8 - bits); |
370 | bits += MPW_BITS - 8; |
371 | } else { |
372 | b = w; |
373 | w >>= 8; |
374 | bits -= 8; |
375 | } |
376 | b = U8(~b + c); |
2bd53494 |
377 | c = c && !b; |
f09e814a |
378 | *--q = b; |
379 | } |
380 | while (q > p) { |
381 | b = ~b + c; |
2bd53494 |
382 | c = c && !(b & 0xff); |
f09e814a |
383 | *--q = b; |
384 | b = 0; |
385 | } |
386 | } |
387 | |
388 | /* --- @mpx_loadb2cn@ --- * |
389 | * |
390 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector |
391 | * @const void *pp@ = pointer to octet array |
392 | * @size_t sz@ = size of octet array |
393 | * |
394 | * Returns: --- |
395 | * |
396 | * Use: Loads a negative MP in an octet array, most significant octet |
397 | * first as two's complement. High-end octets are ignored if |
398 | * there isn't enough space for them. This probably means you |
399 | * chose this function wrongly. |
400 | */ |
401 | |
402 | void mpx_loadb2cn(mpw *v, mpw *vl, const void *pp, size_t sz) |
403 | { |
404 | unsigned n; |
405 | unsigned c = 1; |
406 | mpw w = 0; |
407 | const octet *p = pp, *q = p + sz; |
408 | unsigned bits = 0; |
409 | |
410 | if (v >= vl) |
411 | return; |
412 | while (q > p) { |
413 | n = U8(~(*--q) + c); |
2bd53494 |
414 | c = c && !n; |
f09e814a |
415 | w |= n << bits; |
416 | bits += 8; |
417 | if (bits >= MPW_BITS) { |
418 | *v++ = MPW(w); |
419 | w = n >> (MPW_BITS - bits + 8); |
420 | bits -= MPW_BITS; |
421 | if (v >= vl) |
422 | return; |
423 | } |
424 | } |
425 | *v++ = w; |
426 | MPX_ZERO(v, vl); |
427 | } |
428 | |
d03ab969 |
429 | /*----- Logical shifting --------------------------------------------------*/ |
430 | |
431 | /* --- @mpx_lsl@ --- * |
432 | * |
433 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit |
434 | * @const mpw *av, *avl@ = source vector base and limit |
435 | * @size_t n@ = number of bit positions to shift by |
436 | * |
437 | * Returns: --- |
438 | * |
439 | * Use: Performs a logical shift left operation on an integer. |
440 | */ |
441 | |
442 | void mpx_lsl(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n) |
443 | { |
444 | size_t nw; |
445 | unsigned nb; |
446 | |
447 | /* --- Trivial special case --- */ |
448 | |
449 | if (n == 0) |
450 | MPX_COPY(dv, dvl, av, avl); |
451 | |
452 | /* --- Single bit shifting --- */ |
453 | |
454 | else if (n == 1) { |
455 | mpw w = 0; |
456 | while (av < avl) { |
457 | mpw t; |
458 | if (dv >= dvl) |
459 | goto done; |
460 | t = *av++; |
461 | *dv++ = MPW((t << 1) | w); |
462 | w = t >> (MPW_BITS - 1); |
463 | } |
464 | if (dv >= dvl) |
465 | goto done; |
466 | *dv++ = MPW(w); |
467 | MPX_ZERO(dv, dvl); |
c8a2f9ef |
468 | goto done; |
d03ab969 |
469 | } |
470 | |
471 | /* --- Break out word and bit shifts for more sophisticated work --- */ |
472 | |
473 | nw = n / MPW_BITS; |
474 | nb = n % MPW_BITS; |
475 | |
476 | /* --- Handle a shift by a multiple of the word size --- */ |
477 | |
478 | if (nb == 0) { |
4f29a732 |
479 | if (nw >= dvl - dv) |
480 | MPX_ZERO(dv, dvl); |
481 | else { |
482 | MPX_COPY(dv + nw, dvl, av, avl); |
483 | memset(dv, 0, MPWS(nw)); |
484 | } |
d03ab969 |
485 | } |
486 | |
c8a2f9ef |
487 | /* --- And finally the difficult case --- * |
488 | * |
489 | * This is a little convoluted, because I have to start from the end and |
490 | * work backwards to avoid overwriting the source, if they're both the same |
491 | * block of memory. |
492 | */ |
d03ab969 |
493 | |
494 | else { |
495 | mpw w; |
496 | size_t nr = MPW_BITS - nb; |
c8a2f9ef |
497 | size_t dvn = dvl - dv; |
498 | size_t avn = avl - av; |
d03ab969 |
499 | |
c8a2f9ef |
500 | if (dvn <= nw) { |
d03ab969 |
501 | MPX_ZERO(dv, dvl); |
502 | goto done; |
503 | } |
d03ab969 |
504 | |
c8a2f9ef |
505 | if (dvn > avn + nw) { |
506 | size_t off = avn + nw + 1; |
507 | MPX_ZERO(dv + off, dvl); |
508 | dvl = dv + off; |
509 | w = 0; |
510 | } else { |
511 | avl = av + dvn - nw; |
512 | w = *--avl << nb; |
d03ab969 |
513 | } |
514 | |
c8a2f9ef |
515 | while (avl > av) { |
516 | mpw t = *--avl; |
517 | *--dvl = (t >> nr) | w; |
518 | w = t << nb; |
d03ab969 |
519 | } |
c8a2f9ef |
520 | |
521 | *--dvl = w; |
522 | MPX_ZERO(dv, dvl); |
d03ab969 |
523 | } |
524 | |
525 | done:; |
526 | } |
527 | |
528 | /* --- @mpx_lsr@ --- * |
529 | * |
530 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit |
531 | * @const mpw *av, *avl@ = source vector base and limit |
532 | * @size_t n@ = number of bit positions to shift by |
533 | * |
534 | * Returns: --- |
535 | * |
536 | * Use: Performs a logical shift right operation on an integer. |
537 | */ |
538 | |
539 | void mpx_lsr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n) |
540 | { |
541 | size_t nw; |
542 | unsigned nb; |
543 | |
544 | /* --- Trivial special case --- */ |
545 | |
546 | if (n == 0) |
547 | MPX_COPY(dv, dvl, av, avl); |
548 | |
549 | /* --- Single bit shifting --- */ |
550 | |
551 | else if (n == 1) { |
552 | mpw w = *av++ >> 1; |
553 | while (av < avl) { |
554 | mpw t; |
555 | if (dv >= dvl) |
556 | goto done; |
557 | t = *av++; |
558 | *dv++ = MPW((t << (MPW_BITS - 1)) | w); |
559 | w = t >> 1; |
560 | } |
561 | if (dv >= dvl) |
562 | goto done; |
563 | *dv++ = MPW(w); |
564 | MPX_ZERO(dv, dvl); |
c8a2f9ef |
565 | goto done; |
d03ab969 |
566 | } |
567 | |
568 | /* --- Break out word and bit shifts for more sophisticated work --- */ |
569 | |
570 | nw = n / MPW_BITS; |
571 | nb = n % MPW_BITS; |
572 | |
573 | /* --- Handle a shift by a multiple of the word size --- */ |
574 | |
4f29a732 |
575 | if (nb == 0) { |
576 | if (nw >= avl - av) |
577 | MPX_ZERO(dv, dvl); |
578 | else |
579 | MPX_COPY(dv, dvl, av + nw, avl); |
580 | } |
d03ab969 |
581 | |
582 | /* --- And finally the difficult case --- */ |
583 | |
584 | else { |
585 | mpw w; |
586 | size_t nr = MPW_BITS - nb; |
587 | |
588 | av += nw; |
4f29a732 |
589 | w = av < avl ? *av++ : 0; |
d03ab969 |
590 | while (av < avl) { |
591 | mpw t; |
592 | if (dv >= dvl) |
593 | goto done; |
594 | t = *av++; |
595 | *dv++ = MPW((w >> nb) | (t << nr)); |
596 | w = t; |
597 | } |
598 | if (dv < dvl) { |
599 | *dv++ = MPW(w >> nb); |
600 | MPX_ZERO(dv, dvl); |
601 | } |
602 | } |
603 | |
604 | done:; |
605 | } |
606 | |
0f32e0f8 |
607 | /*----- Bitwise operations ------------------------------------------------*/ |
608 | |
f09e814a |
609 | /* --- @mpx_bitop@ --- * |
0f32e0f8 |
610 | * |
611 | * Arguments: @mpw *dv, *dvl@ = destination vector |
612 | * @const mpw *av, *avl@ = first source vector |
613 | * @const mpw *bv, *bvl@ = second source vector |
614 | * |
615 | * Returns: --- |
616 | * |
f09e814a |
617 | * Use; Provides the dyadic boolean functions. |
0f32e0f8 |
618 | */ |
619 | |
f09e814a |
620 | #define MPX_BITBINOP(string) \ |
0f32e0f8 |
621 | \ |
f09e814a |
622 | void mpx_bit##string(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, \ |
623 | const mpw *bv, const mpw *bvl) \ |
0f32e0f8 |
624 | { \ |
625 | MPX_SHRINK(av, avl); \ |
626 | MPX_SHRINK(bv, bvl); \ |
627 | \ |
628 | while (dv < dvl) { \ |
629 | mpw a, b; \ |
630 | a = (av < avl) ? *av++ : 0; \ |
631 | b = (bv < bvl) ? *bv++ : 0; \ |
75263f25 |
632 | *dv++ = B##string(a, b); \ |
0f32e0f8 |
633 | } \ |
634 | } |
635 | |
f09e814a |
636 | MPX_DOBIN(MPX_BITBINOP) |
0f32e0f8 |
637 | |
638 | void mpx_not(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl) |
639 | { |
640 | MPX_SHRINK(av, avl); |
641 | |
642 | while (dv < dvl) { |
643 | mpw a; |
644 | a = (av < avl) ? *av++ : 0; |
645 | *dv++ = ~a; |
646 | } |
647 | } |
648 | |
d03ab969 |
649 | /*----- Unsigned arithmetic -----------------------------------------------*/ |
650 | |
f45a00c6 |
651 | /* --- @mpx_2c@ --- * |
652 | * |
653 | * Arguments: @mpw *dv, *dvl@ = destination vector |
654 | * @const mpw *v, *vl@ = source vector |
655 | * |
656 | * Returns: --- |
657 | * |
658 | * Use: Calculates the two's complement of @v@. |
659 | */ |
660 | |
661 | void mpx_2c(mpw *dv, mpw *dvl, const mpw *v, const mpw *vl) |
662 | { |
663 | mpw c = 0; |
664 | while (dv < dvl && v < vl) |
665 | *dv++ = c = MPW(~*v++); |
666 | if (dv < dvl) { |
667 | if (c > MPW_MAX / 2) |
668 | c = MPW(~0); |
669 | while (dv < dvl) |
670 | *dv++ = c; |
671 | } |
672 | MPX_UADDN(dv, dvl, 1); |
673 | } |
674 | |
1a05a8ef |
675 | /* --- @mpx_ueq@ --- * |
676 | * |
677 | * Arguments: @const mpw *av, *avl@ = first argument vector base and limit |
678 | * @const mpw *bv, *bvl@ = second argument vector base and limit |
679 | * |
680 | * Returns: Nonzero if the two vectors are equal. |
681 | * |
682 | * Use: Performs an unsigned integer test for equality. |
683 | */ |
684 | |
685 | int mpx_ueq(const mpw *av, const mpw *avl, const mpw *bv, const mpw *bvl) |
686 | { |
687 | MPX_SHRINK(av, avl); |
688 | MPX_SHRINK(bv, bvl); |
689 | if (avl - av != bvl - bv) |
690 | return (0); |
691 | while (av < avl) { |
692 | if (*av++ != *bv++) |
693 | return (0); |
694 | } |
695 | return (1); |
696 | } |
697 | |
d03ab969 |
698 | /* --- @mpx_ucmp@ --- * |
699 | * |
700 | * Arguments: @const mpw *av, *avl@ = first argument vector base and limit |
701 | * @const mpw *bv, *bvl@ = second argument vector base and limit |
702 | * |
703 | * Returns: Less than, equal to, or greater than zero depending on |
704 | * whether @a@ is less than, equal to or greater than @b@, |
705 | * respectively. |
706 | * |
707 | * Use: Performs an unsigned integer comparison. |
708 | */ |
709 | |
710 | int mpx_ucmp(const mpw *av, const mpw *avl, const mpw *bv, const mpw *bvl) |
711 | { |
712 | MPX_SHRINK(av, avl); |
713 | MPX_SHRINK(bv, bvl); |
714 | |
715 | if (avl - av > bvl - bv) |
716 | return (+1); |
717 | else if (avl - av < bvl - bv) |
718 | return (-1); |
719 | else while (avl > av) { |
720 | mpw a = *--avl, b = *--bvl; |
721 | if (a > b) |
722 | return (+1); |
723 | else if (a < b) |
724 | return (-1); |
725 | } |
726 | return (0); |
727 | } |
1a05a8ef |
728 | |
d03ab969 |
729 | /* --- @mpx_uadd@ --- * |
730 | * |
731 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit |
732 | * @const mpw *av, *avl@ = first addend vector base and limit |
733 | * @const mpw *bv, *bvl@ = second addend vector base and limit |
734 | * |
735 | * Returns: --- |
736 | * |
737 | * Use: Performs unsigned integer addition. If the result overflows |
738 | * the destination vector, high-order bits are discarded. This |
739 | * means that two's complement addition happens more or less for |
740 | * free, although that's more a side-effect than anything else. |
741 | * The result vector may be equal to either or both source |
742 | * vectors, but may not otherwise overlap them. |
743 | */ |
744 | |
745 | void mpx_uadd(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, |
746 | const mpw *bv, const mpw *bvl) |
747 | { |
748 | mpw c = 0; |
749 | |
750 | while (av < avl || bv < bvl) { |
751 | mpw a, b; |
752 | mpd x; |
753 | if (dv >= dvl) |
754 | return; |
755 | a = (av < avl) ? *av++ : 0; |
756 | b = (bv < bvl) ? *bv++ : 0; |
757 | x = (mpd)a + (mpd)b + c; |
758 | *dv++ = MPW(x); |
759 | c = x >> MPW_BITS; |
760 | } |
761 | if (dv < dvl) { |
762 | *dv++ = c; |
763 | MPX_ZERO(dv, dvl); |
764 | } |
765 | } |
766 | |
dd517851 |
767 | /* --- @mpx_uaddn@ --- * |
768 | * |
769 | * Arguments: @mpw *dv, *dvl@ = source and destination base and limit |
770 | * @mpw n@ = other addend |
771 | * |
772 | * Returns: --- |
773 | * |
774 | * Use: Adds a small integer to a multiprecision number. |
775 | */ |
776 | |
777 | void mpx_uaddn(mpw *dv, mpw *dvl, mpw n) { MPX_UADDN(dv, dvl, n); } |
778 | |
d03ab969 |
779 | /* --- @mpx_usub@ --- * |
780 | * |
781 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit |
782 | * @const mpw *av, *avl@ = first argument vector base and limit |
783 | * @const mpw *bv, *bvl@ = second argument vector base and limit |
784 | * |
785 | * Returns: --- |
786 | * |
787 | * Use: Performs unsigned integer subtraction. If the result |
788 | * overflows the destination vector, high-order bits are |
789 | * discarded. This means that two's complement subtraction |
790 | * happens more or less for free, althuogh that's more a side- |
791 | * effect than anything else. The result vector may be equal to |
792 | * either or both source vectors, but may not otherwise overlap |
793 | * them. |
794 | */ |
795 | |
796 | void mpx_usub(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, |
797 | const mpw *bv, const mpw *bvl) |
798 | { |
799 | mpw c = 0; |
800 | |
801 | while (av < avl || bv < bvl) { |
802 | mpw a, b; |
803 | mpd x; |
804 | if (dv >= dvl) |
805 | return; |
806 | a = (av < avl) ? *av++ : 0; |
807 | b = (bv < bvl) ? *bv++ : 0; |
c8a2f9ef |
808 | x = (mpd)a - (mpd)b - c; |
d03ab969 |
809 | *dv++ = MPW(x); |
c8a2f9ef |
810 | if (x >> MPW_BITS) |
811 | c = 1; |
812 | else |
813 | c = 0; |
d03ab969 |
814 | } |
c8a2f9ef |
815 | if (c) |
816 | c = MPW_MAX; |
d03ab969 |
817 | while (dv < dvl) |
c8a2f9ef |
818 | *dv++ = c; |
d03ab969 |
819 | } |
820 | |
dd517851 |
821 | /* --- @mpx_usubn@ --- * |
822 | * |
823 | * Arguments: @mpw *dv, *dvl@ = source and destination base and limit |
824 | * @n@ = subtrahend |
825 | * |
826 | * Returns: --- |
827 | * |
828 | * Use: Subtracts a small integer from a multiprecision number. |
829 | */ |
830 | |
831 | void mpx_usubn(mpw *dv, mpw *dvl, mpw n) { MPX_USUBN(dv, dvl, n); } |
832 | |
d03ab969 |
833 | /* --- @mpx_umul@ --- * |
834 | * |
835 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit |
836 | * @const mpw *av, *avl@ = multiplicand vector base and limit |
837 | * @const mpw *bv, *bvl@ = multiplier vector base and limit |
838 | * |
839 | * Returns: --- |
840 | * |
841 | * Use: Performs unsigned integer multiplication. If the result |
842 | * overflows the desination vector, high-order bits are |
843 | * discarded. The result vector may not overlap the argument |
844 | * vectors in any way. |
845 | */ |
846 | |
847 | void mpx_umul(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, |
848 | const mpw *bv, const mpw *bvl) |
849 | { |
850 | /* --- This is probably worthwhile on a multiply --- */ |
851 | |
852 | MPX_SHRINK(av, avl); |
853 | MPX_SHRINK(bv, bvl); |
854 | |
855 | /* --- Deal with a multiply by zero --- */ |
856 | |
857 | if (bv == bvl) { |
c8a2f9ef |
858 | MPX_ZERO(dv, dvl); |
d03ab969 |
859 | return; |
860 | } |
861 | |
862 | /* --- Do the initial multiply and initialize the accumulator --- */ |
863 | |
864 | MPX_UMULN(dv, dvl, av, avl, *bv++); |
865 | |
866 | /* --- Do the remaining multiply/accumulates --- */ |
867 | |
c8a2f9ef |
868 | while (dv < dvl && bv < bvl) { |
d03ab969 |
869 | mpw m = *bv++; |
c8a2f9ef |
870 | mpw c = 0; |
d03ab969 |
871 | const mpw *avv = av; |
872 | mpw *dvv = ++dv; |
873 | |
874 | while (avv < avl) { |
875 | mpd x; |
876 | if (dvv >= dvl) |
877 | goto next; |
c8a2f9ef |
878 | x = (mpd)*dvv + (mpd)m * (mpd)*avv++ + c; |
879 | *dvv++ = MPW(x); |
d03ab969 |
880 | c = x >> MPW_BITS; |
881 | } |
c8a2f9ef |
882 | MPX_UADDN(dvv, dvl, c); |
d03ab969 |
883 | next:; |
884 | } |
885 | } |
886 | |
dd517851 |
887 | /* --- @mpx_umuln@ --- * |
888 | * |
889 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit |
890 | * @const mpw *av, *avl@ = multiplicand vector base and limit |
891 | * @mpw m@ = multiplier |
892 | * |
893 | * Returns: --- |
894 | * |
895 | * Use: Multiplies a multiprecision integer by a single-word value. |
896 | * The destination and source may be equal. The destination |
897 | * is completely cleared after use. |
898 | */ |
899 | |
900 | void mpx_umuln(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, mpw m) |
901 | { |
902 | MPX_UMULN(dv, dvl, av, avl, m); |
903 | } |
904 | |
905 | /* --- @mpx_umlan@ --- * |
906 | * |
907 | * Arguments: @mpw *dv, *dvl@ = destination/accumulator base and limit |
908 | * @const mpw *av, *avl@ = multiplicand vector base and limit |
909 | * @mpw m@ = multiplier |
910 | * |
911 | * Returns: --- |
912 | * |
913 | * Use: Multiplies a multiprecision integer by a single-word value |
914 | * and adds the result to an accumulator. |
915 | */ |
916 | |
917 | void mpx_umlan(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, mpw m) |
918 | { |
919 | MPX_UMLAN(dv, dvl, av, avl, m); |
920 | } |
921 | |
c8a2f9ef |
922 | /* --- @mpx_usqr@ --- * |
923 | * |
924 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit |
925 | * @const mpw *av, *av@ = source vector base and limit |
926 | * |
927 | * Returns: --- |
928 | * |
929 | * Use: Performs unsigned integer squaring. The result vector must |
930 | * not overlap the source vector in any way. |
931 | */ |
932 | |
933 | void mpx_usqr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl) |
934 | { |
935 | MPX_ZERO(dv, dvl); |
936 | |
937 | /* --- Main loop --- */ |
938 | |
939 | while (av < avl) { |
940 | const mpw *avv = av; |
941 | mpw *dvv = dv; |
942 | mpw a = *av; |
943 | mpd c; |
944 | |
945 | /* --- Stop if I've run out of destination --- */ |
946 | |
947 | if (dvv >= dvl) |
948 | break; |
949 | |
950 | /* --- Work out the square at this point in the proceedings --- */ |
951 | |
952 | { |
c8a2f9ef |
953 | mpd x = (mpd)a * (mpd)a + *dvv; |
954 | *dvv++ = MPW(x); |
955 | c = MPW(x >> MPW_BITS); |
956 | } |
957 | |
958 | /* --- Now fix up the rest of the vector upwards --- */ |
959 | |
960 | avv++; |
961 | while (dvv < dvl && avv < avl) { |
c8a2f9ef |
962 | mpd x = (mpd)a * (mpd)*avv++; |
963 | mpd y = ((x << 1) & MPW_MAX) + c + *dvv; |
964 | c = (x >> (MPW_BITS - 1)) + (y >> MPW_BITS); |
965 | *dvv++ = MPW(y); |
966 | } |
967 | while (dvv < dvl && c) { |
968 | mpd x = c + *dvv; |
969 | *dvv++ = MPW(x); |
970 | c = x >> MPW_BITS; |
971 | } |
972 | |
973 | /* --- Get ready for the next round --- */ |
974 | |
975 | av++; |
976 | dv += 2; |
977 | } |
978 | } |
979 | |
d03ab969 |
980 | /* --- @mpx_udiv@ --- * |
981 | * |
982 | * Arguments: @mpw *qv, *qvl@ = quotient vector base and limit |
983 | * @mpw *rv, *rvl@ = dividend/remainder vector base and limit |
984 | * @const mpw *dv, *dvl@ = divisor vector base and limit |
c8a2f9ef |
985 | * @mpw *sv, *svl@ = scratch workspace |
d03ab969 |
986 | * |
987 | * Returns: --- |
988 | * |
989 | * Use: Performs unsigned integer division. If the result overflows |
990 | * the quotient vector, high-order bits are discarded. (Clearly |
991 | * the remainder vector can't overflow.) The various vectors |
992 | * may not overlap in any way. Yes, I know it's a bit odd |
993 | * requiring the dividend to be in the result position but it |
994 | * does make some sense really. The remainder must have |
c8a2f9ef |
995 | * headroom for at least two extra words. The scratch space |
f45a00c6 |
996 | * must be at least one word larger than the divisor. |
d03ab969 |
997 | */ |
998 | |
999 | void mpx_udiv(mpw *qv, mpw *qvl, mpw *rv, mpw *rvl, |
c8a2f9ef |
1000 | const mpw *dv, const mpw *dvl, |
1001 | mpw *sv, mpw *svl) |
d03ab969 |
1002 | { |
d03ab969 |
1003 | unsigned norm = 0; |
1004 | size_t scale; |
1005 | mpw d, dd; |
1006 | |
1007 | /* --- Initialize the quotient --- */ |
1008 | |
1009 | MPX_ZERO(qv, qvl); |
1010 | |
c8a2f9ef |
1011 | /* --- Perform some sanity checks --- */ |
1012 | |
1013 | MPX_SHRINK(dv, dvl); |
1014 | assert(((void)"division by zero in mpx_udiv", dv < dvl)); |
1015 | |
d03ab969 |
1016 | /* --- Normalize the divisor --- * |
1017 | * |
1018 | * The algorithm requires that the divisor be at least two digits long. |
1019 | * This is easy to fix. |
1020 | */ |
1021 | |
c8a2f9ef |
1022 | { |
1023 | unsigned b; |
d03ab969 |
1024 | |
c8a2f9ef |
1025 | d = dvl[-1]; |
1026 | for (b = MPW_BITS / 2; b; b >>= 1) { |
1027 | if (d < (MPW_MAX >> b)) { |
1028 | d <<= b; |
1029 | norm += b; |
1030 | } |
1031 | } |
1032 | if (dv + 1 == dvl) |
1033 | norm += MPW_BITS; |
d03ab969 |
1034 | } |
d03ab969 |
1035 | |
1036 | /* --- Normalize the dividend/remainder to match --- */ |
1037 | |
c8a2f9ef |
1038 | if (norm) { |
c8a2f9ef |
1039 | mpx_lsl(rv, rvl, rv, rvl, norm); |
f45a00c6 |
1040 | mpx_lsl(sv, svl, dv, dvl, norm); |
c8a2f9ef |
1041 | dv = sv; |
f45a00c6 |
1042 | dvl = svl; |
c8a2f9ef |
1043 | MPX_SHRINK(dv, dvl); |
1044 | } |
1045 | |
d03ab969 |
1046 | MPX_SHRINK(rv, rvl); |
c8a2f9ef |
1047 | d = dvl[-1]; |
1048 | dd = dvl[-2]; |
d03ab969 |
1049 | |
1050 | /* --- Work out the relative scales --- */ |
1051 | |
1052 | { |
1053 | size_t rvn = rvl - rv; |
c8a2f9ef |
1054 | size_t dvn = dvl - dv; |
d03ab969 |
1055 | |
1056 | /* --- If the divisor is clearly larger, notice this --- */ |
1057 | |
1058 | if (dvn > rvn) { |
1059 | mpx_lsr(rv, rvl, rv, rvl, norm); |
1060 | return; |
1061 | } |
1062 | |
1063 | scale = rvn - dvn; |
1064 | } |
1065 | |
1066 | /* --- Calculate the most significant quotient digit --- * |
1067 | * |
1068 | * Because the divisor has its top bit set, this can only happen once. The |
1069 | * pointer arithmetic is a little contorted, to make sure that the |
1070 | * behaviour is defined. |
1071 | */ |
1072 | |
1073 | if (MPX_UCMP(rv + scale, rvl, >=, dv, dvl)) { |
1074 | mpx_usub(rv + scale, rvl, rv + scale, rvl, dv, dvl); |
1075 | if (qvl - qv > scale) |
1076 | qv[scale] = 1; |
1077 | } |
1078 | |
1079 | /* --- Now for the main loop --- */ |
1080 | |
1081 | { |
c8a2f9ef |
1082 | mpw *rvv = rvl - 2; |
d03ab969 |
1083 | |
1084 | while (scale) { |
c8a2f9ef |
1085 | mpw q; |
1086 | mpd rh; |
d03ab969 |
1087 | |
1088 | /* --- Get an estimate for the next quotient digit --- */ |
1089 | |
c8a2f9ef |
1090 | mpw r = rvv[1]; |
1091 | mpw rr = rvv[0]; |
1092 | mpw rrr = *--rvv; |
1093 | |
1094 | scale--; |
1095 | rh = ((mpd)r << MPW_BITS) | rr; |
d03ab969 |
1096 | if (r == d) |
1097 | q = MPW_MAX; |
c8a2f9ef |
1098 | else |
1099 | q = MPW(rh / d); |
d03ab969 |
1100 | |
1101 | /* --- Refine the estimate --- */ |
1102 | |
1103 | { |
1104 | mpd yh = (mpd)d * q; |
ce76ff16 |
1105 | mpd yy = (mpd)dd * q; |
1106 | mpw yl; |
c8a2f9ef |
1107 | |
ce76ff16 |
1108 | if (yy > MPW_MAX) |
1109 | yh += yy >> MPW_BITS; |
1110 | yl = MPW(yy); |
c8a2f9ef |
1111 | |
1112 | while (yh > rh || (yh == rh && yl > rrr)) { |
1113 | q--; |
1114 | yh -= d; |
ce76ff16 |
1115 | if (yl < dd) |
1116 | yh--; |
99b30c23 |
1117 | yl = MPW(yl - dd); |
c8a2f9ef |
1118 | } |
1119 | } |
1120 | |
1121 | /* --- Remove a chunk from the dividend --- */ |
1122 | |
1123 | { |
1124 | mpw *svv; |
1125 | const mpw *dvv; |
f45a00c6 |
1126 | mpw mc = 0, sc = 0; |
c8a2f9ef |
1127 | |
f45a00c6 |
1128 | /* --- Calculate the size of the chunk --- * |
1129 | * |
1130 | * This does the whole job of calculating @r >> scale - qd@. |
1131 | */ |
c8a2f9ef |
1132 | |
f45a00c6 |
1133 | for (svv = rv + scale, dvv = dv; |
1134 | dvv < dvl && svv < rvl; |
1135 | svv++, dvv++) { |
1136 | mpd x = (mpd)*dvv * (mpd)q + mc; |
1137 | mc = x >> MPW_BITS; |
1138 | x = (mpd)*svv - MPW(x) - sc; |
c8a2f9ef |
1139 | *svv = MPW(x); |
f45a00c6 |
1140 | if (x >> MPW_BITS) |
1141 | sc = 1; |
1142 | else |
1143 | sc = 0; |
1144 | } |
1145 | |
1146 | if (svv < rvl) { |
1147 | mpd x = (mpd)*svv - mc - sc; |
1148 | *svv++ = MPW(x); |
1149 | if (x >> MPW_BITS) |
1150 | sc = MPW_MAX; |
1151 | else |
1152 | sc = 0; |
1153 | while (svv < rvl) |
1154 | *svv++ = sc; |
c8a2f9ef |
1155 | } |
c8a2f9ef |
1156 | |
f45a00c6 |
1157 | /* --- Fix if the quotient was too large --- * |
c8a2f9ef |
1158 | * |
f45a00c6 |
1159 | * This doesn't seem to happen very often. |
c8a2f9ef |
1160 | */ |
1161 | |
c8a2f9ef |
1162 | if (rvl[-1] > MPW_MAX / 2) { |
1163 | mpx_uadd(rv + scale, rvl, rv + scale, rvl, dv, dvl); |
1164 | q--; |
1165 | } |
1166 | } |
1167 | |
1168 | /* --- Done for another iteration --- */ |
1169 | |
1170 | if (qvl - qv > scale) |
1171 | qv[scale] = q; |
1172 | r = rr; |
1173 | rr = rrr; |
1174 | } |
1175 | } |
1176 | |
1177 | /* --- Now fiddle with unnormalizing and things --- */ |
1178 | |
1179 | mpx_lsr(rv, rvl, rv, rvl, norm); |
d03ab969 |
1180 | } |
1181 | |
698bd937 |
1182 | /* --- @mpx_udivn@ --- * |
1183 | * |
1184 | * Arguments: @mpw *qv, *qvl@ = storage for the quotient (may overlap |
1185 | * dividend) |
1186 | * @const mpw *rv, *rvl@ = dividend |
1187 | * @mpw d@ = single-precision divisor |
1188 | * |
1189 | * Returns: Remainder after divison. |
1190 | * |
1191 | * Use: Performs a single-precision division operation. |
1192 | */ |
1193 | |
1194 | mpw mpx_udivn(mpw *qv, mpw *qvl, const mpw *rv, const mpw *rvl, mpw d) |
1195 | { |
1196 | size_t i; |
1197 | size_t ql = qvl - qv; |
1198 | mpd r = 0; |
1199 | |
1200 | i = rvl - rv; |
1201 | while (i > 0) { |
1202 | i--; |
1203 | r = (r << MPW_BITS) | rv[i]; |
1204 | if (i < ql) |
1205 | qv[i] = r / d; |
1206 | r %= d; |
1207 | } |
1208 | return (MPW(r)); |
1209 | } |
1210 | |
42684bdb |
1211 | /*----- Test rig ----------------------------------------------------------*/ |
1212 | |
1213 | #ifdef TEST_RIG |
1214 | |
1215 | #include <mLib/alloc.h> |
1216 | #include <mLib/dstr.h> |
1217 | #include <mLib/quis.h> |
1218 | #include <mLib/testrig.h> |
1219 | |
1220 | #include "mpscan.h" |
1221 | |
/* Allocate a fresh vector of @sz@ words with @xmalloc@, storing its base
 * in @v@ and its limit (one past the last word) in @vl@.  The size
 * expression is evaluated exactly once.
 */
#define ALLOC(v, vl, sz) do {                                           \
  size_t _n = (sz);                                                     \
  mpw *_base = xmalloc(MPWS(_n));                                       \
  (v) = _base;                                                          \
  (vl) = _base + _n;                                                    \
} while (0)
1229 | |
/* Allocate a vector big enough for the octets held in the @dstr@ pointed
 * to by @d@ (sized with @MPW_RQ@), fill it using @mpx_loadb@, and store
 * its base and limit in @v@ and @vl@.
 */
#define LOAD(v, vl, d) do {                                             \
  const dstr *_src = (d);                                               \
  mpw *_lo, *_hi;                                                       \
  ALLOC(_lo, _hi, MPW_RQ(_src->len));                                   \
  mpx_loadb(_lo, _hi, _src->buf, _src->len);                            \
  (v) = _lo;                                                            \
  (vl) = _hi;                                                           \
} while (0)
1238 | |
/* Larger of two values.  One of the operands is evaluated a second time,
 * so don't pass expressions with side effects.
 */
#define MAX(x, y) ((y) < (x) ? (x) : (y))
1240 | |
1241 | static void dumpbits(const char *msg, const void *pp, size_t sz) |
1242 | { |
1243 | const octet *p = pp; |
1244 | fputs(msg, stderr); |
1245 | for (; sz; sz--) |
1246 | fprintf(stderr, " %02x", *p++); |
1247 | fputc('\n', stderr); |
1248 | } |
1249 | |
1250 | static void dumpmp(const char *msg, const mpw *v, const mpw *vl) |
1251 | { |
1252 | fputs(msg, stderr); |
1253 | MPX_SHRINK(v, vl); |
1254 | while (v < vl) |
1255 | fprintf(stderr, " %08lx", (unsigned long)*--vl); |
1256 | fputc('\n', stderr); |
1257 | } |
1258 | |
1259 | static int chkscan(const mpw *v, const mpw *vl, |
1260 | const void *pp, size_t sz, int step) |
1261 | { |
1262 | mpscan mps; |
1263 | const octet *p = pp; |
1264 | unsigned bit = 0; |
1265 | int ok = 1; |
1266 | |
1267 | mpscan_initx(&mps, v, vl); |
1268 | while (sz) { |
1269 | unsigned x = *p; |
1270 | int i; |
1271 | p += step; |
1272 | for (i = 0; i < 8 && MPSCAN_STEP(&mps); i++) { |
1273 | if (MPSCAN_BIT(&mps) != (x & 1)) { |
1274 | fprintf(stderr, |
1275 | "\n*** error, step %i, bit %u, expected %u, found %u\n", |
1276 | step, bit, x & 1, MPSCAN_BIT(&mps)); |
1277 | ok = 0; |
1278 | } |
1279 | x >>= 1; |
1280 | bit++; |
1281 | } |
1282 | sz--; |
1283 | } |
1284 | |
1285 | return (ok); |
1286 | } |
1287 | |
1288 | static int loadstore(dstr *v) |
1289 | { |
1290 | dstr d = DSTR_INIT; |
1291 | size_t sz = MPW_RQ(v->len) * 2, diff; |
1292 | mpw *m, *ml; |
1293 | int ok = 1; |
1294 | |
1295 | dstr_ensure(&d, v->len); |
1296 | m = xmalloc(MPWS(sz)); |
1297 | |
1298 | for (diff = 0; diff < sz; diff += 5) { |
1299 | size_t oct; |
1300 | |
1301 | ml = m + sz - diff; |
1302 | |
1303 | mpx_loadl(m, ml, v->buf, v->len); |
1304 | if (!chkscan(m, ml, v->buf, v->len, +1)) |
1305 | ok = 0; |
1306 | MPX_OCTETS(oct, m, ml); |
1307 | mpx_storel(m, ml, d.buf, d.sz); |
1308 | if (memcmp(d.buf, v->buf, oct) != 0) { |
1309 | dumpbits("\n*** storel failed", d.buf, d.sz); |
1310 | ok = 0; |
1311 | } |
1312 | |
1313 | mpx_loadb(m, ml, v->buf, v->len); |
1314 | if (!chkscan(m, ml, v->buf + v->len - 1, v->len, -1)) |
1315 | ok = 0; |
1316 | MPX_OCTETS(oct, m, ml); |
1317 | mpx_storeb(m, ml, d.buf, d.sz); |
1318 | if (memcmp(d.buf + d.sz - oct, v->buf + v->len - oct, oct) != 0) { |
1319 | dumpbits("\n*** storeb failed", d.buf, d.sz); |
1320 | ok = 0; |
1321 | } |
1322 | } |
1323 | |
1324 | if (!ok) |
1325 | dumpbits("input data", v->buf, v->len); |
1326 | |
1327 | free(m); |
1328 | dstr_destroy(&d); |
1329 | return (ok); |
1330 | } |
1331 | |
f09e814a |
1332 | static int twocl(dstr *v) |
1333 | { |
1334 | dstr d = DSTR_INIT; |
1335 | mpw *m, *ml; |
1336 | size_t sz; |
1337 | int ok = 1; |
1338 | |
1339 | sz = v[0].len; if (v[1].len > sz) sz = v[1].len; |
1340 | dstr_ensure(&d, sz); |
1341 | |
1342 | sz = MPW_RQ(sz); |
1343 | m = xmalloc(MPWS(sz)); |
1344 | ml = m + sz; |
1345 | |
1346 | mpx_loadl(m, ml, v[0].buf, v[0].len); |
1347 | mpx_storel2cn(m, ml, d.buf, v[1].len); |
1348 | if (memcmp(d.buf, v[1].buf, v[1].len)) { |
1349 | dumpbits("\n*** storel2cn failed", d.buf, v[1].len); |
1350 | ok = 0; |
1351 | } |
1352 | |
1353 | mpx_loadl2cn(m, ml, v[1].buf, v[1].len); |
1354 | mpx_storel(m, ml, d.buf, v[0].len); |
1355 | if (memcmp(d.buf, v[0].buf, v[0].len)) { |
1356 | dumpbits("\n*** loadl2cn failed", d.buf, v[0].len); |
1357 | ok = 0; |
1358 | } |
1359 | |
1360 | if (!ok) { |
1361 | dumpbits("pos", v[0].buf, v[0].len); |
1362 | dumpbits("neg", v[1].buf, v[1].len); |
1363 | } |
1364 | |
1365 | free(m); |
1366 | dstr_destroy(&d); |
1367 | |
1368 | return (ok); |
1369 | } |
1370 | |
1371 | static int twocb(dstr *v) |
1372 | { |
1373 | dstr d = DSTR_INIT; |
1374 | mpw *m, *ml; |
1375 | size_t sz; |
1376 | int ok = 1; |
1377 | |
1378 | sz = v[0].len; if (v[1].len > sz) sz = v[1].len; |
1379 | dstr_ensure(&d, sz); |
1380 | |
1381 | sz = MPW_RQ(sz); |
1382 | m = xmalloc(MPWS(sz)); |
1383 | ml = m + sz; |
1384 | |
1385 | mpx_loadb(m, ml, v[0].buf, v[0].len); |
1386 | mpx_storeb2cn(m, ml, d.buf, v[1].len); |
1387 | if (memcmp(d.buf, v[1].buf, v[1].len)) { |
1388 | dumpbits("\n*** storeb2cn failed", d.buf, v[1].len); |
1389 | ok = 0; |
1390 | } |
1391 | |
1392 | mpx_loadb2cn(m, ml, v[1].buf, v[1].len); |
1393 | mpx_storeb(m, ml, d.buf, v[0].len); |
1394 | if (memcmp(d.buf, v[0].buf, v[0].len)) { |
1395 | dumpbits("\n*** loadb2cn failed", d.buf, v[0].len); |
1396 | ok = 0; |
1397 | } |
1398 | |
1399 | if (!ok) { |
1400 | dumpbits("pos", v[0].buf, v[0].len); |
1401 | dumpbits("neg", v[1].buf, v[1].len); |
1402 | } |
1403 | |
1404 | free(m); |
1405 | dstr_destroy(&d); |
1406 | |
1407 | return (ok); |
1408 | } |
1409 | |
42684bdb |
1410 | static int lsl(dstr *v) |
1411 | { |
1412 | mpw *a, *al; |
1413 | int n = *(int *)v[1].buf; |
1414 | mpw *c, *cl; |
1415 | mpw *d, *dl; |
1416 | int ok = 1; |
1417 | |
1418 | LOAD(a, al, &v[0]); |
1419 | LOAD(c, cl, &v[2]); |
1420 | ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS); |
1421 | |
1422 | mpx_lsl(d, dl, a, al, n); |
1a05a8ef |
1423 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb |
1424 | fprintf(stderr, "\n*** lsl(%i) failed\n", n); |
1425 | dumpmp(" a", a, al); |
1426 | dumpmp("expected", c, cl); |
1427 | dumpmp(" result", d, dl); |
1428 | ok = 0; |
1429 | } |
1430 | |
1431 | free(a); free(c); free(d); |
1432 | return (ok); |
1433 | } |
1434 | |
1435 | static int lsr(dstr *v) |
1436 | { |
1437 | mpw *a, *al; |
1438 | int n = *(int *)v[1].buf; |
1439 | mpw *c, *cl; |
1440 | mpw *d, *dl; |
1441 | int ok = 1; |
1442 | |
1443 | LOAD(a, al, &v[0]); |
1444 | LOAD(c, cl, &v[2]); |
1445 | ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS + 1); |
1446 | |
1447 | mpx_lsr(d, dl, a, al, n); |
1a05a8ef |
1448 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb |
1449 | fprintf(stderr, "\n*** lsr(%i) failed\n", n); |
1450 | dumpmp(" a", a, al); |
1451 | dumpmp("expected", c, cl); |
1452 | dumpmp(" result", d, dl); |
1453 | ok = 0; |
1454 | } |
1455 | |
1456 | free(a); free(c); free(d); |
1457 | return (ok); |
1458 | } |
1459 | |
1460 | static int uadd(dstr *v) |
1461 | { |
1462 | mpw *a, *al; |
1463 | mpw *b, *bl; |
1464 | mpw *c, *cl; |
1465 | mpw *d, *dl; |
1466 | int ok = 1; |
1467 | |
1468 | LOAD(a, al, &v[0]); |
1469 | LOAD(b, bl, &v[1]); |
1470 | LOAD(c, cl, &v[2]); |
1471 | ALLOC(d, dl, MAX(al - a, bl - b) + 1); |
1472 | |
1473 | mpx_uadd(d, dl, a, al, b, bl); |
1a05a8ef |
1474 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb |
1475 | fprintf(stderr, "\n*** uadd failed\n"); |
1476 | dumpmp(" a", a, al); |
1477 | dumpmp(" b", b, bl); |
1478 | dumpmp("expected", c, cl); |
1479 | dumpmp(" result", d, dl); |
1480 | ok = 0; |
1481 | } |
1482 | |
1483 | free(a); free(b); free(c); free(d); |
1484 | return (ok); |
1485 | } |
1486 | |
1487 | static int usub(dstr *v) |
1488 | { |
1489 | mpw *a, *al; |
1490 | mpw *b, *bl; |
1491 | mpw *c, *cl; |
1492 | mpw *d, *dl; |
1493 | int ok = 1; |
1494 | |
1495 | LOAD(a, al, &v[0]); |
1496 | LOAD(b, bl, &v[1]); |
1497 | LOAD(c, cl, &v[2]); |
1498 | ALLOC(d, dl, al - a); |
1499 | |
1500 | mpx_usub(d, dl, a, al, b, bl); |
1a05a8ef |
1501 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb |
1502 | fprintf(stderr, "\n*** usub failed\n"); |
1503 | dumpmp(" a", a, al); |
1504 | dumpmp(" b", b, bl); |
1505 | dumpmp("expected", c, cl); |
1506 | dumpmp(" result", d, dl); |
1507 | ok = 0; |
1508 | } |
1509 | |
1510 | free(a); free(b); free(c); free(d); |
1511 | return (ok); |
1512 | } |
1513 | |
1514 | static int umul(dstr *v) |
1515 | { |
1516 | mpw *a, *al; |
1517 | mpw *b, *bl; |
1518 | mpw *c, *cl; |
1519 | mpw *d, *dl; |
1520 | int ok = 1; |
1521 | |
1522 | LOAD(a, al, &v[0]); |
1523 | LOAD(b, bl, &v[1]); |
1524 | LOAD(c, cl, &v[2]); |
1525 | ALLOC(d, dl, (al - a) + (bl - b)); |
1526 | |
1527 | mpx_umul(d, dl, a, al, b, bl); |
1a05a8ef |
1528 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb |
1529 | fprintf(stderr, "\n*** umul failed\n"); |
1530 | dumpmp(" a", a, al); |
1531 | dumpmp(" b", b, bl); |
1532 | dumpmp("expected", c, cl); |
1533 | dumpmp(" result", d, dl); |
1534 | ok = 0; |
1535 | } |
1536 | |
1537 | free(a); free(b); free(c); free(d); |
1538 | return (ok); |
1539 | } |
1540 | |
1541 | static int usqr(dstr *v) |
1542 | { |
1543 | mpw *a, *al; |
1544 | mpw *c, *cl; |
1545 | mpw *d, *dl; |
1546 | int ok = 1; |
1547 | |
1548 | LOAD(a, al, &v[0]); |
1549 | LOAD(c, cl, &v[1]); |
1550 | ALLOC(d, dl, 2 * (al - a)); |
1551 | |
1552 | mpx_usqr(d, dl, a, al); |
1a05a8ef |
1553 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb |
1554 | fprintf(stderr, "\n*** usqr failed\n"); |
1555 | dumpmp(" a", a, al); |
1556 | dumpmp("expected", c, cl); |
1557 | dumpmp(" result", d, dl); |
1558 | ok = 0; |
1559 | } |
1560 | |
1561 | free(a); free(c); free(d); |
1562 | return (ok); |
1563 | } |
1564 | |
1565 | static int udiv(dstr *v) |
1566 | { |
1567 | mpw *a, *al; |
1568 | mpw *b, *bl; |
1569 | mpw *q, *ql; |
1570 | mpw *r, *rl; |
1571 | mpw *qq, *qql; |
1572 | mpw *s, *sl; |
1573 | int ok = 1; |
1574 | |
1575 | ALLOC(a, al, MPW_RQ(v[0].len) + 2); mpx_loadb(a, al, v[0].buf, v[0].len); |
1576 | LOAD(b, bl, &v[1]); |
1577 | LOAD(q, ql, &v[2]); |
1578 | LOAD(r, rl, &v[3]); |
1579 | ALLOC(qq, qql, al - a); |
1580 | ALLOC(s, sl, (bl - b) + 1); |
1581 | |
1582 | mpx_udiv(qq, qql, a, al, b, bl, s, sl); |
1a05a8ef |
1583 | if (!mpx_ueq(qq, qql, q, ql) || |
1584 | !mpx_ueq(a, al, r, rl)) { |
42684bdb |
1585 | fprintf(stderr, "\n*** udiv failed\n"); |
1586 | dumpmp(" divisor", b, bl); |
1587 | dumpmp("expect r", r, rl); |
1588 | dumpmp("result r", a, al); |
1589 | dumpmp("expect q", q, ql); |
1590 | dumpmp("result q", qq, qql); |
1591 | ok = 0; |
1592 | } |
1593 | |
1594 | free(a); free(b); free(r); free(q); free(s); free(qq); |
1595 | return (ok); |
1596 | } |
1597 | |
1598 | static test_chunk defs[] = { |
1599 | { "load-store", loadstore, { &type_hex, 0 } }, |
f09e814a |
1600 | { "2cl", twocl, { &type_hex, &type_hex, } }, |
1601 | { "2cb", twocb, { &type_hex, &type_hex, } }, |
42684bdb |
1602 | { "lsl", lsl, { &type_hex, &type_int, &type_hex, 0 } }, |
1603 | { "lsr", lsr, { &type_hex, &type_int, &type_hex, 0 } }, |
1604 | { "uadd", uadd, { &type_hex, &type_hex, &type_hex, 0 } }, |
1605 | { "usub", usub, { &type_hex, &type_hex, &type_hex, 0 } }, |
1606 | { "umul", umul, { &type_hex, &type_hex, &type_hex, 0 } }, |
1607 | { "usqr", usqr, { &type_hex, &type_hex, 0 } }, |
1608 | { "udiv", udiv, { &type_hex, &type_hex, &type_hex, &type_hex, 0 } }, |
1609 | { 0, 0, { 0 } } |
1610 | }; |
1611 | |
1612 | int main(int argc, char *argv[]) |
1613 | { |
1614 | test_run(argc, argv, defs, SRCDIR"/tests/mpx"); |
1615 | return (0); |
1616 | } |
1617 | |
42684bdb |
1618 | #endif |
1619 | |
d03ab969 |
1620 | /*----- That's all, folks -------------------------------------------------*/ |