mdw@git.distorted.org.uk Git - catacomb/blame_incremental

... / ...

Commit	Line	Data
	1	/* -*-c-*-
	2	*
	3	* $Id: mpx.c,v 1.13 2002/10/19 17:56:50 mdw Exp $
	4	*
	5	* Low-level multiprecision arithmetic
	6	*
	7	* (c) 1999 Straylight/Edgeware
	8	*/
	9
	10	/*----- Licensing notice --------------------------------------------------*
	11	*
	12	* This file is part of Catacomb.
	13	*
	14	* Catacomb is free software; you can redistribute it and/or modify
	15	* it under the terms of the GNU Library General Public License as
	16	* published by the Free Software Foundation; either version 2 of the
	17	* License, or (at your option) any later version.
	18	*
	19	* Catacomb is distributed in the hope that it will be useful,
	20	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	21	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	22	* GNU Library General Public License for more details.
	23	*
	24	* You should have received a copy of the GNU Library General Public
	25	* License along with Catacomb; if not, write to the Free
	26	* Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
	27	* MA 02111-1307, USA.
	28	*/
	29
	30	/*----- Revision history --------------------------------------------------*
	31	*
	32	* $Log: mpx.c,v $
	33	* Revision 1.13 2002/10/19 17:56:50 mdw
	34	* Fix bit operations. Test them (a bit) better.
	35	*
	36	* Revision 1.12 2002/10/06 22:52:50 mdw
	37	* Pile of changes for supporting two's complement properly.
	38	*
	39	* Revision 1.11 2001/04/03 19:36:05 mdw
	40	* Add some simple bitwise operations so that Perl can use them.
	41	*
	42	* Revision 1.10 2000/10/08 12:06:12 mdw
	43	* Provide @mpx_ueq@ for rapidly testing equality of two integers.
	44	*
	45	* Revision 1.9 2000/06/26 07:52:50 mdw
	46	* Portability fix for the bug fix.
	47	*
	48	* Revision 1.8 2000/06/25 12:59:02 mdw
	49	* (mpx_udiv): Fix bug in quotient digit estimation.
	50	*
	51	* Revision 1.7 1999/12/22 15:49:07 mdw
	52	* New function for division by a small integer.
	53	*
	54	* Revision 1.6 1999/11/20 22:43:44 mdw
	55	* Integrate testing for MPX routines.
	56	*
	57	* Revision 1.5 1999/11/20 22:23:27 mdw
	58	* Add function versions of some low-level macros with wider use.
	59	*
	60	* Revision 1.4 1999/11/17 18:04:09 mdw
	61	* Add two's-complement functionality. Improve mpx_udiv a little by
	62	* performing the multiplication of the divisor by q with the subtraction
	63	* from r.
	64	*
	65	* Revision 1.3 1999/11/13 01:57:31 mdw
	66	* Remove stray debugging code.
	67	*
	68	* Revision 1.2 1999/11/13 01:50:59 mdw
	69	* Multiprecision routines finished and tested.
	70	*
	71	* Revision 1.1 1999/09/03 08:41:12 mdw
	72	* Initial import.
	73	*
	74	*/
	75
	76	/*----- Header files ------------------------------------------------------*/
	77
	78	#include <assert.h>
	79	#include <stdio.h>
	80	#include <stdlib.h>
	81	#include <string.h>
	82
	83	#include <mLib/bits.h>
	84
	85	#include "mptypes.h"
	86	#include "mpx.h"
	87	#include "bitops.h"
	88
	89	/*----- Loading and storing -----------------------------------------------*/
	90
	91	/* --- @mpx_storel@ --- *
	92	*
	93	* Arguments: @const mpw v, vl@ = base and limit of source vector
	94	* @void *pp@ = pointer to octet array
	95	* @size_t sz@ = size of octet array
	96	*
	97	* Returns: ---
	98	*
	99	* Use: Stores an MP in an octet array, least significant octet
	100	* first. High-end octets are silently discarded if there
	101	* isn't enough space for them.
	102	*/
	103
	104	void mpx_storel(const mpw v, const mpw vl, void *pp, size_t sz)
	105	{
	106	mpw n, w = 0;
	107	octet p = pp, q = p + sz;
	108	unsigned bits = 0;
	109
	110	while (p < q) {
	111	if (bits < 8) {
	112	if (v >= vl) {
	113	*p++ = U8(w);
	114	break;
	115	}
	116	n = *v++;
	117	*p++ = U8(w \| n << bits);
	118	w = n >> (8 - bits);
	119	bits += MPW_BITS - 8;
	120	} else {
	121	*p++ = U8(w);
	122	w >>= 8;
	123	bits -= 8;
	124	}
	125	}
	126	memset(p, 0, q - p);
	127	}
	128
	129	/* --- @mpx_loadl@ --- *
	130	*
	131	* Arguments: @mpw v, vl@ = base and limit of destination vector
	132	* @const void *pp@ = pointer to octet array
	133	* @size_t sz@ = size of octet array
	134	*
	135	* Returns: ---
	136	*
	137	* Use: Loads an MP in an octet array, least significant octet
	138	* first. High-end octets are ignored if there isn't enough
	139	* space for them.
	140	*/
	141
	142	void mpx_loadl(mpw v, mpw vl, const void *pp, size_t sz)
	143	{
	144	unsigned n;
	145	mpw w = 0;
	146	const octet p = pp, q = p + sz;
	147	unsigned bits = 0;
	148
	149	if (v >= vl)
	150	return;
	151	while (p < q) {
	152	n = U8(*p++);
	153	w \|= n << bits;
	154	bits += 8;
	155	if (bits >= MPW_BITS) {
	156	*v++ = MPW(w);
	157	w = n >> (MPW_BITS - bits + 8);
	158	bits -= MPW_BITS;
	159	if (v >= vl)
	160	return;
	161	}
	162	}
	163	*v++ = w;
	164	MPX_ZERO(v, vl);
	165	}
	166
	167	/* --- @mpx_storeb@ --- *
	168	*
	169	* Arguments: @const mpw v, vl@ = base and limit of source vector
	170	* @void *pp@ = pointer to octet array
	171	* @size_t sz@ = size of octet array
	172	*
	173	* Returns: ---
	174	*
	175	* Use: Stores an MP in an octet array, most significant octet
	176	* first. High-end octets are silently discarded if there
	177	* isn't enough space for them.
	178	*/
	179
	180	void mpx_storeb(const mpw v, const mpw vl, void *pp, size_t sz)
	181	{
	182	mpw n, w = 0;
	183	octet p = pp, q = p + sz;
	184	unsigned bits = 0;
	185
	186	while (q > p) {
	187	if (bits < 8) {
	188	if (v >= vl) {
	189	*--q = U8(w);
	190	break;
	191	}
	192	n = *v++;
	193	*--q = U8(w \| n << bits);
	194	w = n >> (8 - bits);
	195	bits += MPW_BITS - 8;
	196	} else {
	197	*--q = U8(w);
	198	w >>= 8;
	199	bits -= 8;
	200	}
	201	}
	202	memset(p, 0, q - p);
	203	}
	204
	205	/* --- @mpx_loadb@ --- *
	206	*
	207	* Arguments: @mpw v, vl@ = base and limit of destination vector
	208	* @const void *pp@ = pointer to octet array
	209	* @size_t sz@ = size of octet array
	210	*
	211	* Returns: ---
	212	*
	213	* Use: Loads an MP in an octet array, most significant octet
	214	* first. High-end octets are ignored if there isn't enough
	215	* space for them.
	216	*/
	217
	218	void mpx_loadb(mpw v, mpw vl, const void *pp, size_t sz)
	219	{
	220	unsigned n;
	221	mpw w = 0;
	222	const octet p = pp, q = p + sz;
	223	unsigned bits = 0;
	224
	225	if (v >= vl)
	226	return;
	227	while (q > p) {
	228	n = U8(*--q);
	229	w \|= n << bits;
	230	bits += 8;
	231	if (bits >= MPW_BITS) {
	232	*v++ = MPW(w);
	233	w = n >> (MPW_BITS - bits + 8);
	234	bits -= MPW_BITS;
	235	if (v >= vl)
	236	return;
	237	}
	238	}
	239	*v++ = w;
	240	MPX_ZERO(v, vl);
	241	}
	242
	243	/* --- @mpx_storel2cn@ --- *
	244	*
	245	* Arguments: @const mpw v, vl@ = base and limit of source vector
	246	* @void *pp@ = pointer to octet array
	247	* @size_t sz@ = size of octet array
	248	*
	249	* Returns: ---
	250	*
	251	* Use: Stores a negative MP in an octet array, least significant
	252	* octet first, as two's complement. High-end octets are
	253	* silently discarded if there isn't enough space for them.
	254	* This obviously makes the output bad.
	255	*/
	256
	257	void mpx_storel2cn(const mpw v, const mpw vl, void *pp, size_t sz)
	258	{
	259	unsigned c = 1;
	260	unsigned b = 0;
	261	mpw n, w = 0;
	262	octet p = pp, q = p + sz;
	263	unsigned bits = 0;
	264
	265	while (p < q) {
	266	if (bits < 8) {
	267	if (v >= vl) {
	268	b = w;
	269	break;
	270	}
	271	n = *v++;
	272	b = w \| n << bits;
	273	w = n >> (8 - bits);
	274	bits += MPW_BITS - 8;
	275	} else {
	276	b = w;
	277	w >>= 8;
	278	bits -= 8;
	279	}
	280	b = U8(~b + c);
	281	c = !b;
	282	*p++ = b;
	283	}
	284	while (p < q) {
	285	b = U8(~b + c);
	286	c = !b;
	287	*p++ = b;
	288	b = 0;
	289	}
	290	}
	291
	292	/* --- @mpx_loadl2cn@ --- *
	293	*
	294	* Arguments: @mpw v, vl@ = base and limit of destination vector
	295	* @const void *pp@ = pointer to octet array
	296	* @size_t sz@ = size of octet array
	297	*
	298	* Returns: ---
	299	*
	300	* Use: Loads a negative MP in an octet array, least significant
	301	* octet first, as two's complement. High-end octets are
	302	* ignored if there isn't enough space for them. This probably
	303	* means you made the wrong choice coming here.
	304	*/
	305
	306	void mpx_loadl2cn(mpw v, mpw vl, const void *pp, size_t sz)
	307	{
	308	unsigned n;
	309	unsigned c = 1;
	310	mpw w = 0;
	311	const octet p = pp, q = p + sz;
	312	unsigned bits = 0;
	313
	314	if (v >= vl)
	315	return;
	316	while (p < q) {
	317	n = U8(~(*p++) + c);
	318	c = !n;
	319	w \|= n << bits;
	320	bits += 8;
	321	if (bits >= MPW_BITS) {
	322	*v++ = MPW(w);
	323	w = n >> (MPW_BITS - bits + 8);
	324	bits -= MPW_BITS;
	325	if (v >= vl)
	326	return;
	327	}
	328	}
	329	*v++ = w;
	330	MPX_ZERO(v, vl);
	331	}
	332
	333	/* --- @mpx_storeb2cn@ --- *
	334	*
	335	* Arguments: @const mpw v, vl@ = base and limit of source vector
	336	* @void *pp@ = pointer to octet array
	337	* @size_t sz@ = size of octet array
	338	*
	339	* Returns: ---
	340	*
	341	* Use: Stores a negative MP in an octet array, most significant
	342	* octet first, as two's complement. High-end octets are
	343	* silently discarded if there isn't enough space for them,
	344	* which probably isn't what you meant.
	345	*/
	346
	347	void mpx_storeb2cn(const mpw v, const mpw vl, void *pp, size_t sz)
	348	{
	349	mpw n, w = 0;
	350	unsigned b = 0;
	351	unsigned c = 1;
	352	octet p = pp, q = p + sz;
	353	unsigned bits = 0;
	354
	355	while (q > p) {
	356	if (bits < 8) {
	357	if (v >= vl) {
	358	b = w;
	359	break;
	360	}
	361	n = *v++;
	362	b = w \| n << bits;
	363	w = n >> (8 - bits);
	364	bits += MPW_BITS - 8;
	365	} else {
	366	b = w;
	367	w >>= 8;
	368	bits -= 8;
	369	}
	370	b = U8(~b + c);
	371	c = !b;
	372	*--q = b;
	373	}
	374	while (q > p) {
	375	b = ~b + c;
	376	c = !(b & 0xff);
	377	*--q = b;
	378	b = 0;
	379	}
	380	}
	381
	382	/* --- @mpx_loadb2cn@ --- *
	383	*
	384	* Arguments: @mpw v, vl@ = base and limit of destination vector
	385	* @const void *pp@ = pointer to octet array
	386	* @size_t sz@ = size of octet array
	387	*
	388	* Returns: ---
	389	*
	390	* Use: Loads a negative MP in an octet array, most significant octet
	391	* first as two's complement. High-end octets are ignored if
	392	* there isn't enough space for them. This probably means you
	393	* chose this function wrongly.
	394	*/
	395
	396	void mpx_loadb2cn(mpw v, mpw vl, const void *pp, size_t sz)
	397	{
	398	unsigned n;
	399	unsigned c = 1;
	400	mpw w = 0;
	401	const octet p = pp, q = p + sz;
	402	unsigned bits = 0;
	403
	404	if (v >= vl)
	405	return;
	406	while (q > p) {
	407	n = U8(~(*--q) + c);
	408	c = !n;
	409	w \|= n << bits;
	410	bits += 8;
	411	if (bits >= MPW_BITS) {
	412	*v++ = MPW(w);
	413	w = n >> (MPW_BITS - bits + 8);
	414	bits -= MPW_BITS;
	415	if (v >= vl)
	416	return;
	417	}
	418	}
	419	*v++ = w;
	420	MPX_ZERO(v, vl);
	421	}
	422
	423	/*----- Logical shifting --------------------------------------------------*/
	424
	425	/* --- @mpx_lsl@ --- *
	426	*
	427	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	428	* @const mpw av, avl@ = source vector base and limit
	429	* @size_t n@ = number of bit positions to shift by
	430	*
	431	* Returns: ---
	432	*
	433	* Use: Performs a logical shift left operation on an integer.
	434	*/
	435
	436	void mpx_lsl(mpw dv, mpw dvl, const mpw av, const mpw avl, size_t n)
	437	{
	438	size_t nw;
	439	unsigned nb;
	440
	441	/* --- Trivial special case --- */
	442
	443	if (n == 0)
	444	MPX_COPY(dv, dvl, av, avl);
	445
	446	/* --- Single bit shifting --- */
	447
	448	else if (n == 1) {
	449	mpw w = 0;
	450	while (av < avl) {
	451	mpw t;
	452	if (dv >= dvl)
	453	goto done;
	454	t = *av++;
	455	*dv++ = MPW((t << 1) \| w);
	456	w = t >> (MPW_BITS - 1);
	457	}
	458	if (dv >= dvl)
	459	goto done;
	460	*dv++ = MPW(w);
	461	MPX_ZERO(dv, dvl);
	462	goto done;
	463	}
	464
	465	/* --- Break out word and bit shifts for more sophisticated work --- */
	466
	467	nw = n / MPW_BITS;
	468	nb = n % MPW_BITS;
	469
	470	/* --- Handle a shift by a multiple of the word size --- */
	471
	472	if (nb == 0) {
	473	MPX_COPY(dv + nw, dvl, av, avl);
	474	memset(dv, 0, MPWS(nw));
	475	}
	476
	477	/* --- And finally the difficult case --- *
	478	*
	479	* This is a little convoluted, because I have to start from the end and
	480	* work backwards to avoid overwriting the source, if they're both the same
	481	* block of memory.
	482	*/
	483
	484	else {
	485	mpw w;
	486	size_t nr = MPW_BITS - nb;
	487	size_t dvn = dvl - dv;
	488	size_t avn = avl - av;
	489
	490	if (dvn <= nw) {
	491	MPX_ZERO(dv, dvl);
	492	goto done;
	493	}
	494
	495	if (dvn > avn + nw) {
	496	size_t off = avn + nw + 1;
	497	MPX_ZERO(dv + off, dvl);
	498	dvl = dv + off;
	499	w = 0;
	500	} else {
	501	avl = av + dvn - nw;
	502	w = *--avl << nb;
	503	}
	504
	505	while (avl > av) {
	506	mpw t = *--avl;
	507	*--dvl = (t >> nr) \| w;
	508	w = t << nb;
	509	}
	510
	511	*--dvl = w;
	512	MPX_ZERO(dv, dvl);
	513	}
	514
	515	done:;
	516	}
	517
	518	/* --- @mpx_lsr@ --- *
	519	*
	520	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	521	* @const mpw av, avl@ = source vector base and limit
	522	* @size_t n@ = number of bit positions to shift by
	523	*
	524	* Returns: ---
	525	*
	526	* Use: Performs a logical shift right operation on an integer.
	527	*/
	528
	529	void mpx_lsr(mpw dv, mpw dvl, const mpw av, const mpw avl, size_t n)
	530	{
	531	size_t nw;
	532	unsigned nb;
	533
	534	/* --- Trivial special case --- */
	535
	536	if (n == 0)
	537	MPX_COPY(dv, dvl, av, avl);
	538
	539	/* --- Single bit shifting --- */
	540
	541	else if (n == 1) {
	542	mpw w = *av++ >> 1;
	543	while (av < avl) {
	544	mpw t;
	545	if (dv >= dvl)
	546	goto done;
	547	t = *av++;
	548	*dv++ = MPW((t << (MPW_BITS - 1)) \| w);
	549	w = t >> 1;
	550	}
	551	if (dv >= dvl)
	552	goto done;
	553	*dv++ = MPW(w);
	554	MPX_ZERO(dv, dvl);
	555	goto done;
	556	}
	557
	558	/* --- Break out word and bit shifts for more sophisticated work --- */
	559
	560	nw = n / MPW_BITS;
	561	nb = n % MPW_BITS;
	562
	563	/* --- Handle a shift by a multiple of the word size --- */
	564
	565	if (nb == 0)
	566	MPX_COPY(dv, dvl, av + nw, avl);
	567
	568	/* --- And finally the difficult case --- */
	569
	570	else {
	571	mpw w;
	572	size_t nr = MPW_BITS - nb;
	573
	574	av += nw;
	575	w = *av++;
	576	while (av < avl) {
	577	mpw t;
	578	if (dv >= dvl)
	579	goto done;
	580	t = *av++;
	581	*dv++ = MPW((w >> nb) \| (t << nr));
	582	w = t;
	583	}
	584	if (dv < dvl) {
	585	*dv++ = MPW(w >> nb);
	586	MPX_ZERO(dv, dvl);
	587	}
	588	}
	589
	590	done:;
	591	}
	592
	593	/*----- Bitwise operations ------------------------------------------------*/
	594
	595	/* --- @mpx_bitop@ --- *
	596	*
	597	* Arguments: @mpw dv, dvl@ = destination vector
	598	* @const mpw av, avl@ = first source vector
	599	* @const mpw bv, bvl@ = second source vector
	600	*
	601	* Returns: ---
	602	*
	603	* Use; Provides the dyadic boolean functions.
	604	*/
	605
	606	#define MPX_BITBINOP(string) \
	607	\
	608	void mpx_bit##string(mpw dv, mpw dvl, const mpw av, const mpw avl, \
	609	const mpw bv, const mpw bvl) \
	610	{ \
	611	MPX_SHRINK(av, avl); \
	612	MPX_SHRINK(bv, bvl); \
	613	\
	614	while (dv < dvl) { \
	615	mpw a, b; \
	616	a = (av < avl) ? *av++ : 0; \
	617	b = (bv < bvl) ? *bv++ : 0; \
	618	*dv++ = B##string(a, b); \
	619	} \
	620	}
	621
	622	MPX_DOBIN(MPX_BITBINOP)
	623
	624	void mpx_not(mpw dv, mpw dvl, const mpw av, const mpw avl)
	625	{
	626	MPX_SHRINK(av, avl);
	627
	628	while (dv < dvl) {
	629	mpw a;
	630	a = (av < avl) ? *av++ : 0;
	631	*dv++ = ~a;
	632	}
	633	}
	634
	635	/*----- Unsigned arithmetic -----------------------------------------------*/
	636
	637	/* --- @mpx_2c@ --- *
	638	*
	639	* Arguments: @mpw dv, dvl@ = destination vector
	640	* @const mpw v, vl@ = source vector
	641	*
	642	* Returns: ---
	643	*
	644	* Use: Calculates the two's complement of @v@.
	645	*/
	646
	647	void mpx_2c(mpw dv, mpw dvl, const mpw v, const mpw vl)
	648	{
	649	mpw c = 0;
	650	while (dv < dvl && v < vl)
	651	dv++ = c = MPW(~v++);
	652	if (dv < dvl) {
	653	if (c > MPW_MAX / 2)
	654	c = MPW(~0);
	655	while (dv < dvl)
	656	*dv++ = c;
	657	}
	658	MPX_UADDN(dv, dvl, 1);
	659	}
	660
	661	/* --- @mpx_ueq@ --- *
	662	*
	663	* Arguments: @const mpw av, avl@ = first argument vector base and limit
	664	* @const mpw bv, bvl@ = second argument vector base and limit
	665	*
	666	* Returns: Nonzero if the two vectors are equal.
	667	*
	668	* Use: Performs an unsigned integer test for equality.
	669	*/
	670
	671	int mpx_ueq(const mpw av, const mpw avl, const mpw bv, const mpw bvl)
	672	{
	673	MPX_SHRINK(av, avl);
	674	MPX_SHRINK(bv, bvl);
	675	if (avl - av != bvl - bv)
	676	return (0);
	677	while (av < avl) {
	678	if (av++ != bv++)
	679	return (0);
	680	}
	681	return (1);
	682	}
	683
	684	/* --- @mpx_ucmp@ --- *
	685	*
	686	* Arguments: @const mpw av, avl@ = first argument vector base and limit
	687	* @const mpw bv, bvl@ = second argument vector base and limit
	688	*
	689	* Returns: Less than, equal to, or greater than zero depending on
	690	* whether @a@ is less than, equal to or greater than @b@,
	691	* respectively.
	692	*
	693	* Use: Performs an unsigned integer comparison.
	694	*/
	695
	696	int mpx_ucmp(const mpw av, const mpw avl, const mpw bv, const mpw bvl)
	697	{
	698	MPX_SHRINK(av, avl);
	699	MPX_SHRINK(bv, bvl);
	700
	701	if (avl - av > bvl - bv)
	702	return (+1);
	703	else if (avl - av < bvl - bv)
	704	return (-1);
	705	else while (avl > av) {
	706	mpw a = --avl, b = --bvl;
	707	if (a > b)
	708	return (+1);
	709	else if (a < b)
	710	return (-1);
	711	}
	712	return (0);
	713	}
	714
	715	/* --- @mpx_uadd@ --- *
	716	*
	717	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	718	* @const mpw av, avl@ = first addend vector base and limit
	719	* @const mpw bv, bvl@ = second addend vector base and limit
	720	*
	721	* Returns: ---
	722	*
	723	* Use: Performs unsigned integer addition. If the result overflows
	724	* the destination vector, high-order bits are discarded. This
	725	* means that two's complement addition happens more or less for
	726	* free, although that's more a side-effect than anything else.
	727	* The result vector may be equal to either or both source
	728	* vectors, but may not otherwise overlap them.
	729	*/
	730
	731	void mpx_uadd(mpw dv, mpw dvl, const mpw av, const mpw avl,
	732	const mpw bv, const mpw bvl)
	733	{
	734	mpw c = 0;
	735
	736	while (av < avl \|\| bv < bvl) {
	737	mpw a, b;
	738	mpd x;
	739	if (dv >= dvl)
	740	return;
	741	a = (av < avl) ? *av++ : 0;
	742	b = (bv < bvl) ? *bv++ : 0;
	743	x = (mpd)a + (mpd)b + c;
	744	*dv++ = MPW(x);
	745	c = x >> MPW_BITS;
	746	}
	747	if (dv < dvl) {
	748	*dv++ = c;
	749	MPX_ZERO(dv, dvl);
	750	}
	751	}
	752
	753	/* --- @mpx_uaddn@ --- *
	754	*
	755	* Arguments: @mpw dv, dvl@ = source and destination base and limit
	756	* @mpw n@ = other addend
	757	*
	758	* Returns: ---
	759	*
	760	* Use: Adds a small integer to a multiprecision number.
	761	*/
	762
	763	void mpx_uaddn(mpw dv, mpw dvl, mpw n) { MPX_UADDN(dv, dvl, n); }
	764
	765	/* --- @mpx_usub@ --- *
	766	*
	767	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	768	* @const mpw av, avl@ = first argument vector base and limit
	769	* @const mpw bv, bvl@ = second argument vector base and limit
	770	*
	771	* Returns: ---
	772	*
	773	* Use: Performs unsigned integer subtraction. If the result
	774	* overflows the destination vector, high-order bits are
	775	* discarded. This means that two's complement subtraction
	776	* happens more or less for free, althuogh that's more a side-
	777	* effect than anything else. The result vector may be equal to
	778	* either or both source vectors, but may not otherwise overlap
	779	* them.
	780	*/
	781
	782	void mpx_usub(mpw dv, mpw dvl, const mpw av, const mpw avl,
	783	const mpw bv, const mpw bvl)
	784	{
	785	mpw c = 0;
	786
	787	while (av < avl \|\| bv < bvl) {
	788	mpw a, b;
	789	mpd x;
	790	if (dv >= dvl)
	791	return;
	792	a = (av < avl) ? *av++ : 0;
	793	b = (bv < bvl) ? *bv++ : 0;
	794	x = (mpd)a - (mpd)b - c;
	795	*dv++ = MPW(x);
	796	if (x >> MPW_BITS)
	797	c = 1;
	798	else
	799	c = 0;
	800	}
	801	if (c)
	802	c = MPW_MAX;
	803	while (dv < dvl)
	804	*dv++ = c;
	805	}
	806
	807	/* --- @mpx_usubn@ --- *
	808	*
	809	* Arguments: @mpw dv, dvl@ = source and destination base and limit
	810	* @n@ = subtrahend
	811	*
	812	* Returns: ---
	813	*
	814	* Use: Subtracts a small integer from a multiprecision number.
	815	*/
	816
	817	void mpx_usubn(mpw dv, mpw dvl, mpw n) { MPX_USUBN(dv, dvl, n); }
	818
	819	/* --- @mpx_umul@ --- *
	820	*
	821	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	822	* @const mpw av, avl@ = multiplicand vector base and limit
	823	* @const mpw bv, bvl@ = multiplier vector base and limit
	824	*
	825	* Returns: ---
	826	*
	827	* Use: Performs unsigned integer multiplication. If the result
	828	* overflows the desination vector, high-order bits are
	829	* discarded. The result vector may not overlap the argument
	830	* vectors in any way.
	831	*/
	832
	833	void mpx_umul(mpw dv, mpw dvl, const mpw av, const mpw avl,
	834	const mpw bv, const mpw bvl)
	835	{
	836	/* --- This is probably worthwhile on a multiply --- */
	837
	838	MPX_SHRINK(av, avl);
	839	MPX_SHRINK(bv, bvl);
	840
	841	/* --- Deal with a multiply by zero --- */
	842
	843	if (bv == bvl) {
	844	MPX_ZERO(dv, dvl);
	845	return;
	846	}
	847
	848	/* --- Do the initial multiply and initialize the accumulator --- */
	849
	850	MPX_UMULN(dv, dvl, av, avl, *bv++);
	851
	852	/* --- Do the remaining multiply/accumulates --- */
	853
	854	while (dv < dvl && bv < bvl) {
	855	mpw m = *bv++;
	856	mpw c = 0;
	857	const mpw *avv = av;
	858	mpw *dvv = ++dv;
	859
	860	while (avv < avl) {
	861	mpd x;
	862	if (dvv >= dvl)
	863	goto next;
	864	x = (mpd)dvv + (mpd)m (mpd)*avv++ + c;
	865	*dvv++ = MPW(x);
	866	c = x >> MPW_BITS;
	867	}
	868	MPX_UADDN(dvv, dvl, c);
	869	next:;
	870	}
	871	}
	872
	873	/* --- @mpx_umuln@ --- *
	874	*
	875	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	876	* @const mpw av, avl@ = multiplicand vector base and limit
	877	* @mpw m@ = multiplier
	878	*
	879	* Returns: ---
	880	*
	881	* Use: Multiplies a multiprecision integer by a single-word value.
	882	* The destination and source may be equal. The destination
	883	* is completely cleared after use.
	884	*/
	885
	886	void mpx_umuln(mpw dv, mpw dvl, const mpw av, const mpw avl, mpw m)
	887	{
	888	MPX_UMULN(dv, dvl, av, avl, m);
	889	}
	890
	891	/* --- @mpx_umlan@ --- *
	892	*
	893	* Arguments: @mpw dv, dvl@ = destination/accumulator base and limit
	894	* @const mpw av, avl@ = multiplicand vector base and limit
	895	* @mpw m@ = multiplier
	896	*
	897	* Returns: ---
	898	*
	899	* Use: Multiplies a multiprecision integer by a single-word value
	900	* and adds the result to an accumulator.
	901	*/
	902
	903	void mpx_umlan(mpw dv, mpw dvl, const mpw av, const mpw avl, mpw m)
	904	{
	905	MPX_UMLAN(dv, dvl, av, avl, m);
	906	}
	907
	908	/* --- @mpx_usqr@ --- *
	909	*
	910	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	911	* @const mpw av, av@ = source vector base and limit
	912	*
	913	* Returns: ---
	914	*
	915	* Use: Performs unsigned integer squaring. The result vector must
	916	* not overlap the source vector in any way.
	917	*/
	918
	919	void mpx_usqr(mpw dv, mpw dvl, const mpw av, const mpw avl)
	920	{
	921	MPX_ZERO(dv, dvl);
	922
	923	/* --- Main loop --- */
	924
	925	while (av < avl) {
	926	const mpw *avv = av;
	927	mpw *dvv = dv;
	928	mpw a = *av;
	929	mpd c;
	930
	931	/* --- Stop if I've run out of destination --- */
	932
	933	if (dvv >= dvl)
	934	break;
	935
	936	/* --- Work out the square at this point in the proceedings --- */
	937
	938	{
	939	mpd x = (mpd)a * (mpd)a + *dvv;
	940	*dvv++ = MPW(x);
	941	c = MPW(x >> MPW_BITS);
	942	}
	943
	944	/* --- Now fix up the rest of the vector upwards --- */
	945
	946	avv++;
	947	while (dvv < dvl && avv < avl) {
	948	mpd x = (mpd)a * (mpd)*avv++;
	949	mpd y = ((x << 1) & MPW_MAX) + c + *dvv;
	950	c = (x >> (MPW_BITS - 1)) + (y >> MPW_BITS);
	951	*dvv++ = MPW(y);
	952	}
	953	while (dvv < dvl && c) {
	954	mpd x = c + *dvv;
	955	*dvv++ = MPW(x);
	956	c = x >> MPW_BITS;
	957	}
	958
	959	/* --- Get ready for the next round --- */
	960
	961	av++;
	962	dv += 2;
	963	}
	964	}
	965
	966	/* --- @mpx_udiv@ --- *
	967	*
	968	* Arguments: @mpw qv, qvl@ = quotient vector base and limit
	969	* @mpw rv, rvl@ = dividend/remainder vector base and limit
	970	* @const mpw dv, dvl@ = divisor vector base and limit
	971	* @mpw sv, svl@ = scratch workspace
	972	*
	973	* Returns: ---
	974	*
	975	* Use: Performs unsigned integer division. If the result overflows
	976	* the quotient vector, high-order bits are discarded. (Clearly
	977	* the remainder vector can't overflow.) The various vectors
	978	* may not overlap in any way. Yes, I know it's a bit odd
	979	* requiring the dividend to be in the result position but it
	980	* does make some sense really. The remainder must have
	981	* headroom for at least two extra words. The scratch space
	982	* must be at least one word larger than the divisor.
	983	*/
	984
	985	void mpx_udiv(mpw qv, mpw qvl, mpw rv, mpw rvl,
	986	const mpw dv, const mpw dvl,
	987	mpw sv, mpw svl)
	988	{
	989	unsigned norm = 0;
	990	size_t scale;
	991	mpw d, dd;
	992
	993	/* --- Initialize the quotient --- */
	994
	995	MPX_ZERO(qv, qvl);
	996
	997	/* --- Perform some sanity checks --- */
	998
	999	MPX_SHRINK(dv, dvl);
	1000	assert(((void)"division by zero in mpx_udiv", dv < dvl));
	1001
	1002	/* --- Normalize the divisor --- *
	1003	*
	1004	* The algorithm requires that the divisor be at least two digits long.
	1005	* This is easy to fix.
	1006	*/
	1007
	1008	{
	1009	unsigned b;
	1010
	1011	d = dvl[-1];
	1012	for (b = MPW_BITS / 2; b; b >>= 1) {
	1013	if (d < (MPW_MAX >> b)) {
	1014	d <<= b;
	1015	norm += b;
	1016	}
	1017	}
	1018	if (dv + 1 == dvl)
	1019	norm += MPW_BITS;
	1020	}
	1021
	1022	/* --- Normalize the dividend/remainder to match --- */
	1023
	1024	if (norm) {
	1025	mpx_lsl(rv, rvl, rv, rvl, norm);
	1026	mpx_lsl(sv, svl, dv, dvl, norm);
	1027	dv = sv;
	1028	dvl = svl;
	1029	MPX_SHRINK(dv, dvl);
	1030	}
	1031
	1032	MPX_SHRINK(rv, rvl);
	1033	d = dvl[-1];
	1034	dd = dvl[-2];
	1035
	1036	/* --- Work out the relative scales --- */
	1037
	1038	{
	1039	size_t rvn = rvl - rv;
	1040	size_t dvn = dvl - dv;
	1041
	1042	/* --- If the divisor is clearly larger, notice this --- */
	1043
	1044	if (dvn > rvn) {
	1045	mpx_lsr(rv, rvl, rv, rvl, norm);
	1046	return;
	1047	}
	1048
	1049	scale = rvn - dvn;
	1050	}
	1051
	1052	/* --- Calculate the most significant quotient digit --- *
	1053	*
	1054	* Because the divisor has its top bit set, this can only happen once. The
	1055	* pointer arithmetic is a little contorted, to make sure that the
	1056	* behaviour is defined.
	1057	*/
	1058
	1059	if (MPX_UCMP(rv + scale, rvl, >=, dv, dvl)) {
	1060	mpx_usub(rv + scale, rvl, rv + scale, rvl, dv, dvl);
	1061	if (qvl - qv > scale)
	1062	qv[scale] = 1;
	1063	}
	1064
	1065	/* --- Now for the main loop --- */
	1066
	1067	{
	1068	mpw *rvv = rvl - 2;
	1069
	1070	while (scale) {
	1071	mpw q;
	1072	mpd rh;
	1073
	1074	/* --- Get an estimate for the next quotient digit --- */
	1075
	1076	mpw r = rvv[1];
	1077	mpw rr = rvv[0];
	1078	mpw rrr = *--rvv;
	1079
	1080	scale--;
	1081	rh = ((mpd)r << MPW_BITS) \| rr;
	1082	if (r == d)
	1083	q = MPW_MAX;
	1084	else
	1085	q = MPW(rh / d);
	1086
	1087	/* --- Refine the estimate --- */
	1088
	1089	{
	1090	mpd yh = (mpd)d * q;
	1091	mpd yy = (mpd)dd * q;
	1092	mpw yl;
	1093
	1094	if (yy > MPW_MAX)
	1095	yh += yy >> MPW_BITS;
	1096	yl = MPW(yy);
	1097
	1098	while (yh > rh \|\| (yh == rh && yl > rrr)) {
	1099	q--;
	1100	yh -= d;
	1101	if (yl < dd)
	1102	yh--;
	1103	yl = MPW(yl - dd);
	1104	}
	1105	}
	1106
	1107	/* --- Remove a chunk from the dividend --- */
	1108
	1109	{
	1110	mpw *svv;
	1111	const mpw *dvv;
	1112	mpw mc = 0, sc = 0;
	1113
	1114	/* --- Calculate the size of the chunk --- *
	1115	*
	1116	* This does the whole job of calculating @r >> scale - qd@.
	1117	*/
	1118
	1119	for (svv = rv + scale, dvv = dv;
	1120	dvv < dvl && svv < rvl;
	1121	svv++, dvv++) {
	1122	mpd x = (mpd)dvv (mpd)q + mc;
	1123	mc = x >> MPW_BITS;
	1124	x = (mpd)*svv - MPW(x) - sc;
	1125	*svv = MPW(x);
	1126	if (x >> MPW_BITS)
	1127	sc = 1;
	1128	else
	1129	sc = 0;
	1130	}
	1131
	1132	if (svv < rvl) {
	1133	mpd x = (mpd)*svv - mc - sc;
	1134	*svv++ = MPW(x);
	1135	if (x >> MPW_BITS)
	1136	sc = MPW_MAX;
	1137	else
	1138	sc = 0;
	1139	while (svv < rvl)
	1140	*svv++ = sc;
	1141	}
	1142
	1143	/* --- Fix if the quotient was too large --- *
	1144	*
	1145	* This doesn't seem to happen very often.
	1146	*/
	1147
	1148	if (rvl[-1] > MPW_MAX / 2) {
	1149	mpx_uadd(rv + scale, rvl, rv + scale, rvl, dv, dvl);
	1150	q--;
	1151	}
	1152	}
	1153
	1154	/* --- Done for another iteration --- */
	1155
	1156	if (qvl - qv > scale)
	1157	qv[scale] = q;
	1158	r = rr;
	1159	rr = rrr;
	1160	}
	1161	}
	1162
	1163	/* --- Now fiddle with unnormalizing and things --- */
	1164
	1165	mpx_lsr(rv, rvl, rv, rvl, norm);
	1166	}
	1167
	1168	/* --- @mpx_udivn@ --- *
	1169	*
	1170	* Arguments: @mpw qv, qvl@ = storage for the quotient (may overlap
	1171	* dividend)
	1172	* @const mpw rv, rvl@ = dividend
	1173	* @mpw d@ = single-precision divisor
	1174	*
	1175	* Returns: Remainder after divison.
	1176	*
	1177	* Use: Performs a single-precision division operation.
	1178	*/
	1179
	1180	mpw mpx_udivn(mpw qv, mpw qvl, const mpw rv, const mpw rvl, mpw d)
	1181	{
	1182	size_t i;
	1183	size_t ql = qvl - qv;
	1184	mpd r = 0;
	1185
	1186	i = rvl - rv;
	1187	while (i > 0) {
	1188	i--;
	1189	r = (r << MPW_BITS) \| rv[i];
	1190	if (i < ql)
	1191	qv[i] = r / d;
	1192	r %= d;
	1193	}
	1194	return (MPW(r));
	1195	}
	1196
	1197	/*----- Test rig ----------------------------------------------------------*/
	1198
	1199	#ifdef TEST_RIG
	1200
	1201	#include <mLib/alloc.h>
	1202	#include <mLib/dstr.h>
	1203	#include <mLib/quis.h>
	1204	#include <mLib/testrig.h>
	1205
	1206	#include "mpscan.h"
	1207
	1208	#define ALLOC(v, vl, sz) do { \
	1209	size_t _sz = (sz); \
	1210	mpw *_vv = xmalloc(MPWS(_sz)); \
	1211	mpw *_vvl = _vv + _sz; \
	1212	(v) = _vv; \
	1213	(vl) = _vvl; \
	1214	} while (0)
	1215
	1216	#define LOAD(v, vl, d) do { \
	1217	const dstr *_d = (d); \
	1218	mpw _v, _vl; \
	1219	ALLOC(_v, _vl, MPW_RQ(_d->len)); \
	1220	mpx_loadb(_v, _vl, _d->buf, _d->len); \
	1221	(v) = _v; \
	1222	(vl) = _vl; \
	1223	} while (0)
	1224
	1225	#define MAX(x, y) ((x) > (y) ? (x) : (y))
	1226
	1227	static void dumpbits(const char msg, const void pp, size_t sz)
	1228	{
	1229	const octet *p = pp;
	1230	fputs(msg, stderr);
	1231	for (; sz; sz--)
	1232	fprintf(stderr, " %02x", *p++);
	1233	fputc('\n', stderr);
	1234	}
	1235
	1236	static void dumpmp(const char msg, const mpw v, const mpw *vl)
	1237	{
	1238	fputs(msg, stderr);
	1239	MPX_SHRINK(v, vl);
	1240	while (v < vl)
	1241	fprintf(stderr, " %08lx", (unsigned long)*--vl);
	1242	fputc('\n', stderr);
	1243	}
	1244
	1245	static int chkscan(const mpw v, const mpw vl,
	1246	const void *pp, size_t sz, int step)
	1247	{
	1248	mpscan mps;
	1249	const octet *p = pp;
	1250	unsigned bit = 0;
	1251	int ok = 1;
	1252
	1253	mpscan_initx(&mps, v, vl);
	1254	while (sz) {
	1255	unsigned x = *p;
	1256	int i;
	1257	p += step;
	1258	for (i = 0; i < 8 && MPSCAN_STEP(&mps); i++) {
	1259	if (MPSCAN_BIT(&mps) != (x & 1)) {
	1260	fprintf(stderr,
	1261	"\n*** error, step %i, bit %u, expected %u, found %u\n",
	1262	step, bit, x & 1, MPSCAN_BIT(&mps));
	1263	ok = 0;
	1264	}
	1265	x >>= 1;
	1266	bit++;
	1267	}
	1268	sz--;
	1269	}
	1270
	1271	return (ok);
	1272	}
	1273
	1274	static int loadstore(dstr *v)
	1275	{
	1276	dstr d = DSTR_INIT;
	1277	size_t sz = MPW_RQ(v->len) * 2, diff;
	1278	mpw m, ml;
	1279	int ok = 1;
	1280
	1281	dstr_ensure(&d, v->len);
	1282	m = xmalloc(MPWS(sz));
	1283
	1284	for (diff = 0; diff < sz; diff += 5) {
	1285	size_t oct;
	1286
	1287	ml = m + sz - diff;
	1288
	1289	mpx_loadl(m, ml, v->buf, v->len);
	1290	if (!chkscan(m, ml, v->buf, v->len, +1))
	1291	ok = 0;
	1292	MPX_OCTETS(oct, m, ml);
	1293	mpx_storel(m, ml, d.buf, d.sz);
	1294	if (memcmp(d.buf, v->buf, oct) != 0) {
	1295	dumpbits("\n*** storel failed", d.buf, d.sz);
	1296	ok = 0;
	1297	}
	1298
	1299	mpx_loadb(m, ml, v->buf, v->len);
	1300	if (!chkscan(m, ml, v->buf + v->len - 1, v->len, -1))
	1301	ok = 0;
	1302	MPX_OCTETS(oct, m, ml);
	1303	mpx_storeb(m, ml, d.buf, d.sz);
	1304	if (memcmp(d.buf + d.sz - oct, v->buf + v->len - oct, oct) != 0) {
	1305	dumpbits("\n*** storeb failed", d.buf, d.sz);
	1306	ok = 0;
	1307	}
	1308	}
	1309
	1310	if (!ok)
	1311	dumpbits("input data", v->buf, v->len);
	1312
	1313	free(m);
	1314	dstr_destroy(&d);
	1315	return (ok);
	1316	}
	1317
	1318	static int twocl(dstr *v)
	1319	{
	1320	dstr d = DSTR_INIT;
	1321	mpw m, ml;
	1322	size_t sz;
	1323	int ok = 1;
	1324
	1325	sz = v[0].len; if (v[1].len > sz) sz = v[1].len;
	1326	dstr_ensure(&d, sz);
	1327
	1328	sz = MPW_RQ(sz);
	1329	m = xmalloc(MPWS(sz));
	1330	ml = m + sz;
	1331
	1332	mpx_loadl(m, ml, v[0].buf, v[0].len);
	1333	mpx_storel2cn(m, ml, d.buf, v[1].len);
	1334	if (memcmp(d.buf, v[1].buf, v[1].len)) {
	1335	dumpbits("\n*** storel2cn failed", d.buf, v[1].len);
	1336	ok = 0;
	1337	}
	1338
	1339	mpx_loadl2cn(m, ml, v[1].buf, v[1].len);
	1340	mpx_storel(m, ml, d.buf, v[0].len);
	1341	if (memcmp(d.buf, v[0].buf, v[0].len)) {
	1342	dumpbits("\n*** loadl2cn failed", d.buf, v[0].len);
	1343	ok = 0;
	1344	}
	1345
	1346	if (!ok) {
	1347	dumpbits("pos", v[0].buf, v[0].len);
	1348	dumpbits("neg", v[1].buf, v[1].len);
	1349	}
	1350
	1351	free(m);
	1352	dstr_destroy(&d);
	1353
	1354	return (ok);
	1355	}
	1356
	1357	static int twocb(dstr *v)
	1358	{
	1359	dstr d = DSTR_INIT;
	1360	mpw m, ml;
	1361	size_t sz;
	1362	int ok = 1;
	1363
	1364	sz = v[0].len; if (v[1].len > sz) sz = v[1].len;
	1365	dstr_ensure(&d, sz);
	1366
	1367	sz = MPW_RQ(sz);
	1368	m = xmalloc(MPWS(sz));
	1369	ml = m + sz;
	1370
	1371	mpx_loadb(m, ml, v[0].buf, v[0].len);
	1372	mpx_storeb2cn(m, ml, d.buf, v[1].len);
	1373	if (memcmp(d.buf, v[1].buf, v[1].len)) {
	1374	dumpbits("\n*** storeb2cn failed", d.buf, v[1].len);
	1375	ok = 0;
	1376	}
	1377
	1378	mpx_loadb2cn(m, ml, v[1].buf, v[1].len);
	1379	mpx_storeb(m, ml, d.buf, v[0].len);
	1380	if (memcmp(d.buf, v[0].buf, v[0].len)) {
	1381	dumpbits("\n*** loadb2cn failed", d.buf, v[0].len);
	1382	ok = 0;
	1383	}
	1384
	1385	if (!ok) {
	1386	dumpbits("pos", v[0].buf, v[0].len);
	1387	dumpbits("neg", v[1].buf, v[1].len);
	1388	}
	1389
	1390	free(m);
	1391	dstr_destroy(&d);
	1392
	1393	return (ok);
	1394	}
	1395
	1396	static int lsl(dstr *v)
	1397	{
	1398	mpw a, al;
	1399	int n = (int )v[1].buf;
	1400	mpw c, cl;
	1401	mpw d, dl;
	1402	int ok = 1;
	1403
	1404	LOAD(a, al, &v[0]);
	1405	LOAD(c, cl, &v[2]);
	1406	ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS);
	1407
	1408	mpx_lsl(d, dl, a, al, n);
	1409	if (!mpx_ueq(d, dl, c, cl)) {
	1410	fprintf(stderr, "\n*** lsl(%i) failed\n", n);
	1411	dumpmp(" a", a, al);
	1412	dumpmp("expected", c, cl);
	1413	dumpmp(" result", d, dl);
	1414	ok = 0;
	1415	}
	1416
	1417	free(a); free(c); free(d);
	1418	return (ok);
	1419	}
	1420
	1421	static int lsr(dstr *v)
	1422	{
	1423	mpw a, al;
	1424	int n = (int )v[1].buf;
	1425	mpw c, cl;
	1426	mpw d, dl;
	1427	int ok = 1;
	1428
	1429	LOAD(a, al, &v[0]);
	1430	LOAD(c, cl, &v[2]);
	1431	ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS + 1);
	1432
	1433	mpx_lsr(d, dl, a, al, n);
	1434	if (!mpx_ueq(d, dl, c, cl)) {
	1435	fprintf(stderr, "\n*** lsr(%i) failed\n", n);
	1436	dumpmp(" a", a, al);
	1437	dumpmp("expected", c, cl);
	1438	dumpmp(" result", d, dl);
	1439	ok = 0;
	1440	}
	1441
	1442	free(a); free(c); free(d);
	1443	return (ok);
	1444	}
	1445
	1446	static int uadd(dstr *v)
	1447	{
	1448	mpw a, al;
	1449	mpw b, bl;
	1450	mpw c, cl;
	1451	mpw d, dl;
	1452	int ok = 1;
	1453
	1454	LOAD(a, al, &v[0]);
	1455	LOAD(b, bl, &v[1]);
	1456	LOAD(c, cl, &v[2]);
	1457	ALLOC(d, dl, MAX(al - a, bl - b) + 1);
	1458
	1459	mpx_uadd(d, dl, a, al, b, bl);
	1460	if (!mpx_ueq(d, dl, c, cl)) {
	1461	fprintf(stderr, "\n*** uadd failed\n");
	1462	dumpmp(" a", a, al);
	1463	dumpmp(" b", b, bl);
	1464	dumpmp("expected", c, cl);
	1465	dumpmp(" result", d, dl);
	1466	ok = 0;
	1467	}
	1468
	1469	free(a); free(b); free(c); free(d);
	1470	return (ok);
	1471	}
	1472
	1473	static int usub(dstr *v)
	1474	{
	1475	mpw a, al;
	1476	mpw b, bl;
	1477	mpw c, cl;
	1478	mpw d, dl;
	1479	int ok = 1;
	1480
	1481	LOAD(a, al, &v[0]);
	1482	LOAD(b, bl, &v[1]);
	1483	LOAD(c, cl, &v[2]);
	1484	ALLOC(d, dl, al - a);
	1485
	1486	mpx_usub(d, dl, a, al, b, bl);
	1487	if (!mpx_ueq(d, dl, c, cl)) {
	1488	fprintf(stderr, "\n*** usub failed\n");
	1489	dumpmp(" a", a, al);
	1490	dumpmp(" b", b, bl);
	1491	dumpmp("expected", c, cl);
	1492	dumpmp(" result", d, dl);
	1493	ok = 0;
	1494	}
	1495
	1496	free(a); free(b); free(c); free(d);
	1497	return (ok);
	1498	}
	1499
	1500	static int umul(dstr *v)
	1501	{
	1502	mpw a, al;
	1503	mpw b, bl;
	1504	mpw c, cl;
	1505	mpw d, dl;
	1506	int ok = 1;
	1507
	1508	LOAD(a, al, &v[0]);
	1509	LOAD(b, bl, &v[1]);
	1510	LOAD(c, cl, &v[2]);
	1511	ALLOC(d, dl, (al - a) + (bl - b));
	1512
	1513	mpx_umul(d, dl, a, al, b, bl);
	1514	if (!mpx_ueq(d, dl, c, cl)) {
	1515	fprintf(stderr, "\n*** umul failed\n");
	1516	dumpmp(" a", a, al);
	1517	dumpmp(" b", b, bl);
	1518	dumpmp("expected", c, cl);
	1519	dumpmp(" result", d, dl);
	1520	ok = 0;
	1521	}
	1522
	1523	free(a); free(b); free(c); free(d);
	1524	return (ok);
	1525	}
	1526
	1527	static int usqr(dstr *v)
	1528	{
	1529	mpw a, al;
	1530	mpw c, cl;
	1531	mpw d, dl;
	1532	int ok = 1;
	1533
	1534	LOAD(a, al, &v[0]);
	1535	LOAD(c, cl, &v[1]);
	1536	ALLOC(d, dl, 2 * (al - a));
	1537
	1538	mpx_usqr(d, dl, a, al);
	1539	if (!mpx_ueq(d, dl, c, cl)) {
	1540	fprintf(stderr, "\n*** usqr failed\n");
	1541	dumpmp(" a", a, al);
	1542	dumpmp("expected", c, cl);
	1543	dumpmp(" result", d, dl);
	1544	ok = 0;
	1545	}
	1546
	1547	free(a); free(c); free(d);
	1548	return (ok);
	1549	}
	1550
	1551	static int udiv(dstr *v)
	1552	{
	1553	mpw a, al;
	1554	mpw b, bl;
	1555	mpw q, ql;
	1556	mpw r, rl;
	1557	mpw qq, qql;
	1558	mpw s, sl;
	1559	int ok = 1;
	1560
	1561	ALLOC(a, al, MPW_RQ(v[0].len) + 2); mpx_loadb(a, al, v[0].buf, v[0].len);
	1562	LOAD(b, bl, &v[1]);
	1563	LOAD(q, ql, &v[2]);
	1564	LOAD(r, rl, &v[3]);
	1565	ALLOC(qq, qql, al - a);
	1566	ALLOC(s, sl, (bl - b) + 1);
	1567
	1568	mpx_udiv(qq, qql, a, al, b, bl, s, sl);
	1569	if (!mpx_ueq(qq, qql, q, ql) \|\|
	1570	!mpx_ueq(a, al, r, rl)) {
	1571	fprintf(stderr, "\n*** udiv failed\n");
	1572	dumpmp(" divisor", b, bl);
	1573	dumpmp("expect r", r, rl);
	1574	dumpmp("result r", a, al);
	1575	dumpmp("expect q", q, ql);
	1576	dumpmp("result q", qq, qql);
	1577	ok = 0;
	1578	}
	1579
	1580	free(a); free(b); free(r); free(q); free(s); free(qq);
	1581	return (ok);
	1582	}
	1583
	1584	static test_chunk defs[] = {
	1585	{ "load-store", loadstore, { &type_hex, 0 } },
	1586	{ "2cl", twocl, { &type_hex, &type_hex, } },
	1587	{ "2cb", twocb, { &type_hex, &type_hex, } },
	1588	{ "lsl", lsl, { &type_hex, &type_int, &type_hex, 0 } },
	1589	{ "lsr", lsr, { &type_hex, &type_int, &type_hex, 0 } },
	1590	{ "uadd", uadd, { &type_hex, &type_hex, &type_hex, 0 } },
	1591	{ "usub", usub, { &type_hex, &type_hex, &type_hex, 0 } },
	1592	{ "umul", umul, { &type_hex, &type_hex, &type_hex, 0 } },
	1593	{ "usqr", usqr, { &type_hex, &type_hex, 0 } },
	1594	{ "udiv", udiv, { &type_hex, &type_hex, &type_hex, &type_hex, 0 } },
	1595	{ 0, 0, { 0 } }
	1596	};
	1597
	1598	int main(int argc, char *argv[])
	1599	{
	1600	test_run(argc, argv, defs, SRCDIR"/tests/mpx");
	1601	return (0);
	1602	}
	1603
	1604	#endif
	1605
	/*----- That's all, folks -------------------------------------------------*/