git.distorted.org.uk Git - u/mdw/catacomb/blame_incremental

... / ...

Commit	Line	Data
	1	/* --c--
	2	*
	3	* $Id: mpx.c,v 1.6 1999/11/20 22:43:44 mdw Exp $
	4	*
	5	* Low-level multiprecision arithmetic
	6	*
	7	* (c) 1999 Straylight/Edgeware
	8	*/
	9
	10	/----- Licensing notice --------------------------------------------------
	11	*
	12	* This file is part of Catacomb.
	13	*
	14	* Catacomb is free software; you can redistribute it and/or modify
	15	* it under the terms of the GNU Library General Public License as
	16	* published by the Free Software Foundation; either version 2 of the
	17	* License, or (at your option) any later version.
	18	*
	19	* Catacomb is distributed in the hope that it will be useful,
	20	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	21	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	22	* GNU Library General Public License for more details.
	23	*
	24	* You should have received a copy of the GNU Library General Public
	25	* License along with Catacomb; if not, write to the Free
	26	* Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
	27	* MA 02111-1307, USA.
	28	*/
	29
	30	/----- Revision history --------------------------------------------------
	31	*
	32	* $Log: mpx.c,v $
	33	* Revision 1.6 1999/11/20 22:43:44 mdw
	34	* Integrate testing for MPX routines.
	35	*
	36	* Revision 1.5 1999/11/20 22:23:27 mdw
	37	* Add function versions of some low-level macros with wider use.
	38	*
	39	* Revision 1.4 1999/11/17 18:04:09 mdw
	40	* Add two's-complement functionality. Improve mpx_udiv a little by
	41	* performing the multiplication of the divisor by q with the subtraction
	42	* from r.
	43	*
	44	* Revision 1.3 1999/11/13 01:57:31 mdw
	45	* Remove stray debugging code.
	46	*
	47	* Revision 1.2 1999/11/13 01:50:59 mdw
	48	* Multiprecision routines finished and tested.
	49	*
	50	* Revision 1.1 1999/09/03 08:41:12 mdw
	51	* Initial import.
	52	*
	53	*/
	54
	55	/----- Header files ------------------------------------------------------/
	56
	57	#include <assert.h>
	58	#include <stdio.h>
	59	#include <stdlib.h>
	60	#include <string.h>
	61
	62	#include <mLib/bits.h>
	63
	64	#include "mptypes.h"
	65	#include "mpx.h"
	66
	67	/----- Loading and storing -----------------------------------------------/
	68
	69	/* --- @mpx_storel@ --- *
	70	*
	71	* Arguments: @const mpw v, vl@ = base and limit of source vector
	72	* @void *pp@ = pointer to octet array
	73	* @size_t sz@ = size of octet array
	74	*
	75	* Returns: ---
	76	*
	77	* Use: Stores an MP in an octet array, least significant octet
	78	* first. High-end octets are silently discarded if there
	79	* isn't enough space for them.
	80	*/
	81
	82	void mpx_storel(const mpw v, const mpw vl, void *pp, size_t sz)
	83	{
	84	mpw n, w = 0;
	85	octet p = pp, q = p + sz;
	86	unsigned bits = 0;
	87
	88	while (p < q) {
	89	if (bits < 8) {
	90	if (v >= vl) {
	91	*p++ = U8(w);
	92	break;
	93	}
	94	n = *v++;
	95	*p++ = U8(w \| n << bits);
	96	w = n >> (8 - bits);
	97	bits += MPW_BITS - 8;
	98	} else {
	99	*p++ = U8(w);
	100	w >>= 8;
	101	bits -= 8;
	102	}
	103	}
	104	memset(p, 0, q - p);
	105	}
	106
	107	/* --- @mpx_loadl@ --- *
	108	*
	109	* Arguments: @mpw v, vl@ = base and limit of destination vector
	110	* @const void *pp@ = pointer to octet array
	111	* @size_t sz@ = size of octet array
	112	*
	113	* Returns: ---
	114	*
	115	* Use: Loads an MP in an octet array, least significant octet
	116	* first. High-end octets are ignored if there isn't enough
	117	* space for them.
	118	*/
	119
	120	void mpx_loadl(mpw v, mpw vl, const void *pp, size_t sz)
	121	{
	122	unsigned n;
	123	mpw w = 0;
	124	const octet p = pp, q = p + sz;
	125	unsigned bits = 0;
	126
	127	if (v >= vl)
	128	return;
	129	while (p < q) {
	130	n = U8(*p++);
	131	w \|= n << bits;
	132	bits += 8;
	133	if (bits >= MPW_BITS) {
	134	*v++ = MPW(w);
	135	w = n >> (MPW_BITS - bits + 8);
	136	bits -= MPW_BITS;
	137	if (v >= vl)
	138	return;
	139	}
	140	}
	141	*v++ = w;
	142	MPX_ZERO(v, vl);
	143	}
	144
	145	/* --- @mpx_storeb@ --- *
	146	*
	147	* Arguments: @const mpw v, vl@ = base and limit of source vector
	148	* @void *pp@ = pointer to octet array
	149	* @size_t sz@ = size of octet array
	150	*
	151	* Returns: ---
	152	*
	153	* Use: Stores an MP in an octet array, most significant octet
	154	* first. High-end octets are silently discarded if there
	155	* isn't enough space for them.
	156	*/
	157
	158	void mpx_storeb(const mpw v, const mpw vl, void *pp, size_t sz)
	159	{
	160	mpw n, w = 0;
	161	octet p = pp, q = p + sz;
	162	unsigned bits = 0;
	163
	164	while (q > p) {
	165	if (bits < 8) {
	166	if (v >= vl) {
	167	*--q = U8(w);
	168	break;
	169	}
	170	n = *v++;
	171	*--q = U8(w \| n << bits);
	172	w = n >> (8 - bits);
	173	bits += MPW_BITS - 8;
	174	} else {
	175	*--q = U8(w);
	176	w >>= 8;
	177	bits -= 8;
	178	}
	179	}
	180	memset(p, 0, q - p);
	181	}
	182
	183	/* --- @mpx_loadb@ --- *
	184	*
	185	* Arguments: @mpw v, vl@ = base and limit of destination vector
	186	* @const void *pp@ = pointer to octet array
	187	* @size_t sz@ = size of octet array
	188	*
	189	* Returns: ---
	190	*
	191	* Use: Loads an MP in an octet array, most significant octet
	192	* first. High-end octets are ignored if there isn't enough
	193	* space for them.
	194	*/
	195
	196	void mpx_loadb(mpw v, mpw vl, const void *pp, size_t sz)
	197	{
	198	unsigned n;
	199	mpw w = 0;
	200	const octet p = pp, q = p + sz;
	201	unsigned bits = 0;
	202
	203	if (v >= vl)
	204	return;
	205	while (q > p) {
	206	n = U8(*--q);
	207	w \|= n << bits;
	208	bits += 8;
	209	if (bits >= MPW_BITS) {
	210	*v++ = MPW(w);
	211	w = n >> (MPW_BITS - bits + 8);
	212	bits -= MPW_BITS;
	213	if (v >= vl)
	214	return;
	215	}
	216	}
	217	*v++ = w;
	218	MPX_ZERO(v, vl);
	219	}
	220
	221	/----- Logical shifting --------------------------------------------------/
	222
	223	/* --- @mpx_lsl@ --- *
	224	*
	225	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	226	* @const mpw av, avl@ = source vector base and limit
	227	* @size_t n@ = number of bit positions to shift by
	228	*
	229	* Returns: ---
	230	*
	231	* Use: Performs a logical shift left operation on an integer.
	232	*/
	233
	234	void mpx_lsl(mpw dv, mpw dvl, const mpw av, const mpw avl, size_t n)
	235	{
	236	size_t nw;
	237	unsigned nb;
	238
	239	/* --- Trivial special case --- */
	240
	241	if (n == 0)
	242	MPX_COPY(dv, dvl, av, avl);
	243
	244	/* --- Single bit shifting --- */
	245
	246	else if (n == 1) {
	247	mpw w = 0;
	248	while (av < avl) {
	249	mpw t;
	250	if (dv >= dvl)
	251	goto done;
	252	t = *av++;
	253	*dv++ = MPW((t << 1) \| w);
	254	w = t >> (MPW_BITS - 1);
	255	}
	256	if (dv >= dvl)
	257	goto done;
	258	*dv++ = MPW(w);
	259	MPX_ZERO(dv, dvl);
	260	goto done;
	261	}
	262
	263	/* --- Break out word and bit shifts for more sophisticated work --- */
	264
	265	nw = n / MPW_BITS;
	266	nb = n % MPW_BITS;
	267
	268	/* --- Handle a shift by a multiple of the word size --- */
	269
	270	if (nb == 0) {
	271	MPX_COPY(dv + nw, dvl, av, avl);
	272	memset(dv, 0, MPWS(nw));
	273	}
	274
	275	/* --- And finally the difficult case --- *
	276	*
	277	* This is a little convoluted, because I have to start from the end and
	278	* work backwards to avoid overwriting the source, if they're both the same
	279	* block of memory.
	280	*/
	281
	282	else {
	283	mpw w;
	284	size_t nr = MPW_BITS - nb;
	285	size_t dvn = dvl - dv;
	286	size_t avn = avl - av;
	287
	288	if (dvn <= nw) {
	289	MPX_ZERO(dv, dvl);
	290	goto done;
	291	}
	292
	293	if (dvn > avn + nw) {
	294	size_t off = avn + nw + 1;
	295	MPX_ZERO(dv + off, dvl);
	296	dvl = dv + off;
	297	w = 0;
	298	} else {
	299	avl = av + dvn - nw;
	300	w = *--avl << nb;
	301	}
	302
	303	while (avl > av) {
	304	mpw t = *--avl;
	305	*--dvl = (t >> nr) \| w;
	306	w = t << nb;
	307	}
	308
	309	*--dvl = w;
	310	MPX_ZERO(dv, dvl);
	311	}
	312
	313	done:;
	314	}
	315
	316	/* --- @mpx_lsr@ --- *
	317	*
	318	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	319	* @const mpw av, avl@ = source vector base and limit
	320	* @size_t n@ = number of bit positions to shift by
	321	*
	322	* Returns: ---
	323	*
	324	* Use: Performs a logical shift right operation on an integer.
	325	*/
	326
	327	void mpx_lsr(mpw dv, mpw dvl, const mpw av, const mpw avl, size_t n)
	328	{
	329	size_t nw;
	330	unsigned nb;
	331
	332	/* --- Trivial special case --- */
	333
	334	if (n == 0)
	335	MPX_COPY(dv, dvl, av, avl);
	336
	337	/* --- Single bit shifting --- */
	338
	339	else if (n == 1) {
	340	mpw w = *av++ >> 1;
	341	while (av < avl) {
	342	mpw t;
	343	if (dv >= dvl)
	344	goto done;
	345	t = *av++;
	346	*dv++ = MPW((t << (MPW_BITS - 1)) \| w);
	347	w = t >> 1;
	348	}
	349	if (dv >= dvl)
	350	goto done;
	351	*dv++ = MPW(w);
	352	MPX_ZERO(dv, dvl);
	353	goto done;
	354	}
	355
	356	/* --- Break out word and bit shifts for more sophisticated work --- */
	357
	358	nw = n / MPW_BITS;
	359	nb = n % MPW_BITS;
	360
	361	/* --- Handle a shift by a multiple of the word size --- */
	362
	363	if (nb == 0)
	364	MPX_COPY(dv, dvl, av + nw, avl);
	365
	366	/* --- And finally the difficult case --- */
	367
	368	else {
	369	mpw w;
	370	size_t nr = MPW_BITS - nb;
	371
	372	av += nw;
	373	w = *av++;
	374	while (av < avl) {
	375	mpw t;
	376	if (dv >= dvl)
	377	goto done;
	378	t = *av++;
	379	*dv++ = MPW((w >> nb) \| (t << nr));
	380	w = t;
	381	}
	382	if (dv < dvl) {
	383	*dv++ = MPW(w >> nb);
	384	MPX_ZERO(dv, dvl);
	385	}
	386	}
	387
	388	done:;
	389	}
	390
	391	/----- Unsigned arithmetic -----------------------------------------------/
	392
	393	/* --- @mpx_2c@ --- *
	394	*
	395	* Arguments: @mpw dv, dvl@ = destination vector
	396	* @const mpw v, vl@ = source vector
	397	*
	398	* Returns: ---
	399	*
	400	* Use: Calculates the two's complement of @v@.
	401	*/
	402
	403	void mpx_2c(mpw dv, mpw dvl, const mpw v, const mpw vl)
	404	{
	405	mpw c = 0;
	406	while (dv < dvl && v < vl)
	407	dv++ = c = MPW(~v++);
	408	if (dv < dvl) {
	409	if (c > MPW_MAX / 2)
	410	c = MPW(~0);
	411	while (dv < dvl)
	412	*dv++ = c;
	413	}
	414	MPX_UADDN(dv, dvl, 1);
	415	}
	416
	417	/* --- @mpx_ucmp@ --- *
	418	*
	419	* Arguments: @const mpw av, avl@ = first argument vector base and limit
	420	* @const mpw bv, bvl@ = second argument vector base and limit
	421	*
	422	* Returns: Less than, equal to, or greater than zero depending on
	423	* whether @a@ is less than, equal to or greater than @b@,
	424	* respectively.
	425	*
	426	* Use: Performs an unsigned integer comparison.
	427	*/
	428
	429	int mpx_ucmp(const mpw av, const mpw avl, const mpw bv, const mpw bvl)
	430	{
	431	MPX_SHRINK(av, avl);
	432	MPX_SHRINK(bv, bvl);
	433
	434	if (avl - av > bvl - bv)
	435	return (+1);
	436	else if (avl - av < bvl - bv)
	437	return (-1);
	438	else while (avl > av) {
	439	mpw a = --avl, b = --bvl;
	440	if (a > b)
	441	return (+1);
	442	else if (a < b)
	443	return (-1);
	444	}
	445	return (0);
	446	}
	447
	448	/* --- @mpx_uadd@ --- *
	449	*
	450	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	451	* @const mpw av, avl@ = first addend vector base and limit
	452	* @const mpw bv, bvl@ = second addend vector base and limit
	453	*
	454	* Returns: ---
	455	*
	456	* Use: Performs unsigned integer addition. If the result overflows
	457	* the destination vector, high-order bits are discarded. This
	458	* means that two's complement addition happens more or less for
	459	* free, although that's more a side-effect than anything else.
	460	* The result vector may be equal to either or both source
	461	* vectors, but may not otherwise overlap them.
	462	*/
	463
	464	void mpx_uadd(mpw dv, mpw dvl, const mpw av, const mpw avl,
	465	const mpw bv, const mpw bvl)
	466	{
	467	mpw c = 0;
	468
	469	while (av < avl \|\| bv < bvl) {
	470	mpw a, b;
	471	mpd x;
	472	if (dv >= dvl)
	473	return;
	474	a = (av < avl) ? *av++ : 0;
	475	b = (bv < bvl) ? *bv++ : 0;
	476	x = (mpd)a + (mpd)b + c;
	477	*dv++ = MPW(x);
	478	c = x >> MPW_BITS;
	479	}
	480	if (dv < dvl) {
	481	*dv++ = c;
	482	MPX_ZERO(dv, dvl);
	483	}
	484	}
	485
	486	/* --- @mpx_uaddn@ --- *
	487	*
	488	* Arguments: @mpw dv, dvl@ = source and destination base and limit
	489	* @mpw n@ = other addend
	490	*
	491	* Returns: ---
	492	*
	493	* Use: Adds a small integer to a multiprecision number.
	494	*/
	495
	496	void mpx_uaddn(mpw dv, mpw dvl, mpw n) { MPX_UADDN(dv, dvl, n); }
	497
	498	/* --- @mpx_usub@ --- *
	499	*
	500	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	501	* @const mpw av, avl@ = first argument vector base and limit
	502	* @const mpw bv, bvl@ = second argument vector base and limit
	503	*
	504	* Returns: ---
	505	*
	506	* Use: Performs unsigned integer subtraction. If the result
	507	* overflows the destination vector, high-order bits are
	508	* discarded. This means that two's complement subtraction
	509	* happens more or less for free, althuogh that's more a side-
	510	* effect than anything else. The result vector may be equal to
	511	* either or both source vectors, but may not otherwise overlap
	512	* them.
	513	*/
	514
	515	void mpx_usub(mpw dv, mpw dvl, const mpw av, const mpw avl,
	516	const mpw bv, const mpw bvl)
	517	{
	518	mpw c = 0;
	519
	520	while (av < avl \|\| bv < bvl) {
	521	mpw a, b;
	522	mpd x;
	523	if (dv >= dvl)
	524	return;
	525	a = (av < avl) ? *av++ : 0;
	526	b = (bv < bvl) ? *bv++ : 0;
	527	x = (mpd)a - (mpd)b - c;
	528	*dv++ = MPW(x);
	529	if (x >> MPW_BITS)
	530	c = 1;
	531	else
	532	c = 0;
	533	}
	534	if (c)
	535	c = MPW_MAX;
	536	while (dv < dvl)
	537	*dv++ = c;
	538	}
	539
	540	/* --- @mpx_usubn@ --- *
	541	*
	542	* Arguments: @mpw dv, dvl@ = source and destination base and limit
	543	* @n@ = subtrahend
	544	*
	545	* Returns: ---
	546	*
	547	* Use: Subtracts a small integer from a multiprecision number.
	548	*/
	549
	550	void mpx_usubn(mpw dv, mpw dvl, mpw n) { MPX_USUBN(dv, dvl, n); }
	551
	552	/* --- @mpx_umul@ --- *
	553	*
	554	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	555	* @const mpw av, avl@ = multiplicand vector base and limit
	556	* @const mpw bv, bvl@ = multiplier vector base and limit
	557	*
	558	* Returns: ---
	559	*
	560	* Use: Performs unsigned integer multiplication. If the result
	561	* overflows the desination vector, high-order bits are
	562	* discarded. The result vector may not overlap the argument
	563	* vectors in any way.
	564	*/
	565
	566	void mpx_umul(mpw dv, mpw dvl, const mpw av, const mpw avl,
	567	const mpw bv, const mpw bvl)
	568	{
	569	/* --- This is probably worthwhile on a multiply --- */
	570
	571	MPX_SHRINK(av, avl);
	572	MPX_SHRINK(bv, bvl);
	573
	574	/* --- Deal with a multiply by zero --- */
	575
	576	if (bv == bvl) {
	577	MPX_ZERO(dv, dvl);
	578	return;
	579	}
	580
	581	/* --- Do the initial multiply and initialize the accumulator --- */
	582
	583	MPX_UMULN(dv, dvl, av, avl, *bv++);
	584
	585	/* --- Do the remaining multiply/accumulates --- */
	586
	587	while (dv < dvl && bv < bvl) {
	588	mpw m = *bv++;
	589	mpw c = 0;
	590	const mpw *avv = av;
	591	mpw *dvv = ++dv;
	592
	593	while (avv < avl) {
	594	mpd x;
	595	if (dvv >= dvl)
	596	goto next;
	597	x = (mpd)dvv + (mpd)m (mpd)*avv++ + c;
	598	*dvv++ = MPW(x);
	599	c = x >> MPW_BITS;
	600	}
	601	MPX_UADDN(dvv, dvl, c);
	602	next:;
	603	}
	604	}
	605
	606	/* --- @mpx_umuln@ --- *
	607	*
	608	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	609	* @const mpw av, avl@ = multiplicand vector base and limit
	610	* @mpw m@ = multiplier
	611	*
	612	* Returns: ---
	613	*
	614	* Use: Multiplies a multiprecision integer by a single-word value.
	615	* The destination and source may be equal. The destination
	616	* is completely cleared after use.
	617	*/
	618
	619	void mpx_umuln(mpw dv, mpw dvl, const mpw av, const mpw avl, mpw m)
	620	{
	621	MPX_UMULN(dv, dvl, av, avl, m);
	622	}
	623
	624	/* --- @mpx_umlan@ --- *
	625	*
	626	* Arguments: @mpw dv, dvl@ = destination/accumulator base and limit
	627	* @const mpw av, avl@ = multiplicand vector base and limit
	628	* @mpw m@ = multiplier
	629	*
	630	* Returns: ---
	631	*
	632	* Use: Multiplies a multiprecision integer by a single-word value
	633	* and adds the result to an accumulator.
	634	*/
	635
	636	void mpx_umlan(mpw dv, mpw dvl, const mpw av, const mpw avl, mpw m)
	637	{
	638	MPX_UMLAN(dv, dvl, av, avl, m);
	639	}
	640
	641	/* --- @mpx_usqr@ --- *
	642	*
	643	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	644	* @const mpw av, av@ = source vector base and limit
	645	*
	646	* Returns: ---
	647	*
	648	* Use: Performs unsigned integer squaring. The result vector must
	649	* not overlap the source vector in any way.
	650	*/
	651
	652	void mpx_usqr(mpw dv, mpw dvl, const mpw av, const mpw avl)
	653	{
	654	MPX_ZERO(dv, dvl);
	655
	656	/* --- Main loop --- */
	657
	658	while (av < avl) {
	659	const mpw *avv = av;
	660	mpw *dvv = dv;
	661	mpw a = *av;
	662	mpd c;
	663
	664	/* --- Stop if I've run out of destination --- */
	665
	666	if (dvv >= dvl)
	667	break;
	668
	669	/* --- Work out the square at this point in the proceedings --- */
	670
	671	{
	672	mpd x = (mpd)a * (mpd)a + *dvv;
	673	*dvv++ = MPW(x);
	674	c = MPW(x >> MPW_BITS);
	675	}
	676
	677	/* --- Now fix up the rest of the vector upwards --- */
	678
	679	avv++;
	680	while (dvv < dvl && avv < avl) {
	681	mpd x = (mpd)a * (mpd)*avv++;
	682	mpd y = ((x << 1) & MPW_MAX) + c + *dvv;
	683	c = (x >> (MPW_BITS - 1)) + (y >> MPW_BITS);
	684	*dvv++ = MPW(y);
	685	}
	686	while (dvv < dvl && c) {
	687	mpd x = c + *dvv;
	688	*dvv++ = MPW(x);
	689	c = x >> MPW_BITS;
	690	}
	691
	692	/* --- Get ready for the next round --- */
	693
	694	av++;
	695	dv += 2;
	696	}
	697	}
	698
	699	/* --- @mpx_udiv@ --- *
	700	*
	701	* Arguments: @mpw qv, qvl@ = quotient vector base and limit
	702	* @mpw rv, rvl@ = dividend/remainder vector base and limit
	703	* @const mpw dv, dvl@ = divisor vector base and limit
	704	* @mpw sv, svl@ = scratch workspace
	705	*
	706	* Returns: ---
	707	*
	708	* Use: Performs unsigned integer division. If the result overflows
	709	* the quotient vector, high-order bits are discarded. (Clearly
	710	* the remainder vector can't overflow.) The various vectors
	711	* may not overlap in any way. Yes, I know it's a bit odd
	712	* requiring the dividend to be in the result position but it
	713	* does make some sense really. The remainder must have
	714	* headroom for at least two extra words. The scratch space
	715	* must be at least one word larger than the divisor.
	716	*/
	717
	718	void mpx_udiv(mpw qv, mpw qvl, mpw rv, mpw rvl,
	719	const mpw dv, const mpw dvl,
	720	mpw sv, mpw svl)
	721	{
	722	unsigned norm = 0;
	723	size_t scale;
	724	mpw d, dd;
	725
	726	/* --- Initialize the quotient --- */
	727
	728	MPX_ZERO(qv, qvl);
	729
	730	/* --- Perform some sanity checks --- */
	731
	732	MPX_SHRINK(dv, dvl);
	733	assert(((void)"division by zero in mpx_udiv", dv < dvl));
	734
	735	/* --- Normalize the divisor --- *
	736	*
	737	* The algorithm requires that the divisor be at least two digits long.
	738	* This is easy to fix.
	739	*/
	740
	741	{
	742	unsigned b;
	743
	744	d = dvl[-1];
	745	for (b = MPW_BITS / 2; b; b >>= 1) {
	746	if (d < (MPW_MAX >> b)) {
	747	d <<= b;
	748	norm += b;
	749	}
	750	}
	751	if (dv + 1 == dvl)
	752	norm += MPW_BITS;
	753	}
	754
	755	/* --- Normalize the dividend/remainder to match --- */
	756
	757	if (norm) {
	758	mpx_lsl(rv, rvl, rv, rvl, norm);
	759	mpx_lsl(sv, svl, dv, dvl, norm);
	760	dv = sv;
	761	dvl = svl;
	762	MPX_SHRINK(dv, dvl);
	763	}
	764
	765	MPX_SHRINK(rv, rvl);
	766	d = dvl[-1];
	767	dd = dvl[-2];
	768
	769	/* --- Work out the relative scales --- */
	770
	771	{
	772	size_t rvn = rvl - rv;
	773	size_t dvn = dvl - dv;
	774
	775	/* --- If the divisor is clearly larger, notice this --- */
	776
	777	if (dvn > rvn) {
	778	mpx_lsr(rv, rvl, rv, rvl, norm);
	779	return;
	780	}
	781
	782	scale = rvn - dvn;
	783	}
	784
	785	/* --- Calculate the most significant quotient digit --- *
	786	*
	787	* Because the divisor has its top bit set, this can only happen once. The
	788	* pointer arithmetic is a little contorted, to make sure that the
	789	* behaviour is defined.
	790	*/
	791
	792	if (MPX_UCMP(rv + scale, rvl, >=, dv, dvl)) {
	793	mpx_usub(rv + scale, rvl, rv + scale, rvl, dv, dvl);
	794	if (qvl - qv > scale)
	795	qv[scale] = 1;
	796	}
	797
	798	/* --- Now for the main loop --- */
	799
	800	{
	801	mpw *rvv = rvl - 2;
	802
	803	while (scale) {
	804	mpw q;
	805	mpd rh;
	806
	807	/* --- Get an estimate for the next quotient digit --- */
	808
	809	mpw r = rvv[1];
	810	mpw rr = rvv[0];
	811	mpw rrr = *--rvv;
	812
	813	scale--;
	814	rh = ((mpd)r << MPW_BITS) \| rr;
	815	if (r == d)
	816	q = MPW_MAX;
	817	else
	818	q = MPW(rh / d);
	819
	820	/* --- Refine the estimate --- */
	821
	822	{
	823	mpd yh = (mpd)d * q;
	824	mpd yl = (mpd)dd * q;
	825
	826	if (yl > MPW_MAX) {
	827	yh += yl >> MPW_BITS;
	828	yl &= MPW_MAX;
	829	}
	830
	831	while (yh > rh \|\| (yh == rh && yl > rrr)) {
	832	q--;
	833	yh -= d;
	834	if (yl < dd) {
	835	yh++;
	836	yl += MPW_MAX;
	837	}
	838	yl -= dd;
	839	}
	840	}
	841
	842	/* --- Remove a chunk from the dividend --- */
	843
	844	{
	845	mpw *svv;
	846	const mpw *dvv;
	847	mpw mc = 0, sc = 0;
	848
	849	/* --- Calculate the size of the chunk --- *
	850	*
	851	* This does the whole job of calculating @r >> scale - qd@.
	852	*/
	853
	854	for (svv = rv + scale, dvv = dv;
	855	dvv < dvl && svv < rvl;
	856	svv++, dvv++) {
	857	mpd x = (mpd)dvv (mpd)q + mc;
	858	mc = x >> MPW_BITS;
	859	x = (mpd)*svv - MPW(x) - sc;
	860	*svv = MPW(x);
	861	if (x >> MPW_BITS)
	862	sc = 1;
	863	else
	864	sc = 0;
	865	}
	866
	867	if (svv < rvl) {
	868	mpd x = (mpd)*svv - mc - sc;
	869	*svv++ = MPW(x);
	870	if (x >> MPW_BITS)
	871	sc = MPW_MAX;
	872	else
	873	sc = 0;
	874	while (svv < rvl)
	875	*svv++ = sc;
	876	}
	877
	878	/* --- Fix if the quotient was too large --- *
	879	*
	880	* This doesn't seem to happen very often.
	881	*/
	882
	883	if (rvl[-1] > MPW_MAX / 2) {
	884	mpx_uadd(rv + scale, rvl, rv + scale, rvl, dv, dvl);
	885	q--;
	886	}
	887	}
	888
	889	/* --- Done for another iteration --- */
	890
	891	if (qvl - qv > scale)
	892	qv[scale] = q;
	893	r = rr;
	894	rr = rrr;
	895	}
	896	}
	897
	898	/* --- Now fiddle with unnormalizing and things --- */
	899
	900	mpx_lsr(rv, rvl, rv, rvl, norm);
	901	}
	902
	903	/----- Test rig ----------------------------------------------------------/
	904
	905	#ifdef TEST_RIG
	906
	907	#include <mLib/alloc.h>
	908	#include <mLib/dstr.h>
	909	#include <mLib/quis.h>
	910	#include <mLib/testrig.h>
	911
	912	#include "mpscan.h"
	913
	914	#define ALLOC(v, vl, sz) do { \
	915	size_t _sz = (sz); \
	916	mpw *_vv = xmalloc(MPWS(_sz)); \
	917	mpw *_vvl = _vv + _sz; \
	918	(v) = _vv; \
	919	(vl) = _vvl; \
	920	} while (0)
	921
	922	#define LOAD(v, vl, d) do { \
	923	const dstr *_d = (d); \
	924	mpw _v, _vl; \
	925	ALLOC(_v, _vl, MPW_RQ(_d->len)); \
	926	mpx_loadb(_v, _vl, _d->buf, _d->len); \
	927	(v) = _v; \
	928	(vl) = _vl; \
	929	} while (0)
	930
	931	#define MAX(x, y) ((x) > (y) ? (x) : (y))
	932
	933	static void dumpbits(const char msg, const void pp, size_t sz)
	934	{
	935	const octet *p = pp;
	936	fputs(msg, stderr);
	937	for (; sz; sz--)
	938	fprintf(stderr, " %02x", *p++);
	939	fputc('\n', stderr);
	940	}
	941
	942	static void dumpmp(const char msg, const mpw v, const mpw *vl)
	943	{
	944	fputs(msg, stderr);
	945	MPX_SHRINK(v, vl);
	946	while (v < vl)
	947	fprintf(stderr, " %08lx", (unsigned long)*--vl);
	948	fputc('\n', stderr);
	949	}
	950
	951	static int chkscan(const mpw v, const mpw vl,
	952	const void *pp, size_t sz, int step)
	953	{
	954	mpscan mps;
	955	const octet *p = pp;
	956	unsigned bit = 0;
	957	int ok = 1;
	958
	959	mpscan_initx(&mps, v, vl);
	960	while (sz) {
	961	unsigned x = *p;
	962	int i;
	963	p += step;
	964	for (i = 0; i < 8 && MPSCAN_STEP(&mps); i++) {
	965	if (MPSCAN_BIT(&mps) != (x & 1)) {
	966	fprintf(stderr,
	967	"\n*** error, step %i, bit %u, expected %u, found %u\n",
	968	step, bit, x & 1, MPSCAN_BIT(&mps));
	969	ok = 0;
	970	}
	971	x >>= 1;
	972	bit++;
	973	}
	974	sz--;
	975	}
	976
	977	return (ok);
	978	}
	979
	980	static int loadstore(dstr *v)
	981	{
	982	dstr d = DSTR_INIT;
	983	size_t sz = MPW_RQ(v->len) * 2, diff;
	984	mpw m, ml;
	985	int ok = 1;
	986
	987	dstr_ensure(&d, v->len);
	988	m = xmalloc(MPWS(sz));
	989
	990	for (diff = 0; diff < sz; diff += 5) {
	991	size_t oct;
	992
	993	ml = m + sz - diff;
	994
	995	mpx_loadl(m, ml, v->buf, v->len);
	996	if (!chkscan(m, ml, v->buf, v->len, +1))
	997	ok = 0;
	998	MPX_OCTETS(oct, m, ml);
	999	mpx_storel(m, ml, d.buf, d.sz);
	1000	if (memcmp(d.buf, v->buf, oct) != 0) {
	1001	dumpbits("\n*** storel failed", d.buf, d.sz);
	1002	ok = 0;
	1003	}
	1004
	1005	mpx_loadb(m, ml, v->buf, v->len);
	1006	if (!chkscan(m, ml, v->buf + v->len - 1, v->len, -1))
	1007	ok = 0;
	1008	MPX_OCTETS(oct, m, ml);
	1009	mpx_storeb(m, ml, d.buf, d.sz);
	1010	if (memcmp(d.buf + d.sz - oct, v->buf + v->len - oct, oct) != 0) {
	1011	dumpbits("\n*** storeb failed", d.buf, d.sz);
	1012	ok = 0;
	1013	}
	1014	}
	1015
	1016	if (!ok)
	1017	dumpbits("input data", v->buf, v->len);
	1018
	1019	free(m);
	1020	dstr_destroy(&d);
	1021	return (ok);
	1022	}
	1023
	1024	static int lsl(dstr *v)
	1025	{
	1026	mpw a, al;
	1027	int n = (int )v[1].buf;
	1028	mpw c, cl;
	1029	mpw d, dl;
	1030	int ok = 1;
	1031
	1032	LOAD(a, al, &v[0]);
	1033	LOAD(c, cl, &v[2]);
	1034	ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS);
	1035
	1036	mpx_lsl(d, dl, a, al, n);
	1037	if (MPX_UCMP(d, dl, !=, c, cl)) {
	1038	fprintf(stderr, "\n*** lsl(%i) failed\n", n);
	1039	dumpmp(" a", a, al);
	1040	dumpmp("expected", c, cl);
	1041	dumpmp(" result", d, dl);
	1042	ok = 0;
	1043	}
	1044
	1045	free(a); free(c); free(d);
	1046	return (ok);
	1047	}
	1048
	1049	static int lsr(dstr *v)
	1050	{
	1051	mpw a, al;
	1052	int n = (int )v[1].buf;
	1053	mpw c, cl;
	1054	mpw d, dl;
	1055	int ok = 1;
	1056
	1057	LOAD(a, al, &v[0]);
	1058	LOAD(c, cl, &v[2]);
	1059	ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS + 1);
	1060
	1061	mpx_lsr(d, dl, a, al, n);
	1062	if (MPX_UCMP(d, dl, !=, c, cl)) {
	1063	fprintf(stderr, "\n*** lsr(%i) failed\n", n);
	1064	dumpmp(" a", a, al);
	1065	dumpmp("expected", c, cl);
	1066	dumpmp(" result", d, dl);
	1067	ok = 0;
	1068	}
	1069
	1070	free(a); free(c); free(d);
	1071	return (ok);
	1072	}
	1073
	1074	static int uadd(dstr *v)
	1075	{
	1076	mpw a, al;
	1077	mpw b, bl;
	1078	mpw c, cl;
	1079	mpw d, dl;
	1080	int ok = 1;
	1081
	1082	LOAD(a, al, &v[0]);
	1083	LOAD(b, bl, &v[1]);
	1084	LOAD(c, cl, &v[2]);
	1085	ALLOC(d, dl, MAX(al - a, bl - b) + 1);
	1086
	1087	mpx_uadd(d, dl, a, al, b, bl);
	1088	if (MPX_UCMP(d, dl, !=, c, cl)) {
	1089	fprintf(stderr, "\n*** uadd failed\n");
	1090	dumpmp(" a", a, al);
	1091	dumpmp(" b", b, bl);
	1092	dumpmp("expected", c, cl);
	1093	dumpmp(" result", d, dl);
	1094	ok = 0;
	1095	}
	1096
	1097	free(a); free(b); free(c); free(d);
	1098	return (ok);
	1099	}
	1100
	1101	static int usub(dstr *v)
	1102	{
	1103	mpw a, al;
	1104	mpw b, bl;
	1105	mpw c, cl;
	1106	mpw d, dl;
	1107	int ok = 1;
	1108
	1109	LOAD(a, al, &v[0]);
	1110	LOAD(b, bl, &v[1]);
	1111	LOAD(c, cl, &v[2]);
	1112	ALLOC(d, dl, al - a);
	1113
	1114	mpx_usub(d, dl, a, al, b, bl);
	1115	if (MPX_UCMP(d, dl, !=, c, cl)) {
	1116	fprintf(stderr, "\n*** usub failed\n");
	1117	dumpmp(" a", a, al);
	1118	dumpmp(" b", b, bl);
	1119	dumpmp("expected", c, cl);
	1120	dumpmp(" result", d, dl);
	1121	ok = 0;
	1122	}
	1123
	1124	free(a); free(b); free(c); free(d);
	1125	return (ok);
	1126	}
	1127
	1128	static int umul(dstr *v)
	1129	{
	1130	mpw a, al;
	1131	mpw b, bl;
	1132	mpw c, cl;
	1133	mpw d, dl;
	1134	int ok = 1;
	1135
	1136	LOAD(a, al, &v[0]);
	1137	LOAD(b, bl, &v[1]);
	1138	LOAD(c, cl, &v[2]);
	1139	ALLOC(d, dl, (al - a) + (bl - b));
	1140
	1141	mpx_umul(d, dl, a, al, b, bl);
	1142	if (MPX_UCMP(d, dl, !=, c, cl)) {
	1143	fprintf(stderr, "\n*** umul failed\n");
	1144	dumpmp(" a", a, al);
	1145	dumpmp(" b", b, bl);
	1146	dumpmp("expected", c, cl);
	1147	dumpmp(" result", d, dl);
	1148	ok = 0;
	1149	}
	1150
	1151	free(a); free(b); free(c); free(d);
	1152	return (ok);
	1153	}
	1154
	1155	static int usqr(dstr *v)
	1156	{
	1157	mpw a, al;
	1158	mpw c, cl;
	1159	mpw d, dl;
	1160	int ok = 1;
	1161
	1162	LOAD(a, al, &v[0]);
	1163	LOAD(c, cl, &v[1]);
	1164	ALLOC(d, dl, 2 * (al - a));
	1165
	1166	mpx_usqr(d, dl, a, al);
	1167	if (MPX_UCMP(d, dl, !=, c, cl)) {
	1168	fprintf(stderr, "\n*** usqr failed\n");
	1169	dumpmp(" a", a, al);
	1170	dumpmp("expected", c, cl);
	1171	dumpmp(" result", d, dl);
	1172	ok = 0;
	1173	}
	1174
	1175	free(a); free(c); free(d);
	1176	return (ok);
	1177	}
	1178
	1179	static int udiv(dstr *v)
	1180	{
	1181	mpw a, al;
	1182	mpw b, bl;
	1183	mpw q, ql;
	1184	mpw r, rl;
	1185	mpw qq, qql;
	1186	mpw s, sl;
	1187	int ok = 1;
	1188
	1189	ALLOC(a, al, MPW_RQ(v[0].len) + 2); mpx_loadb(a, al, v[0].buf, v[0].len);
	1190	LOAD(b, bl, &v[1]);
	1191	LOAD(q, ql, &v[2]);
	1192	LOAD(r, rl, &v[3]);
	1193	ALLOC(qq, qql, al - a);
	1194	ALLOC(s, sl, (bl - b) + 1);
	1195
	1196	mpx_udiv(qq, qql, a, al, b, bl, s, sl);
	1197	if (MPX_UCMP(qq, qql, !=, q, ql) \|\|
	1198	MPX_UCMP(a, al, !=, r, rl)) {
	1199	fprintf(stderr, "\n*** udiv failed\n");
	1200	dumpmp(" divisor", b, bl);
	1201	dumpmp("expect r", r, rl);
	1202	dumpmp("result r", a, al);
	1203	dumpmp("expect q", q, ql);
	1204	dumpmp("result q", qq, qql);
	1205	ok = 0;
	1206	}
	1207
	1208	free(a); free(b); free(r); free(q); free(s); free(qq);
	1209	return (ok);
	1210	}
	1211
	1212	static test_chunk defs[] = {
	1213	{ "load-store", loadstore, { &type_hex, 0 } },
	1214	{ "lsl", lsl, { &type_hex, &type_int, &type_hex, 0 } },
	1215	{ "lsr", lsr, { &type_hex, &type_int, &type_hex, 0 } },
	1216	{ "uadd", uadd, { &type_hex, &type_hex, &type_hex, 0 } },
	1217	{ "usub", usub, { &type_hex, &type_hex, &type_hex, 0 } },
	1218	{ "umul", umul, { &type_hex, &type_hex, &type_hex, 0 } },
	1219	{ "usqr", usqr, { &type_hex, &type_hex, 0 } },
	1220	{ "udiv", udiv, { &type_hex, &type_hex, &type_hex, &type_hex, 0 } },
	1221	{ 0, 0, { 0 } }
	1222	};
	1223
	1224	int main(int argc, char *argv[])
	1225	{
	1226	test_run(argc, argv, defs, SRCDIR"/tests/mpx");
	1227	return (0);
	1228	}
	1229
	1230
	1231	#endif
	1232
	1233	/----- That's all, folks -------------------------------------------------/