[catacomb] / mpx.c

/* -*-c-*-
 *
 * $Id: mpx.c,v 1.4 1999/11/17 18:04:09 mdw Exp $
 *
 * Low-level multiprecision arithmetic
 *
 * (c) 1999 Straylight/Edgeware
 */

/*----- Licensing notice --------------------------------------------------* 
 *
 * This file is part of Catacomb.
 *
 * Catacomb is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Library General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 * 
 * Catacomb is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Library General Public License for more details.
 * 
 * You should have received a copy of the GNU Library General Public
 * License along with Catacomb; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA.
 */

/*----- Revision history --------------------------------------------------* 
 *
 * $Log: mpx.c,v $
 * Revision 1.4  1999/11/17 18:04:09  mdw
 * Add two's-complement functionality.  Improve mpx_udiv a little by
 * performing the multiplication of the divisor by q with the subtraction
 * from r.
 *
 * Revision 1.3  1999/11/13 01:57:31  mdw
 * Remove stray debugging code.
 *
 * Revision 1.2  1999/11/13 01:50:59  mdw
 * Multiprecision routines finished and tested.
 *
 * Revision 1.1  1999/09/03 08:41:12  mdw
 * Initial import.
 *
 */

/*----- Header files ------------------------------------------------------*/

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <mLib/bits.h>

#include "mptypes.h"
#include "mpx.h"

/*----- Loading and storing -----------------------------------------------*/

/* --- @mpx_storel@ --- *
 *
 * Arguments:	@const mpw *v, *vl@ = base and limit of source vector
 *		@void *pp@ = pointer to octet array
 *		@size_t sz@ = size of octet array
 *
 * Returns:	---
 *
 * Use:		Stores an MP in an octet array, least significant octet
 *		first.  High-end octets are silently discarded if there
 *		isn't enough space for them.
 */

void mpx_storel(const mpw *v, const mpw *vl, void *pp, size_t sz)
{
  mpw n, w = 0;
  octet *p = pp, *q = p + sz;
  unsigned bits = 0;

  while (p < q) {
    if (bits < 8) {
      if (v >= vl) {
	*p++ = U8(w);
	break;
      }
      n = *v++;
      *p++ = U8(w | n << bits);
      w = n >> (8 - bits);
      bits += MPW_BITS - 8;
    } else {
      *p++ = U8(w);
      w >>= 8;
      bits -= 8;
    }
  }
  memset(p, 0, q - p);
}

/* --- @mpx_loadl@ --- *
 *
 * Arguments:	@mpw *v, *vl@ = base and limit of destination vector
 *		@const void *pp@ = pointer to octet array
 *		@size_t sz@ = size of octet array
 *
 * Returns:	---
 *
 * Use:		Loads an MP in an octet array, least significant octet
 *		first.  High-end octets are ignored if there isn't enough
 *		space for them.
 */

void mpx_loadl(mpw *v, mpw *vl, const void *pp, size_t sz)
{
  unsigned n;
  mpw w = 0;
  const octet *p = pp, *q = p + sz;
  unsigned bits = 0;

  if (v >= vl)
    return;
  while (p < q) {
    n = U8(*p++);
    w |= n << bits;
    bits += 8;
    if (bits >= MPW_BITS) {
      *v++ = MPW(w);
      w = n >> (MPW_BITS - bits + 8);
      bits -= MPW_BITS;
      if (v >= vl)
	return;
    }
  }
  *v++ = w;
  MPX_ZERO(v, vl);
}

/* --- @mpx_storeb@ --- *
 *
 * Arguments:	@const mpw *v, *vl@ = base and limit of source vector
 *		@void *pp@ = pointer to octet array
 *		@size_t sz@ = size of octet array
 *
 * Returns:	---
 *
 * Use:		Stores an MP in an octet array, most significant octet
 *		first.  High-end octets are silently discarded if there
 *		isn't enough space for them.
 */

void mpx_storeb(const mpw *v, const mpw *vl, void *pp, size_t sz)
{
  mpw n, w = 0;
  octet *p = pp, *q = p + sz;
  unsigned bits = 0;

  while (q > p) {
    if (bits < 8) {
      if (v >= vl) {
	*--q = U8(w);
	break;
      }
      n = *v++;
      *--q = U8(w | n << bits);
      w = n >> (8 - bits);
      bits += MPW_BITS - 8;
    } else {
      *--q = U8(w);
      w >>= 8;
      bits -= 8;
    }
  }
  memset(p, 0, q - p);
}

/* --- @mpx_loadb@ --- *
 *
 * Arguments:	@mpw *v, *vl@ = base and limit of destination vector
 *		@const void *pp@ = pointer to octet array
 *		@size_t sz@ = size of octet array
 *
 * Returns:	---
 *
 * Use:		Loads an MP in an octet array, most significant octet
 *		first.  High-end octets are ignored if there isn't enough
 *		space for them.
 */

void mpx_loadb(mpw *v, mpw *vl, const void *pp, size_t sz)
{
  unsigned n;
  mpw w = 0;
  const octet *p = pp, *q = p + sz;
  unsigned bits = 0;

  if (v >= vl)
    return;
  while (q > p) {
    n = U8(*--q);
    w |= n << bits;
    bits += 8;
    if (bits >= MPW_BITS) {
      *v++ = MPW(w);
      w = n >> (MPW_BITS - bits + 8);
      bits -= MPW_BITS;
      if (v >= vl)
	return;
    }
  }
  *v++ = w;
  MPX_ZERO(v, vl);
}

/*----- Logical shifting --------------------------------------------------*/

/* --- @mpx_lsl@ --- *
 *
 * Arguments:	@mpw *dv, *dvl@ = destination vector base and limit
 *		@const mpw *av, *avl@ = source vector base and limit
 *		@size_t n@ = number of bit positions to shift by
 *
 * Returns:	---
 *
 * Use:		Performs a logical shift left operation on an integer.
 */

void mpx_lsl(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n)
{
  size_t nw;
  unsigned nb;

  /* --- Trivial special case --- */

  if (n == 0)
    MPX_COPY(dv, dvl, av, avl);

  /* --- Single bit shifting --- */

  else if (n == 1) {
    mpw w = 0;
    while (av < avl) {
      mpw t;
      if (dv >= dvl)
	goto done;
      t = *av++;
      *dv++ = MPW((t << 1) | w);
      w = t >> (MPW_BITS - 1);
    }
    if (dv >= dvl)
      goto done;
    *dv++ = MPW(w);
    MPX_ZERO(dv, dvl);
    goto done;
  }

  /* --- Break out word and bit shifts for more sophisticated work --- */
	
  nw = n / MPW_BITS;
  nb = n % MPW_BITS;

  /* --- Handle a shift by a multiple of the word size --- */

  if (nb == 0) {
    MPX_COPY(dv + nw, dvl, av, avl);
    memset(dv, 0, MPWS(nw));
  }

  /* --- And finally the difficult case --- *
   *
   * This is a little convoluted, because I have to start from the end and
   * work backwards to avoid overwriting the source, if they're both the same
   * block of memory.
   */

  else {
    mpw w;
    size_t nr = MPW_BITS - nb;
    size_t dvn = dvl - dv;
    size_t avn = avl - av;

    if (dvn <= nw) {
      MPX_ZERO(dv, dvl);
      goto done;
    }

    if (dvn > avn + nw) {
      size_t off = avn + nw + 1;
      MPX_ZERO(dv + off, dvl);
      dvl = dv + off;
      w = 0;
    } else {
      avl = av + dvn - nw;
      w = *--avl << nb;
    }

    while (avl > av) {
      mpw t = *--avl;
      *--dvl = (t >> nr) | w;
      w = t << nb;
    }

    *--dvl = w;
    MPX_ZERO(dv, dvl);
  }

done:;
}

/* --- @mpx_lsr@ --- *
 *
 * Arguments:	@mpw *dv, *dvl@ = destination vector base and limit
 *		@const mpw *av, *avl@ = source vector base and limit
 *		@size_t n@ = number of bit positions to shift by
 *
 * Returns:	---
 *
 * Use:		Performs a logical shift right operation on an integer.
 */

void mpx_lsr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n)
{
  size_t nw;
  unsigned nb;

  /* --- Trivial special case --- */

  if (n == 0)
    MPX_COPY(dv, dvl, av, avl);

  /* --- Single bit shifting --- */

  else if (n == 1) {
    mpw w = *av++ >> 1;
    while (av < avl) {
      mpw t;
      if (dv >= dvl)
	goto done;
      t = *av++;
      *dv++ = MPW((t << (MPW_BITS - 1)) | w);
      w = t >> 1;
    }
    if (dv >= dvl)
      goto done;
    *dv++ = MPW(w);
    MPX_ZERO(dv, dvl);
    goto done;
  }

  /* --- Break out word and bit shifts for more sophisticated work --- */

  nw = n / MPW_BITS;
  nb = n % MPW_BITS;

  /* --- Handle a shift by a multiple of the word size --- */

  if (nb == 0)
    MPX_COPY(dv, dvl, av + nw, avl);

  /* --- And finally the difficult case --- */

  else {
    mpw w;
    size_t nr = MPW_BITS - nb;

    av += nw;
    w = *av++;
    while (av < avl) {
      mpw t;
      if (dv >= dvl)
	goto done;
      t = *av++;
      *dv++ = MPW((w >> nb) | (t << nr));
      w = t;
    }
    if (dv < dvl) {
      *dv++ = MPW(w >> nb);
      MPX_ZERO(dv, dvl);
    }
  }

done:;
}

/*----- Unsigned arithmetic -----------------------------------------------*/

/* --- @mpx_2c@ --- *
 *
 * Arguments:	@mpw *dv, *dvl@ = destination vector
 *		@const mpw *v, *vl@ = source vector
 *
 * Returns:	---
 *
 * Use:		Calculates the two's complement of @v@.
 */

void mpx_2c(mpw *dv, mpw *dvl, const mpw *v, const mpw *vl)
{
  mpw c = 0;
  while (dv < dvl && v < vl)
    *dv++ = c = MPW(~*v++);
  if (dv < dvl) {
    if (c > MPW_MAX / 2)
      c = MPW(~0);
    while (dv < dvl)
      *dv++ = c;
  }
  MPX_UADDN(dv, dvl, 1);
}

/* --- @mpx_ucmp@ --- *
 *
 * Arguments:	@const mpw *av, *avl@ = first argument vector base and limit
 *		@const mpw *bv, *bvl@ = second argument vector base and limit
 *
 * Returns:	Less than, equal to, or greater than zero depending on
 *		whether @a@ is less than, equal to or greater than @b@,
 *		respectively.
 *
 * Use:		Performs an unsigned integer comparison.
 */

int mpx_ucmp(const mpw *av, const mpw *avl, const mpw *bv, const mpw *bvl)
{
  MPX_SHRINK(av, avl);
  MPX_SHRINK(bv, bvl);

  if (avl - av > bvl - bv)
    return (+1);
  else if (avl - av < bvl - bv)
    return (-1);
  else while (avl > av) {
    mpw a = *--avl, b = *--bvl;
    if (a > b)
      return (+1);
    else if (a < b)
      return (-1);
  }
  return (0);
}
  
/* --- @mpx_uadd@ --- *
 *
 * Arguments:	@mpw *dv, *dvl@ = destination vector base and limit
 *		@const mpw *av, *avl@ = first addend vector base and limit
 *		@const mpw *bv, *bvl@ = second addend vector base and limit
 *
 * Returns:	---
 *
 * Use:		Performs unsigned integer addition.  If the result overflows
 *		the destination vector, high-order bits are discarded.  This
 *		means that two's complement addition happens more or less for
 *		free, although that's more a side-effect than anything else.
 *		The result vector may be equal to either or both source
 *		vectors, but may not otherwise overlap them.
 */

void mpx_uadd(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
	      const mpw *bv, const mpw *bvl)
{
  mpw c = 0;

  while (av < avl || bv < bvl) {
    mpw a, b;
    mpd x;
    if (dv >= dvl)
      return;
    a = (av < avl) ? *av++ : 0;
    b = (bv < bvl) ? *bv++ : 0;
    x = (mpd)a + (mpd)b + c;
    *dv++ = MPW(x);
    c = x >> MPW_BITS;
  }
  if (dv < dvl) {
    *dv++ = c;
    MPX_ZERO(dv, dvl);
  }
}

/* --- @mpx_usub@ --- *
 *
 * Arguments:	@mpw *dv, *dvl@ = destination vector base and limit
 *		@const mpw *av, *avl@ = first argument vector base and limit
 *		@const mpw *bv, *bvl@ = second argument vector base and limit
 *
 * Returns:	---
 *
 * Use:		Performs unsigned integer subtraction.  If the result
 *		overflows the destination vector, high-order bits are
 *		discarded.  This means that two's complement subtraction
 *		happens more or less for free, althuogh that's more a side-
 *		effect than anything else.  The result vector may be equal to
 *		either or both source vectors, but may not otherwise overlap
 *		them.
 */

void mpx_usub(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
	      const mpw *bv, const mpw *bvl)
{
  mpw c = 0;

  while (av < avl || bv < bvl) {
    mpw a, b;
    mpd x;
    if (dv >= dvl)
      return;
    a = (av < avl) ? *av++ : 0;
    b = (bv < bvl) ? *bv++ : 0;
    x = (mpd)a - (mpd)b - c;
    *dv++ = MPW(x);
    if (x >> MPW_BITS)
      c = 1;
    else
      c = 0;
  }
  if (c)
    c = MPW_MAX;
  while (dv < dvl)
    *dv++ = c;
}

/* --- @mpx_umul@ --- *
 *
 * Arguments:	@mpw *dv, *dvl@ = destination vector base and limit
 *		@const mpw *av, *avl@ = multiplicand vector base and limit
 *		@const mpw *bv, *bvl@ = multiplier vector base and limit
 *
 * Returns:	---
 *
 * Use:		Performs unsigned integer multiplication.  If the result
 *		overflows the desination vector, high-order bits are
 *		discarded.  The result vector may not overlap the argument
 *		vectors in any way.
 */

void mpx_umul(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
	      const mpw *bv, const mpw *bvl)
{
  /* --- This is probably worthwhile on a multiply --- */

  MPX_SHRINK(av, avl);
  MPX_SHRINK(bv, bvl);

  /* --- Deal with a multiply by zero --- */
  
  if (bv == bvl) {
    MPX_ZERO(dv, dvl);
    return;
  }

  /* --- Do the initial multiply and initialize the accumulator --- */

  MPX_UMULN(dv, dvl, av, avl, *bv++);

  /* --- Do the remaining multiply/accumulates --- */

  while (dv < dvl && bv < bvl) {
    mpw m = *bv++;
    mpw c = 0;
    const mpw *avv = av;
    mpw *dvv = ++dv;

    while (avv < avl) {
      mpd x;
      if (dvv >= dvl)
	goto next;
      x = (mpd)*dvv + (mpd)m * (mpd)*avv++ + c;
      *dvv++ = MPW(x);
      c = x >> MPW_BITS;
    }
    MPX_UADDN(dvv, dvl, c);
  next:;
  }
}

/* --- @mpx_usqr@ --- *
 *
 * Arguments:	@mpw *dv, *dvl@ = destination vector base and limit
 *		@const mpw *av, *av@ = source vector base and limit
 *
 * Returns:	---
 *
 * Use:		Performs unsigned integer squaring.  The result vector must
 *		not overlap the source vector in any way.
 */

void mpx_usqr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl)
{
  MPX_ZERO(dv, dvl);

  /* --- Main loop --- */

  while (av < avl) {
    const mpw *avv = av;
    mpw *dvv = dv;
    mpw a = *av;
    mpd c;

    /* --- Stop if I've run out of destination --- */

    if (dvv >= dvl)
      break;

    /* --- Work out the square at this point in the proceedings --- */

    {
      mpd x = (mpd)a * (mpd)a + *dvv;
      *dvv++ = MPW(x);
      c = MPW(x >> MPW_BITS);
    }

    /* --- Now fix up the rest of the vector upwards --- */

    avv++;
    while (dvv < dvl && avv < avl) {
      mpd x = (mpd)a * (mpd)*avv++;
      mpd y = ((x << 1) & MPW_MAX) + c + *dvv;
      c = (x >> (MPW_BITS - 1)) + (y >> MPW_BITS);
      *dvv++ = MPW(y);
    }
    while (dvv < dvl && c) {
      mpd x = c + *dvv;
      *dvv++ = MPW(x);
      c = x >> MPW_BITS;
    }

    /* --- Get ready for the next round --- */

    av++;
    dv += 2;
  }
}

/* --- @mpx_udiv@ --- *
 *
 * Arguments:	@mpw *qv, *qvl@ = quotient vector base and limit
 *		@mpw *rv, *rvl@ = dividend/remainder vector base and limit
 *		@const mpw *dv, *dvl@ = divisor vector base and limit
 *		@mpw *sv, *svl@ = scratch workspace
 *
 * Returns:	---
 *
 * Use:		Performs unsigned integer division.  If the result overflows
 *		the quotient vector, high-order bits are discarded.  (Clearly
 *		the remainder vector can't overflow.)  The various vectors
 *		may not overlap in any way.  Yes, I know it's a bit odd
 *		requiring the dividend to be in the result position but it
 *		does make some sense really.  The remainder must have
 *		headroom for at least two extra words.  The scratch space
 *		must be at least one word larger than the divisor.
 */

void mpx_udiv(mpw *qv, mpw *qvl, mpw *rv, mpw *rvl,
	      const mpw *dv, const mpw *dvl,
	      mpw *sv, mpw *svl)
{
  unsigned norm = 0;
  size_t scale;
  mpw d, dd;

  /* --- Initialize the quotient --- */

  MPX_ZERO(qv, qvl);

  /* --- Perform some sanity checks --- */

  MPX_SHRINK(dv, dvl);
  assert(((void)"division by zero in mpx_udiv", dv < dvl));

  /* --- Normalize the divisor --- *
   *
   * The algorithm requires that the divisor be at least two digits long.
   * This is easy to fix.
   */

  {
    unsigned b;

    d = dvl[-1];
    for (b = MPW_BITS / 2; b; b >>= 1) {
      if (d < (MPW_MAX >> b)) {
	d <<= b;
	norm += b;
      }
    }
    if (dv + 1 == dvl)
      norm += MPW_BITS;
  }

  /* --- Normalize the dividend/remainder to match --- */

  if (norm) {
    mpx_lsl(rv, rvl, rv, rvl, norm);
    mpx_lsl(sv, svl, dv, dvl, norm);
    dv = sv;
    dvl = svl;
    MPX_SHRINK(dv, dvl);
  }

  MPX_SHRINK(rv, rvl);
  d = dvl[-1];
  dd = dvl[-2];

  /* --- Work out the relative scales --- */

  {
    size_t rvn = rvl - rv;
    size_t dvn = dvl - dv;

    /* --- If the divisor is clearly larger, notice this --- */

    if (dvn > rvn) {
      mpx_lsr(rv, rvl, rv, rvl, norm);
      return;
    }

    scale = rvn - dvn;
  }

  /* --- Calculate the most significant quotient digit --- *
   *
   * Because the divisor has its top bit set, this can only happen once.  The
   * pointer arithmetic is a little contorted, to make sure that the
   * behaviour is defined.
   */

  if (MPX_UCMP(rv + scale, rvl, >=, dv, dvl)) {
    mpx_usub(rv + scale, rvl, rv + scale, rvl, dv, dvl);
    if (qvl - qv > scale)
      qv[scale] = 1;
  }

  /* --- Now for the main loop --- */

  {
    mpw *rvv = rvl - 2;

    while (scale) {
      mpw q;
      mpd rh;

      /* --- Get an estimate for the next quotient digit --- */

      mpw r = rvv[1];
      mpw rr = rvv[0];
      mpw rrr = *--rvv;

      scale--;
      rh = ((mpd)r << MPW_BITS) | rr;
      if (r == d)
	q = MPW_MAX;
      else
	q = MPW(rh / d);

      /* --- Refine the estimate --- */

      {
	mpd yh = (mpd)d * q;
	mpd yl = (mpd)dd * q;

	if (yl > MPW_MAX) {
	  yh += yl >> MPW_BITS;
	  yl &= MPW_MAX;
	}

	while (yh > rh || (yh == rh && yl > rrr)) {
	  q--;
	  yh -= d;
	  if (yl < dd) {
	    yh++;
	    yl += MPW_MAX;
	  }
	  yl -= dd;
	}
      }

      /* --- Remove a chunk from the dividend --- */

      {
	mpw *svv;
	const mpw *dvv;
	mpw mc = 0, sc = 0;

	/* --- Calculate the size of the chunk --- *
	 *
	 * This does the whole job of calculating @r >> scale - qd@.
	 */

	for (svv = rv + scale, dvv = dv;
	     dvv < dvl && svv < rvl;
	     svv++, dvv++) {
	  mpd x = (mpd)*dvv * (mpd)q + mc;
	  mc = x >> MPW_BITS;
	  x = (mpd)*svv - MPW(x) - sc;
	  *svv = MPW(x);
	  if (x >> MPW_BITS)
	    sc = 1;
	  else
	    sc = 0;
	}

	if (svv < rvl) {
	  mpd x = (mpd)*svv - mc - sc;
	  *svv++ = MPW(x);
	  if (x >> MPW_BITS)
	    sc = MPW_MAX;
	  else
	    sc = 0;
	  while (svv < rvl)
	    *svv++ = sc;
	}

	/* --- Fix if the quotient was too large --- *
	 *
	 * This doesn't seem to happen very often.
	 */

	if (rvl[-1] > MPW_MAX / 2) {
	  mpx_uadd(rv + scale, rvl, rv + scale, rvl, dv, dvl);
	  q--;
	}
      }

      /* --- Done for another iteration --- */

      if (qvl - qv > scale)
	qv[scale] = q;
      r = rr;
      rr = rrr;
    }
  }

  /* --- Now fiddle with unnormalizing and things --- */

  mpx_lsr(rv, rvl, rv, rvl, norm);
}

/*----- That's all, folks -------------------------------------------------*/
Commit	Line	Data
d03ab969	1	/* --c--
d03ab969	2	*
f45a00c6	3	* $Id: mpx.c,v 1.4 1999/11/17 18:04:09 mdw Exp $
d03ab969	4	*
	5	* Low-level multiprecision arithmetic
	6	*
	7	* (c) 1999 Straylight/Edgeware
	8	*/
	9
	10	/----- Licensing notice --------------------------------------------------
	11	*
	12	* This file is part of Catacomb.
	13	*
	14	* Catacomb is free software; you can redistribute it and/or modify
	15	* it under the terms of the GNU Library General Public License as
	16	* published by the Free Software Foundation; either version 2 of the
	17	* License, or (at your option) any later version.
	18	*
	19	* Catacomb is distributed in the hope that it will be useful,
	20	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	21	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	22	* GNU Library General Public License for more details.
	23	*
	24	* You should have received a copy of the GNU Library General Public
	25	* License along with Catacomb; if not, write to the Free
	26	* Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
	27	* MA 02111-1307, USA.
	28	*/
	29
	30	/----- Revision history --------------------------------------------------
	31	*
	32	* $Log: mpx.c,v $
f45a00c6	33	* Revision 1.4 1999/11/17 18:04:09 mdw
	34	* Add two's-complement functionality. Improve mpx_udiv a little by
	35	* performing the multiplication of the divisor by q with the subtraction
	36	* from r.
	37	*
501da53c	38	* Revision 1.3 1999/11/13 01:57:31 mdw
	39	* Remove stray debugging code.
	40	*
c8a2f9ef	41	* Revision 1.2 1999/11/13 01:50:59 mdw
	42	* Multiprecision routines finished and tested.
	43	*
d03ab969	44	* Revision 1.1 1999/09/03 08:41:12 mdw
	45	* Initial import.
	46	*
	47	*/
	48
	49	/----- Header files ------------------------------------------------------/
	50
c8a2f9ef	51	#include <assert.h>
d03ab969	52	#include <stdio.h>
	53	#include <stdlib.h>
	54	#include <string.h>
	55
	56	#include <mLib/bits.h>
	57
	58	#include "mptypes.h"
	59	#include "mpx.h"
	60
	61	/----- Loading and storing -----------------------------------------------/
	62
	63	/* --- @mpx_storel@ --- *
	64	*
	65	* Arguments: @const mpw v, vl@ = base and limit of source vector
c8a2f9ef	66	* @void *pp@ = pointer to octet array
d03ab969	67	* @size_t sz@ = size of octet array
	68	*
	69	* Returns: ---
	70	*
	71	* Use: Stores an MP in an octet array, least significant octet
	72	* first. High-end octets are silently discarded if there
	73	* isn't enough space for them.
	74	*/
	75
c8a2f9ef	76	void mpx_storel(const mpw v, const mpw vl, void *pp, size_t sz)
d03ab969	77	{
d03ab969	78	mpw n, w = 0;
c8a2f9ef	79	octet p = pp, q = p + sz;
d03ab969	80	unsigned bits = 0;
	81
	82	while (p < q) {
	83	if (bits < 8) {
	84	if (v >= vl) {
	85	*p++ = U8(w);
	86	break;
	87	}
	88	n = *v++;
	89	*p++ = U8(w \| n << bits);
	90	w = n >> (8 - bits);
	91	bits += MPW_BITS - 8;
	92	} else {
	93	*p++ = U8(w);
	94	w >>= 8;
	95	bits -= 8;
	96	}
	97	}
	98	memset(p, 0, q - p);
	99	}
	100
	101	/* --- @mpx_loadl@ --- *
	102	*
	103	* Arguments: @mpw v, vl@ = base and limit of destination vector
c8a2f9ef	104	* @const void *pp@ = pointer to octet array
d03ab969	105	* @size_t sz@ = size of octet array
	106	*
	107	* Returns: ---
	108	*
	109	* Use: Loads an MP in an octet array, least significant octet
	110	* first. High-end octets are ignored if there isn't enough
	111	* space for them.
	112	*/
	113
c8a2f9ef	114	void mpx_loadl(mpw v, mpw vl, const void *pp, size_t sz)
d03ab969	115	{
d03ab969	116	unsigned n;
c8a2f9ef	117	mpw w = 0;
c8a2f9ef	118	const octet p = pp, q = p + sz;
d03ab969	119	unsigned bits = 0;
	120
	121	if (v >= vl)
	122	return;
	123	while (p < q) {
	124	n = U8(*p++);
	125	w \|= n << bits;
	126	bits += 8;
	127	if (bits >= MPW_BITS) {
	128	*v++ = MPW(w);
	129	w = n >> (MPW_BITS - bits + 8);
	130	bits -= MPW_BITS;
	131	if (v >= vl)
	132	return;
	133	}
	134	}
	135	*v++ = w;
	136	MPX_ZERO(v, vl);
	137	}
	138
	139	/* --- @mpx_storeb@ --- *
	140	*
	141	* Arguments: @const mpw v, vl@ = base and limit of source vector
c8a2f9ef	142	* @void *pp@ = pointer to octet array
d03ab969	143	* @size_t sz@ = size of octet array
	144	*
	145	* Returns: ---
	146	*
	147	* Use: Stores an MP in an octet array, most significant octet
	148	* first. High-end octets are silently discarded if there
	149	* isn't enough space for them.
	150	*/
	151
c8a2f9ef	152	void mpx_storeb(const mpw v, const mpw vl, void *pp, size_t sz)
d03ab969	153	{
d03ab969	154	mpw n, w = 0;
c8a2f9ef	155	octet p = pp, q = p + sz;
d03ab969	156	unsigned bits = 0;
	157
	158	while (q > p) {
	159	if (bits < 8) {
	160	if (v >= vl) {
	161	*--q = U8(w);
	162	break;
	163	}
	164	n = *v++;
	165	*--q = U8(w \| n << bits);
	166	w = n >> (8 - bits);
	167	bits += MPW_BITS - 8;
	168	} else {
	169	*--q = U8(w);
	170	w >>= 8;
	171	bits -= 8;
	172	}
	173	}
	174	memset(p, 0, q - p);
	175	}
	176
	177	/* --- @mpx_loadb@ --- *
	178	*
	179	* Arguments: @mpw v, vl@ = base and limit of destination vector
c8a2f9ef	180	* @const void *pp@ = pointer to octet array
d03ab969	181	* @size_t sz@ = size of octet array
	182	*
	183	* Returns: ---
	184	*
	185	* Use: Loads an MP in an octet array, most significant octet
	186	* first. High-end octets are ignored if there isn't enough
	187	* space for them.
	188	*/
	189
c8a2f9ef	190	void mpx_loadb(mpw v, mpw vl, const void *pp, size_t sz)
d03ab969	191	{
d03ab969	192	unsigned n;
c8a2f9ef	193	mpw w = 0;
c8a2f9ef	194	const octet p = pp, q = p + sz;
d03ab969	195	unsigned bits = 0;
	196
	197	if (v >= vl)
	198	return;
	199	while (q > p) {
	200	n = U8(*--q);
	201	w \|= n << bits;
	202	bits += 8;
	203	if (bits >= MPW_BITS) {
	204	*v++ = MPW(w);
	205	w = n >> (MPW_BITS - bits + 8);
	206	bits -= MPW_BITS;
	207	if (v >= vl)
	208	return;
	209	}
	210	}
	211	*v++ = w;
	212	MPX_ZERO(v, vl);
	213	}
	214
	215	/----- Logical shifting --------------------------------------------------/
	216
	217	/* --- @mpx_lsl@ --- *
	218	*
	219	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	220	* @const mpw av, avl@ = source vector base and limit
	221	* @size_t n@ = number of bit positions to shift by
	222	*
	223	* Returns: ---
	224	*
	225	* Use: Performs a logical shift left operation on an integer.
	226	*/
	227
	228	void mpx_lsl(mpw dv, mpw dvl, const mpw av, const mpw avl, size_t n)
	229	{
	230	size_t nw;
	231	unsigned nb;
	232
	233	/* --- Trivial special case --- */
	234
	235	if (n == 0)
	236	MPX_COPY(dv, dvl, av, avl);
	237
	238	/* --- Single bit shifting --- */
	239
	240	else if (n == 1) {
	241	mpw w = 0;
	242	while (av < avl) {
	243	mpw t;
	244	if (dv >= dvl)
	245	goto done;
	246	t = *av++;
	247	*dv++ = MPW((t << 1) \| w);
	248	w = t >> (MPW_BITS - 1);
	249	}
	250	if (dv >= dvl)
	251	goto done;
	252	*dv++ = MPW(w);
	253	MPX_ZERO(dv, dvl);
c8a2f9ef	254	goto done;
d03ab969	255	}
	256
	257	/* --- Break out word and bit shifts for more sophisticated work --- */
	258
	259	nw = n / MPW_BITS;
	260	nb = n % MPW_BITS;
	261
	262	/* --- Handle a shift by a multiple of the word size --- */
	263
	264	if (nb == 0) {
	265	MPX_COPY(dv + nw, dvl, av, avl);
	266	memset(dv, 0, MPWS(nw));
	267	}
	268
c8a2f9ef	269	/* --- And finally the difficult case --- *
	270	*
	271	* This is a little convoluted, because I have to start from the end and
	272	* work backwards to avoid overwriting the source, if they're both the same
	273	* block of memory.
	274	*/
d03ab969	275
	276	else {
	277	mpw w;
	278	size_t nr = MPW_BITS - nb;
c8a2f9ef	279	size_t dvn = dvl - dv;
c8a2f9ef	280	size_t avn = avl - av;
d03ab969	281
c8a2f9ef	282	if (dvn <= nw) {
d03ab969	283	MPX_ZERO(dv, dvl);
	284	goto done;
	285	}
d03ab969	286
c8a2f9ef	287	if (dvn > avn + nw) {
	288	size_t off = avn + nw + 1;
	289	MPX_ZERO(dv + off, dvl);
	290	dvl = dv + off;
	291	w = 0;
	292	} else {
	293	avl = av + dvn - nw;
	294	w = *--avl << nb;
d03ab969	295	}
d03ab969	296
c8a2f9ef	297	while (avl > av) {
	298	mpw t = *--avl;
	299	*--dvl = (t >> nr) \| w;
	300	w = t << nb;
d03ab969	301	}
c8a2f9ef	302
	303	*--dvl = w;
	304	MPX_ZERO(dv, dvl);
d03ab969	305	}
	306
	307	done:;
	308	}
	309
	310	/* --- @mpx_lsr@ --- *
	311	*
	312	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	313	* @const mpw av, avl@ = source vector base and limit
	314	* @size_t n@ = number of bit positions to shift by
	315	*
	316	* Returns: ---
	317	*
	318	* Use: Performs a logical shift right operation on an integer.
	319	*/
	320
	321	void mpx_lsr(mpw dv, mpw dvl, const mpw av, const mpw avl, size_t n)
	322	{
	323	size_t nw;
	324	unsigned nb;
	325
	326	/* --- Trivial special case --- */
	327
	328	if (n == 0)
	329	MPX_COPY(dv, dvl, av, avl);
	330
	331	/* --- Single bit shifting --- */
	332
	333	else if (n == 1) {
	334	mpw w = *av++ >> 1;
	335	while (av < avl) {
	336	mpw t;
	337	if (dv >= dvl)
	338	goto done;
	339	t = *av++;
	340	*dv++ = MPW((t << (MPW_BITS - 1)) \| w);
	341	w = t >> 1;
	342	}
	343	if (dv >= dvl)
	344	goto done;
	345	*dv++ = MPW(w);
	346	MPX_ZERO(dv, dvl);
c8a2f9ef	347	goto done;
d03ab969	348	}
	349
	350	/* --- Break out word and bit shifts for more sophisticated work --- */
	351
	352	nw = n / MPW_BITS;
	353	nb = n % MPW_BITS;
	354
	355	/* --- Handle a shift by a multiple of the word size --- */
	356
	357	if (nb == 0)
	358	MPX_COPY(dv, dvl, av + nw, avl);
	359
	360	/* --- And finally the difficult case --- */
	361
	362	else {
	363	mpw w;
	364	size_t nr = MPW_BITS - nb;
	365
	366	av += nw;
	367	w = *av++;
	368	while (av < avl) {
	369	mpw t;
	370	if (dv >= dvl)
	371	goto done;
	372	t = *av++;
	373	*dv++ = MPW((w >> nb) \| (t << nr));
	374	w = t;
	375	}
	376	if (dv < dvl) {
	377	*dv++ = MPW(w >> nb);
	378	MPX_ZERO(dv, dvl);
	379	}
	380	}
	381
	382	done:;
	383	}
	384
	385	/----- Unsigned arithmetic -----------------------------------------------/
	386
f45a00c6	387	/* --- @mpx_2c@ --- *
	388	*
	389	* Arguments: @mpw dv, dvl@ = destination vector
	390	* @const mpw v, vl@ = source vector
	391	*
	392	* Returns: ---
	393	*
	394	* Use: Calculates the two's complement of @v@.
	395	*/
	396
	397	void mpx_2c(mpw dv, mpw dvl, const mpw v, const mpw vl)
	398	{
	399	mpw c = 0;
	400	while (dv < dvl && v < vl)
	401	dv++ = c = MPW(~v++);
	402	if (dv < dvl) {
	403	if (c > MPW_MAX / 2)
	404	c = MPW(~0);
	405	while (dv < dvl)
	406	*dv++ = c;
	407	}
	408	MPX_UADDN(dv, dvl, 1);
	409	}
	410
d03ab969	411	/* --- @mpx_ucmp@ --- *
	412	*
	413	* Arguments: @const mpw av, avl@ = first argument vector base and limit
	414	* @const mpw bv, bvl@ = second argument vector base and limit
	415	*
	416	* Returns: Less than, equal to, or greater than zero depending on
	417	* whether @a@ is less than, equal to or greater than @b@,
	418	* respectively.
	419	*
	420	* Use: Performs an unsigned integer comparison.
	421	*/
	422
	423	int mpx_ucmp(const mpw av, const mpw avl, const mpw bv, const mpw bvl)
	424	{
	425	MPX_SHRINK(av, avl);
	426	MPX_SHRINK(bv, bvl);
	427
	428	if (avl - av > bvl - bv)
	429	return (+1);
	430	else if (avl - av < bvl - bv)
	431	return (-1);
	432	else while (avl > av) {
	433	mpw a = --avl, b = --bvl;
	434	if (a > b)
	435	return (+1);
	436	else if (a < b)
	437	return (-1);
	438	}
	439	return (0);
	440	}
	441
	442	/* --- @mpx_uadd@ --- *
	443	*
	444	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	445	* @const mpw av, avl@ = first addend vector base and limit
	446	* @const mpw bv, bvl@ = second addend vector base and limit
	447	*
	448	* Returns: ---
	449	*
	450	* Use: Performs unsigned integer addition. If the result overflows
	451	* the destination vector, high-order bits are discarded. This
	452	* means that two's complement addition happens more or less for
	453	* free, although that's more a side-effect than anything else.
	454	* The result vector may be equal to either or both source
	455	* vectors, but may not otherwise overlap them.
	456	*/
	457
	458	void mpx_uadd(mpw dv, mpw dvl, const mpw av, const mpw avl,
	459	const mpw bv, const mpw bvl)
	460	{
	461	mpw c = 0;
	462
	463	while (av < avl \|\| bv < bvl) {
	464	mpw a, b;
	465	mpd x;
	466	if (dv >= dvl)
	467	return;
	468	a = (av < avl) ? *av++ : 0;
	469	b = (bv < bvl) ? *bv++ : 0;
	470	x = (mpd)a + (mpd)b + c;
	471	*dv++ = MPW(x);
	472	c = x >> MPW_BITS;
	473	}
	474	if (dv < dvl) {
475	*dv++ = c;
476	MPX_ZERO(dv, dvl);
477	}
478	}
479
480	/* --- @mpx_usub@ --- *
481	*
482	* Arguments: @mpw dv, dvl@ = destination vector base and limit
483	* @const mpw av, avl@ = first argument vector base and limit
484	* @const mpw bv, bvl@ = second argument vector base and limit
485	*
486	* Returns: ---
487	*
488	* Use: Performs unsigned integer subtraction. If the result
489	* overflows the destination vector, high-order bits are
490	* discarded. This means that two's complement subtraction
491	* happens more or less for free, althuogh that's more a side-
492	* effect than anything else. The result vector may be equal to
493	* either or both source vectors, but may not otherwise overlap
494	* them.
495	*/
496
497	void mpx_usub(mpw dv, mpw dvl, const mpw av, const mpw avl,
498	const mpw bv, const mpw bvl)
499	{
500	mpw c = 0;
501
502	while (av < avl \|\| bv < bvl) {
503	mpw a, b;
504	mpd x;
505	if (dv >= dvl)
506	return;
507	a = (av < avl) ? *av++ : 0;
508	b = (bv < bvl) ? *bv++ : 0;
c8a2f9ef	509	x = (mpd)a - (mpd)b - c;
d03ab969	510	*dv++ = MPW(x);
c8a2f9ef	511	if (x >> MPW_BITS)
	512	c = 1;
	513	else
	514	c = 0;
d03ab969	515	}
c8a2f9ef	516	if (c)
c8a2f9ef	517	c = MPW_MAX;
d03ab969	518	while (dv < dvl)
c8a2f9ef	519	*dv++ = c;
d03ab969	520	}
	521
	522	/* --- @mpx_umul@ --- *
	523	*
	524	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	525	* @const mpw av, avl@ = multiplicand vector base and limit
	526	* @const mpw bv, bvl@ = multiplier vector base and limit
	527	*
	528	* Returns: ---
	529	*
	530	* Use: Performs unsigned integer multiplication. If the result
	531	* overflows the desination vector, high-order bits are
	532	* discarded. The result vector may not overlap the argument
	533	* vectors in any way.
	534	*/
	535
	536	void mpx_umul(mpw dv, mpw dvl, const mpw av, const mpw avl,
	537	const mpw bv, const mpw bvl)
	538	{
	539	/* --- This is probably worthwhile on a multiply --- */
	540
	541	MPX_SHRINK(av, avl);
	542	MPX_SHRINK(bv, bvl);
	543
	544	/* --- Deal with a multiply by zero --- */
	545
	546	if (bv == bvl) {
c8a2f9ef	547	MPX_ZERO(dv, dvl);
d03ab969	548	return;
	549	}
	550
	551	/* --- Do the initial multiply and initialize the accumulator --- */
	552
	553	MPX_UMULN(dv, dvl, av, avl, *bv++);
	554
	555	/* --- Do the remaining multiply/accumulates --- */
	556
c8a2f9ef	557	while (dv < dvl && bv < bvl) {
d03ab969	558	mpw m = *bv++;
c8a2f9ef	559	mpw c = 0;
d03ab969	560	const mpw *avv = av;
	561	mpw *dvv = ++dv;
	562
	563	while (avv < avl) {
	564	mpd x;
	565	if (dvv >= dvl)
	566	goto next;
c8a2f9ef	567	x = (mpd)dvv + (mpd)m (mpd)*avv++ + c;
c8a2f9ef	568	*dvv++ = MPW(x);
d03ab969	569	c = x >> MPW_BITS;
d03ab969	570	}
c8a2f9ef	571	MPX_UADDN(dvv, dvl, c);
d03ab969	572	next:;
	573	}
	574	}
	575
c8a2f9ef	576	/* --- @mpx_usqr@ --- *
	577	*
	578	* Arguments: @mpw dv, dvl@ = destination vector base and limit
	579	* @const mpw av, av@ = source vector base and limit
	580	*
	581	* Returns: ---
	582	*
	583	* Use: Performs unsigned integer squaring. The result vector must
	584	* not overlap the source vector in any way.
	585	*/
	586
	587	void mpx_usqr(mpw dv, mpw dvl, const mpw av, const mpw avl)
	588	{
	589	MPX_ZERO(dv, dvl);
	590
	591	/* --- Main loop --- */
	592
	593	while (av < avl) {
	594	const mpw *avv = av;
	595	mpw *dvv = dv;
	596	mpw a = *av;
	597	mpd c;
	598
	599	/* --- Stop if I've run out of destination --- */
	600
	601	if (dvv >= dvl)
	602	break;
	603
	604	/* --- Work out the square at this point in the proceedings --- */
	605
	606	{
c8a2f9ef	607	mpd x = (mpd)a * (mpd)a + *dvv;
	608	*dvv++ = MPW(x);
	609	c = MPW(x >> MPW_BITS);
	610	}
	611
	612	/* --- Now fix up the rest of the vector upwards --- */
	613
	614	avv++;
	615	while (dvv < dvl && avv < avl) {
c8a2f9ef	616	mpd x = (mpd)a * (mpd)*avv++;
	617	mpd y = ((x << 1) & MPW_MAX) + c + *dvv;
	618	c = (x >> (MPW_BITS - 1)) + (y >> MPW_BITS);
	619	*dvv++ = MPW(y);
	620	}
	621	while (dvv < dvl && c) {
	622	mpd x = c + *dvv;
	623	*dvv++ = MPW(x);
	624	c = x >> MPW_BITS;
	625	}
	626
	627	/* --- Get ready for the next round --- */
	628
	629	av++;
	630	dv += 2;
	631	}
	632	}
	633
d03ab969	634	/* --- @mpx_udiv@ --- *
	635	*
	636	* Arguments: @mpw qv, qvl@ = quotient vector base and limit
	637	* @mpw rv, rvl@ = dividend/remainder vector base and limit
	638	* @const mpw dv, dvl@ = divisor vector base and limit
c8a2f9ef	639	* @mpw sv, svl@ = scratch workspace
d03ab969	640	*
	641	* Returns: ---
	642	*
	643	* Use: Performs unsigned integer division. If the result overflows
	644	* the quotient vector, high-order bits are discarded. (Clearly
	645	* the remainder vector can't overflow.) The various vectors
	646	* may not overlap in any way. Yes, I know it's a bit odd
	647	* requiring the dividend to be in the result position but it
	648	* does make some sense really. The remainder must have
c8a2f9ef	649	* headroom for at least two extra words. The scratch space
f45a00c6	650	* must be at least one word larger than the divisor.
d03ab969	651	*/
	652
	653	void mpx_udiv(mpw qv, mpw qvl, mpw rv, mpw rvl,
c8a2f9ef	654	const mpw dv, const mpw dvl,
c8a2f9ef	655	mpw sv, mpw svl)
d03ab969	656	{
d03ab969	657	unsigned norm = 0;
	658	size_t scale;
	659	mpw d, dd;
	660
	661	/* --- Initialize the quotient --- */
	662
	663	MPX_ZERO(qv, qvl);
	664
c8a2f9ef	665	/* --- Perform some sanity checks --- */
	666
	667	MPX_SHRINK(dv, dvl);
	668	assert(((void)"division by zero in mpx_udiv", dv < dvl));
	669
d03ab969	670	/* --- Normalize the divisor --- *
	671	*
	672	* The algorithm requires that the divisor be at least two digits long.
	673	* This is easy to fix.
	674	*/
	675
c8a2f9ef	676	{
c8a2f9ef	677	unsigned b;
d03ab969	678
c8a2f9ef	679	d = dvl[-1];
	680	for (b = MPW_BITS / 2; b; b >>= 1) {
	681	if (d < (MPW_MAX >> b)) {
	682	d <<= b;
	683	norm += b;
	684	}
	685	}
	686	if (dv + 1 == dvl)
	687	norm += MPW_BITS;
d03ab969	688	}
d03ab969	689
	690	/* --- Normalize the dividend/remainder to match --- */
	691
c8a2f9ef	692	if (norm) {
c8a2f9ef	693	mpx_lsl(rv, rvl, rv, rvl, norm);
f45a00c6	694	mpx_lsl(sv, svl, dv, dvl, norm);
c8a2f9ef	695	dv = sv;
f45a00c6	696	dvl = svl;
c8a2f9ef	697	MPX_SHRINK(dv, dvl);
	698	}
	699
d03ab969	700	MPX_SHRINK(rv, rvl);
c8a2f9ef	701	d = dvl[-1];
c8a2f9ef	702	dd = dvl[-2];
d03ab969	703
	704	/* --- Work out the relative scales --- */
	705
	706	{
	707	size_t rvn = rvl - rv;
c8a2f9ef	708	size_t dvn = dvl - dv;
d03ab969	709
	710	/* --- If the divisor is clearly larger, notice this --- */
	711
	712	if (dvn > rvn) {
	713	mpx_lsr(rv, rvl, rv, rvl, norm);
	714	return;
	715	}
	716
	717	scale = rvn - dvn;
	718	}
	719
	720	/* --- Calculate the most significant quotient digit --- *
	721	*
	722	* Because the divisor has its top bit set, this can only happen once. The
	723	* pointer arithmetic is a little contorted, to make sure that the
	724	* behaviour is defined.
	725	*/
	726
	727	if (MPX_UCMP(rv + scale, rvl, >=, dv, dvl)) {
	728	mpx_usub(rv + scale, rvl, rv + scale, rvl, dv, dvl);
	729	if (qvl - qv > scale)
	730	qv[scale] = 1;
	731	}
	732
	733	/* --- Now for the main loop --- */
	734
	735	{
c8a2f9ef	736	mpw *rvv = rvl - 2;
d03ab969	737
d03ab969	738	while (scale) {
c8a2f9ef	739	mpw q;
c8a2f9ef	740	mpd rh;
d03ab969	741
	742	/* --- Get an estimate for the next quotient digit --- */
	743
c8a2f9ef	744	mpw r = rvv[1];
	745	mpw rr = rvv[0];
	746	mpw rrr = *--rvv;
	747
	748	scale--;
	749	rh = ((mpd)r << MPW_BITS) \| rr;
d03ab969	750	if (r == d)
d03ab969	751	q = MPW_MAX;
c8a2f9ef	752	else
c8a2f9ef	753	q = MPW(rh / d);
d03ab969	754
	755	/* --- Refine the estimate --- */
	756
	757	{
	758	mpd yh = (mpd)d * q;
	759	mpd yl = (mpd)dd * q;
c8a2f9ef	760
	761	if (yl > MPW_MAX) {
	762	yh += yl >> MPW_BITS;
	763	yl &= MPW_MAX;
	764	}
	765
	766	while (yh > rh \|\| (yh == rh && yl > rrr)) {
	767	q--;
	768	yh -= d;
	769	if (yl < dd) {
	770	yh++;
	771	yl += MPW_MAX;
	772	}
	773	yl -= dd;
	774	}
	775	}
	776
	777	/* --- Remove a chunk from the dividend --- */
	778
	779	{
	780	mpw *svv;
	781	const mpw *dvv;
f45a00c6	782	mpw mc = 0, sc = 0;
c8a2f9ef	783
f45a00c6	784	/* --- Calculate the size of the chunk --- *
	785	*
	786	* This does the whole job of calculating @r >> scale - qd@.
	787	*/
c8a2f9ef	788
f45a00c6	789	for (svv = rv + scale, dvv = dv;
	790	dvv < dvl && svv < rvl;
	791	svv++, dvv++) {
	792	mpd x = (mpd)dvv (mpd)q + mc;
	793	mc = x >> MPW_BITS;
	794	x = (mpd)*svv - MPW(x) - sc;
c8a2f9ef	795	*svv = MPW(x);
f45a00c6	796	if (x >> MPW_BITS)
	797	sc = 1;
	798	else
	799	sc = 0;
	800	}
	801
	802	if (svv < rvl) {
	803	mpd x = (mpd)*svv - mc - sc;
	804	*svv++ = MPW(x);
	805	if (x >> MPW_BITS)
	806	sc = MPW_MAX;
	807	else
	808	sc = 0;
	809	while (svv < rvl)
	810	*svv++ = sc;
c8a2f9ef	811	}
c8a2f9ef	812
f45a00c6	813	/* --- Fix if the quotient was too large --- *
c8a2f9ef	814	*
f45a00c6	815	* This doesn't seem to happen very often.
c8a2f9ef	816	*/
c8a2f9ef	817
c8a2f9ef	818	if (rvl[-1] > MPW_MAX / 2) {
	819	mpx_uadd(rv + scale, rvl, rv + scale, rvl, dv, dvl);
	820	q--;
	821	}
	822	}
	823
	824	/* --- Done for another iteration --- */
	825
	826	if (qvl - qv > scale)
	827	qv[scale] = q;
	828	r = rr;
	829	rr = rrr;
	830	}
	831	}
	832
	833	/* --- Now fiddle with unnormalizing and things --- */
	834
	835	mpx_lsr(rv, rvl, rv, rvl, norm);
d03ab969	836	}
	837
	838	/----- That's all, folks -------------------------------------------------/