[u/mdw/putty] / sshec.c

/*
 * Elliptic curve arithmetic.
 */

#include "bn-internal.h"
#include "ssh.h"

/* Some background.
 *
 * These notes are intended to help programmers who are mathematically
 * inclined, but not well versed in the theory of elliptic curves.  If more
 * detail is required, introductory texts of various levels of sophistication
 * are readily available.  I found
 *
 * L. Washington, `Elliptic Curves: Number Theory and Cryptography', CRC
 * Press, 2003
 *
 * useful.  Further background on algebra and number theory can be found in
 *
 * V. Shoup, `A Computational Introduction to Number Theory and Algebra',
 * version 2, Cambridge University Press, 2008; http://shoup.net/ntb/
 * [CC-ND-NC].
 *
 * We work in a finite field k = GF(p^m).  To make things easier, we'll only
 * deal with the cases where p = 2 (`binary' fields) or m = 1 (`prime'
 * fields).  In fact, we don't currently implement binary fields either.
 *
 * We start in two-dimensional projective space over k, denoted P^2(k).  This
 * is the space of classes of triples of elements of k { (a x : a y : a z) |
 * a in k^* } such that x y z /= 0.  Projective points with z /= 0 are called
 * `finite', and correspond to points in the more familiar two-dimensional
 * affine space (x/z, y/z); points with z = 0 are called `infinite'.
 *
 * An elliptic curve is the set of projective points satisfying an equation
 * of the form
 *
 *	y^2 z + a_1 x y z + a_3 y z^2 = x^3 + a_2 x^2 z + a_4 x z^2 + a_6 z^3
 *
 * (the `Weierstrass form'), such that the partial derivatives don't both
 * vanish at any point.  (Notice that this is a homogeneous equation -- all
 * of the monomials have the same degree -- so this is a well-defined subset
 * of our projective space.  Don't worry about the numbering of the
 * coefficients: maybe the later stuff about Jacobian coordinates will make
 * them clear.)
 *
 * Let's consider, for a moment, a straight line r x + s y + t z = 0 which
 * meets the curve at two points P and Q; then they must also meet again at
 * some other point R.  (Here, any or all of P, Q, and R might be equal;
 * in this case, the simultaneous equations have repeated solutions.)  Let's
 * go out on a limb and write
 *
 *	P + Q + R = 0
 *
 * whenever three points P, Q, and R on the curve are collinear.  Obviously
 * this notion of `addition' is commutative: three points are collinear
 * whichever order we write them.  If we designate some point O as being an
 * `identity' then we can introduce a notion of inverses.  And then some
 * magic occurs: this `addition' operation we've just invented is also
 * associative -- so it's an abelian group operation!  (This can be
 * demonstrated by slogging through the algebra, but there's a fancy way
 * involving the Weil pairing.  Washington's book has both versions.)
 *
 * So we have an abelian group.  It's actually quite a fun group, for
 * complicated reasons I'm not going to do into here.  For our purposes it's
 * sufficient to note that, in most elliptic curve groups, the discrete
 * logarithm problem -- finding x given x P -- seems really very hard.  I
 * mean, much harder than in multiplicative subgroups of finite fields,
 * where index calculus techniques can give you the answer in subexponential
 * time.  The best algorithms known for solving this problem in elliptic
 * curve groups are the ones which work in any old group G -- and they run in
 * O(sqrt(#G)) time.
 *
 * Staring at the Weierstrass equation some more, we can determine that in
 * fact there's exactly one infinite point on the curve: if z = 0 then we
 * must have x = 0, so (0 : 1 : 0) is precisely that point.  That's kind of
 * handy.
 *
 * The Weierstrass form is pretty horrid, really.  Fortunately, it's always
 * possible to apply a linear transformation to make it less nasty.  If p > 3
 * (which it will be for the prime fields we care about), we can simplify
 * this to
 *
 *	y^2 z = x^3 + a x z^2 + b z^3 (with 4 a^3 + 27 b^2 /= 0)
 *
 * and in the binary case we can simplify to
 *
 *	y^2 z + x y z = x^3 + a x^2 z + b z^3 (with b /= 0)
 *
 * and it's these kinds of forms which we're actually going to work with.
 * (There are other forms of elliptic curves which have interesting benefits,
 * e.g., the Montgomery and Edwards forms, but they've not really caught on
 * in standards.)  Linear transformations don't mess up the geometry, so the
 * group we constructed above still works -- and, indeed, the transformation
 * is an isomorphism of groups.  And, finally, the point at infinity we found
 * above is still infinite.  Let's call that point O, and declare it to be
 * our additive identity.  That leaves the finite points (x : y : 1), which
 * correspond to the affine points
 *
 *	y^2 = x^3 + a x + b	or	y^2 + x y = x^3 + a x^2 + b
 *
 * So, if we want to add two points P and Q, we draw the line between them;
 * if P = Q then the line in question is the tangent to the curve at P.  (The
 * smoothness condition above is precisely what we need for this tangent to
 * be well-defined.  Unsmooth curves aren't worth worrying about, because
 * they're cryptographically useless: they're isomorphic to some other group
 * in which discrete logs are very easy.)  In prime fields, if this line is
 * vertical then it meets the curve again at O, and P + Q = O; otherwise it
 * meets the curve at some other finite point R = (x, y), and the answer we
 * want is P + Q = -R = (x, -y).  In binary fields, the situation is similar
 * but a little messier because of the x y term we couldn't eliminate.
 *
 * It's not very hard to work out where this point R is.  You get a cubic
 * equation out the far end which looks kind of daunting until you notice
 * that the negation of the x^2 coefficient is exactly the sum of the roots,
 * and you already know two of them.  Unfortunately, the solution is still
 * somewhat unpleasant and involves a bunch of division -- and in finite
 * fields that involves computing a modular inverse, which is rather slow.
 *
 * Wouldn't it be really nice if we could somehow keep the denominators
 * separate, so we could just do a few `big' divisions at the end of some
 * calculation?  Of course, we'd need to tweak the point-addition formulae so
 * that we could cope with input points which have separate denominators.
 * And so we enter the realm of fun coordinate systems.
 *
 * Perhaps an obvious choice of coordinates is the projective form above:
 * rather than working out (x/m, y/n), we could just keep track of a
 * projective point (x : y : m n).  It turns out that there's a slightly
 * different variant which is a little better: `Jacobian coordinates' are
 * classes of triples (x :: y :: z), where x y z /= 0, and the point (x :: y
 * :: z) is equivalent to (a^2 x :: a^3 y :: a z).
 *
 * The (simplified) Wieierstrass equation, using Jacobian projective
 * coordinates, looks like this.
 *
 *	y^2 = x^3 + a x z^2 + b z^6
 *
 * or
 *
 *	y^2 + x y z = x^3 + a x^2 z + b z^3
 *
 * (Maybe this is a hint about the strange coefficient numbering in the
 * general Weierstrass form.)  Anyway, if a point is infinite then we have
 * y^2 = x^3, which has the obvious solution x = y = 1.
 *
 * That's about all the theory we need for now.  Let's get into the code.
 */

/* Let's start by making an abstract interface for finite field arithmetic.
 * If we need to do clever things later then this will be a convenient
 * extension point (e.g., supporting binary fields, working in extension
 * fields as we'd need if we wanted to implement Tate or Weil pairings, or
 * just doing different kinds of optimizations for the prime case).
 *
 * This is quite cheesy, but it's enough for now.
 */

union fieldelt {
    Bignum n;
};

struct field {
    const struct field_ops *ops;	/* Field operations table. */
    union fieldelt zero, one;		/* Identity elements. */
    Bignum q;				/* The field size. */
};

struct field_ops {
    void (*destroy)(struct field *f);	/* Destroy the field object. */

    void (*init)(struct field *f, union fieldelt *x);
					/* Initialize x (to null). */

    void (*free)(struct field *f, union fieldelt *x);
					/* Free an element x, leaving it
					 * null.
					 */

    void (*copy)(struct field *f, union fieldelt *d, union fieldelt *x);
					/* Make d be a copy of x. */

    void (*frombn)(struct field *f, union fieldelt *d, Bignum n);
					/* Convert n to an element. */

    Bignum (*tobn)(struct field *f, union fieldelt *x);
					/* Convert x to an integer. */

    int (*zerop)(struct field *f, union fieldelt *x);
					/* Return nonzero if x is zero. */

    void (*dbl)(struct field *f, union fieldelt *d, union fieldelt *x);
					/* Store 2 x in d. */

    void (*add)(struct field *f, union fieldelt *d,
		union fieldelt *x, union fieldelt *y);
					/* Store x + y in d. */

    void (*sub)(struct field *f, union fieldelt *d,
		union fieldelt *x, union fieldelt *y);
					/* Store x - y in d. */

    void (*mul)(struct field *f, union fieldelt *d,
		union fieldelt *x, union fieldelt *y);
					/* Store x y in d. */

    void (*sqr)(struct field *f, union fieldelt *d, union fieldelt *x);
					/* Store x^2 in d. */

    void (*inv)(struct field *f, union fieldelt *d, union fieldelt *x);
					/* Store 1/x in d; it is an error
					 * for x to be zero.
					 */

    int (*sqrt)(struct field *f, union fieldelt *d, union fieldelt *x);
					/* If x is a square, store a square
					 * root (chosen arbitrarily) in d
					 * and return nonzero; otherwise
					 * return zero leaving d unchanged.
					 */

    /* For internal use only.  The standard methods call on these for
     * detailed field-specific behaviour.
     */
    void (*_reduce)(struct field *f, union fieldelt *x);
					/* Reduce an intermediate result so
					 * that it's within whatever bounds
					 * are appropriate.
					 */

    /* These functions are only important for prime fields. */
    void (*dbl)(struct field *f, union fieldelt *d, union fieldelt *x);
					/* Store 2 x in d. */

    void (*tpl)(struct field *f, union fieldelt *d, union fieldelt *x);
					/* Store 3 x in d. */

    void (*dql)(struct field *f, union fieldelt *d, union fieldelt *x);
					/* Store 4 x in d. */

    void (*hlv)(struct field *f, union fieldelt *d, union fieldelt *x);
					/* Store x/2 in d. */
};

/* Some standard methods for prime fields. */

static void gen_destroy(struct field *f)
    { sfree(f); }

static void prime_init(struct field *f, union fieldelt *x)
    { x->n = 0; }

static void prime_free(struct field *f, union fieldelt *x)
    { if (x->n) { freebn(x->n); x->n = 0; } }

static void prime_frombn(struct field *f, union fieldelt *d, Bignum n)
    { f->ops->free(f, d); d->n = copybn(n); }

static Bignum prime_tobn(struct field *f, union fieldelt *x)
    { return copybn(x->n); }

static Bignum prime_zerop(struct field *f, union fieldelt *x)
    { return bignum_cmp(x->n, f->zero.n) == 0; }

static void prime_add(struct field *f, union fieldelt *d,
		      union fieldelt *x, union fieldelt *y)
{
    Bignum sum = bigadd(x, y);
    Bignum red = bigsub(sum, f->q);

    f->ops->free(f, d);
    if (!red)
	d->n = sum;
    else {
	freebn(sum);
	d->n = red;
    }
}

static void prime_sub(struct field *f, union fieldelt *d,
		      union fieldelt *x, union fieldelt *y)
{
    Bignum raise = bigadd(x, f->q);
    Bignum diff = bigsub(raise, y);
    Bignum drop = bigsub(diff, f->q);

    f->ops->free(f, d); freebn(raise);
    if (!drop)
	d->n = diff;
    else {
	freebn(diff);
	d->n = drop;
    }
}

static void prime_mul(struct field *f, union fieldelt *d,
		      union fieldelt *x, union fieldelt *y)
{
    Bignum prod = bigmul(x, y);

    f->ops->free(f, d);
    d->n = prod;
    f->ops->_reduce(f, d);
}

static void gen_sqr(struct field *f, union fieldelt *d, union fieldelt *x)
    { f->ops->mul(f->ops, d, x, x); }

static void prime_inv(struct field *f, union fieldelt *d, union fieldelt *x)
{
    Bignum inv = modinv(x->n, f->q);

    f->ops->free(f, d);
    d->n = inv;
}

static int prime_sqrt(struct field *f, union fieldelt *d, union fieldelt *x)
{
    Bignum sqrt = modsqrt(x->n, f->q);

    if (!sqrt) return 0;
    f->ops->free(f, d);
    d->n = sqrt;
    return 1;
}

static void prime_dbl(struct field *f, union_fieldelt *d, union fieldelt *x)
    { f->ops->add(f, d, x, x); }

static void prime_tpl(struct field *f, union_fieldelt *d, union fieldelt *x)
{
    union_fieldelt t;

    if (d != x) {
	f->ops->add(f, d, x, x);
	f->ops->add(f, d, d, x);
    } else {
	f->ops->init(f, &t);
	f->ops->copy(f, d, x);
	f->ops->add(f, d, &t, &t);
	f->ops->add(f, d, d, &t);
	f->ops->free(f, &t);
    }
}

static void prime_qdl(struct field *f, union fieldelt *d,
		     union fieldelt *x)
    { f->ops->add(f, d, x, x); f->ops->add(f, d, d, d); }

static void prime_hlv(struct field *f, union fieldelt *d, union fieldelt *x)
{
    Bignum t, u;

    if (!x->n[0])
	f->ops->copy(f, d, x);
    else {
	/* The tedious answer is to multiply by the inverse of 2 in this
	 * field.  But there's a better way: either x or q - x is actually
	 * divisible by 2, so the answer we want is either x >> 1 or p -
	 * ((p - x) >> 1) depending on whether x is even.
	 */
	if (!(x->n[1] & 1)) {
	    t = copybn(x->n);
	    shift_right(t + 1, t[0], 1);
	} else {
	    u = bigsub(f->q, x);
	    shift_right(u + 1, u[0], 1);
	    t = bigsub(f->q, u);
	    freebn(u);
	}
	f->ops->free(f, d);
	d->n = t;
}

/* Now some utilities for modular reduction.  All of the primes we're
 * concerned about are of the form p = 2^n - d for `convenient' values of d
 * -- such numbers are called `pseudo-Mersenne primes'.  Suppose we're given
 * a value x = x_0 2^n + x_1: then x - x_0 p = x_1 + x_0 d is clearly less
 * than x and congruent to it mod p.  So we can reduce x below 2^n by
 * iterating this trick; if we're unlucky enough to have p <= x < 2^n then we
 * can just subtract p.
 *
 * The values of d we'll be dealing with are specifically convenient because
 * they satisfy the following properties.
 *
 *   * We can write d = SUM_i d_i 2^i, with d_i in { -1, 0, +1 }.  (This is
 *     no surprise.)
 *
 *   * d > 0.
 *
 *   * Very few d_i /= 0.  With one small exception, if d_i /= 0 then i is
 *     divisible by 32.
 *
 * The following functions will therefore come in very handy.
 */

static void add_imm_lsl(BignumInt *x, BignumInt y, unsigned shift)
{
    BignumDbl c = y << (shift % BIGNUM_INT_BITS);

    for (x += shift/BIGNUM_INT_BITS; c; x++) {
	c += *x;
	*x = c & BIGNUM_INT_MASK;
	c >>= BIGNUM_INT_BITS;
    }
}

static void sub_imm_lsl(BignumInt *x, BignumInt y, unsigned shift)
{
    BignumDbl c = y << (shift % BIGNUM_INT_BITS);
    BignumDbl t;

    c--;
    for (x += shift/BIGNUM_INT_BITS; c; x++) {
	c ^= BIGNUM_INT_MASK;
	c += *x;
	*x = c & BIGNUM_INT_MASK;
	c >>= BIGNUM_INT_BITS;
    }
}

static void pmp_reduce(struct field *f, union fieldelt *xx,
		       void (*add_mul_d_lsl)(BignumInt *x,
					     BignumInt y,
					     unsigned shift))
{
    /* Notation: write w = BIGNUM_INT_WORDS, and B = 2^w is the base we're
     * working in.  We assume that p = 2^n - d for some d.  The job of
     * add_mul_d_lsl is to add to its argument x the value y d 2^shift.
     */

    BignumInt top, *p, *x;
    unsigned plen, xlen, i;
    unsigned topbits, topclear;
    Bignum t;

    /* Initialization: if x is shorter than p then there's nothing to do.
     * Otherwise, ensure that it's at least one word longer, since this is
     * required by the utility functions which add_mul_d_lsl will call.
     */
    p = f->q + 1; plen = f->q[0];
    x = xx->n + 1; xlen = xx->n[0];
    if (xlen < plen) return;
    else if (xlen == plen) {
	t = newbn(plen + 1);
	for (i = 0; i < plen; i++) t[i + 1] = x[i];
	t[plen] = 0;
	freebn(xx->n); xx->n = t;
	x = t + 1; xlen = plen + 1;
    }

    /* Preparation: Work out the bit length of p.  At the end of this, we'll
     * have topbits such that n = w (plen - 1) + topbits, and topclear = w -
     * topbits, so that n = w plen - topclear.
     */
    while (plen > 0 && !p[plen - 1]) plen--;
    top = p[plen - 1];
    assert(top);
    if (top & BIGNUM_TOP_BIT) topbits = BIGNUM_INT_BITS;
    else for (topbits = 0; top >> topbits; topbits++);
    topclear = BIGNUM_INT_BITS - topbits;

    /* Step 1: Trim x down to the right number of words. */
    while (xlen > plen) {

	/* Pick out the topmost word; call it y for now. */
	y = x[xlen - 1];
	if (!y) {
	    xlen--;
	    continue;
	}

	/* Observe that d == 2^n (mod p).  Clear that top word.  This
	 * effectively subtracts y B^{xlen-1} from x, so we must add back d
	 * 2^{w(xlen-1)-n} in order to preserve congruence modulo p.  Since d
	 * is smaller than 2^n, this will reduce the absolute value of x, so
	 * we make (at least some) progress.  In practice, we expect that d
	 * is a lot smaller.
	 *
	 * Note that w (xlen - 1) - n = w xlen - w - n = w (xlen - plen - 1)
	 * + topclear.
	 */
	x[xlen - 1] = 0;
	add_mul_d_lsl(x + xlen - plen - 1, y, topclear);
    }

    /* Step 2: Trim off the high bits of x, beyond the top bit of p.  In more
     * detail: if x >= 2^n > p, then write x = x_0 + 2^n y; then we can clear
     * the top topclear bits of x, and add y d to preserve congruence mod p.
     *
     * If topclear is zero then there's nothing to do here: step 1 will
     * already have arranged that x < 2^n.
     */
    if (topclear) {
	for (;;) {
	    y = x[xlen - 1] >> topbits;
	    if (!y) break;
	    x[xlen - 1] &= (1 << topbits) - 1;
	    add_mul_d_lsl(x, y, 0);
	}
    }

    /* Step 3: If x >= p then subtract p from x. */
    for (i = plen; i-- && x[i] == p[i]; );
    if (i >= plen || x[i] > p[i]) internal_sub(x, p, plen);

    /* We're done now. */
    d->n[0] = xlen;
    bn_restore_invariant(d->n);
}

/* Putting together a field-ops structure for a pseudo-Mersenne prime field.
 */

static void pmp_destroy(struct field *f)
    { freebn(f->q); sfree(f); }

#define DEFINE_PMP_FIELD(fname)						\
static const struct field_ops fname##_fieldops {			\
    pmp_destroy,							\
    prime_init, prime_free, prime_copy,					\
    prime_frombn, prime_tobn,						\
    prime_zerop,							\
    prime_add, prime_sub, prime_mul, gen_sqr, prime_inv, prime_sqrt,	\
    fname_reduce,							\
    gen_dbl, gen_tpl, gen_qdl,						\
    prime_hlv								\
};									\
									\
static struct field *make_##fname##_field(void)				\
{									\
    struct field *f = snew(struct field);				\
    f->ops = &fname##_fieldops;						\
    f->zero.n = Zero;							\
    f->one.n = One;							\
    f->q = bigunm_from_bytes(fname##_p, sizeof(fname##_p));		\
    return f;								\
}

/* Some actual field definitions. */

static const unsigned char p256_p[] = {
    0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};

static void p256_add_mul_d_lsl(BignumInt *x, BignumInt y, unsigned shift)
{
    /* p_{256} = 2^{256} - 2^{224} + 2^{192} + 2^{96} - 1 */
    add_imm_lsl(x, y, shift + 224);
    sub_imm_lsl(x, y, shift + 192);
    sub_imm_lsl(x, y, shift +  96);
    add_imm_lsl(x, y, shift +   0);
}

static void p256_reduce(struct field *f, union fieldelt *d)
    { pmp_reduce(f, d, p256_add_mul_d_lsl); }

DEFINE_PMP_FIELD(p256)

static const unsigned char p384_p[] = {
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe,
    0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
};

static void p384_add_mul_d_lsl(BignumInt *x, BignumInt y, unsigned shift)
{
    /* p_{384} = 2^{384} - 2^{128} - 2^{96} + 2^{32} - 1 */
    add_imm_lsl(x, y, shift + 128);
    add_imm_lsl(x, y, shift +  96);
    sub_imm_lsl(x, y, shift +  32);
    add_imm_lsl(x, y, shift +   0);
}

static void p384_reduce(struct field *f, union fieldelt *d)
    { pmp_reduce(f, d, p384_add_mul_d_lsl); }

DEFINE_PMP_FIELD(p384)

static const unsigned char p521_p[] = {
					0x01, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};

static void p521_add_mul_d_lsl(BignumInt *x, BignumInt y, unsigned shift)
    { add_imm_lsl(x, y, shift); }	/* p_{521} = 2^{521} - 1 */

static void p521_reduce(struct field *f, union fieldelt *d)
    { pmp_reduce(f, d, p521_add_mul_d_lsl); }

DEFINE_PMP_FIELD(p521)

/* And now, some actual elliptic curves.
 *
 * As with fields, we start by defining an abstract interface which may have
 * various implementations (e.g., different algorithms with different
 * performance properties, or based on different kinds of underlying fields.
 */

/* An external-form elliptic curve point. */
struct ecpt {
    unsigned f;				/* Flags */
#define ECPTF_INF 1u			/*   Point is at infinity */
    union fieldelt x, y;		/* x- and y-coordinates */
};

/* An internal-form elliptic curve point.  This is where most of the action
 * happens.
 */
union iecpt {
    struct {
	union fieldelt x, y, z;		/* Projective-form triple. */
    } jac;				/* Jacobian projective coords. */
};

struct ec {
    const struct ec_ops *ops;		/* Curve operations table. */
    struct field *f;			/* The underlying finite field. */
    struct iecpt p;			/* The well-known generator point */
    Bignum r;				/* The size of the generated group */
    union {
	struct { union fieldelt a, b; } w; /* Simple Weierstrass coeffs. */
    } u;
};

struct ec_ops {
    void (*destroy)(struct ec *ec);	/* Destroy the curve itself. */

    void (*init)(struct ec *ec, union iecpt *p);
					/* Initialize p (to null). */

    void (*free)(struct ec *ec, union iecpt *p);
					/* Free the point p. */

    void (*setinf)(struct ec *ec, union iecpt *p);
					/* Set p to be the point at infinity.
					 */

    int (*infp)(struct ec *ec, union iecpt *p);
					/* Answer whether p is infinite. */

    void (*copy)(struct ec *ec, union iecpt *d, union iecpt *d);
					/* Make p be a copy of d. */

    void (*in)(struct ec *ec, union iecpt *d, struct ecpt *p);
					/* Convert external-format p to
					 * internal-format d.
					 */

    void (*out)(struct ec *ec, struct ecpt *d, union iecpt *p);
					/* Convert internal-format p to
					 * external-format d.
					 */

    void (*add)(struct ec *ec, union iecpt *d,
		union iecpt *p, union iecpt *q);
					/* Store in d the sum of the points p
					 * and q.
					 */

    void (*sub)(struct ec *ec, union iecpt *d,
		union iecpt *p, union iecpt *q);
					/* Store in d the result of
					 * subtracting q from p.
					 */

    void (*neg)(struct ec *ec, union iecpt *d, union iecpt *p);
					/* Store in d the negation of p. */
};

/* And now some common utilities. */

static void weier_destroy(struct ec *ec)
{
    ec->f->ops(ec->f, &ec->u.w.a);
    ec->f->ops(ec->f, &ec->u.w.b);
    freebn(ec->r);
    f->ops->destroy(ec->f);
    sfree(ec);
}

static void jac_init(struct ec *ec, union iecpt *p)
{
    ec->f->ops->init(ec->f, &p->proj.x);
    ec->f->ops->init(ec->f, &p->proj.y);
    ec->f->ops->init(ec->f, &p->proj.z);
}

static void jac_free(struct ec *ec, union iecpt *p)
{
    ec->f->ops->free(ec->f, &p->proj.x);
    ec->f->ops->free(ec->f, &p->proj.y);
    ec->f->ops->free(ec->f, &p->proj.z);
}

static void jac_setinf(struct ec *ec, union iecpt *d)
{
    ec->f->ops->copy(ec->f, &d->proj.x, &ec->f->one);
    ec->f->ops->copy(ec->f, &d->proj.y, &ec->f->one);
    ec->f->ops->copy(ec->f, &d->proj.z, &ec->f->zero);
}

static int jac_infp(struct ec *ec, union iecpt *d)
    { return ec->f->ops->zerop(ec->f, &d->proj.z); }

static void jac_copy(struct ec *ec, union iecpt *d, union iecpt *d)
{
    ec->f->ops->copy(ec->f, &d->proj.x, &p->proj.x);
    ec->f->ops->copy(ec->f, &d->proj.y, &p->proj.y);
    ec->f->ops->copy(ec->f, &d->proj.z, &p->proj.z);
}

static void jac_in(struct ec *ec, union iecpt *d, struct ecpt *p)
{
    ec->f->ops->copy(ec->f, &d->x, &p->proj.x);
    ec->f->ops->copy(ec->f, &d->y, &p->proj.y);
    ec->f->ops->copy(ec->f, &d->z, &ec->f->one);
}

static void jac_out(struct ec *ec, struct ecpt *d, union iecpt *p)
{
    union fieldelt iz, iz2, iz3;

    if (ec->f->ops->zerop(ec->f, &p->proj.z)) {
	ec->f->ops->free(ec->f, d->x);
	ec->f->ops->free(ec->f, d->y);
	d->f = ECPTF_INF;
    } else {
	ec->f->ops->init(ec->f, &iz2);
	ec->f->ops->init(ec->f, &iz3);

	ec->f->ops->inv(ec->f, &iz3, &p->proj.z);
	ec->f->ops->sqr(ec->f, &iz2, &iz3);
	ec->f->ops->mul(ec->f, &iz3, &iz3, &iz2);

	ec->f->ops->mul(ec->f, &d->x, &p->proj.x, &iz2);
	ec->f->ops->mul(ec->f, &d->y, &p->proj.x, &iz3);
	d->f = 0;

	ec->f->ops->free(ec->f, &iz2);
	ec->f->ops->free(ec->f, &iz3);
    }
}

static void gen_sub(struct ec *ec, union iecpt *d,
		    union iecpt *p, union iecpt *q)
{
    union iecpt t;

    ec->ops->init(ec, &t);
    ec->ops->neg(ec, &t, q);
    ec->ops->add(ec, d, p, &t);
    ec->ops->free(ec, &t);
}

/* Finally, let's define some actual curve arithmetic functions. */

static void pwjac_dbl(struct ec *ec, union iecpt *d, union iecpt *p)
{
    struct field *f = ec->f;
    union fieldelt m, s, t, u;

    if (ec->ops->infp(ec, p)) {
	ec->setinf(ec, d);
	return;
    }

    f->ops->init(f, &m);
    f->ops->init(f, &s);
    f->ops->init(f, &t);
    f->ops->init(f, &u);

    f->ops->sqr(f, &u, &p->proj.z);		/* z^2 */
    f->ops->sqr(f, &t, &u);			/* z^4 */
    f->ops->mul(f, &u, &t, &ec->u.w.a);		/* a z^4 */
    f->ops->sqr(f, &m, &p->proj.x);		/* x^2 */
    f->ops->tpl(f, &m, &m);			/* 3 x^2 */
    f->ops->add(f, &m, &m, &u);			/* m = 3 x^2 + a z^4 */

    f->ops->dbl(f, &t, &p->proj.y);		/* 2 y */
    f->ops->mul(f, &d->proj.z, &t, &p->proj.z); /* z' = 2 y z */

    f->ops->sqr(f, &u, &t);			/* 4 y^2 */
    f->ops->mul(f, &s, &u, &p->proj.x);		/* s = 4 x y^2 */
    f->ops->sqr(f, &t, &u);			/* 16 y^4 */
    f->ops->hlv(f, &t, &t);			/* t = 8 y^4 */

    f->ops->dbl(f, &u, &s);			/* 2 s */
    f->ops->sqr(f, &d->proj.x, &m);		/* m^2 */
    f->ops->sub(f, &d->proj.x, &d->proj.x, &u); /* x' = m^2 - 2 s */

    f->ops->sub(f, &u, &s, &d->proj.x);		/* s - x' */
    f->ops->mul(f, &d->proj.y, &m, &u);		/* m (s - x') */
    f->ops->sub(f, &d->proj.y, &d->proj.y, &t);	/* y' = m (s - x') - t */

    f->ops->free(f, &m);
    f->ops->free(f, &s);
    f->ops->free(f, &t);
    f->ops->free(f, &u);
}

static void pwjac_add(struct ec *ec, union iecpt *d,
		      union iecpt *p, union iecpt *q)
{
    struct field *f = ec->f;
    union fieldelt m, r, s, ss, t, u, uu, w;

    if (a == b) { pwjac_dbl(ec, d, p); return; }
    else if (ec->ops->infp(ec, p)) { ec->ops->copy(ec, d, q); return; }
    else if (ec->ops->infp(ec, q)) { ec->ops->copy(ec, d, p); return; }

    f->ops->init(f, &m);
    f->ops->init(f, &r);
    f->ops->init(f, &s);
    f->ops->init(f, &ss);
    f->ops->init(f, &t);
    f->ops->init(f, &u);
    f->ops->init(f, &uu);
    f->ops->init(f, &w);

    f->ops->sqr(f, &s, &p->proj.z);		/* z_0^2 */
    f->ops->mul(f, &u, &s, &q->proj.x);		/* u = x_1 z_0^2 */
    f->ops->mul(f, &s, &s, &p->proj.z);		/* z_0^3 */
    f->ops->mul(f, &s, &s, &q->proj.y);		/* s = y_1 z_0^3 */

    f->ops->sqr(f, &ss, &q->proj.z);		/* z_1^2 */
    f->ops->mul(f, &uu, &ss, &p->proj.x);	/* uu = x_0 z_1^2 */
    f->ops->mul(f, &ss, &ss, &q->proj.z);	/* z_1^3 */
    f->ops->mul(f, &ss, &ss, &p->proj.y);	/* ss = y_0 z_1^3 */

    f->ops->sub(f, &w, &uu, &u);		/* w = uu - u */
    f->ops->sub(f, &r, &ss, &s);		/* r = ss - s */
    if (f->ops->zerop(f, w)) {
	if (f->ops->zerop(f, r)) ec->ops->dbl(ec, d, p);
	else ec->ops->setinf(ec, d);
	goto cleanup;
    }

    f->ops->add(f, &t, &u, &uu);		/* t = uu + u */
    f->ops->add(f, &m, &s, &ss);		/* m = ss + s */

    f->ops->mul(f, &uu, &p->proj.z, &w);	/* z_0 w */
    f->ops->mul(f, &d->proj.z, &uu, &q->proj.z); /* z' = z_0 z_1 w */

    f->ops->sqr(f, &s, &w);			/* w^2 */
    f->ops->mul(f, &u, &s, &t);			/* t w^2 */
    f->ops->mul(f, &s, &s, &w);			/* w^3 */
    f->ops->mul(f, &s, &s, &m);			/* m w^3 */

    f->ops->sqr(f, &m, &r);			/* r^2 */
    f->ops->sub(f, &d->proj.x, &m, &u);		/* x' = r^2 - t w^2 */

    f->ops->dbl(f, &m, &d->proj.x);		/* 2 x' */
    f->ops->sub(f, &u, &u, &m);			/* v = t w^2 - 2 x' */
    f->ops->mul(f, &u, &u, &r);			/* v r */
    f->ops->sub(f, &u, &u, &s);			/* v r - m w^3 */
    f->ops->hlv(f, &d->proj.y, &m);		/* y' = (v r - m w^3)/2 */

cleanup:
    f->ops->free(f, &m);
    f->ops->free(f, &r);
    f->ops->free(f, &s);
    f->ops->free(f, &ss);
    f->ops->free(f, &t);
    f->ops->free(f, &u);
    f->ops->free(f, &uu);
    f->ops->free(f, &w);
}