From 06f2b5a74a4122209907493b0b72b5af8d99bb19 Mon Sep 17 00:00:00 2001 From: Mark Wooding Date: Mon, 26 Aug 2013 11:45:31 +0100 Subject: [PATCH] sshec.c: Initial cut at elliptic curve arithmetic. Signed-off-by: Mark Wooding --- sshec.c | 1522 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1522 insertions(+) create mode 100644 sshec.c diff --git a/sshec.c b/sshec.c new file mode 100644 index 00000000..8ab2dd4c --- /dev/null +++ b/sshec.c @@ -0,0 +1,1522 @@ +/* + * Elliptic curve arithmetic. + */ + +#include "bn-internal.h" +#include "ssh.h" + +/* Some background. + * + * These notes are intended to help programmers who are mathematically + * inclined, but not well versed in the theory of elliptic curves. If more + * detail is required, introductory texts of various levels of sophistication + * are readily available. I found + * + * L. Washington, `Elliptic Curves: Number Theory and Cryptography', CRC + * Press, 2003 + * + * useful. Further background on algebra and number theory can be found in + * + * V. Shoup, `A Computational Introduction to Number Theory and Algebra', + * version 2, Cambridge University Press, 2008; http://shoup.net/ntb/ + * [CC-ND-NC]. + * + * We work in a finite field k = GF(p^m). To make things easier, we'll only + * deal with the cases where p = 2 (`binary' fields) or m = 1 (`prime' + * fields). In fact, we don't currently implement binary fields either. + * + * We start in two-dimensional projective space over k, denoted P^2(k). This + * is the space of classes of triples of elements of k { (a x : a y : a z) | + * a in k^* } such that x y z /= 0. Projective points with z /= 0 are called + * `finite', and correspond to points in the more familiar two-dimensional + * affine space (x/z, y/z); points with z = 0 are called `infinite'. + * + * An elliptic curve is the set of projective points satisfying an equation + * of the form + * + * y^2 z + a_1 x y z + a_3 y z^2 = x^3 + a_2 x^2 z + a_4 x z^2 + a_6 z^3 + * + * (the `Weierstrass form'), such that the partial derivatives don't both + * vanish at any point. (Notice that this is a homogeneous equation -- all + * of the monomials have the same degree -- so this is a well-defined subset + * of our projective space. Don't worry about the numbering of the + * coefficients: maybe the later stuff about Jacobian coordinates will make + * them clear.) + * + * Let's consider, for a moment, a straight line r x + s y + t z = 0 which + * meets the curve at two points P and Q; then they must also meet again at + * some other point R. (Here, any or all of P, Q, and R might be equal; + * in this case, the simultaneous equations have repeated solutions.) Let's + * go out on a limb and write + * + * P + Q + R = 0 + * + * whenever three points P, Q, and R on the curve are collinear. Obviously + * this notion of `addition' is commutative: three points are collinear + * whichever order we write them. If we designate some point O as being an + * `identity' then we can introduce a notion of inverses. And then some + * magic occurs: this `addition' operation we've just invented is also + * associative -- so it's an abelian group operation! (This can be + * demonstrated by slogging through the algebra, but there's a fancy way + * involving the Weil pairing. Washington's book has both versions.) + * + * So we have an abelian group. It's actually quite a fun group, for + * complicated reasons I'm not going to do into here. For our purposes it's + * sufficient to note that, in most elliptic curve groups, the discrete + * logarithm problem -- finding x given x P -- seems really very hard. I + * mean, much harder than in multiplicative subgroups of finite fields, + * where index calculus techniques can give you the answer in subexponential + * time. The best algorithms known for solving this problem in elliptic + * curve groups are the ones which work in any old group G -- and they run in + * O(sqrt(#G)) time. + * + * Staring at the Weierstrass equation some more, we can determine that in + * fact there's exactly one infinite point on the curve: if z = 0 then we + * must have x = 0, so (0 : 1 : 0) is precisely that point. That's kind of + * handy. + * + * The Weierstrass form is pretty horrid, really. Fortunately, it's always + * possible to apply a linear transformation to make it less nasty. If p > 3 + * (which it will be for the prime fields we care about), we can simplify + * this to + * + * y^2 z = x^3 + a x z^2 + b z^3 (with 4 a^3 + 27 b^2 /= 0) + * + * and in the binary case we can simplify to + * + * y^2 z + x y z = x^3 + a x^2 z + b z^3 (with b /= 0) + * + * and it's these kinds of forms which we're actually going to work with. + * (There are other forms of elliptic curves which have interesting benefits, + * e.g., the Montgomery and Edwards forms, but they've not really caught on + * in standards.) Linear transformations don't mess up the geometry, so the + * group we constructed above still works -- and, indeed, the transformation + * is an isomorphism of groups. And, finally, the point at infinity we found + * above is still infinite. Let's call that point O, and declare it to be + * our additive identity. That leaves the finite points (x : y : 1), which + * correspond to the affine points + * + * y^2 = x^3 + a x + b or y^2 + x y = x^3 + a x^2 + b + * + * So, if we want to add two points P and Q, we draw the line between them; + * if P = Q then the line in question is the tangent to the curve at P. (The + * smoothness condition above is precisely what we need for this tangent to + * be well-defined. Unsmooth curves aren't worth worrying about, because + * they're cryptographically useless: they're isomorphic to some other group + * in which discrete logs are very easy.) In prime fields, if this line is + * vertical then it meets the curve again at O, and P + Q = O; otherwise it + * meets the curve at some other finite point R = (x, y), and the answer we + * want is P + Q = -R = (x, -y). In binary fields, the situation is similar + * but a little messier because of the x y term we couldn't eliminate. + * + * It's not very hard to work out where this point R is. You get a cubic + * equation out the far end which looks kind of daunting until you notice + * that the negation of the x^2 coefficient is exactly the sum of the roots, + * and you already know two of them. Unfortunately, the solution is still + * somewhat unpleasant and involves a bunch of division -- and in finite + * fields that involves computing a modular inverse, which is rather slow. + * + * Wouldn't it be really nice if we could somehow keep the denominators + * separate, so we could just do a few `big' divisions at the end of some + * calculation? Of course, we'd need to tweak the point-addition formulae so + * that we could cope with input points which have separate denominators. + * And so we enter the realm of fun coordinate systems. + * + * Perhaps an obvious choice of coordinates is the projective form above: + * rather than working out (x/m, y/n), we could just keep track of a + * projective point (x : y : m n). It turns out that there's a slightly + * different variant which is a little better: `Jacobian coordinates' are + * classes of triples (x :: y :: z), where x y z /= 0, and the point (x :: y + * :: z) is equivalent to (a^2 x :: a^3 y :: a z). + * + * The (simplified) Wieierstrass equation, using Jacobian projective + * coordinates, looks like this. + * + * y^2 = x^3 + a x z^4 + b z^6 + * + * or + * + * y^2 + x y z = x^3 + a x^2 z^2 + b z^6 + * + * (Maybe this is a hint about the strange coefficient numbering in the + * general Weierstrass form.) Anyway, if a point is infinite then we have + * y^2 = x^3, which has the obvious solution x = y = 1. + * + * That's about all the theory we need for now. Let's get into the code. + */ + +/* Let's start by making an abstract interface for finite field arithmetic. + * If we need to do clever things later then this will be a convenient + * extension point (e.g., supporting binary fields, working in extension + * fields as we'd need if we wanted to implement Tate or Weil pairings, or + * just doing different kinds of optimizations for the prime case). + * + * The conversions between field elements and bignums deserve more + * explanation. They are intended to align with the IEEE 1363 FE2OSP/OS2FEP + * conversions, so that a big-endian radix-256 representation of the integer + * (padded with leading zeroes as necessary) is precisely the octet-string + * representation of the field element. For prime fields GF(p) == Z/pZ, a + * field element is a residue class, and the appropriate integer is simply + * the canonical class representative, i.e., the unique element n of the + * class such that 0 <= n < p. + * + * Now consider an extension field GF(q^m) over GF(q). This can be viewed as + * a GF(q)-vector space, and there are a number of choices of basis. A + * popular choice is a polynomial basis: we represent GF(q^m) as + * GF(q)[t]/(p(t)) for a degree-m polynomial irreducible over GF(q), and then + * take (1 t ... t^{m-1}) as a basis; but Gaussian bases, of the form + * (b^{q^{m-1}} ... b^q b) are also popular. Anyway, we start with a vector + * (u_0 u_1 ... u_{m-1}) of GF(q) elements which (inductively) we can + * convert to integers n_i with 0 <= n_i < q; then we can use the obvious + * arithmetic code to map our element of GF(q^m) to u_0 + u_1 q + ... + + * u_{m-1} q^{m-1}. + * + * This is quite cheesy, but it's enough for now. + */ + +union fieldelt { + Bignum n; +}; + +struct field { + const struct field_ops *ops; /* Field operations table */ + union fieldelt zero, one; /* Identity elements */ + Bignum q; /* The field size */ + size_t eltbits, eltsz; /* Bits/octets for a field element */ +}; + +struct field_ops { + void (*destroy)(struct field *f); /* Destroy the field object */ + + void (*init)(struct field *f, union fieldelt *x); + /* Initialize x (to null) */ + + void (*free)(struct field *f, union fieldelt *x); + /* Free an element x, leaving it + * null + */ + + void (*copy)(struct field *f, union fieldelt *d, union fieldelt *x); + /* Make d be a copy of x */ + + void (*frombn)(struct field *f, union fieldelt *d, Bignum n); + /* Convert n to an element */ + + Bignum (*tobn)(struct field *f, union fieldelt *x); + /* Convert x to an integer */ + + int (*zerop)(struct field *f, union fieldelt *x); + /* Return nonzero if x is zero */ + + void (*dbl)(struct field *f, union fieldelt *d, union fieldelt *x); + /* Store 2 x in d */ + + void (*add)(struct field *f, union fieldelt *d, + union fieldelt *x, union fieldelt *y); + /* Store x + y in d */ + + void (*sub)(struct field *f, union fieldelt *d, + union fieldelt *x, union fieldelt *y); + /* Store x - y in d */ + + void (*mul)(struct field *f, union fieldelt *d, + union fieldelt *x, union fieldelt *y); + /* Store x y in d */ + + void (*sqr)(struct field *f, union fieldelt *d, union fieldelt *x); + /* Store x^2 in d */ + + void (*inv)(struct field *f, union fieldelt *d, union fieldelt *x); + /* Store 1/x in d; it is an error + * for x to be zero + */ + + int (*sqrt)(struct field *f, union fieldelt *d, union fieldelt *x); + /* If x is a square, store a square + * root (chosen arbitrarily) in d + * and return nonzero; otherwise + * return zero leaving d unchanged. + */ + + /* For internal use only. The standard methods call on these for + * detailed field-specific behaviour. + */ + void (*_reduce)(struct field *f, union fieldelt *x); + /* Reduce an intermediate result so + * that it's within whatever bounds + * are appropriate. + */ + + /* These functions are only important for prime fields. */ + void (*dbl)(struct field *f, union fieldelt *d, union fieldelt *x); + /* Store 2 x in d */ + + void (*tpl)(struct field *f, union fieldelt *d, union fieldelt *x); + /* Store 3 x in d */ + + void (*dql)(struct field *f, union fieldelt *d, union fieldelt *x); + /* Store 4 x in d */ + + void (*hlv)(struct field *f, union fieldelt *d, union fieldelt *x); + /* Store x/2 in d */ +}; + +/* Some standard methods for prime fields. */ + +static void gen_destroy(struct field *f) + { sfree(f); } + +static void prime_init(struct field *f, union fieldelt *x) + { x->n = 0; } + +static void prime_free(struct field *f, union fieldelt *x) + { if (x->n) { freebn(x->n); x->n = 0; } } + +static void prime_copy(struct field *f, union fieldelt *d, union fieldelt *x) + { if (d != x) { f->ops->free(f, d); if (x->n) d->n = copybn(x->n); } } + +static void prime_frombn(struct field *f, union fieldelt *d, Bignum n) + { f->ops->free(f, d); d->n = copybn(n); } + +static Bignum prime_tobn(struct field *f, union fieldelt *x) + { return copybn(x->n); } + +static Bignum prime_zerop(struct field *f, union fieldelt *x) + { return bignum_cmp(x->n, f->zero.n) == 0; } + +static void prime_add(struct field *f, union fieldelt *d, + union fieldelt *x, union fieldelt *y) +{ + Bignum sum = bigadd(x, y); + Bignum red = bigsub(sum, f->q); + + f->ops->free(f, d); + if (!red) + d->n = sum; + else { + freebn(sum); + d->n = red; + } +} + +static void prime_sub(struct field *f, union fieldelt *d, + union fieldelt *x, union fieldelt *y) +{ + Bignum raise = bigadd(x, f->q); + Bignum diff = bigsub(raise, y); + Bignum drop = bigsub(diff, f->q); + + f->ops->free(f, d); freebn(raise); + if (!drop) + d->n = diff; + else { + freebn(diff); + d->n = drop; + } +} + +static void prime_mul(struct field *f, union fieldelt *d, + union fieldelt *x, union fieldelt *y) +{ + Bignum prod = bigmul(x, y); + + f->ops->free(f, d); + d->n = prod; + f->ops->_reduce(f, d); +} + +static void gen_sqr(struct field *f, union fieldelt *d, union fieldelt *x) + { f->ops->mul(f->ops, d, x, x); } + +static void prime_inv(struct field *f, union fieldelt *d, union fieldelt *x) +{ + Bignum inv = modinv(x->n, f->q); + + f->ops->free(f, d); + d->n = inv; +} + +static int prime_sqrt(struct field *f, union fieldelt *d, union fieldelt *x) +{ + Bignum sqrt = modsqrt(x->n, f->q); + + if (!sqrt) return 0; + f->ops->free(f, d); + d->n = sqrt; + return 1; +} + +static void prime_dbl(struct field *f, union_fieldelt *d, union fieldelt *x) + { f->ops->add(f, d, x, x); } + +static void prime_tpl(struct field *f, union_fieldelt *d, union fieldelt *x) +{ + union_fieldelt t; + + if (d != x) { + f->ops->add(f, d, x, x); + f->ops->add(f, d, d, x); + } else { + f->ops->init(f, &t); + f->ops->copy(f, d, x); + f->ops->add(f, d, &t, &t); + f->ops->add(f, d, d, &t); + f->ops->free(f, &t); + } +} + +static void prime_qdl(struct field *f, union fieldelt *d, + union fieldelt *x) + { f->ops->add(f, d, x, x); f->ops->add(f, d, d, d); } + +static void prime_hlv(struct field *f, union fieldelt *d, union fieldelt *x) +{ + Bignum t, u; + + if (!x->n[0]) + f->ops->copy(f, d, x); + else { + /* The tedious answer is to multiply by the inverse of 2 in this + * field. But there's a better way: either x or q - x is actually + * divisible by 2, so the answer we want is either x >> 1 or p - + * ((p - x) >> 1) depending on whether x is even. + */ + if (!(x->n[1] & 1)) { + t = copybn(x->n); + shift_right(t + 1, t[0], 1); + } else { + u = bigsub(f->q, x); + shift_right(u + 1, u[0], 1); + t = bigsub(f->q, u); + freebn(u); + } + f->ops->free(f, d); + d->n = t; +} + +/* Now some utilities for modular reduction. All of the primes we're + * concerned about are of the form p = 2^n - d for `convenient' values of d + * -- such numbers are called `pseudo-Mersenne primes'. Suppose we're given + * a value x = x_0 2^n + x_1: then x - x_0 p = x_1 + x_0 d is clearly less + * than x and congruent to it mod p. So we can reduce x below 2^n by + * iterating this trick; if we're unlucky enough to have p <= x < 2^n then we + * can just subtract p. + * + * The values of d we'll be dealing with are specifically convenient because + * they satisfy the following properties. + * + * * We can write d = SUM_i d_i 2^i, with d_i in { -1, 0, +1 }. (This is + * no surprise.) + * + * * d > 0. + * + * * Very few d_i /= 0. With one small exception, if d_i /= 0 then i is + * divisible by 32. + * + * The following functions will therefore come in very handy. + */ + +static void add_imm_lsl(BignumInt *x, BignumInt y, unsigned shift) +{ + BignumDbl c = y << (shift % BIGNUM_INT_BITS); + + for (x += shift/BIGNUM_INT_BITS; c; x++) { + c += *x; + *x = c & BIGNUM_INT_MASK; + c >>= BIGNUM_INT_BITS; + } +} + +static void sub_imm_lsl(BignumInt *x, BignumInt y, unsigned shift) +{ + BignumDbl c = y << (shift % BIGNUM_INT_BITS); + BignumDbl t; + + c--; + for (x += shift/BIGNUM_INT_BITS; c; x++) { + c ^= BIGNUM_INT_MASK; + c += *x; + *x = c & BIGNUM_INT_MASK; + c >>= BIGNUM_INT_BITS; + } +} + +static void pmp_reduce(struct field *f, union fieldelt *xx, + void (*add_mul_d_lsl)(BignumInt *x, + BignumInt y, + unsigned shift)) +{ + /* Notation: write w = BIGNUM_INT_WORDS, and B = 2^w is the base we're + * working in. We assume that p = 2^n - d for some d. The job of + * add_mul_d_lsl is to add to its argument x the value y d 2^shift. + */ + + BignumInt top, *p, *x; + unsigned plen, xlen, i; + unsigned topbits, topclear; + Bignum t; + + /* Initialization: if x is shorter than p then there's nothing to do. + * Otherwise, ensure that it's at least one word longer, since this is + * required by the utility functions which add_mul_d_lsl will call. + */ + p = f->q + 1; plen = f->q[0]; + x = xx->n + 1; xlen = xx->n[0]; + if (xlen < plen) return; + else if (xlen == plen) { + t = newbn(plen + 1); + for (i = 0; i < plen; i++) t[i + 1] = x[i]; + t[plen] = 0; + freebn(xx->n); xx->n = t; + x = t + 1; xlen = plen + 1; + } + + /* Preparation: Work out the bit length of p. At the end of this, we'll + * have topbits such that n = w (plen - 1) + topbits, and topclear = w - + * topbits, so that n = w plen - topclear. + */ + while (plen > 0 && !p[plen - 1]) plen--; + top = p[plen - 1]; + assert(top); + if (top & BIGNUM_TOP_BIT) topbits = BIGNUM_INT_BITS; + else for (topbits = 0; top >> topbits; topbits++); + topclear = BIGNUM_INT_BITS - topbits; + + /* Step 1: Trim x down to the right number of words. */ + while (xlen > plen) { + + /* Pick out the topmost word; call it y for now. */ + y = x[xlen - 1]; + if (!y) { + xlen--; + continue; + } + + /* Observe that d == 2^n (mod p). Clear that top word. This + * effectively subtracts y B^{xlen-1} from x, so we must add back d + * 2^{w(xlen-1)-n} in order to preserve congruence modulo p. Since d + * is smaller than 2^n, this will reduce the absolute value of x, so + * we make (at least some) progress. In practice, we expect that d + * is a lot smaller. + * + * Note that w (xlen - 1) - n = w xlen - w - n = w (xlen - plen - 1) + * + topclear. + */ + x[xlen - 1] = 0; + add_mul_d_lsl(x + xlen - plen - 1, y, topclear); + } + + /* Step 2: Trim off the high bits of x, beyond the top bit of p. In more + * detail: if x >= 2^n > p, then write x = x_0 + 2^n y; then we can clear + * the top topclear bits of x, and add y d to preserve congruence mod p. + * + * If topclear is zero then there's nothing to do here: step 1 will + * already have arranged that x < 2^n. + */ + if (topclear) { + for (;;) { + y = x[xlen - 1] >> topbits; + if (!y) break; + x[xlen - 1] &= (1 << topbits) - 1; + add_mul_d_lsl(x, y, 0); + } + } + + /* Step 3: If x >= p then subtract p from x. */ + for (i = plen; i-- && x[i] == p[i]; ); + if (i >= plen || x[i] > p[i]) internal_sub(x, p, plen); + + /* We're done now. */ + d->n[0] = xlen; + bn_restore_invariant(d->n); +} + +/* Conversion operations for field elements. + * + * These are from IEEE 1363, among other standards. The format is absurdly + * simple: its just a sequence of octets of the length necessary to represent + * any element. For prime fields GF(p), this is just the big-endian + * radix-256 encoding of the canonical residue-class representative (i.e., + * the unique element n of the residue class such that 0 <= n < p). Other + * kinds of fields work differently, but the necessary conversions are + * actually done by the tobn/frombn methods, so see the commentary there. + */ + +/* Convert an octet string to an element of field f, storing it i in x. The + * octet string is at *pp, and is at least *nn octets long (one octet per + * unsigned-char element). On success, zero is returned, and *pp and *nn are + * updated to refer to the unconsumed piece of the input string. On failure, + * -1 is returned, and *pp and *nn are left with unhelpful values. + * + * Failures can occur for several reasons (e.g., input overruns, or the input + * being out of range); they are not distinguished here. + */ +int os2fep(field *f, union fieldelt *x, unsigned char **pp, long *nn) +{ + Bignum xn = 0; + size_t n = f->eltsz; + int rc = -1; + + if (*nn < n) goto done; + + xn = bignum_from_bytes(*pp, n); + *pp = p + n; + *nn -= n; + if (bignum_cmp(xn, f->q) >= 0) goto done; + f->ops->frombn(f, x, xn); +done: + if (xn) freebn(xn); + return rc; +} + +/* Convert a element x of field f to an octet string. The string is written, + * one octet per unsigned-char element, to the buffer at *pp, which is + * assumed to have space for *nn elements. On success, zero is returned, and + * *pp and *nn are updated to reflect the remaining space in the input + * buffer. On failure, -1 is returned, but the buffer and *pp and *nn may be + * partially updated. + * + * If you want this function to succeed, you must ensure that there are at + * least f->eltsz bytes free in the output buffer. + */ +int fe2osp(field *f, union fieldelt *x, unsigned char **pp, long *nn) +{ + Bignum xn = 0; + size_t n = f->eltsz, i; + unsigned char *p = *pp; + int rc = -1; + + if (*nn < n) goto done; + + xn = f->ops->tobn(f, x); + for (i = 0; i < n; i++) p[i] = bignum_byte(xn, n - i - 1); + *pp = p + n; + *nn -= n; + rc = 0; +done: + if (xn) freebn(xn); + return rc; +} + +/* Putting together a field-ops structure for a pseudo-Mersenne prime field. + */ + +static void pmp_destroy(struct field *f) + { freebn(f->q); sfree(f); } + +static void genfield_setup(struct field *f) +{ + Bignum qm1 = bigsub(f->q, One); + f->eltbits = bignum_bitcount(qm1); + f->eltsz = (f->eltbits + 7)/8; + freebn(qm1); +} + +static struct field *make_pmp_field(const struct field_ops *ops, + const unsigned char *p, size_t plen) +{ + struct field *f = snew(struct field); + + f->ops = ops; + f->zero.n = Zero; + f->one.n = One; + f->q = bignum_from_bytes(p, plen); + genfield_setup(f); + + return f; +} + +#define DEFINE_PMP_FIELD(fname) \ + \ +static const struct field_ops fname##_fieldops = { \ + pmp_destroy, \ + prime_init, prime_free, prime_copy, \ + prime_frombn, prime_tobn, \ + prime_zerop, \ + prime_add, prime_sub, prime_mul, gen_sqr, prime_inv, prime_sqrt, \ + fname##_reduce, \ + gen_dbl, gen_tpl, gen_qdl, \ + prime_hlv \ +}; \ + \ +struct field *make_##fname##_field(void) \ +{ \ + return make_pmp_field(&fname##_fieldops, \ + fname##_p, sizeof(fname##_p)); \ +} + +/* Some actual field definitions. */ + +static const unsigned char p256_p[] = { + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; + +static void p256_add_mul_d_lsl(BignumInt *x, BignumInt y, unsigned shift) +{ + /* p_{256} = 2^{256} - 2^{224} + 2^{192} + 2^{96} - 1 */ + add_imm_lsl(x, y, shift + 224); + sub_imm_lsl(x, y, shift + 192); + sub_imm_lsl(x, y, shift + 96); + add_imm_lsl(x, y, shift + 0); +} + +static void p256_reduce(struct field *f, union fieldelt *d) + { pmp_reduce(f, d, p256_add_mul_d_lsl); } + +DEFINE_PMP_FIELD(p256) + +static const unsigned char p384_p[] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff +}; + +static void p384_add_mul_d_lsl(BignumInt *x, BignumInt y, unsigned shift) +{ + /* p_{384} = 2^{384} - 2^{128} - 2^{96} + 2^{32} - 1 */ + add_imm_lsl(x, y, shift + 128); + add_imm_lsl(x, y, shift + 96); + sub_imm_lsl(x, y, shift + 32); + add_imm_lsl(x, y, shift + 0); +} + +static void p384_reduce(struct field *f, union fieldelt *d) + { pmp_reduce(f, d, p384_add_mul_d_lsl); } + +DEFINE_PMP_FIELD(p384) + +static const unsigned char p521_p[] = { + 0x01, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; + +static void p521_add_mul_d_lsl(BignumInt *x, BignumInt y, unsigned shift) + { add_imm_lsl(x, y, shift); } /* p_{521} = 2^{521} - 1 */ + +static void p521_reduce(struct field *f, union fieldelt *d) + { pmp_reduce(f, d, p521_add_mul_d_lsl); } + +DEFINE_PMP_FIELD(p521) + +/* And now, some actual elliptic curves. + * + * As with fields, we start by defining an abstract interface which may have + * various implementations (e.g., different algorithms with different + * performance properties, or based on different kinds of underlying fields. + */ + +/* An external-form elliptic curve point. */ +struct ecpt { + unsigned f; /* Flags */ +#define ECPTF_INF 1u /* Point is at infinity */ + union fieldelt x, y; /* x- and y-coordinates */ +}; + +/* An internal-form elliptic curve point. This is where most of the action + * happens. + */ +union iecpt { + struct { + union fieldelt x, y, z; /* Projective-form triple */ + } jac; /* Jacobian projective coords */ +}; + +struct ec { + const struct ec_ops *ops; /* Curve operations table */ + struct field *f; /* The underlying finite field */ + struct iecpt p; /* The well-known generator point */ + Bignum r; /* Size of the generated group */ + unsigned flags; /* Various flags */ +#define ECF_MINUS3 256u /* Prime Jacobi: a = -3 */ + union { + struct { union fieldelt a, b; } w; /* Simple Weierstrass coeffs */ + } u; +}; + +struct ec_ops { + void (*destroy)(struct ec *ec); /* Destroy the curve itself */ + + void (*init)(struct ec *ec, union iecpt *p); + /* Initialize p (to null) */ + + void (*free)(struct ec *ec, union iecpt *p); + /* Free the point p */ + + void (*setinf)(struct ec *ec, union iecpt *p); + /* Set p to be the point at infinity + */ + + int (*infp)(struct ec *ec, union iecpt *p); + /* Answer whether p is infinite */ + + void (*copy)(struct ec *ec, union iecpt *d, union iecpt *d); + /* Make p be a copy of d */ + + void (*in)(struct ec *ec, union iecpt *d, struct ecpt *p); + /* Convert external-format p to + * internal-format d + */ + + void (*out)(struct ec *ec, struct ecpt *d, union iecpt *p); + /* Convert internal-format p to + * external-format d + */ + + void (*add)(struct ec *ec, union iecpt *d, + union iecpt *p, union iecpt *q); + /* Store in d the sum of the points p + * and q + */ + + void (*sub)(struct ec *ec, union iecpt *d, + union iecpt *p, union iecpt *q); + /* Store in d the result of + * subtracting q from p + */ + + void (*dbl)(struct ec *ec, union iecpt *d, + union iecpt *p, union iecpt *q); + /* Store in d the result of adding + * point p to itself. + */ + + void (*neg)(struct ec *ec, union iecpt *d, union iecpt *p); + /* Store in d the negation of p */ + + + int (*oncurvep)(struct ec *ec, struct ecpt *p); + /* Is the point p actually on the + * curve? + */ + + int (*compressy)(struct ec *ec, struct ecpt *p, unsigned pc); + /* Return the compressed form of p + * according to the flags set in + * pc. + */ + + int (*fromx)(struct ec *ec, struct ecpt *p); + /* Given x, find and store y so that + * (x, y) is on the curve. If there + * is no such y then return zero; + * otherwise return one. + */ +}; + +/* Initialize the (external-format) point p to infinity. */ +void init_ecpt(struct ec *ec, struct ecpt *p) +{ + struct field *f = ec->f; + + p->f = ECPTF_INF; + f->ops->init(f, &p->x); + f->ops->init(f, &p->y); +} + +/* Free an external-format point. */ +void free_ecpt(struct ec *ec, struct ecpt *p) +{ + struct field *f = ec->f; + + p->f = ECPTF_INF; + f->ops->free(f, &p->x); + f->ops->free(f, &p->y); +} + +/* The EC2OSP format bits, according to IEEE 1363a-D12. The point at + * infinity is represented by a byte with value zero; a finite point must + * have at least one of COMPR and UNCOMPR set. Point compression works as + * follows: for any x in F, there are at most two y values such that (x, y) + * is on the curve. Therefore we can transmit y as a single bit if we have + * some way of distinguishing the two. + * + * The SORT format is easiest. If there are two different y values, one of + * them has a lexicographically smaller representation; so we can just + * transmit that one as 0, and the other one as 1. + * + * The LSB format (implied if there are no other compression-format flags + * set) is a little cleverer, and works differently according to the + * characteristic of the underlying field. Let y be one of the possible + * y-coordinates. + * + * For prime fields GF(p), the other y-coordinate is -y == p - y (mod p); + * because p is odd, exactly one of these (well, its canonical residue, in + * the interval [0, p)) will be even, so we can transmit y as its least + * significant bit. + * + * For binary fields GF(2^m), the other y-coordinate is y' = y + x (observe + * that (y + x)^2 + x (y + x) = y^2 + x^2 + x y + x^2 = y^2 + x y). + * Therefore, there's a single y coordinate exactly[1] when x = 0; we'll + * compress y to zero in this case. Otherwise, write z = y/x; then y = x z + * and y' = x (z + 1). Obviously z and z + 1 differ in their least + * significant bits, so encode y as the least significant bit of y/x. + * + * [1] Note that the squaring map x |-> x^2 in a binary field is a + * permutation. More generally, suppose F is a field with characteristic + * p; then the Frobenius map x |-> x^p is (binomial theorem) linear on + * F. Suppose that x^p = y^p; then (x - y)^p = 0 by linearity. But if + * x /= y then x - y is a zero-divisor, which is impossible. + */ +#define ECF_Y 1u /* The compressed y-coordinate */ +#define ECF_COMPR 2u /* Compressed y is present */ +#define ECF_UNCOMPR 4u /* Uncompressed y is present */ +#define ECF_SORT 8u /* SORT compression (not LSB) */ +#define ECF_RESV 0xf0 /* Reserved bits, must be zero */ + +/* And now some common utilities. */ + +static void weier_destroy(struct ec *ec) +{ + ec->f->ops(ec->f, &ec->u.w.a); + ec->f->ops(ec->f, &ec->u.w.b); + freebn(ec->r); + f->ops->destroy(ec->f); + sfree(ec); +} + +static void jac_init(struct ec *ec, union iecpt *p) +{ + ec->f->ops->init(ec->f, &p->jac.x); + ec->f->ops->init(ec->f, &p->jac.y); + ec->f->ops->init(ec->f, &p->jac.z); +} + +static void jac_free(struct ec *ec, union iecpt *p) +{ + ec->f->ops->free(ec->f, &p->jac.x); + ec->f->ops->free(ec->f, &p->jac.y); + ec->f->ops->free(ec->f, &p->jac.z); +} + +static void jac_setinf(struct ec *ec, union iecpt *d) +{ + ec->f->ops->copy(ec->f, &d->jac.x, &ec->f->one); + ec->f->ops->copy(ec->f, &d->jac.y, &ec->f->one); + ec->f->ops->copy(ec->f, &d->jac.z, &ec->f->zero); +} + +static int jac_infp(struct ec *ec, union iecpt *d) + { return ec->f->ops->zerop(ec->f, &d->jac.z); } + +static void jac_copy(struct ec *ec, union iecpt *d, union iecpt *d) +{ + ec->f->ops->copy(ec->f, &d->jac.x, &p->jac.x); + ec->f->ops->copy(ec->f, &d->jac.y, &p->jac.y); + ec->f->ops->copy(ec->f, &d->jac.z, &p->jac.z); +} + +static void jac_in(struct ec *ec, union iecpt *d, struct ecpt *p) +{ + ec->f->ops->copy(ec->f, &d->x, &p->jac.x); + ec->f->ops->copy(ec->f, &d->y, &p->jac.y); + ec->f->ops->copy(ec->f, &d->z, &ec->f->one); +} + +static void jac_out(struct ec *ec, struct ecpt *d, union iecpt *p) +{ + union fieldelt iz2, iz3; + + if (ec->f->ops->zerop(ec->f, &p->jac.z)) { + ec->f->ops->free(ec->f, d->x); + ec->f->ops->free(ec->f, d->y); + d->f = ECPTF_INF; + } else { + ec->f->ops->init(ec->f, &iz2); + ec->f->ops->init(ec->f, &iz3); + + ec->f->ops->inv(ec->f, &iz3, &p->jac.z); + ec->f->ops->sqr(ec->f, &iz2, &iz3); + ec->f->ops->mul(ec->f, &iz3, &iz3, &iz2); + + ec->f->ops->mul(ec->f, &d->x, &p->jac.x, &iz2); + ec->f->ops->mul(ec->f, &d->y, &p->jac.x, &iz3); + d->f = 0; + + ec->f->ops->free(ec->f, &iz2); + ec->f->ops->free(ec->f, &iz3); + } +} + +static void genec_sub(struct ec *ec, union iecpt *d, + union iecpt *p, union iecpt *q) +{ + union iecpt t; + + ec->ops->init(ec, &t); + ec->ops->neg(ec, &t, q); + ec->ops->add(ec, d, p, &t); + ec->ops->free(ec, &t); +} + +/* Finally, let's define some actual curve arithmetic functions. */ + +static void pwjac_dbl(struct ec *ec, union iecpt *d, union iecpt *p) +{ + struct field *f = ec->f; + union fieldelt m, s, t, u; + + if (ec->ops->infp(ec, p)) { + ec->setinf(ec, d); + return; + } + + f->ops->init(f, &m); + f->ops->init(f, &s); + f->ops->init(f, &t); + f->ops->init(f, &u); + + f->ops->sqr(f, &u, &p->jac.z); /* z^2 */ + f->ops->sqr(f, &t, &u); /* z^4 */ + f->ops->sqr(f, &m, &p->jac.x); /* x^2 */ + if (ec->flags & ECF_MINUS3) { + f->ops->sqr(f, &m, &m, &t); /* x^2 - z^4 */ + f->ops->tpl(f, &m, &m); /* m = 3 x^2 - 3 z^4 */ + } else { + f->ops->mul(f, &u, &t, &ec->u.w.a); /* a z^4 */ + f->ops->tpl(f, &m, &m); /* 3 x^2 */ + f->ops->add(f, &m, &m, &u); /* m = 3 x^2 + a z^4 */ + } + + f->ops->dbl(f, &t, &p->jac.y); /* 2 y */ + f->ops->mul(f, &d->jac.z, &t, &p->jac.z); /* z' = 2 y z */ + + f->ops->sqr(f, &u, &t); /* 4 y^2 */ + f->ops->mul(f, &s, &u, &p->jac.x); /* s = 4 x y^2 */ + f->ops->sqr(f, &t, &u); /* 16 y^4 */ + f->ops->hlv(f, &t, &t); /* t = 8 y^4 */ + + f->ops->dbl(f, &u, &s); /* 2 s */ + f->ops->sqr(f, &d->jac.x, &m); /* m^2 */ + f->ops->sub(f, &d->jac.x, &d->jac.x, &u); /* x' = m^2 - 2 s */ + + f->ops->sub(f, &u, &s, &d->jac.x); /* s - x' */ + f->ops->mul(f, &d->jac.y, &m, &u); /* m (s - x') */ + f->ops->sub(f, &d->jac.y, &d->jac.y, &t); /* y' = m (s - x') - t */ + + f->ops->free(f, &m); + f->ops->free(f, &s); + f->ops->free(f, &t); + f->ops->free(f, &u); +} + +static void pwjac_add(struct ec *ec, union iecpt *d, + union iecpt *p, union iecpt *q) +{ + struct field *f = ec->f; + union fieldelt m, r, s, ss, t, u, uu, w; + + if (a == b) { pwjac_dbl(ec, d, p); return; } + else if (ec->ops->infp(ec, p)) { ec->ops->copy(ec, d, q); return; } + else if (ec->ops->infp(ec, q)) { ec->ops->copy(ec, d, p); return; } + + f->ops->init(f, &m); + f->ops->init(f, &r); + f->ops->init(f, &s); + f->ops->init(f, &ss); + f->ops->init(f, &t); + f->ops->init(f, &u); + f->ops->init(f, &uu); + f->ops->init(f, &w); + + f->ops->sqr(f, &s, &p->jac.z); /* z_0^2 */ + f->ops->mul(f, &u, &s, &q->jac.x); /* u = x_1 z_0^2 */ + f->ops->mul(f, &s, &s, &p->jac.z); /* z_0^3 */ + f->ops->mul(f, &s, &s, &q->jac.y); /* s = y_1 z_0^3 */ + + f->ops->sqr(f, &ss, &q->jac.z); /* z_1^2 */ + f->ops->mul(f, &uu, &ss, &p->jac.x); /* uu = x_0 z_1^2 */ + f->ops->mul(f, &ss, &ss, &q->jac.z); /* z_1^3 */ + f->ops->mul(f, &ss, &ss, &p->jac.y); /* ss = y_0 z_1^3 */ + + f->ops->sub(f, &w, &uu, &u); /* w = uu - u */ + f->ops->sub(f, &r, &ss, &s); /* r = ss - s */ + if (f->ops->zerop(f, w)) { + if (f->ops->zerop(f, r)) ec->ops->dbl(ec, d, p); + else ec->ops->setinf(ec, d); + goto cleanup; + } + + f->ops->add(f, &t, &u, &uu); /* t = uu + u */ + f->ops->add(f, &m, &s, &ss); /* m = ss + s */ + + f->ops->mul(f, &uu, &p->jac.z, &w); /* z_0 w */ + f->ops->mul(f, &d->jac.z, &uu, &q->jac.z); /* z' = z_0 z_1 w */ + + f->ops->sqr(f, &s, &w); /* w^2 */ + f->ops->mul(f, &u, &s, &t); /* t w^2 */ + f->ops->mul(f, &s, &s, &w); /* w^3 */ + f->ops->mul(f, &s, &s, &m); /* m w^3 */ + + f->ops->sqr(f, &m, &r); /* r^2 */ + f->ops->sub(f, &d->jac.x, &m, &u); /* x' = r^2 - t w^2 */ + + f->ops->dbl(f, &m, &d->jac.x); /* 2 x' */ + f->ops->sub(f, &u, &u, &m); /* v = t w^2 - 2 x' */ + f->ops->mul(f, &u, &u, &r); /* v r */ + f->ops->sub(f, &u, &u, &s); /* v r - m w^3 */ + f->ops->hlv(f, &d->jac.y, &m); /* y' = (v r - m w^3)/2 */ + +cleanup: + f->ops->free(f, &m); + f->ops->free(f, &r); + f->ops->free(f, &s); + f->ops->free(f, &ss); + f->ops->free(f, &t); + f->ops->free(f, &u); + f->ops->free(f, &uu); + f->ops->free(f, &w); +} + +static int pweier_oncurvep(struct ec *ec, struct ecpt *p) +{ + struct field *f = ec->f; + union fieldelt a, t; + int rc; + + if (p->f & ECPTF_INF) return 1; + + f->ops->init(f, &a); + f->ops->init(f, &t); + + f->ops->sqr(f, &a, &p->x); /* x^2 */ + f->ops->mul(f, &a, &a, &p->x); /* x^3 */ + + f->ops->mul(f, &t, &p->x, &wc->u.w.a); /* a x */ + f->ops->add(f, &a, &a, &t); /* x^3 + a x */ + + f->ops->add(f, &a, &a, &ec->u.w.b); /* x^3 + a x + b */ + + f->ops->sqr(f, &t, &p->y); /* y^2 */ + f->ops->sub(f, &a, &a, &t); /* x^3 + a x + b - y^2 =? 0 */ + rc = f->ops->zerop(f, &a); + + f->ops->free(f, &a); + f->ops->free(f, &t); + return rc; +} + +/* Curve-generic SORT-style point compression. */ +static int genec_compressy_sort(struct ec *ec, struct ecpt *p) +{ + struct field *f = ec->f; + struct ecpt mp; + union iecpt q; + Bignum y, yy; + int rc; + + init_ecpt(ec, &mp); + ec->ops->init(ec, &q); + + ec->ops->in(ec, &q, p); + ec->ops->neg(ec, &q, &q); + ec->ops->out(ec, &mp, &q); + + y = f->ops->tobn(f, &p->y); + yy = f->ops->tobn(f, &mp.y); + rc = bignum_cmp(y, yy) > 0; + + freebn(y); freebn(yy); + free_ecpt(ec, &mp); + ec->ops->free(ec, &q); + + return rc; +} + +static int prime_compressy(struct ec *ec, struct ecpt *p, unsigned pc) +{ + Bignum y; + int rc; + + if (pc & ECF_SORT) + rc = genec_compressy_sort(ec, p); + else { + y = ec->f->ops->tobn(ec->f, &p->y); + rc = (y[0] && (y[1] & 1)); + freebn(y); + } + return rc; +} + +static int pweier_fromx(struct ec *ec, struct ecpt *p) +{ + struct field *f = ec->f; + union fieldelt a, t, u; + Bignum yn; + int rc; + + f->ops->init(f, &a); + f->ops->init(f, &t); + + f->ops->sqr(f, &a, &p->x); /* x^2 */ + f->ops->mul(f, &a, &a, &p->x); /* x^3 */ + f->ops->mul(f, &t, &ec->u.w.a, &p->x); /* a x */ + f->ops->add(f, &a, &a, &t); /* x^3 + a x */ + f->ops->mul(f, &a, &a, &ec->u.w.b); /* x^3 + a x + b */ + + rc = f->ops->sqrt(f, &p->y, &a); + + f->ops->free(f, &a); + f->ops->free(f, &t); + return rc; +} + +static const struct ec_ops pwjac_ecops = { + weier_destroy, + jac_init, jac_free, jac_setinf, jac_infp, jac_copy, jac_in, jac_out, + pwjac_add, genec_sub, pwjac_dbl, pwjac_neg, + pweier_oncurvep, prime_compressy, pweier_fromx +}; + +/* Conversions between elliptic-curve points and octet strings. + * + * The octet-string encoding works like this. The first octet is called PC + * and contains flags, listed above as Y, COMPR, UNCOMPR, and SORT. If all + * of the flags are clear, then (a) there are no further octets, and (b) the + * point is the point at infinity. Otherwise, there is an x-coordinate (in + * FE2OSP form); if UNCOMPR is set then there will be a y-coordinate (again + * in FE2OSP form). If COMPR is set then the y-coordinate is compressed into + * the Y flag (see above for the compression formats). It is acceptable that + * both COMPR and UNCOMPR are set: this is the `hybrid' encoding, which + * doesn't seem very useful but we support it anyway (checking that the + * explicitly provided y-coordinate really does match the compressed + * version). + * + * Finally, if both COMPR and UNCOMPR are clear, but Y is set, we're in the + * `x-only' case. Note that the two points with the same x-coordinate are + * additive inverses of each other, say Q and -Q; then n (-Q) = -(n Q), and + * so if we were only interested in the x-coordinate of n Q, then we only + * need to know the x-coordinate of Q. There are optimized ways of doing + * this computation, but we don't use those here. Instead, we just pick a + * y-coordinate arbitrarily and hope that's OK. + */ + +/* Convert an octet string into an elliptic curve point. The input octet + * string starts at **pp and goes on for *nn bytes. Return zero if all is + * good, and update *pp and *nn to refer to the unconsumed portion of the + * input; return -1 if the encoding is bad in some (unspecified) way (e.g., + * the encoding is bad, the options provided don't make sense, the point + * isn't on the curve,x...). In the latter case, *pp and *nn may be left in + * some unhelpful state. + */ +int os2ecp(struct ec *ec, union iecpt *p, unsigned char **pp, long *nn) +{ + struct field *f = ec->f; + unsigned pc; + struct ecpt pe; + union fieldelt t; + union iecpt q; + int rc = -1; + + /* Set things up. */ + init_ecpt(ec, &pe); + f->ops->init(f, &t); + ec->ops->init(ec, &q); + + /* Read the PC octet so that we can decode the rest. */ + if (*nn < 1) goto done; + pc = *(*pp++); (*nn)--; + + /* Decode the remaining data. */ + if (pc & ECF_RESV) + goto done; + else if (!pc) + pe.f = ECPTF_INF; + else { + pe.f = 0; + if (os2fep(f, &pe.x, pp, nn)) goto done; + + if (pc & ECF_UNCOMPR) { + if (os2fep(f, &pe.y, pp, nn)) goto done; + if (!ec->ops->oncurvep(ec, &pe)) goto done; + if (!(pc & ECF_COMPR)) { + if (pc & ~ECF_UNCOMPR) goto done; + } else { + if (ec->ops->compressy(ec, &pe) != (pc & ECF_Y)) goto done; + } + ec->ops->in(ec, p, &pe); + } else { + if (!ec->ops->fromx(ec, &pe)) goto done; + ec->ops->in(ec, p, &pe); + if (!(pc & ECF_COMPR)) { + if (pc != ECF_Y) goto done; + } else if (ec->ops->compressy(ec, &pe) != (pc & ECF_Y)) + ec->ops->neg(ec, p, p); + } + } + + rc = 0; + +done: + f->ops->free(f, &t); + free_ecpt(ec, &pe); + ec->ops->free(ec, &q); + return rc; +} + +/* Convert an elliptic curve point to external format. The point data is + * written to the buffer at *pp, which must have space for at least *nn + * bytes. On successful exit, *pp and *nn are updated to reflect the unused + * tail of the buffer, and zero is returned; on failure, -1 is returned, and + * the buffer may contain partial output. + * + * This will always succeed if the buffer is at least 2*ec->f->eltsz + 1 + * bytes long. + */ +int ec2osp(struct ec *ec, union iecpt *p, unsigned pc, + unsigned char **pp, long *nn) +{ + struct field *f = ec->f; + unsigned pc; + struct ecpt pe; + union fieldelt t; + int rc = -1; + + f->ops->init(f, &t); + init_ecpt(ec, &pe); + ec->ops->out(ec, &pe, p); + + if (pe.f & ECPTF_INF) + pc = 0; + else if (pc & ECF_COMPR) + pc = (pc & ~ECF_Y) | ec->ops->compressy(ec, &pe); + + if (*nn < 1) goto done; + *(*pp++) = pc; (*nn)--; + + if (pc && fe2osp(f, &pe.x, pp, nn)) goto done; + if ((pc & ECF_UNCOMPR) && fe2osp(f, &pe.y, pp, nn)) goto done; + + ec->ops->in(ec, p, &pe); + rc = 0; + +done: + f->ops->free(f, &t); + free_ecpt(ec, &pe); + return rc; +} + +/* Putting together a predfined elliptic curve. */ + +static void init_fieldelt_from_bytes(struct field *f, union fieldelt *x, + unsigned char *p, size_t n) +{ + Bignum n = bignum_from_bytes(p, n); + f->ops->init(f, x); + f->ops->frombn(f, x, n); + freebn(n); +} + +static void init_ecpt_from_bytes(struct ec *ec, union iecpt *p, + unsigned char *px, size_t nx, + unsigned char *py, size_t ny) +{ + struct ecpt pp; + struct field *f = ec->f; + + pp.f = 0; + init_fieldelt_from_bytes(f, &pp.x, px, nx); + init_fieldelt_from_bytes(f, &pp.y, py, ny); + ec->ops->init(ec, p); + ec->ops->in(ec, p, &pp); + f->ops->free(f, &pp.x); + f->ops->free(f, &pp.y); +} + +static struct ec *make_pwjac_curve(struct field *f, + const unsigned char *a, size_t alen, + const unsigned char *b, size_t blen, + const unsigned char *r, size_t rlen, + const unsigned char *p, size_t plen) +{ + struct ec *ec = snew(struct ec); + union fieldelt x; + unsigned char *q; + int rc; + static BignumInt three[] = { 1, 3 }; + + ec->ops = &pwjac_ecops; + ec->f = f; + ec->flags = 0; + + if (a) + init_fieldelt_from_bytes(ec->f, &ec->u.w.a, a, alen); + else { + f->ops->init(f, &ec->u.w.a); + f->ops->frombn(f, &ec->u.w.a, three); + f->ops->sub(f, &ec->u.w.a, &f->zero, &ec->u.w.a); + } + + init_fieldelt_from_bytes(ec->f, &ec->u.w.b, b, blen); + ec->r = bignum_from_bytes(r, rlen); + q = (/*unconst*/ unsigned char *)p; + rc = os2ecp(ec, &ec->p, &q, &plen); assert(!rc); assert(!plen); + + return rc; +} + +#define PWJAC_MINUS3(ecname) 0, 0 +#define PWJAC_STD(ecname) ecname##_a, sizeof(ecname##_a) + +#define DEFINE_PWJAC_CURVE(ecname, field, kind) \ + \ +struct ec *make_##ecname##_curve(void) \ +{ \ + return make_pwjac_curve(make_##field##_field(), \ + kind(ecname), \ + ecname##_b, sizeof(ecname##_b), \ + ecname##_r, sizeof(ecname##_r), \ + ecname##_p, sizeof(ecname##_p)); \ +} + +/* Some well-known elliptic curves. */ + +static const unsigned char p256_b[] = { + 0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, + 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc, + 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6, + 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b +}; + +static void unsigned char p256_r[] = { + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xbc, 0xe6, 0xfa, 0xad, 0xa7, 0x17, 0x9e, 0x84, + 0xf3, 0xb9, 0xca, 0xc2, 0xfc, 0x63, 0x25, 0x51 +}; + +static void unsigned char p256_p[] = { + ECF_COMPR | ECF_Y, + 0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, + 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2, + 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0, + 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96 +}; + +DEFINE_PWJAC_CURVE(p256, p256, PWJAC_MINUS3) + +static const unsigned char p384_b[] = { + 0xb3, 0x31, 0x2f, 0xa7, 0xe2, 0x3e, 0xe7, 0xe4, + 0x98, 0x8e, 0x05, 0x6b, 0xe3, 0xf8, 0x2d, 0x19, + 0x18, 0x1d, 0x9c, 0x6e, 0xfe, 0x81, 0x41, 0x12, + 0x03, 0x14, 0x08, 0x8f, 0x50, 0x13, 0x87, 0x5a, + 0xc6, 0x56, 0x39, 0x8d, 0x8a, 0x2e, 0xd1, 0x9d, + 0x2a, 0x85, 0xc8, 0xed, 0xd3, 0xec, 0x2a, 0xef +}; + +static const unsigned char p384_r[] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xc7, 0x63, 0x4d, 0x81, 0xf4, 0x37, 0x2d, 0xdf, + 0x58, 0x1a, 0x0d, 0xb2, 0x48, 0xb0, 0xa7, 0x7a, + 0xec, 0xec, 0x19, 0x6a, 0xcc, 0xc5, 0x29, 0x73 +}; + +static const unsigned char p384_p[] = { + ECF_COMPR | ECF_Y, + 0xaa, 0x87, 0xca, 0x22, 0xbe, 0x8b, 0x05, 0x37, + 0x8e, 0xb1, 0xc7, 0x1e, 0xf3, 0x20, 0xad, 0x74, + 0x6e, 0x1d, 0x3b, 0x62, 0x8b, 0xa7, 0x9b, 0x98, + 0x59, 0xf7, 0x41, 0xe0, 0x82, 0x54, 0x2a, 0x38, + 0x55, 0x02, 0xf2, 0x5d, 0xbf, 0x55, 0x29, 0x6c, + 0x3a, 0x54, 0x5e, 0x38, 0x72, 0x76, 0x0a, 0xb7 +}; + +DEFINE_PWJAC_CURVE(p384, p384, PWJAC_MINUS3) + +static const unsigned char p521_b[] = { + 0x51, + 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, 0x9a, 0x1f, + 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85, 0x40, 0xee, + 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3, 0x15, 0xf3, + 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1, 0x09, 0xe1, + 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e, 0x93, 0x7b, + 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1, 0xbf, 0x07, + 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c, 0x34, 0xf1, + 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50, 0x3f, 0x00 +}; + +static const unsigned char p521_r[] = { + 0x01, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfa, + 0x51, 0x86, 0x87, 0x83, 0xbf, 0x2f, 0x96, 0x6b, + 0x7f, 0xcc, 0x01, 0x48, 0xf7, 0x09, 0xa5, 0xd0, + 0x3b, 0xb5, 0xc9, 0xb8, 0x89, 0x9c, 0x47, 0xae, + 0xbb, 0x6f, 0xb7, 0x1e, 0x91, 0x38, 0x64, 0x09 +}; + +static const unsigned char p521_p[] = { + ECF_COMPR, + 0x00, 0xc6, + 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, 0xe9, 0xcd, + 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95, 0xb4, 0x42, + 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f, 0xb5, 0x21, + 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d, 0x3d, 0xba, + 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7, 0x59, 0x28, + 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff, 0xa8, 0xde, + 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a, 0x42, 0x9b, + 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5, 0xbd, 0x66 +}; + +DEFINE_PWJAC_CURVE(p521, p521, PWJAC_MINUS3) + +/* Scalar multiplication: store in d the product n*p. This is the payoff for + * all of the above work. + */ +void ec_scalarmul(struct ec *ec, union iecpt *d, union iecpt *p, Bignum n) +{ + union iecpt t; + long i; + BignumInt v; + +#define BIT(i) ((n[(i)/BIGNUM_INT_BITS + 1] >> ((i)%BIGNUM_INT_BITS)) & 1) + + /* Setting up. */ + ec->ops->init(ec, &t); ec->ops->setinf(ec, &t); + if (!n[0]) goto done; + i = n[0]*BIGNUM_INT_BITS - 1; + while (i > 0 && !BIT(i)) i--; + + /* The main algorithm. This is a straightforward right-to-left double- + * and-add scalar multiplication. It works like this. We implement a + * function M defined inductively as follows. + * + * M(P, 0, 0, Q) = Q + * M(P, i + 1, b 2^i + n', Q) = M(P, i, n', 2 Q + b P) + * (if n' < 2^i) + * + * It's easy to show (by induction on i) the important property of this + * function + * + * M(P, i, n, Q) = 2^i Q + n P (if n < 2^i) + * + * and, in particular, then + * + * M(P, i, n, 0) = n P + * + * Many fancier algorithms exist (e.g., window NAF, comb, ...), with + * various tradeoffs between precomputation and calculation performance. + * But this will do for now. + */ + while (i > 0) { + ec->ops->dbl(ec, &t, &t); + if (BIT(i)) ec->ops->add(ec, &t, p); + i--; + } +} -- 2.11.0