From: Mark Wooding Date: Fri, 16 Aug 2013 20:44:04 +0000 (+0100) Subject: work in progress; to be tidied and fixed X-Git-Url: https://git.distorted.org.uk/u/mdw/putty/commitdiff_plain/7961438be4add9289b13b3471e4cd7943399f5ae work in progress; to be tidied and fixed --- diff --git a/ssh.h b/ssh.h index 1ac4e5e8..a61866d2 100644 --- a/ssh.h +++ b/ssh.h @@ -452,6 +452,8 @@ unsigned short bignum_mod_short(Bignum number, unsigned short modulus); Bignum bignum_add_long(Bignum number, unsigned long addend); Bignum bigadd(Bignum a, Bignum b); Bignum bigsub(Bignum a, Bignum b); +Bignum biglsl(Bignum x, int n); +Bignum biglsr(Bignum x, int n); Bignum bigmul(Bignum a, Bignum b); Bignum bigmuladd(Bignum a, Bignum b, Bignum addend); Bignum bigdiv(Bignum a, Bignum b); diff --git a/sshbn.c b/sshbn.c index 7c6b13a1..580dd6f6 100644 --- a/sshbn.c +++ b/sshbn.c @@ -1385,6 +1385,78 @@ Bignum bigsub(Bignum a, Bignum b) } /* + * Return a bignum which is the result of shifting another left by N bits. + * If N is negative then you get a right shift instead. + */ +Bignum biglsl(Bignum x, int n) +{ + Bignum d; + unsigned o, i; + + if (!n || !x[0]) + return copybn(x); + else if (n < 0) + return biglsr(x, -n); + + o = n/BIGNUM_INT_BITS; + n %= BIGNUM_INT_BITS; + d = newbn(x[0] + o + !!n); + + for (i = 1; i <= o; i++) + d[i] = 0; + + if (!n) { + for (i = 1; i <= x[0]; i++) + d[o + i] = x[i]; + } else { + d[o + 1] = x[1] << n; + for (i = 2; i <= x[0]; i--) + d[o + i] = (x[i] << n) | (x[i - 1] >> (BIGNUM_INT_BITS - n)); + d[o + x[0] + 1] = x[x[0]] >> (BIGNUM_INT_BITS - n); + } + + bn_restore_invariant(d); + return d; +} + +/* + * Return a bignum which is the result of shifting another right by N bits + * (discarding the least significant N bits, and shifting zeroes in at the + * most significant end). If N is negative then you get a left shift + * instead. + */ +Bignum biglsr(Bignum x, int n) +{ + Bignum d; + unsigned o, i; + + if (!n || !x[0]) + return copybn(x); + else if (n < 0) + return biglsl(x, -n); + + o = n/BIGNUM_INT_BITS; + n %= BIGNUM_INT_BITS; + d = newbn(x[0]); + + if (!n) { + for (i = o + 1; i <= x[0]; i++) + d[i - o] = x[i]; + } else { + d[1] = x[o + 1] >> n; + for (i = o + 2; i < x[0]; i++) + d[i - o] = x[ + d[o + x[0] + 1] = x[x[0]] >> (BIGNUM_INT_BITS - n); + for (i = x[0]; i > 1; i--) + d[o + i] = (x[i] << n) | (x[i - 1] >> (BIGNUM_INT_BITS - n)); + d[o + 1] = x[1] << n; + } + + bn_restore_invariant(d); + return d; +} + +/* * Create a bignum which is the bitmask covering another one. That * is, the smallest integer which is >= N and is also one less than * a power of two. diff --git a/sshec.c b/sshec.c new file mode 100644 index 00000000..6a7a2c76 --- /dev/null +++ b/sshec.c @@ -0,0 +1,880 @@ +/* + * Elliptic curve arithmetic. + */ + +#include "bn-internal.h" +#include "ssh.h" + +/* Some background. + * + * These notes are intended to help programmers who are mathematically + * inclined, but not well versed in the theory of elliptic curves. If more + * detail is required, introductory texts of various levels of sophistication + * are readily available. I found + * + * L. Washington, `Elliptic Curves: Number Theory and Cryptography', CRC + * Press, 2003 + * + * useful. Further background on algebra and number theory can be found in + * + * V. Shoup, `A Computational Introduction to Number Theory and Algebra', + * version 2, Cambridge University Press, 2008; http://shoup.net/ntb/ + * [CC-ND-NC]. + * + * We work in a finite field k = GF(p^m). To make things easier, we'll only + * deal with the cases where p = 2 (`binary' fields) or m = 1 (`prime' + * fields). In fact, we don't currently implement binary fields either. + * + * We start in two-dimensional projective space over k, denoted P^2(k). This + * is the space of classes of triples of elements of k { (a x : a y : a z) | + * a in k^* } such that x y z /= 0. Projective points with z /= 0 are called + * `finite', and correspond to points in the more familiar two-dimensional + * affine space (x/z, y/z); points with z = 0 are called `infinite'. + * + * An elliptic curve is the set of projective points satisfying an equation + * of the form + * + * y^2 z + a_1 x y z + a_3 y z^2 = x^3 + a_2 x^2 z + a_4 x z^2 + a_6 z^3 + * + * (the `Weierstrass form'), such that the partial derivatives don't both + * vanish at any point. (Notice that this is a homogeneous equation -- all + * of the monomials have the same degree -- so this is a well-defined subset + * of our projective space. Don't worry about the numbering of the + * coefficients: maybe the later stuff about Jacobian coordinates will make + * them clear.) + * + * Let's consider, for a moment, a straight line r x + s y + t z = 0 which + * meets the curve at two points P and Q; then they must also meet again at + * some other point R. (Here, any or all of P, Q, and R might be equal; + * in this case, the simultaneous equations have repeated solutions.) Let's + * go out on a limb and write + * + * P + Q + R = 0 + * + * whenever three points P, Q, and R on the curve are collinear. Obviously + * this notion of `addition' is commutative: three points are collinear + * whichever order we write them. If we designate some point O as being an + * `identity' then we can introduce a notion of inverses. And then some + * magic occurs: this `addition' operation we've just invented is also + * associative -- so it's an abelian group operation! (This can be + * demonstrated by slogging through the algebra, but there's a fancy way + * involving the Weil pairing. Washington's book has both versions.) + * + * So we have an abelian group. It's actually quite a fun group, for + * complicated reasons I'm not going to do into here. For our purposes it's + * sufficient to note that, in most elliptic curve groups, the discrete + * logarithm problem -- finding x given x P -- seems really very hard. I + * mean, much harder than in multiplicative subgroups of finite fields, + * where index calculus techniques can give you the answer in subexponential + * time. The best algorithms known for solving this problem in elliptic + * curve groups are the ones which work in any old group G -- and they run in + * O(sqrt(#G)) time. + * + * Staring at the Weierstrass equation some more, we can determine that in + * fact there's exactly one infinite point on the curve: if z = 0 then we + * must have x = 0, so (0 : 1 : 0) is precisely that point. That's kind of + * handy. + * + * The Weierstrass form is pretty horrid, really. Fortunately, it's always + * possible to apply a linear transformation to make it less nasty. If p > 3 + * (which it will be for the prime fields we care about), we can simplify + * this to + * + * y^2 z = x^3 + a x z^2 + b z^3 (with 4 a^3 + 27 b^2 /= 0) + * + * and in the binary case we can simplify to + * + * y^2 z + x y z = x^3 + a x^2 z + b z^3 (with b /= 0) + * + * and it's these kinds of forms which we're actually going to work with. + * (There are other forms of elliptic curves which have interesting benefits, + * e.g., the Montgomery and Edwards forms, but they've not really caught on + * in standards.) Linear transformations don't mess up the geometry, so the + * group we constructed above still works -- and, indeed, the transformation + * is an isomorphism of groups. And, finally, the point at infinity we found + * above is still infinite. Let's call that point O, and declare it to be + * our additive identity. That leaves the finite points (x : y : 1), which + * correspond to the affine points + * + * y^2 = x^3 + a x + b or y^2 + x y = x^3 + a x^2 + b + * + * So, if we want to add two points P and Q, we draw the line between them; + * if P = Q then the line in question is the tangent to the curve at P. (The + * smoothness condition above is precisely what we need for this tangent to + * be well-defined. Unsmooth curves aren't worth worrying about, because + * they're cryptographically useless: they're isomorphic to some other group + * in which discrete logs are very easy.) In prime fields, if this line is + * vertical then it meets the curve again at O, and P + Q = O; otherwise it + * meets the curve at some other finite point R = (x, y), and the answer we + * want is P + Q = -R = (x, -y). In binary fields, the situation is similar + * but a little messier because of the x y term we couldn't eliminate. + * + * It's not very hard to work out where this point R is. You get a cubic + * equation out the far end which looks kind of daunting until you notice + * that the negation of the x^2 coefficient is exactly the sum of the roots, + * and you already know two of them. Unfortunately, the solution is still + * somewhat unpleasant and involves a bunch of division -- and in finite + * fields that involves computing a modular inverse, which is rather slow. + * + * Wouldn't it be really nice if we could somehow keep the denominators + * separate, so we could just do a few `big' divisions at the end of some + * calculation? Of course, we'd need to tweak the point-addition formulae so + * that we could cope with input points which have separate denominators. + * And so we enter the realm of fun coordinate systems. + * + * Perhaps an obvious choice of coordinates is the projective form above: + * rather than working out (x/m, y/n), we could just keep track of a + * projective point (x : y : m n). It turns out that there's a slightly + * different variant which is a little better: `Jacobian coordinates' are + * classes of triples (x :: y :: z), where x y z /= 0, and the point (x :: y + * :: z) is equivalent to (a^2 x :: a^3 y :: a z). + * + * The (simplified) Wieierstrass equation, using Jacobian projective + * coordinates, looks like this. + * + * y^2 = x^3 + a x z^2 + b z^6 + * + * or + * + * y^2 + x y z = x^3 + a x^2 z + b z^3 + * + * (Maybe this is a hint about the strange coefficient numbering in the + * general Weierstrass form.) Anyway, if a point is infinite then we have + * y^2 = x^3, which has the obvious solution x = y = 1. + * + * That's about all the theory we need for now. Let's get into the code. + */ + +/* Let's start by making an abstract interface for finite field arithmetic. + * If we need to do clever things later then this will be a convenient + * extension point (e.g., supporting binary fields, working in extension + * fields as we'd need if we wanted to implement Tate or Weil pairings, or + * just doing different kinds of optimizations for the prime case). + * + * This is quite cheesy, but it's enough for now. + */ + +union fieldelt { + Bignum n; +}; + +struct field { + const struct field_ops *ops; /* Field operations table. */ + union fieldelt zero, one; /* Identity elements. */ + Bignum q; /* The field size. */ +}; + +struct field_ops { + void (*destroy)(struct field *f); /* Destroy the field object. */ + + void (*init)(struct field *f, union fieldelt *x); + /* Initialize x (to null). */ + + void (*free)(struct field *f, union fieldelt *x); + /* Free an element x, leaving it + * null. + */ + + void (*copy)(struct field *f, union fieldelt *d, union fieldelt *x); + /* Make d be a copy of x. */ + + void (*frombn)(struct field *f, union fieldelt *d, Bignum n); + /* Convert n to an element. */ + + Bignum (*tobn)(struct field *f, union fieldelt *x); + /* Convert x to an integer. */ + + int (*zerop)(struct field *f, union fieldelt *x); + /* Return nonzero if x is zero. */ + + void (*dbl)(struct field *f, union fieldelt *d, union fieldelt *x); + /* Store 2 x in d. */ + + void (*add)(struct field *f, union fieldelt *d, + union fieldelt *x, union fieldelt *y); + /* Store x + y in d. */ + + void (*sub)(struct field *f, union fieldelt *d, + union fieldelt *x, union fieldelt *y); + /* Store x - y in d. */ + + void (*mul)(struct field *f, union fieldelt *d, + union fieldelt *x, union fieldelt *y); + /* Store x y in d. */ + + void (*sqr)(struct field *f, union fieldelt *d, union fieldelt *x); + /* Store x^2 in d. */ + + void (*inv)(struct field *f, union fieldelt *d, union fieldelt *x); + /* Store 1/x in d; it is an error + * for x to be zero. + */ + + int (*sqrt)(struct field *f, union fieldelt *d, union fieldelt *x); + /* If x is a square, store a square + * root (chosen arbitrarily) in d + * and return nonzero; otherwise + * return zero leaving d unchanged. + */ + + /* For internal use only. The standard methods call on these for + * detailed field-specific behaviour. + */ + void (*_reduce)(struct field *f, union fieldelt *x); + /* Reduce an intermediate result so + * that it's within whatever bounds + * are appropriate. + */ + + /* These functions are only important for prime fields. */ + void (*dbl)(struct field *f, union fieldelt *d, union fieldelt *x); + /* Store 2 x in d. */ + + void (*tpl)(struct field *f, union fieldelt *d, union fieldelt *x); + /* Store 3 x in d. */ + + void (*dql)(struct field *f, union fieldelt *d, union fieldelt *x); + /* Store 4 x in d. */ + + void (*hlv)(struct field *f, union fieldelt *d, union fieldelt *x); + /* Store x/2 in d. */ +}; + +/* Some standard methods for prime fields. */ + +static void gen_destroy(struct field *f) + { sfree(f); } + +static void prime_init(struct field *f, union fieldelt *x) + { x->n = 0; } + +static void prime_free(struct field *f, union fieldelt *x) + { if (x->n) { freebn(x->n); x->n = 0; } } + +static void prime_frombn(struct field *f, union fieldelt *d, Bignum n) + { f->ops->free(f, d); d->n = copybn(n); } + +static Bignum prime_tobn(struct field *f, union fieldelt *x) + { return copybn(x->n); } + +static Bignum prime_zerop(struct field *f, union fieldelt *x) + { return bignum_cmp(x->n, f->zero.n) == 0; } + +static void prime_add(struct field *f, union fieldelt *d, + union fieldelt *x, union fieldelt *y) +{ + Bignum sum = bigadd(x, y); + Bignum red = bigsub(sum, f->q); + + f->ops->free(f, d); + if (!red) + d->n = sum; + else { + freebn(sum); + d->n = red; + } +} + +static void prime_sub(struct field *f, union fieldelt *d, + union fieldelt *x, union fieldelt *y) +{ + Bignum raise = bigadd(x, f->q); + Bignum diff = bigsub(raise, y); + Bignum drop = bigsub(diff, f->q); + + f->ops->free(f, d); freebn(raise); + if (!drop) + d->n = diff; + else { + freebn(diff); + d->n = drop; + } +} + +static void prime_mul(struct field *f, union fieldelt *d, + union fieldelt *x, union fieldelt *y) +{ + Bignum prod = bigmul(x, y); + + f->ops->free(f, d); + d->n = prod; + f->ops->_reduce(f, d); +} + +static void gen_sqr(struct field *f, union fieldelt *d, union fieldelt *x) + { f->ops->mul(f->ops, d, x, x); } + +static void prime_inv(struct field *f, union fieldelt *d, union fieldelt *x) +{ + Bignum inv = modinv(x->n, f->q); + + f->ops->free(f, d); + d->n = inv; +} + +static int prime_sqrt(struct field *f, union fieldelt *d, union fieldelt *x) +{ + Bignum sqrt = modsqrt(x->n, f->q); + + if (!sqrt) return 0; + f->ops->free(f, d); + d->n = sqrt; + return 1; +} + +static void prime_dbl(struct field *f, union_fieldelt *d, union fieldelt *x) + { f->ops->add(f, d, x, x); } + +static void prime_tpl(struct field *f, union_fieldelt *d, union fieldelt *x) +{ + union_fieldelt t; + + if (d != x) { + f->ops->add(f, d, x, x); + f->ops->add(f, d, d, x); + } else { + f->ops->init(f, &t); + f->ops->copy(f, d, x); + f->ops->add(f, d, &t, &t); + f->ops->add(f, d, d, &t); + f->ops->free(f, &t); + } +} + +static void prime_qdl(struct field *f, union fieldelt *d, + union fieldelt *x) + { f->ops->add(f, d, x, x); f->ops->add(f, d, d, d); } + +static void prime_hlv(struct field *f, union fieldelt *d, union fieldelt *x) +{ + Bignum t, u; + + if (!x->n[0]) + f->ops->copy(f, d, x); + else { + /* The tedious answer is to multiply by the inverse of 2 in this + * field. But there's a better way: either x or q - x is actually + * divisible by 2, so the answer we want is either x >> 1 or p - + * ((p - x) >> 1) depending on whether x is even. + */ + if (!(x->n[1] & 1)) { + t = copybn(x->n); + shift_right(t + 1, t[0], 1); + } else { + u = bigsub(f->q, x); + shift_right(u + 1, u[0], 1); + t = bigsub(f->q, u); + freebn(u); + } + f->ops->free(f, d); + d->n = t; +} + +/* Now some utilities for modular reduction. All of the primes we're + * concerned about are of the form p = 2^n - d for `convenient' values of d + * -- such numbers are called `pseudo-Mersenne primes'. Suppose we're given + * a value x = x_0 2^n + x_1: then x - x_0 p = x_1 + x_0 d is clearly less + * than x and congruent to it mod p. So we can reduce x below 2^n by + * iterating this trick; if we're unlucky enough to have p <= x < 2^n then we + * can just subtract p. + * + * The values of d we'll be dealing with are specifically convenient because + * they satisfy the following properties. + * + * * We can write d = SUM_i d_i 2^i, with d_i in { -1, 0, +1 }. (This is + * no surprise.) + * + * * d > 0. + * + * * Very few d_i /= 0. With one small exception, if d_i /= 0 then i is + * divisible by 32. + * + * The following functions will therefore come in very handy. + */ + +static void add_imm_lsl(BignumInt *x, BignumInt y, unsigned shift) +{ + BignumDbl c = y << (shift % BIGNUM_INT_BITS); + + for (x += shift/BIGNUM_INT_BITS; c; x++) { + c += *x; + *x = c & BIGNUM_INT_MASK; + c >>= BIGNUM_INT_BITS; + } +} + +static void sub_imm_lsl(BignumInt *x, BignumInt y, unsigned shift) +{ + BignumDbl c = y << (shift % BIGNUM_INT_BITS); + BignumDbl t; + + c--; + for (x += shift/BIGNUM_INT_BITS; c; x++) { + c ^= BIGNUM_INT_MASK; + c += *x; + *x = c & BIGNUM_INT_MASK; + c >>= BIGNUM_INT_BITS; + } +} + +static void pmp_reduce(struct field *f, union fieldelt *xx, + void (*add_mul_d_lsl)(BignumInt *x, + BignumInt y, + unsigned shift)) +{ + /* Notation: write w = BIGNUM_INT_WORDS, and B = 2^w is the base we're + * working in. We assume that p = 2^n - d for some d. The job of + * add_mul_d_lsl is to add to its argument x the value y d 2^shift. + */ + + BignumInt top, *p, *x; + unsigned plen, xlen, i; + unsigned topbits, topclear; + Bignum t; + + /* Initialization: if x is shorter than p then there's nothing to do. + * Otherwise, ensure that it's at least one word longer, since this is + * required by the utility functions which add_mul_d_lsl will call. + */ + p = f->q + 1; plen = f->q[0]; + x = xx->n + 1; xlen = xx->n[0]; + if (xlen < plen) return; + else if (xlen == plen) { + t = newbn(plen + 1); + for (i = 0; i < plen; i++) t[i + 1] = x[i]; + t[plen] = 0; + freebn(xx->n); xx->n = t; + x = t + 1; xlen = plen + 1; + } + + /* Preparation: Work out the bit length of p. At the end of this, we'll + * have topbits such that n = w (plen - 1) + topbits, and topclear = w - + * topbits, so that n = w plen - topclear. + */ + while (plen > 0 && !p[plen - 1]) plen--; + top = p[plen - 1]; + assert(top); + if (top & BIGNUM_TOP_BIT) topbits = BIGNUM_INT_BITS; + else for (topbits = 0; top >> topbits; topbits++); + topclear = BIGNUM_INT_BITS - topbits; + + /* Step 1: Trim x down to the right number of words. */ + while (xlen > plen) { + + /* Pick out the topmost word; call it y for now. */ + y = x[xlen - 1]; + if (!y) { + xlen--; + continue; + } + + /* Observe that d == 2^n (mod p). Clear that top word. This + * effectively subtracts y B^{xlen-1} from x, so we must add back d + * 2^{w(xlen-1)-n} in order to preserve congruence modulo p. Since d + * is smaller than 2^n, this will reduce the absolute value of x, so + * we make (at least some) progress. In practice, we expect that d + * is a lot smaller. + * + * Note that w (xlen - 1) - n = w xlen - w - n = w (xlen - plen - 1) + * + topclear. + */ + x[xlen - 1] = 0; + add_mul_d_lsl(x + xlen - plen - 1, y, topclear); + } + + /* Step 2: Trim off the high bits of x, beyond the top bit of p. In more + * detail: if x >= 2^n > p, then write x = x_0 + 2^n y; then we can clear + * the top topclear bits of x, and add y d to preserve congruence mod p. + * + * If topclear is zero then there's nothing to do here: step 1 will + * already have arranged that x < 2^n. + */ + if (topclear) { + for (;;) { + y = x[xlen - 1] >> topbits; + if (!y) break; + x[xlen - 1] &= (1 << topbits) - 1; + add_mul_d_lsl(x, y, 0); + } + } + + /* Step 3: If x >= p then subtract p from x. */ + for (i = plen; i-- && x[i] == p[i]; ); + if (i >= plen || x[i] > p[i]) internal_sub(x, p, plen); + + /* We're done now. */ + d->n[0] = xlen; + bn_restore_invariant(d->n); +} + +/* Putting together a field-ops structure for a pseudo-Mersenne prime field. + */ + +static void pmp_destroy(struct field *f) + { freebn(f->q); sfree(f); } + +#define DEFINE_PMP_FIELD(fname) \ +static const struct field_ops fname##_fieldops { \ + pmp_destroy, \ + prime_init, prime_free, prime_copy, \ + prime_frombn, prime_tobn, \ + prime_zerop, \ + prime_add, prime_sub, prime_mul, gen_sqr, prime_inv, prime_sqrt, \ + fname_reduce, \ + gen_dbl, gen_tpl, gen_qdl, \ + prime_hlv \ +}; \ + \ +static struct field *make_##fname##_field(void) \ +{ \ + struct field *f = snew(struct field); \ + f->ops = &fname##_fieldops; \ + f->zero.n = Zero; \ + f->one.n = One; \ + f->q = bigunm_from_bytes(fname##_p, sizeof(fname##_p)); \ + return f; \ +} + +/* Some actual field definitions. */ + +static const unsigned char p256_p[] = { + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; + +static void p256_add_mul_d_lsl(BignumInt *x, BignumInt y, unsigned shift) +{ + /* p_{256} = 2^{256} - 2^{224} + 2^{192} + 2^{96} - 1 */ + add_imm_lsl(x, y, shift + 224); + sub_imm_lsl(x, y, shift + 192); + sub_imm_lsl(x, y, shift + 96); + add_imm_lsl(x, y, shift + 0); +} + +static void p256_reduce(struct field *f, union fieldelt *d) + { pmp_reduce(f, d, p256_add_mul_d_lsl); } + +DEFINE_PMP_FIELD(p256) + +static const unsigned char p384_p[] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff +}; + +static void p384_add_mul_d_lsl(BignumInt *x, BignumInt y, unsigned shift) +{ + /* p_{384} = 2^{384} - 2^{128} - 2^{96} + 2^{32} - 1 */ + add_imm_lsl(x, y, shift + 128); + add_imm_lsl(x, y, shift + 96); + sub_imm_lsl(x, y, shift + 32); + add_imm_lsl(x, y, shift + 0); +} + +static void p384_reduce(struct field *f, union fieldelt *d) + { pmp_reduce(f, d, p384_add_mul_d_lsl); } + +DEFINE_PMP_FIELD(p384) + +static const unsigned char p521_p[] = { + 0x01, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; + +static void p521_add_mul_d_lsl(BignumInt *x, BignumInt y, unsigned shift) + { add_imm_lsl(x, y, shift); } /* p_{521} = 2^{521} - 1 */ + +static void p521_reduce(struct field *f, union fieldelt *d) + { pmp_reduce(f, d, p521_add_mul_d_lsl); } + +DEFINE_PMP_FIELD(p521) + +/* And now, some actual elliptic curves. + * + * As with fields, we start by defining an abstract interface which may have + * various implementations (e.g., different algorithms with different + * performance properties, or based on different kinds of underlying fields. + */ + +/* An external-form elliptic curve point. */ +struct ecpt { + unsigned f; /* Flags */ +#define ECPTF_INF 1u /* Point is at infinity */ + union fieldelt x, y; /* x- and y-coordinates */ +}; + +/* An internal-form elliptic curve point. This is where most of the action + * happens. + */ +union iecpt { + struct { + union fieldelt x, y, z; /* Projective-form triple. */ + } jac; /* Jacobian projective coords. */ +}; + +struct ec { + const struct ec_ops *ops; /* Curve operations table. */ + struct field *f; /* The underlying finite field. */ + struct iecpt p; /* The well-known generator point */ + Bignum r; /* The size of the generated group */ + union { + struct { union fieldelt a, b; } w; /* Simple Weierstrass coeffs. */ + } u; +}; + +struct ec_ops { + void (*destroy)(struct ec *ec); /* Destroy the curve itself. */ + + void (*init)(struct ec *ec, union iecpt *p); + /* Initialize p (to null). */ + + void (*free)(struct ec *ec, union iecpt *p); + /* Free the point p. */ + + void (*setinf)(struct ec *ec, union iecpt *p); + /* Set p to be the point at infinity. + */ + + int (*infp)(struct ec *ec, union iecpt *p); + /* Answer whether p is infinite. */ + + void (*copy)(struct ec *ec, union iecpt *d, union iecpt *d); + /* Make p be a copy of d. */ + + void (*in)(struct ec *ec, union iecpt *d, struct ecpt *p); + /* Convert external-format p to + * internal-format d. + */ + + void (*out)(struct ec *ec, struct ecpt *d, union iecpt *p); + /* Convert internal-format p to + * external-format d. + */ + + void (*add)(struct ec *ec, union iecpt *d, + union iecpt *p, union iecpt *q); + /* Store in d the sum of the points p + * and q. + */ + + void (*sub)(struct ec *ec, union iecpt *d, + union iecpt *p, union iecpt *q); + /* Store in d the result of + * subtracting q from p. + */ + + void (*neg)(struct ec *ec, union iecpt *d, union iecpt *p); + /* Store in d the negation of p. */ +}; + +/* And now some common utilities. */ + +static void weier_destroy(struct ec *ec) +{ + ec->f->ops(ec->f, &ec->u.w.a); + ec->f->ops(ec->f, &ec->u.w.b); + freebn(ec->r); + f->ops->destroy(ec->f); + sfree(ec); +} + +static void jac_init(struct ec *ec, union iecpt *p) +{ + ec->f->ops->init(ec->f, &p->proj.x); + ec->f->ops->init(ec->f, &p->proj.y); + ec->f->ops->init(ec->f, &p->proj.z); +} + +static void jac_free(struct ec *ec, union iecpt *p) +{ + ec->f->ops->free(ec->f, &p->proj.x); + ec->f->ops->free(ec->f, &p->proj.y); + ec->f->ops->free(ec->f, &p->proj.z); +} + +static void jac_setinf(struct ec *ec, union iecpt *d) +{ + ec->f->ops->copy(ec->f, &d->proj.x, &ec->f->one); + ec->f->ops->copy(ec->f, &d->proj.y, &ec->f->one); + ec->f->ops->copy(ec->f, &d->proj.z, &ec->f->zero); +} + +static int jac_infp(struct ec *ec, union iecpt *d) + { return ec->f->ops->zerop(ec->f, &d->proj.z); } + +static void jac_copy(struct ec *ec, union iecpt *d, union iecpt *d) +{ + ec->f->ops->copy(ec->f, &d->proj.x, &p->proj.x); + ec->f->ops->copy(ec->f, &d->proj.y, &p->proj.y); + ec->f->ops->copy(ec->f, &d->proj.z, &p->proj.z); +} + +static void jac_in(struct ec *ec, union iecpt *d, struct ecpt *p) +{ + ec->f->ops->copy(ec->f, &d->x, &p->proj.x); + ec->f->ops->copy(ec->f, &d->y, &p->proj.y); + ec->f->ops->copy(ec->f, &d->z, &ec->f->one); +} + +static void jac_out(struct ec *ec, struct ecpt *d, union iecpt *p) +{ + union fieldelt iz, iz2, iz3; + + if (ec->f->ops->zerop(ec->f, &p->proj.z)) { + ec->f->ops->free(ec->f, d->x); + ec->f->ops->free(ec->f, d->y); + d->f = ECPTF_INF; + } else { + ec->f->ops->init(ec->f, &iz2); + ec->f->ops->init(ec->f, &iz3); + + ec->f->ops->inv(ec->f, &iz3, &p->proj.z); + ec->f->ops->sqr(ec->f, &iz2, &iz3); + ec->f->ops->mul(ec->f, &iz3, &iz3, &iz2); + + ec->f->ops->mul(ec->f, &d->x, &p->proj.x, &iz2); + ec->f->ops->mul(ec->f, &d->y, &p->proj.x, &iz3); + d->f = 0; + + ec->f->ops->free(ec->f, &iz2); + ec->f->ops->free(ec->f, &iz3); + } +} + +static void gen_sub(struct ec *ec, union iecpt *d, + union iecpt *p, union iecpt *q) +{ + union iecpt t; + + ec->ops->init(ec, &t); + ec->ops->neg(ec, &t, q); + ec->ops->add(ec, d, p, &t); + ec->ops->free(ec, &t); +} + +/* Finally, let's define some actual curve arithmetic functions. */ + +static void pwjac_dbl(struct ec *ec, union iecpt *d, union iecpt *p) +{ + struct field *f = ec->f; + union fieldelt m, s, t, u; + + if (ec->ops->infp(ec, p)) { + ec->setinf(ec, d); + return; + } + + f->ops->init(f, &m); + f->ops->init(f, &s); + f->ops->init(f, &t); + f->ops->init(f, &u); + + f->ops->sqr(f, &u, &p->proj.z); /* z^2 */ + f->ops->sqr(f, &t, &u); /* z^4 */ + f->ops->mul(f, &u, &t, &ec->u.w.a); /* a z^4 */ + f->ops->sqr(f, &m, &p->proj.x); /* x^2 */ + f->ops->tpl(f, &m, &m); /* 3 x^2 */ + f->ops->add(f, &m, &m, &u); /* m = 3 x^2 + a z^4 */ + + f->ops->dbl(f, &t, &p->proj.y); /* 2 y */ + f->ops->mul(f, &d->proj.z, &t, &p->proj.z); /* z' = 2 y z */ + + f->ops->sqr(f, &u, &t); /* 4 y^2 */ + f->ops->mul(f, &s, &u, &p->proj.x); /* s = 4 x y^2 */ + f->ops->sqr(f, &t, &u); /* 16 y^4 */ + f->ops->hlv(f, &t, &t); /* t = 8 y^4 */ + + f->ops->dbl(f, &u, &s); /* 2 s */ + f->ops->sqr(f, &d->proj.x, &m); /* m^2 */ + f->ops->sub(f, &d->proj.x, &d->proj.x, &u); /* x' = m^2 - 2 s */ + + f->ops->sub(f, &u, &s, &d->proj.x); /* s - x' */ + f->ops->mul(f, &d->proj.y, &m, &u); /* m (s - x') */ + f->ops->sub(f, &d->proj.y, &d->proj.y, &t); /* y' = m (s - x') - t */ + + f->ops->free(f, &m); + f->ops->free(f, &s); + f->ops->free(f, &t); + f->ops->free(f, &u); +} + +static void pwjac_add(struct ec *ec, union iecpt *d, + union iecpt *p, union iecpt *q) +{ + struct field *f = ec->f; + union fieldelt m, r, s, ss, t, u, uu, w; + + if (a == b) { pwjac_dbl(ec, d, p); return; } + else if (ec->ops->infp(ec, p)) { ec->ops->copy(ec, d, q); return; } + else if (ec->ops->infp(ec, q)) { ec->ops->copy(ec, d, p); return; } + + f->ops->init(f, &m); + f->ops->init(f, &r); + f->ops->init(f, &s); + f->ops->init(f, &ss); + f->ops->init(f, &t); + f->ops->init(f, &u); + f->ops->init(f, &uu); + f->ops->init(f, &w); + + f->ops->sqr(f, &s, &p->proj.z); /* z_0^2 */ + f->ops->mul(f, &u, &s, &q->proj.x); /* u = x_1 z_0^2 */ + f->ops->mul(f, &s, &s, &p->proj.z); /* z_0^3 */ + f->ops->mul(f, &s, &s, &q->proj.y); /* s = y_1 z_0^3 */ + + f->ops->sqr(f, &ss, &q->proj.z); /* z_1^2 */ + f->ops->mul(f, &uu, &ss, &p->proj.x); /* uu = x_0 z_1^2 */ + f->ops->mul(f, &ss, &ss, &q->proj.z); /* z_1^3 */ + f->ops->mul(f, &ss, &ss, &p->proj.y); /* ss = y_0 z_1^3 */ + + f->ops->sub(f, &w, &uu, &u); /* w = uu - u */ + f->ops->sub(f, &r, &ss, &s); /* r = ss - s */ + if (f->ops->zerop(f, w)) { + if (f->ops->zerop(f, r)) ec->ops->dbl(ec, d, p); + else ec->ops->setinf(ec, d); + goto cleanup; + } + + f->ops->add(f, &t, &u, &uu); /* t = uu + u */ + f->ops->add(f, &m, &s, &ss); /* m = ss + s */ + + f->ops->mul(f, &uu, &p->proj.z, &w); /* z_0 w */ + f->ops->mul(f, &d->proj.z, &uu, &q->proj.z); /* z' = z_0 z_1 w */ + + f->ops->sqr(f, &s, &w); /* w^2 */ + f->ops->mul(f, &u, &s, &t); /* t w^2 */ + f->ops->mul(f, &s, &s, &w); /* w^3 */ + f->ops->mul(f, &s, &s, &m); /* m w^3 */ + + f->ops->sqr(f, &m, &r); /* r^2 */ + f->ops->sub(f, &d->proj.x, &m, &u); /* x' = r^2 - t w^2 */ + + f->ops->dbl(f, &m, &d->proj.x); /* 2 x' */ + f->ops->sub(f, &u, &u, &m); /* v = t w^2 - 2 x' */ + f->ops->mul(f, &u, &u, &r); /* v r */ + f->ops->sub(f, &u, &u, &s); /* v r - m w^3 */ + f->ops->hlv(f, &d->proj.y, &m); /* y' = (v r - m w^3)/2 */ + +cleanup: + f->ops->free(f, &m); + f->ops->free(f, &r); + f->ops->free(f, &s); + f->ops->free(f, &ss); + f->ops->free(f, &t); + f->ops->free(f, &u); + f->ops->free(f, &uu); + f->ops->free(f, &w); +} + diff --git a/unix/hacks.am b/unix/hacks.am new file mode 100644 index 00000000..57796305 --- /dev/null +++ b/unix/hacks.am @@ -0,0 +1,4 @@ +noinst_PROGRAMS = testbn +testbn_SOURCES = ../sshbn.c ../misc.c ../conf.c ../tree234.c +testbn_SOURCES += ../unix/uxmisc.c +testbn_CFLAGS = -DTESTBN