mdw@git.distorted.org.uk Git - secnet/blame_incremental

... / ...

Commit	Line	Data
	1	/* --c--
	2	*
	3	* Simple scalar fields
	4	*
	5	* (c) 2017 Straylight/Edgeware
	6	*/
	7
	8	/----- Licensing notice --------------------------------------------------
	9	*
	10	* This file is part of secnet.
	11	* See README for full list of copyright holders.
	12	*
	13	* secnet is free software; you can redistribute it and/or modify it
	14	* under the terms of the GNU General Public License as published by
	15	* the Free Software Foundation; either version d of the License, or
	16	* (at your option) any later version.
	17	*
	18	* secnet is distributed in the hope that it will be useful, but
	19	* WITHOUT ANY WARRANTY; without even the implied warranty of
	20	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	21	* General Public License for more details.
	22	*
	23	* You should have received a copy of the GNU General Public License
	24	* version 3 along with secnet; if not, see
	25	* https://www.gnu.org/licenses/gpl.html.
	26	*
	27	* This file was originally part of Catacomb, but has been automatically
	28	* modified for incorporation into secnet: see `import-catacomb-crypto'
	29	* for details.
	30	*
	31	* Catacomb is free software; you can redistribute it and/or modify
	32	* it under the terms of the GNU Library General Public License as
	33	* published by the Free Software Foundation; either version 2 of the
	34	* License, or (at your option) any later version.
	35	*
	36	* Catacomb is distributed in the hope that it will be useful,
	37	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	38	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	39	* GNU Library General Public License for more details.
	40	*
	41	* You should have received a copy of the GNU Library General Public
	42	* License along with Catacomb; if not, write to the Free
	43	* Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
	44	* MA 02111-1307, USA.
	45	*/
	46
	47	/----- Header files ------------------------------------------------------/
	48
	49	#include <string.h>
	50
	51	#include "scaf.h"
	52
	53	/----- Debugging utilties ------------------------------------------------/
	54
	55	#ifdef SCAF_DEBUG
	56
	57	#include <stdio.h>
	58
	59	#include "mp.h"
	60	#include "mpint.h"
	61	#include "mptext.h"
	62
	63	static void scaf_dump(const char what, const scaf_piece x,
	64	size_t npiece, size_t piecewd)
	65	{
	66	mp y = MP_ZERO, t = MP_NEW;
	67	size_t i;
	68	unsigned o = 0;
	69
	70	for (i = 0; i < npiece; i++) {
	71	t = mp_fromuint64(t, x[i]);
	72	t = mp_lsl(t, t, o);
	73	y = mp_add(y, y, t);
	74	o += piecewd;
	75	}
	76	printf(";; %s", what); MP_PRINT("", y); putchar('\n');
	77	mp_drop(y); mp_drop(t);
	78	}
	79
	80	static void scaf_dumpdbl(const char what, const scaf_dblpiece x,
	81	size_t npiece, size_t piecewd)
	82	{
	83	mp y = MP_ZERO, t = MP_NEW;
	84	size_t i;
	85	unsigned o = 0;
	86
	87	for (i = 0; i < npiece; i++) {
	88	t = mp_fromuint64(t, x[i]);
	89	t = mp_lsl(t, t, o);
	90	y = mp_add(y, y, t);
	91	o += piecewd;
	92	}
	93	printf(";; %s", what); MP_PRINT("", y); putchar('\n');
	94	mp_drop(y); mp_drop(t);
	95	}
	96
	97	#endif
	98
	99	/----- Main code ---------------------------------------------------------/
	100
	101	/* --- @scaf_load@ --- *
	102	*
	103	* Arguments: @scaf_piece *z@ = where to write the result
	104	* @const octet *b@ = source buffer to read
	105	* @size_t sz@ = size of the source buffer
	106	* @size_t npiece@ = number of pieces to read
	107	* @unsigned piecewd@ = nominal width of pieces in bits
	108	*
	109	* Returns: ---
	110	*
	111	* Use: Loads a little-endian encoded scalar into a vector @z@ of
	112	* single-precision pieces.
	113	*/
	114
	115	void scaf_load(scaf_piece z, const octet b, size_t sz,
	116	size_t npiece, unsigned piecewd)
	117	{
	118	uint32 a, m = ((scaf_piece)1 << piecewd) - 1;
	119	unsigned i, j, n;
	120
	121	for (i = j = n = 0, a = 0; i < sz; i++) {
	122	a \|= b[i] << n; n += 8;
	123	if (n >= piecewd) {
	124	z[j++] = a&m; a >>= piecewd; n -= piecewd;
	125	if (j >= npiece) return;
	126	}
	127	}
	128	z[j++] = a;
	129	while (j < npiece) z[j++] = 0;
	130	}
	131
	132	/* --- @scaf_loaddbl@ --- *
	133	*
	134	* Arguments: @scaf_dblpiece *z@ = where to write the result
	135	* @const octet *b@ = source buffer to read
	136	* @size_t sz@ = size of the source buffer
	137	* @size_t npiece@ = number of pieces to read
	138	* @unsigned piecewd@ = nominal width of pieces in bits
	139	*
	140	* Returns: ---
	141	*
	142	* Use: Loads a little-endian encoded scalar into a vector @z@ of
	143	* double-precision pieces.
	144	*/
	145
	146	void scaf_loaddbl(scaf_dblpiece z, const octet b, size_t sz,
	147	size_t npiece, unsigned piecewd)
	148	{
	149	uint32 a, m = ((scaf_piece)1 << piecewd) - 1;
	150	unsigned i, j, n;
	151
	152	for (i = j = n = 0, a = 0; i < sz; i++) {
	153	a \|= b[i] << n; n += 8;
	154	if (n >= piecewd) {
	155	z[j++] = a&m; a >>= piecewd; n -= piecewd;
	156	if (j >= npiece) return;
	157	}
	158	}
	159	z[j++] = a;
	160	while (j < npiece) z[j++] = 0;
	161	}
	162
	163	/* --- @scaf_store@ --- *
	164	*
	165	* Arguments: @octet *b@ = buffer to fill in
	166	* @size_t sz@ = size of the buffer
	167	* @const scaf_piece *x@ = scalar to store
	168	* @size_t npiece@ = number of pieces in @x@
	169	* @unsigned piecewd@ = nominal width of pieces in bits
	170	*
	171	* Returns: ---
	172	*
	173	* Use: Stores a scalar in a vector of single-precison pieces as a
	174	* little-endian vector of bytes.
	175	*/
	176
	177	void scaf_store(octet b, size_t sz, const scaf_piece x,
	178	size_t npiece, unsigned piecewd)
	179	{
	180	uint32 a;
	181	unsigned i, j, n;
	182
	183	for (i = j = n = 0, a = 0; i < npiece; i++) {
	184	a \|= x[i] << n; n += piecewd;
	185	while (n >= 8) {
	186	b[j++] = a&0xffu; a >>= 8; n -= 8;
	187	if (j >= sz) return;
	188	}
	189	}
	190	b[j++] = a;
	191	memset(b + j, 0, sz - j);
	192	}
	193
	194	/* --- @scaf_mul@ --- *
	195	*
	196	* Arguments: @scaf_dblpiece *z@ = where to put the answer
	197	* @const scaf_piece x, y@ = the operands
	198	* @size_t npiece@ = the length of the operands
	199	*
	200	* Returns: ---
	201	*
	202	* Use: Multiply two scalars. The destination must have space for
	203	* @2*npiece@ pieces (though the last one will always be zero).
	204	* The result is not reduced.
	205	*/
	206
	207	void scaf_mul(scaf_dblpiece z, const scaf_piece x, const scaf_piece *y,
	208	size_t npiece)
	209	{
	210	unsigned i, j;
	211
	212	for (i = 0; i < 2*npiece; i++) z[i] = 0;
	213
	214	for (i = 0; i < npiece; i++)
	215	for (j = 0; j < npiece; j++)
	216	z[i + j] += (scaf_dblpiece)x[i]*y[j];
	217	}
	218
	219	/* --- @scaf_reduce@ --- *
	220	*
	221	* Arguments: @scaf_piece *z@ = where to write the result
	222	* @const scaf_dblpiece *x@ = the operand to reduce
	223	* @const scaf_piece *l@ = the modulus, in internal format
	224	* @const scaf_piece *mu@ = scaled approximation to @1/l@
	225	* @size_t npiece@ = number of pieces in @l@
	226	* @unsigned piecewd@ = nominal width of a piece in bits
	227	* @scaf_piece scratch@ = @3npiece@ scratch pieces
	228	*
	229	* Returns: ---
	230	*
	231	* Use: Reduce @x@ (a vector of @2*npiece@ double-precision pieces)
	232	* modulo @l@ (a vector of @npiece@ single-precision pieces),
	233	* writing the result to @z@.
	234	*
	235	* Write @n = npiece@, @w = piecewd@, and %$B = 2^w$%. The
	236	* operand @mu@ must contain %$\lfloor B^{2n}/l \rfloor$%, in
	237	* @npiece + 1@ pieces. Furthermore, we must have
	238	* %$3 l < B^n$%. (Fiddle with %$w$% if necessary.)
	239	*/
	240
	241	void scaf_reduce(scaf_piece z, const scaf_dblpiece x,
	242	const scaf_piece l, const scaf_piece mu,
	243	size_t npiece, unsigned piecewd, scaf_piece *scratch)
	244	{
	245	unsigned i, j;
	246	scaf_piece t = scratch, q = scratch + 2*npiece;
	247	scaf_piece u, m = ((scaf_piece)1 << piecewd) - 1;
	248	scaf_dblpiece c;
	249
	250	/* This here is the hard part.
	251	*
	252	* Let w = PIECEWD, let n = NPIECE, and let B = 2^w. We must have
	253	* B^(n-1) <= l < B^n.
	254	*
	255	* The argument MU contains pieces of the quantity µ = floor(B^2n/l), which
	256	* is a scaled approximation to 1/l. We'll calculate
	257	*
	258	* q = floor(µ floor(x/B^(n-1))/B^(n+1))
	259	*
	260	* which is an underestimate of x/l.
	261	*
	262	* With a bit more precision: by definition, u - 1 < floor(u) <= u. Hence,
	263	*
	264	* B^2n/l - 1 < µ <= B^2/l
	265	*
	266	* and
	267	*
	268	* x/B^(n-1) - 1 < floor(x/B^(n-1)) <= x/B^(n-1)
	269	*
	270	* Multiplying these together, and dividing through by B^(n+1), gives
	271	*
	272	* floor(x/l - B^(n-1)/l - x/B^2n + 1/B^(n+1)) <=
	273	* q <= µ floor(x/B^(n-1))/B^(n+1) <= floor(x/l)
	274	*
	275	* Now, noticing that x < B^2n and l > B^(n-1) shows that x/B^2n and
	276	* B^(n-1)/l are each less than 1; hence
	277	*
	278	* floor(x/l) - 2 <= q <= floor(x/l) <= x/l
	279	*
	280	* Now we set r = x - q l. Certainly, r == x (mod l); and
	281	*
	282	* 0 <= r < x - l floor(x/l) + 2 l < 3 l < B^n
	283	*/
	284
	285	/* Before we start on the fancy stuff, we need to resolve the pending
	286	* carries in x. We'll be doing the floor division by just ignoring some
	287	* of the pieces, and it would be bad if we missed some significant bits.
	288	* Of course, this means that we don't actually have to store the low
	289	* NPIECE - 1 pieces of the result.
	290	*/
	291	for (i = 0, c = 0; i < 2*npiece; i++)
	292	{ c += x[i]; t[i] = c&m; c >>= piecewd; }
	293
	294	/* Now we calculate q. If we calculate this in product-scanning order, we
	295	* can avoid having to store the low NPIECE + 1 pieces of the product as
	296	* long as we keep track of the carry out properly. Conveniently, NMU =
	297	* NPIECE + 1, which keeps the loop bounds easy in the first pass.
	298	*
	299	* Furthermore, because we know that r fits in NPIECE pieces, we only need
	300	* the low NPIECE pieces of q.
	301	*/
	302	for (i = 0, c = 0; i < npiece + 1; i++) {
	303	for (j = 0; j <= i; j++)
	304	c += (scaf_dblpiece)t[j + npiece - 1]*mu[i - j];
	305	c >>= piecewd;
	306	}
	307	for (i = 0; i < npiece; i++) {
	308	for (j = i + 1; j < npiece + 1; j++)
	309	c += (scaf_dblpiece)t[j + npiece - 1]*mu[npiece + 1 + i - j];
	310	q[i] = c&m; c >>= piecewd;
	311	}
	312
	313	/* Next, we calculate r - q l in z. Product-scanning seems to be working
	314	* out for us, and this time it will save us needing a large temporary
	315	* space for the product q l as we go. On the downside, we have to track
	316	* the carries from the multiplication and subtraction separately.
	317	*
	318	* Notice that the result r is at most NPIECE pieces long, so we can stop
	319	* once we have that many.
	320	*/
	321	u = 1; c = 0;
	322	for (i = 0; i < npiece; i++) {
	323	for (j = 0; j <= i; j++) c += (scaf_dblpiece)q[j]*l[i - j];
	324	u += t[i] + ((scaf_piece)(c&m) ^ m);
	325	z[i] = u&m; u >>= piecewd; c >>= piecewd;
	326	}
	327
	328	/* Finally, two passes of conditional subtraction. Calculate t = z - l; if
	329	* there's no borrow out the top, then update z = t; otherwise leave t
	330	* alone.
	331	*/
	332	for (i = 0; i < 2; i++) {
	333	for (j = 0, u = 1; j < npiece; j++) {
	334	u += z[j] + (l[j] ^ m);
	335	t[j] = u&m; u >>= piecewd;
	336	}
	337	for (j = 0, u = -u; j < npiece; j++) z[j] = (t[j]&u) \| (z[j]&~u);
	338	}
	339	}
	340
	341	/----- That's all, folks -------------------------------------------------/