mdw@git.distorted.org.uk Git - catacomb/blame_incremental

... / ...

Commit	Line	Data
	1	/* -*-c-*-
	2	*
	3	* Efficient reduction modulo sparse binary polynomials
	4	*
	5	* (c) 2004 Straylight/Edgeware
	6	*/
	7
	8	/*----- Licensing notice --------------------------------------------------*
	9	*
	10	* This file is part of Catacomb.
	11	*
	12	* Catacomb is free software; you can redistribute it and/or modify
	13	* it under the terms of the GNU Library General Public License as
	14	* published by the Free Software Foundation; either version 2 of the
	15	* License, or (at your option) any later version.
	16	*
	17	* Catacomb is distributed in the hope that it will be useful,
	18	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	19	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	20	* GNU Library General Public License for more details.
	21	*
	22	* You should have received a copy of the GNU Library General Public
	23	* License along with Catacomb; if not, write to the Free
	24	* Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
	25	* MA 02111-1307, USA.
	26	*/
	27
	28	/*----- Header files ------------------------------------------------------*/
	29
	30	#include <mLib/alloc.h>
	31	#include <mLib/darray.h>
	32	#include <mLib/macros.h>
	33
	34	#include "gf.h"
	35	#include "gfreduce.h"
	36	#include "gfreduce-exp.h"
	37	#include "fibrand.h"
	38	#include "mprand.h"
	39
	40	/*----- Data structures ---------------------------------------------------*/
	41
/* Dynamic-array type holding @gfreduce_instr@ elements; the generator below
 * accumulates the compiled instruction sequence in one of these before
 * copying it into the reduction context.
 */
DA_DECL(instr_v, gfreduce_instr);
	43
	44	/*----- Main code ---------------------------------------------------------*/
	45
	46	/* --- What's going on here? --- *
	47	*
	48	* Let's face it, @gfx_div@ sucks. It works (I hope), but it's not in any
	49	* sense fast. Here, we do efficient reduction modulo sparse polynomials.
	50	* (It works for arbitrary polynomials, but isn't efficient for dense ones.)
	51	*
	52	* Suppose that %$p = x^n + p'$% where %$p' = \sum_{0\le i<n} p_i x^i$%,
	53	* hopefully with only a few %$p_i \ne 0$%. We're going to compile %$p$%
	54	* into a sequence of instructions which can be used to perform reduction
	55	* modulo %$p$%. The important observation is that
	56	* %$x^n \equiv p' \pmod p$%.
	57	*
	58	* Suppose we're working with %$w$%-bit words; let %$n = N w + n'$% with
	59	* %$0 \le n' < w$%. Let %$u(x)$% be some arbitrary polynomial. Write
	60	* %$u = z x^k + u'$% with %$\deg u' < k \ge n$%. Then a reduction step uses
	61	* that %$u \equiv u' + z p' x^{k-n} \pmod p$%: the right hand side has
	62	* degree %$\max \{ \deg u', k + \deg p' - n + \deg z \} < \deg u$%, so this
	63	* makes progress towards a complete reduction.
	64	*
	65	* The compiled instruction sequence computes
	66	* %$u' + z p' x^{k-n} = u' + \sum_{0\le i<n} p_i z x^{k-n+i}$%.
	67	*/
	68
	69	/* --- @gfreduce_create@ --- *
	70	*
	71	* Arguments: @gfreduce *r@ = structure to fill in
	72	* @mp *p@ = a (hopefully sparse) polynomial
	73	*
	74	* Returns: ---
	75	*
	76	* Use: Initializes a context structure for reduction.
	77	*/
	78
	79	struct gen {
	80	unsigned f; /* Flags */
	81	#define f_lsr 1u /* Overflow from previous word */
	82	#define f_load 2u /* Outstanding @LOAD@ */
	83	#define f_fip 4u /* Final-pass offset is set */
	84	instr_v iv; /* Instruction vector */
	85	size_t fip; /* Offset for final-pass reduction */
	86	size_t w; /* Currently loaded target word */
	87	size_t wi; /* Left-shifts for current word */
	88	gfreduce r; / Reduction context pointer */
	89	};
	90
	91	#define INSTR(g_, op_, arg_) do { \
	92	struct gen *_g = (g_); \
	93	instr_v *_iv = &_g->iv; \
	94	size_t _i = DA_LEN(_iv); \
	95	\
	96	DA_ENSURE(_iv, 1); \
	97	DA(_iv)[_i].op = (op_); \
	98	DA(_iv)[_i].arg = (arg_); \
	99	DA_EXTEND(_iv, 1); \
	100	} while (0)
	101
	102	static void emit_load(struct gen *g, size_t w)
	103	{
	104	/* --- If this is not the low-order word then note final-pass start --- *
	105	*
	106	* Once we've eliminated the whole high-degree words, there will possibly
	107	* remain a few high-degree bits. We can further reduce the subject
	108	* polynomial by subtracting an appropriate multiple of %$p'$%, but if we
	109	* do this naively we'll end up addressing `low-order' words beyond the
	110	* bottom of our input. We solve this problem by storing an alternative
	111	* start position for this final pass (which works because we scan bits
	112	* right-to-left).
	113	*/
	114
	115	if (!(g->f & f_fip) && w < g->r->lim) {
	116	g->fip = DA_LEN(&g->iv);
	117	g->f \|= f_fip;
	118	}
	119
	120	/* --- Actually emit the instruction --- */
	121
	122	INSTR(g, GFRI_LOAD, w);
	123	g->f \|= f_load;
	124	g->w = w;
	125	}
	126
	127	static void emit_right_shifts(struct gen *g)
	128	{
	129	gfreduce_instr *ip;
	130	size_t i, wl;
	131
	132	/* --- Close off the current word --- *
	133	*
	134	* If we shifted into this current word with a nonzero bit offset, then
	135	* we'll also need to arrange to perform a sequence of right shifts into
	136	* the following word, which we might as well do by scanning the
	137	* instruction sequence (which starts at @wi@).
	138	*
	139	* Either way, we leave a @LOAD@ unmatched if there was one before, in the
	140	* hope that callers have an easier time; @g->w@ is updated to reflect the
	141	* currently open word.
	142	*/
	143
	144	if (!(g->f & f_lsr))
	145	return;
	146
	147	wl = DA_LEN(&g->iv);
	148	INSTR(g, GFRI_STORE, g->w);
	149	emit_load(g, g->w - 1);
	150	for (i = g->wi; i < wl; i++) {
	151	ip = &DA(&g->iv)[i];
	152	assert(ip->op == GFRI_LSL);
	153	if (ip->arg)
	154	INSTR(g, GFRI_LSR, MPW_BITS - ip->arg);
	155	}
	156	g->f &= ~f_lsr;
	157	}
	158
	159	static void ensure_loaded(struct gen *g, size_t w)
	160	{
	161	if (!(g->f & f_load)) {
	162	emit_load(g, w);
	163	g->wi = DA_LEN(&g->iv);
	164	} else if (w != g->w) {
	165	emit_right_shifts(g);
	166	if (w != g->w) {
	167	INSTR(g, GFRI_STORE, g->w);
	168	emit_load(g, w);
	169	}
	170	g->wi = DA_LEN(&g->iv);
	171	}
	172	}
	173
	174	void gfreduce_create(gfreduce r, mp p)
	175	{
	176	struct gen g = { 0, DA_INIT };
	177	unsigned long d;
	178	unsigned dw;
	179	mpscan sc;
	180	unsigned long i;
	181	size_t w, bb;
	182
	183	/* --- Sort out the easy stuff --- */
	184
	185	g.r = r;
	186	d = mp_bits(p); assert(d); d--;
	187	r->lim = d/MPW_BITS;
	188	dw = d%MPW_BITS;
	189	if (!dw)
	190	r->mask = 0;
	191	else {
	192	r->mask = MPW(((mpw)-1) << dw);
	193	r->lim++;
	194	}
	195	r->p = mp_copy(p);
	196
	197	/* --- How this works --- *
	198	*
	199	* The instruction sequence is run with two ambient parameters: a pointer
	200	* (usually) just past the most significant word of the polynomial to be
	201	* reduced; and a word %$z$% which is the multiple of %$p'$% we are meant
	202	* to add.
	203	*
	204	* The sequence visits each word of the polynomial at most once. Suppose
	205	* %$u = z x^{w N} + u'$%; our pointer points just past the end of %$u'$%.
	206	* Word %$I$% of %$u'$% will be affected by modulus bits %$p_i$% where
	207	* %$(N - I - 1) w + 1 \le i \le (N - I + 1) w - 1$%, so %$p_i$% affects
	208	* word %$I = \lceil (n - i + 1)/w \rceil$% and (if %$i$% is not a multiple
	209	* of %$w$%) also word %$I - 1$%.
	210	*
	211	* We have four instructions: @LOAD@ reads a specified word of %$u$% into an
	212	* accumulator, and @STORE@ stores it back (we'll always store back to the
	213	* same word we most recently read, but this isn't a requirement); and
	214	* @LSL@ and @LSR@, which XOR in appropriately shifted copies of %$z$% into
	215	* the accumulator. So a typical program will contain sequences of @LSR@
	216	* and @LSL@ instructions sandwiched between @LOAD@/@STORE@ pairs.
	217	*
	218	* We do a single right-to-left pass across %$p$%.
	219	*/
	220
	221	bb = MPW_BITS - dw;
	222
	223	for (i = 0, mp_scan(&sc, p); mp_step(&sc) && i < d; i++) {
	224	if (!mp_bit(&sc))
	225	continue;
	226
	227	/* --- We've found a set bit, so work out which word it affects --- *
	228	*
	229	* In general, a bit affects two words: it needs to be shifted left into
	230	* one, and shifted right into the next. We find the former here.
	231	*/
	232
	233	w = (d - i + MPW_BITS - 1)/MPW_BITS;
	234
	235	/* --- Concentrate on the appropriate word --- */
	236
	237	ensure_loaded(&g, w);
	238
	239	/* --- Accumulate a new @LSL@ instruction --- *
	240	*
	241	* If this was a nonzero shift, then we'll need to arrange to do right
	242	* shifts into the following word.
	243	*/
	244
	245	INSTR(&g, GFRI_LSL, (bb + i)%MPW_BITS);
	246	if ((bb + i)%MPW_BITS)
	247	g.f \|= f_lsr;
	248	}
	249
	250	/* --- Wrapping up --- *
	251	*
	252	* We probably need a final @STORE@, and maybe a sequence of right shifts.
	253	*/
	254
	255	if (g.f & f_load) {
	256	emit_right_shifts(&g);
	257	INSTR(&g, GFRI_STORE, g.w);
	258	}
	259
	260	/* --- Copy the instruction vector.
	261	*
	262	* If we've not set a final-pass offset yet then now would be an excellent
	263	* time. Obviously it should be right at the end, because there's nothing
	264	* for a final pass to do.
	265	*/
	266
	267	r->in = DA_LEN(&g.iv);
	268	r->iv = xmalloc(r->in * sizeof(gfreduce_instr));
	269	memcpy(r->iv, DA(&g.iv), r->in * sizeof(gfreduce_instr));
	270
	271	if (!(g.f & f_fip)) g.fip = DA_LEN(&g.iv);
	272	r->fiv = r->iv + g.fip;
	273
	274	DA_DESTROY(&g.iv);
	275	}
	276
/* The instruction-emitting macro and the generator flag names are only
 * meaningful to the generator code above, so retire them here.
 */
#undef INSTR

#undef f_lsr
#undef f_load
#undef f_fip
	282
	283	/* --- @gfreduce_destroy@ --- *
	284	*
	285	* Arguments: @gfreduce *r@ = structure to free
	286	*
	287	* Returns: ---
	288	*
	289	* Use: Reclaims the resources from a reduction context.
	290	*/
	291
	292	void gfreduce_destroy(gfreduce *r)
	293	{
	294	mp_drop(r->p);
	295	xfree(r->iv);
	296	}
	297
	298	/* --- @gfreduce_dump@ --- *
	299	*
	300	* Arguments: @const gfreduce *r@ = structure to dump
	301	* @FILE *fp@ = file to dump on
	302	*
	303	* Returns: ---
	304	*
	305	* Use: Dumps a reduction context.
	306	*/
	307
	308	void gfreduce_dump(const gfreduce r, FILE fp)
	309	{
	310	size_t i;
	311
	312	fprintf(fp, "poly = "); mp_writefile(r->p, fp, 16);
	313	fprintf(fp, "\n lim = %lu; mask = %lx\n",
	314	(unsigned long)r->lim, (unsigned long)r->mask);
	315	for (i = 0; i < r->in; i++) {
	316	static const char *opname[] = { "load", "lsl", "lsr", "store" };
	317	if (&r->iv[i] == r->fiv)
	318	fputs("final:\n", fp);
	319	assert(r->iv[i].op < N(opname));
	320	fprintf(fp, " %s %lu\n",
	321	opname[r->iv[i].op],
	322	(unsigned long)r->iv[i].arg);
	323	}
	324	if (&r->iv[i] == r->fiv)
	325	fputs("final:\n", fp);
	326	}
	327
	328	/* --- @gfreduce_do@ --- *
	329	*
	330	* Arguments: @const gfreduce *r@ = reduction context
	331	* @mp *d@ = destination
	332	* @mp *x@ = source
	333	*
	334	* Returns: Destination, @x@ reduced modulo the reduction poly.
	335	*/
	336
	337	static void run(const gfreduce_instr i, const gfreduce_instr il,
	338	mpw *v, mpw z)
	339	{
	340	mpw w = 0;
	341
	342	for (; i < il; i++) {
	343	switch (i->op) {
	344	case GFRI_LOAD: w = *(v - i->arg); break;
	345	case GFRI_LSL: w ^= z << i->arg; break;
	346	case GFRI_LSR: w ^= z >> i->arg; break;
	347	case GFRI_STORE: *(v - i->arg) = MPW(w); break;
	348	default: abort();
	349	}
	350	}
	351	}
	352
	353	mp gfreduce_do(const gfreduce r, mp d, mp x)
	354	{
	355	mpw v, vl;
	356	const gfreduce_instr *il;
	357	mpw z;
	358
	359	/* --- Try to reuse the source's space --- */
	360
	361	MP_COPY(x);
	362	if (d) MP_DROP(d);
	363	MP_DEST(x, MP_LEN(x), x->f);
	364
	365	/* --- Do the reduction --- */
	366
	367	il = r->iv + r->in;
	368	if (MP_LEN(x) >= r->lim) {
	369	v = x->v + r->lim;
	370	vl = x->vl;
	371	while (vl-- > v) {
	372	while (*vl) {
	373	z = *vl;
	374	*vl = 0;
	375	run(r->iv, il, vl, z);
	376	}
	377	}
	378	if (r->mask) {
	379	while (*vl & r->mask) {
	380	z = *vl & r->mask;
	381	*vl &= ~r->mask;
	382	run(r->fiv, il, vl, z);
	383	}
	384	}
	385	}
	386
	387	/* --- Done --- */
	388
	389	MP_SHRINK(x);
	390	return (x);
	391	}
	392
	393	/* --- @gfreduce_sqrt@ --- *
	394	*
	395	* Arguments: @const gfreduce *r@ = pointer to reduction context
	396	* @mp *d@ = destination
	397	* @mp *x@ = some polynomial
	398	*
	399	* Returns: The square root of @x@ modulo @r->p@, or null.
	400	*/
	401
	402	mp gfreduce_sqrt(const gfreduce r, mp d, mp x)
	403	{
	404	mp *y = MP_COPY(x);
	405	mp z, spare = MP_NEW;
	406	unsigned long m = mp_bits(r->p) - 1;
	407	unsigned long i;
	408
	409	/* --- This is pretty easy --- *
	410	*
	411	* Note that %$x = x^{2^m}$%; therefore %$(x^{2^{m-1}})^2 = x^{2^m} = x$%,
	412	* so %$x^{2^{m-1}}$% is the square root we seek.
	413	*/
	414
	415	for (i = 0; i < m - 1; i++) {
	416	mp *t = gf_sqr(spare, y);
	417	spare = y;
	418	y = gfreduce_do(r, t, t);
	419	}
	420	z = gf_sqr(spare, y);
	421	z = gfreduce_do(r, z, z);
	422	if (!MP_EQ(x, z)) {
	423	mp_drop(y);
	424	y = 0;
	425	}
	426	mp_drop(z);
	427	mp_drop(d);
	428	return (y);
	429	}
	430
	431	/* --- @gfreduce_trace@ --- *
	432	*
	433	* Arguments: @const gfreduce *r@ = pointer to reduction context
	434	* @mp *x@ = some polynomial
	435	*
	436	* Returns: The trace of @x@. (%$\Tr(x)=x + x^2 + \cdots + x^{2^{m-1}}$%
	437	* if %$x \in \gf{2^m}$%). Since the trace is invariant under
	438	* the Frobenius automorphism (i.e., %$\Tr(x)^2 = \Tr(x)$%), it
	439	* must be an element of the base field, i.e., %$\gf{2}$%, and
	440	* we only need a single bit to represent it.
	441	*/
	442
	443	int gfreduce_trace(const gfreduce r, mp x)
	444	{
	445	mp *y = MP_COPY(x);
	446	mp *spare = MP_NEW;
	447	unsigned long m = mp_bits(r->p) - 1;
	448	unsigned long i;
	449	int rc;
	450
	451	for (i = 0; i < m - 1; i++) {
	452	mp *t = gf_sqr(spare, y);
	453	spare = y;
	454	y = gfreduce_do(r, t, t);
	455	y = gf_add(y, y, x);
	456	}
	457	rc = !MP_ZEROP(y);
	458	mp_drop(spare);
	459	mp_drop(y);
	460	return (rc);
	461	}
	462
	463	/* --- @gfreduce_halftrace@ --- *
	464	*
	465	* Arguments: @const gfreduce *r@ = pointer to reduction context
	466	* @mp *d@ = destination
	467	* @mp *x@ = some polynomial
	468	*
	469	* Returns: The half-trace of @x@.
	470	* (%$\HfTr(x)= x + x^{2^2} + \cdots + x^{2^{m-1}}$%
	471	* if %$x \in \gf{2^m}$% with %$m$% odd).
	472	*/
	473
	474	mp gfreduce_halftrace(const gfreduce r, mp d, mp x)
	475	{
	476	mp *y = MP_COPY(x);
	477	mp *spare = MP_NEW;
	478	unsigned long m = mp_bits(r->p) - 1;
	479	unsigned long i;
	480
	481	mp_drop(d);
	482	for (i = 0; i < m - 1; i += 2) {
	483	mp *t = gf_sqr(spare, y);
	484	spare = y;
	485	y = gfreduce_do(r, t, t);
	486	t = gf_sqr(spare, y);
	487	spare = y;
	488	y = gfreduce_do(r, t, t);
	489	y = gf_add(y, y, x);
	490	}
	491	mp_drop(spare);
	492	return (y);
	493	}
	494
	495	/* --- @gfreduce_quadsolve@ --- *
	496	*
	497	* Arguments: @const gfreduce *r@ = pointer to reduction context
	498	* @mp *d@ = destination
	499	* @mp *x@ = some polynomial
	500	*
	501	* Returns: A polynomial @z@ such that %$z^2 + z = x$%, or null.
	502	*
	503	* Use: Solves quadratic equations in a field with characteristic 2.
	504	* Suppose we have an equation %$y^2 + A y + B = 0$% where
	505	* %$A \ne 0$%. (If %$A = 0$% then %$y = \sqrt{B}$% and you
	506	* want @gfreduce_sqrt@ instead.) Use this function to solve
	507	* %$z^2 + z = B/A^2$%; then set %$y = A z$%, since
	508	* %$y^2 + A y = A^2 z^2 + A^2 z = A^2 (z^2 + z) = B$% as
	509	* required.
	510	*
	511	* The two roots are %$z$% and %$z + 1$%; this function always
	512	* returns the one with zero scalar coefficient.
	513	*/
	514
	515	mp gfreduce_quadsolve(const gfreduce r, mp d, mp x)
	516	{
	517	unsigned long m = mp_bits(r->p) - 1;
	518	mp *t;
	519
	520	/* --- About the solutions --- *
	521	*
	522	* Factor %$z^2 + z = z (z + 1)$%. Therefore, if %$z^2 + z = x$% and
	523	* %$z' = z + 1$% then %$z'^2 + z' = z^2 + 1 + z + 1 = z^2 + z$%, so
	524	* %$z + 1$% is the other solution.
	525	*
	526	* A solution exists if and only if %$\Tr(x) = 0$%. To see the `only if'
	527	* implication, recall that the trace function is linear, and hence
	528	* $%\Tr(z^2 + z) = \Tr(z)^2 + \Tr(z) = \Tr(z) + \Tr(z) = 0$%. The `if'
	529	* direction will be proven using explicit constructions captured in the
	530	* code below.
	531	*/
	532
	533	MP_COPY(x);
	534	if (m & 1) {
	535
	536	/* --- A short-cut for fields with odd degree ---
	537	*
	538	* The method below works in all binary fields, but there's a quicker way
	539	* which works whenever the degree is odd. The half-trace is
	540	* %$z = \sum_{0\le i\le (m-1)/2} x^{2^{2i}}$%. Then %$z^2 + z = {}$%
	541	* %$\sum_{0\le i\le (m-1)/2} (x^{2^{2i}} + x^{2^{2i+1}}) = {}$%
	542	* %$\Tr(x) + x^{2^m} = \Tr(x) + x$%. This therefore gives us the
	543	* solution we want whenever %$\Tr(x) = 0$%.
	544	*/
	545
	546	d = gfreduce_halftrace(r, d, x);
	547	} else {
	548	mp z, w, *rho = MP_NEW;
	549	mp *spare = MP_NEW;
	550	grand *fr = fibrand_create(0);
	551	unsigned long i;
	552
	553	/* --- Unpicking the magic --- *
	554	*
	555	* Choose %$\rho \inr \gf{2^m}$% with %$\Tr(\rho) = 1$%. Let
	556	* %$z = \sum_{0\le i<m} \rho^{2^i} \sum_{0\le j<i} x^{2^j} = {}$%
	557	* %$\rho^2 x + \rho^4 (x + x^2) + \rho^8 (x + x^2 + x^4) + \cdots + {}$%
	558	* %$\rho^{2^{m-1}} (x + x^2 + x^{2^{m-2}})$%. Then %$z^2 = {}$%
	559	* %$\sum_{0\le i<m} \rho^{2^{i+1}} \sum_{0\le j<i} x^{2^{j+1}} = {}$%
	560	* %$\sum_{1\le i\le m} \rho^{2^i} \sum_{1\le j\le i} x^{2^j}$% and,
	561	* somewhat miraculously, %$z^2 + z = \sum_{0\le i<m} \rho^{2^i} x + {}$%
	562	* %$\rho \sum_{1\le i<m} x^{2^i} = x \Tr(\rho) + \rho \Tr(x)$%. Again,
	563	* this gives us the root we want whenever %$\Tr(x) = 0$%.
	564	*
	565	* The loop below calculates %$w = \Tr(\rho)$% and %$z$% simultaneously,
	566	* since the same powers of %$\rho$% are wanted in both calculations.
	567	*/
	568
	569	for (;;) {
	570	rho = mprand(rho, m, fr, 0);
	571	z = MP_ZERO;
	572	w = MP_COPY(rho);
	573	for (i = 0; i < m - 1; i++) {
	574	t = gf_sqr(spare, z); spare = z; z = gfreduce_do(r, t, t);
	575	t = gf_sqr(spare, w); spare = w; w = gfreduce_do(r, t, t);
	576	t = gf_mul(spare, w, x); t = gfreduce_do(r, t, t); spare = t;
	577	z = gf_add(z, z, t);
	578	w = gf_add(w, w, rho);
	579	}
	580	if (!MP_ZEROP(w))
	581	break;
	582	MP_DROP(z);
	583	MP_DROP(w);
	584	}
	585	if (d) MP_DROP(d);
	586	MP_DROP(w);
	587	MP_DROP(spare);
	588	MP_DROP(rho);
	589	fr->ops->destroy(fr);
	590	d = z;
	591	}
	592
	593	/* --- Check that we calculated the right answer --- *
	594	*
	595	* It should be correct; if it's not then maybe the ring we're working in
	596	* isn't really a field.
	597	*/
	598
	599	t = gf_sqr(MP_NEW, d); t = gfreduce_do(r, t, t); t = gf_add(t, t, d);
	600	if (!MP_EQ(t, x)) {
	601	MP_DROP(d);
	602	d = 0;
	603	}
	604	MP_DROP(t);
	605	MP_DROP(x);
	606
	607	/* --- Pick a canonical root --- *
	608	*
	609	* The two roots are %$z$% and %$z + 1$%; pick the one with a zero
	610	* scalar coefficient just for consistency's sake.
	611	*/
	612
	613	if (d) d->v[0] &= ~(mpw)1;
	614	return (d);
	615	}
	616
	617	/* --- @gfreduce_exp@ --- *
	618	*
	619	* Arguments: @const gfreduce *gr@ = pointer to reduction context
	620	* @mp *d@ = fake destination
	621	* @mp *a@ = base
	622	* @mp *e@ = exponent
	623	*
	624	* Returns: Result, %$a^e \bmod m$%.
	625	*/
	626
	627	mp gfreduce_exp(const gfreduce gr, mp d, mp a, mp *e)
	628	{
	629	mp *x = MP_ONE;
	630	mp *spare = (e->f & MP_BURN) ? MP_NEWSEC : MP_NEW;
	631
	632	MP_SHRINK(e);
	633	MP_COPY(a);
	634	if (MP_ZEROP(e))
	635	;
	636	else {
	637	if (MP_NEGP(e))
	638	a = gf_modinv(a, a, gr->p);
	639	if (MP_LEN(e) < EXP_THRESH)
	640	EXP_SIMPLE(x, a, e);
	641	else
	642	EXP_WINDOW(x, a, e);
	643	}
	644	mp_drop(d);
	645	mp_drop(a);
	646	mp_drop(spare);
	647	return (x);
	648	}
	649
	650	/*----- Test rig ----------------------------------------------------------*/
	651
	652	#ifdef TEST_RIG
	653
	654	static int vreduce(dstr *v)
	655	{
	656	mp d = (mp **)v[0].buf;
	657	mp n = (mp **)v[1].buf;
	658	mp r = (mp **)v[2].buf;
	659	mp *c;
	660	int ok = 1;
	661	gfreduce rr;
	662
	663	gfreduce_create(&rr, d);
	664	c = gfreduce_do(&rr, MP_NEW, n);
	665	if (!MP_EQ(c, r)) {
	666	fprintf(stderr, "\n* reduction failed\n* ");
	667	gfreduce_dump(&rr, stderr);
	668	fprintf(stderr, "\n*** n = "); mp_writefile(n, stderr, 16);
	669	fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
	670	fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
	671	fprintf(stderr, "\n");
	672	ok = 0;
	673	}
	674	gfreduce_destroy(&rr);
	675	mp_drop(n); mp_drop(d); mp_drop(r); mp_drop(c);
	676	assert(mparena_count(MPARENA_GLOBAL) == 0);
	677	return (ok);
	678	}
	679
	680	static int vmodexp(dstr *v)
	681	{
	682	mp p = (mp **)v[0].buf;
	683	mp g = (mp **)v[1].buf;
	684	mp x = (mp **)v[2].buf;
	685	mp r = (mp **)v[3].buf;
	686	mp *c;
	687	int ok = 1;
	688	gfreduce rr;
	689
	690	gfreduce_create(&rr, p);
	691	c = gfreduce_exp(&rr, MP_NEW, g, x);
	692	if (!MP_EQ(c, r)) {
	693	fprintf(stderr, "\n* modexp failed\n* ");
	694	fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
	695	fprintf(stderr, "\n*** g = "); mp_writefile(g, stderr, 16);
	696	fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
	697	fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
	698	fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
	699	fprintf(stderr, "\n");
	700	ok = 0;
	701	}
	702	gfreduce_destroy(&rr);
	703	mp_drop(p); mp_drop(g); mp_drop(r); mp_drop(x); mp_drop(c);
	704	assert(mparena_count(MPARENA_GLOBAL) == 0);
	705	return (ok);
	706	}
	707
	708	static int vsqrt(dstr *v)
	709	{
	710	mp p = (mp **)v[0].buf;
	711	mp x = (mp **)v[1].buf;
	712	mp r = (mp **)v[2].buf;
	713	mp *c;
	714	int ok = 1;
	715	gfreduce rr;
	716
	717	gfreduce_create(&rr, p);
	718	c = gfreduce_sqrt(&rr, MP_NEW, x);
	719	if (!MP_EQ(c, r)) {
	720	fprintf(stderr, "\n* sqrt failed\n* ");
	721	fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
	722	fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
	723	fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
	724	fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
	725	fprintf(stderr, "\n");
	726	ok = 0;
	727	}
	728	gfreduce_destroy(&rr);
	729	mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c);
	730	assert(mparena_count(MPARENA_GLOBAL) == 0);
	731	return (ok);
	732	}
	733
	734	static int vtr(dstr *v)
	735	{
	736	mp p = (mp **)v[0].buf;
	737	mp x = (mp **)v[1].buf;
	738	int r = (int )v[2].buf, c;
	739	int ok = 1;
	740	gfreduce rr;
	741
	742	gfreduce_create(&rr, p);
	743	c = gfreduce_trace(&rr, x);
	744	if (c != r) {
	745	fprintf(stderr, "\n* trace failed\n* ");
	746	fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
	747	fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
	748	fprintf(stderr, "\n*** c = %d", c);
	749	fprintf(stderr, "\n*** r = %d", r);
	750	fprintf(stderr, "\n");
	751	ok = 0;
	752	}
	753	gfreduce_destroy(&rr);
	754	mp_drop(p); mp_drop(x);
	755	assert(mparena_count(MPARENA_GLOBAL) == 0);
	756	return (ok);
	757	}
	758
	759	static int vhftr(dstr *v)
	760	{
	761	mp p = (mp **)v[0].buf;
	762	mp x = (mp **)v[1].buf;
	763	mp r = (mp **)v[2].buf;
	764	mp *c;
	765	int ok = 1;
	766	gfreduce rr;
	767
	768	gfreduce_create(&rr, p);
	769	c = gfreduce_halftrace(&rr, MP_NEW, x);
	770	if (!MP_EQ(c, r)) {
	771	fprintf(stderr, "\n* halftrace failed\n* ");
	772	fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
	773	fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
	774	fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
	775	fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
	776	fprintf(stderr, "\n");
	777	ok = 0;
	778	}
	779	gfreduce_destroy(&rr);
	780	mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c);
	781	assert(mparena_count(MPARENA_GLOBAL) == 0);
	782	return (ok);
	783	}
	784
	785	static int vquad(dstr *v)
	786	{
	787	mp p = (mp **)v[0].buf;
	788	mp x = (mp **)v[1].buf;
	789	mp r = (mp **)v[2].buf;
	790	mp *c;
	791	int ok = 1;
	792	gfreduce rr;
	793
	794	gfreduce_create(&rr, p);
	795	c = gfreduce_quadsolve(&rr, MP_NEW, x);
	796	if (!MP_EQ(c, r)) {
	797	fprintf(stderr, "\n* quadsolve failed\n* ");
	798	fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16);
	799	fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16);
	800	fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16);
	801	fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16);
	802	fprintf(stderr, "\n");
	803	ok = 0;
	804	}
	805	gfreduce_destroy(&rr);
	806	mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c);
	807	assert(mparena_count(MPARENA_GLOBAL) == 0);
	808	return (ok);
	809	}
	810
/* Table passed to @test_run@ by @main@.  Each entry pairs a chunk name with
 * one of the checking functions above and a zero-terminated list of field
 * types for that chunk's vectors; an all-zero entry ends the table.
 */
static test_chunk defs[] = {
  { "reduce", vreduce, { &type_mp, &type_mp, &type_mp, 0 } },
  { "modexp", vmodexp, { &type_mp, &type_mp, &type_mp, &type_mp, 0 } },
  { "sqrt", vsqrt, { &type_mp, &type_mp, &type_mp, 0 } },
  { "trace", vtr, { &type_mp, &type_mp, &type_int, 0 } },
  { "halftrace", vhftr, { &type_mp, &type_mp, &type_mp, 0 } },
  { "quadsolve", vquad, { &type_mp, &type_mp, &type_mp, 0 } },
  { 0, 0, { 0 } }
};
	820
	821	int main(int argc, char *argv[])
	822	{
	823	test_run(argc, argv, defs, SRCDIR"/t/gfreduce");
	824	return (0);
	825	}
	826
	827	#endif
	828
	829	/*----- That's all, folks -------------------------------------------------*/