math/gfx-sqr.c: Use bithacking rather than a table for squaring.
[catacomb] / math / gfx-sqr.c
CommitLineData
ae747c9b 1/* -*-c-*-
2 *
ae747c9b 3 * Squaring binary polynomials
4 *
5 * (c) 2000 Straylight/Edgeware
6 */
7
45c0fd36 8/*----- Licensing notice --------------------------------------------------*
ae747c9b 9 *
10 * This file is part of Catacomb.
11 *
12 * Catacomb is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Library General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
45c0fd36 16 *
ae747c9b 17 * Catacomb is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Library General Public License for more details.
45c0fd36 21 *
ae747c9b 22 * You should have received a copy of the GNU Library General Public
23 * License along with Catacomb; if not, write to the Free
24 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 * MA 02111-1307, USA.
26 */
27
ae747c9b 28/*----- Header files ------------------------------------------------------*/
29
30#include "mpx.h"
ceb3f0c0 31#include "gfx.h"
d9d9e645 32#include "permute.h"
ae747c9b 33
34/*----- Main code ---------------------------------------------------------*/
35
36/* --- @gfx_sqr@ --- *
37 *
38 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
39 * @const mpw *av, *avl@ = argument vector base and limit
40 *
41 * Returns: ---
42 *
43 * Use: Performs squaring of binary polynomials.
 *
 * Each argument word produces two destination words, the low
 * half first.  Writing stops as soon as the destination is
 * full, so a destination which is too short receives only the
 * first words produced.  Destination words left unwritten once
 * the argument is exhausted are zeroed.  An empty destination
 * is left untouched.
44 */
45
46void gfx_sqr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl)
47{
d9d9e645
MW
/* REGWD is the limb width rounded up to 16, 32, 64 or 128 bits; anything
 * wider is rejected at compile time.  NOTE(review): REGWD and regty are
 * presumably also names which the `permute.h' macros used below expect to
 * find in scope -- confirm before renaming either of them.
 */
48#if MPW_BITS <= 16
49# define REGWD 16
50#elif MPW_BITS <= 32
51# define REGWD 32
52#elif MPW_BITS <= 64
53# define REGWD 64
54#elif MPW_BITS <= 128
55# define REGWD 128
56#else
57# error "unsupported limb width: extend `gfx-sqr.c'"
58#endif
59
/* The working register type: @mpw@ if a limb fills REGWD bits exactly,
 * otherwise the wider @mpd@.
 */
60#if MPW_BITS == REGWD
61 typedef mpw regty;
62#else
63 typedef mpd regty;
64#endif
65
66 regty a, t;
ae747c9b 67
68 /* --- Simple stuff --- */
69
/* An empty destination means there is nothing to write at all. */
70 if (dv >= dvl)
71 return;
/* NOTE(review): presumably trims high-order zero words from the argument
 * so that the loop below does no needless work -- confirm in `mpx.h'.
 */
72 MPX_SHRINK(av, avl);
73
74 /* --- The main algorithm --- *
75 *
76 * Our method depends on the fact that, in a field of characteristic 2, we
77 * have that %$(a + b)^2 = a^2 + b^2$%. Thus, to square a polynomial, it's
78 * sufficient just to put a zero bit between each of the bits of the
d9d9e645 79 * original argument.
ae747c9b 80 */
81
d9d9e645
MW
82 while (av < avl) {
83 a = *av++;
/* The trailing comments below track, after each exchange, the order of the
 * bit-index positions within the register.  How many exchanges are done
 * depends on the limb width.
 */
84 /* ..., 7, 6, 5, 4, 3, 2, 1, 0 */
85 SWIZZLE_EXCH(a, 0, 1); /* ..., 7, 6, 5, 4, 3, 2, 0, 1 */
86 SWIZZLE_EXCH(a, 0, 2); /* ..., 7, 6, 5, 4, 3, 1, 0, 2 */
87 SWIZZLE_EXCH(a, 0, 3); /* ..., 7, 6, 5, 4, 2, 1, 0, 3 */
88#if MPW_BITS > 16
89 SWIZZLE_EXCH(a, 0, 4); /* ..., 7, 6, 5, 3, 2, 1, 0, 4 */
90#endif
91#if MPW_BITS > 32
92 SWIZZLE_EXCH(a, 0, 5); /* ..., 7, 6, 4, 3, 2, 1, 0, 5 */
93#endif
94#if MPW_BITS > 64
95 SWIZZLE_EXCH(a, 0, 6); /* ..., 7, 5, 4, 3, 2, 1, 0, 6 */
96#endif
/* NOTE(review): the REGWD selection at the top of the function already
 * rejects limbs wider than 128 bits, so this second check looks
 * unreachable; it seems to be kept as a reminder to extend the chain of
 * exchanges above.
 */
97#if MPW_BITS > 128
98# error "unsupported limb width: extend `gfx-sqr.c'"
99#endif
100
101 /* Write out the low half, which consists of the low-order bits in
102 * even-numbered positions.
103 */
104 *dv++ = MPW(a&IXMASK(0)); if (dv >= dvl) break;
105
106 /* Write the high half. This is trickier, since, in general, the bits
107 * are split across even-numbered bits in the high part of the wider
108 * register and odd-numbered bits in the low part. Alas, we can't
109 * resolve this mess without special cases because shifts are broken in
110 * C.
111 */
/* Odd-numbered bits: move them down one place onto even positions, then up
 * by REGWD - MPW_BITS places, leaving room beneath them for the
 * even-numbered bits which did not fit in the low half.
 */
112 t = ((a >> 1)&IXMASK(0)) << (REGWD - MPW_BITS);
/* Even-numbered bits above the limb width go at the bottom of the high
 * half.  This is skipped when the limb fills the register: there are no
 * such bits then, and the shift count would be REGWD itself, which C does
 * not define if regty is exactly that wide.
 */
113#if MPW_BITS < REGWD
114 t |= (a&IXMASK(0)) >> MPW_BITS;
115#endif
116 *dv++ = MPW(t); if (dv >= dvl) break;
ae747c9b 117 }
118
119 /* --- Zero the rest of everything --- */
120
121 MPX_ZERO(dv, dvl);
d9d9e645
MW
122
123#undef REGWD
ae747c9b 124}
125
126/*----- Test rig ----------------------------------------------------------*/
127
128#ifdef TEST_RIG
129
130#include <mLib/alloc.h>
131#include <mLib/dstr.h>
132#include <mLib/quis.h>
133#include <mLib/testrig.h>
134
/* --- @ALLOC@ --- *
 *
 * Obtains a vector from @xmalloc@, asking for @MPWS(sz)@, and stores its
 * base in @v@ and its limit (the base plus @sz@ words) in @vl@.  The size
 * argument is evaluated once only.  NOTE(review): @MPWS@ presumably turns a
 * word count into a byte count -- confirm.
 */
135#define ALLOC(v, vl, sz) do { \
136 size_t _sz = (sz); \
137 mpw *_vv = xmalloc(MPWS(_sz)); \
138 mpw *_vvl = _vv + _sz; \
139 (v) = _vv; \
140 (vl) = _vvl; \
141} while (0)
142
/* --- @LOAD@ --- *
 *
 * Allocates a vector of @MPW_RQ(d->len)@ words using @ALLOC@, fills it from
 * the buffer of the dynamic string @d@ using @mpx_loadb@, and stores the
 * base and limit in @v@ and @vl@.  NOTE(review): @MPW_RQ@ presumably gives
 * the number of words needed to hold that many octets, and @mpx_loadb@
 * presumably reads them in big-endian order -- confirm both in `mpx.h'.
 */
143#define LOAD(v, vl, d) do { \
144 const dstr *_d = (d); \
145 mpw *_v, *_vl; \
146 ALLOC(_v, _vl, MPW_RQ(_d->len)); \
147 mpx_loadb(_v, _vl, _d->buf, _d->len); \
148 (v) = _v; \
149 (vl) = _vl; \
150} while (0)
151
/* Yields the larger of its two arguments; the chosen one is evaluated twice,
 * so don't pass expressions with side effects.  NOTE(review): nothing in the
 * visible part of this file refers to this macro.
 */
152#define MAX(x, y) ((x) > (y) ? (x) : (y))
45c0fd36 153
/* --- @dumpmp@ --- *
 *
 * Arguments: @const char *msg@ = label to print first
 * @const mpw *v, *vl@ = base and limit of the vector to print
 *
 * Returns: ---
 *
 * Use: Writes the label to standard error, followed by the words of
 * the vector in hexadecimal, walking down from the limit to the
 * base, and then a newline.
 */
ae747c9b 154static void dumpmp(const char *msg, const mpw *v, const mpw *vl)
155{
156 fputs(msg, stderr);
/* NOTE(review): presumably drops high-order zero words so that they are not
 * printed -- confirm in `mpx.h'.
 */
157 MPX_SHRINK(v, vl);
/* Each word is cast to @unsigned long@ and padded to at least eight hex
 * digits.
 */
158 while (v < vl)
159 fprintf(stderr, " %08lx", (unsigned long)*--vl);
160 fputc('\n', stderr);
161}
162
/* --- @vsqr@ --- *
 *
 * Arguments: @dstr *v@ = test vector fields: @v[0]@ holds the argument
 * and @v[1]@ the expected square
 *
 * Returns: Nonzero if @gfx_sqr@ produced the expected answer, zero if
 * it did not.
 *
 * Use: Test driver for @gfx_sqr@.  On a mismatch, the argument, the
 * expected answer and the actual result are dumped to standard
 * error.
 */
163static int vsqr(dstr *v)
164{
165 mpw *a, *al;
166 mpw *b, *bl;
167 mpw *d, *dl;
168 int ok = 1;
169
/* The destination gets twice as many words as were allocated for the
 * argument.
 */
170 LOAD(a, al, &v[0]);
171 LOAD(b, bl, &v[1]);
172 ALLOC(d, dl, 2 * (al - a));
173
174 gfx_sqr(d, dl, a, al);
/* A zero return from @mpx_ueq@ is treated as `the vectors differ'. */
175 if (!mpx_ueq(d, dl, b, bl)) {
176 fprintf(stderr, "\n*** vsqr failed\n");
45c0fd36 177 dumpmp(" a", a, al);
ae747c9b 178 dumpmp("expected", b, bl);
179 dumpmp(" result", d, dl);
180 ok = 0;
181 }
182
/* All three vectors are released whether or not the test passed. */
12ed8a1f 183 xfree(a); xfree(b); xfree(d);
ae747c9b 184 return (ok);
185}
186
/* The table of tests handed to @test_run@.  It has a single test, named
 * "sqr", which is run by @vsqr@ and takes two hex fields.  NOTE(review): the
 * all-zero final entry presumably marks the end of the table, and the zero
 * after the field types the end of the field list -- confirm in mLib's
 * `testrig.h'.
 */
187static test_chunk defs[] = {
188 { "sqr", vsqr, { &type_hex, &type_hex, 0 } },
189 { 0, 0, { 0 } }
190};
191
/* --- @main@ --- *
 *
 * Arguments: @int argc@, @char *argv[]@ = command line, passed on to
 * @test_run@ unchanged
 *
 * Returns: Zero, whenever @test_run@ returns.
 *
 * Use: Entry point of the test rig: hands the command line and the
 * test table to @test_run@.  NOTE(review): the final argument
 * presumably names the default test-vector file, found under
 * the build-time @SRCDIR@ -- confirm in mLib's `testrig.h'.
 */
192int main(int argc, char *argv[])
193{
0f00dc4c 194 test_run(argc, argv, defs, SRCDIR"/t/gfx");
ae747c9b 195 return (0);
196}
197
198#endif
199
200/*----- That's all, folks -------------------------------------------------*/