X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/4e7475c2fc07c8e588583d82d95221d8e6813757..HEAD:/utils/gcm-ref diff --git a/utils/gcm-ref b/utils/gcm-ref index bba76602..406c6595 100755 --- a/utils/gcm-ref +++ b/utils/gcm-ref @@ -123,6 +123,12 @@ def shift_left(x): p = poly(8*w) return gcm_mangle(C.GF.storel((C.GF.loadl(gcm_mangle(x)) << 1)%p)) +def shift_right(x): + """Given a field element X (in external format), return X/t.""" + w = len(x) + p = poly(8*w) + return gcm_mangle(C.GF.storel((C.GF.loadl(gcm_mangle(x))*p.modinv(2))%p)) + def table_common(u, v, flip, getword, ixmask): """ Multiply U by V using table lookup; common for `table-b' and `table-l'. @@ -180,12 +186,12 @@ def demo_table_l(u, v): _i = iota() TAG_INPUT_U = _i() TAG_INPUT_V = _i() +TAG_SHIFTED_V = _i() TAG_KPIECE_U = _i() TAG_KPIECE_V = _i() TAG_PRODPIECE = _i() TAG_PRODSUM = _i() TAG_PRODUCT = _i() -TAG_SHIFTED = _i() TAG_REDCBITS = _i() TAG_REDCFULL = _i() TAG_REDCMIX = _i() @@ -237,7 +243,7 @@ def rev8(x): x = ((x&m_55) << 1) | ((x >> 1)&m_55) return x -def present_gf_mullp64(tag, wd, x, w, n, what): +def present_gf_vmullp64(tag, wd, x, w, n, what): if tag == TAG_PRODPIECE or tag == TAG_REDCFULL: return elif (wd == 128 or wd == 64) and TAG_PRODSUM <= tag <= TAG_PRODUCT: @@ -255,9 +261,9 @@ def present_gf_mullp64(tag, wd, x, w, n, what): present_gf(y, (w + 63)&~63, n, what) def present_gf_pmull(tag, wd, x, w, n, what): - if tag == TAG_PRODPIECE or tag == TAG_REDCFULL or tag == TAG_SHIFTED: + if tag == TAG_PRODPIECE or tag == TAG_REDCFULL: return - elif tag == TAG_INPUT_V or tag == TAG_KPIECE_V: + elif tag == TAG_INPUT_V or tag == TAG_SHIFTED_V or tag == TAG_KPIECE_V: w = (w + 63)&~63 bx = C.ReadBuffer(x.storeb(w/8)) by = C.WriteBuffer() @@ -281,10 +287,9 @@ def poly64_mul_simple(u, v, presfn, wd, dispwd, mulwd, uwhat, vwhat): ## We start by carving the operands into 64-bit pieces. This is ## straightforward except for the 96-bit case, where we end up with two ## short pieces which we pad at the beginning. - if uw%mulwd: pad = (-uw)%mulwd; u += C.ByteString.zero(pad); uw += pad - if vw%mulwd: pad = (-vw)%mulwd; v += C.ByteString.zero(pad); vw += pad - uu = split_gf(u, mulwd) - vv = split_gf(v, mulwd) + upad = (-uw)%mulwd; u += C.ByteString.zero(upad); uw += upad + vpad = (-vw)%mulwd; v += C.ByteString.zero(vpad); vw += vpad + uu = split_gf(u, mulwd); vv = split_gf(v, mulwd) ## Report and accumulate the individual product pieces. x = C.GF(0) @@ -301,7 +306,7 @@ def poly64_mul_simple(u, v, presfn, wd, dispwd, mulwd, uwhat, vwhat): x += t << (mulwd*i) presfn(TAG_PRODUCT, wd, x, uw + vw, dispwd, '%s %s' % (uwhat, vwhat)) - return x + return x >> (upad + vpad) def poly64_mul_karatsuba(u, v, klimit, presfn, wd, dispwd, mulwd, uwhat, vwhat): @@ -344,8 +349,7 @@ def poly64_mul_karatsuba(u, v, klimit, presfn, wd, presfn(TAG_PRODUCT, wd, x, 2*w, dispwd, '%s %s' % (uwhat, vwhat)) return x -def poly64_common(u, v, presfn, dispwd = 32, mulwd = 64, redcwd = 32, - klimit = 256): +def poly64_mul(u, v, presfn, dispwd, mulwd, klimit, uwhat, vwhat): """ Multiply U by V using a primitive 64-bit binary polynomial mutliplier. @@ -354,28 +358,27 @@ def poly64_common(u, v, presfn, dispwd = 32, mulwd = 64, redcwd = 32, Operands arrive in a `register format', which is a byte-swapped variant of the external format. Implementations differ on the precise details, - though. + though. Returns the double-precision product. """ - ## We work in two main phases: first, calculate the full double-width - ## product; and, second, reduce it modulo the field polynomial. - w = 8*len(u); assert(w == 8*len(v)) - p = poly(w) - presfn(TAG_INPUT_U, w, C.GF.loadb(u), w, dispwd, 'u') - presfn(TAG_INPUT_V, w, C.GF.loadb(v), w, dispwd, 'v') + x = poly64_mul_karatsuba(u, v, klimit, presfn, + w, dispwd, mulwd, uwhat, vwhat) - ## So, on to the first part: the multiplication. - x = poly64_mul_karatsuba(u, v, klimit, presfn, w, dispwd, mulwd, 'u', 'v') + return x.storeb(w/4) - ## Now we have to shift everything up one bit to account for GCM's crazy - ## bit ordering. - y = x << 1 - if w == 96: y >>= 64 - presfn(TAG_SHIFTED, w, y, 2*w, dispwd, 'y') +def poly64_redc(y, presfn, dispwd, redcwd): + """ + Reduce a double-precision product X modulo the appropriate polynomial. + + The operand arrives in a `register format', which is a byte-swapped variant + of the external format. Implementations differ on the precise details, + though. Returns the single-precision reduced value. + """ + + w = 4*len(y) + p = poly(w) - ## Now for the reduction. - ## ## Our polynomial has the form p = t^d + r where r = SUM_{0<=i