utils/gcm-ref: Pull `poly64_mul' and `poly64_redc' out of `poly64_common'.

author Mark Wooding <mdw@distorted.org.uk>

Tue, 16 Jan 2024 13:54:50 +0000 (13:54 +0000)

committer Mark Wooding <mdw@distorted.org.uk>

Tue, 16 Jan 2024 13:54:50 +0000 (13:54 +0000)
author Mark Wooding <mdw@distorted.org.uk>
Tue, 16 Jan 2024 13:54:50 +0000 (13:54 +0000)
committer Mark Wooding <mdw@distorted.org.uk>
Tue, 16 Jan 2024 13:54:50 +0000 (13:54 +0000)
diff --git a/utils/gcm-ref b/utils/gcm-ref

index 6a9c4c2..174e79e 100755 (executable)
--- a/utils/gcm-ref
+++ b/utils/gcm-ref
@@ -343,8 +343,7 @@ def poly64_mul_karatsuba(u, v, klimit, presfn, wd,
    presfn(TAG_PRODUCT, wd, x, 2*w, dispwd, '%s %s' % (uwhat, vwhat))
    return x
  
-def poly64_common(u, v, presfn, dispwd = 32, mulwd = 64, redcwd = 32,
-                  klimit = 256):
+def poly64_mul(u, v, presfn, dispwd, mulwd, klimit, uwhat, vwhat):
    """
    Multiply U by V using a primitive 64-bit binary polynomial mutliplier.
  
@@ -353,27 +352,27 @@ def poly64_common(u, v, presfn, dispwd = 32, mulwd = 64, redcwd = 32,
  
    Operands arrive in a `register format', which is a byte-swapped variant of
    the external format.  Implementations differ on the precise details,
-  though.
+  though.  Returns the double-precision product.
    """
  
-  ## We work in two main phases: first, calculate the full double-width
-  ## product; and, second, reduce it modulo the field polynomial.
-
    w = 8*len(u); assert(w == 8*len(v))
-  p = poly(w)
-  presfn(TAG_INPUT_U, w, C.GF.loadb(u), w, dispwd, 'u')
-  presfn(TAG_INPUT_V, w, C.GF.loadb(v), w, dispwd, 'v')
+  x = poly64_mul_karatsuba(u, v, klimit, presfn,
+                           w, dispwd, mulwd, uwhat, vwhat)
  
-  ## So, on to the first part: the multiplication.
-  x = poly64_mul_karatsuba(u, v, klimit, presfn, w, dispwd, mulwd, 'u', 'v')
+  return x.storeb(w/4)
  
-  ## Now we have to shift everything up one bit to account for GCM's crazy
-  ## bit ordering.
-  y = x << 1
-  presfn(TAG_SHIFTED, w, y, 2*w, dispwd, 'y')
+def poly64_redc(y, presfn, dispwd, redcwd):
+  """
+  Reduce a double-precision product X modulo the appropriate polynomial.
+
+  The operand arrives in a `register format', which is a byte-swapped variant
+  of the external format.  Implementations differ on the precise details,
+  though.  Returns the single-precision reduced value.
+  """
+
+  w = 4*len(y)
+  p = poly(w)
  
-  ## Now for the reduction.
-  ##
    ## Our polynomial has the form p = t^d + r where r = SUM_{0<=i<d} r_i t^i,
    ## with each r_i either 0 or 1.  Because we choose the lexically earliest
    ## irreducible polynomial with the necessary degree, r_i = 1 happens only
@@ -405,14 +404,14 @@ def poly64_common(u, v, presfn, dispwd = 32, mulwd = 64, redcwd = 32,
    m = gfmask(redcwd)
  
    ## Handle the spilling bits.
-  yy = split_gf(y.storeb(w/4), redcwd)
+  yy = split_gf(y, redcwd)
    b = C.GF(0)
    for rj in rr:
      br = [(yi << (redcwd - rj))&m for yi in yy[w/redcwd:]]
      presfn(TAG_REDCBITS, w, join_gf(br, redcwd), w, dispwd, 'b(%d)' % rj)
      b += join_gf(br, redcwd) << (w - redcwd)
    presfn(TAG_REDCFULL, w, b, 2*w, dispwd, 'b')
-  s = y + b
+  s = C.GF.loadb(y) + b
    presfn(TAG_REDCMIX, w, s, 2*w, dispwd, 's')
  
    ## Handle the nonspilling bits.
@@ -432,6 +431,16 @@ def poly64_common(u, v, presfn, dispwd = 32, mulwd = 64, redcwd = 32,
    ## And we're done.
    return z.storeb(w/8)
  
+def poly64_common(u, v, presfn, dispwd = 32, mulwd = 64,
+                  redcwd = 32, klimit = 256):
+  w = 8*len(u)
+  presfn(TAG_INPUT_U, w, C.GF.loadb(u), w, dispwd, 'u')
+  presfn(TAG_INPUT_V, w, C.GF.loadb(v), w, dispwd, 'v')
+  y = poly64_mul(u, v, presfn, dispwd, mulwd, klimit, "u", "v")
+  y = (C.GF.loadb(y) << 1).storeb(w/4)
+  z = poly64_redc(y, presfn, dispwd, redcwd)
+  return z
+
  @demo
  def demo_pclmul(u, v):
    return poly64_common(u, v, presfn = present_gf_pclmul)
author	Mark Wooding <mdw@distorted.org.uk>
	Tue, 16 Jan 2024 13:54:50 +0000 (13:54 +0000)
committer	Mark Wooding <mdw@distorted.org.uk>
	Tue, 16 Jan 2024 13:54:50 +0000 (13:54 +0000)