~mdw
/
catacomb
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
utils/gcm-ref (present_gf_vmullp64): Add `v' prefix to match front end.
[catacomb]
/
utils
/
gcm-ref
diff --git
a/utils/gcm-ref
b/utils/gcm-ref
index
4a53737
..
6a9c4c2
100755
(executable)
--- a/
utils/gcm-ref
+++ b/
utils/gcm-ref
@@
-127,7
+127,7
@@
def table_common(u, v, flip, getword, ixmask):
"""
Multiply U by V using table lookup; common for `table-b' and `table-l'.
"""
Multiply U by V using table lookup; common for `table-b' and `table-l'.
- This matches the `simple_mulk_...' implementation in `gcm.c'. One
-
entry
+ This matches the `simple_mulk_...' implementation in `gcm.c'. One
entry
per bit is the best we can manage if we want a constant-time
implementation: processing n bits at a time means we need to scan
(2^n - 1)/n times as much memory.
per bit is the best we can manage if we want a constant-time
implementation: processing n bits at a time means we need to scan
(2^n - 1)/n times as much memory.
@@
-140,7
+140,7
@@
def table_common(u, v, flip, getword, ixmask):
are processed most-significant first.
* IXMASK is a mask XORed into table indices to permute the table so that
are processed most-significant first.
* IXMASK is a mask XORed into table indices to permute the table so that
- it's order matches that induced by GETWORD.
+ its order matches that induced by GETWORD.
The table is built such that tab[i XOR IXMASK] = U t^i.
"""
The table is built such that tab[i XOR IXMASK] = U t^i.
"""
@@
-172,7
+172,7
@@
def demo_table_b(u, v):
@demo
def demo_table_l(u, v):
"""Little-endian table lookup."""
@demo
def demo_table_l(u, v):
"""Little-endian table lookup."""
- return table_common(u, v, endswap_words, lambda b: b.getu32l(), 0x18)
+ return table_common(u, v, endswap_words_32, lambda b: b.getu32l(), 0x18)
###--------------------------------------------------------------------------
### Implementation using 64×64->128-bit binary polynomial multiplication.
###--------------------------------------------------------------------------
### Implementation using 64×64->128-bit binary polynomial multiplication.
@@
-237,7
+237,7
@@
def rev8(x):
x = ((x&m_55) << 1) | ((x >> 1)&m_55)
return x
x = ((x&m_55) << 1) | ((x >> 1)&m_55)
return x
-def present_gf_mullp64(tag, wd, x, w, n, what):
+def present_gf_vmullp64(tag, wd, x, w, n, what):
if tag == TAG_PRODPIECE or tag == TAG_REDCFULL:
return
elif (wd == 128 or wd == 64) and TAG_PRODSUM <= tag <= TAG_PRODUCT:
if tag == TAG_PRODPIECE or tag == TAG_REDCFULL:
return
elif (wd == 128 or wd == 64) and TAG_PRODSUM <= tag <= TAG_PRODUCT:
@@
-258,6
+258,7
@@
def present_gf_pmull(tag, wd, x, w, n, what):
if tag == TAG_PRODPIECE or tag == TAG_REDCFULL or tag == TAG_SHIFTED:
return
elif tag == TAG_INPUT_V or tag == TAG_KPIECE_V:
if tag == TAG_PRODPIECE or tag == TAG_REDCFULL or tag == TAG_SHIFTED:
return
elif tag == TAG_INPUT_V or tag == TAG_KPIECE_V:
+ w = (w + 63)&~63
bx = C.ReadBuffer(x.storeb(w/8))
by = C.WriteBuffer()
while bx.left: chunk = bx.get(8); by.put(chunk).put(chunk)
bx = C.ReadBuffer(x.storeb(w/8))
by = C.WriteBuffer()
while bx.left: chunk = bx.get(8); by.put(chunk).put(chunk)
@@
-280,10
+281,9
@@
def poly64_mul_simple(u, v, presfn, wd, dispwd, mulwd, uwhat, vwhat):
## We start by carving the operands into 64-bit pieces. This is
## straightforward except for the 96-bit case, where we end up with two
## short pieces which we pad at the beginning.
## We start by carving the operands into 64-bit pieces. This is
## straightforward except for the 96-bit case, where we end up with two
## short pieces which we pad at the beginning.
- if uw%mulwd: pad = (-uw)%mulwd; u += C.ByteString.zero(pad); uw += pad
- if vw%mulwd: pad = (-uw)%mulwd; v += C.ByteString.zero(pad); vw += pad
- uu = split_gf(u, mulwd)
- vv = split_gf(v, mulwd)
+ upad = (-uw)%mulwd; u += C.ByteString.zero(upad); uw += upad
+ vpad = (-vw)%mulwd; v += C.ByteString.zero(vpad); vw += vpad
+ uu = split_gf(u, mulwd); vv = split_gf(v, mulwd)
## Report and accumulate the individual product pieces.
x = C.GF(0)
## Report and accumulate the individual product pieces.
x = C.GF(0)
@@
-300,7
+300,7
@@
def poly64_mul_simple(u, v, presfn, wd, dispwd, mulwd, uwhat, vwhat):
x += t << (mulwd*i)
presfn(TAG_PRODUCT, wd, x, uw + vw, dispwd, '%s %s' % (uwhat, vwhat))
x += t << (mulwd*i)
presfn(TAG_PRODUCT, wd, x, uw + vw, dispwd, '%s %s' % (uwhat, vwhat))
- return x
+ return x >> (upad + vpad)
def poly64_mul_karatsuba(u, v, klimit, presfn, wd,
dispwd, mulwd, uwhat, vwhat):
def poly64_mul_karatsuba(u, v, klimit, presfn, wd,
dispwd, mulwd, uwhat, vwhat):
@@
-370,7
+370,6
@@
def poly64_common(u, v, presfn, dispwd = 32, mulwd = 64, redcwd = 32,
## Now we have to shift everything up one bit to account for GCM's crazy
## bit ordering.
y = x << 1
## Now we have to shift everything up one bit to account for GCM's crazy
## bit ordering.
y = x << 1
- if w == 96: y >>= 64
presfn(TAG_SHIFTED, w, y, 2*w, dispwd, 'y')
## Now for the reduction.
presfn(TAG_SHIFTED, w, y, 2*w, dispwd, 'y')
## Now for the reduction.
@@
-440,7
+439,7
@@
def demo_pclmul(u, v):
@demo
def demo_vmullp64(u, v):
w = 8*len(u)
@demo
def demo_vmullp64(u, v):
w = 8*len(u)
- return poly64_common(u, v, presfn = present_gf_mullp64,
+ return poly64_common(u, v, presfn = present_gf_vmullp64,
redcwd = w%64 == 32 and 32 or 64)
@demo
redcwd = w%64 == 32 and 32 or 64)
@demo