symm/gcm-arm-crypto.S (mul96): Fill in the clobbered-registers list.
diff --git a/symm/gcm-x86ish-pclmul.S b/symm/gcm-x86ish-pclmul.S
index 092242b..5f0c754 100644
        //      u v = SUM_{0<=i,j<n} u_i v_j t^{i+j}
        //
        // Suppose instead that we're given ũ = SUM_{0<=i<n} u_{n-i-1} t^i
-	// and ṽ = SUM_{0<=j<n} v_{n-j-1} t^j, so the bits are backwards.
+	// and ṽ = SUM_{0<=j<n} v_{n-j-1} t^j, so the bits are backwards.
        // Then
        //
-	//	ũ ṽ = SUM_{0<=i,j<n} u_{n-i-1} v_{n-j-1} t^{i+j}
+	//	ũ ṽ = SUM_{0<=i,j<n} u_{n-i-1} v_{n-j-1} t^{i+j}
        //          = SUM_{0<=i,j<n} u_i v_j t^{2n-2-(i+j)}
        //
        // which is almost the bit-reversal of u v, only it's shifted right
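
(Aside, not part of the patch: the identity in the comment above is easy to
check by machine.  The Python sketch below, with illustrative helper names
`clmul' and `rev', verifies that carry-lessly multiplying bit-reversed n-bit
operands gives the bit-reversal, over 2n - 1 bits, of the ordinary
carry-less product -- equivalently, the reversed product shifted right by
one bit when viewed in a 2n-bit register.)

    # Sketch only: check clmul(rev(u), rev(v)) == rev(clmul(u, v)).
    def clmul(u, v):
        # Carry-less multiplication: u and v as polynomials over GF(2).
        r = 0
        while v:
            if v & 1: r ^= u
            u <<= 1; v >>= 1
        return r

    def rev(x, n):
        # Reverse the low n bits of x.
        r = 0
        for _ in range(n):
            r = (r << 1) | (x & 1); x >>= 1
        return r

    n = 8
    for u, v in [(0xb3, 0x5a), (0xff, 0x01), (0x80, 0x80)]:
        # The product has degree at most 2n - 2 and so spans 2n - 1 bits;
        # reversing within a 2n-bit word lands one bit further right.
        assert clmul(rev(u, n), rev(v, n)) == rev(clmul(u, v), 2*n - 1)
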
@@ -974,8 +974,8 @@ SSEFUNC(gcm_mulk_256b_x86ish_pclmul)
        setfp
        mov     A, [SP + 8]
        mov     K, [SP + 12]
+       stalloc 16
        and     SP, ~15
-       sub     SP, 16
 #endif
 #if CPUFAM_AMD64 && ABI_WIN
        stalloc 3*16 + 8
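
(Aside, not from the patch: the reordered 32-bit prologue above can be
modelled directly.  The Python sketch below -- the entry SP values and
names are illustrative assumptions -- mimics `stalloc 16' followed by
`and SP, ~15', and checks that at least 16 bytes of 16-byte-aligned
scratch result whatever alignment the caller left SP with; `setfp' has
already saved the old SP, so the variable-sized adjustment can be undone
on exit.)

    # Sketch only: model `stalloc 16' then `and SP, ~15'.
    for entry_sp in range(0x1000, 0x1010):      # every alignment case
        sp = entry_sp - 16                      # stalloc 16
        sp &= ~15                               # and SP, ~15
        assert sp % 16 == 0                     # aligned for XMM spills
        assert entry_sp - sp >= 16              # scratch space survives
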
@@ -1018,9 +1018,9 @@ SSEFUNC(gcm_mulk_256l_x86ish_pclmul)
        setfp
        mov     A, [SP + 8]
        mov     K, [SP + 12]
-       and     SP, ~15
+       stalloc 16
        ldgot   ecx
-       sub     SP, 16
+       and     SP, ~15
 #endif
 #if CPUFAM_AMD64 && ABI_WIN
        stalloc 3*16 + 8
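
(Aside, not from the patch: on AMD64 Windows, `stalloc 3*16 + 8'
presumably reserves three 16-byte slots for spilling callee-saved XMM
registers, plus 8 bytes to restore alignment, since the Win64 ABI has
RSP == 8 (mod 16) at function entry: an aligned stack with an 8-byte
return address just pushed.  A quick Python check of that arithmetic,
with an assumed entry value:)

    # Sketch only: Win64 entry has RSP == 8 (mod 16) after the CALL.
    entry_rsp = 0x7fff0000 - 8                  # assumed entry RSP
    assert entry_rsp % 16 == 8
    rsp = entry_rsp - (3*16 + 8)                # stalloc 3*16 + 8
    assert rsp % 16 == 0                        # realigned for XMM saves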