X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/ae429891fdd7b8177c60659e0b5a450ba8ee08ea..70bc6059902c30dcbd1cddbdb628e4bcbd9cc6f5:/symm/salsa20-x86ish-sse2.S diff --git a/symm/salsa20-x86ish-sse2.S b/symm/salsa20-x86ish-sse2.S index fbdfea72..930508a1 100644 --- a/symm/salsa20-x86ish-sse2.S +++ b/symm/salsa20-x86ish-sse2.S @@ -182,7 +182,7 @@ FUNC(salsa20_core_x86ish_sse2) // d ^= (c + b) <<< 13 movdqa xmm4, xmm2 paddd xmm4, xmm1 - pshufd xmm1, xmm1, ROTL + pshufd xmm1, xmm1, ROTL movdqa xmm5, xmm4 pslld xmm4, 13 psrld xmm5, 19 @@ -191,9 +191,9 @@ FUNC(salsa20_core_x86ish_sse2) // a ^= (d + c) <<< 18 movdqa xmm4, xmm3 - pshufd xmm3, xmm3, ROTR + pshufd xmm3, xmm3, ROTR paddd xmm4, xmm2 - pshufd xmm2, xmm2, ROT2 + pshufd xmm2, xmm2, ROT2 movdqa xmm5, xmm4 pslld xmm4, 18 psrld xmm5, 14 @@ -237,7 +237,7 @@ FUNC(salsa20_core_x86ish_sse2) // d ^= (c + b) <<< 13 movdqa xmm4, xmm2 paddd xmm4, xmm3 - pshufd xmm3, xmm3, ROTL + pshufd xmm3, xmm3, ROTL movdqa xmm5, xmm4 pslld xmm4, 13 psrld xmm5, 19 @@ -246,9 +246,9 @@ FUNC(salsa20_core_x86ish_sse2) // a ^= (d + c) <<< 18 movdqa xmm4, xmm1 - pshufd xmm1, xmm1, ROTR + pshufd xmm1, xmm1, ROTR paddd xmm4, xmm2 - pshufd xmm2, xmm2, ROT2 + pshufd xmm2, xmm2, ROT2 movdqa xmm5, xmm4 pslld xmm4, 18 psrld xmm5, 14 @@ -268,43 +268,43 @@ FUNC(salsa20_core_x86ish_sse2) // latency, so arrange to start a new shuffle into a temporary as // soon as we've written out the old value. paddd xmm0, SAVE0 - pshufd xmm4, xmm0, 0x39 + pshufd xmm4, xmm0, 0x39 movd [OUT + 0], xmm0 paddd xmm1, SAVE1 - pshufd xmm5, xmm1, ROTL + pshufd xmm5, xmm1, ROTL movd [OUT + 16], xmm1 paddd xmm2, SAVE2 - pshufd xmm6, xmm2, ROT2 + pshufd xmm6, xmm2, ROT2 movd [OUT + 32], xmm2 paddd xmm3, SAVE3 - pshufd xmm7, xmm3, ROTR + pshufd xmm7, xmm3, ROTR movd [OUT + 48], xmm3 movd [OUT + 4], xmm7 - pshufd xmm7, xmm3, ROT2 + pshufd xmm7, xmm3, ROT2 movd [OUT + 24], xmm7 - pshufd xmm3, xmm3, ROTL + pshufd xmm3, xmm3, ROTL movd [OUT + 44], xmm3 movd [OUT + 8], xmm6 - pshufd xmm6, xmm2, ROTL + pshufd xmm6, xmm2, ROTL movd [OUT + 28], xmm6 - pshufd xmm2, xmm2, ROTR + pshufd xmm2, xmm2, ROTR movd [OUT + 52], xmm2 movd [OUT + 12], xmm5 - pshufd xmm5, xmm1, ROTR + pshufd xmm5, xmm1, ROTR movd [OUT + 36], xmm5 - pshufd xmm1, xmm1, ROT2 + pshufd xmm1, xmm1, ROT2 movd [OUT + 56], xmm1 movd [OUT + 20], xmm4 - pshufd xmm4, xmm0, ROT2 + pshufd xmm4, xmm0, ROT2 movd [OUT + 40], xmm4 - pshufd xmm0, xmm0, ROTL + pshufd xmm0, xmm0, ROTL movd [OUT + 60], xmm0 // Tidy things up.