symm/*.S: Symbolic names for shuffles.

author Mark Wooding <mdw@distorted.org.uk>

Wed, 18 May 2016 09:29:03 +0000 (10:29 +0100)

committer Mark Wooding <mdw@distorted.org.uk>

Sat, 21 May 2016 16:17:26 +0000 (17:17 +0100)
author Mark Wooding <mdw@distorted.org.uk>
Wed, 18 May 2016 09:29:03 +0000 (10:29 +0100)
committer Mark Wooding <mdw@distorted.org.uk>
Sat, 21 May 2016 16:17:26 +0000 (17:17 +0100)
diff --git a/symm/chacha-x86-sse2.S b/symm/chacha-x86-sse2.S

index 87fb096..ccdfa53 100644 (file)
--- a/symm/chacha-x86-sse2.S
+++ b/symm/chacha-x86-sse2.S
@@ -31,6 +31,14 @@
  #include "asm-common.h"
  
  ///--------------------------------------------------------------------------
+/// Local utilities.
+
+// Magic constants for shuffling.
+#define ROTL 0x93
+#define ROT2 0x4e
+#define ROTR 0x39
+
+///--------------------------------------------------------------------------
  /// Main code.
  
         .arch pentium4
@@ -101,9 +109,9 @@ loop:
  
         // c += d; b ^= c; b <<<=  7
         paddd   xmm2, xmm3
-       pshufd  xmm3, xmm3, 0x93
+       pshufd  xmm3, xmm3, ROTL
         pxor    xmm1, xmm2
-       pshufd  xmm2, xmm2, 0x4e
+       pshufd  xmm2, xmm2, ROT2
         movdqa  xmm4, xmm1
         pslld   xmm1, 7
         psrld   xmm4, 25
@@ -121,7 +129,7 @@ loop:
         //
         // The shuffles have quite high latency, so they've mostly been
         // pushed upwards.  The remaining one can't be moved, though.
-       pshufd  xmm1, xmm1, 0x39
+       pshufd  xmm1, xmm1, ROTR
  
         // Apply the diagonal quarterround to each of the columns
         // simultaneously.
@@ -152,9 +160,9 @@ loop:
  
         // c += d; b ^= c; b <<<=  7
         paddd   xmm2, xmm3
-       pshufd  xmm3, xmm3, 0x39
+       pshufd  xmm3, xmm3, ROTR
         pxor    xmm1, xmm2
-       pshufd  xmm2, xmm2, 0x4e
+       pshufd  xmm2, xmm2, ROT2
         movdqa  xmm4, xmm1
         pslld   xmm1, 7
         psrld   xmm4, 25
@@ -163,7 +171,7 @@ loop:
         // Finally, finish off undoing the transpose, and we're done for this
         // doubleround.  Again, most of this was done above so we don't have
         // to wait for the shuffles.
-       pshufd  xmm1, xmm1, 0x93
+       pshufd  xmm1, xmm1, ROTL
  
         // Decrement the loop counter and see if we should go round again.
         sub     ecx, 2
diff --git a/symm/rijndael-x86-aesni.S b/symm/rijndael-x86-aesni.S

index d9aa9dc..eba7b05 100644 (file)
--- a/symm/rijndael-x86-aesni.S
+++ b/symm/rijndael-x86-aesni.S
@@ -37,6 +37,14 @@
         .globl  F(rijndael_rcon)
  
  ///--------------------------------------------------------------------------
+/// Local utilities.
+
+// Magic constants for shuffling.
+#define ROTL 0x93
+#define ROT2 0x4e
+#define ROTR 0x39
+
+///--------------------------------------------------------------------------
  /// Main code.
  
         .arch   .aes
@@ -119,9 +127,9 @@ FUNC(rijndael_setup_x86_aesni)
         // open-coding the whole thing.  It's much easier to leave that as
         // zero and XOR in the round constant by hand.
  9:     movd    xmm0, eax
-       pshufd  xmm0, xmm0, 0x39
+       pshufd  xmm0, xmm0, ROTR
         aeskeygenassist xmm1, xmm0, 0
-       pshufd  xmm1, xmm1, 0x93
+       pshufd  xmm1, xmm1, ROTL
         movd    eax, xmm1
         xor     eax, [esi]
         xor     al, [ecx]
@@ -159,7 +167,7 @@ FUNC(rijndael_setup_x86_aesni)
         cmp     ebx, 7
         jb      0f
         movd    xmm0, eax
-       pshufd  xmm0, xmm0, 0x93
+       pshufd  xmm0, xmm0, ROTL
         aeskeygenassist xmm1, xmm0, 0
         movd    eax, xmm1
  0:     xor     eax, [esi]
diff --git a/symm/salsa20-x86-sse2.S b/symm/salsa20-x86-sse2.S

index 5a13fd4..7a5bd2a 100644 (file)
--- a/symm/salsa20-x86-sse2.S
+++ b/symm/salsa20-x86-sse2.S
@@ -31,6 +31,14 @@
  #include "asm-common.h"
  
  ///--------------------------------------------------------------------------
+/// Local utilities.
+
+// Magic constants for shuffling.
+#define ROTL 0x93
+#define ROT2 0x4e
+#define ROTR 0x39
+
+///--------------------------------------------------------------------------
  /// Main code.
  
         .arch pentium4
@@ -115,7 +123,7 @@ loop:
         // d ^= (c + b) <<< 13
         movdqa  xmm4, xmm2
         paddd   xmm4, xmm1
-       pshufd  xmm1, xmm1, 0x93
+       pshufd  xmm1, xmm1, ROTL
         movdqa  xmm5, xmm4
         pslld   xmm4, 13
         psrld   xmm5, 19
@@ -124,9 +132,9 @@ loop:
  
         // a ^= (d + c) <<< 18
         movdqa  xmm4, xmm3
-       pshufd  xmm3, xmm3, 0x39
+       pshufd  xmm3, xmm3, ROTR
         paddd   xmm4, xmm2
-       pshufd  xmm2, xmm2, 0x4e
+       pshufd  xmm2, xmm2, ROT2
         movdqa  xmm5, xmm4
         pslld   xmm4, 18
         psrld   xmm5, 14
@@ -170,7 +178,7 @@ loop:
         // d ^= (c + b) <<< 13
         movdqa  xmm4, xmm2
         paddd   xmm4, xmm3
-       pshufd  xmm3, xmm3, 0x93
+       pshufd  xmm3, xmm3, ROTL
         movdqa  xmm5, xmm4
         pslld   xmm4, 13
         psrld   xmm5, 19
@@ -179,9 +187,9 @@ loop:
  
         // a ^= (d + c) <<< 18
         movdqa  xmm4, xmm1
-       pshufd  xmm1, xmm1, 0x39
+       pshufd  xmm1, xmm1, ROTR
         paddd   xmm4, xmm2
-       pshufd  xmm2, xmm2, 0x4e
+       pshufd  xmm2, xmm2, ROT2
         movdqa  xmm5, xmm4
         pslld   xmm4, 18
         psrld   xmm5, 14
@@ -203,43 +211,43 @@ loop:
         mov     edx, [ebp + 16]
  
         paddd   xmm0, [esp +  0]
-       pshufd  xmm4, xmm0, 0x39
+       pshufd  xmm4, xmm0, ROTR
         movd    [edx +  0], xmm0
  
         paddd   xmm1, [esp + 16]
-       pshufd  xmm5, xmm1, 0x93
+       pshufd  xmm5, xmm1, ROTL
         movd    [edx + 16], xmm1
  
         paddd   xmm2, xmm6
-       pshufd  xmm6, xmm2, 0x4e
+       pshufd  xmm6, xmm2, ROT2
         movd    [edx + 32], xmm2
  
         paddd   xmm3, xmm7
-       pshufd  xmm7, xmm3, 0x39
+       pshufd  xmm7, xmm3, ROTR
         movd    [edx + 48], xmm3
  
         movd    [edx +  4], xmm7
-       pshufd  xmm7, xmm3, 0x4e
+       pshufd  xmm7, xmm3, ROT2
         movd    [edx + 24], xmm7
-       pshufd  xmm3, xmm3, 0x93
+       pshufd  xmm3, xmm3, ROTL
         movd    [edx + 44], xmm3
  
         movd    [edx +  8], xmm6
-       pshufd  xmm6, xmm2, 0x93
+       pshufd  xmm6, xmm2, ROTL
         movd    [edx + 28], xmm6
-       pshufd  xmm2, xmm2, 0x39
+       pshufd  xmm2, xmm2, ROTR
         movd    [edx + 52], xmm2
  
         movd    [edx + 12], xmm5
-       pshufd  xmm5, xmm1, 0x39
+       pshufd  xmm5, xmm1, ROTR
         movd    [edx + 36], xmm5
-       pshufd  xmm1, xmm1, 0x4e
+       pshufd  xmm1, xmm1, ROT2
         movd    [edx + 56], xmm1
  
         movd    [edx + 20], xmm4
-       pshufd  xmm4, xmm0, 0x4e
+       pshufd  xmm4, xmm0, ROT2
         movd    [edx + 40], xmm4
-       pshufd  xmm0, xmm0, 0x93
+       pshufd  xmm0, xmm0, ROTL
         movd    [edx + 60], xmm0
  
         // Tidy things up.
author	Mark Wooding <mdw@distorted.org.uk>
	Wed, 18 May 2016 09:29:03 +0000 (10:29 +0100)
committer	Mark Wooding <mdw@distorted.org.uk>
	Sat, 21 May 2016 16:17:26 +0000 (17:17 +0100)
symm/chacha-x86-sse2.S		patch | blob | blame | history
symm/rijndael-x86-aesni.S		patch | blob | blame | history
symm/salsa20-x86-sse2.S		patch | blob | blame | history