base/asm-common.h: Fix bogus indentation.

[catacomb] / symm / chacha-arm-neon.S
diff --git a/symm/chacha-arm-neon.S b/symm/chacha-arm-neon.S

index 5fb0073..a900db7 100644 (file)
--- a/symm/chacha-arm-neon.S
+++ b/symm/chacha-arm-neon.S
@@ -25,17 +25,18 @@
  /// MA 02111-1307, USA.
  
  ///--------------------------------------------------------------------------
-/// External definitions.
+/// Preliminaries.
  
  #include "config.h"
  #include "asm-common.h"
  
-///--------------------------------------------------------------------------
-/// Main.code.
-
         .arch   armv7-a
         .fpu    neon
-       .section .text
+
+       .text
+
+///--------------------------------------------------------------------------
+/// Main code.
  
  FUNC(chacha_core_arm_neon)
  
@@ -55,7 +56,7 @@ FUNC(chacha_core_arm_neon)
         // We need a copy for later.  Rather than waste time copying them by
         // hand, we'll use the three-address nature of the instruction set.
         // But this means that the main loop is offset by a bit.
-       vldmia  r1, {d24-d31}
+       vldmia  r1, {QQ(q12, q15)}
  
         // a += b; d ^= a; d <<<= 16
         vadd.u32 q8, q12, q13
@@ -85,9 +86,9 @@ FUNC(chacha_core_arm_neon)
  
         // c += d; b ^= c; b <<<=  7
         vadd.u32 q10, q10, q11
-       vext.32 q11, q11, q11, #3
+        vext.32 q11, q11, q11, #3
         veor    q9, q9, q10
-       vext.32 q10, q10, q10, #2
+        vext.32 q10, q10, q10, #2
         vshl.u32 q0, q9, #7
         vshr.u32 q9, q9, #25
         vorr    q9, q9, q0
@@ -132,9 +133,9 @@ FUNC(chacha_core_arm_neon)
  
         // c += d; b ^= c; b <<<=  7
         vadd.u32 q10, q10, q11
-       vext.32 q11, q11, q11, #1
+        vext.32 q11, q11, q11, #1
         veor    q9, q9, q10
-       vext.32 q10, q10, q10, #2
+        vext.32 q10, q10, q10, #2
         vshl.u32 q0, q9, #7
         vshr.u32 q9, q9, #25
         vorr    q9, q9, q0
@@ -173,7 +174,7 @@ FUNC(chacha_core_arm_neon)
         vadd.u32 q11, q11, q15
  
         // And now we write out the result.
-       vstmia  r2, {d16-d23}
+       vstmia  r2, {QQ(q8, q11)}
  
         // And with that, we're done.
         bx      r14