X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/70bc6059902c30dcbd1cddbdb628e4bcbd9cc6f5..6a024d24d97cb5d42c0091571735475b849f59f4:/symm/chacha-arm-neon.S

diff --git a/symm/chacha-arm-neon.S b/symm/chacha-arm-neon.S
index 4c72791e..af53cfd3 100644
--- a/symm/chacha-arm-neon.S
+++ b/symm/chacha-arm-neon.S
@@ -55,7 +55,7 @@ FUNC(chacha_core_arm_neon)
 	// We need a copy for later. Rather than waste time copying them by
 	// hand, we'll use the three-address nature of the instruction set.
 	// But this means that the main loop is offset by a bit.
-	vldmia r1, {d24-d31}
+	vldmia r1, {QQ(q12, q15)}
 
 	// a += b; d ^= a; d <<<= 16
 	vadd.u32 q8, q12, q13
@@ -173,7 +173,7 @@ FUNC(chacha_core_arm_neon)
 	vadd.u32 q11, q11, q15
 
 	// And now we write out the result.
-	vstmia r2, {d16-d23}
+	vstmia r2, {QQ(q8, q11)}
 
 	// And with that, we're done.
 	bx r14