X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/898f32b33bea7f1e26c8ba906facdd5038edcd57..43ea7558955ca453d4687c62d12efcb585124e8a:/symm/salsa20-arm-neon.S?ds=sidebyside diff --git a/symm/salsa20-arm-neon.S b/symm/salsa20-arm-neon.S index 9cb40472..9d553d3a 100644 --- a/symm/salsa20-arm-neon.S +++ b/symm/salsa20-arm-neon.S @@ -79,7 +79,7 @@ FUNC(salsa20_core_arm_neon) // We need a copy for later. Rather than waste time copying them by // hand, we'll use the three-address nature of the instruction set. // But this means that the main loop is offset by a bit. - vldmia r1, {d24-d31} + vldmia r1, {QQ(q12, q15)} // Apply a column quarterround to each of the columns simultaneously, // moving the results to their working registers. Alas, there @@ -234,7 +234,7 @@ FUNC(salsa20_core_arm_neon) vext.32 q3, q9, q9, #1 // 12, 13, 14, 15 // And with that, we're done. - vstmia r2, {d0-d7} + vstmia r2, {QQ(q0, q3)} bx r14 ENDFUNC