X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/81ceb2c35de440e701d2f4e6960001395d2b7e97..981a9e5d5e3af6c06ad8b3f821928852068227e4:/math/mpx-mul4-arm-neon.S diff --git a/math/mpx-mul4-arm-neon.S b/math/mpx-mul4-arm-neon.S index efca7902..8aa01bc0 100644 --- a/math/mpx-mul4-arm-neon.S +++ b/math/mpx-mul4-arm-neon.S @@ -60,9 +60,9 @@ /// pieces are placed into 32-bit cells, and arranged as two 128-bit NEON /// operands, as follows. /// -/// Offset 0 4 8 12 -/// 0 v'_0 v''_0 v'_1 v''_1 -/// 16 v'_2 v''_2 v'_3 v''_3 +/// Offset 12 8 4 0 +/// 0 v''_1 v'_1 v''_0 v'_0 +/// 16 v''_3 v'_3 v''_2 v'_2 /// /// The `vmull' and `vmlal' instructions can multiply a vector of two 32-bit /// values by a 32-bit scalar, giving two 64-bit results; thus, it will act @@ -1012,12 +1012,12 @@ ENDFUNC ldr r14, [STKARG(0)] // -> vv vld1.32 {q2}, [r14] vmov.i32 q3, #0 - vzip.16 q2, q3 // (v'_0, v''_0; v'_1, v''_1) + vzip.16 q2, q3 // (v''_1, v'_1; v''_0, v'_0) ldr r14, [STKARG(1)] // -> yy vld1.32 {q4}, [r14] vmov.i32 q5, #0 - vzip.16 q4, q5 // (y'_0, y''_0; y'_1, y''_1) + vzip.16 q4, q5 // (y''_1, y'_1; y''_0, y'_0) ldr r5, [STKARG(2)] // = n ldr r6, [STKARG(3)] // -> cyv @@ -1029,7 +1029,7 @@ ENDFUNC vld1.32 {q4}, [r3] vmov.i32 q5, #0 - vzip.16 q4, q5 // (y'_0, y''_0; y'_1, y''_1) + vzip.16 q4, q5 // (y''_1, y'_1; y''_0, y'_0) ldr r5, [STKARG(0)] // = n ldr r6, [STKARG(1)] // -> cyv @@ -1044,12 +1044,12 @@ ENDFUNC ldr r14, [STKARG(1)] // -> vv vld1.32 {q2}, [r14] vmov.i32 q3, #0 - vzip.16 q2, q3 // (v'_0, v''_0; v'_1, v''_1) + vzip.16 q2, q3 // (v''_1, v'_1; v''_0, v'_0) ldr r14, [STKARG(2)] // -> yy vld1.32 {q4}, [r14] vmov.i32 q5, #0 - vzip.16 q4, q5 // (y'_0, y''_0; y'_1, y''_1) + vzip.16 q4, q5 // (y''_1, y'_1; y''_0, y'_0) ldr r5, [STKARG(3)] // = n ldr r6, [STKARG(4)] // -> cyv @@ -1065,7 +1065,7 @@ ENDFUNC ldr r14, [STKARG(0)] // -> vv vld1.32 {q2}, [r14] vmov.i32 q3, #0 - vzip.16 q2, q3 // (v'_0, v''_0; v'_1, v''_1) + vzip.16 q2, q3 // (v''_1, v'_1; v''_0, v'_0) ldr r5, [STKARG(1)] // = n ldr r6, [STKARG(2)] // -> cyv