From: Mark Wooding
Date: Sat, 7 Sep 2019 13:20:19 +0000 (+0100)
Subject: base/asm-common.h: Add some macros for shifting entire NEON vectors.
X-Git-Tag: 2.5.0~14^2~10
X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/commitdiff_plain/1a03119625f23b0f6fa3f6a2044343d325f6c7f6

base/asm-common.h: Add some macros for shifting entire NEON vectors.

The `vext' (A32 NEON) or `ext' (A64) instructions can be (ab)used for
shifting vectors left and right if you have a spare zero vector lying
around. But using them is kind of confusing: left shifts, especially,
need a reversed shift quantity, and the shift is measured in bytes
rather than bits. Add a couple of macros to make this less strange.
---

diff --git a/base/asm-common.h b/base/asm-common.h
index 642820af..6ec238ff 100644
--- a/base/asm-common.h
+++ b/base/asm-common.h
@@ -739,6 +739,29 @@ name:
 #endif
 .endm
 
+.macro vzero vz=q15
+        // Set VZ (default q15) to all-bits-zero.
+        vmov.u32 \vz, #0
+.endm
+
+.macro vshl128 vd, vn, nbit, vz=q15
+        // Set VD to VN shifted left by NBIT bits. Assume VZ (default q15) is
+        // all-bits-zero. NBIT must be a multiple of 8 (low three bits clear).
+  .if (\nbit)&7 != 0
+        .error "shift quantity must be whole number of bytes"
+  .endif
+        vext.8 \vd, \vz, \vn, #16 - ((\nbit) >> 3)
+.endm
+
+.macro vshr128 vd, vn, nbit, vz=q15
+        // Set VD to VN shifted right by NBIT bits. Assume VZ (default q15) is
+        // all-bits-zero. NBIT must be a multiple of 8 (low three bits clear).
+  .if (\nbit)&7 != 0
+        .error "shift quantity must be whole number of bytes"
+  .endif
+        vext.8 \vd, \vn, \vz, #((\nbit) >> 3)
+.endm
+
 // Apply decoration decor to register name reg.
 #define _REGFORM(reg, decor) _GLUE(_REGFORM_, reg)(decor)
 
@@ -1073,6 +1096,29 @@ name:
 #endif
 .endm
 
+.macro vzero vz=v31
+        // Set VZ (default v31) to all-bits-zero.
+        dup \vz\().4s, wzr
+.endm
+
+.macro vshl128 vd, vn, nbit, vz=v31
+        // Set VD to VN shifted left by NBIT bits. Assume VZ (default v31) is
+        // all-bits-zero. NBIT must be a multiple of 8 (low three bits clear).
+  .if (\nbit)&7 != 0
+        .error "shift quantity must be whole number of bytes"
+  .endif
+        ext \vd\().16b, \vz\().16b, \vn\().16b, #16 - ((\nbit) >> 3)
+.endm
+
+.macro vshr128 vd, vn, nbit, vz=v31
+        // Set VD to VN shifted right by NBIT bits. Assume VZ (default v31) is
+        // all-bits-zero. NBIT must be a multiple of 8 (low three bits clear).
+  .if (\nbit)&7 != 0
+        .error "shift quantity must be whole number of bytes"
+  .endif
+        ext \vd\().16b, \vn\().16b, \vz\().16b, #((\nbit) >> 3)
+.endm
+
 // Stack management and unwinding.
 .macro setfp fp=x29, offset=0
         // If you're just going through the motions with a fixed-size stack frame,