X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/1f1fd8845afef39000b08a0f79ff14fae7690d41..0a80c8cefb56e80ccb95277b250dab0c10e99d9d:/base/asm-common.h diff --git a/base/asm-common.h b/base/asm-common.h index 642820af..e8f6445c 100644 --- a/base/asm-common.h +++ b/base/asm-common.h @@ -549,117 +549,6 @@ name: #endif -#if CPUFAM_X86 - -.macro _reg.0 - // Stash GP registers and establish temporary stack frame. - pushfd - push eax - push ecx - push edx - push ebp - mov ebp, esp - and esp, ~15 - sub esp, 512 - fxsave [esp] -.endm - -.macro _reg.1 -.endm - -.macro _reg.2 -.endm - -.macro _reg.3 fmt - // Print FMT and the other established arguments. - lea eax, .L$_reg$msg.\@ - push eax - call printf - jmp .L$_reg$cont.\@ -.L$_reg$msg.\@: - .ascii ";; \fmt\n\0" -.L$_reg$cont.\@: - mov eax, ebp - and eax, ~15 - sub eax, 512 - fxrstor [eax] - mov esp, ebp - pop ebp - pop edx - pop ecx - pop eax - popfd -.endm - -.macro msg msg - _reg.0 - _reg.1 - _reg.2 - _reg.3 "\msg" -.endm - -.macro reg r, msg - _reg.0 - .ifeqs "\r", "esp" - lea eax, [ebp + 20] - push eax - .else - .ifeqs "\r", "ebp" - push [ebp] - .else - push \r - .endif - .endif - _reg.1 - _reg.2 - _reg.3 "\msg: \r = %08x" -.endm - -.macro xmmreg r, msg - _reg.0 - _reg.1 - _reg.2 - movdqu xmm0, \r - pshufd xmm0, xmm0, 0x1b - sub esp, 16 - movdqa [esp], xmm0 - _reg.3 "\msg: \r = %08x %08x %08x %08x" -.endm - -.macro mmreg r, msg - _reg.0 - _reg.1 - _reg.2 - pshufw \r, \r, 0x4e - sub esp, 8 - movq [esp], \r - _reg.3 "\msg: \r = %08x %08x" -.endm - -.macro freg i, msg - _reg.0 - _reg.1 - _reg.2 - finit - fldt [esp + 32 + 16*\i] - sub esp, 12 - fstpt [esp] - _reg.3 "\msg: st(\i) = %.20Lg" -.endm - -.macro fxreg i, msg - _reg.0 - _reg.1 - _reg.2 - finit - fldt [esp + 32 + 16*\i] - sub esp, 12 - fstpt [esp] - _reg.3 "\msg: st(\i) = %La" -.endm - -#endif - ///-------------------------------------------------------------------------- /// ARM-specific hacking. @@ -739,6 +628,29 @@ name: #endif .endm +.macro vzero vz=q15 + // Set VZ (default q15) to zero. + vmov.u32 \vz, #0 +.endm + +.macro vshl128 vd, vn, nbit, vz=q15 + // Set VD to VN shifted left by NBIT. Assume VZ (default q15) is + // all-bits-zero. NBIT must be a multiple of 8. + .if \nbit&3 != 0 + .error "shift quantity must be whole number of bytes" + .endif + vext.8 \vd, \vz, \vn, #16 - (\nbit >> 3) +.endm + +.macro vshr128 vd, vn, nbit, vz=q15 + // Set VD to VN shifted right by NBIT. Assume VZ (default q15) is + // all-bits-zero. NBIT must be a multiple of 8. + .if \nbit&3 != 0 + .error "shift quantity must be whole number of bytes" + .endif + vext.8 \vd, \vn, \vz, #\nbit >> 3 +.endm + // Apply decoration decor to register name reg. #define _REGFORM(reg, decor) _GLUE(_REGFORM_, reg)(decor) @@ -1073,6 +985,29 @@ name: #endif .endm +.macro vzero vz=v31 + // Set VZ (default v31) to zero. + dup \vz\().4s, wzr +.endm + +.macro vshl128 vd, vn, nbit, vz=v31 + // Set VD to VN shifted left by NBIT. Assume VZ (default v31) is + // all-bits-zero. NBIT must be a multiple of 8. + .if \nbit&3 != 0 + .error "shift quantity must be whole number of bytes" + .endif + ext \vd\().16b, \vz\().16b, \vn\().16b, #16 - (\nbit >> 3) +.endm + +.macro vshr128 vd, vn, nbit, vz=v31 + // Set VD to VN shifted right by NBIT. Assume VZ (default v31) is + // all-bits-zero. NBIT must be a multiple of 8. + .if \nbit&3 != 0 + .error "shift quantity must be whole number of bytes" + .endif + ext \vd\().16b, \vn\().16b, \vz\().16b, #\nbit >> 3 +.endm + // Stack management and unwinding. .macro setfp fp=x29, offset=0 // If you're just going through the motions with a fixed-size stack frame,