X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/0923a413958b0e778a3f059c76355ab58e5be414..1f1fd8845afef39000b08a0f79ff14fae7690d41:/base/asm-common.h diff --git a/base/asm-common.h b/base/asm-common.h index 22bb44d6..642820af 100644 --- a/base/asm-common.h +++ b/base/asm-common.h @@ -1,6 +1,6 @@ /// -*- mode: asm; asm-comment-char: ?/ -*- /// -/// Fancy SIMD implementation of Salsa20 +/// Common definitions for assembler source files /// /// (c) 2015 Straylight/Edgeware /// @@ -24,6 +24,9 @@ /// Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, /// MA 02111-1307, USA. +#ifndef CATACOMB_ASM_COMMON_H +#define CATACOMB_ASM_COMMON_H + ///-------------------------------------------------------------------------- /// General definitions. @@ -217,11 +220,11 @@ name: # define INTADDR__1(addr, got) addr #endif -// Permutations for SIMD instructions. SHUF(D, C, B, A) is an immediate, -// suitable for use in `pshufd' or `shufpd', which copies element D -// (0 <= D < 4) of the source to element 3 of the destination, element C to -// element 2, element B to element 1, and element A to element 0. -#define SHUF(d, c, b, a) (64*(d) + 16*(c) + 4*(b) + (a)) +// Permutations for SIMD instructions. SHUF(A, B, C, D) is an immediate, +// suitable for use in `pshufd' or `shufps', which copies element A +// (0 <= A < 4) of the source to element 0 of the destination, element B to +// element 1, element C to element 2, and element D to element 3. +#define SHUF(a, b, c, d) ((a) + 4*(b) + 16*(c) + 64*(d)) // Map register names to their individual pieces. 
@@ -255,10 +258,10 @@ name: # define _DECOR_abcd_q(reg) r##reg##x #endif -#define _DECOR_xp_b(reg) reg##l #define _DECOR_xp_w(reg) reg #define _DECOR_xp_d(reg) e##reg #if CPUFAM_AMD64 +# define _DECOR_xp_b(reg) reg##l # define _DECOR_xp_q(reg) r##reg #endif @@ -276,22 +279,33 @@ name: # define _DECOR_rn_r(reg) reg #endif +#define _DECOR_mem_b(addr) byte ptr addr +#define _DECOR_mem_w(addr) word ptr addr +#define _DECOR_mem_d(addr) dword ptr addr +#if CPUFAM_AMD64 +# define _DECOR_mem_q(addr) qword ptr addr +#endif + +#define _DECOR_imm_b(imm) byte imm +#define _DECOR_imm_w(imm) word imm +#define _DECOR_imm_d(imm) dword imm +#if CPUFAM_AMD64 +# define _DECOR_imm_q(imm) qword imm +#endif + #if CPUFAM_X86 # define _DECOR_abcd_r(reg) e##reg##x # define _DECOR_xp_r(reg) e##reg # define _DECOR_ip_r(reg) e##reg +# define _DECOR_mem_r(addr) dword ptr addr +# define _DECOR_imm_r(imm) dword imm #endif #if CPUFAM_AMD64 # define _DECOR_abcd_r(reg) r##reg##x # define _DECOR_xp_r(reg) r##reg # define _DECOR_ip_r(reg) r##reg -#endif - -#define _DECOR_mem_b(addr) byte ptr addr -#define _DECOR_mem_w(addr) word ptr addr -#define _DECOR_mem_d(addr) dword ptr addr -#if CPUFAM_AMD64 -# define _DECOR_mem_q(addr) qword ptr addr +# define _DECOR_mem_r(addr) qword ptr addr +# define _DECOR_imm_r(imm) qword imm #endif // R_r(decor) applies decoration decor to register r, which is an internal @@ -321,6 +335,9 @@ name: // address addr (which should supply its own square-brackets). #define MEM(decor, addr) _DECOR(mem, decor, addr) +// Refer to an immediate datum of the type implied by decor. +#define IMM(decor, imm) _DECOR(imm, decor, imm) + // Applies decoration decor to assembler-level register name reg. #define _REGFORM(reg, decor) _GLUE(_REGFORM_, reg)(decor) @@ -436,7 +453,7 @@ name: #define WHOLE(reg) _REGFORM(reg, r) // Stack management and unwinding. 
-.macro setfp fp, offset = 0 +.macro setfp fp=R_bp(r), offset=0 .if \offset == 0 mov \fp, R_sp(r) #if __ELF__ @@ -459,7 +476,7 @@ name: .macro dropfp; _dropfp \fp, \offset; .endm .endm -.macro _dropfp fp, offset = 0 +.macro _dropfp fp, offset=0 .if \offset == 0 mov R_sp(r), \fp #if __ELF__ @@ -706,12 +723,12 @@ name: #if WANT_PIC ldr\cond \reg, .L$_leaextq$\@ .L$_leaextq_pc$\@: - .if .L$_pcoff == 8 + .if .L$_pcoff == 8 ldr\cond \reg, [pc, \reg] - .else + .else add\cond \reg, pc ldr\cond \reg, [\reg] - .endif + .endif _LIT .balign 4 .L$_leaextq$\@: @@ -972,7 +989,7 @@ name: #define QQ(qlo, qhi) D0(qlo)-D1(qhi) // Stack management and unwinding. -.macro setfp fp, offset = 0 +.macro setfp fp=r11, offset=0 .if \offset == 0 mov \fp, sp .setfp \fp, sp @@ -984,7 +1001,7 @@ name: .L$_frameptr_p = -1 .endm -.macro _dropfp fp, offset = 0 +.macro _dropfp fp, offset=0 .if \offset == 0 mov sp, \fp .else @@ -1005,12 +1022,12 @@ name: .endm .macro pushreg rr:vararg - stmfd sp!, {\rr} + push {\rr} .save {\rr} .endm .macro popreg rr:vararg - ldmfd sp!, {\rr} + pop {\rr} .endm .macro pushvfp rr:vararg @@ -1031,6 +1048,125 @@ name: #endif ///-------------------------------------------------------------------------- +/// AArch64-specific hacking. + +#if CPUFAM_ARM64 + +// Set the function hooks (4-byte alignment; CFI start/end directives). +#define FUNC_PREHOOK(_) .balign 4 +#define FUNC_POSTHOOK(_) .cfi_startproc; .L$_prologue_p = -1 +#define ENDFUNC_HOOK(_) .cfi_endproc + +// Call external subroutine at ADDR, possibly via PLT. +.macro callext addr + bl \addr +.endm + +// Load address of external symbol ADDR into REG (from the GOT if WANT_PIC). +.macro leaext reg, addr +#if WANT_PIC + adrp \reg, :got:\addr + ldr \reg, [\reg, #:got_lo12:\addr] +#else + adrp \reg, \addr + add \reg, \reg, #:lo12:\addr +#endif +.endm + +// Stack management and unwinding. 
+.macro setfp fp=x29, offset=0 + // If you're just going through the motions with a fixed-size stack frame, + // then you want to say `add x29, sp, #OFFSET' directly, which will avoid + // pointlessly restoring sp later. + .if \offset == 0 + mov \fp, sp + .cfi_def_cfa_register \fp + .else + add \fp, sp, #\offset + .cfi_def_cfa_register \fp + .cfi_adjust_cfa_offset -\offset + .endif + .macro dropfp; _dropfp \fp, \offset; .endm + .L$_frameptr_p = -1 +.endm + +.macro _dropfp fp, offset=0 + .if \offset == 0 + mov sp, \fp + .cfi_def_cfa_register sp + .else + sub sp, \fp, #\offset + .cfi_def_cfa_register sp + .cfi_adjust_cfa_offset +\offset + .endif + .purgem dropfp + .L$_frameptr_p = 0 +.endm + +.macro stalloc n + sub sp, sp, #\n + .cfi_adjust_cfa_offset +\n +.endm + +.macro stfree n + add sp, sp, #\n + .cfi_adjust_cfa_offset -\n +.endm + +.macro pushreg x, y=nil + .ifeqs "\y", "nil" + str \x, [sp, #-16]! + .cfi_adjust_cfa_offset +16 + .cfi_rel_offset \x, 0 + .else + stp \x, \y, [sp, #-16]! + .cfi_adjust_cfa_offset +16 + .cfi_rel_offset \x, 0 + .cfi_rel_offset \y, 8 + .endif +.endm + +.macro popreg x, y=nil + .ifeqs "\y", "nil" + ldr \x, [sp], #16 + .cfi_restore \x + .cfi_adjust_cfa_offset -16 + .else + ldp \x, \y, [sp], #16 + .cfi_restore \x + .cfi_restore \y + .cfi_adjust_cfa_offset -16 + .endif +.endm + +.macro savereg x, y, z=nil + .ifeqs "\z", "nil" + str \x, [sp, #\y] + .cfi_rel_offset \x, \y + .else + stp \x, \y, [sp, #\z] + .cfi_rel_offset \x, \z + .cfi_rel_offset \y, \z + 8 + .endif +.endm + +.macro rstrreg x, y, z=nil + .ifeqs "\z", "nil" + ldr \x, [sp, #\y] + .cfi_restore \x + .else + ldp \x, \y, [sp, #\z] + .cfi_restore \x + .cfi_restore \y + .endif +.endm + +.macro endprologue +.endm + +#endif + +///-------------------------------------------------------------------------- /// Final stuff. // Default values for the various hooks. 
@@ -1056,9 +1192,11 @@ name: # define SIZE_OBJ(name) #endif -#if __ELF__ && defined(WANT_EXECUTABLE_STACK) +#if __ELF__ && !defined(WANT_EXECUTABLE_STACK) .pushsection .note.GNU-stack, "", _SECTTY(progbits) .popsection #endif ///----- That's all, folks -------------------------------------------------- + +#endif /* CATACOMB_ASM_COMMON_H */