X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/61bd904b61ef893246791746517ef7a38ed732db..898f32b33bea7f1e26c8ba906facdd5038edcd57:/base/asm-common.h

diff --git a/base/asm-common.h b/base/asm-common.h
index 0af9cb58..77dd6a72 100644
--- a/base/asm-common.h
+++ b/base/asm-common.h
@@ -27,11 +27,46 @@
 ///--------------------------------------------------------------------------
 /// General definitions.
 
+// Preprocessor hacks: two-level, so arguments expand before `#' and `##'.
+#define STRINGY(x) _STRINGY(x)
+#define _STRINGY(x) #x
+#define GLUE(x, y) _GLUE(x, y)
+#define _GLUE(x, y) x##y
+#define _EMPTY
+
+// Some useful variables: the current `.text' subsection number.
+	.L$_subsec = 0
+
+// Literal pools done the hard way: code in subsection N, literals in N + 1.
+#define _LIT .text .L$_subsec + 1
+#define _ENDLIT .text .L$_subsec
+#define _LTORG .L$_subsec = .L$_subsec + 2; .text .L$_subsec
+
+// ELF section types: ARM spells the type `%ty', everything else `@ty'.
+#if __ELF__
+#  if CPUFAM_ARMEL
+#    define _SECTTY(ty) %ty
+#  else
+#    define _SECTTY(ty) @ty
+#  endif
+#endif
+
+// Section selection; RODATA falls back to TEXT where nothing better exists.
+#define TEXT .text .L$_subsec
+#if ABI_WIN
+#  define RODATA .section .rdata, "dr"
+#elif __ELF__
+#  define RODATA .section .rodata, "a", _SECTTY(progbits)
+#else
+#  define RODATA TEXT
+#endif
+#define DATA .data
+
 // Announcing an external function.
 #define FUNC(name)							\
 	.globl	F(name);						\
 	TYPE_FUNC(name);						\
-	.macro ENDFUNC; _ENDFUNC(name); .endm;				\
+	.macro	ENDFUNC; _ENDFUNC(name); .endm;				\
 	FUNC_PREHOOK(name);						\
 F(name):								\
 	FUNC_POSTHOOK(name)
@@ -40,7 +75,21 @@ F(name): \
 #define _ENDFUNC(name)							\
 	.purgem ENDFUNC;						\
 	SIZE_OBJ(name);							\
-	ENDFUNC_HOOK(name)
+	ENDFUNC_HOOK(name);						\
+	_LTORG
+
+// Make a helper function, if necessary (once only, in subsection 7128).
+#define AUXFN(name)							\
+	.ifndef .L$_auxfn_def.name;					\
+	  .text	7128;							\
+	  .macro _ENDAUXFN; _ENDAUXFN_TAIL(name); .endm;		\
+	  FUNC_PREHOOK(name);						\
+name:
+#define _ENDAUXFN_TAIL(name)						\
+	  .purgem _ENDAUXFN;						\
+	  .text	.L$_subsec;						\
+	  .L$_auxfn_def.name = 1
+#define ENDAUXFN _ENDAUXFN; .endif
 
 ///--------------------------------------------------------------------------
 /// ELF-specific hacking.
@@ -78,18 +127,26 @@ F(name): \
 // Set the function hooks.
 #define FUNC_PREHOOK(_) .balign 16
 
+// On 64-bit Windows, arrange to install stack-unwinding (SEH) data.
+#if CPUFAM_AMD64 && ABI_WIN
+#  define FUNC_POSTHOOK(name) .seh_proc name
+#  define ENDFUNC_HOOK(_) .seh_endproc
+// Procedures are expected to invoke `.seh_setframe' if necessary, and
+// `.seh_pushreg' and friends, and `.seh_endprologue'.
+#endif
+
 // Don't use the wretched AT&T syntax.  It's festooned with pointless
 // punctuation, and all of the data movement is backwards.  Ugh!
 	.intel_syntax noprefix
 
 // Call external subroutine at ADDR, possibly via PLT.
-	.macro	callext addr
+.macro	callext addr
 #if WANT_PIC
 	call	\addr@PLT
 #else
 	call	\addr
 #endif
-	.endm
+.endm
 
 // Do I need to arrange a spare GOT register?
 #if WANT_PIC && CPUFAM_X86
@@ -98,25 +155,19 @@ F(name): \
 #define GOTREG ebx			// Not needed in AMD64 so don't care.
 
 // Maybe load GOT address into GOT.
-	.macro	ldgot got=GOTREG
-#if WANT_PIC && CPUFAM_X86
-	call	_where_am_i.\got
-	add	\got, offset _GLOBAL_OFFSET_TABLE_
-#endif
-	.endm
-
-// Maybe build a helper subroutine for `ldgot GOT'.
-	.macro	gotaux got=GOTREG
+.macro	ldgot got=GOTREG
 #if WANT_PIC && CPUFAM_X86
-	.align	16
-_where_am_i.\got :
+	AUXFN(_ldgot.\got)
 	mov	\got, [esp]
 	ret
+	ENDAUXFN
+	call	_ldgot.\got
+	add	\got, offset _GLOBAL_OFFSET_TABLE_
 #endif
-	.endm
+.endm
 
 // Load address of external symbol ADDR into REG, maybe using GOT.
-	.macro	leaext reg, addr, got=GOTREG
+.macro	leaext reg, addr, got=GOTREG
 #if WANT_PIC
 #  if CPUFAM_X86
 	mov	\reg, [\got + \addr@GOT]
@@ -132,7 +183,7 @@ _where_am_i.\got :
 	lea	\reg, \addr[rip]
 #  endif
 #endif
-	.endm
+.endm
 
 // Address expression (possibly using a base register, and a displacement)
 // referring to ADDR, which is within our module, maybe using GOT.
@@ -146,25 +197,147 @@ _where_am_i.\got :
 #  define INTADDR__1(addr, got) addr
 #endif
 
+// Permutations for SIMD instructions.
+// SHUF(D, C, B, A) is an immediate, suitable for use in `pshufd' or
+// `shufpd', which copies element D (0 <= D < 4) of the source to element 3
+// of the destination, element C to element 2, B to element 1, A to element 0.
+#define SHUF(d, c, b, a) (64*(d) + 16*(c) + 4*(b) + (a))
+
+#endif
+
+#if CPUFAM_X86
+
+.macro	_reg.0
+	// Stash flags and GP registers; make an aligned frame for `fxsave'.
+	pushfd
+	push	eax
+	push	ecx
+	push	edx
+	push	ebp
+	mov	ebp, esp
+	and	esp, ~15
+	sub	esp, 512
+	fxsave	[esp]
+.endm
+
+.macro	_reg.1
+.endm
+
+.macro	_reg.2
+.endm
+
+.macro	_reg.3	fmt
+	// Print FMT via `printf' with the pushed arguments; restore state.
+	lea	eax, .L$_reg$msg.\@
+	push	eax
+	call	printf
+	jmp	.L$_reg$cont.\@
+.L$_reg$msg.\@:
+	.ascii	";; \fmt\n\0"
+.L$_reg$cont.\@:
+	mov	eax, ebp
+	and	eax, ~15
+	sub	eax, 512
+	fxrstor	[eax]
+	mov	esp, ebp
+	pop	ebp
+	pop	edx
+	pop	ecx
+	pop	eax
+	popfd
+.endm
+
+.macro	msg	msg
+	_reg.0
+	_reg.1
+	_reg.2
+	_reg.3	"\msg"
+.endm
+
+.macro	reg	r, msg
+	_reg.0
+	.ifeqs "\r", "esp"
+	  lea	eax, [ebp + 20]
+	  push	eax
+	.else
+	  .ifeqs "\r", "ebp"
+	    push [ebp]
+	  .else
+	    push \r
+	  .endif
+	.endif
+	_reg.1
+	_reg.2
+	_reg.3	"\msg: \r = %08x"
+.endm
+
+.macro	xmmreg	r, msg
+	_reg.0
+	_reg.1
+	_reg.2
+	movdqu	xmm0, \r
+	pshufd	xmm0, xmm0, 0x1b
+	sub	esp, 16
+	movdqa	[esp], xmm0
+	_reg.3	"\msg: \r = %08x %08x %08x %08x"
+.endm
+
+.macro	mmreg	r, msg
+	_reg.0
+	_reg.1
+	_reg.2
+	pshufw	\r, \r, 0x4e
+	sub	esp, 8
+	movq	[esp], \r
+	_reg.3	"\msg: \r = %08x %08x"
+.endm
+
+.macro	freg	i, msg
+	_reg.0
+	_reg.1
+	_reg.2
+	finit
+	fldt	[esp + 32 + 16*\i]
+	sub	esp, 12
+	fstpt	[esp]
+	_reg.3	"\msg: st(\i) = %.20Lg"
+.endm
+
+.macro	fxreg	i, msg
+	_reg.0
+	_reg.1
+	_reg.2
+	finit
+	fldt	[esp + 32 + 16*\i]
+	sub	esp, 12
+	fstpt	[esp]
+	_reg.3	"\msg: st(\i) = %La"
+.endm
+
 #endif
 
 ///--------------------------------------------------------------------------
 /// ARM-specific hacking.
 
-#if CPUFAM_ARM
+#if CPUFAM_ARMEL
+
+// ARM/Thumb mode things.
+#define ARM	.arm; .L$_pcoff = 8
+#define THUMB	.thumb; .L$_pcoff = 4
+	ARM				// Use ARM by default.
 
 // Set the function hooks.
 #define FUNC_PREHOOK(_) .balign 4
 #define ENDFUNC_HOOK(name) .ltorg
 
 // Call external subroutine at ADDR, possibly via PLT.
-	.macro	callext addr, cond=
+.macro	callext addr, cond=
 #if WANT_PIC
 	bl\cond	\addr(PLT)
 #else
 	bl\cond	\addr
 #endif
-	.endm
+.endm
 
 // Do I need to arrange a spare GOT register?
 #if WANT_PIC
@@ -173,22 +346,54 @@ _where_am_i.\got :
 #define GOTREG r9
 
 // Maybe load GOT address into GOT.
-	.macro	ldgot got=r9
+.macro	ldgot	cond=, got=GOTREG
 #if WANT_PIC
-	ldr	\got, =_GLOBAL_OFFSET_TABLE_ - . - 12
-	add	\got, pc, \got
+	ldr\cond \got, .L$_ldgot$\@
+.L$_ldgot_pc$\@:
+	add\cond \got, pc, \got
+	_LIT
+	.balign	4
+.L$_ldgot$\@:
+	.word	_GLOBAL_OFFSET_TABLE_ - .L$_ldgot_pc$\@ - .L$_pcoff
+	_ENDLIT
 #endif
-	.endm
+.endm
 
 // Load address of external symbol ADDR into REG, maybe using GOT.
-	.macro	leaext reg, addr, cond=, got=GOTREG
+.macro	leaext	reg, addr, cond=, got=GOTREG
 #if WANT_PIC
-	ldr	\reg, =\addr(GOT)
-	ldr	\reg, [\got, \reg]
+	ldr\cond \reg, .L$_leaext$\@
+	ldr\cond \reg, [\got, \reg]
+	_LIT
+	.balign	4
+.L$_leaext$\@:
+	.word	\addr(GOT)
+	_ENDLIT
 #else
-	ldr	\reg, =\addr
+	ldr\cond \reg, =\addr
 #endif
-	.endm
+.endm
+
+// Load address of external symbol ADDR into REG without a GOT register.
+.macro	leaextq	reg, addr, cond=
+#if WANT_PIC
+	ldr\cond \reg, .L$_leaextq$\@
+.L$_leaextq_pc$\@:
+	.if .L$_pcoff == 8
+	ldr\cond \reg, [pc, \reg]
+	.else
+	add\cond \reg, pc
+	ldr\cond \reg, [\reg]
+	.endif
+	_LIT
+	.balign	4
+.L$_leaextq$\@:
+	.word	\addr(GOT_PREL) + (. - .L$_leaextq_pc$\@ - .L$_pcoff)
+	_ENDLIT
+#else
+	ldr\cond \reg, =\addr
+#endif
+.endm
 
 #endif
 
@@ -218,4 +423,10 @@ _where_am_i.\got :
 #  define SIZE_OBJ(name)
 #endif
 
+// An empty-flags GNU-stack note marks the stack as non-executable.
+#if __ELF__ && !defined(WANT_EXECUTABLE_STACK)
+	.pushsection .note.GNU-stack, "", _SECTTY(progbits)
+	.popsection
+#endif
+
 ///----- That's all, folks --------------------------------------------------