///--------------------------------------------------------------------------
/// General definitions.
+// Some useful variables.
+ .L$_subsec = 0
+
+// Literal pools done the hard way.
// `_LIT' switches to text subsection `.L$_subsec + 1', and `_ENDLIT'
// switches back to text subsection `.L$_subsec'.  `_LTORG' advances
// `.L$_subsec' by two and switches to the new subsection.  (GNU as lays
// out subsections in numerical order, so material written between `_LIT'
// and `_ENDLIT' should land after the code in subsection `.L$_subsec'.)
+#define _LIT .text .L$_subsec + 1
+#define _ENDLIT .text .L$_subsec
+#define _LTORG .L$_subsec = .L$_subsec + 2; .text .L$_subsec
+
// Announcing an external function.
// `FUNC(name)' makes `F(name)' global, applies `TYPE_FUNC', defines a
// one-shot `ENDFUNC' macro bound to NAME, and then emits the label
// `F(name)' between `FUNC_PREHOOK' and `FUNC_POSTHOOK'.
#define FUNC(name) \
.globl F(name); \
TYPE_FUNC(name); \
- .macro ENDFUNC; _ENDFUNC(name); .endm; \
+ .macro ENDFUNC; _ENDFUNC(name); .endm; \
FUNC_PREHOOK(name); \
F(name): \
FUNC_POSTHOOK(name)
// `_ENDFUNC(name)' is what `ENDFUNC' expands to: it purges `ENDFUNC' so
// that the next `FUNC' can define it afresh, applies `SIZE_OBJ' and
// `ENDFUNC_HOOK', and (in the added lines) finishes with `_LTORG'.
#define _ENDFUNC(name) \
.purgem ENDFUNC; \
SIZE_OBJ(name); \
- ENDFUNC_HOOK(name)
+ ENDFUNC_HOOK(name); \
+ _LTORG
+
+// Make a helper function, if necessary.
// `AUXFN(name)' opens a `.ifndef .L$_auxfn_def.name' conditional, so the
// helper body which follows is assembled only while that symbol is still
// undefined.  It switches to text subsection 7128, defines `_ENDAUXFN' for
// this NAME, applies `FUNC_PREHOOK', and emits the label `name'.
//
// `ENDAUXFN' finishes the helper: through `_ENDAUXFN_TAIL' it purges
// `_ENDAUXFN', returns to text subsection `.L$_subsec', and sets
// `.L$_auxfn_def.name' to 1; it then closes the conditional with `.endif'.
+#define AUXFN(name) \
+ .ifndef .L$_auxfn_def.name; \
+ .text 7128; \
+ .macro _ENDAUXFN; _ENDAUXFN_TAIL(name); .endm; \
+ FUNC_PREHOOK(name); \
+name:
+#define _ENDAUXFN_TAIL(name) \
+ .purgem _ENDAUXFN; \
+ .text .L$_subsec; \
+ .L$_auxfn_def.name = 1
+#define ENDAUXFN _ENDAUXFN; .endif
///--------------------------------------------------------------------------
/// ELF-specific hacking.
// Set the function hooks.
#define FUNC_PREHOOK(_) .balign 16
+// On Windows, arrange to install stack-unwinding data.
+#if CPUFAM_AMD64 && ABI_WIN
+# define FUNC_POSTHOOK(name) .seh_proc name
+# define ENDFUNC_HOOK(_) .seh_endproc
+// Procedures are expected to invoke `.seh_setframe' if necessary, and
+// `.seh_pushreg' and friends, and `.seh_endprologue'.
+#endif
+
// Don't use the wretched AT&T syntax. It's festooned with pointless
// punctuation, and all of the data movement is backwards. Ugh!
.intel_syntax noprefix
// Call external subroutine at ADDR, possibly via PLT.
// With `WANT_PIC' set, the call targets `ADDR@PLT'; otherwise it targets
// ADDR directly.
- .macro callext addr
+.macro callext addr
#if WANT_PIC
call \addr@PLT
#else
call \addr
#endif
- .endm
+.endm
// Do I need to arrange a spare GOT register?
#if WANT_PIC && CPUFAM_X86
#define GOTREG ebx // Not needed in AMD64 so don't care.
// Maybe load GOT address into GOT.
// In the added version, `ldgot' wraps a helper named `_ldgot.GOT' in
// `AUXFN' ... `ENDAUXFN'.  The helper copies the word at `[esp]' (the
// return address on entry to a called routine) into GOT and returns.  The
// macro then calls the helper and adds `offset _GLOBAL_OFFSET_TABLE_' to
// GOT.  The removed version instead called `_where_am_i.GOT', which had to
// be built separately using `gotaux'.
- .macro ldgot got=GOTREG
-#if WANT_PIC && CPUFAM_X86
- call _where_am_i.\got
- add \got, offset _GLOBAL_OFFSET_TABLE_
-#endif
- .endm
-
-// Maybe build a helper subroutine for `ldgot GOT'.
- .macro gotaux got=GOTREG
+.macro ldgot got=GOTREG
#if WANT_PIC && CPUFAM_X86
- .align 16
-_where_am_i.\got :
+ AUXFN(_ldgot.\got)
mov \got, [esp]
ret
+ ENDAUXFN
+ call _ldgot.\got
+ add \got, offset _GLOBAL_OFFSET_TABLE_
#endif
- .endm
+.endm
// Load address of external symbol ADDR into REG, maybe using GOT.
- .macro leaext reg, addr, got=GOTREG
+.macro leaext reg, addr, got=GOTREG
#if WANT_PIC
# if CPUFAM_X86
mov \reg, [\got + \addr@GOT]
lea \reg, \addr[rip]
# endif
#endif
- .endm
+.endm
// Address expression (possibly using a base register, and a displacement)
// referring to ADDR, which is within our module, maybe using GOT.
# define INTADDR__1(addr, got) addr
#endif
+// Permutations for SIMD instructions. SHUF(D, C, B, A) is an immediate,
+// suitable for use in `pshufd' or `shufpd', which copies element D
+// (0 <= D < 4) of the source to element 3 of the destination, element C to
+// element 2, element B to element 1, and element A to element 0.
// For example, SHUF(0, 1, 2, 3) = 0x1b reverses the order of the four
// elements, and SHUF(3, 2, 1, 0) = 0xe4 leaves them where they are.
+#define SHUF(d, c, b, a) (64*(d) + 16*(c) + 4*(b) + (a))
+
+#endif
+
+#if CPUFAM_X86
+
// First stage of the debugging-print macros.  Pushes the flags, `eax',
// `ecx', `edx' and `ebp'; leaves `ebp' pointing at the saved `ebp'; then
// rounds `esp' down to a 16-byte boundary, reserves 512 bytes, and stores
// the FPU/SSE state there with `fxsave'.  `_reg.3' undoes all of this.
+.macro _reg.0
+ // Stash GP registers and establish temporary stack frame.
+ pushfd
+ push eax
+ push ecx
+ push edx
+ push ebp
+ mov ebp, esp
+ and esp, ~15
+ sub esp, 512
+ fxsave [esp]
+.endm
+
// `_reg.1' and `_reg.2' expand to nothing here; the printing macros invoke
// them between `_reg.0' and `_reg.3'.
// NOTE(review): presumably placeholders for stages which other targets
// need -- confirm against the rest of the file.
+.macro _reg.1
+.endm
+
+.macro _reg.2
+.endm
+
// Final stage of the debugging-print macros.  Pushes the address of the
// format string (FMT, prefixed with `;; ' and followed by a newline and a
// terminating zero byte) and calls `printf'; any arguments must already
// have been pushed by the caller.  The string itself is emitted inline and
// jumped over.  Afterwards, the `fxsave' area address is recomputed from
// `ebp' exactly as `_reg.0' derived it, the FPU/SSE state is reloaded with
// `fxrstor', `esp' is reset from `ebp', and the registers and flags pushed
// by `_reg.0' are popped in reverse order.
+.macro _reg.3 fmt
+ // Print FMT and the other established arguments.
+ lea eax, .L$_reg$msg.\@
+ push eax
+ call printf
+ jmp .L$_reg$cont.\@
+.L$_reg$msg.\@:
+ .ascii ";; \fmt\n\0"
+.L$_reg$cont.\@:
+ mov eax, ebp
+ and eax, ~15
+ sub eax, 512
+ fxrstor [eax]
+ mov esp, ebp
+ pop ebp
+ pop edx
+ pop ecx
+ pop eax
+ popfd
+.endm
+
// Print the fixed message MSG, with no further arguments, through the
// `_reg.0' ... `_reg.3' save/print/restore sequence.
+.macro msg msg
+ _reg.0
+ _reg.1
+ _reg.2
+ _reg.3 "\msg"
+.endm
+
// Print `MSG: R = <value>' for the general-purpose register R, in
// eight-digit hex.  `esp' and `ebp' have been changed by `_reg.0', so they
// are special-cased: the original `esp' is `ebp + 20' (five 4-byte items
// were pushed before `ebp' was set up), and the original `ebp' is the word
// saved at `[ebp]'.  Any other register is pushed as it stands.
+.macro reg r, msg
+ _reg.0
+ .ifeqs "\r", "esp"
+ lea eax, [ebp + 20]
+ push eax
+ .else
+ .ifeqs "\r", "ebp"
+ push [ebp]
+ .else
+ push \r
+ .endif
+ .endif
+ _reg.1
+ _reg.2
+ _reg.3 "\msg: \r = %08x"
+.endm
+
// Print `MSG: R = ...' for the SSE register R as four 32-bit hex words.
// R is copied into `xmm0' and its elements reversed (shuffle 0x1b) before
// being stored as 16 bytes of arguments on the stack, so that element 3 of
// R is the first word printed.  `xmm0' is overwritten here; `_reg.3'
// reloads the state saved by `_reg.0'.
+.macro xmmreg r, msg
+ _reg.0
+ _reg.1
+ _reg.2
+ movdqu xmm0, \r
+ pshufd xmm0, xmm0, 0x1b
+ sub esp, 16
+ movdqa [esp], xmm0
+ _reg.3 "\msg: \r = %08x %08x %08x %08x"
+.endm
+
// Print `MSG: R = ...' for the MMX register R as two 32-bit hex words.
// The shuffle 0x4e exchanges the two 32-bit halves of R in place, so that
// the upper half is the first word printed.  R itself is modified here;
// `_reg.3' reloads the state saved by `_reg.0'.
+.macro mmreg r, msg
+ _reg.0
+ _reg.1
+ _reg.2
+ pshufw \r, \r, 0x4e
+ sub esp, 8
+ movq [esp], \r
+ _reg.3 "\msg: \r = %08x %08x"
+.endm
+
// Print `MSG: st(I) = <value>' using the `%.20Lg' format.  The FPU is
// reinitialized with `finit', and the value is then read back from the
// `fxsave' image which `_reg.0' left at `[esp]', at offset 32 + 16*I.  It
// is stored in a 12-byte stack slot as the `printf' argument.
+.macro freg i, msg
+ _reg.0
+ _reg.1
+ _reg.2
+ finit
+ fldt [esp + 32 + 16*\i]
+ sub esp, 12
+ fstpt [esp]
+ _reg.3 "\msg: st(\i) = %.20Lg"
+.endm
+
// Print `MSG: st(I) = <value>' using the `%La' (hexadecimal floating-point)
// format.  Apart from the format string this is the same sequence as
// `freg': reinitialize the FPU, reload the value from the `fxsave' image at
// offset 32 + 16*I, and store it in a 12-byte stack slot as the argument.
+.macro fxreg i, msg
+ _reg.0
+ _reg.1
+ _reg.2
+ finit
+ fldt [esp + 32 + 16*\i]
+ sub esp, 12
+ fstpt [esp]
+ _reg.3 "\msg: st(\i) = %La"
+.endm
+
#endif
///--------------------------------------------------------------------------
/// ARM-specific hacking.
-#if CPUFAM_ARM
+#if CPUFAM_ARMEL
+
+// ARM/Thumb mode things. Use ARM by default.
// Each of these selects the instruction set and records in `.L$_pcoff' the
// offset (8 for ARM, 4 for Thumb) which the pc-relative literal words in
// `ldgot' and `leaextq' subtract.
+#define ARM .arm; .L$_pcoff = 8
+#define THUMB .thumb; .L$_pcoff = 4
+ ARM
// Set the function hooks.
#define FUNC_PREHOOK(_) .balign 4
#define ENDFUNC_HOOK(name) .ltorg
// Call external subroutine at ADDR, possibly via PLT.
// COND, if given, is appended to the `bl' mnemonic.  With `WANT_PIC' set,
// the target is written as `ADDR(PLT)'; otherwise it is ADDR itself.
- .macro callext addr, cond=
+.macro callext addr, cond=
#if WANT_PIC
bl\cond \addr(PLT)
#else
bl\cond \addr
#endif
- .endm
+.endm
// Do I need to arrange a spare GOT register?
#if WANT_PIC
#define GOTREG r9
// Maybe load GOT address into GOT.
// In the added version, GOT is loaded from a private literal word and then
// `pc' is added to it.  The literal is placed between `_LIT' and `_ENDLIT'
// and holds `_GLOBAL_OFFSET_TABLE_' minus the address of the `add'
// instruction minus `.L$_pcoff'.  The removed version used an `ldr =' pseudo
// with a fixed correction of 12.  COND, if given, is appended to both
// instruction mnemonics.
- .macro ldgot cond=, got=GOTREG
+.macro ldgot cond=, got=GOTREG
#if WANT_PIC
- ldr\cond \got, =_GLOBAL_OFFSET_TABLE_ - . - 12
+ ldr\cond \got, .L$_ldgot$\@
+.L$_ldgot_pc$\@:
add\cond \got, pc, \got
+ _LIT
+ .balign 4
+.L$_ldgot$\@:
+ .word _GLOBAL_OFFSET_TABLE_ - .L$_ldgot_pc$\@ - .L$_pcoff
+ _ENDLIT
#endif
- .endm
+.endm
// Load address of external symbol ADDR into REG, maybe using GOT.
// With `WANT_PIC' set, the added version loads the word `ADDR(GOT)' from a
// private literal (placed between `_LIT' and `_ENDLIT') into REG, and then
// loads REG from `[GOT, REG]'; GOT must therefore already hold the GOT
// address.  Without `WANT_PIC', the address is loaded using `ldr ='.
- .macro leaext reg, addr, cond=, got=GOTREG
+.macro leaext reg, addr, cond=, got=GOTREG
#if WANT_PIC
- ldr\cond \reg, =\addr(GOT)
+ ldr\cond \reg, .L$_leaext$\@
ldr\cond \reg, [\got, \reg]
+ _LIT
+ .balign 4
+.L$_leaext$\@:
+ .word \addr(GOT)
+ _ENDLIT
+#else
+ ldr\cond \reg, =\addr
+#endif
+.endm
+
+// Load address of external symbol ADDR into REG directly.
// No GOT register is needed.  With `WANT_PIC' set, REG is first loaded with
// a private literal word `ADDR(GOT_PREL)', adjusted by the distance from
// the pc-reading instruction to the literal less `.L$_pcoff'.  The final
// load is then made relative to `pc': a single `ldr' when `.L$_pcoff' is 8
// (ARM), or an `add' followed by an `ldr' otherwise (Thumb).  Without
// `WANT_PIC', the address is loaded using `ldr ='.
+.macro leaextq reg, addr, cond=
+#if WANT_PIC
+ ldr\cond \reg, .L$_leaextq$\@
+.L$_leaextq_pc$\@:
+ .if .L$_pcoff == 8
+ ldr\cond \reg, [pc, \reg]
+ .else
+ add\cond \reg, pc
+ ldr\cond \reg, [\reg]
+ .endif
+ _LIT
+ .balign 4
+.L$_leaextq$\@:
+ .word \addr(GOT_PREL) + (. - .L$_leaextq_pc$\@ - .L$_pcoff)
+ _ENDLIT
#else
ldr\cond \reg, =\addr
#endif
- .endm
+.endm
#endif
# define SIZE_OBJ(name)
#endif
+// On ELF targets, emit an empty `.note.GNU-stack' section (no `x' flag) to
+// tell the linker that this object does not need an executable stack.  A
+// file which really does need one defines `WANT_EXECUTABLE_STACK' before
+// including this header, and the note is then omitted.
+#if __ELF__ && !defined(WANT_EXECUTABLE_STACK)
+ .pushsection .note.GNU-stack, "", _SECTTY(progbits)
+ .popsection
+#endif
+
///----- That's all, folks --------------------------------------------------