+// Permutations for SIMD instructions. SHUF(D, C, B, A) is an immediate,
+// suitable for use in `pshufd' or `shufpd', which copies element D
+// (0 <= D < 4) of the source to element 3 of the destination, element C to
+// element 2, element B to element 1, and element A to element 0.
+//
+// Each selector occupies its own two-bit field of the result: D is scaled
+// by 64 (bits 7:6), C by 16 (bits 5:4), B by 4 (bits 3:2), and A is the
+// low field (bits 1:0). The arguments are parenthesized in the expansion,
+// so each may be an arbitrary constant expression. Nothing checks that a
+// selector is actually below 4: an out-of-range value simply carries into
+// the neighbouring field.
+//
+// NOTE(review): a four-selector immediate matches `pshufd' and `shufps';
+// `shufpd' above may be a slip for `shufps' -- confirm against the ISA
+// manual before relying on it.
+#define SHUF(d, c, b, a) (64*(d) + 16*(c) + 4*(b) + (a))
+
+#endif
+
+///--------------------------------------------------------------------------
+/// ARM-specific hacking.
+
+#if CPUFAM_ARMEL
+
+// ARM/Thumb mode things. Use ARM by default.
+//
+// Each macro expands to two assembler statements: the mode directive
+// (`.arm' or `.thumb') and an assignment to the assembler symbol
+// `.L$_pcoff' -- 8 after ARM, 4 after THUMB. The PIC macros `ldgot' and
+// `leaextq' in this section subtract `.L$_pcoff' when they compute
+// pc-relative literals, and `leaextq' also tests it to choose its
+// instruction sequence. Switch modes through these macros rather than by
+// writing `.arm' or `.thumb' directly, or the symbol will go stale.
+#define ARM .arm; .L$_pcoff = 8
+#define THUMB .thumb; .L$_pcoff = 4
+// Establish the default: ARM mode, with `.L$_pcoff' set to match.
+ ARM
+
+// Set the function hooks.
+//
+// FUNC_PREHOOK ignores its argument and expands to `.balign 4'.
+// ENDFUNC_HOOK ignores NAME and expands to `.ltorg', which makes the
+// assembler emit its pending literal pool at that point; the pool is where
+// the `ldr REG, =ADDR' forms used by the non-PIC branches of `leaext' and
+// `leaextq' keep their constants. The macros which invoke these hooks are
+// outside this section -- presumably the function start/end macros; verify
+// at their definitions.
+#define FUNC_PREHOOK(_) .balign 4
+#define ENDFUNC_HOOK(name) .ltorg
+
+// Call external subroutine at ADDR, possibly via PLT.
+//
+// Arguments:
+//   addr  symbol to call
+//   cond  optional condition-code suffix, pasted directly onto `bl'
+//         (default empty, i.e., unconditional)
+//
+// When WANT_PIC is set, the target is written as `ADDR(PLT)', so that the
+// call is marked for routing through the procedure linkage table;
+// otherwise it is a plain `bl' to ADDR.
+.macro callext addr, cond=
+#if WANT_PIC
+ bl\cond \addr(PLT)
+#else
+ bl\cond \addr
+#endif
+.endm
+
+// Do I need to arrange a spare GOT register?
+//
+// NEED_GOT is defined (as 1) only when WANT_PIC is set, and is left
+// undefined otherwise. GOTREG is defined unconditionally: it names the
+// register (r9) which `ldgot' and `leaext' use by default for the GOT
+// address.
+#if WANT_PIC
+# define NEED_GOT 1
+#endif
+#define GOTREG r9
+
+// Maybe load GOT address into GOT.
+//
+// Arguments:
+//   cond  optional condition-code suffix applied to both instructions
+//         (default empty)
+//   got   register to receive the GOT address (default GOTREG)
+//
+// When WANT_PIC is not set this expands to nothing. Otherwise it loads a
+// literal word holding the distance from the pc value seen by the `add'
+// to _GLOBAL_OFFSET_TABLE_, and then adds pc to it. The local labels carry
+// the assembler's macro-invocation counter so that every expansion gets
+// its own pair. _LIT and _ENDLIT are defined outside this section; they
+// bracket the literal word -- presumably moving it out of the instruction
+// stream; check their definitions.
+.macro ldgot cond=, got=GOTREG
+#if WANT_PIC
+ // Fetch the offset word laid down below.
+ ldr\cond \got, .L$_ldgot$\@
+.L$_ldgot_pc$\@:
+ // Turn the offset into an address by adding pc.
+ add\cond \got, pc, \got
+ _LIT
+ .balign 4
+.L$_ldgot$\@:
+ // The `add' reads pc as its own address plus `.L$_pcoff', so both are
+ // subtracted from the GOT address here.
+ .word _GLOBAL_OFFSET_TABLE_ - .L$_ldgot_pc$\@ - .L$_pcoff
+ _ENDLIT
+#endif
+.endm
+
+// Load address of external symbol ADDR into REG, maybe using GOT.
+//
+// Arguments:
+//   reg   destination register; in PIC builds it also holds the GOT
+//         offset briefly between the two loads
+//   addr  external symbol whose address is wanted
+//   cond  optional condition-code suffix applied to every instruction
+//         (default empty)
+//   got   register holding the GOT address (default GOTREG); only read
+//         when WANT_PIC is set, in which case the caller must have set it
+//         up beforehand, e.g., with `ldgot'
+//
+// PIC: load the `ADDR(GOT)' literal into REG, and then load the word at
+// GOT + REG. REG must be a different register from GOT, since the first
+// load overwrites REG before GOT is used as the base. _LIT and _ENDLIT
+// are defined outside this section; they bracket the literal word.
+//
+// Non-PIC: a single `ldr REG, =ADDR'; GOT is ignored.
+.macro leaext reg, addr, cond=, got=GOTREG
+#if WANT_PIC
+ // REG := the literal below (ADDR's GOT offset).
+ ldr\cond \reg, .L$_leaext$\@
+ // REG := the word stored at GOT + REG.
+ ldr\cond \reg, [\got, \reg]
+ _LIT
+ .balign 4
+.L$_leaext$\@:
+ .word \addr(GOT)
+ _ENDLIT
+#else
+ ldr\cond \reg, =\addr
+#endif
+.endm
+
+// Load address of external symbol ADDR into REG directly.
+//
+// Arguments:
+//   reg   destination register; also used as scratch for the offset
+//   addr  external symbol whose address is wanted
+//   cond  optional condition-code suffix applied to every instruction
+//         (default empty)
+//
+// Unlike `leaext', this takes no GOT register: when WANT_PIC is set, the
+// word to load is reached relative to pc instead. The literal holds
+// `ADDR(GOT_PREL)' plus the distance from the pc value seen by the
+// instruction at the `_pc' label to the literal word itself -- presumably
+// because GOT_PREL is resolved relative to the literal's own location;
+// confirm against the ABI relocation documentation.
+//
+// The `.if' is evaluated by the assembler each time the macro is
+// expanded, so it follows whichever of ARM or THUMB was used last: with
+// `.L$_pcoff' equal to 8 (set by ARM) a single load with pc as base and
+// REG as offset is emitted; otherwise pc is added into REG first, and the
+// load then goes through REG alone.
+//
+// _LIT and _ENDLIT are defined outside this section; they bracket the
+// literal word. When WANT_PIC is not set, this is just `ldr REG, =ADDR'.
+.macro leaextq reg, addr, cond=
+#if WANT_PIC
+ // REG := the pc-relative offset word laid down below.
+ ldr\cond \reg, .L$_leaextq$\@
+.L$_leaextq_pc$\@:
+ .if .L$_pcoff == 8
+ ldr\cond \reg, [pc, \reg]
+ .else
+ add\cond \reg, pc
+ ldr\cond \reg, [\reg]
+ .endif
+ _LIT
+ .balign 4
+.L$_leaextq$\@:
+ // `.' is this word's own address; the bracketed term rebases the
+ // offset from here to the pc value read at the `_pc' label.
+ .word \addr(GOT_PREL) + (. - .L$_leaextq_pc$\@ - .L$_pcoff)
+ _ENDLIT
+#else
+ ldr\cond \reg, =\addr
+#endif
+.endm
+