configure.ac: Don't be so picky about identifying ARM variants.
[catacomb] / base / asm-common.h
CommitLineData
1a0c09c4
MW
1/// -*- mode: asm; asm-comment-char: ?/ -*-
2///
3/// Common definitions for assembler source files
4///
5/// (c) 2015 Straylight/Edgeware
6///
7
8///----- Licensing notice ---------------------------------------------------
9///
10/// This file is part of Catacomb.
11///
12/// Catacomb is free software; you can redistribute it and/or modify
13/// it under the terms of the GNU Library General Public License as
14/// published by the Free Software Foundation; either version 2 of the
15/// License, or (at your option) any later version.
16///
17/// Catacomb is distributed in the hope that it will be useful,
18/// but WITHOUT ANY WARRANTY; without even the implied warranty of
19/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20/// GNU Library General Public License for more details.
21///
22/// You should have received a copy of the GNU Library General Public
23/// License along with Catacomb; if not, write to the Free
24/// Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25/// MA 02111-1307, USA.
26
27///--------------------------------------------------------------------------
28/// General definitions.
29
898f32b3
MW
// Preprocessor hacks.
//
// STRINGY(x) macro-expands x and then turns the result into a string
// literal; GLUE(x, y) macro-expands x and y and then pastes the results
// together into a single token.  The underscore-prefixed helpers perform the
// raw `#' and `##' operations, which do not expand their arguments first --
// hence the two-level arrangement.  _EMPTY expands to nothing.
#define STRINGY(x) _STRINGY(x)
#define _STRINGY(x) #x
#define GLUE(x, y) _GLUE(x, y)
#define _GLUE(x, y) x##y
#define _EMPTY
36
f8e509a9
MW
37// Some useful variables.
38 .L$_subsec = 0
39
40// Literal pools done the hard way.
41#define _LIT .text .L$_subsec + 1
42#define _ENDLIT .text .L$_subsec
6c54cbd3 43#define _LTORG .L$_subsec = .L$_subsec + 2; .text .L$_subsec
f8e509a9 44
645fcce0
MW
45// ELF section types.
46#if __ELF__
47# if CPUFAM_ARMEL
48# define _SECTTY(ty) %ty
49# else
50# define _SECTTY(ty) @ty
51# endif
52#endif
53
54// Section selection.
55#define TEXT .text .L$_subsec
56#if ABI_WIN
57# define RODATA .section .rdata, "dr"
58#elif __ELF__
59# define RODATA .section .rodata, "a", _SECTTY(progbits)
60#else
61# define RODATA TEXT
62#endif
63#define DATA .data
64
1a517bb3
MW
65// Announcing an internal function.
66#define INTFUNC(name) \
1a0c09c4 67 TYPE_FUNC(name); \
8a1aa284 68 .macro ENDFUNC; _ENDFUNC(name); .endm; \
0923a413 69 .L$_prologue_p = 0; .L$_frameptr_p = 0; \
1a0c09c4 70 FUNC_PREHOOK(name); \
1a517bb3 71name: \
1a0c09c4
MW
72 FUNC_POSTHOOK(name)
73
1a517bb3
MW
74// Announcing an external function.
75#define FUNC(name) \
76 .globl F(name); \
77INTFUNC(F(name))
78
1a0c09c4
MW
79// Marking the end of a function.
80#define _ENDFUNC(name) \
0923a413
MW
81 .if ~ .L$_prologue_p; .error "Missing `endprologue'"; .endif; \
82 .if .L$_frameptr_p; .purgem dropfp; .endif; \
1a0c09c4
MW
83 .purgem ENDFUNC; \
84 SIZE_OBJ(name); \
f8e509a9 85 ENDFUNC_HOOK(name); \
6c54cbd3 86 _LTORG
1a0c09c4 87
8ae4c946
MW
88// Make a helper function, if necessary.
89#define AUXFN(name) \
90 .ifndef .L$_auxfn_def.name; \
91 .text 7128; \
92 .macro _ENDAUXFN; _ENDAUXFN_TAIL(name); .endm; \
93 FUNC_PREHOOK(name); \
94name:
95#define _ENDAUXFN_TAIL(name) \
96 .purgem _ENDAUXFN; \
97 .text .L$_subsec; \
98 .L$_auxfn_def.name = 1
99#define ENDAUXFN _ENDAUXFN; .endif
100
1a0c09c4
MW
101///--------------------------------------------------------------------------
102/// ELF-specific hacking.
103
104#if __ELF__
105
106#if __PIC__ || __PIE__
107# define WANT_PIC 1
108#endif
109
110#define TYPE_FUNC(name) .type name, STT_FUNC
111
112#define SIZE_OBJ(name) .size name, . - name
113
114#endif
115
116///--------------------------------------------------------------------------
0f23f75f
MW
117/// Windows-specific hacking.
118
119#if ABI_WIN
1a0c09c4
MW
120
121#if CPUFAM_X86
0f23f75f
MW
122# define F(name) _##name
123#endif
124
125#endif
126
127///--------------------------------------------------------------------------
128/// x86- and amd64-specific hacking.
129///
130/// It's (slightly) easier to deal with both of these in one go.
131
132#if CPUFAM_X86 || CPUFAM_AMD64
1a0c09c4 133
2cb17e02
MW
134// Word size.
135#if CPUFAM_X86
136# define WORDSZ 4
137#endif
138#if CPUFAM_AMD64
139# define WORDSZ 8
140#endif
141
1a0c09c4
MW
142// Set the function hooks.
143#define FUNC_PREHOOK(_) .balign 16
144
f71dd54d
MW
145// On Windows, arrange to install stack-unwinding data.
146#if CPUFAM_AMD64 && ABI_WIN
147# define FUNC_POSTHOOK(name) .seh_proc name
148# define ENDFUNC_HOOK(_) .seh_endproc
149// Procedures are expected to invoke `.seh_setframe' if necessary, and
150// `.seh_pushreg' and friends, and `.seh_endprologue'.
151#endif
152
0923a413
MW
153#if __ELF__
154# define FUNC_POSTHOOK(_) .cfi_startproc
155# define ENDFUNC_HOOK(_) .cfi_endproc
156#endif
157
1a0c09c4
MW
158// Don't use the wretched AT&T syntax. It's festooned with pointless
159// punctuation, and all of the data movement is backwards. Ugh!
160 .intel_syntax noprefix
161
// Call the external subroutine ADDR.  In a position-independent build
// (WANT_PIC set) the call is routed through the procedure linkage table;
// otherwise ADDR is called directly.
.macro	callext	addr
#if !WANT_PIC
	call	\addr
#else
	call	\addr@PLT
#endif
.endm
1a0c09c4
MW
170
171// Do I need to arrange a spare GOT register?
172#if WANT_PIC && CPUFAM_X86
173# define NEED_GOT 1
174#endif
175#define GOTREG ebx // Not needed in AMD64 so don't care.
176
177// Maybe load GOT address into GOT.
8a1aa284 178.macro ldgot got=GOTREG
0f23f75f 179#if WANT_PIC && CPUFAM_X86
8ae4c946 180 AUXFN(_ldgot.\got)
1a0c09c4
MW
181 mov \got, [esp]
182 ret
8ae4c946
MW
183 ENDAUXFN
184 call _ldgot.\got
185 add \got, offset _GLOBAL_OFFSET_TABLE_
1a0c09c4 186#endif
8a1aa284 187.endm
1a0c09c4
MW
188
// Load the address of the external symbol ADDR into REG.
//
// In a position-independent build (WANT_PIC set) the address is fetched from
// the global offset table: on x86 this is indexed from the register GOT
// (default GOTREG), while on AMD64 the table entry is addressed relative to
// `rip' and GOT is not used.  Otherwise the address is materialized
// directly: as an immediate on x86, or `rip'-relative on AMD64.
.macro	leaext	reg, addr, got=GOTREG
#if CPUFAM_X86
# if WANT_PIC
	mov	\reg, [\got + \addr@GOT]
# else
	mov	\reg, offset \addr
# endif
#endif
#if CPUFAM_AMD64
# if WANT_PIC
	mov	\reg, \addr@GOTPCREL[rip]
# else
	lea	\reg, \addr[rip]
# endif
#endif
.endm
1a0c09c4
MW
207
208// Address expression (possibly using a base register, and a displacement)
209// referring to ADDR, which is within our module, maybe using GOT.
210#define INTADDR(...) INTADDR__0(__VA_ARGS__, GOTREG, dummy)
211#define INTADDR__0(addr, got, ...) INTADDR__1(addr, got)
0f23f75f
MW
212#if CPUFAM_AMD64
213# define INTADDR__1(addr, got) addr + rip
214#elif WANT_PIC
1a0c09c4
MW
215# define INTADDR__1(addr, got) got + addr@GOTOFF
216#else
217# define INTADDR__1(addr, got) addr
218#endif
219
a13b5730
MW
// Build a shuffle-control immediate for four-element SIMD permutations.
//
// SHUF(D, C, B, A), with each argument in the range 0 to 3, packs the four
// selectors into a single byte, two bits apiece: D (bits 6--7) picks the
// source element copied to destination element 3, C (bits 4--5) feeds
// element 2, B (bits 2--3) feeds element 1, and A (bits 0--1) feeds
// element 0.  The result is suitable as the immediate operand of `pshufd'.
// NOTE(review): the earlier wording also named `shufpd', whose immediate is
// only two bits wide; `shufps' was presumably meant -- confirm.
#define SHUF(d, c, b, a) ((((d)*4 + (c))*4 + (b))*4 + (a))
225
43ea7558
MW
226// Map register names to their individual pieces.
227
228// Apply decoration decor to (internal) register name reg of type ty.
229//
230// See `R_...' for internal register names. Decorations are as follows.
231//
232// b low byte (e.g., `al', `r8b')
233// h high byte (e.g., `ah')
234// w word (e.g., `ax', `r8w')
235// d doubleword (e.g., `eax', `r8d')
236// q quadword (e.g., `rax', `r8')
237// r whole register (doubleword on x86, quadword on amd64)
238//
239// And types are as follows.
240//
241// abcd the four traditional registers `a', `b', `c', `d'
242// xp the four pointer registers `si', `di', `bp', `sp'
243// ip the instruction pointer `ip'
244// rn the AMD64 numbered registers `r8'--`r15'
245#define _DECOR(ty, decor, reg) _DECOR_##ty##_##decor(reg)
246
247// Internal macros: _DECOR_ty_decor(reg) applies decoration decor to
248// (internal) register name reg of type ty.
249
250#define _DECOR_abcd_b(reg) reg##l
251#define _DECOR_abcd_h(reg) reg##h
252#define _DECOR_abcd_w(reg) reg##x
253#define _DECOR_abcd_d(reg) e##reg##x
254#if CPUFAM_AMD64
255# define _DECOR_abcd_q(reg) r##reg##x
256#endif
257
258#define _DECOR_xp_b(reg) reg##l
259#define _DECOR_xp_w(reg) reg
260#define _DECOR_xp_d(reg) e##reg
261#if CPUFAM_AMD64
262# define _DECOR_xp_q(reg) r##reg
263#endif
264
265#define _DECOR_ip_w(reg) reg
266#define _DECOR_ip_d(reg) e##reg
267#if CPUFAM_AMD64
268# define _DECOR_ip_q(reg) r##reg
269#endif
270
271#if CPUFAM_AMD64
272# define _DECOR_rn_b(reg) reg##b
273# define _DECOR_rn_w(reg) reg##w
274# define _DECOR_rn_d(reg) reg##d
275# define _DECOR_rn_q(reg) reg
276# define _DECOR_rn_r(reg) reg
277#endif
278
279#if CPUFAM_X86
280# define _DECOR_abcd_r(reg) e##reg##x
281# define _DECOR_xp_r(reg) e##reg
282# define _DECOR_ip_r(reg) e##reg
283#endif
284#if CPUFAM_AMD64
285# define _DECOR_abcd_r(reg) r##reg##x
286# define _DECOR_xp_r(reg) r##reg
287# define _DECOR_ip_r(reg) r##reg
288#endif
289
290#define _DECOR_mem_b(addr) byte ptr addr
291#define _DECOR_mem_w(addr) word ptr addr
292#define _DECOR_mem_d(addr) dword ptr addr
293#if CPUFAM_AMD64
294# define _DECOR_mem_q(addr) qword ptr addr
295#endif
296
297// R_r(decor) applies decoration decor to register r, which is an internal
298// register name. The internal register names are: `ip', `a', `b', `c', `d',
299// `si', `di', `bp', `sp', `r8'--`r15'.
300#define R_ip(decor) _DECOR(ip, decor, ip)
301#define R_a(decor) _DECOR(abcd, decor, a)
302#define R_b(decor) _DECOR(abcd, decor, b)
303#define R_c(decor) _DECOR(abcd, decor, c)
304#define R_d(decor) _DECOR(abcd, decor, d)
305#define R_si(decor) _DECOR(xp, decor, si)
306#define R_di(decor) _DECOR(xp, decor, di)
307#define R_bp(decor) _DECOR(xp, decor, bp)
308#define R_sp(decor) _DECOR(xp, decor, sp)
309#if CPUFAM_AMD64
310# define R_r8(decor) _DECOR(rn, decor, r8)
311# define R_r9(decor) _DECOR(rn, decor, r9)
312# define R_r10(decor) _DECOR(rn, decor, r10)
313# define R_r11(decor) _DECOR(rn, decor, r11)
314# define R_r12(decor) _DECOR(rn, decor, r12)
315# define R_r13(decor) _DECOR(rn, decor, r13)
316# define R_r14(decor) _DECOR(rn, decor, r14)
317# define R_r15(decor) _DECOR(rn, decor, r15)
318#endif
319
320// Refer to an in-memory datum of the type implied by decor residing at
321// address addr (which should supply its own square-brackets).
322#define MEM(decor, addr) _DECOR(mem, decor, addr)
323
324// Applies decoration decor to assembler-level register name reg.
325#define _REGFORM(reg, decor) _GLUE(_REGFORM_, reg)(decor)
326
327// Internal macros: _REGFORM_r(decor) applies decoration decor to an
328// assembler-level register name, in place of any decoration that register
329// name has already.
330
331#define _REGFORM_ip(decor) R_ip(decor)
332#define _REGFORM_eip(decor) R_ip(decor)
333
334#define _REGFORM_a(decor) R_a(decor)
335#define _REGFORM_al(decor) R_a(decor)
336#define _REGFORM_ah(decor) R_a(decor)
337#define _REGFORM_ax(decor) R_a(decor)
338#define _REGFORM_eax(decor) R_a(decor)
339
340#define _REGFORM_b(decor) R_b(decor)
341#define _REGFORM_bl(decor) R_b(decor)
342#define _REGFORM_bh(decor) R_b(decor)
343#define _REGFORM_bx(decor) R_b(decor)
344#define _REGFORM_ebx(decor) R_b(decor)
345
346#define _REGFORM_c(decor) R_c(decor)
347#define _REGFORM_cl(decor) R_c(decor)
348#define _REGFORM_ch(decor) R_c(decor)
349#define _REGFORM_cx(decor) R_c(decor)
350#define _REGFORM_ecx(decor) R_c(decor)
351
352#define _REGFORM_d(decor) R_d(decor)
353#define _REGFORM_dl(decor) R_d(decor)
354#define _REGFORM_dh(decor) R_d(decor)
355#define _REGFORM_dx(decor) R_d(decor)
356#define _REGFORM_edx(decor) R_d(decor)
357
358#define _REGFORM_si(decor) R_si(decor)
359#define _REGFORM_sil(decor) R_si(decor)
360#define _REGFORM_esi(decor) R_si(decor)
361
362#define _REGFORM_di(decor) R_di(decor)
363#define _REGFORM_dil(decor) R_di(decor)
364#define _REGFORM_edi(decor) R_di(decor)
365
366#define _REGFORM_bp(decor) R_bp(decor)
367#define _REGFORM_bpl(decor) R_bp(decor)
368#define _REGFORM_ebp(decor) R_bp(decor)
369
370#define _REGFORM_sp(decor) R_sp(decor)
371#define _REGFORM_spl(decor) R_sp(decor)
372#define _REGFORM_esp(decor) R_sp(decor)
373
374#if CPUFAM_AMD64
375
376# define _REGFORM_rip(decor) R_ip(decor)
377# define _REGFORM_rsp(decor) R_sp(decor)
378# define _REGFORM_rbp(decor) R_bp(decor)
379# define _REGFORM_rdi(decor) R_di(decor)
380# define _REGFORM_rsi(decor) R_si(decor)
381# define _REGFORM_rdx(decor) R_d(decor)
382# define _REGFORM_rcx(decor) R_c(decor)
383# define _REGFORM_rbx(decor) R_b(decor)
384# define _REGFORM_rax(decor) R_a(decor)
385
386# define _REGFORM_r8(decor) R_r8(decor)
387# define _REGFORM_r8b(decor) R_r8(decor)
388# define _REGFORM_r8w(decor) R_r8(decor)
389# define _REGFORM_r8d(decor) R_r8(decor)
390
391# define _REGFORM_r9(decor) R_r9(decor)
392# define _REGFORM_r9b(decor) R_r9(decor)
393# define _REGFORM_r9w(decor) R_r9(decor)
394# define _REGFORM_r9d(decor) R_r9(decor)
395
396# define _REGFORM_r10(decor) R_r10(decor)
397# define _REGFORM_r10b(decor) R_r10(decor)
398# define _REGFORM_r10w(decor) R_r10(decor)
399# define _REGFORM_r10d(decor) R_r10(decor)
400
401# define _REGFORM_r11(decor) R_r11(decor)
402# define _REGFORM_r11b(decor) R_r11(decor)
403# define _REGFORM_r11w(decor) R_r11(decor)
404# define _REGFORM_r11d(decor) R_r11(decor)
405
406# define _REGFORM_r12(decor) R_r12(decor)
407# define _REGFORM_r12b(decor) R_r12(decor)
408# define _REGFORM_r12w(decor) R_r12(decor)
409# define _REGFORM_r12d(decor) R_r12(decor)
410
411# define _REGFORM_r13(decor) R_r13(decor)
412# define _REGFORM_r13b(decor) R_r13(decor)
413# define _REGFORM_r13w(decor) R_r13(decor)
414# define _REGFORM_r13d(decor) R_r13(decor)
415
416# define _REGFORM_r14(decor) R_r14(decor)
417# define _REGFORM_r14b(decor) R_r14(decor)
418# define _REGFORM_r14w(decor) R_r14(decor)
419# define _REGFORM_r14d(decor) R_r14(decor)
420
421# define _REGFORM_r15(decor) R_r15(decor)
422# define _REGFORM_r15b(decor) R_r15(decor)
423# define _REGFORM_r15w(decor) R_r15(decor)
424# define _REGFORM_r15d(decor) R_r15(decor)
425
426#endif
427
428// Macros for converting register names.
429#define BYTE(reg) _REGFORM(reg, b)
430#define HIBYTE(reg) _REGFORM(reg, h)
431#define WORD(reg) _REGFORM(reg, w)
432#define DWORD(reg) _REGFORM(reg, d)
433#if CPUFAM_AMD64
434# define QWORD(reg) _REGFORM(reg, q)
435#endif
436#define WHOLE(reg) _REGFORM(reg, r)
437
0923a413
MW
438// Stack management and unwinding.
// Establish FP as the frame pointer, pointing OFFSET bytes above the
// current stack pointer (default 0).
//
// The unwinding information is updated to match: on ELF targets the CFA is
// rebased onto FP (and adjusted by OFFSET); on 64-bit Windows a
// `.seh_setframe' record is emitted.  As side effects, `.L$_frameptr_p' is
// set to -1 and a one-shot `dropfp' macro is defined which undoes this
// frame setup with the same FP and OFFSET.
.macro	setfp	fp, offset = 0
  .ifne \offset
	lea	\fp, [R_sp(r) + \offset]
  .else
	mov	\fp, R_sp(r)
  .endif
#if __ELF__
	.cfi_def_cfa_register \fp
  .ifne \offset
	.cfi_adjust_cfa_offset -\offset
  .endif
#endif
#if ABI_WIN && CPUFAM_AMD64
	.seh_setframe \fp, \offset
#endif
	.macro	dropfp; _dropfp \fp, \offset; .endm
	.L$_frameptr_p = -1
.endm
461
// Undo a frame-pointer setup: restore the stack pointer from FP, which
// points OFFSET bytes above where the stack pointer should be (default 0).
//
// On ELF targets the CFA is rebased back onto the stack pointer, reversing
// the OFFSET adjustment.  Finally the `dropfp' macro is purged and
// `.L$_frameptr_p' is cleared.
.macro	_dropfp	fp, offset = 0
  .ifne \offset
	lea	R_sp(r), [\fp - \offset]
  .else
	mov	R_sp(r), \fp
  .endif
#if __ELF__
	.cfi_def_cfa_register R_sp(r)
  .ifne \offset
	.cfi_adjust_cfa_offset +\offset
  .endif
#endif
	.purgem	dropfp
	.L$_frameptr_p = 0
.endm
478
479.macro stalloc n
480 sub R_sp(r), \n
481#if __ELF__
482 .cfi_adjust_cfa_offset +\n
483#endif
484#if ABI_WIN && CPUFAM_AMD64
485 .seh_stackalloc \n
486#endif
487.endm
488
489.macro stfree n
490 add R_sp(r), \n
491#if __ELF__
492 .cfi_adjust_cfa_offset -\n
493#endif
494.endm
495
496.macro pushreg r
497 push \r
498#if __ELF__
499 .cfi_adjust_cfa_offset +WORDSZ
500 .cfi_rel_offset \r, 0
501#endif
502#if ABI_WIN && CPUFAM_AMD64
503 .seh_pushreg \r
504#endif
505.endm
506
507.macro popreg r
508 pop \r
509#if __ELF__
510 .cfi_adjust_cfa_offset -WORDSZ
511 .cfi_restore \r
512#endif
513.endm
514
515.macro savexmm r, offset
516 movdqa [R_sp(r) + \offset], \r
517#if ABI_WIN && CPUFAM_AMD64
518 .seh_savexmm \r, \offset
519#endif
520.endm
521
522.macro rstrxmm r, offset
523 movdqa \r, [R_sp(r) + \offset]
524.endm
525
526.macro endprologue
527#if ABI_WIN && CPUFAM_AMD64
528 .seh_endprologue
529#endif
530 .L$_prologue_p = -1
531.endm
532
1a0c09c4
MW
533#endif
534
17de5b2e
MW
535#if CPUFAM_X86
536
537.macro _reg.0
538 // Stash GP registers and establish temporary stack frame.
539 pushfd
540 push eax
541 push ecx
542 push edx
543 push ebp
544 mov ebp, esp
545 and esp, ~15
546 sub esp, 512
547 fxsave [esp]
548.endm
549
550.macro _reg.1
551.endm
552
553.macro _reg.2
554.endm
555
556.macro _reg.3 fmt
557 // Print FMT and the other established arguments.
558 lea eax, .L$_reg$msg.\@
559 push eax
560 call printf
561 jmp .L$_reg$cont.\@
562.L$_reg$msg.\@:
563 .ascii ";; \fmt\n\0"
564.L$_reg$cont.\@:
565 mov eax, ebp
566 and eax, ~15
567 sub eax, 512
568 fxrstor [eax]
569 mov esp, ebp
570 pop ebp
571 pop edx
572 pop ecx
573 pop eax
574 popfd
575.endm
576
577.macro msg msg
578 _reg.0
579 _reg.1
580 _reg.2
581 _reg.3 "\msg"
582.endm
583
// Debugging aid: print MSG followed by the value of general-purpose
// register R, formatted as eight hex digits.
//
// `_reg.0' pushes five words (flags, eax, ecx, edx, ebp) and then repoints
// ebp and esp, so those two registers no longer hold the caller's values and
// must be reconstructed: the caller's esp is ebp + 20, and the caller's ebp
// is the word saved at [ebp].  Any other register is pushed as it stands.
.macro	reg	r, msg
	_reg.0
  .ifnes "\r", "esp"
    .ifnes "\r", "ebp"
	push	\r
    .else
	push	[ebp]
    .endif
  .else
	lea	eax, [ebp + 20]
	push	eax
  .endif
	_reg.1
	_reg.2
	_reg.3	"\msg: \r = %08x"
.endm
600
601.macro xmmreg r, msg
602 _reg.0
603 _reg.1
604 _reg.2
605 movdqu xmm0, \r
606 pshufd xmm0, xmm0, 0x1b
607 sub esp, 16
608 movdqa [esp], xmm0
609 _reg.3 "\msg: \r = %08x %08x %08x %08x"
610.endm
611
612.macro mmreg r, msg
613 _reg.0
614 _reg.1
615 _reg.2
616 pshufw \r, \r, 0x4e
617 sub esp, 8
618 movq [esp], \r
619 _reg.3 "\msg: \r = %08x %08x"
620.endm
621
622.macro freg i, msg
623 _reg.0
624 _reg.1
625 _reg.2
626 finit
627 fldt [esp + 32 + 16*\i]
628 sub esp, 12
629 fstpt [esp]
630 _reg.3 "\msg: st(\i) = %.20Lg"
631.endm
632
633.macro fxreg i, msg
634 _reg.0
635 _reg.1
636 _reg.2
637 finit
638 fldt [esp + 32 + 16*\i]
639 sub esp, 12
640 fstpt [esp]
641 _reg.3 "\msg: st(\i) = %La"
642.endm
643
644#endif
645
1a0c09c4 646///--------------------------------------------------------------------------
61bd904b
MW
647/// ARM-specific hacking.
648
59d86860 649#if CPUFAM_ARMEL
61bd904b 650
9f6eb05d
MW
651// ARM/Thumb mode things. Use ARM by default.
652#define ARM .arm; .L$_pcoff = 8
653#define THUMB .thumb; .L$_pcoff = 4
654 ARM
655
61bd904b 656// Set the function hooks.
0923a413
MW
657#define FUNC_PREHOOK(_) .balign 4; .fnstart
658#define ENDFUNC_HOOK(_) .fnend; .ltorg
61bd904b
MW
659
// Call the external subroutine ADDR, optionally under condition code COND.
// In a position-independent build (WANT_PIC set) the branch target carries
// the `(PLT)' annotation; otherwise ADDR is branched to directly.
.macro	callext	addr, cond=
#if !WANT_PIC
	bl\cond	\addr
#else
	bl\cond	\addr(PLT)
#endif
.endm
61bd904b
MW
668
669// Do I need to arrange a spare GOT register?
670#if WANT_PIC
671# define NEED_GOT 1
672#endif
673#define GOTREG r9
674
675// Maybe load GOT address into GOT.
8a1aa284 676.macro ldgot cond=, got=GOTREG
61bd904b 677#if WANT_PIC
adca2a18
MW
678 ldr\cond \got, .L$_ldgot$\@
679.L$_ldgot_pc$\@:
2d03a881 680 add\cond \got, pc, \got
8a1aa284 681 _LIT
adca2a18
MW
682 .balign 4
683.L$_ldgot$\@:
9f6eb05d 684 .word _GLOBAL_OFFSET_TABLE_ - .L$_ldgot_pc$\@ - .L$_pcoff
8a1aa284 685 _ENDLIT
61bd904b 686#endif
8a1aa284 687.endm
61bd904b
MW
688
689// Load address of external symbol ADDR into REG, maybe using GOT.
8a1aa284 690.macro leaext reg, addr, cond=, got=GOTREG
61bd904b 691#if WANT_PIC
adca2a18 692 ldr\cond \reg, .L$_leaext$\@
2d03a881 693 ldr\cond \reg, [\got, \reg]
8a1aa284 694 _LIT
adca2a18
MW
695 .balign 4
696.L$_leaext$\@:
697 .word \addr(GOT)
8a1aa284 698 _ENDLIT
61bd904b 699#else
2d03a881 700 ldr\cond \reg, =\addr
61bd904b 701#endif
8a1aa284 702.endm
61bd904b 703
0c53ac58 704// Load address of external symbol ADDR into REG directly.
8a1aa284 705.macro leaextq reg, addr, cond=
0c53ac58
MW
706#if WANT_PIC
707 ldr\cond \reg, .L$_leaextq$\@
708.L$_leaextq_pc$\@:
9f6eb05d 709 .if .L$_pcoff == 8
0c53ac58 710 ldr\cond \reg, [pc, \reg]
9f6eb05d
MW
711 .else
712 add\cond \reg, pc
713 ldr\cond \reg, [\reg]
714 .endif
8a1aa284 715 _LIT
0c53ac58
MW
716 .balign 4
717.L$_leaextq$\@:
9f6eb05d 718 .word \addr(GOT_PREL) + (. - .L$_leaextq_pc$\@ - .L$_pcoff)
8a1aa284 719 _ENDLIT
0c53ac58
MW
720#else
721 ldr\cond \reg, =\addr
722#endif
8a1aa284 723.endm
0c53ac58 724
43ea7558
MW
725// Apply decoration decor to register name reg.
726#define _REGFORM(reg, decor) _GLUE(_REGFORM_, reg)(decor)
727
728// Internal macros: `_REGFORM_r(decor)' applies decoration decor to register
729// name r.
730
731#define _REGFORM_s0(decor) _DECOR(s, decor, 0)
732#define _REGFORM_s1(decor) _DECOR(s, decor, 1)
733#define _REGFORM_s2(decor) _DECOR(s, decor, 2)
734#define _REGFORM_s3(decor) _DECOR(s, decor, 3)
735#define _REGFORM_s4(decor) _DECOR(s, decor, 4)
736#define _REGFORM_s5(decor) _DECOR(s, decor, 5)
737#define _REGFORM_s6(decor) _DECOR(s, decor, 6)
738#define _REGFORM_s7(decor) _DECOR(s, decor, 7)
739#define _REGFORM_s8(decor) _DECOR(s, decor, 8)
740#define _REGFORM_s9(decor) _DECOR(s, decor, 9)
741#define _REGFORM_s10(decor) _DECOR(s, decor, 10)
742#define _REGFORM_s11(decor) _DECOR(s, decor, 11)
743#define _REGFORM_s12(decor) _DECOR(s, decor, 12)
744#define _REGFORM_s13(decor) _DECOR(s, decor, 13)
745#define _REGFORM_s14(decor) _DECOR(s, decor, 14)
746#define _REGFORM_s15(decor) _DECOR(s, decor, 15)
747#define _REGFORM_s16(decor) _DECOR(s, decor, 16)
748#define _REGFORM_s17(decor) _DECOR(s, decor, 17)
749#define _REGFORM_s18(decor) _DECOR(s, decor, 18)
750#define _REGFORM_s19(decor) _DECOR(s, decor, 19)
751#define _REGFORM_s20(decor) _DECOR(s, decor, 20)
752#define _REGFORM_s21(decor) _DECOR(s, decor, 21)
753#define _REGFORM_s22(decor) _DECOR(s, decor, 22)
754#define _REGFORM_s23(decor) _DECOR(s, decor, 23)
755#define _REGFORM_s24(decor) _DECOR(s, decor, 24)
756#define _REGFORM_s25(decor) _DECOR(s, decor, 25)
757#define _REGFORM_s26(decor) _DECOR(s, decor, 26)
758#define _REGFORM_s27(decor) _DECOR(s, decor, 27)
759#define _REGFORM_s28(decor) _DECOR(s, decor, 28)
760#define _REGFORM_s29(decor) _DECOR(s, decor, 29)
761#define _REGFORM_s30(decor) _DECOR(s, decor, 30)
762#define _REGFORM_s31(decor) _DECOR(s, decor, 31)
763
764#define _REGFORM_d0(decor) _DECOR(d, decor, 0)
765#define _REGFORM_d1(decor) _DECOR(d, decor, 1)
766#define _REGFORM_d2(decor) _DECOR(d, decor, 2)
767#define _REGFORM_d3(decor) _DECOR(d, decor, 3)
768#define _REGFORM_d4(decor) _DECOR(d, decor, 4)
769#define _REGFORM_d5(decor) _DECOR(d, decor, 5)
770#define _REGFORM_d6(decor) _DECOR(d, decor, 6)
771#define _REGFORM_d7(decor) _DECOR(d, decor, 7)
772#define _REGFORM_d8(decor) _DECOR(d, decor, 8)
773#define _REGFORM_d9(decor) _DECOR(d, decor, 9)
774#define _REGFORM_d10(decor) _DECOR(d, decor, 10)
775#define _REGFORM_d11(decor) _DECOR(d, decor, 11)
776#define _REGFORM_d12(decor) _DECOR(d, decor, 12)
777#define _REGFORM_d13(decor) _DECOR(d, decor, 13)
778#define _REGFORM_d14(decor) _DECOR(d, decor, 14)
779#define _REGFORM_d15(decor) _DECOR(d, decor, 15)
780#define _REGFORM_d16(decor) _DECOR(d, decor, 16)
781#define _REGFORM_d17(decor) _DECOR(d, decor, 17)
782#define _REGFORM_d18(decor) _DECOR(d, decor, 18)
783#define _REGFORM_d19(decor) _DECOR(d, decor, 19)
784#define _REGFORM_d20(decor) _DECOR(d, decor, 20)
785#define _REGFORM_d21(decor) _DECOR(d, decor, 21)
786#define _REGFORM_d22(decor) _DECOR(d, decor, 22)
787#define _REGFORM_d23(decor) _DECOR(d, decor, 23)
788#define _REGFORM_d24(decor) _DECOR(d, decor, 24)
789#define _REGFORM_d25(decor) _DECOR(d, decor, 25)
790#define _REGFORM_d26(decor) _DECOR(d, decor, 26)
791#define _REGFORM_d27(decor) _DECOR(d, decor, 27)
792#define _REGFORM_d28(decor) _DECOR(d, decor, 28)
793#define _REGFORM_d29(decor) _DECOR(d, decor, 29)
794#define _REGFORM_d30(decor) _DECOR(d, decor, 30)
795#define _REGFORM_d31(decor) _DECOR(d, decor, 31)
796
797#define _REGFORM_q0(decor) _DECOR(q, decor, 0)
798#define _REGFORM_q1(decor) _DECOR(q, decor, 1)
799#define _REGFORM_q2(decor) _DECOR(q, decor, 2)
800#define _REGFORM_q3(decor) _DECOR(q, decor, 3)
801#define _REGFORM_q4(decor) _DECOR(q, decor, 4)
802#define _REGFORM_q5(decor) _DECOR(q, decor, 5)
803#define _REGFORM_q6(decor) _DECOR(q, decor, 6)
804#define _REGFORM_q7(decor) _DECOR(q, decor, 7)
805#define _REGFORM_q8(decor) _DECOR(q, decor, 8)
806#define _REGFORM_q9(decor) _DECOR(q, decor, 9)
807#define _REGFORM_q10(decor) _DECOR(q, decor, 10)
808#define _REGFORM_q11(decor) _DECOR(q, decor, 11)
809#define _REGFORM_q12(decor) _DECOR(q, decor, 12)
810#define _REGFORM_q13(decor) _DECOR(q, decor, 13)
811#define _REGFORM_q14(decor) _DECOR(q, decor, 14)
812#define _REGFORM_q15(decor) _DECOR(q, decor, 15)
813
814// `_LOPART(n)' and `_HIPART(n)' return the numbers of the register halves of
815// register n, i.e., 2*n and 2*n + 1 respectively.
816#define _LOPART(n) _GLUE(_LOPART_, n)
817#define _HIPART(n) _GLUE(_HIPART_, n)
818
819// Internal macros: `_LOPART_n' and `_HIPART_n' return the numbers of the
820// register halves of register n, i.e., 2*n and 2*n + 1 respectively.
821
822#define _LOPART_0 0
823#define _HIPART_0 1
824#define _LOPART_1 2
825#define _HIPART_1 3
826#define _LOPART_2 4
827#define _HIPART_2 5
828#define _LOPART_3 6
829#define _HIPART_3 7
830#define _LOPART_4 8
831#define _HIPART_4 9
832#define _LOPART_5 10
833#define _HIPART_5 11
834#define _LOPART_6 12
835#define _HIPART_6 13
836#define _LOPART_7 14
837#define _HIPART_7 15
838#define _LOPART_8 16
839#define _HIPART_8 17
840#define _LOPART_9 18
841#define _HIPART_9 19
842#define _LOPART_10 20
843#define _HIPART_10 21
844#define _LOPART_11 22
845#define _HIPART_11 23
846#define _LOPART_12 24
847#define _HIPART_12 25
848#define _LOPART_13 26
849#define _HIPART_13 27
850#define _LOPART_14 28
851#define _HIPART_14 29
852#define _LOPART_15 30
853#define _HIPART_15 31
854
855// Return the register number of the pair containing register n, i.e.,
856// floor(n/2).
857#define _PAIR(n) _GLUE(_PAIR_, n)
858
859// Internal macros: `_PAIR_n' returns the register number of the pair
860// containing register n, i.e., floor(n/2).
861#define _PAIR_0 0
862#define _PAIR_1 0
863#define _PAIR_2 1
864#define _PAIR_3 1
865#define _PAIR_4 2
866#define _PAIR_5 2
867#define _PAIR_6 3
868#define _PAIR_7 3
869#define _PAIR_8 4
870#define _PAIR_9 4
871#define _PAIR_10 5
872#define _PAIR_11 5
873#define _PAIR_12 6
874#define _PAIR_13 6
875#define _PAIR_14 7
876#define _PAIR_15 7
877#define _PAIR_16 8
878#define _PAIR_17 8
879#define _PAIR_18 9
880#define _PAIR_19 9
881#define _PAIR_20 10
882#define _PAIR_21 10
883#define _PAIR_22 11
884#define _PAIR_23 11
885#define _PAIR_24 12
886#define _PAIR_25 12
887#define _PAIR_26 13
888#define _PAIR_27 13
889#define _PAIR_28 14
890#define _PAIR_29 14
891#define _PAIR_30 15
892#define _PAIR_31 15
893
894// Apply decoration decor to register number n of type ty. Decorations are
895// as follows.
896//
897// decor types meaning
898// Q s, d the NEON qN register containing this one
899// D s the NEON dN register containing this one
900// D0 q the low 64-bit half of this one
901// D1 q the high 64-bit half of this one
902// S0 d, q the first 32-bit piece of this one
903// S1 d, q the second 32-bit piece of this one
904// S2 q the third 32-bit piece of this one
905// S3 q the fourth 32-bit piece of this one
906// Bn q the nth byte of this register, as a scalar
907// Hn q the nth halfword of this register, as a scalar
908// Wn q the nth word of this register, as a scalar
909#define _DECOR(ty, decor, n) _DECOR_##ty##_##decor(n)
910
911// Internal macros: `_DECOR_ty_decor(n)' applies decoration decor to register
912// number n of type ty.
913
914#define _DECOR_s_Q(n) GLUE(q, _PAIR(_PAIR(n)))
915#define _DECOR_s_D(n) GLUE(d, _PAIR(n))
916
// Decorations for a doubleword register dN: `Q' names the quadword register
// containing it; `S0' and `S1' name its first and second 32-bit pieces,
// i.e., registers s(2N) and s(2N + 1) respectively.
#define _DECOR_d_Q(n) GLUE(q, _PAIR(n))
#define _DECOR_d_S0(n) GLUE(s, _LOPART(n))
#define _DECOR_d_S1(n) GLUE(s, _HIPART(n))
920
921#define _DECOR_q_D0(n) GLUE(d, _LOPART(n))
922#define _DECOR_q_D1(n) GLUE(d, _HIPART(n))
923#define _DECOR_q_S0(n) GLUE(s, _LOPART(_LOPART(n)))
924#define _DECOR_q_S1(n) GLUE(s, _HIPART(_LOPART(n)))
925#define _DECOR_q_S2(n) GLUE(s, _LOPART(_HIPART(n)))
926#define _DECOR_q_S3(n) GLUE(s, _HIPART(_HIPART(n)))
927#define _DECOR_q_W0(n) GLUE(d, _LOPART(n))[0]
928#define _DECOR_q_W1(n) GLUE(d, _LOPART(n))[1]
929#define _DECOR_q_W2(n) GLUE(d, _HIPART(n))[0]
930#define _DECOR_q_W3(n) GLUE(d, _HIPART(n))[1]
931#define _DECOR_q_H0(n) GLUE(d, _LOPART(n))[0]
932#define _DECOR_q_H1(n) GLUE(d, _LOPART(n))[1]
933#define _DECOR_q_H2(n) GLUE(d, _LOPART(n))[2]
934#define _DECOR_q_H3(n) GLUE(d, _LOPART(n))[3]
935#define _DECOR_q_H4(n) GLUE(d, _HIPART(n))[0]
936#define _DECOR_q_H5(n) GLUE(d, _HIPART(n))[1]
937#define _DECOR_q_H6(n) GLUE(d, _HIPART(n))[2]
938#define _DECOR_q_H7(n) GLUE(d, _HIPART(n))[3]
939#define _DECOR_q_B0(n) GLUE(d, _LOPART(n))[0]
940#define _DECOR_q_B1(n) GLUE(d, _LOPART(n))[1]
941#define _DECOR_q_B2(n) GLUE(d, _LOPART(n))[2]
942#define _DECOR_q_B3(n) GLUE(d, _LOPART(n))[3]
943#define _DECOR_q_B4(n) GLUE(d, _LOPART(n))[4]
944#define _DECOR_q_B5(n) GLUE(d, _LOPART(n))[5]
945#define _DECOR_q_B6(n) GLUE(d, _LOPART(n))[6]
946#define _DECOR_q_B7(n) GLUE(d, _LOPART(n))[7]
947#define _DECOR_q_B8(n) GLUE(d, _HIPART(n))[0]
948#define _DECOR_q_B9(n) GLUE(d, _HIPART(n))[1]
949#define _DECOR_q_B10(n) GLUE(d, _HIPART(n))[2]
950#define _DECOR_q_B11(n) GLUE(d, _HIPART(n))[3]
951#define _DECOR_q_B12(n) GLUE(d, _HIPART(n))[4]
952#define _DECOR_q_B13(n) GLUE(d, _HIPART(n))[5]
953#define _DECOR_q_B14(n) GLUE(d, _HIPART(n))[6]
954#define _DECOR_q_B15(n) GLUE(d, _HIPART(n))[7]
955
956// Macros for navigating the NEON register hierarchy.
957#define S0(reg) _REGFORM(reg, S0)
958#define S1(reg) _REGFORM(reg, S1)
959#define S2(reg) _REGFORM(reg, S2)
960#define S3(reg) _REGFORM(reg, S3)
961#define D(reg) _REGFORM(reg, D)
962#define D0(reg) _REGFORM(reg, D0)
963#define D1(reg) _REGFORM(reg, D1)
964#define Q(reg) _REGFORM(reg, Q)
965
966// Macros for indexing quadword registers.
967#define QB(reg, i) _REGFORM(reg, B##i)
968#define QH(reg, i) _REGFORM(reg, H##i)
969#define QW(reg, i) _REGFORM(reg, W##i)
970
971// Macros for converting vldm/vstm ranges.
972#define QQ(qlo, qhi) D0(qlo)-D1(qhi)
973
0923a413
MW
974// Stack management and unwinding.
// Establish FP as the frame pointer, pointing OFFSET bytes above the
// current stack pointer (default 0), and emit the matching `.setfp'
// unwinding directive.  As side effects, `.L$_frameptr_p' is set to -1 and
// a one-shot `dropfp' macro is defined which undoes this frame setup with
// the same FP and OFFSET.
.macro	setfp	fp, offset = 0
  .ifne \offset
	add	\fp, sp, #\offset
	.setfp	\fp, sp, #\offset
  .else
	mov	\fp, sp
	.setfp	\fp, sp
  .endif
	.L$_frameptr_p = -1
	.macro	dropfp; _dropfp \fp, \offset; .endm
.endm
986
// Undo a frame-pointer setup: restore sp from FP, which points OFFSET bytes
// above where sp should be (default 0).  Afterwards the `dropfp' macro is
// purged and `.L$_frameptr_p' is cleared.
.macro	_dropfp	fp, offset = 0
  .ifne \offset
	sub	sp, \fp, #\offset
  .else
	mov	sp, \fp
  .endif
	.L$_frameptr_p = 0
	.purgem	dropfp
.endm
996
997.macro stalloc n
998 sub sp, sp, #\n
999 .pad #\n
1000.endm
1001
1002.macro stfree n
1003 add sp, sp, #\n
1004 .pad #-\n
1005.endm
1006
1007.macro pushreg rr:vararg
1008 stmfd sp!, {\rr}
1009 .save {\rr}
1010.endm
1011
1012.macro popreg rr:vararg
1013 ldmfd sp!, {\rr}
1014.endm
1015
1016.macro pushvfp rr:vararg
1017 vstmdb sp!, {\rr}
1018 .vsave {\rr}
1019.endm
1020
1021.macro popvfp rr:vararg
1022 vldmia sp!, {\rr}
1023.endm
1024
1025.macro endprologue
1026.endm
1027
1028// No need for prologue markers on ARM.
1029#define FUNC_POSTHOOK(_) .L$_prologue_p = -1
1030
61bd904b
MW
1031#endif
1032
1033///--------------------------------------------------------------------------
1a0c09c4
MW
1034/// Final stuff.
1035
// Fallback definitions for anything the platform-specific sections above
// chose not to provide.  The three function hooks, TYPE_FUNC and SIZE_OBJ
// default to expanding to nothing; F(name) defaults to leaving the symbol
// name undecorated.

#if !defined(FUNC_PREHOOK)
#  define FUNC_PREHOOK(_)
#endif
#if !defined(FUNC_POSTHOOK)
#  define FUNC_POSTHOOK(_)
#endif
#if !defined(ENDFUNC_HOOK)
#  define ENDFUNC_HOOK(_)
#endif

#if !defined(F)
#  define F(name) name
#endif

#if !defined(TYPE_FUNC)
#  define TYPE_FUNC(name)
#endif

#if !defined(SIZE_OBJ)
#  define SIZE_OBJ(name)
#endif
1058
91c8af7d 1059#if __ELF__ && !defined(WANT_EXECUTABLE_STACK)
1aa5bfa8
MW
1060 .pushsection .note.GNU-stack, "", _SECTTY(progbits)
1061 .popsection
1062#endif
1063
1a0c09c4 1064///----- That's all, folks --------------------------------------------------