base/asm-common.h, *.S: Add `INTFUNC' macro for internal subroutines.
[catacomb] / base / asm-common.h
CommitLineData
1a0c09c4
MW
1/// -*- mode: asm; asm-comment-char: ?/ -*-
2///
3/// Common definitions for assembler source files
4///
5/// (c) 2015 Straylight/Edgeware
6///
7
8///----- Licensing notice ---------------------------------------------------
9///
10/// This file is part of Catacomb.
11///
12/// Catacomb is free software; you can redistribute it and/or modify
13/// it under the terms of the GNU Library General Public License as
14/// published by the Free Software Foundation; either version 2 of the
15/// License, or (at your option) any later version.
16///
17/// Catacomb is distributed in the hope that it will be useful,
18/// but WITHOUT ANY WARRANTY; without even the implied warranty of
19/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20/// GNU Library General Public License for more details.
21///
22/// You should have received a copy of the GNU Library General Public
23/// License along with Catacomb; if not, write to the Free
24/// Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25/// MA 02111-1307, USA.
26
27///--------------------------------------------------------------------------
28/// General definitions.
29
898f32b3
MW
// Preprocessor hacks.
//
// STRINGY(x) macro-expands X and then turns the result into a string
// literal; GLUE(x, y) macro-expands both arguments and then pastes the
// results together into a single token.  The underscore-prefixed helpers do
// the actual `#'/`##' work: the extra level of indirection is what lets the
// arguments be expanded first.  _EMPTY expands to nothing at all.
#define STRINGY(x) _STRINGY(x)
#define _STRINGY(x) #x
#define GLUE(x, y) _GLUE(x, y)
#define _GLUE(x, y) x##y
#define _EMPTY
36
f8e509a9
MW
// Bookkeeping.  `.L$_subsec' is the number of the `.text' subsection which
// the section-selection macros in this file switch to for code.
	.L$_subsec = 0

// Literal pools done the hard way.  `_LIT' selects the `.text' subsection
// numbered one higher than `.L$_subsec', and `_ENDLIT' selects subsection
// `.L$_subsec' again.  `_LTORG' advances `.L$_subsec' by two and selects
// the new subsection.
#define _LIT		.text .L$_subsec + 1
#define _ENDLIT		.text .L$_subsec
#define _LTORG		.L$_subsec = .L$_subsec + 2; .text .L$_subsec
f8e509a9 44
645fcce0
MW
// ELF section types: _SECTTY(ty) spells the section type TY with a `%'
// prefix on ARM and an `@' prefix elsewhere.  Not defined for non-ELF
// targets.
#if __ELF__ && CPUFAM_ARMEL
#  define _SECTTY(ty) %ty
#elif __ELF__
#  define _SECTTY(ty) @ty
#endif

// Section selection.  TEXT selects the current code subsection; RODATA
// selects a read-only data section where the object format has one we know
// how to name, and otherwise just falls back to TEXT; DATA selects `.data'.
#define TEXT .text .L$_subsec
#if ABI_WIN
#  define RODATA .section .rdata, "dr"
#elif __ELF__
#  define RODATA .section .rodata, "a", _SECTTY(progbits)
#else
#  define RODATA TEXT
#endif
#define DATA .data
64
1a517bb3
MW
// Announce an internal function NAME.  This emits the symbol-type
// annotation, defines a one-shot `ENDFUNC' assembler macro which closes
// this particular function, runs the pre-hook, plants the label, and then
// runs the post-hook.
#define INTFUNC(name) \
	TYPE_FUNC(name); \
	.macro ENDFUNC; _ENDFUNC(name); .endm; \
	FUNC_PREHOOK(name); \
name: \
	FUNC_POSTHOOK(name)

// Announce an external function NAME: as INTFUNC, but the symbol name is
// first decorated by `F' and is exported with `.globl'.
#define FUNC(name) \
	.globl	F(name); \
INTFUNC(F(name))

// Mark the end of function NAME.  This is the body of the `ENDFUNC' macro
// defined by INTFUNC: it purges `ENDFUNC' so that the next function can
// define it afresh, records the symbol size, runs the end hook, and moves
// on to a fresh code subsection via `_LTORG'.
#define _ENDFUNC(name) \
	.purgem	ENDFUNC; \
	SIZE_OBJ(name); \
	ENDFUNC_HOOK(name); \
	_LTORG

// Make a helper function NAME, if necessary.  The body, written between
// `AUXFN(name)' and `ENDAUXFN', is assembled into `.text' subsection 7128,
// and only if the symbol `.L$_auxfn_def.NAME' has not been defined yet;
// `_ENDAUXFN_TAIL' defines that symbol and switches back to the current
// code subsection.
#define AUXFN(name) \
	.ifndef	.L$_auxfn_def.name; \
	.text	7128; \
	.macro _ENDAUXFN; _ENDAUXFN_TAIL(name); .endm; \
	FUNC_PREHOOK(name); \
name:
#define _ENDAUXFN_TAIL(name) \
	.purgem	_ENDAUXFN; \
	.text	.L$_subsec; \
	.L$_auxfn_def.name = 1
#define ENDAUXFN _ENDAUXFN; .endif
97
1a0c09c4
MW
///--------------------------------------------------------------------------
/// ELF-specific hacking.

#if __ELF__

// Position-independent code is wanted if the compiler says it's making PIC
// or PIE output.
#if __PIC__ || __PIE__
#  define WANT_PIC 1
#endif

// Set the symbol type of NAME to `STT_FUNC'.
#define TYPE_FUNC(name) .type name, STT_FUNC

// Set the symbol size of NAME to the distance from NAME to here.
#define SIZE_OBJ(name) .size name, . - name

#endif

///--------------------------------------------------------------------------
/// Windows-specific hacking.

#if ABI_WIN

// On 32-bit x86, external names are spelt with a leading underscore.
#if CPUFAM_X86
#  define F(name) _##name
#endif

#endif
123
124///--------------------------------------------------------------------------
125/// x86- and amd64-specific hacking.
126///
127/// It's (slightly) easier to deal with both of these in one go.
128
129#if CPUFAM_X86 || CPUFAM_AMD64
1a0c09c4 130
2cb17e02
MW
// Machine word size, in bytes.
#if CPUFAM_X86
#  define WORDSZ 4
#endif
#if CPUFAM_AMD64
#  define WORDSZ 8
#endif

// Function hooks: align every function entry point to a 16-byte boundary.
#define FUNC_PREHOOK(_) .balign 16

// On 64-bit Windows, bracket each function with `.seh_proc' and
// `.seh_endproc' so that stack-unwinding data gets installed.  The function
// bodies themselves are expected to invoke `.seh_setframe' if necessary,
// `.seh_pushreg' and friends, and `.seh_endprologue'.
#if CPUFAM_AMD64 && ABI_WIN
#  define FUNC_POSTHOOK(name) .seh_proc name
#  define ENDFUNC_HOOK(_) .seh_endproc
#endif
149
1a0c09c4
MW
// Everything from here on is written in Intel syntax, without `%' register
// prefixes, rather than the AT&T syntax with its extra punctuation and
// reversed operand order.
	.intel_syntax noprefix

// Call the external subroutine at ADDR.  In position-independent code the
// call is made through the PLT.
.macro	callext	addr
#if WANT_PIC
	call	\addr@PLT
#else
	call	\addr
#endif
.endm
1a0c09c4
MW
162
// A spare register to hold the GOT address is needed only for
// position-independent 32-bit x86 code.  GOTREG names the register used by
// default.
#if WANT_PIC && CPUFAM_X86
#  define NEED_GOT 1
#endif
#define GOTREG ebx			// Not needed in AMD64 so don't care.

// Load the GOT address into the register GOT, if that's necessary (i.e., in
// position-independent 32-bit x86 code); otherwise assemble nothing.
.macro	ldgot	got=GOTREG
#if WANT_PIC && CPUFAM_X86
	// A once-only helper which fetches the word at the top of the stack
	// -- its own return address -- into GOT and returns.
	AUXFN(_ldgot.\got)
	mov	\got, [esp]
	ret
	ENDAUXFN

	// Call the helper and fix up the address it gives us.
	call	_ldgot.\got
	add	\got, offset _GLOBAL_OFFSET_TABLE_
#endif
.endm
1a0c09c4
MW
180
// Load the address of the external symbol ADDR into REG.  In
// position-independent code, the address is fetched from the GOT: on x86
// this is done relative to the register GOT, and on AMD64 relative to
// `rip'.  Otherwise the address is loaded directly.
.macro	leaext	reg, addr, got=GOTREG
#if WANT_PIC
#  if CPUFAM_X86
	mov	\reg, [\got + \addr@GOT]
#  endif
#  if CPUFAM_AMD64
	mov	\reg, \addr@GOTPCREL[rip]
#  endif
#else
#  if CPUFAM_X86
	mov	\reg, offset \addr
#  endif
#  if CPUFAM_AMD64
	lea	\reg, \addr[rip]
#  endif
#endif
.endm
1a0c09c4
MW
199
// INTADDR(addr) or INTADDR(addr, got): an address expression (possibly
// involving a base register and a displacement) referring to ADDR, which
// lives within our own module.  GOT is the register holding the GOT
// address, and defaults to GOTREG: the wrapper appends GOTREG to the
// argument list and INTADDR__0 picks out the first two arguments.  On AMD64
// the expression is `rip'-relative; in other position-independent code it
// is a `@GOTOFF' offset from GOT; and otherwise it's simply ADDR.
#define INTADDR(...) INTADDR__0(__VA_ARGS__, GOTREG, dummy)
#define INTADDR__0(addr, got, ...) INTADDR__1(addr, got)
#if CPUFAM_AMD64
#  define INTADDR__1(addr, got) addr + rip
#elif WANT_PIC
#  define INTADDR__1(addr, got) got + addr@GOTOFF
#else
#  define INTADDR__1(addr, got) addr
#endif
211
a13b5730
MW
// Permutations for SIMD instructions.  SHUF(D, C, B, A) is an immediate,
// suitable for use in `pshufd' or `shufps', which copies element D
// (0 <= D < 4) of the source to element 3 of the destination, element C to
// element 2, element B to element 1, and element A to element 0.  Each
// selector occupies a two-bit field of the result, with A in the least
// significant position.
#define SHUF(d, c, b, a) (64*(d) + 16*(c) + 4*(b) + (a))
217
43ea7558
MW
218// Map register names to their individual pieces.
219
220// Apply decoration decor to (internal) register name reg of type ty.
221//
222// See `R_...' for internal register names. Decorations are as follows.
223//
224// b low byte (e.g., `al', `r8b')
225// h high byte (e.g., `ah')
226// w word (e.g., `ax', `r8w')
227// d doubleword (e.g., `eax', `r8d')
228// q quadword (e.g., `rax', `r8')
229// r whole register (doubleword on x86, quadword on amd64)
230//
231// And types are as follows.
232//
233// abcd the four traditional registers `a', `b', `c', `d'
234// xp the four pointer registers `si', `di', `bp', `sp'
235// ip the instruction pointer `ip'
236// rn the AMD64 numbered registers `r8'--`r15'
237#define _DECOR(ty, decor, reg) _DECOR_##ty##_##decor(reg)
238
239// Internal macros: _DECOR_ty_decor(reg) applies decoration decor to
240// (internal) register name reg of type ty.
241
242#define _DECOR_abcd_b(reg) reg##l
243#define _DECOR_abcd_h(reg) reg##h
244#define _DECOR_abcd_w(reg) reg##x
245#define _DECOR_abcd_d(reg) e##reg##x
246#if CPUFAM_AMD64
247# define _DECOR_abcd_q(reg) r##reg##x
248#endif
249
250#define _DECOR_xp_b(reg) reg##l
251#define _DECOR_xp_w(reg) reg
252#define _DECOR_xp_d(reg) e##reg
253#if CPUFAM_AMD64
254# define _DECOR_xp_q(reg) r##reg
255#endif
256
257#define _DECOR_ip_w(reg) reg
258#define _DECOR_ip_d(reg) e##reg
259#if CPUFAM_AMD64
260# define _DECOR_ip_q(reg) r##reg
261#endif
262
263#if CPUFAM_AMD64
264# define _DECOR_rn_b(reg) reg##b
265# define _DECOR_rn_w(reg) reg##w
266# define _DECOR_rn_d(reg) reg##d
267# define _DECOR_rn_q(reg) reg
268# define _DECOR_rn_r(reg) reg
269#endif
270
271#if CPUFAM_X86
272# define _DECOR_abcd_r(reg) e##reg##x
273# define _DECOR_xp_r(reg) e##reg
274# define _DECOR_ip_r(reg) e##reg
275#endif
276#if CPUFAM_AMD64
277# define _DECOR_abcd_r(reg) r##reg##x
278# define _DECOR_xp_r(reg) r##reg
279# define _DECOR_ip_r(reg) r##reg
280#endif
281
282#define _DECOR_mem_b(addr) byte ptr addr
283#define _DECOR_mem_w(addr) word ptr addr
284#define _DECOR_mem_d(addr) dword ptr addr
285#if CPUFAM_AMD64
286# define _DECOR_mem_q(addr) qword ptr addr
287#endif
288
289// R_r(decor) applies decoration decor to register r, which is an internal
290// register name. The internal register names are: `ip', `a', `b', `c', `d',
291// `si', `di', `bp', `sp', `r8'--`r15'.
292#define R_ip(decor) _DECOR(ip, decor, ip)
293#define R_a(decor) _DECOR(abcd, decor, a)
294#define R_b(decor) _DECOR(abcd, decor, b)
295#define R_c(decor) _DECOR(abcd, decor, c)
296#define R_d(decor) _DECOR(abcd, decor, d)
297#define R_si(decor) _DECOR(xp, decor, si)
298#define R_di(decor) _DECOR(xp, decor, di)
299#define R_bp(decor) _DECOR(xp, decor, bp)
300#define R_sp(decor) _DECOR(xp, decor, sp)
301#if CPUFAM_AMD64
302# define R_r8(decor) _DECOR(rn, decor, r8)
303# define R_r9(decor) _DECOR(rn, decor, r9)
304# define R_r10(decor) _DECOR(rn, decor, r10)
305# define R_r11(decor) _DECOR(rn, decor, r11)
306# define R_r12(decor) _DECOR(rn, decor, r12)
307# define R_r13(decor) _DECOR(rn, decor, r13)
308# define R_r14(decor) _DECOR(rn, decor, r14)
309# define R_r15(decor) _DECOR(rn, decor, r15)
310#endif
311
312// Refer to an in-memory datum of the type implied by decor residing at
313// address addr (which should supply its own square-brackets).
314#define MEM(decor, addr) _DECOR(mem, decor, addr)
315
316// Applies decoration decor to assembler-level register name reg.
317#define _REGFORM(reg, decor) _GLUE(_REGFORM_, reg)(decor)
318
319// Internal macros: _REGFORM_r(decor) applies decoration decor to an
320// assembler-level register name, in place of any decoration that register
321// name has already.
322
323#define _REGFORM_ip(decor) R_ip(decor)
324#define _REGFORM_eip(decor) R_ip(decor)
325
326#define _REGFORM_a(decor) R_a(decor)
327#define _REGFORM_al(decor) R_a(decor)
328#define _REGFORM_ah(decor) R_a(decor)
329#define _REGFORM_ax(decor) R_a(decor)
330#define _REGFORM_eax(decor) R_a(decor)
331
332#define _REGFORM_b(decor) R_b(decor)
333#define _REGFORM_bl(decor) R_b(decor)
334#define _REGFORM_bh(decor) R_b(decor)
335#define _REGFORM_bx(decor) R_b(decor)
336#define _REGFORM_ebx(decor) R_b(decor)
337
338#define _REGFORM_c(decor) R_c(decor)
339#define _REGFORM_cl(decor) R_c(decor)
340#define _REGFORM_ch(decor) R_c(decor)
341#define _REGFORM_cx(decor) R_c(decor)
342#define _REGFORM_ecx(decor) R_c(decor)
343
344#define _REGFORM_d(decor) R_d(decor)
345#define _REGFORM_dl(decor) R_d(decor)
346#define _REGFORM_dh(decor) R_d(decor)
347#define _REGFORM_dx(decor) R_d(decor)
348#define _REGFORM_edx(decor) R_d(decor)
349
350#define _REGFORM_si(decor) R_si(decor)
351#define _REGFORM_sil(decor) R_si(decor)
352#define _REGFORM_esi(decor) R_si(decor)
353
354#define _REGFORM_di(decor) R_di(decor)
355#define _REGFORM_dil(decor) R_di(decor)
356#define _REGFORM_edi(decor) R_di(decor)
357
358#define _REGFORM_bp(decor) R_bp(decor)
359#define _REGFORM_bpl(decor) R_bp(decor)
360#define _REGFORM_ebp(decor) R_bp(decor)
361
362#define _REGFORM_sp(decor) R_sp(decor)
363#define _REGFORM_spl(decor) R_sp(decor)
364#define _REGFORM_esp(decor) R_sp(decor)
365
366#if CPUFAM_AMD64
367
368# define _REGFORM_rip(decor) R_ip(decor)
369# define _REGFORM_rsp(decor) R_sp(decor)
370# define _REGFORM_rbp(decor) R_bp(decor)
371# define _REGFORM_rdi(decor) R_di(decor)
372# define _REGFORM_rsi(decor) R_si(decor)
373# define _REGFORM_rdx(decor) R_d(decor)
374# define _REGFORM_rcx(decor) R_c(decor)
375# define _REGFORM_rbx(decor) R_b(decor)
376# define _REGFORM_rax(decor) R_a(decor)
377
378# define _REGFORM_r8(decor) R_r8(decor)
379# define _REGFORM_r8b(decor) R_r8(decor)
380# define _REGFORM_r8w(decor) R_r8(decor)
381# define _REGFORM_r8d(decor) R_r8(decor)
382
383# define _REGFORM_r9(decor) R_r9(decor)
384# define _REGFORM_r9b(decor) R_r9(decor)
385# define _REGFORM_r9w(decor) R_r9(decor)
386# define _REGFORM_r9d(decor) R_r9(decor)
387
388# define _REGFORM_r10(decor) R_r10(decor)
389# define _REGFORM_r10b(decor) R_r10(decor)
390# define _REGFORM_r10w(decor) R_r10(decor)
391# define _REGFORM_r10d(decor) R_r10(decor)
392
393# define _REGFORM_r11(decor) R_r11(decor)
394# define _REGFORM_r11b(decor) R_r11(decor)
395# define _REGFORM_r11w(decor) R_r11(decor)
396# define _REGFORM_r11d(decor) R_r11(decor)
397
398# define _REGFORM_r12(decor) R_r12(decor)
399# define _REGFORM_r12b(decor) R_r12(decor)
400# define _REGFORM_r12w(decor) R_r12(decor)
401# define _REGFORM_r12d(decor) R_r12(decor)
402
403# define _REGFORM_r13(decor) R_r13(decor)
404# define _REGFORM_r13b(decor) R_r13(decor)
405# define _REGFORM_r13w(decor) R_r13(decor)
406# define _REGFORM_r13d(decor) R_r13(decor)
407
408# define _REGFORM_r14(decor) R_r14(decor)
409# define _REGFORM_r14b(decor) R_r14(decor)
410# define _REGFORM_r14w(decor) R_r14(decor)
411# define _REGFORM_r14d(decor) R_r14(decor)
412
413# define _REGFORM_r15(decor) R_r15(decor)
414# define _REGFORM_r15b(decor) R_r15(decor)
415# define _REGFORM_r15w(decor) R_r15(decor)
416# define _REGFORM_r15d(decor) R_r15(decor)
417
418#endif
419
420// Macros for converting register names.
421#define BYTE(reg) _REGFORM(reg, b)
422#define HIBYTE(reg) _REGFORM(reg, h)
423#define WORD(reg) _REGFORM(reg, w)
424#define DWORD(reg) _REGFORM(reg, d)
425#if CPUFAM_AMD64
426# define QWORD(reg) _REGFORM(reg, q)
427#endif
428#define WHOLE(reg) _REGFORM(reg, r)
429
1a0c09c4
MW
430#endif
431
17de5b2e
MW
432#if CPUFAM_X86
433
// First stage of the debugging-dump macros: save state.  The flags, `eax',
// `ecx', `edx' and `ebp' are pushed, in that order; `ebp' is then pointed at
// the saved registers, and a 512-byte, 16-byte-aligned area is carved out
// below them and filled in by `fxsave'.  On exit, `esp' points at that
// save area.  `_reg.3' undoes all of this.
.macro	_reg.0
	pushfd
	push	eax
	push	ecx
	push	edx
	push	ebp
	mov	ebp, esp
	and	esp, ~15
	sub	esp, 512
	fxsave	[esp]
.endm

// Second and third stages: nothing to do here.
.macro	_reg.1
.endm

.macro	_reg.2
.endm
452
// Final stage of the debugging-dump macros: print and restore.  The format
// string `;; FMT' (with a trailing newline) is assembled inline and jumped
// over; its address is pushed on top of whatever arguments the caller has
// already pushed, and `printf' is called.  Afterwards, the save area set up
// by `_reg.0' is located again from `ebp', the state in it is reloaded with
// `fxrstor', and the stack pointer, general-purpose registers and flags are
// restored in the reverse of the order in which `_reg.0' pushed them.
.macro	_reg.3	fmt
	// Print the message.
	lea	eax, .L$_reg$msg.\@
	push	eax
	call	printf
	jmp	.L$_reg$cont.\@
.L$_reg$msg.\@:
	.ascii	";; \fmt\n\0"
.L$_reg$cont.\@:

	// Recompute the save-area address, exactly as `_reg.0' did, and
	// reload the state stored there.
	mov	eax, ebp
	and	eax, ~15
	sub	eax, 512
	fxrstor	[eax]

	// Discard the temporary frame and recover the saved registers.
	mov	esp, ebp
	pop	ebp
	pop	edx
	pop	ecx
	pop	eax
	popfd
.endm
473
// Print the message MSG, preserving the state saved by `_reg.0'.
.macro	msg	msg
	_reg.0
	_reg.1
	_reg.2
	_reg.3	"\msg"
.endm

// Print MSG followed by the value of the 32-bit register R in hex.  By the
// time the value is pushed, `_reg.0' has already changed `esp' and `ebp',
// so those two are special: the value `esp' had on entry is the address
// just above the five words `_reg.0' pushed, i.e., `ebp + 20'; and the
// original `ebp' is the word now found at `[ebp]'.
.macro	reg	r, msg
	_reg.0
	.ifeqs	"\r", "esp"
	lea	eax, [ebp + 20]
	push	eax
	.else
	.ifeqs	"\r", "ebp"
	push	[ebp]
	.else
	push	\r
	.endif
	.endif
	_reg.1
	_reg.2
	_reg.3	"\msg: \r = %08x"
.endm
497
// Print MSG followed by the four 32-bit words of the SSE register R in hex.
// The register is copied into `xmm0' and its words are reversed (shuffle
// immediate 0x1b) before the copy is stored on the stack as the `printf'
// arguments.
.macro	xmmreg	r, msg
	_reg.0
	_reg.1
	_reg.2
	movdqu	xmm0, \r
	pshufd	xmm0, xmm0, 0x1b
	sub	esp, 16
	movdqa	[esp], xmm0
	_reg.3	"\msg: \r = %08x %08x %08x %08x"
.endm

// Print MSG followed by the two 32-bit words of the MMX register R in hex.
// The register is shuffled in place (immediate 0x4e) and then stored on
// the stack as the `printf' arguments.
.macro	mmreg	r, msg
	_reg.0
	_reg.1
	_reg.2
	pshufw	\r, \r, 0x4e
	sub	esp, 8
	movq	[esp], \r
	_reg.3	"\msg: \r = %08x %08x"
.endm
518
// Print MSG followed by floating-point stack register I, in decimal.  The
// value is read back from the save area written by `_reg.0' -- the 16-byte
// slot at offset 32 + 16*I from `esp' -- after the FPU has been
// reinitialized, and is stored in a 12-byte slot on the stack as the
// `printf' argument.
.macro	freg	i, msg
	_reg.0
	_reg.1
	_reg.2
	finit
	fldt	[esp + 32 + 16*\i]
	sub	esp, 12
	fstpt	[esp]
	_reg.3	"\msg: st(\i) = %.20Lg"
.endm

// As `freg', but print the value with the `%La' conversion instead.
.macro	fxreg	i, msg
	_reg.0
	_reg.1
	_reg.2
	finit
	fldt	[esp + 32 + 16*\i]
	sub	esp, 12
	fstpt	[esp]
	_reg.3	"\msg: st(\i) = %La"
.endm
540
541#endif
542
1a0c09c4 543///--------------------------------------------------------------------------
61bd904b
MW
544/// ARM-specific hacking.
545
59d86860 546#if CPUFAM_ARMEL
61bd904b 547
9f6eb05d
MW
// ARM/Thumb mode selection.  As well as switching instruction set, each of
// these sets `.L$_pcoff' (8 for ARM, 4 for Thumb), which the pc-relative
// literal calculations in `ldgot' and `leaextq' subtract.  ARM mode is
// selected by default.
#define ARM .arm; .L$_pcoff = 8
#define THUMB .thumb; .L$_pcoff = 4
	ARM

// Function hooks: align each function to a 4-byte boundary, and dump the
// literal pool when the function ends.
#define FUNC_PREHOOK(_) .balign 4
#define ENDFUNC_HOOK(name) .ltorg
556
// Call the external subroutine at ADDR, subject to the optional condition
// code COND.  In position-independent code the call is made through the
// PLT.
.macro	callext	addr, cond=
#if WANT_PIC
	bl\cond	\addr(PLT)
#else
	bl\cond	\addr
#endif
.endm
61bd904b
MW
565
// A spare register to hold the GOT address is needed in all
// position-independent code.  GOTREG names the register used by default.
#if WANT_PIC
#  define NEED_GOT 1
#endif
#define GOTREG r9

// Load the GOT address into the register GOT, subject to the optional
// condition code COND, if that's necessary (i.e., in position-independent
// code); otherwise assemble nothing.  A literal word holds the offset of
// the GOT from the value `pc' has at the `add' instruction, so adding `pc'
// to it yields the GOT address.
.macro	ldgot	cond=, got=GOTREG
#if WANT_PIC
	ldr\cond \got, .L$_ldgot$\@
.L$_ldgot_pc$\@:
	add\cond \got, pc, \got

	// The literal itself, out of line.
	_LIT
	.balign	4
.L$_ldgot$\@:
	.word	_GLOBAL_OFFSET_TABLE_ - .L$_ldgot_pc$\@ - .L$_pcoff
	_ENDLIT
#endif
.endm
61bd904b
MW
585
// Load the address of the external symbol ADDR into REG, subject to the
// optional condition code COND.  In position-independent code, the symbol's
// GOT offset is fetched from a literal word and used to index from the
// register GOT; otherwise the address is loaded directly using the
// assembler's `=' literal syntax.
.macro	leaext	reg, addr, cond=, got=GOTREG
#if WANT_PIC
	ldr\cond \reg, .L$_leaext$\@
	ldr\cond \reg, [\got, \reg]

	// The literal itself, out of line.
	_LIT
	.balign	4
.L$_leaext$\@:
	.word	\addr(GOT)
	_ENDLIT
#else
	ldr\cond \reg, =\addr
#endif
.endm
61bd904b 600
// Load the address of the external symbol ADDR into REG directly, i.e.,
// without needing a register holding the GOT address, subject to the
// optional condition code COND.  In position-independent code, a literal
// word holds a `GOT_PREL' offset, adjusted for the distance between the
// literal and the instruction which adds in `pc'.  When `.L$_pcoff' is 8
// (ARM mode) the load is done in a single pc-indexed instruction;
// otherwise it is split into a separate `add' and `ldr'.
.macro	leaextq	reg, addr, cond=
#if WANT_PIC
	ldr\cond \reg, .L$_leaextq$\@
.L$_leaextq_pc$\@:
	.if	.L$_pcoff == 8
	ldr\cond \reg, [pc, \reg]
	.else
	add\cond \reg, pc
	ldr\cond \reg, [\reg]
	.endif

	// The literal itself, out of line.
	_LIT
	.balign	4
.L$_leaextq$\@:
	.word	\addr(GOT_PREL) + (. - .L$_leaextq_pc$\@ - .L$_pcoff)
	_ENDLIT
#else
	ldr\cond \reg, =\addr
#endif
.endm
0c53ac58 621
43ea7558
MW
622// Apply decoration decor to register name reg.
623#define _REGFORM(reg, decor) _GLUE(_REGFORM_, reg)(decor)
624
625// Internal macros: `_REGFORM_r(decor)' applies decoration decor to register
626// name r.
627
628#define _REGFORM_s0(decor) _DECOR(s, decor, 0)
629#define _REGFORM_s1(decor) _DECOR(s, decor, 1)
630#define _REGFORM_s2(decor) _DECOR(s, decor, 2)
631#define _REGFORM_s3(decor) _DECOR(s, decor, 3)
632#define _REGFORM_s4(decor) _DECOR(s, decor, 4)
633#define _REGFORM_s5(decor) _DECOR(s, decor, 5)
634#define _REGFORM_s6(decor) _DECOR(s, decor, 6)
635#define _REGFORM_s7(decor) _DECOR(s, decor, 7)
636#define _REGFORM_s8(decor) _DECOR(s, decor, 8)
637#define _REGFORM_s9(decor) _DECOR(s, decor, 9)
638#define _REGFORM_s10(decor) _DECOR(s, decor, 10)
639#define _REGFORM_s11(decor) _DECOR(s, decor, 11)
640#define _REGFORM_s12(decor) _DECOR(s, decor, 12)
641#define _REGFORM_s13(decor) _DECOR(s, decor, 13)
642#define _REGFORM_s14(decor) _DECOR(s, decor, 14)
643#define _REGFORM_s15(decor) _DECOR(s, decor, 15)
644#define _REGFORM_s16(decor) _DECOR(s, decor, 16)
645#define _REGFORM_s17(decor) _DECOR(s, decor, 17)
646#define _REGFORM_s18(decor) _DECOR(s, decor, 18)
647#define _REGFORM_s19(decor) _DECOR(s, decor, 19)
648#define _REGFORM_s20(decor) _DECOR(s, decor, 20)
649#define _REGFORM_s21(decor) _DECOR(s, decor, 21)
650#define _REGFORM_s22(decor) _DECOR(s, decor, 22)
651#define _REGFORM_s23(decor) _DECOR(s, decor, 23)
652#define _REGFORM_s24(decor) _DECOR(s, decor, 24)
653#define _REGFORM_s25(decor) _DECOR(s, decor, 25)
654#define _REGFORM_s26(decor) _DECOR(s, decor, 26)
655#define _REGFORM_s27(decor) _DECOR(s, decor, 27)
656#define _REGFORM_s28(decor) _DECOR(s, decor, 28)
657#define _REGFORM_s29(decor) _DECOR(s, decor, 29)
658#define _REGFORM_s30(decor) _DECOR(s, decor, 30)
659#define _REGFORM_s31(decor) _DECOR(s, decor, 31)
660
661#define _REGFORM_d0(decor) _DECOR(d, decor, 0)
662#define _REGFORM_d1(decor) _DECOR(d, decor, 1)
663#define _REGFORM_d2(decor) _DECOR(d, decor, 2)
664#define _REGFORM_d3(decor) _DECOR(d, decor, 3)
665#define _REGFORM_d4(decor) _DECOR(d, decor, 4)
666#define _REGFORM_d5(decor) _DECOR(d, decor, 5)
667#define _REGFORM_d6(decor) _DECOR(d, decor, 6)
668#define _REGFORM_d7(decor) _DECOR(d, decor, 7)
669#define _REGFORM_d8(decor) _DECOR(d, decor, 8)
670#define _REGFORM_d9(decor) _DECOR(d, decor, 9)
671#define _REGFORM_d10(decor) _DECOR(d, decor, 10)
672#define _REGFORM_d11(decor) _DECOR(d, decor, 11)
673#define _REGFORM_d12(decor) _DECOR(d, decor, 12)
674#define _REGFORM_d13(decor) _DECOR(d, decor, 13)
675#define _REGFORM_d14(decor) _DECOR(d, decor, 14)
676#define _REGFORM_d15(decor) _DECOR(d, decor, 15)
677#define _REGFORM_d16(decor) _DECOR(d, decor, 16)
678#define _REGFORM_d17(decor) _DECOR(d, decor, 17)
679#define _REGFORM_d18(decor) _DECOR(d, decor, 18)
680#define _REGFORM_d19(decor) _DECOR(d, decor, 19)
681#define _REGFORM_d20(decor) _DECOR(d, decor, 20)
682#define _REGFORM_d21(decor) _DECOR(d, decor, 21)
683#define _REGFORM_d22(decor) _DECOR(d, decor, 22)
684#define _REGFORM_d23(decor) _DECOR(d, decor, 23)
685#define _REGFORM_d24(decor) _DECOR(d, decor, 24)
686#define _REGFORM_d25(decor) _DECOR(d, decor, 25)
687#define _REGFORM_d26(decor) _DECOR(d, decor, 26)
688#define _REGFORM_d27(decor) _DECOR(d, decor, 27)
689#define _REGFORM_d28(decor) _DECOR(d, decor, 28)
690#define _REGFORM_d29(decor) _DECOR(d, decor, 29)
691#define _REGFORM_d30(decor) _DECOR(d, decor, 30)
692#define _REGFORM_d31(decor) _DECOR(d, decor, 31)
693
694#define _REGFORM_q0(decor) _DECOR(q, decor, 0)
695#define _REGFORM_q1(decor) _DECOR(q, decor, 1)
696#define _REGFORM_q2(decor) _DECOR(q, decor, 2)
697#define _REGFORM_q3(decor) _DECOR(q, decor, 3)
698#define _REGFORM_q4(decor) _DECOR(q, decor, 4)
699#define _REGFORM_q5(decor) _DECOR(q, decor, 5)
700#define _REGFORM_q6(decor) _DECOR(q, decor, 6)
701#define _REGFORM_q7(decor) _DECOR(q, decor, 7)
702#define _REGFORM_q8(decor) _DECOR(q, decor, 8)
703#define _REGFORM_q9(decor) _DECOR(q, decor, 9)
704#define _REGFORM_q10(decor) _DECOR(q, decor, 10)
705#define _REGFORM_q11(decor) _DECOR(q, decor, 11)
706#define _REGFORM_q12(decor) _DECOR(q, decor, 12)
707#define _REGFORM_q13(decor) _DECOR(q, decor, 13)
708#define _REGFORM_q14(decor) _DECOR(q, decor, 14)
709#define _REGFORM_q15(decor) _DECOR(q, decor, 15)
710
711// `_LOPART(n)' and `_HIPART(n)' return the numbers of the register halves of
712// register n, i.e., 2*n and 2*n + 1 respectively.
713#define _LOPART(n) _GLUE(_LOPART_, n)
714#define _HIPART(n) _GLUE(_HIPART_, n)
715
716// Internal macros: `_LOPART_n' and `_HIPART_n' return the numbers of the
717// register halves of register n, i.e., 2*n and 2*n + 1 respectively.
718
719#define _LOPART_0 0
720#define _HIPART_0 1
721#define _LOPART_1 2
722#define _HIPART_1 3
723#define _LOPART_2 4
724#define _HIPART_2 5
725#define _LOPART_3 6
726#define _HIPART_3 7
727#define _LOPART_4 8
728#define _HIPART_4 9
729#define _LOPART_5 10
730#define _HIPART_5 11
731#define _LOPART_6 12
732#define _HIPART_6 13
733#define _LOPART_7 14
734#define _HIPART_7 15
735#define _LOPART_8 16
736#define _HIPART_8 17
737#define _LOPART_9 18
738#define _HIPART_9 19
739#define _LOPART_10 20
740#define _HIPART_10 21
741#define _LOPART_11 22
742#define _HIPART_11 23
743#define _LOPART_12 24
744#define _HIPART_12 25
745#define _LOPART_13 26
746#define _HIPART_13 27
747#define _LOPART_14 28
748#define _HIPART_14 29
749#define _LOPART_15 30
750#define _HIPART_15 31
751
752// Return the register number of the pair containing register n, i.e.,
753// floor(n/2).
754#define _PAIR(n) _GLUE(_PAIR_, n)
755
756// Internal macros: `_PAIR_n' returns the register number of the pair
757// containing register n, i.e., floor(n/2).
758#define _PAIR_0 0
759#define _PAIR_1 0
760#define _PAIR_2 1
761#define _PAIR_3 1
762#define _PAIR_4 2
763#define _PAIR_5 2
764#define _PAIR_6 3
765#define _PAIR_7 3
766#define _PAIR_8 4
767#define _PAIR_9 4
768#define _PAIR_10 5
769#define _PAIR_11 5
770#define _PAIR_12 6
771#define _PAIR_13 6
772#define _PAIR_14 7
773#define _PAIR_15 7
774#define _PAIR_16 8
775#define _PAIR_17 8
776#define _PAIR_18 9
777#define _PAIR_19 9
778#define _PAIR_20 10
779#define _PAIR_21 10
780#define _PAIR_22 11
781#define _PAIR_23 11
782#define _PAIR_24 12
783#define _PAIR_25 12
784#define _PAIR_26 13
785#define _PAIR_27 13
786#define _PAIR_28 14
787#define _PAIR_29 14
788#define _PAIR_30 15
789#define _PAIR_31 15
790
// Apply decoration decor to register number n of type ty.  Decorations are
// as follows.
//
// decor   types   meaning
// Q       s, d    the NEON qN register containing this one
// D       s       the NEON dN register containing this one
// D0      q       the low 64-bit half of this one
// D1      q       the high 64-bit half of this one
// S0      d, q    the first 32-bit piece of this one
// S1      d, q    the second 32-bit piece of this one
// S2      q       the third 32-bit piece of this one
// S3      q       the fourth 32-bit piece of this one
// Bn      q       the nth byte of this register, as a scalar
// Hn      q       the nth halfword of this register, as a scalar
// Wn      q       the nth word of this register, as a scalar
#define _DECOR(ty, decor, n) _DECOR_##ty##_##decor(n)

// Internal macros: `_DECOR_ty_decor(n)' applies decoration decor to register
// number n of type ty.

// Single-precision registers: sN lives in d(N/2), which lives in q(N/4).
#define _DECOR_s_Q(n) GLUE(q, _PAIR(_PAIR(n)))
#define _DECOR_s_D(n) GLUE(d, _PAIR(n))

// Double-precision registers: dN lives in q(N/2), and is made up of s(2 N)
// (first piece) and s(2 N + 1) (second piece).
#define _DECOR_d_Q(n) GLUE(q, _PAIR(n))
#define _DECOR_d_S0(n) GLUE(s, _LOPART(n))
#define _DECOR_d_S1(n) GLUE(s, _HIPART(n))

// Quadword registers: qN is made up of d(2 N) and d(2 N + 1), each of which
// splits further into two single-precision pieces.
#define _DECOR_q_D0(n) GLUE(d, _LOPART(n))
#define _DECOR_q_D1(n) GLUE(d, _HIPART(n))
#define _DECOR_q_S0(n) GLUE(s, _LOPART(_LOPART(n)))
#define _DECOR_q_S1(n) GLUE(s, _HIPART(_LOPART(n)))
#define _DECOR_q_S2(n) GLUE(s, _LOPART(_HIPART(n)))
#define _DECOR_q_S3(n) GLUE(s, _HIPART(_HIPART(n)))

// Scalars within a quadword register are written as an index into the
// appropriate doubleword half: two words, four halfwords, or eight bytes
// per half.
#define _DECOR_q_W0(n) GLUE(d, _LOPART(n))[0]
#define _DECOR_q_W1(n) GLUE(d, _LOPART(n))[1]
#define _DECOR_q_W2(n) GLUE(d, _HIPART(n))[0]
#define _DECOR_q_W3(n) GLUE(d, _HIPART(n))[1]
#define _DECOR_q_H0(n) GLUE(d, _LOPART(n))[0]
#define _DECOR_q_H1(n) GLUE(d, _LOPART(n))[1]
#define _DECOR_q_H2(n) GLUE(d, _LOPART(n))[2]
#define _DECOR_q_H3(n) GLUE(d, _LOPART(n))[3]
#define _DECOR_q_H4(n) GLUE(d, _HIPART(n))[0]
#define _DECOR_q_H5(n) GLUE(d, _HIPART(n))[1]
#define _DECOR_q_H6(n) GLUE(d, _HIPART(n))[2]
#define _DECOR_q_H7(n) GLUE(d, _HIPART(n))[3]
#define _DECOR_q_B0(n) GLUE(d, _LOPART(n))[0]
#define _DECOR_q_B1(n) GLUE(d, _LOPART(n))[1]
#define _DECOR_q_B2(n) GLUE(d, _LOPART(n))[2]
#define _DECOR_q_B3(n) GLUE(d, _LOPART(n))[3]
#define _DECOR_q_B4(n) GLUE(d, _LOPART(n))[4]
#define _DECOR_q_B5(n) GLUE(d, _LOPART(n))[5]
#define _DECOR_q_B6(n) GLUE(d, _LOPART(n))[6]
#define _DECOR_q_B7(n) GLUE(d, _LOPART(n))[7]
#define _DECOR_q_B8(n) GLUE(d, _HIPART(n))[0]
#define _DECOR_q_B9(n) GLUE(d, _HIPART(n))[1]
#define _DECOR_q_B10(n) GLUE(d, _HIPART(n))[2]
#define _DECOR_q_B11(n) GLUE(d, _HIPART(n))[3]
#define _DECOR_q_B12(n) GLUE(d, _HIPART(n))[4]
#define _DECOR_q_B13(n) GLUE(d, _HIPART(n))[5]
#define _DECOR_q_B14(n) GLUE(d, _HIPART(n))[6]
#define _DECOR_q_B15(n) GLUE(d, _HIPART(n))[7]
852
853// Macros for navigating the NEON register hierarchy.
854#define S0(reg) _REGFORM(reg, S0)
855#define S1(reg) _REGFORM(reg, S1)
856#define S2(reg) _REGFORM(reg, S2)
857#define S3(reg) _REGFORM(reg, S3)
858#define D(reg) _REGFORM(reg, D)
859#define D0(reg) _REGFORM(reg, D0)
860#define D1(reg) _REGFORM(reg, D1)
861#define Q(reg) _REGFORM(reg, Q)
862
863// Macros for indexing quadword registers.
864#define QB(reg, i) _REGFORM(reg, B##i)
865#define QH(reg, i) _REGFORM(reg, H##i)
866#define QW(reg, i) _REGFORM(reg, W##i)
867
868// Macros for converting vldm/vstm ranges.
869#define QQ(qlo, qhi) D0(qlo)-D1(qhi)
870
61bd904b
MW
871#endif
872
873///--------------------------------------------------------------------------
1a0c09c4
MW
874/// Final stuff.
875
// Fallback definitions.  Any of the function hooks which the
// platform-specific sections above didn't define expands to nothing.
#ifndef FUNC_PREHOOK
#  define FUNC_PREHOOK(_)
#endif
#ifndef FUNC_POSTHOOK
#  define FUNC_POSTHOOK(_)
#endif
#ifndef ENDFUNC_HOOK
#  define ENDFUNC_HOOK(_)
#endif

// By default, external names aren't decorated at all.
#ifndef F
#  define F(name) name
#endif

// By default, there are no symbol-type or symbol-size annotations.
#ifndef TYPE_FUNC
#  define TYPE_FUNC(name)
#endif

#ifndef SIZE_OBJ
#  define SIZE_OBJ(name)
#endif
898
1aa5bfa8
MW
// Unless the including file has specifically asked for an executable stack
// by defining `WANT_EXECUTABLE_STACK', emit an empty `.note.GNU-stack'
// section with no flags.  This marks the object as not needing an
// executable stack; objects lacking the note may cause the linker to make
// the whole program's stack executable.
#if __ELF__ && !defined(WANT_EXECUTABLE_STACK)
	.pushsection .note.GNU-stack, "", _SECTTY(progbits)
	.popsection
#endif
903
1a0c09c4 904///----- That's all, folks --------------------------------------------------