symm/{chacha,salsa20}-*.S: Indent the hoisted transposition instructions.
[catacomb] / base / asm-common.h
CommitLineData
1a0c09c4
MW
1/// -*- mode: asm; asm-comment-char: ?/ -*-
2///
3/// Common definitions for assembler source files
4///
5/// (c) 2015 Straylight/Edgeware
6///
7
8///----- Licensing notice ---------------------------------------------------
9///
10/// This file is part of Catacomb.
11///
12/// Catacomb is free software; you can redistribute it and/or modify
13/// it under the terms of the GNU Library General Public License as
14/// published by the Free Software Foundation; either version 2 of the
15/// License, or (at your option) any later version.
16///
17/// Catacomb is distributed in the hope that it will be useful,
18/// but WITHOUT ANY WARRANTY; without even the implied warranty of
19/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20/// GNU Library General Public License for more details.
21///
22/// You should have received a copy of the GNU Library General Public
23/// License along with Catacomb; if not, write to the Free
24/// Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25/// MA 02111-1307, USA.
26
27///--------------------------------------------------------------------------
28/// General definitions.
29
f8e509a9
MW
// Some useful variables.
//
// `.L$_subsec' holds the number of the `.text' subsection into which
// ordinary code is currently being assembled.  It starts at zero and is
// only ever advanced by `_LTORG', in steps of two, so the odd-numbered
// subsection immediately following it is free for literal data.
        .L$_subsec = 0

// Literal pools done the hard way.
//
//   _LIT       switch to the literal subsection (`.L$_subsec + 1');
//   _ENDLIT    switch back to the current code subsection;
//   _LTORG     move on to a fresh pair of subsections (code, literals).
//
// GNU as emits the subsections of a section in ascending numerical order,
// so anything written between `_LIT' and `_ENDLIT' ends up after the code
// in the current subsection and before any code which follows the next
// `_LTORG'.
#define _LIT .text .L$_subsec + 1
#define _ENDLIT .text .L$_subsec
#define _LTORG .L$_subsec = .L$_subsec + 2; .text .L$_subsec
f8e509a9 37
1a0c09c4
MW
// Announcing an external function.
//
// FUNC(name) exports the symbol `F(name)', applies `TYPE_FUNC' to it,
// runs `FUNC_PREHOOK', emits the label `F(name):', and finally runs
// `FUNC_POSTHOOK'.  The hooks, `F' and `TYPE_FUNC' are supplied by the
// platform-specific sections of this file, or default to nothing.
//
// As a side effect, an assembler macro `ENDFUNC' is defined which expands
// to `_ENDFUNC(name)'; the function body must be terminated by invoking
// it.  Because `ENDFUNC' captures NAME, the caller doesn't have to repeat
// the function name at the end.
#define FUNC(name) \
        .globl F(name); \
        TYPE_FUNC(name); \
        .macro ENDFUNC; _ENDFUNC(name); .endm; \
        FUNC_PREHOOK(name); \
F(name): \
        FUNC_POSTHOOK(name)
46
// Marking the end of a function.
//
// This is the expansion of the `ENDFUNC' macro which `FUNC' defines; it
// isn't intended to be written out by hand.  In order, it:
//
//   * purges `ENDFUNC', so that the next `FUNC' can define a fresh one;
//   * applies `SIZE_OBJ' to NAME;
//   * runs the platform's `ENDFUNC_HOOK'; and
//   * invokes `_LTORG' to start a new pair of code/literal subsections.
#define _ENDFUNC(name) \
        .purgem ENDFUNC; \
        SIZE_OBJ(name); \
        ENDFUNC_HOOK(name); \
        _LTORG
1a0c09c4 53
8ae4c946
MW
// Make a helper function, if necessary.
//
// Usage:
//
//      AUXFN(name)
//        ...body...
//      ENDAUXFN
//
// The body is assembled only if the guard symbol `.L$_auxfn_def.NAME' is
// not yet defined; `ENDAUXFN' defines it, so a second AUXFN...ENDAUXFN
// pair with the same NAME in the same assembly is skipped by the
// `.ifndef'.
//
// The helper is placed in `.text' subsection 7128 rather than in the
// current code subsection, and `ENDAUXFN' switches back to subsection
// `.L$_subsec' afterwards.  (NOTE(review): the number 7128 looks
// arbitrary; presumably it only needs to stay clear of the subsections
// counted out by `.L$_subsec' -- confirm.)
//
// Unlike `FUNC', the label is plain `name' -- it is neither passed
// through `F' nor declared `.globl'.
#define AUXFN(name) \
        .ifndef .L$_auxfn_def.name; \
        .text 7128; \
        .macro _ENDAUXFN; _ENDAUXFN_TAIL(name); .endm; \
        FUNC_PREHOOK(name); \
name:
#define _ENDAUXFN_TAIL(name) \
        .purgem _ENDAUXFN; \
        .text .L$_subsec; \
        .L$_auxfn_def.name = 1
#define ENDAUXFN _ENDAUXFN; .endif
66
1a0c09c4
MW
67///--------------------------------------------------------------------------
68/// ELF-specific hacking.
69
#if __ELF__

// Position-independent code is wanted if the compiler driver says we're
// building either PIC or PIE.  `WANT_PIC' is tested by the address-
// loading and call macros in the CPU-specific sections.
#if __PIC__ || __PIE__
#  define WANT_PIC 1
#endif

// Mark NAME as a function symbol.
#define TYPE_FUNC(name) .type name, STT_FUNC

// Record the size of NAME as the distance from its label to here.
#define SIZE_OBJ(name) .size name, . - name

#endif
81
82///--------------------------------------------------------------------------
0f23f75f
MW
83/// Windows-specific hacking.
84
#if ABI_WIN

// On 32-bit x86 Windows, external symbol names gain a leading underscore.
// (Presumably this matches the platform's C symbol-naming convention --
// confirm against the toolchain in use.)  Other targets pick up the
// identity definition of `F' from the defaults at the end of this file.
#if CPUFAM_X86
#  define F(name) _##name
#endif

#endif
92
93///--------------------------------------------------------------------------
94/// x86- and amd64-specific hacking.
95///
96/// It's (slightly) easier to deal with both of these in one go.
97
#if CPUFAM_X86 || CPUFAM_AMD64

// Set the function hooks.  Function entry points are aligned to 16 bytes.
#define FUNC_PREHOOK(_) .balign 16

// On Windows, arrange to install stack-unwinding data.
#if CPUFAM_AMD64 && ABI_WIN
#  define FUNC_POSTHOOK(name) .seh_proc name
#  define ENDFUNC_HOOK(_) .seh_endproc
// Procedures are expected to invoke `.seh_setframe' if necessary, and
// `.seh_pushreg' and friends, and `.seh_endprologue'.
#endif

// Don't use the wretched AT&T syntax.  It's festooned with pointless
// punctuation, and all of the data movement is backwards.  Ugh!
        .intel_syntax noprefix

// Call external subroutine at ADDR, possibly via PLT.
//
// With `WANT_PIC' set this emits `call ADDR@PLT'; otherwise it is a plain
// `call ADDR'.
.macro  callext addr
#if WANT_PIC
        call    \addr@PLT
#else
        call    \addr
#endif
.endm

// Do I need to arrange a spare GOT register?
//
// `NEED_GOT' is defined only for position-independent 32-bit x86 code.
// `GOTREG' is the default register used by `ldgot', `leaext' and
// `INTADDR' when the caller doesn't name one.
#if WANT_PIC && CPUFAM_X86
#  define NEED_GOT 1
#endif
#define GOTREG ebx                      // Not needed in AMD64 so don't care.

// Maybe load GOT address into GOT.
//
// Only position-independent 32-bit x86 code gets any instructions from
// this; everywhere else the macro expands to nothing.
//
// The helper `_ldgot.GOT' (emitted at most once per register, courtesy of
// `AUXFN') copies its own return address from the top of the stack into
// GOT and returns.  So, after the `call', GOT holds the address of the
// following `add' instruction, and the `add' of
// `offset _GLOBAL_OFFSET_TABLE_' is what turns that into the address of
// the GOT.  (NOTE(review): this relies on the assembler's special handling
// of `_GLOBAL_OFFSET_TABLE_' as an operand here -- confirm against the
// GNU as i386 documentation.)
.macro  ldgot got=GOTREG
#if WANT_PIC && CPUFAM_X86
        AUXFN(_ldgot.\got)
        mov     \got, [esp]
        ret
        ENDAUXFN
        call    _ldgot.\got
        add     \got, offset _GLOBAL_OFFSET_TABLE_
#endif
.endm

// Load address of external symbol ADDR into REG, maybe using GOT.
//
//                      x86                             amd64
//   WANT_PIC           load from [GOT + ADDR@GOT]      load ADDR@GOTPCREL[rip]
//   otherwise          mov REG, offset ADDR            lea REG, ADDR[rip]
//
// The GOT argument is used only in the position-independent x86 case; the
// register it names must already hold the GOT address (see `ldgot').
.macro  leaext reg, addr, got=GOTREG
#if WANT_PIC
#  if CPUFAM_X86
        mov     \reg, [\got + \addr@GOT]
#  endif
#  if CPUFAM_AMD64
        mov     \reg, \addr@GOTPCREL[rip]
#  endif
#else
#  if CPUFAM_X86
        mov     \reg, offset \addr
#  endif
#  if CPUFAM_AMD64
        lea     \reg, \addr[rip]
#  endif
#endif
.endm

// Address expression (possibly using a base register, and a displacement)
// referring to ADDR, which is within our module, maybe using GOT.
//
// Usage: INTADDR(addr) or INTADDR(addr, got).  The two helper macros
// implement the optional second argument: `INTADDR' appends `GOTREG' (and
// a dummy, so that the variadic tail of `INTADDR__0' is never empty), and
// `INTADDR__0' keeps only the first two arguments it is given.
//
// The result is `ADDR + rip' on amd64; `GOT + ADDR@GOTOFF' in position-
// independent x86 code; and plain `ADDR' otherwise.  It is intended to be
// written inside the brackets of a memory operand.
#define INTADDR(...) INTADDR__0(__VA_ARGS__, GOTREG, dummy)
#define INTADDR__0(addr, got, ...) INTADDR__1(addr, got)
#if CPUFAM_AMD64
#  define INTADDR__1(addr, got) addr + rip
#elif WANT_PIC
#  define INTADDR__1(addr, got) got + addr@GOTOFF
#else
#  define INTADDR__1(addr, got) addr
#endif

#endif
174
175///--------------------------------------------------------------------------
61bd904b
MW
176/// ARM-specific hacking.
177
#if CPUFAM_ARMEL

// ARM/Thumb mode things.  Use ARM by default.
//
// As well as switching instruction set, these record in `.L$_pcoff' how
// far ahead of the current instruction a read of `pc' lands: 8 in ARM
// state, 4 in Thumb state.  The PC-relative calculations in `ldgot' and
// `leaextq' subtract this to compensate.
#define ARM .arm; .L$_pcoff = 8
#define THUMB .thumb; .L$_pcoff = 4
        ARM

// Set the function hooks.  Function entry points are aligned to 4 bytes,
// and the assembler's own literal pool is dumped at the end of each
// function.
#define FUNC_PREHOOK(_) .balign 4
#define ENDFUNC_HOOK(name) .ltorg

// Call external subroutine at ADDR, possibly via PLT.
//
// COND is an optional condition-code suffix for the `bl' instruction.
.macro  callext addr, cond=
#if WANT_PIC
        bl\cond \addr(PLT)
#else
        bl\cond \addr
#endif
.endm

// Do I need to arrange a spare GOT register?
//
// `GOTREG' is the default register used by `ldgot' and `leaext' when the
// caller doesn't name one.
#if WANT_PIC
#  define NEED_GOT 1
#endif
#define GOTREG r9

// Maybe load GOT address into GOT.
//
// Expands to nothing unless `WANT_PIC' is set.  Otherwise, a word holding
// the offset of the GOT from the value `pc' will have at the `add'
// instruction is planted in the literal subsection (`_LIT'...`_ENDLIT'),
// loaded into GOT, and added to `pc'.  COND is an optional condition-code
// suffix applied to both instructions.  The `\@' suffix makes the labels
// unique to each expansion of the macro.
.macro  ldgot cond=, got=GOTREG
#if WANT_PIC
        ldr\cond \got, .L$_ldgot$\@
.L$_ldgot_pc$\@:
        add\cond \got, pc, \got
        _LIT
        .balign 4
.L$_ldgot$\@:
        .word   _GLOBAL_OFFSET_TABLE_ - .L$_ldgot_pc$\@ - .L$_pcoff
        _ENDLIT
#endif
.endm

// Load address of external symbol ADDR into REG, maybe using GOT.
//
// In position-independent code, ADDR's GOT offset is planted as a literal
// word, loaded into REG, and then used to index from GOT, which must
// already hold the GOT address (see `ldgot').  Otherwise the address is
// loaded with the assembler's `ldr REG, =ADDR' pseudo-instruction.  COND
// is an optional condition-code suffix applied to every instruction.
.macro  leaext reg, addr, cond=, got=GOTREG
#if WANT_PIC
        ldr\cond \reg, .L$_leaext$\@
        ldr\cond \reg, [\got, \reg]
        _LIT
        .balign 4
.L$_leaext$\@:
        .word   \addr(GOT)
        _ENDLIT
#else
        ldr\cond \reg, =\addr
#endif
.endm

// Load address of external symbol ADDR into REG directly.
//
// Unlike `leaext', this doesn't need a register holding the GOT address:
// in position-independent code it plants a `GOT_PREL' literal, adjusted by
// the distance from the literal back to the `pc' value seen at
// `.L$_leaextq_pc', and loads through `pc' plus that.  If `.L$_pcoff' is 8
// (ARM state) this takes a single `ldr' with a register offset; otherwise
// the sum is formed with a separate `add' first.  Without `WANT_PIC' it is
// the same `ldr REG, =ADDR' as `leaext'.  COND is an optional condition-
// code suffix applied to every instruction.
.macro  leaextq reg, addr, cond=
#if WANT_PIC
        ldr\cond \reg, .L$_leaextq$\@
.L$_leaextq_pc$\@:
        .if     .L$_pcoff == 8
        ldr\cond \reg, [pc, \reg]
        .else
        add\cond \reg, pc
        ldr\cond \reg, [\reg]
        .endif
        _LIT
        .balign 4
.L$_leaextq$\@:
        .word   \addr(GOT_PREL) + (. - .L$_leaextq_pc$\@ - .L$_pcoff)
        _ENDLIT
#else
        ldr\cond \reg, =\addr
#endif
.endm

#endif
255
256///--------------------------------------------------------------------------
1a0c09c4
MW
257/// Final stuff.
258
// Default values for the various hooks.
//
// Anything which the platform- and CPU-specific sections above didn't
// define falls back to a harmless default here, so that `FUNC',
// `_ENDFUNC' and `AUXFN' can use all of them unconditionally.

// Hooks run just before and just after a function's label, and at its
// end: all empty by default.
#ifndef FUNC_PREHOOK
#  define FUNC_PREHOOK(name)
#endif
#ifndef FUNC_POSTHOOK
#  define FUNC_POSTHOOK(name)
#endif
#ifndef ENDFUNC_HOOK
#  define ENDFUNC_HOOK(name)
#endif

// External symbol-name decoration: the identity by default.
#ifndef F
#  define F(name) name
#endif

// Symbol type annotation: nothing by default.
#ifndef TYPE_FUNC
#  define TYPE_FUNC(name)
#endif

// Symbol size annotation: nothing by default.
#ifndef SIZE_OBJ
#  define SIZE_OBJ(name)
#endif
281
282///----- That's all, folks --------------------------------------------------