base/asm-common.h, *-x86ish-*.S: Centralize SSE shuffling constants.
[catacomb] / base / asm-common.h
1 /// -*- mode: asm; asm-comment-char: ?/ -*-
2 ///
3 /// Common definitions for assembler source files
4 ///
5 /// (c) 2015 Straylight/Edgeware
6 ///
7
8 ///----- Licensing notice ---------------------------------------------------
9 ///
10 /// This file is part of Catacomb.
11 ///
12 /// Catacomb is free software; you can redistribute it and/or modify
13 /// it under the terms of the GNU Library General Public License as
14 /// published by the Free Software Foundation; either version 2 of the
15 /// License, or (at your option) any later version.
16 ///
17 /// Catacomb is distributed in the hope that it will be useful,
18 /// but WITHOUT ANY WARRANTY; without even the implied warranty of
19 /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 /// GNU Library General Public License for more details.
21 ///
22 /// You should have received a copy of the GNU Library General Public
23 /// License along with Catacomb; if not, write to the Free
24 /// Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 /// MA 02111-1307, USA.
26
27 ///--------------------------------------------------------------------------
28 /// General definitions.
29
30 // Some useful variables.
//
// `.L$_subsec' is the number of the `.text' subsection which code is
// currently being assembled into.  It starts at zero; `_LIT', `_ENDLIT'
// and `_LTORG' below are all defined in terms of it.
31 .L$_subsec = 0
32
33 // Literal pools done the hard way.
//
// `_LIT' switches to `.text' subsection `.L$_subsec + 1', and `_ENDLIT'
// switches back to subsection `.L$_subsec'.  Since `.L$_subsec' starts at
// zero and only ever advances by two, data placed between `_LIT' and
// `_ENDLIT' lands in the odd-numbered subsection immediately following the
// current code subsection.  `_LTORG' adds two to `.L$_subsec' and switches
// to the new subsection, so that later code and literals use a fresh pair.
34 #define _LIT .text .L$_subsec + 1
35 #define _ENDLIT .text .L$_subsec
36 #define _LTORG .L$_subsec = .L$_subsec + 2; .text .L$_subsec
37
38 // Announcing an external function.
//
// `FUNC(name)' makes the symbol `F(name)' global, applies `TYPE_FUNC', and
// defines an assembler macro `ENDFUNC' which expands to `_ENDFUNC(name)'.
// It then runs `FUNC_PREHOOK', places the label `F(name)', and finally runs
// `FUNC_POSTHOOK'.  The function body follows, and is closed by `ENDFUNC'.
39 #define FUNC(name) \
40 .globl F(name); \
41 TYPE_FUNC(name); \
42 .macro ENDFUNC; _ENDFUNC(name); .endm; \
43 FUNC_PREHOOK(name); \
44 F(name): \
45 FUNC_POSTHOOK(name)
46
47 // Marking the end of a function.
//
// This is reached through the `ENDFUNC' macro which `FUNC' defines.  It
// purges `ENDFUNC' (so that a later `FUNC' can define it again), applies
// `SIZE_OBJ' and `ENDFUNC_HOOK' to the function name, and ends with
// `_LTORG' to move on to a fresh pair of `.text' subsections.
48 #define _ENDFUNC(name) \
49 .purgem ENDFUNC; \
50 SIZE_OBJ(name); \
51 ENDFUNC_HOOK(name); \
52 _LTORG
53
54 // Make a helper function, if necessary.
//
// `AUXFN(name)' opens a conditional block which is assembled only if the
// symbol `.L$_auxfn_def.name' has not been defined yet.  Inside it,
// assembly switches to `.text' subsection 7128, the macro `_ENDAUXFN' is
// defined, `FUNC_PREHOOK' is run, and the label `name' is placed.  The
// helper body follows and is closed by `ENDAUXFN', which purges
// `_ENDAUXFN', returns to subsection `.L$_subsec', sets the marker symbol
// to 1 so that later `AUXFN(name)' blocks are skipped, and ends the
// conditional.
//
// NOTE(review): 7128 is presumably just a subsection number chosen to stay
// clear of the ones handed out through `.L$_subsec' -- confirm.
55 #define AUXFN(name) \
56 .ifndef .L$_auxfn_def.name; \
57 .text 7128; \
58 .macro _ENDAUXFN; _ENDAUXFN_TAIL(name); .endm; \
59 FUNC_PREHOOK(name); \
60 name:
61 #define _ENDAUXFN_TAIL(name) \
62 .purgem _ENDAUXFN; \
63 .text .L$_subsec; \
64 .L$_auxfn_def.name = 1
65 #define ENDAUXFN _ENDAUXFN; .endif
66
67 ///--------------------------------------------------------------------------
68 /// ELF-specific hacking.
69
70 #if __ELF__
71
// Ask for position-independent code sequences (PLT calls, GOT loads) from
// the macros below whenever the compiler defines `__PIC__' or `__PIE__'.
72 #if __PIC__ || __PIE__
73 # define WANT_PIC 1
74 #endif
75
// Mark the symbol NAME as having function type.
76 #define TYPE_FUNC(name) .type name, STT_FUNC
77
// Set the size of NAME to the distance from its label to the current
// location.
78 #define SIZE_OBJ(name) .size name, . - name
79
80 #endif
81
82 ///--------------------------------------------------------------------------
83 /// Windows-specific hacking.
84
85 #if ABI_WIN
86
// On 32-bit x86 Windows, `F(name)' prepends an underscore to the symbol
// name.  Everywhere else the default at the end of this file applies, and
// `F(name)' is just `name'.
87 #if CPUFAM_X86
88 # define F(name) _##name
89 #endif
90
91 #endif
92
93 ///--------------------------------------------------------------------------
94 /// x86- and amd64-specific hacking.
95 ///
96 /// It's (slightly) easier to deal with both of these in one go.
97
98 #if CPUFAM_X86 || CPUFAM_AMD64
99
100 // Set the function hooks.
//
// `FUNC_PREHOOK' runs just before the label is placed by `FUNC' and
// `AUXFN': entry points are aligned to a 16-byte boundary.
101 #define FUNC_PREHOOK(_) .balign 16
102
103 // On Windows, arrange to install stack-unwinding data.
//
// `FUNC_POSTHOOK' runs just after the label and opens the unwind
// description with `.seh_proc'; `ENDFUNC_HOOK' runs from `_ENDFUNC' and
// closes it with `.seh_endproc'.
104 #if CPUFAM_AMD64 && ABI_WIN
105 # define FUNC_POSTHOOK(name) .seh_proc name
106 # define ENDFUNC_HOOK(_) .seh_endproc
107 // Procedures are expected to invoke `.seh_setframe' if necessary, and
108 // `.seh_pushreg' and friends, and `.seh_endprologue'.
109 #endif
110
111 // Don't use the wretched AT&T syntax. It's festooned with pointless
112 // punctuation, and all of the data movement is backwards. Ugh!
//
// From here on, x86 instructions are written destination-first and with
// unprefixed register names; the macros below rely on this.
113 .intel_syntax noprefix
114
115 // Call external subroutine at ADDR, possibly via PLT.
//
// With WANT_PIC set, the call target is written `ADDR@PLT'; otherwise this
// is a plain `call ADDR'.
116 .macro callext addr
117 #if WANT_PIC
118 call \addr@PLT
119 #else
120 call \addr
121 #endif
122 .endm
123
124 // Do I need to arrange a spare GOT register?
//
// NEED_GOT is set only for position-independent 32-bit x86 code.  GOTREG
// names the register which `ldgot', `leaext' and `INTADDR' use when the
// caller does not name one.
125 #if WANT_PIC && CPUFAM_X86
126 # define NEED_GOT 1
127 #endif
128 #define GOTREG ebx // Not needed in AMD64 so don't care.
129
130 // Maybe load GOT address into GOT.
//
// Only position-independent 32-bit x86 code gets any instructions here;
// otherwise the macro expands to nothing.  The helper `_ldgot.GOT',
// emitted at most once per register by way of `AUXFN', copies its own
// return address from the top of the stack into GOT and returns.  Calling
// it therefore leaves GOT holding the address of the instruction following
// the `call', to which `offset _GLOBAL_OFFSET_TABLE_' is then added.
131 .macro ldgot got=GOTREG
132 #if WANT_PIC && CPUFAM_X86
133 AUXFN(_ldgot.\got)
134 mov \got, [esp]
135 ret
136 ENDAUXFN
137 call _ldgot.\got
138 add \got, offset _GLOBAL_OFFSET_TABLE_
139 #endif
140 .endm
141
142 // Load address of external symbol ADDR into REG, maybe using GOT.
//
// With WANT_PIC set: on x86, REG is loaded from the `ADDR@GOT' slot
// addressed relative to GOT (which should already hold the GOT address;
// see `ldgot'); on AMD64, REG is loaded from `ADDR@GOTPCREL[rip]' and GOT
// is unused.  Without WANT_PIC: x86 moves `offset ADDR' into REG, and
// AMD64 uses a rip-relative `lea'.
143 .macro leaext reg, addr, got=GOTREG
144 #if WANT_PIC
145 # if CPUFAM_X86
146 mov \reg, [\got + \addr@GOT]
147 # endif
148 # if CPUFAM_AMD64
149 mov \reg, \addr@GOTPCREL[rip]
150 # endif
151 #else
152 # if CPUFAM_X86
153 mov \reg, offset \addr
154 # endif
155 # if CPUFAM_AMD64
156 lea \reg, \addr[rip]
157 # endif
158 #endif
159 .endm
160
161 // Address expression (possibly using a base register, and a displacement)
162 // referring to ADDR, which is within our module, maybe using GOT.
//
// Use as `INTADDR(addr)' or `INTADDR(addr, got)'.  `INTADDR__0' receives
// the caller's arguments followed by GOTREG and a dummy, so its second
// parameter is the caller's register if one was given, and GOTREG if not;
// anything further is discarded.  The result is `addr + rip' on AMD64,
// `got + addr@GOTOFF' for position-independent x86, and plain `addr'
// otherwise.
163 #define INTADDR(...) INTADDR__0(__VA_ARGS__, GOTREG, dummy)
164 #define INTADDR__0(addr, got, ...) INTADDR__1(addr, got)
165 #if CPUFAM_AMD64
166 # define INTADDR__1(addr, got) addr + rip
167 #elif WANT_PIC
168 # define INTADDR__1(addr, got) got + addr@GOTOFF
169 #else
170 # define INTADDR__1(addr, got) addr
171 #endif
172
173 // Permutations for SIMD instructions. SHUF(D, C, B, A) is an immediate,
174 // suitable for use in `pshufd' or `shufps', which copies element D
175 // (0 <= D < 4) of the source to element 3 of the destination, element C to
176 // element 2, element B to element 1, and element A to element 0.
//
// Each selector occupies a two-bit field of the result: A in bits 0-1, B in
// bits 2-3, C in bits 4-5, and D in bits 6-7.  (With `shufps', the fields
// for elements 0 and 1 select from the first operand rather than from the
// second.)
177 #define SHUF(d, c, b, a) (64*(d) + 16*(c) + 4*(b) + (a))
178
179 #endif
180
181 ///--------------------------------------------------------------------------
182 /// ARM-specific hacking.
183
184 #if CPUFAM_ARMEL
185
186 // ARM/Thumb mode things. Use ARM by default.
//
// Each macro selects the instruction set and records in `.L$_pcoff' the
// offset which the pc-relative literals in `ldgot' and `leaextq' subtract:
// 8 in ARM mode, 4 in Thumb mode.  `leaextq' also tests `.L$_pcoff' to
// choose its instruction sequence.
187 #define ARM .arm; .L$_pcoff = 8
188 #define THUMB .thumb; .L$_pcoff = 4
189 ARM
190
191 // Set the function hooks.
//
// Entry points are aligned to a 4-byte boundary, and `.ltorg' is issued at
// the end of every function (by way of `_ENDFUNC').
192 #define FUNC_PREHOOK(_) .balign 4
193 #define ENDFUNC_HOOK(name) .ltorg
194
195 // Call external subroutine at ADDR, possibly via PLT.
//
// COND is an optional condition-code suffix which is appended to `bl'.
// With WANT_PIC set, the target is written `ADDR(PLT)'.
196 .macro callext addr, cond=
197 #if WANT_PIC
198 bl\cond \addr(PLT)
199 #else
200 bl\cond \addr
201 #endif
202 .endm
203
204 // Do I need to arrange a spare GOT register?
//
// On ARM, NEED_GOT is set for all position-independent code.  GOTREG names
// the register which `ldgot' and `leaext' use when the caller does not
// name one.
205 #if WANT_PIC
206 # define NEED_GOT 1
207 #endif
208 #define GOTREG r9
209
210 // Maybe load GOT address into GOT.
//
// With WANT_PIC set, GOT is loaded with a word from the literal pool and
// then has pc added to it.  The word is `_GLOBAL_OFFSET_TABLE_' minus the
// address of the `add' instruction minus `.L$_pcoff'.  COND is an optional
// condition-code suffix applied to both instructions.  Without WANT_PIC the
// macro expands to nothing.
211 .macro ldgot cond=, got=GOTREG
212 #if WANT_PIC
213 ldr\cond \got, .L$_ldgot$\@
214 .L$_ldgot_pc$\@:
215 add\cond \got, pc, \got
216 _LIT
217 .balign 4
218 .L$_ldgot$\@:
219 .word _GLOBAL_OFFSET_TABLE_ - .L$_ldgot_pc$\@ - .L$_pcoff
220 _ENDLIT
221 #endif
222 .endm
223
224 // Load address of external symbol ADDR into REG, maybe using GOT.
//
// With WANT_PIC set, REG is first loaded with the literal word `ADDR(GOT)',
// and then with the word found at GOT + REG; GOT should already hold the
// GOT address (see `ldgot').  Without WANT_PIC this is `ldr REG, =ADDR'.
// COND is an optional condition-code suffix applied to each instruction.
225 .macro leaext reg, addr, cond=, got=GOTREG
226 #if WANT_PIC
227 ldr\cond \reg, .L$_leaext$\@
228 ldr\cond \reg, [\got, \reg]
229 _LIT
230 .balign 4
231 .L$_leaext$\@:
232 .word \addr(GOT)
233 _ENDLIT
234 #else
235 ldr\cond \reg, =\addr
236 #endif
237 .endm
238
239 // Load address of external symbol ADDR into REG directly.
//
// Unlike `leaext', no GOT register is involved.  With WANT_PIC set, REG is
// loaded with a literal word: `ADDR(GOT_PREL)' plus the distance from the
// labelled pc-relative instruction to the literal, less `.L$_pcoff'.  When
// `.L$_pcoff' is 8 (ARM mode) a single `ldr REG, [pc, REG]' then fetches
// the address; otherwise pc is added to REG and the word at REG is loaded
// by a separate instruction.  Without WANT_PIC this is `ldr REG, =ADDR'.
// COND is an optional condition-code suffix applied to each instruction.
240 .macro leaextq reg, addr, cond=
241 #if WANT_PIC
242 ldr\cond \reg, .L$_leaextq$\@
243 .L$_leaextq_pc$\@:
244 .if .L$_pcoff == 8
245 ldr\cond \reg, [pc, \reg]
246 .else
247 add\cond \reg, pc
248 ldr\cond \reg, [\reg]
249 .endif
250 _LIT
251 .balign 4
252 .L$_leaextq$\@:
253 .word \addr(GOT_PREL) + (. - .L$_leaextq_pc$\@ - .L$_pcoff)
254 _ENDLIT
255 #else
256 ldr\cond \reg, =\addr
257 #endif
258 .endm
259
260 #endif
261
262 ///--------------------------------------------------------------------------
263 /// Final stuff.
264
265 // Fallbacks for whatever the platform sections above left undefined.
// The three function hooks and the two ELF decorations expand to nothing;
// `F' leaves a symbol name unchanged.
266 #if !defined(FUNC_PREHOOK)
267 # define FUNC_PREHOOK(_)
268 #endif
269 #if !defined(FUNC_POSTHOOK)
270 # define FUNC_POSTHOOK(_)
271 #endif
272 #if !defined(ENDFUNC_HOOK)
273 # define ENDFUNC_HOOK(_)
274 #endif
275
276 #if !defined(F)
277 # define F(sym) sym
278 #endif
279
280 #if !defined(TYPE_FUNC)
281 # define TYPE_FUNC(_)
282 #endif
283
284 #if !defined(SIZE_OBJ)
285 # define SIZE_OBJ(_)
286 #endif
287
288 ///----- That's all, folks --------------------------------------------------