summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1: 2cb17e0)
This provides correct alignment and scoping for Windows SEH
annotations.
#endif
#define DATA .data
#endif
#define DATA .data
-// Announcing an external function.
-#define FUNC(name) \
- .globl F(name); \
+// Announcing an internal function.
+#define INTFUNC(name) \
TYPE_FUNC(name); \
.macro ENDFUNC; _ENDFUNC(name); .endm; \
FUNC_PREHOOK(name); \
TYPE_FUNC(name); \
.macro ENDFUNC; _ENDFUNC(name); .endm; \
FUNC_PREHOOK(name); \
+// Announcing an external function.
+#define FUNC(name) \
+ .globl F(name); \
+INTFUNC(F(name))
+
// Marking the end of a function.
#define _ENDFUNC(name) \
.purgem ENDFUNC; \
// Marking the end of a function.
#define _ENDFUNC(name) \
.purgem ENDFUNC; \
///--------------------------------------------------------------------------
/// Primitive multipliers and related utilities.
///--------------------------------------------------------------------------
/// Primitive multipliers and related utilities.
// On entry, XMM4, XMM5, and XMM6 hold a 144-bit carry in an expanded
// form. Store the low 128 bits of the represented carry to [EDI] as
// a packed 128-bit value, and leave the remaining 16 bits in the low
// On entry, XMM4, XMM5, and XMM6 hold a 144-bit carry in an expanded
// form. Store the low 128 bits of the represented carry to [EDI] as
// a packed 128-bit value, and leave the remaining 16 bits in the low
endprop [edi + 12], xmm6, xmm4
ret
endprop [edi + 12], xmm6, xmm4
ret
+ENDFUNC
+
+INTFUNC(dmul4)
// On entry, EDI points to the destination buffer; EAX and EBX point
// to the packed operands U and X; ECX and EDX point to the expanded
// operands V and Y; and XMM4, XMM5, XMM6 hold the incoming carry
// On entry, EDI points to the destination buffer; EAX and EBX point
// to the packed operands U and X; ECX and EDX point to the expanded
// operands V and Y; and XMM4, XMM5, XMM6 hold the incoming carry
+ENDFUNC
+
+INTFUNC(dmla4)
// On entry, EDI points to the destination buffer, which also
// contains an addend A to accumulate; EAX and EBX point to the
// packed operands U and X; ECX and EDX point to the expanded
// On entry, EDI points to the destination buffer, which also
// contains an addend A to accumulate; EAX and EBX point to the
// packed operands U and X; ECX and EDX point to the expanded
+ENDFUNC
+
+INTFUNC(mul4zc)
// On entry, EDI points to the destination buffer; EBX points to a
// packed operand X; and EDX points to an expanded operand Y.
//
// On entry, EDI points to the destination buffer; EBX points to a
// packed operand X; and EDX points to an expanded operand Y.
//
+ENDFUNC
+
+INTFUNC(mul4)
// On entry, EDI points to the destination buffer; EBX points to a
// packed operand X; EDX points to an expanded operand Y; and XMM4,
// XMM5, XMM6 hold the incoming carry registers c0, c1, and c2,
// On entry, EDI points to the destination buffer; EBX points to a
// packed operand X; EDX points to an expanded operand Y; and XMM4,
// XMM5, XMM6 hold the incoming carry registers c0, c1, and c2,
+ENDFUNC
+
+INTFUNC(mla4zc)
// On entry, EDI points to the destination buffer, which also
// contains an addend A to accumulate; EBX points to a packed operand
// X; and EDX points to an expanded operand Y.
// On entry, EDI points to the destination buffer, which also
// contains an addend A to accumulate; EBX points to a packed operand
// X; and EDX points to an expanded operand Y.
+ENDFUNC
+
+INTFUNC(mla4)
// On entry, EDI points to the destination buffer, which also
// contains an addend A to accumulate; EBX points to a packed operand
// X; EDX points to an expanded operand Y; and XMM4, XMM5, XMM6 hold
// On entry, EDI points to the destination buffer, which also
// contains an addend A to accumulate; EBX points to a packed operand
// X; EDX points to an expanded operand Y; and XMM4, XMM5, XMM6 hold
+ENDFUNC
+
+INTFUNC(mmul4)
// On entry, EDI points to the destination buffer; EAX and EBX point
// to the packed operands U and N; ECX and ESI point to the expanded
// operands V and M; and EDX points to a place to store an expanded
// On entry, EDI points to the destination buffer; EAX and EBX point
// to the packed operands U and N; ECX and ESI point to the expanded
// operands V and M; and EDX points to a place to store an expanded
propout [edi + 0], xmm4, xmm5
jmp 5f
propout [edi + 0], xmm4, xmm5
jmp 5f
+ENDFUNC
+
+INTFUNC(mmla4)
// On entry, EDI points to the destination buffer, which also
// contains an addend A to accumulate; EAX and EBX point
// to the packed operands U and N; ECX and ESI point to the expanded
// On entry, EDI points to the destination buffer, which also
// contains an addend A to accumulate; EAX and EBX point
// to the packed operands U and N; ECX and ESI point to the expanded
+ENDFUNC
+
+INTFUNC(mont4)
// On entry, EDI points to the destination buffer holding a packed
// value A; EBX points to a packed operand N; ESI points to an
// expanded operand M; and EDX points to a place to store an expanded
// On entry, EDI points to the destination buffer holding a packed
// value A; EBX points to a packed operand N; ESI points to an
// expanded operand M; and EDX points to a place to store an expanded
// And, with that, we're done.
ret
// And, with that, we're done.
ret
///--------------------------------------------------------------------------
/// Bulk multipliers.
///--------------------------------------------------------------------------
/// Bulk multipliers.
#include "config.h"
#include "asm-common.h"
#include "config.h"
#include "asm-common.h"
- .globl F(abort)
- .globl F(rijndael_rcon)
+ .extern F(abort)
+ .extern F(rijndael_rcon)
///--------------------------------------------------------------------------
/// Main code.
///--------------------------------------------------------------------------
/// Main code.
// All done.
9: ldmfd sp!, {r4-r9, pc}
// All done.
9: ldmfd sp!, {r4-r9, pc}
+ENDFUNC
+
+INTFUNC(endswap_block)
// End-swap R2 words starting at R1. R1 is clobbered; R2 is not.
// It's OK to work in 16-byte chunks.
// End-swap R2 words starting at R1. R1 is clobbered; R2 is not.
// It's OK to work in 16-byte chunks.
mov r4, r2
0: vldmia r1, {d0, d1}
vrev32.8 q0, q0
mov r4, r2
0: vldmia r1, {d0, d1}
vrev32.8 q0, q0
- .align 16
-endswap_block:
+ENDFUNC
+
+INTFUNC(endswap_block)
// End-swap NKW words starting at SI. The end-swapping table is
// already loaded into XMM5; and it's OK to work in 16-byte chunks.
// End-swap NKW words starting at SI. The end-swapping table is
// already loaded into XMM5; and it's OK to work in 16-byte chunks.
+#if CPUFAM_AMD64 && ABI_WIN
+ .seh_endprologue
+#endif
+
mov ecx, NKW
0: movdqu xmm1, [SI]
pshufb xmm1, xmm5
mov ecx, NKW
0: movdqu xmm1, [SI]
pshufb xmm1, xmm5
add SI, 16
sub ecx, 4
ja 0b
add SI, 16
sub ecx, 4
ja 0b
#undef CTX
#undef BLKSZ
#undef SI
#undef CTX
#undef BLKSZ
#undef SI
///--------------------------------------------------------------------------
/// Encrypting and decrypting blocks.
///--------------------------------------------------------------------------
/// Encrypting and decrypting blocks.
///--------------------------------------------------------------------------
/// Random utilities.
///--------------------------------------------------------------------------
/// Random utilities.
// Abort the process because of a programming error. Indirecting
// through this point serves several purposes: (a) by CALLing, rather
// than branching to, `abort', we can save the return address, which
// might at least provide a hint as to what went wrong; (b) we don't
// have conditional CALLs (and they'd be big anyway); and (c) we can
// write a HLT here as a backstop against `abort' being mad.
// Abort the process because of a programming error. Indirecting
// through this point serves several purposes: (a) by CALLing, rather
// than branching to, `abort', we can save the return address, which
// might at least provide a hint as to what went wrong; (b) we don't
// have conditional CALLs (and they'd be big anyway); and (c) we can
// write a HLT here as a backstop against `abort' being mad.
+#if CPUFAM_AMD64 && ABI_WIN
+ .seh_endprologue
+#endif
+
+ callext F(abort)
///--------------------------------------------------------------------------
/// Data tables.
///--------------------------------------------------------------------------
/// Data tables.