From: Mark Wooding Date: Sat, 5 Nov 2016 21:28:22 +0000 (+0000) Subject: base/asm-common.h, *.S: Add `INTFUNC' macro for internal subroutines. X-Git-Tag: 2.3.0~18 X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/commitdiff_plain/1a517bb3785891ff6940c73af7c5a136d0250ebf base/asm-common.h, *.S: Add `INTFUNC' macro for internal subroutines. This provides correct alignment, and scoping for Windows SEH annotations. --- diff --git a/base/asm-common.h b/base/asm-common.h index 083643e7..fdd7fad1 100644 --- a/base/asm-common.h +++ b/base/asm-common.h @@ -62,15 +62,19 @@ #endif #define DATA .data -// Announcing an external function. -#define FUNC(name) \ - .globl F(name); \ +// Announcing an internal function. +#define INTFUNC(name) \ TYPE_FUNC(name); \ .macro ENDFUNC; _ENDFUNC(name); .endm; \ FUNC_PREHOOK(name); \ -F(name): \ +name: \ FUNC_POSTHOOK(name) +// Announcing an external function. +#define FUNC(name) \ + .globl F(name); \ +INTFUNC(F(name)) + // Marking the end of a function. #define _ENDFUNC(name) \ .purgem ENDFUNC; \ diff --git a/math/mpx-mul4-x86-sse2.S b/math/mpx-mul4-x86-sse2.S index 922de33d..8f69a559 100644 --- a/math/mpx-mul4-x86-sse2.S +++ b/math/mpx-mul4-x86-sse2.S @@ -310,8 +310,7 @@ ///-------------------------------------------------------------------------- /// Primitive multipliers and related utilities. - .p2align 4 -carryprop: +INTFUNC(carryprop) // On entry, XMM4, XMM5, and XMM6 hold a 144-bit carry in an expanded // form. Store the low 128 bits of the represented carry to [EDI] as // a packed 128-bit value, and leave the remaining 16 bits in the low @@ -322,8 +321,9 @@ carryprop: endprop [edi + 12], xmm6, xmm4 ret - .p2align 4 -dmul4: +ENDFUNC + +INTFUNC(dmul4) // On entry, EDI points to the destination buffer; EAX and EBX point // to the packed operands U and X; ECX and EDX point to the expanded // operands V and Y; and XMM4, XMM5, XMM6 hold the incoming carry @@ -351,8 +351,9 @@ dmul4: ret - .p2align 4 -dmla4: +ENDFUNC + +INTFUNC(dmla4) // On entry, EDI points to the destination buffer, which also // contains an addend A to accumulate; EAX and EBX point to the // packed operands U and X; ECX and EDX point to the expanded @@ -384,8 +385,9 @@ dmla4: ret - .p2align 4 -mul4zc: +ENDFUNC + +INTFUNC(mul4zc) // On entry, EDI points to the destination buffer; EBX points to a // packed operand X; and EDX points to an expanded operand Y. // @@ -407,8 +409,9 @@ mul4zc: ret - .p2align 4 -mul4: +ENDFUNC + +INTFUNC(mul4) // On entry, EDI points to the destination buffer; EBX points to a // packed operand X; EDX points to an expanded operand Y; and XMM4, // XMM5, XMM6 hold the incoming carry registers c0, c1, and c2, @@ -432,8 +435,9 @@ mul4: ret - .p2align 4 -mla4zc: +ENDFUNC + +INTFUNC(mla4zc) // On entry, EDI points to the destination buffer, which also // contains an addend A to accumulate; EBX points to a packed operand // X; and EDX points to an expanded operand Y. @@ -461,8 +465,9 @@ mla4zc: ret - .p2align 4 -mla4: +ENDFUNC + +INTFUNC(mla4) // On entry, EDI points to the destination buffer, which also // contains an addend A to accumulate; EBX points to a packed operand // X; EDX points to an expanded operand Y; and XMM4, XMM5, XMM6 hold @@ -489,8 +494,9 @@ mla4: ret - .p2align 4 -mmul4: +ENDFUNC + +INTFUNC(mmul4) // On entry, EDI points to the destination buffer; EAX and EBX point // to the packed operands U and N; ECX and ESI point to the expanded // operands V and M; and EDX points to a place to store an expanded @@ -510,8 +516,9 @@ mmul4: propout [edi + 0], xmm4, xmm5 jmp 5f - .p2align 4 -mmla4: +ENDFUNC + +INTFUNC(mmla4) // On entry, EDI points to the destination buffer, which also // contains an addend A to accumulate; EAX and EBX point // to the packed operands U and N; ECX and ESI point to the expanded @@ -595,8 +602,9 @@ mmla4: add esp, 64 ret - .p2align 4 -mont4: +ENDFUNC + +INTFUNC(mont4) // On entry, EDI points to the destination buffer holding a packed // value A; EBX points to a packed operand N; ESI points to an // expanded operand M; and EDX points to a place to store an expanded @@ -650,6 +658,8 @@ mont4: // And, with that, we're done. ret +ENDFUNC + ///-------------------------------------------------------------------------- /// Bulk multipliers. diff --git a/symm/rijndael-arm-crypto.S b/symm/rijndael-arm-crypto.S index 5ca516e3..4d7312d4 100644 --- a/symm/rijndael-arm-crypto.S +++ b/symm/rijndael-arm-crypto.S @@ -30,8 +30,8 @@ #include "config.h" #include "asm-common.h" - .globl F(abort) - .globl F(rijndael_rcon) + .extern F(abort) + .extern F(rijndael_rcon) ///-------------------------------------------------------------------------- /// Main code. @@ -211,9 +211,12 @@ FUNC(rijndael_setup_arm_crypto) // All done. 9: ldmfd sp!, {r4-r9, pc} -endswap_block: +ENDFUNC + +INTFUNC(endswap_block) // End-swap R2 words starting at R1. R1 is clobbered; R2 is not. // It's OK to work in 16-byte chunks. + mov r4, r2 0: vldmia r1, {d0, d1} vrev32.8 q0, q0 diff --git a/symm/rijndael-x86ish-aesni.S b/symm/rijndael-x86ish-aesni.S index 8090bca6..b0b880a4 100644 --- a/symm/rijndael-x86ish-aesni.S +++ b/symm/rijndael-x86ish-aesni.S @@ -332,10 +332,15 @@ FUNC(rijndael_setup_x86ish_aesni) #endif ret - .align 16 -endswap_block: +ENDFUNC + +INTFUNC(endswap_block) // End-swap NKW words starting at SI. The end-swapping table is // already loaded into XMM5; and it's OK to work in 16-byte chunks. +#if CPUFAM_AMD64 && ABI_WIN + .seh_endprologue +#endif + mov ecx, NKW 0: movdqu xmm1, [SI] pshufb xmm1, xmm5 @@ -343,8 +348,11 @@ endswap_block: add SI, 16 sub ecx, 4 ja 0b + ret +ENDFUNC + #undef CTX #undef BLKSZ #undef SI @@ -356,8 +364,6 @@ endswap_block: #undef LRK #undef BLKOFF -ENDFUNC - ///-------------------------------------------------------------------------- /// Encrypting and decrypting blocks. @@ -509,17 +515,23 @@ ENDFUNC ///-------------------------------------------------------------------------- /// Random utilities. - .align 16 +INTFUNC(bogus) // Abort the process because of a programming error. Indirecting // through this point serves several purposes: (a) by CALLing, rather // than branching to, `abort', we can save the return address, which // might at least provide a hint as to what went wrong; (b) we don't // have conditional CALLs (and they'd be big anyway); and (c) we can // write a HLT here as a backstop against `abort' being mad. -bogus: callext F(abort) +#if CPUFAM_AMD64 && ABI_WIN + .seh_endprologue +#endif + + callext F(abort) 0: hlt jmp 0b +ENDFUNC + ///-------------------------------------------------------------------------- /// Data tables.