X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/160214515f6913d84e0e41253cf61281718bcd99..57e7040b318f0ffc5ab43c3fb62df9a2bef42ac7:/symm/rijndael-arm-crypto.S diff --git a/symm/rijndael-arm-crypto.S b/symm/rijndael-arm-crypto.S index 908faaa4..1e551698 100644 --- a/symm/rijndael-arm-crypto.S +++ b/symm/rijndael-arm-crypto.S @@ -30,8 +30,8 @@ #include "config.h" #include "asm-common.h" - .globl F(abort) - .globl F(rijndael_rcon) + .extern F(abort) + .extern F(rijndael_rcon) ///-------------------------------------------------------------------------- /// Main code. @@ -52,7 +52,7 @@ // Useful constants. .equ maxrounds, 16 // maximum number of rounds .equ maxblksz, 32 // maximum block size, in bytes - .equ kbufsz, maxblksz*(maxrounds + 1) // size of a key-schedule buffer + .equ kbufsz, maxblksz*(maxrounds + 1) // size of key-sched buffer // Context structure. .equ nr, 0 // number of rounds @@ -70,7 +70,7 @@ FUNC(rijndael_setup_arm_crypto) // r2 = pointer to key material // r3 = key size in words - stmfd sp!, {r4-r9, r14} + pushreg {r4-r9, r14} // The initial round key material is taken directly from the input // key, so copy it over. Unfortunately, the key material is not @@ -209,11 +209,14 @@ FUNC(rijndael_setup_arm_crypto) bl endswap_block // All done. -9: ldmfd sp!, {r4-r9, pc} +9: popreg {r4-r9, pc} -endswap_block: +ENDFUNC + +INTFUNC(endswap_block) // End-swap R2 words starting at R1. R1 is clobbered; R2 is not. // It's OK to work in 16-byte chunks. + mov r4, r2 0: vldmia r1, {d0, d1} vrev32.8 q0, q0 @@ -227,7 +230,8 @@ ENDFUNC ///-------------------------------------------------------------------------- /// Encrypting and decrypting blocks. -FUNC(rijndael_eblk_arm_crypto) +.macro encdec op, aes, mc, koff + FUNC(rijndael_\op\()_arm_crypto) // Arguments: // r0 = pointer to context @@ -236,77 +240,95 @@ FUNC(rijndael_eblk_arm_crypto) // Set things up ready. ldr r3, [r0, #nr] - add r0, r0, #w + add r0, r0, #\koff vldmia r1, {d0, d1} vrev32.8 q0, q0 - // Dispatch according to the number of rounds. - add r3, r3, r3, lsl #1 - rsbs r3, r3, #3*14 - addcs pc, pc, r3, lsl #2 + // Check the number of rounds and dispatch. + sub r3, r3, #10 + cmp r3, #5 + addlo pc, pc, r3, lsl #2 callext F(abort) - // The last round doesn't have MixColumns, so do it separately. - .rept 13 - vldmia r0!, {d2, d3} - aese.8 q0, q1 - aesmc.8 q0, q0 - .endr - - // Final round. - vldmia r0!, {d2, d3} - aese.8 q0, q1 - - // Final whitening. - vldmia r0!, {d2, d3} - veor q0, q1 + b 10f + b 11f + b 12f + b 13f + b 14f + + // Eleven rounds. +11: vldmia r0!, {d16, d17} + \aes\().8 q0, q8 + \mc\().8 q0, q0 + b 10f + + // Twelve rounds. +12: vldmia r0!, {d16-d19} + \aes\().8 q0, q8 + \mc\().8 q0, q0 + \aes\().8 q0, q9 + \mc\().8 q0, q0 + b 10f + + // Thirteen rounds. +13: vldmia r0!, {d16-d21} + \aes\().8 q0, q8 + \mc\().8 q0, q0 + \aes\().8 q0, q9 + \mc\().8 q0, q0 + \aes\().8 q0, q10 + \mc\().8 q0, q0 + b 10f + + // Fourteen rounds. (Drops through to the ten round case because + // this is the next most common.) +14: vldmia r0!, {d16-d23} + \aes\().8 q0, q8 + \mc\().8 q0, q0 + \aes\().8 q0, q9 + \mc\().8 q0, q0 + \aes\().8 q0, q10 + \mc\().8 q0, q0 + \aes\().8 q0, q11 + \mc\().8 q0, q0 + // Drop through... + + // Ten rounds. +10: vldmia r0!, {d16-d25} + \aes\().8 q0, q8 + \mc\().8 q0, q0 + \aes\().8 q0, q9 + \mc\().8 q0, q0 + \aes\().8 q0, q10 + \mc\().8 q0, q0 + \aes\().8 q0, q11 + \mc\().8 q0, q0 + \aes\().8 q0, q12 + \mc\().8 q0, q0 + + vldmia r0!, {d16-d27} + \aes\().8 q0, q8 + \mc\().8 q0, q0 + \aes\().8 q0, q9 + \mc\().8 q0, q0 + \aes\().8 q0, q10 + \mc\().8 q0, q0 + \aes\().8 q0, q11 + \mc\().8 q0, q0 + + // Final round has no MixColumns, but is followed by final whitening. + \aes\().8 q0, q12 + veor q0, q0, q13 // All done. vrev32.8 q0, q0 vstmia r2, {d0, d1} bx r14 -ENDFUNC - -FUNC(rijndael_dblk_arm_crypto) + ENDFUNC +.endm - // Arguments: - // r0 = pointer to context - // r1 = pointer to input block - // r2 = pointer to output block - - // Set things up ready. - ldr r3, [r0, #nr] - add r0, r0, #wi - vldmia r1, {d0, d1} - vrev32.8 q0, q0 - - // Dispatch according to the number of rounds. - add r3, r3, r3, lsl #1 - rsbs r3, r3, #3*14 - addcs pc, pc, r3, lsl #2 - callext F(abort) - - // The last round doesn't have MixColumns, so do it separately. - .rept 13 - vldmia r0!, {d2, d3} - aesd.8 q0, q1 - aesimc.8 q0, q0 - .endr - - // Final round. - vldmia r0!, {d2, d3} - aesd.8 q0, q1 - - // Final whitening. - vldmia r0!, {d2, d3} - veor q0, q1 - - // All done. - vrev32.8 q0, q0 - vstmia r2, {d0, d1} - bx r14 - -ENDFUNC + encdec eblk, aese, aesmc, w + encdec dblk, aesd, aesimc, wi ///----- That's all, folks --------------------------------------------------