X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/0f23f75ff53acadf80e9d3dfd2dfd14cb526074f..bc9ac7ebf31d42ea0965ceded383714207746c27:/symm/chacha-x86ish-sse2.S diff --git a/symm/chacha-x86ish-sse2.S b/symm/chacha-x86ish-sse2.S index f36bf90f..641ad653 100644 --- a/symm/chacha-x86ish-sse2.S +++ b/symm/chacha-x86ish-sse2.S @@ -42,7 +42,7 @@ /// Main code. .arch pentium4 - .section .text + .text FUNC(chacha_core_x86ish_sse2) @@ -131,7 +131,7 @@ FUNC(chacha_core_x86ish_sse2) movdqa SAVE2, xmm2 movdqa SAVE3, xmm3 -loop: +0: // Apply a column quarterround to each of the columns simultaneously. // Alas, there doesn't seem to be a packed doubleword rotate, so we // have to synthesize it. @@ -228,7 +228,7 @@ loop: // Decrement the loop counter and see if we should go round again. sub NR, 2 - ja loop + ja 0b // Almost there. Firstly, the feedforward addition. paddd xmm0, SAVE0