From: Ian Jackson Date: Thu, 25 Jul 2013 17:30:47 +0000 (+0100) Subject: memcmp: Introduce and use consttime_memeq X-Git-Tag: debian/0.3.0_beta2~36 X-Git-Url: https://git.distorted.org.uk/~mdw/secnet/commitdiff_plain/5ad34db2ccbbfbc936cd16121bed2110b70594ba?hp=5ad34db2ccbbfbc936cd16121bed2110b70594ba memcmp: Introduce and use consttime_memeq We need to use a constant-time memcmp in MAC checking, to avoid leaking (to an adversary) how much of the MAC is right. (This would be especially dangerous if our MAC was outside the encryption, which thankfully it isn't.) The use of "volatile" on the accumulator prevents the compiler from optimising away any of the updates to the accumulator, each of which depends on all the bits of the two bytes being compared. So that stops the compiler shortcutting the computation. This also prevents the compiler spotting our boolean canonicalisation, and forces it to do it the way we say. This is true according to the spec by virtue of C99 6.7.3(6). In an attempt to get the compiler to eliminate the pointless repeated loading and storing of the single-byte accumulator value, I have specified it as "register volatile". There is no rule against "register volatile", but my compiler ignores the "register". To double check that all is well, here is an annotated disassembly, from this command line: gcc -save-temps -DHAVE_CONFIG_H -I. -I. 
-Wall -Wwrite-strings -g -O2 \ -Werror -W -Wno-unused -Wno-pointer-sign -Wstrict-prototypes \ -Wmissing-prototypes -Wmissing-declarations -Wnested-externs \ -Wredundant-decls -Wpointer-arith -Wformat=2 -Winit-self \ -Wswitch-enum -Wunused-variable -Wbad-function-cast \ -Wno-strict-aliasing -fno-strict-aliasing -c util.c -o util.o This is the relevant part of util.s, as generated by gcc 4.4.5-8 i486-linux-gnu, with my annotations: .globl consttime_memeq .type consttime_memeq, @function consttime_memeq: .LFB80: .loc 1 367 0 .cfi_startproc .LVL8: pushl %ebp .LCFI8: .cfi_def_cfa_offset 8 movl %esp, %ebp .cfi_offset 5, -8 .LCFI9: .cfi_def_cfa_register 5 pushl %edi pushl %esi pushl %ebx subl $16, %esp .loc 1 367 0 movl 16(%ebp), %ebx ebx : n .cfi_offset 3, -20 .cfi_offset 6, -16 .cfi_offset 7, -12 movl 8(%ebp), %esi esi : s1in movl 12(%ebp), %edi edi : s2in .loc 1 369 0 movb $0, -13(%ebp) -13(ebp) : accumulator .LVL9: .loc 1 371 0 testl %ebx, %ebx if (!n) je .L15 goto no_bytes; i.e. if (n) { ...loop... } .LVL10: The compiler has chosen to invent an offset variable for controlling the loop, rather than pointer arithmetic. We'll call that variable i. It ranges from 0..n-1 as expected. The compiler doesn't explicitly compute the per-iteration pointers s1 and s2, instead using an addressing mode. xorl %edx, %edx edx : i .p2align 4,,7 .p2align 3 .L16: more_bytes: /* loop */ .loc 1 372 0 movzbl (%edi,%edx), %eax eax = *s2; .LVL11: movzbl -13(%ebp), %ecx ecx = accumulator; xorb (%esi,%edx), %al al = *s1 ^ *s2; addl $1, %edx i++; orl %ecx, %eax eax = accumulator | (*s1^*s2) .LVL12: .loc 1 371 0 cmpl %edx, %ebx [ if (i==n) ... ] .loc 1 372 0 movb %al, -13(%ebp) accumulator = eax; i.e., overall, accumulator |= *s1 ^ *s2 .loc 1 371 0 jne .L16 ... 
[if (i!=n)] goto more_bytes; .LVL13: .L15: no_bytes: .loc 1 374 0 /* end of loop and if */ /* now doing the shift-by-4: */ movzbl -13(%ebp), %eax eax = accumulator; movzbl -13(%ebp), %edx edx = accumulator; shrb $4, %al al >>= 4; .LVL14: orl %edx, %eax eax |= edx; .LVL15: movb %al, -13(%ebp) accumulator = eax; i.e., overall, accumulator |= accumulator >> 4; .loc 1 375 0 movzbl -13(%ebp), %eax /* same again but for shift-by-2 */ movzbl -13(%ebp), %edx shrb $2, %al orl %edx, %eax .LVL16: movb %al, -13(%ebp) .loc 1 376 0 movzbl -13(%ebp), %eax /* same again but for shift-by-1 */ movzbl -13(%ebp), %edx shrb %al orl %edx, %eax .LVL17: movb %al, -13(%ebp) /* now computed final accumulator */ .loc 1 377 0 movzbl -13(%ebp), %eax eax = accumulator; andl $1, %eax eax &= 1; .LVL18: movb %al, -13(%ebp) accumulator = eax; .loc 1 378 0 movzbl -13(%ebp), %eax eax = accumulator; xorl $1, %eax eax ^= 1; .LVL19: movb %al, -13(%ebp) accumulator = eax; .loc 1 379 0 movzbl -13(%ebp), %eax eax = accumulator; .LVL20: .loc 1 380 0 addl $16, %esp function epilogue popl %ebx ... .LVL21: popl %esi ... .LVL22: popl %edi ... .LVL23: .loc 1 379 0 movzbl %al, %eax zero extend? .loc 1 380 0 popl %ebp ... ret return eax; i.e. return accumulator; .cfi_endproc .LFE80: .size consttime_memeq, .-consttime_memeq Signed-off-by: Ian Jackson ---