/// -*- mode: asm; asm-comment-char: ?/ -*-
///
/// Small x86-64 register puzzles (x00 .. x3f) plus a harness, call_example,
/// which runs one of them against a caller-supplied register block.
///
/// Syntax: GNU as, Intel operand order, no register prefixes.
/// Comments use `//', so the file must go through the C preprocessor.

        .intel_syntax noprefix

        .section .note.GNU-stack, "", @progbits

///--------------------------------------------------------------------------
/// Macros.

// proc NAME ... endproc
//
// Open a global function symbol NAME, aligned to 16 bytes.  The matching
// `endproc' is defined afresh by each `proc': it records the symbol size
// and then purges itself, so every `proc' needs exactly one `endproc'.
.macro  proc    name
        .globl  \name
        .type   \name, STT_FUNC
        .p2align 4
\name\():
        .macro  endproc
        .size   \name, . - \name
        .purgem endproc
        .endm
.endm

// ch C
//
// Debugging aid: write character C with putchar and flush stdout, leaving
// rflags and the integer caller-saved registers as they were.  rbp holds
// the original stack pointer while rsp is forced to 16-byte alignment for
// the calls.  r10 and r11 are saved as well because they are caller-saved
// in the System V ABI and the callees are free to overwrite them.
// Vector registers are not preserved.
.macro  ch      c
        pushf
        push    rax
        push    rcx
        push    rdx
        push    rsi
        push    rdi
        push    r8
        push    r9
        push    r10
        push    r11
        push    rbp
        mov     rbp, rsp                // remember the unaligned rsp
        and     rsp, -16                // align the stack for the calls

        mov     rdi, \c
        call    putchar@plt
        mov     rdi, [rip + stdout]
        call    fflush@plt

        mov     rsp, rbp                // back to the saved-register area
        pop     rbp
        pop     r11
        pop     r10
        pop     r9
        pop     r8
        pop     rdi
        pop     rsi
        pop     rdx
        pop     rcx
        pop     rax
        popf
.endm

///--------------------------------------------------------------------------
/// Harness.

        .text

proc    call_example

        // In:  rdi = func, address of the code to run
        //      rsi = regs, address of a block of eight 64-bit words:
        //
        //              +0  rax         +32 rsi
        //              +8  rbx         +40 rdi
        //              +16 rcx         +48 rbp
        //              +24 rdx         +56 rflags
        //
        // Loads the registers and the arithmetic/direction flags from the
        // block, runs func, and stores the resulting registers and flags
        // back into the same block.
        //
        // Stack pictures below list the most recently pushed item first.
        //
        // NOTE(review): func is entered by `ret', with rsp 8 bytes away
        // from the alignment a normal `call' would give -- assuming this
        // routine was itself called with standard alignment.  Harmless for
        // leaf code; confirm before running anything that calls out
        // without realigning.

        push    rbx                     // rbx
        push    r10
        push    r11
        push    r12
        push    r13
        push    r14
        push    r15
        push    rbp                     // rbp, ..., rbx
        pushf                           // flags, rbp, ..., rbx

        push    rsi                     // regs, flags, rbp, ..., rbx

        lea     rax, [rip + 9f]
        push    rax                     // cont, regs, flags, rbp, ..., rbx
        push    rdi                     // func, cont, regs, flags, rbp, ...

        // Merge the requested flags into the live ones.  The mask 0x0cd5
        // selects cf, pf, af, zf, sf, df and of; every other flag bit
        // keeps its current value.
        mov     rax, [rsi + 56]
        pushf
        pop     rcx
        and     rax, 0x0cd5
        and     rcx, ~0x0cd5
        or      rax, rcx
        push    rax
        popf

        // Load the registers; rsi goes last because it is the base.
        mov     rax, [rsi + 0]
        mov     rbx, [rsi + 8]
        mov     rcx, [rsi + 16]
        mov     rdx, [rsi + 24]
        mov     rdi, [rsi + 40]
        mov     rbp, [rsi + 48]
        mov     rsi, [rsi + 32]

        ret                             // -> func; cont, regs, flags, ...

9:      pushf                           // rflags, regs, flags, rbp, ..., rbx
        push    rsi                     // rsi, rflags, regs, flags, rbp, ...

        mov     rsi, [rsp + 16]         // recover the block address
        mov     [rsi + 0], rax
        mov     [rsi + 8], rbx
        mov     [rsi + 16], rcx
        mov     [rsi + 24], rdx
        mov     [rsi + 40], rdi
        mov     [rsi + 48], rbp

        pop     rax                     // rflags, regs, flags, rbp, ..., rbx
        mov     [rsi + 32], rax         // that was the rsi left by func
        pop     rax                     // regs, flags, rbp, ..., rbx
        mov     [rsi + 56], rax         // and that was its rflags

        add     rsp, 8                  // flags, rbp, ..., rbx

        popf                            // rbp, ..., rbx
        pop     rbp                     // ..., rbx
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     r11
        pop     r10
        pop     rbx                     //
        ret

endproc

proc    nop

        ret

endproc

///--------------------------------------------------------------------------
/// Puzzles.  The instruction sequences are deliberate; do not tidy them.

proc    x00

        // clear all 64 bits of extended traditional registers
        xor     eax, eax                // clear rax
        lea     rbx, [0]                // rbx -> _|_
        loop    .                       // iterate, decrement rcx until zero
        mov     rdx, 0                  // set rdx = 0
        and     esi, 0                  // clear all bits of rsi
        sub     edi, edi                // set rdi = edi - edi = 0
        push    0
        pop     rbp                     // pop 0 into rbp

        ret

endproc

proc    x01

        // advance a fibonacci pair by c steps
        //
        // on entry, a and d are f_{i+1} and f_i; on exit, they are f_{i+c+1}
        // and f_{i+c}, where f_{i+1} = f_i + f_{i-1}
0:      xadd    rax, rdx                // a, d = a + d, a
                                        //      = f_{i+1} + f_i, f_{i+1}
                                        //      = f_{i+2}, f_{i+1}
        loop    0b                      // advance i, decrement c, iterate

        ret

endproc

proc    x02

        // boolean canonify a: if a = 0 on entry, leave it zero; otherwise
        // set a = 1
        neg     rax                     // set cf iff a /= 0
        sbb     rax, rax                // a = a - a - cf = -cf
        neg     rax                     // a = cf

        ret

endproc

proc    x03

        // set a = min(a, d) (unsigned); clobber c, d
        sub     rdx, rax                // d' = d - a; set cf if a > d
        sbb     rcx, rcx                // c = -cf = -[a > d]
        and     rcx, rdx                // c = a > d ? d - a : 0
        add     rax, rcx                // a' = a > d ? d : a

        ret

endproc

proc    x04

        // switch case?
        xor     al, 0x20

        ret

endproc

proc    x05

        // answer whether 5 <= a < 9 or 5 <= a <= 9, depending on which
        // condition code the caller tests afterwards.
        //
        // NOTE(review): the two instructions, the table heading and the
        // z/e, nz/ne and a/nbe rows were missing from the damaged source;
        // they are reconstructed from the remaining rows, which all
        // describe a' = a - 5 compared against 4.
        sub     rax, 5                  // a' = a - 5
        cmp     rax, 4                  // flags from a' - 4

        // cc           a'                      a
        //
        // z/e          a' = 4                  a = 9
        // nz/ne        a' /= 4                 a /= 9
        //
        // a/nbe        a' > 4                  a > 9 or a < 5
        // nc/ae/nb     a' >= 4                 a >= 9 or a < 5
        // c/b/nae      a' < 4                  5 <= a < 9
        // be/na        a' <= 4                 5 <= a <= 9
        //
        // o            a' < -2^63 + 4          -2^63 + 5 <= a < -2^63 + 9
        // no           a' >= -2^63 + 4         a >= -2^63 + 9 or
        //                                        a < -2^63 + 5
        // s            -2^63 + 4 <= a' < 4     -2^63 + 9 <= a < 9
        // ns           a' < -2^63 + 4 or       a < -2^63 + 9 or a >= 9
        //                a' >= 4
        // ge/nl        a' >= 4                 a >= 9 or a < -2^63 + 5
        // l/nge        a' < 4                  -2^63 + 5 <= a < 9
        // g/nle        a' > 4                  a > 9 or a < -2^63 + 5
        // le/ng        a' <= 4                 -2^63 + 5 <= a <= 9

        ret

endproc

proc    x06

        // leave a unchanged, but set zf if a = 0, cf if a /= 0, clear of,
        // set sf to msb(a)
        not     rax                     // a' = -a - 1
        inc     rax                     // a' = -a
        neg     rax                     // a' = a

        ret

endproc

proc    x07

        // same as before (?)
        inc     rax                     // a' = a + 1
        neg     rax                     // a' = -a - 1
        inc     rax                     // a' = -a
        neg     rax                     // a' = a

        ret

endproc

proc    x08

        // floor((a + d)/2), correctly handling overflow conditions; final cf
        // is lsb(a + d), probably uninteresting
        add     rax, rdx                // cf || a' = a + d
        rcr     rax, 1                  // shift 65-bit result right by one
                                        // place; lsb moves into carry

        ret

endproc

proc    x09

        // a = a/8, rounded to nearest; i.e., floor(a/8) if a == 0, 1, 2, 3
        // (mod 8), or ceil(a/8) if a == 4, 5, 6, 7 (mod 8).
        shr     rax, 3                  // a' = floor(a/8); cf = 1 if a ==
                                        // 4, 5, 6, 7 (mod 8)
        adc     rax, 0                  // a' = floor(a/8) + cf

        ret

endproc

proc    x0a

        // increment c-byte little-endian bignum at rdi
        add     byte ptr [rdi], 1
0:      inc     rdi                     // inc leaves cf alone
        adc     byte ptr [rdi], 0
        loop    0b

        ret

endproc

proc    x0b

        // negate double-precision d:a
        not     rdx                     // d' = -d - 1
        neg     rax                     // a' = -a;
                                        // cf = 1 iff a /= 0
        sbb     rdx, -1                 // d' = -d - cf

        ret

endproc

proc    x0c

        // rotate is distributive over xor.

        // rax                          // = a_1 || a_0
        // rbx                          // = b_1 || b_0
        mov     rcx, rax                // = a_1 || a_0

        xor     rcx, rbx                // = (a_1 XOR b_1) || (a_0 XOR b_0)
        ror     rcx, 0xd                // = (a_0 XOR b_0) || (a_1 XOR b_1)

        ror     rax, 0xd                // = a_0 || a_1
        ror     rbx, 0xd                // = b_0 || b_1
        xor     rax, rbx                // = (a_0 XOR b_0) || (a_1 XOR b_1)

        cmp     rax, rcx                // always equal

        ret

endproc

proc    x0d

        // and is distributive over xor.

        mov     rdx, rbx                // = b

        xor     rbx, rcx                // = b XOR c
        and     rbx, rax                // = a AND (b XOR c)

        and     rdx, rax                // = a AND b
        and     rax, rcx                // = a AND c
        xor     rax, rdx                // = (a AND b) XOR (a AND c)
                                        // = a AND (b XOR c)

        cmp     rax, rbx                // always equal

        ret

endproc

proc    x0e

        // de morgan's law

        mov     rcx, rax                // = a

        and     rcx, rbx                // = a AND b
        not     rcx                     // = NOT (a AND b)

        not     rax                     // = NOT a
        not     rbx                     // = NOT b
        or      rax, rbx                // = (NOT a) OR (NOT b)
                                        // = NOT (a AND b)

        cmp     rax, rcx

        ret

endproc

proc    x0f

        // replace input buffer bytes with cumulative XORs with initial a;
        // final a is XOR of all buffer bytes and initial a.
        //
        // not sure why you'd do this.

        cld

0:      xor     [rsi], al
        lodsb
        loop    0b

        ret

endproc

proc    x10

        // four different ways to swap a pair of registers.

        push    rax
        push    rcx
        pop     rax
        pop     rcx

        xor     rax, rcx
        xor     rcx, rax
        xor     rax, rcx

        add     rax, rcx
        sub     rcx, rax
        add     rax, rcx
        neg     rcx

        xchg    rax, rcx

        ret

endproc

proc    x11

        // assuming a is initialized to zero, set a to the inclusive or of
        // the xor-differences of corresponding bytes in the c-byte strings
        // at si and di.
        //
        // in particular, a will be zero (and zf set) if and only if the two
        // strings are equal.

0:      mov     dl, [rsi]
        xor     dl, [rdi]
        inc     rsi
        inc     rdi
        or      al, dl
        loop    0b

        ret

endproc

proc    x12

        // an obtuse way of adding two registers.  for any bit position, a
        // OR d is set if and only if at least one of a and d has a bit set
        // in that position, and a AND d is set if and only if both have a
        // bit set in that position.  essentially, then, what we've done is
        // move all of the set bits in d to a, unless there's already a bit
        // there.  this clearly doesn't change the sum.

        mov     rcx, rdx                // c' = d
        and     rdx, rax                // d' = a AND d
        or      rax, rcx                // a' = a OR d
        add     rax, rdx

        ret

endproc

proc    x13

        // ok, so this is a really obtuse way of adding a and b; the result
        // is in a and d.  but why does it work?

        mov     rcx, 0x40               // carry chains at most 64 long
0:      mov     rdx, rax                // copy a'
        xor     rax, rbx                // low bits of each bitwise sum
        and     rbx, rdx                // carry bits from each bitwise sum
        shl     rbx, 001                // carry them into next position
        loop    0b

        ret

endproc

proc    x14

        // floor((a + d)/2), like x08.

        mov     rcx, rax                // copy a for later
        and     rcx, rdx                // carry bits

        xor     rax, rdx                // low bits of each bitwise sum
        shr     rax, 1                  // divide by 2; carries now in place

        add     rax, rcx                // add the carries; done

        ret

endproc

proc    x15

        // sign extension 32 -> 64 bits.

        //movsx rbx, eax                // like this?

        mov     rdx, 0xffffffff80000000
        add     rax, rdx                // if bit 31 of a is set then bits
                                        // 31--63 of a' are clear; otherwise,
                                        // these bits are all set -- which is
                                        // exactly backwards
        xor     rax, rdx                // so fix it

        ret

endproc

proc    x16

        //shl   rax, 56
        //shl   rbx, 56
        //shl   rcx, 56

        xor     rax, rbx                // a' = a XOR b
        xor     rbx, rcx                // b' = b XOR c
        mov     rsi, rax                // t = a XOR b
        add     rsi, rbx                // t = (a XOR b) + (b XOR c)
        cmovc   rax, rbx                // a' = cf ? b XOR c : a XOR b
        xor     rax, rbx                // a' = cf ? 0 : a XOR c
        cmp     rax, rsi

        ret

endproc

///--------------------------------------------------------------------------
/// Placeholders: not written yet, so they trap if anything runs them.

proc    x17
        ud2
endproc

proc    x18
        ud2
endproc

proc    x19
        ud2
endproc

proc    x1a
        ud2
endproc

proc    x1b
        ud2
endproc

proc    x1c
        ud2
endproc

proc    x1d
        ud2
endproc

proc    x1e
        ud2
endproc

proc    x1f
        ud2
endproc

proc    x20
        ud2

        ret
endproc

proc    x21
        ud2
endproc

proc    x22
        ud2
endproc

proc    x23
        ud2
endproc

proc    x24
        ud2
endproc

proc    x25
        ud2
endproc

proc    x26
        ud2
endproc

proc    x27
        ud2
endproc

proc    x28
        ud2
endproc

proc    x29
        ud2
endproc

proc    x2a
        ud2
endproc

proc    x2b
        ud2
endproc

proc    x2c
        ud2
endproc

proc    x2d
        ud2
endproc

proc    x2e
        ud2
endproc

proc    x2f
        ud2
endproc

proc    x30
        ud2

        ret
endproc

proc    x31
        ud2
endproc

proc    x32
        ud2
endproc

proc    x33
        ud2
endproc

proc    x34
        ud2
endproc

proc    x35
        ud2
endproc

proc    x36
        ud2
endproc

proc    x37
        ud2
endproc

proc    x38
        ud2
endproc

proc    x39
        ud2
endproc

proc    x3a
        ud2
endproc

proc    x3b
        ud2
endproc

proc    x3c
        ud2
endproc

proc    x3d
        ud2
endproc

proc    x3e
        ud2
endproc

proc    x3f
        ud2
endproc