1 /// -*- mode: asm; asm-comment-char: ?/ -*-
5 .section .note.GNU-stack, "", @progbits
13 .size \name, . - \name
34 mov rdi, [rip + stdout]
60 push rbp // flags, rbp, ..., rbx
63 push rsi // regs, flags, rbp, ..., rbx
66 push rax // cont, regs, flags, rbp, ..., rbx
67 push rdi // func, cont, regs, flags, rbp, ..., rbx
85 ret // -> func; regs, flags, rbp, ..., rbx
87 9: pushf // rflags, regs, flags, rbp, ..., rbx
88 push rsi // rsi, rflags, regs, flags, rbp, ..., rbx
96 pop rax // rflags, regs, flags, rbp, ..., rbx
98 pop rax // regs, flags, rbp, ..., rbx
101 add rsp, 8 // flags, rbp, ..., rbx
102 popf // rbp, ..., rbx
121 ///--------------------------------------------------------------------------
125 // clear all 64 bits of extended traditional registers
126 xor eax,eax // clear rax
127 lea rbx,[0] // rbx -> _|_
128 loop . // iterate, decrement rcx until zero
129 mov rdx,0 // set rdx = 0
130 and esi,0 // clear all bits of rsi
131 sub edi,edi // set rdi = edi - edi = 0
133 pop rbp // pop 0 into rbp
141 // advance a fibonacci pair by c steps
143 // on entry, a and d are f_{i+1} and f_i; on exit, they are f_{i+c+1}
144 // and f_{i+c}, where f_{i+1} = f_i + f_{i-1}
145 0: xadd rax, rdx // a, d = a + d, a (xadd: d gets old a, a gets the sum)
146 // = f_{i+1} + f_i, f_{i+1}
147 // = f_{i+2}, f_{i+1}
148 loop 0b // advance i, decrement c, iterate while c /= 0
156 // boolean canonify a: if a = 0 on entry, leave it zero; otherwise
158 neg rax // set cf iff a /= 0
159 sbb rax, rax // a = a - a - cf = -cf; i.e., 0 if a was
// zero, -1 (all bits set) otherwise
168 // set a = min(a, d) (unsigned); clobber c, d
169 sub rdx, rax // d' = d - a; set cf if a > d
170 sbb rcx, rcx // c = -cf = -[a > d]
171 and rcx, rdx // c = a > d ? d - a : 0
172 add rax, rcx // a' = a > d ? d : a
189 // answer whether 5 <= a </<= 9.
190 sub rax, 5 // a' = a - 5
191 cmp rax, 4 // is a' </<= 4?
196 // nz/ne a' /= 4 a /= 9
198 // a/nbe a' > 4 a > 9 or a < 5
199 // nc/ae/nb a' >= 4 a >= 9 or a < 5
200 // c/b/nae a' < 4 5 <= a < 9
201 // be/na a' <= 4 5 <= a <= 9
203 // o a' < -2^63 + 4 -2^63 + 5 <= a < -2^63 + 9
204 // no a' >= -2^63 + 4 a >= -2^63 + 9 or
206 // s -2^63 + 4 <= a' < 4 -2^63 + 9 <= a < 9
207 // ns a' < -2^63 + 4 or a < -2^63 + 9 or a >= 9
209 // ge/nl a' >= 4 a >= 9 or a < -2^63 + 5
210 // l/nge a' < 4 -2^63 + 5 <= a < 9
211 // g/nle a' > 4 a > 9 or a < -2^63 + 5
212 // le/ng a' <= 4 -2^63 + 5 <= a <= 9
220 // leave a unchanged, but set zf if a = 0, cf if a /= 0, clear of,
222 not rax // a' = -a - 1
232 // same as before (?)
233 inc rax // a' = a + 1
234 neg rax // a' = -a - 1
244 // floor((a + d)/2), correctly handling overflow conditions; final cf
245 // is lsb(a + d), probably uninteresting
246 add rax, rdx // cf || a' = a + d
247 rcr rax, 1 // shift 65-bit result right by one
248 // place; lsb moves into carry
256 // a = a/8, rounded to nearest; i.e., floor(a/8) if a == 0, 1, 2, 3
257 // (mod 8), or ceil(a/8) if a == 4, 5, 6, 7 (mod 8).
258 shr rax, 3 // a' = floor(a/8); cf = 1 if a ==
259 // 4, 5, 6, 7 (mod 8)
260 adc rax, 0 // a' = floor(a/8) + cf
268 // increment c-byte little-endian bignum at rdi
269 add byte ptr [rdi], 1
271 adc byte ptr [rdi], 0
280 // negate double-precision d:a
281 not rdx // d' = -d - 1
284 sbb rdx, -1 // d' = -d - cf
292 // rotate is distributive over xor.
294 // rax // = a_1 || a_0
295 // rbx // = b_1 || b_0
296 mov rcx, rax // = a_1 || a_0
298 xor rcx, rbx // = (a_1 XOR b_1) || (a_0 XOR b_0)
299 ror rcx, 0xd // = (a_0 XOR b_0) || (a_1 XOR b_1)
301 ror rax, 0xd // = a_0 || a_1
302 ror rbx, 0xd // = b_0 || b_1
303 xor rax, rbx // = (a_0 XOR b_0) || (a_1 XOR b_1)
305 cmp rax, rcx // always equal
313 // and is distributive over xor.
317 xor rbx, rcx // = b XOR c
318 and rbx, rax // = a AND (b XOR c)
320 and rdx, rax // = a AND b
321 and rax, rcx // = a AND c
322 xor rax, rdx // = (a AND b) XOR (a AND c)
325 cmp rax, rbx // always equal
337 and rcx, rbx // = a AND b
338 not rcx // = NOT (a AND b)
342 or rax, rbx // = (NOT a) OR (NOT b)
353 // replace input buffer bytes with cumulative XORs with initial a;
354 // final a is XOR of all buffer bytes and initial a.
356 // not sure why you'd do this.
370 // four different ways to swap a pair of registers.
394 // assuming a is initialized to zero, set a to the inclusive or of
395 // the xor-differences of corresponding bytes in the c-byte strings
398 // in particular, a will be zero (and zf set) if and only if the two
399 // strings are equal.
414 // an obtuse way of adding two registers. for any bit position, a
415 // OR d is set if and only if at least one of a and d has a bit set
416 // in that position, and a AND d is set if and only if both have a
417 // bit set in that position. essentially, then, what we've done is
418 // move all of the set bits in d to a, unless there's already a bit
419 // there. this clearly doesn't change the sum.
421 mov rcx, rdx // c' = d
422 and rdx, rax // d' = a AND d
423 or rax, rcx // a' = a OR d
432 // ok, so this is a really obtuse way of adding a and b; the result
433 // is in a and d. but why does it work?
435 mov rcx, 0x40 // carry chains at most 64 long
436 0: mov rdx, rax // copy a'
437 xor rax, rbx // low bits of each bitwise sum
438 and rbx, rdx // carry bits from each bitwise sum
439 shl rbx, 001 // carry them into next position
// loop invariant: a + b is unchanged each trip, because
// (a XOR b) + ((a AND b) << 1) = a + b; the pending carries in b
// migrate strictly upwards, so b must reach zero within 64 steps
448 // floor((a + d)/2), like x08.
450 mov rcx, rax // copy a for later
451 and rcx, rdx // carry bits
453 xor rax, rdx // low bits of each bitwise sum
454 shr rax, 1 // divide by 2; carries now in place
456 add rax, rcx // add the carries; done
464 // sign extension 32 -> 64 bits.
466 //movsx rbx, eax // like this?
468 mov rdx, 0xffffffff80000000
469 add rax, rdx // if bit 31 of a is set then bits
470 // 31--63 of a' are clear; otherwise,
471 // these bits are all set -- which is
473 xor rax, rdx // so fix it
485 xor rax, rbx // a' = a XOR b
486 xor rbx, rcx // b' = b XOR c
487 mov rsi, rax // t = a XOR b
488 add rsi, rbx // t = (a XOR b) + (b XOR c)
489 cmovc rax, rbx // a' = cf ? b XOR c : a XOR b
490 xor rax, rbx // a' = cf ? 0 : a XOR c