| 1 | /// -*- mode: asm; asm-comment-char: ?/ -*- |
| 2 | |
// the whole file is written in intel operand order (destination first)
// with bare register names
	.intel_syntax noprefix

// emit an empty .note.GNU-stack section; GNU toolchains read this as a
// statement that the object does not need an executable stack
	.section .note.GNU-stack, "", @progbits
| 6 | |
// proc NAME ... endproc: bracket the definition of an exported function
//
// `proc NAME' makes NAME global, gives it function type, aligns the
// location counter to a 16-byte boundary (.p2align 4) and plants the
// label.  it also defines a one-shot `endproc' macro which records the
// size of NAME and then purges itself, so that the next `proc' is free
// to define a fresh `endproc'.
.macro proc name
	.globl \name
	.type \name, STT_FUNC
	.p2align 4
\name\():
	.macro endproc
	.size \name, . - \name
	.purgem endproc
	.endm
.endm
| 17 | |
// ch C: debugging aid -- write the character C to stdout and flush it
//
// the macro is meant to be dropped into the middle of other code, so it
// saves and restores rflags and every caller-saved integer register
// (rax, rcx, rdx, rsi, rdi, r8--r11); the callee-saved registers are
// left to putchar and fflush to preserve.  rbp is used to remember the
// original stack pointer while the stack is realigned for the calls.
//
// caveats: vector registers are not saved, and the pushes overwrite the
// memory just below rsp, so do not use this in code which keeps live
// data in the red zone.
//
// stdout is an external data object, so its address is fetched through
// the got; this works in position-independent objects as well as in
// ordinary executables.
.macro ch c
	pushf
	push rax
	push rcx
	push rdx
	push rsi
	push rdi
	push r8
	push r9
	push r10
	push r11
	push rbp
	mov rbp, rsp
	and rsp, -16

	mov rdi, \c
	call putchar@plt

	mov rdi, [rip + stdout@GOTPCREL]
	mov rdi, [rdi]
	call fflush@plt

	mov rsp, rbp
	pop rbp
	pop r11
	pop r10
	pop r9
	pop r8
	pop rdi
	pop rsi
	pop rdx
	pop rcx
	pop rax
	popf
.endm
| 48 | |
// everything from here on is assembled into the code section
	.text
| 50 | |
// call_example: run a code fragment with a chosen register state and
// capture the state it leaves behind
//
// on entry, rdi holds the address of the fragment to run and rsi the
// address of a block of eight 64-bit slots:
//
//	 +0  rax	 +8  rbx	+16  rcx	+24  rdx
//	+32  rsi	+40  rdi	+48  rbp	+56  rflags
//
// the fragment is entered with these register values.  only the flag
// bits in 0x0cd5 (cf, pf, af, zf, sf, df, of) are taken from the rflags
// slot; every other flag bit keeps its current value.  when the
// fragment returns, the same seven registers and the complete rflags
// are written back to the block.
//
// the fragment is reached by pushing its address and executing `ret',
// with the address of label 9 beneath it as the return address.  one
// padding slot is kept on the stack so that the fragment sees the stack
// alignment a real `call' would give it (rsp == 8 mod 16 on entry).
//
// rbx, rbp, r10--r15 and rflags are restored before returning; rax,
// rcx, rdx, rsi and rdi are not.  the stack comments show the stack
// contents after each instruction, top first.
proc call_example

	push rbx			// rbx
	push r10
	push r11
	push r12
	push r13
	push r14
	push r15
	push rbp			// rbp, ..., rbx
	pushf				// flags, rbp, ..., rbx

	sub rsp, 8			// pad, flags, rbp, ..., rbx
	push rsi			// regs, pad, flags, rbp, ..., rbx

	lea rax, [rip + 9f]
	push rax			// cont, regs, pad, flags, rbp, ..., rbx
	push rdi			// func, cont, regs, pad, flags, rbp, ..., rbx

	// merge the requested status and direction flags into the current
	// rflags, leaving all other bits alone
	mov rax, [rsi + 56]
	pushf
	pop rcx
	and rax, 0x0cd5
	and rcx, ~0x0cd5
	or rax, rcx
	push rax
	popf

	// load the fragment's registers; rsi goes last because it is the
	// base register for all of the loads
	mov rax, [rsi + 0]
	mov rbx, [rsi + 8]
	mov rcx, [rsi + 16]
	mov rdx, [rsi + 24]
	mov rdi, [rsi + 40]
	mov rbp, [rsi + 48]
	mov rsi, [rsi + 32]

	ret				// -> func; cont, regs, pad, flags, rbp, ..., rbx

	// the fragment returns here; its return popped cont
9:	pushf				// rflags, regs, pad, flags, rbp, ..., rbx
	push rsi			// rsi, rflags, regs, pad, flags, rbp, ..., rbx
	mov rsi, [rsp + 16]		// recover the register block address
	mov [rsi + 0], rax
	mov [rsi + 8], rbx
	mov [rsi + 16], rcx
	mov [rsi + 24], rdx
	mov [rsi + 40], rdi
	mov [rsi + 48], rbp
	pop rax				// rflags, regs, pad, flags, rbp, ..., rbx
	mov [rsi + 32], rax		// the fragment's rsi
	pop rax				// regs, pad, flags, rbp, ..., rbx
	mov [rsi + 56], rax		// the fragment's rflags

	add rsp, 16			// flags, rbp, ..., rbx
	popf				// rbp, ..., rbx
	pop rbp				// ..., rbx
	pop r15
	pop r14
	pop r13
	pop r12
	pop r11
	pop r10
	pop rbx				//
	ret

endproc
| 114 | |
proc nop

	// do nothing at all: return to the caller immediately
	ret

endproc
| 120 | |
| 121 | ///-------------------------------------------------------------------------- |
| 122 | |
proc x00

	// clear all 64 bits of extended traditional registers, using a
	// different technique for each one
	xor eax,eax			// clear rax (a 32-bit write zeroes
					// the upper half too)
	lea rbx,[0]			// rbx = address 0
	loop .				// iterate, decrement rcx until zero;
					// if rcx = 0 on entry, it wraps and
					// this takes 2^64 iterations
	mov rdx,0			// set rdx = 0
	and esi,0			// clear all bits of rsi
	sub edi,edi			// set rdi = edi - edi = 0
	push 0
	pop rbp				// pop 0 into rbp

	ret

endproc
| 138 | |
proc x01

	// advance a fibonacci pair by c steps
	//
	// on entry, a and d are f_{i+1} and f_i; on exit, they are f_{i+c+1}
	// and f_{i+c}, where f_{i+1} = f_i + f_{i-1}
	//
	// c is decremented before it is tested, so c = 0 on entry means
	// 2^64 steps rather than none
0:	xadd rax, rdx			// a, d = a + d, a
					// = f_{i+1} + f_i, f_{i+1}
					// = f_{i+2}, f_{i+1}
	loop 0b				// advance i, decrement c, iterate

	ret

endproc
| 153 | |
proc x02

	// boolean canonify a: if a = 0 on entry, leave it zero; otherwise
	// set a = 1
	neg rax				// set cf iff a /= 0
	sbb rax, rax			// a = a - a - cf = -cf
	neg rax				// a = cf

	ret

endproc
| 165 | |
proc x03

	// set a = min(a, d) (unsigned); clobber c, d
	//
	// branch-free: c becomes an all-ones mask exactly when a > d, and
	// the mask selects whether the difference d - a is added back to a
	sub rdx, rax			// d' = d - a; set cf if a > d
	sbb rcx, rcx			// c = -cf = -[a > d]
	and rcx, rdx			// c = a > d ? d - a : 0
	add rax, rcx			// a' = a > d ? d : a

	ret

endproc
| 177 | |
proc x04

	// switch case?
	//
	// flips bit 5 of al and nothing else; for an ascii letter this
	// toggles between upper case (e.g., 0x41) and lower case (0x61)
	xor al, 0x20

	ret

endproc
| 186 | |
proc x05

	// answer whether 5 <= a </<= 9.
	//
	// the answer is left in the flags; the table below says what each
	// condition code means afterwards, in terms of a' = a - 5 and of
	// the original a.  the first two groups read the values as
	// unsigned, the last group as signed.
	sub rax, 5			// a' = a - 5
	cmp rax, 4			// is a' </<= 4?

	// cc		a'			a
	//
	// z/e		a' = 4			a = 9
	// nz/ne	a' /= 4			a /= 9
	//
	// a/nbe	a' > 4			a > 9 or a < 5
	// nc/ae/nb	a' >= 4			a >= 9 or a < 5
	// c/b/nae	a' < 4			5 <= a < 9
	// be/na	a' <= 4			5 <= a <= 9
	//
	// o		a' < -2^63 + 4		-2^63 + 5 <= a < -2^63 + 9
	// no		a' >= -2^63 + 4		a >= -2^63 + 9 or
	//					a < -2^63 + 5
	// s		-2^63 + 4 <= a' < 4	-2^63 + 9 <= a < 9
	// ns		a' < -2^63 + 4 or	a < -2^63 + 9 or a >= 9
	//		a' >= 4
	// ge/nl	a' >= 4			a >= 9 or a < -2^63 + 5
	// l/nge	a' < 4			-2^63 + 5 <= a < 9
	// g/nle	a' > 4			a > 9 or a < -2^63 + 5
	// le/ng	a' <= 4			-2^63 + 5 <= a <= 9

	ret

endproc
| 217 | |
proc x06

	// leave a unchanged, but set zf if a = 0, cf if a /= 0, and sf to
	// msb(a); of is clear unless a = 2^63, the one value which is its
	// own negation, for which the final neg sets of
	not rax				// a' = -a - 1
	inc rax				// a' = -a
	neg rax				// a' = a; flags as described above

	ret

endproc
| 229 | |
proc x07

	// same as before (?): a is unchanged, and the flags are once more
	// those left by a final neg applied to -a
	inc rax				// a' = a + 1
	neg rax				// a' = -a - 1
	inc rax				// a' = -a
	neg rax				// a' = a

	ret

endproc
| 241 | |
proc x08

	// floor((a + d)/2), correctly handling overflow conditions; final cf
	// is lsb(a + d), probably uninteresting
	//
	// the carry out of the addition is the 65th bit of the sum, and
	// rcr rotates it back in at the top
	add rax, rdx			// cf || a' = a + d
	rcr rax, 1			// shift 65-bit result right by one
					// place; lsb moves into carry

	ret

endproc
| 253 | |
proc x09

	// a = a/8, rounded to nearest; i.e., floor(a/8) if a == 0, 1, 2, 3
	// (mod 8), or ceil(a/8) if a == 4, 5, 6, 7 (mod 8).
	//
	// the last bit shifted out is bit 2 of a, which is exactly the
	// `round up' indicator
	shr rax, 3			// a' = floor(a/8); cf = 1 if a ==
					// 4, 5, 6, 7 (mod 8)
	adc rax, 0			// a' = floor(a/8) + cf

	ret

endproc
| 265 | |
proc x0a

	// increment little-endian bignum at rdi
	//
	// the first byte is bumped by the add, and then the loop propagates
	// the carry through the next c bytes, so c + 1 bytes are touched in
	// total.  this depends on neither inc nor loop disturbing cf
	// between one adc and the next.
	add byte ptr [rdi], 1		// low byte; cf = carry out
0:	inc rdi				// next byte; cf preserved
	adc byte ptr [rdi], 0		// add in the carry; cf = carry out
	loop 0b				// decrement c, iterate; flags kept

	ret

endproc
| 277 | |
proc x0b

	// negate double-precision d:a
	//
	// the high half is complemented, and then gets the extra + 1 only
	// if the low half was zero, i.e., if negating it produced no borrow
	not rdx				// d' = -d - 1
	neg rax				// a' = -a;
					// cf = 1 iff a /= 0
	sbb rdx, -1			// d' = -d - cf

	ret

endproc
| 289 | |
proc x0c

	// rotate is distributive over xor.
	//
	// in the comments, x_0 is the low 13 bits of x (the part which the
	// rotation moves to the top) and x_1 is the rest.  c receives the
	// rotation of the xor, a receives the xor of the rotations, and the
	// final comparison shows that they agree.  clobbers b and c.

	// rax				// = a_1 || a_0
	// rbx				// = b_1 || b_0
	mov rcx, rax			// = a_1 || a_0

	xor rcx, rbx			// = (a_1 XOR b_1) || (a_0 XOR b_0)
	ror rcx, 0xd			// = (a_0 XOR b_0) || (a_1 XOR b_1)

	ror rax, 0xd			// = a_0 || a_1
	ror rbx, 0xd			// = b_0 || b_1
	xor rax, rbx			// = (a_0 XOR b_0) || (a_1 XOR b_1)

	cmp rax, rcx			// always equal

	ret

endproc
| 310 | |
proc x0d

	// and is distributive over xor.
	//
	// b receives a AND (b XOR c), a receives (a AND b) XOR (a AND c),
	// and the final comparison shows that they agree.  clobbers d.

	mov rdx, rbx			// = b

	xor rbx, rcx			// = b XOR c
	and rbx, rax			// = a AND (b XOR c)

	and rdx, rax			// = a AND b
	and rax, rcx			// = a AND c
	xor rax, rdx			// = (a AND b) XOR (a AND c)
					// = a AND (b XOR c)

	cmp rax, rbx			// always equal

	ret

endproc
| 330 | |
proc x0e

	// de morgan's law
	//
	// c receives NOT (a AND b), a receives (NOT a) OR (NOT b), and the
	// final comparison shows that they agree.  b is left complemented.

	mov rcx, rax			// = a

	and rcx, rbx			// = a AND b
	not rcx				// = NOT (a AND b)

	not rax				// = NOT a
	not rbx				// = NOT b
	or rax, rbx			// = (NOT a) OR (NOT b)
					// = NOT (a AND b)

	cmp rax, rcx			// always equal

	ret

endproc
| 350 | |
proc x0f

	// replace input buffer bytes with cumulative XORs with initial a;
	// final a is XOR of all buffer bytes and initial a.
	//
	// the buffer is at rsi and is c bytes long; only the low byte of a
	// takes part.
	//
	// not sure why you'd do this.

	cld				// make lodsb step rsi upwards

0:	xor [rsi], al			// buffer byte ^= running value
	lodsb				// al = the byte just written; rsi++
	loop 0b				// decrement c, iterate

	ret

endproc
| 367 | |
proc x10

	// four different ways to swap a pair of registers.
	//
	// four swaps in a row leave a and c holding their original values.

	// 1: through the stack; the values are popped in the same order
	// that they were pushed
	push rax
	push rcx
	pop rax
	pop rcx

	// 2: the triple-xor trick
	xor rax, rcx
	xor rcx, rax
	xor rax, rcx

	// 3: with arithmetic; writing a and c for the values on entry to
	// this group
	add rax, rcx			// a' = a + c
	sub rcx, rax			// c' = c - (a + c) = -a
	add rax, rcx			// a' = (a + c) - a = c
	neg rcx				// c' = a

	// 4: the instruction made for the job
	xchg rax, rcx

	ret

endproc
| 391 | |
proc x11

	// assuming a is initialized to zero, set a to the inclusive or of
	// the xor-differences of corresponding bytes in the c-byte strings
	// at si and di.
	//
	// in particular, a will be zero (and zf set) if and only if the two
	// strings are equal.
	//
	// every byte is examined whether or not a difference has already
	// been found.  zf on exit is the one left by the last `or', since
	// loop leaves the flags alone.  clobbers dl.

0:	mov dl, [rsi]
	xor dl, [rdi]			// dl = difference of this byte pair
	inc rsi
	inc rdi
	or al, dl			// accumulate differences
	loop 0b				// decrement c, iterate

	ret

endproc
| 411 | |
proc x12

	// an obtuse way of adding two registers. for any bit position, a
	// OR d is set if and only if at least one of a and d has a bit set
	// in that position, and a AND d is set if and only if both have a
	// bit set in that position. essentially, then, what we've done is
	// move all of the set bits in d to a, unless there's already a bit
	// there. this clearly doesn't change the sum.

	mov rcx, rdx			// c' = d
	and rdx, rax			// d' = a AND d
	or rax, rcx			// a' = a OR d
	add rax, rdx			// a' = (a OR d) + (a AND d) = a + d

	ret

endproc
| 429 | |
proc x13

	// ok, so this is a really obtuse way of adding a and b; the result
	// is in a, and d is left holding the value a had before the final
	// pass. but why does it work?
	//
	// each pass replaces (a, b) with (a XOR b, 2 (a AND b)): the sum
	// without its carries, and the carries moved into place.  this
	// leaves a + b unchanged (mod 2^64).  the shift also clears one
	// more low bit of b on every pass, so after 64 passes b = 0 and a
	// is the whole sum.

	mov rcx, 0x40			// carry chains at most 64 long
0:	mov rdx, rax			// copy a'
	xor rax, rbx			// low bits of each bitwise sum
	and rbx, rdx			// carry bits from each bitwise sum
	shl rbx, 001			// carry them into next position
	loop 0b

	ret

endproc
| 445 | |
proc x14

	// floor((a + d)/2), like x08.
	//
	// uses a + d = (a XOR d) + 2 (a AND d): halve the first term and
	// add the second unshifted, so the full sum is never formed and
	// nothing can overflow.  clobbers c.

	mov rcx, rax			// copy a for later
	and rcx, rdx			// carry bits

	xor rax, rdx			// low bits of each bitwise sum
	shr rax, 1			// divide by 2; carries now in place

	add rax, rcx			// add the carries; done

	ret

endproc
| 461 | |
proc x15

	// sign extension 32 -> 64 bits.
	//
	// the reasoning below needs bits 32--63 of a to be clear on entry.
	// clobbers d.

	//movsx rbx, eax		// like this?

	mov rdx, 0xffffffff80000000
	add rax, rdx			// if bit 31 of a is set then bits
					// 31--63 of a' are clear; otherwise,
					// these bits are all set -- which is
					// exactly backwards
	xor rax, rdx			// so fix it

	ret

endproc
| 478 | |
proc x16

	// the original gives no statement of purpose for this one; the
	// comments just trace the values.  the result is the flags left by
	// comparing a' with t.  clobbers a, b and rsi.

	//shl rax, 56
	//shl rbx, 56
	//shl rcx, 56

	xor rax, rbx			// a' = a XOR b
	xor rbx, rcx			// b' = b XOR c
	mov rsi, rax			// t = a XOR b
	add rsi, rbx			// t = (a XOR b) + (b XOR c)
	cmovc rax, rbx			// a' = cf ? b XOR c : a XOR b
	xor rax, rbx			// a' = cf ? 0 : a XOR c
	cmp rax, rsi

	ret

endproc
| 496 | |
proc x17

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 502 | |
proc x18

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 508 | |
proc x19

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 514 | |
proc x1a

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 520 | |
proc x1b

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 526 | |
proc x1c

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 532 | |
proc x1d

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 538 | |
proc x1e

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 544 | |
proc x1f

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 550 | |
proc x20

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

	// not reached unless something resumes execution after the ud2
	ret

endproc
| 558 | |
proc x21

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 564 | |
proc x22

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 570 | |
proc x23

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 576 | |
proc x24

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 582 | |
proc x25

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 588 | |
proc x26

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 594 | |
proc x27

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 600 | |
proc x28

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 606 | |
proc x29

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 612 | |
proc x2a

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 618 | |
proc x2b

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 624 | |
proc x2c

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 630 | |
proc x2d

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 636 | |
proc x2e

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 642 | |
proc x2f

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 648 | |
proc x30

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

	// not reached unless something resumes execution after the ud2
	ret

endproc
| 656 | |
proc x31

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 662 | |
proc x32

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 668 | |
proc x33

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 674 | |
proc x34

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 680 | |
proc x35

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 686 | |
proc x36

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 692 | |
proc x37

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 698 | |
proc x38

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 704 | |
proc x39

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 710 | |
proc x3a

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 716 | |
proc x3b

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 722 | |
proc x3c

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 728 | |
proc x3d

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 734 | |
proc x3e

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc
| 740 | |
proc x3f

	// no real body: ud2 raises an invalid-opcode exception if called
	ud2

endproc