Commit | Line | Data |
---|---|---|
06297a93 MW |
1 | /// -*- mode: asm; asm-comment-char: ?/ -*- |
2 | ||
3 | .intel_syntax noprefix | |
4 | ||
5 | .section .note.GNU-stack, "", @progbits | |
6 | ||
/// proc NAME ... endproc
///
/// Bracket the code of a global function NAME.  proc exports the symbol,
/// types it as a function, starts it on a 16-byte boundary and plants the
/// label; it also defines a one-shot endproc which records the symbol size
/// and then purges itself, so the next proc can define a fresh one.
	.macro	proc	name
	.global	\name
	.type	\name, @function
	.balign	16
\name\():
	.macro	endproc
	.size	\name, . - \name
	.purgem	endproc
	.endm
	.endm
17 | ||
/// ch C
///
/// Debugging aid: write the character C to stdout (putchar) and flush it
/// (fflush), leaving the surrounding code undisturbed.
///
/// Preserved: rflags and every integer register that the SysV AMD64 ABI
/// allows a callee to clobber (rax, rcx, rdx, rsi, rdi, r8, r9, r10, r11),
/// plus rbp, which is borrowed to remember the unaligned stack pointer.
/// Not preserved: vector and x87 state.
///
/// C may be an immediate or a register; it is read after the saves but
/// before rbp is repurposed, so `ch rbp' sees the caller's rbp.
	.macro	ch	c
	pushf
	push	rax
	push	rcx
	push	rdx
	push	rsi
	push	rdi
	push	r8
	push	r9
	push	r10			// caller-saved too: libc may use it
	push	r11			// likewise
	push	rbp

	mov	rdi, \c			// fetch the argument first
	mov	rbp, rsp		// remember where the saves end
	and	rsp, -16		// calls need a 16-byte aligned stack
	cld				// ABI: direction flag clear at calls

	call	putchar@plt

	mov	rdi, [rip + stdout]
	call	fflush@plt

	mov	rsp, rbp		// back to the save area
	pop	rbp
	pop	r11
	pop	r10
	pop	r9
	pop	r8
	pop	rdi
	pop	rsi
	pop	rdx
	pop	rcx
	pop	rax
	popf				// restores the caller's df as well
	.endm
48 | ||
49 | .text | |
50 | ||
/// call_example
///
/// Run a piece of code with a caller-chosen register state and report the
/// state it leaves behind.
///
/// In:	rdi = address of the code to run (`func')
///	rsi = address of a block of eight 64-bit slots (`regs'):
///
///		 +0 rax		 +8 rbx		+16 rcx		+24 rdx
///		+32 rsi		+40 rdi		+48 rbp		+56 rflags
///
/// The seven register slots are loaded as they stand.  From the rflags slot
/// only the bits in 0x0cd5 (cf, pf, af, zf, sf, df, of) are taken; all other
/// flag bits keep their current values.  func is then entered by `ret' with
/// a continuation address on top of the stack, so it can return normally.
/// When it does, the registers and the complete rflags it left are written
/// back to the same slots.
///
/// rbx, rbp, r10--r15 and the flags are restored before returning.  rax,
/// rcx, rdx, rsi and rdi come back holding leftovers; no result is returned.
///
/// Stack: assuming the usual SysV entry state (rsp = 8 mod 16), the nine
/// saved words leave rsp = 0 mod 16.  One pad word is inserted so that,
/// after regs/cont/func are pushed and func is popped, func sees rsp = 8
/// mod 16 with its return address on top, as an ordinary callee would.
proc	call_example

	// save everything which must survive
	push	rbx			// rbx
	push	r10
	push	r11
	push	r12
	push	r13
	push	r14
	push	r15
	push	rbp			// rbp, ..., rbx
	pushf				// flags, rbp, ..., rbx

	// alignment pad, then the three words used for the transfer
	sub	rsp, 8			// pad, flags, rbp, ..., rbx
	push	rsi			// regs, pad, flags, rbp, ..., rbx

	lea	rax, [rip + 9f]
	push	rax			// cont, regs, pad, flags, ...
	push	rdi			// func, cont, regs, pad, flags, ...

	// merge the requested status/direction flags into the live flags
	mov	rax, [rsi + 56]
	pushf
	pop	rcx
	and	rax, 0x0cd5		// wanted: cf pf af zf sf df of
	and	rcx, ~0x0cd5		// everything else stays as it is
	or	rax, rcx
	push	rax
	popf

	// load the registers; rsi goes last because it is the base pointer.
	// only mov from here on, so the flags just set survive
	mov	rax, [rsi + 0]
	mov	rbx, [rsi + 8]
	mov	rcx, [rsi + 16]
	mov	rdx, [rsi + 24]
	mov	rdi, [rsi + 40]
	mov	rbp, [rsi + 48]
	mov	rsi, [rsi + 32]

	ret				// -> func; cont, regs, pad, flags, ...

	// func returns here
9:	pushf				// rflags, regs, pad, flags, ...
	push	rsi			// rsi, rflags, regs, pad, flags, ...
	mov	rsi, [rsp + 16]		// recover the regs pointer
	mov	[rsi + 0], rax
	mov	[rsi + 8], rbx
	mov	[rsi + 16], rcx
	mov	[rsi + 24], rdx
	mov	[rsi + 40], rdi
	mov	[rsi + 48], rbp
	pop	rax			// rflags, regs, pad, flags, ...
	mov	[rsi + 32], rax		// the rsi which func left
	pop	rax			// regs, pad, flags, ...
	mov	[rsi + 56], rax		// the rflags which func left

	add	rsp, 16			// flags, rbp, ..., rbx
	popf				// rbp, ..., rbx
	pop	rbp			// ..., rbx
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	r11
	pop	r10
	pop	rbx			//
	ret

endproc
114 | ||
/// nop: do nothing at all.  Returns immediately; registers, flags and
/// memory are left exactly as found.
proc	nop

	ret

endproc
120 | ||
121 | ///-------------------------------------------------------------------------- | |
122 | ||
/// x00: zero rax, rbx, rcx, rdx, rsi, rdi and rbp, each by a different
/// idiom.
///
/// Out:	all seven registers are 0.  Flags are whatever the last
///	flag-writing instruction (sub edi, edi) left.
///
/// Note: `loop .' decrements rcx before testing it, so entering with
/// rcx = 0 wraps round and spins through 2^64 iterations.
proc	x00

	xor	eax, eax		// 32-bit write zero-extends: rax = 0
	lea	rbx, [0]		// effective address of nothing: rbx = 0
	loop	.			// count rcx down on the spot: rcx = 0
	mov	rdx, 0			// immediate load: rdx = 0
	and	esi, 0			// mask off every bit: rsi = 0
	sub	edi, edi		// x - x: rdi = 0
	push	0			// by way of the stack ...
	pop	rbp			// ... rbp = 0

	ret

endproc
138 | ||
/// x01: step a fibonacci pair forward rcx times.
///
/// In:	rax = f_{i+1}, rdx = f_i, rcx = number of steps
/// Out:	rax = f_{i+c+1}, rdx = f_{i+c}, rcx = 0
///
/// where f_{i+1} = f_i + f_{i-1}, all arithmetic modulo 2^64.  The count is
/// decremented before it is tested, so rcx = 0 on entry means 2^64 steps.
proc	x01

1:	xadd	rax, rdx		// (a, d) <- (a + d, a)
					//	   = (f_{i+2}, f_{i+1})
	loop	1b			// one step done; c <- c - 1; again?

	ret

endproc
153 | ||
/// x02: collapse rax to a boolean.
///
/// In:	rax = a
/// Out:	rax = 0 if a = 0, otherwise 1
proc	x02

	neg	rax			// cf = [a /= 0]
	sbb	rax, rax		// a' = a - a - cf = -cf (0 or -1)
	neg	rax			// a' = cf (0 or 1)

	ret

endproc
165 | ||
/// x03: branch-free unsigned minimum.
///
/// In:	rax = a, rdx = d
/// Out:	rax = min(a, d), compared as unsigned
/// Clobbers: rcx, rdx
proc	x03

	sub	rdx, rax		// d' = d - a; borrow (cf) iff a > d
	sbb	rcx, rcx		// c = -cf: all ones iff a > d
	and	rcx, rdx		// c = d - a if a > d, else 0
	add	rax, rcx		// a' = d if a > d, else a

	ret

endproc
177 | ||
/// x04: flip bit 5 (0x20) of al.  For an ascii letter this is presumably
/// meant to toggle between upper and lower case; other values just have
/// that bit inverted.
proc	x04

	xor	al, 0x20

	ret

endproc
186 | ||
/// x05: range check 5 <= a < 9 (or <= 9) with a single comparison.
///
/// In:	rax = a
/// Out:	rax = a' = a - 5 (mod 2^64); flags from comparing a' with 4
///
/// What each condition code then means:
///
///	cc		a'			a
///
///	z/e		a' = 4			a = 9
///	nz/ne		a' /= 4			a /= 9
///
///   unsigned:
///	a/nbe		a' > 4			a > 9 or a < 5
///	nc/ae/nb	a' >= 4			a >= 9 or a < 5
///	c/b/nae		a' < 4			5 <= a < 9
///	be/na		a' <= 4			5 <= a <= 9
///
///   signed:
///	o		a' < -2^63 + 4		-2^63 + 5 <= a < -2^63 + 9
///	no		a' >= -2^63 + 4		a >= -2^63 + 9 or
///						a < -2^63 + 5
///	s		-2^63 + 4 <= a' < 4	-2^63 + 9 <= a < 9
///	ns		a' < -2^63 + 4 or	a < -2^63 + 9 or a >= 9
///			a' >= 4
///	ge/nl		a' >= 4			a >= 9 or a < -2^63 + 5
///	l/nge		a' < 4			-2^63 + 5 <= a < 9
///	g/nle		a' > 4			a > 9 or a < -2^63 + 5
///	le/ng		a' <= 4			-2^63 + 5 <= a <= 9
proc	x05

	sub	rax, 5			// a' = a - 5: slide the range to 0
	cmp	rax, 4			// a' against the width of the range

	ret

endproc
217 | ||
/// x06: give rax back unchanged, but with flags describing it.
///
/// In:	rax = a
/// Out:	rax = a; from the final neg: zf set iff a = 0, cf set iff a /= 0,
///	sf = top bit of a, of set only for a = 2^63
proc	x06

	not	rax			// a' = -a - 1
	inc	rax			// a' = -a
	neg	rax			// a' = a, and the flags come from here

	ret

endproc
229 | ||
/// x07: another round trip which hands rax back unchanged.  The last
/// instruction is again a neg applied to -a, so the final flags are the
/// same as x06 produces.
///
/// In:	rax = a
/// Out:	rax = a
proc	x07

	inc	rax			// a' = a + 1
	neg	rax			// a' = -a - 1
	inc	rax			// a' = -a
	neg	rax			// a' = a

	ret

endproc
241 | ||
/// x08: unsigned average without losing the carry.
///
/// In:	rax = a, rdx = d
/// Out:	rax = floor((a + d)/2), computed from the full 65-bit sum
///	cf = low bit of a + d
proc	x08

	add	rax, rdx		// cf:rax = a + d, 65 bits wide
	rcr	rax, 1			// rotate right through carry: cf drops
					// into bit 63, bit 0 falls into cf

	ret

endproc
253 | ||
/// x09: unsigned divide by 8, rounding to nearest (halves round up).
///
/// In:	rax = a
/// Out:	rax = floor(a/8) if a == 0, 1, 2, 3 (mod 8),
///	      ceil(a/8) if a == 4, 5, 6, 7 (mod 8)
proc	x09

	shr	rax, 3			// a' = floor(a/8); cf = bit 2 of a, the
					// last bit out: set iff a mod 8 >= 4
	adc	rax, 0			// a' = floor(a/8) + cf

	ret

endproc
265 | ||
/// x0a: add one to a little-endian multi-byte integer in memory.
///
/// In:	rdi = address of the least significant byte
///	rcx = number of bytes after the first (c + 1 bytes are touched in
///	      all; the count is decremented before it is tested, so rcx = 0
///	      means 2^64 further bytes)
/// Out:	number incremented in place; rdi advanced by c; rcx = 0;
///	cf = carry out of the last byte
proc	x0a

	add	byte ptr [rdi], 1	// bump the low byte; cf = carry out
1:	inc	rdi			// next byte (inc leaves cf alone)
	adc	byte ptr [rdi], 0	// ripple the carry through it
	loop	1b			// loop leaves the flags alone too

	ret

endproc
277 | ||
/// x0b: negate the 128-bit value held in rdx:rax.
///
/// In:	rdx:rax = v (rdx is the high half)
/// Out:	rdx:rax = -v (mod 2^128)
proc	x0b

	not	rdx			// d' = -d - 1
	neg	rax			// a' = -a; cf = [a /= 0], i.e. the low
					// half needs a borrow from the high
	sbb	rdx, -1			// d' = (-d - 1) + 1 - cf = -d - cf

	ret

endproc
289 | ||
/// x0c: demonstrate that rotation distributes over xor, i.e.
/// ror(a XOR b) = ror(a) XOR ror(b).
///
/// In:	rax = a = a_1 || a_0, rbx = b = b_1 || b_0, where the _0 parts are
///	the low 13 bits
/// Out:	rax = rcx = the rotated xor; rbx = ror(b, 13); the final compare
///	always finds them equal (zf set)
proc	x0c

	// rotate the xor ...
	mov	rcx, rax		// = a_1 || a_0
	xor	rcx, rbx		// = (a_1 XOR b_1) || (a_0 XOR b_0)
	ror	rcx, 13			// = (a_0 XOR b_0) || (a_1 XOR b_1)

	// ... and xor the rotations
	ror	rbx, 13			// = b_0 || b_1
	ror	rax, 13			// = a_0 || a_1
	xor	rax, rbx		// = (a_0 XOR b_0) || (a_1 XOR b_1)

	cmp	rax, rcx		// the two always agree

	ret

endproc
310 | ||
/// x0d: demonstrate that and distributes over xor, i.e.
/// a AND (b XOR c) = (a AND b) XOR (a AND c).
///
/// In:	rax = a, rbx = b, rcx = c
/// Out:	rax = rbx = a AND (b XOR c); rdx = a AND b; the final compare
///	always finds them equal (zf set)
proc	x0d

	mov	rdx, rbx		// = b
	and	rdx, rax		// = a AND b

	// left-hand side, built in rbx
	xor	rbx, rcx		// = b XOR c
	and	rbx, rax		// = a AND (b XOR c)

	// right-hand side, built in rax
	and	rax, rcx		// = a AND c
	xor	rax, rdx		// = (a AND b) XOR (a AND c)

	cmp	rax, rbx		// the two always agree

	ret

endproc
330 | ||
/// x0e: demonstrate de morgan's law,
/// NOT (a AND b) = (NOT a) OR (NOT b).
///
/// In:	rax = a, rbx = b
/// Out:	rax = rcx = NOT (a AND b); rbx = NOT b; the final compare always
///	finds them equal (zf set)
proc	x0e

	// left-hand side, built in rcx
	mov	rcx, rax		// = a
	and	rcx, rbx		// = a AND b
	not	rcx			// = NOT (a AND b)

	// right-hand side, built in rax
	not	rbx			// = NOT b
	not	rax			// = NOT a
	or	rax, rbx		// = (NOT a) OR (NOT b)

	cmp	rax, rcx		// the two always agree

	ret

endproc
350 | ||
/// x0f: running xor over a buffer, in place.
///
/// In:	al = seed, rsi = buffer address, rcx = byte count (decremented
///	before it is tested, so 0 means 2^64)
/// Out:	each buffer byte replaced by the xor of the seed with every
///	original byte up to and including it; al = the last of these, i.e.
///	the xor of the seed with the whole buffer; rsi advanced past the
///	buffer; rcx = 0; df cleared
proc	x0f

	cld				// make lodsb walk upwards

1:	xor	[rsi], al		// fold the running value into the byte
	lodsb				// fetch it back as the new running value
					// and step rsi
	loop	1b

	ret

endproc
367 | ||
/// x10: exchange rax and rcx four times over, by four different methods.
/// An even number of swaps, so both registers finish with the values they
/// started with; the flags are those left by the neg in the third method.
proc	x10

	// 1: through the stack -- pop in the same order as pushed
	push	rax
	push	rcx
	pop	rax
	pop	rcx

	// 2: the triple xor
	xor	rax, rcx		// a' = a XOR c
	xor	rcx, rax		// c' = c XOR a' = a
	xor	rax, rcx		// a'' = a' XOR c' = c

	// 3: the arithmetic analogue
	add	rax, rcx		// a' = a + c
	sub	rcx, rax		// c' = c - a' = -a
	add	rax, rcx		// a'' = a' + c' = c
	neg	rcx			// c'' = a

	// 4: the instruction made for the job
	xchg	rax, rcx

	ret

endproc
391 | ||
/// x11: compare two byte strings without an early exit.
///
/// In:	rsi, rdi = addresses of the two strings
///	rcx = length in bytes (decremented before it is tested, so 0 means
///	      2^64)
///	al = 0 (assumed; differences are or-ed on top of whatever is there)
/// Out:	al = inclusive or of the xor of every pair of corresponding bytes;
///	al = 0, and zf set, exactly when the strings are equal
///	rsi, rdi advanced past the strings; rcx = 0; dl clobbered
proc	x11

1:	mov	dl, [rsi]
	xor	dl, [rdi]		// nonzero iff this pair differs
	inc	rdi
	inc	rsi
	or	al, dl			// accumulate; also the last flag writer
	loop	1b

	ret

endproc
411 | ||
/// x12: a roundabout addition, using a + d = (a OR d) + (a AND d).
///
/// In each bit position, the or has a bit wherever at least one input
/// does, and the and has a bit wherever both do.  So between them they
/// hold exactly the bits the inputs held: bits of d have been moved over to
/// a wherever a had a gap, and left behind otherwise.  Shuffling bits
/// between addends within a column leaves the total as it was.
///
/// In:	rax = a, rdx = d
/// Out:	rax = a + d (mod 2^64); rcx = d; rdx = a AND d
proc	x12

	mov	rcx, rdx		// c' = d, kept for the or
	and	rdx, rax		// d' = a AND d
	or	rax, rcx		// a' = a OR d
	add	rax, rdx		// a'' = (a OR d) + (a AND d) = a + d

	ret

endproc
429 | ||
/// x13: addition built from xor, and and shift alone.
///
/// Each pass adds the two numbers column by column without carrying (xor),
/// collects the carries each column generated (and), and moves them up to
/// the column they belong in (shift).  The carries then become the second
/// addend of the next pass.  After pass k the low k bits of the carry word
/// are zero, so 64 passes are sure to drain it.
///
/// In:	rax = a, rbx = b
/// Out:	rax = a + b (mod 2^64); rbx = 0; rcx = 0
///	rdx = value rax held on entry to the last pass (the same sum, unless
///	      a carry was still pending then)
proc	x13

	mov	rcx, 64			// no carry chain is longer than this
1:	mov	rdx, rax		// keep a' for the carry calculation
	xor	rax, rbx		// carry-less sum of each column
	and	rbx, rdx		// columns which produced a carry
	shl	rbx, 1			// deliver each carry one place up
	loop	1b

	ret

endproc
445 | ||
/// x14: unsigned average, as x08, but with no 65-bit intermediate:
/// floor((a + d)/2) = (a AND d) + floor((a XOR d)/2).
///
/// In:	rax = a, rdx = d
/// Out:	rax = floor((a + d)/2); rcx = a AND d
proc	x14

	mov	rcx, rax		// copy a for the carries
	and	rcx, rdx		// columns where both have a bit: each
					// is worth 2 in the sum, so 1 in the
					// average -- already in place

	xor	rax, rdx		// columns where just one has a bit
	shr	rax, 1			// halve those

	add	rax, rcx		// put the two parts together

	ret

endproc
461 | ||
/// x15: sign-extend a 32-bit value to 64 bits by arithmetic.
///
/// In:	rax = a, with bits 32--63 zero (the trick relies on this)
/// Out:	rax = low 32 bits of a, sign-extended; rdx = 0xffffffff80000000
///
/// (A movsxd would be the direct way to do this.)
proc	x15

	mov	rdx, 0xffffffff80000000	// ones in bits 31--63
	add	rax, rdx		// bit 31 set: the carry ripples out of
					// the top and bits 31--63 end up clear.
					// bit 31 clear: no carry, and bits
					// 31--63 end up set.  exactly the wrong
					// way round ...
	xor	rax, rdx		// ... so invert bits 31--63

	ret

endproc
478 | ||
/// x16: compare a conditional xor against a sum.
///
/// In:	rax = a, rbx = b, rcx = c
/// Out:	rbx = b XOR c
///	rsi = (a XOR b) + (b XOR c) (mod 2^64)
///	rax = 0 if that addition carried, otherwise a XOR c
///	flags from comparing rax with rsi
///
/// The disabled shifts below would move the low byte of each input up to
/// the top of its register first.
proc	x16

	//shl	rax, 56
	//shl	rbx, 56
	//shl	rcx, 56

	xor	rax, rbx		// a' = a XOR b
	xor	rbx, rcx		// b' = b XOR c
	mov	rsi, rax		// t = a XOR b
	add	rsi, rbx		// t = (a XOR b) + (b XOR c); cf = carry
	cmovc	rax, rbx		// a' = cf ? b XOR c : a XOR b
	xor	rax, rbx		// a' = cf ? 0 : a XOR c
	cmp	rax, rsi

	ret

endproc
496 | ||
/// x17: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x17

	ud2

endproc
502 | ||
/// x18: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x18

	ud2

endproc
508 | ||
/// x19: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x19

	ud2

endproc
514 | ||
/// x1a: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x1a

	ud2

endproc
520 | ||
/// x1b: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x1b

	ud2

endproc
526 | ||
/// x1c: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x1c

	ud2

endproc
532 | ||
/// x1d: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x1d

	ud2

endproc
538 | ||
/// x1e: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x1e

	ud2

endproc
544 | ||
/// x1f: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x1f

	ud2

endproc
550 | ||
/// x20: empty slot.  ud2 raises an invalid-opcode exception on entry, so a
/// stray call traps at once.  Unlike most of the neighbouring stubs this
/// one also carries a ret; it is kept as found, but falling through from
/// the ud2 never reaches it.
proc	x20

	ud2
	ret

endproc
558 | ||
/// x21: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x21

	ud2

endproc
564 | ||
/// x22: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x22

	ud2

endproc
570 | ||
/// x23: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x23

	ud2

endproc
576 | ||
/// x24: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x24

	ud2

endproc
582 | ||
/// x25: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x25

	ud2

endproc
588 | ||
/// x26: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x26

	ud2

endproc
594 | ||
/// x27: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x27

	ud2

endproc
600 | ||
/// x28: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x28

	ud2

endproc
606 | ||
/// x29: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x29

	ud2

endproc
612 | ||
/// x2a: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x2a

	ud2

endproc
618 | ||
/// x2b: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x2b

	ud2

endproc
624 | ||
/// x2c: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x2c

	ud2

endproc
630 | ||
/// x2d: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x2d

	ud2

endproc
636 | ||
/// x2e: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x2e

	ud2

endproc
642 | ||
/// x2f: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x2f

	ud2

endproc
648 | ||
/// x30: empty slot.  ud2 raises an invalid-opcode exception on entry, so a
/// stray call traps at once.  Unlike most of the neighbouring stubs this
/// one also carries a ret; it is kept as found, but falling through from
/// the ud2 never reaches it.
proc	x30

	ud2
	ret

endproc
656 | ||
/// x31: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x31

	ud2

endproc
662 | ||
/// x32: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x32

	ud2

endproc
668 | ||
/// x33: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x33

	ud2

endproc
674 | ||
/// x34: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x34

	ud2

endproc
680 | ||
/// x35: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x35

	ud2

endproc
686 | ||
/// x36: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x36

	ud2

endproc
692 | ||
/// x37: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x37

	ud2

endproc
698 | ||
/// x38: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x38

	ud2

endproc
704 | ||
/// x39: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x39

	ud2

endproc
710 | ||
/// x3a: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x3a

	ud2

endproc
716 | ||
/// x3b: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x3b

	ud2

endproc
722 | ||
/// x3c: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x3c

	ud2

endproc
728 | ||
/// x3d: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x3d

	ud2

endproc
734 | ||
/// x3e: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x3e

	ud2

endproc
740 | ||
/// x3f: empty slot.  The whole body is ud2, which raises an invalid-opcode
/// exception, so a stray call traps at once; nothing after it is reachable,
/// and no ret is provided.
proc	x3f

	ud2

endproc