xchg.S: Oops. Disable stray debugging cruft.
[xchg-rax-rax] / xchg.S
/// -*- mode: asm; asm-comment-char: ?/ -*-

        .intel_syntax noprefix

        .section .note.GNU-stack, "", @progbits

.macro proc name
        .globl \name
        .type \name, STT_FUNC
        .p2align 4
\name\():
        .macro endproc
        .size \name, . - \name
        .purgem endproc
        .endm
.endm

.macro ch c
        pushf
        push rax
        push rcx
        push rdx
        push rsi
        push rdi
        push r8
        push r9
        push rbp
        mov rbp, rsp
        and rsp, -16

        mov rdi, \c
        call putchar@plt

        mov rdi, [rip + stdout]
        call fflush@plt

        mov rsp, rbp
        pop rbp
        pop r9
        pop r8
        pop rdi
        pop rsi
        pop rdx
        pop rcx
        pop rax
        popf
.endm
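// `ch' is a debugging aid: for instance, dropping `ch 0x2a' into a snippet
// would print a single `*' and flush stdout, preserving flags and the
// registers saved above (though not r10 or r11, which putchar is free to
// clobber).  just an example invocation -- nothing below uses it.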

        .text

proc call_example

        push rbx                // rbx
        push r10
        push r11
        push r12
        push r13
        push r14
        push r15
        push rbp                // flags, rbp, ..., rbx
        pushf

        push rsi                // regs, flags, rbp, ..., rbx

        lea rax, [rip + 9f]
        push rax                // cont, regs, flags, rbp, ..., rbx
        push rdi                // func, cont, regs, flags, rbp, ..., rbx

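        // [rsi + 56] is the saved-flags slot of the register block (the
        // same slot the code at 9: below writes the exit rflags into), and
        // 0x0cd5 covers cf, pf, af, zf, sf, df, and of; so, as i read it,
        // the next few instructions splice the caller-supplied arithmetic
        // and direction flags into the live flags and leave the rest alone.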
        mov rax, [rsi + 56]
        pushf
        pop rcx
        and rax, 0x0cd5
        and rcx, ~0x0cd5
        or rax, rcx
        push rax
        popf
        mov rax, [rsi + 0]
        mov rbx, [rsi + 8]
        mov rcx, [rsi + 16]
        mov rdx, [rsi + 24]
        mov rdi, [rsi + 40]
        mov rbp, [rsi + 48]
        mov rsi, [rsi + 32]

        ret                     // -> func; regs, flags, rbp, ..., rbx

9:      pushf                   // rflags, regs, flags, rbp, ..., rbx
        push rsi                // rsi, rflags, regs, flags, rbp, ..., rbx
        mov rsi, [rsp + 16]
        mov [rsi + 0], rax
        mov [rsi + 8], rbx
        mov [rsi + 16], rcx
        mov [rsi + 24], rdx
        mov [rsi + 40], rdi
        mov [rsi + 48], rbp
        pop rax                 // rflags, regs, flags, rbp, ..., rbx
        mov [rsi + 32], rax
        pop rax                 // regs, flags, rbp, ..., rbx
        mov [rsi + 56], rax

        add rsp, 8              // flags, rbp, ..., rbx
        popf                    // rbp, ..., rbx
        pop rbp                 // ..., rbx
        pop r15
        pop r14
        pop r13
        pop r12
        pop r11
        pop r10
        pop rbx                 //
        ret

endproc

proc nop

        ret

endproc

///--------------------------------------------------------------------------

proc x00

        // clear all 64 bits of extended traditional registers
        xor eax,eax             // clear rax
        lea rbx,[0]             // rbx -> _|_
        loop .                  // iterate, decrement rcx until zero
        mov rdx,0               // set rdx = 0
        and esi,0               // clear all bits of rsi
        sub edi,edi             // set rdi = edi - edi = 0
        push 0
        pop rbp                 // pop 0 into rbp

        ret

endproc

proc x01

        // advance a fibonacci pair by c steps
        //
        // on entry, a and d are f_{i+1} and f_i; on exit, they are f_{i+c+1}
        // and f_{i+c}, where f_{i+1} = f_i + f_{i-1}
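        //
        // for example (i = 1, c = 3): enter with a = f_2 = 1, d = f_1 = 1;
        // the three iterations give (a, d) = (2, 1), (3, 2), (5, 3), so we
        // leave with a = f_5 and d = f_4, as advertised.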
0:      xadd rax, rdx           // a, d = a + d, a
                                //   = f_{i+1} + f_i, f_{i+1}
                                //   = f_{i+2}, f_{i+1}
        loop 0b                 // advance i, decrement c, iterate

        ret

endproc

proc x02

        // boolean canonify a: if a = 0 on entry, leave it zero; otherwise
        // set a = 1
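        //
        // e.g. a = 5: neg gives -5 and sets cf; sbb then leaves -1; and the
        // final neg turns that into 1.  a = 0 sails through unchanged, since
        // the first neg leaves cf clear.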
        neg rax                 // set cf iff a /= 0
        sbb rax, rax            // a = a - a - cf = -cf
        neg rax                 // a = cf

        ret

endproc

proc x03

        // set a = min(a, d) (unsigned); clobber c, d
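        //
        // e.g. a = 7, d = 3: the sub borrows, so c becomes all-ones and then
        // d - a = -4, and the add gives 7 - 4 = 3.  with a = 3, d = 7 there's
        // no borrow, c masks to zero, and a is left alone.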
        sub rdx, rax            // d' = d - a; set cf if a > d
        sbb rcx, rcx            // c = -cf = -[a > d]
        and rcx, rdx            // c = a > d ? d - a : 0
        add rax, rcx            // a' = a > d ? d : a

        ret

endproc

proc x04

        // switch case?
        xor al, 0x20
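        // (yes, if al holds an ascii letter: flipping bit 5 maps 'A' = 0x41
        // to 'a' = 0x61 and back again.)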

        ret

endproc

proc x05

        // answer whether 5 <= a </<= 9.
        sub rax, 5              // a' = a - 5
        cmp rax, 4              // is a' </<= 4?

        // cc           a'                      a
        //
        // z/e          a' = 4                  a = 9
        // nz/ne        a' /= 4                 a /= 9
        //
        // a/nbe        a' > 4                  a > 9 or a < 5
        // nc/ae/nb     a' >= 4                 a >= 9 or a < 5
        // c/b/nae      a' < 4                  5 <= a < 9
        // be/na        a' <= 4                 5 <= a <= 9
        //
        // o            a' < -2^63 + 4          -2^63 + 5 <= a < -2^63 + 9
        // no           a' >= -2^63 + 4         a >= -2^63 + 9 or
        //                                              a < -2^63 + 5
        // s            -2^63 + 4 <= a' < 4     -2^63 + 9 <= a < 9
        // ns           a' < -2^63 + 4 or       a < -2^63 + 9 or a >= 9
        //                      a' >= 4
        // ge/nl        a' >= 4                 a >= 9 or a < -2^63 + 5
        // l/nge        a' < 4                  -2^63 + 5 <= a < 9
        // g/nle        a' > 4                  a > 9 or a < -2^63 + 5
        // le/ng        a' <= 4                 -2^63 + 5 <= a <= 9
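        //
        // e.g. (a quick check): a = 7 gives a' = 2, so c/b holds
        // (5 <= 7 < 9); a = 3 wraps a' round to 2^64 - 2, so nc/ae holds
        // (3 < 5).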

        ret

endproc

proc x06

        // leave a unchanged, but set zf if a = 0, cf if a /= 0, clear of,
        // set sf to msb(a)
        not rax                 // a' = -a - 1
        inc rax                 // a' = -a
        neg rax                 // a' = a

        ret

endproc

proc x07

        // same as before (?)
        inc rax                 // a' = a + 1
        neg rax                 // a' = -a - 1
        inc rax                 // a' = -a
        neg rax                 // a' = a

        ret

endproc

proc x08

        // floor((a + d)/2), correctly handling overflow conditions; final cf
        // is lsb(a + d), probably uninteresting
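        //
        // e.g. a = d = 2^63: the add leaves zero behind with cf set, and the
        // rcr pulls that carry back in as the top bit, giving 2^63 -- which a
        // plain add/shr pair would have got wrong.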
        add rax, rdx            // cf || a' = a + d
        rcr rax, 1              // shift 65-bit result right by one
                                // place; lsb moves into carry

        ret

endproc

proc x09

        // a = a/8, rounded to nearest; i.e., floor(a/8) if a == 0, 1, 2, 3
        // (mod 8), or ceil(a/8) if a == 4, 5, 6, 7 (mod 8).
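        //
        // e.g. 11 -> 1 (11/8 = 1.375 rounds down) but 12 -> 2 (12/8 = 1.5,
        // with halves rounding up).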
        shr rax, 3              // a' = floor(a/8); cf = 1 if a ==
                                // 4, 5, 6, 7 (mod 8)
        adc rax, 0              // a' = floor(a/8) + cf

        ret

endproc

proc x0a

        // increment c-byte little-endian bignum at rdi
        add byte ptr [rdi], 1
0:      inc rdi
        adc byte ptr [rdi], 0
        loop 0b

        ret

endproc

proc x0b

        // negate double-precision d:a
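        //
        // e.g. d:a = 0:1 comes out as all-ones:all-ones, i.e. -1 as a 128-bit
        // quantity, while d:a = 0:0 stays zero because the carry out of the
        // neg is clear.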
        not rdx                 // d' = -d - 1
        neg rax                 // a' = -a;
                                // cf = 1 iff a /= 0
        sbb rdx, -1             // d' = -d - cf

        ret

endproc

proc x0c

        // rotate is distributive over xor.
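        //
        // (the rotate count -- 0xd here -- is arbitrary: rotation only
        // permutes bit positions and xor acts bitwise, so any count shows
        // the same identity.)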

        // rax                  // = a_1 || a_0
        // rbx                  // = b_1 || b_0
        mov rcx, rax            // = a_1 || a_0

        xor rcx, rbx            // = (a_1 XOR b_1) || (a_0 XOR b_0)
        ror rcx, 0xd            // = (a_0 XOR b_0) || (a_1 XOR b_1)

        ror rax, 0xd            // = a_0 || a_1
        ror rbx, 0xd            // = b_0 || b_1
        xor rax, rbx            // = (a_0 XOR b_0) || (a_1 XOR b_1)

        cmp rax, rcx            // always equal

        ret

endproc

proc x0d

        // and is distributive over xor.
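        //
        // (bit by bit this is just the distributive law in gf(2):
        // a(b + c) = ab + ac, with and as multiply and xor as add.)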

        mov rdx, rbx            // = b

        xor rbx, rcx            // = b XOR c
        and rbx, rax            // = a AND (b XOR c)

        and rdx, rax            // = a AND b
        and rax, rcx            // = a AND c
        xor rax, rdx            // = (a AND b) XOR (a AND c)
                                // = a AND (b XOR c)

        cmp rax, rbx            // always equal

        ret

endproc

proc x0e

        // de morgan's law

        mov rcx, rax            // = a

        and rcx, rbx            // = a AND b
        not rcx                 // = NOT (a AND b)

        not rax                 // = NOT a
        not rbx                 // = NOT b
        or rax, rbx             // = (NOT a) OR (NOT b)
                                // = NOT (a AND b)

        cmp rax, rcx

        ret

endproc

proc x0f

        // replace input buffer bytes with cumulative XORs with initial a;
        // final a is XOR of all buffer bytes and initial a.
        //
        // not sure why you'd do this.

        cld

0:      xor [rsi], al
        lodsb
        loop 0b

        ret

endproc

proc x10

        // four different ways to swap a pair of registers.
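        //
        // (one caveat: the xor and add/sub variants destroy the value if both
        // operands name the same register; push/pop and xchg are safe even
        // then.)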

        push rax
        push rcx
        pop rax
        pop rcx

        xor rax, rcx
        xor rcx, rax
        xor rax, rcx

        add rax, rcx
        sub rcx, rax
        add rax, rcx
        neg rcx

        xchg rax, rcx

        ret

endproc

proc x11

        // assuming a is initialized to zero, set a to the inclusive or of
        // the xor-differences of corresponding bytes in the c-byte strings
        // at si and di.
        //
        // in particular, a will be zero (and zf set) if and only if the two
        // strings are equal.
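        //
        // (the loop always runs the full c iterations and never branches on
        // the data, which is the shape you'd want for a timing-insensitive
        // comparison -- my reading of the point, anyway.)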

0:      mov dl, [rsi]
        xor dl, [rdi]
        inc rsi
        inc rdi
        or al, dl
        loop 0b

        ret

endproc

proc x12

        // an obtuse way of adding two registers. for any bit position, a
        // OR d is set if and only if at least one of a and d has a bit set
        // in that position, and a AND d is set if and only if both have a
        // bit set in that position. essentially, then, what we've done is
        // move all of the set bits in d to a, unless there's already a bit
        // there. this clearly doesn't change the sum.
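        //
        // e.g. a = 0b0101, d = 0b0011: a OR d = 0b0111 and a AND d = 0b0001,
        // which still sum to 8, just as 5 + 3 did.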

        mov rcx, rdx            // c' = d
        and rdx, rax            // d' = a AND d
        or rax, rcx             // a' = a OR d
        add rax, rdx

        ret

endproc

proc x13

        // ok, so this is a really obtuse way of adding a and b; the result
        // is in a and d. but why does it work?
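        //
        // (the xor keeps the sum bits that don't carry, the and picks out the
        // positions that do, and the shift moves each carry up one place; 64
        // rounds is enough for any carry to ripple all the way home.  e.g.
        // a = 3, b = 1: successive (a, b) are (2, 2), (0, 4), (4, 0), after
        // which nothing changes, leaving a = d = 4.)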

        mov rcx, 0x40           // carry chains at most 64 long
0:      mov rdx, rax            // copy a'
        xor rax, rbx            // low bits of each bitwise sum
        and rbx, rdx            // carry bits from each bitwise sum
        shl rbx, 001            // carry them into next position
        loop 0b

        ret

endproc

proc x14

        // floor((a + d)/2), like x08.
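        //
        // (it works because a + d = (a XOR d) + 2*(a AND d), so the average
        // is ((a XOR d) >> 1) + (a AND d), and neither term can overflow.)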

        mov rcx, rax            // copy a for later
        and rcx, rdx            // carry bits

        xor rax, rdx            // low bits of each bitwise sum
        shr rax, 1              // divide by 2; carries now in place

        add rax, rcx            // add the carries; done

        ret

endproc

proc x15

        // sign extension 32 -> 64 bits.
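        //
        // (assuming the top 32 bits of a are clear on entry: e.g.
        // a = 0x80000000 goes to 0 after the add and to 0xffffffff80000000
        // after the xor, while a = 1 goes to 0xffffffff80000001 and then back
        // to 1.)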

        //movsx rbx, eax       // like this?

        mov rdx, 0xffffffff80000000
        add rax, rdx            // if bit 31 of a is set then bits
                                // 31--63 of a' are clear; otherwise,
                                // these bits are all set -- which is
                                // exactly backwards
        xor rax, rdx            // so fix it

        ret

endproc

proc x16

        //shl rax, 56
        //shl rbx, 56
        //shl rcx, 56

        xor rax, rbx            // a' = a XOR b
        xor rbx, rcx            // b' = b XOR c
        mov rsi, rax            // t = a XOR b
        add rsi, rbx            // t = (a XOR b) + (b XOR c)
        cmovc rax, rbx          // a' = cf ? b XOR c : a XOR b
        xor rax, rbx            // a' = cf ? 0 : a XOR c
        cmp rax, rsi

        ret

endproc

proc x17

        ud2

endproc

proc x18

        ud2

endproc

proc x19

        ud2

endproc

proc x1a

        ud2

endproc

proc x1b

        ud2

endproc

proc x1c

        ud2

endproc

proc x1d

        ud2

endproc

proc x1e

        ud2

endproc

proc x1f

        ud2

endproc

proc x20

        ud2

        ret

endproc

proc x21

        ud2

endproc

proc x22

        ud2

endproc

proc x23

        ud2

endproc

proc x24

        ud2

endproc

proc x25

        ud2

endproc

proc x26

        ud2

endproc

proc x27

        ud2

endproc

proc x28

        ud2

endproc

proc x29

        ud2

endproc

proc x2a

        ud2

endproc

proc x2b

        ud2

endproc

proc x2c

        ud2

endproc

proc x2d

        ud2

endproc

proc x2e

        ud2

endproc

proc x2f

        ud2

endproc

proc x30

        ud2

        ret

endproc

proc x31

        ud2

endproc

proc x32

        ud2

endproc

proc x33

        ud2

endproc

proc x34

        ud2

endproc

proc x35

        ud2

endproc

proc x36

        ud2

endproc

proc x37

        ud2

endproc

proc x38

        ud2

endproc

proc x39

        ud2

endproc

proc x3a

        ud2

endproc

proc x3b

        ud2

endproc

proc x3c

        ud2

endproc

proc x3d

        ud2

endproc

proc x3e

        ud2

endproc

proc x3f

        ud2

endproc