@@@ fltfmt mess
[mLib] / utils / bits.3.in
1 .\" -*-nroff-*-
2 .\"
3 .\" Manual for bit manipulation
4 .\"
5 .\" (c) 1999, 2001, 2005, 2009, 2018, 2024 Straylight/Edgeware
6 .\"
7 .
8 .\"----- Licensing notice ---------------------------------------------------
9 .\"
10 .\" This file is part of the mLib utilities library.
11 .\"
12 .\" mLib is free software: you can redistribute it and/or modify it under
13 .\" the terms of the GNU Library General Public License as published by
14 .\" the Free Software Foundation; either version 2 of the License, or (at
15 .\" your option) any later version.
16 .\"
17 .\" mLib is distributed in the hope that it will be useful, but WITHOUT
18 .\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 .\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20 .\" License for more details.
21 .\"
22 .\" You should have received a copy of the GNU Library General Public
23 .\" License along with mLib. If not, write to the Free Software
24 .\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25 .\" USA.
26 .
27 .\"--------------------------------------------------------------------------
28 .so ../defs.man \" @@@PRE@@@
29 .
30 .\"--------------------------------------------------------------------------
31 .TH bits 3mLib "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
32 .\" @octet
33 .\" @uint16
34 .\" @uint24
35 .\" @uint32
36 .\" @uint64
37 .\" @kludge64
38 .
39 .\" @MASK_8
40 .\" @MASK_16
41 .\" @MASK_16_L
42 .\" @MASK_16_B
43 .\" @MASK_24
44 .\" @MASK_24_L
45 .\" @MASK_24_B
46 .\" @MASK_32
47 .\" @MASK_32_L
48 .\" @MASK_32_B
49 .\" @MASK_64
50 .\" @MASK_64_L
51 .\" @MASK_64_B
52 .
53 .\" @SZ_8
54 .\" @SZ_16
55 .\" @SZ_16_L
56 .\" @SZ_16_B
57 .\" @SZ_24
58 .\" @SZ_24_L
59 .\" @SZ_24_B
60 .\" @SZ_32
61 .\" @SZ_32_L
62 .\" @SZ_32_B
63 .\" @SZ_64
64 .\" @SZ_64_L
65 .\" @SZ_64_B
66 .
67 .\" @TY_8
68 .\" @TY_16
69 .\" @TY_16_L
70 .\" @TY_16_B
71 .\" @TY_24
72 .\" @TY_24_L
73 .\" @TY_24_B
74 .\" @TY_32
75 .\" @TY_32_L
76 .\" @TY_32_B
77 .\" @TY_64
78 .\" @TY_64_L
79 .\" @TY_64_B
80 .
81 .\" @DOUINTSZ
82 .\" @DOUINTCONV
83 .
84 .\" @U8
85 .\" @U16
86 .\" @U24
87 .\" @U32
88 .\" @U64
89 .\" @U64_
90 .
91 .\" @LSL8
92 .\" @LSR8
93 .\" @LSL16
94 .\" @LSR16
95 .\" @LSL24
96 .\" @LSR24
97 .\" @LSL32
98 .\" @LSR32
99 .\" @LSL64
100 .\" @LSR64
101 .\" @LSL64_
102 .\" @LSR64_
103 .
104 .\" @ROL8
105 .\" @ROR8
106 .\" @ROL16
107 .\" @ROR16
108 .\" @ROL24
109 .\" @ROR24
110 .\" @ROL32
111 .\" @ROR32
112 .\" @ROL64
113 .\" @ROR64
114 .\" @ROL64_
115 .\" @ROR64_
116 .
117 .\" @ENDSWAP16
118 .\" @ENDSWAP32
119 .\" @ENDSWAP64
120 .
121 .\" @BTOH16
122 .\" @LTOH16
123 .\" @HTOB16
124 .\" @HTOL16
125 .\" @BTOH32
126 .\" @LTOH32
127 .\" @HTOB32
128 .\" @HTOL32
129 .\" @BTOH64
130 .\" @LTOH64
131 .\" @HTOB64
132 .\" @HTOL64
133 .
134 .\" @RAW8
135 .\" @RAW16
136 .\" @RAW32
137 .\" @RAW64
138 .
139 .\" @GETBYTE
140 .\" @PUTBYTE
141 .
142 .\" @LOAD8
143 .\" @STORE8
144 .
145 .\" @LOAD16_L
146 .\" @LOAD16_B
147 .\" @LOAD16
148 .\" @STORE16_L
149 .\" @STORE16_B
150 .\" @STORE16
151 .
152 .\" @LOAD24_L
153 .\" @LOAD24_B
154 .\" @LOAD24
155 .\" @STORE24_L
156 .\" @STORE24_B
157 .\" @STORE24
158 .
159 .\" @LOAD32_L
160 .\" @LOAD32_B
161 .\" @LOAD32
162 .\" @STORE32_L
163 .\" @STORE32_B
164 .\" @STORE32
165 .
166 .\" @LOAD64_L
167 .\" @LOAD64_B
168 .\" @LOAD64
169 .\" @STORE64_L
170 .\" @STORE64_B
171 .\" @STORE64
172 .
173 .\" @LOAD64_L_
174 .\" @LOAD64_B_
175 .\" @LOAD64_
176 .\" @STORE64_L_
177 .\" @STORE64_B_
178 .\" @STORE64_
179 .
180 .\" @SET64
181 .\" @X64
182 .\" @ASSIGN64
183 .\" @HI64
184 .\" @LO64
185 .\" @GET64
186 .\" @SETBYTE64
187 .\" @AND64
188 .\" @OR64
189 .\" @XOR64
190 .\" @CPL64
191 .\" @ADD64
192 .\" @SUB64
193 .\" @CMP64
194 .\" @ZERO64
195 .
196 .\"--------------------------------------------------------------------------
197 .SH NAME
198 bits \- portable bit manipulation macros
199 .
200 .\"--------------------------------------------------------------------------
201 .SH SYNOPSIS
202 .
203 .nf
204 .B "#include <mLib/bits.h>"
205 .PP
206 .BR "typedef " ... " octet;"
207 .BR "typedef " ... " uint16;"
208 .BR "typedef " ... " uint24;"
209 .BR "typedef " ... " uint32;"
210 .BR "typedef " ... " uint64;"
211 .BR "typedef " ... " kludge64;"
212 .PP
213 .fi
214 In the following,
215 .I w
216 is one of
217 .BR 8 ,
218 .BR 16 ,
219 .BR 24 ,
220 and
221 .BR 32 ,
222 and, on platforms with a 64-bit type,
223 .BR 64 ;
224 and
225 .I we
226 is one of
227 .BR 8 ,
228 .BR 16 ,
229 .BR 16_L ,
230 .BR 16_B ,
231 .BR 24 ,
232 .BR 24_L ,
233 .BR 24_B ,
234 .BR 32 ,
235 .BR 32_L ,
236 and
237 .BR 32_B ,
238 and, on platforms with a 64-bit type,
239 .BR 64 ,
240 .BR 64_L ,
241 and
242 .BR 64_B .
243 .nf
244 .PP
245 .BI "#define TY_" we " " type
246 .BI "#define SZ_" we " \fR..."
247 .BI "#define MASK_" we " \fR..."
248 .PP
249 .BI "#define DOUINTSZ(" f ") \fR..."
250 .BI "#define DOUINTCONV(" f ") \fR..."
251 .PP
252 .IB type " U" w ( v );
253 .PP
254 .IB type " LSL" w ( type " " v ", int " s );
255 .IB type " LSR" w ( type " " v ", int " s );
256 .IB type " ROL" w ( type " " v ", int " s );
257 .IB type " ROR" w ( type " " v ", int " s );
258 .PP
259 .BI "octet GETBYTE(void *" p ", size_t " o );
260 .BI "void PUTBYTE(void *" p ", size_t " o ", octet " v );
261 .PP
262 .IB type " LOAD" we "(void *" p );
263 .BI "void STORE" we "(void *" p ", " type " " v );
264 .PP
265 .BI "void SET64(kludge64 &" d ", uint32 " h ", uint32 " l );
266 .BI "kludge64 X64(" hexh ", " hexl );
267 .BI "void ASSIGN64(kludge64 &" d ", " x );
268 .BI "uint32 HI64(kludge64 " x );
269 .BI "uint32 LO64(kludge64 " x );
270 .IB ty " GET64(" ty ", kludge64 " x );
271 .BI "void SETBYTE64(kludge64 &" z ", octet " x ", unsigned " j );
272 .BI "void AND64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
273 .BI "void OR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
274 .BI "void XOR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
275 .BI "void CPL64(kludge64 &" d ", kludge64 " x );
276 .BI "void ADD64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
277 .BI "void SUB64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
278 .BI "int CMP64(kludge64 " x ", " op ", kludge64 " y );
279 .BI "int ZERO64(kludge64 " x );
280 .fi
281 .
282 .\"--------------------------------------------------------------------------
283 .SH DESCRIPTION
284 .
285 The header file
286 .B <mLib/bits.h>
287 contains a number of useful definitions for portably dealing with bit-
288 and byte-level manipulation of larger quantities. The various macros
289 and types are named fairly systematically.
290 .PP
291 The header provides utilities for working with 64-bit quantities, but a
292 64-bit integer type is not guaranteed to exist under C89 rules. This
293 header takes two approaches. Firstly, if a 64-bit type is found, the
294 header defines the macro
295 .B HAVE_UINT64
296 and defines the various
297 .RB ... 64
298 macros as described below. Secondly, it unconditionally defines a type
299 .B kludge64
300 and a family of macros for working with them. See below for details.
301 .
302 .SS "Type definitions"
303 A number of types are defined.
304 .TP
305 .B octet
306 Equivalent to
307 .BR "unsigned char" .
308 This is intended to be used when a character array is used to represent
309 the octets of some external data format. Note that on some
310 architectures the
311 .B "unsigned char"
312 type may occupy more than 8 bits.
313 .TP
314 .B uint16
315 Equivalent to
316 .BR "unsigned short" .
317 Intended to be used when a 16-bit value is required. This type is
318 always capable of representing any 16-bit unsigned value, but the actual
319 type may be wider than 16 bits and will require masking.
320 .TP
321 .B uint24
322 Equivalent to some (architecture-dependent) standard type. Capable of
323 representing any unsigned 24-bit value, although the actual type may
324 be wider than 24 bits.
325 .TP
326 .B uint32
327 Equivalent to some (architecture-dependent) standard type. Capable of
328 representing any unsigned 32-bit value, although the actual type may
329 be wider than 32 bits.
330 .TP
331 .B uint64
332 Equivalent to some (architecture-dependent) standard type, if it exists.
333 Capable of representing any unsigned 64-bit value, although the
334 actual type may be wider than 64 bits.
335 .
336 .SS "Size/endianness suffixes"
337 Let
338 .I w
339 be one of the size suffixes: 8, 16, 24, 32, and (if available) 64.
340 Furthermore, let
341 .I we
342 be one of the size-and-endian suffixes
343 .IR w ,
344 or, where
345 .IR w \~>\~8,
346 .IB w _L
347 or
348 .IB w _B \fR,
349 where
350 .RB ` _L '
351 denotes little-endian (Intel, VAX) representation, and
352 .RB ` _B '
353 denotes big-endian (IBM, network) representation; omitting an explicit
354 suffix gives big-endian order by default, since this is most common in
355 portable data formats.
356 .PP
357 The macro invocation
358 .BI DOUINTSZ( f )
359 invokes a given macro
360 .I f
361 repeatedly, as
362 .IB f ( w )
363 for each size suffix
364 .I w
365 listed above.
366 .PP
367 The macro invocation
368 .BI DOUINTCONV( f )
369 invokes a given macro
370 .I f
371 repeatedly, as
372 .IR f ( w ", " we ", " suff )
373 where
374 .I we
375 ranges over size-and-endian suffixes as described above,
376 .I w
377 is just the corresponding bit width, as an integer, and
378 .I suff
379 is a suffix
380 .IR w ,
381 .IB w l\fR,
382 or
383 .IB w b\fR,
384 suitable for a C function name.
385 .PP
386 These macros are intended to be used to define families of related
387 functions.
388 .
389 .SS "Utility macros"
390 For each size-and-endian suffix
391 .IR we ,
392 the following macros are defined.
393 .TP
394 .BI TY_ we
395 A synonym for the appropriate one of the types
396 .BR octet ,
397 .BR uint32 ,
398 etc.\& listed above.
399 .TP
400 .BI SZ_ we
401 The number of octets needed to represent a value of the corresponding
402 type; i.e., this is
403 .IR w /8.
404 .TP
405 .BI MASK_ we
406 The largest integer representable in the corresponding type; i.e., this
407 is
408 .RI 2\*(ss w \*(se\~\-\~1.
409 .PP
410 (Note that the endianness suffix is irrelevant in the above
411 definitions.)
412 .PP
413 For each size suffix
414 .IR w ,
415 the macro invocation
416 .BI U w ( x )
417 coerces an integer
418 .I x
419 to the appropriate type; specifically, it returns the smallest
420 nonnegative integer congruent to
421 .I x
422 (modulo
423 .RI 2\*(ss w \*(se).
424 .
425 .SS "Shift and rotate"
426 For each size suffix
427 .IR w ,
428 the macro invocations
429 .BI LSL w ( x ", " n )
430 and
431 .BI LSR w ( x ", " n )
432 shift a
433 .IR w -bit
434 quantity
435 .I x
436 left or right, respectively, by
437 .I n
438 places; if
439 .IR n \~\(>=\~ w
440 then
441 .I n
442 is reduced modulo
443 .IR w .
444 (This behaviour is unfortunate, but (a) it's what a number of CPUs
445 provide natively, and (b) it's a cheap way to prevent undefined
446 behaviour.) Similarly,
447 .BI ROL w ( x ", " n )
448 and
449 .BI ROR w ( x ", " n )
450 rotate a
451 .IR w -bit
452 quantity
453 .I x
454 left or right, respectively, by
455 .I n
456 places.
457 .
458 .SS "Byte order conversions"
459 For each size suffix
460 .IR w ,
461 the macro invocation
462 .BI ENDSWAP w ( x )
463 returns the
464 .IR w -bit
465 value
466 .IR x
467 with its bytes reversed. The
468 .B ENDSWAP8
469 macro does nothing (except truncate its operand to 8 bits), but is
470 provided for the sake of completeness.
471 .PP
472 A
473 .I big-endian
474 representation stores the most significant octet of an integer at the
475 lowest address, with the following octets in decreasing order of
476 significance. A
477 .I little-endian
478 representation instead stores the
479 .I least
480 significant octet at the lowest address, with the following octets in
481 increasing order of significance. An environment has a preferred order
482 for arranging the constituent octets of an integer of some given size in
483 memory; this might be either the big- or little-endian representation
484 just described, or something else strange.
485 .PP
486 It might be possible to rearrange the bits in an integer so that, when
487 that integer is stored to memory in the environment's preferred manner,
488 you end up with the big- or little-endian representation of the original
489 integer; and, similarly, it might be possible to load a big- or
490 little-endian representation of an integer into a variable using the
491 environment's preferred ordering and then rearrange the bits so as to
492 recover the integer value originally represented. If the environment is
493 sufficiently strange, these things might not be possible, but this is
494 actually quite rare.
495 .PP
496 Say that an integer has been converted to
497 .I big-
498 or
499 .I "little-endian form"
500 if, when it is stored in memory in the environment's preferred manner,
501 one ends up with a big- or little-endian representation of the original
502 integer. Equivalently, if one starts with a big- or little-endian
503 representation of some integer, and loads it into a variable using the
504 environment's preferred manner, one ends up with the big- or
505 little-endian form of the original integer.
506 .PP
507 If these things are possible, then the following macros are defined.
508 .TP
509 .BI HTOL w ( x )
510 Convert a
511 .IR w -bit
512 integer
513 .I x
514 to little-endian form.
515 .TP
516 .BI HTOB w ( x )
517 Convert a
518 .IR w -bit
519 integer
520 .I x
521 to big-endian form.
522 .TP
523 .BI LTOH w ( x )
524 Convert a
525 .IR w -bit
526 integer
527 .I x
528 from little-endian form.
529 .TP
530 .BI BTOH w ( x )
531 Convert a
532 .IR w -bit
533 integer
534 .I x
535 from big-endian form.
536 .
537 .SS "Load and store"
538 The macro invocation
539 .BI GETBYTE( p ", " o )
540 returns the
541 .IR o th
542 octet following the address
543 .IR p .
544 Conversely,
545 .BI PUTBYTE( p ", " o ", " v )
546 stores
547 .I v
548 in the
549 .IR o th
550 byte following the address
551 .IR p .
552 These macros always operate on byte offsets regardless of the type of
553 the pointer
554 .IR p .
555 .PP
556 For each size suffix
557 .IR w ,
558 there may be a macro such that the invocation
559 .BI RAW w ( p )
560 is an lvalue designating the
561 .IR w /8
562 octets starting at address
563 .IR p ,
564 interpreted according to the environment's preferred representation,
565 except that
566 .I p
567 need not be aligned in any particular fashion. There are many reasons
568 why this might not be possible; programmers are not normally expected to
569 use these macros directly, and they are documented in case they are
570 useful for special effects.
571 .PP
572 For each size-and-endian suffix
573 .IR we ,
574 the macro invocation
575 .BI LOAD we ( p )
576 loads and returns a value in the corresponding format at address
577 .IR p ;
578 similarly,
579 .BI STORE we ( p ", " x )
580 stores the value
581 .I x
582 at address
583 .I p
584 in the corresponding format.
585 .
586 .SS "64-bit support"
587 For portability to environments without native 64-bit integers, the
588 structure
589 .B kludge64
590 is defined. If the target platform is known to have an unsigned 64-bit
591 integer type, then this structure merely encapsulates a native integer,
592 and a decent optimizing compiler can be expected to handle this exactly
593 as if it were the native type. Otherwise, it contains two 32-bit halves
594 which are processed the hard way.
595 .PP
596 For each of the above macros with a suffix
597 .BR 64 ,
598 .BR 64_L ,
599 or
600 .BR 64_B ,
601 an additional `kludge' macro is defined, whose name has an additional
602 final underscore; e.g., the kludge macro corresponding to
603 .B ROR64
604 is
605 .BR ROR64_ ;
606 and that corresponding to
607 .B LOAD64_L
608 is
609 .BR LOAD64_L_ .
610 If the original macro would have
611 .I returned
612 a value of type
613 .BR uint64 ,
614 then the kludge macro has an additional first argument, denoted
615 .IR d ,
616 which should be an lvalue of type
617 .BR kludge64 ,
618 and the kludge macro will store its result in
619 .IR d .
620 The kludge macro's remaining arguments are the same as the original
621 macro, except that where the original macro accepts an argument of type
622 .BR uint64 ,
623 the kludge macro accepts an argument of type
624 .B kludge64
625 instead.
626 .PP
627 Finally, a number of additional macros are provided, to make working
628 with
629 .B kludge64
630 somewhat less awful.
631 .TP
632 .BI SET64( d ", " h ", " l )
633 Set the high 32 bits of
634 .I d
635 to be
636 .IR h ,
637 and the low 32 bits to be
638 .IR l .
639 Both
640 .I h
641 and
642 .I l
643 may be arbitrary integers.
644 .TP
645 .BI X64( hexh ", " hexl )
646 Expands to an initializer for an object of type
647 .B kludge64
648 where
649 .I hexh
650 and
651 .I hexl
652 encode the high and low 32-bit halves in hexadecimal, without any
653 .B 0x
654 prefix.
655 .TP
656 .BI ASSIGN64( d ", " x )
657 Make
658 .I d
659 be a copy of the
660 .B kludge64
661 .IR x .
662 .TP
663 .BI HI64( x )
664 Return the high 32 bits of
665 .IR x .
666 .TP
667 .BI LO64( x )
668 Return the low 32 bits of
669 .IR x .
670 .TP
671 .BI GET64( t ", " x )
672 Return the value of
673 .I x
674 as a value of type
675 .IR t .
676 If
677 .I t
678 is an unsigned integer type, then the value will be truncated to fit as
679 necessary; if
680 .I t
681 is a signed integer type, then the behaviour is undefined if the value
682 of
683 .I x
684 is too large.
685 .TP
686 .BI SETBYTE64( z ", " x ", " j )
687 Store the value
688 .I x
689 in byte
690 .I j
691 of
692 .IR z ,
693 which is assumed to be initially zero.
694 Bytes are numbered with the least significant being byte zero and the
695 most significant being byte 7.
696 .TP
697 .BI AND64( d ", " x ", " y )
698 Set
699 .I d
700 to be the bitwise-and of the two
701 .B kludge64
702 arguments
703 .I x
704 and
705 .IR y .
706 .TP
707 .BI OR64( d ", " x ", " y )
708 Set
709 .I d
710 to be the bitwise-or of the two
711 .B kludge64
712 arguments
713 .I x
714 and
715 .IR y .
716 .TP
717 .BI XOR64( d ", " x ", " y )
718 Set
719 .I d
720 to be the bitwise-exclusive-or of the two
721 .B kludge64
722 arguments
723 .I x
724 and
725 .IR y .
726 .TP
727 .BI CPL64( d ", " x )
728 Set
729 .I d
730 to be the bitwise complement of the
731 .B kludge64
732 argument
733 .IR x .
734 .TP
735 .BI ADD64( d ", " x ", " y )
736 Set
737 .I d
738 to be the sum of the two
739 .B kludge64
740 arguments
741 .I x
742 and
743 .IR y .
744 .TP
745 .BI SUB64( d ", " x ", " y )
746 Set
747 .I d
748 to be the difference of the two
749 .B kludge64
750 arguments
751 .I x
752 and
753 .IR y .
754 .TP
755 .BI CMP64( x ", " op ", " y )
756 Here,
757 .I x
758 and
759 .I y
760 should be arguments of type
761 .B kludge64
762 and
763 .I op
764 should be one of the relational operators
765 .BR == ,
766 .BR < ,
767 .BR <= ,
768 .BR > ,
769 or
770 .B >=
771 \(en
772 .I not
773 .BR != .
774 Evaluates nonzero if
775 .IR x \~ op \~ y .
776 .TP
777 .BI ZERO64( x )
778 Evaluates nonzero if the
779 .B kludge64
780 argument
781 .I x
782 is exactly zero.
783 .
784 .\"--------------------------------------------------------------------------
785 .SH "SEE ALSO"
786 .
787 .BR mLib (3).
788 .
789 .\"--------------------------------------------------------------------------
790 .SH AUTHOR
791 .
792 Mark Wooding, <mdw@distorted.org.uk>
793 .
794 .\"----- That's all, folks --------------------------------------------------