@@@ much mess, mostly manpages
[mLib] / utils / bits.3.in
CommitLineData
b6b9d458 1.\" -*-nroff-*-
c4ccbbf9
MW
2.\"
3.\" Manual for bit manipulation
4.\"
5.\" (c) 1999, 2001, 2005, 2009, 2018, 2024 Straylight/Edgeware
6.\"
7.
8.\"----- Licensing notice ---------------------------------------------------
9.\"
10.\" This file is part of the mLib utilities library.
11.\"
12.\" mLib is free software: you can redistribute it and/or modify it under
13.\" the terms of the GNU Library General Public License as published by
14.\" the Free Software Foundation; either version 2 of the License, or (at
15.\" your option) any later version.
16.\"
17.\" mLib is distributed in the hope that it will be useful, but WITHOUT
18.\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19.\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20.\" License for more details.
21.\"
22.\" You should have received a copy of the GNU Library General Public
23.\" License along with mLib. If not, write to the Free Software
24.\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25.\" USA.
26.
27.\"--------------------------------------------------------------------------
28.so ../defs.man \" @@@PRE@@@
29.
30.\"--------------------------------------------------------------------------
31.TH bits 3mLib "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
a9779382
MW
32.\" octet
33.\" uint16
34.\" uint24
35.\" uint32
36.\" uint64
37.\" kludge64
38.\"
39.\" MASK_8
40.\" MASK_16
41.\" MASK_16_L
42.\" MASK_16_B
43.\" MASK_24
44.\" MASK_24_L
45.\" MASK_24_B
46.\" MASK_32
47.\" MASK_32_L
48.\" MASK_32_B
49.\" MASK_64
50.\" MASK_64_L
51.\" MASK_64_B
52.\"
53.\" SZ_8
54.\" SZ_16
55.\" SZ_16_L
56.\" SZ_16_B
57.\" SZ_24
58.\" SZ_24_L
59.\" SZ_24_B
60.\" SZ_32
61.\" SZ_32_L
62.\" SZ_32_B
63.\" SZ_64
64.\" SZ_64_L
65.\" SZ_64_B
66.\"
67.\" TY_8
68.\" TY_16
69.\" TY_16_L
70.\" TY_16_B
71.\" TY_24
72.\" TY_24_L
73.\" TY_24_B
74.\" TY_32
75.\" TY_32_L
76.\" TY_32_B
77.\" TY_64
78.\" TY_64_L
79.\" TY_64_B
80.\"
81.\" DOUINTSZ
82.\" DOUINTCONV
83.\"
08da152e 84.\" @U8
85.\" @U16
a9779382 86.\" @U24
08da152e 87.\" @U32
a9779382
MW
88.\" @U64
89.\" @U64_
08da152e 90.\"
91.\" @LSL8
92.\" @LSR8
93.\" @LSL16
94.\" @LSR16
a9779382
MW
95.\" @LSL24
96.\" @LSR24
08da152e 97.\" @LSL32
98.\" @LSR32
a9779382
MW
99.\" @LSL64
100.\" @LSR64
101.\" @LSL64_
102.\" @LSR64_
08da152e 103.\"
104.\" @ROL8
105.\" @ROR8
106.\" @ROL16
107.\" @ROR16
a9779382
MW
108.\" @ROL24
109.\" @ROR24
110.\" @ROL32
08da152e 111.\" @ROR32
a9779382
MW
112.\" @ROL64
113.\" @ROR64
114.\" @ROL64_
115.\" @ROR64_
08da152e 116.\"
374bb459
MW
117.\" ENDSWAP16
118.\" ENDSWAP32
119.\" ENDSWAP64
120.\"
121.\" BTOH16
122.\" LTOH16
123.\" HTOB16
124.\" HTOL16
125.\" BTOH32
126.\" LTOH32
127.\" HTOB32
128.\" HTOL32
129.\" BTOH64
130.\" LTOH64
131.\" HTOB64
132.\" HTOL64
133.\"
a23bab96
MW
134.\" RAW8
135.\" RAW16
136.\" RAW32
137.\" RAW64
138.\"
08da152e 139.\" @GETBYTE
140.\" @PUTBYTE
141.\"
142.\" @LOAD8
143.\" @STORE8
144.\"
145.\" @LOAD16_L
146.\" @LOAD16_B
147.\" @LOAD16
148.\" @STORE16_L
149.\" @STORE16_B
150.\" @STORE16
151.\"
a9779382
MW
152.\" @LOAD24_L
153.\" @LOAD24_B
154.\" @LOAD24
155.\" @STORE24_L
156.\" @STORE24_B
157.\" @STORE24
158.\"
08da152e 159.\" @LOAD32_L
160.\" @LOAD32_B
161.\" @LOAD32
162.\" @STORE32_L
163.\" @STORE32_B
164.\" @STORE32
165.\"
a9779382
MW
166.\" @LOAD64_L
167.\" @LOAD64_B
168.\" @LOAD64
169.\" @STORE64_L
170.\" @STORE64_B
171.\" @STORE64
172.\"
173.\" @LOAD64_L_
174.\" @LOAD64_B_
175.\" @LOAD64_
176.\" @STORE64_L_
177.\" @STORE64_B_
178.\" @STORE64_
179.\"
180.\" @SET64
181.\" @X64
182.\" @ASSIGN64
183.\" @HI64
184.\" @LO64
185.\" @GET64
186.\" @AND64
187.\" @OR64
188.\" @XOR64
189.\" @CPL64
190.\" @ADD64
191.\" @SUB64
192.\" @CMP64
193.\" @ZERO64
c4ccbbf9
MW
194.
195.\"--------------------------------------------------------------------------
196.SH NAME
197bits \- portable bit manipulation macros
198.
199.\"--------------------------------------------------------------------------
b6b9d458 200.SH SYNOPSIS
c4ccbbf9 201.
b6b9d458 202.nf
203.B "#include <mLib/bits.h>"
d056fbdf 204.PP
a9779382
MW
205.BR "typedef " ... " octet;"
206.BR "typedef " ... " uint16;"
207.BR "typedef " ... " uint24;"
208.BR "typedef " ... " uint32;"
209.BR "typedef " ... " uint64;"
210.BR "typedef " ... " kludge64;"
d056fbdf 211.PP
c4ccbbf9
MW
212.fi
213In the following,
214.I w
215is one of
216.BR 8 ,
217.BR 16 ,
218.BR 24 ,
219and
220.BR 32 ,
221and, on platforms with a 64-bit type,
222.BR 64 ;
223and
224.I we
225is one of
226.BR 8 ,
227.BR 16 ,
228.BR 16_L ,
229.BR 16_B ,
230.BR 24 ,
231.BR 24_L ,
232.BR 24_B ,
233.BR 32 ,
234.BR 32_L ,
235and
236.BR 32_B ,
237and, on platforms with a 64-bit type,
238.BR 64 ,
239.BR 64_L ,
240and
241.BR 64_B .
242.nf
243.PP
a9779382
MW
244.BI "#define TY_" we " " type
245.BI "#define SZ_" we " \fR..."
246.BI "#define MASK_" we " \fR..."
d056fbdf 247.PP
a9779382
MW
248.BI "#define DOUINTSZ(" f ") \fR..."
249.BI "#define DOUINTCONV(" f ") \fR..."
d056fbdf 250.PP
a9779382 251.IB type " U" w ( v );
d056fbdf 252.PP
a9779382
MW
253.IB type " LSL" w ( type " " v ", int " s );
254.IB type " LSR" w ( type " " v ", int " s );
255.IB type " ROL" w ( type " " v ", int " s );
256.IB type " ROR" w ( type " " v ", int " s );
d056fbdf 257.PP
a9779382
MW
258.BI "octet GETBYTE(void *" p ", size_t " o );
259.BI "void PUTBYTE(void *" p ", size_t " o ", octet " v );
d056fbdf 260.PP
a9779382
MW
261.IB type " LOAD" we "(void *" p );
262.BI "void STORE" we "(void *" p ", " type " " v );
d056fbdf 263.PP
a9779382
MW
264.BI "void SET64(kludge64 &" d ", uint32 " h ", uint32 " l );
265.BI "kludge64 X64(" hexh ", " hexl );
266.BI "void ASSIGN64(kludge64 &" d ", " x );
267.BI "uint32 HI64(kludge64 " x );
268.BI "uint32 LO64(kludge64 " x );
269.IB ty " GET64(" ty ", kludge64 " x );
270.BI "void AND64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
271.BI "void OR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
272.BI "void XOR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
273.BI "void CPL64(kludge64 &" d ", kludge64 " x );
274.BI "void ADD64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
275.BI "void SUB64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
276.BI "int CMP64(kludge64 " x ", " op ", kludge64 " y );
277.BI "int ZERO64(kludge64 " x );
b6b9d458 278.fi
c4ccbbf9
MW
279.
280.\"--------------------------------------------------------------------------
b6b9d458 281.SH DESCRIPTION
c4ccbbf9 282.
b6b9d458 283The header file
284.B <mLib/bits.h>
285contains a number of useful definitions for portably dealing with bit-
a9779382
MW
286and byte-level manipulation of larger quantities. The various macros
287and types are named fairly systematically.
288.PP
289The header provides utilities for working with 64-bit quantities, but a
29064-bit integer type is not guaranteed to exist under C89 rules. This
291header takes two approaches. Firstly, if a 64-bit type is found, the
292header defines the macro
293.B HAVE_UINT64
294and defines the various
295.RB ... 64
296macros as described below. Secondly, it unconditionally defines a type
297.B kludge64
298and a family of macros for working with it. See below for details.
299.
300.SS "Type definitions"
301A number of types are defined.
b6b9d458 302.TP
303.B octet
304Equivalent to
305.BR "unsigned char" .
306This is intended to be used when a character array is used to represent
307the octets of some external data format. Note that on some
308architectures the
309.B "unsigned char"
310type may occupy more than 8 bits.
311.TP
312.B uint16
313Equivalent to
314.BR "unsigned short" .
315Intended to be used when a 16-bit value is required. This type is
316always capable of representing any 16-bit unsigned value, but the actual
317type may be wider than 16 bits and will require masking.
318.TP
a9779382
MW
319.B uint24
320Equivalent to some (architecture-dependent) standard type. Capable of
321representing any unsigned 24-bit value, although the actual type may
322be wider than 24 bits.
323.TP
b6b9d458 324.B uint32
325Equivalent to some (architecture-dependent) standard type. Capable of
326representing any unsigned 32-bit value, although the actual type may
327be wider than 32 bits.
a9779382
MW
328.TP
329.B uint64
330Equivalent to some (architecture-dependent) standard type, if it exists.
331Capable of representing any unsigned 64-bit value, although the
332actual type may be wider than 64 bits.
333.
334.SS "Size/endianness suffixes"
335Let
336.I w
337be one of the size suffixes: 8, 16, 24, 32, and (if available) 64.
338Furthermore, let
339.I we
340be one of the size-and-endian suffixes
341.IR w ,
342or, where
343.IR w \~>\~8,
344.IB w _L
345or
346.IB w _B \fR,
347where
348.RB ` _L '
349denotes little-endian (Intel, VAX) representation, and
350.RB ` _B '
351denotes big-endian (IBM, network) representation; omitting an explicit
352suffix gives big-endian order by default, since this is most common in
353portable data formats.
b6b9d458 354.PP
a9779382
MW
355The macro invocation
356.BI DOUINTSZ( f )
357invokes a given macro
358.I f
359repeatedly, as
360.IB f ( w )
361for each size suffix
362.I w
363listed above.
b6b9d458 364.PP
a9779382
MW
365The macro invocation
366.BI DOUINTCONV( f )
367invokes a given macro
368.I f
369repeatedly, as
370.IR f ( w ", " we ", " suff )
b6b9d458 371where
a9779382
MW
372.I we
373ranges over size-and-endian suffixes as described above,
374.I w
375is just the corresponding bit width, as an integer, and
376.I suff
377is a suffix
378.IR w ,
379.IB w l\fR,
b6b9d458 380or
a9779382
MW
381.IB w b\fR,
382suitable for a C function name.
b6b9d458 383.PP
a9779382
MW
384These macros are intended to be used to define families of related
385functions.
386.
387.SS "Utility macros"
388For each size-and-endian suffix
389.IR we ,
390the following macros are defined.
391.TP
392.BI TY_ we
393A synonym for the appropriate one of the types
394.BR octet ,
395.BR uint32 ,
396etc.\& listed above.
397.TP
398.BI SZ_ we
399The number of octets needed to represent a value of the corresponding
400type; i.e., this is
401.IR w /8.
402.TP
403.BI MASK_ we
404The largest integer representable in the corresponding type; i.e., this
405is
406.RI 2\*(ss w \*(se\~\-\~1.
407.PP
408(Note that the endianness suffix is irrelevant in the above
409definitions.)
410.PP
411For each size suffix
412.IR w ,
413the macro invocation
414.BI U w ( x )
415coerces an integer
416.I x
417to the appropriate type; specifically, it returns the smallest
418nonnegative integer congruent to
419.I x
420(modulo
421.RI 2\*(ss w \*(se).
422.
423.SS "Shift and rotate"
424For each size suffix
425.IR w ,
426the macro invocations
427.BI LSL w ( x ", " n )
b6b9d458 428and
a9779382
MW
429.BI LSR w ( x ", " n )
430shift a
431.IR w -bit
432quantity
433.I x
434left or right, respectively, by
b6b9d458 435.I n
a9779382
MW
436places; if
437.IR n \~\(>=\~ w
438then
439.I n
440is reduced modulo
441.IR w .
442(This behaviour is unfortunate, but (a) it's what a number of CPUs
443provide natively, and (b) it's a cheap way to prevent undefined
444behaviour.) Similarly,
445.BI ROL w ( x ", " n )
b6b9d458 446and
a9779382
MW
447.BI ROR w ( x ", " n )
448rotate a
449.IR w -bit
450quantity
451.I x
452left or right, respectively, by
b6b9d458 453.I n
a9779382
MW
454places.
455.
374bb459
MW
456.SS "Byte order conversions"
457For each size suffix
458.IR w ,
459the macro invocation
460.BI ENDSWAP w ( x )
461returns the
462.IR w -bit
463value
464.IR x
465with its bytes reversed. The
466.B ENDSWAP8
467macro does nothing (except truncate its operand to 8 bits), but is
468provided for the sake of completeness.
469.PP
470A
471.I big-endian
472representation stores the most significant octet of an integer at the
473lowest address, with the following octets in decreasing order of
474significance. A
475.I little-endian
476representation instead stores the
477.I least
478significant octet at the lowest address, with the following octets in
479increasing order of significance. An environment has a preferred order
480for arranging the constituent octets of an integer of some given size in
481memory; this might be either the big- or little-endian representation
482just described, or something else strange.
483.PP
484It might be possible to rearrange the bits in an integer so that, when
485that integer is stored to memory in the environment's preferred manner,
486you end up with the big- or little-endian representation of the original
487integer; and, similarly, it might be possible to load a big- or
488little-endian representation of an integer into a variable using the
489environment's preferred ordering and then rearrange the bits so as to
490recover the integer value originally represented. If the environment is
491sufficiently strange, these things might not be possible, but this is
492actually quite rare.
493.PP
494Say that an integer has been converted to
495.I big-
496or
497.I "little-endian form"
498if, when it is stored in memory in the environment's preferred manner,
499one ends up with a big- or little-endian representation of the original
500integer. Equivalently, if one starts with a big- or little-endian
501representation of some integer, and loads it into a variable using the
502environment's preferred manner, one ends up with the big- or
503little-endian form of the original integer.
504.PP
505If these things are possible, then the following macros are defined.
506.TP
507.BI HTOL w ( x )
508Convert a
509.IR w -bit
510integer
511.I x
512to little-endian form.
513.TP
514.BI HTOB w ( x )
515Convert a
516.IR w -bit
517integer
518.I x
519to big-endian form.
520.TP
521.BI LTOH w ( x )
522Convert a
523.IR w -bit
524integer
525.I x
526from little-endian form.
527.TP
528.BI BTOH w ( x )
529Convert a
530.IR w -bit
531integer
532.I x
533from big-endian form.
534.
a9779382
MW
535.SS "Load and store"
536The macro invocation
537.BI GETBYTE( p ", " o )
538returns the
539.IR o th
540octet following the address
541.IR p .
542Conversely,
543.BI PUTBYTE( p ", " o ", " v )
544stores
545.I v
546in the
547.IR o th
548byte following the address
549.IR p .
550These macros always operate on byte offsets regardless of the type of
551the pointer
552.IR p .
553.PP
a23bab96
MW
554For each size suffix
555.IR w ,
556there may be a macro such that the invocation
557.BI RAW w ( p )
558is an lvalue designating the
559.IR w /8
560octets starting at address
561.IR p ,
562interpreted according to the environment's preferred representation,
563except that
564.I p
565need not be aligned in any particular fashion. There are many reasons
566why this might not be possible; programmers are not normally expected to
567use these macros directly, and they are documented in case they are
568useful for special effects.
569.PP
a9779382
MW
570For each size-and-endian suffix
571.IR we ,
572the macro invocation
573.BI LOAD we ( p )
574loads and returns a value in the corresponding format at address
575.IR p ;
576similarly,
577.BI STORE we ( p ", " x )
578stores the value
579.I x
580at address
581.I p
582in the corresponding format.
583.
584.SS "64-bit support"
585For portability to environments without native 64-bit integers, the
586structure
587.B kludge64
588is defined. If the target platform is known to have an unsigned 64-bit
589integer type, then this structure merely encapsulates a native integer,
590and a decent optimizing compiler can be expected to handle this exactly
591as if it were the native type. Otherwise, it contains two 32-bit halves
592which are processed the hard way.
593.PP
594For each of the above macros with a suffix
595.BR 64 ,
596.BR 64_L ,
b6b9d458 597or
a9779382
MW
598.BR 64_B ,
599an additional `kludge' macro is defined, whose name has an additional
600final underscore; e.g., the kludge macro corresponding to
601.B ROR64
602is
603.BR ROR64_ ;
604and that corresponding to
605.B LOAD64_L
606is
607.BR LOAD64_L_ .
608If the original macro would have
609.I returned
610a value of type
611.BR uint64 ,
612then the kludge macro has an additional first argument, denoted
613.IR d ,
614which should be an lvalue of type
615.BR kludge64 ,
616and the kludge macro will store its result in
617.IR d .
618The kludge macro's remaining arguments are the same as the original
619macro, except that where the original macro accepts an argument of type
620.BR uint64 ,
621the kludge macro accepts an argument of type
622.B kludge64
623instead.
624.PP
625Finally, a number of additional macros are provided, to make working
626with
627.B kludge64
628somewhat less awful.
629.TP
630.BI SET64( d ", " h ", " l )
631Set the high 32 bits of
632.I d
633to be
634.IR h ,
635and the low 32 bits to be
636.IR l .
637Both
638.I h
639and
640.I l
641may be arbitrary integers.
642.TP
643.BI X64( hexh ", " hexl )
644Expands to an initializer for an object of type
645.B kludge64
646where
647.I hexh
648and
649.I hexl
650encode the high and low 32-bit halves in hexadecimal, without any
651.B 0x
652prefix.
653.TP
654.BI ASSIGN64( d ", " x )
655Make
656.I d
657be a copy of the
658.B kludge64
659.IR x .
660.TP
661.BI HI64( x )
662Return the high 32 bits of
663.IR x .
664.TP
665.BI LO64( x )
666Return the low 32 bits of
667.IR x .
668.TP
669.BI GET64( t ", " x )
670Return the value of
671.I x
672as a value of type
673.IR t .
674If
675.I t
676is an unsigned integer type, then the value will be truncated to fit as
677necessary; if
678.I t
679is a signed integer type, then the behaviour is undefined if the value
680of
681.I x
682is too large.
683.TP
684.BI AND64( d ", " x ", " y )
685Set
686.I d
687to be the bitwise-and of the two
688.B kludge64
689arguments
690.I x
b6b9d458 691and
a9779382
MW
692.IR y .
693.TP
694.BI OR64( d ", " x ", " y )
695Set
696.I d
697to be the bitwise-or of the two
698.B kludge64
699arguments
700.I x
701and
702.IR y .
703.TP
704.BI XOR64( d ", " x ", " y )
705Set
706.I d
707to be the bitwise-exclusive-or of the two
708.B kludge64
709arguments
710.I x
711and
712.IR y .
713.TP
714.BI CPL64( d ", " x )
715Set
716.I d
717to be the bitwise complement of the
718.B kludge64
719argument
720.IR x .
721.TP
722.BI ADD64( d ", " x ", " y )
723Set
724.I d
725to be the sum of the two
726.B kludge64
727arguments
728.I x
729and
730.IR y .
731.TP
732.BI SUB64( d ", " x ", " y )
733Set
734.I d
735to be the difference of the two
736.B kludge64
737arguments
738.I x
739and
740.IR y .
741.TP
742.BI CMP64( x ", " op ", " y )
743Here,
744.I x
745and
746.I y
747should be arguments of type
748.B kludge64
749and
750.I op
751should be one of the relational operators
752.BR == ,
753.BR < ,
754.BR <= ,
755.BR > ,
756or
757.B >=
758\(en
759.I not
760.BR != .
761Evaluates nonzero if
762.IR x \~ op \~ y .
763.TP
764.BI ZERO64( x )
765Evaluates nonzero if the
766.B kludge64
767argument
768.I x
769is exactly zero.
c4ccbbf9
MW
770.
771.\"--------------------------------------------------------------------------
08da152e 772.SH "SEE ALSO"
c4ccbbf9 773.
08da152e 774.BR mLib (3).
c4ccbbf9
MW
775.
776.\"--------------------------------------------------------------------------
b6b9d458 777.SH AUTHOR
c4ccbbf9 778.
9b5ac6ff 779Mark Wooding, <mdw@distorted.org.uk>
c4ccbbf9
MW
780.
781.\"----- That's all, folks --------------------------------------------------