[mLib] / utils / bits.3.in

.\" -*-nroff-*-
.\"
.\" Manual for bit manipulation
.\"
.\" (c) 1999, 2001, 2005, 2009, 2018, 2024 Straylight/Edgeware
.\"
.
.\"----- Licensing notice ---------------------------------------------------
.\"
.\" This file is part of the mLib utilities library.
.\"
.\" mLib is free software: you can redistribute it and/or modify it under
.\" the terms of the GNU Library General Public License as published by
.\" the Free Software Foundation; either version 2 of the License, or (at
.\" your option) any later version.
.\"
.\" mLib is distributed in the hope that it will be useful, but WITHOUT
.\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
.\" FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
.\" License for more details.
.\"
.\" You should have received a copy of the GNU Library General Public
.\" License along with mLib.  If not, write to the Free Software
.\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
.\" USA.
.
.\"--------------------------------------------------------------------------
.so ../defs.man \" @@@PRE@@@
.
.\"--------------------------------------------------------------------------
.TH bits 3mLib "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
.\" @octet
.\" @uint16
.\" @uint24
.\" @uint32
.\" @uint64
.\" @kludge64
.
.\" @MASK_8
.\" @MASK_16
.\" @MASK_16_L
.\" @MASK_16_B
.\" @MASK_24
.\" @MASK_24_L
.\" @MASK_24_B
.\" @MASK_32
.\" @MASK_32_L
.\" @MASK_32_B
.\" @MASK_64
.\" @MASK_64_L
.\" @MASK_64_B
.
.\" @SZ_8
.\" @SZ_16
.\" @SZ_16_L
.\" @SZ_16_B
.\" @SZ_24
.\" @SZ_24_L
.\" @SZ_24_B
.\" @SZ_32
.\" @SZ_32_L
.\" @SZ_32_B
.\" @SZ_64
.\" @SZ_64_L
.\" @SZ_64_B
.
.\" @TY_8
.\" @TY_16
.\" @TY_16_L
.\" @TY_16_B
.\" @TY_24
.\" @TY_24_L
.\" @TY_24_B
.\" @TY_32
.\" @TY_32_L
.\" @TY_32_B
.\" @TY_64
.\" @TY_64_L
.\" @TY_64_B
.
.\" @DOUINTSZ
.\" @DOUINTCONV
.
.\" @U8
.\" @U16
.\" @U24
.\" @U32
.\" @U64
.\" @U64_
.
.\" @LSL8
.\" @LSR8
.\" @LSL16
.\" @LSR16
.\" @LSL24
.\" @LSR24
.\" @LSL32
.\" @LSR32
.\" @LSL64
.\" @LSR64
.\" @LSL64_
.\" @LSR64_
.
.\" @ROL8
.\" @ROR8
.\" @ROL16
.\" @ROR16
.\" @ROL24
.\" @ROR24
.\" @ROL32
.\" @ROR32
.\" @ROL64
.\" @ROR64
.\" @ROL64_
.\" @ROR64_
.
.\" @ENDSWAP16
.\" @ENDSWAP32
.\" @ENDSWAP64
.
.\" @BTOH16
.\" @LTOH16
.\" @HTOB16
.\" @HTOL16
.\" @BTOH32
.\" @LTOH32
.\" @HTOB32
.\" @HTOL32
.\" @BTOH64
.\" @LTOH64
.\" @HTOB64
.\" @HTOL64
.
.\" @RAW8
.\" @RAW16
.\" @RAW32
.\" @RAW64
.
.\" @GETBYTE
.\" @PUTBYTE
.
.\" @LOAD8
.\" @STORE8
.
.\" @LOAD16_L
.\" @LOAD16_B
.\" @LOAD16
.\" @STORE16_L
.\" @STORE16_B
.\" @STORE16
.
.\" @LOAD24_L
.\" @LOAD24_B
.\" @LOAD24
.\" @STORE24_L
.\" @STORE24_B
.\" @STORE24
.
.\" @LOAD32_L
.\" @LOAD32_B
.\" @LOAD32
.\" @STORE32_L
.\" @STORE32_B
.\" @STORE32
.
.\" @LOAD64_L
.\" @LOAD64_B
.\" @LOAD64
.\" @STORE64_L
.\" @STORE64_B
.\" @STORE64
.
.\" @LOAD64_L_
.\" @LOAD64_B_
.\" @LOAD64_
.\" @STORE64_L_
.\" @STORE64_B_
.\" @STORE64_
.
.\" @SET64
.\" @X64
.\" @ASSIGN64
.\" @HI64
.\" @LO64
.\" @GET64
.\" @SETBYTE64
.\" @AND64
.\" @OR64
.\" @XOR64
.\" @CPL64
.\" @ADD64
.\" @SUB64
.\" @CMP64
.\" @ZERO64
.
.\"--------------------------------------------------------------------------
.SH NAME
bits \- portable bit manipulation macros
.
.\"--------------------------------------------------------------------------
.SH SYNOPSIS
.
.nf
.B "#include <mLib/bits.h>"
.PP
.BR "typedef " ... " octet;"
.BR "typedef " ... " uint16;"
.BR "typedef " ... " uint24;"
.BR "typedef " ... " uint32;"
.BR "typedef " ... " uint64;"
.BR "typedef " ... " kludge64;"
.PP
.fi
In the following,
.I w
is one of
.BR 8 ,
.BR 16 ,
.BR 24 ,
and
.BR 32 ,
and, on platforms with a 64-bit type,
.BR 64 ;
and
.I we
is one of
.BR 8 ,
.BR 16 ,
.BR 16_L ,
.BR 16_B ,
.BR 24 ,
.BR 24_L ,
.BR 24_B ,
.BR 32 ,
.BR 32_L ,
and
.BR 32_B ,
and, on platforms with a 64-bit type,
.BR 64 ,
.BR 64_L ,
and
.BR 64_B .
.nf
.PP
.BI "#define TY_" we " " type
.BI "#define SZ_" we " \fR..."
.BI "#define MASK_" we " \fR..."
.PP
.BI "#define DOUINTSZ(" f ") \fR..."
.BI "#define DOUINTCONV(" f ") \fR..."
.PP
.IB type " U" w ( v );
.PP
.IB type " LSL" w ( type " " v ", int " s );
.IB type " LSR" w ( type " " v ", int " s );
.IB type " ROL" w ( type " " v ", int " s );
.IB type " ROR" w ( type " " v ", int " s );
.PP
.BI "octet GETBYTE(void *" p ", size_t " o );
.BI "void PUTBYTE(void *" p ", size_t " o ", octet " v );
.PP
.IB type " LOAD" we "(void *" p );
.BI "void STORE" we "(void *" p ", " type " " v );
.PP
.BI "void SET64(kludge64 &" d ", uint32 " h ", uint32 " l );
.BI "kludge64 X64(" hexh ", " hexl );
.BI "void ASSIGN64(kludge64 &" d ", " x );
.BI "uint32 HI64(kludge64 " x );
.BI "uint32 LO64(kludge64 " x );
.IB ty " GET64(" ty ", kludge64 " x );
.BI "void SETBYTE64(kludge64 &" z ", octet " x ", unsigned " j );
.BI "void AND64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
.BI "void OR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
.BI "void XOR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
.BI "void CPL64(kludge64 &" d ", kludge64 " x );
.BI "void ADD64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
.BI "void SUB64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
.BI "int CMP64(kludge64 " x ", " op ", kludge64 " y );
.BI "int ZERO64(kludge64 " x );
.fi
.
.\"--------------------------------------------------------------------------
.SH DESCRIPTION
.
The header file
.B <mLib/bits.h>
contains a number of useful definitions for portably dealing with bit-
and byte-level manipulation of larger quantities.  The various macros
and types are named fairly systematically.
.PP
The header provides utilities for working with 64-bit quantities, but a
64-bit integer type is not guaranteed to exist under C89 rules.  This
header takes two approaches.  Firstly, if a 64-bit type is found, the
header defines the macro
.B HAVE_UINT64
and defines the various
.RB ... 64
macros as described below.  Secondly, it unconditionally defines a type
.B kludge64
and a family of macros for working with them.  See below for details.
.
.SS "Type definitions"
A number of types are defined.
.TP
.B octet
Equivalent to
.BR "unsigned char" .
This is intended to be used when a character array is used to represent
the octets of some external data format.  Note that on some
architectures the
.B "unsigned char"
type may occupy more than 8 bits.
.TP
.B uint16
Equivalent to
.BR "unsigned short" .
Intended to be used when a 16-bit value is required.  This type is
always capable of representing any 16-bit unsigned value, but the actual
type may be wider than 16 bits and will require masking.
.TP
.B uint24
Equivalent to some (architecture-dependent) standard type.  Capable of
representing any unsigned 24-bit value, although the actual type may
be wider than 24 bits.
.TP
.B uint32
Equivalent to some (architecture-dependent) standard type.  Capable of
representing any unsigned 32-bit value, although the actual type may
be wider than 32 bits.
.TP
.B uint64
Equivalent to some (architecture-dependent) standard type, if it exists.
Capable of representing any unsigned 64-bit value, although the
actual type may be wider than 64 bits.
.
.SS "Size/endianness suffixes"
Let
.I w
be one of the size suffixes: 8, 16, 24, 32, and (if available) 64.
Furthermore, let
.I we
be one of the size-and-endian suffixes
.IR w ,
or, where
.IR w \~>\~8,
.IB w _L
or
.IB w _B \fR,
where
.RB ` _L '
denotes little-endian (Intel, VAX) representation, and
.RB ` _B '
denotes big-endian (IBM, network) representation; omitting an explicit
suffix gives big-endian order by default, since this is most common in
portable data formats.
.PP
The macro invocation
.BI DOUINTSZ( f )
invokes a given macro
.I f
repeatedly, as
.IB f ( w )
for each size suffix
.I w
listed above.
.PP
The macro invocation
.BI DOUINTCONV( f )
invokes a given macro
.I f
repeatedly, as
.IR f ( w ", " we ", " suff )
where
.I we
ranges over size-and-endian suffixes as described above,
.I w
is just the corresponding bit width, as an integer, and
.I suff
is a suffix
.IR w ,
.IB w l\fR,
or
.IB w b\fR,
suitable for a C function name.
.PP
These macros are intended to be used to define families of related
functions.
.
.SS "Utility macros"
For each size-and-endian suffix
.IR we ,
the following macros are defined.
.TP
.BI TY_ we
A synonym for the appropriate one of the types
.BR octet ,
.BR uint32 ,
etc.\& listed above.
.TP
.BI SZ_ we
The number of octets needed to represent a value of the corresponding
type; i.e., this is
.IR w /8.
.TP
.BI MASK_ we
The largest integer representable in the corresponding type; i.e., this
is
.RI 2\*(ss w \*(se\~\-\~1.
.PP
(Note that the endianness suffix is irrelevant in the above
definitions.)
.PP
For each size suffix
.IR w ,
the macro invocation
.BI U w ( x )
coerces an integer
.I x
to the appropriate type; specifically, it returns the smallest
nonnegative integer congruent to
.I x
(modulo
.RI 2\*(ss w \*(se).
.
.SS "Shift and rotate"
For each size suffix
.IR w ,
the macro invocations
.BI LSL w ( x ", " n )
and
.BI LSR w ( x ", " n )
shift a
.IR w -bit
quantity
.I x
left or right, respectively, by
.I n
places; if
.IR n \~\(>=\~ w
then
.I n
is reduced modulo
.IR w .
(This behaviour is unfortunate, but (a) it's what a number of CPUs
provide natively, and (b) it's a cheap way to prevent undefined
behaviour.)  Similarly,
.BI ROL w ( x ", " n )
and
.BI ROR w ( x ", " n )
rotate a
.IR w -bit
quantity
.I x
left or right, respectively, by
.I n
places.
.
.SS "Byte order conversions"
For each size suffix
.IR w ,
the macro invocation
.BI ENDSWAP w ( x )
returns the
.IR w -bit
value
.IR x
with its bytes reversed.  The
.B ENDSWAP8
macro does nothing (except truncate its operand to 8 bits), but is
provided for the sake of completeness.
.PP
A
.I big-endian
representation stores the most significant octet of an integer at the
lowest address, with the following octets in decreasing order of
significance.  A
.I little-endian
representation instead stores the
.I least
significant octet at the lowest address, with the following octets in
increasing order of significance.  An environment has a preferred order
for arranging the constituent octets of an integer of some given size in
memory; this might be either the big- or little-endian representation
just described, or something else strange.
.PP
It might be possible to rearrange the bits in an integer so that, when
that integer is stored to memory in the environment's preferred manner,
you end up with the big- or little-endian representation of the original
integer; and, similarly, it might be possible to load a big- or
little-endian representation of an integer into a variable using the
environment's preferred ordering and then rearrange the bits so as to
recover the integer value originally represented.  If the environment is
sufficiently strange, these things might not be possible, but this is
actually quite rare.
.PP
Say that an integer has been converted to
.I big-
or
.I "little-endian form"
if, when it is stored in memory in the environment's preferred manner,
one ends up with a big- or little-endian representation of the original
integer.  Equivalently, if one starts with a big- or little-endian
representation of some integer, and loads it into a variable using the
environment's preferred manner, one ends up with the big- or
little-endian form of the original integer.
.PP
If these things are possible, then the following macros are defined.
.TP
.BI HTOL w ( x )
Convert a
.IR w -bit
integer
.I x
to little-endian form.
.TP
.BI HTOB w ( x )
Convert a
.IR w -bit
integer
.I x
to big-endian form.
.TP
.BI LTOH w ( x )
Convert a
.IR w -bit
integer
.I x
from little-endian form.
.TP
.BI BTOH w ( x )
Convert a
.IR w -bit
integer
.I x
from big-endian form.
.
.SS "Load and store"
The macro invocation
.BI GETBYTE( p ", " o )
returns the
.IR o th
octet following the address
.IR p .
Conversely,
.BI PUTBYTE( p ", " o ", " v )
stores
.I v
in the
.IR o th
byte following the address
.IR p .
These macros always operate on byte offsets regardless of the type of
the pointer
.IR p .
.PP
For each size suffix
.IR w ,
there may be a macro such that the invocation
.BI RAW w ( p )
is an lvalue designating the
.IR w /8
octets starting at address
.IR p ,
interpreted according to the environment's preferred representation,
except that
.I p
need not be aligned in any particular fashion.  There are many reasons
why this might not be possible; programmers are not normally expected to
use these macros directly, and they are documented in case they are
useful for special effects.
.PP
For each size-and-endian suffix
.IR we ,
the macro invocation
.BI LOAD we ( p )
loads and returns a value in the corresponding format at address
.IR p ;
similarly,
.BI STORE we ( p ", " x )
stores the value
.I x
at address
.I p
in the corresponding format.
.
.SS "64-bit support"
For portability to environments without native 64-bit integers, the
structure
.B kludge64
is defined.  If the target platform is known to have an unsigned 64-bit
integer type, then this structure merely encapsulates a native integer,
and a decent optimizing compiler can be expected to handle this exactly
as if it were the native type.  Otherwise, it contains two 32-bit halves
which are processed the hard way.
.PP
For each of the above macros with a suffix
.BR 64 ,
.BR 64_L ,
or
.BR 64_B ,
an additional `kludge' macro is defined, whose name has an additional
final underscore; e.g., the kludge macro corresponding to
.B ROR64
is
.BR ROR64_ ;
and that corresponding to
.B LOAD64_L
is
.BR LOAD64_L_ .
If the original macro would have
.I returned
a value of type
.BR uint64 ,
then the kludge macro has an additional first argument, denoted
.IR d ,
which should be an lvalue of type
.BR kludge64 ,
and the kludge macro will store its result in
.IR d .
The kludge macro's remaining arguments are the same as the original
macro, except that where the original macro accepts an argument of type
.BR uint64 ,
the kludge macro accepts an argument of type
.B kludge64
instead.
.PP
Finally, a number of additional macros are provided, to make working
with
.B kludge64
somewhat less awful.
.TP
.BI SET64( d ", " h ", " l )
Set the high 32 bits of
.I d
to be
.IR h ,
and the low 32 bits to be
.IR l .
Both
.I h
and
.I l
may be arbitrary integers.
.TP
.BI X64( hexh ", " hexl )
Expands to an initializer for an object of type
.B kludge64
where
.I hexh
and
.I hexl
encode the high and low 32-bit halves in hexadecimal, without any
.B 0x
prefix.
.TP
.BI ASSIGN64( d ", " x )
Make
.I d
be a copy of the
.B kludge64
.IR x .
.TP
.BI HI64( x )
Return the high 32 bits of
.IR x .
.TP
.BI LO64( x )
Return the low 32 bits of
.IR x .
.TP
.BI GET64( t ", " x )
Return the value of
.I x
as a value of type
.IR t .
If
.I t
is an unsigned integer type, then the value will be truncated to fit as
necessary; if
.I t
is a signed integer type, then the behaviour is undefined if the value
of
.I x
is too large.
.TP
.BI SETBYTE64( z ", " x ", " j )
Store the value
.I x
in byte
.I j
of
.IR z ,
which is assumed to be initially zero.
Bytes are numbered with the least significant being byte zero and the
most significant being byte 7.
.TP
.BI AND64( d ", " x ", " y )
Set
.I d
to be the bitwise-and of the two
.B kludge64
arguments
.I x
and
.IR y .
.TP
.BI OR64( d ", " x ", " y )
Set
.I d
to be the bitwise-or of the two
.B kludge64
arguments
.I x
and
.IR y .
.TP
.BI XOR64( d ", " x ", " y )
Set
.I d
to be the bitwise-exclusive-or of the two
.B kludge64
arguments
.I x
and
.IR y .
.TP
.BI CPL64( d ", " x )
Set
.I d
to be the bitwise complement of the
.B kludge64
argument
.IR x .
.TP
.BI ADD64( d ", " x ", " y )
Set
.I d
to be the sum of the two
.B kludge64
arguments
.I x
and
.IR y .
.TP
.BI SUB64( d ", " x ", " y )
Set
.I d
to be the difference of the two
.B kludge64
arguments
.I x
and
.IR y .
.TP
.BI CMP64( x ", " op ", " y )
Here,
.I x
and
.I y
should be arguments of type
.B kludge64
and
.I op
should be one of the relational operators
.BR == ,
.BR < ,
.BR <= ,
.BR > ,
or
.B >=
\(en
.I not
.BR != .
Evaluates nonzero if
.IR x \~ op \~ y .
.TP
.BI ZERO64( x )
Evaluates nonzero if the
.B kludge64
argument
.I x
is exactly zero.
.
.\"--------------------------------------------------------------------------
.SH "SEE ALSO"
.
.BR mLib (3).
.
.\"--------------------------------------------------------------------------
.SH AUTHOR
.
Mark Wooding, <mdw@distorted.org.uk>
.
.\"----- That's all, folks --------------------------------------------------
Commit	Line	Data
b6b9d458	1	.\" --nroff--
c4ccbbf9 MW	2	.\"
	3	.\" Manual for bit manipulation
	4	.\"
	5	.\" (c) 1999, 2001, 2005, 2009, 2018, 2024 Straylight/Edgeware
	6	.\"
	7	.
	8	.\"----- Licensing notice ---------------------------------------------------
	9	.\"
	10	.\" This file is part of the mLib utilities library.
	11	.\"
	12	.\" mLib is free software: you can redistribute it and/or modify it under
	13	.\" the terms of the GNU Library General Public License as published by
	14	.\" the Free Software Foundation; either version 2 of the License, or (at
	15	.\" your option) any later version.
	16	.\"
	17	.\" mLib is distributed in the hope that it will be useful, but WITHOUT
	18	.\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	19	.\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
	20	.\" License for more details.
	21	.\"
	22	.\" You should have received a copy of the GNU Library General Public
	23	.\" License along with mLib. If not, write to the Free Software
	24	.\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
	25	.\" USA.
	26	.
	27	.\"--------------------------------------------------------------------------
	28	.so ../defs.man \" @@@PRE@@@
	29	.
	30	.\"--------------------------------------------------------------------------
	31	.TH bits 3mLib "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
b1a20bee MW	32	.\" @octet
	33	.\" @uint16
	34	.\" @uint24
	35	.\" @uint32
	36	.\" @uint64
	37	.\" @kludge64
	38	.
	39	.\" @MASK_8
	40	.\" @MASK_16
	41	.\" @MASK_16_L
	42	.\" @MASK_16_B
	43	.\" @MASK_24
	44	.\" @MASK_24_L
	45	.\" @MASK_24_B
	46	.\" @MASK_32
	47	.\" @MASK_32_L
	48	.\" @MASK_32_B
	49	.\" @MASK_64
	50	.\" @MASK_64_L
	51	.\" @MASK_64_B
	52	.
	53	.\" @SZ_8
	54	.\" @SZ_16
	55	.\" @SZ_16_L
	56	.\" @SZ_16_B
	57	.\" @SZ_24
	58	.\" @SZ_24_L
	59	.\" @SZ_24_B
	60	.\" @SZ_32
	61	.\" @SZ_32_L
	62	.\" @SZ_32_B
	63	.\" @SZ_64
	64	.\" @SZ_64_L
	65	.\" @SZ_64_B
	66	.
	67	.\" @TY_8
	68	.\" @TY_16
	69	.\" @TY_16_L
	70	.\" @TY_16_B
	71	.\" @TY_24
	72	.\" @TY_24_L
	73	.\" @TY_24_B
	74	.\" @TY_32
	75	.\" @TY_32_L
	76	.\" @TY_32_B
	77	.\" @TY_64
	78	.\" @TY_64_L
	79	.\" @TY_64_B
	80	.
	81	.\" @DOUINTSZ
	82	.\" @DOUINTCONV
	83	.
08da152e	84	.\" @U8
08da152e	85	.\" @U16
a9779382	86	.\" @U24
08da152e	87	.\" @U32
a9779382 MW	88	.\" @U64
a9779382 MW	89	.\" @U64_
b1a20bee	90	.
08da152e	91	.\" @LSL8
	92	.\" @LSR8
	93	.\" @LSL16
	94	.\" @LSR16
a9779382 MW	95	.\" @LSL24
a9779382 MW	96	.\" @LSR24
08da152e	97	.\" @LSL32
08da152e	98	.\" @LSR32
a9779382 MW	99	.\" @LSL64
	100	.\" @LSR64
	101	.\" @LSL64_
	102	.\" @LSR64_
b1a20bee	103	.
08da152e	104	.\" @ROL8
	105	.\" @ROR8
	106	.\" @ROL16
	107	.\" @ROR16
a9779382 MW	108	.\" @ROL24
	109	.\" @ROR24
	110	.\" @ROL32
08da152e	111	.\" @ROL32
a9779382 MW	112	.\" @ROL64
	113	.\" @ROR64
	114	.\" @ROL64_
	115	.\" @ROR64_
b1a20bee MW	116	.
	117	.\" @ENDSWAP16
	118	.\" @ENDSWAP32
	119	.\" @ENDSWAP64
	120	.
	121	.\" @BTOH16
	122	.\" @LTOH16
	123	.\" @HTOB16
	124	.\" @HTOL16
	125	.\" @BTOH32
	126	.\" @LTOH32
	127	.\" @HTOB32
	128	.\" @HTOL32
	129	.\" @BTOH64
	130	.\" @LTOH64
	131	.\" @HTOB64
	132	.\" @HTOL64
	133	.
	134	.\" @RAW8
	135	.\" @RAW16
	136	.\" @RAW32
	137	.\" @RAW64
	138	.
08da152e	139	.\" @GETBYTE
08da152e	140	.\" @PUTBYTE
b1a20bee	141	.
08da152e	142	.\" @LOAD8
08da152e	143	.\" @STORE8
b1a20bee	144	.
08da152e	145	.\" @LOAD16_L
	146	.\" @LOAD16_B
	147	.\" @LOAD16
	148	.\" @STORE16_L
	149	.\" @STORE16_B
	150	.\" @STORE16
b1a20bee	151	.
a9779382 MW	152	.\" @LOAD24_L
	153	.\" @LOAD24_B
	154	.\" @LOAD24
	155	.\" @STORE24_L
	156	.\" @STORE24_B
	157	.\" @STORE24
b1a20bee	158	.
08da152e	159	.\" @LOAD32_L
	160	.\" @LOAD32_B
	161	.\" @LOAD32
	162	.\" @STORE32_L
	163	.\" @STORE32_B
	164	.\" @STORE32
b1a20bee	165	.
a9779382 MW	166	.\" @LOAD64_L
	167	.\" @LOAD64_B
	168	.\" @LOAD64
	169	.\" @STORE64_L
	170	.\" @STORE64_B
	171	.\" @STORE64
b1a20bee	172	.
a9779382 MW	173	.\" @LOAD64_L_
	174	.\" @LOAD64_B_
	175	.\" @LOAD64_
	176	.\" @STORE64_L_
	177	.\" @STORE64_B_
	178	.\" @STORE64_
b1a20bee	179	.
a9779382 MW	180	.\" @SET64
	181	.\" @X64
	182	.\" @ASSIGN64
	183	.\" @HI64
	184	.\" @LO64
	185	.\" @GET64
b1a20bee	186	.\" @SETBYTE64
a9779382 MW	187	.\" @AND64
	188	.\" @OR64
	189	.\" @XOR64
	190	.\" @CPL64
	191	.\" @ADD64
	192	.\" @SUB64
	193	.\" @CMP64
	194	.\" @ZERO64
c4ccbbf9 MW	195	.
	196	.\"--------------------------------------------------------------------------
	197	.SH NAME
	198	bits \- portable bit manipulation macros
	199	.
	200	.\"--------------------------------------------------------------------------
b6b9d458	201	.SH SYNOPSIS
c4ccbbf9	202	.
b6b9d458	203	.nf
b6b9d458	204	.B "#include <mLib/bits.h>"
d056fbdf	205	.PP
a9779382 MW	206	.BR "typedef " ... " octet;"
	207	.BR "typedef " ... " uint16;"
	208	.BR "typedef " ... " uint24;"
	209	.BR "typedef " ... " uint32;"
	210	.BR "typedef " ... " uint64;"
	211	.BR "typedef " ... " kludge64;"
d056fbdf	212	.PP
c4ccbbf9 MW	213	.fi
	214	In the following,
	215	.I w
	216	is one of
	217	.BR 8 ,
	218	.BR 16 ,
	219	.BR 24 ,
	220	and
	221	.BR 32 ,
	222	and, on platforms with a 64-bit type,
	223	.BR 64 ;
	224	and
	225	.I we
	226	is one of
	227	.BR 8 ,
	228	.BR 16 ,
	229	.BR 16_L ,
	230	.BR 16_B ,
	231	.BR 24 ,
	232	.BR 24_L ,
	233	.BR 24_B ,
	234	.BR 32 ,
	235	.BR 32_L ,
	236	and
	237	.BR 32_B ,
	238	and, on platforms with a 64-bit type,
	239	.BR 64 ,
	240	.BR 64_L ,
	241	and
	242	.BR 64_B .
	243	.nf
	244	.PP
a9779382 MW	245	.BI "#define TY_" we " " type
	246	.BI "#define SZ_" we " \fR..."
	247	.BI "#define MASK_" we " \fR..."
d056fbdf	248	.PP
a9779382 MW	249	.BI "#define DOUINTSZ(" f ") \fR..."
a9779382 MW	250	.BI "#define DOUINTCONV(" f ") \fR..."
d056fbdf	251	.PP
a9779382	252	.IB type " U" w ( v );
d056fbdf	253	.PP
a9779382 MW	254	.IB type " LSL" w ( type " " v ", int " s );
	255	.IB type " LSR" w ( type " " v ", int " s );
	256	.IB type " ROL" w ( type " " v ", int " s );
	257	.IB type " ROR" w ( type " " v ", int " s );
d056fbdf	258	.PP
a9779382 MW	259	.BI "octet GETBYTE(void *" p ", size_t " o );
a9779382 MW	260	.BI "void PUTBYTE(void *" p ", size_t " o ", octet " v );
d056fbdf	261	.PP
a9779382 MW	262	.IB type " LOAD" we "(void *" p );
a9779382 MW	263	.BI "void STORE" we "(void *" p ", " type " " v );
d056fbdf	264	.PP
a9779382 MW	265	.BI "void SET64(kludge64 &" d ", uint32 " h ", uint32 " l );
	266	.BI "kludge64 X64(" hexh ", " hexl );
	267	.BI "void ASSIGN64(kludge64 &" d ", " x );
	268	.BI "uint32 HI64(kludge64" x );
	269	.BI "uint32 LO64(kludge64" x );
	270	.IB ty " GET64(" ty ", kludge64 " x );
b1a20bee	271	.BI "void SETBYTE64(kludge64 &" z ", octet " x ", unsigned " j );
a9779382 MW	272	.BI "void AND64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
	273	.BI "void OR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
	274	.BI "void XOR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
	275	.BI "void CPL64(kludge64 &" d ", kludge64 " x );
	276	.BI "void ADD64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
	277	.BI "void SUB64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
	278	.BI "int CMP64(kludge64 " x ", " op ", kludge64 " y );
	279	.BI "int ZERO64(kludge64 " x );
b6b9d458	280	.fi
c4ccbbf9 MW	281	.
c4ccbbf9 MW	282	.\"--------------------------------------------------------------------------
b6b9d458	283	.SH DESCRIPTION
c4ccbbf9	284	.
b6b9d458	285	The header file
	286	.B <mLib/bits.h>
	287	contains a number of useful definitions for portably dealing with bit-
a9779382 MW	288	and byte-level manipulation of larger quantities. The various macros
	289	and types are named fairly systematically.
	290	.PP
	291	The header provides utilities for working with 64-bit quantities, but a
	292	64-bit integer type is not guaranteed to exist under C89 rules. This
	293	header takes two approaches. Firstly, if a 64-bit type is found, the
	294	header defines the macro
	295	.B HAVE_UINT64
	296	and defines the various
	297	.RB ... 64
	298	macros as described below. Secondly, it unconditionally defines a type
	299	.B kludge64
	300	and a family of macros for working with them. See below for details.
	301	.
	302	.SS "Type definitions"
	303	A number of types are defined.
b6b9d458	304	.TP
	305	.B octet
	306	Equivalent to
	307	.BR "unsigned char" .
	308	This is intended to be used when a character array is used to represent
	309	the octets of some external data format. Note that on some
	310	architectures the
	311	.B "unsigned char"
	312	type may occupy more than 8 bits.
	313	.TP
	314	.B uint16
	315	Equivalent to
	316	.BR "unsigned short" .
	317	Intended to be used when a 16-bit value is required. This type is
	318	always capable of representing any 16-bit unsigned value, but the actual
	319	type may be wider than 16 bits and will require masking.
	320	.TP
a9779382 MW	321	.B uint24
	322	Equivalent to some (architecture-dependent) standard type. Capable of
	323	representing any unsigned 24-bit value, although the the actual type may
	324	be wider than 24 bits.
	325	.TP
b6b9d458	326	.B uint32
	327	Equivalent to some (architecture-dependent) standard type. Capable of
	328	representing any unsigned 32-bit value, although the the actual type may
	329	be wider than 32 bits.
a9779382 MW	330	pp.TP
	331	.B uint64
	332	Equivalent to some (architecture-dependent) standard type, if it exists.
	333	Capable of representing any unsigned 64-bit value, although the the
	334	actual type may be wider than 64 bits.
	335	.
	336	.SS "Size/endianness suffixes"
	337	Let
	338	.I w
	339	be one of the size suffixes: 8, 16, 24, 32, and (if available) 64.
	340	Furthermore, let
	341	.I we
	342	be one of the size-and-endian suffixes
	343	.IR w ,
	344	or, where
	345	.IR w \~>\~8,
	346	.IB w _L
	347	or
	348	.IB w _B \fR,
	349	where
	350	.RB ` _L '
	351	denotes little-endian (Intel, VAX) representation, and
	352	.RB ` _B '
	353	denotes big-endian (IBM, network) representation; omitting an explicit
	354	suffix gives big-endian order by default, since this is most common in
	355	portable data formats.
b6b9d458	356	.PP
a9779382 MW	357	The macro invocation
	358	.BI DOUINTSZ( f )
	359	invokes a given macro
	360	.I f
	361	repeatedly, as
	362	.IB f ( w )
	363	for each size suffix
	364	.I w
	365	listed above.
b6b9d458	366	.PP
a9779382 MW	367	The macro invocation
	368	.BI DOUINTCONV( f )
	369	invokes a given macro
	370	.I f
	371	repeatedly, as
	372	.IR f ( w ", " we ", " suff )
b6b9d458	373	where
a9779382 MW	374	.I we
	375	ranges over size-and-endian suffixes as described above,
	376	.I w
	377	is just the corresponding bit width, as an integer, and
	378	.I suff
	379	is a suffix
	380	.IR w ,
	381	.IB w l\fR,
b6b9d458	382	or
a9779382 MW	383	.IB w b\fR,
a9779382 MW	384	suitable for a C function name.
b6b9d458	385	.PP
a9779382 MW	386	These macros are intended to be used to define families of related
	387	functions.
	388	.
	389	.SS "Utility macros"
	390	For each size-and-endian suffix
	391	.IR we ,
	392	the following macros are defined.
	393	.TP
	394	.BI TY_ we
	395	A synonym for the appropriate one of the types
	396	.BR octet ,
	397	.BR uint32 ,
	398	etc.\& listed above.
	399	.TP
	400	.BI SZ_ we
	401	The number of octets needed to represent a value of the corresponding
	402	type; i.e., this is
	403	.IR w /8.
	404	.TP
	405	.BI MASK_ we
	406	The largest integer representable in the corresponding type; i.e., this
	407	is
	408	.RI 2\(ss w \(se\~\-\~1.
	409	.PP
	410	(Note that the endianness suffix is irrelevant in the above
	411	definitions.)
	412	.PP
	413	For each size suffix
	414	.IR w ,
	415	the macro invocation
	416	.BI U w ( x )
	417	coerces an integer
	418	.I x
	419	to the appropriate type; specifically, it returns the smallest
	420	nonnegative integer congruent to
	421	.I x
	422	(modulo
	423	.RI 2\(ss w \(se).
	424	.
	425	.SS "Shift and rotate"
	426	For each size suffix
	427	.IR w ,
	428	the macro invocations
	429	.BI LSL w ( x ", " n )
b6b9d458	430	and
a9779382 MW	431	.BI LSR w ( x ", " n )
	432	shift a
	433	.IR w -bit
	434	quantity
	435	.I x
	436	left or right, respectively, by
b6b9d458	437	.I n
a9779382 MW	438	places; if
	439	.IR n \~\(>=\~ w
	440	then
	441	.I n
	442	is reduced modulo
	443	.IR w .
	444	(This behaviour is unfortunate, but (a) it's what a number of CPUs
	445	provide natively, and (b) it's a cheap way to prevent undefined
	446	behaviour.) Similarly,
	447	.BI ROL w ( x ", " n )
b6b9d458	448	and
a9779382 MW	449	.BI ROR w ( x ", " n )
	450	rotate a
	451	.IR w -bit
	452	quantity
	453	.I x
	454	left or right, respectively, by
b6b9d458	455	.I n
a9779382 MW	456	places.
a9779382 MW	457	.
374bb459 MW	458	.SS "Byte order conversions"
	459	For each size suffix
	460	.IR w ,
	461	the macro invocation
	462	.BI ENDSWAP w ( x )
	463	returns the
	464	.IR w -bit
	465	value
	466	.IR x
	467	with its bytes reversed. The
	468	.B ENDSWAP8
	469	macro does nothing (except truncate its operand to 8 bits), but is
	470	provided for the sake of completeness.
	471	.PP
	472	A
	473	.I big-endian
	474	representation stores the most significant octet of an integer at the
	475	lowest address, with the following octets in decreasing order of
	476	significance. A
	477	.I little-endian
	478	representation instead stores the
	479	.I least
	480	significant octet at the lowest address, with the following octets in
	481	increasing order of significance. An environment has a preferred order
	482	for arranging the constituent octets of an integer of some given size in
	483	memory; this might be either the big- or little-endian representation
	484	just described, or something else strange.
	485	.PP
	486	It might be possible to rearrange the bits in an integer so that, when
	487	that integer is stored to memory in the environment's preferred manner,
	488	you end up with the big- or little-endian representation of the original
	489	integer; and, similarly, it might be possible to load a big- or
	490	little-endian representation of an integer into a variable using the
	491	environment's preferred ordering and then rearrange the bits so as to
	492	recover the integer value originally represented. If the environment is
	493	sufficiently strange, these things might not be possible, but this is
	494	actually quite rare.
	495	.PP
	496	Say that an integer has been converted to
	497	.I big-
	498	or
	499	.I "little-endian form"
	500	if, when it is stored in memory in the environment's preferred manner,
	501	one ends up with a big- or little-endian representation of the original
	502	integer. Equivalently, if one starts with a big- or little-endian
	503	representation of some integer, and loads it into a variable using the
	504	environment's preferred manner, one ends up with the big- or
	505	little-endian form of the original integer.
	506	.PP
	507	If these things are possible, then the following macros are defined.
	508	.TP
	509	.BI HTOL w ( x )
	510	Convert a
	511	.IR w -bit
	512	integer
	513	.I x
	514	to little-endian form.
	515	.TP
	516	.BI HTOB w ( x )
	517	Convert a
	518	.IR w -bit
	519	integer
	520	.I x
	521	to big-endian form.
522	.TP
523	.BI LTOH w ( x )
524	Convert a
525	.IR w -bit
526	integer
527	.I x
528	from little-endian form.
529	.TP
530	.BI BTOH w ( x )
531	Convert a
532	.IR w -bit
533	integer
534	.I x
535	from big-endian form.
536	.
a9779382 MW	537	.SS "Load and store"
	538	The macro invocation
	539	.BI GETBYTE( p ", " o )
	540	returns the
	541	.IR o th
	542	octet following the address
	543	.IR p .
	544	Conversely,
	545	.BI PUTBYTE( p ", " o ", " v)
	546	stores
	547	.I
	548	v in the
	549	.IR o th
	550	byte following the address
	551	.IR p .
	552	These macros always operate on byte offsets regardless of the type of
	553	the pointer
	554	.IR p .
	555	.PP
a23bab96 MW	556	For each size suffix
	557	.IR w ,
	558	there may be a macro such that the invocation
	559	.BI RAW w ( p )
	560	is an lvalue designating the
	561	.IR w /8
	562	octets starting at address
	563	.IR p ,
	564	interpreted according to the environment's preferred representation,
	565	except that
	566	.I p
	567	need not be aligned in any particular fashion. There are many reasons
	568	why this might not be possible; programmers are not normally expected to
	569	use these macros directly, and they are documented in case they are
	570	useful for special effects.
	571	.PP
a9779382 MW	572	For each size-and-endian suffix
	573	.IR we ,
	574	the macro invocation
	575	.BI LOAD we ( p )
	576	loads and returns a value in the corresponding format at address
	577	.IR p ;
	578	similarly,
	579	.BI STORE we ( p ", " x )
	580	stores the value
	581	.I x
	582	at address
	583	.I p
	584	in the corresponding format.
	585	.
	586	.SS "64-bit support"
	587	For portability to environments without native 64-bit integers, the
	588	structure
	589	.B kludge64
	590	is defined. If the target platform is known to have an unsigned 64-bit
	591	integer type, then this structure merely encapsulates a native integer,
	592	and a decent optimizing compiler can be expected to handle this exactly
	593	as if it were the native type. Otherwise, it contains two 32-bit halves
	594	which are processed the hard way.
	595	.PP
	596	For each of the above macros with a suffix
	597	.BR 64 ,
	598	.BR 64_L ,
b6b9d458	599	or
a9779382 MW	600	.BR 64_B ,
	601	an additional `kludge' macro is defined, whose name has an additional
	602	final underscore; e.g., the kludge macro corresponding to
	603	.B ROR64
	604	is
	605	.BR ROR64_ ;
	606	and that corresponding to
	607	.B LOAD64_L
	608	is
	609	.BR LOAD64_L_ .
	610	If the original macro would have
	611	.I returned
	612	a value of type
	613	.BR uint64 ,
	614	then the kludge macro has an additional first argument, denoted
	615	.IR d ,
	616	which should be an lvalue of type
	617	.BR kludge64 ,
	618	and the kludge macro will store its result in
	619	.IR d .
	620	The kludge macro's remaining arguments are the same as the original
	621	macro, except that where the original macro accepts an argument of type
	622	.BR uint64 ,
	623	the kludge macro accepts an argument of type
	624	.B kludge64
	625	instead.
	626	.PP
	627	Finally, a number of additional macros are provided, to make working
	628	with
	629	.B kludge64
	630	somewhat less awful.
	631	.TP
	632	.BI SET64( d ", " h ", " l )
	633	Set the high 32 bits of
	634	.I d
	635	to be
	636	.IR h ,
	637	and the low 32 bits to be
	638	.IR l .
	639	Both
	640	.I h
	641	and
	642	.I l
	643	may be arbitrary integers.
	644	.TP
	645	.BI X64( hexh ", " hexl )
	646	Expands to an initializer for an object of type
	647	.B kludge64
	648	where
	649	.I hexh
	650	and
	651	.I hexl
	652	encode the high and low 32-bit halves in hexadecimal, without any
	653	.B 0x
	654	prefix.
	655	.TP
	656	.BI ASSIGN( d ", " x )
	657	Make
	658	.I d
	659	be a copy of the
	660	.B kludge64
	661	.IR x .
	662	.TP
	663	.BI HI64( x )
664	Return the high 32 bits of
665	.IR x .
666	.TP
667	.BI LO64( x )
668	Return the low 32 bits of
669	.IR x .
670	.TP
671	.BI GET64( t ", " x )
672	Return the value of
673	.I x
674	as a value of type
675	.IR t .
676	If
677	.I t
678	is an unsigned integer type, then the value will be truncated to fit as
679	necessary; if
680	.I t
681	is a signed integer type, then the behaviour is undefined if the value
682	of
683	.I x
684	is too large.
685	.TP
b1a20bee MW	686	.BI SETBYTE( z ", " x ", " j )
	687	Store the value
	688	.I x
	689	in byte
	690	.I j
	691	of
	692	.IR z ,
	693	which is assumed to be initially zero.
	694	Bytes are numbered with the least significant being byte zero and the
	695	most significant being byte 7.
	696	.TP
a9779382 MW	697	.BI AND64( d ", " x ", " y )
	698	Set
	699	.I d
	700	to be the bitwise-and of the two
	701	.B kludge64
	702	arguments
	703	.I x
b6b9d458	704	and
a9779382 MW	705	.IR y .
	706	.TP
	707	.BI OR64( d ", " x ", " y )
	708	Set
	709	.I d
	710	to be the bitwise-or of the two
	711	.B kludge64
	712	arguments
	713	.I x
	714	and
	715	.IR y .
	716	.TP
	717	.BI XOR64( d ", " x ", " y )
	718	Set
	719	.I d
	720	to be the bitwise-exclusive-or of the two
	721	.B kludge64
	722	arguments
	723	.I x
	724	and
	725	.IR y .
	726	.TP
	727	.BI CPL64( d ", " x )
	728	Set
	729	.I d
	730	to be the bitwise complement of the
	731	.B kludge64
	732	argument
	733	.IR x .
	734	.TP
	735	.BI ADD64( d ", " x ", " y )
	736	Set
	737	.I d
	738	to be the sum of the two
	739	.B kludge64
	740	arguments
	741	.I x
	742	and
	743	.IR y .
	744	.TP
	745	.BI SUB64( d ", " x ", " y )
	746	Set
	747	.I d
	748	to be the difference of the two
	749	.B kludge64
	750	arguments
	751	.I x
	752	and
	753	.IR y .
	754	.TP
	755	.BI CMP64( x ", " op ", " y )
	756	Here,
	757	.I x
	758	and
	759	.I y
	760	should be arguments of type
	761	.B kludge64
	762	and
	763	.I op
	764	should be one of the relational operators
	765	.BR == ,
	766	.BR < ,
	767	.BR <= ,
	768	.BR > ,
769	or
770	.B >=
771	\(en
772	.I not
773	.BR != .
774	Evaluates nonzero if
775	.IR x \~ op \~ y .
776	.TP
777	.BI ZERO64( x )
778	Evaluates nonzero if the
779	.B kludge64
780	argument
781	.I x
782	is exactly zero.
c4ccbbf9 MW	783	.
c4ccbbf9 MW	784	.\"--------------------------------------------------------------------------
08da152e	785	.SH "SEE ALSO"
c4ccbbf9	786	.
08da152e	787	.BR mLib (3).
c4ccbbf9 MW	788	.
c4ccbbf9 MW	789	.\"--------------------------------------------------------------------------
b6b9d458	790	.SH AUTHOR
c4ccbbf9	791	.
9b5ac6ff	792	Mark Wooding, <mdw@distorted.org.uk>
c4ccbbf9 MW	793	.
c4ccbbf9 MW	794	.\"----- That's all, folks --------------------------------------------------