[mLib] / utils / bits.3.in

.\" -*-nroff-*-
.\"
.\" Manual for bit manipulation
.\"
.\" (c) 1999, 2001, 2005, 2009, 2018, 2024 Straylight/Edgeware
.\"
.
.\"----- Licensing notice ---------------------------------------------------
.\"
.\" This file is part of the mLib utilities library.
.\"
.\" mLib is free software: you can redistribute it and/or modify it under
.\" the terms of the GNU Library General Public License as published by
.\" the Free Software Foundation; either version 2 of the License, or (at
.\" your option) any later version.
.\"
.\" mLib is distributed in the hope that it will be useful, but WITHOUT
.\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
.\" FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
.\" License for more details.
.\"
.\" You should have received a copy of the GNU Library General Public
.\" License along with mLib.  If not, write to the Free Software
.\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
.\" USA.
.
.\"--------------------------------------------------------------------------
.so ../defs.man \" @@@PRE@@@
.
.\"--------------------------------------------------------------------------
.TH bits 3mLib "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
.\" octet
.\" uint16
.\" uint24
.\" uint32
.\" uint64
.\" kludge64
.\"
.\" MASK_8
.\" MASK_16
.\" MASK_16_L
.\" MASK_16_B
.\" MASK_24
.\" MASK_24_L
.\" MASK_24_B
.\" MASK_32
.\" MASK_32_L
.\" MASK_32_B
.\" MASK_64
.\" MASK_64_L
.\" MASK_64_B
.\"
.\" SZ_8
.\" SZ_16
.\" SZ_16_L
.\" SZ_16_B
.\" SZ_24
.\" SZ_24_L
.\" SZ_24_B
.\" SZ_32
.\" SZ_32_L
.\" SZ_32_B
.\" SZ_64
.\" SZ_64_L
.\" SZ_64_B
.\"
.\" TY_8
.\" TY_16
.\" TY_16_L
.\" TY_16_B
.\" TY_24
.\" TY_24_L
.\" TY_24_B
.\" TY_32
.\" TY_32_L
.\" TY_32_B
.\" TY_64
.\" TY_64_L
.\" TY_64_B
.\"
.\" DOUINTSZ
.\" DOUINTCONV
.\"
.\" @U8
.\" @U16
.\" @U24
.\" @U32
.\" @U64
.\" @U64_
.\"
.\" @LSL8
.\" @LSR8
.\" @LSL16
.\" @LSR16
.\" @LSL24
.\" @LSR24
.\" @LSL32
.\" @LSR32
.\" @LSL64
.\" @LSR64
.\" @LSL64_
.\" @LSR64_
.\"
.\" @ROL8
.\" @ROR8
.\" @ROL16
.\" @ROR16
.\" @ROL24
.\" @ROR24
.\" @ROL32
.\" @ROR32
.\" @ROL64
.\" @ROR64
.\" @ROL64_
.\" @ROR64_
.\"
.\" ENDSWAP16
.\" ENDSWAP32
.\" ENDSWAP64
.\"
.\" BTOH16
.\" LTOH16
.\" HTOB16
.\" HTOL16
.\" BTOH32
.\" LTOH32
.\" HTOB32
.\" HTOL32
.\" BTOH64
.\" LTOH64
.\" HTOB64
.\" HTOL64
.\"
.\" RAW8
.\" RAW16
.\" RAW32
.\" RAW64
.\"
.\" @GETBYTE
.\" @PUTBYTE
.\"
.\" @LOAD8
.\" @STORE8
.\"
.\" @LOAD16_L
.\" @LOAD16_B
.\" @LOAD16
.\" @STORE16_L
.\" @STORE16_B
.\" @STORE16
.\"
.\" @LOAD24_L
.\" @LOAD24_B
.\" @LOAD24
.\" @STORE24_L
.\" @STORE24_B
.\" @STORE24
.\"
.\" @LOAD32_L
.\" @LOAD32_B
.\" @LOAD32
.\" @STORE32_L
.\" @STORE32_B
.\" @STORE32
.\"
.\" @LOAD64_L
.\" @LOAD64_B
.\" @LOAD64
.\" @STORE64_L
.\" @STORE64_B
.\" @STORE64
.\"
.\" @LOAD64_L_
.\" @LOAD64_B_
.\" @LOAD64_
.\" @STORE64_L_
.\" @STORE64_B_
.\" @STORE64_
.\"
.\" @SET64
.\" @X64
.\" @ASSIGN64
.\" @HI64
.\" @LO64
.\" @GET64
.\" @AND64
.\" @OR64
.\" @XOR64
.\" @CPL64
.\" @ADD64
.\" @SUB64
.\" @CMP64
.\" @ZERO64
.
.\"--------------------------------------------------------------------------
.SH NAME
bits \- portable bit manipulation macros
.
.\"--------------------------------------------------------------------------
.SH SYNOPSIS
.
.nf
.B "#include <mLib/bits.h>"
.PP
.BR "typedef " ... " octet;"
.BR "typedef " ... " uint16;"
.BR "typedef " ... " uint24;"
.BR "typedef " ... " uint32;"
.BR "typedef " ... " uint64;"
.BR "typedef " ... " kludge64;"
.PP
.fi
In the following,
.I w
is one of
.BR 8 ,
.BR 16 ,
.BR 24 ,
and
.BR 32 ,
and, on platforms with a 64-bit type,
.BR 64 ;
and
.I we
is one of
.BR 8 ,
.BR 16 ,
.BR 16_L ,
.BR 16_B ,
.BR 24 ,
.BR 24_L ,
.BR 24_B ,
.BR 32 ,
.BR 32_L ,
and
.BR 32_B ,
and, on platforms with a 64-bit type,
.BR 64 ,
.BR 64_L ,
and
.BR 64_B .
.nf
.PP
.BI "#define TY_" we " " type
.BI "#define SZ_" we " \fR..."
.BI "#define MASK_" we " \fR..."
.PP
.BI "#define DOUINTSZ(" f ") \fR..."
.BI "#define DOUINTCONV(" f ") \fR..."
.PP
.IB type " U" w ( v );
.PP
.IB type " LSL" w ( type " " v ", int " s );
.IB type " LSR" w ( type " " v ", int " s );
.IB type " ROL" w ( type " " v ", int " s );
.IB type " ROR" w ( type " " v ", int " s );
.PP
.BI "octet GETBYTE(void *" p ", size_t " o );
.BI "void PUTBYTE(void *" p ", size_t " o ", octet " v );
.PP
.IB type " LOAD" we "(void *" p );
.BI "void STORE" we "(void *" p ", " type " " v );
.PP
.BI "void SET64(kludge64 &" d ", uint32 " h ", uint32 " l );
.BI "kludge64 X64(" hexh ", " hexl );
.BI "void ASSIGN64(kludge64 &" d ", " x );
.BI "uint32 HI64(kludge64 " x );
.BI "uint32 LO64(kludge64 " x );
.IB ty " GET64(" ty ", kludge64 " x );
.BI "void AND64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
.BI "void OR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
.BI "void XOR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
.BI "void CPL64(kludge64 &" d ", kludge64 " x );
.BI "void ADD64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
.BI "void SUB64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
.BI "int CMP64(kludge64 " x ", " op ", kludge64 " y );
.BI "int ZERO64(kludge64 " x );
.fi
.
.\"--------------------------------------------------------------------------
.SH DESCRIPTION
.
The header file
.B <mLib/bits.h>
contains a number of useful definitions for portably dealing with bit-
and byte-level manipulation of larger quantities.  The various macros
and types are named fairly systematically.
.PP
The header provides utilities for working with 64-bit quantities, but a
64-bit integer type is not guaranteed to exist under C89 rules.  This
header takes two approaches.  Firstly, if a 64-bit type is found, the
header defines the macro
.B HAVE_UINT64
and defines the various
.RB ... 64
macros as described below.  Secondly, it unconditionally defines a type
.B kludge64
and a family of macros for working with it.  See below for details.
.
.SS "Type definitions"
A number of types are defined.
.TP
.B octet
Equivalent to
.BR "unsigned char" .
This is intended to be used when a character array is used to represent
the octets of some external data format.  Note that on some
architectures the
.B "unsigned char"
type may occupy more than 8 bits.
.TP
.B uint16
Equivalent to
.BR "unsigned short" .
Intended to be used when a 16-bit value is required.  This type is
always capable of representing any 16-bit unsigned value, but the actual
type may be wider than 16 bits and will require masking.
.TP
.B uint24
Equivalent to some (architecture-dependent) standard type.  Capable of
representing any unsigned 24-bit value, although the actual type may
be wider than 24 bits.
.TP
.B uint32
Equivalent to some (architecture-dependent) standard type.  Capable of
representing any unsigned 32-bit value, although the actual type may
be wider than 32 bits.
.TP
.B uint64
Equivalent to some (architecture-dependent) standard type, if it exists.
Capable of representing any unsigned 64-bit value, although the
actual type may be wider than 64 bits.
.
.SS "Size/endianness suffixes"
Let
.I w
be one of the size suffixes: 8, 16, 24, 32, and (if available) 64.
Furthermore, let
.I we
be one of the size-and-endian suffixes
.IR w ,
or, where
.IR w \~>\~8,
.IB w _L
or
.IB w _B \fR,
where
.RB ` _L '
denotes little-endian (Intel, VAX) representation, and
.RB ` _B '
denotes big-endian (IBM, network) representation; omitting an explicit
suffix gives big-endian order by default, since this is most common in
portable data formats.
.PP
The macro invocation
.BI DOUINTSZ( f )
invokes a given macro
.I f
repeatedly, as
.IB f ( w )
for each size suffix
.I w
listed above.
.PP
The macro invocation
.BI DOUINTCONV( f )
invokes a given macro
.I f
repeatedly, as
.IR f ( w ", " we ", " suff )
where
.I we
ranges over size-and-endian suffixes as described above,
.I w
is just the corresponding bit width, as an integer, and
.I suff
is a suffix
.IR w ,
.IB w l\fR,
or
.IB w b\fR,
suitable for a C function name.
.PP
These macros are intended to be used to define families of related
functions.
.
.SS "Utility macros"
For each size-and-endian suffix
.IR we ,
the following macros are defined.
.TP
.BI TY_ we
A synonym for the appropriate one of the types
.BR octet ,
.BR uint32 ,
etc.\& listed above.
.TP
.BI SZ_ we
The number of octets needed to represent a value of the corresponding
type; i.e., this is
.IR w /8.
.TP
.BI MASK_ we
The largest integer representable in the corresponding type; i.e., this
is
.RI 2\*(ss w \*(se\~\-\~1.
.PP
(Note that the endianness suffix is irrelevant in the above
definitions.)
.PP
For each size suffix
.IR w ,
the macro invocation
.BI U w ( x )
coerces an integer
.I x
to the appropriate type; specifically, it returns the smallest
nonnegative integer congruent to
.I x
(modulo
.RI 2\*(ss w \*(se).
.
.SS "Shift and rotate"
For each size suffix
.IR w ,
the macro invocations
.BI LSL w ( x ", " n )
and
.BI LSR w ( x ", " n )
shift a
.IR w -bit
quantity
.I x
left or right, respectively, by
.I n
places; if
.IR n \~\(>=\~ w
then
.I n
is reduced modulo
.IR w .
(This behaviour is unfortunate, but (a) it's what a number of CPUs
provide natively, and (b) it's a cheap way to prevent undefined
behaviour.)  Similarly,
.BI ROL w ( x ", " n )
and
.BI ROR w ( x ", " n )
rotate a
.IR w -bit
quantity
.I x
left or right, respectively, by
.I n
places.
.
.SS "Byte order conversions"
For each size suffix
.IR w ,
the macro invocation
.BI ENDSWAP w ( x )
returns the
.IR w -bit
value
.IR x
with its bytes reversed.  The
.B ENDSWAP8
macro does nothing (except truncate its operand to 8 bits), but is
provided for the sake of completeness.
.PP
A
.I big-endian
representation stores the most significant octet of an integer at the
lowest address, with the following octets in decreasing order of
significance.  A
.I little-endian
representation instead stores the
.I least
significant octet at the lowest address, with the following octets in
increasing order of significance.  An environment has a preferred order
for arranging the constituent octets of an integer of some given size in
memory; this might be either the big- or little-endian representation
just described, or something else strange.
.PP
It might be possible to rearrange the bits in an integer so that, when
that integer is stored to memory in the environment's preferred manner,
you end up with the big- or little-endian representation of the original
integer; and, similarly, it might be possible to load a big- or
little-endian representation of an integer into a variable using the
environment's preferred ordering and then rearrange the bits so as to
recover the integer value originally represented.  If the environment is
sufficiently strange, these things might not be possible, but this is
actually quite rare.
.PP
Say that an integer has been converted to
.I big-
or
.I "little-endian form"
if, when it is stored in memory in the environment's preferred manner,
one ends up with a big- or little-endian representation of the original
integer.  Equivalently, if one starts with a big- or little-endian
representation of some integer, and loads it into a variable using the
environment's preferred manner, one ends up with the big- or
little-endian form of the original integer.
.PP
If these things are possible, then the following macros are defined.
.TP
.BI HTOL w ( x )
Convert a
.IR w -bit
integer
.I x
to little-endian form.
.TP
.BI HTOB w ( x )
Convert a
.IR w -bit
integer
.I x
to big-endian form.
.TP
.BI LTOH w ( x )
Convert a
.IR w -bit
integer
.I x
from little-endian form.
.TP
.BI BTOH w ( x )
Convert a
.IR w -bit
integer
.I x
from big-endian form.
.
.SS "Load and store"
The macro invocation
.BI GETBYTE( p ", " o )
returns the
.IR o th
octet following the address
.IR p .
Conversely,
.BI PUTBYTE( p ", " o ", " v )
stores
.I v
in the
.IR o th
byte following the address
.IR p .
These macros always operate on byte offsets regardless of the type of
the pointer
.IR p .
.PP
For each size suffix
.IR w ,
there may be a macro such that the invocation
.BI RAW w ( p )
is an lvalue designating the
.IR w /8
octets starting at address
.IR p ,
interpreted according to the environment's preferred representation,
except that
.I p
need not be aligned in any particular fashion.  There are many reasons
why this might not be possible; programmers are not normally expected to
use these macros directly, and they are documented in case they are
useful for special effects.
.PP
For each size-and-endian suffix
.IR we ,
the macro invocation
.BI LOAD we ( p )
loads and returns a value in the corresponding format at address
.IR p ;
similarly,
.BI STORE we ( p ", " x )
stores the value
.I x
at address
.I p
in the corresponding format.
.
.SS "64-bit support"
For portability to environments without native 64-bit integers, the
structure
.B kludge64
is defined.  If the target platform is known to have an unsigned 64-bit
integer type, then this structure merely encapsulates a native integer,
and a decent optimizing compiler can be expected to handle this exactly
as if it were the native type.  Otherwise, it contains two 32-bit halves
which are processed the hard way.
.PP
For each of the above macros with a suffix
.BR 64 ,
.BR 64_L ,
or
.BR 64_B ,
an additional `kludge' macro is defined, whose name has an additional
final underscore; e.g., the kludge macro corresponding to
.B ROR64
is
.BR ROR64_ ;
and that corresponding to
.B LOAD64_L
is
.BR LOAD64_L_ .
If the original macro would have
.I returned
a value of type
.BR uint64 ,
then the kludge macro has an additional first argument, denoted
.IR d ,
which should be an lvalue of type
.BR kludge64 ,
and the kludge macro will store its result in
.IR d .
The kludge macro's remaining arguments are the same as the original
macro, except that where the original macro accepts an argument of type
.BR uint64 ,
the kludge macro accepts an argument of type
.B kludge64
instead.
.PP
Finally, a number of additional macros are provided, to make working
with
.B kludge64
somewhat less awful.
.TP
.BI SET64( d ", " h ", " l )
Set the high 32 bits of
.I d
to be
.IR h ,
and the low 32 bits to be
.IR l .
Both
.I h
and
.I l
may be arbitrary integers.
.TP
.BI X64( hexh ", " hexl )
Expands to an initializer for an object of type
.B kludge64
where
.I hexh
and
.I hexl
encode the high and low 32-bit halves in hexadecimal, without any
.B 0x
prefix.
.TP
.BI ASSIGN64( d ", " x )
Make
.I d
be a copy of the
.B kludge64
.IR x .
.TP
.BI HI64( x )
Return the high 32 bits of
.IR x .
.TP
.BI LO64( x )
Return the low 32 bits of
.IR x .
.TP
.BI GET64( t ", " x )
Return the value of
.I x
as a value of type
.IR t .
If
.I t
is an unsigned integer type, then the value will be truncated to fit as
necessary; if
.I t
is a signed integer type, then the behaviour is undefined if the value
of
.I x
is too large.
.TP
.BI AND64( d ", " x ", " y )
Set
.I d
to be the bitwise-and of the two
.B kludge64
arguments
.I x
and
.IR y .
.TP
.BI OR64( d ", " x ", " y )
Set
.I d
to be the bitwise-or of the two
.B kludge64
arguments
.I x
and
.IR y .
.TP
.BI XOR64( d ", " x ", " y )
Set
.I d
to be the bitwise-exclusive-or of the two
.B kludge64
arguments
.I x
and
.IR y .
.TP
.BI CPL64( d ", " x )
Set
.I d
to be the bitwise complement of the
.B kludge64
argument
.IR x .
.TP
.BI ADD64( d ", " x ", " y )
Set
.I d
to be the sum of the two
.B kludge64
arguments
.I x
and
.IR y .
.TP
.BI SUB64( d ", " x ", " y )
Set
.I d
to be the difference of the two
.B kludge64
arguments
.I x
and
.IR y .
.TP
.BI CMP64( x ", " op ", " y )
Here,
.I x
and
.I y
should be arguments of type
.B kludge64
and
.I op
should be one of the relational operators
.BR == ,
.BR < ,
.BR <= ,
.BR > ,
or
.B >=
\(en
.I not
.BR != .
Evaluates nonzero if
.IR x \~ op \~ y .
.TP
.BI ZERO64( x )
Evaluates nonzero if the
.B kludge64
argument
.I x
is exactly zero.
.
.\"--------------------------------------------------------------------------
.SH "SEE ALSO"
.
.BR mLib (3).
.
.\"--------------------------------------------------------------------------
.SH AUTHOR
.
Mark Wooding, <mdw@distorted.org.uk>
.
.\"----- That's all, folks --------------------------------------------------
Commit	Line	Data
b6b9d458	1	.\" --nroff--
c4ccbbf9 MW	2	.\"
	3	.\" Manual for bit manipulation
	4	.\"
	5	.\" (c) 1999, 2001, 2005, 2009, 2018, 2024 Straylight/Edgeware
	6	.\"
	7	.
	8	.\"----- Licensing notice ---------------------------------------------------
	9	.\"
	10	.\" This file is part of the mLib utilities library.
	11	.\"
	12	.\" mLib is free software: you can redistribute it and/or modify it under
	13	.\" the terms of the GNU Library General Public License as published by
	14	.\" the Free Software Foundation; either version 2 of the License, or (at
	15	.\" your option) any later version.
	16	.\"
	17	.\" mLib is distributed in the hope that it will be useful, but WITHOUT
	18	.\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	19	.\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
	20	.\" License for more details.
	21	.\"
	22	.\" You should have received a copy of the GNU Library General Public
	23	.\" License along with mLib. If not, write to the Free Software
	24	.\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
	25	.\" USA.
	26	.
	27	.\"--------------------------------------------------------------------------
	28	.so ../defs.man \" @@@PRE@@@
	29	.
	30	.\"--------------------------------------------------------------------------
	31	.TH bits 3mLib "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
a9779382 MW	32	.\" octet
	33	.\" uint16
	34	.\" uint24
	35	.\" uint32
	36	.\" uint64
	37	.\" kludge64
	38	.\"
	39	.\" MASK_8
	40	.\" MASK_16
	41	.\" MASK_16_L
	42	.\" MASK_16_B
	43	.\" MASK_24
	44	.\" MASK_24_L
	45	.\" MASK_24_B
	46	.\" MASK_32
	47	.\" MASK_32_L
	48	.\" MASK_32_B
	49	.\" MASK_64
	50	.\" MASK_64_L
	51	.\" MASK_64_B
	52	.\"
	53	.\" SZ_8
	54	.\" SZ_16
	55	.\" SZ_16_L
	56	.\" SZ_16_B
	57	.\" SZ_24
	58	.\" SZ_24_L
	59	.\" SZ_24_B
	60	.\" SZ_32
	61	.\" SZ_32_L
	62	.\" SZ_32_B
	63	.\" SZ_64
	64	.\" SZ_64_L
	65	.\" SZ_64_B
	66	.\"
	67	.\" TY_8
	68	.\" TY_16
	69	.\" TY_16_L
	70	.\" TY_16_B
	71	.\" TY_24
	72	.\" TY_24_L
	73	.\" TY_24_B
	74	.\" TY_32
	75	.\" TY_32_L
	76	.\" TY_32_B
	77	.\" TY_64
	78	.\" TY_64_L
	79	.\" TY_64_B
	80	.\"
	81	.\" DOUINTSZ
	82	.\" DOUINTCONV
	83	.\"
08da152e	84	.\" @U8
08da152e	85	.\" @U16
a9779382	86	.\" @U24
08da152e	87	.\" @U32
a9779382 MW	88	.\" @U64
a9779382 MW	89	.\" @U64_
08da152e	90	.\"
	91	.\" @LSL8
	92	.\" @LSR8
	93	.\" @LSL16
	94	.\" @LSR16
a9779382 MW	95	.\" @LSL24
a9779382 MW	96	.\" @LSR24
08da152e	97	.\" @LSL32
08da152e	98	.\" @LSR32
a9779382 MW	99	.\" @LSL64
	100	.\" @LSR64
	101	.\" @LSL64_
	102	.\" @LSR64_
08da152e	103	.\"
	104	.\" @ROL8
	105	.\" @ROR8
	106	.\" @ROL16
	107	.\" @ROR16
a9779382 MW	108	.\" @ROL24
	109	.\" @ROR24
	110	.\" @ROL32
08da152e	111	.\" @ROL32
a9779382 MW	112	.\" @ROL64
	113	.\" @ROR64
	114	.\" @ROL64_
	115	.\" @ROR64_
08da152e	116	.\"
374bb459 MW	117	.\" ENDSWAP16
	118	.\" ENDSWAP32
	119	.\" ENDSWAP64
	120	.\"
	121	.\" BTOH16
	122	.\" LTOH16
	123	.\" HTOB16
	124	.\" HTOL16
	125	.\" BTOH32
	126	.\" LTOH32
	127	.\" HTOB32
	128	.\" HTOL32
	129	.\" BTOH64
	130	.\" LTOH64
	131	.\" HTOB64
	132	.\" HTOL64
	133	.\"
a23bab96 MW	134	.\" RAW8
	135	.\" RAW16
	136	.\" RAW32
	137	.\" RAW64
	138	.\"
08da152e	139	.\" @GETBYTE
	140	.\" @PUTBYTE
	141	.\"
	142	.\" @LOAD8
	143	.\" @STORE8
	144	.\"
	145	.\" @LOAD16_L
	146	.\" @LOAD16_B
	147	.\" @LOAD16
	148	.\" @STORE16_L
	149	.\" @STORE16_B
	150	.\" @STORE16
	151	.\"
a9779382 MW	152	.\" @LOAD24_L
	153	.\" @LOAD24_B
	154	.\" @LOAD24
	155	.\" @STORE24_L
	156	.\" @STORE24_B
	157	.\" @STORE24
	158	.\"
08da152e	159	.\" @LOAD32_L
	160	.\" @LOAD32_B
	161	.\" @LOAD32
	162	.\" @STORE32_L
	163	.\" @STORE32_B
	164	.\" @STORE32
	165	.\"
a9779382 MW	166	.\" @LOAD64_L
	167	.\" @LOAD64_B
	168	.\" @LOAD64
	169	.\" @STORE64_L
	170	.\" @STORE64_B
	171	.\" @STORE64
	172	.\"
	173	.\" @LOAD64_L_
	174	.\" @LOAD64_B_
	175	.\" @LOAD64_
	176	.\" @STORE64_L_
	177	.\" @STORE64_B_
	178	.\" @STORE64_
	179	.\"
	180	.\" @SET64
	181	.\" @X64
	182	.\" @ASSIGN64
	183	.\" @HI64
	184	.\" @LO64
	185	.\" @GET64
	186	.\" @AND64
	187	.\" @OR64
	188	.\" @XOR64
	189	.\" @CPL64
	190	.\" @ADD64
	191	.\" @SUB64
	192	.\" @CMP64
	193	.\" @ZERO64
c4ccbbf9 MW	194	.
	195	.\"--------------------------------------------------------------------------
	196	.SH NAME
	197	bits \- portable bit manipulation macros
	198	.
	199	.\"--------------------------------------------------------------------------
b6b9d458	200	.SH SYNOPSIS
c4ccbbf9	201	.
b6b9d458	202	.nf
b6b9d458	203	.B "#include <mLib/bits.h>"
d056fbdf	204	.PP
a9779382 MW	205	.BR "typedef " ... " octet;"
	206	.BR "typedef " ... " uint16;"
	207	.BR "typedef " ... " uint24;"
	208	.BR "typedef " ... " uint32;"
	209	.BR "typedef " ... " uint64;"
	210	.BR "typedef " ... " kludge64;"
d056fbdf	211	.PP
c4ccbbf9 MW	212	.fi
	213	In the following,
	214	.I w
	215	is one of
	216	.BR 8 ,
	217	.BR 16 ,
	218	.BR 24 ,
	219	and
	220	.BR 32 ,
	221	and, on platforms with a 64-bit type,
	222	.BR 64 ;
	223	and
	224	.I we
	225	is one of
	226	.BR 8 ,
	227	.BR 16 ,
	228	.BR 16_L ,
	229	.BR 16_B ,
	230	.BR 24 ,
	231	.BR 24_L ,
	232	.BR 24_B ,
	233	.BR 32 ,
	234	.BR 32_L ,
	235	and
	236	.BR 32_B ,
	237	and, on platforms with a 64-bit type,
	238	.BR 64 ,
	239	.BR 64_L ,
	240	and
	241	.BR 64_B .
	242	.nf
	243	.PP
a9779382 MW	244	.BI "#define TY_" we " " type
	245	.BI "#define SZ_" we " \fR..."
	246	.BI "#define MASK_" we " \fR..."
d056fbdf	247	.PP
a9779382 MW	248	.BI "#define DOUINTSZ(" f ") \fR..."
a9779382 MW	249	.BI "#define DOUINTCONV(" f ") \fR..."
d056fbdf	250	.PP
a9779382	251	.IB type " U" w ( v );
d056fbdf	252	.PP
a9779382 MW	253	.IB type " LSL" w ( type " " v ", int " s );
	254	.IB type " LSR" w ( type " " v ", int " s );
	255	.IB type " ROL" w ( type " " v ", int " s );
	256	.IB type " ROR" w ( type " " v ", int " s );
d056fbdf	257	.PP
a9779382 MW	258	.BI "octet GETBYTE(void *" p ", size_t " o );
a9779382 MW	259	.BI "void PUTBYTE(void *" p ", size_t " o ", octet " v );
d056fbdf	260	.PP
a9779382 MW	261	.IB type " LOAD" we "(void *" p );
a9779382 MW	262	.BI "void STORE" we "(void *" p ", " type " " v );
d056fbdf	263	.PP
a9779382 MW	264	.BI "void SET64(kludge64 &" d ", uint32 " h ", uint32 " l );
	265	.BI "kludge64 X64(" hexh ", " hexl );
	266	.BI "void ASSIGN64(kludge64 &" d ", " x );
	267	.BI "uint32 HI64(kludge64" x );
	268	.BI "uint32 LO64(kludge64" x );
	269	.IB ty " GET64(" ty ", kludge64 " x );
	270	.BI "void AND64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
	271	.BI "void OR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
	272	.BI "void XOR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
	273	.BI "void CPL64(kludge64 &" d ", kludge64 " x );
	274	.BI "void ADD64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
	275	.BI "void SUB64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
	276	.BI "int CMP64(kludge64 " x ", " op ", kludge64 " y );
	277	.BI "int ZERO64(kludge64 " x );
b6b9d458	278	.fi
c4ccbbf9 MW	279	.
c4ccbbf9 MW	280	.\"--------------------------------------------------------------------------
b6b9d458	281	.SH DESCRIPTION
c4ccbbf9	282	.
b6b9d458	283	The header file
	284	.B <mLib/bits.h>
	285	contains a number of useful definitions for portably dealing with bit-
a9779382 MW	286	and byte-level manipulation of larger quantities. The various macros
	287	and types are named fairly systematically.
	288	.PP
	289	The header provides utilities for working with 64-bit quantities, but a
	290	64-bit integer type is not guaranteed to exist under C89 rules. This
	291	header takes two approaches. Firstly, if a 64-bit type is found, the
	292	header defines the macro
	293	.B HAVE_UINT64
	294	and defines the various
	295	.RB ... 64
	296	macros as described below. Secondly, it unconditionally defines a type
	297	.B kludge64
	298	and a family of macros for working with them. See below for details.
	299	.
	300	.SS "Type definitions"
	301	A number of types are defined.
b6b9d458	302	.TP
	303	.B octet
	304	Equivalent to
	305	.BR "unsigned char" .
	306	This is intended to be used when a character array is used to represent
	307	the octets of some external data format. Note that on some
	308	architectures the
	309	.B "unsigned char"
	310	type may occupy more than 8 bits.
	311	.TP
	312	.B uint16
	313	Equivalent to
	314	.BR "unsigned short" .
	315	Intended to be used when a 16-bit value is required. This type is
	316	always capable of representing any 16-bit unsigned value, but the actual
	317	type may be wider than 16 bits and will require masking.
	318	.TP
a9779382 MW	319	.B uint24
	320	Equivalent to some (architecture-dependent) standard type. Capable of
	321	representing any unsigned 24-bit value, although the the actual type may
	322	be wider than 24 bits.
	323	.TP
b6b9d458	324	.B uint32
	325	Equivalent to some (architecture-dependent) standard type. Capable of
	326	representing any unsigned 32-bit value, although the the actual type may
	327	be wider than 32 bits.
a9779382 MW	328	pp.TP
	329	.B uint64
	330	Equivalent to some (architecture-dependent) standard type, if it exists.
	331	Capable of representing any unsigned 64-bit value, although the the
	332	actual type may be wider than 64 bits.
	333	.
	334	.SS "Size/endianness suffixes"
	335	Let
	336	.I w
	337	be one of the size suffixes: 8, 16, 24, 32, and (if available) 64.
	338	Furthermore, let
	339	.I we
	340	be one of the size-and-endian suffixes
	341	.IR w ,
	342	or, where
	343	.IR w \~>\~8,
	344	.IB w _L
	345	or
	346	.IB w _B \fR,
	347	where
	348	.RB ` _L '
	349	denotes little-endian (Intel, VAX) representation, and
	350	.RB ` _B '
	351	denotes big-endian (IBM, network) representation; omitting an explicit
	352	suffix gives big-endian order by default, since this is most common in
	353	portable data formats.
b6b9d458	354	.PP
a9779382 MW	355	The macro invocation
	356	.BI DOUINTSZ( f )
	357	invokes a given macro
	358	.I f
	359	repeatedly, as
	360	.IB f ( w )
	361	for each size suffix
	362	.I w
	363	listed above.
b6b9d458	364	.PP
a9779382 MW	365	The macro invocation
	366	.BI DOUINTCONV( f )
	367	invokes a given macro
	368	.I f
	369	repeatedly, as
	370	.IR f ( w ", " we ", " suff )
b6b9d458	371	where
a9779382 MW	372	.I we
	373	ranges over size-and-endian suffixes as described above,
	374	.I w
	375	is just the corresponding bit width, as an integer, and
	376	.I suff
	377	is a suffix
	378	.IR w ,
	379	.IB w l\fR,
b6b9d458	380	or
a9779382 MW	381	.IB w b\fR,
a9779382 MW	382	suitable for a C function name.
b6b9d458	383	.PP
a9779382 MW	384	These macros are intended to be used to define families of related
	385	functions.
	386	.
	387	.SS "Utility macros"
	388	For each size-and-endian suffix
	389	.IR we ,
	390	the following macros are defined.
	391	.TP
	392	.BI TY_ we
	393	A synonym for the appropriate one of the types
	394	.BR octet ,
	395	.BR uint32 ,
	396	etc.\& listed above.
	397	.TP
	398	.BI SZ_ we
	399	The number of octets needed to represent a value of the corresponding
	400	type; i.e., this is
	401	.IR w /8.
	402	.TP
	403	.BI MASK_ we
	404	The largest integer representable in the corresponding type; i.e., this
	405	is
	406	.RI 2\(ss w \(se\~\-\~1.
	407	.PP
	408	(Note that the endianness suffix is irrelevant in the above
	409	definitions.)
	410	.PP
	411	For each size suffix
	412	.IR w ,
	413	the macro invocation
	414	.BI U w ( x )
	415	coerces an integer
	416	.I x
	417	to the appropriate type; specifically, it returns the smallest
	418	nonnegative integer congruent to
	419	.I x
	420	(modulo
	421	.RI 2\(ss w \(se).
	422	.
	423	.SS "Shift and rotate"
	424	For each size suffix
	425	.IR w ,
	426	the macro invocations
	427	.BI LSL w ( x ", " n )
b6b9d458	428	and
a9779382 MW	429	.BI LSR w ( x ", " n )
	430	shift a
	431	.IR w -bit
	432	quantity
	433	.I x
	434	left or right, respectively, by
b6b9d458	435	.I n
a9779382 MW	436	places; if
	437	.IR n \~\(>=\~ w
	438	then
	439	.I n
	440	is reduced modulo
	441	.IR w .
	442	(This behaviour is unfortunate, but (a) it's what a number of CPUs
	443	provide natively, and (b) it's a cheap way to prevent undefined
	444	behaviour.) Similarly,
	445	.BI ROL w ( x ", " n )
b6b9d458	446	and
a9779382 MW	447	.BI ROR w ( x ", " n )
	448	rotate a
	449	.IR w -bit
	450	quantity
	451	.I x
	452	left or right, respectively, by
b6b9d458	453	.I n
a9779382 MW	454	places.
a9779382 MW	455	.
374bb459 MW	456	.SS "Byte order conversions"
	457	For each size suffix
	458	.IR w ,
	459	the macro invocation
	460	.BI ENDSWAP w ( x )
	461	returns the
	462	.IR w -bit
	463	value
	464	.IR x
	465	with its bytes reversed. The
	466	.B ENDSWAP8
	467	macro does nothing (except truncate its operand to 8 bits), but is
	468	provided for the sake of completeness.
	469	.PP
	470	A
	471	.I big-endian
	472	representation stores the most significant octet of an integer at the
	473	lowest address, with the following octets in decreasing order of
	474	significance. A
	475	.I little-endian
	476	representation instead stores the
	477	.I least
	478	significant octet at the lowest address, with the following octets in
	479	increasing order of significance. An environment has a preferred order
	480	for arranging the constituent octets of an integer of some given size in
	481	memory; this might be either the big- or little-endian representation
	482	just described, or something else strange.
	483	.PP
	484	It might be possible to rearrange the bits in an integer so that, when
	485	that integer is stored to memory in the environment's preferred manner,
	486	you end up with the big- or little-endian representation of the original
	487	integer; and, similarly, it might be possible to load a big- or
	488	little-endian representation of an integer into a variable using the
	489	environment's preferred ordering and then rearrange the bits so as to
	490	recover the integer value originally represented. If the environment is
	491	sufficiently strange, these things might not be possible, but this is
	492	actually quite rare.
	493	.PP
	494	Say that an integer has been converted to
	495	.I big-
	496	or
	497	.I "little-endian form"
	498	if, when it is stored in memory in the environment's preferred manner,
	499	one ends up with a big- or little-endian representation of the original
	500	integer. Equivalently, if one starts with a big- or little-endian
	501	representation of some integer, and loads it into a variable using the
	502	environment's preferred manner, one ends up with the big- or
	503	little-endian form of the original integer.
	504	.PP
	505	If these things are possible, then the following macros are defined.
	506	.TP
	507	.BI HTOL w ( x )
	508	Convert a
	509	.IR w -bit
	510	integer
	511	.I x
	512	to little-endian form.
	513	.TP
	514	.BI HTOB w ( x )
	515	Convert a
	516	.IR w -bit
	517	integer
	518	.I x
	519	to big-endian form.
520	.TP
521	.BI LTOH w ( x )
522	Convert a
523	.IR w -bit
524	integer
525	.I x
526	from little-endian form.
527	.TP
528	.BI BTOH w ( x )
529	Convert a
530	.IR w -bit
531	integer
532	.I x
533	from big-endian form.
534	.
a9779382 MW	535	.SS "Load and store"
	536	The macro invocation
	537	.BI GETBYTE( p ", " o )
	538	returns the
	539	.IR o th
	540	octet following the address
	541	.IR p .
	542	Conversely,
	543	.BI PUTBYTE( p ", " o ", " v)
	544	stores
	545	.I
	546	v in the
	547	.IR o th
	548	byte following the address
	549	.IR p .
	550	These macros always operate on byte offsets regardless of the type of
	551	the pointer
	552	.IR p .
	553	.PP
a23bab96 MW	554	For each size suffix
	555	.IR w ,
	556	there may be a macro such that the invocation
	557	.BI RAW w ( p )
	558	is an lvalue designating the
	559	.IR w /8
	560	octets starting at address
	561	.IR p ,
	562	interpreted according to the environment's preferred representation,
	563	except that
	564	.I p
	565	need not be aligned in any particular fashion. There are many reasons
	566	why this might not be possible; programmers are not normally expected to
	567	use these macros directly, and they are documented in case they are
	568	useful for special effects.
	569	.PP
a9779382 MW	570	For each size-and-endian suffix
	571	.IR we ,
	572	the macro invocation
	573	.BI LOAD we ( p )
	574	loads and returns a value in the corresponding format at address
	575	.IR p ;
	576	similarly,
	577	.BI STORE we ( p ", " x )
	578	stores the value
	579	.I x
	580	at address
	581	.I p
	582	in the corresponding format.
	583	.
	584	.SS "64-bit support"
	585	For portability to environments without native 64-bit integers, the
	586	structure
	587	.B kludge64
	588	is defined. If the target platform is known to have an unsigned 64-bit
	589	integer type, then this structure merely encapsulates a native integer,
	590	and a decent optimizing compiler can be expected to handle this exactly
	591	as if it were the native type. Otherwise, it contains two 32-bit halves
	592	which are processed the hard way.
	593	.PP
	594	For each of the above macros with a suffix
	595	.BR 64 ,
	596	.BR 64_L ,
b6b9d458	597	or
a9779382 MW	598	.BR 64_B ,
	599	an additional `kludge' macro is defined, whose name has an additional
	600	final underscore; e.g., the kludge macro corresponding to
	601	.B ROR64
	602	is
	603	.BR ROR64_ ;
	604	and that corresponding to
	605	.B LOAD64_L
	606	is
	607	.BR LOAD64_L_ .
	608	If the original macro would have
	609	.I returned
	610	a value of type
	611	.BR uint64 ,
	612	then the kludge macro has an additional first argument, denoted
	613	.IR d ,
	614	which should be an lvalue of type
	615	.BR kludge64 ,
	616	and the kludge macro will store its result in
	617	.IR d .
	618	The kludge macro's remaining arguments are the same as the original
	619	macro, except that where the original macro accepts an argument of type
	620	.BR uint64 ,
	621	the kludge macro accepts an argument of type
	622	.B kludge64
	623	instead.
	624	.PP
	625	Finally, a number of additional macros are provided, to make working
	626	with
	627	.B kludge64
	628	somewhat less awful.
	629	.TP
	630	.BI SET64( d ", " h ", " l )
	631	Set the high 32 bits of
	632	.I d
	633	to be
	634	.IR h ,
	635	and the low 32 bits to be
	636	.IR l .
	637	Both
	638	.I h
	639	and
	640	.I l
	641	may be arbitrary integers.
	642	.TP
	643	.BI X64( hexh ", " hexl )
	644	Expands to an initializer for an object of type
	645	.B kludge64
	646	where
	647	.I hexh
	648	and
	649	.I hexl
	650	encode the high and low 32-bit halves in hexadecimal, without any
	651	.B 0x
	652	prefix.
	653	.TP
	654	.BI ASSIGN64( d ", " x )
	655	Make
	656	.I d
	657	be a copy of the
	658	.B kludge64
	659	.IR x .
	660	.TP
	661	.BI HI64( x )
662	Return the high 32 bits of
663	.IR x .
664	.TP
665	.BI LO64( x )
666	Return the low 32 bits of
667	.IR x .
668	.TP
669	.BI GET64( t ", " x )
670	Return the value of
671	.I x
672	as a value of type
673	.IR t .
674	If
675	.I t
676	is an unsigned integer type, then the value will be truncated to fit as
677	necessary; if
678	.I t
679	is a signed integer type, then the behaviour is undefined if the value
680	of
681	.I x
682	is too large.
683	.TP
684	.BI AND64( d ", " x ", " y )
685	Set
686	.I d
687	to be the bitwise-and of the two
688	.B kludge64
689	arguments
690	.I x
b6b9d458	691	and
a9779382 MW	692	.IR y .
	693	.TP
	694	.BI OR64( d ", " x ", " y )
	695	Set
	696	.I d
	697	to be the bitwise-or of the two
	698	.B kludge64
	699	arguments
	700	.I x
	701	and
	702	.IR y .
	703	.TP
	704	.BI XOR64( d ", " x ", " y )
	705	Set
	706	.I d
	707	to be the bitwise-exclusive-or of the two
	708	.B kludge64
	709	arguments
	710	.I x
	711	and
	712	.IR y .
	713	.TP
	714	.BI CPL64( d ", " x )
	715	Set
	716	.I d
	717	to be the bitwise complement of the
	718	.B kludge64
	719	argument
	720	.IR x .
	721	.TP
	722	.BI ADD64( d ", " x ", " y )
	723	Set
	724	.I d
	725	to be the sum of the two
	726	.B kludge64
	727	arguments
	728	.I x
	729	and
	730	.IR y .
	731	.TP
	732	.BI SUB64( d ", " x ", " y )
	733	Set
	734	.I d
	735	to be the difference of the two
	736	.B kludge64
	737	arguments
	738	.I x
	739	and
	740	.IR y .
	741	.TP
	742	.BI CMP64( x ", " op ", " y )
	743	Here,
	744	.I x
	745	and
	746	.I y
	747	should be arguments of type
	748	.B kludge64
	749	and
	750	.I op
	751	should be one of the relational operators
	752	.BR == ,
	753	.BR < ,
	754	.BR <= ,
	755	.BR > ,
756	or
757	.B >=
758	\(en
759	.I not
	760	.BR != .
761	Evaluates nonzero if
762	.IR x \~ op \~ y .
763	.TP
764	.BI ZERO64( x )
765	Evaluates nonzero if the
766	.B kludge64
767	argument
768	.I x
769	is exactly zero.
c4ccbbf9 MW	770	.
c4ccbbf9 MW	771	.\"--------------------------------------------------------------------------
08da152e	772	.SH "SEE ALSO"
c4ccbbf9	773	.
08da152e	774	.BR mLib (3).
c4ccbbf9 MW	775	.
c4ccbbf9 MW	776	.\"--------------------------------------------------------------------------
b6b9d458	777	.SH AUTHOR
c4ccbbf9	778	.
9b5ac6ff	779	Mark Wooding, <mdw@distorted.org.uk>
c4ccbbf9 MW	780	.
c4ccbbf9 MW	781	.\"----- That's all, folks --------------------------------------------------