3 .\" Manual for bit manipulation
5 .\" (c) 1999, 2001, 2005, 2009, 2018, 2024 Straylight/Edgeware
8 .\"----- Licensing notice ---------------------------------------------------
10 .\" This file is part of the mLib utilities library.
12 .\" mLib is free software: you can redistribute it and/or modify it under
13 .\" the terms of the GNU Library General Public License as published by
14 .\" the Free Software Foundation; either version 2 of the License, or (at
15 .\" your option) any later version.
17 .\" mLib is distributed in the hope that it will be useful, but WITHOUT
18 .\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 .\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20 .\" License for more details.
22 .\" You should have received a copy of the GNU Library General Public
23 .\" License along with mLib. If not, write to the Free Software
24 .\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
27 .\"--------------------------------------------------------------------------
28 .so ../defs.man \" @@@PRE@@@
30 .\"--------------------------------------------------------------------------
31 .TH bits 3mLib "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
196 .\"--------------------------------------------------------------------------
198 bits \- portable bit manipulation macros
200 .\"--------------------------------------------------------------------------
204 .B "#include <mLib/bits.h>"
206 .BR "typedef " ... " octet;"
207 .BR "typedef " ... " uint16;"
208 .BR "typedef " ... " uint24;"
209 .BR "typedef " ... " uint32;"
210 .BR "typedef " ... " uint64;"
211 .BR "typedef " ... " kludge64;"
222 and, on platforms with a 64-bit type,
238 and, on platforms with a 64-bit type,
245 .BI "#define TY_" we " " type
246 .BI "#define SZ_" we " \fR..."
247 .BI "#define MASK_" we " \fR..."
249 .BI "#define DOUINTSZ(" f ") \fR..."
250 .BI "#define DOUINTCONV(" f ") \fR..."
252 .IB type " U" w ( v );
254 .IB type " LSL" w ( type " " v ", int " s );
255 .IB type " LSR" w ( type " " v ", int " s );
256 .IB type " ROL" w ( type " " v ", int " s );
257 .IB type " ROR" w ( type " " v ", int " s );
259 .BI "octet GETBYTE(void *" p ", size_t " o );
260 .BI "void PUTBYTE(void *" p ", size_t " o ", octet " v );
262 .IB type " LOAD" we "(void *" p );
263 .BI "void STORE" we "(void *" p ", " type " " v );
265 .BI "void SET64(kludge64 &" d ", uint32 " h ", uint32 " l );
266 .BI "kludge64 X64(" hexh ", " hexl );
267 .BI "void ASSIGN64(kludge64 &" d ", " x );
268 .BI "uint32 HI64(kludge64 " x );
269 .BI "uint32 LO64(kludge64 " x );
270 .IB ty " GET64(" ty ", kludge64 " x );
271 .BI "void SETBYTE64(kludge64 &" z ", octet " x ", unsigned " j );
272 .BI "void AND64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
273 .BI "void OR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
274 .BI "void XOR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
275 .BI "void CPL64(kludge64 &" d ", kludge64 " x );
276 .BI "void ADD64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
277 .BI "void SUB64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
278 .BI "int CMP64(kludge64 " x ", " op ", kludge64 " y );
279 .BI "int ZERO64(kludge64 " x );
282 .\"--------------------------------------------------------------------------
287 contains a number of useful definitions for portably dealing with bit-
288 and byte-level manipulation of larger quantities. The various macros
289 and types are named fairly systematically.
291 The header provides utilities for working with 64-bit quantities, but a
292 64-bit integer type is not guaranteed to exist under C89 rules. This
293 header takes two approaches. Firstly, if a 64-bit type is found, the
294 header defines the macro
296 and defines the various
298 macros as described below. Secondly, it unconditionally defines a type
300 and a family of macros for working with them. See below for details.
302 .SS "Type definitions"
303 A number of types are defined.
307 .BR "unsigned char" .
308 This is intended to be used when a character array is used to represent
309 the octets of some external data format. Note that on some
312 type may occupy more than 8 bits.
316 .BR "unsigned short" .
317 Intended to be used when a 16-bit value is required. This type is
318 always capable of representing any 16-bit unsigned value, but the actual
319 type may be wider than 16 bits and will require masking.
322 Equivalent to some (architecture-dependent) standard type. Capable of
323 representing any unsigned 24-bit value, although the actual type may
324 be wider than 24 bits.
327 Equivalent to some (architecture-dependent) standard type. Capable of
328 representing any unsigned 32-bit value, although the actual type may
329 be wider than 32 bits.
332 Equivalent to some (architecture-dependent) standard type, if it exists.
333 Capable of representing any unsigned 64-bit value, although the
334 actual type may be wider than 64 bits.
336 .SS "Size/endianness suffixes"
339 be one of the size suffixes: 8, 16, 24, 32, and (if available) 64.
342 be one of the size-and-endian suffixes
351 denotes little-endian (Intel, VAX) representation, and
353 denotes big-endian (IBM, network) representation; omitting an explicit
354 suffix gives big-endian order by default, since this is most common in
355 portable data formats.
359 invokes a given macro
369 invokes a given macro
372 .IR f ( w ", " we ", " suff )
375 ranges over size-and-endian suffixes as described above,
377 is just the corresponding bit width, as an integer, and
384 suitable for a C function name.
386 These macros are intended to be used to define families of related
390 For each size-and-endian suffix
392 the following macros are defined.
395 A synonym for the appropriate one of the types
401 The number of octets needed to represent a value of the corresponding
406 The largest integer representable in the corresponding type; i.e., this
408 .RI 2\*(ss w \*(se\~\-\~1.
410 (Note that the endianness suffix is irrelevant in the above
419 to the appropriate type; specifically, it returns the smallest
420 nonnegative integer congruent to
425 .SS "Shift and rotate"
428 the macro invocations
429 .BI LSL w ( x ", " n )
431 .BI LSR w ( x ", " n )
436 left or right, respectively, by
444 (This behaviour is unfortunate, but (a) it's what a number of CPUs
445 provide natively, and (b) it's a cheap way to prevent undefined
446 behaviour.) Similarly,
447 .BI ROL w ( x ", " n )
449 .BI ROR w ( x ", " n )
454 left or right, respectively, by
458 .SS "Byte order conversions"
467 with its bytes reversed. The
469 macro does nothing (except truncate its operand to 8 bits), but is
470 provided for the sake of completeness.
474 representation stores the most significant octet of an integer at the
475 lowest address, with the following octets in decreasing order of
478 representation instead stores the
480 significant octet at the lowest address, with the following octets in
481 increasing order of significance. An environment has a preferred order
482 for arranging the constituent octets of an integer of some given size in
483 memory; this might be either the big- or little-endian representation
484 just described, or something else strange.
486 It might be possible to rearrange the bits in an integer so that, when
487 that integer is stored to memory in the environment's preferred manner,
488 you end up with the big- or little-endian representation of the original
489 integer; and, similarly, it might be possible to load a big- or
490 little-endian representation of an integer into a variable using the
491 environment's preferred ordering and then rearrange the bits so as to
492 recover the integer value originally represented. If the environment is
493 sufficiently strange, these things might not be possible, but this is
496 Say that an integer has been converted to
499 .I "little-endian form"
500 if, when it is stored in memory in the environment's preferred manner,
501 one ends up with a big- or little-endian representation of the original
502 integer. Equivalently, if one starts with a big- or little-endian
503 representation of some integer, and loads it into a variable using the
504 environment's preferred manner, one ends up with the big- or
505 little-endian form of the original integer.
507 If these things are possible, then the following macros are defined.
514 to little-endian form.
528 from little-endian form.
535 from big-endian form.
539 .BI GETBYTE( p ", " o )
542 octet following the address
545 .BI PUTBYTE( p ", " o ", " v )
550 byte following the address
552 These macros always operate on byte offsets regardless of the type of
558 there may be a macro such that the invocation
560 is an lvalue designating the
562 octets starting at address
564 interpreted according to the environment's preferred representation,
567 need not be aligned in any particular fashion. There are many reasons
568 why this might not be possible; programmers are not normally expected to
569 use these macros directly, and they are documented in case they are
570 useful for special effects.
572 For each size-and-endian suffix
576 loads and returns a value in the corresponding format at address
579 .BI STORE we ( p ", " x )
584 in the corresponding format.
587 For portability to environments without native 64-bit integers, the
590 is defined. If the target platform is known to have an unsigned 64-bit
591 integer type, then this structure merely encapsulates a native integer,
592 and a decent optimizing compiler can be expected to handle this exactly
593 as if it were the native type. Otherwise, it contains two 32-bit halves
594 which are processed the hard way.
596 For each of the above macros with a suffix
601 an additional `kludge' macro is defined, whose name has an additional
602 final underscore; e.g., the kludge macro corresponding to
606 and that corresponding to
610 If the original macro would have
614 then the kludge macro has an additional first argument, denoted
616 which should be an lvalue of type
618 and the kludge macro will store its result in
620 The kludge macro's remaining arguments are the same as the original
621 macro, except that where the original macro accepts an argument of type
623 the kludge macro accepts an argument of type
627 Finally, a number of additional macros are provided, to make working
632 .BI SET64( d ", " h ", " l )
633 Set the high 32 bits of
637 and the low 32 bits to be
643 may be arbitrary integers.
645 .BI X64( hexh ", " hexl )
646 Expands to an initializer for an object of type
652 encode the high and low 32-bit halves in hexadecimal, without any
656 .BI ASSIGN64( d ", " x )
664 Return the high 32 bits of
668 Return the low 32 bits of
671 .BI GET64( t ", " x )
678 is an unsigned integer type, then the value will be truncated to fit as
681 is a signed integer type, then the behaviour is undefined if the value
686 .BI SETBYTE64( z ", " x ", " j )
693 which is assumed to be initially zero.
694 Bytes are numbered with the least significant being byte 0 and the
695 most significant being byte 7.
697 .BI AND64( d ", " x ", " y )
700 to be the bitwise-and of the two
707 .BI OR64( d ", " x ", " y )
710 to be the bitwise-or of the two
717 .BI XOR64( d ", " x ", " y )
720 to be the bitwise-exclusive-or of the two
727 .BI CPL64( d ", " x )
730 to be the bitwise complement of the
735 .BI ADD64( d ", " x ", " y )
738 to be the sum of the two
745 .BI SUB64( d ", " x ", " y )
748 to be the difference of the two
755 .BI CMP64( x ", " op ", " y )
760 should be arguments of type
764 should be one of the relational operators
778 Evaluates nonzero if the
784 .\"--------------------------------------------------------------------------
789 .\"--------------------------------------------------------------------------
792 Mark Wooding, <mdw@distorted.org.uk>
794 .\"----- That's all, folks --------------------------------------------------