mdw@git.distorted.org.uk Git - mLib/blob - utils/bits.3

   1 .\" -*-nroff-*-
   2 .TH bits 3 "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
   3 .ie t \{\
   4 .  ds ss \s8\u
   5 .  ds se \d\s0
   6 .\}
   7 .el \{\
   8 .  ds ss ^
   9 .  ds se
  10 .\}
  11 .SH NAME
  12 bits \- portable bit manipulation macros
  13 .\" octet
  14 .\" uint16
  15 .\" uint24
  16 .\" uint32
  17 .\" uint64
  18 .\" kludge64
  19 .\"
  20 .\" MASK_8
  21 .\" MASK_16
  22 .\" MASK_16_L
  23 .\" MASK_16_B
  24 .\" MASK_24
  25 .\" MASK_24_L
  26 .\" MASK_24_B
  27 .\" MASK_32
  28 .\" MASK_32_L
  29 .\" MASK_32_B
  30 .\" MASK_64
  31 .\" MASK_64_L
  32 .\" MASK_64_B
  33 .\"
  34 .\" SZ_8
  35 .\" SZ_16
  36 .\" SZ_16_L
  37 .\" SZ_16_B
  38 .\" SZ_24
  39 .\" SZ_24_L
  40 .\" SZ_24_B
  41 .\" SZ_32
  42 .\" SZ_32_L
  43 .\" SZ_32_B
  44 .\" SZ_64
  45 .\" SZ_64_L
  46 .\" SZ_64_B
  47 .\"
  48 .\" TY_8
  49 .\" TY_16
  50 .\" TY_16_L
  51 .\" TY_16_B
  52 .\" TY_24
  53 .\" TY_24_L
  54 .\" TY_24_B
  55 .\" TY_32
  56 .\" TY_32_L
  57 .\" TY_32_B
  58 .\" TY_64
  59 .\" TY_64_L
  60 .\" TY_64_B
  61 .\"
  62 .\" DOUINTSZ
  63 .\" DOUINTCONV
  64 .\"
  65 .\" @U8
  66 .\" @U16
  67 .\" @U24
  68 .\" @U32
  69 .\" @U64
  70 .\" @U64_
  71 .\"
  72 .\" @LSL8
  73 .\" @LSR8
  74 .\" @LSL16
  75 .\" @LSR16
  76 .\" @LSL24
  77 .\" @LSR24
  78 .\" @LSL32
  79 .\" @LSR32
  80 .\" @LSL64
  81 .\" @LSR64
  82 .\" @LSL64_
  83 .\" @LSR64_
  84 .\"
  85 .\" @ROL8
  86 .\" @ROR8
  87 .\" @ROL16
  88 .\" @ROR16
  89 .\" @ROL24
  90 .\" @ROR24
  91 .\" @ROL32
  92 .\" @ROR32
  93 .\" @ROL64
  94 .\" @ROR64
  95 .\" @ROL64_
  96 .\" @ROR64_
  97 .\"
  98 .\" ENDSWAP16
  99 .\" ENDSWAP32
 100 .\" ENDSWAP64
 101 .\"
 102 .\" BTOH16
 103 .\" LTOH16
 104 .\" HTOB16
 105 .\" HTOL16
 106 .\" BTOH32
 107 .\" LTOH32
 108 .\" HTOB32
 109 .\" HTOL32
 110 .\" BTOH64
 111 .\" LTOH64
 112 .\" HTOB64
 113 .\" HTOL64
 114 .\"
 115 .\" @GETBYTE
 116 .\" @PUTBYTE
 117 .\"
 118 .\" @LOAD8
 119 .\" @STORE8
 120 .\"
 121 .\" @LOAD16_L
 122 .\" @LOAD16_B
 123 .\" @LOAD16
 124 .\" @STORE16_L
 125 .\" @STORE16_B
 126 .\" @STORE16
 127 .\"
 128 .\" @LOAD24_L
 129 .\" @LOAD24_B
 130 .\" @LOAD24
 131 .\" @STORE24_L
 132 .\" @STORE24_B
 133 .\" @STORE24
 134 .\"
 135 .\" @LOAD32_L
 136 .\" @LOAD32_B
 137 .\" @LOAD32
 138 .\" @STORE32_L
 139 .\" @STORE32_B
 140 .\" @STORE32
 141 .\"
 142 .\" @LOAD64_L
 143 .\" @LOAD64_B
 144 .\" @LOAD64
 145 .\" @STORE64_L
 146 .\" @STORE64_B
 147 .\" @STORE64
 148 .\"
 149 .\" @LOAD64_L_
 150 .\" @LOAD64_B_
 151 .\" @LOAD64_
 152 .\" @STORE64_L_
 153 .\" @STORE64_B_
 154 .\" @STORE64_
 155 .\"
 156 .\" @SET64
 157 .\" @X64
 158 .\" @ASSIGN64
 159 .\" @HI64
 160 .\" @LO64
 161 .\" @GET64
 162 .\" @AND64
 163 .\" @OR64
 164 .\" @XOR64
 165 .\" @CPL64
 166 .\" @ADD64
 167 .\" @SUB64
 168 .\" @CMP64
 169 .\" @ZERO64
 170 .SH SYNOPSIS
 171 .nf
 172 .B "#include <mLib/bits.h>"
 173
 174 .BR "typedef " ... " octet;"
 175 .BR "typedef " ... " uint16;"
 176 .BR "typedef " ... " uint24;"
 177 .BR "typedef " ... " uint32;"
 178 .BR "typedef " ... " uint64;"
 179 .BR "typedef " ... " kludge64;"
 180
 181 .BI "#define TY_" we " " type
 182 .BI "#define SZ_" we " \fR..."
 183 .BI "#define MASK_" we " \fR..."
 184
 185 .BI "#define DOUINTSZ(" f ") \fR..."
 186 .BI "#define DOUINTCONV(" f ") \fR..."
 187
 188 .IB type " U" w ( v );
 189
 190 .IB type " LSL" w ( type " " v ", int " s );
 191 .IB type " LSR" w ( type " " v ", int " s );
 192 .IB type " ROL" w ( type " " v ", int " s );
 193 .IB type " ROR" w ( type " " v ", int " s );
 194
 195 .BI "octet GETBYTE(void *" p ", size_t " o );
 196 .BI "void PUTBYTE(void *" p ", size_t " o ", octet " v );
 197
 198 .IB type " LOAD" we "(void *" p );
 199 .BI "void STORE" we "(void *" p ", " type " " v );
 200
 201 .BI "void SET64(kludge64 &" d ", uint32 " h ", uint32 " l );
 202 .BI "kludge64 X64(" hexh ", " hexl );
 203 .BI "void ASSIGN64(kludge64 &" d ", " x );
 204 .BI "uint32 HI64(kludge64 " x );
 205 .BI "uint32 LO64(kludge64 " x );
 206 .IB ty " GET64(" ty ", kludge64 " x );
 207 .BI "void AND64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
 208 .BI "void OR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
 209 .BI "void XOR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
 210 .BI "void CPL64(kludge64 &" d ", kludge64 " x );
 211 .BI "void ADD64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
 212 .BI "void SUB64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
 213 .BI "int CMP64(kludge64 " x ", " op ", kludge64 " y );
 214 .BI "int ZERO64(kludge64 " x );
 215 .fi
 216 .SH DESCRIPTION
 217 The header file
 218 .B <mLib/bits.h>
 219 contains a number of useful definitions for portably dealing with bit-
 220 and byte-level manipulation of larger quantities.  The various macros
 221 and types are named fairly systematically.
 222 .PP
 223 The header provides utilities for working with 64-bit quantities, but a
 224 64-bit integer type is not guaranteed to exist under C89 rules.  This
 225 header takes two approaches.  Firstly, if a 64-bit type is found, the
 226 header defines the macro
 227 .B HAVE_UINT64
 228 and defines the various
 229 .RB ... 64
 230 macros as described below.  Secondly, it unconditionally defines a type
 231 .B kludge64
 232 and a family of macros for working with them.  See below for details.
 233 .
 234 .SS "Type definitions"
 235 A number of types are defined.
 236 .TP
 237 .B octet
 238 Equivalent to
 239 .BR "unsigned char" .
 240 This is intended to be used when a character array is used to represent
 241 the octets of some external data format.  Note that on some
 242 architectures the
 243 .B "unsigned char"
 244 type may occupy more than 8 bits.
 245 .TP
 246 .B uint16
 247 Equivalent to
 248 .BR "unsigned short" .
 249 Intended to be used when a 16-bit value is required.  This type is
 250 always capable of representing any 16-bit unsigned value, but the actual
 251 type may be wider than 16 bits and will require masking.
 252 .TP
 253 .B uint24
 254 Equivalent to some (architecture-dependent) standard type.  Capable of
 255 representing any unsigned 24-bit value, although the actual type may
 256 be wider than 24 bits.
 257 .TP
 258 .B uint32
 259 Equivalent to some (architecture-dependent) standard type.  Capable of
 260 representing any unsigned 32-bit value, although the actual type may
 261 be wider than 32 bits.
 262 .TP
 263 .B uint64
 264 Equivalent to some (architecture-dependent) standard type, if it exists.
 265 Capable of representing any unsigned 64-bit value, although the
 266 actual type may be wider than 64 bits.
 267 .
 268 .SS "Size/endianness suffixes"
 269 Let
 270 .I w
 271 be one of the size suffixes: 8, 16, 24, 32, and (if available) 64.
 272 Furthermore, let
 273 .I we
 274 be one of the size-and-endian suffixes
 275 .IR w ,
 276 or, where
 277 .IR w \~>\~8,
 278 .IB w _L
 279 or
 280 .IB w _B \fR,
 281 where
 282 .RB ` _L '
 283 denotes little-endian (Intel, VAX) representation, and
 284 .RB ` _B '
 285 denotes big-endian (IBM, network) representation; omitting an explicit
 286 suffix gives big-endian order by default, since this is most common in
 287 portable data formats.
 288 .PP
 289 The macro invocation
 290 .BI DOUINTSZ( f )
 291 invokes a given macro
 292 .I f
 293 repeatedly, as
 294 .IB f ( w )
 295 for each size suffix
 296 .I w
 297 listed above.
 298 .PP
 299 The macro invocation
 300 .BI DOUINTCONV( f )
 301 invokes a given macro
 302 .I f
 303 repeatedly, as
 304 .IR f ( w ", " we ", " suff )
 305 where
 306 .I we
 307 ranges over size-and-endian suffixes as described above,
 308 .I w
 309 is just the corresponding bit width, as an integer, and
 310 .I suff
 311 is a suffix
 312 .IR w ,
 313 .IB w l\fR,
 314 or
 315 .IB w b\fR,
 316 suitable for a C function name.
 317 .PP
 318 These macros are intended to be used to define families of related
 319 functions.
 320 .
 321 .SS "Utility macros"
 322 For each size-and-endian suffix
 323 .IR we ,
 324 the following macros are defined.
 325 .TP
 326 .BI TY_ we
 327 A synonym for the appropriate one of the types
 328 .BR octet ,
 329 .BR uint32 ,
 330 etc.\& listed above.
 331 .TP
 332 .BI SZ_ we
 333 The number of octets needed to represent a value of the corresponding
 334 type; i.e., this is
 335 .IR w /8.
 336 .TP
 337 .BI MASK_ we
 338 The largest integer representable in the corresponding type; i.e., this
 339 is
 340 .RI 2\*(ss w \*(se\~\-\~1.
 341 .PP
 342 (Note that the endianness suffix is irrelevant in the above
 343 definitions.)
 344 .PP
 345 For each size suffix
 346 .IR w ,
 347 the macro invocation
 348 .BI U w ( x )
 349 coerces an integer
 350 .I x
 351 to the appropriate type; specifically, it returns the smallest
 352 nonnegative integer congruent to
 353 .I x
 354 (modulo
 355 .RI 2\*(ss w \*(se).
 356 .
 357 .SS "Shift and rotate"
 358 For each size suffix
 359 .IR w ,
 360 the macro invocations
 361 .BI LSL w ( x ", " n )
 362 and
 363 .BI LSR w ( x ", " n )
 364 shift a
 365 .IR w -bit
 366 quantity
 367 .I x
 368 left or right, respectively, by
 369 .I n
 370 places; if
 371 .IR n \~\(>=\~ w
 372 then
 373 .I n
 374 is reduced modulo
 375 .IR w .
 376 (This behaviour is unfortunate, but (a) it's what a number of CPUs
 377 provide natively, and (b) it's a cheap way to prevent undefined
 378 behaviour.)  Similarly,
 379 .BI ROL w ( x ", " n )
 380 and
 381 .BI ROR w ( x ", " n )
 382 rotate a
 383 .IR w -bit
 384 quantity
 385 .I x
 386 left or right, respectively, by
 387 .I n
 388 places.
 389 .
 390 .SS "Byte order conversions"
 391 For each size suffix
 392 .IR w ,
 393 the macro invocation
 394 .BI ENDSWAP w ( x )
 395 returns the
 396 .IR w -bit
 397 value
 398 .IR x
 399 with its bytes reversed.  The
 400 .B ENDSWAP8
 401 macro does nothing (except truncate its operand to 8 bits), but is
 402 provided for the sake of completeness.
 403 .PP
 404 A
 405 .I big-endian
 406 representation stores the most significant octet of an integer at the
 407 lowest address, with the following octets in decreasing order of
 408 significance.  A
 409 .I little-endian
 410 representation instead stores the
 411 .I least
 412 significant octet at the lowest address, with the following octets in
 413 increasing order of significance.  An environment has a preferred order
 414 for arranging the constituent octets of an integer of some given size in
 415 memory; this might be either the big- or little-endian representation
 416 just described, or something else strange.
 417 .PP
 418 It might be possible to rearrange the bits in an integer so that, when
 419 that integer is stored to memory in the environment's preferred manner,
 420 you end up with the big- or little-endian representation of the original
 421 integer; and, similarly, it might be possible to load a big- or
 422 little-endian representation of an integer into a variable using the
 423 environment's preferred ordering and then rearrange the bits so as to
 424 recover the integer value originally represented.  If the environment is
 425 sufficiently strange, these things might not be possible, but this is
 426 actually quite rare.
 427 .PP
 428 Say that an integer has been converted to
 429 .I big-
 430 or
 431 .I "little-endian form"
 432 if, when it is stored in memory in the environment's preferred manner,
 433 one ends up with a big- or little-endian representation of the original
 434 integer.  Equivalently, if one starts with a big- or little-endian
 435 representation of some integer, and loads it into a variable using the
 436 environment's preferred manner, one ends up with the big- or
 437 little-endian form of the original integer.
 438 .PP
 439 If these things are possible, then the following macros are defined.
 440 .TP
 441 .BI HTOL w ( x )
 442 Convert a
 443 .IR w -bit
 444 integer
 445 .I x
 446 to little-endian form.
 447 .TP
 448 .BI HTOB w ( x )
 449 Convert a
 450 .IR w -bit
 451 integer
 452 .I x
 453 to big-endian form.
 454 .TP
 455 .BI LTOH w ( x )
 456 Convert a
 457 .IR w -bit
 458 integer
 459 .I x
 460 from little-endian form.
 461 .TP
 462 .BI BTOH w ( x )
 463 Convert a
 464 .IR w -bit
 465 integer
 466 .I x
 467 from big-endian form.
 468 .
 469 .SS "Load and store"
 470 The macro invocation
 471 .BI GETBYTE( p ", " o )
 472 returns the
 473 .IR o th
 474 octet following the address
 475 .IR p .
 476 Conversely,
 477 .BI PUTBYTE( p ", " o ", " v )
 478 stores
 479 .I v
 480 in the
 481 .IR o th
 482 byte following the address
 483 .IR p .
 484 These macros always operate on byte offsets regardless of the type of
 485 the pointer
 486 .IR p .
 487 .PP
 488 For each size-and-endian suffix
 489 .IR we ,
 490 the macro invocation
 491 .BI LOAD we ( p )
 492 loads and returns a value in the corresponding format at address
 493 .IR p ;
 494 similarly,
 495 .BI STORE we ( p ", " x )
 496 stores the value
 497 .I x
 498 at address
 499 .I p
 500 in the corresponding format.
 501 .
 502 .SS "64-bit support"
 503 For portability to environments without native 64-bit integers, the
 504 structure
 505 .B kludge64
 506 is defined.  If the target platform is known to have an unsigned 64-bit
 507 integer type, then this structure merely encapsulates a native integer,
 508 and a decent optimizing compiler can be expected to handle this exactly
 509 as if it were the native type.  Otherwise, it contains two 32-bit halves
 510 which are processed the hard way.
 511 .PP
 512 For each of the above macros with a suffix
 513 .BR 64 ,
 514 .BR 64_L ,
 515 or
 516 .BR 64_B ,
 517 an additional `kludge' macro is defined, whose name has an additional
 518 final underscore; e.g., the kludge macro corresponding to
 519 .B ROR64
 520 is
 521 .BR ROR64_ ;
 522 and that corresponding to
 523 .B LOAD64_L
 524 is
 525 .BR LOAD64_L_ .
 526 If the original macro would have
 527 .I returned
 528 a value of type
 529 .BR uint64 ,
 530 then the kludge macro has an additional first argument, denoted
 531 .IR d ,
 532 which should be an lvalue of type
 533 .BR kludge64 ,
 534 and the kludge macro will store its result in
 535 .IR d .
 536 The kludge macro's remaining arguments are the same as the original
 537 macro, except that where the original macro accepts an argument of type
 538 .BR uint64 ,
 539 the kludge macro accepts an argument of type
 540 .B kludge64
 541 instead.
 542 .PP
 543 Finally, a number of additional macros are provided, to make working
 544 with
 545 .B kludge64
 546 somewhat less awful.
 547 .TP
 548 .BI SET64( d ", " h ", " l )
 549 Set the high 32 bits of
 550 .I d
 551 to be
 552 .IR h ,
 553 and the low 32 bits to be
 554 .IR l .
 555 Both
 556 .I h
 557 and
 558 .I l
 559 may be arbitrary integers.
 560 .TP
 561 .BI X64( hexh ", " hexl )
 562 Expands to an initializer for an object of type
 563 .B kludge64
 564 where
 565 .I hexh
 566 and
 567 .I hexl
 568 encode the high and low 32-bit halves in hexadecimal, without any
 569 .B 0x
 570 prefix.
 571 .TP
 572 .BI ASSIGN64( d ", " x )
 573 Make
 574 .I d
 575 be a copy of the
 576 .B kludge64
 577 .IR x .
 578 .TP
 579 .BI HI64( x )
 580 Return the high 32 bits of
 581 .IR x .
 582 .TP
 583 .BI LO64( x )
 584 Return the low 32 bits of
 585 .IR x .
 586 .TP
 587 .BI GET64( t ", " x )
 588 Return the value of
 589 .I x
 590 as a value of type
 591 .IR t .
 592 If
 593 .I t
 594 is an unsigned integer type, then the value will be truncated to fit as
 595 necessary; if
 596 .I t
 597 is a signed integer type, then the behaviour is undefined if the value
 598 of
 599 .I x
 600 is too large.
 601 .TP
 602 .BI AND64( d ", " x ", " y )
 603 Set
 604 .I d
 605 to be the bitwise-and of the two
 606 .B kludge64
 607 arguments
 608 .I x
 609 and
 610 .IR y .
 611 .TP
 612 .BI OR64( d ", " x ", " y )
 613 Set
 614 .I d
 615 to be the bitwise-or of the two
 616 .B kludge64
 617 arguments
 618 .I x
 619 and
 620 .IR y .
 621 .TP
 622 .BI XOR64( d ", " x ", " y )
 623 Set
 624 .I d
 625 to be the bitwise-exclusive-or of the two
 626 .B kludge64
 627 arguments
 628 .I x
 629 and
 630 .IR y .
 631 .TP
 632 .BI CPL64( d ", " x )
 633 Set
 634 .I d
 635 to be the bitwise complement of the
 636 .B kludge64
 637 argument
 638 .IR x .
 639 .TP
 640 .BI ADD64( d ", " x ", " y )
 641 Set
 642 .I d
 643 to be the sum of the two
 644 .B kludge64
 645 arguments
 646 .I x
 647 and
 648 .IR y .
 649 .TP
 650 .BI SUB64( d ", " x ", " y )
 651 Set
 652 .I d
 653 to be the difference of the two
 654 .B kludge64
 655 arguments
 656 .I x
 657 and
 658 .IR y .
 659 .TP
 660 .BI CMP64( x ", " op ", " y )
 661 Here,
 662 .I x
 663 and
 664 .I y
 665 should be arguments of type
 666 .B kludge64
 667 and
 668 .I op
 669 should be one of the relational operators
 670 .BR == ,
 671 .BR < ,
 672 .BR <= ,
 673 .BR > ,
 674 or
 675 .B >=
 676 \(en
 677 .I not
 678 .BR != .
 679 Evaluates nonzero if
 680 .IR x \~ op \~ y .
 681 .TP
 682 .BI ZERO64( x )
 683 Evaluates nonzero if the
 684 .B kludge64
 685 argument
 686 .I x
 687 is exactly zero.
 688 .SH "SEE ALSO"
 689 .BR mLib (3).
 690 .SH AUTHOR
 691 Mark Wooding, <mdw@distorted.org.uk>
 692