utils/bits.3: Basically rewrite it, filling in the missing things.
[mLib] / utils / bits.3
1 .\" -*-nroff-*-
2 .TH bits 3 "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
3 .ie t \{\
4 . ds ss \s8\u
5 . ds se \d\s0
6 .\}
7 .el \{\
8 . ds ss ^
9 . ds se
10 .\}
11 .SH NAME
12 bits \- portable bit manipulation macros
13 .\" octet
14 .\" uint16
15 .\" uint24
16 .\" uint32
17 .\" uint64
18 .\" kludge64
19 .\"
20 .\" MASK_8
21 .\" MASK_16
22 .\" MASK_16_L
23 .\" MASK_16_B
24 .\" MASK_24
25 .\" MASK_24_L
26 .\" MASK_24_B
27 .\" MASK_32
28 .\" MASK_32_L
29 .\" MASK_32_B
30 .\" MASK_64
31 .\" MASK_64_L
32 .\" MASK_64_B
33 .\"
34 .\" SZ_8
35 .\" SZ_16
36 .\" SZ_16_L
37 .\" SZ_16_B
38 .\" SZ_24
39 .\" SZ_24_L
40 .\" SZ_24_B
41 .\" SZ_32
42 .\" SZ_32_L
43 .\" SZ_32_B
44 .\" SZ_64
45 .\" SZ_64_L
46 .\" SZ_64_B
47 .\"
48 .\" TY_8
49 .\" TY_16
50 .\" TY_16_L
51 .\" TY_16_B
52 .\" TY_24
53 .\" TY_24_L
54 .\" TY_24_B
55 .\" TY_32
56 .\" TY_32_L
57 .\" TY_32_B
58 .\" TY_64
59 .\" TY_64_L
60 .\" TY_64_B
61 .\"
62 .\" DOUINTSZ
63 .\" DOUINTCONV
64 .\"
65 .\" @U8
66 .\" @U16
67 .\" @U24
68 .\" @U32
69 .\" @U64
70 .\" @U64_
71 .\"
72 .\" @LSL8
73 .\" @LSR8
74 .\" @LSL16
75 .\" @LSR16
76 .\" @LSL24
77 .\" @LSR24
78 .\" @LSL32
79 .\" @LSR32
80 .\" @LSL64
81 .\" @LSR64
82 .\" @LSL64_
83 .\" @LSR64_
84 .\"
85 .\" @ROL8
86 .\" @ROR8
87 .\" @ROL16
88 .\" @ROR16
89 .\" @ROL24
90 .\" @ROR24
91 .\" @ROL32
92 .\" @ROR32
93 .\" @ROL64
94 .\" @ROR64
95 .\" @ROL64_
96 .\" @ROR64_
97 .\"
98 .\" @GETBYTE
99 .\" @PUTBYTE
100 .\"
101 .\" @LOAD8
102 .\" @STORE8
103 .\"
104 .\" @LOAD16_L
105 .\" @LOAD16_B
106 .\" @LOAD16
107 .\" @STORE16_L
108 .\" @STORE16_B
109 .\" @STORE16
110 .\"
111 .\" @LOAD24_L
112 .\" @LOAD24_B
113 .\" @LOAD24
114 .\" @STORE24_L
115 .\" @STORE24_B
116 .\" @STORE24
117 .\"
118 .\" @LOAD32_L
119 .\" @LOAD32_B
120 .\" @LOAD32
121 .\" @STORE32_L
122 .\" @STORE32_B
123 .\" @STORE32
124 .\"
125 .\" @LOAD64_L
126 .\" @LOAD64_B
127 .\" @LOAD64
128 .\" @STORE64_L
129 .\" @STORE64_B
130 .\" @STORE64
131 .\"
132 .\" @LOAD64_L_
133 .\" @LOAD64_B_
134 .\" @LOAD64_
135 .\" @STORE64_L_
136 .\" @STORE64_B_
137 .\" @STORE64_
138 .\"
139 .\" @SET64
140 .\" @X64
141 .\" @ASSIGN64
142 .\" @HI64
143 .\" @LO64
144 .\" @GET64
145 .\" @AND64
146 .\" @OR64
147 .\" @XOR64
148 .\" @CPL64
149 .\" @ADD64
150 .\" @SUB64
151 .\" @CMP64
152 .\" @ZERO64
153 .SH SYNOPSIS
154 .nf
155 .B "#include <mLib/bits.h>"
156
157 .BR "typedef " ... " octet;"
158 .BR "typedef " ... " uint16;"
159 .BR "typedef " ... " uint24;"
160 .BR "typedef " ... " uint32;"
161 .BR "typedef " ... " uint64;"
162 .BR "typedef " ... " kludge64;"
163
164 .BI "#define TY_" we " " type
165 .BI "#define SZ_" we " \fR..."
166 .BI "#define MASK_" we " \fR..."
167
168 .BI "#define DOUINTSZ(" f ") \fR..."
169 .BI "#define DOUINTCONV(" f ") \fR..."
170
171 .IB type " U" w ( v );
172
173 .IB type " LSL" w ( type " " v ", int " s );
174 .IB type " LSR" w ( type " " v ", int " s );
175 .IB type " ROL" w ( type " " v ", int " s );
176 .IB type " ROR" w ( type " " v ", int " s );
177
178 .BI "octet GETBYTE(void *" p ", size_t " o );
179 .BI "void PUTBYTE(void *" p ", size_t " o ", octet " v );
180
181 .IB type " LOAD" we "(void *" p );
182 .BI "void STORE" we "(void *" p ", " type " " v );
183
184 .BI "void SET64(kludge64 &" d ", uint32 " h ", uint32 " l );
185 .BI "kludge64 X64(" hexh ", " hexl );
186 .BI "void ASSIGN64(kludge64 &" d ", " x );
187 .BI "uint32 HI64(kludge64 " x );
188 .BI "uint32 LO64(kludge64 " x );
189 .IB ty " GET64(" ty ", kludge64 " x );
190 .BI "void AND64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
191 .BI "void OR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
192 .BI "void XOR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
193 .BI "void CPL64(kludge64 &" d ", kludge64 " x );
194 .BI "void ADD64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
195 .BI "void SUB64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
196 .BI "int CMP64(kludge64 " x ", " op ", kludge64 " y );
197 .BI "int ZERO64(kludge64 " x );
198 .fi
199 .SH DESCRIPTION
200 The header file
201 .B <mLib/bits.h>
202 contains a number of useful definitions for portably dealing with bit-
203 and byte-level manipulation of larger quantities. The various macros
204 and types are named fairly systematically.
205 .PP
206 The header provides utilities for working with 64-bit quantities, but a
207 64-bit integer type is not guaranteed to exist under C89 rules. This
208 header takes two approaches. Firstly, if a 64-bit type is found, the
209 header defines the macro
210 .B HAVE_UINT64
211 and defines the various
212 .RB ... 64
213 macros as described below. Secondly, it unconditionally defines a type
214 .B kludge64
215 and a family of macros for working with them. See below for details.
216 .
217 .SS "Type definitions"
218 A number of types are defined.
219 .TP
220 .B octet
221 Equivalent to
222 .BR "unsigned char" .
223 This is intended to be used when a character array is used to represent
224 the octets of some external data format. Note that on some
225 architectures the
226 .B "unsigned char"
227 type may occupy more than 8 bits.
228 .TP
229 .B uint16
230 Equivalent to
231 .BR "unsigned short" .
232 Intended to be used when a 16-bit value is required. This type is
233 always capable of representing any 16-bit unsigned value, but the actual
234 type may be wider than 16 bits and will require masking.
235 .TP
236 .B uint24
237 Equivalent to some (architecture-dependent) standard type. Capable of
238 representing any unsigned 24-bit value, although the actual type may
239 be wider than 24 bits.
240 .TP
241 .B uint32
242 Equivalent to some (architecture-dependent) standard type. Capable of
243 representing any unsigned 32-bit value, although the actual type may
244 be wider than 32 bits.
245 .TP
246 .B uint64
247 Equivalent to some (architecture-dependent) standard type, if it exists.
248 Capable of representing any unsigned 64-bit value, although the
249 actual type may be wider than 64 bits.
250 .
251 .SS "Size/endianness suffixes"
252 Let
253 .I w
254 be one of the size suffixes: 8, 16, 24, 32, and (if available) 64.
255 Furthermore, let
256 .I we
257 be one of the size-and-endian suffixes
258 .IR w ,
259 or, where
260 .IR w \~>\~8,
261 .IB w _L
262 or
263 .IB w _B \fR,
264 where
265 .RB ` _L '
266 denotes little-endian (Intel, VAX) representation, and
267 .RB ` _B '
268 denotes big-endian (IBM, network) representation; omitting an explicit
269 suffix gives big-endian order by default, since this is most common in
270 portable data formats.
271 .PP
272 The macro invocation
273 .BI DOUINTSZ( f )
274 invokes a given macro
275 .I f
276 repeatedly, as
277 .IB f ( w )
278 for each size suffix
279 .I w
280 listed above.
281 .PP
282 The macro invocation
283 .BI DOUINTCONV( f )
284 invokes a given macro
285 .I f
286 repeatedly, as
287 .IR f ( w ", " we ", " suff )
288 where
289 .I we
290 ranges over size-and-endian suffixes as described above,
291 .I w
292 is just the corresponding bit width, as an integer, and
293 .I suff
294 is a suffix
295 .IR w ,
296 .IB w l\fR,
297 or
298 .IB w b\fR,
299 suitable for a C function name.
300 .PP
301 These macros are intended to be used to define families of related
302 functions.
303 .
304 .SS "Utility macros"
305 For each size-and-endian suffix
306 .IR we ,
307 the following macros are defined.
308 .TP
309 .BI TY_ we
310 A synonym for the appropriate one of the types
311 .BR octet ,
312 .BR uint32 ,
313 etc.\& listed above.
314 .TP
315 .BI SZ_ we
316 The number of octets needed to represent a value of the corresponding
317 type; i.e., this is
318 .IR w /8.
319 .TP
320 .BI MASK_ we
321 The largest integer representable in the corresponding type; i.e., this
322 is
323 .RI 2\*(ss w \*(se\~\-\~1.
324 .PP
325 (Note that the endianness suffix is irrelevant in the above
326 definitions.)
327 .PP
328 For each size suffix
329 .IR w ,
330 the macro invocation
331 .BI U w ( x )
332 coerces an integer
333 .I x
334 to the appropriate type; specifically, it returns the smallest
335 nonnegative integer congruent to
336 .I x
337 (modulo
338 .RI 2\*(ss w \*(se).
339 .
340 .SS "Shift and rotate"
341 For each size suffix
342 .IR w ,
343 the macro invocations
344 .BI LSL w ( x ", " n )
345 and
346 .BI LSR w ( x ", " n )
347 shift a
348 .IR w -bit
349 quantity
350 .I x
351 left or right, respectively, by
352 .I n
353 places; if
354 .IR n \~\(>=\~ w
355 then
356 .I n
357 is reduced modulo
358 .IR w .
359 (This behaviour is unfortunate, but (a) it's what a number of CPUs
360 provide natively, and (b) it's a cheap way to prevent undefined
361 behaviour.) Similarly,
362 .BI ROL w ( x ", " n )
363 and
364 .BI ROR w ( x ", " n )
365 rotate a
366 .IR w -bit
367 quantity
368 .I x
369 left or right, respectively, by
370 .I n
371 places.
372 .
373 .SS "Load and store"
374 The macro invocation
375 .BI GETBYTE( p ", " o )
376 returns the
377 .IR o th
378 octet following the address
379 .IR p .
380 Conversely,
381 .BI PUTBYTE( p ", " o ", " v )
382 stores
383 .I v
384 in the
385 .IR o th
386 byte following the address
387 .IR p .
388 These macros always operate on byte offsets regardless of the type of
389 the pointer
390 .IR p .
391 .PP
392 For each size-and-endian suffix
393 .IR we ,
394 the macro invocation
395 .BI LOAD we ( p )
396 loads and returns a value in the corresponding format at address
397 .IR p ;
398 similarly,
399 .BI STORE we ( p ", " x )
400 stores the value
401 .I x
402 at address
403 .I p
404 in the corresponding format.
405 .
406 .SS "64-bit support"
407 For portability to environments without native 64-bit integers, the
408 structure
409 .B kludge64
410 is defined. If the target platform is known to have an unsigned 64-bit
411 integer type, then this structure merely encapsulates a native integer,
412 and a decent optimizing compiler can be expected to handle this exactly
413 as if it were the native type. Otherwise, it contains two 32-bit halves
414 which are processed the hard way.
415 .PP
416 For each of the above macros with a suffix
417 .BR 64 ,
418 .BR 64_L ,
419 or
420 .BR 64_B ,
421 an additional `kludge' macro is defined, whose name has an additional
422 final underscore; e.g., the kludge macro corresponding to
423 .B ROR64
424 is
425 .BR ROR64_ ;
426 and that corresponding to
427 .B LOAD64_L
428 is
429 .BR LOAD64_L_ .
430 If the original macro would have
431 .I returned
432 a value of type
433 .BR uint64 ,
434 then the kludge macro has an additional first argument, denoted
435 .IR d ,
436 which should be an lvalue of type
437 .BR kludge64 ,
438 and the kludge macro will store its result in
439 .IR d .
440 The kludge macro's remaining arguments are the same as the original
441 macro, except that where the original macro accepts an argument of type
442 .BR uint64 ,
443 the kludge macro accepts an argument of type
444 .B kludge64
445 instead.
446 .PP
447 Finally, a number of additional macros are provided, to make working
448 with
449 .B kludge64
450 somewhat less awful.
451 .TP
452 .BI SET64( d ", " h ", " l )
453 Set the high 32 bits of
454 .I d
455 to be
456 .IR h ,
457 and the low 32 bits to be
458 .IR l .
459 Both
460 .I h
461 and
462 .I l
463 may be arbitrary integers.
464 .TP
465 .BI X64( hexh ", " hexl )
466 Expands to an initializer for an object of type
467 .B kludge64
468 where
469 .I hexh
470 and
471 .I hexl
472 encode the high and low 32-bit halves in hexadecimal, without any
473 .B 0x
474 prefix.
475 .TP
476 .BI ASSIGN64( d ", " x )
477 Make
478 .I d
479 be a copy of the
480 .B kludge64
481 .IR x .
482 .TP
483 .BI HI64( x )
484 Return the high 32 bits of
485 .IR x .
486 .TP
487 .BI LO64( x )
488 Return the low 32 bits of
489 .IR x .
490 .TP
491 .BI GET64( t ", " x )
492 Return the value of
493 .I x
494 as a value of type
495 .IR t .
496 If
497 .I t
498 is an unsigned integer type, then the value will be truncated to fit as
499 necessary; if
500 .I t
501 is a signed integer type, then the behaviour is undefined if the value
502 of
503 .I x
504 is too large.
505 .TP
506 .BI AND64( d ", " x ", " y )
507 Set
508 .I d
509 to be the bitwise-and of the two
510 .B kludge64
511 arguments
512 .I x
513 and
514 .IR y .
515 .TP
516 .BI OR64( d ", " x ", " y )
517 Set
518 .I d
519 to be the bitwise-or of the two
520 .B kludge64
521 arguments
522 .I x
523 and
524 .IR y .
525 .TP
526 .BI XOR64( d ", " x ", " y )
527 Set
528 .I d
529 to be the bitwise-exclusive-or of the two
530 .B kludge64
531 arguments
532 .I x
533 and
534 .IR y .
535 .TP
536 .BI CPL64( d ", " x )
537 Set
538 .I d
539 to be the bitwise complement of the
540 .B kludge64
541 argument
542 .IR x .
543 .TP
544 .BI ADD64( d ", " x ", " y )
545 Set
546 .I d
547 to be the sum of the two
548 .B kludge64
549 arguments
550 .I x
551 and
552 .IR y .
553 .TP
554 .BI SUB64( d ", " x ", " y )
555 Set
556 .I d
557 to be the difference of the two
558 .B kludge64
559 arguments
560 .I x
561 and
562 .IR y .
563 .TP
564 .BI CMP64( x ", " op ", " y )
565 Here,
566 .I x
567 and
568 .I y
569 should be arguments of type
570 .B kludge64
571 and
572 .I op
573 should be one of the relational operators
574 .BR == ,
575 .BR < ,
576 .BR <= ,
577 .BR > ,
578 or
579 .B >=
580 \(en
581 .I not
582 .BR != .
583 Evaluates nonzero if
584 .IR x \~ op \~ y .
585 .TP
586 .BI ZERO64( x )
587 Evaluates nonzero if the
588 .B kludge64
589 argument
590 .I x
591 is exactly zero.
592 .SH "SEE ALSO"
593 .BR mLib (3).
594 .SH AUTHOR
595 Mark Wooding, <mdw@distorted.org.uk>
596