mdw@git.distorted.org.uk Git - mLib/blame_incremental

... / ...

Commit	Line	Data
	1	.\" --nroff--
	2	.TH bits 3 "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
	3	.ie t \{\
	4	. ds ss \s8\u
	5	. ds se \d\s0
	6	.\}
	7	.el \{\
	8	. ds ss ^
	9	. ds se
	10	.\}
	11	.SH NAME
	12	bits \- portable bit manipulation macros
	13	.\" octet
	14	.\" uint16
	15	.\" uint24
	16	.\" uint32
	17	.\" uint64
	18	.\" kludge64
	19	.\"
	20	.\" MASK_8
	21	.\" MASK_16
	22	.\" MASK_16_L
	23	.\" MASK_16_B
	24	.\" MASK_24
	25	.\" MASK_24_L
	26	.\" MASK_24_B
	27	.\" MASK_32
	28	.\" MASK_32_L
	29	.\" MASK_32_B
	30	.\" MASK_64
	31	.\" MASK_64_L
	32	.\" MASK_64_B
	33	.\"
	34	.\" SZ_8
	35	.\" SZ_16
	36	.\" SZ_16_L
	37	.\" SZ_16_B
	38	.\" SZ_24
	39	.\" SZ_24_L
	40	.\" SZ_24_B
	41	.\" SZ_32
	42	.\" SZ_32_L
	43	.\" SZ_32_B
	44	.\" SZ_64
	45	.\" SZ_64_L
	46	.\" SZ_64_B
	47	.\"
	48	.\" TY_8
	49	.\" TY_16
	50	.\" TY_16_L
	51	.\" TY_16_B
	52	.\" TY_24
	53	.\" TY_24_L
	54	.\" TY_24_B
	55	.\" TY_32
	56	.\" TY_32_L
	57	.\" TY_32_B
	58	.\" TY_64
	59	.\" TY_64_L
	60	.\" TY_64_B
	61	.\"
	62	.\" DOUINTSZ
	63	.\" DOUINTCONV
	64	.\"
	65	.\" @U8
	66	.\" @U16
	67	.\" @U24
	68	.\" @U32
	69	.\" @U64
	70	.\" @U64_
	71	.\"
	72	.\" @LSL8
	73	.\" @LSR8
	74	.\" @LSL16
	75	.\" @LSR16
	76	.\" @LSL24
	77	.\" @LSR24
	78	.\" @LSL32
	79	.\" @LSR32
	80	.\" @LSL64
	81	.\" @LSR64
	82	.\" @LSL64_
	83	.\" @LSR64_
	84	.\"
	85	.\" @ROL8
	86	.\" @ROR8
	87	.\" @ROL16
	88	.\" @ROR16
	89	.\" @ROL24
	90	.\" @ROR24
	91	.\" @ROL32
	92	.\" @ROR32
	93	.\" @ROL64
	94	.\" @ROR64
	95	.\" @ROL64_
	96	.\" @ROR64_
	97	.\"
	98	.\" ENDSWAP16
	99	.\" ENDSWAP32
	100	.\" ENDSWAP64
	101	.\"
	102	.\" BTOH16
	103	.\" LTOH16
	104	.\" HTOB16
	105	.\" HTOL16
	106	.\" BTOH32
	107	.\" LTOH32
	108	.\" HTOB32
	109	.\" HTOL32
	110	.\" BTOH64
	111	.\" LTOH64
	112	.\" HTOB64
	113	.\" HTOL64
	114	.\"
	115	.\" RAW8
	116	.\" RAW16
	117	.\" RAW32
	118	.\" RAW64
	119	.\"
	120	.\" @GETBYTE
	121	.\" @PUTBYTE
	122	.\"
	123	.\" @LOAD8
	124	.\" @STORE8
	125	.\"
	126	.\" @LOAD16_L
	127	.\" @LOAD16_B
	128	.\" @LOAD16
	129	.\" @STORE16_L
	130	.\" @STORE16_B
	131	.\" @STORE16
	132	.\"
	133	.\" @LOAD24_L
	134	.\" @LOAD24_B
	135	.\" @LOAD24
	136	.\" @STORE24_L
	137	.\" @STORE24_B
	138	.\" @STORE24
	139	.\"
	140	.\" @LOAD32_L
	141	.\" @LOAD32_B
	142	.\" @LOAD32
	143	.\" @STORE32_L
	144	.\" @STORE32_B
	145	.\" @STORE32
	146	.\"
	147	.\" @LOAD64_L
	148	.\" @LOAD64_B
	149	.\" @LOAD64
	150	.\" @STORE64_L
	151	.\" @STORE64_B
	152	.\" @STORE64
	153	.\"
	154	.\" @LOAD64_L_
	155	.\" @LOAD64_B_
	156	.\" @LOAD64_
	157	.\" @STORE64_L_
	158	.\" @STORE64_B_
	159	.\" @STORE64_
	160	.\"
	161	.\" @SET64
	162	.\" @X64
	163	.\" @ASSIGN64
	164	.\" @HI64
	165	.\" @LO64
	166	.\" @GET64
	167	.\" @AND64
	168	.\" @OR64
	169	.\" @XOR64
	170	.\" @CPL64
	171	.\" @ADD64
	172	.\" @SUB64
	173	.\" @CMP64
	174	.\" @ZERO64
	175	.SH SYNOPSIS
	176	.nf
	177	.B "#include <mLib/bits.h>"
	178
	179	.BR "typedef " ... " octet;"
	180	.BR "typedef " ... " uint16;"
	181	.BR "typedef " ... " uint24;"
	182	.BR "typedef " ... " uint32;"
	183	.BR "typedef " ... " uint64;"
	184	.BR "typedef " ... " kludge64;"
	185
	186	.BI "#define TY_" we " " type
	187	.BI "#define SZ_" we " \fR..."
	188	.BI "#define MASK_" we " \fR..."
	189
	190	.BI "#define DOUINTSZ(" f ") \fR..."
	191	.BI "#define DOUINTCONV(" f ") \fR..."
	192
	193	.IB type " U" w ( v );
	194
	195	.IB type " LSL" w ( type " " v ", int " s );
	196	.IB type " LSR" w ( type " " v ", int " s );
	197	.IB type " ROL" w ( type " " v ", int " s );
	198	.IB type " ROR" w ( type " " v ", int " s );
	199
	200	.BI "octet GETBYTE(void *" p ", size_t " o );
	201	.BI "void PUTBYTE(void *" p ", size_t " o ", octet " v );
	202
	203	.IB type " LOAD" we "(void *" p );
	204	.BI "void STORE" we "(void *" p ", " type " " v );
	205
	206	.BI "void SET64(kludge64 &" d ", uint32 " h ", uint32 " l );
	207	.BI "kludge64 X64(" hexh ", " hexl );
	208	.BI "void ASSIGN64(kludge64 &" d ", " x );
	209	.BI "uint32 HI64(kludge64 " x );
	210	.BI "uint32 LO64(kludge64 " x );
	211	.IB ty " GET64(" ty ", kludge64 " x );
	212	.BI "void AND64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
	213	.BI "void OR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
	214	.BI "void XOR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
	215	.BI "void CPL64(kludge64 &" d ", kludge64 " x );
	216	.BI "void ADD64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
	217	.BI "void SUB64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
	218	.BI "int CMP64(kludge64 " x ", " op ", kludge64 " y );
	219	.BI "int ZERO64(kludge64 " x );
	220	.fi
	221	.SH DESCRIPTION
	222	The header file
	223	.B <mLib/bits.h>
	224	contains a number of useful definitions for portably dealing with bit-
	225	and byte-level manipulation of larger quantities. The various macros
	226	and types are named fairly systematically.
	227	.PP
	228	The header provides utilities for working with 64-bit quantities, but a
	229	64-bit integer type is not guaranteed to exist under C89 rules. This
	230	header takes two approaches. Firstly, if a 64-bit type is found, the
	231	header defines the macro
	232	.B HAVE_UINT64
	233	and defines the various
	234	.RB ... 64
	235	macros as described below. Secondly, it unconditionally defines a type
	236	.B kludge64
	237	and a family of macros for working with them. See below for details.
	238	.
	239	.SS "Type definitions"
	240	A number of types are defined.
	241	.TP
	242	.B octet
	243	Equivalent to
	244	.BR "unsigned char" .
	245	This is intended to be used when a character array is used to represent
	246	the octets of some external data format. Note that on some
	247	architectures the
	248	.B "unsigned char"
	249	type may occupy more than 8 bits.
	250	.TP
	251	.B uint16
	252	Equivalent to
	253	.BR "unsigned short" .
	254	Intended to be used when a 16-bit value is required. This type is
	255	always capable of representing any 16-bit unsigned value, but the actual
	256	type may be wider than 16 bits and will require masking.
	257	.TP
	258	.B uint24
	259	Equivalent to some (architecture-dependent) standard type. Capable of
	260	representing any unsigned 24-bit value, although the actual type may
	261	be wider than 24 bits.
	262	.TP
	263	.B uint32
	264	Equivalent to some (architecture-dependent) standard type. Capable of
	265	representing any unsigned 32-bit value, although the actual type may
	266	be wider than 32 bits.
	267	.TP
	268	.B uint64
	269	Equivalent to some (architecture-dependent) standard type, if it exists.
	270	Capable of representing any unsigned 64-bit value, although the
	271	actual type may be wider than 64 bits.
	272	.
	273	.SS "Size/endianness suffixes"
	274	Let
	275	.I w
	276	be one of the size suffixes: 8, 16, 24, 32, and (if available) 64.
	277	Furthermore, let
	278	.I we
	279	be one of the size-and-endian suffixes
	280	.IR w ,
	281	or, where
	282	.IR w \~>\~8,
	283	.IB w _L
	284	or
	285	.IB w _B \fR,
	286	where
	287	.RB ` _L '
	288	denotes little-endian (Intel, VAX) representation, and
	289	.RB ` _B '
	290	denotes big-endian (IBM, network) representation; omitting an explicit
	291	suffix gives big-endian order by default, since this is most common in
	292	portable data formats.
	293	.PP
	294	The macro invocation
	295	.BI DOUINTSZ( f )
	296	invokes a given macro
	297	.I f
	298	repeatedly, as
	299	.IB f ( w )
	300	for each size suffix
	301	.I w
	302	listed above.
	303	.PP
	304	The macro invocation
	305	.BI DOUINTCONV( f )
	306	invokes a given macro
	307	.I f
	308	repeatedly, as
	309	.IR f ( w ", " we ", " suff )
	310	where
	311	.I we
	312	ranges over size-and-endian suffixes as described above,
	313	.I w
	314	is just the corresponding bit width, as an integer, and
	315	.I suff
	316	is a suffix
	317	.IR w ,
	318	.IB w l\fR,
	319	or
	320	.IB w b\fR,
	321	suitable for a C function name.
	322	.PP
	323	These macros are intended to be used to define families of related
	324	functions.
	325	.
	326	.SS "Utility macros"
	327	For each size-and-endian suffix
	328	.IR we ,
	329	the following macros are defined.
	330	.TP
	331	.BI TY_ we
	332	A synonym for the appropriate one of the types
	333	.BR octet ,
	334	.BR uint32 ,
	335	etc.\& listed above.
	336	.TP
	337	.BI SZ_ we
	338	The number of octets needed to represent a value of the corresponding
	339	type; i.e., this is
	340	.IR w /8.
	341	.TP
	342	.BI MASK_ we
	343	The largest integer representable in the corresponding type; i.e., this
	344	is
	345	.RI 2\(ss w \(se\~\-\~1.
	346	.PP
	347	(Note that the endianness suffix is irrelevant in the above
	348	definitions.)
	349	.PP
	350	For each size suffix
	351	.IR w ,
	352	the macro invocation
	353	.BI U w ( x )
	354	coerces an integer
	355	.I x
	356	to the appropriate type; specifically, it returns the smallest
	357	nonnegative integer congruent to
	358	.I x
	359	(modulo
	360	.RI 2\(ss w \(se).
	361	.
	362	.SS "Shift and rotate"
	363	For each size suffix
	364	.IR w ,
	365	the macro invocations
	366	.BI LSL w ( x ", " n )
	367	and
	368	.BI LSR w ( x ", " n )
	369	shift a
	370	.IR w -bit
	371	quantity
	372	.I x
	373	left or right, respectively, by
	374	.I n
	375	places; if
	376	.IR n \~\(>=\~ w
	377	then
	378	.I n
	379	is reduced modulo
	380	.IR w .
	381	(This behaviour is unfortunate, but (a) it's what a number of CPUs
	382	provide natively, and (b) it's a cheap way to prevent undefined
	383	behaviour.) Similarly,
	384	.BI ROL w ( x ", " n )
	385	and
	386	.BI ROR w ( x ", " n )
	387	rotate a
	388	.IR w -bit
	389	quantity
	390	.I x
	391	left or right, respectively, by
	392	.I n
	393	places.
	394	.
	395	.SS "Byte order conversions"
	396	For each size suffix
	397	.IR w ,
	398	the macro invocation
	399	.BI ENDSWAP w ( x )
	400	returns the
	401	.IR w -bit
	402	value
	403	.IR x
	404	with its bytes reversed. The
	405	.B ENDSWAP8
	406	macro does nothing (except truncate its operand to 8 bits), but is
	407	provided for the sake of completeness.
	408	.PP
	409	A
	410	.I big-endian
	411	representation stores the most significant octet of an integer at the
	412	lowest address, with the following octets in decreasing order of
	413	significance. A
	414	.I little-endian
	415	representation instead stores the
	416	.I least
	417	significant octet at the lowest address, with the following octets in
	418	increasing order of significance. An environment has a preferred order
	419	for arranging the constituent octets of an integer of some given size in
	420	memory; this might be either the big- or little-endian representation
	421	just described, or something else strange.
	422	.PP
	423	It might be possible to rearrange the bits in an integer so that, when
	424	that integer is stored to memory in the environment's preferred manner,
	425	you end up with the big- or little-endian representation of the original
	426	integer; and, similarly, it might be possible to load a big- or
	427	little-endian representation of an integer into a variable using the
	428	environment's preferred ordering and then rearrange the bits so as to
	429	recover the integer value originally represented. If the environment is
	430	sufficiently strange, these things might not be possible, but this is
	431	actually quite rare.
	432	.PP
	433	Say that an integer has been converted to
	434	.I big-
	435	or
	436	.I "little-endian form"
	437	if, when it is stored in memory in the environment's preferred manner,
	438	one ends up with a big- or little-endian representation of the original
	439	integer. Equivalently, if one starts with a big- or little-endian
	440	representation of some integer, and loads it into a variable using the
	441	environment's preferred manner, one ends up with the big- or
	442	little-endian form of the original integer.
	443	.PP
	444	If these things are possible, then the following macros are defined.
	445	.TP
	446	.BI HTOL w ( x )
	447	Convert a
	448	.IR w -bit
	449	integer
	450	.I x
	451	to little-endian form.
	452	.TP
	453	.BI HTOB w ( x )
	454	Convert a
	455	.IR w -bit
	456	integer
	457	.I x
	458	to big-endian form.
	459	.TP
	460	.BI LTOH w ( x )
	461	Convert a
	462	.IR w -bit
	463	integer
	464	.I x
	465	from little-endian form.
	466	.TP
	467	.BI BTOH w ( x )
	468	Convert a
	469	.IR w -bit
	470	integer
	471	.I x
	472	from big-endian form.
	473	.
	474	.SS "Load and store"
	475	The macro invocation
	476	.BI GETBYTE( p ", " o )
	477	returns the
	478	.IR o th
	479	octet following the address
	480	.IR p .
	481	Conversely,
	482	.BI PUTBYTE( p ", " o ", " v )
	483	stores
	484	.I v
	485	in the
	486	.IR o th
	487	byte following the address
	488	.IR p .
	489	These macros always operate on byte offsets regardless of the type of
	490	the pointer
	491	.IR p .
	492	.PP
	493	For each size suffix
	494	.IR w ,
	495	there may be a macro such that the invocation
	496	.BI RAW w ( p )
	497	is an lvalue designating the
	498	.IR w /8
	499	octets starting at address
	500	.IR p ,
	501	interpreted according to the environment's preferred representation,
	502	except that
	503	.I p
	504	need not be aligned in any particular fashion. There are many reasons
	505	why this might not be possible; programmers are not normally expected to
	506	use these macros directly, and they are documented in case they are
	507	useful for special effects.
	508	.PP
	509	For each size-and-endian suffix
	510	.IR we ,
	511	the macro invocation
	512	.BI LOAD we ( p )
	513	loads and returns a value in the corresponding format at address
	514	.IR p ;
	515	similarly,
	516	.BI STORE we ( p ", " x )
	517	stores the value
	518	.I x
	519	at address
	520	.I p
	521	in the corresponding format.
	522	.
	523	.SS "64-bit support"
	524	For portability to environments without native 64-bit integers, the
	525	structure
	526	.B kludge64
	527	is defined. If the target platform is known to have an unsigned 64-bit
	528	integer type, then this structure merely encapsulates a native integer,
	529	and a decent optimizing compiler can be expected to handle this exactly
	530	as if it were the native type. Otherwise, it contains two 32-bit halves
	531	which are processed the hard way.
	532	.PP
	533	For each of the above macros with a suffix
	534	.BR 64 ,
	535	.BR 64_L ,
	536	or
	537	.BR 64_B ,
	538	an additional `kludge' macro is defined, whose name has an additional
	539	final underscore; e.g., the kludge macro corresponding to
	540	.B ROR64
	541	is
	542	.BR ROR64_ ;
	543	and that corresponding to
	544	.B LOAD64_L
	545	is
	546	.BR LOAD64_L_ .
	547	If the original macro would have
	548	.I returned
	549	a value of type
	550	.BR uint64 ,
	551	then the kludge macro has an additional first argument, denoted
	552	.IR d ,
	553	which should be an lvalue of type
	554	.BR kludge64 ,
	555	and the kludge macro will store its result in
	556	.IR d .
	557	The kludge macro's remaining arguments are the same as the original
	558	macro, except that where the original macro accepts an argument of type
	559	.BR uint64 ,
	560	the kludge macro accepts an argument of type
	561	.B kludge64
	562	instead.
	563	.PP
	564	Finally, a number of additional macros are provided, to make working
	565	with
	566	.B kludge64
	567	somewhat less awful.
	568	.TP
	569	.BI SET64( d ", " h ", " l )
	570	Set the high 32 bits of
	571	.I d
	572	to be
	573	.IR h ,
	574	and the low 32 bits to be
	575	.IR l .
	576	Both
	577	.I h
	578	and
	579	.I l
	580	may be arbitrary integers.
	581	.TP
	582	.BI X64( hexh ", " hexl )
	583	Expands to an initializer for an object of type
	584	.B kludge64
	585	where
	586	.I hexh
	587	and
	588	.I hexl
	589	encode the high and low 32-bit halves in hexadecimal, without any
	590	.B 0x
	591	prefix.
	592	.TP
	593	.BI ASSIGN64( d ", " x )
	594	Make
	595	.I d
	596	be a copy of the
	597	.B kludge64
	598	.IR x .
	599	.TP
	600	.BI HI64( x )
	601	Return the high 32 bits of
	602	.IR x .
	603	.TP
	604	.BI LO64( x )
	605	Return the low 32 bits of
	606	.IR x .
	607	.TP
	608	.BI GET64( t ", " x )
	609	Return the value of
	610	.I x
	611	as a value of type
	612	.IR t .
	613	If
	614	.I t
	615	is an unsigned integer type, then the value will be truncated to fit as
	616	necessary; if
	617	.I t
	618	is a signed integer type, then the behaviour is undefined if the value
	619	of
	620	.I x
	621	is too large.
	622	.TP
	623	.BI AND64( d ", " x ", " y )
	624	Set
	625	.I d
	626	to be the bitwise-and of the two
	627	.B kludge64
	628	arguments
	629	.I x
	630	and
	631	.IR y .
	632	.TP
	633	.BI OR64( d ", " x ", " y )
	634	Set
	635	.I d
	636	to be the bitwise-or of the two
	637	.B kludge64
	638	arguments
	639	.I x
	640	and
	641	.IR y .
	642	.TP
	643	.BI XOR64( d ", " x ", " y )
	644	Set
	645	.I d
	646	to be the bitwise-exclusive-or of the two
	647	.B kludge64
	648	arguments
	649	.I x
	650	and
	651	.IR y .
	652	.TP
	653	.BI CPL64( d ", " x )
	654	Set
	655	.I d
	656	to be the bitwise complement of the
	657	.B kludge64
	658	argument
	659	.IR x .
	660	.TP
	661	.BI ADD64( d ", " x ", " y )
	662	Set
	663	.I d
	664	to be the sum of the two
	665	.B kludge64
	666	arguments
	667	.I x
	668	and
	669	.IR y .
	670	.TP
	671	.BI SUB64( d ", " x ", " y )
	672	Set
	673	.I d
	674	to be the difference of the two
	675	.B kludge64
	676	arguments
	677	.I x
	678	and
	679	.IR y .
	680	.TP
	681	.BI CMP64( x ", " op ", " y )
	682	Here,
	683	.I x
	684	and
	685	.I y
	686	should be arguments of type
	687	.B kludge64
	688	and
	689	.I op
	690	should be one of the relational operators
	691	.BR == ,
	692	.BR < ,
	693	.BR <= ,
	694	.BR > ,
	695	or
	696	.B >=
	697	\(en
	698	.I not
	699	.BR != .
	700	Evaluates nonzero if
	701	.IR x \~ op \~ y .
	702	.TP
	703	.BI ZERO64( x )
	704	Evaluates nonzero if the
	705	.B kludge64
	706	argument
	707	.I x
	708	is exactly zero.
	709	.SH "SEE ALSO"
	710	.BR mLib (3).
	711	.SH AUTHOR
	712	Mark Wooding, <mdw@distorted.org.uk>
	713