mdw@git.distorted.org.uk Git - mLib/blame_incremental

... / ...

Commit	Line	Data
	1	/* --c--
	2	*
	3	* Encoding and decoding floating-point values
	4	*
	5	* (c) 2023 Straylight/Edgeware
	6	*/
	7
	8	/----- Licensing notice --------------------------------------------------
	9	*
	10	* This file is part of the mLib utilities library.
	11	*
	12	* mLib is free software: you can redistribute it and/or modify it under
	13	* the terms of the GNU Library General Public License as published by
	14	* the Free Software Foundation; either version 2 of the License, or (at
	15	* your option) any later version.
	16	*
	17	* mLib is distributed in the hope that it will be useful, but WITHOUT
	18	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	19	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
	20	* License for more details.
	21	*
	22	* You should have received a copy of the GNU Library General Public
	23	* License along with mLib. If not, write to the Free Software
	24	* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
	25	* USA.
	26	*/
	27
	28	/----- Header files ------------------------------------------------------/
	29
	30	#include <float.h>
	31	#include <math.h>
	32
	33	#include "bits.h"
	34	#include "buf.h"
	35	#include "maths.h"
	36
	37	/----- Formatting primitives ---------------------------------------------/
	38
	39	/* We use the IEEE 754 `binary64' format. Briefly:
	40	*
	41	* * The top bit is the sign %$s$%: 0 encodes %$s = +1$%, and 1 encodes
	42	* %$s = -1$%.. The format is signed-magnitude, so everything else is
	43	* the same for positive and negative numbers.
	44	*
	45	* * The next eleven bits are the biased exponent %$e$%.
	46	*
	47	* * The remaining 52 bits are the significand %$m$%.
	48	*
	49	* If %$0 < e < 2047$% then the encoding represents the normal number
	50	* %$s \cdot (1 + m/2^{52}) \cdot 2^{e-1023}$%.
	51	*
	52	* If %$e = 0$% and %$m = 0$% then the encoding represents positive or
	53	* negative zero.
	54	*
	55	* If %$e = 0$% and %$m \ne 0$% then the encoding represents a subnormal
	56	* number %$s \cdot m/2^{52} \cdot 2^{-1022}$%.
	57	*
	58	* If %$e = 2047$% and %$m = 0$% then the encoding represents positive or
	59	* negative infinity.
	60	*
	61	* If %$e = 2047$% and %$m \ne 0$% then the encoding represents a NaN. If
	62	* the most significant bit of %$m$% is set then this is a quiet NaN;
	63	* otherwise it's a signalling NaN.
	64	*/
	65
	66	/* --- @f64_to_k64@ --- *
	67	*
	68	* Arguments: @double x@ = a floating-point number
	69	*
	70	* Returns: A 64-bit encoding of @x@.
	71	*
	72	* Use: Encodes @x@ as a `binary64' value. See `buf_putf64' for the
	73	* caveats.
	74	*/
	75
	76	static kludge64 f64_to_k64(double x)
	77	{
	78	kludge64 k;
	79	uint32 lo, hi, t;
	80	int e; double m;
	81
	82	/* Some machinery before we start. */
	83
	84	if (NANP(x)) {
	85	/* A NaN. */
	86	hi = 0x7ff80000; lo = 0;
	87	} else if (INFP(x)) {
	88	/* Positive or negative infinity. */
	89	hi = NEGP(x) ? 0xfff00000 : 0x7ff00000; lo = 0;
	90	} else if (x == 0) {
	91	/* Positive or negative zero. */
	92	hi = NEGP(x) ? 0x80000000 : 0; lo = 0;
	93	} else {
	94	/* A normal or subnormal number. Now we have to do some actual work. */
	95
	96	/* Let's get the sign dealt with so we don't have to worry about it any
	97	* more.
	98	*/
	99	if (!NEGP(x)) hi = 0;
	100	else { x = -x; hi = 0x80000000; }
	101
	102	/* Now we start on the value. The first thing to do is to split off the
	103	* exponent. Our number will be %$m \cdot 2^e$%, with %$1/2 \le m < 1$%.
	104	*/
	105	m = frexp(x, &e);
	106
	107	/* If our number is too big, we'll round it to infinity. This will
	108	* happen if %$x \ge 2^{1024}$%, i.e., if %$e > 1024$%.
	109	*/
	110	if (e > 1024)
	111	{ hi \|= 0x7ff00000; lo = 0; }
	112	else {
	113	/* Our number is sufficiently small that we can represent it at least
	114	* approximately (though maybe we'll have to flush it to zero). The
	115	* next step, then, is to pull the significand bits out.
	116	*/
	117
	118	/* Determine the correct exponent to store. We're not going to bias it
	119	* yet, but this is where we deal with subnormal numbers. Our number
	120	* is normal if %$x \ge 2^{-1022}$%, i.e., %$e > -1022$%. In this
	121	* case, there's an implicit bit which we'll clear. Otherwise, if it's
	122	* subnormal, we'll scale our floating-point number so that the
	123	* significand will look right when we extract it, and adjust the
	124	* exponent so that, when we're finally done, it will have the correct
	125	* sentinel value.
	126	*/
	127	if (e > -1022) m -= 0.5;
	128	else { m = ldexp(m, 1021 + e); e = -1022; }
	129
	130	/* Now we pull out the 53 bits of the significand. This will, in
	131	* general, leave a tail which we address through rounding. Scale it
	132	* up so that we end up with %$0 \le m' < 2$%; then we round up if
	133	* %$m > 1$%, or if %$m = 1$% and the low bit of the significand is
	134	* set.
	135	*/
	136	t = ldexp(m, 21); m -= ldexp(t, -21);
	137	lo = ldexp(m, 53); m -= ldexp(lo, -53);
	138	m = ldexp(m, 54);
	139
	140	/* Round the number if necessary. */
	141	if (lo&1 ? m >= 1.0 : m > 1)
	142	{ lo = U32(lo + 1); if (!lo) t++; }
	143
	144	/* Now we just put the pieces together. Note that our %$e$% is one
	145	* greater than it should be, because our implicit bit should have
	146	* been the unit bit not the 1/2 bit.
	147	*/
	148	hi \|= ((uint32)(e + 1022) << 20) \| t;
	149	}
	150	}
	151
	152	/* Convert to external format and go home. */
	153	SET64(k, hi, lo); return (k);
	154	}
	155
	156	/* --- @k64_to_f64@ --- *
	157	*
	158	* Arguments: @double *x_out@ = where to put the result
	159	* @kludge64 k@ = a 64-bit encoding of a floating-point value
	160	*
	161	* Returns: Zero on success, @-1@ on failure.
	162	*
	163	* Use: Decodes @k@ as a `binary64' value. See `buf_getf64' for the
	164	* caveats.
	165	*/
	166
	167	static int k64_to_f64(double *x_out, kludge64 k)
	168	{
	169	uint32 lo, hi, t;
	170	int s, e; double x;
	171
	172	/* We're using the IEEE 754 `binary64' format: see `float_to_k64' above. */
	173
	174	/* Pick the encoded number apart. */
	175	hi = HI64(k); lo = LO64(k);
	176	s = (hi >> 31)&1; e = (hi >> 20)&0x07ff; t = hi&0x000fffff;
	177
	178	/* Deal with various special cases. */
	179	if (e == 2047) {
	180	/* Maximum exponent indicates (positive or negative) infinity or NaN. */
	181
	182	if (t \|\| lo) {
	183	/* It's a NaN. We're not going to be picky about which one. If we
	184	* can't represent it then we'll just have to fail.
	185	*/
	186
	187	#ifdef NAN
	188	x = NAN;
	189	#else
	190	return (-1);
	191	#endif
	192	} else {
	193	/* It's an infinity. If we don't have one of those to hand, then pick
	194	* something really big.
	195	*/
	196
	197	#ifdef INFINITY
	198	x = s ? -INFINITY : INFINITY;
	199	#else
	200	x = s ? -DBL_MAX : DBL_MAX;
	201	#endif
	202	}
	203	} else {
	204	/* It's a finite number, though maybe it's weird in some way. */
	205
	206	if (e == 0) {
	207	/* Minimum exponent indicates zero or a subnormal number. The
	208	* subnormal exponent is a sentinel value that shouldn't be taken
	209	* literally, so we should fix that. If the number is actually zero
	210	* then the exponent won't matter much so don't bother checking.
	211	*/
	212
	213	e = 1;
	214	} else {
	215	/* It's a normal number. In which case there's an implicit bit which
	216	* we can now set.
	217	*/
	218
	219	t \|= 0x00100000;
	220	}
	221
	222	/* All that remains is to stuff the significant and exponent into a
	223	* floating point number. We'll have to do this in pieces, and we'll
	224	* lean on the floating-point machinery to do rounding correctly.
	225	*/
	226	x = ldexp(t, e - 1043) + ldexp(lo, e - 1075);
	227	if (s) x = -x;
	228	}
	229
	230	/* And we're done. */
	231	*x_out = x; return (0);
	232	}
	233
	234	/----- External functions ------------------------------------------------/
	235
	236	/* --- @buf_putf64{,b,l} --- *
	237	*
	238	* Arguments: @buf *b@ = a buffer to write to
	239	* @double x@ = a number to write
	240	*
	241	* Returns: Zero on success, @-1@ on failure (and the buffer is broken).
	242	*
	243	* On C89, this function can't detect negative zero so these
	244	* will be silently written as positive zero.
	245	*
	246	* This function doesn't distinguish NaNs. Any NaN is written
	247	* as a quiet NaN with all payload bits zero.
	248	*
	249	* A finite value with too large a magnitude to be represented
	250	* is rounded to the appropriate infinity. Other finite values
	251	* are rounded as necessary, in the usual IEEE 754 round-to-
	252	* nearest-or-even way.
	253	*/
	254
	255	int buf_putf64(buf *b, double x)
	256	{ return (buf_putk64(b, f64_to_k64(x))); }
	257	int buf_putf64b(buf *b, double x)
	258	{ return (buf_putk64b(b, f64_to_k64(x))); }
	259	int buf_putf64l(buf *b, double x)
	260	{ return (buf_putk64l(b, f64_to_k64(x))); }
	261
	262	/* --- @buf_getf64{,b,l} --- *
	263	*
	264	* Arguments: @buf *b@ = a buffer to read from
	265	* @double *x_out@ = where to put the result
	266	*
	267	* Returns: Zero on success, @-1@ on failure (and the buffer is broken).
	268	*
	269	* If the system supports NaNs, then any encoded NaN is returned
	270	* as the value of @NAN@ in @<math.h>@; otherwise, this function
	271	* reports failure.
	272	*
	273	* In general, values are rounded to the nearest available
	274	* value, in the way that the system usually rounds. If the
	275	* system doesn't support infinities, then any encoded infinity
	276	* is reported as the largest-possible-magnitude finite value
	277	* instead.
	278	*/
	279
	280	int buf_getf64(buf b, double x_out)
	281	{
	282	kludge64 k;
	283
	284	if (buf_getk64(b, &k)) return (-1);
	285	if (k64_to_f64(x_out, k)) { b->f \|= BF_BROKEN; return (-1); }
	286	return (0);
	287	}
	288	int buf_getf64b(buf b, double x_out)
	289	{
	290	kludge64 k;
	291
	292	if (buf_getk64b(b, &k)) return (-1);
	293	if (k64_to_f64(x_out, k)) { b->f \|= BF_BROKEN; return (-1); }
	294	return (0);
	295	}
	296	int buf_getf64l(buf b, double x_out)
	297	{
	298	kludge64 k;
	299
	300	if (buf_getk64l(b, &k)) return (-1);
	301	if (k64_to_f64(x_out, k)) { b->f \|= BF_BROKEN; return (-1); }
	302	return (0);
	303	}
	304
	305	/----- That's all, folks -------------------------------------------------/