@@@ doc wip
[mLib] / utils / fltfmt.h
CommitLineData
b1a20bee
MW
1/* -*-c-*-
2 *
3 * Floating-point format conversions
4 *
5 * (c) 2024 Straylight/Edgeware
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of the mLib utilities library.
11 *
12 * mLib is free software: you can redistribute it and/or modify it under
13 * the terms of the GNU Library General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or (at
15 * your option) any later version.
16 *
17 * mLib is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20 * License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with mLib. If not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25 * USA.
26 */
27
28#ifndef MLIB_FLTFMT_H
29#define MLIB_FLTFMT_H
30
31#ifdef __cplusplus
32 extern "C" {
33#endif
34
35/*----- Header files ------------------------------------------------------*/
36
37#ifndef MLIB_ARENA_H
38# include "arena.h"
39#endif
40
41#ifndef MLIB_BITS_H
42# include "bits.h"
43#endif
44
45/*----- Data structures ---------------------------------------------------*/
46
c752173d
MW
47/* Error codes. */
48#define FLTERR_OK 0x0000u /* no trouble */
49#define FLTERR_INVAL 0x0001u /* technically invalid encoding */
50#define FLTERR_INEXACT 0x0002u /* result is inexact */
51#define FLTERR_UFLOW 0x0004u /* underflowed to zero */
52#define FLTERR_OFLOW 0x0008u /* overflowed to ±∞ or max finite */
53#define FLTERR_REPR 0x0010 /* not representable */
54#define FLTERR_ALLERRS 0xffff /* all errors */
55
56/* Predicates considered for rounding. */
57#define FRPF_LOW 0x0001u /* lost bits not exactly zero or half */
58#define FRPF_HALF 0x0002u /* lost a half or more */
59#define FRPF_ODD 0x0004u /* final place is currently odd */
60#define FRPF_NEG 0x0008u /* number is negative */
61
62/* Rounding policies. These are represented as a 16-bit truth table applied
63 * to the predicate bits listed above. The following are the mask values
64 * corresponding to the predicate bits being set; a set bit means that the
65 * number should be rounded away from zero.
66 */
67#define FRPMASK_LOW 0xaaaau /* lost bits below half */
68#define FRPMASK_HALF 0xccccu /* lost a half or more */
69#define FRPMASK_ODD 0xf0f0u /* final place is odd */
70#define FRPMASK_NEG 0xff00u /* number is negative */
71
72/* Useful constructed masks from the above. */
73#define FRPMASK_INEXACT (FRPMASK_LOW | FRPMASK_HALF) /* lost nonzero bits */
74#define FRPMASK_NEAR(dir) (FRPMASK_HALF&(FRPMASK_LOW | (dir))) /* to nearest; policy @dir@ breaks exact ties */
75
76/* Generally useful rounding criteria. */
77#define FLTRND_ZERO 0 /* towards zero (truncate) */
78#define FLTRND_PROJINF FRPMASK_INEXACT /* towards (projective) ±∞ */
79#define FLTRND_NEGINF (FRPMASK_INEXACT&FRPMASK_NEG) /* down, towards -∞ */
80#define FLTRND_POSINF (FRPMASK_INEXACT&~FRPMASK_NEG) /* up, towards +∞ */
81#define FLTRND_EVEN (FRPMASK_INEXACT&FRPMASK_ODD) /* to even */
82#define FLTRND_ODD (FRPMASK_INEXACT&~FRPMASK_ODD) /* to odd */
83#define FLTRND_NEAREVEN FRPMASK_NEAR(FLTRND_EVEN) /* nearest, ties to even */
84#define FLTRND_NEARODD FRPMASK_NEAR(FLTRND_ODD) /* nearest, ties to odd */
85#define FLTRND_NEARZERO FRPMASK_NEAR(FLTRND_ZERO) /* nearest, ties to zero */
86#define FLTRND_NEARINF FRPMASK_NEAR(FLTRND_PROJINF) /* nearest, ties to ±∞ */
87#define FLTRND_NEARNEG FRPMASK_NEAR(FLTRND_NEGINF) /* nearest, ties to -∞ */
88#define FLTRND_NEARPOS FRPMASK_NEAR(FLTRND_POSINF) /* nearest, ties to +∞ */
89
b1a20bee
MW
90struct floatbits {
91 /* A decoded floating-point number.
92 *
93 * The flags do most of the heavy lifting here.
94 *
95 * * @FLTF_ZERO@ is set if the number is zero. The @frac@ and @exp@ are
96 * ignored.
97 *
98 * * @FLTF_NEG@ is set if the number is negative. The representation is
99 * signed magnitude, because that seems basically universal among
100 * floating-point formats. Negative zero is a thing.
101 *
102 * * @FLTF_SNAN@ and @FLTF_QNAN@ are set if the value is, respectively, a
103 * signalling or quiet not-a-number. The @frac@ holds the payload,
104 * left-aligned, excluding the quiet bit; @exp@ is ignored.
105 *
106 * * @FLTF_INF@ is set if the number is positive or negative infinity.
107 * Projective infinity is not representable. The @frac@ and @exp@ are
108 * ignored.
109 *
110 * The @frac@ field contains the fractional significand, big-end first;
111 * either the number is identically (positive or negative) zero, or the
112 * most significant bit of @frac[0]@ is set, and the significand lies
113 * between a half (inclusive) and one (exclusive). The @exp@ is the power
114 * of two by which the significand is to be scaled.
115 *
116 * The essential convention for @frac@ is that the value is unchanged if
117 * zero-valued words are added or removed at the end.
118 */
119
120 unsigned f; /* flags */
121#define FLTF_NEG 0x0001u /* number is negative */
122#define FLTF_INF 0x0002u /* number is infinite */
123#define FLTF_QNAN 0x0004u /* quiet not-a-number */
124#define FLTF_SNAN 0x0008u /* signalling not-a-number */
125#define FLTF_ZERO 0x0010u /* number is zero */
126#define FLTF_NANMASK (FLTF_QNAN | FLTF_SNAN) /* any kind of NaN */
127 int exp; /* exponent, base 2 */
128 arena *a; /* memory arena */
129 uint32 *frac; /* fraction */
130 unsigned n, fracsz; /* fraction limbs used/allocated */
131};
132#define FLOATBITS_INIT { FLTF_ZERO, 0, &arena_stdlib, 0, 0, 0 }
133
b1a20bee
MW
134/*----- General floating-point hacking ------------------------------------*/
135
136/* --- @fltfmt_initbits@ --- *
137 *
138 * Arguments: @struct floatbits *x@ = pointer to structure to initialize
139 *
140 * Returns: ---
141 *
142 * Use: Dynamically initialize @x@ to (positive) zero so that it can
143 * be used as the destination operand by other operations. This
144 * doesn't allocate resources and cannot fail. The
145 * @FLOATBITS_INIT@ macro is a suitable static initializer for
146 * performing the same task.
147 */
148
149extern void fltfmt_initbits(struct floatbits */*x*/);
150
151/* --- @fltfmt_freebits@ --- *
152 *
153 * Arguments: @struct floatbits *x@ = pointer to structure to free
154 *
155 * Returns: ---
156 *
157 * Use: Releases the memory held by @x@. Afterwards, @x@ is a valid
158 * (positive) zero, but can safely be discarded.
159 */
160
161extern void fltfmt_freebits(struct floatbits */*x*/);
162
163/* --- @fltfmt_allocfrac@ --- *
164 *
165 * Arguments: @struct floatbits *x@ = structure to adjust
166 * @unsigned n@ = number of words required
167 *
168 * Returns: ---
169 *
170 * Use: Reallocate the @frac@ vector so that it has space for at
171 * least @n@ 32-bit words, and set @x->n@ equal to @n@. If the
172 * current size is already @n@ or greater, then just update the
173 * active length @n@ and return; otherwise, any existing vector
174 * is discarded and a fresh, larger one allocated.
175 */
176
177extern void fltfmt_allocfrac(struct floatbits */*x*/, unsigned /*n*/);
178
179/* --- @fltfmt_copybits@ --- *
180 *
181 * Arguments: @struct floatbits *z_out@ = where to leave the result
182 * @const struct floatbits *x@ = source to copy
183 *
184 * Returns: ---
185 *
186 * Use: Make @z_out@ be a copy of @x@. If @z_out@ is the same object
187 * as @x@ then do nothing.
188 */
189
190extern void fltfmt_copybits(struct floatbits */*z_out*/,
191 const struct floatbits */*x*/);
192
193/* --- @fltfmt_round@ --- *
194 *
195 * Arguments: @struct floatbits *z_out@ = destination (may equal source)
196 * @const struct floatbits *x@ = source
197 * @unsigned r@ = rounding mode (@FLTRND_...@ code)
198 * @unsigned n@ = nonzero number of bits to leave
199 *
200 * Returns: A @FLTERR_...@ code, specifically either @FLTERR_INEXACT@ if
201 * rounding discarded some nonzero value bits, or @FLTERR_OK@ if
202 * rounding was unnecessary.
203 *
204 * Use: Rounds a floating-point value to a given number of
205 * significant bits, using the given rounding rule.
206 */
207
208extern unsigned fltfmt_round(struct floatbits */*z_out*/,
209 const struct floatbits */*x*/,
210 unsigned /*r*/, unsigned /*n*/);
211
212/*----- IEEE formats ------------------------------------------------------*/
213
214struct fltfmt_ieeefmt {
215 /* Description of a binary IEEE floating-point format.
216 *
217 * An IEEE binary floating-point encoding is split into three fields,
218 * called %$\sigma$%, %$e'$%, and %$m$%.
219 *
220 * The %$\sigma$% field encodes the sign as a single bit: if %$\sigma = 0$%
221 * then the value is nonnegative; if %$\sigma = 1$% then the value is
222 * negative. Signed-magnitude encoding is used: if the rest of the
223 * encoding represents a (necessarily nonnegative) value %$x$% then the
224 * signed value is %$(-1)^\sigma \cdot x$%.
225 *
226 * The %$e'$% field encodes the exponent in a field of %$w$% bits. The
227 * true exponent %$e = e' - e_0$%, where %$e_0 = 2^{w-1} - 1$% is the
228 * %%\emph{exponent bias}%%. The maximum exponent for finite values is
229 * %$e_{\text{max}} = 2^w - 2 - e_0 = 2^{w-1} - 1$%, which is
230 * coincidentally equal to %$e_0$%; and the minimum exponent for
231 * %%\emph{normal}%% finite values is %$e_{\text{min}} = 1 - e_0 = {}$%
232 * %$2 - 2^{w-1}$%. The maximum exponent value %$2^w - 1$% denotes
233 * infinities and NaN values, while the minimum value denotes zeros and
234 * subnormal values.
235 *
236 * If a `hidden-bit' convention is used (@FLTIF_HIDDEN@ is set in @f@),
237 * then %$h = 1$%; otherwise, %$h = 0$%.
238 *
239 * The %$m$% field encodes the %$p$%-bit %%\emph{significand}%%. If a
240 * `hidden-bit' convention is used then the %$m$% field is actually %$p -
241 * 1$% bits wide; otherwise, it is %$p$% bits.
242 *
243 * * If %$e_{\text{min}} \le e \le e_{\text{max}}$% then the encoding
244 * represents a %%\emph{normal}%% value, specifically the value
245 * %$x = (-1)^\sigma \cdot (h + m/2^{p-1}) \cdot 2^e$%. In formats
246 * which do not use the hidden-bit convention, the most significant bit
247 * of %$m$% must be set; we return @FLTERR_INVAL@ for other
248 * encodings, and interpret the `unnormal' value as encoded.
249 *
250 * * If %$e = e_{\text{min}} - 1$% then the encoding represents (signed)
251 * zero if %$m = 0$%, or a %%\emph{subnormal}%% value %$x = (-1)^\sigma
252 * \cdot m/2^{p-1} \cdot 2^{e_{\text{min}}}$%. Note that, in formats
253 * which do not use the hidden-bit convention, the unit bit should be
254 * clear; we return @FLTERR_INVAL@ for other encodings, and interpret
255 * the `pseudo-denormal' value as encoded.
256 *
257 * * If %$e = e_{\text{max}} + 1$% then the encoding represents
258 * %$(-1)^\sigma \cdot \infty$% if %$m = 0$%, or a not-a-number value
259 * (NaN) with payload %$m \ne 0$%. A %%\emph{quiet}%% NaN has bit
260 * %$p - 2$% set in %$m$%; a signalling NaN has this bit reset. Note
261 * that some platforms' native formats reverse this convention, but
262 * this is handled in code which deals with native formats: the
263 * interchange formats described here always indicate quiet NaNs by
264 * setting the bit. In formats which do not use the hidden-bit
265 * convention, the unit bit %$p - 1$% is ignored.
266 */
267
268 unsigned f; /* flags */
c752173d 269#define FLTIF_HIDDEN 1u /* unit bit is implicit */
b1a20bee
MW
270 unsigned expwd; /* exponent field width %$w$% */
271 unsigned prec; /* precision %$p$% */
272};
273
274/* IEEE (and related) format descriptions. */
275extern const struct fltfmt_ieeefmt
276 fltfmt_f16, fltfmt_f32, fltfmt_f64, fltfmt_f128,
277 fltfmt_mini, fltfmt_bf16, fltfmt_idblext80;
278
279/* --- @fltfmt_encieee@ --- *
280 *
281 * Arguments: @const struct fltfmt_ieeefmt *fmt@ = format description
282 * @uint32 *z@ = output vector
283 * @const struct floatbits *x@ = value to encode
284 * @unsigned r@ = rounding mode
285 * @unsigned errmask@ = error mask
286 *
287 * Returns: Error flags (@FLTERR_...@).
288 *
289 * Use: Encode a floating-point value in an IEEE format. This is the
290 * machinery shared by the @fltfmt_enc...@ functions for
291 * encoding IEEE-format values. Most of the arguments and
292 * behaviour are as described for those functions.
293 *
294 * The encoded value is right-aligned and big-endian; i.e., the
295 * sign bit ends up in @z[0]@, and the least significant bit of
296 * the significand ends up in the least significant bit of
297 * @z[n - 1]@.
298 */
299
300extern unsigned fltfmt_encieee(const struct fltfmt_ieeefmt */*fmt*/,
301 uint32 */*z*/, const struct floatbits */*x*/,
302 unsigned /*r*/, unsigned /*errmask*/);
303
304/* --- @fltfmt_encTY@ --- *
305 *
306 * Arguments: @octet *z_out@, @uint16 *z_out@, @uint32 *z_out@,
307 * @kludge64 *z_out@ = where to put the encoded value
308 * @uint16 *se_out@, @kludge64 *m_out@ = where to put the
309 * encoded sign-and-exponent and significand
310 * @const struct floatbits *x@ = value to encode
311 * @unsigned r@ = rounding mode
312 * @unsigned errmask@ = error mask
313 *
314 * Returns: Error flags (@FLTERR_...@).
315 *
316 * Use: Encode a floating-point value in an IEEE (or IEEE-adjacent)
317 * format.
318 *
319 * If an error is encountered during the encoding, and the
320 * corresponding bit of @errmask@ is clear, then processing
321 * stops immediately and the error is returned; if the bit is
322 * set, then processing continues as described below.
323 *
324 * The @TY@ may be
325 *
326 * * @mini@ for the 8-bit `1.4.3 minifloat' format, with
327 * four-bit exponent and four-bit significand, represented
328 * as a single octet;
329 *
330 * * @bf16@ for the Google `bfloat16' format, with eight-bit
331 * exponent and eight-bit significand, represented as a
332 * @uint16@;
333 *
334 * * @f16@ for the IEEE `binary16' format, with five-bit
335 * exponent and eleven-bit significand, represented as a
336 * @uint16@;
337 *
338 * * @f32@ for the IEEE `binary32' format, with eight-bit
339 * exponent and 24-bit significand, represented as a
340 * @uint32@;
341 *
342 * * @f64@ for the IEEE `binary64' format, with eleven-bit
343 * exponent and 53-bit significand, represented as a
344 * @kludge64@;
345 *
346 * * @f128@ for the IEEE `binary128' format, with fifteen-bit
347 * exponent and 113-bit significand, represented as four
348 * @uint32@ limbs, most significant first; or
349 *
350 * * @idblext80@ for the Intel 80-bit `double extended'
351 * format, with fifteen-bit exponent and 64-bit significand
352 * with no hidden bit, represented as a @uint16 se@
353 * holding the sign and exponent, and a @kludge64 m@
354 * holding the significand.
355 *
356 * Positive and negative zero and infinity are representable
357 * exactly.
358 *
359 * Following IEEE recommendations (and most implementations),
360 * the most significant fraction bit of a quiet NaN is set; this
361 * bit is clear in a signalling NaN. The most significant
362 * payload bits of a NaN, held in the top bits of @x->frac[0]@,
363 * are encoded in the output significand following the `quiet'
364 * bit. If the chosen format's significand field is too small
365 * to accommodate all of the set payload bits then the
366 * @FLTERR_INEXACT@ error bit is set and, if masked, the
367 * excess payload bits are discarded. No rounding of NaN
368 * payloads is performed.
369 *
370 * Otherwise, the input value is finite and nonzero. If the
371 * significand cannot be represented exactly then the
372 * @FLTERR_INEXACT@ error bit is set, and, if masked, the value
373 * will be rounded (internally -- the input @x@ is not changed).
374 * If the (rounded) value's exponent is too large to represent,
375 * then the @FLTERR_OFLOW@ and @FLTERR_INEXACT@ error bits are
376 * set and, if masked, the result is either the (absolute)
377 * largest representable finite value or infinity, with the
378 * appropriate sign, chosen according to the rounding mode. If
379 * the exponent is too small to represent, then the
380 * @FLTERR_UFLOW@ and @FLTERR_INEXACT@ error bits are set and,
381 * if masked, the result is either the (absolute) smallest
382 * nonzero value or zero, with the appropriate sign, chosen
383 * according to the rounding mode.
384 */
385
386extern unsigned fltfmt_encmini(octet */*z_out*/,
387 const struct floatbits */*x*/,
388 unsigned /*r*/, unsigned /*errmask*/);
389
390extern unsigned fltfmt_encbf16(uint16 */*z_out*/,
391 const struct floatbits */*x*/,
392 unsigned /*r*/, unsigned /*errmask*/);
393
394extern unsigned fltfmt_encf16(uint16 */*z_out*/,
395 const struct floatbits */*x*/,
396 unsigned /*r*/, unsigned /*errmask*/);
397
398extern unsigned fltfmt_encf32(uint32 */*z_out*/,
399 const struct floatbits */*x*/,
400 unsigned /*r*/, unsigned /*errmask*/);
401
402extern unsigned fltfmt_encf64(kludge64 */*z_out*/,
403 const struct floatbits */*x*/,
404 unsigned /*r*/, unsigned /*errmask*/);
405
406extern unsigned fltfmt_encf128(uint32 */*z_out*/,
407 const struct floatbits */*x*/,
408 unsigned /*r*/, unsigned /*errmask*/);
409
410extern unsigned fltfmt_encidblext80(uint16 */*se_out*/, kludge64 */*m_out*/,
411 const struct floatbits */*x*/,
412 unsigned /*r*/, unsigned /*errmask*/);
413
414/* --- @fltfmt_decieee@ --- *
415 *
416 * Arguments: @const struct fltfmt_ieeefmt *fmt@ = format description
417 * @struct floatbits *z_out@ = output decoded representation
418 * @const uint32 *x@ = input encoding
419 *
420 * Returns: Error flags (@FLTERR_...@).
421 *
422 * Use: Decode a floating-point value in an IEEE format. This is the
423 * machinery shared by the @fltfmt_dec...@ functions for
424 * decoding IEEE-format values. Most of the arguments and
425 * behaviour are as described for those functions.
426 *
427 * The encoded value should be right-aligned and big-endian;
428 * i.e., the sign bit is found in @x[0]@, and the least
429 * significant bit of the significand is found in the least
430 * significant bit of @x[n - 1]@.
431 */
432
433extern unsigned fltfmt_decieee(const struct fltfmt_ieeefmt */*fmt*/,
434 struct floatbits */*z_out*/,
435 const uint32 */*x*/);
436
437/* --- @fltfmt_decTY@ --- *
438 *
439 * Arguments: @struct floatbits *z_out@ = storage for the result
440 * @octet x@, @uint16 x@, @uint32 x@, @kludge64 x@ =
441 * encoded input
442 * @uint16 se@, @kludge64 m@ = encoded sign-and-exponent and
443 * significand
444 *
445 * Returns: Error flags (@FLTERR_...@).
446 *
447 * Use: Decode a floating-point value in an IEEE (or IEEE-adjacent)
448 * format.
449 *
450 * The options for @TY@ are as documented for the encoding
451 * functions above.
452 *
453 * In formats without a hidden bit -- currently only @idblext80@
454 * -- not all bit patterns are valid encodings. If the explicit
455 * unit bit is set when the exponent field is all-bits-zero, or
456 * clear when the exponent field is not all-bits-zero, then the
457 * @FLTERR_INVAL@ error bit is set. If the exponent is all-
458 * bits-set, denoting infinity or a NaN, then the unit bit is
459 * otherwise ignored -- in particular, it does not affect the
460 * NaN payload, or even whether the input encodes a NaN or
461 * infinity. Otherwise, the unit bit is considered significant,
462 * and the result is normalized as one would expect.
463 * Consequently, biased exponent values 0 and 1 are distinct
464 * only with respect to which bit patterns are considered valid,
465 * and not with respect to the set of values denoted.
466 */
467
468extern unsigned fltfmt_decmini(struct floatbits */*z_out*/, octet /*x*/);
469
470extern unsigned fltfmt_decbf16(struct floatbits */*z_out*/, uint16 /*x*/);
471
472extern unsigned fltfmt_decf16(struct floatbits */*z_out*/, uint16 /*x*/);
473
474extern unsigned fltfmt_decf32(struct floatbits */*z_out*/, uint32 /*x*/);
475
476extern unsigned fltfmt_decf64(struct floatbits */*z_out*/, kludge64 /*x*/);
477
478extern unsigned fltfmt_decf128(struct floatbits */*z_out*/,
479 const uint32 */*x*/);
480
481extern unsigned fltfmt_decidblext80(struct floatbits */*z_out*/,
482 uint16 /*se*/, kludge64 /*m*/);
483
484/*----- Native formats ----------------------------------------------------*/
485
486/* Hacking for platforms which ill-advisedly have the opposite sense for the
487 * quiet NaN bit.
488 *
489 * Obviously we toggle the quiet bit, but there's a problem: if the quiet bit
490 * is the only one set, then if we toggle it, the fraction will become zero
491 * and we'll be left with an infinity. Follow MIPS and set all of the bits.
492 *
493 * This is all internal machinery and shouldn't be relied on by applications.
494 */
495#if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008))
496# define FLTFMT__MUST_FROB_NANS
497
498# define FLTFMT__FROB_NAN_F32(x_inout, rc) do { /* flip quiet-bit sense of a binary32 NaN in place */ \
499 uint32 *_x_inout_ = (x_inout), _x0_ = _x_inout_[0]; \
500 \
501 if ((_x0_&0x7f800000) != 0x7f800000 || !(_x0_&0x007fffff)) \
502 ; /* exponent not all-ones, or fraction zero: not a NaN */ \
503 else if (_x0_&0x003fffff) /* some payload bit below the quiet bit */ \
504 _x_inout_[0] = _x0_ ^ 0x00400000; /* safe to toggle the quiet bit */ \
505 else { /* only the quiet bit is set: toggling would yield infinity */ \
506 _x_inout_[0] = (_x0_&0x80000000) | 0x7fffffff; /* keep the sign */ \
507 (rc) |= FLTERR_INEXACT; /* the payload was altered */ \
508 } /* NOTE(review): quiet bit stays set here, but IDBLEXT80 clears it -- confirm */ \
509 } while (0)
510
511# define FLTFMT__FROB_NAN_F64(x_inout, rc) do { /* flip quiet-bit sense of a binary64 NaN in place */ \
512 uint32 *_x_inout_ = (x_inout), \
513 _x0_ = _x_inout_[0], _x1_ = _x_inout_[1]; \
514 \
515 if ((_x0_&0x7ff00000) != 0x7ff00000 || (!(_x0_&0x000fffff) && !_x1_)) \
516 ; /* exponent not all-ones, or fraction zero: not a NaN */ \
517 else if ((_x0_&0x0007ffff) || _x1_) /* some payload bit below the quiet bit */ \
518 _x_inout_[0] = _x0_ ^ 0x00080000; /* safe to toggle the quiet bit */ \
519 else { /* only the quiet bit is set: toggling would yield infinity */ \
520 _x_inout_[0] = (_x0_&0x80000000) | 0x7fffffff; /* keep the sign */ \
521 _x_inout_[1] = 0xffffffff; \
522 (rc) |= FLTERR_INEXACT; /* the payload was altered */ \
523 } /* NOTE(review): quiet bit stays set here, but IDBLEXT80 clears it -- confirm */ \
524 } while (0)
525
526# define FLTFMT__FROB_NAN_F128(x_inout, rc) do { /* flip quiet-bit sense of a binary128 NaN in place */ \
527 uint32 *_x_inout_ = (x_inout), \
528 _x0_ = _x_inout_[0], _x1_ = _x_inout_[1], \
529 _x2_ = _x_inout_[2], _x3_ = _x_inout_[3]; \
530 \
531 if ((_x0_&0x7fff0000) != 0x7fff0000 || /* top word: 1 sign, 15 exponent, 16 fraction bits */ \
532 (!(_x0_&0x0000ffff) && !_x1_ && !_x2_ && !_x3_)) \
533 ; /* exponent not all-ones, or fraction zero (infinity): not a NaN */ \
534 else if ((_x0_&0x00007fff) || _x1_ || _x2_ || _x3_) /* payload below the quiet bit */ \
535 _x_inout_[0] = _x0_ ^ 0x00008000; /* safe to toggle the quiet bit */ \
536 else { /* only the quiet bit is set: toggling would yield infinity */ \
537 _x_inout_[0] = (_x0_&0x80000000) | 0x7fffffff; /* keep the sign */ \
538 _x_inout_[1] = _x_inout_[2] = _x_inout_[3] = 0xffffffff; \
539 (rc) |= FLTERR_INEXACT; /* the payload was altered */ \
540 } /* NOTE(review): quiet bit stays set here, but IDBLEXT80 clears it -- confirm */ \
541 } while (0)
542
543# define FLTFMT__FROB_NAN_IDBLEXT80(x_inout, rc) do { /* flip quiet-bit sense of an 80-bit extended NaN in place */ \
544 uint32 *_x_inout_ = (x_inout), \
545 _x0_ = _x_inout_[0], _x1_ = _x_inout_[1], _x2_ = _x_inout_[2]; \
546 \
547 if ((_x0_&0x00007fff) != 0x00007fff || (!(_x1_&0x7fffffff) && !_x2_)) \
548 ; /* exponent not all-ones, or fraction (ignoring unit bit) zero: not a NaN */ \
549 else if ((_x1_&0x3fffffff) || _x2_) /* payload below the quiet bit; unit and quiet bits excluded */ \
550 _x_inout_[1] = _x1_ ^ 0x40000000; /* safe to toggle the quiet bit */ \
551 else { /* only the quiet bit is set: toggling would yield infinity */ \
552 _x_inout_[1] = (_x1_&0x80000000) | 0x3fffffff; /* preserve unit */ \
553 _x_inout_[2] = 0xffffffff; (rc) |= FLTERR_INEXACT; /* payload altered, as in the other formats */ \
554 } \
555 } while (0)
556
557#else
558# define FLTFMT__FROB_NAN_F32(x_inout, rc) do ; while (0) /* no-op: not an hppa/legacy-MIPS target */
559# define FLTFMT__FROB_NAN_F64(x_inout, rc) do ; while (0) /* no-op, as above */
560# define FLTFMT__FROB_NAN_F128(x_inout, rc) do ; while (0) /* no-op, as above */
561# define FLTFMT__FROB_NAN_IDBLEXT80(x_inout, rc) do ; while (0) /* no-op, as above */
562#endif
563
564/* --- @fltfmt_encTY@ --- *
565 *
566 * Arguments: @ty *z_out@ = storage for the result
567 * @const struct floatbits *x@ = value to encode
568 * @unsigned r@ = rounding mode
569 *
570 * Returns: Error flags (@FLTERR_...@).
571 *
572 * Use: Encode the floating-point value @x@ as a native C object and
573 * store the result in @z_out@.
574 *
575 * The @TY@ may be @flt@ to encode a @float@, @dbl@ to encode a
576 * @double@, or (on C99 implementations) @ldbl@ to encode a
577 * @long double@.
578 *
579 * In detail, conversion is performed as follows.
580 *
581 * * If a non-finite value cannot be represented by the
582 * implementation then the @FLTERR_REPR@ error bit is set
583 * and @*z_out@ is set to zero if @x@ is a NaN, or the
584 * (absolute) largest representable value, with appropriate
585 * sign, if @x@ is an infinity.
586 *
587 * * If the implementation can represent NaNs, but cannot set
588 * NaN payloads, then the @FLTERR_INEXACT@ error bit is set,
589 * and @*z_out@ is set to an arbitrary (quiet) NaN value.
590 *
591 * * If @x@ is negative zero, but the implementation does not
592 * distinguish negative and positive zero, then the
593 * @FLTERR_INEXACT@ error bit is set and @*z_out@ is set to
594 * zero.
595 *
596 * * If the implementation's floating-point radix is not a
597 * power of two, and @x@ is a nonzero finite value, then the
598 * @FLTERR_INEXACT@ error bit is set (unconditionally), and
599 * the value is rounded by the implementation using its
600 * prevailing rounding policy. If the radix is a power of
601 * two, then the @FLTERR_INEXACT@ error bit is set only if
602 * rounding is necessary, and rounding is performed using
603 * the rounding mode @r@.
604 */
605
606extern unsigned fltfmt_encflt(float */*z_out*/,
607 const struct floatbits */*x*/,
608 unsigned /*r*/);
609
610extern unsigned fltfmt_encdbl(double */*z_out*/,
611 const struct floatbits */*x*/,
612 unsigned /*r*/);
613
614#if __STDC_VERSION__ >= 199901L /* C99 or later, as documented above */
615extern unsigned fltfmt_encldbl(long double */*z_out*/,
616 const struct floatbits */*x*/,
617 unsigned /*r*/);
618#endif
619
620/* --- @fltfmt_decTY@ --- *
621 *
622 * Arguments: @struct floatbits *z_out@ = storage for the result
623 * @ty x@ = value to decode
624 * @unsigned r@ = rounding mode
625 *
626 * Returns: Error flags (@FLTERR_...@).
627 *
628 * Use: Decode the native C floating-point value @x@ and store the
629 * result in @z_out@.
630 *
631 * The @TY@ may be @flt@ to decode a @float@, @dbl@ to decode a
632 * @double@, or (on C99 implementations) @ldbl@ to decode a
633 * @long double@.
634 *
635 * In detail, conversion is performed as follows.
636 *
637 * * If the implementation supports negative zeros and/or
638 * infinity, then these are recognized and decoded.
639 *
640 * * If the input is a NaN, but the implementation cannot
641 * usefully report NaN payloads, then the @FLTERR_INEXACT@
642 * error bit is set and the decoded payload is left empty.
643 *
644 * * If the implementation's floating-point radix is not a
645 * power of two, and @x@ is a nonzero finite value, then the
646 * @FLTERR_INEXACT@ error bit is set (unconditionally), and
647 * the rounded value (according to the rounding mode @r@) is
648 * stored in as many fraction words as necessary to identify
649 * the original value uniquely. If the radix is a power of
650 * two, then the value is represented exactly.
651 */
652
653extern unsigned fltfmt_decflt(struct floatbits */*z_out*/,
654 float /*x*/, unsigned /*r*/);
655
656extern unsigned fltfmt_decdbl(struct floatbits */*z_out*/,
657 double /*x*/, unsigned /*r*/);
658
659#if __STDC_VERSION__ >= 199901L /* C99 or later, as documented above */
660extern unsigned fltfmt_decldbl(struct floatbits */*z_out*/,
661 long double /*x*/, unsigned /*r*/);
662#endif
663
664/*----- Some common conversions packaged up -------------------------------*/
665
666/* --- @fltfmt_CTYtoFTYE@ --- *
667 *
668 * Arguments: @octet *p@ = output pointer
669 * @float x@, @double x@ = value to convert
670 * @unsigned r@ = rounding mode
671 *
672 * Returns: Error flags (@FLTERR_...@).
673 *
674 * Use: Encode a native C floating-point value in an external format.
675 *
676 * The @CTY@ is an abbreviation for a C type: @flt@ for @float@,
677 * or @dbl@ for @double@; @FTY@ is an abbreviation for the
678 * external format, @f32@ for IEEE Binary32, or @f64@ for IEEE
679 * Binary64; and @E@ is @l@ for little-endian or @b@ for
680 * big-endian byte order. Not all combinations are currently
681 * supported.
682 *
683 * On platforms where the external format is used natively,
684 * these functions are simple data copies.
685 */
686
687extern unsigned fltfmt_flttof32l(octet */*p*/, float /*x*/, unsigned /*r*/);
688extern unsigned fltfmt_flttof32b(octet */*p*/, float /*x*/, unsigned /*r*/);
689extern unsigned fltfmt_dbltof64l(octet */*p*/, double /*x*/, unsigned /*r*/);
690extern unsigned fltfmt_dbltof64b(octet */*p*/, double /*x*/, unsigned /*r*/);
691
692/* --- @fltfmt_FTYEtoCTY@ --- *
693 *
694 * Arguments: @float *z_out@, @double *z_out@ = storage for output
695 * @const octet *p@ = input pointer
696 * @unsigned r@ = rounding mode
697 *
698 * Returns: Error flags (@FLTERR_...@).
699 *
700 * Use: Decodes a floating point value in an external format into a
701 * native value.
702 *
703 * The naming conventions are the same as for @fltfmt_dbltof64b@
704 * above.
705 *
706 * On platforms where the external format is used natively,
707 * these functions are simple data copies.
708 */
709
710extern unsigned fltfmt_f32ltoflt(float */*z_out*/, const octet */*p*/,
711 unsigned /*r*/);
712extern unsigned fltfmt_f32btoflt(float */*z_out*/, const octet */*p*/,
713 unsigned /*r*/);
714extern unsigned fltfmt_f64ltodbl(double */*z_out*/, const octet */*p*/,
715 unsigned /*r*/);
716extern unsigned fltfmt_f64btodbl(double */*z_out*/, const octet */*p*/,
717 unsigned /*r*/);
718
719/*----- That's all, folks -------------------------------------------------*/
720
721#ifdef __cplusplus
722 }
723#endif
724
725#endif