@@@ fltfmt mess

[mLib] / utils / fltfmt.h
diff --git a/utils/fltfmt.h b/utils/fltfmt.h

new file mode 100644 (file)

index 0000000..cd012a0
--- /dev/null
+++ b/utils/fltfmt.h
@@ -0,0 +1,725 @@
+/* -*-c-*-
+ *
+ * Floating-point format conversions
+ *
+ * (c) 2024 Straylight/Edgeware
+ */
+
+/*----- Licensing notice --------------------------------------------------*
+ *
+ * This file is part of the mLib utilities library.
+ *
+ * mLib is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Library General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * mLib is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with mLib.  If not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#ifndef MLIB_FLTFMT_H
+#define MLIB_FLTFMT_H
+
+#ifdef __cplusplus
+  extern "C" {
+#endif
+
+/*----- Header files ------------------------------------------------------*/
+
+#ifndef MLIB_ARENA_H
+#  include "arena.h"
+#endif
+
+#ifndef MLIB_BITS_H
+#  include "bits.h"
+#endif
+
+/*----- Data structures ---------------------------------------------------*/
+
+struct floatbits {
+  /* A decoded floating-point number.
+   *
+   * The flags do most of the heavy lifting here.
+   *
+   *   * @FLTF_ZERO@ is set if the number is zero.  The @frac@ and @exp@ are
+   *    ignored.
+   *
+   *   * @FLTF_NEG@ is set if the number is negative.  The representation is
+   *    signed magnitude, because that seems basically universal among
+   *    floating-point formats.  Negative zero is a thing.
+   *
+   *   * @FLTF_SNAN@ and @FLTF_QNAN@ are set if the value is, respectively, a
+   *    signalling or quiet not-a-number.  The @frac@ holds the payload,
+   *    left-aligned, excluding the quiet bit; @exp@ is ignored.
+   *
+   *   * @FLTF_INF@ is set if the number is positive or negative infinity.
+   *    Projective infinity is not representable.  The @frac@ and @exp@ are
+   *    ignored.
+   *
+   * The @frac@ field contains the fractional significand, big-end first;
+   * either the number is identically (positive or negative) zero, or the
+   * most significant bit of @frac[0]@ is set, and the significand lies
+   * between a half (inclusive) and one (exclusive).  The @exp@ is the power
+   * of two by which the significand is to be scaled.
+   *
+   * The essential convention for @frac@ is that the value is unchanged if
+   * zero-valued words are added or removed at the end.
+   */
+
+  unsigned f;                          /* flags */
+#define FLTF_NEG 0x0001u               /*   number is negative */
+#define FLTF_INF 0x0002u               /*   number is infinite */
+#define FLTF_QNAN 0x0004u              /*   quiet not-a-number */
+#define FLTF_SNAN 0x0008u              /*   signalling not-a-number */
+#define FLTF_ZERO 0x0010u              /*   number is zero */
+#define FLTF_NANMASK (FLTF_QNAN | FLTF_SNAN) /* any kind of NaN */
+  int exp;                             /* exponent, base 2 */
+  arena *a;                            /* memory arena */
+  uint32 *frac;                                /* fraction */
+  unsigned n, fracsz;                  /* fraction limbs used/allocated */
+};
+/* Static initializer for a @struct floatbits@.  The values are positional,
+ * in member order: @f@ (zero flag set), @exp@, @a@ (the standard-library
+ * arena), @frac@ (no vector yet), @n@, @fracsz@.
+ */
+#define FLOATBITS_INIT { FLTF_ZERO, 0, &arena_stdlib, 0, 0, 0 }
+
+/* Error codes. */
+#define FLTERR_OK 0x0000u              /* no trouble */
+#define FLTERR_INVAL 0x0001u           /* technically invalid encoding */
+#define FLTERR_INEXACT 0x0002u         /* result is inexect */
+#define FLTERR_UFLOW 0x0004u           /* underflowed to zero */
+#define FLTERR_OFLOW 0x0008u           /* overflowed to ±∞ or max finite */
+#define FLTERR_REPR 0x0010             /* not representable */
+#define FLTERR_ALLERRS 0xffff          /* all errors */
+
+/* Predicates considered for rounding.  Each predicate is one bit of a
+ * four-bit index; the index selects a bit of the 16-bit rounding policy
+ * described below.
+ */
+#define FRPF_LOW 0x0001u            /* lost bits not exactly zero or half */
+#define FRPF_HALF 0x0002u              /* lost a half or more */
+#define FRPF_ODD 0x0004u               /* final place is currently odd */
+#define FRPF_NEG 0x0008u               /* number is negative */
+
+/* Rounding policies.  These are represented as a 16-bit truth table applied
+ * to the predicate bits listed above.  The following are the mask values
+ * corresponding to the predicate bits being set; a set bit means that the
+ * number should be rounded away from zero.
+ */
+#define FRPMASK_LOW 0xaaaau            /* lost bits below half */
+#define FRPMASK_HALF 0xccccu           /* lost a half or more */
+#define FRPMASK_ODD 0xf0f0u            /* final place is odd */
+#define FRPMASK_NEG 0xff00u            /* number is negative */
+
+/* Useful constructed masks from the above.  @FRPMASK_NEAR(dir)@ rounds to
+ * nearest: away from zero if more than half was lost, and as the policy
+ * @dir@ says if exactly half was lost.
+ */
+#define FRPMASK_INEXACT (FRPMASK_LOW | FRPMASK_HALF) /* lost nonzero bits */
+#define FRPMASK_NEAR(dir) (FRPMASK_HALF&(FRPMASK_LOW | (dir))) /* nearest */
+
+/* Generally useful rounding criteria.  Each is a truth table as described
+ * for the @FRPMASK_...@ constants: a set bit means `round away from zero'.
+ */
+#define FLTRND_ZERO 0                  /* towards zero (truncate) */
+#define FLTRND_PROJINF FRPMASK_INEXACT /* towards (projective) ±∞ */
+#define FLTRND_NEGINF (FRPMASK_INEXACT&FRPMASK_NEG) /* down, towards -∞ */
+#define FLTRND_POSINF (FRPMASK_INEXACT&~FRPMASK_NEG) /* up, towards +∞ */
+#define FLTRND_EVEN (FRPMASK_INEXACT&FRPMASK_ODD) /* to even */
+#define FLTRND_ODD (FRPMASK_INEXACT&~FRPMASK_ODD) /* to odd */
+#define FLTRND_NEAREVEN FRPMASK_NEAR(FLTRND_EVEN) /* nearest, ties to even */
+#define FLTRND_NEARODD FRPMASK_NEAR(FLTRND_ODD) /* nearest, ties to odd */
+#define FLTRND_NEARZERO FRPMASK_NEAR(FLTRND_ZERO) /* nearest, ties to zero */
+#define FLTRND_NEARINF FRPMASK_NEAR(FLTRND_PROJINF) /* nearest, ties to ±∞ */
+#define FLTRND_NEARNEG FRPMASK_NEAR(FLTRND_NEGINF) /* nearest, ties to -∞ */
+#define FLTRND_NEARPOS FRPMASK_NEAR(FLTRND_POSINF) /* nearest, ties to +∞ */
+
+/*----- General floating-point hacking ------------------------------------*/
+
+/* --- @fltfmt_initbits@ --- *
+ *
+ * Arguments:  @struct floatbits *x@ = pointer to structure to initialize
+ *
+ * Returns:    ---
+ *
+ * Use:                Dynamically initialize @x@ to (positive) zero so that it can
+ *             be used as the destination operand by other operations.  This
+ *             doesn't allocate resources and cannot fail.  The
+ *             @FLOATBITS_INIT@ macro is a suitable static initializer for
+ *             performing the same task.  Memory which later operations
+ *             attach to @x@ is released by @fltfmt_freebits@.
+ */
+
+extern void fltfmt_initbits(struct floatbits */*x*/);
+
+/* --- @fltfmt_freebits@ --- *
+ *
+ * Arguments:  @struct floatbits *x@ = pointer to structure to free
+ *
+ * Returns:    ---
+ *
+ * Use:                Releases the memory held by @x@.  Afterwards, @x@ is a valid
+ *             (positive) zero -- the same state that @fltfmt_initbits@
+ *             leaves -- but can safely be discarded.
+ */
+
+extern void fltfmt_freebits(struct floatbits */*x*/);
+
+/* --- @fltfmt_allocfrac@ --- *
+ *
+ * Arguments:  @struct floatbits *x@ = structure to adjust
+ *             @unsigned n@ = number of words required
+ *
+ * Returns:    ---
+ *
+ * Use:                Reallocate the @frac@ vector so that it has space for at
+ *             least @n@ 32-bit words, and set @x->n@ equal to @n@.  If the
+ *             current size is already @n@ or greater, then just update the
+ *             active length @x->n@ and return; otherwise, any existing
+ *             vector is discarded and a fresh, larger one allocated, so
+ *             the old fraction words are not preserved in that case.
+ */
+
+extern void fltfmt_allocfrac(struct floatbits */*x*/, unsigned /*n*/);
+
+/* --- @fltfmt_copybits@ --- *
+ *
+ * Arguments:  @struct floatbits *z_out@ = where to leave the result
+ *             @const struct floatbits *x@ = source to copy
+ *
+ * Returns:    ---
+ *
+ * Use:                Make @z_out@ be a copy of @x@.  If @z_out@ is the same object
+ *             as @x@ then do nothing.  As a destination operand, @z_out@
+ *             should already have been initialized (see
+ *             @fltfmt_initbits@).
+ */
+
+extern void fltfmt_copybits(struct floatbits */*z_out*/,
+                           const struct floatbits */*x*/);
+
+/* --- @fltfmt_round@ --- *
+ *
+ * Arguments:  @struct floatbits *z_out@ = destination (may equal source)
+ *             @const struct floatbits *x@ = source
+ *             @unsigned r@ = rounding mode (@FLTRND_...@ code)
+ *             @unsigned n@ = nonzero number of bits to leave
+ *
+ * Returns:    A @FLTERR_...@ code, specifically either @FLTERR_INEXACT@ if
+ *             rounding discarded some nonzero value bits, or @FLTERR_OK@ if
+ *             rounding was unnecessary.
+ *
+ * Use:                Rounds a floating-point value to a given number of
+ *             significant bits, using the given rounding rule.  As a
+ *             destination operand, @z_out@ should already have been
+ *             initialized (see @fltfmt_initbits@).
+ */
+
+extern unsigned fltfmt_round(struct floatbits */*z_out*/,
+                            const struct floatbits */*x*/,
+                            unsigned /*r*/, unsigned /*n*/);
+
+/*----- IEEE formats ------------------------------------------------------*/
+
+struct fltfmt_ieeefmt {
+  /* Description of a binary IEEE floating-point format.
+   *
+   * An IEEE binary floating-point encoding is split into three fields,
+   * called %$\sigma$%, %$e'$%, and %$m$%.
+   *
+   * The %$\sigma$% field encodes the sign as a single bit: if %$\sigma = 0$%
+   * then the value is nonnegative; if %$\sigma = 1$% then the value is
+   * negative.  Signed-magnitude encoding is used: if the rest of the
+   * encoding represents a (necessarily nonnegative) value %$x$% then the
+   * signed value is %$(-1)^\sigma \cdot x$%.
+   *
+   * The %$e'$% field encodes the exponent in a field of %$w$% bits.  The
+   * true exponent %$e = e' - e_0$%, where %$e_0 = 2^{w-1} - 1$% is the
+   * %%\emph{exponent bias}%%.  The maximum exponent for finite values is
+   * %$e_{\text{max}} = 2^w - 2 - e_0 = 2^{w-1} - 1$%, which is
+   * coincidentally equal to %$e_0$%; and the minimum exponent for
+   * %%\emph{normal}%% finite values is %$e_{\text{min}} = 1 - e_0 = {}$%
+   * %$2 - 2^{w-1}$%.  The maximum exponent value %$2^w - 1$% denotes
+   * infinities and NaN values, while the minimum value denotes zeros and
+   * subnormal values.
+   *
+   * If a `hidden-bit' convention is used (@IEEEF_HIDDEN@ is set in @f@),
+   * then %$h = 1$%; otherwise, %$h = 0$%.
+   *
+   * The %$m$% field encodes the %$p$%-bit %%\emph{significand}%%.  If a
+   * `hidden-bit' convention is used then the %$m$% field is actually %$p -
+   * 1$% bits wide; otherwise, it is %$p$% bits.
+   *
+   *   * If %$e_{\text{min}} \le e \le e_{\text{max}}$% then the encoding
+   *    represents a %%\emph{normal}%% value, specifically the value
+   *    %$x = (-1)^\sigma \cdot (h + m/2^{p-1}) \cdot 2^e$%.  In formats
+   *    which do not use the hidden-bit convention, the most significant bit
+   *    of %$m$% must be set; we return @FLTERR_INVAL@ for other
+   *    encodings, and interpret the `unnormal' value as encoded.
+   *
+   *   * If %$e = e_{\text{min}} - 1$% then the encoding represents (signed)
+   *    zero if %$m = 0$%, or a %%\emph{subnormal}%% value %$x = (-1)^\sigma
+   *    \cdot m/2^{p-1} \cdot 2^{e_{\text{min}}}$%.  Note that, in formats
+   *    which do not use the hidden-bit convention, the unit bit should be
+   *    clear; we return @FLTERR_INVAL@ for other encodings, and interpret
+   *    the `pseudo-denormal' value as encoded.
+   *
+   *   * If %$e = e_{\text{max}} + 1$% then the encoding represents
+   *    %$(-1)^\sigma \cdot \infty$% if %$m = 0$%, or a not-a-number value
+   *    (NaN) with payload %$m \ne 0$%.  A %%\emph{quiet}%% NaN has bit
+   *    %$p - 2$% set in %$m$%; a signalling NaN has this bit reset.  Note
+   *    that some platforms' native formats reverse this convention, but
+   *    this is handled in code which deals with native formats: the
+   *    interchange formats described here always indicate quiet NaNs by
+   *    setting the bit.  In formats which do not use the hidden-bit
+   *    convention, the unit bit %$p - 1$% is ignored.
+   */
+
+  unsigned f;                          /* flags */
+#define IEEEF_HIDDEN 1u                        /*   unit bit is implicit */
+  unsigned expwd;                      /* exponent field width %$w$% */
+  unsigned prec;                       /* precision %$p$% */
+};
+
+/* IEEE (and related) format descriptions.  The suffixes are the @TY@ names
+ * used by the @fltfmt_encTY@ and @fltfmt_decTY@ functions below.
+ */
+extern const struct fltfmt_ieeefmt
+  fltfmt_f16, fltfmt_f32, fltfmt_f64, fltfmt_f128,
+  fltfmt_mini, fltfmt_bf16, fltfmt_idblext80;
+
+/* --- @fltfmt_encieee@ --- *
+ *
+ * Arguments:  @const struct fltfmt_ieeefmt *fmt@ = format description
+ *             @uint32 *z@ = output vector
+ *             @const struct floatbits *x@ = value to encode
+ *             @unsigned r@ = rounding mode
+ *             @unsigned errmask@ = error mask
+ *
+ * Returns:    Error flags (@FLTERR_...@).
+ *
+ * Use:                Encode a floating-point value in an IEEE format.  This is the
+ *             machinery shared by the @fltfmt_enc...@ functions for
+ *             encoding IEEE-format values.  Most of the arguments and
+ *             behaviour are as described for those functions.
+ *
+ *             The encoded value is right-aligned and big-endian; i.e., the
+ *             sign bit ends up in @z[0]@, and the least significant bit of
+ *             the significand ends up in the least significant bit of
+ *             @z[n - 1]@.  (Here @n@ is presumably the number of 32-bit
+ *             words needed to hold an encoding in format @fmt@ -- confirm
+ *             against the implementation.)
+ */
+
+extern unsigned fltfmt_encieee(const struct fltfmt_ieeefmt */*fmt*/,
+                              uint32 */*z*/, const struct floatbits */*x*/,
+                              unsigned /*r*/, unsigned /*errmask*/);
+
+/* --- @fltfmt_encTY@ --- *
+ *
+ * Arguments:  @octet *z_out@, @uint16 *z_out@, @uint32 *z_out@,
+ *                     @kludge64 *z_out@ = where to put the encoded value
+ *             @uint16 *se_out@, @kludge64 *m_out@ = where to put the
+ *                     encoded sign-and-exponent and significand
+ *             @const struct floatbits *x@ = value to encode
+ *             @unsigned r@ = rounding mode
+ *             @unsigned errmask@ = error mask
+ *
+ * Returns:    Error flags (@FLTERR_...@).
+ *
+ * Use:                Encode a floating-point value in an IEEE (or IEEE-adjacent)
+ *             format.
+ *
+ *             If an error is encountered during the encoding, and the
+ *             corresponding bit of @errmask@ is clear, then processing
+ *             stops immediately and the error is returned; if the bit is
+ *             set, then processing continues as described below.
+ *
+ *             The @TY@ may be
+ *
+ *               * @mini@ for the 8-bit `1.4.3 minifloat' format, with
+ *                 four-bit exponent and four-bit significand, represented
+ *                 as a single octet;
+ *
+ *               * @bf16@ for the Google `bfloat16' format, with eight-bit
+ *                 exponent and eight-bit significand, represented as a
+ *                 @uint16@;
+ *
+ *               * @f16@ for the IEEE `binary16' format, with five-bit
+ *                 exponent and eleven-bit significand, represented as a
+ *                 @uint16@;
+ *
+ *               * @f32@ for the IEEE `binary32' format, with eight-bit
+ *                 exponent and 24-bit significand, represented as a
+ *                 @uint32@;
+ *
+ *               * @f64@ for the IEEE `binary64' format, with eleven-bit
+ *                 exponent and 53-bit significand, represented as a
+ *                 @kludge64@;
+ *
+ *               * @f128@ for the IEEE `binary128' format, with fifteen-bit
+ *                 exponent and 113-bit significand, represented as four
+ *                 @uint32@ limbs, most significant first; or
+ *
+ *               * @idblext80@ for the Intel 80-bit `double extended'
+ *                 format, with fifteen-bit exponent and 64-bit significand
+ *                 with no hidden bit, represented as a @uint16 se@
+ *                 holding the sign and exponent, and a @kludge64 m@
+ *                 holding the significand.
+ *
+ *             Positive and negative zero and infinity are representable
+ *             exactly.
+ *
+ *             Following IEEE recommendations (and most implementations),
+ *             the most significant fraction bit of a quiet NaN is set; this
+ *             bit is clear in a signalling NaN.  The most significant
+ *             payload bits of a NaN, held in the top bits of @x->frac[0]@,
+ *             are encoded in the output significand following the `quiet'
+ *             bit.  If the chosen format's significand field is too small
+ *             to accommodate all of the set payload bits then the
+ *             @FLTERR_INEXACT@ error bit is set and, if masked, the
+ *             excess payload bits are discarded.  No rounding of NaN
+ *             payloads is performed.
+ *
+ *             Otherwise, the input value is finite and nonzero.  If the
+ *             significand cannot be represented exactly then the
+ *             @FLTERR_INEXACT@ error bit is set, and, if masked, the value
+ *             will be rounded (internally -- the input @x@ is not changed).
+ *             If the (rounded) value's exponent is too large to represent,
+ *             then the @FLTERR_OFLOW@ and @FLTERR_INEXACT@ error bits are
+ *             set and, if masked, the result is either the (absolute)
+ *             largest representable finite value or infinity, with the
+ *             appropriate sign, chosen according to the rounding mode.  If
+ *             the exponent is too small to represent, then the
+ *             @FLTERR_UFLOW@ and @FLTERR_INEXACT@ error bits are set and,
+ *             if masked, the result is either the (absolute) smallest
+ *             nonzero value or zero, with the appropriate sign, chosen
+ *             according to the rounding mode.
+ */
+
+extern unsigned fltfmt_encmini(octet */*z_out*/,
+                              const struct floatbits */*x*/,
+                              unsigned /*r*/, unsigned /*errmask*/);
+
+extern unsigned fltfmt_encbf16(uint16 */*z_out*/,
+                              const struct floatbits */*x*/,
+                              unsigned /*r*/, unsigned /*errmask*/);
+
+extern unsigned fltfmt_encf16(uint16 */*z_out*/,
+                             const struct floatbits */*x*/,
+                             unsigned /*r*/, unsigned /*errmask*/);
+
+extern unsigned fltfmt_encf32(uint32 */*z_out*/,
+                             const struct floatbits */*x*/,
+                             unsigned /*r*/, unsigned /*errmask*/);
+
+extern unsigned fltfmt_encf64(kludge64 */*z_out*/,
+                             const struct floatbits */*x*/,
+                             unsigned /*r*/, unsigned /*errmask*/);
+
+extern unsigned fltfmt_encf128(uint32 */*z_out*/,
+                              const struct floatbits */*x*/,
+                              unsigned /*r*/, unsigned /*errmask*/);
+
+extern unsigned fltfmt_encidblext80(uint16 */*se_out*/, kludge64 */*m_out*/,
+                                   const struct floatbits */*x*/,
+                                   unsigned /*r*/, unsigned /*errmask*/);
+
+/* --- @fltfmt_decieee@ --- *
+ *
+ * Arguments:  @const struct fltfmt_ieeefmt *fmt@ = format description
+ *             @struct floatbits *z_out@ = output decoded representation
+ *             @const uint32 *x@ = input encoding
+ *
+ * Returns:    Error flags (@FLTERR_...@).
+ *
+ * Use:                Decode a floating-point value in an IEEE format.  This is the
+ *             machinery shared by the @fltfmt_dec...@ functions for
+ *             decoding IEEE-format values.  Most of the arguments and
+ *             behaviour are as described for those functions.
+ *
+ *             The encoded value should be right-aligned and big-endian;
+ *             i.e., the sign bit is found in @x[0]@, and the least
+ *             significant bit of the significand is found in the least
+ *             significant bit of @x[n - 1]@.  (Here @n@ is presumably the
+ *             number of 32-bit words needed to hold an encoding in format
+ *             @fmt@ -- confirm against the implementation.)
+ */
+
+extern unsigned fltfmt_decieee(const struct fltfmt_ieeefmt */*fmt*/,
+                              struct floatbits */*z_out*/,
+                              const uint32 */*x*/);
+
+/* --- @fltfmt_decTY@ --- *
+ *
+ * Arguments:  @struct floatbits *z_out@ = storage for the result
+ *             @octet x@, @uint16 x@, @uint32 x@, @kludge64 x@ =
+ *                     encoded input
+ *             @const uint32 *x@ = encoded input, as four limbs, most
+ *                     significant first (@f128@ only)
+ *             @uint16 se@, @kludge64 m@ = encoded sign-and-exponent and
+ *                     significand
+ *
+ * Returns:    Error flags (@FLTERR_...@).
+ *
+ * Use:                Decode a floating-point value in an IEEE (or IEEE-adjacent)
+ *             format.
+ *
+ *             The options for @TY@ are as documented for the encoding
+ *             functions above.
+ *
+ *             In formats without a hidden bit -- currently only @idblext80@
+ *             -- not all bit patterns are valid encodings.  If the explicit
+ *             unit bit is set when the exponent field is all-bits-zero, or
+ *             clear when the exponent field is not all-bits-zero, then the
+ *             @FLTERR_INVAL@ error bit is set.  If the exponent is all-
+ *             bits-set, denoting infinity or a NaN, then the unit bit is
+ *             otherwise ignored -- in particular, it does not affect the
+ *             NaN payload, or even whether the input encodes a NaN or
+ *             infinity.  Otherwise, the unit bit is considered significant,
+ *             and the result is normalized as one would expect.
+ *             Consequently, biased exponent values 0 and 1 are distinct
+ *             only with respect to which bit patterns are considered valid,
+ *             and not with respect to the set of values denoted.
+ */
+
+extern unsigned fltfmt_decmini(struct floatbits */*z_out*/, octet /*x*/);
+
+extern unsigned fltfmt_decbf16(struct floatbits */*z_out*/, uint16 /*x*/);
+
+extern unsigned fltfmt_decf16(struct floatbits */*z_out*/, uint16 /*x*/);
+
+extern unsigned fltfmt_decf32(struct floatbits */*z_out*/, uint32 /*x*/);
+
+extern unsigned fltfmt_decf64(struct floatbits */*z_out*/, kludge64 /*x*/);
+
+extern unsigned fltfmt_decf128(struct floatbits */*z_out*/,
+                              const uint32 */*x*/);
+
+extern unsigned fltfmt_decidblext80(struct floatbits */*z_out*/,
+                                   uint16 /*se*/, kludge64 /*m*/);
+
+/*----- Native formats ----------------------------------------------------*/
+
+/* Hacking for platforms which ill-advisedly have the opposite sense for the
+ * quiet NaN bit.
+ *
+ * Obviously we toggle the quiet bit, but there's a problem: if the quiet bit
+ * is the only one set, then if we toggle it, the fraction will become zero
+ * and we'll be left with an infinity.  Follow MIPS and set all of the bits.
+ * Because that changes the payload, the macros report @FLTERR_INEXACT@
+ * through their @rc@ argument when they have to do this.
+ *
+ * Each macro takes a pointer @x_inout@ to the encoding as big-endian 32-bit
+ * words and modifies it in place; encodings which aren't NaNs are left
+ * untouched.
+ *
+ * NOTE(review): in the fallback case the F32, F64 and F128 macros set the
+ * quiet bit along with all of the other fraction bits, whereas the IDBLEXT80
+ * macro leaves the quiet bit clear.  Confirm which of these is intended.
+ *
+ * This is all internal machinery and shouldn't be relied on by applications.
+ */
+#if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008))
+#  define FLTFMT__MUST_FROB_NANS
+
+#  define FLTFMT__FROB_NAN_F32(x_inout, rc) do {                       \
+     uint32 *_x_inout_ = (x_inout), _x0_ = _x_inout_[0];               \
+                                                                       \
+     if ((_x0_&0x7f800000) != 0x7f800000 || !(_x0_&0x007fffff))                \
+       ;                                                               \
+     else if (_x0_&0x003fffff)                                         \
+       _x_inout_[0] = _x0_ ^ 0x00400000;                               \
+     else {                                                            \
+       _x_inout_[0] = (_x0_&0x80000000) | 0x7fffffff;                  \
+       (rc) |= FLTERR_INEXACT;                                         \
+     }                                                                 \
+   } while (0)
+
+#  define FLTFMT__FROB_NAN_F64(x_inout, rc) do {                       \
+     uint32 *_x_inout_ = (x_inout),                                    \
+       _x0_ = _x_inout_[0], _x1_ = _x_inout_[1];                       \
+                                                                       \
+     if ((_x0_&0x7ff00000) != 0x7ff00000 || (!(_x0_&0x000fffff) && !_x1_)) \
+       ;                                                               \
+     else if ((_x0_&0x0007ffff) || _x1_)                               \
+       _x_inout_[0] = _x0_ ^ 0x00080000;                               \
+     else {                                                            \
+       _x_inout_[0] = (_x0_&0x80000000) | 0x7fffffff;                  \
+       _x_inout_[1] = 0xffffffff;                                      \
+       (rc) |= FLTERR_INEXACT;                                         \
+     }                                                                 \
+   } while (0)
+
+#  define FLTFMT__FROB_NAN_F128(x_inout, rc) do {                      \
+     uint32 *_x_inout_ = (x_inout),                                    \
+       _x0_ = _x_inout_[0], _x1_ = _x_inout_[1],                       \
+       _x2_ = _x_inout_[2], _x3_ = _x_inout_[3];                       \
+                                                                       \
+     if ((_x0_&0x7fff0000) != 0x7fff0000 ||                            \
+        (!(_x0_&0x000fffff) && !_x1_ && !_x2_ && !_x3_))               \
+       ;                                                               \
+     else if ((_x0_&0x00007fff) || _x1_ || _x2_ || _x3_)               \
+       _x_inout_[0] = _x0_ ^ 0x00008000;                               \
+     else {                                                            \
+       _x_inout_[0] = (_x0_&0x80000000) | 0x7fffffff;                  \
+       _x_inout_[1] = _x_inout_[2] = _x_inout_[3] = 0xffffffff;                \
+       (rc) |= FLTERR_INEXACT;                                         \
+     }                                                                 \
+   } while (0)
+
+#  define FLTFMT__FROB_NAN_IDBLEXT80(x_inout, rc) do {                 \
+     uint32 *_x_inout_ = (x_inout),                                    \
+       _x0_ = _x_inout_[0], _x1_ = _x_inout_[1], _x2_ = _x_inout_[2];  \
+                                                                       \
+     if ((_x0_&0x00007fff) != 0x00007fff || (!(_x1_&0x7fffffff) && !_x2_)) \
+       ;                                                               \
+     else if ((_x1_&0x3fffffff) || _x1_ || _x2_)                       \
+       _x_inout_[1] = _x1_ ^ 0x40000000;                               \
+     else {                                                            \
+       _x_inout_[1] = (_x1_&0x80000000) | 0x3fffffff; /* preserve unit */ \
+       _x_inout_[2] = 0xffffffff;                                      \
+     }                                                                 \
+   } while (0)
+
+#else
+/* This platform uses the standard quiet-bit sense, so there is nothing to
+ * fix up: each macro expands to an empty statement and evaluates neither of
+ * its arguments.
+ */
+#  define FLTFMT__FROB_NAN_F32(x_inout, rc) do { } while (0)
+#  define FLTFMT__FROB_NAN_F64(x_inout, rc) do { } while (0)
+#  define FLTFMT__FROB_NAN_F128(x_inout, rc) do { } while (0)
+#  define FLTFMT__FROB_NAN_IDBLEXT80(x_inout, rc) do { } while (0)
+#endif
+
+/* --- @fltfmt_encTY@ --- *
+ *
+ * Arguments:  @ty *z_out@ = storage for the result
+ *             @const struct floatbits *x@ = value to encode
+ *             @unsigned r@ = rounding mode
+ *
+ * Returns:    Error flags (@FLTERR_...@).
+ *
+ * Use:                Encode the floating-point value @x@ as a native C object and
+ *             store the result in @z_out@.
+ *
+ *             The @TY@ may be @flt@ to encode a @float@, @dbl@ to encode a
+ *             @double@, or (on C99 implementations) @ldbl@ to encode a
+ *             @long double@.
+ *
+ *             In detail, conversion is performed as follows.
+ *
+ *               * If a non-finite value cannot be represented by the
+ *                 implementation then the @FLTERR_REPR@ error bit is set
+ *                 and @*z_out@ is set to zero if @x@ is a NaN, or the
+ *                 (absolute) largest representable value, with appropriate
+ *                 sign, if @x@ is an infinity.
+ *
+ *               * If the implementation can represent NaNs, but cannot set
+ *                 NaN payloads, then the @FLTERR_INEXACT@ error bit is set,
+ *                 and @*z_out@ is set to an arbitrary (quiet) NaN value.
+ *
+ *               * If @x@ is negative zero, but the implementation does not
+ *                 distinguish negative and positive zero, then the
+ *                 @FLTERR_INEXACT@ error bit is set and @*z_out@ is set to
+ *                 zero.
+ *
+ *               * If the implementation's floating-point radix is not a
+ *                 power of two, and @x@ is a nonzero finite value, then
+ *                 @FLTERR_INEXACT@ error bit is set (unconditionally), and
+ *                 the value is rounded by the implementation using its
+ *                 prevailing rounding policy.  If the radix is a power of
+ *                 two, then the @FLTERR_INEXACT@ error bit is set only if
+ *                 rounding is necessary, and rounding is performed using
+ *                 the rounding mode @r@.
+ */
+
+extern unsigned fltfmt_encflt(float */*z_out*/,
+                             const struct floatbits */*x*/,
+                             unsigned /*r*/);
+
+extern unsigned fltfmt_encdbl(double */*z_out*/,
+                             const struct floatbits */*x*/,
+                             unsigned /*r*/);
+
+#if __STDC_VERSION__ >= 199001
+extern unsigned fltfmt_encldbl(long double */*z_out*/,
+                              const struct floatbits */*x*/,
+                              unsigned /*r*/);
+#endif
+
+/* --- @fltfmt_decTY@ --- *
+ *
+ * Arguments:  @struct floatbits *z_out@ = storage for the result
+ *             @ty x@ = value to decode
+ *             @unsigned r@ = rounding mode
+ *
+ * Returns:    Error flags (@FLTERR_...@).
+ *
+ * Use:                Decode the native C floatingpoint value @x@ and store the
+ *             result in @z_out@.
+ *
+ *             The @TY@ may be @flt@ to encode a @float@, @dbl@ to encode a
+ *             @double@, or (on C99 implementations) @ldbl@ to encode a
+ *             @long double@.
+ *
+ *             In detail, conversion is performed as follows.
+ *
+ *               * If the implementation supports negative zeros and/or
+ *                 infinity, then these are recognized and decoded.
+ *
+ *               * If the input as a NaN, but the implementation cannot
+ *                 usefully report NaN payloads, then the @FLTERR_INEXACT@
+ *                 error bit is set and the decoded payload is left empty.
+ *
+ *               * If the implementation's floating-point radix is not a
+ *                 power of two, and @x@ is a nonzero finite value, then
+ *                 @FLTERR_INEXACT@ error bit is set (unconditionally), and
+ *                 the rounded value (according to the rounding mode @r@) is
+ *                 stored in as many fraction words as necessary to identify
+ *                 the original value uniquely.  If the radix is a power of
+ *                 two, then the value is represented exactly.
+ */
+
+extern unsigned fltfmt_decflt(struct floatbits */*z_out*/,
+                             float /*x*/, unsigned /*r*/);
+
+extern unsigned fltfmt_decdbl(struct floatbits */*z_out*/,
+                             double /*x*/, unsigned /*r*/);
+
+#if __STDC_VERSION__ >= 199001
+extern unsigned fltfmt_decldbl(struct floatbits */*z_out*/,
+                              long double /*x*/, unsigned /*r*/);
+#endif
+
+/*----- Some common conversions packaged up -------------------------------*/
+
+/* --- @fltfmt_CTYtoFTYE@ --- *
+ *
+ * Arguments:  @octet *p@ = output pointer
+ *             @float x@, @double x@ = value to convert
+ *             @unsigned r@ = rounding mode
+ *
+ * Returns:    Error flags (@FLTERR_...@).
+ *
+ * Use:                Encode a native C floating-point value in an external format.
+ *
+ *             The @CTY@ is an abbreviation for a C type: @flt@ for @float@,
+ *             or @dbl@ for @double@; @FTY@ is an abbreviation for the
+ *             external format, @f32@ for IEEE Binary32, or @f64@ for IEEE
+ *             Binary64; and @E@ is @l@ for little-endian or @b@ for
+ *             big-endian byte order.  Not all combinations are currently
+ *             supported: only the four functions declared below exist.
+ *
+ *             On platforms where the external format is used natively,
+ *             these functions are simple data copies.
+ */
+
+extern unsigned fltfmt_flttof32l(octet */*p*/, float /*x*/, unsigned /*r*/);
+extern unsigned fltfmt_flttof32b(octet */*p*/, float /*x*/, unsigned /*r*/);
+extern unsigned fltfmt_dbltof64l(octet */*p*/, double /*x*/, unsigned /*r*/);
+extern unsigned fltfmt_dbltof64b(octet */*p*/, double /*x*/, unsigned /*r*/);
+
+/* --- @fltfmt_FTYEtoCTY@ --- *
+ *
+ * Arguments:  @float *z_out@, @double *z_out@ = storage for output
+ *             @const octet *p@ = input pointer
+ *             @unsigned r@ = rounding mode
+ *
+ * Returns:    Error flags (@FLTERR_...@).
+ *
+ * Use:                Decodes a floating-point value in an external format into a
+ *             native value.
+ *
+ *             The naming conventions (@FTY@, @E@ and @CTY@) are the same
+ *             as for @fltfmt_dbltof64b@ above.
+ *
+ *             On platforms where the external format is used natively,
+ *             these functions are simple data copies.
+ */
+
+extern unsigned fltfmt_f32ltoflt(float */*z_out*/, const octet */*p*/,
+                                unsigned /*r*/);
+extern unsigned fltfmt_f32btoflt(float */*z_out*/, const octet */*p*/,
+                                unsigned /*r*/);
+extern unsigned fltfmt_f64ltodbl(double */*z_out*/, const octet */*p*/,
+                                unsigned /*r*/);
+extern unsigned fltfmt_f64btodbl(double */*z_out*/, const octet */*p*/,
+                                unsigned /*r*/);
+
+/*----- That's all, folks -------------------------------------------------*/
+
+#ifdef __cplusplus
+  }
+#endif
+
+#endif