diff mbox series

[PULL,26/46] softfloat: Convert float128_silence_nan to parts

Message ID 20210516123431.718318-27-richard.henderson@linaro.org
State New
Headers show
Series softfloat: Convert float128 to FloatParts (part 1) | expand

Commit Message

Richard Henderson May 16, 2021, 12:34 p.m. UTC
This is the minimal change that also introduces float128_params,
float128_unpack_raw, and float128_pack_raw without running into
unused symbol Werrors.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 fpu/softfloat.c                | 96 +++++++++++++++++++++++++++++-----
 fpu/softfloat-specialize.c.inc | 25 +++------
 2 files changed, 89 insertions(+), 32 deletions(-)

-- 
2.25.1
diff mbox series

Patch

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index ee609540aa..f8f4ef51e8 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -500,14 +500,12 @@  static inline __attribute__((unused)) bool is_qnan(FloatClass c)
 }
 
 /*
- * Structure holding all of the decomposed parts of a float. The
- * exponent is unbiased and the fraction is normalized. All
- * calculations are done with a 64 bit fraction and then rounded as
- * appropriate for the final format.
+ * Structure holding all of the decomposed parts of a float.
+ * The exponent is unbiased and the fraction is normalized.
  *
- * Thanks to the packed FloatClass a decent compiler should be able to
- * fit the whole structure into registers and avoid using the stack
- * for parameter passing.
+ * The fraction words are stored in big-endian word ordering,
+ * so that truncation from a larger format to a smaller format
+ * can be done simply by ignoring subsequent elements.
  */
 
 typedef struct {
@@ -527,6 +525,15 @@  typedef struct {
     };
 } FloatParts64;
 
+typedef struct {
+    FloatClass cls;
+    bool sign;
+    int32_t exp;
+    uint64_t frac_hi;
+    uint64_t frac_lo;
+} FloatParts128;
+
+/* These apply to the most significant word of each FloatPartsN. */
 #define DECOMPOSED_BINARY_POINT    63
 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
 
@@ -562,11 +569,11 @@  typedef struct {
     .exp_bias       = ((1 << E) - 1) >> 1,                           \
     .exp_max        = (1 << E) - 1,                                  \
     .frac_size      = F,                                             \
-    .frac_shift     = DECOMPOSED_BINARY_POINT - F,                   \
-    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - F),         \
-    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - F) - 1),   \
-    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - F)) - 1,   \
-    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - F)) - 1
+    .frac_shift     = (-F - 1) & 63,                                 \
+    .frac_lsb       = 1ull << ((-F - 1) & 63),                       \
+    .frac_lsbm1     = 1ull << ((-F - 2) & 63),                       \
+    .round_mask     = (1ull << ((-F - 1) & 63)) - 1,                 \
+    .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1
 
 static const FloatFmt float16_params = {
     FLOAT_PARAMS(5, 10)
@@ -589,6 +596,10 @@  static const FloatFmt float64_params = {
     FLOAT_PARAMS(11, 52)
 };
 
+static const FloatFmt float128_params = {
+    FLOAT_PARAMS(15, 112)
+};
+
 /* Unpack a float to parts, but do not canonicalize.  */
 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
 {
@@ -623,6 +634,20 @@  static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
     unpack_raw64(p, &float64_params, f);
 }
 
+static void float128_unpack_raw(FloatParts128 *p, float128 f)
+{
+    const int f_size = float128_params.frac_size - 64;
+    const int e_size = float128_params.exp_size;
+
+    *p = (FloatParts128) {
+        .cls = float_class_unclassified,
+        .sign = extract64(f.high, f_size + e_size, 1),
+        .exp = extract64(f.high, f_size, e_size),
+        .frac_hi = extract64(f.high, 0, f_size),
+        .frac_lo = f.low,
+    };
+}
+
 /* Pack a float from parts, but do not canonicalize.  */
 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
 {
@@ -656,6 +681,18 @@  static inline float64 float64_pack_raw(const FloatParts64 *p)
     return make_float64(pack_raw64(p, &float64_params));
 }
 
+static float128 float128_pack_raw(const FloatParts128 *p)
+{
+    const int f_size = float128_params.frac_size - 64;
+    const int e_size = float128_params.exp_size;
+    uint64_t hi;
+
+    hi = (uint64_t)p->sign << (f_size + e_size);
+    hi = deposit64(hi, f_size, e_size, p->exp);
+    hi = deposit64(hi, 0, f_size, p->frac_hi);
+    return make_float128(hi, p->frac_lo);
+}
+
 /*----------------------------------------------------------------------------
 | Functions and definitions to determine:  (1) whether tininess for underflow
 | is detected before or after rounding by default, (2) what (if anything)
@@ -666,8 +703,30 @@  static inline float64 float64_pack_raw(const FloatParts64 *p)
 *----------------------------------------------------------------------------*/
 #include "softfloat-specialize.c.inc"
 
+#define PARTS_GENERIC_64_128(NAME, P) \
+    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
+
 #define parts_default_nan  parts64_default_nan
-#define parts_silence_nan  parts64_silence_nan
+#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
+
+
+/*
+ * Helper functions for softfloat-parts.c.inc, per-size operations.
+ */
+
+static void frac128_shl(FloatParts128 *a, int c)
+{
+    shift128Left(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
+}
+
+#define frac_shl(A, C)             frac128_shl(A, C)
+
+static void frac128_shr(FloatParts128 *a, int c)
+{
+    shift128Right(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
+}
+
+#define frac_shr(A, C)             frac128_shr(A, C)
 
 /* Canonicalize EXP and FRAC, setting CLS.  */
 static FloatParts64 sf_canonicalize(FloatParts64 part, const FloatFmt *parm,
@@ -3851,6 +3910,17 @@  bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
     return bfloat16_pack_raw(&p);
 }
 
+float128 float128_silence_nan(float128 a, float_status *status)
+{
+    FloatParts128 p;
+
+    float128_unpack_raw(&p, a);
+    frac_shl(&p, float128_params.frac_shift);
+    parts_silence_nan(&p, status);
+    frac_shr(&p, float128_params.frac_shift);
+    return float128_pack_raw(&p);
+}
+
 /*----------------------------------------------------------------------------
 | If `a' is denormal and we are in flush-to-zero mode then set the
 | input-denormal exception and return zero. Otherwise just return the value.
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index 4038955379..5b85b843c2 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -197,6 +197,12 @@  static void parts64_silence_nan(FloatParts64 *p, float_status *status)
     p->cls = float_class_qnan;
 }
 
+static void parts128_silence_nan(FloatParts128 *p, float_status *status)
+{
+    p->frac_hi = parts_silence_nan_frac(p->frac_hi, status);
+    p->cls = float_class_qnan;
+}
+
 /*----------------------------------------------------------------------------
 | The pattern for a default generated extended double-precision NaN.
 *----------------------------------------------------------------------------*/
@@ -1062,25 +1068,6 @@  bool float128_is_signaling_nan(float128 a, float_status *status)
     }
 }
 
-/*----------------------------------------------------------------------------
-| Returns a quiet NaN from a signalling NaN for the quadruple-precision
-| floating point value `a'.
-*----------------------------------------------------------------------------*/
-
-float128 float128_silence_nan(float128 a, float_status *status)
-{
-    if (no_signaling_nans(status)) {
-        g_assert_not_reached();
-    } else {
-        if (snan_bit_is_one(status)) {
-            return float128_default_nan(status);
-        } else {
-            a.high |= UINT64_C(0x0000800000000000);
-            return a;
-        }
-    }
-}
-
 /*----------------------------------------------------------------------------
 | Returns the result of converting the quadruple-precision floating-point NaN
 | `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid