diff mbox series

[v2,11/27] fpu/softfloat: support ARM Alternative half-precision

Message ID 20180512004311.9299-12-richard.henderson@linaro.org
State New
Headers show
Series softfloat patch roundup | expand

Commit Message

Richard Henderson May 12, 2018, 12:42 a.m. UTC
From: Alex Bennée <alex.bennee@linaro.org>


For float16, ARM supports an alternative half-precision format that
sacrifices the ability to represent NaN/Inf in return for a higher
dynamic range. To support this I've added an additional
FloatFmt (float16_params_ahp).

The new FloatFmt flag (arm_althp) is then used to modify the behaviour
of canonicalize and round_canonical with respect to representation and
exception raising.

Finally the float16_to_floatN and floatN_to_float16 conversion
routines select the new alternative FloatFmt when !ieee.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>


---
v3
  - squash NaN to 0 if destination is AHP F16
v4
  - handle inf -> ahp max in float_to_float not round_canonical
  - assert no nan and inf for ahp in round_canonical
  - check ahp before snan in float_to_float
---
 fpu/softfloat.c | 95 +++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 81 insertions(+), 14 deletions(-)

-- 
2.17.0

Comments

Peter Maydell May 14, 2018, 1:52 p.m. UTC | #1
On 12 May 2018 at 01:42, Richard Henderson <richard.henderson@linaro.org> wrote:
> From: Alex Bennée <alex.bennee@linaro.org>

>

> For float16 ARM supports an alternative half-precision format which

> sacrifices the ability to represent NaN/Inf in return for a higher

> dynamic range. To support this I've added an additional

> FloatFmt (float16_params_ahp).

>

> The new FloatFmt flag (arm_althp) is then used to modify the behaviour

> of canonicalize and round_canonical with respect to representation and

> exception raising.

>

> Finally the float16_to_floatN and floatN_to_float16 conversion

> routines select the new alternative FloatFmt when !ieee.

>

> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>


If we do it this way then we have a regression for the
alternate-halfprec conversions between the patch where we change
over the conversion functions and this one.

To fix that we could put the parts of this patch that change
existing-in-master code like round_canonical() first, and then
fold the parts that fix up the conversion functions into the
patch which is currently 10/27.

> ---

> v3

>   - squash NaN to 0 if destination is AHP F16

> v4

>   - handle inf -> ahp max in float_to_float not round_canonical

>   - assert no nan and inf for ahp in round_canonical

>   - check ahp before snan in float_to_float

> ---

>  fpu/softfloat.c | 95 +++++++++++++++++++++++++++++++++++++++++--------

>  1 file changed, 81 insertions(+), 14 deletions(-)

>

> diff --git a/fpu/softfloat.c b/fpu/softfloat.c

> index aa219223ff..15a272759d 100644

> --- a/fpu/softfloat.c

> +++ b/fpu/softfloat.c

> @@ -211,8 +211,10 @@ typedef struct {

>   *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT

>   * The following are computed based the size of fraction

>   *   frac_lsb: least significant bit of fraction

> - *   fram_lsbm1: the bit bellow the least significant bit (for rounding)

> + *   frac_lsbm1: the bit bellow the least significant bit (for rounding)


"below" (I see this typo slipped past us the first time around)

thanks
-- PMM
diff mbox series

Patch

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index aa219223ff..15a272759d 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -211,8 +211,10 @@  typedef struct {
  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
  * The following are computed based the size of fraction
  *   frac_lsb: least significant bit of fraction
- *   fram_lsbm1: the bit bellow the least significant bit (for rounding)
+ *   frac_lsbm1: the bit below the least significant bit (for rounding)
  *   round_mask/roundeven_mask: masks used for rounding
+ * The following optional modifiers are available:
+ *   arm_althp: handle ARM Alternative Half Precision
  */
 typedef struct {
     int exp_size;
@@ -224,6 +226,7 @@  typedef struct {
     uint64_t frac_lsbm1;
     uint64_t round_mask;
     uint64_t roundeven_mask;
+    bool arm_althp;
 } FloatFmt;
 
 /*----------------------------------------------------------------------------
@@ -252,6 +255,11 @@  static const FloatFmt float16_params = {
     FLOAT_PARAMS(5, 10)
 };
 
+static const FloatFmt float16_params_ahp = {
+    FLOAT_PARAMS(5, 10),
+    .arm_althp = true
+};
+
 static const FloatFmt float32_params = {
     FLOAT_PARAMS(8, 23)
 };
@@ -315,7 +323,7 @@  static inline float64 float64_pack_raw(FloatParts p)
 static FloatParts canonicalize(FloatParts part, const FloatFmt *parm,
                                float_status *status)
 {
-    if (part.exp == parm->exp_max) {
+    if (part.exp == parm->exp_max && !parm->arm_althp) {
         if (part.frac == 0) {
             part.cls = float_class_inf;
         } else {
@@ -404,7 +412,15 @@  static FloatParts round_canonical(FloatParts p, float_status *s,
             }
             frac >>= frac_shift;
 
-            if (unlikely(exp >= exp_max)) {
+            if (parm->arm_althp) {
+                /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
+                if (unlikely(exp > exp_max)) {
+                    /* Overflow.  Return the maximum normal.  */
+                    flags = float_flag_invalid;
+                    exp = exp_max;
+                    frac = -1;
+                }
+            } else if (unlikely(exp >= exp_max)) {
                 flags |= float_flag_overflow | float_flag_inexact;
                 if (overflow_norm) {
                     exp = exp_max - 1;
@@ -455,12 +471,14 @@  static FloatParts round_canonical(FloatParts p, float_status *s,
 
     case float_class_inf:
     do_inf:
+        assert(!parm->arm_althp);
         exp = exp_max;
         frac = 0;
         break;
 
     case float_class_qnan:
     case float_class_snan:
+        assert(!parm->arm_althp);
         exp = exp_max;
         frac >>= parm->frac_shift;
         break;
@@ -475,14 +493,27 @@  static FloatParts round_canonical(FloatParts p, float_status *s,
     return p;
 }
 
+/* Explicit FloatFmt version */
+static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
+                                            const FloatFmt *params)
+{
+    return canonicalize(float16_unpack_raw(f), params, s);
+}
+
 static FloatParts float16_unpack_canonical(float16 f, float_status *s)
 {
-    return canonicalize(float16_unpack_raw(f), &float16_params, s);
+    return float16a_unpack_canonical(f, s, &float16_params);
+}
+
+static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
+                                             const FloatFmt *params)
+{
+    return float16_pack_raw(round_canonical(p, s, params));
 }
 
 static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
 {
-    return float16_pack_raw(round_canonical(p, s, &float16_params));
+    return float16a_round_pack_canonical(p, s, &float16_params);
 }
 
 static FloatParts float32_unpack_canonical(float32 f, float_status *s)
@@ -1174,7 +1205,33 @@  static FloatParts float_to_float(FloatParts a,
                                  const FloatFmt *srcf, const FloatFmt *dstf,
                                  float_status *s)
 {
-    if (is_nan(a.cls)) {
+    if (dstf->arm_althp) {
+        switch (a.cls) {
+        case float_class_qnan:
+        case float_class_snan:
+            /* There is no NaN in the destination format.  Raise Invalid
+             * and return a zero with the sign of the input NaN.
+             */
+            s->float_exception_flags |= float_flag_invalid;
+            a.cls = float_class_zero;
+            a.frac = 0;
+            a.exp = 0;
+            break;
+
+        case float_class_inf:
+            /* There is no Inf in the destination format.  Raise Invalid
+             * and return the maximum normal with the correct sign.
+             */
+            s->float_exception_flags |= float_flag_invalid;
+            a.cls = float_class_normal;
+            a.exp = dstf->exp_max;
+            a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
+            break;
+
+        default:
+            break;
+        }
+    } else if (is_nan(a.cls)) {
         if (is_snan(a.cls)) {
             s->float_exception_flags |= float_flag_invalid;
             a = parts_silence_nan(a, s);
@@ -1186,25 +1243,34 @@  static FloatParts float_to_float(FloatParts a,
     return a;
 }
 
+/*
+ * Currently non-ieee implies ARM Alternative Half Precision handling
+ * for float16 values. If more are needed we'll need to expand the API
+ * into softfloat.
+ */
+
 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
 {
-    FloatParts p = float16_unpack_canonical(a, s);
-    FloatParts pr = float_to_float(p, &float16_params, &float32_params, s);
+    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
+    FloatParts p = float16a_unpack_canonical(a, s, fmt16);
+    FloatParts pr = float_to_float(p, fmt16, &float32_params, s);
     return float32_round_pack_canonical(pr, s);
 }
 
 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
 {
-    FloatParts p = float16_unpack_canonical(a, s);
-    FloatParts pr = float_to_float(p, &float16_params, &float64_params, s);
+    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
+    FloatParts p = float16a_unpack_canonical(a, s, fmt16);
+    FloatParts pr = float_to_float(p, fmt16, &float64_params, s);
     return float64_round_pack_canonical(pr, s);
 }
 
 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
 {
+    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
     FloatParts p = float32_unpack_canonical(a, s);
-    FloatParts pr = float_to_float(p, &float32_params, &float16_params, s);
-    return float16_round_pack_canonical(pr, s);
+    FloatParts pr = float_to_float(p, &float32_params, fmt16, s);
+    return float16a_round_pack_canonical(pr, s, fmt16);
 }
 
 float64 float32_to_float64(float32 a, float_status *s)
@@ -1216,9 +1282,10 @@  float64 float32_to_float64(float32 a, float_status *s)
 
 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
 {
+    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
     FloatParts p = float64_unpack_canonical(a, s);
-    FloatParts pr = float_to_float(p, &float64_params, &float16_params, s);
-    return float16_round_pack_canonical(pr, s);
+    FloatParts pr = float_to_float(p, &float64_params, fmt16, s);
+    return float16a_round_pack_canonical(pr, s, fmt16);
 }
 
 float32 float64_to_float32(float64 a, float_status *s)