diff mbox series

[01/10] softfloat: Use post test for floatN_mul

Message ID 20200515190153.6017-2-richard.henderson@linaro.org
State Superseded
Headers show
Series softfloat: misc cleanups | expand

Commit Message

Richard Henderson May 15, 2020, 7:01 p.m. UTC
The existing f{32,64}_addsub_post test, which checks for zero
inputs, is identical to f{32,64}_mul_fast_test, so we can
eliminate the fast_test/fast_op hooks in favor of reusing the
same post hook.

This means we have one fewer test along the fast path for multiply.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 fpu/softfloat.c | 65 +++++++++++--------------------------------------
 1 file changed, 14 insertions(+), 51 deletions(-)

-- 
2.20.1

Comments

Alex Bennée May 19, 2020, 8:48 a.m. UTC | #1
Richard Henderson <richard.henderson@linaro.org> writes:

> The existing f{32,64}_addsub_post test, which checks for zero

> inputs, is identical to f{32,64}_mul_fast_test.  Which means

> we can eliminate the fast_test/fast_op hooks in favor of

> reusing the same post hook.

>

> This means we have one fewer test along the fast path for multiply.


I was worried that we were missing an opportunity for the early fast
path, but fp-bench disagrees.

Before:
  ./fp-bench -o mul                               
  139.01 MFlops

After:
  ./fp-bench -o mul                        
  155.28 MFlops

So:

Tested-by: Alex Bennée <alex.bennee@linaro.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>


>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  fpu/softfloat.c | 65 +++++++++++--------------------------------------

>  1 file changed, 14 insertions(+), 51 deletions(-)

>

> diff --git a/fpu/softfloat.c b/fpu/softfloat.c

> index a362bf89ca..5fb4ef75bb 100644

> --- a/fpu/softfloat.c

> +++ b/fpu/softfloat.c

> @@ -339,12 +339,10 @@ static inline bool f64_is_inf(union_float64 a)

>      return float64_is_infinity(a.s);

>  }

>  

> -/* Note: @fast_test and @post can be NULL */

>  static inline float32

>  float32_gen2(float32 xa, float32 xb, float_status *s,

>               hard_f32_op2_fn hard, soft_f32_op2_fn soft,

> -             f32_check_fn pre, f32_check_fn post,

> -             f32_check_fn fast_test, soft_f32_op2_fn fast_op)

> +             f32_check_fn pre, f32_check_fn post)

>  {

>      union_float32 ua, ub, ur;

>  

> @@ -359,17 +357,12 @@ float32_gen2(float32 xa, float32 xb, float_status *s,

>      if (unlikely(!pre(ua, ub))) {

>          goto soft;

>      }

> -    if (fast_test && fast_test(ua, ub)) {

> -        return fast_op(ua.s, ub.s, s);

> -    }

>  

>      ur.h = hard(ua.h, ub.h);

>      if (unlikely(f32_is_inf(ur))) {

>          s->float_exception_flags |= float_flag_overflow;

> -    } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {

> -        if (post == NULL || post(ua, ub)) {

> -            goto soft;

> -        }

> +    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {

> +        goto soft;

>      }

>      return ur.s;

>  

> @@ -380,8 +373,7 @@ float32_gen2(float32 xa, float32 xb, float_status *s,

>  static inline float64

>  float64_gen2(float64 xa, float64 xb, float_status *s,

>               hard_f64_op2_fn hard, soft_f64_op2_fn soft,

> -             f64_check_fn pre, f64_check_fn post,

> -             f64_check_fn fast_test, soft_f64_op2_fn fast_op)

> +             f64_check_fn pre, f64_check_fn post)

>  {

>      union_float64 ua, ub, ur;

>  

> @@ -396,17 +388,12 @@ float64_gen2(float64 xa, float64 xb, float_status *s,

>      if (unlikely(!pre(ua, ub))) {

>          goto soft;

>      }

> -    if (fast_test && fast_test(ua, ub)) {

> -        return fast_op(ua.s, ub.s, s);

> -    }

>  

>      ur.h = hard(ua.h, ub.h);

>      if (unlikely(f64_is_inf(ur))) {

>          s->float_exception_flags |= float_flag_overflow;

> -    } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {

> -        if (post == NULL || post(ua, ub)) {

> -            goto soft;

> -        }

> +    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {

> +        goto soft;

>      }

>      return ur.s;

>  

> @@ -1115,7 +1102,7 @@ static double hard_f64_sub(double a, double b)

>      return a - b;

>  }

>  

> -static bool f32_addsub_post(union_float32 a, union_float32 b)

> +static bool f32_addsubmul_post(union_float32 a, union_float32 b)

>  {

>      if (QEMU_HARDFLOAT_2F32_USE_FP) {

>          return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);

> @@ -1123,7 +1110,7 @@ static bool f32_addsub_post(union_float32 a, union_float32 b)

>      return !(float32_is_zero(a.s) && float32_is_zero(b.s));

>  }

>  

> -static bool f64_addsub_post(union_float64 a, union_float64 b)

> +static bool f64_addsubmul_post(union_float64 a, union_float64 b)

>  {

>      if (QEMU_HARDFLOAT_2F64_USE_FP) {

>          return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);

> @@ -1136,14 +1123,14 @@ static float32 float32_addsub(float32 a, float32 b, float_status *s,

>                                hard_f32_op2_fn hard, soft_f32_op2_fn soft)

>  {

>      return float32_gen2(a, b, s, hard, soft,

> -                        f32_is_zon2, f32_addsub_post, NULL, NULL);

> +                        f32_is_zon2, f32_addsubmul_post);

>  }

>  

>  static float64 float64_addsub(float64 a, float64 b, float_status *s,

>                                hard_f64_op2_fn hard, soft_f64_op2_fn soft)

>  {

>      return float64_gen2(a, b, s, hard, soft,

> -                        f64_is_zon2, f64_addsub_post, NULL, NULL);

> +                        f64_is_zon2, f64_addsubmul_post);

>  }

>  

>  float32 QEMU_FLATTEN

> @@ -1258,42 +1245,18 @@ static double hard_f64_mul(double a, double b)

>      return a * b;

>  }

>  

> -static bool f32_mul_fast_test(union_float32 a, union_float32 b)

> -{

> -    return float32_is_zero(a.s) || float32_is_zero(b.s);

> -}

> -

> -static bool f64_mul_fast_test(union_float64 a, union_float64 b)

> -{

> -    return float64_is_zero(a.s) || float64_is_zero(b.s);

> -}

> -

> -static float32 f32_mul_fast_op(float32 a, float32 b, float_status *s)

> -{

> -    bool signbit = float32_is_neg(a) ^ float32_is_neg(b);

> -

> -    return float32_set_sign(float32_zero, signbit);

> -}

> -

> -static float64 f64_mul_fast_op(float64 a, float64 b, float_status *s)

> -{

> -    bool signbit = float64_is_neg(a) ^ float64_is_neg(b);

> -

> -    return float64_set_sign(float64_zero, signbit);

> -}

> -

>  float32 QEMU_FLATTEN

>  float32_mul(float32 a, float32 b, float_status *s)

>  {

>      return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,

> -                        f32_is_zon2, NULL, f32_mul_fast_test, f32_mul_fast_op);

> +                        f32_is_zon2, f32_addsubmul_post);

>  }

>  

>  float64 QEMU_FLATTEN

>  float64_mul(float64 a, float64 b, float_status *s)

>  {

>      return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,

> -                        f64_is_zon2, NULL, f64_mul_fast_test, f64_mul_fast_op);

> +                        f64_is_zon2, f64_addsubmul_post);

>  }

>  

>  /*

> @@ -1834,14 +1797,14 @@ float32 QEMU_FLATTEN

>  float32_div(float32 a, float32 b, float_status *s)

>  {

>      return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,

> -                        f32_div_pre, f32_div_post, NULL, NULL);

> +                        f32_div_pre, f32_div_post);

>  }

>  

>  float64 QEMU_FLATTEN

>  float64_div(float64 a, float64 b, float_status *s)

>  {

>      return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,

> -                        f64_div_pre, f64_div_post, NULL, NULL);

> +                        f64_div_pre, f64_div_post);

>  }

>  

>  /*



-- 
Alex Bennée
diff mbox series

Patch

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index a362bf89ca..5fb4ef75bb 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -339,12 +339,10 @@  static inline bool f64_is_inf(union_float64 a)
     return float64_is_infinity(a.s);
 }
 
-/* Note: @fast_test and @post can be NULL */
 static inline float32
 float32_gen2(float32 xa, float32 xb, float_status *s,
              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
-             f32_check_fn pre, f32_check_fn post,
-             f32_check_fn fast_test, soft_f32_op2_fn fast_op)
+             f32_check_fn pre, f32_check_fn post)
 {
     union_float32 ua, ub, ur;
 
@@ -359,17 +357,12 @@  float32_gen2(float32 xa, float32 xb, float_status *s,
     if (unlikely(!pre(ua, ub))) {
         goto soft;
     }
-    if (fast_test && fast_test(ua, ub)) {
-        return fast_op(ua.s, ub.s, s);
-    }
 
     ur.h = hard(ua.h, ub.h);
     if (unlikely(f32_is_inf(ur))) {
         s->float_exception_flags |= float_flag_overflow;
-    } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
-        if (post == NULL || post(ua, ub)) {
-            goto soft;
-        }
+    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
+        goto soft;
     }
     return ur.s;
 
@@ -380,8 +373,7 @@  float32_gen2(float32 xa, float32 xb, float_status *s,
 static inline float64
 float64_gen2(float64 xa, float64 xb, float_status *s,
              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
-             f64_check_fn pre, f64_check_fn post,
-             f64_check_fn fast_test, soft_f64_op2_fn fast_op)
+             f64_check_fn pre, f64_check_fn post)
 {
     union_float64 ua, ub, ur;
 
@@ -396,17 +388,12 @@  float64_gen2(float64 xa, float64 xb, float_status *s,
     if (unlikely(!pre(ua, ub))) {
         goto soft;
     }
-    if (fast_test && fast_test(ua, ub)) {
-        return fast_op(ua.s, ub.s, s);
-    }
 
     ur.h = hard(ua.h, ub.h);
     if (unlikely(f64_is_inf(ur))) {
         s->float_exception_flags |= float_flag_overflow;
-    } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
-        if (post == NULL || post(ua, ub)) {
-            goto soft;
-        }
+    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
+        goto soft;
     }
     return ur.s;
 
@@ -1115,7 +1102,7 @@  static double hard_f64_sub(double a, double b)
     return a - b;
 }
 
-static bool f32_addsub_post(union_float32 a, union_float32 b)
+static bool f32_addsubmul_post(union_float32 a, union_float32 b)
 {
     if (QEMU_HARDFLOAT_2F32_USE_FP) {
         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
@@ -1123,7 +1110,7 @@  static bool f32_addsub_post(union_float32 a, union_float32 b)
     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
 }
 
-static bool f64_addsub_post(union_float64 a, union_float64 b)
+static bool f64_addsubmul_post(union_float64 a, union_float64 b)
 {
     if (QEMU_HARDFLOAT_2F64_USE_FP) {
         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
@@ -1136,14 +1123,14 @@  static float32 float32_addsub(float32 a, float32 b, float_status *s,
                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
 {
     return float32_gen2(a, b, s, hard, soft,
-                        f32_is_zon2, f32_addsub_post, NULL, NULL);
+                        f32_is_zon2, f32_addsubmul_post);
 }
 
 static float64 float64_addsub(float64 a, float64 b, float_status *s,
                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
 {
     return float64_gen2(a, b, s, hard, soft,
-                        f64_is_zon2, f64_addsub_post, NULL, NULL);
+                        f64_is_zon2, f64_addsubmul_post);
 }
 
 float32 QEMU_FLATTEN
@@ -1258,42 +1245,18 @@  static double hard_f64_mul(double a, double b)
     return a * b;
 }
 
-static bool f32_mul_fast_test(union_float32 a, union_float32 b)
-{
-    return float32_is_zero(a.s) || float32_is_zero(b.s);
-}
-
-static bool f64_mul_fast_test(union_float64 a, union_float64 b)
-{
-    return float64_is_zero(a.s) || float64_is_zero(b.s);
-}
-
-static float32 f32_mul_fast_op(float32 a, float32 b, float_status *s)
-{
-    bool signbit = float32_is_neg(a) ^ float32_is_neg(b);
-
-    return float32_set_sign(float32_zero, signbit);
-}
-
-static float64 f64_mul_fast_op(float64 a, float64 b, float_status *s)
-{
-    bool signbit = float64_is_neg(a) ^ float64_is_neg(b);
-
-    return float64_set_sign(float64_zero, signbit);
-}
-
 float32 QEMU_FLATTEN
 float32_mul(float32 a, float32 b, float_status *s)
 {
     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
-                        f32_is_zon2, NULL, f32_mul_fast_test, f32_mul_fast_op);
+                        f32_is_zon2, f32_addsubmul_post);
 }
 
 float64 QEMU_FLATTEN
 float64_mul(float64 a, float64 b, float_status *s)
 {
     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
-                        f64_is_zon2, NULL, f64_mul_fast_test, f64_mul_fast_op);
+                        f64_is_zon2, f64_addsubmul_post);
 }
 
 /*
@@ -1834,14 +1797,14 @@  float32 QEMU_FLATTEN
 float32_div(float32 a, float32 b, float_status *s)
 {
     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
-                        f32_div_pre, f32_div_post, NULL, NULL);
+                        f32_div_pre, f32_div_post);
 }
 
 float64 QEMU_FLATTEN
 float64_div(float64 a, float64 b, float_status *s)
 {
     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
-                        f64_div_pre, f64_div_post, NULL, NULL);
+                        f64_div_pre, f64_div_post);
 }
 
 /*