[7/7] target/ppc: Use non-arithmetic conversions for fp load/store

Message ID 20180703151732.29843-8-richard.henderson@linaro.org
State New
Headers show
Series
  • target/ppc fp cleanups
Related show

Commit Message

Richard Henderson July 3, 2018, 3:17 p.m.
Memory operations have no side effects on fp state.
The use of a "real" conversions between float64 and float32
would raise exceptions for SNaN and out-of-range inputs.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 target/ppc/helper.h                |  4 +-
 target/ppc/fpu_helper.c            | 63 ++++++++++++++++++++++++------
 target/ppc/translate/fp-impl.inc.c | 26 +++++-------
 3 files changed, 62 insertions(+), 31 deletions(-)

-- 
2.17.1

Comments

Programmingkid July 5, 2018, 4:31 p.m. | #1
> On Jul 3, 2018, at 11:17 AM, Richard Henderson <richard.henderson@linaro.org> wrote:

> 

> Memory operations have no side effects on fp state.

> The use of a "real" conversions between float64 and float32

> would raise exceptions for SNaN and out-of-range inputs.


Would you have any documentation that tells us about converting
between 64 bit and 32 bit floating points?


> 

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

> target/ppc/helper.h                |  4 +-

> target/ppc/fpu_helper.c            | 63 ++++++++++++++++++++++++------

> target/ppc/translate/fp-impl.inc.c | 26 +++++-------

> 3 files changed, 62 insertions(+), 31 deletions(-)

> 

> diff --git a/target/ppc/helper.h b/target/ppc/helper.h

> index cc3d031407..33e6e1df60 100644

> --- a/target/ppc/helper.h

> +++ b/target/ppc/helper.h

> @@ -61,8 +61,8 @@ DEF_HELPER_2(compute_fprf_float64, void, env, i64)

> DEF_HELPER_3(store_fpscr, void, env, i64, i32)

> DEF_HELPER_2(fpscr_clrbit, void, env, i32)

> DEF_HELPER_2(fpscr_setbit, void, env, i32)

> -DEF_HELPER_2(float64_to_float32, i32, env, i64)

> -DEF_HELPER_2(float32_to_float64, i64, env, i32)

> +DEF_HELPER_FLAGS_1(todouble, TCG_CALL_NO_RWG_SE, i64, i32)

> +DEF_HELPER_FLAGS_1(tosingle, TCG_CALL_NO_RWG_SE, i32, i64)

> 

> DEF_HELPER_4(fcmpo, void, env, i64, i64, i32)

> DEF_HELPER_4(fcmpu, void, env, i64, i64, i32)

> diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c

> index 1e195487d3..d4e9e3bccb 100644

> --- a/target/ppc/fpu_helper.c

> +++ b/target/ppc/fpu_helper.c

> @@ -47,24 +47,61 @@ static inline bool fp_exceptions_enabled(CPUPPCState *env)

> 

> /*****************************************************************************/

> /* Floating point operations helpers */

> -uint64_t helper_float32_to_float64(CPUPPCState *env, uint32_t arg)

> -{

> -    CPU_FloatU f;

> -    CPU_DoubleU d;

> 

> -    f.l = arg;

> -    d.d = float32_to_float64(f.f, &env->fp_status);

> -    return d.ll;

> +/*

> + * This is the non-arithmatic conversion that happens e.g. on loads.

> + * In the Power ISA pseudocode, this is called DOUBLE.

> + */

> +uint64_t helper_todouble(uint32_t arg)

> +{

> +    uint32_t abs_arg = arg & 0x7fffffff;

> +    uint64_t ret;

> +

> +    if (likely(abs_arg >= 0x00800000)) {

> +        /* Normalized operand, or Inf, or NaN.  */

> +        ret  = (uint64_t)extract32(arg, 30, 2) << 62;

> +        ret |= ((extract32(arg, 30, 1) ^ 1) * (uint64_t)7) << 59;

> +        ret |= (uint64_t)extract32(arg, 0, 29) << 29;

> +    } else {

> +        /* Zero or Denormalized operand.  */

> +        ret = (uint64_t)extract32(arg, 31, 1) << 63;

> +        if (unlikely(abs_arg != 0)) {

> +            /* Denormalized operand.  */

> +            int shift = clz32(abs_arg) - 9;

> +            int exp = -126 - shift + 1023;

> +            ret |= (uint64_t)exp << 52;

> +            ret |= abs_arg << (shift + 29);

> +        }

> +    }

> +    return ret;

> }

> 

> -uint32_t helper_float64_to_float32(CPUPPCState *env, uint64_t arg)

> +/*

> + * This is the non-arithmatic conversion that happens e.g. on stores.

> + * In the Power ISA pseudocode, this is called SINGLE.

> + */

> +uint32_t helper_tosingle(uint64_t arg)

> {

> -    CPU_FloatU f;

> -    CPU_DoubleU d;

> +    int exp = extract64(arg, 52, 11);

> +    uint32_t ret;

> 

> -    d.ll = arg;

> -    f.f = float64_to_float32(d.d, &env->fp_status);

> -    return f.l;

> +    if (likely(exp > 896)) {

> +        /* No denormalization required (includes Inf, NaN).  */

> +        ret  = extract64(arg, 62, 2) << 30;

> +        ret |= extract64(arg, 29, 29);

> +    } else {

> +        /* Zero or Denormal result.  If the exponent is in bounds for

> +         * a single-precision denormal result, extract the proper bits.

> +         * If the input is not zero, and the exponent is out of bounds,

> +         * then the result is undefined; this underflows to zero.

> +         */

> +        ret = extract64(arg, 63, 1) << 63;

> +        if (unlikely(exp >= 874)) {

> +            /* Denormal result.  */

> +            ret |= ((1ULL << 52) | extract64(arg, 0, 52)) >> (896 + 30 - exp);

> +        }

> +    }

> +    return ret;

> }

> 

> static inline int ppc_float32_get_unbiased_exp(float32 f)

> diff --git a/target/ppc/translate/fp-impl.inc.c b/target/ppc/translate/fp-impl.inc.c

> index 2fbd4d4f38..a6f522b85c 100644

> --- a/target/ppc/translate/fp-impl.inc.c

> +++ b/target/ppc/translate/fp-impl.inc.c

> @@ -660,15 +660,12 @@ GEN_LDUF(name, ldop, op | 0x21, type);                                        \

> GEN_LDUXF(name, ldop, op | 0x01, type);                                       \

> GEN_LDXF(name, ldop, 0x17, op | 0x00, type)

> 

> -static inline void gen_qemu_ld32fs(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2)

> +static void gen_qemu_ld32fs(DisasContext *ctx, TCGv_i64 dest, TCGv addr)

> {

> -    TCGv t0 = tcg_temp_new();

> -    TCGv_i32 t1 = tcg_temp_new_i32();

> -    gen_qemu_ld32u(ctx, t0, arg2);

> -    tcg_gen_trunc_tl_i32(t1, t0);

> -    tcg_temp_free(t0);

> -    gen_helper_float32_to_float64(arg1, cpu_env, t1);

> -    tcg_temp_free_i32(t1);

> +    TCGv_i32 tmp = tcg_temp_new_i32();

> +    tcg_gen_qemu_ld_i32(tmp, addr, ctx->mem_idx, DEF_MEMOP(MO_UL));

> +    gen_helper_todouble(dest, tmp);

> +    tcg_temp_free_i32(tmp);

> }

> 

>  /* lfd lfdu lfdux lfdx */

> @@ -836,15 +833,12 @@ GEN_STUF(name, stop, op | 0x21, type);                                        \

> GEN_STUXF(name, stop, op | 0x01, type);                                       \

> GEN_STXF(name, stop, 0x17, op | 0x00, type)

> 

> -static inline void gen_qemu_st32fs(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2)

> +static void gen_qemu_st32fs(DisasContext *ctx, TCGv_i64 src, TCGv addr)

> {

> -    TCGv_i32 t0 = tcg_temp_new_i32();

> -    TCGv t1 = tcg_temp_new();

> -    gen_helper_float64_to_float32(t0, cpu_env, arg1);

> -    tcg_gen_extu_i32_tl(t1, t0);

> -    tcg_temp_free_i32(t0);

> -    gen_qemu_st32(ctx, t1, arg2);

> -    tcg_temp_free(t1);

> +    TCGv_i32 tmp = tcg_temp_new_i32();

> +    gen_helper_tosingle(tmp, src);

> +    tcg_gen_qemu_st_i32(tmp, addr, ctx->mem_idx, DEF_MEMOP(MO_UL));

> +    tcg_temp_free_i32(tmp);

> }

> 

> /* stfd stfdu stfdux stfdx */

> -- 

> 2.17.1

>
Richard Henderson July 5, 2018, 4:48 p.m. | #2
On 07/05/2018 09:31 AM, Programmingkid wrote:
>> On Jul 3, 2018, at 11:17 AM, Richard Henderson <richard.henderson@linaro.org> wrote:

>>

>> Memory operations have no side effects on fp state.

>> The use of a "real" conversions between float64 and float32

>> would raise exceptions for SNaN and out-of-range inputs.

> 

> Would you have any documentation that tells us about converting

> between 64 bit and 32 bit floating points?


Spelled out right at the beginning of sections 4.6 (load) and 4.7 (store) of
Book 1 of the Power ISA manual (version 3.0B) [0].

I've double-checked vs RISU[1] testing of LFS and STFS, with master traces
generated on Power 8 ppc64le, so I don't see anything immediately wrong with
the patch.  But I haven't had time to look further than that.


r~


[0] https://openpowerfoundation.org/?resource_lib=power-isa-version-3-0
[1] https://git.linaro.org/people/peter.maydell/risu.git
Programmingkid July 5, 2018, 4:51 p.m. | #3
> On Jul 5, 2018, at 12:48 PM, Richard Henderson <richard.henderson@linaro.org> wrote:

> 

> On 07/05/2018 09:31 AM, Programmingkid wrote:

>>> On Jul 3, 2018, at 11:17 AM, Richard Henderson <richard.henderson@linaro.org> wrote:

>>> 

>>> Memory operations have no side effects on fp state.

>>> The use of a "real" conversions between float64 and float32

>>> would raise exceptions for SNaN and out-of-range inputs.

>> 

>> Would you have any documentation that tells us about converting

>> between 64 bit and 32 bit floating points?

> 

> Spelled out right at the beginning of sections 4.6 (load) and 4.7 (store) of

> Book 1 of the Power ISA manual (version 3.0B) [0].

> 

> I've double-checked vs RISU[1] testing of LFS and STFS, with master traces

> generated on Power 8 ppc64le, so I don't see anything immediately wrong with

> the patch.  But I haven't had time to look further than that.

> 

> 

> r~

> 

> 

> [0] https://openpowerfoundation.org/?resource_lib=power-isa-version-3-0

> [1] https://git.linaro.org/people/peter.maydell/risu.git


Thank you for the documentation. My guess is there are differences between the PowerPC and Power 8 implementations. PowerPC is big endian. Would you be able to do your testing again with your Power 8 CPU in big endian mode?
David Gibson July 6, 2018, 1:03 a.m. | #4
On Thu, Jul 05, 2018 at 12:51:00PM -0400, Programmingkid wrote:
> 

> > On Jul 5, 2018, at 12:48 PM, Richard Henderson <richard.henderson@linaro.org> wrote:

> > 

> > On 07/05/2018 09:31 AM, Programmingkid wrote:

> >>> On Jul 3, 2018, at 11:17 AM, Richard Henderson <richard.henderson@linaro.org> wrote:

> >>> 

> >>> Memory operations have no side effects on fp state.

> >>> The use of a "real" conversions between float64 and float32

> >>> would raise exceptions for SNaN and out-of-range inputs.

> >> 

> >> Would you have any documentation that tells us about converting

> >> between 64 bit and 32 bit floating points?

> > 

> > Spelled out right at the beginning of sections 4.6 (load) and 4.7 (store) of

> > Book 1 of the Power ISA manual (version 3.0B) [0].

> > 

> > I've double-checked vs RISU[1] testing of LFS and STFS, with master traces

> > generated on Power 8 ppc64le, so I don't see anything immediately wrong with

> > the patch.  But I haven't had time to look further than that.

> > 

> > 

> > r~

> > 

> > 

> > [0] https://openpowerfoundation.org/?resource_lib=power-isa-version-3-0

> > [1] https://git.linaro.org/people/peter.maydell/risu.git

> 

> Thank you for the documentation. My guess is there are differences

> between the PowerPC and Power 8 implementations.


That seems very, very unlikely to me.

> PowerPC is big

> endian. Would you be able to do your testing again with your Power 8

> CPU in big endian mode?

> 

> 


-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
Mark Cave-Ayland July 6, 2018, 9:03 a.m. | #5
On 05/07/18 17:48, Richard Henderson wrote:

> On 07/05/2018 09:31 AM, Programmingkid wrote:

>>> On Jul 3, 2018, at 11:17 AM, Richard Henderson <richard.henderson@linaro.org> wrote:

>>>

>>> Memory operations have no side effects on fp state.

>>> The use of a "real" conversions between float64 and float32

>>> would raise exceptions for SNaN and out-of-range inputs.

>>

>> Would you have any documentation that tells us about converting

>> between 64 bit and 32 bit floating points?

> 

> Spelled out right at the beginning of sections 4.6 (load) and 4.7 (store) of

> Book 1 of the Power ISA manual (version 3.0B) [0].

> 

> I've double-checked vs RISU[1] testing of LFS and STFS, with master traces

> generated on Power 8 ppc64le, so I don't see anything immediately wrong with

> the patch.  But I haven't had time to look further than that.


I've had a quick look at this with the attached patch to compare the 
helper results before your patch and after, writing any differences to 
the console.

With this patch applied to ppc-for-3.1 I've booted MacOS 9 and recorded 
the output below:


$ ./qemu-system-ppc -cdrom MacOS921-macsbug.iso -boot d -M mac99

helper_todouble diff for arg: 3f800000  d.ll: 3ff0000000000000  ret: 
3bf0000000000000
helper_todouble diff for arg: 3f800000  d.ll: 3ff0000000000000  ret: 
3bf0000000000000

(note: MacOS 9 will hang here unless the line marked "Uncommenting this 
allows MacOS to run" in my patch is enabled)

helper_todouble diff for arg: 3f000000  d.ll: 3fe0000000000000  ret: 
3be0000000000000
helper_todouble diff for arg: 3f000000  d.ll: 3fe0000000000000  ret: 
3be0000000000000
helper_todouble diff for arg: 3f800000  d.ll: 3ff0000000000000  ret: 
3bf0000000000000
helper_todouble diff for arg: 3f800000  d.ll: 3ff0000000000000  ret: 
3bf0000000000000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_todouble diff for arg: be61b08a  d.ll: bfcc361140000000  ret: 
bbcc361140000000
helper_todouble diff for arg: 3fdf81a5  d.ll: 3ffbf034a0000000  ret: 
3bfbf034a0000000
helper_todouble diff for arg: bf402647  d.ll: bfe804c8e0000000  ret: 
bbe804c8e0000000
helper_todouble diff for arg: 3e61b08a  d.ll: 3fcc361140000000  ret: 
3bcc361140000000
helper_tosingle diff for arg: bfcc361140000000  f.l: be61b08a  ret: 9e61b08a
helper_todouble diff for arg: 3f0ccccd  d.ll: 3fe19999a0000000  ret: 
3be19999a0000000
helper_tosingle diff for arg: 3ffbf034a0000000  f.l: 3fdf81a5  ret: 1fdf81a5
helper_tosingle diff for arg: bfe804c8e0000000  f.l: bf402647  ret: 9f402647
helper_tosingle diff for arg: 3fcc361140000000  f.l: 3e61b08a  ret: 1e61b08a
helper_tosingle diff for arg: 3fe19999a0000000  f.l: 3f0ccccd  ret: 1f0ccccd
helper_todouble diff for arg: 3b800000  d.ll: 3f70000000000000  ret: 
3b70000000000000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_todouble diff for arg: 3b800000  d.ll: 3f70000000000000  ret: 
3b70000000000000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000
helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000


It looks like the differences are related to a flag or flags in the MSB 
byte of ret.


ATB,

Mark.
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index cb82e6e842..aedf5d1e3b 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -52,10 +52,15 @@ static inline bool fp_exceptions_enabled(CPUPPCState *env)
  * This is the non-arithmatic conversion that happens e.g. on loads.
  * In the Power ISA pseudocode, this is called DOUBLE.
  */
-uint64_t helper_todouble(uint32_t arg)
+uint64_t helper_todouble(CPUPPCState *env, uint32_t arg)
 {
     uint32_t abs_arg = arg & 0x7fffffff;
     uint64_t ret;
+    CPU_FloatU f;
+    CPU_DoubleU d;
+
+    f.l = arg;
+    d.d = float32_to_float64(f.f, &env->fp_status);
 
     if (likely(abs_arg >= 0x00800000)) {
         /* Normalized operand, or Inf, or NaN.  */
@@ -73,6 +78,13 @@ uint64_t helper_todouble(uint32_t arg)
             ret |= abs_arg << (shift + 29);
         }
     }
+
+    if (d.ll != ret) {
+        printf("helper_todouble diff for arg: %x  d.ll: %lx  ret: %lx\n", arg, d.ll, ret);
+        // Uncommenting this allows MacOS to run
+        //ret = d.ll;
+    }
+
     return ret;
 }
 
@@ -80,10 +92,15 @@ uint64_t helper_todouble(uint32_t arg)
  * This is the non-arithmatic conversion that happens e.g. on stores.
  * In the Power ISA pseudocode, this is called SINGLE.
  */
-uint32_t helper_tosingle(uint64_t arg)
+uint32_t helper_tosingle(CPUPPCState *env, uint64_t arg)
 {
     int exp = extract64(arg, 52, 11);
     uint32_t ret;
+    CPU_FloatU f;
+    CPU_DoubleU d;
+
+    d.ll = arg;
+    f.f = float64_to_float32(d.d, &env->fp_status);
 
     if (likely(exp > 896)) {
         /* No denormalization required (includes Inf, NaN).  */
@@ -101,6 +118,11 @@ uint32_t helper_tosingle(uint64_t arg)
             ret |= ((1ULL << 52) | extract64(arg, 0, 52)) >> (896 + 30 - exp);
         }
     }
+
+    if (f.l != ret) {
+        printf("helper_tosingle diff for arg: %lx  f.l: %x  ret: %x\n", arg, f.l, ret);
+    }
+
     return ret;
 }
 
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index ef64248bc4..94b5f4fd8c 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -61,8 +61,8 @@ DEF_HELPER_2(compute_fprf_float64, void, env, i64)
 DEF_HELPER_3(store_fpscr, void, env, i64, i32)
 DEF_HELPER_2(fpscr_clrbit, void, env, i32)
 DEF_HELPER_2(fpscr_setbit, void, env, i32)
-DEF_HELPER_FLAGS_1(todouble, TCG_CALL_NO_RWG_SE, i64, i32)
-DEF_HELPER_FLAGS_1(tosingle, TCG_CALL_NO_RWG_SE, i32, i64)
+DEF_HELPER_FLAGS_2(todouble, TCG_CALL_NO_RWG_SE, i64, env, i32)
+DEF_HELPER_FLAGS_2(tosingle, TCG_CALL_NO_RWG_SE, i32, env, i64)
 
 DEF_HELPER_4(fcmpo, void, env, i64, i64, i32)
 DEF_HELPER_4(fcmpu, void, env, i64, i64, i32)
diff --git a/target/ppc/translate/fp-impl.inc.c b/target/ppc/translate/fp-impl.inc.c
index a6f522b85c..16ecca3048 100644
--- a/target/ppc/translate/fp-impl.inc.c
+++ b/target/ppc/translate/fp-impl.inc.c
@@ -664,7 +664,7 @@ static void gen_qemu_ld32fs(DisasContext *ctx, TCGv_i64 dest, TCGv addr)
 {
     TCGv_i32 tmp = tcg_temp_new_i32();
     tcg_gen_qemu_ld_i32(tmp, addr, ctx->mem_idx, DEF_MEMOP(MO_UL));
-    gen_helper_todouble(dest, tmp);
+    gen_helper_todouble(dest, cpu_env, tmp);
     tcg_temp_free_i32(tmp);
 }
 
@@ -836,7 +836,7 @@ GEN_STXF(name, stop, 0x17, op | 0x00, type)
 static void gen_qemu_st32fs(DisasContext *ctx, TCGv_i64 src, TCGv addr)
 {
     TCGv_i32 tmp = tcg_temp_new_i32();
-    gen_helper_tosingle(tmp, src);
+    gen_helper_tosingle(tmp, cpu_env, src);
     tcg_gen_qemu_st_i32(tmp, addr, ctx->mem_idx, DEF_MEMOP(MO_UL));
     tcg_temp_free_i32(tmp);
 }
Mark Cave-Ayland Aug. 5, 2018, 11:41 a.m. | #6
On 06/07/18 10:03, Mark Cave-Ayland wrote:

> On 05/07/18 17:48, Richard Henderson wrote:

> 

>> On 07/05/2018 09:31 AM, Programmingkid wrote:

>>>> On Jul 3, 2018, at 11:17 AM, Richard Henderson 

>>>> <richard.henderson@linaro.org> wrote:

>>>>

>>>> Memory operations have no side effects on fp state.

>>>> The use of a "real" conversions between float64 and float32

>>>> would raise exceptions for SNaN and out-of-range inputs.

>>>

>>> Would you have any documentation that tells us about converting

>>> between 64 bit and 32 bit floating points?

>>

>> Spelled out right at the beginning of sections 4.6 (load) and 4.7 

>> (store) of

>> Book 1 of the Power ISA manual (version 3.0B) [0].

>>

>> I've double-checked vs RISU[1] testing of LFS and STFS, with master 

>> traces

>> generated on Power 8 ppc64le, so I don't see anything immediately 

>> wrong with

>> the patch.  But I haven't had time to look further than that.

> 

> I've had a quick look at this with the attached patch to compare the 

> helper results before your patch and after, writing any differences to 

> the console.

> 

> With this patch applied to ppc-for-3.1 I've booted MacOS 9 and recorded 

> the output below:

> 

> 

> $ ./qemu-system-ppc -cdrom MacOS921-macsbug.iso -boot d -M mac99

> 

> helper_todouble diff for arg: 3f800000  d.ll: 3ff0000000000000  ret: 

> 3bf0000000000000

> helper_todouble diff for arg: 3f800000  d.ll: 3ff0000000000000  ret: 

> 3bf0000000000000

> 

> (note: MacOS 9 will hang here unless the line marked "Uncommenting this 

> allows MacOS to run" in my patch is enabled)

> 

> helper_todouble diff for arg: 3f000000  d.ll: 3fe0000000000000  ret: 

> 3be0000000000000

> helper_todouble diff for arg: 3f000000  d.ll: 3fe0000000000000  ret: 

> 3be0000000000000

> helper_todouble diff for arg: 3f800000  d.ll: 3ff0000000000000  ret: 

> 3bf0000000000000

> helper_todouble diff for arg: 3f800000  d.ll: 3ff0000000000000  ret: 

> 3bf0000000000000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_todouble diff for arg: be61b08a  d.ll: bfcc361140000000  ret: 

> bbcc361140000000

> helper_todouble diff for arg: 3fdf81a5  d.ll: 3ffbf034a0000000  ret: 

> 3bfbf034a0000000

> helper_todouble diff for arg: bf402647  d.ll: bfe804c8e0000000  ret: 

> bbe804c8e0000000

> helper_todouble diff for arg: 3e61b08a  d.ll: 3fcc361140000000  ret: 

> 3bcc361140000000

> helper_tosingle diff for arg: bfcc361140000000  f.l: be61b08a  ret: 

> 9e61b08a

> helper_todouble diff for arg: 3f0ccccd  d.ll: 3fe19999a0000000  ret: 

> 3be19999a0000000

> helper_tosingle diff for arg: 3ffbf034a0000000  f.l: 3fdf81a5  ret: 

> 1fdf81a5

> helper_tosingle diff for arg: bfe804c8e0000000  f.l: bf402647  ret: 

> 9f402647

> helper_tosingle diff for arg: 3fcc361140000000  f.l: 3e61b08a  ret: 

> 1e61b08a

> helper_tosingle diff for arg: 3fe19999a0000000  f.l: 3f0ccccd  ret: 

> 1f0ccccd

> helper_todouble diff for arg: 3b800000  d.ll: 3f70000000000000  ret: 

> 3b70000000000000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_todouble diff for arg: 3b800000  d.ll: 3f70000000000000  ret: 

> 3b70000000000000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 

> 1f800000

> 

> 

> It looks like the differences are related to a flag or flags in the MSB 

> byte of ret.


Hi Richard,

Have you had a chance to look at this yet? I've been working on top of 
David's ppc-for-3.1 branch over the weekend and ran into this again 
during my testing :/


ATB,

Mark.
Richard Henderson Aug. 6, 2018, 1:33 a.m. | #7
On 08/05/2018 04:41 AM, Mark Cave-Ayland wrote:
> On 06/07/18 10:03, Mark Cave-Ayland wrote:

> 

>> On 05/07/18 17:48, Richard Henderson wrote:

>>

>>> On 07/05/2018 09:31 AM, Programmingkid wrote:

>>>>> On Jul 3, 2018, at 11:17 AM, Richard Henderson

>>>>> <richard.henderson@linaro.org> wrote:

>>>>>

>>>>> Memory operations have no side effects on fp state.

>>>>> The use of a "real" conversions between float64 and float32

>>>>> would raise exceptions for SNaN and out-of-range inputs.

>>>>

>>>> Would you have any documentation that tells us about converting

>>>> between 64 bit and 32 bit floating points?

>>>

>>> Spelled out right at the beginning of sections 4.6 (load) and 4.7 (store) of

>>> Book 1 of the Power ISA manual (version 3.0B) [0].

>>>

>>> I've double-checked vs RISU[1] testing of LFS and STFS, with master traces

>>> generated on Power 8 ppc64le, so I don't see anything immediately wrong with

>>> the patch.  But I haven't had time to look further than that.

>>

>> I've had a quick look at this with the attached patch to compare the helper

>> results before your patch and after, writing any differences to the console.

>>

>> With this patch applied to ppc-for-3.1 I've booted MacOS 9 and recorded the

>> output below:

>>

>>

>> $ ./qemu-system-ppc -cdrom MacOS921-macsbug.iso -boot d -M mac99

>>

>> helper_todouble diff for arg: 3f800000  d.ll: 3ff0000000000000  ret:

>> 3bf0000000000000

>> helper_todouble diff for arg: 3f800000  d.ll: 3ff0000000000000  ret:

>> 3bf0000000000000

>>

>> (note: MacOS 9 will hang here unless the line marked "Uncommenting this

>> allows MacOS to run" in my patch is enabled)

>>

>> helper_todouble diff for arg: 3f000000  d.ll: 3fe0000000000000  ret:

>> 3be0000000000000

>> helper_todouble diff for arg: 3f000000  d.ll: 3fe0000000000000  ret:

>> 3be0000000000000

>> helper_todouble diff for arg: 3f800000  d.ll: 3ff0000000000000  ret:

>> 3bf0000000000000

>> helper_todouble diff for arg: 3f800000  d.ll: 3ff0000000000000  ret:

>> 3bf0000000000000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_todouble diff for arg: be61b08a  d.ll: bfcc361140000000  ret:

>> bbcc361140000000

>> helper_todouble diff for arg: 3fdf81a5  d.ll: 3ffbf034a0000000  ret:

>> 3bfbf034a0000000

>> helper_todouble diff for arg: bf402647  d.ll: bfe804c8e0000000  ret:

>> bbe804c8e0000000

>> helper_todouble diff for arg: 3e61b08a  d.ll: 3fcc361140000000  ret:

>> 3bcc361140000000

>> helper_tosingle diff for arg: bfcc361140000000  f.l: be61b08a  ret: 9e61b08a

>> helper_todouble diff for arg: 3f0ccccd  d.ll: 3fe19999a0000000  ret:

>> 3be19999a0000000

>> helper_tosingle diff for arg: 3ffbf034a0000000  f.l: 3fdf81a5  ret: 1fdf81a5

>> helper_tosingle diff for arg: bfe804c8e0000000  f.l: bf402647  ret: 9f402647

>> helper_tosingle diff for arg: 3fcc361140000000  f.l: 3e61b08a  ret: 1e61b08a

>> helper_tosingle diff for arg: 3fe19999a0000000  f.l: 3f0ccccd  ret: 1f0ccccd

>> helper_todouble diff for arg: 3b800000  d.ll: 3f70000000000000  ret:

>> 3b70000000000000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_todouble diff for arg: 3b800000  d.ll: 3f70000000000000  ret:

>> 3b70000000000000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>> helper_tosingle diff for arg: 3ff0000000000000  f.l: 3f800000  ret: 1f800000

>>

>>

>> It looks like the differences are related to a flag or flags in the MSB byte

>> of ret.

> 

> Hi Richard,

> 

> Have you had a chance to look at this yet? I've been working on top of David's

> ppc-for-3.1 branch over the weekend and ran into this again during my testing :/


Thanks for the reminder and the test cases.
I've posted a fix for this now.


r~

Patch

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index cc3d031407..33e6e1df60 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -61,8 +61,8 @@  DEF_HELPER_2(compute_fprf_float64, void, env, i64)
 DEF_HELPER_3(store_fpscr, void, env, i64, i32)
 DEF_HELPER_2(fpscr_clrbit, void, env, i32)
 DEF_HELPER_2(fpscr_setbit, void, env, i32)
-DEF_HELPER_2(float64_to_float32, i32, env, i64)
-DEF_HELPER_2(float32_to_float64, i64, env, i32)
+DEF_HELPER_FLAGS_1(todouble, TCG_CALL_NO_RWG_SE, i64, i32)
+DEF_HELPER_FLAGS_1(tosingle, TCG_CALL_NO_RWG_SE, i32, i64)
 
 DEF_HELPER_4(fcmpo, void, env, i64, i64, i32)
 DEF_HELPER_4(fcmpu, void, env, i64, i64, i32)
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 1e195487d3..d4e9e3bccb 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -47,24 +47,61 @@  static inline bool fp_exceptions_enabled(CPUPPCState *env)
 
 /*****************************************************************************/
 /* Floating point operations helpers */
-uint64_t helper_float32_to_float64(CPUPPCState *env, uint32_t arg)
-{
-    CPU_FloatU f;
-    CPU_DoubleU d;
 
-    f.l = arg;
-    d.d = float32_to_float64(f.f, &env->fp_status);
-    return d.ll;
+/*
+ * This is the non-arithmatic conversion that happens e.g. on loads.
+ * In the Power ISA pseudocode, this is called DOUBLE.
+ */
+uint64_t helper_todouble(uint32_t arg)
+{
+    uint32_t abs_arg = arg & 0x7fffffff;
+    uint64_t ret;
+
+    if (likely(abs_arg >= 0x00800000)) {
+        /* Normalized operand, or Inf, or NaN.  */
+        ret  = (uint64_t)extract32(arg, 30, 2) << 62;
+        ret |= ((extract32(arg, 30, 1) ^ 1) * (uint64_t)7) << 59;
+        ret |= (uint64_t)extract32(arg, 0, 29) << 29;
+    } else {
+        /* Zero or Denormalized operand.  */
+        ret = (uint64_t)extract32(arg, 31, 1) << 63;
+        if (unlikely(abs_arg != 0)) {
+            /* Denormalized operand.  */
+            int shift = clz32(abs_arg) - 9;
+            int exp = -126 - shift + 1023;
+            ret |= (uint64_t)exp << 52;
+            ret |= abs_arg << (shift + 29);
+        }
+    }
+    return ret;
 }
 
-uint32_t helper_float64_to_float32(CPUPPCState *env, uint64_t arg)
+/*
+ * This is the non-arithmatic conversion that happens e.g. on stores.
+ * In the Power ISA pseudocode, this is called SINGLE.
+ */
+uint32_t helper_tosingle(uint64_t arg)
 {
-    CPU_FloatU f;
-    CPU_DoubleU d;
+    int exp = extract64(arg, 52, 11);
+    uint32_t ret;
 
-    d.ll = arg;
-    f.f = float64_to_float32(d.d, &env->fp_status);
-    return f.l;
+    if (likely(exp > 896)) {
+        /* No denormalization required (includes Inf, NaN).  */
+        ret  = extract64(arg, 62, 2) << 30;
+        ret |= extract64(arg, 29, 29);
+    } else {
+        /* Zero or Denormal result.  If the exponent is in bounds for
+         * a single-precision denormal result, extract the proper bits.
+         * If the input is not zero, and the exponent is out of bounds,
+         * then the result is undefined; this underflows to zero.
+         */
+        ret = extract64(arg, 63, 1) << 63;
+        if (unlikely(exp >= 874)) {
+            /* Denormal result.  */
+            ret |= ((1ULL << 52) | extract64(arg, 0, 52)) >> (896 + 30 - exp);
+        }
+    }
+    return ret;
 }
 
 static inline int ppc_float32_get_unbiased_exp(float32 f)
diff --git a/target/ppc/translate/fp-impl.inc.c b/target/ppc/translate/fp-impl.inc.c
index 2fbd4d4f38..a6f522b85c 100644
--- a/target/ppc/translate/fp-impl.inc.c
+++ b/target/ppc/translate/fp-impl.inc.c
@@ -660,15 +660,12 @@  GEN_LDUF(name, ldop, op | 0x21, type);                                        \
 GEN_LDUXF(name, ldop, op | 0x01, type);                                       \
 GEN_LDXF(name, ldop, 0x17, op | 0x00, type)
 
-static inline void gen_qemu_ld32fs(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2)
+static void gen_qemu_ld32fs(DisasContext *ctx, TCGv_i64 dest, TCGv addr)
 {
-    TCGv t0 = tcg_temp_new();
-    TCGv_i32 t1 = tcg_temp_new_i32();
-    gen_qemu_ld32u(ctx, t0, arg2);
-    tcg_gen_trunc_tl_i32(t1, t0);
-    tcg_temp_free(t0);
-    gen_helper_float32_to_float64(arg1, cpu_env, t1);
-    tcg_temp_free_i32(t1);
+    TCGv_i32 tmp = tcg_temp_new_i32();
+    tcg_gen_qemu_ld_i32(tmp, addr, ctx->mem_idx, DEF_MEMOP(MO_UL));
+    gen_helper_todouble(dest, tmp);
+    tcg_temp_free_i32(tmp);
 }
 
  /* lfd lfdu lfdux lfdx */
@@ -836,15 +833,12 @@  GEN_STUF(name, stop, op | 0x21, type);                                        \
 GEN_STUXF(name, stop, op | 0x01, type);                                       \
 GEN_STXF(name, stop, 0x17, op | 0x00, type)
 
-static inline void gen_qemu_st32fs(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2)
+static void gen_qemu_st32fs(DisasContext *ctx, TCGv_i64 src, TCGv addr)
 {
-    TCGv_i32 t0 = tcg_temp_new_i32();
-    TCGv t1 = tcg_temp_new();
-    gen_helper_float64_to_float32(t0, cpu_env, arg1);
-    tcg_gen_extu_i32_tl(t1, t0);
-    tcg_temp_free_i32(t0);
-    gen_qemu_st32(ctx, t1, arg2);
-    tcg_temp_free(t1);
+    TCGv_i32 tmp = tcg_temp_new_i32();
+    gen_helper_tosingle(tmp, src);
+    tcg_gen_qemu_st_i32(tmp, addr, ctx->mem_idx, DEF_MEMOP(MO_UL));
+    tcg_temp_free_i32(tmp);
 }
 
 /* stfd stfdu stfdux stfdx */