diff mbox series

[PULL,09/28] target/arm: convert conversion helpers to fpst/ahp_flag

Message ID 20180516155243.16937-10-richard.henderson@linaro.org
State Superseded
Headers show
Series softfloat patch roundup | expand

Commit Message

Richard Henderson May 16, 2018, 3:52 p.m. UTC
From: Alex Bennée <alex.bennee@linaro.org>


Instead of passing env and leaving it up to the helper to get the
right fpstatus we pass it explicitly. There was already a get_fpstatus
helper for neon for the 32 bit code. We also add an get_ahp_flag() for
passing the state of the alternative FP16 format flag. This leaves
scope for later tracking the AHP state in translation flags.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 target/arm/helper.h        | 10 +++---
 target/arm/translate.h     | 12 +++++++
 target/arm/helper.c        | 56 +++++------------------------
 target/arm/translate-a64.c | 37 +++++++++++++++----
 target/arm/translate.c     | 74 +++++++++++++++++++++++++++++---------
 5 files changed, 112 insertions(+), 77 deletions(-)

-- 
2.17.0

Comments

Peter Maydell May 17, 2018, 10:16 a.m. UTC | #1
On 16 May 2018 at 16:52, Richard Henderson <richard.henderson@linaro.org> wrote:
> diff --git a/target/arm/translate.c b/target/arm/translate.c

> index 731cf327a1..613598d090 100644

> --- a/target/arm/translate.c

> +++ b/target/arm/translate.c


Just noticed, but in the 32-bit translator where the argument to
get_fpstatus_ptr() is "is this neon?" (ie "do we use the standard
FPSCR value"), shouldn't we be passing 'true' to get_fpstatus_ptr()
for the halfprec conversions in disas_neon_data_insn() ?

I haven't tested, but I imagine that otherwise you get the wrong
results if the input is a denormal and FPSCR.FZ is 0 or if the
output should be a NaN and FPSCR.DN is 0.

> @@ -7222,53 +7247,70 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)

>                      }

>                      break;

>                  case NEON_2RM_VCVT_F16_F32:

> +                {

> +                    TCGv_ptr fpst;

> +                    TCGv_i32 ahp;

> +

>                      if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||

>                          q || (rm & 1)) {

>                          return 1;

>                      }

>                      tmp = tcg_temp_new_i32();

>                      tmp2 = tcg_temp_new_i32();

> +                    fpst = get_fpstatus_ptr(false);

> +                    ahp = get_ahp_flag();

>                      tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));

> -                    gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);

> +                    gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, fpst, ahp);

>                      tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));

> -                    gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);

> +                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, fpst, ahp);

>                      tcg_gen_shli_i32(tmp2, tmp2, 16);

>                      tcg_gen_or_i32(tmp2, tmp2, tmp);

>                      tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));

> -                    gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);

> +                    gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, fpst, ahp);

>                      tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));

>                      neon_store_reg(rd, 0, tmp2);

>                      tmp2 = tcg_temp_new_i32();

> -                    gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);

> +                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, fpst, ahp);

>                      tcg_gen_shli_i32(tmp2, tmp2, 16);

>                      tcg_gen_or_i32(tmp2, tmp2, tmp);

>                      neon_store_reg(rd, 1, tmp2);

>                      tcg_temp_free_i32(tmp);

> +                    tcg_temp_free_i32(ahp);

> +                    tcg_temp_free_ptr(fpst);

>                      break;

> +                }

>                  case NEON_2RM_VCVT_F32_F16:

> +                {

> +                    TCGv_ptr fpst;

> +                    TCGv_i32 ahp;

>                      if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||

>                          q || (rd & 1)) {

>                          return 1;

>                      }

> +                    fpst = get_fpstatus_ptr(false);

> +                    ahp = get_ahp_flag();

>                      tmp3 = tcg_temp_new_i32();

>                      tmp = neon_load_reg(rm, 0);

>                      tmp2 = neon_load_reg(rm, 1);

>                      tcg_gen_ext16u_i32(tmp3, tmp);

> -                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);

> +                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);

>                      tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));

>                      tcg_gen_shri_i32(tmp3, tmp, 16);

> -                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);

> +                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);

>                      tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));

>                      tcg_temp_free_i32(tmp);

>                      tcg_gen_ext16u_i32(tmp3, tmp2);

> -                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);

> +                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);

>                      tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));

>                      tcg_gen_shri_i32(tmp3, tmp2, 16);

> -                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);

> +                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);

>                      tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));

>                      tcg_temp_free_i32(tmp2);

>                      tcg_temp_free_i32(tmp3);

> +                    tcg_temp_free_i32(ahp);

> +                    tcg_temp_free_ptr(fpst);

>                      break;

> +                }

>                  case NEON_2RM_AESE: case NEON_2RM_AESMC:

>                      if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)

>                          || ((rm | rd) & 1)) {

> --

> 2.17.0

>


thanks
-- PMM
Richard Henderson May 17, 2018, 10:19 p.m. UTC | #2
On 05/17/2018 03:16 AM, Peter Maydell wrote:
> On 16 May 2018 at 16:52, Richard Henderson <richard.henderson@linaro.org> wrote:

>> diff --git a/target/arm/translate.c b/target/arm/translate.c

>> index 731cf327a1..613598d090 100644

>> --- a/target/arm/translate.c

>> +++ b/target/arm/translate.c

> 

> Just noticed, but in the 32-bit translator where the argument to

> get_fpstatus_ptr() is "is this neon?" (ie "do we use the standard

> FPSCR value"), shouldn't we be passing 'true' to get_fpstatus_ptr()

> for the halfprec conversions in disas_neon_data_insn() ?


Bah.  I went overboard with the aa64 fixups and forgot that aa32 hasn't had the
real fp16 extensions added.  I'll re-send.


r~
diff mbox series

Patch

diff --git a/target/arm/helper.h b/target/arm/helper.h
index ce89968b2d..047f3bc1ca 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -187,12 +187,10 @@  DEF_HELPER_3(vfp_uqtoh, f16, i64, i32, ptr)
 DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, ptr)
 DEF_HELPER_FLAGS_2(set_neon_rmode, TCG_CALL_NO_RWG, i32, i32, env)
 
-DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env)
-DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env)
-DEF_HELPER_2(neon_fcvt_f16_to_f32, f32, i32, env)
-DEF_HELPER_2(neon_fcvt_f32_to_f16, i32, f32, env)
-DEF_HELPER_FLAGS_2(vfp_fcvt_f16_to_f64, TCG_CALL_NO_RWG, f64, i32, env)
-DEF_HELPER_FLAGS_2(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, i32, f64, env)
+DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f32, TCG_CALL_NO_RWG, f32, f16, ptr, i32)
+DEF_HELPER_FLAGS_3(vfp_fcvt_f32_to_f16, TCG_CALL_NO_RWG, f16, f32, ptr, i32)
+DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f64, TCG_CALL_NO_RWG, f64, f16, ptr, i32)
+DEF_HELPER_FLAGS_3(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, f16, f64, ptr, i32)
 
 DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr)
 DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr)
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 37a1bba056..45f04244be 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -177,4 +177,16 @@  void arm_free_cc(DisasCompare *cmp);
 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label);
 void arm_gen_test_cc(int cc, TCGLabel *label);
 
+/* Return state of Alternate Half-precision flag, caller frees result */
+static inline TCGv_i32 get_ahp_flag(void)
+{
+    TCGv_i32 ret = tcg_temp_new_i32();
+
+    tcg_gen_ld_i32(ret, cpu_env,
+                   offsetof(CPUARMState, vfp.xregs[ARM_VFP_FPSCR]));
+    tcg_gen_extract_i32(ret, ret, 26, 1);
+
+    return ret;
+}
+
 #endif /* TARGET_ARM_TRANSLATE_H */
diff --git a/target/arm/helper.c b/target/arm/helper.c
index c6fd7f9479..1762042fc7 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -11540,64 +11540,24 @@  uint32_t HELPER(set_neon_rmode)(uint32_t rmode, CPUARMState *env)
 }
 
 /* Half precision conversions.  */
-static float32 do_fcvt_f16_to_f32(uint32_t a, CPUARMState *env, float_status *s)
+float32 HELPER(vfp_fcvt_f16_to_f32)(float16 a, void *fpstp, uint32_t ahp_mode)
 {
-    int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
-    float32 r = float16_to_float32(make_float16(a), ieee, s);
-    if (ieee) {
-        return float32_maybe_silence_nan(r, s);
-    }
-    return r;
+    return float16_to_float32(a, !ahp_mode, fpstp);
 }
 
-static uint32_t do_fcvt_f32_to_f16(float32 a, CPUARMState *env, float_status *s)
+float16 HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode)
 {
-    int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
-    float16 r = float32_to_float16(a, ieee, s);
-    if (ieee) {
-        r = float16_maybe_silence_nan(r, s);
-    }
-    return float16_val(r);
+    return float32_to_float16(a, !ahp_mode, fpstp);
 }
 
-float32 HELPER(neon_fcvt_f16_to_f32)(uint32_t a, CPUARMState *env)
+float64 HELPER(vfp_fcvt_f16_to_f64)(float16 a, void *fpstp, uint32_t ahp_mode)
 {
-    return do_fcvt_f16_to_f32(a, env, &env->vfp.standard_fp_status);
+    return float16_to_float64(a, !ahp_mode, fpstp);
 }
 
-uint32_t HELPER(neon_fcvt_f32_to_f16)(float32 a, CPUARMState *env)
+float16 HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
 {
-    return do_fcvt_f32_to_f16(a, env, &env->vfp.standard_fp_status);
-}
-
-float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUARMState *env)
-{
-    return do_fcvt_f16_to_f32(a, env, &env->vfp.fp_status);
-}
-
-uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUARMState *env)
-{
-    return do_fcvt_f32_to_f16(a, env, &env->vfp.fp_status);
-}
-
-float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, CPUARMState *env)
-{
-    int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
-    float64 r = float16_to_float64(make_float16(a), ieee, &env->vfp.fp_status);
-    if (ieee) {
-        return float64_maybe_silence_nan(r, &env->vfp.fp_status);
-    }
-    return r;
-}
-
-uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, CPUARMState *env)
-{
-    int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
-    float16 r = float64_to_float16(a, ieee, &env->vfp.fp_status);
-    if (ieee) {
-        r = float16_maybe_silence_nan(r, &env->vfp.fp_status);
-    }
-    return float16_val(r);
+    return float64_to_float16(a, !ahp_mode, fpstp);
 }
 
 #define float32_two make_float32(0x40000000)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index a0b0c43d12..d8284678f7 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -5147,10 +5147,15 @@  static void handle_fp_fcvt(DisasContext *s, int opcode,
         } else {
             /* Single to half */
             TCGv_i32 tcg_rd = tcg_temp_new_i32();
-            gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
+            TCGv_i32 ahp = get_ahp_flag();
+            TCGv_ptr fpst = get_fpstatus_ptr(false);
+
+            gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
             write_fp_sreg(s, rd, tcg_rd);
             tcg_temp_free_i32(tcg_rd);
+            tcg_temp_free_i32(ahp);
+            tcg_temp_free_ptr(fpst);
         }
         tcg_temp_free_i32(tcg_rn);
         break;
@@ -5163,9 +5168,13 @@  static void handle_fp_fcvt(DisasContext *s, int opcode,
             /* Double to single */
             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
         } else {
+            TCGv_ptr fpst = get_fpstatus_ptr(false);
+            TCGv_i32 ahp = get_ahp_flag();
             /* Double to half */
-            gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
+            gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
+            tcg_temp_free_ptr(fpst);
+            tcg_temp_free_i32(ahp);
         }
         write_fp_sreg(s, rd, tcg_rd);
         tcg_temp_free_i32(tcg_rd);
@@ -5175,17 +5184,21 @@  static void handle_fp_fcvt(DisasContext *s, int opcode,
     case 0x3:
     {
         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
+        TCGv_ptr tcg_fpst = get_fpstatus_ptr(false);
+        TCGv_i32 tcg_ahp = get_ahp_flag();
         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
         if (dtype == 0) {
             /* Half to single */
             TCGv_i32 tcg_rd = tcg_temp_new_i32();
-            gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
+            gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
             write_fp_sreg(s, rd, tcg_rd);
+            tcg_temp_free_ptr(tcg_fpst);
+            tcg_temp_free_i32(tcg_ahp);
             tcg_temp_free_i32(tcg_rd);
         } else {
             /* Half to double */
             TCGv_i64 tcg_rd = tcg_temp_new_i64();
-            gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
+            gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
             write_fp_dreg(s, rd, tcg_rd);
             tcg_temp_free_i64(tcg_rd);
         }
@@ -9053,12 +9066,17 @@  static void handle_2misc_narrow(DisasContext *s, bool scalar,
             } else {
                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
+                TCGv_ptr fpst = get_fpstatus_ptr(false);
+                TCGv_i32 ahp = get_ahp_flag();
+
                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
-                gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
-                gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
+                gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
+                gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
                 tcg_temp_free_i32(tcg_lo);
                 tcg_temp_free_i32(tcg_hi);
+                tcg_temp_free_ptr(fpst);
+                tcg_temp_free_i32(ahp);
             }
             break;
         case 0x56:  /* FCVTXN, FCVTXN2 */
@@ -11532,18 +11550,23 @@  static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
         /* 16 -> 32 bit fp conversion */
         int srcelt = is_q ? 4 : 0;
         TCGv_i32 tcg_res[4];
+        TCGv_ptr fpst = get_fpstatus_ptr(false);
+        TCGv_i32 ahp = get_ahp_flag();
 
         for (pass = 0; pass < 4; pass++) {
             tcg_res[pass] = tcg_temp_new_i32();
 
             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
-                                           cpu_env);
+                                           fpst, ahp);
         }
         for (pass = 0; pass < 4; pass++) {
             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
             tcg_temp_free_i32(tcg_res[pass]);
         }
+
+        tcg_temp_free_ptr(fpst);
+        tcg_temp_free_i32(ahp);
     }
 }
 
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 731cf327a1..613598d090 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -3824,38 +3824,56 @@  static int disas_vfp_insn(DisasContext *s, uint32_t insn)
                         gen_vfp_sqrt(dp);
                         break;
                     case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */
+                    {
+                        TCGv_ptr fpst = get_fpstatus_ptr(false);
+                        TCGv_i32 ahp_mode = get_ahp_flag();
                         tmp = gen_vfp_mrs();
                         tcg_gen_ext16u_i32(tmp, tmp);
                         if (dp) {
                             gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
-                                                           cpu_env);
+                                                           fpst, ahp_mode);
                         } else {
                             gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
-                                                           cpu_env);
+                                                           fpst, ahp_mode);
                         }
+                        tcg_temp_free_i32(ahp_mode);
+                        tcg_temp_free_ptr(fpst);
                         tcg_temp_free_i32(tmp);
                         break;
+                    }
                     case 5: /* vcvtt.f32.f16, vcvtt.f64.f16 */
+                    {
+                        TCGv_ptr fpst = get_fpstatus_ptr(false);
+                        TCGv_i32 ahp = get_ahp_flag();
                         tmp = gen_vfp_mrs();
                         tcg_gen_shri_i32(tmp, tmp, 16);
                         if (dp) {
                             gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
-                                                           cpu_env);
+                                                           fpst, ahp);
                         } else {
                             gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
-                                                           cpu_env);
+                                                           fpst, ahp);
                         }
                         tcg_temp_free_i32(tmp);
+                        tcg_temp_free_i32(ahp);
+                        tcg_temp_free_ptr(fpst);
                         break;
+                    }
                     case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */
+                    {
+                        TCGv_ptr fpst = get_fpstatus_ptr(false);
+                        TCGv_i32 ahp = get_ahp_flag();
                         tmp = tcg_temp_new_i32();
+
                         if (dp) {
                             gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
-                                                           cpu_env);
+                                                           fpst, ahp);
                         } else {
                             gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
-                                                           cpu_env);
+                                                           fpst, ahp);
                         }
+                        tcg_temp_free_i32(ahp);
+                        tcg_temp_free_ptr(fpst);
                         gen_mov_F0_vreg(0, rd);
                         tmp2 = gen_vfp_mrs();
                         tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
@@ -3863,15 +3881,21 @@  static int disas_vfp_insn(DisasContext *s, uint32_t insn)
                         tcg_temp_free_i32(tmp2);
                         gen_vfp_msr(tmp);
                         break;
+                    }
                     case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */
+                    {
+                        TCGv_ptr fpst = get_fpstatus_ptr(false);
+                        TCGv_i32 ahp = get_ahp_flag();
                         tmp = tcg_temp_new_i32();
                         if (dp) {
                             gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
-                                                           cpu_env);
+                                                           fpst, ahp);
                         } else {
                             gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
-                                                           cpu_env);
+                                                           fpst, ahp);
                         }
+                        tcg_temp_free_i32(ahp);
+                        tcg_temp_free_ptr(fpst);
                         tcg_gen_shli_i32(tmp, tmp, 16);
                         gen_mov_F0_vreg(0, rd);
                         tmp2 = gen_vfp_mrs();
@@ -3880,6 +3904,7 @@  static int disas_vfp_insn(DisasContext *s, uint32_t insn)
                         tcg_temp_free_i32(tmp2);
                         gen_vfp_msr(tmp);
                         break;
+                    }
                     case 8: /* cmp */
                         gen_vfp_cmp(dp);
                         break;
@@ -7222,53 +7247,70 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                     }
                     break;
                 case NEON_2RM_VCVT_F16_F32:
+                {
+                    TCGv_ptr fpst;
+                    TCGv_i32 ahp;
+
                     if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
                         q || (rm & 1)) {
                         return 1;
                     }
                     tmp = tcg_temp_new_i32();
                     tmp2 = tcg_temp_new_i32();
+                    fpst = get_fpstatus_ptr(false);
+                    ahp = get_ahp_flag();
                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
-                    gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
+                    gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, fpst, ahp);
                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
-                    gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
+                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, fpst, ahp);
                     tcg_gen_shli_i32(tmp2, tmp2, 16);
                     tcg_gen_or_i32(tmp2, tmp2, tmp);
                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
-                    gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
+                    gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, fpst, ahp);
                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
                     neon_store_reg(rd, 0, tmp2);
                     tmp2 = tcg_temp_new_i32();
-                    gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
+                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, fpst, ahp);
                     tcg_gen_shli_i32(tmp2, tmp2, 16);
                     tcg_gen_or_i32(tmp2, tmp2, tmp);
                     neon_store_reg(rd, 1, tmp2);
                     tcg_temp_free_i32(tmp);
+                    tcg_temp_free_i32(ahp);
+                    tcg_temp_free_ptr(fpst);
                     break;
+                }
                 case NEON_2RM_VCVT_F32_F16:
+                {
+                    TCGv_ptr fpst;
+                    TCGv_i32 ahp;
                     if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
                         q || (rd & 1)) {
                         return 1;
                     }
+                    fpst = get_fpstatus_ptr(false);
+                    ahp = get_ahp_flag();
                     tmp3 = tcg_temp_new_i32();
                     tmp = neon_load_reg(rm, 0);
                     tmp2 = neon_load_reg(rm, 1);
                     tcg_gen_ext16u_i32(tmp3, tmp);
-                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
+                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
                     tcg_gen_shri_i32(tmp3, tmp, 16);
-                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
+                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
                     tcg_temp_free_i32(tmp);
                     tcg_gen_ext16u_i32(tmp3, tmp2);
-                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
+                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
                     tcg_gen_shri_i32(tmp3, tmp2, 16);
-                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
+                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
                     tcg_temp_free_i32(tmp2);
                     tcg_temp_free_i32(tmp3);
+                    tcg_temp_free_i32(ahp);
+                    tcg_temp_free_ptr(fpst);
                     break;
+                }
                 case NEON_2RM_AESE: case NEON_2RM_AESMC:
                     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
                         || ((rm | rd) & 1)) {