diff mbox series

[11/19] target/arm: Replace ARM_FEATURE_VFP4 with isar_feature_aa32_simdfmac

Message ID 20200214181547.21408-12-richard.henderson@linaro.org
State New
Headers show
Series target/arm: vfp feature and decodetree cleanup | expand

Commit Message

Richard Henderson Feb. 14, 2020, 6:15 p.m. UTC
All remaining tests for VFP4 are for fused multiply-add insns.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 target/arm/cpu.h               |  5 +++++
 target/arm/translate-vfp.inc.c | 12 ++++++++----
 target/arm/translate.c         |  2 +-
 3 files changed, 14 insertions(+), 5 deletions(-)

-- 
2.20.1

Comments

Peter Maydell Feb. 20, 2020, 4:37 p.m. UTC | #1
On Fri, 14 Feb 2020 at 18:16, Richard Henderson
<richard.henderson@linaro.org> wrote:
>

> All remaining tests for VFP4 are for fused multiply-add insns.

>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  target/arm/cpu.h               |  5 +++++

>  target/arm/translate-vfp.inc.c | 12 ++++++++----

>  target/arm/translate.c         |  2 +-

>  3 files changed, 14 insertions(+), 5 deletions(-)

>

> diff --git a/target/arm/cpu.h b/target/arm/cpu.h

> index 4ff28418df..f27b8e35df 100644

> --- a/target/arm/cpu.h

> +++ b/target/arm/cpu.h

> @@ -3468,6 +3468,11 @@ static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id)

>      return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 1;

>  }

>

> +static inline bool isar_feature_aa32_simdfmac(const ARMISARegisters *id)

> +{

> +    return FIELD_EX32(id->mvfr1, MVFR1, SIMDFMAC) != 0;

> +}


This is tricky, because the SIMDFMAC register
field indicates "do we have fused-multiply-accumulate
for either VFP or Neon", so in a VFP-no-Neon core or
a Neon-no-VFP core it will be 1 but can't be used on its
own as a gate on "should this insn be present".

Currently in the part of arm_cpu_realize() which handles
the user having selected vfp=off and/or neon=off we
do allow (for AArch32 cores) both of those combinations.

trans_VFM_dp already tests aa32_fpdp_v2, so I think the
main thing we need to do is add a test on aa32_fpsp_v2 to
trans_VFM_sp.

We clear the SIMDFMAC field to 0 in the !has_neon condition,
and I think that should actually be in the !neon && !vfp part.

I propose to squash in the following and beef up the commit message:

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index f641478fc80..d4c73a20b6a 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3506,6 +3506,13 @@ static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id)
     return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 1;
 }

+/*
+ * Note that this ID register field covers both VFP and Neon FMAC,
+ * so should usually be tested in combination with some other
+ * check that confirms the presence of whichever of VFP or Neon is
+ * relevant, to avoid accidentally enabling a Neon feature on
+ * a VFP-no-Neon core or vice-versa.
+ */
 static inline bool isar_feature_aa32_simdfmac(const ARMISARegisters *id)
 {
     return FIELD_EX32(id->mvfr1, MVFR1, SIMDFMAC) != 0;
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index d5a75c265ac..95ada81ebae 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1510,7 +1510,6 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         u = FIELD_DP32(u, MVFR1, SIMDINT, 0);
         u = FIELD_DP32(u, MVFR1, SIMDSP, 0);
         u = FIELD_DP32(u, MVFR1, SIMDHP, 0);
-        u = FIELD_DP32(u, MVFR1, SIMDFMAC, 0);
         cpu->isar.mvfr1 = u;

         u = cpu->isar.mvfr2;
@@ -1533,6 +1532,11 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         u = cpu->isar.mvfr0;
         u = FIELD_DP32(u, MVFR0, SIMDREG, 0);
         cpu->isar.mvfr0 = u;
+
+        /* Despite the name, this field covers both VFP and Neon */
+        u = cpu->isar.mvfr1;
+        u = FIELD_DP32(u, MVFR1, SIMDFMAC, 0);
+        cpu->isar.mvfr1;
     }

     if (arm_feature(env, ARM_FEATURE_M) && !cpu->has_dsp) {
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index f6f7601fe2a..69052d840a4 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -1805,8 +1805,13 @@ static bool trans_VFM_sp(DisasContext *s, arg_VFM_sp *a)
      * Present in VFPv4 only.
      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
+     * Note that we can't rely on the SIMDFMAC check alone, because
+     * in a Neon-no-VFP core that ID register field will be non-zero.
      */
-    if (!dc_isar_feature(aa32_simdfmac, s)) {
+    if (!dc_isar_feature(aa32_simdfmac, s) ||
+        !dc_isar_feature(aa32_fpsp_v2, s)) {
+        return false;
+    }
         return false;
     }
     if (s->vec_len != 0 || s->vec_stride != 0) {


thanks
-- PMM
Peter Maydell Feb. 20, 2020, 4:41 p.m. UTC | #2
On Thu, 20 Feb 2020 at 16:37, Peter Maydell <peter.maydell@linaro.org> wrote:
>

> On Fri, 14 Feb 2020 at 18:16, Richard Henderson

> <richard.henderson@linaro.org> wrote:

> >

> > All remaining tests for VFP4 are for fused multiply-add insns.

> >

> > Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> > ---

> >  target/arm/cpu.h               |  5 +++++

> >  target/arm/translate-vfp.inc.c | 12 ++++++++----

> >  target/arm/translate.c         |  2 +-

> >  3 files changed, 14 insertions(+), 5 deletions(-)

> >

> > diff --git a/target/arm/cpu.h b/target/arm/cpu.h

> > index 4ff28418df..f27b8e35df 100644

> > --- a/target/arm/cpu.h

> > +++ b/target/arm/cpu.h

> > @@ -3468,6 +3468,11 @@ static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id)

> >      return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 1;

> >  }

> >

> > +static inline bool isar_feature_aa32_simdfmac(const ARMISARegisters *id)

> > +{

> > +    return FIELD_EX32(id->mvfr1, MVFR1, SIMDFMAC) != 0;

> > +}

>

> This is tricky, because the SIMDFMAC register

> field indicates "do we have fused-multiply-accumulate

> for either VFP or Neon", so in a VFP-no-Neon core or

> a Neon-no-VFP core it will be 1 but can't be used on its

> own as a gate on "should this insn be present".

>

> Currently in the part of arm_cpu_realize() which handles

> the user having selected vfp=off and/or neon=off we

> do allow (for AArch32 cores) both of those combinations.

>

> trans_VFM_dp already tests aa32_fpdp_v2, so I think the

> main thing we need to do is add a test on aa32_fpsp_v2 to

> trans_VFM_sp.

>

> We clear the SIMDFMAC field to 0 in the !has_neon condition,

> and I think that should actually be in the !neon && !vfp part.

>

> I propose to squash in the following and beef up the commit message:

>


> +        /* Despite the name, this field covers both VFP and Neon */

> +        u = cpu->isar.mvfr1;

> +        u = FIELD_DP32(u, MVFR1, SIMDFMAC, 0);

> +        cpu->isar.mvfr1;


 ... "cpu->isar.mvfr1 = u;", obviously.


> -    if (!dc_isar_feature(aa32_simdfmac, s)) {

> +    if (!dc_isar_feature(aa32_simdfmac, s) ||

> +        !dc_isar_feature(aa32_fpsp_v2, s)) {

> +        return false;

> +    }


and not that extra "}".

-- PMM
Richard Henderson Feb. 20, 2020, 5:55 p.m. UTC | #3
On 2/20/20 8:37 AM, Peter Maydell wrote:
> This is tricky, because the SIMDFMAC register

> field indicates "do we have fused-multiply-accumulate

> for either VFP or Neon", so in a VFP-no-Neon core or

> a Neon-no-VFP core it will be 1 but can't be used on its

> own as a gate on "should this insn be present".

> 

> Currently in the part of arm_cpu_realize() which handles

> the user having selected vfp=off and/or neon=off we

> do allow (for AArch32 cores) both of those combinations.

> 

> trans_VFM_dp already tests aa32_fpdp_v2, so I think the

> main thing we need to do is add a test on aa32_fpsp_v2 to

> trans_VFM_sp.

> 

> We clear the SIMDFMAC field to 0 in the !has_neon condition,

> and I think that should actually be in the !neon && !vfp part.

> 

> I propose to squash in the following and beef up the commit message:


Good catch.  Makes sense.


r~
diff mbox series

Patch

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 4ff28418df..f27b8e35df 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3468,6 +3468,11 @@  static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id)
     return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 1;
 }
 
+static inline bool isar_feature_aa32_simdfmac(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->mvfr1, MVFR1, SIMDFMAC) != 0;
+}
+
 static inline bool isar_feature_aa32_vsel(const ARMISARegisters *id)
 {
     return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 1;
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index 8913320259..f6f7601fe2 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -1806,8 +1806,10 @@  static bool trans_VFM_sp(DisasContext *s, arg_VFM_sp *a)
      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
      */
-    if (!arm_dc_feature(s, ARM_FEATURE_VFP4) ||
-        (s->vec_len != 0 || s->vec_stride != 0)) {
+    if (!dc_isar_feature(aa32_simdfmac, s)) {
+        return false;
+    }
+    if (s->vec_len != 0 || s->vec_stride != 0) {
         return false;
     }
 
@@ -1864,8 +1866,10 @@  static bool trans_VFM_dp(DisasContext *s, arg_VFM_dp *a)
      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
      */
-    if (!arm_dc_feature(s, ARM_FEATURE_VFP4) ||
-        (s->vec_len != 0 || s->vec_stride != 0)) {
+    if (!dc_isar_feature(aa32_simdfmac, s)) {
+        return false;
+    }
+    if (s->vec_len != 0 || s->vec_stride != 0) {
         return false;
     }
 
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 3b9bf13933..0da780102c 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -4877,7 +4877,7 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             }
             break;
         case NEON_3R_VFM_VQRDMLSH:
-            if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
+            if (!dc_isar_feature(aa32_simdfmac, s)) {
                 return 1;
             }
             break;