diff mbox series

[20/36] target/arm: Convert Neon 3-reg-same VQRDMLAH/VQRDMLSH to decodetree

Message ID 20200430181003.21682-21-peter.maydell@linaro.org
State Superseded
Headers show
Series target/arm: Convert Neon to decodetree (part 1) | expand

Commit Message

Peter Maydell April 30, 2020, 6:09 p.m. UTC
Convert the Neon VQRDMLAH and VQRDMLSH insns in the 3-reg-same group
to decodetree.  These don't use do_3same() because they want to
operate on VFP double registers, whose offsets are different from the
neon_reg_offset() calculations do_3same does.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

---
 target/arm/translate-neon.inc.c | 57 +++++++++++++++++++++++++++++++++
 target/arm/translate.c          | 36 ++-------------------
 target/arm/neon-dp.decode       |  3 ++
 3 files changed, 62 insertions(+), 34 deletions(-)

-- 
2.20.1

Comments

Richard Henderson April 30, 2020, 8:03 p.m. UTC | #1
On 4/30/20 11:09 AM, Peter Maydell wrote:
> Convert the Neon VQRDMLAH and VQRDMLSH insns in the 3-reg-same group

> to decodetree.  These don't use do_3same() because they want to

> operate on VFP double registers, whose offsets are different from the

> neon_reg_offset() calculations do_3same does.

> 

> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

> ---

>  target/arm/translate-neon.inc.c | 57 +++++++++++++++++++++++++++++++++

>  target/arm/translate.c          | 36 ++-------------------

>  target/arm/neon-dp.decode       |  3 ++

>  3 files changed, 62 insertions(+), 34 deletions(-)


Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~
Richard Henderson April 30, 2020, 8:28 p.m. UTC | #2
On 4/30/20 11:09 AM, Peter Maydell wrote:
> These don't use do_3same() because they want to

> operate on VFP double registers, whose offsets are different from the

> neon_reg_offset() calculations do_3same does.


Actually, no, it's just a roundabout way of computing the same register offset.

vfp_reg_offset(true, reg)

->  vfp.zregs[reg >> 1].d[reg & 1];

neon_reg_offset(reg, 0)

->  vfp_reg_offset(false, 2 * reg + 0)
->  vfp.zregs[(2 * reg) >> 2].d[((2 * reg) >> 1) & 1]
    + ((2 * reg) & 1) * offsetof(lower/upper)
->  vfp.zregs[reg >> 1].d[reg & 1] + 0


r~
Peter Maydell May 1, 2020, 2:23 p.m. UTC | #3
On Thu, 30 Apr 2020 at 21:28, Richard Henderson
<richard.henderson@linaro.org> wrote:
>

> On 4/30/20 11:09 AM, Peter Maydell wrote:

> > These don't use do_3same() because they want to

> > operate on VFP double registers, whose offsets are different from the

> > neon_reg_offset() calculations do_3same does.

>

> Actually, no, it's just a roundabout way of computing the same register offset.


So it is. I could have sworn I'd written this using
do_3same the first time around and found it didn't work,
but maybe I'm misremembering a change I had to make to
some other patch.

thanks
-- PMM
diff mbox series

Patch

diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index 50b77b6d714..c8beb048fa2 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -712,3 +712,60 @@  DO_3SAME_GVEC3_NO_SZ_3(VMLS, mls_op)
 
 DO_3SAME_GVEC3_SHIFT(VSHL_S, sshl_op)
 DO_3SAME_GVEC3_SHIFT(VSHL_U, ushl_op)
+
+static bool do_vqrdmlah(DisasContext *s, arg_3same *a,
+                        gen_helper_gvec_3_ptr *fn)
+{   /* Shared expander for VQRDMLAH/VQRDMLSH; fn is the per-size helper. */
+    int vec_size = a->q ? 16 : 8; /* gvec oprsz/maxsz: 16 if Q, else 8 */
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
+        !dc_isar_feature(aa32_rdm, s)) {
+        return false; /* needs both Neon and the aa32_rdm feature */
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vn | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (!fn) {
+        return false; /* bad size */
+    }
+
+    if ((a->vn | a->vm | a->vd) & a->q) {
+        return false; /* Q set (assuming q is 0/1) with an odd register */
+    }
+
+    if (!vfp_access_check(s)) {
+        return true; /* presumably the check raised the exception: confirm */
+    }
+
+    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
+                       vfp_reg_offset(1, a->vn),
+                       vfp_reg_offset(1, a->vm),
+                       cpu_env, vec_size, vec_size, 0, fn);
+    return true; /* insn handled; helper call emitted with cpu_env */
+}
+
+static bool trans_VQRDMLAH_3s(DisasContext *s, arg_3same *a)
+{   /* VQRDMLAH: pick the helper by element size, then expand. */
+    static gen_helper_gvec_3_ptr * const fns[] = { /* indexed by a->size */
+        NULL, /* size 0: no helper, do_vqrdmlah rejects as bad size */
+        gen_helper_gvec_qrdmlah_s16,
+        gen_helper_gvec_qrdmlah_s32,
+        NULL, /* size 3: no helper, rejected likewise */
+    };
+    return do_vqrdmlah(s, a, fns[a->size]); /* assumes a->size is 0..3 */
+}
+
+static bool trans_VQRDMLSH_3s(DisasContext *s, arg_3same *a)
+{   /* VQRDMLSH: pick the helper by element size, then expand. */
+    static gen_helper_gvec_3_ptr * const fns[] = { /* indexed by a->size */
+        NULL, /* size 0: no helper, do_vqrdmlah rejects as bad size */
+        gen_helper_gvec_qrdmlsh_s16,
+        gen_helper_gvec_qrdmlsh_s32,
+        NULL, /* size 3: no helper, rejected likewise */
+    };
+    return do_vqrdmlah(s, a, fns[a->size]); /* assumes a->size is 0..3 */
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index ad60b7190f9..adc42362469 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -3629,22 +3629,6 @@  static const uint8_t neon_2rm_sizes[] = {
     [NEON_2RM_VCVT_UF] = 0x4,
 };
 
-
-/* Expand v8.1 simd helper.  */
-static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
-                         int q, int rd, int rn, int rm)
-{
-    if (dc_isar_feature(aa32_rdm, s)) {
-        int opr_sz = (1 + q) * 8;
-        tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
-                           vfp_reg_offset(1, rn),
-                           vfp_reg_offset(1, rm), cpu_env,
-                           opr_sz, opr_sz, 0, fn);
-        return 0;
-    }
-    return 1;
-}
-
 static void gen_ceq0_i32(TCGv_i32 d, TCGv_i32 a)
 {
     tcg_gen_setcondi_i32(TCG_COND_EQ, d, a, 0);
@@ -4818,15 +4802,7 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             if (!u) {
                 break;  /* VPADD */
             }
-            /* VQRDMLAH */
-            switch (size) {
-            case 1:
-                return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
-                                     q, rd, rn, rm);
-            case 2:
-                return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
-                                     q, rd, rn, rm);
-            }
+            /* VQRDMLAH : handled by decodetree */
             return 1;
 
         case NEON_3R_VFM_VQRDMLSH:
@@ -4837,15 +4813,7 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                 }
                 break;
             }
-            /* VQRDMLSH */
-            switch (size) {
-            case 1:
-                return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
-                                     q, rd, rn, rm);
-            case 2:
-                return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
-                                     q, rd, rn, rm);
-            }
+            /* VQRDMLSH : handled by decodetree */
             return 1;
 
         case NEON_3R_VADD_VSUB:
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index ec3a92fe753..ce0db476c88 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -84,3 +84,6 @@  VMLS_3s          1111 001 1 0 . .. .... .... 1001 . . . 0 .... @3same
 
 VMUL_3s          1111 001 0 0 . .. .... .... 1001 . . . 1 .... @3same
 VMUL_p_3s        1111 001 1 0 . .. .... .... 1001 . . . 1 .... @3same
+
+VQRDMLAH_3s      1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same
+VQRDMLSH_3s      1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same