diff mbox series

[v2,9/9] target/arm: Convert Neon one-register-and-immediate insns to decodetree

Message ID 20200522145520.6778-10-peter.maydell@linaro.org
State Superseded
Headers show
Series target/arm: Convert 2-reg-shift and 1-reg-imm Neon insns to decodetree | expand

Commit Message

Peter Maydell May 22, 2020, 2:55 p.m. UTC
Convert the insns in the one-register-and-immediate group to decodetree.

In the new decode, our asimd_imm_const() function returns a 64-bit value
rather than a 32-bit one, which means we don't need to treat cmode=14 op=1
as a special case in the decoder (it is the only encoding where the two
halves of the 64-bit value are different).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

---
 target/arm/neon-dp.decode       |  22 ++++++
 target/arm/translate-neon.inc.c | 118 ++++++++++++++++++++++++++++++++
 target/arm/translate.c          | 101 +--------------------------
 3 files changed, 142 insertions(+), 99 deletions(-)

-- 
2.20.1

Comments

Richard Henderson June 1, 2020, 11:32 p.m. UTC | #1
On 5/22/20 7:55 AM, Peter Maydell wrote:
> Convert the insns in the one-register-and-immediate group to decodetree.

> 

> In the new decode, our asimd_imm_const() function returns a 64-bit value

> rather than a 32-bit one, which means we don't need to treat cmode=14 op=1

> as a special case in the decoder (it is the only encoding where the two

> halves of the 64-bit value are different).

> 

> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

> ---

>  target/arm/neon-dp.decode       |  22 ++++++

>  target/arm/translate-neon.inc.c | 118 ++++++++++++++++++++++++++++++++

>  target/arm/translate.c          | 101 +--------------------------

>  3 files changed, 142 insertions(+), 99 deletions(-)



Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


because this is a faithful transliteration of the existing code, but...

> +    switch (cmode) {

> +    case 0: case 1:

> +        /* no-op */

> +        break;

> +    case 2: case 3:

> +        imm <<= 8;

> +        break;

> +    case 4: case 5:

> +        imm <<= 16;

> +        break;

> +    case 6: case 7:

> +        imm <<= 24;

> +        break;

> +    case 8: case 9:

> +        imm |= imm << 16;

> +        break;

> +    case 10: case 11:

> +        imm = (imm << 8) | (imm << 24);

> +        break;


It might be clearer to use dup_const for each case, which would more closely
match the pseudocode.  E.g. here,

    return dup_const(MO_16, imm << 8);

> +        imm |= (imm << 8) | (imm << 16) | (imm << 24);


    return dup_const(MO_8, imm);

Something to remember for a follow-up.

r~
Peter Maydell June 2, 2020, 8:58 a.m. UTC | #2
On Tue, 2 Jun 2020 at 00:32, Richard Henderson
<richard.henderson@linaro.org> wrote:
> It might be clearer to use dup_const for each case, which would more closely

> match the pseudocode.  E.g. here,

>

>     return dup_const(MO_16, imm << 8);

>

> > +        imm |= (imm << 8) | (imm << 16) | (imm << 24);

>

>     return dup_const(MO_8, imm);


Yeah, I did think about this, but figured that keeping the
existing code structure was clearer for purposes of reviewing
this refactoring series.

thanks
-- PMM
diff mbox series

Patch

diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index e217d51670d..1643d84e9c2 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -373,3 +373,25 @@  VCVT_SF_2sh      1111 001 0 1 . ...... .... 1110 0 . . 1 .... @2reg_vcvt
 VCVT_UF_2sh      1111 001 1 1 . ...... .... 1110 0 . . 1 .... @2reg_vcvt
 VCVT_FS_2sh      1111 001 0 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt
 VCVT_FU_2sh      1111 001 1 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt
+
+######################################################################
+# 1-reg-and-modified-immediate grouping:
+# 1111 001 i 1 D 000 imm:3 Vd:4 cmode:4 0 Q op 1 Vm:4
+######################################################################
+
+&1reg_imm        vd q imm cmode op
+
+%asimd_imm_value 24:1 16:3 0:4
+
+@1reg_imm        .... ... . . . ... ... .... .... . q:1 . . .... \
+                 &1reg_imm imm=%asimd_imm_value vd=%vd_dp
+
+# The cmode/op bits here decode VORR/VBIC/VMOV/VMNV, but
+# not in a way we can conveniently represent in decodetree without
+# a lot of repetition:
+# VORR: op=0, (cmode & 1) && cmode < 12
+# VBIC: op=1, (cmode & 1) && cmode < 12
+# VMOV: everything else
+# So we have a single decode line and check the cmode/op in the
+# trans function.
+Vimm_1r          1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index 8d1c58eddc2..39c7e70373a 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -1817,3 +1817,121 @@  DO_FP_2SH(VCVT_SF, gen_helper_vfp_sltos)
 DO_FP_2SH(VCVT_UF, gen_helper_vfp_ultos)
 DO_FP_2SH(VCVT_FS, gen_helper_vfp_tosls_round_to_zero)
 DO_FP_2SH(VCVT_FU, gen_helper_vfp_touls_round_to_zero)
+
+static uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
+{
+    /*
+     * Expand the encoded constant.
+     * Note that cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
+     * We choose to not special-case this and will behave as if a
+     * valid constant encoding of 0 had been given.
+     * cmode = 15 op = 1 must UNDEF; we assume decode has handled that.
+     */
+    switch (cmode) {
+    case 0: case 1:
+        /* no-op */
+        break;
+    case 2: case 3:
+        imm <<= 8;
+        break;
+    case 4: case 5:
+        imm <<= 16;
+        break;
+    case 6: case 7:
+        imm <<= 24;
+        break;
+    case 8: case 9:
+        imm |= imm << 16;
+        break;
+    case 10: case 11:
+        imm = (imm << 8) | (imm << 24);
+        break;
+    case 12:
+        imm = (imm << 8) | 0xff;
+        break;
+    case 13:
+        imm = (imm << 16) | 0xffff;
+        break;
+    case 14:
+        if (op) {
+            /*
+             * This is the only case where the top and bottom 32 bits
+             * of the encoded constant differ.
+             */
+            uint64_t imm64 = 0;
+            int n;
+
+            for (n = 0; n < 8; n++) {
+                if (imm & (1 << n)) {
+                    imm64 |= (0xffULL << (n * 8));
+                }
+            }
+            return imm64;
+        }
+        imm |= (imm << 8) | (imm << 16) | (imm << 24);
+        break;
+    case 15:
+        imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
+            | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
+        break;
+    }
+    if (op) {
+        imm = ~imm;
+    }
+    return dup_const(MO_32, imm);
+}
+
+static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
+                        GVecGen2iFn *fn)
+{
+    uint64_t imm;
+    int reg_ofs, vec_size;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
+        return false;
+    }
+
+    if (a->vd & a->q) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    reg_ofs = neon_reg_offset(a->vd, 0);
+    vec_size = a->q ? 16 : 8;
+    imm = asimd_imm_const(a->imm, a->cmode, a->op);
+
+    fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
+    return true;
+}
+
+static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        int64_t c, uint32_t oprsz, uint32_t maxsz)
+{
+    tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
+}
+
+static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
+{
+    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
+    GVecGen2iFn *fn;
+
+    if ((a->cmode & 1) && a->cmode < 12) {
+        /* for op=1, the imm will be inverted, so BIC becomes AND. */
+        fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
+    } else {
+        /* There is one unallocated cmode/op combination in this space */
+        if (a->cmode == 15 && a->op == 1) {
+            return false;
+        }
+        fn = gen_VMOV_1r;
+    }
+    return do_1reg_imm(s, a, fn);
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 9cc44e6258e..20d07e99053 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -5232,105 +5232,8 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         /* Three register same length: handled by decodetree */
         return 1;
     } else if (insn & (1 << 4)) {
-        if ((insn & 0x00380080) != 0) {
-            /* Two registers and shift: handled by decodetree */
-            return 1;
-        } else { /* (insn & 0x00380080) == 0 */
-            int invert, reg_ofs, vec_size;
-
-            if (q && (rd & 1)) {
-                return 1;
-            }
-
-            op = (insn >> 8) & 0xf;
-            /* One register and immediate.  */
-            imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
-            invert = (insn & (1 << 5)) != 0;
-            /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
-             * We choose to not special-case this and will behave as if a
-             * valid constant encoding of 0 had been given.
-             */
-            switch (op) {
-            case 0: case 1:
-                /* no-op */
-                break;
-            case 2: case 3:
-                imm <<= 8;
-                break;
-            case 4: case 5:
-                imm <<= 16;
-                break;
-            case 6: case 7:
-                imm <<= 24;
-                break;
-            case 8: case 9:
-                imm |= imm << 16;
-                break;
-            case 10: case 11:
-                imm = (imm << 8) | (imm << 24);
-                break;
-            case 12:
-                imm = (imm << 8) | 0xff;
-                break;
-            case 13:
-                imm = (imm << 16) | 0xffff;
-                break;
-            case 14:
-                imm |= (imm << 8) | (imm << 16) | (imm << 24);
-                if (invert) {
-                    imm = ~imm;
-                }
-                break;
-            case 15:
-                if (invert) {
-                    return 1;
-                }
-                imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
-                      | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
-                break;
-            }
-            if (invert) {
-                imm = ~imm;
-            }
-
-            reg_ofs = neon_reg_offset(rd, 0);
-            vec_size = q ? 16 : 8;
-
-            if (op & 1 && op < 12) {
-                if (invert) {
-                    /* The immediate value has already been inverted,
-                     * so BIC becomes AND.
-                     */
-                    tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
-                                      vec_size, vec_size);
-                } else {
-                    tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
-                                     vec_size, vec_size);
-                }
-            } else {
-                /* VMOV, VMVN.  */
-                if (op == 14 && invert) {
-                    TCGv_i64 t64 = tcg_temp_new_i64();
-
-                    for (pass = 0; pass <= q; ++pass) {
-                        uint64_t val = 0;
-                        int n;
-
-                        for (n = 0; n < 8; n++) {
-                            if (imm & (1 << (n + pass * 8))) {
-                                val |= 0xffull << (n * 8);
-                            }
-                        }
-                        tcg_gen_movi_i64(t64, val);
-                        neon_store_reg64(t64, rd + pass);
-                    }
-                    tcg_temp_free_i64(t64);
-                } else {
-                    tcg_gen_gvec_dup_imm(MO_32, reg_ofs, vec_size,
-                                         vec_size, imm);
-                }
-            }
-        }
+        /* Two registers and shift or reg and imm: handled by decodetree */
+        return 1;
     } else { /* (insn & 0x00800010 == 0x00800000) */
         if (size != 3) {
             op = (insn >> 8) & 0xf;