[v8,11/45] target/arm: Implement the ADDG, SUBG instructions

Message ID 20200623193658.623279-12-richard.henderson@linaro.org
State Superseded
Headers show
Series
  • target/arm: Implement ARMv8.5-MemTag, system mode
Related show

Commit Message

Richard Henderson June 23, 2020, 7:36 p.m.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
v2: Shift offset in translate; use extract32.
v6: Implement inline for !ATA.
v8: Use separate decode function.
---
 target/arm/helper-a64.h    |  1 +
 target/arm/internals.h     |  9 +++++++
 target/arm/mte_helper.c    | 10 ++++++++
 target/arm/translate-a64.c | 51 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 71 insertions(+)

-- 
2.25.1

Comments

Peter Maydell June 25, 2020, 10:39 a.m. | #1
On Tue, 23 Jun 2020 at 20:37, Richard Henderson
<richard.henderson@linaro.org> wrote:
>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> +/*

> + * Add/subtract (immediate, with tags)

> + *

> + *  31 30 29 28         23 22 21     16 14      10 9   5 4   0

> + * +--+--+--+-------------+--+---------+--+-------+-----+-----+

> + * |sf|op| S| 1 0 0 0 1 0 |o2|  uimm6  |o3| uimm4 |  Rn | Rd  |

> + * +--+--+--+-------------+--+---------+--+-------+-----+-----+


Bit 23 should be '1'.

> + *

> + *    op: 0 -> add, 1 -> sub

> + */

> +static void disas_add_sub_imm_with_tags(DisasContext *s, uint32_t insn)

> +{

> +    int rd = extract32(insn, 0, 5);

> +    int rn = extract32(insn, 5, 5);

> +    int uimm4 = extract32(insn, 10, 4);

> +    int uimm6 = extract32(insn, 16, 6);

> +    bool sub_op = extract32(insn, 30, 1);

> +    TCGv_i64 tcg_rn, tcg_rd;

> +    int imm;

> +

> +    /* Test all of sf=1, S=0, o2=0, o3=0.  */

> +    if ((insn & 0xc040e000u) != 0x80000000u ||


This bit pattern doesn't seem to match the comment:
0xc is 0b1100 so that's sf and op, not sf and S;
0xe is 0b1110 so that's testing the top bit of uimm4
as well as op3. I think it should be 0xa040c000.
Though the existence of this bug suggests that it would
be clearer to test the individual fields :-)

Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>



Interestingly clang (6.0.0) gets pretty close to optimizing
the field test version:

bool foo(uint32_t insn) {
   return (insn & 0xa040c000) != 0x80000000u;
}

bool bar(uint32_t insn) {
   bool sf = extract32(insn, 31, 1);
   bool s = extract32(insn, 29, 1);
   bool o2 = extract32(insn, 22, 1);
   int op2 = extract32(insn, 14, 2);

   return sf != 1 || s != 0 || o2 != 0 || op2 != 0;
}

gives
0000000000000000 <foo>:
   0:   81 e7 00 c0 40 a0       and    $0xa040c000,%edi
   6:   81 ff 00 00 00 80       cmp    $0x80000000,%edi
   c:   0f 95 c0                setne  %al
   f:   c3                      retq

0000000000000010 <bar>:
  10:   89 f8                   mov    %edi,%eax
  12:   25 00 00 00 a0          and    $0xa0000000,%eax
  17:   3d 00 00 00 80          cmp    $0x80000000,%eax
  1c:   75 0a                   jne    28 <bar+0x18>
  1e:   f7 c7 00 c0 40 00       test   $0x40c000,%edi
  24:   0f 95 c0                setne  %al
  27:   c3                      retq
  28:   b0 01                   mov    $0x1,%al
  2a:   c3                      retq

(I don't know why it's split it into two tests: it's
not that it's testing "must be 1" in one part and "must
be 0" in the other because it has checked both sf and s
in the first comparison.)

gcc (7.4.0) makes more of a hash of it though:

  10:   89 f8                   mov    %edi,%eax
  12:   89 fa                   mov    %edi,%edx
  14:   c1 e8 1f                shr    $0x1f,%eax
  17:   c1 ea 1d                shr    $0x1d,%edx
  1a:   83 f0 01                xor    $0x1,%eax
  1d:   09 d0                   or     %edx,%eax
  1f:   83 e0 01                and    $0x1,%eax
  22:   75 13                   jne    37 <bar+0x27>
  24:   89 f8                   mov    %edi,%eax
  26:   c1 ef 16                shr    $0x16,%edi
  29:   c1 e8 0e                shr    $0xe,%eax
  2c:   83 e7 01                and    $0x1,%edi
  2f:   83 e0 03                and    $0x3,%eax
  32:   09 f8                   or     %edi,%eax
  34:   0f 95 c0                setne  %al
  37:   f3 c3                   repz retq

thanks
-- PMM

Patch

diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index 587ccbe42f..6c116481e8 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -105,3 +105,4 @@  DEF_HELPER_FLAGS_2(xpaci, TCG_CALL_NO_RWG_SE, i64, env, i64)
 DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64)
 
 DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_4(addsubg, TCG_CALL_NO_RWG_SE, i64, env, i64, s32, i32)
diff --git a/target/arm/internals.h b/target/arm/internals.h
index ae611a6ff5..5c69d4e5a5 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1261,6 +1261,15 @@  void arm_log_exception(int idx);
  */
 #define GMID_EL1_BS  6
 
+/* We associate one allocation tag per 16 bytes, the minimum.  */
+#define LOG2_TAG_GRANULE 4
+#define TAG_GRANULE      (1 << LOG2_TAG_GRANULE)
+
+static inline int allocation_tag_from_addr(uint64_t ptr)
+{
+    return extract64(ptr, 56, 4);
+}
+
 static inline uint64_t address_with_allocation_tag(uint64_t ptr, int rtag)
 {
     return deposit64(ptr, 56, 4, rtag);
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index 539a04de84..9ab9ed749d 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -70,3 +70,13 @@  uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm)
 
     return address_with_allocation_tag(rn, rtag);
 }
+
+uint64_t HELPER(addsubg)(CPUARMState *env, uint64_t ptr,
+                         int32_t offset, uint32_t tag_offset)
+{
+    int start_tag = allocation_tag_from_addr(ptr);
+    uint16_t exclude = extract32(env->cp15.gcr_el1, 0, 16);
+    int rtag = choose_nonexcluded_tag(start_tag, tag_offset, exclude);
+
+    return address_with_allocation_tag(ptr + offset, rtag);
+}
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 59f44fc412..e9bc7e90af 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -3808,6 +3808,54 @@  static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
     tcg_temp_free_i64(tcg_result);
 }
 
+/*
+ * Add/subtract (immediate, with tags)
+ *
+ *  31 30 29 28         23 22 21     16 14      10 9   5 4   0
+ * +--+--+--+-------------+--+---------+--+-------+-----+-----+
+ * |sf|op| S| 1 0 0 0 1 0 |o2|  uimm6  |o3| uimm4 |  Rn | Rd  |
+ * +--+--+--+-------------+--+---------+--+-------+-----+-----+
+ *
+ *    op: 0 -> add, 1 -> sub
+ */
+static void disas_add_sub_imm_with_tags(DisasContext *s, uint32_t insn)
+{
+    int rd = extract32(insn, 0, 5);
+    int rn = extract32(insn, 5, 5);
+    int uimm4 = extract32(insn, 10, 4);
+    int uimm6 = extract32(insn, 16, 6);
+    bool sub_op = extract32(insn, 30, 1);
+    TCGv_i64 tcg_rn, tcg_rd;
+    int imm;
+
+    /* Test all of sf=1, S=0, o2=0, o3=0.  */
+    if ((insn & 0xc040e000u) != 0x80000000u ||
+        !dc_isar_feature(aa64_mte_insn_reg, s)) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    imm = uimm6 << LOG2_TAG_GRANULE;
+    if (sub_op) {
+        imm = -imm;
+    }
+
+    tcg_rn = cpu_reg_sp(s, rn);
+    tcg_rd = cpu_reg_sp(s, rd);
+
+    if (s->ata) {
+        TCGv_i32 offset = tcg_const_i32(imm);
+        TCGv_i32 tag_offset = tcg_const_i32(uimm4);
+
+        gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn, offset, tag_offset);
+        tcg_temp_free_i32(tag_offset);
+        tcg_temp_free_i32(offset);
+    } else {
+        tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
+        gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
+    }
+}
+
 /* The input should be a value in the bottom e bits (with higher
  * bits zero); returns that value replicated into every element
  * of size e in a 64 bit integer.
@@ -4170,6 +4218,9 @@  static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
     case 0x22: /* Add/subtract (immediate) */
         disas_add_sub_imm(s, insn);
         break;
+    case 0x23: /* Add/subtract (immediate, with tags) */
+        disas_add_sub_imm_with_tags(s, insn);
+        break;
     case 0x24: /* Logical (immediate) */
         disas_logic_imm(s, insn);
         break;