[v5,16/22] target/arm: Implement data cache set allocation tags

Message ID 20191011134744.2477-17-richard.henderson@linaro.org
State New
Headers show
Series
  • [v5,01/22] target/arm: Add MTE_ACTIVE to tb_flags
Related show

Commit Message

Richard Henderson Oct. 11, 2019, 1:47 p.m.
This is DC GVA and DC GZVA.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
v2: Use allocation_tag_mem + memset.
v3: Require pre-cleaned addresses.
---
 target/arm/cpu.h           |  4 +++-
 target/arm/helper-a64.h    |  1 +
 target/arm/helper.c        | 16 ++++++++++++++++
 target/arm/mte_helper.c    | 28 ++++++++++++++++++++++++++++
 target/arm/translate-a64.c |  9 +++++++++
 5 files changed, 57 insertions(+), 1 deletion(-)

-- 
2.17.1

Comments

Peter Maydell Dec. 5, 2019, 6:17 p.m. | #1
On Fri, 11 Oct 2019 at 14:50, Richard Henderson
<richard.henderson@linaro.org> wrote:
>

> This is DC GVA and DC GZVA.

>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

> v2: Use allocation_tag_mem + memset.

> v3: Require pre-cleaned addresses.

> ---


> diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c

> index f1315bae37..e8d8a6bedb 100644

> --- a/target/arm/mte_helper.c

> +++ b/target/arm/mte_helper.c

> @@ -510,3 +510,31 @@ void HELPER(stzgm)(CPUARMState *env, uint64_t ptr, uint64_t val)

>          }

>      }

>  }

> +

> +void HELPER(dc_gva)(CPUARMState *env, uint64_t ptr)

> +{

> +    ARMCPU *cpu = env_archcpu(env);

> +    size_t blocklen = 4 << cpu->dcz_blocksize;

> +    int el;

> +    uint64_t sctlr;

> +    uint8_t *mem;

> +    int rtag;

> +

> +    ptr = QEMU_ALIGN_DOWN(ptr, blocklen);

> +

> +    /* Trap if accessing an invalid page.  */

> +    mem = allocation_tag_mem(env, ptr, true, GETPC());

> +

> +    /* No action if page does not support tags, or if access is disabled.  */

> +    el = arm_current_el(env);

> +    sctlr = arm_sctlr(env, el);

> +    if (!mem || !allocation_tag_access_enabled(env, el, sctlr)) {

> +        return;

> +    }

> +

> +    rtag = allocation_tag_from_addr(ptr);

> +    rtag |= rtag << 4;

> +

> +    assert(QEMU_IS_ALIGNED(blocklen, 2 * TAG_GRANULE));


Could we assert this on CPU init rather than in this helper?
That way if anybody tries to create a CPU whose dcz_blocksize
doesn't work with the TAG_GRANULE they'll realize immediately
rather than only if they happen to run a guest workload that
uses DC GVA or DC GZVA.

I also had to think a bit to work out which way round this
assert is checking: it's testing that the ZVA block length
(usually 64 bytes) is a multiple of (twice the TAG_GRANULE),
which is to say a multiple of 32. Given that the blocksize
is stored as a log2 value, this can only fail for blocksizes
16, 8, 4, 2 or 1, which are all fairly unlikely.

> +    memset(mem, rtag, blocklen / (2 * TAG_GRANULE));

> +}

> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c

> index 49817b96ae..31260f97f9 100644

> --- a/target/arm/translate-a64.c

> +++ b/target/arm/translate-a64.c

> @@ -1769,6 +1769,15 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,

>          tcg_rt = clean_data_tbi(s, cpu_reg(s, rt), false);

>          gen_helper_dc_zva(cpu_env, tcg_rt);

>          return;

> +    case ARM_CP_DC_GVA:

> +        tcg_rt = clean_data_tbi(s, cpu_reg(s, rt), false);

> +        gen_helper_dc_gva(cpu_env, tcg_rt);

> +        return;

> +    case ARM_CP_DC_GZVA:

> +        tcg_rt = clean_data_tbi(s, cpu_reg(s, rt), false);

> +        gen_helper_dc_zva(cpu_env, tcg_rt);

> +        gen_helper_dc_gva(cpu_env, tcg_rt);


I think this means that if there's a watchpoint set on the memory
partway through the block we're zeroing then we'll take the
watchpoint with some of the memory zeroed but without the
corresponding tags having been updated. But that's probably OK
(at any rate I wouldn't worry about it for now...)

> +        return;

>      default:

>          break;

>      }

> --


Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>


thanks
-- PMM
Richard Henderson Feb. 18, 2020, 1:19 a.m. | #2
On 12/5/19 10:17 AM, Peter Maydell wrote:
> On Fri, 11 Oct 2019 at 14:50, Richard Henderson

> <richard.henderson@linaro.org> wrote:

>>

>> This is DC GVA and DC GZVA.

>>

>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

>> ---

>> v2: Use allocation_tag_mem + memset.

>> v3: Require pre-cleaned addresses.

>> ---

> 

>> diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c

>> index f1315bae37..e8d8a6bedb 100644

>> --- a/target/arm/mte_helper.c

>> +++ b/target/arm/mte_helper.c

>> @@ -510,3 +510,31 @@ void HELPER(stzgm)(CPUARMState *env, uint64_t ptr, uint64_t val)

>>          }

>>      }

>>  }

>> +

>> +void HELPER(dc_gva)(CPUARMState *env, uint64_t ptr)

>> +{

>> +    ARMCPU *cpu = env_archcpu(env);

>> +    size_t blocklen = 4 << cpu->dcz_blocksize;

>> +    int el;

>> +    uint64_t sctlr;

>> +    uint8_t *mem;

>> +    int rtag;

>> +

>> +    ptr = QEMU_ALIGN_DOWN(ptr, blocklen);

>> +

>> +    /* Trap if accessing an invalid page.  */

>> +    mem = allocation_tag_mem(env, ptr, true, GETPC());

>> +

>> +    /* No action if page does not support tags, or if access is disabled.  */

>> +    el = arm_current_el(env);

>> +    sctlr = arm_sctlr(env, el);

>> +    if (!mem || !allocation_tag_access_enabled(env, el, sctlr)) {

>> +        return;

>> +    }

>> +

>> +    rtag = allocation_tag_from_addr(ptr);

>> +    rtag |= rtag << 4;

>> +

>> +    assert(QEMU_IS_ALIGNED(blocklen, 2 * TAG_GRANULE));

> 

> Could we assert this on CPU init rather than in this helper?

> That way if anybody tries to create a CPU whose dcz_blocksize

> doesn't work with the TAG_GRANULE they'll realize immediately

> rather than only if they happen to run a guest workload that

> uses DC GVA or DC GZVA.


Sure.  I've moved it to realize.

> I also had to think a bit to work out which way round this

> assert is checking: it's testing that the ZVA block length

> (usually 64 bytes) is a multiple of (twice the TAG_GRANULE),

> which is to say a multiple of 32. Given that the blocksize

> is stored as a log2 value, this can only fail for blocksizes

> 16, 8, 4, 2 or 1, which are all fairly unlikely.


Indeed.

>> +    case ARM_CP_DC_GVA:

>> +        tcg_rt = clean_data_tbi(s, cpu_reg(s, rt), false);

>> +        gen_helper_dc_gva(cpu_env, tcg_rt);

>> +        return;

>> +    case ARM_CP_DC_GZVA:

>> +        tcg_rt = clean_data_tbi(s, cpu_reg(s, rt), false);

>> +        gen_helper_dc_zva(cpu_env, tcg_rt);

>> +        gen_helper_dc_gva(cpu_env, tcg_rt);

> 

> I think this means that if there's a watchpoint set on the memory

> partway through the block we're zeroing then we'll take the

> watchpoint with some of the memory zeroed but without the

> corresponding tags having been updated. But that's probably OK

> (at any rate I wouldn't worry about it for now...)


The relatively new probe_{access,write} functions take care of watchpoints.  On
the next round I'll have another look at what may or may not be missing to use
that here.  It's something that I've been thinking about to get right for SVE
as well.


r~

Patch

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index d99bb5e956..93a362708b 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -2233,7 +2233,9 @@  static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
 #define ARM_CP_NZCV              (ARM_CP_SPECIAL | 0x0300)
 #define ARM_CP_CURRENTEL         (ARM_CP_SPECIAL | 0x0400)
 #define ARM_CP_DC_ZVA            (ARM_CP_SPECIAL | 0x0500)
-#define ARM_LAST_SPECIAL         ARM_CP_DC_ZVA
+#define ARM_CP_DC_GVA            (ARM_CP_SPECIAL | 0x0600)
+#define ARM_CP_DC_GZVA           (ARM_CP_SPECIAL | 0x0700)
+#define ARM_LAST_SPECIAL         ARM_CP_DC_GZVA
 #define ARM_CP_FPU               0x1000
 #define ARM_CP_SVE               0x2000
 #define ARM_CP_NO_GDB            0x4000
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index 405aa60016..c82605b51e 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -118,3 +118,4 @@  DEF_HELPER_FLAGS_3(st2g_parallel, TCG_CALL_NO_WG, void, env, i64, i64)
 DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64)
 DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64)
 DEF_HELPER_FLAGS_3(stzgm, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_2(dc_gva, TCG_CALL_NO_RWG, void, env, i64)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 33bc176e1c..eec9064d88 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -6026,6 +6026,22 @@  static const ARMCPRegInfo mte_reginfo[] = {
     { .name = "CIGDVAC", .state = ARM_CP_STATE_AA64,
       .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 5,
       .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "GVA", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 3,
+      .access = PL0_W, .type = ARM_CP_DC_GVA,
+#ifndef CONFIG_USER_ONLY
+      /* Avoid overhead of an access check that always passes in user-mode */
+      .accessfn = aa64_zva_access,
+#endif
+    },
+    { .name = "GZVA", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 4,
+      .access = PL0_W, .type = ARM_CP_DC_GZVA,
+#ifndef CONFIG_USER_ONLY
+      /* Avoid overhead of an access check that always passes in user-mode */
+      .accessfn = aa64_zva_access,
+#endif
+    },
     REGINFO_SENTINEL
 };
 
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index f1315bae37..e8d8a6bedb 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -510,3 +510,31 @@  void HELPER(stzgm)(CPUARMState *env, uint64_t ptr, uint64_t val)
         }
     }
 }
+
+void HELPER(dc_gva)(CPUARMState *env, uint64_t ptr)
+{
+    ARMCPU *cpu = env_archcpu(env);
+    size_t blocklen = 4 << cpu->dcz_blocksize;
+    int el;
+    uint64_t sctlr;
+    uint8_t *mem;
+    int rtag;
+
+    ptr = QEMU_ALIGN_DOWN(ptr, blocklen);
+
+    /* Trap if accessing an invalid page.  */
+    mem = allocation_tag_mem(env, ptr, true, GETPC());
+
+    /* No action if page does not support tags, or if access is disabled.  */
+    el = arm_current_el(env);
+    sctlr = arm_sctlr(env, el);
+    if (!mem || !allocation_tag_access_enabled(env, el, sctlr)) {
+        return;
+    }
+
+    rtag = allocation_tag_from_addr(ptr);
+    rtag |= rtag << 4;
+
+    assert(QEMU_IS_ALIGNED(blocklen, 2 * TAG_GRANULE));
+    memset(mem, rtag, blocklen / (2 * TAG_GRANULE));
+}
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 49817b96ae..31260f97f9 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1769,6 +1769,15 @@  static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
         tcg_rt = clean_data_tbi(s, cpu_reg(s, rt), false);
         gen_helper_dc_zva(cpu_env, tcg_rt);
         return;
+    case ARM_CP_DC_GVA:
+        tcg_rt = clean_data_tbi(s, cpu_reg(s, rt), false);
+        gen_helper_dc_gva(cpu_env, tcg_rt);
+        return;
+    case ARM_CP_DC_GZVA:
+        tcg_rt = clean_data_tbi(s, cpu_reg(s, rt), false);
+        gen_helper_dc_zva(cpu_env, tcg_rt);
+        gen_helper_dc_gva(cpu_env, tcg_rt);
+        return;
     default:
         break;
     }