[03/17] target/arm: Add MTE system registers

Message ID 20190114011122.5995-4-richard.henderson@linaro.org
State Superseded
Headers show
Series
  • target/arm: Implement ARMv8.5-MemTag
Related show

Commit Message

Richard Henderson Jan. 14, 2019, 1:11 a.m.
This is TFSRE0_EL1, TFSR_EL1, TFSR_EL2, TFSR_EL3,
RGSR_EL1, GCR_EL1, and PSTATE.TCO.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 target/arm/cpu.h           |  5 +++++
 target/arm/translate.h     | 11 ++++++++++
 target/arm/helper.c        | 45 ++++++++++++++++++++++++++++++++++++++
 target/arm/translate-a64.c | 11 ++++++++++
 4 files changed, 72 insertions(+)

-- 
2.17.2

Comments

Peter Maydell Feb. 5, 2019, 7:27 p.m. | #1
On Mon, 14 Jan 2019 at 01:11, Richard Henderson
<richard.henderson@linaro.org> wrote:
>

> This is TFSRE0_EL1, TFSR_EL1, TFSR_EL2, TFSR_EL3,

> RGSR_EL1, GCR_EL1, and PSTATE.TCO.

>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  target/arm/cpu.h           |  5 +++++

>  target/arm/translate.h     | 11 ++++++++++

>  target/arm/helper.c        | 45 ++++++++++++++++++++++++++++++++++++++

>  target/arm/translate-a64.c | 11 ++++++++++

>  4 files changed, 72 insertions(+)

>

> diff --git a/target/arm/cpu.h b/target/arm/cpu.h

> index 22163c9c3f..c8b447e30a 100644

> --- a/target/arm/cpu.h

> +++ b/target/arm/cpu.h

> @@ -482,6 +482,11 @@ typedef struct CPUARMState {

>          uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */

>          uint64_t vpidr_el2; /* Virtualization Processor ID Register */

>          uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */

> +#ifdef TARGET_AARCH64

> +        uint64_t tfsr_el[4]; /* tfsrel0_el1 is index 0.  */

> +        uint64_t gcr_el1;

> +        uint64_t rgsr_el1;

> +#endif


Are we going to add more fields inside this #ifdef or is it only
saving 12 words?

>      } cp15;

>

>      struct {

> diff --git a/target/arm/translate.h b/target/arm/translate.h

> index 5a101e1c6d..a24757d3d7 100644

> --- a/target/arm/translate.h

> +++ b/target/arm/translate.h

> @@ -204,6 +204,17 @@ static inline TCGv_i32 get_ahp_flag(void)

>      return ret;

>  }

>

> +/* Set bits within PSTATE.  */

> +static inline void set_pstate_bits(uint32_t bits)

> +{

> +    TCGv_i32 p = tcg_temp_new_i32();

> +

> +    tcg_gen_ld_i32(p, cpu_env, offsetof(CPUARMState, pstate));

> +    tcg_gen_ori_i32(p, p, bits);

> +    tcg_gen_st_i32(p, cpu_env, offsetof(CPUARMState, pstate));

> +    tcg_temp_free_i32(p);


Maybe assert() that all the bits in the input are in the
set that we actually store in env->pstate, to catch attempts
to set NZCV, nRW, etc this way ?

> +}

> +

>  /* Clear bits within PSTATE.  */

>  static inline void clear_pstate_bits(uint32_t bits)

>  {

> diff --git a/target/arm/helper.c b/target/arm/helper.c

> index 5a59fc4315..df43deb0f8 100644

> --- a/target/arm/helper.c

> +++ b/target/arm/helper.c

> @@ -5132,6 +5132,48 @@ static const ARMCPRegInfo pauth_reginfo[] = {

>        .fieldoffset = offsetof(CPUARMState, apib_key.hi) },

>      REGINFO_SENTINEL

>  };

> +

> +static uint64_t tco_read(CPUARMState *env, const ARMCPRegInfo *ri)

> +{

> +    return env->pstate & PSTATE_TCO;

> +}

> +

> +static void tco_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t val)

> +{

> +    env->pstate = (env->pstate & ~PSTATE_TCO) | (val & PSTATE_TCO);

> +}

> +

> +static const ARMCPRegInfo mte_reginfo[] = {

> +    { .name = "TFSRE0_EL1", .state = ARM_CP_STATE_AA64,

> +      .opc0 = 3, .opc1 = 0, .crn = 6, .crm = 6, .opc2 = 1,

> +      .access = PL1_RW,

> +      .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[0]) },

> +    { .name = "TFSR_EL1", .state = ARM_CP_STATE_AA64,

> +      .opc0 = 3, .opc1 = 0, .crn = 6, .crm = 5, .opc2 = 0,

> +      .access = PL1_RW,

> +      .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[1]) },

> +    { .name = "TFSR_EL2", .state = ARM_CP_STATE_AA64,

> +      .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 5, .opc2 = 0,

> +      .access = PL2_RW,

> +      .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[2]) },

> +    { .name = "TFSR_EL3", .state = ARM_CP_STATE_AA64,

> +      .opc0 = 3, .opc1 = 6, .crn = 6, .crm = 6, .opc2 = 0,

> +      .access = PL3_RW,

> +      .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[3]) },

> +    { .name = "RGSR_EL1", .state = ARM_CP_STATE_AA64,

> +      .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 5,

> +      .access = PL1_RW,

> +      .fieldoffset = offsetof(CPUARMState, cp15.rgsr_el1) },

> +    { .name = "GCR_EL1", .state = ARM_CP_STATE_AA64,

> +      .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 6,

> +      .access = PL1_RW,

> +      .fieldoffset = offsetof(CPUARMState, cp15.gcr_el1) },

> +    { .name = "TCO", .state = ARM_CP_STATE_AA64,

> +      .opc0 = 0, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7,


Shouldn't this have opc0 = 3 ?

> +      .type = ARM_CP_NO_RAW,

> +      .access = PL0_RW, .readfn = tco_read, .writefn = tco_write },

> +    REGINFO_SENTINEL


Missing GMID_EL1 ?

> +};

>  #endif

>

>  void register_cp_regs_for_features(ARMCPU *cpu)

> @@ -5923,6 +5965,9 @@ void register_cp_regs_for_features(ARMCPU *cpu)

>      if (cpu_isar_feature(aa64_pauth, cpu)) {

>          define_arm_cp_regs(cpu, pauth_reginfo);

>      }

> +    if (cpu_isar_feature(aa64_mte_insn_reg, cpu)) {

> +        define_arm_cp_regs(cpu, mte_reginfo);

> +    }

>  #endif

>  }

>

> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c

> index 0286507bae..5c2577a9ac 100644

> --- a/target/arm/translate-a64.c

> +++ b/target/arm/translate-a64.c

> @@ -1668,6 +1668,17 @@ static void handle_msr_i(DisasContext *s, uint32_t insn,

>          s->base.is_jmp = DISAS_UPDATE;

>          break;

>

> +    case 0x1c: /* TCO */

> +        if (!dc_isar_feature(aa64_mte_insn_reg, s)) {

> +            goto do_unallocated;

> +        }

> +        if (crm & 1) {

> +            set_pstate_bits(PSTATE_TCO);

> +        } else {

> +            clear_pstate_bits(PSTATE_TCO);

> +        }

> +        break;


Don't we need to break the TB here or something to pick up
the new value of TCO when we generate code for a following
load or store ? (TCO is self-synchronizing so there is no
requirement for an ISB before it takes effect.)

thanks
-- PMM
Richard Henderson Feb. 10, 2019, 1:20 a.m. | #2
On 2/5/19 11:27 AM, Peter Maydell wrote:
>> +#ifdef TARGET_AARCH64

>> +        uint64_t tfsr_el[4]; /* tfsrel0_el1 is index 0.  */

>> +        uint64_t gcr_el1;

>> +        uint64_t rgsr_el1;

>> +#endif

> 

> Are we going to add more fields inside this #ifdef or is it only

> saving 12 words?


Just the 12 words here.  We've got plenty of other ifdefs though...

>> +/* Set bits within PSTATE.  */

>> +static inline void set_pstate_bits(uint32_t bits)

>> +{

>> +    TCGv_i32 p = tcg_temp_new_i32();

>> +

>> +    tcg_gen_ld_i32(p, cpu_env, offsetof(CPUARMState, pstate));

>> +    tcg_gen_ori_i32(p, p, bits);

>> +    tcg_gen_st_i32(p, cpu_env, offsetof(CPUARMState, pstate));

>> +    tcg_temp_free_i32(p);

> 

> Maybe assert() that all the bits in the input are in the

> set that we actually store in env->pstate, to catch attempts

> to set NZCV, nRW, etc this way ?


I suppose.  There's the clear_pstate_bits just below, which has a couple of users.

>> +      .type = ARM_CP_NO_RAW,

>> +      .access = PL0_RW, .readfn = tco_read, .writefn = tco_write },

>> +    REGINFO_SENTINEL

> 

> Missing GMID_EL1 ?


Err..  that's not in 00eac5, at least.

>> +    case 0x1c: /* TCO */

>> +        if (!dc_isar_feature(aa64_mte_insn_reg, s)) {

>> +            goto do_unallocated;

>> +        }

>> +        if (crm & 1) {

>> +            set_pstate_bits(PSTATE_TCO);

>> +        } else {

>> +            clear_pstate_bits(PSTATE_TCO);

>> +        }

>> +        break;

> 

> Don't we need to break the TB here or something to pick up

> the new value of TCO when we generate code for a following

> load or store?


Yep.  It's included in the (quite complex) MTE_ACTIVE.


r~
Richard Henderson Feb. 10, 2019, 1:23 a.m. | #3
On 2/5/19 11:27 AM, Peter Maydell wrote:
>> +++ b/target/arm/translate-a64.c

>> @@ -1668,6 +1668,17 @@ static void handle_msr_i(DisasContext *s, uint32_t insn,

>>          s->base.is_jmp = DISAS_UPDATE;

>>          break;

>>

>> +    case 0x1c: /* TCO */

>> +        if (!dc_isar_feature(aa64_mte_insn_reg, s)) {

>> +            goto do_unallocated;

>> +        }

>> +        if (crm & 1) {

>> +            set_pstate_bits(PSTATE_TCO);

>> +        } else {

>> +            clear_pstate_bits(PSTATE_TCO);

>> +        }

>> +        break;

> Don't we need to break the TB here or something to pick up

> the new value of TCO when we generate code for a following

> load or store ? (TCO is self-synchronizing so there is no

> requirement for an ISB before it takes effect.)


Actually, we already break the TB here by default.


r~
Peter Maydell Feb. 10, 2019, 9:40 p.m. | #4
On Sun, 10 Feb 2019 at 01:23, Richard Henderson
<richard.henderson@linaro.org> wrote:
>

> On 2/5/19 11:27 AM, Peter Maydell wrote:

> >> +++ b/target/arm/translate-a64.c

> >> @@ -1668,6 +1668,17 @@ static void handle_msr_i(DisasContext *s, uint32_t insn,

> >>          s->base.is_jmp = DISAS_UPDATE;

> >>          break;

> >>

> >> +    case 0x1c: /* TCO */

> >> +        if (!dc_isar_feature(aa64_mte_insn_reg, s)) {

> >> +            goto do_unallocated;

> >> +        }

> >> +        if (crm & 1) {

> >> +            set_pstate_bits(PSTATE_TCO);

> >> +        } else {

> >> +            clear_pstate_bits(PSTATE_TCO);

> >> +        }

> >> +        break;

> > Don't we need to break the TB here or something to pick up

> > the new value of TCO when we generate code for a following

> > load or store ? (TCO is self-synchronizing so there is no

> > requirement for an ISB before it takes effect.)

>

> Actually, we already break the TB here by default.


Do we? I didn't see any code (apart from the handling
in the DAIFSet/Clear codepaths, which aren't used for TCO).

thanks
-- PMM
Richard Henderson Feb. 10, 2019, 10:47 p.m. | #5
On 2/10/19 1:40 PM, Peter Maydell wrote:
>> Actually, we already break the TB here by default.

> 

> Do we? I didn't see any code (apart from the handling

> in the DAIFSet/Clear codepaths, which aren't used for TCO).


At the start of the function:

    /* End the TB by default, chaining is ok.  */
    s->base.is_jmp = DISAS_TOO_MANY;

Since the change to TCO is from an immediate, the change to MTE_ACTIVE is also
constant, and so chaining will work.


r~
Peter Maydell Feb. 11, 2019, 9:43 a.m. | #6
On Sun, 10 Feb 2019 at 22:47, Richard Henderson
<richard.henderson@linaro.org> wrote:
>

> On 2/10/19 1:40 PM, Peter Maydell wrote:

> >> Actually, we already break the TB here by default.

> >

> > Do we? I didn't see any code (apart from the handling

> > in the DAIFSet/Clear codepaths, which aren't used for TCO).

>

> At the start of the function:

>

>     /* End the TB by default, chaining is ok.  */

>     s->base.is_jmp = DISAS_TOO_MANY;


...that's only in the v2 patchset which you hadn't posted yet :-)

-- PMM

Patch

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 22163c9c3f..c8b447e30a 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -482,6 +482,11 @@  typedef struct CPUARMState {
         uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */
         uint64_t vpidr_el2; /* Virtualization Processor ID Register */
         uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
+#ifdef TARGET_AARCH64
+        uint64_t tfsr_el[4]; /* tfsrel0_el1 is index 0.  */
+        uint64_t gcr_el1;
+        uint64_t rgsr_el1;
+#endif
     } cp15;
 
     struct {
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 5a101e1c6d..a24757d3d7 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -204,6 +204,17 @@  static inline TCGv_i32 get_ahp_flag(void)
     return ret;
 }
 
+/* Set bits within PSTATE.  */
+static inline void set_pstate_bits(uint32_t bits)
+{
+    TCGv_i32 p = tcg_temp_new_i32();
+
+    tcg_gen_ld_i32(p, cpu_env, offsetof(CPUARMState, pstate));
+    tcg_gen_ori_i32(p, p, bits);
+    tcg_gen_st_i32(p, cpu_env, offsetof(CPUARMState, pstate));
+    tcg_temp_free_i32(p);
+}
+
 /* Clear bits within PSTATE.  */
 static inline void clear_pstate_bits(uint32_t bits)
 {
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 5a59fc4315..df43deb0f8 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -5132,6 +5132,48 @@  static const ARMCPRegInfo pauth_reginfo[] = {
       .fieldoffset = offsetof(CPUARMState, apib_key.hi) },
     REGINFO_SENTINEL
 };
+
+static uint64_t tco_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    return env->pstate & PSTATE_TCO;
+}
+
+static void tco_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t val)
+{
+    env->pstate = (env->pstate & ~PSTATE_TCO) | (val & PSTATE_TCO);
+}
+
+static const ARMCPRegInfo mte_reginfo[] = {
+    { .name = "TFSRE0_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 6, .crm = 6, .opc2 = 1,
+      .access = PL1_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[0]) },
+    { .name = "TFSR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 6, .crm = 5, .opc2 = 0,
+      .access = PL1_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[1]) },
+    { .name = "TFSR_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 5, .opc2 = 0,
+      .access = PL2_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[2]) },
+    { .name = "TFSR_EL3", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 6, .crn = 6, .crm = 6, .opc2 = 0,
+      .access = PL3_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[3]) },
+    { .name = "RGSR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 5,
+      .access = PL1_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.rgsr_el1) },
+    { .name = "GCR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 6,
+      .access = PL1_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.gcr_el1) },
+    { .name = "TCO", .state = ARM_CP_STATE_AA64,
+      .opc0 = 0, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7,
+      .type = ARM_CP_NO_RAW,
+      .access = PL0_RW, .readfn = tco_read, .writefn = tco_write },
+    REGINFO_SENTINEL
+};
 #endif
 
 void register_cp_regs_for_features(ARMCPU *cpu)
@@ -5923,6 +5965,9 @@  void register_cp_regs_for_features(ARMCPU *cpu)
     if (cpu_isar_feature(aa64_pauth, cpu)) {
         define_arm_cp_regs(cpu, pauth_reginfo);
     }
+    if (cpu_isar_feature(aa64_mte_insn_reg, cpu)) {
+        define_arm_cp_regs(cpu, mte_reginfo);
+    }
 #endif
 }
 
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 0286507bae..5c2577a9ac 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1668,6 +1668,17 @@  static void handle_msr_i(DisasContext *s, uint32_t insn,
         s->base.is_jmp = DISAS_UPDATE;
         break;
 
+    case 0x1c: /* TCO */
+        if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
+            goto do_unallocated;
+        }
+        if (crm & 1) {
+            set_pstate_bits(PSTATE_TCO);
+        } else {
+            clear_pstate_bits(PSTATE_TCO);
+        }
+        break;
+
     default:
     do_unallocated:
         unallocated_encoding(s);