[04/11] target-arm: Define correct mmu_idx values and pass them in TB flags

Message ID 1422037228-5363-5-git-send-email-peter.maydell@linaro.org
State Superseded

Commit Message

Peter Maydell Jan. 23, 2015, 6:20 p.m. UTC
We currently claim that for ARM the mmu_idx should simply be the current
exception level. However this isn't actually correct -- secure EL0 and EL1
should have separate indexes from non-secure EL0 and EL1 since their
VA->PA mappings may differ. We will also want an index for stage 2
translations when we properly support EL2.

Define and document all seven mmu index values that we require, and
pass the mmu index in the TB flags rather than exception level or
priv/user bit.

This change doesn't update the get_phys_addr() code, so our page
table walking still assumes a simplistic "user or priv?" model for
the moment.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
This leaves some odd gaps in the TB flags usage. I will circle
back and clean this up later (including moving the other common
flags like the singlestep ones to the top of the flags word),
but I didn't want to bloat this patchseries further.
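
For orientation, this is how the new field round-trips through the flags
word, using the macros the patch defines below (a sketch for illustration,
not part of the patch; ARMMMUIdx_S1SE1 is just an example value):

    uint32_t flags = ARM_TBFLAG_AARCH64_STATE_MASK;           /* bit 31 */
    flags |= ARMMMUIdx_S1SE1 << ARM_TBFLAG_MMUIDX_SHIFT;      /* bits 28..30 */
    assert(ARM_TBFLAG_MMUIDX(flags) == ARMMMUIdx_S1SE1);
    assert(arm_mmu_idx_to_el(ARM_TBFLAG_MMUIDX(flags)) == 1); /* 5 & 3 */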
---
 target-arm/cpu.h           | 113 ++++++++++++++++++++++++++++++++++++---------
 target-arm/helper.c        |   3 +-
 target-arm/translate-a64.c |   5 +-
 target-arm/translate.c     |   5 +-
 target-arm/translate.h     |   3 +-
 5 files changed, 101 insertions(+), 28 deletions(-)

Comments

Greg Bellows Jan. 23, 2015, 9:44 p.m. UTC | #1
On Fri, Jan 23, 2015 at 12:20 PM, Peter Maydell <peter.maydell@linaro.org> wrote:
> [...]
> +/* Return the exception level we're running at if this is our mmu_idx */
> +static inline int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx)
> +{
> +    assert(mmu_idx < ARMMMUIdx_S2NS);
> +    return mmu_idx & 3;
> +}
> +
> +/* Determine the current mmu_idx to use for normal loads/stores */
>  static inline int cpu_mmu_index (CPUARMState *env)
>  {
> -    return arm_current_el(env);
> +    int el = arm_current_el(env);
> +
> +    if (el < 3 && arm_is_secure_below_el3(env)) {

We bypass the secure check for EL3 but not EL2.  We should either
circumvent both EL2 & 3 or neither.

> +        return ARMMMUIdx_S1SE0 + el;
> +    }
> +    return el;
>  }
>
> [...]

Otherwise,

Reviewed-by: Greg Bellows <greg.bellows@linaro.org>
Peter Maydell Jan. 24, 2015, 1:12 a.m. UTC | #2
On 23 January 2015 at 21:44, Greg Bellows <greg.bellows@linaro.org> wrote:
> On Fri, Jan 23, 2015 at 12:20 PM, Peter Maydell
> <peter.maydell@linaro.org> wrote:
>> +typedef enum ARMMMUIdx {
>> +    ARMMMUIdx_S12NSE0 = 0,
>> +    ARMMMUIdx_S12NSE1 = 1,
>> +    ARMMMUIdx_S1E2 = 2,
>> +    ARMMMUIdx_S1E3 = 3,
>> +    ARMMMUIdx_S1SE0 = 4,
>> +    ARMMMUIdx_S1SE1 = 5,
>> +    ARMMMUIdx_S2NS = 6,
>> +    /* Indexes below here don't have TLBs and are used only for AT system
>> +     * instructions or for the first stage of an S12 page table walk.
>> +     */
>> +    ARMMMUIdx_S1NSE0 = 7,
>> +    ARMMMUIdx_S1NSE1 = 8,
>> +} ARMMMUIdx;
>> +
>>  #define MMU_MODE0_SUFFIX _user
>>  #define MMU_MODE1_SUFFIX _kernel
>>  #define MMU_USER_IDX 0
>> +
>> +/* Return the exception level we're running at if this is our mmu_idx */
>> +static inline int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx)
>> +{
>> +    assert(mmu_idx < ARMMMUIdx_S2NS);
>> +    return mmu_idx & 3;
>> +}
>> +
>> +/* Determine the current mmu_idx to use for normal loads/stores */
>>  static inline int cpu_mmu_index (CPUARMState *env)
>>  {
>> -    return arm_current_el(env);
>> +    int el = arm_current_el(env);
>> +
>> +    if (el < 3 && arm_is_secure_below_el3(env)) {
>
> We bypass the secure check for EL3 but not EL2.  We should either
> circumvent both EL2 & 3 or neither.

Not sure what you mean here. The point of this code is to return
a suitable MMU idx for the current CPU state. If we are in
EL2 or EL3 then just "el" is correct (being ARMMMUIdx_S1E2 and
ARMMMUIdx_S1E3). We can't be in this condition with el == 2
because if we're in EL2 then the NS bit must be set (there's
no such thing as Secure EL2) and so arm_is_secure_below_el3()
will have returned false. So we know here that el is 0 or 1,
and this addition below:

>> +        return ARMMMUIdx_S1SE0 + el;

means we end up with either _S1SE0 or _S1SE1, as required.

(the condition could equally well be written
    if (el < 2 && arm_is_secure_below_el3(env))
as the two are true in exactly the same set of cases. < 3 seemed
marginally better to me, since it's expressing "if we're secure
and not in EL3" which is the set of cases where we need to
change the mmu_idx from the 0/1/2/3 values.)

>> +    }
>> +    return el;

Anything else (_S12NSE0, _S12NSE1, _S1E2, _S1E3) is handled by this return.

>>  }
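
For sanity-checking, here is the complete mapping the function produces,
given the architectural guarantee that arm_is_secure_below_el3() can never
be true at EL2 (a summary sketch, not part of the patch):

    /* current state       cpu_mmu_index() result
     * NS EL0          ->  0 (ARMMMUIdx_S12NSE0)
     * NS EL1          ->  1 (ARMMMUIdx_S12NSE1)
     * EL2 (always NS) ->  2 (ARMMMUIdx_S1E2)
     * EL3             ->  3 (ARMMMUIdx_S1E3)
     * S EL0           ->  4 (ARMMMUIdx_S1SE0 + 0)
     * S EL1           ->  5 (ARMMMUIdx_S1SE0 + 1)
     */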

thanks
-- PMM
Greg Bellows Jan. 24, 2015, 4:36 p.m. UTC | #3
On Jan 23, 2015 7:12 PM, "Peter Maydell" <peter.maydell@linaro.org> wrote:
>
> On 23 January 2015 at 21:44, Greg Bellows <greg.bellows@linaro.org> wrote:
> > On Fri, Jan 23, 2015 at 12:20 PM, Peter Maydell
> > <peter.maydell@linaro.org> wrote:
> >> [...]
> >> +/* Determine the current mmu_idx to use for normal loads/stores */
> >>  static inline int cpu_mmu_index (CPUARMState *env)
> >>  {
> >> -    return arm_current_el(env);
> >> +    int el = arm_current_el(env);
> >> +
> >> +    if (el < 3 && arm_is_secure_below_el3(env)) {
> >
> > We bypass the secure check for EL3 but not EL2.  We should either
> > circumvent both EL2 & 3 or neither.
>
> Not sure what you mean here. The point of this code is to return
> a suitable MMU idx for the current CPU state. If we are in
> EL2 or EL3 then just "el" is correct (being ARMMMUIdx_S1E2 and
> ARMMMUIdx_S1E3). We can't be in this condition with el == 2

I understand what the code is doing, my point is that you rely on
arm_is_secure_below_el3 to deal with EL2 when you could have just as easily
checked for el<2.  Not a big deal though.

> [...]
Peter Maydell Jan. 24, 2015, 7:31 p.m. UTC | #4
On 24 January 2015 at 16:36, Greg Bellows <greg.bellows@linaro.org> wrote:
> I understand what the code is doing, my point is that you rely on
> arm_is_secure_below_el3 to deal with EL2 when you could have just as easily
> checked for el<2.  Not a big deal though.

Well, I rely on the architecture to tell me that it isn't
possible for arm_is_secure_below_el3 to be true if we're
in EL2. Otherwise there'd be a bug in that function...

-- PMM
Peter Maydell Jan. 26, 2015, 11:29 a.m. UTC | #5
On 24 January 2015 at 19:31, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 24 January 2015 at 16:36, Greg Bellows <greg.bellows@linaro.org> wrote:
>> I understand what the code is doing, my point is that you rely on
>> arm_is_secure_below_el3 to deal with EL2 when you could have just as easily
>> checked for el<2.  Not a big deal though.
>
> Well, I rely on the architecture to tell me that it isn't
> possible for arm_is_secure_below_el3 to be true if we're
> in EL2. Otherwise there'd be a bug in that function...

On reflection, I see your point, so have changed to 'el < 2 && ...'
for v2.

thanks
-- PMM
Peter Maydell Jan. 27, 2015, 7:30 p.m. UTC | #6
On 23 January 2015 at 18:20, Peter Maydell <peter.maydell@linaro.org> wrote:
> +/* Determine the current mmu_idx to use for normal loads/stores */
>  static inline int cpu_mmu_index (CPUARMState *env)
>  {
> -    return arm_current_el(env);
> +    int el = arm_current_el(env);
> +
> +    if (el < 3 && arm_is_secure_below_el3(env)) {
> +        return ARMMMUIdx_S1SE0 + el;
> +    }
> +    return el;
>  }

Just noticed the stray extra space between function name and '('
here; will delete the space in v2, since we're rewriting the whole
function body anyhow...
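
Folding that in together with the 'el < 2' change adopted in #5, the v2
function presumably ends up as follows (a sketch; v2 itself is not shown
in this thread):

    static inline int cpu_mmu_index(CPUARMState *env)
    {
        int el = arm_current_el(env);

        if (el < 2 && arm_is_secure_below_el3(env)) {
            return ARMMMUIdx_S1SE0 + el;
        }
        return el;
    }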

-- PMM
Greg Bellows Jan. 28, 2015, 9:57 p.m. UTC | #7
On Fri, Jan 23, 2015 at 12:20 PM, Peter Maydell <peter.maydell@linaro.org> wrote:

> [...]
> @@ -1803,6 +1870,8 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
>                     << ARM_TBFLAG_XSCALE_CPAR_SHIFT);
>      }
>
> +    *flags |= (cpu_mmu_index(env) << ARM_TBFLAG_MMUIDX_SHIFT);
> +
>

After getting through patch 9, I wonder if the TB NS bit can also be removed, as it is implied by the MMU index.

>      *cs_base = 0;
>  }
> [...]
Peter Maydell Jan. 28, 2015, 10:34 p.m. UTC | #8
On 28 January 2015 at 21:57, Greg Bellows <greg.bellows@linaro.org> wrote:
> After getting through patch 9, I wonder if the TB NS bit can also be removed
> as it is implied in the MMU index.

No, because for a 32-bit EL3 we are always running under a Secure
translation regime (S1E3) but the TBFLAG_NS bit may be either 0 or
1 depending on the value of the SCR.NS bit.
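
Concretely (illustrative values, not taken from the patch):

    /* 32-bit EL3 core executing in Monitor mode:
     *   SCR.NS == 0  ->  mmu_idx = ARMMMUIdx_S1E3, ARM_TBFLAG_NS = 0
     *   SCR.NS == 1  ->  mmu_idx = ARMMMUIdx_S1E3, ARM_TBFLAG_NS = 1
     * Same translation regime, different CPREG bank: the NS flag
     * carries information the mmu_idx alone does not.
     */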

-- PMM
Greg Bellows Jan. 29, 2015, 3:20 p.m. UTC | #9
On Wed, Jan 28, 2015 at 4:34 PM, Peter Maydell <peter.maydell@linaro.org> wrote:

> On 28 January 2015 at 21:57, Greg Bellows <greg.bellows@linaro.org> wrote:
> > After getting through patch 9, I wonder if the TB NS bit can also be
> removed
> > as it is implied in the MMU index.
>
> No, because for a 32-bit EL3 we are always running under a Secure
> translation regime (S1E3) but the TBFLAG_NS bit may be either 0 or
> 1 depending on the value of the SCR.NS bit.
>
>
That is correct.

> -- PMM
>
Patch

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 3eb00f4..cf7b9ab 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -98,7 +98,7 @@  typedef uint32_t ARMReadCPFunc(void *opaque, int cp_info,
 
 struct arm_boot_info;
 
-#define NB_MMU_MODES 4
+#define NB_MMU_MODES 7
 
 /* We currently assume float and double are IEEE single and double
    precision respectively.
@@ -1572,13 +1572,92 @@  static inline CPUARMState *cpu_init(const char *cpu_model)
 #define cpu_signal_handler cpu_arm_signal_handler
 #define cpu_list arm_cpu_list
 
-/* MMU modes definitions */
+/* ARM has the following "translation regimes" (as the ARM ARM calls them):
+ *
+ * If EL3 is 64-bit:
+ *  + NonSecure EL1 & 0 stage 1
+ *  + NonSecure EL1 & 0 stage 2
+ *  + NonSecure EL2
+ *  + Secure EL1 & EL0
+ *  + Secure EL3
+ * If EL3 is 32-bit:
+ *  + NonSecure PL1 & 0 stage 1
+ *  + NonSecure PL1 & 0 stage 2
+ *  + NonSecure PL2
+ *  + Secure PL0 & PL1
+ * (reminder: for 32 bit EL3, Secure PL1 is *EL3*, not EL1.)
+ *
+ * For QEMU, an mmu_idx is not quite the same as a translation regime because:
+ *  1. we need to split the "EL1 & 0" regimes into two mmu_idxes, because they
+ *     may differ in access permissions even if the VA->PA map is the same
+ *  2. we want to cache in our TLB the full VA->IPA->PA lookup for a stage 1+2
+ *     translation, which means that we have one mmu_idx that deals with two
+ *     concatenated translation regimes [this sort of combined s1+2 TLB is
+ *     architecturally permitted]
+ *  3. we don't need to allocate an mmu_idx to translations that we won't be
+ *     handling via the TLB. The only way to do a stage 1 translation without
+ *     the immediate stage 2 translation is via the ATS or AT system insns,
+ *     which can be slow-pathed and always do a page table walk.
+ *  4. we can also safely fold together the "32 bit EL3" and "64 bit EL3"
+ *     translation regimes, because they map reasonably well to each other
+ *     and they can't both be active at the same time.
+ * This gives us the following list of mmu_idx values:
+ *
+ * NS EL0 (aka NS PL0) stage 1+2
+ * NS EL1 (aka NS PL1) stage 1+2
+ * NS EL2 (aka NS PL2)
+ * S EL3 (aka S PL1)
+ * S EL0 (aka S PL0)
+ * S EL1 (not used if EL3 is 32 bit)
+ * NS EL0+1 stage 2
+ *
+ * (The last of these is an mmu_idx because we want to be able to use the TLB
+ * for the accesses done as part of a stage 1 page table walk, rather than
+ * having to walk the stage 2 page table over and over.)
+ *
+ * Our enumeration includes at the end some entries which are not "true"
+ * mmu_idx values in that they don't have corresponding TLBs and are only
+ * valid for doing slow path page table walks.
+ *
+ * The constant names here are patterned after the general style of the names
+ * of the AT/ATS operations.
+ * The values used are carefully arranged to make mmu_idx => EL lookup easy.
+ */
+typedef enum ARMMMUIdx {
+    ARMMMUIdx_S12NSE0 = 0,
+    ARMMMUIdx_S12NSE1 = 1,
+    ARMMMUIdx_S1E2 = 2,
+    ARMMMUIdx_S1E3 = 3,
+    ARMMMUIdx_S1SE0 = 4,
+    ARMMMUIdx_S1SE1 = 5,
+    ARMMMUIdx_S2NS = 6,
+    /* Indexes below here don't have TLBs and are used only for AT system
+     * instructions or for the first stage of an S12 page table walk.
+     */
+    ARMMMUIdx_S1NSE0 = 7,
+    ARMMMUIdx_S1NSE1 = 8,
+} ARMMMUIdx;
+
 #define MMU_MODE0_SUFFIX _user
 #define MMU_MODE1_SUFFIX _kernel
 #define MMU_USER_IDX 0
+
+/* Return the exception level we're running at if this is our mmu_idx */
+static inline int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx)
+{
+    assert(mmu_idx < ARMMMUIdx_S2NS);
+    return mmu_idx & 3;
+}
+
+/* Determine the current mmu_idx to use for normal loads/stores */
 static inline int cpu_mmu_index (CPUARMState *env)
 {
-    return arm_current_el(env);
+    int el = arm_current_el(env);
+
+    if (el < 3 && arm_is_secure_below_el3(env)) {
+        return ARMMMUIdx_S1SE0 + el;
+    }
+    return el;
 }
 
 /* Return the Exception Level targeted by debug exceptions;
@@ -1645,9 +1724,13 @@  static inline bool arm_singlestep_active(CPUARMState *env)
 
 /* Bit usage in the TB flags field: bit 31 indicates whether we are
  * in 32 or 64 bit mode. The meaning of the other bits depends on that.
+ * We put flags which are shared between 32 and 64 bit mode at the top
+ * of the word, and flags which apply to only one mode at the bottom.
  */
 #define ARM_TBFLAG_AARCH64_STATE_SHIFT 31
 #define ARM_TBFLAG_AARCH64_STATE_MASK  (1U << ARM_TBFLAG_AARCH64_STATE_SHIFT)
+#define ARM_TBFLAG_MMUIDX_SHIFT 28
+#define ARM_TBFLAG_MMUIDX_MASK (0x7 << ARM_TBFLAG_MMUIDX_SHIFT)
 
 /* Bit usage when in AArch32 state: */
 #define ARM_TBFLAG_THUMB_SHIFT      0
@@ -1656,8 +1739,6 @@  static inline bool arm_singlestep_active(CPUARMState *env)
 #define ARM_TBFLAG_VECLEN_MASK      (0x7 << ARM_TBFLAG_VECLEN_SHIFT)
 #define ARM_TBFLAG_VECSTRIDE_SHIFT  4
 #define ARM_TBFLAG_VECSTRIDE_MASK   (0x3 << ARM_TBFLAG_VECSTRIDE_SHIFT)
-#define ARM_TBFLAG_PRIV_SHIFT       6
-#define ARM_TBFLAG_PRIV_MASK        (1 << ARM_TBFLAG_PRIV_SHIFT)
 #define ARM_TBFLAG_VFPEN_SHIFT      7
 #define ARM_TBFLAG_VFPEN_MASK       (1 << ARM_TBFLAG_VFPEN_SHIFT)
 #define ARM_TBFLAG_CONDEXEC_SHIFT   8
@@ -1683,8 +1764,6 @@  static inline bool arm_singlestep_active(CPUARMState *env)
 #define ARM_TBFLAG_NS_MASK          (1 << ARM_TBFLAG_NS_SHIFT)
 
 /* Bit usage when in AArch64 state */
-#define ARM_TBFLAG_AA64_EL_SHIFT    0
-#define ARM_TBFLAG_AA64_EL_MASK     (0x3 << ARM_TBFLAG_AA64_EL_SHIFT)
 #define ARM_TBFLAG_AA64_FPEN_SHIFT  2
 #define ARM_TBFLAG_AA64_FPEN_MASK   (1 << ARM_TBFLAG_AA64_FPEN_SHIFT)
 #define ARM_TBFLAG_AA64_SS_ACTIVE_SHIFT 3
@@ -1695,14 +1774,14 @@  static inline bool arm_singlestep_active(CPUARMState *env)
 /* some convenience accessor macros */
 #define ARM_TBFLAG_AARCH64_STATE(F) \
     (((F) & ARM_TBFLAG_AARCH64_STATE_MASK) >> ARM_TBFLAG_AARCH64_STATE_SHIFT)
+#define ARM_TBFLAG_MMUIDX(F) \
+    (((F) & ARM_TBFLAG_MMUIDX_MASK) >> ARM_TBFLAG_MMUIDX_SHIFT)
 #define ARM_TBFLAG_THUMB(F) \
     (((F) & ARM_TBFLAG_THUMB_MASK) >> ARM_TBFLAG_THUMB_SHIFT)
 #define ARM_TBFLAG_VECLEN(F) \
     (((F) & ARM_TBFLAG_VECLEN_MASK) >> ARM_TBFLAG_VECLEN_SHIFT)
 #define ARM_TBFLAG_VECSTRIDE(F) \
     (((F) & ARM_TBFLAG_VECSTRIDE_MASK) >> ARM_TBFLAG_VECSTRIDE_SHIFT)
-#define ARM_TBFLAG_PRIV(F) \
-    (((F) & ARM_TBFLAG_PRIV_MASK) >> ARM_TBFLAG_PRIV_SHIFT)
 #define ARM_TBFLAG_VFPEN(F) \
     (((F) & ARM_TBFLAG_VFPEN_MASK) >> ARM_TBFLAG_VFPEN_SHIFT)
 #define ARM_TBFLAG_CONDEXEC(F) \
@@ -1717,8 +1796,6 @@  static inline bool arm_singlestep_active(CPUARMState *env)
     (((F) & ARM_TBFLAG_PSTATE_SS_MASK) >> ARM_TBFLAG_PSTATE_SS_SHIFT)
 #define ARM_TBFLAG_XSCALE_CPAR(F) \
     (((F) & ARM_TBFLAG_XSCALE_CPAR_MASK) >> ARM_TBFLAG_XSCALE_CPAR_SHIFT)
-#define ARM_TBFLAG_AA64_EL(F) \
-    (((F) & ARM_TBFLAG_AA64_EL_MASK) >> ARM_TBFLAG_AA64_EL_SHIFT)
 #define ARM_TBFLAG_AA64_FPEN(F) \
     (((F) & ARM_TBFLAG_AA64_FPEN_MASK) >> ARM_TBFLAG_AA64_FPEN_SHIFT)
 #define ARM_TBFLAG_AA64_SS_ACTIVE(F) \
@@ -1742,8 +1819,7 @@  static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
 
     if (is_a64(env)) {
         *pc = env->pc;
-        *flags = ARM_TBFLAG_AARCH64_STATE_MASK
-            | (arm_current_el(env) << ARM_TBFLAG_AA64_EL_SHIFT);
+        *flags = ARM_TBFLAG_AARCH64_STATE_MASK;
         if (fpen == 3 || (fpen == 1 && arm_current_el(env) != 0)) {
             *flags |= ARM_TBFLAG_AA64_FPEN_MASK;
         }
@@ -1761,21 +1837,12 @@  static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
             }
         }
     } else {
-        int privmode;
         *pc = env->regs[15];
         *flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT)
             | (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT)
             | (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT)
             | (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT)
             | (env->bswap_code << ARM_TBFLAG_BSWAP_CODE_SHIFT);
-        if (arm_feature(env, ARM_FEATURE_M)) {
-            privmode = !((env->v7m.exception == 0) && (env->v7m.control & 1));
-        } else {
-            privmode = (env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR;
-        }
-        if (privmode) {
-            *flags |= ARM_TBFLAG_PRIV_MASK;
-        }
         if (!(access_secure_reg(env))) {
             *flags |= ARM_TBFLAG_NS_MASK;
         }
@@ -1803,6 +1870,8 @@  static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                    << ARM_TBFLAG_XSCALE_CPAR_SHIFT);
     }
 
+    *flags |= (cpu_mmu_index(env) << ARM_TBFLAG_MMUIDX_SHIFT);
+
     *cs_base = 0;
 }
 
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 1a5e067..06478d8 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -5119,7 +5119,8 @@  int arm_cpu_handle_mmu_fault(CPUState *cs, vaddr address,
     uint32_t syn;
     bool same_el = (arm_current_el(env) != 0);
 
-    is_user = mmu_idx == MMU_USER_IDX;
+    /* TODO: pass the translation regime to get_phys_addr */
+    is_user = (arm_mmu_idx_to_el(mmu_idx) == 0);
     ret = get_phys_addr(env, address, access_type, is_user, &phys_addr, &prot,
                         &page_size);
     if (ret == 0) {
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index dac2f63..96f14ff 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -10922,14 +10922,15 @@  void gen_intermediate_code_internal_a64(ARMCPU *cpu,
     dc->bswap_code = 0;
     dc->condexec_mask = 0;
     dc->condexec_cond = 0;
+    dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
+    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
 #if !defined(CONFIG_USER_ONLY)
-    dc->user = (ARM_TBFLAG_AA64_EL(tb->flags) == 0);
+    dc->user = (dc->current_el == 0);
 #endif
     dc->cpacr_fpen = ARM_TBFLAG_AA64_FPEN(tb->flags);
     dc->vec_len = 0;
     dc->vec_stride = 0;
     dc->cp_regs = cpu->cp_regs;
-    dc->current_el = arm_current_el(env);
     dc->features = env->features;
 
     /* Single step state. The code-generation logic here is:
diff --git a/target-arm/translate.c b/target-arm/translate.c
index bdfcdf1..7163649 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -11032,8 +11032,10 @@  static inline void gen_intermediate_code_internal(ARMCPU *cpu,
     dc->bswap_code = ARM_TBFLAG_BSWAP_CODE(tb->flags);
     dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(tb->flags) & 0xf) << 1;
     dc->condexec_cond = ARM_TBFLAG_CONDEXEC(tb->flags) >> 4;
+    dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
+    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
 #if !defined(CONFIG_USER_ONLY)
-    dc->user = (ARM_TBFLAG_PRIV(tb->flags) == 0);
+    dc->user = (dc->current_el == 0);
 #endif
     dc->ns = ARM_TBFLAG_NS(tb->flags);
     dc->cpacr_fpen = ARM_TBFLAG_CPACR_FPEN(tb->flags);
@@ -11042,7 +11044,6 @@  static inline void gen_intermediate_code_internal(ARMCPU *cpu,
     dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
     dc->c15_cpar = ARM_TBFLAG_XSCALE_CPAR(tb->flags);
     dc->cp_regs = cpu->cp_regs;
-    dc->current_el = arm_current_el(env);
     dc->features = env->features;
 
     /* Single step state. The code-generation logic here is:
diff --git a/target-arm/translate.h b/target-arm/translate.h
index f6ee789..a1eb5b5 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -20,6 +20,7 @@  typedef struct DisasContext {
 #if !defined(CONFIG_USER_ONLY)
     int user;
 #endif
+    ARMMMUIdx mmu_idx; /* MMU index to use for normal loads/stores */
     bool ns;        /* Use non-secure CPREG bank on access */
     bool cpacr_fpen; /* FP enabled via CPACR.FPEN */
     bool vfp_enabled; /* FP enabled via FPSCR.EN */
@@ -69,7 +70,7 @@  static inline int arm_dc_feature(DisasContext *dc, int feature)
 
 static inline int get_mem_index(DisasContext *s)
 {
-    return s->current_el;
+    return s->mmu_idx;
 }
 
 /* target-specific extra values for is_jmp */