diff mbox series

[v4,24/33] target/nios2: Introduce shadow register sets

Message ID 20220308072005.307955-25-richard.henderson@linaro.org
State New
Headers show
Series target/nios2: Shadow register set, EIC and VIC | expand

Commit Message

Richard Henderson March 8, 2022, 7:19 a.m. UTC
Do not actually enable them yet, but add all of the plumbing
needed to address them.  Do not enable them for user-only.

Add an env->crs pointer that handles the indirection to
the current register set.  Add a nios2_crs() function to
wrap this for normal uses, which hides the difference
between user-only and system modes.

From the notes on wrprs, which state that r0 must be initialized
before use in shadow register sets, infer that R_ZERO is *not*
hardwired to zero in shadow register sets.  Adjust load_gpr and
dest_gpr to reflect this.  At the same time, we might as well
special-case crs == 0 to avoid the indirection through env->crs
during translation, given that this is intended to be the most
common case for non-interrupt handlers.

Drop the zeroing of env->regs at reset, as those are undefined.
Do init env->crs at reset.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/nios2/cpu.h        | 32 ++++++++++++++++++++
 hw/nios2/boot.c           |  8 ++---
 target/nios2/cpu.c        |  7 +++--
 target/nios2/helper.c     | 12 ++++----
 target/nios2/nios2-semi.c | 13 ++++----
 target/nios2/translate.c  | 62 ++++++++++++++++++++++++++-------------
 6 files changed, 95 insertions(+), 39 deletions(-)

Comments

Amir Gonnen March 9, 2022, 2:02 p.m. UTC | #1
Hi Richard,

How does "cpu_crs_R" work?
In your version you hold a pointer to the current register set instead of copying registers back and forth like I did, which makes sense.

But how does TCG know which set to refer to when it generates code?
It looks like it's always accessing the same cpu_crs_R, so how does it relate to the correct register set on "shadow_regs"?

In fact, I imagined it would be necessary to change "cpu_get_tb_cpu_state" and set "cs_base" or "flags" to STATUS.CRS such that different code would be generated for each shadow instance. Otherwise, each gpr access would be indirect. I'm probably missing something here.

Thanks,
Amir
Richard Henderson March 9, 2022, 6:01 p.m. UTC | #2
On 3/9/22 04:02, Amir Gonnen wrote:
> How does "cpu_crs_R" work?
...
> Otherwise, each gpr access would be indirect. I'm probably missing something here.

They are indirect, but with some optimization.

> +    TCGv_ptr crs = tcg_global_mem_new_ptr(cpu_env,
> +                                          offsetof(CPUNios2State, crs), "crs");
> +
> +    for (int i = 0; i < NUM_GP_REGS; i++) {
> +        cpu_crs_R[i] = tcg_global_mem_new(crs, 4 * i, gr_regnames[i]);
> +    }

Note that the crs variable is relative to env, and then the cpu_crs_R registers are 
relative to crs.

Without an EIC-enabled kernel for testing, it's hard for me to show the nios2 code at 
work, but it is identical to what we do over in target/sparc:

>     for (i = 8; i < 32; ++i) {
>         cpu_regs[i] = tcg_global_mem_new(cpu_regwptr,
>                                          (i - 8) * sizeof(target_ulong),
>                                          gregnames[i]);
>     }

A small example of what this looks like, as shown by -d in_asm,op_ind,op_opt:

IN: __libc_start_main
0x00000000001032e8:  save  %sp, -720, %sp
0x00000000001032ec:  stx  %i0, [ %fp + 0x87f ]
0x00000000001032f0:  stx  %i1, [ %fp + 0x887 ]
0x00000000001032f4:  stx  %i2, [ %fp + 0x88f ]

OP before indirect lowering:
  ld_i32 tmp0,env,$0xfffffffffffffff8
  brcond_i32 tmp0,$0x0,lt,$L0              dead: 0 1

  ---- 00000000001032e8 00000000001032ec
  add_i64 tmp3,o6,$0xfffffffffffffd30      dead: 1 2
  call save,$0x0,$0,env                    dead: 0
  mov_i64 o6,tmp3                          sync: 0  dead: 0 1

  ---- 00000000001032ec 00000000001032f0
  add_i64 tmp2,i6,$0x87f                   dead: 2
  qemu_st_i64 i0,tmp2,beq,0                dead: 0 1

  ---- 00000000001032f0 00000000001032f4
  add_i64 tmp2,i6,$0x887                   dead: 2
  qemu_st_i64 i1,tmp2,beq,0                dead: 0 1

  ---- 00000000001032f4 00000000001032f8
  add_i64 tmp2,i6,$0x88f                   dead: 1 2
  qemu_st_i64 i2,tmp2,beq,0                dead: 0 1


You can see that early on, we optimize with the windowed registers themselves (o[0-7] and 
i[0-7] here).  But then we lower that to explicit load/store operations:


OP after optimization and liveness analysis:
  ld_i32 tmp0,env,$0xfffffffffffffff8      pref=0xffff
  brcond_i32 tmp0,$0x0,lt,$L0              dead: 0 1

  ---- 00000000001032e8 00000000001032ec
  ld_i64 tmp20,regwptr,$0x30               dead: 1  pref=0xffff
  add_i64 tmp3,tmp20,$0xfffffffffffffd30   dead: 1 2  pref=0xf038
  call save,$0x0,$0,env                    dead: 0
  st_i64 tmp3,regwptr,$0x30                dead: 0

  ---- 00000000001032ec 00000000001032f0
  ld_i64 tmp36,regwptr,$0xb0               pref=0xf038
  add_i64 tmp2,tmp36,$0x87f                dead: 2  pref=0xffff
  ld_i64 tmp30,regwptr,$0x80               pref=0xffff
  qemu_st_i64 tmp30,tmp2,beq,0             dead: 0 1

  ---- 00000000001032f0 00000000001032f4
  add_i64 tmp2,tmp36,$0x887                dead: 2  pref=0xffff
  ld_i64 tmp31,regwptr,$0x88               pref=0xffff
  qemu_st_i64 tmp31,tmp2,beq,0             dead: 0 1

  ---- 00000000001032f4 00000000001032f8
  add_i64 tmp2,tmp36,$0x88f                dead: 1 2  pref=0xffff
  ld_i64 tmp32,regwptr,$0x90               dead: 1  pref=0xffff
  qemu_st_i64 tmp32,tmp2,beq,0             dead: 0 1


You can now see the new tmpN variables, and the uses of regwptr in the loads and stores.


r~
diff mbox series

Patch

diff --git a/target/nios2/cpu.h b/target/nios2/cpu.h
index 2a5e070960..f05536e04d 100644
--- a/target/nios2/cpu.h
+++ b/target/nios2/cpu.h
@@ -61,6 +61,11 @@  struct Nios2CPUClass {
 #define NUM_GP_REGS 32
 #define NUM_CR_REGS 32
 
+#ifndef CONFIG_USER_ONLY
+/* 63 shadow register sets; index 0 is the primary register set. */
+#define NUM_REG_SETS 64
+#endif
+
 /* General purpose register aliases */
 enum {
     R_ZERO   = 0,
@@ -176,7 +181,13 @@  FIELD(CR_TLBMISC, EE, 24, 1)
 #define EXCP_MPUD     17
 
 struct CPUNios2State {
+#ifdef CONFIG_USER_ONLY
     uint32_t regs[NUM_GP_REGS];
+#else
+    uint32_t shadow_regs[NUM_REG_SETS][NUM_GP_REGS];
+    uint32_t *crs;
+#endif
+
     union {
         uint32_t ctrl[NUM_CR_REGS];
         struct {
@@ -245,6 +256,23 @@  static inline bool nios2_cr_reserved(const ControlRegState *s)
     return (s->writable | s->readonly) == 0;
 }
 
+static inline void nios2_update_crs(CPUNios2State *env)
+{
+#ifndef CONFIG_USER_ONLY
+    unsigned crs = FIELD_EX32(env->status, CR_STATUS, CRS);
+    env->crs = env->shadow_regs[crs];
+#endif
+}
+
+static inline uint32_t *nios2_crs(CPUNios2State *env)
+{
+#ifdef CONFIG_USER_ONLY
+    return env->regs;
+#else
+    return env->crs;
+#endif
+}
+
 void nios2_tcg_init(void);
 void nios2_cpu_do_interrupt(CPUState *cs);
 void dump_mmu(CPUNios2State *env);
@@ -286,12 +314,16 @@  typedef Nios2CPU ArchCPU;
 
 #include "exec/cpu-all.h"
 
+FIELD(TBFLAGS, CRS0, 0, 1)
+FIELD(TBFLAGS, U, 1, 1)     /* Overlaps CR_STATUS_U */
+
 static inline void cpu_get_tb_cpu_state(CPUNios2State *env, target_ulong *pc,
                                         target_ulong *cs_base, uint32_t *flags)
 {
     *pc = env->pc;
     *cs_base = 0;
     *flags = env->status & CR_STATUS_U;
+    *flags |= env->status & R_CR_STATUS_CRS_MASK ? 0 : R_TBFLAGS_CRS0_MASK;
 }
 
 #endif /* NIOS2_CPU_H */
diff --git a/hw/nios2/boot.c b/hw/nios2/boot.c
index 5b3e4efed5..96896f2ec5 100644
--- a/hw/nios2/boot.c
+++ b/hw/nios2/boot.c
@@ -62,10 +62,10 @@  static void main_cpu_reset(void *opaque)
 
     cpu_reset(CPU(cpu));
 
-    env->regs[R_ARG0] = NIOS2_MAGIC;
-    env->regs[R_ARG1] = boot_info.initrd_start;
-    env->regs[R_ARG2] = boot_info.fdt;
-    env->regs[R_ARG3] = boot_info.cmdline;
+    nios2_crs(env)[R_ARG0] = NIOS2_MAGIC;
+    nios2_crs(env)[R_ARG1] = boot_info.initrd_start;
+    nios2_crs(env)[R_ARG2] = boot_info.fdt;
+    nios2_crs(env)[R_ARG3] = boot_info.cmdline;
 
     cpu_set_pc(cs, boot_info.bootstrap_pc);
     if (boot_info.machine_cpu_reset) {
diff --git a/target/nios2/cpu.c b/target/nios2/cpu.c
index 2779650128..05f4a7a93a 100644
--- a/target/nios2/cpu.c
+++ b/target/nios2/cpu.c
@@ -53,7 +53,6 @@  static void nios2_cpu_reset(DeviceState *dev)
 
     ncc->parent_reset(dev);
 
-    memset(env->regs, 0, sizeof(env->regs));
     memset(env->ctrl, 0, sizeof(env->ctrl));
     env->pc = cpu->reset_addr;
 
@@ -63,6 +62,8 @@  static void nios2_cpu_reset(DeviceState *dev)
 #else
     env->status = CR_STATUS_RSIE;
 #endif
+
+    nios2_update_crs(env);
 }
 
 #ifndef CONFIG_USER_ONLY
@@ -210,7 +211,7 @@  static int nios2_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
     uint32_t val;
 
     if (n < 32) {          /* GP regs */
-        val = env->regs[n];
+        val = nios2_crs(env)[n];
     } else if (n == 32) {    /* PC */
         val = env->pc;
     } else if (n < 49) {     /* Status regs */
@@ -241,7 +242,7 @@  static int nios2_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
     val = ldl_p(mem_buf);
 
     if (n < 32) {            /* GP regs */
-        env->regs[n] = val;
+        nios2_crs(env)[n] = val;
     } else if (n == 32) {    /* PC */
         env->pc = val;
     } else if (n < 49) {     /* Status regs */
diff --git a/target/nios2/helper.c b/target/nios2/helper.c
index 93338e86f0..007496b957 100644
--- a/target/nios2/helper.c
+++ b/target/nios2/helper.c
@@ -64,7 +64,7 @@  void nios2_cpu_do_interrupt(CPUState *cs)
         env->status |= CR_STATUS_IH;
         env->status &= ~(CR_STATUS_PIE | CR_STATUS_U);
 
-        env->regs[R_EA] = env->pc + 4;
+        nios2_crs(env)[R_EA] = env->pc + 4;
         env->pc = cpu->exception_addr;
         break;
 
@@ -83,7 +83,7 @@  void nios2_cpu_do_interrupt(CPUState *cs)
             env->tlbmisc &= ~CR_TLBMISC_DBL;
             env->tlbmisc |= CR_TLBMISC_WR;
 
-            env->regs[R_EA] = env->pc + 4;
+            nios2_crs(env)[R_EA] = env->pc + 4;
             env->pc = cpu->fast_tlb_miss_addr;
         } else {
             qemu_log_mask(CPU_LOG_INT, "TLB MISS (double) at pc=%x\n", env->pc);
@@ -111,7 +111,7 @@  void nios2_cpu_do_interrupt(CPUState *cs)
             env->tlbmisc |= CR_TLBMISC_WR;
         }
 
-        env->regs[R_EA] = env->pc + 4;
+        nios2_crs(env)[R_EA] = env->pc + 4;
         env->pc = cpu->exception_addr;
         break;
 
@@ -122,7 +122,7 @@  void nios2_cpu_do_interrupt(CPUState *cs)
 
         if ((env->status & CR_STATUS_EH) == 0) {
             env->estatus = env->status;
-            env->regs[R_EA] = env->pc + 4;
+            nios2_crs(env)[R_EA] = env->pc + 4;
         }
 
         env->status |= CR_STATUS_EH;
@@ -137,7 +137,7 @@  void nios2_cpu_do_interrupt(CPUState *cs)
 
         if ((env->status & CR_STATUS_EH) == 0) {
             env->estatus = env->status;
-            env->regs[R_EA] = env->pc + 4;
+            nios2_crs(env)[R_EA] = env->pc + 4;
         }
 
         env->status |= CR_STATUS_EH;
@@ -159,7 +159,7 @@  void nios2_cpu_do_interrupt(CPUState *cs)
 
         if ((env->status & CR_STATUS_EH) == 0) {
             env->bstatus = env->status;
-            env->regs[R_BA] = env->pc + 4;
+            nios2_crs(env)[R_BA] = env->pc + 4;
         }
 
         env->status |= CR_STATUS_EH;
diff --git a/target/nios2/nios2-semi.c b/target/nios2/nios2-semi.c
index fe5598bae4..8495718de0 100644
--- a/target/nios2/nios2-semi.c
+++ b/target/nios2/nios2-semi.c
@@ -144,7 +144,7 @@  static bool translate_stat(CPUNios2State *env, target_ulong addr,
 static void nios2_semi_return_u32(CPUNios2State *env, uint32_t ret,
                                   uint32_t err)
 {
-    target_ulong args = env->regs[R_ARG1];
+    target_ulong args = nios2_crs(env)[R_ARG1];
     if (put_user_u32(ret, args) ||
         put_user_u32(err, args + 4)) {
         /*
@@ -160,7 +160,7 @@  static void nios2_semi_return_u32(CPUNios2State *env, uint32_t ret,
 static void nios2_semi_return_u64(CPUNios2State *env, uint64_t ret,
                                   uint32_t err)
 {
-    target_ulong args = env->regs[R_ARG1];
+    target_ulong args = nios2_crs(env)[R_ARG1];
     if (put_user_u32(ret >> 32, args) ||
         put_user_u32(ret, args + 4) ||
         put_user_u32(err, args + 8)) {
@@ -210,13 +210,14 @@  void do_nios2_semihosting(CPUNios2State *env)
     void *q;
     uint32_t len;
     uint32_t result;
+    uint32_t *crs = nios2_crs(env);
 
-    nr = env->regs[R_ARG0];
-    args = env->regs[R_ARG1];
+    nr = crs[R_ARG0];
+    args = crs[R_ARG1];
     switch (nr) {
     case HOSTED_EXIT:
-        gdb_exit(env->regs[R_ARG0]);
-        exit(env->regs[R_ARG0]);
+        gdb_exit(crs[R_ARG0]);
+        exit(crs[R_ARG0]);
     case HOSTED_OPEN:
         GET_ARG(0);
         GET_ARG(1);
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
index 7c2ad02685..57913da3c9 100644
--- a/target/nios2/translate.c
+++ b/target/nios2/translate.c
@@ -100,12 +100,16 @@  typedef struct DisasContext {
     DisasContextBase  base;
     target_ulong      pc;
     int               mem_idx;
+    bool              crs0;
     TCGv              sink;
     const ControlRegState *cr_state;
 } DisasContext;
 
 static TCGv cpu_R[NUM_GP_REGS];
 static TCGv cpu_pc;
+#ifndef CONFIG_USER_ONLY
+static TCGv cpu_crs_R[NUM_GP_REGS];
+#endif
 
 typedef struct Nios2Instruction {
     void     (*handler)(DisasContext *dc, uint32_t code, uint32_t flags);
@@ -127,22 +131,36 @@  static uint8_t get_opxcode(uint32_t code)
 static TCGv load_gpr(DisasContext *dc, unsigned reg)
 {
     assert(reg < NUM_GP_REGS);
-    if (unlikely(reg == R_ZERO)) {
-        return tcg_constant_tl(0);
+    if (dc->crs0) {
+        if (unlikely(reg == R_ZERO)) {
+            return tcg_constant_tl(0);
+        }
+        return cpu_R[reg];
     }
-    return cpu_R[reg];
+#ifdef CONFIG_USER_ONLY
+    g_assert_not_reached();
+#else
+    return cpu_crs_R[reg];
+#endif
 }
 
 static TCGv dest_gpr(DisasContext *dc, unsigned reg)
 {
     assert(reg < NUM_GP_REGS);
-    if (unlikely(reg == R_ZERO)) {
-        if (dc->sink == NULL) {
-            dc->sink = tcg_temp_new();
+    if (dc->crs0) {
+        if (unlikely(reg == R_ZERO)) {
+            if (dc->sink == NULL) {
+                dc->sink = tcg_temp_new();
+            }
+            return dc->sink;
         }
-        return dc->sink;
+        return cpu_R[reg];
     }
-    return cpu_R[reg];
+#ifdef CONFIG_USER_ONLY
+    g_assert_not_reached();
+#else
+    return cpu_crs_R[reg];
+#endif
 }
 
 static void t_gen_helper_raise_exception(DisasContext *dc,
@@ -174,7 +192,7 @@  static void gen_excp(DisasContext *dc, uint32_t code, uint32_t flags)
 
 static bool gen_check_supervisor(DisasContext *dc)
 {
-    if (dc->base.tb->flags & CR_STATUS_U) {
+    if (dc->base.tb->flags & R_TBFLAGS_U_MASK) {
         /* CPU in user mode, privileged instruction called, stop. */
         t_gen_helper_raise_exception(dc, EXCP_SUPERI);
         return false;
@@ -773,6 +791,7 @@  static void nios2_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
 
     dc->mem_idx = cpu_mmu_index(env, false);
     dc->cr_state = cpu->cr_state;
+    dc->crs0 = FIELD_EX32(dc->base.tb->flags, TBFLAGS, CRS0);
 
     /* Bound the number of insns to execute to those left on the page.  */
     page_insns = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
@@ -871,16 +890,13 @@  void nios2_cpu_dump_state(CPUState *cs, FILE *f, int flags)
 {
     Nios2CPU *cpu = NIOS2_CPU(cs);
     CPUNios2State *env = &cpu->env;
+    const uint32_t *crs = nios2_crs(env);
     int i;
 
-    if (!env) {
-        return;
-    }
-
     qemu_fprintf(f, "IN: PC=%x %s\n", env->pc, lookup_symbol(env->pc));
 
     for (i = 0; i < NUM_GP_REGS; i++) {
-        qemu_fprintf(f, "%9s=%8.8x ", gr_regnames[i], env->regs[i]);
+        qemu_fprintf(f, "%9s=%8.8x ", gr_regnames[i], crs[i]);
         if ((i + 1) % 4 == 0) {
             qemu_fprintf(f, "\n");
         }
@@ -912,13 +928,19 @@  void nios2_cpu_dump_state(CPUState *cs, FILE *f, int flags)
 
 void nios2_tcg_init(void)
 {
-    int i;
-
-    for (i = 0; i < NUM_GP_REGS; i++) {
-        cpu_R[i] = tcg_global_mem_new(cpu_env,
-                                      offsetof(CPUNios2State, regs[i]),
-                                      gr_regnames[i]);
+    for (int i = 0; i < NUM_GP_REGS; i++) {
+        cpu_R[i] = tcg_global_mem_new(cpu_env, 4 * i, gr_regnames[i]);
     }
+
+#ifndef CONFIG_USER_ONLY
+    TCGv_ptr crs = tcg_global_mem_new_ptr(cpu_env,
+                                          offsetof(CPUNios2State, crs), "crs");
+
+    for (int i = 0; i < NUM_GP_REGS; i++) {
+        cpu_crs_R[i] = tcg_global_mem_new(crs, 4 * i, gr_regnames[i]);
+    }
+#endif
+
     cpu_pc = tcg_global_mem_new(cpu_env,
                                 offsetof(CPUNios2State, pc), "pc");
 }