diff mbox series

[v3,13/14] tcg/arm: Reserve a register for guest_base

Message ID 20210818212912.396794-14-richard.henderson@linaro.org
State New
Headers show
Series tcg/arm: Unaligned access and other cleanup | expand

Commit Message

Richard Henderson Aug. 18, 2021, 9:29 p.m. UTC
Reserve a register for the guest_base using aarch64 for reference.
By doing so, we do not have to recompute it for every memory load.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 tcg/arm/tcg-target.c.inc | 39 ++++++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 11 deletions(-)

-- 
2.25.1

Comments

Peter Maydell Aug. 20, 2021, 12:03 p.m. UTC | #1
On Wed, 18 Aug 2021 at 22:33, Richard Henderson
<richard.henderson@linaro.org> wrote:
>

> Reserve a register for the guest_base using aarch64 for reference.

> By doing so, we do not have to recompute it for every memory load.

>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  tcg/arm/tcg-target.c.inc | 39 ++++++++++++++++++++++++++++-----------

>  1 file changed, 28 insertions(+), 11 deletions(-)

>

> diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc

> index 35bd4c68d6..2728035177 100644

> --- a/tcg/arm/tcg-target.c.inc

> +++ b/tcg/arm/tcg-target.c.inc

> @@ -84,6 +84,9 @@ static const int tcg_target_call_oarg_regs[2] = {

>

>  #define TCG_REG_TMP  TCG_REG_R12

>  #define TCG_VEC_TMP  TCG_REG_Q15

> +#ifndef CONFIG_SOFTMMU

> +#define TCG_REG_GUEST_BASE  TCG_REG_R11

> +#endif

>

>  typedef enum {

>      COND_EQ = 0x0,

> @@ -1763,7 +1766,8 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)

>

>  static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,

>                                    TCGReg datalo, TCGReg datahi,

> -                                  TCGReg addrlo, TCGReg addend)

> +                                  TCGReg addrlo, TCGReg addend,

> +                                  bool scratch_addend)

>  {

>      /* Byte swapping is left to middle-end expansion. */

>      tcg_debug_assert((opc & MO_BSWAP) == 0);

> @@ -1790,7 +1794,7 @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,

>              && get_alignment_bits(opc) >= MO_64

>              && (datalo & 1) == 0 && datahi == datalo + 1) {

>              tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend);

> -        } else if (datalo != addend) {

> +        } else if (scratch_addend) {

>              tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo);

>              tcg_out_ld32_12(s, COND_AL, datahi, addend, 4);

>          } else {


I don't understand this change. Yes, we can trash the addend
register, but if it's the same as 'datalo' then the second load
is not going to DTRT... Shouldn't this be
  if (scratch_addend && datalo != addend)
?

-- PMM
Richard Henderson Aug. 20, 2021, 6:47 p.m. UTC | #2
On 8/20/21 2:03 AM, Peter Maydell wrote:
>> -        } else if (datalo != addend) {

>> +        } else if (scratch_addend) {

>>               tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo);

>>               tcg_out_ld32_12(s, COND_AL, datahi, addend, 4);

>>           } else {

> 

> I don't understand this change. Yes, we can trash the addend

> register, but if it's the same as 'datalo' then the second load

> is not going to DTRT... Shouldn't this be

>    if (scratch_addend && datalo != addend)

> ?


Previously, addend was *always* a scratch register, TCG_REG_TMP or such.
Afterward, addend may be TCG_REG_GUEST_BASE, which should not be modified.
At no point is there overlap between addend and data{hi,lo}.

r~
Peter Maydell Aug. 21, 2021, 10:38 a.m. UTC | #3
On Fri, 20 Aug 2021 at 19:47, Richard Henderson
<richard.henderson@linaro.org> wrote:
>

> On 8/20/21 2:03 AM, Peter Maydell wrote:

> >> -        } else if (datalo != addend) {

> >> +        } else if (scratch_addend) {

> >>               tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo);

> >>               tcg_out_ld32_12(s, COND_AL, datahi, addend, 4);

> >>           } else {

> >

> > I don't understand this change. Yes, we can trash the addend

> > register, but if it's the same as 'datalo' then the second load

> > is not going to DTRT... Shouldn't this be

> >    if (scratch_addend && datalo != addend)

> > ?

>

> Previously, addend was *always* a scratch register, TCG_REG_TMP or such.

> Afterward, addend may be TCG_REG_GUEST_BASE, which should not be modified.

> At no point is there overlap between addend and data{hi,lo}.


So the old "datalo == addend" code path was dead code ?

Perhaps if the function now assumes that scratch_addend implies
that datalo != addend it would be worth assert()ing that, in case
some future callsite doesn't realize the restriction ?

thanks
-- PMM
diff mbox series

Patch

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 35bd4c68d6..2728035177 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -84,6 +84,9 @@  static const int tcg_target_call_oarg_regs[2] = {
 
 #define TCG_REG_TMP  TCG_REG_R12
 #define TCG_VEC_TMP  TCG_REG_Q15
+#ifndef CONFIG_SOFTMMU
+#define TCG_REG_GUEST_BASE  TCG_REG_R11
+#endif
 
 typedef enum {
     COND_EQ = 0x0,
@@ -1763,7 +1766,8 @@  static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 
 static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
                                   TCGReg datalo, TCGReg datahi,
-                                  TCGReg addrlo, TCGReg addend)
+                                  TCGReg addrlo, TCGReg addend,
+                                  bool scratch_addend)
 {
     /* Byte swapping is left to middle-end expansion. */
     tcg_debug_assert((opc & MO_BSWAP) == 0);
@@ -1790,7 +1794,7 @@  static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
             && get_alignment_bits(opc) >= MO_64
             && (datalo & 1) == 0 && datahi == datalo + 1) {
             tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend);
-        } else if (datalo != addend) {
+        } else if (scratch_addend) {
             tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo);
             tcg_out_ld32_12(s, COND_AL, datahi, addend, 4);
         } else {
@@ -1875,14 +1879,14 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
     label_ptr = s->code_ptr;
     tcg_out_bl_imm(s, COND_NE, 0);
 
-    tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
+    tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend, true);
 
     add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
                         s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
     if (guest_base) {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
-        tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
+        tcg_out_qemu_ld_index(s, opc, datalo, datahi,
+                              addrlo, TCG_REG_GUEST_BASE, false);
     } else {
         tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
     }
@@ -1891,7 +1895,8 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
 
 static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc,
                                   TCGReg datalo, TCGReg datahi,
-                                  TCGReg addrlo, TCGReg addend)
+                                  TCGReg addrlo, TCGReg addend,
+                                  bool scratch_addend)
 {
     /* Byte swapping is left to middle-end expansion. */
     tcg_debug_assert((opc & MO_BSWAP) == 0);
@@ -1912,9 +1917,14 @@  static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc,
             && get_alignment_bits(opc) >= MO_64
             && (datalo & 1) == 0 && datahi == datalo + 1) {
             tcg_out_strd_r(s, cond, datalo, addrlo, addend);
-        } else {
+        } else if (scratch_addend) {
             tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
             tcg_out_st32_12(s, cond, datahi, addend, 4);
+        } else {
+            tcg_out_dat_reg(s, cond, ARITH_ADD, TCG_REG_TMP,
+                            addend, addrlo, SHIFT_IMM_LSL(0));
+            tcg_out_st32_12(s, cond, datalo, TCG_REG_TMP, 0);
+            tcg_out_st32_12(s, cond, datahi, TCG_REG_TMP, 4);
         }
         break;
     default:
@@ -1978,7 +1988,8 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
     mem_index = get_mmuidx(oi);
     addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);
 
-    tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
+    tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi,
+                          addrlo, addend, true);
 
     /* The conditional call must come last, as we're going to return here.  */
     label_ptr = s->code_ptr;
@@ -1988,9 +1999,8 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
                         s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
     if (guest_base) {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
-        tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
-                              datahi, addrlo, TCG_REG_TMP);
+        tcg_out_qemu_st_index(s, COND_AL, opc, datalo, datahi,
+                              addrlo, TCG_REG_GUEST_BASE, false);
     } else {
         tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
     }
@@ -3153,6 +3163,13 @@  static void tcg_target_qemu_prologue(TCGContext *s)
 
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
 
+#ifndef CONFIG_SOFTMMU
+    if (guest_base) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
+        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
+    }
+#endif
+
     tcg_out_b_reg(s, COND_AL, tcg_target_call_iarg_regs[1]);
 
     /*