[PULL,15/23] tcg/arm: Improve tlb load for armv7

Message ID	20170907224051.21518-16-richard.henderson@linaro.org
State	New
Headers	show Delivered-To: patch@linaro.org Received-SPF: pass (google.com: domain of qemu-devel-bounces+patch=linaro.org@nongnu.org designates 2001:4830:134:3::11 as permitted sender) client-ip=2001:4830:134:3::11; From: Richard Henderson <richard.henderson@linaro.org> To: qemu-devel@nongnu.org Date: Thu, 7 Sep 2017 15:40:43 -0700 Message-Id: <20170907224051.21518-16-richard.henderson@linaro.org> In-Reply-To: <20170907224051.21518-1-richard.henderson@linaro.org> References: <20170907224051.21518-1-richard.henderson@linaro.org> Subject: [Qemu-devel] [PULL 15/23] tcg/arm: Improve tlb load for armv7 Precedence: list Cc: peter.maydell@linaro.org, Richard Henderson <rth@twiddle.net> Errors-To: qemu-devel-bounces+patch=linaro.org@nongnu.org Sender: "Qemu-devel" <qemu-devel-bounces+patch=linaro.org@nongnu.org>
Series	tcg constant pools and USE_DIRECT_JUMP cleanup \| expand [PULL,00/23] tcg constant pools and USE_DIRECT_JUMP cleanup [PULL,01/23] tcg: Move USE_DIRECT_JUMP discriminator to tcg/cpu/tcg-target.h [PULL,02/23] tcg: Rearrange ldst label tracking [PULL,03/23] tcg: Infrastructure for managing constant pools [PULL,04/23] tcg/i386: Store out-of-range call targets in constant pool [PULL,05/23] tcg/s390: Introduce TCG_REG_TB [PULL,06/23] tcg/s390: Fix sign of patch_reloc addend [PULL,07/23] tcg/s390: Use constant pool for movi [PULL,08/23] tcg/s390: Use constant pool for andi [PULL,09/23] tcg/s390: Use constant pool for ori [PULL,10/23] tcg/s390: Use constant pool for xori [PULL,11/23] tcg/s390: Use constant pool for cmpi [PULL,12/23] tcg/aarch64: Use constant pool for movi [PULL,13/23] tcg/sparc: Introduce TCG_REG_TB [PULL,14/23] tcg/sparc: Use constant pool for movi [PULL,15/23] tcg/arm: Improve tlb load for armv7 [PULL,16/23] tcg/arm: Tighten tlb indexing offset test [PULL,17/23] tcg/arm: Code rearrangement [PULL,18/23] tcg/arm: Extract INSN_NOP [PULL,19/23] tcg/arm: Use constant pool for movi [PULL,20/23] tcg/arm: Use constant pool for call [PULL,21/23] tcg/ppc: Change TCG_REG_RA to TCG_REG_TB [PULL,22/23] tcg/ppc: Look for shifted constants [PULL,23/23] tcg/ppc: Use constant pool for movi

Message ID

20170907224051.21518-16-richard.henderson@linaro.org

State

New

Headers

Received-SPF: pass (google.com: domain of
	qemu-devel-bounces+patch=linaro.org@nongnu.org designates
	2001:4830:134:3::11 as permitted sender)
	client-ip=2001:4830:134:3::11; 
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Date: Thu,  7 Sep 2017 15:40:43 -0700
Message-Id: <20170907224051.21518-16-richard.henderson@linaro.org>
In-Reply-To: <20170907224051.21518-1-richard.henderson@linaro.org>
References: <20170907224051.21518-1-richard.henderson@linaro.org>
Subject: [Qemu-devel] [PULL 15/23] tcg/arm: Improve tlb load for armv7
Precedence: list
Cc: peter.maydell@linaro.org, Richard Henderson <rth@twiddle.net>
Errors-To: qemu-devel-bounces+patch=linaro.org@nongnu.org
Sender: "Qemu-devel" <qemu-devel-bounces+patch=linaro.org@nongnu.org>

Series

tcg constant pools and USE_DIRECT_JUMP cleanup | expand

Commit Message

Richard Henderson Sept. 7, 2017, 10:40 p.m. UTC

From: Richard Henderson <rth@twiddle.net>


Use UBFX to extract the TLB index, which avoids the limitation on
CPU_TLB_BITS.  Since we're dropping the initial shift, we need to
replace the page masking.  We can use MOVW+BIC to do this without
shifting.  The result is the same size as the armv6 path, with one
fewer conditional instruction.

Signed-off-by: Richard Henderson <rth@twiddle.net>

---
 tcg/arm/tcg-target.inc.c | 72 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 52 insertions(+), 20 deletions(-)

-- 
2.13.5

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 81ea900852..66c369c239 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -1173,18 +1173,33 @@  static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
     unsigned s_bits = opc & MO_SIZE;
     unsigned a_bits = get_alignment_bits(opc);
 
-    /* Should generate something like the following:
-     *   shr    tmp, addrlo, #TARGET_PAGE_BITS                    (1)
+    /* V7 generates the following:
+     *   ubfx   r0, addrlo, #TARGET_PAGE_BITS, #CPU_TLB_BITS
      *   add    r2, env, #high
-     *   and    r0, tmp, #(CPU_TLB_SIZE - 1)                      (2)
-     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS               (3)
-     *   ldr    r0, [r2, #cmp]                                    (4)
+     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS
+     *   ldr    r0, [r2, #cmp]
+     *   ldr    r2, [r2, #add]
+     *   movw   tmp, #page_align_mask
+     *   bic    tmp, addrlo, tmp
+     *   cmp    r0, tmp
+     *
+     * Otherwise we generate:
+     *   shr    tmp, addrlo, #TARGET_PAGE_BITS
+     *   add    r2, env, #high
+     *   and    r0, tmp, #(CPU_TLB_SIZE - 1)
+     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS
+     *   ldr    r0, [r2, #cmp]
+     *   ldr    r2, [r2, #add]
      *   tst    addrlo, #s_mask
-     *   ldr    r2, [r2, #add]                                    (5)
      *   cmpeq  r0, tmp, lsl #TARGET_PAGE_BITS
      */
-    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
-                    0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
+    if (use_armv7_instructions) {
+        tcg_out_extract(s, COND_AL, TCG_REG_R0, addrlo,
+                        TARGET_PAGE_BITS, CPU_TLB_BITS);
+    } else {
+        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
+                        0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
+    }
 
     /* We checked that the offset is contained within 16 bits above.  */
     if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) {
@@ -1194,9 +1209,10 @@  static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
         add_off -= cmp_off & 0xff00;
         cmp_off &= 0xff;
     }
-
-    tcg_out_dat_imm(s, COND_AL, ARITH_AND,
-                    TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
+    if (!use_armv7_instructions) {
+        tcg_out_dat_imm(s, COND_AL, ARITH_AND,
+                        TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
+    }
     tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
                     TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
 
@@ -1212,24 +1228,40 @@  static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
         }
     }
 
+    /* Load the tlb addend.  */
+    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);
+
     /* Check alignment.  We don't support inline unaligned acceses,
        but we can easily support overalignment checks.  */
     if (a_bits < s_bits) {
         a_bits = s_bits;
     }
-    if (a_bits) {
-        tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, (1 << a_bits) - 1);
-    }
 
-    /* Load the tlb addend.  */
-    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);
+    if (use_armv7_instructions) {
+        tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1));
+        int rot = encode_imm(mask);
 
-    tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
-                    TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
+        if (rot >= 0) {
+            tcg_out_dat_imm(s, COND_AL, ARITH_BIC, TCG_REG_TMP, addrlo,
+                            rotl(mask, rot) | (rot << 7));
+        } else {
+            tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask);
+            tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
+                            addrlo, TCG_REG_TMP, 0);
+        }
+        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R0, TCG_REG_TMP, 0);
+    } else {
+        if (a_bits) {
+            tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo,
+                            (1 << a_bits) - 1);
+        }
+        tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP,
+                        0, TCG_REG_R0, TCG_REG_TMP,
+                        SHIFT_IMM_LSL(TARGET_PAGE_BITS));
+    }
 
     if (TARGET_LONG_BITS == 64) {
-        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
-                        TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
+        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R1, addrhi, 0);
     }
 
     return TCG_REG_R2;

[PULL,15/23] tcg/arm: Improve tlb load for armv7

Commit Message

Patch