
[v3,60/66] tcg/aarch64: Support raising sigbus for user-only

Message ID 20210818191920.390759-61-richard.henderson@linaro.org
State New
Series Unaligned access for user-only

Commit Message

Richard Henderson Aug. 18, 2021, 7:19 p.m. UTC
Use load-acquire / store-release for the normal case of
alignment matching the access size.  Otherwise, emit a
test + branch sequence invoking helper_unaligned_{ld,st}.
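
For the general case the backend emits an explicit alignment test ahead of the
normal load/store: the low bits of the guest address are tested against the
alignment mask and, if any are set, control branches to the unaligned helper.
A minimal C sketch of the condition being checked (illustrative names only,
not part of the patch):

    /* Sketch only: the condition behind the TST #mask / B.NE pair that
     * tcg_out_test_alignment() emits for the general case. */
    static inline bool addr_is_misaligned(uint64_t guest_addr, unsigned a_bits)
    {
        uint64_t a_mask = (1ull << a_bits) - 1;  /* a_bits = log2(required alignment) */
        return (guest_addr & a_mask) != 0;       /* true -> branch to helper_unaligned_{ld,st} */
    }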

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 tcg/aarch64/tcg-target.h     |   2 -
 tcg/aarch64/tcg-target.c.inc | 174 +++++++++++++++++++++++++++++++----
 2 files changed, 157 insertions(+), 19 deletions(-)

-- 
2.25.1

Comments

Peter Maydell Aug. 20, 2021, 9:46 a.m. UTC | #1
On Wed, 18 Aug 2021 at 20:56, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Use load-acquire / store-release for the normal case of
> alignment matching the access size.  Otherwise, emit a
> test + branch sequence invoking helper_unaligned_{ld,st}.

I don't think this trick will work for a CPU that
implements FEAT_LSE2 -- section B2.5.2 in the
Arm ARM DDI0487G.b says that if LSE2 is implemented
then the CPU either must or may simply perform the
unaligned access rather than taking an alignment fault.

thanks
-- PMM
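
FEAT_LSE2 relaxes the alignment requirements for LDAR/STLR, so they can no
longer be relied on to fault on every misaligned address.  A backend relying
on that fault would therefore need to know whether the host implements the
feature.  A minimal probe sketch, assuming a Linux host where the feature is
reported to userspace as HWCAP_USCAT (reflecting ID_AA64MMFR2_EL1.AT); this
is not part of the patch:

    #include <stdbool.h>
    #include <sys/auxv.h>
    #include <asm/hwcap.h>

    /* Hypothetical helper: report whether the host CPU implements FEAT_LSE2,
     * in which case LDAR/STLR are no longer guaranteed to fault on a
     * misaligned address. */
    static bool host_has_lse2(void)
    {
    #ifdef HWCAP_USCAT
        return (getauxval(AT_HWCAP) & HWCAP_USCAT) != 0;
    #else
        return false;   /* header does not define it (non-arm64 or old headers) */
    #endif
    }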

Patch

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 7a93ac8023..876af589ce 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -151,9 +151,7 @@  typedef enum {
 
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
 
-#ifdef CONFIG_SOFTMMU
 #define TCG_TARGET_NEED_LDST_LABELS
-#endif
 #define TCG_TARGET_NEED_POOL_LABELS
 
 #endif /* AARCH64_TCG_TARGET_H */
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 5edca8d44d..f5664636cf 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -10,6 +10,7 @@ 
  * See the COPYING file in the top-level directory for details.
  */
 
+#include "../tcg-ldst.c.inc"
 #include "../tcg-pool.c.inc"
 #include "qemu/bitops.h"
 
@@ -390,6 +391,10 @@  typedef enum {
     I3305_LDR_v64   = 0x5c000000,
     I3305_LDR_v128  = 0x9c000000,
 
+    /* Load/store exclusive */
+    I3306_LDAR      = 0x08808000 | LDST_LD << 22, /* plus MO << 30 */
+    I3306_STLR      = 0x08808000 | LDST_ST << 22, /* plus MO << 30 */
+
     /* Load/store register.  Described here as 3.3.12, but the helper
        that emits them can transform to 3.3.10 or 3.3.13.  */
     I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
@@ -443,6 +448,7 @@  typedef enum {
     I3404_ANDI      = 0x12000000,
     I3404_ORRI      = 0x32000000,
     I3404_EORI      = 0x52000000,
+    I3404_ANDSI     = 0x72000000,
 
     /* Move wide immediate instructions.  */
     I3405_MOVN      = 0x12800000,
@@ -453,6 +459,9 @@  typedef enum {
     I3406_ADR       = 0x10000000,
     I3406_ADRP      = 0x90000000,
 
+    /* Add/subtract extended register. */
+    I3501_ADDEXT    = 0x0b200000,
+
     /* Add/subtract shifted register instructions (without a shift).  */
     I3502_ADD       = 0x0b000000,
     I3502_ADDS      = 0x2b000000,
@@ -623,6 +632,14 @@  static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
     tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
 }
 
+static void G_GNUC_UNUSED
+tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, MemOp sz,
+                  TCGReg rs, TCGReg rt, TCGReg rt2, TCGReg rn)
+{
+    tcg_out32(s, insn | (sz << 30) | (rs << 16) |
+              (rt2 << 10) | (rn << 5) | rt);
+}
+
 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                               TCGReg rt, int imm19)
 {
@@ -705,6 +722,13 @@  static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
     tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
 }
 
+static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
+                                     TCGReg rd, TCGReg rn,
+                                     TCGReg rm, MemOp ext)
+{
+    tcg_out32(s, insn | 1 << 31 | rm << 16 | ext << 13 | rn << 5 | rd);
+}
+
 /* This function is for both 3.5.2 (Add/Subtract shifted register), for
    the rare occasion when we actually want to supply a shift amount.  */
 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
@@ -1328,8 +1352,9 @@  static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
     if (offset == sextract64(offset, 0, 26)) {
         tcg_out_insn(s, 3206, B, offset);
     } else {
-        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
-        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
+        /* Choose X9 as a call-clobbered non-LR temporary. */
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
+        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
     }
 }
 
@@ -1541,9 +1566,14 @@  static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
     }
 }
 
-#ifdef CONFIG_SOFTMMU
-#include "../tcg-ldst.c.inc"
+static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
+{
+    ptrdiff_t offset = tcg_pcrel_diff(s, target);
+    tcg_debug_assert(offset == sextract64(offset, 0, 21));
+    tcg_out_insn(s, 3406, ADR, rd, offset);
+}
 
+#ifdef CONFIG_SOFTMMU
 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
  *                                     MemOpIdx oi, uintptr_t ra)
  */
@@ -1577,13 +1607,6 @@  static void * const qemu_st_helpers[MO_SIZE + 1] = {
 #endif
 };
 
-static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
-{
-    ptrdiff_t offset = tcg_pcrel_diff(s, target);
-    tcg_debug_assert(offset == sextract64(offset, 0, 21));
-    tcg_out_insn(s, 3406, ADR, rd, offset);
-}
-
 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
     MemOpIdx oi = lb->oi;
@@ -1714,15 +1737,85 @@  static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
     tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
 }
 
+#else
+
+static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
+                                   unsigned a_bits)
+{
+    unsigned a_mask = (1 << a_bits) - 1;
+    TCGLabelQemuLdst *label = new_ldst_label(s);
+
+    label->is_ld = is_ld;
+    label->addrlo_reg = addr_reg;
+
+    /* tst addr, #mask */
+    tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
+
+    label->label_ptr[0] = s->code_ptr;
+
+    /* b.ne slow_path */
+    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
+
+    label->raddr = tcg_splitwx_to_rx(s->code_ptr);
+}
+
+static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
+{
+    if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
+        return false;
+    }
+
+    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg);
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
+
+    /*
+     * "Tail call" to the helper, with the return address back inline,
+     * just for the clarity of the debugging traceback -- the helper
+     * cannot return.
+     */
+    tcg_out_adr(s, TCG_REG_LR, l->raddr);
+    tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld
+                                        : helper_unaligned_st));
+    return true;
+}
+
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+    return tcg_out_fail_alignment(s, l);
+}
+
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+    return tcg_out_fail_alignment(s, l);
+}
+
+static void tcg_out_qemu_ld_acquire(TCGContext *s, MemOp memop, TCGType ext,
+                                    TCGReg data_r, TCGReg addr_r)
+{
+    MemOp size = memop & MO_SIZE;
+
+    tcg_out_insn(s, 3306, LDAR, size,
+                 TCG_REG_XZR, data_r, TCG_REG_XZR, addr_r);
+    if (memop & MO_SIGN) {
+        tcg_out_sxt(s, ext, size, data_r, data_r);
+    }
+}
+
+static void tcg_out_qemu_st_release(TCGContext *s, MemOp memop,
+                                    TCGReg data_r, TCGReg addr_r)
+{
+    MemOp size = memop & MO_SIZE;
+
+    tcg_out_insn(s, 3306, STLR, size,
+                 TCG_REG_XZR, data_r, TCG_REG_XZR, addr_r);
+}
+
 #endif /* CONFIG_SOFTMMU */
 
 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
                                    TCGReg data_r, TCGReg addr_r,
                                    TCGType otype, TCGReg off_r)
 {
-    /* Byte swapping is left to middle-end expansion. */
-    tcg_debug_assert((memop & MO_BSWAP) == 0);
-
     switch (memop & MO_SSIZE) {
     case MO_UB:
         tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
@@ -1756,9 +1849,6 @@  static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
                                    TCGReg data_r, TCGReg addr_r,
                                    TCGType otype, TCGReg off_r)
 {
-    /* Byte swapping is left to middle-end expansion. */
-    tcg_debug_assert((memop & MO_BSWAP) == 0);
-
     switch (memop & MO_SIZE) {
     case MO_8:
         tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
@@ -1782,6 +1872,10 @@  static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
 {
     MemOp memop = get_memop(oi);
     const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
+
+    /* Byte swapping is left to middle-end expansion. */
+    tcg_debug_assert((memop & MO_BSWAP) == 0);
+
 #ifdef CONFIG_SOFTMMU
     unsigned mem_index = get_mmuidx(oi);
     tcg_insn_unit *label_ptr;
@@ -1792,6 +1886,28 @@  static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
     add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
                         s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
+    unsigned a_bits = get_alignment_bits(memop);
+
+    if (a_bits) {
+        /*
+         * If alignment is required, and equals the access size, then
+         * use load-acquire for the side effect of alignment checking.
+         * Despite the extra memory barrier, for a ThunderX2 host,
+         * this is about 40% faster.  It is always smaller.
+         */
+        if (a_bits == (memop & MO_SIZE)) {
+            if (USE_GUEST_BASE) {
+                tcg_out_insn(s, 3501, ADDEXT, TCG_REG_TMP, TCG_REG_GUEST_BASE,
+                             addr_reg, TARGET_LONG_BITS == 64 ? MO_64 : MO_32);
+                addr_reg = TCG_REG_TMP;
+            }
+            tcg_out_qemu_ld_acquire(s, memop, ext, data_reg, addr_reg);
+            return;
+        }
+
+        tcg_out_test_alignment(s, true, addr_reg, a_bits);
+    }
+
     if (USE_GUEST_BASE) {
         tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                                TCG_REG_GUEST_BASE, otype, addr_reg);
@@ -1807,6 +1923,10 @@  static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
 {
     MemOp memop = get_memop(oi);
     const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
+
+    /* Byte swapping is left to middle-end expansion. */
+    tcg_debug_assert((memop & MO_BSWAP) == 0);
+
 #ifdef CONFIG_SOFTMMU
     unsigned mem_index = get_mmuidx(oi);
     tcg_insn_unit *label_ptr;
@@ -1817,6 +1937,26 @@  static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
     add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
                         data_reg, addr_reg, s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
+    unsigned a_bits = get_alignment_bits(memop);
+
+    if (a_bits) {
+        /*
+         * If alignment is required, and equals the access size, then
+         * use store-release for the side effect of alignment checking.
+         */
+        if (a_bits == (memop & MO_SIZE)) {
+            if (USE_GUEST_BASE) {
+                tcg_out_insn(s, 3501, ADDEXT, TCG_REG_TMP, TCG_REG_GUEST_BASE,
+                             addr_reg, TARGET_LONG_BITS == 64 ? MO_64 : MO_32);
+                addr_reg = TCG_REG_TMP;
+            }
+            tcg_out_qemu_st_release(s, memop, data_reg, addr_reg);
+            return;
+        }
+
+        tcg_out_test_alignment(s, false, addr_reg, a_bits);
+    }
+
     if (USE_GUEST_BASE) {
         tcg_out_qemu_st_direct(s, memop, data_reg,
                                TCG_REG_GUEST_BASE, otype, addr_reg);