@@ -2398,14 +2398,14 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
TCGv_i64 addr, int size, bool is_pair)
{
int idx = get_mem_index(s);
- MemOp memop = s->be_data;
+ MemOp memop;
g_assert(size <= 3);
if (is_pair) {
g_assert(size >= 2);
if (size == 2) {
/* The pair must be single-copy atomic for the doubleword. */
- memop |= MO_64 | MO_ALIGN;
+ memop = finalize_memop(s, MO_64 | MO_ALIGN);
tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
if (s->be_data == MO_LE) {
tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
@@ -2415,21 +2415,30 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
}
} else {
- /* The pair must be single-copy atomic for *each* doubleword, not
- the entire quadword, however it must be quadword aligned. */
- memop |= MO_64;
- tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
- memop | MO_ALIGN_16);
+ /*
+ * The pair must be single-copy atomic for *each* doubleword, not
+ * the entire quadword, however it must be quadword aligned.
+ * Expose the complete load to tcg, for ease of tlb lookup,
+ * but indicate that only 8-byte atomicity is required.
+ */
+ TCGv_i128 t16 = tcg_temp_new_i128();
- TCGv_i64 addr2 = tcg_temp_new_i64();
- tcg_gen_addi_i64(addr2, addr, 8);
- tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
+ memop = finalize_memop_atom(s, MO_128 | MO_ALIGN_16,
+ MO_ATOM_IFALIGN_PAIR);
+ tcg_gen_qemu_ld_i128(t16, addr, idx, memop);
+ if (s->be_data == MO_LE) {
+ tcg_gen_extr_i128_i64(cpu_exclusive_val,
+ cpu_exclusive_high, t16);
+ } else {
+ tcg_gen_extr_i128_i64(cpu_exclusive_high,
+ cpu_exclusive_val, t16);
+ }
tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
}
} else {
- memop |= size | MO_ALIGN;
+ memop = finalize_memop(s, size | MO_ALIGN);
tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
}
While we don't require 16-byte atomicity here, using a single larger load simplifies the code, and makes it a closer match to STXP. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/tcg/translate-a64.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-)