@@ -220,6 +220,11 @@ uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr ptr,
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr ptr,
MemOpIdx oi, uintptr_t ra);
+Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra);
+Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra);
+
void cpu_stb_mmu(CPUArchState *env, abi_ptr ptr, uint8_t val,
MemOpIdx oi, uintptr_t ra);
void cpu_stw_be_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val,
@@ -235,6 +240,11 @@ void cpu_stl_le_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val,
void cpu_stq_le_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val,
MemOpIdx oi, uintptr_t ra);
+void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
+ MemOpIdx oi, uintptr_t ra);
+void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
+ MemOpIdx oi, uintptr_t ra);
+
uint32_t cpu_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr,
uint32_t cmpv, uint32_t newv,
MemOpIdx oi, uintptr_t retaddr);
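As a usage sketch only (not part of the patch; the helper name, register array and mmu-index choice are invented for illustration), an out-of-line target helper could drive the new 16-byte accessors roughly like this:

    /* Hypothetical target helper -- shows the intended call pattern only. */
    void helper_demo_ld16(CPUArchState *env, uint32_t rd, target_ulong addr)
    {
        /* MO_ALIGN and cpu_mmu_index() stand in for whatever the target needs. */
        MemOpIdx oi = make_memop_idx(MO_BE | MO_128 | MO_ALIGN,
                                     cpu_mmu_index(env, false));
        Int128 val = cpu_ld16_be_mmu(env, addr, oi, GETPC());

        env->demo_gpr[rd] = int128_gethi(val);       /* hypothetical fields */
        env->demo_gpr[rd + 1] = int128_getlo(val);
    }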
@@ -845,6 +845,8 @@ void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, MemOp);
void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, MemOp);
void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, MemOp);
void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, MemOp);
+void tcg_gen_qemu_ld_i128(TCGv_i128, TCGv, TCGArg, MemOp);
+void tcg_gen_qemu_st_i128(TCGv_i128, TCGv, TCGArg, MemOp);
static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index)
{
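On the translator side, a minimal sketch of how a front end might use the new expander, assuming the TCGv_i128 helpers added earlier in this series; the DisasContext field and cpu_gpr[] array are placeholders rather than real target code:

    static void gen_demo_lq(DisasContext *ctx, TCGv addr, int rd)
    {
        TCGv_i128 t16 = tcg_temp_new_i128();

        /* One 16-byte-aligned, big-endian, 16-byte load. */
        tcg_gen_qemu_ld_i128(t16, addr, ctx->mem_idx,
                             MO_BE | MO_128 | MO_ALIGN);

        /* Split the 128-bit temporary into a 64-bit register pair. */
        tcg_gen_extr_i128_i64(cpu_gpr[rd + 1], cpu_gpr[rd], t16);
        tcg_temp_free_i128(t16);
    }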
@@ -2199,6 +2199,64 @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu);
}
+Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra)
+{
+ MemOp mop = get_memop(oi);
+ int mmu_idx = get_mmuidx(oi);
+ MemOpIdx new_oi;
+ unsigned a_bits;
+ uint64_t h, l;
+
+ tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
+ a_bits = get_alignment_bits(mop);
+
+ /* Handle CPU specific unaligned behaviour */
+ if (addr & ((1 << a_bits) - 1)) {
+ cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
+ mmu_idx, ra);
+ }
+
+ /* Construct an unaligned 64-bit replacement MemOpIdx. */
+ mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
+ new_oi = make_memop_idx(mop, mmu_idx);
+
+ h = helper_be_ldq_mmu(env, addr, new_oi, ra);
+ l = helper_be_ldq_mmu(env, addr + 8, new_oi, ra);
+
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
+ return int128_make128(l, h);
+}
+
+Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra)
+{
+ MemOp mop = get_memop(oi);
+ int mmu_idx = get_mmuidx(oi);
+ MemOpIdx new_oi;
+ unsigned a_bits;
+ uint64_t h, l;
+
+ tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
+ a_bits = get_alignment_bits(mop);
+
+ /* Handle CPU specific unaligned behaviour */
+ if (addr & ((1 << a_bits) - 1)) {
+ cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
+ mmu_idx, ra);
+ }
+
+ /* Construct an unaligned 64-bit replacement MemOpIdx. */
+ mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
+ new_oi = make_memop_idx(mop, mmu_idx);
+
+ l = helper_le_ldq_mmu(env, addr, new_oi, ra);
+ h = helper_le_ldq_mmu(env, addr + 8, new_oi, ra);
+
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
+ return int128_make128(l, h);
+}
+
/*
* Store Helpers
*/
@@ -2553,6 +2611,60 @@ void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
cpu_store_helper(env, addr, val, oi, retaddr, helper_le_stq_mmu);
}
+void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
+ MemOpIdx oi, uintptr_t ra)
+{
+ MemOp mop = get_memop(oi);
+ int mmu_idx = get_mmuidx(oi);
+ MemOpIdx new_oi;
+ unsigned a_bits;
+
+ tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
+ a_bits = get_alignment_bits(mop);
+
+ /* Handle CPU specific unaligned behaviour */
+ if (addr & ((1 << a_bits) - 1)) {
+ cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
+ mmu_idx, ra);
+ }
+
+ /* Construct an unaligned 64-bit replacement MemOpIdx. */
+ mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
+ new_oi = make_memop_idx(mop, mmu_idx);
+
+ helper_be_stq_mmu(env, addr, int128_gethi(val), new_oi, ra);
+ helper_be_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra);
+
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
+}
+
+void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
+ MemOpIdx oi, uintptr_t ra)
+{
+ MemOp mop = get_memop(oi);
+ int mmu_idx = get_mmuidx(oi);
+ MemOpIdx new_oi;
+ unsigned a_bits;
+
+ tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
+ a_bits = get_alignment_bits(mop);
+
+ /* Handle CPU specific unaligned behaviour */
+ if (addr & ((1 << a_bits) - 1)) {
+ cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
+ mmu_idx, ra);
+ }
+
+ /* Construct an unaligned 64-bit replacement MemOpIdx. */
+ mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
+ new_oi = make_memop_idx(mop, mmu_idx);
+
+ helper_le_stq_mmu(env, addr, int128_getlo(val), new_oi, ra);
+ helper_le_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra);
+
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
+}
+
#include "ldst_common.c.inc"
/*
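For reference, an illustration (example values chosen arbitrarily) of the guest byte layout the two softmmu store helpers above produce from the same Int128:

    Int128 v = int128_make128(0x0011223344556677ull,   /* low 64 bits  */
                              0x8899aabbccddeeffull);  /* high 64 bits */

    /* cpu_st16_be_mmu(env, a, v, oi, ra) leaves guest memory as:
     *   a+0  .. a+7  : 88 99 aa bb cc dd ee ff   (high half, big-endian)
     *   a+8  .. a+15 : 00 11 22 33 44 55 66 77   (low half, big-endian)
     *
     * cpu_st16_le_mmu(env, a, v, oi, ra) leaves guest memory as:
     *   a+0  .. a+7  : 77 66 55 44 33 22 11 00   (low half, little-endian)
     *   a+8  .. a+15 : ff ee dd cc bb aa 99 88   (high half, little-endian)
     */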
@@ -394,6 +394,42 @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
return ret;
}
+Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra)
+{
+ void *haddr;
+ Int128 ret;
+
+ validate_memop(oi, MO_128 | MO_BE);
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
+ memcpy(&ret, haddr, 16);
+ clear_helper_retaddr();
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
+
+ if (!HOST_BIG_ENDIAN) {
+ ret = bswap128(ret);
+ }
+ return ret;
+}
+
+Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra)
+{
+ void *haddr;
+ Int128 ret;
+
+ validate_memop(oi, MO_128 | MO_LE);
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
+ memcpy(&ret, haddr, 16);
+ clear_helper_retaddr();
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
+
+ if (HOST_BIG_ENDIAN) {
+ ret = bswap128(ret);
+ }
+ return ret;
+}
+
void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
MemOpIdx oi, uintptr_t ra)
{
@@ -478,6 +514,36 @@ void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
+void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
+ Int128 val, MemOpIdx oi, uintptr_t ra)
+{
+ void *haddr;
+
+ validate_memop(oi, MO_128 | MO_BE);
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
+ if (!HOST_BIG_ENDIAN) {
+ val = bswap128(val);
+ }
+ memcpy(haddr, &val, 16);
+ clear_helper_retaddr();
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
+}
+
+void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
+ Int128 val, MemOpIdx oi, uintptr_t ra)
+{
+ void *haddr;
+
+ validate_memop(oi, MO_128 | MO_LE);
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
+ if (HOST_BIG_ENDIAN) {
+ val = bswap128(val);
+ }
+ memcpy(haddr, &val, 16);
+ clear_helper_retaddr();
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
+}
+
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr ptr)
{
uint32_t ret;
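As an explanatory sketch only (not part of the patch), the user-mode 16-byte load above yields the same value as combining two of the existing 8-byte accessors; "oi8" here is a hypothetical 64-bit MemOpIdx derived from the 128-bit one:

    static Int128 demo_ld16_be(CPUArchState *env, abi_ptr addr,
                               MemOpIdx oi8, uintptr_t ra)
    {
        /* Big endian: the more significant half sits at the lower address. */
        uint64_t hi = cpu_ldq_be_mmu(env, addr, oi8, ra);
        uint64_t lo = cpu_ldq_be_mmu(env, addr + 8, oi8, ra);
        return int128_make128(lo, hi);
    }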
@@ -3107,6 +3107,140 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
}
}
+static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
+{
+ MemOp mop_1 = orig, mop_2;
+
+ tcg_debug_assert((orig & MO_SIZE) == MO_128);
+ tcg_debug_assert((orig & MO_SIGN) == 0);
+
+ /* Use a memory ordering implemented by the host. */
+ if (!TCG_TARGET_HAS_MEMORY_BSWAP && (orig & MO_BSWAP)) {
+ mop_1 &= ~MO_BSWAP;
+ }
+
+ /* Reduce the size to 64-bit. */
+ mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
+
+ /* Retain the alignment constraints of the original. */
+ switch (orig & MO_AMASK) {
+ case MO_UNALN:
+ case MO_ALIGN_2:
+ case MO_ALIGN_4:
+ mop_2 = mop_1;
+ break;
+ case MO_ALIGN_8:
+ /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
+ mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
+ mop_2 = mop_1;
+ break;
+ case MO_ALIGN:
+ /* Second has 8-byte alignment; first has 16-byte alignment. */
+ mop_2 = mop_1;
+ mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
+ break;
+ case MO_ALIGN_16:
+ case MO_ALIGN_32:
+ case MO_ALIGN_64:
+ /* Second has 8-byte alignment; first retains original. */
+ mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ ret[0] = mop_1;
+ ret[1] = mop_2;
+}
+
+void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
+{
+ MemOp mop[2];
+ TCGv addr_p8;
+ TCGv_i64 x, y;
+
+ canonicalize_memop_i128_as_i64(mop, memop);
+
+ tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ addr = plugin_prep_mem_callbacks(addr);
+
+ /* TODO: respect atomicity of the operation. */
+ /* TODO: allow the tcg backend to see the whole operation. */
+
+ /*
+ * Since there are no global TCGv_i128, there is no visible state
+ * changed if the second load faults. Load directly into the two
+ * subwords.
+ */
+ if ((memop & MO_BSWAP) == MO_LE) {
+ x = TCGV128_LOW(val);
+ y = TCGV128_HIGH(val);
+ } else {
+ x = TCGV128_HIGH(val);
+ y = TCGV128_LOW(val);
+ }
+
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
+
+ if ((mop[0] ^ memop) & MO_BSWAP) {
+ tcg_gen_bswap64_i64(x, x);
+ }
+
+ addr_p8 = tcg_temp_new();
+ tcg_gen_addi_tl(addr_p8, addr, 8);
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
+ tcg_temp_free(addr_p8);
+
+ if ((mop[0] ^ memop) & MO_BSWAP) {
+ tcg_gen_bswap64_i64(y, y);
+ }
+
+ plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
+ QEMU_PLUGIN_MEM_R);
+}
+
+void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
+{
+ MemOp mop[2];
+ TCGv addr_p8;
+ TCGv_i64 x, y;
+
+ canonicalize_memop_i128_as_i64(mop, memop);
+
+ tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+ addr = plugin_prep_mem_callbacks(addr);
+
+ /* TODO: respect atomicity of the operation. */
+ /* TODO: allow the tcg backend to see the whole operation. */
+
+ if ((memop & MO_BSWAP) == MO_LE) {
+ x = TCGV128_LOW(val);
+ y = TCGV128_HIGH(val);
+ } else {
+ x = TCGV128_HIGH(val);
+ y = TCGV128_LOW(val);
+ }
+
+ addr_p8 = tcg_temp_new();
+ if ((mop[0] ^ memop) & MO_BSWAP) {
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_bswap64_i64(t, x);
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
+ tcg_gen_bswap64_i64(t, y);
+ tcg_gen_addi_tl(addr_p8, addr, 8);
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
+ tcg_temp_free_i64(t);
+ } else {
+ gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
+ tcg_gen_addi_tl(addr_p8, addr, 8);
+ gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
+ }
+ tcg_temp_free(addr_p8);
+
+ plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
+ QEMU_PLUGIN_MEM_W);
+}
+
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
{
switch (opc & MO_SSIZE) {
These do not yet consider atomicity of the 16-byte value; this is a
direct replacement for the current target code, which uses a pair of
8-byte operations.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/cpu_ldst.h |  10 +++
 include/tcg/tcg-op.h    |   2 +
 accel/tcg/cputlb.c      | 112 +++++++++++++++++++++++++++++++++
 accel/tcg/user-exec.c   |  66 ++++++++++++++++++++
 tcg/tcg-op.c            | 134 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 324 insertions(+)
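For reference, a worked illustration of what canonicalize_memop_i128_as_i64() produces for a few representative MemOps, assuming a host with TCG_TARGET_HAS_MEMORY_BSWAP (so MO_BSWAP is preserved); the function is static to tcg-op.c, so this is explanatory rather than a usage example:

    MemOp two[2];

    canonicalize_memop_i128_as_i64(two, MO_BE | MO_128 | MO_ALIGN);
    /* two[0] == MO_BE | MO_64 | MO_ALIGN_16   (first 8 bytes, 16-byte aligned) */
    /* two[1] == MO_BE | MO_64 | MO_ALIGN      (second 8 bytes, 8-byte aligned) */

    canonicalize_memop_i128_as_i64(two, MO_LE | MO_128 | MO_ALIGN_8);
    /* two[0] == two[1] == MO_LE | MO_64 | MO_ALIGN
       (MO_ALIGN_8 with MO_64 is rewritten as the natural MO_ALIGN) */

    canonicalize_memop_i128_as_i64(two, MO_BE | MO_128);
    /* two[0] == two[1] == MO_BE | MO_64       (unaligned stays unaligned) */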