[PULL,v2,12/21] tcg: Split CONFIG_ATOMIC128

Message ID	20181019060656.7968-13-richard.henderson@linaro.org
State	New
Headers	show Delivered-To: patch@linaro.org Received-SPF: pass (google.com: domain of qemu-devel-bounces+patch=linaro.org@nongnu.org designates 2001:4830:134:3::11 as permitted sender) client-ip=2001:4830:134:3::11; From: Richard Henderson <richard.henderson@linaro.org> To: qemu-devel@nongnu.org Date: Thu, 18 Oct 2018 23:06:47 -0700 Message-Id: <20181019060656.7968-13-richard.henderson@linaro.org> In-Reply-To: <20181019060656.7968-1-richard.henderson@linaro.org> References: <20181019060656.7968-1-richard.henderson@linaro.org> Subject: [Qemu-devel] [PULL v2 12/21] tcg: Split CONFIG_ATOMIC128 Precedence: list Cc: peter.maydell@linaro.org Errors-To: qemu-devel-bounces+patch=linaro.org@nongnu.org Sender: "Qemu-devel" <qemu-devel-bounces+patch=linaro.org@nongnu.org>
Series	tcg patch queue \| expand [PULL,v2,00/21] tcg patch queue [PULL,v2,01/21] tcg: Implement CPU_LOG_TB_NOCHAIN during expansion [PULL,v2,02/21] tcg: access cpu->icount_decr.u16.high with atomics [PULL,v2,03/21] tcg: fix use of uninitialized variable under CONFIG_PROFILER [PULL,v2,04/21] tcg: plug holes in struct TCGProfile [PULL,v2,05/21] tcg: distribute tcg_time into TCG contexts [PULL,v2,06/21] target/alpha: remove tlb_flush from alpha_cpu_initfn [PULL,v2,07/21] target/unicore32: remove tlb_flush from uc32_init_fn [PULL,v2,08/21] exec: introduce tlb_init [PULL,v2,09/21] cputlb: fix assert_cpu_is_self macro [PULL,v2,10/21] cputlb: serialize tlb updates with env->tlb_lock [PULL,v2,11/21] tcg: Add tlb_index and tlb_entry helpers [PULL,v2,12/21] tcg: Split CONFIG_ATOMIC128 [PULL,v2,13/21] target/i386: Convert to HAVE_CMPXCHG128 [PULL,v2,14/21] target/arm: Convert to HAVE_CMPXCHG128 [PULL,v2,15/21] target/arm: Check HAVE_CMPXCHG128 at translate time [PULL,v2,16/21] target/ppc: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128 [PULL,v2,17/21] target/s390x: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128 [PULL,v2,18/21] target/s390x: Split do_cdsg, do_lpq, do_stpq [PULL,v2,19/21] target/s390x: Skip wout, cout helpers if op helper does not return [PULL,v2,20/21] target/s390x: Check HAVE_ATOMIC128 and HAVE_CMPXCHG128 at translate [PULL,v2,21/21] cputlb: read CPUTLBEntry.addr_write atomically

diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h index d751bcba48..efde12fdb2 100644 --- a/accel/tcg/atomic_template.h +++ b/accel/tcg/atomic_template.h @@ -100,19 +100,24 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, DATA_TYPE ret; ATOMIC_TRACE_RMW; +#if DATA_SIZE == 16 + ret = atomic16_cmpxchg(haddr, cmpv, newv); +#else ret = atomic_cmpxchg__nocheck(haddr, cmpv, newv); +#endif ATOMIC_MMU_CLEANUP; return ret; } #if DATA_SIZE >= 16 +#if HAVE_ATOMIC128 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) { ATOMIC_MMU_DECLS; DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; ATOMIC_TRACE_LD; - __atomic_load(haddr, &val, __ATOMIC_RELAXED); + val = atomic16_read(haddr); ATOMIC_MMU_CLEANUP; return val; } @@ -124,9 +129,10 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; ATOMIC_TRACE_ST; - __atomic_store(haddr, &val, __ATOMIC_RELAXED); + atomic16_set(haddr, val); ATOMIC_MMU_CLEANUP; } +#endif #else ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val EXTRA_ARGS) @@ -228,19 +234,24 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, DATA_TYPE ret; ATOMIC_TRACE_RMW; +#if DATA_SIZE == 16 + ret = atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv)); +#else ret = atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)); +#endif ATOMIC_MMU_CLEANUP; return BSWAP(ret); } #if DATA_SIZE >= 16 +#if HAVE_ATOMIC128 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) { ATOMIC_MMU_DECLS; DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; ATOMIC_TRACE_LD; - __atomic_load(haddr, &val, __ATOMIC_RELAXED); + val = atomic16_read(haddr); ATOMIC_MMU_CLEANUP; return BSWAP(val); } @@ -253,9 +264,10 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ATOMIC_TRACE_ST; val = BSWAP(val); - __atomic_store(haddr, &val, __ATOMIC_RELAXED); + atomic16_set(haddr, val); ATOMIC_MMU_CLEANUP; } +#endif #else ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val EXTRA_ARGS) diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h new file mode 100644 index 0000000000..a6af22ff10 --- /dev/null +++ b/include/qemu/atomic128.h @@ -0,0 +1,153 @@ +/* + * Simple interface for 128-bit atomic operations. + * + * Copyright (C) 2018 Linaro, Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + * See docs/devel/atomics.txt for discussion about the guarantees each + * atomic primitive is meant to provide. + */ + +#ifndef QEMU_ATOMIC128_H +#define QEMU_ATOMIC128_H + +/* + * GCC is a house divided about supporting large atomic operations. + * + * For hosts that only have large compare-and-swap, a legalistic reading + * of the C++ standard means that one cannot implement __atomic_read on + * read-only memory, and thus all atomic operations must synchronize + * through libatomic. + * + * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80878 + * + * This interpretation is not especially helpful for QEMU. + * For softmmu, all RAM is always read/write from the hypervisor. + * For user-only, if the guest doesn't implement such an __atomic_read + * then the host need not worry about it either. + * + * Moreover, using libatomic is not an option, because its interface is + * built for std::atomic<T>, and requires that *all* accesses to such an + * object go through the library. In our case we do not have an object + * in the C/C++ sense, but a view of memory as seen by the guest. + * The guest may issue a large atomic operation and then access those + * pieces using word-sized accesses. From the hypervisor, we have no + * way to connect those two actions. + * + * Therefore, special case each platform. + */ + +#if defined(CONFIG_ATOMIC128) +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) +{ + return atomic_cmpxchg__nocheck(ptr, cmp, new); +} +# define HAVE_CMPXCHG128 1 +#elif defined(CONFIG_CMPXCHG128) +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) +{ + return __sync_val_compare_and_swap_16(ptr, cmp, new); +} +# define HAVE_CMPXCHG128 1 +#elif defined(__aarch64__) +/* Through gcc 8, aarch64 has no support for 128-bit at all. */ +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) +{ + uint64_t cmpl = int128_getlo(cmp), cmph = int128_gethi(cmp); + uint64_t newl = int128_getlo(new), newh = int128_gethi(new); + uint64_t oldl, oldh; + uint32_t tmp; + + asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t" + "cmp %[oldl], %[cmpl]\n\t" + "ccmp %[oldh], %[cmph], #0, eq\n\t" + "b.ne 1f\n\t" + "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t" + "cbnz %w[tmp], 0b\n" + "1:" + : [mem] "+m"(*ptr), [tmp] "=&r"(tmp), + [oldl] "=&r"(oldl), [oldh] "=r"(oldh) + : [cmpl] "r"(cmpl), [cmph] "r"(cmph), + [newl] "r"(newl), [newh] "r"(newh) + : "memory", "cc"); + + return int128_make128(oldl, oldh); +} +# define HAVE_CMPXCHG128 1 +#else +/* Fallback definition that must be optimized away, or error. */ +Int128 QEMU_ERROR("unsupported atomic") + atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new); +# define HAVE_CMPXCHG128 0 +#endif /* Some definition for HAVE_CMPXCHG128 */ + + +#if defined(CONFIG_ATOMIC128) +static inline Int128 atomic16_read(Int128 *ptr) +{ + return atomic_read__nocheck(ptr); +} + +static inline void atomic16_set(Int128 *ptr, Int128 val) +{ + atomic_set__nocheck(ptr, val); +} + +# define HAVE_ATOMIC128 1 +#elif !defined(CONFIG_USER_ONLY) && defined(__aarch64__) +/* We can do better than cmpxchg for AArch64. */ +static inline Int128 atomic16_read(Int128 *ptr) +{ + uint64_t l, h; + uint32_t tmp; + + /* The load must be paired with the store to guarantee not tearing. */ + asm("0: ldxp %[l], %[h], %[mem]\n\t" + "stxp %w[tmp], %[l], %[h], %[mem]\n\t" + "cbnz %w[tmp], 0b" + : [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h)); + + return int128_make128(l, h); +} + +static inline void atomic16_set(Int128 *ptr, Int128 val) +{ + uint64_t l = int128_getlo(val), h = int128_gethi(val); + uint64_t t1, t2; + + /* Load into temporaries to acquire the exclusive access lock. */ + asm("0: ldxp %[t1], %[t2], %[mem]\n\t" + "stxp %w[t1], %[l], %[h], %[mem]\n\t" + "cbnz %w[t1], 0b" + : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2) + : [l] "r"(l), [h] "r"(h)); +} + +# define HAVE_ATOMIC128 1 +#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128 +static inline Int128 atomic16_read(Int128 *ptr) +{ + /* Maybe replace 0 with 0, returning the old value. */ + return atomic16_cmpxchg(ptr, 0, 0); +} + +static inline void atomic16_set(Int128 *ptr, Int128 val) +{ + Int128 old = *ptr, cmp; + do { + cmp = old; + old = atomic16_cmpxchg(ptr, cmp, val); + } while (old != cmp); +} + +# define HAVE_ATOMIC128 1 +#else +/* Fallback definitions that must be optimized away, or error. */ +Int128 QEMU_ERROR("unsupported atomic") atomic16_read(Int128 *ptr); +void QEMU_ERROR("unsupported atomic") atomic16_set(Int128 *ptr, Int128 val); +# define HAVE_ATOMIC128 0 +#endif /* Some definition for HAVE_ATOMIC128 */ + +#endif /* QEMU_ATOMIC128_H */ diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index 66f8c241dc..6b92710487 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -146,6 +146,17 @@ # define QEMU_FLATTEN #endif +/* + * If __attribute__((error)) is present, use it to produce an error at + * compile time. Otherwise, one must wait for the linker to diagnose + * the missing symbol. + */ +#if __has_attribute(error) +# define QEMU_ERROR(X) __attribute__((error(X))) +#else +# define QEMU_ERROR(X) +#endif + /* Implement C11 _Generic via GCC builtins. Example: * * QEMU_GENERIC(x, (float, sinf), (long double, sinl), sin) (x) diff --git a/tcg/tcg.h b/tcg/tcg.h index c59f254e27..f4efbaa680 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -32,6 +32,7 @@ #include "qemu/queue.h" #include "tcg-mo.h" #include "tcg-target.h" +#include "qemu/int128.h" /* XXX: make safe guess about sizes */ #define MAX_OP_PER_INSTR 266 @@ -1456,11 +1457,14 @@ GEN_ATOMIC_HELPER_ALL(xchg) #undef GEN_ATOMIC_HELPER #endif /* CONFIG_SOFTMMU */ -#ifdef CONFIG_ATOMIC128 -#include "qemu/int128.h" - -/* These aren't really a "proper" helpers because TCG cannot manage Int128. - However, use the same format as the others, for use by the backends. */ +/* + * These aren't really a "proper" helpers because TCG cannot manage Int128. + * However, use the same format as the others, for use by the backends. + * + * The cmpxchg functions are only defined if HAVE_CMPXCHG128; + * the ld/st functions are only defined if HAVE_ATOMIC128, + * as defined by <qemu/atomic128.h>. + */ Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr, Int128 cmpv, Int128 newv, TCGMemOpIdx oi, uintptr_t retaddr); @@ -1477,6 +1481,4 @@ void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val, void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val, TCGMemOpIdx oi, uintptr_t retaddr); -#endif /* CONFIG_ATOMIC128 */ - #endif /* TCG_H */ diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index e4993d72fb..28b770a404 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -32,6 +32,7 @@ #include "exec/log.h" #include "exec/helper-proto.h" #include "qemu/atomic.h" +#include "qemu/atomic128.h" /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ /* #define DEBUG_TLB */ @@ -1112,7 +1113,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, #include "atomic_template.h" #endif -#ifdef CONFIG_ATOMIC128 +#if HAVE_CMPXCHG128 || HAVE_ATOMIC128 #define DATA_SIZE 16 #include "atomic_template.h" #endif diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index 26a3ffbba1..cd75829cf2 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -25,6 +25,7 @@ #include "exec/cpu_ldst.h" #include "translate-all.h" #include "exec/helper-proto.h" +#include "qemu/atomic128.h" #undef EAX #undef ECX @@ -615,7 +616,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, /* The following is only callable from other helpers, and matches up with the softmmu version. */ -#ifdef CONFIG_ATOMIC128 +#if HAVE_ATOMIC128 || HAVE_CMPXCHG128 #undef EXTRA_ARGS #undef ATOMIC_NAME @@ -628,4 +629,4 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, #define DATA_SIZE 16 #include "atomic_template.h" -#endif /* CONFIG_ATOMIC128 */ +#endif diff --git a/configure b/configure index 9138af37f8..1d267b49cc 100755 --- a/configure +++ b/configure @@ -5154,6 +5154,21 @@ EOF fi fi +cmpxchg128=no +if test "$int128" = yes -a "$atomic128" = no; then + cat > $TMPC << EOF +int main(void) +{ + unsigned __int128 x = 0, y = 0; + __sync_val_compare_and_swap_16(&x, y, x); + return 0; +} +EOF + if compile_prog "" "" ; then + cmpxchg128=yes + fi +fi + ######################################### # See if 64-bit atomic operations are supported. # Note that without __atomic builtins, we can only @@ -6663,6 +6678,10 @@ if test "$atomic128" = "yes" ; then echo "CONFIG_ATOMIC128=y" >> $config_host_mak fi +if test "$cmpxchg128" = "yes" ; then + echo "CONFIG_CMPXCHG128=y" >> $config_host_mak +fi + if test "$atomic64" = "yes" ; then echo "CONFIG_ATOMIC64=y" >> $config_host_mak fi

[PULL,v2,12/21] tcg: Split CONFIG_ATOMIC128

Commit Message

Patch