[v2,01/36] include/qemu/atomic128: Support 16-byte atomic read/write for Intel AVX

Message ID: 20221021071549.2398137-2-richard.henderson@linaro.org
State: New
Series: tcg: Support for Int128 with helpers

Commit Message

Richard Henderson Oct. 21, 2022, 7:15 a.m. UTC
Intel has now given guarantees about the atomicity of SSE read
and write instructions on CPUs supporting AVX.  We can use these
instead of the much slower cmpxchg16b.

Derived from https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/qemu/atomic128.h | 44 ++++++++++++++++++++++++++
 util/atomic128.c         | 67 ++++++++++++++++++++++++++++++++++++++++
 util/meson.build         |  1 +
 3 files changed, 112 insertions(+)
 create mode 100644 util/atomic128.c
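
In outline, and as a hedged standalone sketch rather than the patch's actual API (the names cpu_has_avx, load16 and store16 are illustrative; build for x86-64 with -mcx16 so the 16-byte __sync builtin is usable): when the runtime AVX check passes, one aligned vmovdqa is a single atomic 16-byte access per the SDM text quoted in the patch, and otherwise the code falls back to a cmpxchg16b loop.

    #include <stdbool.h>

    static bool cpu_has_avx;   /* set once at startup by a cpuid/xgetbv probe */

    /* ptr must be 16-byte aligned in both functions. */
    static __int128 load16(__int128 *ptr)
    {
        __int128 ret;
        if (cpu_has_avx) {
            /* One aligned 16-byte load: atomic on AVX-capable Intel CPUs. */
            asm("vmovdqa %1, %0" : "=x"(ret) : "m"(*ptr));
        } else {
            /* cmpxchg16b with a no-op exchange reads atomically, but it
             * takes the cache line exclusive and is much slower. */
            ret = __sync_val_compare_and_swap(ptr, 0, 0);
        }
        return ret;
    }

    static void store16(__int128 *ptr, __int128 val)
    {
        if (cpu_has_avx) {
            /* One aligned 16-byte store, likewise atomic. */
            asm("vmovdqa %1, %0" : "=m"(*ptr) : "x"(val));
        } else {
            /* Loop until cmpxchg16b installs val over whatever was there. */
            __int128 old = *ptr, cmp;
            do {
                cmp = old;
                old = __sync_val_compare_and_swap(ptr, cmp, val);
            } while (old != cmp);
        }
    }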

Patch

diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
index adb9a1a260..d179c05ede 100644
--- a/include/qemu/atomic128.h
+++ b/include/qemu/atomic128.h
@@ -127,6 +127,50 @@  static inline void atomic16_set(Int128 *ptr, Int128 val)
         : [l] "r"(l), [h] "r"(h));
 }
 
+# define HAVE_ATOMIC128 1
+#elif !defined(CONFIG_USER_ONLY) && defined(__x86_64__)
+/*
+ * The latest Intel SDM has added:
+ *     Processors that enumerate support for Intel® AVX (by setting
+ *     the feature flag CPUID.01H:ECX.AVX[bit 28]) guarantee that the
+ *     16-byte memory operations performed by the following instructions
+ *     will always be carried out atomically:
+ *      - MOVAPD, MOVAPS, and MOVDQA.
+ *      - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
+ *      - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
+ *        with EVEX.128 and k0 (masking disabled).
+ *    Note that these instructions require the linear addresses of their
+ *    memory operands to be 16-byte aligned.
+ *
+ * We do not yet have a similar guarantee from AMD, so we detect this
+ * at runtime rather than assuming it whenever __AVX__ is defined.
+ */
+extern bool have_atomic128;
+
+static inline Int128 atomic16_read(Int128 *ptr)
+{
+    Int128 ret;
+    if (have_atomic128) {
+        asm("vmovdqa %1, %0" : "=x" (ret) : "m" (*ptr));
+    } else {
+        ret = atomic16_cmpxchg(ptr, 0, 0);
+    }
+    return ret;
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+    if (have_atomic128) {
+        asm("vmovdqa %1, %0" : "=m" (*ptr) : "x" (val));
+    } else {
+        Int128 old = *ptr, cmp;
+        do {
+            cmp = old;
+            old = atomic16_cmpxchg(ptr, cmp, val);
+        } while (old != cmp);
+    }
+}
+
 # define HAVE_ATOMIC128 1
 #elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128
 static inline Int128 atomic16_read(Int128 *ptr)
diff --git a/util/atomic128.c b/util/atomic128.c
new file mode 100644
index 0000000000..55863ce9bd
--- /dev/null
+++ b/util/atomic128.c
@@ -0,0 +1,67 @@ 
+/*
+ * Copyright (C) 2022, Linaro Ltd.
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/atomic128.h"
+
+#ifdef __x86_64__
+#include "qemu/cpuid.h"
+
+#ifndef signature_INTEL_ecx
+/* "Genu ineI ntel" */
+#define signature_INTEL_ebx     0x756e6547
+#define signature_INTEL_edx     0x49656e69
+#define signature_INTEL_ecx     0x6c65746e
+#endif
+
+/*
+ * The latest Intel SDM has added:
+ *     Processors that enumerate support for Intel® AVX (by setting
+ *     the feature flag CPUID.01H:ECX.AVX[bit 28]) guarantee that the
+ *     16-byte memory operations performed by the following instructions
+ *     will always be carried out atomically:
+ *      - MOVAPD, MOVAPS, and MOVDQA.
+ *      - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
+ *      - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
+ *        with EVEX.128 and k0 (masking disabled).
+ *    Note that these instructions require the linear addresses of their
+ *    memory operands to be 16-byte aligned.
+ *
+ * We do not yet have a similar guarantee from AMD, so we detect this
+ * at runtime rather than assuming it whenever __AVX__ is defined.
+ */
+bool have_atomic128;
+
+static void __attribute__((constructor))
+init_have_atomic128(void)
+{
+    unsigned int a, b, c, d, xcrl, xcrh;
+
+    __cpuid(0, a, b, c, d);
+    if (a < 1) {
+        return; /* AVX leaf not present */
+    }
+    if (c != signature_INTEL_ecx) {
+        return; /* Not an Intel product */
+    }
+
+    __cpuid(1, a, b, c, d);
+    if ((c & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE)) {
+        return; /* AVX not present or XSAVE not enabled by OS */
+    }
+
+    /*
+     * The xgetbv instruction is not available to older versions of
+     * the assembler, so we encode the instruction manually.
+     */
+    asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcrl), "=d" (xcrh) : "c" (0));
+    if ((xcrl & 6) != 6) {
+        return; /* AVX not enabled by OS */
+    }
+
+    have_atomic128 = true;
+}
+#endif /* __x86_64__ */
diff --git a/util/meson.build b/util/meson.build
index 5e282130df..4b29b719a8 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -2,6 +2,7 @@  util_ss.add(files('osdep.c', 'cutils.c', 'unicode.c', 'qemu-timer-common.c'))
 if not config_host_data.get('CONFIG_ATOMIC64')
   util_ss.add(files('atomic64.c'))
 endif
+util_ss.add(when: 'CONFIG_SOFTMMU', if_true: files('atomic128.c'))
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('aio-posix.c'))
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('fdmon-poll.c'))
 if config_host_data.get('CONFIG_EPOLL_CREATE1')