Message ID | 20230515080554.248739380@infradead.org |
---|---|
State | New |
Series | Introduce cmpxchg128() -- aka. the demise of cmpxchg_double() |
On Mon, May 15, 2023 at 09:57:04AM +0200, Peter Zijlstra wrote:
> In order to replace cmpxchg_double() with the newly minted
> cmpxchg128() family of functions, wire it up in this_cpu_cmpxchg().
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
> --- a/arch/x86/include/asm/percpu.h
> +++ b/arch/x86/include/asm/percpu.h
> @@ -210,6 +210,65 @@ do {	\
>  	(typeof(_var))(unsigned long) pco_old__;	\
>  })
> 
> +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_CMPXCHG64)
> +#define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval)	\
> +({	\
> +	union {	\
> +		u64 var;	\
> +		struct {	\
> +			u32 low, high;	\
> +		};	\
> +	} old__, new__;	\
> +	\
> +	old__.var = _oval;	\
> +	new__.var = _nval;	\
> +	\
> +	asm qual ("cmpxchg8b " __percpu_arg([var])	\
> +		  : [var] "+m" (_var),	\
> +		    "+a" (old__.low),	\
> +		    "+d" (old__.high)	\
> +		  : "b" (new__.low),	\
> +		    "c" (new__.high)	\
> +		  : "memory");	\
> +	\
> +	old__.var;	\
> +})
> +
> +#define raw_cpu_cmpxchg64(pcp, oval, nval)	percpu_cmpxchg64_op(8, , pcp, oval, nval)
> +#define this_cpu_cmpxchg64(pcp, oval, nval)	percpu_cmpxchg64_op(8, volatile, pcp, oval, nval)
> +#endif
> +
> +#ifdef CONFIG_X86_64
> +#define raw_cpu_cmpxchg64(pcp, oval, nval)	percpu_cmpxchg_op(8, , pcp, oval, nval);
> +#define this_cpu_cmpxchg64(pcp, oval, nval)	percpu_cmpxchg_op(8, volatile, pcp, oval, nval);
> +
> +#define percpu_cmpxchg128_op(size, qual, _var, _oval, _nval)	\
> +({	\
> +	union {	\
> +		u128 var;	\
> +		struct {	\
> +			u64 low, high;	\
> +		};	\
> +	} old__, new__;	\
> +	\
> +	old__.var = _oval;	\
> +	new__.var = _nval;	\
> +	\
> +	asm qual ("cmpxchg16b " __percpu_arg([var])	\
> +		  : [var] "+m" (_var),	\
> +		    "+a" (old__.low),	\
> +		    "+d" (old__.high)	\
> +		  : "b" (new__.low),	\
> +		    "c" (new__.high)	\
> +		  : "memory");	\
> +	\
> +	old__.var;	\
> +})
> +
> +#define raw_cpu_cmpxchg128(pcp, oval, nval)	percpu_cmpxchg128_op(16, , pcp, oval, nval)
> +#define this_cpu_cmpxchg128(pcp, oval, nval)	percpu_cmpxchg128_op(16, volatile, pcp, oval, nval)
> +#endif
> +
>  /*
>   * this_cpu_read() makes gcc load the percpu variable every time it is
>   * accessed while this_cpu_read_stable() allows the value to be cached.

Since this_cpu_cmpxchg*() is assumed to always be present (their usage is
not guarded by system_has_cmpxchg*()), it needs a fallback for when the
instruction is not present.

The below has been tested with clearcpuid=cx16; adding an obvious defect
to the fallback implementation crashes the kernel, while with the code as
presented it boots.

Build tested i386-defconfig.
(the things we do for museum pieces :/)

---
 include/asm/percpu.h |   25 +++++++++++---------
 lib/Makefile         |    3 +-
 lib/cmpxchg16b_emu.S |   43 ++++++++++++++++++++--------------
 lib/cmpxchg8b_emu.S  |   63 +++++++++++++++++++++++++++++++++++++++------------
 4 files changed, 90 insertions(+), 44 deletions(-)

--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -210,7 +210,7 @@ do {	\
 	(typeof(_var))(unsigned long) pco_old__;	\
 })
 
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_CMPXCHG64)
+#ifdef CONFIG_X86_32
 #define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval)	\
 ({	\
 	union {	\
@@ -223,13 +223,14 @@ do {	\
 	old__.var = _oval;	\
 	new__.var = _nval;	\
 	\
-	asm qual ("cmpxchg8b " __percpu_arg([var])	\
+	asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu",	\
+			      "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8)	\
 		  : [var] "+m" (_var),	\
 		    "+a" (old__.low),	\
 		    "+d" (old__.high)	\
 		  : "b" (new__.low),	\
 		    "c" (new__.high)	\
-		  : "memory");	\
+		  : "memory", "esi");	\
 	\
 	old__.var;	\
 })
@@ -254,13 +255,14 @@ do {	\
 	old__.var = _oval;	\
 	new__.var = _nval;	\
 	\
-	asm qual ("cmpxchg16b " __percpu_arg([var])	\
+	asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu",	\
+			      "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16)	\
 		  : [var] "+m" (_var),	\
 		    "+a" (old__.low),	\
 		    "+d" (old__.high)	\
 		  : "b" (new__.low),	\
 		    "c" (new__.high)	\
-		  : "memory");	\
+		  : "memory", "rsi");	\
 	\
 	old__.var;	\
 })
@@ -400,12 +402,13 @@ do {	\
 	bool __ret;	\
 	typeof(pcp1) __o1 = (o1), __n1 = (n1);	\
 	typeof(pcp2) __o2 = (o2), __n2 = (n2);	\
-	alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t",	\
-		       "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t",	\
-		       X86_FEATURE_CX16,	\
-		       ASM_OUTPUT2("=a" (__ret), "+m" (pcp1),	\
-				   "+m" (pcp2), "+d" (__o2)),	\
-		       "b" (__n1), "c" (__n2), "a" (__o1) : "rsi");	\
+	asm volatile (ALTERNATIVE("leaq %P1, %%rsi; call this_cpu_cmpxchg16b_emu",	\
+				  "cmpxchg16b " __percpu_arg(1), X86_FEATURE_CX16)	\
+		      "setz %0"	\
+		      : "=a" (__ret), "+m" (pcp1)	\
+		      : "b" (__n1), "c" (__n2),	\
+			"a" (__o1), "d" (__o2)	\
+		      : "memory", "rsi");	\
 	__ret;	\
 })
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -61,8 +61,9 @@ ifeq ($(CONFIG_X86_32),y)
         lib-y += strstr_32.o
         lib-y += string_32.o
         lib-y += memmove_32.o
+        lib-y += cmpxchg8b_emu.o
 ifneq ($(CONFIG_X86_CMPXCHG64),y)
-        lib-y += cmpxchg8b_emu.o atomic64_386_32.o
+        lib-y += atomic64_386_32.o
 endif
 else
         obj-y += iomap_copy_64.o
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -1,47 +1,54 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 #include <linux/linkage.h>
 #include <asm/percpu.h>
+#include <asm/processor-flags.h>
 
 .text
 
 /*
+ * Emulate 'cmpxchg16b %gs:(%rsi)'
+ *
  * Inputs:
  * %rsi : memory location to compare
  * %rax : low 64 bits of old value
  * %rdx : high 64 bits of old value
  * %rbx : low 64 bits of new value
  * %rcx : high 64 bits of new value
- * %al : Operation successful
+ *
+ * Notably this is not LOCK prefixed and is not safe against NMIs
  */
 SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
 
-#
-# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not
-# via the ZF. Caller will access %al to get result.
-#
-# Note that this is only useful for a cpuops operation. Meaning that we
-# do *not* have a fully atomic operation but just an operation that is
-# *atomic* on a single cpu (as provided by the this_cpu_xx class of
-# macros).
-#
	pushfq
	cli
 
-	cmpq PER_CPU_VAR((%rsi)), %rax
-	jne .Lnot_same
-	cmpq PER_CPU_VAR(8(%rsi)), %rdx
-	jne .Lnot_same
+	/* if (*ptr == old) */
+	cmpq	PER_CPU_VAR(0(%rsi)), %rax
+	jne	.Lnot_same
+	cmpq	PER_CPU_VAR(8(%rsi)), %rdx
+	jne	.Lnot_same
+
+	/* *ptr = new */
+	movq	%rbx, PER_CPU_VAR(0(%rsi))
+	movq	%rcx, PER_CPU_VAR(8(%rsi))
 
-	movq %rbx, PER_CPU_VAR((%rsi))
-	movq %rcx, PER_CPU_VAR(8(%rsi))
+	/* set ZF in EFLAGS to indicate success */
+	orl	$X86_EFLAGS_ZF, (%rsp)
 
	popfq
-	mov $1, %al
	RET
 
 .Lnot_same:
+	/* *ptr != old */
+
+	/* old = *ptr */
+	movq	PER_CPU_VAR(0(%rsi)), %rax
+	movq	PER_CPU_VAR(8(%rsi)), %rdx
+
+	/* clear ZF in EFLAGS to indicate failure */
+	andl	$(~X86_EFLAGS_ZF), (%rsp)
+
	popfq
-	xor %al,%al
	RET
 
 SYM_FUNC_END(this_cpu_cmpxchg16b_emu)
--- a/arch/x86/lib/cmpxchg8b_emu.S
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -2,10 +2,16 @@
 
 #include <linux/linkage.h>
 #include <asm/export.h>
+#include <asm/percpu.h>
+#include <asm/processor-flags.h>
 
 .text
 
+#ifndef CONFIG_X86_CMPXCHG64
+
 /*
+ * Emulate 'cmpxchg8b (%esi)' on UP
+ *
  * Inputs:
  * %esi : memory location to compare
  * %eax : low 32 bits of old value
@@ -15,32 +21,61 @@
  */
 SYM_FUNC_START(cmpxchg8b_emu)
 
-#
-# Emulate 'cmpxchg8b (%esi)' on UP except we don't
-# set the whole ZF thing (caller will just compare
-# eax:edx with the expected value)
-#
	pushfl
	cli
 
-	cmpl (%esi), %eax
-	jne .Lnot_same
-	cmpl 4(%esi), %edx
-	jne .Lhalf_same
+	cmpl	0(%esi), %eax
+	jne	.Lnot_same
+	cmpl	4(%esi), %edx
+	jne	.Lnot_same
+
+	movl	%ebx, 0(%esi)
+	movl	%ecx, 4(%esi)
 
-	movl %ebx, (%esi)
-	movl %ecx, 4(%esi)
+	orl	$X86_EFLAGS_ZF, (%esp)
 
	popfl
	RET
 
 .Lnot_same:
-	movl (%esi), %eax
-.Lhalf_same:
-	movl 4(%esi), %edx
+	movl	0(%esi), %eax
+	movl	4(%esi), %edx
+
+	andl	$(~X86_EFLAGS_ZF), (%esp)
 
	popfl
	RET
 
 SYM_FUNC_END(cmpxchg8b_emu)
 EXPORT_SYMBOL(cmpxchg8b_emu)
+
+#endif
+
+SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
+
+	pushfl
+	cli
+
+	cmpl	PER_CPU_VAR(0(%esi)), %eax
+	jne	.Lnot_same2
+	cmpl	PER_CPU_VAR(4(%esi)), %edx
+	jne	.Lnot_same2
+
+	movl	%ebx, PER_CPU_VAR(0(%esi))
+	movl	%ecx, PER_CPU_VAR(4(%esi))
+
+	orl	$X86_EFLAGS_ZF, (%esp)
+
+	popfl
+	RET
+
+.Lnot_same2:
+	movl	PER_CPU_VAR(0(%esi)), %eax
+	movl	PER_CPU_VAR(4(%esi)), %edx
+
+	andl	$(~X86_EFLAGS_ZF), (%esp)
+
+	popfl
+	RET
+
+SYM_FUNC_END(this_cpu_cmpxchg8b_emu)
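For readers who want the fallback logic in C rather than assembly: the emulation stubs above amount to roughly the following. This is an illustrative sketch only (the helper name below is made up and it takes a plain pointer instead of a %gs-relative per-cpu address); the authoritative version is the assembly in the patch.

#include <linux/irqflags.h>
#include <linux/types.h>

/*
 * Rough C equivalent of this_cpu_cmpxchg16b_emu: compare *ptr against *old
 * with interrupts disabled and, on a match, store the new value.  On a
 * mismatch the current contents are copied back into *old (the asm reloads
 * %rdx:%rax) and the "false" return stands in for the cleared ZF.  Like the
 * real stub this is only atomic w.r.t. the local CPU -- no LOCK prefix, not
 * NMI safe.  Relies on the u128 type introduced earlier in this series.
 */
static bool local_cmpxchg128_emu_sketch(u128 *ptr, u128 *old, u128 new)
{
	unsigned long flags;
	bool ret = false;

	local_irq_save(flags);		/* pushfq; cli */
	if (*ptr == *old) {
		*ptr = new;
		ret = true;		/* orl $X86_EFLAGS_ZF, (%rsp) */
	} else {
		*old = *ptr;
	}
	local_irq_restore(flags);	/* popfq */

	return ret;
}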
On Thu, 25 May 2023 14:49:55 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> [...]
> Build tested i386-defconfig.
>
> (the things we do for museum pieces :/)

I have always thought it's not done for museum pieces but rather for dumb
emulators. I can vaguely recall a reverse-engineered MIPS firmware which
was able to run i386 binaries, but not i586...

Petr T
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -140,6 +140,10 @@ PERCPU_RET_OP(add, add, ldadd)
  * re-enabling preemption for preemptible kernels, but doing that in a way
  * which builds inside a module would mean messing directly with the preempt
  * count. If you do this, peterz and tglx will hunt you down.
+ *
+ * Not to mention it'll break the actual preemption model for missing a
+ * preemption point when TIF_NEED_RESCHED gets set while preemption is
+ * disabled.
  */
 #define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2)	\
 ({	\
@@ -240,6 +244,22 @@ PERCPU_RET_OP(add, add, ldadd)
 #define this_cpu_cmpxchg_8(pcp, o, n)	\
 	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
 
+#define this_cpu_cmpxchg64(pcp, o, n)	this_cpu_cmpxchg_8(pcp, o, n)
+
+#define this_cpu_cmpxchg128(pcp, o, n)	\
+({	\
+	typedef typeof(pcp) pcp_op_T__;	\
+	u128 old__, new__, ret__;	\
+	pcp_op_T__ *ptr__;	\
+	old__ = o;	\
+	new__ = n;	\
+	preempt_disable_notrace();	\
+	ptr__ = raw_cpu_ptr(&(pcp));	\
+	ret__ = cmpxchg128_local((void *)ptr__, old__, new__);	\
+	preempt_enable_notrace();	\
+	ret__;	\
+})
+
 #ifdef __KVM_NVHE_HYPERVISOR__
 extern unsigned long __hyp_per_cpu_offset(unsigned int cpu);
 #define __per_cpu_offset
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -148,6 +148,22 @@
 #define this_cpu_cmpxchg_4(pcp, oval, nval)	arch_this_cpu_cmpxchg(pcp, oval, nval)
 #define this_cpu_cmpxchg_8(pcp, oval, nval)	arch_this_cpu_cmpxchg(pcp, oval, nval)
 
+#define this_cpu_cmpxchg64(pcp, o, n)	this_cpu_cmpxchg_8(pcp, o, n)
+
+#define this_cpu_cmpxchg128(pcp, oval, nval)	\
+({	\
+	typedef typeof(pcp) pcp_op_T__;	\
+	u128 old__, new__, ret__;	\
+	pcp_op_T__ *ptr__;	\
+	old__ = oval;	\
+	new__ = nval;	\
+	preempt_disable_notrace();	\
+	ptr__ = raw_cpu_ptr(&(pcp));	\
+	ret__ = cmpxchg128((void *)ptr__, old__, new__);	\
+	preempt_enable_notrace();	\
+	ret__;	\
+})
+
 #define arch_this_cpu_xchg(pcp, nval)	\
 ({	\
 	typeof(pcp) *ptr__;	\
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -210,6 +210,65 @@ do {	\
 	(typeof(_var))(unsigned long) pco_old__;	\
 })
 
+#if defined(CONFIG_X86_32) && defined(CONFIG_X86_CMPXCHG64)
+#define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval)	\
+({	\
+	union {	\
+		u64 var;	\
+		struct {	\
+			u32 low, high;	\
+		};	\
+	} old__, new__;	\
+	\
+	old__.var = _oval;	\
+	new__.var = _nval;	\
+	\
+	asm qual ("cmpxchg8b " __percpu_arg([var])	\
+		  : [var] "+m" (_var),	\
+		    "+a" (old__.low),	\
+		    "+d" (old__.high)	\
+		  : "b" (new__.low),	\
+		    "c" (new__.high)	\
+		  : "memory");	\
+	\
+	old__.var;	\
+})
+
+#define raw_cpu_cmpxchg64(pcp, oval, nval)	percpu_cmpxchg64_op(8, , pcp, oval, nval)
+#define this_cpu_cmpxchg64(pcp, oval, nval)	percpu_cmpxchg64_op(8, volatile, pcp, oval, nval)
+#endif
+
+#ifdef CONFIG_X86_64
+#define raw_cpu_cmpxchg64(pcp, oval, nval)	percpu_cmpxchg_op(8, , pcp, oval, nval);
+#define this_cpu_cmpxchg64(pcp, oval, nval)	percpu_cmpxchg_op(8, volatile, pcp, oval, nval);
+
+#define percpu_cmpxchg128_op(size, qual, _var, _oval, _nval)	\
+({	\
+	union {	\
+		u128 var;	\
+		struct {	\
+			u64 low, high;	\
+		};	\
+	} old__, new__;	\
+	\
+	old__.var = _oval;	\
+	new__.var = _nval;	\
+	\
+	asm qual ("cmpxchg16b " __percpu_arg([var])	\
+		  : [var] "+m" (_var),	\
+		    "+a" (old__.low),	\
+		    "+d" (old__.high)	\
+		  : "b" (new__.low),	\
+		    "c" (new__.high)	\
+		  : "memory");	\
+	\
+	old__.var;	\
+})
+
+#define raw_cpu_cmpxchg128(pcp, oval, nval)	percpu_cmpxchg128_op(16, , pcp, oval, nval)
+#define this_cpu_cmpxchg128(pcp, oval, nval)	percpu_cmpxchg128_op(16, volatile, pcp, oval, nval)
+#endif
+
 /*
  * this_cpu_read() makes gcc load the percpu variable every time it is
  * accessed while this_cpu_read_stable() allows the value to be cached.
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -298,6 +298,14 @@ do {	\
 #define raw_cpu_cmpxchg_8(pcp, oval, nval)	\
 	raw_cpu_generic_cmpxchg(pcp, oval, nval)
 #endif
+#ifndef raw_cpu_cmpxchg64
+#define raw_cpu_cmpxchg64(pcp, oval, nval)	\
+	raw_cpu_generic_cmpxchg(pcp, oval, nval)
+#endif
+#ifndef raw_cpu_cmpxchg128
+#define raw_cpu_cmpxchg128(pcp, oval, nval)	\
+	raw_cpu_generic_cmpxchg(pcp, oval, nval)
+#endif
 
 #ifndef raw_cpu_cmpxchg_double_1
 #define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2)	\
@@ -423,6 +431,14 @@ do {	\
 #define this_cpu_cmpxchg_8(pcp, oval, nval)	\
 	this_cpu_generic_cmpxchg(pcp, oval, nval)
 #endif
+#ifndef this_cpu_cmpxchg64
+#define this_cpu_cmpxchg64(pcp, oval, nval)	\
+	this_cpu_generic_cmpxchg(pcp, oval, nval)
+#endif
+#ifndef this_cpu_cmpxchg128
+#define this_cpu_cmpxchg128(pcp, oval, nval)	\
+	this_cpu_generic_cmpxchg(pcp, oval, nval)
+#endif
 
 #ifndef this_cpu_cmpxchg_double_1
 #define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2)	\
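For context, the generic helpers that the new #ifndef fallbacks above expand to behave roughly as sketched below. This is a paraphrase of the existing raw_cpu_generic_cmpxchg()/this_cpu_generic_cmpxchg() in include/asm-generic/percpu.h, not part of this patch: a plain load/compare/store on the per-cpu slot, with the this_cpu_*() flavour additionally running under a closed interrupt window.

/* Sketch (paraphrased): raw variant -- caller must prevent migration. */
#define raw_cpu_generic_cmpxchg_sketch(pcp, oval, nval)	\
({	\
	typeof(pcp) *__p = raw_cpu_ptr(&(pcp));	\
	typeof(pcp) __ret = *__p;	\
	if (__ret == (oval))	\
		*__p = (nval);	\
	__ret;	\
})

/* Sketch (paraphrased): this_cpu variant -- IRQ-off for local atomicity. */
#define this_cpu_generic_cmpxchg_sketch(pcp, oval, nval)	\
({	\
	typeof(pcp) __ret;	\
	unsigned long __flags;	\
	raw_local_irq_save(__flags);	\
	__ret = raw_cpu_generic_cmpxchg_sketch(pcp, oval, nval);	\
	raw_local_irq_restore(__flags);	\
	__ret;	\
})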
In order to replace cmpxchg_double() with the newly minted cmpxchg128()
family of functions, wire it up in this_cpu_cmpxchg().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/arm64/include/asm/percpu.h |   20 +++++++++++++
 arch/s390/include/asm/percpu.h  |   16 ++++++++++
 arch/x86/include/asm/percpu.h   |   59 ++++++++++++++++++++++++++++++++++++++++
 include/asm-generic/percpu.h    |   16 ++++++++++
 4 files changed, 111 insertions(+)
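To illustrate where this is headed (an example, not code from this series; the structure, per-cpu variable, and function names below are hypothetical and assume a 64-bit architecture), a per-cpu {pointer, counter} pair that today goes through this_cpu_cmpxchg_double() can be packed into a single u128 and updated with the new helper. The helper returns the previous contents, so success means the returned value equals the expected old value.

#include <linux/percpu.h>
#include <linux/types.h>

/* Hypothetical per-cpu slot: a freelist head plus an ABA/transaction id. */
struct pcp_slot {
	union {
		u128 full;
		struct {
			void *head;	/* low 64 bits on a 64-bit arch */
			u64 tid;	/* high 64 bits */
		};
	};
} __aligned(16);			/* cmpxchg16b wants 16-byte alignment */

static DEFINE_PER_CPU(struct pcp_slot, slot);

static bool pcp_slot_update(void *old_head, u64 old_tid,
			    void *new_head, u64 new_tid)
{
	struct pcp_slot old = { .head = old_head, .tid = old_tid };
	struct pcp_slot new = { .head = new_head, .tid = new_tid };

	/*
	 * Both words are exchanged as one u128, replacing what used to be a
	 * this_cpu_cmpxchg_double(head, tid, ...) call.
	 */
	return this_cpu_cmpxchg128(slot.full, old.full, new.full) == old.full;
}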