
[RFC,11/20] ia64: Add unconditional mmiowb() to arch_spin_unlock()

Message ID 20190222185026.10973-12-will.deacon@arm.com
State New
Series Remove Mysterious Macro Intended to Obscure Weird Behaviours (mmiowb())

Commit Message

Will Deacon Feb. 22, 2019, 6:50 p.m. UTC
The mmiowb() macro is horribly difficult to use and drivers will continue
to work most of the time if they omit a call when it is required.

Rather than rely on driver authors getting this right, push mmiowb() into
arch_spin_unlock() for ia64. If this is deemed to be a performance issue,
a subsequent optimisation could make use of ARCH_HAS_MMIOWB to elide
the barrier in cases where no I/O writes were performed inside the
critical section.
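
A minimal sketch of how such an elision could work is given below; the
flag and helper names are illustrative only and are not taken from this
series. The idea is to record, per CPU, whether any MMIO write happened
inside the critical section and issue the barrier on unlock only if one
did:

	/*
	 * Illustrative sketch only: track MMIO writes per CPU so the unlock
	 * path can skip the barrier when nothing was written. The names
	 * (mmiowb_pending, note_mmio_write, mmiowb_on_unlock) are
	 * hypothetical. Assumes the accesses happen with the lock held,
	 * i.e. with preemption disabled.
	 */
	#include <linux/percpu.h>

	static DEFINE_PER_CPU(bool, mmiowb_pending);

	static inline void note_mmio_write(void)	/* hooked into writeX() */
	{
		this_cpu_write(mmiowb_pending, true);
	}

	static inline void mmiowb_on_unlock(void)	/* hooked into arch_spin_unlock() */
	{
		if (this_cpu_read(mmiowb_pending)) {
			this_cpu_write(mmiowb_pending, false);
			mmiowb();			/* ia64: platform_mmiowb() */
		}
	}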

Signed-off-by: Will Deacon <will.deacon@arm.com>

---
 arch/ia64/include/asm/Kbuild     |  1 -
 arch/ia64/include/asm/io.h       |  4 ----
 arch/ia64/include/asm/mmiowb.h   | 12 ++++++++++++
 arch/ia64/include/asm/spinlock.h |  2 ++
 4 files changed, 14 insertions(+), 5 deletions(-)
 create mode 100644 arch/ia64/include/asm/mmiowb.h

-- 
2.11.0

Comments

Nicholas Piggin Feb. 27, 2019, 4:36 a.m. UTC | #1
Will Deacon's on February 23, 2019 4:50 am:
> The mmiowb() macro is horribly difficult to use and drivers will continue
> to work most of the time if they omit a call when it is required.
> 
> Rather than rely on driver authors getting this right, push mmiowb() into
> arch_spin_unlock() for ia64. If this is deemed to be a performance issue,
> a subsequent optimisation could make use of ARCH_HAS_MMIOWB to elide
> the barrier in cases where no I/O writes were performed inside the
> critical section.

mmiowb() was always the wrong approach. IIRC what happened is that an
ia64 platform found that real wmb() semantics were too expensive, so
they kind of "relaxed" it, breaking everything, and then said drivers
that wanted to unbreak themselves had to add these mmiowb() calls in.

The right way to go, of course, would have been to implement wmb() the
way existing drivers expected, and to add a faster io_wmb(), ordering
only MMIO stores from the CPU, to the few drivers that the platform
cared about.

I think it was argued the wmb() was still technically correct because
the reordering did not happen at the CPU, but somewhere else in the 
interconnect or PCI controller. But that was just a crazy burden to
put on driver writers, and it was why the documentation was always
incomprehensible.
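
For reference, the convention being complained about required driver
code along these (hypothetical) lines, where forgetting the mmiowb()
could let MMIO writes from two CPUs reach the device out of lock order
on the affected ia64 platforms:

	/* Hypothetical driver fragment showing the old mmiowb() convention. */
	spin_lock_irqsave(&dev->reg_lock, flags);
	writel(cmd, dev->regs + REG_CMD);	/* posted MMIO writes... */
	writel(len, dev->regs + REG_LEN);
	mmiowb();	/* ...must be ordered before the lock is released */
	spin_unlock_irqrestore(&dev->reg_lock, flags);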

Not sure why Linus ever went along with it, but awesome you're removing
it. Thank you!

Thanks,
Nick

Patch

diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 3273d7aedfa0..43e21fe3499c 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -4,7 +4,6 @@  generic-y += exec.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mmiowb.h
 generic-y += preempt.h
 generic-y += trace_clock.h
 generic-y += vtime.h
diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h
index 1e6fef69bb01..7f2371ba04a4 100644
--- a/arch/ia64/include/asm/io.h
+++ b/arch/ia64/include/asm/io.h
@@ -119,8 +119,6 @@  extern int valid_mmap_phys_addr_range (unsigned long pfn, size_t count);
  * Ensure ordering of I/O space writes.  This will make sure that writes
  * following the barrier will arrive after all previous writes.  For most
  * ia64 platforms, this is a simple 'mf.a' instruction.
- *
- * See Documentation/driver-api/device-io.rst for more information.
  */
 static inline void ___ia64_mmiowb(void)
 {
@@ -296,7 +294,6 @@  __outsl (unsigned long port, const void *src, unsigned long count)
 #define __outb		platform_outb
 #define __outw		platform_outw
 #define __outl		platform_outl
-#define __mmiowb	platform_mmiowb
 
 #define inb(p)		__inb(p)
 #define inw(p)		__inw(p)
@@ -310,7 +307,6 @@  __outsl (unsigned long port, const void *src, unsigned long count)
 #define outsb(p,s,c)	__outsb(p,s,c)
 #define outsw(p,s,c)	__outsw(p,s,c)
 #define outsl(p,s,c)	__outsl(p,s,c)
-#define mmiowb()	__mmiowb()
 
 /*
  * The address passed to these functions are ioremap()ped already.
diff --git a/arch/ia64/include/asm/mmiowb.h b/arch/ia64/include/asm/mmiowb.h
new file mode 100644
index 000000000000..238d56172c6f
--- /dev/null
+++ b/arch/ia64/include/asm/mmiowb.h
@@ -0,0 +1,12 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_IA64_MMIOWB_H
+#define _ASM_IA64_MMIOWB_H
+
+#include <asm/machvec.h>
+
+#define mmiowb()	platform_mmiowb()
+
+#include <asm-generic/mmiowb.h>
+
+#endif	/* _ASM_IA64_MMIOWB_H */
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index afd0b3121b4c..5f620e66384e 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -73,6 +73,8 @@  static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 {
 	unsigned short	*p = (unsigned short *)&lock->lock + 1, tmp;
 
+	/* This could be optimised with ARCH_HAS_MMIOWB */
+	mmiowb();
 	asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p));
 	WRITE_ONCE(*p, (tmp + 2) & ~1);
 }