@@ -2,14 +2,15 @@
generic-y += compat.h
+generic-y += current.h
generic-y += exec.h
generic-y += export.h
generic-y += fb.h
generic-y += irq_work.h
+generic-y += kprobes.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += preempt.h
generic-y += sections.h
+generic-y += simd.h
generic-y += trace_clock.h
-generic-y += current.h
-generic-y += kprobes.h
@@ -22,6 +22,7 @@ generic-y += parport.h
generic-y += pci.h
generic-y += percpu.h
generic-y += preempt.h
+generic-y += simd.h
generic-y += topology.h
generic-y += trace_clock.h
generic-y += user.h
new file mode 100644
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <linux/simd.h>
+#ifndef _ASM_SIMD_H
+#define _ASM_SIMD_H
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+#include <asm/neon.h>
+
+static __must_check inline bool may_use_simd(void)
+{
+ return !in_interrupt();
+}
+
+static inline void simd_get(simd_context_t *ctx)
+{
+ *ctx = may_use_simd() ? HAVE_FULL_SIMD : HAVE_NO_SIMD;
+}
+
+static inline void simd_put(simd_context_t *ctx)
+{
+ if (*ctx & HAVE_SIMD_IN_USE)
+ kernel_neon_end();
+ *ctx = HAVE_NO_SIMD;
+}
+
+static __must_check inline bool simd_use(simd_context_t *ctx)
+{
+ if (!(*ctx & HAVE_FULL_SIMD))
+ return false;
+ if (*ctx & HAVE_SIMD_IN_USE)
+ return true;
+ kernel_neon_begin();
+ *ctx |= HAVE_SIMD_IN_USE;
+ return true;
+}
+
+#else
+
+static __must_check inline bool may_use_simd(void)
+{
+ return false;
+}
+
+static inline void simd_get(simd_context_t *ctx)
+{
+ *ctx = HAVE_NO_SIMD;
+}
+
+static inline void simd_put(simd_context_t *ctx)
+{
+}
+
+static __must_check inline bool simd_use(simd_context_t *ctx)
+{
+ return false;
+}
+#endif
+
+#endif /* _ASM_SIMD_H */
@@ -1,11 +1,10 @@
-/*
- * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+/* SPDX-License-Identifier: GPL-2.0
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
+ * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
+#include <linux/simd.h>
#ifndef __ASM_SIMD_H
#define __ASM_SIMD_H
@@ -16,6 +15,8 @@
#include <linux/types.h>
#ifdef CONFIG_KERNEL_MODE_NEON
+#include <asm/neon.h>
+#include <asm/simd.h>
DECLARE_PER_CPU(bool, kernel_neon_busy);
@@ -40,9 +41,47 @@ static __must_check inline bool may_use_simd(void)
!this_cpu_read(kernel_neon_busy);
}
+static inline void simd_get(simd_context_t *ctx)
+{
+ *ctx = may_use_simd() ? HAVE_FULL_SIMD : HAVE_NO_SIMD;
+}
+
+static inline void simd_put(simd_context_t *ctx)
+{
+ if (*ctx & HAVE_SIMD_IN_USE)
+ kernel_neon_end();
+ *ctx = HAVE_NO_SIMD;
+}
+
+static __must_check inline bool simd_use(simd_context_t *ctx)
+{
+ if (!(*ctx & HAVE_FULL_SIMD))
+ return false;
+ if (*ctx & HAVE_SIMD_IN_USE)
+ return true;
+ kernel_neon_begin();
+ *ctx |= HAVE_SIMD_IN_USE;
+ return true;
+}
+
#else /* ! CONFIG_KERNEL_MODE_NEON */
-static __must_check inline bool may_use_simd(void) {
+static __must_check inline bool may_use_simd(void)
+{
+ return false;
+}
+
+static inline void simd_get(simd_context_t *ctx)
+{
+ *ctx = HAVE_NO_SIMD;
+}
+
+static inline void simd_put(simd_context_t *ctx)
+{
+}
+
+static __must_check inline bool simd_use(simd_context_t *ctx)
+{
return false;
}
@@ -5,8 +5,8 @@ generic-y += compat.h
generic-y += current.h
generic-y += device.h
generic-y += div64.h
-generic-y += dma.h
generic-y += dma-mapping.h
+generic-y += dma.h
generic-y += emergency-restart.h
generic-y += exec.h
generic-y += extable.h
@@ -30,6 +30,7 @@ generic-y += pgalloc.h
generic-y += preempt.h
generic-y += segment.h
generic-y += serial.h
+generic-y += simd.h
generic-y += tlbflush.h
generic-y += topology.h
generic-y += trace_clock.h
@@ -8,8 +8,8 @@ generic-y += current.h
generic-y += delay.h
generic-y += device.h
generic-y += div64.h
-generic-y += dma.h
generic-y += dma-mapping.h
+generic-y += dma.h
generic-y += emergency-restart.h
generic-y += exec.h
generic-y += extable.h
@@ -39,6 +39,7 @@ generic-y += preempt.h
generic-y += scatterlist.h
generic-y += sections.h
generic-y += serial.h
+generic-y += simd.h
generic-y += sizes.h
generic-y += spinlock.h
generic-y += timex.h
@@ -29,6 +29,7 @@ generic-y += rwsem.h
generic-y += sections.h
generic-y += segment.h
generic-y += serial.h
+generic-y += simd.h
generic-y += sizes.h
generic-y += topology.h
generic-y += trace_clock.h
@@ -4,6 +4,7 @@ generic-y += irq_work.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += preempt.h
+generic-y += simd.h
generic-y += trace_clock.h
generic-y += vtime.h
generic-y += word-at-a-time.h
@@ -19,6 +19,7 @@ generic-y += mm-arch-hooks.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += sections.h
+generic-y += simd.h
generic-y += spinlock.h
generic-y += topology.h
generic-y += trace_clock.h
@@ -25,6 +25,7 @@ generic-y += parport.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += serial.h
+generic-y += simd.h
generic-y += syscalls.h
generic-y += topology.h
generic-y += trace_clock.h
@@ -16,6 +16,7 @@ generic-y += qrwlock.h
generic-y += qspinlock.h
generic-y += sections.h
generic-y += segment.h
+generic-y += simd.h
generic-y += trace_clock.h
generic-y += unaligned.h
generic-y += user.h
@@ -7,14 +7,14 @@ generic-y += bug.h
generic-y += bugs.h
generic-y += checksum.h
generic-y += clkdev.h
-generic-y += cmpxchg.h
generic-y += cmpxchg-local.h
+generic-y += cmpxchg.h
generic-y += compat.h
generic-y += cputime.h
generic-y += device.h
generic-y += div64.h
-generic-y += dma.h
generic-y += dma-mapping.h
+generic-y += dma.h
generic-y += emergency-restart.h
generic-y += errno.h
generic-y += exec.h
@@ -46,14 +46,15 @@ generic-y += sections.h
generic-y += segment.h
generic-y += serial.h
generic-y += shmbuf.h
+generic-y += simd.h
generic-y += sizes.h
generic-y += stat.h
generic-y += switch_to.h
generic-y += timex.h
generic-y += topology.h
generic-y += trace_clock.h
-generic-y += xor.h
generic-y += unaligned.h
generic-y += user.h
generic-y += vga.h
generic-y += word-at-a-time.h
+generic-y += xor.h
@@ -33,6 +33,7 @@ generic-y += preempt.h
generic-y += sections.h
generic-y += segment.h
generic-y += serial.h
+generic-y += simd.h
generic-y += spinlock.h
generic-y += topology.h
generic-y += trace_clock.h
@@ -28,12 +28,13 @@ generic-y += module.h
generic-y += pci.h
generic-y += percpu.h
generic-y += preempt.h
-generic-y += qspinlock_types.h
-generic-y += qspinlock.h
-generic-y += qrwlock_types.h
generic-y += qrwlock.h
+generic-y += qrwlock_types.h
+generic-y += qspinlock.h
+generic-y += qspinlock_types.h
generic-y += sections.h
generic-y += segment.h
+generic-y += simd.h
generic-y += string.h
generic-y += switch_to.h
generic-y += topology.h
@@ -17,6 +17,7 @@ generic-y += percpu.h
generic-y += preempt.h
generic-y += seccomp.h
generic-y += segment.h
+generic-y += simd.h
generic-y += topology.h
generic-y += trace_clock.h
generic-y += user.h
@@ -4,7 +4,8 @@ generic-y += irq_regs.h
generic-y += irq_work.h
generic-y += local64.h
generic-y += mcs_spinlock.h
+generic-y += msi.h
generic-y += preempt.h
generic-y += rwsem.h
+generic-y += simd.h
generic-y += vtime.h
-generic-y += msi.h
@@ -5,9 +5,9 @@ generic-y += compat.h
generic-y += cputime.h
generic-y += device.h
generic-y += div64.h
-generic-y += dma.h
generic-y += dma-contiguous.h
generic-y += dma-mapping.h
+generic-y += dma.h
generic-y += emergency-restart.h
generic-y += errno.h
generic-y += exec.h
@@ -46,6 +46,7 @@ generic-y += setup.h
generic-y += shmbuf.h
generic-y += shmparam.h
generic-y += signal.h
+generic-y += simd.h
generic-y += socket.h
generic-y += sockios.h
generic-y += stat.h
@@ -7,9 +7,9 @@ generated-y += unistd_nr.h
generic-y += asm-offsets.h
generic-y += cacheflush.h
generic-y += device.h
+generic-y += div64.h
generic-y += dma-contiguous.h
generic-y += dma-mapping.h
-generic-y += div64.h
generic-y += emergency-restart.h
generic-y += export.h
generic-y += fb.h
@@ -22,6 +22,7 @@ generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += preempt.h
generic-y += rwsem.h
+generic-y += simd.h
generic-y += trace_clock.h
generic-y += unaligned.h
generic-y += word-at-a-time.h
@@ -16,6 +16,7 @@ generic-y += percpu.h
generic-y += preempt.h
generic-y += rwsem.h
generic-y += serial.h
+generic-y += simd.h
generic-y += sizes.h
generic-y += trace_clock.h
generic-y += xor.h
@@ -17,5 +17,6 @@ generic-y += msi.h
generic-y += preempt.h
generic-y += rwsem.h
generic-y += serial.h
+generic-y += simd.h
generic-y += trace_clock.h
generic-y += word-at-a-time.h
@@ -16,15 +16,16 @@ generic-y += io.h
generic-y += irq_regs.h
generic-y += irq_work.h
generic-y += kdebug.h
+generic-y += kprobes.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += param.h
generic-y += pci.h
generic-y += percpu.h
generic-y += preempt.h
+generic-y += simd.h
generic-y += switch_to.h
generic-y += topology.h
generic-y += trace_clock.h
generic-y += word-at-a-time.h
generic-y += xor.h
-generic-y += kprobes.h
@@ -27,6 +27,7 @@ generic-y += preempt.h
generic-y += sections.h
generic-y += segment.h
generic-y += serial.h
+generic-y += simd.h
generic-y += sizes.h
generic-y += syscalls.h
generic-y += topology.h
@@ -1,4 +1,11 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <linux/simd.h>
+#ifndef _ASM_SIMD_H
+#define _ASM_SIMD_H
#include <asm/fpu/api.h>
@@ -10,3 +17,38 @@ static __must_check inline bool may_use_simd(void)
{
return irq_fpu_usable();
}
+
+static inline void simd_get(simd_context_t *ctx)
+{
+#if !defined(CONFIG_UML)
+ *ctx = may_use_simd() ? HAVE_FULL_SIMD : HAVE_NO_SIMD;
+#else
+ *ctx = HAVE_NO_SIMD;
+#endif
+}
+
+static inline void simd_put(simd_context_t *ctx)
+{
+#if !defined(CONFIG_UML)
+ if (*ctx & HAVE_SIMD_IN_USE)
+ kernel_fpu_end();
+#endif
+ *ctx = HAVE_NO_SIMD;
+}
+
+static __must_check inline bool simd_use(simd_context_t *ctx)
+{
+#if !defined(CONFIG_UML)
+ if (!(*ctx & HAVE_FULL_SIMD))
+ return false;
+ if (*ctx & HAVE_SIMD_IN_USE)
+ return true;
+ kernel_fpu_begin();
+ *ctx |= HAVE_SIMD_IN_USE;
+ return true;
+#else
+ return false;
+#endif
+}
+
+#endif /* _ASM_SIMD_H */
@@ -24,6 +24,7 @@ generic-y += percpu.h
generic-y += preempt.h
generic-y += rwsem.h
generic-y += sections.h
+generic-y += simd.h
generic-y += topology.h
generic-y += trace_clock.h
generic-y += word-at-a-time.h
@@ -1,5 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/simd.h>
+#ifndef _ASM_SIMD_H
+#define _ASM_SIMD_H
+
#include <linux/hardirq.h>
/*
@@ -13,3 +17,19 @@ static __must_check inline bool may_use_simd(void)
{
return !in_interrupt();
}
+
+static inline void simd_get(simd_context_t *ctx)
+{
+ *ctx = HAVE_NO_SIMD;
+}
+
+static inline void simd_put(simd_context_t *ctx)
+{
+}
+
+static __must_check inline bool simd_use(simd_context_t *ctx)
+{
+ return false;
+}
+
+#endif /* _ASM_SIMD_H */
new file mode 100644
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _SIMD_H
+#define _SIMD_H
+
+typedef enum {
+ HAVE_NO_SIMD = 1 << 0,
+ HAVE_FULL_SIMD = 1 << 1,
+ HAVE_SIMD_IN_USE = 1 << 31
+} simd_context_t;
+
+#include <linux/sched.h>
+#include <asm/simd.h>
+
+static inline void simd_relax(simd_context_t *ctx)
+{
+#ifdef CONFIG_PREEMPT
+ if ((*ctx & HAVE_SIMD_IN_USE) && need_resched()) {
+ simd_put(ctx);
+ simd_get(ctx);
+ }
+#endif
+}
+
+#endif /* _SIMD_H */
Sometimes it's useful to amortize calls to XSAVE/XRSTOR and the related FPU/SIMD functions over a number of calls, because FPU restoration is quite expensive. This adds a simple header for carrying out this pattern: simd_context_t simd_context; simd_get(&simd_context); while ((item = get_item_from_queue()) != NULL) { encrypt_item(item, simd_context); simd_relax(&simd_context); } simd_put(&simd_context); The relaxation step ensures that we don't trample over preemption, and the get/put API should be a familiar paradigm in the kernel. On the other end, code that actually wants to use SIMD instructions can accept this as a parameter and check it via: void encrypt_item(struct item *item, simd_context_t *simd_context) { if (item->len > LARGE_FOR_SIMD && simd_use(simd_context)) wild_simd_code(item); else boring_scalar_code(item); } The actual XSAVE happens during simd_use (and only on the first time), so that if the context is never actually used, no performance penalty is hit. Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> Cc: Samuel Neves <sneves@dei.uc.pt> Cc: Andy Lutomirski <luto@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: linux-arch@vger.kernel.org --- arch/alpha/include/asm/Kbuild | 5 ++- arch/arc/include/asm/Kbuild | 1 + arch/arm/include/asm/simd.h | 63 ++++++++++++++++++++++++++++++ arch/arm64/include/asm/simd.h | 51 +++++++++++++++++++++--- arch/c6x/include/asm/Kbuild | 3 +- arch/h8300/include/asm/Kbuild | 3 +- arch/hexagon/include/asm/Kbuild | 1 + arch/ia64/include/asm/Kbuild | 1 + arch/m68k/include/asm/Kbuild | 1 + arch/microblaze/include/asm/Kbuild | 1 + arch/mips/include/asm/Kbuild | 1 + arch/nds32/include/asm/Kbuild | 7 ++-- arch/nios2/include/asm/Kbuild | 1 + arch/openrisc/include/asm/Kbuild | 7 ++-- arch/parisc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/Kbuild | 3 +- arch/riscv/include/asm/Kbuild | 3 +- arch/s390/include/asm/Kbuild | 3 +- arch/sh/include/asm/Kbuild | 1 + arch/sparc/include/asm/Kbuild | 1 + arch/um/include/asm/Kbuild | 3 +- arch/unicore32/include/asm/Kbuild | 1 + arch/x86/include/asm/simd.h | 44 ++++++++++++++++++++- arch/xtensa/include/asm/Kbuild | 1 + include/asm-generic/simd.h | 20 ++++++++++ include/linux/simd.h | 28 +++++++++++++ 26 files changed, 234 insertions(+), 21 deletions(-) create mode 100644 arch/arm/include/asm/simd.h create mode 100644 include/linux/simd.h -- 2.19.0