[v2,14/21] crypto: x86 - add some helper macros for ECB and CBC modes

Message ID 20210105164809.8594-15-ardb@kernel.org
State Accepted
Commit 827ee47228a6bfa446ddb81999adf400ae901106
Series crypto: x86 - remove glue helper module

Commit Message

Ard Biesheuvel Jan. 5, 2021, 4:48 p.m. UTC
The x86 glue helper module is starting to show its age:
- It relies heavily on function pointers to invoke asm helper functions that
  operate on relatively small, fixed input sizes. This means performance is
  severely impacted by retpolines.
- It goes to great lengths to amortize the cost of kernel_fpu_begin()/end()
  over as much work as possible, which is no longer necessary now that FPU
  save/restore is done lazily. Moreover, doing so may cause unbounded
  scheduling blackouts, because enabling the FPU in kernel mode disables
  preemption.
- The CBC mode decryption helper makes backward strides through the input, in
  order to avoid a single block size memcpy() between chunks. Consuming the
  input in this manner is highly likely to defeat any hardware prefetchers,
  so it is better to go through the data linearly, and perform the extra
  memcpy() where needed (which is turned into direct loads and stores by the
  compiler anyway). Note that benchmarks won't show this effect, given that
  the memory they use is always cache hot.
- It implements blockwise XOR in terms of le128 pointers, which imply an
  alignment that is not guaranteed by the API, violating the C standard.

GCC does not seem to be smart enough to elide the indirect calls when the
function pointers are passed as arguments to static inline helper routines
modeled after the existing ones. Instead, let's create some CPP macros that
encapsulate the core of the ECB and CBC processing, so we can wire them up
for existing users of the glue helper module, i.e., Camellia, Serpent,
Twofish and CAST6.

Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/crypto/ecb_cbc_helpers.h | 76 ++++++++++++++++++++
 1 file changed, 76 insertions(+)
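
For illustration, a glue driver would wire the ECB helpers up roughly as
follows. This is only a sketch: the mycipher_* names, MYCIPHER_BLOCK_SIZE
and the 8-way width are placeholders, not symbols introduced by this series.

#include <linux/linkage.h>
#include "ecb_cbc_helpers.h"

/* Placeholder asm helpers: an 8-way parallel primitive plus a one-block
 * fallback, both taking (ctx, dst, src) as expected by ECB_BLOCK(). */
asmlinkage void mycipher_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src);
asmlinkage void mycipher_encrypt(const void *ctx, u8 *dst, const u8 *src);

static int ecb_encrypt(struct skcipher_request *req)
{
	ECB_WALK_START(req, MYCIPHER_BLOCK_SIZE, 8);
	/* Consume as much input as possible with the widest helper, then
	 * fall back to single blocks for the tail. */
	ECB_BLOCK(8, mycipher_ecb_enc_8way);
	ECB_BLOCK(1, mycipher_encrypt);
	ECB_WALK_END();
}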

Patch

diff --git a/arch/x86/crypto/ecb_cbc_helpers.h b/arch/x86/crypto/ecb_cbc_helpers.h
new file mode 100644
index 000000000000..eaa15c7b29d6
--- /dev/null
+++ b/arch/x86/crypto/ecb_cbc_helpers.h
@@ -0,0 +1,76 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _CRYPTO_ECB_CBC_HELPER_H
+#define _CRYPTO_ECB_CBC_HELPER_H
+
+#include <crypto/internal/skcipher.h>
+#include <asm/fpu/api.h>
+
+/*
+ * Mode helpers to instantiate parameterized skcipher ECB/CBC modes without
+ * having to rely on indirect calls and retpolines.
+ */
+
+#define ECB_WALK_START(req, bsize, fpu_blocks) do {			\
+	void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));	\
+	const int __bsize = (bsize);					\
+	struct skcipher_walk walk;					\
+	int err = skcipher_walk_virt(&walk, (req), false);		\
+	while (walk.nbytes > 0) {					\
+		unsigned int nbytes = walk.nbytes;			\
+		bool do_fpu = (fpu_blocks) != -1 &&			\
+			      nbytes >= (fpu_blocks) * __bsize;		\
+		const u8 *src = walk.src.virt.addr;			\
+		u8 *dst = walk.dst.virt.addr;				\
+		u8 __maybe_unused buf[(bsize)];				\
+		if (do_fpu) kernel_fpu_begin()
+
+#define CBC_WALK_START(req, bsize, fpu_blocks)				\
+	ECB_WALK_START(req, bsize, fpu_blocks)
+
+#define ECB_WALK_ADVANCE(blocks) do {					\
+	dst += (blocks) * __bsize;					\
+	src += (blocks) * __bsize;					\
+	nbytes -= (blocks) * __bsize;					\
+} while (0)
+
+#define ECB_BLOCK(blocks, func) do {					\
+	while (nbytes >= (blocks) * __bsize) {				\
+		(func)(ctx, dst, src);					\
+		ECB_WALK_ADVANCE(blocks);				\
+	}								\
+} while (0)
+
+#define CBC_ENC_BLOCK(func) do {					\
+	const u8 *__iv = walk.iv;					\
+	while (nbytes >= __bsize) {					\
+		crypto_xor_cpy(dst, src, __iv, __bsize);		\
+		(func)(ctx, dst, dst);					\
+		__iv = dst;						\
+		ECB_WALK_ADVANCE(1);					\
+	}								\
+	memcpy(walk.iv, __iv, __bsize);					\
+} while (0)
+
+#define CBC_DEC_BLOCK(blocks, func) do {				\
+	while (nbytes >= (blocks) * __bsize) {				\
+		const u8 *__iv = src + ((blocks) - 1) * __bsize;	\
+		if (dst == src)						\
+			__iv = memcpy(buf, __iv, __bsize);		\
+		(func)(ctx, dst, src);					\
+		crypto_xor(dst, walk.iv, __bsize);			\
+		memcpy(walk.iv, __iv, __bsize);				\
+		ECB_WALK_ADVANCE(blocks);				\
+	}								\
+} while (0)
+
+#define ECB_WALK_END()							\
+		if (do_fpu) kernel_fpu_end();				\
+		err = skcipher_walk_done(&walk, nbytes);		\
+	}								\
+	return err;							\
+} while (0)
+
+#define CBC_WALK_END() ECB_WALK_END()
+
+#endif
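
The CBC helpers are intended to be wired up in the same way. Again, this is
only a sketch continuing the placeholder mycipher_* cipher from above:

asmlinkage void mycipher_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src);
asmlinkage void mycipher_decrypt(const void *ctx, u8 *dst, const u8 *src);

/* CBC encryption is inherently serial, so only the one-block helper is
 * used and the FPU is never requested (fpu_blocks == -1). */
static int cbc_encrypt(struct skcipher_request *req)
{
	CBC_WALK_START(req, MYCIPHER_BLOCK_SIZE, -1);
	CBC_ENC_BLOCK(mycipher_encrypt);
	CBC_WALK_END();
}

/* CBC decryption can be parallelized: the hypothetical 8-way helper is
 * assumed to XOR the preceding ciphertext block into all but the first
 * output block itself, while CBC_DEC_BLOCK() XORs walk.iv into the first
 * block and advances the chaining value. */
static int cbc_decrypt(struct skcipher_request *req)
{
	CBC_WALK_START(req, MYCIPHER_BLOCK_SIZE, 8);
	CBC_DEC_BLOCK(8, mycipher_cbc_dec_8way);
	CBC_DEC_BLOCK(1, mycipher_decrypt);
	CBC_WALK_END();
}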