diff mbox series

[v2,3/5] armv8 SHA-1 using ARMv8 Crypto Extensions:

Message ID 1654107991-598-4-git-send-email-loic.poulain@linaro.org
State Accepted
Commit 084d8e6bf9ea6673e94f798c5c3793893eb783ab
Headers show
Series Add ARMv8 CE sha1/sha256 support | expand

Commit Message

Loic Poulain June 1, 2022, 6:26 p.m. UTC
This patch adds support for the SHA-1 Secure Hash Algorithm for CPUs
that have support for the SHA-1 part of the ARM v8 Crypto Extensions.

It greatly speeds up SHA-1 based operations: about 10x faster on an
i.MX8M EVK board (~12ms vs ~165ms to verify the SHA-1 of a 20MiB kernel).

The asm implementation is a simplified version of the Linux one (by
Ard Biesheuvel).

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
---
 arch/arm/cpu/armv8/Kconfig        |  11 ++++
 arch/arm/cpu/armv8/Makefile       |   1 +
 arch/arm/cpu/armv8/sha1_ce_core.S | 132 ++++++++++++++++++++++++++++++++++++++
 arch/arm/cpu/armv8/sha1_ce_glue.c |  21 ++++++
 4 files changed, 165 insertions(+)
 create mode 100644 arch/arm/cpu/armv8/sha1_ce_core.S
 create mode 100644 arch/arm/cpu/armv8/sha1_ce_glue.c

Comments

Tom Rini June 27, 2022, 9:31 p.m. UTC | #1
On Wed, Jun 01, 2022 at 08:26:29PM +0200, Loic Poulain wrote:

> This patch adds support for the SHA-1 Secure Hash Algorithm for CPUs
> that have support for the SHA-1 part of the ARM v8 Crypto Extensions.
> 
> It greatly improves sha-1 based operations, about 10x faster on iMX8M
> evk board. ~12ms vs ~165ms for a 20MiB kernel sha-1 verification.
> 
> asm implementation is a simplified version of the Linux version (from
> Ard Biesheuvel).
> 
> Signed-off-by: Loic Poulain <loic.poulain@linaro.org>

Applied to u-boot/next, thanks!
diff mbox series

Patch

diff --git a/arch/arm/cpu/armv8/Kconfig b/arch/arm/cpu/armv8/Kconfig
index 9967376..0b11ca8 100644
--- a/arch/arm/cpu/armv8/Kconfig
+++ b/arch/arm/cpu/armv8/Kconfig
@@ -171,4 +171,15 @@  config ARMV8_SECURE_BASE
 
 endif
 
+menuconfig ARMV8_CRYPTO
+	bool "ARM64 Accelerated Cryptographic Algorithms"
+
+if ARMV8_CRYPTO
+# Entries up to the matching "endif" are only offered when ARMV8_CRYPTO is set.
+config ARMV8_CE_SHA1
+	bool "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
+	default y if SHA1
+# ARMV8_CE_SHA1 builds sha1_ce_glue.o and sha1_ce_core.o (armv8 Makefile).
+endif
+
 endif
diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
index 85fe047..ff2495c 100644
--- a/arch/arm/cpu/armv8/Makefile
+++ b/arch/arm/cpu/armv8/Makefile
@@ -44,3 +44,4 @@  obj-$(CONFIG_TARGET_HIKEY) += hisilicon/
 obj-$(CONFIG_ARMV8_PSCI) += psci.o
 obj-$(CONFIG_TARGET_BCMNS3) += bcmns3/
 obj-$(CONFIG_XEN) += xen/
+obj-$(CONFIG_ARMV8_CE_SHA1) += sha1_ce_glue.o sha1_ce_core.o # C glue + CE asm core
diff --git a/arch/arm/cpu/armv8/sha1_ce_core.S b/arch/arm/cpu/armv8/sha1_ce_core.S
new file mode 100644
index 0000000..fbf2714
--- /dev/null
+++ b/arch/arm/cpu/armv8/sha1_ce_core.S
@@ -0,0 +1,132 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * sha1_ce_core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2022 Linaro Ltd <loic.poulain@linaro.org>
+ */
+
+#include <config.h>
+#include <linux/linkage.h>
+#include <asm/system.h>
+#include <asm/macro.h>
+
+	.text
+	.arch		armv8-a+crypto
+	/* k0-k3: the four round constants, each replicated across all lanes */
+	k0		.req	v0
+	k1		.req	v1
+	k2		.req	v2
+	k3		.req	v3
+	/* t0/t1: schedule word + round constant, filled alternately (add_only) */
+	t0		.req	v4
+	t1		.req	v5
+	/* dga/dgb: digest state kept across blocks, state[0..3] and state[4] */
+	dga		.req	q6
+	dgav		.req	v6
+	dgb		.req	s7
+	dgbv		.req	v7
+	/* dg0-dg2: per-block working values; v20-v22 as v8-v15 are callee-saved */
+	dg0q		.req	q20
+	dg0s		.req	s20
+	dg0v		.req	v20
+	dg1s		.req	s21
+	dg1v		.req	v21
+	dg2s		.req	s22
+	/* one sha1<op> step; "ev" and "od" steps ping-pong t0/t1 and dg1/dg2 */
+	.macro		add_only, op, ev, rc, s0, dg1
+	.ifc		\ev, ev
+	add		t1.4s, v\s0\().4s, \rc\().4s	/* t1 for the next "od" step */
+	sha1h		dg2s, dg0s			/* dg2 for the next "od" step */
+	.ifnb		\dg1
+	sha1\op		dg0q, \dg1, t0.4s		/* caller-chosen 2nd operand */
+	.else
+	sha1\op		dg0q, dg1s, t0.4s
+	.endif
+	.else
+	.ifnb		\s0
+	add		t0.4s, v\s0\().4s, \rc\().4s	/* t0 for the next "ev" step */
+	.endif
+	sha1h		dg1s, dg0s			/* dg1 for the next "ev" step */
+	sha1\op		dg0q, dg2s, t1.4s
+	.endif
+	.endm
+	/* add_only for v<s1>, wrapped in the sha1su0/sha1su1 update of v<s0> */
+	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
+	sha1su0		v\s0\().4s, v\s1\().4s, v\s2\().4s
+	add_only	\op, \ev, \rc, \s1, \dg1
+	sha1su1		v\s0\().4s, v\s3\().4s
+	.endm
+	/* val -> tmp via movz (low 16 bits) + movk (next 16), then dup into k */
+	.macro		loadrc, k, val, tmp
+	movz		\tmp, :abs_g0_nc:\val
+	movk		\tmp, :abs_g1:\val
+	dup		\k, \tmp
+	.endm
+
+	/*
+	 * void sha1_armv8_ce_process(uint32_t state[5], uint8_t const *src,
+	 *			      uint32_t blocks)
+	 * In: x0 = state, x1 = src (64 bytes are read per block), w2 = blocks.
+	 * blocks == 0 returns at once without touching state or src.
+	 * Writes w0, w2, w6, x1, v0-v7 and v16-v22 only, all caller-saved.
+	 */
+ENTRY(sha1_armv8_ce_process)
+	cbz		w2, 1f				/* nothing to hash */
+	/* load round constants */
+	loadrc		k0.4s, 0x5a827999, w6
+	loadrc		k1.4s, 0x6ed9eba1, w6
+	loadrc		k2.4s, 0x8f1bbcdc, w6
+	loadrc		k3.4s, 0xca62c1d6, w6
+
+	/* load state (4+1 digest states) */
+	ld1		{dgav.4s}, [x0]
+	ldr		dgb, [x0, #16]
+
+	/* load input (64 bytes into v16->v19 16B vectors) */
+0:	ld1		{v16.4s-v19.4s}, [x1], #64
+	sub		w2, w2, #1			/* one block consumed */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+	rev32		v16.16b, v16.16b
+	rev32		v17.16b, v17.16b
+	rev32		v18.16b, v18.16b
+	rev32		v19.16b, v19.16b
+#endif
+
+	add		t0.4s, v16.4s, k0.4s		/* t0 for the first "ev" step */
+	mov		dg0v.16b, dgav.16b		/* working copy of state[0..3] */
+	/* 20 steps; each sha1c/sha1p/sha1m instruction advances four rounds */
+	add_update	c, ev, k0, 16, 17, 18, 19, dgb
+	add_update	c, od, k0, 17, 18, 19, 16
+	add_update	c, ev, k0, 18, 19, 16, 17
+	add_update	c, od, k0, 19, 16, 17, 18
+	add_update	c, ev, k1, 16, 17, 18, 19
+
+	add_update	p, od, k1, 17, 18, 19, 16
+	add_update	p, ev, k1, 18, 19, 16, 17
+	add_update	p, od, k1, 19, 16, 17, 18
+	add_update	p, ev, k1, 16, 17, 18, 19
+	add_update	p, od, k2, 17, 18, 19, 16
+
+	add_update	m, ev, k2, 18, 19, 16, 17
+	add_update	m, od, k2, 19, 16, 17, 18
+	add_update	m, ev, k2, 16, 17, 18, 19
+	add_update	m, od, k2, 17, 18, 19, 16
+	add_update	m, ev, k3, 18, 19, 16, 17
+
+	add_update	p, od, k3, 19, 16, 17, 18
+	add_only	p, ev, k3, 17
+	add_only	p, od, k3, 18
+	add_only	p, ev, k3, 19
+	add_only	p, od
+
+	add		dgbv.2s, dgbv.2s, dg1v.2s	/* update state[4] */
+	add		dgav.4s, dgav.4s, dg0v.4s	/* update state[0..3] */
+
+	cbnz		w2, 0b				/* loop on next block */
+
+	st1		{dgav.4s}, [x0]			/* store new state */
+	str		dgb, [x0, #16]
+1:	mov		w0, w2				/* w2 is 0 here, so w0 = 0 */
+	ret
+ENDPROC(sha1_armv8_ce_process)
diff --git a/arch/arm/cpu/armv8/sha1_ce_glue.c b/arch/arm/cpu/armv8/sha1_ce_glue.c
new file mode 100644
index 0000000..780b119
--- /dev/null
+++ b/arch/arm/cpu/armv8/sha1_ce_glue.c
@@ -0,0 +1,21 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * sha1_ce_glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2022 Linaro Ltd <loic.poulain@linaro.org>
+ */
+
+#include <common.h>
+#include <u-boot/sha1.h>
+
+/* Assembly core (sha1_ce_core.S): hashes "blocks" 64-byte blocks of src. */
+extern void sha1_armv8_ce_process(uint32_t state[5], uint8_t const *src,
+				  uint32_t blocks);
+
+/* sha1_process() backed by the CE core; a zero block count does nothing. */
+void sha1_process(sha1_context *ctx, const unsigned char *data,
+		  unsigned int blocks)
+{
+	if (blocks)
+		sha1_armv8_ce_process(ctx->state, data, blocks);
+}