diff mbox series

[8/8] crypto: arm64/aes-ccm - Merge finalization into en/decrypt asm helper

Message ID 20240111123302.589910-18-ardb+git@google.com
State New
Headers show
Series crypto: Clean up arm64 AES-CCM code | expand

Commit Message

Ard Biesheuvel Jan. 11, 2024, 12:33 p.m. UTC
From: Ard Biesheuvel <ardb@kernel.org>

The C glue code already determines whether or not the current iteration
is the final one, by comparing walk.nbytes with walk.total. This means we
can easily inform the asm helper of this as well, by passing a pointer to
the original IV on the final iteration and NULL otherwise. The original
IV is used in the finalization of the MAC, so this removes the need for a
separate call into the asm code (ce_aes_ccm_final) to perform the
finalization.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/crypto/aes-ce-ccm-core.S | 32 ++++++++------------
 arch/arm64/crypto/aes-ce-ccm-glue.c | 27 ++++++++---------
 2 files changed, 24 insertions(+), 35 deletions(-)
diff mbox series

Patch

diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index 75be3157bae1..c0d89f8ae4c4 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -44,28 +44,12 @@ 
 	aese	\vb\().16b, v4.16b
 	.endm
 
-	/*
-	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
-	 * 			 u32 rounds);
-	 */
-SYM_FUNC_START(ce_aes_ccm_final)
-	ld1	{v0.16b}, [x0]			/* load mac */
-	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
-
-	aes_encrypt	v0, v1, w3
-
-	/* final round key cancels out */
-	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
-	st1	{v0.16b}, [x0]			/* store result */
-	ret
-SYM_FUNC_END(ce_aes_ccm_final)
-
 SYM_FUNC_START_LOCAL(aes_ccm_do_crypt)
 	load_round_keys	x3, w4, x10
 
+	ld1	{v0.16b}, [x5]			/* load mac */
 	cbz	x2, 5f
 	ldr	x8, [x6, #8]			/* load lower ctr */
-	ld1	{v0.16b}, [x5]			/* load mac */
 CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
 0:	/* outer loop */
 	ld1	{v1.8b}, [x6]			/* load upper ctr */
@@ -89,9 +73,9 @@  CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
 
 	bne	0b
 CPU_LE(	rev	x8, x8			)
-	st1	{v0.16b}, [x5]			/* store mac */
 	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
-5:	ret
+5:	cbz	x7, 8f
+	b	7f
 
 6:	add	x1, x1, w2, sxtw		/* rewind the input pointer (w2 < 0) */
 	add	x0, x0, w2, sxtw		/* rewind the output pointer */
@@ -111,8 +95,16 @@  CPU_LE(	rev	x8, x8			)
 	eor	v0.16b, v0.16b, v22.16b		/* fold plaintext into mac */
 	tbx	v2.16b, {v6.16b}, v8.16b	/* insert output from previous iteration */
 
-	st1	{v0.16b}, [x5]			/* store mac */
 	st1	{v2.16b}, [x0]			/* store output block */
+	cbz	x7, 8f				/* time to finalize MAC? */
+7:	ld1	{v1.16b}, [x7]			/* load 1st ctriv */
+
+	aes_encrypt	v0, v1, w4
+
+	/* final round key cancels out */
+	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
+
+8:	st1	{v0.16b}, [x5]			/* store mac */
 	ret
 SYM_FUNC_END(aes_ccm_do_crypt)
 
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index ed3d79e05112..ce9b28e3c7d6 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -38,14 +38,11 @@  asmlinkage u32 ce_aes_mac_update(u8 const in[], u32 const rk[], int rounds,
 
 asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
 				   u32 const rk[], u32 rounds, u8 mac[],
-				   u8 ctr[]);
+				   u8 ctr[], u8 const final_iv[]);
 
 asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
 				   u32 const rk[], u32 rounds, u8 mac[],
-				   u8 ctr[]);
-
-asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
-				 u32 rounds);
+				   u8 ctr[], u8 const final_iv[]);
 
 static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
 		      unsigned int key_len)
@@ -210,9 +207,12 @@  static int ccm_encrypt(struct aead_request *req)
 		const u8 *src = walk.src.virt.addr;
 		u8 *dst = walk.dst.virt.addr;
 		u8 buf[AES_BLOCK_SIZE];
+		u8 *final_iv = NULL;
 
-		if (walk.nbytes == walk.total)
+		if (walk.nbytes == walk.total) {
 			tail = 0;
+			final_iv = orig_iv;
+		}
 
 		if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
 			src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes],
@@ -220,14 +220,11 @@  static int ccm_encrypt(struct aead_request *req)
 
 		ce_aes_ccm_encrypt(dst, src, walk.nbytes - tail,
 				   ctx->key_enc, num_rounds(ctx),
-				   mac, walk.iv);
+				   mac, walk.iv, final_iv);
 
 		if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
 			memcpy(walk.dst.virt.addr, dst, walk.nbytes);
 
-		if (walk.nbytes == walk.total)
-			ce_aes_ccm_final(mac, orig_iv, ctx->key_enc, num_rounds(ctx));
-
 		if (walk.nbytes) {
 			err = skcipher_walk_done(&walk, tail);
 		}
@@ -277,9 +274,12 @@  static int ccm_decrypt(struct aead_request *req)
 		const u8 *src = walk.src.virt.addr;
 		u8 *dst = walk.dst.virt.addr;
 		u8 buf[AES_BLOCK_SIZE];
+		u8 *final_iv = NULL;
 
-		if (walk.nbytes == walk.total)
+		if (walk.nbytes == walk.total) {
 			tail = 0;
+			final_iv = orig_iv;
+		}
 
 		if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
 			src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes],
@@ -287,14 +287,11 @@  static int ccm_decrypt(struct aead_request *req)
 
 		ce_aes_ccm_decrypt(dst, src, walk.nbytes - tail,
 				   ctx->key_enc, num_rounds(ctx),
-				   mac, walk.iv);
+				   mac, walk.iv, final_iv);
 
 		if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
 			memcpy(walk.dst.virt.addr, dst, walk.nbytes);
 
-		if (walk.nbytes == walk.total)
-			ce_aes_ccm_final(mac, orig_iv, ctx->key_enc, num_rounds(ctx));
-
 		if (walk.nbytes) {
 			err = skcipher_walk_done(&walk, tail);
 		}