diff mbox series

[resend,17/18] crypto: arm/aes - avoid expanded lookup tables in the final round

Message ID 20170724102820.16534-18-ard.biesheuvel@linaro.org
State Accepted
Commit 0d149ce67d4409d7ff13c2d08c3474f3319e1b7e
Headers show
Series crypto: ARM/arm64 roundup for v4.14 | expand

Commit Message

Ard Biesheuvel July 24, 2017, 10:28 a.m. UTC
For the final round, avoid the expanded and padded lookup tables
exported by the generic AES driver. Instead, for encryption, we can
perform byte loads from the same table we used for the inner rounds,
which will still be hot in the caches. For decryption, use the inverse
AES Sbox directly, which is 4x smaller than the inverse lookup table
exported by the generic driver.

This should significantly reduce the Dcache footprint of our code,
which makes the code more robust against timing attacks. It does not
introduce any additional module dependencies, given that we already
rely on the core AES module for the shared key expansion routines.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

---
 arch/arm/crypto/aes-cipher-core.S | 88 +++++++++++++++-----
 1 file changed, 65 insertions(+), 23 deletions(-)

-- 
2.9.3
diff mbox series

Patch

diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
index c817a86c4ca8..54b384084637 100644
--- a/arch/arm/crypto/aes-cipher-core.S
+++ b/arch/arm/crypto/aes-cipher-core.S
@@ -10,6 +10,7 @@ 
  */
 
 #include <linux/linkage.h>
+#include <asm/cache.h>
 
 	.text
 	.align		5
@@ -32,19 +33,19 @@ 
 	.endif
 	.endm
 
-	.macro		__load, out, in, idx
+	.macro		__load, out, in, idx, sz, op
 	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
-	ldr		\out, [ttab, \in, lsr #(8 * \idx) - 2]
+	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
 	.else
-	ldr		\out, [ttab, \in, lsl #2]
+	ldr\op		\out, [ttab, \in, lsl #\sz]
 	.endif
 	.endm
 
-	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc
+	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op
 	__select	\out0, \in0, 0
 	__select	t0, \in1, 1
-	__load		\out0, \out0, 0
-	__load		t0, t0, 1
+	__load		\out0, \out0, 0, \sz, \op
+	__load		t0, t0, 1, \sz, \op
 
 	.if		\enc
 	__select	\out1, \in1, 0
@@ -53,10 +54,10 @@ 
 	__select	\out1, \in3, 0
 	__select	t1, \in0, 1
 	.endif
-	__load		\out1, \out1, 0
+	__load		\out1, \out1, 0, \sz, \op
 	__select	t2, \in2, 2
-	__load		t1, t1, 1
-	__load		t2, t2, 2
+	__load		t1, t1, 1, \sz, \op
+	__load		t2, t2, 2, \sz, \op
 
 	eor		\out0, \out0, t0, ror #24
 
@@ -68,9 +69,9 @@ 
 	__select	\t3, \in1, 2
 	__select	\t4, \in2, 3
 	.endif
-	__load		\t3, \t3, 2
-	__load		t0, t0, 3
-	__load		\t4, \t4, 3
+	__load		\t3, \t3, 2, \sz, \op
+	__load		t0, t0, 3, \sz, \op
+	__load		\t4, \t4, 3, \sz, \op
 
 	eor		\out1, \out1, t1, ror #24
 	eor		\out0, \out0, t2, ror #16
@@ -82,14 +83,14 @@ 
 	eor		\out1, \out1, t2
 	.endm
 
-	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3
-	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
-	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
 	.endm
 
-	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3
-	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
-	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
 	.endm
 
 	.macro		__rev, out, in
@@ -114,7 +115,7 @@ 
 	.endif
 	.endm
 
-	.macro		do_crypt, round, ttab, ltab
+	.macro		do_crypt, round, ttab, ltab, bsz
 	push		{r3-r11, lr}
 
 	ldr		r4, [in]
@@ -146,9 +147,12 @@ 
 
 1:	subs		rounds, rounds, #4
 	\round		r8, r9, r10, r11, r4, r5, r6, r7
-	__adrl		ttab, \ltab, ls
+	bls		2f
 	\round		r4, r5, r6, r7, r8, r9, r10, r11
-	bhi		0b
+	b		0b
+
+2:	__adrl		ttab, \ltab
+	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	__rev		r4, r4
@@ -170,10 +174,48 @@ 
 	.ltorg
 	.endm
 
+	.align		L1_CACHE_SHIFT
+	.type		__aes_arm_inverse_sbox, %object
+__aes_arm_inverse_sbox:
+	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+	.size		__aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox
+
 ENTRY(__aes_arm_encrypt)
-	do_crypt	fround, crypto_ft_tab, crypto_fl_tab
+	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
 ENDPROC(__aes_arm_encrypt)
 
+	.align		5
 ENTRY(__aes_arm_decrypt)
-	do_crypt	iround, crypto_it_tab, crypto_il_tab
+	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
 ENDPROC(__aes_arm_decrypt)