
[RFC/RFT] crypto: aes/xts - implement support for ciphertext stealing

Message ID 20190816101021.7837-1-ard.biesheuvel@linaro.org
State New
Series [RFC/RFT] crypto: aes/xts - implement support for ciphertext stealing

Commit Message

Ard Biesheuvel Aug. 16, 2019, 10:10 a.m. UTC
Align the x86 code with the generic XTS template, which now supports
ciphertext stealing as described by the IEEE XTS-AES spec P1619.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
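
For readers who haven't met ciphertext stealing before: when the request length
is not a multiple of the cipher block size, XTS encrypts the last full block as
usual, hands out the leading bytes of that ciphertext as the final partial
block, and re-encrypts the partial plaintext padded with the remaining
ciphertext bytes under the next tweak. Below is a minimal, self-contained C
sketch of that data movement only; the single-block XTS operation is hidden
behind a callback, and the demo_xts_block() stand-in is deliberately not a real
cipher, so none of this is the kernel's (or this patch's) implementation.

/*
 * Sketch of the XTS ciphertext-stealing tail handling (encryption side),
 * following IEEE P1619.  The single-block XTS operation is abstracted
 * behind a callback; demo_xts_block() below is NOT a real cipher.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLK 16

typedef void (*xts_block_fn)(uint8_t out[BLK], const uint8_t in[BLK],
			     const uint8_t tweak[BLK]);

/*
 * Encrypt the trailing (BLK + s) bytes of a message, 0 < s < BLK.
 * in/out hold the last full block followed by the s-byte tail;
 * tweak_n is the tweak for the last full-block position, tweak_n1 the
 * tweak for the (notional) block after it.
 */
static void xts_cts_encrypt_tail(xts_block_fn enc, uint8_t *out,
				 const uint8_t *in, size_t s,
				 const uint8_t tweak_n[BLK],
				 const uint8_t tweak_n1[BLK])
{
	uint8_t cc[BLK], pp[BLK];

	enc(cc, in, tweak_n);			/* CC    = XTS-Enc(P_n, T_n)       */
	memcpy(out + BLK, cc, s);		/* C_n+1 = leading s bytes of CC   */
	memcpy(pp, in + BLK, s);		/* PP    = P_n+1 ...               */
	memcpy(pp + s, cc + s, BLK - s);	/*         ... padded with CC tail */
	enc(out, pp, tweak_n1);			/* C_n   = XTS-Enc(PP, T_n+1)      */
}

/* Stand-in "cipher": XOR with the tweak.  For illustration only. */
static void demo_xts_block(uint8_t out[BLK], const uint8_t in[BLK],
			   const uint8_t tweak[BLK])
{
	for (int i = 0; i < BLK; i++)
		out[i] = (uint8_t)(in[i] ^ tweak[i]);
}

int main(void)
{
	uint8_t pt[BLK + 5] = "0123456789abcdefXYZW";	/* 16 + 5 bytes */
	uint8_t ct[BLK + 5];
	uint8_t t_n[BLK] = { 1 };
	uint8_t t_n1[BLK] = { 2 };
	size_t i;

	xts_cts_encrypt_tail(demo_xts_block, ct, pt, 5, t_n, t_n1);

	for (i = 0; i < sizeof(ct); i++)
		printf("%02x", ct[i]);
	printf("\n");
	return 0;
}

Decryption performs the same shuffle with the two tweaks swapped, which is what
the new .xts_decrypt flag in this patch selects (see the note at the end of the
page).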

---
 arch/x86/crypto/aesni-intel_glue.c         |  1 +
 arch/x86/crypto/camellia_aesni_avx2_glue.c |  1 +
 arch/x86/crypto/camellia_aesni_avx_glue.c  |  1 +
 arch/x86/crypto/cast6_avx_glue.c           |  1 +
 arch/x86/crypto/glue_helper.c              | 70 +++++++++++++++++++++-
 arch/x86/crypto/serpent_avx2_glue.c        |  1 +
 arch/x86/crypto/serpent_avx_glue.c         |  1 +
 arch/x86/crypto/twofish_avx_glue.c         |  1 +
 arch/x86/include/asm/crypto/glue_helper.h  |  1 +
 9 files changed, 76 insertions(+), 2 deletions(-)

-- 
2.17.1

Comments

Ard Biesheuvel Aug. 16, 2019, 10:11 a.m. UTC | #1
On Fri, 16 Aug 2019 at 13:10, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
>
> Align the x86 code with the generic XTS template, which now supports
> ciphertext stealing as described by the IEEE XTS-AES spec P1619.
>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

Oops, $SUBJECT should be x86/xts rather than aes/xts

Stephan Mueller Aug. 16, 2019, 10:22 a.m. UTC | #2
On Friday, 16 August 2019 at 12:10:21 CEST, Ard Biesheuvel wrote:

Hi Ard,

> Align the x86 code with the generic XTS template, which now supports
> ciphertext stealing as described by the IEEE XTS-AES spec P1619.

After applying the patch, the boot is successful even with the extra tests.

> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

Tested-by: Stephan Mueller <smueller@chronox.de>


Ciao
Stephan
Ard Biesheuvel Aug. 16, 2019, 12:05 p.m. UTC | #3
On Fri, 16 Aug 2019 at 13:22, Stephan Mueller <smueller@chronox.de> wrote:
>
> On Friday, 16 August 2019 at 12:10:21 CEST, Ard Biesheuvel wrote:
>
> Hi Ard,
>
> > Align the x86 code with the generic XTS template, which now supports
> > ciphertext stealing as described by the IEEE XTS-AES spec P1619.
>
> After applying the patch, the boot is successful even with the extra tests.
>
> > Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>
> Tested-by: Stephan Mueller <smueller@chronox.de>

Thanks!
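
For anyone who wants to exercise the new code path by hand rather than through
the self-tests Stephan mentions, a rough sketch of driving xts(aes) with a
non-block-multiple length through the skcipher API could look like the
following. This is illustrative only: the module and function names
(xts_cts_smoke_init and friends) are made up, error handling is minimal, and it
is not part of the patch.

/* Hypothetical smoke test: encrypt 17 bytes with xts(aes) to hit the CTS path. */
#include <crypto/skcipher.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

static int __init xts_cts_smoke_init(void)
{
	u8 key[32], iv[16];
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	u8 *buf;
	int err;

	tfm = crypto_alloc_skcipher("xts(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	buf = kmalloc(17, GFP_KERNEL);	/* 17 bytes forces ciphertext stealing */
	if (!buf) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	get_random_bytes(key, sizeof(key));
	get_random_bytes(iv, sizeof(iv));
	get_random_bytes(buf, 17);

	err = crypto_skcipher_setkey(tfm, key, sizeof(key));
	if (err)
		goto out_free_buf;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_buf;
	}

	sg_init_one(&sg, buf, 17);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, 17, iv);

	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
	pr_info("xts(aes) CTS smoke test: %d\n", err);

	skcipher_request_free(req);
out_free_buf:
	kfree(buf);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}

static void __exit xts_cts_smoke_exit(void)
{
}

module_init(xts_cts_smoke_init);
module_exit(xts_cts_smoke_exit);
MODULE_LICENSE("GPL");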

Patch

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index ef165d8cf443..0ea1517faf09 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -591,6 +591,7 @@  static const struct common_glue_ctx aesni_enc_xts = {
 static const struct common_glue_ctx aesni_dec_xts = {
 	.num_funcs = 2,
 	.fpu_blocks_limit = 1,
+	.xts_decrypt = 1,
 
 	.funcs = { {
 		.num_blocks = 8,
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index abf298c272dc..7854378e6d0c 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -129,6 +129,7 @@  static const struct common_glue_ctx camellia_dec_cbc = {
 static const struct common_glue_ctx camellia_dec_xts = {
 	.num_funcs = 3,
 	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+	.xts_decrypt = 1,
 
 	.funcs = { {
 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index 0c22d84750a3..58e8f08819b6 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -136,6 +136,7 @@  static const struct common_glue_ctx camellia_dec_cbc = {
 static const struct common_glue_ctx camellia_dec_xts = {
 	.num_funcs = 2,
 	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+	.xts_decrypt = 1,
 
 	.funcs = { {
 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 645f8f16815c..dfdbdeff5a2b 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -132,6 +132,7 @@  static const struct common_glue_ctx cast6_dec_cbc = {
 static const struct common_glue_ctx cast6_dec_xts = {
 	.num_funcs = 2,
 	.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+	.xts_decrypt = 1,
 
 	.funcs = { {
 		.num_blocks = CAST6_PARALLEL_BLOCKS,
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 901551445387..693541d00c0a 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -14,6 +14,7 @@ 
 #include <crypto/b128ops.h>
 #include <crypto/gf128mul.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <crypto/xts.h>
 #include <asm/crypto/glue_helper.h>
 
@@ -261,15 +262,34 @@  int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 			common_glue_func_t tweak_fn, void *tweak_ctx,
 			void *crypt_ctx)
 {
+	const bool cts = (req->cryptlen % XTS_BLOCK_SIZE);
 	const unsigned int bsize = 128 / 8;
+	struct skcipher_request subreq;
 	struct skcipher_walk walk;
 	bool fpu_enabled = false;
-	unsigned int nbytes;
+	unsigned int nbytes, tail;
 	int err;
 
+	if (req->cryptlen < XTS_BLOCK_SIZE)
+		return -EINVAL;
+
+	if (unlikely(cts)) {
+		struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+
+		tail = req->cryptlen % XTS_BLOCK_SIZE + XTS_BLOCK_SIZE;
+
+		skcipher_request_set_tfm(&subreq, tfm);
+		skcipher_request_set_callback(&subreq,
+					      crypto_skcipher_get_flags(tfm),
+					      NULL, NULL);
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   req->cryptlen - tail, req->iv);
+		req = &subreq;
+	}
+
 	err = skcipher_walk_virt(&walk, req, false);
 	nbytes = walk.nbytes;
-	if (!nbytes)
+	if (err)
 		return err;
 
 	/* set minimum length to bsize, for tweak_fn */
@@ -287,6 +307,52 @@  int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 		nbytes = walk.nbytes;
 	}
 
+	if (unlikely(cts)) {
+		struct scatterlist *src, *dst;
+		struct scatterlist s[2], d[2];
+		le128 final_tweak;
+		u8 *next_tweak;
+		le128 b[2];
+
+		dst = src = scatterwalk_ffwd(s, req->src, req->cryptlen);
+		if (req->dst != req->src)
+			dst = scatterwalk_ffwd(d, req->dst, req->cryptlen);
+
+		if (gctx->xts_decrypt) {
+			final_tweak = *(le128 *)req->iv;
+			next_tweak = memcpy(b, req->iv, XTS_BLOCK_SIZE);
+			gf128mul_x_ble(b, b);
+		} else {
+			next_tweak = req->iv;
+		}
+
+		skcipher_request_set_crypt(&subreq, src, dst, XTS_BLOCK_SIZE,
+					   next_tweak);
+
+		err = skcipher_walk_virt(&walk, req, false) ?:
+		      skcipher_walk_done(&walk,
+				__glue_xts_req_128bit(gctx, crypt_ctx, &walk));
+		if (err)
+			goto out;
+
+		scatterwalk_map_and_copy(b, dst, 0, XTS_BLOCK_SIZE, 0);
+		memcpy(b + 1, b, tail - XTS_BLOCK_SIZE);
+		scatterwalk_map_and_copy(b, src, XTS_BLOCK_SIZE,
+					 tail - XTS_BLOCK_SIZE, 0);
+		scatterwalk_map_and_copy(b, dst, 0, tail, 1);
+
+		skcipher_request_set_crypt(&subreq, dst, dst, XTS_BLOCK_SIZE,
+					   gctx->xts_decrypt ? (u8 *)&final_tweak
+							     : req->iv);
+
+		err = skcipher_walk_virt(&walk, req, false) ?:
+		      skcipher_walk_done(&walk,
+				__glue_xts_req_128bit(gctx, crypt_ctx, &walk));
+		if (err)
+			goto out;
+	}
+
+out:
 	glue_fpu_end(fpu_enabled);
 
 	return err;
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index b871728e0b2f..b7b82bf69a67 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -121,6 +121,7 @@  static const struct common_glue_ctx serpent_dec_cbc = {
 static const struct common_glue_ctx serpent_dec_xts = {
 	.num_funcs = 3,
 	.fpu_blocks_limit = 8,
+	.xts_decrypt = 1,
 
 	.funcs = { {
 		.num_blocks = 16,
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 4a9a9f2ee1d8..c46d722ecc12 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -164,6 +164,7 @@  static const struct common_glue_ctx serpent_dec_cbc = {
 static const struct common_glue_ctx serpent_dec_xts = {
 	.num_funcs = 2,
 	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+	.xts_decrypt = 1,
 
 	.funcs = { {
 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 0dbf8e8b09d7..220de42bc3ab 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -167,6 +167,7 @@  static const struct common_glue_ctx twofish_dec_cbc = {
 static const struct common_glue_ctx twofish_dec_xts = {
 	.num_funcs = 2,
 	.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
+	.xts_decrypt = 1,
 
 	.funcs = { {
 		.num_blocks = TWOFISH_PARALLEL_BLOCKS,
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index d1818634ae7e..31e038759a54 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -36,6 +36,7 @@  struct common_glue_func_entry {
 struct common_glue_ctx {
 	unsigned int num_funcs;
 	int fpu_blocks_limit; /* -1 means fpu not needed at all */
+	int xts_decrypt; /* whether this implements XTS decryption */
 
 	/*
 	 * First funcs entry must have largest num_blocks and last funcs entry
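
As a footnote on the new .xts_decrypt flag: ciphertext stealing is not
symmetric, so on decryption the tweak for the last full-block position and the
tweak for the stolen block swap roles. That is why the *_dec_xts glue contexts
above set the flag, and why glue_xts_req_128bit() derives a second tweak with
gf128mul_x_ble() in that case. A decryption-side counterpart to the encryption
sketch near the top of this page, reusing its xts_block_fn/BLK definitions
(again an illustration, not the kernel code):

/*
 * Decrypt the trailing (BLK + s) bytes: the last full ciphertext block is
 * processed under the *later* tweak first, then the reassembled stolen
 * block under the earlier one.
 */
static void xts_cts_decrypt_tail(xts_block_fn dec, uint8_t *out,
				 const uint8_t *in, size_t s,
				 const uint8_t tweak_n[BLK],
				 const uint8_t tweak_n1[BLK])
{
	uint8_t pp[BLK], cc[BLK];

	dec(pp, in, tweak_n1);			/* PP    = XTS-Dec(C_n, T_n+1)     */
	memcpy(out + BLK, pp, s);		/* P_n+1 = leading s bytes of PP   */
	memcpy(cc, in + BLK, s);		/* CC    = C_n+1 ...               */
	memcpy(cc + s, pp + s, BLK - s);	/*         ... padded with PP tail */
	dec(out, cc, tweak_n);			/* P_n   = XTS-Dec(CC, T_n)        */
}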