Message ID: 20190816101021.7837-1-ard.biesheuvel@linaro.org
State:      New
Series:     [RFC/RFT] crypto: aes/xts - implement support for ciphertext stealing
On Fri, 16 Aug 2019 at 13:10, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
>
> Align the x86 code with the generic XTS template, which now supports
> ciphertext stealing as described by the IEEE XTS-AES spec P1619.
>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

Oops, $SUBJECT should be x86/xts rather than aes/xts
On Friday, 16 August 2019, 12:10:21 CEST, Ard Biesheuvel wrote:

Hi Ard,

> Align the x86 code with the generic XTS template, which now supports
> ciphertext stealing as described by the IEEE XTS-AES spec P1619.

After applying the patch, the boot is successful even with the extra tests.

> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

Tested-by: Stephan Mueller <smueller@chronox.de>

Ciao
Stephan
On Fri, 16 Aug 2019 at 13:22, Stephan Mueller <smueller@chronox.de> wrote:
>
> On Friday, 16 August 2019, 12:10:21 CEST, Ard Biesheuvel wrote:
>
> Hi Ard,
>
> > Align the x86 code with the generic XTS template, which now supports
> > ciphertext stealing as described by the IEEE XTS-AES spec P1619.
>
> After applying the patch, the boot is successful even with the extra tests.
>
> > Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>
> Tested-by: Stephan Mueller <smueller@chronox.de>
>

Thanks!
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index ef165d8cf443..0ea1517faf09 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -591,6 +591,7 @@ static const struct common_glue_ctx aesni_enc_xts = {
 static const struct common_glue_ctx aesni_dec_xts = {
         .num_funcs = 2,
         .fpu_blocks_limit = 1,
+        .xts_decrypt = 1,
 
         .funcs = { {
                 .num_blocks = 8,
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index abf298c272dc..7854378e6d0c 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -129,6 +129,7 @@ static const struct common_glue_ctx camellia_dec_cbc = {
 static const struct common_glue_ctx camellia_dec_xts = {
         .num_funcs = 3,
         .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+        .xts_decrypt = 1,
 
         .funcs = { {
                 .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index 0c22d84750a3..58e8f08819b6 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -136,6 +136,7 @@ static const struct common_glue_ctx camellia_dec_cbc = {
 static const struct common_glue_ctx camellia_dec_xts = {
         .num_funcs = 2,
         .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+        .xts_decrypt = 1,
 
         .funcs = { {
                 .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 645f8f16815c..dfdbdeff5a2b 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -132,6 +132,7 @@ static const struct common_glue_ctx cast6_dec_cbc = {
 static const struct common_glue_ctx cast6_dec_xts = {
         .num_funcs = 2,
         .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+        .xts_decrypt = 1,
 
         .funcs = { {
                 .num_blocks = CAST6_PARALLEL_BLOCKS,
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 901551445387..693541d00c0a 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -14,6 +14,7 @@
 #include <crypto/b128ops.h>
 #include <crypto/gf128mul.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <crypto/xts.h>
 #include <asm/crypto/glue_helper.h>
 
@@ -261,15 +262,34 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
                         common_glue_func_t tweak_fn, void *tweak_ctx,
                         void *crypt_ctx)
 {
+        const bool cts = (req->cryptlen % XTS_BLOCK_SIZE);
         const unsigned int bsize = 128 / 8;
+        struct skcipher_request subreq;
         struct skcipher_walk walk;
         bool fpu_enabled = false;
-        unsigned int nbytes;
+        unsigned int nbytes, tail;
         int err;
 
+        if (req->cryptlen < XTS_BLOCK_SIZE)
+                return -EINVAL;
+
+        if (unlikely(cts)) {
+                struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+
+                tail = req->cryptlen % XTS_BLOCK_SIZE + XTS_BLOCK_SIZE;
+
+                skcipher_request_set_tfm(&subreq, tfm);
+                skcipher_request_set_callback(&subreq,
+                                              crypto_skcipher_get_flags(tfm),
+                                              NULL, NULL);
+                skcipher_request_set_crypt(&subreq, req->src, req->dst,
+                                           req->cryptlen - tail, req->iv);
+                req = &subreq;
+        }
+
         err = skcipher_walk_virt(&walk, req, false);
         nbytes = walk.nbytes;
-        if (!nbytes)
+        if (err)
                 return err;
 
         /* set minimum length to bsize, for tweak_fn */
@@ -287,6 +307,52 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
                 nbytes = walk.nbytes;
         }
 
+        if (unlikely(cts)) {
+                struct scatterlist *src, *dst;
+                struct scatterlist s[2], d[2];
+                le128 final_tweak;
+                u8 *next_tweak;
+                le128 b[2];
+
+                dst = src = scatterwalk_ffwd(s, req->src, req->cryptlen);
+                if (req->dst != req->src)
+                        dst = scatterwalk_ffwd(d, req->dst, req->cryptlen);
+
+                if (gctx->xts_decrypt) {
+                        final_tweak = *(le128 *)req->iv;
+                        next_tweak = memcpy(b, req->iv, XTS_BLOCK_SIZE);
+                        gf128mul_x_ble(b, b);
+                } else {
+                        next_tweak = req->iv;
+                }
+
+                skcipher_request_set_crypt(&subreq, src, dst, XTS_BLOCK_SIZE,
+                                           next_tweak);
+
+                err = skcipher_walk_virt(&walk, req, false) ?:
+                      skcipher_walk_done(&walk,
+                                __glue_xts_req_128bit(gctx, crypt_ctx, &walk));
+                if (err)
+                        goto out;
+
+                scatterwalk_map_and_copy(b, dst, 0, XTS_BLOCK_SIZE, 0);
+                memcpy(b + 1, b, tail - XTS_BLOCK_SIZE);
+                scatterwalk_map_and_copy(b, src, XTS_BLOCK_SIZE,
+                                         tail - XTS_BLOCK_SIZE, 0);
+                scatterwalk_map_and_copy(b, dst, 0, tail, 1);
+
+                skcipher_request_set_crypt(&subreq, dst, dst, XTS_BLOCK_SIZE,
+                                           gctx->xts_decrypt ? (u8 *)&final_tweak
+                                                             : req->iv);
+
+                err = skcipher_walk_virt(&walk, req, false) ?:
+                      skcipher_walk_done(&walk,
+                                __glue_xts_req_128bit(gctx, crypt_ctx, &walk));
+                if (err)
+                        goto out;
+        }
+
+out:
         glue_fpu_end(fpu_enabled);
 
         return err;
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index b871728e0b2f..b7b82bf69a67 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -121,6 +121,7 @@ static const struct common_glue_ctx serpent_dec_cbc = {
 static const struct common_glue_ctx serpent_dec_xts = {
         .num_funcs = 3,
         .fpu_blocks_limit = 8,
+        .xts_decrypt = 1,
 
         .funcs = { {
                 .num_blocks = 16,
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 4a9a9f2ee1d8..c46d722ecc12 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -164,6 +164,7 @@ static const struct common_glue_ctx serpent_dec_cbc = {
 static const struct common_glue_ctx serpent_dec_xts = {
         .num_funcs = 2,
         .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+        .xts_decrypt = 1,
 
         .funcs = { {
                 .num_blocks = SERPENT_PARALLEL_BLOCKS,
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 0dbf8e8b09d7..220de42bc3ab 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -167,6 +167,7 @@ static const struct common_glue_ctx twofish_dec_cbc = {
 static const struct common_glue_ctx twofish_dec_xts = {
         .num_funcs = 2,
         .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
+        .xts_decrypt = 1,
 
         .funcs = { {
                 .num_blocks = TWOFISH_PARALLEL_BLOCKS,
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index d1818634ae7e..31e038759a54 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -36,6 +36,7 @@ struct common_glue_func_entry {
 struct common_glue_ctx {
         unsigned int num_funcs;
         int fpu_blocks_limit; /* -1 means fpu not needed at all */
+        int xts_decrypt; /* whether this implements XTS decryption */
 
         /*
          * First funcs entry must have largest num_blocks and last funcs entry
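For readers who want to see the ciphertext-stealing scheme that the glue_helper changes above implement, here is a minimal userspace sketch in plain C. It is not the kernel code: the single-block XTS step is left as a caller-supplied callback, all names are illustrative, and only the handling of a message whose length is not a multiple of the block size is shown.

/*
 * Illustrative sketch of XTS ciphertext stealing (IEEE P1619), not the
 * kernel implementation.  The caller supplies the single-block XTS step
 * (out = E_K(in ^ T) ^ T, or its inverse for decryption).
 */
#include <stdint.h>
#include <string.h>

#define BLK 16

typedef void (*xts_block_fn)(uint8_t out[BLK], const uint8_t in[BLK],
                             const uint8_t tweak[BLK]);

/* Advance the tweak: multiply by x in GF(2^128), little-endian convention. */
static void gf_mul_x(uint8_t t[BLK])
{
        unsigned int carry = 0, i;

        for (i = 0; i < BLK; i++) {
                unsigned int b = t[i];

                t[i] = (uint8_t)((b << 1) | carry);
                carry = b >> 7;
        }
        if (carry)
                t[0] ^= 0x87;
}

/* Encrypt len bytes (len > BLK, len % BLK != 0) with ciphertext stealing. */
static void xts_encrypt_cts(uint8_t *dst, const uint8_t *src, size_t len,
                            uint8_t tweak[BLK], xts_block_fn enc)
{
        size_t full = (len / BLK - 1) * BLK;    /* bytes handled normally */
        size_t tail = len - full - BLK;         /* 1..15 trailing bytes   */
        uint8_t cc[BLK], pp[BLK];
        size_t i;

        for (i = 0; i < full; i += BLK) {
                enc(dst + i, src + i, tweak);
                gf_mul_x(tweak);
        }

        /* Encrypt the last full plaintext block as usual. */
        enc(cc, src + full, tweak);
        gf_mul_x(tweak);

        /* Pad the short final block with the tail of that ciphertext ... */
        memcpy(pp, src + full + BLK, tail);
        memcpy(pp + tail, cc + tail, BLK - tail);
        /* ... and emit the stolen bytes as the truncated final block. */
        memcpy(dst + full + BLK, cc, tail);

        /* The padded block becomes the second-to-last ciphertext block. */
        enc(dst + full, pp, tweak);
}

/*
 * Decryption of the last BLK + tail bytes.  The tweaks are consumed out of
 * order: the full ciphertext block needs the *last* tweak, the rebuilt
 * block the one before it.
 */
static void xts_decrypt_cts_tail(uint8_t *dst, const uint8_t *src, size_t tail,
                                 uint8_t tweak[BLK], xts_block_fn dec)
{
        uint8_t t_prev[BLK], pp[BLK], cc[BLK];

        memcpy(t_prev, tweak, BLK);     /* tweak for the rebuilt block     */
        gf_mul_x(tweak);                /* tweak for the full block, first */

        dec(pp, src, tweak);            /* pp = final plaintext || stolen bytes */

        memcpy(cc, src + BLK, tail);    /* rebuild the stolen ciphertext   */
        memcpy(cc + tail, pp + tail, BLK - tail);
        memcpy(dst + BLK, pp, tail);    /* short final plaintext block     */

        dec(dst, cc, t_prev);           /* second-to-last plaintext block  */
}

The decryption half of the sketch is why the patch saves the running tweak in final_tweak and advances a copy with gf128mul_x_ble() on the decrypt path: the second-to-last ciphertext block is processed with the last tweak, while the rebuilt block uses the tweak before it.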
Align the x86 code with the generic XTS template, which now supports
ciphertext stealing as described by the IEEE XTS-AES spec P1619.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/x86/crypto/aesni-intel_glue.c         |  1 +
 arch/x86/crypto/camellia_aesni_avx2_glue.c |  1 +
 arch/x86/crypto/camellia_aesni_avx_glue.c  |  1 +
 arch/x86/crypto/cast6_avx_glue.c           |  1 +
 arch/x86/crypto/glue_helper.c              | 70 +++++++++++++++++++++-
 arch/x86/crypto/serpent_avx2_glue.c        |  1 +
 arch/x86/crypto/serpent_avx_glue.c         |  1 +
 arch/x86/crypto/twofish_avx_glue.c         |  1 +
 arch/x86/include/asm/crypto/glue_helper.h  |  1 +
 9 files changed, 76 insertions(+), 2 deletions(-)

-- 
2.17.1
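As a usage note: the new code path only runs when the request length is not a multiple of the 16-byte XTS block size. The fragment below is a hypothetical in-kernel sketch (the function name and the 25-byte length are made up for illustration, and it is not part of the patch or of the kernel selftests) of how such a request could be issued against the accelerated xts(aes) implementation.

#include <crypto/skcipher.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/err.h>

static int xts_cts_demo(void)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	DECLARE_CRYPTO_WAIT(wait);
	struct scatterlist sg;
	u8 key[64], iv[16];
	u8 *buf;
	int err;

	tfm = crypto_alloc_skcipher("xts(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	buf = kzalloc(25, GFP_KERNEL);		/* 25 bytes: not a block multiple */
	if (!buf) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	get_random_bytes(key, sizeof(key));	/* AES-256-XTS key (two halves) */
	get_random_bytes(iv, sizeof(iv));

	err = crypto_skcipher_setkey(tfm, key, sizeof(key));
	if (err)
		goto out_free_buf;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_buf;
	}

	sg_init_one(&sg, buf, 25);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
				      CRYPTO_TFM_REQ_MAY_BACKLOG,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, 25, iv);

	/* 25 % 16 != 0, so this exercises the ciphertext-stealing tail path. */
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_buf:
	kfree(buf);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}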