diff mbox series

crypto: x86 - load optimized sha1/sha256 modules based on CPU features

Message ID 20230915102325.35189-1-roxana.nicolescu@canonical.com
State Accepted
Commit 1c43c0f1f84aa59dfc98ce66f0a67b2922aa7f9d
Headers show
Series crypto: x86 - load optimized sha1/sha256 modules based on CPU features | expand

Commit Message

Roxana Nicolescu Sept. 15, 2023, 10:23 a.m. UTC
x86 optimized crypto modules are built as modules rather than built-in and
they are not loaded when the crypto API is initialized, resulting in the
generic builtin module (sha1-generic) being used instead.

This was discovered when creating a sha1/sha256 checksum of a 2 GB file by
using kcapi-tools because it would take significantly longer than creating
a sha512 checksum of the same file. trace-cmd showed that for sha1/256 the
generic module was used, whereas for sha512 the optimized module was used
instead.

Add module aliases (via MODULE_DEVICE_TABLE) for these x86 optimized crypto
modules based on CPU feature bits so udev gets a chance to load them later
in the boot process. This resulted in a ~3x decrease in the real
(wall-clock) execution time of kcapi-dsg.

This fix is inspired by commit
aa031b8f702e ("crypto: x86/sha512 - load based on CPU features")
where a similar fix was done for sha512.

Cc: stable@vger.kernel.org # 5.15+
Suggested-by: Dimitri John Ledkov <dimitri.ledkov@canonical.com>
Suggested-by: Julian Andres Klode <julian.klode@canonical.com>
Signed-off-by: Roxana Nicolescu <roxana.nicolescu@canonical.com>
---
 arch/x86/crypto/sha1_ssse3_glue.c   | 12 ++++++++++++
 arch/x86/crypto/sha256_ssse3_glue.c | 12 ++++++++++++
 2 files changed, 24 insertions(+)


base-commit: aed8aee11130a954356200afa3f1b8753e8a9482

Comments

Herbert Xu Sept. 20, 2023, 5:23 a.m. UTC | #1
On Fri, Sep 15, 2023 at 12:23:25PM +0200, Roxana Nicolescu wrote:
> x86 optimized crypto modules are built as modules rather than build-in and
> they are not loaded when the crypto API is initialized, resulting in the
> generic builtin module (sha1-generic) being used instead.
> 
> It was discovered when creating a sha1/sha256 checksum of a 2Gb file by
> using kcapi-tools because it would take significantly longer than creating
> a sha512 checksum of the same file. trace-cmd showed that for sha1/256 the
> generic module was used, whereas for sha512 the optimized module was used
> instead.
> 
> Add module aliases() for these x86 optimized crypto modules based on CPU
> feature bits so udev gets a chance to load them later in the boot
> process. This resulted in ~3x decrease in the real-time execution of
> kcapi-dsg.
> 
> Fix is inspired from commit
> aa031b8f702e ("crypto: x86/sha512 - load based on CPU features")
> where a similar fix was done for sha512.
> 
> Cc: stable@vger.kernel.org # 5.15+
> Suggested-by: Dimitri John Ledkov <dimitri.ledkov@canonical.com>
> Suggested-by: Julian Andres Klode <julian.klode@canonical.com>
> Signed-off-by: Roxana Nicolescu <roxana.nicolescu@canonical.com>
> ---
>  arch/x86/crypto/sha1_ssse3_glue.c   | 12 ++++++++++++
>  arch/x86/crypto/sha256_ssse3_glue.c | 12 ++++++++++++
>  2 files changed, 24 insertions(+)

Patch applied.  Thanks.
diff mbox series

Patch

diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 44340a1139e0..959afa705e95 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -24,8 +24,17 @@ 
 #include <linux/types.h>
 #include <crypto/sha1.h>
 #include <crypto/sha1_base.h>
+#include <asm/cpu_device_id.h>
 #include <asm/simd.h>
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
+	X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+	X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static int sha1_update(struct shash_desc *desc, const u8 *data,
 			     unsigned int len, sha1_block_fn *sha1_xform)
 {
@@ -301,6 +310,9 @@  static inline void unregister_sha1_ni(void) { }
 
 static int __init sha1_ssse3_mod_init(void)
 {
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (register_sha1_ssse3())
 		goto fail;
 
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 3a5f6be7dbba..d25235f0ccaf 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -38,11 +38,20 @@ 
 #include <crypto/sha2.h>
 #include <crypto/sha256_base.h>
 #include <linux/string.h>
+#include <asm/cpu_device_id.h>
 #include <asm/simd.h>
 
 asmlinkage void sha256_transform_ssse3(struct sha256_state *state,
 				       const u8 *data, int blocks);
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
+	X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+	X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static int _sha256_update(struct shash_desc *desc, const u8 *data,
 			  unsigned int len, sha256_block_fn *sha256_xform)
 {
@@ -366,6 +375,9 @@  static inline void unregister_sha256_ni(void) { }
 
 static int __init sha256_ssse3_mod_init(void)
 {
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (register_sha256_ssse3())
 		goto fail;