[v2,19/19] crypto: x86/sha - register only the best function

Message ID 20221012215931.3896-20-elliott@hpe.com
State New

Commit Message

Elliott, Robert (Servers) Oct. 12, 2022, 9:59 p.m. UTC
Don't register and unregister each of the functions from least-
to most-optimized (SSSE3 then AVX then AVX2); determine the
most-optimized function and load only that version.

Suggested-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Robert Elliott <elliott@hpe.com>
---
 arch/x86/crypto/sha1_ssse3_glue.c   | 139 ++++++++++++-------------
 arch/x86/crypto/sha256_ssse3_glue.c | 154 ++++++++++++++--------------
 arch/x86/crypto/sha512_ssse3_glue.c | 120 ++++++++++++----------
 3 files changed, 210 insertions(+), 203 deletions(-)
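
For orientation before the full diff: each glue module's init function now probes CPU features from most- to least-capable and registers only the first implementation that qualifies. Below is a condensed, illustrative sketch of that pattern for the sha1 module; it reuses the shash_alg objects, *_registered flags, and module_cpu_ids table defined in sha1_ssse3_glue.c, and it simplifies the error handling (the real init logs a pr_info summary and returns 0 once the CPU matches), so treat it as a sketch rather than the exact hunk.

/*
 * Condensed from the new sha1_ssse3_mod_init() below; illustrative only.
 * The alg structs, the *_registered flags, and module_cpu_ids are the
 * ones defined in sha1_ssse3_glue.c (sha1_ni_alg needs CONFIG_AS_SHA1_NI).
 */
static int __init sha1_pick_best_init(void)
{
	int ret = -ENODEV;

	if (!x86_match_cpu(module_cpu_ids))
		return -ENODEV;

	if (boot_cpu_has(X86_FEATURE_SHA_NI)) {
		/* SHA-NI beats the other variants */
		ret = crypto_register_shash(&sha1_ni_alg);
		sha1_ni_registered = !ret;
	} else if (boot_cpu_has(X86_FEATURE_AVX2) &&
		   boot_cpu_has(X86_FEATURE_BMI1) &&
		   boot_cpu_has(X86_FEATURE_BMI2)) {
		ret = crypto_register_shash(&sha1_avx2_alg);
		sha1_avx2_registered = !ret;
	} else if (boot_cpu_has(X86_FEATURE_AVX) &&
		   cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
		ret = crypto_register_shash(&sha1_avx_alg);
		sha1_avx_registered = !ret;
	} else if (boot_cpu_has(X86_FEATURE_SSSE3)) {
		ret = crypto_register_shash(&sha1_ssse3_alg);
		sha1_ssse3_registered = !ret;
	}

	return ret;
}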

Comments

Eric Biggers Oct. 13, 2022, 6:07 a.m. UTC | #1
On Wed, Oct 12, 2022 at 04:59:31PM -0500, Robert Elliott wrote:
> Don't register and unregister each of the functions from least-
> to most-optimized (SSSE3 then AVX then AVX2); determine the
> most-optimized function and load only that version.
> 
> Suggested-by: Tim Chen <tim.c.chen@linux.intel.com>
> Signed-off-by: Robert Elliott <elliott@hpe.com>

I thought that it's done the way it is so that it's easy to run the self-tests
for all the different variants.

- Eric
Herbert Xu Oct. 13, 2022, 7:52 a.m. UTC | #2
On Wed, Oct 12, 2022 at 11:07:43PM -0700, Eric Biggers wrote:
>
> I thought that it's done the way it is so that it's easy to run the self-tests
> for all the different variants.

Yes, we should keep it that way so that it's easy to test the
different code paths for correctness and/or speed.

Thanks,
Elliott, Robert (Servers) Oct. 13, 2022, 10:59 p.m. UTC | #3
> -----Original Message-----
> From: Herbert Xu <herbert@gondor.apana.org.au>
> Sent: Thursday, October 13, 2022 2:52 AM
> To: Eric Biggers <ebiggers@kernel.org>
> Cc: Elliott, Robert (Servers) <elliott@hpe.com>; davem@davemloft.net;
> tim.c.chen@linux.intel.com; ap420073@gmail.com; ardb@kernel.org; linux-
> crypto@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH v2 19/19] crypto: x86/sha - register only the best
> function
> 
> On Wed, Oct 12, 2022 at 11:07:43PM -0700, Eric Biggers wrote:
> >
> > I thought that it's done the way it is so that it's easy to run the
> > self-tests for all the different variants.
> 
> Yes, we should keep it that way so that it's easy to test the
> different code paths for correctness and/or speed.

I have done some testing with extra patches that do that for
that very reason. Is there much overhead from having a module
loaded and registered in the crypto system, but not being
chosen for use?

The current sha modules register SSSE3, then register AVX and
unregister SSSE3, then register AVX2 and unregister AVX...
good testing for the unregister function, but not really helpful
for users.
Herbert Xu Oct. 14, 2022, 8:22 a.m. UTC | #4
On Thu, Oct 13, 2022 at 10:59:08PM +0000, Elliott, Robert (Servers) wrote:
>
> I have done some testing with extra patches that do that for
> that very reason. Is there much overhead from having a module
> loaded and registered in the crypto system, but not being
> chosen for use?

I don't think it's a big deal.  The system is designed to cope
with multiple implementations and pick the best option.

IOW if the overhead is an issue then that's something we'd need to
address in the core API code rather than trying to paper over it
by reducing the number of registered algorithms.

Cheers,
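
To make the testability point concrete: as long as every usable variant stays registered, each one remains reachable by its cra_driver_name ("sha1-ssse3", "sha1-avx", "sha1-avx2", "sha1-ni"), while callers that ask for plain "sha1" get the highest-cra_priority registration. A minimal, illustrative sketch of driving one specific variant follows (not part of this patch; the hashing itself is elided):

#include <linux/err.h>
#include <crypto/hash.h>

/* Allocate one specific SHA-1 implementation by driver name. */
static int try_sha1_driver(const char *driver)
{
	struct crypto_shash *tfm;

	tfm = crypto_alloc_shash(driver, 0, 0);	/* e.g. "sha1-avx2" */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);	/* that variant is not registered */

	/* ... run test vectors through tfm with crypto_shash_digest() ... */

	crypto_free_shash(tfm);
	return 0;
}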

Patch

diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index edffc33bd12e..90a86d737bcf 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -123,17 +123,16 @@  static struct shash_alg sha1_ssse3_alg = {
 	}
 };
 
-static int register_sha1_ssse3(void)
-{
-	if (boot_cpu_has(X86_FEATURE_SSSE3))
-		return crypto_register_shash(&sha1_ssse3_alg);
-	return 0;
-}
-
+static bool sha1_ssse3_registered;
+static bool sha1_avx_registered;
+static bool sha1_avx2_registered;
+static bool sha1_ni_registered;
 static void unregister_sha1_ssse3(void)
 {
-	if (boot_cpu_has(X86_FEATURE_SSSE3))
+	if (sha1_ssse3_registered) {
 		crypto_unregister_shash(&sha1_ssse3_alg);
+		sha1_ssse3_registered = 0;
+	}
 }
 
 asmlinkage void sha1_transform_avx(struct sha1_state *state,
@@ -172,28 +171,12 @@  static struct shash_alg sha1_avx_alg = {
 	}
 };
 
-static bool avx_usable(void)
-{
-	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
-		if (boot_cpu_has(X86_FEATURE_AVX))
-			pr_info("AVX detected but unusable.\n");
-		return false;
-	}
-
-	return true;
-}
-
-static int register_sha1_avx(void)
-{
-	if (avx_usable())
-		return crypto_register_shash(&sha1_avx_alg);
-	return 0;
-}
-
 static void unregister_sha1_avx(void)
 {
-	if (avx_usable())
+	if (sha1_avx_registered) {
 		crypto_unregister_shash(&sha1_avx_alg);
+		sha1_avx_registered = 0;
+	}
 }
 
 #define SHA1_AVX2_BLOCK_OPTSIZE	4	/* optimal 4*64 bytes of SHA1 blocks */
@@ -201,16 +184,6 @@  static void unregister_sha1_avx(void)
 asmlinkage void sha1_transform_avx2(struct sha1_state *state,
 				    const u8 *data, int blocks);
 
-static bool avx2_usable(void)
-{
-	if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
-		&& boot_cpu_has(X86_FEATURE_BMI1)
-		&& boot_cpu_has(X86_FEATURE_BMI2))
-		return true;
-
-	return false;
-}
-
 static void sha1_apply_transform_avx2(struct sha1_state *state,
 				      const u8 *data, int blocks)
 {
@@ -254,17 +227,13 @@  static struct shash_alg sha1_avx2_alg = {
 	}
 };
 
-static int register_sha1_avx2(void)
-{
-	if (avx2_usable())
-		return crypto_register_shash(&sha1_avx2_alg);
-	return 0;
-}
 
 static void unregister_sha1_avx2(void)
 {
-	if (avx2_usable())
+	if (sha1_avx2_registered) {
 		crypto_unregister_shash(&sha1_avx2_alg);
+		sha1_avx2_registered = 0;
+	}
 }
 
 #ifdef CONFIG_AS_SHA1_NI
@@ -304,13 +273,6 @@  static struct shash_alg sha1_ni_alg = {
 	}
 };
 
-static int register_sha1_ni(void)
-{
-	if (boot_cpu_has(X86_FEATURE_SHA_NI))
-		return crypto_register_shash(&sha1_ni_alg);
-	return 0;
-}
-
 static const struct x86_cpu_id module_cpu_ids[] = {
 	X86_MATCH_FEATURE(X86_FEATURE_SHA_NI, NULL),
 	X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
@@ -322,44 +284,79 @@  MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
 
 static void unregister_sha1_ni(void)
 {
-	if (boot_cpu_has(X86_FEATURE_SHA_NI))
+	if (sha1_ni_registered) {
 		crypto_unregister_shash(&sha1_ni_alg);
+		sha1_ni_registered = 0;
+	}
 }
 
 #else
-static inline int register_sha1_ni(void) { return 0; }
 static inline void unregister_sha1_ni(void) { }
 #endif
 
 static int __init sha1_ssse3_mod_init(void)
 {
-	if (register_sha1_ssse3())
-		goto fail;
+	const char *feature_name;
+	const char *driver_name = NULL;
+	int ret;
 
 	if (!x86_match_cpu(module_cpu_ids))
 		return -ENODEV;
 
-	if (register_sha1_avx()) {
-		unregister_sha1_ssse3();
-		goto fail;
-	}
+	/* SHA-NI */
+	if (boot_cpu_has(X86_FEATURE_SHA_NI)) {
 
-	if (register_sha1_avx2()) {
-		unregister_sha1_avx();
-		unregister_sha1_ssse3();
-		goto fail;
-	}
+		ret = crypto_register_shash(&sha1_ni_alg);
+		if (!ret)
+			sha1_ni_registered = 1;
 
-	if (register_sha1_ni()) {
-		unregister_sha1_avx2();
-		unregister_sha1_avx();
-		unregister_sha1_ssse3();
-		goto fail;
+	/* AVX2 */
+	} else if (boot_cpu_has(X86_FEATURE_AVX2)) {
+
+		if (boot_cpu_has(X86_FEATURE_BMI1) &&
+		    boot_cpu_has(X86_FEATURE_BMI2)) {
+
+			ret = crypto_register_shash(&sha1_avx2_alg);
+			if (!ret) {
+				sha1_avx2_registered = 1;
+				driver_name = sha1_avx2_alg.base.cra_driver_name;
+			}
+		} else {
+			pr_info("AVX2-optimized version not engaged, all required features (AVX2, BMI1, BMI2) not supported\n");
+		}
+
+	/* AVX */
+	} else if (boot_cpu_has(X86_FEATURE_AVX)) {
+
+		if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+			       &feature_name)) {
+
+			ret = crypto_register_shash(&sha1_avx_alg);
+			if (!ret) {
+				sha1_avx_registered = 1;
+				driver_name = sha1_avx_alg.base.cra_driver_name;
+			}
+		} else {
+			pr_info("AVX-optimized version not engaged, CPU extended feature '%s' is not supported\n",
+				feature_name);
+		}
+
+	/* SSE3 */
+	} else if (boot_cpu_has(X86_FEATURE_SSSE3)) {
+		ret = crypto_register_shash(&sha1_ssse3_alg);
+		if (!ret) {
+			sha1_ssse3_registered = 1;
+			driver_name = sha1_ssse3_alg.base.cra_driver_name;
+		}
 	}
 
+	pr_info("CPU-optimized crypto module loaded (SSSE3=%s, AVX=%s, AVX2=%s, SHA-NI=%s): driver=%s\n",
+		sha1_ssse3_registered ? "yes" : "no",
+		sha1_avx_registered ? "yes" : "no",
+		sha1_avx2_registered ? "yes" : "no",
+		sha1_ni_registered ? "yes" : "no",
+		driver_name);
 	return 0;
-fail:
-	return -ENODEV;
 }
 
 static void __exit sha1_ssse3_mod_fini(void)
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 8a0fb308fbba..cd7bf2b48f3d 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -150,19 +150,18 @@  static struct shash_alg sha256_ssse3_algs[] = { {
 	}
 } };
 
-static int register_sha256_ssse3(void)
-{
-	if (boot_cpu_has(X86_FEATURE_SSSE3))
-		return crypto_register_shashes(sha256_ssse3_algs,
-				ARRAY_SIZE(sha256_ssse3_algs));
-	return 0;
-}
+static bool sha256_ssse3_registered;
+static bool sha256_avx_registered;
+static bool sha256_avx2_registered;
+static bool sha256_ni_registered;
 
 static void unregister_sha256_ssse3(void)
 {
-	if (boot_cpu_has(X86_FEATURE_SSSE3))
+	if (sha256_ssse3_registered) {
 		crypto_unregister_shashes(sha256_ssse3_algs,
 				ARRAY_SIZE(sha256_ssse3_algs));
+		sha256_ssse3_registered = 0;
+	}
 }
 
 asmlinkage void sha256_transform_avx(struct sha256_state *state,
@@ -215,30 +214,13 @@  static struct shash_alg sha256_avx_algs[] = { {
 	}
 } };
 
-static bool avx_usable(void)
-{
-	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
-		if (boot_cpu_has(X86_FEATURE_AVX))
-			pr_info("AVX detected but unusable.\n");
-		return false;
-	}
-
-	return true;
-}
-
-static int register_sha256_avx(void)
-{
-	if (avx_usable())
-		return crypto_register_shashes(sha256_avx_algs,
-				ARRAY_SIZE(sha256_avx_algs));
-	return 0;
-}
-
 static void unregister_sha256_avx(void)
 {
-	if (avx_usable())
+	if (sha256_avx_registered) {
 		crypto_unregister_shashes(sha256_avx_algs,
 				ARRAY_SIZE(sha256_avx_algs));
+		sha256_avx_registered = 0;
+	}
 }
 
 asmlinkage void sha256_transform_rorx(struct sha256_state *state,
@@ -291,28 +273,13 @@  static struct shash_alg sha256_avx2_algs[] = { {
 	}
 } };
 
-static bool avx2_usable(void)
-{
-	if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) &&
-		    boot_cpu_has(X86_FEATURE_BMI2))
-		return true;
-
-	return false;
-}
-
-static int register_sha256_avx2(void)
-{
-	if (avx2_usable())
-		return crypto_register_shashes(sha256_avx2_algs,
-				ARRAY_SIZE(sha256_avx2_algs));
-	return 0;
-}
-
 static void unregister_sha256_avx2(void)
 {
-	if (avx2_usable())
+	if (sha256_avx2_registered) {
 		crypto_unregister_shashes(sha256_avx2_algs,
 				ARRAY_SIZE(sha256_avx2_algs));
+		sha256_avx2_registered = 0;
+	}
 }
 
 #ifdef CONFIG_AS_SHA256_NI
@@ -375,55 +342,92 @@  static const struct x86_cpu_id module_cpu_ids[] = {
 };
 MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
 
-static int register_sha256_ni(void)
-{
-	if (boot_cpu_has(X86_FEATURE_SHA_NI))
-		return crypto_register_shashes(sha256_ni_algs,
-				ARRAY_SIZE(sha256_ni_algs));
-	return 0;
-}
-
 static void unregister_sha256_ni(void)
 {
-	if (boot_cpu_has(X86_FEATURE_SHA_NI))
+	if (sha256_ni_registered) {
 		crypto_unregister_shashes(sha256_ni_algs,
 				ARRAY_SIZE(sha256_ni_algs));
+		sha256_ni_registered = 0;
+	}
 }
 
 #else
-static inline int register_sha256_ni(void) { return 0; }
 static inline void unregister_sha256_ni(void) { }
 #endif
 
 static int __init sha256_ssse3_mod_init(void)
 {
-	if (!x86_match_cpu(module_cpu_ids))
+	const char *feature_name;
+	const char *driver_name = NULL;
+	const char *driver_name2 = NULL;
+	int ret;
+
+	if (!x86_match_cpu(module_cpu_ids)) {
+		pr_info("CPU-optimized crypto module not loaded, required CPU features (SSSE3, AVX, AVX2, or SHA-NI) not supported\n");
 		return -ENODEV;
+	}
 
-	if (register_sha256_ssse3())
-		goto fail;
+	/* SHA-NI */
+	if (boot_cpu_has(X86_FEATURE_SHA_NI)) {
 
-	if (register_sha256_avx()) {
-		unregister_sha256_ssse3();
-		goto fail;
-	}
+		ret = crypto_register_shashes(sha256_ni_algs,
+						ARRAY_SIZE(sha256_ni_algs));
+		if (!ret) {
+			sha256_ni_registered = 1;
+			driver_name = sha256_ni_algs[0].base.cra_driver_name;
+			driver_name2 = sha256_ni_algs[1].base.cra_driver_name;
+		}
 
-	if (register_sha256_avx2()) {
-		unregister_sha256_avx();
-		unregister_sha256_ssse3();
-		goto fail;
-	}
+	/* AVX2 */
+	} else if (boot_cpu_has(X86_FEATURE_AVX2)) {
+
+		if (boot_cpu_has(X86_FEATURE_BMI2)) {
+			ret = crypto_register_shashes(sha256_avx2_algs,
+						ARRAY_SIZE(sha256_avx2_algs));
+			if (!ret) {
+				sha256_avx2_registered = 1;
+				driver_name = sha256_avx2_algs[0].base.cra_driver_name;
+				driver_name2 = sha256_avx2_algs[1].base.cra_driver_name;
+			}
+		} else {
+			pr_info("AVX2-optimized version not engaged, all required CPU features (AVX2, BMI2) not supported\n");
+		}
 
-	if (register_sha256_ni()) {
-		unregister_sha256_avx2();
-		unregister_sha256_avx();
-		unregister_sha256_ssse3();
-		goto fail;
+	/* AVX */
+	} else if (boot_cpu_has(X86_FEATURE_AVX)) {
+
+		if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+			       &feature_name)) {
+			ret = crypto_register_shashes(sha256_avx_algs,
+						ARRAY_SIZE(sha256_avx_algs));
+			if (!ret) {
+				sha256_avx_registered = 1;
+				driver_name = sha256_avx_algs[0].base.cra_driver_name;
+				driver_name2 = sha256_avx_algs[1].base.cra_driver_name;
+			}
+		} else {
+			pr_info("AVX-optimized version not engaged, CPU extended feature '%s' is not supported\n",
+				feature_name);
+		}
+
+	/* SSE3 */
+	} else if (boot_cpu_has(X86_FEATURE_SSSE3)) {
+		ret = crypto_register_shashes(sha256_ssse3_algs,
+					      ARRAY_SIZE(sha256_ssse3_algs));
+		if (!ret) {
+			sha256_ssse3_registered = 1;
+			driver_name = sha256_ssse3_algs[0].base.cra_driver_name;
+			driver_name2 = sha256_ssse3_algs[1].base.cra_driver_name;
+		}
 	}
 
+	pr_info("CPU-optimized crypto module loaded (SSSE3=%s, AVX=%s, AVX2=%s, SHA-NI=%s): drivers=%s, %s\n",
+		sha256_ssse3_registered ? "yes" : "no",
+		sha256_avx_registered ? "yes" : "no",
+		sha256_avx2_registered ? "yes" : "no",
+		sha256_ni_registered ? "yes" : "no",
+		driver_name, driver_name2);
 	return 0;
-fail:
-	return -ENODEV;
 }
 
 static void __exit sha256_ssse3_mod_fini(void)
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index fd5075a32613..df9f8207cc79 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -149,33 +149,21 @@  static struct shash_alg sha512_ssse3_algs[] = { {
 	}
 } };
 
-static int register_sha512_ssse3(void)
-{
-	if (boot_cpu_has(X86_FEATURE_SSSE3))
-		return crypto_register_shashes(sha512_ssse3_algs,
-			ARRAY_SIZE(sha512_ssse3_algs));
-	return 0;
-}
+static bool sha512_ssse3_registered;
+static bool sha512_avx_registered;
+static bool sha512_avx2_registered;
 
 static void unregister_sha512_ssse3(void)
 {
-	if (boot_cpu_has(X86_FEATURE_SSSE3))
+	if (sha512_ssse3_registered) {
 		crypto_unregister_shashes(sha512_ssse3_algs,
 			ARRAY_SIZE(sha512_ssse3_algs));
+		sha512_ssse3_registered = 0;
+	}
 }
 
 asmlinkage void sha512_transform_avx(struct sha512_state *state,
 				     const u8 *data, int blocks);
-static bool avx_usable(void)
-{
-	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
-		if (boot_cpu_has(X86_FEATURE_AVX))
-			pr_info("AVX detected but unusable.\n");
-		return false;
-	}
-
-	return true;
-}
 
 static int sha512_avx_update(struct shash_desc *desc, const u8 *data,
 		       unsigned int len)
@@ -225,19 +213,13 @@  static struct shash_alg sha512_avx_algs[] = { {
 	}
 } };
 
-static int register_sha512_avx(void)
-{
-	if (avx_usable())
-		return crypto_register_shashes(sha512_avx_algs,
-			ARRAY_SIZE(sha512_avx_algs));
-	return 0;
-}
-
 static void unregister_sha512_avx(void)
 {
-	if (avx_usable())
+	if (sha512_avx_registered) {
 		crypto_unregister_shashes(sha512_avx_algs,
 			ARRAY_SIZE(sha512_avx_algs));
+		sha512_avx_registered = 0;
+	}
 }
 
 asmlinkage void sha512_transform_rorx(struct sha512_state *state,
@@ -291,22 +273,6 @@  static struct shash_alg sha512_avx2_algs[] = { {
 	}
 } };
 
-static bool avx2_usable(void)
-{
-	if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) &&
-		    boot_cpu_has(X86_FEATURE_BMI2))
-		return true;
-
-	return false;
-}
-
-static int register_sha512_avx2(void)
-{
-	if (avx2_usable())
-		return crypto_register_shashes(sha512_avx2_algs,
-			ARRAY_SIZE(sha512_avx2_algs));
-	return 0;
-}
 static const struct x86_cpu_id module_cpu_ids[] = {
 	X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
 	X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
@@ -317,33 +283,73 @@  MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
 
 static void unregister_sha512_avx2(void)
 {
-	if (avx2_usable())
+	if (sha512_avx2_registered) {
 		crypto_unregister_shashes(sha512_avx2_algs,
 			ARRAY_SIZE(sha512_avx2_algs));
+		sha512_avx2_registered = 0;
+	}
 }
 
 static int __init sha512_ssse3_mod_init(void)
 {
-	if (!x86_match_cpu(module_cpu_ids))
+	const char *feature_name;
+	const char *driver_name = NULL;
+	const char *driver_name2 = NULL;
+	int ret;
+
+	if (!x86_match_cpu(module_cpu_ids)) {
+		pr_info("CPU-optimized crypto module not loaded, required CPU features (SSSE3, AVX, or AVX2) not supported\n");
 		return -ENODEV;
+	}
 
-	if (register_sha512_ssse3())
-		goto fail;
+	/* AVX2 */
+	if (boot_cpu_has(X86_FEATURE_AVX2)) {
+		if (boot_cpu_has(X86_FEATURE_BMI2)) {
+			ret = crypto_register_shashes(sha512_avx2_algs,
+					ARRAY_SIZE(sha512_avx2_algs));
+			if (!ret) {
+				sha512_avx2_registered = 1;
+				driver_name = sha512_avx2_algs[0].base.cra_driver_name;
+				driver_name2 = sha512_avx2_algs[1].base.cra_driver_name;
+			}
+		} else {
+			pr_info("AVX2-optimized version not engaged, all required CPU features (AVX2, BMI2) not supported\n");
+		}
 
-	if (register_sha512_avx()) {
-		unregister_sha512_ssse3();
-		goto fail;
-	}
+	/* AVX */
+	} else if (boot_cpu_has(X86_FEATURE_AVX)) {
+
+		if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+				       &feature_name)) {
+			ret = crypto_register_shashes(sha512_avx_algs,
+					ARRAY_SIZE(sha512_avx_algs));
+			if (!ret) {
+				sha512_avx_registered = 1;
+				driver_name = sha512_avx_algs[0].base.cra_driver_name;
+				driver_name2 = sha512_avx_algs[1].base.cra_driver_name;
+			}
+		} else {
+			pr_info("AVX-optimized version not engaged, CPU extended feature '%s' is not supported\n",
+				feature_name);
+		}
 
-	if (register_sha512_avx2()) {
-		unregister_sha512_avx();
-		unregister_sha512_ssse3();
-		goto fail;
+	/* SSE3 */
+	} else if (boot_cpu_has(X86_FEATURE_SSSE3)) {
+		ret = crypto_register_shashes(sha512_ssse3_algs,
+					ARRAY_SIZE(sha512_ssse3_algs));
+		if (!ret) {
+			sha512_ssse3_registered = 1;
+			driver_name = sha512_ssse3_algs[0].base.cra_driver_name;
+			driver_name2 = sha512_ssse3_algs[1].base.cra_driver_name;
+		}
 	}
 
+	pr_info("CPU-optimized crypto module loaded (SSSE3=%s, AVX=%s, AVX2=%s): drivers=%s, %s\n",
+		sha512_ssse3_registered ? "yes" : "no",
+		sha512_avx_registered ? "yes" : "no",
+		sha512_avx2_registered ? "yes" : "no",
+		driver_name, driver_name2);
 	return 0;
-fail:
-	return -ENODEV;
 }
 
 static void __exit sha512_ssse3_mod_fini(void)