diff mbox series

[v2,14/19] crypto: x86 - load based on CPU features

Message ID 20221012215931.3896-15-elliott@hpe.com
State New
Headers show
Series crypto: x86 - fix RCU stalls | expand

Commit Message

Elliott, Robert (Servers) Oct. 12, 2022, 9:59 p.m. UTC
x86-optimized crypto implementations that are built as loadable
modules rather than built into the kernel end up as .ko files in the
filesystem, e.g., in /usr/lib/modules. If the filesystem driver itself
is a module, these files might not be available when the crypto API is
initialized, resulting in the generic implementation being used
(e.g., sha512_transform rather than sha512_transform_avx2).

In one test case, CPU utilization in the sha512 function dropped
from 15.34% to 7.18% after forcing loading of the optimized module.

Set module aliases for x86 optimized crypto modules based on CPU
feature bits so udev gets a chance to load them later in the boot
process when the filesystems are all running.

For example, with sha256, sha512, aesni_intel, and blake2s configured
as built-in and the rest configured as modules:

[   13.749145] sha256_ssse3: CPU-optimized crypto module loaded (SSSE3=no, AVX=no, AVX2=yes, SHA-NI=no)
[   13.758502] sha512_ssse3: CPU-optimized crypto module loaded (SSSE3=no, AVX=no, AVX2=yes)
[   13.766939] libblake2s_x86_64: CPU-optimized crypto module loaded (SSSE3=yes, AVX512=yes)
[   16.794502] aesni_intel: CPU-optimized crypto module loaded (GCM SSE=no, AVX=yes, AVX2=yes)(CTR AVX=yes)
...
[   18.160648] Run /init as init process
...
[   20.073484] twofish_x86_64: CPU-optimized crypto module loaded
[   23.974029] serpent_sse2_x86_64: CPU-optimized crypto module loaded
[   24.080749] serpent_avx_x86_64: CPU-optimized crypto module loaded
[   24.187148] serpent_avx2: CPU-optimized crypto module loaded
[   24.358980] des3_ede_x86_64: CPU-optimized crypto module loaded
[   24.459257] camellia_x86_64: CPU-optimized crypto module loaded
[   24.548487] camellia_aesni_avx_x86_64: CPU-optimized crypto module loaded
[   24.630777] camellia_aesni_avx2: CPU-optimized crypto module loaded
[   24.957134] blowfish_x86_64: CPU-optimized crypto module loaded
[   25.063537] aegis128_aesni: CPU-optimized crypto module loaded
[   25.174560] chacha_x86_64: CPU-optimized crypto module loaded (AVX2=yes, AVX512=yes)
[   25.270084] sha1_ssse3: CPU-optimized crypto module loaded (SSSE3=no, AVX=no, AVX2=yes, SHA-NI=no)
[   25.531724] ghash_clmulni_intel: CPU-optimized crypto module loaded
[   25.596316] crc32c_intel: CPU-optimized crypto module loaded (PCLMULQDQ=yes)
[   25.661693] crc32_pclmul: CPU-optimized crypto module loaded
[   25.696388] crct10dif_pclmul: CPU-optimized crypto module loaded
[   25.742040] poly1305_x86_64: CPU-optimized crypto module loaded (AVX=yes, AVX2=yes, AVX512=no)
[   25.841364] nhpoly1305_avx2: CPU-optimized crypto module loaded
[   25.856401] curve25519_x86_64: CPU-optimized crypto module loaded (ADX=yes)
[   25.866615] sm3_avx_x86_64: CPU-optimized crypto module loaded

This commit covers modules that did not create RCU stall issues
due to kernel_fpu_begin()/kernel_fpu_end() calls.

Signed-off-by: Robert Elliott <elliott@hpe.com>
---
 arch/x86/crypto/aegis128-aesni-glue.c      |  9 +++++++++
 arch/x86/crypto/aesni-intel_glue.c         |  7 +++----
 arch/x86/crypto/blake2s-glue.c             | 11 ++++++++++-
 arch/x86/crypto/blowfish_glue.c            | 10 ++++++++++
 arch/x86/crypto/camellia_aesni_avx2_glue.c | 12 ++++++++++++
 arch/x86/crypto/camellia_aesni_avx_glue.c  | 11 +++++++++++
 arch/x86/crypto/camellia_glue.c            |  9 +++++++++
 arch/x86/crypto/cast5_avx_glue.c           | 10 ++++++++++
 arch/x86/crypto/cast6_avx_glue.c           | 10 ++++++++++
 arch/x86/crypto/chacha_glue.c              | 12 ++++++++++--
 arch/x86/crypto/curve25519-x86_64.c        | 12 +++++++++++-
 arch/x86/crypto/des3_ede_glue.c            | 10 ++++++++++
 arch/x86/crypto/nhpoly1305-avx2-glue.c     | 10 ++++++++++
 arch/x86/crypto/nhpoly1305-sse2-glue.c     | 10 ++++++++++
 arch/x86/crypto/poly1305_glue.c            | 12 ++++++++++++
 arch/x86/crypto/serpent_avx2_glue.c        | 10 ++++++++++
 arch/x86/crypto/serpent_avx_glue.c         | 10 ++++++++++
 arch/x86/crypto/serpent_sse2_glue.c        | 10 ++++++++++
 arch/x86/crypto/sm4_aesni_avx2_glue.c      | 12 ++++++++++++
 arch/x86/crypto/sm4_aesni_avx_glue.c       | 11 +++++++++++
 arch/x86/crypto/twofish_avx_glue.c         | 10 ++++++++++
 arch/x86/crypto/twofish_glue.c             | 10 ++++++++++
 arch/x86/crypto/twofish_glue_3way.c        | 10 ++++++++++
 23 files changed, 230 insertions(+), 8 deletions(-)

Comments

Elliott, Robert (Servers) Oct. 14, 2022, 2:26 p.m. UTC | #1
> Subject: [PATCH v2 14/19] crypto: x86 - load based on CPU features
>  23 files changed, 230 insertions(+), 8 deletions(-)

Here are some things I've noticed on this patch that will be
addressed in v3.

- Add an aria device table (aria is a new algorithm added at the end of 6.0)

- Change the camellia_aesni_avx2 device table so it does not match on
AVX (just AVX2 and AES-NI). There's a separate module for AVX.

- Remove ADX from the curve25519 device table. That is optional,
not mandatory.

- Remove AVX from the sm4-avx2 device table. There's a separate
module for AVX.

Here is a script to review the device table aliases:

# Review the x86cpu device-table aliases of the x86 crypto modules.
#
# modinfo is run over every file under the 6.0.0+ arch/x86/crypto module
# directory, and grep keeps only the "filename" lines and the alias lines
# that mention "cpu". Each sed stage then replaces the first occurrence of
# one 4-character code (plus the single character on either side of it)
# with tab(s) and a readable feature name.
#
# NOTE(review): the codes look like the hexadecimal feature numbers that
# appear in the generated module alias strings -- confirm against the
# kernel's cpufeatures definitions before extending the list.
modinfo /lib/modules/6.0.0+/kernel/arch/x86/crypto/* | grep -E "filename|alias.*cpu" |
        sed 's/.013D./\tSHA-NI/' |      
        sed 's/.0133./\tADX/' |   
        sed 's/.0130./\t\tAVX512-F/' |  
        sed 's/.0125./\t\tAVX2/' |      
        sed 's/.009C./\t\tAVX/' |       
        sed 's/.0099./\tAES-NI/' |      
        sed 's/.0094./\tXMM4.2/' |      
        sed 's/.0089./\t\tSSSE3/' |     
        sed 's/.0081./\tPCLMULQDQ/' |   
        sed 's/.001A./\tXMM2/' |        # aka sse2
        cat
diff mbox series

Patch

diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index 4623189000d8..9e4ba031704d 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -263,10 +263,19 @@  static struct aead_alg crypto_aegis128_aesni_alg = {
 	}
 };
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static struct simd_aead_alg *simd_alg;
 
 static int __init crypto_aegis128_aesni_module_init(void)
 {
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (!boot_cpu_has(X86_FEATURE_XMM2) ||
 	    !boot_cpu_has(X86_FEATURE_AES) ||
 	    !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index a5b0cb3efeba..4a530a558436 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -36,7 +36,6 @@ 
 #include <linux/spinlock.h>
 #include <linux/static_call.h>
 
-
 #define AESNI_ALIGN	16
 #define AESNI_ALIGN_ATTR __attribute__ ((__aligned__(AESNI_ALIGN)))
 #define AES_BLOCK_MASK	(~(AES_BLOCK_SIZE - 1))
@@ -1228,17 +1227,17 @@  static struct aead_alg aesni_aeads[0];
 
 static struct simd_aead_alg *aesni_simd_aeads[ARRAY_SIZE(aesni_aeads)];
 
-static const struct x86_cpu_id aesni_cpu_id[] = {
+static const struct x86_cpu_id module_cpu_ids[] = {
 	X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
 	{}
 };
-MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
 
 static int __init aesni_init(void)
 {
 	int err;
 
-	if (!x86_match_cpu(aesni_cpu_id))
+	if (!x86_match_cpu(module_cpu_ids))
 		return -ENODEV;
 #ifdef CONFIG_X86_64
 	if (boot_cpu_has(X86_FEATURE_AVX2)) {
diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
index 3054ee7fa219..5153bb423dbe 100644
--- a/arch/x86/crypto/blake2s-glue.c
+++ b/arch/x86/crypto/blake2s-glue.c
@@ -10,7 +10,7 @@ 
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/sizes.h>
-
+#include <asm/cpu_device_id.h>
 #include <asm/cpufeature.h>
 #include <asm/fpu/api.h>
 #include <asm/processor.h>
@@ -56,8 +56,17 @@  void blake2s_compress(struct blake2s_state *state, const u8 *block,
 }
 EXPORT_SYMBOL(blake2s_compress);
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_ANY, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static int __init blake2s_mod_init(void)
 {
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (boot_cpu_has(X86_FEATURE_SSSE3))
 		static_branch_enable(&blake2s_use_ssse3);
 
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 019c64c1340a..4c0ead71b198 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -15,6 +15,7 @@ 
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <asm/cpu_device_id.h>
 
 /* regular block cipher functions */
 asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
@@ -303,10 +304,19 @@  static int force;
 module_param(force, int, 0);
 MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_ANY, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static int __init blowfish_init(void)
 {
 	int err;
 
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (!force && is_blacklisted_cpu()) {
 		printk(KERN_INFO
 			"blowfish-x86_64: performance on this CPU "
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index e7e4d64e9577..8e3ac5be7cf6 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -11,6 +11,7 @@ 
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <asm/cpu_device_id.h>
 
 #include "camellia.h"
 #include "ecb_cbc_helpers.h"
@@ -98,12 +99,23 @@  static struct skcipher_alg camellia_algs[] = {
 	},
 };
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
+	X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+	X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];
 
 static int __init camellia_aesni_init(void)
 {
 	const char *feature_name;
 
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (!boot_cpu_has(X86_FEATURE_AVX) ||
 	    !boot_cpu_has(X86_FEATURE_AVX2) ||
 	    !boot_cpu_has(X86_FEATURE_AES) ||
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index c7ccf63e741e..54fcd86160ff 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -11,6 +11,7 @@ 
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <asm/cpu_device_id.h>
 
 #include "camellia.h"
 #include "ecb_cbc_helpers.h"
@@ -98,12 +99,22 @@  static struct skcipher_alg camellia_algs[] = {
 	}
 };
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+	X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];
 
 static int __init camellia_aesni_init(void)
 {
 	const char *feature_name;
 
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (!boot_cpu_has(X86_FEATURE_AVX) ||
 	    !boot_cpu_has(X86_FEATURE_AES) ||
 	    !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index d45e9c0c42ac..e21d2d5b68f9 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -1377,10 +1377,19 @@  static int force;
 module_param(force, int, 0);
 MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_ANY, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static int __init camellia_init(void)
 {
 	int err;
 
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (!force && is_blacklisted_cpu()) {
 		printk(KERN_INFO
 			"camellia-x86_64: performance on this CPU "
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index 3976a87f92ad..bdc3c763334c 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -13,6 +13,7 @@ 
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <asm/cpu_device_id.h>
 
 #include "ecb_cbc_helpers.h"
 
@@ -93,12 +94,21 @@  static struct skcipher_alg cast5_algs[] = {
 	}
 };
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)];
 
 static int __init cast5_init(void)
 {
 	const char *feature_name;
 
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
 				&feature_name)) {
 		pr_info("CPU feature '%s' is not supported.\n", feature_name);
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 7e2aea372349..addca34b3511 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -15,6 +15,7 @@ 
 #include <crypto/algapi.h>
 #include <crypto/cast6.h>
 #include <crypto/internal/simd.h>
+#include <asm/cpu_device_id.h>
 
 #include "ecb_cbc_helpers.h"
 
@@ -93,12 +94,21 @@  static struct skcipher_alg cast6_algs[] = {
 	},
 };
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static struct simd_skcipher_alg *cast6_simd_algs[ARRAY_SIZE(cast6_algs)];
 
 static int __init cast6_init(void)
 {
 	const char *feature_name;
 
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
 				&feature_name)) {
 		pr_info("CPU feature '%s' is not supported.\n", feature_name);
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index 0d7e172862db..7275cae3380d 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -13,6 +13,7 @@ 
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/sizes.h>
+#include <asm/cpu_device_id.h>
 #include <asm/simd.h>
 
 #define FPU_BYTES 4096U /* avoid kernel_fpu_begin/end scheduler/rcu stalls */
@@ -278,10 +279,17 @@  static struct skcipher_alg algs[] = {
 	},
 };
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static int __init chacha_simd_mod_init(void)
 {
-	if (!boot_cpu_has(X86_FEATURE_SSSE3))
-		return 0;
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 
 	static_branch_enable(&chacha_use_simd);
 
diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
index d55fa9e9b9e6..7fe395dfa79d 100644
--- a/arch/x86/crypto/curve25519-x86_64.c
+++ b/arch/x86/crypto/curve25519-x86_64.c
@@ -12,7 +12,7 @@ 
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/scatterlist.h>
-
+#include <asm/cpu_device_id.h>
 #include <asm/cpufeature.h>
 #include <asm/processor.h>
 
@@ -1697,9 +1697,19 @@  static struct kpp_alg curve25519_alg = {
 	.max_size		= curve25519_max_size,
 };
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_ADX, NULL),
+	X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+	X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
 
 static int __init curve25519_mod_init(void)
 {
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX))
 		static_branch_enable(&curve25519_use_bmi2_adx);
 	else
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
index abb8b1fe123b..168cac5c6ca6 100644
--- a/arch/x86/crypto/des3_ede_glue.c
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -15,6 +15,7 @@ 
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <asm/cpu_device_id.h>
 
 struct des3_ede_x86_ctx {
 	struct des3_ede_ctx enc;
@@ -354,10 +355,19 @@  static int force;
 module_param(force, int, 0);
 MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_ANY, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static int __init des3_ede_x86_init(void)
 {
 	int err;
 
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (!force && is_blacklisted_cpu()) {
 		pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n");
 		return -ENODEV;
diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c
index 59615ae95e86..a8046334ddca 100644
--- a/arch/x86/crypto/nhpoly1305-avx2-glue.c
+++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c
@@ -11,6 +11,7 @@ 
 #include <crypto/nhpoly1305.h>
 #include <linux/module.h>
 #include <linux/sizes.h>
+#include <asm/cpu_device_id.h>
 #include <asm/simd.h>
 
 #define FPU_BYTES 4096U /* avoid kernel_fpu_begin/end scheduler/rcu stalls */
@@ -57,8 +58,17 @@  static struct shash_alg nhpoly1305_alg = {
 	.descsize		= sizeof(struct nhpoly1305_state),
 };
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static int __init nhpoly1305_mod_init(void)
 {
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
 	    !boot_cpu_has(X86_FEATURE_OSXSAVE))
 		return -ENODEV;
diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c
index bf91c375821a..cdbe5df00927 100644
--- a/arch/x86/crypto/nhpoly1305-sse2-glue.c
+++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c
@@ -11,6 +11,7 @@ 
 #include <crypto/nhpoly1305.h>
 #include <linux/module.h>
 #include <linux/sizes.h>
+#include <asm/cpu_device_id.h>
 #include <asm/simd.h>
 
 #define FPU_BYTES 4096U /* avoid kernel_fpu_begin/end scheduler/rcu stalls */
@@ -57,8 +58,17 @@  static struct shash_alg nhpoly1305_alg = {
 	.descsize		= sizeof(struct nhpoly1305_state),
 };
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_XMM2, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static int __init nhpoly1305_mod_init(void)
 {
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (!boot_cpu_has(X86_FEATURE_XMM2))
 		return -ENODEV;
 
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 3764301bdf1b..3e6ff505cd26 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -12,6 +12,7 @@ 
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/sizes.h>
+#include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
 #include <asm/simd.h>
 
@@ -260,8 +261,19 @@  static struct shash_alg alg = {
 	},
 };
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
+	X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+	X86_MATCH_FEATURE(X86_FEATURE_AVX512F, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static int __init poly1305_simd_mod_init(void)
 {
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (boot_cpu_has(X86_FEATURE_AVX) &&
 	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
 		static_branch_enable(&poly1305_use_avx);
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 347e97f4b713..24741d33edaf 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -12,6 +12,7 @@ 
 #include <crypto/algapi.h>
 #include <crypto/internal/simd.h>
 #include <crypto/serpent.h>
+#include <asm/cpu_device_id.h>
 
 #include "serpent-avx.h"
 #include "ecb_cbc_helpers.h"
@@ -94,12 +95,21 @@  static struct skcipher_alg serpent_algs[] = {
 	},
 };
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
 
 static int __init serpent_avx2_init(void)
 {
 	const char *feature_name;
 
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
 		pr_info("AVX2 instructions are not detected.\n");
 		return -ENODEV;
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 6c248e1ea4ef..0db18d99da50 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -15,6 +15,7 @@ 
 #include <crypto/algapi.h>
 #include <crypto/internal/simd.h>
 #include <crypto/serpent.h>
+#include <asm/cpu_device_id.h>
 
 #include "serpent-avx.h"
 #include "ecb_cbc_helpers.h"
@@ -100,12 +101,21 @@  static struct skcipher_alg serpent_algs[] = {
 	},
 };
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
 
 static int __init serpent_init(void)
 {
 	const char *feature_name;
 
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
 				&feature_name)) {
 		pr_info("CPU feature '%s' is not supported.\n", feature_name);
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index d78f37e9b2cf..5288441cc223 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -20,6 +20,7 @@ 
 #include <crypto/b128ops.h>
 #include <crypto/internal/simd.h>
 #include <crypto/serpent.h>
+#include <asm/cpu_device_id.h>
 
 #include "serpent-sse2.h"
 #include "ecb_cbc_helpers.h"
@@ -103,10 +104,19 @@  static struct skcipher_alg serpent_algs[] = {
 	},
 };
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_XMM2, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
 
 static int __init serpent_sse2_init(void)
 {
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (!boot_cpu_has(X86_FEATURE_XMM2)) {
 		printk(KERN_INFO "SSE2 instructions are not detected.\n");
 		return -ENODEV;
diff --git a/arch/x86/crypto/sm4_aesni_avx2_glue.c b/arch/x86/crypto/sm4_aesni_avx2_glue.c
index 84bc718f49a3..2e9fe76056b8 100644
--- a/arch/x86/crypto/sm4_aesni_avx2_glue.c
+++ b/arch/x86/crypto/sm4_aesni_avx2_glue.c
@@ -11,6 +11,7 @@ 
 #include <linux/module.h>
 #include <linux/crypto.h>
 #include <linux/kernel.h>
+#include <asm/cpu_device_id.h>
 #include <asm/simd.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
@@ -126,6 +127,14 @@  static struct skcipher_alg sm4_aesni_avx2_skciphers[] = {
 	}
 };
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
+	X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+	X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static struct simd_skcipher_alg *
 simd_sm4_aesni_avx2_skciphers[ARRAY_SIZE(sm4_aesni_avx2_skciphers)];
 
@@ -133,6 +142,9 @@  static int __init sm4_init(void)
 {
 	const char *feature_name;
 
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (!boot_cpu_has(X86_FEATURE_AVX) ||
 	    !boot_cpu_has(X86_FEATURE_AVX2) ||
 	    !boot_cpu_has(X86_FEATURE_AES) ||
diff --git a/arch/x86/crypto/sm4_aesni_avx_glue.c b/arch/x86/crypto/sm4_aesni_avx_glue.c
index 7800f77d68ad..f730822f203a 100644
--- a/arch/x86/crypto/sm4_aesni_avx_glue.c
+++ b/arch/x86/crypto/sm4_aesni_avx_glue.c
@@ -11,6 +11,7 @@ 
 #include <linux/module.h>
 #include <linux/crypto.h>
 #include <linux/kernel.h>
+#include <asm/cpu_device_id.h>
 #include <asm/simd.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
@@ -445,6 +446,13 @@  static struct skcipher_alg sm4_aesni_avx_skciphers[] = {
 	}
 };
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+	X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static struct simd_skcipher_alg *
 simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)];
 
@@ -452,6 +460,9 @@  static int __init sm4_init(void)
 {
 	const char *feature_name;
 
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (!boot_cpu_has(X86_FEATURE_AVX) ||
 	    !boot_cpu_has(X86_FEATURE_AES) ||
 	    !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 3eb3440b477a..4657e6efc35d 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -15,6 +15,7 @@ 
 #include <crypto/algapi.h>
 #include <crypto/internal/simd.h>
 #include <crypto/twofish.h>
+#include <asm/cpu_device_id.h>
 
 #include "twofish.h"
 #include "ecb_cbc_helpers.h"
@@ -103,12 +104,21 @@  static struct skcipher_alg twofish_algs[] = {
 	},
 };
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static struct simd_skcipher_alg *twofish_simd_algs[ARRAY_SIZE(twofish_algs)];
 
 static int __init twofish_init(void)
 {
 	const char *feature_name;
 
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, &feature_name)) {
 		pr_info("CPU feature '%s' is not supported.\n", feature_name);
 		return -ENODEV;
diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c
index f9c4adc27404..ade98aef3402 100644
--- a/arch/x86/crypto/twofish_glue.c
+++ b/arch/x86/crypto/twofish_glue.c
@@ -43,6 +43,7 @@ 
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <asm/cpu_device_id.h>
 
 asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
 				const u8 *src);
@@ -81,8 +82,17 @@  static struct crypto_alg alg = {
 	}
 };
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_ANY, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static int __init twofish_glue_init(void)
 {
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	return crypto_register_alg(&alg);
 }
 
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 90454cf18e0d..790e5a59a9a7 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -11,6 +11,7 @@ 
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <asm/cpu_device_id.h>
 
 #include "twofish.h"
 #include "ecb_cbc_helpers.h"
@@ -140,8 +141,17 @@  static int force;
 module_param(force, int, 0);
 MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
 
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_ANY, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
+
 static int __init twofish_3way_init(void)
 {
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
+
 	if (!force && is_blacklisted_cpu()) {
 		printk(KERN_INFO
 			"twofish-x86_64-3way: performance on this CPU "