@@ -14,6 +14,8 @@ ifdef CONFIG_KCSAN
CFLAGS_REMOVE_delay.o = $(CC_FLAGS_FTRACE)
endif
+# crc32-glue.c calls the compiler's CRC32 builtins, so enable -mcrc32 for that
+# one object only rather than for the whole directory.
+# NOTE(review): confirm every supported compiler version accepts -mcrc32.
+CFLAGS_crc32-glue.o := -mcrc32
+
inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
quiet_cmd_inat_tables = GEN $@
@@ -47,11 +47,12 @@ u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
}
EXPORT_SYMBOL(crc32_le_arch);
-#ifdef CONFIG_X86_64
-#define CRC32_INST "crc32q %1, %q0"
-#else
-#define CRC32_INST "crc32l %1, %0"
-#endif
+static unsigned long crc32_ul(u32 crc, unsigned long p)
+{
+ if (IS_ENABLED(CONFIG_X86_64))
+ return __builtin_ia32_crc32di(crc, p);
+ return __builtin_ia32_crc32si(crc, p);
+}
/*
* Use carryless multiply version of crc32c when buffer size is >= 512 to
@@ -78,10 +79,10 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
for (num_longs = len / sizeof(unsigned long);
num_longs != 0; num_longs--, p += sizeof(unsigned long))
- asm(CRC32_INST : "+r" (crc) : "rm" (*(unsigned long *)p));
+ crc = crc32_ul(crc, *(unsigned long *)p);
for (len %= sizeof(unsigned long); len; len--, p++)
- asm("crc32b %1, %0" : "+r" (crc) : "rm" (*p));
+ crc = __builtin_ia32_crc32qi(crc, *p);
return crc;
}
For both gcc and clang, crc32 builtins generate better code than the inline asm. GCC improves, removing unneeded "mov" instructions. Clang does the same and unrolls the loops. GCC has no changes on i386, but Clang's code generation is vastly improved, due to Clang's "rm" constraint issue. The number of cycles improved by ~0.1% for GCC and ~1% for Clang, which is expected because of the "rm" issue. However, Clang's performance is better than GCC's by ~1.5%, most likely due to loop unrolling. Link: https://github.com/llvm/llvm-project/issues/20571#issuecomment-2649330009 Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: x86@kernel.org Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Eric Biggers <ebiggers@kernel.org> Cc: Ard Biesheuvel <ardb@kernel.org> Cc: Nathan Chancellor <nathan@kernel.org> Cc: Nick Desaulniers <nick.desaulniers+lkml@gmail.com> Cc: Justin Stitt <justinstitt@google.com> Cc: linux-kernel@vger.kernel.org Cc: linux-crypto@vger.kernel.org Cc: llvm@lists.linux.dev Signed-off-by: Bill Wendling <morbo@google.com> --- v2 - Limited range of '-mcrc32' usage to single file. - Use a function instead of macros. --- arch/x86/lib/Makefile | 2 ++ arch/x86/lib/crc32-glue.c | 15 ++++++++------- 2 files changed, 10 insertions(+), 7 deletions(-)