@@ -53,21 +53,17 @@
#define IS_UNALIGNED $s7
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define MSB 0
#define LSB 3
-#define ROTx rotl
-#define ROTR(n) rotr n, 24
#define CPU_TO_LE32(n) \
- wsbh n; \
+ wsbh n, n; \
rotr n, 16;
#else
#define MSB 3
#define LSB 0
-#define ROTx rotr
#define CPU_TO_LE32(n)
-#define ROTR(n)
#endif
#define FOR_EACH_WORD(x) \
x( 0); \
x( 1); \
@@ -190,14 +186,14 @@ CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
addu X(D), X(N); \
xor X(V), X(A); \
xor X(W), X(B); \
xor X(Y), X(C); \
xor X(Z), X(D); \
- rotl X(V), S; \
- rotl X(W), S; \
- rotl X(Y), S; \
- rotl X(Z), S;
+ rotr X(V), 32 - S; \
+ rotr X(W), 32 - S; \
+ rotr X(Y), 32 - S; \
+ rotr X(Z), 32 - S;
.text
.set reorder
.set noat
.globl chacha_crypt_arch
@@ -370,25 +366,23 @@ chacha_crypt_arch:
addu IN, $at
addu OUT, $at
/* First byte */
lbu T1, 0(IN)
addiu $at, BYTES, 1
- CPU_TO_LE32(SAVED_X)
- ROTR(SAVED_X)
xor T1, SAVED_X
sb T1, 0(OUT)
beqz $at, .Lchacha_mips_xor_done
/* Second byte */
lbu T1, 1(IN)
addiu $at, BYTES, 2
- ROTx SAVED_X, 8
+ rotr SAVED_X, 8
xor T1, SAVED_X
sb T1, 1(OUT)
beqz $at, .Lchacha_mips_xor_done
/* Third byte */
lbu T1, 2(IN)
- ROTx SAVED_X, 8
+ rotr SAVED_X, 8
xor T1, SAVED_X
sb T1, 2(OUT)
b .Lchacha_mips_xor_done
.Lchacha_mips_no_full_block_unaligned:
The MIPS32r2 ChaCha code has never been buildable with the clang
assembler.  First, clang doesn't support the 'rotl' pseudo-instruction:

	error: unknown instruction, did you mean: rol, rotr?

Second, clang requires that both operands of the 'wsbh' instruction be
explicitly given:

	error: too few operands for instruction

To fix this, align the code with the real instruction set by (1) using
the real instruction 'rotr' instead of the nonstandard pseudo-instruction
'rotl', and (2) explicitly giving both operands to 'wsbh'.

To make removing the use of 'rotl' a bit easier, also remove the
unnecessary special-casing for big endian CPUs at
.Lchacha_mips_xor_bytes.  The tail handling is actually
endian-independent since it processes one byte at a time.  On big endian
CPUs the old code byte-swapped SAVED_X, then iterated through it in
reverse order.  But the byteswap and reverse iteration canceled out.

Tested with chacha20poly1305-selftest in QEMU using "-M malta" with both
little endian and big endian mips32r2 kernels.

Fixes: 49aa7c00eddf ("crypto: mips/chacha - import 32r2 ChaCha code from Zinc")
Cc: stable@vger.kernel.org
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202505080409.EujEBwA0-lkp@intel.com/
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---

This applies on top of other pending lib/crypto patches and can be
retrieved from git at:

    git fetch https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux.git mips-chacha-fix

 lib/crypto/mips/chacha-core.S | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)
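
As an aside (not part of the patch), the two bit-manipulation identities
the rewrite relies on can be sanity-checked in plain C.  The sketch below
is illustrative only; rotl32(), rotr32(), and wsbh32() are ad-hoc helpers
defined here, not kernel or toolchain APIs.

	#include <assert.h>
	#include <stdint.h>

	static uint32_t rotl32(uint32_t x, unsigned s)
	{
		return (x << s) | (x >> (32 - s));
	}

	static uint32_t rotr32(uint32_t x, unsigned s)
	{
		return (x >> s) | (x << (32 - s));
	}

	/* 'wsbh': swap the two bytes within each 16-bit halfword. */
	static uint32_t wsbh32(uint32_t x)
	{
		return ((x & 0x00ff00ffu) << 8) | ((x & 0xff00ff00u) >> 8);
	}

	int main(void)
	{
		uint32_t x = 0x12345678u;

		/* rotl by S equals rotr by (32 - S), so 'rotr n, 32-S'
		 * can stand in for the 'rotl n, S' pseudo-instruction. */
		for (unsigned s = 1; s < 32; s++)
			assert(rotl32(x, s) == rotr32(x, 32 - s));

		/* 'wsbh n, n' followed by 'rotr n, 16' is a full 32-bit
		 * byte swap, which is what CPU_TO_LE32() needs on big
		 * endian CPUs. */
		assert(rotr32(wsbh32(x), 16) == 0x78563412u);

		return 0;
	}

Both assertions hold for any 32-bit value, which is why the substitutions
are behavior-preserving on both endiannesses.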
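
Likewise, the claim that the byteswap and reverse iteration in the old
big endian tail cancel out can be checked with a small C sketch.  Again
this is only an illustration under the assumptions above; bswap32() here
is just a wrapper around a GCC/clang builtin.

	#include <assert.h>
	#include <stdint.h>

	static uint32_t rotl32(uint32_t x, unsigned s)
	{
		return (x << s) | (x >> (32 - s));
	}

	static uint32_t rotr32(uint32_t x, unsigned s)
	{
		return (x >> s) | (x << (32 - s));
	}

	static uint32_t bswap32(uint32_t x)
	{
		return __builtin_bswap32(x);
	}

	int main(void)
	{
		/* Keystream word as it sits in a register. */
		uint32_t x = 0xaabbccddu;
		uint8_t old_be[3], new_any[3];
		uint32_t v;

		/* Old big endian tail: CPU_TO_LE32 (byte swap) and
		 * ROTR (rotr 24) up front, then rotl 8 between bytes. */
		v = rotr32(bswap32(x), 24);
		old_be[0] = v & 0xff; v = rotl32(v, 8);
		old_be[1] = v & 0xff; v = rotl32(v, 8);
		old_be[2] = v & 0xff;

		/* New endian-independent tail: just rotr 8 between bytes. */
		v = x;
		new_any[0] = v & 0xff; v = rotr32(v, 8);
		new_any[1] = v & 0xff; v = rotr32(v, 8);
		new_any[2] = v & 0xff;

		for (int i = 0; i < 3; i++)
			assert(old_be[i] == new_any[i]);

		return 0;
	}

Both paths emit the word's bytes in little-endian order, so dropping the
big endian special case does not change the keystream.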