diff mbox series

[4.4] Revert "crypto: arm64/sha - avoid non-standard inline asm tricks"

Message ID 20201207172625.2888810-1-dann.frazier@canonical.com
State New
Headers show
Series [4.4] Revert "crypto: arm64/sha - avoid non-standard inline asm tricks" | expand

Commit Message

dann frazier Dec. 7, 2020, 5:26 p.m. UTC
This reverts commit c042dd600f4e89b6e7bdffa00aea4d1d3c1e9686.

This caused the build to emit ADR_PREL_PG_HI21 relocations in the sha{1,2}_ce
modules. This relocation type is not supported by the linux-4.4.y kernel
module loader when CONFIG_ARM64_ERRATUM_843419=y, which we have enabled, so
these modules now fail to load:

  [   37.866250] module sha1_ce: unsupported RELA relocation: 275

This issue does not exist with the backport to 4.9+. Bisection shows that
those kernels are unaffected because they also carry a backport of
commit 41c066f ("arm64: assembler: make adr_l work in modules under KASLR").

Signed-off-by: dann frazier <dann.frazier@canonical.com>
---
 arch/arm64/crypto/sha1-ce-core.S |  6 ++----
 arch/arm64/crypto/sha1-ce-glue.c | 11 ++++++++---
 arch/arm64/crypto/sha2-ce-core.S |  6 ++----
 arch/arm64/crypto/sha2-ce-glue.c | 13 ++++++++-----
 4 files changed, 20 insertions(+), 16 deletions(-)

Comments

Ard Biesheuvel Dec. 7, 2020, 5:50 p.m. UTC | #1
On Mon, 7 Dec 2020 at 18:26, dann frazier <dann.frazier@canonical.com> wrote:
>
> This reverts commit c042dd600f4e89b6e7bdffa00aea4d1d3c1e9686.
>
> This caused the build to emit ADR_PREL_PG_HI21 relocations in the sha{1,2}_ce
> modules. This relocation type is not supported by the linux-4.4.y kernel
> module loader when CONFIG_ARM64_ERRATUM_843419=y, which we have enabled, so
> these modules now fail to load:
>
>   [   37.866250] module sha1_ce: unsupported RELA relocation: 275
>
> This issue does not exist with the backport to 4.9+. Bisection shows that
> this is due to those kernels also having a backport of
> commit 41c066f ("arm64: assembler: make adr_l work in modules under KASLR")

Hi Dann,

Would it be an option to backport 41c066f as well?

>
> Signed-off-by: dann frazier <dann.frazier@canonical.com>
> ---
>  arch/arm64/crypto/sha1-ce-core.S |  6 ++----
>  arch/arm64/crypto/sha1-ce-glue.c | 11 ++++++++---
>  arch/arm64/crypto/sha2-ce-core.S |  6 ++----
>  arch/arm64/crypto/sha2-ce-glue.c | 13 ++++++++-----
>  4 files changed, 20 insertions(+), 16 deletions(-)
>
> diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
> index 8550408735a0..c98e7e849f06 100644
> --- a/arch/arm64/crypto/sha1-ce-core.S
> +++ b/arch/arm64/crypto/sha1-ce-core.S
> @@ -82,8 +82,7 @@ ENTRY(sha1_ce_transform)
>         ldr             dgb, [x0, #16]
>
>         /* load sha1_ce_state::finalize */
> -       ldr_l           w4, sha1_ce_offsetof_finalize, x4
> -       ldr             w4, [x0, x4]
> +       ldr             w4, [x0, #:lo12:sha1_ce_offsetof_finalize]
>
>         /* load input */
>  0:     ld1             {v8.4s-v11.4s}, [x1], #64
> @@ -133,8 +132,7 @@ CPU_LE(     rev32           v11.16b, v11.16b        )
>          * the padding is handled by the C code in that case.
>          */
>         cbz             x4, 3f
> -       ldr_l           w4, sha1_ce_offsetof_count, x4
> -       ldr             x4, [x0, x4]
> +       ldr             x4, [x0, #:lo12:sha1_ce_offsetof_count]
>         movi            v9.2d, #0
>         mov             x8, #0x80000000
>         movi            v10.2d, #0
> diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
> index 1b7b4684c35b..01e48b8970b1 100644
> --- a/arch/arm64/crypto/sha1-ce-glue.c
> +++ b/arch/arm64/crypto/sha1-ce-glue.c
> @@ -17,6 +17,9 @@
>  #include <linux/crypto.h>
>  #include <linux/module.h>
>
> +#define ASM_EXPORT(sym, val) \
> +       asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
> +
>  MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
>  MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
>  MODULE_LICENSE("GPL v2");
> @@ -29,9 +32,6 @@ struct sha1_ce_state {
>  asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
>                                   int blocks);
>
> -const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count);
> -const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize);
> -
>  static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
>                           unsigned int len)
>  {
> @@ -52,6 +52,11 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
>         struct sha1_ce_state *sctx = shash_desc_ctx(desc);
>         bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE) && len;
>
> +       ASM_EXPORT(sha1_ce_offsetof_count,
> +                  offsetof(struct sha1_ce_state, sst.count));
> +       ASM_EXPORT(sha1_ce_offsetof_finalize,
> +                  offsetof(struct sha1_ce_state, finalize));
> +
>         /*
>          * Allow the asm code to perform the finalization if there is no
>          * partial data and the input is a round multiple of the block size.
> diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
> index 679c6c002f4f..01cfee066837 100644
> --- a/arch/arm64/crypto/sha2-ce-core.S
> +++ b/arch/arm64/crypto/sha2-ce-core.S
> @@ -88,8 +88,7 @@ ENTRY(sha2_ce_transform)
>         ld1             {dgav.4s, dgbv.4s}, [x0]
>
>         /* load sha256_ce_state::finalize */
> -       ldr_l           w4, sha256_ce_offsetof_finalize, x4
> -       ldr             w4, [x0, x4]
> +       ldr             w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
>
>         /* load input */
>  0:     ld1             {v16.4s-v19.4s}, [x1], #64
> @@ -137,8 +136,7 @@ CPU_LE(     rev32           v19.16b, v19.16b        )
>          * the padding is handled by the C code in that case.
>          */
>         cbz             x4, 3f
> -       ldr_l           w4, sha256_ce_offsetof_count, x4
> -       ldr             x4, [x0, x4]
> +       ldr             x4, [x0, #:lo12:sha256_ce_offsetof_count]
>         movi            v17.2d, #0
>         mov             x8, #0x80000000
>         movi            v18.2d, #0
> diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
> index 356ca9397a86..7a7f95b94869 100644
> --- a/arch/arm64/crypto/sha2-ce-glue.c
> +++ b/arch/arm64/crypto/sha2-ce-glue.c
> @@ -17,6 +17,9 @@
>  #include <linux/crypto.h>
>  #include <linux/module.h>
>
> +#define ASM_EXPORT(sym, val) \
> +       asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
> +
>  MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
>  MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
>  MODULE_LICENSE("GPL v2");
> @@ -29,11 +32,6 @@ struct sha256_ce_state {
>  asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
>                                   int blocks);
>
> -const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state,
> -                                             sst.count);
> -const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state,
> -                                                finalize);
> -
>  static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
>                             unsigned int len)
>  {
> @@ -54,6 +52,11 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
>         struct sha256_ce_state *sctx = shash_desc_ctx(desc);
>         bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE) && len;
>
> +       ASM_EXPORT(sha256_ce_offsetof_count,
> +                  offsetof(struct sha256_ce_state, sst.count));
> +       ASM_EXPORT(sha256_ce_offsetof_finalize,
> +                  offsetof(struct sha256_ce_state, finalize));
> +
>         /*
>          * Allow the asm code to perform the finalization if there is no
>          * partial data and the input is a round multiple of the block size.
> --
> 2.29.2
>
Ard Biesheuvel Dec. 7, 2020, 6:29 p.m. UTC | #2
On Mon, 7 Dec 2020 at 19:08, dann frazier <dann.frazier@canonical.com> wrote:
>
> On Mon, Dec 7, 2020 at 10:50 AM Ard Biesheuvel <ardb@kernel.org> wrote:
> >
> > On Mon, 7 Dec 2020 at 18:26, dann frazier <dann.frazier@canonical.com> wrote:
> > >
> > > This reverts commit c042dd600f4e89b6e7bdffa00aea4d1d3c1e9686.
> > >
> > > This caused the build to emit ADR_PREL_PG_HI21 relocations in the sha{1,2}_ce
> > > modules. This relocation type is not supported by the linux-4.4.y kernel
> > > module loader when CONFIG_ARM64_ERRATUM_843419=y, which we have enabled, so
> > > these modules now fail to load:
> > >
> > >   [   37.866250] module sha1_ce: unsupported RELA relocation: 275
> > >
> > > This issue does not exist with the backport to 4.9+. Bisection shows that
> > > this is due to those kernels also having a backport of
> > > commit 41c066f ("arm64: assembler: make adr_l work in modules under KASLR")
> >
> > Hi Dann,
> >
> > Would it be an option to backport 41c066f as well?
>
> Hi Ard,
>
> That was attempted before, but caused a build failure which would
> still happen today:
>   https://www.spinics.net/lists/stable/msg179709.html
> Specifically, head.S still has a 3 argument usage of adr_l. I'm not
> sure how to safely fix that up myself.
>

Given that the original reason for reverting the backport of 41c066f
no longer holds (as there are other users of adr_l in v4.4 now), I
think the best solution is to backport it again, but with the hunk
below folded in. (This just replaces the macro invocation with its
expansion for the three arguments in question, so the generated
code is identical.)

--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -424,7 +424,8 @@ __mmap_switched:
        str     xzr, [x6], #8                   // Clear BSS
        b       1b
 2:
-       adr_l   sp, initial_sp, x4
+       adrp    x4, initial_sp
+       add     sp, x4, :lo12:initial_sp
        str_l   x21, __fdt_pointer, x5          // Save FDT pointer
        str_l   x24, memstart_addr, x6          // Save PHYS_OFFSET
        mov     x29, #0
dann frazier Dec. 7, 2020, 8:35 p.m. UTC | #3
On Mon, Dec 7, 2020 at 11:29 AM Ard Biesheuvel <ardb@kernel.org> wrote:
>
> On Mon, 7 Dec 2020 at 19:08, dann frazier <dann.frazier@canonical.com> wrote:
> >
> > On Mon, Dec 7, 2020 at 10:50 AM Ard Biesheuvel <ardb@kernel.org> wrote:
> > >
> > > On Mon, 7 Dec 2020 at 18:26, dann frazier <dann.frazier@canonical.com> wrote:
> > > >
> > > > This reverts commit c042dd600f4e89b6e7bdffa00aea4d1d3c1e9686.
> > > >
> > > > This caused the build to emit ADR_PREL_PG_HI21 relocations in the sha{1,2}_ce
> > > > modules. This relocation type is not supported by the linux-4.4.y kernel
> > > > module loader when CONFIG_ARM64_ERRATUM_843419=y, which we have enabled, so
> > > > these modules now fail to load:
> > > >
> > > >   [   37.866250] module sha1_ce: unsupported RELA relocation: 275
> > > >
> > > > This issue does not exist with the backport to 4.9+. Bisection shows that
> > > > this is due to those kernels also having a backport of
> > > > commit 41c066f ("arm64: assembler: make adr_l work in modules under KASLR")
> > >
> > > Hi Dann,
> > >
> > > Would it be an option to backport 41c066f as well?
> >
> > Hi Ard,
> >
> > That was attempted before, but caused a build failure which would
> > still happen today:
> >   https://www.spinics.net/lists/stable/msg179709.html
> > Specifically, head.S still has a 3 argument usage of adr_l. I'm not
> > sure how to safely fix that up myself.
> >
>
> Given that the original reason for reverting the backport of 41c066f
> no longer holds (as there are other users of adr_l in v4.4 now), I
> think the best solution is to backport it again, but with the hunk
> below folded in. (This just replaces the macro invocation with its
> output when called with the 3 arguments in question, so the generated
> code is identical)
>
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -424,7 +424,8 @@ __mmap_switched:
>         str     xzr, [x6], #8                   // Clear BSS
>         b       1b
>  2:
> -       adr_l   sp, initial_sp, x4
> +       adrp    x4, initial_sp
> +       add     sp, x4, :lo12:initial_sp
>         str_l   x21, __fdt_pointer, x5          // Save FDT pointer
>         str_l   x24, memstart_addr, x6          // Save PHYS_OFFSET
>         mov     x29, #0

Thanks Ard - that works. I'll follow up with a backport patch.

  -dann
Ard Biesheuvel Dec. 7, 2020, 10:20 p.m. UTC | #4
On Mon, 7 Dec 2020 at 21:36, dann frazier <dann.frazier@canonical.com> wrote:
>
> On Mon, Dec 7, 2020 at 11:29 AM Ard Biesheuvel <ardb@kernel.org> wrote:
> >
> > On Mon, 7 Dec 2020 at 19:08, dann frazier <dann.frazier@canonical.com> wrote:
> > >
> > > On Mon, Dec 7, 2020 at 10:50 AM Ard Biesheuvel <ardb@kernel.org> wrote:
> > > >
> > > > On Mon, 7 Dec 2020 at 18:26, dann frazier <dann.frazier@canonical.com> wrote:
> > > > >
> > > > > This reverts commit c042dd600f4e89b6e7bdffa00aea4d1d3c1e9686.
> > > > >
> > > > > This caused the build to emit ADR_PREL_PG_HI21 relocations in the sha{1,2}_ce
> > > > > modules. This relocation type is not supported by the linux-4.4.y kernel
> > > > > module loader when CONFIG_ARM64_ERRATUM_843419=y, which we have enabled, so
> > > > > these modules now fail to load:
> > > > >
> > > > >   [   37.866250] module sha1_ce: unsupported RELA relocation: 275
> > > > >
> > > > > This issue does not exist with the backport to 4.9+. Bisection shows that
> > > > > this is due to those kernels also having a backport of
> > > > > commit 41c066f ("arm64: assembler: make adr_l work in modules under KASLR")
> > > >
> > > > Hi Dann,
> > > >
> > > > Would it be an option to backport 41c066f as well?
> > >
> > > Hi Ard,
> > >
> > > That was attempted before, but caused a build failure which would
> > > still happen today:
> > >   https://www.spinics.net/lists/stable/msg179709.html
> > > Specifically, head.S still has a 3 argument usage of adr_l. I'm not
> > > sure how to safely fix that up myself.
> > >
> >
> > Given that the original reason for reverting the backport of 41c066f
> > no longer holds (as there are other users of adr_l in v4.4 now), I
> > think the best solution is to backport it again, but with the hunk
> > below folded in. (This just replaces the macro invocation with its
> > output when called with the 3 arguments in question, so the generated
> > code is identical)
> >
> > --- a/arch/arm64/kernel/head.S
> > +++ b/arch/arm64/kernel/head.S
> > @@ -424,7 +424,8 @@ __mmap_switched:
> >         str     xzr, [x6], #8                   // Clear BSS
> >         b       1b
> >  2:
> > -       adr_l   sp, initial_sp, x4
> > +       adrp    x4, initial_sp
> > +       add     sp, x4, :lo12:initial_sp
> >         str_l   x21, __fdt_pointer, x5          // Save FDT pointer
> >         str_l   x24, memstart_addr, x6          // Save PHYS_OFFSET
> >         mov     x29, #0
>
> Thanks Ard - that works. I'll follow-up with a backport patch.
>

Excellent.
diff mbox series

Patch

diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 8550408735a0..c98e7e849f06 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -82,8 +82,7 @@  ENTRY(sha1_ce_transform)
 	ldr		dgb, [x0, #16]
 
 	/* load sha1_ce_state::finalize */
-	ldr_l		w4, sha1_ce_offsetof_finalize, x4
-	ldr		w4, [x0, x4]
+	ldr		w4, [x0, #:lo12:sha1_ce_offsetof_finalize]
 
 	/* load input */
 0:	ld1		{v8.4s-v11.4s}, [x1], #64
@@ -133,8 +132,7 @@  CPU_LE(	rev32		v11.16b, v11.16b	)
 	 * the padding is handled by the C code in that case.
 	 */
 	cbz		x4, 3f
-	ldr_l		w4, sha1_ce_offsetof_count, x4
-	ldr		x4, [x0, x4]
+	ldr		x4, [x0, #:lo12:sha1_ce_offsetof_count]
 	movi		v9.2d, #0
 	mov		x8, #0x80000000
 	movi		v10.2d, #0
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index 1b7b4684c35b..01e48b8970b1 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -17,6 +17,9 @@ 
 #include <linux/crypto.h>
 #include <linux/module.h>
 
+#define ASM_EXPORT(sym, val) \
+	asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
+
 MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
@@ -29,9 +32,6 @@  struct sha1_ce_state {
 asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
 				  int blocks);
 
-const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count);
-const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize);
-
 static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
 			  unsigned int len)
 {
@@ -52,6 +52,11 @@  static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 	bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE) && len;
 
+	ASM_EXPORT(sha1_ce_offsetof_count,
+		   offsetof(struct sha1_ce_state, sst.count));
+	ASM_EXPORT(sha1_ce_offsetof_finalize,
+		   offsetof(struct sha1_ce_state, finalize));
+
 	/*
 	 * Allow the asm code to perform the finalization if there is no
 	 * partial data and the input is a round multiple of the block size.
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 679c6c002f4f..01cfee066837 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -88,8 +88,7 @@  ENTRY(sha2_ce_transform)
 	ld1		{dgav.4s, dgbv.4s}, [x0]
 
 	/* load sha256_ce_state::finalize */
-	ldr_l		w4, sha256_ce_offsetof_finalize, x4
-	ldr		w4, [x0, x4]
+	ldr		w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
 
 	/* load input */
 0:	ld1		{v16.4s-v19.4s}, [x1], #64
@@ -137,8 +136,7 @@  CPU_LE(	rev32		v19.16b, v19.16b	)
 	 * the padding is handled by the C code in that case.
 	 */
 	cbz		x4, 3f
-	ldr_l		w4, sha256_ce_offsetof_count, x4
-	ldr		x4, [x0, x4]
+	ldr		x4, [x0, #:lo12:sha256_ce_offsetof_count]
 	movi		v17.2d, #0
 	mov		x8, #0x80000000
 	movi		v18.2d, #0
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 356ca9397a86..7a7f95b94869 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -17,6 +17,9 @@ 
 #include <linux/crypto.h>
 #include <linux/module.h>
 
+#define ASM_EXPORT(sym, val) \
+	asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
+
 MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
@@ -29,11 +32,6 @@  struct sha256_ce_state {
 asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
 				  int blocks);
 
-const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state,
-					      sst.count);
-const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state,
-						 finalize);
-
 static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
 			    unsigned int len)
 {
@@ -54,6 +52,11 @@  static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
 	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 	bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE) && len;
 
+	ASM_EXPORT(sha256_ce_offsetof_count,
+		   offsetof(struct sha256_ce_state, sst.count));
+	ASM_EXPORT(sha256_ce_offsetof_finalize,
+		   offsetof(struct sha256_ce_state, finalize));
+
 	/*
 	 * Allow the asm code to perform the finalization if there is no
 	 * partial data and the input is a round multiple of the block size.