arm64: efi: Fix stub cache maintenance

Message ID 1415810351-3462-1-git-send-email-mark.rutland@arm.com
State Accepted
Commit 9b0b26580a753d4d6bdd2b8b4ca9a8f3f2d39065

Commit Message

Mark Rutland Nov. 12, 2014, 4:39 p.m. UTC
While efi-entry.S mentions that efi_entry() will have relocated the
kernel image, it actually means that efi_entry will have placed a copy
of the kernel in the appropriate location, and until this is branched to
at the end of efi_entry.S, all instructions are executed from the
original image.

Thus while the flush in efi_entry.S does ensure that the copy is visible
to noncacheable accesses, it does not guarantee that this is true for
the image that instructions are being executed from. This could have
disastrous effects when the MMU and caches are disabled if the image
has not been naturally evicted to the PoC.

Additionally, due to a missing dsb following the ic ialluis, the new
kernel image is not necessarily clean in the I-cache when it is branched
to, with similar potentially disastrous effects.

This patch adds additional flushing to ensure that the currently
executing stub text is flushed to the PoC and is thus visible to
noncacheable accesses. As this flush is placed after the instruction
cache maintenance for the new image, and __flush_dcache_area already
contains a dsb, we do not need to add a separate barrier to ensure
completion of the icache maintenance.

Comments are updated to clarify the situation with regard to the two
images and the maintenance required for both.
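
For reference, the fully explicit form of this maintenance, written out as a
standalone sketch (illustrative only, not part of the patch; in the patch the
trailing dsb is provided by the second __flush_dcache_area call), is roughly:

	/* x0 = base of the copied Image, x1 = its size in bytes */
	bl	__flush_dcache_area	// dc civac over the copy, ending in dsb sy
	ic	ialluis			// discard stale I-cache entries
	dsb	sy			// ensure the I-cache invalidation completes
	isb				// conventional ending; whether it is needed
					// here is discussed in the thread below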

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ian Campbell <ijc@hellion.org.uk>
Cc: Joel Schopp <joel.schopp@amd.com>
Cc: Leif Lindholm <leif.lindholm@linaro.org>
Cc: Mark Salter <msalter@redhat.com>
Cc: Roy Franz <roy.franz@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/efi-entry.S | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

Comments

Roy Franz Nov. 12, 2014, 4:44 p.m. UTC | #1
On Wed, Nov 12, 2014 at 11:39 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> While efi-entry.S mentions that efi_entry() will have relocated the
> kernel image, it actually means that efi_entry will have placed a copy
> of the kernel in the appropriate location, and until this is branched to
> at the end of efi_entry.S, all instructions are executed from the
> original image.
>
> Thus while the flush in efi_entry.S does ensure that the copy is visible
> to noncacheable accesses, it does not guarantee that this is true for
> the image that instructions are being executed from. This could have
> disastrous effects when the MMU and caches are disabled if the image
> has not been naturally evicted to the PoC.
>
> Additionally, due to a missing dsb following the ic ialluis, the new
> kernel image is not necessarily clean in the I-cache when it is branched
> to, with similar potentially disastrous effects.
>
> This patch adds additional flushing to ensure that the currently
> executing stub text is flushed to the PoC and is thus visible to
> noncacheable accesses. As this flush is placed after the instruction
> cache maintenance for the new image, and __flush_dcache_area already
> contains a dsb, we do not need to add a separate barrier to ensure
> completion of the icache maintenance.
>
> Comments are updated to clarify the situation with regard to the two
> images and the maintenance required for both.

Looks good.
Reviewed-by: Roy Franz <roy.franz@linaro.org>

>
> Signed-off-by: Mark Rutland <mark.rutland@arm.com>
> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Ian Campbell <ijc@hellion.org.uk>
> Cc: Joel Schopp <joel.schopp@amd.com>
> Cc: Leif Lindholm <leif.lindholm@linaro.org>
> Cc: Mark Salter <msalter@redhat.com>
> Cc: Roy Franz <roy.franz@linaro.org>
> Cc: Will Deacon <will.deacon@arm.com>
> ---
>  arch/arm64/kernel/efi-entry.S | 27 +++++++++++++++++++++------
>  1 file changed, 21 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
> index 619b1dd..d18a449 100644
> --- a/arch/arm64/kernel/efi-entry.S
> +++ b/arch/arm64/kernel/efi-entry.S
> @@ -54,18 +54,17 @@ ENTRY(efi_stub_entry)
>         b.eq    efi_load_fail
>
>         /*
> -        * efi_entry() will have relocated the kernel image if necessary
> -        * and we return here with device tree address in x0 and the kernel
> -        * entry point stored at *image_addr. Save those values in registers
> -        * which are callee preserved.
> +        * efi_entry() will have copied the kernel image if necessary and we
> +        * return here with device tree address in x0 and the kernel entry
> +        * point stored at *image_addr. Save those values in registers which
> +        * are callee preserved.
>          */
>         mov     x20, x0         // DTB address
>         ldr     x0, [sp, #16]   // relocated _text address
>         mov     x21, x0
>
>         /*
> -        * Flush dcache covering current runtime addresses
> -        * of kernel text/data. Then flush all of icache.
> +        * Calculate size of the kernel Image (same for original and copy).
>          */
>         adrp    x1, _text
>         add     x1, x1, #:lo12:_text
> @@ -73,9 +72,24 @@ ENTRY(efi_stub_entry)
>         add     x2, x2, #:lo12:_edata
>         sub     x1, x2, x1
>
> +       /*
> +        * Flush the copied Image to the PoC, and ensure it is not shadowed by
> +        * stale icache entries from before relocation.
> +        */
>         bl      __flush_dcache_area
>         ic      ialluis
>
> +       /*
> +        * Ensure that the rest of this function (in the original Image) is
> +        * visible when the caches are disabled. The I-cache can't have stale
> +        * entries for the VA range of the current image, so no maintenance is
> +        * necessary.
> +        */
> +       adr     x0, efi_stub_entry
> +       adr     x1, efi_stub_entry_end
> +       sub     x1, x1, x0
> +       bl      __flush_dcache_area
> +
>         /* Turn off Dcache and MMU */
>         mrs     x0, CurrentEL
>         cmp     x0, #CurrentEL_EL2
> @@ -105,4 +119,5 @@ efi_load_fail:
>         ldp     x29, x30, [sp], #32
>         ret
>
> +efi_stub_entry_end:
>  ENDPROC(efi_stub_entry)
> --
> 1.9.1
>
Joel Schopp Nov. 12, 2014, 5:12 p.m. UTC | #2
>  
> +	/*
> +	 * Ensure that the rest of this function (in the original Image) is
> +	 * visible when the caches are disabled. The I-cache can't have stale
> +	 * entries for the VA range of the current image, so no maintenance is
> +	 * necessary.
> +	 */
> +	adr	x0, efi_stub_entry
> +	adr	x1, efi_stub_entry_end
> +	sub	x1, x1, x0
> +	bl	__flush_dcache_area
It is my understanding that you also need an isb here. Tom's testing of
my patch, which has the dsb but not the isb, confirms this.

Also, since this is not a performance-critical path, it seems more
straightforward to just flush everything.  I'm happy either way.  We
will test your patch as is and let you know.
Mark Rutland Nov. 12, 2014, 5:41 p.m. UTC | #3
On Wed, Nov 12, 2014 at 05:12:32PM +0000, Joel Schopp wrote:
> 
> >  
> > +	/*
> > +	 * Ensure that the rest of this function (in the original Image) is
> > +	 * visible when the caches are disabled. The I-cache can't have stale
> > +	 * entries for the VA range of the current image, so no maintenance is
> > +	 * necessary.
> > +	 */
> > +	adr	x0, efi_stub_entry
> > +	adr	x1, efi_stub_entry_end
> > +	sub	x1, x1, x0
> > +	bl	__flush_dcache_area
> It is my understanding that you also need an isb here. Tom's testing of
> my patch, which has the dsb but not the isb, confirms this.

Ok. I still can't see what context we would be synchronising, but it's
entirely possible my reasoning is off. I'm more than happy to add the
ISB if necessary, but I'd like to know why it is necessary.

> Also, since this is not a performance-critical path, it seems more
> straightforward to just flush everything.

Unfortunately, there's no way of flushing everything to the PoC.

Due to the possible presence of a system cache (e.g. the L3 in CCN), it
is only possible to flush data to the PoC by means of VA operations: the
architecture requires system caches to respect maintenance by VA (see
the ARMv8 ARM, issue A.d).

Using Set/Way operations will at best only flush out to the L3 cache,
and even that is racy while the D-cache is on; the CPU's D-cache can
speculatively allocate lines out of the system cache (even if dirty),
and theoretically could shuffle lines between levels (and, via that
race, between ways in a set).

Set/Way operations can only be used safely with the D-cache off, and
only to ensure that lines have been invalidated or evicted from the
CPU's caches. They cannot be used to ensure that the lines have made it
to the PoC (or PoU).

If other cache-coherent masters (e.g. other CPUs) have caches enabled,
they can similarly acquire cache lines from the current CPU, though that
shouldn't be a concern in this path.

We could flush the entire physical address space by VA, but that will
take an extremely long time, even for a path that isn't performance
critical.
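
(For reference, maintenance by VA to the PoC boils down to a loop over the
range, roughly what __flush_dcache_area does; the sketch below is illustrative
only and the register choices are arbitrary.)

	/* x0 = start of range, x1 = size in bytes */
	mrs	x3, ctr_el0
	ubfx	x3, x3, #16, #4		// CTR_EL0.DminLine: log2(words per line)
	mov	x2, #4
	lsl	x2, x2, x3		// D-cache line size in bytes
	add	x1, x0, x1		// end of the range
	sub	x3, x2, #1
	bic	x0, x0, x3		// align the start down to a line boundary
1:	dc	civac, x0		// clean+invalidate this line to the PoC
	add	x0, x0, x2
	cmp	x0, x1
	b.lo	1b
	dsb	sy			// ensure completion before the caches go off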

> I'm happy either way. We will test your patch as is and let you know.

Thanks, it's much appreciated.

Mark.
Joel Schopp Nov. 12, 2014, 6:14 p.m. UTC | #4
On 11/12/2014 10:39 AM, Mark Rutland wrote:
> While efi-entry.S mentions that efi_entry() will have relocated the
> kernel image, it actually means that efi_entry will have placed a copy
> of the kernel in the appropriate location, and until this is branched to
> at the end of efi_entry.S, all instructions are executed from the
> original image.
>
> Thus while the flush in efi_entry.S does ensure that the copy is visible
> to noncacheable accesses, it does not guarantee that this is true for
> the image that instructions are being executed from. This could have
> disastrous effects when the MMU and caches are disabled if the image
> has not been naturally evicted to the PoC.
>
> Additionally, due to a missing dsb following the ic ialluis, the new
> kernel image is not necessarily clean in the I-cache when it is branched
> to, with similar potentially disastrous effects.
>
> This patch adds additional flushing to ensure that the currently
> executing stub text is flushed to the PoC and is thus visible to
> noncacheable accesses. As this flush is placed after the instruction
> cache maintenance for the new image, and __flush_dcache_area already
> contains a dsb, we do not need to add a separate barrier to ensure
> completion of the icache maintenance.
>
> Comments are updated to clarify the situation with regard to the two
> images and the maintenance required for both.
>
> Signed-off-by: Mark Rutland <mark.rutland@arm.com>
> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Ian Campbell <ijc@hellion.org.uk>
> Cc: Joel Schopp <joel.schopp@amd.com>
> Cc: Leif Lindholm <leif.lindholm@linaro.org>
> Cc: Mark Salter <msalter@redhat.com>
> Cc: Roy Franz <roy.franz@linaro.org>
> Cc: Will Deacon <will.deacon@arm.com>
Looks more elegant than mine and seems to work.

Acked-by: Joel Schopp <joel.schopp@amd.com>

Patch

diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index 619b1dd..d18a449 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -54,18 +54,17 @@  ENTRY(efi_stub_entry)
 	b.eq	efi_load_fail
 
 	/*
-	 * efi_entry() will have relocated the kernel image if necessary
-	 * and we return here with device tree address in x0 and the kernel
-	 * entry point stored at *image_addr. Save those values in registers
-	 * which are callee preserved.
+	 * efi_entry() will have copied the kernel image if necessary and we
+	 * return here with device tree address in x0 and the kernel entry
+	 * point stored at *image_addr. Save those values in registers which
+	 * are callee preserved.
 	 */
 	mov	x20, x0		// DTB address
 	ldr	x0, [sp, #16]	// relocated _text address
 	mov	x21, x0
 
 	/*
-	 * Flush dcache covering current runtime addresses
-	 * of kernel text/data. Then flush all of icache.
+	 * Calculate size of the kernel Image (same for original and copy).
 	 */
 	adrp	x1, _text
 	add	x1, x1, #:lo12:_text
@@ -73,9 +72,24 @@  ENTRY(efi_stub_entry)
 	add	x2, x2, #:lo12:_edata
 	sub	x1, x2, x1
 
+	/*
+	 * Flush the copied Image to the PoC, and ensure it is not shadowed by
+	 * stale icache entries from before relocation.
+	 */
 	bl	__flush_dcache_area
 	ic	ialluis
 
+	/*
+	 * Ensure that the rest of this function (in the original Image) is
+	 * visible when the caches are disabled. The I-cache can't have stale
+	 * entries for the VA range of the current image, so no maintenance is
+	 * necessary.
+	 */
+	adr	x0, efi_stub_entry
+	adr	x1, efi_stub_entry_end
+	sub	x1, x1, x0
+	bl	__flush_dcache_area
+
 	/* Turn off Dcache and MMU */
 	mrs	x0, CurrentEL
 	cmp	x0, #CurrentEL_EL2
@@ -105,4 +119,5 @@  efi_load_fail:
 	ldp	x29, x30, [sp], #32
 	ret
 
+efi_stub_entry_end:
 ENDPROC(efi_stub_entry)