diff mbox

[Xen-devel] xen/arm64: Use __flush_dcache_area instead of __flush_dcache_all

Message ID 1412610550-26964-1-git-send-email-suravee.suthikulpanit@amd.com
State New
Headers show

Commit Message

Suthikulpanit, Suravee Oct. 6, 2014, 3:49 p.m. UTC
From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>

When booting with EFI, __flush_dcache_all does not correctly flush data.

According to Mark Rutland, __flush_dcache_all is not guaranteed to push
data to the PoC if there is a system-level cache as it uses Set/Way
operations.

Therefore, this patch switches to use the "__flush_dcache_area"
mechanism, which is copied from Linux.

Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
---

NOTE: I still have not fully booted into Dom0 with this patch.
      However, it seems that the data is flushed out to physical
      memory now.

 xen/arch/arm/arm64/cache.S | 32 ++++++++++++++++++++++++++++++++
 xen/arch/arm/arm64/head.S  | 24 +++++++++++++++++++-----
 2 files changed, 51 insertions(+), 5 deletions(-)

Comments

Mark Rutland Oct. 6, 2014, 4:28 p.m. UTC | #1
Hi Suravee,

On Mon, Oct 06, 2014 at 04:49:10PM +0100, suravee.suthikulpanit@amd.com wrote:
> From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
> 
> when booting with EFI, __flush_dcache_all does not correctly flush data.
> 
> According to Mark Rutland, __flush_dcache_all does not guaranteed to push
> data to the PoC if there is a system-level cache as it uses Set/Way
> operations.

A better way to look at this is that Set/Way operations are never
guaranteed to flush data to the PoC, regardless of the presence of a
system-level cache. They might on certain implementations, but that's
not an architectural guarantee. The same caveat applies to using them to
push data to other points in the cache hierarchy (PoUU or PoUIS).

Generally, Set/Way cache maintenance operations can only be used to
empty or clean the architected caches visible to a given CPU, and only
when all masters sharing those caches have been prevented from
allocating any cache entries. Outside of IMPLEMENTATION DEFINED
power-down sequences or reset-like operations they are typically the
wrong thing to use.

So any other uses of Set/Way operations should also be treated as
suspect, and are likely to be problematic on platforms with system-level
caches.

> 
> Therefore, this patch switchs to use the "__flush_dcache_area"

Nit: s/switchs/switches/

> mechanism, which is coppied from Linux.

It would be good to state that this uses maintenance by VA, which (sane)
system caches should respect.

> 
> Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
> ---
> 
> NOTE: I still have not fully boot into Dom0 with this patch.
>       However, it seems that the data is flushed out to physical
>       memory now.
> 
>  xen/arch/arm/arm64/cache.S | 32 ++++++++++++++++++++++++++++++++
>  xen/arch/arm/arm64/head.S  | 24 +++++++++++++++++++-----
>  2 files changed, 51 insertions(+), 5 deletions(-)
> 
> diff --git a/xen/arch/arm/arm64/cache.S b/xen/arch/arm/arm64/cache.S
> index a445cbf..38f96c2 100644
> --- a/xen/arch/arm/arm64/cache.S
> +++ b/xen/arch/arm/arm64/cache.S
> @@ -97,3 +97,35 @@ finished:
>  	isb
>  	ret
>  ENDPROC(__flush_dcache_all)
> +
> +/*
> + * dcache_line_size - get the minimum D-cache line size from the CTR register.
> + */
> +	.macro	dcache_line_size, reg, tmp
> +	mrs	\tmp, ctr_el0			// read CTR
> +	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
> +	mov	\reg, #4			// bytes per word
> +	lsl	\reg, \reg, \tmp		// actual cache line size
> +	.endm
> +
> +/*
> + *	__flush_dcache_area(kaddr, size)
> + *
> + *	Ensure that the data held in the page kaddr is written back to the
> + *	page in question.
> + *
> + *	- kaddr   - kernel address
> + *	- size    - size in question
> + */
> +ENTRY(__flush_dcache_area)
> +	dcache_line_size x2, x3
> +	add	x1, x0, x1
> +	sub	x3, x2, #1
> +	bic	x0, x0, x3
> +1:	dc	civac, x0			// clean & invalidate D line / unified line
> +	add	x0, x0, x2
> +	cmp	x0, x1
> +	b.lo	1b
> +	dsb	sy
> +	ret
> +ENDPROC(__flush_dcache_area)
> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
> index 7650abe..704f39d 100644
> --- a/xen/arch/arm/arm64/head.S
> +++ b/xen/arch/arm/arm64/head.S
> @@ -740,16 +740,30 @@ ENTRY(lookup_processor_type)
>   */
>  ENTRY(efi_xen_start)
>          /*
> +         * Preserve x0 (fdf pointer) across call to __flush_dcache_area,

Sorry if this is a silly question, but what's the "fdf pointer"?

> +         * restore for entry into Xen.
> +         */
> +        mov   x20, x0
> +
> +        /*
> +         * Flush dcache covering current runtime addresses
> +         * of xen text/data. Then flush all of icache.
> +         */
> +        adrp  x1, _start
> +        add   x1, x1, #:lo12:_start
> +        adrp  x2, _end
> +        add   x2, x2, #:lo12:_end
> +        sub   x1, x2, x1

Shouldn't the start address go in x0? We saved the fdf pointer earlier
but never placed the start address into x0.

I take it Xen doesn't relocate itself?

Thanks,
Mark.

> +
> +        bl    __flush_dcache_area
> +        ic    ialluis
> +
> +        /*
>           * Turn off cache and MMU as Xen expects. EFI enables them, but also
>           * mandates a 1:1 (unity) VA->PA mapping, so we can turn off the
>           * MMU while executing EFI code before entering Xen.
>           * The EFI loader calls this to start Xen.
> -         * Preserve x0 (fdf pointer) across call to __flush_dcache_all,
> -         * restore for entry into Xen.
>           */
> -        mov   x20, x0
> -        bl    __flush_dcache_all
> -        ic    ialluis
>  
>          /* Turn off Dcache and MMU */
>          mrs   x0, sctlr_el2
> -- 
> 1.9.3
> 
>
Roy Franz Oct. 7, 2014, 4:15 a.m. UTC | #2
On Mon, Oct 6, 2014 at 9:28 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> Hi Suravee,
>
> On Mon, Oct 06, 2014 at 04:49:10PM +0100, suravee.suthikulpanit@amd.com wrote:
>> From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
>>
>> when booting with EFI, __flush_dcache_all does not correctly flush data.
>>
>> According to Mark Rutland, __flush_dcache_all does not guaranteed to push
>> data to the PoC if there is a system-level cache as it uses Set/Way
>> operations.
>
> A better way to look at this is that Set/Way operations are never
> guaranteed to flush data to the PoC, regardless of the presence of a
> system-level cache. They might on certain implementations, but that's
> not an architectural guarantee. The same caveat applies to using them to
> push data to other points in the cache hierarchy (PoUU or PoUIS).
>
> Generally, Set/Way cache maintenance operations can only be used to
> empty or clean the architected caches visible to a given CPU, and only
> when all masters sharing those caches have been prevented from
> allocating any cache entries. Outside of IMPLEMENTATION DEFINED
> power-down sequences or reset-like operations they are typically the
> wrong thing to use.
>
> So any other uses of Set/Way operations should also be treated as
> suspect, and are likely to be problematic on platforms with system-level
> caches.

So what all do we need to flush?  Do we need to flush all modified
(dirty) cache lines,
or just a specific subset?

In Linux the FDT which is modified in the Linux EFI stub isn't
flushed, nor is the EFI memory map,
both of which are modified by the UEFI firmware/boot stub.  I feel
like I'm missing
something here.


>
>>
>> Therefore, this patch switchs to use the "__flush_dcache_area"
>
> Nit: s/switchs/switches/
>
>> mechanism, which is coppied from Linux.
>
> It would be good to state that this uses maintenance by VA, which (sane)
> system caches should respect.
>
>>
>> Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
>> ---
>>
>> NOTE: I still have not fully boot into Dom0 with this patch.
>>       However, it seems that the data is flushed out to physical
>>       memory now.
>>
>>  xen/arch/arm/arm64/cache.S | 32 ++++++++++++++++++++++++++++++++
>>  xen/arch/arm/arm64/head.S  | 24 +++++++++++++++++++-----
>>  2 files changed, 51 insertions(+), 5 deletions(-)
>>
>> diff --git a/xen/arch/arm/arm64/cache.S b/xen/arch/arm/arm64/cache.S
>> index a445cbf..38f96c2 100644
>> --- a/xen/arch/arm/arm64/cache.S
>> +++ b/xen/arch/arm/arm64/cache.S
>> @@ -97,3 +97,35 @@ finished:
>>       isb
>>       ret
>>  ENDPROC(__flush_dcache_all)
>> +
>> +/*
>> + * dcache_line_size - get the minimum D-cache line size from the CTR register.
>> + */
>> +     .macro  dcache_line_size, reg, tmp
>> +     mrs     \tmp, ctr_el0                   // read CTR
>> +     ubfm    \tmp, \tmp, #16, #19            // cache line size encoding
>> +     mov     \reg, #4                        // bytes per word
>> +     lsl     \reg, \reg, \tmp                // actual cache line size
>> +     .endm
>> +
>> +/*
>> + *   __flush_dcache_area(kaddr, size)
>> + *
>> + *   Ensure that the data held in the page kaddr is written back to the
>> + *   page in question.
>> + *
>> + *   - kaddr   - kernel address
>> + *   - size    - size in question
>> + */
>> +ENTRY(__flush_dcache_area)
>> +     dcache_line_size x2, x3
>> +     add     x1, x0, x1
>> +     sub     x3, x2, #1
>> +     bic     x0, x0, x3
>> +1:   dc      civac, x0                       // clean & invalidate D line / unified line
>> +     add     x0, x0, x2
>> +     cmp     x0, x1
>> +     b.lo    1b
>> +     dsb     sy
>> +     ret
>> +ENDPROC(__flush_dcache_area)
>> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
>> index 7650abe..704f39d 100644
>> --- a/xen/arch/arm/arm64/head.S
>> +++ b/xen/arch/arm/arm64/head.S
>> @@ -740,16 +740,30 @@ ENTRY(lookup_processor_type)
>>   */
>>  ENTRY(efi_xen_start)
>>          /*
>> +         * Preserve x0 (fdf pointer) across call to __flush_dcache_area,
>
> Sorry if this is a silly question, but what's the "fdf pointer"?
>

Should be fdt.  This is a typo from my original patch.


Also, we should remove flush_dcache_all, as that was added for use in
the EFI boot code.  If we
don't use it there it doesn't have a user in Xen.


>> +         * restore for entry into Xen.
>> +         */
>> +        mov   x20, x0
>> +
>> +        /*
>> +         * Flush dcache covering current runtime addresses
>> +         * of xen text/data. Then flush all of icache.
>> +         */
>> +        adrp  x1, _start
>> +        add   x1, x1, #:lo12:_start
>> +        adrp  x2, _end
>> +        add   x2, x2, #:lo12:_end
>> +        sub   x1, x2, x1
>
> Shouldn't the start address go in x0? We saved the fdf pointer earlier
> but never placed the start address into x0.

Yes, this does seem to be missing
>
> I take it Xen doesn't relocate itself?

Xen does relocate itself, but that is done later in the boot process
that is common between the EFI and Image
boot methods.

>
> Thanks,
> Mark.
>
>> +
>> +        bl    __flush_dcache_area
>> +        ic    ialluis
>> +
>> +        /*
>>           * Turn off cache and MMU as Xen expects. EFI enables them, but also
>>           * mandates a 1:1 (unity) VA->PA mapping, so we can turn off the
>>           * MMU while executing EFI code before entering Xen.
>>           * The EFI loader calls this to start Xen.
>> -         * Preserve x0 (fdf pointer) across call to __flush_dcache_all,
>> -         * restore for entry into Xen.
>>           */
>> -        mov   x20, x0
>> -        bl    __flush_dcache_all
>> -        ic    ialluis
>>
>>          /* Turn off Dcache and MMU */
>>          mrs   x0, sctlr_el2
>> --
>> 1.9.3
>>
>>
Ian Campbell Oct. 7, 2014, 9:27 a.m. UTC | #3
On Mon, 2014-10-06 at 17:28 +0100, Mark Rutland wrote:
> Hi Suravee,
> 
> On Mon, Oct 06, 2014 at 04:49:10PM +0100, suravee.suthikulpanit@amd.com wrote:
> > From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
> > 
> > when booting with EFI, __flush_dcache_all does not correctly flush data.
> > 
> > According to Mark Rutland, __flush_dcache_all does not guaranteed to push
> > data to the PoC if there is a system-level cache as it uses Set/Way
> > operations.
> 
> A better way to look at this is that Set/Way operations are never
> guaranteed to flush data to the PoC, regardless of the presence of a
> system-level cache. They might on certain implementations, but that's
> not an architectural guarantee. The same caveat applies to using them to
> push data to other points in the cache hierarchy (PoUU or PoUIS).
> 
> Generally, Set/Way cache maintenance operations can only be used to
> empty or clean the architected caches visible to a given CPU, and only
> when all masters sharing those caches have been prevented from
> allocating any cache entries. Outside of IMPLEMENTATION DEFINED
> power-down sequences or reset-like operations they are typically the
> wrong thing to use.
> 
> So any other uses of Set/Way operations should also be treated as
> suspect, and are likely to be problematic on platforms with system-level
> caches.

I suppose this set of problematic situations still includes "running
apparently UP during boot" since we may not be aware of secondary
processors currently running platform firmware and therefore
(potentially) interacting with caches?

Ian.
Ian Campbell Oct. 7, 2014, 9:32 a.m. UTC | #4
On Mon, 2014-10-06 at 21:15 -0700, Roy Franz wrote:
> On Mon, Oct 6, 2014 at 9:28 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> > Hi Suravee,
> >
> > On Mon, Oct 06, 2014 at 04:49:10PM +0100, suravee.suthikulpanit@amd.com wrote:
> >> From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
> >>
> >> when booting with EFI, __flush_dcache_all does not correctly flush data.
> >>
> >> According to Mark Rutland, __flush_dcache_all does not guaranteed to push
> >> data to the PoC if there is a system-level cache as it uses Set/Way
> >> operations.
> >
> > A better way to look at this is that Set/Way operations are never
> > guaranteed to flush data to the PoC, regardless of the presence of a
> > system-level cache. They might on certain implementations, but that's
> > not an architectural guarantee. The same caveat applies to using them to
> > push data to other points in the cache hierarchy (PoUU or PoUIS).
> >
> > Generally, Set/Way cache maintenance operations can only be used to
> > empty or clean the architected caches visible to a given CPU, and only
> > when all masters sharing those caches have been prevented from
> > allocating any cache entries. Outside of IMPLEMENTATION DEFINED
> > power-down sequences or reset-like operations they are typically the
> > wrong thing to use.
> >
> > So any other uses of Set/Way operations should also be treated as
> > suspect, and are likely to be problematic on platforms with system-level
> > caches.
> 
> So what all do we need to flush?  Do we need to flush all modified
> (dirty) cache lines,
> or just a specific subset?
> 
> In Linux the FDT which is modified in the Linux EFI stub isn't
> flushed, nor is the EFI memory map,
> both of which are modified by the UEFI firmware/boot stub.  I feel
> like I'm missing
> something here.

Mark was making reference on IRC to other missing flushes even in Linux.
Not sure if those include the ones which you mention...

> Also, we should remove flush_dcache_all, as that was added for use in
> the EFI boot code.  If we
> don't use it there it doesn't have a user in Xen.

Absolutely, especially given that it turns out to be dangerous to use
under most circumstances!

> > I take it Xen doesn't relocate itself?
> 
> Xen does relocate itself, but that is done later in the boot process
> that is common between the EFI and Image boot methods.

Even with it happening later it's possible that we might need to flush
some additional stuff on entry via the EFI path? e.g. there could be
stuff which the non-EFI code path was previously implicitly assuming
wouldn't be cached (because caches were never enabled on such
bootloaders, etc).

In fact I'd suggest that those missing flushes (if any, maybe we already
got it all right) really belong in the relocation code rather than in
the EFI stub, since even on non-EFI it seems fragile to rely on specific
caching behaviour from the bootloader. I suppose we will cross that
bridge when Suravee gets as far as that!

Ian.
Mark Rutland Oct. 7, 2014, 10:40 a.m. UTC | #5
On Tue, Oct 07, 2014 at 05:15:58AM +0100, Roy Franz wrote:
> On Mon, Oct 6, 2014 at 9:28 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> > Hi Suravee,
> >
> > On Mon, Oct 06, 2014 at 04:49:10PM +0100, suravee.suthikulpanit@amd.com wrote:
> >> From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
> >>
> >> when booting with EFI, __flush_dcache_all does not correctly flush data.
> >>
> >> According to Mark Rutland, __flush_dcache_all does not guaranteed to push
> >> data to the PoC if there is a system-level cache as it uses Set/Way
> >> operations.
> >
> > A better way to look at this is that Set/Way operations are never
> > guaranteed to flush data to the PoC, regardless of the presence of a
> > system-level cache. They might on certain implementations, but that's
> > not an architectural guarantee. The same caveat applies to using them to
> > push data to other points in the cache hierarchy (PoUU or PoUIS).
> >
> > Generally, Set/Way cache maintenance operations can only be used to
> > empty or clean the architected caches visible to a given CPU, and only
> > when all masters sharing those caches have been prevented from
> > allocating any cache entries. Outside of IMPLEMENTATION DEFINED
> > power-down sequences or reset-like operations they are typically the
> > wrong thing to use.
> >
> > So any other uses of Set/Way operations should also be treated as
> > suspect, and are likely to be problematic on platforms with system-level
> > caches.
> 
> So what all do we need to flush?  Do we need to flush all modified
> (dirty) cache lines,
> or just a specific subset?

You need to flush anything which needs to be visible at the PoC. So
anything that needs to be accessible with the caches disabled needs to
be flushed. You also need to clean the range corresponding to anywhere
you intend to write to with the caches disabled.

> In Linux the FDT which is modified in the Linux EFI stub isn't
> flushed, nor is the EFI memory map,
> both of which are modified by the UEFI firmware/boot stub.  I feel
> like I'm missing
> something here.

Within Linux we're getting lucky here because those accesses are all
done with the caches enabled, and we don't make any conflicting accesses
while the caches are disabled -- once we turn the caches back on the
data is visible again.

There's a possible problem with mismatched aliases here, as UEFI could
have had cacheable mappings for any arbitrary subset of the physical
address space that might not match what we want to use. So far we
haven't encountered any because the memory attributes used by UEFI
happen to match that used by the kernel.

In the absence of a system cache we could just nuke the cache hierarchy
by set/way to prevent that so long as we know no masters are allocating
entries while we do so. With a system cache it would be possible to nuke
the cache hierarchy by VA, but for the sizeable quantities of RAM we
expect that's not likely to be feasible.

> >> Therefore, this patch switchs to use the "__flush_dcache_area"
> >
> > Nit: s/switchs/switches/
> >
> >> mechanism, which is coppied from Linux.
> >
> > It would be good to state that this uses maintenance by VA, which (sane)
> > system caches should respect.
> >
> >>
> >> Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
> >> ---
> >>
> >> NOTE: I still have not fully boot into Dom0 with this patch.
> >>       However, it seems that the data is flushed out to physical
> >>       memory now.
> >>
> >>  xen/arch/arm/arm64/cache.S | 32 ++++++++++++++++++++++++++++++++
> >>  xen/arch/arm/arm64/head.S  | 24 +++++++++++++++++++-----
> >>  2 files changed, 51 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/xen/arch/arm/arm64/cache.S b/xen/arch/arm/arm64/cache.S
> >> index a445cbf..38f96c2 100644
> >> --- a/xen/arch/arm/arm64/cache.S
> >> +++ b/xen/arch/arm/arm64/cache.S
> >> @@ -97,3 +97,35 @@ finished:
> >>       isb
> >>       ret
> >>  ENDPROC(__flush_dcache_all)
> >> +
> >> +/*
> >> + * dcache_line_size - get the minimum D-cache line size from the CTR register.
> >> + */
> >> +     .macro  dcache_line_size, reg, tmp
> >> +     mrs     \tmp, ctr_el0                   // read CTR
> >> +     ubfm    \tmp, \tmp, #16, #19            // cache line size encoding
> >> +     mov     \reg, #4                        // bytes per word
> >> +     lsl     \reg, \reg, \tmp                // actual cache line size
> >> +     .endm
> >> +
> >> +/*
> >> + *   __flush_dcache_area(kaddr, size)
> >> + *
> >> + *   Ensure that the data held in the page kaddr is written back to the
> >> + *   page in question.
> >> + *
> >> + *   - kaddr   - kernel address
> >> + *   - size    - size in question
> >> + */
> >> +ENTRY(__flush_dcache_area)
> >> +     dcache_line_size x2, x3
> >> +     add     x1, x0, x1
> >> +     sub     x3, x2, #1
> >> +     bic     x0, x0, x3
> >> +1:   dc      civac, x0                       // clean & invalidate D line / unified line
> >> +     add     x0, x0, x2
> >> +     cmp     x0, x1
> >> +     b.lo    1b
> >> +     dsb     sy
> >> +     ret
> >> +ENDPROC(__flush_dcache_area)
> >> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
> >> index 7650abe..704f39d 100644
> >> --- a/xen/arch/arm/arm64/head.S
> >> +++ b/xen/arch/arm/arm64/head.S
> >> @@ -740,16 +740,30 @@ ENTRY(lookup_processor_type)
> >>   */
> >>  ENTRY(efi_xen_start)
> >>          /*
> >> +         * Preserve x0 (fdf pointer) across call to __flush_dcache_area,
> >
> > Sorry if this is a silly question, but what's the "fdf pointer"?
> >
> 
> Should be fdt.  This is a typo from my original patch.

Ok.

> Also, we should remove flush_dcache_all, as that was added for use in
> the EFI boot code.  If we
> don't use it there it doesn't have a user in Xen.

That sounds like a good idea to me.

> >> +         * restore for entry into Xen.
> >> +         */
> >> +        mov   x20, x0
> >> +
> >> +        /*
> >> +         * Flush dcache covering current runtime addresses
> >> +         * of xen text/data. Then flush all of icache.
> >> +         */
> >> +        adrp  x1, _start
> >> +        add   x1, x1, #:lo12:_start
> >> +        adrp  x2, _end
> >> +        add   x2, x2, #:lo12:_end
> >> +        sub   x1, x2, x1
> >
> > Shouldn't the start address go in x0? We saved the fdf pointer earlier
> > but never placed the start address into x0.
> 
> Yes, this does seem to be missing
> >
> > I take it Xen doesn't relocate itself?
> 
> Xen does relocate itself, but that is done later in the boot process
> that is common between the EFI and Image
> boot methods.

Ah, ok.

Thanks,
Mark.
Mark Rutland Oct. 7, 2014, 10:52 a.m. UTC | #6
On Tue, Oct 07, 2014 at 10:27:20AM +0100, Ian Campbell wrote:
> On Mon, 2014-10-06 at 17:28 +0100, Mark Rutland wrote:
> > Hi Suravee,
> > 
> > On Mon, Oct 06, 2014 at 04:49:10PM +0100, suravee.suthikulpanit@amd.com wrote:
> > > From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
> > > 
> > > when booting with EFI, __flush_dcache_all does not correctly flush data.
> > > 
> > > According to Mark Rutland, __flush_dcache_all does not guaranteed to push
> > > data to the PoC if there is a system-level cache as it uses Set/Way
> > > operations.
> > 
> > A better way to look at this is that Set/Way operations are never
> > guaranteed to flush data to the PoC, regardless of the presence of a
> > system-level cache. They might on certain implementations, but that's
> > not an architectural guarantee. The same caveat applies to using them to
> > push data to other points in the cache hierarchy (PoUU or PoUIS).
> > 
> > Generally, Set/Way cache maintenance operations can only be used to
> > empty or clean the architected caches visible to a given CPU, and only
> > when all masters sharing those caches have been prevented from
> > allocating any cache entries. Outside of IMPLEMENTATION DEFINED
> > power-down sequences or reset-like operations they are typically the
> > wrong thing to use.
> > 
> > So any other uses of Set/Way operations should also be treated as
> > suspect, and are likely to be problematic on platforms with system-level
> > caches.
> 
> I suppose this set of problematic situations still includes "running
> apparently UP during boot" since we may not be aware of secondary
> processors currently running platform firmware and therefore
> (potentially) interacting with caches?

Yes.

That said, if those CPUs have active cacheable mappings for memory that
is not special reserved and/or secure, you could have issues with
mismatched aliases anyway.

I'd hope that in the FW secondary CPUs were either running without
caches enabled, or only secure mappings if the caches are necessary.

Mark.
Roy Franz Oct. 14, 2014, 3:48 a.m. UTC | #7
On Tue, Oct 7, 2014 at 3:40 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> On Tue, Oct 07, 2014 at 05:15:58AM +0100, Roy Franz wrote:
>> On Mon, Oct 6, 2014 at 9:28 AM, Mark Rutland <mark.rutland@arm.com> wrote:
>> > Hi Suravee,
>> >
>> > On Mon, Oct 06, 2014 at 04:49:10PM +0100, suravee.suthikulpanit@amd.com wrote:
>> >> From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
>> >>
>> >> when booting with EFI, __flush_dcache_all does not correctly flush data.
>> >>
>> >> According to Mark Rutland, __flush_dcache_all does not guaranteed to push
>> >> data to the PoC if there is a system-level cache as it uses Set/Way
>> >> operations.
>> >
>> > A better way to look at this is that Set/Way operations are never
>> > guaranteed to flush data to the PoC, regardless of the presence of a
>> > system-level cache. They might on certain implementations, but that's
>> > not an architectural guarantee. The same caveat applies to using them to
>> > push data to other points in the cache hierarchy (PoUU or PoUIS).
>> >
>> > Generally, Set/Way cache maintenance operations can only be used to
>> > empty or clean the architected caches visible to a given CPU, and only
>> > when all masters sharing those caches have been prevented from
>> > allocating any cache entries. Outside of IMPLEMENTATION DEFINED
>> > power-down sequences or reset-like operations they are typically the
>> > wrong thing to use.
>> >
>> > So any other uses of Set/Way operations should also be treated as
>> > suspect, and are likely to be problematic on platforms with system-level
>> > caches.
>>
>> So what all do we need to flush?  Do we need to flush all modified
>> (dirty) cache lines,
>> or just a specific subset?
>
> You need to flush anything which needs to be visible at the PoC. So
> anything that needs to be accessible with the caches disabled needs to
> be flushed. You also need to clean the range corresponding to anywhere
> you intend to write to with the caches disabled.
>
>> In Linux the FDT which is modified in the Linux EFI stub isn't
>> flushed, nor is the EFI memory map,
>> both of which are modified by the UEFI firmware/boot stub.  I feel
>> like I'm missing
>> something here.
>
> Within Linux we're getting lucky here because those accesses are all
> done with the caches enabled, and we don't make any conflicting accesses
> while the caches are disabled -- once we turn the caches back on the
> data is visible again.
>
> There's a possible problem with mismatched aliases here, as UEFI could
> have had cacheable mappings for any arbitrary subset of the physical
> address space that might not match what we want to use. So far we
> haven't encountered any because the memory attributes used by UEFI
> happen to match that used by the kernel.

It seems that for Xen we do need to flush the FDT as well - I get a
variety of crashes
with a corrupt FDT when cache state is modeled on the FVP model, and
Suravee sees similar
behavior on Seattle. I was not expecting this, as I looked at the code
in Xen and the caches/TLB
are enabled quite early on, before the FDT is accessed by Xen.  I then
looked at the mappings
used by  edk2 and Xen, and found some differences.  Even after
modifying edk2 to use the same
configuration as Xen, the flushing of the FDT is still required. Xen
and edk2 use the same memory
attributes  in the MAIR_EL2 register (0xFF), but had different
sharing, access perm, and nG settings.

The flushing of the FDT seems to be required, but I'm not sure why.
Does linux access the FDT with the
same flat mapping used by edk2?  I think that Xen uses a different
virtual mapping, so I suppose this
could cause problems with a virtually tagged cache.  (I couldn't find
a description of that detail regarding
the caches.)  I'd really like to understand why this flush is required
for Xen, and to make sure
there isn't other internal edk2 state that would also need flushing.

>
> In the absence of a system cache we could just nuke the cache hierarchy
> by set/way to prevent that so long as we know no masters are allocating
> entries while we do so. With a system cache it would be possible to nuke
> the cache hierarchy by VA, but for the sizeable quantities of RAM we
> expect that's not likely to be feasible.
>
>> >> Therefore, this patch switchs to use the "__flush_dcache_area"
>> >
>> > Nit: s/switchs/switches/
>> >
>> >> mechanism, which is coppied from Linux.
>> >
>> > It would be good to state that this uses maintenance by VA, which (sane)
>> > system caches should respect.
>> >
>> >>
>> >> Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
>> >> ---
>> >>
>> >> NOTE: I still have not fully boot into Dom0 with this patch.
>> >>       However, it seems that the data is flushed out to physical
>> >>       memory now.
>> >>
>> >>  xen/arch/arm/arm64/cache.S | 32 ++++++++++++++++++++++++++++++++
>> >>  xen/arch/arm/arm64/head.S  | 24 +++++++++++++++++++-----
>> >>  2 files changed, 51 insertions(+), 5 deletions(-)
>> >>
>> >> diff --git a/xen/arch/arm/arm64/cache.S b/xen/arch/arm/arm64/cache.S
>> >> index a445cbf..38f96c2 100644
>> >> --- a/xen/arch/arm/arm64/cache.S
>> >> +++ b/xen/arch/arm/arm64/cache.S
>> >> @@ -97,3 +97,35 @@ finished:
>> >>       isb
>> >>       ret
>> >>  ENDPROC(__flush_dcache_all)
>> >> +
>> >> +/*
>> >> + * dcache_line_size - get the minimum D-cache line size from the CTR register.
>> >> + */
>> >> +     .macro  dcache_line_size, reg, tmp
>> >> +     mrs     \tmp, ctr_el0                   // read CTR
>> >> +     ubfm    \tmp, \tmp, #16, #19            // cache line size encoding
>> >> +     mov     \reg, #4                        // bytes per word
>> >> +     lsl     \reg, \reg, \tmp                // actual cache line size
>> >> +     .endm
>> >> +
>> >> +/*
>> >> + *   __flush_dcache_area(kaddr, size)
>> >> + *
>> >> + *   Ensure that the data held in the page kaddr is written back to the
>> >> + *   page in question.
>> >> + *
>> >> + *   - kaddr   - kernel address
>> >> + *   - size    - size in question
>> >> + */
>> >> +ENTRY(__flush_dcache_area)
>> >> +     dcache_line_size x2, x3
>> >> +     add     x1, x0, x1
>> >> +     sub     x3, x2, #1
>> >> +     bic     x0, x0, x3
>> >> +1:   dc      civac, x0                       // clean & invalidate D line / unified line
>> >> +     add     x0, x0, x2
>> >> +     cmp     x0, x1
>> >> +     b.lo    1b
>> >> +     dsb     sy
>> >> +     ret
>> >> +ENDPROC(__flush_dcache_area)
>> >> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
>> >> index 7650abe..704f39d 100644
>> >> --- a/xen/arch/arm/arm64/head.S
>> >> +++ b/xen/arch/arm/arm64/head.S
>> >> @@ -740,16 +740,30 @@ ENTRY(lookup_processor_type)
>> >>   */
>> >>  ENTRY(efi_xen_start)
>> >>          /*
>> >> +         * Preserve x0 (fdf pointer) across call to __flush_dcache_area,
>> >
>> > Sorry if this is a silly question, but what's the "fdf pointer"?
>> >
>>
>> Should be fdt.  This is a typo from my original patch.
>
> Ok.
>
>> Also, we should remove flush_dcache_all, as that was added for use in
>> the EFI boot code.  If we
>> don't use it there it doesn't have a user in Xen.
>
> That sounds like a good idea to me.
>
>> >> +         * restore for entry into Xen.
>> >> +         */
>> >> +        mov   x20, x0
>> >> +
>> >> +        /*
>> >> +         * Flush dcache covering current runtime addresses
>> >> +         * of xen text/data. Then flush all of icache.
>> >> +         */
>> >> +        adrp  x1, _start
>> >> +        add   x1, x1, #:lo12:_start
>> >> +        adrp  x2, _end
>> >> +        add   x2, x2, #:lo12:_end
>> >> +        sub   x1, x2, x1
>> >
>> > Shouldn't the start address go in x0? We saved the fdf pointer earlier
>> > but never placed the start address into x0.
>>
>> Yes, this does seem to be missing
>> >
>> > I take it Xen doesn't relocate itself?
>>
>> Xen does relocate itself, but that is done later in the boot process
>> that is common between the EFI and Image
>> boot methods.
>
> Ah, ok.
>
> Thanks,
> Mark.
Mark Rutland Oct. 14, 2014, 9:21 a.m. UTC | #8
Hi Roy,

[...]

> It seems that for Xen we do need to flush the FDT as well - I get a
> variety of crashes
> with a corrupt FDT when cache state is modeled on the FVP model, and
> Suravee sees similar
> behavior on Seattle. I was not expecting this, as I looked at the code
> in Xen and the caches/TLB
> are enabled quite early on, before the FDT is accessed by Xen.  I then
> looked at the mappings
> used by  edk2 and Xen, and found some differences.  Even after
> modifying edk2 to use the same
> configuration as Xen, the flushing of the FDT is still required. Xen
> and edk2 use the same memory
> attributes  in the MAIR_EL2 register (0xFF), but had different
> sharing, access perm, and nG settings.

I don't think the access perm or nG settings should have any effect, but
the shareability forms part of the memory attributes (along with the
memory type and cacheability), and there are several rules that apply
when accessing a memory location with mismatched attributes. See the
ARMv8 ARM - The AArch64 Application Level Memory Model - Mismatched
memory attributes.

In Linux we're likely getting lucky, and the shareability we use varies
for an SMP or UP kernel. So we need maintenance in at least one of those
cases. This would also apply to any initrd or other image.

Do you happen to know the shareability used by EDK2 and Xen?

> The flushing of the FDT seems to be required, but I'm not sure why.
> Does linux access the FDT with the
> same flat mapping used by edk2?  I think that Xen uses a different
> virtual mapping, so I suppose this
> could cause problems with a virtually tagged cache.  (I couldn't find
> a description of that detail regarding
> the caches.)  I'd really like to understand why this flush is required
> for Xen, and to make sure there
> isn't other internal edk2 state that would also need flushing.

The D-caches should behave as if they are PIPT, so the virtual addresses
used should not be a problem. Linux maps the FDT in the swapper pgdir
rather than the idmap pgdir.

Linux might be doing some work that happens to flush the relevant
portions of the cache, even if accidentally, before accessing the FDT.

I would also like to understand what's going on here.

Thanks,
Mark.
Ian Campbell Oct. 14, 2014, 9:35 a.m. UTC | #9
On Tue, 2014-10-14 at 10:21 +0100, Mark Rutland wrote:
> Hi Roy,
> 
> [...]
> 
> > It seems that for Xen we do need to flush the FDT as well - I get a
> > variety of crashes
> > with a corrupt FDT when cache state is modeled on the FVP model, and
> > Suravee sees similar
> > behavior on Seattle. I was not expecting this, as I looked at the code
> > in Xen and the caches/TLB
> > are enabled quite early on, before the FDT is accessed by Xen.  I then
> > looked at the mappings
> > used by  edk2 and Xen, and found some differences.  Even after
> > modifying edk2 to use the same
> > configuration as Xen, the flushing of the FDT is still required. Xen
> > and edk2 use the same memory
> > attributes  in the MAIR_EL2 register (0xFF), but had different
> > sharing, access perm, and nG settings.
> 
> I don't think the access perm or nG settings should have any effect, but
> the shareability forms part of the memory attributes (along with the
> memory type and cacheability), and there are several rules that apply
> when accessing a memory location with mismatched attributes. See the
> ARMv8 ARM - The AArch64 Application Level Memory Model - Mismatched
> memory attributes.
> 
> In Linux we're likely getting lucky, and the shareability we use varies
> for an SMP or UP kernel. So we need maintenance in at least one of those
> cases. This would also apply to any initrd or other image.
> 
> Do you happen to know the shareability used by EDK2 and Xen?

Xen maps everything inner-shareable. Dunno about EDK2.

Is the real issue here not a lack of specification for some corner cases
of the boot protocol? Can we get that fixed somehow?

Part of me wants to suggest that UEFI (and bootloaders generally) ought
to be cleaning caches for anything they have loaded into RAM before
launching an OS as a matter of good hygiene.

Ian.

> 
> > The flushing of the FDT seems to be required, but I'm not sure why.
> > Does linux access the FDT with the
> > same flat mapping used by edk2?  I think that Xen uses a different
> > virtual mapping, so I suppose this
> > could cause problems with a virtually tagged cache.  (I couldn't find
> > a description of that detail regarding
> > the caches.)  I'd really like to understand why this flush is required
> > for Xen, and to make sure there
> > there isn't other internal edk2 state that would also need flushing.
> 
> The D-caches should behave as if they are PIPT, so the virtual addresses
> used should not be a problem. Linux maps the FDT in the swapper pgdir
> rather than the idmap pgdir.
> 
> Linux might be doing some work that happens to flush the relevant
> portions of the cache, even if accidentally, before accessing the FDT.
> 
> I would also like to understand what's going on here.
> 
> Thanks,
> Mark.
Mark Rutland Oct. 14, 2014, 10:32 a.m. UTC | #10
On Tue, Oct 14, 2014 at 10:35:23AM +0100, Ian Campbell wrote:
> On Tue, 2014-10-14 at 10:21 +0100, Mark Rutland wrote:
> > Hi Roy,
> > 
> > [...]
> > 
> > > It seems that for Xen we do need to flush the FDT as well - I get a
> > > variety of crashes
> > > with a corrupt FDT when cache state is modeled on the FVP model, and
> > > Suravee sees similar
> > > behavior on Seattle. I was not expecting this, as I looked at the code
> > > in Xen and the caches/TLB
> > > are enabled quite early on, before the FDT is accessed by Xen.  I then
> > > looked at the mappings
> > > used by  edk2 and Xen, and found some differences.  Even after
> > > modifying edk2 to use the same
> > > configuration as Xen, the flushing of the FDT is still required. Xen
> > > and edk2 use the same memory
> > > attributes  in the MAIR_EL2 register (0xFF), but had different
> > > sharing, access perm, and nG settings.
> > 
> > I don't think the access perm or nG settings should have any effect, but
> > the shareability forms part of the memory attributes (along with the
> > memory type and cacheability), and there are several rules that apply
> > when accessing a memory location with mismatched attributes. See the
> > ARMv8 ARM - The AArch64 Application Level Memory Model - Mismatched
> > memory attributes.
> > 
> > In Linux we're likely getting lucky, and the shareability we use varies
> > for an SMP or UP kernel. So we need maintenance in at least one of those
> > cases. This would also apply to any initrd or other image.
> > 
> > Do you happen to know the shareability used by EDK2 and Xen?
> 
> Xen maps everything inner-shareable. Dunno about EDK2.

Ok. That matches what an SMP Linux kernel will do, so it looks like
we're just getting lucky with Linux. I'll have a play and see if I can
trigger similar issues.

> Is the real issue here not a lack of specification for some corner cases
> of the boot protocol? Can we get that fixed somehow?

To an extent, yes. We can try to fix up the Linux side with patches to
Documentation/arm64/booting.txt. As far as I am aware, for UEFI that
will require membership of the UEFI forum.

> Part of me wants to suggest that UEFI (and bootloaders generally) ought
> to be cleaning caches for anything they have loaded into RAM before
> launching an OS as a matter of good hygiene.

In general, yes.

Unfortunately, UEFI can't perform the maintenance in this case, because
the stub modifies things. I was under the impression it copied and
modified the FDT to embed the command line -- UEFI has no visibility of
this and therefore cannot be in charge of flushing it. So in this case,
the stub needs to be thought of as the bootloader, and needs to be in
charge of any required maintenance.

There are a tonne of subtleties here, and certain properties we would
like (e.g. a completely clean cache hierarchy upon entry to the OS)
aren't necessarily possible to provide in general (thanks to the wonders
of non-architected system level caches, interaction with bootloaders,
etc).

Mark.
Ian Campbell Oct. 14, 2014, 10:39 a.m. UTC | #11
On Tue, 2014-10-14 at 11:32 +0100, Mark Rutland wrote:
> On Tue, Oct 14, 2014 at 10:35:23AM +0100, Ian Campbell wrote:
> > On Tue, 2014-10-14 at 10:21 +0100, Mark Rutland wrote:
> > > Hi Roy,
> > > 
> > > [...]
> > > 
> > > > It seems that for Xen we do need to flush the FDT as well - I get a
> > > > variety of crashes
> > > > with a corrupt FDT when cache state is modeled on the FVP model, and
> > > > Suravee sees similar
> > > > behavior on Seattle. I was not expecting this, as I looked at the code
> > > > in Xen and the caches/TLB
> > > > are enabled quite early on, before the FDT is accessed by Xen.  I then
> > > > looked at the mappings
> > > > used by  edk2 and Xen, and found some differences.  Even after
> > > > modifying edk2 to use the same
> > > > configuration as Xen, the flushing of the FDT is still required. Xen
> > > > and edk2 use the same memory
> > > > attributes  in the MAIR_EL2 register (0xFF), but had different
> > > > sharing, access perm, and nG settings.
> > > 
> > > I don't think the access perm or nG settings should have any effect, but
> > > the shareability forms part of the memory attributes (along with the
> > > memory type and cacheability), and there are several rules that apply
> > > when accessing a memory location with mismatched attributes. See the
> > > ARMv8 ARM - The AArch64 Application Level Memory Model - Mismatched
> > > memory attributes.
> > > 
> > > In Linux we're likely getting lucky, and the shareability we use varies
> > > for an SMP or UP kernel. So we need maintenance in at least one of those
> > > cases. This would also apply to any initrd or other image.
> > > 
> > > Do you happen to know the shareability used by EDK2 and Xen?
> > 
> > Xen maps everything inner-shareable. Dunno about EDK2.
> 
> Ok. That matches what an SMP Linux kernel will do, so it looks like
> we're just getting lucky with Linux. I'lll have a play and see if I can
> trigger similar issues.
> 
> > Is the real issue here not a lack of specification for some corner cases
> > of the boot protocol? Can we get that fixed somehow?
> 
> To an extent, yes. We can try to fix up the Linux side with patche to
> Documentation/arm64/booting.txt. As far as I am aware, for UEFI that
> will require membership of the UEFI forum.
> 

Is Documentation/arm64/booting.txt relevant here since the kernel is
being launched as an EFI app, which already has a standardised calling
convention of its own. I suppose booting.txt is in addition to the UEFI
convention. It probably would be best to formalise that (what if a
second OS comes along with contradictory requirements?)

> > Part of me wants to suggest that UEFI (and bootloaders generally) ought
> > to be cleaning caches for anything they have loaded into RAM before
> > launching an OS as a matter of good hygiene.
> 
> In general, yes.
> 
> Unfortunately, UEFI can't perform the maintenance in this case, because
> the stub modifies things. I was under the impression it copied and
> modified the FDT to embed the command line -- UEFI has no visibiltiy of
> this and therefore cannot be in charge of flushing it. So in this case,
> the stub needs to be thought of as the bootloader, and needs to be in
> charge of any required maintenance.

Right, that's what I was thinking. UEFI enters bootloader with
everything it has done all nice and clean and consistent. Anything the
stub then does it is responsible for maintaining the cleanliness.

> There are a tonne of subtleties here, and certain properties we would
> like (e.g. a completely clean cache hierarchy upon entry to the OS)
> aren't necessarily possible to provide in general (thanks to the wonders
> of non-architected system level caches, interaction with bootloaders,
> etc).

I suppose it is easier for the UEFI implementation, since it knows the
platform it runs on and therefore knows about the caches. Harder for the
stub though :-/

Ian.
Mark Rutland Oct. 14, 2014, 11:23 a.m. UTC | #12
On Tue, Oct 14, 2014 at 11:39:37AM +0100, Ian Campbell wrote:
> On Tue, 2014-10-14 at 11:32 +0100, Mark Rutland wrote:
> > On Tue, Oct 14, 2014 at 10:35:23AM +0100, Ian Campbell wrote:
> > > On Tue, 2014-10-14 at 10:21 +0100, Mark Rutland wrote:
> > > > Hi Roy,
> > > > 
> > > > [...]
> > > > 
> > > > > It seems that for Xen we do need to flush the FDT as well - I get a
> > > > > variety of crashes
> > > > > with a corrupt FDT when cache state is modeled on the FVP model, and
> > > > > Suravee sees similar
> > > > > behavior on Seattle. I was not expecting this, as I looked at the code
> > > > > in Xen and the caches/TLB
> > > > > are enabled quite early on, before the FDT is accessed by Xen.  I then
> > > > > looked at the mappings
> > > > > used by  edk2 and Xen, and found some differences.  Even after
> > > > > modifying edk2 to use the same
> > > > > configuration as Xen, the flushing of the FDT is still required. Xen
> > > > > and edk2 use the same memory
> > > > > attributes  in the MAIR_EL2 register (0xFF), but had different
> > > > > sharing, access perm, and nG settings.
> > > > 
> > > > I don't think the access perm or nG settings should have any effect, but
> > > > the shareability forms part of the memory attributes (along with the
> > > > memory type and cacheability), and there are several rules that apply
> > > > when accessing a memory location with mismatched attributes. See the
> > > > ARMv8 ARM - The AArch64 Application Level Memory Model - Mismatched
> > > > memory attributes.
> > > > 
> > > > In Linux we're likely getting lucky, and the shareability we use varies
> > > > for an SMP or UP kernel. So we need maintenance in at least one of those
> > > > cases. This would also apply to any initrd or other image.
> > > > 
> > > > Do you happen to know the shareability used by EDK2 and Xen?
> > > 
> > > Xen maps everything inner-shareable. Dunno about EDK2.
> > 
> > Ok. That matches what an SMP Linux kernel will do, so it looks like
> > we're just getting lucky with Linux. I'lll have a play and see if I can
> > trigger similar issues.
> > 
> > > Is the real issue here not a lack of specification for some corner cases
> > > of the boot protocol? Can we get that fixed somehow?
> > 
> > To an extent, yes. We can try to fix up the Linux side with patche to
> > Documentation/arm64/booting.txt. As far as I am aware, for UEFI that
> > will require membership of the UEFI forum.
> > 
> 
> Is Documentation/arm64/booting.txt relevant here since the kernel is
> being launched as an EFI app, which already has a standardised calling
> convention of its own. I suppose booting.txt is in addition to the UEFI
> convention. It probably would be best to formalise that (what if a
> second OS comes along with contradictory requirements?)

If we're trying to fix up UEFI, that needs to happen at the UEFI forum
level. I believe there are some additional requirements in SBSA/SBBR,
but I haven't studied them in detail.

If there are requirements that Linux needs to have met regardless of
UEFI, we should ensure we mention that in booting.txt.

It would be nice to have cross-OS agreement on boot protocols, but at
the moment the table is somewhat empty beyond Linux and Xen. I had a
conversation with the FreeBSD guys working on 64-bit ARM stuff, but
they're still at an early stage, and I can't recall the specifics of
their boot process.

> > > Part of me wants to suggest that UEFI (and bootloaders generally) ought
> > > to be cleaning caches for anything they have loaded into RAM before
> > > launching an OS as a matter of good hygiene.
> > 
> > In general, yes.
> > 
> > Unfortunately, UEFI can't perform the maintenance in this case, because
> > the stub modifies things. I was under the impression it copied and
> > modified the FDT to embed the command line -- UEFI has no visibiltiy of
> > this and therefore cannot be in charge of flushing it. So in this case,
> > the stub needs to be thought of as the bootloader, and needs to be in
> > charge of any required maintenance.
> 
> Right, that's what I was thinking. UEFI enters bootloader with
> everything it has done all nice and clean and consistent. Anything the
> stub then does it is responsible for maintaining the cleanliness.

There are two horrible parts here:

 * EFI has no idea what a boot loader is. As far as it's aware, the
   kernel + efi stub is just another UEFI application until it calls
   ExitBootServices. For all UEFI knows, it may as well be a calculator
   until that point, and flushing the entire cache hierarchy for a
   calculator seems a little extreme.

 * Defining "nice and clean and consistent".
  
   As far as I am aware, UEFI may have an arbitrary set of mappings
   present during boot services time, with arbitrary drivers active. 
   That means that UEFI can create dirty cache entries concurrently with
   the bootloader, in addition to the usual clean entries that can be
   allocated at any time thanks to speculative fetches.
   
   So while we're in the bootloader, any system level caches can have
   entries allocated to it, and as those aren't architected the only
   thing we can do is flush those by VA for the portions we care about.
   
So we can have "initially consistent", but that might not be useful.

> > There are a tonne of subtleties here, and certain properties we would
> > like (e.g. a completely clean cache hierarchy upon entry to the OS)
> > aren't necessarily possible to provide in general (thanks to the wonders
> > of non-architected system level caches, interaction with bootloaders,
> > etc).
> 
> I suppose it is easier for the UEFI implementation, since it knows the
> platform it runs on and there knows about the caches. Harder for the
> stub though :-/

Yeah. System-level caches interact badly with pretty much any scenario
where ownership of the MMU is transferred (UEFI boot, kexec), and there
doesn't seem to be a single agent that can be charged with ownership of
maintenance.

This is something I've been meaning to revisit, but it takes a while to
get back up to speed on the minutiae of the cache architecture and the
rules for memory attributes, and I haven't had the time recently.

We do have a very heavy hammer that we know will work: flushing the
memory by PA in the stub once the MMU and caches are disabled. A
back-of-the-envelope calculation shows that could take minutes to issue
on a server machine (say 2GHz, with 16GB of RAM), so that's very much a
last resort.

We could try to manage the system caches explicitly, but then we need
code to do so very early, we need to have them described in the
appropriate firmware tables, and they need to be manageable from the
non-secure side (which I believe is not always the case). That somewhat
defeats the portability aspect of booting as an EFI application.

So yes, it's harder for the stub :/

Mark.
Ian Campbell Oct. 14, 2014, 12:54 p.m. UTC | #13
On Tue, 2014-10-14 at 12:23 +0100, Mark Rutland wrote:
> On Tue, Oct 14, 2014 at 11:39:37AM +0100, Ian Campbell wrote:
> > On Tue, 2014-10-14 at 11:32 +0100, Mark Rutland wrote:
> > > On Tue, Oct 14, 2014 at 10:35:23AM +0100, Ian Campbell wrote:
> > > > On Tue, 2014-10-14 at 10:21 +0100, Mark Rutland wrote:
> > > > > Hi Roy,
> > > > > 
> > > > > [...]
> > > > > 
> > > > > > It seems that for Xen we do need to flush the FDT as well - I get a
> > > > > > variety of crashes
> > > > > > with a corrupt FDT when cache state is modeled on the FVP model, and
> > > > > > Suravee sees similar
> > > > > > behavior on Seattle. I was not expecting this, as I looked at the code
> > > > > > in Xen and the caches/TLB
> > > > > > are enabled quite early on, before the FDT is accessed by Xen.  I then
> > > > > > looked at the mappings
> > > > > > used by  edk2 and Xen, and found some differences.  Even after
> > > > > > modifying edk2 to use the same
> > > > > > configuration as Xen, the flushing of the FDT is still required. Xen
> > > > > > and edk2 use the same memory
> > > > > > attributes  in the MAIR_EL2 register (0xFF), but had different
> > > > > > sharing, access perm, and nG settings.
> > > > > 
> > > > > I don't think the access perm or nG settings should have any effect, but
> > > > > the shareability forms part of the memory attributes (along with the
> > > > > memory type and cacheability), and there are several rules that apply
> > > > > when accessing a memory location with mismatched attributes. See the
> > > > > ARMv8 ARM - The AArch64 Application Level Memory Model - Mismatched
> > > > > memory attributes.
> > > > > 
> > > > > In Linux we're likely getting lucky, and the shareability we use varies
> > > > > for an SMP or UP kernel. So we need maintenance in at least one of those
> > > > > cases. This would also apply to any initrd or other image.
> > > > > 
> > > > > Do you happen to know the shareability used by EDK2 and Xen?
> > > > 
> > > > Xen maps everything inner-shareable. Dunno about EDK2.
> > > 
> > > Ok. That matches what an SMP Linux kernel will do, so it looks like
> > > we're just getting lucky with Linux. I'lll have a play and see if I can
> > > trigger similar issues.
> > > 
> > > > Is the real issue here not a lack of specification for some corner cases
> > > > of the boot protocol? Can we get that fixed somehow?
> > > 
> > > To an extent, yes. We can try to fix up the Linux side with patche to
> > > Documentation/arm64/booting.txt. As far as I am aware, for UEFI that
> > > will require membership of the UEFI forum.
> > > 
> > 
> > Is Documentation/arm64/booting.txt relevant here since the kernel is
> > being launched as an EFI app, which already has a standardised calling
> > convention of its own. I suppose booting.txt is in addition to the UEFI
> > convention. It probably would be best to formalise that (what if a
> > second OS comes along with contradictory requirements?)
> 
> If we're trying to fix up UEFI, that needs to happen at the UEFI forum
> level. I believe there are some additional reqwuirements in SBSA/SBBR,
> but I haven't studied them in detail.
> 
> If there are requirements that Linux needs to have met regardless of
> UEFI, we should ensure we mention that in booting.txt.
> 
> It would be nice to have cross-OS agreement on boot protocols, but at
> the moment the table is somewhat empty beyond Linux and Xen. I had a
> conversation with the FreeBSD guys working on 64-bit ARM stuff, but
> they're still at an early stage, and I can't recall the specifics of
> their boot process.

I was thinking (perhaps naïvely) that these problems would be mostly the
same for any OS and that the solution ought to be specified in terms
which allow any OS to know what to expect and/or what is expected of
them. Really OSes ought to be designing their boot protocols within the
set of constraints implied by the (improved) UEFI launching spec, not
vice versa.

> > > > Part of me wants to suggest that UEFI (and bootloaders generally) ought
> > > > to be cleaning caches for anything they have loaded into RAM before
> > > > launching an OS as a matter of good hygiene.
> > > 
> > > In general, yes.
> > > 
> > > Unfortunately, UEFI can't perform the maintenance in this case, because
> > > the stub modifies things. I was under the impression it copied and
> > > modified the FDT to embed the command line -- UEFI has no visibiltiy of
> > > this and therefore cannot be in charge of flushing it. So in this case,
> > > the stub needs to be thought of as the bootloader, and needs to be in
> > > charge of any required maintenance.
> > 
> > Right, that's what I was thinking. UEFI enters bootloader with
> > everything it has done all nice and clean and consistent. Anything the
> > stub then does it is responsible for maintaining the cleanliness.
> 
> There are two horrible parts here:
> 
>  * EFI has no idea what a boot loader is. As far as it's aware, the
>    kernel + efi stub is just another UEFI application until it calls
>    ExitBootServices. For all UEFI knows, it may as well be a calculator
>    until that point, and flushing the entire cache hierarchy for a
>    calculator seems a little extreme.

Most EFI applications are not that trivial though, and any non-trivial
app is going to (with some reasonably high probability) need to touch
the MMU. I don't see the problem with doing something which always works
even if it might be overkill for some small subset of things you might
be launching.

>  * Defining "nice and clean and consistent".
>   
>    As far as I am aware, UEFI may have an arbitrary set of mappings
>    present during boot services time, with arbitrary drivers active. 
>    That means that UEFI can create dirty cache entries concurrently with
>    the bootloader, in addition to the usual clean entries that can be
>    allocated at any time thanks to speculative fetches.
>    
>    So while we're in the bootloader, any system level caches can have
>    entries allocated to it, and as those aren't architected the only
>    thing we can do is flush those by VA for the portions we care about.
>    
> So we can have "initially consistent", but that might not be useful.

Hrm, yes, rather unfortunate.

> 
> > > There are a tonne of subtleties here, and certain properties we would
> > > like (e.g. a completely clean cache hierarchy upon entry to the OS)
> > > aren't necessarily possible to provide in general (thanks to the wonders
> > > of non-architected system level caches, interaction with bootloaders,
> > > etc).
> > 
> > I suppose it is easier for the UEFI implementation, since it knows the
> > platform it runs on and there knows about the caches. Harder for the
> > stub though :-/
> 
> Yeah. System-level caches interact badly with pretty much any scenario
> where ownership of the MMU is transferred (UEFI boot, kexec), and there
> doesn't seem to be a single agent that can be charged with ownership of
> maintenance.
> 
> This is something I've been meaning to revisit, but it takes a while to
> get back up to speed on the minutiae of the cache architecture and the
> rules for memory attributes, and I haven't had the time recently.
> 
> We do have a very heavy hammer that we know will work: flushing the
> memory by PA in the stub once the MMU and caches are disabled. A
> back-of-the-envelope calculation shows that could take minutes to issue
> on a server machine (say 2GHz, with 16GB of RAM), so that's very much a
> last resort.

Ouch...

> We could try to manage the system caches explicitly, but then we need
> code to do so very early, we need to have them described in the
> appropriate firmware tables, and they need to be manageable from the
> non-secure side (which I believe is not always the case). That somewhat
> defeat the portability aspect of booting as an EFI application.
> 
> > So yes, it's harder for the stub :/

Indeed.

Probably this isn't even close to the correct venue. I'm not sure where
better to transfer it though. One of the Linaro lists perhaps?

Ian.
Mark Rutland Oct. 14, 2014, 2:30 p.m. UTC | #14
[...]

> > It would be nice to have cross-OS agreement on boot protocols, but at
> > the moment the table is somewhat empty beyond Linux and Xen. I had a
> > conversation with the FreeBSD guys working on 64-bit ARM stuff, but
> > they're still at an early stage, and I can't recall the specifics of
> > their boot process.
> 
> I was thinking (perhaps naïvely) that these problems would be mostly the
> same for any OS and that the solution ought to be specified in terms
> which allow any OS to know what to expect and/or what is expected of
> them. Really OSes ought to be designing their boot protocols within the
> set of constraints implied by the (improved) UEFI launching spec, not
> vice versa.

w.r.t. anything booting via UEFI, I would expect that to be covered by
the output of the UEFI forum. The cross-OS agreement would be for stuff
not covered by UEFI (e.g. booting without UEFI, whether to use the UEFI
memory map or one provided elsewhere, etc).

[...]

> > > Right, that's what I was thinking. UEFI enters bootloader with
> > > everything it has done all nice and clean and consistent. Anything the
> > > stub then does it is responsible for maintaining the cleanliness.
> > 
> > There are two horrible parts here:
> > 
> >  * EFI has no idea what a boot loader is. As far as it's aware, the
> >    kernel + efi stub is just another UEFI application until it calls
> >    ExitBootServices. For all UEFI knows, it may as well be a calculator
> >    until that point, and flushing the entire cache hierarchy for a
> >    calculator seems a little extreme.
> 
> Most EFI applications are not that trivial though, and any non-trivial
> app is going to (with some reasonably high probability) need to touch
> the MMU. I don't see the problem with doing something which always works
> even if it might be overkill for some small subset of things you might
> be launching.

That sounds reasonable to me.

> >  * Defining "nice and clean and consistent".
> >   
> >    As far as I am aware, UEFI may have an arbitrary set of mappings
> >    present during boot services time, with arbitrary drivers active. 
> >    That means that UEFI can create dirty cache entries concurrently with
> >    the bootloader, in addition to the usual clean entries that can be
> >    allocated at any time thanks to speculative fetches.
> >    
> >    So while we're in the bootloader, any system level caches can have
> >    entries allocated to it, and as those aren't architected the only
> >    thing we can do is flush those by VA for the portions we care about.
> >    
> > So we can have "initially consistent", but that might not be useful.
> 
> Hrm, yes, rather unfortunate.
> 
> > 
> > > > There are a tonne of subtleties here, and certain properties we would
> > > > like (e.g. a completely clean cache hierarchy upon entry to the OS)
> > > > aren't necessarily possible to provide in general (thanks to the wonders
> > > > of non-architected system level caches, interaction with bootloaders,
> > > > etc).
> > > 
> > > I suppose it is easier for the UEFI implementation, since it knows the
> > > platform it runs on and there knows about the caches. Harder for the
> > > stub though :-/
> > 
> > Yeah. System-level caches interact badly with pretty much any scenario
> > where ownership of the MMU is transferred (UEFI boot, kexec), and there
> > doesn't seem to be a single agent that can be charged with ownership of
> > maintenance.
> > 
> > This is something I've been meaning to revisit, but it takes a while to
> > get back up to speed on the minutiae of the cache architecture and the
> > rules for memory attributes, and I haven't had the time recently.
> > 
> > We do have a very heavy hammer that we know will work: flushing the
> > memory by PA in the stub once the MMU and caches are disabled. A
> > back-of-the-envelope calculation shows that could take minutes to issue
> > on a server machine (say 2GHz, with 16GB of RAM), so that's very much a
> > last resort.
> 
> Ouch...

Looking at that again, I was off by a factor of 1000, and that actually
comes to about 0.13 seconds (though solely for issuing the CMOs). So that
might not be as blunt as I made it out to be, but it's still not great as
platforms get larger.

> > We could try to manage the system caches explicitly, but then we need
> > code to do so very early, we need to have them described in the
> > appropriate firmware tables, and they need to be manageable from the
> > non-secure side (which I believe is not always the case). That somewhat
> > defeat the portability aspect of booting as an EFI application.
> > 
> > So yes, it's harder for the stub :
> 
> Indeed.
> 
> Probably this isn't even close to the correct venue. I'm not sure where
> better to transfer it though. One of the Linaro lists perhaps?

I'm not really sure where the right place is. There are quite a few
parties who have an interest in this problem (whether they realise it or
not). It would be nice to figure out more precisely what's happening
here first, anyhow.

Mark.
Roy Franz Oct. 14, 2014, 4:26 p.m. UTC | #15
On Tue, Oct 14, 2014 at 7:30 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> [...]
>
>> > It would be nice to have cross-OS agreement on boot protocols, but at
>> > the moment the table is somewhat empty beyond Linux and Xen. I had a
>> > conversation with the FreeBSD guys working on 64-bit ARM stuff, but
>> > they're still at an early stage, and I can't recall the specifics of
>> > their boot process.
>>
>> I was thinking (perhaps naïvely) that these problems would be mostly the
>> same for any OS and that the solution ought to be specified in terms
>> which allow any OS to know what to expect and/or what is expected of
>> them. Really OSes ought to be designing their boot protocols within the
>> set of constraints implied by the (improved) UEFI launching spec, not
>> vice versa.
>
> w.r.t. anything booting via UEFI, I would expect that to be covered by
> the output of the UEFI forum. The cross-OS agreement would be for stuff
> not covered by UEFI (e.g. booting without UEFI, whether to use the UEFI
> memory map or one provided elsewhere, etc).
>
> [...]
>
>> > > Right, that's what I was thinking. UEFI enters bootloader with
>> > > everything it has done all nice and clean and consistent. Anything the
>> > > stub then does it is responsible for maintaining the cleanliness.
>> >
>> > There are two horrible parts here:
>> >
>> >  * EFI has no idea what a boot loader is. As far as it's aware, the
>> >    kernel + efi stub is just another UEFI application until it calls
>> >    ExitBootServices. For all UEFI knows, it may as well be a calculator
>> >    until that point, and flushing the entire cache hierarchy for a
>> >    calculator seems a little extreme.
>>
>> Most EFI applications are not that trivial though, and any non-trivial
>> app is going to (with some reasonably high probability) need to touch
>> the MMU. I don't see the problem with doing something which always works
>> even if it might be overkill for some small subset of things you might
>> be launching.
>
> That sounds reasonable to me.
>
>> >  * Defining "nice and clean and consistent".
>> >
>> >    As far as I am aware, UEFI may have an arbitrary set of mappings
>> >    present during boot services time, with arbitrary drivers active.
>> >    That means that UEFI can create dirty cache entries concurrently with
>> >    the bootloader, in addition to the usual clean entries that can be
>> >    allocated at any time thanks to speculative fetches.

UEFI specifies that memory in the EFI memory map is flat mapped, but I'd
have to look to see if it prohibits other mappings in addition to that.
Other mappings are implementation dependent (devices, etc., or memory not
in the EFI memory map).

In reviewing the AArch64-specific portion of the spec (section 2.3.6,
AArch64 Platforms), it says in part:

· Implementations of boot services will enable architecturally manageable
  caches and TLBs, i.e. those that can be managed directly using
  implementation independent registers using mechanisms and procedures
  defined in the ARM Architecture Reference Manual. They should not enable
  caches requiring platform information to manage or invoke
  non-architectural cache/TLB lockdown mechanisms.

Does this imply that system level caches should not be enabled?

UEFI also specifies uni-processor, so we don't have to worry about
other cores' caches.

The spec does not mention the details of memory attributes - EDK2 currently maps
memory as non-shared, attributes 0xFF.


>> >
>> >    So while we're in the bootloader, any system level caches can have
>> >    entries allocated to it, and as those aren't architected the only
>> >    thing we can do is flush those by VA for the portions we care about.

Maybe the firmware is 'wrong' to enable these caches?  Are we guaranteed
that these caches can be disabled on all implementations?  Updating or
clarifying the spec to have these disabled could simplify the problem a bit.

>> >
>> > So we can have "initially consistent", but that might not be useful.
>>
>> Hrm, yes, rather unfortunate.
>>
>> >
>> > > > There are a tonne of subtleties here, and certain properties we would
>> > > > like (e.g. a completely clean cache hierarchy upon entry to the OS)
>> > > > aren't necessarily possible to provide in general (thanks to the wonders
>> > > > of non-architected system level caches, interaction with bootloaders,
>> > > > etc).
>> > >
>> > > I suppose it is easier for the UEFI implementation, since it knows the
>> > > platform it runs on and there knows about the caches. Harder for the
>> > > stub though :-/
>> >
>> > Yeah. System-level caches interact badly with pretty much any scenario
>> > where ownership of the MMU is transferred (UEFI boot, kexec), and there
>> > doesn't seem to be a single agent that can be charged with ownership of
>> > maintenance.
>> >
>> > This is something I've been meaning to revisit, but it takes a while to
>> > get back up to speed on the minutiae of the cache architecture and the
>> > rules for memory attributes, and I haven't had the time recently.
>> >
>> > We do have a very heavy hammer that we know will work: flushing the
>> > memory by PA in the stub once the MMU and caches are disabled. A
>> > back-of-the-envelope calculation shows that could take minutes to issue
>> > on a server machine (say 2GHz, with 16GB of RAM), so that's very much a
>> > last resort.
>>
>> Ouch...
>
> Looking at that again, I was off by an order of 1000, and that actually
> comes to about 0.13 seconds (though solely for CMO issue). So that might
> not be as blunt as I made it out to be, but it's still not great as
> platforms get larger.

I think we should be able to limit the memory we need to flush, as there
should be no need to flush the free memory, just what is in use.  I think
that a good portion, if not all, of that could be flushed from the C code
with caches enabled, as we know it won't be modified after that point
(FDT, initrd, etc.).  We can do this in C code after calling
ExitBootServices(), and immediately before calling the Xen entry point
efi_xen_start().  There are no EFI calls in this path between the last bit
of C code and the disabling of caches and MMU, so I think we should be able
to identify if anything would need to be flushed in the ASM code with
caches off.

>
>> > We could try to manage the system caches explicitly, but then we need
>> > code to do so very early, we need to have them described in the
>> > appropriate firmware tables, and they need to be manageable from the
>> > non-secure side (which I believe is not always the case). That somewhat
>> > defeat the portability aspect of booting as an EFI application.
>> >
>> > So yes, it's harder for the stub :
>>
>> Indeed.
>>
>> Probably this isn't even close to the correct venue. I'm not sure where
>> better to transfer it though. One of the Linaro lists perhaps?
>
> I'm not really sure where the right place is. There are quite a few
> parties who have an interest in this problem (whether they realise it or
> not). It would be nice to figure out more precisely what's happening
> here first, anyhow.
>
> Mark.

Glad I'm not the only one confused :)  Getting back to the practical side
of this, I'm thinking I (or Suravee) should update the patch to add the
flushing of the FDT, as this is required for booting with the change to
flush_dcache_area(), even if the exact mechanism isn't understood.  This
gets us a more correct and working implementation, but not a final/robust
implementation.

Roy
Mark Rutland Oct. 14, 2014, 5:07 p.m. UTC | #16
[...]

> >> >    As far as I am aware, UEFI may have an arbitrary set of mappings
> >> >    present during boot services time, with arbitrary drivers active.
> >> >    That means that UEFI can create dirty cache entries concurrently with
> >> >    the bootloader, in addition to the usual clean entries that can be
> >> >    allocated at any time thanks to speculative fetches.
> 
> UEFI specifies that memory in the EFI memory map is flat mapped, but
> I'd have to look to see if
> it prohibits other mappings in addition to that.  Other mappings are
> implementation
> dependent (devices, etc. or memory not in the EFI memory map.)

Regardless of the set of mappings that may exist, the key point is that
we don't know what may have been allocated into a cache. Any portion of
memory could have entries in the cache hierarchy, which could be clean
or dirty.

> In reviewing the Aarch64 specific portion of the spec (section 2.3.6
> Aarch64 Platforms)
> it says in part:
> 
> · Implementations of boot services will enable architecturally
> manageable caches and TLBs i.e.
>   those that can be managed directly using implementation independent
> registers using
>   mechanisms and procedures defined in the ARM Architecture Reference
> Manual. They should
>   not enable caches requiring platform information to manage or invoke
> non-architectural cache/
>   TLB lockdown mechanisms.
> 
> Does this imply that system level caches should not be enabled?

Arguably yes, but on a technicality no, because it's possible to flush
them by VA (albeit extremely slowly).

> UEFI also specifies uni-processor, so we don't have to worry about
> other cores' caches.

Ok.

> The spec does not mention the details of memory attributes - EDK2 currently maps
> memory as non-shared, attributes 0xFF.

Ok.

> >> >
> >> >    So while we're in the bootloader, any system level caches can have
> >> >    entries allocated to it, and as those aren't architected the only
> >> >    thing we can do is flush those by VA for the portions we care about.
> 
> Maybe the firmware is 'wrong' to enable these caches?

It is certainly arguable.

> Are we guaranteed that these caches can be disabled on all
> implementations?

I believe on some implementations the non-secure side will not have
access to the control registers. Beyond that I don't know.

> Updating/clarifying the spec to have these disabled could simplify the
> problem a bit.

Possibly, yes. I'm not sure what we'd clarify it to say, however.

> >> > So we can have "initially consistent", but that might not be useful.
> >>
> >> Hrm, yes, rather unfortunate.
> >>
> >> >
> >> > > > There are a tonne of subtleties here, and certain properties we would
> >> > > > like (e.g. a completely clean cache hierarchy upon entry to the OS)
> >> > > > aren't necessarily possible to provide in general (thanks to the wonders
> >> > > > of non-architected system level caches, interaction with bootloaders,
> >> > > > etc).
> >> > >
> >> > > I suppose it is easier for the UEFI implementation, since it knows the
> >> > > platform it runs on and there knows about the caches. Harder for the
> >> > > stub though :-/
> >> >
> >> > Yeah. System-level caches interact badly with pretty much any scenario
> >> > where ownership of the MMU is transferred (UEFI boot, kexec), and there
> >> > doesn't seem to be a single agent that can be charged with ownership of
> >> > maintenance.
> >> >
> >> > This is something I've been meaning to revisit, but it takes a while to
> >> > get back up to speed on the minutiae of the cache architecture and the
> >> > rules for memory attributes, and I haven't had the time recently.
> >> >
> >> > We do have a very heavy hammer that we know will work: flushing the
> >> > memory by PA in the stub once the MMU and caches are disabled. A
> >> > back-of-the-envelope calculation shows that could take minutes to issue
> >> > on a server machine (say 2GHz, with 16GB of RAM), so that's very much a
> >> > last resort.
> >>
> >> Ouch...
> >
> > Looking at that again, I was off by an order of 1000, and that actually
> > comes to about 0.13 seconds (though solely for CMO issue). So that might
> > not be as blunt as I made it out to be, but it's still not great as
> > platforms get larger.
> 
> I think we should be able to limit the memory we need to flush, as
> there should be no
> need to flush the free memory, just what is in use.  I think that good
> portions, if not all of that
> could be flushed from the C code with caches enabled, as we know they won't be
> modified after that point (FDT, initrd, etc.)  We can do this in C
> code after calling
> ExitBootServices(), and immediately before calling the Xen entry point
> efi_xen_start().
> There are no EFI calls in this path between the last bit of C code and
> the disabling
> of caches and MMU, so I think we should be able to identify if
> anything would need
> to be flushed in the ASM code with caches off.

I agree the vast majority of this maintenance could be done by C code.

There might be a need to flush that free memory, depending on how it is
mapped, unless you are proposing a lazy flush-before-use strategy.

> >> > We could try to manage the system caches explicitly, but then we need
> >> > code to do so very early, we need to have them described in the
> >> > appropriate firmware tables, and they need to be manageable from the
> >> > non-secure side (which I believe is not always the case). That somewhat
> >> > defeat the portability aspect of booting as an EFI application.
> >> >
> >> > So yes, it's harder for the stub :
> >>
> >> Indeed.
> >>
> >> Probably this isn't even close to the correct venue. I'm not sure where
> >> better to transfer it though. One of the Linaro lists perhaps?
> >
> > I'm not really sure where the right place is. There are quite a few
> > parties who have an interest in this problem (whether they realise it or
> > not). It would be nice to figure out more precisely what's happening
> > here first, anyhow.
> >
> > Mark.
> 
> Glad I'm not the only one confused :)  Getting back to the practical
> side of this,
> I'm thinking I (or Suravee) should update the patch to add the
> flushing of the FDT,
> as this is required for booting with the change to flush_dcache_area(), even if
> the exact mechanism isn't understood.  This gets us a more correct and working
> implementation, but not a final/robust implementation.

On a practical front, yes.

It would be nice to know if the attributes are actually the problem.
Is it possible to build a UP Xen which maps memory as UEFI does (i.e.
non-shareable)? Or is that problematic?

Thanks,
Mark.
Roy Franz Oct. 14, 2014, 5:19 p.m. UTC | #17
On Tue, Oct 14, 2014 at 10:07 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> [...]
>
>> >> >    As far as I am aware, UEFI may have an arbitrary set of mappings
>> >> >    present during boot services time, with arbitrary drivers active.
>> >> >    That means that UEFI can create dirty cache entries concurrently with
>> >> >    the bootloader, in addition to the usual clean entries that can be
>> >> >    allocated at any time thanks to speculative fetches.
>>
>> UEFI specifies that memory in the EFI memory map is flat mapped, but
>> I'd have to look to see if
>> it prohibits other mappings in addition to that.  Other mappings are
>> implementation
>> dependent (devices, etc. or memory not in the EFI memory map.)
>
> Regardless of the set of mapping that may exist, the key point is that
> we don't know what may have been allocated into a cache. Any portion of
> memory could have entries in the cache hierarchy, which could be clean
> or dirty.
>
>> In reviewing the Aarch64 specific portion of the spec (section 2.3.6
>> Aarch64 Platforms)
>> it says in part:
>>
>> · Implementations of boot services will enable architecturally
>> manageable caches and TLBs i.e.
>>   those that can be managed directly using implementation independent
>> registers using
>>   mechanisms and procedures defined in the ARM Architecture Reference
>> Manual. They should
>>   not enable caches requiring platform information to manage or invoke
>> non-architectural cache/
>>   TLB lockdown mechanisms.
>>
>> Does this imply that system level caches should not be enabled?
>
> Arguably yes, but on a technicality no, because it's possible to flush
> them by VA (albeit extremely slowly).
>
>> UEFI also specifies uni-processor, so we don't have to worry about
>> other cores' caches.
>
> Ok.
>
>> The spec does not mention the details of memory attributes - EDK2 currently maps
>> memory as non-shared, attributes 0xFF.
>
> Ok.
>
>> >> >
>> >> >    So while we're in the bootloader, any system level caches can have
>> >> >    entries allocated to it, and as those aren't architected the only
>> >> >    thing we can do is flush those by VA for the portions we care about.
>>
>> Maybe the firmware is 'wrong' to enable these caches?
>
> It is certainly arguable.
>
>> Are we guaranteed that these caches can be disabled on all
>> implementations?
>
> I believe on some implementations the non-secure side will not have
> access to the control registers. Beyond that I don't know.
>
>> Updating/clarifying the spec to have these disabled could simplify the
>> problem a bit.
>
> Possibly, yes. I'm not sure what we'd clarify it to say, however.
>
>> >> > So we can have "initially consistent", but that might not be useful.
>> >>
>> >> Hrm, yes, rather unfortunate.
>> >>
>> >> >
>> >> > > > There are a tonne of subtleties here, and certain properties we would
>> >> > > > like (e.g. a completely clean cache hierarchy upon entry to the OS)
>> >> > > > aren't necessarily possible to provide in general (thanks to the wonders
>> >> > > > of non-architected system level caches, interaction with bootloaders,
>> >> > > > etc).
>> >> > >
>> >> > > I suppose it is easier for the UEFI implementation, since it knows the
>> >> > > platform it runs on and there knows about the caches. Harder for the
>> >> > > stub though :-/
>> >> >
>> >> > Yeah. System-level caches interact badly with pretty much any scenario
>> >> > where ownership of the MMU is transferred (UEFI boot, kexec), and there
>> >> > doesn't seem to be a single agent that can be charged with ownership of
>> >> > maintenance.
>> >> >
>> >> > This is something I've been meaning to revisit, but it takes a while to
>> >> > get back up to speed on the minutiae of the cache architecture and the
>> >> > rules for memory attributes, and I haven't had the time recently.
>> >> >
>> >> > We do have a very heavy hammer that we know will work: flushing the
>> >> > memory by PA in the stub once the MMU and caches are disabled. A
>> >> > back-of-the-envelope calculation shows that could take minutes to issue
>> >> > on a server machine (say 2GHz, with 16GB of RAM), so that's very much a
>> >> > last resort.
>> >>
>> >> Ouch...
>> >
>> > Looking at that again, I was off by an order of 1000, and that actually
>> > comes to about 0.13 seconds (though solely for CMO issue). So that might
>> > not be as blunt as I made it out to be, but it's still not great as
>> > platforms get larger.
>>
>> I think we should be able to limit the memory we need to flush, as
>> there should be no
>> need to flush the free memory, just what is in use.  I think that good
>> portions, if not all of that
>> could be flushed from the C code with caches enabled, as we know they won't be
>> modified after that point (FDT, initrd, etc.)  We can do this in C
>> code after calling
>> ExitBootServices(), and immediately before calling the Xen entry point
>> efi_xen_start().
>> There are no EFI calls in this path between the last bit of C code and
>> the disabling
>> of caches and MMU, so I think we should be able to identify if
>> anything would need
>> to be flushed in the ASM code with caches off.
>
> I agree the vast majority of this maintenance could be done by C code.
>
> There might be a need to flush that free memory, depending on how it is
> mapped, unless you are proposing a lazy flush-before-use strategy.

Yeah, I was overlooking that even though Linux doesn't care what the content
of the free memory is, some of that being cached will still cause
problems later.
>
>> >> > We could try to manage the system caches explicitly, but then we need
>> >> > code to do so very early, we need to have them described in the
>> >> > appropriate firmware tables, and they need to be manageable from the
>> >> > non-secure side (which I believe is not always the case). That somewhat
>> >> > defeat the portability aspect of booting as an EFI application.
>> >> >
>> >> > So yes, it's harder for the stub :
>> >>
>> >> Indeed.
>> >>
>> >> Probably this isn't even close to the correct venue. I'm not sure where
>> >> better to transfer it though. One of the Linaro lists perhaps?
>> >
>> > I'm not really sure where the right place is. There are quite a few
>> > parties who have an interest in this problem (whether they realise it or
>> > not). It would be nice to figure out more precisely what's happening
>> > here first, anyhow.
>> >
>> > Mark.
>>
>> Glad I'm not the only one confused :)  Getting back to the practical
>> side of this,
>> I'm thinking I (or Suravee) should update the patch to add the
>> flushing of the FDT,
>> as this is required for booting with the change to flush_dcache_area(), even if
>> the exact mechanism isn't understood.  This gets us a more correct and working
>> implementation, but not a final/robust implementation.
>
> On a practical front, yes.
>
> It would be nice to know if the attributes are actually the problem.
> Is it possible to build a UP Xen which maps memory as UEFI does (i.e.
> non-shareable)? Or is that problematic?
>
> Thanks,
> Mark.

I tried the other way - making the EDK2 mappings match what Xen was using.
I started with changing the shareability to inner shareable, and verifying
that the memory attributes in the MAIR_EL2 register matched (a different
AttrIndex was used).  The flushing was still required.  I then modified EDK2
so that the entire low 12 bits of the block entry match Xen, and the
flushing was still required.  So I'm kind of stumped.

Roy
Ian Campbell Oct. 15, 2014, 8:02 a.m. UTC | #18
On Tue, 2014-10-14 at 18:07 +0100, Mark Rutland wrote:
> > Glad I'm not the only one confused :)  Getting back to the practical
> > side of this,
> > I'm thinking I (or Suravee) should update the patch to add the
> > flushing of the FDT,
> > as this is required for booting with the change to flush_dcache_area(), even if
> > the exact mechanism isn't understood.  This gets us a more correct and working
> > implementation, but not a final/robust implementation.
> 
> On a practical front, yes.
> 
> It would be nice to know if the attributes are actually the problem.
> Is it possible to build a UP Xen which maps memory as UEFI does (i.e.
> non-shareable)? Or is that problematic?

I think it would get to at least the point where you would observe these
issues, I'm not sure if/doubt that you would make it to actually booting
dom0.

Ian.
Stefano Stabellini Oct. 15, 2014, 3:02 p.m. UTC | #19
On Tue, 14 Oct 2014, Mark Rutland wrote:
> [...]
> 
> > >> >    As far as I am aware, UEFI may have an arbitrary set of mappings
> > >> >    present during boot services time, with arbitrary drivers active.
> > >> >    That means that UEFI can create dirty cache entries concurrently with
> > >> >    the bootloader, in addition to the usual clean entries that can be
> > >> >    allocated at any time thanks to speculative fetches.
> > 
> > UEFI specifies that memory in the EFI memory map is flat mapped, but
> > I'd have to look to see if
> > it prohibits other mappings in addition to that.  Other mappings are
> > implementation
> > dependent (devices, etc. or memory not in the EFI memory map.)
> 
> Regardless of the set of mapping that may exist, the key point is that
> we don't know what may have been allocated into a cache. Any portion of
> memory could have entries in the cache hierarchy, which could be clean
> or dirty.
> 
> > In reviewing the Aarch64 specific portion of the spec (section 2.3.6
> > Aarch64 Platforms)
> > it says in part:
> > 
> > · Implementations of boot services will enable architecturally
> > manageable caches and TLBs i.e.
> >   those that can be managed directly using implementation independent
> > registers using
> >   mechanisms and procedures defined in the ARM Architecture Reference
> > Manual. They should
> >   not enable caches requiring platform information to manage or invoke
> > non-architectural cache/
> >   TLB lockdown mechanisms.
> > 
> > Does this imply that system level caches should not be enabled?
> 
> Arguably yes, but on a technicality no, because it's possible to flush
> them by VA (albeit extremely slowly).

I think that this point should really be clearer at the spec level.


> > >> >    So while we're in the bootloader, any system level caches can have
> > >> >    entries allocated to it, and as those aren't architected the only
> > >> >    thing we can do is flush those by VA for the portions we care about.
> > 
> > Maybe the firmware is 'wrong' to enable these caches?
> 
> It is certainly arguable.
> 
> > Are we guaranteed that these caches can be disabled on all
> > implementations?
> 
> I believe on some implementations the non-secure side will not have
> access to the control registers. Beyond that I don't know.
> 
> > Updating/clarifying the spec to have these disabled could simplify the
> > problem a bit.
> 
> Possibly, yes. I'm not sure what we'd clarify it to say, however.

We should start a discussion about this with the relevant parties.


> > >> > So we can have "initially consistent", but that might not be useful.
> > >>
> > >> Hrm, yes, rather unfortunate.
> > >>
> > >> >
> > >> > > > There are a tonne of subtleties here, and certain properties we would
> > >> > > > like (e.g. a completely clean cache hierarchy upon entry to the OS)
> > >> > > > aren't necessarily possible to provide in general (thanks to the wonders
> > >> > > > of non-architected system level caches, interaction with bootloaders,
> > >> > > > etc).
> > >> > >
> > >> > > I suppose it is easier for the UEFI implementation, since it knows the
> > >> > > platform it runs on and there knows about the caches. Harder for the
> > >> > > stub though :-/
> > >> >
> > >> > Yeah. System-level caches interact badly with pretty much any scenario
> > >> > where ownership of the MMU is transferred (UEFI boot, kexec), and there
> > >> > doesn't seem to be a single agent that can be charged with ownership of
> > >> > maintenance.
> > >> >
> > >> > This is something I've been meaning to revisit, but it takes a while to
> > >> > get back up to speed on the minutiae of the cache architecture and the
> > >> > rules for memory attributes, and I haven't had the time recently.
> > >> >
> > >> > We do have a very heavy hammer that we know will work: flushing the
> > >> > memory by PA in the stub once the MMU and caches are disabled. A
> > >> > back-of-the-envelope calculation shows that could take minutes to issue
> > >> > on a server machine (say 2GHz, with 16GB of RAM), so that's very much a
> > >> > last resort.
> > >>
> > >> Ouch...
> > >
> > > Looking at that again, I was off by an order of 1000, and that actually
> > > comes to about 0.13 seconds (though solely for CMO issue). So that might
> > > not be as blunt as I made it out to be, but it's still not great as
> > > platforms get larger.
> > 
> > I think we should be able to limit the memory we need to flush, as
> > there should be no
> > need to flush the free memory, just what is in use.  I think that good
> > portions, if not all of that
> > could be flushed from the C code with caches enabled, as we know they won't be
> > modified after that point (FDT, initrd, etc.)  We can do this in C
> > code after calling
> > ExitBootServices(), and immediately before calling the Xen entry point
> > efi_xen_start().
> > There are no EFI calls in this path between the last bit of C code and
> > the disabling
> > of caches and MMU, so I think we should be able to identify if
> > anything would need
> > to be flushed in the ASM code with caches off.
> 
> I agree the vast majority of this maintenance could be done by C code.
> 
> There might be a need to flush that free memory, depending on how it is
> mapped, unless you are proposing a lazy flush-before-use strategy.

Is it actually safe to only flush what we use (DTB, Xen, initrd, Linux)?
What if the firmware wrote something else (ACPI tables?) that we might
have to access?
What if the firmware wrote something else that we don't care about? Xen
scrubs all ram early at boot, so this last point might not be an issue.
diff mbox

Patch

diff --git a/xen/arch/arm/arm64/cache.S b/xen/arch/arm/arm64/cache.S
index a445cbf..38f96c2 100644
--- a/xen/arch/arm/arm64/cache.S
+++ b/xen/arch/arm/arm64/cache.S
@@ -97,3 +97,35 @@  finished:
 	isb
 	ret
 ENDPROC(__flush_dcache_all)
+
+/*
+ * dcache_line_size - reg = minimum D-cache line size in bytes, from CTR_EL0 (clobbers tmp).
+ */
+	.macro	dcache_line_size, reg, tmp
+	mrs	\tmp, ctr_el0			// tmp = CTR_EL0
+	ubfm	\tmp, \tmp, #16, #19		// tmp = CTR_EL0[19:16], log2(words per line)
+	mov	\reg, #4			// bytes per word
+	lsl	\reg, \reg, \tmp		// reg = 4 << tmp = line size in bytes
+	.endm
+
+/*
+ *	__flush_dcache_area(kaddr, size)
+ *
+ *	Clean and invalidate (dc civac) every D-cache line overlapping
+ *	[kaddr, kaddr + size), then dsb sy. Clobbers x0-x3 and the flags.
+ *
+ *	- kaddr   - x0: start virtual address (rounded down to a line)
+ *	- size    - x1: length of the region in bytes
+ */
+ENTRY(__flush_dcache_area)
+	dcache_line_size x2, x3			// x2 = line size in bytes, x3 = scratch
+	add	x1, x0, x1			// x1 = end address (exclusive)
+	sub	x3, x2, #1			// x3 = line-size mask
+	bic	x0, x0, x3			// align start down to a line boundary
+1:	dc	civac, x0			// clean & invalidate D line / unified line
+	add	x0, x0, x2			// advance to the next line
+	cmp	x0, x1
+	b.lo	1b				// unsigned: loop while x0 < end
+	dsb	sy				// barrier after the maintenance ops
+	ret
+ENDPROC(__flush_dcache_area)
diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
index 7650abe..704f39d 100644
--- a/xen/arch/arm/arm64/head.S
+++ b/xen/arch/arm/arm64/head.S
@@ -740,16 +740,30 @@  ENTRY(lookup_processor_type)
  */
 ENTRY(efi_xen_start)
         /*
+         * Preserve x0 (fdt pointer) across call to __flush_dcache_area,
+         * restore for entry into Xen.
+         */
+        mov   x20, x0
+
+        /*
+         * Flush dcache covering current runtime addresses
+         * of xen text/data. Then flush all of icache.
+         */
+        adrp  x1, _start
+        add   x1, x1, #:lo12:_start
+        adrp  x2, _end
+        add   x2, x2, #:lo12:_end
+        sub   x1, x2, x1
+
+        bl    __flush_dcache_area
+        ic    ialluis
+
+        /*
          * Turn off cache and MMU as Xen expects. EFI enables them, but also
          * mandates a 1:1 (unity) VA->PA mapping, so we can turn off the
          * MMU while executing EFI code before entering Xen.
          * The EFI loader calls this to start Xen.
-         * Preserve x0 (fdf pointer) across call to __flush_dcache_all,
-         * restore for entry into Xen.
          */
-        mov   x20, x0
-        bl    __flush_dcache_all
-        ic    ialluis
 
         /* Turn off Dcache and MMU */
         mrs   x0, sctlr_el2