Message ID | 1412610550-26964-1-git-send-email-suravee.suthikulpanit@amd.com |
---|---|
State | New |
Headers | show |
Hi Suravee, On Mon, Oct 06, 2014 at 04:49:10PM +0100, suravee.suthikulpanit@amd.com wrote: > From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com> > > when booting with EFI, __flush_dcache_all does not correctly flush data. > > According to Mark Rutland, __flush_dcache_all does not guaranteed to push > data to the PoC if there is a system-level cache as it uses Set/Way > operations. A better way to look at this is that Set/Way operations are never guaranteed to flush data to the PoC, regardless of the presence of a system-level cache. They might on certain implementations, but that's not an architectural guarantee. The same caveat applies to using them to push data to other points in the cache hierarchy (PoUU or PoUIS). Generally, Set/Way cache maintenance operations can only be used to empty or clean the architected caches visible to a given CPU, and only when all masters sharing those caches have been prevented from allocating any cache entries. Outside of IMPLEMENTATION DEFINED power-down sequences or reset-like operations they are typically the wrong thing to use. So any other uses of Set/Way operations should also be treated as suspect, and are likely to be problematic on platforms with system-level caches. > > Therefore, this patch switchs to use the "__flush_dcache_area" Nit: s/switchs/switches/ > mechanism, which is coppied from Linux. It would be good to state that this uses maintenance by VA, which (sane) system caches should respect. > > Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com> > --- > > NOTE: I still have not fully boot into Dom0 with this patch. > However, it seems that the data is flushed out to physical > memory now. 
> > xen/arch/arm/arm64/cache.S | 32 ++++++++++++++++++++++++++++++++ > xen/arch/arm/arm64/head.S | 24 +++++++++++++++++++----- > 2 files changed, 51 insertions(+), 5 deletions(-) > > diff --git a/xen/arch/arm/arm64/cache.S b/xen/arch/arm/arm64/cache.S > index a445cbf..38f96c2 100644 > --- a/xen/arch/arm/arm64/cache.S > +++ b/xen/arch/arm/arm64/cache.S > @@ -97,3 +97,35 @@ finished: > isb > ret > ENDPROC(__flush_dcache_all) > + > +/* > + * dcache_line_size - get the minimum D-cache line size from the CTR register. > + */ > + .macro dcache_line_size, reg, tmp > + mrs \tmp, ctr_el0 // read CTR > + ubfm \tmp, \tmp, #16, #19 // cache line size encoding > + mov \reg, #4 // bytes per word > + lsl \reg, \reg, \tmp // actual cache line size > + .endm > + > +/* > + * __flush_dcache_area(kaddr, size) > + * > + * Ensure that the data held in the page kaddr is written back to the > + * page in question. > + * > + * - kaddr - kernel address > + * - size - size in question > + */ > +ENTRY(__flush_dcache_area) > + dcache_line_size x2, x3 > + add x1, x0, x1 > + sub x3, x2, #1 > + bic x0, x0, x3 > +1: dc civac, x0 // clean & invalidate D line / unified line > + add x0, x0, x2 > + cmp x0, x1 > + b.lo 1b > + dsb sy > + ret > +ENDPROC(__flush_dcache_area) > diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S > index 7650abe..704f39d 100644 > --- a/xen/arch/arm/arm64/head.S > +++ b/xen/arch/arm/arm64/head.S > @@ -740,16 +740,30 @@ ENTRY(lookup_processor_type) > */ > ENTRY(efi_xen_start) > /* > + * Preserve x0 (fdf pointer) across call to __flush_dcache_area, Sorry if this is a silly question, but what's the "fdf pointer"? > + * restore for entry into Xen. > + */ > + mov x20, x0 > + > + /* > + * Flush dcache covering current runtime addresses > + * of xen text/data. Then flush all of icache. > + */ > + adrp x1, _start > + add x1, x1, #:lo12:_start > + adrp x2, _end > + add x2, x2, #:lo12:_end > + sub x1, x2, x1 Shouldn't the start address go in x0? 
We saved the fdf pointer earlier but never placed the start address into x0. I take it Xen doesn't relocate itself? Thanks, Mark. > + > + bl __flush_dcache_area > + ic ialluis > + > + /* > * Turn off cache and MMU as Xen expects. EFI enables them, but also > * mandates a 1:1 (unity) VA->PA mapping, so we can turn off the > * MMU while executing EFI code before entering Xen. > * The EFI loader calls this to start Xen. > - * Preserve x0 (fdf pointer) across call to __flush_dcache_all, > - * restore for entry into Xen. > */ > - mov x20, x0 > - bl __flush_dcache_all > - ic ialluis > > /* Turn off Dcache and MMU */ > mrs x0, sctlr_el2 > -- > 1.9.3 > >
On Mon, Oct 6, 2014 at 9:28 AM, Mark Rutland <mark.rutland@arm.com> wrote: > Hi Suravee, > > On Mon, Oct 06, 2014 at 04:49:10PM +0100, suravee.suthikulpanit@amd.com wrote: >> From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com> >> >> when booting with EFI, __flush_dcache_all does not correctly flush data. >> >> According to Mark Rutland, __flush_dcache_all does not guaranteed to push >> data to the PoC if there is a system-level cache as it uses Set/Way >> operations. > > A better way to look at this is that Set/Way operations are never > guaranteed to flush data to the PoC, regardless of the presence of a > system-level cache. They might on certain implementations, but that's > not an architectural guarantee. The same caveat applies to using them to > push data to other points in the cache hierarchy (PoUU or PoUIS). > > Generally, Set/Way cache maintenance operations can only be used to > empty or clean the architected caches visible to a given CPU, and only > when all masters sharing those caches have been prevented from > allocating any cache entries. Outside of IMPLEMENTATION DEFINED > power-down sequences or reset-like operations they are typically the > wrong thing to use. > > So any other uses of Set/Way operations should also be treated as > suspect, and are likely to be problematic on platforms with system-level > caches. So what all do we need to flush? Do we need to flush all modified (dirty) cache lines, or just a specific subset? In Linux the FDT which is modified in the Linux EFI stub isn't flushed, nor is the EFI memory map, both of which are modified by the UEFI firmware/boot stub. I feel like I'm missing something here. > >> >> Therefore, this patch switchs to use the "__flush_dcache_area" > > Nit: s/switchs/switches/ > >> mechanism, which is coppied from Linux. > > It would be good to state that this uses maintenance by VA, which (sane) > system caches should respect. 
> >> >> Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com> >> --- >> >> NOTE: I still have not fully boot into Dom0 with this patch. >> However, it seems that the data is flushed out to physical >> memory now. >> >> xen/arch/arm/arm64/cache.S | 32 ++++++++++++++++++++++++++++++++ >> xen/arch/arm/arm64/head.S | 24 +++++++++++++++++++----- >> 2 files changed, 51 insertions(+), 5 deletions(-) >> >> diff --git a/xen/arch/arm/arm64/cache.S b/xen/arch/arm/arm64/cache.S >> index a445cbf..38f96c2 100644 >> --- a/xen/arch/arm/arm64/cache.S >> +++ b/xen/arch/arm/arm64/cache.S >> @@ -97,3 +97,35 @@ finished: >> isb >> ret >> ENDPROC(__flush_dcache_all) >> + >> +/* >> + * dcache_line_size - get the minimum D-cache line size from the CTR register. >> + */ >> + .macro dcache_line_size, reg, tmp >> + mrs \tmp, ctr_el0 // read CTR >> + ubfm \tmp, \tmp, #16, #19 // cache line size encoding >> + mov \reg, #4 // bytes per word >> + lsl \reg, \reg, \tmp // actual cache line size >> + .endm >> + >> +/* >> + * __flush_dcache_area(kaddr, size) >> + * >> + * Ensure that the data held in the page kaddr is written back to the >> + * page in question. >> + * >> + * - kaddr - kernel address >> + * - size - size in question >> + */ >> +ENTRY(__flush_dcache_area) >> + dcache_line_size x2, x3 >> + add x1, x0, x1 >> + sub x3, x2, #1 >> + bic x0, x0, x3 >> +1: dc civac, x0 // clean & invalidate D line / unified line >> + add x0, x0, x2 >> + cmp x0, x1 >> + b.lo 1b >> + dsb sy >> + ret >> +ENDPROC(__flush_dcache_area) >> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S >> index 7650abe..704f39d 100644 >> --- a/xen/arch/arm/arm64/head.S >> +++ b/xen/arch/arm/arm64/head.S >> @@ -740,16 +740,30 @@ ENTRY(lookup_processor_type) >> */ >> ENTRY(efi_xen_start) >> /* >> + * Preserve x0 (fdf pointer) across call to __flush_dcache_area, > > Sorry if this is a silly question, but what's the "fdf pointer"? > Should be fdt. This is a typo from my original patch. 
Also, we should remove flush_dcache_all, as that was added for use in the EFI boot code. If we don't use it there it doesn't have a user in Xen. >> + * restore for entry into Xen. >> + */ >> + mov x20, x0 >> + >> + /* >> + * Flush dcache covering current runtime addresses >> + * of xen text/data. Then flush all of icache. >> + */ >> + adrp x1, _start >> + add x1, x1, #:lo12:_start >> + adrp x2, _end >> + add x2, x2, #:lo12:_end >> + sub x1, x2, x1 > > Shouldn't the start address go in x0? We saved the fdf pointer earlier > but never placed the start address into x0. Yes, this does seem to be missing > > I take it Xen doesn't relocate itself? Xen does relocate itself, but that is done later in the boot process that is common between the EFI and Image boot methods. > > Thanks, > Mark. > >> + >> + bl __flush_dcache_area >> + ic ialluis >> + >> + /* >> * Turn off cache and MMU as Xen expects. EFI enables them, but also >> * mandates a 1:1 (unity) VA->PA mapping, so we can turn off the >> * MMU while executing EFI code before entering Xen. >> * The EFI loader calls this to start Xen. >> - * Preserve x0 (fdf pointer) across call to __flush_dcache_all, >> - * restore for entry into Xen. >> */ >> - mov x20, x0 >> - bl __flush_dcache_all >> - ic ialluis >> >> /* Turn off Dcache and MMU */ >> mrs x0, sctlr_el2 >> -- >> 1.9.3 >> >>
On Mon, 2014-10-06 at 17:28 +0100, Mark Rutland wrote: > Hi Suravee, > > On Mon, Oct 06, 2014 at 04:49:10PM +0100, suravee.suthikulpanit@amd.com wrote: > > From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com> > > > > when booting with EFI, __flush_dcache_all does not correctly flush data. > > > > According to Mark Rutland, __flush_dcache_all does not guaranteed to push > > data to the PoC if there is a system-level cache as it uses Set/Way > > operations. > > A better way to look at this is that Set/Way operations are never > guaranteed to flush data to the PoC, regardless of the presence of a > system-level cache. They might on certain implementations, but that's > not an architectural guarantee. The same caveat applies to using them to > push data to other points in the cache hierarchy (PoUU or PoUIS). > > Generally, Set/Way cache maintenance operations can only be used to > empty or clean the architected caches visible to a given CPU, and only > when all masters sharing those caches have been prevented from > allocating any cache entries. Outside of IMPLEMENTATION DEFINED > power-down sequences or reset-like operations they are typically the > wrong thing to use. > > So any other uses of Set/Way operations should also be treated as > suspect, and are likely to be problematic on platforms with system-level > caches. I suppose this set of problematic situations still includes "running apparently UP during boot" since we may not be aware of secondary processors currently running platform firmware and therefore (potentially) interacting with caches? Ian.
On Mon, 2014-10-06 at 21:15 -0700, Roy Franz wrote: > On Mon, Oct 6, 2014 at 9:28 AM, Mark Rutland <mark.rutland@arm.com> wrote: > > Hi Suravee, > > > > On Mon, Oct 06, 2014 at 04:49:10PM +0100, suravee.suthikulpanit@amd.com wrote: > >> From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com> > >> > >> when booting with EFI, __flush_dcache_all does not correctly flush data. > >> > >> According to Mark Rutland, __flush_dcache_all does not guaranteed to push > >> data to the PoC if there is a system-level cache as it uses Set/Way > >> operations. > > > > A better way to look at this is that Set/Way operations are never > > guaranteed to flush data to the PoC, regardless of the presence of a > > system-level cache. They might on certain implementations, but that's > > not an architectural guarantee. The same caveat applies to using them to > > push data to other points in the cache hierarchy (PoUU or PoUIS). > > > > Generally, Set/Way cache maintenance operations can only be used to > > empty or clean the architected caches visible to a given CPU, and only > > when all masters sharing those caches have been prevented from > > allocating any cache entries. Outside of IMPLEMENTATION DEFINED > > power-down sequences or reset-like operations they are typically the > > wrong thing to use. > > > > So any other uses of Set/Way operations should also be treated as > > suspect, and are likely to be problematic on platforms with system-level > > caches. > > So what all do we need to flush? Do we need to flush all modified > (dirty) cache lines, > or just a specific subset? > > In Linux the FDT which is modified in the Linux EFI stub isn't > flushed, nor is the EFI memory map, > both of which are modified by the UEFI firmware/boot stub. I feel > like I'm missing > something here. Mark was making reference on IRC to other missing flushes even in Linux. Not sure if those include the ones which you mention... 
> Also, we should remove flush_dcache_all, as that was added for use in > the EFI boot code. If we > don't use it there it doesn't have a user in Xen. Absolutely, especially given that it turns out to be dangerous to use under most circumstances! > > I take it Xen doesn't relocate itself? > > Xen does relocate itself, but that is done later in the boot process > that is common between the EFI and Image boot methods. Even with it happening later it's possible that we might need to flush some additional stuff on entry via the EFI path? e.g. there could be stuff which the non-EFI code path was previously implicitly assuming wouldn't be cached (because caches were never enabled on such bootloaders, etc). In fact I'd suggest that those missing flushes (if any, maybe we already got it all right) really belong in the relocation code rather than in the EFI stub, since even on non-EFI it seems fragile to rely on specific caching behaviour from the bootloader. I suppose we will cross that bridge when Suravee gets as far as that! Ian.
On Tue, Oct 07, 2014 at 05:15:58AM +0100, Roy Franz wrote: > On Mon, Oct 6, 2014 at 9:28 AM, Mark Rutland <mark.rutland@arm.com> wrote: > > Hi Suravee, > > > > On Mon, Oct 06, 2014 at 04:49:10PM +0100, suravee.suthikulpanit@amd.com wrote: > >> From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com> > >> > >> when booting with EFI, __flush_dcache_all does not correctly flush data. > >> > >> According to Mark Rutland, __flush_dcache_all does not guaranteed to push > >> data to the PoC if there is a system-level cache as it uses Set/Way > >> operations. > > > > A better way to look at this is that Set/Way operations are never > > guaranteed to flush data to the PoC, regardless of the presence of a > > system-level cache. They might on certain implementations, but that's > > not an architectural guarantee. The same caveat applies to using them to > > push data to other points in the cache hierarchy (PoUU or PoUIS). > > > > Generally, Set/Way cache maintenance operations can only be used to > > empty or clean the architected caches visible to a given CPU, and only > > when all masters sharing those caches have been prevented from > > allocating any cache entries. Outside of IMPLEMENTATION DEFINED > > power-down sequences or reset-like operations they are typically the > > wrong thing to use. > > > > So any other uses of Set/Way operations should also be treated as > > suspect, and are likely to be problematic on platforms with system-level > > caches. > > So what all do we need to flush? Do we need to flush all modified > (dirty) cache lines, > or just a specific subset? You need to flush anything which needs to be visible at the PoC. So anything that needs to be accessible with the caches disabled needs to be flushed. You also need to clean the range corresponding to anywhere you intend to write to with the caches disabled. 
> In Linux the FDT which is modified in the Linux EFI stub isn't > flushed, nor is the EFI memory map, > both of which are modified by the UEFI firmware/boot stub. I feel > like I'm missing > something here. Within Linux we're getting lucky here because those accesses are all done with the caches enabled, and we don't make any conflicting accesses while the caches are disabled -- once we turn the caches back on the data is visible again. There's a possible problem with mismatched aliases here, as UEFI could have had cacheable mappings for any arbitrary subset of the physical address space that might not match what we want to use. So far we haven't encountered any because the memory attributes used by UEFI happen to match that used by the kernel. In the absence of a system cache we could just nuke the cache hierarchy by set/way to prevent that so long as we know no masters are allocating entries while we do so. With a system cache it would be possible to nuke the cache hierarchy by VA, but for the sizeable quantities of RAM we expect that's not likely to be feasible. > >> Therefore, this patch switchs to use the "__flush_dcache_area" > > > > Nit: s/switchs/switches/ > > > >> mechanism, which is coppied from Linux. > > > > It would be good to state that this uses maintenance by VA, which (sane) > > system caches should respect. > > > >> > >> Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com> > >> --- > >> > >> NOTE: I still have not fully boot into Dom0 with this patch. > >> However, it seems that the data is flushed out to physical > >> memory now. 
> >> > >> xen/arch/arm/arm64/cache.S | 32 ++++++++++++++++++++++++++++++++ > >> xen/arch/arm/arm64/head.S | 24 +++++++++++++++++++----- > >> 2 files changed, 51 insertions(+), 5 deletions(-) > >> > >> diff --git a/xen/arch/arm/arm64/cache.S b/xen/arch/arm/arm64/cache.S > >> index a445cbf..38f96c2 100644 > >> --- a/xen/arch/arm/arm64/cache.S > >> +++ b/xen/arch/arm/arm64/cache.S > >> @@ -97,3 +97,35 @@ finished: > >> isb > >> ret > >> ENDPROC(__flush_dcache_all) > >> + > >> +/* > >> + * dcache_line_size - get the minimum D-cache line size from the CTR register. > >> + */ > >> + .macro dcache_line_size, reg, tmp > >> + mrs \tmp, ctr_el0 // read CTR > >> + ubfm \tmp, \tmp, #16, #19 // cache line size encoding > >> + mov \reg, #4 // bytes per word > >> + lsl \reg, \reg, \tmp // actual cache line size > >> + .endm > >> + > >> +/* > >> + * __flush_dcache_area(kaddr, size) > >> + * > >> + * Ensure that the data held in the page kaddr is written back to the > >> + * page in question. > >> + * > >> + * - kaddr - kernel address > >> + * - size - size in question > >> + */ > >> +ENTRY(__flush_dcache_area) > >> + dcache_line_size x2, x3 > >> + add x1, x0, x1 > >> + sub x3, x2, #1 > >> + bic x0, x0, x3 > >> +1: dc civac, x0 // clean & invalidate D line / unified line > >> + add x0, x0, x2 > >> + cmp x0, x1 > >> + b.lo 1b > >> + dsb sy > >> + ret > >> +ENDPROC(__flush_dcache_area) > >> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S > >> index 7650abe..704f39d 100644 > >> --- a/xen/arch/arm/arm64/head.S > >> +++ b/xen/arch/arm/arm64/head.S > >> @@ -740,16 +740,30 @@ ENTRY(lookup_processor_type) > >> */ > >> ENTRY(efi_xen_start) > >> /* > >> + * Preserve x0 (fdf pointer) across call to __flush_dcache_area, > > > > Sorry if this is a silly question, but what's the "fdf pointer"? > > > > Should be fdt. This is a typo from my original patch. Ok. > Also, we should remove flush_dcache_all, as that was added for use in > the EFI boot code. 
If we > don't use it there it doesn't have a user in Xen. That sounds like a good idea to me. > >> + * restore for entry into Xen. > >> + */ > >> + mov x20, x0 > >> + > >> + /* > >> + * Flush dcache covering current runtime addresses > >> + * of xen text/data. Then flush all of icache. > >> + */ > >> + adrp x1, _start > >> + add x1, x1, #:lo12:_start > >> + adrp x2, _end > >> + add x2, x2, #:lo12:_end > >> + sub x1, x2, x1 > > > > Shouldn't the start address go in x0? We saved the fdf pointer earlier > > but never placed the start address into x0. > > Yes, this does seem to be missing > > > > I take it Xen doesn't relocate itself? > > Xen does relocate itself, but that is done later in the boot process > that is common between the EFI and Image > boot methods. Ah, ok. Thanks, Mark.
On Tue, Oct 07, 2014 at 10:27:20AM +0100, Ian Campbell wrote: > On Mon, 2014-10-06 at 17:28 +0100, Mark Rutland wrote: > > Hi Suravee, > > > > On Mon, Oct 06, 2014 at 04:49:10PM +0100, suravee.suthikulpanit@amd.com wrote: > > > From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com> > > > > > > when booting with EFI, __flush_dcache_all does not correctly flush data. > > > > > > According to Mark Rutland, __flush_dcache_all does not guaranteed to push > > > data to the PoC if there is a system-level cache as it uses Set/Way > > > operations. > > > > A better way to look at this is that Set/Way operations are never > > guaranteed to flush data to the PoC, regardless of the presence of a > > system-level cache. They might on certain implementations, but that's > > not an architectural guarantee. The same caveat applies to using them to > > push data to other points in the cache hierarchy (PoUU or PoUIS). > > > > Generally, Set/Way cache maintenance operations can only be used to > > empty or clean the architected caches visible to a given CPU, and only > > when all masters sharing those caches have been prevented from > > allocating any cache entries. Outside of IMPLEMENTATION DEFINED > > power-down sequences or reset-like operations they are typically the > > wrong thing to use. > > > > So any other uses of Set/Way operations should also be treated as > > suspect, and are likely to be problematic on platforms with system-level > > caches. > > I suppose this set of problematic situations still includes "running > apparently UP during boot" since we may not be aware of secondary > processors currently running platform firmware and therefore > (potentially) interacting with caches? Yes. That said, if those CPUs have active cacheable mappings for memory that is not special reserved and/or secure, you could have issues with mismatched aliases anyway. 
I'd hope that in the FW, secondary CPUs were either running without caches enabled, or with only secure mappings if the caches are necessary. Mark.
On Tue, Oct 7, 2014 at 3:40 AM, Mark Rutland <mark.rutland@arm.com> wrote: > On Tue, Oct 07, 2014 at 05:15:58AM +0100, Roy Franz wrote: >> On Mon, Oct 6, 2014 at 9:28 AM, Mark Rutland <mark.rutland@arm.com> wrote: >> > Hi Suravee, >> > >> > On Mon, Oct 06, 2014 at 04:49:10PM +0100, suravee.suthikulpanit@amd.com wrote: >> >> From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com> >> >> >> >> when booting with EFI, __flush_dcache_all does not correctly flush data. >> >> >> >> According to Mark Rutland, __flush_dcache_all does not guaranteed to push >> >> data to the PoC if there is a system-level cache as it uses Set/Way >> >> operations. >> > >> > A better way to look at this is that Set/Way operations are never >> > guaranteed to flush data to the PoC, regardless of the presence of a >> > system-level cache. They might on certain implementations, but that's >> > not an architectural guarantee. The same caveat applies to using them to >> > push data to other points in the cache hierarchy (PoUU or PoUIS). >> > >> > Generally, Set/Way cache maintenance operations can only be used to >> > empty or clean the architected caches visible to a given CPU, and only >> > when all masters sharing those caches have been prevented from >> > allocating any cache entries. Outside of IMPLEMENTATION DEFINED >> > power-down sequences or reset-like operations they are typically the >> > wrong thing to use. >> > >> > So any other uses of Set/Way operations should also be treated as >> > suspect, and are likely to be problematic on platforms with system-level >> > caches. >> >> So what all do we need to flush? Do we need to flush all modified >> (dirty) cache lines, >> or just a specific subset? > > You need to flush anything which needs to be visible at the PoC. So > anything that needs to be accessible with the caches disabled needs to > be flushed. You also need to clean the range corresponding to anywhere > you intend to write to with the caches disabled. 
> >> In Linux the FDT which is modified in the Linux EFI stub isn't >> flushed, nor is the EFI memory map, >> both of which are modified by the UEFI firmware/boot stub. I feel >> like I'm missing >> something here. > > Within Linux we're getting lucky here because those accesses are all > done with the caches enabled, and we don't make any conflicting accesses > while the caches are disabled -- once we turn the caches back on the > data is visible again. > > There's a possible problem with mismatched aliases here, as UEFI could > have had cacheable mappings for any arbitrary subset of the physical > address space that might not match what we want to use. So far we > haven't encountered any because the memory attributes used by UEFI > happen to match that used by the kernel. It seems that for Xen we do need to flush the FDT as well - I get a variety of crashes with a corrupt FDT when cache state is modeled on the FVP model, and Suravee sees similar behavior on Seattle. I was not expecting this, as I looked at the code in Xen and the caches/TLB are enabled quite early on, before the FDT is accessed by Xen. I then looked at the mappings used by edk2 and Xen, and found some differences. Even after modifying edk2 to use the same configuration as Xen, the flushing of the FDT is still required. Xen and edk2 use the same memory attributes in the MAIR_EL2 register (0xFF), but had different sharing, access perm, and nG settings. The flushing of the FDT seems to be required, but I'm not sure why. Does Linux access the FDT with the same flat mapping used by edk2? I think that Xen uses a different virtual mapping, so I suppose this could cause problems with a virtually tagged cache. (I couldn't find a description of that detail regarding the caches.) I'd really like to understand why this flush is required for Xen, and to make sure there isn't other internal edk2 state that would also need flushing. 
> > In the absence of a system cache we could just nuke the cache hierarchy > by set/way to prevent that so long as we know no masters are allocating > entries while we do so. With a system cache it would be possible to nuke > the cache hierarchy by VA, but for the sizeable quantities of RAM we > expect that's not likely to be feasible. > >> >> Therefore, this patch switchs to use the "__flush_dcache_area" >> > >> > Nit: s/switchs/switches/ >> > >> >> mechanism, which is coppied from Linux. >> > >> > It would be good to state that this uses maintenance by VA, which (sane) >> > system caches should respect. >> > >> >> >> >> Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com> >> >> --- >> >> >> >> NOTE: I still have not fully boot into Dom0 with this patch. >> >> However, it seems that the data is flushed out to physical >> >> memory now. >> >> >> >> xen/arch/arm/arm64/cache.S | 32 ++++++++++++++++++++++++++++++++ >> >> xen/arch/arm/arm64/head.S | 24 +++++++++++++++++++----- >> >> 2 files changed, 51 insertions(+), 5 deletions(-) >> >> >> >> diff --git a/xen/arch/arm/arm64/cache.S b/xen/arch/arm/arm64/cache.S >> >> index a445cbf..38f96c2 100644 >> >> --- a/xen/arch/arm/arm64/cache.S >> >> +++ b/xen/arch/arm/arm64/cache.S >> >> @@ -97,3 +97,35 @@ finished: >> >> isb >> >> ret >> >> ENDPROC(__flush_dcache_all) >> >> + >> >> +/* >> >> + * dcache_line_size - get the minimum D-cache line size from the CTR register. >> >> + */ >> >> + .macro dcache_line_size, reg, tmp >> >> + mrs \tmp, ctr_el0 // read CTR >> >> + ubfm \tmp, \tmp, #16, #19 // cache line size encoding >> >> + mov \reg, #4 // bytes per word >> >> + lsl \reg, \reg, \tmp // actual cache line size >> >> + .endm >> >> + >> >> +/* >> >> + * __flush_dcache_area(kaddr, size) >> >> + * >> >> + * Ensure that the data held in the page kaddr is written back to the >> >> + * page in question. 
>> >> + * >> >> + * - kaddr - kernel address >> >> + * - size - size in question >> >> + */ >> >> +ENTRY(__flush_dcache_area) >> >> + dcache_line_size x2, x3 >> >> + add x1, x0, x1 >> >> + sub x3, x2, #1 >> >> + bic x0, x0, x3 >> >> +1: dc civac, x0 // clean & invalidate D line / unified line >> >> + add x0, x0, x2 >> >> + cmp x0, x1 >> >> + b.lo 1b >> >> + dsb sy >> >> + ret >> >> +ENDPROC(__flush_dcache_area) >> >> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S >> >> index 7650abe..704f39d 100644 >> >> --- a/xen/arch/arm/arm64/head.S >> >> +++ b/xen/arch/arm/arm64/head.S >> >> @@ -740,16 +740,30 @@ ENTRY(lookup_processor_type) >> >> */ >> >> ENTRY(efi_xen_start) >> >> /* >> >> + * Preserve x0 (fdf pointer) across call to __flush_dcache_area, >> > >> > Sorry if this is a silly question, but what's the "fdf pointer"? >> > >> >> Should be fdt. This is a typo from my original patch. > > Ok. > >> Also, we should remove flush_dcache_all, as that was added for use in >> the EFI boot code. If we >> don't use it there it doesn't have a user in Xen. > > That sounds like a good idea to me. > >> >> + * restore for entry into Xen. >> >> + */ >> >> + mov x20, x0 >> >> + >> >> + /* >> >> + * Flush dcache covering current runtime addresses >> >> + * of xen text/data. Then flush all of icache. >> >> + */ >> >> + adrp x1, _start >> >> + add x1, x1, #:lo12:_start >> >> + adrp x2, _end >> >> + add x2, x2, #:lo12:_end >> >> + sub x1, x2, x1 >> > >> > Shouldn't the start address go in x0? We saved the fdf pointer earlier >> > but never placed the start address into x0. >> >> Yes, this does seem to be missing >> > >> > I take it Xen doesn't relocate itself? >> >> Xen does relocate itself, but that is done later in the boot process >> that is common between the EFI and Image >> boot methods. > > Ah, ok. > > Thanks, > Mark.
Hi Roy, [...] > It seems that for Xen we do need to flush the FDT as well - I get a > variety of crashes > with a corrupt FDT when cache state is modeled on the FVP model, and > Suravee sees similar > behavior on Seattle. I was not expecting this, as I looked at the code > in Xen and the caches/TLB > are enabled quite early on, before the FDT is accessed by Xen. I then > looked at the mappings > used by edk2 and Xen, and found some differences. Even after > modifying edk2 to use the same > configuration as Xen, the flushing of the FDT is still required. Xen > and edk2 use the same memory > attributes in the MAIR_EL2 register (0xFF), but had different > sharing, access perm, and nG settings. I don't think the access perm or nG settings should have any effect, but the shareability forms part of the memory attributes (along with the memory type and cacheability), and there are several rules that apply when accessing a memory location with mismatched attributes. See the ARMv8 ARM - The AArch64 Application Level Memory Model - Mismatched memory attributes. In Linux we're likely getting lucky, and the shareability we use varies for an SMP or UP kernel. So we need maintenance in at least one of those cases. This would also apply to any initrd or other image. Do you happen to know the shareability used by EDK2 and Xen? > The flushing of the FDT seems to be required, but I'm not sure why. > Does linux access the FDT with the > same flat mapping used by edk2? I think that Xen uses a different > virtual mapping, so I suppose this > could cause problems with a virtually tagged cache. (I couldn't find > a description of that detail regarding > the caches.) I'd really like to understand why this flush is required > for Xen, and to make sure there > there isn't other internal edk2 state that would also need flushing. The D-caches should behave as if they are PIPT, so the virtual addresses used should not be a problem. 
Linux maps the FDT in the swapper pgdir rather than the idmap pgdir. Linux might be doing some work that happens to flush the relevant portions of the cache, even if accidentally, before accessing the FDT. I would also like to understand what's going on here. Thanks, Mark.
On Tue, 2014-10-14 at 10:21 +0100, Mark Rutland wrote: > Hi Roy, > > [...] > > > It seems that for Xen we do need to flush the FDT as well - I get a > > variety of crashes > > with a corrupt FDT when cache state is modeled on the FVP model, and > > Suravee sees similar > > behavior on Seattle. I was not expecting this, as I looked at the code > > in Xen and the caches/TLB > > are enabled quite early on, before the FDT is accessed by Xen. I then > > looked at the mappings > > used by edk2 and Xen, and found some differences. Even after > > modifying edk2 to use the same > > configuration as Xen, the flushing of the FDT is still required. Xen > > and edk2 use the same memory > > attributes in the MAIR_EL2 register (0xFF), but had different > > sharing, access perm, and nG settings. > > I don't think the access perm or nG settings should have any effect, but > the shareability forms part of the memory attributes (along with the > memory type and cacheability), and there are several rules that apply > when accessing a memory location with mismatched attributes. See the > ARMv8 ARM - The AArch64 Application Level Memory Model - Mismatched > memory attributes. > > In Linux we're likely getting lucky, and the shareability we use varies > for an SMP or UP kernel. So we need maintenance in at least one of those > cases. This would also apply to any initrd or other image. > > Do you happen to know the shareability used by EDK2 and Xen? Xen maps everything inner-shareable. Dunno about EDK2. Is the real issue here not a lack of specification for some corner cases of the boot protocol? Can we get that fixed somehow? Part of me wants to suggest that UEFI (and bootloaders generally) ought to be cleaning caches for anything they have loaded into RAM before launching an OS as a matter of good hygiene. Ian. > > > The flushing of the FDT seems to be required, but I'm not sure why. > > Does linux access the FDT with the > > same flat mapping used by edk2? 
I think that Xen uses a different > > virtual mapping, so I suppose this > > could cause problems with a virtually tagged cache. (I couldn't find > > a description of that detail regarding > > the caches.) I'd really like to understand why this flush is required > > for Xen, and to make sure there > > there isn't other internal edk2 state that would also need flushing. > > The D-caches should behave as if they are PIPT, so the virtual addresses > used should not be a problem. Linux maps the FDT in the swapper pgdir > rather than the idmap pgdir. > > Linux might be doing some work that happens to flush the relevant > portions of the cache, even if accidentally, before accessing the FDT. > > I would also like to understand what's going on here. > > Thanks, > Mark.
On Tue, Oct 14, 2014 at 10:35:23AM +0100, Ian Campbell wrote: > On Tue, 2014-10-14 at 10:21 +0100, Mark Rutland wrote: > > Hi Roy, > > > > [...] > > > > > It seems that for Xen we do need to flush the FDT as well - I get a > > > variety of crashes > > > with a corrupt FDT when cache state is modeled on the FVP model, and > > > Suravee sees similar > > > behavior on Seattle. I was not expecting this, as I looked at the code > > > in Xen and the caches/TLB > > > are enabled quite early on, before the FDT is accessed by Xen. I then > > > looked at the mappings > > > used by edk2 and Xen, and found some differences. Even after > > > modifying edk2 to use the same > > > configuration as Xen, the flushing of the FDT is still required. Xen > > > and edk2 use the same memory > > > attributes in the MAIR_EL2 register (0xFF), but had different > > > sharing, access perm, and nG settings. > > > > I don't think the access perm or nG settings should have any effect, but > > the shareability forms part of the memory attributes (along with the > > memory type and cacheability), and there are several rules that apply > > when accessing a memory location with mismatched attributes. See the > > ARMv8 ARM - The AArch64 Application Level Memory Model - Mismatched > > memory attributes. > > > > In Linux we're likely getting lucky, and the shareability we use varies > > for an SMP or UP kernel. So we need maintenance in at least one of those > > cases. This would also apply to any initrd or other image. > > > > Do you happen to know the shareability used by EDK2 and Xen? > > Xen maps everything inner-shareable. Dunno about EDK2. Ok. That matches what an SMP Linux kernel will do, so it looks like we're just getting lucky with Linux. I'll have a play and see if I can trigger similar issues. > Is the real issue here not a lack of specification for some corner cases > of the boot protocol? Can we get that fixed somehow? To an extent, yes.
We can try to fix up the Linux side with patches to Documentation/arm64/booting.txt. As far as I am aware, for UEFI that will require membership of the UEFI forum. > Part of me wants to suggest that UEFI (and bootloaders generally) ought > to be cleaning caches for anything they have loaded into RAM before > launching an OS as a matter of good hygiene. In general, yes. Unfortunately, UEFI can't perform the maintenance in this case, because the stub modifies things. I was under the impression it copied and modified the FDT to embed the command line -- UEFI has no visibility of this and therefore cannot be in charge of flushing it. So in this case, the stub needs to be thought of as the bootloader, and needs to be in charge of any required maintenance. There are a tonne of subtleties here, and certain properties we would like (e.g. a completely clean cache hierarchy upon entry to the OS) aren't necessarily possible to provide in general (thanks to the wonders of non-architected system level caches, interaction with bootloaders, etc). Mark.
On Tue, 2014-10-14 at 11:32 +0100, Mark Rutland wrote: > On Tue, Oct 14, 2014 at 10:35:23AM +0100, Ian Campbell wrote: > > On Tue, 2014-10-14 at 10:21 +0100, Mark Rutland wrote: > > > Hi Roy, > > > > > > [...] > > > > > > > It seems that for Xen we do need to flush the FDT as well - I get a > > > > variety of crashes > > > > with a corrupt FDT when cache state is modeled on the FVP model, and > > > > Suravee sees similar > > > > behavior on Seattle. I was not expecting this, as I looked at the code > > > > in Xen and the caches/TLB > > > > are enabled quite early on, before the FDT is accessed by Xen. I then > > > > looked at the mappings > > > > used by edk2 and Xen, and found some differences. Even after > > > > modifying edk2 to use the same > > > > configuration as Xen, the flushing of the FDT is still required. Xen > > > > and edk2 use the same memory > > > > attributes in the MAIR_EL2 register (0xFF), but had different > > > > sharing, access perm, and nG settings. > > > > > > I don't think the access perm or nG settings should have any effect, but > > > the shareability forms part of the memory attributes (along with the > > > memory type and cacheability), and there are several rules that apply > > > when accessing a memory location with mismatched attributes. See the > > > ARMv8 ARM - The AArch64 Application Level Memory Model - Mismatched > > > memory attributes. > > > > > > In Linux we're likely getting lucky, and the shareability we use varies > > > for an SMP or UP kernel. So we need maintenance in at least one of those > > > cases. This would also apply to any initrd or other image. > > > > > > Do you happen to know the shareability used by EDK2 and Xen? > > > > Xen maps everything inner-shareable. Dunno about EDK2. > > Ok. That matches what an SMP Linux kernel will do, so it looks like > we're just getting lucky with Linux. I'lll have a play and see if I can > trigger similar issues. 
> > > Is the real issue here not a lack of specification for some corner cases > > of the boot protocol? Can we get that fixed somehow? > > To an extent, yes. We can try to fix up the Linux side with patche to > Documentation/arm64/booting.txt. As far as I am aware, for UEFI that > will require membership of the UEFI forum. > Is Documentation/arm64/booting.txt relevant here since the kernel is being launched as an EFI app, which already has a standardised calling convention of its own? I suppose booting.txt is in addition to the UEFI convention. It probably would be best to formalise that (what if a second OS comes along with contradictory requirements?) > > Part of me wants to suggest that UEFI (and bootloaders generally) ought > > to be cleaning caches for anything they have loaded into RAM before > > launching an OS as a matter of good hygiene. > > In general, yes. > > Unfortunately, UEFI can't perform the maintenance in this case, because > the stub modifies things. I was under the impression it copied and > modified the FDT to embed the command line -- UEFI has no visibiltiy of > this and therefore cannot be in charge of flushing it. So in this case, > the stub needs to be thought of as the bootloader, and needs to be in > charge of any required maintenance. Right, that's what I was thinking. UEFI enters bootloader with everything it has done all nice and clean and consistent. For anything the stub then does, it is responsible for maintaining the cleanliness. > There are a tonne of subtleties here, and certain properties we would > like (e.g. a completely clean cache hierarchy upon entry to the OS) > aren't necessarily possible to provide in general (thanks to the wonders > of non-architected system level caches, interaction with bootloaders, > etc). I suppose it is easier for the UEFI implementation, since it knows the platform it runs on and therefore knows about the caches. Harder for the stub though :-/ Ian.
On Tue, Oct 14, 2014 at 11:39:37AM +0100, Ian Campbell wrote: > On Tue, 2014-10-14 at 11:32 +0100, Mark Rutland wrote: > > On Tue, Oct 14, 2014 at 10:35:23AM +0100, Ian Campbell wrote: > > > On Tue, 2014-10-14 at 10:21 +0100, Mark Rutland wrote: > > > > Hi Roy, > > > > > > > > [...] > > > > > > > > > It seems that for Xen we do need to flush the FDT as well - I get a > > > > > variety of crashes > > > > > with a corrupt FDT when cache state is modeled on the FVP model, and > > > > > Suravee sees similar > > > > > behavior on Seattle. I was not expecting this, as I looked at the code > > > > > in Xen and the caches/TLB > > > > > are enabled quite early on, before the FDT is accessed by Xen. I then > > > > > looked at the mappings > > > > > used by edk2 and Xen, and found some differences. Even after > > > > > modifying edk2 to use the same > > > > > configuration as Xen, the flushing of the FDT is still required. Xen > > > > > and edk2 use the same memory > > > > > attributes in the MAIR_EL2 register (0xFF), but had different > > > > > sharing, access perm, and nG settings. > > > > > > > > I don't think the access perm or nG settings should have any effect, but > > > > the shareability forms part of the memory attributes (along with the > > > > memory type and cacheability), and there are several rules that apply > > > > when accessing a memory location with mismatched attributes. See the > > > > ARMv8 ARM - The AArch64 Application Level Memory Model - Mismatched > > > > memory attributes. > > > > > > > > In Linux we're likely getting lucky, and the shareability we use varies > > > > for an SMP or UP kernel. So we need maintenance in at least one of those > > > > cases. This would also apply to any initrd or other image. > > > > > > > > Do you happen to know the shareability used by EDK2 and Xen? > > > > > > Xen maps everything inner-shareable. Dunno about EDK2. > > > > Ok. 
That matches what an SMP Linux kernel will do, so it looks like > > we're just getting lucky with Linux. I'lll have a play and see if I can > > trigger similar issues. > > > > > Is the real issue here not a lack of specification for some corner cases > > > of the boot protocol? Can we get that fixed somehow? > > > > To an extent, yes. We can try to fix up the Linux side with patche to > > Documentation/arm64/booting.txt. As far as I am aware, for UEFI that > > will require membership of the UEFI forum. > > > > Is Documentation/arm64/booting.txt relevant here since the kernel is > being launched as an EFI app, which already has a standardised calling > convention of its own. I suppose booting.txt is in addition to the UEFI > convention. It probably would be best to formalise that (what if a > second OS comes along with contradictory requirements?) If we're trying to fix up UEFI, that needs to happen at the UEFI forum level. I believe there are some additional requirements in SBSA/SBBR, but I haven't studied them in detail. If there are requirements that Linux needs to have met regardless of UEFI, we should ensure we mention that in booting.txt. It would be nice to have cross-OS agreement on boot protocols, but at the moment the table is somewhat empty beyond Linux and Xen. I had a conversation with the FreeBSD guys working on 64-bit ARM stuff, but they're still at an early stage, and I can't recall the specifics of their boot process. > > > Part of me wants to suggest that UEFI (and bootloaders generally) ought > > > to be cleaning caches for anything they have loaded into RAM before > > > launching an OS as a matter of good hygiene. > > > > In general, yes. > > > > Unfortunately, UEFI can't perform the maintenance in this case, because > > the stub modifies things. I was under the impression it copied and > > modified the FDT to embed the command line -- UEFI has no visibiltiy of > > this and therefore cannot be in charge of flushing it.
So in this case, > > the stub needs to be thought of as the bootloader, and needs to be in > > charge of any required maintenance. > > Right, that's what I was thinking. UEFI enters bootloader with > everything it has done all nice and clean and consistent. Anything the > stub then does it is responsible for maintaining the cleanliness. There are two horrible parts here: * EFI has no idea what a boot loader is. As far as it's aware, the kernel + efi stub is just another UEFI application until it calls ExitBootServices. For all UEFI knows, it may as well be a calculator until that point, and flushing the entire cache hierarchy for a calculator seems a little extreme. * Defining "nice and clean and consistent". As far as I am aware, UEFI may have an arbitrary set of mappings present during boot services time, with arbitrary drivers active. That means that UEFI can create dirty cache entries concurrently with the bootloader, in addition to the usual clean entries that can be allocated at any time thanks to speculative fetches. So while we're in the bootloader, any system level caches can have entries allocated to it, and as those aren't architected the only thing we can do is flush those by VA for the portions we care about. So we can have "initially consistent", but that might not be useful. > > There are a tonne of subtleties here, and certain properties we would > > like (e.g. a completely clean cache hierarchy upon entry to the OS) > > aren't necessarily possible to provide in general (thanks to the wonders > > of non-architected system level caches, interaction with bootloaders, > > etc). > > I suppose it is easier for the UEFI implementation, since it knows the > platform it runs on and there knows about the caches. Harder for the > stub though :-/ Yeah. 
System-level caches interact badly with pretty much any scenario where ownership of the MMU is transferred (UEFI boot, kexec), and there doesn't seem to be a single agent that can be charged with ownership of maintenance. This is something I've been meaning to revisit, but it takes a while to get back up to speed on the minutiae of the cache architecture and the rules for memory attributes, and I haven't had the time recently. We do have a very heavy hammer that we know will work: flushing the memory by PA in the stub once the MMU and caches are disabled. A back-of-the-envelope calculation shows that could take minutes to issue on a server machine (say 2GHz, with 16GB of RAM), so that's very much a last resort. We could try to manage the system caches explicitly, but then we need code to do so very early, we need to have them described in the appropriate firmware tables, and they need to be manageable from the non-secure side (which I believe is not always the case). That somewhat defeats the portability aspect of booting as an EFI application. So yes, it's harder for the stub :/ Mark.
On Tue, 2014-10-14 at 12:23 +0100, Mark Rutland wrote: > On Tue, Oct 14, 2014 at 11:39:37AM +0100, Ian Campbell wrote: > > On Tue, 2014-10-14 at 11:32 +0100, Mark Rutland wrote: > > > On Tue, Oct 14, 2014 at 10:35:23AM +0100, Ian Campbell wrote: > > > > On Tue, 2014-10-14 at 10:21 +0100, Mark Rutland wrote: > > > > > Hi Roy, > > > > > > > > > > [...] > > > > > > > > > > > It seems that for Xen we do need to flush the FDT as well - I get a > > > > > > variety of crashes > > > > > > with a corrupt FDT when cache state is modeled on the FVP model, and > > > > > > Suravee sees similar > > > > > > behavior on Seattle. I was not expecting this, as I looked at the code > > > > > > in Xen and the caches/TLB > > > > > > are enabled quite early on, before the FDT is accessed by Xen. I then > > > > > > looked at the mappings > > > > > > used by edk2 and Xen, and found some differences. Even after > > > > > > modifying edk2 to use the same > > > > > > configuration as Xen, the flushing of the FDT is still required. Xen > > > > > > and edk2 use the same memory > > > > > > attributes in the MAIR_EL2 register (0xFF), but had different > > > > > > sharing, access perm, and nG settings. > > > > > > > > > > I don't think the access perm or nG settings should have any effect, but > > > > > the shareability forms part of the memory attributes (along with the > > > > > memory type and cacheability), and there are several rules that apply > > > > > when accessing a memory location with mismatched attributes. See the > > > > > ARMv8 ARM - The AArch64 Application Level Memory Model - Mismatched > > > > > memory attributes. > > > > > > > > > > In Linux we're likely getting lucky, and the shareability we use varies > > > > > for an SMP or UP kernel. So we need maintenance in at least one of those > > > > > cases. This would also apply to any initrd or other image. > > > > > > > > > > Do you happen to know the shareability used by EDK2 and Xen? 
> > > > > > > > Xen maps everything inner-shareable. Dunno about EDK2. > > > > > > Ok. That matches what an SMP Linux kernel will do, so it looks like > > > we're just getting lucky with Linux. I'lll have a play and see if I can > > > trigger similar issues. > > > > > > > Is the real issue here not a lack of specification for some corner cases > > > > of the boot protocol? Can we get that fixed somehow? > > > > > > To an extent, yes. We can try to fix up the Linux side with patche to > > > Documentation/arm64/booting.txt. As far as I am aware, for UEFI that > > > will require membership of the UEFI forum. > > > > > > > Is Documentation/arm64/booting.txt relevant here since the kernel is > > being launched as an EFI app, which already has a standardised calling > > convention of its own. I suppose booting.txt is in addition to the UEFI > > convention. It probably would be best to formalise that (what if a > > second OS comes along with contradictory requirements?) > > If we're trying to fix up UEFI, that needs to happen at the UEFI forum > level. I believe there are some additional reqwuirements in SBSA/SBBR, > but I haven't studied them in detail. > > If there are requirements that Linux needs to have met regardless of > UEFI, we should ensure we mention that in booting.txt. > > It would be nice to have cross-OS agreement on boot protocols, but at > the moment the table is somewhat empty beyond Linux and Xen. I had a > conversation with the FreeBSD guys working on 64-bit ARM stuff, but > they're still at an early stage, and I can't recall the specifics of > their boot process. I was thinking (perhaps naïvely) that these problems would be mostly the same for any OS and that the solution ought to be specified in terms which allow any OS to know what to expect and/or what is expected of them. Really OSes ought to be designing their boot protocols within the set of constraints implied by the (improved) UEFI launching spec, not vice versa. 
> > > > Part of me wants to suggest that UEFI (and bootloaders generally) ought > > > > to be cleaning caches for anything they have loaded into RAM before > > > > launching an OS as a matter of good hygiene. > > > > > > In general, yes. > > > > > > Unfortunately, UEFI can't perform the maintenance in this case, because > > > the stub modifies things. I was under the impression it copied and > > > modified the FDT to embed the command line -- UEFI has no visibiltiy of > > > this and therefore cannot be in charge of flushing it. So in this case, > > > the stub needs to be thought of as the bootloader, and needs to be in > > > charge of any required maintenance. > > > > Right, that's what I was thinking. UEFI enters bootloader with > > everything it has done all nice and clean and consistent. Anything the > > stub then does it is responsible for maintaining the cleanliness. > > There are two horrible parts here: > > * EFI has no idea what a boot loader is. As far as it's aware, the > kernel + efi stub is just another UEFI application until it calls > ExitBootServices. For all UEFI knows, it may as well be a calculator > until that point, and flushing the entire cache hierarchy for a > calculator seems a little extreme. Most EFI applications are not that trivial though, and any non-trivial app is going to (with some reasonably high probability) need to touch the MMU. I don't see the problem with doing something which always works even if it might be overkill for some small subset of things you might be launching. > * Defining "nice and clean and consistent". > > As far as I am aware, UEFI may have an arbitrary set of mappings > present during boot services time, with arbitrary drivers active. > That means that UEFI can create dirty cache entries concurrently with > the bootloader, in addition to the usual clean entries that can be > allocated at any time thanks to speculative fetches. 
> > So while we're in the bootloader, any system level caches can have > entries allocated to it, and as those aren't architected the only > thing we can do is flush those by VA for the portions we care about. > > So we can have "initially consistent", but that might not be useful. Hrm, yes, rather unfortunate. > > > > There are a tonne of subtleties here, and certain properties we would > > > like (e.g. a completely clean cache hierarchy upon entry to the OS) > > > aren't necessarily possible to provide in general (thanks to the wonders > > > of non-architected system level caches, interaction with bootloaders, > > > etc). > > > > I suppose it is easier for the UEFI implementation, since it knows the > > platform it runs on and there knows about the caches. Harder for the > > stub though :-/ > > Yeah. System-level caches interact badly with pretty much any scenario > where ownership of the MMU is transferred (UEFI boot, kexec), and there > doesn't seem to be a single agent that can be charged with ownership of > maintenance. > > This is something I've been meaning to revisit, but it takes a while to > get back up to speed on the minutiae of the cache architecture and the > rules for memory attributes, and I haven't had the time recently. > > We do have a very heavy hammer that we know will work: flushing the > memory by PA in the stub once the MMU and caches are disabled. A > back-of-the-envelope calculation shows that could take minutes to issue > on a server machine (say 2GHz, with 16GB of RAM), so that's very much a > last resort. Ouch... > We could try to manage the system caches explicitly, but then we need > code to do so very early, we need to have them described in the > appropriate firmware tables, and they need to be manageable from the > non-secure side (which I believe is not always the case). That somewhat > defeat the portability aspect of booting as an EFI application. > > So yes, it's harder for the stub : Indeed. 
Probably this isn't even close to the correct venue. I'm not sure where better to transfer it though. One of the Linaro lists perhaps? Ian.
[...] > > It would be nice to have cross-OS agreement on boot protocols, but at > > the moment the table is somewhat empty beyond Linux and Xen. I had a > > conversation with the FreeBSD guys working on 64-bit ARM stuff, but > > they're still at an early stage, and I can't recall the specifics of > > their boot process. > > I was thinking (perhaps naïvely) that these problems would be mostly the > same for any OS and that the solution ought to be specified in terms > which allow any OS to know what to expect and/or what is expected of > them. Really OSes ought to be designing their boot protocols within the > set of constraints implied by the (improved) UEFI launching spec, not > vice versa. w.r.t. anything booting via UEFI, I would expect that to be covered by the output of the UEFI forum. The cross-OS agreement would be for stuff not covered by UEFI (e.g. booting without UEFI, whether to use the UEFI memory map or one provided elsewhere, etc). [...] > > > Right, that's what I was thinking. UEFI enters bootloader with > > > everything it has done all nice and clean and consistent. Anything the > > > stub then does it is responsible for maintaining the cleanliness. > > > > There are two horrible parts here: > > > > * EFI has no idea what a boot loader is. As far as it's aware, the > > kernel + efi stub is just another UEFI application until it calls > > ExitBootServices. For all UEFI knows, it may as well be a calculator > > until that point, and flushing the entire cache hierarchy for a > > calculator seems a little extreme. > > Most EFI applications are not that trivial though, and any non-trivial > app is going to (with some reasonably high probability) need to touch > the MMU. I don't see the problem with doing something which always works > even if it might be overkill for some small subset of things you might > be launching. That sounds reasonable to me. > > * Defining "nice and clean and consistent". 
> > > > As far as I am aware, UEFI may have an arbitrary set of mappings > > present during boot services time, with arbitrary drivers active. > > That means that UEFI can create dirty cache entries concurrently with > > the bootloader, in addition to the usual clean entries that can be > > allocated at any time thanks to speculative fetches. > > > > So while we're in the bootloader, any system level caches can have > > entries allocated to it, and as those aren't architected the only > > thing we can do is flush those by VA for the portions we care about. > > > > So we can have "initially consistent", but that might not be useful. > > Hrm, yes, rather unfortunate. > > > > > > > There are a tonne of subtleties here, and certain properties we would > > > > like (e.g. a completely clean cache hierarchy upon entry to the OS) > > > > aren't necessarily possible to provide in general (thanks to the wonders > > > > of non-architected system level caches, interaction with bootloaders, > > > > etc). > > > > > > I suppose it is easier for the UEFI implementation, since it knows the > > > platform it runs on and there knows about the caches. Harder for the > > > stub though :-/ > > > > Yeah. System-level caches interact badly with pretty much any scenario > > where ownership of the MMU is transferred (UEFI boot, kexec), and there > > doesn't seem to be a single agent that can be charged with ownership of > > maintenance. > > > > This is something I've been meaning to revisit, but it takes a while to > > get back up to speed on the minutiae of the cache architecture and the > > rules for memory attributes, and I haven't had the time recently. > > > > We do have a very heavy hammer that we know will work: flushing the > > memory by PA in the stub once the MMU and caches are disabled. A > > back-of-the-envelope calculation shows that could take minutes to issue > > on a server machine (say 2GHz, with 16GB of RAM), so that's very much a > > last resort. > > Ouch... 
Looking at that again, I was off by a factor of 1000, and that actually comes to about 0.13 seconds (though that is solely the time to issue the CMOs). So that might not be as blunt as I made it out to be, but it's still not great as platforms get larger. > > We could try to manage the system caches explicitly, but then we need > > code to do so very early, we need to have them described in the > > appropriate firmware tables, and they need to be manageable from the > > non-secure side (which I believe is not always the case). That somewhat > > defeat the portability aspect of booting as an EFI application. > > > > So yes, it's harder for the stub : > > Indeed. > > Probably this isn't even close to the correct venue. I'm not sure where > better to transfer it though. One of the Linaro lists perhaps? I'm not really sure where the right place is. There are quite a few parties who have an interest in this problem (whether they realise it or not). It would be nice to figure out more precisely what's happening here first, anyhow. Mark.
On Tue, Oct 14, 2014 at 7:30 AM, Mark Rutland <mark.rutland@arm.com> wrote: > [...] > >> > It would be nice to have cross-OS agreement on boot protocols, but at >> > the moment the table is somewhat empty beyond Linux and Xen. I had a >> > conversation with the FreeBSD guys working on 64-bit ARM stuff, but >> > they're still at an early stage, and I can't recall the specifics of >> > their boot process. >> >> I was thinking (perhaps naïvely) that these problems would be mostly the >> same for any OS and that the solution ought to be specified in terms >> which allow any OS to know what to expect and/or what is expected of >> them. Really OSes ought to be designing their boot protocols within the >> set of constraints implied by the (improved) UEFI launching spec, not >> vice versa. > > w.r.t. anything booting via UEFI, I would expect that to be covered by > the output of the UEFI forum. The cross-OS agreement would be for stuff > not covered by UEFI (e.g. booting without UEFI, whether to use the UEFI > memory map or one provided elsewhere, etc). > > [...] > >> > > Right, that's what I was thinking. UEFI enters bootloader with >> > > everything it has done all nice and clean and consistent. Anything the >> > > stub then does it is responsible for maintaining the cleanliness. >> > >> > There are two horrible parts here: >> > >> > * EFI has no idea what a boot loader is. As far as it's aware, the >> > kernel + efi stub is just another UEFI application until it calls >> > ExitBootServices. For all UEFI knows, it may as well be a calculator >> > until that point, and flushing the entire cache hierarchy for a >> > calculator seems a little extreme. >> >> Most EFI applications are not that trivial though, and any non-trivial >> app is going to (with some reasonably high probability) need to touch >> the MMU. I don't see the problem with doing something which always works >> even if it might be overkill for some small subset of things you might >> be launching. 
> > That sounds reasonable to me. > >> > * Defining "nice and clean and consistent". >> > >> > As far as I am aware, UEFI may have an arbitrary set of mappings >> > present during boot services time, with arbitrary drivers active. >> > That means that UEFI can create dirty cache entries concurrently with >> > the bootloader, in addition to the usual clean entries that can be >> > allocated at any time thanks to speculative fetches. UEFI specifies that memory in the EFI memory map is flat mapped, but I'd have to look to see if it prohibits other mappings in addition to that. Other mappings are implementation dependent (devices, etc. or memory not in the EFI memory map.) In reviewing the Aarch64 specific portion of the spec (section 2.3.6 Aarch64 Platforms) it says in part: · Implementations of boot services will enable architecturally manageable caches and TLBs i.e. those that can be managed directly using implementation independent registers using mechanisms and procedures defined in the ARM Architecture Reference Manual. They should not enable caches requiring platform information to manage or invoke non-architectural cache/ TLB lockdown mechanisms. Does this imply that system level caches should not be enabled? UEFI also specifies uni-processor, so we don't have to worry about other cores' caches. The spec does not mention the details of memory attributes - EDK2 currently maps memory as non-shared, attributes 0xFF. >> > >> > So while we're in the bootloader, any system level caches can have >> > entries allocated to it, and as those aren't architected the only >> > thing we can do is flush those by VA for the portions we care about. Maybe the firmware is 'wrong' to enable these caches? Are we guaranteed that these caches can be disabled on all implementations? Updating/clarifying the spec to have these disabled could simplify the problem a bit. >> > >> > So we can have "initially consistent", but that might not be useful. >> >> Hrm, yes, rather unfortunate. 
>> >> > >> > > > There are a tonne of subtleties here, and certain properties we would >> > > > like (e.g. a completely clean cache hierarchy upon entry to the OS) >> > > > aren't necessarily possible to provide in general (thanks to the wonders >> > > > of non-architected system level caches, interaction with bootloaders, >> > > > etc). >> > > >> > > I suppose it is easier for the UEFI implementation, since it knows the >> > > platform it runs on and there knows about the caches. Harder for the >> > > stub though :-/ >> > >> > Yeah. System-level caches interact badly with pretty much any scenario >> > where ownership of the MMU is transferred (UEFI boot, kexec), and there >> > doesn't seem to be a single agent that can be charged with ownership of >> > maintenance. >> > >> > This is something I've been meaning to revisit, but it takes a while to >> > get back up to speed on the minutiae of the cache architecture and the >> > rules for memory attributes, and I haven't had the time recently. >> > >> > We do have a very heavy hammer that we know will work: flushing the >> > memory by PA in the stub once the MMU and caches are disabled. A >> > back-of-the-envelope calculation shows that could take minutes to issue >> > on a server machine (say 2GHz, with 16GB of RAM), so that's very much a >> > last resort. >> >> Ouch... > > Looking at that again, I was off by an order of 1000, and that actually > comes to about 0.13 seconds (though solely for CMO issue). So that might > not be as blunt as I made it out to be, but it's still not great as > platforms get larger. I think we should be able to limit the memory we need to flush, as there should be no need to flush the free memory, just what is in use. I think that good portions, if not all of that could be flushed from the C code with caches enabled, as we know they won't be modified after that point (FDT, initrd, etc.) 
We can do this in C code after calling ExitBootServices(), and immediately before calling the Xen entry point efi_xen_start(). There are no EFI calls in this path between the last bit of C code and the disabling of caches and MMU, so I think we should be able to identify if anything would need to be flushed in the ASM code with caches off. > >> > We could try to manage the system caches explicitly, but then we need >> > code to do so very early, we need to have them described in the >> > appropriate firmware tables, and they need to be manageable from the >> > non-secure side (which I believe is not always the case). That somewhat >> > defeat the portability aspect of booting as an EFI application. >> > >> > So yes, it's harder for the stub : >> >> Indeed. >> >> Probably this isn't even close to the correct venue. I'm not sure where >> better to transfer it though. One of the Linaro lists perhaps? > > I'm not really sure where the right place is. There are quite a few > parties who have an interest in this problem (whether they realise it or > not). It would be nice to figure out more precisely what's happening > here first, anyhow. > > Mark. Glad I'm not the only one confused :) Getting back to the practical side of this, I'm thinking I (or Suravee) should update the patch to add the flushing of the FDT, as this is required for booting with the change to flush_dcache_area(), even if the exact mechanism isn't understood. This gets us a more correct and working implementation, but not a final/robust implementation. Roy
[...] > >> > As far as I am aware, UEFI may have an arbitrary set of mappings > >> > present during boot services time, with arbitrary drivers active. > >> > That means that UEFI can create dirty cache entries concurrently with > >> > the bootloader, in addition to the usual clean entries that can be > >> > allocated at any time thanks to speculative fetches. > > UEFI specifies that memory in the EFI memory map is flat mapped, but > I'd have to look to see if > it prohibits other mappings in addition to that. Other mappings are > implementation > dependent (devices, etc. or memory not in the EFI memory map.) Regardless of the set of mapping that may exist, the key point is that we don't know what may have been allocated into a cache. Any portion of memory could have entries in the cache hierarchy, which could be clean or dirty. > In reviewing the Aarch64 specific portion of the spec (section 2.3.6 > Aarch64 Platforms) > it says in part: > > · Implementations of boot services will enable architecturally > manageable caches and TLBs i.e. > those that can be managed directly using implementation independent > registers using > mechanisms and procedures defined in the ARM Architecture Reference > Manual. They should > not enable caches requiring platform information to manage or invoke > non-architectural cache/ > TLB lockdown mechanisms. > > Does this imply that system level caches should not be enabled? Arguably yes, but on a technicality no, because it's possible to flush them by VA (albeit extremely slowly). > UEFI also specifies uni-processor, so we don't have to worry about > other cores' caches. Ok. > The spec does not mention the details of memory attributes - EDK2 currently maps > memory as non-shared, attributes 0xFF. Ok. > >> > > >> > So while we're in the bootloader, any system level caches can have > >> > entries allocated to it, and as those aren't architected the only > >> > thing we can do is flush those by VA for the portions we care about. 
> > Maybe the firmware is 'wrong' to enable these caches? It is certainly arguable. > Are we guaranteed that these caches can be disabled on all > implementations? I believe on some implementations the non-secure side will not have access to the control registers. Beyond that I don't know. > Updating/clarifying the spec to have these disabled could simplify the > problem a bit. Possibly, yes. I'm not sure what we'd clarify it to say, however. > >> > So we can have "initially consistent", but that might not be useful. > >> > >> Hrm, yes, rather unfortunate. > >> > >> > > >> > > > There are a tonne of subtleties here, and certain properties we would > >> > > > like (e.g. a completely clean cache hierarchy upon entry to the OS) > >> > > > aren't necessarily possible to provide in general (thanks to the wonders > >> > > > of non-architected system level caches, interaction with bootloaders, > >> > > > etc). > >> > > > >> > > I suppose it is easier for the UEFI implementation, since it knows the > >> > > platform it runs on and there knows about the caches. Harder for the > >> > > stub though :-/ > >> > > >> > Yeah. System-level caches interact badly with pretty much any scenario > >> > where ownership of the MMU is transferred (UEFI boot, kexec), and there > >> > doesn't seem to be a single agent that can be charged with ownership of > >> > maintenance. > >> > > >> > This is something I've been meaning to revisit, but it takes a while to > >> > get back up to speed on the minutiae of the cache architecture and the > >> > rules for memory attributes, and I haven't had the time recently. > >> > > >> > We do have a very heavy hammer that we know will work: flushing the > >> > memory by PA in the stub once the MMU and caches are disabled. A > >> > back-of-the-envelope calculation shows that could take minutes to issue > >> > on a server machine (say 2GHz, with 16GB of RAM), so that's very much a > >> > last resort. > >> > >> Ouch... 
> > > > Looking at that again, I was off by an order of 1000, and that actually > > comes to about 0.13 seconds (though solely for CMO issue). So that might > > not be as blunt as I made it out to be, but it's still not great as > > platforms get larger. > > I think we should be able to limit the memory we need to flush, as > there should be no > need to flush the free memory, just what is in use. I think that good > portions, if not all of that > could be flushed from the C code with caches enabled, as we know they won't be > modified after that point (FDT, initrd, etc.) We can do this in C > code after calling > ExitBootServices(), and immediately before calling the Xen entry point > efi_xen_start(). > There are no EFI calls in this path between the last bit of C code and > the disabling > of caches and MMU, so I think we should be able to identify if > anything would need > to be flushed in the ASM code with caches off. I agree the vast majority of this maintenance could be done by C code. There might be a need to flush that free memory, depending on how it is mapped, unless you are proposing a lazy flush-before-use strategy. > >> > We could try to manage the system caches explicitly, but then we need > >> > code to do so very early, we need to have them described in the > >> > appropriate firmware tables, and they need to be manageable from the > >> > non-secure side (which I believe is not always the case). That somewhat > >> > defeat the portability aspect of booting as an EFI application. > >> > > >> > So yes, it's harder for the stub : > >> > >> Indeed. > >> > >> Probably this isn't even close to the correct venue. I'm not sure where > >> better to transfer it though. One of the Linaro lists perhaps? > > > > I'm not really sure where the right place is. There are quite a few > > parties who have an interest in this problem (whether they realise it or > > not). It would be nice to figure out more precisely what's happening > > here first, anyhow. 
> > > > Mark. > > Glad I'm not the only one confused :) Getting back to the practical > side of this, > I'm thinking I (or Suravee) should update the patch to add the > flushing of the FDT, > as this is required for booting with the change to flush_dcache_area(), even if > the exact mechanism isn't understood. This gets us a more correct and working > implementation, but not a final/robust implementation. On a practical front, yes. It would be nice to know if the attributes are actually the problem. Is it possible to build a UP Xen which maps memory as UEFI does (i.e. non-shareable)? Or is that problematic? Thanks, Mark.
On Tue, Oct 14, 2014 at 10:07 AM, Mark Rutland <mark.rutland@arm.com> wrote: > [...] > >> >> > As far as I am aware, UEFI may have an arbitrary set of mappings >> >> > present during boot services time, with arbitrary drivers active. >> >> > That means that UEFI can create dirty cache entries concurrently with >> >> > the bootloader, in addition to the usual clean entries that can be >> >> > allocated at any time thanks to speculative fetches. >> >> UEFI specifies that memory in the EFI memory map is flat mapped, but >> I'd have to look to see if >> it prohibits other mappings in addition to that. Other mappings are >> implementation >> dependent (devices, etc. or memory not in the EFI memory map.) > > Regardless of the set of mapping that may exist, the key point is that > we don't know what may have been allocated into a cache. Any portion of > memory could have entries in the cache hierarchy, which could be clean > or dirty. > >> In reviewing the Aarch64 specific portion of the spec (section 2.3.6 >> Aarch64 Platforms) >> it says in part: >> >> · Implementations of boot services will enable architecturally >> manageable caches and TLBs i.e. >> those that can be managed directly using implementation independent >> registers using >> mechanisms and procedures defined in the ARM Architecture Reference >> Manual. They should >> not enable caches requiring platform information to manage or invoke >> non-architectural cache/ >> TLB lockdown mechanisms. >> >> Does this imply that system level caches should not be enabled? > > Arguably yes, but on a technicality no, because it's possible to flush > them by VA (albeit extremely slowly). > >> UEFI also specifies uni-processor, so we don't have to worry about >> other cores' caches. > > Ok. > >> The spec does not mention the details of memory attributes - EDK2 currently maps >> memory as non-shared, attributes 0xFF. > > Ok. 
> >> >> > >> >> > So while we're in the bootloader, any system level caches can have >> >> > entries allocated to it, and as those aren't architected the only >> >> > thing we can do is flush those by VA for the portions we care about. >> >> Maybe the firmware is 'wrong' to enable these caches? > > It is certainly arguable. > >> Are we guaranteed that these caches can be disabled on all >> implementations? > > I believe on some implementations the non-secure side will not have > access to the control registers. Beyond that I don't know. > >> Updating/clarifying the spec to have these disabled could simplify the >> problem a bit. > > Possibly, yes. I'm not sure what we'd clarify it to say, however. > >> >> > So we can have "initially consistent", but that might not be useful. >> >> >> >> Hrm, yes, rather unfortunate. >> >> >> >> > >> >> > > > There are a tonne of subtleties here, and certain properties we would >> >> > > > like (e.g. a completely clean cache hierarchy upon entry to the OS) >> >> > > > aren't necessarily possible to provide in general (thanks to the wonders >> >> > > > of non-architected system level caches, interaction with bootloaders, >> >> > > > etc). >> >> > > >> >> > > I suppose it is easier for the UEFI implementation, since it knows the >> >> > > platform it runs on and there knows about the caches. Harder for the >> >> > > stub though :-/ >> >> > >> >> > Yeah. System-level caches interact badly with pretty much any scenario >> >> > where ownership of the MMU is transferred (UEFI boot, kexec), and there >> >> > doesn't seem to be a single agent that can be charged with ownership of >> >> > maintenance. >> >> > >> >> > This is something I've been meaning to revisit, but it takes a while to >> >> > get back up to speed on the minutiae of the cache architecture and the >> >> > rules for memory attributes, and I haven't had the time recently. 
>> >> > >> >> > We do have a very heavy hammer that we know will work: flushing the >> >> > memory by PA in the stub once the MMU and caches are disabled. A >> >> > back-of-the-envelope calculation shows that could take minutes to issue >> >> > on a server machine (say 2GHz, with 16GB of RAM), so that's very much a >> >> > last resort. >> >> >> >> Ouch... >> > >> > Looking at that again, I was off by an order of 1000, and that actually >> > comes to about 0.13 seconds (though solely for CMO issue). So that might >> > not be as blunt as I made it out to be, but it's still not great as >> > platforms get larger. >> >> I think we should be able to limit the memory we need to flush, as >> there should be no >> need to flush the free memory, just what is in use. I think that good >> portions, if not all of that >> could be flushed from the C code with caches enabled, as we know they won't be >> modified after that point (FDT, initrd, etc.) We can do this in C >> code after calling >> ExitBootServices(), and immediately before calling the Xen entry point >> efi_xen_start(). >> There are no EFI calls in this path between the last bit of C code and >> the disabling >> of caches and MMU, so I think we should be able to identify if >> anything would need >> to be flushed in the ASM code with caches off. > > I agree the vast majority of this maintenance could be done by C code. > > There might be a need to flush that free memory, depending on how it is > mapped, unless you are proposing a lazy flush-before-use strategy. Yeah, I was overlooking that even though Linux doesn't care what the content of the free memory is, some of that being cached will still cause problems later. > >> >> > We could try to manage the system caches explicitly, but then we need >> >> > code to do so very early, we need to have them described in the >> >> > appropriate firmware tables, and they need to be manageable from the >> >> > non-secure side (which I believe is not always the case). 
That somewhat >> >> > defeat the portability aspect of booting as an EFI application. >> >> > >> >> > So yes, it's harder for the stub : >> >> >> >> Indeed. >> >> >> >> Probably this isn't even close to the correct venue. I'm not sure where >> >> better to transfer it though. One of the Linaro lists perhaps? >> > >> > I'm not really sure where the right place is. There are quite a few >> > parties who have an interest in this problem (whether they realise it or >> > not). It would be nice to figure out more precisely what's happening >> > here first, anyhow. >> > >> > Mark. >> >> Glad I'm not the only one confused :) Getting back to the practical >> side of this, >> I'm thinking I (or Suravee) should update the patch to add the >> flushing of the FDT, >> as this is required for booting with the change to flush_dcache_area(), even if >> the exact mechanism isn't understood. This gets us a more correct and working >> implementation, but not a final/robust implementation. > > On a practical front, yes. > > It would be nice to know if the attributes are actually the problem. > Is it possible to build a UP Xen which maps memory as UEFI does (i.e. > non-shareable)? Or is that problematic? > > Thanks, > Mark. I tried the other way - making EDK2 mappings match what Xen was using. I started with changing the shareability to inner shareable, and verifying that the memory attributes in MAIR_EL2 register matched (a different AttrIndex was used.) The flushing was still required. I then modified EDK2 so that the entire low 12 bits of the block entry match Xen, and the flushing was still required. So I'm kind of stumped. Roy
On Tue, 2014-10-14 at 18:07 +0100, Mark Rutland wrote: > > Glad I'm not the only one confused :) Getting back to the practical > > side of this, > > I'm thinking I (or Suravee) should update the patch to add the > > flushing of the FDT, > > as this is required for booting with the change to flush_dcache_area(), even if > > the exact mechanism isn't understood. This gets us a more correct and working > > implementation, but not a final/robust implementation. > > On a practical front, yes. > > It would be nice to know if the attributes are actually the problem. > Is it possible to build a UP Xen which maps memory as UEFI does (i.e. > non-shareable)? Or is that problematic? I think it would get at least to the point where you would observe these issues, but I'm not sure (in fact I doubt) that you would make it as far as actually booting dom0. Ian.
On Tue, 14 Oct 2014, Mark Rutland wrote: > [...] > > > >> > As far as I am aware, UEFI may have an arbitrary set of mappings > > >> > present during boot services time, with arbitrary drivers active. > > >> > That means that UEFI can create dirty cache entries concurrently with > > >> > the bootloader, in addition to the usual clean entries that can be > > >> > allocated at any time thanks to speculative fetches. > > > > UEFI specifies that memory in the EFI memory map is flat mapped, but > > I'd have to look to see if > > it prohibits other mappings in addition to that. Other mappings are > > implementation > > dependent (devices, etc. or memory not in the EFI memory map.) > > Regardless of the set of mapping that may exist, the key point is that > we don't know what may have been allocated into a cache. Any portion of > memory could have entries in the cache hierarchy, which could be clean > or dirty. > > > In reviewing the Aarch64 specific portion of the spec (section 2.3.6 > > Aarch64 Platforms) > > it says in part: > > > > · Implementations of boot services will enable architecturally > > manageable caches and TLBs i.e. > > those that can be managed directly using implementation independent > > registers using > > mechanisms and procedures defined in the ARM Architecture Reference > > Manual. They should > > not enable caches requiring platform information to manage or invoke > > non-architectural cache/ > > TLB lockdown mechanisms. > > > > Does this imply that system level caches should not be enabled? > > Arguably yes, but on a technicality no, because it's possible to flush > them by VA (albeit extremely slowly). I think that this point should really be clearer at the spec level. > >> > So while we're in the bootloader, any system level caches can have > > >> > entries allocated to it, and as those aren't architected the only > > >> > thing we can do is flush those by VA for the portions we care about. 
> > > > Maybe the firmware is 'wrong' to enable these caches? > > It is certainly arguable. > > > Are we guaranteed that these caches can be disabled on all > > implementations? > > I believe on some implementations the non-secure side will not have > access to the control registers. Beyond that I don't know. > > > Updating/clarifying the spec to have these disabled could simplify the > > problem a bit. > > Possibly, yes. I'm not sure what we'd clarify it to say, however. We should start a discussion about this with the relevant parties. > > >> > So we can have "initially consistent", but that might not be useful. > > >> > > >> Hrm, yes, rather unfortunate. > > >> > > >> > > > >> > > > There are a tonne of subtleties here, and certain properties we would > > >> > > > like (e.g. a completely clean cache hierarchy upon entry to the OS) > > >> > > > aren't necessarily possible to provide in general (thanks to the wonders > > >> > > > of non-architected system level caches, interaction with bootloaders, > > >> > > > etc). > > >> > > > > >> > > I suppose it is easier for the UEFI implementation, since it knows the > > >> > > platform it runs on and there knows about the caches. Harder for the > > >> > > stub though :-/ > > >> > > > >> > Yeah. System-level caches interact badly with pretty much any scenario > > >> > where ownership of the MMU is transferred (UEFI boot, kexec), and there > > >> > doesn't seem to be a single agent that can be charged with ownership of > > >> > maintenance. > > >> > > > >> > This is something I've been meaning to revisit, but it takes a while to > > >> > get back up to speed on the minutiae of the cache architecture and the > > >> > rules for memory attributes, and I haven't had the time recently. > > >> > > > >> > We do have a very heavy hammer that we know will work: flushing the > > >> > memory by PA in the stub once the MMU and caches are disabled. 
A > > >> > back-of-the-envelope calculation shows that could take minutes to issue > > >> > on a server machine (say 2GHz, with 16GB of RAM), so that's very much a > > >> > last resort. > > >> > > >> Ouch... > > > > > > Looking at that again, I was off by an order of 1000, and that actually > > > comes to about 0.13 seconds (though solely for CMO issue). So that might > > > not be as blunt as I made it out to be, but it's still not great as > > > platforms get larger. > > > > I think we should be able to limit the memory we need to flush, as > > there should be no > > need to flush the free memory, just what is in use. I think that good > > portions, if not all of that > > could be flushed from the C code with caches enabled, as we know they won't be > > modified after that point (FDT, initrd, etc.) We can do this in C > > code after calling > > ExitBootServices(), and immediately before calling the Xen entry point > > efi_xen_start(). > > There are no EFI calls in this path between the last bit of C code and > > the disabling > > of caches and MMU, so I think we should be able to identify if > > anything would need > > to be flushed in the ASM code with caches off. > > I agree the vast majority of this maintenance could be done by C code. > > There might be a need to flush that free memory, depending on how it is > mapped, unless you are proposing a lazy flush-before-use strategy. Is it actually safe to only flush what we use (DTB, Xen, initrd, Linux)? What if the firmware wrote something else (ACPI tables?) that we might have to access? What if the firmware wrote something else that we don't care about? Xen scrubs all RAM early at boot, so this last point might not be an issue.
diff --git a/xen/arch/arm/arm64/cache.S b/xen/arch/arm/arm64/cache.S
index a445cbf..38f96c2 100644
--- a/xen/arch/arm/arm64/cache.S
+++ b/xen/arch/arm/arm64/cache.S
@@ -97,3 +97,35 @@ finished:
 	isb
 	ret
 ENDPROC(__flush_dcache_all)
+
+/*
+ * dcache_line_size - get the minimum D-cache line size (in bytes) into \reg; \tmp is clobbered.
+ */
+	.macro	dcache_line_size, reg, tmp
+	mrs	\tmp, ctr_el0			// read CTR
+	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
+	mov	\reg, #4			// bytes per word
+	lsl	\reg, \reg, \tmp		// actual cache line size
+	.endm
+
+/*
+ * __flush_dcache_area(kaddr, size)
+ *
+ * Clean and invalidate, by VA, every D-cache line overlapping
+ * [kaddr, kaddr + size). Clobbers x0-x3 and the condition flags.
+ *
+ * - kaddr - start address of the region (x0)
+ * - size  - size of the region in bytes (x1)
+ */
+ENTRY(__flush_dcache_area)
+	dcache_line_size x2, x3
+	add	x1, x0, x1			// x1 = end of region (exclusive)
+	sub	x3, x2, #1			// x3 = line size - 1 (alignment mask)
+	bic	x0, x0, x3			// round start down to a line boundary
+1:	dc	civac, x0			// clean & invalidate D line / unified line
+	add	x0, x0, x2			// advance to the next line
+	cmp	x0, x1
+	b.lo	1b				// unsigned compare: loop while below end
+	dsb	sy				// wait for the maintenance to complete
+	ret
+ENDPROC(__flush_dcache_area)
diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
index 7650abe..704f39d 100644
--- a/xen/arch/arm/arm64/head.S
+++ b/xen/arch/arm/arm64/head.S
@@ -740,16 +740,30 @@ ENTRY(lookup_processor_type)
  */
 ENTRY(efi_xen_start)
 	/*
+	 * Preserve x0 (FDT pointer) across call to __flush_dcache_area,
+	 * restore for entry into Xen.
+	 */
+	mov	x20, x0
+
+	/*
+	 * Flush dcache covering current runtime addresses
+	 * of xen text/data. Then flush all of icache.
+	 */
+	adrp	x0, _start
+	add	x0, x0, #:lo12:_start		// x0 = kaddr: start of the Xen image
+	adrp	x1, _end
+	add	x1, x1, #:lo12:_end
+	sub	x1, x1, x0			// x1 = size: _end - _start
+
+	bl	__flush_dcache_area
+	ic	ialluis
+
+	/*
 	 * Turn off cache and MMU as Xen expects. EFI enables them, but also
 	 * mandates a 1:1 (unity) VA->PA mapping, so we can turn off the
 	 * MMU while executing EFI code before entering Xen.
 	 * The EFI loader calls this to start Xen.
-	 * Preserve x0 (fdf pointer) across call to __flush_dcache_all,
-	 * restore for entry into Xen.
 	 */
-	mov	x20, x0
-	bl	__flush_dcache_all
-	ic	ialluis
 
 	/* Turn off Dcache and MMU */
 	mrs	x0, sctlr_el2