diff mbox

[Xen-devel,V2,for-4.5] xen/arm64: Use __flush_dcache_area instead of __flush_dcache_all

Message ID 1413431279-17559-1-git-send-email-roy.franz@linaro.org
State New
Headers show

Commit Message

Roy Franz Oct. 16, 2014, 3:47 a.m. UTC
From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>

When booting with EFI, __flush_dcache_all does not correctly flush data.
According to Mark Rutland, __flush_dcache_all is not guaranteed to push
data to the PoC if there is a system-level cache as it uses Set/Way
operations.  Therefore, this patch switchs to use the "__flush_dcache_area"
mechanism, which is coppied from Linux.
Add flushing of FDT in addition to Xen text/data.
Remove now unused __flush_dcache_all and related helper functions.
Invalidate the instruction tlb before turning on paging
later on when starting Xen in EL2.

Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
Signed-off-by: Roy Franz <roy.franz@linaro.org>
Tested-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
---
Changes since v1:
* Added flushing of FDT memory region
* Remove used __flush_dcache_all function, and related helper functions
* Fix typo in comment
* Properly set base address in __flush_dcache_area call.
* Add flush of instruction TLB

 xen/arch/arm/arm64/cache.S | 89 +++++++++++-----------------------------------
 xen/arch/arm/arm64/head.S  | 30 +++++++++++++---
 2 files changed, 46 insertions(+), 73 deletions(-)

Comments

Ian Campbell Oct. 20, 2014, 10:59 a.m. UTC | #1
On Wed, 2014-10-15 at 20:47 -0700, Roy Franz wrote:
> +        /* flush dcache covering the FDT updated by EFI boot code */
> +        mov   x1, 0x200000        /* max size of FDT allowed */
> +        bl    __flush_dcache_area

Since we are now flushing by VA doesn't this run the risk of overrunning
the end of the 1:1 map and faulting? I think we haven't yet switched to
our own 2M mapping. Also this will flush for 2M even from a non-2M
aligned starting point, which might cross the boundary of even a 2M
mapping. (I think there is probably a related shortcoming in the regular
head.S, if the DTB is very near the end of a 2M region).

The FDT has a totalsize word (4 bytes from the start). I guess it would
be simple enough to check the magic number at offset 0 and then read the
length from offset 4 (don't forget to endian swap), especially since I
originally thought we would be looking at a full parsing loop (phew!).
We'd probably also want to clamp the value to 2M or some other sanity
check value.

Alternatively we might be able to defer the flush until head.S has made
the mapping in the BOOT_MISC slot, but I'm not 100% sure about that.

Ian.
Roy Franz Oct. 20, 2014, 7:20 p.m. UTC | #2
On Mon, Oct 20, 2014 at 3:59 AM, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> On Wed, 2014-10-15 at 20:47 -0700, Roy Franz wrote:
>> +        /* flush dcache covering the FDT updated by EFI boot code */
>> +        mov   x1, 0x200000        /* max size of FDT allowed */
>> +        bl    __flush_dcache_area
>
> Since we are now flushing by VA doesn't this run the risk of overrunning
> the end of the 1:1 map and faulting?
(Dropping Jan from ARM-specific thread...)

Yes it does..


> I think we haven't yet switched to
> our own 2M mapping. Also this will flush for 2M even from a non-2M
> aligned starting point, which might cross the boundary of even a 2M
> mapping. (I think there is probably a related shortcoming in the regular
> head.S, if the DTB is very near the end of a 2M region).
>
> The FDT has a totalsize word (4 bytes from the start). I guess it would
> be simple enough to check the magic number at offset 0 and then read the
> length from offset 4 (don't forget to endian swap), especially since I
> originally thought we would be looking at a full parsing loop (phew!).
> We'd probably also want to clamp the value to 2M or some other sanity
> check value.
>
> Alternatively we might be able to defer the flush until head.S has made
> the mapping in the BOOT_MISC slot, but I'm not 100% sure about that.
>
> Ian.
>
I think the simplest thing to do is to have the C code that calls this
(and just created
the FDT) to pass the size of the FDT to efi_xen_start() so it can
flush the exact amount.
This size is the actual size used (and equal or smaller than the
amount allocated),
so won't overrun the mapping, and should be a sane value.

I should be able to get an updated version out today.

Roy
Ian Campbell Oct. 21, 2014, 8:09 a.m. UTC | #3
On Mon, 2014-10-20 at 12:20 -0700, Roy Franz wrote:
> I think the simplest thing to do is to have the C code that calls this

Ah, I'd forgotten there was some C-land nearby, yes that would indeed be
far more convenient!

(and I've just seen the updated patch, applying shortly...)

Ian.
diff mbox

Patch

diff --git a/xen/arch/arm/arm64/cache.S b/xen/arch/arm/arm64/cache.S
index a445cbf..eff4e16 100644
--- a/xen/arch/arm/arm64/cache.S
+++ b/xen/arch/arm/arm64/cache.S
@@ -20,80 +20,33 @@ 
  */
 
 /*
- * Enable and disable interrupts.
+ * dcache_line_size - get the minimum D-cache line size from the CTR register.
  */
-	.macro	disable_irq
-	msr	daifset, #2
-	.endm
-
-	.macro	enable_irq
-	msr	daifclr, #2
-	.endm
-
-/*
- * Save/disable and restore interrupts.
- */
-	.macro	save_and_disable_irqs, olddaif
-	mrs	\olddaif, daif
-	disable_irq
-	.endm
-
-	.macro	restore_irqs, olddaif
-	msr	daif, \olddaif
+	.macro	dcache_line_size, reg, tmp
+	mrs	\tmp, ctr_el0			// read CTR
+	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
+	mov	\reg, #4			// bytes per word
+	lsl	\reg, \reg, \tmp		// actual cache line size
 	.endm
 
 /*
- *	__flush_dcache_all()
+ *	__flush_dcache_area(kaddr, size)
  *
- *	Flush the whole D-cache.
+ *	Ensure that the data held in the page kaddr is written back to the
+ *	page in question.
  *
- *	Corrupted registers: x0-x7, x9-x11
+ *	- kaddr   - kernel address
+ *	- size    - size in question
  */
-ENTRY(__flush_dcache_all)
-	dmb	sy				// ensure ordering with previous memory accesses
-	mrs	x0, clidr_el1			// read clidr
-	and	x3, x0, #0x7000000		// extract loc from clidr
-	lsr	x3, x3, #23			// left align loc bit field
-	cbz	x3, finished			// if loc is 0, then no need to clean
-	mov	x10, #0				// start clean at cache level 0
-loop1:
-	add	x2, x10, x10, lsr #1		// work out 3x current cache level
-	lsr	x1, x0, x2			// extract cache type bits from clidr
-	and	x1, x1, #7			// mask of the bits for current cache only
-	cmp	x1, #2				// see what cache we have at this level
-	b.lt	skip				// skip if no cache, or just i-cache
-	save_and_disable_irqs x9		// make CSSELR and CCSIDR access atomic
-	msr	csselr_el1, x10			// select current cache level in csselr
-	isb					// isb to sych the new cssr&csidr
-	mrs	x1, ccsidr_el1			// read the new ccsidr
-	restore_irqs x9
-	and	x2, x1, #7			// extract the length of the cache lines
-	add	x2, x2, #4			// add 4 (line length offset)
-	mov	x4, #0x3ff
-	and	x4, x4, x1, lsr #3		// find maximum number on the way size
-	clz	w5, w4				// find bit position of way size increment
-	mov	x7, #0x7fff
-	and	x7, x7, x1, lsr #13		// extract max number of the index size
-loop2:
-	mov	x9, x4				// create working copy of max way size
-loop3:
-	lsl	x6, x9, x5
-	orr	x11, x10, x6			// factor way and cache number into x11
-	lsl	x6, x7, x2
-	orr	x11, x11, x6			// factor index number into x11
-	dc	cisw, x11			// clean & invalidate by set/way
-	subs	x9, x9, #1			// decrement the way
-	b.ge	loop3
-	subs	x7, x7, #1			// decrement the index
-	b.ge	loop2
-skip:
-	add	x10, x10, #2			// increment cache number
-	cmp	x3, x10
-	b.gt	loop1
-finished:
-	mov	x10, #0				// swith back to cache level 0
-	msr	csselr_el1, x10			// select current cache level in csselr
+ENTRY(__flush_dcache_area)
+	dcache_line_size x2, x3
+	add	x1, x0, x1
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+1:	dc	civac, x0			// clean & invalidate D line / unified line
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
 	dsb	sy
-	isb
 	ret
-ENDPROC(__flush_dcache_all)
+ENDPROC(__flush_dcache_area)
diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
index 7650abe..85dd37a 100644
--- a/xen/arch/arm/arm64/head.S
+++ b/xen/arch/arm/arm64/head.S
@@ -740,16 +740,36 @@  ENTRY(lookup_processor_type)
  */
 ENTRY(efi_xen_start)
         /*
+         * Preserve x0 (fdt pointer) across call to __flush_dcache_area,
+         * restore for entry into Xen.
+         */
+        mov   x20, x0
+
+        /* flush dcache covering the FDT updated by EFI boot code */
+        mov   x1, 0x200000        /* max size of FDT allowed */
+        bl    __flush_dcache_area
+
+        /*
+         * Flush dcache covering current runtime addresses
+         * of xen text/data. Then flush all of icache.
+         */
+        adrp  x1, _start
+        add   x1, x1, #:lo12:_start
+        mov   x0, x1
+        adrp  x2, _end
+        add   x2, x2, #:lo12:_end
+        sub   x1, x2, x1
+
+        bl    __flush_dcache_area
+        ic    ialluis
+        tlbi alle2
+
+        /*
          * Turn off cache and MMU as Xen expects. EFI enables them, but also
          * mandates a 1:1 (unity) VA->PA mapping, so we can turn off the
          * MMU while executing EFI code before entering Xen.
          * The EFI loader calls this to start Xen.
-         * Preserve x0 (fdf pointer) across call to __flush_dcache_all,
-         * restore for entry into Xen.
          */
-        mov   x20, x0
-        bl    __flush_dcache_all
-        ic    ialluis
 
         /* Turn off Dcache and MMU */
         mrs   x0, sctlr_el2