diff mbox

[v4,1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore on exits

Message ID 20150819174958.GA11518@cbox
State New
Headers show

Commit Message

Christoffer Dall Aug. 19, 2015, 5:49 p.m. UTC
Hi Mario,

On Wed, Aug 05, 2015 at 05:11:37PM +0100, Marc Zyngier wrote:
> On 16/07/15 22:29, Mario Smarduch wrote:
> > This patch only saves and restores FP/SIMD registers on Guest access. To do
> > this cptr_el2 FP/SIMD trap is set on Guest entry and later checked on exit.
> > lmbench, hackbench show significant improvements, for 30-50% exits FP/SIMD
> > context is not saved/restored
> > 
> > Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
> 
> So this patch seems to break 32bit guests on arm64.  I've had a look,
> squashed a few bugs that I dangerously overlooked during the review, but
> it still doesn't work (it doesn't crash anymore, but I get random
> illegal VFP instructions in 32bit guests).
> 
> I'd be glad if someone could eyeball the following patch and tell me
> what's going wrong. If we don't find the root cause quickly enough, I'll
> have to drop the series from -next, and that'd be a real shame.
> 
> Thanks,
> 
> 	M.
> 
> commit 5777dc55fbc170426a85e00c26002dd5a795cfa5
> Author: Marc Zyngier <marc.zyngier@arm.com>
> Date:   Wed Aug 5 16:53:01 2015 +0100
> 
>     KVM: arm64: NOTAFIX: Prevent crash when 32bit guest uses VFP
> 
>     Since we switch FPSIMD in a lazy way, access to FPEXC32_EL2
>     must be guarded by skip_fpsimd_state. Otherwise, all hell
>     break loose.
> 
>     Also, FPEXC32_EL2 must be restored when we trap to EL2 to
>     enable floating point.
> 
>     Note that while it prevents the host from catching fire, the
>     guest still doesn't work properly, and I don't understand why just
>     yet.
> 
>     Not-really-signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> 
> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> index c8e0c70..b53ec5d 100644
> --- a/arch/arm64/kvm/hyp.S
> +++ b/arch/arm64/kvm/hyp.S
> @@ -431,10 +431,12 @@
>  	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
>  	mrs	x4, dacr32_el2
>  	mrs	x5, ifsr32_el2
> -	mrs	x6, fpexc32_el2
>  	stp	x4, x5, [x3]
> -	str	x6, [x3, #16]
> 
> +	skip_fpsimd_state x8, 3f
> +	mrs	x6, fpexc32_el2
> +	str	x6, [x3, #16]
> +3:
>  	skip_debug_state x8, 2f
>  	mrs	x7, dbgvcr32_el2
>  	str	x7, [x3, #24]
> @@ -461,10 +463,8 @@
> 
>  	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
>  	ldp	x4, x5, [x3]
> -	ldr	x6, [x3, #16]
>  	msr	dacr32_el2, x4
>  	msr	ifsr32_el2, x5
> -	msr	fpexc32_el2, x6
> 
>  	skip_debug_state x8, 2f
>  	ldr	x7, [x3, #24]
> @@ -669,12 +669,14 @@ __restore_debug:
>  	ret
> 
>  __save_fpsimd:
> +	skip_fpsimd_state x3, 1f
>  	save_fpsimd
> -	ret
> +1:	ret
> 
>  __restore_fpsimd:
> +	skip_fpsimd_state x3, 1f
>  	restore_fpsimd
> -	ret
> +1:	ret
> 
>  switch_to_guest_fpsimd:
>  	push	x4, lr
> @@ -682,6 +684,7 @@ switch_to_guest_fpsimd:
>  	mrs	x2, cptr_el2
>  	bic	x2, x2, #CPTR_EL2_TFP
>  	msr	cptr_el2, x2
> +	isb
> 
>  	mrs	x0, tpidr_el2
> 
> @@ -692,6 +695,10 @@ switch_to_guest_fpsimd:
>  	add	x2, x0, #VCPU_CONTEXT
>  	bl __restore_fpsimd
> 
> +	skip_32bit_state x3, 1f
> +	ldr	x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)]
> +	msr	fpexc32_el2, x4
> +1:
>  	pop	x4, lr
>  	pop	x2, x3
>  	pop	x0, x1
> @@ -754,9 +761,7 @@ __kvm_vcpu_return:
>  	add	x2, x0, #VCPU_CONTEXT
> 
>  	save_guest_regs
> -	skip_fpsimd_state x3, 1f
>  	bl __save_fpsimd
> -1:
>  	bl __save_sysregs
> 
>  	skip_debug_state x3, 1f
> @@ -777,9 +782,7 @@ __kvm_vcpu_return:
>  	kern_hyp_va x2
> 
>  	bl __restore_sysregs
> -	skip_fpsimd_state x3, 1f
>  	bl __restore_fpsimd
> -1:
>  	/* Clear FPSIMD and Trace trapping */
>  	msr     cptr_el2, xzr
> 
> 

Marc and I have hunted down the issue at KVM Forum and we believe we've
found the issue.  Please have a look at the following follow-up patch to
Marc's patch above:



Thanks,
-Christoffer
diff mbox

Patch

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 8b2a73b4..842e727 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -769,11 +769,26 @@ 
 
 .macro activate_traps
 	ldr     x2, [x0, #VCPU_HCR_EL2]
+
+	/*
+	 * We are about to set CPTR_EL2.TFP to trap all floating point
+	 * register accesses to EL2, however, the ARM ARM clearly states that
+	 * traps are only taken to EL2 if the operation would not otherwise
+	 * trap to EL1.  Therefore, always make sure that for 32-bit guests,
+	 * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
+	 */
+	tbnz	x2, #HCR_RW_SHIFT, 99f // open code skip_32bit_state
+	mov	x3, #(1 << 30)
+	msr	fpexc32_el2, x3
+	isb
+99:
+
 	msr     hcr_el2, x2
 	mov	x2, #CPTR_EL2_TTA
 	orr     x2, x2, #CPTR_EL2_TFP
 	msr	cptr_el2, x2
 
+
 	mov	x2, #(1 << 15)	// Trap CP15 Cr=15
 	msr	hstr_el2, x2