diff mbox series

[v4,12/14] KVM: arm64: Support FEAT_FPMR for guests

Message ID 20240122-arm64-2023-dpisa-v4-12-776e094861df@kernel.org
State New
Headers show
Series arm64: Support for 2023 DPISA extensions | expand

Commit Message

Mark Brown Jan. 22, 2024, 4:28 p.m. UTC
FEAT_FPMR introduces a new system register FPMR which allows configuration
of floating point behaviour, currently for FP8 specific features. Allow use
of this in guests, disabling the trap while guests are running and saving
and restoring the value along with the rest of the floating point state.
Since FPMR is stored immediately after the main floating point state we
share it with the hypervisor by adjusting the size of the shared region.

Access to FPMR is covered by both a register specific trap HCRX_EL2.EnFPM
and the overall floating point access trap so we just unconditionally
enable the FPMR specific trap and rely on the floating point access trap to
detect guest floating point usage.

Signed-off-by: Mark Brown <broonie@kernel.org>
---
 arch/arm64/include/asm/kvm_arm.h        |  2 +-
 arch/arm64/include/asm/kvm_host.h       |  3 ++-
 arch/arm64/kvm/emulate-nested.c         |  8 ++++++++
 arch/arm64/kvm/fpsimd.c                 |  2 +-
 arch/arm64/kvm/hyp/include/hyp/switch.h |  7 ++++++-
 arch/arm64/kvm/sys_regs.c               | 11 +++++++++++
 6 files changed, 29 insertions(+), 4 deletions(-)

Comments

Marc Zyngier Feb. 23, 2024, 11:18 a.m. UTC | #1
On Mon, 22 Jan 2024 16:28:15 +0000,
Mark Brown <broonie@kernel.org> wrote:
> 
> FEAT_FPMR introduces a new system register FPMR which allows configuration
> of floating point behaviour, currently for FP8 specific features. Allow use
> of this in guests, disabling the trap while guests are running and saving
> and restoring the value along with the rest of the floating point state.
> Since FPMR is stored immediately after the main floating point state we
> share it with the hypervisor by adjusting the size of the shared region.
> 
> Access to FPMR is covered by both a register specific trap HCRX_EL2.EnFPM
> and the overall floating point access trap so we just unconditionally
> enable the FPMR specific trap and rely on the floating point access trap to
> detect guest floating point usage.
> 
> Signed-off-by: Mark Brown <broonie@kernel.org>
> ---
>  arch/arm64/include/asm/kvm_arm.h        |  2 +-
>  arch/arm64/include/asm/kvm_host.h       |  3 ++-
>  arch/arm64/kvm/emulate-nested.c         |  8 ++++++++
>  arch/arm64/kvm/fpsimd.c                 |  2 +-
>  arch/arm64/kvm/hyp/include/hyp/switch.h |  7 ++++++-
>  arch/arm64/kvm/sys_regs.c               | 11 +++++++++++
>  6 files changed, 29 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index 7f45ce9170bb..b2ddb6165953 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -103,7 +103,7 @@
>  #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
>  
>  #define HCRX_GUEST_FLAGS \
> -	(HCRX_EL2_SMPME | HCRX_EL2_TCR2En | \
> +	(HCRX_EL2_SMPME | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM | \

No. We don't do that anymore. This can only be enabled if the guest
has it advertised via ID_AA64PFR2_EL1.FPMR.

>  	 (cpus_have_final_cap(ARM64_HAS_MOPS) ? (HCRX_EL2_MSCEn | HCRX_EL2_MCE2) : 0))
>  #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
>  
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index c4fdcc94d733..99c0f8944f04 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -384,6 +384,8 @@ enum vcpu_sysreg {
>  	APGAKEYLO_EL1,
>  	APGAKEYHI_EL1,
>  
> +	FPMR,
> +
>  	/* Memory Tagging Extension registers */
>  	RGSR_EL1,	/* Random Allocation Tag Seed Register */
>  	GCR_EL1,	/* Tag Control Register */
> @@ -544,7 +546,6 @@ struct kvm_vcpu_arch {
>  	enum fp_type fp_type;
>  	unsigned int sve_max_vl;
>  	u64 svcr;
> -	unsigned long fpmr;
>  
>  	/* Stage 2 paging state used by the hardware on next switch */
>  	struct kvm_s2_mmu *hw_mmu;
> diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
> index 431fd429932d..3af5fd0e28dc 100644
> --- a/arch/arm64/kvm/emulate-nested.c
> +++ b/arch/arm64/kvm/emulate-nested.c
> @@ -67,6 +67,8 @@ enum cgt_group_id {
>  	CGT_HCR_TTLBIS,
>  	CGT_HCR_TTLBOS,
>  
> +	CGT_HCRX_EnFPM,
> +
>  	CGT_MDCR_TPMCR,
>  	CGT_MDCR_TPM,
>  	CGT_MDCR_TDE,
> @@ -279,6 +281,11 @@ static const struct trap_bits coarse_trap_bits[] = {
>  		.mask		= HCR_TTLBOS,
>  		.behaviour	= BEHAVE_FORWARD_ANY,
>  	},
> +	[CGT_HCRX_EnFPM] = {
> +		.index		= HCRX_EL2,
> +		.mask		= HCRX_EL2_EnFPM,
> +		.behaviour	= BEHAVE_FORWARD_ANY,

This is obviously incorrect.

> +	},
>  	[CGT_MDCR_TPMCR] = {
>  		.index		= MDCR_EL2,
>  		.value		= MDCR_EL2_TPMCR,
> @@ -478,6 +485,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
>  	SR_TRAP(SYS_AIDR_EL1,		CGT_HCR_TID1),
>  	SR_TRAP(SYS_SMIDR_EL1,		CGT_HCR_TID1),
>  	SR_TRAP(SYS_CTR_EL0,		CGT_HCR_TID2),
> +	SR_TRAP(SYS_FPMR,		CGT_HCRX_EnFPM),
>  	SR_TRAP(SYS_CCSIDR_EL1,		CGT_HCR_TID2_TID4),
>  	SR_TRAP(SYS_CCSIDR2_EL1,	CGT_HCR_TID2_TID4),
>  	SR_TRAP(SYS_CLIDR_EL1,		CGT_HCR_TID2_TID4),
> diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
> index 6cf22cd8f020..9e002489c843 100644
> --- a/arch/arm64/kvm/fpsimd.c
> +++ b/arch/arm64/kvm/fpsimd.c
> @@ -152,7 +152,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
>  		fp_state.sve_vl = vcpu->arch.sve_max_vl;
>  		fp_state.sme_state = NULL;
>  		fp_state.svcr = &vcpu->arch.svcr;
> -		fp_state.fpmr = &vcpu->arch.fpmr;
> +		fp_state.fpmr = (unsigned long *)&__vcpu_sys_reg(vcpu, FPMR);
>  		fp_state.fp_type = &vcpu->arch.fp_type;
>  
>  		if (vcpu_has_sve(vcpu))
> diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
> index 27fcdfd432b9..abf785c473d0 100644
> --- a/arch/arm64/kvm/hyp/include/hyp/switch.h
> +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
> @@ -370,10 +370,15 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
>  	isb();
>  
>  	/* Write out the host state if it's in the registers */
> -	if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED)
> +	if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED) {
>  		__fpsimd_save_state(&(vcpu->arch.host_uw->fpsimd_state));
> +		if (cpus_have_final_cap(ARM64_HAS_FPMR))

Same thing as above: this cannot be unconditional.

> +			vcpu->arch.host_uw->fpmr = read_sysreg_s(SYS_FPMR);
> +	}
>  
>  	/* Restore the guest state */
> +	if (cpus_have_final_cap(ARM64_HAS_FPMR))
> +		write_sysreg_s(__vcpu_sys_reg(vcpu, FPMR), SYS_FPMR);
>  	if (sve_guest)
>  		__hyp_sve_restore_guest(vcpu);
>  	else
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 38503b1cd2eb..216eac44c124 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -2067,6 +2067,15 @@ static unsigned int hidden_user_visibility(const struct kvm_vcpu *vcpu,
>  	.visibility = hidden_user_visibility,	\
>  }
>  
> +static unsigned int fpmr_visibility(const struct kvm_vcpu *vcpu,
> +				    const struct sys_reg_desc *rd)
> +{
> +	if (cpus_have_final_cap(ARM64_HAS_FPMR))
> +		return 0;

Same thing.

> +
> +	return REG_HIDDEN;
> +}
> +
>  /*
>   * Since reset() callback and field val are not used for idregs, they will be
>   * used for specific purposes for idregs.
> @@ -2463,6 +2472,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
>  	{ SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
>  	{ SYS_DESC(SYS_CTR_EL0), access_ctr },
>  	{ SYS_DESC(SYS_SVCR), undef_access },
> +	{ SYS_DESC(SYS_FPMR), access_rw, reset_unknown, FPMR,
> +	  .visibility = fpmr_visibility },
>  
>  	{ PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
>  	  .reg = PMCR_EL0, .get_user = get_pmcr, .set_user = set_pmcr },
> 

Thanks,

	M.
Mark Brown Feb. 23, 2024, 2:46 p.m. UTC | #2
On Fri, Feb 23, 2024 at 11:18:51AM +0000, Marc Zyngier wrote:
> Mark Brown <broonie@kernel.org> wrote:

> >  #define HCRX_GUEST_FLAGS \
> > -	(HCRX_EL2_SMPME | HCRX_EL2_TCR2En | \
> > +	(HCRX_EL2_SMPME | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM | \

> No. We don't do that anymore. This can only be enabled if the guest
> has it advertised via ID_AA64PFR2_EL1.FPMR.

Right, as mentioned in the cover letter (and previously discussed with
one of the other serieses) this needs a rework against your at the time
of posting still pending changes to parse the ID registers.  It looks
like everything is there for those now so I'll do that after the merge
window.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 7f45ce9170bb..b2ddb6165953 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -103,7 +103,7 @@ 
 #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
 
 #define HCRX_GUEST_FLAGS \
-	(HCRX_EL2_SMPME | HCRX_EL2_TCR2En | \
+	(HCRX_EL2_SMPME | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM | \
 	 (cpus_have_final_cap(ARM64_HAS_MOPS) ? (HCRX_EL2_MSCEn | HCRX_EL2_MCE2) : 0))
 #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index c4fdcc94d733..99c0f8944f04 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -384,6 +384,8 @@  enum vcpu_sysreg {
 	APGAKEYLO_EL1,
 	APGAKEYHI_EL1,
 
+	FPMR,
+
 	/* Memory Tagging Extension registers */
 	RGSR_EL1,	/* Random Allocation Tag Seed Register */
 	GCR_EL1,	/* Tag Control Register */
@@ -544,7 +546,6 @@  struct kvm_vcpu_arch {
 	enum fp_type fp_type;
 	unsigned int sve_max_vl;
 	u64 svcr;
-	unsigned long fpmr;
 
 	/* Stage 2 paging state used by the hardware on next switch */
 	struct kvm_s2_mmu *hw_mmu;
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 431fd429932d..3af5fd0e28dc 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -67,6 +67,8 @@  enum cgt_group_id {
 	CGT_HCR_TTLBIS,
 	CGT_HCR_TTLBOS,
 
+	CGT_HCRX_EnFPM,
+
 	CGT_MDCR_TPMCR,
 	CGT_MDCR_TPM,
 	CGT_MDCR_TDE,
@@ -279,6 +281,11 @@  static const struct trap_bits coarse_trap_bits[] = {
 		.mask		= HCR_TTLBOS,
 		.behaviour	= BEHAVE_FORWARD_ANY,
 	},
+	[CGT_HCRX_EnFPM] = {
+		.index		= HCRX_EL2,
+		.mask		= HCRX_EL2_EnFPM,
+		.behaviour	= BEHAVE_FORWARD_ANY,
+	},
 	[CGT_MDCR_TPMCR] = {
 		.index		= MDCR_EL2,
 		.value		= MDCR_EL2_TPMCR,
@@ -478,6 +485,7 @@  static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
 	SR_TRAP(SYS_AIDR_EL1,		CGT_HCR_TID1),
 	SR_TRAP(SYS_SMIDR_EL1,		CGT_HCR_TID1),
 	SR_TRAP(SYS_CTR_EL0,		CGT_HCR_TID2),
+	SR_TRAP(SYS_FPMR,		CGT_HCRX_EnFPM),
 	SR_TRAP(SYS_CCSIDR_EL1,		CGT_HCR_TID2_TID4),
 	SR_TRAP(SYS_CCSIDR2_EL1,	CGT_HCR_TID2_TID4),
 	SR_TRAP(SYS_CLIDR_EL1,		CGT_HCR_TID2_TID4),
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 6cf22cd8f020..9e002489c843 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -152,7 +152,7 @@  void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
 		fp_state.sve_vl = vcpu->arch.sve_max_vl;
 		fp_state.sme_state = NULL;
 		fp_state.svcr = &vcpu->arch.svcr;
-		fp_state.fpmr = &vcpu->arch.fpmr;
+		fp_state.fpmr = (unsigned long *)&__vcpu_sys_reg(vcpu, FPMR);
 		fp_state.fp_type = &vcpu->arch.fp_type;
 
 		if (vcpu_has_sve(vcpu))
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 27fcdfd432b9..abf785c473d0 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -370,10 +370,15 @@  static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
 	isb();
 
 	/* Write out the host state if it's in the registers */
-	if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED)
+	if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED) {
 		__fpsimd_save_state(&(vcpu->arch.host_uw->fpsimd_state));
+		if (cpus_have_final_cap(ARM64_HAS_FPMR))
+			vcpu->arch.host_uw->fpmr = read_sysreg_s(SYS_FPMR);
+	}
 
 	/* Restore the guest state */
+	if (cpus_have_final_cap(ARM64_HAS_FPMR))
+		write_sysreg_s(__vcpu_sys_reg(vcpu, FPMR), SYS_FPMR);
 	if (sve_guest)
 		__hyp_sve_restore_guest(vcpu);
 	else
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 38503b1cd2eb..216eac44c124 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -2067,6 +2067,15 @@  static unsigned int hidden_user_visibility(const struct kvm_vcpu *vcpu,
 	.visibility = hidden_user_visibility,	\
 }
 
+static unsigned int fpmr_visibility(const struct kvm_vcpu *vcpu,
+				    const struct sys_reg_desc *rd)
+{
+	if (cpus_have_final_cap(ARM64_HAS_FPMR))
+		return 0;
+
+	return REG_HIDDEN;
+}
+
 /*
  * Since reset() callback and field val are not used for idregs, they will be
  * used for specific purposes for idregs.
@@ -2463,6 +2472,8 @@  static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
 	{ SYS_DESC(SYS_CTR_EL0), access_ctr },
 	{ SYS_DESC(SYS_SVCR), undef_access },
+	{ SYS_DESC(SYS_FPMR), access_rw, reset_unknown, FPMR,
+	  .visibility = fpmr_visibility },
 
 	{ PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
 	  .reg = PMCR_EL0, .get_user = get_pmcr, .set_user = set_pmcr },