Message ID | 20240625-arm64-gcs-v9-13-0f634469b8f0@kernel.org |
---|---|
State | New |
Headers | show |
Series | [v9,01/39] arm64/mm: Restructure arch_validate_flags() for extensibility | expand |
On Tue, 25 Jun 2024 15:57:41 +0100, Mark Brown <broonie@kernel.org> wrote: > > GCS introduces a number of system registers for EL1 and EL0, on systems > with GCS we need to context switch them and expose them to VMMs to allow > guests to use GCS, as well as describe their fine grained traps to > nested virtualisation. Traps are already disabled. I don't see anything related to FGTs at all. > > Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> > Signed-off-by: Mark Brown <broonie@kernel.org> > --- > arch/arm64/include/asm/kvm_host.h | 14 +++++++++ > arch/arm64/include/asm/vncr_mapping.h | 2 ++ > arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 48 +++++++++++++++++++++++------- > arch/arm64/kvm/sys_regs.c | 25 +++++++++++++++- > 4 files changed, 78 insertions(+), 11 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > index 36b8e97bf49e..316fb412f355 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -411,6 +411,10 @@ enum vcpu_sysreg { > GCR_EL1, /* Tag Control Register */ > TFSRE0_EL1, /* Tag Fault Status Register (EL0) */ > > + /* Guarded Control Stack registers */ > + GCSCRE0_EL1, /* Guarded Control Stack Control (EL0) */ > + GCSPR_EL0, /* Guarded Control Stack Pointer (EL0) */ > + > /* 32bit specific registers. */ > DACR32_EL2, /* Domain Access Control Register */ > IFSR32_EL2, /* Instruction Fault Status Register */ > @@ -481,6 +485,10 @@ enum vcpu_sysreg { > VNCR(PIR_EL1), /* Permission Indirection Register 1 (EL1) */ > VNCR(PIRE0_EL1), /* Permission Indirection Register 0 (EL1) */ > > + /* Guarded Control Stack registers */ > + VNCR(GCSPR_EL1), /* Guarded Control Stack Pointer (EL1) */ > + VNCR(GCSCR_EL1), /* Guarded Control Stack Control (EL1) */ > + > VNCR(HFGRTR_EL2), > VNCR(HFGWTR_EL2), > VNCR(HFGITR_EL2), > @@ -1343,6 +1351,12 @@ static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature) > > #define kvm_vcpu_initialized(v) vcpu_get_flag(vcpu, VCPU_INITIALIZED) > > +static inline bool has_gcs(void) > +{ > + return IS_ENABLED(CONFIG_ARM64_GCS) && > + cpus_have_final_cap(ARM64_HAS_GCS); > +} This is mostly useless, see below. > + > int kvm_trng_call(struct kvm_vcpu *vcpu); > #ifdef CONFIG_KVM > extern phys_addr_t hyp_mem_base; > diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h > index df2c47c55972..5e83e6f579fd 100644 > --- a/arch/arm64/include/asm/vncr_mapping.h > +++ b/arch/arm64/include/asm/vncr_mapping.h > @@ -88,6 +88,8 @@ > #define VNCR_PMSIRR_EL1 0x840 > #define VNCR_PMSLATFR_EL1 0x848 > #define VNCR_TRFCR_EL1 0x880 > +#define VNCR_GCSPR_EL1 0x8C0 > +#define VNCR_GCSCR_EL1 0x8D0 > #define VNCR_MPAM1_EL1 0x900 > #define VNCR_MPAMHCR_EL2 0x930 > #define VNCR_MPAMVPMV_EL2 0x938 > diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h > index 4be6a7fa0070..b20212d80e9b 100644 > --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h > +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h > @@ -16,6 +16,27 @@ > #include <asm/kvm_hyp.h> > #include <asm/kvm_mmu.h> > > +static inline struct kvm_vcpu *ctxt_to_vcpu(struct kvm_cpu_context *ctxt) > +{ > + struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu; > + > + if (!vcpu) > + vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt); > + > + return vcpu; > +} > + > +static inline bool ctxt_has_gcs(struct kvm_cpu_context *ctxt) > +{ > + struct kvm_vcpu *vcpu; > + > + if (!cpus_have_final_cap(ARM64_HAS_GCS)) > + return false; > + > + vcpu = ctxt_to_vcpu(ctxt); > + return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64PFR1_EL1, GCS, IMP); > +} > + > static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) > { > ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1); > @@ -25,16 +46,8 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt) > { > ctxt_sys_reg(ctxt, TPIDR_EL0) = read_sysreg(tpidr_el0); > ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0); > -} > - > -static inline struct kvm_vcpu *ctxt_to_vcpu(struct kvm_cpu_context *ctxt) > -{ > - struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu; > - > - if (!vcpu) > - vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt); > - > - return vcpu; > + if (ctxt_has_gcs(ctxt)) > + ctxt_sys_reg(ctxt, GCSPR_EL0) = read_sysreg_s(SYS_GCSPR_EL0); > } > > static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt) > @@ -80,6 +93,12 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) > ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par(); > ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); > > + if (ctxt_has_gcs(ctxt)) { Since this is conditioned on S1PIE, it should be only be evaluated when PIE is enabled in the guest. > + ctxt_sys_reg(ctxt, GCSPR_EL1) = read_sysreg_el1(SYS_GCSPR); > + ctxt_sys_reg(ctxt, GCSCR_EL1) = read_sysreg_el1(SYS_GCSCR); > + ctxt_sys_reg(ctxt, GCSCRE0_EL1) = read_sysreg_s(SYS_GCSCRE0_EL1); Why is this part of the EL1 context? It clearly only matters to EL0 execution, so it could be switched in load/put on nVHE as well. And actually, given that the whole thing is strictly for userspace, why do we switch *anything* eagerly at all? > + } > + > if (ctxt_has_mte(ctxt)) { > ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR); > ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1); > @@ -113,6 +132,8 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) > { > write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL0), tpidr_el0); > write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0); > + if (ctxt_has_gcs(ctxt)) > + write_sysreg_s(ctxt_sys_reg(ctxt, GCSPR_EL0), SYS_GCSPR_EL0); > } > > static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) > @@ -156,6 +177,13 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) > write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); > write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); > > + if (ctxt_has_gcs(ctxt)) { > + write_sysreg_el1(ctxt_sys_reg(ctxt, GCSPR_EL1), SYS_GCSPR); > + write_sysreg_el1(ctxt_sys_reg(ctxt, GCSCR_EL1), SYS_GCSCR); > + write_sysreg_s(ctxt_sys_reg(ctxt, GCSCRE0_EL1), > + SYS_GCSCRE0_EL1); > + } > + > if (ctxt_has_mte(ctxt)) { > write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR); > write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1); > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c > index 22b45a15d068..cf068dcfbd49 100644 > --- a/arch/arm64/kvm/sys_regs.c > +++ b/arch/arm64/kvm/sys_regs.c > @@ -2015,6 +2015,23 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, > .visibility = mte_visibility, \ > } > > +static unsigned int gcs_visibility(const struct kvm_vcpu *vcpu, > + const struct sys_reg_desc *rd) > +{ > + if (has_gcs()) > + return 0; No. we've been here before. > + > + return REG_HIDDEN; > +} > + > +#define GCS_REG(name) { \ > + SYS_DESC(SYS_##name), \ > + .access = undef_access, \ > + .reset = reset_unknown, \ > + .reg = name, \ > + .visibility = gcs_visibility, \ > +} > + > static unsigned int el2_visibility(const struct kvm_vcpu *vcpu, > const struct sys_reg_desc *rd) > { > @@ -2306,7 +2323,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { > ID_AA64PFR0_EL1_GIC | > ID_AA64PFR0_EL1_AdvSIMD | > ID_AA64PFR0_EL1_FP), }, > - ID_SANITISED(ID_AA64PFR1_EL1), > + ID_WRITABLE(ID_AA64PFR1_EL1, ~(ID_AA64PFR1_EL1_RES0 | > + ID_AA64PFR1_EL1_BT)), I don't know what you're trying to do here, but that's not right. If you want to make this register writable, here's the shopping list: https://lore.kernel.org/all/87ikxsi0v9.wl-maz@kernel.org/ > ID_UNALLOCATED(4,2), > ID_UNALLOCATED(4,3), > ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0), > @@ -2390,6 +2408,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { > PTRAUTH_KEY(APDB), > PTRAUTH_KEY(APGA), > > + GCS_REG(GCSCR_EL1), > + GCS_REG(GCSPR_EL1), > + GCS_REG(GCSCRE0_EL1), > + > { SYS_DESC(SYS_SPSR_EL1), access_spsr}, > { SYS_DESC(SYS_ELR_EL1), access_elr}, > > @@ -2476,6 +2498,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { > { SYS_DESC(SYS_SMIDR_EL1), undef_access }, > { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, > { SYS_DESC(SYS_CTR_EL0), access_ctr }, > + GCS_REG(GCSPR_EL0), > { SYS_DESC(SYS_SVCR), undef_access }, > > { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr, I don't see the vcpu's hcrx_el2 being updated to enable GCS. How does it work then? I also don't see the FGU updates when GCS is disabled, nor the corresponding FGT bits being marked as RES0. M.
On Wed, Jul 10, 2024 at 04:17:02PM +0100, Marc Zyngier wrote: > Mark Brown <broonie@kernel.org> wrote: > > + if (ctxt_has_gcs(ctxt)) { > Since this is conditioned on S1PIE, it should be only be evaluated > when PIE is enabled in the guest. So make ctxt_has_gcs() embed a check of ctxt_has_s1pie()? > > + ctxt_sys_reg(ctxt, GCSPR_EL1) = read_sysreg_el1(SYS_GCSPR); > > + ctxt_sys_reg(ctxt, GCSCR_EL1) = read_sysreg_el1(SYS_GCSCR); > > + ctxt_sys_reg(ctxt, GCSCRE0_EL1) = read_sysreg_s(SYS_GCSCRE0_EL1); > Why is this part of the EL1 context? It clearly only matters to EL0 > execution, so it could be switched in load/put on nVHE as well. And > actually, given that the whole thing is strictly for userspace, why do > we switch *anything* eagerly at all? GCS can also be used independently at EL1 (and EL2 for that matter), it's not purely for userspace even though this series only implements use in userspace. GCSPR_EL1 and GCSCR_EL1 control the use of GCS at EL1, not EL0, and the guest might be using GCS at EL1 even if the host doesn't. GCSCRE0_EL1 is for EL0 though, it ended up here mainly because it's an _EL1 register and we are already context switching PIRE0_EL1 in the EL1 functions so it seemed consistent to follow the same approach for GCS. The _el1 and _user save/restore functions are called from the same place for both VHE and nVHE so the practical impact of the placement should be minimal AFAICT. Unlike PIRE0_EL1 GCSCRE0_EL1 only has an impact for code runnning at EL0 so I can move it to the _user functions. TBH I'm not following your comments about switching eagerly too here at all, where would you expect to see the switching done? You've said something along these lines before which prompted me to send a patch to only save the S1PIE registers if they'd been written to which you were quite reasonably not happy with given the extra traps it would cause: https://lore.kernel.org/r/20240301-kvm-arm64-defer-regs-v1-1-401e3de92e97@kernel.org but I'm at a loss as to how to make things less eager otherwise. > > @@ -2306,7 +2323,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { > > ID_AA64PFR0_EL1_GIC | > > ID_AA64PFR0_EL1_AdvSIMD | > > ID_AA64PFR0_EL1_FP), }, > > - ID_SANITISED(ID_AA64PFR1_EL1), > > + ID_WRITABLE(ID_AA64PFR1_EL1, ~(ID_AA64PFR1_EL1_RES0 | > > + ID_AA64PFR1_EL1_BT)), > I don't know what you're trying to do here, but that's not right. If > you want to make this register writable, here's the shopping list: > https://lore.kernel.org/all/87ikxsi0v9.wl-maz@kernel.org/ Yes, trying to make things writable. I do see we need to exclude more bits there and I'm not clear why I excluded BTI, looks like I forgot to add a TODO comment at some point and finish that off. Sorry about that. In the linked mail you say you want to see all fields explicitly handled, could you be more direct about what such explicit handling would look like? I see a number of examples in the existing code like: ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0), ID_WRITABLE(ID_AA64ISAR0_EL1, ~ID_AA64ISAR0_EL1_RES0), ID_WRITABLE(ID_AA64ISAR1_EL1, ~(ID_AA64ISAR1_EL1_GPI | ID_AA64ISAR1_EL1_GPA | ID_AA64ISAR1_EL1_API | ID_AA64ISAR1_EL1_APA)), which look to my eye very similar to the above, they do not visibliy explictly enumerate every field in the registers and given that there's a single mask specified it's not clear how that would look. If ID_WRITABLE() took separate read/write masks and combined them it'd be more obvious but it's just not written that way.
On Wed, 10 Jul 2024 18:16:46 +0100, Mark Brown <broonie@kernel.org> wrote: > > [1 <text/plain; us-ascii (7bit)>] > On Wed, Jul 10, 2024 at 04:17:02PM +0100, Marc Zyngier wrote: > > Mark Brown <broonie@kernel.org> wrote: > > > > + if (ctxt_has_gcs(ctxt)) { > > > Since this is conditioned on S1PIE, it should be only be evaluated > > when PIE is enabled in the guest. > > So make ctxt_has_gcs() embed a check of ctxt_has_s1pie()? No. I mean nest the whole thing *under* the check for S1PIE. > > > > + ctxt_sys_reg(ctxt, GCSPR_EL1) = read_sysreg_el1(SYS_GCSPR); > > > + ctxt_sys_reg(ctxt, GCSCR_EL1) = read_sysreg_el1(SYS_GCSCR); > > > + ctxt_sys_reg(ctxt, GCSCRE0_EL1) = read_sysreg_s(SYS_GCSCRE0_EL1); > > > Why is this part of the EL1 context? It clearly only matters to EL0 > > execution, so it could be switched in load/put on nVHE as well. And > > actually, given that the whole thing is strictly for userspace, why do > > we switch *anything* eagerly at all? > > GCS can also be used independently at EL1 (and EL2 for that matter), > it's not purely for userspace even though this series only implements > use in userspace. GCSPR_EL1 and GCSCR_EL1 control the use of GCS at > EL1, not EL0, and the guest might be using GCS at EL1 even if the host > doesn't. > > GCSCRE0_EL1 is for EL0 though, it ended up here mainly because it's an > _EL1 register and we are already context switching PIRE0_EL1 in the EL1 > functions so it seemed consistent to follow the same approach for GCS. > The _el1 and _user save/restore functions are called from the same place > for both VHE and nVHE so the practical impact of the placement should be > minimal AFAICT. Unlike PIRE0_EL1 GCSCRE0_EL1 only has an impact for > code runnning at EL0 so I can move it to the _user functions. Exactly. That's where it belongs, because we never execute EL0 while a vcpu is loaded. On the contrary, we can make use of a uaccess helper while a vcpu is loaded, and that makes a hell of a difference. And it makes a difference because it would allow the loading of EL0-specific context differently. We had this at some point, and it was a reasonable optimisation that we lost. I'm keen on bringing it back. > > TBH I'm not following your comments about switching eagerly too here at > all, where would you expect to see the switching done? You've said > something along these lines before which prompted me to send a patch to > only save the S1PIE registers if they'd been written to which you were > quite reasonably not happy with given the extra traps it would cause: > > https://lore.kernel.org/r/20240301-kvm-arm64-defer-regs-v1-1-401e3de92e97@kernel.org > > but I'm at a loss as to how to make things less eager otherwise. > > > > @@ -2306,7 +2323,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { > > > ID_AA64PFR0_EL1_GIC | > > > ID_AA64PFR0_EL1_AdvSIMD | > > > ID_AA64PFR0_EL1_FP), }, > > > - ID_SANITISED(ID_AA64PFR1_EL1), > > > + ID_WRITABLE(ID_AA64PFR1_EL1, ~(ID_AA64PFR1_EL1_RES0 | > > > + ID_AA64PFR1_EL1_BT)), > > > I don't know what you're trying to do here, but that's not right. If > > you want to make this register writable, here's the shopping list: > > > https://lore.kernel.org/all/87ikxsi0v9.wl-maz@kernel.org/ > > Yes, trying to make things writable. I do see we need to exclude more > bits there and I'm not clear why I excluded BTI, looks like I forgot to > add a TODO comment at some point and finish that off. Sorry about that. > > In the linked mail you say you want to see all fields explicitly > handled, could you be more direct about what such explicit handling This emails enumerate, point after point, everything that needs to be done. I really cannot be clearer or more direct. This email is the clearer I can be, short of writing the code myself. And I have decided not to do it for once, unless I really need to. And as it turns out, I don't. > would look like? I see a number of examples in the existing code like: > > ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0), This is clear: Everything is writable, and there are no bits here that are otherwise conditional or unsupported. > > ID_WRITABLE(ID_AA64ISAR0_EL1, ~ID_AA64ISAR0_EL1_RES0), Same thing. > ID_WRITABLE(ID_AA64ISAR1_EL1, ~(ID_AA64ISAR1_EL1_GPI | > ID_AA64ISAR1_EL1_GPA | > ID_AA64ISAR1_EL1_API | > ID_AA64ISAR1_EL1_APA)), This one needs fixing because of LS64, and I have an in-progress series for it. > which look to my eye very similar to the above, they do not visibliy > explictly enumerate every field in the registers and given that there's > a single mask specified it's not clear how that would look. If > ID_WRITABLE() took separate read/write masks and combined them it'd be > more obvious but it's just not written that way. I don't really see what it would buy us, but never mind. M.
On Wed, Jul 10, 2024 at 07:28:09PM +0100, Marc Zyngier wrote: > Mark Brown <broonie@kernel.org> wrote: > > On Wed, Jul 10, 2024 at 04:17:02PM +0100, Marc Zyngier wrote: > > > > + if (ctxt_has_gcs(ctxt)) { > > > Since this is conditioned on S1PIE, it should be only be evaluated > > > when PIE is enabled in the guest. > > So make ctxt_has_gcs() embed a check of ctxt_has_s1pie()? > No. I mean nest the whole thing *under* the check for S1PIE. OK, increasing the level of nesting. Got it. Does that just apply for the EL1 registers given that there's no _user S1PIE registers so no existing check there? Should we also be doing a similar thing for features that depend on TCR2 - currently that's just PIE but it'll grow? Probably only when we get more features rather than now since we don't currently check if the guest has TCR2, just the system. > > GCSCRE0_EL1 is for EL0 though, it ended up here mainly because it's an > > _EL1 register and we are already context switching PIRE0_EL1 in the EL1 > > functions so it seemed consistent to follow the same approach for GCS. > > The _el1 and _user save/restore functions are called from the same place > > for both VHE and nVHE so the practical impact of the placement should be > > minimal AFAICT. Unlike PIRE0_EL1 GCSCRE0_EL1 only has an impact for > > code runnning at EL0 so I can move it to the _user functions. > Exactly. That's where it belongs, because we never execute EL0 while a > vcpu is loaded. On the contrary, we can make use of a uaccess helper > while a vcpu is loaded, and that makes a hell of a difference. OK, to be clear here "it" is GCSCRE0_EL1, not GCSPR_EL1 and GCSCR_EL1 which are for EL1? > And it makes a difference because it would allow the loading of > EL0-specific context differently. We had this at some point, and it > was a reasonable optimisation that we lost. I'm keen on bringing it > back. Ah, that'd be good - not only for the optimistation but also since at the minute it's a bit unclear why there are separate EL0/1 functions. > > > you want to make this register writable, here's the shopping list: > > > https://lore.kernel.org/all/87ikxsi0v9.wl-maz@kernel.org/ > > In the linked mail you say you want to see all fields explicitly > > handled, could you be more direct about what such explicit handling > This emails enumerate, point after point, everything that needs to be > done. I really cannot be clearer or more direct. This email is the > clearer I can be, short of writing the code myself. And I have decided > not to do it for once, unless I really need to. And as it turns out, I > don't. See below, to be clear the only bit I was querying here was: | - you *must* handle *all* the fields described in that register. There | are 15 valid fields there, and I want to see all 15 fields being | explicitly dealt with. TBH it'd probably good to have that whole list in the kernel somewhere. > > would look like? I see a number of examples in the existing code like: > > ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0), > This is clear: Everything is writable, and there are no bits here that > are otherwise conditional or unsupported. Ah, I think I see. I would not have interpreted this as making everything explicit, to me this makes all the writeable fields writeable implicitly through them just not being mentioned. For everything to be explicit I would expect to see a direct, visible reference in the code to every single field rather than something like we have here where some of the fields are not mentioned directly. The end result is an explicit value but that's true for any use of ID_WRITABLE(). If my understanding is correct then were I writing the bit I quoted above I'd probably just drop the "explicitly" from that bullet point due to the handling of simple writable fields with ID_WRITABLE(), the key point being that every field needs to be handled with the other points enumerating the specific options for how each field might be handled. Does my understanding sound correct? > > which look to my eye very similar to the above, they do not visibliy > > explictly enumerate every field in the registers and given that there's > > a single mask specified it's not clear how that would look. If > > ID_WRITABLE() took separate read/write masks and combined them it'd be > > more obvious but it's just not written that way. > I don't really see what it would buy us, but never mind. That was me trying to reconcile my understanding of you asking to make everything explicit with the code as it is. I suppose the advantage would be documentation.
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 36b8e97bf49e..316fb412f355 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -411,6 +411,10 @@ enum vcpu_sysreg { GCR_EL1, /* Tag Control Register */ TFSRE0_EL1, /* Tag Fault Status Register (EL0) */ + /* Guarded Control Stack registers */ + GCSCRE0_EL1, /* Guarded Control Stack Control (EL0) */ + GCSPR_EL0, /* Guarded Control Stack Pointer (EL0) */ + /* 32bit specific registers. */ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ @@ -481,6 +485,10 @@ enum vcpu_sysreg { VNCR(PIR_EL1), /* Permission Indirection Register 1 (EL1) */ VNCR(PIRE0_EL1), /* Permission Indirection Register 0 (EL1) */ + /* Guarded Control Stack registers */ + VNCR(GCSPR_EL1), /* Guarded Control Stack Pointer (EL1) */ + VNCR(GCSCR_EL1), /* Guarded Control Stack Control (EL1) */ + VNCR(HFGRTR_EL2), VNCR(HFGWTR_EL2), VNCR(HFGITR_EL2), @@ -1343,6 +1351,12 @@ static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature) #define kvm_vcpu_initialized(v) vcpu_get_flag(vcpu, VCPU_INITIALIZED) +static inline bool has_gcs(void) +{ + return IS_ENABLED(CONFIG_ARM64_GCS) && + cpus_have_final_cap(ARM64_HAS_GCS); +} + int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM extern phys_addr_t hyp_mem_base; diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h index df2c47c55972..5e83e6f579fd 100644 --- a/arch/arm64/include/asm/vncr_mapping.h +++ b/arch/arm64/include/asm/vncr_mapping.h @@ -88,6 +88,8 @@ #define VNCR_PMSIRR_EL1 0x840 #define VNCR_PMSLATFR_EL1 0x848 #define VNCR_TRFCR_EL1 0x880 +#define VNCR_GCSPR_EL1 0x8C0 +#define VNCR_GCSCR_EL1 0x8D0 #define VNCR_MPAM1_EL1 0x900 #define VNCR_MPAMHCR_EL2 0x930 #define VNCR_MPAMVPMV_EL2 0x938 diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 4be6a7fa0070..b20212d80e9b 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -16,6 +16,27 @@ #include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> +static inline struct kvm_vcpu *ctxt_to_vcpu(struct kvm_cpu_context *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu; + + if (!vcpu) + vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt); + + return vcpu; +} + +static inline bool ctxt_has_gcs(struct kvm_cpu_context *ctxt) +{ + struct kvm_vcpu *vcpu; + + if (!cpus_have_final_cap(ARM64_HAS_GCS)) + return false; + + vcpu = ctxt_to_vcpu(ctxt); + return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64PFR1_EL1, GCS, IMP); +} + static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) { ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1); @@ -25,16 +46,8 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt) { ctxt_sys_reg(ctxt, TPIDR_EL0) = read_sysreg(tpidr_el0); ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0); -} - -static inline struct kvm_vcpu *ctxt_to_vcpu(struct kvm_cpu_context *ctxt) -{ - struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu; - - if (!vcpu) - vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt); - - return vcpu; + if (ctxt_has_gcs(ctxt)) + ctxt_sys_reg(ctxt, GCSPR_EL0) = read_sysreg_s(SYS_GCSPR_EL0); } static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt) @@ -80,6 +93,12 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par(); ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); + if (ctxt_has_gcs(ctxt)) { + ctxt_sys_reg(ctxt, GCSPR_EL1) = read_sysreg_el1(SYS_GCSPR); + ctxt_sys_reg(ctxt, GCSCR_EL1) = read_sysreg_el1(SYS_GCSCR); + ctxt_sys_reg(ctxt, GCSCRE0_EL1) = read_sysreg_s(SYS_GCSCRE0_EL1); + } + if (ctxt_has_mte(ctxt)) { ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR); ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1); @@ -113,6 +132,8 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) { write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL0), tpidr_el0); write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0); + if (ctxt_has_gcs(ctxt)) + write_sysreg_s(ctxt_sys_reg(ctxt, GCSPR_EL0), SYS_GCSPR_EL0); } static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) @@ -156,6 +177,13 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); + if (ctxt_has_gcs(ctxt)) { + write_sysreg_el1(ctxt_sys_reg(ctxt, GCSPR_EL1), SYS_GCSPR); + write_sysreg_el1(ctxt_sys_reg(ctxt, GCSCR_EL1), SYS_GCSCR); + write_sysreg_s(ctxt_sys_reg(ctxt, GCSCRE0_EL1), + SYS_GCSCRE0_EL1); + } + if (ctxt_has_mte(ctxt)) { write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR); write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 22b45a15d068..cf068dcfbd49 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2015,6 +2015,23 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, .visibility = mte_visibility, \ } +static unsigned int gcs_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (has_gcs()) + return 0; + + return REG_HIDDEN; +} + +#define GCS_REG(name) { \ + SYS_DESC(SYS_##name), \ + .access = undef_access, \ + .reset = reset_unknown, \ + .reg = name, \ + .visibility = gcs_visibility, \ +} + static unsigned int el2_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { @@ -2306,7 +2323,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_AA64PFR0_EL1_GIC | ID_AA64PFR0_EL1_AdvSIMD | ID_AA64PFR0_EL1_FP), }, - ID_SANITISED(ID_AA64PFR1_EL1), + ID_WRITABLE(ID_AA64PFR1_EL1, ~(ID_AA64PFR1_EL1_RES0 | + ID_AA64PFR1_EL1_BT)), ID_UNALLOCATED(4,2), ID_UNALLOCATED(4,3), ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0), @@ -2390,6 +2408,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { PTRAUTH_KEY(APDB), PTRAUTH_KEY(APGA), + GCS_REG(GCSCR_EL1), + GCS_REG(GCSPR_EL1), + GCS_REG(GCSCRE0_EL1), + { SYS_DESC(SYS_SPSR_EL1), access_spsr}, { SYS_DESC(SYS_ELR_EL1), access_elr}, @@ -2476,6 +2498,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_SMIDR_EL1), undef_access }, { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, { SYS_DESC(SYS_CTR_EL0), access_ctr }, + GCS_REG(GCSPR_EL0), { SYS_DESC(SYS_SVCR), undef_access }, { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,