@@ -47,11 +47,21 @@ extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
void *sve_state, unsigned int sve_vl,
- u64 *svcr);
+ unsigned int sme_vl, u64 *svcr);
extern void fpsimd_flush_task_state(struct task_struct *target);
extern void fpsimd_save_and_flush_cpu_state(void);
+static inline bool thread_sm_enabled(struct thread_struct *thread)
+{
+ return system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_SM_MASK);
+}
+
+static inline bool thread_za_enabled(struct thread_struct *thread)
+{
+ return system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_ZA_MASK);
+}
+
/* Maximum VL that SVE/SME VL-agnostic software can transparently support */
#define VL_ARCH_MAX 0x100
@@ -63,7 +73,14 @@ static inline size_t sve_ffr_offset(int vl)
static inline void *sve_pffr(struct thread_struct *thread)
{
- return (char *)thread->sve_state + sve_ffr_offset(thread_get_sve_vl(thread));
+ unsigned int vl;
+
+ if (system_supports_sme() && thread_sm_enabled(thread))
+ vl = thread_get_sme_vl(thread);
+ else
+ vl = thread_get_sve_vl(thread);
+
+ return (char *)thread->sve_state + sve_ffr_offset(vl);
}
extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
@@ -72,6 +89,7 @@ extern void sve_load_state(void const *state, u32 const *pfpsr,
extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
extern unsigned int sve_get_vl(void);
extern void sve_set_vq(unsigned long vq_minus_1);
+extern void sme_set_vq(unsigned long vq_minus_1);
struct arm64_cpu_capabilities;
extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
@@ -262,6 +262,17 @@
921:
.endm
+/* Set SMCR_EL1.LEN to \xvqminus1; the register write is skipped if the value is unchanged */
+.macro sme_load_vq xvqminus1, xtmp, xtmp2
+ mrs_s \xtmp, SYS_SMCR_EL1
+ bic \xtmp2, \xtmp, SMCR_ELx_LEN_MASK
+ orr \xtmp2, \xtmp2, \xvqminus1
+ cmp \xtmp2, \xtmp
+ b.eq 921f
+ msr_s SYS_SMCR_EL1, \xtmp2 // self-synchronising
+921:
+.endm
+
/* Preserve the first 128-bits of Znz and zero the rest. */
.macro _sve_flush_z nz
_sve_check_zreg \nz
@@ -184,6 +184,11 @@ static inline unsigned int thread_get_sve_vl(struct thread_struct *thread)
return thread_get_vl(thread, ARM64_VEC_SVE);
}
+static inline unsigned int thread_get_sme_vl(struct thread_struct *thread)
+{
+ return thread_get_vl(thread, ARM64_VEC_SME);
+}
+
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type);
void task_set_vl(struct task_struct *task, enum vec_type type,
unsigned long vl);
@@ -197,6 +202,11 @@ static inline unsigned int task_get_sve_vl(const struct task_struct *task)
return task_get_vl(task, ARM64_VEC_SVE);
}
+static inline unsigned int task_get_sme_vl(const struct task_struct *task)
+{
+ return task_get_vl(task, ARM64_VEC_SME);
+}
+
static inline void task_set_sve_vl(struct task_struct *task, unsigned long vl)
{
task_set_vl(task, ARM64_VEC_SVE, vl);
@@ -94,4 +94,9 @@ SYM_FUNC_START(sme_get_vl)
ret
SYM_FUNC_END(sme_get_vl)
+SYM_FUNC_START(sme_set_vq)
+ sme_load_vq x0, x1, x2
+ ret
+SYM_FUNC_END(sme_set_vq)
+
#endif /* CONFIG_ARM64_SME */
@@ -123,6 +123,7 @@ struct fpsimd_last_state_struct {
void *sve_state;
u64 *svcr;
unsigned int sve_vl;
+ unsigned int sme_vl;
};
static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
@@ -301,17 +302,28 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
task->thread.vl_onexec[type] = vl;
}
+/*
+ * TIF_SME controls whether a task can use SME without trapping while
+ * in userspace. When TIF_SME is set we must have storage allocated
+ * in sve_state and za_state to store the contents of both ZA and the
+ * SVE registers for both streaming and non-streaming modes.
+ *
+ * If both SVCR.ZA and SVCR.SM are disabled then at any point we
+ * may disable TIF_SME and reenable traps.
+ */
+
+
/*
* TIF_SVE controls whether a task can use SVE without trapping while
- * in userspace, and also the way a task's FPSIMD/SVE state is stored
- * in thread_struct.
+ * in userspace, and also (together with TIF_SME) the way a task's
+ * FPSIMD/SVE state is stored in thread_struct.
*
* The kernel uses this flag to track whether a user task is actively
* using SVE, and therefore whether full SVE register state needs to
* be tracked. If not, the cheaper FPSIMD context handling code can
* be used instead of the more costly SVE equivalents.
*
- * * TIF_SVE set:
+ * * TIF_SVE or SVCR.SM set:
*
* The task can execute SVE instructions while in userspace without
* trapping to the kernel.
@@ -319,7 +331,8 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
* When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
* corresponding Zn), P0-P15 and FFR are encoded in in
* task->thread.sve_state, formatted appropriately for vector
- * length task->thread.sve_vl.
+ * length task->thread.sve_vl or, if SVCR.SM is set,
+ * task->thread.sme_vl.
*
* task->thread.sve_state must point to a valid buffer at least
* sve_state_size(task) bytes in size.
@@ -357,19 +370,40 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
*/
static void task_fpsimd_load(void)
{
+ bool restore_sve_regs = false;
+ bool restore_ffr;
+
WARN_ON(!system_supports_fpsimd());
WARN_ON(!have_cpu_fpsimd_context());
- if (IS_ENABLED(CONFIG_ARM64_SME) && test_thread_flag(TIF_SME))
- write_sysreg_s(current->thread.svcr, SYS_SVCR_EL0);
-
+ /* Check if we should restore SVE first */
if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) {
sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
+ restore_sve_regs = true;
+ restore_ffr = true;
+ }
+
+ /* Restore SME, override SVE register configuration if needed */
+ if (system_supports_sme()) {
+ unsigned long sme_vl = task_get_sme_vl(current);
+
+ if (test_thread_flag(TIF_SME))
+ sme_set_vq(sve_vq_from_vl(sme_vl) - 1);
+
+ write_sysreg_s(current->thread.svcr, SYS_SVCR_EL0);
+
+ if (thread_sm_enabled(¤t->thread)) {
+ restore_sve_regs = true;
+ restore_ffr = system_supports_fa64();
+ }
+ }
+
+ if (restore_sve_regs)
sve_load_state(sve_pffr(¤t->thread),
- ¤t->thread.uw.fpsimd_state.fpsr, true);
- } else {
+ ¤t->thread.uw.fpsimd_state.fpsr,
+ restore_ffr);
+ else
fpsimd_load_state(¤t->thread.uw.fpsimd_state);
- }
}
/*
@@ -387,6 +421,9 @@ static void fpsimd_save(void)
struct fpsimd_last_state_struct const *last =
this_cpu_ptr(&fpsimd_last_state);
/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
+ bool save_sve_regs = false;
+ bool save_ffr;
+ unsigned int vl;
WARN_ON(!system_supports_fpsimd());
WARN_ON(!have_cpu_fpsimd_context());
@@ -394,15 +431,33 @@ static void fpsimd_save(void)
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
return;
- if (IS_ENABLED(CONFIG_ARM64_SME) &&
- test_thread_flag(TIF_SME)) {
+ if (test_thread_flag(TIF_SVE)) {
+ save_sve_regs = true;
+ save_ffr = true;
+ vl = last->sve_vl;
+ }
+
+ if (system_supports_sme()) {
u64 *svcr = last->svcr;
*svcr = read_sysreg_s(SYS_SVCR_EL0);
+
+ if (thread_za_enabled(¤t->thread)) {
+ /* ZA state management is not implemented yet */
+ force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
+ return;
+ }
+
+ /* If we are in streaming mode override regular SVE. */
+ if (*svcr & SYS_SVCR_EL0_SM_MASK) {
+ save_sve_regs = true;
+ save_ffr = system_supports_fa64();
+ vl = last->sme_vl;
+ }
}
- if (IS_ENABLED(CONFIG_ARM64_SVE) &&
- test_thread_flag(TIF_SVE)) {
- if (WARN_ON(sve_get_vl() != last->sve_vl)) {
+ if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) {
+ /* Get the configured VL from RDVL, will account for SM */
+ if (WARN_ON(sve_get_vl() != vl)) {
/*
* Can't save the user regs, so current would
* re-enter user with corrupt state.
@@ -413,8 +468,8 @@ static void fpsimd_save(void)
}
sve_save_state((char *)last->sve_state +
- sve_ffr_offset(last->sve_vl),
- &last->st->fpsr, true);
+ sve_ffr_offset(vl),
+ &last->st->fpsr, save_ffr);
} else {
fpsimd_save_state(last->st);
}
@@ -619,7 +674,14 @@ static void sve_to_fpsimd(struct task_struct *task)
*/
static size_t sve_state_size(struct task_struct const *task)
{
- return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task_get_sve_vl(task)));
+ unsigned int vl = 0;
+
+ if (system_supports_sve())
+ vl = task_get_sve_vl(task);
+ if (system_supports_sme())
+ vl = max(vl, task_get_sme_vl(task));
+
+ return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
}
/*
@@ -748,7 +810,8 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
}
fpsimd_flush_task_state(task);
- if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
+ if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
+ thread_sm_enabled(&task->thread))
sve_to_fpsimd(task);
if (system_supports_sme() && type == ARM64_VEC_SME)
@@ -1375,6 +1438,9 @@ void fpsimd_flush_thread(void)
fpsimd_flush_thread_vl(ARM64_VEC_SVE);
}
+ if (system_supports_sme())
+ fpsimd_flush_thread_vl(ARM64_VEC_SME);
+
put_cpu_fpsimd_context();
}
@@ -1418,6 +1484,7 @@ static void fpsimd_bind_task_to_cpu(void)
last->st = ¤t->thread.uw.fpsimd_state;
last->sve_state = current->thread.sve_state;
last->sve_vl = task_get_sve_vl(current);
+ last->sme_vl = task_get_sme_vl(current);
last->svcr = ¤t->thread.svcr;
current->thread.fpsimd_cpu = smp_processor_id();
@@ -1433,7 +1500,8 @@ static void fpsimd_bind_task_to_cpu(void)
}
void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
- unsigned int sve_vl, u64 *svcr)
+ unsigned int sve_vl, unsigned int sme_vl,
+ u64 *svcr)
{
struct fpsimd_last_state_struct *last =
this_cpu_ptr(&fpsimd_last_state);
@@ -1445,6 +1513,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
last->svcr = svcr;
last->sve_state = sve_state;
last->sve_vl = sve_vl;
+ last->sme_vl = sme_vl;
}
/*
@@ -116,7 +116,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs,
vcpu->arch.sve_state,
vcpu->arch.sve_max_vl,
- NULL);
+ 0, NULL);
clear_thread_flag(TIF_FOREIGN_FPSTATE);
update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));