diff mbox series

[v3,3/5] arm64: fpsimd: Implement lazy restore for kernel mode FPSIMD

Message ID 20231127122259.2265164-10-ardb@google.com
State Superseded
Headers show
Series arm64: Run kernel mode NEON with preemption enabled | expand

Commit Message

Ard Biesheuvel Nov. 27, 2023, 12:23 p.m. UTC
From: Ard Biesheuvel <ardb@kernel.org>

Now that kernel mode FPSIMD state is context switched along with other
task state, we can enable the existing logic that keeps track of which
task's FPSIMD state the CPU is holding in its registers. If it is the
context of the task that we are switching to, we can elide the reload of
the FPSIMD state from memory.

Note that we also need to check whether the FPSIMD state on this CPU is
the most recent: if a task gets migrated away and back again, the state
in memory may be more recent than the state in the CPU. So add another
CPU id field to task_struct to keep track of this. (We could reuse the
existing CPU id field used for user mode context, but that might result
in user state to be discarded unnecessarily, given that two distinct
CPUs could be holding the most recent user mode state and the most
recent kernel mode state)

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
---
 arch/arm64/include/asm/processor.h |  1 +
 arch/arm64/kernel/fpsimd.c         | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+)

Comments

Mark Rutland Nov. 27, 2023, 1:32 p.m. UTC | #1
On Mon, Nov 27, 2023 at 01:23:03PM +0100, Ard Biesheuvel wrote:
> From: Ard Biesheuvel <ardb@kernel.org>
> 
> Now that kernel mode FPSIMD state is context switched along with other
> task state, we can enable the existing logic that keeps track of which
> task's FPSIMD state the CPU is holding in its registers. If it is the
> context of the task that we are switching to, we can elide the reload of
> the FPSIMD state from memory.
> 
> Note that we also need to check whether the FPSIMD state on this CPU is
> the most recent: if a task gets migrated away and back again, the state
> in memory may be more recent than the state in the CPU. So add another
> CPU id field to task_struct to keep track of this. (We could reuse the
> existing CPU id field used for user mode context, but that might result
> in user state to be discarded unnecessarily, given that two distinct
> CPUs could be holding the most recent user mode state and the most
> recent kernel mode state)
> 
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> Reviewed-by: Mark Brown <broonie@kernel.org>
> ---
>  arch/arm64/include/asm/processor.h |  1 +
>  arch/arm64/kernel/fpsimd.c         | 18 ++++++++++++++++++
>  2 files changed, 19 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index dcb51c0571af..332f15d0abcf 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -169,6 +169,7 @@ struct thread_struct {
>  	struct debug_info	debug;		/* debugging */
>  
>  	struct user_fpsimd_state	kmode_fpsimd_state;
> +	unsigned int			kmode_fpsimd_cpu;
>  #ifdef CONFIG_ARM64_PTR_AUTH
>  	struct ptrauth_keys_user	keys_user;
>  #ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 198918805bf6..112111a078b6 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -1476,12 +1476,30 @@ void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
>  
>  static void fpsimd_load_kernel_state(struct task_struct *task)
>  {
> +	struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
> +
> +	/*
> +	 * Elide the load if this CPU holds the most recent kernel mode
> +	 * FPSIMD context of the current task.
> +	 */
> +	if (last->st == &task->thread.kmode_fpsimd_state &&
> +	    task->thread.kmode_fpsimd_cpu == smp_processor_id())
> +		return;
> +
>  	fpsimd_load_state(&task->thread.kmode_fpsimd_state);
>  }
>  
>  static void fpsimd_save_kernel_state(struct task_struct *task)
>  {
> +	struct cpu_fp_state cpu_fp_state = {
> +		.st		= &task->thread.kmode_fpsimd_state,
> +		.to_save	= FP_STATE_FPSIMD,
> +	};
> +
>  	fpsimd_save_state(&task->thread.kmode_fpsimd_state);
> +	fpsimd_bind_state_to_cpu(&cpu_fp_state);
> +
> +	task->thread.kmode_fpsimd_cpu = smp_processor_id();
>  }

I was a little worried tha we might be missing a change to
fpsimd_cpu_pm_notifier() to handle contesxt-destructive idle states correctly,
but since that clears the fpsimd_last_state variable already, that should do
the right thing as-is.

Acked-by: Mark Rutland <mark.rutland@arm.com>

Mark.

>  
>  void fpsimd_thread_switch(struct task_struct *next)
> -- 
> 2.43.0.rc1.413.gea7ed67945-goog
>
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index dcb51c0571af..332f15d0abcf 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -169,6 +169,7 @@  struct thread_struct {
 	struct debug_info	debug;		/* debugging */
 
 	struct user_fpsimd_state	kmode_fpsimd_state;
+	unsigned int			kmode_fpsimd_cpu;
 #ifdef CONFIG_ARM64_PTR_AUTH
 	struct ptrauth_keys_user	keys_user;
 #ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 198918805bf6..112111a078b6 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1476,12 +1476,30 @@  void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
 
 static void fpsimd_load_kernel_state(struct task_struct *task)
 {
+	struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
+
+	/*
+	 * Elide the load if this CPU holds the most recent kernel mode
+	 * FPSIMD context of the current task.
+	 */
+	if (last->st == &task->thread.kmode_fpsimd_state &&
+	    task->thread.kmode_fpsimd_cpu == smp_processor_id())
+		return;
+
 	fpsimd_load_state(&task->thread.kmode_fpsimd_state);
 }
 
 static void fpsimd_save_kernel_state(struct task_struct *task)
 {
+	struct cpu_fp_state cpu_fp_state = {
+		.st		= &task->thread.kmode_fpsimd_state,
+		.to_save	= FP_STATE_FPSIMD,
+	};
+
 	fpsimd_save_state(&task->thread.kmode_fpsimd_state);
+	fpsimd_bind_state_to_cpu(&cpu_fp_state);
+
+	task->thread.kmode_fpsimd_cpu = smp_processor_id();
 }
 
 void fpsimd_thread_switch(struct task_struct *next)