
irq_work: Migrate the remaining work of the offline CPU on PREEMPT_RT

Message ID: 20220925104111.1454100-1-qiang1.zhang@intel.com
State: New
Series: irq_work: Migrate the remaining work of the offline CPU on PREEMPT_RT

Commit Message

Zhang, Qiang1 Sept. 25, 2022, 10:41 a.m. UTC
When a CPU goes offline and the CPU_DYING callbacks run,
smpcfd_dying_cpu() is invoked to flush the remaining irq_work of the
offline CPU. For lazy irq_work, this tries to wake the per-CPU
irq_work kthread to invoke the callbacks, but that kthread is already
in TASK_PARKED state, so the wakeup has no effect and the remaining
lazy irq_work is never executed.
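
For reference, "lazy" here means an irq_work item initialized with
IRQ_WORK_INIT_LAZY(). A minimal sketch of such an item (the callback
and all names are made up for illustration):

#include <linux/irq_work.h>

/* On PREEMPT_RT this runs from the queueing CPU's irq_work kthread. */
static void my_lazy_cb(struct irq_work *work)
{
}

static struct irq_work my_lazy_work = IRQ_WORK_INIT_LAZY(my_lazy_cb);

static void queue_example(void)
{
	/* Adds the item to this CPU's lazy_list. */
	irq_work_queue(&my_lazy_work);
}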

This commit requeues the remaining lazy irq_work of the offline CPU to
an online CPU.

Signed-off-by: Zqiang <qiang1.zhang@intel.com>
---
 include/linux/irq_work.h |  1 +
 kernel/cpu.c             |  1 +
 kernel/irq_work.c        | 20 ++++++++++++++++++++
 3 files changed, 22 insertions(+)

Comments

Sebastian Andrzej Siewior Oct. 4, 2022, 4:47 p.m. UTC | #1
On 2022-09-25 18:41:11 [+0800], Zqiang wrote:
> When a CPU goes offline and the CPU_DYING callbacks run,
> smpcfd_dying_cpu() is invoked to flush the remaining irq_work of the
> offline CPU. For lazy irq_work, this tries to wake the per-CPU
> irq_work kthread to invoke the callbacks, but that kthread is already
> in TASK_PARKED state, so the wakeup has no effect and the remaining
> lazy irq_work is never executed.
> 
> This commit requeues the remaining lazy irq_work of the offline CPU to
> an online CPU.

At CPU_DYING the CPU runs with interrupts disabled, so we can't invoke
the remaining callbacks directly. Migrating them to another CPU will
solve the problem, but callbacks enqueued with irq_work_queue_on() will
then be invoked on the wrong CPU.
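
For example (a made-up caller, just to illustrate the affinity
assumption):

#include <linux/irq_work.h>

/* Expected to run on the target CPU, e.g. to access its per-CPU data. */
static void collect_stats(struct irq_work *work)
{
}

static struct irq_work stat_work = IRQ_WORK_INIT(collect_stats);

static void queue_on_target(int cpu)
{
	irq_work_queue_on(&stat_work, cpu);
}

After the proposed migration, collect_stats() would run on whichever CPU
the work happened to be requeued to.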

Is there a specific irq_work item that made you aware of this?

Sebastian
Zhang, Qiang1 Oct. 12, 2022, 2:12 p.m. UTC | #2
> On 2022-09-25 18:41:11 [+0800], Zqiang wrote:
> > When a CPU goes offline and the CPU_DYING callbacks run,
> > smpcfd_dying_cpu() is invoked to flush the remaining irq_work of the
> > offline CPU. For lazy irq_work, this tries to wake the per-CPU
> > irq_work kthread to invoke the callbacks, but that kthread is already
> > in TASK_PARKED state, so the wakeup has no effect and the remaining
> > lazy irq_work is never executed.
> > 
> > This commit requeues the remaining lazy irq_work of the offline CPU to
> > an online CPU.
>
> At CPU_DYING the CPU runs with interrupts disabled, so we can't invoke
> the remaining callbacks directly. Migrating them to another CPU will
> solve the problem, but callbacks enqueued with irq_work_queue_on() will
> then be invoked on the wrong CPU.

Yes, the remaining callbacks will be invoked on the wrong CPU, but if a
remaining callback performs a wake-up operation, this migration is
required for the callback to execute at all; otherwise it may cause a
task hang.
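
A sketch of the kind of callback I mean (all names made up):

#include <linux/irq_work.h>
#include <linux/sched.h>

static struct task_struct *waiter_task;

/* The waiter only cares that this runs, not on which CPU. */
static void wake_cb(struct irq_work *work)
{
	wake_up_process(waiter_task);
}

static struct irq_work wake_work = IRQ_WORK_INIT_LAZY(wake_cb);

If wake_work is left on the offline CPU's lazy_list and never executes,
waiter_task may sleep forever.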

This is just like how !WQ_UNBOUND workqueues handle CPU hotplug: work
queued to execute on a specified CPU will also be executed on the wrong
CPU after that CPU goes offline.
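
For example (sketch, with a made-up work item):

#include <linux/workqueue.h>

static void my_work_fn(struct work_struct *work)
{
}

static DECLARE_WORK(my_work, my_work_fn);

static void queue_it(int cpu)
{
	queue_work_on(cpu, system_wq, &my_work);
}

If @cpu goes offline before my_work runs, its per-CPU worker pool is
disassociated and the work is executed by a worker on some other CPU.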

I haven't thought of a better way to deal with this; do you have one?
Looking forward to your reply.

Thanks
Zqiang


Patch

diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 8cd11a223260..900d9053a62d 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -66,6 +66,7 @@ void irq_work_sync(struct irq_work *work);
 void irq_work_run(void);
 bool irq_work_needs_cpu(void);
 void irq_work_single(void *arg);
+void irq_work_migrate(int cpu);
 #else
 static inline bool irq_work_needs_cpu(void) { return false; }
 static inline void irq_work_run(void) { }
diff --git a/kernel/cpu.c b/kernel/cpu.c
index bbad5e375d3b..70461b6d785d 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1075,6 +1075,7 @@ static int takedown_cpu(unsigned int cpu)
 
 	tick_cleanup_dead_cpu(cpu);
 	rcutree_migrate_callbacks(cpu);
+	irq_work_migrate(cpu);
 	return 0;
 }
 
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 7afa40fe5cc4..bfa21468c2df 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -242,6 +242,26 @@ static void irq_work_run_list(struct llist_head *list)
 		irq_work_single(work);
 }
 
+void irq_work_migrate(int cpu)
+{
+	struct irq_work *work, *tmp;
+	struct llist_node *llnode;
+	struct llist_head *list = per_cpu_ptr(&lazy_list, cpu);
+
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		return;
+
+	if (llist_empty(list))
+		return;
+
+	llnode = llist_del_all(list);
+	llist_for_each_entry_safe(work, tmp, llnode, node.llist) {
+		atomic_set(&work->node.a_flags, 0);
+		irq_work_queue(work);
+	}
+}
+EXPORT_SYMBOL_GPL(irq_work_migrate);
+
 /*
  * hotplug calls this through:
  *  hotplug_cfd() -> flush_smp_call_function_queue()