diff mbox series

[2/5] rcu/nocb: Perform deferred wake up before last idle's need_resched() check

Message ID 20210131230548.32970-3-frederic@kernel.org
State New
Headers show
Series rcu/sched: Fix ignored rescheduling after rcu_eqs_enter() v4 | expand

Commit Message

Frederic Weisbecker Jan. 31, 2021, 11:05 p.m. UTC
Entering RCU idle mode may cause a deferred wake up of an RCU NOCB_GP
kthread (rcuog) to be serviced.

Usually a local wake up happening while running the idle task is handled
in one of the need_resched() checks carefully placed within the idle
loop that can break to the scheduler.

Unfortunately the call to rcu_idle_enter() is already beyond the last
generic need_resched() check and we may halt the CPU with a resched
request unhandled, leaving the task hanging.

Fix this by splitting the rcuog wakeup handling from rcu_idle_enter()
and placing it before the last generic need_resched() check in the idle
loop. It is then assumed that no call to call_rcu() will be performed
after that in the idle loop until the CPU is put in low power mode.

Reported-by: Paul E. McKenney <paulmck@kernel.org>
Fixes: 96d3fd0d315a ("rcu: Break call_rcu() deadlock involving scheduler and perf")
Cc: stable@vger.kernel.org
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 include/linux/rcupdate.h | 2 ++
 kernel/rcu/tree.c        | 3 ---
 kernel/rcu/tree_plugin.h | 5 +++++
 kernel/sched/idle.c      | 3 +++
 4 files changed, 10 insertions(+), 3 deletions(-)

Comments

Peter Zijlstra Feb. 8, 2021, 2:45 p.m. UTC | #1
On Mon, Feb 01, 2021 at 12:05:45AM +0100, Frederic Weisbecker wrote:

> diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c

> index 305727ea0677..b601a3aa2152 100644

> --- a/kernel/sched/idle.c

> +++ b/kernel/sched/idle.c

> @@ -55,6 +55,7 @@ __setup("hlt", cpu_idle_nopoll_setup);

>  static noinline int __cpuidle cpu_idle_poll(void)

>  {

>  	trace_cpu_idle(0, smp_processor_id());

> +	rcu_nocb_flush_deferred_wakeup();

>  	stop_critical_timings();

>  	rcu_idle_enter();

>  	local_irq_enable();

> @@ -173,6 +174,8 @@ static void cpuidle_idle_call(void)

>  	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);

>  	int next_state, entered_state;

>  

> +	rcu_nocb_flush_deferred_wakeup();

> +

>  	/*

>  	 * Check if the idle task must be rescheduled. If it is the

>  	 * case, exit the function after re-enabling the local irq.


Ok if I do this instead?

--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -55,7 +55,6 @@ __setup("hlt", cpu_idle_nopoll_setup);
 static noinline int __cpuidle cpu_idle_poll(void)
 {
 	trace_cpu_idle(0, smp_processor_id());
-	rcu_nocb_flush_deferred_wakeup();
 	stop_critical_timings();
 	rcu_idle_enter();
 	local_irq_enable();
@@ -174,8 +173,6 @@ static void cpuidle_idle_call(void)
 	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
 	int next_state, entered_state;
 
-	rcu_nocb_flush_deferred_wakeup();
-
 	/*
 	 * Check if the idle task must be rescheduled. If it is the
 	 * case, exit the function after re-enabling the local irq.
@@ -288,6 +285,7 @@ static void do_idle(void)
 		}
 
 		arch_cpu_idle_enter();
+		rcu_nocb_flush_deferred_wakeup();
 
 		/*
 		 * In poll mode we reenable interrupts and spin. Also if we
Frederic Weisbecker Feb. 8, 2021, 2:53 p.m. UTC | #2
On Mon, Feb 08, 2021 at 03:45:50PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 01, 2021 at 12:05:45AM +0100, Frederic Weisbecker wrote:

> 

> > diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c

> > index 305727ea0677..b601a3aa2152 100644

> > --- a/kernel/sched/idle.c

> > +++ b/kernel/sched/idle.c

> > @@ -55,6 +55,7 @@ __setup("hlt", cpu_idle_nopoll_setup);

> >  static noinline int __cpuidle cpu_idle_poll(void)

> >  {

> >  	trace_cpu_idle(0, smp_processor_id());

> > +	rcu_nocb_flush_deferred_wakeup();

> >  	stop_critical_timings();

> >  	rcu_idle_enter();

> >  	local_irq_enable();

> > @@ -173,6 +174,8 @@ static void cpuidle_idle_call(void)

> >  	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);

> >  	int next_state, entered_state;

> >  

> > +	rcu_nocb_flush_deferred_wakeup();

> > +

> >  	/*

> >  	 * Check if the idle task must be rescheduled. If it is the

> >  	 * case, exit the function after re-enabling the local irq.

> 

> Ok if I do this instead?

> 

> --- a/kernel/sched/idle.c

> +++ b/kernel/sched/idle.c

> @@ -55,7 +55,6 @@ __setup("hlt", cpu_idle_nopoll_setup);

>  static noinline int __cpuidle cpu_idle_poll(void)

>  {

>  	trace_cpu_idle(0, smp_processor_id());

> -	rcu_nocb_flush_deferred_wakeup();

>  	stop_critical_timings();

>  	rcu_idle_enter();

>  	local_irq_enable();

> @@ -174,8 +173,6 @@ static void cpuidle_idle_call(void)

>  	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);

>  	int next_state, entered_state;

>  

> -	rcu_nocb_flush_deferred_wakeup();

> -

>  	/*

>  	 * Check if the idle task must be rescheduled. If it is the

>  	 * case, exit the function after re-enabling the local irq.

> @@ -288,6 +285,7 @@ static void do_idle(void)

>  		}

>  

>  		arch_cpu_idle_enter();

> +		rcu_nocb_flush_deferred_wakeup();

>  

>  		/*

>  		 * In poll mode we reenable interrupts and spin. Also if we


Right, I think that should work. Nothing should call_rcu() before the
need_resched() call. And if it does, we still have the nocb_timer to do
the deferred wakeup in the worst case.

Thanks.
diff mbox series

Patch

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index fd02c5fa60cb..36c2119de702 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -110,8 +110,10 @@  static inline void rcu_user_exit(void) { }
 
 #ifdef CONFIG_RCU_NOCB_CPU
 void rcu_init_nohz(void);
+void rcu_nocb_flush_deferred_wakeup(void);
 #else /* #ifdef CONFIG_RCU_NOCB_CPU */
 static inline void rcu_init_nohz(void) { }
+static inline void rcu_nocb_flush_deferred_wakeup(void) { }
 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
 
 /**
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 63032e5620b9..82838e93b498 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -671,10 +671,7 @@  static noinstr void rcu_eqs_enter(bool user)
  */
 void rcu_idle_enter(void)
 {
-	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
-
 	lockdep_assert_irqs_disabled();
-	do_nocb_deferred_wakeup(rdp);
 	rcu_eqs_enter(false);
 }
 EXPORT_SYMBOL_GPL(rcu_idle_enter);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 7e291ce0a1d6..d5b38c28abd1 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2187,6 +2187,11 @@  static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
 		do_nocb_deferred_wakeup_common(rdp);
 }
 
+void rcu_nocb_flush_deferred_wakeup(void)
+{
+	do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data));
+}
+
 void __init rcu_init_nohz(void)
 {
 	int cpu;
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 305727ea0677..b601a3aa2152 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -55,6 +55,7 @@  __setup("hlt", cpu_idle_nopoll_setup);
 static noinline int __cpuidle cpu_idle_poll(void)
 {
 	trace_cpu_idle(0, smp_processor_id());
+	rcu_nocb_flush_deferred_wakeup();
 	stop_critical_timings();
 	rcu_idle_enter();
 	local_irq_enable();
@@ -173,6 +174,8 @@  static void cpuidle_idle_call(void)
 	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
 	int next_state, entered_state;
 
+	rcu_nocb_flush_deferred_wakeup();
+
 	/*
 	 * Check if the idle task must be rescheduled. If it is the
 	 * case, exit the function after re-enabling the local irq.