Message ID | 20210131230548.32970-3-frederic@kernel.org |
---|---|
State | New |
Series | rcu/sched: Fix ignored rescheduling after rcu_eqs_enter() v4 |
On Mon, Feb 01, 2021 at 12:05:45AM +0100, Frederic Weisbecker wrote:

> diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
> index 305727ea0677..b601a3aa2152 100644
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -55,6 +55,7 @@ __setup("hlt", cpu_idle_nopoll_setup);
>  static noinline int __cpuidle cpu_idle_poll(void)
>  {
>  	trace_cpu_idle(0, smp_processor_id());
> +	rcu_nocb_flush_deferred_wakeup();
>  	stop_critical_timings();
>  	rcu_idle_enter();
>  	local_irq_enable();
> @@ -173,6 +174,8 @@ static void cpuidle_idle_call(void)
>  	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
>  	int next_state, entered_state;
> 
> +	rcu_nocb_flush_deferred_wakeup();
> +
>  	/*
>  	 * Check if the idle task must be rescheduled. If it is the
>  	 * case, exit the function after re-enabling the local irq.

Ok if I do this instead?

--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -55,7 +55,6 @@ __setup("hlt", cpu_idle_nopoll_setup);
 static noinline int __cpuidle cpu_idle_poll(void)
 {
 	trace_cpu_idle(0, smp_processor_id());
-	rcu_nocb_flush_deferred_wakeup();
 	stop_critical_timings();
 	rcu_idle_enter();
 	local_irq_enable();
@@ -174,8 +173,6 @@ static void cpuidle_idle_call(void)
 	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
 	int next_state, entered_state;
 
-	rcu_nocb_flush_deferred_wakeup();
-
 	/*
 	 * Check if the idle task must be rescheduled. If it is the
 	 * case, exit the function after re-enabling the local irq.
@@ -288,6 +285,7 @@ static void do_idle(void)
 	}
 
 	arch_cpu_idle_enter();
+	rcu_nocb_flush_deferred_wakeup();
 
 	/*
 	 * In poll mode we reenable interrupts and spin. Also if we
On Mon, Feb 08, 2021 at 03:45:50PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 01, 2021 at 12:05:45AM +0100, Frederic Weisbecker wrote:
> 
> > diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
> > index 305727ea0677..b601a3aa2152 100644
> > --- a/kernel/sched/idle.c
> > +++ b/kernel/sched/idle.c
> > @@ -55,6 +55,7 @@ __setup("hlt", cpu_idle_nopoll_setup);
> >  static noinline int __cpuidle cpu_idle_poll(void)
> >  {
> >  	trace_cpu_idle(0, smp_processor_id());
> > +	rcu_nocb_flush_deferred_wakeup();
> >  	stop_critical_timings();
> >  	rcu_idle_enter();
> >  	local_irq_enable();
> > @@ -173,6 +174,8 @@ static void cpuidle_idle_call(void)
> >  	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
> >  	int next_state, entered_state;
> > 
> > +	rcu_nocb_flush_deferred_wakeup();
> > +
> >  	/*
> >  	 * Check if the idle task must be rescheduled. If it is the
> >  	 * case, exit the function after re-enabling the local irq.
> 
> Ok if I do this instead?
> 
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -55,7 +55,6 @@ __setup("hlt", cpu_idle_nopoll_setup);
>  static noinline int __cpuidle cpu_idle_poll(void)
>  {
>  	trace_cpu_idle(0, smp_processor_id());
> -	rcu_nocb_flush_deferred_wakeup();
>  	stop_critical_timings();
>  	rcu_idle_enter();
>  	local_irq_enable();
> @@ -174,8 +173,6 @@ static void cpuidle_idle_call(void)
>  	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
>  	int next_state, entered_state;
> 
> -	rcu_nocb_flush_deferred_wakeup();
> -
>  	/*
>  	 * Check if the idle task must be rescheduled. If it is the
>  	 * case, exit the function after re-enabling the local irq.
> @@ -288,6 +285,7 @@ static void do_idle(void)
>  	}
> 
>  	arch_cpu_idle_enter();
> +	rcu_nocb_flush_deferred_wakeup();
> 
>  	/*
>  	 * In poll mode we reenable interrupts and spin. Also if we

Right, I think that should work. Nothing should call_rcu() before the
need_resched() call. And if it does, we still have the nocb_timer to do
the deferred wakeup in the worst case.

Thanks.
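For reference, a simplified sketch of the idle-loop ordering that results from the placement suggested above: the deferred-wakeup flush runs once per iteration, right after arch_cpu_idle_enter(), and therefore before the need_resched() checks in cpu_idle_poll()/cpuidle_idle_call() that can still break out to the scheduler. This is an illustration only, not the verbatim kernel/sched/idle.c code; CPU-offline handling, tick bookkeeping and instrumentation are elided, and do_idle_sketch() is just a stand-in name.

/*
 * Simplified sketch of do_idle() with the flush moved as suggested
 * above. Illustration only; the real function also handles CPU
 * offlining, the idle tick, polling mode restarts, etc.
 */
static void do_idle_sketch(void)
{
	while (!need_resched()) {
		local_irq_disable();
		arch_cpu_idle_enter();

		/*
		 * Flush any rcuog deferred wakeup here, once per loop
		 * iteration. If it ends up setting need_resched(), the
		 * checks below still observe it before the CPU enters
		 * a low power state.
		 */
		rcu_nocb_flush_deferred_wakeup();

		if (cpu_idle_force_poll || tick_check_broadcast_expired())
			cpu_idle_poll();	/* spins while !need_resched() */
		else
			cpuidle_idle_call();	/* bails out early on need_resched() */

		arch_cpu_idle_exit();
	}

	/* need_resched() was observed: leave the idle loop and reschedule. */
	schedule_idle();
}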
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index fd02c5fa60cb..36c2119de702 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -110,8 +110,10 @@ static inline void rcu_user_exit(void) { }
 
 #ifdef CONFIG_RCU_NOCB_CPU
 void rcu_init_nohz(void);
+void rcu_nocb_flush_deferred_wakeup(void);
 #else /* #ifdef CONFIG_RCU_NOCB_CPU */
 static inline void rcu_init_nohz(void) { }
+static inline void rcu_nocb_flush_deferred_wakeup(void) { }
 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
 
 /**
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 63032e5620b9..82838e93b498 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -671,10 +671,7 @@ static noinstr void rcu_eqs_enter(bool user)
  */
 void rcu_idle_enter(void)
 {
-	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
-
 	lockdep_assert_irqs_disabled();
-	do_nocb_deferred_wakeup(rdp);
 	rcu_eqs_enter(false);
 }
 EXPORT_SYMBOL_GPL(rcu_idle_enter);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 7e291ce0a1d6..d5b38c28abd1 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2187,6 +2187,11 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
 		do_nocb_deferred_wakeup_common(rdp);
 }
 
+void rcu_nocb_flush_deferred_wakeup(void)
+{
+	do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data));
+}
+
 void __init rcu_init_nohz(void)
 {
 	int cpu;
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 305727ea0677..b601a3aa2152 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -55,6 +55,7 @@ __setup("hlt", cpu_idle_nopoll_setup);
 static noinline int __cpuidle cpu_idle_poll(void)
 {
 	trace_cpu_idle(0, smp_processor_id());
+	rcu_nocb_flush_deferred_wakeup();
 	stop_critical_timings();
 	rcu_idle_enter();
 	local_irq_enable();
@@ -173,6 +174,8 @@ static void cpuidle_idle_call(void)
 	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
 	int next_state, entered_state;
 
+	rcu_nocb_flush_deferred_wakeup();
+
 	/*
 	 * Check if the idle task must be rescheduled. If it is the
 	 * case, exit the function after re-enabling the local irq.
Entering RCU idle mode may cause a deferred wake up of an RCU NOCB_GP
kthread (rcuog) to be serviced.

Usually a local wake up happening while running the idle task is handled
in one of the need_resched() checks carefully placed within the idle loop
that can break to the scheduler.

Unfortunately the call to rcu_idle_enter() is already beyond the last
generic need_resched() check, so we may halt the CPU with a resched
request unhandled, leaving the task hanging.

Fix this by splitting the rcuog wakeup handling from rcu_idle_enter() and
placing it before the last generic need_resched() check in the idle loop.
It is then assumed that no call to call_rcu() will be performed after
that point in the idle loop until the CPU is put in low power mode.

Reported-by: Paul E. McKenney <paulmck@kernel.org>
Fixes: 96d3fd0d315a ("rcu: Break call_rcu() deadlock involving scheduler and perf")
Cc: stable@vger.kernel.org
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 include/linux/rcupdate.h | 2 ++
 kernel/rcu/tree.c        | 3 ---
 kernel/rcu/tree_plugin.h | 5 +++++
 kernel/sched/idle.c      | 3 +++
 4 files changed, 10 insertions(+), 3 deletions(-)
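To make the failure mode described in the changelog concrete, here is a heavily abridged sketch of the pre-patch ordering. It is not the real cpuidle_idle_call(), which reaches rcu_idle_enter() through call_cpuidle() or default_idle_call(); cpuidle_idle_call_sketch() is a stand-in name, and only the relative order of the last need_resched() check, the deferred rcuog wakeup, and the low power entry is meant to be accurate.

/*
 * Pre-patch ordering, abridged for illustration. The deferred rcuog
 * wakeup used to run from rcu_idle_enter(), i.e. after the idle loop
 * has already done its last generic need_resched() check.
 */
static void cpuidle_idle_call_sketch(void)
{
	if (need_resched()) {		/* last generic check before idling */
		local_irq_enable();
		return;
	}

	/* ... idle state selection elided ... */

	rcu_idle_enter();	/* used to call do_nocb_deferred_wakeup() */
	/*
	 * If that deferred wakeup sets TIF_NEED_RESCHED on this CPU,
	 * nothing re-checks it before the halt below, so the CPU can
	 * go to sleep with the resched request pending.
	 */
	arch_cpu_idle();	/* low power mode, wait for an interrupt */
	rcu_idle_exit();
}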