Message ID | 1346350718-30937-6-git-send-email-paulmck@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
On Thu, Aug 30, 2012 at 11:18:21AM -0700, Paul E. McKenney wrote: > From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> > > The rcu_gp_kthread() function is too large and furthermore needs to > have the force_quiescent_state() code pulled in. This commit therefore > breaks up rcu_gp_kthread() into rcu_gp_init() and rcu_gp_cleanup(). > > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Reviewed-by: Josh Triplett <josh@joshtriplett.org> > kernel/rcutree.c | 260 +++++++++++++++++++++++++++++------------------------- > 1 files changed, 138 insertions(+), 122 deletions(-) > > diff --git a/kernel/rcutree.c b/kernel/rcutree.c > index 84a6f55..c2c036f 100644 > --- a/kernel/rcutree.c > +++ b/kernel/rcutree.c > @@ -1040,160 +1040,176 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat > } > > /* > - * Body of kthread that handles grace periods. > + * Initialize a new grace period. > */ > -static int rcu_gp_kthread(void *arg) > +static int rcu_gp_init(struct rcu_state *rsp) > { > unsigned long flags; > - unsigned long gp_duration; > struct rcu_data *rdp; > - struct rcu_node *rnp; > - struct rcu_state *rsp = arg; > + struct rcu_node *rnp = rcu_get_root(rsp); > > - for (;;) { > + raw_spin_lock_irqsave(&rnp->lock, flags); > + rsp->gp_flags = 0; > > - /* Handle grace-period start. */ > - rnp = rcu_get_root(rsp); > - for (;;) { > - wait_event_interruptible(rsp->gp_wq, rsp->gp_flags); > - if (rsp->gp_flags) > - break; > - flush_signals(current); > - } > + if (rcu_gp_in_progress(rsp)) { > + /* Grace period already in progress, don't start another. */ > + raw_spin_unlock_irqrestore(&rnp->lock, flags); > + return 0; > + } > + > + if (rsp->fqs_active) { > + /* > + * We need a grace period, but force_quiescent_state() > + * is running. Tell it to start one on our behalf. > + */ > + rsp->fqs_need_gp = 1; > + raw_spin_unlock_irqrestore(&rnp->lock, flags); > + return 0; > + } > + > + /* Advance to a new grace period and initialize state. 
*/ > + rsp->gpnum++; > + trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); > + WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT); > + rsp->fqs_state = RCU_GP_INIT; /* Stop force_quiescent_state. */ > + rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; > + record_gp_stall_check_time(rsp); > + raw_spin_unlock_irqrestore(&rnp->lock, flags); > + > + /* Exclude any concurrent CPU-hotplug operations. */ > + get_online_cpus(); > + > + /* > + * Set the quiescent-state-needed bits in all the rcu_node > + * structures for all currently online CPUs in breadth-first order, > + * starting from the root rcu_node structure, relying on the layout > + * of the tree within the rsp->node[] array. Note that other CPUs > + * access only the leaves of the hierarchy, thus seeing that no > + * grace period is in progress, at least until the corresponding > + * leaf node has been initialized. In addition, we have excluded > + * CPU-hotplug operations. > + * > + * The grace period cannot complete until the initialization > + * process finishes, because this kthread handles both. > + */ > + rcu_for_each_node_breadth_first(rsp, rnp) { > raw_spin_lock_irqsave(&rnp->lock, flags); > - rsp->gp_flags = 0; > rdp = this_cpu_ptr(rsp->rda); > + rcu_preempt_check_blocked_tasks(rnp); > + rnp->qsmask = rnp->qsmaskinit; > + rnp->gpnum = rsp->gpnum; > + rnp->completed = rsp->completed; > + if (rnp == rdp->mynode) > + rcu_start_gp_per_cpu(rsp, rnp, rdp); > + rcu_preempt_boost_start_gp(rnp); > + trace_rcu_grace_period_init(rsp->name, rnp->gpnum, > + rnp->level, rnp->grplo, > + rnp->grphi, rnp->qsmask); > + raw_spin_unlock_irqrestore(&rnp->lock, flags); > + cond_resched(); > + } > > - if (rcu_gp_in_progress(rsp)) { > - /* > - * A grace period is already in progress, so > - * don't start another one. 
> - */ > - raw_spin_unlock_irqrestore(&rnp->lock, flags); > - cond_resched(); > - continue; > - } > + rnp = rcu_get_root(rsp); > + raw_spin_lock_irqsave(&rnp->lock, flags); > + /* force_quiescent_state() now OK. */ > + rsp->fqs_state = RCU_SIGNAL_INIT; > + raw_spin_unlock_irqrestore(&rnp->lock, flags); > + put_online_cpus(); > > - if (rsp->fqs_active) { > - /* > - * We need a grace period, but force_quiescent_state() > - * is running. Tell it to start one on our behalf. > - */ > - rsp->fqs_need_gp = 1; > - raw_spin_unlock_irqrestore(&rnp->lock, flags); > - cond_resched(); > - continue; > - } > + return 1; > +} > > - /* Advance to a new grace period and initialize state. */ > - rsp->gpnum++; > - trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); > - WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT); > - rsp->fqs_state = RCU_GP_INIT; /* Stop force_quiescent_state. */ > - rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; > - record_gp_stall_check_time(rsp); > - raw_spin_unlock_irqrestore(&rnp->lock, flags); > +/* > + * Clean up after the old grace period. > + */ > +static int rcu_gp_cleanup(struct rcu_state *rsp) > +{ > + unsigned long flags; > + unsigned long gp_duration; > + struct rcu_data *rdp; > + struct rcu_node *rnp = rcu_get_root(rsp); > > - /* Exclude any concurrent CPU-hotplug operations. */ > - get_online_cpus(); > + raw_spin_lock_irqsave(&rnp->lock, flags); > + gp_duration = jiffies - rsp->gp_start; > + if (gp_duration > rsp->gp_max) > + rsp->gp_max = gp_duration; > + > + /* > + * We know the grace period is complete, but to everyone else > + * it appears to still be ongoing. But it is also the case > + * that to everyone else it looks like there is nothing that > + * they can do to advance the grace period. It is therefore > + * safe for us to drop the lock in order to mark the grace > + * period as completed in all of the rcu_node structures. 
> + * > + * But if this CPU needs another grace period, it will take > + * care of this while initializing the next grace period. > + * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL > + * because the callbacks have not yet been advanced: Those > + * callbacks are waiting on the grace period that just now > + * completed. > + */ > + rdp = this_cpu_ptr(rsp->rda); > + if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) { > + raw_spin_unlock_irqrestore(&rnp->lock, flags); > > /* > - * Set the quiescent-state-needed bits in all the rcu_node > - * structures for all currently online CPUs in breadth-first > - * order, starting from the root rcu_node structure. > - * This operation relies on the layout of the hierarchy > - * within the rsp->node[] array. Note that other CPUs will > - * access only the leaves of the hierarchy, which still > - * indicate that no grace period is in progress, at least > - * until the corresponding leaf node has been initialized. > - * In addition, we have excluded CPU-hotplug operations. > - * > - * Note that the grace period cannot complete until > - * we finish the initialization process, as there will > - * be at least one qsmask bit set in the root node until > - * that time, namely the one corresponding to this CPU, > - * due to the fact that we have irqs disabled. > + * Propagate new ->completed value to rcu_node > + * structures so that other CPUs don't have to > + * wait until the start of the next grace period > + * to process their callbacks. 
> */ > rcu_for_each_node_breadth_first(rsp, rnp) { > raw_spin_lock_irqsave(&rnp->lock, flags); > - rcu_preempt_check_blocked_tasks(rnp); > - rnp->qsmask = rnp->qsmaskinit; > - rnp->gpnum = rsp->gpnum; > - rnp->completed = rsp->completed; > - if (rnp == rdp->mynode) > - rcu_start_gp_per_cpu(rsp, rnp, rdp); > - rcu_preempt_boost_start_gp(rnp); > - trace_rcu_grace_period_init(rsp->name, rnp->gpnum, > - rnp->level, rnp->grplo, > - rnp->grphi, rnp->qsmask); > + rnp->completed = rsp->gpnum; > raw_spin_unlock_irqrestore(&rnp->lock, flags); > cond_resched(); > } > - > rnp = rcu_get_root(rsp); > raw_spin_lock_irqsave(&rnp->lock, flags); > - /* force_quiescent_state() now OK. */ > - rsp->fqs_state = RCU_SIGNAL_INIT; > - raw_spin_unlock_irqrestore(&rnp->lock, flags); > - put_online_cpus(); > + } > + > + rsp->completed = rsp->gpnum; /* Declare grace period done. */ > + trace_rcu_grace_period(rsp->name, rsp->completed, "end"); > + rsp->fqs_state = RCU_GP_IDLE; > + rdp = this_cpu_ptr(rsp->rda); > + if (cpu_needs_another_gp(rsp, rdp)) > + rsp->gp_flags = 1; > + raw_spin_unlock_irqrestore(&rnp->lock, flags); > + return 1; > +} > + > +/* > + * Body of kthread that handles grace periods. > + */ > +static int rcu_gp_kthread(void *arg) > +{ > + struct rcu_state *rsp = arg; > + struct rcu_node *rnp = rcu_get_root(rsp); > + > + for (;;) { > + > + /* Handle grace-period start. */ > + for (;;) { > + wait_event_interruptible(rsp->gp_wq, rsp->gp_flags); > + if (rsp->gp_flags && rcu_gp_init(rsp)) > + break; > + cond_resched(); > + flush_signals(current); > + } > > /* Handle grace-period end. 
*/ > - rnp = rcu_get_root(rsp); > for (;;) { > wait_event_interruptible(rsp->gp_wq, > !ACCESS_ONCE(rnp->qsmask) && > !rcu_preempt_blocked_readers_cgp(rnp)); > if (!ACCESS_ONCE(rnp->qsmask) && > - !rcu_preempt_blocked_readers_cgp(rnp)) > + !rcu_preempt_blocked_readers_cgp(rnp) && > + rcu_gp_cleanup(rsp)) > break; > + cond_resched(); > flush_signals(current); > } > - > - raw_spin_lock_irqsave(&rnp->lock, flags); > - gp_duration = jiffies - rsp->gp_start; > - if (gp_duration > rsp->gp_max) > - rsp->gp_max = gp_duration; > - > - /* > - * We know the grace period is complete, but to everyone else > - * it appears to still be ongoing. But it is also the case > - * that to everyone else it looks like there is nothing that > - * they can do to advance the grace period. It is therefore > - * safe for us to drop the lock in order to mark the grace > - * period as completed in all of the rcu_node structures. > - * > - * But if this CPU needs another grace period, it will take > - * care of this while initializing the next grace period. > - * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL > - * because the callbacks have not yet been advanced: Those > - * callbacks are waiting on the grace period that just now > - * completed. > - */ > - if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) { > - raw_spin_unlock_irqrestore(&rnp->lock, flags); > - > - /* > - * Propagate new ->completed value to rcu_node > - * structures so that other CPUs don't have to > - * wait until the start of the next grace period > - * to process their callbacks. > - */ > - rcu_for_each_node_breadth_first(rsp, rnp) { > - raw_spin_lock_irqsave(&rnp->lock, flags); > - rnp->completed = rsp->gpnum; > - raw_spin_unlock_irqrestore(&rnp->lock, flags); > - cond_resched(); > - } > - rnp = rcu_get_root(rsp); > - raw_spin_lock_irqsave(&rnp->lock, flags); > - } > - > - rsp->completed = rsp->gpnum; /* Declare grace period done. 
*/ > - trace_rcu_grace_period(rsp->name, rsp->completed, "end"); > - rsp->fqs_state = RCU_GP_IDLE; > - if (cpu_needs_another_gp(rsp, rdp)) > - rsp->gp_flags = 1; > - raw_spin_unlock_irqrestore(&rnp->lock, flags); > } > return 0; > } > -- > 1.7.8 >
On Thu, 2012-08-30 at 11:18 -0700, Paul E. McKenney wrote: > +static int rcu_gp_kthread(void *arg) > +{ > + struct rcu_state *rsp = arg; > + struct rcu_node *rnp = rcu_get_root(rsp); > + > + for (;;) { > + > + /* Handle grace-period start. */ > + for (;;) { > + wait_event_interruptible(rsp->gp_wq, rsp->gp_flags); > + if (rsp->gp_flags && rcu_gp_init(rsp)) > + break; > + cond_resched(); > + flush_signals(current); > + } > > /* Handle grace-period end. */ > for (;;) { > wait_event_interruptible(rsp->gp_wq, > !ACCESS_ONCE(rnp->qsmask) && > !rcu_preempt_blocked_readers_cgp(rnp)); > if (!ACCESS_ONCE(rnp->qsmask) && > + !rcu_preempt_blocked_readers_cgp(rnp) && > + rcu_gp_cleanup(rsp)) > break; > + cond_resched(); > flush_signals(current); > } > } > return 0; > } Should there not be a kthread_stop() / kthread_park() call somewhere in there? Also, it could be me, but all those nested for (;;) loops make the flow rather non-obvious.
On Thu, Sep 06, 2012 at 03:39:51PM +0200, Peter Zijlstra wrote: > On Thu, 2012-08-30 at 11:18 -0700, Paul E. McKenney wrote: > > +static int rcu_gp_kthread(void *arg) > > +{ > > + struct rcu_state *rsp = arg; > > + struct rcu_node *rnp = rcu_get_root(rsp); > > + > > + for (;;) { > > + > > + /* Handle grace-period start. */ > > + for (;;) { > > + wait_event_interruptible(rsp->gp_wq, rsp->gp_flags); > > + if (rsp->gp_flags && rcu_gp_init(rsp)) > > + break; > > + cond_resched(); > > + flush_signals(current); > > + } > > > > /* Handle grace-period end. */ > > for (;;) { > > wait_event_interruptible(rsp->gp_wq, > > !ACCESS_ONCE(rnp->qsmask) && > > !rcu_preempt_blocked_readers_cgp(rnp)); > > if (!ACCESS_ONCE(rnp->qsmask) && > > + !rcu_preempt_blocked_readers_cgp(rnp) && > > + rcu_gp_cleanup(rsp)) > > break; > > + cond_resched(); > > flush_signals(current); > > } > > } > > return 0; > > } > > Should there not be a kthread_stop() / kthread_park() call somewhere in > there? The kthread stops only when the system goes down, so no need for any kthread_stop() or kthread_park(). The "return 0" suppresses complaints about falling off the end of a non-void function. > Also, it could be me, but all those nested for (;;) loops make the flow > rather non-obvious. For those two loops, I suppose I could pull the cond_resched() and flush_signals() to the top, and make a do-while out of it. Thanx, Paul
On Thu, Sep 06, 2012 at 10:32:07AM -0700, Paul E. McKenney wrote: > On Thu, Sep 06, 2012 at 03:39:51PM +0200, Peter Zijlstra wrote: > > On Thu, 2012-08-30 at 11:18 -0700, Paul E. McKenney wrote: > > > +static int rcu_gp_kthread(void *arg) > > > +{ > > > + struct rcu_state *rsp = arg; > > > + struct rcu_node *rnp = rcu_get_root(rsp); > > > + > > > + for (;;) { > > > + > > > + /* Handle grace-period start. */ > > > + for (;;) { > > > + wait_event_interruptible(rsp->gp_wq, rsp->gp_flags); > > > + if (rsp->gp_flags && rcu_gp_init(rsp)) > > > + break; > > > + cond_resched(); > > > + flush_signals(current); > > > + } > > > > > > /* Handle grace-period end. */ > > > for (;;) { > > > wait_event_interruptible(rsp->gp_wq, > > > !ACCESS_ONCE(rnp->qsmask) && > > > !rcu_preempt_blocked_readers_cgp(rnp)); > > > if (!ACCESS_ONCE(rnp->qsmask) && > > > + !rcu_preempt_blocked_readers_cgp(rnp) && > > > + rcu_gp_cleanup(rsp)) > > > break; > > > + cond_resched(); > > > flush_signals(current); > > > } > > > } > > > return 0; > > > } > > > > Should there not be a kthread_stop() / kthread_park() call somewhere in > > there? > > The kthread stops only when the system goes down, so no need for any > kthread_stop() or kthread_park(). The "return 0" suppresses complaints > about falling off the end of a non-void function. Huh, I thought GCC knew to not emit that warning unless it actually found control flow reaching the end of the function; since the infinite loop has no break in it, you shouldn't need the return. Annoying. > > Also, it could be me, but all those nested for (;;) loops make the flow > > rather non-obvious. > > For those two loops, I suppose I could pull the cond_resched() and > flush_signals() to the top, and make a do-while out of it. I think it makes more sense to move the wait_event_interruptible to the bottom, and make a while out of it. - Josh Triplett
On Thu, 2012-09-06 at 11:49 -0700, Josh Triplett wrote: > > Huh, I thought GCC knew to not emit that warning unless it actually > found control flow reaching the end of the function; since the > infinite > loop has no break in it, you shouldn't need the return. Annoying. tag the function with __noreturn
On Thu, Sep 06, 2012 at 11:49:21AM -0700, Josh Triplett wrote: > On Thu, Sep 06, 2012 at 10:32:07AM -0700, Paul E. McKenney wrote: > > On Thu, Sep 06, 2012 at 03:39:51PM +0200, Peter Zijlstra wrote: > > > On Thu, 2012-08-30 at 11:18 -0700, Paul E. McKenney wrote: > > > > +static int rcu_gp_kthread(void *arg) > > > > +{ > > > > + struct rcu_state *rsp = arg; > > > > + struct rcu_node *rnp = rcu_get_root(rsp); > > > > + > > > > + for (;;) { > > > > + > > > > + /* Handle grace-period start. */ > > > > + for (;;) { > > > > + wait_event_interruptible(rsp->gp_wq, rsp->gp_flags); > > > > + if (rsp->gp_flags && rcu_gp_init(rsp)) > > > > + break; > > > > + cond_resched(); > > > > + flush_signals(current); > > > > + } > > > > > > > > /* Handle grace-period end. */ > > > > for (;;) { > > > > wait_event_interruptible(rsp->gp_wq, > > > > !ACCESS_ONCE(rnp->qsmask) && > > > > !rcu_preempt_blocked_readers_cgp(rnp)); > > > > if (!ACCESS_ONCE(rnp->qsmask) && > > > > + !rcu_preempt_blocked_readers_cgp(rnp) && > > > > + rcu_gp_cleanup(rsp)) > > > > break; > > > > + cond_resched(); > > > > flush_signals(current); > > > > } > > > > } > > > > return 0; > > > > } > > > > > > Should there not be a kthread_stop() / kthread_park() call somewhere in > > > there? > > > > The kthread stops only when the system goes down, so no need for any > > kthread_stop() or kthread_park(). The "return 0" suppresses complaints > > about falling off the end of a non-void function. > > Huh, I thought GCC knew to not emit that warning unless it actually > found control flow reaching the end of the function; since the infinite > loop has no break in it, you shouldn't need the return. Annoying. > > > > Also, it could be me, but all those nested for (;;) loops make the flow > > > rather non-obvious. > > > > For those two loops, I suppose I could pull the cond_resched() and > > flush_signals() to the top, and make a do-while out of it. 
> > I think it makes more sense to move the wait_event_interruptible to the > bottom, and make a while out of it. I know!!! Let's compromise and put the loop exit in the middle of the loop!!! Oh, wait... ;-), Paul
On Thu, Sep 06, 2012 at 09:09:01PM +0200, Peter Zijlstra wrote: > On Thu, 2012-09-06 at 11:49 -0700, Josh Triplett wrote: > > > > Huh, I thought GCC knew to not emit that warning unless it actually > > found control flow reaching the end of the function; since the > > infinite > > loop has no break in it, you shouldn't need the return. Annoying. > > tag the function with __noreturn Ah, I will try that. Thanx, Paul
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 84a6f55..c2c036f 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1040,160 +1040,176 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat } /* - * Body of kthread that handles grace periods. + * Initialize a new grace period. */ -static int rcu_gp_kthread(void *arg) +static int rcu_gp_init(struct rcu_state *rsp) { unsigned long flags; - unsigned long gp_duration; struct rcu_data *rdp; - struct rcu_node *rnp; - struct rcu_state *rsp = arg; + struct rcu_node *rnp = rcu_get_root(rsp); - for (;;) { + raw_spin_lock_irqsave(&rnp->lock, flags); + rsp->gp_flags = 0; - /* Handle grace-period start. */ - rnp = rcu_get_root(rsp); - for (;;) { - wait_event_interruptible(rsp->gp_wq, rsp->gp_flags); - if (rsp->gp_flags) - break; - flush_signals(current); - } + if (rcu_gp_in_progress(rsp)) { + /* Grace period already in progress, don't start another. */ + raw_spin_unlock_irqrestore(&rnp->lock, flags); + return 0; + } + + if (rsp->fqs_active) { + /* + * We need a grace period, but force_quiescent_state() + * is running. Tell it to start one on our behalf. + */ + rsp->fqs_need_gp = 1; + raw_spin_unlock_irqrestore(&rnp->lock, flags); + return 0; + } + + /* Advance to a new grace period and initialize state. */ + rsp->gpnum++; + trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); + WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT); + rsp->fqs_state = RCU_GP_INIT; /* Stop force_quiescent_state. */ + rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; + record_gp_stall_check_time(rsp); + raw_spin_unlock_irqrestore(&rnp->lock, flags); + + /* Exclude any concurrent CPU-hotplug operations. */ + get_online_cpus(); + + /* + * Set the quiescent-state-needed bits in all the rcu_node + * structures for all currently online CPUs in breadth-first order, + * starting from the root rcu_node structure, relying on the layout + * of the tree within the rsp->node[] array. 
Note that other CPUs + * access only the leaves of the hierarchy, thus seeing that no + * grace period is in progress, at least until the corresponding + * leaf node has been initialized. In addition, we have excluded + * CPU-hotplug operations. + * + * The grace period cannot complete until the initialization + * process finishes, because this kthread handles both. + */ + rcu_for_each_node_breadth_first(rsp, rnp) { raw_spin_lock_irqsave(&rnp->lock, flags); - rsp->gp_flags = 0; rdp = this_cpu_ptr(rsp->rda); + rcu_preempt_check_blocked_tasks(rnp); + rnp->qsmask = rnp->qsmaskinit; + rnp->gpnum = rsp->gpnum; + rnp->completed = rsp->completed; + if (rnp == rdp->mynode) + rcu_start_gp_per_cpu(rsp, rnp, rdp); + rcu_preempt_boost_start_gp(rnp); + trace_rcu_grace_period_init(rsp->name, rnp->gpnum, + rnp->level, rnp->grplo, + rnp->grphi, rnp->qsmask); + raw_spin_unlock_irqrestore(&rnp->lock, flags); + cond_resched(); + } - if (rcu_gp_in_progress(rsp)) { - /* - * A grace period is already in progress, so - * don't start another one. - */ - raw_spin_unlock_irqrestore(&rnp->lock, flags); - cond_resched(); - continue; - } + rnp = rcu_get_root(rsp); + raw_spin_lock_irqsave(&rnp->lock, flags); + /* force_quiescent_state() now OK. */ + rsp->fqs_state = RCU_SIGNAL_INIT; + raw_spin_unlock_irqrestore(&rnp->lock, flags); + put_online_cpus(); - if (rsp->fqs_active) { - /* - * We need a grace period, but force_quiescent_state() - * is running. Tell it to start one on our behalf. - */ - rsp->fqs_need_gp = 1; - raw_spin_unlock_irqrestore(&rnp->lock, flags); - cond_resched(); - continue; - } + return 1; +} - /* Advance to a new grace period and initialize state. */ - rsp->gpnum++; - trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); - WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT); - rsp->fqs_state = RCU_GP_INIT; /* Stop force_quiescent_state. 
*/ - rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; - record_gp_stall_check_time(rsp); - raw_spin_unlock_irqrestore(&rnp->lock, flags); +/* + * Clean up after the old grace period. + */ +static int rcu_gp_cleanup(struct rcu_state *rsp) +{ + unsigned long flags; + unsigned long gp_duration; + struct rcu_data *rdp; + struct rcu_node *rnp = rcu_get_root(rsp); - /* Exclude any concurrent CPU-hotplug operations. */ - get_online_cpus(); + raw_spin_lock_irqsave(&rnp->lock, flags); + gp_duration = jiffies - rsp->gp_start; + if (gp_duration > rsp->gp_max) + rsp->gp_max = gp_duration; + + /* + * We know the grace period is complete, but to everyone else + * it appears to still be ongoing. But it is also the case + * that to everyone else it looks like there is nothing that + * they can do to advance the grace period. It is therefore + * safe for us to drop the lock in order to mark the grace + * period as completed in all of the rcu_node structures. + * + * But if this CPU needs another grace period, it will take + * care of this while initializing the next grace period. + * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL + * because the callbacks have not yet been advanced: Those + * callbacks are waiting on the grace period that just now + * completed. + */ + rdp = this_cpu_ptr(rsp->rda); + if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) { + raw_spin_unlock_irqrestore(&rnp->lock, flags); /* - * Set the quiescent-state-needed bits in all the rcu_node - * structures for all currently online CPUs in breadth-first - * order, starting from the root rcu_node structure. - * This operation relies on the layout of the hierarchy - * within the rsp->node[] array. Note that other CPUs will - * access only the leaves of the hierarchy, which still - * indicate that no grace period is in progress, at least - * until the corresponding leaf node has been initialized. - * In addition, we have excluded CPU-hotplug operations. 
- * - * Note that the grace period cannot complete until - * we finish the initialization process, as there will - * be at least one qsmask bit set in the root node until - * that time, namely the one corresponding to this CPU, - * due to the fact that we have irqs disabled. + * Propagate new ->completed value to rcu_node + * structures so that other CPUs don't have to + * wait until the start of the next grace period + * to process their callbacks. */ rcu_for_each_node_breadth_first(rsp, rnp) { raw_spin_lock_irqsave(&rnp->lock, flags); - rcu_preempt_check_blocked_tasks(rnp); - rnp->qsmask = rnp->qsmaskinit; - rnp->gpnum = rsp->gpnum; - rnp->completed = rsp->completed; - if (rnp == rdp->mynode) - rcu_start_gp_per_cpu(rsp, rnp, rdp); - rcu_preempt_boost_start_gp(rnp); - trace_rcu_grace_period_init(rsp->name, rnp->gpnum, - rnp->level, rnp->grplo, - rnp->grphi, rnp->qsmask); + rnp->completed = rsp->gpnum; raw_spin_unlock_irqrestore(&rnp->lock, flags); cond_resched(); } - rnp = rcu_get_root(rsp); raw_spin_lock_irqsave(&rnp->lock, flags); - /* force_quiescent_state() now OK. */ - rsp->fqs_state = RCU_SIGNAL_INIT; - raw_spin_unlock_irqrestore(&rnp->lock, flags); - put_online_cpus(); + } + + rsp->completed = rsp->gpnum; /* Declare grace period done. */ + trace_rcu_grace_period(rsp->name, rsp->completed, "end"); + rsp->fqs_state = RCU_GP_IDLE; + rdp = this_cpu_ptr(rsp->rda); + if (cpu_needs_another_gp(rsp, rdp)) + rsp->gp_flags = 1; + raw_spin_unlock_irqrestore(&rnp->lock, flags); + return 1; +} + +/* + * Body of kthread that handles grace periods. + */ +static int rcu_gp_kthread(void *arg) +{ + struct rcu_state *rsp = arg; + struct rcu_node *rnp = rcu_get_root(rsp); + + for (;;) { + + /* Handle grace-period start. */ + for (;;) { + wait_event_interruptible(rsp->gp_wq, rsp->gp_flags); + if (rsp->gp_flags && rcu_gp_init(rsp)) + break; + cond_resched(); + flush_signals(current); + } /* Handle grace-period end. 
*/ - rnp = rcu_get_root(rsp); for (;;) { wait_event_interruptible(rsp->gp_wq, !ACCESS_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp)); if (!ACCESS_ONCE(rnp->qsmask) && - !rcu_preempt_blocked_readers_cgp(rnp)) + !rcu_preempt_blocked_readers_cgp(rnp) && + rcu_gp_cleanup(rsp)) break; + cond_resched(); flush_signals(current); } - - raw_spin_lock_irqsave(&rnp->lock, flags); - gp_duration = jiffies - rsp->gp_start; - if (gp_duration > rsp->gp_max) - rsp->gp_max = gp_duration; - - /* - * We know the grace period is complete, but to everyone else - * it appears to still be ongoing. But it is also the case - * that to everyone else it looks like there is nothing that - * they can do to advance the grace period. It is therefore - * safe for us to drop the lock in order to mark the grace - * period as completed in all of the rcu_node structures. - * - * But if this CPU needs another grace period, it will take - * care of this while initializing the next grace period. - * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL - * because the callbacks have not yet been advanced: Those - * callbacks are waiting on the grace period that just now - * completed. - */ - if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) { - raw_spin_unlock_irqrestore(&rnp->lock, flags); - - /* - * Propagate new ->completed value to rcu_node - * structures so that other CPUs don't have to - * wait until the start of the next grace period - * to process their callbacks. - */ - rcu_for_each_node_breadth_first(rsp, rnp) { - raw_spin_lock_irqsave(&rnp->lock, flags); - rnp->completed = rsp->gpnum; - raw_spin_unlock_irqrestore(&rnp->lock, flags); - cond_resched(); - } - rnp = rcu_get_root(rsp); - raw_spin_lock_irqsave(&rnp->lock, flags); - } - - rsp->completed = rsp->gpnum; /* Declare grace period done. 
*/ - trace_rcu_grace_period(rsp->name, rsp->completed, "end"); - rsp->fqs_state = RCU_GP_IDLE; - if (cpu_needs_another_gp(rsp, rdp)) - rsp->gp_flags = 1; - raw_spin_unlock_irqrestore(&rnp->lock, flags); } return 0; }