[tip/core/rcu,13/23] rcu: Control grace-period duration from sysfs

Message ID 1346350718-30937-13-git-send-email-paulmck@linux.vnet.ibm.com
State New
Headers show

Commit Message

Paul E. McKenney Aug. 30, 2012, 6:18 p.m.
From: "Paul E. McKenney" <paul.mckenney@linaro.org>

Some uses of RCU benefit from shorter grace periods, while others benefit
more from the greater efficiency provided by longer grace periods.
Therefore, this commit allows the durations to be controlled from sysfs.
There are two sysfs parameters, one named "jiffies_till_first_fqs" that
specifies the delay in jiffies from the end of grace-period initialization
until the first attempt to force quiescent states, and the other named
"jiffies_till_next_fqs" that specifies the delay (again in jiffies)
between subsequent attempts to force quiescent states.  They both default
to three jiffies, which is compatible with the old hard-coded behavior.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 Documentation/kernel-parameters.txt |   11 +++++++++++
 kernel/rcutree.c                    |   25 ++++++++++++++++++++++---
 2 files changed, 33 insertions(+), 3 deletions(-)

Comments

Josh Triplett Sept. 3, 2012, 9:30 a.m. | #1
On Thu, Aug 30, 2012 at 11:18:28AM -0700, Paul E. McKenney wrote:
> From: "Paul E. McKenney" <paul.mckenney@linaro.org>
> 
> Some uses of RCU benefit from shorter grace periods, while others benefit
> more from the greater efficiency provided by longer grace periods.
> Therefore, this commit allows the durations to be controlled from sysfs.
> There are two sysfs parameters, one named "jiffies_till_first_fqs" that
> specifies the delay in jiffies from the end of grace-period initialization
> until the first attempt to force quiescent states, and the other named
> "jiffies_till_next_fqs" that specifies the delay (again in jiffies)
> between subsequent attempts to force quiescent states.  They both default
> to three jiffies, which is compatible with the old hard-coded behavior.
> 
> Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Signed-off-by: Josh Triplett <josh@joshtriplett.org>

>  Documentation/kernel-parameters.txt |   11 +++++++++++
>  kernel/rcutree.c                    |   25 ++++++++++++++++++++++---
>  2 files changed, 33 insertions(+), 3 deletions(-)
> 
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index ad7e2e5..55ada04 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -2385,6 +2385,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
>  	rcutree.rcu_cpu_stall_timeout= [KNL,BOOT]
>  			Set timeout for RCU CPU stall warning messages.
>  
> +	rcutree.jiffies_till_first_fqs= [KNL,BOOT]
> +			Set delay from grace-period initialization to
> +			first attempt to force quiescent states.
> +			Units are jiffies, minimum value is zero,
> +			and maximum value is HZ.
> +
> +	rcutree.jiffies_till_next_fqs= [KNL,BOOT]
> +			Set delay between subsequent attempts to force
> +			quiescent states.  Units are jiffies, minimum
> +			value is one, and maximum value is HZ.
> +
>  	rcutorture.fqs_duration= [KNL,BOOT]
>  			Set duration of force_quiescent_state bursts.
>  
> diff --git a/kernel/rcutree.c b/kernel/rcutree.c
> index ed1be62..1d33240 100644
> --- a/kernel/rcutree.c
> +++ b/kernel/rcutree.c
> @@ -226,6 +226,12 @@ int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
>  module_param(rcu_cpu_stall_suppress, int, 0644);
>  module_param(rcu_cpu_stall_timeout, int, 0644);
>  
> +static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS;
> +static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS;
> +
> +module_param(jiffies_till_first_fqs, ulong, 0644);
> +module_param(jiffies_till_next_fqs, ulong, 0644);
> +
>  static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
>  static void force_quiescent_state(struct rcu_state *rsp);
>  static int rcu_pending(int cpu);
> @@ -1193,6 +1199,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
>  static int rcu_gp_kthread(void *arg)
>  {
>  	int fqs_state;
> +	unsigned long j;
>  	int ret;
>  	struct rcu_state *rsp = arg;
>  	struct rcu_node *rnp = rcu_get_root(rsp);
> @@ -1213,14 +1220,18 @@ static int rcu_gp_kthread(void *arg)
>  
>  		/* Handle quiescent-state forcing. */
>  		fqs_state = RCU_SAVE_DYNTICK;
> +		j = jiffies_till_first_fqs;
> +		if (j > HZ) {
> +			j = HZ;
> +			jiffies_till_first_fqs = HZ;
> +		}
>  		for (;;) {
> -			rsp->jiffies_force_qs = jiffies +
> -						RCU_JIFFIES_TILL_FORCE_QS;
> +			rsp->jiffies_force_qs = jiffies + j;
>  			ret = wait_event_interruptible_timeout(rsp->gp_wq,
>  					(rsp->gp_flags & RCU_GP_FLAG_FQS) ||
>  					(!ACCESS_ONCE(rnp->qsmask) &&
>  					 !rcu_preempt_blocked_readers_cgp(rnp)),
> -					RCU_JIFFIES_TILL_FORCE_QS);
> +					j);
>  			/* If grace period done, leave loop. */
>  			if (!ACCESS_ONCE(rnp->qsmask) &&
>  			    !rcu_preempt_blocked_readers_cgp(rnp))
> @@ -1234,6 +1245,14 @@ static int rcu_gp_kthread(void *arg)
>  				cond_resched();
>  				flush_signals(current);
>  			}
> +			j = jiffies_till_next_fqs;
> +			if (j > HZ) {
> +				j = HZ;
> +				jiffies_till_next_fqs = HZ;
> +			} else if (j < 1) {
> +				j = 1;
> +				jiffies_till_next_fqs = 1;
> +			}
>  		}
>  
>  		/* Handle grace-period end. */
> -- 
> 1.7.8
>
Josh Triplett Sept. 3, 2012, 9:31 a.m. | #2
On Mon, Sep 03, 2012 at 02:30:16AM -0700, Josh Triplett wrote:
> On Thu, Aug 30, 2012 at 11:18:28AM -0700, Paul E. McKenney wrote:
> > From: "Paul E. McKenney" <paul.mckenney@linaro.org>
> > 
> > Some uses of RCU benefit from shorter grace periods, while others benefit
> > more from the greater efficiency provided by longer grace periods.
> > Therefore, this commit allows the durations to be controlled from sysfs.
> > There are two sysfs parameters, one named "jiffies_till_first_fqs" that
> > specifies the delay in jiffies from the end of grace-period initialization
> > until the first attempt to force quiescent states, and the other named
> > "jiffies_till_next_fqs" that specifies the delay (again in jiffies)
> > between subsequent attempts to force quiescent states.  They both default
> > to three jiffies, which is compatible with the old hard-coded behavior.
> > 
> > Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
> > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> 
> Signed-off-by: Josh Triplett <josh@joshtriplett.org>

Er, sorry, typo:
Reviewed-by: Josh Triplett <josh@joshtriplett.org>

> >  Documentation/kernel-parameters.txt |   11 +++++++++++
> >  kernel/rcutree.c                    |   25 ++++++++++++++++++++++---
> >  2 files changed, 33 insertions(+), 3 deletions(-)
> > 
> > diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> > index ad7e2e5..55ada04 100644
> > --- a/Documentation/kernel-parameters.txt
> > +++ b/Documentation/kernel-parameters.txt
> > @@ -2385,6 +2385,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
> >  	rcutree.rcu_cpu_stall_timeout= [KNL,BOOT]
> >  			Set timeout for RCU CPU stall warning messages.
> >  
> > +	rcutree.jiffies_till_first_fqs= [KNL,BOOT]
> > +			Set delay from grace-period initialization to
> > +			first attempt to force quiescent states.
> > +			Units are jiffies, minimum value is zero,
> > +			and maximum value is HZ.
> > +
> > +	rcutree.jiffies_till_next_fqs= [KNL,BOOT]
> > +			Set delay between subsequent attempts to force
> > +			quiescent states.  Units are jiffies, minimum
> > +			value is one, and maximum value is HZ.
> > +
> >  	rcutorture.fqs_duration= [KNL,BOOT]
> >  			Set duration of force_quiescent_state bursts.
> >  
> > diff --git a/kernel/rcutree.c b/kernel/rcutree.c
> > index ed1be62..1d33240 100644
> > --- a/kernel/rcutree.c
> > +++ b/kernel/rcutree.c
> > @@ -226,6 +226,12 @@ int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
> >  module_param(rcu_cpu_stall_suppress, int, 0644);
> >  module_param(rcu_cpu_stall_timeout, int, 0644);
> >  
> > +static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS;
> > +static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS;
> > +
> > +module_param(jiffies_till_first_fqs, ulong, 0644);
> > +module_param(jiffies_till_next_fqs, ulong, 0644);
> > +
> >  static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
> >  static void force_quiescent_state(struct rcu_state *rsp);
> >  static int rcu_pending(int cpu);
> > @@ -1193,6 +1199,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
> >  static int rcu_gp_kthread(void *arg)
> >  {
> >  	int fqs_state;
> > +	unsigned long j;
> >  	int ret;
> >  	struct rcu_state *rsp = arg;
> >  	struct rcu_node *rnp = rcu_get_root(rsp);
> > @@ -1213,14 +1220,18 @@ static int rcu_gp_kthread(void *arg)
> >  
> >  		/* Handle quiescent-state forcing. */
> >  		fqs_state = RCU_SAVE_DYNTICK;
> > +		j = jiffies_till_first_fqs;
> > +		if (j > HZ) {
> > +			j = HZ;
> > +			jiffies_till_first_fqs = HZ;
> > +		}
> >  		for (;;) {
> > -			rsp->jiffies_force_qs = jiffies +
> > -						RCU_JIFFIES_TILL_FORCE_QS;
> > +			rsp->jiffies_force_qs = jiffies + j;
> >  			ret = wait_event_interruptible_timeout(rsp->gp_wq,
> >  					(rsp->gp_flags & RCU_GP_FLAG_FQS) ||
> >  					(!ACCESS_ONCE(rnp->qsmask) &&
> >  					 !rcu_preempt_blocked_readers_cgp(rnp)),
> > -					RCU_JIFFIES_TILL_FORCE_QS);
> > +					j);
> >  			/* If grace period done, leave loop. */
> >  			if (!ACCESS_ONCE(rnp->qsmask) &&
> >  			    !rcu_preempt_blocked_readers_cgp(rnp))
> > @@ -1234,6 +1245,14 @@ static int rcu_gp_kthread(void *arg)
> >  				cond_resched();
> >  				flush_signals(current);
> >  			}
> > +			j = jiffies_till_next_fqs;
> > +			if (j > HZ) {
> > +				j = HZ;
> > +				jiffies_till_next_fqs = HZ;
> > +			} else if (j < 1) {
> > +				j = 1;
> > +				jiffies_till_next_fqs = 1;
> > +			}
> >  		}
> >  
> >  		/* Handle grace-period end. */
> > -- 
> > 1.7.8
> >
Peter Zijlstra Sept. 6, 2012, 2:15 p.m. | #3
On Thu, 2012-08-30 at 11:18 -0700, Paul E. McKenney wrote:
> From: "Paul E. McKenney" <paul.mckenney@linaro.org>
> 
> Some uses of RCU benefit from shorter grace periods, while others benefit
> more from the greater efficiency provided by longer grace periods.
> Therefore, this commit allows the durations to be controlled from sysfs.
> There are two sysfs parameters, one named "jiffies_till_first_fqs" that
> specifies the delay in jiffies from the end of grace-period initialization
> until the first attempt to force quiescent states, and the other named
> "jiffies_till_next_fqs" that specifies the delay (again in jiffies)
> between subsequent attempts to force quiescent states.  They both default
> to three jiffies, which is compatible with the old hard-coded behavior.

A number of questions:

 - how do I know if my workload wants a longer or shorter forced qs
   period?
 - the above implies a measure of good/bad-ness associated with fqs,
   can one formulate this?
 - if we can, should we not do this 'automagically' and avoid burdening
   our already hard pressed sysads of the world with trying to figure
   this out?

Also, whatever made you want to provide this 'feature' in the first
place?
Paul E. McKenney Sept. 6, 2012, 5:53 p.m. | #4
On Thu, Sep 06, 2012 at 04:15:30PM +0200, Peter Zijlstra wrote:
> On Thu, 2012-08-30 at 11:18 -0700, Paul E. McKenney wrote:
> > From: "Paul E. McKenney" <paul.mckenney@linaro.org>
> > 
> > Some uses of RCU benefit from shorter grace periods, while others benefit
> > more from the greater efficiency provided by longer grace periods.
> > Therefore, this commit allows the durations to be controlled from sysfs.
> > There are two sysfs parameters, one named "jiffies_till_first_fqs" that
> > specifies the delay in jiffies from the end of grace-period initialization
> > until the first attempt to force quiescent states, and the other named
> > "jiffies_till_next_fqs" that specifies the delay (again in jiffies)
> > between subsequent attempts to force quiescent states.  They both default
> > to three jiffies, which is compatible with the old hard-coded behavior.
> 
> A number of questions:
> 
>  - how do I know if my workload wants a longer or shorter forced qs
>    period?

Almost everyone can do just fine with the defaults.  If you have more
than about 1,000 CPUs, you might need a longer period.  Some embedded
systems might need a shorter period -- the only specific example I know
of is network diagnostic equipment running wireshark, which starts up
slowly due to grace-period length.

>  - the above implies a measure of good/bad-ness associated with fqs,
>    can one formulate this?

Maybe.  I do not yet have enough data on really big systems to have a
good formula just yet.

>  - if we can, should we not do this 'automagically' and avoid burdening
>    our already hard pressed sysads of the world with trying to figure
>    this out?

I do expect to get there at some point.

> Also, whatever made you want to provide this 'feature' in the first
> place?

Complaints from the two groups called out above.

							Thanx, Paul
Peter Zijlstra Sept. 6, 2012, 6:28 p.m. | #5
On Thu, 2012-09-06 at 10:53 -0700, Paul E. McKenney wrote:

> >  - how do I know if my workload wants a longer or shorter forced qs
> >    period?
> 
> Almost everyone can do just fine with the defaults.  If you have more
> than about 1,000 CPUs, you might need a longer period. 

Because the cost of starting a grace period is on the same order (or
larger) in cost as this period?

>  Some embedded
> systems might need a shorter period -- the only specific example I know
> of is network diagnostic equipment running wireshark, which starts up
> slowly due to grace-period length.

But but but 3 jiffies.. however is that too long?

> > Also, whatever made you want to provide this 'feature' in the first
> > place?
> 
> Complaints from the two groups called out above.

Does this really warrant a boot time knob for which even you cannot
quite explain what values to use when?
Paul E. McKenney Sept. 6, 2012, 8:37 p.m. | #6
On Thu, Sep 06, 2012 at 08:28:21PM +0200, Peter Zijlstra wrote:
> On Thu, 2012-09-06 at 10:53 -0700, Paul E. McKenney wrote:
> 
> > >  - how do I know if my workload wants a longer or shorter forced qs
> > >    period?
> > 
> > Almost everyone can do just fine with the defaults.  If you have more
> > than about 1,000 CPUs, you might need a longer period. 
> 
> Because the cost of starting a grace period is on the same order (or
> larger) in cost as this period?

Because the overhead of rcu_gp_fqs() can then be multiple jiffies, so
it doesn't make sense to run it so often.  If nothing else, the
rcu_gp_kthread() will start chewing up appreciable CPU time.

> >  Some embedded
> > systems might need a shorter period -- the only specific example I know
> > of is network diagnostic equipment running wireshark, which starts up
> > slowly due to grace-period length.
> 
> But but but 3 jiffies.. however is that too long?

Because wireshark startup runs through a great many grace periods when
starting up, and those 3-jiffy time periods add up.

> > > Also, whatever made you want to provide this 'feature' in the first
> > > place?
> > 
> > Complaints from the two groups called out above.
> 
> Does this really warrant a boot time knob for which even you cannot
> quite explain what values to use when?

If people look at me funny when I explain, I just tell them to leave
it alone.

One alternative at the low end would be to have a sysfs variable that
converted normal grace periods to expedited grace periods.  Would that
be preferable?

							Thanx, Paul

Patch

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index ad7e2e5..55ada04 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2385,6 +2385,17 @@  bytes respectively. Such letter suffixes can also be entirely omitted.
 	rcutree.rcu_cpu_stall_timeout= [KNL,BOOT]
 			Set timeout for RCU CPU stall warning messages.
 
+	rcutree.jiffies_till_first_fqs= [KNL,BOOT]
+			Set delay from grace-period initialization to
+			first attempt to force quiescent states.
+			Units are jiffies, minimum value is zero,
+			and maximum value is HZ.
+
+	rcutree.jiffies_till_next_fqs= [KNL,BOOT]
+			Set delay between subsequent attempts to force
+			quiescent states.  Units are jiffies, minimum
+			value is one, and maximum value is HZ.
+
 	rcutorture.fqs_duration= [KNL,BOOT]
 			Set duration of force_quiescent_state bursts.
 
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ed1be62..1d33240 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -226,6 +226,12 @@  int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
 module_param(rcu_cpu_stall_suppress, int, 0644);
 module_param(rcu_cpu_stall_timeout, int, 0644);
 
+static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS;
+static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS;
+
+module_param(jiffies_till_first_fqs, ulong, 0644);
+module_param(jiffies_till_next_fqs, ulong, 0644);
+
 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
 static void force_quiescent_state(struct rcu_state *rsp);
 static int rcu_pending(int cpu);
@@ -1193,6 +1199,7 @@  static void rcu_gp_cleanup(struct rcu_state *rsp)
 static int rcu_gp_kthread(void *arg)
 {
 	int fqs_state;
+	unsigned long j;
 	int ret;
 	struct rcu_state *rsp = arg;
 	struct rcu_node *rnp = rcu_get_root(rsp);
@@ -1213,14 +1220,18 @@  static int rcu_gp_kthread(void *arg)
 
 		/* Handle quiescent-state forcing. */
 		fqs_state = RCU_SAVE_DYNTICK;
+		j = jiffies_till_first_fqs;
+		if (j > HZ) {
+			j = HZ;
+			jiffies_till_first_fqs = HZ;
+		}
 		for (;;) {
-			rsp->jiffies_force_qs = jiffies +
-						RCU_JIFFIES_TILL_FORCE_QS;
+			rsp->jiffies_force_qs = jiffies + j;
 			ret = wait_event_interruptible_timeout(rsp->gp_wq,
 					(rsp->gp_flags & RCU_GP_FLAG_FQS) ||
 					(!ACCESS_ONCE(rnp->qsmask) &&
 					 !rcu_preempt_blocked_readers_cgp(rnp)),
-					RCU_JIFFIES_TILL_FORCE_QS);
+					j);
 			/* If grace period done, leave loop. */
 			if (!ACCESS_ONCE(rnp->qsmask) &&
 			    !rcu_preempt_blocked_readers_cgp(rnp))
@@ -1234,6 +1245,14 @@  static int rcu_gp_kthread(void *arg)
 				cond_resched();
 				flush_signals(current);
 			}
+			j = jiffies_till_next_fqs;
+			if (j > HZ) {
+				j = HZ;
+				jiffies_till_next_fqs = HZ;
+			} else if (j < 1) {
+				j = 1;
+				jiffies_till_next_fqs = 1;
+			}
 		}
 
 		/* Handle grace-period end. */