[tip/core/rcu,01/15] rcu: Control RCU_FANOUT_LEAF from boot-time parameter

Message ID 1339794370-28119-1-git-send-email-paulmck@linux.vnet.ibm.com
State New
Headers show

Commit Message

Paul E. McKenney June 15, 2012, 9:05 p.m.
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>

Although making RCU_FANOUT_LEAF a kernel configuration parameter rather
than a fixed constant makes it easier for people to decrease cache-miss
overhead for large systems, it is of little help for people who must
run a single pre-built kernel binary.

This commit therefore allows the value of RCU_FANOUT_LEAF to be
increased (but not decreased!) via a boot-time parameter named
rcutree.rcu_fanout_leaf.

Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 Documentation/kernel-parameters.txt |    4 ++
 kernel/rcutree.c                    |   97 ++++++++++++++++++++++++++++++-----
 kernel/rcutree.h                    |   23 +++++----
 kernel/rcutree_plugin.h             |    4 +-
 kernel/rcutree_trace.c              |    2 +-
 5 files changed, 104 insertions(+), 26 deletions(-)

Comments

Josh Triplett June 15, 2012, 9:43 p.m. | #1
On Fri, Jun 15, 2012 at 02:05:56PM -0700, Paul E. McKenney wrote:
> From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
> 
> Although making RCU_FANOUT_LEAF a kernel configuration parameter rather
> than a fixed constant makes it easier for people to decrease cache-miss
> overhead for large systems, it is of little help for people who must
> run a single pre-built kernel binary.
> 
> This commit therefore allows the value of RCU_FANOUT_LEAF to be
> increased (but not decreased!) via a boot-time parameter named
> rcutree.rcu_fanout_leaf.
> 
> Reported-by: Mike Galbraith <efault@gmx.de>
> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> ---
>  Documentation/kernel-parameters.txt |    4 ++
>  kernel/rcutree.c                    |   97 ++++++++++++++++++++++++++++++-----
>  kernel/rcutree.h                    |   23 +++++----
>  kernel/rcutree_plugin.h             |    4 +-
>  kernel/rcutree_trace.c              |    2 +-
>  5 files changed, 104 insertions(+), 26 deletions(-)
> 
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index c45513d..88bd3ef 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -2367,6 +2367,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
>  			Set maximum number of finished RCU callbacks to process
>  			in one batch.
>  
> +	rcutree.fanout_leaf=	[KNL,BOOT]
> +			Set maximum number of finished RCU callbacks to process
> +			in one batch.

Copy-paste problem.

>  	rcutree.qhimark=	[KNL,BOOT]
>  			Set threshold of queued
>  			RCU callbacks over which batch limiting is disabled.
> diff --git a/kernel/rcutree.c b/kernel/rcutree.c
> index 0da7b88..a151184 100644
> --- a/kernel/rcutree.c
> +++ b/kernel/rcutree.c
> @@ -60,17 +60,10 @@
>  
>  /* Data structures. */
>  
> -static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
> +static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];

I assume that the requirement to only increase the fanout and never
decrease it comes from the desire to not increase the sizes of all of
these arrays to MAX_RCU_LVLS?

> +/*
> + * Compute the rcu_node tree geometry from kernel parameters.  This cannot
> + * replace the definitions in rcutree.h because those are needed to size
> + * the ->node array in the rcu_state structure.
> + */
> +static void __init rcu_init_geometry(void)
> +{
> +	int i;
> +	int j;
> +	int n = NR_CPUS;
> +	int rcu_capacity[MAX_RCU_LVLS + 1];
> +
> +	/* If the compile-time values are accurate, just leave. */
> +	if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF)
> +		return;
> +
> +	/*
> +	 * Compute number of nodes that can be handled an rcu_node tree
> +	 * with the given number of levels.  Setting rcu_capacity[0] makes
> +	 * some of the arithmetic easier.
> +	 */
> +	rcu_capacity[0] = 1;
> +	rcu_capacity[1] = rcu_fanout_leaf;
> +	for (i = 2; i <= MAX_RCU_LVLS; i++)
> +		rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;
> +
> +	/*
> +	 * The boot-time rcu_fanout_leaf parameter is only permitted
> +	 * to increase the leaf-level fanout, not decrease it.  Of course,
> +	 * the leaf-level fanout cannot exceed the number of bits in
> +	 * the rcu_node masks.  Finally, the tree must be able to accommodate
> +	 * the configured number of CPUs.  Complain and fall back to the
> +	 * compile-timer values if these limits are exceeded.

Typo: s/timer/time/

> +	 */
> +	if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF ||
> +	    rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
> +	    n > rcu_capacity[4]) {

4 seems like a magic number here; did you mean MAX_RCU_LVLS or similar?

Also, why have n as a variable when it never changes?

> --- a/kernel/rcutree.h
> +++ b/kernel/rcutree.h
> @@ -42,28 +42,28 @@
>  #define RCU_FANOUT_4	      (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
>  
>  #if NR_CPUS <= RCU_FANOUT_1
> -#  define NUM_RCU_LVLS	      1
> +#  define RCU_NUM_LVLS	      1

I assume you made this change to make it easier to track down all the
uses of the macro to change them; however, having now done so, the
change itself seems rather gratuitous, and inconsistent with the other
macros.  Would you consider changing it back?

> +extern int rcu_num_lvls;
> +extern int rcu_num_nodes;

Given the above, you might also want to change these for consistency.

Also, have you checked the various loops using these variables to figure
out if GCC emits less optimal code now that it can't rely on a
compile-time constant?  I don't expect it to make much of a difference,
but it seems worth checking.

You might also consider marking these as __read_mostly, at a minimum.

> diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
> index 2411000..e9b44c3 100644
> --- a/kernel/rcutree_plugin.h
> +++ b/kernel/rcutree_plugin.h
> @@ -68,8 +68,10 @@ static void __init rcu_bootup_announce_oddness(void)
>  	printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n");
>  #endif
>  #if NUM_RCU_LVL_4 != 0
> -	printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
> +	printk(KERN_INFO "\tFour-level hierarchy is enabled.\n");

This change seems entirely unrelated to this patch.  Seems simple enough
to split it into a separate one-line patch ("Mark four-level hierarchy
as no longer experimental").

>  #endif
> +	if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
> +		printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout.\n");

You might consider printing rcu_fanout_leaf in this message.

- Josh Triplett
Paul E. McKenney June 15, 2012, 10:10 p.m. | #2
On Fri, Jun 15, 2012 at 02:43:09PM -0700, Josh Triplett wrote:
> On Fri, Jun 15, 2012 at 02:05:56PM -0700, Paul E. McKenney wrote:
> > From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
> > 
> > Although making RCU_FANOUT_LEAF a kernel configuration parameter rather
> > than a fixed constant makes it easier for people to decrease cache-miss
> > overhead for large systems, it is of little help for people who must
> > run a single pre-built kernel binary.
> > 
> > This commit therefore allows the value of RCU_FANOUT_LEAF to be
> > increased (but not decreased!) via a boot-time parameter named
> > rcutree.rcu_fanout_leaf.
> > 
> > Reported-by: Mike Galbraith <efault@gmx.de>
> > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> > ---
> >  Documentation/kernel-parameters.txt |    4 ++
> >  kernel/rcutree.c                    |   97 ++++++++++++++++++++++++++++++-----
> >  kernel/rcutree.h                    |   23 +++++----
> >  kernel/rcutree_plugin.h             |    4 +-
> >  kernel/rcutree_trace.c              |    2 +-
> >  5 files changed, 104 insertions(+), 26 deletions(-)
> > 
> > diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> > index c45513d..88bd3ef 100644
> > --- a/Documentation/kernel-parameters.txt
> > +++ b/Documentation/kernel-parameters.txt
> > @@ -2367,6 +2367,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
> >  			Set maximum number of finished RCU callbacks to process
> >  			in one batch.
> >  
> > +	rcutree.fanout_leaf=	[KNL,BOOT]
> > +			Set maximum number of finished RCU callbacks to process
> > +			in one batch.
> 
> Copy-paste problem.

Indeed!  Good catch!

> >  	rcutree.qhimark=	[KNL,BOOT]
> >  			Set threshold of queued
> >  			RCU callbacks over which batch limiting is disabled.
> > diff --git a/kernel/rcutree.c b/kernel/rcutree.c
> > index 0da7b88..a151184 100644
> > --- a/kernel/rcutree.c
> > +++ b/kernel/rcutree.c
> > @@ -60,17 +60,10 @@
> >  
> >  /* Data structures. */
> >  
> > -static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
> > +static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
> 
> I assume that the requirement to only increase the fanout and never
> decrease it comes from the desire to not increase the sizes of all of
> these arrays to MAX_RCU_LVLS?

Actually, it is the node[] array in the rcu_state structure that is
of concern.

> > +/*
> > + * Compute the rcu_node tree geometry from kernel parameters.  This cannot
> > + * replace the definitions in rcutree.h because those are needed to size
> > + * the ->node array in the rcu_state structure.
> > + */
> > +static void __init rcu_init_geometry(void)
> > +{
> > +	int i;
> > +	int j;
> > +	int n = NR_CPUS;
> > +	int rcu_capacity[MAX_RCU_LVLS + 1];
> > +
> > +	/* If the compile-time values are accurate, just leave. */
> > +	if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF)
> > +		return;
> > +
> > +	/*
> > +	 * Compute number of nodes that can be handled an rcu_node tree
> > +	 * with the given number of levels.  Setting rcu_capacity[0] makes
> > +	 * some of the arithmetic easier.
> > +	 */
> > +	rcu_capacity[0] = 1;
> > +	rcu_capacity[1] = rcu_fanout_leaf;
> > +	for (i = 2; i <= MAX_RCU_LVLS; i++)
> > +		rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;
> > +
> > +	/*
> > +	 * The boot-time rcu_fanout_leaf parameter is only permitted
> > +	 * to increase the leaf-level fanout, not decrease it.  Of course,
> > +	 * the leaf-level fanout cannot exceed the number of bits in
> > +	 * the rcu_node masks.  Finally, the tree must be able to accommodate
> > +	 * the configured number of CPUs.  Complain and fall back to the
> > +	 * compile-timer values if these limits are exceeded.
> 
> Typo: s/timer/time/

Good catch!

> > +	 */
> > +	if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF ||
> > +	    rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
> > +	    n > rcu_capacity[4]) {
> 
> 4 seems like a magic number here; did you mean MAX_RCU_LVLS or similar?

I believe so, good catch!  That would have been painful if another
level of the hierarchy were needed...

> Also, why have n as a variable when it never changes?

Will propagate the value unless I can come up with a good reason.  ;-)

> > --- a/kernel/rcutree.h
> > +++ b/kernel/rcutree.h
> > @@ -42,28 +42,28 @@
> >  #define RCU_FANOUT_4	      (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
> >  
> >  #if NR_CPUS <= RCU_FANOUT_1
> > -#  define NUM_RCU_LVLS	      1
> > +#  define RCU_NUM_LVLS	      1
> 
> I assume you made this change to make it easier to track down all the
> uses of the macro to change them; however, having now done so, the
> change itself seems rather gratuitous, and inconsistent with the other
> macros.  Would you consider changing it back?
> 
> > +extern int rcu_num_lvls;
> > +extern int rcu_num_nodes;
> 
> Given the above, you might also want to change these for consistency.

This might make sense.  If I run into too many conflicts, I may defer
the change to the join of the topic trees.

> Also, have you checked the various loops using these variables to figure
> out if GCC emits less optimal code now that it can't rely on a
> compile-time constant?  I don't expect it to make much of a difference,
> but it seems worth checking.

I am sure that it generates worse code, but the uses are on slowpaths,
so I really am not worried about it.

> You might also consider marking these as __read_mostly, at a minimum.

This sounds quite sensible, will do.

> > diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
> > index 2411000..e9b44c3 100644
> > --- a/kernel/rcutree_plugin.h
> > +++ b/kernel/rcutree_plugin.h
> > @@ -68,8 +68,10 @@ static void __init rcu_bootup_announce_oddness(void)
> >  	printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n");
> >  #endif
> >  #if NUM_RCU_LVL_4 != 0
> > -	printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
> > +	printk(KERN_INFO "\tFour-level hierarchy is enabled.\n");
> 
> This change seems entirely unrelated to this patch.  Seems simple enough
> to split it into a separate one-line patch ("Mark four-level hierarchy
> as no longer experimental").

Can't see why not...

> >  #endif
> > +	if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
> > +		printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout.\n");
> 
> You might consider printing rcu_fanout_leaf in this message.

Ouch!  Good point, will fix.

							Thanx, Paul

Patch

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c45513d..88bd3ef 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2367,6 +2367,10 @@  bytes respectively. Such letter suffixes can also be entirely omitted.
 			Set maximum number of finished RCU callbacks to process
 			in one batch.
 
+	rcutree.rcu_fanout_leaf= [KNL,BOOT]
+			Increase (but not decrease) the leaf-level fanout of
+			the rcu_node tree beyond CONFIG_RCU_FANOUT_LEAF.
+
 	rcutree.qhimark=	[KNL,BOOT]
 			Set threshold of queued
 			RCU callbacks over which batch limiting is disabled.
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0da7b88..a151184 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -60,17 +60,10 @@ 
 
 /* Data structures. */
 
-static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
+static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 
 #define RCU_STATE_INITIALIZER(structname) { \
 	.level = { &structname##_state.node[0] }, \
-	.levelcnt = { \
-		NUM_RCU_LVL_0,  /* root of hierarchy. */ \
-		NUM_RCU_LVL_1, \
-		NUM_RCU_LVL_2, \
-		NUM_RCU_LVL_3, \
-		NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
-	}, \
 	.fqs_state = RCU_GP_IDLE, \
 	.gpnum = -300, \
 	.completed = -300, \
@@ -91,6 +84,19 @@  DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
 static struct rcu_state *rcu_state;
 
+/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
+static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
+module_param(rcu_fanout_leaf, int, 0);
+int rcu_num_lvls = RCU_NUM_LVLS;
+static int num_rcu_lvl[] = {  /* Number of rcu_nodes at specified level. */
+	NUM_RCU_LVL_0,
+	NUM_RCU_LVL_1,
+	NUM_RCU_LVL_2,
+	NUM_RCU_LVL_3,
+	NUM_RCU_LVL_4,
+};
+int rcu_num_nodes = NUM_RCU_NODES; /* Total number of rcu_nodes in use. */
+
 /*
  * The rcu_scheduler_active variable transitions from zero to one just
  * before the first task is spawned.  So when this variable is zero, RCU
@@ -2571,9 +2577,9 @@  static void __init rcu_init_levelspread(struct rcu_state *rsp)
 {
 	int i;
 
-	for (i = NUM_RCU_LVLS - 1; i > 0; i--)
+	for (i = rcu_num_lvls - 1; i > 0; i--)
 		rsp->levelspread[i] = CONFIG_RCU_FANOUT;
-	rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF;
+	rsp->levelspread[0] = rcu_fanout_leaf;
 }
 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
 static void __init rcu_init_levelspread(struct rcu_state *rsp)
@@ -2583,7 +2589,7 @@  static void __init rcu_init_levelspread(struct rcu_state *rsp)
 	int i;
 
 	cprv = NR_CPUS;
-	for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
+	for (i = rcu_num_lvls - 1; i >= 0; i--) {
 		ccur = rsp->levelcnt[i];
 		rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
 		cprv = ccur;
@@ -2610,13 +2616,15 @@  static void __init rcu_init_one(struct rcu_state *rsp,
 
 	/* Initialize the level-tracking arrays. */
 
-	for (i = 1; i < NUM_RCU_LVLS; i++)
+	for (i = 0; i < rcu_num_lvls; i++)
+		rsp->levelcnt[i] = num_rcu_lvl[i];
+	for (i = 1; i < rcu_num_lvls; i++)
 		rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
 	rcu_init_levelspread(rsp);
 
 	/* Initialize the elements themselves, starting from the leaves. */
 
-	for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
+	for (i = rcu_num_lvls - 1; i >= 0; i--) {
 		cpustride *= rsp->levelspread[i];
 		rnp = rsp->level[i];
 		for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
@@ -2646,7 +2654,7 @@  static void __init rcu_init_one(struct rcu_state *rsp,
 	}
 
 	rsp->rda = rda;
-	rnp = rsp->level[NUM_RCU_LVLS - 1];
+	rnp = rsp->level[rcu_num_lvls - 1];
 	for_each_possible_cpu(i) {
 		while (i > rnp->grphi)
 			rnp++;
@@ -2655,11 +2663,72 @@  static void __init rcu_init_one(struct rcu_state *rsp,
 	}
 }
 
+/*
+ * Compute the rcu_node tree geometry from kernel parameters.  This cannot
+ * replace the definitions in rcutree.h because those are needed to size
+ * the ->node array in the rcu_state structure.
+ */
+static void __init rcu_init_geometry(void)
+{
+	int i;
+	int j;
+	int n = NR_CPUS;
+	int rcu_capacity[MAX_RCU_LVLS + 1];
+
+	/* If the compile-time values are accurate, just leave. */
+	if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF)
+		return;
+
+	/*
+	 * Compute number of CPUs that can be handled by an rcu_node tree
+	 * with the given number of levels.  Setting rcu_capacity[0] makes
+	 * some of the arithmetic easier.
+	 */
+	rcu_capacity[0] = 1;
+	rcu_capacity[1] = rcu_fanout_leaf;
+	for (i = 2; i <= MAX_RCU_LVLS; i++)
+		rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;
+
+	/*
+	 * The boot-time rcu_fanout_leaf parameter is only permitted
+	 * to increase the leaf-level fanout, not decrease it.  Of course,
+	 * the leaf-level fanout cannot exceed the number of bits in
+	 * the rcu_node masks.  Finally, the tree must be able to accommodate
+	 * the configured number of CPUs.  Complain and fall back to the
+	 * compile-time values if these limits are exceeded.
+	 */
+	if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF ||
+	    rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
+	    n > rcu_capacity[MAX_RCU_LVLS]) {
+		WARN_ON(1);
+		return;
+	}
+
+	/* Calculate the number of rcu_nodes at each level of the tree. */
+	for (i = 1; i <= MAX_RCU_LVLS; i++)
+		if (n <= rcu_capacity[i]) {
+			for (j = 0; j <= i; j++)
+				num_rcu_lvl[j] =
+					DIV_ROUND_UP(n, rcu_capacity[i - j]);
+			rcu_num_lvls = i;
+			for (j = i + 1; j <= MAX_RCU_LVLS; j++)
+				num_rcu_lvl[j] = 0;
+			break;
+		}
+
+	/* Calculate the total number of rcu_node structures. */
+	rcu_num_nodes = 0;
+	for (i = 0; i <= MAX_RCU_LVLS; i++)
+		rcu_num_nodes += num_rcu_lvl[i];
+	rcu_num_nodes -= n;
+}
+
 void __init rcu_init(void)
 {
 	int cpu;
 
 	rcu_bootup_announce();
+	rcu_init_geometry();
 	rcu_init_one(&rcu_sched_state, &rcu_sched_data);
 	rcu_init_one(&rcu_bh_state, &rcu_bh_data);
 	__rcu_init_preempt();
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 7f5d138..df3c2c8 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -42,28 +42,28 @@ 
 #define RCU_FANOUT_4	      (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
 
 #if NR_CPUS <= RCU_FANOUT_1
-#  define NUM_RCU_LVLS	      1
+#  define RCU_NUM_LVLS	      1
 #  define NUM_RCU_LVL_0	      1
 #  define NUM_RCU_LVL_1	      (NR_CPUS)
 #  define NUM_RCU_LVL_2	      0
 #  define NUM_RCU_LVL_3	      0
 #  define NUM_RCU_LVL_4	      0
 #elif NR_CPUS <= RCU_FANOUT_2
-#  define NUM_RCU_LVLS	      2
+#  define RCU_NUM_LVLS	      2
 #  define NUM_RCU_LVL_0	      1
 #  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
 #  define NUM_RCU_LVL_2	      (NR_CPUS)
 #  define NUM_RCU_LVL_3	      0
 #  define NUM_RCU_LVL_4	      0
 #elif NR_CPUS <= RCU_FANOUT_3
-#  define NUM_RCU_LVLS	      3
+#  define RCU_NUM_LVLS	      3
 #  define NUM_RCU_LVL_0	      1
 #  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
 #  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
 #  define NUM_RCU_LVL_3	      (NR_CPUS)
 #  define NUM_RCU_LVL_4	      0
 #elif NR_CPUS <= RCU_FANOUT_4
-#  define NUM_RCU_LVLS	      4
+#  define RCU_NUM_LVLS	      4
 #  define NUM_RCU_LVL_0	      1
 #  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
 #  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
@@ -76,6 +76,9 @@ 
 #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
 #define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
 
+extern int rcu_num_lvls;
+extern int rcu_num_nodes;
+
 /*
  * Dynticks per-CPU state.
  */
@@ -192,7 +195,7 @@  struct rcu_node {
  */
 #define rcu_for_each_node_breadth_first(rsp, rnp) \
 	for ((rnp) = &(rsp)->node[0]; \
-	     (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
+	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
 
 /*
  * Do a breadth-first scan of the non-leaf rcu_node structures for the
@@ -201,7 +204,7 @@  struct rcu_node {
  */
 #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
 	for ((rnp) = &(rsp)->node[0]; \
-	     (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++)
+	     (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
 
 /*
  * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
@@ -210,8 +213,8 @@  struct rcu_node {
  * It is still a leaf node, even if it is also the root node.
  */
 #define rcu_for_each_leaf_node(rsp, rnp) \
-	for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \
-	     (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
+	for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
+	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
 
 /* Index values for nxttail array in struct rcu_data. */
 #define RCU_DONE_TAIL		0	/* Also RCU_WAIT head. */
@@ -343,9 +346,9 @@  do {									\
  */
 struct rcu_state {
 	struct rcu_node node[NUM_RCU_NODES];	/* Hierarchy. */
-	struct rcu_node *level[NUM_RCU_LVLS];	/* Hierarchy levels. */
+	struct rcu_node *level[RCU_NUM_LVLS];	/* Hierarchy levels. */
 	u32 levelcnt[MAX_RCU_LVLS + 1];		/* # nodes in each level. */
-	u8 levelspread[NUM_RCU_LVLS];		/* kids/node in each level. */
+	u8 levelspread[RCU_NUM_LVLS];		/* kids/node in each level. */
 	struct rcu_data __percpu *rda;		/* pointer of percu rcu_data. */
 
 	/* The following fields are guarded by the root rcu_node's lock. */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 2411000..e9b44c3 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -68,8 +68,10 @@  static void __init rcu_bootup_announce_oddness(void)
 	printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n");
 #endif
 #if NUM_RCU_LVL_4 != 0
-	printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
+	printk(KERN_INFO "\tFour-level hierarchy is enabled.\n");
 #endif
+	if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
+		printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout.\n");
 }
 
 #ifdef CONFIG_TREE_PREEMPT_RCU
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index d4bc16d..a3556a2 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -278,7 +278,7 @@  static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 		   rsp->n_force_qs, rsp->n_force_qs_ngp,
 		   rsp->n_force_qs - rsp->n_force_qs_ngp,
 		   rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen);
-	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) {
+	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
 		if (rnp->level != level) {
 			seq_puts(m, "\n");
 			level = rnp->level;