Message ID | 20191120152255.18928-1-anders.roxell@linaro.org |
---|---|
State | New |
Headers | show |
Series | [v2] net: ipmr: fix suspicious RCU warning | expand |
On 11/20/19 7:22 AM, Anders Roxell wrote: > When booting an arm64 allmodconfig kernel on linux-next next-20191115 > The following "suspicious RCU usage" warning shows up. This bug seems > to have been introduced by commit f0ad0860d01e ("ipv4: ipmr: support > multiple tables") in 2010, but the warning was added only in this past > year by commit 28875945ba98 ("rcu: Add support for consolidated-RCU > reader checking"). > > [ 32.496021][ T1] ============================= > [ 32.497616][ T1] WARNING: suspicious RCU usage > [ 32.499614][ T1] 5.4.0-rc6-next-20191108-00003-gf74bac957b5c-dirty #2 Not tainted > [ 32.502018][ T1] ----------------------------- > [ 32.503976][ T1] net/ipv4/ipmr.c:136 RCU-list traversed in non-reader section!! > [ 32.506746][ T1] > [ 32.506746][ T1] other info that might help us debug this: > [ 32.506746][ T1] > [ 32.509794][ T1] > [ 32.509794][ T1] rcu_scheduler_active = 2, debug_locks = 1 > [ 32.512661][ T1] 1 lock held by swapper/0/1: > [ 32.514169][ T1] #0: ffffa000150dd678 (pernet_ops_rwsem){+.+.}, at: register_pernet_subsys+0x24/0x50 > [ 32.517621][ T1] > [ 32.517621][ T1] stack backtrace: > [ 32.519930][ T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.4.0-rc6-next-20191108-00003-gf74bac957b5c-dirty #2 > [ 32.523063][ T1] Hardware name: linux,dummy-virt (DT) > [ 32.524787][ T1] Call trace: > [ 32.525946][ T1] dump_backtrace+0x0/0x2d0 > [ 32.527433][ T1] show_stack+0x20/0x30 > [ 32.528811][ T1] dump_stack+0x204/0x2ac > [ 32.530258][ T1] lockdep_rcu_suspicious+0xf4/0x108 > [ 32.531993][ T1] ipmr_get_table+0xc8/0x170 > [ 32.533496][ T1] ipmr_new_table+0x48/0xa0 > [ 32.535002][ T1] ipmr_net_init+0xe8/0x258 > [ 32.536465][ T1] ops_init+0x280/0x2d8 > [ 32.537876][ T1] register_pernet_operations+0x210/0x420 > [ 32.539707][ T1] register_pernet_subsys+0x30/0x50 > [ 32.541372][ T1] ip_mr_init+0x54/0x180 > [ 32.542785][ T1] inet_init+0x25c/0x3e8 > [ 32.544186][ T1] do_one_initcall+0x4c0/0xad8 > [ 32.545757][ T1] kernel_init_freeable+0x3e0/0x500 > [ 32.547443][ T1] kernel_init+0x14/0x1f0 > [ 32.548875][ T1] ret_from_fork+0x10/0x18 > > This commit therefore holds RTNL mutex around the problematic code path, > which is function ipmr_rules_init() in ipmr_net_init(). This commit > also adds a lockdep_rtnl_is_held() check to the ipmr_for_each_table() > macro. > > Suggested-by: David Miller <davem@davemloft.net> > Reviewed-by: Paul E. McKenney <paulmck@kernel.org> > Signed-off-by: Anders Roxell <anders.roxell@linaro.org> > --- > net/ipv4/ipmr.c | 5 ++++- > 1 file changed, 4 insertions(+), 1 deletion(-) > > diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c > index 6e68def66822..53dff9a0e60a 100644 > --- a/net/ipv4/ipmr.c > +++ b/net/ipv4/ipmr.c > @@ -110,7 +110,8 @@ static void ipmr_expire_process(struct timer_list *t); > > #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES > #define ipmr_for_each_table(mrt, net) \ > - list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) > + list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \ > + lockdep_rtnl_is_held()) > > static struct mr_table *ipmr_mr_table_iter(struct net *net, > struct mr_table *mrt) > @@ -3086,7 +3087,9 @@ static int __net_init ipmr_net_init(struct net *net) > if (err) > goto ipmr_notifier_fail; > > + rtnl_lock(); > err = ipmr_rules_init(net); > + rtnl_unlock(); > if (err < 0) > goto ipmr_rules_fail; Hmmm... this might have performance impact for creation of a new netns Since the 'struct net' is not yet fully initialized (thus published/visible), should we really have to grab RTNL (again) only to silence a warning ? What about the following alternative ? diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 6e68def66822f47fc08d94eddd32a4bd4f9fdfb0..b6dcdce08f1d82c83756a319623e24ae0174092c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -94,7 +94,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __ro_after_init; -static struct mr_table *ipmr_new_table(struct net *net, u32 id); +static struct mr_table *ipmr_new_table(struct net *net, u32 id, bool init); static void ipmr_free_table(struct mr_table *mrt); static void ip_mr_forward(struct net *net, struct mr_table *mrt, @@ -245,7 +245,7 @@ static int __net_init ipmr_rules_init(struct net *net) INIT_LIST_HEAD(&net->ipv4.mr_tables); - mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); + mrt = ipmr_new_table(net, RT_TABLE_DEFAULT, true); if (IS_ERR(mrt)) { err = PTR_ERR(mrt); goto err1; @@ -322,7 +322,7 @@ static int __net_init ipmr_rules_init(struct net *net) { struct mr_table *mrt; - mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); + mrt = ipmr_new_table(net, RT_TABLE_DEFAULT, true); if (IS_ERR(mrt)) return PTR_ERR(mrt); net->ipv4.mrt = mrt; @@ -392,7 +392,7 @@ static struct mr_table_ops ipmr_mr_table_ops = { .cmparg_any = &ipmr_mr_table_ops_cmparg_any, }; -static struct mr_table *ipmr_new_table(struct net *net, u32 id) +static struct mr_table *ipmr_new_table(struct net *net, u32 id, bool init) { struct mr_table *mrt; @@ -400,9 +400,11 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) if (id != RT_TABLE_DEFAULT && id >= 1000000000) return ERR_PTR(-EINVAL); - mrt = ipmr_get_table(net, id); - if (mrt) - return mrt; + if (!init) { + mrt = ipmr_get_table(net, id); + if (mrt) + return mrt; + } return mr_table_alloc(net, id, &ipmr_mr_table_ops, ipmr_expire_process, ipmr_new_table_set); @@ -1547,7 +1549,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, if (sk == rtnl_dereference(mrt->mroute_sk)) { ret = -EBUSY; } else { - mrt = ipmr_new_table(net, uval); + mrt = ipmr_new_table(net, uval, false); if (IS_ERR(mrt)) ret = PTR_ERR(mrt); else
On Thu, 21 Nov 2019 at 08:15, Anders Roxell <anders.roxell@linaro.org> wrote: > > On Wed, 20 Nov 2019 at 18:45, Eric Dumazet <eric.dumazet@gmail.com> wrote: > > > > > > > > On 11/20/19 7:22 AM, Anders Roxell wrote: [snippet] > > > + rtnl_lock(); > > > err = ipmr_rules_init(net); > > > + rtnl_unlock(); > > > if (err < 0) > > > goto ipmr_rules_fail; > > > > Hmmm... this might have performance impact for creation of a new netns > > > > Since the 'struct net' is not yet fully initialized (thus published/visible), > > should we really have to grab RTNL (again) only to silence a warning ? > > > > What about the following alternative ? > > I tried what you suggested, unfortunately, I still got the warning. I was wrong, so if I also add "lockdep_rtnl_is_held()" to the "ipmr_for_each_table()" macro it works. > > > [ 43.253850][ T1] ============================= > [ 43.255473][ T1] WARNING: suspicious RCU usage > [ 43.259068][ T1] > 5.4.0-rc8-next-20191120-00003-g3aa7c2a8649e-dirty #6 Not tainted > [ 43.263078][ T1] ----------------------------- > [ 43.265134][ T1] net/ipv4/ipmr.c:1759 RCU-list traversed in > non-reader section!! > [ 43.267587][ T1] > [ 43.267587][ T1] other info that might help us debug this: > [ 43.267587][ T1] > [ 43.271129][ T1] > [ 43.271129][ T1] rcu_scheduler_active = 2, debug_locks = 1 > [ 43.274021][ T1] 2 locks held by swapper/0/1: > [ 43.275532][ T1] #0: ffff000065abeaa0 (&dev->mutex){....}, at: > __device_driver_lock+0xa0/0xb0 > [ 43.278930][ T1] #1: ffffa000153017f0 (rtnl_mutex){+.+.}, at: > rtnl_lock+0x1c/0x28 > [ 43.282023][ T1] > [ 43.282023][ T1] stack backtrace: > [ 43.283921][ T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted > 5.4.0-rc8-next-20191120-00003-g3aa7c2a8649e-dirty #6 > [ 43.287152][ T1] Hardware name: linux,dummy-virt (DT) > [ 43.288920][ T1] Call trace: > [ 43.290057][ T1] dump_backtrace+0x0/0x2d0 > [ 43.291535][ T1] show_stack+0x20/0x30 > [ 43.292967][ T1] dump_stack+0x204/0x2ac > [ 43.294423][ T1] lockdep_rcu_suspicious+0xf4/0x108 > [ 43.296163][ T1] ipmr_device_event+0x100/0x1e8 > [ 43.297812][ T1] notifier_call_chain+0x100/0x1a8 > [ 43.299486][ T1] raw_notifier_call_chain+0x38/0x48 > [ 43.301248][ T1] call_netdevice_notifiers_info+0x128/0x148 > [ 43.303158][ T1] rollback_registered_many+0x684/0xb48 > [ 43.304963][ T1] rollback_registered+0xd8/0x150 > [ 43.306595][ T1] unregister_netdevice_queue+0x194/0x1b8 > [ 43.308406][ T1] unregister_netdev+0x24/0x38 > [ 43.310012][ T1] virtnet_remove+0x44/0x78 > [ 43.311519][ T1] virtio_dev_remove+0x5c/0xc0 > [ 43.313114][ T1] really_probe+0x508/0x920 > [ 43.314635][ T1] driver_probe_device+0x164/0x230 > [ 43.316337][ T1] device_driver_attach+0x8c/0xc0 > [ 43.318024][ T1] __driver_attach+0x1e0/0x1f8 > [ 43.319584][ T1] bus_for_each_dev+0xf0/0x188 > [ 43.321169][ T1] driver_attach+0x34/0x40 > [ 43.322645][ T1] bus_add_driver+0x204/0x3c8 > [ 43.324202][ T1] driver_register+0x160/0x1f8 > [ 43.325788][ T1] register_virtio_driver+0x7c/0x88 > [ 43.327480][ T1] virtio_net_driver_init+0xa8/0xf4 > [ 43.329196][ T1] do_one_initcall+0x4c0/0xad8 > [ 43.330767][ T1] kernel_init_freeable+0x3e0/0x500 > [ 43.332444][ T1] kernel_init+0x14/0x1f0 > [ 43.333901][ T1] ret_from_fork+0x10/0x18 > > > Cheers, > Anders > > > > > diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c > > index 6e68def66822f47fc08d94eddd32a4bd4f9fdfb0..b6dcdce08f1d82c83756a319623e24ae0174092c 100644 > > --- a/net/ipv4/ipmr.c > > +++ b/net/ipv4/ipmr.c > > @@ -94,7 +94,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock); > > > > static struct kmem_cache *mrt_cachep __ro_after_init; > > > > -static struct mr_table *ipmr_new_table(struct net *net, u32 id); > > +static struct mr_table *ipmr_new_table(struct net *net, u32 id, bool init); > > static void ipmr_free_table(struct mr_table *mrt); > > static void ip_mr_forward(struct net *net, struct mr_table *mrt, @@ -110,7 +110,8 @@ static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES #define ipmr_for_each_table(mrt, net) \ - list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) + list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \ + lockdep_rtnl_is_held()) static struct mr_table *ipmr_mr_table_iter(struct net *net, struct mr_table *mrt) Cheers, Anders > > static void ip_mr_forward(struct net *net, struct mr_table *mrt, > > @@ -245,7 +245,7 @@ static int __net_init ipmr_rules_init(struct net *net) > > > > INIT_LIST_HEAD(&net->ipv4.mr_tables); > > > > - mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); > > + mrt = ipmr_new_table(net, RT_TABLE_DEFAULT, true); > > if (IS_ERR(mrt)) { > > err = PTR_ERR(mrt); > > goto err1; > > @@ -322,7 +322,7 @@ static int __net_init ipmr_rules_init(struct net *net) > > { > > struct mr_table *mrt; > > > > - mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); > > + mrt = ipmr_new_table(net, RT_TABLE_DEFAULT, true); > > if (IS_ERR(mrt)) > > return PTR_ERR(mrt); > > net->ipv4.mrt = mrt; > > @@ -392,7 +392,7 @@ static struct mr_table_ops ipmr_mr_table_ops = { > > .cmparg_any = &ipmr_mr_table_ops_cmparg_any, > > }; > > > > -static struct mr_table *ipmr_new_table(struct net *net, u32 id) > > +static struct mr_table *ipmr_new_table(struct net *net, u32 id, bool init) > > { > > struct mr_table *mrt; > > > > @@ -400,9 +400,11 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) > > if (id != RT_TABLE_DEFAULT && id >= 1000000000) > > return ERR_PTR(-EINVAL); > > > > - mrt = ipmr_get_table(net, id); > > - if (mrt) > > - return mrt; > > + if (!init) { > > + mrt = ipmr_get_table(net, id); > > + if (mrt) > > + return mrt; > > + } > > > > return mr_table_alloc(net, id, &ipmr_mr_table_ops, > > ipmr_expire_process, ipmr_new_table_set); > > @@ -1547,7 +1549,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, > > if (sk == rtnl_dereference(mrt->mroute_sk)) { > > ret = -EBUSY; > > } else { > > - mrt = ipmr_new_table(net, uval); > > + mrt = ipmr_new_table(net, uval, false); > > if (IS_ERR(mrt)) > > ret = PTR_ERR(mrt); > > else > > > >
On 11/21/19 2:17 AM, Anders Roxell wrote: > On Thu, 21 Nov 2019 at 08:15, Anders Roxell <anders.roxell@linaro.org> wrote: >> >> On Wed, 20 Nov 2019 at 18:45, Eric Dumazet <eric.dumazet@gmail.com> wrote: >>> >>> >>> >>> On 11/20/19 7:22 AM, Anders Roxell wrote: > > [snippet] > >>>> + rtnl_lock(); >>>> err = ipmr_rules_init(net); >>>> + rtnl_unlock(); >>>> if (err < 0) >>>> goto ipmr_rules_fail; >>> >>> Hmmm... this might have performance impact for creation of a new netns >>> >>> Since the 'struct net' is not yet fully initialized (thus published/visible), >>> should we really have to grab RTNL (again) only to silence a warning ? >>> >>> What about the following alternative ? >> >> I tried what you suggested, unfortunately, I still got the warning. > > I was wrong, so if I also add "lockdep_rtnl_is_held()" to the > "ipmr_for_each_table()" macro it works. > >> >> >> [ 43.253850][ T1] ============================= >> [ 43.255473][ T1] WARNING: suspicious RCU usage >> [ 43.259068][ T1] >> 5.4.0-rc8-next-20191120-00003-g3aa7c2a8649e-dirty #6 Not tainted >> [ 43.263078][ T1] ----------------------------- >> [ 43.265134][ T1] net/ipv4/ipmr.c:1759 RCU-list traversed in >> non-reader section!! >> [ 43.267587][ T1] >> [ 43.267587][ T1] other info that might help us debug this: >> [ 43.267587][ T1] >> [ 43.271129][ T1] >> [ 43.271129][ T1] rcu_scheduler_active = 2, debug_locks = 1 >> [ 43.274021][ T1] 2 locks held by swapper/0/1: >> [ 43.275532][ T1] #0: ffff000065abeaa0 (&dev->mutex){....}, at: >> __device_driver_lock+0xa0/0xb0 >> [ 43.278930][ T1] #1: ffffa000153017f0 (rtnl_mutex){+.+.}, at: >> rtnl_lock+0x1c/0x28 >> [ 43.282023][ T1] >> [ 43.282023][ T1] stack backtrace: >> [ 43.283921][ T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted >> 5.4.0-rc8-next-20191120-00003-g3aa7c2a8649e-dirty #6 >> [ 43.287152][ T1] Hardware name: linux,dummy-virt (DT) >> [ 43.288920][ T1] Call trace: >> [ 43.290057][ T1] dump_backtrace+0x0/0x2d0 >> [ 43.291535][ T1] show_stack+0x20/0x30 >> [ 43.292967][ T1] dump_stack+0x204/0x2ac >> [ 43.294423][ T1] lockdep_rcu_suspicious+0xf4/0x108 >> [ 43.296163][ T1] ipmr_device_event+0x100/0x1e8 >> [ 43.297812][ T1] notifier_call_chain+0x100/0x1a8 >> [ 43.299486][ T1] raw_notifier_call_chain+0x38/0x48 >> [ 43.301248][ T1] call_netdevice_notifiers_info+0x128/0x148 >> [ 43.303158][ T1] rollback_registered_many+0x684/0xb48 >> [ 43.304963][ T1] rollback_registered+0xd8/0x150 >> [ 43.306595][ T1] unregister_netdevice_queue+0x194/0x1b8 >> [ 43.308406][ T1] unregister_netdev+0x24/0x38 >> [ 43.310012][ T1] virtnet_remove+0x44/0x78 >> [ 43.311519][ T1] virtio_dev_remove+0x5c/0xc0 >> [ 43.313114][ T1] really_probe+0x508/0x920 >> [ 43.314635][ T1] driver_probe_device+0x164/0x230 >> [ 43.316337][ T1] device_driver_attach+0x8c/0xc0 >> [ 43.318024][ T1] __driver_attach+0x1e0/0x1f8 >> [ 43.319584][ T1] bus_for_each_dev+0xf0/0x188 >> [ 43.321169][ T1] driver_attach+0x34/0x40 >> [ 43.322645][ T1] bus_add_driver+0x204/0x3c8 >> [ 43.324202][ T1] driver_register+0x160/0x1f8 >> [ 43.325788][ T1] register_virtio_driver+0x7c/0x88 >> [ 43.327480][ T1] virtio_net_driver_init+0xa8/0xf4 >> [ 43.329196][ T1] do_one_initcall+0x4c0/0xad8 >> [ 43.330767][ T1] kernel_init_freeable+0x3e0/0x500 >> [ 43.332444][ T1] kernel_init+0x14/0x1f0 >> [ 43.333901][ T1] ret_from_fork+0x10/0x18 >> >> >> Cheers, >> Anders >> >>> >>> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c >>> index 6e68def66822f47fc08d94eddd32a4bd4f9fdfb0..b6dcdce08f1d82c83756a319623e24ae0174092c 100644 >>> --- a/net/ipv4/ipmr.c >>> +++ b/net/ipv4/ipmr.c >>> @@ -94,7 +94,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock); >>> >>> static struct kmem_cache *mrt_cachep __ro_after_init; >>> >>> -static struct mr_table *ipmr_new_table(struct net *net, u32 id); >>> +static struct mr_table *ipmr_new_table(struct net *net, u32 id, bool init); >>> static void ipmr_free_table(struct mr_table *mrt); >>> > > static void ip_mr_forward(struct net *net, struct mr_table *mrt, > @@ -110,7 +110,8 @@ static void ipmr_expire_process(struct timer_list *t); > > #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES > #define ipmr_for_each_table(mrt, net) \ > - list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) > + list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \ > + lockdep_rtnl_is_held()) > static struct mr_table *ipmr_mr_table_iter(struct net *net, > struct mr_table *mrt) > > > Cheers, > Anders That is great, I guess we can submit the two patches in a series.
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 6e68def66822..53dff9a0e60a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -110,7 +110,8 @@ static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES #define ipmr_for_each_table(mrt, net) \ - list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) + list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \ + lockdep_rtnl_is_held()) static struct mr_table *ipmr_mr_table_iter(struct net *net, struct mr_table *mrt) @@ -3086,7 +3087,9 @@ static int __net_init ipmr_net_init(struct net *net) if (err) goto ipmr_notifier_fail; + rtnl_lock(); err = ipmr_rules_init(net); + rtnl_unlock(); if (err < 0) goto ipmr_rules_fail;