diff mbox series

[net-next] net/sched: act_ct: fix lockdep splat in tcf_ct_flow_table_get

Message ID 20200308212748.107539-1-edumazet@google.com
State New
Headers show
Series [net-next] net/sched: act_ct: fix lockdep splat in tcf_ct_flow_table_get | expand

Commit Message

Eric Dumazet March 8, 2020, 9:27 p.m. UTC
Convert zones_lock spinlock to zones_mutex mutex,
and struct (tcf_ct_flow_table)->ref to a refcount,
so that control path can use regular GFP_KERNEL allocations
from standard process context. This is more robust
in case of memory pressure.

The refcount is needed because tcf_ct_flow_table_put() can
be called from RCU callback, thus in BH context.

The issue was spotted by syzbot, as rhashtable_init()
was called with a spinlock held, which is bad since GFP_KERNEL
allocations can sleep.

Note to developers : Please make sure your patches are tested
with CONFIG_DEBUG_ATOMIC_SLEEP=y

BUG: sleeping function called from invalid context at mm/slab.h:565
in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 9582, name: syz-executor610
2 locks held by syz-executor610/9582:
 #0: ffffffff8a34eb80 (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:72 [inline]
 #0: ffffffff8a34eb80 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x3f9/0xad0 net/core/rtnetlink.c:5437
 #1: ffffffff8a3961b8 (zones_lock){+...}, at: spin_lock_bh include/linux/spinlock.h:343 [inline]
 #1: ffffffff8a3961b8 (zones_lock){+...}, at: tcf_ct_flow_table_get+0xa3/0x1700 net/sched/act_ct.c:67
Preemption disabled at:
[<0000000000000000>] 0x0
CPU: 0 PID: 9582 Comm: syz-executor610 Not tainted 5.6.0-rc3-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x188/0x20d lib/dump_stack.c:118
 ___might_sleep.cold+0x1f4/0x23d kernel/sched/core.c:6798
 slab_pre_alloc_hook mm/slab.h:565 [inline]
 slab_alloc_node mm/slab.c:3227 [inline]
 kmem_cache_alloc_node_trace+0x272/0x790 mm/slab.c:3593
 __do_kmalloc_node mm/slab.c:3615 [inline]
 __kmalloc_node+0x38/0x60 mm/slab.c:3623
 kmalloc_node include/linux/slab.h:578 [inline]
 kvmalloc_node+0x61/0xf0 mm/util.c:574
 kvmalloc include/linux/mm.h:645 [inline]
 kvzalloc include/linux/mm.h:653 [inline]
 bucket_table_alloc+0x8b/0x480 lib/rhashtable.c:175
 rhashtable_init+0x3d2/0x750 lib/rhashtable.c:1054
 nf_flow_table_init+0x16d/0x310 net/netfilter/nf_flow_table_core.c:498
 tcf_ct_flow_table_get+0xe33/0x1700 net/sched/act_ct.c:82
 tcf_ct_init+0xba4/0x18a6 net/sched/act_ct.c:1050
 tcf_action_init_1+0x697/0xa20 net/sched/act_api.c:945
 tcf_action_init+0x1e9/0x2f0 net/sched/act_api.c:1001
 tcf_action_add+0xdb/0x370 net/sched/act_api.c:1411
 tc_ctl_action+0x366/0x456 net/sched/act_api.c:1466
 rtnetlink_rcv_msg+0x44e/0xad0 net/core/rtnetlink.c:5440
 netlink_rcv_skb+0x15a/0x410 net/netlink/af_netlink.c:2478
 netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline]
 netlink_unicast+0x537/0x740 net/netlink/af_netlink.c:1329
 netlink_sendmsg+0x882/0xe10 net/netlink/af_netlink.c:1918
 sock_sendmsg_nosec net/socket.c:652 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:672
 ____sys_sendmsg+0x6b9/0x7d0 net/socket.c:2343
 ___sys_sendmsg+0x100/0x170 net/socket.c:2397
 __sys_sendmsg+0xec/0x1b0 net/socket.c:2430
 do_syscall_64+0xf6/0x790 arch/x86/entry/common.c:294
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x4403d9
Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007ffd719af218 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 00000000004403d9
RDX: 0000000000000000 RSI: 0000000020000300 RDI: 0000000000000003
RBP: 00000000006ca018 R08: 0000000000000005 R09: 00000000004002c8
R10: 0000000000000008 R11: 00000000000

Fixes: c34b961a2492 ("net/sched: act_ct: Create nf flow table per zone")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Paul Blakey <paulb@mellanox.com>
Cc: Jiri Pirko <jiri@mellanox.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
---
 net/sched/act_ct.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)
diff mbox series

Patch

diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 23eba61f0f819212a3522c3c63b938d0b8d997e2..3d9e678d7d5336f1746035745b091bea0dcb5fdd 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -35,15 +35,15 @@ 
 
 static struct workqueue_struct *act_ct_wq;
 static struct rhashtable zones_ht;
-static DEFINE_SPINLOCK(zones_lock);
+static DEFINE_MUTEX(zones_mutex);
 
 struct tcf_ct_flow_table {
 	struct rhash_head node; /* In zones tables */
 
 	struct rcu_work rwork;
 	struct nf_flowtable nf_ft;
+	refcount_t ref;
 	u16 zone;
-	u32 ref;
 
 	bool dying;
 };
@@ -64,14 +64,15 @@  static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
 	struct tcf_ct_flow_table *ct_ft;
 	int err = -ENOMEM;
 
-	spin_lock_bh(&zones_lock);
+	mutex_lock(&zones_mutex);
 	ct_ft = rhashtable_lookup_fast(&zones_ht, &params->zone, zones_params);
-	if (ct_ft)
-		goto take_ref;
+	if (ct_ft && refcount_inc_not_zero(&ct_ft->ref))
+		goto out_unlock;
 
-	ct_ft = kzalloc(sizeof(*ct_ft), GFP_ATOMIC);
+	ct_ft = kzalloc(sizeof(*ct_ft), GFP_KERNEL);
 	if (!ct_ft)
 		goto err_alloc;
+	refcount_set(&ct_ft->ref, 1);
 
 	ct_ft->zone = params->zone;
 	err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params);
@@ -84,10 +85,9 @@  static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
 		goto err_init;
 
 	__module_get(THIS_MODULE);
-take_ref:
+out_unlock:
 	params->ct_ft = ct_ft;
-	ct_ft->ref++;
-	spin_unlock_bh(&zones_lock);
+	mutex_unlock(&zones_mutex);
 
 	return 0;
 
@@ -96,7 +96,7 @@  static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
 err_insert:
 	kfree(ct_ft);
 err_alloc:
-	spin_unlock_bh(&zones_lock);
+	mutex_unlock(&zones_mutex);
 	return err;
 }
 
@@ -116,13 +116,11 @@  static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
 {
 	struct tcf_ct_flow_table *ct_ft = params->ct_ft;
 
-	spin_lock_bh(&zones_lock);
-	if (--params->ct_ft->ref == 0) {
+	if (refcount_dec_and_test(&params->ct_ft->ref)) {
 		rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
 		INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
 		queue_rcu_work(act_ct_wq, &ct_ft->rwork);
 	}
-	spin_unlock_bh(&zones_lock);
 }
 
 static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,