
[net] act_ife: load meta modules before tcf_idr_check_alloc()

Message ID 20200904021011.27002-1-xiyou.wangcong@gmail.com
State New
Series [net] act_ife: load meta modules before tcf_idr_check_alloc()

Commit Message

Cong Wang Sept. 4, 2020, 2:10 a.m. UTC
The following deadlock scenario is triggered by syzbot:

Thread A:				Thread B:
tcf_idr_check_alloc()
...
populate_metalist()
  rtnl_unlock()
					rtnl_lock()
					...
  request_module()			tcf_idr_check_alloc()
  rtnl_lock()

At this point, thread A is waiting for thread B to release the RTNL
lock, while thread B is waiting for thread A to commit the IDR change,
which only happens later in tcf_idr_insert().

Break this deadlock by preloading the ife meta modules earlier, before
tcf_idr_check_alloc(). This is fine because we still only load the
modules we may actually need.
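
For illustration only, here is a minimal userspace analogy of the
deadlock (a sketch with POSIX threads; "rtnl" and "idr_slot" are just
stand-ins for the RTNL lock and the reserved-but-uncommitted IDR index,
the sleeps merely force the interleaving shown above, and this is not
the kernel code). Built with gcc -pthread, it hangs the same way:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t rtnl = PTHREAD_MUTEX_INITIALIZER;     /* plays the RTNL lock */
static pthread_mutex_t idr_slot = PTHREAD_MUTEX_INITIALIZER; /* plays the reserved IDR index */

static void *thread_a(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&rtnl);
	pthread_mutex_lock(&idr_slot);   /* tcf_idr_check_alloc(): reserve the index */
	pthread_mutex_unlock(&rtnl);     /* populate_metalist() drops RTNL for request_module() */
	sleep(2);                        /* module load in flight */
	pthread_mutex_lock(&rtnl);       /* re-take RTNL: blocks, B holds it */
	pthread_mutex_unlock(&idr_slot); /* tcf_idr_insert(): commit, never reached */
	pthread_mutex_unlock(&rtnl);
	return NULL;
}

static void *thread_b(void *arg)
{
	(void)arg;
	sleep(1);                        /* let A reserve the slot and drop rtnl first */
	pthread_mutex_lock(&rtnl);       /* rtnl_lock() */
	pthread_mutex_lock(&idr_slot);   /* tcf_idr_check_alloc(): waits for A's commit */
	pthread_mutex_unlock(&idr_slot);
	pthread_mutex_unlock(&rtnl);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, thread_a, NULL);
	pthread_create(&b, NULL, thread_b, NULL);
	pthread_join(a, NULL);           /* never returns: A waits on rtnl, B waits on idr_slot */
	pthread_join(b, NULL);
	puts("no deadlock");
	return 0;
}

In these terms, the patch below amounts to doing the slow step (the
module load) before "idr_slot" is taken, so thread A never holds the
reserved index while waiting for the RTNL lock.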

Reported-and-tested-by: syzbot+80e32b5d1f9923f8ace6@syzkaller.appspotmail.com
Fixes: 0190c1d452a9 ("net: sched: atomically check-allocate action")
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Vlad Buslov <vladbu@mellanox.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 net/sched/act_ife.c | 44 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 34 insertions(+), 10 deletions(-)

Comments

Jakub Kicinski Sept. 5, 2020, 5:14 a.m. UTC | #1
On Thu,  3 Sep 2020 19:10:11 -0700 Cong Wang wrote:
> [...]
> Cc: Jamal Hadi Salim <jhs@mojatatu.com>
> Cc: Vlad Buslov <vladbu@mellanox.com>

Vlad, it'd have been nice to see your review tag here.

> Cc: Jiri Pirko <jiri@resnulli.us>
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>

LGTM, applied and queued for stable, thank you Cong!

Vlad Buslov Sept. 7, 2020, 12:12 p.m. UTC | #2
On Fri 04 Sep 2020 at 05:10, Cong Wang <xiyou.wangcong@gmail.com> wrote:
> [...]

Thanks for fixing this, Cong! I've verified that all tdc ife tests pass
with this patch.

Reviewed-by: Vlad Buslov <vlad@buslov.dev>
Tested-by: Vlad Buslov <vlad@buslov.dev>

Patch

diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index c1fcd85719d6..5c568757643b 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -436,6 +436,25 @@  static void tcf_ife_cleanup(struct tc_action *a)
 		kfree_rcu(p, rcu);
 }
 
+static int load_metalist(struct nlattr **tb, bool rtnl_held)
+{
+	int i;
+
+	for (i = 1; i < max_metacnt; i++) {
+		if (tb[i]) {
+			void *val = nla_data(tb[i]);
+			int len = nla_len(tb[i]);
+			int rc;
+
+			rc = load_metaops_and_vet(i, val, len, rtnl_held);
+			if (rc != 0)
+				return rc;
+		}
+	}
+
+	return 0;
+}
+
 static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
 			     bool exists, bool rtnl_held)
 {
@@ -449,10 +468,6 @@  static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
 			val = nla_data(tb[i]);
 			len = nla_len(tb[i]);
 
-			rc = load_metaops_and_vet(i, val, len, rtnl_held);
-			if (rc != 0)
-				return rc;
-
 			rc = add_metainfo(ife, i, val, len, exists);
 			if (rc)
 				return rc;
@@ -509,6 +524,21 @@  static int tcf_ife_init(struct net *net, struct nlattr *nla,
 	if (!p)
 		return -ENOMEM;
 
+	if (tb[TCA_IFE_METALST]) {
+		err = nla_parse_nested_deprecated(tb2, IFE_META_MAX,
+						  tb[TCA_IFE_METALST], NULL,
+						  NULL);
+		if (err) {
+			kfree(p);
+			return err;
+		}
+		err = load_metalist(tb2, rtnl_held);
+		if (err) {
+			kfree(p);
+			return err;
+		}
+	}
+
 	index = parm->index;
 	err = tcf_idr_check_alloc(tn, &index, a, bind);
 	if (err < 0) {
@@ -570,15 +600,9 @@  static int tcf_ife_init(struct net *net, struct nlattr *nla,
 	}
 
 	if (tb[TCA_IFE_METALST]) {
-		err = nla_parse_nested_deprecated(tb2, IFE_META_MAX,
-						  tb[TCA_IFE_METALST], NULL,
-						  NULL);
-		if (err)
-			goto metadata_parse_err;
 		err = populate_metalist(ife, tb2, exists, rtnl_held);
 		if (err)
 			goto metadata_parse_err;
-
 	} else {
 		/* if no passed metadata allow list or passed allow-all
 		 * then here we process by adding as many supported metadatum