
[bpf-next,v2,4/4] devmap: Exclude XDP broadcast to master device

Message ID 20210624091843.5151-5-joamaki@gmail.com
State: New
Series: XDP bonding support

Commit Message

Jussi Maki June 24, 2021, 9:18 a.m. UTC
From: Jussi Maki <joamaki@gmail.com>

If the ingress device is a bond slave, do not broadcast back
through it or its bond master.

Signed-off-by: Jussi Maki <joamaki@gmail.com>
---
 kernel/bpf/devmap.c | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)
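For context, here is a minimal sketch (not part of this patch) of an XDP
program that exercises the devmap broadcast path changed below.
BPF_F_BROADCAST clones the frame to every map entry, and
BPF_F_EXCLUDE_INGRESS sets the exclude_ingress flag that this patch
extends to also cover the bond master; the map name and size are
illustrative.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(max_entries, 32);
	__type(key, __u32);
	__type(value, __u32);
} forward_map SEC(".maps");

SEC("xdp")
int xdp_broadcast(struct xdp_md *ctx)
{
	/* Clone the frame to all devmap entries except the ingress
	 * device (and, with this patch, its bond master).
	 */
	return bpf_redirect_map(&forward_map, 0,
				BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
}

char _license[] SEC("license") = "GPL";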

Comments

Jay Vosburgh July 1, 2021, 6:12 p.m. UTC | #1
joamaki@gmail.com wrote:

>From: Jussi Maki <joamaki@gmail.com>
>
>If the ingress device is a bond slave, do not broadcast back
>through it or its bond master.
>
>Signed-off-by: Jussi Maki <joamaki@gmail.com>
>---
> kernel/bpf/devmap.c | 34 ++++++++++++++++++++++++++++------
> 1 file changed, 28 insertions(+), 6 deletions(-)
>
>diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
>index 2a75e6c2d27d..0864fb28c8b5 100644
>--- a/kernel/bpf/devmap.c
>+++ b/kernel/bpf/devmap.c
>@@ -514,9 +514,11 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
> }
> 
> static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp,
>-			 int exclude_ifindex)
>+			 int exclude_ifindex, int exclude_ifindex_master)
> {
>-	if (!obj || obj->dev->ifindex == exclude_ifindex ||
>+	if (!obj ||
>+	    obj->dev->ifindex == exclude_ifindex ||
>+	    obj->dev->ifindex == exclude_ifindex_master ||
> 	    !obj->dev->netdev_ops->ndo_xdp_xmit)
> 		return false;
> 
>@@ -546,12 +548,19 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
> {
> 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
> 	int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
>+	int exclude_ifindex_master = 0;
> 	struct bpf_dtab_netdev *dst, *last_dst = NULL;
> 	struct hlist_head *head;
> 	struct xdp_frame *xdpf;
> 	unsigned int i;
> 	int err;
> 
>+	if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
>+		struct net_device *master = netdev_master_upper_dev_get_rcu(dev_rx);
>+
>+		exclude_ifindex_master = (master && exclude_ingress) ? master->ifindex : 0;
>+	}
>+

	Will the above logic do what is intended if the device stacking
isn't a simple bond -> ethX arrangement?  I.e., bond -> VLAN.?? -> ethX
or perhaps even bondA -> VLAN.?? -> bondB -> ethX ?

	-J


---
	-Jay Vosburgh, jay.vosburgh@canonical.com
Jussi Maki July 5, 2021, 11:44 a.m. UTC | #2
On Thu, Jul 1, 2021 at 9:12 PM Jay Vosburgh <jay.vosburgh@canonical.com> wrote:
> >+      if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
> >+              struct net_device *master = netdev_master_upper_dev_get_rcu(dev_rx);
> >+
> >+              exclude_ifindex_master = (master && exclude_ingress) ? master->ifindex : 0;
> >+      }
> >+
>
>         Will the above logic do what is intended if the device stacking
> isn't a simple bond -> ethX arrangement?  I.e., bond -> VLAN.?? -> ethX
> or perhaps even bondA -> VLAN.?? -> bondB -> ethX ?

Good point. "bond -> VLAN -> eth" isn't an issue currently, as VLAN
devices do not support XDP. "bondA -> bondB -> ethX", however, would be
supported, so I think it makes sense to change the code to collect all
upper devices and exclude them. I'll try to follow up with an updated
patch for this soon.
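For illustration, a minimal sketch of the "collect all upper devices"
approach (an assumption about the follow-up, not the posted patch; the
helper names are hypothetical). It gathers the ifindexes of the ingress
device's upper devices via netdev_for_each_upper_dev_rcu() so the
broadcast loops can test against the whole list instead of a single
master ifindex:

static int get_upper_ifindexes(struct net_device *dev, int *indexes)
{
	struct net_device *upper;
	struct list_head *iter;
	int n = 0;

	/* Collects direct upper devices only; fully nested stacking
	 * (bondA -> bondB -> ethX) may additionally require walking
	 * the whole upper graph, e.g. netdev_walk_all_upper_dev_rcu().
	 */
	netdev_for_each_upper_dev_rcu(dev, upper, iter)
		indexes[n++] = upper->ifindex;

	return n;
}

static bool is_ifindex_excluded(int *excluded, int num_excluded, int ifindex)
{
	while (num_excluded--) {
		if (ifindex == excluded[num_excluded])
			return true;
	}
	return false;
}

In dev_map_enqueue_multi() and dev_map_redirect_multi(), the exclusion
list would then be built once per broadcast (the ingress ifindex plus
the collected uppers), replacing the exclude_ifindex and
exclude_ifindex_master comparisons with is_ifindex_excluded().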

Patch

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 2a75e6c2d27d..0864fb28c8b5 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -514,9 +514,11 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 }
 
 static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp,
-			 int exclude_ifindex)
+			 int exclude_ifindex, int exclude_ifindex_master)
 {
-	if (!obj || obj->dev->ifindex == exclude_ifindex ||
+	if (!obj ||
+	    obj->dev->ifindex == exclude_ifindex ||
+	    obj->dev->ifindex == exclude_ifindex_master ||
 	    !obj->dev->netdev_ops->ndo_xdp_xmit)
 		return false;
 
@@ -546,12 +548,19 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
+	int exclude_ifindex_master = 0;
 	struct bpf_dtab_netdev *dst, *last_dst = NULL;
 	struct hlist_head *head;
 	struct xdp_frame *xdpf;
 	unsigned int i;
 	int err;
 
+	if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
+		struct net_device *master = netdev_master_upper_dev_get_rcu(dev_rx);
+
+		exclude_ifindex_master = (master && exclude_ingress) ? master->ifindex : 0;
+	}
+
 	xdpf = xdp_convert_buff_to_frame(xdp);
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
@@ -559,7 +568,7 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
 	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
 		for (i = 0; i < map->max_entries; i++) {
 			dst = READ_ONCE(dtab->netdev_map[i]);
-			if (!is_valid_dst(dst, xdp, exclude_ifindex))
+			if (!is_valid_dst(dst, xdp, exclude_ifindex, exclude_ifindex_master))
 				continue;
 
 			/* we only need n-1 clones; last_dst enqueued below */
@@ -579,7 +588,9 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
 			head = dev_map_index_hash(dtab, i);
 			hlist_for_each_entry_rcu(dst, head, index_hlist,
 						 lockdep_is_held(&dtab->index_lock)) {
-				if (!is_valid_dst(dst, xdp, exclude_ifindex))
+				if (!is_valid_dst(dst, xdp,
+						  exclude_ifindex,
+						  exclude_ifindex_master))
 					continue;
 
 				/* we only need n-1 clones; last_dst enqueued below */
@@ -646,16 +657,25 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	int exclude_ifindex = exclude_ingress ? dev->ifindex : 0;
+	int exclude_ifindex_master = 0;
 	struct bpf_dtab_netdev *dst, *last_dst = NULL;
 	struct hlist_head *head;
 	struct hlist_node *next;
 	unsigned int i;
 	int err;
 
+	if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
+		struct net_device *master = netdev_master_upper_dev_get_rcu(dev);
+
+		exclude_ifindex_master = (master && exclude_ingress) ? master->ifindex : 0;
+	}
+
 	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
 		for (i = 0; i < map->max_entries; i++) {
 			dst = READ_ONCE(dtab->netdev_map[i]);
-			if (!dst || dst->dev->ifindex == exclude_ifindex)
+			if (!dst ||
+			    dst->dev->ifindex == exclude_ifindex ||
+			    dst->dev->ifindex == exclude_ifindex_master)
 				continue;
 
 			/* we only need n-1 clones; last_dst enqueued below */
@@ -674,7 +694,9 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
 		for (i = 0; i < dtab->n_buckets; i++) {
 			head = dev_map_index_hash(dtab, i);
 			hlist_for_each_entry_safe(dst, next, head, index_hlist) {
-				if (!dst || dst->dev->ifindex == exclude_ifindex)
+				if (!dst ||
+				    dst->dev->ifindex == exclude_ifindex ||
+				    dst->dev->ifindex == exclude_ifindex_master)
 					continue;
 
 				/* we only need n-1 clones; last_dst enqueued below */