diff mbox series

[net,v2] bnxt_en: Fix and improve .ndo_features_check().

Message ID 1620433768-31048-1-git-send-email-michael.chan@broadcom.com
State New
Headers show
Series [net,v2] bnxt_en: Fix and improve .ndo_features_check(). | expand

Commit Message

Michael Chan May 8, 2021, 12:29 a.m. UTC
Jakub Kicinski pointed out that we need to handle ipv6 extension headers
and to explicitly check for supported tunnel types in
.ndo_features_check().

For ipv6 extension headers, the hardware supports up to 2 ext. headers
and each must be <= 64 bytes.  For tunneled packets, the supported
packets are UDP with supported VXLAN and Geneve ports, GRE, and IPIP.

v2: Add missing step to check inner ipv6 header for UDP and GRE tunnels.
    Check TCP/UDP next header after skipping ipv6 ext headers for
    non-tunneled packets and for inner ipv6.
    (Both feedback from Alexander Duyck)

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Fixes: 1698d600b361 ("bnxt_en: Implement .ndo_features_check().")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 125 +++++++++++++++++++---
 1 file changed, 108 insertions(+), 17 deletions(-)

Comments

Alexander Duyck May 10, 2021, 3:41 p.m. UTC | #1
On Fri, May 7, 2021 at 5:29 PM Michael Chan <michael.chan@broadcom.com> wrote:
>

> Jakub Kicinski pointed out that we need to handle ipv6 extension headers

> and to explicitly check for supported tunnel types in

> .ndo_features_check().

>

> For ipv6 extension headers, the hardware supports up to 2 ext. headers

> and each must be <= 64 bytes.  For tunneled packets, the supported

> packets are UDP with supported VXLAN and Geneve ports, GRE, and IPIP.

>

> v2: Add missing step to check inner ipv6 header for UDP and GRE tunnels.

>     Check TCP/UDP next header after skipping ipv6 ext headers for

>     non-tunneled packets and for inner ipv6.

>     (Both feedback from Alexander Duyck)

>

> Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>

> Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>

> Fixes: 1698d600b361 ("bnxt_en: Implement .ndo_features_check().")

> Signed-off-by: Michael Chan <michael.chan@broadcom.com>

> ---

>  drivers/net/ethernet/broadcom/bnxt/bnxt.c | 125 +++++++++++++++++++---

>  1 file changed, 108 insertions(+), 17 deletions(-)

>

> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c

> index 39ac9e2f5118..8e4670e37140 100644

> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c

> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c

> @@ -10785,37 +10785,128 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features)

>         return rc;

>  }

>

> +static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,

> +                             u8 *nextproto)

> +{

> +       struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + nw_off);

> +       int hdr_count = 0;

> +       u8 nexthdr;

> +       int start;

> +

> +       /* Check that there are at most 2 IPv6 extension headers, no

> +        * fragment header, and each is <= 64 bytes.

> +        */

> +       start = nw_off + sizeof(*ip6h);

> +       nexthdr = ip6h->nexthdr;

> +       while (ipv6_ext_hdr(nexthdr)) {

> +               struct ipv6_opt_hdr _hdr, *hp;

> +               int hdrlen;

> +

> +               if (hdr_count >= 3 || nexthdr == NEXTHDR_NONE ||

> +                   nexthdr == NEXTHDR_FRAGMENT)

> +                       return false;

> +               hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);

> +               if (!hp)

> +                       return false;

> +               if (nexthdr == NEXTHDR_AUTH)

> +                       hdrlen = ipv6_authlen(hp);

> +               else

> +                       hdrlen = ipv6_optlen(hp);

> +

> +               if (hdrlen > 64)

> +                       return false;

> +               nexthdr = hp->nexthdr;

> +               start += hdrlen;

> +               hdr_count++;

> +       }

> +       /* Only support TCP/UDP unless the caller checks the nextproto. */

> +       if (nextproto) {

> +               *nextproto = nexthdr;

> +               return true;

> +       }

> +       if (nexthdr == IPPROTO_TCP || nexthdr == IPPROTO_UDP)

> +               return true;

> +       return false;

> +}

> +

> +/* For UDP, we can only handle 1 Vxlan port and 1 Geneve port. */

> +static bool bnxt_udp_tunl_check(struct bnxt *bp, struct sk_buff *skb)

> +{

> +       struct udphdr *uh = udp_hdr(skb);

> +       __be16 udp_port = uh->dest;

> +

> +       if (udp_port != bp->vxlan_port && udp_port != bp->nge_port)

> +               return false;

> +       if (skb->inner_protocol_type == ENCAP_TYPE_ETHER) {

> +               struct ethhdr *eh = inner_eth_hdr(skb);

> +

> +               switch (eh->h_proto) {

> +               case htons(ETH_P_IP):

> +                       return true;

> +               case htons(ETH_P_IPV6):

> +                       return bnxt_exthdr_check(bp, skb,

> +                                                skb_inner_network_offset(skb),

> +                                                NULL);

> +               }

> +       }

> +       return false;

> +}

> +

> +static bool bnxt_tunl_check(struct bnxt *bp, struct sk_buff *skb, u8 l4_proto)

> +{

> +       switch (l4_proto) {

> +       case IPPROTO_UDP:

> +               return bnxt_udp_tunl_check(bp, skb);

> +       case IPPROTO_IPIP:

> +               return true;

> +       case IPPROTO_GRE: {

> +               switch (skb->inner_protocol) {

> +               default:

> +                       return false;

> +               case htons(ETH_P_IP):

> +                       return true;

> +               case htons(ETH_P_IPV6):

> +                       fallthrough;

> +               }

> +       }

> +       case IPPROTO_IPV6:

> +               /* Check ext headers of inner ipv6 */

> +               return bnxt_exthdr_check(bp, skb, skb_inner_network_offset(skb),

> +                                        NULL);

> +       }

> +       return false;

> +}

> +

>  static netdev_features_t bnxt_features_check(struct sk_buff *skb,

>                                              struct net_device *dev,

>                                              netdev_features_t features)

>  {

> -       struct bnxt *bp;

> -       __be16 udp_port;

> +       struct bnxt *bp = netdev_priv(dev);

>         u8 l4_proto = 0;

>

>         features = vlan_features_check(skb, features);

> -       if (!skb->encapsulation)

> -               return features;

> -

>         switch (vlan_get_protocol(skb)) {

>         case htons(ETH_P_IP):

> +               if (!skb->encapsulation)

> +                       return features;

>                 l4_proto = ip_hdr(skb)->protocol;

> -               break;

> +               if (!bnxt_tunl_check(bp, skb, l4_proto))

> +                       goto disable_offload;


What is the point of this label, couldn't you just use a break since
this is in a switch statement? Or you could flip the logic and have it
return features if you successfully determined the offload is doable
and then default to adding a break at the end of the case section.

> +               return features;

>         case htons(ETH_P_IPV6):

> -               l4_proto = ipv6_hdr(skb)->nexthdr;

> +               if (!bnxt_exthdr_check(bp, skb, skb_network_offset(skb),

> +                                      &l4_proto))

> +                       goto disable_offload;


Same here. You could probably just use a break statement.

Also you might save yourself a step here by passing an actual pointer
instead of the address of l4_proto here. Then what you could do is in
the skb->encapsulation case you initialize the pointer to the address
of l4_proto, and in the non encap case you set the pointer to NULL.
Doing that you can avoid having to repeat the same TCP/UDP check below
and could instead just return features if !pointer ||
bnxt_tunl_check().

> +               if (skb->encapsulation) {

> +                       if (bnxt_tunl_check(bp, skb, l4_proto))

> +                               return features;

> +               } else if (l4_proto == IPPROTO_TCP || l4_proto == IPPROTO_UDP) {

> +                       return features;

> +               }

>                 break;

> -       default:

> -               return features;

>         }

>

> -       if (l4_proto != IPPROTO_UDP)

> -               return features;

> -

> -       bp = netdev_priv(dev);

> -       /* For UDP, we can only handle 1 Vxlan port and 1 Geneve port. */

> -       udp_port = udp_hdr(skb)->dest;

> -       if (udp_port == bp->vxlan_port || udp_port == bp->nge_port)

> -               return features;

> +disable_offload:

>         return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);

>  }

>

> --

> 2.18.1

>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 39ac9e2f5118..8e4670e37140 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -10785,37 +10785,128 @@  static int bnxt_set_features(struct net_device *dev, netdev_features_t features)
 	return rc;
 }
 
+static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,
+			      u8 *nextproto)
+{
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + nw_off);
+	int hdr_count = 0;
+	u8 nexthdr;
+	int start;
+
+	/* Check that there are at most 2 IPv6 extension headers, no
+	 * fragment header, and each is <= 64 bytes.
+	 */
+	start = nw_off + sizeof(*ip6h);
+	nexthdr = ip6h->nexthdr;
+	while (ipv6_ext_hdr(nexthdr)) {
+		struct ipv6_opt_hdr _hdr, *hp;
+		int hdrlen;
+
+		if (hdr_count >= 3 || nexthdr == NEXTHDR_NONE ||
+		    nexthdr == NEXTHDR_FRAGMENT)
+			return false;
+		hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+		if (!hp)
+			return false;
+		if (nexthdr == NEXTHDR_AUTH)
+			hdrlen = ipv6_authlen(hp);
+		else
+			hdrlen = ipv6_optlen(hp);
+
+		if (hdrlen > 64)
+			return false;
+		nexthdr = hp->nexthdr;
+		start += hdrlen;
+		hdr_count++;
+	}
+	/* Only support TCP/UDP unless the caller checks the nextproto. */
+	if (nextproto) {
+		*nextproto = nexthdr;
+		return true;
+	}
+	if (nexthdr == IPPROTO_TCP || nexthdr == IPPROTO_UDP)
+		return true;
+	return false;
+}
+
+/* For UDP, we can only handle 1 Vxlan port and 1 Geneve port. */
+static bool bnxt_udp_tunl_check(struct bnxt *bp, struct sk_buff *skb)
+{
+	struct udphdr *uh = udp_hdr(skb);
+	__be16 udp_port = uh->dest;
+
+	if (udp_port != bp->vxlan_port && udp_port != bp->nge_port)
+		return false;
+	if (skb->inner_protocol_type == ENCAP_TYPE_ETHER) {
+		struct ethhdr *eh = inner_eth_hdr(skb);
+
+		switch (eh->h_proto) {
+		case htons(ETH_P_IP):
+			return true;
+		case htons(ETH_P_IPV6):
+			return bnxt_exthdr_check(bp, skb,
+						 skb_inner_network_offset(skb),
+						 NULL);
+		}
+	}
+	return false;
+}
+
+static bool bnxt_tunl_check(struct bnxt *bp, struct sk_buff *skb, u8 l4_proto)
+{
+	switch (l4_proto) {
+	case IPPROTO_UDP:
+		return bnxt_udp_tunl_check(bp, skb);
+	case IPPROTO_IPIP:
+		return true;
+	case IPPROTO_GRE: {
+		switch (skb->inner_protocol) {
+		default:
+			return false;
+		case htons(ETH_P_IP):
+			return true;
+		case htons(ETH_P_IPV6):
+			fallthrough;
+		}
+	}
+	case IPPROTO_IPV6:
+		/* Check ext headers of inner ipv6 */
+		return bnxt_exthdr_check(bp, skb, skb_inner_network_offset(skb),
+					 NULL);
+	}
+	return false;
+}
+
 static netdev_features_t bnxt_features_check(struct sk_buff *skb,
 					     struct net_device *dev,
 					     netdev_features_t features)
 {
-	struct bnxt *bp;
-	__be16 udp_port;
+	struct bnxt *bp = netdev_priv(dev);
 	u8 l4_proto = 0;
 
 	features = vlan_features_check(skb, features);
-	if (!skb->encapsulation)
-		return features;
-
 	switch (vlan_get_protocol(skb)) {
 	case htons(ETH_P_IP):
+		if (!skb->encapsulation)
+			return features;
 		l4_proto = ip_hdr(skb)->protocol;
-		break;
+		if (!bnxt_tunl_check(bp, skb, l4_proto))
+			goto disable_offload;
+		return features;
 	case htons(ETH_P_IPV6):
-		l4_proto = ipv6_hdr(skb)->nexthdr;
+		if (!bnxt_exthdr_check(bp, skb, skb_network_offset(skb),
+				       &l4_proto))
+			goto disable_offload;
+		if (skb->encapsulation) {
+			if (bnxt_tunl_check(bp, skb, l4_proto))
+				return features;
+		} else if (l4_proto == IPPROTO_TCP || l4_proto == IPPROTO_UDP) {
+			return features;
+		}
 		break;
-	default:
-		return features;
 	}
 
-	if (l4_proto != IPPROTO_UDP)
-		return features;
-
-	bp = netdev_priv(dev);
-	/* For UDP, we can only handle 1 Vxlan port and 1 Geneve port. */
-	udp_port = udp_hdr(skb)->dest;
-	if (udp_port == bp->vxlan_port || udp_port == bp->nge_port)
-		return features;
+disable_offload:
 	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 }