[net] bnxt_en: Fix and improve .ndo_features_check().

Message ID 1620254099-5270-1-git-send-email-michael.chan@broadcom.com
State New
Headers show
Series
  • [net] bnxt_en: Fix and improve .ndo_features_check().
Related show

Commit Message

Michael Chan May 5, 2021, 10:34 p.m.
Jakub Kicinski pointed out that we need to handle ipv6 extension headers
and to explicitly check for supported tunnel types in
.ndo_features_check().

For ipv6 extension headers, the hardware supports up to 2 ext. headers
and each must be <= 64 bytes.  For tunneled packets, the supported
packets are UDP with supported VXLAN and Geneve ports, GRE, and IPIP.

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Fixes: 1698d600b361 ("bnxt_en: Implement .ndo_features_check().")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 93 ++++++++++++++++++-----
 1 file changed, 76 insertions(+), 17 deletions(-)

Comments

Alexander Duyck May 6, 2021, 1:01 a.m. | #1
On Wed, May 5, 2021 at 3:43 PM Michael Chan <michael.chan@broadcom.com> wrote:
>

> Jakub Kicinski pointed out that we need to handle ipv6 extension headers

> and to explicitly check for supported tunnel types in

> .ndo_features_check().

>

> For ipv6 extension headers, the hardware supports up to 2 ext. headers

> and each must be <= 64 bytes.  For tunneled packets, the supported

> packets are UDP with supported VXLAN and Geneve ports, GRE, and IPIP.

>

> Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>

> Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>

> Fixes: 1698d600b361 ("bnxt_en: Implement .ndo_features_check().")

> Signed-off-by: Michael Chan <michael.chan@broadcom.com>

> ---

>  drivers/net/ethernet/broadcom/bnxt/bnxt.c | 93 ++++++++++++++++++-----

>  1 file changed, 76 insertions(+), 17 deletions(-)

>

> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c

> index 39ac9e2f5118..c489089671fb 100644

> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c

> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c

> @@ -10785,37 +10785,96 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features)

>         return rc;

>  }

>

> +/* For UDP, we can only handle 1 Vxlan port and 1 Geneve port. */

> +static bool bnxt_udp_check(struct bnxt *bp, struct udphdr *uh)

> +{

> +       __be16 udp_port = uh->dest;

> +

> +       return udp_port == bp->vxlan_port || udp_port == bp->nge_port;

> +}

> +

> +static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,

> +                             u8 *nextproto)

> +{

> +       struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + nw_off);

> +       int hdr_count = 0;

> +       u8 nexthdr;

> +       int start;

> +

> +       /* Check that there are at most 2 IPv6 extension headers, no

> +        * fragment header, and each is <= 64 bytes.

> +        */

> +       start = nw_off + sizeof(*ip6h);

> +       nexthdr = ip6h->nexthdr;

> +       while (ipv6_ext_hdr(nexthdr)) {

> +               struct ipv6_opt_hdr _hdr, *hp;

> +               int hdrlen;

> +

> +               if (hdr_count >= 3 || nexthdr == NEXTHDR_NONE ||

> +                   nexthdr == NEXTHDR_FRAGMENT)

> +                       return false;

> +               hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);

> +               if (!hp)

> +                       return false;

> +               if (nexthdr == NEXTHDR_AUTH)

> +                       hdrlen = ipv6_authlen(hp);

> +               else

> +                       hdrlen = ipv6_optlen(hp);

> +

> +               if (hdrlen > 64)

> +                       return false;

> +               nexthdr = hp->nexthdr;

> +               start += hdrlen;

> +               hdr_count++;

> +       }

> +       if (nextproto)

> +               *nextproto = nexthdr;

> +       return true;

> +}

> +


You should really be validating the nexthdr in all cases. I'm assuming
your offloads are usually for TCP and UDP. You should probably be
validating that you end with that if you are going to advertise the
CSUM and GSO offloads.

> +static bool bnxt_tunl_check(struct bnxt *bp, struct sk_buff *skb, u8 l4_proto)

> +{

> +       switch (l4_proto) {

> +       case IPPROTO_UDP:

> +               return bnxt_udp_check(bp, udp_hdr(skb));

> +       case IPPROTO_GRE:

> +       case IPPROTO_IPIP:

> +               return true;

> +       case IPPROTO_IPV6:

> +               /* Check ext headers of inner ipv6 */

> +               return bnxt_exthdr_check(bp, skb, skb_inner_network_offset(skb),

> +                                        NULL);

> +       }

> +       return false;

> +}

> +

>  static netdev_features_t bnxt_features_check(struct sk_buff *skb,

>                                              struct net_device *dev,

>                                              netdev_features_t features)

>  {

> -       struct bnxt *bp;

> -       __be16 udp_port;

> +       struct bnxt *bp = netdev_priv(dev);

>         u8 l4_proto = 0;

>

>         features = vlan_features_check(skb, features);

> -       if (!skb->encapsulation)

> -               return features;

> -

>         switch (vlan_get_protocol(skb)) {

>         case htons(ETH_P_IP):

> +               if (!skb->encapsulation)

> +                       return features;

>                 l4_proto = ip_hdr(skb)->protocol;

> -               break;

> +               if (!bnxt_tunl_check(bp, skb, l4_proto))

> +                       goto disable_offload;

> +               return features;

>         case htons(ETH_P_IPV6):

> -               l4_proto = ipv6_hdr(skb)->nexthdr;

> -               break;

> -       default:

> +               if (!bnxt_exthdr_check(bp, skb, skb_network_offset(skb),

> +                                      &l4_proto))

> +                       goto disable_offload;

> +               if (skb->encapsulation &&

> +                   !bnxt_tunl_check(bp, skb, l4_proto))

> +                       goto disable_offload;

>                 return features;

>         }

>


This still largely falls short of being able to determine if your
hardware can handle offloading the packet or not. It would likely make
much more sense to look at parsing all the way from the L2 up through
the inner-most L4 header in the case of tunnels to verify that you can
support offloading it.

For example if I had a packet that had unsupported inner IPv6
extension headers it doesn't look like it would be caught by this as
you are only checking the outer headers and the UDP port.

If your offload relies on a parser to offload various protocols you
really need to walk through and verify that the provided header is
something that will successfully be recognized by the parser with no
escapes which means having to parse the whole thing yourself.
Michael Chan May 6, 2021, 3:14 p.m. | #2
On Wed, May 5, 2021 at 6:01 PM Alexander Duyck
<alexander.duyck@gmail.com> wrote:
>

> On Wed, May 5, 2021 at 3:43 PM Michael Chan <michael.chan@broadcom.com> wrote:

> > +static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,

> > +                             u8 *nextproto)

> > +{

> > +       struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + nw_off);

> > +       int hdr_count = 0;

> > +       u8 nexthdr;

> > +       int start;

> > +

> > +       /* Check that there are at most 2 IPv6 extension headers, no

> > +        * fragment header, and each is <= 64 bytes.

> > +        */

> > +       start = nw_off + sizeof(*ip6h);

> > +       nexthdr = ip6h->nexthdr;

> > +       while (ipv6_ext_hdr(nexthdr)) {

> > +               struct ipv6_opt_hdr _hdr, *hp;

> > +               int hdrlen;

> > +

> > +               if (hdr_count >= 3 || nexthdr == NEXTHDR_NONE ||

> > +                   nexthdr == NEXTHDR_FRAGMENT)

> > +                       return false;

> > +               hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);

> > +               if (!hp)

> > +                       return false;

> > +               if (nexthdr == NEXTHDR_AUTH)

> > +                       hdrlen = ipv6_authlen(hp);

> > +               else

> > +                       hdrlen = ipv6_optlen(hp);

> > +

> > +               if (hdrlen > 64)

> > +                       return false;

> > +               nexthdr = hp->nexthdr;

> > +               start += hdrlen;

> > +               hdr_count++;

> > +       }

> > +       if (nextproto)

> > +               *nextproto = nexthdr;

> > +       return true;

> > +}

> > +

>

> You should really be validating the nexthdr in all cases. I'm assuming

> your offloads are usually for TCP and UDP. You should probably be

> validating that you end with that if you are going to advertise the

> CSUM and GSO offloads.


Yes, I agree with you that we should check for TCP/UDP here.

> This still largely falls short of being able to determine if your

> hardware can handle offloading the packet or not. It would likely make

> much more sense to look at parsing all the way from the L2 up through

> the inner-most L4 header in the case of tunnels to verify that you can

> support offloading it.

>

> For example if I had a packet that had unsupported inner IPv6

> extension headers it doesn't look like it would be caught by this as

> you are only checking the outer headers and the UDP port.

>


I missed the inner ipv6 extension header check for the UDP and GRE
cases.  Thanks for the review.

Patch

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 39ac9e2f5118..c489089671fb 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -10785,37 +10785,96 @@  static int bnxt_set_features(struct net_device *dev, netdev_features_t features)
 	return rc;
 }
 
+/* For UDP, we can only handle 1 Vxlan port and 1 Geneve port. */
+static bool bnxt_udp_check(struct bnxt *bp, struct udphdr *uh)
+{
+	__be16 udp_port = uh->dest;
+
+	return udp_port == bp->vxlan_port || udp_port == bp->nge_port;
+}
+
+static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,
+			      u8 *nextproto)
+{
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + nw_off);
+	int hdr_count = 0;
+	u8 nexthdr;
+	int start;
+
+	/* Check that there are at most 2 IPv6 extension headers, no
+	 * fragment header, and each is <= 64 bytes.
+	 */
+	start = nw_off + sizeof(*ip6h);
+	nexthdr = ip6h->nexthdr;
+	while (ipv6_ext_hdr(nexthdr)) {
+		struct ipv6_opt_hdr _hdr, *hp;
+		int hdrlen;
+
+		if (hdr_count >= 3 || nexthdr == NEXTHDR_NONE ||
+		    nexthdr == NEXTHDR_FRAGMENT)
+			return false;
+		hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+		if (!hp)
+			return false;
+		if (nexthdr == NEXTHDR_AUTH)
+			hdrlen = ipv6_authlen(hp);
+		else
+			hdrlen = ipv6_optlen(hp);
+
+		if (hdrlen > 64)
+			return false;
+		nexthdr = hp->nexthdr;
+		start += hdrlen;
+		hdr_count++;
+	}
+	if (nextproto)
+		*nextproto = nexthdr;
+	return true;
+}
+
+static bool bnxt_tunl_check(struct bnxt *bp, struct sk_buff *skb, u8 l4_proto)
+{
+	switch (l4_proto) {
+	case IPPROTO_UDP:
+		return bnxt_udp_check(bp, udp_hdr(skb));
+	case IPPROTO_GRE:
+	case IPPROTO_IPIP:
+		return true;
+	case IPPROTO_IPV6:
+		/* Check ext headers of inner ipv6 */
+		return bnxt_exthdr_check(bp, skb, skb_inner_network_offset(skb),
+					 NULL);
+	}
+	return false;
+}
+
 static netdev_features_t bnxt_features_check(struct sk_buff *skb,
 					     struct net_device *dev,
 					     netdev_features_t features)
 {
-	struct bnxt *bp;
-	__be16 udp_port;
+	struct bnxt *bp = netdev_priv(dev);
 	u8 l4_proto = 0;
 
 	features = vlan_features_check(skb, features);
-	if (!skb->encapsulation)
-		return features;
-
 	switch (vlan_get_protocol(skb)) {
 	case htons(ETH_P_IP):
+		if (!skb->encapsulation)
+			return features;
 		l4_proto = ip_hdr(skb)->protocol;
-		break;
+		if (!bnxt_tunl_check(bp, skb, l4_proto))
+			goto disable_offload;
+		return features;
 	case htons(ETH_P_IPV6):
-		l4_proto = ipv6_hdr(skb)->nexthdr;
-		break;
-	default:
+		if (!bnxt_exthdr_check(bp, skb, skb_network_offset(skb),
+				       &l4_proto))
+			goto disable_offload;
+		if (skb->encapsulation &&
+		    !bnxt_tunl_check(bp, skb, l4_proto))
+			goto disable_offload;
 		return features;
 	}
 
-	if (l4_proto != IPPROTO_UDP)
-		return features;
-
-	bp = netdev_priv(dev);
-	/* For UDP, we can only handle 1 Vxlan port and 1 Geneve port. */
-	udp_port = udp_hdr(skb)->dest;
-	if (udp_port == bp->vxlan_port || udp_port == bp->nge_port)
-		return features;
+disable_offload:
 	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 }