diff mbox series

[bpf,v2] bpf: check BPF_F_ADJ_ROOM_FIXED_GSO when upgrading mss in 6 to 4

Message ID 1620714998-120657-1-git-send-email-dseok.yi@samsung.com
State New
Headers show
Series [bpf,v2] bpf: check BPF_F_ADJ_ROOM_FIXED_GSO when upgrading mss in 6 to 4 | expand

Commit Message

Dongseok Yi May 11, 2021, 6:36 a.m. UTC
In the forwarding path GRO -> BPF 6 to 4 -> GSO for TCP traffic, the
coalesced packet payload can be > MSS, but < MSS + 20.
bpf_skb_proto_6_to_4 increases the MSS by 20 (the difference between the
IPv6 and IPv4 header sizes), so the new MSS can be > the payload length.
tcp_gso_segment then checks whether the payload length is <= MSS; when
it is, the packet is dropped.

tcp_gso_segment():
        [...]
        mss = skb_shinfo(skb)->gso_size;
        if (unlikely(skb->len <= mss))
                goto out;
        [...]

Increase the MSS only when BPF_F_ADJ_ROOM_FIXED_GSO is not set.

Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper")
Signed-off-by: Dongseok Yi <dseok.yi@samsung.com>
---
 net/core/filter.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

v2:
per Willem de Bruijn's request,
check the flag instead of using a generic approach.

Comments

Willem de Bruijn May 11, 2021, 5:42 p.m. UTC | #1
On Tue, May 11, 2021 at 2:51 AM Dongseok Yi <dseok.yi@samsung.com> wrote:
>
> In the forwarding path GRO -> BPF 6 to 4 -> GSO for TCP traffic, the
> coalesced packet payload can be > MSS, but < MSS + 20.
> bpf_skb_proto_6_to_4 will increase the MSS and it can be > the payload
> length. After then tcp_gso_segment checks for the payload length if it
> is <= MSS. The condition is causing the packet to be dropped.
>
> tcp_gso_segment():
>         [...]
>         mss = skb_shinfo(skb)->gso_size;
>         if (unlikely(skb->len <= mss))
>                 goto out;
>         [...]
>
> Allow to increase MSS when BPF_F_ADJ_ROOM_FIXED_GSO is not set.
>
> Fixes: 6578171a7ff0 (bpf: add bpf_skb_change_proto helper)
> Signed-off-by: Dongseok Yi <dseok.yi@samsung.com>
>
> ---

Thanks. Note that this feature does not preclude the alternatives
discussed: converting the packet to non-TSO (by clearing gso_size)
or optionally modifying MSS (though that should get an okay from TCP
experts).

I would target this for bpf-next and drop the Fixes. But that is
admittedly debatable.

>  net/core/filter.c | 13 +++++++------
>  1 file changed, 7 insertions(+), 6 deletions(-)
>
> v2:
> per Willem de Bruijn request,
> checked the flag instead of a generic approach.
>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index cae56d0..a98b28d 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3276,7 +3276,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
>         return 0;
>  }
>
> -static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
> +static int bpf_skb_proto_6_to_4(struct sk_buff *skb, u64 flags)
>  {
>         const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
>         u32 off = skb_mac_header_len(skb);
> @@ -3305,7 +3305,8 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
>                 }
>
>                 /* Due to IPv4 header, MSS can be upgraded. */
> -               skb_increase_gso_size(shinfo, len_diff);
> +               if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
> +                       skb_increase_gso_size(shinfo, len_diff);
>                 /* Header must be checked, and gso_segs recomputed. */
>                 shinfo->gso_type |= SKB_GSO_DODGY;
>                 shinfo->gso_segs = 0;
> @@ -3317,7 +3318,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
>         return 0;
>  }
>
> -static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
> +static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto, u64 flags)
>  {
>         __be16 from_proto = skb->protocol;
>
> @@ -3327,7 +3328,7 @@ static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
>
>         if (from_proto == htons(ETH_P_IPV6) &&
>               to_proto == htons(ETH_P_IP))
> -               return bpf_skb_proto_6_to_4(skb);
> +               return bpf_skb_proto_6_to_4(skb, flags);
>
>         return -ENOTSUPP;
>  }
> @@ -3337,7 +3338,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
>  {
>         int ret;
>
> -       if (unlikely(flags))
> +       if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO)))
>                 return -EINVAL;

Once this flag is allowed, please immediately support it for both
bpf_skb_proto_6_to_4 and bpf_skb_proto_4_to_6.

We cannot do that later if we ignore the second case now.


>         /* General idea is that this helper does the basic groundwork
> @@ -3357,7 +3358,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
>          * that. For offloads, we mark packet as dodgy, so that headers
>          * need to be verified first.
>          */
> -       ret = bpf_skb_proto_xlat(skb, proto);
> +       ret = bpf_skb_proto_xlat(skb, proto, flags);
>         bpf_compute_data_pointers(skb);
>         return ret;
>  }
> --
> 2.7.4
>
Dongseok Yi May 12, 2021, 6:56 a.m. UTC | #2
On Tue, May 11, 2021 at 01:42:46PM -0400, Willem de Bruijn wrote:
> On Tue, May 11, 2021 at 2:51 AM Dongseok Yi <dseok.yi@samsung.com> wrote:

> >

> > In the forwarding path GRO -> BPF 6 to 4 -> GSO for TCP traffic, the

> > coalesced packet payload can be > MSS, but < MSS + 20.

> > bpf_skb_proto_6_to_4 will increase the MSS and it can be > the payload

> > length. After then tcp_gso_segment checks for the payload length if it

> > is <= MSS. The condition is causing the packet to be dropped.

> >

> > tcp_gso_segment():

> >         [...]

> >         mss = skb_shinfo(skb)->gso_size;

> >         if (unlikely(skb->len <= mss))

> >                 goto out;

> >         [...]

> >

> > Allow to increase MSS when BPF_F_ADJ_ROOM_FIXED_GSO is not set.

> >

> > Fixes: 6578171a7ff0 (bpf: add bpf_skb_change_proto helper)

> > Signed-off-by: Dongseok Yi <dseok.yi@samsung.com>

> >

> > ---

> 

> Thanks. Note that this feature does not preclude the alternatives

> discussed, of converting the packet to non-TSO (by clearing gso_size)

> or optionally modifying MSS (but that should get okay from TCP

> experts).

> 

> I would target this for bpf-next and drop the Fixes. But that is

> admittedly debatable.


No problem. We can make a better decision under bpf-next.

> 

> >  net/core/filter.c | 13 +++++++------

> >  1 file changed, 7 insertions(+), 6 deletions(-)

> >

> > v2:

> > per Willem de Bruijn request,

> > checked the flag instead of a generic approach.

> >

> > diff --git a/net/core/filter.c b/net/core/filter.c

> > index cae56d0..a98b28d 100644

> > --- a/net/core/filter.c

> > +++ b/net/core/filter.c

> > @@ -3276,7 +3276,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)

> >         return 0;

> >  }

> >

> > -static int bpf_skb_proto_6_to_4(struct sk_buff *skb)

> > +static int bpf_skb_proto_6_to_4(struct sk_buff *skb, u64 flags)

> >  {

> >         const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);

> >         u32 off = skb_mac_header_len(skb);

> > @@ -3305,7 +3305,8 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)

> >                 }

> >

> >                 /* Due to IPv4 header, MSS can be upgraded. */

> > -               skb_increase_gso_size(shinfo, len_diff);

> > +               if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))

> > +                       skb_increase_gso_size(shinfo, len_diff);

> >                 /* Header must be checked, and gso_segs recomputed. */

> >                 shinfo->gso_type |= SKB_GSO_DODGY;

> >                 shinfo->gso_segs = 0;

> > @@ -3317,7 +3318,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)

> >         return 0;

> >  }

> >

> > -static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)

> > +static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto, u64 flags)

> >  {

> >         __be16 from_proto = skb->protocol;

> >

> > @@ -3327,7 +3328,7 @@ static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)

> >

> >         if (from_proto == htons(ETH_P_IPV6) &&

> >               to_proto == htons(ETH_P_IP))

> > -               return bpf_skb_proto_6_to_4(skb);

> > +               return bpf_skb_proto_6_to_4(skb, flags);

> >

> >         return -ENOTSUPP;

> >  }

> > @@ -3337,7 +3338,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,

> >  {

> >         int ret;

> >

> > -       if (unlikely(flags))

> > +       if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO)))

> >                 return -EINVAL;

> 

> Once allowing this flag, please immediately support it for both

> bpf_skb_proto_6_to_4 and bpf_skb_4_to_6.

> 

> We cannot do that later if we ignore the second case now.


I will send a v3 that handles both 6_to_4 and 4_to_6.

> 

> 

> >         /* General idea is that this helper does the basic groundwork

> > @@ -3357,7 +3358,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,

> >          * that. For offloads, we mark packet as dodgy, so that headers

> >          * need to be verified first.

> >          */

> > -       ret = bpf_skb_proto_xlat(skb, proto);

> > +       ret = bpf_skb_proto_xlat(skb, proto, flags);

> >         bpf_compute_data_pointers(skb);

> >         return ret;

> >  }

> > --

> > 2.7.4

> >
Willem de Bruijn May 12, 2021, 2:13 p.m. UTC | #3
On Wed, May 12, 2021 at 3:41 AM Dongseok Yi <dseok.yi@samsung.com> wrote:
>

> In the forwarding path GRO -> BPF 6 to 4 -> GSO for TCP traffic, the

> coalesced packet payload can be > MSS, but < MSS + 20.

> bpf_skb_proto_6_to_4 will upgrade the MSS and it can be > the payload

> length. After then tcp_gso_segment checks for the payload length if it

> is <= MSS. The condition is causing the packet to be dropped.

>

> tcp_gso_segment():

>         [...]

>         mss = skb_shinfo(skb)->gso_size;

>         if (unlikely(skb->len <= mss))

>                 goto out;

>         [...]

>

> Allow to upgrade/downgrade MSS only when BPF_F_ADJ_ROOM_FIXED_GSO is

> not set.

>

> Signed-off-by: Dongseok Yi <dseok.yi@samsung.com>


Acked-by: Willem de Bruijn <willemb@google.com>
diff mbox series

Patch

diff --git a/net/core/filter.c b/net/core/filter.c
index cae56d0..a98b28d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3276,7 +3276,7 @@  static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 	return 0;
 }
 
-static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
+static int bpf_skb_proto_6_to_4(struct sk_buff *skb, u64 flags)
 {
 	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
 	u32 off = skb_mac_header_len(skb);
@@ -3305,7 +3305,8 @@  static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
 		}
 
 		/* Due to IPv4 header, MSS can be upgraded. */
-		skb_increase_gso_size(shinfo, len_diff);
+		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+			skb_increase_gso_size(shinfo, len_diff);
 		/* Header must be checked, and gso_segs recomputed. */
 		shinfo->gso_type |= SKB_GSO_DODGY;
 		shinfo->gso_segs = 0;
@@ -3317,7 +3318,7 @@  static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
 	return 0;
 }
 
-static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
+static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto, u64 flags)
 {
 	__be16 from_proto = skb->protocol;
 
@@ -3327,7 +3328,7 @@  static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
 
 	if (from_proto == htons(ETH_P_IPV6) &&
 	      to_proto == htons(ETH_P_IP))
-		return bpf_skb_proto_6_to_4(skb);
+		return bpf_skb_proto_6_to_4(skb, flags);
 
 	return -ENOTSUPP;
 }
@@ -3337,7 +3338,7 @@  BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
 {
 	int ret;
 
-	if (unlikely(flags))
+	if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO)))
 		return -EINVAL;
 
 	/* General idea is that this helper does the basic groundwork
@@ -3357,7 +3358,7 @@  BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
 	 * that. For offloads, we mark packet as dodgy, so that headers
 	 * need to be verified first.
 	 */
-	ret = bpf_skb_proto_xlat(skb, proto);
+	ret = bpf_skb_proto_xlat(skb, proto, flags);
 	bpf_compute_data_pointers(skb);
 	return ret;
 }