mbox series

[bpf-next,v2,0/5] sock_map: clean up and refactor code for BPF_SK_SKB_VERDICT

Message ID 20210210022136.146528-1-xiyou.wangcong@gmail.com
Headers show
Series sock_map: clean up and refactor code for BPF_SK_SKB_VERDICT | expand

Message

Cong Wang Feb. 10, 2021, 2:21 a.m. UTC
From: Cong Wang <cong.wang@bytedance.com>

This patchset is the first series of patches separated out from
the original large patchset, to make reviews easier. This patchset
does not add any new feature but merely cleans up the existing
sockmap and skmsg code and refactors it, to prepare for the patches
followed up. This passed all BPF selftests.

The original whole patchset is available on github:
https://github.com/congwang/linux/tree/sockmap

and this patchset is also available on github:
https://github.com/congwang/linux/tree/sockmap1

---
v2: split the original patchset
    compute data_end with bpf_convert_data_end_access()
    get rid of psock->bpf_running
    reduce the scope of CONFIG_BPF_STREAM_PARSER
    do not add CONFIG_BPF_SOCK_MAP

Cong Wang (5):
  bpf: clean up sockmap related Kconfigs
  skmsg: get rid of struct sk_psock_parser
  bpf: compute data_end dynamically with JIT code
  skmsg: use skb ext instead of TCP_SKB_CB
  sock_map: rename skb_parser and skb_verdict

 include/linux/bpf.h                           |  20 +-
 include/linux/bpf_types.h                     |   2 -
 include/linux/skbuff.h                        |   3 +
 include/linux/skmsg.h                         |  78 ++++++--
 include/net/tcp.h                             |  29 +--
 include/net/udp.h                             |   4 +-
 init/Kconfig                                  |   1 +
 net/Kconfig                                   |   7 +-
 net/core/Makefile                             |   2 +-
 net/core/filter.c                             |  46 +++--
 net/core/skbuff.c                             |   7 +
 net/core/skmsg.c                              | 187 +++++++++---------
 net/core/sock_map.c                           |  74 +++----
 net/ipv4/Makefile                             |   2 +-
 net/ipv4/tcp_bpf.c                            |   2 -
 .../selftests/bpf/prog_tests/sockmap_listen.c |   8 +-
 .../selftests/bpf/progs/test_sockmap_listen.c |   4 +-
 17 files changed, 252 insertions(+), 224 deletions(-)

Comments

Lorenz Bauer Feb. 12, 2021, 10:55 a.m. UTC | #1
On Wed, 10 Feb 2021 at 02:22, Cong Wang <xiyou.wangcong@gmail.com> wrote:
>

> From: Cong Wang <cong.wang@bytedance.com>

>

> Currently, we compute ->data_end with a compile-time constant

> offset of skb. But as Jakub pointed out, we can actually compute

> it in eBPF JIT code at run-time, so that we can competely get

> rid of ->data_end. This is similar to skb_shinfo(skb) computation

> in bpf_convert_shinfo_access().

>

> Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>

> Cc: John Fastabend <john.fastabend@gmail.com>

> Cc: Daniel Borkmann <daniel@iogearbox.net>

> Cc: Lorenz Bauer <lmb@cloudflare.com>

> Signed-off-by: Cong Wang <cong.wang@bytedance.com>


...

> @@ -9520,6 +9510,29 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,

>         return insn - insn_buf;

>  }

>

> +static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,

> +                                                   struct bpf_insn *insn)


Is it worth adding a reference to this function in skb_headlen(),
since we're basically open coding that function here?

> +{

> +       /* si->dst_reg = skb->data */

> +       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),

> +                             si->dst_reg, si->src_reg,

> +                             offsetof(struct sk_buff, data));

> +       /* AX = skb->len */

> +       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),

> +                             BPF_REG_AX, si->src_reg,

> +                             offsetof(struct sk_buff, len));

> +       /* si->dst_reg = skb->data + skb->len */

> +       *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);

> +       /* AX = skb->data_len */

> +       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),

> +                             BPF_REG_AX, si->src_reg,

> +                             offsetof(struct sk_buff, data_len));

> +       /* si->dst_reg = skb->data + skb->len - skb->data_len */

> +       *insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX);

> +

> +       return insn;

> +}

> +

>  static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,

>                                      const struct bpf_insn *si,

>                                      struct bpf_insn *insn_buf,

> @@ -9530,12 +9543,7 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,

>

>         switch (si->off) {

>         case offsetof(struct __sk_buff, data_end):

> -               off  = si->off;

> -               off -= offsetof(struct __sk_buff, data_end);

> -               off += offsetof(struct sk_buff, cb);

> -               off += offsetof(struct tcp_skb_cb, bpf.data_end);

> -               *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,

> -                                     si->src_reg, off);

> +               insn = bpf_convert_data_end_access(si, insn);


This generates a new warning:

../net/core/filter.c: In function ‘sk_skb_convert_ctx_access’:
../net/core/filter.c:9542:6: warning: unused variable ‘off’ [-Wunused-variable]
 9542 |  int off;
      |      ^~~

--
Lorenz Bauer  |  Systems Engineer
6th Floor, County Hall/The Riverside Building, SE1 7PB, UK

www.cloudflare.com
Lorenz Bauer Feb. 12, 2021, 10:58 a.m. UTC | #2
On Wed, 10 Feb 2021 at 02:22, Cong Wang <xiyou.wangcong@gmail.com> wrote:
>

> From: Cong Wang <cong.wang@bytedance.com>

>

> Currently TCP_SKB_CB() is hard-coded in skmsg code, it certainly

> does not work for any other non-TCP protocols. We can move them to

> skb ext instead of playing with skb cb, which is harder to make

> correct.


Reviewed-by: Lorenz Bauer <lmb@cloudflare.com>


-- 
Lorenz Bauer  |  Systems Engineer
6th Floor, County Hall/The Riverside Building, SE1 7PB, UK

www.cloudflare.com
Cong Wang Feb. 12, 2021, 7:01 p.m. UTC | #3
On Fri, Feb 12, 2021 at 2:56 AM Lorenz Bauer <lmb@cloudflare.com> wrote:
>

> On Wed, 10 Feb 2021 at 02:22, Cong Wang <xiyou.wangcong@gmail.com> wrote:

> >

> > From: Cong Wang <cong.wang@bytedance.com>

> >

> > Currently, we compute ->data_end with a compile-time constant

> > offset of skb. But as Jakub pointed out, we can actually compute

> > it in eBPF JIT code at run-time, so that we can competely get

> > rid of ->data_end. This is similar to skb_shinfo(skb) computation

> > in bpf_convert_shinfo_access().

> >

> > Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>

> > Cc: John Fastabend <john.fastabend@gmail.com>

> > Cc: Daniel Borkmann <daniel@iogearbox.net>

> > Cc: Lorenz Bauer <lmb@cloudflare.com>

> > Signed-off-by: Cong Wang <cong.wang@bytedance.com>

>

> ...

>

> > @@ -9520,6 +9510,29 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,

> >         return insn - insn_buf;

> >  }

> >

> > +static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,

> > +                                                   struct bpf_insn *insn)

>

> Is it worth adding a reference to this function in skb_headlen(),

> since we're basically open coding that function here?


I do not mind adding a comment for this.

>

> > +{

> > +       /* si->dst_reg = skb->data */

> > +       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),

> > +                             si->dst_reg, si->src_reg,

> > +                             offsetof(struct sk_buff, data));

> > +       /* AX = skb->len */

> > +       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),

> > +                             BPF_REG_AX, si->src_reg,

> > +                             offsetof(struct sk_buff, len));

> > +       /* si->dst_reg = skb->data + skb->len */

> > +       *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);

> > +       /* AX = skb->data_len */

> > +       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),

> > +                             BPF_REG_AX, si->src_reg,

> > +                             offsetof(struct sk_buff, data_len));

> > +       /* si->dst_reg = skb->data + skb->len - skb->data_len */

> > +       *insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX);

> > +

> > +       return insn;

> > +}

> > +

> >  static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,

> >                                      const struct bpf_insn *si,

> >                                      struct bpf_insn *insn_buf,

> > @@ -9530,12 +9543,7 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,

> >

> >         switch (si->off) {

> >         case offsetof(struct __sk_buff, data_end):

> > -               off  = si->off;

> > -               off -= offsetof(struct __sk_buff, data_end);

> > -               off += offsetof(struct sk_buff, cb);

> > -               off += offsetof(struct tcp_skb_cb, bpf.data_end);

> > -               *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,

> > -                                     si->src_reg, off);

> > +               insn = bpf_convert_data_end_access(si, insn);

>

> This generates a new warning:

>

> ../net/core/filter.c: In function ‘sk_skb_convert_ctx_access’:

> ../net/core/filter.c:9542:6: warning: unused variable ‘off’ [-Wunused-variable]

>  9542 |  int off;

>       |      ^~~


Good catch!

Apparently neither my compiler nor kernel-test-bot's catches this.

Thanks.