diff mbox series

[bpf-next,08/19] udp: implement ->read_sock() for sockmap

Message ID 20210203041636.38555-9-xiyou.wangcong@gmail.com
State Superseded
Headers show
Series [bpf-next,01/19] bpf: rename BPF_STREAM_PARSER to BPF_SOCK_MAP | expand

Commit Message

Cong Wang Feb. 3, 2021, 4:16 a.m. UTC
From: Cong Wang <cong.wang@bytedance.com>

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
 include/net/udp.h  |  2 ++
 net/ipv4/af_inet.c |  1 +
 net/ipv4/udp.c     | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 37 insertions(+)

Comments

Lorenz Bauer Feb. 8, 2021, 9:48 a.m. UTC | #1
On Wed, 3 Feb 2021 at 04:17, Cong Wang <xiyou.wangcong@gmail.com> wrote:
>

> From: Cong Wang <cong.wang@bytedance.com>

>

> Cc: John Fastabend <john.fastabend@gmail.com>

> Cc: Daniel Borkmann <daniel@iogearbox.net>

> Cc: Jakub Sitnicki <jakub@cloudflare.com>

> Cc: Lorenz Bauer <lmb@cloudflare.com>

> Signed-off-by: Cong Wang <cong.wang@bytedance.com>

> ---

>  include/net/udp.h  |  2 ++

>  net/ipv4/af_inet.c |  1 +

>  net/ipv4/udp.c     | 34 ++++++++++++++++++++++++++++++++++

>  3 files changed, 37 insertions(+)

>

> diff --git a/include/net/udp.h b/include/net/udp.h

> index 13f9354dbd3e..b6b75cabf4e4 100644

> --- a/include/net/udp.h

> +++ b/include/net/udp.h

> @@ -327,6 +327,8 @@ struct sock *__udp6_lib_lookup(struct net *net,

>                                struct sk_buff *skb);

>  struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,

>                                  __be16 sport, __be16 dport);

> +int udp_read_sock(struct sock *sk, read_descriptor_t *desc,

> +                 sk_read_actor_t recv_actor);

>

>  /* UDP uses skb->dev_scratch to cache as much information as possible and avoid

>   * possibly multiple cache miss on dequeue()

> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c

> index d184d9379a92..4a4c6d3d2786 100644

> --- a/net/ipv4/af_inet.c

> +++ b/net/ipv4/af_inet.c

> @@ -1072,6 +1072,7 @@ const struct proto_ops inet_dgram_ops = {

>         .getsockopt        = sock_common_getsockopt,

>         .sendmsg           = inet_sendmsg,

>         .sendmsg_locked    = udp_sendmsg_locked,

> +       .read_sock         = udp_read_sock,

>         .recvmsg           = inet_recvmsg,

>         .mmap              = sock_no_mmap,

>         .sendpage          = inet_sendpage,

> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c

> index 635e1e8b2968..6dffbcec0b51 100644

> --- a/net/ipv4/udp.c

> +++ b/net/ipv4/udp.c

> @@ -1792,6 +1792,40 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,

>  }

>  EXPORT_SYMBOL(__skb_recv_udp);

>

> +int udp_read_sock(struct sock *sk, read_descriptor_t *desc,

> +                 sk_read_actor_t recv_actor)

> +{

> +       struct sk_buff *skb;

> +       int copied = 0, err;

> +

> +       while (1) {

> +               int offset = 0;

> +

> +               skb = __skb_recv_udp(sk, 0, 1, &offset, &err);


Seems like err isn't used outside of the loop, is that on purpose? If
yes, how about moving the declaration of err to be with offset. Maybe
rename to ignored?

> +               if (!skb)

> +                       break;

> +               if (offset < skb->len) {

> +                       int used;

> +                       size_t len;

> +

> +                       len = skb->len - offset;

> +                       used = recv_actor(desc, skb, offset, len);

> +                       if (used <= 0) {

> +                               if (!copied)

> +                                       copied = used;

> +                               break;

> +                       } else if (used <= len) {


In which case can used be > len?


> +                               copied += used;

> +                               offset += used;

> +                       }

> +               }

> +               if (!desc->count)

> +                       break;

> +       }

> +

> +       return copied;

> +}

> +

>  /*

>   *     This should be easy, if there is something there we

>   *     return it, otherwise we block.

> --

> 2.25.1

>



--
Lorenz Bauer  |  Systems Engineer
6th Floor, County Hall/The Riverside Building, SE1 7PB, UK

www.cloudflare.com
Cong Wang Feb. 9, 2021, 1:35 a.m. UTC | #2
On Mon, Feb 8, 2021 at 1:48 AM Lorenz Bauer <lmb@cloudflare.com> wrote:
>

> On Wed, 3 Feb 2021 at 04:17, Cong Wang <xiyou.wangcong@gmail.com> wrote:

> >

> > From: Cong Wang <cong.wang@bytedance.com>

> >

> > Cc: John Fastabend <john.fastabend@gmail.com>

> > Cc: Daniel Borkmann <daniel@iogearbox.net>

> > Cc: Jakub Sitnicki <jakub@cloudflare.com>

> > Cc: Lorenz Bauer <lmb@cloudflare.com>

> > Signed-off-by: Cong Wang <cong.wang@bytedance.com>

> > ---

> >  include/net/udp.h  |  2 ++

> >  net/ipv4/af_inet.c |  1 +

> >  net/ipv4/udp.c     | 34 ++++++++++++++++++++++++++++++++++

> >  3 files changed, 37 insertions(+)

> >

> > diff --git a/include/net/udp.h b/include/net/udp.h

> > index 13f9354dbd3e..b6b75cabf4e4 100644

> > --- a/include/net/udp.h

> > +++ b/include/net/udp.h

> > @@ -327,6 +327,8 @@ struct sock *__udp6_lib_lookup(struct net *net,

> >                                struct sk_buff *skb);

> >  struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,

> >                                  __be16 sport, __be16 dport);

> > +int udp_read_sock(struct sock *sk, read_descriptor_t *desc,

> > +                 sk_read_actor_t recv_actor);

> >

> >  /* UDP uses skb->dev_scratch to cache as much information as possible and avoid

> >   * possibly multiple cache miss on dequeue()

> > diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c

> > index d184d9379a92..4a4c6d3d2786 100644

> > --- a/net/ipv4/af_inet.c

> > +++ b/net/ipv4/af_inet.c

> > @@ -1072,6 +1072,7 @@ const struct proto_ops inet_dgram_ops = {

> >         .getsockopt        = sock_common_getsockopt,

> >         .sendmsg           = inet_sendmsg,

> >         .sendmsg_locked    = udp_sendmsg_locked,

> > +       .read_sock         = udp_read_sock,

> >         .recvmsg           = inet_recvmsg,

> >         .mmap              = sock_no_mmap,

> >         .sendpage          = inet_sendpage,

> > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c

> > index 635e1e8b2968..6dffbcec0b51 100644

> > --- a/net/ipv4/udp.c

> > +++ b/net/ipv4/udp.c

> > @@ -1792,6 +1792,40 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,

> >  }

> >  EXPORT_SYMBOL(__skb_recv_udp);

> >

> > +int udp_read_sock(struct sock *sk, read_descriptor_t *desc,

> > +                 sk_read_actor_t recv_actor)

> > +{

> > +       struct sk_buff *skb;

> > +       int copied = 0, err;

> > +

> > +       while (1) {

> > +               int offset = 0;

> > +

> > +               skb = __skb_recv_udp(sk, 0, 1, &offset, &err);

>

> Seems like err isn't used outside of the loop, is that on purpose? If

> yes, how about moving the declaration of err to be with offset. Maybe

> rename to ignored?


It should be moved inside the loop.

>

> > +               if (!skb)

> > +                       break;

> > +               if (offset < skb->len) {

> > +                       int used;

> > +                       size_t len;

> > +

> > +                       len = skb->len - offset;

> > +                       used = recv_actor(desc, skb, offset, len);

> > +                       if (used <= 0) {

> > +                               if (!copied)

> > +                                       copied = used;

> > +                               break;

> > +                       } else if (used <= len) {

>

> In which case can used be > len?


I think in the splice() case it could return a value larger than 'len', but
UDP does not support splice() even after this patchset. I can change
it to a plain 'else', or just leave it as it is in case we add splice()
support in the future.

Thanks.
diff mbox series

Patch

diff --git a/include/net/udp.h b/include/net/udp.h
index 13f9354dbd3e..b6b75cabf4e4 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -327,6 +327,8 @@  struct sock *__udp6_lib_lookup(struct net *net,
 			       struct sk_buff *skb);
 struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
 				 __be16 sport, __be16 dport);
+int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
+		  sk_read_actor_t recv_actor);
 
 /* UDP uses skb->dev_scratch to cache as much information as possible and avoid
  * possibly multiple cache miss on dequeue()
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d184d9379a92..4a4c6d3d2786 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1072,6 +1072,7 @@  const struct proto_ops inet_dgram_ops = {
 	.getsockopt	   = sock_common_getsockopt,
 	.sendmsg	   = inet_sendmsg,
 	.sendmsg_locked    = udp_sendmsg_locked,
+	.read_sock	   = udp_read_sock,
 	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = inet_sendpage,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 635e1e8b2968..6dffbcec0b51 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1792,6 +1792,40 @@  struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
 }
 EXPORT_SYMBOL(__skb_recv_udp);
 
+int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
+		  sk_read_actor_t recv_actor)
+{
+	struct sk_buff *skb;
+	int copied = 0, err;
+
+	while (1) {
+		int offset = 0;
+
+		skb = __skb_recv_udp(sk, 0, 1, &offset, &err);
+		if (!skb)
+			break;
+		if (offset < skb->len) {
+			int used;
+			size_t len;
+
+			len = skb->len - offset;
+			used = recv_actor(desc, skb, offset, len);
+			if (used <= 0) {
+				if (!copied)
+					copied = used;
+				break;
+			} else if (used <= len) {
+				copied += used;
+				offset += used;
+			}
+		}
+		if (!desc->count)
+			break;
+	}
+
+	return copied;
+}
+
 /*
  * 	This should be easy, if there is something there we
  * 	return it, otherwise we block.