diff mbox series

[bpf-next,v6,08/12] udp: implement ->read_sock() for sockmap

Message ID 20210323003808.16074-9-xiyou.wangcong@gmail.com
State Superseded
Headers show
Series sockmap: introduce BPF_SK_SKB_VERDICT and support UDP | expand

Commit Message

Cong Wang March 23, 2021, 12:38 a.m. UTC
From: Cong Wang <cong.wang@bytedance.com>

This is similar to tcp_read_sock(), except that we do not need
to worry about connections; we just need to retrieve skbs
from the UDP receive queue.

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
 include/net/udp.h   |  2 ++
 net/ipv4/af_inet.c  |  1 +
 net/ipv4/udp.c      | 35 +++++++++++++++++++++++++++++++++++
 net/ipv6/af_inet6.c |  1 +
 4 files changed, 39 insertions(+)

Comments

Cong Wang March 24, 2021, 8:04 p.m. UTC | #1
On Mon, Mar 22, 2021 at 11:31 PM Yunsheng Lin <linyunsheng@huawei.com> wrote:
>

> On 2021/3/23 8:38, Cong Wang wrote:

> > From: Cong Wang <cong.wang@bytedance.com>

> >

> > This is similar to tcp_read_sock(), except we do not need

> > to worry about connections, we just need to retrieve skb

> > from UDP receive queue.

> >

> > Cc: John Fastabend <john.fastabend@gmail.com>

> > Cc: Daniel Borkmann <daniel@iogearbox.net>

> > Cc: Jakub Sitnicki <jakub@cloudflare.com>

> > Cc: Lorenz Bauer <lmb@cloudflare.com>

> > Signed-off-by: Cong Wang <cong.wang@bytedance.com>

> > ---

> >  include/net/udp.h   |  2 ++

> >  net/ipv4/af_inet.c  |  1 +

> >  net/ipv4/udp.c      | 35 +++++++++++++++++++++++++++++++++++

> >  net/ipv6/af_inet6.c |  1 +

> >  4 files changed, 39 insertions(+)

> >

> > diff --git a/include/net/udp.h b/include/net/udp.h

> > index df7cc1edc200..347b62a753c3 100644

> > --- a/include/net/udp.h

> > +++ b/include/net/udp.h

> > @@ -329,6 +329,8 @@ struct sock *__udp6_lib_lookup(struct net *net,

> >                              struct sk_buff *skb);

> >  struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,

> >                                __be16 sport, __be16 dport);

> > +int udp_read_sock(struct sock *sk, read_descriptor_t *desc,

> > +               sk_read_actor_t recv_actor);

> >

> >  /* UDP uses skb->dev_scratch to cache as much information as possible and avoid

> >   * possibly multiple cache miss on dequeue()

> > diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c

> > index 1355e6c0d567..f17870ee558b 100644

> > --- a/net/ipv4/af_inet.c

> > +++ b/net/ipv4/af_inet.c

> > @@ -1070,6 +1070,7 @@ const struct proto_ops inet_dgram_ops = {

> >       .setsockopt        = sock_common_setsockopt,

> >       .getsockopt        = sock_common_getsockopt,

> >       .sendmsg           = inet_sendmsg,

> > +     .read_sock         = udp_read_sock,

> >       .recvmsg           = inet_recvmsg,

> >       .mmap              = sock_no_mmap,

> >       .sendpage          = inet_sendpage,

> > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c

> > index 38952aaee3a1..a0adee3b1af4 100644

> > --- a/net/ipv4/udp.c

> > +++ b/net/ipv4/udp.c

> > @@ -1782,6 +1782,41 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,

> >  }

> >  EXPORT_SYMBOL(__skb_recv_udp);

> >

> > +int udp_read_sock(struct sock *sk, read_descriptor_t *desc,

> > +               sk_read_actor_t recv_actor)

> > +{

> > +     int copied = 0;

> > +

> > +     while (1) {

> > +             int offset = 0, err;

> > +             struct sk_buff *skb;

> > +

> > +             skb = __skb_recv_udp(sk, 0, 1, &offset, &err);

> > +             if (!skb)

> > +                     break;

>

> Does above error handling need the below additional handling?

> It seems __skb_recv_udp() will return the error by parameter "err",

> if "copied == 0", does it need to return the error?


Not for the skmsg case, because the return value is simply unused:

/*
 * Invoke the socket's ->read_sock() operation on @sk, using
 * sk_psock_verdict_recv as the read actor and a descriptor whose
 * count is 1. Returns early without doing anything when the socket,
 * its ops table or the ->read_sock() hook is missing.
 */
static void sk_psock_verdict_data_ready(struct sock *sk)
{
        read_descriptor_t desc;
        struct socket *sock;

        sock = sk->sk_socket;
        if (unlikely(!sock))
                return;
        if (unlikely(!sock->ops || !sock->ops->read_sock))
                return;

        desc.count = 1;
        desc.error = 0;
        desc.arg.data = sk;

        /* The value returned by ->read_sock() is not used here. */
        sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
}

Thanks.
diff mbox series

Patch

diff --git a/include/net/udp.h b/include/net/udp.h
index df7cc1edc200..347b62a753c3 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -329,6 +329,8 @@  struct sock *__udp6_lib_lookup(struct net *net,
 			       struct sk_buff *skb);
 struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
 				 __be16 sport, __be16 dport);
+int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
+		  sk_read_actor_t recv_actor);
 
 /* UDP uses skb->dev_scratch to cache as much information as possible and avoid
  * possibly multiple cache miss on dequeue()
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1355e6c0d567..f17870ee558b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1070,6 +1070,7 @@  const struct proto_ops inet_dgram_ops = {
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
 	.sendmsg	   = inet_sendmsg,
+	.read_sock	   = udp_read_sock,
 	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = inet_sendpage,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 38952aaee3a1..a0adee3b1af4 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1782,6 +1782,58 @@  struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
 }
 EXPORT_SYMBOL(__skb_recv_udp);
 
+/**
+ * udp_read_sock - feed queued UDP datagrams to a read actor
+ * @sk: UDP socket whose receive queue is read
+ * @desc: read descriptor handed through to @recv_actor; the loop stops
+ *	  once @desc->count is zero
+ * @recv_actor: callback invoked with each skb, its offset and remaining length
+ *
+ * Keeps dequeuing skbs with __skb_recv_udp() until none is returned, the
+ * actor returns a value <= 0, or @desc->count drops to zero.
+ *
+ * Return: total bytes the actor reported as consumed; if nothing was
+ * consumed, the actor's non-positive result or the dequeue error.
+ */
+int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
+		  sk_read_actor_t recv_actor)
+{
+	int copied = 0;
+
+	while (1) {
+		int offset = 0, err;
+		struct sk_buff *skb;
+
+		skb = __skb_recv_udp(sk, 0, 1, &offset, &err);
+		if (!skb) {
+			/* Report why the dequeue failed if nothing was read. */
+			if (!copied)
+				copied = err;
+			break;
+		}
+		if (offset < skb->len) {
+			size_t len = skb->len - offset;
+			int used = recv_actor(desc, skb, offset, len);
+
+			if (used <= 0) {
+				if (!copied)
+					copied = used;
+				kfree_skb(skb);
+				break;
+			}
+			if ((size_t)used <= len)
+				copied += used;
+		}
+		/* The skb was taken off the receive queue, so free it here. */
+		kfree_skb(skb);
+		if (!desc->count)
+			break;
+	}
+
+	return copied;
+}
+EXPORT_SYMBOL(udp_read_sock);
+
 /*
  * 	This should be easy, if there is something there we
  * 	return it, otherwise we block.
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 802f5111805a..71de739b4a9e 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -714,6 +714,7 @@  const struct proto_ops inet6_dgram_ops = {
 	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
 	.sendmsg	   = inet6_sendmsg,		/* retpoline's sake */
 	.recvmsg	   = inet6_recvmsg,		/* retpoline's sake */
+	.read_sock	   = udp_read_sock,
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = sock_no_sendpage,
 	.set_peek_off	   = sk_set_peek_off,