diff mbox series

[bpf-next,v3,02/10] af_unix: implement ->read_sock() for sockmap

Message ID 20210426025001.7899-3-xiyou.wangcong@gmail.com
State Superseded
Headers show
Series sockmap: add sockmap support to Unix datagram socket | expand

Commit Message

Cong Wang April 26, 2021, 2:49 a.m. UTC
From: Cong Wang <cong.wang@bytedance.com>

Implement ->read_sock() for AF_UNIX datagram socket, it is
pretty much similar to udp_read_sock().

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
 net/unix/af_unix.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

Comments

Jakub Sitnicki May 5, 2021, 5:14 p.m. UTC | #1
On Mon, Apr 26, 2021 at 04:49 AM CEST, Cong Wang wrote:
> From: Cong Wang <cong.wang@bytedance.com>

>

> Implement ->read_sock() for AF_UNIX datagram socket, it is

> pretty much similar to udp_read_sock().

>

> Cc: John Fastabend <john.fastabend@gmail.com>

> Cc: Daniel Borkmann <daniel@iogearbox.net>

> Cc: Jakub Sitnicki <jakub@cloudflare.com>

> Cc: Lorenz Bauer <lmb@cloudflare.com>

> Signed-off-by: Cong Wang <cong.wang@bytedance.com>

> ---

>  net/unix/af_unix.c | 38 ++++++++++++++++++++++++++++++++++++++

>  1 file changed, 38 insertions(+)

>

> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c

> index 5a31307ceb76..f4dc22db371d 100644

> --- a/net/unix/af_unix.c

> +++ b/net/unix/af_unix.c

> @@ -661,6 +661,8 @@ static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,

>  				       unsigned int flags);

>  static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);

>  static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);

> +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,

> +			  sk_read_actor_t recv_actor);

>  static int unix_dgram_connect(struct socket *, struct sockaddr *,

>  			      int, int);

>  static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);

> @@ -738,6 +740,7 @@ static const struct proto_ops unix_dgram_ops = {

>  	.listen =	sock_no_listen,

>  	.shutdown =	unix_shutdown,

>  	.sendmsg =	unix_dgram_sendmsg,

> +	.read_sock =	unix_read_sock,

>  	.recvmsg =	unix_dgram_recvmsg,

>  	.mmap =		sock_no_mmap,

>  	.sendpage =	sock_no_sendpage,

> @@ -2183,6 +2186,41 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,

>  	return err;

>  }

>

> +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,

> +			  sk_read_actor_t recv_actor)

> +{

> +	int copied = 0;

> +

> +	while (1) {

> +		struct unix_sock *u = unix_sk(sk);

> +		struct sk_buff *skb;

> +		int used, err;

> +

> +		mutex_lock(&u->iolock);

> +		skb = skb_recv_datagram(sk, 0, 1, &err);

> +		if (!skb) {

> +			mutex_unlock(&u->iolock);

> +			return err;

> +		}

> +

> +		used = recv_actor(desc, skb, 0, skb->len);

> +		if (used <= 0) {

> +			if (!copied)

> +				copied = used;

> +			mutex_unlock(&u->iolock);

> +			break;

> +		} else if (used <= skb->len) {

> +			copied += used;

> +		}

> +		mutex_unlock(&u->iolock);


Do we need hold the mutex for recv_actor to process the skb?

> +

> +		if (!desc->count)

> +			break;

> +	}

> +

> +	return copied;

> +}

> +

>  /*

>   *	Sleep until more data has arrived. But check for races..

>   */
Cong Wang May 7, 2021, 1 a.m. UTC | #2
On Wed, May 5, 2021 at 10:14 AM Jakub Sitnicki <jakub@cloudflare.com> wrote:
>

> On Mon, Apr 26, 2021 at 04:49 AM CEST, Cong Wang wrote:

> > From: Cong Wang <cong.wang@bytedance.com>

> >

> > Implement ->read_sock() for AF_UNIX datagram socket, it is

> > pretty much similar to udp_read_sock().

> >

> > Cc: John Fastabend <john.fastabend@gmail.com>

> > Cc: Daniel Borkmann <daniel@iogearbox.net>

> > Cc: Jakub Sitnicki <jakub@cloudflare.com>

> > Cc: Lorenz Bauer <lmb@cloudflare.com>

> > Signed-off-by: Cong Wang <cong.wang@bytedance.com>

> > ---

> >  net/unix/af_unix.c | 38 ++++++++++++++++++++++++++++++++++++++

> >  1 file changed, 38 insertions(+)

> >

> > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c

> > index 5a31307ceb76..f4dc22db371d 100644

> > --- a/net/unix/af_unix.c

> > +++ b/net/unix/af_unix.c

> > @@ -661,6 +661,8 @@ static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,

> >                                      unsigned int flags);

> >  static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);

> >  static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);

> > +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,

> > +                       sk_read_actor_t recv_actor);

> >  static int unix_dgram_connect(struct socket *, struct sockaddr *,

> >                             int, int);

> >  static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);

> > @@ -738,6 +740,7 @@ static const struct proto_ops unix_dgram_ops = {

> >       .listen =       sock_no_listen,

> >       .shutdown =     unix_shutdown,

> >       .sendmsg =      unix_dgram_sendmsg,

> > +     .read_sock =    unix_read_sock,

> >       .recvmsg =      unix_dgram_recvmsg,

> >       .mmap =         sock_no_mmap,

> >       .sendpage =     sock_no_sendpage,

> > @@ -2183,6 +2186,41 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,

> >       return err;

> >  }

> >

> > +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,

> > +                       sk_read_actor_t recv_actor)

> > +{

> > +     int copied = 0;

> > +

> > +     while (1) {

> > +             struct unix_sock *u = unix_sk(sk);

> > +             struct sk_buff *skb;

> > +             int used, err;

> > +

> > +             mutex_lock(&u->iolock);

> > +             skb = skb_recv_datagram(sk, 0, 1, &err);

> > +             if (!skb) {

> > +                     mutex_unlock(&u->iolock);

> > +                     return err;

> > +             }

> > +

> > +             used = recv_actor(desc, skb, 0, skb->len);

> > +             if (used <= 0) {

> > +                     if (!copied)

> > +                             copied = used;

> > +                     mutex_unlock(&u->iolock);

> > +                     break;

> > +             } else if (used <= skb->len) {

> > +                     copied += used;

> > +             }

> > +             mutex_unlock(&u->iolock);

>

> Do we need hold the mutex for recv_actor to process the skb?


Hm, it does look like we can just release it after dequeuing the
skb. Let me double check.

Thanks.
John Fastabend May 11, 2021, 5:34 a.m. UTC | #3
Cong Wang wrote:
> From: Cong Wang <cong.wang@bytedance.com>

> 

> Implement ->read_sock() for AF_UNIX datagram socket, it is

> pretty much similar to udp_read_sock().

> 

> Cc: John Fastabend <john.fastabend@gmail.com>

> Cc: Daniel Borkmann <daniel@iogearbox.net>

> Cc: Jakub Sitnicki <jakub@cloudflare.com>

> Cc: Lorenz Bauer <lmb@cloudflare.com>

> Signed-off-by: Cong Wang <cong.wang@bytedance.com>

> ---

>  net/unix/af_unix.c | 38 ++++++++++++++++++++++++++++++++++++++

>  1 file changed, 38 insertions(+)

> 

> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c

> index 5a31307ceb76..f4dc22db371d 100644

> --- a/net/unix/af_unix.c

> +++ b/net/unix/af_unix.c

> @@ -661,6 +661,8 @@ static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,

>  				       unsigned int flags);

>  static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);

>  static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);

> +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,

> +			  sk_read_actor_t recv_actor);

>  static int unix_dgram_connect(struct socket *, struct sockaddr *,

>  			      int, int);

>  static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);

> @@ -738,6 +740,7 @@ static const struct proto_ops unix_dgram_ops = {

>  	.listen =	sock_no_listen,

>  	.shutdown =	unix_shutdown,

>  	.sendmsg =	unix_dgram_sendmsg,

> +	.read_sock =	unix_read_sock,

>  	.recvmsg =	unix_dgram_recvmsg,

>  	.mmap =		sock_no_mmap,

>  	.sendpage =	sock_no_sendpage,

> @@ -2183,6 +2186,41 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,

>  	return err;

>  }

>  

> +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,

> +			  sk_read_actor_t recv_actor)

> +{

> +	int copied = 0;

> +

> +	while (1) {

> +		struct unix_sock *u = unix_sk(sk);

> +		struct sk_buff *skb;

> +		int used, err;

> +

> +		mutex_lock(&u->iolock);

> +		skb = skb_recv_datagram(sk, 0, 1, &err);

> +		if (!skb) {

> +			mutex_unlock(&u->iolock);

> +			return err;


Here we should check copied and break if copied is >0. Sure the caller here
has desc.count = 1 but its still fairly fragile.

> +		}

> +

> +		used = recv_actor(desc, skb, 0, skb->len);

> +		if (used <= 0) {

> +			if (!copied)

> +				copied = used;

> +			mutex_unlock(&u->iolock);

> +			break;

> +		} else if (used <= skb->len) {

> +			copied += used;

> +		}

> +		mutex_unlock(&u->iolock);

> +

> +		if (!desc->count)

> +			break;

> +	}

> +

> +	return copied;

> +}

> +

>  /*

>   *	Sleep until more data has arrived. But check for races..

>   */

> -- 

> 2.25.1

>
Cong Wang May 18, 2021, 4:46 a.m. UTC | #4
On Mon, May 10, 2021 at 10:34 PM John Fastabend
<john.fastabend@gmail.com> wrote:
> > +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,

> > +                       sk_read_actor_t recv_actor)

> > +{

> > +     int copied = 0;

> > +

> > +     while (1) {

> > +             struct unix_sock *u = unix_sk(sk);

> > +             struct sk_buff *skb;

> > +             int used, err;

> > +

> > +             mutex_lock(&u->iolock);

> > +             skb = skb_recv_datagram(sk, 0, 1, &err);

> > +             if (!skb) {

> > +                     mutex_unlock(&u->iolock);

> > +                     return err;

>

> Here we should check copied and break if copied is >0. Sure the caller here

> has desc.count = 1 but its still fairly fragile.


Technically, sockmap does not even care about what we return
here, so I am sure what you suggest here even makes a difference.
Also, desc->count is always 1 and never changes here.

Thanks.
John Fastabend May 18, 2021, 5:11 a.m. UTC | #5
Cong Wang wrote:
> On Mon, May 10, 2021 at 10:34 PM John Fastabend

> <john.fastabend@gmail.com> wrote:

> > > +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,

> > > +                       sk_read_actor_t recv_actor)

> > > +{

> > > +     int copied = 0;

> > > +

> > > +     while (1) {

> > > +             struct unix_sock *u = unix_sk(sk);

> > > +             struct sk_buff *skb;

> > > +             int used, err;

> > > +

> > > +             mutex_lock(&u->iolock);

> > > +             skb = skb_recv_datagram(sk, 0, 1, &err);

> > > +             if (!skb) {

> > > +                     mutex_unlock(&u->iolock);

> > > +                     return err;

> >

> > Here we should check copied and break if copied is >0. Sure the caller here

> > has desc.count = 1 but its still fairly fragile.

> 

> Technically, sockmap does not even care about what we return

> here, so I am sure what you suggest here even makes a difference.

> Also, desc->count is always 1 and never changes here.


Right, so either don't wrap it in a while() loop so its obviously
not workable or fix it so that it returns the correct copied value if
we ever did pass it count > 1.. 

> 

> Thanks.
diff mbox series

Patch

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5a31307ceb76..f4dc22db371d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -661,6 +661,8 @@  static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 				       unsigned int flags);
 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
+static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
+			  sk_read_actor_t recv_actor);
 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 			      int, int);
 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -738,6 +740,7 @@  static const struct proto_ops unix_dgram_ops = {
 	.listen =	sock_no_listen,
 	.shutdown =	unix_shutdown,
 	.sendmsg =	unix_dgram_sendmsg,
+	.read_sock =	unix_read_sock,
 	.recvmsg =	unix_dgram_recvmsg,
 	.mmap =		sock_no_mmap,
 	.sendpage =	sock_no_sendpage,
@@ -2183,6 +2186,41 @@  static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 	return err;
 }
 
+static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
+			  sk_read_actor_t recv_actor)
+{
+	int copied = 0;
+
+	while (1) {
+		struct unix_sock *u = unix_sk(sk);
+		struct sk_buff *skb;
+		int used, err;
+
+		mutex_lock(&u->iolock);
+		skb = skb_recv_datagram(sk, 0, 1, &err);
+		if (!skb) {
+			mutex_unlock(&u->iolock);
+			return err;
+		}
+
+		used = recv_actor(desc, skb, 0, skb->len);
+		if (used <= 0) {
+			if (!copied)
+				copied = used;
+			mutex_unlock(&u->iolock);
+			break;
+		} else if (used <= skb->len) {
+			copied += used;
+		}
+		mutex_unlock(&u->iolock);
+
+		if (!desc->count)
+			break;
+	}
+
+	return copied;
+}
+
 /*
  *	Sleep until more data has arrived. But check for races..
  */