diff mbox series

[net-next,2/2] tcp: factorize logic into tcp_epollin_ready()

Message ID 20210212232214.2869897-3-eric.dumazet@gmail.com
State New
Headers show
Series tcp: mem pressure vs SO_RCVLOWAT | expand

Commit Message

Eric Dumazet Feb. 12, 2021, 11:22 p.m. UTC
From: Eric Dumazet <edumazet@google.com>

Both tcp_data_ready() and tcp_stream_is_readable() share the same logic.

Add tcp_epollin_ready() helper to avoid duplication.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Arjun Roy <arjunroy@google.com>
Cc: Wei Wang <weiwan@google.com>
---
 include/net/tcp.h    | 12 ++++++++++++
 net/ipv4/tcp.c       | 16 ++++------------
 net/ipv4/tcp_input.c | 11 ++---------
 3 files changed, 18 insertions(+), 21 deletions(-)

Comments

Arjun Roy Feb. 13, 2021, 5:10 p.m. UTC | #1
On Sat, Feb 13, 2021 at 12:05 AM Eric Dumazet <edumazet@google.com> wrote:
>
> On Sat, Feb 13, 2021 at 8:50 AM Eric Dumazet <edumazet@google.com> wrote:
> >
> > On Sat, Feb 13, 2021 at 1:30 AM Wei Wang <weiwan@google.com> wrote:
> > >
> >
> > > >  void tcp_data_ready(struct sock *sk)
> > > >  {
> > > > -       const struct tcp_sock *tp = tcp_sk(sk);
> > > > -       int avail = tp->rcv_nxt - tp->copied_seq;
> > > > -
> > > > -       if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
> > > > -           !sock_flag(sk, SOCK_DONE) &&
> > >
> > > Seems "!sock_flag(sk, SOCK_DONE)" is not checked in
> > > tcp_epollin_read(). Does it matter?
> > >
> >
> >
> > Yes, probably, good catch.
> >
> > Not sure where tcp_poll() gets this, I have to double check.
>
> It gets the info from sk->sk_hutdown & RCV_SHUTDOWN
>
> tcp_find() sets both sk->sk_shutdown |= RCV_SHUTDOWN and
> sock_set_flag(sk, SOCK_DONE);
>
> This seems to suggest tcp_fin() could call sk->sk_data_ready() so that
> we do not have to test for this unlikely condition in tcp_data_ready()

When a thread is subsequently then woken up due to sk_data_ready(),
and it calls tcp_stream_is_readable() but we had lowat > 1 set, is
there a chance of that thread then thinking that the stream is not
readable, despite SOCK_DONE being set? This is assuming that the check
is not added to the refactored logic.

Note that on a related note if the tcp memory pressure check (for
system-wide pressure) is added just to the original code in
tcp_data_ready() but not added to tcp_stream_is_readable() we had this
kind of issue (sk_data_ready() was called but tcp_stream_is_readable()
returned false).

-Arjun

-Arjun
diff mbox series

Patch

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 244208f6f6c2ace87920b633e469421f557427a6..484eb2362645fd478f59b26b42457ecf4510eb14 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1442,6 +1442,18 @@  static inline bool tcp_rmem_pressure(const struct sock *sk)
 	return atomic_read(&sk->sk_rmem_alloc) > threshold;
 }
 
+static inline bool tcp_epollin_ready(const struct sock *sk, int target)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq);
+
+	if (avail <= 0)
+		return false;
+
+	return (avail >= target) || tcp_rmem_pressure(sk) ||
+	       (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss);
+}
+
 extern void tcp_openreq_init_rwin(struct request_sock *req,
 				  const struct sock *sk_listener,
 				  const struct dst_entry *dst);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9896ca10bb340924b779cb6a7606d57fdd5c3357..7a6b58ae408d1fb1e5536ccfed8215be123f3b57 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -481,19 +481,11 @@  static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
 	}
 }
 
-static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
-					  int target, struct sock *sk)
+static bool tcp_stream_is_readable(struct sock *sk, int target)
 {
-	int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq);
+	if (tcp_epollin_ready(sk, target))
+		return true;
 
-	if (avail > 0) {
-		if (avail >= target)
-			return true;
-		if (tcp_rmem_pressure(sk))
-			return true;
-		if (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss)
-			return true;
-	}
 	if (sk->sk_prot->stream_memory_read)
 		return sk->sk_prot->stream_memory_read(sk);
 	return false;
@@ -568,7 +560,7 @@  __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 		    tp->urg_data)
 			target++;
 
-		if (tcp_stream_is_readable(tp, target, sk))
+		if (tcp_stream_is_readable(sk, target))
 			mask |= EPOLLIN | EPOLLRDNORM;
 
 		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a8f8f98159531e5d1c80660972148986f6acd20a..e32a7056cb7640c67ef2d6a4d9484684d2602fcd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4924,15 +4924,8 @@  int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 
 void tcp_data_ready(struct sock *sk)
 {
-	const struct tcp_sock *tp = tcp_sk(sk);
-	int avail = tp->rcv_nxt - tp->copied_seq;
-
-	if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
-	    !sock_flag(sk, SOCK_DONE) &&
-	    tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss)
-		return;
-
-	sk->sk_data_ready(sk);
+	if (tcp_epollin_ready(sk, sk->sk_rcvlowat))
+		sk->sk_data_ready(sk);
 }
 
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)