diff mbox series

[net,2/3] mptcp: avoid flipping mp_capable field in syn_recv_sock()

Message ID beeba536c07241eb6a7aeb4e844d1888363c34fa.1587389294.git.pabeni@redhat.com
State New
Headers show
Series mptcp: fix races on accept() | expand

Commit Message

Paolo Abeni April 20, 2020, 2:25 p.m. UTC
If multiple CPUs races on the same req_sock in syn_recv_sock(),
flipping such field can cause inconsistent child socket status.

When racing, the CPU losing the req ownership may still change
the mptcp request socket mp_capable flag while the CPU owning
the request is cloning the socket, leaving the child socket with
'is_mptcp' set but no 'mp_capable' flag.

Such socket will stay with 'conn' field cleared, heading to oops
in later mptcp callback.

Address the issue tracking the fallback status in a local variable.

Fixes: 58b09919626b ("mptcp: create msk early")
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/mptcp/subflow.c | 46 +++++++++++++++++++++++++++++----------------
 1 file changed, 30 insertions(+), 16 deletions(-)
diff mbox series

Patch

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 3a94f859347a..10090ca3d3e0 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -376,6 +376,17 @@  static void mptcp_force_close(struct sock *sk)
 	sk_common_release(sk);
 }
 
+static void subflow_ulp_fallback(struct sock *sk,
+				 struct mptcp_subflow_context *old_ctx)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	mptcp_subflow_tcp_fallback(sk, old_ctx);
+	icsk->icsk_ulp_ops = NULL;
+	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
+	tcp_sk(sk)->is_mptcp = 0;
+}
+
 static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 					  struct sk_buff *skb,
 					  struct request_sock *req,
@@ -388,6 +399,7 @@  static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 	struct tcp_options_received opt_rx;
 	bool fallback_is_fatal = false;
 	struct sock *new_msk = NULL;
+	bool fallback = false;
 	struct sock *child;
 
 	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
@@ -412,14 +424,14 @@  static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 			subflow_req->remote_key = opt_rx.mptcp.sndr_key;
 			subflow_req->remote_key_valid = 1;
 		} else {
-			subflow_req->mp_capable = 0;
+			fallback = true;
 			goto create_child;
 		}
 
 create_msk:
 		new_msk = mptcp_sk_clone(listener->conn, req);
 		if (!new_msk)
-			subflow_req->mp_capable = 0;
+			fallback = true;
 	} else if (subflow_req->mp_join) {
 		fallback_is_fatal = true;
 		opt_rx.mptcp.mp_join = 0;
@@ -438,12 +450,18 @@  static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 	if (child && *own_req) {
 		struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);
 
-		/* we have null ctx on TCP fallback, which is fatal on
-		 * MPJ handshake
+		/* we need to fallback on ctx allocation failure and on pre-reqs
+		 * checking above. In the latter scenario we additionally need
+		 * to reset the context to non MPTCP status.
 		 */
-		if (!ctx) {
+		if (!ctx || fallback) {
 			if (fallback_is_fatal)
 				goto close_child;
+
+			if (ctx) {
+				subflow_ulp_fallback(child, ctx);
+				kfree_rcu(ctx, rcu);
+			}
 			goto out;
 		}
 
@@ -474,6 +492,13 @@  static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 	/* dispose of the left over mptcp master, if any */
 	if (unlikely(new_msk))
 		mptcp_force_close(new_msk);
+
+	/* check for expected invariant - should never trigger, just help
+	 * catching eariler subtle bugs
+	 */
+	WARN_ON_ONCE(*own_req && child && tcp_sk(child)->is_mptcp &&
+		     (!mptcp_subflow_ctx(child) ||
+		      !mptcp_subflow_ctx(child)->conn));
 	return child;
 
 close_child:
@@ -1094,17 +1119,6 @@  static void subflow_ulp_release(struct sock *sk)
 	kfree_rcu(ctx, rcu);
 }
 
-static void subflow_ulp_fallback(struct sock *sk,
-				 struct mptcp_subflow_context *old_ctx)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-
-	mptcp_subflow_tcp_fallback(sk, old_ctx);
-	icsk->icsk_ulp_ops = NULL;
-	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
-	tcp_sk(sk)->is_mptcp = 0;
-}
-
 static void subflow_ulp_clone(const struct request_sock *req,
 			      struct sock *newsk,
 			      const gfp_t priority)