diff mbox series

[RFCv3,01/15] tcp: authopt: Initial support and key management

Message ID 090898ac6f3345ec02999858c65c2ebb8cd274a8.1629840814.git.cdleonard@gmail.com
State New
Headers show
Series tcp: Initial support for RFC5925 auth option | expand

Commit Message

Leonard Crestez Aug. 24, 2021, 9:34 p.m. UTC
This commit adds support to add and remove keys but does not use them
further.

Similar to tcp md5 a single pointer to a struct tcp_authopt_info* struct
is added to struct tcp_sock, this avoids increasing memory usage. The
data structures related to tcp_authopt are initialized on setsockopt and
only freed on socket close.

Signed-off-by: Leonard Crestez <cdleonard@gmail.com>
---
 include/linux/tcp.h       |   6 +
 include/net/tcp.h         |   1 +
 include/net/tcp_authopt.h |  65 +++++++++++
 include/uapi/linux/tcp.h  |  79 ++++++++++++++
 net/ipv4/Kconfig          |  14 +++
 net/ipv4/Makefile         |   1 +
 net/ipv4/tcp.c            |  27 +++++
 net/ipv4/tcp_authopt.c    | 223 ++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_ipv4.c       |   2 +
 9 files changed, 418 insertions(+)
 create mode 100644 include/net/tcp_authopt.h
 create mode 100644 net/ipv4/tcp_authopt.c

Comments

Leonard Crestez Sept. 3, 2021, 2:26 p.m. UTC | #1
On 31.08.2021 22:04, Dmitry Safonov wrote:
> Hi Leonard,
> On 8/24/21 10:34 PM, Leonard Crestez wrote:
>> +/**
>> + * struct tcp_authopt_key_info - Representation of a Master Key Tuple as per RFC5925
>> + *
>> + * Key structure lifetime is only protected by RCU so readers needs to hold a
>> + * single rcu_read_lock until they're done with the key.
>> + */
>> +struct tcp_authopt_key_info {
>> +	struct hlist_node node;
>> +	struct rcu_head rcu;
>> +	/* Local identifier */
>> +	u32 local_id;
> 
> It's unused now, can be removed.

Yes

>> +/**
>> + * enum tcp_authopt_key_flag - flags for `tcp_authopt.flags`
>> + *
>> + * @TCP_AUTHOPT_KEY_DEL: Delete the key by local_id and ignore all other fields.
>                                                ^
> By send_id and recv_id.

Yes. The identifying fields are documented on struct tcp_authopt_key so 
I will abbreviate this.

> Also, tcp_authopt_key_match_exact() seems to check
> TCP_AUTHOPT_KEY_ADDR_BIND. I wounder if that makes sense to relax it in
> case of TCP_AUTHOPT_KEY_DEL to match only send_id/recv_id if addr isn't
> specified (no hard feelings about it, though).

Same send_id/recv_id can overlap between different remote peers.

> [..]
>> +#ifdef CONFIG_TCP_AUTHOPT
>> +	case TCP_AUTHOPT: {
>> +		struct tcp_authopt info;
>> +
>> +		if (get_user(len, optlen))
>> +			return -EFAULT;
>> +
>> +		lock_sock(sk);
>> +		tcp_get_authopt_val(sk, &info);
>> +		release_sock(sk);
>> +
>> +		len = min_t(unsigned int, len, sizeof(info));
>> +		if (put_user(len, optlen))
>> +			return -EFAULT;
>> +		if (copy_to_user(optval, &info, len))
>> +			return -EFAULT;
>> +		return 0;
> 
> Failed tcp_get_authopt_val() lookup in:
> :       if (!info)
> :               return -EINVAL;
> 
> will leak uninitialized kernel memory from stack.
> ASLR guys defeated.

tcp_get_authopt_val clears *info before all checks so this will return 
zeros to userspace.

I do need to propagate the return value from tcp_get_authopt_val.

>> +#define TCP_AUTHOPT_KNOWN_FLAGS ( \
>> +	TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED)
>> +
>> +int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen)
>> +{
>> +	struct tcp_authopt opt;
>> +	struct tcp_authopt_info *info;
>> +
>> +	sock_owned_by_me(sk);
>> +
>> +	/* If userspace optlen is too short fill the rest with zeros */
>> +	if (optlen > sizeof(opt))
>> +		return -EINVAL;
> 
> More like
> :	if (unlikely(len > sizeof(opt))) {
> :		err = check_zeroed_user(optval + sizeof(opt),
> :					len - sizeof(opt));
> :		if (err < 1)
> :			return err == 0 ? -EINVAL : err;
> :		len = sizeof(opt);
> :		if (put_user(len, optlen))
> :			return -EFAULT;
> :	}

If (optlen > sizeof(opt)) means userspace is attempting to use newer 
ABI. Current behavior is to return an error which seems very reasonable.

You seem to be suggesting that we check that the rest of option is 
zeroes and if it is to continue. That seems potentially dangerous but it 
could work if we forever ensure that zeroes always mean "no effect".

This would make it easier for new apps to run on old kernels: unless 
they specifically use new features they don't need to do anything.

Also, setsockopt can't report a new length back and there's no 
getsockopt for keys.

>> +	memset(&opt, 0, sizeof(opt));
>> +	if (copy_from_sockptr(&opt, optval, optlen))
>> +		return -EFAULT;
>> +
>> +	if (opt.flags & ~TCP_AUTHOPT_KNOWN_FLAGS)
>> +		return -EINVAL;

Here if the user requests unrecognized flags an error is reported. My 
intention is that new fields will be accompanied by new flags.

>> +	info = __tcp_authopt_info_get_or_create(sk);
>> +	if (IS_ERR(info))
>> +		return PTR_ERR(info);
>> +
>> +	info->flags = opt.flags & TCP_AUTHOPT_KNOWN_FLAGS;
>> +
>> +	return 0;
>> +}
> 
> [..]
>> +int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen)
>> +{
>> +	struct tcp_authopt_key opt;
>> +	struct tcp_authopt_info *info;
>> +	struct tcp_authopt_key_info *key_info;
>> +
>> +	sock_owned_by_me(sk);
>> +
>> +	/* If userspace optlen is too short fill the rest with zeros */
>> +	if (optlen > sizeof(opt))
>> +		return -EINVAL;
> 
> Ditto
> 
>> +	memset(&opt, 0, sizeof(opt));
>> +	if (copy_from_sockptr(&opt, optval, optlen))
>> +		return -EFAULT;
>> +
>> +	if (opt.flags & ~TCP_AUTHOPT_KEY_KNOWN_FLAGS)
>> +		return -EINVAL;
>> +
>> +	if (opt.keylen > TCP_AUTHOPT_MAXKEYLEN)
>> +		return -EINVAL;
>> +
>> +	/* Delete is a special case: */
>> +	if (opt.flags & TCP_AUTHOPT_KEY_DEL) {
>> +		info = rcu_dereference_check(tcp_sk(sk)->authopt_info, lockdep_sock_is_held(sk));
>> +		if (!info)
>> +			return -ENOENT;
>> +		key_info = tcp_authopt_key_lookup_exact(sk, info, &opt);
>> +		if (!key_info)
>> +			return -ENOENT;
>> +		tcp_authopt_key_del(sk, info, key_info);
> 
> Doesn't seem to be safe together with tcp_authopt_select_key().
> A key can be in use at this moment - you have to add checks for it.

tcp_authopt_key_del does kfree_rcu. As far as I understand this means 
that if select_key can see the key it is guaranteed to live until the 
next grace period, which shouldn't be until after the packet is signed.

I will attempt to document this restriction on tcp_authopt_select_key: 
you can't do anything with the key except give it to tcp_authopt_hash 
before an RCU grace period.

I'm not confident this is correct in all cases. It's inspired by what 
MD5 does but apparently those key lists are protected by a combination 
of sk_lock and rcu?

>> +		return 0;
>> +	}
>> +
>> +	/* check key family */
>> +	if (opt.flags & TCP_AUTHOPT_KEY_ADDR_BIND) {
>> +		if (sk->sk_family != opt.addr.ss_family)
>> +			return -EINVAL;
>> +	}
>> +
>> +	/* Initialize tcp_authopt_info if not already set */
>> +	info = __tcp_authopt_info_get_or_create(sk);
>> +	if (IS_ERR(info))
>> +		return PTR_ERR(info);
>> +
>> +	/* If an old key exists with exact ID then remove and replace.
>> +	 * RCU-protected readers might observe both and pick any.
>> +	 */
>> +	key_info = tcp_authopt_key_lookup_exact(sk, info, &opt);
>> +	if (key_info)
>> +		tcp_authopt_key_del(sk, info, key_info);
>> +	key_info = sock_kmalloc(sk, sizeof(*key_info), GFP_KERNEL | __GFP_ZERO);
>> +	if (!key_info)
>> +		return -ENOMEM;
> 
> So, you may end up without any key.

Moving the sock_kmalloc higher should fix this, there would be no effect 
on alloc failure.

> Also, replacing a key is not at all safe: you may receive old segments
> which you in turn will discard and reset the connection. >
> I think the limitation RFC puts on removing keys in use and replacing
> existing keys are actually reasonable. Probably, it'd be better to
> enforce "key in use => desired key is different (or key_outdated flag)
> => key not in use => key may be removed" life-cycle of MKT.

Userspace breaking its own connections seems fine, it can already do 
this in many ways.

If the current key is removed the kernel will just switch to another 
valid key. If no valid keys exist then I expect it will switch to 
unsigned packets which is possibly quite dangerous.

Maybe it should be possible to insert a "marker" key which just says 
"don't do any unsigned traffic with this peer"?

--
Regards,
Leonard
diff mbox series

Patch

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 48d8a363319e..cfddfc720b00 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -140,10 +140,12 @@  struct tcp_request_sock {
 static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
 {
 	return (struct tcp_request_sock *)req;
 }
 
+struct tcp_authopt_info;
+
 struct tcp_sock {
 	/* inet_connection_sock has to be the first member of tcp_sock */
 	struct inet_connection_sock	inet_conn;
 	u16	tcp_header_len;	/* Bytes of tcp header to send		*/
 	u16	gso_segs;	/* Max number of segs per GSO packet	*/
@@ -403,10 +405,14 @@  struct tcp_sock {
 
 /* TCP MD5 Signature Option information */
 	struct tcp_md5sig_info	__rcu *md5sig_info;
 #endif
 
+#ifdef CONFIG_TCP_AUTHOPT
+	struct tcp_authopt_info	__rcu *authopt_info;
+#endif
+
 /* TCP fastopen related information */
 	struct tcp_fastopen_request *fastopen_req;
 	/* fastopen_rsk points to request_sock that resulted in this big
 	 * socket. Used to retransmit SYNACKs etc.
 	 */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3166dc15d7d6..bb76554e8fe5 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -182,10 +182,11 @@  void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOPT_WINDOW		3	/* Window scaling */
 #define TCPOPT_SACK_PERM        4       /* SACK Permitted */
 #define TCPOPT_SACK             5       /* SACK Block */
 #define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
 #define TCPOPT_MD5SIG		19	/* MD5 Signature (RFC2385) */
+#define TCPOPT_AUTHOPT		29	/* Auth Option (RFC5925) */
 #define TCPOPT_MPTCP		30	/* Multipath TCP (RFC6824) */
 #define TCPOPT_FASTOPEN		34	/* Fast open (RFC7413) */
 #define TCPOPT_EXP		254	/* Experimental */
 /* Magic number to be after the option value for sharing TCP
  * experimental options. See draft-ietf-tcpm-experimental-options-00.txt
diff --git a/include/net/tcp_authopt.h b/include/net/tcp_authopt.h
new file mode 100644
index 000000000000..b4277112b506
--- /dev/null
+++ b/include/net/tcp_authopt.h
@@ -0,0 +1,65 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _LINUX_TCP_AUTHOPT_H
+#define _LINUX_TCP_AUTHOPT_H
+
+#include <uapi/linux/tcp.h>
+
+/**
+ * struct tcp_authopt_key_info - Representation of a Master Key Tuple as per RFC5925
+ *
+ * Key structure lifetime is only protected by RCU so readers needs to hold a
+ * single rcu_read_lock until they're done with the key.
+ */
+struct tcp_authopt_key_info {
+	struct hlist_node node;
+	struct rcu_head rcu;
+	/* Local identifier */
+	u32 local_id;
+	u32 flags;
+	/* Wire identifiers */
+	u8 send_id, recv_id;
+	u8 alg_id;
+	u8 keylen;
+	u8 key[TCP_AUTHOPT_MAXKEYLEN];
+	struct sockaddr_storage addr;
+};
+
+/**
+ * struct tcp_authopt_info - Per-socket information regarding tcp_authopt
+ *
+ * This is lazy-initialized in order to avoid increasing memory usage for
+ * regular TCP sockets. Once created it is only destroyed on socket close.
+ */
+struct tcp_authopt_info {
+	/** @head: List of tcp_authopt_key_info */
+	struct hlist_head head;
+	struct rcu_head rcu;
+	u32 flags;
+	u32 src_isn;
+	u32 dst_isn;
+};
+
+#ifdef CONFIG_TCP_AUTHOPT
+void tcp_authopt_clear(struct sock *sk);
+int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen);
+int tcp_get_authopt_val(struct sock *sk, struct tcp_authopt *key);
+int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen);
+#else
+static inline int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen)
+{
+	return -ENOPROTOOPT;
+}
+static inline int tcp_get_authopt_val(struct sock *sk, struct tcp_authopt *key)
+{
+	return -ENOPROTOOPT;
+}
+static inline void tcp_authopt_clear(struct sock *sk)
+{
+}
+static inline int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen)
+{
+	return -ENOPROTOOPT;
+}
+#endif
+
+#endif /* _LINUX_TCP_AUTHOPT_H */
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 8fc09e8638b3..575162e7e281 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -126,10 +126,12 @@  enum {
 #define TCP_INQ			36	/* Notify bytes available to read as a cmsg on read */
 
 #define TCP_CM_INQ		TCP_INQ
 
 #define TCP_TX_DELAY		37	/* delay outgoing packets by XX usec */
+#define TCP_AUTHOPT		38	/* TCP Authentication Option (RFC5925) */
+#define TCP_AUTHOPT_KEY		39	/* TCP Authentication Option Key (RFC5925) */
 
 
 #define TCP_REPAIR_ON		1
 #define TCP_REPAIR_OFF		0
 #define TCP_REPAIR_OFF_NO_WP	-1	/* Turn off without window probes */
@@ -340,10 +342,87 @@  struct tcp_diag_md5sig {
 	__u16	tcpm_keylen;
 	__be32	tcpm_addr[4];
 	__u8	tcpm_key[TCP_MD5SIG_MAXKEYLEN];
 };
 
+/**
+ * enum tcp_authopt_flag - flags for `tcp_authopt.flags`
+ */
+enum tcp_authopt_flag {
+	/**
+	 * @TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED:
+	 *	Configure behavior of segments with TCP-AO coming from hosts for which no
+	 *	key is configured. The default recommended by RFC is to silently accept
+	 *	such connections.
+	 */
+	TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED = (1 << 2),
+};
+
+/**
+ * struct tcp_authopt - Per-socket options related to TCP Authentication Option
+ */
+struct tcp_authopt {
+	/** @flags: Combination of &enum tcp_authopt_flag */
+	__u32	flags;
+};
+
+/**
+ * enum tcp_authopt_key_flag - flags for `tcp_authopt.flags`
+ *
+ * @TCP_AUTHOPT_KEY_DEL: Delete the key by local_id and ignore all other fields.
+ * @TCP_AUTHOPT_KEY_EXCLUDE_OPTS: Exclude TCP options from signature.
+ * @TCP_AUTHOPT_KEY_ADDR_BIND: Key only valid for `tcp_authopt.addr`
+ */
+enum tcp_authopt_key_flag {
+	TCP_AUTHOPT_KEY_DEL = (1 << 0),
+	TCP_AUTHOPT_KEY_EXCLUDE_OPTS = (1 << 1),
+	TCP_AUTHOPT_KEY_ADDR_BIND = (1 << 2),
+};
+
+/**
+ * enum tcp_authopt_alg - Algorithms for TCP Authentication Option
+ */
+enum tcp_authopt_alg {
+	TCP_AUTHOPT_ALG_HMAC_SHA_1_96 = 1,
+	TCP_AUTHOPT_ALG_AES_128_CMAC_96 = 2,
+};
+
+/* for TCP_AUTHOPT_KEY socket option */
+#define TCP_AUTHOPT_MAXKEYLEN	80
+
+/**
+ * struct tcp_authopt_key - TCP Authentication KEY
+ *
+ * Key are identified by the combination of:
+ * - send_id
+ * - recv_id
+ * - addr (iff TCP_AUTHOPT_KEY_ADDR_BIND)
+ *
+ * RFC5925 requires that key ids must not overlap for the same TCP connection.
+ * This is not enforced by linux.
+ */
+struct tcp_authopt_key {
+	/** @flags: Combination of &enum tcp_authopt_key_flag */
+	__u32	flags;
+	/** @send_id: keyid value for send */
+	__u8	send_id;
+	/** @recv_id: keyid value for receive */
+	__u8	recv_id;
+	/** @alg: One of &enum tcp_authopt_alg */
+	__u8	alg;
+	/** @keylen: Length of the key buffer */
+	__u8	keylen;
+	/** @key: Secret key */
+	__u8	key[TCP_AUTHOPT_MAXKEYLEN];
+	/**
+	 * @addr: Key is only valid for this address
+	 *
+	 * Ignored unless TCP_AUTHOPT_KEY_ADDR_BIND flag is set
+	 */
+	struct __kernel_sockaddr_storage addr;
+};
+
 /* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */
 
 #define TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT 0x1
 struct tcp_zerocopy_receive {
 	__u64 address;		/* in: address of mapping */
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 87983e70f03f..6459f4ea6f1d 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -740,5 +740,19 @@  config TCP_MD5SIG
 	  RFC2385 specifies a method of giving MD5 protection to TCP sessions.
 	  Its main (only?) use is to protect BGP sessions between core routers
 	  on the Internet.
 
 	  If unsure, say N.
+
+config TCP_AUTHOPT
+	bool "TCP: Authentication Option support (RFC5925)"
+	select CRYPTO
+	select CRYPTO_SHA1
+	select CRYPTO_HMAC
+	select CRYPTO_AES
+	select CRYPTO_CMAC
+	help
+	  RFC5925 specifies a new method of giving protection to TCP sessions.
+	  Its intended use is to protect BGP sessions between core routers
+	  on the Internet. It obsoletes TCP MD5 (RFC2385) but is incompatible.
+
+	  If unsure, say N.
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index bbdd9c44f14e..d336f32ce177 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -59,10 +59,11 @@  obj-$(CONFIG_TCP_CONG_NV) += tcp_nv.o
 obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
 obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
 obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
 obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
+obj-$(CONFIG_TCP_AUTHOPT) += tcp_authopt.o
 obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
 obj-$(CONFIG_BPF_SYSCALL) += udp_bpf.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f931def6302e..fd90e80afa2c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,10 +271,11 @@ 
 
 #include <net/icmp.h>
 #include <net/inet_common.h>
 #include <net/tcp.h>
 #include <net/mptcp.h>
+#include <net/tcp_authopt.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 #include <net/sock.h>
 
 #include <linux/uaccess.h>
@@ -3573,10 +3574,16 @@  static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
 	case TCP_MD5SIG:
 	case TCP_MD5SIG_EXT:
 		err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
 		break;
 #endif
+	case TCP_AUTHOPT:
+		err = tcp_set_authopt(sk, optval, optlen);
+		break;
+	case TCP_AUTHOPT_KEY:
+		err = tcp_set_authopt_key(sk, optval, optlen);
+		break;
 	case TCP_USER_TIMEOUT:
 		/* Cap the max time in ms TCP will retry or probe the window
 		 * before giving up and aborting (ETIMEDOUT) a connection.
 		 */
 		if (val < 0)
@@ -4219,10 +4226,30 @@  static int do_tcp_getsockopt(struct sock *sk, int level,
 		if (!err && copy_to_user(optval, &zc, len))
 			err = -EFAULT;
 		return err;
 	}
 #endif
+#ifdef CONFIG_TCP_AUTHOPT
+	case TCP_AUTHOPT: {
+		struct tcp_authopt info;
+
+		if (get_user(len, optlen))
+			return -EFAULT;
+
+		lock_sock(sk);
+		tcp_get_authopt_val(sk, &info);
+		release_sock(sk);
+
+		len = min_t(unsigned int, len, sizeof(info));
+		if (put_user(len, optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, &info, len))
+			return -EFAULT;
+		return 0;
+	}
+#endif
+
 	default:
 		return -ENOPROTOOPT;
 	}
 
 	if (put_user(len, optlen))
diff --git a/net/ipv4/tcp_authopt.c b/net/ipv4/tcp_authopt.c
new file mode 100644
index 000000000000..f6dddc5775ff
--- /dev/null
+++ b/net/ipv4/tcp_authopt.c
@@ -0,0 +1,223 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/kernel.h>
+#include <net/tcp.h>
+#include <net/tcp_authopt.h>
+#include <crypto/hash.h>
+#include <trace/events/tcp.h>
+
+/* checks that ipv4 or ipv6 addr matches. */
+static bool ipvx_addr_match(struct sockaddr_storage *a1,
+			    struct sockaddr_storage *a2)
+{
+	if (a1->ss_family != a2->ss_family)
+		return false;
+	if (a1->ss_family == AF_INET && memcmp(
+			&((struct sockaddr_in *)a1)->sin_addr,
+			&((struct sockaddr_in *)a2)->sin_addr,
+			sizeof(struct in_addr)))
+		return false;
+	if (a1->ss_family == AF_INET6 && memcmp(
+			&((struct sockaddr_in6 *)a1)->sin6_addr,
+			&((struct sockaddr_in6 *)a2)->sin6_addr,
+			sizeof(struct in6_addr)))
+		return false;
+	return true;
+}
+
+static bool tcp_authopt_key_match_exact(struct tcp_authopt_key_info *info,
+					struct tcp_authopt_key *key)
+{
+	if (info->send_id != key->send_id)
+		return false;
+	if (info->recv_id != key->recv_id)
+		return false;
+	if ((info->flags & TCP_AUTHOPT_KEY_ADDR_BIND) != (key->recv_id & TCP_AUTHOPT_KEY_ADDR_BIND))
+		return false;
+	if (info->flags & TCP_AUTHOPT_KEY_ADDR_BIND)
+		if (!ipvx_addr_match(&info->addr, &key->addr))
+			return false;
+
+	return true;
+}
+
+static struct tcp_authopt_key_info *tcp_authopt_key_lookup_exact(const struct sock *sk,
+								 struct tcp_authopt_info *info,
+								 struct tcp_authopt_key *ukey)
+{
+	struct tcp_authopt_key_info *key_info;
+
+	hlist_for_each_entry_rcu(key_info, &info->head, node, lockdep_sock_is_held(sk))
+		if (tcp_authopt_key_match_exact(key_info, ukey))
+			return key_info;
+
+	return NULL;
+}
+
+static struct tcp_authopt_info *__tcp_authopt_info_get_or_create(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_authopt_info *info;
+
+	info = rcu_dereference_check(tp->authopt_info, lockdep_sock_is_held(sk));
+	if (info)
+		return info;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL | __GFP_ZERO);
+	if (!info)
+		return ERR_PTR(-ENOMEM);
+
+	sk_nocaps_add(sk, NETIF_F_GSO_MASK);
+	INIT_HLIST_HEAD(&info->head);
+	rcu_assign_pointer(tp->authopt_info, info);
+
+	return info;
+}
+
+#define TCP_AUTHOPT_KNOWN_FLAGS ( \
+	TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED)
+
+int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen)
+{
+	struct tcp_authopt opt;
+	struct tcp_authopt_info *info;
+
+	sock_owned_by_me(sk);
+
+	/* If userspace optlen is too short fill the rest with zeros */
+	if (optlen > sizeof(opt))
+		return -EINVAL;
+	memset(&opt, 0, sizeof(opt));
+	if (copy_from_sockptr(&opt, optval, optlen))
+		return -EFAULT;
+
+	if (opt.flags & ~TCP_AUTHOPT_KNOWN_FLAGS)
+		return -EINVAL;
+
+	info = __tcp_authopt_info_get_or_create(sk);
+	if (IS_ERR(info))
+		return PTR_ERR(info);
+
+	info->flags = opt.flags & TCP_AUTHOPT_KNOWN_FLAGS;
+
+	return 0;
+}
+
+int tcp_get_authopt_val(struct sock *sk, struct tcp_authopt *opt)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_authopt_info *info;
+
+	sock_owned_by_me(sk);
+
+	memset(opt, 0, sizeof(*opt));
+	info = rcu_dereference_check(tp->authopt_info, lockdep_sock_is_held(sk));
+	if (!info)
+		return -EINVAL;
+
+	opt->flags = info->flags & TCP_AUTHOPT_KNOWN_FLAGS;
+
+	return 0;
+}
+
+static void tcp_authopt_key_del(struct sock *sk,
+				struct tcp_authopt_info *info,
+				struct tcp_authopt_key_info *key)
+{
+	hlist_del_rcu(&key->node);
+	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
+	kfree_rcu(key, rcu);
+}
+
+/* free info and keys but don't touch tp->authopt_info */
+static void __tcp_authopt_info_free(struct sock *sk, struct tcp_authopt_info *info)
+{
+	struct hlist_node *n;
+	struct tcp_authopt_key_info *key;
+
+	hlist_for_each_entry_safe(key, n, &info->head, node)
+		tcp_authopt_key_del(sk, info, key);
+	kfree_rcu(info, rcu);
+}
+
+/* free everything and clear tcp_sock.authopt_info to NULL */
+void tcp_authopt_clear(struct sock *sk)
+{
+	struct tcp_authopt_info *info;
+
+	info = rcu_dereference_protected(tcp_sk(sk)->authopt_info, lockdep_sock_is_held(sk));
+	if (info) {
+		__tcp_authopt_info_free(sk, info);
+		tcp_sk(sk)->authopt_info = NULL;
+	}
+}
+
+#define TCP_AUTHOPT_KEY_KNOWN_FLAGS ( \
+	TCP_AUTHOPT_KEY_DEL | \
+	TCP_AUTHOPT_KEY_EXCLUDE_OPTS | \
+	TCP_AUTHOPT_KEY_ADDR_BIND)
+
+int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen)
+{
+	struct tcp_authopt_key opt;
+	struct tcp_authopt_info *info;
+	struct tcp_authopt_key_info *key_info;
+
+	sock_owned_by_me(sk);
+
+	/* If userspace optlen is too short fill the rest with zeros */
+	if (optlen > sizeof(opt))
+		return -EINVAL;
+	memset(&opt, 0, sizeof(opt));
+	if (copy_from_sockptr(&opt, optval, optlen))
+		return -EFAULT;
+
+	if (opt.flags & ~TCP_AUTHOPT_KEY_KNOWN_FLAGS)
+		return -EINVAL;
+
+	if (opt.keylen > TCP_AUTHOPT_MAXKEYLEN)
+		return -EINVAL;
+
+	/* Delete is a special case: */
+	if (opt.flags & TCP_AUTHOPT_KEY_DEL) {
+		info = rcu_dereference_check(tcp_sk(sk)->authopt_info, lockdep_sock_is_held(sk));
+		if (!info)
+			return -ENOENT;
+		key_info = tcp_authopt_key_lookup_exact(sk, info, &opt);
+		if (!key_info)
+			return -ENOENT;
+		tcp_authopt_key_del(sk, info, key_info);
+		return 0;
+	}
+
+	/* check key family */
+	if (opt.flags & TCP_AUTHOPT_KEY_ADDR_BIND) {
+		if (sk->sk_family != opt.addr.ss_family)
+			return -EINVAL;
+	}
+
+	/* Initialize tcp_authopt_info if not already set */
+	info = __tcp_authopt_info_get_or_create(sk);
+	if (IS_ERR(info))
+		return PTR_ERR(info);
+
+	/* If an old key exists with exact ID then remove and replace.
+	 * RCU-protected readers might observe both and pick any.
+	 */
+	key_info = tcp_authopt_key_lookup_exact(sk, info, &opt);
+	if (key_info)
+		tcp_authopt_key_del(sk, info, key_info);
+	key_info = sock_kmalloc(sk, sizeof(*key_info), GFP_KERNEL | __GFP_ZERO);
+	if (!key_info)
+		return -ENOMEM;
+	key_info->flags = opt.flags & TCP_AUTHOPT_KEY_KNOWN_FLAGS;
+	key_info->send_id = opt.send_id;
+	key_info->recv_id = opt.recv_id;
+	key_info->alg_id = opt.alg;
+	key_info->keylen = opt.keylen;
+	memcpy(key_info->key, opt.key, opt.keylen);
+	memcpy(&key_info->addr, &opt.addr, sizeof(key_info->addr));
+	hlist_add_head_rcu(&key_info->node, &info->head);
+
+	return 0;
+}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2e62e0d6373a..1348615c7576 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -60,10 +60,11 @@ 
 
 #include <net/net_namespace.h>
 #include <net/icmp.h>
 #include <net/inet_hashtables.h>
 #include <net/tcp.h>
+#include <net/tcp_authopt.h>
 #include <net/transp_v6.h>
 #include <net/ipv6.h>
 #include <net/inet_common.h>
 #include <net/timewait_sock.h>
 #include <net/xfrm.h>
@@ -2256,10 +2257,11 @@  void tcp_v4_destroy_sock(struct sock *sk)
 		tcp_clear_md5_list(sk);
 		kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
 		tp->md5sig_info = NULL;
 	}
 #endif
+	tcp_authopt_clear(sk);
 
 	/* Clean up a referenced TCP bind bucket. */
 	if (inet_csk(sk)->icsk_bind_hash)
 		inet_put_port(sk);