diff mbox series

[RFCv2,1/9] tcp: authopt: Initial support and key management

Message ID 67c1471683200188b96a3f712dd2e8def7978462.1628544649.git.cdleonard@gmail.com
State New
Headers show
Series tcp: Initial support for RFC5925 auth option | expand

Commit Message

Leonard Crestez Aug. 9, 2021, 9:35 p.m. UTC
This commit add support to add and remove keys but does not use them
further.

Similar to tcp md5 a single point to a struct tcp_authopt_info* struct
is added to struct tcp_sock in order to avoid increasing memory usage
for everybody. The data structures related to tcp_authopt are
initialized on setsockopt and only removed on socket close.

Signed-off-by: Leonard Crestez <cdleonard@gmail.com>
---
 include/linux/tcp.h       |   6 ++
 include/net/tcp.h         |   1 +
 include/net/tcp_authopt.h |  55 ++++++++++++
 include/uapi/linux/tcp.h  |  72 ++++++++++++++++
 net/ipv4/Kconfig          |  14 ++++
 net/ipv4/Makefile         |   1 +
 net/ipv4/tcp.c            |  27 ++++++
 net/ipv4/tcp_authopt.c    | 172 ++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_ipv4.c       |   2 +
 9 files changed, 350 insertions(+)
 create mode 100644 include/net/tcp_authopt.h
 create mode 100644 net/ipv4/tcp_authopt.c

Comments

Dmitry Safonov Aug. 10, 2021, 8:41 p.m. UTC | #1
Hi Leonard,

On Tue, 10 Aug 2021 at 02:50, Leonard Crestez <cdleonard@gmail.com> wrote:
[..]
> +/* Representation of a Master Key Tuple as per RFC5925 */
> +struct tcp_authopt_key_info {
> +       struct hlist_node node;
> +       /* Local identifier */
> +       u32 local_id;

There is no local_id in RFC5925, what's that?
An MKT is identified by (send_id, recv_id), together with
(src_addr/src_port, dst_addr/dst_port).
Why introducing something new to already complicated RFC?

> +       u32 flags;
> +       /* Wire identifiers */
> +       u8 send_id, recv_id;
> +       u8 alg_id;
> +       u8 keylen;
> +       u8 key[TCP_AUTHOPT_MAXKEYLEN];
> +       struct rcu_head rcu;

This is unaligned and will add padding.
I wonder if it's also worth saving some bytes by something like
struct tcp_ao_key *key;

With
struct tcp_ao_key {
      u8 keylen;
      u8 key[0];
};

Hmm?

> +       struct sockaddr_storage addr;
> +};
> +
> +/* Per-socket information regarding tcp_authopt */
> +struct tcp_authopt_info {
> +       /* List of tcp_authopt_key_info */
> +       struct hlist_head head;
> +       u32 flags;
> +       u32 src_isn;
> +       u32 dst_isn;
> +       struct rcu_head rcu;

Ditto, adds padding on 64-bit.

[..]
> +++ b/include/uapi/linux/tcp.h
> @@ -126,10 +126,12 @@ enum {
>  #define TCP_INQ                        36      /* Notify bytes available to read as a cmsg on read */
>
>  #define TCP_CM_INQ             TCP_INQ
>
>  #define TCP_TX_DELAY           37      /* delay outgoing packets by XX usec */
> +#define TCP_AUTHOPT                    38      /* TCP Authentication Option (RFC2385) */
> +#define TCP_AUTHOPT_KEY                39      /* TCP Authentication Option update key (RFC2385) */

RFC2385 is md5 one.
Also, should there be TCP_AUTHOPT_ADD_KEY, TCP_AUTHOPT_DELTE_KEY
instead of using flags inside setsocketopt()? (no hard feelings)

[..]
> +/**
> + * enum tcp_authopt_flag - flags for `tcp_authopt.flags`
> + */
> +enum tcp_authopt_flag {
> +       /**
> +        * @TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED:
> +        *      Configure behavior of segments with TCP-AO coming from hosts for which no
> +        *      key is configured. The default recommended by RFC is to silently accept
> +        *      such connections.
> +        */
> +       TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED = (1 << 2),
> +};
> +
> +/**
> + * struct tcp_authopt - Per-socket options related to TCP Authentication Option
> + */
> +struct tcp_authopt {
> +       /** @flags: Combination of &enum tcp_authopt_flag */
> +       __u32   flags;
> +};

I'm not sure what's the use of enum here, probably:
#define TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED BIT(2)

[..]
> +struct tcp_authopt_key {
> +       /** @flags: Combination of &enum tcp_authopt_key_flag */
> +       __u32   flags;
> +       /** @local_id: Local identifier */
> +       __u32   local_id;
> +       /** @send_id: keyid value for send */
> +       __u8    send_id;
> +       /** @recv_id: keyid value for receive */
> +       __u8    recv_id;
> +       /** @alg: One of &enum tcp_authopt_alg */
> +       __u8    alg;
> +       /** @keylen: Length of the key buffer */
> +       __u8    keylen;
> +       /** @key: Secret key */
> +       __u8    key[TCP_AUTHOPT_MAXKEYLEN];
> +       /**
> +        * @addr: Key is only valid for this address
> +        *
> +        * Ignored unless TCP_AUTHOPT_KEY_ADDR_BIND flag is set
> +        */
> +       struct __kernel_sockaddr_storage addr;
> +};

It'll be an ABI if this is accepted. As it is - it can't support RFC5925 fully.
Extending syscall ABI is painful. I think, even the initial ABI *must* support
all possible features of the RFC.
In other words, there must be src_addr, src_port, dst_addr and dst_port.
I can see from docs you've written you don't want to support a mix of different
addr/port MKTs, so you can return -EINVAL or -ENOTSUPP for any value
but zero.
This will create an ABI that can be later extended to fully support RFC.

The same is about key: I don't think you need to define/use tcp_authopt_alg.
Just use algo name - that way TCP-AO will automatically be able to use
any algo supported by crypto engine.
See how xfrm does it, e.g.:
struct xfrm_algo_auth {
    char        alg_name[64];
    unsigned int    alg_key_len;    /* in bits */
    unsigned int    alg_trunc_len;  /* in bits */
    char        alg_key[0];
};

So you can let a user chose maclen instead of hard-coding it.
Much more extendable than what you propose.

[..]
> +#ifdef CONFIG_TCP_AUTHOPT
> +       case TCP_AUTHOPT: {
> +               struct tcp_authopt info;
> +
> +               if (get_user(len, optlen))
> +                       return -EFAULT;
> +
> +               lock_sock(sk);
> +               tcp_get_authopt_val(sk, &info);
> +               release_sock(sk);
> +
> +               len = min_t(unsigned int, len, sizeof(info));
> +               if (put_user(len, optlen))
> +                       return -EFAULT;
> +               if (copy_to_user(optval, &info, len))
> +                       return -EFAULT;
> +               return 0;
> +       }

I'm pretty sure it's not a good choice to write partly tcp_authopt.
And a user has no way to check what's the correct len on this kernel.
Instead of len = min_t(unsigned int, len, sizeof(info)), it should be
if (len != sizeof(info))
    return -EINVAL;

[..]
> +int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen)
> +{
> +       struct tcp_authopt opt;
> +       struct tcp_authopt_info *info;
> +
> +       WARN_ON(!lockdep_sock_is_held(sk));

sock_owned_by_me(sk)

> +
> +       /* If userspace optlen is too short fill the rest with zeros */
> +       if (optlen > sizeof(opt))
> +               return -EINVAL;
> +       memset(&opt, 0, sizeof(opt));

it's 4 bytes, why not just (optlen != sizeof(opt))?

[..]
> +int tcp_get_authopt_val(struct sock *sk, struct tcp_authopt *opt)
> +{
> +       struct tcp_sock *tp = tcp_sk(sk);
> +       struct tcp_authopt_info *info;
> +
> +       WARN_ON(!lockdep_sock_is_held(sk));

sock_owned_by_me(sk)

[..]
> +int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen)
> +{
> +       struct tcp_authopt_key opt;
> +       struct tcp_authopt_info *info;
> +       struct tcp_authopt_key_info *key_info;
> +
> +       /* If userspace optlen is too short fill the rest with zeros */
> +       if (optlen > sizeof(opt))
> +               return -EINVAL;
> +       memset(&opt, 0, sizeof(opt));
> +       if (copy_from_sockptr(&opt, optval, optlen))
> +               return -EFAULT;

Again, not a good practice to zero-extend struct. Enforce proper size
with -EINVAL.

[..]
> +       /* Initialize tcp_authopt_info if not already set */
> +       info = __tcp_authopt_info_get_or_create(sk);
> +       if (IS_ERR(info))
> +               return PTR_ERR(info);
> +
> +       /* check key family */
> +       if (opt.flags & TCP_AUTHOPT_KEY_ADDR_BIND) {
> +               if (sk->sk_family != opt.addr.ss_family)
> +                       return -EINVAL;
> +       }

Probably, can be in the reverse order, so that
__tcp_authopt_info_get_or_create()
won't allocate memory.

> +       /* If an old value exists for same local_id it is deleted */
> +       key_info = __tcp_authopt_key_info_lookup(sk, info, opt.local_id);
> +       if (key_info)
> +               tcp_authopt_key_del(sk, info, key_info);
> +       key_info = sock_kmalloc(sk, sizeof(*key_info), GFP_KERNEL | __GFP_ZERO);
> +       if (!key_info)
> +               return -ENOMEM;

1. You don't need sock_kmalloc() together with tcp_authopt_key_del().
    It just frees the memory and allocates it back straight away - no
sense in doing that.
2. I think RFC says you must not allow a user to change an existing key:
> MKT parameters are not changed. Instead, new MKTs can be installed, and a connection
> can change which MKT it uses.

IIUC, it means that one can't just change an existing MKT, but one can
remove and later
add MKT with the same (send_id, recv_id, src_addr/port, dst_addr/port).

So, a reasonable thing to do:
if (key_info)
    return -EEXIST.

Thanks,
             Dmitry
David Ahern Aug. 11, 2021, 1:42 p.m. UTC | #2
On 8/11/21 2:29 AM, Leonard Crestez wrote:
> On 8/10/21 11:41 PM, Dmitry Safonov wrote:
>> Hi Leonard,
>>
>> On Tue, 10 Aug 2021 at 02:50, Leonard Crestez <cdleonard@gmail.com>
>> wrote:
>> [..]
>>> +/* Representation of a Master Key Tuple as per RFC5925 */
>>> +struct tcp_authopt_key_info {
>>> +       struct hlist_node node;
>>> +       /* Local identifier */
>>> +       u32 local_id;
>>
>> There is no local_id in RFC5925, what's that?
>> An MKT is identified by (send_id, recv_id), together with
>> (src_addr/src_port, dst_addr/dst_port).
>> Why introducing something new to already complicated RFC?
> 
> It was there to simplify user interface and initial implementation.
> 
> But it seems that BGP listeners already needs to support multiple
> keychains for different peers so identifying the key by (send_id,
> recv_id, binding) is easier for userspace to work with. Otherwise they
> need to create their own local_id mapping internally.
> 

any proposed simplification needs to be well explained and how it
relates to the RFC spec.
Dmitry Safonov Aug. 11, 2021, 8:26 p.m. UTC | #3
On 8/11/21 8:11 PM, Leonard Crestez wrote:
> On 11.08.2021 16:42, David Ahern wrote:
[..]
>>
>> any proposed simplification needs to be well explained and how it
>> relates to the RFC spec.
> 
> The local_id only exists between userspace and kernel so it's not really
> covered by the RFC.
> 
> There are objections to this and it seems to be unhelpful for userspace
> zo I will replace it with match by binding.
> 
> BTW: another somewhat dubious simplification is that I offloaded the RFC
> requirement to never add overlapping keys to userspace. So if userspace
> adds keys with same recvid that match the same TCP 4-tuple then
> connections will just start failing.
> 
> It's arguably fine to allow userspace misconfiguration to cause failures.

I think it's fine. But worth documenting. Also, keep in mind that
someone in userspace with his funny ideas might start relying on such
behavior in future.

Thanks,
        Dmitry
Leonard Crestez Aug. 12, 2021, 7:46 p.m. UTC | #4
On 8/11/21 11:29 AM, Leonard Crestez wrote:
> On 8/10/21 11:41 PM, Dmitry Safonov wrote:
>> On Tue, 10 Aug 2021 at 02:50, Leonard Crestez <cdleonard@gmail.com> 
>>> +       /* If an old value exists for same local_id it is deleted */
>>> +       key_info = __tcp_authopt_key_info_lookup(sk, info, 
>>> opt.local_id);
>>> +       if (key_info)
>>> +               tcp_authopt_key_del(sk, info, key_info);
>>> +       key_info = sock_kmalloc(sk, sizeof(*key_info), GFP_KERNEL | 
>>> __GFP_ZERO);
>>> +       if (!key_info)
>>> +               return -ENOMEM;
>>
>> 1. You don't need sock_kmalloc() together with tcp_authopt_key_del().
>>      It just frees the memory and allocates it back straight away - no
>> sense in doing that.
> 
> The list is scanned in multiple places in later commits using nothing 
> but an rcu_read_lock, this means that keys can't be updated in-place.
> 
>> 2. I think RFC says you must not allow a user to change an existing key:
>>> MKT parameters are not changed. Instead, new MKTs can be installed, 
>>> and a connection
>>> can change which MKT it uses.
>>
>> IIUC, it means that one can't just change an existing MKT, but one can
>> remove and later
>> add MKT with the same (send_id, recv_id, src_addr/port, dst_addr/port).
>>
>> So, a reasonable thing to do:
>> if (key_info)
>>      return -EEXIST.
> 
> You're right, making the user delete keys explicitly is better.

On a second thought this might be required to mark keys as "send-only" 
and "recv-only" atomically.

Separately from RFC5925 some vendors implement a "keychain" model based 
on RFC8177 where each key has a distinct "accept-lifetime" and a 
"send-lifetime". This could be implemented by adding flags 
"expired_for_send" and "expired_for_recv" but requires the ability to 
set an expiration mark without the key ever being deleted.

--
Regards,
Leonard
diff mbox series

Patch

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 48d8a363319e..cfddfc720b00 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -140,10 +140,12 @@  struct tcp_request_sock {
 static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
 {
 	return (struct tcp_request_sock *)req;
 }
 
+struct tcp_authopt_info;
+
 struct tcp_sock {
 	/* inet_connection_sock has to be the first member of tcp_sock */
 	struct inet_connection_sock	inet_conn;
 	u16	tcp_header_len;	/* Bytes of tcp header to send		*/
 	u16	gso_segs;	/* Max number of segs per GSO packet	*/
@@ -403,10 +405,14 @@  struct tcp_sock {
 
 /* TCP MD5 Signature Option information */
 	struct tcp_md5sig_info	__rcu *md5sig_info;
 #endif
 
+#ifdef CONFIG_TCP_AUTHOPT
+	struct tcp_authopt_info	__rcu *authopt_info;
+#endif
+
 /* TCP fastopen related information */
 	struct tcp_fastopen_request *fastopen_req;
 	/* fastopen_rsk points to request_sock that resulted in this big
 	 * socket. Used to retransmit SYNACKs etc.
 	 */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3166dc15d7d6..bb76554e8fe5 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -182,10 +182,11 @@  void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOPT_WINDOW		3	/* Window scaling */
 #define TCPOPT_SACK_PERM        4       /* SACK Permitted */
 #define TCPOPT_SACK             5       /* SACK Block */
 #define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
 #define TCPOPT_MD5SIG		19	/* MD5 Signature (RFC2385) */
+#define TCPOPT_AUTHOPT		29	/* Auth Option (RFC5925) */
 #define TCPOPT_MPTCP		30	/* Multipath TCP (RFC6824) */
 #define TCPOPT_FASTOPEN		34	/* Fast open (RFC7413) */
 #define TCPOPT_EXP		254	/* Experimental */
 /* Magic number to be after the option value for sharing TCP
  * experimental options. See draft-ietf-tcpm-experimental-options-00.txt
diff --git a/include/net/tcp_authopt.h b/include/net/tcp_authopt.h
new file mode 100644
index 000000000000..458d108bb7a8
--- /dev/null
+++ b/include/net/tcp_authopt.h
@@ -0,0 +1,55 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _LINUX_TCP_AUTHOPT_H
+#define _LINUX_TCP_AUTHOPT_H
+
+#include <uapi/linux/tcp.h>
+
+/* Representation of a Master Key Tuple as per RFC5925 */
+struct tcp_authopt_key_info {
+	struct hlist_node node;
+	/* Local identifier */
+	u32 local_id;
+	u32 flags;
+	/* Wire identifiers */
+	u8 send_id, recv_id;
+	u8 alg_id;
+	u8 keylen;
+	u8 key[TCP_AUTHOPT_MAXKEYLEN];
+	struct rcu_head rcu;
+	struct sockaddr_storage addr;
+};
+
+/* Per-socket information regarding tcp_authopt */
+struct tcp_authopt_info {
+	/* List of tcp_authopt_key_info */
+	struct hlist_head head;
+	u32 flags;
+	u32 src_isn;
+	u32 dst_isn;
+	struct rcu_head rcu;
+};
+
+#ifdef CONFIG_TCP_AUTHOPT
+void tcp_authopt_clear(struct sock *sk);
+int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen);
+int tcp_get_authopt_val(struct sock *sk, struct tcp_authopt *key);
+int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen);
+#else
+static inline int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen)
+{
+	return -ENOPROTOOPT;
+}
+static inline int tcp_get_authopt_val(struct sock *sk, struct tcp_authopt *key)
+{
+	return -ENOPROTOOPT;
+}
+static inline void tcp_authopt_clear(struct sock *sk)
+{
+}
+static inline int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen)
+{
+	return -ENOPROTOOPT;
+}
+#endif
+
+#endif /* _LINUX_TCP_AUTHOPT_H */
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 8fc09e8638b3..bc47664156eb 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -126,10 +126,12 @@  enum {
 #define TCP_INQ			36	/* Notify bytes available to read as a cmsg on read */
 
 #define TCP_CM_INQ		TCP_INQ
 
 #define TCP_TX_DELAY		37	/* delay outgoing packets by XX usec */
+#define TCP_AUTHOPT			38	/* TCP Authentication Option (RFC2385) */
+#define TCP_AUTHOPT_KEY		39	/* TCP Authentication Option update key (RFC2385) */
 
 
 #define TCP_REPAIR_ON		1
 #define TCP_REPAIR_OFF		0
 #define TCP_REPAIR_OFF_NO_WP	-1	/* Turn off without window probes */
@@ -340,10 +342,80 @@  struct tcp_diag_md5sig {
 	__u16	tcpm_keylen;
 	__be32	tcpm_addr[4];
 	__u8	tcpm_key[TCP_MD5SIG_MAXKEYLEN];
 };
 
+/**
+ * enum tcp_authopt_flag - flags for `tcp_authopt.flags`
+ */
+enum tcp_authopt_flag {
+	/**
+	 * @TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED:
+	 *	Configure behavior of segments with TCP-AO coming from hosts for which no
+	 *	key is configured. The default recommended by RFC is to silently accept
+	 *	such connections.
+	 */
+	TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED = (1 << 2),
+};
+
+/**
+ * struct tcp_authopt - Per-socket options related to TCP Authentication Option
+ */
+struct tcp_authopt {
+	/** @flags: Combination of &enum tcp_authopt_flag */
+	__u32	flags;
+};
+
+/**
+ * enum tcp_authopt_key_flag - flags for `tcp_authopt.flags`
+ *
+ * @TCP_AUTHOPT_KEY_DEL: Delete the key by local_id and ignore all other fields.
+ * @TCP_AUTHOPT_KEY_EXCLUDE_OPTS: Exclude TCP options from signature.
+ * @TCP_AUTHOPT_KEY_ADDR_BIND: Key only valid for `tcp_authopt.addr`
+ */
+enum tcp_authopt_key_flag {
+	TCP_AUTHOPT_KEY_DEL = (1 << 0),
+	TCP_AUTHOPT_KEY_EXCLUDE_OPTS = (1 << 1),
+	TCP_AUTHOPT_KEY_ADDR_BIND = (1 << 2),
+};
+
+/* for TCP_AUTHOPT_KEY socket option */
+#define TCP_AUTHOPT_MAXKEYLEN	80
+
+enum tcp_authopt_alg {
+	TCP_AUTHOPT_ALG_HMAC_SHA_1_96 = 1,
+	TCP_AUTHOPT_ALG_AES_128_CMAC_96 = 2,
+};
+
+/**
+ * struct tcp_authopt_key - TCP Authentication KEY
+ *
+ * Each key is identified by a non-zero local_id which is managed by the application.
+ */
+struct tcp_authopt_key {
+	/** @flags: Combination of &enum tcp_authopt_key_flag */
+	__u32	flags;
+	/** @local_id: Local identifier */
+	__u32	local_id;
+	/** @send_id: keyid value for send */
+	__u8	send_id;
+	/** @recv_id: keyid value for receive */
+	__u8	recv_id;
+	/** @alg: One of &enum tcp_authopt_alg */
+	__u8	alg;
+	/** @keylen: Length of the key buffer */
+	__u8	keylen;
+	/** @key: Secret key */
+	__u8	key[TCP_AUTHOPT_MAXKEYLEN];
+	/**
+	 * @addr: Key is only valid for this address
+	 *
+	 * Ignored unless TCP_AUTHOPT_KEY_ADDR_BIND flag is set
+	 */
+	struct __kernel_sockaddr_storage addr;
+};
+
 /* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */
 
 #define TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT 0x1
 struct tcp_zerocopy_receive {
 	__u64 address;		/* in: address of mapping */
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 87983e70f03f..6459f4ea6f1d 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -740,5 +740,19 @@  config TCP_MD5SIG
 	  RFC2385 specifies a method of giving MD5 protection to TCP sessions.
 	  Its main (only?) use is to protect BGP sessions between core routers
 	  on the Internet.
 
 	  If unsure, say N.
+
+config TCP_AUTHOPT
+	bool "TCP: Authentication Option support (RFC5925)"
+	select CRYPTO
+	select CRYPTO_SHA1
+	select CRYPTO_HMAC
+	select CRYPTO_AES
+	select CRYPTO_CMAC
+	help
+	  RFC5925 specifies a new method of giving protection to TCP sessions.
+	  Its intended use is to protect BGP sessions between core routers
+	  on the Internet. It obsoletes TCP MD5 (RFC2385) but is incompatible.
+
+	  If unsure, say N.
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index bbdd9c44f14e..d336f32ce177 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -59,10 +59,11 @@  obj-$(CONFIG_TCP_CONG_NV) += tcp_nv.o
 obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
 obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
 obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
 obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
+obj-$(CONFIG_TCP_AUTHOPT) += tcp_authopt.o
 obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
 obj-$(CONFIG_BPF_SYSCALL) += udp_bpf.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f931def6302e..fd90e80afa2c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,10 +271,11 @@ 
 
 #include <net/icmp.h>
 #include <net/inet_common.h>
 #include <net/tcp.h>
 #include <net/mptcp.h>
+#include <net/tcp_authopt.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 #include <net/sock.h>
 
 #include <linux/uaccess.h>
@@ -3573,10 +3574,16 @@  static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
 	case TCP_MD5SIG:
 	case TCP_MD5SIG_EXT:
 		err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
 		break;
 #endif
+	case TCP_AUTHOPT:
+		err = tcp_set_authopt(sk, optval, optlen);
+		break;
+	case TCP_AUTHOPT_KEY:
+		err = tcp_set_authopt_key(sk, optval, optlen);
+		break;
 	case TCP_USER_TIMEOUT:
 		/* Cap the max time in ms TCP will retry or probe the window
 		 * before giving up and aborting (ETIMEDOUT) a connection.
 		 */
 		if (val < 0)
@@ -4219,10 +4226,30 @@  static int do_tcp_getsockopt(struct sock *sk, int level,
 		if (!err && copy_to_user(optval, &zc, len))
 			err = -EFAULT;
 		return err;
 	}
 #endif
+#ifdef CONFIG_TCP_AUTHOPT
+	case TCP_AUTHOPT: {
+		struct tcp_authopt info;
+
+		if (get_user(len, optlen))
+			return -EFAULT;
+
+		lock_sock(sk);
+		tcp_get_authopt_val(sk, &info);
+		release_sock(sk);
+
+		len = min_t(unsigned int, len, sizeof(info));
+		if (put_user(len, optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, &info, len))
+			return -EFAULT;
+		return 0;
+	}
+#endif
+
 	default:
 		return -ENOPROTOOPT;
 	}
 
 	if (put_user(len, optlen))
diff --git a/net/ipv4/tcp_authopt.c b/net/ipv4/tcp_authopt.c
new file mode 100644
index 000000000000..5fa7bce8891b
--- /dev/null
+++ b/net/ipv4/tcp_authopt.c
@@ -0,0 +1,172 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/kernel.h>
+#include <net/tcp.h>
+#include <net/tcp_authopt.h>
+#include <crypto/hash.h>
+#include <trace/events/tcp.h>
+
+struct tcp_authopt_key_info *__tcp_authopt_key_info_lookup(const struct sock *sk,
+							   struct tcp_authopt_info *info,
+							   int key_id)
+{
+	struct tcp_authopt_key_info *key;
+
+	hlist_for_each_entry_rcu(key, &info->head, node, lockdep_sock_is_held(sk))
+		if (key->local_id == key_id)
+			return key;
+
+	return NULL;
+}
+
+static struct tcp_authopt_info *__tcp_authopt_info_get_or_create(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_authopt_info *info;
+
+	info = rcu_dereference_check(tp->authopt_info, lockdep_sock_is_held(sk));
+	if (info)
+		return info;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL | __GFP_ZERO);
+	if (!info)
+		return ERR_PTR(-ENOMEM);
+
+	sk_nocaps_add(sk, NETIF_F_GSO_MASK);
+	INIT_HLIST_HEAD(&info->head);
+	rcu_assign_pointer(tp->authopt_info, info);
+
+	return info;
+}
+
+int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen)
+{
+	struct tcp_authopt opt;
+	struct tcp_authopt_info *info;
+
+	WARN_ON(!lockdep_sock_is_held(sk));
+
+	/* If userspace optlen is too short fill the rest with zeros */
+	if (optlen > sizeof(opt))
+		return -EINVAL;
+	memset(&opt, 0, sizeof(opt));
+	if (copy_from_sockptr(&opt, optval, optlen))
+		return -EFAULT;
+
+	info = __tcp_authopt_info_get_or_create(sk);
+	if (IS_ERR(info))
+		return PTR_ERR(info);
+
+	info->flags = opt.flags & TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED);
+
+	return 0;
+}
+
+int tcp_get_authopt_val(struct sock *sk, struct tcp_authopt *opt)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_authopt_info *info;
+
+	WARN_ON(!lockdep_sock_is_held(sk));
+	memset(opt, 0, sizeof(*opt));
+	info = rcu_dereference_check(tp->authopt_info, lockdep_sock_is_held(sk));
+	if (!info)
+		return -EINVAL;
+	opt->flags = info->flags & TCP_AUTHOPT_FLAG_REJECT_UNEXPECTED;
+
+	return 0;
+}
+
+static void tcp_authopt_key_del(struct sock *sk,
+				struct tcp_authopt_info *info,
+				struct tcp_authopt_key_info *key)
+{
+	hlist_del_rcu(&key->node);
+	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
+	kfree_rcu(key, rcu);
+}
+
+/* free info and keys but don't touch tp->authopt_info */
+void __tcp_authopt_info_free(struct sock *sk, struct tcp_authopt_info *info)
+{
+	struct hlist_node *n;
+	struct tcp_authopt_key_info *key;
+
+	hlist_for_each_entry_safe(key, n, &info->head, node)
+		tcp_authopt_key_del(sk, info, key);
+	kfree_rcu(info, rcu);
+}
+
+/* free everything and clear tcp_sock.authopt_info to NULL */
+void tcp_authopt_clear(struct sock *sk)
+{
+	struct tcp_authopt_info *info;
+
+	info = rcu_dereference_protected(tcp_sk(sk)->authopt_info, lockdep_sock_is_held(sk));
+	if (info) {
+		__tcp_authopt_info_free(sk, info);
+		tcp_sk(sk)->authopt_info = NULL;
+	}
+}
+
+int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen)
+{
+	struct tcp_authopt_key opt;
+	struct tcp_authopt_info *info;
+	struct tcp_authopt_key_info *key_info;
+
+	/* If userspace optlen is too short fill the rest with zeros */
+	if (optlen > sizeof(opt))
+		return -EINVAL;
+	memset(&opt, 0, sizeof(opt));
+	if (copy_from_sockptr(&opt, optval, optlen))
+		return -EFAULT;
+
+	if (opt.keylen > TCP_AUTHOPT_MAXKEYLEN)
+		return -EINVAL;
+
+	if (opt.local_id == 0)
+		return -EINVAL;
+
+	/* Delete is a special case: we ignore all fields other than local_id */
+	if (opt.flags & TCP_AUTHOPT_KEY_DEL) {
+		info = rcu_dereference_check(tcp_sk(sk)->authopt_info, lockdep_sock_is_held(sk));
+		if (!info)
+			return -ENOENT;
+		key_info = __tcp_authopt_key_info_lookup(sk, info, opt.local_id);
+		if (!key_info)
+			return -ENOENT;
+		tcp_authopt_key_del(sk, info, key_info);
+		return 0;
+	}
+
+	/* Initialize tcp_authopt_info if not already set */
+	info = __tcp_authopt_info_get_or_create(sk);
+	if (IS_ERR(info))
+		return PTR_ERR(info);
+
+	/* check key family */
+	if (opt.flags & TCP_AUTHOPT_KEY_ADDR_BIND) {
+		if (sk->sk_family != opt.addr.ss_family)
+			return -EINVAL;
+	}
+
+	/* If an old value exists for same local_id it is deleted */
+	key_info = __tcp_authopt_key_info_lookup(sk, info, opt.local_id);
+	if (key_info)
+		tcp_authopt_key_del(sk, info, key_info);
+	key_info = sock_kmalloc(sk, sizeof(*key_info), GFP_KERNEL | __GFP_ZERO);
+	if (!key_info)
+		return -ENOMEM;
+	key_info->local_id = opt.local_id;
+	key_info->flags = opt.flags & (TCP_AUTHOPT_KEY_EXCLUDE_OPTS | TCP_AUTHOPT_KEY_ADDR_BIND);
+	key_info->send_id = opt.send_id;
+	key_info->recv_id = opt.recv_id;
+	key_info->alg_id = opt.alg;
+	key_info->keylen = opt.keylen;
+	memcpy(key_info->key, opt.key, opt.keylen);
+	memcpy(&key_info->addr, &opt.addr, sizeof(key_info->addr));
+	hlist_add_head_rcu(&key_info->node, &info->head);
+
+	return 0;
+}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2e62e0d6373a..1348615c7576 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -60,10 +60,11 @@ 
 
 #include <net/net_namespace.h>
 #include <net/icmp.h>
 #include <net/inet_hashtables.h>
 #include <net/tcp.h>
+#include <net/tcp_authopt.h>
 #include <net/transp_v6.h>
 #include <net/ipv6.h>
 #include <net/inet_common.h>
 #include <net/timewait_sock.h>
 #include <net/xfrm.h>
@@ -2256,10 +2257,11 @@  void tcp_v4_destroy_sock(struct sock *sk)
 		tcp_clear_md5_list(sk);
 		kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
 		tp->md5sig_info = NULL;
 	}
 #endif
+	tcp_authopt_clear(sk);
 
 	/* Clean up a referenced TCP bind bucket. */
 	if (inet_csk(sk)->icsk_bind_hash)
 		inet_put_port(sk);