@@ -58,6 +58,8 @@ struct ipv6_stub {
bool router, bool solicited, bool override, bool inc_opt);
#if IS_ENABLED(CONFIG_XFRM)
int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
+ int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi,
+ int encap_type);
#endif
struct neigh_table *nd_tbl;
};
@@ -384,6 +384,7 @@ config INET_ESPINTCP
depends on XFRM && INET_ESP
select STREAM_PARSER
select NET_SOCK_MSG
+ select XFRM_ESPINTCP
help
Support for RFC 8229 encapsulation of ESP and IKE over
TCP/IPv4 sockets.
@@ -88,6 +88,18 @@ config INET6_ESP_OFFLOAD
If unsure, say N.
+config INET6_ESPINTCP
+ bool "IPv6: ESP in TCP encapsulation (RFC 8229)"
+ depends on XFRM && INET6_ESP
+ select STREAM_PARSER
+ select NET_SOCK_MSG
+ select XFRM_ESPINTCP
+ help
+ Support for RFC 8229 encapsulation of ESP and IKE over
+ TCP/IPv6 sockets.
+
+ If unsure, say N.
+
config INET6_IPCOMP
tristate "IPv6: IPComp transformation"
select INET6_XFRM_TUNNEL
@@ -964,6 +964,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
.ndisc_send_na = ndisc_send_na,
#if IS_ENABLED(CONFIG_XFRM)
.xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv,
+ .xfrm6_rcv_encap = xfrm6_rcv_encap,
#endif
.nd_tbl = &nd_tbl,
};
@@ -33,6 +33,9 @@
#include <net/protocol.h>
#include <net/udp.h>
#include <linux/icmpv6.h>
+#include <net/tcp.h>
+#include <net/espintcp.h>
+#include <net/inet6_hashtables.h>
#include <linux/highmem.h>
@@ -132,6 +135,132 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
put_page(sg_page(sg));
}
+#ifdef CONFIG_INET6_ESPINTCP
+struct esp_tcp_sk {
+ struct sock *sk;
+ struct rcu_head rcu;
+};
+
+static void esp_free_tcp_sk(struct rcu_head *head)
+{
+ struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
+
+ sock_put(esk->sk);
+ kfree(esk);
+}
+
+static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
+{
+ struct xfrm_encap_tmpl *encap = x->encap;
+ struct esp_tcp_sk *esk;
+ __be16 sport, dport;
+ struct sock *nsk;
+ struct sock *sk;
+
+ sk = rcu_dereference(x->encap_sk);
+ if (sk && sk->sk_state == TCP_ESTABLISHED)
+ return sk;
+
+ spin_lock_bh(&x->lock);
+ sport = encap->encap_sport;
+ dport = encap->encap_dport;
+ nsk = rcu_dereference_protected(x->encap_sk,
+ lockdep_is_held(&x->lock));
+ if (sk && sk == nsk) {
+ esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
+ if (!esk) {
+ spin_unlock_bh(&x->lock);
+ return ERR_PTR(-ENOMEM);
+ }
+ RCU_INIT_POINTER(x->encap_sk, NULL);
+ esk->sk = sk;
+ call_rcu(&esk->rcu, esp_free_tcp_sk);
+ }
+ spin_unlock_bh(&x->lock);
+
+ sk = __inet6_lookup_established(xs_net(x), &tcp_hashinfo, &x->id.daddr.in6,
+ dport, &x->props.saddr.in6, ntohs(sport), 0, 0);
+ if (!sk)
+ return ERR_PTR(-ENOENT);
+
+ if (!tcp_is_ulp_esp(sk)) {
+ sock_put(sk);
+ return ERR_PTR(-EINVAL);
+ }
+
+ spin_lock_bh(&x->lock);
+ nsk = rcu_dereference_protected(x->encap_sk,
+ lockdep_is_held(&x->lock));
+ if (encap->encap_sport != sport ||
+ encap->encap_dport != dport) {
+ sock_put(sk);
+ sk = nsk ?: ERR_PTR(-EREMCHG);
+ } else if (sk == nsk) {
+ sock_put(sk);
+ } else {
+ rcu_assign_pointer(x->encap_sk, sk);
+ }
+ spin_unlock_bh(&x->lock);
+
+ return sk;
+}
+
+static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct sock *sk;
+ int err;
+
+ rcu_read_lock();
+
+ sk = esp6_find_tcp_sk(x);
+ err = PTR_ERR_OR_ZERO(sk);
+ if (err)
+ goto out;
+
+ bh_lock_sock(sk);
+ if (sock_owned_by_user(sk))
+ err = espintcp_queue_out(sk, skb);
+ else
+ err = espintcp_push_skb(sk, skb);
+ bh_unlock_sock(sk);
+
+out:
+ rcu_read_unlock();
+ return err;
+}
+
+static int esp_output_tcp_encap_cb(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct xfrm_state *x = dst->xfrm;
+
+ return esp_output_tcp_finish(x, skb);
+}
+
+static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err;
+
+ local_bh_disable();
+ err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb);
+ local_bh_enable();
+
+ /* EINPROGRESS just happens to do the right thing. It
+ * actually means that the skb has been consumed and
+ * isn't coming back.
+ */
+ return err ?: -EINPROGRESS;
+}
+#else
+static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
+{
+ kfree_skb(skb);
+
+ return -EOPNOTSUPP;
+}
+#endif
+
static void esp_output_encap_csum(struct sk_buff *skb)
{
/* UDP encap with IPv6 requires a valid checksum */
@@ -181,7 +310,11 @@ static void esp_output_done(struct crypto_async_request *base, int err)
secpath_reset(skb);
xfrm_dev_resume(skb);
} else {
- xfrm_output_resume(skb, err);
+ if (!err &&
+ x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
+ esp_output_tail_tcp(x, skb);
+ else
+ xfrm_output_resume(skb, err);
}
}
@@ -274,6 +407,41 @@ static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb,
return (struct ip_esp_hdr *)(uh + 1);
}
+#ifdef CONFIG_INET6_ESPINTCP
+static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x,
+ struct sk_buff *skb,
+ struct esp_info *esp)
+{
+ __be16 *lenp = (void *)esp->esph;
+ struct ip_esp_hdr *esph;
+ unsigned int len;
+ struct sock *sk;
+
+ len = skb->len + esp->tailen - skb_transport_offset(skb);
+ if (len > IP_MAX_MTU)
+ return ERR_PTR(-EMSGSIZE);
+
+ rcu_read_lock();
+ sk = esp6_find_tcp_sk(x);
+ rcu_read_unlock();
+
+ if (IS_ERR(sk))
+ return ERR_CAST(sk);
+
+ *lenp = htons(len);
+ esph = (struct ip_esp_hdr *)(lenp + 1);
+
+ return esph;
+}
+#else
+static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x,
+ struct sk_buff *skb,
+ struct esp_info *esp)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+#endif
+
static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb,
struct esp_info *esp)
{
@@ -294,6 +462,9 @@ static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb,
case UDP_ENCAP_ESPINUDP_NON_IKE:
esph = esp6_output_udp_encap(skb, encap_type, esp, sport, dport);
break;
+ case TCP_ENCAP_ESPINTCP:
+ esph = esp6_output_tcp_encap(x, skb, esp);
+ break;
}
if (IS_ERR(esph))
@@ -509,6 +680,9 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
if (sg != dsg)
esp_ssg_unref(x, tmp);
+ if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
+ err = esp_output_tail_tcp(x, skb);
+
error_free:
kfree(tmp);
error:
@@ -633,9 +807,13 @@ int esp6_input_done2(struct sk_buff *skb, int err)
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct xfrm_encap_tmpl *encap = x->encap;
struct udphdr *uh = (void *)(skb_network_header(skb) + hdr_len);
+ struct tcphdr *th = (void *)(skb_network_header(skb) + hdr_len);
__be16 source;
switch (x->encap->encap_type) {
+ case TCP_ENCAP_ESPINTCP:
+ source = th->source;
+ break;
case UDP_ENCAP_ESPINUDP:
case UDP_ENCAP_ESPINUDP_NON_IKE:
source = uh->source;
@@ -1039,6 +1217,14 @@ static int esp6_init_state(struct xfrm_state *x)
case UDP_ENCAP_ESPINUDP_NON_IKE:
x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
break;
+#ifdef CONFIG_INET6_ESPINTCP
+ case TCP_ENCAP_ESPINTCP:
+ /* only the length field, TCP encap is done by
+ * the socket
+ */
+ x->props.header_len += 2;
+ break;
+#endif
}
}
@@ -99,4 +99,7 @@ config NET_KEY_MIGRATE
If unsure, say N.
+config XFRM_ESPINTCP
+ bool
+
endif # INET
@@ -11,4 +11,4 @@ obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
obj-$(CONFIG_XFRM_USER) += xfrm_user.o
obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
-obj-$(CONFIG_INET_ESPINTCP) += espintcp.o
+obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o
@@ -6,6 +6,9 @@
#include <net/espintcp.h>
#include <linux/skmsg.h>
#include <net/inet_common.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6_stubs.h>
+#endif
static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb,
struct sock *sk)
@@ -31,7 +34,12 @@ static void handle_esp(struct sk_buff *skb, struct sock *sk)
rcu_read_lock();
skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
local_bh_disable();
- xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ ipv6_stub->xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP);
+ else
+#endif
+ xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP);
local_bh_enable();
rcu_read_unlock();
}
@@ -347,6 +355,9 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
static struct proto espintcp_prot __ro_after_init;
static struct proto_ops espintcp_ops __ro_after_init;
+static struct proto espintcp6_prot;
+static struct proto_ops espintcp6_ops;
+static DEFINE_MUTEX(tcpv6_prot_mutex);
static void espintcp_data_ready(struct sock *sk)
{
@@ -384,10 +395,14 @@ static void espintcp_destruct(struct sock *sk)
bool tcp_is_ulp_esp(struct sock *sk)
{
- return sk->sk_prot == &espintcp_prot;
+ return sk->sk_prot == &espintcp_prot || sk->sk_prot == &espintcp6_prot;
}
EXPORT_SYMBOL_GPL(tcp_is_ulp_esp);
+static void build_protos(struct proto *espintcp_prot,
+ struct proto_ops *espintcp_ops,
+ const struct proto *orig_prot,
+ const struct proto_ops *orig_ops);
static int espintcp_init_sk(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -415,8 +430,19 @@ static int espintcp_init_sk(struct sock *sk)
strp_check_rcv(&ctx->strp);
skb_queue_head_init(&ctx->ike_queue);
skb_queue_head_init(&ctx->out_queue);
- sk->sk_prot = &espintcp_prot;
- sk->sk_socket->ops = &espintcp_ops;
+
+ if (sk->sk_family == AF_INET) {
+ sk->sk_prot = &espintcp_prot;
+ sk->sk_socket->ops = &espintcp_ops;
+ } else {
+ mutex_lock(&tcpv6_prot_mutex);
+ if (!espintcp6_prot.recvmsg)
+ build_protos(&espintcp6_prot, &espintcp6_ops, sk->sk_prot, sk->sk_socket->ops);
+ mutex_unlock(&tcpv6_prot_mutex);
+
+ sk->sk_prot = &espintcp6_prot;
+ sk->sk_socket->ops = &espintcp6_ops;
+ }
ctx->saved_data_ready = sk->sk_data_ready;
ctx->saved_write_space = sk->sk_write_space;
sk->sk_data_ready = espintcp_data_ready;
@@ -489,6 +515,20 @@ static __poll_t espintcp_poll(struct file *file, struct socket *sock,
return mask;
}
+static void build_protos(struct proto *espintcp_prot,
+ struct proto_ops *espintcp_ops,
+ const struct proto *orig_prot,
+ const struct proto_ops *orig_ops)
+{
+ memcpy(espintcp_prot, orig_prot, sizeof(struct proto));
+ memcpy(espintcp_ops, orig_ops, sizeof(struct proto_ops));
+ espintcp_prot->sendmsg = espintcp_sendmsg;
+ espintcp_prot->recvmsg = espintcp_recvmsg;
+ espintcp_prot->close = espintcp_close;
+ espintcp_prot->release_cb = espintcp_release;
+ espintcp_ops->poll = espintcp_poll;
+}
+
static struct tcp_ulp_ops espintcp_ulp __read_mostly = {
.name = "espintcp",
.owner = THIS_MODULE,
@@ -497,13 +537,7 @@ static struct tcp_ulp_ops espintcp_ulp __read_mostly = {
void __init espintcp_init(void)
{
- memcpy(&espintcp_prot, &tcp_prot, sizeof(tcp_prot));
- memcpy(&espintcp_ops, &inet_stream_ops, sizeof(inet_stream_ops));
- espintcp_prot.sendmsg = espintcp_sendmsg;
- espintcp_prot.recvmsg = espintcp_recvmsg;
- espintcp_prot.close = espintcp_close;
- espintcp_prot.release_cb = espintcp_release;
- espintcp_ops.poll = espintcp_poll;
+ build_protos(&espintcp_prot, &espintcp_ops, &tcp_prot, &inet_stream_ops);
tcp_register_ulp(&espintcp_ulp);
}