@@ -43,6 +43,7 @@ int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req);
struct saved_syn {
u32 network_hdrlen;
+ u32 bpf_hdr_opt_off; /* copy of TCP_SKB_CB(skb)->bpf_hdr_opt_off taken when the SYN is saved */
u8 data[];
};
@@ -191,6 +191,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
*/
#define TCPOPT_FASTOPEN_MAGIC 0xF989
#define TCPOPT_SMC_MAGIC 0xE2D4C3D9
+#define TCPOPT_BPF_MAGIC 0xEB9F /* 16-bit magic matched (big-endian) in tcp_parse_options() to spot the BPF option */
/*
* TCP option lengths
@@ -204,6 +205,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOLEN_FASTOPEN_BASE 2
#define TCPOLEN_EXP_FASTOPEN_BASE 4
#define TCPOLEN_EXP_SMC_BASE 6
+#define TCPOLEN_EXP_BPF_BASE 4 /* minimum opsize accepted for the BPF option; presumably kind + len + 16-bit magic */
/* But this is what stacks really send out. */
#define TCPOLEN_TSTAMP_ALIGNED 12
@@ -857,6 +859,7 @@ struct tcp_skb_cb {
has_rxtstamp:1, /* SKB has a RX timestamp */
unused:5;
__u32 ack_seq; /* Sequence number ACK'd */
+ __u8 bpf_hdr_opt_off;/* offset to bpf hdr option. rx only. */
union {
struct {
/* There is space for up to 24 bytes */
@@ -3924,6 +3924,10 @@ void tcp_parse_options(const struct net *net,
tcp_parse_fastopen_option(opsize -
TCPOLEN_EXP_FASTOPEN_BASE,
ptr + 2, th->syn, foc, true);
+ else if (opsize >= TCPOLEN_EXP_BPF_BASE &&
+ get_unaligned_be16(ptr) ==
+ TCPOPT_BPF_MAGIC)
+ TCP_SKB_CB(skb)->bpf_hdr_opt_off = (ptr - 2) - (unsigned char *)th;
else
smc_parse_options(th, opt_rx, ptr,
opsize);
@@ -6562,6 +6566,8 @@ static void tcp_reqsk_record_syn(const struct sock *sk,
saved_syn = kmalloc(len + sizeof(*saved_syn), GFP_ATOMIC);
if (saved_syn) {
saved_syn->network_hdrlen = skb_network_header_len(skb);
+ saved_syn->bpf_hdr_opt_off =
+ TCP_SKB_CB(skb)->bpf_hdr_opt_off;
memcpy(saved_syn->data, skb_network_header(skb), len);
req->saved_syn = saved_syn;
}
@@ -1864,6 +1864,7 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->has_rxtstamp =
skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
+ TCP_SKB_CB(skb)->bpf_hdr_opt_off = 0;
}
/*
@@ -1545,6 +1545,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->has_rxtstamp =
skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
+ TCP_SKB_CB(skb)->bpf_hdr_opt_off = 0;
}
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
This patch adds logic to parse experimental kind 254 with the 16-bit magic 0xEB9F. A later patch will allow a BPF program to write and parse data under this experimental kind and magic. A one-byte bpf_hdr_opt_off is added to tcp_skb_cb by using an existing 4-byte hole. It is only used in rx. It stores the offset to the BPF experimental option and will be made available to BPF programs in a later patch. This offset is also stored in the saved_syn. Signed-off-by: Martin KaFai Lau <kafai@fb.com> --- include/net/request_sock.h | 1 + include/net/tcp.h | 3 +++ net/ipv4/tcp_input.c | 6 ++++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 5 files changed, 12 insertions(+)