[bpf-next,02/10] tcp: bpf: Parse BPF experimental header option

Message ID 20200626175514.1460570-1-kafai@fb.com
State New
Headers show
Series
  • Untitled series #40562
Related show

Commit Message

Martin KaFai Lau June 26, 2020, 5:55 p.m.
This patch adds logic to parse the experimental TCP option kind 254 with
the 16-bit magic 0xEB9F.  A later patch will allow a bpf prog to write and
parse data under this experimental kind and magic.

A one-byte bpf_hdr_opt_off is added to tcp_skb_cb by using an existing
4-byte hole.  It is only used in rx.  It stores the offset to the
bpf experimental option and will be made available to the BPF prog
in a later patch.  This offset is also stored in the saved_syn.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 include/net/request_sock.h | 1 +
 include/net/tcp.h          | 3 +++
 net/ipv4/tcp_input.c       | 6 ++++++
 net/ipv4/tcp_ipv4.c        | 1 +
 net/ipv6/tcp_ipv6.c        | 1 +
 5 files changed, 12 insertions(+)

Patch

diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index d77237ec9fb4..55297286c066 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -43,6 +43,7 @@  int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req);
 
 struct saved_syn {
 	u32 network_hdrlen;
+	u32 bpf_hdr_opt_off;
 	u8 data[];
 };
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index eab1c7d0facb..07a9dfe35242 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -191,6 +191,7 @@  void tcp_time_wait(struct sock *sk, int state, int timeo);
  */
 #define TCPOPT_FASTOPEN_MAGIC	0xF989
 #define TCPOPT_SMC_MAGIC	0xE2D4C3D9
+#define TCPOPT_BPF_MAGIC	0xEB9F
 
 /*
  *     TCP option lengths
@@ -204,6 +205,7 @@  void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOLEN_FASTOPEN_BASE  2
 #define TCPOLEN_EXP_FASTOPEN_BASE  4
 #define TCPOLEN_EXP_SMC_BASE   6
+#define TCPOLEN_EXP_BPF_BASE   4
 
 /* But this is what stacks really send out. */
 #define TCPOLEN_TSTAMP_ALIGNED		12
@@ -857,6 +859,7 @@  struct tcp_skb_cb {
 			has_rxtstamp:1,	/* SKB has a RX timestamp	*/
 			unused:5;
 	__u32		ack_seq;	/* Sequence number ACK'd	*/
+	__u8            bpf_hdr_opt_off;/* offset to bpf hdr option. rx only. */
 	union {
 		struct {
 			/* There is space for up to 24 bytes */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eb0e32b2def9..640408a80b3d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3924,6 +3924,10 @@  void tcp_parse_options(const struct net *net,
 					tcp_parse_fastopen_option(opsize -
 						TCPOLEN_EXP_FASTOPEN_BASE,
 						ptr + 2, th->syn, foc, true);
+				else if (opsize >= TCPOLEN_EXP_BPF_BASE &&
+					 get_unaligned_be16(ptr) ==
+					 TCPOPT_BPF_MAGIC)
+					TCP_SKB_CB(skb)->bpf_hdr_opt_off = (ptr - 2) - (unsigned char *)th;
 				else
 					smc_parse_options(th, opt_rx, ptr,
 							  opsize);
@@ -6562,6 +6566,8 @@  static void tcp_reqsk_record_syn(const struct sock *sk,
 		saved_syn = kmalloc(len + sizeof(*saved_syn), GFP_ATOMIC);
 		if (saved_syn) {
 			saved_syn->network_hdrlen = skb_network_header_len(skb);
+			saved_syn->bpf_hdr_opt_off =
+				TCP_SKB_CB(skb)->bpf_hdr_opt_off;
 			memcpy(saved_syn->data, skb_network_header(skb), len);
 			req->saved_syn = saved_syn;
 		}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ea0df9fd7618..a3535b7fe002 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1864,6 +1864,7 @@  static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
 	TCP_SKB_CB(skb)->sacked	 = 0;
 	TCP_SKB_CB(skb)->has_rxtstamp =
 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
+	TCP_SKB_CB(skb)->bpf_hdr_opt_off = 0;
 }
 
 /*
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f67d45ff00b4..8356d0562279 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1545,6 +1545,7 @@  static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
 	TCP_SKB_CB(skb)->sacked = 0;
 	TCP_SKB_CB(skb)->has_rxtstamp =
 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
+	TCP_SKB_CB(skb)->bpf_hdr_opt_off = 0;
 }
 
 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)