diff mbox series

[net-next,v3,2/3] virtio_net: add optional flow dissection in virtio_net_hdr_to_skb

Message ID 20210601221841.1251830-3-tannerlove.kernel@gmail.com
State Superseded
Headers show
Series virtio_net: add optional flow dissection in virtio_net_hdr_to_skb | expand

Commit Message

Tanner Love June 1, 2021, 10:18 p.m. UTC
From: Tanner Love <tannerlove@google.com>

Syzkaller bugs have resulted from loose specification of
virtio_net_hdr[1]. Enable execution of a BPF flow dissector program
in virtio_net_hdr_to_skb to validate the vnet header and drop bad
input.

The existing behavior of accepting these vnet headers is part of the
ABI. But individual admins may want to enforce restrictions. For
example, verifying that a GSO_TCPV4 gso_type matches packet contents:
unencapsulated TCP/IPV4 packet with payload exceeding gso_size and
hdr_len at payload offset.

Introduce a new sysctl net.core.flow_dissect_vnet_hdr controlling a
static key to decide whether to perform flow dissection. When the key
is false, virtio_net_hdr_to_skb computes as before.

[1] https://syzkaller.appspot.com/bug?id=b419a5ca95062664fe1a60b764621eb4526e2cd0

[ um build error ]
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Tanner Love <tannerlove@google.com>
Suggested-by: Willem de Bruijn <willemb@google.com>
---
 include/linux/virtio_net.h | 25 +++++++++++++++++++++----
 net/core/flow_dissector.c  |  3 +++
 net/core/sysctl_net_core.c |  9 +++++++++
 3 files changed, 33 insertions(+), 4 deletions(-)

Comments

Stanislav Fomichev June 3, 2021, 3:54 p.m. UTC | #1
On 06/01, Tanner Love wrote:
> From: Tanner Love <tannerlove@google.com>


> Syzkaller bugs have resulted from loose specification of

> virtio_net_hdr[1]. Enable execution of a BPF flow dissector program

> in virtio_net_hdr_to_skb to validate the vnet header and drop bad

> input.


> The existing behavior of accepting these vnet headers is part of the

> ABI. But individual admins may want to enforce restrictions. For

> example, verifying that a GSO_TCPV4 gso_type matches packet contents:

> unencapsulated TCP/IPV4 packet with payload exceeding gso_size and

> hdr_len at payload offset.


> Introduce a new sysctl net.core.flow_dissect_vnet_hdr controlling a

> static key to decide whether to perform flow dissection. When the key

> is false, virtio_net_hdr_to_skb computes as before.


> [1]  

> https://syzkaller.appspot.com/bug?id=b419a5ca95062664fe1a60b764621eb4526e2cd0


> [ um build error ]

> Reported-by: kernel test robot <lkp@intel.com>

> Signed-off-by: Tanner Love <tannerlove@google.com>

> Suggested-by: Willem de Bruijn <willemb@google.com>

> ---

>   include/linux/virtio_net.h | 25 +++++++++++++++++++++----

>   net/core/flow_dissector.c  |  3 +++

>   net/core/sysctl_net_core.c |  9 +++++++++

>   3 files changed, 33 insertions(+), 4 deletions(-)


> diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h

> index b465f8f3e554..cdc6152b40c6 100644

> --- a/include/linux/virtio_net.h

> +++ b/include/linux/virtio_net.h

> @@ -25,10 +25,13 @@ static inline int virtio_net_hdr_set_proto(struct  

> sk_buff *skb,

>   	return 0;

>   }


> +DECLARE_STATIC_KEY_FALSE(sysctl_flow_dissect_vnet_hdr_key);

> +

>   static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,

>   					const struct virtio_net_hdr *hdr,

>   					bool little_endian)

>   {

> +	struct flow_keys_basic keys;

>   	unsigned int gso_type = 0;

>   	unsigned int thlen = 0;

>   	unsigned int p_off = 0;

> @@ -78,13 +81,24 @@ static inline int virtio_net_hdr_to_skb(struct  

> sk_buff *skb,

>   		p_off = skb_transport_offset(skb) + thlen;

>   		if (!pskb_may_pull(skb, p_off))

>   			return -EINVAL;

> -	} else {

> +	}

> +

> +	/* BPF flow dissection for optional strict validation.

> +	 *

> +	 * Admins can define permitted packets more strictly, such as dropping

> +	 * deprecated UDP_UFO packets and requiring skb->protocol to be non-zero

> +	 * and matching packet headers.

> +	 */

> +	if (static_branch_unlikely(&sysctl_flow_dissect_vnet_hdr_key) &&

> +	    !__skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, NULL, 0, 0, 0,

> +						0, hdr))

> +		return -EINVAL;

> +

> +	if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {

>   		/* gso packets without NEEDS_CSUM do not set transport_offset.

>   		 * probe and drop if does not match one of the above types.

>   		 */

>   		if (gso_type && skb->network_header) {

> -			struct flow_keys_basic keys;

> -

>   			if (!skb->protocol) {

>   				__be16 protocol = dev_parse_header_protocol(skb);


> @@ -92,8 +106,11 @@ static inline int virtio_net_hdr_to_skb(struct  

> sk_buff *skb,

>   				if (protocol && protocol != skb->protocol)

>   					return -EINVAL;

>   			}

> +

>   retry:

> -			if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,

> +			/* only if flow dissection not already done */

> +			if (!static_branch_unlikely(&sysctl_flow_dissect_vnet_hdr_key) &&

> +			    !skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,

>   							      NULL, 0, 0, 0,

>   							      0)) {

>   				/* UFO does not specify ipv4 or 6: try both */

> diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c

> index 4b171ebec084..ae2ce382f4f4 100644

> --- a/net/core/flow_dissector.c

> +++ b/net/core/flow_dissector.c

> @@ -35,6 +35,9 @@

>   #endif

>   #include <linux/bpf-netns.h>


> +DEFINE_STATIC_KEY_FALSE(sysctl_flow_dissect_vnet_hdr_key);

> +EXPORT_SYMBOL(sysctl_flow_dissect_vnet_hdr_key);

> +

>   static void dissector_set_key(struct flow_dissector *flow_dissector,

>   			      enum flow_dissector_key_id key_id)

>   {

> diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c

> index c8496c1142c9..c01b9366bb75 100644

> --- a/net/core/sysctl_net_core.c

> +++ b/net/core/sysctl_net_core.c

> @@ -36,6 +36,8 @@ static int net_msg_warn;	/* Unused, but still a sysctl  

> */

>   int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;

>   EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);


> +DECLARE_STATIC_KEY_FALSE(sysctl_flow_dissect_vnet_hdr_key);

> +

>   /* 0 - Keep current behavior:

>    *     IPv4: inherit all current settings from init_net

>    *     IPv6: reset all settings to default

> @@ -580,6 +582,13 @@ static struct ctl_table net_core_table[] = {

>   		.extra1		= SYSCTL_ONE,

>   		.extra2		= &int_3600,

>   	},


[..]

> +	{

> +		.procname       = "flow_dissect_vnet_hdr",


This sounds generic, but iiuc, it makes sense only for bpf flow
dissection? Should it be bpf_flow_dissect_vnet_hdr instead?

Or should we drop this sysctl in favor of some per-program flag
(either attach_flags or some signal via btf)? That way,
individual bpf programs can signal their willingness to
parse vnet hdr.

> +		.data           = &sysctl_flow_dissect_vnet_hdr_key.key,

> +		.maxlen         = sizeof(sysctl_flow_dissect_vnet_hdr_key),

> +		.mode           = 0644,

> +		.proc_handler   = proc_do_static_key,

> +	},

>   	{ }

>   };


> --

> 2.32.0.rc0.204.g9fa02ecfa5-goog
Alexei Starovoitov June 3, 2021, 11:56 p.m. UTC | #2
On Tue, Jun 01, 2021 at 06:18:39PM -0400, Tanner Love wrote:
> From: Tanner Love <tannerlove@google.com>

> 

> Syzkaller bugs have resulted from loose specification of

> virtio_net_hdr[1]. Enable execution of a BPF flow dissector program

> in virtio_net_hdr_to_skb to validate the vnet header and drop bad

> input.

> 

> The existing behavior of accepting these vnet headers is part of the

> ABI. 


So ?
It's ok to fix ABI when it's broken.
The whole feature is a way to workaround broken ABI with additional
BPF based validation.
It's certainly a novel idea.
I've never seen BPF being used to fix the kernel bugs.
But I think the better way forward is to admit that vnet ABI is broken
and fix it in the kernel with proper validation.
BPF-based validation is a band-aid. The out of the box kernel will
stay broken and syzbot will continue to crash it.
Willem de Bruijn June 4, 2021, 12:44 a.m. UTC | #3
On Thu, Jun 3, 2021 at 7:56 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>

> On Tue, Jun 01, 2021 at 06:18:39PM -0400, Tanner Love wrote:

> > From: Tanner Love <tannerlove@google.com>

> >

> > Syzkaller bugs have resulted from loose specification of

> > virtio_net_hdr[1]. Enable execution of a BPF flow dissector program

> > in virtio_net_hdr_to_skb to validate the vnet header and drop bad

> > input.

> >

> > The existing behavior of accepting these vnet headers is part of the

> > ABI.

>

> So ?

> It's ok to fix ABI when it's broken.

> The whole feature is a way to workaround broken ABI with additional

> BPF based validation.

> It's certainly a novel idea.

> I've never seen BPF being used to fix the kernel bugs.

> But I think the better way forward is to admit that vnet ABI is broken

> and fix it in the kernel with proper validation.

> BPF-based validation is a band-aid. The out of the box kernel will

> stay broken and syzbot will continue to crash it.


The intention is not to use this to avoid kernel fixes.

There are three parts:

1. is the ABI broken and can we simply drop certain weird packets?
2. will we accept an extra packet parsing stage in
virtio_net_hdr_to_skb to find all the culprits?
3. do we want to add yet another parser in C when this is exactly what
the BPF flow dissector does well?

The virtio_net_hdr interface is more permissive than I think it was
intended. But weird edge cases like protocol 0 do occur. So I believe
it is simply too late to drop everything that is not obviously
correct.

More problematic is that some of the gray area cases are non-trivial
and require protocol parsing. Do the packet headers match the gso
type? Are subtle variants like IPv6/TCP with extension headers
supported or not?

We have previously tried to add unconditional protocol parsing in
virtio_net_hdr_to_skb, but received pushback because this will cause
performance regressions, e.g., for vm-to-vm forwarding.

Combined, that's why I think BPF flow dissection is the right approach
here. It is optional, can be as pedantic as the admin wants / workload
allows (e.g., dropping UFO altogether) and ensures protocol parsing
itself is robust. Even if not enabled continuously, offers a quick way
to patch a vulnerability when it is discovered. This is the same
reasoning of the existing BPF flow dissector.

It is not intended as an alternative to subsequently fixing a bug in
the kernel path.
Alexei Starovoitov June 4, 2021, 2:04 a.m. UTC | #4
On Thu, Jun 03, 2021 at 08:44:51PM -0400, Willem de Bruijn wrote:
> On Thu, Jun 3, 2021 at 7:56 PM Alexei Starovoitov

> <alexei.starovoitov@gmail.com> wrote:

> >

> > On Tue, Jun 01, 2021 at 06:18:39PM -0400, Tanner Love wrote:

> > > From: Tanner Love <tannerlove@google.com>

> > >

> > > Syzkaller bugs have resulted from loose specification of

> > > virtio_net_hdr[1]. Enable execution of a BPF flow dissector program

> > > in virtio_net_hdr_to_skb to validate the vnet header and drop bad

> > > input.

> > >

> > > The existing behavior of accepting these vnet headers is part of the

> > > ABI.

> >

> > So ?

> > It's ok to fix ABI when it's broken.

> > The whole feature is a way to workaround broken ABI with additional

> > BPF based validation.

> > It's certainly a novel idea.

> > I've never seen BPF being used to fix the kernel bugs.

> > But I think the better way forward is to admit that vnet ABI is broken

> > and fix it in the kernel with proper validation.

> > BPF-based validation is a band-aid. The out of the box kernel will

> > stay broken and syzbot will continue to crash it.

> 

> The intention is not to use this to avoid kernel fixes.

> 

> There are three parts:

> 

> 1. is the ABI broken and can we simply drop certain weird packets?

> 2. will we accept an extra packet parsing stage in

> virtio_net_hdr_to_skb to find all the culprits?

> 3. do we want to add yet another parser in C when this is exactly what

> the BPF flow dissector does well?

> 

> The virtio_net_hdr interface is more permissive than I think it was

> intended. But weird edge cases like protocol 0 do occur. So I believe

> it is simply too late to drop everything that is not obviously

> correct.

> 

> More problematic is that some of the gray area cases are non-trivial

> and require protocol parsing. Do the packet headers match the gso

> type? Are subtle variants like IPv6/TCP with extension headers

> supported or not?

> 

> We have previously tried to add unconditional protocol parsing in

> virtio_net_hdr_to_skb, but received pushback because this will cause

> performance regressions, e.g., for vm-to-vm forwarding.

> 

> Combined, that's why I think BPF flow dissection is the right approach

> here. It is optional, can be as pedantic as the admin wants / workload

> allows (e.g., dropping UFO altogether) and ensures protocol parsing

> itself is robust. Even if not enabled continuously, offers a quick way

> to patch a vulnerability when it is discovered. This is the same

> reasoning of the existing BPF flow dissector.

> 

> It is not intended as an alternative to subsequently fixing a bug in

> the kernel path.


Thanks for these details. They need to be part of commit log.

As far as patches the overhead of copying extra fields can be avoided.
How about copying single pointer to struct virtio_net_hdr into bpf_flow_keys instead?
See struct bpf_sockopt and _bpf_md_ptr(struct bpf_sock *, sk).
The overhead will much lower and bpf prog will access all virtio_net_hdr directly.
diff mbox series

Patch

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index b465f8f3e554..cdc6152b40c6 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -25,10 +25,13 @@  static inline int virtio_net_hdr_set_proto(struct sk_buff *skb,
 	return 0;
 }
 
+DECLARE_STATIC_KEY_FALSE(sysctl_flow_dissect_vnet_hdr_key);
+
 static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 					const struct virtio_net_hdr *hdr,
 					bool little_endian)
 {
+	struct flow_keys_basic keys;
 	unsigned int gso_type = 0;
 	unsigned int thlen = 0;
 	unsigned int p_off = 0;
@@ -78,13 +81,24 @@  static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 		p_off = skb_transport_offset(skb) + thlen;
 		if (!pskb_may_pull(skb, p_off))
 			return -EINVAL;
-	} else {
+	}
+
+	/* BPF flow dissection for optional strict validation.
+	 *
+	 * Admins can define permitted packets more strictly, such as dropping
+	 * deprecated UDP_UFO packets and requiring skb->protocol to be non-zero
+	 * and matching packet headers.
+	 */
+	if (static_branch_unlikely(&sysctl_flow_dissect_vnet_hdr_key) &&
+	    !__skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, NULL, 0, 0, 0,
+						0, hdr))
+		return -EINVAL;
+
+	if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
 		/* gso packets without NEEDS_CSUM do not set transport_offset.
 		 * probe and drop if does not match one of the above types.
 		 */
 		if (gso_type && skb->network_header) {
-			struct flow_keys_basic keys;
-
 			if (!skb->protocol) {
 				__be16 protocol = dev_parse_header_protocol(skb);
 
@@ -92,8 +106,11 @@  static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 				if (protocol && protocol != skb->protocol)
 					return -EINVAL;
 			}
+
 retry:
-			if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
+			/* only if flow dissection not already done */
+			if (!static_branch_unlikely(&sysctl_flow_dissect_vnet_hdr_key) &&
+			    !skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
 							      NULL, 0, 0, 0,
 							      0)) {
 				/* UFO does not specify ipv4 or 6: try both */
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 4b171ebec084..ae2ce382f4f4 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -35,6 +35,9 @@ 
 #endif
 #include <linux/bpf-netns.h>
 
+DEFINE_STATIC_KEY_FALSE(sysctl_flow_dissect_vnet_hdr_key);
+EXPORT_SYMBOL(sysctl_flow_dissect_vnet_hdr_key);
+
 static void dissector_set_key(struct flow_dissector *flow_dissector,
 			      enum flow_dissector_key_id key_id)
 {
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index c8496c1142c9..c01b9366bb75 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -36,6 +36,8 @@  static int net_msg_warn;	/* Unused, but still a sysctl */
 int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
 EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
 
+DECLARE_STATIC_KEY_FALSE(sysctl_flow_dissect_vnet_hdr_key);
+
 /* 0 - Keep current behavior:
  *     IPv4: inherit all current settings from init_net
  *     IPv6: reset all settings to default
@@ -580,6 +582,13 @@  static struct ctl_table net_core_table[] = {
 		.extra1		= SYSCTL_ONE,
 		.extra2		= &int_3600,
 	},
+	{
+		.procname       = "flow_dissect_vnet_hdr",
+		.data           = &sysctl_flow_dissect_vnet_hdr_key.key,
+		.maxlen         = sizeof(sysctl_flow_dissect_vnet_hdr_key),
+		.mode           = 0644,
+		.proc_handler   = proc_do_static_key,
+	},
 	{ }
 };