diff mbox series

[bpf-next,V1,3/6] bpf: add BPF-helper for reading MTU from net_device via ifindex

Message ID 160200018165.719143.3249298786187115149.stgit@firesoul
State New
Headers show
Series bpf: New approach for BPF MTU handling and enforcement | expand

Commit Message

Jesper Dangaard Brouer Oct. 6, 2020, 4:03 p.m. UTC
FIXME: add description.

FIXME: IMHO we can create a better BPF-helper named bpf_mtu_check()
instead of bpf_mtu_lookup(), because a flag can be used for requesting
GRO segment size checking.  The ret value of bpf_mtu_check() says
if MTU was violoated, but also return MTU via pointer arg to allow
BPF-progs to do own logic.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 include/uapi/linux/bpf.h |   13 +++++++++++
 net/core/filter.c        |   56 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

Comments

Jesper Dangaard Brouer Oct. 6, 2020, 4:33 p.m. UTC | #1
On Tue, 06 Oct 2020 18:03:01 +0200
Jesper Dangaard Brouer <brouer@redhat.com> wrote:

> FIXME: add description.


Ups, I will obviously send a V2.

I still want feedback on whether I should implement another BPF-helper
as sketched below:

> FIXME: IMHO we can create a better BPF-helper named bpf_mtu_check()

> instead of bpf_mtu_lookup(), because a flag can be used for requesting

> GRO segment size checking.  The ret value of bpf_mtu_check() says

> if MTU was violoated, but also return MTU via pointer arg to allow

> BPF-progs to do own logic.

> 

> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>

> ---

>  include/uapi/linux/bpf.h |   13 +++++++++++

>  net/core/filter.c        |   56 ++++++++++++++++++++++++++++++++++++++++++++++

>  2 files changed, 69 insertions(+)

> 

> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

> index 50ce65e37b16..29b335cb96ef 100644

> --- a/include/uapi/linux/bpf.h

> +++ b/include/uapi/linux/bpf.h

> @@ -3718,6 +3718,18 @@ union bpf_attr {

>   *		never return NULL.

>   *	Return

>   *		A pointer pointing to the kernel percpu variable on this cpu.

> + *

> + * int bpf_mtu_lookup(void *ctx, u32 ifindex, u64 flags)

> + *	Description

> + *		Lookup MTU of net device based on ifindex.  The Linux kernel

> + *		route table can configure MTUs on a more specific per route

> + *		level, which is not provided by this helper. For route level

> + *		MTU checks use the **bpf_fib_lookup**\ () helper.

> + *

> + *		*ctx* is either **struct xdp_md** for XDP programs or

> + *		**struct sk_buff** tc cls_act programs.

> + *	Return

> + *		On success, MTU size is returned. On error, a negative value.

>   */

>  #define __BPF_FUNC_MAPPER(FN)		\

>  	FN(unspec),			\

> @@ -3875,6 +3887,7 @@ union bpf_attr {

>  	FN(redirect_neigh),		\

>  	FN(bpf_per_cpu_ptr),            \

>  	FN(bpf_this_cpu_ptr),		\

> +	FN(mtu_lookup),			\

>  	/* */

>  

>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper

> diff --git a/net/core/filter.c b/net/core/filter.c

> index d84723f347c0..49ae3b80027b 100644

> --- a/net/core/filter.c

> +++ b/net/core/filter.c

> @@ -5512,6 +5512,58 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {

>  	.arg4_type	= ARG_ANYTHING,

>  };

>  

> +static int bpf_mtu_lookup(struct net *netns, u32 ifindex, u64 flags)

> +{

> +	struct net_device *dev;

> +

> +	// XXX: Do we even need flags?

> +	// Flag idea: get ctx dev->mtu for XDP_TX or redir out-same-dev

> +	if (flags)

> +		return -EINVAL;

> +

> +	dev = dev_get_by_index_rcu(netns, ifindex);

> +	if (!dev)

> +		return -ENODEV;

> +

> +	return dev->mtu;

> +}

> +

> +BPF_CALL_3(bpf_skb_mtu_lookup, struct sk_buff *, skb,

> +	   u32, ifindex, u64, flags)

> +{

> +	struct net *netns = dev_net(skb->dev);

> +

> +	return bpf_mtu_lookup(netns, ifindex, flags);

> +}

> +

> +BPF_CALL_3(bpf_xdp_mtu_lookup, struct xdp_buff *, xdp,

> +	   u32, ifindex, u64, flags)

> +{

> +	struct net *netns = dev_net(xdp->rxq->dev);

> +	// XXX: Handle if this runs in devmap prog (then is rxq invalid?)

> +

> +	return bpf_mtu_lookup(netns, ifindex, flags);

> +}

> +

> +static const struct bpf_func_proto bpf_skb_mtu_lookup_proto = {

> +	.func		= bpf_skb_mtu_lookup,

> +	.gpl_only	= true,

> +	.ret_type	= RET_INTEGER,

> +	.arg1_type      = ARG_PTR_TO_CTX,

> +	.arg2_type      = ARG_ANYTHING,

> +	.arg3_type      = ARG_ANYTHING,

> +};

> +

> +static const struct bpf_func_proto bpf_xdp_mtu_lookup_proto = {

> +	.func		= bpf_xdp_mtu_lookup,

> +	.gpl_only	= true,

> +	.ret_type	= RET_INTEGER,

> +	.arg1_type      = ARG_PTR_TO_CTX,

> +	.arg2_type      = ARG_ANYTHING,

> +	.arg3_type      = ARG_ANYTHING,

> +};

> +

> +

>  #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)

>  static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)

>  {

> @@ -7075,6 +7127,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)

>  		return &bpf_get_socket_uid_proto;

>  	case BPF_FUNC_fib_lookup:

>  		return &bpf_skb_fib_lookup_proto;

> +	case BPF_FUNC_mtu_lookup:

> +		return &bpf_skb_mtu_lookup_proto;

>  	case BPF_FUNC_sk_fullsock:

>  		return &bpf_sk_fullsock_proto;

>  	case BPF_FUNC_sk_storage_get:

> @@ -7144,6 +7198,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)

>  		return &bpf_xdp_adjust_tail_proto;

>  	case BPF_FUNC_fib_lookup:

>  		return &bpf_xdp_fib_lookup_proto;

> +	case BPF_FUNC_mtu_lookup:

> +		return &bpf_xdp_mtu_lookup_proto;

>  #ifdef CONFIG_INET

>  	case BPF_FUNC_sk_lookup_udp:

>  		return &bpf_xdp_sk_lookup_udp_proto;

> 

> 




-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer
Jakub Kicinski Oct. 7, 2020, 1:18 a.m. UTC | #2
On Tue, 6 Oct 2020 18:33:02 +0200 Jesper Dangaard Brouer wrote:
> > +static const struct bpf_func_proto bpf_xdp_mtu_lookup_proto = {

> > +	.func		= bpf_xdp_mtu_lookup,

> > +	.gpl_only	= true,

> > +	.ret_type	= RET_INTEGER,

> > +	.arg1_type      = ARG_PTR_TO_CTX,

> > +	.arg2_type      = ARG_ANYTHING,

> > +	.arg3_type      = ARG_ANYTHING,

> > +};

> > +

> > +


FWIW

CHECK: Please don't use multiple blank lines
#112: FILE: net/core/filter.c:5566:
+
+
Maciej Żenczykowski Oct. 7, 2020, 1:24 a.m. UTC | #3
On Tue, Oct 6, 2020 at 6:19 PM Jakub Kicinski <kuba@kernel.org> wrote:
>

> On Tue, 6 Oct 2020 18:33:02 +0200 Jesper Dangaard Brouer wrote:

> > > +static const struct bpf_func_proto bpf_xdp_mtu_lookup_proto = {

> > > +   .func           = bpf_xdp_mtu_lookup,

> > > +   .gpl_only       = true,

> > > +   .ret_type       = RET_INTEGER,

> > > +   .arg1_type      = ARG_PTR_TO_CTX,

> > > +   .arg2_type      = ARG_ANYTHING,

> > > +   .arg3_type      = ARG_ANYTHING,

> > > +};

> > > +

> > > +

>

> FWIW

>

> CHECK: Please don't use multiple blank lines

> #112: FILE: net/core/filter.c:5566:


FYI: It would be nice to have a similar function to return a device's
L2 header size (ie. 14 for ethernet) and/or hwtype.

Also, should this be restricted to gpl only?

[I'm not actually sure, I'm actually fed up with non-gpl code atm, and
wouldn't be against all bpf code needing to be gpl'ed...]
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 50ce65e37b16..29b335cb96ef 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3718,6 +3718,18 @@  union bpf_attr {
  *		never return NULL.
  *	Return
  *		A pointer pointing to the kernel percpu variable on this cpu.
+ *
+ * int bpf_mtu_lookup(void *ctx, u32 ifindex, u64 flags)
+ *	Description
+ *		Lookup MTU of net device based on ifindex.  The Linux kernel
+ *		route table can configure MTUs on a more specific per route
+ *		level, which is not provided by this helper. For route level
+ *		MTU checks use the **bpf_fib_lookup**\ () helper.
+ *
+ *		*ctx* is either **struct xdp_md** for XDP programs or
+ *		**struct sk_buff** tc cls_act programs.
+ *	Return
+ *		On success, MTU size is returned. On error, a negative value.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3875,6 +3887,7 @@  union bpf_attr {
 	FN(redirect_neigh),		\
 	FN(bpf_per_cpu_ptr),            \
 	FN(bpf_this_cpu_ptr),		\
+	FN(mtu_lookup),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/net/core/filter.c b/net/core/filter.c
index d84723f347c0..49ae3b80027b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5512,6 +5512,58 @@  static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
+static int bpf_mtu_lookup(struct net *netns, u32 ifindex, u64 flags)
+{
+	struct net_device *dev;
+
+	// XXX: Do we even need flags?
+	// Flag idea: get ctx dev->mtu for XDP_TX or redir out-same-dev
+	if (flags)
+		return -EINVAL;
+
+	dev = dev_get_by_index_rcu(netns, ifindex);
+	if (!dev)
+		return -ENODEV;
+
+	return dev->mtu;
+}
+
+BPF_CALL_3(bpf_skb_mtu_lookup, struct sk_buff *, skb,
+	   u32, ifindex, u64, flags)
+{
+	struct net *netns = dev_net(skb->dev);
+
+	return bpf_mtu_lookup(netns, ifindex, flags);
+}
+
+BPF_CALL_3(bpf_xdp_mtu_lookup, struct xdp_buff *, xdp,
+	   u32, ifindex, u64, flags)
+{
+	struct net *netns = dev_net(xdp->rxq->dev);
+	// XXX: Handle if this runs in devmap prog (then is rxq invalid?)
+
+	return bpf_mtu_lookup(netns, ifindex, flags);
+}
+
+static const struct bpf_func_proto bpf_skb_mtu_lookup_proto = {
+	.func		= bpf_skb_mtu_lookup,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_ANYTHING,
+	.arg3_type      = ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto bpf_xdp_mtu_lookup_proto = {
+	.func		= bpf_xdp_mtu_lookup,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_ANYTHING,
+	.arg3_type      = ARG_ANYTHING,
+};
+
+
 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
 static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
 {
@@ -7075,6 +7127,8 @@  tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_socket_uid_proto;
 	case BPF_FUNC_fib_lookup:
 		return &bpf_skb_fib_lookup_proto;
+	case BPF_FUNC_mtu_lookup:
+		return &bpf_skb_mtu_lookup_proto;
 	case BPF_FUNC_sk_fullsock:
 		return &bpf_sk_fullsock_proto;
 	case BPF_FUNC_sk_storage_get:
@@ -7144,6 +7198,8 @@  xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_adjust_tail_proto;
 	case BPF_FUNC_fib_lookup:
 		return &bpf_xdp_fib_lookup_proto;
+	case BPF_FUNC_mtu_lookup:
+		return &bpf_xdp_mtu_lookup_proto;
 #ifdef CONFIG_INET
 	case BPF_FUNC_sk_lookup_udp:
 		return &bpf_xdp_sk_lookup_udp_proto;