diff mbox series

[v4,bpf-next,06/13] bpf: introduce bpf_xdp_get_frags_{count, total_size} helpers

Message ID deb81e4cf02db9a1da2b4088a49afd7acf8b82b6.1601648734.git.lorenzo@kernel.org
State New
Headers show
Series mvneta: introduce XDP multi-buffer support | expand

Commit Message

Lorenzo Bianconi Oct. 2, 2020, 2:42 p.m. UTC
From: Sameeh Jubran <sameehj@amazon.com>

Introduce the two following bpf helpers in order to provide some
metadata about a xdp multi-buff fame to bpf layer:

- bpf_xdp_get_frags_count()
  get the number of fragments for a given xdp multi-buffer.

* bpf_xdp_get_frags_total_size()
  get the total size of fragments for a given xdp multi-buffer.

Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 include/uapi/linux/bpf.h       | 14 ++++++++++++
 net/core/filter.c              | 42 ++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h | 14 ++++++++++++
 3 files changed, 70 insertions(+)

Comments

John Fastabend Oct. 2, 2020, 3:36 p.m. UTC | #1
Lorenzo Bianconi wrote:
> From: Sameeh Jubran <sameehj@amazon.com>
> 
> Introduce the two following bpf helpers in order to provide some
> metadata about a xdp multi-buff fame to bpf layer:
> 
> - bpf_xdp_get_frags_count()
>   get the number of fragments for a given xdp multi-buffer.

Same comment as in the cover letter can you provide a use case
for how/where I would use xdp_get_frags_count()? Is it just for
debug? If its just debug do we really want a uapi helper for it.

> 
> * bpf_xdp_get_frags_total_size()
>   get the total size of fragments for a given xdp multi-buffer.

This is awkward IMO. If total size is needed it should return total size
in all cases not just in the mb case otherwise programs will have two
paths the mb path and the non-mb path. And if you have mixed workload
the branch predictor will miss? Plus its extra instructions to load.

And if its useful for something beyond just debug and its going to be
read every packet or something I think we should put it in the metadata
so that its not hidden behind a helper which likely will show up as
overhead on a 40+gbps nic. The use case I have in mind is counting
bytes maybe sliced by IP or protocol. Here you will always read it
and I don't want code with a if/else stuck in the middle when if
we do it right we have a single read.

> 
> Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
> Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
>  include/uapi/linux/bpf.h       | 14 ++++++++++++
>  net/core/filter.c              | 42 ++++++++++++++++++++++++++++++++++
>  tools/include/uapi/linux/bpf.h | 14 ++++++++++++
>  3 files changed, 70 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 4f556cfcbfbe..0715995eb18c 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3668,6 +3668,18 @@ union bpf_attr {
>   * 	Return
>   * 		The helper returns **TC_ACT_REDIRECT** on success or
>   * 		**TC_ACT_SHOT** on error.
> + *
> + * int bpf_xdp_get_frags_count(struct xdp_buff *xdp_md)
> + *	Description
> + *		Get the number of fragments for a given xdp multi-buffer.
> + *	Return
> + *		The number of fragments
> + *
> + * int bpf_xdp_get_frags_total_size(struct xdp_buff *xdp_md)
> + *	Description
> + *		Get the total size of fragments for a given xdp multi-buffer.

Why just fragments? Will I have to also add the initial frag0 to it
or not. I think the description is a bit ambiguous.

> + *	Return
> + *		The total size of fragments for a given xdp multi-buffer.
>   */

[...]

> +const struct bpf_func_proto bpf_xdp_get_frags_count_proto = {
> +	.func		= bpf_xdp_get_frags_count,
> +	.gpl_only	= false,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_PTR_TO_CTX,
> +};
> +
> +BPF_CALL_1(bpf_xdp_get_frags_total_size, struct  xdp_buff*, xdp)
> +{
> +	struct skb_shared_info *sinfo;
> +	int nfrags, i, size = 0;
> +
> +	if (likely(!xdp->mb))
> +		return 0;
> +
> +	sinfo = xdp_get_shared_info_from_buff(xdp);
> +	nfrags = min_t(u8, sinfo->nr_frags, MAX_SKB_FRAGS);
> +
> +	for (i = 0; i < nfrags; i++)
> +		size += skb_frag_size(&sinfo->frags[i]);

Wont the hardware just know this? I think walking the frag list
just to get the total seems wrong. The hardware should have a
total_len field somewhere we can just read no? If mvneta doesn't
know the total length that seems like a driver limitation and we
shouldn't encode it in the helper.

> +
> +	return size;
> +}
Lorenzo Bianconi Oct. 2, 2020, 4:25 p.m. UTC | #2
> Lorenzo Bianconi wrote:
> > From: Sameeh Jubran <sameehj@amazon.com>
> > 
> > Introduce the two following bpf helpers in order to provide some
> > metadata about a xdp multi-buff fame to bpf layer:
> > 
> > - bpf_xdp_get_frags_count()
> >   get the number of fragments for a given xdp multi-buffer.
> 
> Same comment as in the cover letter can you provide a use case
> for how/where I would use xdp_get_frags_count()? Is it just for
> debug? If its just debug do we really want a uapi helper for it.

I have no a strong opinion on it, I guess we can just drop this helper,
but I am not the original author of the patch :)

> 
> > 
> > * bpf_xdp_get_frags_total_size()
> >   get the total size of fragments for a given xdp multi-buffer.
> 
> This is awkward IMO. If total size is needed it should return total size
> in all cases not just in the mb case otherwise programs will have two
> paths the mb path and the non-mb path. And if you have mixed workload
> the branch predictor will miss? Plus its extra instructions to load.

ack, I am fine to make the helper reporing to total size instead of paged ones
(we can compute it if we really need it)

> 
> And if its useful for something beyond just debug and its going to be
> read every packet or something I think we should put it in the metadata
> so that its not hidden behind a helper which likely will show up as
> overhead on a 40+gbps nic. The use case I have in mind is counting
> bytes maybe sliced by IP or protocol. Here you will always read it
> and I don't want code with a if/else stuck in the middle when if
> we do it right we have a single read.

do you mean xdp_frame or data_meta area? As explained in the cover-letter we
choose this approach to save space in xdp_frame.

> 
> > 
> > Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
> > Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > ---
> >  include/uapi/linux/bpf.h       | 14 ++++++++++++
> >  net/core/filter.c              | 42 ++++++++++++++++++++++++++++++++++
> >  tools/include/uapi/linux/bpf.h | 14 ++++++++++++
> >  3 files changed, 70 insertions(+)
> > 
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 4f556cfcbfbe..0715995eb18c 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -3668,6 +3668,18 @@ union bpf_attr {
> >   * 	Return
> >   * 		The helper returns **TC_ACT_REDIRECT** on success or
> >   * 		**TC_ACT_SHOT** on error.
> > + *
> > + * int bpf_xdp_get_frags_count(struct xdp_buff *xdp_md)
> > + *	Description
> > + *		Get the number of fragments for a given xdp multi-buffer.
> > + *	Return
> > + *		The number of fragments
> > + *
> > + * int bpf_xdp_get_frags_total_size(struct xdp_buff *xdp_md)
> > + *	Description
> > + *		Get the total size of fragments for a given xdp multi-buffer.
> 
> Why just fragments? Will I have to also add the initial frag0 to it
> or not. I think the description is a bit ambiguous.
> 
> > + *	Return
> > + *		The total size of fragments for a given xdp multi-buffer.
> >   */
> 
> [...]
> 
> > +const struct bpf_func_proto bpf_xdp_get_frags_count_proto = {
> > +	.func		= bpf_xdp_get_frags_count,
> > +	.gpl_only	= false,
> > +	.ret_type	= RET_INTEGER,
> > +	.arg1_type	= ARG_PTR_TO_CTX,
> > +};
> > +
> > +BPF_CALL_1(bpf_xdp_get_frags_total_size, struct  xdp_buff*, xdp)
> > +{
> > +	struct skb_shared_info *sinfo;
> > +	int nfrags, i, size = 0;
> > +
> > +	if (likely(!xdp->mb))
> > +		return 0;
> > +
> > +	sinfo = xdp_get_shared_info_from_buff(xdp);
> > +	nfrags = min_t(u8, sinfo->nr_frags, MAX_SKB_FRAGS);
> > +
> > +	for (i = 0; i < nfrags; i++)
> > +		size += skb_frag_size(&sinfo->frags[i]);
> 
> Wont the hardware just know this? I think walking the frag list
> just to get the total seems wrong. The hardware should have a
> total_len field somewhere we can just read no? If mvneta doesn't
> know the total length that seems like a driver limitation and we
> shouldn't encode it in the helper.

I have a couple of patches to improve this (not posted yet):
- https://github.com/LorenzoBianconi/bpf-next/commit/ff9b3a74a105b64947931f83fe86a4b8b1808103
- https://github.com/LorenzoBianconi/bpf-next/commit/712e67333cbc5f6304122b1009cdae1e18e6eb26

Regards,
Lorenzo

> 
> > +
> > +	return size;
> > +}
>
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4f556cfcbfbe..0715995eb18c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3668,6 +3668,18 @@  union bpf_attr {
  * 	Return
  * 		The helper returns **TC_ACT_REDIRECT** on success or
  * 		**TC_ACT_SHOT** on error.
+ *
+ * int bpf_xdp_get_frags_count(struct xdp_buff *xdp_md)
+ *	Description
+ *		Get the number of fragments for a given xdp multi-buffer.
+ *	Return
+ *		The number of fragments
+ *
+ * int bpf_xdp_get_frags_total_size(struct xdp_buff *xdp_md)
+ *	Description
+ *		Get the total size of fragments for a given xdp multi-buffer.
+ *	Return
+ *		The total size of fragments for a given xdp multi-buffer.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3823,6 +3835,8 @@  union bpf_attr {
 	FN(seq_printf_btf),		\
 	FN(skb_cgroup_classid),		\
 	FN(redirect_neigh),		\
+	FN(xdp_get_frags_count),	\
+	FN(xdp_get_frags_total_size),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/net/core/filter.c b/net/core/filter.c
index 3fb6adad1957..4c55b788c4c5 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3739,6 +3739,44 @@  static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_1(bpf_xdp_get_frags_count, struct  xdp_buff*, xdp)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+
+	return xdp->mb ? sinfo->nr_frags : 0;
+}
+
+const struct bpf_func_proto bpf_xdp_get_frags_count_proto = {
+	.func		= bpf_xdp_get_frags_count,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_xdp_get_frags_total_size, struct  xdp_buff*, xdp)
+{
+	struct skb_shared_info *sinfo;
+	int nfrags, i, size = 0;
+
+	if (likely(!xdp->mb))
+		return 0;
+
+	sinfo = xdp_get_shared_info_from_buff(xdp);
+	nfrags = min_t(u8, sinfo->nr_frags, MAX_SKB_FRAGS);
+
+	for (i = 0; i < nfrags; i++)
+		size += skb_frag_size(&sinfo->frags[i]);
+
+	return size;
+}
+
+const struct bpf_func_proto bpf_xdp_get_frags_total_size_proto = {
+	.func		= bpf_xdp_get_frags_total_size,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+};
+
 BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
 {
 	void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
@@ -7092,6 +7130,10 @@  xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_redirect_map_proto;
 	case BPF_FUNC_xdp_adjust_tail:
 		return &bpf_xdp_adjust_tail_proto;
+	case BPF_FUNC_xdp_get_frags_count:
+		return &bpf_xdp_get_frags_count_proto;
+	case BPF_FUNC_xdp_get_frags_total_size:
+		return &bpf_xdp_get_frags_total_size_proto;
 	case BPF_FUNC_fib_lookup:
 		return &bpf_xdp_fib_lookup_proto;
 #ifdef CONFIG_INET
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4f556cfcbfbe..0715995eb18c 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3668,6 +3668,18 @@  union bpf_attr {
  * 	Return
  * 		The helper returns **TC_ACT_REDIRECT** on success or
  * 		**TC_ACT_SHOT** on error.
+ *
+ * int bpf_xdp_get_frags_count(struct xdp_buff *xdp_md)
+ *	Description
+ *		Get the number of fragments for a given xdp multi-buffer.
+ *	Return
+ *		The number of fragments
+ *
+ * int bpf_xdp_get_frags_total_size(struct xdp_buff *xdp_md)
+ *	Description
+ *		Get the total size of fragments for a given xdp multi-buffer.
+ *	Return
+ *		The total size of fragments for a given xdp multi-buffer.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3823,6 +3835,8 @@  union bpf_attr {
 	FN(seq_printf_btf),		\
 	FN(skb_cgroup_classid),		\
 	FN(redirect_neigh),		\
+	FN(xdp_get_frags_count),	\
+	FN(xdp_get_frags_total_size),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper