diff mbox series

[RFC,bpf-next,03/15] samples: bpf: split out common bpf progs to its own file

Message ID 20210528235250.2635167-4-memxor@gmail.com
State New
Headers show
Series [RFC,bpf-next,01/15] samples: bpf: fix a couple of NULL dereferences | expand

Commit Message

Kumar Kartikeya Dwivedi May 28, 2021, 11:52 p.m. UTC
This is done to later reuse these in a way that can be shared
among multiple samples.

We are using xdp_redirect_cpu_kern.c as a base to build further support on
top (mostly adding a few other things missing that xdp_monitor does in
subsequent patches).

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 samples/bpf/xdp_sample_kern.h | 220 ++++++++++++++++++++++++++++++++++
 1 file changed, 220 insertions(+)
 create mode 100644 samples/bpf/xdp_sample_kern.h

Comments

Andrii Nakryiko May 30, 2021, 3:05 a.m. UTC | #1
On Fri, May 28, 2021 at 4:53 PM Kumar Kartikeya Dwivedi
<memxor@gmail.com> wrote:
>

> This is done to later reuse these in a way that can be shared

> among multiple samples.

>

> We are using xdp_redirect_cpu_kern.c as a base to build further support on

> top (mostly adding a few other things missing that xdp_monitor does in

> subsequent patches).

>

> Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>

> ---

>  samples/bpf/xdp_sample_kern.h | 220 ++++++++++++++++++++++++++++++++++

>  1 file changed, 220 insertions(+)

>  create mode 100644 samples/bpf/xdp_sample_kern.h

>

> diff --git a/samples/bpf/xdp_sample_kern.h b/samples/bpf/xdp_sample_kern.h


Instead of doing it as a header, can you please use BPF static linking?
I think that's a better approach and a good showcase for anyone that
would like to use static linking for their BPF programs.

> new file mode 100644

> index 000000000000..bb809542ac20

> --- /dev/null

> +++ b/samples/bpf/xdp_sample_kern.h

> @@ -0,0 +1,220 @@

> +// SPDX-License-Identifier: GPL-2.0

> +/*  GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */

> +#pragma once

> +

> +#include <uapi/linux/bpf.h>

> +#include <bpf/bpf_helpers.h>

> +

> +#define MAX_CPUS 64

> +

> +/* Common stats data record to keep userspace more simple */

> +struct datarec {

> +       __u64 processed;

> +       __u64 dropped;

> +       __u64 issue;

> +       __u64 xdp_pass;

> +       __u64 xdp_drop;

> +       __u64 xdp_redirect;

> +};

> +

> +/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success

> + * feedback.  Redirect TX errors can be caught via a tracepoint.

> + */

> +struct {

> +       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);

> +       __type(key, u32);

> +       __type(value, struct datarec);

> +       __uint(max_entries, 1);

> +} rx_cnt SEC(".maps");

> +

> +/* Used by trace point */

> +struct {

> +       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);

> +       __type(key, u32);

> +       __type(value, struct datarec);

> +       __uint(max_entries, 2);

> +       /* TODO: have entries for all possible errno's */

> +} redirect_err_cnt SEC(".maps");

> +

> +/* Used by trace point */

> +struct {

> +       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);

> +       __type(key, u32);

> +       __type(value, struct datarec);

> +       __uint(max_entries, MAX_CPUS);

> +} cpumap_enqueue_cnt SEC(".maps");


One way to squeeze a bit more performance would be to use
global variables instead of maps:

struct datarec cpu_map_enqueue_cnts[MAX_CPUS][MAX_CPUS];

and other PERCPU_ARRAY arrays could be just one-dimensional arrays.

You'd need to ensure each value sits on its own cache-line, of course.

> +

> +/* Used by trace point */

> +struct {

> +       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);

> +       __type(key, u32);

> +       __type(value, struct datarec);

> +       __uint(max_entries, 1);

> +} cpumap_kthread_cnt SEC(".maps");

> +


[...]

> +

> +/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format

> + * Code in:         kernel/include/trace/events/xdp.h

> + */

> +struct cpumap_enqueue_ctx {

> +       u64 __pad;              // First 8 bytes are not accessible by bpf code

> +       int map_id;             //      offset:8;  size:4; signed:1;

> +       u32 act;                //      offset:12; size:4; signed:0;

> +       int cpu;                //      offset:16; size:4; signed:1;

> +       unsigned int drops;     //      offset:20; size:4; signed:0;

> +       unsigned int processed; //      offset:24; size:4; signed:0;

> +       int to_cpu;             //      offset:28; size:4; signed:1;

> +};


if you used vmlinux.h, this is already in there as struct
trace_event_raw_xdp_cpumap_enqueue, similarly for other tracepoints

> +

> +SEC("tracepoint/xdp/xdp_cpumap_enqueue")

> +int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)

> +{

> +       u32 to_cpu = ctx->to_cpu;

> +       struct datarec *rec;

> +

> +       if (to_cpu >= MAX_CPUS)

> +               return 1;

> +

> +       rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);

> +       if (!rec)

> +               return 0;

> +       rec->processed += ctx->processed;

> +       rec->dropped   += ctx->drops;

> +

> +       /* Record bulk events, then userspace can calc average bulk size */

> +       if (ctx->processed > 0)

> +               rec->issue += 1;

> +

> +       /* Inception: It's possible to detect overload situations, via

> +        * this tracepoint.  This can be used for creating a feedback

> +        * loop to XDP, which can take appropriate actions to mitigate

> +        * this overload situation.

> +        */

> +       return 0;

> +}

> +


[...]
diff mbox series

Patch

diff --git a/samples/bpf/xdp_sample_kern.h b/samples/bpf/xdp_sample_kern.h
new file mode 100644
index 000000000000..bb809542ac20
--- /dev/null
+++ b/samples/bpf/xdp_sample_kern.h
@@ -0,0 +1,220 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*  GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */
+#pragma once
+
+#include <uapi/linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define MAX_CPUS 64
+
+/* Common stats data record to keep userspace more simple */
+struct datarec {
+	__u64 processed;
+	__u64 dropped;
+	__u64 issue;
+	__u64 xdp_pass;
+	__u64 xdp_drop;
+	__u64 xdp_redirect;
+};
+
+/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
+ * feedback.  Redirect TX errors can be caught via a tracepoint.
+ */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, struct datarec);
+	__uint(max_entries, 1);
+} rx_cnt SEC(".maps");
+
+/* Used by trace point */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, struct datarec);
+	__uint(max_entries, 2);
+	/* TODO: have entries for all possible errno's */
+} redirect_err_cnt SEC(".maps");
+
+/* Used by trace point */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, struct datarec);
+	__uint(max_entries, MAX_CPUS);
+} cpumap_enqueue_cnt SEC(".maps");
+
+/* Used by trace point */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, struct datarec);
+	__uint(max_entries, 1);
+} cpumap_kthread_cnt SEC(".maps");
+
+/* Used by trace point */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, struct datarec);
+	__uint(max_entries, 1);
+} exception_cnt SEC(".maps");
+
+/*** Trace point code ***/
+
+/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
+ * Code in:                kernel/include/trace/events/xdp.h
+ */
+struct xdp_redirect_ctx {
+	u64 __pad;	// First 8 bytes are not accessible by bpf code
+	int prog_id;	//	offset:8;  size:4; signed:1;
+	u32 act;	//	offset:12  size:4; signed:0;
+	int ifindex;	//	offset:16  size:4; signed:1;
+	int err;	//	offset:20  size:4; signed:1;
+	int to_ifindex;	//	offset:24  size:4; signed:1;
+	u32 map_id;	//	offset:28  size:4; signed:0;
+	int map_index;	//	offset:32  size:4; signed:1;
+};			//	offset:36
+
+enum {
+	XDP_REDIRECT_SUCCESS = 0,
+	XDP_REDIRECT_ERROR = 1
+};
+
+static __always_inline
+int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
+{
+	u32 key = XDP_REDIRECT_ERROR;
+	struct datarec *rec;
+	int err = ctx->err;
+
+	if (!err)
+		key = XDP_REDIRECT_SUCCESS;
+
+	rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
+	if (!rec)
+		return 0;
+	rec->dropped += 1;
+
+	return 0; /* Indicate event was filtered (no further processing)*/
+	/*
+	 * Returning 1 here would allow e.g. a perf-record tracepoint
+	 * to see and record these events, but it doesn't work well
+	 * in-practice as stopping perf-record also unload this
+	 * bpf_prog.  Plus, there is additional overhead of doing so.
+	 */
+}
+
+SEC("tracepoint/xdp/xdp_redirect_err")
+int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
+{
+	return xdp_redirect_collect_stat(ctx);
+}
+
+SEC("tracepoint/xdp/xdp_redirect_map_err")
+int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
+{
+	return xdp_redirect_collect_stat(ctx);
+}
+
+/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
+ * Code in:                kernel/include/trace/events/xdp.h
+ */
+struct xdp_exception_ctx {
+	u64 __pad;	// First 8 bytes are not accessible by bpf code
+	int prog_id;	//	offset:8;  size:4; signed:1;
+	u32 act;	//	offset:12; size:4; signed:0;
+	int ifindex;	//	offset:16; size:4; signed:1;
+};
+
+SEC("tracepoint/xdp/xdp_exception")
+int trace_xdp_exception(struct xdp_exception_ctx *ctx)
+{
+	struct datarec *rec;
+	u32 key = 0;
+
+	rec = bpf_map_lookup_elem(&exception_cnt, &key);
+	if (!rec)
+		return 1;
+	rec->dropped += 1;
+
+	return 0;
+}
+
+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
+ * Code in:         kernel/include/trace/events/xdp.h
+ */
+struct cpumap_enqueue_ctx {
+	u64 __pad;		// First 8 bytes are not accessible by bpf code
+	int map_id;		//	offset:8;  size:4; signed:1;
+	u32 act;		//	offset:12; size:4; signed:0;
+	int cpu;		//	offset:16; size:4; signed:1;
+	unsigned int drops;	//	offset:20; size:4; signed:0;
+	unsigned int processed;	//	offset:24; size:4; signed:0;
+	int to_cpu;		//	offset:28; size:4; signed:1;
+};
+
+SEC("tracepoint/xdp/xdp_cpumap_enqueue")
+int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
+{
+	u32 to_cpu = ctx->to_cpu;
+	struct datarec *rec;
+
+	if (to_cpu >= MAX_CPUS)
+		return 1;
+
+	rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
+	if (!rec)
+		return 0;
+	rec->processed += ctx->processed;
+	rec->dropped   += ctx->drops;
+
+	/* Record bulk events, then userspace can calc average bulk size */
+	if (ctx->processed > 0)
+		rec->issue += 1;
+
+	/* Inception: It's possible to detect overload situations, via
+	 * this tracepoint.  This can be used for creating a feedback
+	 * loop to XDP, which can take appropriate actions to mitigate
+	 * this overload situation.
+	 */
+	return 0;
+}
+
+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
+ * Code in:         kernel/include/trace/events/xdp.h
+ */
+struct cpumap_kthread_ctx {
+	u64 __pad;			// First 8 bytes are not accessible
+	int map_id;			//	offset:8;  size:4; signed:1;
+	u32 act;			//	offset:12; size:4; signed:0;
+	int cpu;			//	offset:16; size:4; signed:1;
+	unsigned int drops;		//	offset:20; size:4; signed:0;
+	unsigned int processed;		//	offset:24; size:4; signed:0;
+	int sched;			//	offset:28; size:4; signed:1;
+	unsigned int xdp_pass;		//	offset:32; size:4; signed:0;
+	unsigned int xdp_drop;		//	offset:36; size:4; signed:0;
+	unsigned int xdp_redirect;	//	offset:40; size:4; signed:0;
+};
+
+SEC("tracepoint/xdp/xdp_cpumap_kthread")
+int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
+{
+	struct datarec *rec;
+	u32 key = 0;
+
+	rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
+	if (!rec)
+		return 0;
+	rec->processed += ctx->processed;
+	rec->dropped   += ctx->drops;
+	rec->xdp_pass  += ctx->xdp_pass;
+	rec->xdp_drop  += ctx->xdp_drop;
+	rec->xdp_redirect  += ctx->xdp_redirect;
+
+	/* Count times kthread yielded CPU via schedule call */
+	if (ctx->sched)
+		rec->issue++;
+
+	return 0;
+}