diff mbox

[07/13] bpf tools: Load a program with different instances using preprocessor

Message ID 1447675815-166222-8-git-send-email-wangnan0@huawei.com
State New
Headers show

Commit Message

Wang Nan Nov. 16, 2015, 12:10 p.m. UTC
This patch is a preparation for BPF prologue support which allows
generating a series of BPF bytecode for fetching kernel data before
calling program code. With the newly introduced multiple instances
support, perf is able to create different prologues for different
kprobe points.

Before this patch, a bpf_program could be loaded into the kernel only once,
yielding a single resulting fd. What this patch does is allow creating
and loading different variants of one bpf_program, then fetching their
fds.

The basic idea of this patch is described here. A detailed description of
the newly introduced APIs can be found in the comments in the patch body.

The key part of this patch is the new mechanism in bpf_program__load().
Instead of loading the BPF program into the kernel directly, it calls a
'pre-processor' to generate program instances, based on the original code,
which are what is finally loaded into the kernel. To enable generating
multiple instances, libbpf passes an index to the pre-processor
so it knows which instance is being loaded.

The pre-processor should be passed in by libbpf's user (perf) using
bpf_program__set_prep(). The number of instances and the relationship
between indices and the target instances should be clear when calling
bpf_program__set_prep().

To retrieve the fd for a specific instance of a program,
bpf_program__nth_fd() is introduced. It returns the resulting fd
according to the index.

Signed-off-by: Wang Nan <wangnan0@huawei.com>

Signed-off-by: He Kuang <hekuang@huawei.com>

Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
---
 tools/lib/bpf/libbpf.c | 145 ++++++++++++++++++++++++++++++++++++++++++++++---
 tools/lib/bpf/libbpf.h |  64 ++++++++++++++++++++++
 2 files changed, 200 insertions(+), 9 deletions(-)

-- 
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Comments

Wang Nan Nov. 17, 2015, 3:53 a.m. UTC | #1
On 2015/11/17 3:02, Arnaldo Carvalho de Melo wrote:
> Em Mon, Nov 16, 2015 at 12:10:09PM +0000, Wang Nan escreveu:

>> This patch is a preparation for BPF prologue support which allows

>> generating a series of BPF bytecode for fetching kernel data before

>> calling program code. With the newly introduced multiple instances

>> support, perf is able to create different prologues for different

>> kprobe points.

>>

>> Before this patch, a bpf_program can be loaded into kernel only once,

>> and get the only resuling fd. What this patch done is to allow creating

>> and loading different variants of one bpf_program, then fetching their

>> fds.

>>

>> Here describe the basic idea in this patch. The detail description of

>> the newly introduced APIs can be found in comment in the patch body.

>>

>> The key of this patch is the new mechanism in bpf_program__load().

>> Instead of loading BPF program into kernel directly, it calls a

>> 'pre-processor' to generate program instances which would be final

>> loaded into kernel based on the original code. To enable multiple

>> instances generation, libbpf passes an index to the pre-processor

>> so it know which instance is being loaded.

>>

>> Pre-processor should be passed from libbpf's user (perf) using

>> bpf_program__set_prep(). The number of instances and the relationship

>> between indics and the target instance should be clear when calling

>> bpf_program__set_prep().

>>

>> To retrive fd for a specific instance of a program,

>> bpf_program__nth_fd() is introduced. It return the resuling fd

>> according to index.

>>

>> Signed-off-by: Wang Nan <wangnan0@huawei.com>

>> Signed-off-by: He Kuang <hekuang@huawei.com>

>> Cc: Alexei Starovoitov <ast@kernel.org>

>> Cc: Arnaldo Carvalho de Melo <acme@redhat.com>

>> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>

>> Cc: Zefan Li <lizefan@huawei.com>

>> Cc: pi3orama@163.com

>> ---

>>   tools/lib/bpf/libbpf.c | 145 ++++++++++++++++++++++++++++++++++++++++++++++---

>>   tools/lib/bpf/libbpf.h |  64 ++++++++++++++++++++++

>>   2 files changed, 200 insertions(+), 9 deletions(-)

>>

>> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c

>> index e176bad..fcfa39f 100644

>> --- a/tools/lib/bpf/libbpf.c

>> +++ b/tools/lib/bpf/libbpf.c

>> @@ -152,7 +152,11 @@ struct bpf_program {

>>   	} *reloc_desc;

>>   	int nr_reloc;

>>   

>> -	int fd;

>> +	struct {

>> +		int nr;

>> +		int *fds;

>> +	} instances;

>> +	bpf_program_prep_t preprocessor;

>>   

>>   	struct bpf_object *obj;

>>   	void *priv;

>> @@ -206,10 +210,24 @@ struct bpf_object {

>>   

>>   static void bpf_program__unload(struct bpf_program *prog)

>>   {

>> +	int i;

>> +

>>   	if (!prog)

>>   		return;

>>   

>> -	zclose(prog->fd);

>> +	/*

>> +	 * If the object is opened but the program is never loaded,

>> +	 * it is possible that prog->instances.nr == -1.

>> +	 */

>> +	if (prog->instances.nr > 0) {

>> +		for (i = 0; i < prog->instances.nr; i++)

>> +			zclose(prog->instances.fds[i]);

>> +	} else if (prog->instances.nr != -1)

>> +		pr_warning("Internal error: instances.nr is %d\n",

>> +			   prog->instances.nr);

>> +

>

> Multi line if/else blocks should be under {}, like you did for the if

> part, but forgot to do for the else part.

>

> I'm fixing this up, but please try to be consistent about these details

> next time, more below.

>

>> +	prog->instances.nr = -1;

>> +	zfree(&prog->instances.fds);

>>   }

>>   

>>   static void bpf_program__exit(struct bpf_program *prog)

>> @@ -260,7 +278,8 @@ bpf_program__init(void *data, size_t size, char *name, int idx,

>>   	memcpy(prog->insns, data,

>>   	       prog->insns_cnt * sizeof(struct bpf_insn));

>>   	prog->idx = idx;

>> -	prog->fd = -1;

>> +	prog->instances.fds = NULL;

>> +	prog->instances.nr = -1;

>>   

>>   	return 0;

>>   errout:

>> @@ -860,13 +879,73 @@ static int

>>   bpf_program__load(struct bpf_program *prog,

>>   		  char *license, u32 kern_version)

>>   {

>> -	int err, fd;

>> +	int err = 0, fd, i;

>> +

>> +	if (prog->instances.nr < 0 || !prog->instances.fds) {

>> +		if (prog->preprocessor) {

>> +			pr_warning("Internal error: can't load program '%s'\n",

>> +				   prog->section_name);

>> +			return -LIBBPF_ERRNO__INTERNAL;

>> +		}

> Those errors I think should come with some prefix to provide more

> context about what kind of "internal error" is this, in some cases I am

> adding "BPF", but not in all, more to think about how to make this

> clearer.


In bpf_program__set_prep, the parameters are already checked, so
if prog->preprocessor is set (!= NULL), the conditions in the
'if' statement should never be satisfied. Therefore it must be an
internal error (bug?) in libbpf.

All LIBBPF error codes except LIBBPF_ERRNO__INTERNAL can be triggered
by the user in some way (for example, by providing an invalid object file).
Therefore, we need to tell them the reason for the failure formally, and let
the error messages hint to users how to fix their fault.
LIBBPF_ERRNO__INTERNAL is different. If it is raised, there must be a bug
in libbpf or perf (incorrect use of libbpf's API). In this case, we
have already provided pr_{debug,warning} to developers. In my opinion,
formal reporting is not required because perf's users don't need to
be told too much about the internal implementation (it causes confusion).
All they need to know is "there is a bug, let's report it with '-v' output".

Thank you.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
diff mbox

Patch

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e176bad..fcfa39f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -152,7 +152,11 @@  struct bpf_program {
 	} *reloc_desc;
 	int nr_reloc;
 
-	int fd;
+	struct {
+		int nr;
+		int *fds;
+	} instances;
+	bpf_program_prep_t preprocessor;
 
 	struct bpf_object *obj;
 	void *priv;
@@ -206,10 +210,24 @@  struct bpf_object {
 
 static void bpf_program__unload(struct bpf_program *prog)
 {
+	int i;
+
 	if (!prog)
 		return;
 
-	zclose(prog->fd);
+	/*
+	 * If the object is opened but the program is never loaded,
+	 * it is possible that prog->instances.nr == -1.
+	 */
+	if (prog->instances.nr > 0) {
+		for (i = 0; i < prog->instances.nr; i++)
+			zclose(prog->instances.fds[i]);
+	} else if (prog->instances.nr != -1)
+		pr_warning("Internal error: instances.nr is %d\n",
+			   prog->instances.nr);
+
+	prog->instances.nr = -1;
+	zfree(&prog->instances.fds);
 }
 
 static void bpf_program__exit(struct bpf_program *prog)
@@ -260,7 +278,8 @@  bpf_program__init(void *data, size_t size, char *name, int idx,
 	memcpy(prog->insns, data,
 	       prog->insns_cnt * sizeof(struct bpf_insn));
 	prog->idx = idx;
-	prog->fd = -1;
+	prog->instances.fds = NULL;
+	prog->instances.nr = -1;
 
 	return 0;
 errout:
@@ -860,13 +879,73 @@  static int
 bpf_program__load(struct bpf_program *prog,
 		  char *license, u32 kern_version)
 {
-	int err, fd;
+	int err = 0, fd, i;
+
+	if (prog->instances.nr < 0 || !prog->instances.fds) {
+		if (prog->preprocessor) {
+			pr_warning("Internal error: can't load program '%s'\n",
+				   prog->section_name);
+			return -LIBBPF_ERRNO__INTERNAL;
+		}
+
+		prog->instances.fds = malloc(sizeof(int));
+		if (!prog->instances.fds) {
+			pr_warning("No enough memory for fds\n");
+			return -ENOMEM;
+		}
+		prog->instances.nr = 1;
+		prog->instances.fds[0] = -1;
+	}
+
+	if (!prog->preprocessor) {
+		if (prog->instances.nr != 1)
+			pr_warning("Program '%s' inconsistent: nr(%d) not 1\n",
+				   prog->section_name, prog->instances.nr);
 
-	err = load_program(prog->insns, prog->insns_cnt,
-			   license, kern_version, &fd);
-	if (!err)
-		prog->fd = fd;
+		err = load_program(prog->insns, prog->insns_cnt,
+				   license, kern_version, &fd);
+		if (!err)
+			prog->instances.fds[0] = fd;
+		goto out;
+	}
+
+	for (i = 0; i < prog->instances.nr; i++) {
+		struct bpf_prog_prep_result result;
+		bpf_program_prep_t preprocessor = prog->preprocessor;
+
+		bzero(&result, sizeof(result));
+		err = (*preprocessor)(prog, i, prog->insns,
+				      prog->insns_cnt, &result);
+		if (err) {
+			pr_warning("Preprocessing %dth instance of program '%s' failed\n",
+				   i, prog->section_name);
+			goto out;
+		}
+
+		if (!result.new_insn_ptr || !result.new_insn_cnt) {
+			pr_debug("Skip loading %dth instance of program '%s'\n",
+				 i, prog->section_name);
+			prog->instances.fds[i] = -1;
+			if (result.pfd)
+				*result.pfd = -1;
+			continue;
+		}
+
+		err = load_program(result.new_insn_ptr,
+				   result.new_insn_cnt,
+				   license, kern_version, &fd);
+
+		if (err) {
+			pr_warning("Loading %dth instance of program '%s' failed\n",
+					i, prog->section_name);
+			goto out;
+		}
 
+		if (result.pfd)
+			*result.pfd = fd;
+		prog->instances.fds[i] = fd;
+	}
+out:
 	if (err)
 		pr_warning("failed to load program '%s'\n",
 			   prog->section_name);
@@ -1121,5 +1200,53 @@  const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
 
 int bpf_program__fd(struct bpf_program *prog)
 {
-	return prog->fd;
+	return bpf_program__nth_fd(prog, 0);
+}
+
+int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
+			  bpf_program_prep_t prep)
+{
+	int *instances_fds;
+
+	if (nr_instances <= 0 || !prep)
+		return -EINVAL;
+
+	if (prog->instances.nr > 0 || prog->instances.fds) {
+		pr_warning("Can't set pre-processor after loading\n");
+		return -EINVAL;
+	}
+
+	instances_fds = malloc(sizeof(int) * nr_instances);
+	if (!instances_fds) {
+		pr_warning("alloc memory failed for fds\n");
+		return -ENOMEM;
+	}
+
+	/* fill all fd with -1 */
+	memset(instances_fds, 0xff, sizeof(int) * nr_instances);
+
+	prog->instances.nr = nr_instances;
+	prog->instances.fds = instances_fds;
+	prog->preprocessor = prep;
+	return 0;
+}
+
+int bpf_program__nth_fd(struct bpf_program *prog, int n)
+{
+	int fd;
+
+	if (n >= prog->instances.nr || n < 0) {
+		pr_warning("Can't get the %dth fd from program %s: only %d instances\n",
+			   n, prog->section_name, prog->instances.nr);
+		return -EINVAL;
+	}
+
+	fd = prog->instances.fds[n];
+	if (fd < 0) {
+		pr_warning("%dth instance of program '%s' is invalid\n",
+			   n, prog->section_name);
+		return -ENOENT;
+	}
+
+	return fd;
 }
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index c9a9aef..949df4b 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -88,6 +88,70 @@  const char *bpf_program__title(struct bpf_program *prog, bool needs_copy);
 
 int bpf_program__fd(struct bpf_program *prog);
 
+struct bpf_insn;
+
+/*
+ * Libbpf allows callers to adjust BPF programs before they are loaded
+ * into the kernel. One program in an object file can be transformed into
+ * multiple variants to be attached to different code.
+ *
+ * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd
+ * are the APIs for this purpose.
+ *
+ * - bpf_program_prep_t:
+ *   It defines a 'preprocessor', which is a caller-defined function
+ *   passed to libbpf through bpf_program__set_prep(), and will be
+ *   called before the program is loaded. The preprocessor should adjust
+ *   the program once for each instance according to the index
+ *   passed to it.
+ *
+ * - bpf_program__set_prep:
+ *   Attaches a preprocessor to a BPF program. The number of instances
+ *   that should be created is also passed through this function.
+ *
+ * - bpf_program__nth_fd:
+ *   After the program is loaded, get the resulting fd from the bpf program
+ *   for each instance.
+ *
+ * If bpf_program__set_prep() is not used, the program will be loaded
+ * without adjustment during bpf_object__load(). The program has only
+ * one instance. In this case bpf_program__fd(prog) is equal to
+ * bpf_program__nth_fd(prog, 0).
+ */
+
+struct bpf_prog_prep_result {
+	/*
+	 * If not NULL, load new instruction array.
+	 * If set to NULL, don't load this instance.
+	 */
+	struct bpf_insn *new_insn_ptr;
+	int new_insn_cnt;
+
+	/* If not NULL, result fd is set to it */
+	int *pfd;
+};
+
+/*
+ * Parameters of bpf_program_prep_t:
+ *  - prog:	The bpf_program being loaded.
+ *  - n:	Index of instance being generated.
+ *  - insns:	BPF instructions array.
+ *  - insns_cnt:Number of instructions in insns.
+ *  - res:	Output parameter, result of transformation.
+ *
+ * Return value:
+ *  - Zero: pre-processing succeeded.
+ *  - Non-zero: pre-processing error, stop loading.
+typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
+				  struct bpf_insn *insns, int insns_cnt,
+				  struct bpf_prog_prep_result *res);
+
+int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
+			  bpf_program_prep_t prep);
+
+int bpf_program__nth_fd(struct bpf_program *prog, int n);
+
 /*
  * We don't need __attribute__((packed)) now since it is
  * unnecessary for 'bpf_map_def' because they are all aligned.