diff mbox series

[13/23] io_uring: implement bpf prog registration

Message ID c246d3736b9440532f3e82199a616e3f74d1b8ba.1621424513.git.asml.silence@gmail.com
State New
Headers show
Series io_uring BPF requests | expand

Commit Message

Pavel Begunkov May 19, 2021, 2:13 p.m. UTC
[de]register BPF programs through io_uring_register() with new
IORING_REGISTER_BPF and IORING_UNREGISTER_BPF commands.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c                 | 81 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  2 +
 2 files changed, 83 insertions(+)

Comments

Song Liu May 20, 2021, 11:45 p.m. UTC | #1
> On May 19, 2021, at 7:13 AM, Pavel Begunkov <asml.silence@gmail.com> wrote:

> 

> [de]register BPF programs through io_uring_register() with new

> IORING_ATTACH_BPF and IORING_DETACH_BPF commands.

> 

> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>

> ---

> fs/io_uring.c                 | 81 +++++++++++++++++++++++++++++++++++

> include/uapi/linux/io_uring.h |  2 +

> 2 files changed, 83 insertions(+)

> 

> diff --git a/fs/io_uring.c b/fs/io_uring.c

> index 882b16b5e5eb..b13cbcd5c47b 100644

> --- a/fs/io_uring.c

> +++ b/fs/io_uring.c

> @@ -78,6 +78,7 @@

> #include <linux/task_work.h>

> #include <linux/pagemap.h>

> #include <linux/io_uring.h>

> +#include <linux/bpf.h>

> 

> #define CREATE_TRACE_POINTS

> #include <trace/events/io_uring.h>

> @@ -103,6 +104,8 @@

> #define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \

> 				 IORING_REGISTER_LAST + IORING_OP_LAST)

> 

> +#define IORING_MAX_BPF_PROGS	100


Is 100 a realistic number here? 

> +

> #define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK|	\

> 				IOSQE_IO_HARDLINK | IOSQE_ASYNC | \

> 				IOSQE_BUFFER_SELECT)

> @@ -266,6 +269,10 @@ struct io_restriction {

> 	bool registered;

> };

> 


[...]
Pavel Begunkov May 21, 2021, 12:43 a.m. UTC | #2
On 5/21/21 12:45 AM, Song Liu wrote:
>> On May 19, 2021, at 7:13 AM, Pavel Begunkov <asml.silence@gmail.com> wrote:

>>

>> [de]register BPF programs through io_uring_register() with new

>> IORING_ATTACH_BPF and IORING_DETACH_BPF commands.

>>

>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>

>> ---

>> fs/io_uring.c                 | 81 +++++++++++++++++++++++++++++++++++

>> include/uapi/linux/io_uring.h |  2 +

>> 2 files changed, 83 insertions(+)

>>

>> diff --git a/fs/io_uring.c b/fs/io_uring.c

>> index 882b16b5e5eb..b13cbcd5c47b 100644

>> --- a/fs/io_uring.c

>> +++ b/fs/io_uring.c

>> @@ -78,6 +78,7 @@

>> #include <linux/task_work.h>

>> #include <linux/pagemap.h>

>> #include <linux/io_uring.h>

>> +#include <linux/bpf.h>

>>

>> #define CREATE_TRACE_POINTS

>> #include <trace/events/io_uring.h>

>> @@ -103,6 +104,8 @@

>> #define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \

>> 				 IORING_REGISTER_LAST + IORING_OP_LAST)

>>

>> +#define IORING_MAX_BPF_PROGS	100

> 

> Is 100 a realistic number here? 


Arbitrary test value, will update

> 

>> +

>> #define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK|	\

>> 				IOSQE_IO_HARDLINK | IOSQE_ASYNC | \

>> 				IOSQE_BUFFER_SELECT)

>> @@ -266,6 +269,10 @@ struct io_restriction {

>> 	bool registered;

>> };

>>

> 

> [...]

> 


-- 
Pavel Begunkov
diff mbox series

Patch

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 882b16b5e5eb..b13cbcd5c47b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -78,6 +78,7 @@ 
 #include <linux/task_work.h>
 #include <linux/pagemap.h>
 #include <linux/io_uring.h>
+#include <linux/bpf.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/io_uring.h>
@@ -103,6 +104,8 @@ 
 #define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \
 				 IORING_REGISTER_LAST + IORING_OP_LAST)
 
+#define IORING_MAX_BPF_PROGS	100
+
 #define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK|	\
 				IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
 				IOSQE_BUFFER_SELECT)
@@ -266,6 +269,10 @@  struct io_restriction {
 	bool registered;
 };
 
+struct io_bpf_prog {
+	struct bpf_prog *prog;	/* NULL while the slot holds no program */
+};
+
 enum {
 	IO_SQ_THREAD_SHOULD_STOP = 0,
 	IO_SQ_THREAD_SHOULD_PARK,
@@ -411,6 +418,10 @@  struct io_ring_ctx {
 	struct xarray		personalities;
 	u32			pers_next;
 
+	/* bpf programs */
+	unsigned		nr_bpf_progs;
+	struct io_bpf_prog	*bpf_progs;
+
 	struct fasync_struct	*cq_fasync;
 	struct eventfd_ctx	*cq_ev_fd;
 	atomic_t		cq_timeouts;
@@ -8627,6 +8638,66 @@  static void io_req_caches_free(struct io_ring_ctx *ctx)
 	mutex_unlock(&ctx->uring_lock);
 }
 
+/* Put every registered BPF program and free the table; -ENXIO if none. */
+static int io_bpf_unregister(struct io_ring_ctx *ctx)
+{
+	struct io_bpf_prog *progs = ctx->bpf_progs;
+	unsigned int idx, nr = ctx->nr_bpf_progs;
+
+	if (nr == 0)
+		return -ENXIO;
+
+	for (idx = 0; idx < nr; idx++) {
+		if (progs[idx].prog)	/* NULL slots hold no reference */
+			bpf_prog_put(progs[idx].prog);
+	}
+	kfree(progs);
+	ctx->bpf_progs = NULL;
+	ctx->nr_bpf_progs = 0;
+	return 0;
+}
+
+/* Attach the BPF programs named by the fd array at arg; fd -1 skips a slot. */
+static int io_bpf_register(struct io_ring_ctx *ctx, void __user *arg,
+			   unsigned int nr_args)
+{
+	u32 __user *fds = arg;
+	int i, ret = 0;
+
+	if (!nr_args || nr_args > IORING_MAX_BPF_PROGS)
+		return -EINVAL;
+	if (ctx->nr_bpf_progs)
+		return -EBUSY;
+
+	ctx->bpf_progs = kcalloc(nr_args, sizeof(*ctx->bpf_progs), GFP_KERNEL);
+	if (!ctx->bpf_progs)
+		return -ENOMEM;
+
+	for (i = 0; i < nr_args; ++i) {
+		struct bpf_prog *prog;
+		u32 fd;
+
+		if (copy_from_user(&fd, &fds[i], sizeof(fd))) {
+			ret = -EFAULT;
+			break;
+		}
+		if (fd == -1)
+			continue;
+
+		prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_IOURING);
+		if (IS_ERR(prog)) {
+			ret = PTR_ERR(prog);
+			break;
+		}
+		ctx->bpf_progs[i].prog = prog;
+	}
+	/* count all slots (unused ones are NULL) so failing at i == 0 frees too */
+	ctx->nr_bpf_progs = nr_args;
+	if (ret)
+		io_bpf_unregister(ctx);
+	return ret;
+}
+
 static bool io_wait_rsrc_data(struct io_rsrc_data *data)
 {
 	if (!data)
@@ -8657,6 +8728,7 @@  static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	mutex_unlock(&ctx->uring_lock);
 	io_eventfd_unregister(ctx);
 	io_destroy_buffers(ctx);
+	io_bpf_unregister(ctx);
 	if (ctx->sq_creds)
 		put_cred(ctx->sq_creds);
 
@@ -10188,6 +10260,15 @@  static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 	case IORING_REGISTER_RSRC_UPDATE:
 		ret = io_register_rsrc_update(ctx, arg, nr_args);
 		break;
+	case IORING_REGISTER_BPF:
+		ret = io_bpf_register(ctx, arg, nr_args);
+		break;
+	case IORING_UNREGISTER_BPF:
+		ret = -EINVAL;
+		if (arg || nr_args)
+			break;
+		ret = io_bpf_unregister(ctx);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 67a97c793de7..b450f41d7389 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -304,6 +304,8 @@  enum {
 	IORING_REGISTER_ENABLE_RINGS		= 12,
 	IORING_REGISTER_RSRC			= 13,
 	IORING_REGISTER_RSRC_UPDATE		= 14,
+	IORING_REGISTER_BPF			= 15,
+	IORING_UNREGISTER_BPF			= 16,
 
 	/* this goes last */
 	IORING_REGISTER_LAST