| Message ID | 8ec8373d406d1fcb41719e641799dcc5c0455db3.1621424513.git.asml.silence@gmail.com |
|---|---|
| State | New |
| Series | io_uring BPF requests |
> On May 19, 2021, at 7:13 AM, Pavel Begunkov <asml.silence@gmail.com> wrote:
>
> Add a BPF_FUNC_iouring_queue_sqe BPF function as a demonstration of
> submitting a new request by a BPF request.
>
> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> ---
> fs/io_uring.c            | 51 ++++++++++++++++++++++++++++++++++++----
> include/uapi/linux/bpf.h |  1 +
> 2 files changed, 48 insertions(+), 4 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 20fddc5945f2..aae786291c57 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -882,6 +882,7 @@ struct io_defer_entry {
> };
>
> struct io_bpf_ctx {
> +	struct io_ring_ctx	*ctx;
> };
>
> struct io_op_def {
> @@ -6681,7 +6682,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
> 		ret = -EBADF;
> 	}
>
> -	state->ios_left--;
> +	if (state->ios_left > 1)
> +		state->ios_left--;
> 	return ret;
> }
>
> @@ -10345,10 +10347,50 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
> 	return ret;
> }
>
> +BPF_CALL_3(io_bpf_queue_sqe, struct io_bpf_ctx *, bpf_ctx,
> +	   const struct io_uring_sqe *, sqe,
> +	   u32, sqe_len)
> +{
> +	struct io_ring_ctx *ctx = bpf_ctx->ctx;
> +	struct io_kiocb *req;
> +
> +	if (sqe_len != sizeof(struct io_uring_sqe))
> +		return -EINVAL;
> +
> +	req = io_alloc_req(ctx);
> +	if (unlikely(!req))
> +		return -ENOMEM;
> +	if (!percpu_ref_tryget_many(&ctx->refs, 1)) {
> +		kmem_cache_free(req_cachep, req);
> +		return -EAGAIN;
> +	}
> +	percpu_counter_add(&current->io_uring->inflight, 1);
> +	refcount_add(1, &current->usage);
> +
> +	/* returns number of submitted SQEs or an error */
> +	return !io_submit_sqe(ctx, req, sqe);
> +}
> +
> +const struct bpf_func_proto io_bpf_queue_sqe_proto = {
> +	.func = io_bpf_queue_sqe,
> +	.gpl_only = false,
> +	.ret_type = RET_INTEGER,
> +	.arg1_type = ARG_PTR_TO_CTX,
> +	.arg2_type = ARG_PTR_TO_MEM,
> +	.arg3_type = ARG_CONST_SIZE,
> +};
> +
> static const struct bpf_func_proto *
> io_bpf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> {
> -	return bpf_base_func_proto(func_id);
> +	switch (func_id) {
> +	case BPF_FUNC_copy_from_user:
> +		return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
> +	case BPF_FUNC_iouring_queue_sqe:
> +		return prog->aux->sleepable ? &io_bpf_queue_sqe_proto : NULL;
> +	default:
> +		return bpf_base_func_proto(func_id);
> +	}
> }
>
> static bool io_bpf_is_valid_access(int off, int size,
> @@ -10379,9 +10421,10 @@ static void io_bpf_run(struct io_kiocb *req, unsigned int issue_flags)
> 		     atomic_read(&req->task->io_uring->in_idle)))
> 		goto done;
>
> -	memset(&bpf_ctx, 0, sizeof(bpf_ctx));
> +	bpf_ctx.ctx = ctx;
> 	prog = req->bpf.prog;
>
> +	io_submit_state_start(&ctx->submit_state, 1);
> 	if (prog->aux->sleepable) {
> 		rcu_read_lock();
> 		bpf_prog_run_pin_on_cpu(req->bpf.prog, &bpf_ctx);
> @@ -10389,7 +10432,7 @@ static void io_bpf_run(struct io_kiocb *req, unsigned int issue_flags)
> 	} else {
> 		bpf_prog_run_pin_on_cpu(req->bpf.prog, &bpf_ctx);
> 	}
> -
> +	io_submit_state_end(&ctx->submit_state, ctx);
> 	ret = 0;
> done:
> 	__io_req_complete(req, issue_flags, ret, 0);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index de544f0fbeef..cc268f749a7d 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -4082,6 +4082,7 @@ union bpf_attr {
> 	FN(ima_inode_hash),		\
> 	FN(sock_from_file),		\
> 	FN(check_mtu),			\
> +	FN(iouring_queue_sqe),		\

We need to describe this function in the comment above, just like 20/23
does.

> 	/* */
>
> /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> --
> 2.31.1
>
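For reference, helper descriptions in include/uapi/linux/bpf.h live in the big comment block above the FN() list that this hunk touches. An entry for this helper might look roughly like the sketch below; the prototype and wording are illustrative only, not taken from the series:

/*
 * Illustrative only -- not from the posted series.
 *
 * long bpf_iouring_queue_sqe(void *ctx, const struct io_uring_sqe *sqe, u32 sqe_len)
 *	Description
 *		Queue the SQE pointed to by *sqe*, of size *sqe_len* (which
 *		must equal sizeof(struct io_uring_sqe)), for submission on
 *		the io_uring instance executing the current BPF request.
 *		Only available to sleepable programs.
 *	Return
 *		Number of queued SQEs (0 or 1) on success, or a negative
 *		error such as -EINVAL, -ENOMEM or -EAGAIN on failure.
 */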
On Wed, May 19, 2021 at 03:13:26PM +0100, Pavel Begunkov wrote:
>
> +BPF_CALL_3(io_bpf_queue_sqe, struct io_bpf_ctx *, bpf_ctx,
> +	   const struct io_uring_sqe *, sqe,
> +	   u32, sqe_len)
> +{
> +	struct io_ring_ctx *ctx = bpf_ctx->ctx;
> +	struct io_kiocb *req;
> +
> +	if (sqe_len != sizeof(struct io_uring_sqe))
> +		return -EINVAL;
> +
> +	req = io_alloc_req(ctx);

that is GFP_KERNEL allocation.
It's only allowed from sleepable bpf progs and further down
there is a correct check for it, so all good.
But submitting sqe is a fundamental io_uring operation,
so what is the use case for non-sleepable?
In other words why bother? Allow sleepable only and simplify the code?

> +	if (unlikely(!req))
> +		return -ENOMEM;
> +	if (!percpu_ref_tryget_many(&ctx->refs, 1)) {
> +		kmem_cache_free(req_cachep, req);
> +		return -EAGAIN;
> +	}
> +	percpu_counter_add(&current->io_uring->inflight, 1);
> +	refcount_add(1, &current->usage);
> +
> +	/* returns number of submitted SQEs or an error */
> +	return !io_submit_sqe(ctx, req, sqe);

A buggy bpf prog will be able to pass junk sizeof(struct io_uring_sqe)
as 'sqe' here.
What kind of validation io_submit_sqe() does to avoid crashing the kernel?

General comments that apply to all patches:
- commit logs are way too terse. Pls expand with details.
- describe new bpf helpers in comments in bpf.h. Just adding them to an enum is not enough.
- selftest/bpf are mandatory for all new bpf features.
- consider bpf_link style of attaching bpf progs. We had enough issues with progs
  that get stuck due to application bugs. Auto-detach saves the day more often than not.
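To make the "sleepable only" suggestion concrete: if non-sleepable programs were refused at the point where they are registered with the ring, the per-helper sleepable checks in io_bpf_func_proto() from the patch above could disappear. A hypothetical simplification, not code from the series:

static const struct bpf_func_proto *
io_bpf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	/* assumes registration already rejected !prog->aux->sleepable */
	switch (func_id) {
	case BPF_FUNC_copy_from_user:
		return &bpf_copy_from_user_proto;
	case BPF_FUNC_iouring_queue_sqe:
		return &io_bpf_queue_sqe_proto;
	default:
		return bpf_base_func_proto(func_id);
	}
}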
On 5/21/21 2:07 AM, Alexei Starovoitov wrote:
> On Wed, May 19, 2021 at 03:13:26PM +0100, Pavel Begunkov wrote:
>>
>> +BPF_CALL_3(io_bpf_queue_sqe, struct io_bpf_ctx *, bpf_ctx,
>> +	   const struct io_uring_sqe *, sqe,
>> +	   u32, sqe_len)
>> +{
>> +	struct io_ring_ctx *ctx = bpf_ctx->ctx;
>> +	struct io_kiocb *req;
>> +
>> +	if (sqe_len != sizeof(struct io_uring_sqe))
>> +		return -EINVAL;
>> +
>> +	req = io_alloc_req(ctx);
>
> that is GFP_KERNEL allocation.
> It's only allowed from sleepable bpf progs and further down
> there is a correct check for it, so all good.
> But submitting sqe is a fundamental io_uring operation,
> so what is the use case for non-sleepable?
> In other words why bother? Allow sleepable only and simplify the code?

Actual submission may be moved out of BPF, so enabling it for both, but
the question I wonder about is what are the plans for sleepable programs?
E.g. if it's a marginal feature much limited in functionality, e.g. iirc
as it's not allowed to use some BPF data types, it may not be worth doing.

>
>> +	if (unlikely(!req))
>> +		return -ENOMEM;
>> +	if (!percpu_ref_tryget_many(&ctx->refs, 1)) {
>> +		kmem_cache_free(req_cachep, req);
>> +		return -EAGAIN;
>> +	}
>> +	percpu_counter_add(&current->io_uring->inflight, 1);
>> +	refcount_add(1, &current->usage);
>> +
>> +	/* returns number of submitted SQEs or an error */
>> +	return !io_submit_sqe(ctx, req, sqe);
>
> A buggy bpf prog will be able to pass junk sizeof(struct io_uring_sqe)
> as 'sqe' here.
> What kind of validation io_submit_sqe() does to avoid crashing the kernel?

It works on memory rw shared with userspace, so it already assumes the
worst.

> General comments that apply to all patches:
> - commit logs are way too terse. Pls expand with details.
> - describe new bpf helpers in comments in bpf.h. Just adding them to an enum is not enough.
> - selftest/bpf are mandatory for all new bpf features.
> - consider bpf_link style of attaching bpf progs. We had enough issues with progs
>   that get stuck due to application bugs. Auto-detach saves the day more often than not.

Thanks for taking a look! I have no idea what bpf_link is, need to check
it out.

--
Pavel Begunkov
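For background on the bpf_link remark: a bpf_link wraps an attached program in a file descriptor, and the kernel detaches the program automatically when the last reference to that fd goes away, which is what gives the auto-detach behaviour Alexei mentions. A rough sketch of how an io_uring link could be built on the existing bpf_link_init()/bpf_link_prime()/bpf_link_settle() kernel API; the link type, the ctx field and io_unregister_bpf_prog() are made-up placeholders, not from any posted series:

struct io_uring_bpf_link {
	struct bpf_link link;
	struct io_ring_ctx *ctx;	/* ring the prog is attached to */
};

static void io_uring_bpf_link_release(struct bpf_link *link)
{
	struct io_uring_bpf_link *il =
		container_of(link, struct io_uring_bpf_link, link);

	/* auto-detach: runs when the last link fd reference is dropped,
	 * even if the application forgot to unregister the program */
	io_unregister_bpf_prog(il->ctx, link->prog);	/* hypothetical helper */
}

static void io_uring_bpf_link_dealloc(struct bpf_link *link)
{
	kfree(container_of(link, struct io_uring_bpf_link, link));
}

static const struct bpf_link_ops io_uring_bpf_link_ops = {
	.release = io_uring_bpf_link_release,
	.dealloc = io_uring_bpf_link_dealloc,
};

static int io_uring_bpf_link_attach(struct io_ring_ctx *ctx, struct bpf_prog *prog)
{
	struct bpf_link_primer primer;
	struct io_uring_bpf_link *il;
	int err;

	il = kzalloc(sizeof(*il), GFP_KERNEL);
	if (!il)
		return -ENOMEM;
	il->ctx = ctx;
	/* BPF_LINK_TYPE_IOURING is a placeholder, it does not exist upstream */
	bpf_link_init(&il->link, BPF_LINK_TYPE_IOURING, &io_uring_bpf_link_ops, prog);

	err = bpf_link_prime(&il->link, &primer);
	if (err) {
		kfree(il);
		return err;
	}
	/* ... register prog with ctx here ... */
	return bpf_link_settle(&primer);	/* returns a link fd to userspace */
}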
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 20fddc5945f2..aae786291c57 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -882,6 +882,7 @@ struct io_defer_entry {
 };
 
 struct io_bpf_ctx {
+	struct io_ring_ctx	*ctx;
 };
 
 struct io_op_def {
@@ -6681,7 +6682,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		ret = -EBADF;
 	}
 
-	state->ios_left--;
+	if (state->ios_left > 1)
+		state->ios_left--;
 	return ret;
 }
 
@@ -10345,10 +10347,50 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 	return ret;
 }
 
+BPF_CALL_3(io_bpf_queue_sqe, struct io_bpf_ctx *, bpf_ctx,
+	   const struct io_uring_sqe *, sqe,
+	   u32, sqe_len)
+{
+	struct io_ring_ctx *ctx = bpf_ctx->ctx;
+	struct io_kiocb *req;
+
+	if (sqe_len != sizeof(struct io_uring_sqe))
+		return -EINVAL;
+
+	req = io_alloc_req(ctx);
+	if (unlikely(!req))
+		return -ENOMEM;
+	if (!percpu_ref_tryget_many(&ctx->refs, 1)) {
+		kmem_cache_free(req_cachep, req);
+		return -EAGAIN;
+	}
+	percpu_counter_add(&current->io_uring->inflight, 1);
+	refcount_add(1, &current->usage);
+
+	/* returns number of submitted SQEs or an error */
+	return !io_submit_sqe(ctx, req, sqe);
+}
+
+const struct bpf_func_proto io_bpf_queue_sqe_proto = {
+	.func = io_bpf_queue_sqe,
+	.gpl_only = false,
+	.ret_type = RET_INTEGER,
+	.arg1_type = ARG_PTR_TO_CTX,
+	.arg2_type = ARG_PTR_TO_MEM,
+	.arg3_type = ARG_CONST_SIZE,
+};
+
 static const struct bpf_func_proto *
 io_bpf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
-	return bpf_base_func_proto(func_id);
+	switch (func_id) {
+	case BPF_FUNC_copy_from_user:
+		return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
+	case BPF_FUNC_iouring_queue_sqe:
+		return prog->aux->sleepable ? &io_bpf_queue_sqe_proto : NULL;
+	default:
+		return bpf_base_func_proto(func_id);
+	}
 }
 
 static bool io_bpf_is_valid_access(int off, int size,
@@ -10379,9 +10421,10 @@ static void io_bpf_run(struct io_kiocb *req, unsigned int issue_flags)
 		     atomic_read(&req->task->io_uring->in_idle)))
 		goto done;
 
-	memset(&bpf_ctx, 0, sizeof(bpf_ctx));
+	bpf_ctx.ctx = ctx;
 	prog = req->bpf.prog;
 
+	io_submit_state_start(&ctx->submit_state, 1);
 	if (prog->aux->sleepable) {
 		rcu_read_lock();
 		bpf_prog_run_pin_on_cpu(req->bpf.prog, &bpf_ctx);
@@ -10389,7 +10432,7 @@ static void io_bpf_run(struct io_kiocb *req, unsigned int issue_flags)
 	} else {
 		bpf_prog_run_pin_on_cpu(req->bpf.prog, &bpf_ctx);
 	}
-
+	io_submit_state_end(&ctx->submit_state, ctx);
 	ret = 0;
 done:
 	__io_req_complete(req, issue_flags, ret, 0);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index de544f0fbeef..cc268f749a7d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4082,6 +4082,7 @@ union bpf_attr {
 	FN(ima_inode_hash),		\
 	FN(sock_from_file),		\
 	FN(check_mtu),			\
+	FN(iouring_queue_sqe),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
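For a sense of how the helper would be consumed, here is a sketch of a sleepable BPF program queueing a NOP SQE through it. The section name, the opaque context argument and the hand-declared helper prototype are assumptions about the rest of the RFC series (only the FN() enum addition is shown in this patch), so treat it purely as illustration:

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/io_uring.h>
#include <bpf/bpf_helpers.h>

/* hand-rolled declaration; the enum value comes from the FN() list above */
static long (*bpf_iouring_queue_sqe)(void *ctx, const struct io_uring_sqe *sqe,
				     __u32 sqe_len) =
	(void *)BPF_FUNC_iouring_queue_sqe;

SEC("iouring.s")	/* attach point name is a guess; program must be sleepable */
int queue_nop(void *ctx)
{
	struct io_uring_sqe sqe = {};

	sqe.opcode = IORING_OP_NOP;
	sqe.user_data = 0xcafe;

	/* 1 if the SQE was queued, 0 or a negative error otherwise */
	return bpf_iouring_queue_sqe(ctx, &sqe, sizeof(sqe));
}

char LICENSE[] SEC("license") = "GPL";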
Add a BPF_FUNC_iouring_queue_sqe BPF function as a demonstration of
submitting a new request from a BPF request.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c            | 51 ++++++++++++++++++++++++++++++++++++----
 include/uapi/linux/bpf.h |  1 +
 2 files changed, 48 insertions(+), 4 deletions(-)