Message ID | 20250504224149.1033867-3-tjmercier@google.com |
---|---|
State | New |
Headers | show |
Series | Replace CONFIG_DMABUF_SYSFS_STATS with BPF | expand |
On Mon, May 5, 2025 at 4:17 AM Christian König <christian.koenig@amd.com> wrote: > > On 5/5/25 00:41, T.J. Mercier wrote: > > The dmabuf iterator traverses the list of all DMA buffers. > > > > DMA buffers are refcounted through their associated struct file. A > > reference is taken on each buffer as the list is iterated to ensure each > > buffer persists for the duration of the bpf program execution without > > holding the list mutex. > > > > Signed-off-by: T.J. Mercier <tjmercier@google.com> > > --- > > kernel/bpf/Makefile | 3 + > > kernel/bpf/dmabuf_iter.c | 134 +++++++++++++++++++++++++++++++++++++++ > > 2 files changed, 137 insertions(+) > > create mode 100644 kernel/bpf/dmabuf_iter.c > > > > diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile > > index 70502f038b92..3a335c50e6e3 100644 > > --- a/kernel/bpf/Makefile > > +++ b/kernel/bpf/Makefile > > @@ -53,6 +53,9 @@ obj-$(CONFIG_BPF_SYSCALL) += relo_core.o > > obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o > > obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o > > obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o > > +ifeq ($(CONFIG_DMA_SHARED_BUFFER),y) > > +obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o > > +endif > > > > CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE) > > CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE) > > diff --git a/kernel/bpf/dmabuf_iter.c b/kernel/bpf/dmabuf_iter.c > > new file mode 100644 > > index 000000000000..968762e11f73 > > --- /dev/null > > +++ b/kernel/bpf/dmabuf_iter.c > > @@ -0,0 +1,134 @@ > > +// SPDX-License-Identifier: GPL-2.0-only > > +/* Copyright (c) 2025 Google LLC */ > > +#include <linux/bpf.h> > > +#include <linux/btf_ids.h> > > +#include <linux/dma-buf.h> > > +#include <linux/kernel.h> > > +#include <linux/seq_file.h> > > + > > +BTF_ID_LIST_SINGLE(bpf_dmabuf_btf_id, struct, dma_buf) > > +DEFINE_BPF_ITER_FUNC(dmabuf, struct bpf_iter_meta *meta, struct dma_buf *dmabuf) > > + > > +static struct dma_buf *get_next_dmabuf(struct dma_buf *dmabuf) > > +{ > > + struct dma_buf *ret = NULL; > > + > > + /* > > + * Look for the first/next buffer we can obtain a reference to. > > + * > > + * The list mutex does not protect a dmabuf's refcount, so it can be > > + * zeroed while we are iterating. We cannot call get_dma_buf() since the > > + * caller of this program may not already own a reference to the buffer. > > + */ > > + mutex_lock(&dmabuf_list_mutex); > > + if (dmabuf) { > > That looks like you try to mangle the start and next functionality in just one function. > > I would just inline that into the dmabuf_iter_seq_start() and dmabuf_iter_seq_next() functions. Primarily this is to share between the open coded iterator (next patch) and this normal iterator since I didn't want to duplicate the same list traversal code across both of them. > > > > + dma_buf_put(dmabuf); > > + list_for_each_entry_continue(dmabuf, &dmabuf_list, list_node) { > > That you can put the DMA-buf and then still uses it in list_for_each_entry_continue() only works because the mutex is locked in the destroy path. Yup, this was deliberate. > > > I strongly suggest to just put those two functions into drivers/dma-buf/dma-buf.c right next to the __dma_buf_debugfs_list_add() and __dma_buf_debugfs_list_del() functions. By two functions, you mean a get_first_dmabuf(void) and a get_next_dmabuf(struct dma_buf*)? To make the dma_buf_put() call a little less scary since all the mutex ops are right there? > > > Apart from those style suggestions looks good to me from the technical side, but I'm not an expert for the BPF stuff. > > Regards, > Christian. Thanks for your comments and reviews! > > + if (file_ref_get(&dmabuf->file->f_ref)) { > > + ret = dmabuf; > > + break; > > + } > > + } > > + } else { > > + list_for_each_entry(dmabuf, &dmabuf_list, list_node) { > > + if (file_ref_get(&dmabuf->file->f_ref)) { > > + ret = dmabuf; > > + break; > > + } > > + } > > + } > > + mutex_unlock(&dmabuf_list_mutex); > > + return ret; > > +} > > + > > +static void *dmabuf_iter_seq_start(struct seq_file *seq, loff_t *pos) > > +{ > > + if (*pos) > > + return NULL; > > + > > + return get_next_dmabuf(NULL); > > +} > > + > > +static void *dmabuf_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) > > +{ > > + struct dma_buf *dmabuf = v; > > + > > + ++*pos; > > + > > + return get_next_dmabuf(dmabuf); > > +} > > + > > +struct bpf_iter__dmabuf { > > + __bpf_md_ptr(struct bpf_iter_meta *, meta); > > + __bpf_md_ptr(struct dma_buf *, dmabuf); > > +}; > > + > > +static int __dmabuf_seq_show(struct seq_file *seq, void *v, bool in_stop) > > +{ > > + struct bpf_iter_meta meta = { > > + .seq = seq, > > + }; > > + struct bpf_iter__dmabuf ctx = { > > + .meta = &meta, > > + .dmabuf = v, > > + }; > > + struct bpf_prog *prog = bpf_iter_get_info(&meta, in_stop); > > + > > + if (prog) > > + return bpf_iter_run_prog(prog, &ctx); > > + > > + return 0; > > +} > > + > > +static int dmabuf_iter_seq_show(struct seq_file *seq, void *v) > > +{ > > + return __dmabuf_seq_show(seq, v, false); > > +} > > + > > +static void dmabuf_iter_seq_stop(struct seq_file *seq, void *v) > > +{ > > + struct dma_buf *dmabuf = v; > > + > > + if (dmabuf) > > + dma_buf_put(dmabuf); > > +} > > + > > +static const struct seq_operations dmabuf_iter_seq_ops = { > > + .start = dmabuf_iter_seq_start, > > + .next = dmabuf_iter_seq_next, > > + .stop = dmabuf_iter_seq_stop, > > + .show = dmabuf_iter_seq_show, > > +}; > > + > > +static void bpf_iter_dmabuf_show_fdinfo(const struct bpf_iter_aux_info *aux, > > + struct seq_file *seq) > > +{ > > + seq_puts(seq, "dmabuf iter\n"); > > +} > > + > > +static const struct bpf_iter_seq_info dmabuf_iter_seq_info = { > > + .seq_ops = &dmabuf_iter_seq_ops, > > + .init_seq_private = NULL, > > + .fini_seq_private = NULL, > > + .seq_priv_size = 0, > > +}; > > + > > +static struct bpf_iter_reg bpf_dmabuf_reg_info = { > > + .target = "dmabuf", > > + .feature = BPF_ITER_RESCHED, > > + .show_fdinfo = bpf_iter_dmabuf_show_fdinfo, > > + .ctx_arg_info_size = 1, > > + .ctx_arg_info = { > > + { offsetof(struct bpf_iter__dmabuf, dmabuf), > > + PTR_TO_BTF_ID_OR_NULL }, > > + }, > > + .seq_info = &dmabuf_iter_seq_info, > > +}; > > + > > +static int __init dmabuf_iter_init(void) > > +{ > > + bpf_dmabuf_reg_info.ctx_arg_info[0].btf_id = bpf_dmabuf_btf_id[0]; > > + return bpf_iter_reg_target(&bpf_dmabuf_reg_info); > > +} > > + > > +late_initcall(dmabuf_iter_init); >
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 70502f038b92..3a335c50e6e3 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -53,6 +53,9 @@ obj-$(CONFIG_BPF_SYSCALL) += relo_core.o obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o +ifeq ($(CONFIG_DMA_SHARED_BUFFER),y) +obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o +endif CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE) diff --git a/kernel/bpf/dmabuf_iter.c b/kernel/bpf/dmabuf_iter.c new file mode 100644 index 000000000000..968762e11f73 --- /dev/null +++ b/kernel/bpf/dmabuf_iter.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Google LLC */ +#include <linux/bpf.h> +#include <linux/btf_ids.h> +#include <linux/dma-buf.h> +#include <linux/kernel.h> +#include <linux/seq_file.h> + +BTF_ID_LIST_SINGLE(bpf_dmabuf_btf_id, struct, dma_buf) +DEFINE_BPF_ITER_FUNC(dmabuf, struct bpf_iter_meta *meta, struct dma_buf *dmabuf) + +static struct dma_buf *get_next_dmabuf(struct dma_buf *dmabuf) +{ + struct dma_buf *ret = NULL; + + /* + * Look for the first/next buffer we can obtain a reference to. + * + * The list mutex does not protect a dmabuf's refcount, so it can be + * zeroed while we are iterating. We cannot call get_dma_buf() since the + * caller of this program may not already own a reference to the buffer. + */ + mutex_lock(&dmabuf_list_mutex); + if (dmabuf) { + dma_buf_put(dmabuf); + list_for_each_entry_continue(dmabuf, &dmabuf_list, list_node) { + if (file_ref_get(&dmabuf->file->f_ref)) { + ret = dmabuf; + break; + } + } + } else { + list_for_each_entry(dmabuf, &dmabuf_list, list_node) { + if (file_ref_get(&dmabuf->file->f_ref)) { + ret = dmabuf; + break; + } + } + } + mutex_unlock(&dmabuf_list_mutex); + return ret; +} + +static void *dmabuf_iter_seq_start(struct seq_file *seq, loff_t *pos) +{ + if (*pos) + return NULL; + + return get_next_dmabuf(NULL); +} + +static void *dmabuf_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct dma_buf *dmabuf = v; + + ++*pos; + + return get_next_dmabuf(dmabuf); +} + +struct bpf_iter__dmabuf { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct dma_buf *, dmabuf); +}; + +static int __dmabuf_seq_show(struct seq_file *seq, void *v, bool in_stop) +{ + struct bpf_iter_meta meta = { + .seq = seq, + }; + struct bpf_iter__dmabuf ctx = { + .meta = &meta, + .dmabuf = v, + }; + struct bpf_prog *prog = bpf_iter_get_info(&meta, in_stop); + + if (prog) + return bpf_iter_run_prog(prog, &ctx); + + return 0; +} + +static int dmabuf_iter_seq_show(struct seq_file *seq, void *v) +{ + return __dmabuf_seq_show(seq, v, false); +} + +static void dmabuf_iter_seq_stop(struct seq_file *seq, void *v) +{ + struct dma_buf *dmabuf = v; + + if (dmabuf) + dma_buf_put(dmabuf); +} + +static const struct seq_operations dmabuf_iter_seq_ops = { + .start = dmabuf_iter_seq_start, + .next = dmabuf_iter_seq_next, + .stop = dmabuf_iter_seq_stop, + .show = dmabuf_iter_seq_show, +}; + +static void bpf_iter_dmabuf_show_fdinfo(const struct bpf_iter_aux_info *aux, + struct seq_file *seq) +{ + seq_puts(seq, "dmabuf iter\n"); +} + +static const struct bpf_iter_seq_info dmabuf_iter_seq_info = { + .seq_ops = &dmabuf_iter_seq_ops, + .init_seq_private = NULL, + .fini_seq_private = NULL, + .seq_priv_size = 0, +}; + +static struct bpf_iter_reg bpf_dmabuf_reg_info = { + .target = "dmabuf", + .feature = BPF_ITER_RESCHED, + .show_fdinfo = bpf_iter_dmabuf_show_fdinfo, + .ctx_arg_info_size = 1, + .ctx_arg_info = { + { offsetof(struct bpf_iter__dmabuf, dmabuf), + PTR_TO_BTF_ID_OR_NULL }, + }, + .seq_info = &dmabuf_iter_seq_info, +}; + +static int __init dmabuf_iter_init(void) +{ + bpf_dmabuf_reg_info.ctx_arg_info[0].btf_id = bpf_dmabuf_btf_id[0]; + return bpf_iter_reg_target(&bpf_dmabuf_reg_info); +} + +late_initcall(dmabuf_iter_init);
The dmabuf iterator traverses the list of all DMA buffers. DMA buffers are refcounted through their associated struct file. A reference is taken on each buffer as the list is iterated to ensure each buffer persists for the duration of the bpf program execution without holding the list mutex. Signed-off-by: T.J. Mercier <tjmercier@google.com> --- kernel/bpf/Makefile | 3 + kernel/bpf/dmabuf_iter.c | 134 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 kernel/bpf/dmabuf_iter.c