diff mbox series

[bpf-next,v1,08/19] bpf: create file bpf iterator

Message ID 20200427201244.2995241-1-yhs@fb.com
State New
Headers show
Series bpf: implement bpf iterator for kernel data | expand

Commit Message

Yonghong Song April 27, 2020, 8:12 p.m. UTC
A new obj type BPF_TYPE_ITER is added to bpffs.
To produce a file bpf iterator, the fd must be
corresponding to a link_fd assocciated with a
trace/iter program. When the pinned file is
opened, a seq_file will be generated.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h   |  3 +++
 kernel/bpf/bpf_iter.c | 48 ++++++++++++++++++++++++++++++++++++++++++-
 kernel/bpf/inode.c    | 28 +++++++++++++++++++++++++
 kernel/bpf/syscall.c  |  2 +-
 4 files changed, 79 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0f0cafc65a04..601b3299b7e4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1021,6 +1021,8 @@  static inline void bpf_enable_instrumentation(void)
 
 extern const struct file_operations bpf_map_fops;
 extern const struct file_operations bpf_prog_fops;
+extern const struct file_operations bpf_link_fops;
+extern const struct file_operations bpffs_iter_fops;
 
 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
 	extern const struct bpf_prog_ops _name ## _prog_ops; \
@@ -1136,6 +1138,7 @@  int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int bpf_iter_link_replace(struct bpf_link *link, struct bpf_prog *old_prog,
 			  struct bpf_prog *new_prog);
 int bpf_iter_new_fd(struct bpf_link *link);
+void *bpf_iter_get_from_fd(u32 ufd);
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 1f4e778d1814..f5e933236996 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -123,7 +123,8 @@  struct bpf_prog *bpf_iter_get_prog(struct seq_file *seq, u32 priv_data_size,
 {
 	struct extra_priv_data *extra_data;
 
-	if (seq->file->f_op != &anon_bpf_iter_fops)
+	if (seq->file->f_op != &anon_bpf_iter_fops &&
+	    seq->file->f_op != &bpffs_iter_fops)
 		return NULL;
 
 	extra_data = get_extra_priv_dptr(seq->private, priv_data_size);
@@ -310,3 +311,48 @@  int bpf_iter_new_fd(struct bpf_link *link)
 	put_unused_fd(fd);
 	return err;
 }
+
+static int bpffs_iter_open(struct inode *inode, struct file *file)
+{
+	struct bpf_iter_link *link = inode->i_private;
+
+	return prepare_seq_file(file, link);
+}
+
+static int bpffs_iter_release(struct inode *inode, struct file *file)
+{
+	return anon_iter_release(inode, file);
+}
+
+const struct file_operations bpffs_iter_fops = {
+	.open		= bpffs_iter_open,
+	.read		= seq_read,
+	.release	= bpffs_iter_release,
+};
+
+void *bpf_iter_get_from_fd(u32 ufd)
+{
+	struct bpf_link *link;
+	struct bpf_prog *prog;
+	struct fd f;
+
+	f = fdget(ufd);
+	if (!f.file)
+		return ERR_PTR(-EBADF);
+	if (f.file->f_op != &bpf_link_fops) {
+		link = ERR_PTR(-EINVAL);
+		goto out;
+	}
+
+	link = f.file->private_data;
+	prog = link->prog;
+	if (prog->expected_attach_type != BPF_TRACE_ITER) {
+		link = ERR_PTR(-EINVAL);
+		goto out;
+	}
+
+	bpf_link_inc(link);
+out:
+	fdput(f);
+	return link;
+}
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 95087d9f4ed3..de4493983a37 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -26,6 +26,7 @@  enum bpf_type {
 	BPF_TYPE_PROG,
 	BPF_TYPE_MAP,
 	BPF_TYPE_LINK,
+	BPF_TYPE_ITER,
 };
 
 static void *bpf_any_get(void *raw, enum bpf_type type)
@@ -38,6 +39,7 @@  static void *bpf_any_get(void *raw, enum bpf_type type)
 		bpf_map_inc_with_uref(raw);
 		break;
 	case BPF_TYPE_LINK:
+	case BPF_TYPE_ITER:
 		bpf_link_inc(raw);
 		break;
 	default:
@@ -58,6 +60,7 @@  static void bpf_any_put(void *raw, enum bpf_type type)
 		bpf_map_put_with_uref(raw);
 		break;
 	case BPF_TYPE_LINK:
+	case BPF_TYPE_ITER:
 		bpf_link_put(raw);
 		break;
 	default:
@@ -82,6 +85,15 @@  static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
 		return raw;
 	}
 
+	/* check bpf_iter before bpf_link as
+	 * ufd is also a link.
+	 */
+	raw = bpf_iter_get_from_fd(ufd);
+	if (!IS_ERR(raw)) {
+		*type = BPF_TYPE_ITER;
+		return raw;
+	}
+
 	raw = bpf_link_get_from_fd(ufd);
 	if (!IS_ERR(raw)) {
 		*type = BPF_TYPE_LINK;
@@ -96,6 +108,7 @@  static const struct inode_operations bpf_dir_iops;
 static const struct inode_operations bpf_prog_iops = { };
 static const struct inode_operations bpf_map_iops  = { };
 static const struct inode_operations bpf_link_iops  = { };
+static const struct inode_operations bpf_iter_iops  = { };
 
 static struct inode *bpf_get_inode(struct super_block *sb,
 				   const struct inode *dir,
@@ -135,6 +148,8 @@  static int bpf_inode_type(const struct inode *inode, enum bpf_type *type)
 		*type = BPF_TYPE_MAP;
 	else if (inode->i_op == &bpf_link_iops)
 		*type = BPF_TYPE_LINK;
+	else if (inode->i_op == &bpf_iter_iops)
+		*type = BPF_TYPE_ITER;
 	else
 		return -EACCES;
 
@@ -362,6 +377,12 @@  static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg)
 			     &bpffs_obj_fops);
 }
 
+static int bpf_mkiter(struct dentry *dentry, umode_t mode, void *arg)
+{
+	return bpf_mkobj_ops(dentry, mode, arg, &bpf_iter_iops,
+			     &bpffs_iter_fops);
+}
+
 static struct dentry *
 bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
 {
@@ -441,6 +462,9 @@  static int bpf_obj_do_pin(const char __user *pathname, void *raw,
 	case BPF_TYPE_LINK:
 		ret = vfs_mkobj(dentry, mode, bpf_mklink, raw);
 		break;
+	case BPF_TYPE_ITER:
+		ret = vfs_mkobj(dentry, mode, bpf_mkiter, raw);
+		break;
 	default:
 		ret = -EPERM;
 	}
@@ -519,6 +543,8 @@  int bpf_obj_get_user(const char __user *pathname, int flags)
 		ret = bpf_map_new_fd(raw, f_flags);
 	else if (type == BPF_TYPE_LINK)
 		ret = bpf_link_new_fd(raw);
+	else if (type == BPF_TYPE_ITER)
+		ret = bpf_iter_new_fd(raw);
 	else
 		return -ENOENT;
 
@@ -538,6 +564,8 @@  static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type
 		return ERR_PTR(-EINVAL);
 	if (inode->i_op == &bpf_link_iops)
 		return ERR_PTR(-EINVAL);
+	if (inode->i_op == &bpf_iter_iops)
+		return ERR_PTR(-EINVAL);
 	if (inode->i_op != &bpf_prog_iops)
 		return ERR_PTR(-EACCES);
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 458f7000887a..e9ca5fbe8723 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2285,7 +2285,7 @@  static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
 }
 #endif
 
-static const struct file_operations bpf_link_fops = {
+const struct file_operations bpf_link_fops = {
 #ifdef CONFIG_PROC_FS
 	.show_fdinfo	= bpf_link_show_fdinfo,
 #endif