diff mbox series

[RFC,bpf-next,05/16] bpf: create file or anonymous dumpers

Message ID 20200408232526.2675664-1-yhs@fb.com
State New
Headers show
Series None | expand

Commit Message

Yonghong Song April 8, 2020, 11:25 p.m. UTC
Given a loaded dumper bpf program, which already
knows which target it should bind to, there
are two ways to create a dumper:
  - a file based dumper under hierarchy of
    /sys/kernel/bpfdump/ which users can
    "cat" to print out the output.
  - an anonymous dumper which user application
    can "read" the dumping output.

For file based dumper, BPF_OBJ_PIN syscall interface
is used. For anonymous dumper, BPF_PROG_ATTACH
syscall interface is used.

To facilitate target seq_ops->show() to get the
bpf program easily, dumper creation increased
the target-provided seq_file private data size
so bpf program pointer is also stored in seq_file
private data.

Further, a seq_num which represents how many
times bpf_dump_get_prog() has been called is also
available to the target seq_ops->show().
Such information can be used to e.g., print
banner before printing out actual data.

Note the seq_num does not represent the number
of unique kernel objects the bpf program has
seen, but it should be a good approximation.

A target feature BPF_DUMP_SEQ_NET_PRIVATE
is implemented, which is specifically useful for
net based dumpers. It sets the net namespace
to the current process's net namespace.
This avoids changing existing net seq_ops
in order to retrieve net namespace from
the seq_file pointer.

For open dumper files, anonymous or not, the
fdinfo will show the target and prog_id associated
with that file descriptor. For dumper file itself,
a kernel interface will be provided to retrieve the
prog_id in one of the later patches.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h            |   5 +
 include/uapi/linux/bpf.h       |   6 +-
 kernel/bpf/dump.c              | 338 ++++++++++++++++++++++++++++++++-
 kernel/bpf/syscall.c           |  11 +-
 tools/include/uapi/linux/bpf.h |   6 +-
 5 files changed, 362 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 44268d36d901..8171e01ff4be 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1110,10 +1110,15 @@  struct bpf_link *bpf_link_get_from_fd(u32 ufd);
 int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
 int bpf_obj_get_user(const char __user *pathname, int flags);
 
+#define BPF_DUMP_SEQ_NET_PRIVATE	BIT(0)
+
 int bpf_dump_reg_target(const char *target, const char *target_proto,
 			const struct seq_operations *seq_ops,
 			u32 seq_priv_size, u32 target_feature);
 int bpf_dump_set_target_info(u32 target_fd, struct bpf_prog *prog);
+int bpf_dump_create(u32 prog_fd, const char __user *dumper_name);
+struct bpf_prog *bpf_dump_get_prog(struct seq_file *seq, u32 priv_data_size,
+				   u64 *seq_num);
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0f1cbed446c1..b51d56fc77f9 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -354,6 +354,7 @@  enum {
 /* Flags for accessing BPF object from syscall side. */
 	BPF_F_RDONLY		= (1U << 3),
 	BPF_F_WRONLY		= (1U << 4),
+	BPF_F_DUMP		= (1U << 5),
 
 /* Flag for stack_map, store build_id+offset instead of pointer */
 	BPF_F_STACK_BUILD_ID	= (1U << 5),
@@ -481,7 +482,10 @@  union bpf_attr {
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
-		__aligned_u64	pathname;
+		union {
+			__aligned_u64	pathname;
+			__aligned_u64	dumper_name;
+		};
 		__u32		bpf_fd;
 		__u32		file_flags;
 	};
diff --git a/kernel/bpf/dump.c b/kernel/bpf/dump.c
index 1091affe8b3f..ac6856abb711 100644
--- a/kernel/bpf/dump.c
+++ b/kernel/bpf/dump.c
@@ -30,22 +30,173 @@  struct bpfdump_targets {
 	struct mutex dumper_mutex;
 };
 
+/* Per-inode state of a file dumper. It is stored in the inode's
+ * i_private and read back by dumper_open() and dumper_show_fdinfo().
+ */
+struct dumper_inode_info {
+	struct bpfdump_target_info *tinfo;	/* target the dumper is bound to */
+	struct bpf_prog *prog;			/* bpf program attached to the dumper */
+};
+
+/* One entry on the anon_dumpers or file_dumpers list, recording which
+ * target and bpf program a created dumper is bound to.
+ */
+struct dumper_info {
+	struct list_head list;
+	/* file to identify an anon dumper,
+	 * dentry to identify a file dumper.
+	 */
+	union {
+		struct file *file;
+		struct dentry *dentry;
+	};
+	struct bpfdump_target_info *tinfo;
+	struct bpf_prog *prog;
+};
+
+/* A list of struct dumper_info entries plus the mutex taken around
+ * every traversal and modification of that list.
+ */
+struct dumpers {
+	struct list_head dumpers;
+	struct mutex dumper_mutex;
+};
+
+/* Dumper state appended to the target's own seq_file private data.
+ * It lives at the target's private size rounded up to 8 bytes, see
+ * get_extra_priv_dptr().
+ */
+struct extra_priv_data {
+	struct bpf_prog *prog;	/* returned by bpf_dump_get_prog() */
+	u64 seq_num;		/* bumped on each bpf_dump_get_prog() call */
+};
+
 /* registered dump targets */
 static struct bpfdump_targets dump_targets;
 
 static struct dentry *bpfdump_dentry;
 
+static struct dumpers anon_dumpers, file_dumpers;
+
+static const struct file_operations bpf_dumper_ops;
+static const struct inode_operations bpf_dir_iops;
+
+static struct dentry *bpfdump_add_file(const char *name, struct dentry *parent,
+				       const struct file_operations *f_ops,
+				       void *data);
 static struct dentry *bpfdump_add_dir(const char *name, struct dentry *parent,
 				      const struct inode_operations *i_ops,
 				      void *data);
 static int __bpfdump_init(void);
 
+/* Size of the seq_file private area for a dumper: the target's own
+ * private data padded to 8 bytes, followed by struct extra_priv_data.
+ */
+static u32 get_total_priv_dsize(u32 old_size)
+{
+	u32 padded_size = roundup(old_size, 8);
+
+	return padded_size + sizeof(struct extra_priv_data);
+}
+
+/* Locate the struct extra_priv_data that follows the target's private
+ * data (of size @old_size, padded to 8 bytes) starting at @old_ptr.
+ */
+static void *get_extra_priv_dptr(void *old_ptr, u32 old_size)
+{
+	u32 extra_offset = roundup(old_size, 8);
+
+	return old_ptr + extra_offset;
+}
+
+#ifdef CONFIG_PROC_FS
+/* fdinfo for an open file dumper: print the target name and the id of
+ * the bpf program recorded in the inode's private data.
+ */
+static void dumper_show_fdinfo(struct seq_file *m, struct file *filp)
+{
+	struct dumper_inode_info *info = filp->f_inode->i_private;
+	struct bpfdump_target_info *tinfo = info->tinfo;
+	struct bpf_prog *prog = info->prog;
+
+	seq_printf(m, "target:\t%s\n"
+		      "prog_id:\t%u\n",
+		   tinfo->target, prog->aux->id);
+}
+
+/* fdinfo for an anonymous dumper: find the anon_dumpers entry that
+ * belongs to @filp and print its target name and bpf program id.
+ * Nothing is printed when no entry matches.
+ */
+static void anon_dumper_show_fdinfo(struct seq_file *m, struct file *filp)
+{
+	struct dumper_info *cur;
+
+	mutex_lock(&anon_dumpers.dumper_mutex);
+	list_for_each_entry(cur, &anon_dumpers.dumpers, list) {
+		if (cur->file != filp)
+			continue;
+
+		seq_printf(m, "target:\t%s\n"
+			      "prog_id:\t%u\n",
+			   cur->tinfo->target,
+			   cur->prog->aux->id);
+		break;
+	}
+	mutex_unlock(&anon_dumpers.dumper_mutex);
+}
+
+#endif
+
+/* Apply the target's feature flags to freshly opened seq_file private
+ * data. BPF_DUMP_SEQ_NET_PRIVATE records the calling task's net
+ * namespace in the struct seq_net_private at the start of @priv_data.
+ */
+static void process_target_feature(u32 feature, void *priv_data)
+{
+	struct seq_net_private *net_priv = priv_data;
+
+	if (!(feature & BPF_DUMP_SEQ_NET_PRIVATE))
+		return;
+
+	/* use the current net namespace */
+	set_seq_net_private(net_priv, current->nsproxy->net_ns);
+}
+
+/* open() for a file dumper: set up a seq_file using the target's
+ * seq_ops, with private data large enough for the target plus the
+ * trailing struct extra_priv_data, then initialise both parts.
+ *
+ * Returns 0 on success or -ENOMEM if the seq_file cannot be set up.
+ */
+static int dumper_open(struct inode *inode, struct file *file)
+{
+	struct dumper_inode_info *info = inode->i_private;
+	struct bpfdump_target_info *tinfo = info->tinfo;
+	u32 target_size = tinfo->seq_priv_size;
+	struct extra_priv_data *extra;
+	void *priv;
+
+	priv = __seq_open_private(file, tinfo->seq_ops,
+				  get_total_priv_dsize(target_size));
+	if (!priv)
+		return -ENOMEM;
+
+	process_target_feature(tinfo->target_feature, priv);
+
+	/* dumper state lives right behind the target's private data */
+	extra = get_extra_priv_dptr(priv, target_size);
+	extra->prog = info->prog;
+	extra->seq_num = 0;
+
+	return 0;
+}
+
+/* release() for an anonymous dumper.
+ *
+ * Drops the bpf program reference held by the dumper, removes and
+ * frees its anon_dumpers entry, and tears down the seq_file.
+ *
+ * Returns 0, or the result of seq_release_private().
+ */
+static int anon_dumper_release(struct inode *inode, struct file *file)
+{
+	struct dumper_info *dinfo;
+
+	/* release the bpf program */
+	mutex_lock(&anon_dumpers.dumper_mutex);
+	list_for_each_entry(dinfo, &anon_dumpers.dumpers, list) {
+		if (dinfo->file == file) {
+			bpf_prog_put(dinfo->prog);
+			list_del(&dinfo->list);
+			/* the entry was kmalloc'ed by create_anon_dumper();
+			 * safe to free here since we stop iterating.
+			 */
+			kfree(dinfo);
+			break;
+		}
+	}
+	mutex_unlock(&anon_dumpers.dumper_mutex);
+
+	/* create_anon_dumper() can fail before the seq_file is opened,
+	 * leaving private_data NULL; seq_release_private() would
+	 * dereference it.
+	 */
+	if (!file->private_data)
+		return 0;
+
+	return seq_release_private(inode, file);
+}
+
+/* release() for a file dumper: tear down the seq_file and the private
+ * data that dumper_open() set up via __seq_open_private().
+ */
+static int dumper_release(struct inode *inode, struct file *file)
+{
+	return seq_release_private(inode, file);
+}
+
+/* unlink() for entries in a target directory: drop the dumper's bpf
+ * program reference, free the per-inode info and remove the file.
+ *
+ * NOTE(review): dumper_open() and dumper_show_fdinfo() read
+ * inode->i_private; confirm an already open file cannot use i_info
+ * after it is freed here.
+ * NOTE(review): the dumper_info entry that create_dumper() adds to
+ * file_dumpers is not removed here; confirm it is cleaned up elsewhere.
+ */
 static int dumper_unlink(struct inode *dir, struct dentry *dentry)
 {
-	kfree(d_inode(dentry)->i_private);
+	struct dumper_inode_info *i_info = d_inode(dentry)->i_private;
+
+	bpf_prog_put(i_info->prog);
+	kfree(i_info);
+
 	return simple_unlink(dir, dentry);
 }
 
+/* File operations of file based dumpers; create_dumper() passes them
+ * to bpfdump_add_file(). Each open() gets its own seq_file.
+ */
+static const struct file_operations bpf_dumper_ops = {
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= dumper_show_fdinfo,
+#endif
+	.open		= dumper_open,
+	.read		= seq_read,
+	.release	= dumper_release,
+};
+
+/* File operations of anonymous dumpers. There is no .open: the
+ * seq_file is set up by create_anon_dumper() when the fd is created.
+ */
+static const struct file_operations anon_bpf_dumper_ops = {
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= anon_dumper_show_fdinfo,
+#endif
+	.read		= seq_read,
+	.release	= anon_dumper_release,
+};
+
 static const struct inode_operations bpf_dir_iops = {
 	.lookup		= simple_lookup,
 	.unlink		= dumper_unlink,
@@ -88,6 +239,179 @@  int bpf_dump_set_target_info(u32 target_fd, struct bpf_prog *prog)
 	return err;
 }
 
+/* Create an anonymous dumper for @prog bound to target @tinfo.
+ *
+ * Allocates an anon inode fd, opens a seq_file on it with the target's
+ * seq_ops and the enlarged private data, records the dumper on the
+ * anon_dumpers list and initialises the private data.
+ *
+ * Returns the new fd on success or a negative errno on failure.
+ */
+static int create_anon_dumper(struct bpfdump_target_info *tinfo,
+			      struct bpf_prog *prog)
+{
+	struct extra_priv_data *extra_data;
+	u32 old_priv_size, total_priv_size;
+	struct dumper_info *dinfo;
+	struct file *file;
+	int err, anon_fd;
+	void *priv_data;
+	struct fd fd;
+
+	anon_fd = anon_inode_getfd("bpf-dumper", &anon_bpf_dumper_ops,
+				   NULL, O_CLOEXEC);
+	if (anon_fd < 0)
+		return anon_fd;
+
+	/* setup seq_file for anon dumper */
+	/* NOTE(review): fd.file is used without a NULL check; confirm the
+	 * fd cannot be closed by another thread before this lookup.
+	 */
+	fd = fdget(anon_fd);
+	file = fd.file;
+
+	dinfo = kmalloc(sizeof(*dinfo), GFP_KERNEL);
+	if (!dinfo) {
+		err = -ENOMEM;
+		goto free_fd;
+	}
+
+	old_priv_size = tinfo->seq_priv_size;
+	total_priv_size = get_total_priv_dsize(old_priv_size);
+
+	priv_data = __seq_open_private(file, tinfo->seq_ops,
+				       total_priv_size);
+	if (!priv_data) {
+		err = -ENOMEM;
+		goto free_dinfo;
+	}
+
+	dinfo->file = file;
+	dinfo->tinfo = tinfo;
+	dinfo->prog = prog;
+
+	/* the entry is looked up by file in show_fdinfo and release */
+	mutex_lock(&anon_dumpers.dumper_mutex);
+	list_add(&dinfo->list, &anon_dumpers.dumpers);
+	mutex_unlock(&anon_dumpers.dumper_mutex);
+
+	process_target_feature(tinfo->target_feature, priv_data);
+
+	/* dumper state lives right behind the target's private data */
+	extra_data = get_extra_priv_dptr(priv_data, old_priv_size);
+	extra_data->prog = prog;
+	extra_data->seq_num = 0;
+
+	fdput(fd);
+	return anon_fd;
+
+	/* NOTE(review): the error paths only drop the fdget() reference;
+	 * anon_fd itself appears to stay installed in the fd table with no
+	 * seq_file attached - confirm and close it if so.
+	 */
+free_dinfo:
+	kfree(dinfo);
+free_fd:
+	fdput(fd);
+	return err;
+}
+
+/* Create a file based dumper named @dumper_name in the directory of
+ * target @tinfo, bound to @prog.
+ *
+ * @dumper_name is a user space pointer; the name is copied in with
+ * strndup_user(), which reports failure with an ERR_PTR() value and
+ * never with NULL.
+ *
+ * On success the per-inode info is handed to the new file as its
+ * private data and a dumper_info entry is added to file_dumpers.
+ *
+ * Returns 0 on success or a negative errno; on failure everything
+ * allocated here is freed and @prog is left untouched.
+ */
+static int create_dumper(struct bpfdump_target_info *tinfo,
+			 const char __user *dumper_name,
+			 struct bpf_prog *prog)
+{
+	struct dumper_inode_info *i_info;
+	struct dumper_info *dinfo;
+	struct dentry *dentry;
+	const char *dname;
+	int err;
+
+	i_info = kmalloc(sizeof(*i_info), GFP_KERNEL);
+	if (!i_info)
+		return -ENOMEM;
+
+	i_info->tinfo = tinfo;
+	i_info->prog = prog;
+
+	dinfo = kmalloc(sizeof(*dinfo), GFP_KERNEL);
+	if (!dinfo) {
+		err = -ENOMEM;
+		goto free_i_info;
+	}
+
+	dname = strndup_user(dumper_name, PATH_MAX);
+	if (IS_ERR(dname)) {
+		/* e.g. -EFAULT for a bad pointer, -EINVAL if too long */
+		err = PTR_ERR(dname);
+		goto free_dinfo;
+	}
+
+	dentry = bpfdump_add_file(dname, tinfo->dir_dentry,
+				  &bpf_dumper_ops, i_info);
+	/* the copied name is not needed once the dentry is created */
+	kfree(dname);
+	if (IS_ERR(dentry)) {
+		err = PTR_ERR(dentry);
+		goto free_dinfo;
+	}
+
+	dinfo->dentry = dentry;
+	dinfo->tinfo = tinfo;
+	dinfo->prog = prog;
+
+	mutex_lock(&file_dumpers.dumper_mutex);
+	list_add(&dinfo->list, &file_dumpers.dumpers);
+	mutex_unlock(&file_dumpers.dumper_mutex);
+
+	return 0;
+
+free_dinfo:
+	kfree(dinfo);
+free_i_info:
+	kfree(i_info);
+	return err;
+}
+
+/* Create a dumper for the bpf program behind @prog_fd.
+ *
+ * The program's dump target is looked up among the registered
+ * targets. With a non-NULL @dumper_name a file based dumper of that
+ * name is created, otherwise an anonymous one.
+ *
+ * Returns 0 (file dumper) or a new fd (anonymous dumper) on success,
+ * in which case the program reference taken here is kept. Returns a
+ * negative errno on failure after dropping that reference; -EINVAL
+ * means the program has no dump target or the target is unknown.
+ */
+int bpf_dump_create(u32 prog_fd, const char __user *dumper_name)
+{
+	struct bpfdump_target_info *tinfo, *match = NULL;
+	struct bpf_prog *prog;
+	const char *target;
+	int ret;
+
+	prog = bpf_prog_get(prog_fd);
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	ret = -EINVAL;
+	target = prog->aux->dump_target;
+	if (!target)
+		goto put_prog;
+
+	mutex_lock(&dump_targets.dumper_mutex);
+	list_for_each_entry(tinfo, &dump_targets.dumpers, list) {
+		if (!strcmp(tinfo->target, target)) {
+			match = tinfo;
+			break;
+		}
+	}
+	mutex_unlock(&dump_targets.dumper_mutex);
+
+	if (!match)
+		goto put_prog;
+
+	if (dumper_name)
+		ret = create_dumper(match, dumper_name, prog);
+	else
+		ret = create_anon_dumper(match, prog);
+	if (ret >= 0)
+		return ret;
+
+put_prog:
+	bpf_prog_put(prog);
+	return ret;
+}
+
+/* Fetch the bpf program attached to the dumper behind @seq.
+ *
+ * @priv_data_size is the size of the target's own private data; the
+ * dumper state is found right after it. The current sequence number
+ * is stored in @seq_num and then incremented.
+ *
+ * Returns NULL (leaving @seq_num untouched) when @seq does not belong
+ * to a file or anonymous dumper.
+ */
+struct bpf_prog *bpf_dump_get_prog(struct seq_file *seq, u32 priv_data_size,
+				   u64 *seq_num)
+{
+	const struct file_operations *fops = seq->file->f_op;
+	struct extra_priv_data *extra;
+
+	if (fops != &bpf_dumper_ops && fops != &anon_bpf_dumper_ops)
+		return NULL;
+
+	extra = get_extra_priv_dptr(seq->private, priv_data_size);
+	*seq_num = extra->seq_num++;
+
+	return extra->prog;
+}
+
 int bpf_dump_reg_target(const char *target,
 			const char *target_proto,
 			const struct seq_operations *seq_ops,
@@ -211,6 +535,14 @@  bpfdump_create_dentry(const char *name, umode_t mode, struct dentry *parent,
 	return dentry;
 }
 
+/* Create a read-only (0444) regular file @name under @parent with
+ * file operations @f_ops and @data as its private data.
+ */
+static struct dentry *
+bpfdump_add_file(const char *name, struct dentry *parent,
+		 const struct file_operations *f_ops, void *data)
+{
+	const umode_t mode = S_IFREG | 0444;
+
+	return bpfdump_create_dentry(name, mode, parent, data, NULL, f_ops);
+}
+
 static struct dentry *
 bpfdump_add_dir(const char *name, struct dentry *parent,
 		const struct inode_operations *i_ops, void *data)
@@ -290,6 +622,10 @@  static int __bpfdump_init(void)
 
 	INIT_LIST_HEAD(&dump_targets.dumpers);
 	mutex_init(&dump_targets.dumper_mutex);
+	INIT_LIST_HEAD(&anon_dumpers.dumpers);
+	mutex_init(&anon_dumpers.dumper_mutex);
+	INIT_LIST_HEAD(&file_dumpers.dumpers);
+	mutex_init(&file_dumpers.dumper_mutex);
 	return 0;
 
 remove_mount:
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 41005dee8957..b5e4f18cc633 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2173,9 +2173,13 @@  static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 
+/* BPF_OBJ_PIN: BPF_F_DUMP is the only file flag accepted. When it is
+ * set, a file dumper named attr->dumper_name is created for the
+ * program behind attr->bpf_fd; otherwise the object is pinned at
+ * attr->pathname as before.
+ */
 static int bpf_obj_pin(const union bpf_attr *attr)
 {
-	if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0)
+	if (CHECK_ATTR(BPF_OBJ) || attr->file_flags & ~BPF_F_DUMP)
 		return -EINVAL;
 
+	if (attr->file_flags == BPF_F_DUMP)
+		return bpf_dump_create(attr->bpf_fd,
+				       u64_to_user_ptr(attr->dumper_name));
+
 	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
 }
 
@@ -2605,6 +2609,8 @@  attach_type_to_prog_type(enum bpf_attach_type attach_type)
 	case BPF_CGROUP_GETSOCKOPT:
 	case BPF_CGROUP_SETSOCKOPT:
 		return BPF_PROG_TYPE_CGROUP_SOCKOPT;
+	case BPF_TRACE_DUMP:
+		return BPF_PROG_TYPE_TRACING;
 	default:
 		return BPF_PROG_TYPE_UNSPEC;
 	}
@@ -2663,6 +2669,9 @@  static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_PROG_TYPE_SOCK_OPS:
 		ret = cgroup_bpf_prog_attach(attr, ptype, prog);
 		break;
+	case BPF_PROG_TYPE_TRACING:
+		ret = bpf_dump_create(attr->attach_bpf_fd, (void __user *)NULL);
+		break;
 	default:
 		ret = -EINVAL;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0f1cbed446c1..b51d56fc77f9 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -354,6 +354,7 @@  enum {
 /* Flags for accessing BPF object from syscall side. */
 	BPF_F_RDONLY		= (1U << 3),
 	BPF_F_WRONLY		= (1U << 4),
+	BPF_F_DUMP		= (1U << 5),
 
 /* Flag for stack_map, store build_id+offset instead of pointer */
 	BPF_F_STACK_BUILD_ID	= (1U << 5),
@@ -481,7 +482,10 @@  union bpf_attr {
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
-		__aligned_u64	pathname;
+		union {
+			__aligned_u64	pathname;
+			__aligned_u64	dumper_name;
+		};
 		__u32		bpf_fd;
 		__u32		file_flags;
 	};