[RFC,bpf-next,3/8] bpf: implement __async and __s_async kfunc suffixes

Message ID: 20240507-bpf_async-v1-3-b4df966096d8@kernel.org
State: New
Series: Implement generic bpf_async cb

Commit Message

Benjamin Tissoires May 7, 2024, 1:19 p.m. UTC
Still mostly a WIP, but it seems to be working for the couple of tests I have.
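
As an example of the intended declaration side (the kfunc and its arguments
below are made up for illustration; only the __async/__s_async
parameter-name suffixes come from this patch):

  __bpf_kfunc int bpf_example_run(struct bpf_map *map,
				  int (cb__s_async)(struct bpf_map *map, void *value),
				  void *value);

The "__s_async" suffix on the callback parameter name tells the verifier
that the callback is invoked later, from a sleepable context; "__async"
marks a non-sleepable async callback.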

Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>

---

This is an RFC, and is not meant to be fully reviewed/applied as it is.
I'm posting this to show what I wanted to explain in
https://lore.kernel.org/bpf/mhkzkf4e23uvljtmwizwcxyuyat2tmfxn33xb4t7waafgmsa66@mcrzpj3b6ssx/
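
On the BPF program side, the callback would be an ordinary subprog whose
argument registers get typed from the kfunc's callback prototype by
set_generic_callback_state() below. A matching (equally hypothetical)
sketch:

  struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
  } example_map SEC(".maps");

  static int example_cb(struct bpf_map *map, void *value)
  {
	/* runs asynchronously, in sleepable context thanks to __s_async */
	return 0;
  }

  SEC("tc")
  int prog(struct __sk_buff *skb)
  {
	static __u64 data;

	bpf_example_run(&example_map, example_cb, &data);
	return 0;
  }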
---
 kernel/bpf/verifier.c | 223 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 206 insertions(+), 17 deletions(-)

Patch

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2b1e24c440c5..cc4dab81b306 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -336,6 +336,16 @@  struct bpf_kfunc_call_arg_meta {
 		struct bpf_map *ptr;
 		int uid;
 	} map;
+	struct {
+		bool enabled;
+		bool sleepable;
+		u32 nr_args;
+		struct {
+			// FIXME: should be enum kfunc_ptr_arg_type type;
+			int type;
+			u32 btf_id;
+		} args[5];
+	} async_cb;
 	u64 mem_size;
 };
 
@@ -9538,7 +9548,8 @@  static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins
 	 */
 	env->subprog_info[subprog].is_cb = true;
 	if (bpf_pseudo_kfunc_call(insn) &&
-	    !is_callback_calling_kfunc(insn->imm)) {
+	    !is_callback_calling_kfunc(insn->imm) &&
+	    !(kfunc_meta && kfunc_meta->async_cb.enabled)) {
 		verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
 			func_id_name(insn->imm), insn->imm);
 		return -EFAULT;
@@ -9549,14 +9560,15 @@  static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins
 		return -EFAULT;
 	}
 
-	if (is_async_callback_calling_insn(insn)) {
+	if (is_async_callback_calling_insn(insn) || (kfunc_meta && kfunc_meta->async_cb.enabled)) {
 		struct bpf_verifier_state *async_cb;
 
 		/* there is no real recursion here. timer and workqueue callbacks are async */
 		env->subprog_info[subprog].is_async_cb = true;
 		async_cb = push_async_cb(env, env->subprog_info[subprog].start,
 					 insn_idx, subprog,
-					 is_bpf_wq_set_callback_impl_kfunc(insn->imm));
+					 (is_bpf_wq_set_callback_impl_kfunc(insn->imm) ||
+					  (kfunc_meta && kfunc_meta->async_cb.sleepable)));
 		if (!async_cb)
 			return -EFAULT;
 		callee = async_cb->frame[0];
@@ -10890,6 +10902,16 @@  static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param
 	return btf_param_match_suffix(btf, arg, "__str");
 }
 
+static bool is_kfunc_arg_async_cb(const struct btf *btf, const struct btf_param *arg)
+{
+	return btf_param_match_suffix(btf, arg, "__async");
+}
+
+static bool is_kfunc_arg_sleepable_async_cb(const struct btf *btf, const struct btf_param *arg)
+{
+	return btf_param_match_suffix(btf, arg, "__s_async");
+}
+
 static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
 					  const struct btf_param *arg,
 					  const char *name)
@@ -11045,6 +11067,48 @@  enum kfunc_ptr_arg_type {
 	KF_ARG_PTR_TO_WORKQUEUE,
 };
 
+static const char *__kfunc_ptr_arg_type_str(enum kfunc_ptr_arg_type value)
+{
+	switch (value) {
+	case KF_ARG_PTR_TO_CTX:
+		return "KF_ARG_PTR_TO_CTX";
+	case KF_ARG_PTR_TO_ALLOC_BTF_ID:
+		return "KF_ARG_PTR_TO_ALLOC_BTF_ID";
+	case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
+		return "KF_ARG_PTR_TO_REFCOUNTED_KPTR";
+	case KF_ARG_PTR_TO_DYNPTR:
+		return "KF_ARG_PTR_TO_DYNPTR";
+	case KF_ARG_PTR_TO_ITER:
+		return "KF_ARG_PTR_TO_ITER";
+	case KF_ARG_PTR_TO_LIST_HEAD:
+		return "KF_ARG_PTR_TO_LIST_HEAD";
+	case KF_ARG_PTR_TO_LIST_NODE:
+		return "KF_ARG_PTR_TO_LIST_NODE";
+	case KF_ARG_PTR_TO_BTF_ID:
+		return "KF_ARG_PTR_TO_BTF_ID";
+	case KF_ARG_PTR_TO_MEM:
+		return "KF_ARG_PTR_TO_MEM";
+	case KF_ARG_PTR_TO_MEM_SIZE:
+		return "KF_ARG_PTR_TO_MEM_SIZE";
+	case KF_ARG_PTR_TO_CALLBACK:
+		return "KF_ARG_PTR_TO_CALLBACK";
+	case KF_ARG_PTR_TO_RB_ROOT:
+		return "KF_ARG_PTR_TO_RB_ROOT";
+	case KF_ARG_PTR_TO_RB_NODE:
+		return "KF_ARG_PTR_TO_RB_NODE";
+	case KF_ARG_PTR_TO_NULL:
+		return "KF_ARG_PTR_TO_NULL";
+	case KF_ARG_PTR_TO_CONST_STR:
+		return "KF_ARG_PTR_TO_CONST_STR";
+	case KF_ARG_PTR_TO_MAP:
+		return "KF_ARG_PTR_TO_MAP";
+	case KF_ARG_PTR_TO_WORKQUEUE:
+		return "KF_ARG_PTR_TO_WORKQUEUE";
+	}
+
+	return "UNKNOWN";
+}
+
 enum special_kfunc_type {
 	KF_bpf_obj_new_impl,
 	KF_bpf_obj_drop_impl,
@@ -12151,6 +12215,39 @@  static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 				return -EINVAL;
 			}
 			meta->subprogno = reg->subprogno;
+			meta->async_cb.sleepable = is_kfunc_arg_sleepable_async_cb(meta->btf, &args[i]);
+			meta->async_cb.enabled = meta->async_cb.sleepable ||
+						 is_kfunc_arg_async_cb(meta->btf, &args[i]);
+			if (meta->async_cb.enabled) {
+				const struct btf_type *cb_proto;
+				const struct btf_param *cb_args;
+				u32 cb_type = args[i].type;
+				int i;
+
+				cb_proto = btf_type_resolve_func_ptr(btf, cb_type, NULL);
+				if (cb_proto) {
+					meta->async_cb.nr_args = btf_type_vlen(cb_proto);
+					cb_args = btf_params(cb_proto);
+					for (i = 0; i < meta->async_cb.nr_args; i++) {
+						const struct btf_type *t, *ref_t;
+						const char *ref_tname;
+						u32 ref_id, t_id;
+
+						t = btf_type_skip_modifiers(btf, cb_args[i].type, &t_id);
+						ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
+						ref_tname = btf_name_by_offset(btf, ref_t->name_off);
+						meta->async_cb.args[i].type = get_kfunc_ptr_arg_type(env, meta,
+							t, ref_t, ref_tname, cb_args, i, meta->async_cb.nr_args);
+
+						/* FIXME: we should not get an error from get_kfunc_ptr_arg_type() */
+						if (meta->async_cb.args[i].type < 0)
+							meta->async_cb.args[i].type = KF_ARG_PTR_TO_BTF_ID;
+						meta->async_cb.args[i].btf_id = ref_id;
+					}
+				} else {
+					meta->async_cb.nr_args = 0;
+				}
+			}
 			break;
 		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
 			if (!type_is_ptr_alloc_obj(reg->type)) {
@@ -12248,6 +12345,71 @@  static int fetch_kfunc_meta(struct bpf_verifier_env *env,
 
 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name);
 
+static int set_generic_callback_state(struct bpf_verifier_env *env,
+				      struct bpf_func_state *caller,
+				      struct bpf_func_state *callee,
+				      int insn_idx,
+				      struct bpf_kfunc_call_arg_meta *meta)
+{
+	int i;
+
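+	/* a BPF callback receives at most five arguments, passed in R1-R5 */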
+	for (i = 0; i < 5; i++) {
+		if (i < meta->async_cb.nr_args) {
+			u32 type = meta->async_cb.args[i].type;
+
+			switch (type) {
+			case KF_ARG_PTR_TO_CTX:
+			case KF_ARG_PTR_TO_ALLOC_BTF_ID:
+			case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
+			case KF_ARG_PTR_TO_DYNPTR:
+			case KF_ARG_PTR_TO_ITER:
+			case KF_ARG_PTR_TO_LIST_HEAD:
+			case KF_ARG_PTR_TO_LIST_NODE:
+			case KF_ARG_PTR_TO_CALLBACK:
+			case KF_ARG_PTR_TO_RB_ROOT:
+			case KF_ARG_PTR_TO_RB_NODE:
+			case KF_ARG_PTR_TO_NULL:
+			case KF_ARG_PTR_TO_CONST_STR:
+				verbose(env, "argument #%d of type %s is not supported in async callbacks\n",
+					i, __kfunc_ptr_arg_type_str(meta->async_cb.args[i].type));
+				return -EINVAL;
+			case KF_ARG_PTR_TO_MEM:
+			case KF_ARG_PTR_TO_MEM_SIZE:
+				callee->regs[BPF_REG_1 + i].type = PTR_TO_MEM;
+				__mark_reg_known_zero(&callee->regs[BPF_REG_1 + i]);
+				callee->regs[BPF_REG_1 + i].mem_size = 8; // FIXME: should store the size while walking the arguments
+				break;
+			case KF_ARG_PTR_TO_MAP:
+				callee->regs[BPF_REG_1 + i].type = CONST_PTR_TO_MAP;
+				__mark_reg_known_zero(&callee->regs[BPF_REG_1 + i]);
+				callee->regs[BPF_REG_1 + i].map_ptr = meta->map.ptr;
+				break;
+			case KF_ARG_PTR_TO_WORKQUEUE:
+				callee->regs[BPF_REG_1 + i].type = PTR_TO_MAP_VALUE;
+				__mark_reg_known_zero(&callee->regs[BPF_REG_1 + i]);
+				callee->regs[BPF_REG_1 + i].map_ptr = meta->map.ptr;
+				break;
+			case KF_ARG_PTR_TO_BTF_ID:
+				callee->regs[BPF_REG_1 + i].type = PTR_TO_BTF_ID;
+				__mark_reg_known_zero(&callee->regs[BPF_REG_1 + i]);
+				callee->regs[BPF_REG_1 + i].btf = meta->btf;
+				callee->regs[BPF_REG_1 + i].btf_id = meta->async_cb.args[i].btf_id;
+				break;
+			default:
+				verbose(env, "verifier bug: unexpected arg#%d type (%d) in async callback\n",
+					i, type);
+				return -EFAULT;
+			}
+		} else {
+			__mark_reg_not_init(env, &callee->regs[BPF_REG_1 + i]);
+		}
+	}
+	callee->in_callback_fn = true;
+	// callee->callback_ret_range = retval_range(-MAX_ERRNO, );
+	return 0;
+}
+
+
 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			    int *insn_idx_p)
 {
@@ -12313,6 +12475,16 @@  static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		}
 	}
 
+	if (meta.async_cb.enabled) {
+		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
+					 set_generic_callback_state, &meta);
+		if (err) {
+			verbose(env, "kfunc %s#%d failed callback verification\n",
+				func_name, meta.func_id);
+			return err;
+		}
+	}
+
 	rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
 	rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
 
@@ -15918,22 +16090,39 @@  static int visit_insn(int t, struct bpf_verifier_env *env)
 		}
 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
 			struct bpf_kfunc_call_arg_meta meta;
+			const struct btf_param *args;
+			u32 i, nargs;
 
 			ret = fetch_kfunc_meta(env, insn, &meta, NULL);
-			if (ret == 0 && is_iter_next_kfunc(&meta)) {
-				mark_prune_point(env, t);
-				/* Checking and saving state checkpoints at iter_next() call
-				 * is crucial for fast convergence of open-coded iterator loop
-				 * logic, so we need to force it. If we don't do that,
-				 * is_state_visited() might skip saving a checkpoint, causing
-				 * unnecessarily long sequence of not checkpointed
-				 * instructions and jumps, leading to exhaustion of jump
-				 * history buffer, and potentially other undesired outcomes.
-				 * It is expected that with correct open-coded iterators
-				 * convergence will happen quickly, so we don't run a risk of
-				 * exhausting memory.
-				 */
-				mark_force_checkpoint(env, t);
+			if (ret == 0) {
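+				/* in BTF, the btf_param array immediately follows the func_proto record */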
+				args = (const struct btf_param *)(meta.func_proto + 1);
+				nargs = btf_type_vlen(meta.func_proto);
+
+				for (i = 0; i < nargs; i++) {
+					if (is_kfunc_arg_sleepable_async_cb(meta.btf, &args[i]) ||
+					    is_kfunc_arg_async_cb(meta.btf, &args[i]))
+						/* Mark this call insn as a prune point to trigger
+						 * is_state_visited() check before call itself is
+						 * processed by __check_func_call(). Otherwise new
+						 * async state will be pushed for further exploration.
+						 */
+						mark_prune_point(env, t);
+				}
+				if (is_iter_next_kfunc(&meta)) {
+					mark_prune_point(env, t);
+					/* Checking and saving state checkpoints at iter_next() call
+					 * is crucial for fast convergence of open-coded iterator loop
+					 * logic, so we need to force it. If we don't do that,
+					 * is_state_visited() might skip saving a checkpoint, causing
+					 * unnecessarily long sequence of not checkpointed
+					 * instructions and jumps, leading to exhaustion of jump
+					 * history buffer, and potentially other undesired outcomes.
+					 * It is expected that with correct open-coded iterators
+					 * convergence will happen quickly, so we don't run a risk of
+					 * exhausting memory.
+					 */
+					mark_force_checkpoint(env, t);
+				}
 			}
 		}
 		return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);