@@ -284,6 +284,7 @@ enum bpf_arg_type {
ARG_ANYTHING, /* any (initialized) argument is ok */
ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
+ ARG_PTR_TO_SOCK_COMMON_OR_NULL, /* pointer to sock_common or NULL */
ARG_PTR_TO_INT, /* pointer to int */
ARG_PTR_TO_LONG, /* pointer to long */
ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */
@@ -432,7 +432,8 @@ static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
{
- return type == ARG_PTR_TO_SOCK_COMMON;
+ return type == ARG_PTR_TO_SOCK_COMMON ||
+ type == ARG_PTR_TO_SOCK_COMMON_OR_NULL;
}
static bool arg_type_may_be_null(enum bpf_arg_type type)
@@ -440,6 +441,7 @@ static bool arg_type_may_be_null(enum bpf_arg_type type)
return type == ARG_PTR_TO_MAP_VALUE_OR_NULL ||
type == ARG_PTR_TO_MEM_OR_NULL ||
type == ARG_PTR_TO_CTX_OR_NULL ||
+ type == ARG_PTR_TO_SOCK_COMMON_OR_NULL ||
type == ARG_PTR_TO_SOCKET_OR_NULL ||
type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
}
@@ -486,7 +488,12 @@ static bool is_acquire_function(enum bpf_func_id func_id,
static bool is_ptr_cast_function(enum bpf_func_id func_id)
{
return func_id == BPF_FUNC_tcp_sock ||
- func_id == BPF_FUNC_sk_fullsock;
+ func_id == BPF_FUNC_sk_fullsock ||
+ func_id == BPF_FUNC_skc_to_tcp_sock ||
+ func_id == BPF_FUNC_skc_to_tcp6_sock ||
+ func_id == BPF_FUNC_skc_to_udp6_sock ||
+ func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
+ func_id == BPF_FUNC_skc_to_tcp_request_sock;
}
/* string representation of 'enum bpf_reg_type' */
@@ -3906,6 +3913,7 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env,
struct bpf_reg_types {
const enum bpf_reg_type types[10];
+ u32 *btf_id;
};
static const struct bpf_reg_types map_key_value_types = {
@@ -3926,6 +3934,17 @@ static const struct bpf_reg_types sock_types = {
},
};
+static const struct bpf_reg_types sock_or_null_types = {
+ .types = {
+ PTR_TO_SOCK_COMMON,
+ PTR_TO_SOCKET,
+ PTR_TO_TCP_SOCK,
+ PTR_TO_XDP_SOCK,
+ PTR_TO_BTF_ID,
+ },
+ .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+};
+
static const struct bpf_reg_types mem_types = {
.types = {
PTR_TO_STACK,
@@ -3967,6 +3986,7 @@ static const struct bpf_reg_types *compatible_reg_types[] = {
[ARG_PTR_TO_CTX] = &context_types,
[ARG_PTR_TO_CTX_OR_NULL] = &context_types,
[ARG_PTR_TO_SOCK_COMMON] = &sock_types,
+ [ARG_PTR_TO_SOCK_COMMON_OR_NULL]= &sock_or_null_types,
[ARG_PTR_TO_SOCKET] = &fullsock_types,
[ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types,
[ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
@@ -4016,6 +4036,14 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 arg,
if (type == PTR_TO_BTF_ID) {
u32 *expected_btf_id = fn->arg_btf_id[arg];
+ if (!expected_btf_id) {
+ if (!compatible->btf_id) {
+ verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
+ return -EFAULT;
+ }
+ expected_btf_id = compatible->btf_id;
+ }
+
if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id,
*expected_btf_id)) {
verbose(env, "R%d is a pointer to in-kernel struct %s but %s is expected instead\n",
@@ -77,6 +77,9 @@
#include <net/transp_v6.h>
#include <linux/btf_ids.h>
+static const struct bpf_func_proto *
+bpf_sk_base_func_proto(enum bpf_func_id func_id);
+
int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
{
if (in_compat_syscall()) {
@@ -5693,7 +5696,7 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
BPF_CALL_1(bpf_sk_release, struct sock *, sk)
{
- if (sk_is_refcounted(sk))
+ if (sk && sk_is_refcounted(sk))
sock_gen_put(sk);
return 0;
}
@@ -5702,7 +5705,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
.func = bpf_sk_release,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_SOCK_COMMON,
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON_OR_NULL,
};
BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
@@ -6619,7 +6622,7 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return NULL;
}
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -6638,7 +6641,7 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_perf_event_output:
return &bpf_skb_event_output_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -6799,7 +6802,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_assign_proto;
#endif
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -6840,7 +6843,7 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_gen_syncookie_proto;
#endif
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -6882,7 +6885,7 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_sock_proto;
#endif /* CONFIG_INET */
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -6928,7 +6931,7 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_cgroup_classid_curr_proto;
#endif
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -6970,7 +6973,7 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_skc_lookup_tcp_proto;
#endif
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -6981,7 +6984,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_load_bytes:
return &bpf_flow_dissector_load_bytes_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -7008,7 +7011,7 @@ lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -9745,7 +9748,7 @@ sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -9912,8 +9915,7 @@ const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
.func = bpf_skc_to_tcp6_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
- .arg1_type = ARG_PTR_TO_BTF_ID,
- .arg1_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON_OR_NULL,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
};
@@ -9929,8 +9931,7 @@ const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
.func = bpf_skc_to_tcp_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
- .arg1_type = ARG_PTR_TO_BTF_ID,
- .arg1_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON_OR_NULL,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
};
@@ -9953,8 +9954,7 @@ const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
.func = bpf_skc_to_tcp_timewait_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
- .arg1_type = ARG_PTR_TO_BTF_ID,
- .arg1_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON_OR_NULL,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW],
};
@@ -9977,8 +9977,7 @@ const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
.func = bpf_skc_to_tcp_request_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
- .arg1_type = ARG_PTR_TO_BTF_ID,
- .arg1_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON_OR_NULL,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
};
@@ -9999,7 +9998,37 @@ const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
.func = bpf_skc_to_udp6_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
- .arg1_type = ARG_PTR_TO_BTF_ID,
- .arg1_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON_OR_NULL,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
};
+
+static const struct bpf_func_proto *
+bpf_sk_base_func_proto(enum bpf_func_id func_id)
+{
+ const struct bpf_func_proto *func;
+
+ switch (func_id) {
+ case BPF_FUNC_skc_to_tcp6_sock:
+ func = &bpf_skc_to_tcp6_sock_proto;
+ break;
+ case BPF_FUNC_skc_to_tcp_sock:
+ func = &bpf_skc_to_tcp_sock_proto;
+ break;
+ case BPF_FUNC_skc_to_tcp_timewait_sock:
+ func = &bpf_skc_to_tcp_timewait_sock_proto;
+ break;
+ case BPF_FUNC_skc_to_tcp_request_sock:
+ func = &bpf_skc_to_tcp_request_sock_proto;
+ break;
+ case BPF_FUNC_skc_to_udp6_sock:
+ func = &bpf_skc_to_udp6_sock_proto;
+ break;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+
+ if (!perfmon_capable())
+ return NULL;
+
+ return func;
+}
There is a constant need to add more fields into the bpf_tcp_sock for the bpf programs running at tc, sock_ops...etc. A current workaround could be to use bpf_probe_read_kernel(). However, other than making another helper call for reading each field and missing CO-RE, it is also not as intuitive to use as directly reading "tp->lsndtime" for example. While already having perfmon cap to do bpf_probe_read_kernel(), it will be much easier if the bpf prog can directly read from the tcp_sock. This patch tries to do that by using the existing casting-helpers bpf_skc_to_*() whose func_proto returns a btf_id. For example, the func_proto of bpf_skc_to_tcp_sock returns the btf_id of the kernel "struct tcp_sock". This patch modifies the existing bpf_skc_to_* func_proto to take ARG_PTR_TO_SOCK_COMMON_OR_NULL instead of taking "ARG_PTR_TO_BTF_ID + &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON]". That will allow tc, sock_ops,...etc to call these casting helpers because they already hold the PTR_TO_SOCK_COMMON (or its equivalent). For example: sk = sock_ops->sk; if (!sk) return; tp = bpf_skc_to_tcp_sock(sk); if (!tp) return; /* Read tp as a PTR_TO_BTF_ID */ lsndtime = tp->lsndtime; To ensure the current bpf prog passing a PTR_TO_BTF_ID to bpf_skc_to_*() still works as is, the verifier is modified such that ARG_PTR_TO_SOCK_COMMON_OR_NULL can accept a reg with reg->type == PTR_TO_BTF_ID and reg->btf_id is btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON] To do that, an idea is borrowed from one of the Lorenz's patch: https://lore.kernel.org/bpf/20200904112401.667645-12-lmb@cloudflare.com/ . It adds PTR_TO_BTF_ID as one of the acceptable reg->type for ARG_PTR_TO_SOCK_COMMON_OR_NULL and also specifies what btf_id it can take. By doing this, the bpf_skc_to_* will work as before and can still take PTR_TO_BTF_ID as the arg. e.g. The bpf tcp iter will work as is. ARG_PTR_TO_SOCK_COMMON_OR_NULL is newly added. The existing helpers have to change to take this new arg type in order to work with the PTR_TO_BTF_ID reg. OR_NULL is needed because PTR_TO_BTF_ID could be NULL. For example, req_sk = bpf_skc_to_tcp_request_sock(sk); /* req_sk->sk is a PTR_TO_BTF_ID and it could be NULL */ bpf_sk_cgroup_id(req_sk->sk); [ Note: To handle PTR_TO_BTF_ID that could be NULL, one considered option is to introduce PTR_TO_BTF_ID_NOFAULT which will be used in the return value of the bpf_skc_to_*() helpers. It is because the helper has verified the pointer can access the whole struct described by the btf_id and NULL check must have been done by the program. However, the above req_sk->sk example will still have a PTR_TO_BTF_ID which may be NULL. ] bpf_sk_release() is changed in this patch to take ARG_PTR_TO_SOCK_COMMON_OR_NULL such that the following will work: sk = bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); if (!sk) return; tp = bpf_skc_to_tcp_sock(sk); if (!tp) { bpf_sk_release(sk); return; } lsndtime = tp->lsndtime; /* Pass tp to bpf_sk_release() will also work */ bpf_sk_release(tp); Other helpers will be changed to take ARG_PTR_TO_SOCK_COMMONN_OR_NULL later. The BPF_FUNC_skc_to_* func_id is added to is_ptr_cast_function(). It ensures the returning reg (BPF_REF_0) which is a PTR_TO_BTF_ID_OR_NULL also carries the ref_obj_id. That will keep the ref-tracking works properly. The bpf_skc_to_* helpers are made available to most of the bpf prog types in filter.c. They are limited by perfmon cap. Signed-off-by: Martin KaFai Lau <kafai@fb.com> --- include/linux/bpf.h | 1 + kernel/bpf/verifier.c | 32 +++++++++++++++++-- net/core/filter.c | 73 ++++++++++++++++++++++++++++++------------- 3 files changed, 82 insertions(+), 24 deletions(-)