@@ -411,6 +411,8 @@ struct bpf_verifier_env {
u32 peak_states;
/* longest register parentage chain walked for liveness marking */
u32 longest_mark_read_walk;
+	bool all_redirect_map_tail;	/* all bpf_redirect_map() calls are in tail position */
+	u32 redirect_map_call_cnt;	/* number of bpf_redirect_map() calls seen so far */
};
__printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
@@ -534,7 +534,8 @@ struct bpf_prog {
is_func:1, /* program is a bpf function */
kprobe_override:1, /* Do we override a kprobe? */
has_callchain_buf:1, /* callchain buffer allocated? */
- enforce_expected_attach_type:1; /* Enforce expected_attach_type checking at attach time */
+ enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
+				  redirect_tail_call:1; /* All bpf_redirect_map() calls in tail position? */
enum bpf_prog_type type; /* Type of BPF program */
enum bpf_attach_type expected_attach_type; /* For some prog types */
u32 len; /* Number of filter blocks */
@@ -613,6 +614,9 @@ struct bpf_redirect_info {
void *tgt_value;
struct bpf_map *map;
u32 kern_flags;
+	struct bpf_prog *xdp_prog_redirect_tailcall; /* if non-NULL, complete the redirect directly from BPF context */
+ struct xdp_buff *xdp;
+ struct net_device *dev;
};
DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
@@ -620,6 +624,19 @@ DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
/* flags for bpf_redirect_info kern_flags */
#define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */
+
+static inline void xdp_set_redirect_tailcall(struct bpf_prog *xdp_prog,
+ struct xdp_buff *xdp,
+ struct net_device *dev)
+{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+ ri->xdp_prog_redirect_tailcall = xdp_prog;
+ ri->xdp = xdp;
+ ri->dev = dev;
+}
+
/* Compute the linear packet data range [data, data_end) which
* will be accessed by various program types (cls_bpf, act_bpf,
* lwt, ...). Subsystems allowing direct data access must (!)
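
For context only (not part of the patch): a rough sketch of how a driver's XDP run path could arm the per-CPU tail-call state before invoking the program, assuming the verifier has set prog->redirect_tail_call. The function name mydrv_run_xdp() and the surrounding flow are hypothetical; only bpf_prog_run_xdp() and the helper added above are existing interfaces.

	static u32 mydrv_run_xdp(struct net_device *dev, struct bpf_prog *prog,
				 struct xdp_buff *xdp)
	{
		u32 act;

		/* Arm the fast path only for programs the verifier marked as
		 * having every bpf_redirect_map() call in tail position.
		 */
		if (prog->redirect_tail_call)
			xdp_set_redirect_tailcall(prog, xdp, dev);

		act = bpf_prog_run_xdp(prog, xdp);

		/* In the armed case, XDP_REDIRECT means the frame was already
		 * enqueued by the helper itself, so the usual xdp_do_redirect()
		 * step would not be needed here.
		 */
		return act;
	}
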
@@ -4594,6 +4594,54 @@ static int check_reference_leak(struct bpf_verifier_env *env)
return state->acquired_refs ? -EINVAL : 0;
}
+static void check_redirect_map_tail_call(struct bpf_verifier_env *env, int func_id,
+ int insn_idx)
+{
+ struct bpf_insn *insns = env->prog->insnsi;
+ int insn_cnt = env->prog->len;
+ bool is_tail_call = false;
+ struct bpf_insn *insn;
+
+ if (func_id != BPF_FUNC_redirect_map)
+ return;
+
+ /* Naive peephole tail call checking */
+ insn_idx++;
+again:
+ if (insn_idx >= insn_cnt)
+ goto out;
+
+ insn = &insns[insn_idx];
+ insn_idx++;
+
+ switch (insn->code) {
+ /* 1. Is the instruction following the call, an exit? */
+ case BPF_JMP | BPF_EXIT:
+ is_tail_call = true;
+ break;
+	/* 2. Does the taken branch of "if r0 s> 0 goto pc+X" lead to an exit? */
+ case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP32 | BPF_JSGT | BPF_K:
+ if (insn->dst_reg == BPF_REG_0 && insn->imm == 0) {
+ insn_idx += insn->off;
+ goto again;
+ }
+ break;
+ /* 3... more checks. XXX */
+ default:
+ break;
+ }
+
+out:
+ if (!env->redirect_map_call_cnt++) {
+ env->all_redirect_map_tail = is_tail_call;
+ return;
+ }
+
+ if (!is_tail_call)
+ env->all_redirect_map_tail = false;
+}
+
static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
{
const struct bpf_func_proto *fn = NULL;
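
For reference (also not part of the patch), the shape of program the peephole is meant to accept is the common tail-position call: compiled with clang, the helper call is immediately followed by the program's exit (case 1 above). The map and program names below are illustrative.

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	struct {
		__uint(type, BPF_MAP_TYPE_XSKMAP);
		__uint(max_entries, 64);
		__type(key, __u32);
		__type(value, __u32);
	} xsks_map SEC(".maps");

	SEC("xdp")
	int xdp_redirect_tail(struct xdp_md *ctx)
	{
		/* bpf_redirect_map() in tail position: the call is followed
		 * directly by the exit instruction, which the peephole accepts.
		 */
		return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS);
	}

	char LICENSE[] SEC("license") = "GPL";
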
@@ -4685,6 +4733,8 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
}
}
+ check_redirect_map_tail_call(env, func_id, insn_idx);
+
regs = cur_regs(env);
/* check that flags argument in get_local_storage(map, flags) is 0,
@@ -11064,6 +11114,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
if (ret == 0)
adjust_btf_func(env);
+ if (ret == 0)
+ env->prog->redirect_tail_call = env->all_redirect_map_tail;
+
err_release_maps:
if (!env->prog->aux->used_maps)
/* if we didn't copy map pointers into bpf_prog_info, release
@@ -3701,6 +3701,60 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
.arg2_type = ARG_ANYTHING,
};
+static u64 __bpf_xdp_redirect_map_tailcall(struct bpf_map *map, u32 index,
+ u64 flags,
+ struct bpf_redirect_info *ri)
+{
+ struct xdp_buff *xdp = ri->xdp;
+ struct net_device *d = ri->dev;
+ struct bpf_prog *xdp_prog;
+ void *val;
+ int err;
+
+ switch (map->map_type) {
+ case BPF_MAP_TYPE_DEVMAP: {
+ val = __dev_map_lookup_elem(map, index);
+ if (unlikely(!val))
+			return flags; /* lower bits of flags are the fallback action */
+ err = dev_map_enqueue(val, xdp, d);
+ break;
+ }
+ case BPF_MAP_TYPE_DEVMAP_HASH: {
+ val = __dev_map_hash_lookup_elem(map, index);
+ if (unlikely(!val))
+ return flags;
+ err = dev_map_enqueue(val, xdp, d);
+ break;
+ }
+ case BPF_MAP_TYPE_CPUMAP: {
+ val = __cpu_map_lookup_elem(map, index);
+ if (unlikely(!val))
+ return flags;
+ err = cpu_map_enqueue(val, xdp, d);
+ break;
+ }
+ case BPF_MAP_TYPE_XSKMAP: {
+ val = __xsk_map_lookup_elem(map, index);
+ if (unlikely(!val))
+ return flags;
+ err = __xsk_map_redirect(val, xdp);
+ break;
+ }
+ default:
+ return flags;
+ }
+
+ xdp_prog = ri->xdp_prog_redirect_tailcall;
+ if (unlikely(err))
+ goto err;
+
+ _trace_xdp_redirect_map(d, xdp_prog, val, map, index);
+ return XDP_REDIRECT;
+err:
+ _trace_xdp_redirect_map_err(d, xdp_prog, val, map, index, err);
+ return XDP_DROP;
+}
+
BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
u64, flags)
{
@@ -3710,6 +3764,9 @@ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
if (unlikely(flags > XDP_TX))
return XDP_ABORTED;
+ if (ri->xdp_prog_redirect_tailcall)
+ return __bpf_xdp_redirect_map_tailcall(map, ifindex, flags, ri);
+
ri->tgt_value = __xdp_map_lookup_elem(map, ifindex);
if (unlikely(!ri->tgt_value)) {
/* If the lookup fails we want to clear out the state in the
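
One follow-on note, sketched here as an assumption rather than part of the patch: the per-CPU pointer installed by xdp_set_redirect_tailcall() persists until overwritten, so a driver arming the fast path would presumably also want to clear it once it is done, to keep a later program that was not verified as redirect_tail_call from seeing stale state. A hypothetical counterpart to the setter could look like this:

	static inline void xdp_clear_redirect_tailcall(void)
	{
		struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);

		/* Drop the references so subsequent programs take the normal
		 * bpf_redirect_map()/xdp_do_redirect() path.
		 */
		ri->xdp_prog_redirect_tailcall = NULL;
		ri->xdp = NULL;
		ri->dev = NULL;
	}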