@@ -1424,6 +1424,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
extern const struct bpf_func_proto bpf_skb_output_proto;
extern const struct bpf_func_proto bpf_xdp_output_proto;
+extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags)
@@ -1521,6 +1522,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sock_from_file_proto;
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_ptr_cookie_proto;
+ case BPF_FUNC_xdp_get_buff_len:
+ return &bpf_xdp_get_buff_len_trace_proto;
#endif
case BPF_FUNC_seq_printf:
return prog->expected_attach_type == BPF_TRACE_ITER ?
@@ -3805,6 +3805,15 @@ static const struct bpf_func_proto bpf_xdp_get_buff_len_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff)
+/*
+ * Proto wrapping bpf_xdp_get_buff_len() whose first argument is typed
+ * as ARG_PTR_TO_BTF_ID, using the struct xdp_buff id listed above,
+ * whereas bpf_xdp_get_buff_len_proto uses ARG_PTR_TO_CTX.
+ * Non-static: tracing_prog_func_proto() returns it for
+ * BPF_FUNC_xdp_get_buff_len.
+ */
+const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = {
+ .func = bpf_xdp_get_buff_len,
+ .gpl_only = false,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_xdp_get_buff_len_bpf_ids[0],
+};
+
static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
{
return xdp_data_meta_unsupported(xdp) ? 0 :
@@ -4619,10 +4628,52 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
};
#endif
-static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
+static unsigned long bpf_xdp_copy(void *dst_buff, const void *ctx,
unsigned long off, unsigned long len)
{
- memcpy(dst_buff, src_buff + off, len);
+ unsigned long base_len, copy_len, frag_off_total;
+ struct xdp_buff *xdp = (struct xdp_buff *)ctx;
+ struct skb_shared_info *sinfo;
+ int i;
+ /*
+  * Copy callback that bpf_xdp_event_output() hands to
+  * bpf_event_output(): ctx is the xdp_buff itself, and len bytes
+  * starting at byte offset off are copied into dst_buff. Always
+  * returns 0.
+  *
+  * When xdp_buff_is_mb() reports false, everything is taken from
+  * xdp->data with a single memcpy().
+  */
+ if (likely(!xdp_buff_is_mb(xdp))) {
+ memcpy(dst_buff, xdp->data + off, len);
+ return 0;
+ }
+ /*
+  * Otherwise offsets below base_len are served from xdp->data and
+  * the rest from the sinfo->nr_frags fragments in order;
+  * frag_off_total is the offset at which the fragment currently
+  * being examined starts.
+  */
+ base_len = xdp->data_end - xdp->data;
+ frag_off_total = base_len;
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+
+ /* If we need to copy data from the base buffer do it */
+ if (off < base_len) {
+ copy_len = min(len, base_len - off);
+ memcpy(dst_buff, xdp->data + off, copy_len);
+
+ off += copy_len;
+ len -= copy_len;
+ dst_buff += copy_len;
+ }
+
+ /* Copy any remaining data from the fragments. The loop stops as
+  * soon as len reaches 0. frag_off is the offset of off inside the
+  * current fragment; a value >= frag_len means the requested range
+  * starts beyond this fragment, so it is skipped.
+  */
+ for (i = 0; len && i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ unsigned long frag_len, frag_off;
+
+ frag_len = skb_frag_size(frag);
+ frag_off = off - frag_off_total;
+ if (frag_off < frag_len) {
+ copy_len = min(len, frag_len - frag_off);
+ memcpy(dst_buff,
+ skb_frag_address(frag) + frag_off, copy_len);
+
+ off += copy_len;
+ len -= copy_len;
+ dst_buff += copy_len;
+ }
+ frag_off_total += frag_len;
+ }
+
return 0;
}
@@ -4634,10 +4685,19 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
return -EINVAL;
if (unlikely(!xdp ||
- xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+ (likely(!xdp_buff_is_mb(xdp)) &&
+ xdp_size > (unsigned long)(xdp->data_end - xdp->data))))
return -EFAULT;
+ if (unlikely(xdp_buff_is_mb(xdp))) {
+ struct skb_shared_info *sinfo;
+
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ if (unlikely(xdp_size > ((int)(xdp->data_end - xdp->data) +
+ sinfo->xdp_frags_size)))
+ return -EFAULT;
+ }
- return bpf_event_output(map, flags, meta, meta_size, xdp->data,
+ return bpf_event_output(map, flags, meta, meta_size, xdp,
xdp_size, bpf_xdp_copy);
}
@@ -10,11 +10,20 @@ struct meta {
int pkt_len;
};
+struct test_ctx_s {
+ bool passed; /* set to true at the end of on_sample() */
+ int pkt_size; /* length on_sample() compares meta->pkt_len against */
+};
+/*
+ * Single instance: reset by run_xdp_bpf2bpf_pkt_size() before each run
+ * and handed to on_sample() through pb_opts.ctx.
+ */
+struct test_ctx_s test_ctx;
+
static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
- int duration = 0;
struct meta *meta = (struct meta *)data;
struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
+ unsigned char *raw_pkt = data + sizeof(*meta);
+ struct test_ctx_s *tst_ctx = ctx;
+ int duration = 0;
if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
"check_size", "size %u < %zu\n",
@@ -25,25 +34,114 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
"meta->ifindex = %d\n", meta->ifindex))
return;
- if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
- "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
+ if (CHECK(meta->pkt_len != tst_ctx->pkt_size, "check_meta_pkt_len",
+ "meta->pkt_len = %d\n", tst_ctx->pkt_size))
return;
if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
"check_packet_content", "content not the same\n"))
return;
- *(bool *)ctx = true;
+ if (meta->pkt_len > sizeof(pkt_v4)) {
+ for (int i = 0; i < (meta->pkt_len - sizeof(pkt_v4)); i++) {
+ if (raw_pkt[i + sizeof(pkt_v4)] != (unsigned char)i) {
+ CHECK(true, "check_packet_content",
+ "byte %zu does not match %u != %u\n",
+ i + sizeof(pkt_v4),
+ raw_pkt[i + sizeof(pkt_v4)],
+ (unsigned char)i);
+ break;
+ }
+ }
+ }
+
+ tst_ctx->passed = true;
}
-void test_xdp_bpf2bpf(void)
+#define BUF_SZ 9000
+
+/*
+ * Run the XDP program at pkt_fd once on a pkt_size byte packet and
+ * verify the outcome: the program result, the sample delivered to
+ * on_sample() through pb, and the fentry/fexit results recorded in
+ * ftrace_skel->bss.
+ *
+ * The packet starts with pkt_v4; every following byte holds its index
+ * (truncated to 8 bits) so on_sample() can verify the payload.
+ *
+ * pkt_size must lie in [sizeof(pkt_v4), BUF_SZ].
+ * Returns 0 on success and a non-zero error value otherwise.
+ */
+static int run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb,
+ struct test_xdp_bpf2bpf *ftrace_skel,
+ int pkt_size)
{
__u32 duration = 0, retval, size;
- char buf[128];
+ __u8 *buf, *buf_in;
+ int err, ret = 0;
+
+ /* pkt_size is signed: compare against (int)sizeof() so a negative
+  * value is rejected here instead of being converted to a huge
+  * unsigned number that would pass the check and overrun buf_in.
+  */
+ if (pkt_size > BUF_SZ || pkt_size < (int)sizeof(pkt_v4))
+ return -EINVAL;
+
+ buf_in = malloc(BUF_SZ);
+ if (CHECK(!buf_in, "buf_in malloc()", "error:%s\n", strerror(errno)))
+ return -ENOMEM;
+
+ buf = malloc(BUF_SZ);
+ if (CHECK(!buf, "buf malloc()", "error:%s\n", strerror(errno))) {
+ ret = -ENOMEM;
+ goto free_buf_in;
+ }
+
+ /* Reset the state that on_sample() reads and updates. */
+ test_ctx.passed = false;
+ test_ctx.pkt_size = pkt_size;
+
+ memcpy(buf_in, &pkt_v4, sizeof(pkt_v4));
+ if (pkt_size > sizeof(pkt_v4)) {
+ for (int i = 0; i < (pkt_size - sizeof(pkt_v4)); i++)
+ buf_in[i + sizeof(pkt_v4)] = i;
+ }
+
+ /* Run test program */
+ err = bpf_prog_test_run(pkt_fd, 1, buf_in, pkt_size,
+ buf, &size, &retval, &duration);
+
+ if (CHECK(err || retval != XDP_PASS || size != pkt_size,
+ "ipv4", "err %d errno %d retval %d size %d\n",
+ err, errno, retval, size)) {
+ ret = err ? err : -EINVAL;
+ goto free_buf;
+ }
+
+ /* Make sure bpf_xdp_output() was triggered and it sent the expected
+ * data to the perf ring buffer.
+ */
+ err = perf_buffer__poll(pb, 100);
+ if (CHECK(err <= 0, "perf_buffer__poll", "err %d\n", err)) {
+ ret = -EINVAL;
+ goto free_buf;
+ }
+
+ if (CHECK_FAIL(!test_ctx.passed)) {
+ ret = -EINVAL;
+ goto free_buf;
+ }
+
+ /* Verify test results */
+ if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
+ "result", "fentry failed err %llu\n",
+ ftrace_skel->bss->test_result_fentry)) {
+ ret = -EINVAL;
+ goto free_buf;
+ }
+
+ if (CHECK(ftrace_skel->bss->test_result_fexit != XDP_PASS, "result",
+ "fexit failed err %llu\n",
+ ftrace_skel->bss->test_result_fexit))
+ ret = -EINVAL;
+
+free_buf:
+ free(buf);
+free_buf_in:
+ free(buf_in);
+
+ return ret;
+}
+
+void test_xdp_bpf2bpf(void)
+{
int err, pkt_fd, map_fd;
- bool passed = false;
- struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
- struct iptnl_info value4 = {.family = AF_INET};
+ __u32 duration = 0;
+ int pkt_sizes[] = {sizeof(pkt_v4), 1024, 4100, 8200};
+ struct iptnl_info value4 = {.family = AF_INET6};
struct test_xdp *pkt_skel = NULL;
struct test_xdp_bpf2bpf *ftrace_skel = NULL;
struct vip key4 = {.protocol = 6, .family = AF_INET};
@@ -87,40 +185,15 @@ void test_xdp_bpf2bpf(void)
/* Set up perf buffer */
pb_opts.sample_cb = on_sample;
- pb_opts.ctx = &passed;
+ pb_opts.ctx = &test_ctx;
pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
- 1, &pb_opts);
+ 8, &pb_opts);
if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto out;
- /* Run test program */
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
-
- if (CHECK(err || retval != XDP_TX || size != 74 ||
- iph->protocol != IPPROTO_IPIP, "ipv4",
- "err %d errno %d retval %d size %d\n",
- err, errno, retval, size))
- goto out;
-
- /* Make sure bpf_xdp_output() was triggered and it sent the expected
- * data to the perf ring buffer.
- */
- err = perf_buffer__poll(pb, 100);
- if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
- goto out;
-
- CHECK_FAIL(!passed);
-
- /* Verify test results */
- if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
- "result", "fentry failed err %llu\n",
- ftrace_skel->bss->test_result_fentry))
- goto out;
-
- CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result",
- "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
-
+ for (int i = 0; i < ARRAY_SIZE(pkt_sizes); i++)
+ run_xdp_bpf2bpf_pkt_size(pkt_fd, pb, ftrace_skel,
+ pkt_sizes[i]);
out:
if (pb)
perf_buffer__free(pb);
@@ -49,7 +49,7 @@ int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
void *data = (void *)(long)xdp->data;
meta.ifindex = xdp->rxq->dev->ifindex;
- meta.pkt_len = data_end - data;
+ meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp);
bpf_xdp_output(xdp, &perf_buf_map,
((__u64) meta.pkt_len << 32) |
BPF_F_CURRENT_CPU,