Message ID | 20200827220114.69225-6-alexei.starovoitov@gmail.com |
---|---|
State | New |
Headers | show |
Series | bpf: Introduce minimal support for sleepable progs | expand |
On 8/27/20 3:01 PM, Alexei Starovoitov wrote: > From: Alexei Starovoitov <ast@kernel.org> > > Modify few tests to sanity test sleepable bpf functionality. > > Running 'bench trig-fentry-sleep' vs 'bench trig-fentry' and 'perf report': > sleepable with SRCU: > 3.86% bench [k] __srcu_read_unlock > 3.22% bench [k] __srcu_read_lock > 0.92% bench [k] bpf_prog_740d4210cdcd99a3_bench_trigger_fentry_sleep > 0.50% bench [k] bpf_trampoline_10297 > 0.26% bench [k] __bpf_prog_exit_sleepable > 0.21% bench [k] __bpf_prog_enter_sleepable > > sleepable with RCU_TRACE: > 0.79% bench [k] bpf_prog_740d4210cdcd99a3_bench_trigger_fentry_sleep > 0.72% bench [k] bpf_trampoline_10381 > 0.31% bench [k] __bpf_prog_exit_sleepable > 0.29% bench [k] __bpf_prog_enter_sleepable > > non-sleepable with RCU: > 0.88% bench [k] bpf_prog_740d4210cdcd99a3_bench_trigger_fentry > 0.84% bench [k] bpf_trampoline_10297 > 0.13% bench [k] __bpf_prog_enter > 0.12% bench [k] __bpf_prog_exit > > Signed-off-by: Alexei Starovoitov <ast@kernel.org> > Acked-by: KP Singh <kpsingh@google.com> > --- > tools/testing/selftests/bpf/bench.c | 2 + > .../selftests/bpf/benchs/bench_trigger.c | 17 +++++ > .../selftests/bpf/prog_tests/test_lsm.c | 9 +++ > tools/testing/selftests/bpf/progs/lsm.c | 66 ++++++++++++++++++- > .../selftests/bpf/progs/trigger_bench.c | 7 ++ > 5 files changed, 99 insertions(+), 2 deletions(-) > [...] > diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c > index b4598d4bc4f7..49fa6ca99755 100644 > --- a/tools/testing/selftests/bpf/progs/lsm.c > +++ b/tools/testing/selftests/bpf/progs/lsm.c > @@ -9,16 +9,41 @@ > #include <bpf/bpf_tracing.h> > #include <errno.h> > > +struct { > + __uint(type, BPF_MAP_TYPE_ARRAY); > + __uint(max_entries, 1); > + __type(key, __u32); > + __type(value, __u64); > +} array SEC(".maps"); > + > +struct { > + __uint(type, BPF_MAP_TYPE_HASH); > + __uint(max_entries, 1); > + __type(key, __u32); > + __type(value, __u64); > +} hash SEC(".maps"); > + > +struct { > + __uint(type, BPF_MAP_TYPE_LRU_HASH); > + __uint(max_entries, 1); > + __type(key, __u32); > + __type(value, __u64); > +} lru_hash SEC(".maps"); > + > char _license[] SEC("license") = "GPL"; > > int monitored_pid = 0; > int mprotect_count = 0; > int bprm_count = 0; > > -SEC("lsm/file_mprotect") > +SEC("lsm.s/file_mprotect") When running selftest, I hit the following kernel warning: [ 250.871267] ============================================ [ 250.871902] WARNING: possible recursive locking detected [ 250.872561] 5.9.0-rc1+ #830 Not tainted [ 250.873166] -------------------------------------------- [ 250.873991] true/2053 is trying to acquire lock: [ 250.874715] ffff8fc1f9cd2068 (&mm->mmap_lock#2){++++}-{3:3}, at: __might_fault+0x3e/0x90 [ 250.875943] [ 250.875943] but task is already holding lock: [ 250.876688] ffff8fc1f9cd2068 (&mm->mmap_lock#2){++++}-{3:3}, at: do_mprotect_pkey+0xb5/0x2f0 [ 250.877978] [ 250.877978] other info that might help us debug this: [ 250.878797] Possible unsafe locking scenario: [ 250.878797] [ 250.879708] CPU0 [ 250.880095] ---- [ 250.880482] lock(&mm->mmap_lock#2); [ 250.881063] lock(&mm->mmap_lock#2); [ 250.881645] [ 250.881645] *** DEADLOCK *** [ 250.881645] [ 250.882559] May be due to missing lock nesting notation [ 250.882559] [ 250.883613] 2 locks held by true/2053: [ 250.884194] #0: ffff8fc1f9cd2068 (&mm->mmap_lock#2){++++}-{3:3}, at: do_mprotect_pkey+0xb5/0x2f0 [ 250.885558] #1: ffffffffbc47b8a0 (rcu_read_lock_trace){....}-{0:0}, at: __bpf_prog_enter_sleepable+0x0/0x40 [ 250.887062] [ 250.887062] stack backtrace: [ 250.887583] CPU: 1 PID: 2053 Comm: true Not tainted 5.9.0-rc1+ #830 [ 250.888546] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-1.el7.centos 04/01/2014 [ 250.889896] Call Trace: [ 250.890222] dump_stack+0x78/0xa0 [ 250.890644] __lock_acquire.cold.74+0x209/0x2e3 [ 250.891350] lock_acquire+0xba/0x380 [ 250.891919] ? __might_fault+0x3e/0x90 [ 250.892510] ? __lock_acquire+0x639/0x20c0 [ 250.893150] __might_fault+0x68/0x90 [ 250.893717] ? __might_fault+0x3e/0x90 [ 250.894325] _copy_from_user+0x1e/0xa0 [ 250.894946] bpf_copy_from_user+0x22/0x50 [ 250.895581] bpf_prog_3717002769f30998_test_int_hook+0x76/0x60c [ 250.896446] ? __bpf_prog_enter_sleepable+0x3c/0x40 [ 250.897207] ? __bpf_prog_exit+0xa0/0xa0 [ 250.897819] bpf_trampoline_18669+0x29/0x1000 [ 250.898476] bpf_lsm_file_mprotect+0x5/0x10 [ 250.899133] security_file_mprotect+0x32/0x50 [ 250.899816] do_mprotect_pkey+0x18a/0x2f0 [ 250.900472] __x64_sys_mprotect+0x1b/0x20 [ 250.901107] do_syscall_64+0x33/0x40 [ 250.901670] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 250.902450] RIP: 0033:0x7fd95c141ef7 [ 250.903014] Code: ff 66 90 b8 0b 00 00 00 0f 05 48 3d 01 f0 ff ff 73 01 c3 48 8d 0d 21 c2 2 0 00 f7 d8 89 01 48 83 c8 ff c3 b8 0a 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8d 0d 01 c2 20 00 f7 d8 89 01 48 83 [ 250.905732] RSP: 002b:00007ffd4c291fe8 EFLAGS: 00000246 ORIG_RAX: 000000000000000a [ 250.906773] RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00007fd95c141ef7 [ 250.907866] RDX: 0000000000000000 RSI: 00000000001ff000 RDI: 00007fd95bf20000 [ 250.908906] RBP: 00007ffd4c292320 R08: 0000000000000000 R09: 0000000000000000 [ 250.909915] R10: 00007ffd4c291ff0 R11: 0000000000000246 R12: 00007fd95c341000 [ 250.910919] R13: 00007ffd4c292408 R14: 0000000000000002 R15: 0000000000000801 Could this be an real issue here? do_mprotect_pkey() gets a lock of current->mm->mmap_lock before calling security_file_mprotect(bpf_lsm_file_mprotect). Later on, when do _copy_to_user(), page fault may happen and current->mm->mmap_lock might be acquired again and may have a deadlock here? > int BPF_PROG(test_int_hook, struct vm_area_struct *vma, > unsigned long reqprot, unsigned long prot, int ret) > { > + char args[64]; > + __u32 key = 0; > + __u64 *value; > + > if (ret != 0) > return ret; > > @@ -28,6 +53,18 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma, > is_stack = (vma->vm_start <= vma->vm_mm->start_stack && > vma->vm_end >= vma->vm_mm->start_stack); > > + bpf_copy_from_user(args, sizeof(args), (void *)vma->vm_mm->arg_start); > + > + value = bpf_map_lookup_elem(&array, &key); > + if (value) > + *value = 0; > + value = bpf_map_lookup_elem(&hash, &key); > + if (value) > + *value = 0; > + value = bpf_map_lookup_elem(&lru_hash, &key); > + if (value) > + *value = 0; > + > if (is_stack && monitored_pid == pid) { > mprotect_count++; > ret = -EPERM; > @@ -36,7 +73,7 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma, > return ret; > } > [...]
On 8/29/20 5:22 PM, Yonghong Song wrote: >> LOCKDEP=y >> DEBUG_ATOMIC_SLEEP=y >> LOCK_DEBUGGING_SUPPORT=y >> KASAN=y >> in my .config and don't see it :( >> Could pls send me your .config? > > The config file is attached. In my environment, the warning is only > printed out during the first run of `./test_progs -t lsm`. All later > runs did not have this warning. Thanks. Turned out LOCKDEP=y and DEBUG_ATOMIC_SLEEP=y wasn't enough. My config was missing PROVE_LOCKING=y.
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 944ad4721c83..1a427685a8a8 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -317,6 +317,7 @@ extern const struct bench bench_trig_tp; extern const struct bench bench_trig_rawtp; extern const struct bench bench_trig_kprobe; extern const struct bench bench_trig_fentry; +extern const struct bench bench_trig_fentry_sleep; extern const struct bench bench_trig_fmodret; extern const struct bench bench_rb_libbpf; extern const struct bench bench_rb_custom; @@ -338,6 +339,7 @@ static const struct bench *benchs[] = { &bench_trig_rawtp, &bench_trig_kprobe, &bench_trig_fentry, + &bench_trig_fentry_sleep, &bench_trig_fmodret, &bench_rb_libbpf, &bench_rb_custom, diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 49c22832f216..2a0b6c9885a4 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -90,6 +90,12 @@ static void trigger_fentry_setup() attach_bpf(ctx.skel->progs.bench_trigger_fentry); } +static void trigger_fentry_sleep_setup() +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep); +} + static void trigger_fmodret_setup() { setup_ctx(); @@ -155,6 +161,17 @@ const struct bench bench_trig_fentry = { .report_final = hits_drops_report_final, }; +const struct bench bench_trig_fentry_sleep = { + .name = "trig-fentry-sleep", + .validate = trigger_validate, + .setup = trigger_fentry_sleep_setup, + .producer_thread = trigger_producer, + .consumer_thread = trigger_consumer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + const struct bench bench_trig_fmodret = { .name = "trig-fmodret", .validate = trigger_validate, diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c index b17eb2045c1d..6ab29226c99b 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c +++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c @@ -10,6 +10,7 @@ #include <unistd.h> #include <malloc.h> #include <stdlib.h> +#include <unistd.h> #include "lsm.skel.h" @@ -55,6 +56,7 @@ void test_test_lsm(void) { struct lsm *skel = NULL; int err, duration = 0; + int buf = 1234; skel = lsm__open_and_load(); if (CHECK(!skel, "skel_load", "lsm skeleton failed\n")) @@ -81,6 +83,13 @@ void test_test_lsm(void) CHECK(skel->bss->mprotect_count != 1, "mprotect_count", "mprotect_count = %d\n", skel->bss->mprotect_count); + syscall(__NR_setdomainname, &buf, -2L); + syscall(__NR_setdomainname, 0, -3L); + syscall(__NR_setdomainname, ~0L, -4L); + + CHECK(skel->bss->copy_test != 3, "copy_test", + "copy_test = %d\n", skel->bss->copy_test); + close_prog: lsm__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index b4598d4bc4f7..49fa6ca99755 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -9,16 +9,41 @@ #include <bpf/bpf_tracing.h> #include <errno.h> +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} hash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} lru_hash SEC(".maps"); + char _license[] SEC("license") = "GPL"; int monitored_pid = 0; int mprotect_count = 0; int bprm_count = 0; -SEC("lsm/file_mprotect") +SEC("lsm.s/file_mprotect") int BPF_PROG(test_int_hook, struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot, int ret) { + char args[64]; + __u32 key = 0; + __u64 *value; + if (ret != 0) return ret; @@ -28,6 +53,18 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma, is_stack = (vma->vm_start <= vma->vm_mm->start_stack && vma->vm_end >= vma->vm_mm->start_stack); + bpf_copy_from_user(args, sizeof(args), (void *)vma->vm_mm->arg_start); + + value = bpf_map_lookup_elem(&array, &key); + if (value) + *value = 0; + value = bpf_map_lookup_elem(&hash, &key); + if (value) + *value = 0; + value = bpf_map_lookup_elem(&lru_hash, &key); + if (value) + *value = 0; + if (is_stack && monitored_pid == pid) { mprotect_count++; ret = -EPERM; @@ -36,7 +73,7 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma, return ret; } -SEC("lsm/bprm_committed_creds") +SEC("lsm.s/bprm_committed_creds") int BPF_PROG(test_void_hook, struct linux_binprm *bprm) { __u32 pid = bpf_get_current_pid_tgid() >> 32; @@ -46,3 +83,28 @@ int BPF_PROG(test_void_hook, struct linux_binprm *bprm) return 0; } +SEC("lsm/task_free") /* lsm/ is ok, lsm.s/ fails */ +int BPF_PROG(test_task_free, struct task_struct *task) +{ + return 0; +} + +int copy_test = 0; + +SEC("fentry.s/__x64_sys_setdomainname") +int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs) +{ + void *ptr = (void *)PT_REGS_PARM1(regs); + int len = PT_REGS_PARM2(regs); + int buf = 0; + long ret; + + ret = bpf_copy_from_user(&buf, sizeof(buf), ptr); + if (len == -2 && ret == 0 && buf == 1234) + copy_test++; + if (len == -3 && ret == -EFAULT) + copy_test++; + if (len == -4 && ret == -EFAULT) + copy_test++; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 8b36b6640e7e..9a4d09590b3d 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -39,6 +39,13 @@ int bench_trigger_fentry(void *ctx) return 0; } +SEC("fentry.s/__x64_sys_getpgid") +int bench_trigger_fentry_sleep(void *ctx) +{ + __sync_add_and_fetch(&hits, 1); + return 0; +} + SEC("fmod_ret/__x64_sys_getpgid") int bench_trigger_fmodret(void *ctx) {