Message ID | d4150caecdbef4205178753772e3bc301e908355.1600967205.git.daniel@iogearbox.net |
---|---|
State | New |
Headers | show |
Series | [bpf-next,1/6] bpf: add classid helper only based on skb->sk | expand |
On 9/24/20 8:21 PM, Daniel Borkmann wrote: > With its use in BPF the cookie generator can be called very frequently > in particular when used out of cgroup v2 hooks (e.g. connect / sendmsg) > and attached to the root cgroup, for example, when used in v1/v2 mixed > environments. In particular when there's a high churn on sockets in the > system there can be many parallel requests to the bpf_get_socket_cookie() > and bpf_get_netns_cookie() helpers which then cause contention on the > shared atomic counter. As similarly done in f991bd2e1421 ("fs: introduce > a per-cpu last_ino allocator"), add a small helper library that both can > then use for the 64 bit counters. > > Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> > --- > include/linux/cookie.h | 41 ++++++++++++++++++++++++++++++++++++++++ > net/core/net_namespace.c | 5 +++-- > net/core/sock_diag.c | 7 ++++--- > 3 files changed, 48 insertions(+), 5 deletions(-) > create mode 100644 include/linux/cookie.h > > diff --git a/include/linux/cookie.h b/include/linux/cookie.h > new file mode 100644 > index 000000000000..2488203dc004 > --- /dev/null > +++ b/include/linux/cookie.h > @@ -0,0 +1,41 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef __LINUX_COOKIE_H > +#define __LINUX_COOKIE_H > + > +#include <linux/atomic.h> > +#include <linux/percpu.h> > + > +struct gen_cookie { > + u64 __percpu *local_last; > + atomic64_t shared_last ____cacheline_aligned_in_smp; > +}; > + > +#define COOKIE_LOCAL_BATCH 4096 > + > +#define DEFINE_COOKIE(name) \ > + static DEFINE_PER_CPU(u64, __##name); \ > + static struct gen_cookie name = { \ > + .local_last = &__##name, \ > + .shared_last = ATOMIC64_INIT(0), \ > + } > + > +static inline u64 gen_cookie_next(struct gen_cookie *gc) > +{ > + u64 *local_last = &get_cpu_var(*gc->local_last); > + u64 val = *local_last; > + > + if (__is_defined(CONFIG_SMP) && > + unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) { > + s64 next = atomic64_add_return(COOKIE_LOCAL_BATCH, > + &gc->shared_last); > + val = next - COOKIE_LOCAL_BATCH; > + } > + val++; > + if (unlikely(!val)) > + val++; > + *local_last = val; > + put_cpu_var(local_last); > + return val; This is not interrupt safe. I think sock_gen_cookie() can be called from interrupt context. get_next_ino() is only called from process context, that is what I used get_cpu_var() and put_cpu_var()
On 9/24/20 8:58 PM, Eric Dumazet wrote: > On 9/24/20 8:21 PM, Daniel Borkmann wrote: [...] >> diff --git a/include/linux/cookie.h b/include/linux/cookie.h >> new file mode 100644 >> index 000000000000..2488203dc004 >> --- /dev/null >> +++ b/include/linux/cookie.h >> @@ -0,0 +1,41 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> +#ifndef __LINUX_COOKIE_H >> +#define __LINUX_COOKIE_H >> + >> +#include <linux/atomic.h> >> +#include <linux/percpu.h> >> + >> +struct gen_cookie { >> + u64 __percpu *local_last; >> + atomic64_t shared_last ____cacheline_aligned_in_smp; >> +}; >> + >> +#define COOKIE_LOCAL_BATCH 4096 >> + >> +#define DEFINE_COOKIE(name) \ >> + static DEFINE_PER_CPU(u64, __##name); \ >> + static struct gen_cookie name = { \ >> + .local_last = &__##name, \ >> + .shared_last = ATOMIC64_INIT(0), \ >> + } >> + >> +static inline u64 gen_cookie_next(struct gen_cookie *gc) >> +{ >> + u64 *local_last = &get_cpu_var(*gc->local_last); >> + u64 val = *local_last; >> + >> + if (__is_defined(CONFIG_SMP) && >> + unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) { >> + s64 next = atomic64_add_return(COOKIE_LOCAL_BATCH, >> + &gc->shared_last); >> + val = next - COOKIE_LOCAL_BATCH; >> + } >> + val++; >> + if (unlikely(!val)) >> + val++; >> + *local_last = val; >> + put_cpu_var(local_last); >> + return val; > > This is not interrupt safe. > > I think sock_gen_cookie() can be called from interrupt context. > > get_next_ino() is only called from process context, that is what I used get_cpu_var() > and put_cpu_var() Hmm, agree, good point. Need to experiment a bit more .. initial thinking potentially something like the below could do where we fall back to atomic counter iff we encounter nesting (which should be an extremely rare case normally). BPF progs where this can be called from are non-preemptible, so we could actually move the temp preempt_disable/enable() from get/put_cpu_var() into a wrapper func for slow path non-BPF users as well. static inline u64 gen_cookie_next(struct gen_cookie *gc) { u64 val; if (likely(this_cpu_inc_return(*gc->level_nesting) == 1)) { u64 *local_last = this_cpu_ptr(gc->local_last); val = *local_last; if (__is_defined(CONFIG_SMP) && unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) { s64 next = atomic64_add_return(COOKIE_LOCAL_BATCH, &gc->shared_last); val = next - COOKIE_LOCAL_BATCH; } val++; if (unlikely(!val)) val++; *local_last = val; } else { val = atomic64_add_return(COOKIE_LOCAL_BATCH, &gc->shared_last); } this_cpu_dec(*gc->level_nesting); return val; } Thanks, Daniel
On 9/25/20 12:03 AM, Daniel Borkmann wrote: > On 9/24/20 8:58 PM, Eric Dumazet wrote: >> On 9/24/20 8:21 PM, Daniel Borkmann wrote: > [...] >>> diff --git a/include/linux/cookie.h b/include/linux/cookie.h >>> new file mode 100644 >>> index 000000000000..2488203dc004 >>> --- /dev/null >>> +++ b/include/linux/cookie.h >>> @@ -0,0 +1,41 @@ >>> +/* SPDX-License-Identifier: GPL-2.0 */ >>> +#ifndef __LINUX_COOKIE_H >>> +#define __LINUX_COOKIE_H >>> + >>> +#include <linux/atomic.h> >>> +#include <linux/percpu.h> >>> + >>> +struct gen_cookie { >>> + u64 __percpu *local_last; >>> + atomic64_t shared_last ____cacheline_aligned_in_smp; >>> +}; >>> + >>> +#define COOKIE_LOCAL_BATCH 4096 >>> + >>> +#define DEFINE_COOKIE(name) \ >>> + static DEFINE_PER_CPU(u64, __##name); \ >>> + static struct gen_cookie name = { \ >>> + .local_last = &__##name, \ >>> + .shared_last = ATOMIC64_INIT(0), \ >>> + } >>> + >>> +static inline u64 gen_cookie_next(struct gen_cookie *gc) >>> +{ >>> + u64 *local_last = &get_cpu_var(*gc->local_last); >>> + u64 val = *local_last; >>> + >>> + if (__is_defined(CONFIG_SMP) && >>> + unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) { >>> + s64 next = atomic64_add_return(COOKIE_LOCAL_BATCH, >>> + &gc->shared_last); >>> + val = next - COOKIE_LOCAL_BATCH; >>> + } >>> + val++; >>> + if (unlikely(!val)) >>> + val++; >>> + *local_last = val; >>> + put_cpu_var(local_last); >>> + return val; >> >> This is not interrupt safe. >> >> I think sock_gen_cookie() can be called from interrupt context. >> >> get_next_ino() is only called from process context, that is what I used get_cpu_var() >> and put_cpu_var() > > Hmm, agree, good point. Need to experiment a bit more .. initial thinking > potentially something like the below could do where we fall back to atomic > counter iff we encounter nesting (which should be an extremely rare case > normally). > > BPF progs where this can be called from are non-preemptible, so we could > actually move the temp preempt_disable/enable() from get/put_cpu_var() into > a wrapper func for slow path non-BPF users as well. > > static inline u64 gen_cookie_next(struct gen_cookie *gc) > { > u64 val; > I presume you would use a single structure to hold level_nesting and local_last in the same cache line. struct pcpu_gen_cookie { int level_nesting; u64 local_last; } __aligned(16); > if (likely(this_cpu_inc_return(*gc->level_nesting) == 1)) { > u64 *local_last = this_cpu_ptr(gc->local_last); > > val = *local_last; > if (__is_defined(CONFIG_SMP) && > unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) { > s64 next = atomic64_add_return(COOKIE_LOCAL_BATCH, > &gc->shared_last); > val = next - COOKIE_LOCAL_BATCH; > } > val++; > if (unlikely(!val)) > val++; Note that we really expect this wrapping will never happen, with 64bit value. (We had to take care of the wrapping in get_next_ino() as it was dealing with 32bit values) > *local_last = val; > } else { > val = atomic64_add_return(COOKIE_LOCAL_BATCH, > &gc->shared_last); Or val = atomic64_dec_return(&reverse_counter) With reverse_counter initial value set to ATOMIC64_INIT(0) ? This will start sending 'big cookies like 0xFFFFFFFFxxxxxxxx' to make sure applications are not breaking with them, after few months of uptime. This would also not consume COOKIE_LOCAL_BATCH units per value, but this seems minor based on the available space. > } > this_cpu_dec(*gc->level_nesting); > return val; > } > > Thanks, > Daniel
On 9/25/20 9:49 AM, Eric Dumazet wrote: > On 9/25/20 12:03 AM, Daniel Borkmann wrote: >> On 9/24/20 8:58 PM, Eric Dumazet wrote: >>> On 9/24/20 8:21 PM, Daniel Borkmann wrote: >> [...] >>>> diff --git a/include/linux/cookie.h b/include/linux/cookie.h >>>> new file mode 100644 >>>> index 000000000000..2488203dc004 >>>> --- /dev/null >>>> +++ b/include/linux/cookie.h >>>> @@ -0,0 +1,41 @@ >>>> +/* SPDX-License-Identifier: GPL-2.0 */ >>>> +#ifndef __LINUX_COOKIE_H >>>> +#define __LINUX_COOKIE_H >>>> + >>>> +#include <linux/atomic.h> >>>> +#include <linux/percpu.h> >>>> + >>>> +struct gen_cookie { >>>> + u64 __percpu *local_last; >>>> + atomic64_t shared_last ____cacheline_aligned_in_smp; >>>> +}; >>>> + >>>> +#define COOKIE_LOCAL_BATCH 4096 >>>> + >>>> +#define DEFINE_COOKIE(name) \ >>>> + static DEFINE_PER_CPU(u64, __##name); \ >>>> + static struct gen_cookie name = { \ >>>> + .local_last = &__##name, \ >>>> + .shared_last = ATOMIC64_INIT(0), \ >>>> + } >>>> + >>>> +static inline u64 gen_cookie_next(struct gen_cookie *gc) >>>> +{ >>>> + u64 *local_last = &get_cpu_var(*gc->local_last); >>>> + u64 val = *local_last; >>>> + >>>> + if (__is_defined(CONFIG_SMP) && >>>> + unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) { >>>> + s64 next = atomic64_add_return(COOKIE_LOCAL_BATCH, >>>> + &gc->shared_last); >>>> + val = next - COOKIE_LOCAL_BATCH; >>>> + } >>>> + val++; >>>> + if (unlikely(!val)) >>>> + val++; >>>> + *local_last = val; >>>> + put_cpu_var(local_last); >>>> + return val; >>> >>> This is not interrupt safe. >>> >>> I think sock_gen_cookie() can be called from interrupt context. >>> >>> get_next_ino() is only called from process context, that is what I used get_cpu_var() >>> and put_cpu_var() >> >> Hmm, agree, good point. Need to experiment a bit more .. initial thinking >> potentially something like the below could do where we fall back to atomic >> counter iff we encounter nesting (which should be an extremely rare case >> normally). >> >> BPF progs where this can be called from are non-preemptible, so we could >> actually move the temp preempt_disable/enable() from get/put_cpu_var() into >> a wrapper func for slow path non-BPF users as well. >> >> static inline u64 gen_cookie_next(struct gen_cookie *gc) >> { >> u64 val; > > I presume you would use a single structure to hold level_nesting and local_last > in the same cache line. > > struct pcpu_gen_cookie { > int level_nesting; > u64 local_last; > } __aligned(16); Yes. >> if (likely(this_cpu_inc_return(*gc->level_nesting) == 1)) { >> u64 *local_last = this_cpu_ptr(gc->local_last); >> >> val = *local_last; >> if (__is_defined(CONFIG_SMP) && >> unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) { >> s64 next = atomic64_add_return(COOKIE_LOCAL_BATCH, >> &gc->shared_last); >> val = next - COOKIE_LOCAL_BATCH; >> } >> val++; > >> if (unlikely(!val)) >> val++; > > Note that we really expect this wrapping will never happen, with 64bit value. > (We had to take care of the wrapping in get_next_ino() as it was dealing with 32bit values) Agree, all local counters will start off at 0, but we inc right after the batch and thus never run into it anyway and neither via overflow. Will remove. >> *local_last = val; >> } else { >> val = atomic64_add_return(COOKIE_LOCAL_BATCH, >> &gc->shared_last); > > Or val = atomic64_dec_return(&reverse_counter) > > With reverse_counter initial value set to ATOMIC64_INIT(0) ? > > This will start sending 'big cookies like 0xFFFFFFFFxxxxxxxx' to make sure applications > are not breaking with them, after few months of uptime. > > This would also not consume COOKIE_LOCAL_BATCH units per value, > but this seems minor based on the available space. Excellent idea, I like it given it doesn't waste COOKIE_LOCAL_BATCH space. Thanks for the feedback! >> } >> this_cpu_dec(*gc->level_nesting); >> return val; >> } >> >> Thanks, >> Daniel
On Fri, 25 Sep 2020 00:03:14 +0200 Daniel Borkmann wrote: > static inline u64 gen_cookie_next(struct gen_cookie *gc) > { > u64 val; > > if (likely(this_cpu_inc_return(*gc->level_nesting) == 1)) { Is this_cpu_inc() in itself atomic? Is there a comparison of performance of various atomic ops and locking somewhere? I wonder how this scheme would compare to a using a cmpxchg. > u64 *local_last = this_cpu_ptr(gc->local_last); > > val = *local_last; > if (__is_defined(CONFIG_SMP) && > unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) { Can we reasonably assume we won't have more than 4k CPUs and just statically divide this space by encoding CPU id in top bits? > s64 next = atomic64_add_return(COOKIE_LOCAL_BATCH, > &gc->shared_last); > val = next - COOKIE_LOCAL_BATCH; > } > val++; > if (unlikely(!val)) > val++; > *local_last = val; > } else { > val = atomic64_add_return(COOKIE_LOCAL_BATCH, > &gc->shared_last); > } > this_cpu_dec(*gc->level_nesting); > return val; > }
On 9/25/20 5:00 PM, Jakub Kicinski wrote: unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) { > > Can we reasonably assume we won't have more than 4k CPUs and just > statically divide this space by encoding CPU id in top bits? This might give some food to side channel attacks, since this would give an indication of cpu that allocated the id. Also, I hear that some distros enabled 8K cpus.
On Fri, 25 Sep 2020 17:15:17 +0200 Eric Dumazet wrote: > On 9/25/20 5:00 PM, Jakub Kicinski wrote: > > Is this_cpu_inc() in itself atomic? To answer my own question - it is :) > > unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) > > > > Can we reasonably assume we won't have more than 4k CPUs and just > > statically divide this space by encoding CPU id in top bits? > > This might give some food to side channel attacks, since this would > give an indication of cpu that allocated the id. > > Also, I hear that some distros enabled 8K cpus. Ok :(
On 9/25/20 5:31 PM, Jakub Kicinski wrote: > On Fri, 25 Sep 2020 17:15:17 +0200 Eric Dumazet wrote: >> On 9/25/20 5:00 PM, Jakub Kicinski wrote: >>> Is this_cpu_inc() in itself atomic? > > To answer my own question - it is :) > >>> unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) >>> >>> Can we reasonably assume we won't have more than 4k CPUs and just >>> statically divide this space by encoding CPU id in top bits? >> >> This might give some food to side channel attacks, since this would >> give an indication of cpu that allocated the id. >> >> Also, I hear that some distros enabled 8K cpus. > > Ok :( > I was not really serious about the side channel attacks, just some thought about possible implications :) Even with 8192 max cpus, splitting space into 2^(64-13) blocks would be fine I think.
diff --git a/include/linux/cookie.h b/include/linux/cookie.h new file mode 100644 index 000000000000..2488203dc004 --- /dev/null +++ b/include/linux/cookie.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_COOKIE_H +#define __LINUX_COOKIE_H + +#include <linux/atomic.h> +#include <linux/percpu.h> + +struct gen_cookie { + u64 __percpu *local_last; + atomic64_t shared_last ____cacheline_aligned_in_smp; +}; + +#define COOKIE_LOCAL_BATCH 4096 + +#define DEFINE_COOKIE(name) \ + static DEFINE_PER_CPU(u64, __##name); \ + static struct gen_cookie name = { \ + .local_last = &__##name, \ + .shared_last = ATOMIC64_INIT(0), \ + } + +static inline u64 gen_cookie_next(struct gen_cookie *gc) +{ + u64 *local_last = &get_cpu_var(*gc->local_last); + u64 val = *local_last; + + if (__is_defined(CONFIG_SMP) && + unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) { + s64 next = atomic64_add_return(COOKIE_LOCAL_BATCH, + &gc->shared_last); + val = next - COOKIE_LOCAL_BATCH; + } + val++; + if (unlikely(!val)) + val++; + *local_last = val; + put_cpu_var(local_last); + return val; +} + +#endif /* __LINUX_COOKIE_H */ diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index dcd61aca343e..cf29ed8950d1 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -19,6 +19,7 @@ #include <linux/net_namespace.h> #include <linux/sched/task.h> #include <linux/uidgid.h> +#include <linux/cookie.h> #include <net/sock.h> #include <net/netlink.h> @@ -69,7 +70,7 @@ EXPORT_SYMBOL_GPL(pernet_ops_rwsem); static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; -static atomic64_t cookie_gen; +DEFINE_COOKIE(net_cookie); u64 net_gen_cookie(struct net *net) { @@ -78,7 +79,7 @@ u64 net_gen_cookie(struct net *net) if (res) return res; - res = atomic64_inc_return(&cookie_gen); + res = gen_cookie_next(&net_cookie); atomic64_cmpxchg(&net->net_cookie, 0, res); } } diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index c13ffbd33d8d..4a4180e8dd35 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -11,7 +11,7 @@ #include <linux/tcp.h> #include <linux/workqueue.h> #include <linux/nospec.h> - +#include <linux/cookie.h> #include <linux/inet_diag.h> #include <linux/sock_diag.h> @@ -19,7 +19,8 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); static struct workqueue_struct *broadcast_wq; -static atomic64_t cookie_gen; + +DEFINE_COOKIE(sock_cookie); u64 sock_gen_cookie(struct sock *sk) { @@ -28,7 +29,7 @@ u64 sock_gen_cookie(struct sock *sk) if (res) return res; - res = atomic64_inc_return(&cookie_gen); + res = gen_cookie_next(&sock_cookie); atomic64_cmpxchg(&sk->sk_cookie, 0, res); } }
With its use in BPF the cookie generator can be called very frequently in particular when used out of cgroup v2 hooks (e.g. connect / sendmsg) and attached to the root cgroup, for example, when used in v1/v2 mixed environments. In particular when there's a high churn on sockets in the system there can be many parallel requests to the bpf_get_socket_cookie() and bpf_get_netns_cookie() helpers which then cause contention on the shared atomic counter. As similarly done in f991bd2e1421 ("fs: introduce a per-cpu last_ino allocator"), add a small helper library that both can then use for the 64 bit counters. Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> --- include/linux/cookie.h | 41 ++++++++++++++++++++++++++++++++++++++++ net/core/net_namespace.c | 5 +++-- net/core/sock_diag.c | 7 ++++--- 3 files changed, 48 insertions(+), 5 deletions(-) create mode 100644 include/linux/cookie.h