diff mbox series

[bpf,v3,2/2] bpf, sockmap: sk_prot needs inuse_idx set for proc stats

Message ID 20210706163150.112591-3-john.fastabend@gmail.com
State New
Headers show
Series potential sockmap memleak and proc stats fix | expand

Commit Message

John Fastabend July 6, 2021, 4:31 p.m. UTC
Proc socket stats use the sk_prot->inuse_idx value to record inuse sock stats.
We currently do not set this correctly from the sockmap side. The result is
that reading sock stats from '/proc/net/sockstat' gives incorrect values. The
socket counter is incremented correctly, but because we don't set the counter
index (inuse_idx) correctly when we replace sk_prot we may omit the decrement.

Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 net/core/sock_map.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

Comments

Cong Wang July 8, 2021, 7:42 p.m. UTC | #1
On Tue, Jul 6, 2021 at 9:31 AM John Fastabend <john.fastabend@gmail.com> wrote:
>

> Proc socket stats use sk_prot->inuse_idx value to record inuse sock stats.

> We currently do not set this correctly from sockmap side. The result is

> reading sock stats '/proc/net/sockstat' gives incorrect values. The

> socket counter is incremented correctly, but because we don't set the

> counter correctly when we replace sk_prot we may omit the decrement.

>

> Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface")

> Signed-off-by: John Fastabend <john.fastabend@gmail.com>

> ---

>  net/core/sock_map.c | 11 ++++++++++-

>  1 file changed, 10 insertions(+), 1 deletion(-)

>

> diff --git a/net/core/sock_map.c b/net/core/sock_map.c

> index 60decd6420ca..27bdf768aa8c 100644

> --- a/net/core/sock_map.c

> +++ b/net/core/sock_map.c

> @@ -185,10 +185,19 @@ static void sock_map_unref(struct sock *sk, void *link_raw)

>

>  static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)

>  {

> +       int err;

> +#ifdef CONFIG_PROC_FS

> +       int idx = sk->sk_prot->inuse_idx;

> +#endif


A nit: reverse Christmas tree declaration order (longest line first) is
preferred in the networking subsystem.

Thanks.
Jakub Sitnicki July 12, 2021, 7:22 a.m. UTC | #2
On Tue, Jul 06, 2021 at 06:31 PM CEST, John Fastabend wrote:
> Proc socket stats use sk_prot->inuse_idx value to record inuse sock stats.

> We currently do not set this correctly from sockmap side. The result is

> reading sock stats '/proc/net/sockstat' gives incorrect values. The

> socket counter is incremented correctly, but because we don't set the

> counter correctly when we replace sk_prot we may omit the decrement.

>

> Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface")

> Signed-off-by: John Fastabend <john.fastabend@gmail.com>

> ---

>  net/core/sock_map.c | 11 ++++++++++-

>  1 file changed, 10 insertions(+), 1 deletion(-)

>

> diff --git a/net/core/sock_map.c b/net/core/sock_map.c

> index 60decd6420ca..27bdf768aa8c 100644

> --- a/net/core/sock_map.c

> +++ b/net/core/sock_map.c

> @@ -185,10 +185,19 @@ static void sock_map_unref(struct sock *sk, void *link_raw)

>

>  static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)

>  {

> +	int err;

> +#ifdef CONFIG_PROC_FS

> +	int idx = sk->sk_prot->inuse_idx;

> +#endif

>  	if (!sk->sk_prot->psock_update_sk_prot)

>  		return -EINVAL;

>  	psock->psock_update_sk_prot = sk->sk_prot->psock_update_sk_prot;

> -	return sk->sk_prot->psock_update_sk_prot(sk, psock, false);

> +	err = sk->sk_prot->psock_update_sk_prot(sk, psock, false);

> +#ifdef CONFIG_PROC_FS

> +	if (!err)

> +		sk->sk_prot->inuse_idx = idx;

> +#endif

> +	return err;

>  }

>

>  static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)


We could initialize inuse_idx just once in {tcp,udp}_bpf_rebuild_protos,
if we changed {tcp,udp}_bpf_v4_build_proto to be a late_initcall, so
that it runs after inet_init, when {tcp,udp}_prot are already registered
and have inuse_idx assigned.
John Fastabend July 12, 2021, 5:17 p.m. UTC | #3
Jakub Sitnicki wrote:
> On Tue, Jul 06, 2021 at 06:31 PM CEST, John Fastabend wrote:

> > Proc socket stats use sk_prot->inuse_idx value to record inuse sock stats.

> > We currently do not set this correctly from sockmap side. The result is

> > reading sock stats '/proc/net/sockstat' gives incorrect values. The

> > socket counter is incremented correctly, but because we don't set the

> > counter correctly when we replace sk_prot we may omit the decrement.

> >

> > Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface")

> > Signed-off-by: John Fastabend <john.fastabend@gmail.com>

> > ---

> >  net/core/sock_map.c | 11 ++++++++++-

> >  1 file changed, 10 insertions(+), 1 deletion(-)

> >

> > diff --git a/net/core/sock_map.c b/net/core/sock_map.c

> > index 60decd6420ca..27bdf768aa8c 100644

> > --- a/net/core/sock_map.c

> > +++ b/net/core/sock_map.c

> > @@ -185,10 +185,19 @@ static void sock_map_unref(struct sock *sk, void *link_raw)

> >

> >  static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)

> >  {

> > +	int err;

> > +#ifdef CONFIG_PROC_FS

> > +	int idx = sk->sk_prot->inuse_idx;

> > +#endif

> >  	if (!sk->sk_prot->psock_update_sk_prot)

> >  		return -EINVAL;

> >  	psock->psock_update_sk_prot = sk->sk_prot->psock_update_sk_prot;

> > -	return sk->sk_prot->psock_update_sk_prot(sk, psock, false);

> > +	err = sk->sk_prot->psock_update_sk_prot(sk, psock, false);

> > +#ifdef CONFIG_PROC_FS

> > +	if (!err)

> > +		sk->sk_prot->inuse_idx = idx;

> > +#endif

> > +	return err;

> >  }

> >

> >  static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)

> 

> We could initialize inuse_idx just once in {tcp,udp}_bpf_rebuild_protos,

> if we changed {tcp,udp}_bpf_v4_build_proto to be a late_initcall, so

> that it runs after inet_init when {tcp,udp}_prot and udp_prot are

> already registered and have inuse_idx assigned.


OK, that does seem slightly nicer. Then I guess the diff is just:

diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index f26916a62f25..d3e9386b493e 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -503,7 +503,7 @@ static int __init tcp_bpf_v4_build_proto(void)
        tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV4], &tcp_prot);
        return 0;
 }
-core_initcall(tcp_bpf_v4_build_proto);
+late_initcall(tcp_bpf_v4_build_proto);
 
 static int tcp_bpf_assert_proto_ops(struct proto *ops)
 {
diff mbox series

Patch

diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 60decd6420ca..27bdf768aa8c 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -185,10 +185,19 @@  static void sock_map_unref(struct sock *sk, void *link_raw)
 
 static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)
 {
+	int err;
+#ifdef CONFIG_PROC_FS
+	int idx = sk->sk_prot->inuse_idx;
+#endif
 	if (!sk->sk_prot->psock_update_sk_prot)
 		return -EINVAL;
 	psock->psock_update_sk_prot = sk->sk_prot->psock_update_sk_prot;
-	return sk->sk_prot->psock_update_sk_prot(sk, psock, false);
+	err = sk->sk_prot->psock_update_sk_prot(sk, psock, false);
+#ifdef CONFIG_PROC_FS
+	if (!err)
+		sk->sk_prot->inuse_idx = idx;
+#endif
+	return err;
 }
 
 static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)