Message ID | 20210804154712.929986-2-desmondcheongzx@gmail.com |
---|---|
State | Superseded |
Headers | show |
Series | Bluetooth: fix locking and socket killing in SCO and RFCOMM | expand |
Hi Desmond, On Wed, Aug 4, 2021 at 8:48 AM Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com> wrote: > > struct sock.sk_timer should be used as a sock cleanup timer. However, > SCO uses it to implement sock timeouts. > > This causes issues because struct sock.sk_timer's callback is run in > an IRQ context, and the timer callback function sco_sock_timeout takes > a spin lock on the socket. However, other functions such as > sco_conn_del and sco_conn_ready take the spin lock with interrupts > enabled. > > This inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} lock usage could > lead to deadlocks as reported by Syzbot [1]: > CPU0 > ---- > lock(slock-AF_BLUETOOTH-BTPROTO_SCO); > <Interrupt> > lock(slock-AF_BLUETOOTH-BTPROTO_SCO); > > To fix this, we use delayed work to implement SCO sock timouts > instead. This allows us to avoid taking the spin lock on the socket in > an IRQ context, and corrects the misuse of struct sock.sk_timer. > > As a note, cancel_delayed_work is used instead of > cancel_delayed_work_sync in sco_sock_set_timer and > sco_sock_clear_timer to avoid a deadlock. In the future, the call to > bh_lock_sock inside sco_sock_timeout should be changed to lock_sock to > synchronize with other functions using lock_sock. However, since > sco_sock_set_timer and sco_sock_clear_timer are sometimes called under > the locked socket (in sco_connect and __sco_sock_close), > cancel_delayed_work_sync might cause them to sleep until an > sco_sock_timeout that has started finishes running. But > sco_sock_timeout would also sleep until it can grab the lock_sock. > > Using cancel_delayed_work is fine because sco_sock_timeout does not > change from run to run, hence there is no functional difference > between: > 1. waiting for a timeout to finish running before scheduling another > timeout > 2. scheduling another timeout while a timeout is running. > > Link: https://syzkaller.appspot.com/bug?id=9089d89de0502e120f234ca0fc8a703f7368b31e [1] > Reported-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com > Tested-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com > Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com> > --- > net/bluetooth/sco.c | 41 +++++++++++++++++++++++++++++++++++------ > 1 file changed, 35 insertions(+), 6 deletions(-) > > diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c > index ffa2a77a3e4c..89cb987ca9eb 100644 > --- a/net/bluetooth/sco.c > +++ b/net/bluetooth/sco.c > @@ -48,6 +48,8 @@ struct sco_conn { > spinlock_t lock; > struct sock *sk; > > + struct delayed_work timeout_work; > + > unsigned int mtu; > }; > > @@ -74,9 +76,20 @@ struct sco_pinfo { > #define SCO_CONN_TIMEOUT (HZ * 40) > #define SCO_DISCONN_TIMEOUT (HZ * 2) > > -static void sco_sock_timeout(struct timer_list *t) > +static void sco_sock_timeout(struct work_struct *work) > { > - struct sock *sk = from_timer(sk, t, sk_timer); > + struct sco_conn *conn = container_of(work, struct sco_conn, > + timeout_work.work); > + struct sock *sk; > + > + sco_conn_lock(conn); > + sk = conn->sk; > + if (sk) > + sock_hold(sk); > + sco_conn_unlock(conn); > + > + if (!sk) > + return; > > BT_DBG("sock %p state %d", sk, sk->sk_state); > > @@ -91,14 +104,27 @@ static void sco_sock_timeout(struct timer_list *t) > > static void sco_sock_set_timer(struct sock *sk, long timeout) > { > + struct delayed_work *work; Minor nitpick but I don't think using a dedicated variable here makes much sense. > + if (!sco_pi(sk)->conn) > + return; > + work = &sco_pi(sk)->conn->timeout_work; > + > BT_DBG("sock %p state %d timeout %ld", sk, sk->sk_state, timeout); > - sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); > + cancel_delayed_work(work); > + schedule_delayed_work(work, timeout); > } > > static void sco_sock_clear_timer(struct sock *sk) > { > + struct delayed_work *work; Ditto. > + if (!sco_pi(sk)->conn) > + return; > + work = &sco_pi(sk)->conn->timeout_work; > + > BT_DBG("sock %p state %d", sk, sk->sk_state); > - sk_stop_timer(sk, &sk->sk_timer); > + cancel_delayed_work(work); > } > > /* ---- SCO connections ---- */ > @@ -179,6 +205,9 @@ static void sco_conn_del(struct hci_conn *hcon, int err) > bh_unlock_sock(sk); > sco_sock_kill(sk); > sock_put(sk); > + > + /* Ensure no more work items will run before freeing conn. */ > + cancel_delayed_work_sync(&conn->timeout_work); > } > > hcon->sco_data = NULL; > @@ -193,6 +222,8 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, > sco_pi(sk)->conn = conn; > conn->sk = sk; > > + INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout); > + > if (parent) > bt_accept_enqueue(parent, sk, true); > } > @@ -500,8 +531,6 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, > > sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT; > > - timer_setup(&sk->sk_timer, sco_sock_timeout, 0); > - > bt_sock_link(&sco_sk_list, sk); > return sk; > } > -- > 2.25.1 >
On 6/8/21 3:06 am, Luiz Augusto von Dentz wrote: > Hi Desmond, > > On Wed, Aug 4, 2021 at 8:48 AM Desmond Cheong Zhi Xi > <desmondcheongzx@gmail.com> wrote: >> >> struct sock.sk_timer should be used as a sock cleanup timer. However, >> SCO uses it to implement sock timeouts. >> >> This causes issues because struct sock.sk_timer's callback is run in >> an IRQ context, and the timer callback function sco_sock_timeout takes >> a spin lock on the socket. However, other functions such as >> sco_conn_del and sco_conn_ready take the spin lock with interrupts >> enabled. >> >> This inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} lock usage could >> lead to deadlocks as reported by Syzbot [1]: >> CPU0 >> ---- >> lock(slock-AF_BLUETOOTH-BTPROTO_SCO); >> <Interrupt> >> lock(slock-AF_BLUETOOTH-BTPROTO_SCO); >> >> To fix this, we use delayed work to implement SCO sock timouts >> instead. This allows us to avoid taking the spin lock on the socket in >> an IRQ context, and corrects the misuse of struct sock.sk_timer. >> >> As a note, cancel_delayed_work is used instead of >> cancel_delayed_work_sync in sco_sock_set_timer and >> sco_sock_clear_timer to avoid a deadlock. In the future, the call to >> bh_lock_sock inside sco_sock_timeout should be changed to lock_sock to >> synchronize with other functions using lock_sock. However, since >> sco_sock_set_timer and sco_sock_clear_timer are sometimes called under >> the locked socket (in sco_connect and __sco_sock_close), >> cancel_delayed_work_sync might cause them to sleep until an >> sco_sock_timeout that has started finishes running. But >> sco_sock_timeout would also sleep until it can grab the lock_sock. >> >> Using cancel_delayed_work is fine because sco_sock_timeout does not >> change from run to run, hence there is no functional difference >> between: >> 1. waiting for a timeout to finish running before scheduling another >> timeout >> 2. scheduling another timeout while a timeout is running. >> >> Link: https://syzkaller.appspot.com/bug?id=9089d89de0502e120f234ca0fc8a703f7368b31e [1] >> Reported-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com >> Tested-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com >> Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com> >> --- >> net/bluetooth/sco.c | 41 +++++++++++++++++++++++++++++++++++------ >> 1 file changed, 35 insertions(+), 6 deletions(-) >> >> diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c >> index ffa2a77a3e4c..89cb987ca9eb 100644 >> --- a/net/bluetooth/sco.c >> +++ b/net/bluetooth/sco.c >> @@ -48,6 +48,8 @@ struct sco_conn { >> spinlock_t lock; >> struct sock *sk; >> >> + struct delayed_work timeout_work; >> + >> unsigned int mtu; >> }; >> >> @@ -74,9 +76,20 @@ struct sco_pinfo { >> #define SCO_CONN_TIMEOUT (HZ * 40) >> #define SCO_DISCONN_TIMEOUT (HZ * 2) >> >> -static void sco_sock_timeout(struct timer_list *t) >> +static void sco_sock_timeout(struct work_struct *work) >> { >> - struct sock *sk = from_timer(sk, t, sk_timer); >> + struct sco_conn *conn = container_of(work, struct sco_conn, >> + timeout_work.work); >> + struct sock *sk; >> + >> + sco_conn_lock(conn); >> + sk = conn->sk; >> + if (sk) >> + sock_hold(sk); >> + sco_conn_unlock(conn); >> + >> + if (!sk) >> + return; >> >> BT_DBG("sock %p state %d", sk, sk->sk_state); >> >> @@ -91,14 +104,27 @@ static void sco_sock_timeout(struct timer_list *t) >> >> static void sco_sock_set_timer(struct sock *sk, long timeout) >> { >> + struct delayed_work *work; > > Minor nitpick but I don't think using a dedicated variable here makes > much sense. > Thanks for the feedback, Luiz. Agreed, I can make the change in the next version of the series after the other patches are reviewed. Best wishes, Desmond >> + if (!sco_pi(sk)->conn) >> + return; >> + work = &sco_pi(sk)->conn->timeout_work; >> + >> BT_DBG("sock %p state %d timeout %ld", sk, sk->sk_state, timeout); >> - sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); >> + cancel_delayed_work(work); >> + schedule_delayed_work(work, timeout); >> } >> >> static void sco_sock_clear_timer(struct sock *sk) >> { >> + struct delayed_work *work; > > Ditto. > >> + if (!sco_pi(sk)->conn) >> + return; >> + work = &sco_pi(sk)->conn->timeout_work; >> + >> BT_DBG("sock %p state %d", sk, sk->sk_state); >> - sk_stop_timer(sk, &sk->sk_timer); >> + cancel_delayed_work(work); >> } >> >> /* ---- SCO connections ---- */ >> @@ -179,6 +205,9 @@ static void sco_conn_del(struct hci_conn *hcon, int err) >> bh_unlock_sock(sk); >> sco_sock_kill(sk); >> sock_put(sk); >> + >> + /* Ensure no more work items will run before freeing conn. */ >> + cancel_delayed_work_sync(&conn->timeout_work); >> } >> >> hcon->sco_data = NULL; >> @@ -193,6 +222,8 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, >> sco_pi(sk)->conn = conn; >> conn->sk = sk; >> >> + INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout); >> + >> if (parent) >> bt_accept_enqueue(parent, sk, true); >> } >> @@ -500,8 +531,6 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, >> >> sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT; >> >> - timer_setup(&sk->sk_timer, sco_sock_timeout, 0); >> - >> bt_sock_link(&sco_sk_list, sk); >> return sk; >> } >> -- >> 2.25.1 >> > >
Hi Desmond, On Sun, Aug 8, 2021 at 9:04 PM Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com> wrote: > > On 6/8/21 3:06 am, Luiz Augusto von Dentz wrote: > > Hi Desmond, > > > > On Wed, Aug 4, 2021 at 8:48 AM Desmond Cheong Zhi Xi > > <desmondcheongzx@gmail.com> wrote: > >> > >> struct sock.sk_timer should be used as a sock cleanup timer. However, > >> SCO uses it to implement sock timeouts. > >> > >> This causes issues because struct sock.sk_timer's callback is run in > >> an IRQ context, and the timer callback function sco_sock_timeout takes > >> a spin lock on the socket. However, other functions such as > >> sco_conn_del and sco_conn_ready take the spin lock with interrupts > >> enabled. > >> > >> This inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} lock usage could > >> lead to deadlocks as reported by Syzbot [1]: > >> CPU0 > >> ---- > >> lock(slock-AF_BLUETOOTH-BTPROTO_SCO); > >> <Interrupt> > >> lock(slock-AF_BLUETOOTH-BTPROTO_SCO); > >> > >> To fix this, we use delayed work to implement SCO sock timouts > >> instead. This allows us to avoid taking the spin lock on the socket in > >> an IRQ context, and corrects the misuse of struct sock.sk_timer. > >> > >> As a note, cancel_delayed_work is used instead of > >> cancel_delayed_work_sync in sco_sock_set_timer and > >> sco_sock_clear_timer to avoid a deadlock. In the future, the call to > >> bh_lock_sock inside sco_sock_timeout should be changed to lock_sock to > >> synchronize with other functions using lock_sock. However, since > >> sco_sock_set_timer and sco_sock_clear_timer are sometimes called under > >> the locked socket (in sco_connect and __sco_sock_close), > >> cancel_delayed_work_sync might cause them to sleep until an > >> sco_sock_timeout that has started finishes running. But > >> sco_sock_timeout would also sleep until it can grab the lock_sock. > >> > >> Using cancel_delayed_work is fine because sco_sock_timeout does not > >> change from run to run, hence there is no functional difference > >> between: > >> 1. waiting for a timeout to finish running before scheduling another > >> timeout > >> 2. scheduling another timeout while a timeout is running. > >> > >> Link: https://syzkaller.appspot.com/bug?id=9089d89de0502e120f234ca0fc8a703f7368b31e [1] > >> Reported-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com > >> Tested-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com > >> Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com> > >> --- > >> net/bluetooth/sco.c | 41 +++++++++++++++++++++++++++++++++++------ > >> 1 file changed, 35 insertions(+), 6 deletions(-) > >> > >> diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c > >> index ffa2a77a3e4c..89cb987ca9eb 100644 > >> --- a/net/bluetooth/sco.c > >> +++ b/net/bluetooth/sco.c > >> @@ -48,6 +48,8 @@ struct sco_conn { > >> spinlock_t lock; > >> struct sock *sk; > >> > >> + struct delayed_work timeout_work; > >> + > >> unsigned int mtu; > >> }; > >> > >> @@ -74,9 +76,20 @@ struct sco_pinfo { > >> #define SCO_CONN_TIMEOUT (HZ * 40) > >> #define SCO_DISCONN_TIMEOUT (HZ * 2) > >> > >> -static void sco_sock_timeout(struct timer_list *t) > >> +static void sco_sock_timeout(struct work_struct *work) > >> { > >> - struct sock *sk = from_timer(sk, t, sk_timer); > >> + struct sco_conn *conn = container_of(work, struct sco_conn, > >> + timeout_work.work); > >> + struct sock *sk; > >> + > >> + sco_conn_lock(conn); > >> + sk = conn->sk; > >> + if (sk) > >> + sock_hold(sk); > >> + sco_conn_unlock(conn); > >> + > >> + if (!sk) > >> + return; > >> > >> BT_DBG("sock %p state %d", sk, sk->sk_state); > >> > >> @@ -91,14 +104,27 @@ static void sco_sock_timeout(struct timer_list *t) > >> > >> static void sco_sock_set_timer(struct sock *sk, long timeout) > >> { > >> + struct delayed_work *work; > > > > Minor nitpick but I don't think using a dedicated variable here makes > > much sense. > > > > Thanks for the feedback, Luiz. Agreed, I can make the change in the next > version of the series after the other patches are reviewed. Others look good, so please go ahead and send the new version once you address these comments. > Best wishes, > Desmond > > >> + if (!sco_pi(sk)->conn) > >> + return; > >> + work = &sco_pi(sk)->conn->timeout_work; > >> + > >> BT_DBG("sock %p state %d timeout %ld", sk, sk->sk_state, timeout); > >> - sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); > >> + cancel_delayed_work(work); > >> + schedule_delayed_work(work, timeout); > >> } > >> > >> static void sco_sock_clear_timer(struct sock *sk) > >> { > >> + struct delayed_work *work; > > > > Ditto. > > > >> + if (!sco_pi(sk)->conn) > >> + return; > >> + work = &sco_pi(sk)->conn->timeout_work; > >> + > >> BT_DBG("sock %p state %d", sk, sk->sk_state); > >> - sk_stop_timer(sk, &sk->sk_timer); > >> + cancel_delayed_work(work); > >> } > >> > >> /* ---- SCO connections ---- */ > >> @@ -179,6 +205,9 @@ static void sco_conn_del(struct hci_conn *hcon, int err) > >> bh_unlock_sock(sk); > >> sco_sock_kill(sk); > >> sock_put(sk); > >> + > >> + /* Ensure no more work items will run before freeing conn. */ > >> + cancel_delayed_work_sync(&conn->timeout_work); > >> } > >> > >> hcon->sco_data = NULL; > >> @@ -193,6 +222,8 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, > >> sco_pi(sk)->conn = conn; > >> conn->sk = sk; > >> > >> + INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout); > >> + > >> if (parent) > >> bt_accept_enqueue(parent, sk, true); > >> } > >> @@ -500,8 +531,6 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, > >> > >> sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT; > >> > >> - timer_setup(&sk->sk_timer, sco_sock_timeout, 0); > >> - > >> bt_sock_link(&sco_sk_list, sk); > >> return sk; > >> } > >> -- > >> 2.25.1 > >> > > > > > -- Luiz Augusto von Dentz
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index ffa2a77a3e4c..89cb987ca9eb 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -48,6 +48,8 @@ struct sco_conn { spinlock_t lock; struct sock *sk; + struct delayed_work timeout_work; + unsigned int mtu; }; @@ -74,9 +76,20 @@ struct sco_pinfo { #define SCO_CONN_TIMEOUT (HZ * 40) #define SCO_DISCONN_TIMEOUT (HZ * 2) -static void sco_sock_timeout(struct timer_list *t) +static void sco_sock_timeout(struct work_struct *work) { - struct sock *sk = from_timer(sk, t, sk_timer); + struct sco_conn *conn = container_of(work, struct sco_conn, + timeout_work.work); + struct sock *sk; + + sco_conn_lock(conn); + sk = conn->sk; + if (sk) + sock_hold(sk); + sco_conn_unlock(conn); + + if (!sk) + return; BT_DBG("sock %p state %d", sk, sk->sk_state); @@ -91,14 +104,27 @@ static void sco_sock_timeout(struct timer_list *t) static void sco_sock_set_timer(struct sock *sk, long timeout) { + struct delayed_work *work; + + if (!sco_pi(sk)->conn) + return; + work = &sco_pi(sk)->conn->timeout_work; + BT_DBG("sock %p state %d timeout %ld", sk, sk->sk_state, timeout); - sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); + cancel_delayed_work(work); + schedule_delayed_work(work, timeout); } static void sco_sock_clear_timer(struct sock *sk) { + struct delayed_work *work; + + if (!sco_pi(sk)->conn) + return; + work = &sco_pi(sk)->conn->timeout_work; + BT_DBG("sock %p state %d", sk, sk->sk_state); - sk_stop_timer(sk, &sk->sk_timer); + cancel_delayed_work(work); } /* ---- SCO connections ---- */ @@ -179,6 +205,9 @@ static void sco_conn_del(struct hci_conn *hcon, int err) bh_unlock_sock(sk); sco_sock_kill(sk); sock_put(sk); + + /* Ensure no more work items will run before freeing conn. */ + cancel_delayed_work_sync(&conn->timeout_work); } hcon->sco_data = NULL; @@ -193,6 +222,8 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, sco_pi(sk)->conn = conn; conn->sk = sk; + INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout); + if (parent) bt_accept_enqueue(parent, sk, true); } @@ -500,8 +531,6 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT; - timer_setup(&sk->sk_timer, sco_sock_timeout, 0); - bt_sock_link(&sco_sk_list, sk); return sk; }