diff mbox series

Bluetooth: Use lock_sock() when acquiring lock in sco_conn_del

Message ID 20201014071731.34279-1-yanfei.xu@windriver.com
State New
Headers show
Series Bluetooth: Use lock_sock() when acquiring lock in sco_conn_del | expand

Commit Message

Xu, Yanfei Oct. 14, 2020, 7:17 a.m. UTC
From: Yanfei Xu <yanfei.xu@windriver.com>

Locking slock-AF_BLUETOOTH-BTPROTO_SCO may happen in process context or
BH context. If in process context, we should use lock_sock(). As blow
warning, sco_conn_del() is called in process context, so let's use
lock_sock() instead of bh_lock_sock().

Comments

Xu, Yanfei Oct. 16, 2020, 3:15 a.m. UTC | #1
On 10/14/20 8:31 PM, Hillf Danton wrote:
> 

> On Wed, 14 Oct 2020 15:17:31 +0800

>> From: Yanfei Xu <yanfei.xu@windriver.com>

>>

>> Locking slock-AF_BLUETOOTH-BTPROTO_SCO may happen in process context or

>> BH context. If in process context, we should use lock_sock(). As blow

>> warning, sco_conn_del() is called in process context, so let's use

>> lock_sock() instead of bh_lock_sock().

>>

> Sounds opposite because blocking BH in BH context provides no extra

> protection while it makes sense in the less critical context particularly

> wrt sock lock.

> 

>> ================================

>> WARNING: inconsistent lock state

>> 5.9.0-rc4-syzkaller #0 Not tainted

>> --------------------------------

>> inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage.

>> syz-executor675/31233 [HC0[0]:SC0[0]:HE1:SE1] takes:

>> ffff8880a75c50a0 (slock-AF_BLUETOOTH-BTPROTO_SCO){+.?.}-{2:2}, at:

>> spin_lock include/linux/spinlock.h:354 [inline]

>> ffff8880a75c50a0 (slock-AF_BLUETOOTH-BTPROTO_SCO){+.?.}-{2:2}, at:

>> sco_conn_del+0x128/0x270 net/bluetooth/sco.c:176

>> {IN-SOFTIRQ-W} state was registered at:

>>    lock_acquire+0x1f3/0xae0 kernel/locking/lockdep.c:5006

>>    __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]

>>    _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151

>>    spin_lock include/linux/spinlock.h:354 [inline]

>>    sco_sock_timeout+0x24/0x140 net/bluetooth/sco.c:83

>>    call_timer_fn+0x1ac/0x760 kernel/time/timer.c:1413

>>    expire_timers kernel/time/timer.c:1458 [inline]

>>    __run_timers.part.0+0x67c/0xaa0 kernel/time/timer.c:1755

>>    __run_timers kernel/time/timer.c:1736 [inline]

>>    run_timer_softirq+0xae/0x1a0 kernel/time/timer.c:1768

>>    __do_softirq+0x1f7/0xa91 kernel/softirq.c:298

>>    asm_call_on_stack+0xf/0x20 arch/x86/entry/entry_64.S:706

>>    __run_on_irqstack arch/x86/include/asm/irq_stack.h:22 [inline]

>>    run_on_irqstack_cond arch/x86/include/asm/irq_stack.h:48 [inline]

>>    do_softirq_own_stack+0x9d/0xd0 arch/x86/kernel/irq_64.c:77

>>    invoke_softirq kernel/softirq.c:393 [inline]

>>    __irq_exit_rcu kernel/softirq.c:423 [inline]

>>    irq_exit_rcu+0x235/0x280 kernel/softirq.c:435

>>    sysvec_apic_timer_interrupt+0x51/0xf0 arch/x86/kernel/apic/apic.c:1091

>>    asm_sysvec_apic_timer_interrupt+0x12/0x20

>>    arch/x86/include/asm/idtentry.h:581

>>    unwind_next_frame+0x139a/0x1f90 arch/x86/kernel/unwind_orc.c:607

>>    arch_stack_walk+0x81/0xf0 arch/x86/kernel/stacktrace.c:25

>>    stack_trace_save+0x8c/0xc0 kernel/stacktrace.c:123

>>    kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48

>>    kasan_set_track mm/kasan/common.c:56 [inline]

>>    __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:461

>>    slab_post_alloc_hook mm/slab.h:518 [inline]

>>    slab_alloc mm/slab.c:3312 [inline]

>>    kmem_cache_alloc+0x13a/0x3a0 mm/slab.c:3482

>>    __d_alloc+0x2a/0x950 fs/dcache.c:1709

>>    d_alloc+0x4a/0x230 fs/dcache.c:1788

>>    d_alloc_parallel+0xe9/0x18e0 fs/dcache.c:2540

>>    lookup_open.isra.0+0x9ac/0x1350 fs/namei.c:3030

>>    open_last_lookups fs/namei.c:3177 [inline]

>>    path_openat+0x96d/0x2730 fs/namei.c:3365

>>    do_filp_open+0x17e/0x3c0 fs/namei.c:3395

>>    do_sys_openat2+0x16d/0x420 fs/open.c:1168

>>    do_sys_open fs/open.c:1184 [inline]

>>    __do_sys_open fs/open.c:1192 [inline]

>>    __se_sys_open fs/open.c:1188 [inline]

>>    __x64_sys_open+0x119/0x1c0 fs/open.c:1188

>>    do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46

>>    entry_SYSCALL_64_after_hwframe+0x44/0xa9

>> irq event stamp: 853

>> hardirqs last  enabled at (853): [<ffffffff87f733af>]

>> __raw_spin_unlock_irq include/linux/spinlock_api_smp.h:168 [inline]

>> hardirqs last  enabled at (853): [<ffffffff87f733af>]

>> _raw_spin_unlock_irq+0x1f/0x80 kernel/locking/spinlock.c:199

>> hardirqs last disabled at (852): [<ffffffff87f73764>]

>> __raw_spin_lock_irq include/linux/spinlock_api_smp.h:126 [inline]

>> hardirqs last disabled at (852): [<ffffffff87f73764>]

>> _raw_spin_lock_irq+0xa4/0xd0 kernel/locking/spinlock.c:167

>> softirqs last  enabled at (0): [<ffffffff8144c929>]

>> copy_process+0x1a99/0x6920 kernel/fork.c:2018

>> softirqs last disabled at (0): [<0000000000000000>] 0x0

>>

>> other info that might help us debug this:

>>   Possible unsafe locking scenario:

>>

>>         CPU0

>>         ----

>>    lock(slock-AF_BLUETOOTH-BTPROTO_SCO);

>>    <Interrupt>

>>      lock(slock-AF_BLUETOOTH-BTPROTO_SCO);

>>

>>   *** DEADLOCK ***

>>

>> 3 locks held by syz-executor675/31233:

>>   #0: ffff88809f104f40 (&hdev->req_lock){+.+.}-{3:3}, at:

>> hci_dev_do_close+0xf5/0x1080 net/bluetooth/hci_core.c:1720

>>   #1: ffff88809f104078 (&hdev->lock){+.+.}-{3:3}, at:

>> hci_dev_do_close+0x253/0x1080 net/bluetooth/hci_core.c:1757

>>   #2: ffffffff8a9188c8 (hci_cb_list_lock){+.+.}-{3:3}, at:

>> hci_disconn_cfm include/net/bluetooth/hci_core.h:1435 [inline]

>>   #2: ffffffff8a9188c8 (hci_cb_list_lock){+.+.}-{3:3}, at:

>> hci_conn_hash_flush+0xc7/0x220 net/bluetooth/hci_conn.c:1557

>>

>> stack backtrace:

>> CPU: 1 PID: 31233 Comm: syz-executor675 Not tainted 5.9.0-rc4-syzkaller

>> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS

>> Google 01/01/2011

>> Call Trace:

>>   __dump_stack lib/dump_stack.c:77 [inline]

>>   dump_stack+0x198/0x1fd lib/dump_stack.c:118

>>   print_usage_bug kernel/locking/lockdep.c:4020 [inline]

>>   valid_state kernel/locking/lockdep.c:3361 [inline]

>>   mark_lock_irq kernel/locking/lockdep.c:3560 [inline]

>>   mark_lock.cold+0x7a/0x7f kernel/locking/lockdep.c:4006

>>   mark_usage kernel/locking/lockdep.c:3923 [inline]

>>   __lock_acquire+0x876/0x5570 kernel/locking/lockdep.c:4380

>>   lock_acquire+0x1f3/0xae0 kernel/locking/lockdep.c:5006

>>   __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]

>>   _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151

>>   spin_lock include/linux/spinlock.h:354 [inline]

>>   sco_conn_del+0x128/0x270 net/bluetooth/sco.c:176

>>   sco_disconn_cfm net/bluetooth/sco.c:1178 [inline]

>>   sco_disconn_cfm+0x62/0x80 net/bluetooth/sco.c:1171

>>   hci_disconn_cfm include/net/bluetooth/hci_core.h:1438 [inline]

>>   hci_conn_hash_flush+0x114/0x220 net/bluetooth/hci_conn.c:1557

>>   hci_dev_do_close+0x5c6/0x1080 net/bluetooth/hci_core.c:1770

>>   hci_unregister_dev+0x1bd/0xe30 net/bluetooth/hci_core.c:3790

>>   vhci_release+0x70/0xe0 drivers/bluetooth/hci_vhci.c:340

>>   __f put+0x285/0x920 fs/file_table.c:281

>>   task_work_run+0xdd/0x190 kernel/task_work.c:141

>>   exit_task_work include/linux/task_work.h:25 [inline]

>>   do_exit+0xb7d/0x29f0 kernel/exit.c:806

>>   do_group_exit+0x125/0x310 kernel/exit.c:903

>>   get_signal+0x428/0x1f00 kernel/signal.c:2757

>>   arch_do_signal+0x82/0x2520 arch/x86/kernel/signal.c:811

>>   exit_to_user_mode_loop kernel/entry/common.c:159 [inline]

>>   exit_to_user_mode_prepare+0x1ae/0x200 kernel/entry/common.c:190

>>   syscall_exit_to_user_mode+0x7e/0x2e0 kernel/entry/common.c:265

>>   entry_SYSCALL_64_after_hwframe+0x44/0xa9

>> RIP: 0033:0x447279

>>

>> Reported-by: syzbot+65684128cd7c35bc66a1@syzkaller.appspotmail.com

>> Signed-off-by: Yanfei Xu <yanfei.xu@windriver.com>

>> ---

>>   net/bluetooth/sco.c | 4 ++--

>>   1 file changed, 2 insertions(+), 2 deletions(-)

>>

>> diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c

>> index dcf7f96ff417..559b883c815f 100644

>> --- a/net/bluetooth/sco.c

>> +++ b/net/bluetooth/sco.c

>> @@ -173,10 +173,10 @@ static void sco_conn_del(struct hci_conn *hcon, int err)

>>   

>>   	if (sk) {

>>   		sock_hold(sk);

>> -		bh_lock_sock(sk);

>> +		lock_sock(sk);

>>   		sco_sock_clear_timer(sk);

>>   		sco_chan_del(sk, err);

>> -		bh_unlock_sock(sk);

>> +		release_sock(sk);

>>   		sco_sock_kill(sk);

>>   		sock_put(sk);

>>   	}

>> -- 

>> 2.18.2

> 

> 

> --- a/net/bluetooth/sco.c

> +++ b/net/bluetooth/sco.c

> @@ -80,10 +80,10 @@ static void sco_sock_timeout(struct time

>   

>   	BT_DBG("sock %p state %d", sk, sk->sk_state);

>   

> -	bh_lock_sock(sk);

> +	lock_sock(sk);

>   	sk->sk_err = ETIMEDOUT;

>   	sk->sk_state_change(sk);

> -	bh_unlock_sock(sk);

> +	unlock_sock(sk);

>   

>   	sco_sock_kill(sk);

>   	sock_put(sk);

> 

Hi Hillf,

Thanks for your reply! But I don't clearly understand what you mean.

After your change, If sco_conn_del() have got the lock and then run into 
sco_sock_timeout which is in BH, the potential deadlock is still exsit.

As the function define, use bh_lock_sock in sco_sock_timeout(BH context) 
is right. The root cause is prevent from locking in BH after we've got 
the lock in sco_conn_del, isn't it?

/* BH context may only use the following locking interface. */
#define bh_lock_sock(__sk)      spin_lock(&((__sk)->sk_lock.slock))


Regards,
Yanfei
Xu, Yanfei Oct. 19, 2020, 1:54 a.m. UTC | #2
On 10/16/20 12:10 PM, Hillf Danton wrote:
> On Fri, 16 Oct 2020 11:15:27 +0800 Yanfei Xu wrote:

>> On 10/14/20 8:31 PM, Hillf Danton wrote:

>>>

>>> On Wed, 14 Oct 2020 15:17:31 +0800

>>>> From: Yanfei Xu <yanfei.xu@windriver.com>

>>>>

>>>> Locking slock-AF_BLUETOOTH-BTPROTO_SCO may happen in process context or

>>>> BH context. If in process context, we should use lock_sock(). As blow

>>>> warning, sco_conn_del() is called in process context, so let's use

>>>> lock_sock() instead of bh_lock_sock().

>>>>

>>> Sounds opposite because blocking BH in BH context provides no extra

>>> protection while it makes sense in the less critical context particularly

>>> wrt sock lock.

>>>

>>>> ================================

>>>> WARNING: inconsistent lock state

>>>> 5.9.0-rc4-syzkaller #0 Not tainted

>>>> --------------------------------

>>>> inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage.

>>>> syz-executor675/31233 [HC0[0]:SC0[0]:HE1:SE1] takes:

>>>> ffff8880a75c50a0 (slock-AF_BLUETOOTH-BTPROTO_SCO){+.?.}-{2:2}, at:

>>>> spin_lock include/linux/spinlock.h:354 [inline]

>>>> ffff8880a75c50a0 (slock-AF_BLUETOOTH-BTPROTO_SCO){+.?.}-{2:2}, at:

>>>> sco_conn_del+0x128/0x270 net/bluetooth/sco.c:176

>>>> {IN-SOFTIRQ-W} state was registered at:

>>>>     lock_acquire+0x1f3/0xae0 kernel/locking/lockdep.c:5006

>>>>     __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]

>>>>     _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151

>>>>     spin_lock include/linux/spinlock.h:354 [inline]

>>>>     sco_sock_timeout+0x24/0x140 net/bluetooth/sco.c:83

>>>>     call_timer_fn+0x1ac/0x760 kernel/time/timer.c:1413

>>>>     expire_timers kernel/time/timer.c:1458 [inline]

>>>>     __run_timers.part.0+0x67c/0xaa0 kernel/time/timer.c:1755

>>>>     __run_timers kernel/time/timer.c:1736 [inline]

>>>>     run_timer_softirq+0xae/0x1a0 kernel/time/timer.c:1768

>>>>     __do_softirq+0x1f7/0xa91 kernel/softirq.c:298

>>>>     asm_call_on_stack+0xf/0x20 arch/x86/entry/entry_64.S:706

>>>>     __run_on_irqstack arch/x86/include/asm/irq_stack.h:22 [inline]

>>>>     run_on_irqstack_cond arch/x86/include/asm/irq_stack.h:48 [inline]

>>>>     do_softirq_own_stack+0x9d/0xd0 arch/x86/kernel/irq_64.c:77

>>>>     invoke_softirq kernel/softirq.c:393 [inline]

>>>>     __irq_exit_rcu kernel/softirq.c:423 [inline]

>>>>     irq_exit_rcu+0x235/0x280 kernel/softirq.c:435

>>>>     sysvec_apic_timer_interrupt+0x51/0xf0 arch/x86/kernel/apic/apic.c:1091

>>>>     asm_sysvec_apic_timer_interrupt+0x12/0x20

>>>>     arch/x86/include/asm/idtentry.h:581

>>>>     unwind_next_frame+0x139a/0x1f90 arch/x86/kernel/unwind_orc.c:607

>>>>     arch_stack_walk+0x81/0xf0 arch/x86/kernel/stacktrace.c:25

>>>>     stack_trace_save+0x8c/0xc0 kernel/stacktrace.c:123

>>>>     kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48

>>>>     kasan_set_track mm/kasan/common.c:56 [inline]

>>>>     __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:461

>>>>     slab_post_alloc_hook mm/slab.h:518 [inline]

>>>>     slab_alloc mm/slab.c:3312 [inline]

>>>>     kmem_cache_alloc+0x13a/0x3a0 mm/slab.c:3482

>>>>     __d_alloc+0x2a/0x950 fs/dcache.c:1709

>>>>     d_alloc+0x4a/0x230 fs/dcache.c:1788

>>>>     d_alloc_parallel+0xe9/0x18e0 fs/dcache.c:2540

>>>>     lookup_open.isra.0+0x9ac/0x1350 fs/namei.c:3030

>>>>     open_last_lookups fs/namei.c:3177 [inline]

>>>>     path_openat+0x96d/0x2730 fs/namei.c:3365

>>>>     do_filp_open+0x17e/0x3c0 fs/namei.c:3395

>>>>     do_sys_openat2+0x16d/0x420 fs/open.c:1168

>>>>     do_sys_open fs/open.c:1184 [inline]

>>>>     __do_sys_open fs/open.c:1192 [inline]

>>>>     __se_sys_open fs/open.c:1188 [inline]

>>>>     __x64_sys_open+0x119/0x1c0 fs/open.c:1188

>>>>     do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46

>>>>     entry_SYSCALL_64_after_hwframe+0x44/0xa9

>>>> irq event stamp: 853

>>>> hardirqs last  enabled at (853): [<ffffffff87f733af>]

>>>> __raw_spin_unlock_irq include/linux/spinlock_api_smp.h:168 [inline]

>>>> hardirqs last  enabled at (853): [<ffffffff87f733af>]

>>>> _raw_spin_unlock_irq+0x1f/0x80 kernel/locking/spinlock.c:199

>>>> hardirqs last disabled at (852): [<ffffffff87f73764>]

>>>> __raw_spin_lock_irq include/linux/spinlock_api_smp.h:126 [inline]

>>>> hardirqs last disabled at (852): [<ffffffff87f73764>]

>>>> _raw_spin_lock_irq+0xa4/0xd0 kernel/locking/spinlock.c:167

>>>> softirqs last  enabled at (0): [<ffffffff8144c929>]

>>>> copy_process+0x1a99/0x6920 kernel/fork.c:2018

>>>> softirqs last disabled at (0): [<0000000000000000>] 0x0

>>>>

>>>> other info that might help us debug this:

>>>>    Possible unsafe locking scenario:

>>>>

>>>>          CPU0

>>>>          ----

>>>>     lock(slock-AF_BLUETOOTH-BTPROTO_SCO);

>>>>     <Interrupt>

>>>>       lock(slock-AF_BLUETOOTH-BTPROTO_SCO);

>>>>

>>>>    *** DEADLOCK ***

>>>>

>>>> 3 locks held by syz-executor675/31233:

>>>>    #0: ffff88809f104f40 (&hdev->req_lock){+.+.}-{3:3}, at:

>>>> hci_dev_do_close+0xf5/0x1080 net/bluetooth/hci_core.c:1720

>>>>    #1: ffff88809f104078 (&hdev->lock){+.+.}-{3:3}, at:

>>>> hci_dev_do_close+0x253/0x1080 net/bluetooth/hci_core.c:1757

>>>>    #2: ffffffff8a9188c8 (hci_cb_list_lock){+.+.}-{3:3}, at:

>>>> hci_disconn_cfm include/net/bluetooth/hci_core.h:1435 [inline]

>>>>    #2: ffffffff8a9188c8 (hci_cb_list_lock){+.+.}-{3:3}, at:

>>>> hci_conn_hash_flush+0xc7/0x220 net/bluetooth/hci_conn.c:1557

>>>>

>>>> stack backtrace:

>>>> CPU: 1 PID: 31233 Comm: syz-executor675 Not tainted 5.9.0-rc4-syzkaller

>>>> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS

>>>> Google 01/01/2011

>>>> Call Trace:

>>>>    __dump_stack lib/dump_stack.c:77 [inline]

>>>>    dump_stack+0x198/0x1fd lib/dump_stack.c:118

>>>>    print_usage_bug kernel/locking/lockdep.c:4020 [inline]

>>>>    valid_state kernel/locking/lockdep.c:3361 [inline]

>>>>    mark_lock_irq kernel/locking/lockdep.c:3560 [inline]

>>>>    mark_lock.cold+0x7a/0x7f kernel/locking/lockdep.c:4006

>>>>    mark_usage kernel/locking/lockdep.c:3923 [inline]

>>>>    __lock_acquire+0x876/0x5570 kernel/locking/lockdep.c:4380

>>>>    lock_acquire+0x1f3/0xae0 kernel/locking/lockdep.c:5006

>>>>    __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]

>>>>    _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151

>>>>    spin_lock include/linux/spinlock.h:354 [inline]

>>>>    sco_conn_del+0x128/0x270 net/bluetooth/sco.c:176

>>>>    sco_disconn_cfm net/bluetooth/sco.c:1178 [inline]

>>>>    sco_disconn_cfm+0x62/0x80 net/bluetooth/sco.c:1171

>>>>    hci_disconn_cfm include/net/bluetooth/hci_core.h:1438 [inline]

>>>>    hci_conn_hash_flush+0x114/0x220 net/bluetooth/hci_conn.c:1557

>>>>    hci_dev_do_close+0x5c6/0x1080 net/bluetooth/hci_core.c:1770

>>>>    hci_unregister_dev+0x1bd/0xe30 net/bluetooth/hci_core.c:3790

>>>>    vhci_release+0x70/0xe0 drivers/bluetooth/hci_vhci.c:340

>>>>    __f put+0x285/0x920 fs/file_table.c:281

>>>>    task_work_run+0xdd/0x190 kernel/task_work.c:141

>>>>    exit_task_work include/linux/task_work.h:25 [inline]

>>>>    do_exit+0xb7d/0x29f0 kernel/exit.c:806

>>>>    do_group_exit+0x125/0x310 kernel/exit.c:903

>>>>    get_signal+0x428/0x1f00 kernel/signal.c:2757

>>>>    arch_do_signal+0x82/0x2520 arch/x86/kernel/signal.c:811

>>>>    exit_to_user_mode_loop kernel/entry/common.c:159 [inline]

>>>>    exit_to_user_mode_prepare+0x1ae/0x200 kernel/entry/common.c:190

>>>>    syscall_exit_to_user_mode+0x7e/0x2e0 kernel/entry/common.c:265

>>>>    entry_SYSCALL_64_after_hwframe+0x44/0xa9

>>>> RIP: 0033:0x447279

>>>>

>>>> Reported-by: syzbot+65684128cd7c35bc66a1@syzkaller.appspotmail.com

>>>> Signed-off-by: Yanfei Xu <yanfei.xu@windriver.com>

>>>> ---

>>>>    net/bluetooth/sco.c | 4 ++--

>>>>    1 file changed, 2 insertions(+), 2 deletions(-)

>>>>

>>>> diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c

>>>> index dcf7f96ff417..559b883c815f 100644

>>>> --- a/net/bluetooth/sco.c

>>>> +++ b/net/bluetooth/sco.c

>>>> @@ -173,10 +173,10 @@ static void sco_conn_del(struct hci_conn *hcon, int err)

>>>>    

>>>>    	if (sk) {

>>>>    		sock_hold(sk);

>>>> -		bh_lock_sock(sk);

>>>> +		lock_sock(sk);

>>>>    		sco_sock_clear_timer(sk);

>>>>    		sco_chan_del(sk, err);

>>>> -		bh_unlock_sock(sk);

>>>> +		release_sock(sk);

>>>>    		sco_sock_kill(sk);

>>>>    		sock_put(sk);

>>>>    	}

>>>> -- 

>>>> 2.18.2

>>>

>>>

>>> --- a/net/bluetooth/sco.c

>>> +++ b/net/bluetooth/sco.c

>>> @@ -80,10 +80,10 @@ static void sco_sock_timeout(struct time

>>>    

>>>    	BT_DBG("sock %p state %d", sk, sk->sk_state);

>>>    

>>> -	bh_lock_sock(sk);

>>> +	lock_sock(sk);

>>>    	sk->sk_err = ETIMEDOUT;

>>>    	sk->sk_state_change(sk);

>>> -	bh_unlock_sock(sk);

>>> +	unlock_sock(sk);

>>>    

>>>    	sco_sock_kill(sk);

>>>    	sock_put(sk);

>>>

>> Hi Hillf,

>>

>> Thanks for your reply! But I don't clearly understand what you mean.

>>

>> After your change, If sco_conn_del() have got the lock and then run into

>> sco_sock_timeout which is in BH, the potential deadlock is still exsit.

> 

> My change is incorrect. Thanks for your reply.

> 

>>

>> As the function define, use bh_lock_sock in sco_sock_timeout(BH context)

>> is right. The root cause is prevent from locking in BH after we've got

>> the lock in sco_conn_del, isn't it?

>>

>> /* BH context may only use the following locking interface. */

>> #define bh_lock_sock(__sk)      spin_lock(&((__sk)->sk_lock.slock))

> 

> Then replacing bh_lock_sock() with lock_sock() in sco_conn_del() seems

> not to work because bh is enabled after lock_sock(). What we want instead

> is block bh here until we release sock, something like the diff below.

> 

> --- a/net/bluetooth/sco.c

> +++ b/net/bluetooth/sco.c

> @@ -173,10 +173,12 @@ static void sco_conn_del(struct hci_conn

>   

>   	if (sk) {

>   		sock_hold(sk);

> +		local_bh_disable();

>   		bh_lock_sock(sk);

>   		sco_sock_clear_timer(sk);

>   		sco_chan_del(sk, err);

>   		bh_unlock_sock(sk);

> +		local_bh_enable();

>   		sco_sock_kill(sk);

>   		sock_put(sk);

>   	}

> 


Anyone else have any suggestion? :)

Thanks,
Yanfei
diff mbox series

Patch

================================
WARNING: inconsistent lock state
5.9.0-rc4-syzkaller #0 Not tainted
--------------------------------
inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage.
syz-executor675/31233 [HC0[0]:SC0[0]:HE1:SE1] takes:
ffff8880a75c50a0 (slock-AF_BLUETOOTH-BTPROTO_SCO){+.?.}-{2:2}, at:
spin_lock include/linux/spinlock.h:354 [inline]
ffff8880a75c50a0 (slock-AF_BLUETOOTH-BTPROTO_SCO){+.?.}-{2:2}, at:
sco_conn_del+0x128/0x270 net/bluetooth/sco.c:176
{IN-SOFTIRQ-W} state was registered at:
  lock_acquire+0x1f3/0xae0 kernel/locking/lockdep.c:5006
  __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
  _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151
  spin_lock include/linux/spinlock.h:354 [inline]
  sco_sock_timeout+0x24/0x140 net/bluetooth/sco.c:83
  call_timer_fn+0x1ac/0x760 kernel/time/timer.c:1413
  expire_timers kernel/time/timer.c:1458 [inline]
  __run_timers.part.0+0x67c/0xaa0 kernel/time/timer.c:1755
  __run_timers kernel/time/timer.c:1736 [inline]
  run_timer_softirq+0xae/0x1a0 kernel/time/timer.c:1768
  __do_softirq+0x1f7/0xa91 kernel/softirq.c:298
  asm_call_on_stack+0xf/0x20 arch/x86/entry/entry_64.S:706
  __run_on_irqstack arch/x86/include/asm/irq_stack.h:22 [inline]
  run_on_irqstack_cond arch/x86/include/asm/irq_stack.h:48 [inline]
  do_softirq_own_stack+0x9d/0xd0 arch/x86/kernel/irq_64.c:77
  invoke_softirq kernel/softirq.c:393 [inline]
  __irq_exit_rcu kernel/softirq.c:423 [inline]
  irq_exit_rcu+0x235/0x280 kernel/softirq.c:435
  sysvec_apic_timer_interrupt+0x51/0xf0 arch/x86/kernel/apic/apic.c:1091
  asm_sysvec_apic_timer_interrupt+0x12/0x20
  arch/x86/include/asm/idtentry.h:581
  unwind_next_frame+0x139a/0x1f90 arch/x86/kernel/unwind_orc.c:607
  arch_stack_walk+0x81/0xf0 arch/x86/kernel/stacktrace.c:25
  stack_trace_save+0x8c/0xc0 kernel/stacktrace.c:123
  kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48
  kasan_set_track mm/kasan/common.c:56 [inline]
  __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:461
  slab_post_alloc_hook mm/slab.h:518 [inline]
  slab_alloc mm/slab.c:3312 [inline]
  kmem_cache_alloc+0x13a/0x3a0 mm/slab.c:3482
  __d_alloc+0x2a/0x950 fs/dcache.c:1709
  d_alloc+0x4a/0x230 fs/dcache.c:1788
  d_alloc_parallel+0xe9/0x18e0 fs/dcache.c:2540
  lookup_open.isra.0+0x9ac/0x1350 fs/namei.c:3030
  open_last_lookups fs/namei.c:3177 [inline]
  path_openat+0x96d/0x2730 fs/namei.c:3365
  do_filp_open+0x17e/0x3c0 fs/namei.c:3395
  do_sys_openat2+0x16d/0x420 fs/open.c:1168
  do_sys_open fs/open.c:1184 [inline]
  __do_sys_open fs/open.c:1192 [inline]
  __se_sys_open fs/open.c:1188 [inline]
  __x64_sys_open+0x119/0x1c0 fs/open.c:1188
  do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
irq event stamp: 853
hardirqs last  enabled at (853): [<ffffffff87f733af>]
__raw_spin_unlock_irq include/linux/spinlock_api_smp.h:168 [inline]
hardirqs last  enabled at (853): [<ffffffff87f733af>]
_raw_spin_unlock_irq+0x1f/0x80 kernel/locking/spinlock.c:199
hardirqs last disabled at (852): [<ffffffff87f73764>]
__raw_spin_lock_irq include/linux/spinlock_api_smp.h:126 [inline]
hardirqs last disabled at (852): [<ffffffff87f73764>]
_raw_spin_lock_irq+0xa4/0xd0 kernel/locking/spinlock.c:167
softirqs last  enabled at (0): [<ffffffff8144c929>]
copy_process+0x1a99/0x6920 kernel/fork.c:2018
softirqs last disabled at (0): [<0000000000000000>] 0x0

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
  <Interrupt>
    lock(slock-AF_BLUETOOTH-BTPROTO_SCO);

 *** DEADLOCK ***

3 locks held by syz-executor675/31233:
 #0: ffff88809f104f40 (&hdev->req_lock){+.+.}-{3:3}, at:
hci_dev_do_close+0xf5/0x1080 net/bluetooth/hci_core.c:1720
 #1: ffff88809f104078 (&hdev->lock){+.+.}-{3:3}, at:
hci_dev_do_close+0x253/0x1080 net/bluetooth/hci_core.c:1757
 #2: ffffffff8a9188c8 (hci_cb_list_lock){+.+.}-{3:3}, at:
hci_disconn_cfm include/net/bluetooth/hci_core.h:1435 [inline]
 #2: ffffffff8a9188c8 (hci_cb_list_lock){+.+.}-{3:3}, at:
hci_conn_hash_flush+0xc7/0x220 net/bluetooth/hci_conn.c:1557

stack backtrace:
CPU: 1 PID: 31233 Comm: syz-executor675 Not tainted 5.9.0-rc4-syzkaller
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x198/0x1fd lib/dump_stack.c:118
 print_usage_bug kernel/locking/lockdep.c:4020 [inline]
 valid_state kernel/locking/lockdep.c:3361 [inline]
 mark_lock_irq kernel/locking/lockdep.c:3560 [inline]
 mark_lock.cold+0x7a/0x7f kernel/locking/lockdep.c:4006
 mark_usage kernel/locking/lockdep.c:3923 [inline]
 __lock_acquire+0x876/0x5570 kernel/locking/lockdep.c:4380
 lock_acquire+0x1f3/0xae0 kernel/locking/lockdep.c:5006
 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
 _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151
 spin_lock include/linux/spinlock.h:354 [inline]
 sco_conn_del+0x128/0x270 net/bluetooth/sco.c:176
 sco_disconn_cfm net/bluetooth/sco.c:1178 [inline]
 sco_disconn_cfm+0x62/0x80 net/bluetooth/sco.c:1171
 hci_disconn_cfm include/net/bluetooth/hci_core.h:1438 [inline]
 hci_conn_hash_flush+0x114/0x220 net/bluetooth/hci_conn.c:1557
 hci_dev_do_close+0x5c6/0x1080 net/bluetooth/hci_core.c:1770
 hci_unregister_dev+0x1bd/0xe30 net/bluetooth/hci_core.c:3790
 vhci_release+0x70/0xe0 drivers/bluetooth/hci_vhci.c:340
 __fput+0x285/0x920 fs/file_table.c:281
 task_work_run+0xdd/0x190 kernel/task_work.c:141
 exit_task_work include/linux/task_work.h:25 [inline]
 do_exit+0xb7d/0x29f0 kernel/exit.c:806
 do_group_exit+0x125/0x310 kernel/exit.c:903
 get_signal+0x428/0x1f00 kernel/signal.c:2757
 arch_do_signal+0x82/0x2520 arch/x86/kernel/signal.c:811
 exit_to_user_mode_loop kernel/entry/common.c:159 [inline]
 exit_to_user_mode_prepare+0x1ae/0x200 kernel/entry/common.c:190
 syscall_exit_to_user_mode+0x7e/0x2e0 kernel/entry/common.c:265
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x447279

Reported-by: syzbot+65684128cd7c35bc66a1@syzkaller.appspotmail.com
Signed-off-by: Yanfei Xu <yanfei.xu@windriver.com>
---
 net/bluetooth/sco.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index dcf7f96ff417..559b883c815f 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -173,10 +173,10 @@  static void sco_conn_del(struct hci_conn *hcon, int err)
 
 	if (sk) {
 		sock_hold(sk);
-		bh_lock_sock(sk);
+		lock_sock(sk);
 		sco_sock_clear_timer(sk);
 		sco_chan_del(sk, err);
-		bh_unlock_sock(sk);
+		release_sock(sk);
 		sco_sock_kill(sk);
 		sock_put(sk);
 	}