diff mbox

bfq-mq: cause deadlock by executing exit_icq body immediately

Message ID 20170207173346.4789-1-paolo.valente@linaro.org
State New
Headers show

Commit Message

Paolo Valente Feb. 7, 2017, 5:33 p.m. UTC
Hi,
this patch is meant to show that, if the body of the hook exit_icq is executed
from inside that hook, and not as deferred work, then a circular deadlock
occurs.

It happens if, on a CPU
- the body of exit_icq takes the scheduler lock,
- it does so from inside the exit_icq hook, which is invoked with the queue
  lock held

while, on another CPU
- bfq_bio_merge, after taking the scheduler lock, invokes bfq_bic_lookup,
  which, in its turn, takes the queue lock. bfq_bic_lookup needs to take such a
  lock, because it invokes ioc_lookup_icq.

For more details, here is a lockdep report, taken right before the deadlock occurred.

[   44.059877] ======================================================
[   44.124922] [ INFO: possible circular locking dependency detected ]
[   44.125795] 4.10.0-rc5-bfq-mq+ #38 Not tainted
[   44.126414] -------------------------------------------------------
[   44.127291] sync/2043 is trying to acquire lock:
[   44.128918]  (&(&bfqd->lock)->rlock){-.-...}, at: [<ffffffff90484195>] bfq_exit_icq_bfqq+0x55/0x140
[   44.134052]
[   44.134052] but task is already holding lock:
[   44.134868]  (&(&q->__queue_lock)->rlock){-.....}, at: [<ffffffff9044738e>] put_io_context_active+0x6e/0xc0
[   44.136292]
[   44.136292] which lock already depends on the new lock.
[   44.136292]
[   44.137411]
[   44.137411] the existing dependency chain (in reverse order) is:
[   44.139936]
[   44.139936] -> #1 (&(&q->__queue_lock)->rlock){-.....}:
[   44.141512]
[   44.141517] [<ffffffff900ee6bb>] lock_acquire+0x11b/0x220
[   44.142569]
[   44.142578] [<ffffffff90943e66>] _raw_spin_lock_irqsave+0x56/0x90
[   44.146365]
[   44.146371] [<ffffffff904822f5>] bfq_bic_lookup.isra.112+0x25/0x60
[   44.147523]
[   44.147525] [<ffffffff9048245d>] bfq_request_merge+0x3d/0xe0
[   44.149738]
[   44.149742] [<ffffffff90439aef>] elv_merge+0xcf/0xe0
[   44.150726]
[   44.150728] [<ffffffff90453c46>] blk_mq_sched_try_merge+0x36/0x150
[   44.151878]
[   44.151881] [<ffffffff9047fb6a>] bfq_bio_merge+0x5a/0xa0
[   44.153913]
[   44.153916] [<ffffffff90454500>] __blk_mq_sched_bio_merge+0x60/0x70
[   44.155089]
[   44.155091] [<ffffffff9044e6c7>] blk_sq_make_request+0x277/0xa90
[   44.157455]
[   44.157458] [<ffffffff90440846>] generic_make_request+0xf6/0x2b0
[   44.158597]
[   44.158599] [<ffffffff90440a73>] submit_bio+0x73/0x150
[   44.160537]
[   44.160541] [<ffffffff90366e0b>] ext4_mpage_readpages+0x48b/0x950
[   44.162961]
[   44.162971] [<ffffffff90313236>] ext4_readpages+0x36/0x40
[   44.164037]
[   44.164040] [<ffffffff901eca4e>] __do_page_cache_readahead+0x2ae/0x3a0
[   44.165224]
[   44.165227] [<ffffffff901ecc4e>] ondemand_readahead+0x10e/0x4b0
[   44.166334]
[   44.166336] [<ffffffff901ed1a1>] page_cache_sync_readahead+0x31/0x50
[   44.167502]
[   44.167503] [<ffffffff901dcaed>] generic_file_read_iter+0x68d/0x8d0
[   44.168661]
[   44.168663] [<ffffffff9030e6c7>] ext4_file_read_iter+0x37/0xc0
[   44.169760]
[   44.169764] [<ffffffff9026e7c3>] __vfs_read+0xe3/0x150
[   44.171987]
[   44.171990] [<ffffffff9026ee58>] vfs_read+0xa8/0x170
[   44.178999]
[   44.179005] [<ffffffff902761e8>] prepare_binprm+0x118/0x200
[   44.180080]
[   44.180083] [<ffffffff90277bcb>] do_execveat_common.isra.39+0x56b/0xa00
[   44.184409]
[   44.184414] [<ffffffff902782fa>] SyS_execve+0x3a/0x50
[   44.185398]
[   44.185401] [<ffffffff90003e49>] do_syscall_64+0x69/0x160
[   44.187349]
[   44.187353] [<ffffffff9094408d>] return_from_SYSCALL_64+0x0/0x7a
[   44.188475]
[   44.188475] -> #0 (&(&bfqd->lock)->rlock){-.-...}:
[   44.199960]
[   44.199966] [<ffffffff900edd24>] __lock_acquire+0x15e4/0x1890
[   44.201056]
[   44.201058] [<ffffffff900ee6bb>] lock_acquire+0x11b/0x220
[   44.202099]
[   44.202101] [<ffffffff909434da>] _raw_spin_lock_irq+0x4a/0x80
[   44.203197]
[   44.203200] [<ffffffff90484195>] bfq_exit_icq_bfqq+0x55/0x140
[   44.204848]
[   44.204851] [<ffffffff9048429c>] bfq_exit_icq+0x1c/0x30
[   44.205863]
[   44.205866] [<ffffffff90446e68>] ioc_exit_icq+0x38/0x50
[   44.206881]
[   44.206883] [<ffffffff9044739a>] put_io_context_active+0x7a/0xc0
[   44.215156] sh (2042): drop_caches: 3
[   44.216738]
[   44.216742] [<ffffffff90447428>] exit_io_context+0x48/0x50
[   44.217497]
[   44.217500] [<ffffffff90095727>] do_exit+0x787/0xc50
[   44.218207]
[   44.218209] [<ffffffff90095c80>] do_group_exit+0x50/0xd0
[   44.218969]
[   44.218970] [<ffffffff90095d14>] SyS_exit_group+0x14/0x20
[   44.219716]
[   44.219718] [<ffffffff90943fc5>] entry_SYSCALL_64_fastpath+0x23/0xc6
[   44.220550]
[   44.220550] other info that might help us debug this:
[   44.220550]
[   44.223477]  Possible unsafe locking scenario:
[   44.223477]
[   44.224281]        CPU0                    CPU1
[   44.224903]        ----                    ----
[   44.225523]   lock(&(&q->__queue_lock)->rlock);
[   44.226144]                                lock(&(&bfqd->lock)->rlock);
[   44.227051]                                lock(&(&q->__queue_lock)->rlock);
[   44.228019]   lock(&(&bfqd->lock)->rlock);
[   44.228643]
[   44.228643]  *** DEADLOCK ***
[   44.228643]
[   44.230136] 2 locks held by sync/2043:
[   44.230591]  #0:  (&(&ioc->lock)->rlock/1){......}, at: [<ffffffff90447358>] put_io_context_active+0x38/0xc0
[   44.231553]  #1:  (&(&q->__queue_lock)->rlock){-.....}, at: [<ffffffff9044738e>] put_io_context_active+0x6e/0xc0
[   44.232542]
[   44.232542] stack backtrace:
[   44.232974] CPU: 1 PID: 2043 Comm: sync Not tainted 4.10.0-rc5-bfq-mq+ #38
[   44.238224] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
[   44.239364] Call Trace:
[   44.239717]  dump_stack+0x85/0xc2
[   44.240186]  print_circular_bug+0x1e3/0x250
[   44.240773]  __lock_acquire+0x15e4/0x1890
[   44.241350]  lock_acquire+0x11b/0x220
[   44.241867]  ? bfq_exit_icq_bfqq+0x55/0x140
[   44.242455]  _raw_spin_lock_irq+0x4a/0x80
[   44.243018]  ? bfq_exit_icq_bfqq+0x55/0x140
[   44.243629]  bfq_exit_icq_bfqq+0x55/0x140
[   44.244192]  bfq_exit_icq+0x1c/0x30
[   44.244713]  ioc_exit_icq+0x38/0x50
[   44.246518]  put_io_context_active+0x7a/0xc0
[   44.247116]  exit_io_context+0x48/0x50
[   44.247647]  do_exit+0x787/0xc50
[   44.248103]  do_group_exit+0x50/0xd0
[   44.249055]  SyS_exit_group+0x14/0x20
[   44.249568]  entry_SYSCALL_64_fastpath+0x23/0xc6
[   44.250208] RIP: 0033:0x7fd70b22ab98
[   44.250704] RSP: 002b:00007ffc9cc43878 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
[   44.251745] RAX: ffffffffffffffda RBX: 00007fd70b523620 RCX: 00007fd70b22ab98
[   44.252730] RDX: 0000000000000000 RSI: 000000000000003c RDI: 0000000000000000
[   44.253713] RBP: 00007fd70b523620 R08: 00000000000000e7 R09: ffffffffffffff98
[   44.254690] R10: 00007ffc9cc437c8 R11: 0000000000000246 R12: 0000000000000000
[   44.255674] R13: 00007fd70b523c40 R14: 0000000000000000 R15: 0000000000000000

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>

---
 block/bfq-mq-iosched.c | 34 +++-------------------------------
 block/bfq-mq.h         |  3 ---
 2 files changed, 3 insertions(+), 34 deletions(-)

-- 
2.10.0

Comments

Omar Sandoval Feb. 7, 2017, 9:45 p.m. UTC | #1
On Tue, Feb 07, 2017 at 06:33:46PM +0100, Paolo Valente wrote:
> Hi,

> this patch is meant to show that, if the  body of the hook exit_icq is executed

> from inside that hook, and not as deferred work, then a circular deadlock

> occurs.

> 

> It happens if, on a CPU

> - the body of icq_exit takes the scheduler lock,

> - it does so from inside the exit_icq hook, which is invoked with the queue

>   lock held

> 

> while, on another CPU

> - bfq_bio_merge, after taking the scheduler lock, invokes bfq_bic_lookup,

>   which, in its turn, takes the queue lock. bfq_bic_lookup needs to take such a

>   lock, because it invokes ioc_lookup_icq.

> 

> For more details, here is a lockdep report, right before the deadlock did occur.

> 

> [   44.059877] ======================================================

> [   44.124922] [ INFO: possible circular locking dependency detected ]

> [   44.125795] 4.10.0-rc5-bfq-mq+ #38 Not tainted

> [   44.126414] -------------------------------------------------------

> [   44.127291] sync/2043 is trying to acquire lock:

> [   44.128918]  (&(&bfqd->lock)->rlock){-.-...}, at: [<ffffffff90484195>] bfq_exit_icq_bfqq+0x55/0x140

> [   44.134052]

> [   44.134052] but task is already holding lock:

> [   44.134868]  (&(&q->__queue_lock)->rlock){-.....}, at: [<ffffffff9044738e>] put_io_context_active+0x6e/0xc0


Hey, Paolo,

I only briefly skimmed the code, but what are you using the queue_lock
for? You should just use your scheduler lock everywhere. blk-mq doesn't
use the queue lock, so the scheduler is the only thing you need mutual
exclusion against. I'm guessing if you stopped using that, your locking
issues would go away.
Paolo Valente Feb. 8, 2017, 10:03 a.m. UTC | #2
> Il giorno 07 feb 2017, alle ore 22:45, Omar Sandoval <osandov@osandov.com> ha scritto:

> 

> On Tue, Feb 07, 2017 at 06:33:46PM +0100, Paolo Valente wrote:

>> Hi,

>> this patch is meant to show that, if the  body of the hook exit_icq is executed

>> from inside that hook, and not as deferred work, then a circular deadlock

>> occurs.

>> 

>> It happens if, on a CPU

>> - the body of icq_exit takes the scheduler lock,

>> - it does so from inside the exit_icq hook, which is invoked with the queue

>>  lock held

>> 

>> while, on another CPU

>> - bfq_bio_merge, after taking the scheduler lock, invokes bfq_bic_lookup,

>>  which, in its turn, takes the queue lock. bfq_bic_lookup needs to take such a

>>  lock, because it invokes ioc_lookup_icq.

>> 

>> For more details, here is a lockdep report, right before the deadlock did occur.

>> 

>> [   44.059877] ======================================================

>> [   44.124922] [ INFO: possible circular locking dependency detected ]

>> [   44.125795] 4.10.0-rc5-bfq-mq+ #38 Not tainted

>> [   44.126414] -------------------------------------------------------

>> [   44.127291] sync/2043 is trying to acquire lock:

>> [   44.128918]  (&(&bfqd->lock)->rlock){-.-...}, at: [<ffffffff90484195>] bfq_exit_icq_bfqq+0x55/0x140

>> [   44.134052]

>> [   44.134052] but task is already holding lock:

>> [   44.134868]  (&(&q->__queue_lock)->rlock){-.....}, at: [<ffffffff9044738e>] put_io_context_active+0x6e/0xc0

> 

> Hey, Paolo,

> 

> I only briefly skimmed the code, but what are you using the queue_lock

> for? You should just use your scheduler lock everywhere. blk-mq doesn't

> use the queue lock, so the scheduler is the only thing you need mutual

> exclusion against.


Hi Omar,
the cause of the problem is that the hook functions bfq_request_merge
and bfq_allow_bio_merge invoke, directly or through other functions,
the function bfq_bic_lookup, which, in its turn, invokes
ioc_lookup_icq.  The latter must be invoked with the queue lock held.
In particular the offending lines in bfq_bic_lookup are:

		spin_lock_irqsave(q->queue_lock, flags);
		icq = icq_to_bic(ioc_lookup_icq(ioc, q));
		spin_unlock_irqrestore(q->queue_lock, flags);

Maybe I'm missing something and we can avoid taking this lock?

Thanks,
Paolo

> I'm guessing if you stopped using that, your locking

> issues would go away.
Omar Sandoval Feb. 8, 2017, 10:33 a.m. UTC | #3
On Wed, Feb 08, 2017 at 11:03:01AM +0100, Paolo Valente wrote:
> 

> > Il giorno 07 feb 2017, alle ore 22:45, Omar Sandoval <osandov@osandov.com> ha scritto:

> > 

> > On Tue, Feb 07, 2017 at 06:33:46PM +0100, Paolo Valente wrote:

> >> Hi,

> >> this patch is meant to show that, if the  body of the hook exit_icq is executed

> >> from inside that hook, and not as deferred work, then a circular deadlock

> >> occurs.

> >> 

> >> It happens if, on a CPU

> >> - the body of icq_exit takes the scheduler lock,

> >> - it does so from inside the exit_icq hook, which is invoked with the queue

> >>  lock held

> >> 

> >> while, on another CPU

> >> - bfq_bio_merge, after taking the scheduler lock, invokes bfq_bic_lookup,

> >>  which, in its turn, takes the queue lock. bfq_bic_lookup needs to take such a

> >>  lock, because it invokes ioc_lookup_icq.

> >> 

> >> For more details, here is a lockdep report, right before the deadlock did occur.

> >> 

> >> [   44.059877] ======================================================

> >> [   44.124922] [ INFO: possible circular locking dependency detected ]

> >> [   44.125795] 4.10.0-rc5-bfq-mq+ #38 Not tainted

> >> [   44.126414] -------------------------------------------------------

> >> [   44.127291] sync/2043 is trying to acquire lock:

> >> [   44.128918]  (&(&bfqd->lock)->rlock){-.-...}, at: [<ffffffff90484195>] bfq_exit_icq_bfqq+0x55/0x140

> >> [   44.134052]

> >> [   44.134052] but task is already holding lock:

> >> [   44.134868]  (&(&q->__queue_lock)->rlock){-.....}, at: [<ffffffff9044738e>] put_io_context_active+0x6e/0xc0

> > 

> > Hey, Paolo,

> > 

> > I only briefly skimmed the code, but what are you using the queue_lock

> > for? You should just use your scheduler lock everywhere. blk-mq doesn't

> > use the queue lock, so the scheduler is the only thing you need mutual

> > exclusion against.

> 

> Hi Omar,

> the cause of the problem is that the hook functions bfq_request_merge

> and bfq_allow_bio_merge invoke, directly or through other functions,

> the function bfq_bic_lookup, which, in its turn, invokes

> ioc_lookup_icq.  The latter must be invoked with the queue lock held.

> In particular the offending lines in bfq_bic_lookup are:

> 

> 		spin_lock_irqsave(q->queue_lock, flags);

> 		icq = icq_to_bic(ioc_lookup_icq(ioc, q));

> 		spin_unlock_irqrestore(q->queue_lock, flags);

> 

> Maybe I'm missing something and we can avoid taking this lock?


Ah, I didn't realize we still used the q->queue_lock for the icq stuff.
You're right, you still need that lock for ioc_lookup_icq(). Unless
there's something else I'm forgetting, that should be the only thing you
need it for in the core code, and you should use your scheduler lock for
everything else. What else are you using q->queue_lock for?
Paolo Valente Feb. 8, 2017, 10:39 a.m. UTC | #4
> Il giorno 08 feb 2017, alle ore 11:33, Omar Sandoval <osandov@osandov.com> ha scritto:

> 

> On Wed, Feb 08, 2017 at 11:03:01AM +0100, Paolo Valente wrote:

>> 

>>> Il giorno 07 feb 2017, alle ore 22:45, Omar Sandoval <osandov@osandov.com> ha scritto:

>>> 

>>> On Tue, Feb 07, 2017 at 06:33:46PM +0100, Paolo Valente wrote:

>>>> Hi,

>>>> this patch is meant to show that, if the  body of the hook exit_icq is executed

>>>> from inside that hook, and not as deferred work, then a circular deadlock

>>>> occurs.

>>>> 

>>>> It happens if, on a CPU

>>>> - the body of icq_exit takes the scheduler lock,

>>>> - it does so from inside the exit_icq hook, which is invoked with the queue

>>>> lock held

>>>> 

>>>> while, on another CPU

>>>> - bfq_bio_merge, after taking the scheduler lock, invokes bfq_bic_lookup,

>>>> which, in its turn, takes the queue lock. bfq_bic_lookup needs to take such a

>>>> lock, because it invokes ioc_lookup_icq.

>>>> 

>>>> For more details, here is a lockdep report, right before the deadlock did occur.

>>>> 

>>>> [   44.059877] ======================================================

>>>> [   44.124922] [ INFO: possible circular locking dependency detected ]

>>>> [   44.125795] 4.10.0-rc5-bfq-mq+ #38 Not tainted

>>>> [   44.126414] -------------------------------------------------------

>>>> [   44.127291] sync/2043 is trying to acquire lock:

>>>> [   44.128918]  (&(&bfqd->lock)->rlock){-.-...}, at: [<ffffffff90484195>] bfq_exit_icq_bfqq+0x55/0x140

>>>> [   44.134052]

>>>> [   44.134052] but task is already holding lock:

>>>> [   44.134868]  (&(&q->__queue_lock)->rlock){-.....}, at: [<ffffffff9044738e>] put_io_context_active+0x6e/0xc0

>>> 

>>> Hey, Paolo,

>>> 

>>> I only briefly skimmed the code, but what are you using the queue_lock

>>> for? You should just use your scheduler lock everywhere. blk-mq doesn't

>>> use the queue lock, so the scheduler is the only thing you need mutual

>>> exclusion against.

>> 

>> Hi Omar,

>> the cause of the problem is that the hook functions bfq_request_merge

>> and bfq_allow_bio_merge invoke, directly or through other functions,

>> the function bfq_bic_lookup, which, in its turn, invokes

>> ioc_lookup_icq.  The latter must be invoked with the queue lock held.

>> In particular the offending lines in bfq_bic_lookup are:

>> 

>> 		spin_lock_irqsave(q->queue_lock, flags);

>> 		icq = icq_to_bic(ioc_lookup_icq(ioc, q));

>> 		spin_unlock_irqrestore(q->queue_lock, flags);

>> 

>> Maybe I'm missing something and we can avoid taking this lock?

> 

> Ah, I didn't realize we still used the q->queue_lock for the icq stuff.

> You're right, you still need that lock for ioc_lookup_icq(). Unless

> there's something else I'm forgetting, that should be the only thing you

> need it for in the core code, and you should use your scheduler lock for

> everything else. What else are you using q->queue_lock for? 


Nothing.  The deadlock follows from the fact that bfq_request_merge gets
called with the scheduler lock already held.  Problematic paths start from:
bfq_bio_merge and bfq_insert_request.

I'm trying to understand whether I/we can reorder operations in some
way that avoids the nested locking, but to no avail so far.

Thanks,
Paolo
Omar Sandoval Feb. 8, 2017, 5:17 p.m. UTC | #5
On Wed, Feb 08, 2017 at 11:39:24AM +0100, Paolo Valente wrote:
> 

> > Il giorno 08 feb 2017, alle ore 11:33, Omar Sandoval <osandov@osandov.com> ha scritto:

> > 

> > On Wed, Feb 08, 2017 at 11:03:01AM +0100, Paolo Valente wrote:

> >> 

> >>> Il giorno 07 feb 2017, alle ore 22:45, Omar Sandoval <osandov@osandov.com> ha scritto:

> >>> 

> >>> On Tue, Feb 07, 2017 at 06:33:46PM +0100, Paolo Valente wrote:

> >>>> Hi,

> >>>> this patch is meant to show that, if the  body of the hook exit_icq is executed

> >>>> from inside that hook, and not as deferred work, then a circular deadlock

> >>>> occurs.

> >>>> 

> >>>> It happens if, on a CPU

> >>>> - the body of icq_exit takes the scheduler lock,

> >>>> - it does so from inside the exit_icq hook, which is invoked with the queue

> >>>> lock held

> >>>> 

> >>>> while, on another CPU

> >>>> - bfq_bio_merge, after taking the scheduler lock, invokes bfq_bic_lookup,

> >>>> which, in its turn, takes the queue lock. bfq_bic_lookup needs to take such a

> >>>> lock, because it invokes ioc_lookup_icq.

> >>>> 

> >>>> For more details, here is a lockdep report, right before the deadlock did occur.

> >>>> 

> >>>> [   44.059877] ======================================================

> >>>> [   44.124922] [ INFO: possible circular locking dependency detected ]

> >>>> [   44.125795] 4.10.0-rc5-bfq-mq+ #38 Not tainted

> >>>> [   44.126414] -------------------------------------------------------

> >>>> [   44.127291] sync/2043 is trying to acquire lock:

> >>>> [   44.128918]  (&(&bfqd->lock)->rlock){-.-...}, at: [<ffffffff90484195>] bfq_exit_icq_bfqq+0x55/0x140

> >>>> [   44.134052]

> >>>> [   44.134052] but task is already holding lock:

> >>>> [   44.134868]  (&(&q->__queue_lock)->rlock){-.....}, at: [<ffffffff9044738e>] put_io_context_active+0x6e/0xc0

> >>> 

> >>> Hey, Paolo,

> >>> 

> >>> I only briefly skimmed the code, but what are you using the queue_lock

> >>> for? You should just use your scheduler lock everywhere. blk-mq doesn't

> >>> use the queue lock, so the scheduler is the only thing you need mutual

> >>> exclusion against.

> >> 

> >> Hi Omar,

> >> the cause of the problem is that the hook functions bfq_request_merge

> >> and bfq_allow_bio_merge invoke, directly or through other functions,

> >> the function bfq_bic_lookup, which, in its turn, invokes

> >> ioc_lookup_icq.  The latter must be invoked with the queue lock held.

> >> In particular the offending lines in bfq_bic_lookup are:

> >> 

> >> 		spin_lock_irqsave(q->queue_lock, flags);

> >> 		icq = icq_to_bic(ioc_lookup_icq(ioc, q));

> >> 		spin_unlock_irqrestore(q->queue_lock, flags);

> >> 

> >> Maybe I'm missing something and we can avoid taking this lock?

> > 

> > Ah, I didn't realize we still used the q->queue_lock for the icq stuff.

> > You're right, you still need that lock for ioc_lookup_icq(). Unless

> > there's something else I'm forgetting, that should be the only thing you

> > need it for in the core code, and you should use your scheduler lock for

> > everything else. What else are you using q->queue_lock for? 

> 

> Nothing.  The deadlock follows from that bfq_request_merge gets called

> with the scheduler lock already held.  Problematic paths start from:

> bfq_bio_merge and bfq_insert_request.

> 

> I'm trying to understand whether I/we can reorder operations in some

> way that avoids the nested locking, but at no avail so far.

> 

> Thanks,

> Paolo


Okay, I understand what you're saying now. It was all in the first email
but I didn't see it right away, sorry about that.

I don't think it makes sense for ->exit_icq() to be invoked while
holding q->queue_lock for blk-mq -- we don't hold that lock for any of
the other hooks. Could you try the below? I haven't convinced myself
that there isn't a circular locking dependency between bfqd->lock and
ioc->lock now, but it's probably easiest for you to just try it.diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index fe186a9eade9..b12f9c87b4c3 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -35,7 +35,10 @@ static void icq_free_icq_rcu(struct rcu_head *head)
 	kmem_cache_free(icq->__rcu_icq_cache, icq);
 }
 
-/* Exit an icq. Called with both ioc and q locked. */
+/*
+ * Exit an icq. Called with both ioc and q locked for sq, only ioc locked for
+ * mq.
+ */
 static void ioc_exit_icq(struct io_cq *icq)
 {
 	struct elevator_type *et = icq->q->elevator->type;
@@ -166,6 +169,7 @@ EXPORT_SYMBOL(put_io_context);
  */
 void put_io_context_active(struct io_context *ioc)
 {
+	struct elevator_type *et;
 	unsigned long flags;
 	struct io_cq *icq;
 
@@ -184,13 +188,19 @@ void put_io_context_active(struct io_context *ioc)
 	hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) {
 		if (icq->flags & ICQ_EXITED)
 			continue;
-		if (spin_trylock(icq->q->queue_lock)) {
+
+		et = icq->q->elevator->type;
+		if (et->uses_mq) {
 			ioc_exit_icq(icq);
-			spin_unlock(icq->q->queue_lock);
 		} else {
-			spin_unlock_irqrestore(&ioc->lock, flags);
-			cpu_relax();
-			goto retry;
+			if (spin_trylock(icq->q->queue_lock)) {
+				ioc_exit_icq(icq);
+				spin_unlock(icq->q->queue_lock);
+			} else {
+				spin_unlock_irqrestore(&ioc->lock, flags);
+				cpu_relax();
+				goto retry;
+			}
 		}
 	}
 	spin_unlock_irqrestore(&ioc->lock, flags);

Paolo Valente Feb. 10, 2017, 1 p.m. UTC | #6
> Il giorno 08 feb 2017, alle ore 18:17, Omar Sandoval <osandov@osandov.com> ha scritto:

> 

> On Wed, Feb 08, 2017 at 11:39:24AM +0100, Paolo Valente wrote:

>> 

>>> Il giorno 08 feb 2017, alle ore 11:33, Omar Sandoval <osandov@osandov.com> ha scritto:

>>> 

>>> On Wed, Feb 08, 2017 at 11:03:01AM +0100, Paolo Valente wrote:

>>>> 

>>>>> Il giorno 07 feb 2017, alle ore 22:45, Omar Sandoval <osandov@osandov.com> ha scritto:

>>>>> 

>>>>> On Tue, Feb 07, 2017 at 06:33:46PM +0100, Paolo Valente wrote:

>>>>>> Hi,

>>>>>> this patch is meant to show that, if the  body of the hook exit_icq is executed

>>>>>> from inside that hook, and not as deferred work, then a circular deadlock

>>>>>> occurs.

>>>>>> 

>>>>>> It happens if, on a CPU

>>>>>> - the body of icq_exit takes the scheduler lock,

>>>>>> - it does so from inside the exit_icq hook, which is invoked with the queue

>>>>>> lock held

>>>>>> 

>>>>>> while, on another CPU

>>>>>> - bfq_bio_merge, after taking the scheduler lock, invokes bfq_bic_lookup,

>>>>>> which, in its turn, takes the queue lock. bfq_bic_lookup needs to take such a

>>>>>> lock, because it invokes ioc_lookup_icq.

>>>>>> 

>>>>>> For more details, here is a lockdep report, right before the deadlock did occur.

>>>>>> 

>>>>>> [   44.059877] ======================================================

>>>>>> [   44.124922] [ INFO: possible circular locking dependency detected ]

>>>>>> [   44.125795] 4.10.0-rc5-bfq-mq+ #38 Not tainted

>>>>>> [   44.126414] -------------------------------------------------------

>>>>>> [   44.127291] sync/2043 is trying to acquire lock:

>>>>>> [   44.128918]  (&(&bfqd->lock)->rlock){-.-...}, at: [<ffffffff90484195>] bfq_exit_icq_bfqq+0x55/0x140

>>>>>> [   44.134052]

>>>>>> [   44.134052] but task is already holding lock:

>>>>>> [   44.134868]  (&(&q->__queue_lock)->rlock){-.....}, at: [<ffffffff9044738e>] put_io_context_active+0x6e/0xc0

>>>>> 

>>>>> Hey, Paolo,

>>>>> 

>>>>> I only briefly skimmed the code, but what are you using the queue_lock

>>>>> for? You should just use your scheduler lock everywhere. blk-mq doesn't

>>>>> use the queue lock, so the scheduler is the only thing you need mutual

>>>>> exclusion against.

>>>> 

>>>> Hi Omar,

>>>> the cause of the problem is that the hook functions bfq_request_merge

>>>> and bfq_allow_bio_merge invoke, directly or through other functions,

>>>> the function bfq_bic_lookup, which, in its turn, invokes

>>>> ioc_lookup_icq.  The latter must be invoked with the queue lock held.

>>>> In particular the offending lines in bfq_bic_lookup are:

>>>> 

>>>> 		spin_lock_irqsave(q->queue_lock, flags);

>>>> 		icq = icq_to_bic(ioc_lookup_icq(ioc, q));

>>>> 		spin_unlock_irqrestore(q->queue_lock, flags);

>>>> 

>>>> Maybe I'm missing something and we can avoid taking this lock?

>>> 

>>> Ah, I didn't realize we still used the q->queue_lock for the icq stuff.

>>> You're right, you still need that lock for ioc_lookup_icq(). Unless

>>> there's something else I'm forgetting, that should be the only thing you

>>> need it for in the core code, and you should use your scheduler lock for

>>> everything else. What else are you using q->queue_lock for? 

>> 

>> Nothing.  The deadlock follows from that bfq_request_merge gets called

>> with the scheduler lock already held.  Problematic paths start from:

>> bfq_bio_merge and bfq_insert_request.

>> 

>> I'm trying to understand whether I/we can reorder operations in some

>> way that avoids the nested locking, but at no avail so far.

>> 

>> Thanks,

>> Paolo

> 

> Okay, I understand what you're saying now. It was all in the first email

> but I didn't see it right away, sorry about that.

> 

> I don't think it makes sense for ->exit_icq() to be invoked while

> holding q->queue_lock for blk-mq -- we don't hold that lock for any of

> the other hooks. Could you try the below? I haven't convinced myself

> that there isn't a circular locking dependency between bfqd->lock and

> ioc->lock now, but it's probably easiest for you to just try it.

> 


Just passed the last of a heavy batch of tests!

I have updated the bfq-mq branch [1], adding a temporary commit that
contains your diffs (while waiting for you final patch or the like).

Looking forward to some feedback on the other issue I have raised on
locking, and to some review of the current version of bfq-mq, before
proceeding with cgroups support.

Thanks,
Paolo

[1] https://github.com/Algodev-github/bfq-mq

> diff --git a/block/blk-ioc.c b/block/blk-ioc.c

> index fe186a9eade9..b12f9c87b4c3 100644

> --- a/block/blk-ioc.c

> +++ b/block/blk-ioc.c

> @@ -35,7 +35,10 @@ static void icq_free_icq_rcu(struct rcu_head *head)

> 	kmem_cache_free(icq->__rcu_icq_cache, icq);

> }

> 

> -/* Exit an icq. Called with both ioc and q locked. */

> +/*

> + * Exit an icq. Called with both ioc and q locked for sq, only ioc locked for

> + * mq.

> + */

> static void ioc_exit_icq(struct io_cq *icq)

> {

> 	struct elevator_type *et = icq->q->elevator->type;

> @@ -166,6 +169,7 @@ EXPORT_SYMBOL(put_io_context);

>  */

> void put_io_context_active(struct io_context *ioc)

> {

> +	struct elevator_type *et;

> 	unsigned long flags;

> 	struct io_cq *icq;

> 

> @@ -184,13 +188,19 @@ void put_io_context_active(struct io_context *ioc)

> 	hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) {

> 		if (icq->flags & ICQ_EXITED)

> 			continue;

> -		if (spin_trylock(icq->q->queue_lock)) {

> +

> +		et = icq->q->elevator->type;

> +		if (et->uses_mq) {

> 			ioc_exit_icq(icq);

> -			spin_unlock(icq->q->queue_lock);

> 		} else {

> -			spin_unlock_irqrestore(&ioc->lock, flags);

> -			cpu_relax();

> -			goto retry;

> +			if (spin_trylock(icq->q->queue_lock)) {

> +				ioc_exit_icq(icq);

> +				spin_unlock(icq->q->queue_lock);

> +			} else {

> +				spin_unlock_irqrestore(&ioc->lock, flags);

> +				cpu_relax();

> +				goto retry;

> +			}

> 		}

> 	}

> 	spin_unlock_irqrestore(&ioc->lock, flags);
Jens Axboe Feb. 10, 2017, 4:09 p.m. UTC | #7
On 02/10/2017 06:00 AM, Paolo Valente wrote:
> 

>> Il giorno 08 feb 2017, alle ore 18:17, Omar Sandoval <osandov@osandov.com> ha scritto:

>>

>> On Wed, Feb 08, 2017 at 11:39:24AM +0100, Paolo Valente wrote:

>>>

>>>> Il giorno 08 feb 2017, alle ore 11:33, Omar Sandoval <osandov@osandov.com> ha scritto:

>>>>

>>>> On Wed, Feb 08, 2017 at 11:03:01AM +0100, Paolo Valente wrote:

>>>>>

>>>>>> Il giorno 07 feb 2017, alle ore 22:45, Omar Sandoval <osandov@osandov.com> ha scritto:

>>>>>>

>>>>>> On Tue, Feb 07, 2017 at 06:33:46PM +0100, Paolo Valente wrote:

>>>>>>> Hi,

>>>>>>> this patch is meant to show that, if the  body of the hook exit_icq is executed

>>>>>>> from inside that hook, and not as deferred work, then a circular deadlock

>>>>>>> occurs.

>>>>>>>

>>>>>>> It happens if, on a CPU

>>>>>>> - the body of icq_exit takes the scheduler lock,

>>>>>>> - it does so from inside the exit_icq hook, which is invoked with the queue

>>>>>>> lock held

>>>>>>>

>>>>>>> while, on another CPU

>>>>>>> - bfq_bio_merge, after taking the scheduler lock, invokes bfq_bic_lookup,

>>>>>>> which, in its turn, takes the queue lock. bfq_bic_lookup needs to take such a

>>>>>>> lock, because it invokes ioc_lookup_icq.

>>>>>>>

>>>>>>> For more details, here is a lockdep report, right before the deadlock did occur.

>>>>>>>

>>>>>>> [   44.059877] ======================================================

>>>>>>> [   44.124922] [ INFO: possible circular locking dependency detected ]

>>>>>>> [   44.125795] 4.10.0-rc5-bfq-mq+ #38 Not tainted

>>>>>>> [   44.126414] -------------------------------------------------------

>>>>>>> [   44.127291] sync/2043 is trying to acquire lock:

>>>>>>> [   44.128918]  (&(&bfqd->lock)->rlock){-.-...}, at: [<ffffffff90484195>] bfq_exit_icq_bfqq+0x55/0x140

>>>>>>> [   44.134052]

>>>>>>> [   44.134052] but task is already holding lock:

>>>>>>> [   44.134868]  (&(&q->__queue_lock)->rlock){-.....}, at: [<ffffffff9044738e>] put_io_context_active+0x6e/0xc0

>>>>>>

>>>>>> Hey, Paolo,

>>>>>>

>>>>>> I only briefly skimmed the code, but what are you using the queue_lock

>>>>>> for? You should just use your scheduler lock everywhere. blk-mq doesn't

>>>>>> use the queue lock, so the scheduler is the only thing you need mutual

>>>>>> exclusion against.

>>>>>

>>>>> Hi Omar,

>>>>> the cause of the problem is that the hook functions bfq_request_merge

>>>>> and bfq_allow_bio_merge invoke, directly or through other functions,

>>>>> the function bfq_bic_lookup, which, in its turn, invokes

>>>>> ioc_lookup_icq.  The latter must be invoked with the queue lock held.

>>>>> In particular the offending lines in bfq_bic_lookup are:

>>>>>

>>>>> 		spin_lock_irqsave(q->queue_lock, flags);

>>>>> 		icq = icq_to_bic(ioc_lookup_icq(ioc, q));

>>>>> 		spin_unlock_irqrestore(q->queue_lock, flags);

>>>>>

>>>>> Maybe I'm missing something and we can avoid taking this lock?

>>>>

>>>> Ah, I didn't realize we still used the q->queue_lock for the icq stuff.

>>>> You're right, you still need that lock for ioc_lookup_icq(). Unless

>>>> there's something else I'm forgetting, that should be the only thing you

>>>> need it for in the core code, and you should use your scheduler lock for

>>>> everything else. What else are you using q->queue_lock for? 

>>>

>>> Nothing.  The deadlock follows from that bfq_request_merge gets called

>>> with the scheduler lock already held.  Problematic paths start from:

>>> bfq_bio_merge and bfq_insert_request.

>>>

>>> I'm trying to understand whether I/we can reorder operations in some

>>> way that avoids the nested locking, but at no avail so far.

>>>

>>> Thanks,

>>> Paolo

>>

>> Okay, I understand what you're saying now. It was all in the first email

>> but I didn't see it right away, sorry about that.

>>

>> I don't think it makes sense for ->exit_icq() to be invoked while

>> holding q->queue_lock for blk-mq -- we don't hold that lock for any of

>> the other hooks. Could you try the below? I haven't convinced myself

>> that there isn't a circular locking dependency between bfqd->lock and

>> ioc->lock now, but it's probably easiest for you to just try it.

>>

> 

> Just passed the last of a heavy batch of tests!


Omar, care to turn this into a real patch and submit it?

-- 
Jens Axboe
diff mbox

Patch

diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c
index 05a12b6..6e79bb8 100644
--- a/block/bfq-mq-iosched.c
+++ b/block/bfq-mq-iosched.c
@@ -4001,28 +4001,13 @@  static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync)
 	}
 }
 
-static void bfq_exit_icq_body(struct work_struct *work)
-{
-	struct bfq_io_cq *bic =
-		container_of(work, struct bfq_io_cq, exit_icq_work);
-
-	bfq_exit_icq_bfqq(bic, true);
-	bfq_exit_icq_bfqq(bic, false);
-}
-
-static void bfq_init_icq(struct io_cq *icq)
-{
-	struct bfq_io_cq *bic = icq_to_bic(icq);
-
-	INIT_WORK(&bic->exit_icq_work, bfq_exit_icq_body);
-}
-
 static void bfq_exit_icq(struct io_cq *icq)
 {
 	struct bfq_io_cq *bic = icq_to_bic(icq);
 
 	BUG_ON(!bic);
-	kblockd_schedule_work(&bic->exit_icq_work);
+	bfq_exit_icq_bfqq(bic, true);
+	bfq_exit_icq_bfqq(bic, false);
 }
 
 /*
@@ -4934,21 +4919,9 @@  static void bfq_exit_queue(struct elevator_queue *e)
 	BUG_ON(bfqd->in_service_queue);
 	BUG_ON(!list_empty(&bfqd->active_list));
 
-	list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) {
-		if (bfqq->bic) /* bfqqs without bic are handled below */
-			cancel_work_sync(&bfqq->bic->exit_icq_work);
-	}
-
 	spin_lock_irq(&bfqd->lock);
-	list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) {
+	list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list)
 		bfq_deactivate_bfqq(bfqd, bfqq, false, false);
-		/*
-		 * Make sure that deferred exit_icq_work completes
-		 * without errors for bfq_queues without bic
-		 */
-		if (!bfqq->bic)
-			bfqq->bfqd = NULL;
-	}
 	spin_unlock_irq(&bfqd->lock);
 
 	hrtimer_cancel(&bfqd->idle_slice_timer);
@@ -5384,7 +5357,6 @@  static struct elevator_type iosched_bfq_mq = {
 	.ops.mq = {
 		.get_rq_priv		= bfq_get_rq_private,
 		.put_rq_priv		= bfq_put_rq_private,
-		.init_icq		= bfq_init_icq,
 		.exit_icq		= bfq_exit_icq,
 		.insert_requests	= bfq_insert_requests,
 		.dispatch_request	= bfq_dispatch_request,
diff --git a/block/bfq-mq.h b/block/bfq-mq.h
index 6e1c0d8..41b9d33 100644
--- a/block/bfq-mq.h
+++ b/block/bfq-mq.h
@@ -345,9 +345,6 @@  struct bfq_io_cq {
 	uint64_t blkcg_serial_nr; /* the current blkcg serial */
 #endif
 
-	/* delayed work to exec the body of the the exit_icq handler */
-	struct work_struct exit_icq_work;
-
 	/*
 	 * Snapshot of the idle window before merging; taken to
 	 * remember this value while the queue is merged, so as to be