diff mbox series

[2/4] drm/shmem: Use mutex_trylock in drm_gem_shmem_purge

Message ID 20190819161204.3106-3-robh@kernel.org
State Superseded
Headers show
Series panfrost: Locking fixes | expand

Commit Message

Rob Herring (Arm) Aug. 19, 2019, 4:12 p.m. UTC
Lockdep reports a circular locking dependency with pages_lock taken in
the shrinker callback. The deadlock can't actually happen with current
users at least as a BO will never be purgeable when pages_lock is held.
To be safe, let's use mutex_trylock() instead and bail if a BO is locked
already.

WARNING: possible circular locking dependency detected
5.3.0-rc1+ #100 Tainted: G             L
------------------------------------------------------
kswapd0/171 is trying to acquire lock:
000000009b9823fd (&shmem->pages_lock){+.+.}, at: drm_gem_shmem_purge+0x20/0x40

but task is already holding lock:
00000000f82369b6 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x0/0x40

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (fs_reclaim){+.+.}:
       fs_reclaim_acquire.part.18+0x34/0x40
       fs_reclaim_acquire+0x20/0x28
       __kmalloc_node+0x6c/0x4c0
       kvmalloc_node+0x38/0xa8
       drm_gem_get_pages+0x80/0x1d0
       drm_gem_shmem_get_pages+0x58/0xa0
       drm_gem_shmem_get_pages_sgt+0x48/0xd0
       panfrost_mmu_map+0x38/0xf8 [panfrost]
       panfrost_gem_open+0xc0/0xe8 [panfrost]
       drm_gem_handle_create_tail+0xe8/0x198
       drm_gem_handle_create+0x3c/0x50
       panfrost_gem_create_with_handle+0x70/0xa0 [panfrost]
       panfrost_ioctl_create_bo+0x48/0x80 [panfrost]
       drm_ioctl_kernel+0xb8/0x110
       drm_ioctl+0x244/0x3f0
       do_vfs_ioctl+0xbc/0x910
       ksys_ioctl+0x78/0xa8
       __arm64_sys_ioctl+0x1c/0x28
       el0_svc_common.constprop.0+0x90/0x168
       el0_svc_handler+0x28/0x78
       el0_svc+0x8/0xc

-> #0 (&shmem->pages_lock){+.+.}:
       __lock_acquire+0xa2c/0x1d70
       lock_acquire+0xdc/0x228
       __mutex_lock+0x8c/0x800
       mutex_lock_nested+0x1c/0x28
       drm_gem_shmem_purge+0x20/0x40
       panfrost_gem_shrinker_scan+0xc0/0x180 [panfrost]
       do_shrink_slab+0x208/0x500
       shrink_slab+0x10c/0x2c0
       shrink_node+0x28c/0x4d8
       balance_pgdat+0x2c8/0x570
       kswapd+0x22c/0x638
       kthread+0x128/0x130
       ret_from_fork+0x10/0x18

other info that might help us debug this:

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(fs_reclaim);
                               lock(&shmem->pages_lock);
                               lock(fs_reclaim);
  lock(&shmem->pages_lock);

 *** DEADLOCK ***

3 locks held by kswapd0/171:
 #0: 00000000f82369b6 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x0/0x40
 #1: 00000000ceb37808 (shrinker_rwsem){++++}, at: shrink_slab+0xbc/0x2c0
 #2: 00000000f31efa81 (&pfdev->shrinker_lock){+.+.}, at: panfrost_gem_shrinker_scan+0x34/0x180 [panfrost]

Fixes: 17acb9f35ed7 ("drm/shmem: Add madvise state and purge helpers")
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <maxime.ripard@bootlin.com>
Cc: Sean Paul <sean@poorly.run>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/gpu/drm/drm_gem_shmem_helper.c | 7 +++++--
 include/drm/drm_gem_shmem_helper.h     | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

Comments

Daniel Vetter Aug. 20, 2019, 9:05 a.m. UTC | #1
On Mon, Aug 19, 2019 at 11:12:02AM -0500, Rob Herring wrote:
> Lockdep reports a circular locking dependency with pages_lock taken in
> the shrinker callback. The deadlock can't actually happen with current
> users at least as a BO will never be purgeable when pages_lock is held.
> To be safe, let's use mutex_trylock() instead and bail if a BO is locked
> already.
> 
> WARNING: possible circular locking dependency detected
> 5.3.0-rc1+ #100 Tainted: G             L
> ------------------------------------------------------
> kswapd0/171 is trying to acquire lock:
> 000000009b9823fd (&shmem->pages_lock){+.+.}, at: drm_gem_shmem_purge+0x20/0x40
> 
> but task is already holding lock:
> 00000000f82369b6 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x0/0x40
> 
> which lock already depends on the new lock.
> 
> the existing dependency chain (in reverse order) is:
> 
> -> #1 (fs_reclaim){+.+.}:
>        fs_reclaim_acquire.part.18+0x34/0x40
>        fs_reclaim_acquire+0x20/0x28
>        __kmalloc_node+0x6c/0x4c0
>        kvmalloc_node+0x38/0xa8
>        drm_gem_get_pages+0x80/0x1d0
>        drm_gem_shmem_get_pages+0x58/0xa0
>        drm_gem_shmem_get_pages_sgt+0x48/0xd0
>        panfrost_mmu_map+0x38/0xf8 [panfrost]
>        panfrost_gem_open+0xc0/0xe8 [panfrost]
>        drm_gem_handle_create_tail+0xe8/0x198
>        drm_gem_handle_create+0x3c/0x50
>        panfrost_gem_create_with_handle+0x70/0xa0 [panfrost]
>        panfrost_ioctl_create_bo+0x48/0x80 [panfrost]
>        drm_ioctl_kernel+0xb8/0x110
>        drm_ioctl+0x244/0x3f0
>        do_vfs_ioctl+0xbc/0x910
>        ksys_ioctl+0x78/0xa8
>        __arm64_sys_ioctl+0x1c/0x28
>        el0_svc_common.constprop.0+0x90/0x168
>        el0_svc_handler+0x28/0x78
>        el0_svc+0x8/0xc
> 
> -> #0 (&shmem->pages_lock){+.+.}:
>        __lock_acquire+0xa2c/0x1d70
>        lock_acquire+0xdc/0x228
>        __mutex_lock+0x8c/0x800
>        mutex_lock_nested+0x1c/0x28
>        drm_gem_shmem_purge+0x20/0x40
>        panfrost_gem_shrinker_scan+0xc0/0x180 [panfrost]
>        do_shrink_slab+0x208/0x500
>        shrink_slab+0x10c/0x2c0
>        shrink_node+0x28c/0x4d8
>        balance_pgdat+0x2c8/0x570
>        kswapd+0x22c/0x638
>        kthread+0x128/0x130
>        ret_from_fork+0x10/0x18
> 
> other info that might help us debug this:
> 
>  Possible unsafe locking scenario:
> 
>        CPU0                    CPU1
>        ----                    ----
>   lock(fs_reclaim);
>                                lock(&shmem->pages_lock);
>                                lock(fs_reclaim);
>   lock(&shmem->pages_lock);
> 
>  *** DEADLOCK ***
> 
> 3 locks held by kswapd0/171:
>  #0: 00000000f82369b6 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x0/0x40
>  #1: 00000000ceb37808 (shrinker_rwsem){++++}, at: shrink_slab+0xbc/0x2c0
>  #2: 00000000f31efa81 (&pfdev->shrinker_lock){+.+.}, at: panfrost_gem_shrinker_scan+0x34/0x180 [panfrost]
> 
> Fixes: 17acb9f35ed7 ("drm/shmem: Add madvise state and purge helpers")
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Maxime Ripard <maxime.ripard@bootlin.com>
> Cc: Sean Paul <sean@poorly.run>
> Cc: David Airlie <airlied@linux.ie>
> Cc: Daniel Vetter <daniel@ffwll.ch>
> Signed-off-by: Rob Herring <robh@kernel.org>
> ---
>  drivers/gpu/drm/drm_gem_shmem_helper.c | 7 +++++--
>  include/drm/drm_gem_shmem_helper.h     | 2 +-
>  2 files changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
> index 5423ec56b535..f5918707672f 100644
> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
> @@ -415,13 +415,16 @@ void drm_gem_shmem_purge_locked(struct drm_gem_object *obj)
>  }
>  EXPORT_SYMBOL(drm_gem_shmem_purge_locked);
>  
> -void drm_gem_shmem_purge(struct drm_gem_object *obj)
> +bool drm_gem_shmem_purge(struct drm_gem_object *obj)
>  {
>  	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
>  
> -	mutex_lock(&shmem->pages_lock);
> +	if (!mutex_trylock(&shmem->pages_lock))

Did you see my ping about cutting all the locking over to dma_resv? Would
align shmem helpers with ttm a lot more, for that bright glorious future
taste. Should we capture that in some todo.rst entry?

Cheers, Daniel

> +		return false;
>  	drm_gem_shmem_purge_locked(obj);
>  	mutex_unlock(&shmem->pages_lock);
> +
> +	return true;
>  }
>  EXPORT_SYMBOL(drm_gem_shmem_purge);
>  
> diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
> index ce1600fdfc3e..01f514521687 100644
> --- a/include/drm/drm_gem_shmem_helper.h
> +++ b/include/drm/drm_gem_shmem_helper.h
> @@ -134,7 +134,7 @@ static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem
>  }
>  
>  void drm_gem_shmem_purge_locked(struct drm_gem_object *obj);
> -void drm_gem_shmem_purge(struct drm_gem_object *obj);
> +bool drm_gem_shmem_purge(struct drm_gem_object *obj);
>  
>  struct drm_gem_shmem_object *
>  drm_gem_shmem_create_with_handle(struct drm_file *file_priv,
> -- 
> 2.20.1
>
Rob Herring (Arm) Aug. 20, 2019, 12:35 p.m. UTC | #2
On Tue, Aug 20, 2019 at 4:05 AM Daniel Vetter <daniel@ffwll.ch> wrote:
>
> On Mon, Aug 19, 2019 at 11:12:02AM -0500, Rob Herring wrote:
> > Lockdep reports a circular locking dependency with pages_lock taken in
> > the shrinker callback. The deadlock can't actually happen with current
> > users at least as a BO will never be purgeable when pages_lock is held.
> > To be safe, let's use mutex_trylock() instead and bail if a BO is locked
> > already.
> >
> > WARNING: possible circular locking dependency detected
> > 5.3.0-rc1+ #100 Tainted: G             L
> > ------------------------------------------------------
> > kswapd0/171 is trying to acquire lock:
> > 000000009b9823fd (&shmem->pages_lock){+.+.}, at: drm_gem_shmem_purge+0x20/0x40
> >
> > but task is already holding lock:
> > 00000000f82369b6 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x0/0x40
> >
> > which lock already depends on the new lock.
> >
> > the existing dependency chain (in reverse order) is:
> >
> > -> #1 (fs_reclaim){+.+.}:
> >        fs_reclaim_acquire.part.18+0x34/0x40
> >        fs_reclaim_acquire+0x20/0x28
> >        __kmalloc_node+0x6c/0x4c0
> >        kvmalloc_node+0x38/0xa8
> >        drm_gem_get_pages+0x80/0x1d0
> >        drm_gem_shmem_get_pages+0x58/0xa0
> >        drm_gem_shmem_get_pages_sgt+0x48/0xd0
> >        panfrost_mmu_map+0x38/0xf8 [panfrost]
> >        panfrost_gem_open+0xc0/0xe8 [panfrost]
> >        drm_gem_handle_create_tail+0xe8/0x198
> >        drm_gem_handle_create+0x3c/0x50
> >        panfrost_gem_create_with_handle+0x70/0xa0 [panfrost]
> >        panfrost_ioctl_create_bo+0x48/0x80 [panfrost]
> >        drm_ioctl_kernel+0xb8/0x110
> >        drm_ioctl+0x244/0x3f0
> >        do_vfs_ioctl+0xbc/0x910
> >        ksys_ioctl+0x78/0xa8
> >        __arm64_sys_ioctl+0x1c/0x28
> >        el0_svc_common.constprop.0+0x90/0x168
> >        el0_svc_handler+0x28/0x78
> >        el0_svc+0x8/0xc
> >
> > -> #0 (&shmem->pages_lock){+.+.}:
> >        __lock_acquire+0xa2c/0x1d70
> >        lock_acquire+0xdc/0x228
> >        __mutex_lock+0x8c/0x800
> >        mutex_lock_nested+0x1c/0x28
> >        drm_gem_shmem_purge+0x20/0x40
> >        panfrost_gem_shrinker_scan+0xc0/0x180 [panfrost]
> >        do_shrink_slab+0x208/0x500
> >        shrink_slab+0x10c/0x2c0
> >        shrink_node+0x28c/0x4d8
> >        balance_pgdat+0x2c8/0x570
> >        kswapd+0x22c/0x638
> >        kthread+0x128/0x130
> >        ret_from_fork+0x10/0x18
> >
> > other info that might help us debug this:
> >
> >  Possible unsafe locking scenario:
> >
> >        CPU0                    CPU1
> >        ----                    ----
> >   lock(fs_reclaim);
> >                                lock(&shmem->pages_lock);
> >                                lock(fs_reclaim);
> >   lock(&shmem->pages_lock);
> >
> >  *** DEADLOCK ***
> >
> > 3 locks held by kswapd0/171:
> >  #0: 00000000f82369b6 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x0/0x40
> >  #1: 00000000ceb37808 (shrinker_rwsem){++++}, at: shrink_slab+0xbc/0x2c0
> >  #2: 00000000f31efa81 (&pfdev->shrinker_lock){+.+.}, at: panfrost_gem_shrinker_scan+0x34/0x180 [panfrost]
> >
> > Fixes: 17acb9f35ed7 ("drm/shmem: Add madvise state and purge helpers")
> > Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > Cc: Maxime Ripard <maxime.ripard@bootlin.com>
> > Cc: Sean Paul <sean@poorly.run>
> > Cc: David Airlie <airlied@linux.ie>
> > Cc: Daniel Vetter <daniel@ffwll.ch>
> > Signed-off-by: Rob Herring <robh@kernel.org>
> > ---
> >  drivers/gpu/drm/drm_gem_shmem_helper.c | 7 +++++--
> >  include/drm/drm_gem_shmem_helper.h     | 2 +-
> >  2 files changed, 6 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
> > index 5423ec56b535..f5918707672f 100644
> > --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
> > +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
> > @@ -415,13 +415,16 @@ void drm_gem_shmem_purge_locked(struct drm_gem_object *obj)
> >  }
> >  EXPORT_SYMBOL(drm_gem_shmem_purge_locked);
> >
> > -void drm_gem_shmem_purge(struct drm_gem_object *obj)
> > +bool drm_gem_shmem_purge(struct drm_gem_object *obj)
> >  {
> >       struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
> >
> > -     mutex_lock(&shmem->pages_lock);
> > +     if (!mutex_trylock(&shmem->pages_lock))
>
> Did you see my ping about cutting all the locking over to dma_resv?

Yes, but you didn't reply to Rob C. about it. I guess I'll have to go
figure out how reservation objects work...

> Would
> align shmem helpers with ttm a lot more, for that bright glorious future
> taste. Should we capture that in some todo.rst entry?

Sure.

Rob
Daniel Vetter Aug. 21, 2019, 8:23 a.m. UTC | #3
On Tue, Aug 20, 2019 at 07:35:47AM -0500, Rob Herring wrote:
> On Tue, Aug 20, 2019 at 4:05 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> >
> > On Mon, Aug 19, 2019 at 11:12:02AM -0500, Rob Herring wrote:
> > > Lockdep reports a circular locking dependency with pages_lock taken in
> > > the shrinker callback. The deadlock can't actually happen with current
> > > users at least as a BO will never be purgeable when pages_lock is held.
> > > To be safe, let's use mutex_trylock() instead and bail if a BO is locked
> > > already.
> > >
> > > WARNING: possible circular locking dependency detected
> > > 5.3.0-rc1+ #100 Tainted: G             L
> > > ------------------------------------------------------
> > > kswapd0/171 is trying to acquire lock:
> > > 000000009b9823fd (&shmem->pages_lock){+.+.}, at: drm_gem_shmem_purge+0x20/0x40
> > >
> > > but task is already holding lock:
> > > 00000000f82369b6 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x0/0x40
> > >
> > > which lock already depends on the new lock.
> > >
> > > the existing dependency chain (in reverse order) is:
> > >
> > > -> #1 (fs_reclaim){+.+.}:
> > >        fs_reclaim_acquire.part.18+0x34/0x40
> > >        fs_reclaim_acquire+0x20/0x28
> > >        __kmalloc_node+0x6c/0x4c0
> > >        kvmalloc_node+0x38/0xa8
> > >        drm_gem_get_pages+0x80/0x1d0
> > >        drm_gem_shmem_get_pages+0x58/0xa0
> > >        drm_gem_shmem_get_pages_sgt+0x48/0xd0
> > >        panfrost_mmu_map+0x38/0xf8 [panfrost]
> > >        panfrost_gem_open+0xc0/0xe8 [panfrost]
> > >        drm_gem_handle_create_tail+0xe8/0x198
> > >        drm_gem_handle_create+0x3c/0x50
> > >        panfrost_gem_create_with_handle+0x70/0xa0 [panfrost]
> > >        panfrost_ioctl_create_bo+0x48/0x80 [panfrost]
> > >        drm_ioctl_kernel+0xb8/0x110
> > >        drm_ioctl+0x244/0x3f0
> > >        do_vfs_ioctl+0xbc/0x910
> > >        ksys_ioctl+0x78/0xa8
> > >        __arm64_sys_ioctl+0x1c/0x28
> > >        el0_svc_common.constprop.0+0x90/0x168
> > >        el0_svc_handler+0x28/0x78
> > >        el0_svc+0x8/0xc
> > >
> > > -> #0 (&shmem->pages_lock){+.+.}:
> > >        __lock_acquire+0xa2c/0x1d70
> > >        lock_acquire+0xdc/0x228
> > >        __mutex_lock+0x8c/0x800
> > >        mutex_lock_nested+0x1c/0x28
> > >        drm_gem_shmem_purge+0x20/0x40
> > >        panfrost_gem_shrinker_scan+0xc0/0x180 [panfrost]
> > >        do_shrink_slab+0x208/0x500
> > >        shrink_slab+0x10c/0x2c0
> > >        shrink_node+0x28c/0x4d8
> > >        balance_pgdat+0x2c8/0x570
> > >        kswapd+0x22c/0x638
> > >        kthread+0x128/0x130
> > >        ret_from_fork+0x10/0x18
> > >
> > > other info that might help us debug this:
> > >
> > >  Possible unsafe locking scenario:
> > >
> > >        CPU0                    CPU1
> > >        ----                    ----
> > >   lock(fs_reclaim);
> > >                                lock(&shmem->pages_lock);
> > >                                lock(fs_reclaim);
> > >   lock(&shmem->pages_lock);
> > >
> > >  *** DEADLOCK ***
> > >
> > > 3 locks held by kswapd0/171:
> > >  #0: 00000000f82369b6 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x0/0x40
> > >  #1: 00000000ceb37808 (shrinker_rwsem){++++}, at: shrink_slab+0xbc/0x2c0
> > >  #2: 00000000f31efa81 (&pfdev->shrinker_lock){+.+.}, at: panfrost_gem_shrinker_scan+0x34/0x180 [panfrost]
> > >
> > > Fixes: 17acb9f35ed7 ("drm/shmem: Add madvise state and purge helpers")
> > > Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > > Cc: Maxime Ripard <maxime.ripard@bootlin.com>
> > > Cc: Sean Paul <sean@poorly.run>
> > > Cc: David Airlie <airlied@linux.ie>
> > > Cc: Daniel Vetter <daniel@ffwll.ch>
> > > Signed-off-by: Rob Herring <robh@kernel.org>
> > > ---
> > >  drivers/gpu/drm/drm_gem_shmem_helper.c | 7 +++++--
> > >  include/drm/drm_gem_shmem_helper.h     | 2 +-
> > >  2 files changed, 6 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
> > > index 5423ec56b535..f5918707672f 100644
> > > --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
> > > +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
> > > @@ -415,13 +415,16 @@ void drm_gem_shmem_purge_locked(struct drm_gem_object *obj)
> > >  }
> > >  EXPORT_SYMBOL(drm_gem_shmem_purge_locked);
> > >
> > > -void drm_gem_shmem_purge(struct drm_gem_object *obj)
> > > +bool drm_gem_shmem_purge(struct drm_gem_object *obj)
> > >  {
> > >       struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
> > >
> > > -     mutex_lock(&shmem->pages_lock);
> > > +     if (!mutex_trylock(&shmem->pages_lock))
> >
> > Did you see my ping about cutting all the locking over to dma_resv?
> 
> Yes, but you didn't reply to Rob C. about it. I guess I'll have to go
> figure out how reservation objects work...

msm was the last driver that still used struct_mutex. It's a long-term
dead-end, and I think with all the effort recently to create helpers for
rendering drivers (shmem, vram, ttm refactoring) we should make a solid
attempt to get aligned. Or did you mean that Rob Clark had some
reply/questions that I didn't respond to because it fell through the cracks?

> > Would
> > align shmem helpers with ttm a lot more, for that bright glorious future
> > taste. Should we capture that in some todo.rst entry?
> 
> Sure.

Cheers, Daniel
Rob Herring (Arm) Aug. 21, 2019, 4:03 p.m. UTC | #4
On Wed, Aug 21, 2019 at 3:23 AM Daniel Vetter <daniel@ffwll.ch> wrote:
>
> On Tue, Aug 20, 2019 at 07:35:47AM -0500, Rob Herring wrote:
> > On Tue, Aug 20, 2019 at 4:05 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> > >
> > > On Mon, Aug 19, 2019 at 11:12:02AM -0500, Rob Herring wrote:
> > > > Lockdep reports a circular locking dependency with pages_lock taken in
> > > > the shrinker callback. The deadlock can't actually happen with current
> > > > users at least as a BO will never be purgeable when pages_lock is held.
> > > > To be safe, let's use mutex_trylock() instead and bail if a BO is locked
> > > > already.

[...]

> > > > -void drm_gem_shmem_purge(struct drm_gem_object *obj)
> > > > +bool drm_gem_shmem_purge(struct drm_gem_object *obj)
> > > >  {
> > > >       struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
> > > >
> > > > -     mutex_lock(&shmem->pages_lock);
> > > > +     if (!mutex_trylock(&shmem->pages_lock))
> > >
> > > Did you see my ping about cutting all the locking over to dma_resv?
> >
> > Yes, but you didn't reply to Rob C. about it. I guess I'll have to go
> > figure out how reservation objects work...
>
> msm was the last driver that still used struct_mutex. It's a long-term
> dead-end, and I think with all the effort recently to create helpers for
> rendering drivers (shmem, vram, ttm refactoring) we should make a solid
> attempt to get aligned. Or did you mean that Rob Clark had some
> reply/questions that I didn't respond to because it fell through cracks?

I'm not using struct_mutex, so I'm confused as to why you keep
mentioning it. The list of BOs for the shrinker is protected with a
mutex for the list. That list head, list mutex, and the shrinker
instance all have to live at the driver level, so they can't be moved
into shmem as you suggested. Agreed?

Then there is the pages_lock within the shmem BO. I assume that is
what you are suggesting converting to dma_resv? I'm not really sure
what that would look like. You're going to have to spell it out for
me. In my brief look at it, it seems like added complexity and it's
not clear to me what that buys. Also, I think it would mostly be an
internal implementation detail of shmem helpers, though there is one
spot in panfrost that takes the lock (2 before this series). So it's
kind of orthogonal to this series.

Also, I think getting more drivers using shmem is more beneficial than
aligning the implementations of the GEM helpers. We should at least be
able to convert vgem and vkms I would think. Various KMS drivers too,
but there's an issue around kernel mappings (or lack of). There really
should be little reason for most KMS drivers to have a custom BO as
CMA or shmem helpers should work.

Rob
Daniel Vetter Aug. 21, 2019, 4:32 p.m. UTC | #5
On Wed, Aug 21, 2019 at 11:03:55AM -0500, Rob Herring wrote:
> On Wed, Aug 21, 2019 at 3:23 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> >
> > On Tue, Aug 20, 2019 at 07:35:47AM -0500, Rob Herring wrote:
> > > On Tue, Aug 20, 2019 at 4:05 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> > > >
> > > > On Mon, Aug 19, 2019 at 11:12:02AM -0500, Rob Herring wrote:
> > > > > Lockdep reports a circular locking dependency with pages_lock taken in
> > > > > the shrinker callback. The deadlock can't actually happen with current
> > > > > users at least as a BO will never be purgeable when pages_lock is held.
> > > > > To be safe, let's use mutex_trylock() instead and bail if a BO is locked
> > > > > already.
> 
> [...]
> 
> > > > > -void drm_gem_shmem_purge(struct drm_gem_object *obj)
> > > > > +bool drm_gem_shmem_purge(struct drm_gem_object *obj)
> > > > >  {
> > > > >       struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
> > > > >
> > > > > -     mutex_lock(&shmem->pages_lock);
> > > > > +     if (!mutex_trylock(&shmem->pages_lock))
> > > >
> > > > Did you see my ping about cutting all the locking over to dma_resv?
> > >
> > > Yes, but you didn't reply to Rob C. about it. I guess I'll have to go
> > > figure out how reservation objects work...
> >
> > msm was the last driver that still used struct_mutex. It's a long-term
> > dead-end, and I think with all the effort recently to create helpers for
> > rendering drivers (shmem, vram, ttm refactoring) we should make a solid
> > attempt to get aligned. Or did you mean that Rob Clark had some
> > reply/questions that I didn't respond to because it fell through cracks?
> 
> I'm not using struct_mutex, so I'm confused as to why you keep
> mentioning it. The list of BOs for the shrinker is protected with a
> mutex for the list. That list head, list mutex, and the shrinker
> instance all have to live at the driver level, so they can't be moved
> into shmem as you suggested. Agreed?

struct_mutex is just the historical baggage.

Wrt shrinker/lru, why not? We've talked about maybe moving that to make it
easier to share ...

> Then there is the pages_lock within the shmem BO. I assume that is
> what you are suggesting converting to dma_resv? I'm not really sure
> what that would look like. You're going to have to spell it out for
> me. In my brief look at it, it seems like added complexity and it's
> not clear to me what that buys. Also, I think it would mostly be an
> internal implementation detail of shmem helpers, though there is one
> spot in panfrost that takes the lock (2 before this series). So it's
> kind of orthogonal to this series.

The issue roughly is that having multiple per-bo locks gets fun, once you
add in multiple drivers and dynamic dma-buf sharing. Maybe that's never
going to be an issue for drivers using shmem helpers, but who knows. The
cross-driver per-bo lock to untangle that maze is dma_resv, and if you
then also have your own per-bo locks it can get rather interesting. Best
case you end up with two locks nesting, and your own per-bo lock being
fully redundant. Worst case you get different nesting depending whether
you import or export. So that's roughly the context.

Of course fixing locking is going to be easier the fewer users you have.
Once there's lots of code and users of it out there, it's pretty much
impossible.

So yeah, it would be a 1:1 replacement of all the per-bo locks you have
now with dma_resv, and then seeing how badly it bites.

> Also, I think getting more drivers using shmem is more beneficial than
> aligning the implementations of the GEM helpers. We should at least be
> able to convert vgem and vkms I would think. Various KMS drivers too,
> but there's an issue around kernel mappings (or lack of). There really
> should be little reason for most KMS drivers to have a custom BO as
> CMA or shmem helpers should work.

Yeah agreed on this, I just want to make sure we're not doing this
multiple times ...
-Daniel
Steven Price Aug. 22, 2019, 1:28 p.m. UTC | #6
On 19/08/2019 17:12, Rob Herring wrote:
> Lockdep reports a circular locking dependency with pages_lock taken in
> the shrinker callback. The deadlock can't actually happen with current
> users at least as a BO will never be purgeable when pages_lock is held.
> To be safe, let's use mutex_trylock() instead and bail if a BO is locked
> already.
> 
> WARNING: possible circular locking dependency detected
> 5.3.0-rc1+ #100 Tainted: G             L
> ------------------------------------------------------
> kswapd0/171 is trying to acquire lock:
> 000000009b9823fd (&shmem->pages_lock){+.+.}, at: drm_gem_shmem_purge+0x20/0x40
> 
> but task is already holding lock:
> 00000000f82369b6 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x0/0x40
> 
> which lock already depends on the new lock.
> 
> the existing dependency chain (in reverse order) is:
> 
> -> #1 (fs_reclaim){+.+.}:
>        fs_reclaim_acquire.part.18+0x34/0x40
>        fs_reclaim_acquire+0x20/0x28
>        __kmalloc_node+0x6c/0x4c0
>        kvmalloc_node+0x38/0xa8
>        drm_gem_get_pages+0x80/0x1d0
>        drm_gem_shmem_get_pages+0x58/0xa0
>        drm_gem_shmem_get_pages_sgt+0x48/0xd0
>        panfrost_mmu_map+0x38/0xf8 [panfrost]
>        panfrost_gem_open+0xc0/0xe8 [panfrost]
>        drm_gem_handle_create_tail+0xe8/0x198
>        drm_gem_handle_create+0x3c/0x50
>        panfrost_gem_create_with_handle+0x70/0xa0 [panfrost]
>        panfrost_ioctl_create_bo+0x48/0x80 [panfrost]
>        drm_ioctl_kernel+0xb8/0x110
>        drm_ioctl+0x244/0x3f0
>        do_vfs_ioctl+0xbc/0x910
>        ksys_ioctl+0x78/0xa8
>        __arm64_sys_ioctl+0x1c/0x28
>        el0_svc_common.constprop.0+0x90/0x168
>        el0_svc_handler+0x28/0x78
>        el0_svc+0x8/0xc
> 
> -> #0 (&shmem->pages_lock){+.+.}:
>        __lock_acquire+0xa2c/0x1d70
>        lock_acquire+0xdc/0x228
>        __mutex_lock+0x8c/0x800
>        mutex_lock_nested+0x1c/0x28
>        drm_gem_shmem_purge+0x20/0x40
>        panfrost_gem_shrinker_scan+0xc0/0x180 [panfrost]
>        do_shrink_slab+0x208/0x500
>        shrink_slab+0x10c/0x2c0
>        shrink_node+0x28c/0x4d8
>        balance_pgdat+0x2c8/0x570
>        kswapd+0x22c/0x638
>        kthread+0x128/0x130
>        ret_from_fork+0x10/0x18
> 
> other info that might help us debug this:
> 
>  Possible unsafe locking scenario:
> 
>        CPU0                    CPU1
>        ----                    ----
>   lock(fs_reclaim);
>                                lock(&shmem->pages_lock);
>                                lock(fs_reclaim);
>   lock(&shmem->pages_lock);
> 
>  *** DEADLOCK ***
> 
> 3 locks held by kswapd0/171:
>  #0: 00000000f82369b6 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x0/0x40
>  #1: 00000000ceb37808 (shrinker_rwsem){++++}, at: shrink_slab+0xbc/0x2c0
>  #2: 00000000f31efa81 (&pfdev->shrinker_lock){+.+.}, at: panfrost_gem_shrinker_scan+0x34/0x180 [panfrost]
> 
> Fixes: 17acb9f35ed7 ("drm/shmem: Add madvise state and purge helpers")
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Maxime Ripard <maxime.ripard@bootlin.com>
> Cc: Sean Paul <sean@poorly.run>
> Cc: David Airlie <airlied@linux.ie>
> Cc: Daniel Vetter <daniel@ffwll.ch>
> Signed-off-by: Rob Herring <robh@kernel.org>

Seems reasonable. As you state, I don't think this can actually happen,
but keeping lockdep happy is a good idea.

Reviewed-by: Steven Price <steven.price@arm.com>

Steve

> ---
>  drivers/gpu/drm/drm_gem_shmem_helper.c | 7 +++++--
>  include/drm/drm_gem_shmem_helper.h     | 2 +-
>  2 files changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
> index 5423ec56b535..f5918707672f 100644
> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
> @@ -415,13 +415,16 @@ void drm_gem_shmem_purge_locked(struct drm_gem_object *obj)
>  }
>  EXPORT_SYMBOL(drm_gem_shmem_purge_locked);
>  
> -void drm_gem_shmem_purge(struct drm_gem_object *obj)
> +bool drm_gem_shmem_purge(struct drm_gem_object *obj)
>  {
>  	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
>  
> -	mutex_lock(&shmem->pages_lock);
> +	if (!mutex_trylock(&shmem->pages_lock))
> +		return false;
>  	drm_gem_shmem_purge_locked(obj);
>  	mutex_unlock(&shmem->pages_lock);
> +
> +	return true;
>  }
>  EXPORT_SYMBOL(drm_gem_shmem_purge);
>  
> diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
> index ce1600fdfc3e..01f514521687 100644
> --- a/include/drm/drm_gem_shmem_helper.h
> +++ b/include/drm/drm_gem_shmem_helper.h
> @@ -134,7 +134,7 @@ static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem
>  }
>  
>  void drm_gem_shmem_purge_locked(struct drm_gem_object *obj);
> -void drm_gem_shmem_purge(struct drm_gem_object *obj);
> +bool drm_gem_shmem_purge(struct drm_gem_object *obj);
>  
>  struct drm_gem_shmem_object *
>  drm_gem_shmem_create_with_handle(struct drm_file *file_priv,
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 5423ec56b535..f5918707672f 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -415,13 +415,16 @@  void drm_gem_shmem_purge_locked(struct drm_gem_object *obj)
 }
 EXPORT_SYMBOL(drm_gem_shmem_purge_locked);
 
-void drm_gem_shmem_purge(struct drm_gem_object *obj)
+bool drm_gem_shmem_purge(struct drm_gem_object *obj)
 {
 	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
 
-	mutex_lock(&shmem->pages_lock);
+	if (!mutex_trylock(&shmem->pages_lock))
+		return false;
 	drm_gem_shmem_purge_locked(obj);
 	mutex_unlock(&shmem->pages_lock);
+
+	return true;
 }
 EXPORT_SYMBOL(drm_gem_shmem_purge);
 
diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
index ce1600fdfc3e..01f514521687 100644
--- a/include/drm/drm_gem_shmem_helper.h
+++ b/include/drm/drm_gem_shmem_helper.h
@@ -134,7 +134,7 @@  static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem
 }
 
 void drm_gem_shmem_purge_locked(struct drm_gem_object *obj);
-void drm_gem_shmem_purge(struct drm_gem_object *obj);
+bool drm_gem_shmem_purge(struct drm_gem_object *obj);
 
 struct drm_gem_shmem_object *
 drm_gem_shmem_create_with_handle(struct drm_file *file_priv,