diff mbox series

ovl: use a dedicated semaphore for dir upperfile caching

Message ID 20210101201230.768653-1-icenowy@aosc.io
State New
Headers show
Series ovl: use a dedicated semaphore for dir upperfile caching | expand

Commit Message

Icenowy Zheng Jan. 1, 2021, 8:12 p.m. UTC
The function ovl_dir_real_file() currently uses the semaphore of the
inode to synchronize write to the upperfile cache field.

However, this function will get called by ovl_ioctl_set_flags(), which
utilizes the inode semaphore too. In this case ovl_dir_real_file() will
try to claim a lock that is owned by a function in its call stack, which
won't get released before ovl_dir_real_file() returns.

Define a dedicated semaphore for the upperfile cache, so that the
deadlock won't happen.

Fixes: 61536bed2149 ("ovl: support [S|G]ETFLAGS and FS[S|G]ETXATTR ioctls for directories")
Cc: stable@vger.kernel.org # v5.10
Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
---
 fs/overlayfs/readdir.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

Comments

Amir Goldstein Jan. 3, 2021, 2:10 p.m. UTC | #1
On Fri, Jan 1, 2021 at 10:12 PM Icenowy Zheng <icenowy@aosc.io> wrote:
>

> The function ovl_dir_real_file() currently uses the semaphore of the

> inode to synchronize write to the upperfile cache field.

>

> However, this function will get called by ovl_ioctl_set_flags(), which

> utilizes the inode semaphore too. In this case ovl_dir_real_file() will

> try to claim a lock that is owned by a function in its call stack, which

> won't get released before ovl_dir_real_file() returns.


oops. I wondered why I didn't see any warnings on this from lockdep.
Ah! because the xfstest that exercises ovl_ioctl_set_flags() on directory,
generic/079, starts with an already upper dir.

And the xfstest that checks chattr+i on lower/upper files, overlay/040,
does not check chattr on dirs (ioctl on overlay dirs wasn't supported at
the time the test was written).

Would you be able to create a variant of test overlay/040 that also tests
chattr +i on lower/upper dirs to test your patch and confirm that the test
fails on master with the appropriate Kconfig debug options.

>

> Define a dedicated semaphore for the upperfile cache, so that the

> deadlock won't happen.

>

> Fixes: 61536bed2149 ("ovl: support [S|G]ETFLAGS and FS[S|G]ETXATTR ioctls for directories")

> Cc: stable@vger.kernel.org # v5.10

> Signed-off-by: Icenowy Zheng <icenowy@aosc.io>

> ---

>  fs/overlayfs/readdir.c | 6 ++++--

>  1 file changed, 4 insertions(+), 2 deletions(-)

>

> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c

> index 01620ebae1bd..f10701aabb71 100644

> --- a/fs/overlayfs/readdir.c

> +++ b/fs/overlayfs/readdir.c

> @@ -56,6 +56,7 @@ struct ovl_dir_file {

>         struct list_head *cursor;

>         struct file *realfile;

>         struct file *upperfile;

> +       struct semaphore upperfile_sem;


mutex please

>  };

>

>  static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)

> @@ -883,7 +884,7 @@ struct file *ovl_dir_real_file(const struct file *file, bool want_upper)

>                         ovl_path_upper(dentry, &upperpath);

>                         realfile = ovl_dir_open_realfile(file, &upperpath);

>

> -                       inode_lock(inode);

> +                       down(&od->upperfile_sem);

>                         if (!od->upperfile) {

>                                 if (IS_ERR(realfile)) {

>                                         inode_unlock(inode);


You missed this unlock

Thanks,
Amir.
Icenowy Zheng Jan. 4, 2021, 7:28 a.m. UTC | #2
在 2021-01-03星期日的 16:10 +0200,Amir Goldstein写道:
> On Fri, Jan 1, 2021 at 10:12 PM Icenowy Zheng <icenowy@aosc.io>

> wrote:

> > 

> > The function ovl_dir_real_file() currently uses the semaphore of

> > the

> > inode to synchronize write to the upperfile cache field.

> > 

> > However, this function will get called by ovl_ioctl_set_flags(),

> > which

> > utilizes the inode semaphore too. In this case ovl_dir_real_file()

> > will

> > try to claim a lock that is owned by a function in its call stack,

> > which

> > won't get released before ovl_dir_real_file() returns.

> 

> oops. I wondered why I didn't see any warnings on this from lockdep.

> Ah! because the xfstest that exercises ovl_ioctl_set_flags() on

> directory,

> generic/079, starts with an already upper dir.

> 

> And the xfstest that checks chattr+i on lower/upper files,

> overlay/040,

> does not check chattr on dirs (ioctl on overlay dirs wasn't supported

> at

> the time the test was written).

> 

> Would you be able to create a variant of test overlay/040 that also

> tests

> chattr +i on lower/upper dirs to test your patch and confirm that the

> test

> fails on master with the appropriate Kconfig debug options.


https://gist.github.com/Icenowy/c7d8decb6812d6e5064d143c57281ad3

Here's a test that would break on master (I used linux-next/master for
test).

[  246.521880] INFO: task chattr:715 blocked for more than 122 seconds.
[  246.525659]       Not tainted 5.11.0-rc1-next-20210104+ #20
[  246.528498] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[  246.535076] task:chattr          state:D stack:13736 pid:  715 ppid:
529 flags:0x00000000
[  246.538923] Call Trace:
[  246.540241]  __schedule+0x2a9/0x820
[  246.541986]  schedule+0x56/0xc0
[  246.543616]  rwsem_down_write_slowpath+0x375/0x630
[  246.545565]  ovl_dir_real_file+0xc1/0x120
[  246.547512]  ovl_real_fdget+0x35/0x80
[  246.549303]  ovl_real_ioctl+0x26/0x90
[  246.551050]  ? mnt_drop_write+0x2c/0x70
[  246.553068]  ovl_ioctl_set_flags+0x93/0x110
[  246.555407]  __x64_sys_ioctl+0x7e/0xb0
[  246.557175]  do_syscall_64+0x33/0x40
[  246.558869]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  246.561057] RIP: 0033:0x7fe4a3830b67
[  246.565799] RSP: 002b:00007ffe7ad504f8 EFLAGS: 00000246 ORIG_RAX:
0000000000000010
[  246.569438] RAX: ffffffffffffffda RBX: 0000000000000001 RCX:
00007fe4a3830b67
[  246.572061] RDX: 00007ffe7ad5050c RSI: 0000000040086602 RDI:
0000000000000003
[  246.575509] RBP: 0000000000000003 R08: 0000000000000001 R09:
0000000000000000
[  246.578932] R10: 0000000000000000 R11: 0000000000000246 R12:
0000000000000010
[  246.581014] R13: 00007ffe7ad50810 R14: 0000000000000002 R15:
0000000000000001
[  246.582818] 
[  246.582818] Showing all locks held in the system:
[  246.584741] 1 lock held by khungtaskd/18:
[  246.586085]  #0: ffffffff9e951540 (rcu_read_lock){....}-{1:2}, at:
debug_show_all_locks+0x15/0x100
[  246.589775] 3 locks held by chattr/715:
[  246.591364]  #0: ffff96a74b92c450 (sb_writers#11){....}-{0:0}, at:
ovl_ioctl_set_flags+0x2f/0x110
[  246.597182]  #1: ffff96a7489c3500
(&ovl_i_mutex_dir_key[depth]){....}-{3:3}, at:
ovl_ioctl_set_flags+0x54/0x110
[  246.601325]  #2: ffff96a7489c3500
(&ovl_i_mutex_dir_key[depth]){....}-{3:3}, at:
ovl_dir_real_file+0xc1/0x120

> 

> > 

> > Define a dedicated semaphore for the upperfile cache, so that the

> > deadlock won't happen.

> > 

> > Fixes: 61536bed2149 ("ovl: support [S|G]ETFLAGS and FS[S|G]ETXATTR

> > ioctls for directories")

> > Cc: stable@vger.kernel.org # v5.10

> > Signed-off-by: Icenowy Zheng <icenowy@aosc.io>

> > ---

> >  fs/overlayfs/readdir.c | 6 ++++--

> >  1 file changed, 4 insertions(+), 2 deletions(-)

> > 

> > diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c

> > index 01620ebae1bd..f10701aabb71 100644

> > --- a/fs/overlayfs/readdir.c

> > +++ b/fs/overlayfs/readdir.c

> > @@ -56,6 +56,7 @@ struct ovl_dir_file {

> >         struct list_head *cursor;

> >         struct file *realfile;

> >         struct file *upperfile;

> > +       struct semaphore upperfile_sem;

> 

> mutex please

> 

> >  };

> > 

> >  static struct ovl_cache_entry *ovl_cache_entry_from_node(struct

> > rb_node *n)

> > @@ -883,7 +884,7 @@ struct file *ovl_dir_real_file(const struct

> > file *file, bool want_upper)

> >                         ovl_path_upper(dentry, &upperpath);

> >                         realfile = ovl_dir_open_realfile(file,

> > &upperpath);

> > 

> > -                       inode_lock(inode);

> > +                       down(&od->upperfile_sem);

> >                         if (!od->upperfile) {

> >                                 if (IS_ERR(realfile)) {

> >                                         inode_unlock(inode);

> 

> You missed this unlock


Thanks, will send v2 now.

> 

> Thanks,

> Amir.
Amir Goldstein Jan. 4, 2021, 8:35 a.m. UTC | #3
On Mon, Jan 4, 2021 at 9:28 AM Icenowy Zheng <icenowy@aosc.io> wrote:
>

> 在 2021-01-03星期日的 16:10 +0200,Amir Goldstein写道:

> > On Fri, Jan 1, 2021 at 10:12 PM Icenowy Zheng <icenowy@aosc.io>

> > wrote:

> > >

> > > The function ovl_dir_real_file() currently uses the semaphore of

> > > the

> > > inode to synchronize write to the upperfile cache field.

> > >

> > > However, this function will get called by ovl_ioctl_set_flags(),

> > > which

> > > utilizes the inode semaphore too. In this case ovl_dir_real_file()

> > > will

> > > try to claim a lock that is owned by a function in its call stack,

> > > which

> > > won't get released before ovl_dir_real_file() returns.

> >

> > oops. I wondered why I didn't see any warnings on this from lockdep.

> > Ah! because the xfstest that exercises ovl_ioctl_set_flags() on

> > directory,

> > generic/079, starts with an already upper dir.

> >

> > And the xfstest that checks chattr+i on lower/upper files,

> > overlay/040,

> > does not check chattr on dirs (ioctl on overlay dirs wasn't supported

> > at

> > the time the test was written).

> >

> > Would you be able to create a variant of test overlay/040 that also

> > tests

> > chattr +i on lower/upper dirs to test your patch and confirm that the

> > test

> > fails on master with the appropriate Kconfig debug options.

>

> https://gist.github.com/Icenowy/c7d8decb6812d6e5064d143c57281ad3

>

> Here's a test that would break on master (I used linux-next/master for

> test).


Thanks.
I am working on another test to improve overlay/030 that may also
cover this bug, so maybe no need for both tests. I'll let you know when
I'm done.
If you like, I can post your test for you with your Signed-of-by if I think
it is also needed.

>

> [  246.521880] INFO: task chattr:715 blocked for more than 122 seconds.

> [  246.525659]       Not tainted 5.11.0-rc1-next-20210104+ #20

> [  246.528498] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"

> disables this message.

> [  246.535076] task:chattr          state:D stack:13736 pid:  715 ppid:

> 529 flags:0x00000000

> [  246.538923] Call Trace:

> [  246.540241]  __schedule+0x2a9/0x820

> [  246.541986]  schedule+0x56/0xc0

> [  246.543616]  rwsem_down_write_slowpath+0x375/0x630

> [  246.545565]  ovl_dir_real_file+0xc1/0x120

> [  246.547512]  ovl_real_fdget+0x35/0x80

> [  246.549303]  ovl_real_ioctl+0x26/0x90

> [  246.551050]  ? mnt_drop_write+0x2c/0x70

> [  246.553068]  ovl_ioctl_set_flags+0x93/0x110

> [  246.555407]  __x64_sys_ioctl+0x7e/0xb0

> [  246.557175]  do_syscall_64+0x33/0x40

> [  246.558869]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

> [  246.561057] RIP: 0033:0x7fe4a3830b67

> [  246.565799] RSP: 002b:00007ffe7ad504f8 EFLAGS: 00000246 ORIG_RAX:

> 0000000000000010

> [  246.569438] RAX: ffffffffffffffda RBX: 0000000000000001 RCX:

> 00007fe4a3830b67

> [  246.572061] RDX: 00007ffe7ad5050c RSI: 0000000040086602 RDI:

> 0000000000000003

> [  246.575509] RBP: 0000000000000003 R08: 0000000000000001 R09:

> 0000000000000000

> [  246.578932] R10: 0000000000000000 R11: 0000000000000246 R12:

> 0000000000000010

> [  246.581014] R13: 00007ffe7ad50810 R14: 0000000000000002 R15:

> 0000000000000001

> [  246.582818]

> [  246.582818] Showing all locks held in the system:

> [  246.584741] 1 lock held by khungtaskd/18:

> [  246.586085]  #0: ffffffff9e951540 (rcu_read_lock){....}-{1:2}, at:

> debug_show_all_locks+0x15/0x100

> [  246.589775] 3 locks held by chattr/715:

> [  246.591364]  #0: ffff96a74b92c450 (sb_writers#11){....}-{0:0}, at:

> ovl_ioctl_set_flags+0x2f/0x110

> [  246.597182]  #1: ffff96a7489c3500

> (&ovl_i_mutex_dir_key[depth]){....}-{3:3}, at:

> ovl_ioctl_set_flags+0x54/0x110

> [  246.601325]  #2: ffff96a7489c3500

> (&ovl_i_mutex_dir_key[depth]){....}-{3:3}, at:

> ovl_dir_real_file+0xc1/0x120

>

> >

> > >

> > > Define a dedicated semaphore for the upperfile cache, so that the

> > > deadlock won't happen.

> > >

> > > Fixes: 61536bed2149 ("ovl: support [S|G]ETFLAGS and FS[S|G]ETXATTR

> > > ioctls for directories")

> > > Cc: stable@vger.kernel.org # v5.10

> > > Signed-off-by: Icenowy Zheng <icenowy@aosc.io>

> > > ---

> > >  fs/overlayfs/readdir.c | 6 ++++--

> > >  1 file changed, 4 insertions(+), 2 deletions(-)

> > >

> > > diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c

> > > index 01620ebae1bd..f10701aabb71 100644

> > > --- a/fs/overlayfs/readdir.c

> > > +++ b/fs/overlayfs/readdir.c

> > > @@ -56,6 +56,7 @@ struct ovl_dir_file {

> > >         struct list_head *cursor;

> > >         struct file *realfile;

> > >         struct file *upperfile;

> > > +       struct semaphore upperfile_sem;

> >

> > mutex please

> >


You missed this comment.
semaphore is discouraged as a locking primitive.
Please use struct mutex.

Thanks,
Amir.
Icenowy Zheng Jan. 4, 2021, 8:36 a.m. UTC | #4
于 2021年1月4日 GMT+08:00 下午4:35:20, Amir Goldstein <amir73il@gmail.com> 写到:
>On Mon, Jan 4, 2021 at 9:28 AM Icenowy Zheng <icenowy@aosc.io> wrote:

>>

>> 在 2021-01-03星期日的 16:10 +0200,Amir Goldstein写道:

>> > On Fri, Jan 1, 2021 at 10:12 PM Icenowy Zheng <icenowy@aosc.io>

>> > wrote:

>> > >

>> > > The function ovl_dir_real_file() currently uses the semaphore of

>> > > the

>> > > inode to synchronize write to the upperfile cache field.

>> > >

>> > > However, this function will get called by ovl_ioctl_set_flags(),

>> > > which

>> > > utilizes the inode semaphore too. In this case

>ovl_dir_real_file()

>> > > will

>> > > try to claim a lock that is owned by a function in its call

>stack,

>> > > which

>> > > won't get released before ovl_dir_real_file() returns.

>> >

>> > oops. I wondered why I didn't see any warnings on this from

>lockdep.

>> > Ah! because the xfstest that exercises ovl_ioctl_set_flags() on

>> > directory,

>> > generic/079, starts with an already upper dir.

>> >

>> > And the xfstest that checks chattr+i on lower/upper files,

>> > overlay/040,

>> > does not check chattr on dirs (ioctl on overlay dirs wasn't

>supported

>> > at

>> > the time the test was written).

>> >

>> > Would you be able to create a variant of test overlay/040 that also

>> > tests

>> > chattr +i on lower/upper dirs to test your patch and confirm that

>the

>> > test

>> > fails on master with the appropriate Kconfig debug options.

>>

>> https://gist.github.com/Icenowy/c7d8decb6812d6e5064d143c57281ad3

>>

>> Here's a test that would break on master (I used linux-next/master

>for

>> test).

>

>Thanks.

>I am working on another test to improve overlay/030 that may also

>cover this bug, so maybe no need for both tests. I'll let you know when

>I'm done.

>If you like, I can post your test for you with your Signed-of-by if I

>think

>it is also needed.

>

>>

>> [  246.521880] INFO: task chattr:715 blocked for more than 122

>seconds.

>> [  246.525659]       Not tainted 5.11.0-rc1-next-20210104+ #20

>> [  246.528498] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"

>> disables this message.

>> [  246.535076] task:chattr          state:D stack:13736 pid:  715

>ppid:

>> 529 flags:0x00000000

>> [  246.538923] Call Trace:

>> [  246.540241]  __schedule+0x2a9/0x820

>> [  246.541986]  schedule+0x56/0xc0

>> [  246.543616]  rwsem_down_write_slowpath+0x375/0x630

>> [  246.545565]  ovl_dir_real_file+0xc1/0x120

>> [  246.547512]  ovl_real_fdget+0x35/0x80

>> [  246.549303]  ovl_real_ioctl+0x26/0x90

>> [  246.551050]  ? mnt_drop_write+0x2c/0x70

>> [  246.553068]  ovl_ioctl_set_flags+0x93/0x110

>> [  246.555407]  __x64_sys_ioctl+0x7e/0xb0

>> [  246.557175]  do_syscall_64+0x33/0x40

>> [  246.558869]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

>> [  246.561057] RIP: 0033:0x7fe4a3830b67

>> [  246.565799] RSP: 002b:00007ffe7ad504f8 EFLAGS: 00000246 ORIG_RAX:

>> 0000000000000010

>> [  246.569438] RAX: ffffffffffffffda RBX: 0000000000000001 RCX:

>> 00007fe4a3830b67

>> [  246.572061] RDX: 00007ffe7ad5050c RSI: 0000000040086602 RDI:

>> 0000000000000003

>> [  246.575509] RBP: 0000000000000003 R08: 0000000000000001 R09:

>> 0000000000000000

>> [  246.578932] R10: 0000000000000000 R11: 0000000000000246 R12:

>> 0000000000000010

>> [  246.581014] R13: 00007ffe7ad50810 R14: 0000000000000002 R15:

>> 0000000000000001

>> [  246.582818]

>> [  246.582818] Showing all locks held in the system:

>> [  246.584741] 1 lock held by khungtaskd/18:

>> [  246.586085]  #0: ffffffff9e951540 (rcu_read_lock){....}-{1:2}, at:

>> debug_show_all_locks+0x15/0x100

>> [  246.589775] 3 locks held by chattr/715:

>> [  246.591364]  #0: ffff96a74b92c450 (sb_writers#11){....}-{0:0}, at:

>> ovl_ioctl_set_flags+0x2f/0x110

>> [  246.597182]  #1: ffff96a7489c3500

>> (&ovl_i_mutex_dir_key[depth]){....}-{3:3}, at:

>> ovl_ioctl_set_flags+0x54/0x110

>> [  246.601325]  #2: ffff96a7489c3500

>> (&ovl_i_mutex_dir_key[depth]){....}-{3:3}, at:

>> ovl_dir_real_file+0xc1/0x120

>>

>> >

>> > >

>> > > Define a dedicated semaphore for the upperfile cache, so that the

>> > > deadlock won't happen.

>> > >

>> > > Fixes: 61536bed2149 ("ovl: support [S|G]ETFLAGS and

>FS[S|G]ETXATTR

>> > > ioctls for directories")

>> > > Cc: stable@vger.kernel.org # v5.10

>> > > Signed-off-by: Icenowy Zheng <icenowy@aosc.io>

>> > > ---

>> > >  fs/overlayfs/readdir.c | 6 ++++--

>> > >  1 file changed, 4 insertions(+), 2 deletions(-)

>> > >

>> > > diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c

>> > > index 01620ebae1bd..f10701aabb71 100644

>> > > --- a/fs/overlayfs/readdir.c

>> > > +++ b/fs/overlayfs/readdir.c

>> > > @@ -56,6 +56,7 @@ struct ovl_dir_file {

>> > >         struct list_head *cursor;

>> > >         struct file *realfile;

>> > >         struct file *upperfile;

>> > > +       struct semaphore upperfile_sem;

>> >

>> > mutex please

>> >

>

>You missed this comment.

>semaphore is discouraged as a locking primitive.

>Please use struct mutex.


Okay, sorry.

I will check it out.

>

>Thanks,

>Amir.
diff mbox series

Patch

diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 01620ebae1bd..f10701aabb71 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -56,6 +56,7 @@  struct ovl_dir_file {
 	struct list_head *cursor;
 	struct file *realfile;
 	struct file *upperfile;
+	struct semaphore upperfile_sem;
 };
 
 static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
@@ -883,7 +884,7 @@  struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
 			ovl_path_upper(dentry, &upperpath);
 			realfile = ovl_dir_open_realfile(file, &upperpath);
 
-			inode_lock(inode);
+			down(&od->upperfile_sem);
 			if (!od->upperfile) {
 				if (IS_ERR(realfile)) {
 					inode_unlock(inode);
@@ -896,7 +897,7 @@  struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
 					fput(realfile);
 				realfile = od->upperfile;
 			}
-			inode_unlock(inode);
+			up(&od->upperfile_sem);
 		}
 	}
 
@@ -959,6 +960,7 @@  static int ovl_dir_open(struct inode *inode, struct file *file)
 	od->realfile = realfile;
 	od->is_real = ovl_dir_is_real(file->f_path.dentry);
 	od->is_upper = OVL_TYPE_UPPER(type);
+	sema_init(&od->upperfile_sem, 1);
 	file->private_data = od;
 
 	return 0;