diff mbox series

ceph: switch to use cap_delay_lock for the unlink delay list

Message ID 20240409064943.1377026-1-xiubli@redhat.com
State New
Headers show
Series ceph: switch to use cap_delay_lock for the unlink delay list | expand

Commit Message

Xiubo Li April 9, 2024, 6:49 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

The same list item will be used in both cap_delay_list and
cap_unlink_delay_list, so it's buggy to use two different locks
to protect them.

Reported-by: Marc Ruhmann <ruhmann@luis.uni-hannover.de>
Fixes: dbc347ef7f0c ("ceph: add ceph_cap_unlink_work to fire check_caps() immediately")
URL: https://lists.ceph.io/hyperkitty/list/ceph-users@ceph.io/thread/AODC76VXRAMXKLFDCTK4TKFDDPWUSCN5
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/caps.c       | 4 ++--
 fs/ceph/mds_client.c | 9 ++++-----
 fs/ceph/mds_client.h | 3 +--
 3 files changed, 7 insertions(+), 9 deletions(-)

Comments

Ilya Dryomov April 11, 2024, 5:20 p.m. UTC | #1
On Tue, Apr 9, 2024 at 8:52 AM <xiubli@redhat.com> wrote:
>
> From: Xiubo Li <xiubli@redhat.com>
>
> The same list item will be used in both cap_delay_list and
> cap_unlink_delay_list, so it's buggy to use two different locks
> to protect them.
>
> Reported-by: Marc Ruhmann <ruhmann@luis.uni-hannover.de>
> Fixes: dbc347ef7f0c ("ceph: add ceph_cap_unlink_work to fire check_caps() immediately")
> URL: https://lists.ceph.io/hyperkitty/list/ceph-users@ceph.io/thread/AODC76VXRAMXKLFDCTK4TKFDDPWUSCN5
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/caps.c       | 4 ++--
>  fs/ceph/mds_client.c | 9 ++++-----
>  fs/ceph/mds_client.h | 3 +--
>  3 files changed, 7 insertions(+), 9 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index bfa8f72cd3cf..197cb383f829 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -4799,13 +4799,13 @@ int ceph_drop_caps_for_unlink(struct inode *inode)
>
>                         doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode,
>                               ceph_vinop(inode));
> -                       spin_lock(&mdsc->cap_unlink_delay_lock);
> +                       spin_lock(&mdsc->cap_delay_lock);
>                         ci->i_ceph_flags |= CEPH_I_FLUSH;
>                         if (!list_empty(&ci->i_cap_delay_list))
>                                 list_del_init(&ci->i_cap_delay_list);
>                         list_add_tail(&ci->i_cap_delay_list,
>                                       &mdsc->cap_unlink_delay_list);
> -                       spin_unlock(&mdsc->cap_unlink_delay_lock);
> +                       spin_unlock(&mdsc->cap_delay_lock);
>
>                         /*
>                          * Fire the work immediately, because the MDS maybe
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 639bde44ab23..494d80b3e41d 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -2532,7 +2532,7 @@ static void ceph_cap_unlink_work(struct work_struct *work)
>         struct ceph_client *cl = mdsc->fsc->client;
>
>         doutc(cl, "begin\n");
> -       spin_lock(&mdsc->cap_unlink_delay_lock);
> +       spin_lock(&mdsc->cap_delay_lock);
>         while (!list_empty(&mdsc->cap_unlink_delay_list)) {
>                 struct ceph_inode_info *ci;
>                 struct inode *inode;
> @@ -2544,15 +2544,15 @@ static void ceph_cap_unlink_work(struct work_struct *work)
>
>                 inode = igrab(&ci->netfs.inode);
>                 if (inode) {
> -                       spin_unlock(&mdsc->cap_unlink_delay_lock);
> +                       spin_unlock(&mdsc->cap_delay_lock);
>                         doutc(cl, "on %p %llx.%llx\n", inode,
>                               ceph_vinop(inode));
>                         ceph_check_caps(ci, CHECK_CAPS_FLUSH);
>                         iput(inode);
> -                       spin_lock(&mdsc->cap_unlink_delay_lock);
> +                       spin_lock(&mdsc->cap_delay_lock);
>                 }
>         }
> -       spin_unlock(&mdsc->cap_unlink_delay_lock);
> +       spin_unlock(&mdsc->cap_delay_lock);
>         doutc(cl, "done\n");
>  }
>
> @@ -5538,7 +5538,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
>         INIT_LIST_HEAD(&mdsc->cap_wait_list);
>         spin_lock_init(&mdsc->cap_delay_lock);
>         INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list);
> -       spin_lock_init(&mdsc->cap_unlink_delay_lock);
>         INIT_LIST_HEAD(&mdsc->snap_flush_list);
>         spin_lock_init(&mdsc->snap_flush_lock);
>         mdsc->last_cap_flush_tid = 1;
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index 317a0fd6a8ba..80b310e33f2b 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -481,9 +481,8 @@ struct ceph_mds_client {
>         struct delayed_work    delayed_work;  /* delayed work */
>         unsigned long    last_renew_caps;  /* last time we renewed our caps */
>         struct list_head cap_delay_list;   /* caps with delayed release */
> -       spinlock_t       cap_delay_lock;   /* protects cap_delay_list */
>         struct list_head cap_unlink_delay_list;  /* caps with delayed release for unlink */
> -       spinlock_t       cap_unlink_delay_lock;  /* protects cap_unlink_delay_list */
> +       spinlock_t       cap_delay_lock;   /* protects cap_delay_list and cap_unlink_delay_list */
>         struct list_head snap_flush_list;  /* cap_snaps ready to flush */
>         spinlock_t       snap_flush_lock;
>
> --
> 2.43.0
>

LGTM, queued up for 6.9-rc.

Thanks,

                Ilya
Marc Ruhmann April 11, 2024, 8:28 p.m. UTC | #2
Am 09.04.24 um 08:49 schrieb xiubli@redhat.com:
> From: Xiubo Li <xiubli@redhat.com>
> 
> The same list item will be used in both cap_delay_list and
> cap_unlink_delay_list, so it's buggy to use two different locks
> to protect them.
> 
> Reported-by: Marc Ruhmann <ruhmann@luis.uni-hannover.de>
> Fixes: dbc347ef7f0c ("ceph: add ceph_cap_unlink_work to fire check_caps() immediately")
> URL: https://lists.ceph.io/hyperkitty/list/ceph-users@ceph.io/thread/AODC76VXRAMXKLFDCTK4TKFDDPWUSCN5
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>   fs/ceph/caps.c       | 4 ++--
>   fs/ceph/mds_client.c | 9 ++++-----
>   fs/ceph/mds_client.h | 3 +--
>   3 files changed, 7 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index bfa8f72cd3cf..197cb383f829 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -4799,13 +4799,13 @@ int ceph_drop_caps_for_unlink(struct inode *inode)
>   
>   			doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode,
>   			      ceph_vinop(inode));
> -			spin_lock(&mdsc->cap_unlink_delay_lock);
> +			spin_lock(&mdsc->cap_delay_lock);
>   			ci->i_ceph_flags |= CEPH_I_FLUSH;
>   			if (!list_empty(&ci->i_cap_delay_list))
>   				list_del_init(&ci->i_cap_delay_list);
>   			list_add_tail(&ci->i_cap_delay_list,
>   				      &mdsc->cap_unlink_delay_list);
> -			spin_unlock(&mdsc->cap_unlink_delay_lock);
> +			spin_unlock(&mdsc->cap_delay_lock);
>   
>   			/*
>   			 * Fire the work immediately, because the MDS maybe
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 639bde44ab23..494d80b3e41d 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -2532,7 +2532,7 @@ static void ceph_cap_unlink_work(struct work_struct *work)
>   	struct ceph_client *cl = mdsc->fsc->client;
>   
>   	doutc(cl, "begin\n");
> -	spin_lock(&mdsc->cap_unlink_delay_lock);
> +	spin_lock(&mdsc->cap_delay_lock);
>   	while (!list_empty(&mdsc->cap_unlink_delay_list)) {
>   		struct ceph_inode_info *ci;
>   		struct inode *inode;
> @@ -2544,15 +2544,15 @@ static void ceph_cap_unlink_work(struct work_struct *work)
>   
>   		inode = igrab(&ci->netfs.inode);
>   		if (inode) {
> -			spin_unlock(&mdsc->cap_unlink_delay_lock);
> +			spin_unlock(&mdsc->cap_delay_lock);
>   			doutc(cl, "on %p %llx.%llx\n", inode,
>   			      ceph_vinop(inode));
>   			ceph_check_caps(ci, CHECK_CAPS_FLUSH);
>   			iput(inode);
> -			spin_lock(&mdsc->cap_unlink_delay_lock);
> +			spin_lock(&mdsc->cap_delay_lock);
>   		}
>   	}
> -	spin_unlock(&mdsc->cap_unlink_delay_lock);
> +	spin_unlock(&mdsc->cap_delay_lock);
>   	doutc(cl, "done\n");
>   }
>   
> @@ -5538,7 +5538,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
>   	INIT_LIST_HEAD(&mdsc->cap_wait_list);
>   	spin_lock_init(&mdsc->cap_delay_lock);
>   	INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list);
> -	spin_lock_init(&mdsc->cap_unlink_delay_lock);
>   	INIT_LIST_HEAD(&mdsc->snap_flush_list);
>   	spin_lock_init(&mdsc->snap_flush_lock);
>   	mdsc->last_cap_flush_tid = 1;
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index 317a0fd6a8ba..80b310e33f2b 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -481,9 +481,8 @@ struct ceph_mds_client {
>   	struct delayed_work    delayed_work;  /* delayed work */
>   	unsigned long    last_renew_caps;  /* last time we renewed our caps */
>   	struct list_head cap_delay_list;   /* caps with delayed release */
> -	spinlock_t       cap_delay_lock;   /* protects cap_delay_list */
>   	struct list_head cap_unlink_delay_list;  /* caps with delayed release for unlink */
> -	spinlock_t       cap_unlink_delay_lock;  /* protects cap_unlink_delay_list */
> +	spinlock_t       cap_delay_lock;   /* protects cap_delay_list and cap_unlink_delay_list */
>   	struct list_head snap_flush_list;  /* cap_snaps ready to flush */
>   	spinlock_t       snap_flush_lock;
>   

Tested-by: Marc Ruhmann <ruhmann@luis.uni-hannover.de>
Reviewed-by: Marc Ruhmann <ruhmann@luis.uni-hannover.de>

Patch looks sound. Using different locks when operating on two lists
containing the same item can cause conflicts.

Changes tested on 5.14.0-435.el9. Without this patch we observed about
10 crashes a day, the majority of which related to corrupted lists when
dropping caps for unlink. With the patch applied not a single crash
occurred in 1.5 days.

Thanks and best regards,

Marc
diff mbox series

Patch

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index bfa8f72cd3cf..197cb383f829 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4799,13 +4799,13 @@  int ceph_drop_caps_for_unlink(struct inode *inode)
 
 			doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode,
 			      ceph_vinop(inode));
-			spin_lock(&mdsc->cap_unlink_delay_lock);
+			spin_lock(&mdsc->cap_delay_lock);
 			ci->i_ceph_flags |= CEPH_I_FLUSH;
 			if (!list_empty(&ci->i_cap_delay_list))
 				list_del_init(&ci->i_cap_delay_list);
 			list_add_tail(&ci->i_cap_delay_list,
 				      &mdsc->cap_unlink_delay_list);
-			spin_unlock(&mdsc->cap_unlink_delay_lock);
+			spin_unlock(&mdsc->cap_delay_lock);
 
 			/*
 			 * Fire the work immediately, because the MDS maybe
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 639bde44ab23..494d80b3e41d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2532,7 +2532,7 @@  static void ceph_cap_unlink_work(struct work_struct *work)
 	struct ceph_client *cl = mdsc->fsc->client;
 
 	doutc(cl, "begin\n");
-	spin_lock(&mdsc->cap_unlink_delay_lock);
+	spin_lock(&mdsc->cap_delay_lock);
 	while (!list_empty(&mdsc->cap_unlink_delay_list)) {
 		struct ceph_inode_info *ci;
 		struct inode *inode;
@@ -2544,15 +2544,15 @@  static void ceph_cap_unlink_work(struct work_struct *work)
 
 		inode = igrab(&ci->netfs.inode);
 		if (inode) {
-			spin_unlock(&mdsc->cap_unlink_delay_lock);
+			spin_unlock(&mdsc->cap_delay_lock);
 			doutc(cl, "on %p %llx.%llx\n", inode,
 			      ceph_vinop(inode));
 			ceph_check_caps(ci, CHECK_CAPS_FLUSH);
 			iput(inode);
-			spin_lock(&mdsc->cap_unlink_delay_lock);
+			spin_lock(&mdsc->cap_delay_lock);
 		}
 	}
-	spin_unlock(&mdsc->cap_unlink_delay_lock);
+	spin_unlock(&mdsc->cap_delay_lock);
 	doutc(cl, "done\n");
 }
 
@@ -5538,7 +5538,6 @@  int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	INIT_LIST_HEAD(&mdsc->cap_wait_list);
 	spin_lock_init(&mdsc->cap_delay_lock);
 	INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list);
-	spin_lock_init(&mdsc->cap_unlink_delay_lock);
 	INIT_LIST_HEAD(&mdsc->snap_flush_list);
 	spin_lock_init(&mdsc->snap_flush_lock);
 	mdsc->last_cap_flush_tid = 1;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 317a0fd6a8ba..80b310e33f2b 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -481,9 +481,8 @@  struct ceph_mds_client {
 	struct delayed_work    delayed_work;  /* delayed work */
 	unsigned long    last_renew_caps;  /* last time we renewed our caps */
 	struct list_head cap_delay_list;   /* caps with delayed release */
-	spinlock_t       cap_delay_lock;   /* protects cap_delay_list */
 	struct list_head cap_unlink_delay_list;  /* caps with delayed release for unlink */
-	spinlock_t       cap_unlink_delay_lock;  /* protects cap_unlink_delay_list */
+	spinlock_t       cap_delay_lock;   /* protects cap_delay_list and cap_unlink_delay_list */
 	struct list_head snap_flush_list;  /* cap_snaps ready to flush */
 	spinlock_t       snap_flush_lock;