diff mbox series

ceph: fix possible deadlock while holding Fcr to use getattr

Message ID 20220422092520.18505-1-xiubli@redhat.com
State New
Headers show
Series ceph: fix possible deadlock while holding Fcr to use getattr | expand

Commit Message

Xiubo Li April 22, 2022, 9:25 a.m. UTC
We can't use getattr to fetch inline data after getting Fcr caps,
because it can cause deadlock. The solution is try uniline the
inline data when opening the file, thanks David Howells' previous
work on uninlining the inline data work.

It was caused from one possible call path:
  ceph_filemap_fault()-->
     ceph_get_caps(Fcr);
     filemap_fault()-->
        do_sync_mmap_readahead()-->
           page_cache_ra_order()-->
              read_pages()-->
                 aops->readahead()-->
                    netfs_readahead()-->
                       netfs_begin_read()-->
                          netfs_rreq_submit_slice()-->
                             netfs_read_from_server()-->
                                netfs_ops->issue_read()-->
                                   ceph_netfs_issue_read()-->
                                      ceph_netfs_issue_op_inline()-->
                                         getattr()
      ceph_pu_caps_ref(Fcr);

This because if the Locker state is LOCK_EXEC_MIX for auth MDS, and
the replica MDSes' lock state is LOCK_LOCK. Then the kclient could
get 'Frwcb' caps from both auth and replica MDSes.

But if the getattr is sent to any MDS, the MDS needs to do Locker
transition to LOCK_MIX first and then to LOCK_SYNC. But when
transfering to LOCK_MIX state the MDS Locker need to revoke the Fcb
caps back, but the kclient already holding it and waiting the MDS
to finish.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/addr.c | 65 ++++++--------------------------------------------
 fs/ceph/file.c |  3 +--
 2 files changed, 8 insertions(+), 60 deletions(-)
diff mbox series

Patch

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 261bc8bb2ab8..b0b9a2f4adb0 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -244,61 +244,6 @@  static void finish_netfs_read(struct ceph_osd_request *req)
 	iput(req->r_inode);
 }
 
-static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
-{
-	struct netfs_io_request *rreq = subreq->rreq;
-	struct inode *inode = rreq->inode;
-	struct ceph_mds_reply_info_parsed *rinfo;
-	struct ceph_mds_reply_info_in *iinfo;
-	struct ceph_mds_request *req;
-	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct iov_iter iter;
-	ssize_t err = 0;
-	size_t len;
-	int mode;
-
-	__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
-	__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
-
-	if (subreq->start >= inode->i_size)
-		goto out;
-
-	/* We need to fetch the inline data. */
-	mode = ceph_try_to_choose_auth_mds(inode, CEPH_STAT_CAP_INLINE_DATA);
-	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		goto out;
-	}
-	req->r_ino1 = ci->i_vino;
-	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INLINE_DATA);
-	req->r_num_caps = 2;
-
-	err = ceph_mdsc_do_request(mdsc, NULL, req);
-	if (err < 0)
-		goto out;
-
-	rinfo = &req->r_reply_info;
-	iinfo = &rinfo->targeti;
-	if (iinfo->inline_version == CEPH_INLINE_NONE) {
-		/* The data got uninlined */
-		ceph_mdsc_put_request(req);
-		return false;
-	}
-
-	len = min_t(size_t, iinfo->inline_len - subreq->start, subreq->len);
-	iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, subreq->start, len);
-	err = copy_to_iter(iinfo->inline_data + subreq->start, len, &iter);
-	if (err == 0)
-		err = -EFAULT;
-
-	ceph_mdsc_put_request(req);
-out:
-	netfs_subreq_terminated(subreq, err, false);
-	return true;
-}
-
 static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
 {
 	struct netfs_io_request *rreq = subreq->rreq;
@@ -313,9 +258,13 @@  static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
 	int err = 0;
 	u64 len = subreq->len;
 
-	if (ci->i_inline_version != CEPH_INLINE_NONE &&
-	    ceph_netfs_issue_op_inline(subreq))
-		return;
+	/*
+	 * We have uninlined the inline data when openning the file,
+	 * or we must send a GETATTR request to the MDS, which is
+	 * buggy and will cause deadlock while holding the Fcr
+	 * reference in ceph_filemap_fault().
+	 */
+	BUG_ON(ci->i_inline_version != CEPH_INLINE_NONE);
 
 	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, subreq->start, &len,
 			0, 1, CEPH_OSD_OP_READ,
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 6c9e837aa1d3..a98a61ec4ada 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -241,8 +241,7 @@  static int ceph_init_file_info(struct inode *inode, struct file *file,
 	INIT_LIST_HEAD(&fi->rw_contexts);
 	fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen);
 
-	if ((file->f_mode & FMODE_WRITE) &&
-	    ci->i_inline_version != CEPH_INLINE_NONE) {
+	if (ci->i_inline_version != CEPH_INLINE_NONE) {
 		ret = ceph_uninline_data(file);
 		if (ret < 0)
 			goto error;