@@ -244,61 +244,6 @@ static void finish_netfs_read(struct ceph_osd_request *req)
iput(req->r_inode);
}
-static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
-{
- struct netfs_io_request *rreq = subreq->rreq;
- struct inode *inode = rreq->inode;
- struct ceph_mds_reply_info_parsed *rinfo;
- struct ceph_mds_reply_info_in *iinfo;
- struct ceph_mds_request *req;
- struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
- struct ceph_inode_info *ci = ceph_inode(inode);
- struct iov_iter iter;
- ssize_t err = 0;
- size_t len;
- int mode;
-
- __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
- __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
-
- if (subreq->start >= inode->i_size)
- goto out;
-
- /* We need to fetch the inline data. */
- mode = ceph_try_to_choose_auth_mds(inode, CEPH_STAT_CAP_INLINE_DATA);
- req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode);
- if (IS_ERR(req)) {
- err = PTR_ERR(req);
- goto out;
- }
- req->r_ino1 = ci->i_vino;
- req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INLINE_DATA);
- req->r_num_caps = 2;
-
- err = ceph_mdsc_do_request(mdsc, NULL, req);
- if (err < 0)
- goto out;
-
- rinfo = &req->r_reply_info;
- iinfo = &rinfo->targeti;
- if (iinfo->inline_version == CEPH_INLINE_NONE) {
- /* The data got uninlined */
- ceph_mdsc_put_request(req);
- return false;
- }
-
- len = min_t(size_t, iinfo->inline_len - subreq->start, subreq->len);
- iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, subreq->start, len);
- err = copy_to_iter(iinfo->inline_data + subreq->start, len, &iter);
- if (err == 0)
- err = -EFAULT;
-
- ceph_mdsc_put_request(req);
-out:
- netfs_subreq_terminated(subreq, err, false);
- return true;
-}
-
static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
@@ -313,9 +258,13 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
int err = 0;
u64 len = subreq->len;
- if (ci->i_inline_version != CEPH_INLINE_NONE &&
- ceph_netfs_issue_op_inline(subreq))
- return;
+ /*
+ * We have uninlined the inline data when openning the file,
+ * or we must send a GETATTR request to the MDS, which is
+ * buggy and will cause deadlock while holding the Fcr
+ * reference in ceph_filemap_fault().
+ */
+ BUG_ON(ci->i_inline_version != CEPH_INLINE_NONE);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, subreq->start, &len,
0, 1, CEPH_OSD_OP_READ,
@@ -241,8 +241,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
INIT_LIST_HEAD(&fi->rw_contexts);
fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen);
- if ((file->f_mode & FMODE_WRITE) &&
- ci->i_inline_version != CEPH_INLINE_NONE) {
+ if (ci->i_inline_version != CEPH_INLINE_NONE) {
ret = ceph_uninline_data(file);
if (ret < 0)
goto error;
We can't use getattr to fetch inline data after getting Fcr caps, because it can cause deadlock. The solution is try uniline the inline data when opening the file, thanks David Howells' previous work on uninlining the inline data work. It was caused from one possible call path: ceph_filemap_fault()--> ceph_get_caps(Fcr); filemap_fault()--> do_sync_mmap_readahead()--> page_cache_ra_order()--> read_pages()--> aops->readahead()--> netfs_readahead()--> netfs_begin_read()--> netfs_rreq_submit_slice()--> netfs_read_from_server()--> netfs_ops->issue_read()--> ceph_netfs_issue_read()--> ceph_netfs_issue_op_inline()--> getattr() ceph_pu_caps_ref(Fcr); This because if the Locker state is LOCK_EXEC_MIX for auth MDS, and the replica MDSes' lock state is LOCK_LOCK. Then the kclient could get 'Frwcb' caps from both auth and replica MDSes. But if the getattr is sent to any MDS, the MDS needs to do Locker transition to LOCK_MIX first and then to LOCK_SYNC. But when transfering to LOCK_MIX state the MDS Locker need to revoke the Fcb caps back, but the kclient already holding it and waiting the MDS to finish. Signed-off-by: Xiubo Li <xiubli@redhat.com> --- fs/ceph/addr.c | 65 ++++++-------------------------------------------- fs/ceph/file.c | 3 +-- 2 files changed, 8 insertions(+), 60 deletions(-)