Message ID | 20211208124528.679831-2-xiubli@redhat.com
---|---
State | New
Series | ceph: size handling for the fscrypt
On Wed, 2021-12-08 at 20:45 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> Always return the last object's version.
>
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/file.c  | 8 ++++++--
>  fs/ceph/super.h | 3 ++-
>  2 files changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index b42158c9aa16..9279b8642add 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -883,7 +883,8 @@ enum {
>   * only return a short read to the caller if we hit EOF.
>   */
>  ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
> -                         struct iov_iter *to, int *retry_op)
> +                         struct iov_iter *to, int *retry_op,
> +                         u64 *last_objver)
>  {
>          struct ceph_inode_info *ci = ceph_inode(inode);
>          struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> @@ -950,6 +951,9 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>                                           req->r_end_latency,
>                                           len, ret);
>
> +        if (last_objver)
> +                *last_objver = req->r_version;
> +

Much better! That said, this might be unreliable if (say) the first OSD
read was successful and then the second one failed on a long read that
spans objects. We'd want to return a short read in that case, but the
last_objver would end up being set to 0.

I think you shouldn't set last_objver unless the call is going to return
>0, and then you want to set it to the object version of the last
successful read in the series.

>          ceph_osdc_put_request(req);
>
>          i_size = i_size_read(inode);
> @@ -1020,7 +1024,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>               (unsigned)iov_iter_count(to),
>               (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
>
> -        return __ceph_sync_read(inode, &iocb->ki_pos, to, retry_op);
> +        return __ceph_sync_read(inode, &iocb->ki_pos, to, retry_op, NULL);
>  }
>
>  struct ceph_aio_request {
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 84fec46308b0..a7bdb28af595 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -1258,7 +1258,8 @@ extern int ceph_open(struct inode *inode, struct file *file);
>  extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
>                              struct file *file, unsigned flags, umode_t mode);
>  extern ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
> -                                struct iov_iter *to, int *retry_op);
> +                                struct iov_iter *to, int *retry_op,
> +                                u64 *last_objver);
>  extern int ceph_release(struct inode *inode, struct file *filp);
>  extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
>                                    char *data, size_t len);
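The semantics being requested here can be distilled into a minimal,
self-contained userspace sketch (hypothetical code: read_one_object(), the
4096-byte chunks, and the version values are invented stand-ins for the
per-object OSD requests and req->r_version; none of this is code from the
patch):

#include <stdint.h>
#include <stdio.h>

/*
 * Stand-in for one per-object OSD read: succeeds for the first two
 * objects and fails (-EIO) for the third, reporting that object's
 * version via *ver.
 */
static long read_one_object(int idx, uint64_t *ver)
{
        if (idx >= 2)
                return -5;      /* -EIO: a later object read failed */
        *ver = 100 + idx;       /* stands in for req->r_version */
        return 4096;
}

/*
 * The suggested pattern: keep the version of the last *successful*
 * read in a local, and publish it to the optional out-parameter only
 * when the call as a whole returns > 0 bytes. A partially failed
 * multi-object read then still reports the short read, and last_objver
 * reflects the last object actually read instead of being left at 0.
 */
static long sync_read_sketch(int nobjects, uint64_t *last_objver)
{
        uint64_t objver = 0;
        long total = 0, ret = 0;
        int i;

        for (i = 0; i < nobjects; i++) {
                uint64_t ver;

                ret = read_one_object(i, &ver);
                if (ret < 0)
                        break;
                objver = ver;
                total += ret;
        }

        if (total > 0)
                ret = total;    /* short read: report the bytes we got */
        if (last_objver && ret > 0)
                *last_objver = objver;
        return ret;
}

int main(void)
{
        uint64_t objver = 0;
        long ret = sync_read_sketch(3, &objver);

        /* Prints ret=8192 objver=101: a short read with a valid version. */
        printf("ret=%ld objver=%llu\n", ret, (unsigned long long)objver);
        return 0;
}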
On 12/9/21 2:22 AM, Jeff Layton wrote:
> On Wed, 2021-12-08 at 10:33 -0500, Jeff Layton wrote:
>> On Wed, 2021-12-08 at 20:45 +0800, xiubli@redhat.com wrote:
>>> From: Xiubo Li <xiubli@redhat.com>
>>>
>>> Always return the last object's version.
>>>
>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>> ---
>>>  fs/ceph/file.c  | 8 ++++++--
>>>  fs/ceph/super.h | 3 ++-
>>>  2 files changed, 8 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>>> index b42158c9aa16..9279b8642add 100644
>>> --- a/fs/ceph/file.c
>>> +++ b/fs/ceph/file.c
>>> @@ -883,7 +883,8 @@ enum {
>>>   * only return a short read to the caller if we hit EOF.
>>>   */
>>>  ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>>> -                         struct iov_iter *to, int *retry_op)
>>> +                         struct iov_iter *to, int *retry_op,
>>> +                         u64 *last_objver)
>>>  {
>>>          struct ceph_inode_info *ci = ceph_inode(inode);
>>>          struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
>>> @@ -950,6 +951,9 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>>>                                           req->r_end_latency,
>>>                                           len, ret);
>>>
>>> +        if (last_objver)
>>> +                *last_objver = req->r_version;
>>> +
>> Much better! That said, this might be unreliable if (say) the first OSD
>> read was successful and then the second one failed on a long read that
>> spans objects. We'd want to return a short read in that case, but the
>> last_objver would end up being set to 0.
>>
>> I think you shouldn't set last_objver unless the call is going to return
>> >0, and then you want to set it to the object version of the last
>> successful read in the series.
>>

Make sense to me.

> Since this was a simple change, I went ahead and folded the patch below
> into this patch and updated wip-fscrypt-size. Let me know if you have
> any objections:
>
> [PATCH] SQUASH: only set last_objver iff we're returning success
>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>  fs/ceph/file.c | 8 +++++---
>  1 file changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 9279b8642add..ee6fb642cf05 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -893,6 +893,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>          u64 off = *ki_pos;
>          u64 len = iov_iter_count(to);
>          u64 i_size = i_size_read(inode);
> +        u64 objver = 0;
>
>          dout("sync_read on inode %p %llu~%u\n", inode, *ki_pos, (unsigned)len);
>
> @@ -951,9 +952,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>                                           req->r_end_latency,
>                                           len, ret);
>
> -        if (last_objver)
> -                *last_objver = req->r_version;
> -
> +        objver = req->r_version;
>          ceph_osdc_put_request(req);
>
>          i_size = i_size_read(inode);
> @@ -1010,6 +1009,9 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>                  }
>          }
>
> +        if (ret > 0)
> +                *last_objver = objver;
> +
>          dout("sync_read result %zd retry_op %d\n", ret, *retry_op);
>          return ret;
>  }

This also looks good to me :-)

Thanks.

BRs
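One detail worth noting when reading the squash: the first patch's caller
ceph_sync_read() passes NULL for last_objver, so the now-unconditional
dereference presumably still needs the NULL guard in the final merged form,
along the lines of this sketch (a guess at the shape, not quoted from the
branch):

        /*
         * Publish the version only to callers that asked for it, and
         * only when the read as a whole is returning data.
         */
        if (last_objver && ret > 0)
                *last_objver = objver;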
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index b42158c9aa16..9279b8642add 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -883,7 +883,8 @@ enum {
  * only return a short read to the caller if we hit EOF.
  */
 ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
-                         struct iov_iter *to, int *retry_op)
+                         struct iov_iter *to, int *retry_op,
+                         u64 *last_objver)
 {
         struct ceph_inode_info *ci = ceph_inode(inode);
         struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
@@ -950,6 +951,9 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
                                          req->r_end_latency,
                                          len, ret);
 
+        if (last_objver)
+                *last_objver = req->r_version;
+
         ceph_osdc_put_request(req);
 
         i_size = i_size_read(inode);
@@ -1020,7 +1024,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
              (unsigned)iov_iter_count(to),
              (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
 
-        return __ceph_sync_read(inode, &iocb->ki_pos, to, retry_op);
+        return __ceph_sync_read(inode, &iocb->ki_pos, to, retry_op, NULL);
 }
 
 struct ceph_aio_request {
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 84fec46308b0..a7bdb28af595 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1258,7 +1258,8 @@ extern int ceph_open(struct inode *inode, struct file *file);
 extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
                             struct file *file, unsigned flags, umode_t mode);
 extern ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
-                                struct iov_iter *to, int *retry_op);
+                                struct iov_iter *to, int *retry_op,
+                                u64 *last_objver);
 extern int ceph_release(struct inode *inode, struct file *filp);
 extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
                                   char *data, size_t len);
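For context, a hypothetical call site showing the intended contract of the
new out-parameter (illustrative only: pos, to, retry_op, and the buffer are
assumed locals, and the fscrypt use described in the comment is the
motivation of the parent series rather than code shown here):

        u64 objver = 0;
        ssize_t ret;

        /*
         * Pass a real pointer instead of NULL to receive the version of
         * the last object that was successfully read.
         */
        ret = __ceph_sync_read(inode, &pos, to, &retry_op, &objver);
        if (ret > 0) {
                /*
                 * objver is only meaningful here; on error or a
                 * zero-length read it is left untouched. The fscrypt
                 * size-handling work can use it to detect a racing
                 * write before a read-modify-write of the file's last
                 * encrypted block.
                 */
        }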