Message ID | 20241206165014.165614-1-max.kellermann@ionos.com |
---|---|
State | New |
Headers | show |
Series | fs/ceph/io: make ceph_start_io_*() killable | expand |
What happened to this patch submission? Similar patches were accepted in NFS and VFS core. On Fri, Dec 6, 2024 at 5:50 PM Max Kellermann <max.kellermann@ionos.com> wrote: > > This allows killing processes that wait for a lock when one process is > stuck waiting for the Ceph server. This is similar to the NFS commit > 38a125b31504 ("fs/nfs/io: make nfs_start_io_*() killable"). > > Signed-off-by: Max Kellermann <max.kellermann@ionos.com> > --- > fs/ceph/file.c | 22 +++++++++++++--------- > fs/ceph/io.c | 44 +++++++++++++++++++++++++++++++++----------- > fs/ceph/io.h | 8 +++++--- > 3 files changed, 51 insertions(+), 23 deletions(-) > > diff --git a/fs/ceph/file.c b/fs/ceph/file.c > index 4b8d59ebda00..d79c0774dc6e 100644 > --- a/fs/ceph/file.c > +++ b/fs/ceph/file.c > @@ -2127,10 +2127,11 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) > if (ceph_inode_is_shutdown(inode)) > return -ESTALE; > > - if (direct_lock) > - ceph_start_io_direct(inode); > - else > - ceph_start_io_read(inode); > + ret = direct_lock > + ? ceph_start_io_direct(inode) > + : ceph_start_io_read(inode); > + if (ret) > + return ret; > > if (!(fi->flags & CEPH_F_SYNC) && !direct_lock) > want |= CEPH_CAP_FILE_CACHE; > @@ -2283,7 +2284,9 @@ static ssize_t ceph_splice_read(struct file *in, loff_t *ppos, > (fi->flags & CEPH_F_SYNC)) > return copy_splice_read(in, ppos, pipe, len, flags); > > - ceph_start_io_read(inode); > + ret = ceph_start_io_read(inode); > + if (ret) > + return ret; > > want = CEPH_CAP_FILE_CACHE; > if (fi->fmode & CEPH_FILE_MODE_LAZY) > @@ -2362,10 +2365,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) > direct_lock = true; > > retry_snap: > - if (direct_lock) > - ceph_start_io_direct(inode); > - else > - ceph_start_io_write(inode); > + err = direct_lock > + ? ceph_start_io_direct(inode) > + : ceph_start_io_write(inode); > + if (err) > + goto out_unlocked; > > if (iocb->ki_flags & IOCB_APPEND) { > err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false); > diff --git a/fs/ceph/io.c b/fs/ceph/io.c > index c456509b31c3..2735503bc479 100644 > --- a/fs/ceph/io.c > +++ b/fs/ceph/io.c > @@ -47,20 +47,30 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode) > * Note that buffered writes and truncates both take a write lock on > * inode->i_rwsem, meaning that those are serialised w.r.t. the reads. > */ > -void > +int > ceph_start_io_read(struct inode *inode) > { > struct ceph_inode_info *ci = ceph_inode(inode); > + int err; > > /* Be an optimist! */ > - down_read(&inode->i_rwsem); > + err = down_read_killable(&inode->i_rwsem); > + if (err) > + return err; > + > if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) > - return; > + return 0; > up_read(&inode->i_rwsem); > + > /* Slow path.... */ > - down_write(&inode->i_rwsem); > + err = down_write_killable(&inode->i_rwsem); > + if (err) > + return err; > + > ceph_block_o_direct(ci, inode); > downgrade_write(&inode->i_rwsem); > + > + return 0; > } > > /** > @@ -83,11 +93,13 @@ ceph_end_io_read(struct inode *inode) > * Declare that a buffered write operation is about to start, and ensure > * that we block all direct I/O. > */ > -void > +int > ceph_start_io_write(struct inode *inode) > { > - down_write(&inode->i_rwsem); > - ceph_block_o_direct(ceph_inode(inode), inode); > + int err = down_write_killable(&inode->i_rwsem); > + if (!err) > + ceph_block_o_direct(ceph_inode(inode), inode); > + return err; > } > > /** > @@ -133,20 +145,30 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode) > * Note that buffered writes and truncates both take a write lock on > * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT. > */ > -void > +int > ceph_start_io_direct(struct inode *inode) > { > struct ceph_inode_info *ci = ceph_inode(inode); > + int err; > > /* Be an optimist! */ > - down_read(&inode->i_rwsem); > + err = down_read_killable(&inode->i_rwsem); > + if (err) > + return err; > + > if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) > - return; > + return 0; > up_read(&inode->i_rwsem); > + > /* Slow path.... */ > - down_write(&inode->i_rwsem); > + err = down_write_killable(&inode->i_rwsem); > + if (err) > + return err; > + > ceph_block_buffered(ci, inode); > downgrade_write(&inode->i_rwsem); > + > + return 0; > } > > /** > diff --git a/fs/ceph/io.h b/fs/ceph/io.h > index fa594cd77348..08d58253f533 100644 > --- a/fs/ceph/io.h > +++ b/fs/ceph/io.h > @@ -2,11 +2,13 @@ > #ifndef _FS_CEPH_IO_H > #define _FS_CEPH_IO_H > > -void ceph_start_io_read(struct inode *inode); > +#include <linux/compiler_attributes.h> // for __must_check > + > +__must_check int ceph_start_io_read(struct inode *inode); > void ceph_end_io_read(struct inode *inode); > -void ceph_start_io_write(struct inode *inode); > +__must_check int ceph_start_io_write(struct inode *inode); > void ceph_end_io_write(struct inode *inode); > -void ceph_start_io_direct(struct inode *inode); > +__must_check int ceph_start_io_direct(struct inode *inode); > void ceph_end_io_direct(struct inode *inode); > > #endif /* FS_CEPH_IO_H */ > -- > 2.45.2 >
On Mon, May 19, 2025 at 12:15 PM Max Kellermann <max.kellermann@ionos.com> wrote: > > What happened to this patch submission? Similar patches were accepted > in NFS and VFS core. Hi Slava, Can you take another look? It doesn't make sense to deviate from NFS or other filesystems in this area. Thanks, Ilya > > On Fri, Dec 6, 2024 at 5:50 PM Max Kellermann <max.kellermann@ionos.com> wrote: > > > > This allows killing processes that wait for a lock when one process is > > stuck waiting for the Ceph server. This is similar to the NFS commit > > 38a125b31504 ("fs/nfs/io: make nfs_start_io_*() killable"). > > > > Signed-off-by: Max Kellermann <max.kellermann@ionos.com> > > --- > > fs/ceph/file.c | 22 +++++++++++++--------- > > fs/ceph/io.c | 44 +++++++++++++++++++++++++++++++++----------- > > fs/ceph/io.h | 8 +++++--- > > 3 files changed, 51 insertions(+), 23 deletions(-) > > > > diff --git a/fs/ceph/file.c b/fs/ceph/file.c > > index 4b8d59ebda00..d79c0774dc6e 100644 > > --- a/fs/ceph/file.c > > +++ b/fs/ceph/file.c > > @@ -2127,10 +2127,11 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) > > if (ceph_inode_is_shutdown(inode)) > > return -ESTALE; > > > > - if (direct_lock) > > - ceph_start_io_direct(inode); > > - else > > - ceph_start_io_read(inode); > > + ret = direct_lock > > + ? ceph_start_io_direct(inode) > > + : ceph_start_io_read(inode); > > + if (ret) > > + return ret; > > > > if (!(fi->flags & CEPH_F_SYNC) && !direct_lock) > > want |= CEPH_CAP_FILE_CACHE; > > @@ -2283,7 +2284,9 @@ static ssize_t ceph_splice_read(struct file *in, loff_t *ppos, > > (fi->flags & CEPH_F_SYNC)) > > return copy_splice_read(in, ppos, pipe, len, flags); > > > > - ceph_start_io_read(inode); > > + ret = ceph_start_io_read(inode); > > + if (ret) > > + return ret; > > > > want = CEPH_CAP_FILE_CACHE; > > if (fi->fmode & CEPH_FILE_MODE_LAZY) > > @@ -2362,10 +2365,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) > > direct_lock = true; > > > > retry_snap: > > - if (direct_lock) > > - ceph_start_io_direct(inode); > > - else > > - ceph_start_io_write(inode); > > + err = direct_lock > > + ? ceph_start_io_direct(inode) > > + : ceph_start_io_write(inode); > > + if (err) > > + goto out_unlocked; > > > > if (iocb->ki_flags & IOCB_APPEND) { > > err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false); > > diff --git a/fs/ceph/io.c b/fs/ceph/io.c > > index c456509b31c3..2735503bc479 100644 > > --- a/fs/ceph/io.c > > +++ b/fs/ceph/io.c > > @@ -47,20 +47,30 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode) > > * Note that buffered writes and truncates both take a write lock on > > * inode->i_rwsem, meaning that those are serialised w.r.t. the reads. > > */ > > -void > > +int > > ceph_start_io_read(struct inode *inode) > > { > > struct ceph_inode_info *ci = ceph_inode(inode); > > + int err; > > > > /* Be an optimist! */ > > - down_read(&inode->i_rwsem); > > + err = down_read_killable(&inode->i_rwsem); > > + if (err) > > + return err; > > + > > if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) > > - return; > > + return 0; > > up_read(&inode->i_rwsem); > > + > > /* Slow path.... */ > > - down_write(&inode->i_rwsem); > > + err = down_write_killable(&inode->i_rwsem); > > + if (err) > > + return err; > > + > > ceph_block_o_direct(ci, inode); > > downgrade_write(&inode->i_rwsem); > > + > > + return 0; > > } > > > > /** > > @@ -83,11 +93,13 @@ ceph_end_io_read(struct inode *inode) > > * Declare that a buffered write operation is about to start, and ensure > > * that we block all direct I/O. > > */ > > -void > > +int > > ceph_start_io_write(struct inode *inode) > > { > > - down_write(&inode->i_rwsem); > > - ceph_block_o_direct(ceph_inode(inode), inode); > > + int err = down_write_killable(&inode->i_rwsem); > > + if (!err) > > + ceph_block_o_direct(ceph_inode(inode), inode); > > + return err; > > } > > > > /** > > @@ -133,20 +145,30 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode) > > * Note that buffered writes and truncates both take a write lock on > > * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT. > > */ > > -void > > +int > > ceph_start_io_direct(struct inode *inode) > > { > > struct ceph_inode_info *ci = ceph_inode(inode); > > + int err; > > > > /* Be an optimist! */ > > - down_read(&inode->i_rwsem); > > + err = down_read_killable(&inode->i_rwsem); > > + if (err) > > + return err; > > + > > if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) > > - return; > > + return 0; > > up_read(&inode->i_rwsem); > > + > > /* Slow path.... */ > > - down_write(&inode->i_rwsem); > > + err = down_write_killable(&inode->i_rwsem); > > + if (err) > > + return err; > > + > > ceph_block_buffered(ci, inode); > > downgrade_write(&inode->i_rwsem); > > + > > + return 0; > > } > > > > /** > > diff --git a/fs/ceph/io.h b/fs/ceph/io.h > > index fa594cd77348..08d58253f533 100644 > > --- a/fs/ceph/io.h > > +++ b/fs/ceph/io.h > > @@ -2,11 +2,13 @@ > > #ifndef _FS_CEPH_IO_H > > #define _FS_CEPH_IO_H > > > > -void ceph_start_io_read(struct inode *inode); > > +#include <linux/compiler_attributes.h> // for __must_check > > + > > +__must_check int ceph_start_io_read(struct inode *inode); > > void ceph_end_io_read(struct inode *inode); > > -void ceph_start_io_write(struct inode *inode); > > +__must_check int ceph_start_io_write(struct inode *inode); > > void ceph_end_io_write(struct inode *inode); > > -void ceph_start_io_direct(struct inode *inode); > > +__must_check int ceph_start_io_direct(struct inode *inode); > > void ceph_end_io_direct(struct inode *inode); > > > > #endif /* FS_CEPH_IO_H */ > > -- > > 2.45.2 > >
Hi Ilya, On Fri, 2025-06-06 at 19:08 +0200, Ilya Dryomov wrote: > On Mon, May 19, 2025 at 12:15 PM Max Kellermann > <max.kellermann@ionos.com> wrote: > > > > What happened to this patch submission? Similar patches were accepted > > in NFS and VFS core. > > Hi Slava, > > Can you take another look? It doesn't make sense to deviate from NFS > or other filesystems in this area. > > I see the point. Our last discussion has finished with statement that Max doesn't care about this patch set and we don't need to pick it up. If he changed his mind, then I can return to the review of the patch. :) My understanding was that he prefers another person for the review. :) This is why I keep silence. Thanks, Slava. > > > > > On Fri, Dec 6, 2024 at 5:50 PM Max Kellermann <max.kellermann@ionos.com> wrote: > > > > > > This allows killing processes that wait for a lock when one process is > > > stuck waiting for the Ceph server. This is similar to the NFS commit > > > 38a125b31504 ("fs/nfs/io: make nfs_start_io_*() killable"). > > > > > > Signed-off-by: Max Kellermann <max.kellermann@ionos.com> > > > --- > > > fs/ceph/file.c | 22 +++++++++++++--------- > > > fs/ceph/io.c | 44 +++++++++++++++++++++++++++++++++----------- > > > fs/ceph/io.h | 8 +++++--- > > > 3 files changed, 51 insertions(+), 23 deletions(-) > > > > > > diff --git a/fs/ceph/file.c b/fs/ceph/file.c > > > index 4b8d59ebda00..d79c0774dc6e 100644 > > > --- a/fs/ceph/file.c > > > +++ b/fs/ceph/file.c > > > @@ -2127,10 +2127,11 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) > > > if (ceph_inode_is_shutdown(inode)) > > > return -ESTALE; > > > > > > - if (direct_lock) > > > - ceph_start_io_direct(inode); > > > - else > > > - ceph_start_io_read(inode); > > > + ret = direct_lock > > > + ? ceph_start_io_direct(inode) > > > + : ceph_start_io_read(inode); > > > + if (ret) > > > + return ret; > > > > > > if (!(fi->flags & CEPH_F_SYNC) && !direct_lock) > > > want |= CEPH_CAP_FILE_CACHE; > > > @@ -2283,7 +2284,9 @@ static ssize_t ceph_splice_read(struct file *in, loff_t *ppos, > > > (fi->flags & CEPH_F_SYNC)) > > > return copy_splice_read(in, ppos, pipe, len, flags); > > > > > > - ceph_start_io_read(inode); > > > + ret = ceph_start_io_read(inode); > > > + if (ret) > > > + return ret; > > > > > > want = CEPH_CAP_FILE_CACHE; > > > if (fi->fmode & CEPH_FILE_MODE_LAZY) > > > @@ -2362,10 +2365,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) > > > direct_lock = true; > > > > > > retry_snap: > > > - if (direct_lock) > > > - ceph_start_io_direct(inode); > > > - else > > > - ceph_start_io_write(inode); > > > + err = direct_lock > > > + ? ceph_start_io_direct(inode) > > > + : ceph_start_io_write(inode); > > > + if (err) > > > + goto out_unlocked; > > > > > > if (iocb->ki_flags & IOCB_APPEND) { > > > err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false); > > > diff --git a/fs/ceph/io.c b/fs/ceph/io.c > > > index c456509b31c3..2735503bc479 100644 > > > --- a/fs/ceph/io.c > > > +++ b/fs/ceph/io.c > > > @@ -47,20 +47,30 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode) > > > * Note that buffered writes and truncates both take a write lock on > > > * inode->i_rwsem, meaning that those are serialised w.r.t. the reads. > > > */ > > > -void > > > +int > > > ceph_start_io_read(struct inode *inode) > > > { > > > struct ceph_inode_info *ci = ceph_inode(inode); > > > + int err; > > > > > > /* Be an optimist! */ > > > - down_read(&inode->i_rwsem); > > > + err = down_read_killable(&inode->i_rwsem); > > > + if (err) > > > + return err; > > > + > > > if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) > > > - return; > > > + return 0; > > > up_read(&inode->i_rwsem); > > > + > > > /* Slow path.... */ > > > - down_write(&inode->i_rwsem); > > > + err = down_write_killable(&inode->i_rwsem); > > > + if (err) > > > + return err; > > > + > > > ceph_block_o_direct(ci, inode); > > > downgrade_write(&inode->i_rwsem); > > > + > > > + return 0; > > > } > > > > > > /** > > > @@ -83,11 +93,13 @@ ceph_end_io_read(struct inode *inode) > > > * Declare that a buffered write operation is about to start, and ensure > > > * that we block all direct I/O. > > > */ > > > -void > > > +int > > > ceph_start_io_write(struct inode *inode) > > > { > > > - down_write(&inode->i_rwsem); > > > - ceph_block_o_direct(ceph_inode(inode), inode); > > > + int err = down_write_killable(&inode->i_rwsem); > > > + if (!err) > > > + ceph_block_o_direct(ceph_inode(inode), inode); > > > + return err; > > > } > > > > > > /** > > > @@ -133,20 +145,30 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode) > > > * Note that buffered writes and truncates both take a write lock on > > > * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT. > > > */ > > > -void > > > +int > > > ceph_start_io_direct(struct inode *inode) > > > { > > > struct ceph_inode_info *ci = ceph_inode(inode); > > > + int err; > > > > > > /* Be an optimist! */ > > > - down_read(&inode->i_rwsem); > > > + err = down_read_killable(&inode->i_rwsem); > > > + if (err) > > > + return err; > > > + > > > if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) > > > - return; > > > + return 0; > > > up_read(&inode->i_rwsem); > > > + > > > /* Slow path.... */ > > > - down_write(&inode->i_rwsem); > > > + err = down_write_killable(&inode->i_rwsem); > > > + if (err) > > > + return err; > > > + > > > ceph_block_buffered(ci, inode); > > > downgrade_write(&inode->i_rwsem); > > > + > > > + return 0; > > > } > > > > > > /** > > > diff --git a/fs/ceph/io.h b/fs/ceph/io.h > > > index fa594cd77348..08d58253f533 100644 > > > --- a/fs/ceph/io.h > > > +++ b/fs/ceph/io.h > > > @@ -2,11 +2,13 @@ > > > #ifndef _FS_CEPH_IO_H > > > #define _FS_CEPH_IO_H > > > > > > -void ceph_start_io_read(struct inode *inode); > > > +#include <linux/compiler_attributes.h> // for __must_check > > > + > > > +__must_check int ceph_start_io_read(struct inode *inode); > > > void ceph_end_io_read(struct inode *inode); > > > -void ceph_start_io_write(struct inode *inode); > > > +__must_check int ceph_start_io_write(struct inode *inode); > > > void ceph_end_io_write(struct inode *inode); > > > -void ceph_start_io_direct(struct inode *inode); > > > +__must_check int ceph_start_io_direct(struct inode *inode); > > > void ceph_end_io_direct(struct inode *inode); > > > > > > #endif /* FS_CEPH_IO_H */ > > > -- > > > 2.45.2 > > >
On Fri, Jun 6, 2025 at 7:15 PM Viacheslav Dubeyko <Slava.Dubeyko@ibm.com> wrote: > I see the point. Our last discussion has finished with statement that Max > doesn't care about this patch set and we don't need to pick it up. If he changed > his mind, then I can return to the review of the patch. :) My understanding was > that he prefers another person for the review. :) This is why I keep silence. I do care, always did. I answered your questions, but they were not really about my patch but about whether error handling is necessary. Well, yes, of course! The whole point of my patch is to add an error condition that did not exist before. If locking can fail, of course you have to check that and propagate the error to the caller (and unlocking after a failed lock of course leads to sorrow). That is so trivial, I don't even know where to start to explain this if that isn't already obvious enough. If you keep questioning that, are you really qualified to do a code review? Max
On Fri, 2025-06-06 at 19:34 +0200, Max Kellermann wrote: > On Fri, Jun 6, 2025 at 7:15 PM Viacheslav Dubeyko <Slava.Dubeyko@ibm.com> wrote: > > I see the point. Our last discussion has finished with statement that Max > > doesn't care about this patch set and we don't need to pick it up. If he changed > > his mind, then I can return to the review of the patch. :) My understanding was > > that he prefers another person for the review. :) This is why I keep silence. > > I do care, always did. I answered your questions, but they were not > really about my patch but about whether error handling is necessary. > Well, yes, of course! The whole point of my patch is to add an error > condition that did not exist before. If locking can fail, of course > you have to check that and propagate the error to the caller (and > unlocking after a failed lock of course leads to sorrow). That is so > trivial, I don't even know where to start to explain this if that > isn't already obvious enough. > > If you keep questioning that, are you really qualified to do a code review? > OK. If I am not good enough, then somebody else can do the review. :) Thanks, Slava.
On Fri, Jun 6, 2025 at 7:42 PM Viacheslav Dubeyko <Slava.Dubeyko@ibm.com> wrote: > > On Fri, 2025-06-06 at 19:34 +0200, Max Kellermann wrote: > > On Fri, Jun 6, 2025 at 7:15 PM Viacheslav Dubeyko <Slava.Dubeyko@ibm.com> wrote: > > > I see the point. Our last discussion has finished with statement that Max > > > doesn't care about this patch set and we don't need to pick it up. If he changed > > > his mind, then I can return to the review of the patch. :) My understanding was > > > that he prefers another person for the review. :) This is why I keep silence. > > > > I do care, always did. I answered your questions, but they were not > > really about my patch but about whether error handling is necessary. > > Well, yes, of course! The whole point of my patch is to add an error > > condition that did not exist before. If locking can fail, of course > > you have to check that and propagate the error to the caller (and > > unlocking after a failed lock of course leads to sorrow). That is so > > trivial, I don't even know where to start to explain this if that > > isn't already obvious enough. > > > > If you keep questioning that, are you really qualified to do a code review? > > > > OK. If I am not good enough, then somebody else can do the review. :) The patch looked sensible to me, so I have picked it up into the testing branch after some massaging as part of my own review: https://github.com/ceph/ceph-client/commit/837b07491efc3e21cf08732f0320ce3ac52951f6 I tried to consider Slava's comments while at it. AFAICS the points raised were: the need for __must_check to begin with, whether the new error needs to be propagated and the comment on compiler_attributes.h include. For __must_check itself, I kept it -- it makes sense because ignoring the return value would be a straight ticket to lock imbalance. Slava's observation may have been that there are many similar scenarios where __must_check isn't used, but that can't serve as a justification for not adopting __must_check IMO. It's also there in the corresponding NFS patch. Propagating the new error also makes sense to me -- I don't think CephFS does anything special with EINTR and control wouldn't return to userspace anyway because of the kill. I don't see how "we simply need not to execute the logic" behavior is possible without returning some kind of error to the caller of e.g. ceph_read_iter(). For the comment, I dropped it because __must_check is very obviously tied to compiler_attributes.h and such comments aren't common. As I was touching the patch, I formatted the ternary if statements to fit the rest of the Ceph client code more (despite inconsistencies none of the existing ones are formatted that way) and made the return type to be on the same line and __must_check come after it as per the coding style (Documentation/process/coding-style.rst). Thanks, Ilya
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 4b8d59ebda00..d79c0774dc6e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -2127,10 +2127,11 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) if (ceph_inode_is_shutdown(inode)) return -ESTALE; - if (direct_lock) - ceph_start_io_direct(inode); - else - ceph_start_io_read(inode); + ret = direct_lock + ? ceph_start_io_direct(inode) + : ceph_start_io_read(inode); + if (ret) + return ret; if (!(fi->flags & CEPH_F_SYNC) && !direct_lock) want |= CEPH_CAP_FILE_CACHE; @@ -2283,7 +2284,9 @@ static ssize_t ceph_splice_read(struct file *in, loff_t *ppos, (fi->flags & CEPH_F_SYNC)) return copy_splice_read(in, ppos, pipe, len, flags); - ceph_start_io_read(inode); + ret = ceph_start_io_read(inode); + if (ret) + return ret; want = CEPH_CAP_FILE_CACHE; if (fi->fmode & CEPH_FILE_MODE_LAZY) @@ -2362,10 +2365,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) direct_lock = true; retry_snap: - if (direct_lock) - ceph_start_io_direct(inode); - else - ceph_start_io_write(inode); + err = direct_lock + ? ceph_start_io_direct(inode) + : ceph_start_io_write(inode); + if (err) + goto out_unlocked; if (iocb->ki_flags & IOCB_APPEND) { err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false); diff --git a/fs/ceph/io.c b/fs/ceph/io.c index c456509b31c3..2735503bc479 100644 --- a/fs/ceph/io.c +++ b/fs/ceph/io.c @@ -47,20 +47,30 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode) * Note that buffered writes and truncates both take a write lock on * inode->i_rwsem, meaning that those are serialised w.r.t. the reads. */ -void +int ceph_start_io_read(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); + int err; /* Be an optimist! */ - down_read(&inode->i_rwsem); + err = down_read_killable(&inode->i_rwsem); + if (err) + return err; + if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) - return; + return 0; up_read(&inode->i_rwsem); + /* Slow path.... */ - down_write(&inode->i_rwsem); + err = down_write_killable(&inode->i_rwsem); + if (err) + return err; + ceph_block_o_direct(ci, inode); downgrade_write(&inode->i_rwsem); + + return 0; } /** @@ -83,11 +93,13 @@ ceph_end_io_read(struct inode *inode) * Declare that a buffered write operation is about to start, and ensure * that we block all direct I/O. */ -void +int ceph_start_io_write(struct inode *inode) { - down_write(&inode->i_rwsem); - ceph_block_o_direct(ceph_inode(inode), inode); + int err = down_write_killable(&inode->i_rwsem); + if (!err) + ceph_block_o_direct(ceph_inode(inode), inode); + return err; } /** @@ -133,20 +145,30 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode) * Note that buffered writes and truncates both take a write lock on * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT. */ -void +int ceph_start_io_direct(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); + int err; /* Be an optimist! */ - down_read(&inode->i_rwsem); + err = down_read_killable(&inode->i_rwsem); + if (err) + return err; + if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) - return; + return 0; up_read(&inode->i_rwsem); + /* Slow path.... */ - down_write(&inode->i_rwsem); + err = down_write_killable(&inode->i_rwsem); + if (err) + return err; + ceph_block_buffered(ci, inode); downgrade_write(&inode->i_rwsem); + + return 0; } /** diff --git a/fs/ceph/io.h b/fs/ceph/io.h index fa594cd77348..08d58253f533 100644 --- a/fs/ceph/io.h +++ b/fs/ceph/io.h @@ -2,11 +2,13 @@ #ifndef _FS_CEPH_IO_H #define _FS_CEPH_IO_H -void ceph_start_io_read(struct inode *inode); +#include <linux/compiler_attributes.h> // for __must_check + +__must_check int ceph_start_io_read(struct inode *inode); void ceph_end_io_read(struct inode *inode); -void ceph_start_io_write(struct inode *inode); +__must_check int ceph_start_io_write(struct inode *inode); void ceph_end_io_write(struct inode *inode); -void ceph_start_io_direct(struct inode *inode); +__must_check int ceph_start_io_direct(struct inode *inode); void ceph_end_io_direct(struct inode *inode); #endif /* FS_CEPH_IO_H */
This allows killing processes that wait for a lock when one process is stuck waiting for the Ceph server. This is similar to the NFS commit 38a125b31504 ("fs/nfs/io: make nfs_start_io_*() killable"). Signed-off-by: Max Kellermann <max.kellermann@ionos.com> --- fs/ceph/file.c | 22 +++++++++++++--------- fs/ceph/io.c | 44 +++++++++++++++++++++++++++++++++----------- fs/ceph/io.h | 8 +++++--- 3 files changed, 51 insertions(+), 23 deletions(-)