
fs/ceph/io: make ceph_start_io_*() killable

Message ID 20241206165014.165614-1-max.kellermann@ionos.com
State New
Series fs/ceph/io: make ceph_start_io_*() killable

Commit Message

Max Kellermann Dec. 6, 2024, 4:50 p.m. UTC
This allows killing processes that are waiting for a lock while another
process is stuck waiting for the Ceph server.  This is similar to the NFS commit
38a125b31504 ("fs/nfs/io: make nfs_start_io_*() killable").

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
 fs/ceph/file.c | 22 +++++++++++++---------
 fs/ceph/io.c   | 44 +++++++++++++++++++++++++++++++++-----------
 fs/ceph/io.h   |  8 +++++---
 3 files changed, 51 insertions(+), 23 deletions(-)

Comments

Max Kellermann May 19, 2025, 10:15 a.m. UTC | #1
What happened to this patch submission? Similar patches were accepted
in NFS and VFS core.

Ilya Dryomov June 6, 2025, 5:08 p.m. UTC | #2
On Mon, May 19, 2025 at 12:15 PM Max Kellermann
<max.kellermann@ionos.com> wrote:
>
> What happened to this patch submission? Similar patches were accepted
> in NFS and VFS core.

Hi Slava,

Can you take another look?  It doesn't make sense to deviate from NFS
or other filesystems in this area.

Thanks,

                Ilya

Viacheslav Dubeyko June 6, 2025, 5:15 p.m. UTC | #3
Hi Ilya,

On Fri, 2025-06-06 at 19:08 +0200, Ilya Dryomov wrote:
> On Mon, May 19, 2025 at 12:15 PM Max Kellermann
> <max.kellermann@ionos.com> wrote:
> > 
> > What happened to this patch submission? Similar patches were accepted
> > in NFS and VFS core.
> 
> Hi Slava,
> 
> Can you take another look?  It doesn't make sense to deviate from NFS
> or other filesystems in this area.
> 
> 

I see the point. Our last discussion ended with the statement that Max doesn't
care about this patch set and we don't need to pick it up. If he has changed his
mind, then I can return to reviewing the patch. :) My understanding was that he
preferred another person for the review. :) This is why I kept silent.

Thanks,
Slava. 

Max Kellermann June 6, 2025, 5:34 p.m. UTC | #4
On Fri, Jun 6, 2025 at 7:15 PM Viacheslav Dubeyko <Slava.Dubeyko@ibm.com> wrote:
> I see the point. Our last discussion ended with the statement that Max doesn't
> care about this patch set and we don't need to pick it up. If he has changed his
> mind, then I can return to reviewing the patch. :) My understanding was that he
> preferred another person for the review. :) This is why I kept silent.

I do care, always did. I answered your questions, but they were not
really about my patch but about whether error handling is necessary.
Well, yes, of course! The whole point of my patch is to add an error
condition that did not exist before. If locking can fail, of course
you have to check that and propagate the error to the caller (and
unlocking after a failed lock of course leads to sorrow). That is so
trivial, I don't even know where to start to explain this if that
isn't already obvious enough.
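
To spell it out with a hypothetical caller (illustrative only, not code from
the patch), the pattern is simply:

	ret = ceph_start_io_read(inode);	/* may now fail, e.g. -EINTR on SIGKILL */
	if (ret)
		return ret;		/* no ceph_end_io_read() here - the lock was never taken */

	ret = do_the_read(inode);	/* placeholder for the actual I/O work */

	ceph_end_io_read(inode);	/* unlock only on the path where the lock was acquired */
	return ret;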

If you keep questioning that, are you really qualified to do a code review?

Max
Viacheslav Dubeyko June 6, 2025, 5:42 p.m. UTC | #5
On Fri, 2025-06-06 at 19:34 +0200, Max Kellermann wrote:
> On Fri, Jun 6, 2025 at 7:15 PM Viacheslav Dubeyko <Slava.Dubeyko@ibm.com> wrote:
> > I see the point. Our last discussion ended with the statement that Max doesn't
> > care about this patch set and we don't need to pick it up. If he has changed his
> > mind, then I can return to reviewing the patch. :) My understanding was that he
> > preferred another person for the review. :) This is why I kept silent.
> 
> I do care, always did. I answered your questions, but they were not
> really about my patch but about whether error handling is necessary.
> Well, yes, of course! The whole point of my patch is to add an error
> condition that did not exist before. If locking can fail, of course
> you have to check that and propagate the error to the caller (and
> unlocking after a failed lock of course leads to sorrow). That is so
> trivial, I don't even know where to start to explain this if that
> isn't already obvious enough.
> 
> If you keep questioning that, are you really qualified to do a code review?
> 

OK. If I am not good enough, then somebody else can do the review. :)

Thanks,
Slava.
Ilya Dryomov June 9, 2025, 1:13 p.m. UTC | #6
On Fri, Jun 6, 2025 at 7:42 PM Viacheslav Dubeyko <Slava.Dubeyko@ibm.com> wrote:
>
> On Fri, 2025-06-06 at 19:34 +0200, Max Kellermann wrote:
> > On Fri, Jun 6, 2025 at 7:15 PM Viacheslav Dubeyko <Slava.Dubeyko@ibm.com> wrote:
> > > I see the point. Our last discussion ended with the statement that Max doesn't
> > > care about this patch set and we don't need to pick it up. If he has changed his
> > > mind, then I can return to reviewing the patch. :) My understanding was that he
> > > preferred another person for the review. :) This is why I kept silent.
> >
> > I do care, always did. I answered your questions, but they were not
> > really about my patch but about whether error handling is necessary.
> > Well, yes, of course! The whole point of my patch is to add an error
> > condition that did not exist before. If locking can fail, of course
> > you have to check that and propagate the error to the caller (and
> > unlocking after a failed lock of course leads to sorrow). That is so
> > trivial, I don't even know where to start to explain this if that
> > isn't already obvious enough.
> >
> > If you keep questioning that, are you really qualified to do a code review?
> >
>
> OK. If I am not good enough, then somebody else can do the review. :)

The patch looked sensible to me, so I have picked it up into the
testing branch after some massaging as part of my own review:

https://github.com/ceph/ceph-client/commit/837b07491efc3e21cf08732f0320ce3ac52951f6

I tried to consider Slava's comments while at it.  AFAICS the points
raised were: the need for __must_check to begin with, whether the new
error needs to be propagated, and the comment on the compiler_attributes.h
include.

For __must_check itself, I kept it -- it makes sense because ignoring
the return value would be a straight ticket to lock imbalance.  Slava's
observation may have been that there are many similar scenarios where
__must_check isn't used, but that can't serve as a justification for
not adopting __must_check IMO.  It's also there in the corresponding
NFS patch.
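
Concretely (an illustrative snippet, not taken from the patch), the misuse
that __must_check now catches at compile time is:

	ceph_start_io_read(inode);	/* warning: ignoring the __must_check return value */
	/* ... I/O ... */
	ceph_end_io_read(inode);	/* unbalanced up_read() if the killable lock had failed */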

Propagating the new error also makes sense to me -- I don't think
CephFS does anything special with EINTR and control wouldn't return to
userspace anyway because of the kill.  I don't see how "we simply need
not to execute the logic" behavior is possible without returning some
kind of error to the caller of e.g. ceph_read_iter().

For the comment, I dropped it because __must_check is very obviously
tied to compiler_attributes.h and such comments aren't common.

As I was touching the patch, I reformatted the ternary if statements to
better fit the rest of the Ceph client code (despite some inconsistencies,
none of the existing ones are formatted that way) and put the return type
on the same line, with __must_check after it, as per the coding style
(Documentation/process/coding-style.rst).
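
Going by that description (the authoritative version is the testing-branch
commit linked above; this is only a sketch of the two style points), the
massaged declarations and call sites look roughly like:

	/* fs/ceph/io.h: type and attribute on one line, __must_check after the type */
	int __must_check ceph_start_io_read(struct inode *inode);

	/* fs/ceph/file.c: plain if/else instead of the ternary */
	if (direct_lock)
		ret = ceph_start_io_direct(inode);
	else
		ret = ceph_start_io_read(inode);
	if (ret)
		return ret;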

Thanks,

                Ilya

Patch

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4b8d59ebda00..d79c0774dc6e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -2127,10 +2127,11 @@  static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	if (ceph_inode_is_shutdown(inode))
 		return -ESTALE;
 
-	if (direct_lock)
-		ceph_start_io_direct(inode);
-	else
-		ceph_start_io_read(inode);
+	ret = direct_lock
+		? ceph_start_io_direct(inode)
+		: ceph_start_io_read(inode);
+	if (ret)
+		return ret;
 
 	if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
 		want |= CEPH_CAP_FILE_CACHE;
@@ -2283,7 +2284,9 @@  static ssize_t ceph_splice_read(struct file *in, loff_t *ppos,
 	    (fi->flags & CEPH_F_SYNC))
 		return copy_splice_read(in, ppos, pipe, len, flags);
 
-	ceph_start_io_read(inode);
+	ret = ceph_start_io_read(inode);
+	if (ret)
+		return ret;
 
 	want = CEPH_CAP_FILE_CACHE;
 	if (fi->fmode & CEPH_FILE_MODE_LAZY)
@@ -2362,10 +2365,11 @@  static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		direct_lock = true;
 
 retry_snap:
-	if (direct_lock)
-		ceph_start_io_direct(inode);
-	else
-		ceph_start_io_write(inode);
+	err = direct_lock
+		? ceph_start_io_direct(inode)
+		: ceph_start_io_write(inode);
+	if (err)
+		goto out_unlocked;
 
 	if (iocb->ki_flags & IOCB_APPEND) {
 		err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
diff --git a/fs/ceph/io.c b/fs/ceph/io.c
index c456509b31c3..2735503bc479 100644
--- a/fs/ceph/io.c
+++ b/fs/ceph/io.c
@@ -47,20 +47,30 @@  static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
  * Note that buffered writes and truncates both take a write lock on
  * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
  */
-void
+int
 ceph_start_io_read(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
+	int err;
 
 	/* Be an optimist! */
-	down_read(&inode->i_rwsem);
+	err = down_read_killable(&inode->i_rwsem);
+	if (err)
+		return err;
+
 	if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT))
-		return;
+		return 0;
 	up_read(&inode->i_rwsem);
+
 	/* Slow path.... */
-	down_write(&inode->i_rwsem);
+	err = down_write_killable(&inode->i_rwsem);
+	if (err)
+		return err;
+
 	ceph_block_o_direct(ci, inode);
 	downgrade_write(&inode->i_rwsem);
+
+	return 0;
 }
 
 /**
@@ -83,11 +93,13 @@  ceph_end_io_read(struct inode *inode)
  * Declare that a buffered write operation is about to start, and ensure
  * that we block all direct I/O.
  */
-void
+int
 ceph_start_io_write(struct inode *inode)
 {
-	down_write(&inode->i_rwsem);
-	ceph_block_o_direct(ceph_inode(inode), inode);
+	int err = down_write_killable(&inode->i_rwsem);
+	if (!err)
+		ceph_block_o_direct(ceph_inode(inode), inode);
+	return err;
 }
 
 /**
@@ -133,20 +145,30 @@  static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
  * Note that buffered writes and truncates both take a write lock on
  * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
  */
-void
+int
 ceph_start_io_direct(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
+	int err;
 
 	/* Be an optimist! */
-	down_read(&inode->i_rwsem);
+	err = down_read_killable(&inode->i_rwsem);
+	if (err)
+		return err;
+
 	if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)
-		return;
+		return 0;
 	up_read(&inode->i_rwsem);
+
 	/* Slow path.... */
-	down_write(&inode->i_rwsem);
+	err = down_write_killable(&inode->i_rwsem);
+	if (err)
+		return err;
+
 	ceph_block_buffered(ci, inode);
 	downgrade_write(&inode->i_rwsem);
+
+	return 0;
 }
 
 /**
diff --git a/fs/ceph/io.h b/fs/ceph/io.h
index fa594cd77348..08d58253f533 100644
--- a/fs/ceph/io.h
+++ b/fs/ceph/io.h
@@ -2,11 +2,13 @@ 
 #ifndef _FS_CEPH_IO_H
 #define _FS_CEPH_IO_H
 
-void ceph_start_io_read(struct inode *inode);
+#include <linux/compiler_attributes.h> // for __must_check
+
+__must_check int ceph_start_io_read(struct inode *inode);
 void ceph_end_io_read(struct inode *inode);
-void ceph_start_io_write(struct inode *inode);
+__must_check int ceph_start_io_write(struct inode *inode);
 void ceph_end_io_write(struct inode *inode);
-void ceph_start_io_direct(struct inode *inode);
+__must_check int ceph_start_io_direct(struct inode *inode);
 void ceph_end_io_direct(struct inode *inode);
 
 #endif /* FS_CEPH_IO_H */