diff mbox series

[v13,05/13] vdpa: Add reset callback in vdpa_config_ops

Message ID 20210831103634.33-6-xieyongji@bytedance.com
State New
Headers show
Series Introduce VDUSE - vDPA Device in Userspace | expand

Commit Message

Yongji Xie Aug. 31, 2021, 10:36 a.m. UTC
This adds a new callback to support device specific reset
behavior. The vdpa bus driver will call the reset function
instead of setting status to zero during resetting.

Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
---
 drivers/vdpa/ifcvf/ifcvf_main.c   | 35 +++++++++++++++++++++++-----------
 drivers/vdpa/mlx5/net/mlx5_vnet.c | 40 +++++++++++++++++++++++----------------
 drivers/vdpa/vdpa_sim/vdpa_sim.c  | 18 +++++++++++++++---
 drivers/vdpa/virtio_pci/vp_vdpa.c | 15 +++++++++++++--
 drivers/vhost/vdpa.c              |  9 +++++++--
 include/linux/vdpa.h              |  8 ++++++--
 6 files changed, 89 insertions(+), 36 deletions(-)

Comments

Michael S. Tsirkin Sept. 6, 2021, 5:55 a.m. UTC | #1
On Tue, Aug 31, 2021 at 06:36:26PM +0800, Xie Yongji wrote:
> This adds a new callback to support device specific reset

> behavior. The vdpa bus driver will call the reset function

> instead of setting status to zero during resetting.

> 

> Signed-off-by: Xie Yongji <xieyongji@bytedance.com>



This does gloss over a significant change though:


> ---

> @@ -348,12 +352,12 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)

>  	return vdev->dma_dev;

>  }

>  

> -static inline void vdpa_reset(struct vdpa_device *vdev)

> +static inline int vdpa_reset(struct vdpa_device *vdev)

>  {

>  	const struct vdpa_config_ops *ops = vdev->config;

>  

>  	vdev->features_valid = false;

> -	ops->set_status(vdev, 0);

> +	return ops->reset(vdev);

>  }

>  

>  static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)



Unfortunately this breaks virtio_vdpa:


static void virtio_vdpa_reset(struct virtio_device *vdev)
{
        struct vdpa_device *vdpa = vd_get_vdpa(vdev);

        vdpa_reset(vdpa);
}


and there's no easy way to fix this, kernel can't recover
from a reset failure e.g. during driver unbind.

Find a way to disable virtio_vdpa for now?


> -- 

> 2.11.0
Yongji Xie Sept. 6, 2021, 6:09 a.m. UTC | #2
On Mon, Sep 6, 2021 at 1:56 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>

> On Tue, Aug 31, 2021 at 06:36:26PM +0800, Xie Yongji wrote:

> > This adds a new callback to support device specific reset

> > behavior. The vdpa bus driver will call the reset function

> > instead of setting status to zero during resetting.

> >

> > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>

>

>

> This does gloss over a significant change though:

>

>

> > ---

> > @@ -348,12 +352,12 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)

> >       return vdev->dma_dev;

> >  }

> >

> > -static inline void vdpa_reset(struct vdpa_device *vdev)

> > +static inline int vdpa_reset(struct vdpa_device *vdev)

> >  {

> >       const struct vdpa_config_ops *ops = vdev->config;

> >

> >       vdev->features_valid = false;

> > -     ops->set_status(vdev, 0);

> > +     return ops->reset(vdev);

> >  }

> >

> >  static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)

>

>

> Unfortunately this breaks virtio_vdpa:

>

>

> static void virtio_vdpa_reset(struct virtio_device *vdev)

> {

>         struct vdpa_device *vdpa = vd_get_vdpa(vdev);

>

>         vdpa_reset(vdpa);

> }

>

>

> and there's no easy way to fix this, kernel can't recover

> from a reset failure e.g. during driver unbind.

>


Yes, but it should be safe with the protection of software IOTLB even
if the reset() fails during driver unbind.

Thanks,
Yongji
Michael S. Tsirkin Sept. 6, 2021, 6:37 a.m. UTC | #3
On Mon, Sep 06, 2021 at 02:09:25PM +0800, Yongji Xie wrote:
> On Mon, Sep 6, 2021 at 1:56 PM Michael S. Tsirkin <mst@redhat.com> wrote:

> >

> > On Tue, Aug 31, 2021 at 06:36:26PM +0800, Xie Yongji wrote:

> > > This adds a new callback to support device specific reset

> > > behavior. The vdpa bus driver will call the reset function

> > > instead of setting status to zero during resetting.

> > >

> > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>

> >

> >

> > This does gloss over a significant change though:

> >

> >

> > > ---

> > > @@ -348,12 +352,12 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)

> > >       return vdev->dma_dev;

> > >  }

> > >

> > > -static inline void vdpa_reset(struct vdpa_device *vdev)

> > > +static inline int vdpa_reset(struct vdpa_device *vdev)

> > >  {

> > >       const struct vdpa_config_ops *ops = vdev->config;

> > >

> > >       vdev->features_valid = false;

> > > -     ops->set_status(vdev, 0);

> > > +     return ops->reset(vdev);

> > >  }

> > >

> > >  static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)

> >

> >

> > Unfortunately this breaks virtio_vdpa:

> >

> >

> > static void virtio_vdpa_reset(struct virtio_device *vdev)

> > {

> >         struct vdpa_device *vdpa = vd_get_vdpa(vdev);

> >

> >         vdpa_reset(vdpa);

> > }

> >

> >

> > and there's no easy way to fix this, kernel can't recover

> > from a reset failure e.g. during driver unbind.

> >

> 

> Yes, but it should be safe with the protection of software IOTLB even

> if the reset() fails during driver unbind.

> 

> Thanks,

> Yongji


Hmm. I don't see it.
What exactly will happen? What prevents device from poking at
memory after reset? Note that dma unmap in e.g. del_vqs happens
too late.  And what about e.g. interrupts?
E.g. we have this:

        /* Virtqueues are stopped, nothing can use vblk->vdev anymore. */
        vblk->vdev = NULL;

and this is no longer true at this point.


-- 
MST
Yongji Xie Sept. 6, 2021, 7:06 a.m. UTC | #4
On Mon, Sep 6, 2021 at 2:37 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>

> On Mon, Sep 06, 2021 at 02:09:25PM +0800, Yongji Xie wrote:

> > On Mon, Sep 6, 2021 at 1:56 PM Michael S. Tsirkin <mst@redhat.com> wrote:

> > >

> > > On Tue, Aug 31, 2021 at 06:36:26PM +0800, Xie Yongji wrote:

> > > > This adds a new callback to support device specific reset

> > > > behavior. The vdpa bus driver will call the reset function

> > > > instead of setting status to zero during resetting.

> > > >

> > > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>

> > >

> > >

> > > This does gloss over a significant change though:

> > >

> > >

> > > > ---

> > > > @@ -348,12 +352,12 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)

> > > >       return vdev->dma_dev;

> > > >  }

> > > >

> > > > -static inline void vdpa_reset(struct vdpa_device *vdev)

> > > > +static inline int vdpa_reset(struct vdpa_device *vdev)

> > > >  {

> > > >       const struct vdpa_config_ops *ops = vdev->config;

> > > >

> > > >       vdev->features_valid = false;

> > > > -     ops->set_status(vdev, 0);

> > > > +     return ops->reset(vdev);

> > > >  }

> > > >

> > > >  static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)

> > >

> > >

> > > Unfortunately this breaks virtio_vdpa:

> > >

> > >

> > > static void virtio_vdpa_reset(struct virtio_device *vdev)

> > > {

> > >         struct vdpa_device *vdpa = vd_get_vdpa(vdev);

> > >

> > >         vdpa_reset(vdpa);

> > > }

> > >

> > >

> > > and there's no easy way to fix this, kernel can't recover

> > > from a reset failure e.g. during driver unbind.

> > >

> >

> > Yes, but it should be safe with the protection of software IOTLB even

> > if the reset() fails during driver unbind.

> >

> > Thanks,

> > Yongji

>

> Hmm. I don't see it.

> What exactly will happen? What prevents device from poking at

> memory after reset? Note that dma unmap in e.g. del_vqs happens

> too late.


But I didn't see any problems with touching the memory for virtqueues.
The memory should not be freed after dma unmap?

And the memory for the bounce buffer should also be safe to be
accessed by userspace in this case.

> And what about e.g. interrupts?

> E.g. we have this:

>

>         /* Virtqueues are stopped, nothing can use vblk->vdev anymore. */

>         vblk->vdev = NULL;

>

> and this is no longer true at this point.

>


You're right. But I didn't see where the interrupt handler will use
the vblk->vdev.

So it seems to be not too late to fix it:

diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c
b/drivers/vdpa/vdpa_user/vduse_dev.c
index 5c25ff6483ad..ea41a7389a26 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -665,13 +665,13 @@ static void vduse_vdpa_set_config(struct
vdpa_device *vdpa, unsigned int offset,
 static int vduse_vdpa_reset(struct vdpa_device *vdpa)
 {
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+       int ret;

-       if (vduse_dev_set_status(dev, 0))
-               return -EIO;
+       ret = vduse_dev_set_status(dev, 0);

        vduse_dev_reset(dev);

-       return 0;
+       return ret;
 }

 static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)

Thanks,
Yongji
Michael S. Tsirkin Sept. 6, 2021, 8 a.m. UTC | #5
On Mon, Sep 06, 2021 at 03:06:44PM +0800, Yongji Xie wrote:
> On Mon, Sep 6, 2021 at 2:37 PM Michael S. Tsirkin <mst@redhat.com> wrote:

> >

> > On Mon, Sep 06, 2021 at 02:09:25PM +0800, Yongji Xie wrote:

> > > On Mon, Sep 6, 2021 at 1:56 PM Michael S. Tsirkin <mst@redhat.com> wrote:

> > > >

> > > > On Tue, Aug 31, 2021 at 06:36:26PM +0800, Xie Yongji wrote:

> > > > > This adds a new callback to support device specific reset

> > > > > behavior. The vdpa bus driver will call the reset function

> > > > > instead of setting status to zero during resetting.

> > > > >

> > > > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>

> > > >

> > > >

> > > > This does gloss over a significant change though:

> > > >

> > > >

> > > > > ---

> > > > > @@ -348,12 +352,12 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)

> > > > >       return vdev->dma_dev;

> > > > >  }

> > > > >

> > > > > -static inline void vdpa_reset(struct vdpa_device *vdev)

> > > > > +static inline int vdpa_reset(struct vdpa_device *vdev)

> > > > >  {

> > > > >       const struct vdpa_config_ops *ops = vdev->config;

> > > > >

> > > > >       vdev->features_valid = false;

> > > > > -     ops->set_status(vdev, 0);

> > > > > +     return ops->reset(vdev);

> > > > >  }

> > > > >

> > > > >  static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)

> > > >

> > > >

> > > > Unfortunately this breaks virtio_vdpa:

> > > >

> > > >

> > > > static void virtio_vdpa_reset(struct virtio_device *vdev)

> > > > {

> > > >         struct vdpa_device *vdpa = vd_get_vdpa(vdev);

> > > >

> > > >         vdpa_reset(vdpa);

> > > > }

> > > >

> > > >

> > > > and there's no easy way to fix this, kernel can't recover

> > > > from a reset failure e.g. during driver unbind.

> > > >

> > >

> > > Yes, but it should be safe with the protection of software IOTLB even

> > > if the reset() fails during driver unbind.

> > >

> > > Thanks,

> > > Yongji

> >

> > Hmm. I don't see it.

> > What exactly will happen? What prevents device from poking at

> > memory after reset? Note that dma unmap in e.g. del_vqs happens

> > too late.

> 

> But I didn't see any problems with touching the memory for virtqueues.


Drivers make the assumption that after reset returns no new
buffers will be consumed. For example a bunch of drivers
call virtqueue_detach_unused_buf.
I can't say whether block makes this assumption anywhere.
Needs careful auditing.

> The memory should not be freed after dma unmap?


But unmap does not happen until after the reset.


> And the memory for the bounce buffer should also be safe to be

> accessed by userspace in this case.

> 

> > And what about e.g. interrupts?

> > E.g. we have this:

> >

> >         /* Virtqueues are stopped, nothing can use vblk->vdev anymore. */

> >         vblk->vdev = NULL;

> >

> > and this is no longer true at this point.

> >

> 

> You're right. But I didn't see where the interrupt handler will use

> the vblk->vdev.


static void virtblk_done(struct virtqueue *vq)
{
        struct virtio_blk *vblk = vq->vdev->priv;

vq->vdev is the same as vblk->vdev.


> So it seems to be not too late to fix it:

> 

> diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c

> b/drivers/vdpa/vdpa_user/vduse_dev.c

> index 5c25ff6483ad..ea41a7389a26 100644

> --- a/drivers/vdpa/vdpa_user/vduse_dev.c

> +++ b/drivers/vdpa/vdpa_user/vduse_dev.c

> @@ -665,13 +665,13 @@ static void vduse_vdpa_set_config(struct

> vdpa_device *vdpa, unsigned int offset,

>  static int vduse_vdpa_reset(struct vdpa_device *vdpa)

>  {

>         struct vduse_dev *dev = vdpa_to_vduse(vdpa);

> +       int ret;

> 

> -       if (vduse_dev_set_status(dev, 0))

> -               return -EIO;

> +       ret = vduse_dev_set_status(dev, 0);

> 

>         vduse_dev_reset(dev);

> 

> -       return 0;

> +       return ret;

>  }

> 

>  static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)

> 

> Thanks,

> Yongji


Needs some comments to explain why it's done like this.

BTW device is generally wedged at this point right?
E.g. if reset during initialization fails, userspace
will still get the reset at some later point and be
confused ...

-- 
MST
Yongji Xie Sept. 6, 2021, 8:45 a.m. UTC | #6
On Mon, Sep 6, 2021 at 4:01 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>

> On Mon, Sep 06, 2021 at 03:06:44PM +0800, Yongji Xie wrote:

> > On Mon, Sep 6, 2021 at 2:37 PM Michael S. Tsirkin <mst@redhat.com> wrote:

> > >

> > > On Mon, Sep 06, 2021 at 02:09:25PM +0800, Yongji Xie wrote:

> > > > On Mon, Sep 6, 2021 at 1:56 PM Michael S. Tsirkin <mst@redhat.com> wrote:

> > > > >

> > > > > On Tue, Aug 31, 2021 at 06:36:26PM +0800, Xie Yongji wrote:

> > > > > > This adds a new callback to support device specific reset

> > > > > > behavior. The vdpa bus driver will call the reset function

> > > > > > instead of setting status to zero during resetting.

> > > > > >

> > > > > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>

> > > > >

> > > > >

> > > > > This does gloss over a significant change though:

> > > > >

> > > > >

> > > > > > ---

> > > > > > @@ -348,12 +352,12 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)

> > > > > >       return vdev->dma_dev;

> > > > > >  }

> > > > > >

> > > > > > -static inline void vdpa_reset(struct vdpa_device *vdev)

> > > > > > +static inline int vdpa_reset(struct vdpa_device *vdev)

> > > > > >  {

> > > > > >       const struct vdpa_config_ops *ops = vdev->config;

> > > > > >

> > > > > >       vdev->features_valid = false;

> > > > > > -     ops->set_status(vdev, 0);

> > > > > > +     return ops->reset(vdev);

> > > > > >  }

> > > > > >

> > > > > >  static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)

> > > > >

> > > > >

> > > > > Unfortunately this breaks virtio_vdpa:

> > > > >

> > > > >

> > > > > static void virtio_vdpa_reset(struct virtio_device *vdev)

> > > > > {

> > > > >         struct vdpa_device *vdpa = vd_get_vdpa(vdev);

> > > > >

> > > > >         vdpa_reset(vdpa);

> > > > > }

> > > > >

> > > > >

> > > > > and there's no easy way to fix this, kernel can't recover

> > > > > from a reset failure e.g. during driver unbind.

> > > > >

> > > >

> > > > Yes, but it should be safe with the protection of software IOTLB even

> > > > if the reset() fails during driver unbind.

> > > >

> > > > Thanks,

> > > > Yongji

> > >

> > > Hmm. I don't see it.

> > > What exactly will happen? What prevents device from poking at

> > > memory after reset? Note that dma unmap in e.g. del_vqs happens

> > > too late.

> >

> > But I didn't see any problems with touching the memory for virtqueues.

>

> Drivers make the assumption that after reset returns no new

> buffers will be consumed. For example a bunch of drivers

> call virtqueue_detach_unused_buf.


I'm not sure if I get your point. But it looks like
virtqueue_detach_unused_buf() will check the driver's metadata first
rather than read the memory from virtqueue.

> I can't say whether block makes this assumption anywhere.

> Needs careful auditing.

>

> > The memory should not be freed after dma unmap?

>

> But unmap does not happen until after the reset.

>


I mean the memory is totally allocated and controlled by the VDUSE
driver. The VDUSE driver will not return them to the buddy system
unless userspace unmaps it.

>

> > And the memory for the bounce buffer should also be safe to be

> > accessed by userspace in this case.

> >

> > > And what about e.g. interrupts?

> > > E.g. we have this:

> > >

> > >         /* Virtqueues are stopped, nothing can use vblk->vdev anymore. */

> > >         vblk->vdev = NULL;

> > >

> > > and this is no longer true at this point.

> > >

> >

> > You're right. But I didn't see where the interrupt handler will use

> > the vblk->vdev.

>

> static void virtblk_done(struct virtqueue *vq)

> {

>         struct virtio_blk *vblk = vq->vdev->priv;

>

> vq->vdev is the same as vblk->vdev.

>


We will test the vq->ready (will be set to false in del_vqs()) before
injecting an interrupt in the VDUSE driver. So it should be OK?

>

> > So it seems to be not too late to fix it:

> >

> > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c

> > b/drivers/vdpa/vdpa_user/vduse_dev.c

> > index 5c25ff6483ad..ea41a7389a26 100644

> > --- a/drivers/vdpa/vdpa_user/vduse_dev.c

> > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c

> > @@ -665,13 +665,13 @@ static void vduse_vdpa_set_config(struct

> > vdpa_device *vdpa, unsigned int offset,

> >  static int vduse_vdpa_reset(struct vdpa_device *vdpa)

> >  {

> >         struct vduse_dev *dev = vdpa_to_vduse(vdpa);

> > +       int ret;

> >

> > -       if (vduse_dev_set_status(dev, 0))

> > -               return -EIO;

> > +       ret = vduse_dev_set_status(dev, 0);

> >

> >         vduse_dev_reset(dev);

> >

> > -       return 0;

> > +       return ret;

> >  }

> >

> >  static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)

> >

> > Thanks,

> > Yongji

>

> Needs some comments to explain why it's done like this.

>


This is used to make sure the userspace can't inject the interrupt
any more after reset. The vduse_dev_reset() will clear the interrupt
callback and flush the irq kworker.

> BTW device is generally wedged at this point right?

> E.g. if reset during initialization fails, userspace

> will still get the reset at some later point and be

> confused ...

>


Sorry, I don't get why userspace will get the reset at some later point?

Thanks,
Yongji
Michael S. Tsirkin Sept. 6, 2021, 10:43 a.m. UTC | #7
On Mon, Sep 06, 2021 at 04:45:55PM +0800, Yongji Xie wrote:
> On Mon, Sep 6, 2021 at 4:01 PM Michael S. Tsirkin <mst@redhat.com> wrote:

> >

> > On Mon, Sep 06, 2021 at 03:06:44PM +0800, Yongji Xie wrote:

> > > On Mon, Sep 6, 2021 at 2:37 PM Michael S. Tsirkin <mst@redhat.com> wrote:

> > > >

> > > > On Mon, Sep 06, 2021 at 02:09:25PM +0800, Yongji Xie wrote:

> > > > > On Mon, Sep 6, 2021 at 1:56 PM Michael S. Tsirkin <mst@redhat.com> wrote:

> > > > > >

> > > > > > On Tue, Aug 31, 2021 at 06:36:26PM +0800, Xie Yongji wrote:

> > > > > > > This adds a new callback to support device specific reset

> > > > > > > behavior. The vdpa bus driver will call the reset function

> > > > > > > instead of setting status to zero during resetting.

> > > > > > >

> > > > > > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>

> > > > > >

> > > > > >

> > > > > > This does gloss over a significant change though:

> > > > > >

> > > > > >

> > > > > > > ---

> > > > > > > @@ -348,12 +352,12 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)

> > > > > > >       return vdev->dma_dev;

> > > > > > >  }

> > > > > > >

> > > > > > > -static inline void vdpa_reset(struct vdpa_device *vdev)

> > > > > > > +static inline int vdpa_reset(struct vdpa_device *vdev)

> > > > > > >  {

> > > > > > >       const struct vdpa_config_ops *ops = vdev->config;

> > > > > > >

> > > > > > >       vdev->features_valid = false;

> > > > > > > -     ops->set_status(vdev, 0);

> > > > > > > +     return ops->reset(vdev);

> > > > > > >  }

> > > > > > >

> > > > > > >  static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)

> > > > > >

> > > > > >

> > > > > > Unfortunately this breaks virtio_vdpa:

> > > > > >

> > > > > >

> > > > > > static void virtio_vdpa_reset(struct virtio_device *vdev)

> > > > > > {

> > > > > >         struct vdpa_device *vdpa = vd_get_vdpa(vdev);

> > > > > >

> > > > > >         vdpa_reset(vdpa);

> > > > > > }

> > > > > >

> > > > > >

> > > > > > and there's no easy way to fix this, kernel can't recover

> > > > > > from a reset failure e.g. during driver unbind.

> > > > > >

> > > > >

> > > > > Yes, but it should be safe with the protection of software IOTLB even

> > > > > if the reset() fails during driver unbind.

> > > > >

> > > > > Thanks,

> > > > > Yongji

> > > >

> > > > Hmm. I don't see it.

> > > > What exactly will happen? What prevents device from poking at

> > > > memory after reset? Note that dma unmap in e.g. del_vqs happens

> > > > too late.

> > >

> > > But I didn't see any problems with touching the memory for virtqueues.

> >

> > Drivers make the assumption that after reset returns no new

> > buffers will be consumed. For example a bunch of drivers

> > call virtqueue_detach_unused_buf.

> 

> I'm not sure if I get your point. But it looks like

> virtqueue_detach_unused_buf() will check the driver's metadata first

> rather than read the memory from virtqueue.

> 

> > I can't say whether block makes this assumption anywhere.

> > Needs careful auditing.

> >

> > > The memory should not be freed after dma unmap?

> >

> > But unmap does not happen until after the reset.

> >

> 

> I mean the memory is totally allocated and controlled by the VDUSE

> driver. The VDUSE driver will not return them to the buddy system

> unless userspace unmaps it.


Right. But what stops VDUSE from poking at memory after
reset failed?



> >

> > > And the memory for the bounce buffer should also be safe to be

> > > accessed by userspace in this case.

> > >

> > > > And what about e.g. interrupts?

> > > > E.g. we have this:

> > > >

> > > >         /* Virtqueues are stopped, nothing can use vblk->vdev anymore. */

> > > >         vblk->vdev = NULL;

> > > >

> > > > and this is no longer true at this point.

> > > >

> > >

> > > You're right. But I didn't see where the interrupt handler will use

> > > the vblk->vdev.

> >

> > static void virtblk_done(struct virtqueue *vq)

> > {

> >         struct virtio_blk *vblk = vq->vdev->priv;

> >

> > vq->vdev is the same as vblk->vdev.

> >

> 

> We will test the vq->ready (will be set to false in del_vqs()) before

> injecting an interrupt in the VDUSE driver. So it should be OK?


Maybe not ...  It's not designed for such asynchronous access, so e.g.
there's no locking or memory ordering around accesses.


> >

> > > So it seems to be not too late to fix it:

> > >

> > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c

> > > b/drivers/vdpa/vdpa_user/vduse_dev.c

> > > index 5c25ff6483ad..ea41a7389a26 100644

> > > --- a/drivers/vdpa/vdpa_user/vduse_dev.c

> > > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c

> > > @@ -665,13 +665,13 @@ static void vduse_vdpa_set_config(struct

> > > vdpa_device *vdpa, unsigned int offset,

> > >  static int vduse_vdpa_reset(struct vdpa_device *vdpa)

> > >  {

> > >         struct vduse_dev *dev = vdpa_to_vduse(vdpa);

> > > +       int ret;

> > >

> > > -       if (vduse_dev_set_status(dev, 0))

> > > -               return -EIO;

> > > +       ret = vduse_dev_set_status(dev, 0);

> > >

> > >         vduse_dev_reset(dev);

> > >

> > > -       return 0;

> > > +       return ret;

> > >  }

> > >

> > >  static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)

> > >

> > > Thanks,

> > > Yongji

> >

> > Needs some comments to explain why it's done like this.

> >

> 

> This is used to make sure the userspace can't inject the interrupt

> any more after reset. The vduse_dev_reset() will clear the interrupt

> callback and flush the irq kworker.

> 

> > BTW device is generally wedged at this point right?

> > E.g. if reset during initialization fails, userspace

> > will still get the reset at some later point and be

> > confused ...

> >

> 

> Sorry, I don't get why userspace will get the reset at some later point?

> 

> Thanks,

> Yongji


I am generally a bit confused about how reset works with vduse.
We clearly want device to get back to its original state.
How is that supposed to be achieved?

-- 
MST
Yongji Xie Sept. 6, 2021, 12:13 p.m. UTC | #8
On Mon, Sep 6, 2021 at 6:43 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>

> On Mon, Sep 06, 2021 at 04:45:55PM +0800, Yongji Xie wrote:

> > On Mon, Sep 6, 2021 at 4:01 PM Michael S. Tsirkin <mst@redhat.com> wrote:

> > >

> > > On Mon, Sep 06, 2021 at 03:06:44PM +0800, Yongji Xie wrote:

> > > > On Mon, Sep 6, 2021 at 2:37 PM Michael S. Tsirkin <mst@redhat.com> wrote:

> > > > >

> > > > > On Mon, Sep 06, 2021 at 02:09:25PM +0800, Yongji Xie wrote:

> > > > > > On Mon, Sep 6, 2021 at 1:56 PM Michael S. Tsirkin <mst@redhat.com> wrote:

> > > > > > >

> > > > > > > On Tue, Aug 31, 2021 at 06:36:26PM +0800, Xie Yongji wrote:

> > > > > > > > This adds a new callback to support device specific reset

> > > > > > > > behavior. The vdpa bus driver will call the reset function

> > > > > > > > instead of setting status to zero during resetting.

> > > > > > > >

> > > > > > > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>

> > > > > > >

> > > > > > >

> > > > > > > This does gloss over a significant change though:

> > > > > > >

> > > > > > >

> > > > > > > > ---

> > > > > > > > @@ -348,12 +352,12 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)

> > > > > > > >       return vdev->dma_dev;

> > > > > > > >  }

> > > > > > > >

> > > > > > > > -static inline void vdpa_reset(struct vdpa_device *vdev)

> > > > > > > > +static inline int vdpa_reset(struct vdpa_device *vdev)

> > > > > > > >  {

> > > > > > > >       const struct vdpa_config_ops *ops = vdev->config;

> > > > > > > >

> > > > > > > >       vdev->features_valid = false;

> > > > > > > > -     ops->set_status(vdev, 0);

> > > > > > > > +     return ops->reset(vdev);

> > > > > > > >  }

> > > > > > > >

> > > > > > > >  static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)

> > > > > > >

> > > > > > >

> > > > > > > Unfortunately this breaks virtio_vdpa:

> > > > > > >

> > > > > > >

> > > > > > > static void virtio_vdpa_reset(struct virtio_device *vdev)

> > > > > > > {

> > > > > > >         struct vdpa_device *vdpa = vd_get_vdpa(vdev);

> > > > > > >

> > > > > > >         vdpa_reset(vdpa);

> > > > > > > }

> > > > > > >

> > > > > > >

> > > > > > > and there's no easy way to fix this, kernel can't recover

> > > > > > > from a reset failure e.g. during driver unbind.

> > > > > > >

> > > > > >

> > > > > > Yes, but it should be safe with the protection of software IOTLB even

> > > > > > if the reset() fails during driver unbind.

> > > > > >

> > > > > > Thanks,

> > > > > > Yongji

> > > > >

> > > > > Hmm. I don't see it.

> > > > > What exactly will happen? What prevents device from poking at

> > > > > memory after reset? Note that dma unmap in e.g. del_vqs happens

> > > > > too late.

> > > >

> > > > But I didn't see any problems with touching the memory for virtqueues.

> > >

> > > Drivers make the assumption that after reset returns no new

> > > buffers will be consumed. For example a bunch of drivers

> > > call virtqueue_detach_unused_buf.

> >

> > I'm not sure if I get your point. But it looks like

> > virtqueue_detach_unused_buf() will check the driver's metadata first

> > rather than read the memory from virtqueue.

> >

> > > I can't say whether block makes this assumption anywhere.

> > > Needs careful auditing.

> > >

> > > > The memory should not be freed after dma unmap?

> > >

> > > But unmap does not happen until after the reset.

> > >

> >

> > I mean the memory is totally allocated and controlled by the VDUSE

> > driver. The VDUSE driver will not return them to the buddy system

> > unless userspace unmaps it.

>

> Right. But what stops VDUSE from poking at memory after

> reset failed?

>


Only itself. But in normal cases, userspace would not poke at the
memory since there is no available data after reset.

And it makes me think whether it's better to disallow returning errors
from userspace for the reset message. Then the only case that leads to
reset failure is the request timeout, which will mark the device
broken now.

>

> > >

> > > > And the memory for the bounce buffer should also be safe to be

> > > > accessed by userspace in this case.

> > > >

> > > > > And what about e.g. interrupts?

> > > > > E.g. we have this:

> > > > >

> > > > >         /* Virtqueues are stopped, nothing can use vblk->vdev anymore. */

> > > > >         vblk->vdev = NULL;

> > > > >

> > > > > and this is no longer true at this point.

> > > > >

> > > >

> > > > You're right. But I didn't see where the interrupt handler will use

> > > > the vblk->vdev.

> > >

> > > static void virtblk_done(struct virtqueue *vq)

> > > {

> > >         struct virtio_blk *vblk = vq->vdev->priv;

> > >

> > > vq->vdev is the same as vblk->vdev.

> > >

> >

> > We will test the vq->ready (will be set to false in del_vqs()) before

> > injecting an interrupt in the VDUSE driver. So it should be OK?

>

> Maybe not ...  It's not designed for such asynchronous access, so e.g.

> there's no locking or memory ordering around accesses.

>


Yes, so we still need the below fix.

>

> > >

> > > > So it seems to be not too late to fix it:

> > > >

> > > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c

> > > > b/drivers/vdpa/vdpa_user/vduse_dev.c

> > > > index 5c25ff6483ad..ea41a7389a26 100644

> > > > --- a/drivers/vdpa/vdpa_user/vduse_dev.c

> > > > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c

> > > > @@ -665,13 +665,13 @@ static void vduse_vdpa_set_config(struct

> > > > vdpa_device *vdpa, unsigned int offset,

> > > >  static int vduse_vdpa_reset(struct vdpa_device *vdpa)

> > > >  {

> > > >         struct vduse_dev *dev = vdpa_to_vduse(vdpa);

> > > > +       int ret;

> > > >

> > > > -       if (vduse_dev_set_status(dev, 0))

> > > > -               return -EIO;

> > > > +       ret = vduse_dev_set_status(dev, 0);

> > > >

> > > >         vduse_dev_reset(dev);

> > > >

> > > > -       return 0;

> > > > +       return ret;

> > > >  }

> > > >

> > > >  static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)

> > > >

> > > > Thanks,

> > > > Yongji

> > >

> > > Needs some comments to explain why it's done like this.

> > >

> >

> > This is used to make sure the userspace can't inject the interrupt

> > any more after reset. The vduse_dev_reset() will clear the interrupt

> > callback and flush the irq kworker.

> >

> > > BTW device is generally wedged at this point right?

> > > E.g. if reset during initialization fails, userspace

> > > will still get the reset at some later point and be

> > > confused ...

> > >

> >

> > Sorry, I don't get why userspace will get the reset at some later point?

> >

> > Thanks,

> > Yongji

>

> I am generally a bit confused about how reset works with vduse.

> We clearly want device to get back to its original state.

> How is that supposed to be achieved?

>


I think both the userspace daemon and kernel should reset their
internal states. Besides, userspace daemon needs to stop processing
virtqueue and clear the memory mapping.

Thanks,
Yongji
diff mbox series

Patch

diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index bfc3d7d40c09..4293481ce910 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -222,17 +222,6 @@  static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
 	if (status_old == status)
 		return;
 
-	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) &&
-	    !(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
-		ifcvf_stop_datapath(adapter);
-		ifcvf_free_irq(adapter, vf->nr_vring);
-	}
-
-	if (status == 0) {
-		ifcvf_reset_vring(adapter);
-		return;
-	}
-
 	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 	    !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) {
 		ret = ifcvf_request_irq(adapter);
@@ -252,6 +241,29 @@  static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
 	ifcvf_set_status(vf, status);
 }
 
+static int ifcvf_vdpa_reset(struct vdpa_device *vdpa_dev)
+{
+	struct ifcvf_adapter *adapter;
+	struct ifcvf_hw *vf;
+	u8 status_old;
+
+	vf  = vdpa_to_vf(vdpa_dev);
+	adapter = vdpa_to_adapter(vdpa_dev);
+	status_old = ifcvf_get_status(vf);
+
+	if (status_old == 0)
+		return 0;
+
+	if (status_old & VIRTIO_CONFIG_S_DRIVER_OK) {
+		ifcvf_stop_datapath(adapter);
+		ifcvf_free_irq(adapter, vf->nr_vring);
+	}
+
+	ifcvf_reset_vring(adapter);
+
+	return 0;
+}
+
 static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev)
 {
 	return IFCVF_QUEUE_MAX;
@@ -435,6 +447,7 @@  static const struct vdpa_config_ops ifc_vdpa_ops = {
 	.set_features	= ifcvf_vdpa_set_features,
 	.get_status	= ifcvf_vdpa_get_status,
 	.set_status	= ifcvf_vdpa_set_status,
+	.reset		= ifcvf_vdpa_reset,
 	.get_vq_num_max	= ifcvf_vdpa_get_vq_num_max,
 	.get_vq_state	= ifcvf_vdpa_get_vq_state,
 	.set_vq_state	= ifcvf_vdpa_set_vq_state,
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 4ba3ac48ee83..608f6b900cd9 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2154,22 +2154,6 @@  static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
 	int err;
 
 	print_status(mvdev, status, true);
-	if (!status) {
-		mlx5_vdpa_info(mvdev, "performing device reset\n");
-		teardown_driver(ndev);
-		clear_vqs_ready(ndev);
-		mlx5_vdpa_destroy_mr(&ndev->mvdev);
-		ndev->mvdev.status = 0;
-		ndev->mvdev.mlx_features = 0;
-		memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
-		ndev->mvdev.actual_features = 0;
-		++mvdev->generation;
-		if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
-			if (mlx5_vdpa_create_mr(mvdev, NULL))
-				mlx5_vdpa_warn(mvdev, "create MR failed\n");
-		}
-		return;
-	}
 
 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
@@ -2192,6 +2176,29 @@  static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
 }
 
+static int mlx5_vdpa_reset(struct vdpa_device *vdev)
+{
+	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+
+	print_status(mvdev, 0, true);
+	mlx5_vdpa_info(mvdev, "performing device reset\n");
+	teardown_driver(ndev);
+	clear_vqs_ready(ndev);
+	mlx5_vdpa_destroy_mr(&ndev->mvdev);
+	ndev->mvdev.status = 0;
+	ndev->mvdev.mlx_features = 0;
+	memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
+	ndev->mvdev.actual_features = 0;
+	++mvdev->generation;
+	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
+		if (mlx5_vdpa_create_mr(mvdev, NULL))
+			mlx5_vdpa_warn(mvdev, "create MR failed\n");
+	}
+
+	return 0;
+}
+
 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
 {
 	return sizeof(struct virtio_net_config);
@@ -2305,6 +2312,7 @@  static const struct vdpa_config_ops mlx5_vdpa_ops = {
 	.get_vendor_id = mlx5_vdpa_get_vendor_id,
 	.get_status = mlx5_vdpa_get_status,
 	.set_status = mlx5_vdpa_set_status,
+	.reset = mlx5_vdpa_reset,
 	.get_config_size = mlx5_vdpa_get_config_size,
 	.get_config = mlx5_vdpa_get_config,
 	.set_config = mlx5_vdpa_set_config,
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 5b51d0ac8bae..f292bb05d6c9 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -92,7 +92,7 @@  static void vdpasim_vq_reset(struct vdpasim *vdpasim,
 	vq->vring.notify = NULL;
 }
 
-static void vdpasim_reset(struct vdpasim *vdpasim)
+static void vdpasim_do_reset(struct vdpasim *vdpasim)
 {
 	int i;
 
@@ -460,11 +460,21 @@  static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
 
 	spin_lock(&vdpasim->lock);
 	vdpasim->status = status;
-	if (status == 0)
-		vdpasim_reset(vdpasim);
 	spin_unlock(&vdpasim->lock);
 }
 
+static int vdpasim_reset(struct vdpa_device *vdpa)
+{
+	struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+	spin_lock(&vdpasim->lock);
+	vdpasim->status = 0;
+	vdpasim_do_reset(vdpasim);
+	spin_unlock(&vdpasim->lock);
+
+	return 0;
+}
+
 static size_t vdpasim_get_config_size(struct vdpa_device *vdpa)
 {
 	struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
@@ -608,6 +618,7 @@  static const struct vdpa_config_ops vdpasim_config_ops = {
 	.get_vendor_id          = vdpasim_get_vendor_id,
 	.get_status             = vdpasim_get_status,
 	.set_status             = vdpasim_set_status,
+	.reset			= vdpasim_reset,
 	.get_config_size        = vdpasim_get_config_size,
 	.get_config             = vdpasim_get_config,
 	.set_config             = vdpasim_set_config,
@@ -636,6 +647,7 @@  static const struct vdpa_config_ops vdpasim_batch_config_ops = {
 	.get_vendor_id          = vdpasim_get_vendor_id,
 	.get_status             = vdpasim_get_status,
 	.set_status             = vdpasim_set_status,
+	.reset			= vdpasim_reset,
 	.get_config_size        = vdpasim_get_config_size,
 	.get_config             = vdpasim_get_config,
 	.set_config             = vdpasim_set_config,
diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c
index fe0527329857..cd7718b43a6e 100644
--- a/drivers/vdpa/virtio_pci/vp_vdpa.c
+++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
@@ -189,10 +189,20 @@  static void vp_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
 	}
 
 	vp_modern_set_status(mdev, status);
+}
 
-	if (!(status & VIRTIO_CONFIG_S_DRIVER_OK) &&
-	    (s & VIRTIO_CONFIG_S_DRIVER_OK))
+static int vp_vdpa_reset(struct vdpa_device *vdpa)
+{
+	struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+	struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev;
+	u8 s = vp_vdpa_get_status(vdpa);
+
+	vp_modern_set_status(mdev, 0);
+
+	if (s & VIRTIO_CONFIG_S_DRIVER_OK)
 		vp_vdpa_free_irq(vp_vdpa);
+
+	return 0;
 }
 
 static u16 vp_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
@@ -398,6 +408,7 @@  static const struct vdpa_config_ops vp_vdpa_ops = {
 	.set_features	= vp_vdpa_set_features,
 	.get_status	= vp_vdpa_get_status,
 	.set_status	= vp_vdpa_set_status,
+	.reset		= vp_vdpa_reset,
 	.get_vq_num_max	= vp_vdpa_get_vq_num_max,
 	.get_vq_state	= vp_vdpa_get_vq_state,
 	.get_vq_notification = vp_vdpa_get_vq_notification,
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 9479f7f79217..ab7a24613982 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -157,7 +157,7 @@  static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
 	struct vdpa_device *vdpa = v->vdpa;
 	const struct vdpa_config_ops *ops = vdpa->config;
 	u8 status, status_old;
-	int nvqs = v->nvqs;
+	int ret, nvqs = v->nvqs;
 	u16 i;
 
 	if (copy_from_user(&status, statusp, sizeof(status)))
@@ -172,7 +172,12 @@  static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
 	if (status != 0 && (ops->get_status(vdpa) & ~status) != 0)
 		return -EINVAL;
 
-	ops->set_status(vdpa, status);
+	if (status == 0) {
+		ret = ops->reset(vdpa);
+		if (ret)
+			return ret;
+	} else
+		ops->set_status(vdpa, status);
 
 	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
 		for (i = 0; i < nvqs; i++)
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 8ae1134070eb..e1eae8c7483d 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -171,6 +171,9 @@  struct vdpa_iova_range {
  * @set_status:			Set the device status
  *				@vdev: vdpa device
  *				@status: virtio device status
+ * @reset:			Reset device
+ *				@vdev: vdpa device
+ *				Returns integer: success (0) or error (< 0)
  * @get_config_size:		Get the size of the configuration space
  *				@vdev: vdpa device
  *				Returns size_t: configuration size
@@ -255,6 +258,7 @@  struct vdpa_config_ops {
 	u32 (*get_vendor_id)(struct vdpa_device *vdev);
 	u8 (*get_status)(struct vdpa_device *vdev);
 	void (*set_status)(struct vdpa_device *vdev, u8 status);
+	int (*reset)(struct vdpa_device *vdev);
 	size_t (*get_config_size)(struct vdpa_device *vdev);
 	void (*get_config)(struct vdpa_device *vdev, unsigned int offset,
 			   void *buf, unsigned int len);
@@ -348,12 +352,12 @@  static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)
 	return vdev->dma_dev;
 }
 
-static inline void vdpa_reset(struct vdpa_device *vdev)
+static inline int vdpa_reset(struct vdpa_device *vdev)
 {
 	const struct vdpa_config_ops *ops = vdev->config;
 
 	vdev->features_valid = false;
-	ops->set_status(vdev, 0);
+	return ops->reset(vdev);
 }
 
 static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)