diff mbox series

[v8,2/4] iommufd: Add iommufd_access_replace() API

Message ID 5dfe3e9a9d511919cb105459ca9d96f013daadb4.1690226015.git.nicolinc@nvidia.com
State New
Headers show
Series [v8,1/4] vfio: Do not allow !ops->dma_unmap in vfio_pin/unpin_pages() | expand

Commit Message

Nicolin Chen July 24, 2023, 7:47 p.m. UTC
Extract all common procedure, between the iommufd_access_attach API and a
new iommufd_access_replace API, to an iommufd_access_change_pt helper. And
separate an unlocked __iommufd_access_detach to use it in the helper too.

This adds a new iommufd_access_replace() for VFIO emulated pathway to use.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/iommu/iommufd/device.c | 72 ++++++++++++++++++++++++++--------
 include/linux/iommufd.h        |  1 +
 2 files changed, 57 insertions(+), 16 deletions(-)

Comments

Jason Gunthorpe July 26, 2023, 2:30 p.m. UTC | #1
On Mon, Jul 24, 2023 at 12:47:05PM -0700, Nicolin Chen wrote:
> -int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
> +static int iommufd_access_change_pt(struct iommufd_access *access, u32 ioas_id)
>  {
> +	struct iommufd_ioas *cur_ioas = access->ioas;
>  	struct iommufd_ioas *new_ioas;
> -	int rc = 0;
> +	int rc;
>  
> -	mutex_lock(&access->ioas_lock);
> -	if (WARN_ON(access->ioas || access->ioas_unpin)) {
> -		mutex_unlock(&access->ioas_lock);
> -		return -EINVAL;
> -	}
> +	lockdep_assert_held(&access->ioas_lock);
>  
>  	new_ioas = iommufd_get_ioas(access->ictx, ioas_id);
> -	if (IS_ERR(new_ioas)) {
> -		mutex_unlock(&access->ioas_lock);
> +	if (IS_ERR(new_ioas))
>  		return PTR_ERR(new_ioas);
> -	}
> +
> +	if (cur_ioas)
> +		__iommufd_access_detach(access);

The drop of the mutex while this function runs is racey with the rest
of this, we can mitigate it by blocking concurrent change while
detaching which is if access->ioas_unpin is set
  
>  	rc = iopt_add_access(&new_ioas->iopt, access);
>  	if (rc) {
> -		mutex_unlock(&access->ioas_lock);
>  		iommufd_put_object(&new_ioas->obj);
> +		if (cur_ioas)
> +			WARN_ON(iommufd_access_change_pt(access,
> +							 cur_ioas->obj.id));

We've already dropped our ref to cur_ioas, so this is also racy with
destroy.

This is what I came up with:

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 57c0e81f5073b2..e55d6e902edb98 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -758,64 +758,101 @@ void iommufd_access_destroy(struct iommufd_access *access)
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);
 
-void iommufd_access_detach(struct iommufd_access *access)
+static int iommufd_access_change_ioas(struct iommufd_access *access,
+				      struct iommufd_ioas *new_ioas)
 {
 	struct iommufd_ioas *cur_ioas = access->ioas;
+	int rc;
+
+	lockdep_assert_held(&access->ioas_lock);
+
+	/* We are racing with a concurrent detach, bail */
+	if (access->ioas_unpin)
+		return -EBUSY;
+
+	if (IS_ERR(new_ioas))
+		return PTR_ERR(new_ioas);
+
+	if (cur_ioas == new_ioas)
+		return 0;
 
-	mutex_lock(&access->ioas_lock);
-	if (WARN_ON(!access->ioas))
-		goto out;
 	/*
 	 * Set ioas to NULL to block any further iommufd_access_pin_pages().
 	 * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
 	 */
 	access->ioas = NULL;
-
-	if (access->ops->unmap) {
+	if (cur_ioas && access->ops->unmap) {
 		mutex_unlock(&access->ioas_lock);
 		access->ops->unmap(access->data, 0, ULONG_MAX);
 		mutex_lock(&access->ioas_lock);
 	}
+
+	if (new_ioas) {
+		rc = iopt_add_access(&new_ioas->iopt, access);
+		if (rc) {
+			iommufd_put_object(&new_ioas->obj);
+			access->ioas = cur_ioas;
+			return rc;
+		}
+		iommufd_ref_to_users(&new_ioas->obj);
+	}
+
+	access->ioas = new_ioas;
+	access->ioas_unpin = new_ioas;
 	iopt_remove_access(&cur_ioas->iopt, access);
 	refcount_dec(&cur_ioas->obj.users);
-out:
-	access->ioas_unpin = NULL;
+
+	return 0;
+}
+
+void iommufd_access_detach(struct iommufd_access *access)
+{
+	int rc;
+
+	mutex_lock(&access->ioas_lock);
+	if (WARN_ON(!access->ioas)) {
+		mutex_unlock(&access->ioas_lock);
+		return;
+	}
+	rc = iommufd_access_change_ioas(access, NULL);
+	WARN_ON(rc);
 	mutex_unlock(&access->ioas_lock);
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD);
 
 int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
 {
-	struct iommufd_ioas *new_ioas;
-	int rc = 0;
+	int rc;
 
 	mutex_lock(&access->ioas_lock);
-	if (WARN_ON(access->ioas || access->ioas_unpin)) {
+	if (WARN_ON(access->ioas)) {
 		mutex_unlock(&access->ioas_lock);
 		return -EINVAL;
 	}
 
-	new_ioas = iommufd_get_ioas(access->ictx, ioas_id);
-	if (IS_ERR(new_ioas)) {
-		mutex_unlock(&access->ioas_lock);
-		return PTR_ERR(new_ioas);
-	}
-
-	rc = iopt_add_access(&new_ioas->iopt, access);
-	if (rc) {
-		mutex_unlock(&access->ioas_lock);
-		iommufd_put_object(&new_ioas->obj);
-		return rc;
-	}
-	iommufd_ref_to_users(&new_ioas->obj);
-
-	access->ioas = new_ioas;
-	access->ioas_unpin = new_ioas;
+	rc = iommufd_access_change_ioas(access,
+				      iommufd_get_ioas(access->ictx, ioas_id));
 	mutex_unlock(&access->ioas_lock);
-	return 0;
+	return rc;
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD);
 
+int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id)
+{
+	int rc;
+
+	mutex_lock(&access->ioas_lock);
+	if (!access->ioas) {
+		mutex_unlock(&access->ioas_lock);
+		return -ENOENT;
+	}
+	rc = iommufd_access_change_ioas(access,
+				      iommufd_get_ioas(access->ictx, ioas_id));
+	mutex_unlock(&access->ioas_lock);
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, IOMMUFD);
+
 /**
  * iommufd_access_notify_unmap - Notify users of an iopt to stop using it
  * @iopt: iopt to work on
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 0ac60256b65929..ffc3a949f8374f 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -49,6 +49,7 @@ iommufd_access_create(struct iommufd_ctx *ictx,
 		      const struct iommufd_access_ops *ops, void *data, u32 *id);
 void iommufd_access_destroy(struct iommufd_access *access);
 int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id);
+int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id);
 void iommufd_access_detach(struct iommufd_access *access);
 
 void iommufd_ctx_get(struct iommufd_ctx *ictx);


Jason
Nicolin Chen July 26, 2023, 8:50 p.m. UTC | #2
On Wed, Jul 26, 2023 at 11:30:17AM -0300, Jason Gunthorpe wrote:
> On Mon, Jul 24, 2023 at 12:47:05PM -0700, Nicolin Chen wrote:
> > -int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
> > +static int iommufd_access_change_pt(struct iommufd_access *access, u32 ioas_id)
> >  {
> > +	struct iommufd_ioas *cur_ioas = access->ioas;
> >  	struct iommufd_ioas *new_ioas;
> > -	int rc = 0;
> > +	int rc;
> >  
> > -	mutex_lock(&access->ioas_lock);
> > -	if (WARN_ON(access->ioas || access->ioas_unpin)) {
> > -		mutex_unlock(&access->ioas_lock);
> > -		return -EINVAL;
> > -	}
> > +	lockdep_assert_held(&access->ioas_lock);
> >  
> >  	new_ioas = iommufd_get_ioas(access->ictx, ioas_id);
> > -	if (IS_ERR(new_ioas)) {
> > -		mutex_unlock(&access->ioas_lock);
> > +	if (IS_ERR(new_ioas))
> >  		return PTR_ERR(new_ioas);
> > -	}
> > +
> > +	if (cur_ioas)
> > +		__iommufd_access_detach(access);
> 
> The drop of the mutex while this function runs is racey with the rest
> of this, we can mitigate it by blocking concurrent change while
> detaching which is if access->ioas_unpin is set

Oh. You mean that unmap part dropping the mutex right? I see.

> >  	rc = iopt_add_access(&new_ioas->iopt, access);
> >  	if (rc) {
> > -		mutex_unlock(&access->ioas_lock);
> >  		iommufd_put_object(&new_ioas->obj);
> > +		if (cur_ioas)
> > +			WARN_ON(iommufd_access_change_pt(access,
> > +							 cur_ioas->obj.id));
> 
> We've already dropped our ref to cur_ioas, so this is also racy with
> destroy.

Would it be better by calling iommufd_access_detach() that holds
the same mutex in the iommufd_access_destroy_object()? We could
also unwrap the detach and delay the refcount_dec, as you did in
your attaching patch.

> This is what I came up with:
> 
> diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
> index 57c0e81f5073b2..e55d6e902edb98 100644
> --- a/drivers/iommu/iommufd/device.c
> +++ b/drivers/iommu/iommufd/device.c
> @@ -758,64 +758,101 @@ void iommufd_access_destroy(struct iommufd_access *access)
>  }
>  EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);
>  
> -void iommufd_access_detach(struct iommufd_access *access)
> +static int iommufd_access_change_ioas(struct iommufd_access *access,
> +				      struct iommufd_ioas *new_ioas)
>  {
>  	struct iommufd_ioas *cur_ioas = access->ioas;
> +	int rc;
> +
> +	lockdep_assert_held(&access->ioas_lock);
> +
> +	/* We are racing with a concurrent detach, bail */
> +	if (access->ioas_unpin)
> +		return -EBUSY;

I think this should check access->ioas too? I mean:

+	/* We are racing with a concurrent detach, bail */
+	if (!access->ioas && access->ioas_unpin)
+		return -EBUSY;

Otherwise, a normal detach() would fail, since an access has both
a valid ioas and a valid ioas_unpin.

> +
> +	if (IS_ERR(new_ioas))
> +		return PTR_ERR(new_ioas);
> +
> +	if (cur_ioas == new_ioas)
> +		return 0;
>  
> -	mutex_lock(&access->ioas_lock);
> -	if (WARN_ON(!access->ioas))
> -		goto out;
>  	/*
>  	 * Set ioas to NULL to block any further iommufd_access_pin_pages().
>  	 * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
>  	 */
>  	access->ioas = NULL;
> -
> -	if (access->ops->unmap) {
> +	if (cur_ioas && access->ops->unmap) {
>  		mutex_unlock(&access->ioas_lock);
>  		access->ops->unmap(access->data, 0, ULONG_MAX);
>  		mutex_lock(&access->ioas_lock);
>  	}
> +
> +	if (new_ioas) {
> +		rc = iopt_add_access(&new_ioas->iopt, access);
> +		if (rc) {
> +			iommufd_put_object(&new_ioas->obj);
> +			access->ioas = cur_ioas;
> +			return rc;
> +		}
> +		iommufd_ref_to_users(&new_ioas->obj);
> +	}
> +
> +	access->ioas = new_ioas;
> +	access->ioas_unpin = new_ioas;
>  	iopt_remove_access(&cur_ioas->iopt, access);

There was a bug in my earlier version, having the same flow by
calling iopt_add_access() prior to iopt_remove_access(). But,
doing that would override the access->iopt_access_list_id and
it would then get unset by the following iopt_remove_access().

Please refer to :
https://lore.kernel.org/linux-iommu/ZJYYWz2wy%2F86FapK@Asurada-Nvidia/

If we want a cleaner detach-then-attach flow, we would need an
atomic function in the io_pagetable.c file handling the id, yet
I couldn't figure a good naming for the atomic function since
it's about acccess shifting between two iopts other than simply
"iopt_repalce_access".

So, I came up with this version calling an iopt_remove_access()
prior to iopt_add_access(), which requires an add-back the old
ioas upon an failure at iopt_add_access(new_ioas).

I will try making some change accordingly on top of this patch.

Thanks
Nicolin
Jason Gunthorpe July 26, 2023, 11:36 p.m. UTC | #3
On Wed, Jul 26, 2023 at 01:50:28PM -0700, Nicolin Chen wrote:
> 
> > >  	rc = iopt_add_access(&new_ioas->iopt, access);
> > >  	if (rc) {
> > > -		mutex_unlock(&access->ioas_lock);
> > >  		iommufd_put_object(&new_ioas->obj);
> > > +		if (cur_ioas)
> > > +			WARN_ON(iommufd_access_change_pt(access,
> > > +							 cur_ioas->obj.id));
> > 
> > We've already dropped our ref to cur_ioas, so this is also racy with
> > destroy.
> 
> Would it be better by calling iommufd_access_detach() that holds
> the same mutex in the iommufd_access_destroy_object()? We could
> also unwrap the detach and delay the refcount_dec, as you did in
> your attaching patch.

It is better just to integrate it with this algorithm so we don't have
the refcounting issues, like I did


> 
> > This is what I came up with:
> > 
> > diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
> > index 57c0e81f5073b2..e55d6e902edb98 100644
> > --- a/drivers/iommu/iommufd/device.c
> > +++ b/drivers/iommu/iommufd/device.c
> > @@ -758,64 +758,101 @@ void iommufd_access_destroy(struct iommufd_access *access)
> >  }
> >  EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);
> >  
> > -void iommufd_access_detach(struct iommufd_access *access)
> > +static int iommufd_access_change_ioas(struct iommufd_access *access,
> > +				      struct iommufd_ioas *new_ioas)
> >  {
> >  	struct iommufd_ioas *cur_ioas = access->ioas;
> > +	int rc;
> > +
> > +	lockdep_assert_held(&access->ioas_lock);
> > +
> > +	/* We are racing with a concurrent detach, bail */
> > +	if (access->ioas_unpin)
> > +		return -EBUSY;
> 
> I think this should check access->ioas too? I mean:

> 
> +	/* We are racing with a concurrent detach, bail */
> +	if (!access->ioas && access->ioas_unpin)
> +		return -EBUSY;

Oh, yes, that should basically be 'cur_ioas != access->ioas_unpin' -
ie any difference means we are racing with the unmap call.

> > +	if (new_ioas) {
> > +		rc = iopt_add_access(&new_ioas->iopt, access);
> > +		if (rc) {
> > +			iommufd_put_object(&new_ioas->obj);
> > +			access->ioas = cur_ioas;
> > +			return rc;
> > +		}
> > +		iommufd_ref_to_users(&new_ioas->obj);
> > +	}
> > +
> > +	access->ioas = new_ioas;
> > +	access->ioas_unpin = new_ioas;
> >  	iopt_remove_access(&cur_ioas->iopt, access);
> 
> There was a bug in my earlier version, having the same flow by
> calling iopt_add_access() prior to iopt_remove_access(). But,
> doing that would override the access->iopt_access_list_id and
> it would then get unset by the following iopt_remove_access().

Ah, I was wondering about that order but didn't check it.

Maybe we just need to pass the ID into iopt_remove_access and keep the
right version on the stack.

> So, I came up with this version calling an iopt_remove_access()
> prior to iopt_add_access(), which requires an add-back the old
> ioas upon an failure at iopt_add_access(new_ioas).

That is also sort of reasonable if the refcounting is organized like
this does.

Jason
Nicolin Chen July 27, 2023, 2:59 a.m. UTC | #4
On Wed, Jul 26, 2023 at 08:36:31PM -0300, Jason Gunthorpe wrote:
> On Wed, Jul 26, 2023 at 01:50:28PM -0700, Nicolin Chen wrote:
> > 
> > > >  	rc = iopt_add_access(&new_ioas->iopt, access);
> > > >  	if (rc) {
> > > > -		mutex_unlock(&access->ioas_lock);
> > > >  		iommufd_put_object(&new_ioas->obj);
> > > > +		if (cur_ioas)
> > > > +			WARN_ON(iommufd_access_change_pt(access,
> > > > +							 cur_ioas->obj.id));
> > > 
> > > We've already dropped our ref to cur_ioas, so this is also racy with
> > > destroy.
> > 
> > Would it be better by calling iommufd_access_detach() that holds
> > the same mutex in the iommufd_access_destroy_object()? We could
> > also unwrap the detach and delay the refcount_dec, as you did in
> > your attaching patch.
> 
> It is better just to integrate it with this algorithm so we don't have
> the refcounting issues, like I did

OK. I will have a patch adding the iommufd_access_change_ioas
first, and it can update iommufd_access_destroy_object() too.

> > > This is what I came up with:
> > > 
> > > diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
> > > index 57c0e81f5073b2..e55d6e902edb98 100644
> > > --- a/drivers/iommu/iommufd/device.c
> > > +++ b/drivers/iommu/iommufd/device.c
> > > @@ -758,64 +758,101 @@ void iommufd_access_destroy(struct iommufd_access *access)
> > >  }
> > >  EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);
> > >  
> > > -void iommufd_access_detach(struct iommufd_access *access)
> > > +static int iommufd_access_change_ioas(struct iommufd_access *access,
> > > +				      struct iommufd_ioas *new_ioas)
> > >  {
> > >  	struct iommufd_ioas *cur_ioas = access->ioas;
> > > +	int rc;
> > > +
> > > +	lockdep_assert_held(&access->ioas_lock);
> > > +
> > > +	/* We are racing with a concurrent detach, bail */
> > > +	if (access->ioas_unpin)
> > > +		return -EBUSY;
> > 
> > I think this should check access->ioas too? I mean:
> 
> > 
> > +	/* We are racing with a concurrent detach, bail */
> > +	if (!access->ioas && access->ioas_unpin)
> > +		return -EBUSY;
> 
> Oh, yes, that should basically be 'cur_ioas != access->ioas_unpin' -
> ie any difference means we are racing with the unmap call.

Yea, will update to 'cur_ioas != access->ioas_unpin'.

> > > +	if (new_ioas) {
> > > +		rc = iopt_add_access(&new_ioas->iopt, access);
> > > +		if (rc) {
> > > +			iommufd_put_object(&new_ioas->obj);
> > > +			access->ioas = cur_ioas;
> > > +			return rc;
> > > +		}
> > > +		iommufd_ref_to_users(&new_ioas->obj);
> > > +	}
> > > +
> > > +	access->ioas = new_ioas;
> > > +	access->ioas_unpin = new_ioas;
> > >  	iopt_remove_access(&cur_ioas->iopt, access);
> > 
> > There was a bug in my earlier version, having the same flow by
> > calling iopt_add_access() prior to iopt_remove_access(). But,
> > doing that would override the access->iopt_access_list_id and
> > it would then get unset by the following iopt_remove_access().
> 
> Ah, I was wondering about that order but didn't check it.
> 
> Maybe we just need to pass the ID into iopt_remove_access and keep the
> right version on the stack.
> 
> > So, I came up with this version calling an iopt_remove_access()
> > prior to iopt_add_access(), which requires an add-back the old
> > ioas upon an failure at iopt_add_access(new_ioas).
> 
> That is also sort of reasonable if the refcounting is organized like
> this does.

I just realized that either my v8 or your version calls unmap()
first at the entire cur_ioas. So, there seems to be no point in
doing that fallback re-add routine since the cur_ioas isn't the
same, which I don't feel quite right...

Perhaps we should pass the ID into iopt_add/remove_access like
you said above. And then we attach the new_ioas, piror to the
detach the cur_ioas?

Thanks
Nicolin
Nicolin Chen July 27, 2023, 7:30 a.m. UTC | #5
On Wed, Jul 26, 2023 at 07:59:17PM -0700, Nicolin Chen wrote:
 
> > > > +	if (new_ioas) {
> > > > +		rc = iopt_add_access(&new_ioas->iopt, access);
> > > > +		if (rc) {
> > > > +			iommufd_put_object(&new_ioas->obj);
> > > > +			access->ioas = cur_ioas;
> > > > +			return rc;
> > > > +		}
> > > > +		iommufd_ref_to_users(&new_ioas->obj);
> > > > +	}
> > > > +
> > > > +	access->ioas = new_ioas;
> > > > +	access->ioas_unpin = new_ioas;
> > > >  	iopt_remove_access(&cur_ioas->iopt, access);
> > > 
> > > There was a bug in my earlier version, having the same flow by
> > > calling iopt_add_access() prior to iopt_remove_access(). But,
> > > doing that would override the access->iopt_access_list_id and
> > > it would then get unset by the following iopt_remove_access().
> > 
> > Ah, I was wondering about that order but didn't check it.
> > 
> > Maybe we just need to pass the ID into iopt_remove_access and keep the
> > right version on the stack.
> > 
> > > So, I came up with this version calling an iopt_remove_access()
> > > prior to iopt_add_access(), which requires an add-back the old
> > > ioas upon an failure at iopt_add_access(new_ioas).
> > 
> > That is also sort of reasonable if the refcounting is organized like
> > this does.
> 
> I just realized that either my v8 or your version calls unmap()
> first at the entire cur_ioas. So, there seems to be no point in
> doing that fallback re-add routine since the cur_ioas isn't the
> same, which I don't feel quite right...
> 
> Perhaps we should pass the ID into iopt_add/remove_access like
> you said above. And then we attach the new_ioas, piror to the
> detach the cur_ioas?

I sent v9 having the iopt_remove_access trick, so we can do an
iopt_remove_access only upon success. Let's continue there.

Thanks
Nic
Jason Gunthorpe July 27, 2023, 12:03 p.m. UTC | #6
On Wed, Jul 26, 2023 at 07:59:11PM -0700, Nicolin Chen wrote:

> I just realized that either my v8 or your version calls unmap()
> first at the entire cur_ioas. So, there seems to be no point in
> doing that fallback re-add routine since the cur_ioas isn't the
> same, which I don't feel quite right...

The point is to restore the access back to how it should be on failure
so future use of the accesss still does the right thing.

We already have built into this a certain non-atomicity for mdevs,
they can see a pin failure during replace if they race an access
during this unmap window. This is similar to the real HW iommu's
without atomic replace.

> Perhaps we should pass the ID into iopt_add/remove_access like
> you said above. And then we attach the new_ioas, piror to the
> detach the cur_ioas?

If it is simple this seems like the most robust

Jason
Tian, Kevin July 28, 2023, 3:45 a.m. UTC | #7
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Friday, July 28, 2023 3:04 AM
> 
> On Thu, Jul 27, 2023 at 09:03:01AM -0300, Jason Gunthorpe wrote:
> > On Wed, Jul 26, 2023 at 07:59:11PM -0700, Nicolin Chen wrote:
> >
> > > I just realized that either my v8 or your version calls unmap()
> > > first at the entire cur_ioas. So, there seems to be no point in
> > > doing that fallback re-add routine since the cur_ioas isn't the
> > > same, which I don't feel quite right...
> >
> > The point is to restore the access back to how it should be on failure
> > so future use of the accesss still does the right thing.
> >
> > We already have built into this a certain non-atomicity for mdevs,
> > they can see a pin failure during replace if they race an access
> > during this unmap window. This is similar to the real HW iommu's
> > without atomic replace.
> 
> I was concerned about, after the replace, mdev losing all the
> mappings due to the unmap() call, which means the fallback is
> not really a status quo. Do you mean that they could pin those
> lost mappings back?

None of mdev drivers does that.

but we need think about the actual usage. I don't think the user
can request ioas change w/o actually reconfiguring the mdev
device. Presumably the latter could lead to reconstructure of pinned
pages.

so in code-level as Jason said we just need ensure the access is
back to an usable state.
Nicolin Chen July 28, 2023, 4:43 a.m. UTC | #8
On Fri, Jul 28, 2023 at 03:45:39AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Friday, July 28, 2023 3:04 AM
> >
> > On Thu, Jul 27, 2023 at 09:03:01AM -0300, Jason Gunthorpe wrote:
> > > On Wed, Jul 26, 2023 at 07:59:11PM -0700, Nicolin Chen wrote:
> > >
> > > > I just realized that either my v8 or your version calls unmap()
> > > > first at the entire cur_ioas. So, there seems to be no point in
> > > > doing that fallback re-add routine since the cur_ioas isn't the
> > > > same, which I don't feel quite right...
> > >
> > > The point is to restore the access back to how it should be on failure
> > > so future use of the accesss still does the right thing.
> > >
> > > We already have built into this a certain non-atomicity for mdevs,
> > > they can see a pin failure during replace if they race an access
> > > during this unmap window. This is similar to the real HW iommu's
> > > without atomic replace.
> >
> > I was concerned about, after the replace, mdev losing all the
> > mappings due to the unmap() call, which means the fallback is
> > not really a status quo. Do you mean that they could pin those
> > lost mappings back?
> 
> None of mdev drivers does that.
> 
> but we need think about the actual usage. I don't think the user
> can request ioas change w/o actually reconfiguring the mdev
> device. Presumably the latter could lead to reconstructure of pinned
> pages.

I can understand that the user should reconfigure the IOAS on
success. Yet, should we expect it to reconfigure on a failure
also?

Thanks!
Nic
Tian, Kevin July 28, 2023, 6:20 a.m. UTC | #9
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Friday, July 28, 2023 12:43 PM
> 
> On Fri, Jul 28, 2023 at 03:45:39AM +0000, Tian, Kevin wrote:
> > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > Sent: Friday, July 28, 2023 3:04 AM
> > >
> > > On Thu, Jul 27, 2023 at 09:03:01AM -0300, Jason Gunthorpe wrote:
> > > > On Wed, Jul 26, 2023 at 07:59:11PM -0700, Nicolin Chen wrote:
> > > >
> > > > > I just realized that either my v8 or your version calls unmap()
> > > > > first at the entire cur_ioas. So, there seems to be no point in
> > > > > doing that fallback re-add routine since the cur_ioas isn't the
> > > > > same, which I don't feel quite right...
> > > >
> > > > The point is to restore the access back to how it should be on failure
> > > > so future use of the accesss still does the right thing.
> > > >
> > > > We already have built into this a certain non-atomicity for mdevs,
> > > > they can see a pin failure during replace if they race an access
> > > > during this unmap window. This is similar to the real HW iommu's
> > > > without atomic replace.
> > >
> > > I was concerned about, after the replace, mdev losing all the
> > > mappings due to the unmap() call, which means the fallback is
> > > not really a status quo. Do you mean that they could pin those
> > > lost mappings back?
> >
> > None of mdev drivers does that.
> >
> > but we need think about the actual usage. I don't think the user
> > can request ioas change w/o actually reconfiguring the mdev
> > device. Presumably the latter could lead to reconstructure of pinned
> > pages.
> 
> I can understand that the user should reconfigure the IOAS on
> success. Yet, should we expect it to reconfigure on a failure
> also?
> 

I thought the user will likely stop the device before changing IOAS
and then re-enable device DMA afterwards. If that is the typical
flow then no matter this replace request succeeds or fails the
re-enabling sequence should lead to the addition of pinned pages
back to the current IOAS.

But this does imply inconsistent behavior between success and failure.
Not sure whether it's worth a fix e.g. introducing another notifier for
mdev drivers to re-pin...
Jason Gunthorpe July 28, 2023, 12:28 p.m. UTC | #10
On Fri, Jul 28, 2023 at 06:20:56AM +0000, Tian, Kevin wrote:

> But this does imply inconsistent behavior between success and failure.
> Not sure whether it's worth a fix e.g. introducing another notifier for
> mdev drivers to re-pin...

After unmap drivers should re-establish their DMA mappings when they
are next required. It is a mdev driver bug if they don't do this..

Jason
diff mbox series

Patch

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 072912d87f53..1015d6c42e2b 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -757,13 +757,11 @@  void iommufd_access_destroy(struct iommufd_access *access)
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);
 
-void iommufd_access_detach(struct iommufd_access *access)
+static void __iommufd_access_detach(struct iommufd_access *access)
 {
 	struct iommufd_ioas *cur_ioas = access->ioas;
 
-	mutex_lock(&access->ioas_lock);
-	if (WARN_ON(!access->ioas))
-		goto out;
+	lockdep_assert_held(&access->ioas_lock);
 	/*
 	 * Set ioas to NULL to block any further iommufd_access_pin_pages().
 	 * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
@@ -777,44 +775,86 @@  void iommufd_access_detach(struct iommufd_access *access)
 	}
 	iopt_remove_access(&cur_ioas->iopt, access);
 	refcount_dec(&cur_ioas->obj.users);
+}
+
+void iommufd_access_detach(struct iommufd_access *access)
+{
+	mutex_lock(&access->ioas_lock);
+	if (WARN_ON(!access->ioas))
+		goto out;
+	__iommufd_access_detach(access);
 out:
 	access->ioas_unpin = NULL;
 	mutex_unlock(&access->ioas_lock);
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD);
 
-int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
+static int iommufd_access_change_pt(struct iommufd_access *access, u32 ioas_id)
 {
+	struct iommufd_ioas *cur_ioas = access->ioas;
 	struct iommufd_ioas *new_ioas;
-	int rc = 0;
+	int rc;
 
-	mutex_lock(&access->ioas_lock);
-	if (WARN_ON(access->ioas || access->ioas_unpin)) {
-		mutex_unlock(&access->ioas_lock);
-		return -EINVAL;
-	}
+	lockdep_assert_held(&access->ioas_lock);
 
 	new_ioas = iommufd_get_ioas(access->ictx, ioas_id);
-	if (IS_ERR(new_ioas)) {
-		mutex_unlock(&access->ioas_lock);
+	if (IS_ERR(new_ioas))
 		return PTR_ERR(new_ioas);
-	}
+
+	if (cur_ioas)
+		__iommufd_access_detach(access);
 
 	rc = iopt_add_access(&new_ioas->iopt, access);
 	if (rc) {
-		mutex_unlock(&access->ioas_lock);
 		iommufd_put_object(&new_ioas->obj);
+		if (cur_ioas)
+			WARN_ON(iommufd_access_change_pt(access,
+							 cur_ioas->obj.id));
 		return rc;
 	}
 	iommufd_ref_to_users(&new_ioas->obj);
 
 	access->ioas = new_ioas;
 	access->ioas_unpin = new_ioas;
-	mutex_unlock(&access->ioas_lock);
 	return 0;
 }
+
+int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
+{
+	int rc;
+
+	mutex_lock(&access->ioas_lock);
+	if (WARN_ON(access->ioas || access->ioas_unpin)) {
+		mutex_unlock(&access->ioas_lock);
+		return -EINVAL;
+	}
+
+	rc = iommufd_access_change_pt(access, ioas_id);
+	mutex_unlock(&access->ioas_lock);
+	return rc;
+}
 EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD);
 
+int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id)
+{
+	int rc;
+
+	mutex_lock(&access->ioas_lock);
+	if (!access->ioas) {
+		mutex_unlock(&access->ioas_lock);
+		return -ENOENT;
+	}
+	if (access->ioas->obj.id == ioas_id) {
+		mutex_unlock(&access->ioas_lock);
+		return 0;
+	}
+
+	rc = iommufd_access_change_pt(access, ioas_id);
+	mutex_unlock(&access->ioas_lock);
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, IOMMUFD);
+
 /**
  * iommufd_access_notify_unmap - Notify users of an iopt to stop using it
  * @iopt: iopt to work on
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 0ac60256b659..ffc3a949f837 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -49,6 +49,7 @@  iommufd_access_create(struct iommufd_ctx *ictx,
 		      const struct iommufd_access_ops *ops, void *data, u32 *id);
 void iommufd_access_destroy(struct iommufd_access *access);
 int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id);
+int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id);
 void iommufd_access_detach(struct iommufd_access *access);
 
 void iommufd_ctx_get(struct iommufd_ctx *ictx);