[v8,5/7] iommu/arm-smmu-v3: Add support for non-strict mode

Message ID b98f8f695178c9c9ac6e081680090d5ac97e8d7c.1537458163.git.robin.murphy@arm.com
State New
Headers show
Series
  • [v8,1/7] iommu/arm-smmu-v3: Implement flush_iotlb_all hook
Related show

Commit Message

Robin Murphy Sept. 20, 2018, 4:10 p.m.
From: Zhen Lei <thunder.leizhen@huawei.com>


Now that io-pgtable knows how to dodge strict TLB maintenance, all
that's left to do is bridge the gap between the IOMMU core requesting
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE for default domains, and showing the
appropriate IO_PGTABLE_QUIRK_NON_STRICT flag to alloc_io_pgtable_ops().

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>

[rm: convert to domain attribute, tweak commit message]
Signed-off-by: Robin Murphy <robin.murphy@arm.com>

---

v8:
 - Use nested switches for attrs
 - Document barrier semantics

 drivers/iommu/arm-smmu-v3.c | 79 ++++++++++++++++++++++++++-----------
 1 file changed, 56 insertions(+), 23 deletions(-)

-- 
2.19.0.dirty

Comments

Will Deacon Sept. 28, 2018, 12:19 p.m. | #1
On Thu, Sep 20, 2018 at 05:10:25PM +0100, Robin Murphy wrote:
> From: Zhen Lei <thunder.leizhen@huawei.com>

> 

> Now that io-pgtable knows how to dodge strict TLB maintenance, all

> that's left to do is bridge the gap between the IOMMU core requesting

> DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE for default domains, and showing the

> appropriate IO_PGTABLE_QUIRK_NON_STRICT flag to alloc_io_pgtable_ops().

> 

> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>

> [rm: convert to domain attribute, tweak commit message]

> Signed-off-by: Robin Murphy <robin.murphy@arm.com>

> ---

> 

> v8:

>  - Use nested switches for attrs

>  - Document barrier semantics

> 

>  drivers/iommu/arm-smmu-v3.c | 79 ++++++++++++++++++++++++++-----------

>  1 file changed, 56 insertions(+), 23 deletions(-)

> 

> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c

> index f10c852479fc..db402e8b068b 100644

> --- a/drivers/iommu/arm-smmu-v3.c

> +++ b/drivers/iommu/arm-smmu-v3.c

> @@ -612,6 +612,7 @@ struct arm_smmu_domain {

>  	struct mutex			init_mutex; /* Protects smmu pointer */

>  

>  	struct io_pgtable_ops		*pgtbl_ops;

> +	bool				non_strict;

>  

>  	enum arm_smmu_domain_stage	stage;

>  	union {

> @@ -1407,6 +1408,12 @@ static void arm_smmu_tlb_inv_context(void *cookie)

>  		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;

>  	}

>  

> +	/*

> +	 * NOTE: when io-pgtable is in non-strict mode, we may get here with

> +	 * PTEs previously cleared by unmaps on the current CPU not yet visible

> +	 * to the SMMU. We are relying on the DSB implicit in queue_inc_prod()

> +	 * to guarantee those are observed before the TLBI. Do be careful, 007.

> +	 */


Good, so you can ignore my comment on the previous patch :)

Will
Robin Murphy Sept. 28, 2018, 12:26 p.m. | #2
On 28/09/18 13:19, Will Deacon wrote:
> On Thu, Sep 20, 2018 at 05:10:25PM +0100, Robin Murphy wrote:

>> From: Zhen Lei <thunder.leizhen@huawei.com>

>>

>> Now that io-pgtable knows how to dodge strict TLB maintenance, all

>> that's left to do is bridge the gap between the IOMMU core requesting

>> DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE for default domains, and showing the

>> appropriate IO_PGTABLE_QUIRK_NON_STRICT flag to alloc_io_pgtable_ops().

>>

>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>

>> [rm: convert to domain attribute, tweak commit message]

>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>

>> ---

>>

>> v8:

>>   - Use nested switches for attrs

>>   - Document barrier semantics

>>

>>   drivers/iommu/arm-smmu-v3.c | 79 ++++++++++++++++++++++++++-----------

>>   1 file changed, 56 insertions(+), 23 deletions(-)

>>

>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c

>> index f10c852479fc..db402e8b068b 100644

>> --- a/drivers/iommu/arm-smmu-v3.c

>> +++ b/drivers/iommu/arm-smmu-v3.c

>> @@ -612,6 +612,7 @@ struct arm_smmu_domain {

>>   	struct mutex			init_mutex; /* Protects smmu pointer */

>>   

>>   	struct io_pgtable_ops		*pgtbl_ops;

>> +	bool				non_strict;

>>   

>>   	enum arm_smmu_domain_stage	stage;

>>   	union {

>> @@ -1407,6 +1408,12 @@ static void arm_smmu_tlb_inv_context(void *cookie)

>>   		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;

>>   	}

>>   

>> +	/*

>> +	 * NOTE: when io-pgtable is in non-strict mode, we may get here with

>> +	 * PTEs previously cleared by unmaps on the current CPU not yet visible

>> +	 * to the SMMU. We are relying on the DSB implicit in queue_inc_prod()

>> +	 * to guarantee those are observed before the TLBI. Do be careful, 007.

>> +	 */

> 

> Good, so you can ignore my comment on the previous patch :)


Well, I suppose that comment in io-pgtable *could* have explicitly noted 
that same-CPU order is dealt with elsewhere - feel free to fix it up if 
you think it would be a helpful reminder for the future.

Cheers,
Robin.
Will Deacon Sept. 28, 2018, 12:47 p.m. | #3
On Fri, Sep 28, 2018 at 01:26:00PM +0100, Robin Murphy wrote:
> On 28/09/18 13:19, Will Deacon wrote:

> > On Thu, Sep 20, 2018 at 05:10:25PM +0100, Robin Murphy wrote:

> > > From: Zhen Lei <thunder.leizhen@huawei.com>

> > > 

> > > Now that io-pgtable knows how to dodge strict TLB maintenance, all

> > > that's left to do is bridge the gap between the IOMMU core requesting

> > > DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE for default domains, and showing the

> > > appropriate IO_PGTABLE_QUIRK_NON_STRICT flag to alloc_io_pgtable_ops().

> > > 

> > > Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>

> > > [rm: convert to domain attribute, tweak commit message]

> > > Signed-off-by: Robin Murphy <robin.murphy@arm.com>

> > > ---

> > > 

> > > v8:

> > >   - Use nested switches for attrs

> > >   - Document barrier semantics

> > > 

> > >   drivers/iommu/arm-smmu-v3.c | 79 ++++++++++++++++++++++++++-----------

> > >   1 file changed, 56 insertions(+), 23 deletions(-)

> > > 

> > > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c

> > > index f10c852479fc..db402e8b068b 100644

> > > --- a/drivers/iommu/arm-smmu-v3.c

> > > +++ b/drivers/iommu/arm-smmu-v3.c

> > > @@ -612,6 +612,7 @@ struct arm_smmu_domain {

> > >   	struct mutex			init_mutex; /* Protects smmu pointer */

> > >   	struct io_pgtable_ops		*pgtbl_ops;

> > > +	bool				non_strict;

> > >   	enum arm_smmu_domain_stage	stage;

> > >   	union {

> > > @@ -1407,6 +1408,12 @@ static void arm_smmu_tlb_inv_context(void *cookie)

> > >   		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;

> > >   	}

> > > +	/*

> > > +	 * NOTE: when io-pgtable is in non-strict mode, we may get here with

> > > +	 * PTEs previously cleared by unmaps on the current CPU not yet visible

> > > +	 * to the SMMU. We are relying on the DSB implicit in queue_inc_prod()

> > > +	 * to guarantee those are observed before the TLBI. Do be careful, 007.

> > > +	 */

> > 

> > Good, so you can ignore my comment on the previous patch :)

> 

> Well, I suppose that comment in io-pgtable *could* have explicitly noted

> that same-CPU order is dealt with elsewhere - feel free to fix it up if you

> think it would be a helpful reminder for the future.


I think I'll move it into the documentation for the new attribute, so that
any driver authors wanting to enable lazy invalidation know that they need
to provide this guarantee in their full TLB invalidation callback.

Will
Will Deacon Sept. 28, 2018, 1:55 p.m. | #4
On Fri, Sep 28, 2018 at 01:47:04PM +0100, Will Deacon wrote:
> On Fri, Sep 28, 2018 at 01:26:00PM +0100, Robin Murphy wrote:

> > On 28/09/18 13:19, Will Deacon wrote:

> > > On Thu, Sep 20, 2018 at 05:10:25PM +0100, Robin Murphy wrote:

> > > > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c

> > > > index f10c852479fc..db402e8b068b 100644

> > > > --- a/drivers/iommu/arm-smmu-v3.c

> > > > +++ b/drivers/iommu/arm-smmu-v3.c

> > > > @@ -612,6 +612,7 @@ struct arm_smmu_domain {

> > > >   	struct mutex			init_mutex; /* Protects smmu pointer */

> > > >   	struct io_pgtable_ops		*pgtbl_ops;

> > > > +	bool				non_strict;

> > > >   	enum arm_smmu_domain_stage	stage;

> > > >   	union {

> > > > @@ -1407,6 +1408,12 @@ static void arm_smmu_tlb_inv_context(void *cookie)

> > > >   		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;

> > > >   	}

> > > > +	/*

> > > > +	 * NOTE: when io-pgtable is in non-strict mode, we may get here with

> > > > +	 * PTEs previously cleared by unmaps on the current CPU not yet visible

> > > > +	 * to the SMMU. We are relying on the DSB implicit in queue_inc_prod()

> > > > +	 * to guarantee those are observed before the TLBI. Do be careful, 007.

> > > > +	 */

> > > 

> > > Good, so you can ignore my comment on the previous patch :)

> > 

> > Well, I suppose that comment in io-pgtable *could* have explicitly noted

> > that same-CPU order is dealt with elsewhere - feel free to fix it up if you

> > think it would be a helpful reminder for the future.

> 

> I think I'll move it into the documentation for the new attribute, so that

> any driver authors wanting to enable lazy invalidation know that they need

> to provide this guarantee in their full TLB invalidation callback.


Hmm, so I started doing this but then realised we already required this
behaviour for tlb_add_flush() afaict. That would mean that mainline
currently has a bug for arm-smmu.c, because we use the _relaxed I/O
accessors in there and there's no DSB after clearing the PTE on unmap().

Am I missing something?

Will
Robin Murphy Sept. 28, 2018, 2:01 p.m. | #5
On 28/09/18 14:55, Will Deacon wrote:
> On Fri, Sep 28, 2018 at 01:47:04PM +0100, Will Deacon wrote:

>> On Fri, Sep 28, 2018 at 01:26:00PM +0100, Robin Murphy wrote:

>>> On 28/09/18 13:19, Will Deacon wrote:

>>>> On Thu, Sep 20, 2018 at 05:10:25PM +0100, Robin Murphy wrote:

>>>>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c

>>>>> index f10c852479fc..db402e8b068b 100644

>>>>> --- a/drivers/iommu/arm-smmu-v3.c

>>>>> +++ b/drivers/iommu/arm-smmu-v3.c

>>>>> @@ -612,6 +612,7 @@ struct arm_smmu_domain {

>>>>>    	struct mutex			init_mutex; /* Protects smmu pointer */

>>>>>    	struct io_pgtable_ops		*pgtbl_ops;

>>>>> +	bool				non_strict;

>>>>>    	enum arm_smmu_domain_stage	stage;

>>>>>    	union {

>>>>> @@ -1407,6 +1408,12 @@ static void arm_smmu_tlb_inv_context(void *cookie)

>>>>>    		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;

>>>>>    	}

>>>>> +	/*

>>>>> +	 * NOTE: when io-pgtable is in non-strict mode, we may get here with

>>>>> +	 * PTEs previously cleared by unmaps on the current CPU not yet visible

>>>>> +	 * to the SMMU. We are relying on the DSB implicit in queue_inc_prod()

>>>>> +	 * to guarantee those are observed before the TLBI. Do be careful, 007.

>>>>> +	 */

>>>>

>>>> Good, so you can ignore my comment on the previous patch :)

>>>

>>> Well, I suppose that comment in io-pgtable *could* have explicitly noted

>>> that same-CPU order is dealt with elsewhere - feel free to fix it up if you

>>> think it would be a helpful reminder for the future.

>>

>> I think I'll move it into the documentation for the new attribute, so that

>> any driver authors wanting to enable lazy invalidation know that they need

>> to provide this guarantee in their full TLB invalidation callback.

> 

> Hmm, so I started doing this but then realised we already required this

> behaviour for tlb_add_flush() afaict. That would mean that mainline

> currently has a bug for arm-smmu.c, because we use the _relaxed I/O

> accessors in there and there's no DSB after clearing the PTE on unmap().

> 

> Am I missing something?


Argh, no - I started having the same suspicion when changing those 
writel()s in patch #7, resolved to either mention it to you or 
investigate it myself as a separate fix, then promptly forgot entirely. 
Thanks for the reminder...

Robin.

Patch

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index f10c852479fc..db402e8b068b 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -612,6 +612,7 @@  struct arm_smmu_domain {
 	struct mutex			init_mutex; /* Protects smmu pointer */
 
 	struct io_pgtable_ops		*pgtbl_ops;
+	bool				non_strict;
 
 	enum arm_smmu_domain_stage	stage;
 	union {
@@ -1407,6 +1408,12 @@  static void arm_smmu_tlb_inv_context(void *cookie)
 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
 	}
 
+	/*
+	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
+	 * PTEs previously cleared by unmaps on the current CPU not yet visible
+	 * to the SMMU. We are relying on the DSB implicit in queue_inc_prod()
+	 * to guarantee those are observed before the TLBI. Do be careful, 007.
+	 */
 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
 	__arm_smmu_tlb_sync(smmu);
 }
@@ -1633,6 +1640,9 @@  static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 	if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
 		pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
 
+	if (smmu_domain->non_strict)
+		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
+
 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
 	if (!pgtbl_ops)
 		return -ENOMEM;
@@ -1934,15 +1944,27 @@  static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
 {
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 
-	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
-		return -EINVAL;
-
-	switch (attr) {
-	case DOMAIN_ATTR_NESTING:
-		*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
-		return 0;
+	switch (domain->type) {
+	case IOMMU_DOMAIN_UNMANAGED:
+		switch (attr) {
+		case DOMAIN_ATTR_NESTING:
+			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
+			return 0;
+		default:
+			return -ENODEV;
+		}
+		break;
+	case IOMMU_DOMAIN_DMA:
+		switch (attr) {
+		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+			*(int *)data = smmu_domain->non_strict;
+			return 0;
+		default:
+			return -ENODEV;
+		}
+		break;
 	default:
-		return -ENODEV;
+		return -EINVAL;
 	}
 }
 
@@ -1952,26 +1974,37 @@  static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
 	int ret = 0;
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 
-	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
-		return -EINVAL;
-
 	mutex_lock(&smmu_domain->init_mutex);
 
-	switch (attr) {
-	case DOMAIN_ATTR_NESTING:
-		if (smmu_domain->smmu) {
-			ret = -EPERM;
-			goto out_unlock;
+	switch (domain->type) {
+	case IOMMU_DOMAIN_UNMANAGED:
+		switch (attr) {
+		case DOMAIN_ATTR_NESTING:
+			if (smmu_domain->smmu) {
+				ret = -EPERM;
+				goto out_unlock;
+			}
+
+			if (*(int *)data)
+				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
+			else
+				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+			break;
+		default:
+			ret = -ENODEV;
+		}
+		break;
+	case IOMMU_DOMAIN_DMA:
+		switch(attr) {
+		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+			smmu_domain->non_strict = *(int *)data;
+			break;
+		default:
+			ret = -ENODEV;
 		}
-
-		if (*(int *)data)
-			smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
-		else
-			smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
-
 		break;
 	default:
-		ret = -ENODEV;
+		ret = -EINVAL;
 	}
 
 out_unlock: