Message ID | 5-v2-621370057090+91fec-smmuv3_nesting_jgg@nvidia.com |
---|---|
State | New |
Headers | show |
Series | Initial support for SMMUv3 nested translation | expand |
Hi Jason,

On Tue, Aug 27, 2024 at 12:51:35PM -0300, Jason Gunthorpe wrote:
> HW with CANWBS is always cache coherent and ignores PCI No Snoop requests
> as well. This meets the requirement for IOMMU_CAP_ENFORCE_CACHE_COHERENCY,
> so let's return it.
>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Reviewed-by: Mostafa Saleh <smostafa@google.com>

> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 35 +++++++++++++++++++++
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  1 +
>  2 files changed, 36 insertions(+)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index e2b97ad6d74b03..c2021e821e5cb6 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -2253,6 +2253,9 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
> 	case IOMMU_CAP_CACHE_COHERENCY:
> 		/* Assume that a coherent TCU implies coherent TBUs */
> 		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
> +	case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
> +		return dev_iommu_fwspec_get(dev)->flags &
> +		       IOMMU_FWSPEC_PCI_RC_CANWBS;
> 	case IOMMU_CAP_NOEXEC:
> 	case IOMMU_CAP_DEFERRED_FLUSH:
> 		return true;
> @@ -2263,6 +2266,28 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
> 	}
> }
>
> +static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
> +{
> +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> +	struct arm_smmu_master_domain *master_domain;
> +	unsigned long flags;
> +	bool ret = false;

nit: we can avoid the goto, if we inverse the logic of ret (and set it
to false if device doesn't support CANWBS)

> +	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> +	list_for_each_entry(master_domain, &smmu_domain->devices,
> +			    devices_elm) {
> +		if (!(dev_iommu_fwspec_get(master_domain->master->dev)->flags &
> +		      IOMMU_FWSPEC_PCI_RC_CANWBS))
> +			goto out;
> +	}
> +
> +	smmu_domain->enforce_cache_coherency = true;
> +	ret = true;
> +out:
> +	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
> +	return ret;
> +}
> +
> struct arm_smmu_domain *arm_smmu_domain_alloc(void)
> {
> 	struct arm_smmu_domain *smmu_domain;
> @@ -2693,6 +2718,15 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
> 	 * one of them.
> 	 */
> 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> +	if (smmu_domain->enforce_cache_coherency &&
> +	    !(dev_iommu_fwspec_get(master->dev)->flags &
> +	      IOMMU_FWSPEC_PCI_RC_CANWBS)) {
> +		kfree(master_domain);
> +		spin_unlock_irqrestore(&smmu_domain->devices_lock,
> +				       flags);
> +		return -EINVAL;
> +	}
> +
> 	if (state->ats_enabled)
> 		atomic_inc(&smmu_domain->nr_ats_masters);
> 	list_add(&master_domain->devices_elm, &smmu_domain->devices);
> @@ -3450,6 +3484,7 @@ static struct iommu_ops arm_smmu_ops = {
> 	.owner = THIS_MODULE,
> 	.default_domain_ops = &(const struct iommu_domain_ops) {
> 		.attach_dev = arm_smmu_attach_dev,
> +		.enforce_cache_coherency = arm_smmu_enforce_cache_coherency,
> 		.set_dev_pasid = arm_smmu_s1_set_dev_pasid,
> 		.map_pages = arm_smmu_map_pages,
> 		.unmap_pages = arm_smmu_unmap_pages,
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 7e8d2f36faebf3..45882f65bfcad0 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -787,6 +787,7 @@ struct arm_smmu_domain {
> 	/* List of struct arm_smmu_master_domain */
> 	struct list_head devices;
> 	spinlock_t devices_lock;
> +	bool enforce_cache_coherency : 1;
>
> 	struct mmu_notifier mmu_notifier;
> };
> --
> 2.46.0
>
On Fri, Aug 30, 2024 at 03:19:16PM +0000, Mostafa Saleh wrote:
> > @@ -2263,6 +2266,28 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
> > 	}
> > }
> >
> > +static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
> > +{
> > +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> > +	struct arm_smmu_master_domain *master_domain;
> > +	unsigned long flags;
> > +	bool ret = false;
> nit: we can avoid the goto, if we inverse the logic of ret (and set it
> to false if device doesn't support CANWBS)

Yeah, that is tidier:

static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_master_domain *master_domain;
	unsigned long flags;
	bool ret = true;

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master_domain, &smmu_domain->devices,
			    devices_elm) {
		if (!arm_smmu_master_canwbs(master_domain->master)) {
			ret = false;
			break;
		}
	}
	smmu_domain->enforce_cache_coherency = ret;
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
	return ret;
}

Thanks,
Jason
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index e2b97ad6d74b03..c2021e821e5cb6 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2253,6 +2253,9 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
 	case IOMMU_CAP_CACHE_COHERENCY:
 		/* Assume that a coherent TCU implies coherent TBUs */
 		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
+	case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
+		return dev_iommu_fwspec_get(dev)->flags &
+		       IOMMU_FWSPEC_PCI_RC_CANWBS;
 	case IOMMU_CAP_NOEXEC:
 	case IOMMU_CAP_DEFERRED_FLUSH:
 		return true;
@@ -2263,6 +2266,28 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
 	}
 }
 
+static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
+{
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+	struct arm_smmu_master_domain *master_domain;
+	unsigned long flags;
+	bool ret = false;
+
+	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+	list_for_each_entry(master_domain, &smmu_domain->devices,
+			    devices_elm) {
+		if (!(dev_iommu_fwspec_get(master_domain->master->dev)->flags &
+		      IOMMU_FWSPEC_PCI_RC_CANWBS))
+			goto out;
+	}
+
+	smmu_domain->enforce_cache_coherency = true;
+	ret = true;
+out:
+	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+	return ret;
+}
+
 struct arm_smmu_domain *arm_smmu_domain_alloc(void)
 {
 	struct arm_smmu_domain *smmu_domain;
@@ -2693,6 +2718,15 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
 	 * one of them.
 	 */
 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+	if (smmu_domain->enforce_cache_coherency &&
+	    !(dev_iommu_fwspec_get(master->dev)->flags &
+	      IOMMU_FWSPEC_PCI_RC_CANWBS)) {
+		kfree(master_domain);
+		spin_unlock_irqrestore(&smmu_domain->devices_lock,
+				       flags);
+		return -EINVAL;
+	}
+
 	if (state->ats_enabled)
 		atomic_inc(&smmu_domain->nr_ats_masters);
 	list_add(&master_domain->devices_elm, &smmu_domain->devices);
@@ -3450,6 +3484,7 @@ static struct iommu_ops arm_smmu_ops = {
 	.owner = THIS_MODULE,
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev = arm_smmu_attach_dev,
+		.enforce_cache_coherency = arm_smmu_enforce_cache_coherency,
 		.set_dev_pasid = arm_smmu_s1_set_dev_pasid,
 		.map_pages = arm_smmu_map_pages,
 		.unmap_pages = arm_smmu_unmap_pages,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 7e8d2f36faebf3..45882f65bfcad0 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -787,6 +787,7 @@ struct arm_smmu_domain {
 	/* List of struct arm_smmu_master_domain */
 	struct list_head devices;
 	spinlock_t devices_lock;
+	bool enforce_cache_coherency : 1;
 
 	struct mmu_notifier mmu_notifier;
 };
HW with CANWBS is always cache coherent and ignores PCI No Snoop requests
as well. This meets the requirement for IOMMU_CAP_ENFORCE_CACHE_COHERENCY,
so let's return it.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 35 +++++++++++++++++++++
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  1 +
 2 files changed, 36 insertions(+)