diff mbox

[2/5] iommu/arm-smmu: add support for PCI master devices

Message ID 1404125530-17984-3-git-send-email-will.deacon@arm.com
State Accepted
Commit a9a1b0b53d8b7ca60abef0687eae927f286f07c2
Headers show

Commit Message

Will Deacon June 30, 2014, 10:52 a.m. UTC
This patch extends the ARM SMMU driver so that it can handle PCI master
devices in addition to platform devices described in the device tree.

The driver is informed about the PCI host controller in the DT via a
phandle to the host controller in the mmu-masters property. The host
controller is then added to the master tree for that SMMU, just like a
normal master (although it probably doesn't advertise any StreamIDs).

When a device is added to the PCI bus, we set the archdata.iommu pointer
for that device to describe its StreamID (actually its RequesterID for
the moment). This allows us to re-use our existing data structures using
the host controller of_node for everything apart from StreamID
configuration, where we reach into the archdata for the information we
require.

Cc: Varun Sethi <varun.sethi@freescale.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu.c | 242 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 156 insertions(+), 86 deletions(-)

Comments

Varun Sethi July 3, 2014, 2:22 p.m. UTC | #1
Hi Will,

> -----Original Message-----
> From: Will Deacon [mailto:will.deacon@arm.com]
> Sent: Monday, June 30, 2014 4:22 PM
> To: linux-arm-kernel@lists.infradead.org; iommu@lists.linux-
> foundation.org
> Cc: thierry.reding@gmail.com; arnd@arndb.de; ohaugan@codeaurora.org;
> Sethi Varun-B16395; joro@8bytes.org; a.motakis@virtualopensystems.com;
> marc.zyngier@arm.com; Will Deacon; Sethi Varun-B16395
> Subject: [PATCH 2/5] iommu/arm-smmu: add support for PCI master devices
> 
> This patch extends the ARM SMMU driver so that it can handle PCI master
> devices in addition to platform devices described in the device tree.
> 
> The driver is informed about the PCI host controller in the DT via a
> phandle to the host controller in the mmu-masters property. The host
> controller is then added to the master tree for that SMMU, just like a
> normal master (although it probably doesn't advertise any StreamIDs).
> 
> When a device is added to the PCI bus, we set the archdata.iommu pointer
> for that device to describe its StreamID (actually its RequesterID for
> the moment). This allows us to re-use our existing data structures using
> the host controller of_node for everything apart from StreamID
> configuration, where we reach into the archdata for the information we
> require.
> 
> Cc: Varun Sethi <varun.sethi@freescale.com>
> Signed-off-by: Will Deacon <will.deacon@arm.com>
> ---
>  drivers/iommu/arm-smmu.c | 242 ++++++++++++++++++++++++++++++-----------
> ------
>  1 file changed, 156 insertions(+), 86 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index
> 81e8ec290756..e4eebc50612c 100644
> --- a/drivers/iommu/arm-smmu.c
> +++ b/drivers/iommu/arm-smmu.c
> @@ -39,6 +39,7 @@
>  #include <linux/mm.h>
>  #include <linux/module.h>
>  #include <linux/of.h>
> +#include <linux/pci.h>
>  #include <linux/platform_device.h>
>  #include <linux/slab.h>
>  #include <linux/spinlock.h>
> @@ -329,14 +330,7 @@ struct arm_smmu_smr {
>  	u16				id;
>  };
> 
> -struct arm_smmu_master {
> -	struct device_node		*of_node;
> -
> -	/*
> -	 * The following is specific to the master's position in the
> -	 * SMMU chain.
> -	 */
> -	struct rb_node			node;
> +struct arm_smmu_master_cfg {
>  	int				num_streamids;
>  	u16				streamids[MAX_MASTER_STREAMIDS];
> 
> @@ -347,6 +341,17 @@ struct arm_smmu_master {
>  	struct arm_smmu_smr		*smrs;
>  };
> 
> +struct arm_smmu_master {
> +	struct device_node		*of_node;
> +
> +	/*
> +	 * The following is specific to the master's position in the
> +	 * SMMU chain.
> +	 */
> +	struct rb_node			node;
> +	struct arm_smmu_master_cfg	cfg;
> +};
> +
>  struct arm_smmu_device {
>  	struct device			*dev;
>  	struct device_node		*parent_of_node;
> @@ -437,6 +442,18 @@ static void parse_driver_options(struct
> arm_smmu_device *smmu)
>  	} while (arm_smmu_options[++i].opt);
>  }
> 
> +static struct device *dev_get_master_dev(struct device *dev) {
> +	if (dev_is_pci(dev)) {
> +		struct pci_bus *bus = to_pci_dev(dev)->bus;
> +		while (!pci_is_root_bus(bus))
> +			bus = bus->parent;
> +		return bus->bridge->parent;
> +	}
> +
> +	return dev;
> +}
> +
>  static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device
> *smmu,
>  						struct device_node *dev_node)
>  {
> @@ -457,6 +474,18 @@ static struct arm_smmu_master
> *find_smmu_master(struct arm_smmu_device *smmu,
>  	return NULL;
>  }
> 
> +static struct arm_smmu_master_cfg *
> +find_smmu_master_cfg(struct arm_smmu_device *smmu, struct device *dev)
> +{
> +	struct arm_smmu_master *master;
> +
> +	if (dev_is_pci(dev))
> +		return dev->archdata.iommu;
> +
> +	master = find_smmu_master(smmu, dev_get_master_dev(dev)->of_node);
[Sethi Varun-B16395] Why use dev_get_master_dev over here? You can simply use dev.

> +	return master ? &master->cfg : NULL;
> +}
> +
>  static int insert_smmu_master(struct arm_smmu_device *smmu,
>  			      struct arm_smmu_master *master)  { @@ -508,11
> +537,11 @@ static int register_smmu_master(struct arm_smmu_device *smmu,
>  	if (!master)
>  		return -ENOMEM;
> 
> -	master->of_node		= masterspec->np;
> -	master->num_streamids	= masterspec->args_count;
> +	master->of_node			= masterspec->np;
> +	master->cfg.num_streamids	= masterspec->args_count;
> 
> -	for (i = 0; i < master->num_streamids; ++i)
> -		master->streamids[i] = masterspec->args[i];
> +	for (i = 0; i < master->cfg.num_streamids; ++i)
> +		master->cfg.streamids[i] = masterspec->args[i];
> 
>  	return insert_smmu_master(smmu, master);  } @@ -537,6 +566,42 @@
> out_unlock:
>  	return parent;
>  }
> 
> +static struct arm_smmu_device *find_parent_smmu_for_device(struct
> +device *dev) {
> +	struct arm_smmu_device *child, *parent, *smmu;
> +	struct arm_smmu_master *master = NULL;
> +	struct device_node *dev_node = dev_get_master_dev(dev)->of_node;
> +
> +	spin_lock(&arm_smmu_devices_lock);
> +	list_for_each_entry(parent, &arm_smmu_devices, list) {
> +		smmu = parent;
> +
> +		/* Try to find a child of the current SMMU. */
> +		list_for_each_entry(child, &arm_smmu_devices, list) {
> +			if (child->parent_of_node == parent->dev->of_node) {
> +				/* Does the child sit above our master? */
> +				master = find_smmu_master(child, dev_node);
> +				if (master) {
> +					smmu = NULL;
> +					break;
> +				}
> +			}
> +		}
> +
> +		/* We found some children, so keep searching. */
> +		if (!smmu) {
> +			master = NULL;
> +			continue;
> +		}
> +
> +		master = find_smmu_master(smmu, dev_node);
> +		if (master)
> +			break;
> +	}
> +	spin_unlock(&arm_smmu_devices_lock);
> +	return master ? smmu : NULL;
> +}
> +
>  static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int
> end)  {
>  	int idx;
> @@ -855,7 +920,8 @@ static void arm_smmu_init_context_bank(struct
> arm_smmu_domain *smmu_domain)  }
> 
>  static int arm_smmu_init_domain_context(struct iommu_domain *domain,
> -					struct device *dev)
> +					struct device *dev,
> +					struct arm_smmu_device *device_smmu)
>  {
>  	int irq, ret, start;
>  	struct arm_smmu_domain *smmu_domain = domain->priv; @@ -868,15
> +934,15 @@ static int arm_smmu_init_domain_context(struct iommu_domain
> *domain,
>  	 * early, and therefore check that the root SMMU does indeed have
>  	 * a StreamID for the master in question.
>  	 */
> -	parent = dev->archdata.iommu;
> +	parent = device_smmu;
>  	smmu_domain->output_mask = -1;
>  	do {
>  		smmu = parent;
>  		smmu_domain->output_mask &= (1ULL << smmu->s2_output_size) -
> 1;
>  	} while ((parent = find_parent_smmu(smmu)));
> 
> -	if (!find_smmu_master(smmu, dev->of_node)) {
> -		dev_err(dev, "unable to find root SMMU for device\n");
> +	if (!find_smmu_master_cfg(smmu, dev)) {
> +		dev_err(dev, "unable to find root SMMU config for device\n");
>  		return -ENODEV;
>  	}
> 
> @@ -920,7 +986,8 @@ static int arm_smmu_init_domain_context(struct
> iommu_domain *domain,
> 
>  	root_cfg->smmu = smmu;
>  	arm_smmu_init_context_bank(smmu_domain);
> -	return ret;
> +	smmu_domain->leaf_smmu = device_smmu;
> +	return 0;
> 
>  out_free_context:
>  	__arm_smmu_free_bitmap(smmu->context_map, root_cfg->cbndx); @@ -
> 1056,7 +1123,7 @@ static void arm_smmu_domain_destroy(struct iommu_domain
> *domain)  }
> 
>  static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
> -					  struct arm_smmu_master *master)
> +					  struct arm_smmu_master_cfg *cfg)
>  {
>  	int i;
>  	struct arm_smmu_smr *smrs;
> @@ -1065,18 +1132,18 @@ static int arm_smmu_master_configure_smrs(struct
> arm_smmu_device *smmu,
>  	if (!(smmu->features & ARM_SMMU_FEAT_STREAM_MATCH))
>  		return 0;
> 
> -	if (master->smrs)
> +	if (cfg->smrs)
>  		return -EEXIST;
> 
> -	smrs = kmalloc(sizeof(*smrs) * master->num_streamids, GFP_KERNEL);
> +	smrs = kmalloc(sizeof(*smrs) * cfg->num_streamids, GFP_KERNEL);
>  	if (!smrs) {
> -		dev_err(smmu->dev, "failed to allocate %d SMRs for master
> %s\n",
> -			master->num_streamids, master->of_node->name);
> +		dev_err(smmu->dev, "failed to allocate %d SMRs\n",
> +			cfg->num_streamids);
>  		return -ENOMEM;
>  	}
> 
>  	/* Allocate the SMRs on the root SMMU */
> -	for (i = 0; i < master->num_streamids; ++i) {
> +	for (i = 0; i < cfg->num_streamids; ++i) {
>  		int idx = __arm_smmu_alloc_bitmap(smmu->smr_map, 0,
>  						  smmu->num_mapping_groups);
>  		if (IS_ERR_VALUE(idx)) {
> @@ -1087,18 +1154,18 @@ static int arm_smmu_master_configure_smrs(struct
> arm_smmu_device *smmu,
>  		smrs[i] = (struct arm_smmu_smr) {
>  			.idx	= idx,
>  			.mask	= 0, /* We don't currently share SMRs */
> -			.id	= master->streamids[i],
> +			.id	= cfg->streamids[i],
>  		};
>  	}
> 
>  	/* It worked! Now, poke the actual hardware */
> -	for (i = 0; i < master->num_streamids; ++i) {
> +	for (i = 0; i < cfg->num_streamids; ++i) {
>  		u32 reg = SMR_VALID | smrs[i].id << SMR_ID_SHIFT |
>  			  smrs[i].mask << SMR_MASK_SHIFT;
>  		writel_relaxed(reg, gr0_base +
> ARM_SMMU_GR0_SMR(smrs[i].idx));
>  	}
> 
> -	master->smrs = smrs;
> +	cfg->smrs = smrs;
>  	return 0;
> 
>  err_free_smrs:
> @@ -1109,44 +1176,44 @@ err_free_smrs:
>  }
> 
>  static void arm_smmu_master_free_smrs(struct arm_smmu_device *smmu,
> -				      struct arm_smmu_master *master)
> +				      struct arm_smmu_master_cfg *cfg)
>  {
>  	int i;
>  	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
> -	struct arm_smmu_smr *smrs = master->smrs;
> +	struct arm_smmu_smr *smrs = cfg->smrs;
> 
>  	/* Invalidate the SMRs before freeing back to the allocator */
> -	for (i = 0; i < master->num_streamids; ++i) {
> +	for (i = 0; i < cfg->num_streamids; ++i) {
>  		u8 idx = smrs[i].idx;
>  		writel_relaxed(~SMR_VALID, gr0_base + ARM_SMMU_GR0_SMR(idx));
>  		__arm_smmu_free_bitmap(smmu->smr_map, idx);
>  	}
> 
> -	master->smrs = NULL;
> +	cfg->smrs = NULL;
>  	kfree(smrs);
>  }
> 
>  static void arm_smmu_bypass_stream_mapping(struct arm_smmu_device *smmu,
> -					   struct arm_smmu_master *master)
> +					   struct arm_smmu_master_cfg *cfg)
>  {
>  	int i;
>  	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
> 
> -	for (i = 0; i < master->num_streamids; ++i) {
> -		u16 sid = master->streamids[i];
> +	for (i = 0; i < cfg->num_streamids; ++i) {
> +		u16 sid = cfg->streamids[i];
>  		writel_relaxed(S2CR_TYPE_BYPASS,
>  			       gr0_base + ARM_SMMU_GR0_S2CR(sid));
>  	}
>  }
> 
>  static int arm_smmu_domain_add_master(struct arm_smmu_domain
> *smmu_domain,
> -				      struct arm_smmu_master *master)
> +				      struct arm_smmu_master_cfg *cfg)
>  {
>  	int i, ret;
>  	struct arm_smmu_device *parent, *smmu = smmu_domain->root_cfg.smmu;
>  	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
> 
> -	ret = arm_smmu_master_configure_smrs(smmu, master);
> +	ret = arm_smmu_master_configure_smrs(smmu, cfg);
>  	if (ret)
>  		return ret;
> 
> @@ -1161,14 +1228,14 @@ static int arm_smmu_domain_add_master(struct
> arm_smmu_domain *smmu_domain,
>  		if (smmu->features & ARM_SMMU_FEAT_STREAM_MATCH)
>  			continue;
> 
> -		arm_smmu_bypass_stream_mapping(smmu, master);
> +		arm_smmu_bypass_stream_mapping(smmu, cfg);
>  		smmu = parent;
>  	}
> 
>  	/* Now we're at the root, time to point at our context bank */
> -	for (i = 0; i < master->num_streamids; ++i) {
> +	for (i = 0; i < cfg->num_streamids; ++i) {
>  		u32 idx, s2cr;
> -		idx = master->smrs ? master->smrs[i].idx : master-
> >streamids[i];
> +		idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
>  		s2cr = S2CR_TYPE_TRANS |
>  		       (smmu_domain->root_cfg.cbndx << S2CR_CBNDX_SHIFT);
>  		writel_relaxed(s2cr, gr0_base + ARM_SMMU_GR0_S2CR(idx)); @@ -
> 1178,7 +1245,7 @@ static int arm_smmu_domain_add_master(struct
> arm_smmu_domain *smmu_domain,  }
> 
>  static void arm_smmu_domain_remove_master(struct arm_smmu_domain
> *smmu_domain,
> -					  struct arm_smmu_master *master)
> +					  struct arm_smmu_master_cfg *cfg)
>  {
>  	struct arm_smmu_device *smmu = smmu_domain->root_cfg.smmu;
> 
> @@ -1186,18 +1253,19 @@ static void arm_smmu_domain_remove_master(struct
> arm_smmu_domain *smmu_domain,
>  	 * We *must* clear the S2CR first, because freeing the SMR means
>  	 * that it can be re-allocated immediately.
>  	 */
> -	arm_smmu_bypass_stream_mapping(smmu, master);
> -	arm_smmu_master_free_smrs(smmu, master);
> +	arm_smmu_bypass_stream_mapping(smmu, cfg);
> +	arm_smmu_master_free_smrs(smmu, cfg);
>  }
> 
>  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct
> device *dev)  {
>  	int ret = -EINVAL;
>  	struct arm_smmu_domain *smmu_domain = domain->priv;
> -	struct arm_smmu_device *device_smmu = dev->archdata.iommu;
> -	struct arm_smmu_master *master;
> +	struct arm_smmu_device *device_smmu;
> +	struct arm_smmu_master_cfg *cfg;
>  	unsigned long flags;
> 
> +	device_smmu = dev_get_master_dev(dev)->archdata.iommu;
>  	if (!device_smmu) {
>  		dev_err(dev, "cannot attach to SMMU, is it on the same
> bus?\n");
>  		return -ENXIO;
> @@ -1210,11 +1278,9 @@ static int arm_smmu_attach_dev(struct iommu_domain
> *domain, struct device *dev)
>  	spin_lock_irqsave(&smmu_domain->lock, flags);
>  	if (!smmu_domain->leaf_smmu) {
>  		/* Now that we have a master, we can finalise the domain */
> -		ret = arm_smmu_init_domain_context(domain, dev);
> +		ret = arm_smmu_init_domain_context(domain, dev, device_smmu);
>  		if (IS_ERR_VALUE(ret))
>  			goto err_unlock;
> -
> -		smmu_domain->leaf_smmu = device_smmu;
>  	} else if (smmu_domain->leaf_smmu != device_smmu) {
>  		dev_err(dev,
>  			"cannot attach to SMMU %s whilst already attached to
> domain on SMMU %s\n", @@ -1225,11 +1291,11 @@ static int
> arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
>  	spin_unlock_irqrestore(&smmu_domain->lock, flags);
> 
>  	/* Looks ok, so add the device to the domain */
> -	master = find_smmu_master(smmu_domain->leaf_smmu, dev->of_node);
> -	if (!master)
> +	cfg = find_smmu_master_cfg(smmu_domain->leaf_smmu, dev);
> +	if (!cfg)
>  		return -ENODEV;
> 
> -	return arm_smmu_domain_add_master(smmu_domain, master);
> +	return arm_smmu_domain_add_master(smmu_domain, cfg);
> 
>  err_unlock:
>  	spin_unlock_irqrestore(&smmu_domain->lock, flags); @@ -1239,11
> +1305,11 @@ err_unlock:
>  static void arm_smmu_detach_dev(struct iommu_domain *domain, struct
> device *dev)  {
>  	struct arm_smmu_domain *smmu_domain = domain->priv;
> -	struct arm_smmu_master *master;
> +	struct arm_smmu_master_cfg *cfg;
> 
> -	master = find_smmu_master(smmu_domain->leaf_smmu, dev->of_node);
> -	if (master)
> -		arm_smmu_domain_remove_master(smmu_domain, master);
> +	cfg = find_smmu_master_cfg(smmu_domain->leaf_smmu, dev);
> +	if (cfg)
> +		arm_smmu_domain_remove_master(smmu_domain, cfg);
>  }
> 
>  static bool arm_smmu_pte_is_contiguous_range(unsigned long addr, @@ -
> 1549,10 +1615,15 @@ static int arm_smmu_domain_has_cap(struct
> iommu_domain *domain,
>  	return !!(cap & caps);
>  }
> 
> +static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void
> +*data) {
> +	*((u16 *)data) = alias;
> +	return 0; /* Continue walking */
> +}
> +
>  static int arm_smmu_add_device(struct device *dev)  {
> -	struct arm_smmu_device *child, *parent, *smmu;
> -	struct arm_smmu_master *master = NULL;
> +	struct arm_smmu_device *smmu;
>  	struct iommu_group *group;
>  	int ret;
> 
> @@ -1561,35 +1632,8 @@ static int arm_smmu_add_device(struct device *dev)
>  		return -EINVAL;
>  	}
> 
> -	spin_lock(&arm_smmu_devices_lock);
> -	list_for_each_entry(parent, &arm_smmu_devices, list) {
> -		smmu = parent;
> -
> -		/* Try to find a child of the current SMMU. */
> -		list_for_each_entry(child, &arm_smmu_devices, list) {
> -			if (child->parent_of_node == parent->dev->of_node) {
> -				/* Does the child sit above our master? */
> -				master = find_smmu_master(child, dev->of_node);
> -				if (master) {
> -					smmu = NULL;
> -					break;
> -				}
> -			}
> -		}
> -
> -		/* We found some children, so keep searching. */
> -		if (!smmu) {
> -			master = NULL;
> -			continue;
> -		}
> -
> -		master = find_smmu_master(smmu, dev->of_node);
> -		if (master)
> -			break;
> -	}
> -	spin_unlock(&arm_smmu_devices_lock);
> -
> -	if (!master)
> +	smmu = find_parent_smmu_for_device(dev);
> +	if (!smmu)
>  		return -ENODEV;
> 
>  	group = iommu_group_alloc();
> @@ -1598,15 +1642,36 @@ static int arm_smmu_add_device(struct device
> *dev)
>  		return PTR_ERR(group);
>  	}
> 
> +	if (dev_is_pci(dev)) {
> +		struct arm_smmu_master_cfg *cfg;
> +		struct pci_dev *pdev = to_pci_dev(dev);
> +
> +		cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
> +		if (!cfg) {
> +			ret = -ENOMEM;
> +			goto out_put_group;
> +		}
> +
> +		cfg->num_streamids = 1;
> +		pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid,
> +				       &cfg->streamids[0]);
[Sethi Varun-B16395] We need to look for upstream DMA device. We should be using pci_find_dma_isolation_root here. Also, this would also imply that there could be multiple devices sharing the same stream ID. So, we should check if a particular stream ID value has already been configured in the SMR registers.

-Varun
Will Deacon July 3, 2014, 2:43 p.m. UTC | #2
On Thu, Jul 03, 2014 at 03:22:37PM +0100, Varun Sethi wrote:
> Hi Will,

Hi Varun,

Thanks for taking a look at this!

> > +static struct arm_smmu_master_cfg *
> > +find_smmu_master_cfg(struct arm_smmu_device *smmu, struct device *dev)
> > +{
> > +     struct arm_smmu_master *master;
> > +
> > +     if (dev_is_pci(dev))
> > +             return dev->archdata.iommu;
> > +
> > +     master = find_smmu_master(smmu, dev_get_master_dev(dev)->of_node);
> [Sethi Varun-B16395] Why use dev_get_master_dev over here? You can simply use dev.

True; we've already done the PCI check above. I'll tidy this up.

> > @@ -1598,15 +1642,36 @@ static int arm_smmu_add_device(struct device
> > *dev)
> >               return PTR_ERR(group);
> >       }
> >
> > +     if (dev_is_pci(dev)) {
> > +             struct arm_smmu_master_cfg *cfg;
> > +             struct pci_dev *pdev = to_pci_dev(dev);
> > +
> > +             cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
> > +             if (!cfg) {
> > +                     ret = -ENOMEM;
> > +                     goto out_put_group;
> > +             }
> > +
> > +             cfg->num_streamids = 1;
> > +             pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid,
> > +                                    &cfg->streamids[0]);
> [Sethi Varun-B16395] We need to look for upstream DMA device. We should be
> using pci_find_dma_isolation_root here. Also, this would also imply that
> there could be multiple devices sharing the same stream ID. So, we should
> check if a particular stream ID value has already been configured in the
> SMR registers.

That function doesn't seem to appear in mainline or next. I can move to it
when it's available, but in the meantime the above is working for me. I'm
making the assumption here that the system is configured so that there
aren't any duplicate stream IDs. What we actually need is a function here
which maps the requester ID to a stream ID using firmware tables (provided
by DT or ACPI). In the absence of those tables at the moment, I just assign
the ID directly, which happens to work on my platform (1:1 mapping).

Once Thierry's generic IOMMU binding is sorted, we should look at adding
support for the Stream ID description. Have you looked at that at all?

Will

BTW: You seem to have a rather strange quoting style on your replies. Is
there any way to configure your editor to limit your lines to 80 columns?
You also don't need the prefix with your name and number in brackets!
Varun Sethi July 4, 2014, 7:41 a.m. UTC | #3
Hi Will,

> -----Original Message-----
> From: Will Deacon [mailto:will.deacon@arm.com]
> Sent: Thursday, July 03, 2014 8:14 PM
> To: Sethi Varun-B16395
> Cc: linux-arm-kernel@lists.infradead.org; iommu@lists.linux-
> foundation.org; thierry.reding@gmail.com; arnd@arndb.de;
> ohaugan@codeaurora.org; joro@8bytes.org;
> a.motakis@virtualopensystems.com; Marc Zyngier
> Subject: Re: [PATCH 2/5] iommu/arm-smmu: add support for PCI master
> devices
> 
> On Thu, Jul 03, 2014 at 03:22:37PM +0100, Varun Sethi wrote:
> > Hi Will,
> 
> Hi Varun,
> 
> Thanks for taking a look at this!
> 
> > > +static struct arm_smmu_master_cfg * find_smmu_master_cfg(struct
> > > +arm_smmu_device *smmu, struct device *dev) {
> > > +     struct arm_smmu_master *master;
> > > +
> > > +     if (dev_is_pci(dev))
> > > +             return dev->archdata.iommu;
> > > +
> > > +     master = find_smmu_master(smmu,
> > > + dev_get_master_dev(dev)->of_node);
> > [Sethi Varun-B16395] Why use dev_get_master_dev over here? You can
> simply use dev.
> 
> True; we've already done the PCI check above. I'll tidy this up.
> 
> > > @@ -1598,15 +1642,36 @@ static int arm_smmu_add_device(struct device
> > > *dev)
> > >               return PTR_ERR(group);
> > >       }
> > >
> > > +     if (dev_is_pci(dev)) {
> > > +             struct arm_smmu_master_cfg *cfg;
> > > +             struct pci_dev *pdev = to_pci_dev(dev);
> > > +
> > > +             cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
> > > +             if (!cfg) {
> > > +                     ret = -ENOMEM;
> > > +                     goto out_put_group;
> > > +             }
> > > +
> > > +             cfg->num_streamids = 1;
> > > +             pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid,
> > > +                                    &cfg->streamids[0]);
> > [Sethi Varun-B16395] We need to look for upstream DMA device. We
> > should be using pci_find_dma_isolation_root here. Also, this would
> > also imply that there could be multiple devices sharing the same
> > stream ID. So, we should check if a particular stream ID value has
> > already been configured in the SMR registers.
> 
> That function doesn't seem to appear in mainline or next. I can move to
> it when it's available, but in the meantime the above is working for me.
> I'm making the assumption here that the system is configured so that
> there aren't any duplicate stream IDs. What we actually need is a
> function here which maps the requester ID to a stream ID using firmware
> tables (provided by DT or ACPI). In the absence of those tables at the
> moment, I just assign the ID directly, which happens to work on my
> platform (1:1 mapping).
> 
> Once Thierry's generic IOMMU binding is sorted, we should look at adding
> support for the Stream ID description. Have you looked at that at all?
> 
Yes, I have looked at the bindings. Would we need to represent the stream ids for PCI devices in the device tree? Why do we want to depend on the firmware to map the requestor id to the stream id? It can be handled using the APIs proposed by Alex Williamson. This is similar to IOMMU group determination, which is handled by the IOMMU driver.

> Will
> 
> BTW: You seem to have a rather strange quoting style on your replies. Is
> there any way to configure your editor to limit your lines to 80 columns?
> You also don't need the prefix with your name and number in brackets!


Will try to sort out issues with my e-mail client.

-Varun
Will Deacon July 4, 2014, 8:13 a.m. UTC | #4
On Fri, Jul 04, 2014 at 08:41:53AM +0100, Varun Sethi wrote:
> Hi Will,

Hey Varun,

> > Once Thierry's generic IOMMU binding is sorted, we should look at adding
> > support for the Stream ID description. Have you looked at that at all?
> > 
> Yes, I have looked at the bindings. Would we need to represent the stream
> ids for PCI devices in the device tree? Why do we want to depend on the
> firmware to map the requestor id to the stream id? It can be handled using
> the APIs proposed by Alex Williamson. This is similar to IOMMU group
> determination, which is handled by the IOMMU driver.

Well, there could easily be a fixed mapping from the ID at the host controller
and the ID seem by the SMMU (e.g. two host controllers sharing an SMMU?). I
don't think walking the PCI buses can help you there.

The way I was thinking to handle this is that we express SID = RID +
offset. In the device-tree, we can then describe a range of RIDs on the host
controller, a single offset, and we get back a range of SIDs.

In the worst case scenario, each RID maps to a totally random SID, so then
you have a huge table describing the mapping. I *think* this is actually
unlikely, and if we ever see such a device we can either have a large
mapping or put it into C code for that specific SoC (if it's really huge).

FWIW: I believe that the ACPI folks are thinking along similar lines.

Will
Will Deacon July 9, 2014, 1:26 p.m. UTC | #5
[Adding Alex; question below]

On Thu, Jul 03, 2014 at 03:22:37PM +0100, Varun Sethi wrote:
> > +static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void
> > +*data) {
> > +     *((u16 *)data) = alias;
> > +     return 0; /* Continue walking */
> > +}

[...]

> > @@ -1598,15 +1642,36 @@ static int arm_smmu_add_device(struct device
> > *dev)
> >               return PTR_ERR(group);
> >       }
> >
> > +     if (dev_is_pci(dev)) {
> > +             struct arm_smmu_master_cfg *cfg;
> > +             struct pci_dev *pdev = to_pci_dev(dev);
> > +
> > +             cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
> > +             if (!cfg) {
> > +                     ret = -ENOMEM;
> > +                     goto out_put_group;
> > +             }
> > +
> > +             cfg->num_streamids = 1;
> > +             pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid,
> > +                                    &cfg->streamids[0]);
> [Sethi Varun-B16395] We need to look for upstream DMA device. We should be
> using pci_find_dma_isolation_root here. Also, this would also imply that
> there could be multiple devices sharing the same stream ID. So, we should
> check if a particular stream ID value has already been configured in the
> SMR registers.

pci_find_dma_isolation_root doesn't exist in any of the trees I have. Alex,
is this queued anywhere and do I actually need it?

The purpose of this code is to find the requester ID of a device as it
appears at the host controller. At this point, we can map it (via firmware
tables that are TBD) to a Stream ID for the SMMU. It looks to me like
pci_for_each_dma_alias walks over non-transparent PCI bridges correctly, so
the callback I provide just updates the alias until the walk has completed.

Will
Alex Williamson July 9, 2014, 2:13 p.m. UTC | #6
On Wed, 2014-07-09 at 14:26 +0100, Will Deacon wrote:
> [Adding Alex; question below]
> 
> On Thu, Jul 03, 2014 at 03:22:37PM +0100, Varun Sethi wrote:
> > > +static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void
> > > +*data) {
> > > +     *((u16 *)data) = alias;
> > > +     return 0; /* Continue walking */
> > > +}
> 
> [...]
> 
> > > @@ -1598,15 +1642,36 @@ static int arm_smmu_add_device(struct device
> > > *dev)
> > >               return PTR_ERR(group);
> > >       }
> > >
> > > +     if (dev_is_pci(dev)) {
> > > +             struct arm_smmu_master_cfg *cfg;
> > > +             struct pci_dev *pdev = to_pci_dev(dev);
> > > +
> > > +             cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
> > > +             if (!cfg) {
> > > +                     ret = -ENOMEM;
> > > +                     goto out_put_group;
> > > +             }
> > > +
> > > +             cfg->num_streamids = 1;
> > > +             pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid,
> > > +                                    &cfg->streamids[0]);
> > [Sethi Varun-B16395] We need to look for upstream DMA device. We should be
> > using pci_find_dma_isolation_root here. Also, this would also imply that
> > there could be multiple devices sharing the same stream ID. So, we should
> > check if a particular stream ID value has already been configured in the
> > SMR registers.
> 
> pci_find_dma_isolation_root doesn't exist in any of the trees I have. Alex,
> is this queued anywhere and do I actually need it?

That function was in the v3 DMA alias series, in v4 it got replaced.
iommu_group_get_for_pci_dev() is the common function for finding or
creating a group for a device and pci_for_each_dma_alias() is the
interface to walk each alias in a PCI topology.  The pci_ interface is
in 3.16-rc and the iommu_ pieces are current in next via the iommu tree.

> The purpose of this code is to find the requester ID of a device as it
> appears at the host controller. At this point, we can map it (via firmware
> tables that are TBD) to a Stream ID for the SMMU. It looks to me like
> pci_for_each_dma_alias walks over non-transparent PCI bridges correctly, so
> the callback I provide just updates the alias until the walk has completed.

Yep, that's the intended usage.  There are cases in VT-d where it wants
to add context entries for every alias and cases elsewhere that we just
want the final alias.  pci_for_each_dma_alias() is meant to fit both use
cases.  Thanks,

Alex
Varun Sethi July 9, 2014, 2:21 p.m. UTC | #7
Hi Will,

> -----Original Message-----
> From: Will Deacon [mailto:will.deacon@arm.com]
> Sent: Wednesday, July 09, 2014 6:57 PM
> To: Sethi Varun-B16395; alex.williamson@redhat.com
> Cc: linux-arm-kernel@lists.infradead.org; iommu@lists.linux-
> foundation.org; thierry.reding@gmail.com; arnd@arndb.de;
> ohaugan@codeaurora.org; joro@8bytes.org;
> a.motakis@virtualopensystems.com; Marc Zyngier
> Subject: Re: [PATCH 2/5] iommu/arm-smmu: add support for PCI master
> devices
> 
> [Adding Alex; question below]
> 
> On Thu, Jul 03, 2014 at 03:22:37PM +0100, Varun Sethi wrote:
> > > +static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias,
> > > +void
> > > +*data) {
> > > +     *((u16 *)data) = alias;
> > > +     return 0; /* Continue walking */ }
> 
> [...]
> 
> > > @@ -1598,15 +1642,36 @@ static int arm_smmu_add_device(struct device
> > > *dev)
> > >               return PTR_ERR(group);
> > >       }
> > >
> > > +     if (dev_is_pci(dev)) {
> > > +             struct arm_smmu_master_cfg *cfg;
> > > +             struct pci_dev *pdev = to_pci_dev(dev);
> > > +
> > > +             cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
> > > +             if (!cfg) {
> > > +                     ret = -ENOMEM;
> > > +                     goto out_put_group;
> > > +             }
> > > +
> > > +             cfg->num_streamids = 1;
> > > +             pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid,
> > > +                                    &cfg->streamids[0]);
> > [Sethi Varun-B16395] We need to look for upstream DMA device. We
> > should be using pci_find_dma_isolation_root here. Also, this would
> > also imply that there could be multiple devices sharing the same
> > stream ID. So, we should check if a particular stream ID value has
> > already been configured in the SMR registers.
> 
> pci_find_dma_isolation_root doesn't exist in any of the trees I have.
> Alex, is this queued anywhere and do I actually need it?
> 
> The purpose of this code is to find the requester ID of a device as it
> appears at the host controller. At this point, we can map it (via
> firmware tables that are TBD) to a Stream ID for the SMMU. It looks to me
> like pci_for_each_dma_alias walks over non-transparent PCI bridges
> correctly, so the callback I provide just updates the alias until the
> walk has completed.
I think pci_for_each_dma_alias should work fine here. Isolation would be specifically required while setting up the iommu groups.

-Varun
Will Deacon July 9, 2014, 4:39 p.m. UTC | #8
On Wed, Jul 09, 2014 at 03:13:04PM +0100, Alex Williamson wrote:
> On Wed, 2014-07-09 at 14:26 +0100, Will Deacon wrote:
> > [Adding Alex; question below]
> > 
> > On Thu, Jul 03, 2014 at 03:22:37PM +0100, Varun Sethi wrote:
> > > > +static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void
> > > > +*data) {
> > > > +     *((u16 *)data) = alias;
> > > > +     return 0; /* Continue walking */
> > > > +}
> > 
> > [...]
> > 
> > > > @@ -1598,15 +1642,36 @@ static int arm_smmu_add_device(struct device
> > > > *dev)
> > > >               return PTR_ERR(group);
> > > >       }
> > > >
> > > > +     if (dev_is_pci(dev)) {
> > > > +             struct arm_smmu_master_cfg *cfg;
> > > > +             struct pci_dev *pdev = to_pci_dev(dev);
> > > > +
> > > > +             cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
> > > > +             if (!cfg) {
> > > > +                     ret = -ENOMEM;
> > > > +                     goto out_put_group;
> > > > +             }
> > > > +
> > > > +             cfg->num_streamids = 1;
> > > > +             pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid,
> > > > +                                    &cfg->streamids[0]);
> > > [Sethi Varun-B16395] We need to look for upstream DMA device. We should be
> > > using pci_find_dma_isolation_root here. Also, this would also imply that
> > > there could be multiple devices sharing the same stream ID. So, we should
> > > check if a particular stream ID value has already been configured in the
> > > SMR registers.
> > 
> > pci_find_dma_isolation_root doesn't exist in any of the trees I have. Alex,
> > is this queued anywhere and do I actually need it?
> 
> That function was in the v3 DMA alias series, in v4 it got replaced.
> iommu_group_get_for_pci_dev() is the common function for finding or
> creating a group for a device and pci_for_each_dma_alias() is the
> interface to walk each alias in a PCI topology.  The pci_ interface is
> in 3.16-rc and the iommu_ pieces are current in next via the iommu tree.

Cheers, Alex. I'll move my driver to using iommu_group_get_for_pci_dev once
that's hit mainline (since I currently put each master into its own group).

> > The purpose of this code is to find the requester ID of a device as it
> > appears at the host controller. At this point, we can map it (via firmware
> > tables that are TBD) to a Stream ID for the SMMU. It looks to me like
> > pci_for_each_dma_alias walks over non-transparent PCI bridges correctly, so
> > the callback I provide just updates the alias until the walk has completed.
> 
> Yep, that's the intended usage.  There are cases in VT-d where it wants
> to add context entries for every alias and cases elsewhere that we just
> want the final alias.  pci_for_each_dma_alias() is meant to fit both use
> cases.  Thanks,

Cracking, cheers for confirming.

Will
Will Deacon Oct. 6, 2014, 12:42 p.m. UTC | #9
Hi Alex,

On Wed, Jul 09, 2014 at 03:13:04PM +0100, Alex Williamson wrote:
> On Wed, 2014-07-09 at 14:26 +0100, Will Deacon wrote:
> > pci_find_dma_isolation_root doesn't exist in any of the trees I have. Alex,
> > is this queued anywhere and do I actually need it?
> 
> That function was in the v3 DMA alias series, in v4 it got replaced.
> iommu_group_get_for_pci_dev() is the common function for finding or
> creating a group for a device and pci_for_each_dma_alias() is the
> interface to walk each alias in a PCI topology.  The pci_ interface is
> in 3.16-rc and the iommu_ pieces are current in next via the iommu tree.
> 
> > The purpose of this code is to find the requester ID of a device as it
> > appears at the host controller. At this point, we can map it (via firmware
> > tables that are TBD) to a Stream ID for the SMMU. It looks to me like
> > pci_for_each_dma_alias walks over non-transparent PCI bridges correctly, so
> > the callback I provide just updates the alias until the walk has completed.
> 
> Yep, that's the intended usage.  There are cases in VT-d where it wants
> to add context entries for every alias and cases elsewhere that we just
> want the final alias.  pci_for_each_dma_alias() is meant to fit both use
> cases.  Thanks,

I'm looking at moving the arm-smmu driver over to using
iommu_group_get_for_dev, but I've hit a slight snag. Once I have the group,
I actually need to get hold of the alias for that group, since that will be
converted into a StreamID used to program the SMMU. I can do this by
open-coding my own version of iommu_group_get_for_pci_dev, but that's pretty
horrible.

What's the best way to get hold of the alias for a given group?

Cheers,

Will
diff mbox

Patch

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 81e8ec290756..e4eebc50612c 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -39,6 +39,7 @@ 
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/pci.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
@@ -329,14 +330,7 @@  struct arm_smmu_smr {
 	u16				id;
 };
 
-struct arm_smmu_master {
-	struct device_node		*of_node;
-
-	/*
-	 * The following is specific to the master's position in the
-	 * SMMU chain.
-	 */
-	struct rb_node			node;
+struct arm_smmu_master_cfg {
 	int				num_streamids;
 	u16				streamids[MAX_MASTER_STREAMIDS];
 
@@ -347,6 +341,17 @@  struct arm_smmu_master {
 	struct arm_smmu_smr		*smrs;
 };
 
+struct arm_smmu_master {
+	struct device_node		*of_node;
+
+	/*
+	 * The following is specific to the master's position in the
+	 * SMMU chain.
+	 */
+	struct rb_node			node;
+	struct arm_smmu_master_cfg	cfg;
+};
+
 struct arm_smmu_device {
 	struct device			*dev;
 	struct device_node		*parent_of_node;
@@ -437,6 +442,18 @@  static void parse_driver_options(struct arm_smmu_device *smmu)
 	} while (arm_smmu_options[++i].opt);
 }
 
+static struct device *dev_get_master_dev(struct device *dev)
+{
+	if (dev_is_pci(dev)) {
+		struct pci_bus *bus = to_pci_dev(dev)->bus;
+		while (!pci_is_root_bus(bus))
+			bus = bus->parent;
+		return bus->bridge->parent;
+	}
+
+	return dev;
+}
+
 static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu,
 						struct device_node *dev_node)
 {
@@ -457,6 +474,18 @@  static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu,
 	return NULL;
 }
 
+static struct arm_smmu_master_cfg *
+find_smmu_master_cfg(struct arm_smmu_device *smmu, struct device *dev)
+{
+	struct arm_smmu_master *master;
+
+	if (dev_is_pci(dev))
+		return dev->archdata.iommu;
+
+	master = find_smmu_master(smmu, dev_get_master_dev(dev)->of_node);
+	return master ? &master->cfg : NULL;
+}
+
 static int insert_smmu_master(struct arm_smmu_device *smmu,
 			      struct arm_smmu_master *master)
 {
@@ -508,11 +537,11 @@  static int register_smmu_master(struct arm_smmu_device *smmu,
 	if (!master)
 		return -ENOMEM;
 
-	master->of_node		= masterspec->np;
-	master->num_streamids	= masterspec->args_count;
+	master->of_node			= masterspec->np;
+	master->cfg.num_streamids	= masterspec->args_count;
 
-	for (i = 0; i < master->num_streamids; ++i)
-		master->streamids[i] = masterspec->args[i];
+	for (i = 0; i < master->cfg.num_streamids; ++i)
+		master->cfg.streamids[i] = masterspec->args[i];
 
 	return insert_smmu_master(smmu, master);
 }
@@ -537,6 +566,42 @@  out_unlock:
 	return parent;
 }
 
+static struct arm_smmu_device *find_parent_smmu_for_device(struct device *dev)
+{
+	struct arm_smmu_device *child, *parent, *smmu;
+	struct arm_smmu_master *master = NULL;
+	struct device_node *dev_node = dev_get_master_dev(dev)->of_node;
+
+	spin_lock(&arm_smmu_devices_lock);
+	list_for_each_entry(parent, &arm_smmu_devices, list) {
+		smmu = parent;
+
+		/* Try to find a child of the current SMMU. */
+		list_for_each_entry(child, &arm_smmu_devices, list) {
+			if (child->parent_of_node == parent->dev->of_node) {
+				/* Does the child sit above our master? */
+				master = find_smmu_master(child, dev_node);
+				if (master) {
+					smmu = NULL;
+					break;
+				}
+			}
+		}
+
+		/* We found some children, so keep searching. */
+		if (!smmu) {
+			master = NULL;
+			continue;
+		}
+
+		master = find_smmu_master(smmu, dev_node);
+		if (master)
+			break;
+	}
+	spin_unlock(&arm_smmu_devices_lock);
+	return master ? smmu : NULL;
+}
+
 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
 {
 	int idx;
@@ -855,7 +920,8 @@  static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
 }
 
 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
-					struct device *dev)
+					struct device *dev,
+					struct arm_smmu_device *device_smmu)
 {
 	int irq, ret, start;
 	struct arm_smmu_domain *smmu_domain = domain->priv;
@@ -868,15 +934,15 @@  static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 	 * early, and therefore check that the root SMMU does indeed have
 	 * a StreamID for the master in question.
 	 */
-	parent = dev->archdata.iommu;
+	parent = device_smmu;
 	smmu_domain->output_mask = -1;
 	do {
 		smmu = parent;
 		smmu_domain->output_mask &= (1ULL << smmu->s2_output_size) - 1;
 	} while ((parent = find_parent_smmu(smmu)));
 
-	if (!find_smmu_master(smmu, dev->of_node)) {
-		dev_err(dev, "unable to find root SMMU for device\n");
+	if (!find_smmu_master_cfg(smmu, dev)) {
+		dev_err(dev, "unable to find root SMMU config for device\n");
 		return -ENODEV;
 	}
 
@@ -920,7 +986,8 @@  static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 
 	root_cfg->smmu = smmu;
 	arm_smmu_init_context_bank(smmu_domain);
-	return ret;
+	smmu_domain->leaf_smmu = device_smmu;
+	return 0;
 
 out_free_context:
 	__arm_smmu_free_bitmap(smmu->context_map, root_cfg->cbndx);
@@ -1056,7 +1123,7 @@  static void arm_smmu_domain_destroy(struct iommu_domain *domain)
 }
 
 static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
-					  struct arm_smmu_master *master)
+					  struct arm_smmu_master_cfg *cfg)
 {
 	int i;
 	struct arm_smmu_smr *smrs;
@@ -1065,18 +1132,18 @@  static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
 	if (!(smmu->features & ARM_SMMU_FEAT_STREAM_MATCH))
 		return 0;
 
-	if (master->smrs)
+	if (cfg->smrs)
 		return -EEXIST;
 
-	smrs = kmalloc(sizeof(*smrs) * master->num_streamids, GFP_KERNEL);
+	smrs = kmalloc(sizeof(*smrs) * cfg->num_streamids, GFP_KERNEL);
 	if (!smrs) {
-		dev_err(smmu->dev, "failed to allocate %d SMRs for master %s\n",
-			master->num_streamids, master->of_node->name);
+		dev_err(smmu->dev, "failed to allocate %d SMRs\n",
+			cfg->num_streamids);
 		return -ENOMEM;
 	}
 
 	/* Allocate the SMRs on the root SMMU */
-	for (i = 0; i < master->num_streamids; ++i) {
+	for (i = 0; i < cfg->num_streamids; ++i) {
 		int idx = __arm_smmu_alloc_bitmap(smmu->smr_map, 0,
 						  smmu->num_mapping_groups);
 		if (IS_ERR_VALUE(idx)) {
@@ -1087,18 +1154,18 @@  static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
 		smrs[i] = (struct arm_smmu_smr) {
 			.idx	= idx,
 			.mask	= 0, /* We don't currently share SMRs */
-			.id	= master->streamids[i],
+			.id	= cfg->streamids[i],
 		};
 	}
 
 	/* It worked! Now, poke the actual hardware */
-	for (i = 0; i < master->num_streamids; ++i) {
+	for (i = 0; i < cfg->num_streamids; ++i) {
 		u32 reg = SMR_VALID | smrs[i].id << SMR_ID_SHIFT |
 			  smrs[i].mask << SMR_MASK_SHIFT;
 		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_SMR(smrs[i].idx));
 	}
 
-	master->smrs = smrs;
+	cfg->smrs = smrs;
 	return 0;
 
 err_free_smrs:
@@ -1109,44 +1176,44 @@  err_free_smrs:
 }
 
 static void arm_smmu_master_free_smrs(struct arm_smmu_device *smmu,
-				      struct arm_smmu_master *master)
+				      struct arm_smmu_master_cfg *cfg)
 {
 	int i;
 	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
-	struct arm_smmu_smr *smrs = master->smrs;
+	struct arm_smmu_smr *smrs = cfg->smrs;
 
 	/* Invalidate the SMRs before freeing back to the allocator */
-	for (i = 0; i < master->num_streamids; ++i) {
+	for (i = 0; i < cfg->num_streamids; ++i) {
 		u8 idx = smrs[i].idx;
 		writel_relaxed(~SMR_VALID, gr0_base + ARM_SMMU_GR0_SMR(idx));
 		__arm_smmu_free_bitmap(smmu->smr_map, idx);
 	}
 
-	master->smrs = NULL;
+	cfg->smrs = NULL;
 	kfree(smrs);
 }
 
 static void arm_smmu_bypass_stream_mapping(struct arm_smmu_device *smmu,
-					   struct arm_smmu_master *master)
+					   struct arm_smmu_master_cfg *cfg)
 {
 	int i;
 	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
 
-	for (i = 0; i < master->num_streamids; ++i) {
-		u16 sid = master->streamids[i];
+	for (i = 0; i < cfg->num_streamids; ++i) {
+		u16 sid = cfg->streamids[i];
 		writel_relaxed(S2CR_TYPE_BYPASS,
 			       gr0_base + ARM_SMMU_GR0_S2CR(sid));
 	}
 }
 
 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
-				      struct arm_smmu_master *master)
+				      struct arm_smmu_master_cfg *cfg)
 {
 	int i, ret;
 	struct arm_smmu_device *parent, *smmu = smmu_domain->root_cfg.smmu;
 	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
 
-	ret = arm_smmu_master_configure_smrs(smmu, master);
+	ret = arm_smmu_master_configure_smrs(smmu, cfg);
 	if (ret)
 		return ret;
 
@@ -1161,14 +1228,14 @@  static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
 		if (smmu->features & ARM_SMMU_FEAT_STREAM_MATCH)
 			continue;
 
-		arm_smmu_bypass_stream_mapping(smmu, master);
+		arm_smmu_bypass_stream_mapping(smmu, cfg);
 		smmu = parent;
 	}
 
 	/* Now we're at the root, time to point at our context bank */
-	for (i = 0; i < master->num_streamids; ++i) {
+	for (i = 0; i < cfg->num_streamids; ++i) {
 		u32 idx, s2cr;
-		idx = master->smrs ? master->smrs[i].idx : master->streamids[i];
+		idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
 		s2cr = S2CR_TYPE_TRANS |
 		       (smmu_domain->root_cfg.cbndx << S2CR_CBNDX_SHIFT);
 		writel_relaxed(s2cr, gr0_base + ARM_SMMU_GR0_S2CR(idx));
@@ -1178,7 +1245,7 @@  static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
 }
 
 static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain,
-					  struct arm_smmu_master *master)
+					  struct arm_smmu_master_cfg *cfg)
 {
 	struct arm_smmu_device *smmu = smmu_domain->root_cfg.smmu;
 
@@ -1186,18 +1253,19 @@  static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain,
 	 * We *must* clear the S2CR first, because freeing the SMR means
 	 * that it can be re-allocated immediately.
 	 */
-	arm_smmu_bypass_stream_mapping(smmu, master);
-	arm_smmu_master_free_smrs(smmu, master);
+	arm_smmu_bypass_stream_mapping(smmu, cfg);
+	arm_smmu_master_free_smrs(smmu, cfg);
 }
 
 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 {
 	int ret = -EINVAL;
 	struct arm_smmu_domain *smmu_domain = domain->priv;
-	struct arm_smmu_device *device_smmu = dev->archdata.iommu;
-	struct arm_smmu_master *master;
+	struct arm_smmu_device *device_smmu;
+	struct arm_smmu_master_cfg *cfg;
 	unsigned long flags;
 
+	device_smmu = dev_get_master_dev(dev)->archdata.iommu;
 	if (!device_smmu) {
 		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
 		return -ENXIO;
@@ -1210,11 +1278,9 @@  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 	spin_lock_irqsave(&smmu_domain->lock, flags);
 	if (!smmu_domain->leaf_smmu) {
 		/* Now that we have a master, we can finalise the domain */
-		ret = arm_smmu_init_domain_context(domain, dev);
+		ret = arm_smmu_init_domain_context(domain, dev, device_smmu);
 		if (IS_ERR_VALUE(ret))
 			goto err_unlock;
-
-		smmu_domain->leaf_smmu = device_smmu;
 	} else if (smmu_domain->leaf_smmu != device_smmu) {
 		dev_err(dev,
 			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
@@ -1225,11 +1291,11 @@  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 	spin_unlock_irqrestore(&smmu_domain->lock, flags);
 
 	/* Looks ok, so add the device to the domain */
-	master = find_smmu_master(smmu_domain->leaf_smmu, dev->of_node);
-	if (!master)
+	cfg = find_smmu_master_cfg(smmu_domain->leaf_smmu, dev);
+	if (!cfg)
 		return -ENODEV;
 
-	return arm_smmu_domain_add_master(smmu_domain, master);
+	return arm_smmu_domain_add_master(smmu_domain, cfg);
 
 err_unlock:
 	spin_unlock_irqrestore(&smmu_domain->lock, flags);
@@ -1239,11 +1305,11 @@  err_unlock:
 static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev)
 {
 	struct arm_smmu_domain *smmu_domain = domain->priv;
-	struct arm_smmu_master *master;
+	struct arm_smmu_master_cfg *cfg;
 
-	master = find_smmu_master(smmu_domain->leaf_smmu, dev->of_node);
-	if (master)
-		arm_smmu_domain_remove_master(smmu_domain, master);
+	cfg = find_smmu_master_cfg(smmu_domain->leaf_smmu, dev);
+	if (cfg)
+		arm_smmu_domain_remove_master(smmu_domain, cfg);
 }
 
 static bool arm_smmu_pte_is_contiguous_range(unsigned long addr,
@@ -1549,10 +1615,15 @@  static int arm_smmu_domain_has_cap(struct iommu_domain *domain,
 	return !!(cap & caps);
 }
 
+static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
+{
+	*((u16 *)data) = alias;
+	return 0; /* Continue walking */
+}
+
 static int arm_smmu_add_device(struct device *dev)
 {
-	struct arm_smmu_device *child, *parent, *smmu;
-	struct arm_smmu_master *master = NULL;
+	struct arm_smmu_device *smmu;
 	struct iommu_group *group;
 	int ret;
 
@@ -1561,35 +1632,8 @@  static int arm_smmu_add_device(struct device *dev)
 		return -EINVAL;
 	}
 
-	spin_lock(&arm_smmu_devices_lock);
-	list_for_each_entry(parent, &arm_smmu_devices, list) {
-		smmu = parent;
-
-		/* Try to find a child of the current SMMU. */
-		list_for_each_entry(child, &arm_smmu_devices, list) {
-			if (child->parent_of_node == parent->dev->of_node) {
-				/* Does the child sit above our master? */
-				master = find_smmu_master(child, dev->of_node);
-				if (master) {
-					smmu = NULL;
-					break;
-				}
-			}
-		}
-
-		/* We found some children, so keep searching. */
-		if (!smmu) {
-			master = NULL;
-			continue;
-		}
-
-		master = find_smmu_master(smmu, dev->of_node);
-		if (master)
-			break;
-	}
-	spin_unlock(&arm_smmu_devices_lock);
-
-	if (!master)
+	smmu = find_parent_smmu_for_device(dev);
+	if (!smmu)
 		return -ENODEV;
 
 	group = iommu_group_alloc();
@@ -1598,15 +1642,36 @@  static int arm_smmu_add_device(struct device *dev)
 		return PTR_ERR(group);
 	}
 
+	if (dev_is_pci(dev)) {
+		struct arm_smmu_master_cfg *cfg;
+		struct pci_dev *pdev = to_pci_dev(dev);
+
+		cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+		if (!cfg) {
+			ret = -ENOMEM;
+			goto out_put_group;
+		}
+
+		cfg->num_streamids = 1;
+		pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid,
+				       &cfg->streamids[0]);
+		dev->archdata.iommu = cfg;
+	} else {
+		dev->archdata.iommu = smmu;
+	}
+
 	ret = iommu_group_add_device(group, dev);
-	iommu_group_put(group);
-	dev->archdata.iommu = smmu;
 
+out_put_group:
+	iommu_group_put(group);
 	return ret;
 }
 
 static void arm_smmu_remove_device(struct device *dev)
 {
+	if (dev_is_pci(dev))
+		kfree(dev->archdata.iommu);
+
 	dev->archdata.iommu = NULL;
 	iommu_group_remove_device(dev);
 }
@@ -2050,6 +2115,11 @@  static int __init arm_smmu_init(void)
 		bus_set_iommu(&amba_bustype, &arm_smmu_ops);
 #endif
 
+#ifdef CONFIG_PCI
+	if (!iommu_present(&pci_bus_type))
+		bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
+#endif
+
 	return 0;
 }