diff mbox

[Xen-devel,for,4.6,12/13] xen/iommu: smmu: Introduce automatic stream-id-masking

Message ID 1418760534-18163-13-git-send-email-julien.grall@linaro.org
State Superseded, archived
Headers show

Commit Message

Julien Grall Dec. 16, 2014, 8:08 p.m. UTC
From: Andreas Herrmann <andreas.herrmann@calxeda.com>

Try to determine mask/id values that match several stream IDs of a
master device when doing Stream ID matching. Thus the number of used
SMR groups that are required to map all stream IDs of a master device
to a context should be less than the number of SMR groups used so far
(currently one SMR group is used for one stream ID).

Taken from the Linux ML:
http://lists.infradead.org/pipermail/linux-arm-kernel/2014-January/226100.html

Changes compared to the Linux ML version:
    - _fls doesn't exist on Xen so use fls
    - Use num_s2crs rather than num_streamids in arm_smmu_master_free_smrs.
    The former is the field used to configure the SMRs

Cc: Andreas Herrmann <herrmann.der.user@googlemail.com>
Signed-off-by: Andreas Herrmann <andreas.herrmann@calxeda.com>
Signed-off-by: Julien Grall <julien.grall@linaro.org>
---
 xen/drivers/passthrough/arm/smmu.c | 177 +++++++++++++++++++++++++++++++++----
 1 file changed, 162 insertions(+), 15 deletions(-)
diff mbox

Patch

diff --git a/xen/drivers/passthrough/arm/smmu.c b/xen/drivers/passthrough/arm/smmu.c
index bfc1069..8a6514f 100644
--- a/xen/drivers/passthrough/arm/smmu.c
+++ b/xen/drivers/passthrough/arm/smmu.c
@@ -43,6 +43,7 @@ 
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/bitops.h>
 
 #include <linux/amba/bus.h>
 
@@ -346,8 +347,10 @@  struct arm_smmu_smr {
 };
 
 struct arm_smmu_master_cfg {
-	int				num_streamids;
+	u32				num_streamids;
 	u16				streamids[MAX_MASTER_STREAMIDS];
+	int				num_s2crs;
+
 	struct arm_smmu_smr		*smrs;
 };
 
@@ -392,6 +395,9 @@  struct arm_smmu_device {
 	u32				num_context_irqs;
 	unsigned int			*irqs;
 
+	u32				smr_mask_mask;
+	u32				smr_id_mask;
+
 	struct list_head		list;
 	struct rb_root			masters;
 };
@@ -1113,6 +1119,137 @@  static void arm_smmu_free_pgtables(struct arm_smmu_domain *smmu_domain)
 	kfree(pgd_base);
 }
 
+/*
+ * For a given set N of 2**order different stream IDs (no duplicates
+ * please!) we determine values mask and id such that
+ *
+ * (1)          (x & ~mask) == id
+ *
+ * for each stream ID x from the given set N.
+ *
+ * If the number of bits that are set in mask equals n, then there
+ * exist 2**n different values y for which
+ *
+ * (2)          (y & ~mask) == id
+ *
+ * Thus if n equals order, exactly 2**order stream IDs satisfy (1), so
+ * mask and id can configure one SMR matching precisely the set N.
+ * Returns 0 with smr->mask/smr->id filled in on success, 1 if no
+ * usable pair exists or it does not fit smr_mask_mask/smr_id_mask.
+ */
+static int determine_smr_mask(struct arm_smmu_device *smmu,
+			      struct arm_smmu_master_cfg *cfg,
+			      struct arm_smmu_smr *smr, int start, int order)
+{
+	u16 i, zero_bits_mask, one_bits_mask, const_mask;
+	int nr;
+
+	nr = 1 << order;
+
+	if (nr == 1) {
+		/* single ID: match it exactly, no bits are masked out */
+		smr->mask = 0;
+		smr->id = cfg->streamids[start];
+		return 0;
+	}
+
+	zero_bits_mask = 0;
+	one_bits_mask = 0xffff;
+	for (i = start; i < start + nr; i++) {
+		zero_bits_mask |= cfg->streamids[i];	/* bits set in any ID */
+		one_bits_mask &= cfg->streamids[i];	/* bits set in every ID */
+	}
+	zero_bits_mask = ~zero_bits_mask;	/* -> bits clear in every ID */
+
+	/* bits having constant values (either 0 or 1) */
+	const_mask = zero_bits_mask | one_bits_mask;
+
+	i = hweight16(~const_mask);
+	if (i == order) {
+		/*
+		 * We have found a mask/id pair that matches exactly
+		 * nr = 2**order stream IDs which we used for its
+		 * calculation.
+		 */
+		smr->mask = ~const_mask;
+		smr->id = one_bits_mask;
+	} else {
+		/*
+		 * No usable mask/id pair for this set of streamids.
+		 * If i > order then mask/id would match more than nr
+		 * streamids.
+		 * If i < order then mask/id would match fewer than nr
+		 * streamids. (In this case we potentially have used
+		 * some duplicate streamids for the calculation.)
+		 */
+		return 1;
+	}
+
+	if (((smr->mask & smmu->smr_mask_mask) != smr->mask) ||
+		((smr->id & smmu->smr_id_mask) != smr->id))
+		/* insufficient number of mask/id bits */
+		return 1;
+
+	return 0;
+}
+
+static int determine_smr_mapping(struct arm_smmu_device *smmu,
+				 struct arm_smmu_master_cfg *cfg,
+				 struct arm_smmu_smr *smrs, int max_smrs)
+{
+	int nr_sid, nr, i, bit, start;
+
+	/*
+	 * Cover cfg->streamids[] with at most max_smrs mask/id pairs in
+	 * smrs[]; returns 0, -EINVAL or -ENOSPC. Called only once -- when
+	 * a master is added to a domain: cfg->num_s2crs != 0 means that
+	 * this master was already added to a domain.
+	 */
+	if (cfg->num_s2crs)
+		return -EINVAL;
+
+	start = nr = 0;
+	nr_sid = cfg->num_streamids;
+	while (nr_sid > 0 && cfg->num_s2crs < max_smrs) {
+		/*
+		 * smrs[] has max_smrs slots, hence the bound check above.
+		 * Start from the largest power of 2 fitting in nr_sid.
+		 */
+		bit = fls(nr_sid) - 1;
+		if (bit < 0)
+			return 0;
+
+		/*
+		 * iterate over power-of-2 numbers to determine
+		 * largest possible mask/id pair for stream matching
+		 * of next 2**i streamids
+		 */
+		for (i = bit; i >= 0; i--) {
+			if (!determine_smr_mask(smmu, cfg,
+						&smrs[cfg->num_s2crs],
+						start, i))
+				break;
+		}
+
+		if (i < 0)
+			goto out;
+
+		nr = 1 << i;
+		nr_sid -= nr;
+		start += nr;
+		cfg->num_s2crs++;
+	}
+
+out:
+	if (nr_sid) {
+		/* stream IDs left over: not enough mapping groups */
+		cfg->num_s2crs = 0;
+		return -ENOSPC;
+	}
+
+	return 0;
+}
+
+
 static void arm_smmu_domain_destroy(struct iommu_domain *domain)
 {
 	struct arm_smmu_domain *smmu_domain = domain->priv;
@@ -1129,7 +1266,7 @@  static void arm_smmu_domain_destroy(struct iommu_domain *domain)
 static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
 					  struct arm_smmu_master_cfg *cfg)
 {
-	int i;
+	int i, max_smrs, ret;
 	struct arm_smmu_smr *smrs;
 	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
 
@@ -1139,31 +1276,32 @@  static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
 	if (cfg->smrs)
 		return -EEXIST;
 
-	smrs = kmalloc_array(cfg->num_streamids, sizeof(*smrs), GFP_KERNEL);
+	max_smrs = min(smmu->num_mapping_groups, cfg->num_streamids);
+	smrs = kmalloc(sizeof(*smrs) * max_smrs, GFP_KERNEL);
 	if (!smrs) {
 		dev_err(smmu->dev, "failed to allocate %d SMRs\n",
-			cfg->num_streamids);
+			max_smrs);
 		return -ENOMEM;
 	}
 
+	ret = determine_smr_mapping(smmu, cfg, smrs, max_smrs);
+	if (ret)
+		goto err_free_smrs;
+
 	/* Allocate the SMRs on the SMMU */
-	for (i = 0; i < cfg->num_streamids; ++i) {
+	for (i = 0; i < cfg->num_s2crs; ++i) {
 		int idx = __arm_smmu_alloc_bitmap(smmu->smr_map, 0,
 						  smmu->num_mapping_groups);
 		if (IS_ERR_VALUE(idx)) {
 			dev_err(smmu->dev, "failed to allocate free SMR\n");
-			goto err_free_smrs;
+			goto err_free_bitmap;
 		}
 
-		smrs[i] = (struct arm_smmu_smr) {
-			.idx	= idx,
-			.mask	= 0, /* We don't currently share SMRs */
-			.id	= cfg->streamids[i],
-		};
+		smrs[i].idx = idx;
 	}
 
 	/* It worked! Now, poke the actual hardware */
-	for (i = 0; i < cfg->num_streamids; ++i) {
+	for (i = 0; i < cfg->num_s2crs; ++i) {
 		u32 reg = SMR_VALID | smrs[i].id << SMR_ID_SHIFT |
 			  smrs[i].mask << SMR_MASK_SHIFT;
 		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_SMR(smrs[i].idx));
@@ -1172,9 +1310,11 @@  static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
 	cfg->smrs = smrs;
 	return 0;
 
-err_free_smrs:
+err_free_bitmap:
 	while (--i >= 0)
 		__arm_smmu_free_bitmap(smmu->smr_map, smrs[i].idx);
+	cfg->num_s2crs = 0;
+err_free_smrs:
 	kfree(smrs);
 	return -ENOSPC;
 }
@@ -1190,13 +1330,15 @@  static void arm_smmu_master_free_smrs(struct arm_smmu_device *smmu,
 		return;
 
 	/* Invalidate the SMRs before freeing back to the allocator */
-	for (i = 0; i < cfg->num_streamids; ++i) {
+	for (i = 0; i < cfg->num_s2crs; ++i) {
 		u8 idx = smrs[i].idx;
 
 		writel_relaxed(~SMR_VALID, gr0_base + ARM_SMMU_GR0_SMR(idx));
 		__arm_smmu_free_bitmap(smmu->smr_map, idx);
 	}
 
+	cfg->num_s2crs = 0;
+
 	cfg->smrs = NULL;
 	kfree(smrs);
 }
@@ -1213,12 +1355,15 @@  static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
 	if (ret)
 		return ret == -EEXIST ? 0 : ret;
 
-	for (i = 0; i < cfg->num_streamids; ++i) {
+	if (!cfg->num_s2crs)
+		cfg->num_s2crs = cfg->num_streamids;
+	for (i = 0; i < cfg->num_s2crs; ++i) {
 		u32 idx, s2cr;
 
 		idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
 		s2cr = S2CR_TYPE_TRANS |
 		       (smmu_domain->cfg.cbndx << S2CR_CBNDX_SHIFT);
+		dev_dbg(smmu->dev, "S2CR%d: 0x%x\n", idx, s2cr);
 		writel_relaxed(s2cr, gr0_base + ARM_SMMU_GR0_S2CR(idx));
 	}
 
@@ -1890,6 +2035,8 @@  static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
 				mask, sid);
 			return -ENODEV;
 		}
+		smmu->smr_mask_mask = mask;
+		smmu->smr_id_mask = sid;
 
 		dev_notice(smmu->dev,
 			   "\tstream matching with %u register groups, mask 0x%x",