@@ -43,6 +43,7 @@
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/bitops.h>
#include <linux/amba/bus.h>
@@ -346,8 +347,10 @@ struct arm_smmu_smr {
};
struct arm_smmu_master_cfg {
- int num_streamids;
+ u32 num_streamids;
u16 streamids[MAX_MASTER_STREAMIDS];
+ int num_s2crs;
+
struct arm_smmu_smr *smrs;
};
@@ -392,6 +395,9 @@ struct arm_smmu_device {
u32 num_context_irqs;
unsigned int *irqs;
+ u32 smr_mask_mask;
+ u32 smr_id_mask;
+
struct list_head list;
struct rb_root masters;
};
@@ -1113,6 +1119,137 @@ static void arm_smmu_free_pgtables(struct arm_smmu_domain *smmu_domain)
kfree(pgd_base);
}
+/*
+ * For a given set N of 2**order different stream IDs (no duplicates
+ * please!) we determine values mask and id such that
+ *
+ * (1) (x & mask) == id
+ *
+ * for each stream ID x from the given set N.
+ *
+ * If the number of bits that are set in mask equals n, then there
+ * exist 2**n different values y for which
+ *
+ * (2) (y & mask) == id
+ *
+ * Thus if n equals order we know that for the calculated mask and id
+ * values there are exactly 2**order == 2**n stream IDs for which (1)
+ * is true. And we finally can use mask and id to configure an SMR to
+ * match all stream IDs in the set N.
+ */
+static int determine_smr_mask(struct arm_smmu_device *smmu,
+ struct arm_smmu_master_cfg *cfg,
+ struct arm_smmu_smr *smr, int start, int order)
+{
+ u16 i, zero_bits_mask, one_bits_mask, const_mask;
+ int nr;
+
+ nr = 1 << order;
+
+ if (nr == 1) {
+ /* no mask, use streamid to match and be done with it */
+ smr->mask = 0;
+ smr->id = cfg->streamids[start];
+ return 0;
+ }
+
+ zero_bits_mask = 0;
+ one_bits_mask = 0xffff;
+ for (i = start; i < start + nr; i++) {
+ zero_bits_mask |= cfg->streamids[i]; /* const 0 bits */
+ one_bits_mask &= cfg->streamids[i]; /* const 1 bits */
+ }
+ zero_bits_mask = ~zero_bits_mask;
+
+ /* bits having constant values (either 0 or 1) */
+ const_mask = zero_bits_mask | one_bits_mask;
+
+ i = hweight16(~const_mask);
+ if (i == order) {
+ /*
+ * We have found a mask/id pair that matches exactly
+ * nr = 2**order stream IDs which we used for its
+ * calculation.
+ */
+ smr->mask = ~const_mask;
+ smr->id = one_bits_mask;
+ } else {
+ /*
+ * No usable mask/id pair for this set of streamids.
+ * If i > order then mask/id would match more than nr
+ * streamids.
+ * If i < order then mask/id would match less than nr
+ * streamids. (In this case we potentially have used
+ * some duplicate streamids for the calculation.)
+ */
+ return 1;
+ }
+
+ if (((smr->mask & smmu->smr_mask_mask) != smr->mask) ||
+ ((smr->id & smmu->smr_id_mask) != smr->id))
+ /* insufficient number of mask/id bits */
+ return 1;
+
+ return 0;
+}
+
+static int determine_smr_mapping(struct arm_smmu_device *smmu,
+ struct arm_smmu_master_cfg *cfg,
+ struct arm_smmu_smr *smrs, int max_smrs)
+{
+ int nr_sid, nr, i, bit, start;
+
+ /*
+ * This function is called only once -- when a master is added
+ * to a domain. If cfg->num_s2crs != 0 then this master
+ * was already added to a domain.
+ */
+ if (cfg->num_s2crs)
+ return -EINVAL;
+
+ start = nr = 0;
+ nr_sid = cfg->num_streamids;
+ do {
+ /*
+ * largest power-of-2 number of streamids for which to
+ * determine a usable mask/id pair for stream matching
+ */
+ bit = fls(nr_sid) - 1;
+ if (bit < 0)
+ return 0;
+
+ /*
+ * iterate over power-of-2 numbers to determine
+ * largest possible mask/id pair for stream matching
+ * of next 2**i streamids
+ */
+ for (i = bit; i >= 0; i--) {
+ if (!determine_smr_mask(smmu, cfg,
+ &smrs[cfg->num_s2crs],
+ start, i))
+ break;
+ }
+
+ if (i < 0)
+ goto out;
+
+ nr = 1 << i;
+ nr_sid -= nr;
+ start += nr;
+ cfg->num_s2crs++;
+ } while (cfg->num_s2crs <= max_smrs);
+
+out:
+ if (nr_sid) {
+ /* not enough mapping groups available */
+ cfg->num_s2crs = 0;
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
+
static void arm_smmu_domain_destroy(struct iommu_domain *domain)
{
struct arm_smmu_domain *smmu_domain = domain->priv;
@@ -1129,7 +1266,7 @@ static void arm_smmu_domain_destroy(struct iommu_domain *domain)
static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
struct arm_smmu_master_cfg *cfg)
{
- int i;
+ int i, max_smrs, ret;
struct arm_smmu_smr *smrs;
void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
@@ -1139,31 +1276,32 @@ static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
if (cfg->smrs)
return -EEXIST;
- smrs = kmalloc_array(cfg->num_streamids, sizeof(*smrs), GFP_KERNEL);
+ max_smrs = min(smmu->num_mapping_groups, cfg->num_streamids);
+ smrs = kmalloc(sizeof(*smrs) * max_smrs, GFP_KERNEL);
if (!smrs) {
dev_err(smmu->dev, "failed to allocate %d SMRs\n",
- cfg->num_streamids);
+ max_smrs);
return -ENOMEM;
}
+ ret = determine_smr_mapping(smmu, cfg, smrs, max_smrs);
+ if (ret)
+ goto err_free_smrs;
+
/* Allocate the SMRs on the SMMU */
- for (i = 0; i < cfg->num_streamids; ++i) {
+ for (i = 0; i < cfg->num_s2crs; ++i) {
int idx = __arm_smmu_alloc_bitmap(smmu->smr_map, 0,
smmu->num_mapping_groups);
if (IS_ERR_VALUE(idx)) {
dev_err(smmu->dev, "failed to allocate free SMR\n");
- goto err_free_smrs;
+ goto err_free_bitmap;
}
- smrs[i] = (struct arm_smmu_smr) {
- .idx = idx,
- .mask = 0, /* We don't currently share SMRs */
- .id = cfg->streamids[i],
- };
+ smrs[i].idx = idx;
}
/* It worked! Now, poke the actual hardware */
- for (i = 0; i < cfg->num_streamids; ++i) {
+ for (i = 0; i < cfg->num_s2crs; ++i) {
u32 reg = SMR_VALID | smrs[i].id << SMR_ID_SHIFT |
smrs[i].mask << SMR_MASK_SHIFT;
writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_SMR(smrs[i].idx));
@@ -1172,9 +1310,11 @@ static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
cfg->smrs = smrs;
return 0;
-err_free_smrs:
+err_free_bitmap:
while (--i >= 0)
__arm_smmu_free_bitmap(smmu->smr_map, smrs[i].idx);
+ cfg->num_s2crs = 0;
+err_free_smrs:
kfree(smrs);
return -ENOSPC;
}
@@ -1190,13 +1330,15 @@ static void arm_smmu_master_free_smrs(struct arm_smmu_device *smmu,
return;
/* Invalidate the SMRs before freeing back to the allocator */
- for (i = 0; i < cfg->num_streamids; ++i) {
+ for (i = 0; i < cfg->num_s2crs; ++i) {
u8 idx = smrs[i].idx;
writel_relaxed(~SMR_VALID, gr0_base + ARM_SMMU_GR0_SMR(idx));
__arm_smmu_free_bitmap(smmu->smr_map, idx);
}
+ cfg->num_s2crs = 0;
+
cfg->smrs = NULL;
kfree(smrs);
}
@@ -1213,12 +1355,15 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
if (ret)
return ret == -EEXIST ? 0 : ret;
- for (i = 0; i < cfg->num_streamids; ++i) {
+ if (!cfg->num_s2crs)
+ cfg->num_s2crs = cfg->num_streamids;
+ for (i = 0; i < cfg->num_s2crs; ++i) {
u32 idx, s2cr;
idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
s2cr = S2CR_TYPE_TRANS |
(smmu_domain->cfg.cbndx << S2CR_CBNDX_SHIFT);
+ dev_dbg(smmu->dev, "S2CR%d: 0x%x\n", idx, s2cr);
writel_relaxed(s2cr, gr0_base + ARM_SMMU_GR0_S2CR(idx));
}
@@ -1890,6 +2035,8 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
mask, sid);
return -ENODEV;
}
+ smmu->smr_mask_mask = mask;
+ smmu->smr_id_mask = sid;
dev_notice(smmu->dev,
"\tstream matching with %u register groups, mask 0x%x",