[RFC] IOMMU: SMMUv2: Support for Extended Stream ID (16 bit)

Message ID 20170110115755.19102-1-aleksey.makarov@linaro.org
State New
Headers show

Commit Message

Aleksey Makarov Jan. 10, 2017, 11:57 a.m.
Enable the Extended Stream ID feature when available.

This patch on top of series "[PATCH v7 00/19] KVM PCIe/MSI passthrough
on ARM/ARM64 and IOVA reserved regions" by Eric Auger allows
to passthrough an external PCIe network card on a ThunderX server
successfully.

Without this patch that card caused a warning like

	pci 0006:90:00.0: stream ID 0x9000 out of range for SMMU (0x7fff)

during boot.

Signed-off-by: Aleksey Makarov <aleksey.makarov@linaro.org>

---
 drivers/iommu/arm-smmu.c | 53 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 37 insertions(+), 16 deletions(-)

-- 
2.11.0

Comments

Robin Murphy Jan. 11, 2017, 2:15 p.m. | #1
On 10/01/17 11:57, Aleksey Makarov wrote:
> Enable the Extended Stream ID feature when available.

> 

> This patch on top of series "[PATCH v7 00/19] KVM PCIe/MSI passthrough

> on ARM/ARM64 and IOVA reserved regions" by Eric Auger allows

> to passthrough an external PCIe network card on a ThunderX server

> successfully.

> 

> Without this patch that card caused a warning like

> 

> 	pci 0006:90:00.0: stream ID 0x9000 out of range for SMMU (0x7fff)

> 

> during boot.

> 

> Signed-off-by: Aleksey Makarov <aleksey.makarov@linaro.org>

> ---

>  drivers/iommu/arm-smmu.c | 53 +++++++++++++++++++++++++++++++++---------------

>  1 file changed, 37 insertions(+), 16 deletions(-)

> 

> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c

> index 13d26009b8e0..d160c12828f4 100644

> --- a/drivers/iommu/arm-smmu.c

> +++ b/drivers/iommu/arm-smmu.c

> @@ -24,6 +24,7 @@

>   *	- v7/v8 long-descriptor format

>   *	- Non-secure access to the SMMU

>   *	- Context fault reporting

> + *	- Extended Stream ID (16 bit)

>   */

>  

>  #define pr_fmt(fmt) "arm-smmu: " fmt

> @@ -87,6 +88,7 @@

>  #define sCR0_CLIENTPD			(1 << 0)

>  #define sCR0_GFRE			(1 << 1)

>  #define sCR0_GFIE			(1 << 2)

> +#define sCR0_EXIDENABLE			(1 << 3)

>  #define sCR0_GCFGFRE			(1 << 4)

>  #define sCR0_GCFGFIE			(1 << 5)

>  #define sCR0_USFCFG			(1 << 10)

> @@ -126,6 +128,7 @@

>  #define ID0_NUMIRPT_MASK		0xff

>  #define ID0_NUMSIDB_SHIFT		9

>  #define ID0_NUMSIDB_MASK		0xf

> +#define ID0_EXIDS			(1 << 8)

>  #define ID0_NUMSMRG_SHIFT		0

>  #define ID0_NUMSMRG_MASK		0xff

>  

> @@ -169,6 +172,7 @@

>  #define ARM_SMMU_GR0_S2CR(n)		(0xc00 + ((n) << 2))

>  #define S2CR_CBNDX_SHIFT		0

>  #define S2CR_CBNDX_MASK			0xff

> +#define S2CR_EXIDVALID			(1 << 10)

>  #define S2CR_TYPE_SHIFT			16

>  #define S2CR_TYPE_MASK			0x3

>  enum arm_smmu_s2cr_type {

> @@ -354,6 +358,7 @@ struct arm_smmu_device {

>  #define ARM_SMMU_FEAT_FMT_AARCH64_64K	(1 << 9)

>  #define ARM_SMMU_FEAT_FMT_AARCH32_L	(1 << 10)

>  #define ARM_SMMU_FEAT_FMT_AARCH32_S	(1 << 11)

> +#define ARM_SMMU_FEAT_EXIDS		(1 << 12)

>  	u32				features;

>  

>  #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)

> @@ -1051,7 +1056,7 @@ static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)

>  	struct arm_smmu_smr *smr = smmu->smrs + idx;

>  	u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;

>  

> -	if (smr->valid)

> +	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)

>  		reg |= SMR_VALID;

>  	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));

>  }

> @@ -1063,6 +1068,9 @@ static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)

>  		  (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |

>  		  (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;

>  

> +	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&

> +	    smmu->smrs[idx].valid)

> +		reg |= S2CR_EXIDVALID;

>  	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));

>  }

>  

> @@ -1674,6 +1682,9 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)

>  	if (smmu->features & ARM_SMMU_FEAT_VMID16)

>  		reg |= sCR0_VMID16EN;

>  

> +	if (smmu->features & ARM_SMMU_FEAT_EXIDS)

> +		reg |= sCR0_EXIDENABLE;

> +

>  	/* Push the button */

>  	__arm_smmu_tlb_sync(smmu);

>  	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);

> @@ -1761,7 +1772,12 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)

>  			   "\t(IDR0.CTTW overridden by FW configuration)\n");

>  

>  	/* Max. number of entries we have for stream matching/indexing */

> -	size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);

> +	if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {

> +		smmu->features |= ARM_SMMU_FEAT_EXIDS;

> +		size = (1 << 16);


Unnecessary parentheses.

> +	} else {

> +		size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);

> +	}


Given what the architecture says about the relationship between EXIDS
and NUMSIDB, I suppose an even shorter version could be:

	if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS)
		size *= 2;

but I'm not sure that's actually any nicer to read.

>  	smmu->streamid_mask = size - 1;

>  	if (id & ID0_SMS) {

>  		u32 smr;

> @@ -1774,20 +1790,25 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)

>  			return -ENODEV;

>  		}

>  

> -		/*

> -		 * SMR.ID bits may not be preserved if the corresponding MASK

> -		 * bits are set, so check each one separately. We can reject

> -		 * masters later if they try to claim IDs outside these masks.

> -		 */

> -		smr = smmu->streamid_mask << SMR_ID_SHIFT;

> -		writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));

> -		smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));

> -		smmu->streamid_mask = smr >> SMR_ID_SHIFT;

> -

> -		smr = smmu->streamid_mask << SMR_MASK_SHIFT;

> -		writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));

> -		smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));

> -		smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;

> +		if (smmu->features & ARM_SMMU_FEAT_EXIDS) {

> +			smmu->smr_mask_mask = smmu->streamid_mask;

> +		} else {

> +			/*

> +			 * SMR.ID bits may not be preserved if the corresponding

> +			 * MASK bits are set, so check each one separately.

> +			 * We can reject masters later if they try to claim IDs

> +			 * outside these masks.

> +			 */

> +			smr = smmu->streamid_mask << SMR_ID_SHIFT;

> +			writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));

> +			smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));

> +			smmu->streamid_mask = smr >> SMR_ID_SHIFT;

> +

> +			smr = smmu->streamid_mask << SMR_MASK_SHIFT;

> +			writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));

> +			smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));

> +			smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;

> +		}


This hunk is quite possibly wrong. I don't see any guarantee in the
architecture that all EXMASK/EXID bits *must* be implemented, and even
so there's still no harm in the driver determining that experimentally.
It looks like we need a bit of refactoring such that we move the probing
of SMR fields to after counting and allocating the SME structures, then
in the EXIDS case we can explicitly clear the SMEs and poke EXIDENABLE
inbetween.

Robin.

>  

>  		/* Zero-initialised to mark as invalid */

>  		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),

>
Aleksey Makarov Jan. 16, 2017, 2:09 p.m. | #2
On 01/11/2017 05:15 PM, Robin Murphy wrote:
> On 10/01/17 11:57, Aleksey Makarov wrote:

>> Enable the Extended Stream ID feature when available.

>>

>> This patch on top of series "[PATCH v7 00/19] KVM PCIe/MSI passthrough

>> on ARM/ARM64 and IOVA reserved regions" by Eric Auger allows

>> to passthrough an external PCIe network card on a ThunderX server

>> successfully.

>>

>> Without this patch that card caused a warning like

>>

>> 	pci 0006:90:00.0: stream ID 0x9000 out of range for SMMU (0x7fff)

>>

>> during boot.

>>

>> Signed-off-by: Aleksey Makarov <aleksey.makarov@linaro.org>

>> ---

>>  drivers/iommu/arm-smmu.c | 53 +++++++++++++++++++++++++++++++++---------------

>>  1 file changed, 37 insertions(+), 16 deletions(-)

>>


[...]

>> @@ -1761,7 +1772,12 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)

>>  			   "\t(IDR0.CTTW overridden by FW configuration)\n");

>>  

>>  	/* Max. number of entries we have for stream matching/indexing */

>> -	size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);

>> +	if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {

>> +		smmu->features |= ARM_SMMU_FEAT_EXIDS;

>> +		size = (1 << 16);

> 

> Unnecessary parentheses.


Thank you

>> +	} else {

>> +		size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);

>> +	}

> 

> Given what the architecture says about the relationship between EXIDS

> and NUMSIDB, I suppose an even shorter version could be:

> 

> 	if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS)

> 		size *= 2;

> 

> but I'm not sure that's actually any nicer to read.


I think it is not nicer: the one who reads this needs to know what is the value of NUMSIDB
in the case id & ID0_EXIDS == true; and also this makes the code depend on this, i. e.
on the correct implementation of hardware.

So I would like to leave it as is.  If you are not agree, I will change it.

>>  	smmu->streamid_mask = size - 1;

>>  	if (id & ID0_SMS) {

>>  		u32 smr;

>> @@ -1774,20 +1790,25 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)

>>  			return -ENODEV;

>>  		}

>>  

>> -		/*

>> -		 * SMR.ID bits may not be preserved if the corresponding MASK

>> -		 * bits are set, so check each one separately. We can reject

>> -		 * masters later if they try to claim IDs outside these masks.

>> -		 */

>> -		smr = smmu->streamid_mask << SMR_ID_SHIFT;

>> -		writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));

>> -		smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));

>> -		smmu->streamid_mask = smr >> SMR_ID_SHIFT;

>> -

>> -		smr = smmu->streamid_mask << SMR_MASK_SHIFT;

>> -		writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));

>> -		smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));

>> -		smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;

>> +		if (smmu->features & ARM_SMMU_FEAT_EXIDS) {

>> +			smmu->smr_mask_mask = smmu->streamid_mask;

>> +		} else {

>> +			/*

>> +			 * SMR.ID bits may not be preserved if the corresponding

>> +			 * MASK bits are set, so check each one separately.

>> +			 * We can reject masters later if they try to claim IDs

>> +			 * outside these masks.

>> +			 */

>> +			smr = smmu->streamid_mask << SMR_ID_SHIFT;

>> +			writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));

>> +			smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));

>> +			smmu->streamid_mask = smr >> SMR_ID_SHIFT;

>> +

>> +			smr = smmu->streamid_mask << SMR_MASK_SHIFT;

>> +			writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));

>> +			smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));

>> +			smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;

>> +		}

> 

> This hunk is quite possibly wrong. I don't see any guarantee in the

> architecture that all EXMASK/EXID bits *must* be implemented, and even

> so there's still no harm in the driver determining that experimentally.

> It looks like we need a bit of refactoring such that we move the probing

> of SMR fields to after counting and allocating the SME structures, then

> in the EXIDS case we can explicitly clear the SMEs and poke EXIDENABLE

> inbetween.


I am not quite sure I understand where you are suggesting to poke EXIDENABLE.
I am going to send v2 of the patch, I'd appreciate if you would review that please.

Thank you for review
Aleksey Makarov

Patch

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 13d26009b8e0..d160c12828f4 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -24,6 +24,7 @@ 
  *	- v7/v8 long-descriptor format
  *	- Non-secure access to the SMMU
  *	- Context fault reporting
+ *	- Extended Stream ID (16 bit)
  */
 
 #define pr_fmt(fmt) "arm-smmu: " fmt
@@ -87,6 +88,7 @@ 
 #define sCR0_CLIENTPD			(1 << 0)
 #define sCR0_GFRE			(1 << 1)
 #define sCR0_GFIE			(1 << 2)
+#define sCR0_EXIDENABLE			(1 << 3)
 #define sCR0_GCFGFRE			(1 << 4)
 #define sCR0_GCFGFIE			(1 << 5)
 #define sCR0_USFCFG			(1 << 10)
@@ -126,6 +128,7 @@ 
 #define ID0_NUMIRPT_MASK		0xff
 #define ID0_NUMSIDB_SHIFT		9
 #define ID0_NUMSIDB_MASK		0xf
+#define ID0_EXIDS			(1 << 8)
 #define ID0_NUMSMRG_SHIFT		0
 #define ID0_NUMSMRG_MASK		0xff
 
@@ -169,6 +172,7 @@ 
 #define ARM_SMMU_GR0_S2CR(n)		(0xc00 + ((n) << 2))
 #define S2CR_CBNDX_SHIFT		0
 #define S2CR_CBNDX_MASK			0xff
+#define S2CR_EXIDVALID			(1 << 10)
 #define S2CR_TYPE_SHIFT			16
 #define S2CR_TYPE_MASK			0x3
 enum arm_smmu_s2cr_type {
@@ -354,6 +358,7 @@  struct arm_smmu_device {
 #define ARM_SMMU_FEAT_FMT_AARCH64_64K	(1 << 9)
 #define ARM_SMMU_FEAT_FMT_AARCH32_L	(1 << 10)
 #define ARM_SMMU_FEAT_FMT_AARCH32_S	(1 << 11)
+#define ARM_SMMU_FEAT_EXIDS		(1 << 12)
 	u32				features;
 
 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
@@ -1051,7 +1056,7 @@  static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
 	struct arm_smmu_smr *smr = smmu->smrs + idx;
 	u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
 
-	if (smr->valid)
+	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
 		reg |= SMR_VALID;
 	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
 }
@@ -1063,6 +1068,9 @@  static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
 		  (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
 		  (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
 
+	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
+	    smmu->smrs[idx].valid)
+		reg |= S2CR_EXIDVALID;
 	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
 }
 
@@ -1674,6 +1682,9 @@  static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
 	if (smmu->features & ARM_SMMU_FEAT_VMID16)
 		reg |= sCR0_VMID16EN;
 
+	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
+		reg |= sCR0_EXIDENABLE;
+
 	/* Push the button */
 	__arm_smmu_tlb_sync(smmu);
 	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
@@ -1761,7 +1772,12 @@  static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
 			   "\t(IDR0.CTTW overridden by FW configuration)\n");
 
 	/* Max. number of entries we have for stream matching/indexing */
-	size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
+	if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
+		smmu->features |= ARM_SMMU_FEAT_EXIDS;
+		size = (1 << 16);
+	} else {
+		size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
+	}
 	smmu->streamid_mask = size - 1;
 	if (id & ID0_SMS) {
 		u32 smr;
@@ -1774,20 +1790,25 @@  static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
 			return -ENODEV;
 		}
 
-		/*
-		 * SMR.ID bits may not be preserved if the corresponding MASK
-		 * bits are set, so check each one separately. We can reject
-		 * masters later if they try to claim IDs outside these masks.
-		 */
-		smr = smmu->streamid_mask << SMR_ID_SHIFT;
-		writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
-		smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
-		smmu->streamid_mask = smr >> SMR_ID_SHIFT;
-
-		smr = smmu->streamid_mask << SMR_MASK_SHIFT;
-		writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
-		smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
-		smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
+		if (smmu->features & ARM_SMMU_FEAT_EXIDS) {
+			smmu->smr_mask_mask = smmu->streamid_mask;
+		} else {
+			/*
+			 * SMR.ID bits may not be preserved if the corresponding
+			 * MASK bits are set, so check each one separately.
+			 * We can reject masters later if they try to claim IDs
+			 * outside these masks.
+			 */
+			smr = smmu->streamid_mask << SMR_ID_SHIFT;
+			writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
+			smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
+			smmu->streamid_mask = smr >> SMR_ID_SHIFT;
+
+			smr = smmu->streamid_mask << SMR_MASK_SHIFT;
+			writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
+			smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
+			smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
+		}
 
 		/* Zero-initialised to mark as invalid */
 		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),