diff mbox series

[RFC,2/4] irqchip, gicv3-its:Workaround for HiSilicon erratum 161010801

Message ID 58875B0D.4080303@huawei.com
State New
Headers show
Series None | expand

Commit Message

Shameerali Kolothum Thodi Jan. 24, 2017, 1:47 p.m. UTC
The HiSilicon erratum 161010801 describes the limitation of certain
HiSilicon platforms to support the SMMU mappings for MSI transactions.

On these platforms GICv3 ITS translator is presented with the deviceID
by extending the MSI payload data to 64 bits to include the deviceID.
Hence, the PCIe controller on this platforms has to differentiate the
MSI payload against other DMA payload and has to modify the MSI payload.
This basically makes it difficult for this platforms to have a SMMU
translation for MSI. Also these platforms doesn't have a proper IIDR
register to use the existing IIDR based quirk mechanism.

This workaround based on the devicetree binding property, supports
bypassing the SMMU for the MSI transactions on this platforms.

Signed-off-by: shameer <shameerali.kolothum.thodi@huawei.com>

---
 arch/arm64/Kconfig               | 15 ++++++++++++
 drivers/irqchip/irq-gic-common.h |  1 +
 drivers/irqchip/irq-gic-v3-its.c | 52 +++++++++++++++++++++++++++++++++++++++-
 3 files changed, 67 insertions(+), 1 deletion(-)

-- 
1.9.1


.

Comments

Robin Murphy Jan. 24, 2017, 4:30 p.m. UTC | #1
On 24/01/17 16:14, Shameerali Kolothum Thodi wrote:
> 

> 

>> -----Original Message-----

>> From: Marc Zyngier [mailto:marc.zyngier@arm.com]

>> Sent: Tuesday, January 24, 2017 3:50 PM

>> To: Shameerali Kolothum Thodi; Robin Murphy; mark.rutland@arm.com;

>> will.deacon@arm.com; eric.auger@redhat.com

>> Cc: linux-arm-kernel@lists.infradead.org; Linuxarm; linux-

>> kernel@vger.kernel.org; devicetree@vger.kernel.org; John Garry;

>> Guohanjun (Hanjun Guo)

>> Subject: Re: [RFC 2/4] irqchip, gicv3-its:Workaround for HiSilicon

>> erratum 161010801

>>

>> On 24/01/17 15:39, Shameerali Kolothum Thodi wrote:

>>> +Eric

>>>

>>>> -----Original Message-----

>>>> From: Robin Murphy [mailto:robin.murphy@arm.com]

>>>> Sent: Tuesday, January 24, 2017 2:42 PM

>>>> To: Marc Zyngier; Shameerali Kolothum Thodi; mark.rutland@arm.com;

>>>> will.deacon@arm.com

>>>> Cc: linux-arm-kernel@lists.infradead.org; Linuxarm; linux-

>>>> kernel@vger.kernel.org; devicetree@vger.kernel.org; John Garry;

>>>> Guohanjun (Hanjun Guo)

>>>> Subject: Re: [RFC 2/4] irqchip, gicv3-its:Workaround for HiSilicon

>>>> erratum 161010801

>>>>

>>>> On 24/01/17 14:11, Marc Zyngier wrote:

>>>>> + Robin,

>>>>>

>>>>> On 24/01/17 13:47, Shameerali Kolothum Thodi wrote:

>>>>>> The HiSilicon erratum 161010801 describes the limitation of

>> certain

>>>>>> HiSilicon platforms to support the SMMU mappings for MSI

>>>> transactions.

>>>>>>

>>>>>> On these platforms GICv3 ITS translator is presented with the

>>>> deviceID

>>>>>> by extending the MSI payload data to 64 bits to include the

>>>> deviceID.

>>>>>> Hence, the PCIe controller on this platforms has to differentiate

>>>> the

>>>>>> MSI payload against other DMA payload and has to modify the MSI

>>>> payload.

>>>>>> This basically makes it difficult for this platforms to have a

>> SMMU

>>>>>> translation for MSI. Also these platforms doesn't have a proper

>>>>>> IIDR register to use the existing IIDR based quirk mechanism.

>>>>>>

>>>>>> This workaround based on the devicetree binding property, supports

>>>>>> bypassing the SMMU for the MSI transactions on this platforms.

>>>>>>

>>>>>> Signed-off-by: shameer <shameerali.kolothum.thodi@huawei.com>

>>>>>> ---

>>>>>>  arch/arm64/Kconfig               | 15 ++++++++++++

>>>>>>  drivers/irqchip/irq-gic-common.h |  1 +

>>>>>> drivers/irqchip/irq-gic-v3-its.c | 52

>>>> +++++++++++++++++++++++++++++++++++++++-

>>>>>>  3 files changed, 67 insertions(+), 1 deletion(-)

>>>>>>

>>>>>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index

>>>>>> 0ae0427..8d600b0 100644

>>>>>> --- a/arch/arm64/Kconfig

>>>>>> +++ b/arch/arm64/Kconfig

>>>>>> @@ -485,6 +485,21 @@ config CAVIUM_ERRATUM_27456

>>>>>>

>>>>>>  	  If unsure, say Y.

>>>>>>

>>>>>> +config HISILICON_ERRATUM_161010801

>>>>>> +	bool "HiSilicon erratum 161010801"

>>>>>> +	default y

>>>>>> +	help

>>>>>> +	  Enable workaround for erratum 161010801.

>>>>>> +

>>>>>> +	  This implements a gicv3-its errata workaround for

>> HiSilicon

>>>>>> +	  platforms Hip05/Hip07. These platforms cannot support the

>> MSI

>>>>>> +	  interrupt remapping and MSI transaction has to be

>> bypassed by

>>>> SMMU.

>>>>>> +

>>>>>> +	  The fix is to avoid calling the remapping hook into the

>> SMMU

>>>>>> +	  driver from the its_irq_compose_msi_msg().

>>>>>> +

>>>>>> +	  If unsure, say Y.

>>>>>> +

>>>>>>  endmenu

>>>>>>

>>>>>>

>>>>>> diff --git a/drivers/irqchip/irq-gic-common.h

>>>>>> b/drivers/irqchip/irq-

>>>> gic-common.h

>>>>>> index 205e5fd..de0385a 100644

>>>>>> --- a/drivers/irqchip/irq-gic-common.h

>>>>>> +++ b/drivers/irqchip/irq-gic-common.h

>>>>>> @@ -26,6 +26,7 @@ struct gic_quirk {

>>>>>>  	void (*init)(void *data);

>>>>>>  	u32 iidr;

>>>>>>  	u32 mask;

>>>>>> +	const char *erratum;

>>>>>>  };

>>>>>>

>>>>>>  int gic_configure_irq(unsigned int irq, unsigned int type, diff

>>>>>> --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-

>>>> gic-v3-its.c

>>>>>> index f471939..0a326f6 100644

>>>>>> --- a/drivers/irqchip/irq-gic-v3-its.c

>>>>>> +++ b/drivers/irqchip/irq-gic-v3-its.c

>>>>>> @@ -44,6 +44,7 @@

>>>>>>  #define ITS_FLAGS_CMDQ_NEEDS_FLUSHING		(1ULL << 0)

>>>>>>  #define ITS_FLAGS_WORKAROUND_CAVIUM_22375	(1ULL << 1)

>>>>>>  #define ITS_FLAGS_WORKAROUND_CAVIUM_23144	(1ULL << 2)

>>>>>> +#define ITS_FLAGS_WORKAROUND_HISILICON_161010801	(1ULL << 3)

>>>>>>

>>>>>>  #define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING	(1 << 0)

>>>>>>

>>>>>> @@ -659,7 +660,8 @@ static void its_irq_compose_msi_msg(struct

>>>> irq_data *d, struct msi_msg *msg)

>>>>>>  	msg->address_hi		= upper_32_bits(addr);

>>>>>>  	msg->data		= its_get_event_id(d);

>>>>>>

>>>>>> -	iommu_dma_map_msi_msg(d->irq, msg);

>>>>>> +	if (!(its->flags &

>> ITS_FLAGS_WORKAROUND_HISILICON_161010801))

>>>>>> +		iommu_dma_map_msi_msg(d->irq, msg);

>>>>>

>>>>> Let's contemplate this for a moment. If we're on the affected ITS,

>>>> we're

>>>>> using the physical address of the GITS_TRANSLATER register. What

>>>>> guarantees that this is not going to conflict with an IOVA that DMA

>>>> is

>>>>> going to use? From looking at these patches, my feeling is "not

>>>> much".

>>>>>

>>>>> So if I'm right, you're opening the door to some interesting memory

>>>>> corruption if the two regions ever intersect.

>>>>>

>>>>> Robin, what do you think?

>>>>

>>>> Yup. Unless the ITS physical address is actually reserved from the

>>>> IOVA domain, it's still free to be allocated for DMA mappings, and

>> if

>>>> that ever happens then you'll get odd bits of data landing in the

>> ITS

>>>> instead of RAM, and maybe even locked-up devices or worse if the

>>>> doorbell gives back decode errors on read attempts. It's essentially

>>>> the exact same problem as we have with memory-mapped PCI windows,

>> and

>>>> needs to be solved in the same fashion, i.e. between the SMMU and

>> the

>>>> IOMMU-DMA code.

>>>

>>> Is this something that can incorporated in Eric's latest patch

>> series[1]?

>>> It does mentions reserved regions can be:

>>> - directly mapped regions

>>> - regions that cannot be iommu mapped (PCI host bridge windows, ...)

>>> - MSI regions (because they belong to another address space or

>> because

>>>   they are not translated by the IOMMU and need special handling)

>>>

>>> Though I am not clear our case comes under "the MSI regions that are

>>> not translated by the IOMMU and need special handling" or not.

>>

>> Well, given that in your case, the IOMMU never sees the MSI write, it

>> definitely falls into the "not translated" category.

>>

>> As for handling it on top of Eric's series, that's probably the most

>> reasonable thing to do, which also means that none of this should

>> appear in the ITS driver. Robin seems to have an idea on how to

>> approach this.

> 

> Ok. Thanks for that Marc/Robin.

> 

> But I am not sure we can get away with ITS driver. Because current vfio

> patch series[1] treats GICV3 ITS as irq safe and is setting 

> IRQ_DOMAIN_FLAG_MSI_REMAP in ITS driver. But this is not the case with

> our ITS. 


As far as I understand, it *is* the case, and largely the reason we're
here in the first place. "MSI remapping" is an x86-specific term for the
part of their IOMMU which intercepts writes to the architectural MSI
region and can "remap" a given device's interrupts to different targets
on the CPU side - i.e. more or less the same thing the ITS does with the
device ID data which your implementation apparently needs this
address-matching widget to append to the write. If your ITS couldn't
actually distinguish between requesters (like a GICv2m) we'd have a
*much* bigger problem.

Robin.

> 

> Thanks,

> Shameer

> 

> [1] https://github.com/eauger/linux/commit/df2a17bd8cc73b5a381ed1570870a2aeb6f7e068

> 

>
Robin Murphy Jan. 24, 2017, 4:42 p.m. UTC | #2
On 24/01/17 16:29, Marc Zyngier wrote:
> On 24/01/17 16:14, Shameerali Kolothum Thodi wrote:

>>>>>> Let's contemplate this for a moment. If we're on the affected ITS,

>>>>> we're

>>>>>> using the physical address of the GITS_TRANSLATER register. What

>>>>>> guarantees that this is not going to conflict with an IOVA that DMA

>>>>> is

>>>>>> going to use? From looking at these patches, my feeling is "not

>>>>> much".

>>>>>>

>>>>>> So if I'm right, you're opening the door to some interesting memory

>>>>>> corruption if the two regions ever intersect.

>>>>>>

>>>>>> Robin, what do you think?

>>>>>

>>>>> Yup. Unless the ITS physical address is actually reserved from the

>>>>> IOVA domain, it's still free to be allocated for DMA mappings, and

>>> if

>>>>> that ever happens then you'll get odd bits of data landing in the

>>> ITS

>>>>> instead of RAM, and maybe even locked-up devices or worse if the

>>>>> doorbell gives back decode errors on read attempts. It's essentially

>>>>> the exact same problem as we have with memory-mapped PCI windows,

>>> and

>>>>> needs to be solved in the same fashion, i.e. between the SMMU and

>>> the

>>>>> IOMMU-DMA code.

>>>>

>>>> Is this something that can incorporated in Eric's latest patch

>>> series[1]?

>>>> It does mentions reserved regions can be:

>>>> - directly mapped regions

>>>> - regions that cannot be iommu mapped (PCI host bridge windows, ...)

>>>> - MSI regions (because they belong to another address space or

>>> because

>>>>   they are not translated by the IOMMU and need special handling)

>>>>

>>>> Though I am not clear our case comes under "the MSI regions that are

>>>> not translated by the IOMMU and need special handling" or not.

>>>

>>> Well, given that in your case, the IOMMU never sees the MSI write, it

>>> definitely falls into the "not translated" category.

>>>

>>> As for handling it on top of Eric's series, that's probably the most

>>> reasonable thing to do, which also means that none of this should

>>> appear in the ITS driver. Robin seems to have an idea on how to

>>> approach this.

>>

>> Ok. Thanks for that Marc/Robin.

>>

>> But I am not sure we can get away with ITS driver. Because current vfio

>> patch series[1] treats GICV3 ITS as irq safe and is setting 

>> IRQ_DOMAIN_FLAG_MSI_REMAP in ITS driver. But this is not the case with

>> our ITS. 

> 

> The ITS itself is perfectly safe, as it does perform device isolation

> just fine (at least as far as I can tell from this bug description).

> 

> There is two things we need to take care of:

> - When the device is used on the host, the hardwired MSI region must be

> excluded from the DMA IOVA allocator, and the iommu_dma_map_msi_msg()

> call becomes a NOP.

> - When the device is assigned to VFIO, the MSI region must be exposed to

> userspace through /sys so that it knows that the guest RAM cannot alias

> with this region (or face the corruption we've talked about above).

> 

> None of that actually involves the ITS. Eric's stuff has some of the

> initial infrastructure, but there is of course more to it. I'll let

> Robin chime in and correct me if I've missed something (very likely).


That's pretty much it. Once Eric's patches for the iommu_resv_regions
interface have been merged, I'm planning to convert IOMMU-DMA over to
using those instead of the internal PCI-specific hook it currently has.
Then we would simply need the SMMU driver to expose this hardwired MSI
region where necessary so that IOMMU-DMA can directly insert 1:1
iommu_dma_msi_page entries to cover it up-front in iommu_dma_init_domain().

Robin.

> 

> Thanks,

> 

> 	M.

>
Shameerali Kolothum Thodi Jan. 25, 2017, 10:30 a.m. UTC | #3
> -----Original Message-----

> From: Mark Rutland [mailto:mark.rutland@arm.com]

> Sent: Tuesday, January 24, 2017 2:15 PM

> To: Shameerali Kolothum Thodi

> Cc: marc.zyngier@arm.com; will.deacon@arm.com; linux-arm-

> kernel@lists.infradead.org; Linuxarm; linux-kernel@vger.kernel.org;

> devicetree@vger.kernel.org; John Garry; Guohanjun (Hanjun Guo);

> robin.murphy@arm.com

> Subject: Re: [RFC 2/4] irqchip, gicv3-its:Workaround for HiSilicon

> erratum 161010801

sounds like this will have severe implications for virtualization.
> 

> > Also these platforms doesn't have a proper IIDR

> > register to use the existing IIDR based quirk mechanism.

> 

> What exactly is wrong with the IIDR on these platforms? That sounds

> like

> an erratum as of itself.

> 

> What precise value do reads of the IIDR return? Or do reads result in

> other erroneous behaviour?


As far as I know, there is no erroneous behavior while reading.  But the
IIDR JEP106 identity code is 0 on these platforms, hence not much of use.

This will be corrected in next revision of hw.

Thanks,
Shameer
diff mbox series

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0ae0427..8d600b0 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -485,6 +485,21 @@  config CAVIUM_ERRATUM_27456

 	  If unsure, say Y.

+config HISILICON_ERRATUM_161010801
+	bool "HiSilicon erratum 161010801"
+	default y
+	help
+	  Enable workaround for erratum 161010801.
+
+	  This implements a gicv3-its errata workaround for HiSilicon
+	  platforms Hip05/Hip07. These platforms cannot support the MSI
+	  interrupt remapping and MSI transaction has to be bypassed by SMMU.
+
+	  The fix is to avoid calling the remapping hook into the SMMU
+	  driver from the its_irq_compose_msi_msg().
+
+	  If unsure, say Y.
+
 endmenu


diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index 205e5fd..de0385a 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -26,6 +26,7 @@  struct gic_quirk {
 	void (*init)(void *data);
 	u32 iidr;
 	u32 mask;
+	const char *erratum;
 };

 int gic_configure_irq(unsigned int irq, unsigned int type,
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index f471939..0a326f6 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -44,6 +44,7 @@ 
 #define ITS_FLAGS_CMDQ_NEEDS_FLUSHING		(1ULL << 0)
 #define ITS_FLAGS_WORKAROUND_CAVIUM_22375	(1ULL << 1)
 #define ITS_FLAGS_WORKAROUND_CAVIUM_23144	(1ULL << 2)
+#define ITS_FLAGS_WORKAROUND_HISILICON_161010801	(1ULL << 3)

 #define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING	(1 << 0)

@@ -659,7 +660,8 @@  static void its_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
 	msg->address_hi		= upper_32_bits(addr);
 	msg->data		= its_get_event_id(d);

-	iommu_dma_map_msi_msg(d->irq, msg);
+	if (!(its->flags & ITS_FLAGS_WORKAROUND_HISILICON_161010801))
+		iommu_dma_map_msi_msg(d->irq, msg);
 }

 static struct irq_chip its_irq_chip = {
@@ -1596,6 +1598,13 @@  static void __maybe_unused its_enable_quirk_cavium_23144(void *data)
 	its->flags |= ITS_FLAGS_WORKAROUND_CAVIUM_23144;
 }

+static void __maybe_unused its_enable_quirk_hisilicon_161010801(void *data)
+{
+	struct its_node *its = data;
+
+	its->flags |= ITS_FLAGS_WORKAROUND_HISILICON_161010801;
+}
+
 static const struct gic_quirk its_quirks[] = {
 #ifdef CONFIG_CAVIUM_ERRATUM_22375
 	{
@@ -1613,15 +1622,54 @@  static void __maybe_unused its_enable_quirk_cavium_23144(void *data)
 		.init	= its_enable_quirk_cavium_23144,
 	},
 #endif
+#ifdef CONFIG_HISILICON_ERRATUM_161010801
+	{
+		.desc	 = "ITS: HISILICON erratum 161010801",
+		.iidr	 = 0xffffffff,	/*invalid, use erratum instead*/
+		.mask	 = 0xffffffff,
+		.erratum = "hisilicon,erratum-161010801",
+		.init	 = its_enable_quirk_hisilicon_161010801,
+	},
+#endif
 	{
 	}
 };

+const struct gic_quirk  *erratum_workarounds[ARRAY_SIZE(its_quirks)] = {};
+
+static void its_enable_erratums(struct its_node *its)
+{
+	int i = 0;
+	const struct gic_quirk  *workarounds;
+
+	while ((workarounds = erratum_workarounds[i])) {
+		workarounds->init(its);
+		pr_info("GIC: enabling workaround for %s\n", workarounds->desc);
+		erratum_workarounds[i++] = NULL;
+	}
+
+}
+
 static void its_enable_quirks(struct its_node *its)
 {
 	u32 iidr = readl_relaxed(its->base + GITS_IIDR);

 	gic_enable_quirks(iidr, its_quirks, its);
+
+	its_enable_erratums(its);
+}
+
+static void of_its_enable_erratum(struct device_node *np)
+{
+	const struct gic_quirk *quirks = its_quirks;
+	int i = 0;
+
+	for (; quirks->desc; quirks++) {
+		const char *erratum = quirks->erratum;
+
+		if ((erratum) && (of_property_read_bool(np, erratum)))
+			erratum_workarounds[i++] = quirks;
+	}
 }

 static int its_init_domain(struct fwnode_handle *handle, struct its_node *its)
@@ -1801,6 +1849,8 @@  static int __init its_of_probe(struct device_node *node)
 			continue;
 		}

+		of_its_enable_erratum(np);
+
 		its_probe_one(&res, &np->fwnode, of_node_to_nid(np));
 	}
 	return 0;