[v9,5/7] vfio/type1: also check IRQ remapping capability at msi domain

Message ID 1462362858-2925-6-git-send-email-eric.auger@linaro.org
State New
Headers show

Commit Message

Auger Eric May 4, 2016, 11:54 a.m.
On x86 IRQ remapping is abstracted by the IOMMU. On ARM this is abstracted
by the msi controller. vfio_safe_irq_domain allows to check whether
interrupts are "safe" for a given device. They are if the device does
not use MSI or if the device uses MSI and the msi-parent controller
supports IRQ remapping.

Then we check at group level if all devices have safe interrupts: if not,
we only allow the group to be attached if allow_unsafe_interrupts is set.

At this point ARM sMMU still advertises IOMMU_CAP_INTR_REMAP. This is
changed in next patch.

Signed-off-by: Eric Auger <eric.auger@linaro.org>


---
v3 -> v4:
- rename vfio_msi_parent_irq_remapping_capable into vfio_safe_irq_domain
  and irq_remapping into safe_irq_domains

v2 -> v3:
- protect vfio_msi_parent_irq_remapping_capable with
  CONFIG_GENERIC_MSI_IRQ_DOMAIN
---
 drivers/vfio/vfio_iommu_type1.c | 44 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

-- 
1.9.1

Comments

Auger Eric May 9, 2016, 8:05 a.m. | #1
Hi Chalamarla,
On 05/05/2016 09:23 PM, Chalamarla, Tirumalesh wrote:
> 

> 

> 

> 

> 

> On 5/4/16, 4:54 AM, "linux-arm-kernel on behalf of Eric Auger" <linux-arm-kernel-bounces@lists.infradead.org on behalf of eric.auger@linaro.org> wrote:

> 

>> On x86 IRQ remapping is abstracted by the IOMMU. On ARM this is abstracted

>> by the msi controller. vfio_safe_irq_domain allows to check whether

>> interrupts are "safe" for a given device. They are if the device does

>> not use MSI or if the device uses MSI and the msi-parent controller

>> supports IRQ remapping.

>>

>> Then we check at group level if all devices have safe interrupts: if not,

>> we only allow the group to be attached if allow_unsafe_interrupts is set.

>>

>> At this point ARM sMMU still advertises IOMMU_CAP_INTR_REMAP. This is

>> changed in next patch.

> 

> Will this work in systems with multiple ITS?

Yes it should support multiple ITS.

Please note however that the series does not yet implement the
msi_doorbell_info callback in GICv3 ITS PCIe/platform irqchip. Without
that, the pci_enable is going to fail. I am going to submit a separate
RFC for that but I can't test it at the moment.

Best Regards

Eric
>>

>> Signed-off-by: Eric Auger <eric.auger@linaro.org>

>>

>> ---

>> v3 -> v4:

>> - rename vfio_msi_parent_irq_remapping_capable into vfio_safe_irq_domain

>>  and irq_remapping into safe_irq_domains

>>

>> v2 -> v3:

>> - protect vfio_msi_parent_irq_remapping_capable with

>>  CONFIG_GENERIC_MSI_IRQ_DOMAIN

>> ---

>> drivers/vfio/vfio_iommu_type1.c | 44 +++++++++++++++++++++++++++++++++++++++--

>> 1 file changed, 42 insertions(+), 2 deletions(-)

>>

>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c

>> index 4d3a6f1..2fc8197 100644

>> --- a/drivers/vfio/vfio_iommu_type1.c

>> +++ b/drivers/vfio/vfio_iommu_type1.c

>> @@ -37,6 +37,8 @@

>> #include <linux/vfio.h>

>> #include <linux/workqueue.h>

>> #include <linux/msi-iommu.h>

>> +#include <linux/irqdomain.h>

>> +#include <linux/msi.h>

>>

>> #define DRIVER_VERSION  "0.2"

>> #define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"

>> @@ -777,6 +779,33 @@ static int vfio_bus_type(struct device *dev, void *data)

>> 	return 0;

>> }

>>

>> +/**

>> + * vfio_safe_irq_domain: returns whether the irq domain

>> + * the device is attached to is safe with respect to MSI isolation.

>> + * If the irq domain is not an MSI domain, we return it is safe.

>> + *

>> + * @dev: device handle

>> + * @data: unused

>> + * returns 0 if the irq domain is safe, -1 if not.

>> + */

>> +static int vfio_safe_irq_domain(struct device *dev, void *data)

>> +{

>> +#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN

>> +	struct irq_domain *domain;

>> +	struct msi_domain_info *info;

>> +

>> +	domain = dev_get_msi_domain(dev);

>> +	if (!domain)

>> +		return 0;

>> +

>> +	info = msi_get_domain_info(domain);

>> +

>> +	if (!(info->flags & MSI_FLAG_IRQ_REMAPPING))

>> +		return -1;

>> +#endif

>> +	return 0;

>> +}

>> +

>> static int vfio_iommu_replay(struct vfio_iommu *iommu,

>> 			     struct vfio_domain *domain)

>> {

>> @@ -870,7 +899,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,

>> 	struct vfio_group *group, *g;

>> 	struct vfio_domain *domain, *d;

>> 	struct bus_type *bus = NULL;

>> -	int ret;

>> +	int ret, safe_irq_domains;

>>

>> 	mutex_lock(&iommu->lock);

>>

>> @@ -893,6 +922,13 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,

>>

>> 	group->iommu_group = iommu_group;

>>

>> +	/*

>> +	 * Determine if all the devices of the group have a safe irq domain

>> +	 * with respect to MSI isolation

>> +	 */

>> +	safe_irq_domains = !iommu_group_for_each_dev(iommu_group, &bus,

>> +				       vfio_safe_irq_domain);

>> +

>> 	/* Determine bus_type in order to allocate a domain */

>> 	ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);

>> 	if (ret)

>> @@ -920,8 +956,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,

>> 	INIT_LIST_HEAD(&domain->group_list);

>> 	list_add(&group->next, &domain->group_list);

>>

>> +	/*

>> +	 * to advertise safe interrupts either the IOMMU or the MSI controllers

>> +	 * must support IRQ remapping/interrupt translation

>> +	 */

>> 	if (!allow_unsafe_interrupts &&

>> -	    !iommu_capable(bus, IOMMU_CAP_INTR_REMAP)) {

>> +	    (!iommu_capable(bus, IOMMU_CAP_INTR_REMAP) && !safe_irq_domains)) {

>> 		pr_warn("%s: No interrupt remapping support.  Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",

>> 		       __func__);

>> 		ret = -EPERM;

>> -- 

>> 1.9.1

>>

>>

>> _______________________________________________

>> linux-arm-kernel mailing list

>> linux-arm-kernel@lists.infradead.org

>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Auger Eric May 10, 2016, 4:10 p.m. | #2
Hi Alex,
On 05/10/2016 12:49 AM, Alex Williamson wrote:
> On Wed,  4 May 2016 11:54:16 +0000

> Eric Auger <eric.auger@linaro.org> wrote:

> 

>> On x86 IRQ remapping is abstracted by the IOMMU. On ARM this is abstracted

>> by the msi controller. vfio_safe_irq_domain allows to check whether

>> interrupts are "safe" for a given device. They are if the device does

>> not use MSI

> 

> Are we sure we're not opening a security hole here?  An MSI is simply a

> DMA write, so really whether or not a device uses MSI is irrelevant.

> If it can generate a DMA to the MSI doorbell then we need to be

> protected and I think we pretty much need to assume that devices are

> DMA capable.  Do the MSI domain checks cover this?

Let me try to rephrase: we check the device is not attached to an MSI
controller (I think this is the semantic of dev_get_msi_domain(dev)).

If it is not, we don't have to care about MSI isolation: there will be
no IOMMU binding between the device and any MSI doorbell. If it is we
check the msi domain is backed by an MSI controller able to perform MSI
isolation.

So effectively "usage of MSIs" is improper - since it is decided after
the group attachment anyway  - and the commit message should rather
state "if the device is linked to an MSI controller" (dt msi-parent
notion I think).

Does it sound better?

Regards

Eric
> 

>> or if the device uses MSI and the msi-parent controller

>> supports IRQ remapping.

>>

>> Then we check at group level if all devices have safe interrupts: if not,

>> we only allow the group to be attached if allow_unsafe_interrupts is set.

>>

>> At this point ARM sMMU still advertises IOMMU_CAP_INTR_REMAP. This is

>> changed in next patch.

>>

>> Signed-off-by: Eric Auger <eric.auger@linaro.org>

>>

>> ---

>> v3 -> v4:

>> - rename vfio_msi_parent_irq_remapping_capable into vfio_safe_irq_domain

>>   and irq_remapping into safe_irq_domains

>>

>> v2 -> v3:

>> - protect vfio_msi_parent_irq_remapping_capable with

>>   CONFIG_GENERIC_MSI_IRQ_DOMAIN

>> ---

>>  drivers/vfio/vfio_iommu_type1.c | 44 +++++++++++++++++++++++++++++++++++++++--

>>  1 file changed, 42 insertions(+), 2 deletions(-)

>>

>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c

>> index 4d3a6f1..2fc8197 100644

>> --- a/drivers/vfio/vfio_iommu_type1.c

>> +++ b/drivers/vfio/vfio_iommu_type1.c

>> @@ -37,6 +37,8 @@

>>  #include <linux/vfio.h>

>>  #include <linux/workqueue.h>

>>  #include <linux/msi-iommu.h>

>> +#include <linux/irqdomain.h>

>> +#include <linux/msi.h>

>>  

>>  #define DRIVER_VERSION  "0.2"

>>  #define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"

>> @@ -777,6 +779,33 @@ static int vfio_bus_type(struct device *dev, void *data)

>>  	return 0;

>>  }

>>  

>> +/**

>> + * vfio_safe_irq_domain: returns whether the irq domain

>> + * the device is attached to is safe with respect to MSI isolation.

>> + * If the irq domain is not an MSI domain, we return it is safe.

>> + *

>> + * @dev: device handle

>> + * @data: unused

>> + * returns 0 if the irq domain is safe, -1 if not.

>> + */

>> +static int vfio_safe_irq_domain(struct device *dev, void *data)

>> +{

>> +#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN

>> +	struct irq_domain *domain;

>> +	struct msi_domain_info *info;

>> +

>> +	domain = dev_get_msi_domain(dev);

>> +	if (!domain)

>> +		return 0;

>> +

>> +	info = msi_get_domain_info(domain);

>> +

>> +	if (!(info->flags & MSI_FLAG_IRQ_REMAPPING))

>> +		return -1;

>> +#endif

>> +	return 0;

>> +}

>> +

>>  static int vfio_iommu_replay(struct vfio_iommu *iommu,

>>  			     struct vfio_domain *domain)

>>  {

>> @@ -870,7 +899,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,

>>  	struct vfio_group *group, *g;

>>  	struct vfio_domain *domain, *d;

>>  	struct bus_type *bus = NULL;

>> -	int ret;

>> +	int ret, safe_irq_domains;

>>  

>>  	mutex_lock(&iommu->lock);

>>  

>> @@ -893,6 +922,13 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,

>>  

>>  	group->iommu_group = iommu_group;

>>  

>> +	/*

>> +	 * Determine if all the devices of the group have a safe irq domain

>> +	 * with respect to MSI isolation

>> +	 */

>> +	safe_irq_domains = !iommu_group_for_each_dev(iommu_group, &bus,

>> +				       vfio_safe_irq_domain);

>> +

>>  	/* Determine bus_type in order to allocate a domain */

>>  	ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);

>>  	if (ret)

>> @@ -920,8 +956,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,

>>  	INIT_LIST_HEAD(&domain->group_list);

>>  	list_add(&group->next, &domain->group_list);

>>  

>> +	/*

>> +	 * to advertise safe interrupts either the IOMMU or the MSI controllers

>> +	 * must support IRQ remapping/interrupt translation

>> +	 */

>>  	if (!allow_unsafe_interrupts &&

>> -	    !iommu_capable(bus, IOMMU_CAP_INTR_REMAP)) {

>> +	    (!iommu_capable(bus, IOMMU_CAP_INTR_REMAP) && !safe_irq_domains)) {

>>  		pr_warn("%s: No interrupt remapping support.  Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",

>>  		       __func__);

>>  		ret = -EPERM;

>
Auger Eric May 11, 2016, 8:38 a.m. | #3
Hi Robin, Alex,
On 05/10/2016 07:24 PM, Robin Murphy wrote:
> Hi Eric,

> 

> On 10/05/16 17:10, Eric Auger wrote:

>> Hi Alex,

>> On 05/10/2016 12:49 AM, Alex Williamson wrote:

>>> On Wed,  4 May 2016 11:54:16 +0000

>>> Eric Auger <eric.auger@linaro.org> wrote:

>>>

>>>> On x86 IRQ remapping is abstracted by the IOMMU. On ARM this is

>>>> abstracted

>>>> by the msi controller. vfio_safe_irq_domain allows to check whether

>>>> interrupts are "safe" for a given device. They are if the device does

>>>> not use MSI

>>>

>>> Are we sure we're not opening a security hole here?  An MSI is simply a

>>> DMA write, so really whether or not a device uses MSI is irrelevant.

>>> If it can generate a DMA to the MSI doorbell then we need to be

>>> protected and I think we pretty much need to assume that devices are

>>> DMA capable.  Do the MSI domain checks cover this?

>> Let me try to rephrase: we check the device is not attached to an MSI

>> controller (I think this is the semantic of dev_get_msi_domain(dev)).

>>

>> If it is not, we don't have to care about MSI isolation: there will be

>> no IOMMU binding between the device and any MSI doorbell. If it is we

>> check the msi domain is backed by an MSI controller able to perform MSI

>> isolation.

>>

>> So effectively "usage of MSIs" is improper - since it is decided after

>> the group attachment anyway  - and the commit message should rather

>> state "if the device is linked to an MSI controller" (dt msi-parent

>> notion I think).

> 

> Hmm, I think Alex has a point here - on a GICv2m I can happily fire

> arbitrary MSIs from _a shell_ (using /dev/mem), and the CPUs definitely

> aren't in an MSI domain, so I don't think it's valid to assume that a

> device using only wired interrupts, therefore with no connection to any

> MSI controller, isn't still capable of maliciously spewing DMA all over

> any and every doorbell region in the system.


Sorry but I still don't get the point. For the device to reach the
doorbell there must be an IOMMU mapping.

- if the device is not attached to an MSI domain, there won't be any
doorbell iommu mapping built by this series, so no risk, right?

The device will be allowed to reach only memory iommu mapped by
userspace with VFIO DMA MAP standard API. Of course if the userspace can
mmap all the host PA that's a more general issue, right?

- If the device is attached to an MSI domain (msi-parent link), 2 cases:
1) the MSI controller advertises MSI isolation (ITS cases), no risk
2) the MSI controller does not advertise MSI isolation (GICv2m), there
is a security hole.
   a) by default we reject the device attachment
   b) if the userspace overrides the safe interrupt option he accepts
the security hole

What am I missing?

Best Regards

Eric
> 

> Robin.

> 

>> Does it sound better?

>>

>> Regards

>>

>> Eric

>>>

>>>> or if the device uses MSI and the msi-parent controller

>>>> supports IRQ remapping.

>>>>

>>>> Then we check at group level if all devices have safe interrupts: if

>>>> not,

>>>> we only allow the group to be attached if allow_unsafe_interrupts is

>>>> set.

>>>>

>>>> At this point ARM sMMU still advertises IOMMU_CAP_INTR_REMAP. This is

>>>> changed in next patch.

>>>>

>>>> Signed-off-by: Eric Auger <eric.auger@linaro.org>

>>>>

>>>> ---

>>>> v3 -> v4:

>>>> - rename vfio_msi_parent_irq_remapping_capable into

>>>> vfio_safe_irq_domain

>>>>    and irq_remapping into safe_irq_domains

>>>>

>>>> v2 -> v3:

>>>> - protect vfio_msi_parent_irq_remapping_capable with

>>>>    CONFIG_GENERIC_MSI_IRQ_DOMAIN

>>>> ---

>>>>   drivers/vfio/vfio_iommu_type1.c | 44

>>>> +++++++++++++++++++++++++++++++++++++++--

>>>>   1 file changed, 42 insertions(+), 2 deletions(-)

>>>>

>>>> diff --git a/drivers/vfio/vfio_iommu_type1.c

>>>> b/drivers/vfio/vfio_iommu_type1.c

>>>> index 4d3a6f1..2fc8197 100644

>>>> --- a/drivers/vfio/vfio_iommu_type1.c

>>>> +++ b/drivers/vfio/vfio_iommu_type1.c

>>>> @@ -37,6 +37,8 @@

>>>>   #include <linux/vfio.h>

>>>>   #include <linux/workqueue.h>

>>>>   #include <linux/msi-iommu.h>

>>>> +#include <linux/irqdomain.h>

>>>> +#include <linux/msi.h>

>>>>

>>>>   #define DRIVER_VERSION  "0.2"

>>>>   #define DRIVER_AUTHOR   "Alex Williamson

>>>> <alex.williamson@redhat.com>"

>>>> @@ -777,6 +779,33 @@ static int vfio_bus_type(struct device *dev,

>>>> void *data)

>>>>       return 0;

>>>>   }

>>>>

>>>> +/**

>>>> + * vfio_safe_irq_domain: returns whether the irq domain

>>>> + * the device is attached to is safe with respect to MSI isolation.

>>>> + * If the irq domain is not an MSI domain, we return it is safe.

>>>> + *

>>>> + * @dev: device handle

>>>> + * @data: unused

>>>> + * returns 0 if the irq domain is safe, -1 if not.

>>>> + */

>>>> +static int vfio_safe_irq_domain(struct device *dev, void *data)

>>>> +{

>>>> +#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN

>>>> +    struct irq_domain *domain;

>>>> +    struct msi_domain_info *info;

>>>> +

>>>> +    domain = dev_get_msi_domain(dev);

>>>> +    if (!domain)

>>>> +        return 0;

>>>> +

>>>> +    info = msi_get_domain_info(domain);

>>>> +

>>>> +    if (!(info->flags & MSI_FLAG_IRQ_REMAPPING))

>>>> +        return -1;

>>>> +#endif

>>>> +    return 0;

>>>> +}

>>>> +

>>>>   static int vfio_iommu_replay(struct vfio_iommu *iommu,

>>>>                    struct vfio_domain *domain)

>>>>   {

>>>> @@ -870,7 +899,7 @@ static int vfio_iommu_type1_attach_group(void

>>>> *iommu_data,

>>>>       struct vfio_group *group, *g;

>>>>       struct vfio_domain *domain, *d;

>>>>       struct bus_type *bus = NULL;

>>>> -    int ret;

>>>> +    int ret, safe_irq_domains;

>>>>

>>>>       mutex_lock(&iommu->lock);

>>>>

>>>> @@ -893,6 +922,13 @@ static int vfio_iommu_type1_attach_group(void

>>>> *iommu_data,

>>>>

>>>>       group->iommu_group = iommu_group;

>>>>

>>>> +    /*

>>>> +     * Determine if all the devices of the group have a safe irq

>>>> domain

>>>> +     * with respect to MSI isolation

>>>> +     */

>>>> +    safe_irq_domains = !iommu_group_for_each_dev(iommu_group, &bus,

>>>> +                       vfio_safe_irq_domain);

>>>> +

>>>>       /* Determine bus_type in order to allocate a domain */

>>>>       ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);

>>>>       if (ret)

>>>> @@ -920,8 +956,12 @@ static int vfio_iommu_type1_attach_group(void

>>>> *iommu_data,

>>>>       INIT_LIST_HEAD(&domain->group_list);

>>>>       list_add(&group->next, &domain->group_list);

>>>>

>>>> +    /*

>>>> +     * to advertise safe interrupts either the IOMMU or the MSI

>>>> controllers

>>>> +     * must support IRQ remapping/interrupt translation

>>>> +     */

>>>>       if (!allow_unsafe_interrupts &&

>>>> -        !iommu_capable(bus, IOMMU_CAP_INTR_REMAP)) {

>>>> +        (!iommu_capable(bus, IOMMU_CAP_INTR_REMAP) &&

>>>> !safe_irq_domains)) {

>>>>           pr_warn("%s: No interrupt remapping support.  Use the

>>>> module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU

>>>> support on this platform\n",

>>>>                  __func__);

>>>>           ret = -EPERM;

>>>

>>

>
Auger Eric May 11, 2016, 9:44 a.m. | #4
Hi Robin,
On 05/11/2016 11:31 AM, Robin Murphy wrote:
> On 11/05/16 09:38, Eric Auger wrote:

>> Hi Robin, Alex,

>> On 05/10/2016 07:24 PM, Robin Murphy wrote:

>>> Hi Eric,

>>>

>>> On 10/05/16 17:10, Eric Auger wrote:

>>>> Hi Alex,

>>>> On 05/10/2016 12:49 AM, Alex Williamson wrote:

>>>>> On Wed,  4 May 2016 11:54:16 +0000

>>>>> Eric Auger <eric.auger@linaro.org> wrote:

>>>>>

>>>>>> On x86 IRQ remapping is abstracted by the IOMMU. On ARM this is

>>>>>> abstracted

>>>>>> by the msi controller. vfio_safe_irq_domain allows to check whether

>>>>>> interrupts are "safe" for a given device. They are if the device does

>>>>>> not use MSI

>>>>>

>>>>> Are we sure we're not opening a security hole here?  An MSI is

>>>>> simply a

>>>>> DMA write, so really whether or not a device uses MSI is irrelevant.

>>>>> If it can generate a DMA to the MSI doorbell then we need to be

>>>>> protected and I think we pretty much need to assume that devices are

>>>>> DMA capable.  Do the MSI domain checks cover this?

>>>> Let me try to rephrase: we check the device is not attached to an MSI

>>>> controller (I think this is the semantic of dev_get_msi_domain(dev)).

>>>>

>>>> If it is not, we don't have to care about MSI isolation: there will be

>>>> no IOMMU binding between the device and any MSI doorbell. If it is we

>>>> check the msi domain is backed by an MSI controller able to perform MSI

>>>> isolation.

>>>>

>>>> So effectively "usage of MSIs" is improper - since it is decided after

>>>> the group attachment anyway  - and the commit message should rather

>>>> state "if the device is linked to an MSI controller" (dt msi-parent

>>>> notion I think).

>>>

>>> Hmm, I think Alex has a point here - on a GICv2m I can happily fire

>>> arbitrary MSIs from _a shell_ (using /dev/mem), and the CPUs definitely

>>> aren't in an MSI domain, so I don't think it's valid to assume that a

>>> device using only wired interrupts, therefore with no connection to any

>>> MSI controller, isn't still capable of maliciously spewing DMA all over

>>> any and every doorbell region in the system.

>>

>> Sorry but I still don't get the point. For the device to reach the

>> doorbell there must be an IOMMU mapping.

> 

> Only when the doorbell is _downstream_ of IOMMU translation.

Yes but that's the root hypothesis with VFIO assignment, right?

Except if the no-iommu option is explicitly set by the userspace, there
must be an IOMMU downstream to the device that protects all the DMA
transactions.
> 

>> - if the device is not attached to an MSI domain, there won't be any

>> doorbell iommu mapping built by this series, so no risk, right?

>>

>> The device will be allowed to reach only memory iommu mapped by

>> userspace with VFIO DMA MAP standard API. Of course if the userspace can

>> mmap all the host PA that's a more general issue, right?

>>

>> - If the device is attached to an MSI domain (msi-parent link), 2 cases:

>> 1) the MSI controller advertises MSI isolation (ITS cases), no risk

>> 2) the MSI controller does not advertise MSI isolation (GICv2m), there

>> is a security hole.

>>     a) by default we reject the device attachment

>>     b) if the userspace overrides the safe interrupt option he accepts

>> the security hole

>>

>> What am I missing?

> 

> The x86-with-interrupt-remapping-disabled case. Currently, without

> unsafe_interrupts, everything is rejected - with this patch, we'll still

> reject anything with an MSI domain, but e.g. legacy PCI devices using

> INTx would be free to scribble all over the un-translated interrupt

> region willy-nilly.

Assuming we have an IOMMU, either we have translated transactions or
faults? So if legacy PCI devices try to write into a doorbell, there
must be faults because there is no IOMMU mapping for doorbells?

Best Regards

Eric
 That's the subtle, but significant, change in
> behaviour.

> 

> Robin.

> 

>> Best Regards

>>

>> Eric

>>>

>>> Robin.

>>>

>>>> Does it sound better?

>>>>

>>>> Regards

>>>>

>>>> Eric

>>>>>

>>>>>> or if the device uses MSI and the msi-parent controller

>>>>>> supports IRQ remapping.

>>>>>>

>>>>>> Then we check at group level if all devices have safe interrupts: if

>>>>>> not,

>>>>>> we only allow the group to be attached if allow_unsafe_interrupts is

>>>>>> set.

>>>>>>

>>>>>> At this point ARM sMMU still advertises IOMMU_CAP_INTR_REMAP. This is

>>>>>> changed in next patch.

>>>>>>

>>>>>> Signed-off-by: Eric Auger <eric.auger@linaro.org>

>>>>>>

>>>>>> ---

>>>>>> v3 -> v4:

>>>>>> - rename vfio_msi_parent_irq_remapping_capable into

>>>>>> vfio_safe_irq_domain

>>>>>>     and irq_remapping into safe_irq_domains

>>>>>>

>>>>>> v2 -> v3:

>>>>>> - protect vfio_msi_parent_irq_remapping_capable with

>>>>>>     CONFIG_GENERIC_MSI_IRQ_DOMAIN

>>>>>> ---

>>>>>>    drivers/vfio/vfio_iommu_type1.c | 44

>>>>>> +++++++++++++++++++++++++++++++++++++++--

>>>>>>    1 file changed, 42 insertions(+), 2 deletions(-)

>>>>>>

>>>>>> diff --git a/drivers/vfio/vfio_iommu_type1.c

>>>>>> b/drivers/vfio/vfio_iommu_type1.c

>>>>>> index 4d3a6f1..2fc8197 100644

>>>>>> --- a/drivers/vfio/vfio_iommu_type1.c

>>>>>> +++ b/drivers/vfio/vfio_iommu_type1.c

>>>>>> @@ -37,6 +37,8 @@

>>>>>>    #include <linux/vfio.h>

>>>>>>    #include <linux/workqueue.h>

>>>>>>    #include <linux/msi-iommu.h>

>>>>>> +#include <linux/irqdomain.h>

>>>>>> +#include <linux/msi.h>

>>>>>>

>>>>>>    #define DRIVER_VERSION  "0.2"

>>>>>>    #define DRIVER_AUTHOR   "Alex Williamson

>>>>>> <alex.williamson@redhat.com>"

>>>>>> @@ -777,6 +779,33 @@ static int vfio_bus_type(struct device *dev,

>>>>>> void *data)

>>>>>>        return 0;

>>>>>>    }

>>>>>>

>>>>>> +/**

>>>>>> + * vfio_safe_irq_domain: returns whether the irq domain

>>>>>> + * the device is attached to is safe with respect to MSI isolation.

>>>>>> + * If the irq domain is not an MSI domain, we return it is safe.

>>>>>> + *

>>>>>> + * @dev: device handle

>>>>>> + * @data: unused

>>>>>> + * returns 0 if the irq domain is safe, -1 if not.

>>>>>> + */

>>>>>> +static int vfio_safe_irq_domain(struct device *dev, void *data)

>>>>>> +{

>>>>>> +#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN

>>>>>> +    struct irq_domain *domain;

>>>>>> +    struct msi_domain_info *info;

>>>>>> +

>>>>>> +    domain = dev_get_msi_domain(dev);

>>>>>> +    if (!domain)

>>>>>> +        return 0;

>>>>>> +

>>>>>> +    info = msi_get_domain_info(domain);

>>>>>> +

>>>>>> +    if (!(info->flags & MSI_FLAG_IRQ_REMAPPING))

>>>>>> +        return -1;

>>>>>> +#endif

>>>>>> +    return 0;

>>>>>> +}

>>>>>> +

>>>>>>    static int vfio_iommu_replay(struct vfio_iommu *iommu,

>>>>>>                     struct vfio_domain *domain)

>>>>>>    {

>>>>>> @@ -870,7 +899,7 @@ static int vfio_iommu_type1_attach_group(void

>>>>>> *iommu_data,

>>>>>>        struct vfio_group *group, *g;

>>>>>>        struct vfio_domain *domain, *d;

>>>>>>        struct bus_type *bus = NULL;

>>>>>> -    int ret;

>>>>>> +    int ret, safe_irq_domains;

>>>>>>

>>>>>>        mutex_lock(&iommu->lock);

>>>>>>

>>>>>> @@ -893,6 +922,13 @@ static int vfio_iommu_type1_attach_group(void

>>>>>> *iommu_data,

>>>>>>

>>>>>>        group->iommu_group = iommu_group;

>>>>>>

>>>>>> +    /*

>>>>>> +     * Determine if all the devices of the group have a safe irq

>>>>>> domain

>>>>>> +     * with respect to MSI isolation

>>>>>> +     */

>>>>>> +    safe_irq_domains = !iommu_group_for_each_dev(iommu_group, &bus,

>>>>>> +                       vfio_safe_irq_domain);

>>>>>> +

>>>>>>        /* Determine bus_type in order to allocate a domain */

>>>>>>        ret = iommu_group_for_each_dev(iommu_group, &bus,

>>>>>> vfio_bus_type);

>>>>>>        if (ret)

>>>>>> @@ -920,8 +956,12 @@ static int vfio_iommu_type1_attach_group(void

>>>>>> *iommu_data,

>>>>>>        INIT_LIST_HEAD(&domain->group_list);

>>>>>>        list_add(&group->next, &domain->group_list);

>>>>>>

>>>>>> +    /*

>>>>>> +     * to advertise safe interrupts either the IOMMU or the MSI

>>>>>> controllers

>>>>>> +     * must support IRQ remapping/interrupt translation

>>>>>> +     */

>>>>>>        if (!allow_unsafe_interrupts &&

>>>>>> -        !iommu_capable(bus, IOMMU_CAP_INTR_REMAP)) {

>>>>>> +        (!iommu_capable(bus, IOMMU_CAP_INTR_REMAP) &&

>>>>>> !safe_irq_domains)) {

>>>>>>            pr_warn("%s: No interrupt remapping support.  Use the

>>>>>> module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU

>>>>>> support on this platform\n",

>>>>>>                   __func__);

>>>>>>            ret = -EPERM;

>>>>>

>>>>

>>>

>>

>
Auger Eric May 11, 2016, 2:37 p.m. | #5
Hi Robin,
On 05/11/2016 03:48 PM, Robin Murphy wrote:
> On 11/05/16 10:44, Eric Auger wrote:

>> Hi Robin,

>> On 05/11/2016 11:31 AM, Robin Murphy wrote:

>>> On 11/05/16 09:38, Eric Auger wrote:

>>>> Hi Robin, Alex,

>>>> On 05/10/2016 07:24 PM, Robin Murphy wrote:

>>>>> Hi Eric,

>>>>>

>>>>> On 10/05/16 17:10, Eric Auger wrote:

>>>>>> Hi Alex,

>>>>>> On 05/10/2016 12:49 AM, Alex Williamson wrote:

>>>>>>> On Wed,  4 May 2016 11:54:16 +0000

>>>>>>> Eric Auger <eric.auger@linaro.org> wrote:

>>>>>>>

>>>>>>>> On x86 IRQ remapping is abstracted by the IOMMU. On ARM this is

>>>>>>>> abstracted

>>>>>>>> by the msi controller. vfio_safe_irq_domain allows to check whether

>>>>>>>> interrupts are "safe" for a given device. They are if the device

>>>>>>>> does

>>>>>>>> not use MSI

>>>>>>>

>>>>>>> Are we sure we're not opening a security hole here?  An MSI is

>>>>>>> simply a

>>>>>>> DMA write, so really whether or not a device uses MSI is irrelevant.

>>>>>>> If it can generate a DMA to the MSI doorbell then we need to be

>>>>>>> protected and I think we pretty much need to assume that devices are

>>>>>>> DMA capable.  Do the MSI domain checks cover this?

>>>>>> Let me try to rephrase: we check the device is not attached to an MSI

>>>>>> controller (I think this is the semantic of dev_get_msi_domain(dev)).

>>>>>>

>>>>>> If it is not, we don't have to care about MSI isolation: there

>>>>>> will be

>>>>>> no IOMMU binding between the device and any MSI doorbell. If it is we

>>>>>> check the msi domain is backed by an MSI controller able to

>>>>>> perform MSI

>>>>>> isolation.

>>>>>>

>>>>>> So effectively "usage of MSIs" is improper - since it is decided

>>>>>> after

>>>>>> the group attachment anyway  - and the commit message should rather

>>>>>> state "if the device is linked to an MSI controller" (dt msi-parent

>>>>>> notion I think).

>>>>>

>>>>> Hmm, I think Alex has a point here - on a GICv2m I can happily fire

>>>>> arbitrary MSIs from _a shell_ (using /dev/mem), and the CPUs

>>>>> definitely

>>>>> aren't in an MSI domain, so I don't think it's valid to assume that a

>>>>> device using only wired interrupts, therefore with no connection to

>>>>> any

>>>>> MSI controller, isn't still capable of maliciously spewing DMA all

>>>>> over

>>>>> any and every doorbell region in the system.

>>>>

>>>> Sorry but I still don't get the point. For the device to reach the

>>>> doorbell there must be an IOMMU mapping.

>>>

>>> Only when the doorbell is _downstream_ of IOMMU translation.

>> Yes but that's the root hypothesis with VFIO assignment, right?

>>

>> Except if the no-iommu option is explicitly set by the userspace, there

>> must be an IOMMU downstream to the device that protects all the DMA

>> transactions.

> 

> The IOMMU is downstream of the device, clearly, but the MSI machinery

> itself could be _in between_ the two - say, integrated into a PCIe root

> complex served by an external SMMU. The doorbell is carved out of the

> upstream address space so that a write targeting the appropriate address

> hits it directly and never even goes out to the SMMU - e.g.

> http://thread.gmane.org/gmane.linux.kernel.pci/47174/focus=47268


Ah OK, I now better understand the point. So in that case not all the
DMA accesses of the devices are properly protected by the sMMU. Doesn't
it look as a kind of PCIe Access Control Service issue here where
transactions are able to bypass the translation service (downstream to
the root complex)?

I clearly did not address that kind of topology in this series. In such
a case the MSIs may even not be mapped at all which was the primary
purpose of the series.

Do you have any clue about how to detect that situation? If we cannot
rely on any iommu service I don't find any other solution but to
recognize this situation and reject the assignment.

> 

>>>> - if the device is not attached to an MSI domain, there won't be any

>>>> doorbell iommu mapping built by this series, so no risk, right?

>>>>

>>>> The device will be allowed to reach only memory iommu mapped by

>>>> userspace with VFIO DMA MAP standard API. Of course if the userspace

>>>> can

>>>> mmap all the host PA that's a more general issue, right?

>>>>

>>>> - If the device is attached to an MSI domain (msi-parent link), 2

>>>> cases:

>>>> 1) the MSI controller advertises MSI isolation (ITS cases), no risk

>>>> 2) the MSI controller does not advertise MSI isolation (GICv2m), there

>>>> is a security hole.

>>>>      a) by default we reject the device attachment

>>>>      b) if the userspace overrides the safe interrupt option he accepts

>>>> the security hole

>>>>

>>>> What am I missing?

>>>

>>> The x86-with-interrupt-remapping-disabled case. Currently, without

>>> unsafe_interrupts, everything is rejected - with this patch, we'll still

>>> reject anything with an MSI domain, but e.g. legacy PCI devices using

>>> INTx would be free to scribble all over the un-translated interrupt

>>> region willy-nilly.

>> Assuming we have an IOMMU, either we have translated transactions or

>> faults? So if legacy PCI devices try to write into a doorbell, there

>> must be faults because there is no IOMMU mapping for doorbells?

> 

> To the best of my understanding, having been wading through the VT-d

> spec, the case on x86 is as above (OK, so strictly it's more "in

> parallel with" than "in front of") - writes outside the 0xFEExxxxx

> interrupt region go to the DMA remapping units, whereas writes within

> that region go to the interrupt remapping unit, and if that's disabled

> or not implemented, simply sail straight through to the APIC beyond.

that's my understanding too

Best Regards

Eric
> 

> Robin.

> 

>> Best Regards

>>

>> Eric

>>   That's the subtle, but significant, change in

>>> behaviour.

>>>

>>> Robin.

>>>

>>>> Best Regards

>>>>

>>>> Eric

>>>>>

>>>>> Robin.

>>>>>

>>>>>> Does it sound better?

>>>>>>

>>>>>> Regards

>>>>>>

>>>>>> Eric

>>>>>>>

>>>>>>>> or if the device uses MSI and the msi-parent controller

>>>>>>>> supports IRQ remapping.

>>>>>>>>

>>>>>>>> Then we check at group level if all devices have safe

>>>>>>>> interrupts: if

>>>>>>>> not,

>>>>>>>> we only allow the group to be attached if

>>>>>>>> allow_unsafe_interrupts is

>>>>>>>> set.

>>>>>>>>

>>>>>>>> At this point ARM sMMU still advertises IOMMU_CAP_INTR_REMAP.

>>>>>>>> This is

>>>>>>>> changed in next patch.

>>>>>>>>

>>>>>>>> Signed-off-by: Eric Auger <eric.auger@linaro.org>

>>>>>>>>

>>>>>>>> ---

>>>>>>>> v3 -> v4:

>>>>>>>> - rename vfio_msi_parent_irq_remapping_capable into

>>>>>>>> vfio_safe_irq_domain

>>>>>>>>      and irq_remapping into safe_irq_domains

>>>>>>>>

>>>>>>>> v2 -> v3:

>>>>>>>> - protect vfio_msi_parent_irq_remapping_capable with

>>>>>>>>      CONFIG_GENERIC_MSI_IRQ_DOMAIN

>>>>>>>> ---

>>>>>>>>     drivers/vfio/vfio_iommu_type1.c | 44

>>>>>>>> +++++++++++++++++++++++++++++++++++++++--

>>>>>>>>     1 file changed, 42 insertions(+), 2 deletions(-)

>>>>>>>>

>>>>>>>> diff --git a/drivers/vfio/vfio_iommu_type1.c

>>>>>>>> b/drivers/vfio/vfio_iommu_type1.c

>>>>>>>> index 4d3a6f1..2fc8197 100644

>>>>>>>> --- a/drivers/vfio/vfio_iommu_type1.c

>>>>>>>> +++ b/drivers/vfio/vfio_iommu_type1.c

>>>>>>>> @@ -37,6 +37,8 @@

>>>>>>>>     #include <linux/vfio.h>

>>>>>>>>     #include <linux/workqueue.h>

>>>>>>>>     #include <linux/msi-iommu.h>

>>>>>>>> +#include <linux/irqdomain.h>

>>>>>>>> +#include <linux/msi.h>

>>>>>>>>

>>>>>>>>     #define DRIVER_VERSION  "0.2"

>>>>>>>>     #define DRIVER_AUTHOR   "Alex Williamson

>>>>>>>> <alex.williamson@redhat.com>"

>>>>>>>> @@ -777,6 +779,33 @@ static int vfio_bus_type(struct device *dev,

>>>>>>>> void *data)

>>>>>>>>         return 0;

>>>>>>>>     }

>>>>>>>>

>>>>>>>> +/**

>>>>>>>> + * vfio_safe_irq_domain: returns whether the irq domain

>>>>>>>> + * the device is attached to is safe with respect to MSI

>>>>>>>> isolation.

>>>>>>>> + * If the irq domain is not an MSI domain, we return it is safe.

>>>>>>>> + *

>>>>>>>> + * @dev: device handle

>>>>>>>> + * @data: unused

>>>>>>>> + * returns 0 if the irq domain is safe, -1 if not.

>>>>>>>> + */

>>>>>>>> +static int vfio_safe_irq_domain(struct device *dev, void *data)

>>>>>>>> +{

>>>>>>>> +#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN

>>>>>>>> +    struct irq_domain *domain;

>>>>>>>> +    struct msi_domain_info *info;

>>>>>>>> +

>>>>>>>> +    domain = dev_get_msi_domain(dev);

>>>>>>>> +    if (!domain)

>>>>>>>> +        return 0;

>>>>>>>> +

>>>>>>>> +    info = msi_get_domain_info(domain);

>>>>>>>> +

>>>>>>>> +    if (!(info->flags & MSI_FLAG_IRQ_REMAPPING))

>>>>>>>> +        return -1;

>>>>>>>> +#endif

>>>>>>>> +    return 0;

>>>>>>>> +}

>>>>>>>> +

>>>>>>>>     static int vfio_iommu_replay(struct vfio_iommu *iommu,

>>>>>>>>                      struct vfio_domain *domain)

>>>>>>>>     {

>>>>>>>> @@ -870,7 +899,7 @@ static int vfio_iommu_type1_attach_group(void

>>>>>>>> *iommu_data,

>>>>>>>>         struct vfio_group *group, *g;

>>>>>>>>         struct vfio_domain *domain, *d;

>>>>>>>>         struct bus_type *bus = NULL;

>>>>>>>> -    int ret;

>>>>>>>> +    int ret, safe_irq_domains;

>>>>>>>>

>>>>>>>>         mutex_lock(&iommu->lock);

>>>>>>>>

>>>>>>>> @@ -893,6 +922,13 @@ static int vfio_iommu_type1_attach_group(void

>>>>>>>> *iommu_data,

>>>>>>>>

>>>>>>>>         group->iommu_group = iommu_group;

>>>>>>>>

>>>>>>>> +    /*

>>>>>>>> +     * Determine if all the devices of the group have a safe irq

>>>>>>>> domain

>>>>>>>> +     * with respect to MSI isolation

>>>>>>>> +     */

>>>>>>>> +    safe_irq_domains = !iommu_group_for_each_dev(iommu_group,

>>>>>>>> &bus,

>>>>>>>> +                       vfio_safe_irq_domain);

>>>>>>>> +

>>>>>>>>         /* Determine bus_type in order to allocate a domain */

>>>>>>>>         ret = iommu_group_for_each_dev(iommu_group, &bus,

>>>>>>>> vfio_bus_type);

>>>>>>>>         if (ret)

>>>>>>>> @@ -920,8 +956,12 @@ static int vfio_iommu_type1_attach_group(void

>>>>>>>> *iommu_data,

>>>>>>>>         INIT_LIST_HEAD(&domain->group_list);

>>>>>>>>         list_add(&group->next, &domain->group_list);

>>>>>>>>

>>>>>>>> +    /*

>>>>>>>> +     * to advertise safe interrupts either the IOMMU or the MSI

>>>>>>>> controllers

>>>>>>>> +     * must support IRQ remapping/interrupt translation

>>>>>>>> +     */

>>>>>>>>         if (!allow_unsafe_interrupts &&

>>>>>>>> -        !iommu_capable(bus, IOMMU_CAP_INTR_REMAP)) {

>>>>>>>> +        (!iommu_capable(bus, IOMMU_CAP_INTR_REMAP) &&

>>>>>>>> !safe_irq_domains)) {

>>>>>>>>             pr_warn("%s: No interrupt remapping support.  Use the

>>>>>>>> module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU

>>>>>>>> support on this platform\n",

>>>>>>>>                    __func__);

>>>>>>>>             ret = -EPERM;

>>>>>>>

>>>>>>

>>>>>

>>>>

>>>

>>

>

Patch

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 4d3a6f1..2fc8197 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -37,6 +37,8 @@ 
 #include <linux/vfio.h>
 #include <linux/workqueue.h>
 #include <linux/msi-iommu.h>
+#include <linux/irqdomain.h>
+#include <linux/msi.h>
 
 #define DRIVER_VERSION  "0.2"
 #define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
@@ -777,6 +779,33 @@  static int vfio_bus_type(struct device *dev, void *data)
 	return 0;
 }
 
+/**
+ * vfio_safe_irq_domain: returns whether the irq domain
+ * the device is attached to is safe with respect to MSI isolation.
+ * If the irq domain is not an MSI domain, we return it is safe.
+ *
+ * @dev: device handle
+ * @data: unused
+ * returns 0 if the irq domain is safe, -1 if not.
+ */
+static int vfio_safe_irq_domain(struct device *dev, void *data)
+{
+#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+	struct irq_domain *domain;
+	struct msi_domain_info *info;
+
+	domain = dev_get_msi_domain(dev);
+	if (!domain)
+		return 0;
+
+	info = msi_get_domain_info(domain);
+
+	if (!(info->flags & MSI_FLAG_IRQ_REMAPPING))
+		return -1;
+#endif
+	return 0;
+}
+
 static int vfio_iommu_replay(struct vfio_iommu *iommu,
 			     struct vfio_domain *domain)
 {
@@ -870,7 +899,7 @@  static int vfio_iommu_type1_attach_group(void *iommu_data,
 	struct vfio_group *group, *g;
 	struct vfio_domain *domain, *d;
 	struct bus_type *bus = NULL;
-	int ret;
+	int ret, safe_irq_domains;
 
 	mutex_lock(&iommu->lock);
 
@@ -893,6 +922,13 @@  static int vfio_iommu_type1_attach_group(void *iommu_data,
 
 	group->iommu_group = iommu_group;
 
+	/*
+	 * Determine if all the devices of the group have a safe irq domain
+	 * with respect to MSI isolation
+	 */
+	safe_irq_domains = !iommu_group_for_each_dev(iommu_group, &bus,
+				       vfio_safe_irq_domain);
+
 	/* Determine bus_type in order to allocate a domain */
 	ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);
 	if (ret)
@@ -920,8 +956,12 @@  static int vfio_iommu_type1_attach_group(void *iommu_data,
 	INIT_LIST_HEAD(&domain->group_list);
 	list_add(&group->next, &domain->group_list);
 
+	/*
+	 * to advertise safe interrupts either the IOMMU or the MSI controllers
+	 * must support IRQ remapping/interrupt translation
+	 */
 	if (!allow_unsafe_interrupts &&
-	    !iommu_capable(bus, IOMMU_CAP_INTR_REMAP)) {
+	    (!iommu_capable(bus, IOMMU_CAP_INTR_REMAP) && !safe_irq_domains)) {
 		pr_warn("%s: No interrupt remapping support.  Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
 		       __func__);
 		ret = -EPERM;