diff mbox

[Xen-devel,v3,07/24] xen/arm: Introduce xen, passthrough property

Message ID 1421159133-31526-8-git-send-email-julien.grall@linaro.org
State Superseded, archived
Headers show

Commit Message

Julien Grall Jan. 13, 2015, 2:25 p.m. UTC
When a device is marked for passthrough (via the new property "xen,passthrough"),
dom0 must not access to the device (i.e not loading a driver), but should
be able to manage the MMIO/interrupt of the passthrough device.

The latter part will allow the toolstack to map MMIO/IRQ when a device
is pass through to a guest.

The property "xen,passthrough" will be translated as 'status="disabled"'
in the device tree to avoid DOM0 using the device. We assume that DOM0 is
able to cope with this property (already the case for Linux).

Rework the function map_device (renamed into handle_device) to:

* For a given device node:
    - Give permission to manage IRQ/MMIO for this device
    - Retrieve the IRQ configuration (i.e edge/level) from the device
    tree
* When the device is not marked for guest passthrough:
    - Assign the device to the guest if it's protected by an IOMMU
    - Map the IRQs and MMIOs regions to the guest

Signed-off-by: Julien Grall <julien.grall@linaro.org>

---
    Changes in v3:
        - This patch was formely "xen/arm: Follow-up to allow DOM0
        manage IRQ and MMIO". It has been split in 2 parts [1].
        - Update commit title and improve message
        - Remove spurious change

[1] https://patches.linaro.org/34669/
---
 docs/misc/arm/device-tree/passthrough.txt |   7 ++
 xen/arch/arm/device.c                     |   2 +-
 xen/arch/arm/domain_build.c               | 102 ++++++++++++++++++++++--------
 xen/common/device_tree.c                  |   6 ++
 xen/include/xen/device_tree.h             |  11 ++++
 5 files changed, 100 insertions(+), 28 deletions(-)
 create mode 100644 docs/misc/arm/device-tree/passthrough.txt

Comments

Julien Grall Jan. 28, 2015, 4:53 p.m. UTC | #1
On 28/01/15 16:36, Stefano Stabellini wrote:
> On Tue, 13 Jan 2015, Julien Grall wrote:
>> When a device is marked for passthrough (via the new property "xen,passthrough"),
>> dom0 must not access to the device (i.e not loading a driver), but should
>> be able to manage the MMIO/interrupt of the passthrough device.
>>
>> The latter part will allow the toolstack to map MMIO/IRQ when a device
>> is pass through to a guest.
>>
>> The property "xen,passthrough" will be translated as 'status="disabled"'
>> in the device tree to avoid DOM0 using the device. We assume that DOM0 is
>> able to cope with this property (already the case for Linux).
>>
>> Rework the function map_device (renamed into handle_device) to:
>>
>> * For a given device node:
>>     - Give permission to manage IRQ/MMIO for this device
>>     - Retrieve the IRQ configuration (i.e edge/level) from the device
>>     tree
>> * When the device is not marked for guest passthrough:
>>     - Assign the device to the guest if it's protected by an IOMMU
>>     - Map the IRQs and MMIOs regions to the guest
>>
>> Signed-off-by: Julien Grall <julien.grall@linaro.org>
>>
>> ---
>>     Changes in v3:
>>         - This patch was formely "xen/arm: Follow-up to allow DOM0
>>         manage IRQ and MMIO". It has been split in 2 parts [1].
>>         - Update commit title and improve message
>>         - Remove spurious change
>>
>> [1] https://patches.linaro.org/34669/
>> ---
>>  docs/misc/arm/device-tree/passthrough.txt |   7 ++
>>  xen/arch/arm/device.c                     |   2 +-
>>  xen/arch/arm/domain_build.c               | 102 ++++++++++++++++++++++--------
>>  xen/common/device_tree.c                  |   6 ++
>>  xen/include/xen/device_tree.h             |  11 ++++
>>  5 files changed, 100 insertions(+), 28 deletions(-)
>>  create mode 100644 docs/misc/arm/device-tree/passthrough.txt
>>
>> diff --git a/docs/misc/arm/device-tree/passthrough.txt b/docs/misc/arm/device-tree/passthrough.txt
>> new file mode 100644
>> index 0000000..04645b3
>> --- /dev/null
>> +++ b/docs/misc/arm/device-tree/passthrough.txt
>> @@ -0,0 +1,7 @@
>> +Device passthrough
>> +===================
>> +
>> +Any device that will be passthrough to a guest should have a property
>> +"xen,passthrough" in their device tree node.
>> +
>> +The device won't be exposed to DOM0 and therefore no driver will be loaded.
>> diff --git a/xen/arch/arm/device.c b/xen/arch/arm/device.c
>> index 1993929..1a01793 100644
>> --- a/xen/arch/arm/device.c
>> +++ b/xen/arch/arm/device.c
>> @@ -30,7 +30,7 @@ int __init device_init(struct dt_device_node *dev, enum device_match type,
>>  
>>      ASSERT(dev != NULL);
>>  
>> -    if ( !dt_device_is_available(dev) )
>> +    if ( !dt_device_is_available(dev) || dt_device_for_passthrough(dev) )
>>          return  -ENODEV;
>>  
>>      for ( desc = _sdevice; desc != _edevice; desc++ )
>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
>> index f68755f..b48b5d0 100644
>> --- a/xen/arch/arm/domain_build.c
>> +++ b/xen/arch/arm/domain_build.c
>> @@ -402,7 +402,7 @@ static int write_properties(struct domain *d, struct kernel_info *kinfo,
>>                              const struct dt_device_node *node)
>>  {
>>      const char *bootargs = NULL;
>> -    const struct dt_property *prop;
>> +    const struct dt_property *prop, *status = NULL;
>>      int res = 0;
>>      int had_dom0_bootargs = 0;
>>  
>> @@ -457,6 +457,17 @@ static int write_properties(struct domain *d, struct kernel_info *kinfo,
>>              }
>>          }
>>  
>> +        /* Don't expose the property "xen,passthrough" to the guest */
>> +        if ( dt_property_name_is_equal(prop, "xen,passthrough") )
>> +            continue;
>> +
>> +        /* Remember and skip the status property as Xen may modify it later */
>> +        if ( dt_property_name_is_equal(prop, "status") )
>> +        {
>> +            status = prop;
>> +            continue;
>> +        }
>> +
>>          res = fdt_property(kinfo->fdt, prop->name, prop_data, prop_len);
>>  
>>          xfree(new_data);
>> @@ -465,6 +476,19 @@ static int write_properties(struct domain *d, struct kernel_info *kinfo,
>>              return res;
>>      }
>>  
>> +    /*
>> +     * Override the property "status" to disable the device when it's
>> +     * marked for passthrough.
>> +     */
>> +    if ( dt_device_for_passthrough(node) )
>> +        res = fdt_property_string(kinfo->fdt, "status", "disabled");
>> +    else if ( status )
>> +        res = fdt_property(kinfo->fdt, "status", status->value,
>> +                           status->length);
> 
> Why is the "else" needed? Wouldn't the status property for
> non-passtrough devices already be covered by the
> dt_for_each_property_node loop above?


Because the property "status" is skipped earlier in any case.


Regards,
Julien Grall Feb. 20, 2015, 5:01 p.m. UTC | #2
Hi Ian,

On 20/02/15 15:38, Ian Campbell wrote:
> On Tue, 2015-01-13 at 14:25 +0000, Julien Grall wrote:
>> When a device is marked for passthrough (via the new property "xen,passthrough"),
>> dom0 must not access to the device (i.e not loading a driver), but should
> 
> "must not access the device (i.e. not load a driver)"
> 
> perhaps "should still be able"
> 
>> be able to manage the MMIO/interrupt of the passthrough device.
>>
>> The latter part will allow the toolstack to map MMIO/IRQ when a device
>> is pass through to a guest.
>>
>> The property "xen,passthrough" will be translated as 'status="disabled"'
>> in the device tree to avoid DOM0 using the device. We assume that DOM0 is
>> able to cope with this property (already the case for Linux).
> 
> Is it true to say "already the case for Linux, and required by ePAPR"?
> Or are we relying on a Linux implementation detail here?

The ePAPR says:

"Indicates that the device is not presently operational, but it might
become operational in the future (for example, something is not plugged
in, or switched off).
Refer to the device binding for details on what disabled means for a
given device."

So I think we can say "required by ePAPR".

>> diff --git a/docs/misc/arm/device-tree/passthrough.txt b/docs/misc/arm/device-tree/passthrough.txt
>> new file mode 100644
>> index 0000000..04645b3
>> --- /dev/null
>> +++ b/docs/misc/arm/device-tree/passthrough.txt
>> @@ -0,0 +1,7 @@
>> +Device passthrough
>> +===================
>> +
>> +Any device that will be passthrough to a guest should have a property
>> +"xen,passthrough" in their device tree node.
>> +
>> +The device won't be exposed to DOM0 and therefore no driver will be loaded.
> 
> This (and the commit message to some extent) seem stricter than what I
> think is actually required here.
> 
> I understand that it is a very good idea for any sort of passthrough to
> prevent dom0 from messing with a device before it gets assigned to the
> eventual guest domain.
> 
> But AIUI it is not strictly speaking a hard requirement that dom0 does
> not do this and depending on the device it may be perfectly acceptable
> for dom0 to drive a device for a bit and then give it to a guest and
> then have it back again etc.

When the device has the property "xen,passthrough", Xen won't map the
MMIO and IRQ to DOM0. So it's an hard requirement.

We may want to be less strict if we decide to let DOM0 cope with
platform device passthrough (as it's done for PCI).

But this is more complicate than the current use case for this type of
passthrough.

Regards,
Julien Grall Feb. 20, 2015, 5:03 p.m. UTC | #3
On 20/02/15 15:42, Ian Campbell wrote:
> On Tue, 2015-01-13 at 14:25 +0000, Julien Grall wrote:
>> @@ -919,8 +943,14 @@ static int make_timer_node(const struct domain *d, void *fdt,
>>      return res;
>>  }
>>  
>> -/* Map the device in the domain */
>> -static int map_device(struct domain *d, struct dt_device_node *dev)
>> +/* For a given device node:
> 
> Strictly speaking should be:
>  /*
>   * For a given...
> 
> (I don't care all that much, but since I'm commenting elsewhere)

Hmmm right. I will change it.

>> @@ -947,7 +979,7 @@ static int map_device(struct domain *d, struct dt_device_node *dev)
>>          }
>>      }
>>  
>> -    /* Map IRQs */
>> +    /* Give permission and  map IRQs */
> 
> Another Nit: "  " -> " ".
> 
>> +        if ( need_mapping )
>> +        {
>> +            /*
>> +             * Checking the return of vgic_reserve_virq is not
>> +             * necessary. It should not fail except when we try to map
>> +             * twice the IRQ. This can happen if the IRQ is shared
> 
> "when we try to map the IRQ twice"
> 
> Other than those nits the code itself looks good, will ack once we've
> agreed on the bindings wording.

BTW, should we upstream the bindings to device tree git?

Thanks,
Julien Grall Feb. 23, 2015, 3:43 p.m. UTC | #4
Hi Ian,

On 23/02/15 15:12, Ian Campbell wrote:
> On Fri, 2015-02-20 at 17:01 +0000, Julien Grall wrote:
>>>> diff --git a/docs/misc/arm/device-tree/passthrough.txt b/docs/misc/arm/device-tree/passthrough.txt
>>>> new file mode 100644
>>>> index 0000000..04645b3
>>>> --- /dev/null
>>>> +++ b/docs/misc/arm/device-tree/passthrough.txt
>>>> @@ -0,0 +1,7 @@
>>>> +Device passthrough
>>>> +===================
>>>> +
>>>> +Any device that will be passthrough to a guest should have a property
>>>> +"xen,passthrough" in their device tree node.
>>>> +
>>>> +The device won't be exposed to DOM0 and therefore no driver will be loaded.
>>>
>>> This (and the commit message to some extent) seem stricter than what I
>>> think is actually required here.
>>>
>>> I understand that it is a very good idea for any sort of passthrough to
>>> prevent dom0 from messing with a device before it gets assigned to the
>>> eventual guest domain.
>>>
>>> But AIUI it is not strictly speaking a hard requirement that dom0 does
>>> not do this and depending on the device it may be perfectly acceptable
>>> for dom0 to drive a device for a bit and then give it to a guest and
>>> then have it back again etc.
>>
>> When the device has the property "xen,passthrough", Xen won't map the
>> MMIO and IRQ to DOM0. So it's an hard requirement.
> 
> That's not what I meant.
> 
> What I was getting at is that it's not 100% mandatory for a device you
> want to pass through to have the "xen,passthrough" property at all, but
> the wording of the binding implies otherwise..
> 
> There will be at least some devices in the world (even if only trivial
> ones) where you can pass them through even if dom0 has been touching
> them before. Or dom0 might choose to implement a reset helper for some
> device or other.

Hmmm right... I will update the binding.

Regards,
Julien Grall Feb. 23, 2015, 3:44 p.m. UTC | #5
On 23/02/15 15:15, Ian Campbell wrote:
> On Fri, 2015-02-20 at 17:03 +0000, Julien Grall wrote:
>> On 20/02/15 15:42, Ian Campbell wrote:
>>> On Tue, 2015-01-13 at 14:25 +0000, Julien Grall wrote:
>>>> @@ -919,8 +943,14 @@ static int make_timer_node(const struct domain *d, void *fdt,
>>>>      return res;
>>>>  }
>>>>  
>>>> -/* Map the device in the domain */
>>>> -static int map_device(struct domain *d, struct dt_device_node *dev)
>>>> +/* For a given device node:
>>>
>>> Strictly speaking should be:
>>>  /*
>>>   * For a given...
>>>
>>> (I don't care all that much, but since I'm commenting elsewhere)
>>
>> Hmmm right. I will change it.
> 
> FWIW I noticed this pattern a lot in this series.
> 
>>>> @@ -947,7 +979,7 @@ static int map_device(struct domain *d, struct dt_device_node *dev)
>>>>          }
>>>>      }
>>>>  
>>>> -    /* Map IRQs */
>>>> +    /* Give permission and  map IRQs */
>>>
>>> Another Nit: "  " -> " ".
>>>
>>>> +        if ( need_mapping )
>>>> +        {
>>>> +            /*
>>>> +             * Checking the return of vgic_reserve_virq is not
>>>> +             * necessary. It should not fail except when we try to map
>>>> +             * twice the IRQ. This can happen if the IRQ is shared
>>>
>>> "when we try to map the IRQ twice"
>>>
>>> Other than those nits the code itself looks good, will ack once we've
>>> agreed on the bindings wording.
>>
>> BTW, should we upstream the bindings to device tree git?
> 
> Arguably we should upstream all of our bindings (e.g.
> docs/misc/arm/device-tree/*, admittedly a single file right now) but
> doing just one/some seems worse than keeping them in tree.
> 
> IOW it should be all or nothing, and I have no problem with you deciding
> that nothing is easier for you here...

For now, I will stick for nothing :).

Regards,
diff mbox

Patch

diff --git a/docs/misc/arm/device-tree/passthrough.txt b/docs/misc/arm/device-tree/passthrough.txt
new file mode 100644
index 0000000..04645b3
--- /dev/null
+++ b/docs/misc/arm/device-tree/passthrough.txt
@@ -0,0 +1,7 @@ 
+Device passthrough
+===================
+
+Any device that will be passthrough to a guest should have a property
+"xen,passthrough" in their device tree node.
+
+The device won't be exposed to DOM0 and therefore no driver will be loaded.
diff --git a/xen/arch/arm/device.c b/xen/arch/arm/device.c
index 1993929..1a01793 100644
--- a/xen/arch/arm/device.c
+++ b/xen/arch/arm/device.c
@@ -30,7 +30,7 @@  int __init device_init(struct dt_device_node *dev, enum device_match type,
 
     ASSERT(dev != NULL);
 
-    if ( !dt_device_is_available(dev) )
+    if ( !dt_device_is_available(dev) || dt_device_for_passthrough(dev) )
         return  -ENODEV;
 
     for ( desc = _sdevice; desc != _edevice; desc++ )
diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index f68755f..b48b5d0 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -402,7 +402,7 @@  static int write_properties(struct domain *d, struct kernel_info *kinfo,
                             const struct dt_device_node *node)
 {
     const char *bootargs = NULL;
-    const struct dt_property *prop;
+    const struct dt_property *prop, *status = NULL;
     int res = 0;
     int had_dom0_bootargs = 0;
 
@@ -457,6 +457,17 @@  static int write_properties(struct domain *d, struct kernel_info *kinfo,
             }
         }
 
+        /* Don't expose the property "xen,passthrough" to the guest */
+        if ( dt_property_name_is_equal(prop, "xen,passthrough") )
+            continue;
+
+        /* Remember and skip the status property as Xen may modify it later */
+        if ( dt_property_name_is_equal(prop, "status") )
+        {
+            status = prop;
+            continue;
+        }
+
         res = fdt_property(kinfo->fdt, prop->name, prop_data, prop_len);
 
         xfree(new_data);
@@ -465,6 +476,19 @@  static int write_properties(struct domain *d, struct kernel_info *kinfo,
             return res;
     }
 
+    /*
+     * Override the property "status" to disable the device when it's
+     * marked for passthrough.
+     */
+    if ( dt_device_for_passthrough(node) )
+        res = fdt_property_string(kinfo->fdt, "status", "disabled");
+    else if ( status )
+        res = fdt_property(kinfo->fdt, "status", status->value,
+                           status->length);
+
+    if ( res )
+        return res;
+
     if ( dt_node_path_is_equal(node, "/chosen") )
     {
         const struct bootmodule *mod = kinfo->initrd_bootmodule;
@@ -919,8 +943,14 @@  static int make_timer_node(const struct domain *d, void *fdt,
     return res;
 }
 
-/* Map the device in the domain */
-static int map_device(struct domain *d, struct dt_device_node *dev)
+/* For a given device node:
+ *  - Give permission to the guest to manage IRQ and MMIO range
+ *  - Retrieve the IRQ configuration (i.e edge/level) from device tree
+ * When the device is not marked for guest passthrough:
+ *  - Assign the device to the guest if it's protected by an IOMMU
+ *  - Map the IRQs and iomem regions to DOM0
+ */
+static int handle_device(struct domain *d, struct dt_device_node *dev)
 {
     unsigned int nirq;
     unsigned int naddr;
@@ -929,13 +959,15 @@  static int map_device(struct domain *d, struct dt_device_node *dev)
     unsigned int irq;
     struct dt_raw_irq rirq;
     u64 addr, size;
+    bool_t need_mapping = !dt_device_for_passthrough(dev);
 
     nirq = dt_number_of_irq(dev);
     naddr = dt_number_of_address(dev);
 
-    DPRINT("%s nirq = %d naddr = %u\n", dt_node_full_name(dev), nirq, naddr);
+    DPRINT("%s passthrough = %d nirq = %d naddr = %u\n", dt_node_full_name(dev),
+           need_mapping, nirq, naddr);
 
-    if ( dt_device_is_protected(dev) )
+    if ( dt_device_is_protected(dev) && need_mapping )
     {
         DPRINT("%s setup iommu\n", dt_node_full_name(dev));
         res = iommu_assign_dt_device(d, dev);
@@ -947,7 +979,7 @@  static int map_device(struct domain *d, struct dt_device_node *dev)
         }
     }
 
-    /* Map IRQs */
+    /* Give permission and  map IRQs */
     for ( i = 0; i < nirq; i++ )
     {
         res = dt_device_get_raw_irq(dev, i, &rirq);
@@ -980,22 +1012,34 @@  static int map_device(struct domain *d, struct dt_device_node *dev)
         irq = res;
 
         DPRINT("irq %u = %u\n", i, irq);
-        /*
-         * Checking the return of vgic_reserve_virq is not
-         * necessary. It should not fail except when we try to map
-         * twice the IRQ. This can happen if the IRQ is shared
-         */
-        vgic_reserve_virq(d, irq);
-        res = route_irq_to_guest(d, irq, dt_node_name(dev));
+
+        res = irq_permit_access(d, irq);
         if ( res )
         {
-            printk(XENLOG_ERR "Unable to route IRQ %u to domain %u\n",
-                   irq, d->domain_id);
+            printk(XENLOG_ERR "Unable to permit to dom%u access to IRQ %u\n",
+                   d->domain_id, irq);
             return res;
         }
+
+        if ( need_mapping )
+        {
+            /*
+             * Checking the return of vgic_reserve_virq is not
+             * necessary. It should not fail except when we try to map
+             * twice the IRQ. This can happen if the IRQ is shared
+             */
+            vgic_reserve_virq(d, irq);
+            res = route_irq_to_guest(d, irq, dt_node_name(dev));
+            if ( res )
+            {
+                printk(XENLOG_ERR "Unable to route IRQ %u to domain %u\n",
+                       irq, d->domain_id);
+                return res;
+            }
+        }
     }
 
-    /* Map the address ranges */
+    /* Give permission and map MMIOs */
     for ( i = 0; i < naddr; i++ )
     {
         res = dt_device_get_address(dev, i, &addr, &size);
@@ -1019,17 +1063,21 @@  static int map_device(struct domain *d, struct dt_device_node *dev)
                    addr & PAGE_MASK, PAGE_ALIGN(addr + size) - 1);
             return res;
         }
-        res = map_mmio_regions(d,
-                               paddr_to_pfn(addr & PAGE_MASK),
-                               DIV_ROUND_UP(size, PAGE_SIZE),
-                               paddr_to_pfn(addr & PAGE_MASK));
-        if ( res )
+
+        if ( need_mapping )
         {
-            printk(XENLOG_ERR "Unable to map 0x%"PRIx64
-                   " - 0x%"PRIx64" in domain %d\n",
-                   addr & PAGE_MASK, PAGE_ALIGN(addr + size) - 1,
-                   d->domain_id);
-            return res;
+            res = map_mmio_regions(d,
+                                   paddr_to_pfn(addr & PAGE_MASK),
+                                   DIV_ROUND_UP(size, PAGE_SIZE),
+                                   paddr_to_pfn(addr & PAGE_MASK));
+            if ( res )
+            {
+                printk(XENLOG_ERR "Unable to map 0x%"PRIx64
+                       " - 0x%"PRIx64" in domain %d\n",
+                       addr & PAGE_MASK, PAGE_ALIGN(addr + size) - 1,
+                       d->domain_id);
+                return res;
+            }
         }
     }
 
@@ -1104,7 +1152,7 @@  static int handle_node(struct domain *d, struct kernel_info *kinfo,
         return 0;
     }
 
-    res = map_device(d, node);
+    res = handle_device(d, node);
     if ( res)
         return res;
 
diff --git a/xen/common/device_tree.c b/xen/common/device_tree.c
index bb9d7ce..ce10574 100644
--- a/xen/common/device_tree.c
+++ b/xen/common/device_tree.c
@@ -1103,6 +1103,12 @@  bool_t dt_device_is_available(const struct dt_device_node *device)
     return 0;
 }
 
+bool_t dt_device_for_passthrough(const struct dt_device_node *device)
+{
+    return (dt_find_property(device, "xen,passthrough", NULL) != NULL);
+
+}
+
 static int __dt_parse_phandle_with_args(const struct dt_device_node *np,
                                         const char *list_name,
                                         const char *cells_name,
diff --git a/xen/include/xen/device_tree.h b/xen/include/xen/device_tree.h
index 890d356..caaf65f 100644
--- a/xen/include/xen/device_tree.h
+++ b/xen/include/xen/device_tree.h
@@ -555,6 +555,17 @@  int dt_n_addr_cells(const struct dt_device_node *np);
 bool_t dt_device_is_available(const struct dt_device_node *device);
 
 /**
+ * dt_device_for_passthrough - Check if a device will be used for
+ * passthrough later
+ *
+ * @device: Node to check
+ *
+ * Return true if the property "xen,passthrough" is present in the node,
+ * false otherwise.
+ */
+bool_t dt_device_for_passthrough(const struct dt_device_node *device);
+
+/**
  * dt_match_node - Tell if a device_node has a matching of dt_device_match
  * @matches: array of dt_device_match structures to search in
  * @node: the dt_device_node structure to match against