diff mbox

[Xen-devel,RFC,14/19] xen/passthrough: dt: Add new domctl XEN_DOMCTL_assign_dt_device

Message ID 1402935486-29136-15-git-send-email-julien.grall@linaro.org
State Superseded, archived
Headers show

Commit Message

Julien Grall June 16, 2014, 4:18 p.m. UTC
A device node is described by a path. It will be used to retrieved the
node in the device tree and assign the related device to the domain.

Only device protected by an IOMMU can be assigned to a guest.

Signed-off-by: Julien Grall <julien.grall@linaro.org>
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
---
 tools/libxc/xc_domain.c               |   29 +++++++++++++++++++++
 tools/libxc/xenctrl.h                 |    4 +++
 xen/drivers/passthrough/device_tree.c |   45 ++++++++++++++++++++++++++++++---
 xen/drivers/passthrough/iommu.c       |    7 +++++
 xen/include/public/domctl.h           |   10 ++++++++
 xen/include/xen/iommu.h               |    3 +++
 6 files changed, 95 insertions(+), 3 deletions(-)

Comments

Jan Beulich June 17, 2014, 8:34 a.m. UTC | #1
>>> On 16.06.14 at 18:18, <julien.grall@linaro.org> wrote:
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h

I think you should be Cc-ing all relevant maintainers for common code
(here: interface) changes.

> @@ -936,6 +936,14 @@ typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t);
>  #endif
>  
> +/* Device Tree: Assign a non-PCI device to a guest */
> +struct xen_domctl_assign_dt_device {
> +    uint32_t size; /* IN: Length of the path */
> +    XEN_GUEST_HANDLE_64(char) path; /* IN: path to the device tree node */

Are paths (encoded as strings) indeed the canonical way of
representing devices? How does the tool stack know what is valid
to be passed in here?

> +};
> +typedef struct xen_domctl_assign_dt_device xen_domctl_assign_dt_device_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_dt_device_t);
> +
>  struct xen_domctl {
>      uint32_t cmd;
>  #define XEN_DOMCTL_createdomain                   1
> @@ -1008,6 +1016,7 @@ struct xen_domctl {
>  #define XEN_DOMCTL_cacheflush                    71
>  #define XEN_DOMCTL_get_vcpu_msrs                 72
>  #define XEN_DOMCTL_set_vcpu_msrs                 73
> +#define XEN_DOMCTL_assign_dt_device              74

How come you get away with just one operation here, when for PCI
pass-through we have three (assign, test-assign, and deassign)?

Jan
Julien Grall June 17, 2014, 1:23 p.m. UTC | #2
Hi Jan,

On 06/17/2014 09:34 AM, Jan Beulich wrote:
>>>> On 16.06.14 at 18:18, <julien.grall@linaro.org> wrote:
>> --- a/xen/include/public/domctl.h
>> +++ b/xen/include/public/domctl.h
> 
> I think you should be Cc-ing all relevant maintainers for common code
> (here: interface) changes.

Sorry, get_maintainers.pl mislead me on the relevant maintainers (see
output below).

42sh> scripts/get_maintainers.pl < this.patch
Ian Jackson <ian.jackson@eu.citrix.com>
Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Ian Campbell <ian.campbell@citrix.com>
xen-devel@lists.xen.org

For some reason it doesn't provide neither Keir nor you.

> 
>> @@ -936,6 +936,14 @@ typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t;
>>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t);
>>  #endif
>>  
>> +/* Device Tree: Assign a non-PCI device to a guest */
>> +struct xen_domctl_assign_dt_device {
>> +    uint32_t size; /* IN: Length of the path */
>> +    XEN_GUEST_HANDLE_64(char) path; /* IN: path to the device tree node */
> 
> Are paths (encoded as strings) indeed the canonical way of
> representing devices?

Yes, a device node is uniquely identified by the full path from the root
node.

This path will look like:

/soc/ethernet@fff51000

> How does the tool stack know what is valid
> to be passed in here?

The path is provided directly by the user. The sanity check is only done
in the hypervisor side.

>> +};
>> +typedef struct xen_domctl_assign_dt_device xen_domctl_assign_dt_device_t;
>> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_dt_device_t);
>> +
>>  struct xen_domctl {
>>      uint32_t cmd;
>>  #define XEN_DOMCTL_createdomain                   1
>> @@ -1008,6 +1016,7 @@ struct xen_domctl {
>>  #define XEN_DOMCTL_cacheflush                    71
>>  #define XEN_DOMCTL_get_vcpu_msrs                 72
>>  #define XEN_DOMCTL_set_vcpu_msrs                 73
>> +#define XEN_DOMCTL_assign_dt_device              74
> 
> How come you get away with just one operation here, when for PCI
> pass-through we have three (assign, test-assign, and deassign)?

As said on my cover letter this a very very RFC. I send it to have some
comment about the design.

For now device are reassign directly in Xen when the domain is
destroyed. I plan to implement deassign in the next version.

I don't think test-assign is useful for non-PCI passthrough. The assign
hypercall will correctly check if we can passthrough this device to the
guest.

Regards,
Jan Beulich June 17, 2014, 1:30 p.m. UTC | #3
>>> On 17.06.14 at 15:23, <julien.grall@linaro.org> wrote:
> On 06/17/2014 09:34 AM, Jan Beulich wrote:
>>>>> On 16.06.14 at 18:18, <julien.grall@linaro.org> wrote:
>>> @@ -1008,6 +1016,7 @@ struct xen_domctl {
>>>  #define XEN_DOMCTL_cacheflush                    71
>>>  #define XEN_DOMCTL_get_vcpu_msrs                 72
>>>  #define XEN_DOMCTL_set_vcpu_msrs                 73
>>> +#define XEN_DOMCTL_assign_dt_device              74
>> 
>> How come you get away with just one operation here, when for PCI
>> pass-through we have three (assign, test-assign, and deassign)?
> 
> As said on my cover letter this a very very RFC. I send it to have some
> comment about the design.
> 
> For now device are reassign directly in Xen when the domain is
> destroyed. I plan to implement deassign in the next version.
> 
> I don't think test-assign is useful for non-PCI passthrough. The assign
> hypercall will correctly check if we can passthrough this device to the
> guest.

But that may mean more diverging code on the tools side. Unless the
current PCI model is wrong or unusable for DT pass-through, I think
best would be to retain the abstract model as is.

Jan
Julien Grall June 17, 2014, 1:48 p.m. UTC | #4
On 06/17/2014 02:30 PM, Jan Beulich wrote:
>>>> On 17.06.14 at 15:23, <julien.grall@linaro.org> wrote:
>> On 06/17/2014 09:34 AM, Jan Beulich wrote:
>>>>>> On 16.06.14 at 18:18, <julien.grall@linaro.org> wrote:
>>>> @@ -1008,6 +1016,7 @@ struct xen_domctl {
>>>>  #define XEN_DOMCTL_cacheflush                    71
>>>>  #define XEN_DOMCTL_get_vcpu_msrs                 72
>>>>  #define XEN_DOMCTL_set_vcpu_msrs                 73
>>>> +#define XEN_DOMCTL_assign_dt_device              74
>>>
>>> How come you get away with just one operation here, when for PCI
>>> pass-through we have three (assign, test-assign, and deassign)?
>>
>> As said on my cover letter this a very very RFC. I send it to have some
>> comment about the design.
>>
>> For now device are reassign directly in Xen when the domain is
>> destroyed. I plan to implement deassign in the next version.
>>
>> I don't think test-assign is useful for non-PCI passthrough. The assign
>> hypercall will correctly check if we can passthrough this device to the
>> guest.
> 
> But that may mean more diverging code on the tools side. Unless the
> current PCI model is wrong or unusable for DT pass-through, I think
> best would be to retain the abstract model as is.

I looked at the libxl code for pci and this is only used in
libxl__device_pci_add just before calling do_pci_add in case of an HVM.

I'm not sure why we function is used but from the DT pass-through POV,
we only need to ask the hypervisor to assign this specific device to the
guest. The hypercall will return "ok" if it has succeeded or an error if
the device is not protected (i.e not behind an IOMMU). I don't see why
we should have a test-assign hypercall in this case...

Anyway, I'm fine to introduce the hypercall for consistency but I don't
plan to use it in the toolstack because it's pointless.

Regards,
Jan Beulich June 17, 2014, 1:55 p.m. UTC | #5
>>> On 17.06.14 at 15:48, <julien.grall@linaro.org> wrote:
> I'm not sure why we function is used but from the DT pass-through POV,
> we only need to ask the hypervisor to assign this specific device to the
> guest. The hypercall will return "ok" if it has succeeded or an error if
> the device is not protected (i.e not behind an IOMMU). I don't see why
> we should have a test-assign hypercall in this case...

The reason for its existence isn't entirely clear to me either - you may
want to ping whoever added this.

> Anyway, I'm fine to introduce the hypercall for consistency but I don't
> plan to use it in the toolstack because it's pointless.

It's the tools maintainers' call in the end, but my advice would be to not
have it used in the PCI case, but not in the DT one. Either rip it out on
the PCI side too, or have it be called even if not strictly needed.

Jan
Ian Campbell July 3, 2014, 11:54 a.m. UTC | #6
On Tue, 2014-06-17 at 14:55 +0100, Jan Beulich wrote:
> >>> On 17.06.14 at 15:48, <julien.grall@linaro.org> wrote:
> > I'm not sure why we function is used but from the DT pass-through POV,
> > we only need to ask the hypervisor to assign this specific device to the
> > guest. The hypercall will return "ok" if it has succeeded or an error if
> > the device is not protected (i.e not behind an IOMMU). I don't see why
> > we should have a test-assign hypercall in this case...
> 
> The reason for its existence isn't entirely clear to me either - you may
> want to ping whoever added this.

Yes, I was about to suggest trawling through git log a bit.

This may be an old xend-ism perhaps.

I can sort of imagine an early sanity check early the availability of
the devices before building the domain (despite the TOCTOU race perhaps
that's useful?)

The other use case might be to be able to list all assignable devices.

Those are all wild guesses on my part though.

Ian.

> 
> > Anyway, I'm fine to introduce the hypercall for consistency but I don't
> > plan to use it in the toolstack because it's pointless.
> 
> It's the tools maintainers' call in the end, but my advice would be to not
> have it used in the PCI case, but not in the DT one. Either rip it out on
> the PCI side too, or have it be called even if not strictly needed.
> 
> Jan
>
diff mbox

Patch

diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index 7909536..ea8fc0d 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -1628,6 +1628,35 @@  int xc_deassign_device(
     return do_domctl(xch, &domctl);
 }
 
+int xc_assign_dt_device(
+    xc_interface *xch,
+    uint32_t domid,
+    char *path)
+{
+    int rc;
+    size_t size = strlen(path);
+    xen_domctl_assign_dt_device_t *assign_dt_device;
+    DECLARE_DOMCTL;
+    DECLARE_HYPERCALL_BOUNCE(path, size, XC_HYPERCALL_BUFFER_BOUNCE_IN);
+
+    if ( xc_hypercall_bounce_pre(xch, path) )
+        return -1;
+
+    domctl.cmd = XEN_DOMCTL_assign_dt_device;
+    domctl.domain = (domid_t)domid;
+
+    assign_dt_device = &domctl.u.assign_dt_device;
+
+    assign_dt_device->size = size;
+    set_xen_guest_handle(assign_dt_device->path, path);
+
+    rc = do_domctl(xch, &domctl);
+
+    xc_hypercall_bounce_post(xch, path);
+
+    return rc;
+}
+
 int xc_domain_update_msi_irq(
     xc_interface *xch,
     uint32_t domid,
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 5ad2d65..07dcadc 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -2010,6 +2010,10 @@  int xc_deassign_device(xc_interface *xch,
                      uint32_t domid,
                      uint32_t machine_bdf);
 
+int xc_assign_dt_device(xc_interface *xch,
+                        uint32_t domid,
+                        char *path);
+
 int xc_domain_memory_mapping(xc_interface *xch,
                              uint32_t domid,
                              unsigned long first_gfn,
diff --git a/xen/drivers/passthrough/device_tree.c b/xen/drivers/passthrough/device_tree.c
index 8a4bc69..9df4343 100644
--- a/xen/drivers/passthrough/device_tree.c
+++ b/xen/drivers/passthrough/device_tree.c
@@ -1,9 +1,6 @@ 
 /*
  * Code to passthrough a device tree node to a guest
  *
- * TODO: This contains only the necessary code to protected device passed to
- * dom0. It will need some updates when device passthrough will is added.
- *
  * Julien Grall <julien.grall@linaro.org>
  * Copyright (c) 2014 Linaro Limited.
  *
@@ -20,6 +17,7 @@ 
 
 #include <xen/lib.h>
 #include <xen/sched.h>
+#include <xen/guest_access.h>
 #include <xen/iommu.h>
 #include <xen/device_tree.h>
 
@@ -111,3 +109,44 @@  void iommu_dt_domain_destroy(struct domain *d)
                     dt_node_full_name(dev), d->domain_id);
     }
 }
+
+int iommu_do_dt_domctl(struct xen_domctl *domctl, struct domain *d,
+                       XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
+{
+    int ret;
+
+    /* TODO: How to deal with XSM? */
+
+    switch ( domctl->cmd )
+    {
+    case XEN_DOMCTL_assign_dt_device:
+    {
+        struct dt_device_node *dev;
+
+
+        /* TODO: Do we need to check is_dying? Mostly to protect against
+         * hypercall trying to passthrough a device while we are
+         * dying.
+         */
+
+        ret = dt_find_node_by_gpath(domctl->u.assign_dt_device.path,
+                                    domctl->u.assign_dt_device.size,
+                                    &dev);
+        if ( ret )
+            break;
+
+        ret = iommu_assign_dt_device(d, dev);
+
+        if ( ret )
+            printk(XENLOG_G_ERR "XEN_DOMCTL_assign_dt_device: assign \"%s\""
+                   " to dom%u failed (%d)\n",
+                   dt_node_full_name(dev), d->domain_id, ret);
+    }
+    break;
+    default:
+        ret = -ENOSYS;
+        break;
+    }
+
+    return ret;
+}
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index d71ab03..8351814 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -343,6 +343,13 @@  int iommu_do_domctl(
     ret = iommu_do_pci_domctl(domctl, d, u_domctl);
 #endif
 
+    if ( ret != -ENOSYS )
+        return ret;
+
+#ifdef HAS_DEVICE_TREE
+    ret = iommu_do_dt_domctl(domctl, d, u_domctl);
+#endif
+
     return ret;
 }
 
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 5b11bbf..66806d2 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -936,6 +936,14 @@  typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t);
 #endif
 
+/* Device Tree: Assign a non-PCI device to a guest */
+struct xen_domctl_assign_dt_device {
+    uint32_t size; /* IN: Length of the path */
+    XEN_GUEST_HANDLE_64(char) path; /* IN: path to the device tree node */
+};
+typedef struct xen_domctl_assign_dt_device xen_domctl_assign_dt_device_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_dt_device_t);
+
 struct xen_domctl {
     uint32_t cmd;
 #define XEN_DOMCTL_createdomain                   1
@@ -1008,6 +1016,7 @@  struct xen_domctl {
 #define XEN_DOMCTL_cacheflush                    71
 #define XEN_DOMCTL_get_vcpu_msrs                 72
 #define XEN_DOMCTL_set_vcpu_msrs                 73
+#define XEN_DOMCTL_assign_dt_device              74
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -1044,6 +1053,7 @@  struct xen_domctl {
         struct xen_domctl_sendtrigger       sendtrigger;
         struct xen_domctl_get_device_group  get_device_group;
         struct xen_domctl_assign_device     assign_device;
+        struct xen_domctl_assign_dt_device  assign_dt_device;
         struct xen_domctl_bind_pt_irq       bind_pt_irq;
         struct xen_domctl_memory_mapping    memory_mapping;
         struct xen_domctl_ioport_mapping    ioport_mapping;
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index 9b2af51..833baca 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -118,6 +118,9 @@  int iommu_deassign_dt_device(struct domain *d, struct dt_device_node *dev);
 int iommu_dt_domain_init(struct domain *d);
 void iommu_dt_domain_destroy(struct domain *d);
 
+int iommu_do_dt_domctl(struct xen_domctl *, struct domain *,
+                       XEN_GUEST_HANDLE_PARAM(xen_domctl_t));
+
 #endif /* HAS_DEVICE_TREE */
 
 struct page_info;