diff mbox

[Xen-devel,v3,20/24] xen/passthrough: Extend XEN_DOMCTL_assign_device to support DT device

Message ID 1421159133-31526-21-git-send-email-julien.grall@linaro.org
State Superseded, archived
Headers show

Commit Message

Julien Grall Jan. 13, 2015, 2:25 p.m. UTC
TODO: Update the commit message

A device node is described by a path. It will be used to retrieved the
node in the device tree and assign the related device to the domain.

Only device protected by an IOMMU can be assigned to a guest.

Signed-off-by: Julien Grall <julien.grall@linaro.org>
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>

---
    Changes in v2:
        - Use a different number for XEN_DOMCTL_assign_dt_device
---
 tools/libxc/include/xenctrl.h         | 10 ++++
 tools/libxc/xc_domain.c               | 95 ++++++++++++++++++++++++++++++++--
 xen/drivers/passthrough/device_tree.c | 97 +++++++++++++++++++++++++++++++++--
 xen/drivers/passthrough/iommu.c       |  7 +++
 xen/drivers/passthrough/pci.c         | 43 +++++++++++-----
 xen/include/public/domctl.h           | 15 +++++-
 xen/include/xen/iommu.h               |  3 ++
 7 files changed, 249 insertions(+), 21 deletions(-)

Comments

Julien Grall Jan. 20, 2015, 2:37 p.m. UTC | #1
On 20/01/15 09:34, Jan Beulich wrote:
>>>> On 13.01.15 at 15:25, <julien.grall@linaro.org> wrote:
>> --- a/xen/drivers/passthrough/iommu.c
>> +++ b/xen/drivers/passthrough/iommu.c
>> @@ -337,6 +337,13 @@ int iommu_do_domctl(
>>      ret = iommu_do_pci_domctl(domctl, d, u_domctl);
>>  #endif
>>  
>> +    if ( ret != -ENOSYS )
>> +        return ret;
>> +
>> +#ifdef HAS_DEVICE_TREE
>> +    ret = iommu_do_dt_domctl(domctl, d, u_domctl);
>> +#endif
> 
> Please move the (inverted) if() into the #ifdef block (but also see
> below regarding the specific error code used).

What do you mean by the "inverted if()"?

>> @@ -1533,13 +1534,19 @@ int iommu_do_pci_domctl(
>>      break;
>>  
>>      case XEN_DOMCTL_test_assign_device:
>> -        ret = xsm_test_assign_device(XSM_HOOK, domctl->u.assign_device.machine_sbdf);
>> +        ret = -ENOSYS;
>> +        if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_PCI )
>> +            break;
> 
> I'm rather uncertain about the use of -ENOSYS here: The hypercall
> isn't unimplemented after all. Provided you use consistent error
> codes in the PCI and DT variants, there's no problem calling the
> other variant if that specific error code got returned from the first
> one - the second would then just return the same one again, even
> if the first one failed on something other than the device type
> check. As a result, I'd much prefer -ENODEV here.

I will use -ENODEV on the next version.

>> +
>> +        machine_sbdf = domctl->u.assign_device.u.pci.machine_sbdf;
>> +
>> +        ret = xsm_test_assign_device(XSM_HOOK, machine_sbdf);
>>          if ( ret )
>>              break;
>>  
>> -        seg = domctl->u.assign_device.machine_sbdf >> 16;
>> -        bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
>> -        devfn = domctl->u.assign_device.machine_sbdf & 0xff;
>> +        seg = machine_sbdf >> 16;
>> +        bus = (machine_sbdf >> 8) & 0xff;
>> +        devfn = machine_sbdf & 0xff;
> 
> If you fiddle with these, please make them use at least PCI_BUS()
> and PCI_DEVFN2() (we don't have a matching macro for retrieving
> the segment).

Ok.

> 
>> --- a/xen/include/public/domctl.h
>> +++ b/xen/include/public/domctl.h
>> @@ -475,12 +475,23 @@ typedef struct xen_domctl_sendtrigger xen_domctl_sendtrigger_t;
>>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendtrigger_t);
>>  
>>  
>> -/* Assign PCI device to HVM guest. Sets up IOMMU structures. */
>> +/* Assign a device to a guest. Sets up IOMMU structures. */
>>  /* XEN_DOMCTL_assign_device */
>>  /* XEN_DOMCTL_test_assign_device */
>>  /* XEN_DOMCTL_deassign_device */
>> +#define XEN_DOMCTL_DEV_PCI      0
>> +#define XEN_DOMCTL_DEV_DT       1
>>  struct xen_domctl_assign_device {
>> -    uint32_t  machine_sbdf;   /* machine PCI ID of assigned device */
>> +    uint32_t dev;   /* XEN_DOMCTL_DEV_* */
>> +    union {
>> +        struct {
>> +            uint32_t machine_sbdf;   /* machine PCI ID of assigned device */
>> +        } pci;
>> +        struct {
>> +            uint32_t size; /* Length of the path */
>> +            XEN_GUEST_HANDLE_64(char) path; /* path to the device tree node */
>> +        } dt;
>> +    } u;
>>  };
> 
> An incompatible change like this requires bumping
> XEN_DOMCTL_INTERFACE_VERSION when not already done during
> the current release cycle.

The XEN_DOMCTL_INTERFACE_VERSION will be bumped in patch #1 of this
series [1].

Regards,

[1] https://patches.linaro.org/43014/
Julien Grall Jan. 29, 2015, 11:40 a.m. UTC | #2
On 29/01/15 10:29, Stefano Stabellini wrote:
>>> -        seg = domctl->u.assign_device.machine_sbdf >> 16;
>>> -        bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
>>> -        devfn = domctl->u.assign_device.machine_sbdf & 0xff;
>>> +        seg = machine_sbdf >> 16;
>>> +        bus = (machine_sbdf >> 8) & 0xff;
>>> +        devfn = machine_sbdf & 0xff;
>>
>> If you fiddle with these, please make them use at least PCI_BUS()
>> and PCI_DEVFN2() (we don't have a matching macro for retrieving
>> the segment).
> 
> Maybe we should?

I could add one.

Regards,
Julien Grall Jan. 29, 2015, 11:45 a.m. UTC | #3
Hi Stefano,

On 29/01/15 10:29, Stefano Stabellini wrote:
>> +static bool_t iommu_dt_device_is_assigned(const struct dt_device_node *dev)
>> +{
>> +    bool_t assigned = 0;
>> +
>> +    if ( !dt_device_is_protected(dev) )
>> +        return 1;
> 
> Why return true here?

Because any device not protected cannot be assigned to another guest.

This could be used by the toolstack to know whether the device is
assigned or not. IHMO, returning 0 would be a false negative.

Would a comment in the code suitable?

Regards,
Julien Grall Jan. 29, 2015, 1:09 p.m. UTC | #4
On 29/01/15 12:09, Stefano Stabellini wrote:
> On Thu, 29 Jan 2015, Julien Grall wrote:
>> Hi Stefano,
>>
>> On 29/01/15 10:29, Stefano Stabellini wrote:
>>>> +static bool_t iommu_dt_device_is_assigned(const struct dt_device_node *dev)
>>>> +{
>>>> +    bool_t assigned = 0;
>>>> +
>>>> +    if ( !dt_device_is_protected(dev) )
>>>> +        return 1;
>>>
>>> Why return true here?
>>
>> Because any device not protected cannot be assigned to another guest.
>> This could be used by the toolstack to know whether the device is
>> assigned or not.
> 
> I understand that much.
> 
> 
>> IHMO, returning 0 would be a false negative.
> 
> Why? Returning 0 means that the device is not assigned, that would be
> correct. From this statement I think that actually you are thinking as
> if this function actually returned whether a given device is assignable.

0 means the device is not assigned and therefore can be used for
passthrough.

This function is used in the DOMCTL test_assign_device. That would mean
we will return 0 for non-protected device. That doesn't sound right as a
such device can never be assigned.

> In that case you should rename the function to
> 
> iommu_dt_device_is_assignable

dt_device_is_assignable is not more clear. Return 0 here would mean the
device cannot be assigned in the sense it's not protected.

Moreover, this name would not be in sync with the DOMCTL test_assign_device.


Regards,
Julien Grall Jan. 29, 2015, 3:14 p.m. UTC | #5
On 29/01/15 15:03, Stefano Stabellini wrote:
> On Thu, 29 Jan 2015, Julien Grall wrote:
>> On 29/01/15 12:09, Stefano Stabellini wrote:
>>> On Thu, 29 Jan 2015, Julien Grall wrote:
>>>> Hi Stefano,
>>>>
>>>> On 29/01/15 10:29, Stefano Stabellini wrote:
>>>>>> +static bool_t iommu_dt_device_is_assigned(const struct dt_device_node *dev)
>>>>>> +{
>>>>>> +    bool_t assigned = 0;
>>>>>> +
>>>>>> +    if ( !dt_device_is_protected(dev) )
>>>>>> +        return 1;
>>>>>
>>>>> Why return true here?
>>>>
>>>> Because any device not protected cannot be assigned to another guest.
>>>> This could be used by the toolstack to know whether the device is
>>>> assigned or not.
>>>
>>> I understand that much.
>>>
>>>
>>>> IHMO, returning 0 would be a false negative.
>>>
>>> Why? Returning 0 means that the device is not assigned, that would be
>>> correct. From this statement I think that actually you are thinking as
>>> if this function actually returned whether a given device is assignable.
>>
>> 0 means the device is not assigned and therefore can be used for
>> passthrough.
> 
> I disagree on the "therefore".
> 0 means (or should mean that) the device is not assigned and stop there.
> 
> 
>> This function is used in the DOMCTL test_assign_device. That would mean
>> we will return 0 for non-protected device. That doesn't sound right as a
>> such device can never be assigned.
> 
> assigned -> passed through to a VM
> assignable -> can potentially be assigned
> 
> A non-assignable device cannot be assigned by definition. It does not
> make sense to returned true for it, if the question is "is it
> assigned?".

Ok. I will return 0 in when the device is not protected.

Regards,
Julien Grall March 10, 2015, 4:33 p.m. UTC | #6
Hi Ian,

On 20/02/15 17:17, Ian Campbell wrote:
>> +    /* TODO: Do we need to check is_dying? Mostly to protect against
>> +     * hypercall trying to passthrough a device while we are
>> +     * dying.
> 
> FWIW the PCI case appears not to care...

There is one place in XEN_DOMCTL_assign_device...

Although I don't understand much the usage of is_dying.

>> +     */
>> +
>> +    switch ( domctl->cmd )
>> +    {
>> +    case XEN_DOMCTL_assign_device:
>> +        ret = -ENOSYS;
>> +        if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_DT )
>> +            break;
> 
> You added something similar to iommu_do_pci_domctl, would it not be
> preferable for the caller to switch on domctl->u.assign_device.dev and
> call the correct iommu_do_*_domctl?

I though about it. It would require to stub iommu_do_*_domctl. So I
preferred to chose the current solution.

Regards,
Julien Grall March 10, 2015, 4:52 p.m. UTC | #7
Hi Daniel,

On 23/02/15 16:25, Daniel De Graaf wrote:
> On 02/20/2015 12:17 PM, Ian Campbell wrote:
>> On Tue, 2015-01-13 at 14:25 +0000, Julien Grall wrote:
>>> TODO: Update the commit message
>>>
>>> A device node is described by a path. It will be used to retrieved the
>>> node in the device tree and assign the related device to the domain.
>>>
>>> Only device protected by an IOMMU can be assigned to a guest.
>>>
>>> Signed-off-by: Julien Grall <julien.grall@linaro.org>
>>> Cc: Ian Jackson <ian.jackson@eu.citrix.com>
>>> Cc: Wei Liu <wei.liu2@citrix.com>
>>> Cc: Jan Beulich <jbeulich@suse.com>
>>>
>>> ---
>>>      Changes in v2:
>>>          - Use a different number for XEN_DOMCTL_assign_dt_device
>>> ---
>>>   tools/libxc/include/xenctrl.h         | 10 ++++
>>>   tools/libxc/xc_domain.c               | 95
>>> ++++++++++++++++++++++++++++++++--
>>
>> These bits all look fine.
>>
>>> +int iommu_do_dt_domctl(struct xen_domctl *domctl, struct domain *d,
>>> +                       XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
>>> +{
>>> +    int ret;
>>> +    struct dt_device_node *dev;
>>> +
>>> +    /* TODO: How to deal with XSM? */
>>
>> Adding Daniel.
>>
>> It seems the PCI ones are protected by
>>          xsm_test_assign_device(XSM_HOOK,
>> domctl->u.assign_device.machine_sbdf);
>>
>> So it seem that either this needs to become "test_assign_pci_device" and
>> a similar "test_assign_dt_device" needs to be added and plumbed through
>> or it needs to grow a type parameter and take the union for the
>> identifier.
> 
> Either would work, but a distinct hook seems simpler to me, especially as
> the call sites are distinct and the hook would process them differently.

Sounds good.

>> The code to apply an XSM context to a DT node would need consideration
>> too I suppose?
> 
> This may require a bit more thought.  At first glance, the dt_phandle
> field seems to be an identifier that could be used by FLASK to identify a
> device using an ocontext lookup.  Labeling would then be done in the same
> way as PCI devices and x86 legacy I/O ports.

We don't always have a dt_phandle in hand. They are mostly used for
referencing a node within another (such as IOMMU, interrupt
controller...). Also, the value is controlled by the compiler.

AFAICT, the only unique value we have in hand is the path of the device.

BTW, do you have any pointer on how to write a policy for device/IRQ
passthrough?

Regards,
Julien Grall March 10, 2015, 11:07 p.m. UTC | #8
Hi Daniel,

On 10/03/2015 22:45, Daniel De Graaf wrote:
>> BTW, do you have any pointer on how to write a policy for device/IRQ
>> passthrough?
>
> There is a bit of documentation in xsm-flask.txt about device labeling,
> which is the hard part of making passthrough work.  Labels can be set
> either statically in the security policy (as documented in the section
> "Device Labeling") or dynamically using a tool like flask-label-pci
> as documented in "Resource Policy".  Once that is done, then rules to
> allow the passthrough operation can be added, similar to the example
> resource nic_dev_t in xen.te.

I tried to follow xsm-flask.txt and uncomment one of the pirqcon line in 
the xsm policy.

But I got the following error:

policy/modules/xen/xen.te:199:ERROR 'syntax error' at token 'pirqcon' on 
line 1986:
pirqcon 33 system_u:object_r:nic_dev_t

Did I miss anything?

> In order to do static labeling for device passthrough, the nodes in a
> device tree need a 32-bit numeric identifier.  IO memory uses the MFN,
> PCI devices use SBDF, and IRQs and x86 legacy IOs just use the number.

Why it's restricted to an integer? Would it be possible to use a string 
as it's done for the sid?

Regards,
Julien Grall March 11, 2015, 12:15 p.m. UTC | #9
Hi Jan,

On 11/03/2015 08:53, Jan Beulich wrote:
>>>> On 10.03.15 at 23:45, <dgdegra@tycho.nsa.gov> wrote:
>> In order to do static labeling for device passthrough, the nodes in a
>> device tree need a 32-bit numeric identifier.  IO memory uses the MFN,
>> PCI devices use SBDF, and IRQs and x86 legacy IOs just use the number.
>
> A 32-bit number can't uniquely identify an MMIO MFN.

Isn't 32-bit enough to describe an MFN?

Although, MMIO are described with an MFN range.

Regards,
Julien Grall March 11, 2015, 12:30 p.m. UTC | #10
Hi Daniel,

On 10/03/2015 23:39, Daniel De Graaf wrote:
> On 03/10/2015 07:07 PM, Julien Grall wrote:
>> Hi Daniel,
>>
>> On 10/03/2015 22:45, Daniel De Graaf wrote:
>>>> BTW, do you have any pointer on how to write a policy for device/IRQ
>>>> passthrough?
>>>
>>> There is a bit of documentation in xsm-flask.txt about device labeling,
>>> which is the hard part of making passthrough work.  Labels can be set
>>> either statically in the security policy (as documented in the section
>>> "Device Labeling") or dynamically using a tool like flask-label-pci
>>> as documented in "Resource Policy".  Once that is done, then rules to
>>> allow the passthrough operation can be added, similar to the example
>>> resource nic_dev_t in xen.te.
>>
>> I tried to follow xsm-flask.txt and uncomment one of the pirqcon line
>> in the xsm policy.
>>
>> But I got the following error:
>>
>> policy/modules/xen/xen.te:199:ERROR 'syntax error' at token 'pirqcon'
>> on line 1986:
>> pirqcon 33 system_u:object_r:nic_dev_t
>>
>> Did I miss anything?
>
> No, this is an error in either the policy or the parser in checkpolicy.
> The parser in checkpolicy is rather inflexible, and it currently requires
> that the device labels be specified at the end of the policy.conf instead
> of in the middle (as they are now).  You should add the commented out
> lines to the end of tools/flask/policy/policy/initial_sids for now; I will
> be sending a patch to move them to another file tomorrow.

It's working now. Thanks!

>>> In order to do static labeling for device passthrough, the nodes in a
>>> device tree need a 32-bit numeric identifier.  IO memory uses the MFN,
>>> PCI devices use SBDF, and IRQs and x86 legacy IOs just use the number.
>>
>> Why it's restricted to an integer? Would it be possible to use a
>> string as it's done for the sid?
>
> The sid is not actually represented as a string internally: it is mapped
> into a set of integers for the user/role/type and MLS range.

I gave a look to the checkpolicy code and the ocontext can store pretty 
much anything.

Currently for IOMEM, 2 32-bit number is stored to describe the range.

AFAIU, in order to support DT device, I will have to update checkpolicy 
for adding a new token (smth like dtdevicecon). So I don't see why I 
couldn't use a string for purpose.

Or maybe you had in mind to reuse pcidevicecon?

Regards,
Julien Grall March 11, 2015, 12:35 p.m. UTC | #11
On 11/03/2015 12:23, Ian Campbell wrote:
> On Wed, 2015-03-11 at 12:15 +0000, Julien Grall wrote:
>> Hi Jan,
>>
>> On 11/03/2015 08:53, Jan Beulich wrote:
>>>>>> On 10.03.15 at 23:45, <dgdegra@tycho.nsa.gov> wrote:
>>>> In order to do static labeling for device passthrough, the nodes in a
>>>> device tree need a 32-bit numeric identifier.  IO memory uses the MFN,
>>>> PCI devices use SBDF, and IRQs and x86 legacy IOs just use the number.
>>>
>>> A 32-bit number can't uniquely identify an MMIO MFN.
>>
>> Isn't 32-bit enough to describe an MFN?
>
> Up to 32+12== 44 bits, yes, but we know there are ARM systems with 48
> bit PA out there.

Hmmm... checkpolicy and flask will have to be fixed then.

Regards,
Julien Grall March 11, 2015, 1:50 p.m. UTC | #12
Hi Ian,

On 11/03/2015 12:37, Ian Campbell wrote:
> On Tue, 2015-03-10 at 16:33 +0000, Julien Grall wrote:
>> Hi Ian,
>>
>> On 20/02/15 17:17, Ian Campbell wrote:
>>>> +    /* TODO: Do we need to check is_dying? Mostly to protect against
>>>> +     * hypercall trying to passthrough a device while we are
>>>> +     * dying.
>>>
>>> FWIW the PCI case appears not to care...
>>
>> There is one place in XEN_DOMCTL_assign_device...
>>
>> Although I don't understand much the usage of is_dying.
>>
>>>> +     */
>>>> +
>>>> +    switch ( domctl->cmd )
>>>> +    {
>>>> +    case XEN_DOMCTL_assign_device:
>>>> +        ret = -ENOSYS;
>>>> +        if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_DT )
>>>> +            break;
>>>
>>> You added something similar to iommu_do_pci_domctl, would it not be
>>> preferable for the caller to switch on domctl->u.assign_device.dev and
>>> call the correct iommu_do_*_domctl?
>>
>> I though about it. It would require to stub iommu_do_*_domctl. So I
>> preferred to chose the current solution.
>
> You mean iommu_do_{pci,dt}_domctl?
>
> IIRC you already added suitable #defines for when these features are
> present, so reusing them at the call sites wouldn't be too bad, or else
> stubs aren't the worst thing either.

Hmmm I think I got you point now. Do you mean have something like:

switch (domctl->u.assign_device.dev)
{
#ifdef HAS_PCI
case XEN_DOMCTL_DEV_PCI:
     ret = iommu_do_pci_domctl();
     break;
#endif
#ifdef HAS_DEVICE_TREE
case XEN_DOMCTL_DEV_DT:
     ret = iommu_do_dt_domctl()
     break;
#endif

default:
    ret = -ENOSYS;
}

Regards,
Julien Grall March 11, 2015, 2:11 p.m. UTC | #13
Hi Jan,

On 11/03/2015 14:03, Jan Beulich wrote:
>>>> On 11.03.15 at 14:55, <ian.campbell@citrix.com> wrote:
>> On Wed, 2015-03-11 at 13:50 +0000, Julien Grall wrote:
>>> Hmmm I think I got you point now. Do you mean have something like:
>>
>> That's one option I'd be happy with, yes. Jan might disagree.
>
> Looks quite reasonable to except for ...
>
>>> switch (domctl->u.assign_device.dev)
>>> {
>>> #ifdef HAS_PCI
>>> case XEN_DOMCTL_DEV_PCI:
>>>       ret = iommu_do_pci_domctl();
>>>       break;
>>> #endif
>>> #ifdef HAS_DEVICE_TREE
>>> case XEN_DOMCTL_DEV_DT:
>>>       ret = iommu_do_dt_domctl()
>>>       break;
>>> #endif
>>>
>>> default:
>>>      ret = -ENOSYS;
>
> ... the -ENOSYS here: We commonly use e.g. -EOPNOTSUPP in
> such situations.

Ok I will use it.

Regards,
Julien Grall March 13, 2015, 4:47 p.m. UTC | #14
Hi,

On 11/03/2015 13:50, Julien Grall wrote:
> On 11/03/2015 12:37, Ian Campbell wrote:
>> On Tue, 2015-03-10 at 16:33 +0000, Julien Grall wrote:
>>> Hi Ian,
>>>
>>> On 20/02/15 17:17, Ian Campbell wrote:
>>>>> +    /* TODO: Do we need to check is_dying? Mostly to protect against
>>>>> +     * hypercall trying to passthrough a device while we are
>>>>> +     * dying.
>>>>
>>>> FWIW the PCI case appears not to care...
>>>
>>> There is one place in XEN_DOMCTL_assign_device...
>>>
>>> Although I don't understand much the usage of is_dying.
>>>
>>>>> +     */
>>>>> +
>>>>> +    switch ( domctl->cmd )
>>>>> +    {
>>>>> +    case XEN_DOMCTL_assign_device:
>>>>> +        ret = -ENOSYS;
>>>>> +        if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_DT )
>>>>> +            break;
>>>>
>>>> You added something similar to iommu_do_pci_domctl, would it not be
>>>> preferable for the caller to switch on domctl->u.assign_device.dev and
>>>> call the correct iommu_do_*_domctl?
>>>
>>> I though about it. It would require to stub iommu_do_*_domctl. So I
>>> preferred to chose the current solution.
>>
>> You mean iommu_do_{pci,dt}_domctl?
>>
>> IIRC you already added suitable #defines for when these features are
>> present, so reusing them at the call sites wouldn't be too bad, or else
>> stubs aren't the worst thing either.
>
> Hmmm I think I got you point now. Do you mean have something like:
>
> switch (domctl->u.assign_device.dev)
> {
> #ifdef HAS_PCI
> case XEN_DOMCTL_DEV_PCI:
>      ret = iommu_do_pci_domctl();
>      break;
> #endif
> #ifdef HAS_DEVICE_TREE
> case XEN_DOMCTL_DEV_DT:
>      ret = iommu_do_dt_domctl()
>      break;
> #endif
>
> default:
>     ret = -ENOSYS;
> }

I though more about it and it won't be possible to do it unless we 
decode DOMCTL too.

This is because the DOMCTL_get_device_group use a different structure 
and is only for PCI.

So unless we decide to consolidate the 2 DOMCTL structures in a single 
one. I would prefer to keep the current solution with the suggestion 
made by Jan earlier.

Regards,
diff mbox

Patch

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index d66571f..db45475 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2055,6 +2055,16 @@  int xc_deassign_device(xc_interface *xch,
                      uint32_t domid,
                      uint32_t machine_bdf);
 
+int xc_assign_dt_device(xc_interface *xch,
+                        uint32_t domid,
+                        char *path);
+int xc_test_assign_dt_device(xc_interface *xch,
+                             uint32_t domid,
+                             char *path);
+int xc_deassign_dt_device(xc_interface *xch,
+                          uint32_t domid,
+                          char *path);
+
 int xc_domain_memory_mapping(xc_interface *xch,
                              uint32_t domid,
                              unsigned long first_gfn,
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index eb066cf..bca3aee 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -1637,7 +1637,8 @@  int xc_assign_device(
 
     domctl.cmd = XEN_DOMCTL_assign_device;
     domctl.domain = domid;
-    domctl.u.assign_device.machine_sbdf = machine_sbdf;
+    domctl.u.assign_device.dev = XEN_DOMCTL_DEV_PCI;
+    domctl.u.assign_device.u.pci.machine_sbdf = machine_sbdf;
 
     return do_domctl(xch, &domctl);
 }
@@ -1686,7 +1687,8 @@  int xc_test_assign_device(
 
     domctl.cmd = XEN_DOMCTL_test_assign_device;
     domctl.domain = domid;
-    domctl.u.assign_device.machine_sbdf = machine_sbdf;
+    domctl.u.assign_device.dev = XEN_DOMCTL_DEV_PCI;
+    domctl.u.assign_device.u.pci.machine_sbdf = machine_sbdf;
 
     return do_domctl(xch, &domctl);
 }
@@ -1700,11 +1702,96 @@  int xc_deassign_device(
 
     domctl.cmd = XEN_DOMCTL_deassign_device;
     domctl.domain = domid;
-    domctl.u.assign_device.machine_sbdf = machine_sbdf;
- 
+    domctl.u.assign_device.dev = XEN_DOMCTL_DEV_PCI;
+    domctl.u.assign_device.u.pci.machine_sbdf = machine_sbdf;
+
     return do_domctl(xch, &domctl);
 }
 
+int xc_assign_dt_device(
+    xc_interface *xch,
+    uint32_t domid,
+    char *path)
+{
+    int rc;
+    size_t size = strlen(path);
+    DECLARE_DOMCTL;
+    DECLARE_HYPERCALL_BOUNCE(path, size, XC_HYPERCALL_BUFFER_BOUNCE_IN);
+
+    if ( xc_hypercall_bounce_pre(xch, path) )
+        return -1;
+
+    domctl.cmd = XEN_DOMCTL_assign_device;
+    domctl.domain = (domid_t)domid;
+
+    domctl.u.assign_device.dev = XEN_DOMCTL_DEV_DT;
+    domctl.u.assign_device.u.dt.size = size;
+    set_xen_guest_handle(domctl.u.assign_device.u.dt.path, path);
+
+    rc = do_domctl(xch, &domctl);
+
+    xc_hypercall_bounce_post(xch, path);
+
+    return rc;
+}
+
+int xc_test_assign_dt_device(
+    xc_interface *xch,
+    uint32_t domid,
+    char *path)
+{
+    int rc;
+    size_t size = strlen(path);
+    DECLARE_DOMCTL;
+    DECLARE_HYPERCALL_BOUNCE(path, size, XC_HYPERCALL_BUFFER_BOUNCE_IN);
+
+    if ( xc_hypercall_bounce_pre(xch, path) )
+        return -1;
+
+    domctl.cmd = XEN_DOMCTL_test_assign_device;
+    domctl.domain = (domid_t)domid;
+
+    domctl.u.assign_device.dev = XEN_DOMCTL_DEV_DT;
+    domctl.u.assign_device.u.dt.size = size;
+    set_xen_guest_handle(domctl.u.assign_device.u.dt.path, path);
+
+    rc = do_domctl(xch, &domctl);
+
+    xc_hypercall_bounce_post(xch, path);
+
+    return rc;
+}
+
+int xc_deassign_dt_device(
+    xc_interface *xch,
+    uint32_t domid,
+    char *path)
+{
+    int rc;
+    size_t size = strlen(path);
+    DECLARE_DOMCTL;
+    DECLARE_HYPERCALL_BOUNCE(path, size, XC_HYPERCALL_BUFFER_BOUNCE_IN);
+
+    if ( xc_hypercall_bounce_pre(xch, path) )
+        return -1;
+
+    domctl.cmd = XEN_DOMCTL_deassign_device;
+    domctl.domain = (domid_t)domid;
+
+    domctl.u.assign_device.dev = XEN_DOMCTL_DEV_DT;
+    domctl.u.assign_device.u.dt.size = size;
+    set_xen_guest_handle(domctl.u.assign_device.u.dt.path, path);
+
+    rc = do_domctl(xch, &domctl);
+
+    xc_hypercall_bounce_post(xch, path);
+
+    return rc;
+}
+
+
+
+
 int xc_domain_update_msi_irq(
     xc_interface *xch,
     uint32_t domid,
diff --git a/xen/drivers/passthrough/device_tree.c b/xen/drivers/passthrough/device_tree.c
index d9b486e..11deb1d 100644
--- a/xen/drivers/passthrough/device_tree.c
+++ b/xen/drivers/passthrough/device_tree.c
@@ -1,9 +1,6 @@ 
 /*
  * Code to passthrough a device tree node to a guest
  *
- * TODO: This contains only the necessary code to protected device passed to
- * dom0. It will need some updates when device passthrough will is added.
- *
  * Julien Grall <julien.grall@linaro.org>
  * Copyright (c) 2014 Linaro Limited.
  *
@@ -20,6 +17,7 @@ 
 
 #include <xen/lib.h>
 #include <xen/sched.h>
+#include <xen/guest_access.h>
 #include <xen/iommu.h>
 #include <xen/device_tree.h>
 
@@ -85,6 +83,20 @@  fail:
     return rc;
 }
 
+static bool_t iommu_dt_device_is_assigned(const struct dt_device_node *dev)
+{
+    bool_t assigned = 0;
+
+    if ( !dt_device_is_protected(dev) )
+        return 1;
+
+    spin_lock(&dtdevs_lock);
+    assigned = !list_empty(&dev->domain_list);
+    spin_unlock(&dtdevs_lock);
+
+    return assigned;
+}
+
 int iommu_dt_domain_init(struct domain *d)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(d);
@@ -111,3 +123,82 @@  int iommu_release_dt_devices(struct domain *d)
 
     return 0;
 }
+
+int iommu_do_dt_domctl(struct xen_domctl *domctl, struct domain *d,
+                       XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
+{
+    int ret;
+    struct dt_device_node *dev;
+
+    /* TODO: How to deal with XSM? */
+    /* TODO: Do we need to check is_dying? Mostly to protect against
+     * hypercall trying to passthrough a device while we are
+     * dying.
+     */
+
+    switch ( domctl->cmd )
+    {
+    case XEN_DOMCTL_assign_device:
+        ret = -ENOSYS;
+        if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_DT )
+            break;
+
+        ret = dt_find_node_by_gpath(domctl->u.assign_device.u.dt.path,
+                                    domctl->u.assign_device.u.dt.size,
+                                    &dev);
+        if ( ret )
+            break;
+
+        ret = iommu_assign_dt_device(d, dev);
+
+        if ( ret )
+            printk(XENLOG_G_ERR "XEN_DOMCTL_assign_dt_device: assign \"%s\""
+                   " to dom%u failed (%d)\n",
+                   dt_node_full_name(dev), d->domain_id, ret);
+        break;
+
+    case XEN_DOMCTL_deassign_device:
+        ret = -ENOSYS;
+        if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_DT )
+            break;
+
+        ret = dt_find_node_by_gpath(domctl->u.assign_device.u.dt.path,
+                                    domctl->u.assign_device.u.dt.size,
+                                    &dev);
+        if ( ret )
+            break;
+
+        ret = iommu_deassign_dt_device(d, dev);
+
+        if ( ret )
+            printk(XENLOG_G_ERR "XEN_DOMCTL_assign_dt_device: assign \"%s\""
+                   " to dom%u failed (%d)\n",
+                   dt_node_full_name(dev), d->domain_id, ret);
+        break;
+
+    case XEN_DOMCTL_test_assign_device:
+        ret = -ENOSYS;
+        if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_DT )
+            break;
+
+        ret = dt_find_node_by_gpath(domctl->u.assign_device.u.dt.path,
+                                    domctl->u.assign_device.u.dt.size,
+                                    &dev);
+        if ( ret )
+            break;
+
+        if ( iommu_dt_device_is_assigned(dev) )
+        {
+            printk(XENLOG_G_ERR "%s already assigned, or not protected\n",
+                   dt_node_full_name(dev));
+            ret = -EINVAL;
+        }
+        break;
+
+    default:
+        ret = -ENOSYS;
+        break;
+    }
+
+    return ret;
+}
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 8915244..02d5ec1 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -337,6 +337,13 @@  int iommu_do_domctl(
     ret = iommu_do_pci_domctl(domctl, d, u_domctl);
 #endif
 
+    if ( ret != -ENOSYS )
+        return ret;
+
+#ifdef HAS_DEVICE_TREE
+    ret = iommu_do_dt_domctl(domctl, d, u_domctl);
+#endif
+
     return ret;
 }
 
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 9a47a37..ecc0cd1 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -1497,6 +1497,7 @@  int iommu_do_pci_domctl(
     u16 seg;
     u8 bus, devfn;
     int ret = 0;
+    uint32_t machine_sbdf;
 
     switch ( domctl->cmd )
     {
@@ -1533,13 +1534,19 @@  int iommu_do_pci_domctl(
     break;
 
     case XEN_DOMCTL_test_assign_device:
-        ret = xsm_test_assign_device(XSM_HOOK, domctl->u.assign_device.machine_sbdf);
+        ret = -ENOSYS;
+        if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_PCI )
+            break;
+
+        machine_sbdf = domctl->u.assign_device.u.pci.machine_sbdf;
+
+        ret = xsm_test_assign_device(XSM_HOOK, machine_sbdf);
         if ( ret )
             break;
 
-        seg = domctl->u.assign_device.machine_sbdf >> 16;
-        bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
-        devfn = domctl->u.assign_device.machine_sbdf & 0xff;
+        seg = machine_sbdf >> 16;
+        bus = (machine_sbdf >> 8) & 0xff;
+        devfn = machine_sbdf & 0xff;
 
         if ( device_assigned(seg, bus, devfn) )
         {
@@ -1551,19 +1558,25 @@  int iommu_do_pci_domctl(
         break;
 
     case XEN_DOMCTL_assign_device:
+        ret = -ENOSYS;
+        if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_PCI )
+            break;
+
         if ( unlikely(d->is_dying) )
         {
             ret = -EINVAL;
             break;
         }
 
-        ret = xsm_assign_device(XSM_HOOK, d, domctl->u.assign_device.machine_sbdf);
+        machine_sbdf = domctl->u.assign_device.u.pci.machine_sbdf;
+
+        ret = xsm_assign_device(XSM_HOOK, d, machine_sbdf);
         if ( ret )
             break;
 
-        seg = domctl->u.assign_device.machine_sbdf >> 16;
-        bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
-        devfn = domctl->u.assign_device.machine_sbdf & 0xff;
+        seg = machine_sbdf >> 16;
+        bus = (machine_sbdf >> 8) & 0xff;
+        devfn = machine_sbdf & 0xff;
 
         ret = device_assigned(seg, bus, devfn) ?:
               assign_device(d, seg, bus, devfn);
@@ -1579,13 +1592,19 @@  int iommu_do_pci_domctl(
         break;
 
     case XEN_DOMCTL_deassign_device:
-        ret = xsm_deassign_device(XSM_HOOK, d, domctl->u.assign_device.machine_sbdf);
+        ret = -ENOSYS;
+        if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_PCI )
+            break;
+
+        machine_sbdf = domctl->u.assign_device.u.pci.machine_sbdf;
+
+        ret = xsm_deassign_device(XSM_HOOK, d, machine_sbdf);
         if ( ret )
             break;
 
-        seg = domctl->u.assign_device.machine_sbdf >> 16;
-        bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
-        devfn = domctl->u.assign_device.machine_sbdf & 0xff;
+        seg = machine_sbdf >> 16;
+        bus = (machine_sbdf >> 8) & 0xff;
+        devfn = machine_sbdf & 0xff;
 
         spin_lock(&pcidevs_lock);
         ret = deassign_device(d, seg, bus, devfn);
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index b742b23..d905ab0 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -475,12 +475,23 @@  typedef struct xen_domctl_sendtrigger xen_domctl_sendtrigger_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendtrigger_t);
 
 
-/* Assign PCI device to HVM guest. Sets up IOMMU structures. */
+/* Assign a device to a guest. Sets up IOMMU structures. */
 /* XEN_DOMCTL_assign_device */
 /* XEN_DOMCTL_test_assign_device */
 /* XEN_DOMCTL_deassign_device */
+#define XEN_DOMCTL_DEV_PCI      0
+#define XEN_DOMCTL_DEV_DT       1
 struct xen_domctl_assign_device {
-    uint32_t  machine_sbdf;   /* machine PCI ID of assigned device */
+    uint32_t dev;   /* XEN_DOMCTL_DEV_* */
+    union {
+        struct {
+            uint32_t machine_sbdf;   /* machine PCI ID of assigned device */
+        } pci;
+        struct {
+            uint32_t size; /* Length of the path */
+            XEN_GUEST_HANDLE_64(char) path; /* path to the device tree node */
+        } dt;
+    } u;
 };
 typedef struct xen_domctl_assign_device xen_domctl_assign_device_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t);
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index d03df14..d261277 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -119,6 +119,9 @@  int iommu_deassign_dt_device(struct domain *d, struct dt_device_node *dev);
 int iommu_dt_domain_init(struct domain *d);
 int iommu_release_dt_devices(struct domain *d);
 
+int iommu_do_dt_domctl(struct xen_domctl *, struct domain *,
+                       XEN_GUEST_HANDLE_PARAM(xen_domctl_t));
+
 #endif /* HAS_DEVICE_TREE */
 
 struct page_info;