diff mbox series

[v6,3/4] util/vfio-helpers: Introduce qemu_vfio_pci_init_msix_irqs()

Message ID 20200909142354.334859-4-philmd@redhat.com
State Superseded
Headers show
Series util/vfio-helpers: Add support for multiple IRQs | expand

Commit Message

Philippe Mathieu-Daudé Sept. 9, 2020, 2:23 p.m. UTC
qemu_vfio_pci_init_irq() allows us to initialize any type of IRQ,
but only one. Introduce qemu_vfio_pci_init_msix_irqs() which is
specific to MSIX IRQ type, and allow us to use multiple IRQs
(thus passing multiple eventfd notifiers).

Reviewed-by: Fam Zheng <fam@euphon.net>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
---
 include/qemu/vfio-helpers.h |  6 +++-
 util/vfio-helpers.c         | 65 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 69 insertions(+), 2 deletions(-)

Comments

Stefan Hajnoczi Sept. 10, 2020, 10:44 a.m. UTC | #1
On Wed, Sep 09, 2020 at 04:23:53PM +0200, Philippe Mathieu-Daudé wrote:
> +/**
> + * Initialize device MSIX IRQs and register event notifiers.
> + * @irq_count: pointer to number of MSIX IRQs to initialize
> + * @notifier: Array of @irq_count notifiers (each corresponding to a MSIX IRQ)
> +
> + * If the number of IRQs requested exceeds the available on the device,
> + * store the number of available IRQs in @irq_count and return -EOVERFLOW.
> + */
> +int qemu_vfio_pci_init_msix_irqs(QEMUVFIOState *s, EventNotifier *notifier,
> +                                 unsigned *irq_count, Error **errp)
> +{
> +    int r;
> +    size_t irq_set_size;
> +    struct vfio_irq_set *irq_set;
> +    struct vfio_irq_info irq_info = {
> +        .argsz = sizeof(irq_info),
> +        .index = VFIO_PCI_MSIX_IRQ_INDEX
> +    };
> +
> +    if (ioctl(s->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info)) {
> +        error_setg_errno(errp, errno, "Failed to get device interrupt info");
> +        return -errno;
> +    }
> +    if (irq_info.count < *irq_count) {
> +        error_setg(errp, "Not enough device interrupts available");
> +        *irq_count = irq_info.count;
> +        return -EOVERFLOW;
> +    }
> +    if (!(irq_info.flags & VFIO_IRQ_INFO_EVENTFD)) {
> +        error_setg(errp, "Device interrupt doesn't support eventfd");
> +        return -EINVAL;
> +    }
> +
> +    irq_set_size = sizeof(*irq_set) + *irq_count * sizeof(int32_t);
> +    irq_set = g_malloc0(irq_set_size);
> +
> +    /* Get to a known IRQ state */
> +    *irq_set = (struct vfio_irq_set) {
> +        .argsz = irq_set_size,
> +        .flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
> +        .index = irq_info.index,
> +        .start = 0,
> +        .count = *irq_count,
> +    };
> +
> +    for (unsigned i = 0; i < *irq_count; i++) {
> +        ((int32_t *)&irq_set->data)[i] = event_notifier_get_fd(&notifier[i]);
> +    }
> +    r = ioctl(s->device, VFIO_DEVICE_SET_IRQS, irq_set);
> +    g_free(irq_set);
> +    if (r <= 0) {
> +        error_setg_errno(errp, errno, "Failed to setup device interrupts");
> +        return -errno;
> +    } else if (r < *irq_count) {
> +        error_setg(errp, "Not enough device interrupts available");
> +        *irq_count = r;
> +        return -EOVERFLOW;
> +    }

EOVERFLOW can occur in two cases: VFIO_DEVICE_GET_IRQ_INFO and
VFIO_DEVICE_SET_IRQS.

If it happens in the second case the notifier[] array has been
registered successfully.

The caller has no way of distinguishing the two cases. Therefore the
caller doesn't know if the eventfds will be used by the kernel after
EOVERFLOW.

If the second case can ever happen then this function should probably
call VFIO_DEVICE_SET_IRQS again with VFIO_IRQ_SET_DATA_NONE to
unregister the eventfds before returning EOVERFLOW.

STefan
Philippe Mathieu-Daudé Sept. 10, 2020, 3:29 p.m. UTC | #2
Hi Stefan, Alex.

On 9/10/20 12:44 PM, Stefan Hajnoczi wrote:
> On Wed, Sep 09, 2020 at 04:23:53PM +0200, Philippe Mathieu-Daudé wrote:
>> +/**
>> + * Initialize device MSIX IRQs and register event notifiers.
>> + * @irq_count: pointer to number of MSIX IRQs to initialize
>> + * @notifier: Array of @irq_count notifiers (each corresponding to a MSIX IRQ)
>> +
>> + * If the number of IRQs requested exceeds the available on the device,
>> + * store the number of available IRQs in @irq_count and return -EOVERFLOW.
>> + */
>> +int qemu_vfio_pci_init_msix_irqs(QEMUVFIOState *s, EventNotifier *notifier,
>> +                                 unsigned *irq_count, Error **errp)
>> +{
>> +    int r;
>> +    size_t irq_set_size;
>> +    struct vfio_irq_set *irq_set;
>> +    struct vfio_irq_info irq_info = {
>> +        .argsz = sizeof(irq_info),
>> +        .index = VFIO_PCI_MSIX_IRQ_INDEX
>> +    };
>> +
>> +    if (ioctl(s->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info)) {
>> +        error_setg_errno(errp, errno, "Failed to get device interrupt info");
>> +        return -errno;
>> +    }
>> +    if (irq_info.count < *irq_count) {
>> +        error_setg(errp, "Not enough device interrupts available");
>> +        *irq_count = irq_info.count;
>> +        return -EOVERFLOW;
>> +    }
>> +    if (!(irq_info.flags & VFIO_IRQ_INFO_EVENTFD)) {
>> +        error_setg(errp, "Device interrupt doesn't support eventfd");
>> +        return -EINVAL;
>> +    }
>> +
>> +    irq_set_size = sizeof(*irq_set) + *irq_count * sizeof(int32_t);
>> +    irq_set = g_malloc0(irq_set_size);
>> +
>> +    /* Get to a known IRQ state */
>> +    *irq_set = (struct vfio_irq_set) {
>> +        .argsz = irq_set_size,
>> +        .flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
>> +        .index = irq_info.index,
>> +        .start = 0,
>> +        .count = *irq_count,
>> +    };
>> +
>> +    for (unsigned i = 0; i < *irq_count; i++) {
>> +        ((int32_t *)&irq_set->data)[i] = event_notifier_get_fd(&notifier[i]);
>> +    }
>> +    r = ioctl(s->device, VFIO_DEVICE_SET_IRQS, irq_set);
>> +    g_free(irq_set);
>> +    if (r <= 0) {
>> +        error_setg_errno(errp, errno, "Failed to setup device interrupts");
>> +        return -errno;
>> +    } else if (r < *irq_count) {
>> +        error_setg(errp, "Not enough device interrupts available");
>> +        *irq_count = r;
>> +        return -EOVERFLOW;
>> +    }
> 
> EOVERFLOW can occur in two cases: VFIO_DEVICE_GET_IRQ_INFO and
> VFIO_DEVICE_SET_IRQS.

Yes.

> 
> If it happens in the second case the notifier[] array has been
> registered successfully.

No, I don't think so:

vfio_pci_set_msi_trigger() register the notifier only if
vfio_msi_enable() succeeded (returned 0). If vfio_msi_enable()
failed it returns the number of vectors available but do
not register the notifiers.

Alex, do you confirm?

> 
> The caller has no way of distinguishing the two cases. Therefore the
> caller doesn't know if the eventfds will be used by the kernel after
> EOVERFLOW.
> 
> If the second case can ever happen then this function should probably
> call VFIO_DEVICE_SET_IRQS again with VFIO_IRQ_SET_DATA_NONE to
> unregister the eventfds before returning EOVERFLOW.
> 
> STefan
>
Philippe Mathieu-Daudé Sept. 10, 2020, 4:37 p.m. UTC | #3
On 9/10/20 6:29 PM, Alex Williamson wrote:
> On Thu, 10 Sep 2020 17:29:25 +0200
> Philippe Mathieu-Daudé <philmd@redhat.com> wrote:
> 
>> Hi Stefan, Alex.
>>
>> On 9/10/20 12:44 PM, Stefan Hajnoczi wrote:
>>> On Wed, Sep 09, 2020 at 04:23:53PM +0200, Philippe Mathieu-Daudé wrote:  
>>>> +/**
>>>> + * Initialize device MSIX IRQs and register event notifiers.
>>>> + * @irq_count: pointer to number of MSIX IRQs to initialize
>>>> + * @notifier: Array of @irq_count notifiers (each corresponding to a MSIX IRQ)
>>>> +
>>>> + * If the number of IRQs requested exceeds the available on the device,
>>>> + * store the number of available IRQs in @irq_count and return -EOVERFLOW.
>>>> + */
>>>> +int qemu_vfio_pci_init_msix_irqs(QEMUVFIOState *s, EventNotifier *notifier,
>>>> +                                 unsigned *irq_count, Error **errp)
>>>> +{
>>>> +    int r;
>>>> +    size_t irq_set_size;
>>>> +    struct vfio_irq_set *irq_set;
>>>> +    struct vfio_irq_info irq_info = {
>>>> +        .argsz = sizeof(irq_info),
>>>> +        .index = VFIO_PCI_MSIX_IRQ_INDEX
>>>> +    };
>>>> +
>>>> +    if (ioctl(s->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info)) {
>>>> +        error_setg_errno(errp, errno, "Failed to get device interrupt info");
>>>> +        return -errno;
>>>> +    }
>>>> +    if (irq_info.count < *irq_count) {
>>>> +        error_setg(errp, "Not enough device interrupts available");
>>>> +        *irq_count = irq_info.count;
>>>> +        return -EOVERFLOW;
>>>> +    }
>>>> +    if (!(irq_info.flags & VFIO_IRQ_INFO_EVENTFD)) {
>>>> +        error_setg(errp, "Device interrupt doesn't support eventfd");
>>>> +        return -EINVAL;
>>>> +    }
>>>> +
>>>> +    irq_set_size = sizeof(*irq_set) + *irq_count * sizeof(int32_t);
>>>> +    irq_set = g_malloc0(irq_set_size);
>>>> +
>>>> +    /* Get to a known IRQ state */
>>>> +    *irq_set = (struct vfio_irq_set) {
>>>> +        .argsz = irq_set_size,
>>>> +        .flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
>>>> +        .index = irq_info.index,
>>>> +        .start = 0,
>>>> +        .count = *irq_count,
>>>> +    };
>>>> +
>>>> +    for (unsigned i = 0; i < *irq_count; i++) {
>>>> +        ((int32_t *)&irq_set->data)[i] = event_notifier_get_fd(&notifier[i]);
>>>> +    }
>>>> +    r = ioctl(s->device, VFIO_DEVICE_SET_IRQS, irq_set);
>>>> +    g_free(irq_set);
>>>> +    if (r <= 0) {
>>>> +        error_setg_errno(errp, errno, "Failed to setup device interrupts");
>>>> +        return -errno;
>>>> +    } else if (r < *irq_count) {
>>>> +        error_setg(errp, "Not enough device interrupts available");
>>>> +        *irq_count = r;
>>>> +        return -EOVERFLOW;
>>>> +    }  
>>>
>>> EOVERFLOW can occur in two cases: VFIO_DEVICE_GET_IRQ_INFO and
>>> VFIO_DEVICE_SET_IRQS.  
>>
>> Yes.
>>
>>>
>>> If it happens in the second case the notifier[] array has been
>>> registered successfully.  
>>
>> No, I don't think so:
>>
>> vfio_pci_set_msi_trigger() register the notifier only if
>> vfio_msi_enable() succeeded (returned 0). If vfio_msi_enable()
>> failed it returns the number of vectors available but do
>> not register the notifiers.
>>
>> Alex, do you confirm?
> 
> Yes, if we can't setup what the user requested we don't setup anything.
> However, I think we return zero on success, which seems to fall into
> your error condition.  Has this been tested?  Thanks,

Not v6 as I didn't have the testing setup handy, and thought
v5 -> v6 change was trivial enough :S

Good news: my next task is to add a test :)

> 
> Alex
> 
>>> The caller has no way of distinguishing the two cases. Therefore the
>>> caller doesn't know if the eventfds will be used by the kernel after
>>> EOVERFLOW.
>>>
>>> If the second case can ever happen then this function should probably
>>> call VFIO_DEVICE_SET_IRQS again with VFIO_IRQ_SET_DATA_NONE to
>>> unregister the eventfds before returning EOVERFLOW.
>>>
>>> STefan
>>>   
>>
>
diff mbox series

Patch

diff --git a/include/qemu/vfio-helpers.h b/include/qemu/vfio-helpers.h
index 1f057c2b9e4..092b7b243f9 100644
--- a/include/qemu/vfio-helpers.h
+++ b/include/qemu/vfio-helpers.h
@@ -1,11 +1,13 @@ 
 /*
  * QEMU VFIO helpers
  *
- * Copyright 2016 - 2018 Red Hat, Inc.
+ * Copyright 2016 - 2020 Red Hat, Inc.
  *
  * Authors:
  *   Fam Zheng <famz@redhat.com>
+ *   Philippe Mathieu-Daudé <philmd@redhat.com>
  *
+ * SPDX-License-Identifier: GPL-2.0-or-later
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
  */
@@ -28,5 +30,7 @@  void qemu_vfio_pci_unmap_bar(QEMUVFIOState *s, int index, void *bar,
                              uint64_t offset, uint64_t size);
 int qemu_vfio_pci_init_irq(QEMUVFIOState *s, EventNotifier *e,
                            int irq_type, Error **errp);
+int qemu_vfio_pci_init_msix_irqs(QEMUVFIOState *s, EventNotifier *e,
+                                 unsigned *irq_count, Error **errp);
 
 #endif
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index 4cc5697dcb2..3b1b81caf8b 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -1,11 +1,13 @@ 
 /*
  * VFIO utility
  *
- * Copyright 2016 - 2018 Red Hat, Inc.
+ * Copyright 2016 - 2020 Red Hat, Inc.
  *
  * Authors:
  *   Fam Zheng <famz@redhat.com>
+ *   Philippe Mathieu-Daudé <philmd@redhat.com>
  *
+ * SPDX-License-Identifier: GPL-2.0-or-later
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
  */
@@ -216,6 +218,67 @@  int qemu_vfio_pci_init_irq(QEMUVFIOState *s, EventNotifier *e,
     return 0;
 }
 
+/**
+ * Initialize device MSIX IRQs and register event notifiers.
+ * @irq_count: pointer to number of MSIX IRQs to initialize
+ * @notifier: Array of @irq_count notifiers (each corresponding to a MSIX IRQ)
+
+ * If the number of IRQs requested exceeds the available on the device,
+ * store the number of available IRQs in @irq_count and return -EOVERFLOW.
+ */
+int qemu_vfio_pci_init_msix_irqs(QEMUVFIOState *s, EventNotifier *notifier,
+                                 unsigned *irq_count, Error **errp)
+{
+    int r;
+    size_t irq_set_size;
+    struct vfio_irq_set *irq_set;
+    struct vfio_irq_info irq_info = {
+        .argsz = sizeof(irq_info),
+        .index = VFIO_PCI_MSIX_IRQ_INDEX
+    };
+
+    if (ioctl(s->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info)) {
+        error_setg_errno(errp, errno, "Failed to get device interrupt info");
+        return -errno;
+    }
+    if (irq_info.count < *irq_count) {
+        error_setg(errp, "Not enough device interrupts available");
+        *irq_count = irq_info.count;
+        return -EOVERFLOW;
+    }
+    if (!(irq_info.flags & VFIO_IRQ_INFO_EVENTFD)) {
+        error_setg(errp, "Device interrupt doesn't support eventfd");
+        return -EINVAL;
+    }
+
+    irq_set_size = sizeof(*irq_set) + *irq_count * sizeof(int32_t);
+    irq_set = g_malloc0(irq_set_size);
+
+    /* Get to a known IRQ state */
+    *irq_set = (struct vfio_irq_set) {
+        .argsz = irq_set_size,
+        .flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
+        .index = irq_info.index,
+        .start = 0,
+        .count = *irq_count,
+    };
+
+    for (unsigned i = 0; i < *irq_count; i++) {
+        ((int32_t *)&irq_set->data)[i] = event_notifier_get_fd(&notifier[i]);
+    }
+    r = ioctl(s->device, VFIO_DEVICE_SET_IRQS, irq_set);
+    g_free(irq_set);
+    if (r <= 0) {
+        error_setg_errno(errp, errno, "Failed to setup device interrupts");
+        return -errno;
+    } else if (r < *irq_count) {
+        error_setg(errp, "Not enough device interrupts available");
+        *irq_count = r;
+        return -EOVERFLOW;
+    }
+    return 0;
+}
+
 static int qemu_vfio_pci_read_config(QEMUVFIOState *s, void *buf,
                                      int size, int ofs)
 {