Message ID | 1603365127-14202-1-git-send-email-kwankhede@nvidia.com |
---|---|
Headers | show |
Series | Add migration support for VFIO devices | expand |
On Thu, 22 Oct 2020 16:41:54 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > Whether the VFIO device supports migration or not is decided based of > migration region query. If migration region query is successful and migration > region initialization is successful then migration is supported else > migration is blocked. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Reviewed-by: Neo Jia <cjia@nvidia.com> > Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > --- > hw/vfio/meson.build | 1 + > hw/vfio/migration.c | 129 ++++++++++++++++++++++++++++++++++++++++++ > hw/vfio/trace-events | 3 + > include/hw/vfio/vfio-common.h | 9 +++ > 4 files changed, 142 insertions(+) > create mode 100644 hw/vfio/migration.c > > diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build > index 37efa74018bc..da9af297a0c5 100644 > --- a/hw/vfio/meson.build > +++ b/hw/vfio/meson.build > @@ -2,6 +2,7 @@ vfio_ss = ss.source_set() > vfio_ss.add(files( > 'common.c', > 'spapr.c', > + 'migration.c', > )) > vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( > 'display.c', > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > new file mode 100644 > index 000000000000..5f74a3ad1d72 > --- /dev/null > +++ b/hw/vfio/migration.c > @@ -0,0 +1,129 @@ > +/* > + * Migration support for VFIO devices > + * > + * Copyright NVIDIA, Inc. 2020 > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + */ > + > +#include "qemu/osdep.h" > +#include <linux/vfio.h> > + > +#include "hw/vfio/vfio-common.h" > +#include "cpu.h" > +#include "migration/migration.h" > +#include "migration/qemu-file.h" > +#include "migration/register.h" > +#include "migration/blocker.h" > +#include "migration/misc.h" > +#include "qapi/error.h" > +#include "exec/ramlist.h" > +#include "exec/ram_addr.h" > +#include "pci.h" > +#include "trace.h" > + > +static void vfio_migration_region_exit(VFIODevice *vbasedev) > +{ > + VFIOMigration *migration = vbasedev->migration; > + > + if (!migration) { > + return; > + } > + > + if (migration->region.size) { > + vfio_region_exit(&migration->region); > + vfio_region_finalize(&migration->region); > + } > +} > + > +static int vfio_migration_init(VFIODevice *vbasedev, > + struct vfio_region_info *info) > +{ > + int ret; > + Object *obj; > + VFIOMigration *migration; > + > + if (!vbasedev->ops->vfio_get_object) { > + return -EINVAL; > + } > + > + obj = vbasedev->ops->vfio_get_object(vbasedev); > + if (!obj) { > + return -EINVAL; > + } > + > + migration = g_new0(VFIOMigration, 1); > + > + ret = vfio_region_setup(obj, vbasedev, &migration->region, > + info->index, "migration"); > + if (ret) { > + error_report("%s: Failed to setup VFIO migration region %d: %s", > + vbasedev->name, info->index, strerror(-ret)); > + goto err; > + } > + > + if (!migration->region.size) { > + error_report("%s: Invalid zero-sized of VFIO migration region %d", > + vbasedev->name, info->index); > + ret = -EINVAL; > + goto err; > + } > + > + vbasedev->migration = migration; > + return 0; > + > +err: > + vfio_migration_region_exit(vbasedev); We can't get here with vbasedev->migration set, did you intend to set vbasedev->migration before testing region.size? Thanks, Alex > + g_free(migration); > + return ret; > +} > + > +/* ---------------------------------------------------------------------- */ > + > +int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) > +{ > + struct vfio_region_info *info = NULL; > + Error *local_err = NULL; > + int ret; > + > + ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION, > + VFIO_REGION_SUBTYPE_MIGRATION, &info); > + if (ret) { > + goto add_blocker; > + } > + > + ret = vfio_migration_init(vbasedev, info); > + if (ret) { > + goto add_blocker; > + } > + > + g_free(info); > + trace_vfio_migration_probe(vbasedev->name, info->index); > + return 0; > + > +add_blocker: > + error_setg(&vbasedev->migration_blocker, > + "VFIO device doesn't support migration"); > + g_free(info); > + > + ret = migrate_add_blocker(vbasedev->migration_blocker, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + error_free(vbasedev->migration_blocker); > + vbasedev->migration_blocker = NULL; > + } > + return ret; > +} > + > +void vfio_migration_finalize(VFIODevice *vbasedev) > +{ > + if (vbasedev->migration_blocker) { > + migrate_del_blocker(vbasedev->migration_blocker); > + error_free(vbasedev->migration_blocker); > + vbasedev->migration_blocker = NULL; > + } > + > + vfio_migration_region_exit(vbasedev); > + g_free(vbasedev->migration); > +} > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index a0c7b49a2ebc..9ced5ec6277c 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -145,3 +145,6 @@ vfio_display_edid_link_up(void) "" > vfio_display_edid_link_down(void) "" > vfio_display_edid_update(uint32_t prefx, uint32_t prefy) "%ux%u" > vfio_display_edid_write_error(void) "" > + > +# migration.c > +vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index ba6169cd926e..8275c4c68f45 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -57,6 +57,10 @@ typedef struct VFIORegion { > uint8_t nr; /* cache the region number for debug */ > } VFIORegion; > > +typedef struct VFIOMigration { > + VFIORegion region; > +} VFIOMigration; > + > typedef struct VFIOAddressSpace { > AddressSpace *as; > QLIST_HEAD(, VFIOContainer) containers; > @@ -113,6 +117,8 @@ typedef struct VFIODevice { > unsigned int num_irqs; > unsigned int num_regions; > unsigned int flags; > + VFIOMigration *migration; > + Error *migration_blocker; > } VFIODevice; > > struct VFIODeviceOps { > @@ -204,4 +210,7 @@ int vfio_spapr_create_window(VFIOContainer *container, > int vfio_spapr_remove_window(VFIOContainer *container, > hwaddr offset_within_address_space); > > +int vfio_migration_probe(VFIODevice *vbasedev, Error **errp); > +void vfio_migration_finalize(VFIODevice *vbasedev); > + > #endif /* HW_VFIO_VFIO_COMMON_H */
On 10/22/2020 7:52 PM, Alex Williamson wrote: > On Thu, 22 Oct 2020 16:41:54 +0530 > Kirti Wankhede <kwankhede@nvidia.com> wrote: > >> Whether the VFIO device supports migration or not is decided based of >> migration region query. If migration region query is successful and migration >> region initialization is successful then migration is supported else >> migration is blocked. >> >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> >> Reviewed-by: Neo Jia <cjia@nvidia.com> >> Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com> >> --- >> hw/vfio/meson.build | 1 + >> hw/vfio/migration.c | 129 ++++++++++++++++++++++++++++++++++++++++++ >> hw/vfio/trace-events | 3 + >> include/hw/vfio/vfio-common.h | 9 +++ >> 4 files changed, 142 insertions(+) >> create mode 100644 hw/vfio/migration.c >> >> diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build >> index 37efa74018bc..da9af297a0c5 100644 >> --- a/hw/vfio/meson.build >> +++ b/hw/vfio/meson.build >> @@ -2,6 +2,7 @@ vfio_ss = ss.source_set() >> vfio_ss.add(files( >> 'common.c', >> 'spapr.c', >> + 'migration.c', >> )) >> vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( >> 'display.c', >> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c >> new file mode 100644 >> index 000000000000..5f74a3ad1d72 >> --- /dev/null >> +++ b/hw/vfio/migration.c >> @@ -0,0 +1,129 @@ >> +/* >> + * Migration support for VFIO devices >> + * >> + * Copyright NVIDIA, Inc. 2020 >> + * >> + * This work is licensed under the terms of the GNU GPL, version 2. See >> + * the COPYING file in the top-level directory. >> + */ >> + >> +#include "qemu/osdep.h" >> +#include <linux/vfio.h> >> + >> +#include "hw/vfio/vfio-common.h" >> +#include "cpu.h" >> +#include "migration/migration.h" >> +#include "migration/qemu-file.h" >> +#include "migration/register.h" >> +#include "migration/blocker.h" >> +#include "migration/misc.h" >> +#include "qapi/error.h" >> +#include "exec/ramlist.h" >> +#include "exec/ram_addr.h" >> +#include "pci.h" >> +#include "trace.h" >> + >> +static void vfio_migration_region_exit(VFIODevice *vbasedev) >> +{ >> + VFIOMigration *migration = vbasedev->migration; >> + >> + if (!migration) { >> + return; >> + } >> + >> + if (migration->region.size) { >> + vfio_region_exit(&migration->region); >> + vfio_region_finalize(&migration->region); >> + } >> +} >> + >> +static int vfio_migration_init(VFIODevice *vbasedev, >> + struct vfio_region_info *info) >> +{ >> + int ret; >> + Object *obj; >> + VFIOMigration *migration; >> + >> + if (!vbasedev->ops->vfio_get_object) { >> + return -EINVAL; >> + } >> + >> + obj = vbasedev->ops->vfio_get_object(vbasedev); >> + if (!obj) { >> + return -EINVAL; >> + } >> + >> + migration = g_new0(VFIOMigration, 1); >> + >> + ret = vfio_region_setup(obj, vbasedev, &migration->region, >> + info->index, "migration"); >> + if (ret) { >> + error_report("%s: Failed to setup VFIO migration region %d: %s", >> + vbasedev->name, info->index, strerror(-ret)); >> + goto err; >> + } >> + >> + if (!migration->region.size) { >> + error_report("%s: Invalid zero-sized of VFIO migration region %d", >> + vbasedev->name, info->index); >> + ret = -EINVAL; >> + goto err; >> + } >> + >> + vbasedev->migration = migration; >> + return 0; >> + >> +err: >> + vfio_migration_region_exit(vbasedev); > > We can't get here with vbasedev->migration set, did you intend to set > vbasedev->migration before testing region.size? Thanks, > Oh yes, I missed to address this when I moved migration variable to VFIODevice. Moving vbasedev->migration before region.size check. Also removing region.size check vfio_migration_region_exit() for vfio_region_exit() and vfio_region_finalize(). Thanks, Kirti > Alex > > > >> + g_free(migration); >> + return ret; >> +} >> + >> +/* ---------------------------------------------------------------------- */ >> + >> +int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) >> +{ >> + struct vfio_region_info *info = NULL; >> + Error *local_err = NULL; >> + int ret; >> + >> + ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION, >> + VFIO_REGION_SUBTYPE_MIGRATION, &info); >> + if (ret) { >> + goto add_blocker; >> + } >> + >> + ret = vfio_migration_init(vbasedev, info); >> + if (ret) { >> + goto add_blocker; >> + } >> + >> + g_free(info); >> + trace_vfio_migration_probe(vbasedev->name, info->index); >> + return 0; >> + >> +add_blocker: >> + error_setg(&vbasedev->migration_blocker, >> + "VFIO device doesn't support migration"); >> + g_free(info); >> + >> + ret = migrate_add_blocker(vbasedev->migration_blocker, &local_err); >> + if (local_err) { >> + error_propagate(errp, local_err); >> + error_free(vbasedev->migration_blocker); >> + vbasedev->migration_blocker = NULL; >> + } >> + return ret; >> +} >> + >> +void vfio_migration_finalize(VFIODevice *vbasedev) >> +{ >> + if (vbasedev->migration_blocker) { >> + migrate_del_blocker(vbasedev->migration_blocker); >> + error_free(vbasedev->migration_blocker); >> + vbasedev->migration_blocker = NULL; >> + } >> + >> + vfio_migration_region_exit(vbasedev); >> + g_free(vbasedev->migration); >> +} >> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events >> index a0c7b49a2ebc..9ced5ec6277c 100644 >> --- a/hw/vfio/trace-events >> +++ b/hw/vfio/trace-events >> @@ -145,3 +145,6 @@ vfio_display_edid_link_up(void) "" >> vfio_display_edid_link_down(void) "" >> vfio_display_edid_update(uint32_t prefx, uint32_t prefy) "%ux%u" >> vfio_display_edid_write_error(void) "" >> + >> +# migration.c >> +vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h >> index ba6169cd926e..8275c4c68f45 100644 >> --- a/include/hw/vfio/vfio-common.h >> +++ b/include/hw/vfio/vfio-common.h >> @@ -57,6 +57,10 @@ typedef struct VFIORegion { >> uint8_t nr; /* cache the region number for debug */ >> } VFIORegion; >> >> +typedef struct VFIOMigration { >> + VFIORegion region; >> +} VFIOMigration; >> + >> typedef struct VFIOAddressSpace { >> AddressSpace *as; >> QLIST_HEAD(, VFIOContainer) containers; >> @@ -113,6 +117,8 @@ typedef struct VFIODevice { >> unsigned int num_irqs; >> unsigned int num_regions; >> unsigned int flags; >> + VFIOMigration *migration; >> + Error *migration_blocker; >> } VFIODevice; >> >> struct VFIODeviceOps { >> @@ -204,4 +210,7 @@ int vfio_spapr_create_window(VFIOContainer *container, >> int vfio_spapr_remove_window(VFIOContainer *container, >> hwaddr offset_within_address_space); >> >> +int vfio_migration_probe(VFIODevice *vbasedev, Error **errp); >> +void vfio_migration_finalize(VFIODevice *vbasedev); >> + >> #endif /* HW_VFIO_VFIO_COMMON_H */ >
On 10/22/2020 10:05 PM, Alex Williamson wrote: > On Thu, 22 Oct 2020 16:41:55 +0530 > Kirti Wankhede <kwankhede@nvidia.com> wrote: > >> VM state change handler is called on change in VM's state. Based on >> VM state, VFIO device state should be changed. >> Added read/write helper functions for migration region. >> Added function to set device_state. >> >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> >> Reviewed-by: Neo Jia <cjia@nvidia.com> >> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> >> --- >> hw/vfio/migration.c | 158 ++++++++++++++++++++++++++++++++++++++++++ >> hw/vfio/trace-events | 2 + >> include/hw/vfio/vfio-common.h | 4 ++ >> 3 files changed, 164 insertions(+) >> >> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c >> index 5f74a3ad1d72..34f39c7e2e28 100644 >> --- a/hw/vfio/migration.c >> +++ b/hw/vfio/migration.c >> @@ -10,6 +10,7 @@ >> #include "qemu/osdep.h" >> #include <linux/vfio.h> >> >> +#include "sysemu/runstate.h" >> #include "hw/vfio/vfio-common.h" >> #include "cpu.h" >> #include "migration/migration.h" >> @@ -22,6 +23,157 @@ >> #include "exec/ram_addr.h" >> #include "pci.h" >> #include "trace.h" >> +#include "hw/hw.h" >> + >> +static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, >> + off_t off, bool iswrite) >> +{ >> + int ret; >> + >> + ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : >> + pread(vbasedev->fd, val, count, off); >> + if (ret < count) { >> + error_report("vfio_mig_%s %d byte %s: failed at offset 0x%lx, err: %s", >> + iswrite ? "write" : "read", count, >> + vbasedev->name, off, strerror(errno)); >> + return (ret < 0) ? ret : -EINVAL; >> + } >> + return 0; >> +} >> + >> +static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count, >> + off_t off, bool iswrite) >> +{ >> + int ret, done = 0; >> + __u8 *tbuf = buf; >> + >> + while (count) { >> + int bytes = 0; >> + >> + if (count >= 8 && !(off % 8)) { >> + bytes = 8; >> + } else if (count >= 4 && !(off % 4)) { >> + bytes = 4; >> + } else if (count >= 2 && !(off % 2)) { >> + bytes = 2; >> + } else { >> + bytes = 1; >> + } >> + >> + ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite); >> + if (ret) { >> + return ret; >> + } >> + >> + count -= bytes; >> + done += bytes; >> + off += bytes; >> + tbuf += bytes; >> + } >> + return done; >> +} >> + >> +#define vfio_mig_read(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, false) >> +#define vfio_mig_write(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, true) >> + >> +#define VFIO_MIG_STRUCT_OFFSET(f) \ >> + offsetof(struct vfio_device_migration_info, f) >> +/* >> + * Change the device_state register for device @vbasedev. Bits set in @mask >> + * are preserved, bits set in @value are set, and bits not set in either @mask >> + * or @value are cleared in device_state. If the register cannot be accessed, >> + * the resulting state would be invalid, or the device enters an error state, >> + * an error is returned. >> + */ >> + >> +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, >> + uint32_t value) >> +{ >> + VFIOMigration *migration = vbasedev->migration; >> + VFIORegion *region = &migration->region; >> + off_t dev_state_off = region->fd_offset + >> + VFIO_MIG_STRUCT_OFFSET(device_state); >> + uint32_t device_state; >> + int ret; >> + >> + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), >> + dev_state_off); >> + if (ret < 0) { >> + return ret; >> + } >> + >> + device_state = (device_state & mask) | value; >> + >> + if (!VFIO_DEVICE_STATE_VALID(device_state)) { >> + return -EINVAL; >> + } >> + >> + ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), >> + dev_state_off); >> + if (ret < 0) { >> + int rret; >> + >> + rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), >> + dev_state_off); >> + >> + if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) { >> + hw_error("%s: Device in error state 0x%x", vbasedev->name, >> + device_state); >> + return rret ? rret : -EIO; >> + } >> + return ret; >> + } >> + >> + migration->device_state = device_state; >> + trace_vfio_migration_set_state(vbasedev->name, device_state); >> + return 0; >> +} >> + >> +static void vfio_vmstate_change(void *opaque, int running, RunState state) >> +{ >> + VFIODevice *vbasedev = opaque; >> + VFIOMigration *migration = vbasedev->migration; >> + uint32_t value, mask; >> + int ret; >> + >> + if ((vbasedev->migration->vm_running == running)) { >> + return; >> + } >> + >> + if (running) { >> + /* >> + * Here device state can have one of _SAVING, _RESUMING or _STOP bit. >> + * Transition from _SAVING to _RUNNING can happen if there is migration >> + * failure, in that case clear _SAVING bit. >> + * Transition from _RESUMING to _RUNNING occurs during resuming >> + * phase, in that case clear _RESUMING bit. >> + * In both the above cases, set _RUNNING bit. >> + */ >> + mask = ~VFIO_DEVICE_STATE_MASK; >> + value = VFIO_DEVICE_STATE_RUNNING; >> + } else { >> + /* >> + * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset >> + * _RUNNING bit >> + */ >> + mask = ~VFIO_DEVICE_STATE_RUNNING; >> + value = 0; >> + } >> + >> + ret = vfio_migration_set_state(vbasedev, mask, value); >> + if (ret) { >> + /* >> + * Migration should be aborted in this case, but vm_state_notify() >> + * currently does not support reporting failures. >> + */ >> + error_report("%s: Failed to set device state 0x%x", vbasedev->name, >> + (migration->device_state & mask) | value); >> + qemu_file_set_error(migrate_get_current()->to_dst_file, ret); >> + } >> + vbasedev->migration->vm_running = running; >> + trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), >> + (migration->device_state & mask) | value); >> +} >> >> static void vfio_migration_region_exit(VFIODevice *vbasedev) >> { >> @@ -71,6 +223,8 @@ static int vfio_migration_init(VFIODevice *vbasedev, >> } >> >> vbasedev->migration = migration; >> + migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, >> + vbasedev); >> return 0; >> >> err: >> @@ -118,6 +272,10 @@ add_blocker: >> >> void vfio_migration_finalize(VFIODevice *vbasedev) >> { >> + if (vbasedev->migration->vm_state) { >> + qemu_del_vm_change_state_handler(vbasedev->migration->vm_state); >> + } > > > This looks like a segfault, vfio_migration_teardown() is eventually > called unconditionally. The next patch modifies this function further, > but never checks that vbasedev->migration is non-NULL. Thanks, > Yup. Updated patch 4, 5 and 6. So now vfio_migration_finalize() looks like: void vfio_migration_finalize(VFIODevice *vbasedev) { VFIOMigration *migration = vbasedev->migration; if (migration) { if (migration->migration_state.notify) { remove_migration_state_change_notifier(&migration->migration_state); } if (migration->vm_state) { qemu_del_vm_change_state_handler(migration->vm_state); } vfio_migration_region_exit(vbasedev); g_free(vbasedev->migration); vbasedev->migration = NULL; } if (vbasedev->migration_blocker) { migrate_del_blocker(vbasedev->migration_blocker); error_free(vbasedev->migration_blocker); vbasedev->migration_blocker = NULL; } } Thanks, Kirti > Alex > > >> + >> if (vbasedev->migration_blocker) { >> migrate_del_blocker(vbasedev->migration_blocker); >> error_free(vbasedev->migration_blocker); >> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events >> index 9ced5ec6277c..41de81f12f60 100644 >> --- a/hw/vfio/trace-events >> +++ b/hw/vfio/trace-events >> @@ -148,3 +148,5 @@ vfio_display_edid_write_error(void) "" >> >> # migration.c >> vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" >> +vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" >> +vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h >> index 8275c4c68f45..9a571f1fb552 100644 >> --- a/include/hw/vfio/vfio-common.h >> +++ b/include/hw/vfio/vfio-common.h >> @@ -29,6 +29,7 @@ >> #ifdef CONFIG_LINUX >> #include <linux/vfio.h> >> #endif >> +#include "sysemu/sysemu.h" >> >> #define VFIO_MSG_PREFIX "vfio %s: " >> >> @@ -58,7 +59,10 @@ typedef struct VFIORegion { >> } VFIORegion; >> >> typedef struct VFIOMigration { >> + VMChangeStateEntry *vm_state; >> VFIORegion region; >> + uint32_t device_state; >> + int vm_running; >> } VFIOMigration; >> >> typedef struct VFIOAddressSpace { >
On Thu, 22 Oct 2020 23:11:39 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > On 10/22/2020 10:05 PM, Alex Williamson wrote: > > On Thu, 22 Oct 2020 16:41:55 +0530 > > Kirti Wankhede <kwankhede@nvidia.com> wrote: > > > >> VM state change handler is called on change in VM's state. Based on > >> VM state, VFIO device state should be changed. > >> Added read/write helper functions for migration region. > >> Added function to set device_state. > >> > >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > >> Reviewed-by: Neo Jia <cjia@nvidia.com> > >> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > >> --- > >> hw/vfio/migration.c | 158 ++++++++++++++++++++++++++++++++++++++++++ > >> hw/vfio/trace-events | 2 + > >> include/hw/vfio/vfio-common.h | 4 ++ > >> 3 files changed, 164 insertions(+) > >> > >> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > >> index 5f74a3ad1d72..34f39c7e2e28 100644 > >> --- a/hw/vfio/migration.c > >> +++ b/hw/vfio/migration.c > >> @@ -10,6 +10,7 @@ > >> #include "qemu/osdep.h" > >> #include <linux/vfio.h> > >> > >> +#include "sysemu/runstate.h" > >> #include "hw/vfio/vfio-common.h" > >> #include "cpu.h" > >> #include "migration/migration.h" > >> @@ -22,6 +23,157 @@ > >> #include "exec/ram_addr.h" > >> #include "pci.h" > >> #include "trace.h" > >> +#include "hw/hw.h" > >> + > >> +static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, > >> + off_t off, bool iswrite) > >> +{ > >> + int ret; > >> + > >> + ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : > >> + pread(vbasedev->fd, val, count, off); > >> + if (ret < count) { > >> + error_report("vfio_mig_%s %d byte %s: failed at offset 0x%lx, err: %s", > >> + iswrite ? "write" : "read", count, > >> + vbasedev->name, off, strerror(errno)); > >> + return (ret < 0) ? ret : -EINVAL; > >> + } > >> + return 0; > >> +} > >> + > >> +static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count, > >> + off_t off, bool iswrite) > >> +{ > >> + int ret, done = 0; > >> + __u8 *tbuf = buf; > >> + > >> + while (count) { > >> + int bytes = 0; > >> + > >> + if (count >= 8 && !(off % 8)) { > >> + bytes = 8; > >> + } else if (count >= 4 && !(off % 4)) { > >> + bytes = 4; > >> + } else if (count >= 2 && !(off % 2)) { > >> + bytes = 2; > >> + } else { > >> + bytes = 1; > >> + } > >> + > >> + ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite); > >> + if (ret) { > >> + return ret; > >> + } > >> + > >> + count -= bytes; > >> + done += bytes; > >> + off += bytes; > >> + tbuf += bytes; > >> + } > >> + return done; > >> +} > >> + > >> +#define vfio_mig_read(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, false) > >> +#define vfio_mig_write(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, true) > >> + > >> +#define VFIO_MIG_STRUCT_OFFSET(f) \ > >> + offsetof(struct vfio_device_migration_info, f) > >> +/* > >> + * Change the device_state register for device @vbasedev. Bits set in @mask > >> + * are preserved, bits set in @value are set, and bits not set in either @mask > >> + * or @value are cleared in device_state. If the register cannot be accessed, > >> + * the resulting state would be invalid, or the device enters an error state, > >> + * an error is returned. > >> + */ > >> + > >> +static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, > >> + uint32_t value) > >> +{ > >> + VFIOMigration *migration = vbasedev->migration; > >> + VFIORegion *region = &migration->region; > >> + off_t dev_state_off = region->fd_offset + > >> + VFIO_MIG_STRUCT_OFFSET(device_state); > >> + uint32_t device_state; > >> + int ret; > >> + > >> + ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), > >> + dev_state_off); > >> + if (ret < 0) { > >> + return ret; > >> + } > >> + > >> + device_state = (device_state & mask) | value; > >> + > >> + if (!VFIO_DEVICE_STATE_VALID(device_state)) { > >> + return -EINVAL; > >> + } > >> + > >> + ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), > >> + dev_state_off); > >> + if (ret < 0) { > >> + int rret; > >> + > >> + rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), > >> + dev_state_off); > >> + > >> + if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) { > >> + hw_error("%s: Device in error state 0x%x", vbasedev->name, > >> + device_state); > >> + return rret ? rret : -EIO; > >> + } > >> + return ret; > >> + } > >> + > >> + migration->device_state = device_state; > >> + trace_vfio_migration_set_state(vbasedev->name, device_state); > >> + return 0; > >> +} > >> + > >> +static void vfio_vmstate_change(void *opaque, int running, RunState state) > >> +{ > >> + VFIODevice *vbasedev = opaque; > >> + VFIOMigration *migration = vbasedev->migration; > >> + uint32_t value, mask; > >> + int ret; > >> + > >> + if ((vbasedev->migration->vm_running == running)) { > >> + return; > >> + } > >> + > >> + if (running) { > >> + /* > >> + * Here device state can have one of _SAVING, _RESUMING or _STOP bit. > >> + * Transition from _SAVING to _RUNNING can happen if there is migration > >> + * failure, in that case clear _SAVING bit. > >> + * Transition from _RESUMING to _RUNNING occurs during resuming > >> + * phase, in that case clear _RESUMING bit. > >> + * In both the above cases, set _RUNNING bit. > >> + */ > >> + mask = ~VFIO_DEVICE_STATE_MASK; > >> + value = VFIO_DEVICE_STATE_RUNNING; > >> + } else { > >> + /* > >> + * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset > >> + * _RUNNING bit > >> + */ > >> + mask = ~VFIO_DEVICE_STATE_RUNNING; > >> + value = 0; > >> + } > >> + > >> + ret = vfio_migration_set_state(vbasedev, mask, value); > >> + if (ret) { > >> + /* > >> + * Migration should be aborted in this case, but vm_state_notify() > >> + * currently does not support reporting failures. > >> + */ > >> + error_report("%s: Failed to set device state 0x%x", vbasedev->name, > >> + (migration->device_state & mask) | value); > >> + qemu_file_set_error(migrate_get_current()->to_dst_file, ret); > >> + } > >> + vbasedev->migration->vm_running = running; > >> + trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), > >> + (migration->device_state & mask) | value); > >> +} > >> > >> static void vfio_migration_region_exit(VFIODevice *vbasedev) > >> { > >> @@ -71,6 +223,8 @@ static int vfio_migration_init(VFIODevice *vbasedev, > >> } > >> > >> vbasedev->migration = migration; > >> + migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, > >> + vbasedev); > >> return 0; > >> > >> err: > >> @@ -118,6 +272,10 @@ add_blocker: > >> > >> void vfio_migration_finalize(VFIODevice *vbasedev) > >> { > >> + if (vbasedev->migration->vm_state) { > >> + qemu_del_vm_change_state_handler(vbasedev->migration->vm_state); > >> + } > > > > > > This looks like a segfault, vfio_migration_teardown() is eventually > > called unconditionally. The next patch modifies this function further, > > but never checks that vbasedev->migration is non-NULL. Thanks, > > > > Yup. > Updated patch 4, 5 and 6. So now vfio_migration_finalize() looks like: > > void vfio_migration_finalize(VFIODevice *vbasedev) > { > VFIOMigration *migration = vbasedev->migration; > > if (migration) { > if (migration->migration_state.notify) { > > remove_migration_state_change_notifier(&migration->migration_state); > } > > if (migration->vm_state) { > qemu_del_vm_change_state_handler(migration->vm_state); > } We should be able to remove these based only on migration being non-NULL, their setup cannot fail once we've setup the migration pointer. Thanks, Alex > > vfio_migration_region_exit(vbasedev); > g_free(vbasedev->migration); > vbasedev->migration = NULL; > } > > if (vbasedev->migration_blocker) { > migrate_del_blocker(vbasedev->migration_blocker); > error_free(vbasedev->migration_blocker); > vbasedev->migration_blocker = NULL; > } > } > > Thanks, > Kirti > > > Alex > > > > > >> + > >> if (vbasedev->migration_blocker) { > >> migrate_del_blocker(vbasedev->migration_blocker); > >> error_free(vbasedev->migration_blocker); > >> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > >> index 9ced5ec6277c..41de81f12f60 100644 > >> --- a/hw/vfio/trace-events > >> +++ b/hw/vfio/trace-events > >> @@ -148,3 +148,5 @@ vfio_display_edid_write_error(void) "" > >> > >> # migration.c > >> vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" > >> +vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" > >> +vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" > >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > >> index 8275c4c68f45..9a571f1fb552 100644 > >> --- a/include/hw/vfio/vfio-common.h > >> +++ b/include/hw/vfio/vfio-common.h > >> @@ -29,6 +29,7 @@ > >> #ifdef CONFIG_LINUX > >> #include <linux/vfio.h> > >> #endif > >> +#include "sysemu/sysemu.h" > >> > >> #define VFIO_MSG_PREFIX "vfio %s: " > >> > >> @@ -58,7 +59,10 @@ typedef struct VFIORegion { > >> } VFIORegion; > >> > >> typedef struct VFIOMigration { > >> + VMChangeStateEntry *vm_state; > >> VFIORegion region; > >> + uint32_t device_state; > >> + int vm_running; > >> } VFIOMigration; > >> > >> typedef struct VFIOAddressSpace { > > >
On Thu, 22 Oct 2020 16:41:50 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > Hi, > > This Patch set adds migration support for VFIO devices in QEMU. We're cutting it pretty close for the 5.2 soft freeze, but clearly we've seen this series a few times. The key points for me are that I no longer see anything that should adversely affect non-migration support (aside from the easily fixed bugs noted) and I think our config space vmstate is sane now, so we hopefully won't need to throw it away and start over (experts, please verify). I think there's still a respin needed, but I hope that others can squeeze in a review, find and verify issues they've noted previously, re-confirm their reviews and acks, and maybe we can get this in by Tuesday. If migration is broken, we can fix that as we go, but the foundation looks reasonable enough to me. Thanks, Alex
On 10/23/2020 1:20 AM, Alex Williamson wrote: > On Thu, 22 Oct 2020 16:41:59 +0530 > Kirti Wankhede <kwankhede@nvidia.com> wrote: > >> Sequence during _RESUMING device state: >> While data for this device is available, repeat below steps: >> a. read data_offset from where user application should write data. >> b. write data of data_size to migration region from data_offset. >> c. write data_size which indicates vendor driver that data is written in >> staging buffer. >> >> For user, data is opaque. User should write data in the same order as >> received. >> >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> >> Reviewed-by: Neo Jia <cjia@nvidia.com> >> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> >> --- >> hw/vfio/migration.c | 192 +++++++++++++++++++++++++++++++++++++++++++++++++++ >> hw/vfio/trace-events | 3 + >> 2 files changed, 195 insertions(+) >> >> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c >> index 5506cef15d88..46d05d230e2a 100644 >> --- a/hw/vfio/migration.c >> +++ b/hw/vfio/migration.c >> @@ -257,6 +257,77 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) >> return ret; >> } >> >> +static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, >> + uint64_t data_size) >> +{ >> + VFIORegion *region = &vbasedev->migration->region; >> + uint64_t data_offset = 0, size, report_size; >> + int ret; >> + >> + do { >> + ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), >> + region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); >> + if (ret < 0) { >> + return ret; >> + } >> + >> + if (data_offset + data_size > region->size) { >> + /* >> + * If data_size is greater than the data section of migration region >> + * then iterate the write buffer operation. This case can occur if >> + * size of migration region at destination is smaller than size of >> + * migration region at source. >> + */ >> + report_size = size = region->size - data_offset; >> + data_size -= size; >> + } else { >> + report_size = size = data_size; >> + data_size = 0; >> + } >> + >> + trace_vfio_load_state_device_data(vbasedev->name, data_offset, size); >> + >> + while (size) { >> + void *buf; >> + uint64_t sec_size; >> + bool buf_alloc = false; >> + >> + buf = get_data_section_size(region, data_offset, size, &sec_size); >> + >> + if (!buf) { >> + buf = g_try_malloc(sec_size); >> + if (!buf) { >> + error_report("%s: Error allocating buffer ", __func__); >> + return -ENOMEM; >> + } >> + buf_alloc = true; >> + } >> + >> + qemu_get_buffer(f, buf, sec_size); >> + >> + if (buf_alloc) { >> + ret = vfio_mig_write(vbasedev, buf, sec_size, >> + region->fd_offset + data_offset); >> + g_free(buf); >> + >> + if (ret < 0) { >> + return ret; >> + } >> + } >> + size -= sec_size; >> + data_offset += sec_size; >> + } >> + >> + ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size), >> + region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); >> + if (ret < 0) { >> + return ret; >> + } >> + } while (data_size); >> + >> + return 0; >> +} >> + >> static int vfio_update_pending(VFIODevice *vbasedev) >> { >> VFIOMigration *migration = vbasedev->migration; >> @@ -293,6 +364,33 @@ static int vfio_save_device_config_state(QEMUFile *f, void *opaque) >> return qemu_file_get_error(f); >> } >> >> +static int vfio_load_device_config_state(QEMUFile *f, void *opaque) >> +{ >> + VFIODevice *vbasedev = opaque; >> + uint64_t data; >> + >> + if (vbasedev->ops && vbasedev->ops->vfio_load_config) { >> + int ret; >> + >> + ret = vbasedev->ops->vfio_load_config(vbasedev, f); >> + if (ret) { >> + error_report("%s: Failed to load device config space", >> + vbasedev->name); >> + return ret; >> + } >> + } >> + >> + data = qemu_get_be64(f); >> + if (data != VFIO_MIG_FLAG_END_OF_STATE) { >> + error_report("%s: Failed loading device config space, " >> + "end flag incorrect 0x%"PRIx64, vbasedev->name, data); >> + return -EINVAL; >> + } >> + >> + trace_vfio_load_device_config_state(vbasedev->name); >> + return qemu_file_get_error(f); >> +} >> + >> /* ---------------------------------------------------------------------- */ >> >> static int vfio_save_setup(QEMUFile *f, void *opaque) >> @@ -477,12 +575,106 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) >> return ret; >> } >> >> +static int vfio_load_setup(QEMUFile *f, void *opaque) >> +{ >> + VFIODevice *vbasedev = opaque; >> + VFIOMigration *migration = vbasedev->migration; >> + int ret = 0; >> + >> + if (migration->region.mmaps) { >> + ret = vfio_region_mmap(&migration->region); > > > Checking, are we in the right thread context not to require locking the > iothread as we did in vfio_save_setup()? > > iothread lock is held when calling load_setup. >> + if (ret) { >> + error_report("%s: Failed to mmap VFIO migration region %d: %s", >> + vbasedev->name, migration->region.nr, >> + strerror(-ret)); >> + error_report("%s: Falling back to slow path", vbasedev->name); >> + } >> + } >> + >> + ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK, >> + VFIO_DEVICE_STATE_RESUMING); >> + if (ret) { >> + error_report("%s: Failed to set state RESUMING", vbasedev->name); >> + if (migration->region.mmaps) { >> + vfio_region_unmap(&migration->region); >> + } >> + } >> + return ret; >> +} >> + >> +static int vfio_load_cleanup(void *opaque) >> +{ >> + vfio_save_cleanup(opaque); > > > The tracing in there is going to be rather confusing. Thanks, > Updating patch 7 and this to separate tracing. Thanks, Kirti > Alex > > >> + return 0; >> +} >> + >> +static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) >> +{ >> + VFIODevice *vbasedev = opaque; >> + int ret = 0; >> + uint64_t data; >> + >> + data = qemu_get_be64(f); >> + while (data != VFIO_MIG_FLAG_END_OF_STATE) { >> + >> + trace_vfio_load_state(vbasedev->name, data); >> + >> + switch (data) { >> + case VFIO_MIG_FLAG_DEV_CONFIG_STATE: >> + { >> + ret = vfio_load_device_config_state(f, opaque); >> + if (ret) { >> + return ret; >> + } >> + break; >> + } >> + case VFIO_MIG_FLAG_DEV_SETUP_STATE: >> + { >> + data = qemu_get_be64(f); >> + if (data == VFIO_MIG_FLAG_END_OF_STATE) { >> + return ret; >> + } else { >> + error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64, >> + vbasedev->name, data); >> + return -EINVAL; >> + } >> + break; >> + } >> + case VFIO_MIG_FLAG_DEV_DATA_STATE: >> + { >> + uint64_t data_size = qemu_get_be64(f); >> + >> + if (data_size) { >> + ret = vfio_load_buffer(f, vbasedev, data_size); >> + if (ret < 0) { >> + return ret; >> + } >> + } >> + break; >> + } >> + default: >> + error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data); >> + return -EINVAL; >> + } >> + >> + data = qemu_get_be64(f); >> + ret = qemu_file_get_error(f); >> + if (ret) { >> + return ret; >> + } >> + } >> + return ret; >> +} >> + >> static SaveVMHandlers savevm_vfio_handlers = { >> .save_setup = vfio_save_setup, >> .save_cleanup = vfio_save_cleanup, >> .save_live_pending = vfio_save_pending, >> .save_live_iterate = vfio_save_iterate, >> .save_live_complete_precopy = vfio_save_complete_precopy, >> + .load_setup = vfio_load_setup, >> + .load_cleanup = vfio_load_cleanup, >> + .load_state = vfio_load_state, >> }; >> >> /* ---------------------------------------------------------------------- */ >> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events >> index 9f5712dab1ea..4804cc266d44 100644 >> --- a/hw/vfio/trace-events >> +++ b/hw/vfio/trace-events >> @@ -159,3 +159,6 @@ vfio_save_device_config_state(const char *name) " (%s)" >> vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64 >> vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d" >> vfio_save_complete_precopy(const char *name) " (%s)" >> +vfio_load_device_config_state(const char *name) " (%s)" >> +vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 >> +vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64 >
On 10/23/2020 3:48 AM, Alex Williamson wrote: > On Thu, 22 Oct 2020 16:42:07 +0530 > Kirti Wankhede <kwankhede@nvidia.com> wrote: > >> Added amount of bytes transferred to the VM at destination by all VFIO >> devices >> >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> >> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> >> --- >> hw/vfio/common.c | 20 ++++++++++++++++++++ >> hw/vfio/migration.c | 10 ++++++++++ >> include/qemu/vfio-helpers.h | 3 +++ >> migration/migration.c | 14 ++++++++++++++ >> monitor/hmp-cmds.c | 6 ++++++ >> qapi/migration.json | 17 +++++++++++++++++ >> 6 files changed, 70 insertions(+) >> >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c >> index 9c879e5c0f62..8d0758eda9fa 100644 >> --- a/hw/vfio/common.c >> +++ b/hw/vfio/common.c >> @@ -39,6 +39,7 @@ >> #include "trace.h" >> #include "qapi/error.h" >> #include "migration/migration.h" >> +#include "qemu/vfio-helpers.h" >> >> VFIOGroupList vfio_group_list = >> QLIST_HEAD_INITIALIZER(vfio_group_list); >> @@ -292,6 +293,25 @@ const MemoryRegionOps vfio_region_ops = { >> * Device state interfaces >> */ >> >> +bool vfio_mig_active(void) >> +{ >> + VFIOGroup *group; >> + VFIODevice *vbasedev; >> + >> + if (QLIST_EMPTY(&vfio_group_list)) { >> + return false; >> + } >> + >> + QLIST_FOREACH(group, &vfio_group_list, next) { >> + QLIST_FOREACH(vbasedev, &group->device_list, next) { >> + if (vbasedev->migration_blocker) { >> + return false; >> + } >> + } >> + } >> + return true; >> +} >> + >> static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container) >> { >> VFIOGroup *group; >> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c >> index 77ee60a43ea5..b23e21c6de2b 100644 >> --- a/hw/vfio/migration.c >> +++ b/hw/vfio/migration.c >> @@ -28,6 +28,7 @@ >> #include "pci.h" >> #include "trace.h" >> #include "hw/hw.h" >> +#include "qemu/vfio-helpers.h" >> >> /* >> * Flags to be used as unique delimiters for VFIO devices in the migration >> @@ -45,6 +46,8 @@ >> #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) >> #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) >> >> +static int64_t bytes_transferred; >> + >> static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, >> off_t off, bool iswrite) >> { >> @@ -255,6 +258,7 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) >> *size = data_size; >> } >> >> + bytes_transferred += data_size; >> return ret; >> } >> >> @@ -776,6 +780,7 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) >> case MIGRATION_STATUS_CANCELLING: >> case MIGRATION_STATUS_CANCELLED: >> case MIGRATION_STATUS_FAILED: >> + bytes_transferred = 0; >> ret = vfio_migration_set_state(vbasedev, >> ~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING), >> VFIO_DEVICE_STATE_RUNNING); >> @@ -862,6 +867,11 @@ err: >> >> /* ---------------------------------------------------------------------- */ >> >> +int64_t vfio_mig_bytes_transferred(void) >> +{ >> + return bytes_transferred; >> +} >> + >> int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) >> { >> VFIOContainer *container = vbasedev->group->container; >> diff --git a/include/qemu/vfio-helpers.h b/include/qemu/vfio-helpers.h >> index 4491c8e1a6e9..7f7a46e6ef2d 100644 >> --- a/include/qemu/vfio-helpers.h >> +++ b/include/qemu/vfio-helpers.h >> @@ -29,4 +29,7 @@ void qemu_vfio_pci_unmap_bar(QEMUVFIOState *s, int index, void *bar, >> int qemu_vfio_pci_init_irq(QEMUVFIOState *s, EventNotifier *e, >> int irq_type, Error **errp); >> >> +bool vfio_mig_active(void); >> +int64_t vfio_mig_bytes_transferred(void); >> + >> #endif > > > I don't think vfio-helpers is the right place for this, this header is > specifically for using util/vfio-helpers.c. Would > include/hw/vfio/vfio-common.h work? > > Yes, works with CONFIG_VFIO check. Changing it. >> diff --git a/migration/migration.c b/migration/migration.c >> index 0575ecb37953..8b2865d25ef4 100644 >> --- a/migration/migration.c >> +++ b/migration/migration.c >> @@ -56,6 +56,7 @@ >> #include "net/announce.h" >> #include "qemu/queue.h" >> #include "multifd.h" >> +#include "qemu/vfio-helpers.h" >> >> #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ >> >> @@ -1002,6 +1003,17 @@ static void populate_disk_info(MigrationInfo *info) >> } >> } >> >> +static void populate_vfio_info(MigrationInfo *info) >> +{ >> +#ifdef CONFIG_LINUX > > Use CONFIG_VFIO? I get a build failure on qemu-system-avr > > /usr/bin/ld: /tmp/tmp.3QbqxgbENl/build/../migration/migration.c:1012: > undefined reference to `vfio_mig_bytes_transferred'. Thanks, > Ok Changing it. > Alex > >> + if (vfio_mig_active()) { >> + info->has_vfio = true; >> + info->vfio = g_malloc0(sizeof(*info->vfio)); >> + info->vfio->transferred = vfio_mig_bytes_transferred(); >> + } >> +#endif >> +} >> + >> static void fill_source_migration_info(MigrationInfo *info) >> { >> MigrationState *s = migrate_get_current(); >> @@ -1026,6 +1038,7 @@ static void fill_source_migration_info(MigrationInfo *info) >> populate_time_info(info, s); >> populate_ram_info(info, s); >> populate_disk_info(info); >> + populate_vfio_info(info); >> break; >> case MIGRATION_STATUS_COLO: >> info->has_status = true; >> @@ -1034,6 +1047,7 @@ static void fill_source_migration_info(MigrationInfo *info) >> case MIGRATION_STATUS_COMPLETED: >> populate_time_info(info, s); >> populate_ram_info(info, s); >> + populate_vfio_info(info); >> break; >> case MIGRATION_STATUS_FAILED: >> info->has_status = true; >> diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c >> index 9789f4277f50..56e9bad33d94 100644 >> --- a/monitor/hmp-cmds.c >> +++ b/monitor/hmp-cmds.c >> @@ -357,6 +357,12 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) >> } >> monitor_printf(mon, "]\n"); >> } >> + >> + if (info->has_vfio) { >> + monitor_printf(mon, "vfio device transferred: %" PRIu64 " kbytes\n", >> + info->vfio->transferred >> 10); >> + } >> + >> qapi_free_MigrationInfo(info); >> } >> >> diff --git a/qapi/migration.json b/qapi/migration.json >> index a5da513c9e05..3c7582052725 100644 >> --- a/qapi/migration.json >> +++ b/qapi/migration.json >> @@ -147,6 +147,18 @@ >> 'active', 'postcopy-active', 'postcopy-paused', >> 'postcopy-recover', 'completed', 'failed', 'colo', >> 'pre-switchover', 'device', 'wait-unplug' ] } >> +## >> +# @VfioStats: >> +# >> +# Detailed VFIO devices migration statistics >> +# >> +# @transferred: amount of bytes transferred to the target VM by VFIO devices >> +# >> +# Since: 5.2 >> +# >> +## >> +{ 'struct': 'VfioStats', >> + 'data': {'transferred': 'int' } } >> >> ## >> # @MigrationInfo: >> @@ -208,11 +220,16 @@ >> # >> # @socket-address: Only used for tcp, to know what the real port is (Since 4.0) >> # >> +# @vfio: @VfioStats containing detailed VFIO devices migration statistics, >> +# only returned if VFIO device is present, migration is supported by all >> +# VFIO devices and status is 'active' or 'completed' (since 5.2) >> +# >> # Since: 0.14.0 >> ## >> { 'struct': 'MigrationInfo', >> 'data': {'*status': 'MigrationStatus', '*ram': 'MigrationStats', >> '*disk': 'MigrationStats', >> + '*vfio': 'VfioStats', >> '*xbzrle-cache': 'XBZRLECacheStats', >> '*total-time': 'int', >> '*expected-downtime': 'int', >
On Thu, 22 Oct 2020 16:41:54 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > Whether the VFIO device supports migration or not is decided based of > migration region query. If migration region query is successful and migration > region initialization is successful then migration is supported else > migration is blocked. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Reviewed-by: Neo Jia <cjia@nvidia.com> > Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > --- > hw/vfio/meson.build | 1 + > hw/vfio/migration.c | 129 ++++++++++++++++++++++++++++++++++++++++++ > hw/vfio/trace-events | 3 + > include/hw/vfio/vfio-common.h | 9 +++ > 4 files changed, 142 insertions(+) > create mode 100644 hw/vfio/migration.c (...) > +static int vfio_migration_init(VFIODevice *vbasedev, > + struct vfio_region_info *info) > +{ > + int ret; > + Object *obj; > + VFIOMigration *migration; > + > + if (!vbasedev->ops->vfio_get_object) { > + return -EINVAL; > + } > + > + obj = vbasedev->ops->vfio_get_object(vbasedev); > + if (!obj) { > + return -EINVAL; > + } > + > + migration = g_new0(VFIOMigration, 1); > + > + ret = vfio_region_setup(obj, vbasedev, &migration->region, > + info->index, "migration"); > + if (ret) { > + error_report("%s: Failed to setup VFIO migration region %d: %s", > + vbasedev->name, info->index, strerror(-ret)); > + goto err; > + } > + > + if (!migration->region.size) { > + error_report("%s: Invalid zero-sized of VFIO migration region %d", s/of // > + vbasedev->name, info->index); > + ret = -EINVAL; > + goto err; > + } > + > + vbasedev->migration = migration; > + return 0; > + > +err: > + vfio_migration_region_exit(vbasedev); > + g_free(migration); > + return ret; > +} (...)