Message ID | 20200820134547.2355743-4-clg@kaod.org |
---|---|
State | New |
Headers | show |
Series | spapr/xive: Allocate vCPU IPIs from the vCPU contexts | expand |
On 8/20/20 3:45 PM, Cédric Le Goater wrote: > The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect() when the > vCPU connects to the KVM device and not when all the sources are reset > in kvmppc_xive_source_reset() This patch is introducing a regression when vsmt is in used. https://bugs.launchpad.net/qemu/+bug/1900241 when the OS boots, H_INT_SET_SOURCE_CONFIG fails with EINVAL, which should mean that the IPI is not created at the host/KVM level. > This requires extra care for hotplug vCPUs and VM restore. > > Signed-off-by: Cédric Le Goater <clg@kaod.org> > --- > hw/intc/spapr_xive_kvm.c | 47 +++++++++++++++++++++++++++++++++++----- > 1 file changed, 42 insertions(+), 5 deletions(-) > > diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c > index 4ea687c93ecd..f838c208b559 100644 > --- a/hw/intc/spapr_xive_kvm.c > +++ b/hw/intc/spapr_xive_kvm.c > @@ -146,6 +146,15 @@ int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp) > return s.ret; > } > > +static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, Error **errp) > +{ > + unsigned long ipi = kvm_arch_vcpu_id(cs); ( I am wondering if this is the correct id to use ? ) > + uint64_t state = 0; > + > + return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, ipi, > + &state, true, errp); > +} > + > int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) > { > ERRP_GUARD(); > @@ -175,6 +184,12 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) > return ret; > } > > + /* Create/reset the vCPU IPI */ > + ret = kvmppc_xive_reset_ipi(xive, tctx->cs, errp); > + if (ret < 0) { > + return ret; > + } > + > kvm_cpu_enable(tctx->cs); > return 0; > } > @@ -260,6 +275,12 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp) > > assert(xive->fd != -1); > > + /* > + * The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect() > + * and not with all sources in kvmppc_xive_source_reset() > + */ > + assert(srcno >= SPAPR_XIRQ_BASE); > + > if (xive_source_irq_is_lsi(xsrc, srcno)) { > state |= KVM_XIVE_LEVEL_SENSITIVE; > if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) { > @@ -271,12 +292,28 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp) > true, errp); > } > > +/* > + * To be valid, a source must have been claimed by the machine (valid > + * entry in the EAS table) and if it is a vCPU IPI, the vCPU should > + * have been enabled, which means the IPI has been allocated in > + * kvmppc_xive_cpu_connect(). > + */ > +static bool xive_source_is_valid(SpaprXive *xive, int i) > +{ > + return xive_eas_is_valid(&xive->eat[i]) && > + (i >= SPAPR_XIRQ_BASE || kvm_cpu_is_enabled(i)); > +} > + > static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp) > { > SpaprXive *xive = SPAPR_XIVE(xsrc->xive); > int i; > > - for (i = 0; i < xsrc->nr_irqs; i++) { > + /* > + * Skip the vCPU IPIs. These are created/reset when the vCPUs are > + * connected in kvmppc_xive_cpu_connect() > + */ > + for (i = SPAPR_XIRQ_BASE; i < xsrc->nr_irqs; i++) { This change skips the range [ 0x0 ... 0x1000 ] and relies on the presenter to create the vCPU IPIs at the KVM level. But spapr_irq_init() could have claimed more in : /* Enable the CPU IPIs */ for (i = 0; i < nr_servers; ++i) { SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(spapr->xive); if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i, false, errp) < 0) { return; } } I think this is what is happening with vsmt. However, I don't know how to fix it :/ Thanks, C. > int ret; > > if (!xive_eas_is_valid(&xive->eat[i])) { > @@ -370,7 +407,7 @@ static void kvmppc_xive_source_get_state(XiveSource *xsrc) > for (i = 0; i < xsrc->nr_irqs; i++) { > uint8_t pq; > > - if (!xive_eas_is_valid(&xive->eat[i])) { > + if (!xive_source_is_valid(xive, i)) { > continue; > } > > @@ -553,7 +590,7 @@ static void kvmppc_xive_change_state_handler(void *opaque, int running, > uint8_t pq; > uint8_t old_pq; > > - if (!xive_eas_is_valid(&xive->eat[i])) { > + if (!xive_source_is_valid(xive, i)) { > continue; > } > > @@ -581,7 +618,7 @@ static void kvmppc_xive_change_state_handler(void *opaque, int running, > for (i = 0; i < xsrc->nr_irqs; i++) { > uint8_t pq; > > - if (!xive_eas_is_valid(&xive->eat[i])) { > + if (!xive_source_is_valid(xive, i)) { > continue; > } > > @@ -696,7 +733,7 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id) > > /* Restore the EAT */ > for (i = 0; i < xive->nr_irqs; i++) { > - if (!xive_eas_is_valid(&xive->eat[i])) { > + if (!xive_source_is_valid(xive, i)) { > continue; > } > >
On Thu, 5 Nov 2020 09:37:27 +0100 Cédric Le Goater <clg@kaod.org> wrote: > On 8/20/20 3:45 PM, Cédric Le Goater wrote: > > The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect() when the > > vCPU connects to the KVM device and not when all the sources are reset > > in kvmppc_xive_source_reset() > > This patch is introducing a regression when vsmt is in used. > Well, it isn't exactly when vsmt is used, it is when vsmt is set to a different value than the one which is passed to -smp threads, otherwise you always get consecutive vcpu ids. > https://bugs.launchpad.net/qemu/+bug/1900241 > In this report we have threads=1, so depending on vsmt this gives the following vcpu ids: -M vsmt=1 -smp 8,cores=8,threads=1 kvmppc_xive_reset_ipi_on_cpu: cpu_index=0 vcpu_id=0 kvmppc_xive_reset_ipi_on_cpu: cpu_index=1 vcpu_id=1 kvmppc_xive_reset_ipi_on_cpu: cpu_index=2 vcpu_id=2 kvmppc_xive_reset_ipi_on_cpu: cpu_index=3 vcpu_id=3 kvmppc_xive_reset_ipi_on_cpu: cpu_index=4 vcpu_id=4 kvmppc_xive_reset_ipi_on_cpu: cpu_index=5 vcpu_id=5 kvmppc_xive_reset_ipi_on_cpu: cpu_index=6 vcpu_id=6 kvmppc_xive_reset_ipi_on_cpu: cpu_index=7 vcpu_id=7 -M vsmt=2 -smp 8,cores=8,threads=1 kvmppc_xive_reset_ipi_on_cpu: cpu_index=0 vcpu_id=0 kvmppc_xive_reset_ipi_on_cpu: cpu_index=1 vcpu_id=2 kvmppc_xive_reset_ipi_on_cpu: cpu_index=2 vcpu_id=4 kvmppc_xive_reset_ipi_on_cpu: cpu_index=3 vcpu_id=6 kvmppc_xive_reset_ipi_on_cpu: cpu_index=4 vcpu_id=8 kvmppc_xive_reset_ipi_on_cpu: cpu_index=5 vcpu_id=10 kvmppc_xive_reset_ipi_on_cpu: cpu_index=6 vcpu_id=12 kvmppc_xive_reset_ipi_on_cpu: cpu_index=7 vcpu_id=14 -M vsmt=4 -smp 8,cores=8,threads=1 kvmppc_xive_reset_ipi_on_cpu: cpu_index=0 vcpu_id=0 kvmppc_xive_reset_ipi_on_cpu: cpu_index=1 vcpu_id=4 kvmppc_xive_reset_ipi_on_cpu: cpu_index=2 vcpu_id=8 kvmppc_xive_reset_ipi_on_cpu: cpu_index=3 vcpu_id=12 kvmppc_xive_reset_ipi_on_cpu: cpu_index=4 vcpu_id=16 kvmppc_xive_reset_ipi_on_cpu: cpu_index=5 vcpu_id=20 kvmppc_xive_reset_ipi_on_cpu: cpu_index=6 vcpu_id=24 kvmppc_xive_reset_ipi_on_cpu: cpu_index=7 vcpu_id=28 -M vsmt=8 -smp 8,cores=8,threads=1 kvmppc_xive_reset_ipi_on_cpu: cpu_index=0 vcpu_id=0 kvmppc_xive_reset_ipi_on_cpu: cpu_index=1 vcpu_id=8 kvmppc_xive_reset_ipi_on_cpu: cpu_index=2 vcpu_id=16 kvmppc_xive_reset_ipi_on_cpu: cpu_index=3 vcpu_id=24 kvmppc_xive_reset_ipi_on_cpu: cpu_index=4 vcpu_id=32 kvmppc_xive_reset_ipi_on_cpu: cpu_index=5 vcpu_id=40 kvmppc_xive_reset_ipi_on_cpu: cpu_index=6 vcpu_id=48 kvmppc_xive_reset_ipi_on_cpu: cpu_index=7 vcpu_id=56 > when the OS boots, H_INT_SET_SOURCE_CONFIG fails with EINVAL, which > should mean that the IPI is not created at the host/KVM level. > [...] > > +static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, Error **errp) > > +{ > > + unsigned long ipi = kvm_arch_vcpu_id(cs); > > ( I am wondering if this is the correct id to use ? ) > Setting the ipi to the vcpu id seems to assume that the vcpu ids are consecutive, which is definitely not the case when vsmt != threads as explained above. Passing cs->cpu_index would provide consecutive ids but I'm not sure this is a correct fix. I gave a try : all the vCPUs get online in the guest as expected but something goes wrong when terminating QEMU: [ 5557.599881] WARNING: CPU: 40 PID: 59101 at arch/powerpc/kvm/book3s_xive_native.c:259 xive_native_esb_fault+0xe4/0x240 [kvm] [ 5557.599897] Modules linked in: xt_CHECKSUM ipt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_counter nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink tun bridge stp llc nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache kvm_hv kvm i2c_dev sunrpc ext4 mbcache jbd2 xts vmx_crypto ofpart ipmi_powernv ipmi_devintf powernv_flash ipmi_msghandler mtd ibmpowernv opal_prd at24 uio_pdrv_genirq uio ip_tables xfs libcrc32c sd_mod sg ast i2c_algo_bit drm_vram_helper drm_ttm_helper ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm ahci libahci tg3 libata drm_panel_orientation_quirks dm_mirror dm_region_hash dm_log dm_mod [ 5557.600010] CPU: 40 PID: 59101 Comm: qemu-system-ppc Kdump: loaded Tainted: G W --------- - - 4.18.0-240.el8.ppc64le #1 [ 5557.600041] NIP: c00800000e949fac LR: c00000000044b164 CTR: c00800000e949ec8 [ 5557.600060] REGS: c000001f69617840 TRAP: 0700 Tainted: G W --------- - - (4.18.0-240.el8.ppc64le) [ 5557.600089] MSR: 9000000000029033 <SF,HV,EE,ME,IR,DR,RI,LE> CR: 44044282 XER: 00000000 [ 5557.600110] CFAR: c00000000044b160 IRQMASK: 0 [ 5557.600110] GPR00: c00000000044b164 c000001f69617ac0 c00800000e96e000 c000001f69617c10 [ 5557.600110] GPR04: 05faa2b21e000080 0000000000000000 0000000000000005 ffffffffffffffff [ 5557.600110] GPR08: 0000000000000000 0000000000000001 0000000000000000 0000000000000001 [ 5557.600110] GPR12: c00800000e949ec8 c000001ffffd3400 0000000000000000 0000000000000000 [ 5557.600110] GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 5557.600110] GPR20: 0000000000000000 0000000000000000 c000001f5c065160 c000000001c76f90 [ 5557.600110] GPR24: c000001f06f20000 c000001f5c065100 0000000000000008 c000001f0eb98c78 [ 5557.600110] GPR28: c000001dcab40000 c000001dcab403d8 c000001f69617c10 0000000000000011 [ 5557.600255] NIP [c00800000e949fac] xive_native_esb_fault+0xe4/0x240 [kvm] [ 5557.600274] LR [c00000000044b164] __do_fault+0x64/0x220 [ 5557.600298] Call Trace: [ 5557.600302] [c000001f69617ac0] [0000000137a5dc20] 0x137a5dc20 (unreliable) [ 5557.600320] [c000001f69617b50] [c00000000044b164] __do_fault+0x64/0x220 [ 5557.600337] [c000001f69617b90] [c000000000453838] do_fault+0x218/0x930 [ 5557.600355] [c000001f69617bf0] [c000000000456f50] __handle_mm_fault+0x350/0xdf0 [ 5557.600373] [c000001f69617cd0] [c000000000457b1c] handle_mm_fault+0x12c/0x310 [ 5557.600393] [c000001f69617d10] [c00000000007ef44] __do_page_fault+0x264/0xbb0 [ 5557.600411] [c000001f69617df0] [c00000000007f8c8] do_page_fault+0x38/0xd0 [ 5557.600429] [c000001f69617e30] [c00000000000a714] handle_page_fault+0x18/0x38 [ 5557.600438] Instruction dump: [ 5557.600444] 40c2fff0 7c2004ac 2fa90000 409e0118 73e90001 41820080 e8bd0008 7c2004ac [ 5557.600455] 7ca90074 39400000 915c0000 7929d182 <0b090000> 2fa50000 419e0080 e89e0018 [ 5557.600485] ---[ end trace 66c6ff034c53f64f ]--- [ 5557.600509] xive-kvm: xive_native_esb_fault: accessing invalid ESB page for source 8 ! So it looks like something needs to be done in the XIVE KVM device anyway. [...] > > static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp) > > { > > SpaprXive *xive = SPAPR_XIVE(xsrc->xive); > > int i; > > > > - for (i = 0; i < xsrc->nr_irqs; i++) { > > + /* > > + * Skip the vCPU IPIs. These are created/reset when the vCPUs are > > + * connected in kvmppc_xive_cpu_connect() > > + */ > > + for (i = SPAPR_XIRQ_BASE; i < xsrc->nr_irqs; i++) { > > This change skips the range [ 0x0 ... 0x1000 ] and relies on the presenter > to create the vCPU IPIs at the KVM level. But spapr_irq_init() could have > claimed more in : > > /* Enable the CPU IPIs */ > for (i = 0; i < nr_servers; ++i) { > SpaprInterruptControllerClass *sicc > = SPAPR_INTC_GET_CLASS(spapr->xive); > > if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i, > false, errp) < 0) { > return; > } > } > This doesn't reach the XIVE KVM device when running in dual mode because it doesn't exist yet. > I think this is what is happening with vsmt. However, I don't know how to > fix it :/ > > Thanks, > > C. >
diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c index 4ea687c93ecd..f838c208b559 100644 --- a/hw/intc/spapr_xive_kvm.c +++ b/hw/intc/spapr_xive_kvm.c @@ -146,6 +146,15 @@ int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp) return s.ret; } +static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, Error **errp) +{ + unsigned long ipi = kvm_arch_vcpu_id(cs); + uint64_t state = 0; + + return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, ipi, + &state, true, errp); +} + int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) { ERRP_GUARD(); @@ -175,6 +184,12 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) return ret; } + /* Create/reset the vCPU IPI */ + ret = kvmppc_xive_reset_ipi(xive, tctx->cs, errp); + if (ret < 0) { + return ret; + } + kvm_cpu_enable(tctx->cs); return 0; } @@ -260,6 +275,12 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp) assert(xive->fd != -1); + /* + * The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect() + * and not with all sources in kvmppc_xive_source_reset() + */ + assert(srcno >= SPAPR_XIRQ_BASE); + if (xive_source_irq_is_lsi(xsrc, srcno)) { state |= KVM_XIVE_LEVEL_SENSITIVE; if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) { @@ -271,12 +292,28 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp) true, errp); } +/* + * To be valid, a source must have been claimed by the machine (valid + * entry in the EAS table) and if it is a vCPU IPI, the vCPU should + * have been enabled, which means the IPI has been allocated in + * kvmppc_xive_cpu_connect(). + */ +static bool xive_source_is_valid(SpaprXive *xive, int i) +{ + return xive_eas_is_valid(&xive->eat[i]) && + (i >= SPAPR_XIRQ_BASE || kvm_cpu_is_enabled(i)); +} + static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp) { SpaprXive *xive = SPAPR_XIVE(xsrc->xive); int i; - for (i = 0; i < xsrc->nr_irqs; i++) { + /* + * Skip the vCPU IPIs. These are created/reset when the vCPUs are + * connected in kvmppc_xive_cpu_connect() + */ + for (i = SPAPR_XIRQ_BASE; i < xsrc->nr_irqs; i++) { int ret; if (!xive_eas_is_valid(&xive->eat[i])) { @@ -370,7 +407,7 @@ static void kvmppc_xive_source_get_state(XiveSource *xsrc) for (i = 0; i < xsrc->nr_irqs; i++) { uint8_t pq; - if (!xive_eas_is_valid(&xive->eat[i])) { + if (!xive_source_is_valid(xive, i)) { continue; } @@ -553,7 +590,7 @@ static void kvmppc_xive_change_state_handler(void *opaque, int running, uint8_t pq; uint8_t old_pq; - if (!xive_eas_is_valid(&xive->eat[i])) { + if (!xive_source_is_valid(xive, i)) { continue; } @@ -581,7 +618,7 @@ static void kvmppc_xive_change_state_handler(void *opaque, int running, for (i = 0; i < xsrc->nr_irqs; i++) { uint8_t pq; - if (!xive_eas_is_valid(&xive->eat[i])) { + if (!xive_source_is_valid(xive, i)) { continue; } @@ -696,7 +733,7 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id) /* Restore the EAT */ for (i = 0; i < xive->nr_irqs; i++) { - if (!xive_eas_is_valid(&xive->eat[i])) { + if (!xive_source_is_valid(xive, i)) { continue; }
The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect() when the vCPU connects to the KVM device and not when all the sources are reset in kvmppc_xive_source_reset() This requires extra care for hotplug vCPUs and VM restore. Signed-off-by: Cédric Le Goater <clg@kaod.org> --- hw/intc/spapr_xive_kvm.c | 47 +++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-)