Message ID: 1397595918-30419-2-git-send-email-w1.huang@samsung.com
State:      New
On 15/04/2014 22:05, Wei Huang wrote:
> From: Jaeyong Yoo <jaeyong.yoo@samsung.com>
>
> This patch implements HVM context hypercalls to support ARM
> guest save and restore. It saves the states of guest GIC,
> arch timer, and CPU registers.
>
> Signed-off-by: Evgeny Fedotov <e.fedotov@samsung.com>
> Signed-off-by: Wei Huang <w1.huang@samsung.com>
> ---
>  xen/arch/arm/Makefile                  |   1 +
>  xen/arch/arm/hvm.c                     | 268 +++++++++++++++++++++++++++++-
>  xen/arch/arm/save.c                    |  65 ++++++++
>  xen/arch/arm/vgic.c                    | 146 +++++++++++++++++
>  xen/arch/arm/vtimer.c                  |  71 +++++++++
>  xen/arch/x86/domctl.c                  |  70 ---------
>  xen/common/Makefile                    |   2 +-
>  xen/common/domctl.c                    |  74 +++++++++
>  xen/include/asm-arm/hvm/support.h      |  29 ++++
>  xen/include/public/arch-arm/hvm/save.h | 130 ++++++++++++++++
>  10 files changed, 784 insertions(+), 72 deletions(-)
>  create mode 100644 xen/arch/arm/save.c
>  create mode 100644 xen/include/asm-arm/hvm/support.h
>
> diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
> index 63e0460..d9a328c 100644
> --- a/xen/arch/arm/Makefile
> +++ b/xen/arch/arm/Makefile
> @@ -33,6 +33,7 @@ obj-y += hvm.o
>  obj-y += device.o
>  obj-y += decode.o
>  obj-y += processor.o
> +obj-y += save.o
>
>  #obj-bin-y += ....o
>
> diff --git a/xen/arch/arm/hvm.c b/xen/arch/arm/hvm.c
> index 471c4cd..18eb2bd 100644
> --- a/xen/arch/arm/hvm.c
> +++ b/xen/arch/arm/hvm.c
> @@ -4,6 +4,7 @@
>  #include <xen/errno.h>
>  #include <xen/guest_access.h>
>  #include <xen/sched.h>
> +#include <xen/hvm/save.h>
>
>  #include <xsm/xsm.h>
>
> @@ -12,9 +13,9 @@
>  #include <public/hvm/hvm_op.h>
>
>  #include <asm/hypercall.h>
> +#include <asm/gic.h>
>
>  long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg)
> -
>  {
>      long rc = 0;
>
> @@ -65,3 +66,268 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg)
>
>      return rc;
>  }
> +
> +/* Save CPU related states into save/restore context */
> +static int hvm_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
> +{
> +    struct hvm_hw_cpu ctxt;
> +    struct vcpu_guest_core_regs c;
> +    struct vcpu *v;
> +    int ret = 0;
> +
> +    /* Save the state of CPU */
> +    for_each_vcpu( d, v )
> +    {
> +        memset(&ctxt, 0, sizeof(ctxt));
> +
> +        ctxt.sctlr = v->arch.sctlr;
> +        ctxt.ttbr0 = v->arch.ttbr0;
> +        ctxt.ttbr1 = v->arch.ttbr1;
> +        ctxt.ttbcr = v->arch.ttbcr;
> +
> +        ctxt.dacr = v->arch.dacr;
> +        ctxt.ifsr = v->arch.ifsr;
> +#ifdef CONFIG_ARM_32
> +        ctxt.ifar = v->arch.ifar;
> +        ctxt.dfar = v->arch.dfar;
> +        ctxt.dfsr = v->arch.dfsr;
> +#else
> +        ctxt.far = v->arch.far;
> +        ctxt.esr = v->arch.esr;
> +#endif
> +
> +#ifdef CONFIG_ARM_32
> +        ctxt.mair0 = v->arch.mair0;
> +        ctxt.mair1 = v->arch.mair1;
> +#else
> +        ctxt.mair0 = v->arch.mair;
> +#endif
> +
> +        /* Control Registers */
> +        ctxt.actlr = v->arch.actlr;
> +        ctxt.sctlr = v->arch.sctlr;
> +        ctxt.cpacr = v->arch.cpacr;
> +
> +        ctxt.contextidr = v->arch.contextidr;
> +        ctxt.tpidr_el0 = v->arch.tpidr_el0;
> +        ctxt.tpidr_el1 = v->arch.tpidr_el1;
> +        ctxt.tpidrro_el0 = v->arch.tpidrro_el0;
> +
> +        /* CP 15 */
> +        ctxt.csselr = v->arch.csselr;
> +
> +        ctxt.afsr0 = v->arch.afsr0;
> +        ctxt.afsr1 = v->arch.afsr1;
> +        ctxt.vbar = v->arch.vbar;
> +        ctxt.par = v->arch.par;
> +        ctxt.teecr = v->arch.teecr;
> +        ctxt.teehbr = v->arch.teehbr;
> +
> +#ifdef CONFIG_ARM_32
> +        ctxt.joscr = v->arch.joscr;
> +        ctxt.jmcr = v->arch.jmcr;
> +#endif
> +
> +        memset(&c, 0, sizeof(c));
> +
> +        /* get guest core registers */
> +        vcpu_regs_hyp_to_user(v, &c);
> +
> +        ctxt.x0 = c.x0;
> +        ctxt.x1 = c.x1;
> +        ctxt.x2 = c.x2;
> +        ctxt.x3 = c.x3;
> +        ctxt.x4 = c.x4;
> +        ctxt.x5 = c.x5;
> +        ctxt.x6 = c.x6;
> +        ctxt.x7 = c.x7;
> +        ctxt.x8 = c.x8;
> +        ctxt.x9 = c.x9;
> +        ctxt.x10 = c.x10;
> +        ctxt.x11 = c.x11;
> +        ctxt.x12 = c.x12;
> +        ctxt.x13 = c.x13;
> +        ctxt.x14 = c.x14;
> +        ctxt.x15 = c.x15;
> +        ctxt.x16 = c.x16;
> +        ctxt.x17 = c.x17;
> +        ctxt.x18 = c.x18;
> +        ctxt.x19 = c.x19;
> +        ctxt.x20 = c.x20;
> +        ctxt.x21 = c.x21;
> +        ctxt.x22 = c.x22;
> +        ctxt.x23 = c.x23;
> +        ctxt.x24 = c.x24;
> +        ctxt.x25 = c.x25;
> +        ctxt.x26 = c.x26;
> +        ctxt.x27 = c.x27;
> +        ctxt.x28 = c.x28;
> +        ctxt.x29 = c.x29;
> +        ctxt.x30 = c.x30;
> +        ctxt.pc64 = c.pc64;
> +        ctxt.cpsr = c.cpsr;
> +        ctxt.spsr_el1 = c.spsr_el1;  /* spsr_svc */
> +
> +#ifdef CONFIG_ARM_32
> +        ctxt.spsr_fiq = c.spsr_fiq;
> +        ctxt.spsr_irq = c.spsr_irq;
> +        ctxt.spsr_und = c.spsr_und;
> +        ctxt.spsr_abt = c.spsr_abt;
> +#endif
> +#ifdef CONFIG_ARM_64
> +        ctxt.sp_el0 = c.sp_el0;
> +        ctxt.sp_el1 = c.sp_el1;
> +        ctxt.elr_el1 = c.elr_el1;
> +#endif
> +
> +        /* check VFP state size */
> +        BUILD_BUG_ON(sizeof(v->arch.vfp) > sizeof (ctxt.vfp));
> +        memcpy((void*) &ctxt.vfp, (void*) &v->arch.vfp, sizeof(v->arch.vfp));
> +
> +        ctxt.pause_flags = v->pause_flags;
> +
> +        if ( (ret = hvm_save_entry(VCPU, v->vcpu_id, h, &ctxt)) != 0 )
> +            return ret;
> +    }
> +
> +    return ret;
> +}
> +
> +/* Load CPU related states from existing save/restore context */
> +static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
> +{
> +    int vcpuid;
> +    struct hvm_hw_cpu ctxt;
> +    struct vcpu *v;
> +    struct vcpu_guest_core_regs c;
> +
> +    vcpuid = hvm_load_instance(h);
> +    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
> +    {
> +        dprintk(XENLOG_G_ERR, "HVM restore: dom%u has no vcpu%u\n",
> +                d->domain_id, vcpuid);
> +        return -EINVAL;
> +    }
> +
> +    if ( hvm_load_entry(VCPU, h, &ctxt) != 0 )
> +        return -EINVAL;
> +
> +    v->arch.sctlr = ctxt.sctlr;
> +    v->arch.ttbr0 = ctxt.ttbr0;
> +    v->arch.ttbr1 = ctxt.ttbr1;
> +    v->arch.ttbcr = ctxt.ttbcr;
> +
> +    v->arch.dacr = ctxt.dacr;
> +    v->arch.ifsr = ctxt.ifsr;
> +#ifdef CONFIG_ARM_32
> +    v->arch.ifar = ctxt.ifar;
> +    v->arch.dfar = ctxt.dfar;
> +    v->arch.dfsr = ctxt.dfsr;
> +#else
> +    v->arch.far = ctxt.far;
> +    v->arch.esr = ctxt.esr;
> +#endif
> +
> +#ifdef CONFIG_ARM_32
> +    v->arch.mair0 = ctxt.mair0;
> +    v->arch.mair1 = ctxt.mair1;
> +#else
> +    v->arch.mair = ctxt.mair0;
> +#endif
> +
> +    /* Control Registers */
> +    v->arch.actlr = ctxt.actlr;
> +    v->arch.cpacr = ctxt.cpacr;
> +    v->arch.contextidr = ctxt.contextidr;
> +    v->arch.tpidr_el0 = ctxt.tpidr_el0;
> +    v->arch.tpidr_el1 = ctxt.tpidr_el1;
> +    v->arch.tpidrro_el0 = ctxt.tpidrro_el0;
> +
> +    /* CP 15 */
> +    v->arch.csselr = ctxt.csselr;
> +
> +    v->arch.afsr0 = ctxt.afsr0;
> +    v->arch.afsr1 = ctxt.afsr1;
> +    v->arch.vbar = ctxt.vbar;
> +    v->arch.par = ctxt.par;
> +    v->arch.teecr = ctxt.teecr;
> +    v->arch.teehbr = ctxt.teehbr;
> +#ifdef CONFIG_ARM_32
> +    v->arch.joscr = ctxt.joscr;
> +    v->arch.jmcr = ctxt.jmcr;
> +#endif
> +
> +    /* fill guest core registers */
> +    memset(&c, 0, sizeof(c));
> +    c.x0 = ctxt.x0;
> +    c.x1 = ctxt.x1;
> +    c.x2 = ctxt.x2;
> +    c.x3 = ctxt.x3;
> +    c.x4 = ctxt.x4;
> +    c.x5 = ctxt.x5;
> +    c.x6 = ctxt.x6;
> +    c.x7 = ctxt.x7;
> +    c.x8 = ctxt.x8;
> +    c.x9 = ctxt.x9;
> +    c.x10 = ctxt.x10;
> +    c.x11 = ctxt.x11;
> +    c.x12 = ctxt.x12;
> +    c.x13 = ctxt.x13;
> +    c.x14 = ctxt.x14;
> +    c.x15 = ctxt.x15;
> +    c.x16 = ctxt.x16;
> +    c.x17 = ctxt.x17;
> +    c.x18 = ctxt.x18;
> +    c.x19 = ctxt.x19;
> +    c.x20 = ctxt.x20;
> +    c.x21 = ctxt.x21;
> +    c.x22 = ctxt.x22;
> +    c.x23 = ctxt.x23;
> +    c.x24 = ctxt.x24;
> +    c.x25 = ctxt.x25;
> +    c.x26 = ctxt.x26;
> +    c.x27 = ctxt.x27;
> +    c.x28 = ctxt.x28;
> +    c.x29 = ctxt.x29;
> +    c.x30 = ctxt.x30;
> +    c.pc64 = ctxt.pc64;
> +    c.cpsr = ctxt.cpsr;
> +    c.spsr_el1 = ctxt.spsr_el1;  /* spsr_svc */
> +
> +#ifdef CONFIG_ARM_32
> +    c.spsr_fiq = ctxt.spsr_fiq;
> +    c.spsr_irq = ctxt.spsr_irq;
> +    c.spsr_und = ctxt.spsr_und;
> +    c.spsr_abt = ctxt.spsr_abt;
> +#endif
> +#ifdef CONFIG_ARM_64
> +    c.sp_el0 = ctxt.sp_el0;
> +    c.sp_el1 = ctxt.sp_el1;
> +    c.elr_el1 = ctxt.elr_el1;
> +#endif
> +
> +    /* set guest core registers */
> +    vcpu_regs_user_to_hyp(v, &c);
> +
> +    /* check VFP state size */
> +    BUILD_BUG_ON(sizeof(v->arch.vfp) > sizeof (ctxt.vfp));
> +    memcpy(&v->arch.vfp, &ctxt.vfp, sizeof(v->arch.vfp));
> +
> +    v->is_initialised = 1;
> +    v->pause_flags = ctxt.pause_flags;
> +
> +    return 0;
> +}
> +
> +HVM_REGISTER_SAVE_RESTORE(VCPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt, 1,
> +                          HVMSR_PER_VCPU);
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/arch/arm/save.c b/xen/arch/arm/save.c
> new file mode 100644
> index 0000000..eef14a8
> --- /dev/null
> +++ b/xen/arch/arm/save.c
> @@ -0,0 +1,65 @@
> +/*
> + * Save.c: Save and restore HVM guest's emulated hardware state for ARM.
> + *
> + * Copyright (c) 2014, Samsung Electronics.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
> + * Place - Suite 330, Boston, MA 02111-1307 USA.
> + */
> +#include <asm/hvm/support.h>
> +#include <public/hvm/save.h>
> +
> +void arch_hvm_save(struct domain *d, struct hvm_save_header *hdr)
> +{
> +    hdr->cpuid = current_cpu_data.midr.bits;
> +}
> +
> +int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr)
> +{
> +    uint32_t cpuid;
> +
> +    if ( hdr->magic != HVM_FILE_MAGIC )
> +    {
> +        printk(XENLOG_G_ERR "HVM%d restore: bad magic number %#"PRIx32"\n",
> +               d->domain_id, hdr->magic);
> +        return -EINVAL;
> +    }
> +
> +    if ( hdr->version != HVM_FILE_VERSION )
> +    {
> +        printk(XENLOG_G_ERR "HVM%d restore: unsupported version %u\n",
> +               d->domain_id, hdr->version);
> +        return -EINVAL;
> +    }
> +
> +    cpuid = current_cpu_data.midr.bits;
> +    if ( hdr->cpuid != cpuid )
> +    {
> +        printk(XENLOG_G_INFO "HVM%d restore: VM saved on one CPU "
> +               "(%#"PRIx32") and restored on another (%#"PRIx32").\n",
> +               d->domain_id, hdr->cpuid, cpuid);
> +        return -EINVAL;
> +    }
> +
> +    return 0;
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> index 9fc9586..af244a7 100644
> --- a/xen/arch/arm/vgic.c
> +++ b/xen/arch/arm/vgic.c
> @@ -24,6 +24,7 @@
>  #include <xen/softirq.h>
>  #include <xen/irq.h>
>  #include <xen/sched.h>
> +#include <xen/hvm/save.h>
>
>  #include <asm/current.h>
>
> @@ -73,6 +74,75 @@ static struct vgic_irq_rank *vgic_irq_rank(struct vcpu *v, int b, int n)
>      return NULL;
>  }
>
> +/* Save rank info into a context to support domain save/restore */
> +static int vgic_save_irq_rank(struct vcpu *v, struct vgic_rank *ext,
> +                              struct vgic_irq_rank *rank)
> +{
> +    spin_lock(&rank->lock);
> +
> +    /* IENABLE, IACTIVE, IPEND, PENDSGI registers */
> +    ext->ienable = rank->ienable;
> +    ext->iactive = rank->iactive;
> +    ext->ipend = rank->ipend;
> +    ext->pendsgi = rank->pendsgi;
> +
> +    /* ICFG */
> +    ext->icfg[0] = rank->icfg[0];
> +    ext->icfg[1] = rank->icfg[1];
> +
> +    /* IPRIORITY */
> +    BUILD_BUG_ON(sizeof(rank->ipriority) != sizeof (ext->ipriority));
> +    memcpy(ext->ipriority, rank->ipriority, sizeof(rank->ipriority));
> +
> +    /* ITARGETS */
> +    BUILD_BUG_ON(sizeof(rank->itargets) != sizeof (ext->itargets));
> +    memcpy(ext->itargets, rank->itargets, sizeof(rank->itargets));
> +
> +    spin_unlock(&rank->lock);
> +
> +    return 0;
> +}
> +
> +/* Load rank info from a context to support domain save/restore */
> +static int vgic_load_irq_rank(struct vcpu *v, struct vgic_irq_rank *rank,
> +                              struct vgic_rank *ext)
> +{
> +    struct pending_irq *p;
> +    unsigned int irq = 0;
> +    const unsigned long enable_bits = ext->ienable;
> +
> +    spin_lock(&rank->lock);
> +
> +    /* IENABLE, IACTIVE, IPEND, PENDSGI registers */
> +    rank->ienable = ext->ienable;
> +    rank->iactive = ext->iactive;
> +    rank->ipend = ext->ipend;
> +    rank->pendsgi = ext->pendsgi;
> +
> +    /* ICFG */
> +    rank->icfg[0] = ext->icfg[0];
> +    rank->icfg[1] = ext->icfg[1];
> +
> +    /* IPRIORITY */
> +    BUILD_BUG_ON(sizeof(rank->ipriority) != sizeof (ext->ipriority));
> +    memcpy(rank->ipriority, ext->ipriority, sizeof(rank->ipriority));
> +
> +    /* ITARGETS */
> +    BUILD_BUG_ON(sizeof(rank->itargets) != sizeof (ext->itargets));
> +    memcpy(rank->itargets, ext->itargets, sizeof(rank->itargets));
> +
> +    /* Set IRQ status as enabled by iterating through rank->ienable */
> +    while ( (irq = find_next_bit(&enable_bits, 32, irq)) < 32 )
> +    {
> +        p = irq_to_pending(v, irq);
> +        set_bit(GIC_IRQ_GUEST_ENABLED, &p->status);
> +        irq++;
> +    }
> +
> +    spin_unlock(&rank->lock);
> +
> +    return 0;
> +}
> +
>  int domain_vgic_init(struct domain *d)
>  {
>      int i;
> @@ -749,6 +819,82 @@ out:
>      smp_send_event_check_mask(cpumask_of(v->processor));
>  }
>
> +
> +/* Save GIC state into a context to support save/restore */
> +static int hvm_gic_save_ctxt(struct domain *d, hvm_domain_context_t *h)
> +{
> +    struct hvm_hw_gic ctxt;
> +    struct vcpu *v;
> +    int ret = 0;
> +
> +    /* Save the state of GICs */
> +    for_each_vcpu( d, v )
> +    {
> +        ctxt.gic_hcr = v->arch.gic_hcr;
> +        ctxt.gic_vmcr = v->arch.gic_vmcr;
> +        ctxt.gic_apr = v->arch.gic_apr;
> +
> +        /* Save list registers and masks */
> +        BUILD_BUG_ON(sizeof(v->arch.gic_lr) > sizeof (ctxt.gic_lr));
> +        memcpy(ctxt.gic_lr, v->arch.gic_lr, sizeof(v->arch.gic_lr));
> +
> +        ctxt.lr_mask = v->arch.lr_mask;
> +        ctxt.event_mask = v->arch.event_mask;
> +
> +        /* Save PPI states (per-CPU), necessary for SMP-enabled guests */
> +        if ( (ret = vgic_save_irq_rank(v, &ctxt.ppi_state,
> +                                       &v->arch.vgic.private_irqs)) != 0 )
> +            return ret;
> +
> +        if ( (ret = hvm_save_entry(GIC, v->vcpu_id, h, &ctxt)) != 0 )
> +            return ret;
> +    }
> +
> +    return ret;
> +}
> +
> +/* Restore GIC state from a context to support save/restore */
> +static int hvm_gic_load_ctxt(struct domain *d, hvm_domain_context_t *h)
> +{
> +    int vcpuid;
> +    struct hvm_hw_gic ctxt;
> +    struct vcpu *v;
> +    int ret = 0;
> +
> +    /* Which vcpu is this? */
> +    vcpuid = hvm_load_instance(h);
> +    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
> +    {
> +        dprintk(XENLOG_G_ERR, "HVM restore: dom%u has no vcpu%u\n",
> +                d->domain_id, vcpuid);
> +        return -EINVAL;
> +    }
> +
> +    if ( hvm_load_entry(GIC, h, &ctxt) != 0 )
> +        return -EINVAL;
> +
> +    v->arch.gic_hcr = ctxt.gic_hcr;
> +    v->arch.gic_vmcr = ctxt.gic_vmcr;
> +    v->arch.gic_apr = ctxt.gic_apr;
> +
> +    /* Restore list registers and masks */
> +    BUILD_BUG_ON(sizeof(v->arch.gic_lr) > sizeof (ctxt.gic_lr));
> +    memcpy(v->arch.gic_lr, ctxt.gic_lr, sizeof(v->arch.gic_lr));
> +
> +    v->arch.lr_mask = ctxt.lr_mask;
> +    v->arch.event_mask = ctxt.event_mask;
> +
> +    /* Restore PPI states */
> +    if ( (ret = vgic_load_irq_rank(v, &v->arch.vgic.private_irqs,
> +                                   &ctxt.ppi_state)) != 0 )
> +        return ret;
> +
> +    return ret;
> +}
> +
> +HVM_REGISTER_SAVE_RESTORE(GIC, hvm_gic_save_ctxt, hvm_gic_load_ctxt, 1,
> +                          HVMSR_PER_VCPU);
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
> index 3d6a721..7c47eac 100644
> --- a/xen/arch/arm/vtimer.c
> +++ b/xen/arch/arm/vtimer.c
> @@ -21,6 +21,7 @@
>  #include <xen/lib.h>
>  #include <xen/timer.h>
>  #include <xen/sched.h>
> +#include <xen/hvm/save.h>
>  #include <asm/irq.h>
>  #include <asm/time.h>
>  #include <asm/gic.h>
> @@ -284,6 +285,76 @@ int vtimer_emulate(struct cpu_user_regs *regs, union hsr hsr)
>      }
>  }
>
> +static int hvm_vtimer_save_ctxt(struct domain *d, hvm_domain_context_t *h)
> +{
> +    struct hvm_hw_timer ctxt;
> +    struct vcpu *v;
> +    struct vtimer *t;
> +    int i, ret = 0;
> +
> +    /* Save the state of vtimer and ptimer */
> +    for_each_vcpu( d, v )
> +    {
> +        t = &v->arch.virt_timer;
> +        for ( i = 0; i < 2; i++ )
> +        {
> +            ctxt.cval = t->cval;
> +            ctxt.ctl = t->ctl;
> +            ctxt.vtb_offset = i ? d->arch.phys_timer_base.offset :
> +                                  d->arch.virt_timer_base.offset;
> +            ctxt.type = i ? TIMER_TYPE_PHYS : TIMER_TYPE_VIRT;
> +
> +            if ( (ret = hvm_save_entry(TIMER, v->vcpu_id, h, &ctxt)) != 0 )
> +                return ret;
> +
> +            t = &v->arch.phys_timer;
> +        }
> +    }
> +
> +    return ret;
> +}
> +
> +static int hvm_vtimer_load_ctxt(struct domain *d, hvm_domain_context_t *h)
> +{
> +    int vcpuid;
> +    struct hvm_hw_timer ctxt;
> +    struct vcpu *v;
> +    struct vtimer *t = NULL;
> +
> +    /* Which vcpu is this? */
> +    vcpuid = hvm_load_instance(h);
> +
> +    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
> +    {
> +        dprintk(XENLOG_G_ERR, "HVM restore: dom%u has no vcpu%u\n",
> +                d->domain_id, vcpuid);
> +        return -EINVAL;
> +    }
> +
> +    if ( hvm_load_entry(TIMER, h, &ctxt) != 0 )
> +        return -EINVAL;
> +
> +    if ( ctxt.type == TIMER_TYPE_VIRT )
> +    {
> +        t = &v->arch.virt_timer;
> +        d->arch.virt_timer_base.offset = ctxt.vtb_offset;
> +    }
> +    else
> +    {
> +        t = &v->arch.phys_timer;
> +        d->arch.phys_timer_base.offset = ctxt.vtb_offset;
> +    }
> +
> +    t->cval = ctxt.cval;
> +    t->ctl = ctxt.ctl;
> +    t->v = v;
> +
> +    return 0;
> +}
> +
> +HVM_REGISTER_SAVE_RESTORE(TIMER, hvm_vtimer_save_ctxt, hvm_vtimer_load_ctxt,
> +                          2, HVMSR_PER_VCPU);
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
> index 26635ff..30fbd30 100644
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -399,76 +399,6 @@ long arch_do_domctl(
>      }
>      break;
>
> -    case XEN_DOMCTL_sethvmcontext:
> -    {
> -        struct hvm_domain_context c = { .size = domctl->u.hvmcontext.size };
> -
> -        ret = -EINVAL;
> -        if ( !is_hvm_domain(d) )
> -            goto sethvmcontext_out;
> -
> -        ret = -ENOMEM;
> -        if ( (c.data = xmalloc_bytes(c.size)) == NULL )
> -            goto sethvmcontext_out;
> -
> -        ret = -EFAULT;
> -        if ( copy_from_guest(c.data, domctl->u.hvmcontext.buffer, c.size) != 0)
> -            goto sethvmcontext_out;
> -
> -        domain_pause(d);
> -        ret = hvm_load(d, &c);
> -        domain_unpause(d);
> -
> -    sethvmcontext_out:
> -        if ( c.data != NULL )
> -            xfree(c.data);
> -    }
> -    break;
> -
> -    case XEN_DOMCTL_gethvmcontext:
> -    {
> -        struct hvm_domain_context c = { 0 };
> -
> -        ret = -EINVAL;
> -        if ( !is_hvm_domain(d) )
> -            goto gethvmcontext_out;
> -
> -        c.size = hvm_save_size(d);
> -
> -        if ( guest_handle_is_null(domctl->u.hvmcontext.buffer) )
> -        {
> -            /* Client is querying for the correct buffer size */
> -            domctl->u.hvmcontext.size = c.size;
> -            ret = 0;
> -            goto gethvmcontext_out;
> -        }
> -
> -        /* Check that the client has a big enough buffer */
> -        ret = -ENOSPC;
> -        if ( domctl->u.hvmcontext.size < c.size )
> -            goto gethvmcontext_out;
> -
> -        /* Allocate our own marshalling buffer */
> -        ret = -ENOMEM;
> -        if ( (c.data = xmalloc_bytes(c.size)) == NULL )
> -            goto gethvmcontext_out;
> -
> -        domain_pause(d);
> -        ret = hvm_save(d, &c);
> -        domain_unpause(d);
> -
> -        domctl->u.hvmcontext.size = c.cur;
> -        if ( copy_to_guest(domctl->u.hvmcontext.buffer, c.data, c.size) != 0 )
> -            ret = -EFAULT;
> -
> -    gethvmcontext_out:
> -        copyback = 1;
> -
> -        if ( c.data != NULL )
> -            xfree(c.data);
> -    }
> -    break;
> -
>      case XEN_DOMCTL_gethvmcontext_partial:
>      {
>          ret = -EINVAL;
> diff --git a/xen/common/Makefile b/xen/common/Makefile
> index 3683ae3..13b781f 100644
> --- a/xen/common/Makefile
> +++ b/xen/common/Makefile
> @@ -62,7 +62,7 @@ obj-$(CONFIG_XENCOMM) += xencomm.o
>
>  subdir-$(CONFIG_COMPAT) += compat
>
> -subdir-$(x86_64) += hvm
> +subdir-y += hvm
>
>  subdir-$(coverage) += gcov
>
> diff --git a/xen/common/domctl.c b/xen/common/domctl.c
> index 5342e5d..2ea4af5 100644
> --- a/xen/common/domctl.c
> +++ b/xen/common/domctl.c
> @@ -24,6 +24,8 @@
>  #include <xen/bitmap.h>
>  #include <xen/paging.h>
>  #include <xen/hypercall.h>
> +#include <xen/hvm/save.h>
> +#include <xen/guest_access.h>
>  #include <asm/current.h>
>  #include <asm/irq.h>
>  #include <asm/page.h>
> @@ -881,6 +883,78 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
>      }
>      break;
>
> +    case XEN_DOMCTL_sethvmcontext:
> +    {
> +        struct hvm_domain_context c = { .size = op->u.hvmcontext.size };
> +
> +        ret = -EINVAL;
> +        if ( (d == current->domain) || /* no domain_pause() */
> +             !is_hvm_domain(d) )
> +            goto sethvmcontext_out;
> +
> +        ret = -ENOMEM;
> +        if ( (c.data = xmalloc_bytes(c.size)) == NULL )
> +            goto sethvmcontext_out;
> +
> +        ret = -EFAULT;
> +        if ( copy_from_guest(c.data, op->u.hvmcontext.buffer, c.size) != 0)

The != 0 is not needed.

> +            goto sethvmcontext_out;
> +
> +        domain_pause(d);
> +        ret = hvm_load(d, &c);
> +        domain_unpause(d);
> +
> +    sethvmcontext_out:
> +        if ( c.data != NULL )
> +            xfree(c.data);

I know you have copied this code and it was poor to start with, but
xfree() has the same semantics as free(), so is happy with a NULL
pointer. You can drop the if condition here and below in getcontext.

> +    }
> +    break;
> +
> +    case XEN_DOMCTL_gethvmcontext:
> +    {
> +        struct hvm_domain_context c = { 0 };
> +
> +        ret = -EINVAL;
> +        if ( (d == current->domain) || /* no domain_pause() */
> +             !is_hvm_domain(d) )
> +            goto gethvmcontext_out;
> +
> +        c.size = hvm_save_size(d);
> +
> +        if ( guest_handle_is_null(op->u.hvmcontext.buffer) )
> +        {
> +            /* Client is querying for the correct buffer size */
> +            op->u.hvmcontext.size = c.size;
> +            ret = 0;
> +            goto gethvmcontext_out;
> +        }
> +
> +        /* Check that the client has a big enough buffer */
> +        ret = -ENOSPC;
> +        if ( op->u.hvmcontext.size < c.size )
> +            goto gethvmcontext_out;
> +
> +        /* Allocate our own marshalling buffer */
> +        ret = -ENOMEM;
> +        if ( (c.data = xmalloc_bytes(c.size)) == NULL )
> +            goto gethvmcontext_out;
> +
> +        domain_pause(d);
> +        ret = hvm_save(d, &c);
> +        domain_unpause(d);
> +
> +        op->u.hvmcontext.size = c.cur;
> +        if ( copy_to_guest(op->u.hvmcontext.buffer, c.data, c.size) != 0 )
> +            ret = -EFAULT;
> +
> +    gethvmcontext_out:
> +        copyback = 1;
> +
> +        if ( c.data != NULL )
> +            xfree(c.data);
> +    }
> +    break;
> +
>      default:
>          ret = arch_do_domctl(op, d, u_domctl);
>          break;
> diff --git a/xen/include/asm-arm/hvm/support.h b/xen/include/asm-arm/hvm/support.h
> new file mode 100644
> index 0000000..09f7cb8
> --- /dev/null
> +++ b/xen/include/asm-arm/hvm/support.h
> @@ -0,0 +1,29 @@
> +/*
> + * asm-arm/hvm/support.h: HVM support routines used by ARM.
> + *
> + * Copyright (c) 2014, Samsung Electronics.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
> + * Place - Suite 330, Boston, MA 02111-1307 USA.
> + */
> +
> +#ifndef __ASM_ARM_HVM_SUPPORT_H__
> +#define __ASM_ARM_HVM_SUPPORT_H__
> +
> +#include <xen/types.h>
> +#include <public/hvm/ioreq.h>
> +#include <xen/sched.h>
> +#include <xen/hvm/save.h>
> +#include <asm/processor.h>
> +
> +#endif /* __ASM_ARM_HVM_SUPPORT_H__ */
> diff --git a/xen/include/public/arch-arm/hvm/save.h b/xen/include/public/arch-arm/hvm/save.h
> index 75b8e65..f6ad258 100644
> --- a/xen/include/public/arch-arm/hvm/save.h
> +++ b/xen/include/public/arch-arm/hvm/save.h
> @@ -26,6 +26,136 @@
>  #ifndef __XEN_PUBLIC_HVM_SAVE_ARM_H__
>  #define __XEN_PUBLIC_HVM_SAVE_ARM_H__
>
> +#define HVM_FILE_MAGIC   0x92385520
> +#define HVM_FILE_VERSION 0x00000001
> +
> +struct hvm_save_header
> +{
> +    uint32_t magic;        /* Must be HVM_FILE_MAGIC */
> +    uint32_t version;      /* File format version */
> +    uint64_t changeset;    /* Version of Xen that saved this file */
> +    uint32_t cpuid;        /* MIDR_EL1 on the saving machine */

This looks needlessly copied from x86, which is far from ideal.

On x86, Xen tries to parse the Mercurial revision number from its
compile-time information and fails now that the underlying codebase has
moved from hg to git. As a result, the value is now generally -1.

cpuid is also an x86ism as far as I am aware.

I also wonder about the wisdom of having identically named structures
like this in arch code without an arch_ prefix. We should make it as
hard as possible for things like this to accidentally get referenced in
common code.

> +};
> +DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header);
> +
> +struct vgic_rank
> +{
> +    uint32_t ienable, iactive, ipend, pendsgi;
> +    uint32_t icfg[2];
> +    uint32_t ipriority[8];
> +    uint32_t itargets[8];
> +};
> +
> +struct hvm_hw_gic
> +{
> +    uint32_t gic_hcr;
> +    uint32_t gic_vmcr;
> +    uint32_t gic_apr;
> +    uint32_t gic_lr[64];
> +    uint64_t event_mask;

Does this uint64_t have alignment issues between 32-bit and 64-bit
builds? It certainly would on x86, but I don't know for sure on arm.

> +    uint64_t lr_mask;
> +    struct vgic_rank ppi_state;
> +};
> +DECLARE_HVM_SAVE_TYPE(GIC, 2, struct hvm_hw_gic);
> +
> +#define TIMER_TYPE_VIRT 0
> +#define TIMER_TYPE_PHYS 1
> +
> +struct hvm_hw_timer
> +{
> +    uint64_t vtb_offset;
> +    uint32_t ctl;
> +    uint64_t cval;

Another alignment query.

> +    uint32_t type;
> +};
> +DECLARE_HVM_SAVE_TYPE(TIMER, 3, struct hvm_hw_timer);
> +
> +struct hvm_hw_cpu
> +{
> +#ifdef CONFIG_ARM_32
> +    uint64_t vfp[34];  /* 32-bit VFP registers */
> +#else
> +    uint64_t vfp[66];  /* 64-bit VFP registers */
> +#endif
> +
> +    /* Guest core registers */
> +    uint64_t x0;     /* r0_usr */
> +    uint64_t x1;     /* r1_usr */
> +    uint64_t x2;     /* r2_usr */
> +    uint64_t x3;     /* r3_usr */
> +    uint64_t x4;     /* r4_usr */
> +    uint64_t x5;     /* r5_usr */
> +    uint64_t x6;     /* r6_usr */
> +    uint64_t x7;     /* r7_usr */
> +    uint64_t x8;     /* r8_usr */
> +    uint64_t x9;     /* r9_usr */
> +    uint64_t x10;    /* r10_usr */
> +    uint64_t x11;    /* r11_usr */
> +    uint64_t x12;    /* r12_usr */
> +    uint64_t x13;    /* sp_usr */
> +    uint64_t x14;    /* lr_usr; */
> +    uint64_t x15;    /* __unused_sp_hyp */
> +    uint64_t x16;    /* lr_irq */
> +    uint64_t x17;    /* sp_irq */
> +    uint64_t x18;    /* lr_svc */
> +    uint64_t x19;    /* sp_svc */
> +    uint64_t x20;    /* lr_abt */
> +    uint64_t x21;    /* sp_abt */
> +    uint64_t x22;    /* lr_und */
> +    uint64_t x23;    /* sp_und */
> +    uint64_t x24;    /* r8_fiq */
> +    uint64_t x25;    /* r9_fiq */
> +    uint64_t x26;    /* r10_fiq */
> +    uint64_t x27;    /* r11_fiq */
> +    uint64_t x28;    /* r12_fiq */
> +    uint64_t x29;    /* fp,sp_fiq */
> +    uint64_t x30;    /* lr_fiq */
> +    uint64_t pc64;   /* ELR_EL2 */
> +    uint32_t cpsr;   /* SPSR_EL2 */
> +    uint32_t spsr_el1;  /* spsr_svc */
> +    /* AArch32 guests only */
> +    uint32_t spsr_fiq, spsr_irq, spsr_und, spsr_abt;
> +    /* AArch64 guests only */
> +    uint64_t sp_el0;
> +    uint64_t sp_el1, elr_el1;

If these are arch-exclusive, should they be in a union?

> +
> +    uint32_t sctlr, ttbcr;
> +    uint64_t ttbr0, ttbr1;
> +
> +    uint32_t ifar, dfar;
> +    uint32_t ifsr, dfsr;
> +    uint32_t dacr;
> +    uint64_t par;
> +
> +    uint64_t far;
> +    uint64_t esr;
> +
> +    uint64_t mair0, mair1;
> +    uint64_t tpidr_el0;
> +    uint64_t tpidr_el1;
> +    uint64_t tpidrro_el0;
> +    uint64_t vbar;
> +
> +    /* Control Registers */
> +    uint32_t actlr;
> +    uint32_t cpacr;
> +    uint32_t afsr0, afsr1;
> +    uint32_t contextidr;
> +    uint32_t teecr, teehbr;  /* ThumbEE, 32-bit guests only */
> +    uint32_t joscr, jmcr;
> +    /* CP 15 */
> +    uint32_t csselr;
> +
> +    unsigned long pause_flags;

What is this doing here? This is not architectural state of the CPU.

> +
> +};
> +DECLARE_HVM_SAVE_TYPE(VCPU, 4, struct hvm_hw_cpu);
> +
> +/*
> + * Largest type-code in use
> + */
> +#define HVM_SAVE_CODE_MAX 4
> +
>  #endif
>
>  /*
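For the arch-exclusive fields, such a union inside struct hvm_hw_cpu might look roughly like this (an illustrative, untested sketch only; the field names come from the patch, the arm32/arm64 member names are hypothetical):

    union
    {
        struct                  /* AArch32 guests only */
        {
            uint32_t spsr_fiq, spsr_irq, spsr_und, spsr_abt;
        } arm32;
        struct                  /* AArch64 guests only */
        {
            uint64_t sp_el0;
            uint64_t sp_el1, elr_el1;
        } arm64;
    };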
Hello Wei,

Thank you for the patch. You are modifying common code in this patch;
I've added Jan and Keir. Also, I would create a separate patch for this
code movement.

On 15/04/14 22:05, Wei Huang wrote:
> +
> +HVM_REGISTER_SAVE_RESTORE(GIC, hvm_gic_save_ctxt, hvm_gic_load_ctxt, 1,
> +                          HVMSR_PER_VCPU);

With the new support for different GICs, I would differentiate between
VGIC and GIC save/restore. Also, can you append V2 to the name? GICv3
support will be added soon.

[..]

> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
> index 3d6a721..7c47eac 100644
> --- a/xen/arch/arm/vtimer.c
> +++ b/xen/arch/arm/vtimer.c
> @@ -21,6 +21,7 @@
>  #include <xen/lib.h>
>  #include <xen/timer.h>
>  #include <xen/sched.h>
> +#include <xen/hvm/save.h>
>  #include <asm/irq.h>
>  #include <asm/time.h>
>  #include <asm/gic.h>
> @@ -284,6 +285,76 @@ int vtimer_emulate(struct cpu_user_regs *regs, union hsr hsr)
>      }
>  }
>
> +static int hvm_vtimer_save_ctxt(struct domain *d, hvm_domain_context_t *h)
> +{
> +    struct hvm_hw_timer ctxt;
> +    struct vcpu *v;
> +    struct vtimer *t;
> +    int i, ret = 0;
> +
> +    /* Save the state of vtimer and ptimer */
> +    for_each_vcpu( d, v )
> +    {
> +        t = &v->arch.virt_timer;
> +        for ( i = 0; i < 2; i++ )
> +        {

Looping here is very confusing: what do 0 and 1 mean? I would create a
helper with the content of this loop and call it twice with the correct
value as parameter. Something like:

hvm_save_vtimer(TIMER_TYPE_PHYS, &v->arch.phys_timer);
hvm_save_vtimer(TIMER_TYPE_VIRT, &v->arch.virt_timer);
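A rough, untested sketch of such a helper, reusing the names from the patch (the exact signature is only illustrative):

    static int hvm_save_vtimer(struct domain *d, hvm_domain_context_t *h,
                               struct vcpu *v, struct vtimer *t,
                               uint32_t type)
    {
        struct hvm_hw_timer ctxt;

        /* Snapshot one timer; the caller picks phys or virt explicitly. */
        ctxt.cval = t->cval;
        ctxt.ctl = t->ctl;
        ctxt.vtb_offset = (type == TIMER_TYPE_PHYS) ?
                          d->arch.phys_timer_base.offset :
                          d->arch.virt_timer_base.offset;
        ctxt.type = type;

        return hvm_save_entry(TIMER, v->vcpu_id, h, &ctxt);
    }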
> +            ctxt.cval = t->cval;
> +            ctxt.ctl = t->ctl;
> +            ctxt.vtb_offset = i ? d->arch.phys_timer_base.offset :
> +                                  d->arch.virt_timer_base.offset;
> +            ctxt.type = i ? TIMER_TYPE_PHYS : TIMER_TYPE_VIRT;
> +
> +            if ( (ret = hvm_save_entry(TIMER, v->vcpu_id, h, &ctxt)) != 0 )
> +                return ret;
> +
> +            t = &v->arch.phys_timer;

It will avoid this hackish line.

> +        }
> +    }
> +
> +    return ret;
> +}
> +
> +static int hvm_vtimer_load_ctxt(struct domain *d, hvm_domain_context_t *h)
> +{
> +    int vcpuid;
> +    struct hvm_hw_timer ctxt;
> +    struct vcpu *v;
> +    struct vtimer *t = NULL;
> +
> +    /* Which vcpu is this? */
> +    vcpuid = hvm_load_instance(h);
> +
> +    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
> +    {
> +        dprintk(XENLOG_G_ERR, "HVM restore: dom%u has no vcpu%u\n",
> +                d->domain_id, vcpuid);
> +        return -EINVAL;
> +    }
> +
> +    if ( hvm_load_entry(TIMER, h, &ctxt) != 0 )
> +        return -EINVAL;
> +
> +    if ( ctxt.type == TIMER_TYPE_VIRT )
> +    {
> +        t = &v->arch.virt_timer;
> +        d->arch.virt_timer_base.offset = ctxt.vtb_offset;
> +    }
> +    else

else if ( ctxt.type == TIMER_TYPE_PHYS )? Then fail if ctxt.type is
wrong. Even better, I would use a switch statement.

> +    {
> +        t = &v->arch.phys_timer;
> +        d->arch.phys_timer_base.offset = ctxt.vtb_offset;

Saving {virt,phys}_timer_base.offset, which are per-domain, seems a
waste of space and confusing.

> +    }
> +
> +    t->cval = ctxt.cval;
> +    t->ctl = ctxt.ctl;
> +    t->v = v;
> +
> +    return 0;
> +}
> +

[..]

> diff --git a/xen/include/asm-arm/hvm/support.h b/xen/include/asm-arm/hvm/support.h
> new file mode 100644
> index 0000000..09f7cb8
> --- /dev/null
> +++ b/xen/include/asm-arm/hvm/support.h
> @@ -0,0 +1,29 @@
> +/*
> + * asm-arm/hvm/support.h: HVM support routines used by ARM.
> + *
> + * Copyright (c) 2014, Samsung Electronics.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
> + * Place - Suite 330, Boston, MA 02111-1307 USA.
> + */
> +
> +#ifndef __ASM_ARM_HVM_SUPPORT_H__
> +#define __ASM_ARM_HVM_SUPPORT_H__
> +
> +#include <xen/types.h>
> +#include <public/hvm/ioreq.h>
> +#include <xen/sched.h>
> +#include <xen/hvm/save.h>
> +#include <asm/processor.h>
> +
> +#endif /* __ASM_ARM_HVM_SUPPORT_H__ */
> diff --git a/xen/include/public/arch-arm/hvm/save.h b/xen/include/public/arch-arm/hvm/save.h
> index 75b8e65..f6ad258 100644
> --- a/xen/include/public/arch-arm/hvm/save.h
> +++ b/xen/include/public/arch-arm/hvm/save.h
> @@ -26,6 +26,136 @@
>  #ifndef __XEN_PUBLIC_HVM_SAVE_ARM_H__
>  #define __XEN_PUBLIC_HVM_SAVE_ARM_H__
>
> +#define HVM_FILE_MAGIC   0x92385520
> +#define HVM_FILE_VERSION 0x00000001
> +
> +struct hvm_save_header
> +{
> +    uint32_t magic;        /* Must be HVM_FILE_MAGIC */
> +    uint32_t version;      /* File format version */
> +    uint64_t changeset;    /* Version of Xen that saved this file */
> +    uint32_t cpuid;        /* MIDR_EL1 on the saving machine */
> +};
> +DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header);
> +
> +struct vgic_rank

I would rename this into vgic_v2_rank.

> +{
> +    uint32_t ienable, iactive, ipend, pendsgi;
> +    uint32_t icfg[2];
> +    uint32_t ipriority[8];
> +    uint32_t itargets[8];
> +};
> +
> +struct hvm_hw_gic

I would rename this into hvm_hw_vgic.

> +{
> +    uint32_t gic_hcr;
> +    uint32_t gic_vmcr;
> +    uint32_t gic_apr;
> +    uint32_t gic_lr[64];
> +    uint64_t event_mask;
> +    uint64_t lr_mask;
> +    struct vgic_rank ppi_state;

As said previously, I would separate GIC from VGIC save/restore.
>>> On 16.04.14 at 11:48, <julien.grall@linaro.org> wrote:
> You are modifying common code in this patch; I've added Jan and Keir.

That's for x86 side changes. IanC, IanJ, and Tim should also be copied
for common code changes now (unless it is clear that IanC, who was
already copied, would deal with it).

> Also, I would create a separate patch for this code movement.

Indeed. Plus it needs an explanation of why
XEN_DOMCTL_gethvmcontext_partial doesn't also get moved.

Jan
On 04/16/2014 05:30 AM, Jan Beulich wrote:
>>>> On 16.04.14 at 11:48, <julien.grall@linaro.org> wrote:
>> You are modifying common code in this patch; I've added Jan and Keir.
>
> That's for x86 side changes. IanC, IanJ, and Tim should also be copied
> for common code changes now (unless it is clear that IanC, who was
> already copied, would deal with it).

Sorry, will do next time. I will break the patches into smaller,
independent components for specific reviewers to comment on.

>> Also, I would create a separate patch for this code movement.
>
> Indeed. Plus it needs an explanation of why
> XEN_DOMCTL_gethvmcontext_partial doesn't also get moved.

Will fix.

> Jan
On 04/15/2014 06:37 PM, Andrew Cooper wrote:
>> --- a/xen/include/public/arch-arm/hvm/save.h
>> +++ b/xen/include/public/arch-arm/hvm/save.h
>> @@ -26,6 +26,136 @@
>>  #ifndef __XEN_PUBLIC_HVM_SAVE_ARM_H__
>>  #define __XEN_PUBLIC_HVM_SAVE_ARM_H__
>>
>> +#define HVM_FILE_MAGIC   0x92385520
>> +#define HVM_FILE_VERSION 0x00000001
>> +
>> +struct hvm_save_header
>> +{
>> +    uint32_t magic;        /* Must be HVM_FILE_MAGIC */
>> +    uint32_t version;      /* File format version */
>> +    uint64_t changeset;    /* Version of Xen that saved this file */
>> +    uint32_t cpuid;        /* MIDR_EL1 on the saving machine */
>
> This looks needlessly copied from x86, which is far from ideal.
>
> On x86, Xen tries to parse the Mercurial revision number from its
> compile-time information and fails now that the underlying codebase has
> moved from hg to git. As a result, the value is now generally -1.
>
> cpuid is also an x86ism as far as I am aware.
>
> I also wonder about the wisdom of having identically named structures
> like this in arch code without an arch_ prefix. We should make it as
> hard as possible for things like this to accidentally get referenced in
> common code.
>
It is tricky for hvm_save_header. This is a struct used in common code
(xen/common/hvm/save.c). Instead of making it arch_ specific, I would
move it to common code (include/public/hvm/save.h), with the following
modifications:
1. Re-define "cpuid" of hvm_save_header as cpu_id. hvm_save_header will
   be shared by both x86 and ARM.
2. Rename HVM_FILE_MAGIC to HVM_ARM_FILE_MAGIC, and still keep it in
   the arch-arm/hvm/save.h file. This is used in arch-specific code, so
   we won't get confused. The same applies to HVM_FILE_MAGIC in x86.
3. The other structs in arch-arm/hvm/save.h will remain in the same
   file. Those structs are arch-specific anyway.

-Wei
On 16/04/2014 22:50, Wei Huang wrote:
> On 04/15/2014 06:37 PM, Andrew Cooper wrote:
>>> --- a/xen/include/public/arch-arm/hvm/save.h
>>> +++ b/xen/include/public/arch-arm/hvm/save.h
>>> @@ -26,6 +26,136 @@
>>>  #ifndef __XEN_PUBLIC_HVM_SAVE_ARM_H__
>>>  #define __XEN_PUBLIC_HVM_SAVE_ARM_H__
>>>
>>> +#define HVM_FILE_MAGIC   0x92385520
>>> +#define HVM_FILE_VERSION 0x00000001
>>> +
>>> +struct hvm_save_header
>>> +{
>>> +    uint32_t magic;        /* Must be HVM_FILE_MAGIC */
>>> +    uint32_t version;      /* File format version */
>>> +    uint64_t changeset;    /* Version of Xen that saved this file */
>>> +    uint32_t cpuid;        /* MIDR_EL1 on the saving machine */
>>
>> This looks needlessly copied from x86, which is far from ideal.
>>
>> On x86, Xen tries to parse the Mercurial revision number from its
>> compile-time information and fails now that the underlying codebase has
>> moved from hg to git. As a result, the value is now generally -1.
>>
>> cpuid is also an x86ism as far as I am aware.
>>
>> I also wonder about the wisdom of having identically named structures
>> like this in arch code without an arch_ prefix. We should make it as
>> hard as possible for things like this to accidentally get referenced in
>> common code.
>>
> It is tricky for hvm_save_header. This is a struct used in common code
> (xen/common/hvm/save.c). Instead of making it arch_ specific, I would
> move it to common code (include/public/hvm/save.h), with the following
> modifications:
> 1. Re-define "cpuid" of hvm_save_header as cpu_id. hvm_save_header will
>    be shared by both x86 and ARM.
> 2. Rename HVM_FILE_MAGIC to HVM_ARM_FILE_MAGIC, and still keep it in
>    the arch-arm/hvm/save.h file. This is used in arch-specific code, so
>    we won't get confused. The same applies to HVM_FILE_MAGIC in x86.
> 3. The other structs in arch-arm/hvm/save.h will remain in the same
>    file. Those structs are arch-specific anyway.
>
> -Wei
>

Eugh. Having a more thorough look through all of this code, it is in
need of improvement.

For the sake of hdr.{magic,version,changeset}, it is not worth keeping
some common save logic for the header. arch_hvm_save() should be
updated to be given the hvm context and should construct & write the
entire structure. This also matches the current semantics of
arch_hvm_load(), where the arch handler deals with the entire structure.

The currently existing hvm_save_header is quite silly. 'changeset'
conveys no useful information since the switch from hg to git, 'cpuid'
is used for the sole purpose of a printk(), and 'gtsc_khz' is
unconditionally repeated later in the migration record with the rest of
the tsc information.

Everything currently in arch-x86/hvm/save.h should be renamed to
identify it as hvm_x86. This can be done in a backwards-compatible
manner by using some __XEN_INTERFACE_VERSION__ ifdefery.

Everything new in arch-arm/hvm/save.h should be identified as hvm_arm
right from the outset.

Beyond that, the only key point is that HVM_$arch_FILE_MAGIC needs to be
different for each $arch, but that appears to be already in hand.

~Andrew
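As a purely illustrative sketch of that kind of compatibility ifdefery (the hvm_x86_ name and the interface version number are hypothetical, not the actual Xen headers):

    struct hvm_x86_save_header
    {
        uint32_t magic;
        uint32_t version;
        uint64_t changeset;
        uint32_t cpuid;
        uint32_t gtsc_khz;
    };

    #if __XEN_INTERFACE_VERSION__ < 0x00040500
    /* Keep the old name visible for consumers of the old interface. */
    #define hvm_save_header hvm_x86_save_header
    #endif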
Hello Wei,

On 04/15/2014 10:05 PM, Wei Huang wrote:
> +static int hvm_vtimer_load_ctxt(struct domain *d, hvm_domain_context_t *h)
> +{
> +    int vcpuid;
> +    struct hvm_hw_timer ctxt;
> +    struct vcpu *v;
> +    struct vtimer *t = NULL;
> +
> +    /* Which vcpu is this? */
> +    vcpuid = hvm_load_instance(h);
> +
> +    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
> +    {
> +        dprintk(XENLOG_G_ERR, "HVM restore: dom%u has no vcpu%u\n",
> +                d->domain_id, vcpuid);
> +        return -EINVAL;
> +    }
> +
> +    if ( hvm_load_entry(TIMER, h, &ctxt) != 0 )
> +        return -EINVAL;
> +
> +    if ( ctxt.type == TIMER_TYPE_VIRT )
> +    {
> +        t = &v->arch.virt_timer;
> +        d->arch.virt_timer_base.offset = ctxt.vtb_offset;
> +    }
> +    else
> +    {
> +        t = &v->arch.phys_timer;
> +        d->arch.phys_timer_base.offset = ctxt.vtb_offset;

I thought a bit more about the {phys,virt}_timer_base.offset.

When you are migrating a guest, this offset will be invalidated. This
offset is used to get a relative offset from the Xen timer counter.

That also made me think the timer context switch in Xen looks wrong to
me. When a guest VCPU is context-switched, the Xen timer counter
continues to run, but CVAL does not, so timer_base.offset will drift a
bit. This will result in setting a wrong timer via set_timer in Xen.

Did I miss something?
On 04/17/2014 10:06 AM, Julien Grall wrote:
> Hello Wei,
>
> On 04/15/2014 10:05 PM, Wei Huang wrote:
>> +static int hvm_vtimer_load_ctxt(struct domain *d, hvm_domain_context_t *h)
>> +{
>> +    int vcpuid;
>> +    struct hvm_hw_timer ctxt;
>> +    struct vcpu *v;
>> +    struct vtimer *t = NULL;
>> +
>> +    /* Which vcpu is this? */
>> +    vcpuid = hvm_load_instance(h);
>> +
>> +    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
>> +    {
>> +        dprintk(XENLOG_G_ERR, "HVM restore: dom%u has no vcpu%u\n",
>> +                d->domain_id, vcpuid);
>> +        return -EINVAL;
>> +    }
>> +
>> +    if ( hvm_load_entry(TIMER, h, &ctxt) != 0 )
>> +        return -EINVAL;
>> +
>> +    if ( ctxt.type == TIMER_TYPE_VIRT )
>> +    {
>> +        t = &v->arch.virt_timer;
>> +        d->arch.virt_timer_base.offset = ctxt.vtb_offset;
>> +    }
>> +    else
>> +    {
>> +        t = &v->arch.phys_timer;
>> +        d->arch.phys_timer_base.offset = ctxt.vtb_offset;
>
> I thought a bit more about the {phys,virt}_timer_base.offset.
>
> When you are migrating a guest, this offset will be invalidated. This
> offset is used to get a relative offset from the Xen timer counter.
>
Agreed.

> That also made me think the timer context switch in Xen looks wrong to
> me. When a guest VCPU is context-switched, the Xen timer counter
> continues to run, but CVAL does not, so timer_base.offset will drift a
> bit. This will result in setting a wrong timer via set_timer in Xen.
>
I need to examine the code more carefully, but from skimming vtimer.c
it looks like this is the case: the missing ticks are not compensated.

> Did I miss something?
>
On Thu, 2014-04-17 at 16:06 +0100, Julien Grall wrote:
> I thought a bit more about the {phys,virt}_timer_base.offset.
>
> When you are migrating a guest, this offset will be invalidated. This
> offset is used to get a relative offset from the Xen timer counter.
>
> That also made me think the timer context switch in Xen looks wrong to
> me. When a guest VCPU is context-switched, the Xen timer counter
> continues to run, but CVAL does not, so timer_base.offset will drift a
> bit. This will result in setting a wrong timer via set_timer in Xen.
>
> Did I miss something?

The timer offset is mainly accounting for the fact that the domain is
not booted when the hardware is started.

However, time does continue while a VCPU is not scheduled; this is
exposed via the PV "stolen time" mechanism.

Now, it is in theory possible to virtualise time differently so that
stolen time is not possible, but unless you want to cope with different
VCPUs seeing different times (because they have been descheduled for
different lengths of time) then you either need to do gang scheduling
or play other (likely complicated) tricks. With the model we have on
ARM, paravirtualising this is the right thing to do.

Not sure what you mean about CVAL (the timer compare value) not
running. When we deschedule a VCPU we figure out when CVAL would have
caused the timer interrupt to fire and set up a Xen timer to make sure
we unblock the VCPU at that point. When we switch back to the VCPU we
of course restore the compare value to what the guest wrote; nothing
else would make sense.

Ian
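The deschedule path described above amounts to something like the following simplified sketch (loosely based on the xen/arch/arm/vtimer.c of this era; the helper and constant names are from memory, not verbatim):

    /* On deschedule: convert the guest's compare value into an absolute
     * Xen time and arm a software timer to unblock the VCPU on expiry. */
    if ( v->arch.virt_timer.ctl & CNTx_CTL_ENABLE )
        set_timer(&v->arch.virt_timer.timer,
                  ticks_to_ns(v->arch.virt_timer.cval +
                              v->domain->arch.virt_timer_base.offset -
                              boot_count));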
On 05/12/2014 10:16 AM, Ian Campbell wrote:
> On Thu, 2014-04-17 at 16:06 +0100, Julien Grall wrote:
>> I thought a bit more about the {phys,virt}_timer_base.offset.
>>
>> When you are migrating a guest, this offset will be invalidated. This
>> offset is used to get a relative offset from the Xen timer counter.
>>
>> That also made me think the timer context switch in Xen looks wrong to
>> me. When a guest VCPU is context-switched, the Xen timer counter
>> continues to run, but CVAL does not, so timer_base.offset will drift a
>> bit. This will result in setting a wrong timer via set_timer in Xen.
>>
>> Did I miss something?
>
> The timer offset is mainly accounting for the fact that the domain is
> not booted when the hardware is started.
>
> However, time does continue while a VCPU is not scheduled; this is
> exposed via the PV "stolen time" mechanism.
>
> Now, it is in theory possible to virtualise time differently so that
> stolen time is not possible, but unless you want to cope with different
> VCPUs seeing different times (because they have been descheduled for
> different lengths of time) then you either need to do gang scheduling
> or play other (likely complicated) tricks. With the model we have on
> ARM, paravirtualising this is the right thing to do.
>
> Not sure what you mean about CVAL (the timer compare value) not
> running. When we deschedule a VCPU we figure out when CVAL would have
> caused the timer interrupt to fire and set up a Xen timer to make sure
> we unblock the VCPU at that point. When we switch back to the VCPU we
> of course restore the compare value to what the guest wrote; nothing
> else would make sense.

After reading your explanation and the ARM ARM again, I think I mixed
up CNT (the counter) and CVAL (the compare value).

Thank you for the explanation.
(Adding back Xen devel)

On 05/13/2014 04:31 PM, Wei Huang wrote:
> On 05/12/2014 07:04 AM, Julien Grall wrote:
>> On 05/12/2014 10:16 AM, Ian Campbell wrote:
>>> On Thu, 2014-04-17 at 16:06 +0100, Julien Grall wrote:
>>>> I thought a bit more about the {phys,virt}_timer_base.offset.
>>>>
>>>> When you are migrating a guest, this offset will be invalidated. This
>>>> offset is used to get a relative offset from the Xen timer counter.
>>>>
>>>> That also made me think the timer context switch in Xen looks wrong
>>>> to me. When a guest VCPU is context-switched, the Xen timer counter
>>>> continues to run, but CVAL does not, so timer_base.offset will drift
>>>> a bit. This will result in setting a wrong timer via set_timer in Xen.
>>>>
>>>> Did I miss something?
>>>
>>> The timer offset is mainly accounting for the fact that the domain is
>>> not booted when the hardware is started.
>>>
>>> However, time does continue while a VCPU is not scheduled; this is
>>> exposed via the PV "stolen time" mechanism.
>>>
>>> Now, it is in theory possible to virtualise time differently so that
>>> stolen time is not possible, but unless you want to cope with
>>> different VCPUs seeing different times (because they have been
>>> descheduled for different lengths of time) then you either need to do
>>> gang scheduling or play other (likely complicated) tricks. With the
>>> model we have on ARM, paravirtualising this is the right thing to do.
>>>
>>> Not sure what you mean about CVAL (the timer compare value) not
>>> running. When we deschedule a VCPU we figure out when CVAL would have
>>> caused the timer interrupt to fire and set up a Xen timer to make
>>> sure we unblock the VCPU at that point. When we switch back to the
>>> VCPU we of course restore the compare value to what the guest wrote;
>>> nothing else would make sense.
>>
>> After reading your explanation and the ARM ARM again, I think I mixed
>> up CNT (the counter) and CVAL (the compare value).
>>
>> Thank you for the explanation.
>>
> Other than the code comments (case/switch), are you OK with the design
> of the latest ARCH_TIMER patch?

I made some comments on v3. Once you address the comments from Andrew
and me, the patch will be in good shape.

Regards,
On 05/13/2014 10:42 AM, Julien Grall wrote:
> (Adding back Xen devel)
>
> On 05/13/2014 04:31 PM, Wei Huang wrote:
>> On 05/12/2014 07:04 AM, Julien Grall wrote:
>>> On 05/12/2014 10:16 AM, Ian Campbell wrote:
>>>> On Thu, 2014-04-17 at 16:06 +0100, Julien Grall wrote:
>>>>> I thought a bit more about the {phys,virt}_timer_base.offset.
>>>>>
>>>>> When you are migrating a guest, this offset will be invalidated.
>>>>> This offset is used to get a relative offset from the Xen timer
>>>>> counter.
>>>>>
>>>>> That also made me think the timer context switch in Xen looks wrong
>>>>> to me. When a guest VCPU is context-switched, the Xen timer counter
>>>>> continues to run, but CVAL does not, so timer_base.offset will
>>>>> drift a bit. This will result in setting a wrong timer via
>>>>> set_timer in Xen.
>>>>>
>>>>> Did I miss something?
>>>>
>>>> The timer offset is mainly accounting for the fact that the domain
>>>> is not booted when the hardware is started.
>>>>
>>>> However, time does continue while a VCPU is not scheduled; this is
>>>> exposed via the PV "stolen time" mechanism.
>>>>
>>>> Now, it is in theory possible to virtualise time differently so that
>>>> stolen time is not possible, but unless you want to cope with
>>>> different VCPUs seeing different times (because they have been
>>>> descheduled for different lengths of time) then you either need to
>>>> do gang scheduling or play other (likely complicated) tricks. With
>>>> the model we have on ARM, paravirtualising this is the right thing
>>>> to do.
>>>>
>>>> Not sure what you mean about CVAL (the timer compare value) not
>>>> running. When we deschedule a VCPU we figure out when CVAL would
>>>> have caused the timer interrupt to fire and set up a Xen timer to
>>>> make sure we unblock the VCPU at that point. When we switch back to
>>>> the VCPU we of course restore the compare value to what the guest
>>>> wrote; nothing else would make sense.
>>>
>>> After reading your explanation and the ARM ARM again, I think I mixed
>>> up CNT (the counter) and CVAL (the compare value).
>>>
>>> Thank you for the explanation.
>>>
>> Other than the code comments (case/switch), are you OK with the design
>> of the latest ARCH_TIMER patch?
>
> I made some comments on v3. Once you address the comments from Andrew
> and me, the patch will be in good shape.
>
Given the comments from you and Andrew, I will revise the context
struct to the following format. With this, we can get rid of most
problems (switch/case/...).

struct hvm_arm_timer
{
    /* phys_timer */
    uint64_t phys_vtb_offset;
    uint64_t phys_cval;
    uint32_t phys_ctl;

    /* virt_timer */
    uint64_t virt_vtb_offset;
    uint64_t virt_cval;
    uint32_t virt_ctl;
};
DECLARE_HVM_SAVE_TYPE(TIMER, 4, struct hvm_arm_timer);

If there are any comments, please let me know.

> Regards,
>
On 05/13/2014 05:18 PM, Wei Huang wrote:
> Given the comments from you and Andrew, I will revise the context
> struct to the following format. With this, we can get rid of most
> problems (switch/case/...).

With this solution, you will duplicate code to save/restore the timer.

> struct hvm_arm_timer
> {
>     /* phys_timer */
>     uint64_t phys_vtb_offset;
>     uint64_t phys_cval;
>     uint32_t phys_ctl;

If I'm not mistaken, you need 32-bit padding here ...

>
>     /* virt_timer */
>     uint64_t virt_vtb_offset;
>     uint64_t virt_cval;
>     uint32_t virt_ctl;

... and here.

Regards,
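That is, with explicit padding the layout would be identical on 32-bit and 64-bit builds; something like this (illustrative; the pad field names are made up):

    struct hvm_arm_timer
    {
        /* phys_timer */
        uint64_t phys_vtb_offset;
        uint64_t phys_cval;
        uint32_t phys_ctl;
        uint32_t pad0;    /* keep the following uint64_t 8-byte aligned */

        /* virt_timer */
        uint64_t virt_vtb_offset;
        uint64_t virt_cval;
        uint32_t virt_ctl;
        uint32_t pad1;    /* keep the overall size the same on 32/64 bit */
    };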
On 05/13/2014 11:37 AM, Julien Grall wrote:
> On 05/13/2014 05:18 PM, Wei Huang wrote:
>> Given the comments from you and Andrew, I will revise the context
>> struct to the following format. With this, we can get rid of most
>> problems (switch/case/...).
>
> With this solution, you will duplicate code to save/restore the timer.

The code size will be reduced and it looks cleaner. Here is an example:

static int hvm_timer_save(struct domain *d, hvm_domain_context_t *h)
{
    struct hvm_arm_timer ctxt;
    struct vcpu *v;
    int rc = 0;

    /* Save the state of vtimer and ptimer */
    for_each_vcpu( d, v )
    {
        /* save phys_timer */
        ctxt.phys_cval = v->arch.phys_timer.cval;
        ctxt.phys_ctl = v->arch.phys_timer.ctl;
        ctxt.phys_vtb_offset = d->arch.phys_timer_base.offset;

        /* save virt_timer */
        ctxt.virt_cval = v->arch.virt_timer.cval;
        ctxt.virt_ctl = v->arch.virt_timer.ctl;
        ctxt.virt_vtb_offset = d->arch.virt_timer_base.offset;

        if ( (rc = hvm_save_entry(TIMER, v->vcpu_id, h, &ctxt)) != 0 )
            return rc;
    }

    return rc;
}

>
>> struct hvm_arm_timer
>> {
>>     /* phys_timer */
>>     uint64_t phys_vtb_offset;
>>     uint64_t phys_cval;
>>     uint32_t phys_ctl;
>
> If I'm not mistaken, you need 32-bit padding here ...
>
>>
>>     /* virt_timer */
>>     uint64_t virt_vtb_offset;
>>     uint64_t virt_cval;
>>     uint32_t virt_ctl;
>
> ... and here.
>
> Regards,
>
On 05/13/2014 05:44 PM, Wei Huang wrote:
> On 05/13/2014 11:37 AM, Julien Grall wrote:
>> On 05/13/2014 05:18 PM, Wei Huang wrote:
>>> Given the comments from you and Andrew, I will revise the context
>>> struct to the following format. With this, we can get rid of most
>>> problems (switch/case/...).
>>
>> With this solution, you will duplicate code to save/restore the timer.
> The code size will be reduced and it looks cleaner. Here is an example:

LGTM except ...

> static int hvm_timer_save(struct domain *d, hvm_domain_context_t *h)
> {
>     struct hvm_arm_timer ctxt;
>     struct vcpu *v;
>     int rc = 0;
>
>     /* Save the state of vtimer and ptimer */
>     for_each_vcpu( d, v )
>     {
>         /* save phys_timer */
>         ctxt.phys_cval = v->arch.phys_timer.cval;
>         ctxt.phys_ctl = v->arch.phys_timer.ctl;
>         ctxt.phys_vtb_offset = d->arch.phys_timer_base.offset;
>
>         /* save virt_timer */
>         ctxt.virt_cval = v->arch.virt_timer.cval;
>         ctxt.virt_ctl = v->arch.virt_timer.ctl;
>         ctxt.virt_vtb_offset = d->arch.virt_timer_base.offset;

I think you need to store d->arch.virt_timer_base.offset in ns rather
than ticks.

Regards,
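In the save/restore paths that would be a conversion such as the following (sketch; it assumes the ticks_to_ns()/ns_to_ticks() helper pair used elsewhere in vtimer.c):

    /* save: store the offset in nanoseconds */
    ctxt.virt_vtb_offset = ticks_to_ns(d->arch.virt_timer_base.offset);

    /* restore: convert back to ticks on the destination host */
    d->arch.virt_timer_base.offset = ns_to_ticks(ctxt.virt_vtb_offset);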
diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile index 63e0460..d9a328c 100644 --- a/xen/arch/arm/Makefile +++ b/xen/arch/arm/Makefile @@ -33,6 +33,7 @@ obj-y += hvm.o obj-y += device.o obj-y += decode.o obj-y += processor.o +obj-y += save.o #obj-bin-y += ....o diff --git a/xen/arch/arm/hvm.c b/xen/arch/arm/hvm.c index 471c4cd..18eb2bd 100644 --- a/xen/arch/arm/hvm.c +++ b/xen/arch/arm/hvm.c @@ -4,6 +4,7 @@ #include <xen/errno.h> #include <xen/guest_access.h> #include <xen/sched.h> +#include <xen/hvm/save.h> #include <xsm/xsm.h> @@ -12,9 +13,9 @@ #include <public/hvm/hvm_op.h> #include <asm/hypercall.h> +#include <asm/gic.h> long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg) - { long rc = 0; @@ -65,3 +66,268 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg) return rc; } + +/* Save CPU related states into save/retore context */ +static int hvm_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h) +{ + struct hvm_hw_cpu ctxt; + struct vcpu_guest_core_regs c; + struct vcpu *v; + int ret = 0; + + /* Save the state of CPU */ + for_each_vcpu( d, v ) + { + memset(&ctxt, 0, sizeof(ctxt)); + + ctxt.sctlr = v->arch.sctlr; + ctxt.ttbr0 = v->arch.ttbr0; + ctxt.ttbr1 = v->arch.ttbr1; + ctxt.ttbcr = v->arch.ttbcr; + + ctxt.dacr = v->arch.dacr; + ctxt.ifsr = v->arch.ifsr; +#ifdef CONFIG_ARM_32 + ctxt.ifar = v->arch.ifar; + ctxt.dfar = v->arch.dfar; + ctxt.dfsr = v->arch.dfsr; +#else + ctxt.far = v->arch.far; + ctxt.esr = v->arch.esr; +#endif + +#ifdef CONFIG_ARM_32 + ctxt.mair0 = v->arch.mair0; + ctxt.mair1 = v->arch.mair1; +#else + ctxt.mair0 = v->arch.mair; +#endif + + /* Control Registers */ + ctxt.actlr = v->arch.actlr; + ctxt.sctlr = v->arch.sctlr; + ctxt.cpacr = v->arch.cpacr; + + ctxt.contextidr = v->arch.contextidr; + ctxt.tpidr_el0 = v->arch.tpidr_el0; + ctxt.tpidr_el1 = v->arch.tpidr_el1; + ctxt.tpidrro_el0 = v->arch.tpidrro_el0; + + /* CP 15 */ + ctxt.csselr = v->arch.csselr; + + ctxt.afsr0 = v->arch.afsr0; + ctxt.afsr1 = v->arch.afsr1; + ctxt.vbar = v->arch.vbar; + ctxt.par = v->arch.par; + ctxt.teecr = v->arch.teecr; + ctxt.teehbr = v->arch.teehbr; + +#ifdef CONFIG_ARM_32 + ctxt.joscr = v->arch.joscr; + ctxt.jmcr = v->arch.jmcr; +#endif + + memset(&c, 0, sizeof(c)); + + /* get guest core registers */ + vcpu_regs_hyp_to_user(v, &c); + + ctxt.x0 = c.x0; + ctxt.x1 = c.x1; + ctxt.x2 = c.x2; + ctxt.x3 = c.x3; + ctxt.x4 = c.x4; + ctxt.x5 = c.x5; + ctxt.x6 = c.x6; + ctxt.x7 = c.x7; + ctxt.x8 = c.x8; + ctxt.x9 = c.x9; + ctxt.x10 = c.x10; + ctxt.x11 = c.x11; + ctxt.x12 = c.x12; + ctxt.x13 = c.x13; + ctxt.x14 = c.x14; + ctxt.x15 = c.x15; + ctxt.x16 = c.x16; + ctxt.x17 = c.x17; + ctxt.x18 = c.x18; + ctxt.x19 = c.x19; + ctxt.x20 = c.x20; + ctxt.x21 = c.x21; + ctxt.x22 = c.x22; + ctxt.x23 = c.x23; + ctxt.x24 = c.x24; + ctxt.x25 = c.x25; + ctxt.x26 = c.x26; + ctxt.x27 = c.x27; + ctxt.x28 = c.x28; + ctxt.x29 = c.x29; + ctxt.x30 = c.x30; + ctxt.pc64 = c.pc64; + ctxt.cpsr = c.cpsr; + ctxt.spsr_el1 = c.spsr_el1; /* spsr_svc */ + +#ifdef CONFIG_ARM_32 + ctxt.spsr_fiq = c.spsr_fiq; + ctxt.spsr_irq = c.spsr_irq; + ctxt.spsr_und = c.spsr_und; + ctxt.spsr_abt = c.spsr_abt; +#endif +#ifdef CONFIG_ARM_64 + ctxt.sp_el0 = c.sp_el0; + ctxt.sp_el1 = c.sp_el1; + ctxt.elr_el1 = c.elr_el1; +#endif + + /* check VFP state size */ + BUILD_BUG_ON(sizeof(v->arch.vfp) > sizeof (ctxt.vfp)); + memcpy((void*) &ctxt.vfp, (void*) &v->arch.vfp, sizeof(v->arch.vfp)); + + ctxt.pause_flags = v->pause_flags; + + if ( (ret = hvm_save_entry(VCPU, v->vcpu_id, h, &ctxt)) != 0 ) + return ret; + 
+    }
+
+    return ret;
+}
+
+/* Load CPU-related state from an existing save/restore context */
+static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
+{
+    int vcpuid;
+    struct hvm_hw_cpu ctxt;
+    struct vcpu *v;
+    struct vcpu_guest_core_regs c;
+
+    vcpuid = hvm_load_instance(h);
+    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
+    {
+        dprintk(XENLOG_G_ERR, "HVM restore: dom%u has no vcpu%u\n",
+                d->domain_id, vcpuid);
+        return -EINVAL;
+    }
+
+    if ( hvm_load_entry(VCPU, h, &ctxt) != 0 )
+        return -EINVAL;
+
+    v->arch.sctlr = ctxt.sctlr;
+    v->arch.ttbr0 = ctxt.ttbr0;
+    v->arch.ttbr1 = ctxt.ttbr1;
+    v->arch.ttbcr = ctxt.ttbcr;
+
+    v->arch.dacr = ctxt.dacr;
+    v->arch.ifsr = ctxt.ifsr;
+#ifdef CONFIG_ARM_32
+    v->arch.ifar = ctxt.ifar;
+    v->arch.dfar = ctxt.dfar;
+    v->arch.dfsr = ctxt.dfsr;
+#else
+    v->arch.far = ctxt.far;
+    v->arch.esr = ctxt.esr;
+#endif
+
+#ifdef CONFIG_ARM_32
+    v->arch.mair0 = ctxt.mair0;
+    v->arch.mair1 = ctxt.mair1;
+#else
+    v->arch.mair = ctxt.mair0;
+#endif
+
+    /* Control Registers */
+    v->arch.actlr = ctxt.actlr;
+    v->arch.cpacr = ctxt.cpacr;
+    v->arch.contextidr = ctxt.contextidr;
+    v->arch.tpidr_el0 = ctxt.tpidr_el0;
+    v->arch.tpidr_el1 = ctxt.tpidr_el1;
+    v->arch.tpidrro_el0 = ctxt.tpidrro_el0;
+
+    /* CP 15 */
+    v->arch.csselr = ctxt.csselr;
+
+    v->arch.afsr0 = ctxt.afsr0;
+    v->arch.afsr1 = ctxt.afsr1;
+    v->arch.vbar = ctxt.vbar;
+    v->arch.par = ctxt.par;
+    v->arch.teecr = ctxt.teecr;
+    v->arch.teehbr = ctxt.teehbr;
+#ifdef CONFIG_ARM_32
+    v->arch.joscr = ctxt.joscr;
+    v->arch.jmcr = ctxt.jmcr;
+#endif
+
+    /* Fill guest core registers */
+    memset(&c, 0, sizeof(c));
+    c.x0 = ctxt.x0;
+    c.x1 = ctxt.x1;
+    c.x2 = ctxt.x2;
+    c.x3 = ctxt.x3;
+    c.x4 = ctxt.x4;
+    c.x5 = ctxt.x5;
+    c.x6 = ctxt.x6;
+    c.x7 = ctxt.x7;
+    c.x8 = ctxt.x8;
+    c.x9 = ctxt.x9;
+    c.x10 = ctxt.x10;
+    c.x11 = ctxt.x11;
+    c.x12 = ctxt.x12;
+    c.x13 = ctxt.x13;
+    c.x14 = ctxt.x14;
+    c.x15 = ctxt.x15;
+    c.x16 = ctxt.x16;
+    c.x17 = ctxt.x17;
+    c.x18 = ctxt.x18;
+    c.x19 = ctxt.x19;
+    c.x20 = ctxt.x20;
+    c.x21 = ctxt.x21;
+    c.x22 = ctxt.x22;
+    c.x23 = ctxt.x23;
+    c.x24 = ctxt.x24;
+    c.x25 = ctxt.x25;
+    c.x26 = ctxt.x26;
+    c.x27 = ctxt.x27;
+    c.x28 = ctxt.x28;
+    c.x29 = ctxt.x29;
+    c.x30 = ctxt.x30;
+    c.pc64 = ctxt.pc64;
+    c.cpsr = ctxt.cpsr;
+    c.spsr_el1 = ctxt.spsr_el1; /* spsr_svc */
+
+#ifdef CONFIG_ARM_32
+    c.spsr_fiq = ctxt.spsr_fiq;
+    c.spsr_irq = ctxt.spsr_irq;
+    c.spsr_und = ctxt.spsr_und;
+    c.spsr_abt = ctxt.spsr_abt;
+#endif
+#ifdef CONFIG_ARM_64
+    c.sp_el0 = ctxt.sp_el0;
+    c.sp_el1 = ctxt.sp_el1;
+    c.elr_el1 = ctxt.elr_el1;
+#endif
+
+    /* Set guest core registers */
+    vcpu_regs_user_to_hyp(v, &c);
+
+    /* Check the VFP state size and copy the saved VFP blob itself */
+    BUILD_BUG_ON(sizeof(v->arch.vfp) > sizeof(ctxt.vfp));
+    memcpy(&v->arch.vfp, &ctxt.vfp, sizeof(v->arch.vfp));
+
+    v->is_initialised = 1;
+    v->pause_flags = ctxt.pause_flags;
+
+    return 0;
+}
+
+HVM_REGISTER_SAVE_RESTORE(VCPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt, 1,
+                          HVMSR_PER_VCPU);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
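The VCPU record above follows the same save/load/register pattern that the GIC and timer records later in this patch repeat: the save handler emits one record per vCPU with hvm_save_entry(), the load handler finds the target vCPU from the record instance returned by hvm_load_instance(), and HVM_REGISTER_SAVE_RESTORE() ties the pair to the record's type code. A minimal sketch of the pattern for a hypothetical FOO record (hvm_hw_foo and FOO are illustrative names only, not part of this patch):

    /* Sketch only: a hypothetical per-vCPU record using the same API. */
    static int hvm_save_foo_ctxt(struct domain *d, hvm_domain_context_t *h)
    {
        struct hvm_hw_foo ctxt;   /* would be declared in the public save.h */
        struct vcpu *v;
        int ret = 0;

        for_each_vcpu( d, v )
        {
            memset(&ctxt, 0, sizeof(ctxt));
            /* ... fill ctxt from v->arch ... */
            if ( (ret = hvm_save_entry(FOO, v->vcpu_id, h, &ctxt)) != 0 )
                return ret;
        }

        return ret;
    }

    static int hvm_load_foo_ctxt(struct domain *d, hvm_domain_context_t *h)
    {
        int vcpuid = hvm_load_instance(h);
        struct hvm_hw_foo ctxt;
        struct vcpu *v;

        if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
            return -EINVAL;
        if ( hvm_load_entry(FOO, h, &ctxt) != 0 )
            return -EINVAL;
        /* ... apply ctxt to v->arch ... */
        return 0;
    }

    HVM_REGISTER_SAVE_RESTORE(FOO, hvm_save_foo_ctxt, hvm_load_foo_ctxt, 1,
                              HVMSR_PER_VCPU);

A new record type would also need a DECLARE_HVM_SAVE_TYPE(FOO, <code>, struct hvm_hw_foo) in the public header, with HVM_SAVE_CODE_MAX bumped to cover the new code (see the save.h hunk at the end of this patch).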
diff --git a/xen/arch/arm/save.c b/xen/arch/arm/save.c
new file mode 100644
index 0000000..eef14a8
--- /dev/null
+++ b/xen/arch/arm/save.c
@@ -0,0 +1,65 @@
+/*
+ * save.c: Save and restore HVM guest's emulated hardware state for ARM.
+ *
+ * Copyright (c) 2014, Samsung Electronics.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include <asm/hvm/support.h>
+#include <public/hvm/save.h>
+
+void arch_hvm_save(struct domain *d, struct hvm_save_header *hdr)
+{
+    hdr->cpuid = current_cpu_data.midr.bits;
+}
+
+int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr)
+{
+    uint32_t cpuid;
+
+    if ( hdr->magic != HVM_FILE_MAGIC )
+    {
+        printk(XENLOG_G_ERR "HVM%d restore: bad magic number %#"PRIx32"\n",
+               d->domain_id, hdr->magic);
+        return -EINVAL;
+    }
+
+    if ( hdr->version != HVM_FILE_VERSION )
+    {
+        printk(XENLOG_G_ERR "HVM%d restore: unsupported version %u\n",
+               d->domain_id, hdr->version);
+        return -EINVAL;
+    }
+
+    cpuid = current_cpu_data.midr.bits;
+    if ( hdr->cpuid != cpuid )
+    {
+        printk(XENLOG_G_ERR "HVM%d restore: VM saved on one CPU "
+               "(%#"PRIx32") and restored on another (%#"PRIx32").\n",
+               d->domain_id, hdr->cpuid, cpuid);
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index 9fc9586..af244a7 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -24,6 +24,7 @@
 #include <xen/softirq.h>
 #include <xen/irq.h>
 #include <xen/sched.h>
+#include <xen/hvm/save.h>
 
 #include <asm/current.h>
 
@@ -73,6 +74,75 @@ static struct vgic_irq_rank *vgic_irq_rank(struct vcpu *v, int b, int n)
     return NULL;
 }
 
+/* Save rank info into a context to support domain save/restore */
+static int vgic_save_irq_rank(struct vcpu *v, struct vgic_rank *ext,
+                              struct vgic_irq_rank *rank)
+{
+    spin_lock(&rank->lock);
+
+    /* IENABLE, IACTIVE, IPEND, PENDSGI registers */
+    ext->ienable = rank->ienable;
+    ext->iactive = rank->iactive;
+    ext->ipend = rank->ipend;
+    ext->pendsgi = rank->pendsgi;
+
+    /* ICFG */
+    ext->icfg[0] = rank->icfg[0];
+    ext->icfg[1] = rank->icfg[1];
+
+    /* IPRIORITY */
+    BUILD_BUG_ON(sizeof(rank->ipriority) != sizeof(ext->ipriority));
+    memcpy(ext->ipriority, rank->ipriority, sizeof(rank->ipriority));
+
+    /* ITARGETS */
+    BUILD_BUG_ON(sizeof(rank->itargets) != sizeof(ext->itargets));
+    memcpy(ext->itargets, rank->itargets, sizeof(rank->itargets));
+
+    spin_unlock(&rank->lock);
+
+    return 0;
+}
+
+/* Load rank info from a context to support domain save/restore */
+static int vgic_load_irq_rank(struct vcpu *v, struct vgic_irq_rank *rank,
+                              struct vgic_rank *ext)
+{
+    struct pending_irq *p;
+    unsigned int irq = 0;
+    const unsigned long enable_bits = ext->ienable;
+
+    spin_lock(&rank->lock);
+
+    /* IENABLE, IACTIVE, IPEND, PENDSGI registers */
+    rank->ienable = ext->ienable;
+    rank->iactive = ext->iactive;
+    rank->ipend = ext->ipend;
+    rank->pendsgi = ext->pendsgi;
+
+    /* ICFG */
+    rank->icfg[0] = ext->icfg[0];
+    rank->icfg[1] = ext->icfg[1];
+
+    /* IPRIORITY */
+    BUILD_BUG_ON(sizeof(rank->ipriority) != sizeof(ext->ipriority));
+    memcpy(rank->ipriority, ext->ipriority, sizeof(rank->ipriority));
+
+    /* ITARGETS */
+    BUILD_BUG_ON(sizeof(rank->itargets) != sizeof(ext->itargets));
+    memcpy(rank->itargets, ext->itargets, sizeof(rank->itargets));
+
+    /* Set IRQ status as enabled by iterating through rank->ienable */
+    while ( (irq = find_next_bit(&enable_bits, 32, irq)) < 32 )
+    {
+        p = irq_to_pending(v, irq);
+        set_bit(GIC_IRQ_GUEST_ENABLED, &p->status);
+        irq++;
+    }
+
+    spin_unlock(&rank->lock);
+
+    return 0;
+}
+
 int domain_vgic_init(struct domain *d)
 {
     int i;
@@ -749,6 +819,84 @@ out:
     smp_send_event_check_mask(cpumask_of(v->processor));
 }
 
+
+/* Save GIC state into a context to support save/restore */
+static int hvm_gic_save_ctxt(struct domain *d, hvm_domain_context_t *h)
+{
+    struct hvm_hw_gic ctxt;
+    struct vcpu *v;
+    int ret = 0;
+
+    /* Save the state of GICs */
+    for_each_vcpu( d, v )
+    {
+        /* Zero the record first so that padding does not leak stack data */
+        memset(&ctxt, 0, sizeof(ctxt));
+
+        ctxt.gic_hcr = v->arch.gic_hcr;
+        ctxt.gic_vmcr = v->arch.gic_vmcr;
+        ctxt.gic_apr = v->arch.gic_apr;
+
+        /* Save list registers and masks */
+        BUILD_BUG_ON(sizeof(v->arch.gic_lr) > sizeof(ctxt.gic_lr));
+        memcpy(ctxt.gic_lr, v->arch.gic_lr, sizeof(v->arch.gic_lr));
+
+        ctxt.lr_mask = v->arch.lr_mask;
+        ctxt.event_mask = v->arch.event_mask;
+
+        /* Save PPI states (per-CPU), necessary for SMP-enabled guests */
+        if ( (ret = vgic_save_irq_rank(v, &ctxt.ppi_state,
+                                       &v->arch.vgic.private_irqs)) != 0 )
+            return ret;
+
+        if ( (ret = hvm_save_entry(GIC, v->vcpu_id, h, &ctxt)) != 0 )
+            return ret;
+    }
+
+    return ret;
+}
+
+/* Restore GIC state from a context to support save/restore */
+static int hvm_gic_load_ctxt(struct domain *d, hvm_domain_context_t *h)
+{
+    int vcpuid;
+    struct hvm_hw_gic ctxt;
+    struct vcpu *v;
+    int ret = 0;
+
+    /* Which vcpu is this? */
+    vcpuid = hvm_load_instance(h);
+    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
+    {
+        dprintk(XENLOG_G_ERR, "HVM restore: dom%u has no vcpu%u\n",
+                d->domain_id, vcpuid);
+        return -EINVAL;
+    }
+
+    if ( hvm_load_entry(GIC, h, &ctxt) != 0 )
+        return -EINVAL;
+
+    v->arch.gic_hcr = ctxt.gic_hcr;
+    v->arch.gic_vmcr = ctxt.gic_vmcr;
+    v->arch.gic_apr = ctxt.gic_apr;
+
+    /* Restore list registers and masks */
+    BUILD_BUG_ON(sizeof(v->arch.gic_lr) > sizeof(ctxt.gic_lr));
+    memcpy(v->arch.gic_lr, ctxt.gic_lr, sizeof(v->arch.gic_lr));
+
+    v->arch.lr_mask = ctxt.lr_mask;
+    v->arch.event_mask = ctxt.event_mask;
+
+    /* Restore PPI states */
+    if ( (ret = vgic_load_irq_rank(v, &v->arch.vgic.private_irqs,
+                                   &ctxt.ppi_state)) != 0 )
+        return ret;
+
+    return ret;
+}
+
+HVM_REGISTER_SAVE_RESTORE(GIC, hvm_gic_save_ctxt, hvm_gic_load_ctxt, 1,
+                          HVMSR_PER_VCPU);
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
index 3d6a721..7c47eac 100644
--- a/xen/arch/arm/vtimer.c
+++ b/xen/arch/arm/vtimer.c
@@ -21,6 +21,7 @@
 #include <xen/lib.h>
 #include <xen/timer.h>
 #include <xen/sched.h>
+#include <xen/hvm/save.h>
 #include <asm/irq.h>
 #include <asm/time.h>
 #include <asm/gic.h>
@@ -284,6 +285,76 @@ int vtimer_emulate(struct cpu_user_regs *regs, union hsr hsr)
     }
 }
 
+static int hvm_vtimer_save_ctxt(struct domain *d, hvm_domain_context_t *h)
+{
+    struct hvm_hw_timer ctxt;
+    struct vcpu *v;
+    struct vtimer *t;
+    int i, ret = 0;
+
+    /* Save the state of the virtual and the physical timer of each vCPU */
+    for_each_vcpu( d, v )
+    {
+        t = &v->arch.virt_timer;
+        for ( i = 0; i < 2; i++ )
+        {
+            ctxt.cval = t->cval;
+            ctxt.ctl = t->ctl;
+            ctxt.vtb_offset = i ? d->arch.phys_timer_base.offset :
+                                  d->arch.virt_timer_base.offset;
+            ctxt.type = i ? TIMER_TYPE_PHYS : TIMER_TYPE_VIRT;
+
+            if ( (ret = hvm_save_entry(TIMER, v->vcpu_id, h, &ctxt)) != 0 )
+                return ret;
+
+            t = &v->arch.phys_timer;
+        }
+    }
+
+    return ret;
+}
+
+static int hvm_vtimer_load_ctxt(struct domain *d, hvm_domain_context_t *h)
+{
+    int vcpuid;
+    struct hvm_hw_timer ctxt;
+    struct vcpu *v;
+    struct vtimer *t = NULL;
+
+    /* Which vcpu is this? */
+    vcpuid = hvm_load_instance(h);
+
+    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
+    {
+        dprintk(XENLOG_G_ERR, "HVM restore: dom%u has no vcpu%u\n",
+                d->domain_id, vcpuid);
+        return -EINVAL;
+    }
+
+    if ( hvm_load_entry(TIMER, h, &ctxt) != 0 )
+        return -EINVAL;
+
+    if ( ctxt.type == TIMER_TYPE_VIRT )
+    {
+        t = &v->arch.virt_timer;
+        d->arch.virt_timer_base.offset = ctxt.vtb_offset;
+    }
+    else
+    {
+        t = &v->arch.phys_timer;
+        d->arch.phys_timer_base.offset = ctxt.vtb_offset;
+    }
+
+    t->cval = ctxt.cval;
+    t->ctl = ctxt.ctl;
+    t->v = v;
+
+    return 0;
+}
+
+HVM_REGISTER_SAVE_RESTORE(TIMER, hvm_vtimer_save_ctxt, hvm_vtimer_load_ctxt,
+                          2, HVMSR_PER_VCPU);
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 26635ff..30fbd30 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -399,76 +399,6 @@ long arch_do_domctl(
     }
     break;
 
-    case XEN_DOMCTL_sethvmcontext:
-    {
-        struct hvm_domain_context c = { .size = domctl->u.hvmcontext.size };
-
-        ret = -EINVAL;
-        if ( !is_hvm_domain(d) )
-            goto sethvmcontext_out;
-
-        ret = -ENOMEM;
-        if ( (c.data = xmalloc_bytes(c.size)) == NULL )
-            goto sethvmcontext_out;
-
-        ret = -EFAULT;
-        if ( copy_from_guest(c.data, domctl->u.hvmcontext.buffer, c.size) != 0)
-            goto sethvmcontext_out;
-
-        domain_pause(d);
-        ret = hvm_load(d, &c);
-        domain_unpause(d);
-
-    sethvmcontext_out:
-        if ( c.data != NULL )
-            xfree(c.data);
-    }
-    break;
-
-    case XEN_DOMCTL_gethvmcontext:
-    {
-        struct hvm_domain_context c = { 0 };
-
-        ret = -EINVAL;
-        if ( !is_hvm_domain(d) )
-            goto gethvmcontext_out;
-
-        c.size = hvm_save_size(d);
-
-        if ( guest_handle_is_null(domctl->u.hvmcontext.buffer) )
-        {
-            /* Client is querying for the correct buffer size */
-            domctl->u.hvmcontext.size = c.size;
-            ret = 0;
-            goto gethvmcontext_out;
-        }
-
-        /* Check that the client has a big enough buffer */
-        ret = -ENOSPC;
-        if ( domctl->u.hvmcontext.size < c.size )
-            goto gethvmcontext_out;
-
-        /* Allocate our own marshalling buffer */
-        ret = -ENOMEM;
-        if ( (c.data = xmalloc_bytes(c.size)) == NULL )
-            goto gethvmcontext_out;
-
-        domain_pause(d);
-        ret = hvm_save(d, &c);
-        domain_unpause(d);
-
-        domctl->u.hvmcontext.size = c.cur;
-        if ( copy_to_guest(domctl->u.hvmcontext.buffer, c.data, c.size) != 0 )
-            ret = -EFAULT;
-
-    gethvmcontext_out:
-        copyback = 1;
-
-        if ( c.data != NULL )
-            xfree(c.data);
-    }
-    break;
-
     case XEN_DOMCTL_gethvmcontext_partial:
     {
         ret = -EINVAL;
diff --git a/xen/common/Makefile b/xen/common/Makefile
index 3683ae3..13b781f 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -62,7 +62,7 @@ obj-$(CONFIG_XENCOMM) += xencomm.o
 
 subdir-$(CONFIG_COMPAT) += compat
 
-subdir-$(x86_64) += hvm
+subdir-y += hvm
 
 subdir-$(coverage) += gcov
 
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 5342e5d..2ea4af5 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -24,6 +24,8 @@
 #include <xen/bitmap.h>
 #include <xen/paging.h>
 #include <xen/hypercall.h>
+#include <xen/hvm/save.h>
+#include <xen/guest_access.h>
 #include <asm/current.h>
 #include <asm/irq.h>
 #include <asm/page.h>
@@ -881,6 +883,78 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
     }
     break;
 
+    case XEN_DOMCTL_sethvmcontext:
+    {
+        struct hvm_domain_context c = { .size = op->u.hvmcontext.size };
+
+        ret = -EINVAL;
+        if ( (d == current->domain) || /* no domain_pause() */
+             !is_hvm_domain(d) )
+            goto sethvmcontext_out;
+
+        ret = -ENOMEM;
+        if ( (c.data = xmalloc_bytes(c.size)) == NULL )
+            goto sethvmcontext_out;
+
+        ret = -EFAULT;
+        if ( copy_from_guest(c.data, op->u.hvmcontext.buffer, c.size) != 0 )
+            goto sethvmcontext_out;
+
+        domain_pause(d);
+        ret = hvm_load(d, &c);
+        domain_unpause(d);
+
+    sethvmcontext_out:
+        if ( c.data != NULL )
+            xfree(c.data);
+    }
+    break;
+
+    case XEN_DOMCTL_gethvmcontext:
+    {
+        struct hvm_domain_context c = { 0 };
+
+        ret = -EINVAL;
+        if ( (d == current->domain) || /* no domain_pause() */
+             !is_hvm_domain(d) )
+            goto gethvmcontext_out;
+
+        c.size = hvm_save_size(d);
+
+        if ( guest_handle_is_null(op->u.hvmcontext.buffer) )
+        {
+            /* Client is querying for the correct buffer size */
+            op->u.hvmcontext.size = c.size;
+            ret = 0;
+            goto gethvmcontext_out;
+        }
+
+        /* Check that the client has a big enough buffer */
+        ret = -ENOSPC;
+        if ( op->u.hvmcontext.size < c.size )
+            goto gethvmcontext_out;
+
+        /* Allocate our own marshalling buffer */
+        ret = -ENOMEM;
+        if ( (c.data = xmalloc_bytes(c.size)) == NULL )
+            goto gethvmcontext_out;
+
+        domain_pause(d);
+        ret = hvm_save(d, &c);
+        domain_unpause(d);
+
+        op->u.hvmcontext.size = c.cur;
+        if ( copy_to_guest(op->u.hvmcontext.buffer, c.data, c.size) != 0 )
+            ret = -EFAULT;
+
+    gethvmcontext_out:
+        copyback = 1;
+
+        if ( c.data != NULL )
+            xfree(c.data);
+    }
+    break;
+
     default:
         ret = arch_do_domctl(op, d, u_domctl);
         break;
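For context, the toolstack drives these two domctls through libxc with the usual two-call pattern: a first xc_domain_hvm_getcontext() call with a NULL buffer returns the required size, and a second call fetches the marshalled records. A rough sketch (error handling abbreviated; not part of this patch):

    #include <stdlib.h>
    #include <xenctrl.h>

    static int fetch_hvm_context(xc_interface *xch, uint32_t domid,
                                 uint8_t **buf, uint32_t *len)
    {
        int sz = xc_domain_hvm_getcontext(xch, domid, NULL, 0);

        if ( sz <= 0 || (*buf = malloc(sz)) == NULL )
            return -1;

        /* Second call copies the records out of the hypervisor. */
        if ( xc_domain_hvm_getcontext(xch, domid, *buf, sz) <= 0 )
        {
            free(*buf);
            return -1;
        }

        *len = sz;
        return 0;
    }

The buffer can later be pushed back with xc_domain_hvm_setcontext(xch, domid, buf, len), which lands in the XEN_DOMCTL_sethvmcontext case above.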
diff --git a/xen/include/asm-arm/hvm/support.h b/xen/include/asm-arm/hvm/support.h
new file mode 100644
index 0000000..09f7cb8
--- /dev/null
+++ b/xen/include/asm-arm/hvm/support.h
@@ -0,0 +1,29 @@
+/*
+ * asm-arm/hvm/support.h: HVM support routines used by ARM.
+ *
+ * Copyright (c) 2014, Samsung Electronics.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#ifndef __ASM_ARM_HVM_SUPPORT_H__
+#define __ASM_ARM_HVM_SUPPORT_H__
+
+#include <xen/types.h>
+#include <public/hvm/ioreq.h>
+#include <xen/sched.h>
+#include <xen/hvm/save.h>
+#include <asm/processor.h>
+
+#endif /* __ASM_ARM_HVM_SUPPORT_H__ */
diff --git a/xen/include/public/arch-arm/hvm/save.h b/xen/include/public/arch-arm/hvm/save.h
index 75b8e65..f6ad258 100644
--- a/xen/include/public/arch-arm/hvm/save.h
+++ b/xen/include/public/arch-arm/hvm/save.h
@@ -26,6 +26,135 @@
 #ifndef __XEN_PUBLIC_HVM_SAVE_ARM_H__
 #define __XEN_PUBLIC_HVM_SAVE_ARM_H__
 
+#define HVM_FILE_MAGIC   0x92385520
+#define HVM_FILE_VERSION 0x00000001
+
+struct hvm_save_header
+{
+    uint32_t magic;     /* Must be HVM_FILE_MAGIC */
+    uint32_t version;   /* File format version */
+    uint64_t changeset; /* Version of Xen that saved this file */
+    uint32_t cpuid;     /* MIDR_EL1 on the saving machine */
+};
+DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header);
+
+struct vgic_rank
+{
+    uint32_t ienable, iactive, ipend, pendsgi;
+    uint32_t icfg[2];
+    uint32_t ipriority[8];
+    uint32_t itargets[8];
+};
+
+struct hvm_hw_gic
+{
+    uint32_t gic_hcr;
+    uint32_t gic_vmcr;
+    uint32_t gic_apr;
+    uint32_t gic_lr[64];
+    uint64_t event_mask;
+    uint64_t lr_mask;
+    struct vgic_rank ppi_state;
+};
+DECLARE_HVM_SAVE_TYPE(GIC, 2, struct hvm_hw_gic);
+
+#define TIMER_TYPE_VIRT 0
+#define TIMER_TYPE_PHYS 1
+
+struct hvm_hw_timer
+{
+    uint64_t vtb_offset;
+    uint32_t ctl;
+    uint64_t cval;
+    uint32_t type;
+};
+DECLARE_HVM_SAVE_TYPE(TIMER, 3, struct hvm_hw_timer);
+
+struct hvm_hw_cpu
+{
+#ifdef CONFIG_ARM_32
+    uint64_t vfp[34]; /* 32-bit VFP registers */
+#else
+    uint64_t vfp[66]; /* 64-bit VFP registers */
+#endif
+
+    /* Guest core registers */
+    uint64_t x0;      /* r0_usr */
+    uint64_t x1;      /* r1_usr */
+    uint64_t x2;      /* r2_usr */
+    uint64_t x3;      /* r3_usr */
+    uint64_t x4;      /* r4_usr */
+    uint64_t x5;      /* r5_usr */
+    uint64_t x6;      /* r6_usr */
+    uint64_t x7;      /* r7_usr */
+    uint64_t x8;      /* r8_usr */
+    uint64_t x9;      /* r9_usr */
+    uint64_t x10;     /* r10_usr */
+    uint64_t x11;     /* r11_usr */
+    uint64_t x12;     /* r12_usr */
+    uint64_t x13;     /* sp_usr */
+    uint64_t x14;     /* lr_usr */
+    uint64_t x15;     /* __unused_sp_hyp */
+    uint64_t x16;     /* lr_irq */
+    uint64_t x17;     /* sp_irq */
+    uint64_t x18;     /* lr_svc */
+    uint64_t x19;     /* sp_svc */
+    uint64_t x20;     /* lr_abt */
+    uint64_t x21;     /* sp_abt */
+    uint64_t x22;     /* lr_und */
+    uint64_t x23;     /* sp_und */
+    uint64_t x24;     /* r8_fiq */
+    uint64_t x25;     /* r9_fiq */
+    uint64_t x26;     /* r10_fiq */
+    uint64_t x27;     /* r11_fiq */
+    uint64_t x28;     /* r12_fiq */
+    uint64_t x29;     /* fp,sp_fiq */
+    uint64_t x30;     /* lr_fiq */
+    uint64_t pc64;    /* ELR_EL2 */
+    uint32_t cpsr;    /* SPSR_EL2 */
+    uint32_t spsr_el1; /* spsr_svc */
+    /* AArch32 guests only */
+    uint32_t spsr_fiq, spsr_irq, spsr_und, spsr_abt;
+    /* AArch64 guests only */
+    uint64_t sp_el0;
+    uint64_t sp_el1, elr_el1;
+
+    uint32_t sctlr, ttbcr;
+    uint64_t ttbr0, ttbr1;
+
+    uint32_t ifar, dfar;
+    uint32_t ifsr, dfsr;
+    uint32_t dacr;
+    uint64_t par;
+
+    uint64_t far;
+    uint64_t esr;
+
+    uint64_t mair0, mair1;
+    uint64_t tpidr_el0;
+    uint64_t tpidr_el1;
+    uint64_t tpidrro_el0;
+    uint64_t vbar;
+
+    /* Control Registers */
+    uint32_t actlr;
+    uint32_t cpacr;
+    uint32_t afsr0, afsr1;
+    uint32_t contextidr;
+    uint32_t teecr, teehbr; /* ThumbEE, 32-bit guests only */
+    uint32_t joscr, jmcr;
+    /* CP 15 */
+    uint32_t csselr;
+
+    unsigned long pause_flags;
+};
+DECLARE_HVM_SAVE_TYPE(VCPU, 4, struct hvm_hw_cpu);
+
+/*
+ * Largest type-code in use
+ */
+#define HVM_SAVE_CODE_MAX 4
+
 #endif /* __XEN_PUBLIC_HVM_SAVE_ARM_H__ */
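For reference, these records travel in the generic framing defined by xen/include/public/hvm/save.h: each record is a descriptor (typecode, instance, length) immediately followed by its body, starting with the HEADER record that arch_hvm_load() validates above. A small user-space sketch of walking such an image (this assumes the generic descriptor layout from the common header and, as on x86, a zero-typecode END marker closing the image; illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors struct hvm_save_descriptor from public/hvm/save.h. */
    struct hvm_save_descriptor {
        uint16_t typecode;
        uint16_t instance;   /* e.g. vCPU id for HVMSR_PER_VCPU records */
        uint32_t length;     /* body length in bytes */
    };

    static void dump_records(const uint8_t *buf, uint32_t size)
    {
        uint32_t off = 0;

        while ( off + sizeof(struct hvm_save_descriptor) <= size )
        {
            const struct hvm_save_descriptor *d =
                (const struct hvm_save_descriptor *)(buf + off);

            printf("type %u instance %u length %u\n",
                   d->typecode, d->instance, d->length);

            if ( d->typecode == 0 )   /* end-of-image marker */
                break;
            off += sizeof(*d) + d->length;
        }
    }

Feeding this the buffer returned by xc_domain_hvm_getcontext() would list the HEADER, GIC, TIMER, and VCPU records defined by this patch.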