[Xen-devel,v4,3/5] xen/arm: clean and invalidate all guest caches by VMID after domain build.

Message ID 1391775176-30313-3-git-send-email-ian.campbell@citrix.com
State New
Headers show

Commit Message

Ian Campbell Feb. 7, 2014, 12:12 p.m.
Guests are initially started with caches disabled and so we need to make sure
they see consistent data in RAM (requiring a cache clean) but also that they
do not have old stale data suddenly appear in the caches when they enable
their caches (requiring the invalidate).

This can be split into two halves. First we must flush each page as it is
allocated to the guest. It is not sufficient to do the flush at scrub time
since this will miss pages which are ballooned out by the guest (where the
guest must scrub if it cares about not leaking the pagecontent). We need to
clean as well as invalidate to make sure that any scrubbing which has occured
gets committed to real RAM. To achieve this add a new cacheflush_page function,
which is a stub on x86.

Secondly we need to flush anything which the domain builder touches, which we
do via a new domctl.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: jbeulich@suse.com
Cc: keir@xen.org
Cc: ian.jackson@eu.citrix.com
--
v4: introduce a function to clean and invalidate as intended

    make the domctl take a length not an end.

v3:
    s/cacheflush_page/sync_page_to_ram/

    xc interface takes a length instead of an end

    make the domctl range inclusive.

    make xc interface internal -- it isn't needed from libxl in the current
    design and it is easier to expose an interface in the future than to hide
    it.

v2:
   Switch to cleaning at page allocation time + explicit flushing of the
   regions which the toolstack touches.

   Add XSM for new domctl.

   New domctl restricts the amount of space it is willing to flush, to avoid
   thinking about preemption.
---
 tools/libxc/xc_dom_boot.c           |    4 ++++
 tools/libxc/xc_dom_core.c           |    2 ++
 tools/libxc/xc_domain.c             |   10 ++++++++++
 tools/libxc/xc_private.c            |    2 ++
 tools/libxc/xc_private.h            |    3 +++
 xen/arch/arm/domctl.c               |   14 ++++++++++++++
 xen/arch/arm/mm.c                   |   12 ++++++++++++
 xen/arch/arm/p2m.c                  |   25 +++++++++++++++++++++++++
 xen/common/page_alloc.c             |    5 +++++
 xen/include/asm-arm/arm32/page.h    |    4 ++++
 xen/include/asm-arm/arm64/page.h    |    4 ++++
 xen/include/asm-arm/p2m.h           |    3 +++
 xen/include/asm-arm/page.h          |    3 +++
 xen/include/asm-x86/page.h          |    3 +++
 xen/include/public/domctl.h         |   13 +++++++++++++
 xen/xsm/flask/hooks.c               |    3 +++
 xen/xsm/flask/policy/access_vectors |    2 ++
 17 files changed, 112 insertions(+)

Comments

Julien Grall Feb. 7, 2014, 1:24 p.m. | #1
On 07/02/14 12:12, Ian Campbell wrote:
> Guests are initially started with caches disabled and so we need to make sure
> they see consistent data in RAM (requiring a cache clean) but also that they
> do not have old stale data suddenly appear in the caches when they enable
> their caches (requiring the invalidate).
>
> This can be split into two halves. First we must flush each page as it is
> allocated to the guest. It is not sufficient to do the flush at scrub time
> since this will miss pages which are ballooned out by the guest (where the
> guest must scrub if it cares about not leaking the pagecontent). We need to
> clean as well as invalidate to make sure that any scrubbing which has occured
> gets committed to real RAM. To achieve this add a new cacheflush_page function,
> which is a stub on x86.
>
> Secondly we need to flush anything which the domain builder touches, which we
> do via a new domctl.
>
> Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
Acked-by: Julien Grall <julien.grall@linaro.org> [for the ARM part]

> Cc: jbeulich@suse.com
> Cc: keir@xen.org
> Cc: ian.jackson@eu.citrix.com
> --
> v4: introduce a function to clean and invalidate as intended
>
>      make the domctl take a length not an end.
>
> v3:
>      s/cacheflush_page/sync_page_to_ram/
>
>      xc interface takes a length instead of an end
>
>      make the domctl range inclusive.
>
>      make xc interface internal -- it isn't needed from libxl in the current
>      design and it is easier to expose an interface in the future than to hide
>      it.
>
> v2:
>     Switch to cleaning at page allocation time + explicit flushing of the
>     regions which the toolstack touches.
>
>     Add XSM for new domctl.
>
>     New domctl restricts the amount of space it is willing to flush, to avoid
>     thinking about preemption.
> ---
>   tools/libxc/xc_dom_boot.c           |    4 ++++
>   tools/libxc/xc_dom_core.c           |    2 ++
>   tools/libxc/xc_domain.c             |   10 ++++++++++
>   tools/libxc/xc_private.c            |    2 ++
>   tools/libxc/xc_private.h            |    3 +++
>   xen/arch/arm/domctl.c               |   14 ++++++++++++++
>   xen/arch/arm/mm.c                   |   12 ++++++++++++
>   xen/arch/arm/p2m.c                  |   25 +++++++++++++++++++++++++
>   xen/common/page_alloc.c             |    5 +++++
>   xen/include/asm-arm/arm32/page.h    |    4 ++++
>   xen/include/asm-arm/arm64/page.h    |    4 ++++
>   xen/include/asm-arm/p2m.h           |    3 +++
>   xen/include/asm-arm/page.h          |    3 +++
>   xen/include/asm-x86/page.h          |    3 +++
>   xen/include/public/domctl.h         |   13 +++++++++++++
>   xen/xsm/flask/hooks.c               |    3 +++
>   xen/xsm/flask/policy/access_vectors |    2 ++
>   17 files changed, 112 insertions(+)
>
> diff --git a/tools/libxc/xc_dom_boot.c b/tools/libxc/xc_dom_boot.c
> index 5a9cfc6..3d4d107 100644
> --- a/tools/libxc/xc_dom_boot.c
> +++ b/tools/libxc/xc_dom_boot.c
> @@ -351,6 +351,10 @@ int xc_dom_gnttab_seed(xc_interface *xch, domid_t domid,
>           return -1;
>       }
>
> +    /* Guest shouldn't really touch its grant table until it has
> +     * enabled its caches. But lets be nice. */
> +    xc_domain_cacheflush(xch, domid, gnttab_gmfn, 1);
> +
>       return 0;
>   }
>
> diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c
> index 77a4e64..b9d1015 100644
> --- a/tools/libxc/xc_dom_core.c
> +++ b/tools/libxc/xc_dom_core.c
> @@ -603,6 +603,8 @@ void xc_dom_unmap_one(struct xc_dom_image *dom, xen_pfn_t pfn)
>           prev->next = phys->next;
>       else
>           dom->phys_pages = phys->next;
> +
> +    xc_domain_cacheflush(dom->xch, dom->guest_domid, phys->first, phys->count);
>   }
>
>   void xc_dom_unmap_all(struct xc_dom_image *dom)
> diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
> index c2fdd74..f10ec01 100644
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -48,6 +48,16 @@ int xc_domain_create(xc_interface *xch,
>       return 0;
>   }
>
> +int xc_domain_cacheflush(xc_interface *xch, uint32_t domid,
> +                         xen_pfn_t start_pfn, xen_pfn_t nr_pfns)
> +{
> +    DECLARE_DOMCTL;
> +    domctl.cmd = XEN_DOMCTL_cacheflush;
> +    domctl.domain = (domid_t)domid;
> +    domctl.u.cacheflush.start_pfn = start_pfn;
> +    domctl.u.cacheflush.nr_pfns = nr_pfns;
> +    return do_domctl(xch, &domctl);
> +}
>
>   int xc_domain_pause(xc_interface *xch,
>                       uint32_t domid)
> diff --git a/tools/libxc/xc_private.c b/tools/libxc/xc_private.c
> index 838fd21..33ed15b 100644
> --- a/tools/libxc/xc_private.c
> +++ b/tools/libxc/xc_private.c
> @@ -628,6 +628,7 @@ int xc_copy_to_domain_page(xc_interface *xch,
>           return -1;
>       memcpy(vaddr, src_page, PAGE_SIZE);
>       munmap(vaddr, PAGE_SIZE);
> +    xc_domain_cacheflush(xch, domid, dst_pfn, 1);
>       return 0;
>   }
>
> @@ -641,6 +642,7 @@ int xc_clear_domain_page(xc_interface *xch,
>           return -1;
>       memset(vaddr, 0, PAGE_SIZE);
>       munmap(vaddr, PAGE_SIZE);
> +    xc_domain_cacheflush(xch, domid, dst_pfn, 1);
>       return 0;
>   }
>
> diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
> index 92271c9..a610f0c 100644
> --- a/tools/libxc/xc_private.h
> +++ b/tools/libxc/xc_private.h
> @@ -304,6 +304,9 @@ void bitmap_byte_to_64(uint64_t *lp, const uint8_t *bp, int nbits);
>   /* Optionally flush file to disk and discard page cache */
>   void discard_file_cache(xc_interface *xch, int fd, int flush);
>
> +int xc_domain_cacheflush(xc_interface *xch, uint32_t domid,
> +			 xen_pfn_t start_pfn, xen_pfn_t nr_pfns);
> +
>   #define MAX_MMU_UPDATES 1024
>   struct xc_mmu {
>       mmu_update_t updates[MAX_MMU_UPDATES];
> diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
> index 546e86b..ffb68da 100644
> --- a/xen/arch/arm/domctl.c
> +++ b/xen/arch/arm/domctl.c
> @@ -17,6 +17,20 @@ long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
>   {
>       switch ( domctl->cmd )
>       {
> +    case XEN_DOMCTL_cacheflush:
> +    {
> +        unsigned long s = domctl->u.cacheflush.start_pfn;
> +        unsigned long e = s + domctl->u.cacheflush.nr_pfns;
> +
> +        if ( e < s )
> +            return -EINVAL;
> +
> +        if ( get_order_from_pages(domctl->u.cacheflush.nr_pfns) > MAX_ORDER )
> +            return -EINVAL;
> +
> +        return p2m_cache_flush(d, s, e);
> +    }
> +
>       default:
>           return subarch_do_domctl(domctl, d, u_domctl);
>       }
> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> index 2f48347..d2cfe64 100644
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -338,6 +338,18 @@ unsigned long domain_page_map_to_mfn(const void *va)
>   }
>   #endif
>
> +void sync_page_to_ram(unsigned long mfn)
> +{
> +    void *p, *v = map_domain_page(mfn);
> +
> +    dsb();           /* So the CPU issues all writes to the range */
> +    for ( p = v; p < v + PAGE_SIZE ; p += cacheline_bytes )
> +        asm volatile (__clean_and_invalidate_xen_dcache_one(0) : : "r" (p));
> +    dsb();           /* So we know the flushes happen before continuing */
> +
> +    unmap_domain_page(v);
> +}
> +
>   void __init arch_init_memory(void)
>   {
>       /*
> diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
> index a61edeb..86f13e9 100644
> --- a/xen/arch/arm/p2m.c
> +++ b/xen/arch/arm/p2m.c
> @@ -8,6 +8,7 @@
>   #include <asm/gic.h>
>   #include <asm/event.h>
>   #include <asm/hardirq.h>
> +#include <asm/page.h>
>
>   /* First level P2M is 2 consecutive pages */
>   #define P2M_FIRST_ORDER 1
> @@ -228,6 +229,7 @@ enum p2m_operation {
>       ALLOCATE,
>       REMOVE,
>       RELINQUISH,
> +    CACHEFLUSH,
>   };
>
>   static int apply_p2m_changes(struct domain *d,
> @@ -381,6 +383,15 @@ static int apply_p2m_changes(struct domain *d,
>                       count++;
>                   }
>                   break;
> +
> +            case CACHEFLUSH:
> +                {
> +                    if ( !pte.p2m.valid || !p2m_is_ram(pte.p2m.type) )
> +                        break;
> +
> +                    sync_page_to_ram(pte.p2m.base);
> +                }
> +                break;
>           }
>
>           /* Preempt every 2MiB (mapped) or 32 MiB (unmapped) - arbitrary */
> @@ -624,6 +635,20 @@ int relinquish_p2m_mapping(struct domain *d)
>                                 MATTR_MEM, p2m_invalid);
>   }
>
> +int p2m_cache_flush(struct domain *d, xen_pfn_t start_mfn, xen_pfn_t end_mfn)
> +{
> +    struct p2m_domain *p2m = &d->arch.p2m;
> +
> +    start_mfn = MAX(start_mfn, p2m->lowest_mapped_gfn);
> +    end_mfn = MIN(end_mfn, p2m->max_mapped_gfn);
> +
> +    return apply_p2m_changes(d, CACHEFLUSH,
> +                             pfn_to_paddr(start_mfn),
> +                             pfn_to_paddr(end_mfn),
> +                             pfn_to_paddr(INVALID_MFN),
> +                             MATTR_MEM, p2m_invalid);
> +}
> +
>   unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
>   {
>       paddr_t p = p2m_lookup(d, pfn_to_paddr(gpfn), NULL);
> diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
> index 5f484a2..c73c717 100644
> --- a/xen/common/page_alloc.c
> +++ b/xen/common/page_alloc.c
> @@ -710,6 +710,11 @@ static struct page_info *alloc_heap_pages(
>           /* Initialise fields which have other uses for free pages. */
>           pg[i].u.inuse.type_info = 0;
>           page_set_owner(&pg[i], NULL);
> +
> +        /* Ensure cache and RAM are consistent for platforms where the
> +         * guest can control its own visibility of/through the cache.
> +         */
> +        sync_page_to_ram(page_to_mfn(&pg[i]));
>       }
>
>       spin_unlock(&heap_lock);
> diff --git a/xen/include/asm-arm/arm32/page.h b/xen/include/asm-arm/arm32/page.h
> index cf12a89..cb6add4 100644
> --- a/xen/include/asm-arm/arm32/page.h
> +++ b/xen/include/asm-arm/arm32/page.h
> @@ -22,6 +22,10 @@ static inline void write_pte(lpae_t *p, lpae_t pte)
>   /* Inline ASM to flush dcache on register R (may be an inline asm operand) */
>   #define __flush_xen_dcache_one(R) STORE_CP32(R, DCCMVAC)
>
> +/* Inline ASM to clean and invalidate dcache on register R (may be an
> + * inline asm operand) */
> +#define __clean_and_invalidate_xen_dcache_one(R) STORE_CP32(R, DCCIMVAC)
> +
>   /*
>    * Flush all hypervisor mappings from the TLB and branch predictor.
>    * This is needed after changing Xen code mappings.
> diff --git a/xen/include/asm-arm/arm64/page.h b/xen/include/asm-arm/arm64/page.h
> index 9551f90..baf8903 100644
> --- a/xen/include/asm-arm/arm64/page.h
> +++ b/xen/include/asm-arm/arm64/page.h
> @@ -17,6 +17,10 @@ static inline void write_pte(lpae_t *p, lpae_t pte)
>   /* Inline ASM to flush dcache on register R (may be an inline asm operand) */
>   #define __flush_xen_dcache_one(R) "dc cvac, %" #R ";"
>
> +/* Inline ASM to clean and invalidate dcache on register R (may be an
> + * inline asm operand) */
> +#define __clean_and_invalidate_xen_dcache_one(R) "dc  civac, %" #R ";"
> +
>   /*
>    * Flush all hypervisor mappings from the TLB
>    * This is needed after changing Xen code mappings.
> diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
> index e9c884a..3b39c45 100644
> --- a/xen/include/asm-arm/p2m.h
> +++ b/xen/include/asm-arm/p2m.h
> @@ -78,6 +78,9 @@ void p2m_load_VTTBR(struct domain *d);
>   /* Look up the MFN corresponding to a domain's PFN. */
>   paddr_t p2m_lookup(struct domain *d, paddr_t gpfn, p2m_type_t *t);
>
> +/* Clean & invalidate caches corresponding to a region of guest address space */
> +int p2m_cache_flush(struct domain *d, xen_pfn_t start_mfn, xen_pfn_t end_mfn);
> +
>   /* Setup p2m RAM mapping for domain d from start-end. */
>   int p2m_populate_ram(struct domain *d, paddr_t start, paddr_t end);
>   /* Map MMIO regions in the p2m: start_gaddr and end_gaddr is the range
> diff --git a/xen/include/asm-arm/page.h b/xen/include/asm-arm/page.h
> index 670d4e7..67d64c9 100644
> --- a/xen/include/asm-arm/page.h
> +++ b/xen/include/asm-arm/page.h
> @@ -253,6 +253,9 @@ static inline void flush_xen_dcache_va_range(void *p, unsigned long size)
>               : : "r" (_p), "m" (*_p));                                   \
>   } while (0)
>
> +/* Flush the dcache for an entire page. */
> +void sync_page_to_ram(unsigned long mfn);
> +
>   /* Print a walk of an arbitrary page table */
>   void dump_pt_walk(lpae_t *table, paddr_t addr);
>
> diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h
> index 7a46af5..abe35fb 100644
> --- a/xen/include/asm-x86/page.h
> +++ b/xen/include/asm-x86/page.h
> @@ -346,6 +346,9 @@ static inline uint32_t cacheattr_to_pte_flags(uint32_t cacheattr)
>       return ((cacheattr & 4) << 5) | ((cacheattr & 3) << 3);
>   }
>
> +/* No cache maintenance required on x86 architecture. */
> +static inline void sync_page_to_ram(unsigned long mfn) {}
> +
>   /* return true if permission increased */
>   static inline bool_t
>   perms_strictly_increased(uint32_t old_flags, uint32_t new_flags)
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index 91f01fa..f22fe2e 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -885,6 +885,17 @@ struct xen_domctl_set_max_evtchn {
>   typedef struct xen_domctl_set_max_evtchn xen_domctl_set_max_evtchn_t;
>   DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_max_evtchn_t);
>
> +/*
> + * ARM: Clean and invalidate caches associated with given region of
> + * guest memory.
> + */
> +struct xen_domctl_cacheflush {
> +    /* IN: page range to flush. */
> +    xen_pfn_t start_pfn, nr_pfns;
> +};
> +typedef struct xen_domctl_cacheflush xen_domctl_cacheflush_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_cacheflush_t);
> +
>   struct xen_domctl {
>       uint32_t cmd;
>   #define XEN_DOMCTL_createdomain                   1
> @@ -954,6 +965,7 @@ struct xen_domctl {
>   #define XEN_DOMCTL_setnodeaffinity               68
>   #define XEN_DOMCTL_getnodeaffinity               69
>   #define XEN_DOMCTL_set_max_evtchn                70
> +#define XEN_DOMCTL_cacheflush                    71
>   #define XEN_DOMCTL_gdbsx_guestmemio            1000
>   #define XEN_DOMCTL_gdbsx_pausevcpu             1001
>   #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
> @@ -1012,6 +1024,7 @@ struct xen_domctl {
>           struct xen_domctl_set_max_evtchn    set_max_evtchn;
>           struct xen_domctl_gdbsx_memio       gdbsx_guest_memio;
>           struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
> +        struct xen_domctl_cacheflush        cacheflush;
>           struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
>           struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
>           uint8_t                             pad[128];
> diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
> index 50a35fc..1345d7e 100644
> --- a/xen/xsm/flask/hooks.c
> +++ b/xen/xsm/flask/hooks.c
> @@ -737,6 +737,9 @@ static int flask_domctl(struct domain *d, int cmd)
>       case XEN_DOMCTL_set_max_evtchn:
>           return current_has_perm(d, SECCLASS_DOMAIN2, DOMAIN2__SET_MAX_EVTCHN);
>
> +    case XEN_DOMCTL_cacheflush:
> +	    return current_has_perm(d, SECCLASS_DOMAIN2, DOMAIN2__CACHEFLUSH);
> +
>       default:
>           printk("flask_domctl: Unknown op %d\n", cmd);
>           return -EPERM;
> diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
> index 1fbe241..a0ed13d 100644
> --- a/xen/xsm/flask/policy/access_vectors
> +++ b/xen/xsm/flask/policy/access_vectors
> @@ -196,6 +196,8 @@ class domain2
>       setclaim
>   # XEN_DOMCTL_set_max_evtchn
>       set_max_evtchn
> +# XEN_DOMCTL_cacheflush
> +    cacheflush
>   }
>
>   # Similar to class domain, but primarily contains domctls related to HVM domains
>
Ian Campbell Feb. 7, 2014, 2:34 p.m. | #2
On Fri, 2014-02-07 at 12:57 +0000, Jan Beulich wrote:
> >>> On 07.02.14 at 13:12, Ian Campbell <ian.campbell@citrix.com> wrote:
> > --- a/xen/arch/arm/domctl.c
> > +++ b/xen/arch/arm/domctl.c
> > @@ -17,6 +17,20 @@ long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
> >  {
> >      switch ( domctl->cmd )
> >      {
> > +    case XEN_DOMCTL_cacheflush:
> > +    {
> > +        unsigned long s = domctl->u.cacheflush.start_pfn;
> > +        unsigned long e = s + domctl->u.cacheflush.nr_pfns;
> > +
> > +        if ( e < s )
> > +            return -EINVAL;
> > +
> > +        if ( get_order_from_pages(domctl->u.cacheflush.nr_pfns) > MAX_ORDER )
> > +            return -EINVAL;
> 
> get_order_from_pages() takes an unsigned long, while xen_pfn_t
> is - iirc - 64-bits even on arm32. So you're not checking the full
> passed in value, yet use the full one in the calculation of "e" (which
> is what gets passed down).

Yes, you are right, I should have made nr_mfns be a smaller type.

> Also, did you consider the nr_pfns == 0 case? At present, due to
> the way get_order_from_pages() works, this will produce -EINVAL.
> I'm not sure that's intended.

I think nr == 0 => EINVAL is probably ok.

But actually this made me realise that using get_order_from_pages is a
bit silly here. It seems more logical to compare nr_pfns with
1<<(MAX_ORDER-PAGE_ORDER) now that we have a length in hand.

> 
> > --- a/xen/include/public/domctl.h
> > +++ b/xen/include/public/domctl.h
> > @@ -885,6 +885,17 @@ struct xen_domctl_set_max_evtchn {
> >  typedef struct xen_domctl_set_max_evtchn xen_domctl_set_max_evtchn_t;
> >  DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_max_evtchn_t);
> >  
> > +/*
> > + * ARM: Clean and invalidate caches associated with given region of
> > + * guest memory.
> > + */
> > +struct xen_domctl_cacheflush {
> > +    /* IN: page range to flush. */
> > +    xen_pfn_t start_pfn, nr_pfns;
> > +};
> 
> The name here (and of the libxc interface) is now certainly
> counterintuitive. But it's a domctl (and an internal interface),
> which we can change post-4.4 (I'd envision it to actually take
> a flags parameter indicating the kind of flush that's wanted).

Sounds OK to me, thanks.

> > --- a/xen/xsm/flask/hooks.c
> > +++ b/xen/xsm/flask/hooks.c
> > @@ -737,6 +737,9 @@ static int flask_domctl(struct domain *d, int cmd)
> >      case XEN_DOMCTL_set_max_evtchn:
> >          return current_has_perm(d, SECCLASS_DOMAIN2, DOMAIN2__SET_MAX_EVTCHN);
> >  
> > +    case XEN_DOMCTL_cacheflush:
> > +	    return current_has_perm(d, SECCLASS_DOMAIN2, DOMAIN2__CACHEFLUSH);
> 
> Hard tab.

Well spotted.

This file is missing the emacs magic block. I'll fix this and add one.

Ian.
Stefano Stabellini Feb. 7, 2014, 2:49 p.m. | #3
On Fri, 7 Feb 2014, Ian Campbell wrote:
> Guests are initially started with caches disabled and so we need to make sure
> they see consistent data in RAM (requiring a cache clean) but also that they
> do not have old stale data suddenly appear in the caches when they enable
> their caches (requiring the invalidate).
> 
> This can be split into two halves. First we must flush each page as it is
> allocated to the guest. It is not sufficient to do the flush at scrub time
> since this will miss pages which are ballooned out by the guest (where the
> guest must scrub if it cares about not leaking the pagecontent). We need to
> clean as well as invalidate to make sure that any scrubbing which has occured
> gets committed to real RAM. To achieve this add a new cacheflush_page function,
> which is a stub on x86.
> 
> Secondly we need to flush anything which the domain builder touches, which we
> do via a new domctl.
> 
> Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
> Cc: jbeulich@suse.com
> Cc: keir@xen.org
> Cc: ian.jackson@eu.citrix.com
> --
> v4: introduce a function to clean and invalidate as intended
> 
>     make the domctl take a length not an end.
> 
> v3:
>     s/cacheflush_page/sync_page_to_ram/
> 
>     xc interface takes a length instead of an end
> 
>     make the domctl range inclusive.
> 
>     make xc interface internal -- it isn't needed from libxl in the current
>     design and it is easier to expose an interface in the future than to hide
>     it.
> 
> v2:
>    Switch to cleaning at page allocation time + explicit flushing of the
>    regions which the toolstack touches.
> 
>    Add XSM for new domctl.
> 
>    New domctl restricts the amount of space it is willing to flush, to avoid
>    thinking about preemption.
> ---
>  tools/libxc/xc_dom_boot.c           |    4 ++++
>  tools/libxc/xc_dom_core.c           |    2 ++
>  tools/libxc/xc_domain.c             |   10 ++++++++++
>  tools/libxc/xc_private.c            |    2 ++
>  tools/libxc/xc_private.h            |    3 +++
>  xen/arch/arm/domctl.c               |   14 ++++++++++++++
>  xen/arch/arm/mm.c                   |   12 ++++++++++++
>  xen/arch/arm/p2m.c                  |   25 +++++++++++++++++++++++++
>  xen/common/page_alloc.c             |    5 +++++
>  xen/include/asm-arm/arm32/page.h    |    4 ++++
>  xen/include/asm-arm/arm64/page.h    |    4 ++++
>  xen/include/asm-arm/p2m.h           |    3 +++
>  xen/include/asm-arm/page.h          |    3 +++
>  xen/include/asm-x86/page.h          |    3 +++
>  xen/include/public/domctl.h         |   13 +++++++++++++
>  xen/xsm/flask/hooks.c               |    3 +++
>  xen/xsm/flask/policy/access_vectors |    2 ++
>  17 files changed, 112 insertions(+)
> 
> diff --git a/tools/libxc/xc_dom_boot.c b/tools/libxc/xc_dom_boot.c
> index 5a9cfc6..3d4d107 100644
> --- a/tools/libxc/xc_dom_boot.c
> +++ b/tools/libxc/xc_dom_boot.c
> @@ -351,6 +351,10 @@ int xc_dom_gnttab_seed(xc_interface *xch, domid_t domid,
>          return -1;
>      }
>  
> +    /* Guest shouldn't really touch its grant table until it has
> +     * enabled its caches. But lets be nice. */
> +    xc_domain_cacheflush(xch, domid, gnttab_gmfn, 1);
> +
>      return 0;
>  }
>  
> diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c
> index 77a4e64..b9d1015 100644
> --- a/tools/libxc/xc_dom_core.c
> +++ b/tools/libxc/xc_dom_core.c
> @@ -603,6 +603,8 @@ void xc_dom_unmap_one(struct xc_dom_image *dom, xen_pfn_t pfn)
>          prev->next = phys->next;
>      else
>          dom->phys_pages = phys->next;
> +
> +    xc_domain_cacheflush(dom->xch, dom->guest_domid, phys->first, phys->count);

I am not sure I understand the semantic of xc_dom_unmap_one: from the
name of the function and the parameters I would think that it is
supposed to unmap just one page. In that case we should flush just one
page. However the implementation calls

munmap(phys->ptr, phys->count << page_shift)

so it actually can unmap more than a single page, so I think that you
did the right thing by calling xc_domain_cacheflush with phys->first and
phys->count.


>  }
>  
>  void xc_dom_unmap_all(struct xc_dom_image *dom)
> diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
> index c2fdd74..f10ec01 100644
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -48,6 +48,16 @@ int xc_domain_create(xc_interface *xch,
>      return 0;
>  }
>  
> +int xc_domain_cacheflush(xc_interface *xch, uint32_t domid,
> +                         xen_pfn_t start_pfn, xen_pfn_t nr_pfns)
> +{
> +    DECLARE_DOMCTL;
> +    domctl.cmd = XEN_DOMCTL_cacheflush;
> +    domctl.domain = (domid_t)domid;
> +    domctl.u.cacheflush.start_pfn = start_pfn;
> +    domctl.u.cacheflush.nr_pfns = nr_pfns;
> +    return do_domctl(xch, &domctl);
> +}
>  
>  int xc_domain_pause(xc_interface *xch,
>                      uint32_t domid)
> diff --git a/tools/libxc/xc_private.c b/tools/libxc/xc_private.c
> index 838fd21..33ed15b 100644
> --- a/tools/libxc/xc_private.c
> +++ b/tools/libxc/xc_private.c
> @@ -628,6 +628,7 @@ int xc_copy_to_domain_page(xc_interface *xch,
>          return -1;
>      memcpy(vaddr, src_page, PAGE_SIZE);
>      munmap(vaddr, PAGE_SIZE);
> +    xc_domain_cacheflush(xch, domid, dst_pfn, 1);
>      return 0;
>  }
>  
> @@ -641,6 +642,7 @@ int xc_clear_domain_page(xc_interface *xch,
>          return -1;
>      memset(vaddr, 0, PAGE_SIZE);
>      munmap(vaddr, PAGE_SIZE);
> +    xc_domain_cacheflush(xch, domid, dst_pfn, 1);
>      return 0;
>  }
>  
> diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
> index 92271c9..a610f0c 100644
> --- a/tools/libxc/xc_private.h
> +++ b/tools/libxc/xc_private.h
> @@ -304,6 +304,9 @@ void bitmap_byte_to_64(uint64_t *lp, const uint8_t *bp, int nbits);
>  /* Optionally flush file to disk and discard page cache */
>  void discard_file_cache(xc_interface *xch, int fd, int flush);
>  
> +int xc_domain_cacheflush(xc_interface *xch, uint32_t domid,
> +			 xen_pfn_t start_pfn, xen_pfn_t nr_pfns);
> +
>  #define MAX_MMU_UPDATES 1024
>  struct xc_mmu {
>      mmu_update_t updates[MAX_MMU_UPDATES];
> diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
> index 546e86b..ffb68da 100644
> --- a/xen/arch/arm/domctl.c
> +++ b/xen/arch/arm/domctl.c
> @@ -17,6 +17,20 @@ long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
>  {
>      switch ( domctl->cmd )
>      {
> +    case XEN_DOMCTL_cacheflush:
> +    {
> +        unsigned long s = domctl->u.cacheflush.start_pfn;
> +        unsigned long e = s + domctl->u.cacheflush.nr_pfns;
> +
> +        if ( e < s )
> +            return -EINVAL;
> +
> +        if ( get_order_from_pages(domctl->u.cacheflush.nr_pfns) > MAX_ORDER )
> +            return -EINVAL;
> +
> +        return p2m_cache_flush(d, s, e);
> +    }
> +
>      default:
>          return subarch_do_domctl(domctl, d, u_domctl);
>      }
> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> index 2f48347..d2cfe64 100644
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -338,6 +338,18 @@ unsigned long domain_page_map_to_mfn(const void *va)
>  }
>  #endif
>  
> +void sync_page_to_ram(unsigned long mfn)
> +{
> +    void *p, *v = map_domain_page(mfn);
> +
> +    dsb();           /* So the CPU issues all writes to the range */
> +    for ( p = v; p < v + PAGE_SIZE ; p += cacheline_bytes )

What about the last few bytes on a page?
Can we assume that PAGE_SIZE is a multiple of cacheline_bytes?


> +        asm volatile (__clean_and_invalidate_xen_dcache_one(0) : : "r" (p));
> +    dsb();           /* So we know the flushes happen before continuing */
> +
> +    unmap_domain_page(v);
> +}
> +
>  void __init arch_init_memory(void)
>  {
>      /*
> diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
> index a61edeb..86f13e9 100644
> --- a/xen/arch/arm/p2m.c
> +++ b/xen/arch/arm/p2m.c
> @@ -8,6 +8,7 @@
>  #include <asm/gic.h>
>  #include <asm/event.h>
>  #include <asm/hardirq.h>
> +#include <asm/page.h>
>  
>  /* First level P2M is 2 consecutive pages */
>  #define P2M_FIRST_ORDER 1
> @@ -228,6 +229,7 @@ enum p2m_operation {
>      ALLOCATE,
>      REMOVE,
>      RELINQUISH,
> +    CACHEFLUSH,
>  };
>  
>  static int apply_p2m_changes(struct domain *d,
> @@ -381,6 +383,15 @@ static int apply_p2m_changes(struct domain *d,
>                      count++;
>                  }
>                  break;
> +
> +            case CACHEFLUSH:
> +                {
> +                    if ( !pte.p2m.valid || !p2m_is_ram(pte.p2m.type) )
> +                        break;
> +
> +                    sync_page_to_ram(pte.p2m.base);
> +                }
> +                break;
>          }
>  
>          /* Preempt every 2MiB (mapped) or 32 MiB (unmapped) - arbitrary */
> @@ -624,6 +635,20 @@ int relinquish_p2m_mapping(struct domain *d)
>                                MATTR_MEM, p2m_invalid);
>  }
>  
> +int p2m_cache_flush(struct domain *d, xen_pfn_t start_mfn, xen_pfn_t end_mfn)
> +{
> +    struct p2m_domain *p2m = &d->arch.p2m;
> +
> +    start_mfn = MAX(start_mfn, p2m->lowest_mapped_gfn);
> +    end_mfn = MIN(end_mfn, p2m->max_mapped_gfn);
> +
> +    return apply_p2m_changes(d, CACHEFLUSH,
> +                             pfn_to_paddr(start_mfn),
> +                             pfn_to_paddr(end_mfn),
> +                             pfn_to_paddr(INVALID_MFN),
> +                             MATTR_MEM, p2m_invalid);
> +}
> +
>  unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
>  {
>      paddr_t p = p2m_lookup(d, pfn_to_paddr(gpfn), NULL);
> diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
> index 5f484a2..c73c717 100644
> --- a/xen/common/page_alloc.c
> +++ b/xen/common/page_alloc.c
> @@ -710,6 +710,11 @@ static struct page_info *alloc_heap_pages(
>          /* Initialise fields which have other uses for free pages. */
>          pg[i].u.inuse.type_info = 0;
>          page_set_owner(&pg[i], NULL);
> +
> +        /* Ensure cache and RAM are consistent for platforms where the
> +         * guest can control its own visibility of/through the cache.
> +         */
> +        sync_page_to_ram(page_to_mfn(&pg[i]));
>      }
>  
>      spin_unlock(&heap_lock);
> diff --git a/xen/include/asm-arm/arm32/page.h b/xen/include/asm-arm/arm32/page.h
> index cf12a89..cb6add4 100644
> --- a/xen/include/asm-arm/arm32/page.h
> +++ b/xen/include/asm-arm/arm32/page.h
> @@ -22,6 +22,10 @@ static inline void write_pte(lpae_t *p, lpae_t pte)
>  /* Inline ASM to flush dcache on register R (may be an inline asm operand) */
>  #define __flush_xen_dcache_one(R) STORE_CP32(R, DCCMVAC)
>  
> +/* Inline ASM to clean and invalidate dcache on register R (may be an
> + * inline asm operand) */
> +#define __clean_and_invalidate_xen_dcache_one(R) STORE_CP32(R, DCCIMVAC)
> +
>  /*
>   * Flush all hypervisor mappings from the TLB and branch predictor.
>   * This is needed after changing Xen code mappings.
> diff --git a/xen/include/asm-arm/arm64/page.h b/xen/include/asm-arm/arm64/page.h
> index 9551f90..baf8903 100644
> --- a/xen/include/asm-arm/arm64/page.h
> +++ b/xen/include/asm-arm/arm64/page.h
> @@ -17,6 +17,10 @@ static inline void write_pte(lpae_t *p, lpae_t pte)
>  /* Inline ASM to flush dcache on register R (may be an inline asm operand) */
>  #define __flush_xen_dcache_one(R) "dc cvac, %" #R ";"
>  
> +/* Inline ASM to clean and invalidate dcache on register R (may be an
> + * inline asm operand) */
> +#define __clean_and_invalidate_xen_dcache_one(R) "dc  civac, %" #R ";"
> +
>  /*
>   * Flush all hypervisor mappings from the TLB
>   * This is needed after changing Xen code mappings.
> diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
> index e9c884a..3b39c45 100644
> --- a/xen/include/asm-arm/p2m.h
> +++ b/xen/include/asm-arm/p2m.h
> @@ -78,6 +78,9 @@ void p2m_load_VTTBR(struct domain *d);
>  /* Look up the MFN corresponding to a domain's PFN. */
>  paddr_t p2m_lookup(struct domain *d, paddr_t gpfn, p2m_type_t *t);
>  
> +/* Clean & invalidate caches corresponding to a region of guest address space */
> +int p2m_cache_flush(struct domain *d, xen_pfn_t start_mfn, xen_pfn_t end_mfn);
> +
>  /* Setup p2m RAM mapping for domain d from start-end. */
>  int p2m_populate_ram(struct domain *d, paddr_t start, paddr_t end);
>  /* Map MMIO regions in the p2m: start_gaddr and end_gaddr is the range
> diff --git a/xen/include/asm-arm/page.h b/xen/include/asm-arm/page.h
> index 670d4e7..67d64c9 100644
> --- a/xen/include/asm-arm/page.h
> +++ b/xen/include/asm-arm/page.h
> @@ -253,6 +253,9 @@ static inline void flush_xen_dcache_va_range(void *p, unsigned long size)
>              : : "r" (_p), "m" (*_p));                                   \
>  } while (0)
>  
> +/* Flush the dcache for an entire page. */
> +void sync_page_to_ram(unsigned long mfn);
> +
>  /* Print a walk of an arbitrary page table */
>  void dump_pt_walk(lpae_t *table, paddr_t addr);
>  
> diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h
> index 7a46af5..abe35fb 100644
> --- a/xen/include/asm-x86/page.h
> +++ b/xen/include/asm-x86/page.h
> @@ -346,6 +346,9 @@ static inline uint32_t cacheattr_to_pte_flags(uint32_t cacheattr)
>      return ((cacheattr & 4) << 5) | ((cacheattr & 3) << 3);
>  }
>  
> +/* No cache maintenance required on x86 architecture. */
> +static inline void sync_page_to_ram(unsigned long mfn) {}
> +
>  /* return true if permission increased */
>  static inline bool_t
>  perms_strictly_increased(uint32_t old_flags, uint32_t new_flags)
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index 91f01fa..f22fe2e 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -885,6 +885,17 @@ struct xen_domctl_set_max_evtchn {
>  typedef struct xen_domctl_set_max_evtchn xen_domctl_set_max_evtchn_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_max_evtchn_t);
>  
> +/*
> + * ARM: Clean and invalidate caches associated with given region of
> + * guest memory.
> + */
> +struct xen_domctl_cacheflush {
> +    /* IN: page range to flush. */
> +    xen_pfn_t start_pfn, nr_pfns;
> +};
> +typedef struct xen_domctl_cacheflush xen_domctl_cacheflush_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_cacheflush_t);
> +
>  struct xen_domctl {
>      uint32_t cmd;
>  #define XEN_DOMCTL_createdomain                   1
> @@ -954,6 +965,7 @@ struct xen_domctl {
>  #define XEN_DOMCTL_setnodeaffinity               68
>  #define XEN_DOMCTL_getnodeaffinity               69
>  #define XEN_DOMCTL_set_max_evtchn                70
> +#define XEN_DOMCTL_cacheflush                    71
>  #define XEN_DOMCTL_gdbsx_guestmemio            1000
>  #define XEN_DOMCTL_gdbsx_pausevcpu             1001
>  #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
> @@ -1012,6 +1024,7 @@ struct xen_domctl {
>          struct xen_domctl_set_max_evtchn    set_max_evtchn;
>          struct xen_domctl_gdbsx_memio       gdbsx_guest_memio;
>          struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
> +        struct xen_domctl_cacheflush        cacheflush;
>          struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
>          struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
>          uint8_t                             pad[128];
> diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
> index 50a35fc..1345d7e 100644
> --- a/xen/xsm/flask/hooks.c
> +++ b/xen/xsm/flask/hooks.c
> @@ -737,6 +737,9 @@ static int flask_domctl(struct domain *d, int cmd)
>      case XEN_DOMCTL_set_max_evtchn:
>          return current_has_perm(d, SECCLASS_DOMAIN2, DOMAIN2__SET_MAX_EVTCHN);
>  
> +    case XEN_DOMCTL_cacheflush:
> +	    return current_has_perm(d, SECCLASS_DOMAIN2, DOMAIN2__CACHEFLUSH);
> +
>      default:
>          printk("flask_domctl: Unknown op %d\n", cmd);
>          return -EPERM;
> diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
> index 1fbe241..a0ed13d 100644
> --- a/xen/xsm/flask/policy/access_vectors
> +++ b/xen/xsm/flask/policy/access_vectors
> @@ -196,6 +196,8 @@ class domain2
>      setclaim
>  # XEN_DOMCTL_set_max_evtchn
>      set_max_evtchn
> +# XEN_DOMCTL_cacheflush
> +    cacheflush
>  }
>  
>  # Similar to class domain, but primarily contains domctls related to HVM domains
> -- 
> 1.7.10.4
>
Ian Campbell Feb. 7, 2014, 3:01 p.m. | #4
On Fri, 2014-02-07 at 14:49 +0000, Stefano Stabellini wrote:
> > diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c
> > index 77a4e64..b9d1015 100644
> > --- a/tools/libxc/xc_dom_core.c
> > +++ b/tools/libxc/xc_dom_core.c
> > @@ -603,6 +603,8 @@ void xc_dom_unmap_one(struct xc_dom_image *dom, xen_pfn_t pfn)
> >          prev->next = phys->next;
> >      else
> >          dom->phys_pages = phys->next;
> > +
> > +    xc_domain_cacheflush(dom->xch, dom->guest_domid, phys->first, phys->count);
> 
> I am not sure I understand the semantic of xc_dom_unmap_one: from the
> name of the function and the parameters I would think that it is
> supposed to unmap just one page. In that case we should flush just one
> page. However the implementation calls
> 
> munmap(phys->ptr, phys->count << page_shift)
> 
> so it actually can unmap more than a single page, so I think that you
> did the right thing by calling xc_domain_cacheflush with phys->first and
> phys->count.

"one" in the name means "one struct xc_dom_phys", which is a range
starting at the given pfn, which is why using phys->{first,count} is
correct.

> > +void sync_page_to_ram(unsigned long mfn)
> > +{
> > +    void *p, *v = map_domain_page(mfn);
> > +
> > +    dsb();           /* So the CPU issues all writes to the range */
> > +    for ( p = v; p < v + PAGE_SIZE ; p += cacheline_bytes )
> 
> What about the last few bytes on a page?
> Can we assume that PAGE_SIZE is a multiple of cacheline_bytes?

I sure hope so! A non power of two cache line size would be pretty
crazy!

The cacheline is always a 2^N (the register contains log2(cacheline
size)) and so is PAGE_SIZE.

Ian.
Stefano Stabellini Feb. 7, 2014, 3:09 p.m. | #5
On Fri, 7 Feb 2014, Ian Campbell wrote:
> > > +void sync_page_to_ram(unsigned long mfn)
> > > +{
> > > +    void *p, *v = map_domain_page(mfn);
> > > +
> > > +    dsb();           /* So the CPU issues all writes to the range */
> > > +    for ( p = v; p < v + PAGE_SIZE ; p += cacheline_bytes )
> > 
> > What about the last few bytes on a page?
> > Can we assume that PAGE_SIZE is a multiple of cacheline_bytes?
> 
> I sure hope so! A non power of two cache line size would be pretty
> crazy!
> 
> The cacheline is always a 2^N (the register contains log2(cacheline
> size)) and so is PAGE_SIZE.

Thought so.

For the Xen ARM side:

Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Ian Campbell Feb. 10, 2014, 2:02 p.m. | #6
On Mon, 2014-02-10 at 13:49 +0000, Jan Beulich wrote:
> >>> On 07.02.14 at 13:57, "Jan Beulich" <JBeulich@suse.com> wrote:
> >>>> On 07.02.14 at 13:12, Ian Campbell <ian.campbell@citrix.com> wrote:
> >> --- a/xen/include/public/domctl.h
> >> +++ b/xen/include/public/domctl.h
> >> @@ -885,6 +885,17 @@ struct xen_domctl_set_max_evtchn {
> >>  typedef struct xen_domctl_set_max_evtchn xen_domctl_set_max_evtchn_t;
> >>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_max_evtchn_t);
> >>  
> >> +/*
> >> + * ARM: Clean and invalidate caches associated with given region of
> >> + * guest memory.
> >> + */
> >> +struct xen_domctl_cacheflush {
> >> +    /* IN: page range to flush. */
> >> +    xen_pfn_t start_pfn, nr_pfns;
> >> +};
> > 
> > The name here (and of the libxc interface) is now certainly
> > counterintuitive. But it's a domctl (and an internal interface),
> > which we can change post-4.4 (I'd envision it to actually take
> > a flags parameter indicating the kind of flush that's wanted).
> 
> Actually the naming of things in the hypervisor part of the patch
> is now bogus too - sysc_page_to_ram(), for example, does in no
> way imply that the cache needs not just flushing, but also
> invalidating.

sync_and_clean_page ? Not quite right I think.

>  The need for which, btw, I continue to not
> understand: Are there ways in ARM for one guest to observe
> cache contents put in place by another guest (incl Dom0)?

The data cache is physically tagged on armv7+ (which is all we support),
so if I understand your question correctly the answer is: a guest cannot
see cache contents placed by another guest *unless* that page is freed
back to the hypervisor and recycled as a new page for that guest, which
is why we want to clean+invalidate in the allocation path, after the
page has been scrubbed if required.

If you don't invalidate the cache then a guest which starts without
caches enabled and writes to RAM may be surprised when it enables the
cache and finds the data it wrote is now shadowed by some stale cache
entries.

Ian.
Ian Campbell Feb. 11, 2014, 1:29 p.m. | #7
On Mon, 2014-02-10 at 14:02 +0000, Ian Campbell wrote:
> On Mon, 2014-02-10 at 13:49 +0000, Jan Beulich wrote:
> > >>> On 07.02.14 at 13:57, "Jan Beulich" <JBeulich@suse.com> wrote:
> > >>>> On 07.02.14 at 13:12, Ian Campbell <ian.campbell@citrix.com> wrote:
> > >> --- a/xen/include/public/domctl.h
> > >> +++ b/xen/include/public/domctl.h
> > >> @@ -885,6 +885,17 @@ struct xen_domctl_set_max_evtchn {
> > >>  typedef struct xen_domctl_set_max_evtchn xen_domctl_set_max_evtchn_t;
> > >>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_max_evtchn_t);
> > >>  
> > >> +/*
> > >> + * ARM: Clean and invalidate caches associated with given region of
> > >> + * guest memory.
> > >> + */
> > >> +struct xen_domctl_cacheflush {
> > >> +    /* IN: page range to flush. */
> > >> +    xen_pfn_t start_pfn, nr_pfns;
> > >> +};
> > > 
> > > The name here (and of the libxc interface) is now certainly
> > > counterintuitive. But it's a domctl (and an internal interface),
> > > which we can change post-4.4 (I'd envision it to actually take
> > > a flags parameter indicating the kind of flush that's wanted).
> > 
> > Actually the naming of things in the hypervisor part of the patch
> > is now bogus too - sysc_page_to_ram(), for example, does in no
> > way imply that the cache needs not just flushing, but also
> > invalidating.
> 
> sync_and_clean_page ? Not quite right I think.

Unless there are objections my next posting will use
"flush_page_to_ram"..

Patch

diff --git a/tools/libxc/xc_dom_boot.c b/tools/libxc/xc_dom_boot.c
index 5a9cfc6..3d4d107 100644
--- a/tools/libxc/xc_dom_boot.c
+++ b/tools/libxc/xc_dom_boot.c
@@ -351,6 +351,10 @@  int xc_dom_gnttab_seed(xc_interface *xch, domid_t domid,
         return -1;
     }
 
+    /* Guest shouldn't really touch its grant table until it has
+     * enabled its caches. But lets be nice. */
+    xc_domain_cacheflush(xch, domid, gnttab_gmfn, 1);
+
     return 0;
 }
 
diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c
index 77a4e64..b9d1015 100644
--- a/tools/libxc/xc_dom_core.c
+++ b/tools/libxc/xc_dom_core.c
@@ -603,6 +603,8 @@  void xc_dom_unmap_one(struct xc_dom_image *dom, xen_pfn_t pfn)
         prev->next = phys->next;
     else
         dom->phys_pages = phys->next;
+
+    xc_domain_cacheflush(dom->xch, dom->guest_domid, phys->first, phys->count);
 }
 
 void xc_dom_unmap_all(struct xc_dom_image *dom)
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index c2fdd74..f10ec01 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -48,6 +48,16 @@  int xc_domain_create(xc_interface *xch,
     return 0;
 }
 
+int xc_domain_cacheflush(xc_interface *xch, uint32_t domid,
+                         xen_pfn_t start_pfn, xen_pfn_t nr_pfns)
+{
+    DECLARE_DOMCTL;
+    domctl.cmd = XEN_DOMCTL_cacheflush;
+    domctl.domain = (domid_t)domid;
+    domctl.u.cacheflush.start_pfn = start_pfn;
+    domctl.u.cacheflush.nr_pfns = nr_pfns;
+    return do_domctl(xch, &domctl);
+}
 
 int xc_domain_pause(xc_interface *xch,
                     uint32_t domid)
diff --git a/tools/libxc/xc_private.c b/tools/libxc/xc_private.c
index 838fd21..33ed15b 100644
--- a/tools/libxc/xc_private.c
+++ b/tools/libxc/xc_private.c
@@ -628,6 +628,7 @@  int xc_copy_to_domain_page(xc_interface *xch,
         return -1;
     memcpy(vaddr, src_page, PAGE_SIZE);
     munmap(vaddr, PAGE_SIZE);
+    xc_domain_cacheflush(xch, domid, dst_pfn, 1);
     return 0;
 }
 
@@ -641,6 +642,7 @@  int xc_clear_domain_page(xc_interface *xch,
         return -1;
     memset(vaddr, 0, PAGE_SIZE);
     munmap(vaddr, PAGE_SIZE);
+    xc_domain_cacheflush(xch, domid, dst_pfn, 1);
     return 0;
 }
 
diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
index 92271c9..a610f0c 100644
--- a/tools/libxc/xc_private.h
+++ b/tools/libxc/xc_private.h
@@ -304,6 +304,9 @@  void bitmap_byte_to_64(uint64_t *lp, const uint8_t *bp, int nbits);
 /* Optionally flush file to disk and discard page cache */
 void discard_file_cache(xc_interface *xch, int fd, int flush);
 
+int xc_domain_cacheflush(xc_interface *xch, uint32_t domid,
+			 xen_pfn_t start_pfn, xen_pfn_t nr_pfns);
+
 #define MAX_MMU_UPDATES 1024
 struct xc_mmu {
     mmu_update_t updates[MAX_MMU_UPDATES];
diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
index 546e86b..ffb68da 100644
--- a/xen/arch/arm/domctl.c
+++ b/xen/arch/arm/domctl.c
@@ -17,6 +17,20 @@  long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
 {
     switch ( domctl->cmd )
     {
+    case XEN_DOMCTL_cacheflush:
+    {
+        unsigned long s = domctl->u.cacheflush.start_pfn;
+        unsigned long e = s + domctl->u.cacheflush.nr_pfns;
+
+        if ( e < s )
+            return -EINVAL;
+
+        if ( get_order_from_pages(domctl->u.cacheflush.nr_pfns) > MAX_ORDER )
+            return -EINVAL;
+
+        return p2m_cache_flush(d, s, e);
+    }
+
     default:
         return subarch_do_domctl(domctl, d, u_domctl);
     }
diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index 2f48347..d2cfe64 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -338,6 +338,18 @@  unsigned long domain_page_map_to_mfn(const void *va)
 }
 #endif
 
+void sync_page_to_ram(unsigned long mfn)
+{
+    void *p, *v = map_domain_page(mfn);
+
+    dsb();           /* So the CPU issues all writes to the range */
+    for ( p = v; p < v + PAGE_SIZE ; p += cacheline_bytes )
+        asm volatile (__clean_and_invalidate_xen_dcache_one(0) : : "r" (p));
+    dsb();           /* So we know the flushes happen before continuing */
+
+    unmap_domain_page(v);
+}
+
 void __init arch_init_memory(void)
 {
     /*
diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index a61edeb..86f13e9 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -8,6 +8,7 @@ 
 #include <asm/gic.h>
 #include <asm/event.h>
 #include <asm/hardirq.h>
+#include <asm/page.h>
 
 /* First level P2M is 2 consecutive pages */
 #define P2M_FIRST_ORDER 1
@@ -228,6 +229,7 @@  enum p2m_operation {
     ALLOCATE,
     REMOVE,
     RELINQUISH,
+    CACHEFLUSH,
 };
 
 static int apply_p2m_changes(struct domain *d,
@@ -381,6 +383,15 @@  static int apply_p2m_changes(struct domain *d,
                     count++;
                 }
                 break;
+
+            case CACHEFLUSH:
+                {
+                    if ( !pte.p2m.valid || !p2m_is_ram(pte.p2m.type) )
+                        break;
+
+                    sync_page_to_ram(pte.p2m.base);
+                }
+                break;
         }
 
         /* Preempt every 2MiB (mapped) or 32 MiB (unmapped) - arbitrary */
@@ -624,6 +635,20 @@  int relinquish_p2m_mapping(struct domain *d)
                               MATTR_MEM, p2m_invalid);
 }
 
+int p2m_cache_flush(struct domain *d, xen_pfn_t start_mfn, xen_pfn_t end_mfn)
+{
+    struct p2m_domain *p2m = &d->arch.p2m;
+
+    start_mfn = MAX(start_mfn, p2m->lowest_mapped_gfn);
+    end_mfn = MIN(end_mfn, p2m->max_mapped_gfn);
+
+    return apply_p2m_changes(d, CACHEFLUSH,
+                             pfn_to_paddr(start_mfn),
+                             pfn_to_paddr(end_mfn),
+                             pfn_to_paddr(INVALID_MFN),
+                             MATTR_MEM, p2m_invalid);
+}
+
 unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
 {
     paddr_t p = p2m_lookup(d, pfn_to_paddr(gpfn), NULL);
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 5f484a2..c73c717 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -710,6 +710,11 @@  static struct page_info *alloc_heap_pages(
         /* Initialise fields which have other uses for free pages. */
         pg[i].u.inuse.type_info = 0;
         page_set_owner(&pg[i], NULL);
+
+        /* Ensure cache and RAM are consistent for platforms where the
+         * guest can control its own visibility of/through the cache.
+         */
+        sync_page_to_ram(page_to_mfn(&pg[i]));
     }
 
     spin_unlock(&heap_lock);
diff --git a/xen/include/asm-arm/arm32/page.h b/xen/include/asm-arm/arm32/page.h
index cf12a89..cb6add4 100644
--- a/xen/include/asm-arm/arm32/page.h
+++ b/xen/include/asm-arm/arm32/page.h
@@ -22,6 +22,10 @@  static inline void write_pte(lpae_t *p, lpae_t pte)
 /* Inline ASM to flush dcache on register R (may be an inline asm operand) */
 #define __flush_xen_dcache_one(R) STORE_CP32(R, DCCMVAC)
 
+/* Inline ASM to clean and invalidate dcache on register R (may be an
+ * inline asm operand) */
+#define __clean_and_invalidate_xen_dcache_one(R) STORE_CP32(R, DCCIMVAC)
+
 /*
  * Flush all hypervisor mappings from the TLB and branch predictor.
  * This is needed after changing Xen code mappings.
diff --git a/xen/include/asm-arm/arm64/page.h b/xen/include/asm-arm/arm64/page.h
index 9551f90..baf8903 100644
--- a/xen/include/asm-arm/arm64/page.h
+++ b/xen/include/asm-arm/arm64/page.h
@@ -17,6 +17,10 @@  static inline void write_pte(lpae_t *p, lpae_t pte)
 /* Inline ASM to flush dcache on register R (may be an inline asm operand) */
 #define __flush_xen_dcache_one(R) "dc cvac, %" #R ";"
 
+/* Inline ASM to clean and invalidate dcache on register R (may be an
+ * inline asm operand) */
+#define __clean_and_invalidate_xen_dcache_one(R) "dc  civac, %" #R ";"
+
 /*
  * Flush all hypervisor mappings from the TLB
  * This is needed after changing Xen code mappings.
diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
index e9c884a..3b39c45 100644
--- a/xen/include/asm-arm/p2m.h
+++ b/xen/include/asm-arm/p2m.h
@@ -78,6 +78,9 @@  void p2m_load_VTTBR(struct domain *d);
 /* Look up the MFN corresponding to a domain's PFN. */
 paddr_t p2m_lookup(struct domain *d, paddr_t gpfn, p2m_type_t *t);
 
+/* Clean & invalidate caches corresponding to a region of guest address space */
+int p2m_cache_flush(struct domain *d, xen_pfn_t start_mfn, xen_pfn_t end_mfn);
+
 /* Setup p2m RAM mapping for domain d from start-end. */
 int p2m_populate_ram(struct domain *d, paddr_t start, paddr_t end);
 /* Map MMIO regions in the p2m: start_gaddr and end_gaddr is the range
diff --git a/xen/include/asm-arm/page.h b/xen/include/asm-arm/page.h
index 670d4e7..67d64c9 100644
--- a/xen/include/asm-arm/page.h
+++ b/xen/include/asm-arm/page.h
@@ -253,6 +253,9 @@  static inline void flush_xen_dcache_va_range(void *p, unsigned long size)
             : : "r" (_p), "m" (*_p));                                   \
 } while (0)
 
+/* Flush the dcache for an entire page. */
+void sync_page_to_ram(unsigned long mfn);
+
 /* Print a walk of an arbitrary page table */
 void dump_pt_walk(lpae_t *table, paddr_t addr);
 
diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h
index 7a46af5..abe35fb 100644
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -346,6 +346,9 @@  static inline uint32_t cacheattr_to_pte_flags(uint32_t cacheattr)
     return ((cacheattr & 4) << 5) | ((cacheattr & 3) << 3);
 }
 
+/* No cache maintenance required on x86 architecture. */
+static inline void sync_page_to_ram(unsigned long mfn) {}
+
 /* return true if permission increased */
 static inline bool_t
 perms_strictly_increased(uint32_t old_flags, uint32_t new_flags)
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 91f01fa..f22fe2e 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -885,6 +885,17 @@  struct xen_domctl_set_max_evtchn {
 typedef struct xen_domctl_set_max_evtchn xen_domctl_set_max_evtchn_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_max_evtchn_t);
 
+/*
+ * ARM: Clean and invalidate caches associated with given region of
+ * guest memory.
+ */
+struct xen_domctl_cacheflush {
+    /* IN: page range to flush. */
+    xen_pfn_t start_pfn, nr_pfns;
+};
+typedef struct xen_domctl_cacheflush xen_domctl_cacheflush_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_cacheflush_t);
+
 struct xen_domctl {
     uint32_t cmd;
 #define XEN_DOMCTL_createdomain                   1
@@ -954,6 +965,7 @@  struct xen_domctl {
 #define XEN_DOMCTL_setnodeaffinity               68
 #define XEN_DOMCTL_getnodeaffinity               69
 #define XEN_DOMCTL_set_max_evtchn                70
+#define XEN_DOMCTL_cacheflush                    71
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -1012,6 +1024,7 @@  struct xen_domctl {
         struct xen_domctl_set_max_evtchn    set_max_evtchn;
         struct xen_domctl_gdbsx_memio       gdbsx_guest_memio;
         struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
+        struct xen_domctl_cacheflush        cacheflush;
         struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
         uint8_t                             pad[128];
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index 50a35fc..1345d7e 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -737,6 +737,9 @@  static int flask_domctl(struct domain *d, int cmd)
     case XEN_DOMCTL_set_max_evtchn:
         return current_has_perm(d, SECCLASS_DOMAIN2, DOMAIN2__SET_MAX_EVTCHN);
 
+    case XEN_DOMCTL_cacheflush:
+	    return current_has_perm(d, SECCLASS_DOMAIN2, DOMAIN2__CACHEFLUSH);
+
     default:
         printk("flask_domctl: Unknown op %d\n", cmd);
         return -EPERM;
diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
index 1fbe241..a0ed13d 100644
--- a/xen/xsm/flask/policy/access_vectors
+++ b/xen/xsm/flask/policy/access_vectors
@@ -196,6 +196,8 @@  class domain2
     setclaim
 # XEN_DOMCTL_set_max_evtchn
     set_max_evtchn
+# XEN_DOMCTL_cacheflush
+    cacheflush
 }
 
 # Similar to class domain, but primarily contains domctls related to HVM domains