
[Xen-devel,for-4.12,v3,4/5] xen/arm: Implement Set/Way operations

Message ID 20181214115855.6713-5-julien.grall@arm.com
State Superseded
Headers show
Series xen/arm: Implement Set/Way operations

Commit Message

Julien Grall Dec. 14, 2018, 11:58 a.m. UTC
Set/Way operations are used to perform maintenance on a given cache.
At the moment, Set/Way operations are not trapped and therefore a guest
OS will directly act on the local cache. However, a vCPU may migrate to
another pCPU in the middle of the sequence of operations. This will result
in caches with stale data (Set/Way operations are not propagated),
potentially causing crashes. This may be the cause of the heisenbug noticed
in Osstest [1].

Furthermore, Set/Way operations are not available on system caches. This
means that an OS, such as 32-bit Linux, relying on those operations to
fully clean the cache before disabling the MMU may break because data may
sit in system caches and not in RAM.

For more details about Set/Way, see the talk "The Art of Virtualizing
Cache Maintenance" given at Xen Summit 2018 [2].

In the context of Xen, we need to trap Set/Way operations and emulate
them. Per the Arm ARM (B1.14.4 in DDI 0406C.c), Set/Way operations are
difficult to virtualize. So we can assume that a guest OS using them will
suffer the consequences (i.e. slowness) until developers remove all usage
of Set/Way.

As software is not allowed to infer the Set/Way to Physical Address
mapping, Xen will need to go through the guest P2M and clean &
invalidate all the mapped entries.

Because Set/Way operations happen in batches (a loop over all the sets/ways
of a cache), Xen would need to go through the P2M for every instruction.
This is quite expensive and would severely impact the guest OS. The
implementation re-uses the KVM policy to limit the number of flushes (an
illustrative guest-side loop is sketched after this list):
    - If we trap a Set/Way operation, we enable VM trapping (i.e.
      HCR_EL2.TVM) to detect the cache being turned on/off, and do a full
      clean.
    - We clean the caches both when they are turned on and when they are
      turned off.
    - Once the caches are enabled, we stop trapping VM instructions.
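
As an editor-added illustration (not part of the patch), the sketch below
shows the kind of clean & invalidate by set/way loop a guest performs,
loosely modelled on the example sequence in the Arm ARM. With HCR_EL2.TSW
set, every dc_cisw() iteration below traps to Xen. The helper name, the
pre-computed geometry arrays and the simplified encoding are assumptions
made for this sketch, not code taken from Linux or Xen.

/*
 * Illustrative guest-side clean & invalidate by set/way (NOT part of
 * the patch). The cache geometry (levels, sets, ways, shifts) would
 * normally be discovered via CLIDR_EL1/CCSIDR_EL1; here it is passed
 * in to keep the sketch short.
 */
static inline void dc_cisw(uint64_t val)
{
    asm volatile ( "dc cisw, %0" : : "r" (val) : "memory" );
}

static void guest_flush_dcache_all(unsigned int levels,
                                   const unsigned int *sets,
                                   const unsigned int *ways,
                                   const unsigned int *set_shift,
                                   const unsigned int *way_shift)
{
    unsigned int level, way, set;

    for ( level = 0; level < levels; level++ )
        for ( way = 0; way < ways[level]; way++ )
            for ( set = 0; set < sets[level]; set++ )
                /*
                 * DC CISW operand: way in the top bits (shift is
                 * 32 - log2(ways)), set shifted by log2(line size),
                 * cache level in bits [3:1].
                 */
                dc_cisw(((uint64_t)way << way_shift[level]) |
                        ((uint64_t)set << set_shift[level]) |
                        (level << 1));
}

Even for a modest configuration (say a 32 KiB, 4-way L1 and a 512 KiB,
16-way L2, both with 64-byte lines), a full clean issues on the order of
ten thousand DC CISW instructions, which is why the policy above avoids
walking the whole P2M on each trapped instruction.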

[1] https://lists.xenproject.org/archives/html/xen-devel/2017-09/msg03191.html
[2] https://fr.slideshare.net/xen_com_mgr/virtualizing-cache

Signed-off-by: Julien Grall <julien.grall@arm.com>

---
    Changes in v2:
        - Fix emulation for Set/Way cache flush arm64 sysreg
        - Add support for preemption
        - Check cache status on every VM traps in Arm64
        - Remove spurious change
---
 xen/arch/arm/arm64/vsysreg.c | 17 ++++++++
 xen/arch/arm/p2m.c           | 92 ++++++++++++++++++++++++++++++++++++++++++++
 xen/arch/arm/traps.c         | 25 +++++++++++-
 xen/arch/arm/vcpreg.c        | 22 +++++++++++
 xen/include/asm-arm/domain.h |  8 ++++
 xen/include/asm-arm/p2m.h    | 20 ++++++++++
 6 files changed, 183 insertions(+), 1 deletion(-)

Comments

Stefano Stabellini Dec. 14, 2018, 9:22 p.m. UTC | #1
On Fri, 14 Dec 2018, Julien Grall wrote:
> Set/Way operations are used to perform maintenance on a given cache.
> At the moment, Set/Way operations are not trapped and therefore a guest
> OS will directly act on the local cache. However, a vCPU may migrate to
> another pCPU in the middle of the processor. This will result to have
> cache with stall data (Set/Way are not propagated) potentially causing
> crash. This may be the cause of heisenbug noticed in Osstest [1].
> 
> Furthermore, Set/Way operations are not available on system cache. This
> means that OS, such as Linux 32-bit, relying on those operations to
> fully clean the cache before disabling MMU may break because data may
> sits in system caches and not in RAM.
> 
> For more details about Set/Way, see the talk "The Art of Virtualizing
> Cache Maintenance" given at Xen Summit 2018 [2].
> 
> In the context of Xen, we need to trap Set/Way operations and emulate
> them. From the Arm Arm (B1.14.4 in DDI 046C.c), Set/Way operations are
> difficult to virtualized. So we can assume that a guest OS using them will
> suffer the consequence (i.e slowness) until developer removes all the usage
> of Set/Way.
> 
> As the software is not allowed to infer the Set/Way to Physical Address
> mapping, Xen will need to go through the guest P2M and clean &
> invalidate all the entries mapped.
> 
> Because Set/Way happen in batch (a loop on all Set/Way of a cache), Xen
> would need to go through the P2M for every instructions. This is quite
> expensive and would severely impact the guest OS. The implementation is
> re-using the KVM policy to limit the number of flush:
>     - If we trap a Set/Way operations, we enable VM trapping (i.e
>       HVC_EL2.TVM) to detect cache being turned on/off, and do a full
>     clean.
>     - We clean the caches when turning on and off
>     - Once the caches are enabled, we stop trapping VM instructions
> 
> [1] https://lists.xenproject.org/archives/html/xen-devel/2017-09/msg03191.html
> [2] https://fr.slideshare.net/xen_com_mgr/virtualizing-cache
> 
> Signed-off-by: Julien Grall <julien.grall@arm.com>
> 
> ---
>     Changes in v2:
>         - Fix emulation for Set/Way cache flush arm64 sysreg
>         - Add support for preemption
>         - Check cache status on every VM traps in Arm64
>         - Remove spurious change
> ---
>  xen/arch/arm/arm64/vsysreg.c | 17 ++++++++
>  xen/arch/arm/p2m.c           | 92 ++++++++++++++++++++++++++++++++++++++++++++
>  xen/arch/arm/traps.c         | 25 +++++++++++-
>  xen/arch/arm/vcpreg.c        | 22 +++++++++++
>  xen/include/asm-arm/domain.h |  8 ++++
>  xen/include/asm-arm/p2m.h    | 20 ++++++++++
>  6 files changed, 183 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/arch/arm/arm64/vsysreg.c b/xen/arch/arm/arm64/vsysreg.c
> index 16ac9c344a..8a85507d9d 100644
> --- a/xen/arch/arm/arm64/vsysreg.c
> +++ b/xen/arch/arm/arm64/vsysreg.c
> @@ -34,9 +34,14 @@
>  static bool vreg_emulate_##reg(struct cpu_user_regs *regs,          \
>                                 uint64_t *r, bool read)              \
>  {                                                                   \
> +    struct vcpu *v = current;                                       \
> +    bool cache_enabled = vcpu_has_cache_enabled(v);                 \
> +                                                                    \
>      GUEST_BUG_ON(read);                                             \
>      WRITE_SYSREG64(*r, reg);                                        \
>                                                                      \
> +    p2m_toggle_cache(v, cache_enabled);                             \
> +                                                                    \
>      return true;                                                    \
>  }
>  
> @@ -85,6 +90,18 @@ void do_sysreg(struct cpu_user_regs *regs,
>          break;
>  
>      /*
> +     * HCR_EL2.TSW
> +     *
> +     * ARMv8 (DDI 0487B.b): Table D1-42
> +     */
> +    case HSR_SYSREG_DCISW:
> +    case HSR_SYSREG_DCCSW:
> +    case HSR_SYSREG_DCCISW:
> +        if ( !hsr.sysreg.read )
> +            p2m_set_way_flush(current);
> +        break;
> +
> +    /*
>       * HCR_EL2.TVM
>       *
>       * ARMv8 (DDI 0487D.a): Table D1-38
> diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
> index 5639e4b64c..125d858d02 100644
> --- a/xen/arch/arm/p2m.c
> +++ b/xen/arch/arm/p2m.c
> @@ -3,6 +3,7 @@
>  #include <xen/iocap.h>
>  #include <xen/lib.h>
>  #include <xen/sched.h>
> +#include <xen/softirq.h>
>  
>  #include <asm/event.h>
>  #include <asm/flushtlb.h>
> @@ -1615,6 +1616,97 @@ int p2m_cache_flush_range(struct domain *d, gfn_t *pstart, gfn_t end)
>      return rc;
>  }
>  
> +/*
> + * Clean & invalidate RAM associated to the guest vCPU.
> + *
> + * The function can only work with the current vCPU and should be called
> + * with IRQ enabled as the vCPU could get preempted.
> + */
> +void p2m_flush_vm(struct vcpu *v)
> +{
> +    int rc;
> +    gfn_t start = _gfn(0);
> +
> +    ASSERT(v == current);
> +    ASSERT(local_irq_is_enabled());
> +    ASSERT(v->arch.need_flush_to_ram);
> +
> +    do
> +    {
> +        rc = p2m_cache_flush_range(v->domain, &start, _gfn(ULONG_MAX));
> +        if ( rc == -ERESTART )
> +            do_softirq();
> +    } while ( rc == -ERESTART );
> +
> +    if ( rc != 0 )
> +        gprintk(XENLOG_WARNING,
> +                "P2M has not been correctly cleaned (rc = %d)\n",
> +                rc);
> +
> +    v->arch.need_flush_to_ram = false;
> +}
> +
> +/*
> + * See note at ARMv7 ARM B1.14.4 (DDI 0406C.c) (TL;DR: S/W ops are not
> + * easily virtualized).
> + *
> + * Main problems:
> + *  - S/W ops are local to a CPU (not broadcast)
> + *  - We have line migration behind our back (speculation)
> + *  - System caches don't support S/W at all (damn!)
> + *
> + * In the face of the above, the best we can do is to try and convert
> + * S/W ops to VA ops. Because the guest is not allowed to infer the S/W
> + * to PA mapping, it can only use S/W to nuke the whole cache, which is
> + * rather a good thing for us.
> + *
> + * Also, it is only used when turning caches on/off ("The expected
> + * usage of the cache maintenance instructions that operate by set/way
> + * is associated with the powerdown and powerup of caches, if this is
> + * required by the implementation.").
> + *
> + * We use the following policy:
> + *  - If we trap a S/W operation, we enabled VM trapping to detect
> + *  caches being turned on/off, and do a full clean.
> + *
> + *  - We flush the caches on both caches being turned on and off.
> + *
> + *  - Once the caches are enabled, we stop trapping VM ops.
> + */
> +void p2m_set_way_flush(struct vcpu *v)
> +{
> +    /* This function can only work with the current vCPU. */
> +    ASSERT(v == current);
> +
> +    if ( !(v->arch.hcr_el2 & HCR_TVM) )
> +    {
> +        v->arch.need_flush_to_ram = true;
> +        vcpu_hcr_set_flags(v, HCR_TVM);
> +    }
> +}
> +
> +void p2m_toggle_cache(struct vcpu *v, bool was_enabled)
> +{
> +    bool now_enabled = vcpu_has_cache_enabled(v);
> +
> +    /* This function can only work with the current vCPU. */
> +    ASSERT(v == current);
> +
> +    /*
> +     * If switching the MMU+caches on, need to invalidate the caches.
> +     * If switching it off, need to clean the caches.
> +     * Clean + invalidate does the trick always.
> +     */
> +    if ( was_enabled != now_enabled )
> +    {
> +        v->arch.need_flush_to_ram = true;
> +    }

NIT: no need for brackets


> +    /* Caches are now on, stop trapping VM ops (until a S/W op) */
> +    if ( now_enabled )
> +        vcpu_hcr_clear_flags(v, HCR_TVM);
> +}
> +
>  mfn_t gfn_to_mfn(struct domain *d, gfn_t gfn)
>  {
>      return p2m_lookup(d, gfn, NULL);
> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
> index 02665cc7b4..221c762ada 100644
> --- a/xen/arch/arm/traps.c
> +++ b/xen/arch/arm/traps.c
> @@ -97,7 +97,7 @@ register_t get_default_hcr_flags(void)
>  {
>      return  (HCR_PTW|HCR_BSU_INNER|HCR_AMO|HCR_IMO|HCR_FMO|HCR_VM|
>               (vwfi != NATIVE ? (HCR_TWI|HCR_TWE) : 0) |
> -             HCR_TSC|HCR_TAC|HCR_SWIO|HCR_TIDCP|HCR_FB);
> +             HCR_TSC|HCR_TAC|HCR_SWIO|HCR_TIDCP|HCR_FB|HCR_TSW);
>  }
>  
>  static enum {
> @@ -2258,10 +2258,33 @@ static void check_for_pcpu_work(void)
>      }
>  }
>  
> +/*
> + * Process pending work for the vCPU. Any call should be fast or
> + * implement preemption.
> + */
> +static void check_for_vcpu_work(void)
> +{
> +    struct vcpu *v = current;
> +
> +    if ( likely(!v->arch.need_flush_to_ram) )
> +        return;
> +
> +    /*
> +     * Give a chance for the pCPU to process work before handling the vCPU
> +     * pending work.
> +     */
> +    check_for_pcpu_work();
> +
> +    local_irq_enable();
> +    p2m_flush_vm(v);
> +    local_irq_disable();
> +}
> +
>  void leave_hypervisor_tail(void)
>  {
>      local_irq_disable();
>  
> +    check_for_vcpu_work();
>      check_for_pcpu_work();
>  
>      vgic_sync_to_lrs();
> diff --git a/xen/arch/arm/vcpreg.c b/xen/arch/arm/vcpreg.c
> index 550c25ec3f..cdc91cdf5b 100644
> --- a/xen/arch/arm/vcpreg.c
> +++ b/xen/arch/arm/vcpreg.c
> @@ -51,9 +51,14 @@
>  #define TVM_REG(sz, func, reg...)                                           \
>  static bool func(struct cpu_user_regs *regs, uint##sz##_t *r, bool read)    \
>  {                                                                           \
> +    struct vcpu *v = current;                                               \
> +    bool cache_enabled = vcpu_has_cache_enabled(v);                         \
> +                                                                            \
>      GUEST_BUG_ON(read);                                                     \
>      WRITE_SYSREG##sz(*r, reg);                                              \
>                                                                              \
> +    p2m_toggle_cache(v, cache_enabled);                                     \
> +                                                                            \
>      return true;                                                            \
>  }
>  
> @@ -71,6 +76,8 @@ static bool func(struct cpu_user_regs *regs, uint##sz##_t *r, bool read)    \
>  static bool vreg_emulate_##xreg(struct cpu_user_regs *regs, uint32_t *r,    \
>                                  bool read, bool hi)                         \
>  {                                                                           \
> +    struct vcpu *v = current;                                               \
> +    bool cache_enabled = vcpu_has_cache_enabled(v);                         \
>      register_t reg = READ_SYSREG(xreg);                                     \
>                                                                              \
>      GUEST_BUG_ON(read);                                                     \
> @@ -86,6 +93,8 @@ static bool vreg_emulate_##xreg(struct cpu_user_regs *regs, uint32_t *r,    \
>      }                                                                       \
>      WRITE_SYSREG(reg, xreg);                                                \
>                                                                              \
> +    p2m_toggle_cache(v, cache_enabled);                                     \
> +                                                                            \
>      return true;                                                            \
>  }                                                                           \
>                                                                              \
> @@ -186,6 +195,19 @@ void do_cp15_32(struct cpu_user_regs *regs, const union hsr hsr)
>          break;
>  
>      /*
> +     * HCR_EL2.TSW
> +     *
> +     * ARMv7 (DDI 0406C.b): B1.14.6
> +     * ARMv8 (DDI 0487B.b): Table D1-42
> +     */
> +    case HSR_CPREG32(DCISW):
> +    case HSR_CPREG32(DCCSW):
> +    case HSR_CPREG32(DCCISW):
> +        if ( !cp32.read )
> +            p2m_set_way_flush(current);
> +        break;
> +
> +    /*
>       * HCR_EL2.TVM
>       *
>       * ARMv8 (DDI 0487D.a): Table D1-38
> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> index 175de44927..f16b973e0d 100644
> --- a/xen/include/asm-arm/domain.h
> +++ b/xen/include/asm-arm/domain.h
> @@ -202,6 +202,14 @@ struct arch_vcpu
>      struct vtimer phys_timer;
>      struct vtimer virt_timer;
>      bool   vtimer_initialized;
> +
> +    /*
> +     * The full P2M may require some cleaning (e.g when emulation
> +     * set/way). As the action can take a long time, it requires
> +     * preemption. So this is deferred until we return to the guest.

Please replace the last sentence of this comment with:

"It is deferred until we return to guest, where we can more easily check
for softirqs and preempt the vcpu safely."


> +     */
> +    bool need_flush_to_ram;
> +
>  }  __cacheline_aligned;
>  
>  void vcpu_show_execution_state(struct vcpu *);
> diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
> index a633e27cc9..79abcb5a63 100644
> --- a/xen/include/asm-arm/p2m.h
> +++ b/xen/include/asm-arm/p2m.h
> @@ -6,6 +6,8 @@
>  #include <xen/rwlock.h>
>  #include <xen/mem_access.h>
>  
> +#include <asm/current.h>
> +
>  #define paddr_bits PADDR_BITS
>  
>  /* Holds the bit size of IPAs in p2m tables.  */
> @@ -237,6 +239,12 @@ bool p2m_resolve_translation_fault(struct domain *d, gfn_t gfn);
>   */
>  int p2m_cache_flush_range(struct domain *d, gfn_t *pstart, gfn_t end);
>  
> +void p2m_set_way_flush(struct vcpu *v);
> +
> +void p2m_toggle_cache(struct vcpu *v, bool was_enabled);
> +
> +void p2m_flush_vm(struct vcpu *v);
> +
>  /*
>   * Map a region in the guest p2m with a specific p2m type.
>   * The memory attributes will be derived from the p2m type.
> @@ -364,6 +372,18 @@ static inline int set_foreign_p2m_entry(struct domain *d, unsigned long gfn,
>      return -EOPNOTSUPP;
>  }
>  
> +/*
> + * A vCPU has cache enabled only when the MMU is enabled and data cache
> + * is enabled.
> + */
> +static inline bool vcpu_has_cache_enabled(struct vcpu *v)
> +{
> +    /* Only works with the current vCPU */
> +    ASSERT(current == v);
> +
> +    return (READ_SYSREG32(SCTLR_EL1) & (SCTLR_C|SCTLR_M)) == (SCTLR_C|SCTLR_M);

Line > 80


> +}
> +
>  #endif /* _XEN_P2M_H */
>  
>  /*
> -- 
> 2.11.0
>
Andrew Cooper Dec. 14, 2018, 9:31 p.m. UTC | #2
On 14/12/2018 03:58, Julien Grall wrote:
> Set/Way operations are used to perform maintenance on a given cache.
> At the moment, Set/Way operations are not trapped and therefore a guest
> OS will directly act on the local cache. However, a vCPU may migrate to
> another pCPU in the middle of the processor. This will result to have
> cache with stall data (Set/Way are not propagated) potentially causing

s/stall/stale/ ?

~Andrew
Julien Grall Dec. 17, 2018, 10:17 a.m. UTC | #3
Hi,

On 14/12/2018 21:22, Stefano Stabellini wrote:
> On Fri, 14 Dec 2018, Julien Grall wrote:
>> +
>> +    /*
>> +     * The full P2M may require some cleaning (e.g when emulation
>> +     * set/way). As the action can take a long time, it requires
>> +     * preemption. So this is deferred until we return to the guest.
> 
> Please replace the last sentence of this comment with:
> 
> "It is deferred until we return to guest, where we can more easily check
> for softirqs and preempt the vcpu safely."

Ok.

> 
>> +     */
>> +    bool need_flush_to_ram;
>> +
>>   }  __cacheline_aligned;
>>   
>>   void vcpu_show_execution_state(struct vcpu *);
>> diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
>> index a633e27cc9..79abcb5a63 100644
>> --- a/xen/include/asm-arm/p2m.h
>> +++ b/xen/include/asm-arm/p2m.h
>> @@ -6,6 +6,8 @@
>>   #include <xen/rwlock.h>
>>   #include <xen/mem_access.h>
>>   
>> +#include <asm/current.h>
>> +
>>   #define paddr_bits PADDR_BITS
>>   
>>   /* Holds the bit size of IPAs in p2m tables.  */
>> @@ -237,6 +239,12 @@ bool p2m_resolve_translation_fault(struct domain *d, gfn_t gfn);
>>    */
>>   int p2m_cache_flush_range(struct domain *d, gfn_t *pstart, gfn_t end);
>>   
>> +void p2m_set_way_flush(struct vcpu *v);
>> +
>> +void p2m_toggle_cache(struct vcpu *v, bool was_enabled);
>> +
>> +void p2m_flush_vm(struct vcpu *v);
>> +
>>   /*
>>    * Map a region in the guest p2m with a specific p2m type.
>>    * The memory attributes will be derived from the p2m type.
>> @@ -364,6 +372,18 @@ static inline int set_foreign_p2m_entry(struct domain *d, unsigned long gfn,
>>       return -EOPNOTSUPP;
>>   }
>>   
>> +/*
>> + * A vCPU has cache enabled only when the MMU is enabled and data cache
>> + * is enabled.
>> + */
>> +static inline bool vcpu_has_cache_enabled(struct vcpu *v)
>> +{
>> +    /* Only works with the current vCPU */
>> +    ASSERT(current == v);
>> +
>> +    return (READ_SYSREG32(SCTLR_EL1) & (SCTLR_C|SCTLR_M)) == (SCTLR_C|SCTLR_M);
> 
> Line > 80

No, it is 79 characters (not counting \n). Why do you think it is more than 80 
characters?

Cheers,
Stefano Stabellini Dec. 17, 2018, 5:31 p.m. UTC | #4
On Mon, 17 Dec 2018, Julien Grall wrote:
> Hi,
> 
> On 14/12/2018 21:22, Stefano Stabellini wrote:
> > On Fri, 14 Dec 2018, Julien Grall wrote:
> > > +
> > > +    /*
> > > +     * The full P2M may require some cleaning (e.g when emulation
> > > +     * set/way). As the action can take a long time, it requires
> > > +     * preemption. So this is deferred until we return to the guest.
> > 
> > Please replace the last sentence of this comment with:
> > 
> > "It is deferred until we return to guest, where we can more easily check
> > for softirqs and preempt the vcpu safely."
> 
> Ok.
> 
> > 
> > > +     */
> > > +    bool need_flush_to_ram;
> > > +
> > >   }  __cacheline_aligned;
> > >     void vcpu_show_execution_state(struct vcpu *);
> > > diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
> > > index a633e27cc9..79abcb5a63 100644
> > > --- a/xen/include/asm-arm/p2m.h
> > > +++ b/xen/include/asm-arm/p2m.h
> > > @@ -6,6 +6,8 @@
> > >   #include <xen/rwlock.h>
> > >   #include <xen/mem_access.h>
> > >   +#include <asm/current.h>
> > > +
> > >   #define paddr_bits PADDR_BITS
> > >     /* Holds the bit size of IPAs in p2m tables.  */
> > > @@ -237,6 +239,12 @@ bool p2m_resolve_translation_fault(struct domain *d,
> > > gfn_t gfn);
> > >    */
> > >   int p2m_cache_flush_range(struct domain *d, gfn_t *pstart, gfn_t end);
> > >   +void p2m_set_way_flush(struct vcpu *v);
> > > +
> > > +void p2m_toggle_cache(struct vcpu *v, bool was_enabled);
> > > +
> > > +void p2m_flush_vm(struct vcpu *v);
> > > +
> > >   /*
> > >    * Map a region in the guest p2m with a specific p2m type.
> > >    * The memory attributes will be derived from the p2m type.
> > > @@ -364,6 +372,18 @@ static inline int set_foreign_p2m_entry(struct domain
> > > *d, unsigned long gfn,
> > >       return -EOPNOTSUPP;
> > >   }
> > >   +/*
> > > + * A vCPU has cache enabled only when the MMU is enabled and data cache
> > > + * is enabled.
> > > + */
> > > +static inline bool vcpu_has_cache_enabled(struct vcpu *v)
> > > +{
> > > +    /* Only works with the current vCPU */
> > > +    ASSERT(current == v);
> > > +
> > > +    return (READ_SYSREG32(SCTLR_EL1) & (SCTLR_C|SCTLR_M)) ==
> > > (SCTLR_C|SCTLR_M);
> > 
> > Line > 80
> 
> No, it is 79 characters (not counting \n). Why do you think it is more than 80
> characters?

Weird. I must have miscounted in my reply email, where '>', '+' and tabs
increase the line count.
Julien Grall Dec. 18, 2018, 5:58 p.m. UTC | #5
Hi Andrew,

On 12/14/18 9:31 PM, Andrew Cooper wrote:
> On 14/12/2018 03:58, Julien Grall wrote:
>> Set/Way operations are used to perform maintenance on a given cache.
>> At the moment, Set/Way operations are not trapped and therefore a guest
>> OS will directly act on the local cache. However, a vCPU may migrate to
>> another pCPU in the middle of the processor. This will result to have
>> cache with stall data (Set/Way are not propagated) potentially causing
> 
> s/stall/stale/ ?

Yes. I tend to confuse the two a lot.

Cheers,

> 
> ~Andrew
>

Patch

diff --git a/xen/arch/arm/arm64/vsysreg.c b/xen/arch/arm/arm64/vsysreg.c
index 16ac9c344a..8a85507d9d 100644
--- a/xen/arch/arm/arm64/vsysreg.c
+++ b/xen/arch/arm/arm64/vsysreg.c
@@ -34,9 +34,14 @@ 
 static bool vreg_emulate_##reg(struct cpu_user_regs *regs,          \
                                uint64_t *r, bool read)              \
 {                                                                   \
+    struct vcpu *v = current;                                       \
+    bool cache_enabled = vcpu_has_cache_enabled(v);                 \
+                                                                    \
     GUEST_BUG_ON(read);                                             \
     WRITE_SYSREG64(*r, reg);                                        \
                                                                     \
+    p2m_toggle_cache(v, cache_enabled);                             \
+                                                                    \
     return true;                                                    \
 }
 
@@ -85,6 +90,18 @@  void do_sysreg(struct cpu_user_regs *regs,
         break;
 
     /*
+     * HCR_EL2.TSW
+     *
+     * ARMv8 (DDI 0487B.b): Table D1-42
+     */
+    case HSR_SYSREG_DCISW:
+    case HSR_SYSREG_DCCSW:
+    case HSR_SYSREG_DCCISW:
+        if ( !hsr.sysreg.read )
+            p2m_set_way_flush(current);
+        break;
+
+    /*
      * HCR_EL2.TVM
      *
      * ARMv8 (DDI 0487D.a): Table D1-38
diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index 5639e4b64c..125d858d02 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -3,6 +3,7 @@ 
 #include <xen/iocap.h>
 #include <xen/lib.h>
 #include <xen/sched.h>
+#include <xen/softirq.h>
 
 #include <asm/event.h>
 #include <asm/flushtlb.h>
@@ -1615,6 +1616,97 @@  int p2m_cache_flush_range(struct domain *d, gfn_t *pstart, gfn_t end)
     return rc;
 }
 
+/*
+ * Clean & invalidate RAM associated to the guest vCPU.
+ *
+ * The function can only work with the current vCPU and should be called
+ * with IRQ enabled as the vCPU could get preempted.
+ */
+void p2m_flush_vm(struct vcpu *v)
+{
+    int rc;
+    gfn_t start = _gfn(0);
+
+    ASSERT(v == current);
+    ASSERT(local_irq_is_enabled());
+    ASSERT(v->arch.need_flush_to_ram);
+
+    do
+    {
+        rc = p2m_cache_flush_range(v->domain, &start, _gfn(ULONG_MAX));
+        if ( rc == -ERESTART )
+            do_softirq();
+    } while ( rc == -ERESTART );
+
+    if ( rc != 0 )
+        gprintk(XENLOG_WARNING,
+                "P2M has not been correctly cleaned (rc = %d)\n",
+                rc);
+
+    v->arch.need_flush_to_ram = false;
+}
+
+/*
+ * See note at ARMv7 ARM B1.14.4 (DDI 0406C.c) (TL;DR: S/W ops are not
+ * easily virtualized).
+ *
+ * Main problems:
+ *  - S/W ops are local to a CPU (not broadcast)
+ *  - We have line migration behind our back (speculation)
+ *  - System caches don't support S/W at all (damn!)
+ *
+ * In the face of the above, the best we can do is to try and convert
+ * S/W ops to VA ops. Because the guest is not allowed to infer the S/W
+ * to PA mapping, it can only use S/W to nuke the whole cache, which is
+ * rather a good thing for us.
+ *
+ * Also, it is only used when turning caches on/off ("The expected
+ * usage of the cache maintenance instructions that operate by set/way
+ * is associated with the powerdown and powerup of caches, if this is
+ * required by the implementation.").
+ *
+ * We use the following policy:
+ *  - If we trap a S/W operation, we enabled VM trapping to detect
+ *  caches being turned on/off, and do a full clean.
+ *
+ *  - We flush the caches on both caches being turned on and off.
+ *
+ *  - Once the caches are enabled, we stop trapping VM ops.
+ */
+void p2m_set_way_flush(struct vcpu *v)
+{
+    /* This function can only work with the current vCPU. */
+    ASSERT(v == current);
+
+    if ( !(v->arch.hcr_el2 & HCR_TVM) )
+    {
+        v->arch.need_flush_to_ram = true;
+        vcpu_hcr_set_flags(v, HCR_TVM);
+    }
+}
+
+void p2m_toggle_cache(struct vcpu *v, bool was_enabled)
+{
+    bool now_enabled = vcpu_has_cache_enabled(v);
+
+    /* This function can only work with the current vCPU. */
+    ASSERT(v == current);
+
+    /*
+     * If switching the MMU+caches on, need to invalidate the caches.
+     * If switching it off, need to clean the caches.
+     * Clean + invalidate does the trick always.
+     */
+    if ( was_enabled != now_enabled )
+    {
+        v->arch.need_flush_to_ram = true;
+    }
+
+    /* Caches are now on, stop trapping VM ops (until a S/W op) */
+    if ( now_enabled )
+        vcpu_hcr_clear_flags(v, HCR_TVM);
+}
+
 mfn_t gfn_to_mfn(struct domain *d, gfn_t gfn)
 {
     return p2m_lookup(d, gfn, NULL);
diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index 02665cc7b4..221c762ada 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -97,7 +97,7 @@  register_t get_default_hcr_flags(void)
 {
     return  (HCR_PTW|HCR_BSU_INNER|HCR_AMO|HCR_IMO|HCR_FMO|HCR_VM|
              (vwfi != NATIVE ? (HCR_TWI|HCR_TWE) : 0) |
-             HCR_TSC|HCR_TAC|HCR_SWIO|HCR_TIDCP|HCR_FB);
+             HCR_TSC|HCR_TAC|HCR_SWIO|HCR_TIDCP|HCR_FB|HCR_TSW);
 }
 
 static enum {
@@ -2258,10 +2258,33 @@  static void check_for_pcpu_work(void)
     }
 }
 
+/*
+ * Process pending work for the vCPU. Any call should be fast or
+ * implement preemption.
+ */
+static void check_for_vcpu_work(void)
+{
+    struct vcpu *v = current;
+
+    if ( likely(!v->arch.need_flush_to_ram) )
+        return;
+
+    /*
+     * Give a chance for the pCPU to process work before handling the vCPU
+     * pending work.
+     */
+    check_for_pcpu_work();
+
+    local_irq_enable();
+    p2m_flush_vm(v);
+    local_irq_disable();
+}
+
 void leave_hypervisor_tail(void)
 {
     local_irq_disable();
 
+    check_for_vcpu_work();
     check_for_pcpu_work();
 
     vgic_sync_to_lrs();
diff --git a/xen/arch/arm/vcpreg.c b/xen/arch/arm/vcpreg.c
index 550c25ec3f..cdc91cdf5b 100644
--- a/xen/arch/arm/vcpreg.c
+++ b/xen/arch/arm/vcpreg.c
@@ -51,9 +51,14 @@ 
 #define TVM_REG(sz, func, reg...)                                           \
 static bool func(struct cpu_user_regs *regs, uint##sz##_t *r, bool read)    \
 {                                                                           \
+    struct vcpu *v = current;                                               \
+    bool cache_enabled = vcpu_has_cache_enabled(v);                         \
+                                                                            \
     GUEST_BUG_ON(read);                                                     \
     WRITE_SYSREG##sz(*r, reg);                                              \
                                                                             \
+    p2m_toggle_cache(v, cache_enabled);                                     \
+                                                                            \
     return true;                                                            \
 }
 
@@ -71,6 +76,8 @@  static bool func(struct cpu_user_regs *regs, uint##sz##_t *r, bool read)    \
 static bool vreg_emulate_##xreg(struct cpu_user_regs *regs, uint32_t *r,    \
                                 bool read, bool hi)                         \
 {                                                                           \
+    struct vcpu *v = current;                                               \
+    bool cache_enabled = vcpu_has_cache_enabled(v);                         \
     register_t reg = READ_SYSREG(xreg);                                     \
                                                                             \
     GUEST_BUG_ON(read);                                                     \
@@ -86,6 +93,8 @@  static bool vreg_emulate_##xreg(struct cpu_user_regs *regs, uint32_t *r,    \
     }                                                                       \
     WRITE_SYSREG(reg, xreg);                                                \
                                                                             \
+    p2m_toggle_cache(v, cache_enabled);                                     \
+                                                                            \
     return true;                                                            \
 }                                                                           \
                                                                             \
@@ -186,6 +195,19 @@  void do_cp15_32(struct cpu_user_regs *regs, const union hsr hsr)
         break;
 
     /*
+     * HCR_EL2.TSW
+     *
+     * ARMv7 (DDI 0406C.b): B1.14.6
+     * ARMv8 (DDI 0487B.b): Table D1-42
+     */
+    case HSR_CPREG32(DCISW):
+    case HSR_CPREG32(DCCSW):
+    case HSR_CPREG32(DCCISW):
+        if ( !cp32.read )
+            p2m_set_way_flush(current);
+        break;
+
+    /*
      * HCR_EL2.TVM
      *
      * ARMv8 (DDI 0487D.a): Table D1-38
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 175de44927..f16b973e0d 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -202,6 +202,14 @@  struct arch_vcpu
     struct vtimer phys_timer;
     struct vtimer virt_timer;
     bool   vtimer_initialized;
+
+    /*
+     * The full P2M may require some cleaning (e.g when emulation
+     * set/way). As the action can take a long time, it requires
+     * preemption. So this is deferred until we return to the guest.
+     */
+    bool need_flush_to_ram;
+
 }  __cacheline_aligned;
 
 void vcpu_show_execution_state(struct vcpu *);
diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
index a633e27cc9..79abcb5a63 100644
--- a/xen/include/asm-arm/p2m.h
+++ b/xen/include/asm-arm/p2m.h
@@ -6,6 +6,8 @@ 
 #include <xen/rwlock.h>
 #include <xen/mem_access.h>
 
+#include <asm/current.h>
+
 #define paddr_bits PADDR_BITS
 
 /* Holds the bit size of IPAs in p2m tables.  */
@@ -237,6 +239,12 @@  bool p2m_resolve_translation_fault(struct domain *d, gfn_t gfn);
  */
 int p2m_cache_flush_range(struct domain *d, gfn_t *pstart, gfn_t end);
 
+void p2m_set_way_flush(struct vcpu *v);
+
+void p2m_toggle_cache(struct vcpu *v, bool was_enabled);
+
+void p2m_flush_vm(struct vcpu *v);
+
 /*
  * Map a region in the guest p2m with a specific p2m type.
  * The memory attributes will be derived from the p2m type.
@@ -364,6 +372,18 @@  static inline int set_foreign_p2m_entry(struct domain *d, unsigned long gfn,
     return -EOPNOTSUPP;
 }
 
+/*
+ * A vCPU has cache enabled only when the MMU is enabled and data cache
+ * is enabled.
+ */
+static inline bool vcpu_has_cache_enabled(struct vcpu *v)
+{
+    /* Only works with the current vCPU */
+    ASSERT(current == v);
+
+    return (READ_SYSREG32(SCTLR_EL1) & (SCTLR_C|SCTLR_M)) == (SCTLR_C|SCTLR_M);
+}
+
 #endif /* _XEN_P2M_H */
 
 /*