diff mbox series

[Xen-devel,for-4.12,v2,6/7] xen/arm: Implement workaround for Cortex-A76 erratum 1165522

Message ID 20190128115026.3336-7-julien.grall@arm.com
State New
Headers show
Series xen/arm: Workaround for Cortex-A76 erratum 1165522 | expand

Commit Message

Julien Grall Jan. 28, 2019, 11:50 a.m. UTC
Early version of Cortex-A76 can end-up with corrupt TLBs if they
speculate an AT instruction while the S1/S2 system registers are in an
inconsistent state.

This can happen during guest context switch and when invalidating the
TLBs for other than the current VMID.

The workaround implemented in Xen will:
    - Use an empty stage-2 with a reserved VMID while context switching
    between 2 guests
    - Use an empty stage-2 with the VMID where TLBs need to be flushed

Signed-off-by: Julien Grall <julien.grall@arm.com>
Reviewed-by: Andrii Anisov <andrii_anisov@epam.com>

---
    Changes in v2:
        - Fix typo
        - Add Andrii's reviewed-by
---
 docs/misc/arm/silicon-errata.txt |  1 +
 xen/arch/arm/cpuerrata.c         |  6 ++++
 xen/arch/arm/domain.c            |  8 +++--
 xen/arch/arm/p2m.c               | 78 ++++++++++++++++++++++++++++++++++++++--
 xen/include/asm-arm/cpufeature.h |  3 +-
 xen/include/asm-arm/processor.h  |  2 ++
 6 files changed, 93 insertions(+), 5 deletions(-)
diff mbox series

Patch

diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
index 906bf5fd48..6cd1366f15 100644
--- a/docs/misc/arm/silicon-errata.txt
+++ b/docs/misc/arm/silicon-errata.txt
@@ -48,4 +48,5 @@  stable hypervisors.
 | ARM            | Cortex-A57      | #852523         | N/A                     |
 | ARM            | Cortex-A57      | #832075         | ARM64_ERRATUM_832075    |
 | ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220    |
+| ARM            | Cortex-A76      | #1165522        | N/A                     |
 | ARM            | MMU-500         | #842869         | N/A                     |
diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index f4815cafb4..4431b244fd 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -489,6 +489,12 @@  static const struct arm_cpu_capabilities arm_errata[] = {
         .matches = has_ssbd_mitigation,
     },
 #endif
+    {
+        /* Cortex-A76 r0p0 - r2p0 */
+        .desc = "ARM erratum 116522",
+        .capability = ARM64_WORKAROUND_AT_SPECULATE,
+        MIDR_RANGE(MIDR_CORTEX_A76, 0, 2 << MIDR_VARIANT_SHIFT),
+    },
     {},
 };
 
diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 41f101746e..6dc633ed50 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -181,8 +181,6 @@  static void ctxt_switch_to(struct vcpu *n)
     if ( is_idle_vcpu(n) )
         return;
 
-    p2m_restore_state(n);
-
     vpidr = READ_SYSREG32(MIDR_EL1);
     WRITE_SYSREG32(vpidr, VPIDR_EL2);
     WRITE_SYSREG(n->arch.vmpidr, VMPIDR_EL2);
@@ -235,6 +233,12 @@  static void ctxt_switch_to(struct vcpu *n)
 #endif
     isb();
 
+    /*
+     * ARM64_WORKAROUND_AT_SPECULATE: The P2M should be restored after
+     * the stage-1 MMU sysregs have been restored.
+     */
+    p2m_restore_state(n);
+
     /* Control Registers */
     WRITE_SYSREG(n->arch.cpacr, CPACR_EL1);
 
diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index 44391a5f8c..c38bd7e16e 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -5,6 +5,7 @@ 
 #include <xen/sched.h>
 #include <xen/softirq.h>
 
+#include <asm/alternative.h>
 #include <asm/event.h>
 #include <asm/flushtlb.h>
 #include <asm/guest_walk.h>
@@ -41,6 +42,8 @@  static const paddr_t level_masks[] =
 static const uint8_t level_orders[] =
     { ZEROETH_ORDER, FIRST_ORDER, SECOND_ORDER, THIRD_ORDER };
 
+static mfn_t __read_mostly empty_root_mfn;
+
 static uint64_t generate_vttbr(uint16_t vmid, mfn_t root_mfn)
 {
     return (mfn_to_maddr(root_mfn) | ((uint64_t)vmid << 48));
@@ -92,9 +95,25 @@  void dump_p2m_lookup(struct domain *d, paddr_t addr)
                  P2M_ROOT_LEVEL, P2M_ROOT_PAGES);
 }
 
+/*
+ * p2m_save_state and p2m_restore_state work in pair to workaround
+ * ARM64_WORKAROUND_AT_SPECULATE. p2m_save_state will set-up VTTBR to
+ * point to the empty page-tables to stop allocating TLB entries.
+ */
 void p2m_save_state(struct vcpu *p)
 {
     p->arch.sctlr = READ_SYSREG(SCTLR_EL1);
+
+    if ( cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) )
+    {
+        WRITE_SYSREG64(generate_vttbr(INVALID_VMID, empty_root_mfn), VTTBR_EL2);
+        /*
+         * Ensure VTTBR_EL2 is correctly synchronized so we can restore
+         * the next vCPU context without worrying about AT instruction
+         * speculation.
+         */
+        isb();
+    }
 }
 
 void p2m_restore_state(struct vcpu *n)
@@ -105,10 +124,17 @@  void p2m_restore_state(struct vcpu *n)
     if ( is_idle_vcpu(n) )
         return;
 
-    WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
     WRITE_SYSREG(n->arch.sctlr, SCTLR_EL1);
     WRITE_SYSREG(n->arch.hcr_el2, HCR_EL2);
 
+    /*
+     * ARM64_WORKAROUND_AT_SPECULATE: VTTBR_EL2 should be restored after all
+     * registers associated to EL1/EL0 translations regime have been
+     * synchronized.
+     */
+    asm volatile(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_AT_SPECULATE));
+    WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
+
     last_vcpu_ran = &p2m->last_vcpu_ran[smp_processor_id()];
 
     /*
@@ -149,8 +175,23 @@  static void p2m_force_tlb_flush_sync(struct p2m_domain *p2m)
     ovttbr = READ_SYSREG64(VTTBR_EL2);
     if ( ovttbr != p2m->vttbr )
     {
+        uint64_t vttbr;
+
         local_irq_save(flags);
-        WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
+
+        /*
+         * ARM64_WORKAROUND_AT_SPECULATE: We need to stop AT to allocate
+         * TLBs entries because the context is partially modified. We
+         * only need the VMID for flushing the TLBs, so we can generate
+         * a new VTTBR with the VMID to flush and the empty root table.
+         */
+        if ( !cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) )
+            vttbr = p2m->vttbr;
+        else
+            vttbr = generate_vttbr(p2m->vmid, empty_root_mfn);
+
+        WRITE_SYSREG64(vttbr, VTTBR_EL2);
+
         /* Ensure VTTBR_EL2 is synchronized before flushing the TLBs */
         isb();
     }
@@ -1913,6 +1954,23 @@  static uint32_t __read_mostly vtcr;
 static void setup_virt_paging_one(void *data)
 {
     WRITE_SYSREG32(vtcr, VTCR_EL2);
+
+    /*
+     * ARM64_WORKAROUND_AT_SPECULATE: We want to keep the TLBs free from
+     * entries related to EL1/EL0 translation regime until a guest vCPU
+     * is running. For that, we need to set-up VTTBR to point to an empty
+     * page-table and turn on stage-2 translation. The TLB entries
+     * associated with EL1/EL0 translation regime will also be flushed in case
+     * an AT instruction was speculated before hand.
+     */
+    if ( cpus_have_cap(ARM64_WORKAROUND_AT_SPECULATE) )
+    {
+        WRITE_SYSREG64(generate_vttbr(INVALID_VMID, empty_root_mfn), VTTBR_EL2);
+        WRITE_SYSREG(READ_SYSREG(HCR_EL2) | HCR_VM, HCR_EL2);
+        isb();
+
+        flush_tlb_all_local();
+    }
 }
 
 void __init setup_virt_paging(void)
@@ -1996,6 +2054,22 @@  void __init setup_virt_paging(void)
     /* It is not allowed to concatenate a level zero root */
     BUG_ON( P2M_ROOT_LEVEL == 0 && P2M_ROOT_ORDER > 0 );
     vtcr = val;
+
+    /*
+     * ARM64_WORKAROUND_AT_SPECULATE requires to allocate root table
+     * with all entries zeroed.
+     */
+    if ( cpus_have_cap(ARM64_WORKAROUND_AT_SPECULATE) )
+    {
+        struct page_info *root;
+
+        root = p2m_allocate_root();
+        if ( !root )
+            panic("Unable to allocate root table for ARM64_WORKAROUND_AT_SPECULATE\n");
+
+        empty_root_mfn = page_to_mfn(root);
+    }
+
     setup_virt_paging_one(NULL);
     smp_call_function(setup_virt_paging_one, NULL, 1);
 }
diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h
index 17de928467..c2c8f3417c 100644
--- a/xen/include/asm-arm/cpufeature.h
+++ b/xen/include/asm-arm/cpufeature.h
@@ -45,8 +45,9 @@ 
 #define ARM_HARDEN_BRANCH_PREDICTOR 7
 #define ARM_SSBD 8
 #define ARM_SMCCC_1_1 9
+#define ARM64_WORKAROUND_AT_SPECULATE 10
 
-#define ARM_NCAPS           10
+#define ARM_NCAPS           11
 
 #ifndef __ASSEMBLY__
 
diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
index cb781751a6..b5f515805d 100644
--- a/xen/include/asm-arm/processor.h
+++ b/xen/include/asm-arm/processor.h
@@ -52,6 +52,7 @@ 
 #define ARM_CPU_PART_CORTEX_A72     0xD08
 #define ARM_CPU_PART_CORTEX_A73     0xD09
 #define ARM_CPU_PART_CORTEX_A75     0xD0A
+#define ARM_CPU_PART_CORTEX_A76     0xD0B
 
 #define MIDR_CORTEX_A12 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A12)
 #define MIDR_CORTEX_A17 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A17)
@@ -61,6 +62,7 @@ 
 #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
 #define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73)
 #define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75)
+#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
 
 /* MPIDR Multiprocessor Affinity Register */
 #define _MPIDR_UP           (30)