diff mbox

[Xen-devel,RFC,9/9] xen: arm: support for up to 48-bit IPA addressing on arm64

Message ID 178041f951bc35a80f5a2a7e228b95ddc6c7a64d.1406728037.git.ian.campbell@citrix.com
State New
Headers show

Commit Message

Ian Campbell July 30, 2014, 1:47 p.m. UTC
Currently we support only 40-bits. This is insufficient on systems where
peripherals which need to be 1:1 mapped to dom0 are above the 40-bit limit.

Unfortunately the hardware requirements are such that this means that the
number of levels in the P2M is not static and must vary with the number of
implemented physical address bits. This is described in ARM DDI 0487A.b Table
D4-5. In short there is no single p2m configuration which supports everything
from 40- to 48- bits.

For example a system which supports up to 40-bit addressing will only support 3
level p2m (maximum SL0 is 1 == 3 levels), requiring a concatenated page table
root consisting of two pages to make the full 40-bits of addressing.

A maximum of 16 pages can be concatenated meaning that a 3 level p2m can only
support up to 43-bit addreses. Therefore support for 48-bit addressing requres
SL0==2 (4 levels of paging).

After the previous patches our various p2m lookup and manipulation functions
already support starting at arbitrary level and with arbitrary root
concatenation. All that remains is to determine the correct settings from
ID_AA64MMFR0_EL1.PARange for which we use a lookup table.

As well as supporting 44 and 48 bit addressing we can also reduce the order of
the first level for systems which support only 32 or 36 physical address bits,
saving a page.

Systems with 42-bits are an interesting case, since they only support 3 levels
of paging, implying that 8 pages are required at the root level. So far I am
not aware of any systems with peripheral located so high up (the only 42-bit
system I've seen has nothing above 40-bits), so such systems remain configured
for 40-bit IPA with a pair of pages at the root of the p2m.

Switching to synbolic names for the VTCR_EL2 bits as we go improves the clarity
of the result.

Parts of this are derived from "xen/arm: Add 4-level page table for
stage 2 translation" by Vijaya Kumar K.

arm32 remains with the static 3-level, 2 page root configuration.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
 xen/arch/arm/p2m.c              |   82 +++++++++++++++++++++++++++++++--------
 xen/include/asm-arm/p2m.h       |    2 +-
 xen/include/asm-arm/processor.h |   28 +++++++++++++
 3 files changed, 94 insertions(+), 18 deletions(-)
diff mbox

Patch

diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index e9938ae..0e8e8e0 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -11,10 +11,17 @@ 
 #include <asm/hardirq.h>
 #include <asm/page.h>
 
-#define P2M_ROOT_LEVEL       1
+#ifdef CONFIG_ARM_64
+static int __read_mostly p2m_root_order;
+static int __read_mostly p2m_root_level;
+#define P2M_ROOT_ORDER    p2m_root_order
+#define P2M_ROOT_LEVEL p2m_root_level
+#else
+/* First level P2M is alway 2 consecutive pages */
+#define P2M_ROOT_LEVEL 1
+#define P2M_ROOT_ORDER    1
+#endif
 
-/* First level P2M is 2 consecutive pages */
-#define P2M_ROOT_ORDER 1
 #define P2M_ROOT_PAGES    (1<<P2M_ROOT_ORDER)
 
 static bool_t p2m_valid(lpae_t pte)
@@ -164,6 +171,8 @@  paddr_t p2m_lookup(struct domain *d, paddr_t paddr, p2m_type_t *t)
 
     map = __map_domain_page(p2m->root + root_table);
 
+    BUG_ON(P2M_ROOT_LEVEL >= 4);
+
     for ( level = P2M_ROOT_LEVEL ; level < 4 ; level++ )
     {
         mask = masks[level];
@@ -883,6 +892,7 @@  int p2m_alloc_table(struct domain *d)
 {
     struct p2m_domain *p2m = &d->arch.p2m;
     struct page_info *page;
+    int i;
 
     page = alloc_domheap_pages(NULL, P2M_ROOT_ORDER, 0);
     if ( page == NULL )
@@ -891,8 +901,8 @@  int p2m_alloc_table(struct domain *d)
     spin_lock(&p2m->lock);
 
     /* Clear both first level pages */
-    clear_and_clean_page(page);
-    clear_and_clean_page(page + 1);
+    for ( i = 0; i < P2M_ROOT_PAGES; i++ )
+        clear_and_clean_page(page + i);
 
     p2m->root = page;
 
@@ -1078,21 +1088,59 @@  static void setup_virt_paging_one(void *data)
 
 void __cpuinit setup_virt_paging(void)
 {
-    unsigned long val;
-
     /* Setup Stage 2 address translation */
-    /* SH0=11 (Inner-shareable)
-     * ORGN0=IRGN0=01 (Normal memory, Write-Back Write-Allocate Cacheable)
-     * SL0=01 (Level-1)
-     * ARVv7: T0SZ=(1)1000 = -8 (32-(-8) = 40 bit physical addresses)
-     * ARMv8: T0SZ=01 1000 = 24 (64-24   = 40 bit physical addresses)
-     *        PS=010 == 40 bits
-     */
+    unsigned long val = VTCR_RES1|VTCR_SH0_IS|VTCR_ORGN0_WBWA|VTCR_IRGN0_WBWA;
+
 #ifdef CONFIG_ARM_32
-    val = 0x80003558;
-#else
-    val = 0x80023558;
+    printk("P2M: 40-bit IPA\n");
+    val |= VTCR_T0SZ(0x18); /* 40 bit IPA */
+#else /* CONFIG_ARM_64 */
+    const struct {
+        unsigned pabits; /* Physical Address Size */
+        unsigned t0sz;   /* Desired T0SZ, minimum in comment */
+        unsigned root_order; /* Page order of the root of the p2m */
+        unsigned sl0;    /* Desired SL0, maximum in comment */
+    } pa_range_info[] = {
+        /* T0SZ minimum and SL0 maximum from ARM DDI 0487A.b Table D4-5 */
+        /*      PA size, t0sz(min), root-order, sl0(max) */
+        [0] = { 32,      32/*32*/,  0,          1 },
+        [1] = { 36,      28/*28*/,  0,          1 },
+        [2] = { 40,      24/*24*/,  1,          1 },
+        [3] = { 42,      24/*22*/,  1,          1 },
+        [4] = { 44,      20/*20*/,  0,          2 },
+        [5] = { 48,      16/*16*/,  0,          2 },
+        [6] = { 0 }, /* Invalid */
+        [7] = { 0 }  /* Invalid */
+    };
+
+    int cpu;
+    int pa_range = 0x10; /* Larger than any possible value */
+
+    for_each_online_cpu ( cpu )
+    {
+        struct cpuinfo_arm *info = &cpu_data[cpu];
+        if ( info->mm64.pa_range < pa_range )
+            pa_range = info->mm64.pa_range;
+    }
+
+    /* pa_range is 4 bits, but the defined encodings are only 3 bits */
+    if ( pa_range&0x8 || !pa_range_info[pa_range].pabits )
+        panic("Unknown encoding of ID_AA64MMFR0_EL1.PARange %x\n", pa_range);
+
+    val |= VTCR_PS(pa_range);
+    val |= VTCR_TG0_4K;
+    val |= VTCR_SL0(pa_range_info[pa_range].sl0);
+    val |= VTCR_T0SZ(pa_range_info[pa_range].t0sz);
+
+    p2m_root_order = pa_range_info[pa_range].root_order;
+    p2m_root_level = 2 - pa_range_info[pa_range].sl0;
+
+    printk("P2M: %d-bit IPA with %d-bit PA\n",
+           64 - pa_range_info[pa_range].t0sz,
+           pa_range_info[pa_range].pabits);
 #endif
+    printk("P2M: %d levels with order-%d root, VTCR 0x%lx\n",
+           4 - P2M_ROOT_LEVEL, P2M_ROOT_ORDER, val);
     setup_virt_paging_one((void *)val);
     smp_call_function(setup_virt_paging_one, (void *)val, 1);
 }
diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
index cfd0c52..a0cf2f4 100644
--- a/xen/include/asm-arm/p2m.h
+++ b/xen/include/asm-arm/p2m.h
@@ -13,7 +13,7 @@  struct p2m_domain {
     /* Pages used to construct the p2m */
     struct page_list_head pages;
 
-    /* Root of p2m page tables, 2 contiguous pages */
+    /* The root of the p2m tree. May be concatenated */
     struct page_info *root;
 
     /* Current VMID in use */
diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
index 979a41d..a744a67 100644
--- a/xen/include/asm-arm/processor.h
+++ b/xen/include/asm-arm/processor.h
@@ -116,6 +116,34 @@ 
 #define TCR_RES1        (_AC(1,UL)<<31)
 #endif
 
+/* VTCR: Stage 2 Translation Control */
+
+#define VTCR_T0SZ(x)    ((x)<<0)
+
+#define VTCR_SL0(x)     ((x)<<6)
+
+#define VTCR_IRGN0_NC   (_AC(0x0,UL)<<8)
+#define VTCR_IRGN0_WBWA (_AC(0x1,UL)<<8)
+#define VTCR_IRGN0_WT   (_AC(0x2,UL)<<8)
+#define VTCR_IRGN0_WB   (_AC(0x3,UL)<<8)
+
+#define VTCR_ORGN0_NC   (_AC(0x0,UL)<<10)
+#define VTCR_ORGN0_WBWA (_AC(0x1,UL)<<10)
+#define VTCR_ORGN0_WT   (_AC(0x2,UL)<<10)
+#define VTCR_ORGN0_WB   (_AC(0x3,UL)<<10)
+
+#define VTCR_SH0_NS     (_AC(0x0,UL)<<12)
+#define VTCR_SH0_OS     (_AC(0x2,UL)<<12)
+#define VTCR_SH0_IS     (_AC(0x3,UL)<<12)
+
+#define VTCR_TG0_4K     (_AC(0x0,UL)<<14) /* arm64 only */
+#define VTCR_TG0_64K    (_AC(0x1,UL)<<14)
+#define VTCR_TG0_16K    (_AC(0x2,UL)<<14)
+
+#define VTCR_PS(x)      ((x)<<16)
+
+#define VTCR_RES1       (_AC(1,UL)<<31)
+
 /* HCPTR Hyp. Coprocessor Trap Register */
 #define HCPTR_TTA       ((_AC(1,U)<<20))        /* Trap trace registers */
 #define HCPTR_CP(x)     ((_AC(1,U)<<(x)))       /* Trap Coprocessor x */