diff mbox

[Xen-devel,for-4.5,v2,2/2] xen: arm: Enable physical address space compression (PDX) on arm32

Message ID 1410534923-17209-2-git-send-email-ian.campbell@citrix.com
State New
Headers show

Commit Message

Ian Campbell Sept. 12, 2014, 3:15 p.m. UTC
This allows us to support sparse physical address maps which we previously
could not because the frametable would end up taking up an enormous fraction
of RAM.

On a fast model which has RAM at 0x80000000-0x100000000 and
0x880000000-0x900000000 this reduces the size of the frametable from
478M to 84M.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
v2:
 - Implement support for arm32 (tested on models).
 - Simplify the arm64 stuff a bit
 - Fixed some bugs
---
 xen/arch/arm/Rules.mk        |    1 +
 xen/arch/arm/mm.c            |   18 +++---
 xen/arch/arm/setup.c         |  136 +++++++++++++++++++-----------------------
 xen/include/asm-arm/config.h |   11 +++-
 xen/include/asm-arm/mm.h     |   37 ++++++------
 xen/include/asm-arm/numa.h   |    2 +-
 6 files changed, 101 insertions(+), 104 deletions(-)

Comments

Julien Grall Sept. 12, 2014, 11 p.m. UTC | #1
Hi Ian,

Don't you implement PDX for both arm32 and arm64? If so, you may need to 
update the commit title.

The patch looks good to me. I have only a few comments on it.

On 12/09/14 08:15, Ian Campbell wrote:
> This allows us to support sparse physical address maps which we previously
> could not because the frametable would end up taking up an enourmous fraction

enormous

[..]

> -    memset(&frame_table[0], 0, nr_pages * sizeof(struct page_info));
> -    memset(&frame_table[nr_pages], -1,
> -           frametable_size - (nr_pages * sizeof(struct page_info)));
> +    memset(&frame_table[0], 0, nr_pdxs * sizeof(struct page_info));
> +    memset(&frame_table[nr_pdxs], -1,
> +           frametable_size - (nr_pdxs * sizeof(struct page_info)));
> +
> +    frametable_virt_end = FRAMETABLE_VIRT_START + (nr_pdxs * sizeof(struct page_info));
>

NIT: I don't think it's necessary to have a blank line before the end of 
the block. I would drop it.

[..]

> +/* Sets all bits from the most-significant 1-bit down to the LSB */
> +static u64 __init fill_mask(u64 mask)
> +{
> +    while (mask & (mask + 1))
> +        mask |= mask + 1;
> +    return mask;
> +}

The function is the same on x86. Shall we create a helper?

[..]

>           setup_xenheap_mappings(bank_start>>PAGE_SHIFT, bank_size>>PAGE_SHIFT);

You forgot to drop the check "if ( bank != bootinfo.mem.nr_banks )" which,
I think, is not relevant anymore.


> diff --git a/xen/include/asm-arm/config.h b/xen/include/asm-arm/config.h
> index 1c3abcf..59b2887 100644
> --- a/xen/include/asm-arm/config.h
> +++ b/xen/include/asm-arm/config.h
> @@ -126,7 +126,12 @@
>   #define CONFIG_SEPARATE_XENHEAP 1
>
>   #define FRAMETABLE_VIRT_START  _AT(vaddr_t,0x02000000)
> -#define VMAP_VIRT_START  _AT(vaddr_t,0x10000000)
> +#define FRAMETABLE_SIZE        MB(128-32)

I would add a comment about why the frametable size is "MB(128-32)".

Regards,
Ian Campbell Sept. 16, 2014, 7:33 p.m. UTC | #2
On Fri, 2014-09-12 at 16:00 -0700, Julien Grall wrote:
> > +/* Sets all bits from the most-significant 1-bit down to the LSB */
> > +static u64 __init fill_mask(u64 mask)
> > +{
> > +    while (mask & (mask + 1))
> > +        mask |= mask + 1;
> > +    return mask;
> > +}
> 
> The function is the same on x86. Shall we create an helper?

I'll see what I can do.

> > diff --git a/xen/include/asm-arm/config.h b/xen/include/asm-arm/config.h
> > index 1c3abcf..59b2887 100644
> > --- a/xen/include/asm-arm/config.h
> > +++ b/xen/include/asm-arm/config.h
> > @@ -126,7 +126,12 @@
> >   #define CONFIG_SEPARATE_XENHEAP 1
> >
> >   #define FRAMETABLE_VIRT_START  _AT(vaddr_t,0x02000000)
> > -#define VMAP_VIRT_START  _AT(vaddr_t,0x10000000)
> > +#define FRAMETABLE_SIZE        MB(128-32)
> 
> I would add a comment about why the frametable size is "MB(128-32)".

There is a big comment just above here which gives an overview of the
virtual address space layout. I don't think it needs repeating here.

Ian.
Julien Grall Sept. 16, 2014, 8:49 p.m. UTC | #3
Hi Ian,

On 16/09/14 12:33, Ian Campbell wrote:
> On Fri, 2014-09-12 at 16:00 -0700, Julien Grall wrote:
>>> diff --git a/xen/include/asm-arm/config.h b/xen/include/asm-arm/config.h
>>> index 1c3abcf..59b2887 100644
>>> --- a/xen/include/asm-arm/config.h
>>> +++ b/xen/include/asm-arm/config.h
>>> @@ -126,7 +126,12 @@
>>>    #define CONFIG_SEPARATE_XENHEAP 1
>>>	
>>>    #define FRAMETABLE_VIRT_START  _AT(vaddr_t,0x02000000)
>>> -#define VMAP_VIRT_START  _AT(vaddr_t,0x10000000)
>>> +#define FRAMETABLE_SIZE        MB(128-32)
>>
>> I would add a comment about why the frametable size is "MB(128-32)".
>
> There is a big comment just above here which gives an overview of the
> virtual address space layout. I don't think it needs repeating here.

Hmm, right. I hadn't looked at include/asm-arm/config.h, sorry.

Regards,
diff mbox

Patch

diff --git a/xen/arch/arm/Rules.mk b/xen/arch/arm/Rules.mk
index 8658176..26fafa2 100644
--- a/xen/arch/arm/Rules.mk
+++ b/xen/arch/arm/Rules.mk
@@ -10,6 +10,7 @@  HAS_DEVICE_TREE := y
 HAS_VIDEO := y
 HAS_ARM_HDLCD := y
 HAS_PASSTHROUGH := y
+HAS_PDX := y
 
 CFLAGS += -I$(BASEDIR)/include
 
diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index 0a243b0..7a04cc4 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -138,7 +138,7 @@  unsigned long xenheap_mfn_start __read_mostly = ~0UL;
 unsigned long xenheap_mfn_end __read_mostly;
 unsigned long xenheap_virt_end __read_mostly;
 
-unsigned long frametable_base_mfn __read_mostly;
+unsigned long frametable_base_pdx __read_mostly;
 unsigned long frametable_virt_end __read_mostly;
 
 unsigned long max_page;
@@ -665,7 +665,7 @@  void __init setup_xenheap_mappings(unsigned long base_mfn,
     /* Align to previous 1GB boundary */
     base_mfn &= ~((FIRST_SIZE>>PAGE_SHIFT)-1);
 
-    offset = base_mfn - xenheap_mfn_start;
+    offset = pfn_to_pdx(base_mfn - xenheap_mfn_start);
     vaddr = DIRECTMAP_VIRT_START + offset*PAGE_SIZE;
 
     while ( base_mfn < end_mfn )
@@ -716,7 +716,8 @@  void __init setup_xenheap_mappings(unsigned long base_mfn,
 void __init setup_frametable_mappings(paddr_t ps, paddr_t pe)
 {
     unsigned long nr_pages = (pe - ps) >> PAGE_SHIFT;
-    unsigned long frametable_size = nr_pages * sizeof(struct page_info);
+    unsigned long nr_pdxs = pfn_to_pdx(nr_pages);
+    unsigned long frametable_size = nr_pdxs * sizeof(struct page_info);
     unsigned long base_mfn;
 #ifdef CONFIG_ARM_64
     lpae_t *second, pte;
@@ -724,7 +725,7 @@  void __init setup_frametable_mappings(paddr_t ps, paddr_t pe)
     int i;
 #endif
 
-    frametable_base_mfn = ps >> PAGE_SHIFT;
+    frametable_base_pdx = pfn_to_pdx(ps >> PAGE_SHIFT);
 
     /* Round up to 32M boundary */
     frametable_size = (frametable_size + 0x1ffffff) & ~0x1ffffff;
@@ -745,11 +746,12 @@  void __init setup_frametable_mappings(paddr_t ps, paddr_t pe)
     create_32mb_mappings(xen_second, FRAMETABLE_VIRT_START, base_mfn, frametable_size >> PAGE_SHIFT);
 #endif
 
-    memset(&frame_table[0], 0, nr_pages * sizeof(struct page_info));
-    memset(&frame_table[nr_pages], -1,
-           frametable_size - (nr_pages * sizeof(struct page_info)));
+    memset(&frame_table[0], 0, nr_pdxs * sizeof(struct page_info));
+    memset(&frame_table[nr_pdxs], -1,
+           frametable_size - (nr_pdxs * sizeof(struct page_info)));
+
+    frametable_virt_end = FRAMETABLE_VIRT_START + (nr_pdxs * sizeof(struct page_info));
 
-    frametable_virt_end = FRAMETABLE_VIRT_START + (nr_pages * sizeof(struct page_info));
 }
 
 void *__init arch_vmap_virt_end(void)
diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
index 446b4dc..7b11344 100644
--- a/xen/arch/arm/setup.c
+++ b/xen/arch/arm/setup.c
@@ -423,11 +423,58 @@  static paddr_t __init get_xen_paddr(void)
     return paddr;
 }
 
+/* Sets all bits from the most-significant 1-bit down to the LSB */
+static u64 __init fill_mask(u64 mask)
+{
+    while (mask & (mask + 1))
+        mask |= mask + 1;
+    return mask;
+}
+
+static void init_pdx(void)
+{
+    paddr_t bank_start, bank_size, bank_end;
+
+    u64 mask = fill_mask(bootinfo.mem.bank[0].start - 1);
+    int bank;
+
+    for ( bank = 0 ; bank < bootinfo.mem.nr_banks; bank++ )
+    {
+        bank_start = bootinfo.mem.bank[bank].start;
+        bank_size = bootinfo.mem.bank[bank].size;
+        bank_end = bank_start + bank_size;
+
+        mask |= bank_start | fill_mask(bank_start ^ (bank_end - 1));
+    }
+
+    for ( bank = 0 ; bank < bootinfo.mem.nr_banks; bank++ )
+    {
+        bank_start = bootinfo.mem.bank[bank].start;
+        bank_size = bootinfo.mem.bank[bank].size;
+        bank_end = bank_start + bank_size;
+
+        if (~mask &
+            fill_mask(bank_start ^ (bank_end - 1)))
+            mask = 0;
+    }
+
+    pfn_pdx_hole_setup(mask >> PAGE_SHIFT);
+
+    for ( bank = 0 ; bank < bootinfo.mem.nr_banks; bank++ )
+    {
+        bank_start = bootinfo.mem.bank[bank].start;
+        bank_size = bootinfo.mem.bank[bank].size;
+        bank_end = bank_start + bank_size;
+
+        set_pdx_range(paddr_to_pfn(bank_start),
+                      paddr_to_pfn(bank_end));
+    }
+}
+
 #ifdef CONFIG_ARM_32
 static void __init setup_mm(unsigned long dtb_paddr, size_t dtb_size)
 {
     paddr_t ram_start, ram_end, ram_size;
-    paddr_t contig_start, contig_end;
     paddr_t s, e;
     unsigned long ram_pages;
     unsigned long heap_pages, xenheap_pages, domheap_pages;
@@ -439,24 +486,11 @@  static void __init setup_mm(unsigned long dtb_paddr, size_t dtb_size)
     if ( !bootinfo.mem.nr_banks )
         panic("No memory bank");
 
-    /*
-     * We are going to accumulate two regions here.
-     *
-     * The first is the bounds of the initial memory region which is
-     * contiguous with the first bank. For simplicity the xenheap is
-     * always allocated from this region.
-     *
-     * The second is the complete bounds of the regions containing RAM
-     * (ie. from the lowest RAM address to the highest), which
-     * includes any holes.
-     *
-     * We also track the number of actual RAM pages (i.e. not counting
-     * the holes).
-     */
-    ram_size  = bootinfo.mem.bank[0].size;
+    init_pdx();
 
-    contig_start = ram_start = bootinfo.mem.bank[0].start;
-    contig_end   = ram_end = ram_start + ram_size;
+    ram_start = bootinfo.mem.bank[0].start;
+    ram_size  = bootinfo.mem.bank[0].size;
+    ram_end   = ram_start + ram_size;
 
     for ( i = 1; i < bootinfo.mem.nr_banks; i++ )
     {
@@ -464,41 +498,9 @@  static void __init setup_mm(unsigned long dtb_paddr, size_t dtb_size)
         paddr_t bank_size = bootinfo.mem.bank[i].size;
         paddr_t bank_end = bank_start + bank_size;
 
-        paddr_t new_ram_size = ram_size + bank_size;
-        paddr_t new_ram_start = min(ram_start,bank_start);
-        paddr_t new_ram_end = max(ram_end,bank_end);
-
-        /*
-         * If the new bank is contiguous with the initial contiguous
-         * region then incorporate it into the contiguous region.
-         *
-         * Otherwise we allow non-contigious regions so long as at
-         * least half of the total RAM region actually contains
-         * RAM. We actually fudge this slightly and require that
-         * adding the current bank does not cause us to violate this
-         * restriction.
-         *
-         * This restriction ensures that the frametable (which is not
-         * currently sparse) does not consume all available RAM.
-         */
-        if ( bank_start == contig_end )
-            contig_end = bank_end;
-        else if ( bank_end == contig_start )
-            contig_start = bank_start;
-        else if ( 2 * new_ram_size < new_ram_end - new_ram_start )
-            /* Would create memory map which is too sparse, so stop here. */
-            break;
-
-        ram_size = new_ram_size;
-        ram_start = new_ram_start;
-        ram_end = new_ram_end;
-    }
-
-    if ( i != bootinfo.mem.nr_banks )
-    {
-        printk("WARNING: only using %d out of %d memory banks\n",
-               i, bootinfo.mem.nr_banks);
-        bootinfo.mem.nr_banks = i;
+        ram_size  = ram_size + bank_size;
+        ram_start = min(ram_start,bank_start);
+        ram_end   = max(ram_end,bank_end);
     }
 
     total_pages = ram_pages = ram_size >> PAGE_SHIFT;
@@ -520,8 +522,7 @@  static void __init setup_mm(unsigned long dtb_paddr, size_t dtb_size)
 
     do
     {
-        /* xenheap is always in the initial contiguous region */
-        e = consider_modules(contig_start, contig_end,
+        e = consider_modules(ram_start, ram_end,
                              pfn_to_paddr(xenheap_pages),
                              32<<20, 0);
         if ( e )
@@ -616,6 +617,8 @@  static void __init setup_mm(unsigned long dtb_paddr, size_t dtb_size)
     unsigned long dtb_pages;
     void *fdt;
 
+    init_pdx();
+
     total_pages = 0;
     for ( bank = 0 ; bank < bootinfo.mem.nr_banks; bank++ )
     {
@@ -624,26 +627,9 @@  static void __init setup_mm(unsigned long dtb_paddr, size_t dtb_size)
         paddr_t bank_end = bank_start + bank_size;
         paddr_t s, e;
 
-        paddr_t new_ram_size = ram_size + bank_size;
-        paddr_t new_ram_start = min(ram_start,bank_start);
-        paddr_t new_ram_end = max(ram_end,bank_end);
-
-        /*
-         * We allow non-contigious regions so long as at least half of
-         * the total RAM region actually contains RAM. We actually
-         * fudge this slightly and require that adding the current
-         * bank does not cause us to violate this restriction.
-         *
-         * This restriction ensures that the frametable (which is not
-         * currently sparse) does not consume all available RAM.
-         */
-        if ( bank > 0 && 2 * new_ram_size < new_ram_end - new_ram_start )
-            /* Would create memory map which is too sparse, so stop here. */
-            break;
-
-        ram_start = new_ram_start;
-        ram_end = new_ram_end;
-        ram_size = new_ram_size;
+        ram_size = ram_size + bank_size;
+        ram_start = min(ram_start,bank_start);
+        ram_end = max(ram_end,bank_end);
 
         setup_xenheap_mappings(bank_start>>PAGE_SHIFT, bank_size>>PAGE_SHIFT);
 
diff --git a/xen/include/asm-arm/config.h b/xen/include/asm-arm/config.h
index 1c3abcf..59b2887 100644
--- a/xen/include/asm-arm/config.h
+++ b/xen/include/asm-arm/config.h
@@ -126,7 +126,12 @@ 
 #define CONFIG_SEPARATE_XENHEAP 1
 
 #define FRAMETABLE_VIRT_START  _AT(vaddr_t,0x02000000)
-#define VMAP_VIRT_START  _AT(vaddr_t,0x10000000)
+#define FRAMETABLE_SIZE        MB(128-32)
+#define FRAMETABLE_NR          (FRAMETABLE_SIZE / sizeof(*frame_table))
+#define FRAMETABLE_VIRT_END    (FRAMETABLE_VIRT_START + FRAMETABLE_SIZE - 1)
+
+#define VMAP_VIRT_START        _AT(vaddr_t,0x10000000)
+
 #define XENHEAP_VIRT_START     _AT(vaddr_t,0x40000000)
 #define XENHEAP_VIRT_END       _AT(vaddr_t,0x7fffffff)
 #define DOMHEAP_VIRT_START     _AT(vaddr_t,0x80000000)
@@ -149,7 +154,9 @@ 
 #define VMAP_VIRT_END    (VMAP_VIRT_START + GB(1) - 1)
 
 #define FRAMETABLE_VIRT_START  GB(32)
-#define FRAMETABLE_VIRT_END    (FRAMETABLE_VIRT_START + GB(32) - 1)
+#define FRAMETABLE_SIZE        GB(32)
+#define FRAMETABLE_NR          (FRAMETABLE_SIZE / sizeof(*frame_table))
+#define FRAMETABLE_VIRT_END    (FRAMETABLE_VIRT_START + FRAMETABLE_SIZE - 1)
 
 #define DIRECTMAP_VIRT_START   SLOT0(256)
 #define DIRECTMAP_SIZE         (SLOT0_ENTRY_SIZE * (265-256))
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 9fa80a4..120500f 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -6,6 +6,7 @@ 
 #include <asm/page.h>
 #include <public/xen.h>
 #include <xen/domain_page.h>
+#include <xen/pdx.h>
 
 /* Align Xen to a 2 MiB boundary. */
 #define XEN_PADDR_ALIGN (1 << 21)
@@ -140,12 +141,14 @@  extern void share_xen_page_with_privileged_guests(
     struct page_info *page, int readonly);
 
 #define frame_table ((struct page_info *)FRAMETABLE_VIRT_START)
-/* MFN of the first page in the frame table. */
-extern unsigned long frametable_base_mfn;
+/* PDX of the first page in the frame table. */
+extern unsigned long frametable_base_pdx;
 
 extern unsigned long max_page;
 extern unsigned long total_pages;
 
+#define PDX_GROUP_SHIFT SECOND_SHIFT
+
 /* Boot-time pagetable setup */
 extern void setup_pagetables(unsigned long boot_phys_offset, paddr_t xen_paddr);
 /* Remove early mappings */
@@ -184,20 +187,15 @@  static inline void __iomem *ioremap_wc(paddr_t start, size_t len)
     return ioremap_attr(start, len, PAGE_HYPERVISOR_WC);
 }
 
+/* XXX -- account for base */
 #define mfn_valid(mfn)        ({                                              \
     unsigned long __m_f_n = (mfn);                                            \
-    likely(__m_f_n >= frametable_base_mfn && __m_f_n < max_page);             \
+    likely(pfn_to_pdx(__m_f_n) >= frametable_base_pdx && __mfn_valid(__m_f_n)); \
 })
 
-#define max_pdx                 max_page
-#define pfn_to_pdx(pfn)         (pfn)
-#define pdx_to_pfn(pdx)         (pdx)
-#define virt_to_pdx(va)         virt_to_mfn(va)
-#define pdx_to_virt(pdx)        mfn_to_virt(pdx)
-
 /* Convert between machine frame numbers and page-info structures. */
-#define mfn_to_page(mfn)  (frame_table + (pfn_to_pdx(mfn) - frametable_base_mfn))
-#define page_to_mfn(pg)   pdx_to_pfn((unsigned long)((pg) - frame_table) + frametable_base_mfn)
+#define mfn_to_page(mfn)  (frame_table + (pfn_to_pdx(mfn) - frametable_base_pdx))
+#define page_to_mfn(pg)   pdx_to_pfn((unsigned long)((pg) - frame_table) + frametable_base_pdx)
 #define __page_to_mfn(pg)  page_to_mfn(pg)
 #define __mfn_to_page(mfn) mfn_to_page(mfn)
 
@@ -230,9 +228,11 @@  static inline void *maddr_to_virt(paddr_t ma)
 #else
 static inline void *maddr_to_virt(paddr_t ma)
 {
-    ASSERT((ma >> PAGE_SHIFT) < (DIRECTMAP_SIZE >> PAGE_SHIFT));
-    ma -= pfn_to_paddr(xenheap_mfn_start);
-    return (void *)(unsigned long) ma + DIRECTMAP_VIRT_START;
+    ASSERT(pfn_to_pdx(ma >> PAGE_SHIFT) < (DIRECTMAP_SIZE >> PAGE_SHIFT));
+    return (void *)(DIRECTMAP_VIRT_START -
+                    pfn_to_paddr(xenheap_mfn_start) +
+                    ((ma & ma_va_bottom_mask) |
+                     ((ma & ma_top_mask) >> pfn_pdx_hole_shift)));
 }
 #endif
 
@@ -258,13 +258,14 @@  static inline int gvirt_to_maddr(vaddr_t va, paddr_t *pa, unsigned int flags)
 static inline struct page_info *virt_to_page(const void *v)
 {
     unsigned long va = (unsigned long)v;
+    unsigned long pdx;
+
     ASSERT(va >= XENHEAP_VIRT_START);
     ASSERT(va < xenheap_virt_end);
 
-    return frame_table
-        + ((va - XENHEAP_VIRT_START) >> PAGE_SHIFT)
-        + xenheap_mfn_start
-        - frametable_base_mfn;
+    pdx = (va - XENHEAP_VIRT_START) >> PAGE_SHIFT;
+    pdx += pfn_to_pdx(xenheap_mfn_start);
+    return frame_table + pdx - frametable_base_pdx;
 }
 
 static inline void *page_to_virt(const struct page_info *pg)
diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h
index 2c019d7..06a9d5a 100644
--- a/xen/include/asm-arm/numa.h
+++ b/xen/include/asm-arm/numa.h
@@ -12,7 +12,7 @@  static inline __attribute__((pure)) int phys_to_nid(paddr_t addr)
 
 /* XXX: implement NUMA support */
 #define node_spanned_pages(nid) (total_pages)
-#define node_start_pfn(nid) (frametable_base_mfn)
+#define node_start_pfn(nid) (pdx_to_pfn(frametable_base_pdx))
 #define __node_distance(a, b) (20)
 
 #endif /* __ARCH_ARM_NUMA_H */