
[RFC] arm64: mm: increase VA range of identity map

Message ID 1424775342-30588-1-git-send-email-ard.biesheuvel@linaro.org
State New

Commit Message

Ard Biesheuvel Feb. 24, 2015, 10:55 a.m. UTC
Hi all,

This patch fixes the annoying issue that AMD Seattle cannot boot the
arm64 defconfig build, crashing so early that even earlycon is completely
silent. It does so by allowing an ID map to have more translation levels
than the default configured value.

Instead, I considered adding code to the EFI stub that catches this problem
(i.e., physical offset of RAM exceeds configured VA range), but that is only
a partial solution: it only covers UEFI boot, not kexec, and works around
rather than fixes the root issue.

Note that it effectively disables KVM upon encountering this condition, but
I suppose a more sophisticated solution would be feasible for that as well.

---------------->8-----------------

The page size and the number of translation levels, and hence the supported
virtual address range, are build-time configurables on arm64 whose optimal
values are use case dependent. However, in the current implementation, if
the system's RAM is located at a very high offset, the virtual address range
needs to reflect that merely because the identity mapping, which is only used
to enable or disable the MMU, requires the extended virtual range to map the
physical memory at an equal virtual offset.

This patch relaxes that requirement, by increasing the number of translation
levels for the identity mapping only, and only when actually needed, i.e.,
when system RAM's offset is found to be out of reach at runtime.
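
In C terms, the runtime decision (done in assembly in __create_page_tables) amounts to
the following sketch; pa_kernel_start stands in for __pa(KERNEL_START) and is purely
illustrative:

	if (pa_kernel_start >> VA_BITS) {
		/*
		 * The kernel's physical address does not fit in a VA_BITS-wide
		 * identity map: widen T0SZ (adding one translation level) for
		 * the ID map only.
		 */
		idmap_t0sz = TCR_T0SZ(MAX_VA_BITS);
	}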

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/kvm/mmu.c                     |  6 ++++++
 arch/arm64/include/asm/memory.h        |  2 ++
 arch/arm64/include/asm/mmu_context.h   | 38 ++++++++++++++++++++++++++++++++++
 arch/arm64/include/asm/page.h          |  6 ++++--
 arch/arm64/include/asm/pgtable-hwdef.h |  7 ++++++-
 arch/arm64/kernel/head.S               | 20 ++++++++++++++++++
 arch/arm64/kernel/smp.c                |  1 +
 arch/arm64/mm/mmu.c                    |  5 +++++
 arch/arm64/mm/proc-macros.S            | 11 ++++++++++
 arch/arm64/mm/proc.S                   |  3 +++
 10 files changed, 96 insertions(+), 3 deletions(-)

Comments

Ard Biesheuvel Feb. 24, 2015, 12:18 p.m. UTC | #1
On 24 February 2015 at 12:03, Catalin Marinas <catalin.marinas@arm.com> wrote:
> On Tue, Feb 24, 2015 at 10:55:42AM +0000, Ard Biesheuvel wrote:
>> This patch fixes the annoying issue that AMD Seattle cannot boot the
>> arm64 defconfig build, crashing so early that even earlycon is completely
>> silent. It does so by allowing an ID map to have more translation levels
>> than the default configured value.
>
> We could have changed the defconfig to 4 levels of page tables but since
> it's currently only needed for idmap, I like your approach.
>
>> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
>> index 3e6859bc3e11..c2da529bb7bd 100644
>> --- a/arch/arm/kvm/mmu.c
>> +++ b/arch/arm/kvm/mmu.c
>> @@ -1532,6 +1532,12 @@ int kvm_mmu_init(void)
>>                        (unsigned long)phys_base);
>>       }
>>
>> +     if ((hyp_idmap_start >> PGDIR_SHIFT) >= PTRS_PER_PGD) {
>> +             kvm_err("Couldn't identity map HYP init page (PA exceeds VA range)\n");
>> +             err = -ENOMEM;
>> +             goto out;
>> +     }
>> +
>>       hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
>>       boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
>
> I wonder whether Hyp can share the same idmap as the rest of the kernel
> (extending the latter to cover the hyp text). The Hyp one requires the
> PTE_USER attributes but we could use the same attributes on the kernel
> idmap since it's never used at the same time with a user process.
>

I can have a go at that: I just tried to avoid breaking the boot here,
but it would indeed be nice to fix KVM at the same time (or at least,
in the same way)

>> diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
>> index f800d45ea226..19f6297a472b 100644
>> --- a/arch/arm64/include/asm/memory.h
>> +++ b/arch/arm64/include/asm/memory.h
>> @@ -42,12 +42,14 @@
>>   * PAGE_OFFSET - the virtual address of the start of the kernel image (top
>>   *            (VA_BITS - 1))
>>   * VA_BITS - the maximum number of bits for virtual addresses.
>> + * MAX_VA_BITS - architectural max value for VA_BITS
>>   * TASK_SIZE - the maximum size of a user space task.
>>   * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
>>   * The module space lives between the addresses given by TASK_SIZE
>>   * and PAGE_OFFSET - it must be within 128MB of the kernel text.
>>   */
>>  #define VA_BITS                      (CONFIG_ARM64_VA_BITS)
>> +#define MAX_VA_BITS          48
>
> Can we determine this dynamically based on the phys address? So on
> hardware that doesn't need such large idmap, it would stick to the host
> configured levels.
>

I guess we could. Is there a benefit to keeping TCR.T0SZ as high as
possible if you are adding the additional level anyway?
It only costs us one page regardless, and only if the runtime check
shows that system RAM exceeds VA_BITS

>> diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
>> index a9eee33dfa62..641ce0574999 100644
>> --- a/arch/arm64/include/asm/mmu_context.h
>> +++ b/arch/arm64/include/asm/mmu_context.h
>> @@ -64,6 +64,44 @@ static inline void cpu_set_reserved_ttbr0(void)
>>       : "r" (ttbr));
>>  }
>>
>> +/*
>> + * TCR.T0SZ value to use when the ID map is active. Usually equals
>> + * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
>> + * physical memory, in which case it will be smaller.
>> + */
>> +extern u64 idmap_t0sz;
>> +
>> +static inline void __cpu_set_tcr_t0sz(u64 t0sz)
>> +{
>> +     unsigned long tcr;
>> +
>> +     if (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48)
>> +         && unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)))
>> +             asm volatile(
>> +             "       mrs     %0, tcr_el1             ;"
>> +             "       bfi     %0, %1, #%2, #%3        ;"
>> +             "       msr     tcr_el1, %0             ;"
>> +             "       isb"
>> +             : "=&r" (tcr)
>> +             : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
>> +}
>
> We need some TLB invalidation here (or in the functions below) as the
> TCR bits are allowed to be cached in the TLB. I think in general it
> should be:
>
>         cpu_set_reserved_ttbr0();
>         flush_tlb_all();        /* any trace of previous T0SZ */
>         cpu_set_(idmap|default)_tcr_t0sz();
>

OK

>> +/*
>> + * Set TCR.T0SZ to the value appropriate for activating the identity map.
>> + */
>> +static inline void cpu_set_idmap_tcr_t0sz(void)
>> +{
>> +     __cpu_set_tcr_t0sz(idmap_t0sz);
>> +}
>> +
>> +/*
>> + * Set TCR.T0SZ to its default value (based on VA_BITS)
>> + */
>> +static inline void cpu_set_default_tcr_t0sz(void)
>> +{
>> +     __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
>> +}
>> +
>
> So flush_tlb_all() could be at the beginning of these functions.
>
>> diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
>> index 22b16232bd60..3d02b1869eb8 100644
>> --- a/arch/arm64/include/asm/page.h
>> +++ b/arch/arm64/include/asm/page.h
>> @@ -33,7 +33,9 @@
>>   * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
>>   * map the kernel. With the 64K page configuration, swapper and idmap need to
>>   * map to pte level. The swapper also maps the FDT (see __create_page_tables
>> - * for more information).
>> + * for more information). Note that the number of ID map translation levels
>> + * could be increased on the fly if system RAM is out of reach for the default
>> + * VA range, so 3 pages are reserved in all cases.
>>   */
>>  #ifdef CONFIG_ARM64_64K_PAGES
>>  #define SWAPPER_PGTABLE_LEVELS       (CONFIG_ARM64_PGTABLE_LEVELS)
>> @@ -42,7 +44,7 @@
>>  #endif
>>
>>  #define SWAPPER_DIR_SIZE     (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
>> -#define IDMAP_DIR_SIZE               (SWAPPER_DIR_SIZE)
>> +#define IDMAP_DIR_SIZE               (3 * PAGE_SIZE)
>
> That's fine, we make this permanent.
>
>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>> index 8ce88e08c030..8e1778e7638e 100644
>> --- a/arch/arm64/kernel/head.S
>> +++ b/arch/arm64/kernel/head.S
>> @@ -387,6 +387,26 @@ __create_page_tables:
>>       mov     x0, x25                         // idmap_pg_dir
>>       ldr     x3, =KERNEL_START
>>       add     x3, x3, x28                     // __pa(KERNEL_START)
>> +
>> +#ifndef CONFIG_ARM64_VA_BITS_48
>> +#define EXTRA_SHIFT  (PGDIR_SHIFT + PAGE_SHIFT - 3)
>> +     /*
>> +      * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
>> +      * created that covers system RAM if that is located sufficiently high
>> +      * in the physical address space. So for the ID map, use the entire
>> +      * available virtual range in that case.
>> +      */
>> +     lsr     x5, x3, #VA_BITS
>> +     cbz     x5, 1f
>> +
>> +     adrp    x6, idmap_t0sz
>> +     mov     x5, #TCR_T0SZ(MAX_VA_BITS)
>> +     str     x5, [x6, #:lo12:idmap_t0sz]
>
> Could we use memstart_addr (it's probably available in x24 here) to
> calculate the idmap_t0sz? We can get the ilog2(memstart_addr +
> KERNEL_END - PAGE_OFFSET) using clz.
>

Yes, but same question as above.
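
For reference, a minimal C sketch of the clz/ilog2-based calculation suggested above;
the helper name and the use of fls64()/max_t() are illustrative, not part of the patch:

	static u64 dynamic_idmap_t0sz(phys_addr_t pa_kernel_end)
	{
		/* bits needed to identity map [0, __pa(KERNEL_END)); fls64 ~ 64 - clz */
		unsigned int bits = fls64(pa_kernel_end - 1);

		return TCR_T0SZ(max_t(unsigned int, bits, VA_BITS));
	}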

>> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
>> index 328b8ce4b007..606005101020 100644
>> --- a/arch/arm64/kernel/smp.c
>> +++ b/arch/arm64/kernel/smp.c
>> @@ -150,6 +150,7 @@ asmlinkage void secondary_start_kernel(void)
>>        * point to zero page to avoid speculatively fetching new entries.
>>        */
>>       cpu_set_reserved_ttbr0();
>> +     cpu_set_default_tcr_t0sz();
>>       flush_tlb_all();
>
> If you avoid the flush_tlb_all() in cpu_set_default_tcr_t0sz(), you
> should place the latter after the TLB invalidation.
>
>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> index c6daaf6c6f97..dffa1d05a101 100644
>> --- a/arch/arm64/mm/mmu.c
>> +++ b/arch/arm64/mm/mmu.c
>> @@ -40,6 +40,8 @@
>>
>>  #include "mm.h"
>>
>> +u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
>> +
>>  /*
>>   * Empty_zero_page is a special page that is used for zero-initialized data
>>   * and COW.
>> @@ -453,6 +455,7 @@ void __init paging_init(void)
>>        * point to zero page to avoid speculatively fetching new entries.
>>        */
>>       cpu_set_reserved_ttbr0();
>> +     cpu_set_default_tcr_t0sz();
>>       flush_tlb_all();
>>  }
>
> Same here.
>
>> @@ -461,6 +464,8 @@ void __init paging_init(void)
>>   */
>>  void setup_mm_for_reboot(void)
>>  {
>> +     cpu_set_reserved_ttbr0();
>> +     cpu_set_idmap_tcr_t0sz();
>>       cpu_switch_mm(idmap_pg_dir, &init_mm);
>>       flush_tlb_all();
>>  }
>
> And I think here we should move the flush_tlb_all() just after
> cpu_set_reserved_ttbr0(). Since this would point to the zero page, we
> won't get any new allocations, so I don't think we would need another
> TLB invalidation after cpu_switch_mm().
>
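
A sketch of setup_mm_for_reboot() with the ordering proposed above (this reflects the
suggestion, not the code in this patch):

	void setup_mm_for_reboot(void)
	{
		cpu_set_reserved_ttbr0();
		flush_tlb_all();	/* TTBR0 points at the zero page, so no new entries appear */
		cpu_set_idmap_tcr_t0sz();
		cpu_switch_mm(idmap_pg_dir, &init_mm);
	}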

Thanks,
Ard.

Ard Biesheuvel Feb. 24, 2015, 2:53 p.m. UTC | #2
On 24 February 2015 at 14:43, Catalin Marinas <catalin.marinas@arm.com> wrote:
> On Tue, Feb 24, 2015 at 12:18:26PM +0000, Ard Biesheuvel wrote:
>> On 24 February 2015 at 12:03, Catalin Marinas <catalin.marinas@arm.com> wrote:
>> > On Tue, Feb 24, 2015 at 10:55:42AM +0000, Ard Biesheuvel wrote:
>> >> diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
>> >> index f800d45ea226..19f6297a472b 100644
>> >> --- a/arch/arm64/include/asm/memory.h
>> >> +++ b/arch/arm64/include/asm/memory.h
>> >> @@ -42,12 +42,14 @@
>> >>   * PAGE_OFFSET - the virtual address of the start of the kernel image (top
>> >>   *            (VA_BITS - 1))
>> >>   * VA_BITS - the maximum number of bits for virtual addresses.
>> >> + * MAX_VA_BITS - architectural max value for VA_BITS
>> >>   * TASK_SIZE - the maximum size of a user space task.
>> >>   * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
>> >>   * The module space lives between the addresses given by TASK_SIZE
>> >>   * and PAGE_OFFSET - it must be within 128MB of the kernel text.
>> >>   */
>> >>  #define VA_BITS                      (CONFIG_ARM64_VA_BITS)
>> >> +#define MAX_VA_BITS          48
>> >
>> > Can we determine this dynamically based on the phys address? So on
>> > hardware that doesn't need such large idmap, it would stick to the host
>> > configured levels.
>>
>> I guess we could. Is there a benefit to keeping TCR.T0SZ as high as
>> possible if you are adding the additional level anyway?
>> It only costs us one page regardless, and only if the runtime check
>> shows that system RAM exceeds VA_BITS
>
> It's not the page that we may waste in the idmap table but higher T0SZ
> implies deeper translation tables, so an additional page table walk
> during power up/down sequences.
>

OK, but the patch only modifies T0SZ from the default if the system
RAM is observed to be out of reach at runtime, and so MAX_VA_BITS is
only used in that case. For both 4k and 64k pages, this will configure
one additional level of translation. Does it still matter, then, which
exact T0SZ value is used among the possible values that all
correspond to 4 levels, for instance?

I think it is cleaner to use the PA of KERNEL_END, as you suggest, so
I am removing MAX_VA_BITS anyway, but I am just curious.
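
For context, the T0SZ-to-levels relationship can be sketched as below (illustrative
helper, not in the patch): with 4K pages every VA_BITS value from 40 to 48, i.e. T0SZ
24 down to 16, implies 4 levels, and with 64K pages every value from 43 to 48 implies
3, so any of those T0SZ values costs the same number of table walks.

	/* translation levels implied by a given T0SZ for the 4K/64K granules (sketch) */
	static unsigned int levels_for_t0sz(unsigned int t0sz, unsigned int page_shift)
	{
		unsigned int va_bits = 64 - t0sz;
		unsigned int bits_per_level = page_shift - 3;	/* 9 for 4K, 13 for 64K */

		return DIV_ROUND_UP(va_bits - page_shift, bits_per_level);
	}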

> We could populate the idmap table with the maximum number of levels
> permanently (based on MAX_VA_BITS) but set T0SZ according to the SoC's
> needs, in which case TTBR0_EL1 may need to skip the first page of the
> original idmap (we do something similar in arch/arm/mm/proc-v7-3level.S
> with TTBR1_OFFSET, since for 1GB of kernel linear mapping we only need
> 2 levels but swapper_pg_dir always has 3).
>
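
A hedged sketch of that alternative (names and layout are illustrative, nothing like
this is in the patch): build the idmap with the maximum number of levels up front and,
when RAM fits in the configured VA range, point TTBR0 past the extra top-level page,
analogous to TTBR1_OFFSET on 32-bit ARM:

	static phys_addr_t idmap_ttbr0(phys_addr_t ram_top)
	{
		phys_addr_t ttbr = __pa(idmap_pg_dir);

		/* skip the extra top-level table when the default levels suffice */
		if (!(ram_top >> VA_BITS))
			ttbr += PAGE_SIZE;

		return ttbr;
	}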

Patch

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 3e6859bc3e11..c2da529bb7bd 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1532,6 +1532,12 @@  int kvm_mmu_init(void)
 			 (unsigned long)phys_base);
 	}
 
+	if ((hyp_idmap_start >> PGDIR_SHIFT) >= PTRS_PER_PGD) {
+		kvm_err("Couldn't identity map HYP init page (PA exceeds VA range)\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
 	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
 	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
 
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index f800d45ea226..19f6297a472b 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -42,12 +42,14 @@ 
  * PAGE_OFFSET - the virtual address of the start of the kernel image (top
  *		 (VA_BITS - 1))
  * VA_BITS - the maximum number of bits for virtual addresses.
+ * MAX_VA_BITS - architectural max value for VA_BITS
  * TASK_SIZE - the maximum size of a user space task.
  * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
  * The module space lives between the addresses given by TASK_SIZE
  * and PAGE_OFFSET - it must be within 128MB of the kernel text.
  */
 #define VA_BITS			(CONFIG_ARM64_VA_BITS)
+#define MAX_VA_BITS		48
 #define PAGE_OFFSET		(UL(0xffffffffffffffff) << (VA_BITS - 1))
 #define MODULES_END		(PAGE_OFFSET)
 #define MODULES_VADDR		(MODULES_END - SZ_64M)
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index a9eee33dfa62..641ce0574999 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -64,6 +64,44 @@  static inline void cpu_set_reserved_ttbr0(void)
 	: "r" (ttbr));
 }
 
+/*
+ * TCR.T0SZ value to use when the ID map is active. Usually equals
+ * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
+ * physical memory, in which case it will be smaller.
+ */
+extern u64 idmap_t0sz;
+
+static inline void __cpu_set_tcr_t0sz(u64 t0sz)
+{
+	unsigned long tcr;
+
+	if (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48)
+	    && unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)))
+		asm volatile(
+		"	mrs	%0, tcr_el1		;"
+		"	bfi	%0, %1, #%2, #%3	;"
+		"	msr	tcr_el1, %0		;"
+		"	isb"
+		: "=&r" (tcr)
+		: "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+}
+
+/*
+ * Set TCR.T0SZ to the value appropriate for activating the identity map.
+ */
+static inline void cpu_set_idmap_tcr_t0sz(void)
+{
+	__cpu_set_tcr_t0sz(idmap_t0sz);
+}
+
+/*
+ * Set TCR.T0SZ to its default value (based on VA_BITS)
+ */
+static inline void cpu_set_default_tcr_t0sz(void)
+{
+	__cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
+}
+
 static inline void switch_new_context(struct mm_struct *mm)
 {
 	unsigned long flags;
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 22b16232bd60..3d02b1869eb8 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -33,7 +33,9 @@ 
  * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
  * map the kernel. With the 64K page configuration, swapper and idmap need to
  * map to pte level. The swapper also maps the FDT (see __create_page_tables
- * for more information).
+ * for more information). Note that the number of ID map translation levels
+ * could be increased on the fly if system RAM is out of reach for the default
+ * VA range, so 3 pages are reserved in all cases.
  */
 #ifdef CONFIG_ARM64_64K_PAGES
 #define SWAPPER_PGTABLE_LEVELS	(CONFIG_ARM64_PGTABLE_LEVELS)
@@ -42,7 +44,7 @@ 
 #endif
 
 #define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
-#define IDMAP_DIR_SIZE		(SWAPPER_DIR_SIZE)
+#define IDMAP_DIR_SIZE		(3 * PAGE_SIZE)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 5f930cc9ea83..847e864202cc 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -143,7 +143,12 @@ 
 /*
  * TCR flags.
  */
-#define TCR_TxSZ(x)		(((UL(64) - (x)) << 16) | ((UL(64) - (x)) << 0))
+#define TCR_T0SZ_OFFSET		0
+#define TCR_T1SZ_OFFSET		16
+#define TCR_T0SZ(x)		((UL(64) - (x)) << TCR_T0SZ_OFFSET)
+#define TCR_T1SZ(x)		((UL(64) - (x)) << TCR_T1SZ_OFFSET)
+#define TCR_TxSZ(x)		(TCR_T0SZ(x) | TCR_T1SZ(x))
+#define TCR_TxSZ_WIDTH		6
 #define TCR_IRGN_NC		((UL(0) << 8) | (UL(0) << 24))
 #define TCR_IRGN_WBWA		((UL(1) << 8) | (UL(1) << 24))
 #define TCR_IRGN_WT		((UL(2) << 8) | (UL(2) << 24))
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 8ce88e08c030..8e1778e7638e 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -387,6 +387,26 @@  __create_page_tables:
 	mov	x0, x25				// idmap_pg_dir
 	ldr	x3, =KERNEL_START
 	add	x3, x3, x28			// __pa(KERNEL_START)
+
+#ifndef CONFIG_ARM64_VA_BITS_48
+#define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
+	/*
+	 * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
+	 * created that covers system RAM if that is located sufficiently high
+	 * in the physical address space. So for the ID map, use the entire
+	 * available virtual range in that case.
+	 */
+	lsr	x5, x3, #VA_BITS
+	cbz	x5, 1f
+
+	adrp	x6, idmap_t0sz
+	mov	x5, #TCR_T0SZ(MAX_VA_BITS)
+	str	x5, [x6, #:lo12:idmap_t0sz]
+
+	create_table_entry x0, x3, EXTRA_SHIFT, PTRS_PER_PGD, x5, x6
+1:
+#endif
+
 	create_pgd_entry x0, x3, x5, x6
 	ldr	x6, =KERNEL_END
 	mov	x5, x3				// __pa(KERNEL_START)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 328b8ce4b007..606005101020 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -150,6 +150,7 @@  asmlinkage void secondary_start_kernel(void)
 	 * point to zero page to avoid speculatively fetching new entries.
 	 */
 	cpu_set_reserved_ttbr0();
+	cpu_set_default_tcr_t0sz();
 	flush_tlb_all();
 
 	preempt_disable();
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index c6daaf6c6f97..dffa1d05a101 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -40,6 +40,8 @@ 
 
 #include "mm.h"
 
+u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
+
 /*
  * Empty_zero_page is a special page that is used for zero-initialized data
  * and COW.
@@ -453,6 +455,7 @@  void __init paging_init(void)
 	 * point to zero page to avoid speculatively fetching new entries.
 	 */
 	cpu_set_reserved_ttbr0();
+	cpu_set_default_tcr_t0sz();
 	flush_tlb_all();
 }
 
@@ -461,6 +464,8 @@  void __init paging_init(void)
  */
 void setup_mm_for_reboot(void)
 {
+	cpu_set_reserved_ttbr0();
+	cpu_set_idmap_tcr_t0sz();
 	cpu_switch_mm(idmap_pg_dir, &init_mm);
 	flush_tlb_all();
 }
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
index 005d29e2977d..c17fdd6a19bc 100644
--- a/arch/arm64/mm/proc-macros.S
+++ b/arch/arm64/mm/proc-macros.S
@@ -52,3 +52,14 @@ 
 	mov	\reg, #4			// bytes per word
 	lsl	\reg, \reg, \tmp		// actual cache line size
 	.endm
+
+/*
+ * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
+ */
+	.macro	tcr_set_idmap_t0sz, valreg, tmpreg
+#ifndef CONFIG_ARM64_VA_BITS_48
+	adrp	\tmpreg, idmap_t0sz
+	ldr	\tmpreg, [\tmpreg, #:lo12:idmap_t0sz]
+	bfi	\valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
+#endif
+	.endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 28eebfb6af76..cdd754e19b9b 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -156,6 +156,7 @@  ENTRY(cpu_do_resume)
 	msr	cpacr_el1, x6
 	msr	ttbr0_el1, x1
 	msr	ttbr1_el1, x7
+	tcr_set_idmap_t0sz x8, x7
 	msr	tcr_el1, x8
 	msr	vbar_el1, x9
 	msr	mdscr_el1, x10
@@ -233,6 +234,8 @@  ENTRY(__cpu_setup)
 	 */
 	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
 			TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0
+	tcr_set_idmap_t0sz	x10, x9
+
 	/*
 	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
 	 * TCR_EL1.