diff mbox

[v5sub1,7/8] arm64: move kernel image to base of vmalloc area

Message ID 1454324093-15998-8-git-send-email-ard.biesheuvel@linaro.org
State Superseded
Headers show

Commit Message

Ard Biesheuvel Feb. 1, 2016, 10:54 a.m. UTC
This moves the module area to right before the vmalloc area, and
moves the kernel image to the base of the vmalloc area. This is
an intermediate step towards implementing KASLR, which allows the
kernel image to be located anywhere in the vmalloc area.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

---
 arch/arm64/include/asm/kasan.h   |  2 +-
 arch/arm64/include/asm/memory.h  | 21 +++--
 arch/arm64/include/asm/pgtable.h | 10 +-
 arch/arm64/mm/dump.c             | 12 +--
 arch/arm64/mm/init.c             | 23 ++---
 arch/arm64/mm/kasan_init.c       | 31 ++++++-
 arch/arm64/mm/mmu.c              | 97 +++++++++++++-------
 7 files changed, 129 insertions(+), 67 deletions(-)

-- 
2.5.0


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

Comments

Mark Rutland Feb. 1, 2016, 2:32 p.m. UTC | #1
On Mon, Feb 01, 2016 at 11:54:52AM +0100, Ard Biesheuvel wrote:
> This moves the module area to right before the vmalloc area, and

> moves the kernel image to the base of the vmalloc area. This is

> an intermediate step towards implementing KASLR, which allows the

> kernel image to be located anywhere in the vmalloc area.

> 

> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>


With the fix for the issue Catalin spotted:

Reviewed-by: Mark Rutland <mark.rutland@arm.com>


Mark.

> ---

>  arch/arm64/include/asm/kasan.h   |  2 +-

>  arch/arm64/include/asm/memory.h  | 21 +++--

>  arch/arm64/include/asm/pgtable.h | 10 +-

>  arch/arm64/mm/dump.c             | 12 +--

>  arch/arm64/mm/init.c             | 23 ++---

>  arch/arm64/mm/kasan_init.c       | 31 ++++++-

>  arch/arm64/mm/mmu.c              | 97 +++++++++++++-------

>  7 files changed, 129 insertions(+), 67 deletions(-)

> 

> diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h

> index de0d21211c34..71ad0f93eb71 100644

> --- a/arch/arm64/include/asm/kasan.h

> +++ b/arch/arm64/include/asm/kasan.h

> @@ -14,7 +14,7 @@

>   * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses.

>   */

>  #define KASAN_SHADOW_START      (VA_START)

> -#define KASAN_SHADOW_END        (KASAN_SHADOW_START + (1UL << (VA_BITS - 3)))

> +#define KASAN_SHADOW_END        (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)

>  

>  /*

>   * This value is used to map an address to the corresponding shadow

> diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h

> index aebc739f5a11..4388651d1f0d 100644

> --- a/arch/arm64/include/asm/memory.h

> +++ b/arch/arm64/include/asm/memory.h

> @@ -45,16 +45,15 @@

>   * VA_START - the first kernel virtual address.

>   * TASK_SIZE - the maximum size of a user space task.

>   * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.

> - * The module space lives between the addresses given by TASK_SIZE

> - * and PAGE_OFFSET - it must be within 128MB of the kernel text.

>   */

>  #define VA_BITS			(CONFIG_ARM64_VA_BITS)

>  #define VA_START		(UL(0xffffffffffffffff) << VA_BITS)

>  #define PAGE_OFFSET		(UL(0xffffffffffffffff) << (VA_BITS - 1))

> -#define KIMAGE_VADDR		(PAGE_OFFSET)

> -#define MODULES_END		(KIMAGE_VADDR)

> -#define MODULES_VADDR		(MODULES_END - SZ_64M)

> -#define PCI_IO_END		(MODULES_VADDR - SZ_2M)

> +#define KIMAGE_VADDR		(MODULES_END)

> +#define MODULES_END		(MODULES_VADDR + MODULES_VSIZE)

> +#define MODULES_VADDR		(VA_START + KASAN_SHADOW_SIZE)

> +#define MODULES_VSIZE		(SZ_64M)

> +#define PCI_IO_END		(PAGE_OFFSET - SZ_2M)

>  #define PCI_IO_START		(PCI_IO_END - PCI_IO_SIZE)

>  #define FIXADDR_TOP		(PCI_IO_START - SZ_2M)

>  #define TASK_SIZE_64		(UL(1) << VA_BITS)

> @@ -72,6 +71,16 @@

>  #define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 4))

>  

>  /*

> + * The size of the KASAN shadow region. This should be 1/8th of the

> + * size of the entire kernel virtual address space.

> + */

> +#ifdef CONFIG_KASAN

> +#define KASAN_SHADOW_SIZE	(UL(1) << (VA_BITS - 3))

> +#else

> +#define KASAN_SHADOW_SIZE	(0)

> +#endif

> +

> +/*

>   * Physical vs virtual RAM address space conversion.  These are

>   * private definitions which should NOT be used outside memory.h

>   * files.  Use virt_to_phys/phys_to_virt/__pa/__va instead.

> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h

> index 87355408d448..a440f5a85d08 100644

> --- a/arch/arm64/include/asm/pgtable.h

> +++ b/arch/arm64/include/asm/pgtable.h

> @@ -36,19 +36,13 @@

>   *

>   * VMEMAP_SIZE: allows the whole VA space to be covered by a struct page array

>   *	(rounded up to PUD_SIZE).

> - * VMALLOC_START: beginning of the kernel VA space

> + * VMALLOC_START: beginning of the kernel vmalloc space

>   * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space,

>   *	fixed mappings and modules

>   */

>  #define VMEMMAP_SIZE		ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)

>  

> -#ifndef CONFIG_KASAN

> -#define VMALLOC_START		(VA_START)

> -#else

> -#include <asm/kasan.h>

> -#define VMALLOC_START		(KASAN_SHADOW_END + SZ_64K)

> -#endif

> -

> +#define VMALLOC_START		(MODULES_END)

>  #define VMALLOC_END		(PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)

>  

>  #define vmemmap			((struct page *)(VMALLOC_END + SZ_64K))

> diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c

> index 0adbebbc2803..e83ffb00560c 100644

> --- a/arch/arm64/mm/dump.c

> +++ b/arch/arm64/mm/dump.c

> @@ -35,7 +35,9 @@ struct addr_marker {

>  };

>  

>  enum address_markers_idx {

> -	VMALLOC_START_NR = 0,

> +	MODULES_START_NR = 0,

> +	MODULES_END_NR,

> +	VMALLOC_START_NR,

>  	VMALLOC_END_NR,

>  #ifdef CONFIG_SPARSEMEM_VMEMMAP

>  	VMEMMAP_START_NR,

> @@ -45,12 +47,12 @@ enum address_markers_idx {

>  	FIXADDR_END_NR,

>  	PCI_START_NR,

>  	PCI_END_NR,

> -	MODULES_START_NR,

> -	MODULES_END_NR,

>  	KERNEL_SPACE_NR,

>  };

>  

>  static struct addr_marker address_markers[] = {

> +	{ MODULES_VADDR,	"Modules start" },

> +	{ MODULES_END,		"Modules end" },

>  	{ VMALLOC_START,	"vmalloc() Area" },

>  	{ VMALLOC_END,		"vmalloc() End" },

>  #ifdef CONFIG_SPARSEMEM_VMEMMAP

> @@ -61,9 +63,7 @@ static struct addr_marker address_markers[] = {

>  	{ FIXADDR_TOP,		"Fixmap end" },

>  	{ PCI_IO_START,		"PCI I/O start" },

>  	{ PCI_IO_END,		"PCI I/O end" },

> -	{ MODULES_VADDR,	"Modules start" },

> -	{ MODULES_END,		"Modules end" },

> -	{ PAGE_OFFSET,		"Kernel Mapping" },

> +	{ PAGE_OFFSET,		"Linear Mapping" },

>  	{ -1,			NULL },

>  };

>  

> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c

> index f3b061e67bfe..1d627cd8121c 100644

> --- a/arch/arm64/mm/init.c

> +++ b/arch/arm64/mm/init.c

> @@ -36,6 +36,7 @@

>  #include <linux/swiotlb.h>

>  

>  #include <asm/fixmap.h>

> +#include <asm/kasan.h>

>  #include <asm/memory.h>

>  #include <asm/sections.h>

>  #include <asm/setup.h>

> @@ -302,22 +303,26 @@ void __init mem_init(void)

>  #ifdef CONFIG_KASAN

>  		  "    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n"

>  #endif

> +		  "    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n"

>  		  "    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n"

> +		  "      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"

> +		  "      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"

> +		  "      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n"

>  #ifdef CONFIG_SPARSEMEM_VMEMMAP

>  		  "    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n"

>  		  "              0x%16lx - 0x%16lx   (%6ld MB actual)\n"

>  #endif

>  		  "    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n"

>  		  "    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n"

> -		  "    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n"

> -		  "    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n"

> -		  "      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"

> -		  "      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"

> -		  "      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",

> +		  "    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n",

>  #ifdef CONFIG_KASAN

>  		  MLG(KASAN_SHADOW_START, KASAN_SHADOW_END),

>  #endif

> +		  MLM(MODULES_VADDR, MODULES_END),

>  		  MLG(VMALLOC_START, VMALLOC_END),

> +		  MLK_ROUNDUP(__init_begin, __init_end),

> +		  MLK_ROUNDUP(_text, _etext),

> +		  MLK_ROUNDUP(_sdata, _edata),

>  #ifdef CONFIG_SPARSEMEM_VMEMMAP

>  		  MLG((unsigned long)vmemmap,

>  		      (unsigned long)vmemmap + VMEMMAP_SIZE),

> @@ -326,11 +331,7 @@ void __init mem_init(void)

>  #endif

>  		  MLK(FIXADDR_START, FIXADDR_TOP),

>  		  MLM(PCI_IO_START, PCI_IO_END),

> -		  MLM(MODULES_VADDR, MODULES_END),

> -		  MLM(PAGE_OFFSET, (unsigned long)high_memory),

> -		  MLK_ROUNDUP(__init_begin, __init_end),

> -		  MLK_ROUNDUP(_text, _etext),

> -		  MLK_ROUNDUP(_sdata, _edata));

> +		  MLM(PAGE_OFFSET, (unsigned long)high_memory));

>  

>  #undef MLK

>  #undef MLM

> @@ -358,8 +359,8 @@ void __init mem_init(void)

>  

>  void free_initmem(void)

>  {

> -	fixup_init();

>  	free_initmem_default(0);

> +	fixup_init();

>  }

>  

>  #ifdef CONFIG_BLK_DEV_INITRD

> diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c

> index cc569a38bc76..66c246871d2e 100644

> --- a/arch/arm64/mm/kasan_init.c

> +++ b/arch/arm64/mm/kasan_init.c

> @@ -17,9 +17,11 @@

>  #include <linux/start_kernel.h>

>  

>  #include <asm/mmu_context.h>

> +#include <asm/kernel-pgtable.h>

>  #include <asm/page.h>

>  #include <asm/pgalloc.h>

>  #include <asm/pgtable.h>

> +#include <asm/sections.h>

>  #include <asm/tlbflush.h>

>  

>  static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);

> @@ -33,7 +35,7 @@ static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,

>  	if (pmd_none(*pmd))

>  		pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);

>  

> -	pte = pte_offset_kernel(pmd, addr);

> +	pte = pte_offset_kimg(pmd, addr);

>  	do {

>  		next = addr + PAGE_SIZE;

>  		set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page),

> @@ -51,7 +53,7 @@ static void __init kasan_early_pmd_populate(pud_t *pud,

>  	if (pud_none(*pud))

>  		pud_populate(&init_mm, pud, kasan_zero_pmd);

>  

> -	pmd = pmd_offset(pud, addr);

> +	pmd = pmd_offset_kimg(pud, addr);

>  	do {

>  		next = pmd_addr_end(addr, end);

>  		kasan_early_pte_populate(pmd, addr, next);

> @@ -68,7 +70,7 @@ static void __init kasan_early_pud_populate(pgd_t *pgd,

>  	if (pgd_none(*pgd))

>  		pgd_populate(&init_mm, pgd, kasan_zero_pud);

>  

> -	pud = pud_offset(pgd, addr);

> +	pud = pud_offset_kimg(pgd, addr);

>  	do {

>  		next = pud_addr_end(addr, end);

>  		kasan_early_pmd_populate(pud, addr, next);

> @@ -126,9 +128,13 @@ static void __init clear_pgds(unsigned long start,

>  

>  void __init kasan_init(void)

>  {

> +	u64 kimg_shadow_start, kimg_shadow_end;

>  	struct memblock_region *reg;

>  	int i;

>  

> +	kimg_shadow_start = (u64)kasan_mem_to_shadow(_text);

> +	kimg_shadow_end = (u64)kasan_mem_to_shadow(_end);

> +

>  	/*

>  	 * We are going to perform proper setup of shadow memory.

>  	 * At first we should unmap early shadow (clear_pgds() call bellow).

> @@ -142,8 +148,25 @@ void __init kasan_init(void)

>  

>  	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);

>  

> +	vmemmap_populate(kimg_shadow_start, kimg_shadow_end, NUMA_NO_NODE);

> +

> +	/*

> +	 * vmemmap_populate() has populated the shadow region that covers the

> +	 * kernel image with SWAPPER_BLOCK_SIZE mappings, so we have to round

> +	 * the start and end addresses to SWAPPER_BLOCK_SIZE as well, to prevent

> +	 * kasan_populate_zero_shadow() from replacing the PMD block mappings

> +	 * with PMD table mappings at the edges of the shadow region for the

> +	 * kernel image.

> +	 */

> +	if (ARM64_SWAPPER_USES_SECTION_MAPS) {

> +		kimg_shadow_start = round_down(kimg_shadow_start,

> +					       SWAPPER_BLOCK_SIZE);

> +		kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE);

> +	}

>  	kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,

> -			kasan_mem_to_shadow((void *)MODULES_VADDR));

> +				   (void *)kimg_shadow_start);

> +	kasan_populate_zero_shadow((void *)kimg_shadow_end,

> +				   kasan_mem_to_shadow((void *)PAGE_OFFSET));

>  

>  	for_each_memblock(memory, reg) {

>  		void *start = (void *)__phys_to_virt(reg->base);

> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c

> index b84915723ea0..4c4b15932963 100644

> --- a/arch/arm64/mm/mmu.c

> +++ b/arch/arm64/mm/mmu.c

> @@ -53,6 +53,10 @@ u64 idmap_t0sz = TCR_T0SZ(VA_BITS);

>  unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;

>  EXPORT_SYMBOL(empty_zero_page);

>  

> +static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;

> +static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;

> +static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;

> +

>  pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,

>  			      unsigned long size, pgprot_t vma_prot)

>  {

> @@ -349,14 +353,14 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end

>  {

>  

>  	unsigned long kernel_start = __pa(_stext);

> -	unsigned long kernel_end = __pa(_end);

> +	unsigned long kernel_end = __pa(_etext);

>  

>  	/*

> -	 * The kernel itself is mapped at page granularity. Map all other

> -	 * memory, making sure we don't overwrite the existing kernel mappings.

> +	 * Take care not to create a writable alias for the

> +	 * read-only text and rodata sections of the kernel image.

>  	 */

>  

> -	/* No overlap with the kernel. */

> +	/* No overlap with the kernel text */

>  	if (end < kernel_start || start >= kernel_end) {

>  		__create_pgd_mapping(pgd, start, __phys_to_virt(start),

>  				     end - start, PAGE_KERNEL,

> @@ -365,7 +369,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end

>  	}

>  

>  	/*

> -	 * This block overlaps the kernel mapping. Map the portion(s) which

> +	 * This block overlaps the kernel text mapping. Map the portion(s) which

>  	 * don't overlap.

>  	 */

>  	if (start < kernel_start)

> @@ -398,25 +402,28 @@ static void __init map_mem(pgd_t *pgd)

>  	}

>  }

>  

> -#ifdef CONFIG_DEBUG_RODATA

>  void mark_rodata_ro(void)

>  {

> +	if (!IS_ENABLED(CONFIG_DEBUG_RODATA))

> +		return;

> +

>  	create_mapping_late(__pa(_stext), (unsigned long)_stext,

>  				(unsigned long)_etext - (unsigned long)_stext,

>  				PAGE_KERNEL_ROX);

> -

>  }

> -#endif

>  

>  void fixup_init(void)

>  {

> -	create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin,

> -			(unsigned long)__init_end - (unsigned long)__init_begin,

> -			PAGE_KERNEL);

> +	/*

> +	 * Unmap the __init region but leave the VM area in place. This

> +	 * prevents the region from being reused for kernel modules, which

> +	 * is not supported by kallsyms.

> +	 */

> +	unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));

>  }

>  

>  static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,

> -				    pgprot_t prot)

> +				    pgprot_t prot, struct vm_struct *vma)

>  {

>  	phys_addr_t pa_start = __pa(va_start);

>  	unsigned long size = va_end - va_start;

> @@ -426,6 +433,14 @@ static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,

>  

>  	__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,

>  			     early_pgtable_alloc);

> +

> +	vma->addr	= va_start;

> +	vma->phys_addr	= pa_start;

> +	vma->size	= size;

> +	vma->flags	= VM_MAP;

> +	vma->caller	= map_kernel_chunk;

> +

> +	vm_area_add_early(vma);

>  }

>  

>  /*

> @@ -433,17 +448,35 @@ static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,

>   */

>  static void __init map_kernel(pgd_t *pgd)

>  {

> +	static struct vm_struct vmlinux_text, vmlinux_init, vmlinux_data;

>  

> -	map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC);

> -	map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC);

> -	map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL);

> +	map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC, &vmlinux_text);

> +	map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,

> +			 &vmlinux_init);

> +	map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);

>  

> -	/*

> -	 * The fixmap falls in a separate pgd to the kernel, and doesn't live

> -	 * in the carveout for the swapper_pg_dir. We can simply re-use the

> -	 * existing dir for the fixmap.

> -	 */

> -	set_pgd(pgd_offset_raw(pgd, FIXADDR_START), *pgd_offset_k(FIXADDR_START));

> +	if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {

> +		/*

> +		 * The fixmap falls in a separate pgd to the kernel, and doesn't

> +		 * live in the carveout for the swapper_pg_dir. We can simply

> +		 * re-use the existing dir for the fixmap.

> +		 */

> +		set_pgd(pgd_offset_raw(pgd, FIXADDR_START),

> +			*pgd_offset_k(FIXADDR_START));

> +	} else if (CONFIG_PGTABLE_LEVELS > 3) {

> +		/*

> +		 * The fixmap shares its top level pgd entry with the kernel

> +		 * mapping. This can really only occur when we are running

> +		 * with 16k/4 levels, so we can simply reuse the pud level

> +		 * entry instead.

> +		 */

> +		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));

> +		set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START),

> +			__pud(__pa(bm_pmd) | PUD_TYPE_TABLE));

> +		pud_clear_fixmap();

> +	} else {

> +		BUG();

> +	}

>  

>  	kasan_copy_shadow(pgd);

>  }

> @@ -569,14 +602,6 @@ void vmemmap_free(unsigned long start, unsigned long end)

>  }

>  #endif	/* CONFIG_SPARSEMEM_VMEMMAP */

>  

> -static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;

> -#if CONFIG_PGTABLE_LEVELS > 2

> -static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;

> -#endif

> -#if CONFIG_PGTABLE_LEVELS > 3

> -static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;

> -#endif

> -

>  static inline pud_t * fixmap_pud(unsigned long addr)

>  {

>  	pgd_t *pgd = pgd_offset_k(addr);

> @@ -608,8 +633,18 @@ void __init early_fixmap_init(void)

>  	unsigned long addr = FIXADDR_START;

>  

>  	pgd = pgd_offset_k(addr);

> -	pgd_populate(&init_mm, pgd, bm_pud);

> -	pud = fixmap_pud(addr);

> +	if (CONFIG_PGTABLE_LEVELS > 3 && !pgd_none(*pgd)) {

> +		/*

> +		 * We only end up here if the kernel mapping and the fixmap

> +		 * share the top level pgd entry, which should only happen on

> +		 * 16k/4 levels configurations.

> +		 */

> +		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));

> +		pud = pud_offset_kimg(pgd, addr);

> +	} else {

> +		pgd_populate(&init_mm, pgd, bm_pud);

> +		pud = fixmap_pud(addr);

> +	}

>  	pud_populate(&init_mm, pud, bm_pmd);

>  	pmd = fixmap_pmd(addr);

>  	pmd_populate_kernel(&init_mm, pmd, bm_pte);

> -- 

> 2.5.0

> 


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Catalin Marinas Feb. 12, 2016, 2:58 p.m. UTC | #2
Hi Ard,

On Mon, Feb 01, 2016 at 11:54:52AM +0100, Ard Biesheuvel wrote:
> This moves the module area to right before the vmalloc area, and

> moves the kernel image to the base of the vmalloc area. This is

> an intermediate step towards implementing KASLR, which allows the

> kernel image to be located anywhere in the vmalloc area.

> 

> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>


This patch is causing lots of KASAN warnings on Juno (interestingly, it
doesn't seem to trigger on Seattle, though we only tried for-next/core).
I pushed the branch that I'm currently using here:

git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux for-next/kernmap


A typical error (though its place varies based on the config options,
kernel layout):

BUG: KASAN: stack-out-of-bounds in clockevents_program_event+0x28/0x1b0 at addr ffffffc936257cc8
Read of size 8 by task swapper/2/0
page:ffffffbde6d895c0 count:0 mapcount:0 mapping:          (null) index:0x0
flags: 0x4000000000000000()
page dumped because: kasan: bad access detected
CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.5.0-rc1+ #130
Hardware name: Juno (DT)
Call trace:
[<ffffff900408b590>] dump_backtrace+0x0/0x258
[<ffffff900408b7fc>] show_stack+0x14/0x20
[<ffffff900448789c>] dump_stack+0xac/0x100
[<ffffff9004224f3c>] kasan_report_error+0x544/0x570
[<ffffff9004225328>] kasan_report+0x40/0x48
[<ffffff9004223c58>] __asan_load8+0x60/0x78
[<ffffff90041596f0>] clockevents_program_event+0x28/0x1b0
[<ffffff900415c63c>] tick_program_event+0x74/0xb8
[<ffffff9004148944>] __remove_hrtimer+0xcc/0x100
[<ffffff9004148f0c>] hrtimer_start_range_ns+0x3f4/0x538
[<ffffff900415d450>] __tick_nohz_idle_enter+0x558/0x590
[<ffffff900415d74c>] tick_nohz_idle_enter+0x44/0x78
[<ffffff900411fcc8>] cpu_startup_entry+0x48/0x2c0
[<ffffff9004091f58>] secondary_start_kernel+0x208/0x278
[<0000000080082aac>] 0x80082aac
Memory state around the buggy address:
 ffffffc936257b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 ffffffc936257c00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1
>ffffffc936257c80: f1 f1 00 00 00 00 f3 f3 f3 f3 00 00 00 00 00 00

                                              ^
 ffffffc936257d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 ffffffc936257d80: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1


And some additional info from the kernel boot:

Processing EFI memory map:
  0x000008000000-0x00000bffffff [Memory Mapped I/O  |RUN|  |  |  |  |  |   |  |  |  |UC]
  0x00001c170000-0x00001c170fff [Memory Mapped I/O  |RUN|  |  |  |  |  |   |  |  |  |UC]
  0x000080000000-0x00008000ffff [Loader Data        |   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x000080010000-0x00008007ffff [Conventional Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x000080080000-0x00008149ffff [Loader Data        |   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x0000814a0000-0x00009fdfffff [Conventional Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x00009fe00000-0x00009fe0ffff [Loader Data        |   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x00009fe10000-0x0000dfffffff [Conventional Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x0000e00f0000-0x0000febd5fff [Conventional Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x0000febd6000-0x0000febd9fff [ACPI Reclaim Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]*
  0x0000febda000-0x0000febdafff [ACPI Memory NVS    |   |  |  |  |  |  |   |WB|WT|WC|UC]*
  0x0000febdb000-0x0000febdcfff [ACPI Reclaim Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]*
  0x0000febdd000-0x0000feffffff [Boot Data          |   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x000880000000-0x0009f8794fff [Conventional Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x0009f8795000-0x0009f8796fff [Loader Data        |   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x0009f8797000-0x0009f9bb4fff [Loader Code        |   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x0009f9bb5000-0x0009faf6efff [Boot Code          |   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x0009faf6f000-0x0009fafa9fff [Runtime Data       |RUN|  |  |  |  |  |   |WB|WT|WC|UC]*
  0x0009fafaa000-0x0009ff2b1fff [Conventional Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x0009ff2b2000-0x0009ffb70fff [Boot Data          |   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x0009ffb71000-0x0009ffb89fff [Conventional Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x0009ffb8a000-0x0009ffb8dfff [Boot Data          |   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x0009ffb8e000-0x0009ffb8efff [Conventional Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x0009ffb8f000-0x0009ffdddfff [Boot Data          |   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x0009ffdde000-0x0009ffe76fff [Conventional Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x0009ffe77000-0x0009fff6dfff [Boot Code          |   |  |  |  |  |  |   |WB|WT|WC|UC]
  0x0009fff6e000-0x0009fffaefff [Runtime Code       |RUN|  |  |  |  |  |   |WB|WT|WC|UC]*
  0x0009fffaf000-0x0009ffffefff [Runtime Data       |RUN|  |  |  |  |  |   |WB|WT|WC|UC]*
  0x0009fffff000-0x0009ffffffff [Boot Data          |   |  |  |  |  |  |   |WB|WT|WC|UC]


Memory: 7068520K/8371264K available (10424K kernel code, 3464K rwdata, 5284K rodata, 1016K init, 380K bss, 1286360K reserved, 16384K cma-reserved)
Virtual kernel memory layout:
    kasan   : 0xffffff8000000000 - 0xffffff9000000000   (    64 GB)
    modules : 0xffffff9000000000 - 0xffffff9004000000   (    64 MB)
    vmalloc : 0xffffff9004000000 - 0xffffffbdbfff0000   (   182 GB)
      .init : 0xffffff9004fd9000 - 0xffffff90050d7000   (  1016 KB)
      .text : 0xffffff9004080000 - 0xffffff9004fd9000   ( 15716 KB)
      .data : 0xffffff90050d7000 - 0xffffff9005439200   (  3465 KB)
    vmemmap : 0xffffffbdc0000000 - 0xffffffbfc0000000   (     8 GB maximum)
              0xffffffbdc2000000 - 0xffffffbde8000000   (   608 MB actual)
    fixed   : 0xffffffbffe7fd000 - 0xffffffbffec00000   (  4108 KB)
    PCI I/O : 0xffffffbffee00000 - 0xffffffbfffe00000   (    16 MB)
    memory  : 0xffffffc000000000 - 0xffffffc980000000   ( 38912 MB)

-- 
Catalin

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Ard Biesheuvel Feb. 12, 2016, 3:02 p.m. UTC | #3
On 12 February 2016 at 15:58, Catalin Marinas <catalin.marinas@arm.com> wrote:
> Hi Ard,

>

> On Mon, Feb 01, 2016 at 11:54:52AM +0100, Ard Biesheuvel wrote:

>> This moves the module area to right before the vmalloc area, and

>> moves the kernel image to the base of the vmalloc area. This is

>> an intermediate step towards implementing KASLR, which allows the

>> kernel image to be located anywhere in the vmalloc area.

>>

>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

>

> This patch is causing lots of KASAN warnings on Juno (interestingly, it

> doesn't seem to trigger on Seattle, though we only tried for-next/core).

> I pushed the branch that I'm currently using here:

>

> git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux for-next/kernmap

>

>

> A typical error (though its place varies based on the config options,

> kernel layout):

>

> BUG: KASAN: stack-out-of-bounds in clockevents_program_event+0x28/0x1b0 at addr ffffffc936257cc8


Can you confirm that these are stack accesses? I was having similar
errors before, and I ended up creating the kasan zero page patch
because it turned out the kasan shadow page in question was aliased
and the stack writes were occurring elsewhere.


> Read of size 8 by task swapper/2/0

> page:ffffffbde6d895c0 count:0 mapcount:0 mapping:          (null) index:0x0

> flags: 0x4000000000000000()

> page dumped because: kasan: bad access detected

> CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.5.0-rc1+ #130

> Hardware name: Juno (DT)

> Call trace:

> [<ffffff900408b590>] dump_backtrace+0x0/0x258

> [<ffffff900408b7fc>] show_stack+0x14/0x20

> [<ffffff900448789c>] dump_stack+0xac/0x100

> [<ffffff9004224f3c>] kasan_report_error+0x544/0x570

> [<ffffff9004225328>] kasan_report+0x40/0x48

> [<ffffff9004223c58>] __asan_load8+0x60/0x78

> [<ffffff90041596f0>] clockevents_program_event+0x28/0x1b0

> [<ffffff900415c63c>] tick_program_event+0x74/0xb8

> [<ffffff9004148944>] __remove_hrtimer+0xcc/0x100

> [<ffffff9004148f0c>] hrtimer_start_range_ns+0x3f4/0x538

> [<ffffff900415d450>] __tick_nohz_idle_enter+0x558/0x590

> [<ffffff900415d74c>] tick_nohz_idle_enter+0x44/0x78

> [<ffffff900411fcc8>] cpu_startup_entry+0x48/0x2c0

> [<ffffff9004091f58>] secondary_start_kernel+0x208/0x278

> [<0000000080082aac>] 0x80082aac

> Memory state around the buggy address:

>  ffffffc936257b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

>  ffffffc936257c00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1

>>ffffffc936257c80: f1 f1 00 00 00 00 f3 f3 f3 f3 00 00 00 00 00 00

>                                               ^

>  ffffffc936257d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

>  ffffffc936257d80: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1

>

>

> And some additional info from the kernel boot:

>

> Processing EFI memory map:

>   0x000008000000-0x00000bffffff [Memory Mapped I/O  |RUN|  |  |  |  |  |   |  |  |  |UC]

>   0x00001c170000-0x00001c170fff [Memory Mapped I/O  |RUN|  |  |  |  |  |   |  |  |  |UC]

>   0x000080000000-0x00008000ffff [Loader Data        |   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x000080010000-0x00008007ffff [Conventional Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x000080080000-0x00008149ffff [Loader Data        |   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x0000814a0000-0x00009fdfffff [Conventional Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x00009fe00000-0x00009fe0ffff [Loader Data        |   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x00009fe10000-0x0000dfffffff [Conventional Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x0000e00f0000-0x0000febd5fff [Conventional Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x0000febd6000-0x0000febd9fff [ACPI Reclaim Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]*

>   0x0000febda000-0x0000febdafff [ACPI Memory NVS    |   |  |  |  |  |  |   |WB|WT|WC|UC]*

>   0x0000febdb000-0x0000febdcfff [ACPI Reclaim Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]*

>   0x0000febdd000-0x0000feffffff [Boot Data          |   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x000880000000-0x0009f8794fff [Conventional Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x0009f8795000-0x0009f8796fff [Loader Data        |   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x0009f8797000-0x0009f9bb4fff [Loader Code        |   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x0009f9bb5000-0x0009faf6efff [Boot Code          |   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x0009faf6f000-0x0009fafa9fff [Runtime Data       |RUN|  |  |  |  |  |   |WB|WT|WC|UC]*

>   0x0009fafaa000-0x0009ff2b1fff [Conventional Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x0009ff2b2000-0x0009ffb70fff [Boot Data          |   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x0009ffb71000-0x0009ffb89fff [Conventional Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x0009ffb8a000-0x0009ffb8dfff [Boot Data          |   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x0009ffb8e000-0x0009ffb8efff [Conventional Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x0009ffb8f000-0x0009ffdddfff [Boot Data          |   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x0009ffdde000-0x0009ffe76fff [Conventional Memory|   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x0009ffe77000-0x0009fff6dfff [Boot Code          |   |  |  |  |  |  |   |WB|WT|WC|UC]

>   0x0009fff6e000-0x0009fffaefff [Runtime Code       |RUN|  |  |  |  |  |   |WB|WT|WC|UC]*

>   0x0009fffaf000-0x0009ffffefff [Runtime Data       |RUN|  |  |  |  |  |   |WB|WT|WC|UC]*

>   0x0009fffff000-0x0009ffffffff [Boot Data          |   |  |  |  |  |  |   |WB|WT|WC|UC]

>

>

> Memory: 7068520K/8371264K available (10424K kernel code, 3464K rwdata, 5284K rodata, 1016K init, 380K bss, 1286360K reserved, 16384K cma-reserved)

> Virtual kernel memory layout:

>     kasan   : 0xffffff8000000000 - 0xffffff9000000000   (    64 GB)

>     modules : 0xffffff9000000000 - 0xffffff9004000000   (    64 MB)

>     vmalloc : 0xffffff9004000000 - 0xffffffbdbfff0000   (   182 GB)

>       .init : 0xffffff9004fd9000 - 0xffffff90050d7000   (  1016 KB)

>       .text : 0xffffff9004080000 - 0xffffff9004fd9000   ( 15716 KB)

>       .data : 0xffffff90050d7000 - 0xffffff9005439200   (  3465 KB)

>     vmemmap : 0xffffffbdc0000000 - 0xffffffbfc0000000   (     8 GB maximum)

>               0xffffffbdc2000000 - 0xffffffbde8000000   (   608 MB actual)

>     fixed   : 0xffffffbffe7fd000 - 0xffffffbffec00000   (  4108 KB)

>     PCI I/O : 0xffffffbffee00000 - 0xffffffbfffe00000   (    16 MB)

>     memory  : 0xffffffc000000000 - 0xffffffc980000000   ( 38912 MB)

>

> --

> Catalin


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Catalin Marinas Feb. 12, 2016, 3:10 p.m. UTC | #4
On Fri, Feb 12, 2016 at 04:02:58PM +0100, Ard Biesheuvel wrote:
> On 12 February 2016 at 15:58, Catalin Marinas <catalin.marinas@arm.com> wrote:

> > Hi Ard,

> >

> > On Mon, Feb 01, 2016 at 11:54:52AM +0100, Ard Biesheuvel wrote:

> >> This moves the module area to right before the vmalloc area, and

> >> moves the kernel image to the base of the vmalloc area. This is

> >> an intermediate step towards implementing KASLR, which allows the

> >> kernel image to be located anywhere in the vmalloc area.

> >>

> >> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

> >

> > This patch is causing lots of KASAN warnings on Juno (interestingly, it

> > doesn't seem to trigger on Seattle, though we only tried for-next/core).

> > I pushed the branch that I'm currently using here:

> >

> > git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux for-next/kernmap

> >

> >

> > A typical error (though its place varies based on the config options,

> > kernel layout):

> >

> > BUG: KASAN: stack-out-of-bounds in clockevents_program_event+0x28/0x1b0 at addr ffffffc936257cc8

> 

> Can you confirm that these are stack accesses? I was having similar

> errors before, and I ended up creating the kasan zero page patch

> because it turned out the kasan shadow page in question was aliased

> and the stack writes were occurring elsewhere.


It's possible; we are looking into this. Is there any other patch I'm missing on
the above branch?

BTW, disabling CPU_IDLE, I get other errors:

WARNING: at /work/Linux/linux-2.6-aarch64/mm/vmalloc.c:135
Modules linked in:

CPU: 2 PID: 973 Comm: systemd-modules Tainted: G        W       4.5.0-rc1+ #131
Hardware name: Juno (DT)
task: ffffffc93448e200 ti: ffffffc9346ac000 task.ti: ffffffc9346ac000
PC is at vmap_page_range_noflush+0x240/0x2e8
LR is at vmap_page_range_noflush+0x16c/0x2e8
pc : [<ffffff90041fef78>] lr : [<ffffff90041feea4>] pstate: 20000145
sp : ffffffc9346af9b0
x29: ffffffc9346af9b0 x28: ffffff90050da000
x27: ffffffc001438008 x26: ffffffbde6d16440
x25: 0000004240000000 x24: ffffffc97ff3a000
x23: 0000000000000041 x22: ffffffc078e9e600
x21: ffffff8200002000 x20: ffffff8200001000
x19: 0000000000000000 x18: 00000000f3294c2f
x17: 00000000f7dc90fb x16: 0000000087b402ce
x15: ffffffffffffffff x14: ffffff0000000000
x13: ffffffffffffffff x12: 0000000000000028
x11: 0101010101010101 x10: 00000001801a001a
x9 : 0000000000000000 x8 : ffffff89268b2400
x7 : 0000000000000000 x6 : 000000000000003f
x5 : 0000000000000040 x4 : 0000000000000000
x3 : 0000000000000000 x2 : 1ffffff800287001
x1 : dfffff9000000000 x0 : 00e8000081439713

---[ end trace 8a78d7ad8d08d2a9 ]---
Call trace:
Exception stack(0xffffffc9346af790 to 0xffffffc9346af8b0)
f780:                                   0000000000000000 ffffff8200001000
f7a0: ffffffc9346af9b0 ffffff90041fef78 0000000020000145 000000000000003d
f7c0: 0000004240000000 ffffffc078e9f600 0000000041b58ab3 ffffff9004f0c370
f7e0: ffffff9004082608 ffffffc078e9f620 00000000024080c2 0000000000400000
f800: ffffffc9346afe70 ffffffc9346afe50 ffffffc9346af9b0 ffffffc93448e200
f820: ffffffc9346af830 ffffff900408b1b0 ffffffc9346af900 ffffff900408b228
f840: ffffffc9346ac000 ffffffc9346af9b0 ffffffc9346ac000 ffffffc078e9e600
f860: 0000000041b58ab3 ffffff9004f0c8a8 ffffff900408b080 000000010010000e
f880: ffffffc9346af9b0 0000000000000000 00e8000081439713 dfffff9000000000
f8a0: 1ffffff800287001 0000000000000000
[<ffffff90041fef78>] vmap_page_range_noflush+0x240/0x2e8
[<ffffff90041ff078>] map_vm_area+0x58/0x88
[<ffffff9004200400>] __vmalloc_node_range+0x2b8/0x350
[<ffffff9004224394>] kasan_module_alloc+0x64/0xb8
[<ffffff90040943f4>] module_alloc+0x5c/0xa0
[<ffffff9004169460>] load_module+0x1798/0x3098
[<ffffff900416b020>] SyS_finit_module+0xf8/0x108
[<ffffff9004085d30>] el0_svc_naked+0x24/0x28
vmalloc: allocation failure, allocated 4096 of 4096 bytes

-- 
Catalin

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Ard Biesheuvel Feb. 12, 2016, 3:17 p.m. UTC | #5
On 12 February 2016 at 16:10, Catalin Marinas <catalin.marinas@arm.com> wrote:
> On Fri, Feb 12, 2016 at 04:02:58PM +0100, Ard Biesheuvel wrote:

>> On 12 February 2016 at 15:58, Catalin Marinas <catalin.marinas@arm.com> wrote:

>> > Hi Ard,

>> >

>> > On Mon, Feb 01, 2016 at 11:54:52AM +0100, Ard Biesheuvel wrote:

>> >> This moves the module area to right before the vmalloc area, and

>> >> moves the kernel image to the base of the vmalloc area. This is

>> >> an intermediate step towards implementing KASLR, which allows the

>> >> kernel image to be located anywhere in the vmalloc area.

>> >>

>> >> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

>> >

>> > This patch is causing lots of KASAN warnings on Juno (interestingly, it

>> > doesn't seem to trigger on Seattle, though we only tried for-next/core).

>> > I pushed the branch that I'm currently using here:

>> >

>> > git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux for-next/kernmap

>> >

>> >

>> > A typical error (though its place varies based on the config options,

>> > kernel layout):

>> >

>> > BUG: KASAN: stack-out-of-bounds in clockevents_program_event+0x28/0x1b0 at addr ffffffc936257cc8

>>

>> Can you confirm that these are stack accesses? I was having similar

>> errors before, and I ended up creating the kasan zero page patch

>> because it turned out the kasan shadow page in question was aliased

>> and the stack writes were occurring elsewhere.

>

> It's possible, we are looking into this. Is there any other patch I miss on

> the above branch?

>


I don't think so but I will check

> BTW, disabling CPU_IDLE, I get other errors:

>

> WARNING: at /work/Linux/linux-2.6-aarch64/mm/vmalloc.c:135

> Modules linked in:

>


Since this occurs in kasan_module_alloc(), I think this may be a
symptom of the same underlying issue, where the kernel VA space and
the projection onto the Kasan shadow area are somehow out of sync.

I will try to reproduce with the branch above.



> CPU: 2 PID: 973 Comm: systemd-modules Tainted: G        W       4.5.0-rc1+ #131

> Hardware name: Juno (DT)

> task: ffffffc93448e200 ti: ffffffc9346ac000 task.ti: ffffffc9346ac000

> PC is at vmap_page_range_noflush+0x240/0x2e8

> LR is at vmap_page_range_noflush+0x16c/0x2e8

> pc : [<ffffff90041fef78>] lr : [<ffffff90041feea4>] pstate: 20000145

> sp : ffffffc9346af9b0

> x29: ffffffc9346af9b0 x28: ffffff90050da000

> x27: ffffffc001438008 x26: ffffffbde6d16440

> x25: 0000004240000000 x24: ffffffc97ff3a000

> x23: 0000000000000041 x22: ffffffc078e9e600

> x21: ffffff8200002000 x20: ffffff8200001000

> x19: 0000000000000000 x18: 00000000f3294c2f

> x17: 00000000f7dc90fb x16: 0000000087b402ce

> x15: ffffffffffffffff x14: ffffff0000000000

> x13: ffffffffffffffff x12: 0000000000000028

> x11: 0101010101010101 x10: 00000001801a001a

> x9 : 0000000000000000 x8 : ffffff89268b2400

> x7 : 0000000000000000 x6 : 000000000000003f

> x5 : 0000000000000040 x4 : 0000000000000000

> x3 : 0000000000000000 x2 : 1ffffff800287001

> x1 : dfffff9000000000 x0 : 00e8000081439713

>

> ---[ end trace 8a78d7ad8d08d2a9 ]---

> Call trace:

> Exception stack(0xffffffc9346af790 to 0xffffffc9346af8b0)

> f780:                                   0000000000000000 ffffff8200001000

> f7a0: ffffffc9346af9b0 ffffff90041fef78 0000000020000145 000000000000003d

> f7c0: 0000004240000000 ffffffc078e9f600 0000000041b58ab3 ffffff9004f0c370

> f7e0: ffffff9004082608 ffffffc078e9f620 00000000024080c2 0000000000400000

> f800: ffffffc9346afe70 ffffffc9346afe50 ffffffc9346af9b0 ffffffc93448e200

> f820: ffffffc9346af830 ffffff900408b1b0 ffffffc9346af900 ffffff900408b228

> f840: ffffffc9346ac000 ffffffc9346af9b0 ffffffc9346ac000 ffffffc078e9e600

> f860: 0000000041b58ab3 ffffff9004f0c8a8 ffffff900408b080 000000010010000e

> f880: ffffffc9346af9b0 0000000000000000 00e8000081439713 dfffff9000000000

> f8a0: 1ffffff800287001 0000000000000000

> [<ffffff90041fef78>] vmap_page_range_noflush+0x240/0x2e8

> [<ffffff90041ff078>] map_vm_area+0x58/0x88

> [<ffffff9004200400>] __vmalloc_node_range+0x2b8/0x350

> [<ffffff9004224394>] kasan_module_alloc+0x64/0xb8

> [<ffffff90040943f4>] module_alloc+0x5c/0xa0

> [<ffffff9004169460>] load_module+0x1798/0x3098

> [<ffffff900416b020>] SyS_finit_module+0xf8/0x108

> [<ffffff9004085d30>] el0_svc_naked+0x24/0x28

> vmalloc: allocation failure, allocated 4096 of 4096 bytes

>

> --

> Catalin


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Catalin Marinas Feb. 12, 2016, 3:26 p.m. UTC | #6
On Fri, Feb 12, 2016 at 04:17:09PM +0100, Ard Biesheuvel wrote:
> On 12 February 2016 at 16:10, Catalin Marinas <catalin.marinas@arm.com> wrote:

> > On Fri, Feb 12, 2016 at 04:02:58PM +0100, Ard Biesheuvel wrote:

> >> On 12 February 2016 at 15:58, Catalin Marinas <catalin.marinas@arm.com> wrote:

> >> > On Mon, Feb 01, 2016 at 11:54:52AM +0100, Ard Biesheuvel wrote:

> >> >> This moves the module area to right before the vmalloc area, and

> >> >> moves the kernel image to the base of the vmalloc area. This is

> >> >> an intermediate step towards implementing KASLR, which allows the

> >> >> kernel image to be located anywhere in the vmalloc area.

> >> >>

> >> >> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

> >> >

> >> > This patch is causing lots of KASAN warnings on Juno (interestingly, it

> >> > doesn't seem to trigger on Seattle, though we only tried for-next/core).

> >> > I pushed the branch that I'm currently using here:

> >> >

> >> > git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux for-next/kernmap

> >> >

> >> >

> >> > A typical error (though its place varies based on the config options,

> >> > kernel layout):

> >> >

> >> > BUG: KASAN: stack-out-of-bounds in clockevents_program_event+0x28/0x1b0 at addr ffffffc936257cc8

> >>

> >> Can you confirm that these are stack accesses? I was having similar

> >> errors before, and I ended up creating the kasan zero page patch

> >> because it turned out the kasan shadow page in question was aliased

> >> and the stack writes were occurring elsewhere.

> >

> > It's possible, we are looking into this. Is there any other patch I miss on

> > the above branch?

> 

> I don't think so but I will check


Commit 7b1af9795773 ("arm64: kasan: ensure that the KASAN zero page is
mapped read-only") was merged in -rc2 while the branch above is based on
-rc1. Anyway, I merged it into -rc2 and the errors are similar.

-- 
Catalin

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Ard Biesheuvel Feb. 12, 2016, 6:01 p.m. UTC | #7
On 12 February 2016 at 18:47, James Morse <james.morse@arm.com> wrote:
> Hi Ard,

>

> On 01/02/16 10:54, Ard Biesheuvel wrote:

>> This moves the module area to right before the vmalloc area, and

>> moves the kernel image to the base of the vmalloc area. This is

>> an intermediate step towards implementing KASLR, which allows the

>> kernel image to be located anywhere in the vmalloc area.

>

> I've rebased hibernate onto for-next/core, and this patch leads to the hibernate

> core code falling down a kernel shaped hole in the linear map.

>

> The hibernate code assumes that for zones returned by for_each_populated_zone(),

> if pfn_valid() says a page is present, then it is okay to access the page via

> page_address(pfn_to_page(pfn)). But for pfns that correspond to the kernel text,

> this is still returning an address in the linear map, which isn't mapped...

>

> I'm not sure what the correct fix is here.

> Should this sort of walk be valid?

>


I think the correct fix would be to mark the [_stext, _etext] interval
as NOMAP. That will also simplify the mapping routine where I now
check manually whether a memblock intersects that interval. And it
should make this particular piece of code behave.

However, you would still need to preserve the contents of the
interval, since the generic hibernate routines will not do that
anymore after this change.

I will experiment with this on Monday, and report back.

Thanks,
Ard.


>

> From include/linux/mm.h:

>> static __always_inline void *lowmem_page_address(const struct page *page)

>> {

>>       return __va(PFN_PHYS(page_to_pfn(page)));

>> }

>

>

> Suggestions welcome!

>

>

> Thanks,

>

> James


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
diff mbox

Patch

diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
index de0d21211c34..71ad0f93eb71 100644
--- a/arch/arm64/include/asm/kasan.h
+++ b/arch/arm64/include/asm/kasan.h
@@ -14,7 +14,7 @@ 
  * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses.
  */
 #define KASAN_SHADOW_START      (VA_START)
-#define KASAN_SHADOW_END        (KASAN_SHADOW_START + (1UL << (VA_BITS - 3)))
+#define KASAN_SHADOW_END        (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
 
 /*
  * This value is used to map an address to the corresponding shadow
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index aebc739f5a11..4388651d1f0d 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -45,16 +45,15 @@ 
  * VA_START - the first kernel virtual address.
  * TASK_SIZE - the maximum size of a user space task.
  * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
- * The module space lives between the addresses given by TASK_SIZE
- * and PAGE_OFFSET - it must be within 128MB of the kernel text.
  */
 #define VA_BITS			(CONFIG_ARM64_VA_BITS)
 #define VA_START		(UL(0xffffffffffffffff) << VA_BITS)
 #define PAGE_OFFSET		(UL(0xffffffffffffffff) << (VA_BITS - 1))
-#define KIMAGE_VADDR		(PAGE_OFFSET)
-#define MODULES_END		(KIMAGE_VADDR)
-#define MODULES_VADDR		(MODULES_END - SZ_64M)
-#define PCI_IO_END		(MODULES_VADDR - SZ_2M)
+#define KIMAGE_VADDR		(MODULES_END)
+#define MODULES_END		(MODULES_VADDR + MODULES_VSIZE)
+#define MODULES_VADDR		(VA_START + KASAN_SHADOW_SIZE)
+#define MODULES_VSIZE		(SZ_64M)
+#define PCI_IO_END		(PAGE_OFFSET - SZ_2M)
 #define PCI_IO_START		(PCI_IO_END - PCI_IO_SIZE)
 #define FIXADDR_TOP		(PCI_IO_START - SZ_2M)
 #define TASK_SIZE_64		(UL(1) << VA_BITS)
@@ -72,6 +71,16 @@ 
 #define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 4))
 
 /*
+ * The size of the KASAN shadow region. This should be 1/8th of the
+ * size of the entire kernel virtual address space.
+ */
+#ifdef CONFIG_KASAN
+#define KASAN_SHADOW_SIZE	(UL(1) << (VA_BITS - 3))
+#else
+#define KASAN_SHADOW_SIZE	(0)
+#endif
+
+/*
  * Physical vs virtual RAM address space conversion.  These are
  * private definitions which should NOT be used outside memory.h
  * files.  Use virt_to_phys/phys_to_virt/__pa/__va instead.
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 87355408d448..a440f5a85d08 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -36,19 +36,13 @@ 
  *
  * VMEMAP_SIZE: allows the whole VA space to be covered by a struct page array
  *	(rounded up to PUD_SIZE).
- * VMALLOC_START: beginning of the kernel VA space
+ * VMALLOC_START: beginning of the kernel vmalloc space
  * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space,
  *	fixed mappings and modules
  */
 #define VMEMMAP_SIZE		ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
 
-#ifndef CONFIG_KASAN
-#define VMALLOC_START		(VA_START)
-#else
-#include <asm/kasan.h>
-#define VMALLOC_START		(KASAN_SHADOW_END + SZ_64K)
-#endif
-
+#define VMALLOC_START		(MODULES_END)
 #define VMALLOC_END		(PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
 
 #define vmemmap			((struct page *)(VMALLOC_END + SZ_64K))
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 0adbebbc2803..e83ffb00560c 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -35,7 +35,9 @@  struct addr_marker {
 };
 
 enum address_markers_idx {
-	VMALLOC_START_NR = 0,
+	MODULES_START_NR = 0,
+	MODULES_END_NR,
+	VMALLOC_START_NR,
 	VMALLOC_END_NR,
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 	VMEMMAP_START_NR,
@@ -45,12 +47,12 @@  enum address_markers_idx {
 	FIXADDR_END_NR,
 	PCI_START_NR,
 	PCI_END_NR,
-	MODULES_START_NR,
-	MODULES_END_NR,
 	KERNEL_SPACE_NR,
 };
 
 static struct addr_marker address_markers[] = {
+	{ MODULES_VADDR,	"Modules start" },
+	{ MODULES_END,		"Modules end" },
 	{ VMALLOC_START,	"vmalloc() Area" },
 	{ VMALLOC_END,		"vmalloc() End" },
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
@@ -61,9 +63,7 @@  static struct addr_marker address_markers[] = {
 	{ FIXADDR_TOP,		"Fixmap end" },
 	{ PCI_IO_START,		"PCI I/O start" },
 	{ PCI_IO_END,		"PCI I/O end" },
-	{ MODULES_VADDR,	"Modules start" },
-	{ MODULES_END,		"Modules end" },
-	{ PAGE_OFFSET,		"Kernel Mapping" },
+	{ PAGE_OFFSET,		"Linear Mapping" },
 	{ -1,			NULL },
 };
 
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index f3b061e67bfe..1d627cd8121c 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -36,6 +36,7 @@ 
 #include <linux/swiotlb.h>
 
 #include <asm/fixmap.h>
+#include <asm/kasan.h>
 #include <asm/memory.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
@@ -302,22 +303,26 @@  void __init mem_init(void)
 #ifdef CONFIG_KASAN
 		  "    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n"
 #endif
+		  "    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n"
 		  "    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n"
+		  "      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+		  "      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+		  "      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n"
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 		  "    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n"
 		  "              0x%16lx - 0x%16lx   (%6ld MB actual)\n"
 #endif
 		  "    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n"
 		  "    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n"
-		  "    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n"
-		  "    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n"
-		  "      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"
-		  "      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"
-		  "      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+		  "    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n",
 #ifdef CONFIG_KASAN
 		  MLG(KASAN_SHADOW_START, KASAN_SHADOW_END),
 #endif
+		  MLM(MODULES_VADDR, MODULES_END),
 		  MLG(VMALLOC_START, VMALLOC_END),
+		  MLK_ROUNDUP(__init_begin, __init_end),
+		  MLK_ROUNDUP(_text, _etext),
+		  MLK_ROUNDUP(_sdata, _edata),
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 		  MLG((unsigned long)vmemmap,
 		      (unsigned long)vmemmap + VMEMMAP_SIZE),
@@ -326,11 +331,7 @@  void __init mem_init(void)
 #endif
 		  MLK(FIXADDR_START, FIXADDR_TOP),
 		  MLM(PCI_IO_START, PCI_IO_END),
-		  MLM(MODULES_VADDR, MODULES_END),
-		  MLM(PAGE_OFFSET, (unsigned long)high_memory),
-		  MLK_ROUNDUP(__init_begin, __init_end),
-		  MLK_ROUNDUP(_text, _etext),
-		  MLK_ROUNDUP(_sdata, _edata));
+		  MLM(PAGE_OFFSET, (unsigned long)high_memory));
 
 #undef MLK
 #undef MLM
@@ -358,8 +359,8 @@  void __init mem_init(void)
 
 void free_initmem(void)
 {
-	fixup_init();
 	free_initmem_default(0);
+	fixup_init();
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index cc569a38bc76..66c246871d2e 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -17,9 +17,11 @@ 
 #include <linux/start_kernel.h>
 
 #include <asm/mmu_context.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
+#include <asm/sections.h>
 #include <asm/tlbflush.h>
 
 static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
@@ -33,7 +35,7 @@  static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
 	if (pmd_none(*pmd))
 		pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
 
-	pte = pte_offset_kernel(pmd, addr);
+	pte = pte_offset_kimg(pmd, addr);
 	do {
 		next = addr + PAGE_SIZE;
 		set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page),
@@ -51,7 +53,7 @@  static void __init kasan_early_pmd_populate(pud_t *pud,
 	if (pud_none(*pud))
 		pud_populate(&init_mm, pud, kasan_zero_pmd);
 
-	pmd = pmd_offset(pud, addr);
+	pmd = pmd_offset_kimg(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
 		kasan_early_pte_populate(pmd, addr, next);
@@ -68,7 +70,7 @@  static void __init kasan_early_pud_populate(pgd_t *pgd,
 	if (pgd_none(*pgd))
 		pgd_populate(&init_mm, pgd, kasan_zero_pud);
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset_kimg(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		kasan_early_pmd_populate(pud, addr, next);
@@ -126,9 +128,13 @@  static void __init clear_pgds(unsigned long start,
 
 void __init kasan_init(void)
 {
+	u64 kimg_shadow_start, kimg_shadow_end;
 	struct memblock_region *reg;
 	int i;
 
+	kimg_shadow_start = (u64)kasan_mem_to_shadow(_text);
+	kimg_shadow_end = (u64)kasan_mem_to_shadow(_end);
+
 	/*
 	 * We are going to perform proper setup of shadow memory.
 	 * At first we should unmap early shadow (clear_pgds() call bellow).
@@ -142,8 +148,25 @@  void __init kasan_init(void)
 
 	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
+	vmemmap_populate(kimg_shadow_start, kimg_shadow_end, NUMA_NO_NODE);
+
+	/*
+	 * vmemmap_populate() has populated the shadow region that covers the
+	 * kernel image with SWAPPER_BLOCK_SIZE mappings, so we have to round
+	 * the start and end addresses to SWAPPER_BLOCK_SIZE as well, to prevent
+	 * kasan_populate_zero_shadow() from replacing the PMD block mappings
+	 * with PMD table mappings at the edges of the shadow region for the
+	 * kernel image.
+	 */
+	if (ARM64_SWAPPER_USES_SECTION_MAPS) {
+		kimg_shadow_start = round_down(kimg_shadow_start,
+					       SWAPPER_BLOCK_SIZE);
+		kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE);
+	}
 	kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
-			kasan_mem_to_shadow((void *)MODULES_VADDR));
+				   (void *)kimg_shadow_start);
+	kasan_populate_zero_shadow((void *)kimg_shadow_end,
+				   kasan_mem_to_shadow((void *)PAGE_OFFSET));
 
 	for_each_memblock(memory, reg) {
 		void *start = (void *)__phys_to_virt(reg->base);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index b84915723ea0..4c4b15932963 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -53,6 +53,10 @@  u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
 unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
 
+static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
+static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
+static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
+
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 			      unsigned long size, pgprot_t vma_prot)
 {
@@ -349,14 +353,14 @@  static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
 {
 
 	unsigned long kernel_start = __pa(_stext);
-	unsigned long kernel_end = __pa(_end);
+	unsigned long kernel_end = __pa(_etext);
 
 	/*
-	 * The kernel itself is mapped at page granularity. Map all other
-	 * memory, making sure we don't overwrite the existing kernel mappings.
+	 * Take care not to create a writable alias for the
+	 * read-only text and rodata sections of the kernel image.
 	 */
 
-	/* No overlap with the kernel. */
+	/* No overlap with the kernel text */
 	if (end < kernel_start || start >= kernel_end) {
 		__create_pgd_mapping(pgd, start, __phys_to_virt(start),
 				     end - start, PAGE_KERNEL,
@@ -365,7 +369,7 @@  static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
 	}
 
 	/*
-	 * This block overlaps the kernel mapping. Map the portion(s) which
+	 * This block overlaps the kernel text mapping. Map the portion(s) which
 	 * don't overlap.
 	 */
 	if (start < kernel_start)
@@ -398,25 +402,28 @@  static void __init map_mem(pgd_t *pgd)
 	}
 }
 
-#ifdef CONFIG_DEBUG_RODATA
 void mark_rodata_ro(void)
 {
+	if (!IS_ENABLED(CONFIG_DEBUG_RODATA))
+		return;
+
 	create_mapping_late(__pa(_stext), (unsigned long)_stext,
 				(unsigned long)_etext - (unsigned long)_stext,
 				PAGE_KERNEL_ROX);
-
 }
-#endif
 
 void fixup_init(void)
 {
-	create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin,
-			(unsigned long)__init_end - (unsigned long)__init_begin,
-			PAGE_KERNEL);
+	/*
+	 * Unmap the __init region but leave the VM area in place. This
+	 * prevents the region from being reused for kernel modules, which
+	 * is not supported by kallsyms.
+	 */
+	unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
 }
 
 static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
-				    pgprot_t prot)
+				    pgprot_t prot, struct vm_struct *vma)
 {
 	phys_addr_t pa_start = __pa(va_start);
 	unsigned long size = va_end - va_start;
@@ -426,6 +433,14 @@  static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
 
 	__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
 			     early_pgtable_alloc);
+
+	vma->addr	= va_start;
+	vma->phys_addr	= pa_start;
+	vma->size	= size;
+	vma->flags	= VM_MAP;
+	vma->caller	= map_kernel_chunk;
+
+	vm_area_add_early(vma);
 }
 
 /*
@@ -433,17 +448,35 @@  static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
  */
 static void __init map_kernel(pgd_t *pgd)
 {
+	static struct vm_struct vmlinux_text, vmlinux_init, vmlinux_data;
 
-	map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC);
-	map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC);
-	map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL);
+	map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC, &vmlinux_text);
+	map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
+			 &vmlinux_init);
+	map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
 
-	/*
-	 * The fixmap falls in a separate pgd to the kernel, and doesn't live
-	 * in the carveout for the swapper_pg_dir. We can simply re-use the
-	 * existing dir for the fixmap.
-	 */
-	set_pgd(pgd_offset_raw(pgd, FIXADDR_START), *pgd_offset_k(FIXADDR_START));
+	if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
+		/*
+		 * The fixmap falls in a separate pgd to the kernel, and doesn't
+		 * live in the carveout for the swapper_pg_dir. We can simply
+		 * re-use the existing dir for the fixmap.
+		 */
+		set_pgd(pgd_offset_raw(pgd, FIXADDR_START),
+			*pgd_offset_k(FIXADDR_START));
+	} else if (CONFIG_PGTABLE_LEVELS > 3) {
+		/*
+		 * The fixmap shares its top level pgd entry with the kernel
+		 * mapping. This can really only occur when we are running
+		 * with 16k/4 levels, so we can simply reuse the pud level
+		 * entry instead.
+		 */
+		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
+		set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START),
+			__pud(__pa(bm_pmd) | PUD_TYPE_TABLE));
+		pud_clear_fixmap();
+	} else {
+		BUG();
+	}
 
 	kasan_copy_shadow(pgd);
 }
@@ -569,14 +602,6 @@  void vmemmap_free(unsigned long start, unsigned long end)
 }
 #endif	/* CONFIG_SPARSEMEM_VMEMMAP */
 
-static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
-#if CONFIG_PGTABLE_LEVELS > 2
-static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
-#endif
-#if CONFIG_PGTABLE_LEVELS > 3
-static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
-#endif
-
 static inline pud_t * fixmap_pud(unsigned long addr)
 {
 	pgd_t *pgd = pgd_offset_k(addr);
@@ -608,8 +633,18 @@  void __init early_fixmap_init(void)
 	unsigned long addr = FIXADDR_START;
 
 	pgd = pgd_offset_k(addr);
-	pgd_populate(&init_mm, pgd, bm_pud);
-	pud = fixmap_pud(addr);
+	if (CONFIG_PGTABLE_LEVELS > 3 && !pgd_none(*pgd)) {
+		/*
+		 * We only end up here if the kernel mapping and the fixmap
+		 * share the top level pgd entry, which should only happen on
+		 * 16k/4 levels configurations.
+		 */
+		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
+		pud = pud_offset_kimg(pgd, addr);
+	} else {
+		pgd_populate(&init_mm, pgd, bm_pud);
+		pud = fixmap_pud(addr);
+	}
 	pud_populate(&init_mm, pud, bm_pmd);
 	pmd = fixmap_pmd(addr);
 	pmd_populate_kernel(&init_mm, pmd, bm_pte);