diff mbox series

[v2,1/4] arm64: export memblock_reserve()d regions via /proc/iomem

Message ID 20180619064424.6642-2-takahiro.akashi@linaro.org
State Superseded
Headers show
Series arm64: kexec,kdump: fix boot failures on acpi-only system | expand

Commit Message

AKASHI Takahiro June 19, 2018, 6:44 a.m. UTC
From: James Morse <james.morse@arm.com>


There has been some confusion around what is necessary to prevent kexec
overwriting important memory regions. memblock: reserve, or nomap?
Only memblock nomap regions are reported via /proc/iomem, kexec's
user-space doesn't know about memblock_reserve()d regions.

Until commit f56ab9a5b73ca ("efi/arm: Don't mark ACPI reclaim memory
as MEMBLOCK_NOMAP") the ACPI tables were nomap, now they are reserved
and thus possible for kexec to overwrite with the new kernel or initrd.
But this was always broken, as the UEFI memory map is also reserved
and not marked as nomap.

Exporting both nomap and reserved memblock types is a nuisance as
they live in different memblock structures which we can't walk at
the same time.

Take a second walk over memblock.reserved and add new 'reserved'
subnodes for the memblock_reserved() regions that aren't already
described by the existing code. (e.g. Kernel Code)

We use reserve_region_with_split() to find the gaps in existing named
regions. This handles the gap between 'kernel code' and 'kernel data'
which is memblock_reserve()d, but already partially described by
request_standard_resources(). e.g.:
| 80000000-dfffffff : System RAM
|   80080000-80ffffff : Kernel code
|   81000000-8158ffff : reserved
|   81590000-8237efff : Kernel data
|   a0000000-dfffffff : Crash kernel
| e00f0000-f949ffff : System RAM

reserve_region_with_split needs kzalloc() which isn't available when
request_standard_resources() is called, use an initcall.

Reported-by: Bhupesh Sharma <bhsharma@redhat.com>
Reported-by: Tyler Baicar <tbaicar@codeaurora.org>
Suggested-by: Akashi Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: James Morse <james.morse@arm.com>

Fixes: d28f6df1305a ("arm64/kexec: Add core kexec support")
CC: Ard Biesheuvel <ard.biesheuvel@linaro.org>
CC: Mark Rutland <mark.rutland@arm.com>
---
 arch/arm64/kernel/setup.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

-- 
2.17.0

Comments

Dave Kleikamp June 19, 2018, 1:37 p.m. UTC | #1
On 06/19/2018 01:44 AM, AKASHI Takahiro wrote:

> +static int __init reserve_memblock_reserved_regions(void)

> +{

> +	phys_addr_t start, end, roundup_end = 0;

> +	struct resource *mem, *res;

> +	u64 i;

> +

> +	for_each_reserved_mem_region(i, &start, &end) {

> +		if (end <= roundup_end)

> +			continue; /* done already */

> +

> +		start = __pfn_to_phys(PFN_DOWN(start));

> +		end = __pfn_to_phys(PFN_UP(end)) - 1;

> +		roundup_end = end;

> +

> +		res = kzalloc(sizeof(*res), GFP_ATOMIC);

> +		if (WARN_ON(!res))

> +			return -ENOMEM;

> +		res->start = start;

> +		res->end = end;

> +		res->name  = "reserved";

> +		res->flags = IORESOURCE_MEM;

> +

> +		mem = request_resource_conflict(&iomem_resource, res);

> +		/*

> +		 * We expected memblock_reserve() regions to conflict with

> +		 * memory created by request_standard_resources().

> +		 */

> +		if (WARN_ON_ONCE(!mem))

> +			continue;

> +		kfree(res);


Why is kfree() after the conditional continue? This is a memory leak.

> +

> +		reserve_region_with_split(mem, start, end, "reserved");

> +	}

> +

> +	return 0;

> +}

> +arch_initcall(reserve_memblock_reserved_regions);

> +

>  u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };

>  

>  void __init setup_arch(char **cmdline_p)

>
James Morse June 19, 2018, 3 p.m. UTC | #2
Hi Dave,

On 19/06/18 14:37, Dave Kleikamp wrote:
> On 06/19/2018 01:44 AM, AKASHI Takahiro wrote:

>> +static int __init reserve_memblock_reserved_regions(void)

>> +{

>> +	phys_addr_t start, end, roundup_end = 0;

>> +	struct resource *mem, *res;

>> +	u64 i;

>> +

>> +	for_each_reserved_mem_region(i, &start, &end) {

>> +		if (end <= roundup_end)

>> +			continue; /* done already */

>> +

>> +		start = __pfn_to_phys(PFN_DOWN(start));

>> +		end = __pfn_to_phys(PFN_UP(end)) - 1;

>> +		roundup_end = end;

>> +

>> +		res = kzalloc(sizeof(*res), GFP_ATOMIC);

>> +		if (WARN_ON(!res))

>> +			return -ENOMEM;

>> +		res->start = start;

>> +		res->end = end;

>> +		res->name  = "reserved";

>> +		res->flags = IORESOURCE_MEM;

>> +

>> +		mem = request_resource_conflict(&iomem_resource, res);

>> +		/*

>> +		 * We expected memblock_reserve() regions to conflict with

>> +		 * memory created by request_standard_resources().

>> +		 */

>> +		if (WARN_ON_ONCE(!mem))

>> +			continue;

>> +		kfree(res);

> 

> Why is kfree() after the conditional continue? This is a memory leak.


request_resource_conflict() inserts res into the iomem_resource tree, or returns
the conflict if there is already something at this address.

We expect something at this address: a 'System RAM' section added by
request_resource(). This code wants the conflicting 'System RAM' entry so that
reserve_region_with_split() can fill in the gaps below it with 'reserved'. See
the commit-message for an example.

If there was no conflict, it means the memory map doesn't look like we expect,
we can't pass NULL to reserve_region_with_split(), and we just inserted the
'reserved' at the top level. Freeing res at this point would be a use-after-free
hanging from /proc/iomem.
This code generates a WARN_ON_ONCE() and leaves the 'reserved' description where
it is.

Trying to cleanup here is pointless, we can remove the resource entry and free
it ... but we still want to report it as reserved, which is what just happened,
just not quite how we we wanted it.

If you ever see this warning, it means some RAM stopped being nomap between
request_resources() and reserve_memblock_reserved_regions(). I can't find any
case where that ever happens.


If all that makes sense: how can I improve the comment above the WARN_ON_ONCE()
to make it clearer?


Thanks,

James


>> +

>> +		reserve_region_with_split(mem, start, end, "reserved");

>> +	}

>> +

>> +	return 0;

>> +}

>> +arch_initcall(reserve_memblock_reserved_regions);

>> +

>>  u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };

>>  

>>  void __init setup_arch(char **cmdline_p)

>>
Dave Kleikamp June 19, 2018, 3:22 p.m. UTC | #3
On 06/19/2018 10:00 AM, James Morse wrote:
> Hi Dave,

> 

> On 19/06/18 14:37, Dave Kleikamp wrote:

>> On 06/19/2018 01:44 AM, AKASHI Takahiro wrote:

>>> +static int __init reserve_memblock_reserved_regions(void)

>>> +{

>>> +	phys_addr_t start, end, roundup_end = 0;

>>> +	struct resource *mem, *res;

>>> +	u64 i;

>>> +

>>> +	for_each_reserved_mem_region(i, &start, &end) {

>>> +		if (end <= roundup_end)

>>> +			continue; /* done already */

>>> +

>>> +		start = __pfn_to_phys(PFN_DOWN(start));

>>> +		end = __pfn_to_phys(PFN_UP(end)) - 1;

>>> +		roundup_end = end;

>>> +

>>> +		res = kzalloc(sizeof(*res), GFP_ATOMIC);

>>> +		if (WARN_ON(!res))

>>> +			return -ENOMEM;

>>> +		res->start = start;

>>> +		res->end = end;

>>> +		res->name  = "reserved";

>>> +		res->flags = IORESOURCE_MEM;

>>> +

>>> +		mem = request_resource_conflict(&iomem_resource, res);

>>> +		/*

>>> +		 * We expected memblock_reserve() regions to conflict with

>>> +		 * memory created by request_standard_resources().

>>> +		 */

>>> +		if (WARN_ON_ONCE(!mem))

>>> +			continue;

>>> +		kfree(res);

>>

>> Why is kfree() after the conditional continue? This is a memory leak.

> 

> request_resource_conflict() inserts res into the iomem_resource tree, or returns

> the conflict if there is already something at this address.

> 

> We expect something at this address: a 'System RAM' section added by

> request_resource(). This code wants the conflicting 'System RAM' entry so that

> reserve_region_with_split() can fill in the gaps below it with 'reserved'. See

> the commit-message for an example.

> 

> If there was no conflict, it means the memory map doesn't look like we expect,

> we can't pass NULL to reserve_region_with_split(), and we just inserted the

> 'reserved' at the top level. Freeing res at this point would be a use-after-free

> hanging from /proc/iomem.

> This code generates a WARN_ON_ONCE() and leaves the 'reserved' description where

> it is.


Okay. I get it.

> Trying to cleanup here is pointless, we can remove the resource entry and free

> it ... but we still want to report it as reserved, which is what just happened,

> just not quite how we we wanted it.

> 

> If you ever see this warning, it means some RAM stopped being nomap between

> request_resources() and reserve_memblock_reserved_regions(). I can't find any

> case where that ever happens.

> 

> 

> If all that makes sense: how can I improve the comment above the WARN_ON_ONCE()

> to make it clearer?


I guess something like you described above.

/*
 * We expect a 'System RAM' section at this address. In the unexpected
 * case where one is not found, request_resource_conflict() will insert
 * res into the iomem_resource tree.
 */

Do you think this is clearer?

> 

> 

> Thanks,

> 

> James

> 

> 

>>> +

>>> +		reserve_region_with_split(mem, start, end, "reserved");

>>> +	}

>>> +

>>> +	return 0;

>>> +}

>>> +arch_initcall(reserve_memblock_reserved_regions);

>>> +

>>>  u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };

>>>  

>>>  void __init setup_arch(char **cmdline_p)

>>>

>
AKASHI Takahiro July 3, 2018, 6:47 a.m. UTC | #4
On Tue, Jun 19, 2018 at 10:22:46AM -0500, Dave Kleikamp wrote:
> On 06/19/2018 10:00 AM, James Morse wrote:

> > Hi Dave,

> > 

> > On 19/06/18 14:37, Dave Kleikamp wrote:

> >> On 06/19/2018 01:44 AM, AKASHI Takahiro wrote:

> >>> +static int __init reserve_memblock_reserved_regions(void)

> >>> +{

> >>> +	phys_addr_t start, end, roundup_end = 0;

> >>> +	struct resource *mem, *res;

> >>> +	u64 i;

> >>> +

> >>> +	for_each_reserved_mem_region(i, &start, &end) {

> >>> +		if (end <= roundup_end)

> >>> +			continue; /* done already */

> >>> +

> >>> +		start = __pfn_to_phys(PFN_DOWN(start));

> >>> +		end = __pfn_to_phys(PFN_UP(end)) - 1;

> >>> +		roundup_end = end;

> >>> +

> >>> +		res = kzalloc(sizeof(*res), GFP_ATOMIC);

> >>> +		if (WARN_ON(!res))

> >>> +			return -ENOMEM;

> >>> +		res->start = start;

> >>> +		res->end = end;

> >>> +		res->name  = "reserved";

> >>> +		res->flags = IORESOURCE_MEM;

> >>> +

> >>> +		mem = request_resource_conflict(&iomem_resource, res);

> >>> +		/*

> >>> +		 * We expected memblock_reserve() regions to conflict with

> >>> +		 * memory created by request_standard_resources().

> >>> +		 */

> >>> +		if (WARN_ON_ONCE(!mem))

> >>> +			continue;

> >>> +		kfree(res);

> >>

> >> Why is kfree() after the conditional continue? This is a memory leak.

> > 

> > request_resource_conflict() inserts res into the iomem_resource tree, or returns

> > the conflict if there is already something at this address.

> > 

> > We expect something at this address: a 'System RAM' section added by

> > request_resource(). This code wants the conflicting 'System RAM' entry so that

> > reserve_region_with_split() can fill in the gaps below it with 'reserved'. See

> > the commit-message for an example.

> > 

> > If there was no conflict, it means the memory map doesn't look like we expect,

> > we can't pass NULL to reserve_region_with_split(), and we just inserted the

> > 'reserved' at the top level. Freeing res at this point would be a use-after-free

> > hanging from /proc/iomem.

> > This code generates a WARN_ON_ONCE() and leaves the 'reserved' description where

> > it is.

> 

> Okay. I get it.

> 

> > Trying to cleanup here is pointless, we can remove the resource entry and free

> > it ... but we still want to report it as reserved, which is what just happened,

> > just not quite how we we wanted it.

> > 

> > If you ever see this warning, it means some RAM stopped being nomap between

> > request_resources() and reserve_memblock_reserved_regions(). I can't find any

> > case where that ever happens.

> > 

> > 

> > If all that makes sense: how can I improve the comment above the WARN_ON_ONCE()

> > to make it clearer?

> 

> I guess something like you described above.

> 

> /*

>  * We expect a 'System RAM' section at this address. In the unexpected

>  * case where one is not found, request_resource_conflict() will insert

>  * res into the iomem_resource tree.

>  */

> 

> Do you think this is clearer?


If this is the only change needed in my patchset, I'd like the maintainers
to take care of it instead of my posting another version.

Thanks,
-Takahiro AKASHI

> > 

> > 

> > Thanks,

> > 

> > James

> > 

> > 

> >>> +

> >>> +		reserve_region_with_split(mem, start, end, "reserved");

> >>> +	}

> >>> +

> >>> +	return 0;

> >>> +}

> >>> +arch_initcall(reserve_memblock_reserved_regions);

> >>> +

> >>>  u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };

> >>>  

> >>>  void __init setup_arch(char **cmdline_p)

> >>>

> >
Bhupesh Sharma July 3, 2018, 12:14 p.m. UTC | #5
On Tue, Jul 3, 2018 at 12:17 PM, AKASHI Takahiro
<takahiro.akashi@linaro.org> wrote:
> On Tue, Jun 19, 2018 at 10:22:46AM -0500, Dave Kleikamp wrote:

>> On 06/19/2018 10:00 AM, James Morse wrote:

>> > Hi Dave,

>> >

>> > On 19/06/18 14:37, Dave Kleikamp wrote:

>> >> On 06/19/2018 01:44 AM, AKASHI Takahiro wrote:

>> >>> +static int __init reserve_memblock_reserved_regions(void)

>> >>> +{

>> >>> + phys_addr_t start, end, roundup_end = 0;

>> >>> + struct resource *mem, *res;

>> >>> + u64 i;

>> >>> +

>> >>> + for_each_reserved_mem_region(i, &start, &end) {

>> >>> +         if (end <= roundup_end)

>> >>> +                 continue; /* done already */

>> >>> +

>> >>> +         start = __pfn_to_phys(PFN_DOWN(start));

>> >>> +         end = __pfn_to_phys(PFN_UP(end)) - 1;

>> >>> +         roundup_end = end;

>> >>> +

>> >>> +         res = kzalloc(sizeof(*res), GFP_ATOMIC);

>> >>> +         if (WARN_ON(!res))

>> >>> +                 return -ENOMEM;

>> >>> +         res->start = start;

>> >>> +         res->end = end;

>> >>> +         res->name  = "reserved";

>> >>> +         res->flags = IORESOURCE_MEM;

>> >>> +

>> >>> +         mem = request_resource_conflict(&iomem_resource, res);

>> >>> +         /*

>> >>> +          * We expected memblock_reserve() regions to conflict with

>> >>> +          * memory created by request_standard_resources().

>> >>> +          */

>> >>> +         if (WARN_ON_ONCE(!mem))

>> >>> +                 continue;

>> >>> +         kfree(res);

>> >>

>> >> Why is kfree() after the conditional continue? This is a memory leak.

>> >

>> > request_resource_conflict() inserts res into the iomem_resource tree, or returns

>> > the conflict if there is already something at this address.

>> >

>> > We expect something at this address: a 'System RAM' section added by

>> > request_resource(). This code wants the conflicting 'System RAM' entry so that

>> > reserve_region_with_split() can fill in the gaps below it with 'reserved'. See

>> > the commit-message for an example.

>> >

>> > If there was no conflict, it means the memory map doesn't look like we expect,

>> > we can't pass NULL to reserve_region_with_split(), and we just inserted the

>> > 'reserved' at the top level. Freeing res at this point would be a use-after-free

>> > hanging from /proc/iomem.

>> > This code generates a WARN_ON_ONCE() and leaves the 'reserved' description where

>> > it is.

>>

>> Okay. I get it.

>>

>> > Trying to cleanup here is pointless, we can remove the resource entry and free

>> > it ... but we still want to report it as reserved, which is what just happened,

>> > just not quite how we we wanted it.

>> >

>> > If you ever see this warning, it means some RAM stopped being nomap between

>> > request_resources() and reserve_memblock_reserved_regions(). I can't find any

>> > case where that ever happens.

>> >

>> >

>> > If all that makes sense: how can I improve the comment above the WARN_ON_ONCE()

>> > to make it clearer?

>>

>> I guess something like you described above.

>>

>> /*

>>  * We expect a 'System RAM' section at this address. In the unexpected

>>  * case where one is not found, request_resource_conflict() will insert

>>  * res into the iomem_resource tree.

>>  */

>>

>> Do you think this is clearer?

>

> If this is the only change needed in my patchset, I'd like the maintainers

> to take care of it instead of my posting another version.

>


+1.

crashkernel booting on arm64 machines which support boot via acpi
tables is broken since a long time, so it would be great to get these
into upstream asap.

I think the comment can be addressed while picking up the patchset in
the maintainer's tree.

I am not sure whether it will go through the ARM tree (Will) or the
EFI tree (Ard), but since this has a Tested-by (from myself) and
Reviewed-by (from James), probably this can be pulled in to allow
further tests via the maintainer's tree.

Thanks,
Bhupesh


>> >

>> >

>> > Thanks,

>> >

>> > James

>> >

>> >

>> >>> +

>> >>> +         reserve_region_with_split(mem, start, end, "reserved");

>> >>> + }

>> >>> +

>> >>> + return 0;

>> >>> +}

>> >>> +arch_initcall(reserve_memblock_reserved_regions);

>> >>> +

>> >>>  u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };

>> >>>

>> >>>  void __init setup_arch(char **cmdline_p)

>> >>>

>> >
Dave Kleikamp July 3, 2018, 4:12 p.m. UTC | #6
On 07/03/2018 01:47 AM, AKASHI Takahiro wrote:
> On Tue, Jun 19, 2018 at 10:22:46AM -0500, Dave Kleikamp wrote:

>> On 06/19/2018 10:00 AM, James Morse wrote:

>>> Hi Dave,

>>>

>>> On 19/06/18 14:37, Dave Kleikamp wrote:

>>>> On 06/19/2018 01:44 AM, AKASHI Takahiro wrote:

>>>>> +static int __init reserve_memblock_reserved_regions(void)

>>>>> +{

>>>>> +	phys_addr_t start, end, roundup_end = 0;

>>>>> +	struct resource *mem, *res;

>>>>> +	u64 i;

>>>>> +

>>>>> +	for_each_reserved_mem_region(i, &start, &end) {

>>>>> +		if (end <= roundup_end)

>>>>> +			continue; /* done already */

>>>>> +

>>>>> +		start = __pfn_to_phys(PFN_DOWN(start));

>>>>> +		end = __pfn_to_phys(PFN_UP(end)) - 1;

>>>>> +		roundup_end = end;

>>>>> +

>>>>> +		res = kzalloc(sizeof(*res), GFP_ATOMIC);

>>>>> +		if (WARN_ON(!res))

>>>>> +			return -ENOMEM;

>>>>> +		res->start = start;

>>>>> +		res->end = end;

>>>>> +		res->name  = "reserved";

>>>>> +		res->flags = IORESOURCE_MEM;

>>>>> +

>>>>> +		mem = request_resource_conflict(&iomem_resource, res);

>>>>> +		/*

>>>>> +		 * We expected memblock_reserve() regions to conflict with

>>>>> +		 * memory created by request_standard_resources().

>>>>> +		 */

>>>>> +		if (WARN_ON_ONCE(!mem))

>>>>> +			continue;

>>>>> +		kfree(res);

>>>>

>>>> Why is kfree() after the conditional continue? This is a memory leak.

>>>

>>> request_resource_conflict() inserts res into the iomem_resource tree, or returns

>>> the conflict if there is already something at this address.

>>>

>>> We expect something at this address: a 'System RAM' section added by

>>> request_resource(). This code wants the conflicting 'System RAM' entry so that

>>> reserve_region_with_split() can fill in the gaps below it with 'reserved'. See

>>> the commit-message for an example.

>>>

>>> If there was no conflict, it means the memory map doesn't look like we expect,

>>> we can't pass NULL to reserve_region_with_split(), and we just inserted the

>>> 'reserved' at the top level. Freeing res at this point would be a use-after-free

>>> hanging from /proc/iomem.

>>> This code generates a WARN_ON_ONCE() and leaves the 'reserved' description where

>>> it is.

>>

>> Okay. I get it.

>>

>>> Trying to cleanup here is pointless, we can remove the resource entry and free

>>> it ... but we still want to report it as reserved, which is what just happened,

>>> just not quite how we we wanted it.

>>>

>>> If you ever see this warning, it means some RAM stopped being nomap between

>>> request_resources() and reserve_memblock_reserved_regions(). I can't find any

>>> case where that ever happens.

>>>

>>>

>>> If all that makes sense: how can I improve the comment above the WARN_ON_ONCE()

>>> to make it clearer?

>>

>> I guess something like you described above.

>>

>> /*

>>  * We expect a 'System RAM' section at this address. In the unexpected

>>  * case where one is not found, request_resource_conflict() will insert

>>  * res into the iomem_resource tree.

>>  */

>>

>> Do you think this is clearer?

> 

> If this is the only change needed in my patchset, I'd like the maintainers

> to take care of it instead of my posting another version.


Agreed. The sooner this gets fixed, the better.

Thanks,
Dave

> 

> Thanks,

> -Takahiro AKASHI

> 

>>>

>>>

>>> Thanks,

>>>

>>> James

>>>

>>>

>>>>> +

>>>>> +		reserve_region_with_split(mem, start, end, "reserved");

>>>>> +	}

>>>>> +

>>>>> +	return 0;

>>>>> +}

>>>>> +arch_initcall(reserve_memblock_reserved_regions);

>>>>> +

>>>>>  u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };

>>>>>  

>>>>>  void __init setup_arch(char **cmdline_p)

>>>>>

>>>
Ard Biesheuvel July 5, 2018, 10:29 p.m. UTC | #7
On 19 June 2018 at 08:44, AKASHI Takahiro <takahiro.akashi@linaro.org> wrote:
> From: James Morse <james.morse@arm.com>

>

> There has been some confusion around what is necessary to prevent kexec

> overwriting important memory regions. memblock: reserve, or nomap?

> Only memblock nomap regions are reported via /proc/iomem, kexec's

> user-space doesn't know about memblock_reserve()d regions.

>

> Until commit f56ab9a5b73ca ("efi/arm: Don't mark ACPI reclaim memory

> as MEMBLOCK_NOMAP") the ACPI tables were nomap, now they are reserved

> and thus possible for kexec to overwrite with the new kernel or initrd.

> But this was always broken, as the UEFI memory map is also reserved

> and not marked as nomap.

>

> Exporting both nomap and reserved memblock types is a nuisance as

> they live in different memblock structures which we can't walk at

> the same time.

>

> Take a second walk over memblock.reserved and add new 'reserved'

> subnodes for the memblock_reserved() regions that aren't already

> described by the existing code. (e.g. Kernel Code)

>

> We use reserve_region_with_split() to find the gaps in existing named

> regions. This handles the gap between 'kernel code' and 'kernel data'

> which is memblock_reserve()d, but already partially described by

> request_standard_resources(). e.g.:

> | 80000000-dfffffff : System RAM

> |   80080000-80ffffff : Kernel code

> |   81000000-8158ffff : reserved

> |   81590000-8237efff : Kernel data

> |   a0000000-dfffffff : Crash kernel

> | e00f0000-f949ffff : System RAM

>

> reserve_region_with_split needs kzalloc() which isn't available when

> request_standard_resources() is called, use an initcall.

>

> Reported-by: Bhupesh Sharma <bhsharma@redhat.com>

> Reported-by: Tyler Baicar <tbaicar@codeaurora.org>

> Suggested-by: Akashi Takahiro <takahiro.akashi@linaro.org>

> Signed-off-by: James Morse <james.morse@arm.com>

> Fixes: d28f6df1305a ("arm64/kexec: Add core kexec support")

> CC: Ard Biesheuvel <ard.biesheuvel@linaro.org>

> CC: Mark Rutland <mark.rutland@arm.com>


Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>


> ---

>  arch/arm64/kernel/setup.c | 38 ++++++++++++++++++++++++++++++++++++++

>  1 file changed, 38 insertions(+)

>

> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c

> index 30ad2f085d1f..5b4fac434c84 100644

> --- a/arch/arm64/kernel/setup.c

> +++ b/arch/arm64/kernel/setup.c

> @@ -241,6 +241,44 @@ static void __init request_standard_resources(void)

>         }

>  }

>

> +static int __init reserve_memblock_reserved_regions(void)

> +{

> +       phys_addr_t start, end, roundup_end = 0;

> +       struct resource *mem, *res;

> +       u64 i;

> +

> +       for_each_reserved_mem_region(i, &start, &end) {

> +               if (end <= roundup_end)

> +                       continue; /* done already */

> +

> +               start = __pfn_to_phys(PFN_DOWN(start));

> +               end = __pfn_to_phys(PFN_UP(end)) - 1;

> +               roundup_end = end;

> +

> +               res = kzalloc(sizeof(*res), GFP_ATOMIC);

> +               if (WARN_ON(!res))

> +                       return -ENOMEM;

> +               res->start = start;

> +               res->end = end;

> +               res->name  = "reserved";

> +               res->flags = IORESOURCE_MEM;

> +

> +               mem = request_resource_conflict(&iomem_resource, res);

> +               /*

> +                * We expected memblock_reserve() regions to conflict with

> +                * memory created by request_standard_resources().

> +                */

> +               if (WARN_ON_ONCE(!mem))

> +                       continue;

> +               kfree(res);

> +

> +               reserve_region_with_split(mem, start, end, "reserved");

> +       }

> +

> +       return 0;

> +}

> +arch_initcall(reserve_memblock_reserved_regions);

> +

>  u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };

>

>  void __init setup_arch(char **cmdline_p)

> --

> 2.17.0

>
diff mbox series

Patch

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 30ad2f085d1f..5b4fac434c84 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -241,6 +241,44 @@  static void __init request_standard_resources(void)
 	}
 }
 
+static int __init reserve_memblock_reserved_regions(void)
+{
+	phys_addr_t start, end, roundup_end = 0;
+	struct resource *mem, *res;
+	u64 i;
+
+	for_each_reserved_mem_region(i, &start, &end) {
+		if (end <= roundup_end)
+			continue; /* done already */
+
+		start = __pfn_to_phys(PFN_DOWN(start));
+		end = __pfn_to_phys(PFN_UP(end)) - 1;
+		roundup_end = end;
+
+		res = kzalloc(sizeof(*res), GFP_ATOMIC);
+		if (WARN_ON(!res))
+			return -ENOMEM;
+		res->start = start;
+		res->end = end;
+		res->name  = "reserved";
+		res->flags = IORESOURCE_MEM;
+
+		mem = request_resource_conflict(&iomem_resource, res);
+		/*
+		 * We expected memblock_reserve() regions to conflict with
+		 * memory created by request_standard_resources().
+		 */
+		if (WARN_ON_ONCE(!mem))
+			continue;
+		kfree(res);
+
+		reserve_region_with_split(mem, start, end, "reserved");
+	}
+
+	return 0;
+}
+arch_initcall(reserve_memblock_reserved_regions);
+
 u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
 
 void __init setup_arch(char **cmdline_p)