diff mbox series

[v9,06/11] arm64: kexec_file: allow for loading Image-format kernel

Message ID 20180425062629.29404-7-takahiro.akashi@linaro.org
State New
Headers show
Series arm64: kexec: add kexec_file_load() support | expand

Commit Message

AKASHI Takahiro April 25, 2018, 6:26 a.m. UTC
This patch provides kexec_file_ops for "Image"-format kernel. In this
implementation, a binary is always loaded with a fixed offset identified
in text_offset field of its header.

Regarding signature verification for trusted boot, this patch doesn't
contain CONFIG_KEXEC_VERIFY_SIG support, which is to be added later
in this series, but file-attribute-based verification is still a viable
option by enabling IMA security subsystem.

You can sign(label) a to-be-kexec'ed kernel image on target file system
with:
    $ evmctl ima_sign --key /path/to/private_key.pem Image

On a live system, you must have IMA enforced with, at least, the following
security policy:
    "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig"

See more details about IMA here:
    https://sourceforge.net/p/linux-ima/wiki/Home/

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/kexec.h         | 50 ++++++++++++++++
 arch/arm64/kernel/Makefile             |  2 +-
 arch/arm64/kernel/kexec_image.c        | 79 ++++++++++++++++++++++++++
 arch/arm64/kernel/machine_kexec_file.c |  1 +
 4 files changed, 131 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/kernel/kexec_image.c

-- 
2.17.0

Comments

James Morse May 1, 2018, 5:46 p.m. UTC | #1
Hi Akashi,

On 25/04/18 07:26, AKASHI Takahiro wrote:
> This patch provides kexec_file_ops for "Image"-format kernel. In this

> implementation, a binary is always loaded with a fixed offset identified

> in text_offset field of its header.



> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h

> index e4de1223715f..3cba4161818a 100644

> --- a/arch/arm64/include/asm/kexec.h

> +++ b/arch/arm64/include/asm/kexec.h

> @@ -102,6 +102,56 @@ struct kimage_arch {

>  	void *dtb_buf;

>  };

>  

> +/**

> + * struct arm64_image_header - arm64 kernel image header

> + *

> + * @pe_sig: Optional PE format 'MZ' signature

> + * @branch_code: Instruction to branch to stext

> + * @text_offset: Image load offset, little endian

> + * @image_size: Effective image size, little endian

> + * @flags:

> + *	Bit 0: Kernel endianness. 0=little endian, 1=big endian


Page size? What about 'phys_base'?, (whatever that is...)
Probably best to refer to Documentation/arm64/booting.txt here, it's the
authoritative source of what these fields mean.


> + * @reserved: Reserved

> + * @magic: Magic number, "ARM\x64"

> + * @pe_header: Optional offset to a PE format header

> + **/

> +

> +struct arm64_image_header {

> +	u8 pe_sig[2];

> +	u8 pad[2];

> +	u32 branch_code;

> +	u64 text_offset;

> +	u64 image_size;

> +	u64 flags;


__le64 as appropriate here would let tools like sparse catch any missing endian
conversion bugs.


> +	u64 reserved[3];

> +	u8 magic[4];

> +	u32 pe_header;

> +};


I'm surprised we don't have a definition for this already, I guess it's always
done in asm. We have kernel/image.h that holds some of this stuff, if we are
going to validate the flags, is it worth adding the code there, (and moving it
to include/asm)?


> +static const u8 arm64_image_magic[4] = {'A', 'R', 'M', 0x64U};


Any chance this magic could be a pre-processor symbol shared with head.S?


> +

> +/**

> + * arm64_header_check_magic - Helper to check the arm64 image header.

> + *

> + * Returns non-zero if header is OK.

> + */

> +

> +static inline int arm64_header_check_magic(const struct arm64_image_header *h)

> +{

> +	if (!h)

> +		return 0;

> +

> +	if (!h->text_offset)

> +		return 0;

> +

> +	return (h->magic[0] == arm64_image_magic[0]

> +		&& h->magic[1] == arm64_image_magic[1]

> +		&& h->magic[2] == arm64_image_magic[2]

> +		&& h->magic[3] == arm64_image_magic[3]);


memcmp()? Or just define it as a 32bit value?
I guess you skip the MZ prefix as its not present for !EFI?

Could we check branch_code is non-zero, and text-offset points within image-size?


We could check that this platform supports the page-size/endian config that this
Image was built with... We get a message from the EFI stub if the page-size
can't be supported, it would be nice to do the same here (as we can).

(no idea if kexec-tool checks this stuff, it probably can't get at the id
registers to know)


> diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c

> new file mode 100644

> index 000000000000..4dd524ad6611

> --- /dev/null

> +++ b/arch/arm64/kernel/kexec_image.c

> @@ -0,0 +1,79 @@


> +static void *image_load(struct kimage *image,

> +				char *kernel, unsigned long kernel_len,

> +				char *initrd, unsigned long initrd_len,

> +				char *cmdline, unsigned long cmdline_len)

> +{

> +	struct kexec_buf kbuf;

> +	struct arm64_image_header *h = (struct arm64_image_header *)kernel;

> +	unsigned long text_offset;

> +	int ret;

> +

> +	/* Load the kernel */

> +	kbuf.image = image;

> +	kbuf.buf_min = 0;

> +	kbuf.buf_max = ULONG_MAX;

> +	kbuf.top_down = false;

> +

> +	kbuf.buffer = kernel;

> +	kbuf.bufsz = kernel_len;

> +	kbuf.memsz = le64_to_cpu(h->image_size);

> +	text_offset = le64_to_cpu(h->text_offset);

> +	kbuf.buf_align = SZ_2M;


> +	/* Adjust kernel segment with TEXT_OFFSET */

> +	kbuf.memsz += text_offset;

> +

> +	ret = kexec_add_buffer(&kbuf);

> +	if (ret)

> +		goto out;

> +

> +	image->arch.kern_segment = image->nr_segments - 1;


You only seem to use kern_segment here, and in load_other_segments() called
below. Could it not be a local variable passed in? Instead of arch-specific data
we keep forever?


> +	image->segment[image->arch.kern_segment].mem += text_offset;

> +	image->segment[image->arch.kern_segment].memsz -= text_offset;

> +	image->start = image->segment[image->arch.kern_segment].mem;

> +

> +	pr_debug("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",

> +				image->segment[image->arch.kern_segment].mem,

> +				kbuf.bufsz, kbuf.memsz);

> +

> +	/* Load additional data */

> +	ret = load_other_segments(image, initrd, initrd_len,

> +				cmdline, cmdline_len);

> +

> +out:

> +	return ERR_PTR(ret);

> +}

Looks good,

Thanks,

James
AKASHI Takahiro May 7, 2018, 7:21 a.m. UTC | #2
James,

On Tue, May 01, 2018 at 06:46:11PM +0100, James Morse wrote:
> Hi Akashi,

> 

> On 25/04/18 07:26, AKASHI Takahiro wrote:

> > This patch provides kexec_file_ops for "Image"-format kernel. In this

> > implementation, a binary is always loaded with a fixed offset identified

> > in text_offset field of its header.

> 

> 

> > diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h

> > index e4de1223715f..3cba4161818a 100644

> > --- a/arch/arm64/include/asm/kexec.h

> > +++ b/arch/arm64/include/asm/kexec.h

> > @@ -102,6 +102,56 @@ struct kimage_arch {

> >  	void *dtb_buf;

> >  };

> >  

> > +/**

> > + * struct arm64_image_header - arm64 kernel image header

> > + *

> > + * @pe_sig: Optional PE format 'MZ' signature

> > + * @branch_code: Instruction to branch to stext

> > + * @text_offset: Image load offset, little endian

> > + * @image_size: Effective image size, little endian

> > + * @flags:

> > + *	Bit 0: Kernel endianness. 0=little endian, 1=big endian

> 

> Page size? What about 'phys_base'?, (whatever that is...)

> Probably best to refer to Documentation/arm64/booting.txt here, its the

> authoritative source of what these fields mean.


While we don't care about the other bit fields for now, I will add the reference
to the Documentation file.

> 

> > + * @reserved: Reserved

> > + * @magic: Magic number, "ARM\x64"

> > + * @pe_header: Optional offset to a PE format header

> > + **/

> > +

> > +struct arm64_image_header {

> > +	u8 pe_sig[2];

> > +	u8 pad[2];

> > +	u32 branch_code;

> > +	u64 text_offset;

> > +	u64 image_size;

> > +	u64 flags;

> 

> __le64 as appropriate here would let tools like sparse catch any missing endian

> conversion bugs.


OK.

> 

> > +	u64 reserved[3];

> > +	u8 magic[4];

> > +	u32 pe_header;

> > +};

> 

> I'm surprised we don't have a definition for this already, I guess its always

> done in asm. We have kernel/image.h that holds some of this stuff, if we are

> going to validate the flags, is it worth adding the code there, (and moving it

> to include/asm)?


A comment at the beginning of this file says,
    #ifndef LINKER_SCRIPT
    #error This file should only be included in vmlinux.lds.S
    #endif
Let me think about it.

> 

> > +static const u8 arm64_image_magic[4] = {'A', 'R', 'M', 0x64U};

> 

> Any chance this magic could be a pre-processor symbol shared with head.S?


OK.

> 

> > +

> > +/**

> > + * arm64_header_check_magic - Helper to check the arm64 image header.

> > + *

> > + * Returns non-zero if header is OK.

> > + */

> > +

> > +static inline int arm64_header_check_magic(const struct arm64_image_header *h)

> > +{

> > +	if (!h)

> > +		return 0;

> > +

> > +	if (!h->text_offset)

> > +		return 0;

> > +

> > +	return (h->magic[0] == arm64_image_magic[0]

> > +		&& h->magic[1] == arm64_image_magic[1]

> > +		&& h->magic[2] == arm64_image_magic[2]

> > +		&& h->magic[3] == arm64_image_magic[3]);

> 

> memcmp()? Or just define it as a 32bit value?


OK. As you know, I have always tried to keep the code from diverging
from kexec-tools for maintainability reasons.

> I guess you skip the MZ prefix as its not present for !EFI?


CONFIG_KEXEC_IMAGE_VERIFY_SIG depends on the fact that the file
format is PE (that is, EFI is enabled).


> Could we check branch_code is non-zero, and text-offset points within image-size?


We could do it, but I don't think this check is very useful.

> 

> We could check that this platform supports the page-size/endian config that this

> Image was built with... We get a message from the EFI stub if the page-size

> can't be supported, it would be nice to do the same here (as we can).


There is no restriction on page-size or endianness for kexec.
What will be the purpose of this check?

> (no idea if kexec-tool checks this stuff, it probably can't get at the id

> registers to know)

> 

> 

> > diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c

> > new file mode 100644

> > index 000000000000..4dd524ad6611

> > --- /dev/null

> > +++ b/arch/arm64/kernel/kexec_image.c

> > @@ -0,0 +1,79 @@

> 

> > +static void *image_load(struct kimage *image,

> > +				char *kernel, unsigned long kernel_len,

> > +				char *initrd, unsigned long initrd_len,

> > +				char *cmdline, unsigned long cmdline_len)

> > +{

> > +	struct kexec_buf kbuf;

> > +	struct arm64_image_header *h = (struct arm64_image_header *)kernel;

> > +	unsigned long text_offset;

> > +	int ret;

> > +

> > +	/* Load the kernel */

> > +	kbuf.image = image;

> > +	kbuf.buf_min = 0;

> > +	kbuf.buf_max = ULONG_MAX;

> > +	kbuf.top_down = false;

> > +

> > +	kbuf.buffer = kernel;

> > +	kbuf.bufsz = kernel_len;

> > +	kbuf.memsz = le64_to_cpu(h->image_size);

> > +	text_offset = le64_to_cpu(h->text_offset);

> > +	kbuf.buf_align = SZ_2M;

> 

> > +	/* Adjust kernel segment with TEXT_OFFSET */

> > +	kbuf.memsz += text_offset;

> > +

> > +	ret = kexec_add_buffer(&kbuf);

> > +	if (ret)

> > +		goto out;

> > +

> > +	image->arch.kern_segment = image->nr_segments - 1;

> 

> You only seem to use kern_segment here, and in load_other_segments() called

> below. Could it not be a local variable passed in? Instead of arch-specific data

> we keep forever?


No, kern_segment is also used in load_other_segments() in machine_kexec_file.c.
To optimize memory hole allocation logic in locate_mem_hole_callback(),
we need to know the exact range of kernel image (start and end).

(Known drawback in this code is that Image only occupies one segment, but
once vmlinux might be supported, it would occupy two segments for text and
data.)

> 

> > +	image->segment[image->arch.kern_segment].mem += text_offset;

> > +	image->segment[image->arch.kern_segment].memsz -= text_offset;

> > +	image->start = image->segment[image->arch.kern_segment].mem;

> > +

> > +	pr_debug("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",

> > +				image->segment[image->arch.kern_segment].mem,

> > +				kbuf.bufsz, kbuf.memsz);

> > +

> > +	/* Load additional data */

> > +	ret = load_other_segments(image, initrd, initrd_len,

> > +				cmdline, cmdline_len);

> > +

> > +out:

> > +	return ERR_PTR(ret);

> > +}

> Looks good,


Thank you for thorough review.

-Takahiro AKASHI


> Thanks,

> 

> James
James Morse May 11, 2018, 5:07 p.m. UTC | #3
Hi Akashi,

On 07/05/18 08:21, AKASHI Takahiro wrote:
> On Tue, May 01, 2018 at 06:46:11PM +0100, James Morse wrote:

>> On 25/04/18 07:26, AKASHI Takahiro wrote:

>>> This patch provides kexec_file_ops for "Image"-format kernel. In this

>>> implementation, a binary is always loaded with a fixed offset identified

>>> in text_offset field of its header.


>>> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h

>>> index e4de1223715f..3cba4161818a 100644

>>> --- a/arch/arm64/include/asm/kexec.h

>>> +++ b/arch/arm64/include/asm/kexec.h

>>> @@ -102,6 +102,56 @@ struct kimage_arch {

>>>  	void *dtb_buf;

>>>  };

>>>  

>>> +/**

>>> + * struct arm64_image_header - arm64 kernel image header

>>> + *

>>> + * @pe_sig: Optional PE format 'MZ' signature

>>> + * @branch_code: Instruction to branch to stext

>>> + * @text_offset: Image load offset, little endian

>>> + * @image_size: Effective image size, little endian

>>> + * @flags:

>>> + *	Bit 0: Kernel endianness. 0=little endian, 1=big endian

>>

>> Page size? What about 'phys_base'?, (whatever that is...)

>> Probably best to refer to Documentation/arm64/booting.txt here, its the

>> authoritative source of what these fields mean.

> 

> While we don't care other bit fields for now, I will add the reference

> to the Documentation file.


Thanks, I don't want to create a second, incomplete set of documentation!


>>> +	u64 reserved[3];

>>> +	u8 magic[4];

>>> +	u32 pe_header;

>>> +};

>>

>> I'm surprised we don't have a definition for this already, I guess its always

>> done in asm. We have kernel/image.h that holds some of this stuff, if we are

>> going to validate the flags, is it worth adding the code there, (and moving it

>> to include/asm)?

> 

> A comment at the beginning of this file says,

>     #ifndef LINKER_SCRIPT

>     #error This file should only be included in vmlinux.lds.S

>     #endif

> Let me think about.


Ah, I missed that.

Having two definitions of something makes me nervous that they can become
different... looks like that header belongs to the linker, and shouldn't be used
here then.


>> I guess you skip the MZ prefix as its not present for !EFI?

> 

> CONFIG_KEXEC_IMAGE_VERIFY_SIG depends on the fact that the file

> format is PE (that is, EFI is enabled).


So if the signature checking is enabled, it's already been checked.


>> Could we check branch_code is non-zero, and text-offset points within image-size?

> 

> We could do it, but I don't think this check is very useful.

> 

>>

>> We could check that this platform supports the page-size/endian config that this

>> Image was built with... We get a message from the EFI stub if the page-size

>> can't be supported, it would be nice to do the same here (as we can).

> 

> There is no restriction on page-size or endianness for kexec.


No, but it won't boot if the hardware doesn't support it. The kernel will spin
at a magic address that is, difficult, to debug without JTAG. The bug report
will be "it didn't boot".


> What will be the purpose of this check?


These values are in the header so that the bootloader can check them, then print
a meaningful error. Here, kexec_file_load() is playing the part of the bootloader.

I'm assuming kexec_file_load() can only be used to kexec linux... unlike regular
kexec. Is this where I'm going wrong?


>>> diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c

>>> new file mode 100644

>>> index 000000000000..4dd524ad6611

>>> --- /dev/null

>>> +++ b/arch/arm64/kernel/kexec_image.c

>>> @@ -0,0 +1,79 @@

>>

>>> +static void *image_load(struct kimage *image,

>>> +				char *kernel, unsigned long kernel_len,

>>> +				char *initrd, unsigned long initrd_len,

>>> +				char *cmdline, unsigned long cmdline_len)

>>> +{

>>> +	struct kexec_buf kbuf;

>>> +	struct arm64_image_header *h = (struct arm64_image_header *)kernel;

>>> +	unsigned long text_offset;

>>> +	int ret;

>>> +

>>> +	/* Load the kernel */

>>> +	kbuf.image = image;

>>> +	kbuf.buf_min = 0;

>>> +	kbuf.buf_max = ULONG_MAX;

>>> +	kbuf.top_down = false;

>>> +

>>> +	kbuf.buffer = kernel;

>>> +	kbuf.bufsz = kernel_len;

>>> +	kbuf.memsz = le64_to_cpu(h->image_size);

>>> +	text_offset = le64_to_cpu(h->text_offset);

>>> +	kbuf.buf_align = SZ_2M;

>>

>>> +	/* Adjust kernel segment with TEXT_OFFSET */

>>> +	kbuf.memsz += text_offset;

>>> +

>>> +	ret = kexec_add_buffer(&kbuf);

>>> +	if (ret)

>>> +		goto out;

>>> +

>>> +	image->arch.kern_segment = image->nr_segments - 1;

>>

>> You only seem to use kern_segment here, and in load_other_segments() called

>> below. Could it not be a local variable passed in? Instead of arch-specific data

>> we keep forever?

> 

> No, kern_segment is also used in load_other_segments() in machine_kexec_file.c.

> To optimize memory hole allocation logic in locate_mem_hole_callback(),

> we need to know the exact range of kernel image (start and end).


That's the second user. My badly-made point is one calls the other, but passes
the data via some until-kexec lifetime struct. (it's not important, just an
indicator this worked differently in the past and hasn't been cleaned up).
I meant something like [0].


Thanks,

James


[0] a diff is worth a thousand words:
--------------------%<--------------------
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index 762f9102899c..c50ce844f09e 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -325,11 +325,10 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
        return ret;
 }

-int load_other_segments(struct kimage *image,
+int load_other_segments(struct kimage *image, struct kexec_segment *kern_seg,
                        char *initrd, unsigned long initrd_len,
                        char *cmdline, unsigned long cmdline_len)
 {
-       struct kexec_segment *kern_seg;
        struct kexec_buf kbuf;
        void *hdrs_addr;
        unsigned long hdrs_sz;
@@ -368,7 +367,6 @@ int load_other_segments(struct kimage *image,
                                 image->arch.elf_load_addr, hdrs_sz, hdrs_sz);
        }

-       kern_seg = &image->segment[image->arch.kern_segment];
        kbuf.image = image;
        /* not allocate anything below the kernel */
        kbuf.buf_min = kern_seg->mem + kern_seg->memsz;
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index 891f2484969d..085cb69293ca 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -173,8 +172,10 @@ static inline int arm64_header_check_pe_sig(const struct arm64_image_header *h)
 extern const struct kexec_file_ops kexec_image_ops;

 struct kimage;
+struct kexec_segment;

 extern int load_other_segments(struct kimage *image,
+               struct kexec_segment *kern_seg,
                char *initrd, unsigned long initrd_len,
                char *cmdline, unsigned long cmdline_len);
 #endif
diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c
index 7c11beefe65f..0e032d30a79c 100644
--- a/arch/arm64/kernel/kexec_image.c
+++ b/arch/arm64/kernel/kexec_image.c
@@ -37,6 +37,7 @@ static void *image_load(struct kimage *image,
                                char *cmdline, unsigned long cmdline_len)
 {
        struct kexec_buf kbuf;
+       struct kexec_segment *kern_seg;
        struct arm64_image_header *h = (struct arm64_image_header *)kernel;
        unsigned long text_offset;
        int ret;
@@ -65,17 +66,17 @@ static void *image_load(struct kimage *image,
        if (ret)
                goto out;

-       image->arch.kern_segment = image->nr_segments - 1;
-       image->segment[image->arch.kern_segment].mem += text_offset;
-       image->segment[image->arch.kern_segment].memsz -= text_offset;
-       image->start = image->segment[image->arch.kern_segment].mem;
+       kern_seg = &image->segment[image->nr_segments - 1];
+       kern_seg->mem += text_offset;
+       kern_seg->memsz -= text_offset;
+       image->start = kern_seg->mem;

        pr_debug("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
-                               image->segment[image->arch.kern_segment].mem,
+                               kern_seg->mem,
                                kbuf.bufsz, kbuf.memsz);

        /* Load additional data */
-       ret = load_other_segments(image, initrd, initrd_len,
+       ret = load_other_segments(image, kern_seg, initrd, initrd_len,
                                cmdline, cmdline_len);

 out:
--------------------%<--------------------
AKASHI Takahiro May 15, 2018, 5:13 a.m. UTC | #4
James,

On Fri, May 11, 2018 at 06:07:06PM +0100, James Morse wrote:
> Hi Akashi,

> 

> On 07/05/18 08:21, AKASHI Takahiro wrote:

> > On Tue, May 01, 2018 at 06:46:11PM +0100, James Morse wrote:

> >> On 25/04/18 07:26, AKASHI Takahiro wrote:

> >>> This patch provides kexec_file_ops for "Image"-format kernel. In this

> >>> implementation, a binary is always loaded with a fixed offset identified

> >>> in text_offset field of its header.

> 

> >>> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h

> >>> index e4de1223715f..3cba4161818a 100644

> >>> --- a/arch/arm64/include/asm/kexec.h

> >>> +++ b/arch/arm64/include/asm/kexec.h

> >>> @@ -102,6 +102,56 @@ struct kimage_arch {

> >>>  	void *dtb_buf;

> >>>  };

> >>>  

> >>> +/**

> >>> + * struct arm64_image_header - arm64 kernel image header

> >>> + *

> >>> + * @pe_sig: Optional PE format 'MZ' signature


To be precise, this is NOT a PE signature but MS-DOS header's magic.
(There is another "PE" signature in PE COFF file header pointed to by
'pe_header'.)
I will correct its name.

> >>> + * @branch_code: Instruction to branch to stext

> >>> + * @text_offset: Image load offset, little endian

> >>> + * @image_size: Effective image size, little endian

> >>> + * @flags:

> >>> + *	Bit 0: Kernel endianness. 0=little endian, 1=big endian

> >>

> >> Page size? What about 'phys_base'?, (whatever that is...)

> >> Probably best to refer to Documentation/arm64/booting.txt here, its the

> >> authoritative source of what these fields mean.

> > 

> > While we don't care other bit fields for now, I will add the reference

> > to the Documentation file.

> 

> Thanks, I don't want to create a second, incomplete set of documentation!


I will leave a minimum of description of parameters here.

> 

> 

> >>> +	u64 reserved[3];

> >>> +	u8 magic[4];

> >>> +	u32 pe_header;

> >>> +};

> >>

> >> I'm surprised we don't have a definition for this already, I guess its always

> >> done in asm. We have kernel/image.h that holds some of this stuff, if we are

> >> going to validate the flags, is it worth adding the code there, (and moving it

> >> to include/asm)?

> > 

> > A comment at the beginning of this file says,

> >     #ifndef LINKER_SCRIPT

> >     #error This file should only be included in vmlinux.lds.S

> >     #endif

> > Let me think about.

> 

> Ah, I missed that.

> 

> Having two definitions of something makes me nervous that they can become

> different... looks like that header belongs to the linker, and shouldn't be used

> here then.


OK.

> 

> >> I guess you skip the MZ prefix as its not present for !EFI?


Correct, but MZ checking in probe function is just an informative message.

> > 

> > CONFIG_KEXEC_IMAGE_VERIFY_SIG depends on the fact that the file

> > format is PE (that is, EFI is enabled).

> 

> So if the signature checking is enabled, its already been checked.


The signature, either MZ or PE, in a file will be actually checked
in verify_pefile_signature().

> 

> >> Could we check branch_code is non-zero, and text-offset points within image-size?

> > 

> > We could do it, but I don't think this check is very useful.

> > 

> >>

> >> We could check that this platform supports the page-size/endian config that this

> >> Image was built with... We get a message from the EFI stub if the page-size

> >> can't be supported, it would be nice to do the same here (as we can).

> > 

> > There is no restriction on page-size or endianness for kexec.

> 

> No, but it won't boot if the hardware doesn't support it. The kernel will spin

> at a magic address that is, difficult, to debug without JTAG. The bug report

> will be "it didn't boot".


OK.
Added sanity checks for cpu features, endianness as well as page size.

> 

> > What will be the purpose of this check?

> 

> These values are in the header so that the bootloader can check them, then print

> a meaningful error. Here, kexec_file_load() is playing the part of the bootloader.

> 

> I'm assuming kexec_file_load() can only be used to kexec linux... unlike regular

> kexec. Is this where I'm going wrong?

> 

> 

> >>> diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c

> >>> new file mode 100644

> >>> index 000000000000..4dd524ad6611

> >>> --- /dev/null

> >>> +++ b/arch/arm64/kernel/kexec_image.c

> >>> @@ -0,0 +1,79 @@

> >>

> >>> +static void *image_load(struct kimage *image,

> >>> +				char *kernel, unsigned long kernel_len,

> >>> +				char *initrd, unsigned long initrd_len,

> >>> +				char *cmdline, unsigned long cmdline_len)

> >>> +{

> >>> +	struct kexec_buf kbuf;

> >>> +	struct arm64_image_header *h = (struct arm64_image_header *)kernel;

> >>> +	unsigned long text_offset;

> >>> +	int ret;

> >>> +

> >>> +	/* Load the kernel */

> >>> +	kbuf.image = image;

> >>> +	kbuf.buf_min = 0;

> >>> +	kbuf.buf_max = ULONG_MAX;

> >>> +	kbuf.top_down = false;

> >>> +

> >>> +	kbuf.buffer = kernel;

> >>> +	kbuf.bufsz = kernel_len;

> >>> +	kbuf.memsz = le64_to_cpu(h->image_size);

> >>> +	text_offset = le64_to_cpu(h->text_offset);

> >>> +	kbuf.buf_align = SZ_2M;

> >>

> >>> +	/* Adjust kernel segment with TEXT_OFFSET */

> >>> +	kbuf.memsz += text_offset;

> >>> +

> >>> +	ret = kexec_add_buffer(&kbuf);

> >>> +	if (ret)

> >>> +		goto out;

> >>> +

> >>> +	image->arch.kern_segment = image->nr_segments - 1;

> >>

> >> You only seem to use kern_segment here, and in load_other_segments() called

> >> below. Could it not be a local variable passed in? Instead of arch-specific data

> >> we keep forever?

> > 

> > No, kern_segment is also used in load_other_segments() in machine_kexec_file.c.

> > To optimize memory hole allocation logic in locate_mem_hole_callback(),

> > we need to know the exact range of kernel image (start and end).

> 

> That's the second user. My badly-made point is one calls the other, but passes

> the data via some until-kexec lifetime struct. (its not important, just an

> indicator this worked differently in the past and hasn't been cleaned up).

> I meant something like [0].


OK, but instead of adding kern_seg, I want to change the interface to:

| extern int load_other_segments(struct kimage *image,
|		unsigned long kernel_load_addr, unsigned long kernel_size,
|		char *initrd, unsigned long initrd_len,
|		char *cmdline, unsigned long cmdline_len);

This way, we will in future be able to address an issue I mentioned in
my previous e-mail. (If we support vmlinux, the kernel occupies two segments
for text and data, respectively.)

Thanks,
-Takahiro AKASHI


> 

> Thanks,

> 

> James

> 

> 

> [0] a diff is worth a thousand words:

> --------------------%<--------------------

> diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_

> kexec_file.c

> index 762f9102899c..c50ce844f09e 100644

> --- a/arch/arm64/kernel/machine_kexec_file.c

> +++ b/arch/arm64/kernel/machine_kexec_file.c

> @@ -325,11 +325,10 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)

>         return ret;

>  }

> 

> -int load_other_segments(struct kimage *image,

> +int load_other_segments(struct kimage *image, struct kexec_segment *kern_seg,

>                         char *initrd, unsigned long initrd_len,

>                         char *cmdline, unsigned long cmdline_len)

>  {

> -       struct kexec_segment *kern_seg;

>         struct kexec_buf kbuf;

>         void *hdrs_addr;

>         unsigned long hdrs_sz;

> @@ -368,7 +367,6 @@ int load_other_segments(struct kimage *image,

>                                  image->arch.elf_load_addr, hdrs_sz, hdrs_sz);

>         }

> 

> -       kern_seg = &image->segment[image->arch.kern_segment];

>         kbuf.image = image;

>         /* not allocate anything below the kernel */

>         kbuf.buf_min = kern_seg->mem + kern_seg->memsz;

> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h

> index 891f2484969d..085cb69293ca 100644

> --- a/arch/arm64/include/asm/kexec.h

> +++ b/arch/arm64/include/asm/kexec.h

> @@ -173,8 +172,10 @@ static inline int arm64_header_check_pe_sig(const struct ar

> m64_image_header *h)

>  extern const struct kexec_file_ops kexec_image_ops;

> 

>  struct kimage;

> +struct kexec_segment;

> 

>  extern int load_other_segments(struct kimage *image,

> +               struct kexec_segment *kern_seg,

>                 char *initrd, unsigned long initrd_len,

>                 char *cmdline, unsigned long cmdline_len);

>  #endif

> diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c

> index 7c11beefe65f..0e032d30a79c 100644

> --- a/arch/arm64/kernel/kexec_image.c

> +++ b/arch/arm64/kernel/kexec_image.c

> @@ -37,6 +37,7 @@ static void *image_load(struct kimage *image,

>                                 char *cmdline, unsigned long cmdline_len)

>  {

>         struct kexec_buf kbuf;

> +       struct kexec_segment *kern_seg;

>         struct arm64_image_header *h = (struct arm64_image_header *)kernel;

>         unsigned long text_offset;

>         int ret;

> @@ -65,17 +66,17 @@ static void *image_load(struct kimage *image,

>         if (ret)

>                 goto out;

> 

> -       image->arch.kern_segment = image->nr_segments - 1;

> -       image->segment[image->arch.kern_segment].mem += text_offset;

> -       image->segment[image->arch.kern_segment].memsz -= text_offset;

> -       image->start = image->segment[image->arch.kern_segment].mem;

> +       kern_seg = &image->segment[image->nr_segments - 1];

> +       kern_seg->mem += text_offset;

> +       kern_seg->memsz -= text_offset;

> +       image->start = kern_seg->mem;

> 

>         pr_debug("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",

> -                               image->segment[image->arch.kern_segment].mem,

> +                               kern_seg->mem,

>                                 kbuf.bufsz, kbuf.memsz);

> 

>         /* Load additional data */

> -       ret = load_other_segments(image, initrd, initrd_len,

> +       ret = load_other_segments(image, kern_seg, initrd, initrd_len,

>                                 cmdline, cmdline_len);

> 

>  out:

> --------------------%<--------------------
James Morse May 15, 2018, 5:14 p.m. UTC | #5
Hi Akashi,

On 15/05/18 06:13, AKASHI Takahiro wrote:
> On Fri, May 11, 2018 at 06:07:06PM +0100, James Morse wrote:

>> On 07/05/18 08:21, AKASHI Takahiro wrote:

>>> On Tue, May 01, 2018 at 06:46:11PM +0100, James Morse wrote:

>>>> On 25/04/18 07:26, AKASHI Takahiro wrote:

>>>>> This patch provides kexec_file_ops for "Image"-format kernel. In this

>>>>> implementation, a binary is always loaded with a fixed offset identified

>>>>> in text_offset field of its header.

>>

>>>>> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h

>>>>> index e4de1223715f..3cba4161818a 100644

>>>>> --- a/arch/arm64/include/asm/kexec.h

>>>>> +++ b/arch/arm64/include/asm/kexec.h


>>>> Could we check branch_code is non-zero, and text-offset points within image-size?

>>>

>>> We could do it, but I don't think this check is very useful.

>>>

>>>>

>>>> We could check that this platform supports the page-size/endian config that this

>>>> Image was built with... We get a message from the EFI stub if the page-size

>>>> can't be supported, it would be nice to do the same here (as we can).

>>>

>>> There is no restriction on page-size or endianness for kexec.

>>

>> No, but it won't boot if the hardware doesn't support it. The kernel will spin

>> at a magic address that is, difficult, to debug without JTAG. The bug report

>> will be "it didn't boot".

> 

> OK.

> Added sanity checks for cpu features, endianness as well as page size.

> 

>>

>>> What will be the purpose of this check?

>>

>> These values are in the header so that the bootloader can check them, then print

>> a meaningful error. Here, kexec_file_load() is playing the part of the bootloader.


>> I'm assuming kexec_file_load() can only be used to kexec linux... unlike regular

>> kexec. Is this where I'm going wrong?


Trying to work this out for myself: we can't support any UEFI application as we
can't give it the boot-services environment, so I'm pretty sure
kexec_file_load() must be linux-specific.

Can we state somewhere that we only expect arm64 Linux to be booted with
kexec_file_load()? It's not clear from the Kconfig text, which refers to kexec,
which explicitly states it can boot other OSes. But for kexec_file_load() we're
following the kernel's booting.txt.


>>>>> diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c

>>>>> new file mode 100644

>>>>> index 000000000000..4dd524ad6611

>>>>> --- /dev/null

>>>>> +++ b/arch/arm64/kernel/kexec_image.c

>>>>> @@ -0,0 +1,79 @@

>>>>

>>>>> +static void *image_load(struct kimage *image,

>>>>> +				char *kernel, unsigned long kernel_len,

>>>>> +				char *initrd, unsigned long initrd_len,

>>>>> +				char *cmdline, unsigned long cmdline_len)

>>>>> +{

>>>>> +	struct kexec_buf kbuf;

>>>>> +	struct arm64_image_header *h = (struct arm64_image_header *)kernel;

>>>>> +	unsigned long text_offset;

>>>>> +	int ret;

>>>>> +

>>>>> +	/* Load the kernel */

>>>>> +	kbuf.image = image;

>>>>> +	kbuf.buf_min = 0;

>>>>> +	kbuf.buf_max = ULONG_MAX;

>>>>> +	kbuf.top_down = false;

>>>>> +

>>>>> +	kbuf.buffer = kernel;

>>>>> +	kbuf.bufsz = kernel_len;

>>>>> +	kbuf.memsz = le64_to_cpu(h->image_size);

>>>>> +	text_offset = le64_to_cpu(h->text_offset);

>>>>> +	kbuf.buf_align = SZ_2M;

>>>>

>>>>> +	/* Adjust kernel segment with TEXT_OFFSET */

>>>>> +	kbuf.memsz += text_offset;

>>>>> +

>>>>> +	ret = kexec_add_buffer(&kbuf);

>>>>> +	if (ret)

>>>>> +		goto out;

>>>>> +

>>>>> +	image->arch.kern_segment = image->nr_segments - 1;

>>>>

>>>> You only seem to use kern_segment here, and in load_other_segments() called

>>>> below. Could it not be a local variable passed in? Instead of arch-specific data

>>>> we keep forever?

>>>

>>> No, kern_segment is also used in load_other_segments() in machine_kexec_file.c.

>>> To optimize memory hole allocation logic in locate_mem_hole_callback(),

>>> we need to know the exact range of kernel image (start and end).

>>

>> That's the second user. My badly-made point is one calls the other, but passes

>> the data via some until-kexec lifetime struct. (its not important, just an

>> indicator this worked differently in the past and hasn't been cleaned up).

>> I meant something like [0].

> 

> OK, but instead of adding kern_seg, I want to change the interface to:

> 

> | extern int load_other_segments(struct kimage *image,

> |		unsigned long kernel_load_addr, unsigned long kernel_size,

> |		char *initrd, unsigned long initrd_len,

> |		char *cmdline, unsigned long cmdline_len);

> 

> This way, we will in future be able to address an issue I mentioned in

> my previous e-mail. (If we support vmlinux, the kernel occupies two segments

> for text and data, respectively.)


Aha, it's not from old-stuff, it's for future-stuff!


James
AKASHI Takahiro May 21, 2018, 9:32 a.m. UTC | #6
James,

I haven't commented on this email.

On Tue, May 15, 2018 at 06:14:37PM +0100, James Morse wrote:
> Hi Akashi,

> 

> On 15/05/18 06:13, AKASHI Takahiro wrote:

> > On Fri, May 11, 2018 at 06:07:06PM +0100, James Morse wrote:

> >> On 07/05/18 08:21, AKASHI Takahiro wrote:

> >>> On Tue, May 01, 2018 at 06:46:11PM +0100, James Morse wrote:

> >>>> On 25/04/18 07:26, AKASHI Takahiro wrote:

> >>>>> This patch provides kexec_file_ops for "Image"-format kernel. In this

> >>>>> implementation, a binary is always loaded with a fixed offset identified

> >>>>> in text_offset field of its header.

> >>

> >>>>> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h

> >>>>> index e4de1223715f..3cba4161818a 100644

> >>>>> --- a/arch/arm64/include/asm/kexec.h

> >>>>> +++ b/arch/arm64/include/asm/kexec.h

> 

> >>>> Could we check branch_code is non-zero, and text-offset points within image-size?

> >>>

> >>> We could do it, but I don't think this check is very useful.

> >>>

> >>>>

> >>>> We could check that this platform supports the page-size/endian config that this

> >>>> Image was built with... We get a message from the EFI stub if the page-size

> >>>> can't be supported, it would be nice to do the same here (as we can).

> >>>

> >>> There is no restriction on page-size or endianness for kexec.

> >>

> >> No, but it won't boot if the hardware doesn't support it. The kernel will spin

> >> at a magic address that is, difficult, to debug without JTAG. The bug report

> >> will be "it didn't boot".

> > 

> > OK.

> > Added sanity checks for cpu features, endianness as well as page size.

> > 

> >>

> >>> What will be the purpose of this check?

> >>

> >> These values are in the header so that the bootloader can check them, then print

> >> a meaningful error. Here, kexec_file_load() is playing the part of the bootloader.

> 

> >> I'm assuming kexec_file_load() can only be used to kexec linux... unlike regular

> >> kexec. Is this where I'm going wrong?

> 

> Trying to work this out for myself: we can't support any UEFI application as we

> can't give it the boot-services environment, so I'm pretty sure

> kexec_file_load() must be linux-specific.

> 

> Can we state somewhere that we only expect arm64 linux to be booted with

> kexec_file_load()? Its not clear from the kconfig text, which refers to kexec,

> which explicitly states it can boot other OS. But for kexec_file_load() we're

> following the kernel's booting.txt.


While I don't know anything about the requirements for booting other OSes, nor
whether we can boot them even with kexec, I agree that kexec_file_load() is a
more limited form of booting mechanism. I will add a statement to that effect in Kconfig.

> >>>>> diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c

> >>>>> new file mode 100644

> >>>>> index 000000000000..4dd524ad6611

> >>>>> --- /dev/null

> >>>>> +++ b/arch/arm64/kernel/kexec_image.c

> >>>>> @@ -0,0 +1,79 @@

> >>>>

> >>>>> +static void *image_load(struct kimage *image,

> >>>>> +				char *kernel, unsigned long kernel_len,

> >>>>> +				char *initrd, unsigned long initrd_len,

> >>>>> +				char *cmdline, unsigned long cmdline_len)

> >>>>> +{

> >>>>> +	struct kexec_buf kbuf;

> >>>>> +	struct arm64_image_header *h = (struct arm64_image_header *)kernel;

> >>>>> +	unsigned long text_offset;

> >>>>> +	int ret;

> >>>>> +

> >>>>> +	/* Load the kernel */

> >>>>> +	kbuf.image = image;

> >>>>> +	kbuf.buf_min = 0;

> >>>>> +	kbuf.buf_max = ULONG_MAX;

> >>>>> +	kbuf.top_down = false;

> >>>>> +

> >>>>> +	kbuf.buffer = kernel;

> >>>>> +	kbuf.bufsz = kernel_len;

> >>>>> +	kbuf.memsz = le64_to_cpu(h->image_size);

> >>>>> +	text_offset = le64_to_cpu(h->text_offset);

> >>>>> +	kbuf.buf_align = SZ_2M;

> >>>>

> >>>>> +	/* Adjust kernel segment with TEXT_OFFSET */

> >>>>> +	kbuf.memsz += text_offset;

> >>>>> +

> >>>>> +	ret = kexec_add_buffer(&kbuf);

> >>>>> +	if (ret)

> >>>>> +		goto out;

> >>>>> +

> >>>>> +	image->arch.kern_segment = image->nr_segments - 1;

> >>>>

> >>>> You only seem to use kern_segment here, and in load_other_segments() called

> >>>> below. Could it not be a local variable passed in? Instead of arch-specific data

> >>>> we keep forever?

> >>>

> >>> No, kern_segment is also used in load_other_segments() in machine_kexec_file.c.

> >>> To optimize memory hole allocation logic in locate_mem_hole_callback(),

> >>> we need to know the exact range of kernel image (start and end).

> >>

> >> That's the second user. My badly-made point is one calls the other, but passes

> >> the data via some until-kexec lifetime struct. (its not important, just an

> >> indicator this worked differently in the past and hasn't been cleaned up).

> >> I meant something like [0].

> > 

> > OK, but instead of adding kern_seg, I want to change the interface to:

> > 

> > | extern int load_other_segments(struct kimage *image,

> > |		unsigned long kernel_load_addr, unsigned long kernel_size,

> > |		char *initrd, unsigned long initrd_len,

> > |		char *cmdline, unsigned long cmdline_len);

> > 

> > This way, we will in future be able to address an issue I mentioned in

> > my previous e-mail. (If we support vmlinux, the kernel occupies two segments

> > for text and data, respectively.)

> 

> Aha, its not from old-stuff, its for future-stuff!


I have a vmlinux patch, but I am very unlikely to submit it :)

Thanks,
-Takahiro AKASHI

> 

> James
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index e4de1223715f..3cba4161818a 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -102,6 +102,56 @@  struct kimage_arch {
 	void *dtb_buf;
 };
 
+/**
+ * struct arm64_image_header - arm64 kernel image header
+ *
+ * @pe_sig: Optional PE format 'MZ' signature
+ * @branch_code: Instruction to branch to stext
+ * @text_offset: Image load offset, little endian
+ * @image_size: Effective image size, little endian
+ * @flags:
+ *	Bit 0: Kernel endianness. 0=little endian, 1=big endian
+ * @reserved: Reserved
+ * @magic: Magic number, "ARM\x64"
+ * @pe_header: Optional offset to a PE format header
+ **/
+
+struct arm64_image_header {
+	u8 pe_sig[2];
+	u8 pad[2];
+	u32 branch_code;
+	u64 text_offset;
+	u64 image_size;
+	u64 flags;
+	u64 reserved[3];
+	u8 magic[4];
+	u32 pe_header;
+};
+
+static const u8 arm64_image_magic[4] = {'A', 'R', 'M', 0x64U};
+
+/**
+ * arm64_header_check_magic - Helper to check the arm64 image header.
+ *
+ * Returns non-zero if header is OK.
+ */
+
+static inline int arm64_header_check_magic(const struct arm64_image_header *h)
+{
+	if (!h)
+		return 0;
+
+	if (!h->text_offset)
+		return 0;
+
+	return (h->magic[0] == arm64_image_magic[0]
+		&& h->magic[1] == arm64_image_magic[1]
+		&& h->magic[2] == arm64_image_magic[2]
+		&& h->magic[3] == arm64_image_magic[3]);
+}
+
+extern const struct kexec_file_ops kexec_image_ops;
+
 struct kimage;
 
 extern int load_other_segments(struct kimage *image,
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 2f2b2757ae7a..1e110aa571dd 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -50,7 +50,7 @@  arm64-obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr.o
 arm64-obj-$(CONFIG_HIBERNATION)		+= hibernate.o hibernate-asm.o
 arm64-obj-$(CONFIG_KEXEC_CORE)		+= machine_kexec.o relocate_kernel.o	\
 					   cpu-reset.o
-arm64-obj-$(CONFIG_KEXEC_FILE)		+= machine_kexec_file.o
+arm64-obj-$(CONFIG_KEXEC_FILE)		+= machine_kexec_file.o kexec_image.o
 arm64-obj-$(CONFIG_ARM64_RELOC_TEST)	+= arm64-reloc-test.o
 arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
 arm64-obj-$(CONFIG_CRASH_DUMP)		+= crash_dump.o
diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c
new file mode 100644
index 000000000000..4dd524ad6611
--- /dev/null
+++ b/arch/arm64/kernel/kexec_image.c
@@ -0,0 +1,79 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Kexec image loader
+ *
+ * Copyright (C) 2018 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ */
+
+#define pr_fmt(fmt)	"kexec_file(Image): " fmt
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <asm/byteorder.h>
+#include <asm/memory.h>
+
+static int image_probe(const char *kernel_buf, unsigned long kernel_len)
+{
+	const struct arm64_image_header *h;
+
+	h = (const struct arm64_image_header *)(kernel_buf);
+
+	if ((kernel_len < sizeof(*h)) || !arm64_header_check_magic(h))
+		return -EINVAL;
+
+	return 0;
+}
+
+static void *image_load(struct kimage *image,
+				char *kernel, unsigned long kernel_len,
+				char *initrd, unsigned long initrd_len,
+				char *cmdline, unsigned long cmdline_len)
+{
+	struct kexec_buf kbuf;
+	struct arm64_image_header *h = (struct arm64_image_header *)kernel;
+	unsigned long text_offset;
+	int ret;
+
+	/* Load the kernel */
+	kbuf.image = image;
+	kbuf.buf_min = 0;
+	kbuf.buf_max = ULONG_MAX;
+	kbuf.top_down = false;
+
+	kbuf.buffer = kernel;
+	kbuf.bufsz = kernel_len;
+	kbuf.memsz = le64_to_cpu(h->image_size);
+	text_offset = le64_to_cpu(h->text_offset);
+	kbuf.buf_align = SZ_2M;
+
+	/* Adjust kernel segment with TEXT_OFFSET */
+	kbuf.memsz += text_offset;
+
+	ret = kexec_add_buffer(&kbuf);
+	if (ret)
+		goto out;
+
+	image->arch.kern_segment = image->nr_segments - 1;
+	image->segment[image->arch.kern_segment].mem += text_offset;
+	image->segment[image->arch.kern_segment].memsz -= text_offset;
+	image->start = image->segment[image->arch.kern_segment].mem;
+
+	pr_debug("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+				image->segment[image->arch.kern_segment].mem,
+				kbuf.bufsz, kbuf.memsz);
+
+	/* Load additional data */
+	ret = load_other_segments(image, initrd, initrd_len,
+				cmdline, cmdline_len);
+
+out:
+	return ERR_PTR(ret);
+}
+
+const struct kexec_file_ops kexec_image_ops = {
+	.probe = image_probe,
+	.load = image_load,
+};
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index b3b9b1725d8a..37c0a9dc2e47 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -23,6 +23,7 @@  static int __dt_root_addr_cells;
 static int __dt_root_size_cells;
 
 const struct kexec_file_ops * const kexec_file_loaders[] = {
+	&kexec_image_ops,
 	NULL
 };