diff mbox

[v2,5/6] target-arm: support QMP dump-guest-memory

Message ID 1448411877-22019-6-git-send-email-drjones@redhat.com
State New
Headers show

Commit Message

Andrew Jones Nov. 25, 2015, 12:37 a.m. UTC
Add the support needed for creating prstatus elf notes. This
allows us to use QMP dump-guest-memory.

Signed-off-by: Andrew Jones <drjones@redhat.com>

---
 target-arm/Makefile.objs |   3 +-
 target-arm/arch_dump.c   | 230 +++++++++++++++++++++++++++++++++++++++++++++++
 target-arm/cpu-qom.h     |   5 ++
 target-arm/cpu.c         |   3 +
 4 files changed, 239 insertions(+), 2 deletions(-)
 create mode 100644 target-arm/arch_dump.c

-- 
2.4.3

Comments

Peter Maydell Dec. 3, 2015, 11:44 a.m. UTC | #1
On 25 November 2015 at 00:37, Andrew Jones <drjones@redhat.com> wrote:
> Add the support needed for creating prstatus elf notes. This

> allows us to use QMP dump-guest-memory.


> +

> +    if (is_a64(env)) {

> +        for (i = 0; i < 31; ++i) {

> +            note.prstatus.pr_reg.regs[i] = cpu_to_dump64(s, env->xregs[i]);

> +        }

> +        note.prstatus.pr_reg.sp = cpu_to_dump64(s, env->xregs[31]);

> +        note.prstatus.pr_reg.pc = cpu_to_dump64(s, env->pc);

> +        note.prstatus.pr_reg.pstate = cpu_to_dump64(s, pstate_read(env));

> +    } else {

> +        aarch64_sync_64_to_32(env);

> +        for (i = 0; i < 16; ++i) {

> +            note.prstatus.pr_reg.regs[i] = cpu_to_dump64(s, env->regs[i]);

> +        }

> +        note.prstatus.pr_reg.sp = note.prstatus.pr_reg.regs[13];

> +        note.prstatus.pr_reg.pc = note.prstatus.pr_reg.regs[15];

> +        note.prstatus.pr_reg.pstate = cpu_to_dump64(s, cpsr_read(env));

> +    }


This doesn't look right. sync_64_to_32 is copying the state held
in the 64-bit env->xregs etc into the 32-bit env->regs. But if we're
in 32-bit state then the true state is in the 32-bit fields and
this will trash it. You want to sync_32_to_64 here, and then the
code to write the values to the dump is the same either way
(except for pstate vs cpsr which we haven't managed to clean up
and unify yet, sadly).

I think you want
   uint64_t pstate;
   [...]

   if (!is_a64(env)) {
       aarch64_sync_32_to_64(env);
       pstate = cpsr_read(env);
   } else {
       pstate = pstate_read(env);
   }
   for (i = 0; i < 31; ++i) {
       note.prstatus.pr_reg.regs[i] = cpu_to_dump64(s, env->xregs[i]);
   }
   note.prstatus.pr_reg.sp = cpu_to_dump64(s, env->xregs[31]);
   note.prstatus.pr_reg.pc = cpu_to_dump64(s, env->pc);
   note.prstatus.pr_reg.pstate = cpu_to_dump64(s, pstate);

(Note that the 32-bit SP is not architecturally in X31;
it's in one of the other xregs, depending what mode the CPU
was in. For 32-bit userspace that will be USR and it's in X13.)


> +

> +    ret = f(&note, sizeof(note), s);

> +    if (ret < 0) {

> +        return -1;

> +    }

> +

> +    return 0;

> +}

> +

> +/* struct pt_regs from arch/arm/include/asm/ptrace.h */

> +struct arm_user_regs {

> +    uint32_t regs[17];

> +    char pad[4];

> +} QEMU_PACKED;

> +

> +QEMU_BUILD_BUG_ON(sizeof(struct arm_user_regs) != 72);

> +

> +/* struct elf_prstatus from include/uapi/linux/elfcore.h */

> +struct arm_elf_prstatus {

> +    char pad1[24]; /* 24 == offsetof(struct elf_prstatus, pr_pid) */

> +    uint32_t pr_pid;

> +    char pad2[44]; /* 44 == offsetof(struct elf_prstatus, pr_reg) -

> +                            offsetof(struct elf_prstatus, pr_ppid) */

> +    struct arm_user_regs pr_reg;

> +    int pr_fpvalid;

> +} QEMU_PACKED arm_elf_prstatus;

> +

> +QEMU_BUILD_BUG_ON(sizeof(struct arm_elf_prstatus) != 148);

> +

> +struct arm_note {

> +    Elf32_Nhdr hdr;

> +    char name[QEMU_ALIGN_UP(NOTE_NAMESZ, 4)];

> +    struct arm_elf_prstatus prstatus;

> +} QEMU_PACKED;

> +

> +QEMU_BUILD_BUG_ON(sizeof(struct arm_note) != 168);

> +

> +static int

> +arm_write_elf32_note(WriteCoreDumpFunction f, CPUARMState *env,

> +                     int id, DumpState *s)

> +{

> +    struct arm_note note;

> +    int ret, i;

> +

> +    memset(&note, 0, sizeof(note));

> +

> +    note.hdr.n_namesz = cpu_to_dump32(s, NOTE_NAMESZ);

> +    note.hdr.n_descsz = cpu_to_dump32(s, sizeof(note.prstatus));

> +    note.hdr.n_type = cpu_to_dump32(s, NT_PRSTATUS);

> +

> +    memcpy(note.name, NOTE_NAME, NOTE_NAMESZ);

> +    note.prstatus.pr_pid = cpu_to_dump32(s, id);

> +

> +    for (i = 0; i < 16; ++i) {

> +        note.prstatus.pr_reg.regs[i] = cpu_to_dump32(s, env->regs[i]);

> +    }

> +    note.prstatus.pr_reg.regs[16] = cpu_to_dump32(s, cpsr_read(env));

> +

> +    ret = f(&note, sizeof(note), s);

> +    if (ret < 0) {

> +        return -1;

> +    }

> +

> +    return 0;

> +}

> +

> +int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,

> +                             int cpuid, void *opaque)

> +{

> +    CPUARMState *env = &ARM_CPU(cs)->env;

> +    int ret;

> +

> +    if (arm_el_is_aa64(env, 1)) {

> +        ret = aarch64_write_elf64_note(f, env, cpuid, opaque);

> +    } else {

> +        ret = arm_write_elf32_note(f, env, cpuid, opaque);

> +    }


This might produce the wrong kind of dump if we're in EL2
or EL3 at the point we take it (can only happen in emulation
and only once we add EL2 and EL3 emulation support, which isn't
active yet). Do we care?

thanks
-- PMM
Andrew Jones Dec. 3, 2015, 6:55 p.m. UTC | #2
On Thu, Dec 03, 2015 at 11:44:05AM +0000, Peter Maydell wrote:
> On 25 November 2015 at 00:37, Andrew Jones <drjones@redhat.com> wrote:

> > Add the support needed for creating prstatus elf notes. This

> > allows us to use QMP dump-guest-memory.

> 

> > +

> > +    if (is_a64(env)) {

> > +        for (i = 0; i < 31; ++i) {

> > +            note.prstatus.pr_reg.regs[i] = cpu_to_dump64(s, env->xregs[i]);

> > +        }

> > +        note.prstatus.pr_reg.sp = cpu_to_dump64(s, env->xregs[31]);

> > +        note.prstatus.pr_reg.pc = cpu_to_dump64(s, env->pc);

> > +        note.prstatus.pr_reg.pstate = cpu_to_dump64(s, pstate_read(env));

> > +    } else {

> > +        aarch64_sync_64_to_32(env);

> > +        for (i = 0; i < 16; ++i) {

> > +            note.prstatus.pr_reg.regs[i] = cpu_to_dump64(s, env->regs[i]);

> > +        }

> > +        note.prstatus.pr_reg.sp = note.prstatus.pr_reg.regs[13];

> > +        note.prstatus.pr_reg.pc = note.prstatus.pr_reg.regs[15];

> > +        note.prstatus.pr_reg.pstate = cpu_to_dump64(s, cpsr_read(env));

> > +    }

> 

> This doesn't look right. sync_64_to_32 is copying the state held

> in the 64-bit env->xregs etc into the 32-bit env->regs. But if we're

> in 32-bit state then the true state is in the 32-bit fields and

> this will trash it. You want to sync_32_to_64 here, and then the

> code to write the values to the dump is the same either way

> (except for pstate vs cpsr which we haven't managed to clean up

> and unify yet, sadly).


So, apart from the unnecessary call to aarch64_sync_64_to_32(), the
above code is correct for the KVM case. For the TCG case, however, I
now see why it's wrong.

The KVM case starts with 64-bit state, because this function is dealing
with 64-bit guest kernels. The TCG case, when userspace is running a
32-bit binary, starts with 32-bit state. In both cases we want to get
32-bit state into a 64-bit elf note. KVM needs aarch64_sync_64_to_32(),
which is actually already done by cpu_synchronize_all_states(), and
then to shoehorn the 32-bit registers into the 64-bit elf note, as done
above. TCG, on the other hand, doesn't need to sync any state, it just
needs to shoehorn. So the above aarch64_sync_64_to_32() call, which I
actually added *for* TCG (since I misunderstood your comment on v1),
actually makes it wrong. Needless to say, I didn't test TCG :-)

Now, to fix it, we could do what you have here below

> 

> I think you want

>    uint64_t pstate;

>    [...]

> 

>    if (!is_a64(env)) {

>        aarch64_sync_32_to_64(env);

>        pstate = cpsr_read(env);

>    } else {

>        pstate = pstate_read(env);

>    }

>    for (i = 0; i < 31; ++i) {

>        note.prstatus.pr_reg.regs[i] = cpu_to_dump64(s, env->xregs[i]);

>    }


But, this adds an unnecessary aarch64_sync_32_to_64() call to the kvm
case (although it wouldn't hurt, as aarch64_sync_32_to_64 is the inverse
of aarch64_sync_64_to_32, which we've already done earlier). It also
always adds register values 16..30 to the elf note (which may not always
be zero in the 32-bit userspace case?). The way I have it above makes
sure those registers are zero in that case.

So, how about we just remove the aarch64_sync_64_to_32() from the code
I have above? Won't that make it work for both KVM and TCG?


>    note.prstatus.pr_reg.sp = cpu_to_dump64(s, env->xregs[31]);

>    note.prstatus.pr_reg.pc = cpu_to_dump64(s, env->pc);

>    note.prstatus.pr_reg.pstate = cpu_to_dump64(s, pstate);

> 

> (Note that the 32-bit SP is not architecturally in X31;

> it's in one of the other xregs, depending what mode the CPU

> was in. For 32-bit userspace that will be USR and it's in X13.)


Yup, that's why I was pulling it from x13 in the above code. In your
version you can now use x31, due to the aarch64_sync_32_to_64().

Anyway, I'll actually test with TCG for v3.

> 

> 

> > +

> > +    ret = f(&note, sizeof(note), s);

> > +    if (ret < 0) {

> > +        return -1;

> > +    }

> > +

> > +    return 0;

> > +}

> > +

> > +/* struct pt_regs from arch/arm/include/asm/ptrace.h */

> > +struct arm_user_regs {

> > +    uint32_t regs[17];

> > +    char pad[4];

> > +} QEMU_PACKED;

> > +

> > +QEMU_BUILD_BUG_ON(sizeof(struct arm_user_regs) != 72);

> > +

> > +/* struct elf_prstatus from include/uapi/linux/elfcore.h */

> > +struct arm_elf_prstatus {

> > +    char pad1[24]; /* 24 == offsetof(struct elf_prstatus, pr_pid) */

> > +    uint32_t pr_pid;

> > +    char pad2[44]; /* 44 == offsetof(struct elf_prstatus, pr_reg) -

> > +                            offsetof(struct elf_prstatus, pr_ppid) */

> > +    struct arm_user_regs pr_reg;

> > +    int pr_fpvalid;

> > +} QEMU_PACKED arm_elf_prstatus;

> > +

> > +QEMU_BUILD_BUG_ON(sizeof(struct arm_elf_prstatus) != 148);

> > +

> > +struct arm_note {

> > +    Elf32_Nhdr hdr;

> > +    char name[QEMU_ALIGN_UP(NOTE_NAMESZ, 4)];

> > +    struct arm_elf_prstatus prstatus;

> > +} QEMU_PACKED;

> > +

> > +QEMU_BUILD_BUG_ON(sizeof(struct arm_note) != 168);

> > +

> > +static int

> > +arm_write_elf32_note(WriteCoreDumpFunction f, CPUARMState *env,

> > +                     int id, DumpState *s)

> > +{

> > +    struct arm_note note;

> > +    int ret, i;

> > +

> > +    memset(&note, 0, sizeof(note));

> > +

> > +    note.hdr.n_namesz = cpu_to_dump32(s, NOTE_NAMESZ);

> > +    note.hdr.n_descsz = cpu_to_dump32(s, sizeof(note.prstatus));

> > +    note.hdr.n_type = cpu_to_dump32(s, NT_PRSTATUS);

> > +

> > +    memcpy(note.name, NOTE_NAME, NOTE_NAMESZ);

> > +    note.prstatus.pr_pid = cpu_to_dump32(s, id);

> > +

> > +    for (i = 0; i < 16; ++i) {

> > +        note.prstatus.pr_reg.regs[i] = cpu_to_dump32(s, env->regs[i]);

> > +    }

> > +    note.prstatus.pr_reg.regs[16] = cpu_to_dump32(s, cpsr_read(env));

> > +

> > +    ret = f(&note, sizeof(note), s);

> > +    if (ret < 0) {

> > +        return -1;

> > +    }

> > +

> > +    return 0;

> > +}

> > +

> > +int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,

> > +                             int cpuid, void *opaque)

> > +{

> > +    CPUARMState *env = &ARM_CPU(cs)->env;

> > +    int ret;

> > +

> > +    if (arm_el_is_aa64(env, 1)) {

> > +        ret = aarch64_write_elf64_note(f, env, cpuid, opaque);

> > +    } else {

> > +        ret = arm_write_elf32_note(f, env, cpuid, opaque);

> > +    }

> 

> This might produce the wrong kind of dump if we're in EL2

> or EL3 at the point we take it (can only happen in emulation

> and only once we add EL2 and EL3 emulation support, which isn't

> active yet). Do we care?


"care" is loaded word :-) If I can tweak this in some easy way now to get
it ready for el2/el3 emulation, then I'm happy to do so. However, without
a test environment, and without strong motivation to use this feature on
emulation in the first place, then I'd rather not bother for the initial
introduction of it. We can always modify it later.

Thanks,
drew
Peter Maydell Dec. 3, 2015, 7:54 p.m. UTC | #3
On 3 December 2015 at 18:55, Andrew Jones <drjones@redhat.com> wrote:
> On Thu, Dec 03, 2015 at 11:44:05AM +0000, Peter Maydell wrote:

>> On 25 November 2015 at 00:37, Andrew Jones <drjones@redhat.com> wrote:

>> > Add the support needed for creating prstatus elf notes. This

>> > allows us to use QMP dump-guest-memory.

>>

>> > +

>> > +    if (is_a64(env)) {

>> > +        for (i = 0; i < 31; ++i) {

>> > +            note.prstatus.pr_reg.regs[i] = cpu_to_dump64(s, env->xregs[i]);

>> > +        }

>> > +        note.prstatus.pr_reg.sp = cpu_to_dump64(s, env->xregs[31]);

>> > +        note.prstatus.pr_reg.pc = cpu_to_dump64(s, env->pc);

>> > +        note.prstatus.pr_reg.pstate = cpu_to_dump64(s, pstate_read(env));

>> > +    } else {

>> > +        aarch64_sync_64_to_32(env);

>> > +        for (i = 0; i < 16; ++i) {

>> > +            note.prstatus.pr_reg.regs[i] = cpu_to_dump64(s, env->regs[i]);

>> > +        }

>> > +        note.prstatus.pr_reg.sp = note.prstatus.pr_reg.regs[13];

>> > +        note.prstatus.pr_reg.pc = note.prstatus.pr_reg.regs[15];

>> > +        note.prstatus.pr_reg.pstate = cpu_to_dump64(s, cpsr_read(env));

>> > +    }

>>

>> This doesn't look right. sync_64_to_32 is copying the state held

>> in the 64-bit env->xregs etc into the 32-bit env->regs. But if we're

>> in 32-bit state then the true state is in the 32-bit fields and

>> this will trash it. You want to sync_32_to_64 here, and then the

>> code to write the values to the dump is the same either way

>> (except for pstate vs cpsr which we haven't managed to clean up

>> and unify yet, sadly).

>

> Besides the unnecessary call to aarch64_sync_64_to_32(), then, for the

> KVM case, the above code is correct. However, for the TCG case, I now

> see why it's wrong.

>

> The KVM case starts with 64-bit state, because this function is dealing

> with 64-bit guest kernels. The TCG case, when userspace is running a

> 32-bit binary, starts with 32-bit state. In both cases we want to get

> 32-bit state into a 64-bit elf note. KVM needs aarch64_sync_64_to_32(),

> which is actually already done by cpu_synchronize_all_states(), and

> then to shoehorn the 32-bit registers into the 64-bit elf note, as done

> above. TCG, on the other hand, doesn't need to sync any state, it just

> needs to shoehorn. So the above aarch64_sync_64_to_32() call, which I

> actually added *for* TCG (since I misunderstood your comment on v1),

> actually makes it wrong. Needless to say, I didn't test TCG :-)

>

> Now, to fix it, we could do what you have here below

>

>>

>> I think you want

>>    uint64_t pstate;

>>    [...]

>>

>>    if (!is_a64(env)) {

>>        aarch64_sync_32_to_64(env);

>>        pstate = cpsr_read(env);

>>    } else {

>>        pstate = pstate_read(env);

>>    }

>>    for (i = 0; i < 31; ++i) {

>>        note.prstatus.pr_reg.regs[i] = cpu_to_dump64(s, env->xregs[i]);

>>    }

>

> But, this adds an unnecessary aarch64_sync_32_to_64() call to the kvm

> case (although it wouldn't hurt, as aarch64_sync_32_to_64 is the inverse

> of aarch64_sync_64_to_32, which we've already done earlier). It also

> always adds register values 16..30 to the elf note (which may not always

> be zero in the 32-bit userspace case?). The way I have it above makes

> sure those registers are zero in that case.


If you do that then you'll lose the information about the other
32-bit registers (the svc/irq/etc banked versions). Those aren't
relevant if the 32-bit code is in usermode, but probably you want
them if you're doing a dump of a 64-bit (emulated) hypervisor
that happens to be running a 32-bit guest kernel at point of dump.

> So, how about we just remove the aarch64_sync_64_to_32() from the code

> I have above? Won't that make it work for both KVM and TCG?

>

>

>>    note.prstatus.pr_reg.sp = cpu_to_dump64(s, env->xregs[31]);

>>    note.prstatus.pr_reg.pc = cpu_to_dump64(s, env->pc);

>>    note.prstatus.pr_reg.pstate = cpu_to_dump64(s, pstate);

>>

>> (Note that the 32-bit SP is not architecturally in X31;

>> it's in one of the other xregs, depending what mode the CPU

>> was in. For 32-bit userspace that will be USR and it's in X13.)

>

> Yup, that's why I was pulling it from x13 in the above code. In your

> version you can now use x31, due to the aarch64_sync_32_to_64().


Note that sync_32_to_64 does not copy regs[13] into x31, which was
my point. In a 64-bit-format dump of a VM that happens to be
running 32-bit code you should not expect pr_reg.sp to be the
32-bit process's SP...

>> > +int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,

>> > +                             int cpuid, void *opaque)

>> > +{

>> > +    CPUARMState *env = &ARM_CPU(cs)->env;

>> > +    int ret;

>> > +

>> > +    if (arm_el_is_aa64(env, 1)) {

>> > +        ret = aarch64_write_elf64_note(f, env, cpuid, opaque);

>> > +    } else {

>> > +        ret = arm_write_elf32_note(f, env, cpuid, opaque);

>> > +    }

>>

>> This might produce the wrong kind of dump if we're in EL2

>> or EL3 at the point we take it (can only happen in emulation

>> and only once we add EL2 and EL3 emulation support, which isn't

>> active yet). Do we care?

>

> "care" is loaded word :-) If I can tweak this in some easy way now to get

> it ready for el2/el3 emulation, then I'm happy to do so. However, without

> a test environment, and without strong motivation to use this feature on

> emulation in the first place, then I'd rather not bother for the initial

> introduction of it. We can always modify it later.


For this test I think you can just say
  if (arm_feature(env, ARM_FEATURE_AARCH64)) {

which basically says "64-bit dump if the CPU supports 64-bit" (32-bit
KVM VMs won't have this feature bit). The other awkward part is
figuring out which endianness to use. I think there we can just put
in a comment
    /* We assume the relevant endianness is that of EL1; this is right
     * for kernels but might give the wrong answer if you're trying to
     * take a dump of a hypervisor that happens currently to be running
     * a wrong-endian kernel.
     */
and leave it for somebody who cares to try to figure out the right
semantics.

thanks
-- PMM
diff mbox

Patch

diff --git a/target-arm/Makefile.objs b/target-arm/Makefile.objs
index 9460b409a5a1c..a80eb39743a78 100644
--- a/target-arm/Makefile.objs
+++ b/target-arm/Makefile.objs
@@ -1,5 +1,5 @@ 
 obj-y += arm-semi.o
-obj-$(CONFIG_SOFTMMU) += machine.o
+obj-$(CONFIG_SOFTMMU) += machine.o psci.o arch_dump.o
 obj-$(CONFIG_KVM) += kvm.o
 obj-$(call land,$(CONFIG_KVM),$(call lnot,$(TARGET_AARCH64))) += kvm32.o
 obj-$(call land,$(CONFIG_KVM),$(TARGET_AARCH64)) += kvm64.o
@@ -7,6 +7,5 @@  obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-y += neon_helper.o iwmmxt_helper.o
 obj-y += gdbstub.o
-obj-$(CONFIG_SOFTMMU) += psci.o
 obj-$(TARGET_AARCH64) += cpu64.o translate-a64.o helper-a64.o gdbstub64.o
 obj-y += crypto_helper.o
diff --git a/target-arm/arch_dump.c b/target-arm/arch_dump.c
new file mode 100644
index 0000000000000..5debe549d721d
--- /dev/null
+++ b/target-arm/arch_dump.c
@@ -0,0 +1,230 @@ 
+/* Support for writing ELF notes for ARM architectures
+ *
+ * Copyright (C) 2015 Red Hat Inc.
+ *
+ * Author: Andrew Jones <drjones@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "elf.h"
+#include "sysemu/dump.h"
+
+#define NOTE_NAME       "CORE"
+#define NOTE_NAMESZ     5
+
+/* struct user_pt_regs from arch/arm64/include/uapi/asm/ptrace.h */
+struct aarch64_user_regs {
+    uint64_t regs[31];  /* x0..x30 when the CPU is in AArch64 state */
+    uint64_t sp;
+    uint64_t pc;
+    uint64_t pstate;    /* PSTATE, or the CPSR for a CPU in AArch32 state */
+} QEMU_PACKED;
+
+QEMU_BUILD_BUG_ON(sizeof(struct aarch64_user_regs) != 272); /* 34 * 8 */
+
+/* struct elf_prstatus from include/uapi/linux/elfcore.h */
+struct aarch64_elf_prstatus {
+    char pad1[32]; /* 32 == offsetof(struct elf_prstatus, pr_pid) */
+    uint32_t pr_pid; /* receives the id passed to the note writer */
+    char pad2[76]; /* 76 == offsetof(struct elf_prstatus, pr_reg) -
+                            offsetof(struct elf_prstatus, pr_ppid) */
+    struct aarch64_user_regs pr_reg;
+    int pr_fpvalid; /* never set by the note writer, so it is dumped as 0 */
+    char pad3[4];   /* tail padding up to the asserted 392 bytes */
+} QEMU_PACKED;
+
+QEMU_BUILD_BUG_ON(sizeof(struct aarch64_elf_prstatus) != 392);
+
+struct aarch64_note { /* one 64-bit note, as handed to the write callback */
+    Elf64_Nhdr hdr;
+    char name[QEMU_ALIGN_UP(NOTE_NAMESZ, 4)]; /* NOTE_NAME, zero-padded */
+    struct aarch64_elf_prstatus prstatus;     /* the note descriptor */
+} QEMU_PACKED;
+
+QEMU_BUILD_BUG_ON(sizeof(struct aarch64_note) != 412); /* 12 + 8 + 392 */
+
+/* Write the NT_PRSTATUS note for one CPU in the 64-bit (aarch64) layout.
+ * A CPU currently in AArch32 state is first synced into xregs[] so a single
+ * path fills the note.  Returns 0, or -1 if the write callback @f fails. */
+static int
+aarch64_write_elf64_note(WriteCoreDumpFunction f, CPUARMState *env,
+                         int id, DumpState *s)
+{
+    struct aarch64_note note;
+    uint64_t pstate, sp;
+    int ret, i;
+
+    memset(&note, 0, sizeof(note));
+
+    note.hdr.n_namesz = cpu_to_dump32(s, NOTE_NAMESZ);
+    note.hdr.n_descsz = cpu_to_dump32(s, sizeof(note.prstatus));
+    note.hdr.n_type = cpu_to_dump32(s, NT_PRSTATUS);
+
+    memcpy(note.name, NOTE_NAME, NOTE_NAMESZ);
+    note.prstatus.pr_pid = cpu_to_dump32(s, id);
+
+    if (!is_a64(env)) {
+        /* Live state is in env->regs[]: sync it *into* xregs[], never the
+         * other way round, or the real register values get overwritten. */
+        aarch64_sync_32_to_64(env);
+        pstate = cpsr_read(env);
+        sp = 0; /* AArch32 SP is not in X31; it is among regs[] (x13 for USR) */
+    } else {
+        pstate = pstate_read(env);
+        sp = env->xregs[31];
+    }
+    for (i = 0; i < 31; ++i) {
+        note.prstatus.pr_reg.regs[i] = cpu_to_dump64(s, env->xregs[i]);
+    }
+    note.prstatus.pr_reg.sp = cpu_to_dump64(s, sp);
+    note.prstatus.pr_reg.pc = cpu_to_dump64(s, env->pc);
+    note.prstatus.pr_reg.pstate = cpu_to_dump64(s, pstate);
+
+    ret = f(&note, sizeof(note), s);
+    return ret < 0 ? -1 : 0;
+}
+
+/* struct pt_regs from arch/arm/include/asm/ptrace.h */
+struct arm_user_regs {
+    uint32_t regs[17];  /* [0..15] = r0-r15, [16] = CPSR */
+    char pad[4];        /* brings the struct to the asserted 72 bytes */
+} QEMU_PACKED;
+
+QEMU_BUILD_BUG_ON(sizeof(struct arm_user_regs) != 72);
+
+/* struct elf_prstatus from include/uapi/linux/elfcore.h, 32-bit layout.
+ * Type definition only: no object of this type is declared at file scope. */
+struct arm_elf_prstatus {
+    char pad1[24]; /* 24 == offsetof(struct elf_prstatus, pr_pid) */
+    uint32_t pr_pid;
+    char pad2[44]; /* 44 == offsetof(pr_reg) - offsetof(pr_ppid) */
+    struct arm_user_regs pr_reg;
+    int pr_fpvalid; /* never set by the note writer, so it is dumped as 0 */
+} QEMU_PACKED;
+
+QEMU_BUILD_BUG_ON(sizeof(struct arm_elf_prstatus) != 148);
+
+struct arm_note { /* one 32-bit note, as handed to the write callback */
+    Elf32_Nhdr hdr;
+    char name[QEMU_ALIGN_UP(NOTE_NAMESZ, 4)]; /* NOTE_NAME, zero-padded */
+    struct arm_elf_prstatus prstatus;         /* the note descriptor */
+} QEMU_PACKED;
+
+QEMU_BUILD_BUG_ON(sizeof(struct arm_note) != 168); /* 12 + 8 + 148 */
+
+/* Emit the NT_PRSTATUS note for one CPU in the 32-bit (arm) layout:
+ * r0-r15 followed by the CPSR.
+ * Returns 0 on success, -1 when the write callback @f reports failure. */
+static int
+arm_write_elf32_note(WriteCoreDumpFunction f, CPUARMState *env,
+                     int id, DumpState *s)
+{
+    struct arm_note note;
+    int reg;
+
+    /* Start from all-zero so the pad areas and pr_fpvalid are written as 0 */
+    memset(&note, 0, sizeof(note));
+    memcpy(note.name, NOTE_NAME, NOTE_NAMESZ);
+
+    /* Header words and pid pass through cpu_to_dump32() like the registers */
+    note.hdr.n_type = cpu_to_dump32(s, NT_PRSTATUS);
+    note.hdr.n_namesz = cpu_to_dump32(s, NOTE_NAMESZ);
+    note.hdr.n_descsz = cpu_to_dump32(s, sizeof(note.prstatus));
+    note.prstatus.pr_pid = cpu_to_dump32(s, id);
+
+    for (reg = 0; reg < 16; reg++) {
+        note.prstatus.pr_reg.regs[reg] = cpu_to_dump32(s, env->regs[reg]);
+    }
+    /* Slot 16 of the register array carries the status register */
+    note.prstatus.pr_reg.regs[16] = cpu_to_dump32(s, cpsr_read(env));
+
+    return f(&note, sizeof(note), s) < 0 ? -1 : 0;
+}
+
+/* write_elf64_note hook: pick the note layout from arm_el_is_aa64(env, 1).
+ * NOTE(review): list feedback says this may pick wrongly once EL2/EL3 exist.
+ */
+int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
+                             int cpuid, void *opaque)
+{
+    CPUARMState *env = &ARM_CPU(cs)->env;
+
+    if (!arm_el_is_aa64(env, 1)) {
+        return arm_write_elf32_note(f, env, cpuid, opaque);
+    }
+    return aarch64_write_elf64_note(f, env, cpuid, opaque);
+}
+
+int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
+                             int cpuid, void *opaque)
+{   /* write_elf32_note hook: forwards to the 32-bit ARM note writer */
+    return arm_write_elf32_note(f, &ARM_CPU(cs)->env, cpuid, opaque);
+}
+
+/* Fill @info with the ELF parameters of a dump of this guest, judged from
+ * the first CPU: format per arm_el_is_aa64(env, 1), byte order per the
+ * SCTLR_EE bit of sctlr_el[1].  Always returns 0. */
+int cpu_get_dump_info(ArchDumpInfo *info,
+                      const GuestPhysBlockList *guest_phys_blocks)
+{
+    CPUARMState *env = &ARM_CPU(first_cpu)->env;
+    bool el1_is_64 = arm_el_is_aa64(env, 1);
+    hwaddr phys_floor = ULLONG_MAX;
+    GuestPhysBlock *blk;
+
+    /* Guess phys_base as the lowest guest block address, the same heuristic
+     * the crash utility applies when loading non-dumpfile formatted files.
+     * A wrong guess means crash needs '--machdep phys_offset=<phys-offset>'
+     * on its command line, which is no worse than wrongly assuming zero.
+     */
+    QTAILQ_FOREACH(blk, &guest_phys_blocks->head, next) {
+        if (phys_floor > blk->target_start) {
+            phys_floor = blk->target_start;
+        }
+    }
+
+    info->d_machine = el1_is_64 ? EM_AARCH64 : EM_ARM;
+    info->d_class = el1_is_64 ? ELFCLASS64 : ELFCLASS32;
+    /* 64K is the aarch64 max pagesize; the 32-bit format uses 4K */
+    info->page_size = el1_is_64 ? (1 << 16) : (1 << 12);
+
+    /* phys_base is only written when the guess is usable: some block was
+     * seen and, for the 32-bit format, it lies below UINT_MAX.
+     */
+    if (el1_is_64 ? (phys_floor != ULLONG_MAX) : (phys_floor < UINT_MAX)) {
+        info->phys_base = phys_floor;
+    }
+
+    if (env->cp15.sctlr_el[1] & SCTLR_EE) {
+        info->d_endian = ELFDATA2MSB;
+    } else {
+        info->d_endian = ELFDATA2LSB;
+    }
+    return 0;
+}
+
+/* Size in bytes of the notes for @nr_cpus CPUs: one fixed-size PRSTATUS
+ * note each, aarch64 layout for ELFCLASS64 and arm layout otherwise.
+ * @machine is not consulted. */
+ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
+{
+    size_t per_cpu = sizeof(struct arm_note);
+
+    if (class == ELFCLASS64) {
+        per_cpu = sizeof(struct aarch64_note);
+    }
+    return per_cpu * nr_cpus;
+}
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index 25fb1ce0f3f3d..5bd9b7bb9fa7e 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -221,6 +221,11 @@  hwaddr arm_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
 int arm_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
 int arm_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
 
+int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
+                             int cpuid, void *opaque);
+int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
+                             int cpuid, void *opaque);
+
 /* Callback functions for the generic timer's timers. */
 void arm_gt_ptimer_cb(void *opaque);
 void arm_gt_vtimer_cb(void *opaque);
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 30739fc0dfa74..db91a3f9eb467 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -1428,6 +1428,9 @@  static void arm_cpu_class_init(ObjectClass *oc, void *data)
 
     cc->disas_set_info = arm_disas_set_info;
 
+    cc->write_elf64_note = arm_cpu_write_elf64_note;
+    cc->write_elf32_note = arm_cpu_write_elf32_note;
+
     /*
      * Reason: arm_cpu_initfn() calls cpu_exec_init(), which saves
      * the object in cpus -> dangling pointer after final