Message ID | 20240504000521.314531-2-thiago.bauermann@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | Add support for AArch64 MOPS instructions | expand |
On Sat, 4 May 2024 at 02:05, Thiago Jung Bauermann <thiago.bauermann@linaro.org> wrote: > > The AArch64 MOPS (Memory Operation) instructions provide a standardised > instruction sequence to perform a memset, memcpy or memmove. A sequence is > always composed of three instructions: a prologue instruction, a main > instruction and an epilogue instruction. As an illustration, here are the > implementations of these memory operations in glibc 2.39: > > (gdb) disassemble/r > Dump of assembler code for function __memset_mops: > => 0x0000fffff7e8d780 <+0>: d503201f nop > 0x0000fffff7e8d784 <+4>: aa0003e3 mov x3, x0 > 0x0000fffff7e8d788 <+8>: 19c10443 setp [x3]!, x2!, x1 > 0x0000fffff7e8d78c <+12>: 19c14443 setm [x3]!, x2!, x1 > 0x0000fffff7e8d790 <+16>: 19c18443 sete [x3]!, x2!, x1 > 0x0000fffff7e8d794 <+20>: d65f03c0 ret > End of assembler dump. > > (gdb) disassemble/r > Dump of assembler code for function __memcpy_mops: > => 0x0000fffff7e8c580 <+0>: d503201f nop > 0x0000fffff7e8c584 <+4>: aa0003e3 mov x3, x0 > 0x0000fffff7e8c588 <+8>: 19010443 cpyfp [x3]!, [x1]!, x2! > 0x0000fffff7e8c58c <+12>: 19410443 cpyfm [x3]!, [x1]!, x2! > 0x0000fffff7e8c590 <+16>: 19810443 cpyfe [x3]!, [x1]!, x2! > 0x0000fffff7e8c594 <+20>: d65f03c0 ret > End of assembler dump. > > (gdb) disassemble/r > Dump of assembler code for function __memmove_mops: > => 0x0000fffff7e8d180 <+0>: d503201f nop > 0x0000fffff7e8d184 <+4>: aa0003e3 mov x3, x0 > 0x0000fffff7e8d188 <+8>: 1d010443 cpyp [x3]!, [x1]!, x2! > 0x0000fffff7e8d18c <+12>: 1d410443 cpym [x3]!, [x1]!, x2! > 0x0000fffff7e8d190 <+16>: 1d810443 cpye [x3]!, [x1]!, x2! > 0x0000fffff7e8d194 <+20>: d65f03c0 ret > End of assembler dump. > > The Arm Architecture Reference Manual says that "the prologue, main, and > epilogue instructions are expected to be run in succession and to appear > consecutively in memory". Therefore GDB needs to treat them as an atomic > instruction sequence, and also can't do displaced stepping with them. 
> > This patch implements support for executing the sequence atomically, and > also disables displaced step on them. > > PR tdep/31666 > Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=31666 > --- > gdb/aarch64-tdep.c | 107 ++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 105 insertions(+), 2 deletions(-) > > diff --git a/gdb/aarch64-tdep.c b/gdb/aarch64-tdep.c > index 8d0553f3d7cd..e920cea49066 100644 > --- a/gdb/aarch64-tdep.c > +++ b/gdb/aarch64-tdep.c > @@ -3444,6 +3444,104 @@ value_of_aarch64_user_reg (const frame_info_ptr &frame, const void *baton) > return value_of_register (*reg_p, get_next_frame_sentinel_okay (frame)); > } > > +/* Single step through MOPS instruction sequences on AArch64. */ > + > +static std::vector<CORE_ADDR> > +aarch64_software_single_step_mops (struct regcache *regcache, CORE_ADDR loc, > + uint32_t insn) > +{ > + const int insn_size = 4; > + struct gdbarch *gdbarch = regcache->arch (); > + enum bfd_endian byte_order_for_code = gdbarch_byte_order_for_code (gdbarch); > + uint8_t o0 = bit (insn, 21); > + uint8_t op1 = bits (insn, 22, 23); > + uint8_t op2 = bits (insn, 12, 15); > + > + /* Look for the prologue instruction that begins the sequence. */ > + > + /* CPYFP* */ > + if (!((o0 == 0 && op1 == 0) > + /* SETP* */ > + || (o0 == 0 && op1 == 3 && op2 < 4) > + /* CPYP* */ > + || (o0 == 1 && op1 == 0) > + /* SETGP* */ > + || (o0 == 1 && op1 == 3 && op2 < 4))) > + /* Prologue instruction not found. */ > + return {}; > + > + /* Now look for the main instruction in the middle of the sequence. */ > + > + loc += insn_size; > + ULONGEST insn_from_memory; > + if (!safe_read_memory_unsigned_integer (loc, insn_size, > + byte_order_for_code, > + &insn_from_memory)) > + { > + /* Assume we don't have a MOPS sequence, as we couldn't read the > + instruction in this location. 
*/ > + return {}; > + } > + > + insn = insn_from_memory; > + aarch64_inst inst; > + if (aarch64_decode_insn (insn, &inst, 1, nullptr) != 0) > + return {}; > + if (!AARCH64_CPU_HAS_FEATURE (*inst.opcode->avariant, MOPS)) > + return {}; > + > + o0 = bit (insn, 21); > + op1 = bits (insn, 22, 23); > + op2 = bits (insn, 12, 15); > + > + /* CPYFM* */ > + if (!((o0 == 0 && op1 == 1) > + /* SETM* */ > + || (o0 == 0 && op1 == 3 && op2 >= 4 && op2 < 8) > + /* CPYM* */ > + || (o0 == 1 && op1 == 1) > + /* SETGM* */ > + || (o0 == 1 && op1 == 3 && op2 >= 4 && op2 < 8))) > + /* Main instruction not found. */ > + return {}; > + > + /* Now look for the epilogue instruction that ends the sequence. */ > + > + loc += insn_size; > + if (!safe_read_memory_unsigned_integer (loc, insn_size, > + byte_order_for_code, > + &insn_from_memory)) > + { > + /* Assume we don't have a MOPS sequence, as we couldn't read the > + instruction in this location. */ > + return {}; > + } > + > + insn = insn_from_memory; > + if (aarch64_decode_insn (insn, &inst, 1, nullptr) != 0) > + return {}; > + if (!AARCH64_CPU_HAS_FEATURE (*inst.opcode->avariant, MOPS)) > + return {}; > + > + o0 = bit (insn, 21); > + op1 = bits (insn, 22, 23); > + op2 = bits (insn, 12, 15); > + > + /* CPYFE* */ > + if (!((o0 == 0 && op1 == 2) > + /* SETE* (op2 >= 12 is unallocated space) */ > + || (o0 == 0 && op1 == 3 && op2 >= 8 && op2 < 12) > + /* CPYE* */ > + || (o0 == 1 && op1 == 2) > + /* SETGE* (op2 >= 12 is unallocated space) */ > + || (o0 == 1 && op1 == 3 && op2 >= 8 && op2 < 12))) > + /* Epilogue instruction not found. */ > + return {}; > + > + /* Insert breakpoint after the end of the atomic sequence. */ > + return { loc + insn_size }; > +} > + > /* Implement the "software_single_step" gdbarch method, needed to > single step through atomic sequences on AArch64. 
*/ > > @@ -3479,6 +3577,9 @@ aarch64_software_single_step (struct regcache *regcache) > if (aarch64_decode_insn (insn, &inst, 1, NULL) != 0) > return {}; > > + if (AARCH64_CPU_HAS_FEATURE (*inst.opcode->avariant, MOPS)) > + return aarch64_software_single_step_mops (regcache, loc, insn); > + > /* Look for a Load Exclusive instruction which begins the sequence. */ > if (inst.opcode->iclass != ldstexcl || bit (insn, 22) == 0) > return {}; > @@ -3808,8 +3909,10 @@ aarch64_displaced_step_copy_insn (struct gdbarch *gdbarch, > if (aarch64_decode_insn (insn, &inst, 1, NULL) != 0) > return NULL; > > - /* Look for a Load Exclusive instruction which begins the sequence. */ > - if (inst.opcode->iclass == ldstexcl && bit (insn, 22)) > + /* Look for a Load Exclusive instruction which begins the sequence, > + or for a MOPS instruction. */ > + if ((inst.opcode->iclass == ldstexcl && bit (insn, 22)) > + || AARCH64_CPU_HAS_FEATURE (*inst.opcode->avariant, MOPS)) Sorry for the naive question: doesn't this check that the CPU supports MOPS, rather than the instruction sequence is a MOPS one? Thanks, Christophe > { > /* We can't displaced step atomic sequences. */ > return NULL;
Hello Christophe, Thanks for the patch review! Christophe Lyon <christophe.lyon@linaro.org> writes: > On Sat, 4 May 2024 at 02:05, Thiago Jung Bauermann > <thiago.bauermann@linaro.org> wrote: >> >> @@ -3808,8 +3909,10 @@ aarch64_displaced_step_copy_insn (struct gdbarch *gdbarch, >> if (aarch64_decode_insn (insn, &inst, 1, NULL) != 0) >> return NULL; >> >> - /* Look for a Load Exclusive instruction which begins the sequence. */ >> - if (inst.opcode->iclass == ldstexcl && bit (insn, 22)) >> + /* Look for a Load Exclusive instruction which begins the sequence, >> + or for a MOPS instruction. */ >> + if ((inst.opcode->iclass == ldstexcl && bit (insn, 22)) >> + || AARCH64_CPU_HAS_FEATURE (*inst.opcode->avariant, MOPS)) > > Sorry for the naive question: doesn't this check that the CPU supports > MOPS, rather than the instruction sequence is a MOPS one? It's an interesting question actually, at least for me because it's my first time calling opcode functions. You're right that AARCH64_CPU_HAS_FEATURE checks whether the given CPU supports the given feature. But the call above uses the inst.opcode->avariant "CPU": /* Which architecture variant provides this instruction. */ const aarch64_feature_set *avariant; For MOPS instructions that field points to aarch64_feature_mops, defined in opcodes/aarch64-tbl.h as: static const aarch64_feature_set aarch64_feature_mops = AARCH64_FEATURE (MOPS); Which defines a CPU containing only the MOPS feature. I also checked — just to be sure — that whether or not the VM's CPU supports the MOPS feature, the above description holds and the check returns true for a MOPS instruction and false otherwise. >> { >> /* We can't displaced step atomic sequences. */ >> return NULL;
diff --git a/gdb/aarch64-tdep.c b/gdb/aarch64-tdep.c index 8d0553f3d7cd..e920cea49066 100644 --- a/gdb/aarch64-tdep.c +++ b/gdb/aarch64-tdep.c @@ -3444,6 +3444,104 @@ value_of_aarch64_user_reg (const frame_info_ptr &frame, const void *baton) return value_of_register (*reg_p, get_next_frame_sentinel_okay (frame)); } +/* Single step through MOPS instruction sequences on AArch64. */ + +static std::vector<CORE_ADDR> +aarch64_software_single_step_mops (struct regcache *regcache, CORE_ADDR loc, + uint32_t insn) +{ + const int insn_size = 4; + struct gdbarch *gdbarch = regcache->arch (); + enum bfd_endian byte_order_for_code = gdbarch_byte_order_for_code (gdbarch); + uint8_t o0 = bit (insn, 21); + uint8_t op1 = bits (insn, 22, 23); + uint8_t op2 = bits (insn, 12, 15); + + /* Look for the prologue instruction that begins the sequence. */ + + /* CPYFP* */ + if (!((o0 == 0 && op1 == 0) + /* SETP* */ + || (o0 == 0 && op1 == 3 && op2 < 4) + /* CPYP* */ + || (o0 == 1 && op1 == 0) + /* SETGP* */ + || (o0 == 1 && op1 == 3 && op2 < 4))) + /* Prologue instruction not found. */ + return {}; + + /* Now look for the main instruction in the middle of the sequence. */ + + loc += insn_size; + ULONGEST insn_from_memory; + if (!safe_read_memory_unsigned_integer (loc, insn_size, + byte_order_for_code, + &insn_from_memory)) + { + /* Assume we don't have a MOPS sequence, as we couldn't read the + instruction in this location. */ + return {}; + } + + insn = insn_from_memory; + aarch64_inst inst; + if (aarch64_decode_insn (insn, &inst, 1, nullptr) != 0) + return {}; + if (!AARCH64_CPU_HAS_FEATURE (*inst.opcode->avariant, MOPS)) + return {}; + + o0 = bit (insn, 21); + op1 = bits (insn, 22, 23); + op2 = bits (insn, 12, 15); + + /* CPYFM* */ + if (!((o0 == 0 && op1 == 1) + /* SETM* */ + || (o0 == 0 && op1 == 3 && op2 >= 4 && op2 < 8) + /* CPYM* */ + || (o0 == 1 && op1 == 1) + /* SETGM* */ + || (o0 == 1 && op1 == 3 && op2 >= 4 && op2 < 8))) + /* Main instruction not found. 
*/ + return {}; + + /* Now look for the epilogue instruction that ends the sequence. */ + + loc += insn_size; + if (!safe_read_memory_unsigned_integer (loc, insn_size, + byte_order_for_code, + &insn_from_memory)) + { + /* Assume we don't have a MOPS sequence, as we couldn't read the + instruction in this location. */ + return {}; + } + + insn = insn_from_memory; + if (aarch64_decode_insn (insn, &inst, 1, nullptr) != 0) + return {}; + if (!AARCH64_CPU_HAS_FEATURE (*inst.opcode->avariant, MOPS)) + return {}; + + o0 = bit (insn, 21); + op1 = bits (insn, 22, 23); + op2 = bits (insn, 12, 15); + + /* CPYFE* */ + if (!((o0 == 0 && op1 == 2) + /* SETE* (op2 >= 12 is unallocated space) */ + || (o0 == 0 && op1 == 3 && op2 >= 8 && op2 < 12) + /* CPYE* */ + || (o0 == 1 && op1 == 2) + /* SETGE* (op2 >= 12 is unallocated space) */ + || (o0 == 1 && op1 == 3 && op2 >= 8 && op2 < 12))) + /* Epilogue instruction not found. */ + return {}; + + /* Insert breakpoint after the end of the atomic sequence. */ + return { loc + insn_size }; +} + /* Implement the "software_single_step" gdbarch method, needed to single step through atomic sequences on AArch64. */ @@ -3479,6 +3577,9 @@ aarch64_software_single_step (struct regcache *regcache) if (aarch64_decode_insn (insn, &inst, 1, NULL) != 0) return {}; + if (AARCH64_CPU_HAS_FEATURE (*inst.opcode->avariant, MOPS)) + return aarch64_software_single_step_mops (regcache, loc, insn); + /* Look for a Load Exclusive instruction which begins the sequence. */ if (inst.opcode->iclass != ldstexcl || bit (insn, 22) == 0) return {}; @@ -3808,8 +3909,10 @@ aarch64_displaced_step_copy_insn (struct gdbarch *gdbarch, if (aarch64_decode_insn (insn, &inst, 1, NULL) != 0) return NULL; - /* Look for a Load Exclusive instruction which begins the sequence. */ - if (inst.opcode->iclass == ldstexcl && bit (insn, 22)) + /* Look for a Load Exclusive instruction which begins the sequence, + or for a MOPS instruction. 
*/ + if ((inst.opcode->iclass == ldstexcl && bit (insn, 22)) + || AARCH64_CPU_HAS_FEATURE (*inst.opcode->avariant, MOPS)) { /* We can't displaced step atomic sequences. */ return NULL;