diff mbox series

[1/5] gdb/aarch64: Implement software single stepping for MOPS instructions

Message ID 20240504000521.314531-2-thiago.bauermann@linaro.org
State Superseded
Headers show
Series Add support for AArch64 MOPS instructions | expand

Commit Message

Thiago Jung Bauermann May 4, 2024, 12:05 a.m. UTC
The AArch64 MOPS (Memory Operation) instructions provide a standardised
instruction sequence to perform a memset, memcpy or memmove.  A sequence is
always composed of three instructions: a prologue instruction, a main
instruction and an epilogue instruction.  As an illustration, here are the
implementations of these memory operations in glibc 2.39:

  (gdb) disassemble/r
  Dump of assembler code for function __memset_mops:
  => 0x0000fffff7e8d780 <+0>:     d503201f        nop
     0x0000fffff7e8d784 <+4>:     aa0003e3        mov     x3, x0
     0x0000fffff7e8d788 <+8>:     19c10443        setp    [x3]!, x2!, x1
     0x0000fffff7e8d78c <+12>:    19c14443        setm    [x3]!, x2!, x1
     0x0000fffff7e8d790 <+16>:    19c18443        sete    [x3]!, x2!, x1
     0x0000fffff7e8d794 <+20>:    d65f03c0        ret
  End of assembler dump.

  (gdb) disassemble/r
  Dump of assembler code for function __memcpy_mops:
  => 0x0000fffff7e8c580 <+0>:     d503201f        nop
     0x0000fffff7e8c584 <+4>:     aa0003e3        mov     x3, x0
     0x0000fffff7e8c588 <+8>:     19010443        cpyfp   [x3]!, [x1]!, x2!
     0x0000fffff7e8c58c <+12>:    19410443        cpyfm   [x3]!, [x1]!, x2!
     0x0000fffff7e8c590 <+16>:    19810443        cpyfe   [x3]!, [x1]!, x2!
     0x0000fffff7e8c594 <+20>:    d65f03c0        ret
  End of assembler dump.

  (gdb) disassemble/r
  Dump of assembler code for function __memmove_mops:
  => 0x0000fffff7e8d180 <+0>:     d503201f        nop
     0x0000fffff7e8d184 <+4>:     aa0003e3        mov     x3, x0
     0x0000fffff7e8d188 <+8>:     1d010443        cpyp    [x3]!, [x1]!, x2!
     0x0000fffff7e8d18c <+12>:    1d410443        cpym    [x3]!, [x1]!, x2!
     0x0000fffff7e8d190 <+16>:    1d810443        cpye    [x3]!, [x1]!, x2!
     0x0000fffff7e8d194 <+20>:    d65f03c0        ret
  End of assembler dump.

The Arm Architecture Reference Manual says that "the prologue, main, and
epilogue instructions are expected to be run in succession and to appear
consecutively in memory".  Therefore GDB needs to treat them as an atomic
instruction sequence, and also can't do displaced stepping with them.

This patch implements support for executing the sequence atomically, and
also disables displaced step on them.

PR tdep/31666
Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=31666
---
 gdb/aarch64-tdep.c | 107 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 105 insertions(+), 2 deletions(-)

Comments

Christophe Lyon May 6, 2024, 9:29 a.m. UTC | #1
On Sat, 4 May 2024 at 02:05, Thiago Jung Bauermann
<thiago.bauermann@linaro.org> wrote:
>
> The AArch64 MOPS (Memory Operation) instructions provide a standardised
> instruction sequence to perform a memset, memcpy or memmove.  A sequence is
> always composed of three instructions: a prologue instruction, a main
> instruction and an epilogue instruction.  As an illustration, here are the
> implementations of these memory operations in glibc 2.39:
>
>   (gdb) disassemble/r
>   Dump of assembler code for function __memset_mops:
>   => 0x0000fffff7e8d780 <+0>:     d503201f        nop
>      0x0000fffff7e8d784 <+4>:     aa0003e3        mov     x3, x0
>      0x0000fffff7e8d788 <+8>:     19c10443        setp    [x3]!, x2!, x1
>      0x0000fffff7e8d78c <+12>:    19c14443        setm    [x3]!, x2!, x1
>      0x0000fffff7e8d790 <+16>:    19c18443        sete    [x3]!, x2!, x1
>      0x0000fffff7e8d794 <+20>:    d65f03c0        ret
>   End of assembler dump.
>
>   (gdb) disassemble/r
>   Dump of assembler code for function __memcpy_mops:
>   => 0x0000fffff7e8c580 <+0>:     d503201f        nop
>      0x0000fffff7e8c584 <+4>:     aa0003e3        mov     x3, x0
>      0x0000fffff7e8c588 <+8>:     19010443        cpyfp   [x3]!, [x1]!, x2!
>      0x0000fffff7e8c58c <+12>:    19410443        cpyfm   [x3]!, [x1]!, x2!
>      0x0000fffff7e8c590 <+16>:    19810443        cpyfe   [x3]!, [x1]!, x2!
>      0x0000fffff7e8c594 <+20>:    d65f03c0        ret
>   End of assembler dump.
>
>   (gdb) disassemble/r
>   Dump of assembler code for function __memmove_mops:
>   => 0x0000fffff7e8d180 <+0>:     d503201f        nop
>      0x0000fffff7e8d184 <+4>:     aa0003e3        mov     x3, x0
>      0x0000fffff7e8d188 <+8>:     1d010443        cpyp    [x3]!, [x1]!, x2!
>      0x0000fffff7e8d18c <+12>:    1d410443        cpym    [x3]!, [x1]!, x2!
>      0x0000fffff7e8d190 <+16>:    1d810443        cpye    [x3]!, [x1]!, x2!
>      0x0000fffff7e8d194 <+20>:    d65f03c0        ret
>   End of assembler dump.
>
> The Arm Architecture Reference Manual says that "the prologue, main, and
> epilogue instructions are expected to be run in succession and to appear
> consecutively in memory".  Therefore GDB needs to treat them as an atomic
> instruction sequence, and also can't do displaced stepping with them.
>
> This patch implements support for executing the sequence atomically, and
> also disables displaced step on them.
>
> PR tdep/31666
> Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=31666
> ---
>  gdb/aarch64-tdep.c | 107 ++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 105 insertions(+), 2 deletions(-)
>
> diff --git a/gdb/aarch64-tdep.c b/gdb/aarch64-tdep.c
> index 8d0553f3d7cd..e920cea49066 100644
> --- a/gdb/aarch64-tdep.c
> +++ b/gdb/aarch64-tdep.c
> @@ -3444,6 +3444,104 @@ value_of_aarch64_user_reg (const frame_info_ptr &frame, const void *baton)
>    return value_of_register (*reg_p, get_next_frame_sentinel_okay (frame));
>  }
>
> +/* Single step through MOPS instruction sequences on AArch64.  */
> +
> +static std::vector<CORE_ADDR>
> +aarch64_software_single_step_mops (struct regcache *regcache, CORE_ADDR loc,
> +                                  uint32_t insn)
> +{
> +  const int insn_size = 4;
> +  struct gdbarch *gdbarch = regcache->arch ();
> +  enum bfd_endian byte_order_for_code = gdbarch_byte_order_for_code (gdbarch);
> +  uint8_t o0 = bit (insn, 21);
> +  uint8_t op1 = bits (insn, 22, 23);
> +  uint8_t op2 = bits (insn, 12, 15);
> +
> +  /* Look for the prologue instruction that begins the sequence.  */
> +
> +       /* CPYFP* */
> +  if (!((o0 == 0 && op1 == 0)
> +       /* SETP* */
> +       || (o0 == 0 && op1 == 3 && op2 < 4)
> +       /* CPYP* */
> +       || (o0 == 1 && op1 == 0)
> +       /* SETGP* */
> +       || (o0 == 1 && op1 == 3 && op2 < 4)))
> +    /* Prologue instruction not found.  */
> +    return {};
> +
> +  /* Now look for the main instruction in the middle of the sequence.  */
> +
> +  loc += insn_size;
> +  ULONGEST insn_from_memory;
> +  if (!safe_read_memory_unsigned_integer (loc, insn_size,
> +                                         byte_order_for_code,
> +                                         &insn_from_memory))
> +    {
> +      /* Assume we don't have a MOPS sequence, as we couldn't read the
> +        instruction in this location.  */
> +      return {};
> +    }
> +
> +  insn = insn_from_memory;
> +  aarch64_inst inst;
> +  if (aarch64_decode_insn (insn, &inst, 1, nullptr) != 0)
> +    return {};
> +  if (!AARCH64_CPU_HAS_FEATURE (*inst.opcode->avariant, MOPS))
> +    return {};
> +
> +  o0 = bit (insn, 21);
> +  op1 = bits (insn, 22, 23);
> +  op2 = bits (insn, 12, 15);
> +
> +       /* CPYFM* */
> +  if (!((o0 == 0 && op1 == 1)
> +       /* SETM* */
> +       || (o0 == 0 && op1 == 3 && op2 >= 4 && op2 < 8)
> +       /* CPYM* */
> +       || (o0 == 1 && op1 == 1)
> +       /* SETGM* */
> +       || (o0 == 1 && op1 == 3 && op2 >= 4 && op2 < 8)))
> +    /* Main instruction not found.  */
> +    return {};
> +
> +  /* Now look for the epilogue instruction that ends the sequence.  */
> +
> +  loc += insn_size;
> +  if (!safe_read_memory_unsigned_integer (loc, insn_size,
> +                                         byte_order_for_code,
> +                                         &insn_from_memory))
> +    {
> +      /* Assume we don't have a MOPS sequence, as we couldn't read the
> +        instruction in this location.  */
> +      return {};
> +    }
> +
> +  insn = insn_from_memory;
> +  if (aarch64_decode_insn (insn, &inst, 1, nullptr) != 0)
> +    return {};
> +  if (!AARCH64_CPU_HAS_FEATURE (*inst.opcode->avariant, MOPS))
> +    return {};
> +
> +  o0 = bit (insn, 21);
> +  op1 = bits (insn, 22, 23);
> +  op2 = bits (insn, 12, 15);
> +
> +       /* CPYFE* */
> +  if (!((o0 == 0 && op1 == 2)
> +       /* SETE* (op2 >= 12 is unallocated space) */
> +       || (o0 == 0 && op1 == 3 && op2 >= 8 && op2 < 12)
> +       /* CPYE* */
> +       || (o0 == 1 && op1 == 2)
> +       /* SETGE* (op2 >= 12 is unallocated space) */
> +       || (o0 == 1 && op1 == 3 && op2 >= 8 && op2 < 12)))
> +    /* Epilogue instruction not found.  */
> +    return {};
> +
> +  /* Insert breakpoint after the end of the atomic sequence.  */
> +  return { loc + insn_size };
> +}
> +
>  /* Implement the "software_single_step" gdbarch method, needed to
>     single step through atomic sequences on AArch64.  */
>
> @@ -3479,6 +3577,9 @@ aarch64_software_single_step (struct regcache *regcache)
>    if (aarch64_decode_insn (insn, &inst, 1, NULL) != 0)
>      return {};
>
> +  if (AARCH64_CPU_HAS_FEATURE (*inst.opcode->avariant, MOPS))
> +    return aarch64_software_single_step_mops (regcache, loc, insn);
> +
>    /* Look for a Load Exclusive instruction which begins the sequence.  */
>    if (inst.opcode->iclass != ldstexcl || bit (insn, 22) == 0)
>      return {};
> @@ -3808,8 +3909,10 @@ aarch64_displaced_step_copy_insn (struct gdbarch *gdbarch,
>    if (aarch64_decode_insn (insn, &inst, 1, NULL) != 0)
>      return NULL;
>
> -  /* Look for a Load Exclusive instruction which begins the sequence.  */
> -  if (inst.opcode->iclass == ldstexcl && bit (insn, 22))
> +  /* Look for a Load Exclusive instruction which begins the sequence,
> +     or for a MOPS instruction.  */
> +  if ((inst.opcode->iclass == ldstexcl && bit (insn, 22))
> +      || AARCH64_CPU_HAS_FEATURE (*inst.opcode->avariant, MOPS))

Sorry for the naive question: doesn't this check that the CPU supports
MOPS, rather than that the instruction sequence is a MOPS one?

Thanks,

Christophe

>      {
>        /* We can't displaced step atomic sequences.  */
>        return NULL;
Thiago Jung Bauermann May 7, 2024, 12:56 a.m. UTC | #2
Hello Christophe,

Thanks for the patch review!

Christophe Lyon <christophe.lyon@linaro.org> writes:

> On Sat, 4 May 2024 at 02:05, Thiago Jung Bauermann
> <thiago.bauermann@linaro.org> wrote:
>>
>> @@ -3808,8 +3909,10 @@ aarch64_displaced_step_copy_insn (struct gdbarch *gdbarch,
>>    if (aarch64_decode_insn (insn, &inst, 1, NULL) != 0)
>>      return NULL;
>>
>> -  /* Look for a Load Exclusive instruction which begins the sequence.  */
>> -  if (inst.opcode->iclass == ldstexcl && bit (insn, 22))
>> +  /* Look for a Load Exclusive instruction which begins the sequence,
>> +     or for a MOPS instruction.  */
>> +  if ((inst.opcode->iclass == ldstexcl && bit (insn, 22))
>> +      || AARCH64_CPU_HAS_FEATURE (*inst.opcode->avariant, MOPS))
>
> Sorry for the naive question: doesn't this check that the CPU supports
> MOPS, rather than the instruction sequence is a MOPS one?

It's an interesting question actually, at least for me because it's my
first time calling opcode functions.  You're right that
AARCH64_CPU_HAS_FEATURE checks whether the given CPU supports the given
feature.  But the call above uses the inst.opcode->avariant "CPU":

  /* Which architecture variant provides this instruction.  */
  const aarch64_feature_set *avariant;

For MOPS instructions that field points to aarch64_feature_mops, defined
in opcodes/aarch64-tbl.h as:

static const aarch64_feature_set aarch64_feature_mops =
  AARCH64_FEATURE (MOPS);

Which defines a CPU containing only the MOPS feature.

I also checked — just to be sure — that whether or not the VM's CPU
supports the MOPS feature, the above description holds and the check
returns true for a MOPS instruction and false otherwise.

>>      {
>>        /* We can't displaced step atomic sequences.  */
>>        return NULL;
diff mbox series

Patch

diff --git a/gdb/aarch64-tdep.c b/gdb/aarch64-tdep.c
index 8d0553f3d7cd..e920cea49066 100644
--- a/gdb/aarch64-tdep.c
+++ b/gdb/aarch64-tdep.c
@@ -3444,6 +3444,104 @@  value_of_aarch64_user_reg (const frame_info_ptr &frame, const void *baton)
   return value_of_register (*reg_p, get_next_frame_sentinel_okay (frame));
 }
 
+/* Single step through MOPS instruction sequences on AArch64.  */
+
+static std::vector<CORE_ADDR>
+aarch64_software_single_step_mops (struct regcache *regcache, CORE_ADDR loc,
+				   uint32_t insn)
+{
+  const int insn_size = 4;
+  struct gdbarch *gdbarch = regcache->arch ();
+  enum bfd_endian byte_order_for_code = gdbarch_byte_order_for_code (gdbarch);
+  uint8_t o0 = bit (insn, 21);
+  uint8_t op1 = bits (insn, 22, 23);
+  uint8_t op2 = bits (insn, 12, 15);
+
+  /* Look for the prologue instruction that begins the sequence.  */
+
+	/* CPYFP* */
+  if (!((o0 == 0 && op1 == 0)
+	/* SETP* */
+	|| (o0 == 0 && op1 == 3 && op2 < 4)
+	/* CPYP* */
+	|| (o0 == 1 && op1 == 0)
+	/* SETGP* */
+	|| (o0 == 1 && op1 == 3 && op2 < 4)))
+    /* Prologue instruction not found.  */
+    return {};
+
+  /* Now look for the main instruction in the middle of the sequence.  */
+
+  loc += insn_size;
+  ULONGEST insn_from_memory;
+  if (!safe_read_memory_unsigned_integer (loc, insn_size,
+					  byte_order_for_code,
+					  &insn_from_memory))
+    {
+      /* Assume we don't have a MOPS sequence, as we couldn't read the
+	 instruction in this location.  */
+      return {};
+    }
+
+  insn = insn_from_memory;
+  aarch64_inst inst;
+  if (aarch64_decode_insn (insn, &inst, 1, nullptr) != 0)
+    return {};
+  if (!AARCH64_CPU_HAS_FEATURE (*inst.opcode->avariant, MOPS))
+    return {};
+
+  o0 = bit (insn, 21);
+  op1 = bits (insn, 22, 23);
+  op2 = bits (insn, 12, 15);
+
+	/* CPYFM* */
+  if (!((o0 == 0 && op1 == 1)
+	/* SETM* */
+	|| (o0 == 0 && op1 == 3 && op2 >= 4 && op2 < 8)
+	/* CPYM* */
+	|| (o0 == 1 && op1 == 1)
+	/* SETGM* */
+	|| (o0 == 1 && op1 == 3 && op2 >= 4 && op2 < 8)))
+    /* Main instruction not found.  */
+    return {};
+
+  /* Now look for the epilogue instruction that ends the sequence.  */
+
+  loc += insn_size;
+  if (!safe_read_memory_unsigned_integer (loc, insn_size,
+					  byte_order_for_code,
+					  &insn_from_memory))
+    {
+      /* Assume we don't have a MOPS sequence, as we couldn't read the
+	 instruction in this location.  */
+      return {};
+    }
+
+  insn = insn_from_memory;
+  if (aarch64_decode_insn (insn, &inst, 1, nullptr) != 0)
+    return {};
+  if (!AARCH64_CPU_HAS_FEATURE (*inst.opcode->avariant, MOPS))
+    return {};
+
+  o0 = bit (insn, 21);
+  op1 = bits (insn, 22, 23);
+  op2 = bits (insn, 12, 15);
+
+	/* CPYFE* */
+  if (!((o0 == 0 && op1 == 2)
+	/* SETE* (op2 >= 12 is unallocated space) */
+	|| (o0 == 0 && op1 == 3 && op2 >= 8 && op2 < 12)
+	/* CPYE* */
+	|| (o0 == 1 && op1 == 2)
+	/* SETGE* (op2 >= 12 is unallocated space) */
+	|| (o0 == 1 && op1 == 3 && op2 >= 8 && op2 < 12)))
+    /* Epilogue instruction not found.  */
+    return {};
+
+  /* Insert breakpoint after the end of the atomic sequence.  */
+  return { loc + insn_size };
+}
+
 /* Implement the "software_single_step" gdbarch method, needed to
    single step through atomic sequences on AArch64.  */
 
@@ -3479,6 +3577,9 @@  aarch64_software_single_step (struct regcache *regcache)
   if (aarch64_decode_insn (insn, &inst, 1, NULL) != 0)
     return {};
 
+  if (AARCH64_CPU_HAS_FEATURE (*inst.opcode->avariant, MOPS))
+    return aarch64_software_single_step_mops (regcache, loc, insn);
+
   /* Look for a Load Exclusive instruction which begins the sequence.  */
   if (inst.opcode->iclass != ldstexcl || bit (insn, 22) == 0)
     return {};
@@ -3808,8 +3909,10 @@  aarch64_displaced_step_copy_insn (struct gdbarch *gdbarch,
   if (aarch64_decode_insn (insn, &inst, 1, NULL) != 0)
     return NULL;
 
-  /* Look for a Load Exclusive instruction which begins the sequence.  */
-  if (inst.opcode->iclass == ldstexcl && bit (insn, 22))
+  /* Look for a Load Exclusive instruction which begins the sequence,
+     or for a MOPS instruction.  */
+  if ((inst.opcode->iclass == ldstexcl && bit (insn, 22))
+      || AARCH64_CPU_HAS_FEATURE (*inst.opcode->avariant, MOPS))
     {
       /* We can't displaced step atomic sequences.  */
       return NULL;