
[AArch64,1/2] Refactor aarch64_operands_ok_for_ldpstp, aarch64_operands_adjust_ok_for_ldpstp

Message ID 1463476951-1567-2-git-send-email-james.greenhalgh@arm.com
State New

Commit Message

James Greenhalgh May 17, 2016, 9:22 a.m. UTC
Hi,

These two functions are very similar and suffer from code duplication.
With a little bit of work we can reduce the strain on the reader by
refactoring the functions.

Essentially, we're going to remove the explicit references to reg_1,
reg_2, reg_3 and reg_4 and keep these operands in arrays instead, at
which point it becomes clear that the two functions share most of their
logic and can be pulled together.
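
To give a feel for the shape of the change, here is a minimal sketch
(a condensed fragment using names from the patch below, not the
patched code itself):

    /* Before: one variable per operand and each check written out
       longhand (load case shown).  */
    rtx reg_1 = operands[0], reg_2 = operands[2];
    if (REGNO (reg_1) == REGNO (reg_2))
      return false;

    /* After: the operands live in arrays (filled in by
       aarch64_extract_ldpstp_operands), so each check becomes a loop
       and the 2-operand and 4-operand cases share one code path.  */
    unsigned int count = adjust ? 4 : 2;
    rtx reg[4], mem[4];
    aarch64_extract_ldpstp_operands (count, load, operands, reg, mem);
    for (unsigned int i = 0; i < count; i += 2)
      if (REGNO (reg[i]) == REGNO (reg[i + 1]))
        return false;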

Bootstrapped and tested for aarch64-none-linux-gnu with no issues.

OK?

Thanks,
James

---
2016-05-17  James Greenhalgh  <james.greenhalgh@arm.com>

	* config/aarch64/aarch64.c
	(aarch64_extract_ldpstp_operands): New.
	(aarch64_ldpstp_ops_same_reg_class_p): Likewise.
	(aarch64_ldpstp_load_regs_clobber_base_p): Likewise.
	(aarch64_ldpstp_offsets_consecutive_p): Likewise.
	(aarch64_operands_ok_for_ldpstp_1): Likewise.
	(aarch64_operands_ok_for_ldpstp): Refactor to
	aarch64_operands_ok_for_ldpstp_1.
	(aarch64_operands_adjust_ok_for_ldpstp): Likewise.

Comments

James Greenhalgh June 3, 2016, 8:45 a.m. UTC | #1
*ping* https://gcc.gnu.org/ml/gcc-patches/2016-05/msg01193.html

Thanks,
James


Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 986262b..434c154 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -13346,85 +13346,167 @@  aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
   return;
 }
 
-/* Given OPERANDS of consecutive load/store, check if we can merge
-   them into ldp/stp.  LOAD is true if they are load instructions.
-   MODE is the mode of memory operands.  */
+/* Extract into REG and MEM the operands of an LDP/STP operation from
+   OPERANDS.  The count of operands to extract is OPS, and whether
+   we are looking at a load or a store is given by LOAD.  */
 
-bool
-aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
-				enum machine_mode mode)
+static void
+aarch64_extract_ldpstp_operands (unsigned int ops, bool load,
+				 rtx *operands, rtx *reg, rtx *mem)
 {
-  HOST_WIDE_INT offval_1, offval_2, msize;
-  enum reg_class rclass_1, rclass_2;
-  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
+  for (unsigned int i = 0; i < ops; i++)
+    {
+      unsigned int twoi = i * 2;
+      reg[i] = operands[load ? twoi : (twoi + 1)];
+      mem[i] = operands[load ? (twoi + 1) : twoi];
+      /* Sanity check.  */
+      gcc_assert (MEM_P (mem[i]));
 
-  if (load)
+      if (load)
+	gcc_assert (REG_P (reg[i]));
+    }
+}
+
+/* Return TRUE if each RTX in REG (which has size COUNT) is of the
+   same register class.  For the purposes of this function anything
+   which would not satisfy REG_P (i.e. a const_int 0 or a const_double
+   0.0) is considered to be in GENERAL_REGS.  */
+
+static bool
+aarch64_ldpstp_ops_same_reg_class_p (unsigned int count, rtx *reg)
+{
+  /* Check if the registers are of same class.  */
+  reg_class rclass = (REG_P (reg[0]) && FP_REGNUM_P (REGNO (reg[0])))
+		      ? FP_REGS
+		      : GENERAL_REGS;
+
+  for (unsigned int i = 1; i < count; i++)
     {
-      mem_1 = operands[1];
-      mem_2 = operands[3];
-      reg_1 = operands[0];
-      reg_2 = operands[2];
-      gcc_assert (REG_P (reg_1) && REG_P (reg_2));
-      if (REGNO (reg_1) == REGNO (reg_2))
+      reg_class rc = (REG_P (reg[i]) && FP_REGNUM_P (REGNO (reg[i])))
+		      ? FP_REGS
+		      : GENERAL_REGS;
+      if (rclass != rc)
 	return false;
     }
-  else
+
+  return true;
+}
+
+/* REG contains the set of registers, sized by COUNT, which are written
+   by a sequence of (base + offset) loads from base register BASE with
+   offsets OFFSETS.  Return TRUE if any of the registers in REG clobber
+   BASE.  */
+
+static bool
+aarch64_ldpstp_load_regs_clobber_base_p (unsigned int count,
+					 rtx *reg, rtx base,
+					 HOST_WIDE_INT *offsets)
+{
+  for (unsigned int i = 0; i < count - 1; i++)
+    if (reg_mentioned_p (reg[i], base))
+      return true;
+
+  /* In increasing order, the last load can clobber the address.  */
+  return (offsets[0] > offsets[1]
+	  && reg_mentioned_p (reg[count - 1], base));
+}
+
+/* Return true if OFFSETS, which has size COUNT, is an ascending or
+   descending sequence with consecutive elements separated by MSIZE.  */
+
+static bool
+aarch64_ldpstp_offsets_consecutive_p (unsigned int count,
+				      HOST_WIDE_INT *offsets,
+				      HOST_WIDE_INT msize)
+{
+  bool ascending = true, descending = true;
+  for (unsigned int i = 0; i < count; i++)
     {
-      mem_1 = operands[0];
-      mem_2 = operands[2];
-      reg_1 = operands[1];
-      reg_2 = operands[3];
+      ascending &= (offsets[0] == offsets[i] - (msize * i));
+      descending &= (offsets[0] == offsets[i] + msize * i);
     }
 
-  /* The mems cannot be volatile.  */
-  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
-    return false;
+  return ascending || descending;
+}
 
-  /* Check if the addresses are in the form of [base+offset].  */
-  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
-  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
-    return false;
-  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
-  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
-    return false;
 
-  /* Check if the bases are same.  */
-  if (!rtx_equal_p (base_1, base_2))
-    return false;
+/* Helper function for aarch64_operands_ok_for_ldpstp and
+   aarch64_operands_adjust_ok_for_ldpstp.  OPERANDS are the
+   consecutive load/store operands which we hope to merge.  LOAD
+   is true if these are load instructions.  MODE is the mode of the
+   memory operations.  ADJUST is true if we are in the 4-operand
+   adjust case.  */
 
-  offval_1 = INTVAL (offset_1);
-  offval_2 = INTVAL (offset_2);
-  msize = GET_MODE_SIZE (mode);
-  /* Check if the offsets are consecutive.  */
-  if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
-    return false;
+bool
+aarch64_operands_ok_for_ldpstp_1 (rtx *operands, bool load,
+				 enum machine_mode mode, bool adjust)
+{
+  const unsigned int count = adjust ? 4 : 2;
+  /* Avoid alloca calls by sizing the arrays as large as they need to
+     be for the largest case we can handle.  */
+  const unsigned int max_ops = 4;
+  rtx mem[max_ops], reg[max_ops], base[max_ops], offset[max_ops];
+  HOST_WIDE_INT offval[max_ops];
+  unsigned int i = 0;
+  HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
+
+  aarch64_extract_ldpstp_operands (count, load, operands, reg, mem);
 
-  /* Check if the addresses are clobbered by load.  */
   if (load)
     {
-      if (reg_mentioned_p (reg_1, mem_1))
-	return false;
+      for (i = 0; i < count; i += 2)
+	if (REGNO (reg[i]) == REGNO (reg[i + 1]))
+	  return false;
+    }
+
+  /* For the adjust case, skip if memory operand is by itself valid
+     for ldp/stp.  */
+  if (adjust
+      && (!MEM_P (mem[0]) || aarch64_mem_pair_operand (mem[0], mode)))
+    return false;
 
-      /* In increasing order, the last load can clobber the address.  */
-      if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
+  /* The mems cannot be volatile.  */
+  for (i = 0; i < count; i++)
+    if (MEM_VOLATILE_P (mem[i]))
       return false;
+
+  /* Check if the addresses are in the form of [base+offset].  */
+  for (i = 0; i < count; i++)
+    {
+      extract_base_offset_in_addr (mem[i], &base[i], &offset[i]);
+      if (base[i] == NULL_RTX || offset[i] == NULL_RTX)
+	return false;
     }
 
-  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
-    rclass_1 = FP_REGS;
-  else
-    rclass_1 = GENERAL_REGS;
+  /* Check if the bases are same.  */
+  for (i = 1; i < count; i++)
+    if (!rtx_equal_p (base[0], base[i]))
+      return false;
 
-  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
-    rclass_2 = FP_REGS;
-  else
-    rclass_2 = GENERAL_REGS;
+  for (i = 0; i < count; i++)
+    offval[i] = INTVAL (offset[i]);
 
-  /* Check if the registers are of same class.  */
-  if (rclass_1 != rclass_2)
+  if (!aarch64_ldpstp_offsets_consecutive_p (count, offval, msize))
     return false;
 
-  return true;
+  /* Check if the addresses are clobbered by load.  */
+  if (load && aarch64_ldpstp_load_regs_clobber_base_p (count, reg,
+						       base[0], offval))
+    return false;
+
+  return aarch64_ldpstp_ops_same_reg_class_p (count, reg);
+}
+
+
+/* Given OPERANDS of consecutive load/store, check if we can merge
+   them into ldp/stp.  LOAD is true if they are load instructions.
+   MODE is the mode of memory operands.  */
+
+bool
+aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
+				enum machine_mode mode)
+{
+  return aarch64_operands_ok_for_ldpstp_1 (operands, load, mode, false);
 }
 
 /* Given OPERANDS of consecutive load/store, check if we can merge
@@ -13446,124 +13528,13 @@  aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
      stp  w1, w1, [scratch, 0x8]
 
    The peephole patterns detecting this opportunity should guarantee
-   the scratch register is avaliable.  */
+   the scratch register is available.  */
 
 bool
 aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
 				       enum machine_mode mode)
 {
-  enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
-  HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
-  rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
-  rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
-
-  if (load)
-    {
-      reg_1 = operands[0];
-      mem_1 = operands[1];
-      reg_2 = operands[2];
-      mem_2 = operands[3];
-      reg_3 = operands[4];
-      mem_3 = operands[5];
-      reg_4 = operands[6];
-      mem_4 = operands[7];
-      gcc_assert (REG_P (reg_1) && REG_P (reg_2)
-		  && REG_P (reg_3) && REG_P (reg_4));
-      if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
-	return false;
-    }
-  else
-    {
-      mem_1 = operands[0];
-      reg_1 = operands[1];
-      mem_2 = operands[2];
-      reg_2 = operands[3];
-      mem_3 = operands[4];
-      reg_3 = operands[5];
-      mem_4 = operands[6];
-      reg_4 = operands[7];
-    }
-  /* Skip if memory operand is by itslef valid for ldp/stp.  */
-  if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
-    return false;
-
-  /* The mems cannot be volatile.  */
-  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
-      || MEM_VOLATILE_P (mem_3) ||MEM_VOLATILE_P (mem_4))
-    return false;
-
-  /* Check if the addresses are in the form of [base+offset].  */
-  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
-  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
-    return false;
-  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
-  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
-    return false;
-  extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
-  if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
-    return false;
-  extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
-  if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
-    return false;
-
-  /* Check if the bases are same.  */
-  if (!rtx_equal_p (base_1, base_2)
-      || !rtx_equal_p (base_2, base_3)
-      || !rtx_equal_p (base_3, base_4))
-    return false;
-
-  offval_1 = INTVAL (offset_1);
-  offval_2 = INTVAL (offset_2);
-  offval_3 = INTVAL (offset_3);
-  offval_4 = INTVAL (offset_4);
-  msize = GET_MODE_SIZE (mode);
-  /* Check if the offsets are consecutive.  */
-  if ((offval_1 != (offval_2 + msize)
-       || offval_1 != (offval_3 + msize * 2)
-       || offval_1 != (offval_4 + msize * 3))
-      && (offval_4 != (offval_3 + msize)
-	  || offval_4 != (offval_2 + msize * 2)
-	  || offval_4 != (offval_1 + msize * 3)))
-    return false;
-
-  /* Check if the addresses are clobbered by load.  */
-  if (load)
-    {
-      if (reg_mentioned_p (reg_1, mem_1)
-	  || reg_mentioned_p (reg_2, mem_2)
-	  || reg_mentioned_p (reg_3, mem_3))
-	return false;
-
-      /* In increasing order, the last load can clobber the address.  */
-      if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
-	return false;
-    }
-
-  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
-    rclass_1 = FP_REGS;
-  else
-    rclass_1 = GENERAL_REGS;
-
-  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
-    rclass_2 = FP_REGS;
-  else
-    rclass_2 = GENERAL_REGS;
-
-  if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
-    rclass_3 = FP_REGS;
-  else
-    rclass_3 = GENERAL_REGS;
-
-  if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
-    rclass_4 = FP_REGS;
-  else
-    rclass_4 = GENERAL_REGS;
-
-  /* Check if the registers are of same class.  */
-  if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
-    return false;
-
-  return true;
+  return aarch64_operands_ok_for_ldpstp_1 (operands, load, mode, true);
 }
 
 /* Given OPERANDS of consecutive load/store, this function pairs them