[AArch64,v2,04/11] aarch64: Improve atomic-op lse generation

Message ID 20181002161915.18843-5-richard.henderson@linaro.org
State New
Headers show
Series
  • LSE atomics out-of-line
Related show

Commit Message

Richard Henderson Oct. 2, 2018, 4:19 p.m.
Fix constraints; avoid unnecessary split.  Drop the use of the atomic_op
iterator in favor of the ATOMIC_LDOP iterator; this is simplier and more
logical for ldclr aka bic.

	* config/aarch64/aarch64.c (aarch64_emit_bic): Remove.
	(aarch64_atomic_ldop_supported_p): Remove.
	(aarch64_gen_atomic_ldop): Remove.
	* config/aarch64/atomic.md (atomic_<atomic_optab><ALLI>):
	Fully expand LSE operations here.
	(atomic_fetch_<atomic_optab><ALLI>): Likewise.
	(atomic_<atomic_optab>_fetch<ALLI>): Likewise.
	(aarch64_atomic_<ATOMIC_LDOP><ALLI>_lse): Drop atomic_op iterator
	and use ATOMIC_LDOP instead; use register_operand for the input;
	drop the split and emit insns directly.
	(aarch64_atomic_fetch_<ATOMIC_LDOP><ALLI>_lse): Likewise.
	(aarch64_atomic_<atomic_op>_fetch<ALLI>_lse): Remove.
	(@aarch64_atomic_load<ATOMIC_LDOP><ALLI>): Remove.
---
 gcc/config/aarch64/aarch64-protos.h |   2 -
 gcc/config/aarch64/aarch64.c        | 176 -------------------------
 gcc/config/aarch64/atomics.md       | 197 +++++++++++++++-------------
 gcc/config/aarch64/iterators.md     |   5 +-
 4 files changed, 108 insertions(+), 272 deletions(-)

-- 
2.17.1

Comments

James Greenhalgh Oct. 30, 2018, 8:17 p.m. | #1
On Tue, Oct 02, 2018 at 11:19:08AM -0500, Richard Henderson wrote:
> Fix constraints; avoid unnecessary split.  Drop the use of the atomic_op

> iterator in favor of the ATOMIC_LDOP iterator; this is simplier and more

> logical for ldclr aka bic.


OK.

Thanks,
James

> 

> 	* config/aarch64/aarch64.c (aarch64_emit_bic): Remove.

> 	(aarch64_atomic_ldop_supported_p): Remove.

> 	(aarch64_gen_atomic_ldop): Remove.

> 	* config/aarch64/atomic.md (atomic_<atomic_optab><ALLI>):

> 	Fully expand LSE operations here.

> 	(atomic_fetch_<atomic_optab><ALLI>): Likewise.

> 	(atomic_<atomic_optab>_fetch<ALLI>): Likewise.

> 	(aarch64_atomic_<ATOMIC_LDOP><ALLI>_lse): Drop atomic_op iterator

> 	and use ATOMIC_LDOP instead; use register_operand for the input;

> 	drop the split and emit insns directly.

> 	(aarch64_atomic_fetch_<ATOMIC_LDOP><ALLI>_lse): Likewise.

> 	(aarch64_atomic_<atomic_op>_fetch<ALLI>_lse): Remove.

> 	(@aarch64_atomic_load<ATOMIC_LDOP><ALLI>): Remove.

> ---

>  gcc/config/aarch64/aarch64-protos.h |   2 -

>  gcc/config/aarch64/aarch64.c        | 176 -------------------------

>  gcc/config/aarch64/atomics.md       | 197 +++++++++++++++-------------

>  gcc/config/aarch64/iterators.md     |   5 +-

>  4 files changed, 108 insertions(+), 272 deletions(-)

>

Patch

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3d045cf43be..1d2f8487d1a 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -563,8 +563,6 @@  rtx aarch64_load_tp (rtx);
 void aarch64_expand_compare_and_swap (rtx op[]);
 void aarch64_split_compare_and_swap (rtx op[]);
 
-bool aarch64_atomic_ldop_supported_p (enum rtx_code);
-void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
 void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
 
 bool aarch64_gen_adjusted_ldpstp (rtx *, bool, scalar_mode, RTX_CODE);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index f7b0af2589e..867759f7e80 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14226,32 +14226,6 @@  aarch64_expand_compare_and_swap (rtx operands[])
   emit_insn (gen_rtx_SET (bval, x));
 }
 
-/* Test whether the target supports using a atomic load-operate instruction.
-   CODE is the operation and AFTER is TRUE if the data in memory after the
-   operation should be returned and FALSE if the data before the operation
-   should be returned.  Returns FALSE if the operation isn't supported by the
-   architecture.  */
-
-bool
-aarch64_atomic_ldop_supported_p (enum rtx_code code)
-{
-  if (!TARGET_LSE)
-    return false;
-
-  switch (code)
-    {
-    case SET:
-    case AND:
-    case IOR:
-    case XOR:
-    case MINUS:
-    case PLUS:
-      return true;
-    default:
-      return false;
-    }
-}
-
 /* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
    sequence implementing an atomic operation.  */
 
@@ -14384,156 +14358,6 @@  aarch64_split_compare_and_swap (rtx operands[])
     aarch64_emit_post_barrier (model);
 }
 
-/* Emit a BIC instruction.  */
-
-static void
-aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
-{
-  rtx shift_rtx = GEN_INT (shift);
-  rtx (*gen) (rtx, rtx, rtx, rtx);
-
-  switch (mode)
-    {
-    case E_SImode: gen = gen_and_one_cmpl_lshrsi3; break;
-    case E_DImode: gen = gen_and_one_cmpl_lshrdi3; break;
-    default:
-      gcc_unreachable ();
-    }
-
-  emit_insn (gen (dst, s2, shift_rtx, s1));
-}
-
-/* Emit an atomic load+operate.  CODE is the operation.  OUT_DATA is the
-   location to store the data read from memory.  OUT_RESULT is the location to
-   store the result of the operation.  MEM is the memory location to read and
-   modify.  MODEL_RTX is the memory ordering to use.  VALUE is the second
-   operand for the operation.  Either OUT_DATA or OUT_RESULT, but not both, can
-   be NULL.  */
-
-void
-aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
-			 rtx mem, rtx value, rtx model_rtx)
-{
-  machine_mode mode = GET_MODE (mem);
-  machine_mode wmode = (mode == DImode ? DImode : SImode);
-  const bool short_mode = (mode < SImode);
-  int ldop_code;
-  rtx src;
-  rtx x;
-
-  if (out_data)
-    out_data = gen_lowpart (mode, out_data);
-
-  if (out_result)
-    out_result = gen_lowpart (mode, out_result);
-
-  /* Make sure the value is in a register, putting it into a destination
-     register if it needs to be manipulated.  */
-  if (!register_operand (value, mode)
-      || code == AND || code == MINUS)
-    {
-      src = out_result ? out_result : out_data;
-      emit_move_insn (src, gen_lowpart (mode, value));
-    }
-  else
-    src = value;
-  gcc_assert (register_operand (src, mode));
-
-  /* Preprocess the data for the operation as necessary.  If the operation is
-     a SET then emit a swap instruction and finish.  */
-  switch (code)
-    {
-    case MINUS:
-      /* Negate the value and treat it as a PLUS.  */
-      {
-	rtx neg_src;
-
-	/* Resize the value if necessary.  */
-	if (short_mode)
-	  src = gen_lowpart (wmode, src);
-
-	neg_src = gen_rtx_NEG (wmode, src);
-	emit_insn (gen_rtx_SET (src, neg_src));
-
-	if (short_mode)
-	  src = gen_lowpart (mode, src);
-      }
-      /* Fall-through.  */
-    case PLUS:
-      ldop_code = UNSPECV_ATOMIC_LDOP_PLUS;
-      break;
-
-    case IOR:
-      ldop_code = UNSPECV_ATOMIC_LDOP_OR;
-      break;
-
-    case XOR:
-      ldop_code = UNSPECV_ATOMIC_LDOP_XOR;
-      break;
-
-    case AND:
-      {
-	rtx not_src;
-
-	/* Resize the value if necessary.  */
-	if (short_mode)
-	  src = gen_lowpart (wmode, src);
-
-	not_src = gen_rtx_NOT (wmode, src);
-	emit_insn (gen_rtx_SET (src, not_src));
-
-	if (short_mode)
-	  src = gen_lowpart (mode, src);
-      }
-      ldop_code = UNSPECV_ATOMIC_LDOP_BIC;
-      break;
-
-    default:
-      /* The operation can't be done with atomic instructions.  */
-      gcc_unreachable ();
-    }
-
-  emit_insn (gen_aarch64_atomic_load (ldop_code, mode,
-				      out_data, mem, src, model_rtx));
-
-  /* If necessary, calculate the data in memory after the update by redoing the
-     operation from values in registers.  */
-  if (!out_result)
-    return;
-
-  if (short_mode)
-    {
-      src = gen_lowpart (wmode, src);
-      out_data = gen_lowpart (wmode, out_data);
-      out_result = gen_lowpart (wmode, out_result);
-    }
-
-  x = NULL_RTX;
-
-  switch (code)
-    {
-    case MINUS:
-    case PLUS:
-      x = gen_rtx_PLUS (wmode, out_data, src);
-      break;
-    case IOR:
-      x = gen_rtx_IOR (wmode, out_data, src);
-      break;
-    case XOR:
-      x = gen_rtx_XOR (wmode, out_data, src);
-      break;
-    case AND:
-      aarch64_emit_bic (wmode, out_result, out_data, src, 0);
-      return;
-    default:
-      gcc_unreachable ();
-    }
-
-  emit_set_insn (out_result, x);
-
-  return;
-}
-
 /* Split an atomic operation.  */
 
 void
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index bc9e396dc96..2198649b1be 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -207,13 +207,37 @@ 
     rtx (*gen) (rtx, rtx, rtx);
 
     /* Use an atomic load-operate instruction when possible.  */
-    if (aarch64_atomic_ldop_supported_p (<CODE>))
-      gen = gen_aarch64_atomic_<atomic_optab><mode>_lse;
+    if (TARGET_LSE)
+      {
+	switch (<CODE>)
+	  {
+	  case MINUS:
+	    operands[1] = expand_simple_unop (<MODE>mode, NEG, operands[1],
+					      NULL, 1);
+	    /* fallthru */
+	  case PLUS:
+	    gen = gen_aarch64_atomic_add<mode>_lse;
+	    break;
+	  case IOR:
+	    gen = gen_aarch64_atomic_ior<mode>_lse;
+	    break;
+	  case XOR:
+	    gen = gen_aarch64_atomic_xor<mode>_lse;
+	    break;
+	  case AND:
+	    operands[1] = expand_simple_unop (<MODE>mode, NOT, operands[1],
+					      NULL, 1);
+	    gen = gen_aarch64_atomic_bic<mode>_lse;
+	    break;
+	  default:
+	    gcc_unreachable ();
+	  }
+	operands[1] = force_reg (<MODE>mode, operands[1]);
+      }
     else
       gen = gen_aarch64_atomic_<atomic_optab><mode>;
 
     emit_insn (gen (operands[0], operands[1], operands[2]));
-
     DONE;
   }
 )
@@ -239,22 +263,25 @@ 
   }
 )
 
-(define_insn_and_split "aarch64_atomic_<atomic_optab><mode>_lse"
+(define_insn "aarch64_atomic_<atomic_ldoptab><mode>_lse"
   [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
-    (unspec_volatile:ALLI
-      [(atomic_op:ALLI (match_dup 0)
-	(match_operand:ALLI 1 "<atomic_op_operand>" "r<const_atomic>"))
-       (match_operand:SI 2 "const_int_operand")]
-      UNSPECV_ATOMIC_OP))
+	(unspec_volatile:ALLI
+	  [(match_dup 0)
+	   (match_operand:ALLI 1 "register_operand" "r")
+	   (match_operand:SI 2 "const_int_operand")]
+      ATOMIC_LDOP))
    (clobber (match_scratch:ALLI 3 "=&r"))]
   "TARGET_LSE"
-  "#"
-  "&& reload_completed"
-  [(const_int 0)]
   {
-    aarch64_gen_atomic_ldop (<CODE>, operands[3], NULL, operands[0],
-			     operands[1], operands[2]);
-    DONE;
+   enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
+   if (is_mm_relaxed (model))
+     return "ld<atomic_ldop><atomic_sfx>\t%<w>1, %<w>3, %0";
+   else if (is_mm_release (model))
+     return "ld<atomic_ldop>l<atomic_sfx>\t%<w>1, %<w>3, %0";
+   else if (is_mm_acquire (model) || is_mm_consume (model))
+     return "ld<atomic_ldop>a<atomic_sfx>\t%<w>1, %<w>3, %0";
+   else
+     return "ld<atomic_ldop>al<atomic_sfx>\t%<w>1, %<w>3, %0";
   }
 )
 
@@ -280,7 +307,7 @@ 
   }
 )
 
-;; Load-operate-store, returning the updated memory data.
+;; Load-operate-store, returning the original memory data.
 
 (define_expand "atomic_fetch_<atomic_optab><mode>"
  [(match_operand:ALLI 0 "register_operand" "")
@@ -293,13 +320,37 @@ 
   rtx (*gen) (rtx, rtx, rtx, rtx);
 
   /* Use an atomic load-operate instruction when possible.  */
-  if (aarch64_atomic_ldop_supported_p (<CODE>))
-    gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>_lse;
+  if (TARGET_LSE)
+    {
+      switch (<CODE>)
+        {
+	case MINUS:
+	  operands[2] = expand_simple_unop (<MODE>mode, NEG, operands[2],
+					    NULL, 1);
+	  /* fallthru */
+	case PLUS:
+	  gen = gen_aarch64_atomic_fetch_add<mode>_lse;
+	  break;
+	case IOR:
+	  gen = gen_aarch64_atomic_fetch_ior<mode>_lse;
+	  break;
+	case XOR:
+	  gen = gen_aarch64_atomic_fetch_xor<mode>_lse;
+	  break;
+	case AND:
+	  operands[2] = expand_simple_unop (<MODE>mode, NOT, operands[2],
+					    NULL, 1);
+	  gen = gen_aarch64_atomic_fetch_bic<mode>_lse;
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+      operands[2] = force_reg (<MODE>mode, operands[2]);
+    }
   else
     gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>;
 
   emit_insn (gen (operands[0], operands[1], operands[2], operands[3]));
-
   DONE;
 })
 
@@ -326,23 +377,26 @@ 
   }
 )
 
-(define_insn_and_split "aarch64_atomic_fetch_<atomic_optab><mode>_lse"
-  [(set (match_operand:ALLI 0 "register_operand" "=&r")
-    (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
+(define_insn "aarch64_atomic_fetch_<atomic_ldoptab><mode>_lse"
+  [(set (match_operand:ALLI 0 "register_operand" "=r")
+	(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
    (set (match_dup 1)
-    (unspec_volatile:ALLI
-      [(atomic_op:ALLI (match_dup 1)
-	(match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>"))
-       (match_operand:SI 3 "const_int_operand")]
-      UNSPECV_ATOMIC_LDOP))]
+	(unspec_volatile:ALLI
+	  [(match_dup 1)
+	   (match_operand:ALLI 2 "register_operand" "r")
+	   (match_operand:SI 3 "const_int_operand")]
+	  ATOMIC_LDOP))]
   "TARGET_LSE"
-  "#"
-  "&& reload_completed"
-  [(const_int 0)]
   {
-    aarch64_gen_atomic_ldop (<CODE>, operands[0], NULL, operands[1],
-			     operands[2], operands[3]);
-    DONE;
+   enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
+   if (is_mm_relaxed (model))
+     return "ld<atomic_ldop><atomic_sfx>\t%<w>2, %<w>0, %1";
+   else if (is_mm_acquire (model) || is_mm_consume (model))
+     return "ld<atomic_ldop>a<atomic_sfx>\t%<w>2, %<w>0, %1";
+   else if (is_mm_release (model))
+     return "ld<atomic_ldop>l<atomic_sfx>\t%<w>2, %<w>0, %1";
+   else
+     return "ld<atomic_ldop>al<atomic_sfx>\t%<w>2, %<w>0, %1";
   }
 )
 
@@ -370,7 +424,7 @@ 
   }
 )
 
-;; Load-operate-store, returning the original memory data.
+;; Load-operate-store, returning the updated memory data.
 
 (define_expand "atomic_<atomic_optab>_fetch<mode>"
  [(match_operand:ALLI 0 "register_operand" "")
@@ -380,17 +434,23 @@ 
   (match_operand:SI 3 "const_int_operand")]
  ""
 {
-  rtx (*gen) (rtx, rtx, rtx, rtx);
-  rtx value = operands[2];
-
-  /* Use an atomic load-operate instruction when possible.  */
-  if (aarch64_atomic_ldop_supported_p (<CODE>))
-    gen = gen_aarch64_atomic_<atomic_optab>_fetch<mode>_lse;
+  /* Use an atomic load-operate instruction when possible.  In this case
+     we will re-compute the result from the original mem value. */
+  if (TARGET_LSE)
+    {
+      rtx tmp = gen_reg_rtx (<MODE>mode);
+      operands[2] = force_reg (<MODE>mode, operands[2]);
+      emit_insn (gen_atomic_fetch_<atomic_optab><mode>
+                 (tmp, operands[1], operands[2], operands[3]));
+      tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2],
+				 operands[0], 1, OPTAB_WIDEN);
+      emit_move_insn (operands[0], tmp);
+    }
   else
-    gen = gen_aarch64_atomic_<atomic_optab>_fetch<mode>;
-
-  emit_insn (gen (operands[0], operands[1], value, operands[3]));
-
+    {
+      emit_insn (gen_aarch64_atomic_<atomic_optab>_fetch<mode>
+                 (operands[0], operands[1], operands[2], operands[3]));
+    }
   DONE;
 })
 
@@ -417,29 +477,6 @@ 
   }
 )
 
-(define_insn_and_split "aarch64_atomic_<atomic_optab>_fetch<mode>_lse"
-  [(set (match_operand:ALLI 0 "register_operand" "=&r")
-    (atomic_op:ALLI
-     (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
-     (match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>")))
-   (set (match_dup 1)
-    (unspec_volatile:ALLI
-      [(match_dup 1)
-       (match_dup 2)
-       (match_operand:SI 3 "const_int_operand")]
-      UNSPECV_ATOMIC_LDOP))
-     (clobber (match_scratch:ALLI 4 "=&r"))]
-  "TARGET_LSE"
-  "#"
-  "&& reload_completed"
-  [(const_int 0)]
-  {
-    aarch64_gen_atomic_ldop (<CODE>, operands[4], operands[0], operands[1],
-			     operands[2], operands[3]);
-    DONE;
-  }
-)
-
 (define_insn_and_split "atomic_nand_fetch<mode>"
   [(set (match_operand:ALLI 0 "register_operand" "=&r")
     (not:ALLI
@@ -585,29 +622,3 @@ 
       return "dmb\\tish";
   }
 )
-
-;; ARMv8.1-A LSE instructions.
-
-;; Atomic load-op: Load data, operate, store result, keep data.
-
-(define_insn "@aarch64_atomic_load<atomic_ldop><mode>"
- [(set (match_operand:ALLI 0 "register_operand" "=r")
-   (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
-  (set (match_dup 1)
-   (unspec_volatile:ALLI
-    [(match_dup 1)
-     (match_operand:ALLI 2 "register_operand")
-     (match_operand:SI 3 "const_int_operand")]
-    ATOMIC_LDOP))]
- "TARGET_LSE && reload_completed"
- {
-   enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
-   if (is_mm_relaxed (model))
-     return "ld<atomic_ldop><atomic_sfx>\t%<w>2, %<w>0, %1";
-   else if (is_mm_acquire (model) || is_mm_consume (model))
-     return "ld<atomic_ldop>a<atomic_sfx>\t%<w>2, %<w>0, %1";
-   else if (is_mm_release (model))
-     return "ld<atomic_ldop>l<atomic_sfx>\t%<w>2, %<w>0, %1";
-   else
-     return "ld<atomic_ldop>al<atomic_sfx>\t%<w>2, %<w>0, %1";
- })
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index a43956054e8..524e4e6929b 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -503,7 +503,6 @@ 
     UNSPECV_ATOMIC_CAS		; Represent an atomic CAS.
     UNSPECV_ATOMIC_SWP		; Represent an atomic SWP.
     UNSPECV_ATOMIC_OP		; Represent an atomic operation.
-    UNSPECV_ATOMIC_LDOP		; Represent an atomic load-operation
     UNSPECV_ATOMIC_LDOP_OR	; Represent an atomic load-or
     UNSPECV_ATOMIC_LDOP_BIC	; Represent an atomic load-bic
     UNSPECV_ATOMIC_LDOP_XOR	; Represent an atomic load-xor
@@ -1591,6 +1590,10 @@ 
  [(UNSPECV_ATOMIC_LDOP_OR "set") (UNSPECV_ATOMIC_LDOP_BIC "clr")
   (UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add")])
 
+(define_int_attr atomic_ldoptab
+ [(UNSPECV_ATOMIC_LDOP_OR "ior") (UNSPECV_ATOMIC_LDOP_BIC "bic")
+  (UNSPECV_ATOMIC_LDOP_XOR "xor") (UNSPECV_ATOMIC_LDOP_PLUS "add")])
+
 ;; -------------------------------------------------------------------
 ;; Int Iterators Attributes.
 ;; -------------------------------------------------------------------