[AArch64,v2,10/11] aarch64: Implement TImode compare-and-swap

Message ID 20181002161915.18843-11-richard.henderson@linaro.org
State New
Headers show
Series
  • LSE atomics out-of-line
Related show

Commit Message

Richard Henderson Oct. 2, 2018, 4:19 p.m.
This pattern will only be used with the __sync functions, because
we do not yet have a bare TImode atomic load.

	* config/aarch64/aarch64.c (aarch64_gen_compare_reg): Add support		for NE comparison of TImode values.
	(aarch64_print_operand): Extend %R to handle general registers.
	(aarch64_emit_load_exclusive): Add support for TImode.
	(aarch64_emit_store_exclusive): Likewise.
	(aarch64_atomic_ool_func): Likewise.
	(aarch64_ool_cas_names): Likewise.
	* config/aarch64/atomics.md (@atomic_compare_and_swap<ALLI_TI>):
	Change iterator from ALLI to ALLI_TI.
	(@atomic_compare_and_swap<JUST_TI>): New.
	(@atomic_compare_and_swap<JUST_TI>_lse): New.
	(aarch64_load_exclusive_pair): New.
	(aarch64_store_exclusive_pair): New.
	* config/aarch64/iterators.md (JUST_TI): New.

	* config/aarch64/lse.c (cas): Add support for SIZE == 16.
	* config/aarch64/t-lse (S0, O0): Split out cas.
	(LSE_OBJS): Include $(O0).
---
 gcc/config/aarch64/aarch64-protos.h |  2 +-
 gcc/config/aarch64/aarch64.c        | 72 ++++++++++++++++++-----
 libgcc/config/aarch64/lse.c         | 48 ++++++++++-----
 gcc/config/aarch64/atomics.md       | 91 +++++++++++++++++++++++++++--
 gcc/config/aarch64/iterators.md     |  3 +
 libgcc/config/aarch64/t-lse         | 10 +++-
 6 files changed, 189 insertions(+), 37 deletions(-)

-- 
2.17.1

Patch

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index c7b96b12bbe..f735c4e5ad8 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -626,7 +626,7 @@  bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT);
 
 struct atomic_ool_names
 {
-    const char *str[4][4];
+    const char *str[5][4];
 };
 
 rtx aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index ce4d7e51d00..ac2f055a09e 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1610,10 +1610,33 @@  emit_set_insn (rtx x, rtx y)
 rtx
 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 {
-  machine_mode mode = SELECT_CC_MODE (code, x, y);
-  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
+  machine_mode cmp_mode = GET_MODE (x);
+  machine_mode cc_mode;
+  rtx cc_reg;
 
-  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
+  if (cmp_mode == E_TImode)
+    {
+      gcc_assert (code == NE);
+
+      cc_mode = E_CCmode;
+      cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
+
+      rtx x_lo = operand_subword (x, 0, 0, TImode);
+      rtx y_lo = operand_subword (y, 0, 0, TImode);
+      emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo));
+
+      rtx x_hi = operand_subword (x, 1, 0, TImode);
+      rtx y_hi = operand_subword (y, 1, 0, TImode);
+      emit_insn (gen_ccmpdi (cc_reg, cc_reg, x_hi, y_hi,
+			     gen_rtx_EQ (cc_mode, cc_reg, const0_rtx),
+			     GEN_INT (AARCH64_EQ)));
+    }
+  else
+    {
+      cc_mode = SELECT_CC_MODE (code, x, y);
+      cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
+      emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y));
+    }
   return cc_reg;
 }
 
@@ -6693,7 +6716,7 @@  sizetochar (int size)
      'S/T/U/V':		Print a FP/SIMD register name for a register list.
 			The register printed is the FP/SIMD register name
 			of X + 0/1/2/3 for S/T/U/V.
-     'R':		Print a scalar FP/SIMD register name + 1.
+     'R':		Print a scalar Integer/FP/SIMD register name + 1.
      'X':		Print bottom 16 bits of integer constant in hex.
      'w/x':		Print a general register name or the zero register
 			(32-bit or 64-bit).
@@ -6885,12 +6908,13 @@  aarch64_print_operand (FILE *f, rtx x, int code)
       break;
 
     case 'R':
-      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
-	{
-	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
-	  return;
-	}
-      asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
+      if (REG_P (x) && FP_REGNUM_P (REGNO (x)))
+	asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
+      else if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
+	asm_fprintf (f, "x%d", REGNO (x) - R0_REGNUM + 1);
+      else
+	output_operand_lossage ("incompatible register operand for '%%%c'",
+				code);
       break;
 
     case 'X':
@@ -14143,16 +14167,26 @@  static void
 aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
 			     rtx mem, rtx model_rtx)
 {
-  emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx));
+  if (mode == E_TImode)
+    emit_insn (gen_aarch64_load_exclusive_pair (gen_lowpart (DImode, rval),
+						gen_highpart (DImode, rval),
+						mem, model_rtx));
+  else
+    emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx));
 }
 
 /* Emit store exclusive.  */
 
 static void
 aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
-			      rtx rval, rtx mem, rtx model_rtx)
+			      rtx mem, rtx val, rtx model_rtx)
 {
-  emit_insn (gen_aarch64_store_exclusive (mode, bval, rval, mem, model_rtx));
+  if (mode == E_TImode)
+    emit_insn (gen_aarch64_store_exclusive_pair
+	       (bval, mem, operand_subword (val, 0, 0, TImode),
+		operand_subword (val, 1, 0, TImode), model_rtx));
+  else
+    emit_insn (gen_aarch64_store_exclusive (mode, bval, mem, val, model_rtx));
 }
 
 /* Mark the previous jump instruction as unlikely.  */
@@ -14164,7 +14198,7 @@  aarch64_emit_unlikely_jump (rtx insn)
   add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
 }
 
-/* We store the names of the various atomic helpers in a 4x4 array.
+/* We store the names of the various atomic helpers in a 5x4 array.
    Return the libcall function given MODE, MODEL and NAMES.  */
 
 rtx
@@ -14188,6 +14222,9 @@  aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
     case E_DImode:
       mode_idx = 3;
       break;
+    case E_TImode:
+      mode_idx = 4;
+      break;
     default:
       gcc_unreachable ();
     }
@@ -14222,9 +14259,11 @@  aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
     "__aa64_" #B #N "_rel", \
     "__aa64_" #B #N "_acq_rel" }
 
-#define DEF4(B)  DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8)
+#define DEF4(B)  DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \
+		 { NULL, NULL, NULL, NULL }
+#define DEF5(B)  DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), DEF0(B, 16)
 
-static const atomic_ool_names aarch64_ool_cas_names = { { DEF4(cas) } };
+static const atomic_ool_names aarch64_ool_cas_names = { { DEF5(cas) } };
 const atomic_ool_names aarch64_ool_swp_names = { { DEF4(swp) } };
 const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } };
 const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } };
@@ -14247,6 +14286,7 @@  const atomic_ool_names aarch64_ool_steor_names = { { DEF4(eor) } };
 
 #undef DEF0
 #undef DEF4
+#undef DEF5
 
 /* Expand a compare and swap pattern.  */
 
diff --git a/libgcc/config/aarch64/lse.c b/libgcc/config/aarch64/lse.c
index 68ca7df667b..f6114add71a 100644
--- a/libgcc/config/aarch64/lse.c
+++ b/libgcc/config/aarch64/lse.c
@@ -91,6 +91,7 @@  asm(".arch armv8-a+lse");
 #elif SIZE == 4 || SIZE == 8
 # define S     ""
 # define MASK  ""
+#elif SIZE == 16
 #else
 # error
 #endif
@@ -98,9 +99,11 @@  asm(".arch armv8-a+lse");
 #if SIZE < 8
 # define T  unsigned int
 # define W  "w"
-#else
+#elif SIZE == 8
 # define T  unsigned long long
 # define W  ""
+#else
+# define T  unsigned __int128
 #endif
 
 #if MODEL == 1
@@ -138,19 +141,38 @@  T NAME(cas) (T cmp, T new, T *ptr)
   unsigned tmp;
 
   if (have_atomics)
-    __asm__("cas" A L S " %"W"0, %"W"2, %1"
-            : "=r"(old), "+m"(*ptr) : "r"(new), "0"(cmp));
+    {
+#if SIZE == 16
+      __asm__("casp" A L " %0, %R0, %2, %R2, %1"
+              : "=r"(old), "+m"(*ptr) : "r"(new), "0"(cmp));
+#else
+      __asm__("cas" A L S " %"W"0, %"W"2, %1"
+              : "=r"(old), "+m"(*ptr) : "r"(new), "0"(cmp));
+#endif
+    }
   else
-    __asm__(
-	"0: "
-	"ld" A "xr"S" %"W"0, %1\n\t"
-	"cmp %"W"0, %"W"4" MASK "\n\t"
-	"bne 1f\n\t"
-	"st" L "xr"S" %w2, %"W"3, %1\n\t"
-	"cbnz %w2, 0b\n"
-	"1:"
-	: "=&r"(old), "+m"(*ptr), "=&r"(tmp) : "r"(new), "r"(cmp));
-
+    {
+#if SIZE == 16
+      __asm__("0: "
+	      "ld" A "xp %0, %R0, %1\n\t"
+	      "cmp %0, %4\n\t"
+	      "ccmp %R0, %R4, #0, eq\n\t"
+	      "bne 1f\n\t"
+	      "st" L "xp %w2, %3, %R3, %1\n\t"
+	      "cbnz %w2, 0b\n"
+	      "1:"
+	      : "=&r"(old), "+m"(*ptr), "=&r"(tmp) : "r"(new), "r"(cmp));
+#else
+      __asm__("0: "
+	      "ld" A "xr"S" %"W"0, %1\n\t"
+	      "cmp %"W"0, %"W"4" MASK "\n\t"
+	      "bne 1f\n\t"
+	      "st" L "xr"S" %w2, %"W"3, %1\n\t"
+	      "cbnz %w2, 0b\n"
+	      "1:"
+	      : "=&r"(old), "+m"(*ptr), "=&r"(tmp) : "r"(new), "r"(cmp));
+#endif
+    }
   return old;
 }
 #endif
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index 72f9962fe55..fe604606bdd 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -22,10 +22,10 @@ 
 
 (define_expand "@atomic_compare_and_swap<mode>"
   [(match_operand:SI 0 "register_operand" "")			;; bool out
-   (match_operand:ALLI 1 "register_operand" "")			;; val out
-   (match_operand:ALLI 2 "aarch64_sync_memory_operand" "")	;; memory
-   (match_operand:ALLI 3 "nonmemory_operand" "")		;; expected
-   (match_operand:ALLI 4 "aarch64_reg_or_zero" "")		;; desired
+   (match_operand:ALLI_TI 1 "register_operand" "")		;; val out
+   (match_operand:ALLI_TI 2 "aarch64_sync_memory_operand" "")	;; memory
+   (match_operand:ALLI_TI 3 "nonmemory_operand" "")		;; expected
+   (match_operand:ALLI_TI 4 "aarch64_reg_or_zero" "")		;; desired
    (match_operand:SI 5 "const_int_operand")			;; is_weak
    (match_operand:SI 6 "const_int_operand")			;; mod_s
    (match_operand:SI 7 "const_int_operand")]			;; mod_f
@@ -88,6 +88,30 @@ 
   }
 )
 
+(define_insn_and_split "@aarch64_compare_and_swap<mode>"
+  [(set (reg:CC CC_REGNUM)					;; bool out
+    (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
+   (set (match_operand:JUST_TI 0 "register_operand" "=&r")	;; val out
+    (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
+   (set (match_dup 1)
+    (unspec_volatile:JUST_TI
+      [(match_operand:JUST_TI 2 "register_operand" "r")		;; expect
+       (match_operand:JUST_TI 3 "aarch64_reg_or_zero" "rZ")	;; desired
+       (match_operand:SI 4 "const_int_operand")			;; is_weak
+       (match_operand:SI 5 "const_int_operand")			;; mod_s
+       (match_operand:SI 6 "const_int_operand")]		;; mod_f
+      UNSPECV_ATOMIC_CMPSW))
+   (clobber (match_scratch:SI 7 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_compare_and_swap (operands);
+    DONE;
+  }
+)
+
 (define_insn "@aarch64_compare_and_swap<mode>_lse"
   [(set (match_operand:SI 0 "register_operand" "+r")		;; val out
     (zero_extend:SI
@@ -133,6 +157,28 @@ 
     return "casal<atomic_sfx>\t%<w>0, %<w>2, %1";
 })
 
+(define_insn "@aarch64_compare_and_swap<mode>_lse"
+  [(set (match_operand:JUST_TI 0 "register_operand" "+r")	;; val out
+    (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
+   (set (match_dup 1)
+    (unspec_volatile:JUST_TI
+      [(match_dup 0)						;; expect
+       (match_operand:JUST_TI 2 "register_operand" "r")		;; desired
+       (match_operand:SI 3 "const_int_operand")]		;; mod_s
+      UNSPECV_ATOMIC_CMPSW))]
+  "TARGET_LSE"
+{
+  enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
+  if (is_mm_relaxed (model))
+    return "casp\t%0, %R0, %2, %R2, %1";
+  else if (is_mm_acquire (model) || is_mm_consume (model))
+    return "caspa\t%0, %R0, %2, %R2, %1";
+  else if (is_mm_release (model))
+    return "caspl\t%0, %R0, %2, %R2, %1";
+  else
+    return "caspal\t%0, %R0, %2, %R2, %1";
+})
+
 (define_expand "atomic_exchange<mode>"
  [(match_operand:ALLI 0 "register_operand" "")
   (match_operand:ALLI 1 "aarch64_sync_memory_operand" "")
@@ -650,6 +696,24 @@ 
   }
 )
 
+(define_insn "aarch64_load_exclusive_pair"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec_volatile:DI
+	  [(match_operand:TI 2 "aarch64_sync_memory_operand" "Q")
+	   (match_operand:SI 3 "const_int_operand")]
+	  UNSPECV_LX))
+   (set (match_operand:DI 1 "register_operand" "=r")
+	(unspec_volatile:DI [(match_dup 2) (match_dup 3)] UNSPECV_LX))]
+  ""
+  {
+    enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
+    if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model))
+      return "ldxp\t%0, %1, %2";
+    else
+      return "ldaxp\t%0, %1, %2";
+  }
+)
+
 (define_insn "@aarch64_store_exclusive<mode>"
   [(set (match_operand:SI 0 "register_operand" "=&r")
     (unspec_volatile:SI [(const_int 0)] UNSPECV_SX))
@@ -668,6 +732,25 @@ 
   }
 )
 
+(define_insn "aarch64_store_exclusive_pair"
+  [(set (match_operand:SI 0 "register_operand" "=&r")
+	(unspec_volatile:SI [(const_int 0)] UNSPECV_SX))
+   (set (match_operand:TI 1 "aarch64_sync_memory_operand" "=Q")
+	(unspec_volatile:TI
+	  [(match_operand:DI 2 "aarch64_reg_or_zero" "rZ")
+	   (match_operand:DI 3 "aarch64_reg_or_zero" "rZ")
+	   (match_operand:SI 4 "const_int_operand")]
+	  UNSPECV_SX))]
+  ""
+  {
+    enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
+    if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model))
+      return "stxp\t%w0, %x2, %x3, %1";
+    else
+      return "stlxp\t%w0, %x2, %x3, %1";
+  }
+)
+
 (define_expand "mem_thread_fence"
   [(match_operand:SI 0 "const_int_operand" "")]
   ""
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 524e4e6929b..dd26bdbbc6b 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -29,6 +29,9 @@ 
 ;; Iterator for HI, SI, DI, some instructions can only work on these modes.
 (define_mode_iterator GPI_I16 [(HI "AARCH64_ISA_F16") SI DI])
 
+;; "Iterator" for just TI -- features like @pattern only work with iterators.
+(define_mode_iterator JUST_TI [TI])
+
 ;; Iterator for QI and HI modes
 (define_mode_iterator SHORT [QI HI])
 
diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse
index e862b0c2448..534ff6efea8 100644
--- a/libgcc/config/aarch64/t-lse
+++ b/libgcc/config/aarch64/t-lse
@@ -18,15 +18,19 @@ 
 # along with GCC; see the file COPYING3.  If not see
 # <http://www.gnu.org/licenses/>.
 
-# CAS, Swap, Load-and-operate have 4 sizes and 4 memory models
-S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), cas swp ldadd ldclr ldeor ldset))
+# Compare-and-swap has 5 sizes and 4 memory models.
+S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas))
+O0 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S0)))
+
+# Swap, Load-and-operate have 4 sizes and 4 memory models
+S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset))
 O1 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S1)))
 
 # Store-and-operate has 4 sizes but only 2 memory models (relaxed, release).
 S2 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), stadd stclr steor stset))
 O2 := $(foreach m, 1 3, $(addsuffix _$(m)$(objext), $(S2)))
 
-LSE_OBJS := $(O1) $(O2)
+LSE_OBJS := $(O0) $(O1) $(O2)
 
 libgcc-objects += $(LSE_OBJS) have_atomic$(objext)