[3/3,arm] Implement support for the de-speculation intrinsic

Message ID 26f1fd261a467d6b43e1d77085dfb0e169782cf7.1515072356.git.Richard.Earnshaw@arm.com
State Superseded
Series Add __builtin_load_no_speculate

Commit Message

Richard Earnshaw (lists) Jan. 4, 2018, 1:58 p.m. UTC
This patch implements despeculation on ARM.  We only support it when
generating ARM or Thumb2 code (we need conditional execution), and
only for sizes up to DImode.  For unsupported cases we fall back to
the generic code-generation sequence so that a suitable warning is
emitted.
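
For reference, a minimal usage sketch of the intrinsic this hook
expands (assuming the five-argument form described in the series
cover letter: pointer, lower bound, upper bound, fail value,
comparison pointer; the prototype is defined by the series, not by
this patch):

  int
  lookup (int *array, unsigned int idx, int *lower, int *upper)
  {
    /* Yield array[idx] only if array + idx lies in [lower, upper);
       otherwise yield 0, without the load ever being speculatively
       performed with an out-of-range address.  */
    return __builtin_load_no_speculate (array + idx, lower, upper, 0,
					array + idx);
  }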

	* config/arm/arm.c (arm_inhibit_load_speculation): New function.
	(TARGET_INHIBIT_LOAD_SPECULATION): Redefine.
	* config/arm/unspecs.md (VUNSPEC_NOSPECULATE): New unspec_volatile code.
	* config/arm/arm.md (cmp_ior): Make this pattern callable.
	(nospeculate<QHSI:mode>, nospeculatedi): New patterns.
---
 gcc/config/arm/arm.c      | 107 ++++++++++++++++++++++++++++++++++++++++++++++
 gcc/config/arm/arm.md     |  40 ++++++++++++++++-
 gcc/config/arm/unspecs.md |   1 +
 3 files changed, 147 insertions(+), 1 deletion(-)

Patch

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 11e35ad..d1fc0b9 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -321,6 +321,8 @@ static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
 static bool arm_modes_tieable_p (machine_mode, machine_mode);
 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
+static rtx arm_inhibit_load_speculation (machine_mode, rtx, rtx, rtx, rtx,
+					 rtx, rtx);
 
 /* Table of machine attributes.  */
 static const struct attribute_spec arm_attribute_table[] =
@@ -804,6 +806,9 @@ static const struct attribute_spec arm_attribute_table[] =
 
 #undef TARGET_CONSTANT_ALIGNMENT
 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
+
+#undef TARGET_INHIBIT_LOAD_SPECULATION
+#define TARGET_INHIBIT_LOAD_SPECULATION arm_inhibit_load_speculation
 
 /* Obstack for minipool constant handling.  */
 static struct obstack minipool_obstack;
@@ -31523,6 +31528,108 @@ arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
   return align;
 }
 
+static rtx
+arm_inhibit_load_speculation (machine_mode mode, rtx result, rtx mem,
+			      rtx lower_bound, rtx upper_bound,
+			      rtx fail_result, rtx cmpptr)
+{
+  rtx cond, comparison;
+
+  /* We can't support this for Thumb1 as we have no suitable conditional
+     move operations.  Nor do we support it for TImode.  For both
+     these cases fall back to the generic code sequence which will emit
+     a suitable warning for us.  */
+  if (mode == TImode || TARGET_THUMB1)
+    return default_inhibit_load_speculation (mode, result, mem, lower_bound,
+					     upper_bound, fail_result, cmpptr);
+
+
+  rtx target = gen_reg_rtx (mode);
+  rtx tgt2 = result;
+
+  if (!register_operand (tgt2, mode))
+    tgt2 = gen_reg_rtx (mode);
+
+  if (!register_operand (cmpptr, ptr_mode))
+    cmpptr = force_reg (ptr_mode, cmpptr);
+
+  if (upper_bound == NULL)
+    {
+      if (!register_operand (lower_bound, ptr_mode))
+	lower_bound = force_reg (ptr_mode, lower_bound);
+
+      cond = arm_gen_compare_reg (LTU, cmpptr, lower_bound, NULL);
+      comparison = gen_rtx_LTU (VOIDmode, cond, const0_rtx);
+    }
+  else if (lower_bound == NULL)
+    {
+      if (!register_operand (upper_bound, ptr_mode))
+	upper_bound = force_reg (ptr_mode, upper_bound);
+
+      cond = arm_gen_compare_reg (GEU, cmpptr, upper_bound, NULL);
+      comparison = gen_rtx_GEU (VOIDmode, cond, const0_rtx);
+    }
+  else
+    {
+      /* We want to generate code for
+	   result = (cmpptr < lower || cmpptr >= upper) ? 0 : *ptr;
+	 which can be recast to
+	   result = (cmpptr < lower || upper <= cmpptr) ? 0 : *ptr;
+	 which can be implemented as
+	   cmp   cmpptr, lower
+	   cmpcs upper, cmpptr
+	   bls   1f
+	   ldr   result, [ptr]
+	  1:
+	   movls result, #0
+	 with suitable IT instructions as needed for thumb2.  Later
+	 optimization passes may make the load conditional.  */
+
+      if (!register_operand (lower_bound, ptr_mode))
+	lower_bound = force_reg (ptr_mode, lower_bound);
+
+      if (!register_operand (upper_bound, ptr_mode))
+	upper_bound = force_reg (ptr_mode, upper_bound);
+
+      rtx comparison1 = gen_rtx_LTU (SImode, cmpptr, lower_bound);
+      rtx comparison2 = gen_rtx_LEU (SImode, upper_bound, cmpptr);
+      cond = gen_rtx_REG (arm_select_dominance_cc_mode (comparison1,
+							comparison2,
+							DOM_CC_X_OR_Y),
+			  CC_REGNUM);
+      emit_insn (gen_cmp_ior (cmpptr, lower_bound, upper_bound, cmpptr,
+			      comparison1, comparison2, cond));
+      comparison = gen_rtx_NE (SImode, cond, const0_rtx);
+    }
+
+  rtx_code_label *label = gen_label_rtx ();
+  emit_jump_insn (gen_arm_cond_branch (label, comparison, cond));
+  emit_move_insn (target, mem);
+  emit_label (label);
+
+  insn_code icode;
+
+  switch (mode)
+    {
+    case E_QImode: icode = CODE_FOR_nospeculateqi; break;
+    case E_HImode: icode = CODE_FOR_nospeculatehi; break;
+    case E_SImode: icode = CODE_FOR_nospeculatesi; break;
+    case E_DImode: icode = CODE_FOR_nospeculatedi; break;
+    default:
+      gcc_unreachable ();
+    }
+
+  if (! insn_operand_matches (icode, 4, fail_result))
+    fail_result = force_reg (mode, fail_result);
+
+  emit_insn (GEN_FCN (icode) (tgt2, comparison, cond, target, fail_result));
+
+  if (tgt2 != result)
+    emit_move_insn (result, tgt2);
+
+  return result;
+}
+
 #if CHECKING_P
 namespace selftest {
 
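
Reading the new hook together with the nospeculatesi pattern added
below, the ARM-state sequence for the double-bound SImode case should
come out roughly as follows (register names symbolic, failure value
of 0 shown; this is a sketch assembled from the comments and
templates in the patch, not verified compiler output):

  	cmp	cmpptr, lower		@ cmpptr < lower?
  	cmpcs	upper, cmpptr		@ if not, upper <= cmpptr?
  	bls	1f			@ out of range: skip the load
  	ldr	result, [ptr]
  1:
  	movls	result, #0		@ force the failure value
  	.inst	0xe320f014		@ CSDB speculation barrier
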
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index d60c5af..e700fdf 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -9488,7 +9488,7 @@ 
    (set_attr "type" "multiple")]
 )
 
-(define_insn "*cmp_ior"
+(define_insn "cmp_ior"
   [(set (match_operand 6 "dominant_cc_register" "")
 	(compare
 	 (ior:SI
@@ -12015,6 +12015,44 @@ 
   [(set_attr "length" "4")
    (set_attr "type" "coproc")])
 
+(define_insn "nospeculate<QHSI:mode>"
+ [(set (match_operand:QHSI 0 "s_register_operand" "=l,l,r")
+        (unspec_volatile:QHSI
+         [(match_operator 1 "arm_comparison_operator"
+	   [(match_operand 2 "cc_register" "") (const_int 0)])
+	  (match_operand:QHSI 3 "s_register_operand" "0,0,0")
+	  (match_operand:QHSI 4 "arm_not_operand" "I,K,r")]
+	 VUNSPEC_NOSPECULATE))]
+  "TARGET_32BIT"
+  {
+  if (TARGET_THUMB)
+    return \"it\\t%d1\;mov%d1\\t%0, %4\;.inst 0xf3af8014\t%@ CSDB\";
+  return \"mov%d1\\t%0, %4\;.inst 0xe320f014\t%@ CSDB\";
+  }
+  [(set_attr "type" "mov_imm,mvn_imm,mov_reg")
+   (set_attr "conds" "use")
+   (set_attr "length" "8")]
+)
+
+(define_insn "nospeculatedi"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+        (unspec_volatile:DI
+         [(match_operator 1 "arm_comparison_operator"
+	   [(match_operand 2 "cc_register" "") (const_int 0)])
+	  (match_operand:DI 3 "s_register_operand" "0")
+	  (match_operand:DI 4 "arm_rhs_operand" "rI")]
+	 VUNSPEC_NOSPECULATE))]
+  "TARGET_32BIT"
+  {
+  if (TARGET_THUMB)
+    return \"it\\t%d1\;mov%d1\\t%Q0, %Q4\;it\\t%d1\;mov%d1\\t%R0, %R4\;.inst 0xf3af8014\t%@ CSDB\";
+  return \"mov%d1\\t%Q0, %Q4\;mov%d1\\t%R0, %R4\;.inst 0xe320f014\t%@ CSDB\";
+  }
+  [(set_attr "type" "mov_reg")
+   (set_attr "conds" "use")
+   (set_attr "length" "12")]
+)
+
 ;; Vector bits common to IWMMXT and Neon
 (include "vec-common.md")
 ;; Load the Intel Wireless Multimedia Extension patterns
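
Note that the patterns above emit the CSDB barrier as a raw encoding
(.inst 0xe320f014 for ARM state, 0xf3af8014 for Thumb2) rather than
by mnemonic, presumably so the output assembles with binutils
releases that do not yet know the CSDB instruction.  For the Thumb2
SImode case the template should expand to something like (condition
"ls" and a zero fail value shown for illustration; a sketch only):

  	it	ls
  	movls	r0, #0
  	.inst	0xf3af8014	@ CSDB
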
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index c474f4b..727a5ab 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -168,6 +168,7 @@ 
   VUNSPEC_MCRR2		; Represent the coprocessor mcrr2 instruction.
   VUNSPEC_MRRC		; Represent the coprocessor mrrc instruction.
   VUNSPEC_MRRC2		; Represent the coprocessor mrrc2 instruction.
+  VUNSPEC_NOSPECULATE	; Represent a despeculation sequence.
 ])
 
 ;; Enumerators for NEON unspecs.