diff mbox series

[1/3,gcc-7,backport,builtins] Generic support for __builtin_load_no_speculate()

Message ID 260f96d6dd9a10d17d53f8e4a7ca49aa94a4ac64.1515075827.git.Richard.Earnshaw@arm.com
State New
Headers show
Series Add __builtin_load_no_speculate | expand

Commit Message

Richard Earnshaw (lists) Jan. 4, 2018, 2:31 p.m. UTC
This patch adds generic support for the new builtin
__builtin_load_no_speculate.  It provides the overloading of the
different access sizes and a default fall-back expansion for targets
that do not support a mechanism for inhibiting speculation.

So that users can detect that this version of GCC supports the new builtin,
we also predefine the macro __HAVE_LOAD_NO_SPECULATE when
preprocessing.

	* builtin-types.def (BT_FN_I1_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR):
	New builtin type signature.
	(BT_FN_I2_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR): Likewise.
	(BT_FN_I4_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR): Likewise.
	(BT_FN_I8_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR): Likewise.
	(BT_FN_I16_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR): Likewise.
	* builtins.def (BUILT_IN_LOAD_NO_SPECULATE_N): New builtin.
	(BUILT_IN_LOAD_NO_SPECULATE_1): Likewise.
	(BUILT_IN_LOAD_NO_SPECULATE_2): Likewise.
	(BUILT_IN_LOAD_NO_SPECULATE_4): Likewise.
	(BUILT_IN_LOAD_NO_SPECULATE_8): Likewise.
	(BUILT_IN_LOAD_NO_SPECULATE_16): Likewise.
	* target.def (inhibit_load_speculation): New hook.
	* doc/tm.texi.in (TARGET_INHIBIT_LOAD_SPECULATION): Add to
	documentation.
	* doc/tm.texi: Regenerated.
	* doc/cpp.texi: Document predefine __HAVE_LOAD_NO_SPECULATE.
	* doc/extend.texi: Document __builtin_load_no_speculate.
	* c-family/c-common.c (load_no_speculate_resolve_size): New function.
	(load_no_speculate_resolve_params): New function.
	(load_no_speculate_resolve_return): New function.
	(resolve_overloaded_builtin): Handle overloading
	__builtin_load_no_speculate.
	* c-family/c-cppbuiltin.c (c_cpp_builtins): Add predefine for
	__HAVE_LOAD_NO_SPECULATE.
	* builtins.c (expand_load_no_speculate): New function.
	(expand_builtin): Handle new no-speculation builtins.
	* targhooks.h (default_inhibit_load_speculation): Declare.
	* targhooks.c (default_inhibit_load_speculation): New function.
---
 gcc/builtin-types.def       |  16 +++++
 gcc/builtins.c              |  99 ++++++++++++++++++++++++++
 gcc/builtins.def            |  22 ++++++
 gcc/c-family/c-common.c     | 164 ++++++++++++++++++++++++++++++++++++++++++++
 gcc/c-family/c-cppbuiltin.c |   5 +-
 gcc/doc/cpp.texi            |   4 ++
 gcc/doc/extend.texi         |  53 ++++++++++++++
 gcc/doc/tm.texi             |   6 ++
 gcc/doc/tm.texi.in          |   2 +
 gcc/target.def              |  20 ++++++
 gcc/targhooks.c             |  67 +++++++++++++++++-
 gcc/targhooks.h             |   3 +
 12 files changed, 459 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def
index ac98944..109f11c 100644
--- a/gcc/builtin-types.def
+++ b/gcc/builtin-types.def
@@ -749,6 +749,22 @@  DEF_FUNCTION_TYPE_VAR_3 (BT_FN_SSIZE_STRING_SIZE_CONST_STRING_VAR,
 DEF_FUNCTION_TYPE_VAR_3 (BT_FN_INT_FILEPTR_INT_CONST_STRING_VAR,
 			 BT_INT, BT_FILEPTR, BT_INT, BT_CONST_STRING)
 
+DEF_FUNCTION_TYPE_VAR_3 (BT_FN_I1_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR,
+			 BT_I1, BT_CONST_VOLATILE_PTR,  BT_CONST_VOLATILE_PTR,
+			 BT_CONST_VOLATILE_PTR)
+DEF_FUNCTION_TYPE_VAR_3 (BT_FN_I2_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR,
+			 BT_I2, BT_CONST_VOLATILE_PTR,  BT_CONST_VOLATILE_PTR,
+			 BT_CONST_VOLATILE_PTR)
+DEF_FUNCTION_TYPE_VAR_3 (BT_FN_I4_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR,
+			 BT_I4, BT_CONST_VOLATILE_PTR,  BT_CONST_VOLATILE_PTR,
+			 BT_CONST_VOLATILE_PTR)
+DEF_FUNCTION_TYPE_VAR_3 (BT_FN_I8_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR,
+			 BT_I8, BT_CONST_VOLATILE_PTR,  BT_CONST_VOLATILE_PTR,
+			 BT_CONST_VOLATILE_PTR)
+DEF_FUNCTION_TYPE_VAR_3 (BT_FN_I16_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR,
+			 BT_I16, BT_CONST_VOLATILE_PTR,  BT_CONST_VOLATILE_PTR,
+			 BT_CONST_VOLATILE_PTR)
+
 DEF_FUNCTION_TYPE_VAR_4 (BT_FN_INT_STRING_INT_SIZE_CONST_STRING_VAR,
 			 BT_INT, BT_STRING, BT_INT, BT_SIZE, BT_CONST_STRING)
 
diff --git a/gcc/builtins.c b/gcc/builtins.c
index d7d4f0f..58a9dd8 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -6349,6 +6349,97 @@  expand_stack_save (void)
   return ret;
 }
 
+/* Expand a call to __builtin_load_no_speculate_<N>.  MODE is the mode of
+   the object pointed to by the first argument.  We emit a warning if the
+   result isn't used (IGNORE != 0), since the implementation might rely
+   on the value being used to correctly inhibit speculation.  */
+static rtx
+expand_load_no_speculate (machine_mode mode, tree exp, rtx target, int ignore)
+{
+  rtx ptr, op0, op1, op2, op3, op4;
+  unsigned nargs = call_expr_nargs (exp);
+
+  if (ignore)
+    {
+      warning_at (input_location, 0,
+		  "result of __builtin_load_no_speculate must be used to "
+		  "ensure correct operation");
+      target = NULL;
+    }
+
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  tree arg1 = CALL_EXPR_ARG (exp, 1);
+  tree arg2 = CALL_EXPR_ARG (exp, 2);
+
+  ptr = expand_expr (arg0, NULL_RTX, ptr_mode, EXPAND_SUM);
+  op0 = validize_mem (gen_rtx_MEM (mode, convert_memory_address (Pmode, ptr)));
+
+  set_mem_align (op0, MAX (GET_MODE_ALIGNMENT (mode),
+			   get_pointer_alignment (arg0)));
+  set_mem_alias_set (op0, get_alias_set (TREE_TYPE (TREE_TYPE (arg0))));
+
+  /* Mark the memory access as volatile.  We don't want the optimizers to
+     move it or otherwise substitute an alternative value.  */
+  MEM_VOLATILE_P (op0) = 1;
+
+  if (integer_zerop (tree_strip_nop_conversions (arg1)))
+    op1 = NULL;
+  else
+    {
+      op1 = expand_normal (arg1);
+      if (GET_MODE (op1) != ptr_mode && GET_MODE (op1) != VOIDmode)
+	op1 = convert_modes (ptr_mode, VOIDmode, op1,
+			     TYPE_UNSIGNED (TREE_TYPE (arg1)));
+    }
+
+  if (integer_zerop (tree_strip_nop_conversions (arg2)))
+    op2 = NULL;
+  else
+    {
+      op2 = expand_normal (arg2);
+      if (GET_MODE (op2) != ptr_mode && GET_MODE (op2) != VOIDmode)
+	op2 = convert_modes (ptr_mode, VOIDmode, op2,
+			     TYPE_UNSIGNED (TREE_TYPE (arg2)));
+    }
+
+  if (nargs > 3)
+    {
+      tree arg3 = CALL_EXPR_ARG (exp, 3);
+      op3 = expand_normal (arg3);
+      if (CONST_INT_P (op3))
+	op3 = gen_int_mode (INTVAL (op3), mode);
+      else if (GET_MODE (op3) != mode && GET_MODE (op3) != VOIDmode)
+	op3 = convert_modes (mode, VOIDmode, op3,
+			     TYPE_UNSIGNED (TREE_TYPE (arg3)));
+    }
+  else
+    op3 = const0_rtx;
+
+  if (nargs > 4)
+    {
+      tree arg4 = CALL_EXPR_ARG (exp, 4);
+      op4 = expand_normal (arg4);
+      if (GET_MODE (op4) != ptr_mode && GET_MODE (op4) != VOIDmode)
+	op4 = convert_modes (ptr_mode, VOIDmode, op4,
+			     TYPE_UNSIGNED (TREE_TYPE (arg4)));
+    }
+  else
+    op4 = ptr;
+
+  if (op1 == NULL && op2 == NULL)
+    {
+      error_at (input_location,
+		"at least one speculation bound must be non-NULL");
+      /* Ensure we don't crash later.  */
+      op1 = op4;
+    }
+
+  if (target == NULL)
+    target = gen_reg_rtx (mode);
+
+  return targetm.inhibit_load_speculation (mode, target, op0, op1, op2, op3,
+					   op4);
+}
 
 /* Expand an expression EXP that calls a built-in function,
    with result going to TARGET if that's convenient
@@ -7469,6 +7560,14 @@  expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
 	 folding.  */
       break;
 
+    case BUILT_IN_LOAD_NO_SPECULATE_1:
+    case BUILT_IN_LOAD_NO_SPECULATE_2:
+    case BUILT_IN_LOAD_NO_SPECULATE_4:
+    case BUILT_IN_LOAD_NO_SPECULATE_8:
+    case BUILT_IN_LOAD_NO_SPECULATE_16:
+      mode = get_builtin_sync_mode (fcode - BUILT_IN_LOAD_NO_SPECULATE_1);
+      return expand_load_no_speculate (mode, exp, target, ignore);
+
     default:	/* just do library call, if unknown builtin */
       break;
     }
diff --git a/gcc/builtins.def b/gcc/builtins.def
index 58d78db..16894da 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -964,6 +964,28 @@  DEF_BUILTIN (BUILT_IN_EMUTLS_REGISTER_COMMON,
 	     true, true, true, ATTR_NOTHROW_LEAF_LIST, false,
 	     !targetm.have_tls)
 
+/* Suppressing speculation.  Users are expected to use the first (N)
+   variant, which will be translated internally into one of the other
+   types.  */
+DEF_GCC_BUILTIN (BUILT_IN_LOAD_NO_SPECULATE_N, "load_no_speculate",
+		 BT_FN_VOID_VAR, ATTR_NULL)
+
+DEF_GCC_BUILTIN (BUILT_IN_LOAD_NO_SPECULATE_1, "load_no_speculate_1",
+		 BT_FN_I1_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR,
+		 ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_LOAD_NO_SPECULATE_2, "load_no_speculate_2",
+		 BT_FN_I2_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR,
+		 ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_LOAD_NO_SPECULATE_4, "load_no_speculate_4",
+		 BT_FN_I4_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR,
+		 ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_LOAD_NO_SPECULATE_8, "load_no_speculate_8",
+		 BT_FN_I8_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR,
+		 ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_LOAD_NO_SPECULATE_16, "load_no_speculate_16",
+		 BT_FN_I16_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR,
+		 ATTR_NULL)
+
 /* Exception support.  */
 DEF_BUILTIN_STUB (BUILT_IN_UNWIND_RESUME, "__builtin_unwind_resume")
 DEF_BUILTIN_STUB (BUILT_IN_CXA_END_CLEANUP, "__builtin_cxa_end_cleanup")
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index e272488..e44c40d 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -6553,6 +6553,146 @@  builtin_type_for_size (int size, bool unsignedp)
   return type ? type : error_mark_node;
 }
 
+/* Work out the size of the object pointed to by the first argument
+   of a call to __builtin_load_no_speculate.  Only pointers to
+   integral types and pointers are permitted.  Return 0 if the
+   argument type is not supported or if the size is too large.  */
+static int
+load_no_speculate_resolve_size (tree function, vec<tree, va_gc> *params)
+{
+  /* Type of the argument.  */
+  tree type;
+  int size;
+
+  if (vec_safe_is_empty (params))
+    {
+      error ("too few arguments to function %qE", function);
+      return 0;
+    }
+
+  type = TREE_TYPE ((*params)[0]);
+
+  if (!POINTER_TYPE_P (type))
+    goto incompatible;
+
+  type = TREE_TYPE (type);
+
+  if (TREE_CODE (type) == ARRAY_TYPE)
+    {
+      /* Force array-to-pointer decay for c++.  */
+      gcc_assert (c_dialect_cxx());
+      (*params)[0] = default_conversion ((*params)[0]);
+      type = TREE_TYPE ((*params)[0]);
+    }
+
+  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
+    goto incompatible;
+
+  if (!COMPLETE_TYPE_P (type))
+   goto incompatible;
+
+  size = tree_to_uhwi (TYPE_SIZE_UNIT (type));
+  if (size == 1 || size == 2 || size == 4 || size == 8 || size == 16)
+    return size;
+
+ incompatible:
+  /* Issue the diagnostic only if the argument is valid, otherwise
+     it would be redundant at best and could be misleading.  */
+  if (type != error_mark_node)
+    error ("operand type %qT is incompatible with argument %d of %qE",
+	   type, 1, function);
+
+  return 0;
+}
+
+/* Validate and coerce PARAMS, the arguments to ORIG_FUNCTION to fit
+   the prototype for FUNCTION.  The first three arguments are
+   mandatory, but shouldn't need casting as they are all pointers and
+   we've already established that the first argument is a pointer to a
+   permitted type.  The two optional arguments may need to be
+   fabricated if they have been omitted.  */
+static bool
+load_no_speculate_resolve_params (location_t loc, tree orig_function,
+				  tree function,
+				  vec<tree, va_gc> *params)
+{
+  function_args_iterator iter;
+
+  function_args_iter_init (&iter, TREE_TYPE (function));
+  tree arg_type = function_args_iter_cond (&iter);
+  unsigned parmnum;
+  tree val;
+
+  if (params->length () < 3)
+    {
+      error_at (loc, "too few arguments to function %qE", orig_function);
+      return false;
+    }
+  else if (params->length () > 5)
+    {
+      error_at (loc, "too many arguments to function %qE", orig_function);
+      return false;
+    }
+
+  /* Required arguments.  These must all be pointers.  */
+  for (parmnum = 0; parmnum < 3; parmnum++)
+    {
+      arg_type = function_args_iter_cond (&iter);
+      val = (*params)[parmnum];
+      if (TREE_CODE (TREE_TYPE (val)) == ARRAY_TYPE)
+	val = default_conversion (val);
+      if (TREE_CODE (TREE_TYPE (val)) != POINTER_TYPE)
+	goto bad_arg;
+      (*params)[parmnum] = val;
+    }
+
+  /* Optional integer value.  */
+  arg_type = function_args_iter_cond (&iter);
+  if (params->length () >= 4)
+    {
+      val = (*params)[parmnum];
+      val = convert (arg_type, val);
+      (*params)[parmnum] = val;
+    }
+  else
+    return true;
+
+  /* Optional pointer to compare against.  */
+  parmnum = 4;
+  arg_type = function_args_iter_cond (&iter);
+  if (params->length () == 5)
+    {
+      val = (*params)[parmnum];
+      if (TREE_CODE (TREE_TYPE (val)) == ARRAY_TYPE)
+	val = default_conversion (val);
+      if (TREE_CODE (TREE_TYPE (val)) != POINTER_TYPE)
+	goto bad_arg;
+      (*params)[parmnum] = val;
+    }
+
+  return true;
+
+ bad_arg:
+  error_at (loc, "expecting argument of type %qT for argument %u", arg_type,
+	    parmnum);
+  return false;
+}
+
+/* Cast the result of the builtin back to the type pointed to by the
+   first argument, preserving any qualifiers that it might have.  */
+static tree
+load_no_speculate_resolve_return (tree first_param, tree result)
+{
+  tree ptype = TREE_TYPE (TREE_TYPE (first_param));
+  tree rtype = TREE_TYPE (result);
+  ptype = TYPE_MAIN_VARIANT (ptype);
+
+  if (tree_int_cst_equal (TYPE_SIZE (ptype), TYPE_SIZE (rtype)))
+    return convert (ptype, result);
+
+  return result;
+}
+
 /* A helper function for resolve_overloaded_builtin in resolving the
    overloaded __sync_ builtins.  Returns a positive power of 2 if the
    first operand of PARAMS is a pointer to a supported data type.
@@ -7204,6 +7344,30 @@  resolve_overloaded_builtin (location_t loc, tree function,
   /* Handle BUILT_IN_NORMAL here.  */
   switch (orig_code)
     {
+    case BUILT_IN_LOAD_NO_SPECULATE_N:
+      {
+	int n = load_no_speculate_resolve_size (function, params);
+	tree new_function, first_param, result;
+	enum built_in_function fncode;
+
+	if (n == 0)
+	  return error_mark_node;
+
+	fncode = (enum built_in_function)((int)orig_code + exact_log2 (n) + 1);
+	new_function = builtin_decl_explicit (fncode);
+	first_param = (*params)[0];
+	if (!load_no_speculate_resolve_params (loc, function, new_function,
+					       params))
+	  return error_mark_node;
+
+	result = build_function_call_vec (loc, vNULL, new_function, params,
+					  NULL);
+	if (result == error_mark_node)
+	  return result;
+
+	return load_no_speculate_resolve_return (first_param, result);
+      }
+
     case BUILT_IN_ATOMIC_EXCHANGE:
     case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
     case BUILT_IN_ATOMIC_LOAD:
diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
index c5fadaa..c4cb763 100644
--- a/gcc/c-family/c-cppbuiltin.c
+++ b/gcc/c-family/c-cppbuiltin.c
@@ -1356,7 +1356,10 @@  c_cpp_builtins (cpp_reader *pfile)
     cpp_define (pfile, "__WCHAR_UNSIGNED__");
 
   cpp_atomic_builtins (pfile);
-    
+
+  /* Show support for __builtin_load_no_speculate ().  */
+  cpp_define (pfile, "__HAVE_LOAD_NO_SPECULATE");
+
 #ifdef DWARF2_UNWIND_INFO
   if (dwarf2out_do_cfi_asm ())
     cpp_define (pfile, "__GCC_HAVE_DWARF2_CFI_ASM");
diff --git a/gcc/doc/cpp.texi b/gcc/doc/cpp.texi
index 6e16ffb..1718acb 100644
--- a/gcc/doc/cpp.texi
+++ b/gcc/doc/cpp.texi
@@ -2351,6 +2351,10 @@  If GCC cannot determine the current date, it will emit a warning message
 These macros are defined when the target processor supports atomic compare
 and swap operations on operands 1, 2, 4, 8 or 16 bytes in length, respectively.
 
+@item __HAVE_LOAD_NO_SPECULATE
+This macro is defined with the value 1 to show that this version of GCC
+supports @code{__builtin_load_no_speculate}.
+
 @item __GCC_HAVE_DWARF2_CFI_ASM
 This macro is defined when the compiler is emitting DWARF CFI directives
 to the assembler.  When this is defined, it is possible to emit those same
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index ba309d0..d00be5b 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -10498,6 +10498,7 @@  in the Cilk Plus language manual which can be found at
 @findex __builtin_islessequal
 @findex __builtin_islessgreater
 @findex __builtin_isunordered
+@findex __builtin_load_no_speculate
 @findex __builtin_powi
 @findex __builtin_powif
 @findex __builtin_powil
@@ -11134,6 +11135,58 @@  an extension.  @xref{Variable Length}, for details.
 
 @end deftypefn
 
+@deftypefn {Built-in Function} @var{type} __builtin_load_no_speculate (const volatile @var{type} *ptr, const volatile void *lower_bound, const volatile void *upper_bound, @var{type} failval, const volatile void *cmpptr)
+The @code{__builtin_load_no_speculate} function provides a means to
+limit the extent to which a processor can continue speculative
+execution with the result of loading a value stored at @var{ptr}.
+Logically, the builtin implements the following behavior:
+
+@smallexample
+inline @var{type} __builtin_load_no_speculate
+    (const volatile @var{type} *ptr,
+     const volatile void *lower_bound,
+     const volatile void *upper_bound,
+     @var{type} failval,
+     const volatile void *cmpptr)
+@{
+  @var{type} result;
+  if (cmpptr >= lower_bound && cmpptr < upper_bound)
+    result = *ptr;
+  else
+    result = failval;
+  return result;
+@}
+@end smallexample
+
+but in addition target-specific code will be inserted to ensure that
+speculation using @code{*ptr} cannot occur when @var{cmpptr} lies outside of
+the specified bounds.
+
+@var{type} may be any integral type (signed, or unsigned, @code{char},
+@code{short}, @code{int}, etc) or a pointer to any type.
+
+The final argument, @var{cmpptr}, may be omitted.  If you do this,
+then the compiler will use @var{ptr} for comparison against the upper
+and lower bounds.  Furthermore, if you omit @var{cmpptr}, you may also
+omit @var{failval} and the compiler will use @code{(@var{type})0} for
+the out-of-bounds result.
+
+Additionally, when it is known that one of the bounds can never fail,
+you can use a literal @code{NULL} argument and the compiler will
+generate code that only checks the other boundary condition.  It is generally
+only safe to do this when your code contains a loop construct where the only
+boundary of interest is the one beyond the termination condition.  You cannot
+omit both boundary conditions in this way.
+
+The logical behaviour of the builtin is supported for all architectures, but
+on machines where target-specific support for inhibiting speculation is not
+implemented, or not necessary, the compiler will emit a warning.
+
+The pre-processor macro @code{__HAVE_LOAD_NO_SPECULATE} is defined with the
+value 1 on all implementations of GCC that support this builtin.
+
+@end deftypefn
+
 @deftypefn {Built-in Function} int __builtin_types_compatible_p (@var{type1}, @var{type2})
 
 You can use the built-in function @code{__builtin_types_compatible_p} to
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index c4f2c89..0b43d70 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11866,6 +11866,12 @@  maintainer is familiar with.
 
 @end defmac
 
+@deftypefn {Target Hook} rtx TARGET_INHIBIT_LOAD_SPECULATION (machine_mode @var{mode}, rtx @var{result}, rtx @var{mem}, rtx @var{lower_bound}, rtx @var{upper_bound}, rtx @var{fail_result}, rtx @var{cmpptr})
+Generate a target-specific code sequence that implements @code{__builtin_load_no_speculate}, returning the result in @var{result}. If @var{cmpptr} is greater than, or equal to, @var{lower_bound} and less than @var{upper_bound} then @var{mem}, a @code{MEM} of type @var{mode}, should be returned, otherwise @var{failval} should be returned.  The expansion must ensure that subsequent speculation by the processor using the @var{mem} cannot occur if @var{cmpptr} lies outside of the specified bounds.  At most one of @var{lower_bound} and @var{upper_bound} can be @code{NULL_RTX}, indicating that code for that bounds check should not be generated.
+ 
+ The default implementation implements the logic of the builtin but cannot provide the target-specific code necessary to inhibit speculation.  A warning will be emitted to that effect.
+@end deftypefn
+
 @deftypefn {Target Hook} void TARGET_RUN_TARGET_SELFTESTS (void)
 If selftests are enabled, run any selftests for this target.
 @end deftypefn
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 1c471d8..d595002 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -8318,4 +8318,6 @@  maintainer is familiar with.
 
 @end defmac
 
+@hook TARGET_INHIBIT_LOAD_SPECULATION
+
 @hook TARGET_RUN_TARGET_SELFTESTS
diff --git a/gcc/target.def b/gcc/target.def
index 6bebfd5..605b793 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4062,6 +4062,26 @@  DEFHOOK
  hook_bool_void_true)
 
 DEFHOOK
+(inhibit_load_speculation,
+ "Generate a target-specific code sequence that implements\
+ @code{__builtin_load_no_speculate}, returning the result in @var{result}.\
+ If @var{cmpptr} is greater than, or equal to, @var{lower_bound} and less\
+ than @var{upper_bound} then @var{mem}, a @code{MEM} of type @var{mode},\
+ should be returned, otherwise @var{failval} should be returned.  The\
+ expansion must ensure that subsequent speculation by the processor using\
+ the @var{mem} cannot occur if @var{cmpptr} lies outside of the specified\
+ bounds.  At most one of @var{lower_bound} and @var{upper_bound} can be\
+ @code{NULL_RTX}, indicating that code for that bounds check should not be\
+ generated.\n\
+ \n\
+ The default implementation implements the logic of the builtin\
+ but cannot provide the target-specific code necessary to inhibit\
+ speculation.  A warning will be emitted to that effect.",
+ rtx, (machine_mode mode, rtx result, rtx mem, rtx lower_bound,
+       rtx upper_bound, rtx fail_result, rtx cmpptr),
+ default_inhibit_load_speculation)
+
+DEFHOOK
 (can_use_doloop_p,
  "Return true if it is possible to use low-overhead loops (@code{doloop_end}\n\
 and @code{doloop_begin}) for a particular loop.  @var{iterations} gives the\n\
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 1cdec06..178d5ea 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -79,7 +79,8 @@  along with GCC; see the file COPYING3.  If not see
 #include "predict.h"
 #include "params.h"
 #include "real.h"
-
+#include "dojump.h"
+#include "basic-block.h"
 
 bool
 default_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
@@ -2107,4 +2108,68 @@  default_excess_precision (enum excess_precision_type ATTRIBUTE_UNUSED)
   return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
 }
 
+/* Default implementation of the load-and-inhibit-speculation builtin.
+   This version does not have, or know of, the target-specific
+   mechanisms necessary to inhibit speculation, so it simply emits a
+   code sequence that implements the architectural aspects of the
+   builtin.  */
+rtx
+default_inhibit_load_speculation (machine_mode mode ATTRIBUTE_UNUSED,
+				  rtx result,
+				  rtx mem,
+				  rtx lower_bound,
+				  rtx upper_bound,
+				  rtx fail_result,
+				  rtx cmpptr)
+{
+  rtx_code_label *done_label = gen_label_rtx ();
+  rtx_code_label *inrange_label = gen_label_rtx ();
+  warning_at
+    (input_location, 0,
+     "this target does not support anti-speculation operations.  "
+     "Your program will still execute correctly, but speculation "
+     "will not be inhibited");
+
+  /* We don't have any despeculation barriers, but if we mark the branch
+     probabilities to be always predicting the out-of-bounds path, then
+     there's a higher chance that the compiler will order code so that
+     static prediction will fall through a safe path.  */
+  if (lower_bound == NULL)
+    {
+      do_compare_rtx_and_jump (cmpptr, upper_bound, LTU, true, ptr_mode,
+			       NULL, NULL, inrange_label, PROB_VERY_UNLIKELY);
+      emit_move_insn (result, fail_result);
+      emit_jump (done_label);
+      emit_label (inrange_label);
+      emit_move_insn (result, mem);
+      emit_label (done_label);
+    }
+  else if (upper_bound == NULL)
+    {
+      do_compare_rtx_and_jump (cmpptr, lower_bound, GEU, true, ptr_mode,
+			       NULL, NULL, inrange_label, PROB_VERY_UNLIKELY);
+      emit_move_insn (result, fail_result);
+      emit_jump (done_label);
+      emit_label (inrange_label);
+      emit_move_insn (result, mem);
+      emit_label (done_label);
+    }
+  else
+    {
+      rtx_code_label *oob_label = gen_label_rtx ();
+      do_compare_rtx_and_jump (cmpptr, lower_bound, LTU, true, ptr_mode,
+			       NULL, NULL, oob_label, PROB_ALWAYS);
+      do_compare_rtx_and_jump (cmpptr, upper_bound, LTU, true, ptr_mode,
+			       NULL, NULL, inrange_label, PROB_VERY_UNLIKELY);
+      emit_label (oob_label);
+      emit_move_insn (result, fail_result);
+      emit_jump (done_label);
+      emit_label (inrange_label);
+      emit_move_insn (result, mem);
+      emit_label (done_label);
+    }
+
+  return result;
+}
+
 #include "gt-targhooks.h"
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 18070df..e674e9d 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -264,4 +264,7 @@  extern unsigned int default_min_arithmetic_precision (void);
 extern enum flt_eval_method
 default_excess_precision (enum excess_precision_type ATTRIBUTE_UNUSED);
 
+extern rtx
+default_inhibit_load_speculation (machine_mode, rtx, rtx, rtx, rtx, rtx, rtx);
+
 #endif /* GCC_TARGHOOKS_H */