[1/3,builtins] Generic support for __builtin_load_no_speculate()

Message ID	3f3375cf241d099400b7f90c7c6e42c2e140734c.1515072356.git.Richard.Earnshaw@arm.com
State	Superseded
Headers	show Delivered-To: patch@linaro.org Received-SPF: pass (google.com: domain of gcc-patches-return-470136-patch=linaro.org@gcc.gnu.org designates 209.132.180.131 as permitted sender) client-ip=209.132.180.131; DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:in-reply-to:references :in-reply-to:references:mime-version:content-type; q=dns; s= default; b=iiAUGkTajiy6yHv3wARAJPZl/p43RbAuc2Btody6ik7/wUO3r3C2i PzFaNX4AGwe9dk3AgZAO3ftOHiJvAOl390Fmd5pNBTOizOHZfE65hY0AnOJdTTnR 7FbDK1dcJ0VMdcZ7LEBwvuNp11m0biXQ6arjkEaOV9qTd+c8qLlSoI= Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org From: Richard Earnshaw <Richard.Earnshaw@arm.com> To: gcc-patches@gcc.gnu.org Cc: Richard Earnshaw <Richard.Earnshaw@arm.com> Subject: [PATCH 1/3] [builtins] Generic support for __builtin_load_no_speculate() Date: Thu, 4 Jan 2018 13:58:41 +0000 Message-Id: <3f3375cf241d099400b7f90c7c6e42c2e140734c.1515072356.git.Richard.Earnshaw@arm.com> In-Reply-To: <cover.1515072356.git.Richard.Earnshaw@arm.com> References: <cover.1515072356.git.Richard.Earnshaw@arm.com> In-Reply-To: <cover.1515072356.git.Richard.Earnshaw@arm.com> References: <cover.1515072356.git.Richard.Earnshaw@arm.com> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------2.7.4"
Series	Add __builtin_load_no_speculate \| expand [0/3] Add __builtin_load_no_speculate [1/3,builtins] Generic support for __builtin_load_no_speculate() [2/3,aarch64] Implement support for __builtin_load_no_speculate. [3/3,arm] Implement support for the de-speculation intrinsic

diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def index bb50e60..259aacd 100644 --- a/gcc/builtin-types.def +++ b/gcc/builtin-types.def @@ -785,6 +785,22 @@ DEF_FUNCTION_TYPE_VAR_3 (BT_FN_SSIZE_STRING_SIZE_CONST_STRING_VAR, DEF_FUNCTION_TYPE_VAR_3 (BT_FN_INT_FILEPTR_INT_CONST_STRING_VAR, BT_INT, BT_FILEPTR, BT_INT, BT_CONST_STRING) +DEF_FUNCTION_TYPE_VAR_3 (BT_FN_I1_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR, + BT_I1, BT_CONST_VOLATILE_PTR, BT_CONST_VOLATILE_PTR, + BT_CONST_VOLATILE_PTR) +DEF_FUNCTION_TYPE_VAR_3 (BT_FN_I2_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR, + BT_I2, BT_CONST_VOLATILE_PTR, BT_CONST_VOLATILE_PTR, + BT_CONST_VOLATILE_PTR) +DEF_FUNCTION_TYPE_VAR_3 (BT_FN_I4_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR, + BT_I4, BT_CONST_VOLATILE_PTR, BT_CONST_VOLATILE_PTR, + BT_CONST_VOLATILE_PTR) +DEF_FUNCTION_TYPE_VAR_3 (BT_FN_I8_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR, + BT_I8, BT_CONST_VOLATILE_PTR, BT_CONST_VOLATILE_PTR, + BT_CONST_VOLATILE_PTR) +DEF_FUNCTION_TYPE_VAR_3 (BT_FN_I16_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR, + BT_I16, BT_CONST_VOLATILE_PTR, BT_CONST_VOLATILE_PTR, + BT_CONST_VOLATILE_PTR) + DEF_FUNCTION_TYPE_VAR_4 (BT_FN_INT_STRING_INT_SIZE_CONST_STRING_VAR, BT_INT, BT_STRING, BT_INT, BT_SIZE, BT_CONST_STRING) diff --git a/gcc/builtins.c b/gcc/builtins.c index 98eb804..1bdbc64 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -6602,6 +6602,97 @@ expand_stack_save (void) return ret; } +/* Expand a call to __builtin_load_no_speculate_<N>. MODE represents the + size of the first argument to that call. We emit a warning if the + result isn't used (IGNORE != 0), since the implementation might + rely on the value being used to correctly inhibit speculation. 
*/ +static rtx +expand_load_no_speculate (machine_mode mode, tree exp, rtx target, int ignore) +{ + rtx ptr, op0, op1, op2, op3, op4; + unsigned nargs = call_expr_nargs (exp); + + if (ignore) + { + warning_at (input_location, 0, + "result of __builtin_load_no_speculate must be used to " + "ensure correct operation"); + target = NULL; + } + + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + tree arg2 = CALL_EXPR_ARG (exp, 2); + + ptr = expand_expr (arg0, NULL_RTX, ptr_mode, EXPAND_SUM); + op0 = validize_mem (gen_rtx_MEM (mode, convert_memory_address (Pmode, ptr))); + + set_mem_align (op0, MAX (GET_MODE_ALIGNMENT (mode), + get_pointer_alignment (arg0))); + set_mem_alias_set (op0, get_alias_set (TREE_TYPE (TREE_TYPE (arg0)))); + + /* Mark the memory access as volatile. We don't want the optimizers to + move it or otherwise substitute an alternative value. */ + MEM_VOLATILE_P (op0) = 1; + + if (integer_zerop (tree_strip_nop_conversions (arg1))) + op1 = NULL; + else + { + op1 = expand_normal (arg1); + if (GET_MODE (op1) != ptr_mode && GET_MODE (op1) != VOIDmode) + op1 = convert_modes (ptr_mode, VOIDmode, op1, + TYPE_UNSIGNED (TREE_TYPE (arg1))); + } + + if (integer_zerop (tree_strip_nop_conversions (arg2))) + op2 = NULL; + else + { + op2 = expand_normal (arg2); + if (GET_MODE (op2) != ptr_mode && GET_MODE (op2) != VOIDmode) + op2 = convert_modes (ptr_mode, VOIDmode, op2, + TYPE_UNSIGNED (TREE_TYPE (arg2))); + } + + if (nargs > 3) + { + tree arg3 = CALL_EXPR_ARG (exp, 3); + op3 = expand_normal (arg3); + if (CONST_INT_P (op3)) + op3 = gen_int_mode (INTVAL (op3), mode); + else if (GET_MODE (op3) != mode && GET_MODE (op3) != VOIDmode) + op3 = convert_modes (mode, VOIDmode, op3, + TYPE_UNSIGNED (TREE_TYPE (arg3))); + } + else + op3 = const0_rtx; + + if (nargs > 4) + { + tree arg4 = CALL_EXPR_ARG (exp, 4); + op4 = expand_normal (arg4); + if (GET_MODE (op4) != ptr_mode && GET_MODE (op4) != VOIDmode) + op4 = convert_modes (ptr_mode, VOIDmode, op4, + 
TYPE_UNSIGNED (TREE_TYPE (arg4))); + } + else + op4 = ptr; + + if (op1 == NULL && op2 == NULL) + { + error_at (input_location, + "at least one speculation bound must be non-NULL"); + /* Ensure we don't crash later. */ + op1 = op4; + } + + if (target == NULL) + target = gen_reg_rtx (mode); + + return targetm.inhibit_load_speculation (mode, target, op0, op1, op2, op3, + op4); +} /* Expand an expression EXP that calls a built-in function, with result going to TARGET if that's convenient @@ -7732,6 +7823,14 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, folding. */ break; + case BUILT_IN_LOAD_NO_SPECULATE_1: + case BUILT_IN_LOAD_NO_SPECULATE_2: + case BUILT_IN_LOAD_NO_SPECULATE_4: + case BUILT_IN_LOAD_NO_SPECULATE_8: + case BUILT_IN_LOAD_NO_SPECULATE_16: + mode = get_builtin_sync_mode (fcode - BUILT_IN_LOAD_NO_SPECULATE_1); + return expand_load_no_speculate (mode, exp, target, ignore); + default: /* just do library call, if unknown builtin */ break; } diff --git a/gcc/builtins.def b/gcc/builtins.def index 671097e..761c063 100644 --- a/gcc/builtins.def +++ b/gcc/builtins.def @@ -1017,6 +1017,28 @@ DEF_BUILTIN (BUILT_IN_EMUTLS_REGISTER_COMMON, true, true, true, ATTR_NOTHROW_LEAF_LIST, false, !targetm.have_tls) +/* Suppressing speculation. Users are expected to use the first (N) + variant, which will be translated internally into one of the other + types. 
*/ +DEF_GCC_BUILTIN (BUILT_IN_LOAD_NO_SPECULATE_N, "load_no_speculate", + BT_FN_VOID_VAR, ATTR_NULL) + +DEF_GCC_BUILTIN (BUILT_IN_LOAD_NO_SPECULATE_1, "load_no_speculate_1", + BT_FN_I1_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR, + ATTR_NULL) +DEF_GCC_BUILTIN (BUILT_IN_LOAD_NO_SPECULATE_2, "load_no_speculate_2", + BT_FN_I2_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR, + ATTR_NULL) +DEF_GCC_BUILTIN (BUILT_IN_LOAD_NO_SPECULATE_4, "load_no_speculate_4", + BT_FN_I4_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR, + ATTR_NULL) +DEF_GCC_BUILTIN (BUILT_IN_LOAD_NO_SPECULATE_8, "load_no_speculate_8", + BT_FN_I8_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR, + ATTR_NULL) +DEF_GCC_BUILTIN (BUILT_IN_LOAD_NO_SPECULATE_16, "load_no_speculate_16", + BT_FN_I16_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR, + ATTR_NULL) + /* Exception support. */ DEF_BUILTIN_STUB (BUILT_IN_UNWIND_RESUME, "__builtin_unwind_resume") DEF_BUILTIN_STUB (BUILT_IN_CXA_END_CLEANUP, "__builtin_cxa_end_cleanup") diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c index 197a71f..c213ffd 100644 --- a/gcc/c-family/c-common.c +++ b/gcc/c-family/c-common.c @@ -6456,6 +6456,146 @@ builtin_type_for_size (int size, bool unsignedp) return type ? type : error_mark_node; } +/* Work out the size of the object pointed to by the first argument + of a call to __builtin_load_no_speculate. Only pointers to + integral types and pointers are permitted. Return 0 if the + argument type is not supported or if the size is too large. */ +static int +load_no_speculate_resolve_size (tree function, vec<tree, va_gc> *params) +{ + /* Type of the argument. */ + tree type; + int size; + + if (vec_safe_is_empty (params)) + { + error ("too few arguments to function %qE", function); + return 0; + } + + type = TREE_TYPE ((*params)[0]); + + if (!POINTER_TYPE_P (type)) + goto incompatible; + + type = TREE_TYPE (type); + + if (TREE_CODE (type) == ARRAY_TYPE) + { + /* Force array-to-pointer decay for c++. 
*/ + gcc_assert (c_dialect_cxx()); + (*params)[0] = default_conversion ((*params)[0]); + type = TREE_TYPE ((*params)[0]); + } + + if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type)) + goto incompatible; + + if (!COMPLETE_TYPE_P (type)) + goto incompatible; + + size = tree_to_uhwi (TYPE_SIZE_UNIT (type)); + if (size == 1 || size == 2 || size == 4 || size == 8 || size == 16) + return size; + + incompatible: + /* Issue the diagnostic only if the argument is valid, otherwise + it would be redundant at best and could be misleading. */ + if (type != error_mark_node) + error ("operand type %qT is incompatible with argument %d of %qE", + type, 1, function); + + return 0; +} + +/* Validate and coerce PARAMS, the arguments to ORIG_FUNCTION to fit + the prototype for FUNCTION. The first three arguments are + mandatory, but shouldn't need casting as they are all pointers and + we've already established that the first argument is a pointer to a + permitted type. The two optional arguments may need to be + fabricated if they have been omitted. */ +static bool +load_no_speculate_resolve_params (location_t loc, tree orig_function, + tree function, + vec<tree, va_gc> *params) +{ + function_args_iterator iter; + + function_args_iter_init (&iter, TREE_TYPE (function)); + tree arg_type = function_args_iter_cond (&iter); + unsigned parmnum; + tree val; + + if (params->length () < 3) + { + error_at (loc, "too few arguments to function %qE", orig_function); + return false; + } + else if (params->length () > 5) + { + error_at (loc, "too many arguments to function %qE", orig_function); + return false; + } + + /* Required arguments. These must all be pointers. 
*/ + for (parmnum = 0; parmnum < 3; parmnum++) + { + arg_type = function_args_iter_cond (&iter); + val = (*params)[parmnum]; + if (TREE_CODE (TREE_TYPE (val)) == ARRAY_TYPE) + val = default_conversion (val); + if (TREE_CODE (TREE_TYPE (val)) != POINTER_TYPE) + goto bad_arg; + (*params)[parmnum] = val; + } + + /* Optional integer value. */ + arg_type = function_args_iter_cond (&iter); + if (params->length () >= 4) + { + val = (*params)[parmnum]; + val = convert (arg_type, val); + (*params)[parmnum] = val; + } + else + return true; + + /* Optional pointer to compare against. */ + parmnum = 4; + arg_type = function_args_iter_cond (&iter); + if (params->length () == 5) + { + val = (*params)[parmnum]; + if (TREE_CODE (TREE_TYPE (val)) == ARRAY_TYPE) + val = default_conversion (val); + if (TREE_CODE (TREE_TYPE (val)) != POINTER_TYPE) + goto bad_arg; + (*params)[parmnum] = val; + } + + return true; + + bad_arg: + error_at (loc, "expecting argument of type %qT for argument %u", arg_type, + parmnum); + return false; +} + +/* Cast the result of the builtin back to the type pointed to by the + first argument, preserving any qualifiers that it might have. */ +static tree +load_no_speculate_resolve_return (tree first_param, tree result) +{ + tree ptype = TREE_TYPE (TREE_TYPE (first_param)); + tree rtype = TREE_TYPE (result); + ptype = TYPE_MAIN_VARIANT (ptype); + + if (tree_int_cst_equal (TYPE_SIZE (ptype), TYPE_SIZE (rtype))) + return convert (ptype, result); + + return result; +} + /* A helper function for resolve_overloaded_builtin in resolving the overloaded __sync_ builtins. Returns a positive power of 2 if the first operand of PARAMS is a pointer to a supported data type. @@ -7110,6 +7250,30 @@ resolve_overloaded_builtin (location_t loc, tree function, /* Handle BUILT_IN_NORMAL here. 
*/ switch (orig_code) { + case BUILT_IN_LOAD_NO_SPECULATE_N: + { + int n = load_no_speculate_resolve_size (function, params); + tree new_function, first_param, result; + enum built_in_function fncode; + + if (n == 0) + return error_mark_node; + + fncode = (enum built_in_function)((int)orig_code + exact_log2 (n) + 1); + new_function = builtin_decl_explicit (fncode); + first_param = (*params)[0]; + if (!load_no_speculate_resolve_params (loc, function, new_function, + params)) + return error_mark_node; + + result = build_function_call_vec (loc, vNULL, new_function, params, + NULL); + if (result == error_mark_node) + return result; + + return load_no_speculate_resolve_return (first_param, result); + } + case BUILT_IN_ATOMIC_EXCHANGE: case BUILT_IN_ATOMIC_COMPARE_EXCHANGE: case BUILT_IN_ATOMIC_LOAD: diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c index 9e33aed..fb06ee7 100644 --- a/gcc/c-family/c-cppbuiltin.c +++ b/gcc/c-family/c-cppbuiltin.c @@ -1361,7 +1361,10 @@ c_cpp_builtins (cpp_reader *pfile) cpp_define (pfile, "__WCHAR_UNSIGNED__"); cpp_atomic_builtins (pfile); - + + /* Show support for __builtin_load_no_speculate (). */ + cpp_define (pfile, "__HAVE_LOAD_NO_SPECULATE"); + #ifdef DWARF2_UNWIND_INFO if (dwarf2out_do_cfi_asm ()) cpp_define (pfile, "__GCC_HAVE_DWARF2_CFI_ASM"); diff --git a/gcc/doc/cpp.texi b/gcc/doc/cpp.texi index 94437d5..9dca2e2 100644 --- a/gcc/doc/cpp.texi +++ b/gcc/doc/cpp.texi @@ -2381,6 +2381,10 @@ If GCC cannot determine the current date, it will emit a warning message These macros are defined when the target processor supports atomic compare and swap operations on operands 1, 2, 4, 8 or 16 bytes in length, respectively. +@item __HAVE_LOAD_NO_SPECULATE +This macro is defined with the value 1 to show that this version of GCC +supports @code{__builtin_load_no_speculate}. + @item __GCC_HAVE_DWARF2_CFI_ASM This macro is defined when the compiler is emitting DWARF CFI directives to the assembler. 
When this is defined, it is possible to emit those same diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 2a553ad..7a71f34 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -10968,6 +10968,7 @@ the built-in function returns -1. @findex __builtin_islessequal @findex __builtin_islessgreater @findex __builtin_isunordered +@findex __builtin_load_no_speculate @findex __builtin_powi @findex __builtin_powif @findex __builtin_powil @@ -11614,6 +11615,58 @@ check its compatibility with @var{size}. @end deftypefn +@deftypefn {Built-in Function} @var{type} __builtin_load_no_speculate (const volatile @var{type} *ptr, const volatile void *lower_bound, const volatile void *upper_bound, @var{type} failval, const volatile void *cmpptr) +The @code{__builtin_load_no_speculate} function provides a means to +limit the extent to which a processor can continue speculative +execution with the result of loading a value stored at @var{ptr}. +Logically, the builtin implements the following behavior: + +@smallexample +inline @var{type} __builtin_load_no_speculate + (const volatile @var{type} *ptr, + const volatile void *lower_bound, + const volatile void *upper_bound, + @var{type} failval, + const volatile void *cmpptr) +@{ + @var{type} result; + if (cmpptr >= lower_bound && cmpptr < upper_bound) + result = *ptr; + else + result = failval; + return result; +@} +@end smallexample + +but in addition target-specific code will be inserted to ensure that +speculation using @code{*ptr} cannot occur when @var{cmpptr} lies outside of +the specified bounds. + +@var{type} may be any integral type (signed, or unsigned, @code{char}, +@code{short}, @code{int}, etc) or a pointer to any type. + +The final argument, @var{cmpptr}, may be omitted. If you do this, +then the compiler will use @var{ptr} for comparison against the upper +and lower bounds. 
Furthermore, if you omit @var{cmpptr}, you may also +omit @var{failval} and the compiler will use @code{(@var{type})0} for +the out-of-bounds result. + +Additionally, when it is known that one of the bounds can never fail, +you can use a literal @code{NULL} argument and the compiler will +generate code that only checks the other boundary condition. It is generally +only safe to do this when your code contains a loop construct where the only +boundary of interest is the one beyond the termination condition. You cannot +omit both boundary conditions in this way. + +The logical behavior of the builtin is supported for all architectures, but +on machines where target-specific support for inhibiting speculation is not +implemented, or not necessary, the compiler will emit a warning. + +The pre-processor macro @code{__HAVE_LOAD_NO_SPECULATE} is defined with the +value 1 on all implementations of GCC that support this builtin. + +@end deftypefn + @deftypefn {Built-in Function} int __builtin_types_compatible_p (@var{type1}, @var{type2}) You can use the built-in function @code{__builtin_types_compatible_p} to diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 9793a0e..7309ccb 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -11922,6 +11922,12 @@ maintainer is familiar with. @end defmac +@deftypefn {Target Hook} rtx TARGET_INHIBIT_LOAD_SPECULATION (machine_mode @var{mode}, rtx @var{result}, rtx @var{mem}, rtx @var{lower_bound}, rtx @var{upper_bound}, rtx @var{fail_result}, rtx @var{cmpptr}) +Generate a target-specific code sequence that implements @code{__builtin_load_no_speculate}, returning the result in @var{result}. If @var{cmpptr} is greater than, or equal to, @var{lower_bound} and less than @var{upper_bound} then @var{mem}, a @code{MEM} of type @var{mode}, should be returned, otherwise @var{failval} should be returned. 
The expansion must ensure that subsequent speculation by the processor using the @var{mem} cannot occur if @var{cmpptr} lies outside of the specified bounds. At most one of @var{lower_bound} and @var{upper_bound} can be @code{NULL_RTX}, indicating that code for that bounds check should not be generated. + + The default implementation implements the logic of the builtin but cannot provide the target-specific code necessary to inhibit speculation. A warning will be emitted to that effect. +@end deftypefn + @deftypefn {Target Hook} void TARGET_RUN_TARGET_SELFTESTS (void) If selftests are enabled, run any selftests for this target. @end deftypefn diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 7bcfb37..d34e4bf 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -8075,4 +8075,6 @@ maintainer is familiar with. @end defmac +@hook TARGET_INHIBIT_LOAD_SPECULATION + @hook TARGET_RUN_TARGET_SELFTESTS diff --git a/gcc/target.def b/gcc/target.def index e9eacc8..375eb0a 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -4214,6 +4214,26 @@ DEFHOOK hook_bool_void_true) DEFHOOK +(inhibit_load_speculation, + "Generate a target-specific code sequence that implements\ + @code{__builtin_load_no_speculate}, returning the result in @var{result}.\ + If @var{cmpptr} is greater than, or equal to, @var{lower_bound} and less\ + than @var{upper_bound} then @var{mem}, a @code{MEM} of type @var{mode},\ + should be returned, otherwise @var{failval} should be returned. The\ + expansion must ensure that subsequent speculation by the processor using\ + the @var{mem} cannot occur if @var{cmpptr} lies outside of the specified\ + bounds. At most one of @var{lower_bound} and @var{upper_bound} can be\ + @code{NULL_RTX}, indicating that code for that bounds check should not be\ + generated.\n\ + \n\ + The default implementation implements the logic of the builtin\ + but cannot provide the target-specific code necessary to inhibit\ + speculation. 
A warning will be emitted to that effect.", + rtx, (machine_mode mode, rtx result, rtx mem, rtx lower_bound, + rtx upper_bound, rtx fail_result, rtx cmpptr), + default_inhibit_load_speculation) + +DEFHOOK (can_use_doloop_p, "Return true if it is possible to use low-overhead loops (@code{doloop_end}\n\ and @code{doloop_begin}) for a particular loop. @var{iterations} gives the\n\ diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 653567c..24d9c7b 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -82,6 +82,7 @@ along with GCC; see the file COPYING3. If not see #include "params.h" #include "real.h" #include "langhooks.h" +#include "dojump.h" bool default_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED, @@ -2307,4 +2308,72 @@ default_stack_clash_protection_final_dynamic_probe (rtx residual ATTRIBUTE_UNUSE return 0; } +/* Default implementation of the load-and-inhibit-speculation builtin. + This version does not have, or know of, the target-specific + mechanisms necessary to inhibit speculation, so it simply emits a + code sequence that implements the architectural aspects of the + builtin. */ +rtx +default_inhibit_load_speculation (machine_mode mode ATTRIBUTE_UNUSED, + rtx result, + rtx mem, + rtx lower_bound, + rtx upper_bound, + rtx fail_result, + rtx cmpptr) +{ + rtx_code_label *done_label = gen_label_rtx (); + rtx_code_label *inrange_label = gen_label_rtx (); + warning_at + (input_location, 0, + "this target does not support anti-speculation operations. " + "Your program will still execute correctly, but speculation " + "will not be inhibited"); + + /* We don't have any despeculation barriers, but if we mark the branch + probabilities to be always predicting the out-of-bounds path, then + there's a higher chance that the compiler will order code so that + static prediction will fall through a safe path. 
*/ + if (lower_bound == NULL) + { + do_compare_rtx_and_jump (cmpptr, upper_bound, LTU, true, ptr_mode, + NULL, NULL, inrange_label, + profile_probability::never ()); + emit_move_insn (result, fail_result); + emit_jump (done_label); + emit_label (inrange_label); + emit_move_insn (result, mem); + emit_label (done_label); + } + else if (upper_bound == NULL) + { + do_compare_rtx_and_jump (cmpptr, lower_bound, GEU, true, ptr_mode, + NULL, NULL, inrange_label, + profile_probability::never ()); + emit_move_insn (result, fail_result); + emit_jump (done_label); + emit_label (inrange_label); + emit_move_insn (result, mem); + emit_label (done_label); + } + else + { + rtx_code_label *oob_label = gen_label_rtx (); + do_compare_rtx_and_jump (cmpptr, lower_bound, LTU, true, ptr_mode, + NULL, NULL, oob_label, + profile_probability::always ()); + do_compare_rtx_and_jump (cmpptr, upper_bound, GEU, true, ptr_mode, + NULL, NULL, inrange_label, + profile_probability::never ()); + emit_label (oob_label); + emit_move_insn (result, fail_result); + emit_jump (done_label); + emit_label (inrange_label); + emit_move_insn (result, mem); + emit_label (done_label); + } + + return result; +} + #include "gt-targhooks.h" diff --git a/gcc/targhooks.h b/gcc/targhooks.h index e753e58..c55b43f 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -286,4 +286,7 @@ extern enum flt_eval_method default_excess_precision (enum excess_precision_type ATTRIBUTE_UNUSED); extern bool default_stack_clash_protection_final_dynamic_probe (rtx); +extern rtx +default_inhibit_load_speculation (machine_mode, rtx, rtx, rtx, rtx, rtx, rtx); + #endif /* GCC_TARGHOOKS_H */

[1/3,builtins] Generic support for __builtin_load_no_speculate()

Commit Message

Comments

Patch