From 5a39451f34be9b6ca98b3460bf40d879d6ee61a5 Mon Sep 17 00:00:00 2001
From: Charles Baylis <charles.baylis@linaro.org>
Date: Thu, 24 Mar 2016 20:43:25 +0000
Subject: [PATCH] PR69770 -mlong-calls does not affect calls to __gnu_mcount_nc
generated by -pg
gcc/ChangeLog:
2016-03-24 Charles Baylis <charles.baylis@linaro.org>
* config/arm/arm-protos.h (arm_emit_long_call_profile): Declare.
* config/arm/arm.c (arm_emit_long_call_profile_insn): New function.
(arm_expand_prologue): Call it when profiling with -mlong-calls.
(thumb1_expand_prologue): Likewise.
(arm_output_long_call_to_profile_func): New function.
(arm_emit_long_call_profile): Likewise.
* config/arm/arm.h (ASM_OUTPUT_REG_PUSH): Update comment.
* config/arm/arm.md (arm_long_call_profile): New pattern.
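* config/arm/bpabi.h (ARM_FUNCTION_PROFILER_SUPPORTS_LONG_CALLS): New
define.
(ARM_FUNCTION_PROFILER): Call arm_emit_long_call_profile when
TARGET_LONG_CALLS.
* config/arm/thumb1.md (thumb1_eh_return): Disable pattern.
(thumb1_long_call_profile): New pattern.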
* config/arm/unspecs.md (unspecv): Add VUNSPEC_LONG_CALL_PROFILE.
gcc/testsuite/ChangeLog:
2016-03-24 Charles Baylis <charles.baylis@linaro.org>
* gcc.target/arm/pr69770.c: New test.
Change-Id: I9b8de01fea083f17f729c3801f83174bedb3b0c6
@@ -343,6 +343,7 @@ extern void arm_register_target_pragmas (void);
extern void arm_cpu_cpp_builtins (struct cpp_reader *);
extern bool arm_is_constant_pool_ref (rtx);
+extern void arm_emit_long_call_profile (void);
/* Flags used to identify the presence of processor capabilities. */
@@ -21426,6 +21426,35 @@ output_probe_stack_range (rtx reg1, rtx reg2)
return "";
}
+/* Only bpabi.h defines ARM_FUNCTION_PROFILER_SUPPORTS_LONG_CALLS.  Default
+   it to 0 for every other configuration, so that the tests in the prologue
+   expanders still compile and the marker insn is never emitted there.  */
+#ifndef ARM_FUNCTION_PROFILER_SUPPORTS_LONG_CALLS
+#define ARM_FUNCTION_PROFILER_SUPPORTS_LONG_CALLS 0
+#endif
+
+/* Emit an UNSPEC_VOLATILE marker insn (VUNSPEC_LONG_CALL_PROFILE) whose
+   single operand is the address of __gnu_mcount_nc.  The operand is a
+   plain SYMBOL_REF when arm_arch_thumb2 is set, and a constant pool
+   reference otherwise.  arm_emit_long_call_profile later reads the
+   operand back from this insn when it outputs the profiling call.  */
+
+static void
+arm_emit_long_call_profile_insn (void)
+{
+  rtx sym_ref = gen_rtx_SYMBOL_REF (Pmode, "__gnu_mcount_nc");
+
+  /* If movw/movt are not available, use a constant pool entry.  */
+  if (!arm_arch_thumb2)
+    sym_ref = force_const_mem (Pmode, sym_ref);
+
+  rtvec vec = gen_rtvec (1, sym_ref);
+  rtx marker
+    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, vec, VUNSPEC_LONG_CALL_PROFILE);
+  emit_insn (marker);
+}
+
+
/* Generate the prologue instructions for entry into an ARM or Thumb-2
function. */
void
@@ -21789,6 +21805,10 @@ arm_expand_prologue (void)
arm_load_pic_register (mask);
}
+ if (crtl->profile && TARGET_LONG_CALLS
+ && ARM_FUNCTION_PROFILER_SUPPORTS_LONG_CALLS)
+ arm_emit_long_call_profile_insn ();
+
/* If we are profiling, make sure no instructions are scheduled before
the call to mcount. Similarly if the user has requested no
scheduling in the prolog. Similarly if we want non-call exceptions
@@ -24985,6 +25005,10 @@ thumb1_expand_prologue (void)
if (frame_pointer_needed)
thumb_set_frame_pointer (offsets);
+ if (crtl->profile && TARGET_LONG_CALLS
+ && ARM_FUNCTION_PROFILER_SUPPORTS_LONG_CALLS)
+ arm_emit_long_call_profile_insn ();
+
/* If we are profiling, make sure no instructions are scheduled before
the call to mcount. Similarly if the user has requested no
scheduling in the prolog. Similarly if we want non-call exceptions
@@ -30289,4 +30313,96 @@ arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
return;
}
+/* Output the assembly for a long call to the profiling function.
+
+   OPERANDS[0] is the address of __gnu_mcount_nc: either a SYMBOL_REF,
+   which is loaded with a movw/movt pair, or any other operand, which is
+   loaded with ldr.  OPERANDS[1] is the scratch register that receives
+   the address and through which the call is made.
+
+   LR is pushed before the call.  If PUSH_SCRATCH is true, the scratch
+   register is pushed before LR and popped again after the call.  */
+
+static void
+arm_output_long_call_to_profile_func (rtx *operands, bool push_scratch)
+{
+  if (push_scratch)
+    output_asm_insn ("push\t{%1}", operands);
+  output_asm_insn ("push\t{lr}", operands);
+
+  if (GET_CODE (operands[0]) == SYMBOL_REF)
+    {
+      output_asm_insn ("movw\t%1, #:lower16:%c0", operands);
+      output_asm_insn ("movt\t%1, #:upper16:%c0", operands);
+    }
+  else
+    output_asm_insn ("ldr\t%1, %0", operands);
+
+  /* Without arm_arch5, set up LR by hand and branch by writing PC
+     instead of using blx.  */
+  if (!arm_arch5)
+    {
+      output_asm_insn ("mov\tlr, pc", operands);
+      output_asm_insn ("mov\tpc, %1", operands);
+    }
+  else
+    output_asm_insn ("blx\t%1", operands);
+
+  if (push_scratch)
+    output_asm_insn ("pop\t{%1}", operands);
+}
+
+/* Output the long call form of the profiling sequence for the current
+   function.  Called from ARM_FUNCTION_PROFILER when TARGET_LONG_CALLS is
+   set.
+
+   The address of the profiling function is taken from the
+   arm_long_call_profile or thumb1_long_call_profile marker insn, which is
+   searched for from the first insn up to NOTE_INSN_PROLOGUE_END.  It is
+   an internal error if no marker insn is found.  */
+
+void
+arm_emit_long_call_profile (void)
+{
+  rtx alcp = NULL;
+  rtx operands[2];
+  bool push_scratch;
+
+  /* Find the long call profile marker insn in the prologue.  */
+  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    {
+      if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END)
+        break;
+      /* INSN_CODE is only meaningful for real insns, not for notes,
+         labels or barriers.  */
+      if (INSN_P (insn)
+          && (INSN_CODE (insn) == CODE_FOR_arm_long_call_profile
+              || INSN_CODE (insn) == CODE_FOR_thumb1_long_call_profile))
+        {
+          alcp = PATTERN (insn);
+          break;
+        }
+    }
+  gcc_assert (alcp);
+
+  /* The pattern is an UNSPEC_VOLATILE whose operand 0 is a vector, so the
+     address has to be read with XVECEXP rather than XEXP.  */
+  operands[0] = XVECEXP (alcp, 0, 0);
+  if (TARGET_32BIT)
+    {
+      operands[1] = gen_rtx_REG (SImode, IP_REGNUM);
+      push_scratch = false;
+    }
+  else
+    {
+      /* For nested functions, we can set push_scratch to false, since
+         final.c:profile_function and ASM_OUTPUT_REG_PUSH preserve the
+         scratch register as part of the sequence to preserve ip across
+         the call to the profiling function.  */
+      operands[1] = gen_rtx_REG (SImode, R0_REGNUM + 7);
+      push_scratch = !IS_NESTED (arm_current_func_type ());
+    }
+  arm_output_long_call_to_profile_func (operands, push_scratch);
+}
+
#include "gt-arm.h"
@@ -2044,7 +2044,9 @@ extern int making_const_table;
that ASM_OUTPUT_REG_PUSH will be matched with ASM_OUTPUT_REG_POP, and
that r7 isn't used by the function profiler, so we can use it as a
scratch reg. WARNING: This isn't safe in the general case! It may be
- sensitive to future changes in final.c:profile_function. */
+ sensitive to future changes in final.c:profile_function. This is also
+ relied on in arm_emit_long_call_profile() which assumes r7 can be
+ used as a scratch register to load the address of the function profiler. */
#define ASM_OUTPUT_REG_PUSH(STREAM, REGNO) \
do \
{ \
@@ -11424,6 +11424,15 @@
DONE;
})
+(define_insn "arm_long_call_profile" ; marker holding the profiler address
+ [(unspec_volatile [(match_operand:SI 0 "general_operand" "ji,m")
+ ] VUNSPEC_LONG_CALL_PROFILE)]
+ "TARGET_32BIT"
+ "%@ arm_long_call_profile" ; call is output by arm_emit_long_call_profile
+ [(set_attr "arm_pool_range" "*,4096")
+ (set_attr "arm_neg_pool_range" "*,4084")]
+)
+
;; Vector bits common to IWMMXT and Neon
(include "vec-common.md")
;; Load the Intel Wireless Multimedia Extension patterns
@@ -174,11 +174,18 @@
#undef NO_PROFILE_COUNTERS
#define NO_PROFILE_COUNTERS 1
+#undef ARM_FUNCTION_PROFILER_SUPPORTS_LONG_CALLS
+#define ARM_FUNCTION_PROFILER_SUPPORTS_LONG_CALLS 1 /* Used in arm.c.  */
#undef ARM_FUNCTION_PROFILER
#define ARM_FUNCTION_PROFILER(STREAM, LABELNO) \
{ \
- fprintf (STREAM, "\tpush\t{lr}\n"); \
- fprintf (STREAM, "\tbl\t__gnu_mcount_nc\n"); \
+ if (TARGET_LONG_CALLS) /* -mlong-calls: avoid a direct bl.  */ \
+ { \
+ arm_emit_long_call_profile(); \
+ } else { \
+ fprintf (STREAM, "\tpush\t{lr}\n"); \
+ fprintf (STREAM, "\tbl\t__gnu_mcount_nc\n"); \
+ } \
}
#undef SUBTARGET_FRAME_POINTER_REQUIRED
@@ -1798,7 +1798,7 @@
[(unspec_volatile [(match_operand:SI 0 "s_register_operand" "l")]
VUNSPEC_EH_RETURN)
(clobber (match_scratch:SI 1 "=&l"))]
- "TARGET_THUMB1"
+ "TARGET_THUMB1 && 0"
"#"
"&& reload_completed"
[(const_int 0)]
@@ -1809,4 +1809,13 @@
}"
[(set_attr "type" "mov_reg")]
)
+
+(define_insn "thumb1_long_call_profile" ; marker holding the profiler address
+ [(unspec_volatile [(match_operand:SI 0 "general_operand" "j,m")
+ ] VUNSPEC_LONG_CALL_PROFILE)]
+ "TARGET_THUMB1"
+ "%@ thumb1_long_call_profile" ; call is output by arm_emit_long_call_profile
+ [(set_attr "pool_range" "1018")]
+)
+
@@ -148,6 +148,7 @@
VUNSPEC_GET_FPSCR ; Represent fetch of FPSCR content.
VUNSPEC_SET_FPSCR ; Represent assign of FPSCR content.
VUNSPEC_PROBE_STACK_RANGE ; Represent stack range probing.
+ VUNSPEC_LONG_CALL_PROFILE ; Represent a long call to profile function
])
;; Enumerators for NEON unspecs.
new file mode 100644
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-pg -mlong-calls" } */
+/* PR 69770: with -mlong-calls the -pg call must not be a direct bl.  */
+extern void g(void);
+
+int f() { g(); return 0; }
+
+/* { dg-final { scan-assembler-not "bl\[ \t\]+__gnu_mcount_nc" } } */
+/* { dg-final { scan-assembler "__gnu_mcount_nc" } } */
--
1.9.1