Message ID | 1501096098-2433-1-git-send-email-adhemerval.zanella@linaro.org |
---|---|
State | New |
Headers | show |
Ping. On 26/07/2017 16:08, Adhemerval Zanella wrote: > This is an updated patch based on my previous submission [1]. The changes > from the previous version are: > > - Update aarch64_supports_split_stack to return true and let the loader > actually emit an error if it does not provide the required TCB > field. The TCB field support is planned for GLIBC 2.27. > > - Some cleanup on morestack-c.c to avoid code duplication (ss_pointer > function). > > I am still seeing some sporadic failures, but they are due to missing linker support > for split calls due to not enough stack size (on pprof and other go tests > that call backtrace on signal handling). This could be mitigated by > increasing the BACKOFF to a higher value, but to not deviate from other > ports with split-stack support this patch is using the default values > (initial stack size of 16k and backoff of 4k). This should be correctly > handled with proper gold support (as for other ports). > > -- > > This patch adds the split-stack support on aarch64 (PR #67877). As for > other ports this patch should be used along with glibc and gold support. > > The support is done similarly to other architectures: a split-stack field > is allocated before TCB by glibc, a target-specific __morestack implementation > and helper functions are added in libgcc and compiler support is adjusted > (split-stack prologue, va_start for argument handling). I also plan to > send the gold support to adjust stack allocation across split-stack > and default code calls. > > The current approach is to set the final stack adjustments using 2 instructions > at most (mov/movk), which limits stack allocation to an upper limit of 4GB. > The morestack call is non-standard, with x10 holding the requested stack > pointer, x11 the argument pointer (if required), and x12 the return > continuation address. Unwinding is handled by a personality routine that > knows how to find stack segments. 
> > Split-stack prologue on function entry is as follows (this goes before the > usual function prologue): > > function: > mrs x9, tpidr_el0 > mov x10, <required stack allocation> > movk x10, #0x0, lsl #16 > sub x10, sp, x10 > mov x11, sp # if function has stacked arguments > adrp x12, main_fn_entry > add x12, x12, :lo12:main_fn_entry > cmp x9, x10 > b.lt <main_fn_entry> > b __morestack > main_fn_entry: > [function prologue] > > Notes: > > 1. Even if a function does not allocate a stack frame, a split-stack prologue > is created. It is to avoid issues with tail call for external symbols > which might require linker adjustment (libgo/runtime/go-varargs.c). > > 2. Basic-block reordering (enabled with -O2) will move split-stack TCB ldr > to after the required stack calculation. > > 3. Similar to powerpc, when the linker detects a call from split-stack to > non-split-stack code, it adds 16k (or more) to the value found in "allocate" > instructions (so non-split-stack code gets a larger stack). The amount is > tunable by a linker option. The edit means aarch64 does not need to > implement __morestack_non_split, necessary on x86 because insufficient > space is available there to edit the stack comparison code. This feature > is only implemented in the GNU gold linker. > > 4. AArch64 does not handle >4G stack initially and although it is possible > to implement it, limiting to 4G allows materializing the allocation with > only 2 instructions (mov + movk), thus simplifying the linker > adjustments required. Supporting multiple threads each requiring more > than 4G of stack is probably not that important, and likely to OOM at > run time. > > 5. The TCB support on GLIBC is meant to be included in version 2.27. > > 6. The continuation address materialized on x12 is done using 'adrp' > plus add and a static relocation. 
Current code uses the > aarch64_expand_mov_immediate function and since a better alternative > would be 'adr', it could be a future optimization (not implemented > in this patch). > > libgcc/ChangeLog: > > * libgcc/config.host: Use t-stack and t-stack-aarch64 for > aarch64*-*-linux. > * libgcc/config/aarch64/morestack-c.c: New file. > * libgcc/config/aarch64/morestack.S: Likewise. > * libgcc/config/aarch64/t-stack-aarch64: Likewise. > * libgcc/generic-morestack.c (__splitstack_find): Add aarch64-specific > code. > > gcc/ChangeLog: > > * common/config/aarch64/aarch64-common.c > (aarch64_supports_split_stack): New function. > (TARGET_SUPPORTS_SPLIT_STACK): New macro. > * gcc/config/aarch64/aarch64-linux.h (TARGET_ASM_FILE_END): Remove > macro. > * gcc/config/aarch64/aarch64-protos.h: Add > aarch64_expand_split_stack_prologue and > aarch64_split_stack_space_check. > * gcc/config/aarch64/aarch64.c (aarch64_gen_far_branch): Add support > to emit 'b' instruction to rtx different than LABEL_REF. > (aarch64_expand_builtin_va_start): Use internal argument pointer > instead of virtual_incoming_args_rtx. > (morestack_ref): New symbol. > (aarch64_load_split_stack_value): New function. > (aarch64_expand_split_stack_prologue): Likewise. > (aarch64_internal_arg_pointer): Likewise. > (aarch64_split_stack_space_check): Likewise. > (aarch64_file_end): Emit the split-stack note sections. > (TARGET_ASM_FILE_END): New macro. > (TARGET_INTERNAL_ARG_POINTER): Likewise. > * gcc/config/aarch64/aarch64.h (aarch64_frame): Add > split_stack_arg_pointer to setup the argument pointer when using > split-stack. > * gcc/config/aarch64/aarch64.md > (UNSPECV_SPLIT_STACK_CALL): New unspecv. > (split_stack_prologue): New expand. > (split_stack_space_check): Likewise. > (split_stack_cond_call): Likewise. 
> --- > gcc/common/config/aarch64/aarch64-common.c | 16 +- > gcc/config/aarch64/aarch64-linux.h | 2 - > gcc/config/aarch64/aarch64-protos.h | 2 + > gcc/config/aarch64/aarch64.c | 174 +++++++++++++++++++- > gcc/config/aarch64/aarch64.h | 3 + > gcc/config/aarch64/aarch64.md | 56 +++++++ > libgcc/config.host | 1 + > libgcc/config/aarch64/morestack-c.c | 87 ++++++++++ > libgcc/config/aarch64/morestack.S | 248 +++++++++++++++++++++++++++++ > libgcc/config/aarch64/t-stack-aarch64 | 3 + > libgcc/generic-morestack.c | 1 + > 11 files changed, 588 insertions(+), 5 deletions(-) > create mode 100644 libgcc/config/aarch64/morestack-c.c > create mode 100644 libgcc/config/aarch64/morestack.S > create mode 100644 libgcc/config/aarch64/t-stack-aarch64 > > diff --git a/gcc/common/config/aarch64/aarch64-common.c b/gcc/common/config/aarch64/aarch64-common.c > index a0b7f48..474dfa1 100644 > --- a/gcc/common/config/aarch64/aarch64-common.c > +++ b/gcc/common/config/aarch64/aarch64-common.c > @@ -107,6 +107,21 @@ aarch64_handle_option (struct gcc_options *opts, > } > } > > +/* -fsplit-stack uses a TCB field available on glibc-2.27. GLIBC also > + exports symbol, __tcb_private_ss, to signal it has the field available > + on TCB bloc. This aims to prevent binaries linked against newer > + GLIBC to run on non-supported ones. */ > + > +static bool > +aarch64_supports_split_stack (bool report ATTRIBUTE_UNUSED, > + struct gcc_options *opts ATTRIBUTE_UNUSED) > +{ > + return true; > +} > + > +#undef TARGET_SUPPORTS_SPLIT_STACK > +#define TARGET_SUPPORTS_SPLIT_STACK aarch64_supports_split_stack > + > struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER; > > /* An ISA extension in the co-processor and main instruction set space. 
*/ > @@ -340,4 +355,3 @@ aarch64_rewrite_mcpu (int argc, const char **argv) > } > > #undef AARCH64_CPU_NAME_LENGTH > - > diff --git a/gcc/config/aarch64/aarch64-linux.h b/gcc/config/aarch64/aarch64-linux.h > index c45fc1d..b8daba4 100644 > --- a/gcc/config/aarch64/aarch64-linux.h > +++ b/gcc/config/aarch64/aarch64-linux.h > @@ -80,8 +80,6 @@ > } \ > while (0) > > -#define TARGET_ASM_FILE_END file_end_indicate_exec_stack > - > /* Uninitialized common symbols in non-PIE executables, even with > strong definitions in dependent shared libraries, will resolve > to COPY relocated symbol in the executable. See PR65780. */ > diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h > index e397ff4..5bb6411 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -396,6 +396,8 @@ void aarch64_err_no_fpadvsimd (machine_mode, const char *); > void aarch64_expand_epilogue (bool); > void aarch64_expand_mov_immediate (rtx, rtx); > void aarch64_expand_prologue (void); > +void aarch64_expand_split_stack_prologue (void); > +void aarch64_split_stack_space_check (rtx, rtx); > void aarch64_expand_vector_init (rtx, rtx); > void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, > const_tree, unsigned); > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index 9aa59e7..ae014909 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -951,7 +951,12 @@ aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest, > snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr); > output_asm_insn (buffer, operands); > > - snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr); > + if (GET_CODE (operands[pos_label]) == LABEL_REF) > + snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, > + label_ptr); > + else > + snprintf (buffer, sizeof (buffer), "b\t%%%d\n%s:", pos_label, > + label_ptr); > 
operands[pos_label] = dest_label; > output_asm_insn (buffer, operands); > return ""; > @@ -10365,7 +10370,7 @@ aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) > /* Emit code to initialize STACK, which points to the next varargs stack > argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used > by named arguments. STACK is 8-byte aligned. */ > - t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx); > + t = make_tree (TREE_TYPE (stack), crtl->args.internal_arg_pointer); > if (cum->aapcs_stack_size > 0) > t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD); > t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t); > @@ -15070,6 +15075,165 @@ aarch64_sched_can_speculate_insn (rtx_insn *insn) > } > } > > +/* -fsplit-stack support. */ > + > +/* A SYMBOL_REF for __morestack. */ > +static GTY(()) rtx morestack_ref; > + > +/* Load split-stack area from thread pointer position. The split-stack is > + allocate just before thread pointer. */ > + > +static rtx > +aarch64_load_split_stack_value (bool use_hard_reg) > +{ > + /* Offset from thread pointer to split-stack area. */ > + const int psso = -8; > + > + rtx ssvalue = use_hard_reg > + ? gen_rtx_REG (Pmode, R9_REGNUM) : gen_reg_rtx (Pmode); > + ssvalue = aarch64_load_tp (ssvalue); > + rtx mem = gen_rtx_MEM (Pmode, plus_constant (Pmode, ssvalue, psso)); > + emit_move_insn (ssvalue, mem); > + return ssvalue; > +} > + > +/* Emit -fsplit-stack prologue, which goes before the regular function > + prologue. */ > + > +void > +aarch64_expand_split_stack_prologue (void) > +{ > + rtx ssvalue, reg10, reg11, reg12, cc, cmp, jump; > + HOST_WIDE_INT allocate; > + rtx_code_label *ok_label = NULL; > + > + gcc_assert (flag_split_stack && reload_completed); > + > + /* It limits total maximum stack allocation on 4G so its value can be > + materialized using two instructions at most (movn/movk). 
It might be > + used by the linker to add some extra space for split calling non split > + stack functions. */ > + allocate = cfun->machine->frame.frame_size; > + if (allocate > ((int64_t)1 << 32)) > + { > + sorry ("Stack frame larger than 4G is not supported for -fsplit-stack"); > + return; > + } > + > + if (morestack_ref == NULL_RTX) > + { > + morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack"); > + SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL > + | SYMBOL_FLAG_FUNCTION); > + } > + > + ssvalue = aarch64_load_split_stack_value (true); > + > + /* Always emit two insns to calculate the requested stack, so the linker > + can edit them when adjusting size for calling non-split-stack code. */ > + reg10 = gen_rtx_REG (Pmode, R10_REGNUM); > + emit_insn (gen_rtx_SET (reg10, GEN_INT (allocate & 0xffff))); > + emit_insn (gen_insv_immdi (reg10, GEN_INT (16), > + GEN_INT ((allocate & 0xffff0000) >> 16))); > + emit_insn (gen_sub3_insn (reg10, stack_pointer_rtx, reg10)); > + > + ok_label = gen_label_rtx (); > + > + /* If function uses stacked arguments save the old stack value so morestack > + can return it. */ > + reg11 = gen_rtx_REG (Pmode, R11_REGNUM); > + if (crtl->args.size > + || cfun->machine->frame.saved_varargs_size) > + emit_move_insn (reg11, stack_pointer_rtx); > + > + /* x12 holds the continuation address used to return to function. */ > + reg12 = gen_rtx_REG (Pmode, R12_REGNUM); > + aarch64_expand_mov_immediate (reg12, gen_rtx_LABEL_REF (VOIDmode, ok_label)); > + > + /* Jump to __morestack call if current ss guard is not suffice. */ > + cc = aarch64_gen_compare_reg (GE, ssvalue, reg10); > + cmp = gen_rtx_fmt_ee (GE, VOIDmode, cc, const0_rtx); > + jump = gen_split_stack_cond_call (morestack_ref, cmp, ok_label, reg12); > + > + aarch64_emit_unlikely_jump (jump); > + JUMP_LABEL (jump) = ok_label; > + LABEL_NUSES (ok_label)++; > + > + /* __morestack will call us here. */ > + emit_label (ok_label); > +} > + > +/* Implement TARGET_ASM_FILE_END. 
*/ > + > +static void > +aarch64_file_end (void) > +{ > + file_end_indicate_exec_stack (); > + > + if (flag_split_stack) > + { > + file_end_indicate_split_stack (); > + > + switch_to_section (data_section); > + fprintf (asm_out_file, "\t.align 3\n"); > + fprintf (asm_out_file, "\t.quad __libc_tcb_private_ss\n"); > + } > +} > + > +/* Return the internal arg pointer used for function incoming arguments. */ > + > +static rtx > +aarch64_internal_arg_pointer (void) > +{ > + if (flag_split_stack > + && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl)) > + == NULL)) > + { > + if (cfun->machine->frame.split_stack_arg_pointer == NULL_RTX) > + { > + rtx pat; > + > + cfun->machine->frame.split_stack_arg_pointer = gen_reg_rtx (Pmode); > + REG_POINTER (cfun->machine->frame.split_stack_arg_pointer) = 1; > + > + /* Put the pseudo initialization right after the note at the > + beginning of the function. */ > + pat = gen_rtx_SET (cfun->machine->frame.split_stack_arg_pointer, > + gen_rtx_REG (Pmode, R11_REGNUM)); > + push_topmost_sequence (); > + emit_insn_after (pat, get_insns ()); > + pop_topmost_sequence (); > + } > + return plus_constant (Pmode, cfun->machine->frame.split_stack_arg_pointer, > + FIRST_PARM_OFFSET (current_function_decl)); > + } > + return virtual_incoming_args_rtx; > +} > + > +/* Emit -fsplit-stack dynamic stack allocation space check. */ > + > +void > +aarch64_split_stack_space_check (rtx size, rtx label) > +{ > + rtx ssvalue, cc, cmp, jump, temp; > + rtx requested = gen_reg_rtx (Pmode); > + > + /* Load __private_ss from TCB. */ > + ssvalue = aarch64_load_split_stack_value (false); > + > + temp = gen_reg_rtx (Pmode); > + > + /* And compare it with frame pointer plus required stack. */ > + size = force_reg (Pmode, size); > + emit_move_insn (requested, gen_rtx_MINUS (Pmode, stack_pointer_rtx, size)); > + > + /* Jump to label call if current ss guard is not suffice. 
*/ > + cc = aarch64_gen_compare_reg (GE, temp, ssvalue); > + cmp = gen_rtx_fmt_ee (GEU, VOIDmode, cc, const0_rtx); > + jump = emit_jump_insn (gen_condjump (cmp, cc, label)); > + JUMP_LABEL (jump) = label; > +} > + > /* Target-specific selftests. */ > > #if CHECKING_P > @@ -15142,6 +15306,9 @@ aarch64_run_selftests (void) > #undef TARGET_ASM_FILE_START > #define TARGET_ASM_FILE_START aarch64_start_file > > +#undef TARGET_ASM_FILE_END > +#define TARGET_ASM_FILE_END aarch64_file_end > + > #undef TARGET_ASM_OUTPUT_MI_THUNK > #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk > > @@ -15227,6 +15394,9 @@ aarch64_run_selftests (void) > #undef TARGET_FRAME_POINTER_REQUIRED > #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required > > +#undef TARGET_INTERNAL_ARG_POINTER > +#define TARGET_INTERNAL_ARG_POINTER aarch64_internal_arg_pointer > + > #undef TARGET_GIMPLE_FOLD_BUILTIN > #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin > > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h > index 7f91edb..1406bec 100644 > --- a/gcc/config/aarch64/aarch64.h > +++ b/gcc/config/aarch64/aarch64.h > @@ -607,6 +607,9 @@ struct GTY (()) aarch64_frame > unsigned wb_candidate2; > > bool laid_out; > + > + /* Alternative internal arg pointer for -fsplit-stack. */ > + rtx split_stack_arg_pointer; > }; > > typedef struct GTY (()) machine_function > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index f876a2b..4c914de 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -150,6 +150,7 @@ > UNSPECV_SET_FPSR ; Represent assign of FPSR content. > UNSPECV_BLOCKAGE ; Represent a blockage > UNSPECV_PROBE_STACK_RANGE ; Represent stack range probing. 
> + UNSPECV_SPLIT_STACK_CALL ; Represent a morestack call > ] > ) > > @@ -5631,3 +5632,58 @@ > > ;; ldp/stp peephole patterns > (include "aarch64-ldpstp.md") > + > +;; Handle -fsplit-stack > +(define_expand "split_stack_prologue" > + [(const_int 0)] > + "" > +{ > + aarch64_expand_split_stack_prologue (); > + DONE; > +}) > + > +;; If there are operand 0 bytes available on the stack, jump to > +;; operand 1. > +(define_expand "split_stack_space_check" > + [(set (match_dup 2) (compare:CC (match_dup 3) (match_dup 2))) > + (set (pc) (if_then_else > + (geu (match_dup 4) (const_int 0)) > + (label_ref (match_operand 1)) > + (pc)))] > + "" > +{ > + aarch64_split_stack_space_check (operands[0], operands[1]); > + DONE; > +}) > + > +;; A __morestack call using branch > + > +(define_expand "split_stack_cond_call" > + [(match_operand 0 "aarch64_call_insn_operand" "") > + (match_operand 1 "" "") > + (match_operand 2 "" "") > + (match_operand 3 "" "")] > + "" > +{ > + emit_jump_insn (gen_split_stack_cond_call_di (operands[0], operands[1], > + operands[2], operands[3])); > + DONE; > +}) > + > + > +(define_insn "split_stack_cond_call_<mode>" > + [(set (pc) > + (if_then_else > + (match_operand 1 "aarch64_comparison_operator" "") > + (label_ref (match_operand 2 "" "")) > + (pc))) > + (set (reg:P 1) (unspec_volatile:P [(match_operand:P 0 "aarch64_call_insn_operand" "") > + (reg:P 1)] > + UNSPECV_SPLIT_STACK_CALL)) > + (use (match_operand:P 3 "register_operand" ""))] > + "" > + { > + return aarch64_gen_far_branch (operands, 0, "Lbcond", "b%M1\\t"); > + } > + [(set_attr "type" "branch")] > +) > diff --git a/libgcc/config.host b/libgcc/config.host > index ae8836e..10157ac 100644 > --- a/libgcc/config.host > +++ b/libgcc/config.host > @@ -352,6 +352,7 @@ aarch64*-*-linux*) > md_unwind_header=aarch64/linux-unwind.h > tmake_file="${tmake_file} ${cpu_type}/t-aarch64" > tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm" > + tmake_file="${tmake_file} t-stack 
aarch64/t-stack-aarch64" > ;; > alpha*-*-linux*) > tmake_file="${tmake_file} alpha/t-alpha alpha/t-ieee t-crtfm alpha/t-linux" > diff --git a/libgcc/config/aarch64/morestack-c.c b/libgcc/config/aarch64/morestack-c.c > new file mode 100644 > index 0000000..d065d33 > --- /dev/null > +++ b/libgcc/config/aarch64/morestack-c.c > @@ -0,0 +1,87 @@ > +/* AArch64 support for -fsplit-stack. > + * Copyright (C) 2016 Free Software Foundation, Inc. > + * > + * This file is free software; you can redistribute it and/or modify it > + * under the terms of the GNU General Public License as published by the > + * Free Software Foundation; either version 3, or (at your option) any > + * later version. > + * > + * This file is distributed in the hope that it will be useful, but > + * WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * General Public License for more details. > + * > + * Under Section 7 of GPL version 3, you are granted additional > + * permissions described in the GCC Runtime Library Exception, version > + * 3.1, as published by the Free Software Foundation. > + * > + * You should have received a copy of the GNU General Public License and > + * a copy of the GCC Runtime Library Exception along with this program; > + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see > + * <http://www.gnu.org/licenses/>. > + */ > + > +#ifndef inhibit_libc > + > +#include <stdint.h> > +#include <stdlib.h> > +#include <stddef.h> > +#include "generic-morestack.h" > + > +#define INITIAL_STACK_SIZE 0x4000 > +#define BACKOFF 0x1000 > + > +void __generic_morestack_set_initial_sp (void *sp, size_t len); > +void *__morestack_get_guard (void); > +void __morestack_set_guard (void *); > +void *__morestack_make_guard (void *stack, size_t size); > +void __morestack_load_mmap (void); > + > +/* split-stack area position from thread pointer. 
*/ > +static inline void * > +ss_pointer (void) > +{ > +#define SS_OFFSET (-8) > + return (void*) ((uintptr_t) __builtin_thread_pointer() + SS_OFFSET); > +} > + > +/* Initialize the stack guard when the program starts or when a new > + thread. This is called from a constructor using ctors section. */ > +void > +__stack_split_initialize (void) > +{ > + register uintptr_t* sp __asm__ ("sp"); > + uintptr_t *ss = ss_pointer (); > + *ss = (uintptr_t)sp - INITIAL_STACK_SIZE; > + __generic_morestack_set_initial_sp (sp, INITIAL_STACK_SIZE); > +} > + > +/* Return current __private_ss. */ > +void * > +__morestack_get_guard (void) > +{ > + void **ss = ss_pointer (); > + return *ss; > +} > + > +/* Set __private_ss to ptr. */ > +void > +__morestack_set_guard (void *ptr) > +{ > + void **ss = ss_pointer (); > + *ss = ptr; > +} > + > +/* Return the stack guard value for given stack. */ > +void * > +__morestack_make_guard (void *stack, size_t size) > +{ > + return (void*)((uintptr_t) stack - size + BACKOFF); > +} > + > +/* Make __stack_split_initialize a high priority constructor. */ > +static void (*const ctors []) > + __attribute__ ((used, section (".ctors.65535"), aligned (sizeof (void *)))) > + = { __stack_split_initialize, __morestack_load_mmap }; > + > +#endif /* !defined (inhibit_libc) */ > diff --git a/libgcc/config/aarch64/morestack.S b/libgcc/config/aarch64/morestack.S > new file mode 100644 > index 0000000..6dc9ba7 > --- /dev/null > +++ b/libgcc/config/aarch64/morestack.S > @@ -0,0 +1,248 @@ > +# AArch64 support for -fsplit-stack. > +# Copyright (C) 2016 Free Software Foundation, Inc. > + > +# This file is part of GCC. > + > +# GCC is free software; you can redistribute it and/or modify it under > +# the terms of the GNU General Public License as published by the Free > +# Software Foundation; either version 3, or (at your option) any later > +# version. 
> + > +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY > +# WARRANTY; without even the implied warranty of MERCHANTABILITY or > +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License > +# for more details. > + > +# Under Section 7 of GPL version 3, you are granted additional > +# permissions described in the GCC Runtime Library Exception, version > +# 3.1, as published by the Free Software Foundation. > + > +# You should have received a copy of the GNU General Public License and > +# a copy of the GCC Runtime Library Exception along with this program; > +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see > +# <http://www.gnu.org/licenses/>. > + > +/* Define an entry point visible from C. */ > +#define ENTRY(name) \ > + .globl name; \ > + .type name,%function; \ > + .align 4; \ > + name##: > + > +#define END(name) \ > + .size name,.-name > + > +/* __morestack frame size. */ > +#define MORESTACK_FRAMESIZE 112 > +/* Offset from __morestack frame where the new stack size is saved and > + passed to __generic_morestack. */ > +#define NEWSTACK_SAVE 96 > + > +# Excess space needed to call ld.so resolver for lazy plt resolution. > +# Go uses sigaltstack so this doesn't need to also cover signal frame size. > +#define BACKOFF 0x1000 > +# Large excess allocated when calling non-split-stack code. > +#define NON_SPLIT_STACK 0x100000 > + > +/* split-stack area position from thread pointer. */ > +#define SPLITSTACK_PTR_TP -8 > + > + .text > +ENTRY(__morestack_non_split) > + .cfi_startproc > +# We use a cleanup to restore the tcbhead_t.__private_ss if > +# an exception is thrown through this code. > + add x10, x10, NON_SPLIT_STACK > + .cfi_endproc > +END(__morestack_non_split) > +# Fall through into __morestack > + > +# This function is called with non-standard calling conventions, on entry > +# x10 is the requested stack pointer. 
The split-stack prologue is in the > +# form: > +# > +# function: > +# mrs x9, tpidr_el0 > +# mov x10, <required stack allocation> > +# movk x10, #0x0, lsl #16 > +# sub x10, sp, x10 > +# mov x11, sp # if function has stacked arguments > +# adrp x12, main_fn_entry > +# add x12, x12, :lo12:.L2 > +# cmp x9, x10 > +# b.lt <main_fn_entry> > +# b __morestack > +# main_fn_entry: > +# [function prologue] > +# > +# The N bit is also restored to indicate that the function is called > +# (so the prologue addition can set up the argument pointer correctly). > + > +ENTRY(__morestack) > +.LFB1: > + .cfi_startproc > + > +#ifdef __PIC__ > + .cfi_personality 0x9b,DW.ref.__gcc_personality_v0 > + .cfi_lsda 0x1b,.LLSDA1 > +#else > + .cfi_personality 0x3,__gcc_personality_v0 > + .cfi_lsda 0x3,.LLSDA1 > +#endif > + > + # Calculate requested stack size. > + sub x10, sp, x10 > + # Save parameters > + stp x29, x30, [sp, -MORESTACK_FRAMESIZE]! > + .cfi_def_cfa_offset MORESTACK_FRAMESIZE > + .cfi_offset 29, -MORESTACK_FRAMESIZE > + .cfi_offset 30, -MORESTACK_FRAMESIZE+8 > + add x29, sp, 0 > + .cfi_def_cfa_register 29 > + # Adjust the requested stack size for the frame pointer save. > + stp x0, x1, [sp, 16] > + stp x2, x3, [sp, 32] > + add x10, x10, BACKOFF > + stp x4, x5, [sp, 48] > + stp x6, x7, [sp, 64] > + stp x8, x12, [sp, 80] > + str x10, [sp, 96] > + > + # void __morestack_block_signals (void) > + bl __morestack_block_signals > + > + # void *__generic_morestack (size_t *pframe_size, > + # void *old_stack, > + # size_t param_size) > + # pframe_size: is the size of the required stack frame (the function > + # amount of space remaining on the allocated stack). > + # old_stack: points at the parameters the old stack > + # param_size: size in bytes of parameters to copy to the new stack. 
> + add x0, x29, NEWSTACK_SAVE > + add x1, x29, MORESTACK_FRAMESIZE > + mov x2, 0 > + bl __generic_morestack > + > + # Start using new stack > + mov sp, x0 > + > + # Set __private_ss stack guard for the new stack. > + ldr x9, [x29, NEWSTACK_SAVE] > + add x0, x0, BACKOFF > + sub x0, x0, x9 > +.LEHB0: > + mrs x1, tpidr_el0 > + str x0, [x1, SPLITSTACK_PTR_TP] > + > + # void __morestack_unblock_signals (void) > + bl __morestack_unblock_signals > + > + # Set up for a call to the target function. > + ldp x0, x1, [x29, 16] > + ldp x2, x3, [x29, 32] > + ldp x4, x5, [x29, 48] > + ldp x6, x7, [x29, 64] > + ldp x8, x12, [x29, 80] > + add x11, x29, MORESTACK_FRAMESIZE > + # Indicate __morestack was called. > + cmp x12, 0 > + blr x12 > + > + stp x0, x1, [x29, 16] > + stp x2, x3, [x29, 32] > + stp x4, x5, [x29, 48] > + stp x6, x7, [x29, 64] > + > + bl __morestack_block_signals > + > + # void *__generic_releasestack (size_t *pavailable) > + add x0, x29, NEWSTACK_SAVE > + bl __generic_releasestack > + > + # Reset __private_ss stack guard to value for old stack > + ldr x9, [x29, NEWSTACK_SAVE] > + add x0, x0, BACKOFF > + sub x0, x0, x9 > + > + # Update TCB split stack field > +.LEHE0: > + mrs x1, tpidr_el0 > + str x0, [x1, SPLITSTACK_PTR_TP] > + > + bl __morestack_unblock_signals > + > + # Use old stack again. > + add sp, x29, MORESTACK_FRAMESIZE > + > + ldp x0, x1, [x29, 16] > + ldp x2, x3, [x29, 32] > + ldp x4, x5, [x29, 48] > + ldp x6, x7, [x29, 64] > + ldp x29, x30, [x29] > + > + .cfi_remember_state > + .cfi_restore 30 > + .cfi_restore 29 > + .cfi_def_cfa 31, 0 > + > + ret > + > +# This is the cleanup code called by the stack unwinder when > +# unwinding through code between .LEHB0 and .LEHE0 above. 
> +cleanup: > + .cfi_restore_state > + # Reuse the new stack allocation to save/restore the > + # exception header > + str x0, [x29, NEWSTACK_SAVE] > + # size_t __generic_findstack (void *stack) > + add x0, x29, MORESTACK_FRAMESIZE > + bl __generic_findstack > + sub x0, x29, x0 > + add x0, x0, BACKOFF > + # Restore split-stack guard value > + mrs x1, tpidr_el0 > + str x0, [x1, SPLITSTACK_PTR_TP] > + ldr x0, [x29, NEWSTACK_SAVE] > + b _Unwind_Resume > + .cfi_endproc > +END(__morestack) > + > + .section .gcc_except_table,"a",@progbits > + .align 4 > +.LLSDA1: > + # @LPStart format (omit) > + .byte 0xff > + # @TType format (omit) > + .byte 0xff > + # Call-site format (uleb128) > + .byte 0x1 > + # Call-site table length > + .uleb128 .LLSDACSE1-.LLSDACSB1 > +.LLSDACSB1: > + # region 0 start > + .uleb128 .LEHB0-.LFB1 > + # length > + .uleb128 .LEHE0-.LEHB0 > + # landing pad > + .uleb128 cleanup-.LFB1 > + # no action (ie a cleanup) > + .uleb128 0 > +.LLSDACSE1: > + > + > + .global __gcc_personality_v0 > +#ifdef __PIC__ > + # Build a position independent reference to the personality function. > + .hidden DW.ref.__gcc_personality_v0 > + .weak DW.ref.__gcc_personality_v0 > + .section .data.DW.ref.__gcc_personality_v0,"awG",@progbits,DW.ref.__gcc_personality_v0,comdat > + .type DW.ref.__gcc_personality_v0, @object > + .align 3 > +DW.ref.__gcc_personality_v0: > + .size DW.ref.__gcc_personality_v0, 8 > + .quad __gcc_personality_v0 > +#endif > + > + .section .note.GNU-stack,"",@progbits > + .section .note.GNU-split-stack,"",@progbits > + .section .note.GNU-no-split-stack,"",@progbits > diff --git a/libgcc/config/aarch64/t-stack-aarch64 b/libgcc/config/aarch64/t-stack-aarch64 > new file mode 100644 > index 0000000..4babb4e > --- /dev/null > +++ b/libgcc/config/aarch64/t-stack-aarch64 > @@ -0,0 +1,3 @@ > +# Makefile fragment to support -fsplit-stack for aarch64. 
> +LIB2ADD_ST += $(srcdir)/config/aarch64/morestack.S \ > + $(srcdir)/config/aarch64/morestack-c.c > diff --git a/libgcc/generic-morestack.c b/libgcc/generic-morestack.c > index adbe436..e91ceb4 100644 > --- a/libgcc/generic-morestack.c > +++ b/libgcc/generic-morestack.c > @@ -943,6 +943,7 @@ __splitstack_find (void *segment_arg, void *sp, size_t *len, > nsp -= 2 * 160; > #elif defined __s390__ > nsp -= 2 * 96; > +#elif defined __aarch64__ > #else > #error "unrecognized target" > #endif >
diff --git a/gcc/common/config/aarch64/aarch64-common.c b/gcc/common/config/aarch64/aarch64-common.c index a0b7f48..474dfa1 100644 --- a/gcc/common/config/aarch64/aarch64-common.c +++ b/gcc/common/config/aarch64/aarch64-common.c @@ -107,6 +107,21 @@ aarch64_handle_option (struct gcc_options *opts, } } +/* -fsplit-stack uses a TCB field available on glibc-2.27. GLIBC also + exports symbol, __tcb_private_ss, to signal it has the field available + on TCB bloc. This aims to prevent binaries linked against newer + GLIBC to run on non-supported ones. */ + +static bool +aarch64_supports_split_stack (bool report ATTRIBUTE_UNUSED, + struct gcc_options *opts ATTRIBUTE_UNUSED) +{ + return true; +} + +#undef TARGET_SUPPORTS_SPLIT_STACK +#define TARGET_SUPPORTS_SPLIT_STACK aarch64_supports_split_stack + struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER; /* An ISA extension in the co-processor and main instruction set space. */ @@ -340,4 +355,3 @@ aarch64_rewrite_mcpu (int argc, const char **argv) } #undef AARCH64_CPU_NAME_LENGTH - diff --git a/gcc/config/aarch64/aarch64-linux.h b/gcc/config/aarch64/aarch64-linux.h index c45fc1d..b8daba4 100644 --- a/gcc/config/aarch64/aarch64-linux.h +++ b/gcc/config/aarch64/aarch64-linux.h @@ -80,8 +80,6 @@ } \ while (0) -#define TARGET_ASM_FILE_END file_end_indicate_exec_stack - /* Uninitialized common symbols in non-PIE executables, even with strong definitions in dependent shared libraries, will resolve to COPY relocated symbol in the executable. See PR65780. 
*/ diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index e397ff4..5bb6411 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -396,6 +396,8 @@ void aarch64_err_no_fpadvsimd (machine_mode, const char *); void aarch64_expand_epilogue (bool); void aarch64_expand_mov_immediate (rtx, rtx); void aarch64_expand_prologue (void); +void aarch64_expand_split_stack_prologue (void); +void aarch64_split_stack_space_check (rtx, rtx); void aarch64_expand_vector_init (rtx, rtx); void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, const_tree, unsigned); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 9aa59e7..ae014909 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -951,7 +951,12 @@ aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest, snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr); output_asm_insn (buffer, operands); - snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr); + if (GET_CODE (operands[pos_label]) == LABEL_REF) + snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, + label_ptr); + else + snprintf (buffer, sizeof (buffer), "b\t%%%d\n%s:", pos_label, + label_ptr); operands[pos_label] = dest_label; output_asm_insn (buffer, operands); return ""; @@ -10365,7 +10370,7 @@ aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) /* Emit code to initialize STACK, which points to the next varargs stack argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used by named arguments. STACK is 8-byte aligned. 
*/ - t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx); + t = make_tree (TREE_TYPE (stack), crtl->args.internal_arg_pointer); if (cum->aapcs_stack_size > 0) t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD); t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t); @@ -15070,6 +15075,165 @@ aarch64_sched_can_speculate_insn (rtx_insn *insn) } } +/* -fsplit-stack support. */ + +/* A SYMBOL_REF for __morestack. */ +static GTY(()) rtx morestack_ref; + +/* Load split-stack area from thread pointer position. The split-stack is + allocate just before thread pointer. */ + +static rtx +aarch64_load_split_stack_value (bool use_hard_reg) +{ + /* Offset from thread pointer to split-stack area. */ + const int psso = -8; + + rtx ssvalue = use_hard_reg + ? gen_rtx_REG (Pmode, R9_REGNUM) : gen_reg_rtx (Pmode); + ssvalue = aarch64_load_tp (ssvalue); + rtx mem = gen_rtx_MEM (Pmode, plus_constant (Pmode, ssvalue, psso)); + emit_move_insn (ssvalue, mem); + return ssvalue; +} + +/* Emit -fsplit-stack prologue, which goes before the regular function + prologue. */ + +void +aarch64_expand_split_stack_prologue (void) +{ + rtx ssvalue, reg10, reg11, reg12, cc, cmp, jump; + HOST_WIDE_INT allocate; + rtx_code_label *ok_label = NULL; + + gcc_assert (flag_split_stack && reload_completed); + + /* It limits total maximum stack allocation on 4G so its value can be + materialized using two instructions at most (movn/movk). It might be + used by the linker to add some extra space for split calling non split + stack functions. 
*/ + allocate = cfun->machine->frame.frame_size; + /* A frame of exactly 4G must be rejected too: only bits [31:0] of the + size are materialized by the two insns emitted below. */ + if (allocate >= ((int64_t)1 << 32)) + { + sorry ("Stack frame larger than 4G is not supported for -fsplit-stack"); + return; + } + + if (morestack_ref == NULL_RTX) + { + morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack"); + SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL + | SYMBOL_FLAG_FUNCTION); + } + + ssvalue = aarch64_load_split_stack_value (true); + + /* Always emit two insns to calculate the requested stack, so the linker + can edit them when adjusting size for calling non-split-stack code. */ + reg10 = gen_rtx_REG (Pmode, R10_REGNUM); + emit_insn (gen_rtx_SET (reg10, GEN_INT (allocate & 0xffff))); + emit_insn (gen_insv_immdi (reg10, GEN_INT (16), + GEN_INT ((allocate & 0xffff0000) >> 16))); + emit_insn (gen_sub3_insn (reg10, stack_pointer_rtx, reg10)); + + ok_label = gen_label_rtx (); + + /* If function uses stacked arguments save the old stack value so morestack + can return it. */ + reg11 = gen_rtx_REG (Pmode, R11_REGNUM); + if (crtl->args.size + || cfun->machine->frame.saved_varargs_size) + emit_move_insn (reg11, stack_pointer_rtx); + + /* x12 holds the continuation address used to return to function. */ + reg12 = gen_rtx_REG (Pmode, R12_REGNUM); + aarch64_expand_mov_immediate (reg12, gen_rtx_LABEL_REF (VOIDmode, ok_label)); + + /* Jump to the __morestack call if the current ss guard does not suffice, + i.e. the guard (x9) is at or above the requested stack pointer (x10). */ + cc = aarch64_gen_compare_reg (GE, ssvalue, reg10); + cmp = gen_rtx_fmt_ee (GE, VOIDmode, cc, const0_rtx); + jump = gen_split_stack_cond_call (morestack_ref, cmp, ok_label, reg12); + + aarch64_emit_unlikely_jump (jump); + JUMP_LABEL (jump) = ok_label; + LABEL_NUSES (ok_label)++; + + /* __morestack will call us here. */ + emit_label (ok_label); +} + +/* Implement TARGET_ASM_FILE_END. 
*/ + +static void +aarch64_file_end (void) +{ + file_end_indicate_exec_stack (); + + if (flag_split_stack) + { + file_end_indicate_split_stack (); + + switch_to_section (data_section); + fprintf (asm_out_file, "\t.align 3\n"); + fprintf (asm_out_file, "\t.quad __libc_tcb_private_ss\n"); + } +} + +/* Return the internal arg pointer used for function incoming arguments. */ + +static rtx +aarch64_internal_arg_pointer (void) +{ + if (flag_split_stack + && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl)) + == NULL)) + { + if (cfun->machine->frame.split_stack_arg_pointer == NULL_RTX) + { + rtx pat; + + cfun->machine->frame.split_stack_arg_pointer = gen_reg_rtx (Pmode); + REG_POINTER (cfun->machine->frame.split_stack_arg_pointer) = 1; + + /* Put the pseudo initialization right after the note at the + beginning of the function. */ + pat = gen_rtx_SET (cfun->machine->frame.split_stack_arg_pointer, + gen_rtx_REG (Pmode, R11_REGNUM)); + push_topmost_sequence (); + emit_insn_after (pat, get_insns ()); + pop_topmost_sequence (); + } + return plus_constant (Pmode, cfun->machine->frame.split_stack_arg_pointer, + FIRST_PARM_OFFSET (current_function_decl)); + } + return virtual_incoming_args_rtx; +} + +/* Emit -fsplit-stack dynamic stack allocation space check: compare the + stack pointer that allocating SIZE bytes would yield against the + split-stack guard and jump to LABEL according to the result. */ + +void +aarch64_split_stack_space_check (rtx size, rtx label) +{ + rtx ssvalue, cc, cmp, jump, temp; + + /* Load __private_ss from TCB. */ + ssvalue = aarch64_load_split_stack_value (false); + + /* TEMP is the operand compared against the guard below, so it must + hold the stack pointer minus the required stack. */ + temp = gen_reg_rtx (Pmode); + size = force_reg (Pmode, size); + emit_move_insn (temp, gen_rtx_MINUS (Pmode, stack_pointer_rtx, size)); + + /* Jump to label call if current ss guard is not suffice. 
*/ + cc = aarch64_gen_compare_reg (GE, temp, ssvalue); + cmp = gen_rtx_fmt_ee (GEU, VOIDmode, cc, const0_rtx); + jump = emit_jump_insn (gen_condjump (cmp, cc, label)); + JUMP_LABEL (jump) = label; +} + /* Target-specific selftests. */ #if CHECKING_P @@ -15142,6 +15306,9 @@ aarch64_run_selftests (void) #undef TARGET_ASM_FILE_START #define TARGET_ASM_FILE_START aarch64_start_file +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END aarch64_file_end + #undef TARGET_ASM_OUTPUT_MI_THUNK #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk @@ -15227,6 +15394,9 @@ aarch64_run_selftests (void) #undef TARGET_FRAME_POINTER_REQUIRED #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required +#undef TARGET_INTERNAL_ARG_POINTER +#define TARGET_INTERNAL_ARG_POINTER aarch64_internal_arg_pointer + #undef TARGET_GIMPLE_FOLD_BUILTIN #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 7f91edb..1406bec 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -607,6 +607,9 @@ struct GTY (()) aarch64_frame unsigned wb_candidate2; bool laid_out; + + /* Alternative internal arg pointer for -fsplit-stack. */ + rtx split_stack_arg_pointer; }; typedef struct GTY (()) machine_function diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index f876a2b..4c914de 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -150,6 +150,7 @@ UNSPECV_SET_FPSR ; Represent assign of FPSR content. UNSPECV_BLOCKAGE ; Represent a blockage UNSPECV_PROBE_STACK_RANGE ; Represent stack range probing. 
+ UNSPECV_SPLIT_STACK_CALL ; Represent a morestack call ] ) @@ -5631,3 +5632,58 @@ ;; ldp/stp peephole patterns (include "aarch64-ldpstp.md") + +;; Handle -fsplit-stack +(define_expand "split_stack_prologue" + [(const_int 0)] + "" +{ + aarch64_expand_split_stack_prologue (); + DONE; +}) + +;; If there are operand 0 bytes available on the stack, jump to +;; operand 1. +(define_expand "split_stack_space_check" + [(set (match_dup 2) (compare:CC (match_dup 3) (match_dup 2))) + (set (pc) (if_then_else + (geu (match_dup 4) (const_int 0)) + (label_ref (match_operand 1)) + (pc)))] + "" +{ + aarch64_split_stack_space_check (operands[0], operands[1]); + DONE; +}) + +;; A __morestack call using branch + +(define_expand "split_stack_cond_call" + [(match_operand 0 "aarch64_call_insn_operand" "") + (match_operand 1 "" "") + (match_operand 2 "" "") + (match_operand 3 "" "")] + "" +{ + emit_jump_insn (gen_split_stack_cond_call_di (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + + +(define_insn "split_stack_cond_call_<mode>" + [(set (pc) + (if_then_else + (match_operand 1 "aarch64_comparison_operator" "") + (label_ref (match_operand 2 "" "")) + (pc))) + (set (reg:P 1) (unspec_volatile:P [(match_operand:P 0 "aarch64_call_insn_operand" "") + (reg:P 1)] + UNSPECV_SPLIT_STACK_CALL)) + (use (match_operand:P 3 "register_operand" ""))] + "" + { + return aarch64_gen_far_branch (operands, 0, "Lbcond", "b%M1\\t"); + } + [(set_attr "type" "branch")] +) diff --git a/libgcc/config.host b/libgcc/config.host index ae8836e..10157ac 100644 --- a/libgcc/config.host +++ b/libgcc/config.host @@ -352,6 +352,7 @@ aarch64*-*-linux*) md_unwind_header=aarch64/linux-unwind.h tmake_file="${tmake_file} ${cpu_type}/t-aarch64" tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm" + tmake_file="${tmake_file} t-stack aarch64/t-stack-aarch64" ;; alpha*-*-linux*) tmake_file="${tmake_file} alpha/t-alpha alpha/t-ieee t-crtfm alpha/t-linux" diff --git 
a/libgcc/config/aarch64/morestack-c.c b/libgcc/config/aarch64/morestack-c.c new file mode 100644 index 0000000..d065d33 --- /dev/null +++ b/libgcc/config/aarch64/morestack-c.c @@ -0,0 +1,87 @@ +/* AArch64 support for -fsplit-stack. + * Copyright (C) 2016 Free Software Foundation, Inc. + * + * This file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) any + * later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. + */ + +#ifndef inhibit_libc + +#include <stdint.h> +#include <stdlib.h> +#include <stddef.h> +#include "generic-morestack.h" + +#define INITIAL_STACK_SIZE 0x4000 +#define BACKOFF 0x1000 + +void __generic_morestack_set_initial_sp (void *sp, size_t len); +void *__morestack_get_guard (void); +void __morestack_set_guard (void *); +void *__morestack_make_guard (void *stack, size_t size); +void __morestack_load_mmap (void); + +/* split-stack area position from thread pointer. */ +static inline void * +ss_pointer (void) +{ +#define SS_OFFSET (-8) + return (void*) ((uintptr_t) __builtin_thread_pointer() + SS_OFFSET); +} + +/* Initialize the stack guard when the program starts or when a new + thread. This is called from a constructor using ctors section. 
*/ +void +__stack_split_initialize (void) +{ + register uintptr_t* sp __asm__ ("sp"); + uintptr_t *ss = ss_pointer (); + *ss = (uintptr_t)sp - INITIAL_STACK_SIZE; + __generic_morestack_set_initial_sp (sp, INITIAL_STACK_SIZE); +} + +/* Return current __private_ss. */ +void * +__morestack_get_guard (void) +{ + void **ss = ss_pointer (); + return *ss; +} + +/* Set __private_ss to ptr. */ +void +__morestack_set_guard (void *ptr) +{ + void **ss = ss_pointer (); + *ss = ptr; +} + +/* Return the stack guard value for given stack. */ +void * +__morestack_make_guard (void *stack, size_t size) +{ + return (void*)((uintptr_t) stack - size + BACKOFF); +} + +/* Make __stack_split_initialize a high priority constructor. */ +static void (*const ctors []) + __attribute__ ((used, section (".ctors.65535"), aligned (sizeof (void *)))) + = { __stack_split_initialize, __morestack_load_mmap }; + +#endif /* !defined (inhibit_libc) */ diff --git a/libgcc/config/aarch64/morestack.S b/libgcc/config/aarch64/morestack.S new file mode 100644 index 0000000..6dc9ba7 --- /dev/null +++ b/libgcc/config/aarch64/morestack.S @@ -0,0 +1,248 @@ +# AArch64 support for -fsplit-stack. +# Copyright (C) 2016 Free Software Foundation, Inc. + +# This file is part of GCC. + +# GCC is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 3, or (at your option) any later +# version. + +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. + +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. 
+ +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# <http://www.gnu.org/licenses/>. + +/* Define an entry point visible from C. */ +#define ENTRY(name) \ + .globl name; \ + .type name,%function; \ + .align 4; \ + name##: + +#define END(name) \ + .size name,.-name + +/* __morestack frame size. */ +#define MORESTACK_FRAMESIZE 112 +/* Offset from __morestack frame where the new stack size is saved and + passed to __generic_morestack. */ +#define NEWSTACK_SAVE 96 + +# Excess space needed to call ld.so resolver for lazy plt resolution. +# Go uses sigaltstack so this doesn't need to also cover signal frame size. +#define BACKOFF 0x1000 +# Large excess allocated when calling non-split-stack code. +#define NON_SPLIT_STACK 0x100000 + +/* split-stack area position from thread pointer. */ +#define SPLITSTACK_PTR_TP -8 + + .text +ENTRY(__morestack_non_split) + .cfi_startproc +# We use a cleanup to restore the tcbhead_t.__private_ss if +# an exception is thrown through this code. +# x10 holds the requested stack pointer and __morestack derives the size +# as sp - x10, so the extra space is requested by lowering x10. + sub x10, x10, NON_SPLIT_STACK + .cfi_endproc +END(__morestack_non_split) +# Fall through into __morestack + +# This function is called with non-standard calling conventions, on entry +# x10 is the requested stack pointer. The split-stack prologue is in the +# form: +# +# function: +# mrs x9, tpidr_el0 +# mov x10, <required stack allocation> +# movk x10, #0x0, lsl #16 +# sub x10, sp, x10 +# mov x11, sp # if function has stacked arguments +# adrp x12, main_fn_entry +# add x12, x12, :lo12:main_fn_entry +# cmp x9, x10 +# b.lt <main_fn_entry> +# b __morestack +# main_fn_entry: +# [function prologue] +# +# The N bit is also restored to indicate that the function is called +# (so the prologue addition can set up the argument pointer correctly). 
+ +ENTRY(__morestack) +.LFB1: + .cfi_startproc + +#ifdef __PIC__ + .cfi_personality 0x9b,DW.ref.__gcc_personality_v0 + .cfi_lsda 0x1b,.LLSDA1 +#else + .cfi_personality 0x3,__gcc_personality_v0 + .cfi_lsda 0x3,.LLSDA1 +#endif + + # Calculate requested stack size. + sub x10, sp, x10 + # Save parameters + stp x29, x30, [sp, -MORESTACK_FRAMESIZE]! + .cfi_def_cfa_offset MORESTACK_FRAMESIZE + .cfi_offset 29, -MORESTACK_FRAMESIZE + .cfi_offset 30, -MORESTACK_FRAMESIZE+8 + add x29, sp, 0 + .cfi_def_cfa_register 29 + # Adjust the requested stack size for the frame pointer save. + stp x0, x1, [sp, 16] + stp x2, x3, [sp, 32] + add x10, x10, BACKOFF + stp x4, x5, [sp, 48] + stp x6, x7, [sp, 64] + stp x8, x12, [sp, 80] + str x10, [sp, 96] + + # void __morestack_block_signals (void) + bl __morestack_block_signals + + # void *__generic_morestack (size_t *pframe_size, + # void *old_stack, + # size_t param_size) + # pframe_size: is the size of the required stack frame (the function + # amount of space remaining on the allocated stack). + # old_stack: points at the parameters the old stack + # param_size: size in bytes of parameters to copy to the new stack. + add x0, x29, NEWSTACK_SAVE + add x1, x29, MORESTACK_FRAMESIZE + mov x2, 0 + bl __generic_morestack + + # Start using new stack + mov sp, x0 + + # Set __private_ss stack guard for the new stack. + ldr x9, [x29, NEWSTACK_SAVE] + add x0, x0, BACKOFF + sub x0, x0, x9 +.LEHB0: + mrs x1, tpidr_el0 + str x0, [x1, SPLITSTACK_PTR_TP] + + # void __morestack_unblock_signals (void) + bl __morestack_unblock_signals + + # Set up for a call to the target function. + ldp x0, x1, [x29, 16] + ldp x2, x3, [x29, 32] + ldp x4, x5, [x29, 48] + ldp x6, x7, [x29, 64] + ldp x8, x12, [x29, 80] + add x11, x29, MORESTACK_FRAMESIZE + # Indicate __morestack was called. 
+ cmp x12, 0 + blr x12 + + stp x0, x1, [x29, 16] + stp x2, x3, [x29, 32] + stp x4, x5, [x29, 48] + stp x6, x7, [x29, 64] + + bl __morestack_block_signals + + # void *__generic_releasestack (size_t *pavailable) + add x0, x29, NEWSTACK_SAVE + bl __generic_releasestack + + # Reset __private_ss stack guard to value for old stack + ldr x9, [x29, NEWSTACK_SAVE] + add x0, x0, BACKOFF + sub x0, x0, x9 + + # Update TCB split stack field +.LEHE0: + mrs x1, tpidr_el0 + str x0, [x1, SPLITSTACK_PTR_TP] + + bl __morestack_unblock_signals + + # Use old stack again. + add sp, x29, MORESTACK_FRAMESIZE + + ldp x0, x1, [x29, 16] + ldp x2, x3, [x29, 32] + ldp x4, x5, [x29, 48] + ldp x6, x7, [x29, 64] + ldp x29, x30, [x29] + + .cfi_remember_state + .cfi_restore 30 + .cfi_restore 29 + .cfi_def_cfa 31, 0 + + ret + +# This is the cleanup code called by the stack unwinder when +# unwinding through code between .LEHB0 and .LEHE0 above. +cleanup: + .cfi_restore_state + # Reuse the new stack allocation to save/restore the + # exception header + str x0, [x29, NEWSTACK_SAVE] + # size_t __generic_findstack (void *stack) + add x0, x29, MORESTACK_FRAMESIZE + bl __generic_findstack + sub x0, x29, x0 + add x0, x0, BACKOFF + # Restore split-stack guard value + mrs x1, tpidr_el0 + str x0, [x1, SPLITSTACK_PTR_TP] + ldr x0, [x29, NEWSTACK_SAVE] + b _Unwind_Resume + .cfi_endproc +END(__morestack) + + .section .gcc_except_table,"a",@progbits + .align 4 +.LLSDA1: + # @LPStart format (omit) + .byte 0xff + # @TType format (omit) + .byte 0xff + # Call-site format (uleb128) + .byte 0x1 + # Call-site table length + .uleb128 .LLSDACSE1-.LLSDACSB1 +.LLSDACSB1: + # region 0 start + .uleb128 .LEHB0-.LFB1 + # length + .uleb128 .LEHE0-.LEHB0 + # landing pad + .uleb128 cleanup-.LFB1 + # no action (ie a cleanup) + .uleb128 0 +.LLSDACSE1: + + + .global __gcc_personality_v0 +#ifdef __PIC__ + # Build a position independent reference to the personality function. 
+ .hidden DW.ref.__gcc_personality_v0 + .weak DW.ref.__gcc_personality_v0 + .section .data.DW.ref.__gcc_personality_v0,"awG",@progbits,DW.ref.__gcc_personality_v0,comdat + .type DW.ref.__gcc_personality_v0, @object + .align 3 +DW.ref.__gcc_personality_v0: + .size DW.ref.__gcc_personality_v0, 8 + .quad __gcc_personality_v0 +#endif + + .section .note.GNU-stack,"",@progbits + .section .note.GNU-split-stack,"",@progbits + .section .note.GNU-no-split-stack,"",@progbits diff --git a/libgcc/config/aarch64/t-stack-aarch64 b/libgcc/config/aarch64/t-stack-aarch64 new file mode 100644 index 0000000..4babb4e --- /dev/null +++ b/libgcc/config/aarch64/t-stack-aarch64 @@ -0,0 +1,3 @@ +# Makefile fragment to support -fsplit-stack for aarch64. +LIB2ADD_ST += $(srcdir)/config/aarch64/morestack.S \ + $(srcdir)/config/aarch64/morestack-c.c diff --git a/libgcc/generic-morestack.c b/libgcc/generic-morestack.c index adbe436..e91ceb4 100644 --- a/libgcc/generic-morestack.c +++ b/libgcc/generic-morestack.c @@ -943,6 +943,7 @@ __splitstack_find (void *segment_arg, void *sp, size_t *len, nsp -= 2 * 160; #elif defined __s390__ nsp -= 2 * 96; +#elif defined __aarch64__ #else #error "unrecognized target" #endif