diff mbox series

[ARM/FDPIC,v3,04/21,ARM] FDPIC: Add support for FDPIC for arm architecture

Message ID 20181011133518.17258-5-christophe.lyon@st.com
State Superseded
Headers show
Series FDPIC ABI for ARM | expand

Commit Message

Christophe Lyon Oct. 11, 2018, 1:34 p.m. UTC
The FDPIC register is hard-coded to r9, as defined in the ABI.

We have to disable tailcall optimizations if we don't know if the
target function is in the same module. If not, we have to set r9 to
the value associated with the target module.

When generating a symbol address, we have to take into account whether
it is a pointer to data or to a function, because different
relocations are needed.

2018-XX-XX  Christophe Lyon  <christophe.lyon@st.com>
	Mickaël Guêné <mickael.guene@st.com>

	* config/arm/arm-c.c (__FDPIC__): Define new pre-processor macro
	in FDPIC mode.
	* config/arm/arm-protos.h (arm_load_function_descriptor): Declare
	new function.
	* config/arm/arm.c (arm_option_override): Define pic register to
	FDPIC_REGNUM.
	(arm_function_ok_for_sibcall): Disable sibcall optimization if we
	have no decl or go through PLT.
	(arm_load_pic_register): Handle TARGET_FDPIC.
	(arm_is_segment_info_known): New function.
	(arm_pic_static_addr): Add support for FDPIC.
	(arm_load_function_descriptor): New function.
	(arm_assemble_integer): Add support for FDPIC.
	* config/arm/arm.h (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED):
	Define. (FDPIC_REGNUM): New define.
	* config/arm/arm.md (call): Add support for FDPIC.
	(call_value): Likewise.
	(*restore_pic_register_after_call): New pattern.
	(untyped_call): Disable if FDPIC.
	(untyped_return): Likewise.
	* config/arm/unspecs.md (UNSPEC_PIC_RESTORE): New.

-- 
2.6.3

Comments

Richard Earnshaw (lists) Oct. 12, 2018, 10:45 a.m. UTC | #1
On 11/10/18 14:34, Christophe Lyon wrote:
> The FDPIC register is hard-coded to r9, as defined in the ABI.

> 

> We have to disable tailcall optimizations if we don't know if the

> target function is in the same module. If not, we have to set r9 to

> the value associated with the target module.

> 

> When generating a symbol address, we have to take into account whether

> it is a pointer to data or to a function, because different

> relocations are needed.

> 

> 2018-XX-XX  Christophe Lyon  <christophe.lyon@st.com>

> 	Mickaël Guêné <mickael.guene@st.com>

> 

> 	* config/arm/arm-c.c (__FDPIC__): Define new pre-processor macro

> 	in FDPIC mode.

> 	* config/arm/arm-protos.h (arm_load_function_descriptor): Declare

> 	new function.

> 	* config/arm/arm.c (arm_option_override): Define pic register to

> 	FDPIC_REGNUM.

> 	(arm_function_ok_for_sibcall) Disable sibcall optimization if we


Missing colon.

> 	have no decl or go through PLT.

> 	(arm_load_pic_register): Handle TARGET_FDPIC.

> 	(arm_is_segment_info_known): New function.

> 	(arm_pic_static_addr): Add support for FDPIC.

> 	(arm_load_function_descriptor): New function.

> 	(arm_assemble_integer): Add support for FDPIC.

> 	* config/arm/arm.h (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED):

> 	Define. (FDPIC_REGNUM): New define.

> 	* config/arm/arm.md (call): Add support for FDPIC.

> 	(call_value): Likewise.

> 	(*restore_pic_register_after_call): New pattern.

> 	(untyped_call): Disable if FDPIC.

> 	(untyped_return): Likewise.

> 	* config/arm/unspecs.md (UNSPEC_PIC_RESTORE): New.

> 


Other comments inline.

> diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c

> index 4471f79..90733cc 100644

> --- a/gcc/config/arm/arm-c.c

> +++ b/gcc/config/arm/arm-c.c

> @@ -202,6 +202,8 @@ arm_cpu_builtins (struct cpp_reader* pfile)

>        builtin_define ("__ARM_EABI__");

>      }

>  

> +  def_or_undef_macro (pfile, "__FDPIC__", TARGET_FDPIC);

> +

>    def_or_undef_macro (pfile, "__ARM_ARCH_EXT_IDIV__", TARGET_IDIV);

>    def_or_undef_macro (pfile, "__ARM_FEATURE_IDIV", TARGET_IDIV);

>  

> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h

> index 0dfb3ac..28cafa8 100644

> --- a/gcc/config/arm/arm-protos.h

> +++ b/gcc/config/arm/arm-protos.h

> @@ -136,6 +136,7 @@ extern int arm_max_const_double_inline_cost (void);

>  extern int arm_const_double_inline_cost (rtx);

>  extern bool arm_const_double_by_parts (rtx);

>  extern bool arm_const_double_by_immediates (rtx);

> +extern rtx arm_load_function_descriptor (rtx funcdesc);

>  extern void arm_emit_call_insn (rtx, rtx, bool);

>  bool detect_cmse_nonsecure_call (tree);

>  extern const char *output_call (rtx *);

> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c

> index 8810df5..92ae24b 100644

> --- a/gcc/config/arm/arm.c

> +++ b/gcc/config/arm/arm.c

> @@ -3470,6 +3470,14 @@ arm_option_override (void)

>    if (flag_pic && TARGET_VXWORKS_RTP)

>      arm_pic_register = 9;

>  

> +  /* If in FDPIC mode then force arm_pic_register to be r9.  */

> +  if (TARGET_FDPIC)

> +    {

> +      arm_pic_register = FDPIC_REGNUM;

> +      if (TARGET_ARM_ARCH < 7)

> +	error ("FDPIC mode is not supported on architectures older than Armv7");


What properties of FDPIC impose this requirement?  Does it also apply to
Armv8-m.baseline?

> +    }

> +

>    if (arm_pic_register_string != NULL)

>      {

>        int pic_register = decode_reg_name (arm_pic_register_string);

> @@ -7251,6 +7259,21 @@ arm_function_ok_for_sibcall (tree decl, tree exp)

>    if (cfun->machine->sibcall_blocked)

>      return false;

>  

> +  if (TARGET_FDPIC)

> +    {

> +      /* In FDPIC, never tailcall something for which we have no decl:

> +	 the target function could be in a different module, requiring

> +	 a different FDPIC register value.  */

> +      if (decl == NULL)

> +	return false;

> +

> +      /* Don't tailcall if we go through the PLT since the FDPIC

> +	 register is then corrupted and we don't restore it after

> +	 static function calls.  */

> +      if (!targetm.binds_local_p (decl))

> +	return false;

> +    }

> +

>    /* Never tailcall something if we are generating code for Thumb-1.  */

>    if (TARGET_THUMB1)

>      return false;

> @@ -7629,7 +7652,9 @@ arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)

>  {

>    rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

>  

> -  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)

> +  if (crtl->uses_pic_offset_table == 0

> +      || TARGET_SINGLE_PIC_BASE

> +      || TARGET_FDPIC)

>      return;

>  

>    gcc_assert (flag_pic);

> @@ -7697,28 +7722,140 @@ arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)

>    emit_use (pic_reg);

>  }

>  

> +/* Try to know if the object will go in text or data segment. This is


"Try to determine whether an object, referenced via ORIG, will be placed
in the text or data segment."
> +   used in FDPIC mode, to decide which relocations to use when

> +   accessing ORIG. IS_READONLY is set to true if ORIG is a read-only


Two spaces after a period.

> +   location, false otherwise.  */


You've missed the documentation of the return value: does returning true
mean text vs data, or does it mean we know which it will go in, but
don't have to return that information here?

Generally, won't this break big time if users compile with
-ffunction-sections or -fdata-sections?  Is it sufficient to match
.text.* as being text and .data.* for data?


> +static bool

> +arm_is_segment_info_known (rtx orig, bool *is_readonly)

> +{

> +  bool res = false;

> +

> +  *is_readonly = false;

> +

> +  if (GET_CODE (orig) == LABEL_REF)

> +    {

> +      res = true;

> +      *is_readonly = true;

> +    }

> +  else if (SYMBOL_REF_P (orig))

> +    {

> +      if (CONSTANT_POOL_ADDRESS_P (orig))

> +	{

> +	  res = true;

> +	  *is_readonly = true;

> +	}

> +      else if (SYMBOL_REF_LOCAL_P (orig)

> +	       && !SYMBOL_REF_EXTERNAL_P (orig)

> +	       && SYMBOL_REF_DECL (orig)

> +	       && (!DECL_P (SYMBOL_REF_DECL (orig))

> +		   || !DECL_COMMON (SYMBOL_REF_DECL (orig))))

> +	{

> +	  tree decl = SYMBOL_REF_DECL (orig);

> +	  tree init = (TREE_CODE (decl) == VAR_DECL)

> +	    ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)

> +	    ? decl : 0;

> +	  int reloc = 0;

> +	  bool named_section, readonly;

> +

> +	  if (init && init != error_mark_node)

> +	    reloc = compute_reloc_for_constant (init);

> +

> +	  named_section = TREE_CODE (decl) == VAR_DECL

> +	    && lookup_attribute ("section", DECL_ATTRIBUTES (decl));

> +	  readonly = decl_readonly_section (decl, reloc);

> +

> +	  /* We don't know where the link script will put a named

> +	     section, so return false in such a case.  */

> +	  res = !named_section;

> +

> +	  if (!named_section)

> +	    *is_readonly = readonly;

> +	}

> +      else

> +	{

> +	  /* We don't know.  */

> +	  res = false;

> +	}

> +    }

> +  else

> +    gcc_unreachable ();

> +

> +  return res;

> +}

> +

>  /* Generate code to load the address of a static var when flag_pic is set.  */

>  static rtx_insn *

>  arm_pic_static_addr (rtx orig, rtx reg)

>  {

>    rtx l1, labelno, offset_rtx;

> +  rtx_insn *insn;

>  

>    gcc_assert (flag_pic);

>  

> -  /* We use an UNSPEC rather than a LABEL_REF because this label

> -     never appears in the code stream.  */

> -  labelno = GEN_INT (pic_labelno++);

> -  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);

> -  l1 = gen_rtx_CONST (VOIDmode, l1);

> +  bool is_readonly = false;

> +  bool info_known = false;

>  

> -  /* On the ARM the PC register contains 'dot + 8' at the time of the

> -     addition, on the Thumb it is 'dot + 4'.  */

> -  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);

> -  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),

> -                               UNSPEC_SYMBOL_OFFSET);

> -  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

> +  if (TARGET_FDPIC

> +      && SYMBOL_REF_P (orig)

> +      && !SYMBOL_REF_FUNCTION_P (orig))

> +      info_known = arm_is_segment_info_known (orig, &is_readonly);

>  

> -  return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));

> +  if (TARGET_FDPIC

> +      && SYMBOL_REF_P (orig)

> +      && !SYMBOL_REF_FUNCTION_P (orig)

> +      && !info_known)

> +    {

> +      /* We don't know where orig is stored, so we have be

> +	 pessimistic and use a GOT relocation.  */

> +      rtx pat;

> +      rtx mem;

> +      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

> +

> +      pat = gen_calculate_pic_address (reg, pic_reg, orig);

> +

> +      /* Make the MEM as close to a constant as possible.  */

> +      mem = SET_SRC (pat);

> +      gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));

> +      MEM_READONLY_P (mem) = 1;

> +      MEM_NOTRAP_P (mem) = 1;

> +

> +      insn = emit_insn (pat);

> +    }

> +  else if (TARGET_FDPIC

> +	   && SYMBOL_REF_P (orig)

> +	   && (SYMBOL_REF_FUNCTION_P (orig)

> +	       || (info_known && !is_readonly)))

> +    {

> +      /* We use the GOTOFF relocation.  */

> +      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

> +

> +      rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);

> +      emit_insn (gen_movsi (reg, l1));

> +      insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));

> +    }

> +  else

> +    {

> +      /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use

> +	 PC-relative access.  */

> +      /* We use an UNSPEC rather than a LABEL_REF because this label

> +	 never appears in the code stream.  */

> +      labelno = GEN_INT (pic_labelno++);

> +      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);

> +      l1 = gen_rtx_CONST (VOIDmode, l1);

> +

> +      /* On the ARM the PC register contains 'dot + 8' at the time of the

> +	 addition, on the Thumb it is 'dot + 4'.  */

> +      offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);

> +      offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),

> +				   UNSPEC_SYMBOL_OFFSET);

> +      offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

> +

> +      insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,

> +						   labelno));

> +    }

> +

> +  return insn;

>  }

>  

>  /* Return nonzero if X is valid as an ARM state addressing register.  */

> @@ -15933,9 +16070,36 @@ get_jump_table_size (rtx_jump_table_data *insn)

>    return 0;

>  }

>  

> +/* Emit insns to load the function address from FUNCDESC (an FDPIC

> +   function descriptor) into a register and the GOT address into the

> +   FDPIC register, returning an rtx for the register holding the

> +   function address.  */

> +

> +rtx

> +arm_load_function_descriptor (rtx funcdesc)

> +{

> +  rtx fnaddr_reg = gen_reg_rtx (Pmode);

> +  rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

> +  rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);

> +  rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));

> +  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));

> +

> +  emit_move_insn (fnaddr_reg, fnaddr);

> +  /* The ABI requires the entry point address to be loaded first, so

> +     prevent the load from being moved after that of the GOT

> +     address.  */

> +  XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,

> +					gen_rtvec (2, pic_reg, gotaddr),

> +					UNSPEC_PIC_RESTORE);

> +  XVECEXP (par, 0, 1) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));

> +  XVECEXP (par, 0, 2) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));

Shouldn't one of these be fnaddr_reg and the other pic_reg?

> +  emit_insn (par);

> +

> +  return fnaddr_reg;

> +}

> +

>  /* Return the maximum amount of padding that will be inserted before

>     label LABEL.  */

> -

>  static HOST_WIDE_INT

>  get_label_padding (rtx label)

>  {

> @@ -22890,9 +23054,37 @@ arm_assemble_integer (rtx x, unsigned int size, int aligned_p)

>  		  && (!SYMBOL_REF_LOCAL_P (x)

>  		      || (SYMBOL_REF_DECL (x)

>  			  ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))

> -	    fputs ("(GOT)", asm_out_file);

> +	    {

> +	      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))

> +		fputs ("(GOTFUNCDESC)", asm_out_file);

> +	      else

> +		fputs ("(GOT)", asm_out_file);

> +	    }

>  	  else

> -	    fputs ("(GOTOFF)", asm_out_file);

> +	    {

> +	      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))

> +		fputs ("(GOTOFFFUNCDESC)", asm_out_file);

> +	      else

> +		{

> +		  bool is_readonly;

> +

> +		  if (arm_is_segment_info_known (x, &is_readonly))

> +		    fputs ("(GOTOFF)", asm_out_file);

> +		  else

> +		    fputs ("(GOT)", asm_out_file);

> +		}

> +	    }

> +	}

> +

> +      /* For FDPIC we also have to mark symbol for .data section.  */

> +      if (TARGET_FDPIC

> +	  && NEED_GOT_RELOC

> +	  && flag_pic

> +	  && !making_const_table

> +	  && SYMBOL_REF_P (x))

> +	{

> +	  if (SYMBOL_REF_FUNCTION_P (x))

> +	    fputs ("(FUNCDESC)", asm_out_file);

>  	}

>        fputc ('\n', asm_out_file);

>        return true;

> diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h

> index 34894c0..4671d64 100644

> --- a/gcc/config/arm/arm.h

> +++ b/gcc/config/arm/arm.h

> @@ -871,6 +871,9 @@ extern int arm_arch_cmse;

>     Pascal), so the following is not true.  */

>  #define STATIC_CHAIN_REGNUM	12

>  

> +/* r9 is the FDPIC register (base register for GOT and FUNCDESC accesses).  */

> +#define FDPIC_REGNUM		9

> +

>  /* Define this to be where the real frame pointer is if it is not possible to

>     work out the offset between the frame pointer and the automatic variables

>     until after register allocation has taken place.  FRAME_POINTER_REGNUM

> @@ -1927,6 +1930,10 @@ extern unsigned arm_pic_register;

>     data addresses in memory.  */

>  #define PIC_OFFSET_TABLE_REGNUM arm_pic_register

>  

> +/* For FDPIC, the FDPIC register is call-clobbered (otherwise PLT

> +   entries would need to handle saving and restoring it).  */

> +#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED TARGET_FDPIC

> +

>  /* We can't directly access anything that contains a symbol,

>     nor can we indirect via the constant pool.  One exception is

>     UNSPEC_TLS, which is always PIC.  */

> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md

> index 270b8e4..09a0701 100644

> --- a/gcc/config/arm/arm.md

> +++ b/gcc/config/arm/arm.md

> @@ -8031,6 +8031,23 @@

>      rtx callee, pat;

>      tree addr = MEM_EXPR (operands[0]);

>      

> +    /* Force FDPIC register (r9) before call.  */

> +    if (TARGET_FDPIC)

> +      {

> +	/* No need to update r9 if calling a static function.

> +	   In other words: set r9 for indirect or non-local calls.  */

> +	callee = XEXP (operands[0], 0);

> +	if (!SYMBOL_REF_P (callee)

> +	    || !SYMBOL_REF_LOCAL_P (callee)

> +	    || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

> +	  {

> +	    emit_insn (gen_blockage ());

> +	    rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

> +	    emit_move_insn (pic_reg, get_hard_reg_initial_val (Pmode, FDPIC_REGNUM));

> +	    emit_insn (gen_rtx_USE (VOIDmode, pic_reg));

> +	 }

> +      }

> +

>      /* In an untyped call, we can get NULL for operand 2.  */

>      if (operands[2] == NULL_RTX)

>        operands[2] = const0_rtx;

> @@ -8044,6 +8061,13 @@

>  	: !REG_P (callee))

>        XEXP (operands[0], 0) = force_reg (Pmode, callee);

>  

> +    if (TARGET_FDPIC && !SYMBOL_REF_P (XEXP (operands[0], 0)))

> +      {

> +	/* Indirect call: set r9 with FDPIC value of callee.  */

> +	XEXP (operands[0], 0)

> +	  = arm_load_function_descriptor (XEXP (operands[0], 0));

> +      }

> +

>      if (detect_cmse_nonsecure_call (addr))

>        {

>  	pat = gen_nonsecure_call_internal (operands[0], operands[1],

> @@ -8055,10 +8079,38 @@

>  	pat = gen_call_internal (operands[0], operands[1], operands[2]);

>  	arm_emit_call_insn (pat, XEXP (operands[0], 0), false);

>        }

> +

> +    /* Restore FDPIC register (r9) after call.  */

> +    if (TARGET_FDPIC)

> +      {

> +	/* No need to update r9 if calling a static function.  */

> +	if (!SYMBOL_REF_P (callee)

> +	    || !SYMBOL_REF_LOCAL_P (callee)

> +	    || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

> +	  {

> +	    rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

> +	    emit_move_insn (pic_reg, get_hard_reg_initial_val (Pmode, FDPIC_REGNUM));

> +	    emit_insn (gen_rtx_USE (VOIDmode, pic_reg));

> +	    emit_insn (gen_blockage ());

> +	  }

> +      }

>      DONE;

>    }"

>  )

>  

> +(define_insn "*restore_pic_register_after_call"

> +  [(parallel [(unspec [(match_operand:SI 0 "s_register_operand" "=r,r")

> +		       (match_operand:SI 1 "nonimmediate_operand" "r,m")]

> +	       UNSPEC_PIC_RESTORE)

> +	      (use (match_dup 0))

> +	      (clobber (match_dup 0))])

> +  ]

> +  ""

> +  "@

> +  mov\t%0, %1

> +  ldr\t%0, %1"

> +)

> +

>  (define_expand "call_internal"

>    [(parallel [(call (match_operand 0 "memory_operand" "")

>  	            (match_operand 1 "general_operand" ""))

> @@ -8119,6 +8171,30 @@

>      rtx pat, callee;

>      tree addr = MEM_EXPR (operands[1]);

>      

> +    /* Force FDPIC register (r9) before call.  */

> +    if (TARGET_FDPIC)

> +      {

> +	/* No need to update the FDPIC register (r9) if calling a static function.

> +	   In other words: set r9 for indirect or non-local calls.  */

> +	callee = XEXP (operands[1], 0);

> +	if (!SYMBOL_REF_P (callee)

> +	    || !SYMBOL_REF_LOCAL_P (callee)

> +	    || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

> +	  {

> +	    rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));

> +

> +	    XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,

> +		gen_rtvec (2, gen_rtx_REG (Pmode, FDPIC_REGNUM),

> +			   get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)),

> +		UNSPEC_PIC_RESTORE);

> +	    XVECEXP (par, 0, 1)

> +	      = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));

> +	    XVECEXP (par, 0, 2)

> +	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));


Again, this looks suspicious.

> +	    emit_insn (par);

> +	  }

> +      }

> +

>      /* In an untyped call, we can get NULL for operand 2.  */

>      if (operands[3] == 0)

>        operands[3] = const0_rtx;

> @@ -8132,6 +8208,14 @@

>  	: !REG_P (callee))

>        XEXP (operands[1], 0) = force_reg (Pmode, callee);

>  

> +    if (TARGET_FDPIC

> +	&& !SYMBOL_REF_P (XEXP (operands[1], 0)))

> +      {

> +	/* Indirect call: set r9 with FDPIC value of callee.  */

> +	XEXP (operands[1], 0)

> +	  = arm_load_function_descriptor (XEXP (operands[1], 0));

> +      }

> +

>      if (detect_cmse_nonsecure_call (addr))

>        {

>  	pat = gen_nonsecure_call_value_internal (operands[0], operands[1],

> @@ -8144,6 +8228,28 @@

>  				       operands[2], operands[3]);

>  	arm_emit_call_insn (pat, XEXP (operands[1], 0), false);

>        }

> +    /* Restore FDPIC register (r9) after call.  */

> +    if (TARGET_FDPIC)

> +      {

> +	/* No need to update r9 if calling a static function.  */

> +	if (!SYMBOL_REF_P (callee)

> +	    || !SYMBOL_REF_LOCAL_P (callee)

> +	    || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

> +	  {

> +	    rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));

> +

> +	    XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,

> +		gen_rtvec (2, gen_rtx_REG (Pmode, FDPIC_REGNUM),

> +			   get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)),

> +		UNSPEC_PIC_RESTORE);

> +	    XVECEXP (par, 0, 1)

> +	      = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));

> +	    XVECEXP (par, 0, 2)

> +	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));


And again.

> +	    emit_insn (par);

> +	  }

> +      }

> +

>      DONE;

>    }"

>  )

> @@ -8486,7 +8592,7 @@

>  		    (const_int 0))

>  	      (match_operand 1 "" "")

>  	      (match_operand 2 "" "")])]

> -  "TARGET_EITHER"

> +  "TARGET_EITHER && !TARGET_FDPIC"

>    "

>    {

>      int i;

> @@ -8553,7 +8659,7 @@

>  (define_expand "untyped_return"

>    [(match_operand:BLK 0 "memory_operand" "")

>     (match_operand 1 "" "")]

> -  "TARGET_EITHER"

> +  "TARGET_EITHER && !TARGET_FDPIC"

>    "

>    {

>      int i;

> diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md

> index 1941673..349ae0e 100644

> --- a/gcc/config/arm/unspecs.md

> +++ b/gcc/config/arm/unspecs.md

> @@ -86,6 +86,7 @@

>    UNSPEC_PROBE_STACK    ; Probe stack memory reference

>    UNSPEC_NONSECURE_MEM	; Represent non-secure memory in ARMv8-M with

>  			; security extension

> +  UNSPEC_PIC_RESTORE	; Use to restore fdpic register

>  ])

>  

>  (define_c_enum "unspec" [

>
Christophe Lyon Oct. 19, 2018, 1:40 p.m. UTC | #2
On 12/10/2018 12:45, Richard Earnshaw (lists) wrote:
> On 11/10/18 14:34, Christophe Lyon wrote:

>> The FDPIC register is hard-coded to r9, as defined in the ABI.

>>

>> We have to disable tailcall optimizations if we don't know if the

>> target function is in the same module. If not, we have to set r9 to

>> the value associated with the target module.

>>

>> When generating a symbol address, we have to take into account whether

>> it is a pointer to data or to a function, because different

>> relocations are needed.

>>

>> 2018-XX-XX  Christophe Lyon  <christophe.lyon@st.com>

>> 	Mickaël Guêné <mickael.guene@st.com>

>>

>> 	* config/arm/arm-c.c (__FDPIC__): Define new pre-processor macro

>> 	in FDPIC mode.

>> 	* config/arm/arm-protos.h (arm_load_function_descriptor): Declare

>> 	new function.

>> 	* config/arm/arm.c (arm_option_override): Define pic register to

>> 	FDPIC_REGNUM.

>> 	(arm_function_ok_for_sibcall) Disable sibcall optimization if we

> 

> Missing colon.

> 

>> 	have no decl or go through PLT.

>> 	(arm_load_pic_register): Handle TARGET_FDPIC.

>> 	(arm_is_segment_info_known): New function.

>> 	(arm_pic_static_addr): Add support for FDPIC.

>> 	(arm_load_function_descriptor): New function.

>> 	(arm_assemble_integer): Add support for FDPIC.

>> 	* config/arm/arm.h (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED):

>> 	Define. (FDPIC_REGNUM): New define.

>> 	* config/arm/arm.md (call): Add support for FDPIC.

>> 	(call_value): Likewise.

>> 	(*restore_pic_register_after_call): New pattern.

>> 	(untyped_call): Disable if FDPIC.

>> 	(untyped_return): Likewise.

>> 	* config/arm/unspecs.md (UNSPEC_PIC_RESTORE): New.

>>

> 

> Other comments inline.

> 

>> diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c

>> index 4471f79..90733cc 100644

>> --- a/gcc/config/arm/arm-c.c

>> +++ b/gcc/config/arm/arm-c.c

>> @@ -202,6 +202,8 @@ arm_cpu_builtins (struct cpp_reader* pfile)

>>         builtin_define ("__ARM_EABI__");

>>       }

>>   

>> +  def_or_undef_macro (pfile, "__FDPIC__", TARGET_FDPIC);

>> +

>>     def_or_undef_macro (pfile, "__ARM_ARCH_EXT_IDIV__", TARGET_IDIV);

>>     def_or_undef_macro (pfile, "__ARM_FEATURE_IDIV", TARGET_IDIV);

>>   

>> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h

>> index 0dfb3ac..28cafa8 100644

>> --- a/gcc/config/arm/arm-protos.h

>> +++ b/gcc/config/arm/arm-protos.h

>> @@ -136,6 +136,7 @@ extern int arm_max_const_double_inline_cost (void);

>>   extern int arm_const_double_inline_cost (rtx);

>>   extern bool arm_const_double_by_parts (rtx);

>>   extern bool arm_const_double_by_immediates (rtx);

>> +extern rtx arm_load_function_descriptor (rtx funcdesc);

>>   extern void arm_emit_call_insn (rtx, rtx, bool);

>>   bool detect_cmse_nonsecure_call (tree);

>>   extern const char *output_call (rtx *);

>> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c

>> index 8810df5..92ae24b 100644

>> --- a/gcc/config/arm/arm.c

>> +++ b/gcc/config/arm/arm.c

>> @@ -3470,6 +3470,14 @@ arm_option_override (void)

>>     if (flag_pic && TARGET_VXWORKS_RTP)

>>       arm_pic_register = 9;

>>   

>> +  /* If in FDPIC mode then force arm_pic_register to be r9.  */

>> +  if (TARGET_FDPIC)

>> +    {

>> +      arm_pic_register = FDPIC_REGNUM;

>> +      if (TARGET_ARM_ARCH < 7)

>> +	error ("FDPIC mode is not supported on architectures older than Armv7");

> 

> What properties of FDPIC impose this requirement?  Does it also apply to

> Armv8-m.baseline?

> 

In fact, there was miscommunication on my side, resulting in a misunderstanding between Kyrill and myself, which I badly translated into this condition.

My initial plan was to submit a patch series tested on v7, and send the patches needed to support older architectures as a follow-up. The proper restriction is actually "CPUs that do not support ARM or Thumb2". As you may have noticed during the iterations of this patch series, I had failed to remove partial Thumb1 support hunks.

So really this should be rephrased, and rewritten as "FDPIC mode is supported on architecture versions that support ARM or Thumb-2", if that suits you. And the condition should thus be:
if (! TARGET_ARM && ! TARGET_THUMB2)
   error ("...")

This would also exclude Armv8-m.baseline, since it doesn't support Thumb2.
As a side note, I tried to build GCC master (without my patches) --with-cpu=cortex-m23, and both targets arm-eabi and arm-linux-gnueabi failed to build.

For arm-eabi, there are problems in newlib:
newlib/libc/sys/arm/crt0.S:145: Error: lo register required -- `add sl,r2,#256'
newlib/libc/sys/arm/trap.S:88: Error: lo register required -- `sub ip,sp,ip'

For arm-linux-gnueabi, the failure happens while building libgcc:
/home/christophe.lyon/src/GCC/sources/newlib/newlib/libc/machine/arm/setjmp.S:169: Error: selected processor does not support ARM opcodes
/newlib/newlib/libc/machine/arm/setjmp.S:176: Error: attempt to use an ARM instruction on a Thumb-only processor -- `stmea a1!,{ v1-v7,fp,ip,sp,lr }'
/newlib/newlib/libc/machine/arm/setjmp.S:186: Error: attempt to use an ARM instruction on a Thumb-only processor -- `mov a1,#0'
/newlib/newlib/libc/machine/arm/setjmp.S:188: Error: attempt to use an ARM instruction on a Thumb-only processor -- `tst lr,#1'
/newlib/newlib/libc/machine/arm/setjmp.S:188: Error: attempt to use an ARM instruction on a Thumb-only processor -- `moveq pc,lr'
/newlib/newlib/libc/machine/arm/setjmp.S:194: Error: selected processor does not support ARM opcodes
/newlib/newlib/libc/machine/arm/setjmp.S:203: Error: attempt to use an ARM instruction on a Thumb-only processor -- `ldmfd a1!,{ v1-v7,fp,ip,sp,lr }'
/newlib/newlib/libc/machine/arm/setjmp.S:214: Error: attempt to use an ARM instruction on a Thumb-only processor -- `movs a1,a2'
/newlib/newlib/libc/machine/arm/setjmp.S:218: Error: attempt to use an ARM instruction on a Thumb-only processor -- `moveq a1,#1'
/newlib/newlib/libc/machine/arm/setjmp.S:220: Error: attempt to use an ARM instruction on a Thumb-only processor -- `tst lr,#1'
/newlib/newlib/libc/machine/arm/setjmp.S:220: Error: attempt to use an ARM instruction on a Thumb-only processor -- `moveq pc,lr'


>> +    }

>> +

>>     if (arm_pic_register_string != NULL)

>>       {

>>         int pic_register = decode_reg_name (arm_pic_register_string);

>> @@ -7251,6 +7259,21 @@ arm_function_ok_for_sibcall (tree decl, tree exp)

>>     if (cfun->machine->sibcall_blocked)

>>       return false;

>>   

>> +  if (TARGET_FDPIC)

>> +    {

>> +      /* In FDPIC, never tailcall something for which we have no decl:

>> +	 the target function could be in a different module, requiring

>> +	 a different FDPIC register value.  */

>> +      if (decl == NULL)

>> +	return false;

>> +

>> +      /* Don't tailcall if we go through the PLT since the FDPIC

>> +	 register is then corrupted and we don't restore it after

>> +	 static function calls.  */

>> +      if (!targetm.binds_local_p (decl))

>> +	return false;

>> +    }

>> +

>>     /* Never tailcall something if we are generating code for Thumb-1.  */

>>     if (TARGET_THUMB1)

>>       return false;

>> @@ -7629,7 +7652,9 @@ arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)

>>   {

>>     rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

>>   

>> -  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)

>> +  if (crtl->uses_pic_offset_table == 0

>> +      || TARGET_SINGLE_PIC_BASE

>> +      || TARGET_FDPIC)

>>       return;

>>   

>>     gcc_assert (flag_pic);

>> @@ -7697,28 +7722,140 @@ arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)

>>     emit_use (pic_reg);

>>   }

>>   

>> +/* Try to know if the object will go in text or data segment. This is

> 

> "Try to determine whether an object, referenced via ORIG, will be placed

> in the text or data segment."

>> +   used in FDPIC mode, to decide which relocations to use when

>> +   accessing ORIG. IS_READONLY is set to true if ORIG is a read-only

> 

> Two spaces after a period.

> 

>> +   location, false otherwise.  */

> 

> You've missed the documentation of the return value: does returning true

> mean text vs data, or does it mean we know which it will go in, but

> don't have to return that information here.

> 

> Generally, won't this break big time if users compile with

> -ffunction-sections or -fdata-sections?  Is it sufficient to match

> .text.* as being text and .data.* for data?

> 


I compiled a small testcase with -ffunction-sections and -fdata-sections and noticed no problem.
The code below does not attempt to match section names; I'm not sure I understand your question.

> 

>> +static bool

>> +arm_is_segment_info_known (rtx orig, bool *is_readonly)

>> +{

>> +  bool res = false;

>> +

>> +  *is_readonly = false;

>> +

>> +  if (GET_CODE (orig) == LABEL_REF)

>> +    {

>> +      res = true;

>> +      *is_readonly = true;

>> +    }

>> +  else if (SYMBOL_REF_P (orig))

>> +    {

>> +      if (CONSTANT_POOL_ADDRESS_P (orig))

>> +	{

>> +	  res = true;

>> +	  *is_readonly = true;

>> +	}

>> +      else if (SYMBOL_REF_LOCAL_P (orig)

>> +	       && !SYMBOL_REF_EXTERNAL_P (orig)

>> +	       && SYMBOL_REF_DECL (orig)

>> +	       && (!DECL_P (SYMBOL_REF_DECL (orig))

>> +		   || !DECL_COMMON (SYMBOL_REF_DECL (orig))))

>> +	{

>> +	  tree decl = SYMBOL_REF_DECL (orig);

>> +	  tree init = (TREE_CODE (decl) == VAR_DECL)

>> +	    ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)

>> +	    ? decl : 0;

>> +	  int reloc = 0;

>> +	  bool named_section, readonly;

>> +

>> +	  if (init && init != error_mark_node)

>> +	    reloc = compute_reloc_for_constant (init);

>> +

>> +	  named_section = TREE_CODE (decl) == VAR_DECL

>> +	    && lookup_attribute ("section", DECL_ATTRIBUTES (decl));

>> +	  readonly = decl_readonly_section (decl, reloc);

>> +

>> +	  /* We don't know where the link script will put a named

>> +	     section, so return false in such a case.  */

>> +	  res = !named_section;

>> +

>> +	  if (!named_section)

>> +	    *is_readonly = readonly;

>> +	}

>> +      else

>> +	{

>> +	  /* We don't know.  */

>> +	  res = false;

>> +	}

>> +    }

>> +  else

>> +    gcc_unreachable ();

>> +

>> +  return res;

>> +}

>> +

>>   /* Generate code to load the address of a static var when flag_pic is set.  */

>>   static rtx_insn *

>>   arm_pic_static_addr (rtx orig, rtx reg)

>>   {

>>     rtx l1, labelno, offset_rtx;

>> +  rtx_insn *insn;

>>   

>>     gcc_assert (flag_pic);

>>   

>> -  /* We use an UNSPEC rather than a LABEL_REF because this label

>> -     never appears in the code stream.  */

>> -  labelno = GEN_INT (pic_labelno++);

>> -  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);

>> -  l1 = gen_rtx_CONST (VOIDmode, l1);

>> +  bool is_readonly = false;

>> +  bool info_known = false;

>>   

>> -  /* On the ARM the PC register contains 'dot + 8' at the time of the

>> -     addition, on the Thumb it is 'dot + 4'.  */

>> -  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);

>> -  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),

>> -                               UNSPEC_SYMBOL_OFFSET);

>> -  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

>> +  if (TARGET_FDPIC

>> +      && SYMBOL_REF_P (orig)

>> +      && !SYMBOL_REF_FUNCTION_P (orig))

>> +      info_known = arm_is_segment_info_known (orig, &is_readonly);

>>   

>> -  return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));

>> +  if (TARGET_FDPIC

>> +      && SYMBOL_REF_P (orig)

>> +      && !SYMBOL_REF_FUNCTION_P (orig)

>> +      && !info_known)

>> +    {

>> +      /* We don't know where orig is stored, so we have to be

>> +	 pessimistic and use a GOT relocation.  */

>> +      rtx pat;

>> +      rtx mem;

>> +      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

>> +

>> +      pat = gen_calculate_pic_address (reg, pic_reg, orig);

>> +

>> +      /* Make the MEM as close to a constant as possible.  */

>> +      mem = SET_SRC (pat);

>> +      gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));

>> +      MEM_READONLY_P (mem) = 1;

>> +      MEM_NOTRAP_P (mem) = 1;

>> +

>> +      insn = emit_insn (pat);

>> +    }

>> +  else if (TARGET_FDPIC

>> +	   && SYMBOL_REF_P (orig)

>> +	   && (SYMBOL_REF_FUNCTION_P (orig)

>> +	       || (info_known && !is_readonly)))

>> +    {

>> +      /* We use the GOTOFF relocation.  */

>> +      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

>> +

>> +      rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);

>> +      emit_insn (gen_movsi (reg, l1));

>> +      insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));

>> +    }

>> +  else

>> +    {

>> +      /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use

>> +	 PC-relative access.  */

>> +      /* We use an UNSPEC rather than a LABEL_REF because this label

>> +	 never appears in the code stream.  */

>> +      labelno = GEN_INT (pic_labelno++);

>> +      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);

>> +      l1 = gen_rtx_CONST (VOIDmode, l1);

>> +

>> +      /* On the ARM the PC register contains 'dot + 8' at the time of the

>> +	 addition, on the Thumb it is 'dot + 4'.  */

>> +      offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);

>> +      offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),

>> +				   UNSPEC_SYMBOL_OFFSET);

>> +      offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

>> +

>> +      insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,

>> +						   labelno));

>> +    }

>> +

>> +  return insn;

>>   }

>>   

>>   /* Return nonzero if X is valid as an ARM state addressing register.  */

>> @@ -15933,9 +16070,36 @@ get_jump_table_size (rtx_jump_table_data *insn)

>>     return 0;

>>   }

>>   

>> +/* Emit insns to load the function address from FUNCDESC (an FDPIC

>> +   function descriptor) into a register and the GOT address into the

>> +   FDPIC register, returning an rtx for the register holding the

>> +   function address.  */

>> +

>> +rtx

>> +arm_load_function_descriptor (rtx funcdesc)

>> +{

>> +  rtx fnaddr_reg = gen_reg_rtx (Pmode);

>> +  rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

>> +  rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);

>> +  rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));

>> +  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));

>> +

>> +  emit_move_insn (fnaddr_reg, fnaddr);

>> +  /* The ABI requires the entry point address to be loaded first, so

>> +     prevent the load from being moved after that of the GOT

>> +     address.  */

>> +  XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,

>> +					gen_rtvec (2, pic_reg, gotaddr),

>> +					UNSPEC_PIC_RESTORE);

>> +  XVECEXP (par, 0, 1) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM))

>> +  XVECEXP (par, 0, 2) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));

> 

> Shouldn't one of these be fnaddr_reg and the other pic_reg?

I think the USE should be gotaddr, and CLOBBER should be pic_reg, thanks.

> 

>> +  emit_insn (par);

>> +

>> +  return fnaddr_reg;

>> +}

>> +

>>   /* Return the maximum amount of padding that will be inserted before

>>      label LABEL.  */

>> -

>>   static HOST_WIDE_INT

>>   get_label_padding (rtx label)

>>   {

>> @@ -22890,9 +23054,37 @@ arm_assemble_integer (rtx x, unsigned int size, int aligned_p)

>>   		  && (!SYMBOL_REF_LOCAL_P (x)

>>   		      || (SYMBOL_REF_DECL (x)

>>   			  ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))

>> -	    fputs ("(GOT)", asm_out_file);

>> +	    {

>> +	      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))

>> +		fputs ("(GOTFUNCDESC)", asm_out_file);

>> +	      else

>> +		fputs ("(GOT)", asm_out_file);

>> +	    }

>>   	  else

>> -	    fputs ("(GOTOFF)", asm_out_file);

>> +	    {

>> +	      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))

>> +		fputs ("(GOTOFFFUNCDESC)", asm_out_file);

>> +	      else

>> +		{

>> +		  bool is_readonly;

>> +

>> +		  if (arm_is_segment_info_known (x, &is_readonly))

>> +		    fputs ("(GOTOFF)", asm_out_file);

>> +		  else

>> +		    fputs ("(GOT)", asm_out_file);

>> +		}

>> +	    }

>> +	}

>> +

>> +      /* For FDPIC we also have to mark symbol for .data section.  */

>> +      if (TARGET_FDPIC

>> +	  && NEED_GOT_RELOC

>> +	  && flag_pic

>> +	  && !making_const_table

>> +	  && SYMBOL_REF_P (x))

>> +	{

>> +	  if (SYMBOL_REF_FUNCTION_P (x))

>> +	    fputs ("(FUNCDESC)", asm_out_file);

>>   	}

>>         fputc ('\n', asm_out_file);

>>         return true;

>> diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h

>> index 34894c0..4671d64 100644

>> --- a/gcc/config/arm/arm.h

>> +++ b/gcc/config/arm/arm.h

>> @@ -871,6 +871,9 @@ extern int arm_arch_cmse;

>>      Pascal), so the following is not true.  */

>>   #define STATIC_CHAIN_REGNUM	12

>>   

>> +/* r9 is the FDPIC register (base register for GOT and FUNCDESC accesses).  */

>> +#define FDPIC_REGNUM		9

>> +

>>   /* Define this to be where the real frame pointer is if it is not possible to

>>      work out the offset between the frame pointer and the automatic variables

>>      until after register allocation has taken place.  FRAME_POINTER_REGNUM

>> @@ -1927,6 +1930,10 @@ extern unsigned arm_pic_register;

>>      data addresses in memory.  */

>>   #define PIC_OFFSET_TABLE_REGNUM arm_pic_register

>>   

>> +/* For FDPIC, the FDPIC register is call-clobbered (otherwise PLT

>> +   entries would need to handle saving and restoring it).  */

>> +#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED TARGET_FDPIC

>> +

>>   /* We can't directly access anything that contains a symbol,

>>      nor can we indirect via the constant pool.  One exception is

>>      UNSPEC_TLS, which is always PIC.  */

>> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md

>> index 270b8e4..09a0701 100644

>> --- a/gcc/config/arm/arm.md

>> +++ b/gcc/config/arm/arm.md

>> @@ -8031,6 +8031,23 @@

>>       rtx callee, pat;

>>       tree addr = MEM_EXPR (operands[0]);

>>       

>> +    /* Force FDPIC register (r9) before call.  */

>> +    if (TARGET_FDPIC)

>> +      {

>> +	/* No need to update r9 if calling a static function.

>> +	   In other words: set r9 for indirect or non-local calls.  */

>> +	callee = XEXP (operands[0], 0);

>> +	if (!SYMBOL_REF_P (callee)

>> +	    || !SYMBOL_REF_LOCAL_P (callee)

>> +	    || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

>> +	  {

>> +	    emit_insn (gen_blockage ());

>> +	    rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

>> +	    emit_move_insn (pic_reg, get_hard_reg_initial_val (Pmode, FDPIC_REGNUM));

>> +	    emit_insn (gen_rtx_USE (VOIDmode, pic_reg));

>> +	 }

>> +      }

>> +

>>       /* In an untyped call, we can get NULL for operand 2.  */

>>       if (operands[2] == NULL_RTX)

>>         operands[2] = const0_rtx;

>> @@ -8044,6 +8061,13 @@

>>   	: !REG_P (callee))

>>         XEXP (operands[0], 0) = force_reg (Pmode, callee);

>>   

>> +    if (TARGET_FDPIC && !SYMBOL_REF_P (XEXP (operands[0], 0)))

>> +      {

>> +	/* Indirect call: set r9 with FDPIC value of callee.  */

>> +	XEXP (operands[0], 0)

>> +	  = arm_load_function_descriptor (XEXP (operands[0], 0));

>> +      }

>> +

>>       if (detect_cmse_nonsecure_call (addr))

>>         {

>>   	pat = gen_nonsecure_call_internal (operands[0], operands[1],

>> @@ -8055,10 +8079,38 @@

>>   	pat = gen_call_internal (operands[0], operands[1], operands[2]);

>>   	arm_emit_call_insn (pat, XEXP (operands[0], 0), false);

>>         }

>> +

>> +    /* Restore FDPIC register (r9) after call.  */

>> +    if (TARGET_FDPIC)

>> +      {

>> +	/* No need to update r9 if calling a static function.  */

>> +	if (!SYMBOL_REF_P (callee)

>> +	    || !SYMBOL_REF_LOCAL_P (callee)

>> +	    || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

>> +	  {

>> +	    rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

>> +	    emit_move_insn (pic_reg, get_hard_reg_initial_val (Pmode, FDPIC_REGNUM));

>> +	    emit_insn (gen_rtx_USE (VOIDmode, pic_reg));

>> +	    emit_insn (gen_blockage ());

>> +	  }

>> +      }

>>       DONE;

>>     }"

>>   )

>>   

>> +(define_insn "*restore_pic_register_after_call"

>> +  [(parallel [(unspec [(match_operand:SI 0 "s_register_operand" "=r,r")

>> +		       (match_operand:SI 1 "nonimmediate_operand" "r,m")]

>> +	       UNSPEC_PIC_RESTORE)

>> +	      (use (match_dup 0))

>> +	      (clobber (match_dup 0))])

>> +  ]

>> +  ""

>> +  "@

>> +  mov\t%0, %1

>> +  ldr\t%0, %1"

>> +)

>> +

>>   (define_expand "call_internal"

>>     [(parallel [(call (match_operand 0 "memory_operand" "")

>>   	            (match_operand 1 "general_operand" ""))

>> @@ -8119,6 +8171,30 @@

>>       rtx pat, callee;

>>       tree addr = MEM_EXPR (operands[1]);

>>       

>> +    /* Force FDPIC register (r9) before call.  */

>> +    if (TARGET_FDPIC)

>> +      {

>> +	/* No need to update the FDPIC register (r9) if calling a static function.

>> +	   In other words: set r9 for indirect or non-local calls.  */

>> +	callee = XEXP (operands[1], 0);

>> +	if (!SYMBOL_REF_P (callee)

>> +	    || !SYMBOL_REF_LOCAL_P (callee)

>> +	    || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

>> +	  {

>> +	    rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));

>> +

>> +	    XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,

>> +		gen_rtvec (2, gen_rtx_REG (Pmode, FDPIC_REGNUM),

>> +			   get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)),

>> +		UNSPEC_PIC_RESTORE);

>> +	    XVECEXP (par, 0, 1)

>> +	      = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));

>> +	    XVECEXP (par, 0, 2)

>> +	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));

> 

> Again, this looks suspicious.

> 

Yes, fixed for follow-up patch, with
USE for get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)
CLOBBER for gen_rtx_REG (Pmode, FDPIC_REGNUM)

>> +	    emit_insn (par);

>> +	  }

>> +      }

>> +

>>       /* In an untyped call, we can get NULL for operand 2.  */

>>       if (operands[3] == 0)

>>         operands[3] = const0_rtx;

>> @@ -8132,6 +8208,14 @@

>>   	: !REG_P (callee))

>>         XEXP (operands[1], 0) = force_reg (Pmode, callee);

>>   

>> +    if (TARGET_FDPIC

>> +	&& !SYMBOL_REF_P (XEXP (operands[1], 0)))

>> +      {

>> +	/* Indirect call: set r9 with FDPIC value of callee.  */

>> +	XEXP (operands[1], 0)

>> +	  = arm_load_function_descriptor (XEXP (operands[1], 0));

>> +      }

>> +

>>       if (detect_cmse_nonsecure_call (addr))

>>         {

>>   	pat = gen_nonsecure_call_value_internal (operands[0], operands[1],

>> @@ -8144,6 +8228,28 @@

>>   				       operands[2], operands[3]);

>>   	arm_emit_call_insn (pat, XEXP (operands[1], 0), false);

>>         }

>> +    /* Restore FDPIC register (r9) after call.  */

>> +    if (TARGET_FDPIC)

>> +      {

>> +	/* No need to update r9 if calling a static function.  */

>> +	if (!SYMBOL_REF_P (callee)

>> +	    || !SYMBOL_REF_LOCAL_P (callee)

>> +	    || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

>> +	  {

>> +	    rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));

>> +

>> +	    XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,

>> +		gen_rtvec (2, gen_rtx_REG (Pmode, FDPIC_REGNUM),

>> +			   get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)),

>> +		UNSPEC_PIC_RESTORE);

>> +	    XVECEXP (par, 0, 1)

>> +	      = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));

>> +	    XVECEXP (par, 0, 2)

>> +	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));

> 

> And again.

Yes

> 

>> +	    emit_insn (par);

>> +	  }

>> +      }

>> +

>>       DONE;

>>     }"

>>   )

>> @@ -8486,7 +8592,7 @@

>>   		    (const_int 0))

>>   	      (match_operand 1 "" "")

>>   	      (match_operand 2 "" "")])]

>> -  "TARGET_EITHER"

>> +  "TARGET_EITHER && !TARGET_FDPIC"

>>     "

>>     {

>>       int i;

>> @@ -8553,7 +8659,7 @@

>>   (define_expand "untyped_return"

>>     [(match_operand:BLK 0 "memory_operand" "")

>>      (match_operand 1 "" "")]

>> -  "TARGET_EITHER"

>> +  "TARGET_EITHER && !TARGET_FDPIC"

>>     "

>>     {

>>       int i;

>> diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md

>> index 1941673..349ae0e 100644

>> --- a/gcc/config/arm/unspecs.md

>> +++ b/gcc/config/arm/unspecs.md

>> @@ -86,6 +86,7 @@

>>     UNSPEC_PROBE_STACK    ; Probe stack memory reference

>>     UNSPEC_NONSECURE_MEM	; Represent non-secure memory in ARMv8-M with

>>   			; security extension

>> +  UNSPEC_PIC_RESTORE	; Use to restore fdpic register

>>   ])

>>   

>>   (define_c_enum "unspec" [

>>

> 

> .

>
Richard Earnshaw (lists) Oct. 23, 2018, 2:07 p.m. UTC | #3
On 19/10/2018 14:40, Christophe Lyon wrote:
> On 12/10/2018 12:45, Richard Earnshaw (lists) wrote:

>> On 11/10/18 14:34, Christophe Lyon wrote:

>>> The FDPIC register is hard-coded to r9, as defined in the ABI.

>>>

>>> We have to disable tailcall optimizations if we don't know if the

>>> target function is in the same module. If not, we have to set r9 to

>>> the value associated with the target module.

>>>

>>> When generating a symbol address, we have to take into account whether

>>> it is a pointer to data or to a function, because different

>>> relocations are needed.

>>>

>>> 2018-XX-XX  Christophe Lyon  <christophe.lyon@st.com>

>>>     Mickaël Guêné <mickael.guene@st.com>

>>>

>>>     * config/arm/arm-c.c (__FDPIC__): Define new pre-processor macro

>>>     in FDPIC mode.

>>>     * config/arm/arm-protos.h (arm_load_function_descriptor): Declare

>>>     new function.

>>>     * config/arm/arm.c (arm_option_override): Define pic register to

>>>     FDPIC_REGNUM.

>>>     (arm_function_ok_for_sibcall) Disable sibcall optimization if we

>>

>> Missing colon.

>>

>>>     have no decl or go through PLT.

>>>     (arm_load_pic_register): Handle TARGET_FDPIC.

>>>     (arm_is_segment_info_known): New function.

>>>     (arm_pic_static_addr): Add support for FDPIC.

>>>     (arm_load_function_descriptor): New function.

>>>     (arm_assemble_integer): Add support for FDPIC.

>>>     * config/arm/arm.h (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED):

>>>     Define. (FDPIC_REGNUM): New define.

>>>     * config/arm/arm.md (call): Add support for FDPIC.

>>>     (call_value): Likewise.

>>>     (*restore_pic_register_after_call): New pattern.

>>>     (untyped_call): Disable if FDPIC.

>>>     (untyped_return): Likewise.

>>>     * config/arm/unspecs.md (UNSPEC_PIC_RESTORE): New.

>>>

>>

>> Other comments inline.

>>

>>> diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c

>>> index 4471f79..90733cc 100644

>>> --- a/gcc/config/arm/arm-c.c

>>> +++ b/gcc/config/arm/arm-c.c

>>> @@ -202,6 +202,8 @@ arm_cpu_builtins (struct cpp_reader* pfile)

>>>         builtin_define ("__ARM_EABI__");

>>>       }

>>>   +  def_or_undef_macro (pfile, "__FDPIC__", TARGET_FDPIC);

>>> +

>>>     def_or_undef_macro (pfile, "__ARM_ARCH_EXT_IDIV__", TARGET_IDIV);

>>>     def_or_undef_macro (pfile, "__ARM_FEATURE_IDIV", TARGET_IDIV);

>>>   diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h

>>> index 0dfb3ac..28cafa8 100644

>>> --- a/gcc/config/arm/arm-protos.h

>>> +++ b/gcc/config/arm/arm-protos.h

>>> @@ -136,6 +136,7 @@ extern int arm_max_const_double_inline_cost (void);

>>>   extern int arm_const_double_inline_cost (rtx);

>>>   extern bool arm_const_double_by_parts (rtx);

>>>   extern bool arm_const_double_by_immediates (rtx);

>>> +extern rtx arm_load_function_descriptor (rtx funcdesc);

>>>   extern void arm_emit_call_insn (rtx, rtx, bool);

>>>   bool detect_cmse_nonsecure_call (tree);

>>>   extern const char *output_call (rtx *);

>>> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c

>>> index 8810df5..92ae24b 100644

>>> --- a/gcc/config/arm/arm.c

>>> +++ b/gcc/config/arm/arm.c

>>> @@ -3470,6 +3470,14 @@ arm_option_override (void)

>>>     if (flag_pic && TARGET_VXWORKS_RTP)

>>>       arm_pic_register = 9;

>>>   +  /* If in FDPIC mode then force arm_pic_register to be r9.  */

>>> +  if (TARGET_FDPIC)

>>> +    {

>>> +      arm_pic_register = FDPIC_REGNUM;

>>> +      if (TARGET_ARM_ARCH < 7)

>>> +    error ("FDPIC mode is not supported on architectures older than

>>> Armv7");

>>

>> What properties of FDPIC impose this requirement?  Does it also apply to

>> Armv8-m.baseline?

>>

> In fact, there was miscommunication on my side, resulting in a

> misunderstanding between Kyrill and myself, which I badly translated

> into this condition.

> 

> My initial plan was to submit a patch series tested on v7, and send the

> patches needed to support older architectures as a follow-up. The proper

> restriction is actually "CPUs that do not support ARM or Thumb2". As you

> may have noticed during the iterations of this patch series, I had

> failed to remove partial Thumb1 support hunks.

> 

> So really this should be rephrased, and rewritten as "FDPIC mode is

> supported on architecture versions that support ARM or Thumb-2", if that

> suits you. And the condition should thus be:

> if (! TARGET_ARM && ! TARGET_THUMB2)

>   error ("...")

> 

> This would also exclude Armv8-m.baseline, since it doesn't support Thumb2.


When we get to v8-m.baseline the thumb1/2 distinction starts to become a
lot more blurred.  A lot of thumb2 features needed for stand-alone
systems are then available.  So what feature is it that you require in
order to make fdpic work in (traditional) thumb2 that isn't in
(traditional) thumb1?



> As a side note, I tried to build GCC master (without my patches)

> --with-cpu=cortex-m23, and both targets arm-eabi and arm-linux-gnueabi

> failed to build.

> 

> For arm-eabi, there are problems in newlib:

> newlib/libc/sys/arm/crt0.S:145: Error: lo register required -- `add

> sl,r2,#256'

> newlib/libc/sys/arm/trap.S:88: Error: lo register required -- `sub

> ip,sp,ip'

> 


These all sound like basic CPU detection issues in newlib and need to be
fixed at some point (it's probably still using some pre-ACLE macros to
detect system capabilities).

R.

> For arm-linux-gnueabi, the failure happens while building libgcc:

> /home/christophe.lyon/src/GCC/sources/newlib/newlib/libc/machine/arm/setjmp.S:169:

> Error: selected processor does not support ARM opcodes

> /newlib/newlib/libc/machine/arm/setjmp.S:176: Error: attempt to use an

> ARM instruction on a Thumb-only processor -- `stmea a1!,{

> v1-v7,fp,ip,sp,lr }'

> /newlib/newlib/libc/machine/arm/setjmp.S:186: Error: attempt to use an

> ARM instruction on a Thumb-only processor -- `mov a1,#0'

> /newlib/newlib/libc/machine/arm/setjmp.S:188: Error: attempt to use an

> ARM instruction on a Thumb-only processor -- `tst lr,#1'

> /newlib/newlib/libc/machine/arm/setjmp.S:188: Error: attempt to use an

> ARM instruction on a Thumb-only processor -- `moveq pc,lr'

> /newlib/newlib/libc/machine/arm/setjmp.S:194: Error: selected processor

> does not support ARM opcodes

> /newlib/newlib/libc/machine/arm/setjmp.S:203: Error: attempt to use an

> ARM instruction on a Thumb-only processor -- `ldmfd a1!,{

> v1-v7,fp,ip,sp,lr }'

> /newlib/newlib/libc/machine/arm/setjmp.S:214: Error: attempt to use an

> ARM instruction on a Thumb-only processor -- `movs a1,a2'

> /newlib/newlib/libc/machine/arm/setjmp.S:218: Error: attempt to use an

> ARM instruction on a Thumb-only processor -- `moveq a1,#1'

> /newlib/newlib/libc/machine/arm/setjmp.S:220: Error: attempt to use an

> ARM instruction on a Thumb-only processor -- `tst lr,#1'

> /newlib/newlib/libc/machine/arm/setjmp.S:220: Error: attempt to use an

> ARM instruction on a Thumb-only processor -- `moveq pc,lr'

> 

> 

>>> +    }

>>> +

>>>     if (arm_pic_register_string != NULL)

>>>       {

>>>         int pic_register = decode_reg_name (arm_pic_register_string);

>>> @@ -7251,6 +7259,21 @@ arm_function_ok_for_sibcall (tree decl, tree exp)

>>>     if (cfun->machine->sibcall_blocked)

>>>       return false;

>>>   +  if (TARGET_FDPIC)

>>> +    {

>>> +      /* In FDPIC, never tailcall something for which we have no decl:

>>> +     the target function could be in a different module, requiring

>>> +     a different FDPIC register value.  */

>>> +      if (decl == NULL)

>>> +    return false;

>>> +

>>> +      /* Don't tailcall if we go through the PLT since the FDPIC

>>> +     register is then corrupted and we don't restore it after

>>> +     static function calls.  */

>>> +      if (!targetm.binds_local_p (decl))

>>> +    return false;

>>> +    }

>>> +

>>>     /* Never tailcall something if we are generating code for

>>> Thumb-1.  */

>>>     if (TARGET_THUMB1)

>>>       return false;

>>> @@ -7629,7 +7652,9 @@ arm_load_pic_register (unsigned long saved_regs

>>> ATTRIBUTE_UNUSED)

>>>   {

>>>     rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

>>>   -  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)

>>> +  if (crtl->uses_pic_offset_table == 0

>>> +      || TARGET_SINGLE_PIC_BASE

>>> +      || TARGET_FDPIC)

>>>       return;

>>>       gcc_assert (flag_pic);

>>> @@ -7697,28 +7722,140 @@ arm_load_pic_register (unsigned long

>>> saved_regs ATTRIBUTE_UNUSED)

>>>     emit_use (pic_reg);

>>>   }

>>>   +/* Try to know if the object will go in text or data segment. This is

>>

>> "Try to determine whether an object, referenced via ORIG, will be placed

>> in the text or data segment."

>>> +   used in FDPIC mode, to decide which relocations to use when

>>> +   accessing ORIG. IS_READONLY is set to true if ORIG is a read-only

>>

>> Two spaces after a period.

>>

>>> +   location, false otherwise.  */

>>

>> You've missed the documentation of the return value: does returning true

>> mean text vs data, or does it mean we know which it will go in, but

>> don't have to return that information here.

>>

>> Generally, won't this break big time if users compile with

>> -ffunction-sections or -fdata-sections?  Is it sufficient to match

>> .text.* as being text and .data.* for data?

>>

> 

> I compiled a small testcase with -ffunction-sections and -fdata-sections

> and noticed no problem.

> The code below does not attempt to match section names; I'm not sure I

> understand your question.

> 

>>

>>> +static bool

>>> +arm_is_segment_info_known (rtx orig, bool *is_readonly)

>>> +{

>>> +  bool res = false;

>>> +

>>> +  *is_readonly = false;

>>> +

>>> +  if (GET_CODE (orig) == LABEL_REF)

>>> +    {

>>> +      res = true;

>>> +      *is_readonly = true;

>>> +    }

>>> +  else if (SYMBOL_REF_P (orig))

>>> +    {

>>> +      if (CONSTANT_POOL_ADDRESS_P (orig))

>>> +    {

>>> +      res = true;

>>> +      *is_readonly = true;

>>> +    }

>>> +      else if (SYMBOL_REF_LOCAL_P (orig)

>>> +           && !SYMBOL_REF_EXTERNAL_P (orig)

>>> +           && SYMBOL_REF_DECL (orig)

>>> +           && (!DECL_P (SYMBOL_REF_DECL (orig))

>>> +           || !DECL_COMMON (SYMBOL_REF_DECL (orig))))

>>> +    {

>>> +      tree decl = SYMBOL_REF_DECL (orig);

>>> +      tree init = (TREE_CODE (decl) == VAR_DECL)

>>> +        ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)

>>> +        ? decl : 0;

>>> +      int reloc = 0;

>>> +      bool named_section, readonly;

>>> +

>>> +      if (init && init != error_mark_node)

>>> +        reloc = compute_reloc_for_constant (init);

>>> +

>>> +      named_section = TREE_CODE (decl) == VAR_DECL

>>> +        && lookup_attribute ("section", DECL_ATTRIBUTES (decl));

>>> +      readonly = decl_readonly_section (decl, reloc);

>>> +

>>> +      /* We don't know where the link script will put a named

>>> +         section, so return false in such a case.  */

>>> +      res = !named_section;

>>> +

>>> +      if (!named_section)

>>> +        *is_readonly = readonly;

>>> +    }

>>> +      else

>>> +    {

>>> +      /* We don't know.  */

>>> +      res = false;

>>> +    }

>>> +    }

>>> +  else

>>> +    gcc_unreachable ();

>>> +

>>> +  return res;

>>> +}

>>> +

>>>   /* Generate code to load the address of a static var when flag_pic

>>> is set.  */

>>>   static rtx_insn *

>>>   arm_pic_static_addr (rtx orig, rtx reg)

>>>   {

>>>     rtx l1, labelno, offset_rtx;

>>> +  rtx_insn *insn;

>>>       gcc_assert (flag_pic);

>>>   -  /* We use an UNSPEC rather than a LABEL_REF because this label

>>> -     never appears in the code stream.  */

>>> -  labelno = GEN_INT (pic_labelno++);

>>> -  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno),

>>> UNSPEC_PIC_LABEL);

>>> -  l1 = gen_rtx_CONST (VOIDmode, l1);

>>> +  bool is_readonly = false;

>>> +  bool info_known = false;

>>>   -  /* On the ARM the PC register contains 'dot + 8' at the time of the

>>> -     addition, on the Thumb it is 'dot + 4'.  */

>>> -  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);

>>> -  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),

>>> -                               UNSPEC_SYMBOL_OFFSET);

>>> -  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

>>> +  if (TARGET_FDPIC

>>> +      && SYMBOL_REF_P (orig)

>>> +      && !SYMBOL_REF_FUNCTION_P (orig))

>>> +      info_known = arm_is_segment_info_known (orig, &is_readonly);

>>>   -  return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,

>>> labelno));

>>> +  if (TARGET_FDPIC

>>> +      && SYMBOL_REF_P (orig)

>>> +      && !SYMBOL_REF_FUNCTION_P (orig)

>>> +      && !info_known)

>>> +    {

>>> +      /* We don't know where orig is stored, so we have to be

>>> +     pessimistic and use a GOT relocation.  */

>>> +      rtx pat;

>>> +      rtx mem;

>>> +      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

>>> +

>>> +      pat = gen_calculate_pic_address (reg, pic_reg, orig);

>>> +

>>> +      /* Make the MEM as close to a constant as possible.  */

>>> +      mem = SET_SRC (pat);

>>> +      gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));

>>> +      MEM_READONLY_P (mem) = 1;

>>> +      MEM_NOTRAP_P (mem) = 1;

>>> +

>>> +      insn = emit_insn (pat);

>>> +    }

>>> +  else if (TARGET_FDPIC

>>> +       && SYMBOL_REF_P (orig)

>>> +       && (SYMBOL_REF_FUNCTION_P (orig)

>>> +           || (info_known && !is_readonly)))

>>> +    {

>>> +      /* We use the GOTOFF relocation.  */

>>> +      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

>>> +

>>> +      rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig),

>>> UNSPEC_PIC_SYM);

>>> +      emit_insn (gen_movsi (reg, l1));

>>> +      insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));

>>> +    }

>>> +  else

>>> +    {

>>> +      /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use

>>> +     PC-relative access.  */

>>> +      /* We use an UNSPEC rather than a LABEL_REF because this label

>>> +     never appears in the code stream.  */

>>> +      labelno = GEN_INT (pic_labelno++);

>>> +      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno),

>>> UNSPEC_PIC_LABEL);

>>> +      l1 = gen_rtx_CONST (VOIDmode, l1);

>>> +

>>> +      /* On the ARM the PC register contains 'dot + 8' at the time

>>> of the

>>> +     addition, on the Thumb it is 'dot + 4'.  */

>>> +      offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);

>>> +      offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig,

>>> offset_rtx),

>>> +                   UNSPEC_SYMBOL_OFFSET);

>>> +      offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

>>> +

>>> +      insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,

>>> +                           labelno));

>>> +    }

>>> +

>>> +  return insn;

>>>   }

>>>     /* Return nonzero if X is valid as an ARM state addressing

>>> register.  */

>>> @@ -15933,9 +16070,36 @@ get_jump_table_size (rtx_jump_table_data *insn)

>>>     return 0;

>>>   }

>>>   +/* Emit insns to load the function address from FUNCDESC (an FDPIC

>>> +   function descriptor) into a register and the GOT address into the

>>> +   FDPIC register, returning an rtx for the register holding the

>>> +   function address.  */

>>> +

>>> +rtx

>>> +arm_load_function_descriptor (rtx funcdesc)

>>> +{

>>> +  rtx fnaddr_reg = gen_reg_rtx (Pmode);

>>> +  rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

>>> +  rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);

>>> +  rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc,

>>> 4));

>>> +  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));

>>> +

>>> +  emit_move_insn (fnaddr_reg, fnaddr);

>>> +  /* The ABI requires the entry point address to be loaded first, so

>>> +     prevent the load from being moved after that of the GOT

>>> +     address.  */

>>> +  XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,

>>> +                    gen_rtvec (2, pic_reg, gotaddr),

>>> +                    UNSPEC_PIC_RESTORE);

>>> +  XVECEXP (par, 0, 1) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode,

>>> FDPIC_REGNUM))

>>> +  XVECEXP (par, 0, 2) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG

>>> (Pmode, FDPIC_REGNUM));

>>

>> Shouldn't one of these be fnaddr_reg and the other pic_reg?

> I think the USE should be gotaddr, and CLOBBER should be pic_reg, thanks.

> 

>>

>>> +  emit_insn (par);

>>> +

>>> +  return fnaddr_reg;

>>> +}

>>> +

>>>   /* Return the maximum amount of padding that will be inserted before

>>>      label LABEL.  */

>>> -

>>>   static HOST_WIDE_INT

>>>   get_label_padding (rtx label)

>>>   {

>>> @@ -22890,9 +23054,37 @@ arm_assemble_integer (rtx x, unsigned int

>>> size, int aligned_p)

>>>             && (!SYMBOL_REF_LOCAL_P (x)

>>>                 || (SYMBOL_REF_DECL (x)

>>>                 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))

>>> -        fputs ("(GOT)", asm_out_file);

>>> +        {

>>> +          if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))

>>> +        fputs ("(GOTFUNCDESC)", asm_out_file);

>>> +          else

>>> +        fputs ("(GOT)", asm_out_file);

>>> +        }

>>>         else

>>> -        fputs ("(GOTOFF)", asm_out_file);

>>> +        {

>>> +          if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))

>>> +        fputs ("(GOTOFFFUNCDESC)", asm_out_file);

>>> +          else

>>> +        {

>>> +          bool is_readonly;

>>> +

>>> +          if (arm_is_segment_info_known (x, &is_readonly))

>>> +            fputs ("(GOTOFF)", asm_out_file);

>>> +          else

>>> +            fputs ("(GOT)", asm_out_file);

>>> +        }

>>> +        }

>>> +    }

>>> +

>>> +      /* For FDPIC we also have to mark symbol for .data section.  */

>>> +      if (TARGET_FDPIC

>>> +      && NEED_GOT_RELOC

>>> +      && flag_pic

>>> +      && !making_const_table

>>> +      && SYMBOL_REF_P (x))

>>> +    {

>>> +      if (SYMBOL_REF_FUNCTION_P (x))

>>> +        fputs ("(FUNCDESC)", asm_out_file);

>>>       }

>>>         fputc ('\n', asm_out_file);

>>>         return true;

>>> diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h

>>> index 34894c0..4671d64 100644

>>> --- a/gcc/config/arm/arm.h

>>> +++ b/gcc/config/arm/arm.h

>>> @@ -871,6 +871,9 @@ extern int arm_arch_cmse;

>>>      Pascal), so the following is not true.  */

>>>   #define STATIC_CHAIN_REGNUM    12

>>>   +/* r9 is the FDPIC register (base register for GOT and FUNCDESC

>>> accesses).  */

>>> +#define FDPIC_REGNUM        9

>>> +

>>>   /* Define this to be where the real frame pointer is if it is not

>>> possible to

>>>      work out the offset between the frame pointer and the automatic

>>> variables

>>>      until after register allocation has taken place. 

>>> FRAME_POINTER_REGNUM

>>> @@ -1927,6 +1930,10 @@ extern unsigned arm_pic_register;

>>>      data addresses in memory.  */

>>>   #define PIC_OFFSET_TABLE_REGNUM arm_pic_register

>>>   +/* For FDPIC, the FDPIC register is call-clobbered (otherwise PLT

>>> +   entries would need to handle saving and restoring it).  */

>>> +#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED TARGET_FDPIC

>>> +

>>>   /* We can't directly access anything that contains a symbol,

>>>      nor can we indirect via the constant pool.  One exception is

>>>      UNSPEC_TLS, which is always PIC.  */

>>> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md

>>> index 270b8e4..09a0701 100644

>>> --- a/gcc/config/arm/arm.md

>>> +++ b/gcc/config/arm/arm.md

>>> @@ -8031,6 +8031,23 @@

>>>       rtx callee, pat;

>>>       tree addr = MEM_EXPR (operands[0]);

>>>       +    /* Force FDPIC register (r9) before call.  */

>>> +    if (TARGET_FDPIC)

>>> +      {

>>> +    /* No need to update r9 if calling a static function.

>>> +       In other words: set r9 for indirect or non-local calls.  */

>>> +    callee = XEXP (operands[0], 0);

>>> +    if (!SYMBOL_REF_P (callee)

>>> +        || !SYMBOL_REF_LOCAL_P (callee)

>>> +        || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

>>> +      {

>>> +        emit_insn (gen_blockage ());

>>> +        rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

>>> +        emit_move_insn (pic_reg, get_hard_reg_initial_val (Pmode,

>>> FDPIC_REGNUM));

>>> +        emit_insn (gen_rtx_USE (VOIDmode, pic_reg));

>>> +     }

>>> +      }

>>> +

>>>       /* In an untyped call, we can get NULL for operand 2.  */

>>>       if (operands[2] == NULL_RTX)

>>>         operands[2] = const0_rtx;

>>> @@ -8044,6 +8061,13 @@

>>>       : !REG_P (callee))

>>>         XEXP (operands[0], 0) = force_reg (Pmode, callee);

>>>   +    if (TARGET_FDPIC && !SYMBOL_REF_P (XEXP (operands[0], 0)))

>>> +      {

>>> +    /* Indirect call: set r9 with FDPIC value of callee.  */

>>> +    XEXP (operands[0], 0)

>>> +      = arm_load_function_descriptor (XEXP (operands[0], 0));

>>> +      }

>>> +

>>>       if (detect_cmse_nonsecure_call (addr))

>>>         {

>>>       pat = gen_nonsecure_call_internal (operands[0], operands[1],

>>> @@ -8055,10 +8079,38 @@

>>>       pat = gen_call_internal (operands[0], operands[1], operands[2]);

>>>       arm_emit_call_insn (pat, XEXP (operands[0], 0), false);

>>>         }

>>> +

>>> +    /* Restore FDPIC register (r9) after call.  */

>>> +    if (TARGET_FDPIC)

>>> +      {

>>> +    /* No need to update r9 if calling a static function.  */

>>> +    if (!SYMBOL_REF_P (callee)

>>> +        || !SYMBOL_REF_LOCAL_P (callee)

>>> +        || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

>>> +      {

>>> +        rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

>>> +        emit_move_insn (pic_reg, get_hard_reg_initial_val (Pmode,

>>> FDPIC_REGNUM));

>>> +        emit_insn (gen_rtx_USE (VOIDmode, pic_reg));

>>> +        emit_insn (gen_blockage ());

>>> +      }

>>> +      }

>>>       DONE;

>>>     }"

>>>   )

>>>   +(define_insn "*restore_pic_register_after_call"

>>> +  [(parallel [(unspec [(match_operand:SI 0 "s_register_operand" "=r,r")

>>> +               (match_operand:SI 1 "nonimmediate_operand" "r,m")]

>>> +           UNSPEC_PIC_RESTORE)

>>> +          (use (match_dup 0))

>>> +          (clobber (match_dup 0))])

>>> +  ]

>>> +  ""

>>> +  "@

>>> +  mov\t%0, %1

>>> +  ldr\t%0, %1"

>>> +)

>>> +

>>>   (define_expand "call_internal"

>>>     [(parallel [(call (match_operand 0 "memory_operand" "")

>>>                   (match_operand 1 "general_operand" ""))

>>> @@ -8119,6 +8171,30 @@

>>>       rtx pat, callee;

>>>       tree addr = MEM_EXPR (operands[1]);

>>>       +    /* Force FDPIC register (r9) before call.  */

>>> +    if (TARGET_FDPIC)

>>> +      {

>>> +    /* No need to update the FDPIC register (r9) if calling a static

>>> function.

>>> +       In other words: set r9 for indirect or non-local calls.  */

>>> +    callee = XEXP (operands[1], 0);

>>> +    if (!SYMBOL_REF_P (callee)

>>> +        || !SYMBOL_REF_LOCAL_P (callee)

>>> +        || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

>>> +      {

>>> +        rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));

>>> +

>>> +        XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,

>>> +        gen_rtvec (2, gen_rtx_REG (Pmode, FDPIC_REGNUM),

>>> +               get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)),

>>> +        UNSPEC_PIC_RESTORE);

>>> +        XVECEXP (par, 0, 1)

>>> +          = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));

>>> +        XVECEXP (par, 0, 2)

>>> +          = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode,

>>> FDPIC_REGNUM));

>>

>> Again, this looks suspicious.

>>

> Yes, fixed for follow-up patch, with

> USE for get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)

> CLOBBER for gen_rtx_REG (Pmode, FDPIC_REGNUM)

> 

>>> +        emit_insn (par);

>>> +      }

>>> +      }

>>> +

>>>       /* In an untyped call, we can get NULL for operand 2.  */

>>>       if (operands[3] == 0)

>>>         operands[3] = const0_rtx;

>>> @@ -8132,6 +8208,14 @@

>>>       : !REG_P (callee))

>>>         XEXP (operands[1], 0) = force_reg (Pmode, callee);

>>>   +    if (TARGET_FDPIC

>>> +    && !SYMBOL_REF_P (XEXP (operands[1], 0)))

>>> +      {

>>> +    /* Indirect call: set r9 with FDPIC value of callee.  */

>>> +    XEXP (operands[1], 0)

>>> +      = arm_load_function_descriptor (XEXP (operands[1], 0));

>>> +      }

>>> +

>>>       if (detect_cmse_nonsecure_call (addr))

>>>         {

>>>       pat = gen_nonsecure_call_value_internal (operands[0], operands[1],

>>> @@ -8144,6 +8228,28 @@

>>>                          operands[2], operands[3]);

>>>       arm_emit_call_insn (pat, XEXP (operands[1], 0), false);

>>>         }

>>> +    /* Restore FDPIC register (r9) after call.  */

>>> +    if (TARGET_FDPIC)

>>> +      {

>>> +    /* No need to update r9 if calling a static function.  */

>>> +    if (!SYMBOL_REF_P (callee)

>>> +        || !SYMBOL_REF_LOCAL_P (callee)

>>> +        || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

>>> +      {

>>> +        rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));

>>> +

>>> +        XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,

>>> +        gen_rtvec (2, gen_rtx_REG (Pmode, FDPIC_REGNUM),

>>> +               get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)),

>>> +        UNSPEC_PIC_RESTORE);

>>> +        XVECEXP (par, 0, 1)

>>> +          = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));

>>> +        XVECEXP (par, 0, 2)

>>> +          = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode,

>>> FDPIC_REGNUM));

>>

>> And again.

> Yes

> 

>>

>>> +        emit_insn (par);

>>> +      }

>>> +      }

>>> +

>>>       DONE;

>>>     }"

>>>   )

>>> @@ -8486,7 +8592,7 @@

>>>               (const_int 0))

>>>             (match_operand 1 "" "")

>>>             (match_operand 2 "" "")])]

>>> -  "TARGET_EITHER"

>>> +  "TARGET_EITHER && !TARGET_FDPIC"

>>>     "

>>>     {

>>>       int i;

>>> @@ -8553,7 +8659,7 @@

>>>   (define_expand "untyped_return"

>>>     [(match_operand:BLK 0 "memory_operand" "")

>>>      (match_operand 1 "" "")]

>>> -  "TARGET_EITHER"

>>> +  "TARGET_EITHER && !TARGET_FDPIC"

>>>     "

>>>     {

>>>       int i;

>>> diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md

>>> index 1941673..349ae0e 100644

>>> --- a/gcc/config/arm/unspecs.md

>>> +++ b/gcc/config/arm/unspecs.md

>>> @@ -86,6 +86,7 @@

>>>     UNSPEC_PROBE_STACK    ; Probe stack memory reference

>>>     UNSPEC_NONSECURE_MEM    ; Represent non-secure memory in ARMv8-M

>>> with

>>>               ; security extension

>>> +  UNSPEC_PIC_RESTORE    ; Use to restore fdpic register

>>>   ])

>>>     (define_c_enum "unspec" [

>>>

>>

>> .

>>

>
Christophe Lyon Oct. 26, 2018, 3:25 p.m. UTC | #4
On Tue, 23 Oct 2018 at 16:07, Richard Earnshaw (lists)
<Richard.Earnshaw@arm.com> wrote:
>

> On 19/10/2018 14:40, Christophe Lyon wrote:

> > On 12/10/2018 12:45, Richard Earnshaw (lists) wrote:

> >> On 11/10/18 14:34, Christophe Lyon wrote:

> >>> The FDPIC register is hard-coded to r9, as defined in the ABI.

> >>>

> >>> We have to disable tailcall optimizations if we don't know if the

> >>> target function is in the same module. If not, we have to set r9 to

> >>> the value associated with the target module.

> >>>

> >>> When generating a symbol address, we have to take into account whether

> >>> it is a pointer to data or to a function, because different

> >>> relocations are needed.

> >>>

> >>> 2018-XX-XX  Christophe Lyon  <christophe.lyon@st.com>

> >>>     Mickaël Guêné <mickael.guene@st.com>

> >>>

> >>>     * config/arm/arm-c.c (__FDPIC__): Define new pre-processor macro

> >>>     in FDPIC mode.

> >>>     * config/arm/arm-protos.h (arm_load_function_descriptor): Declare

> >>>     new function.

> >>>     * config/arm/arm.c (arm_option_override): Define pic register to

> >>>     FDPIC_REGNUM.

> >>>     (arm_function_ok_for_sibcall) Disable sibcall optimization if we

> >>

> >> Missing colon.

> >>

> >>>     have no decl or go through PLT.

> >>>     (arm_load_pic_register): Handle TARGET_FDPIC.

> >>>     (arm_is_segment_info_known): New function.

> >>>     (arm_pic_static_addr): Add support for FDPIC.

> >>>     (arm_load_function_descriptor): New function.

> >>>     (arm_assemble_integer): Add support for FDPIC.

> >>>     * config/arm/arm.h (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED):

> >>>     Define. (FDPIC_REGNUM): New define.

> >>>     * config/arm/arm.md (call): Add support for FDPIC.

> >>>     (call_value): Likewise.

> >>>     (*restore_pic_register_after_call): New pattern.

> >>>     (untyped_call): Disable if FDPIC.

> >>>     (untyped_return): Likewise.

> >>>     * config/arm/unspecs.md (UNSPEC_PIC_RESTORE): New.

> >>>

> >>

> >> Other comments inline.

> >>

> >>> diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c

> >>> index 4471f79..90733cc 100644

> >>> --- a/gcc/config/arm/arm-c.c

> >>> +++ b/gcc/config/arm/arm-c.c

> >>> @@ -202,6 +202,8 @@ arm_cpu_builtins (struct cpp_reader* pfile)

> >>>         builtin_define ("__ARM_EABI__");

> >>>       }

> >>>   +  def_or_undef_macro (pfile, "__FDPIC__", TARGET_FDPIC);

> >>> +

> >>>     def_or_undef_macro (pfile, "__ARM_ARCH_EXT_IDIV__", TARGET_IDIV);

> >>>     def_or_undef_macro (pfile, "__ARM_FEATURE_IDIV", TARGET_IDIV);

> >>>   diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h

> >>> index 0dfb3ac..28cafa8 100644

> >>> --- a/gcc/config/arm/arm-protos.h

> >>> +++ b/gcc/config/arm/arm-protos.h

> >>> @@ -136,6 +136,7 @@ extern int arm_max_const_double_inline_cost (void);

> >>>   extern int arm_const_double_inline_cost (rtx);

> >>>   extern bool arm_const_double_by_parts (rtx);

> >>>   extern bool arm_const_double_by_immediates (rtx);

> >>> +extern rtx arm_load_function_descriptor (rtx funcdesc);

> >>>   extern void arm_emit_call_insn (rtx, rtx, bool);

> >>>   bool detect_cmse_nonsecure_call (tree);

> >>>   extern const char *output_call (rtx *);

> >>> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c

> >>> index 8810df5..92ae24b 100644

> >>> --- a/gcc/config/arm/arm.c

> >>> +++ b/gcc/config/arm/arm.c

> >>> @@ -3470,6 +3470,14 @@ arm_option_override (void)

> >>>     if (flag_pic && TARGET_VXWORKS_RTP)

> >>>       arm_pic_register = 9;

> >>>   +  /* If in FDPIC mode then force arm_pic_register to be r9.  */

> >>> +  if (TARGET_FDPIC)

> >>> +    {

> >>> +      arm_pic_register = FDPIC_REGNUM;

> >>> +      if (TARGET_ARM_ARCH < 7)

> >>> +    error ("FDPIC mode is not supported on architectures older than

> >>> Armv7");

> >>

> >> What properties of FDPIC impose this requirement?  Does it also apply to

> >> Armv8-m.baseline?

> >>

> > In fact, there was miscommunication on my side, resulting in a

> > misunderstanding between Kyrill and myself, which I badly translated

> > into this condition.

> >

> > My initial plan was to submit a patch series tested on v7, and send the

> > patches needed to support older architectures as a follow-up. The proper

> > restriction is actually "CPUs that do not support ARM or Thumb2". As you

> > may have noticed during the iterations of this patch series, I had

> > failed to remove partial Thumb1 support hunks.

> >

> > So really this should be rephrased, and rewritten as "FDPIC mode is

> > supported on architecture versions that support ARM or Thumb-2", if that

> > suits you. And the condition should thus be:

> > if (! TARGET_ARM && ! TARGET_THUMB2)

> >   error ("...")

> >

> > This would also exclude Armv8-m.baseline, since it doesn't support Thumb2.

>

> When we get to v8-m.baseline the thumb1/2 distinction starts to become a

> lot more blurred.  A lot of thumb2 features needed for stand-alone

> systems are then available.  So what feature is it that you require in

> order to make fdpic work in (traditional) thumb2 that isn't in

> (traditional) thumb1?

>

At the moment I'm not sure which feature is missing. It's rather
that we haven't made it work, although there were preliminary attempts.

Since building GCC --with-cpu=cortex-m{0,23} --target arm-linux-gnueabi
currently fails, I tried using a fdpic toolchain built --with-cpu=cortex-m4,
forcing -mcpu=cortex-m{0,23} while building uClibc-ng. I noticed two kinds
of failures:
- parts of assembly files do not support Thumb-1, so they need porting at least
(ldso/ldso/arm/dl-startup.h)
- ICEs for lack of .md patterns (cortex-m4 uses pic_load_addr_32bit, which is
missing for m{0,23})

There are probably other problems that would be discovered at runtime.

So it can probably be made to work, but I think that would be an enhancement
for later (not sure there's a real need: can we reasonably think about
running Linux on such small cores?)

>

>

> > As a side note, I tried to build GCC master (without my patches)

> > --with-cpu=cortex-m23, and both targets arm-eabi and arm-linux-gnueabi

> > failed to build.

> >

> > For arm-eabi, there are problems in newlib:

> > newlib/libc/sys/arm/crt0.S:145: Error: lo register required -- `add

> > sl,r2,#256'

> > newlib/libc/sys/arm/trap.S:88: Error: lo register required -- `sub

> > ip,sp,ip'

> >

>

> These all sound like basic CPU detection issues in newlib and need to be

> fixed at some point (it's probably still using some pre-ACLE macros to

> detect system capabilities).

>

> R.

>

> > For arm-linux-gnueabi, the failure happens while building libgcc:

> > /home/christophe.lyon/src/GCC/sources/newlib/newlib/libc/machine/arm/setjmp.S:169:

> > Error: selected processor does not support ARM opcodes

> > /newlib/newlib/libc/machine/arm/setjmp.S:176: Error: attempt to use an

> > ARM instruction on a Thumb-only processor -- `stmea a1!,{

> > v1-v7,fp,ip,sp,lr }'

> > /newlib/newlib/libc/machine/arm/setjmp.S:186: Error: attempt to use an

> > ARM instruction on a Thumb-only processor -- `mov a1,#0'

> > /newlib/newlib/libc/machine/arm/setjmp.S:188: Error: attempt to use an

> > ARM instruction on a Thumb-only processor -- `tst lr,#1'

> > /newlib/newlib/libc/machine/arm/setjmp.S:188: Error: attempt to use an

> > ARM instruction on a Thumb-only processor -- `moveq pc,lr'

> > /newlib/newlib/libc/machine/arm/setjmp.S:194: Error: selected processor

> > does not support ARM opcodes

> > /newlib/newlib/libc/machine/arm/setjmp.S:203: Error: attempt to use an

> > ARM instruction on a Thumb-only processor -- `ldmfd a1!,{

> > v1-v7,fp,ip,sp,lr }'

> > /newlib/newlib/libc/machine/arm/setjmp.S:214: Error: attempt to use an

> > ARM instruction on a Thumb-only processor -- `movs a1,a2'

> > /newlib/newlib/libc/machine/arm/setjmp.S:218: Error: attempt to use an

> > ARM instruction on a Thumb-only processor -- `moveq a1,#1'

> > /newlib/newlib/libc/machine/arm/setjmp.S:220: Error: attempt to use an

> > ARM instruction on a Thumb-only processor -- `tst lr,#1'

> > /newlib/newlib/libc/machine/arm/setjmp.S:220: Error: attempt to use an

> > ARM instruction on a Thumb-only processor -- `moveq pc,lr'

> >

> >

> >>> +    }

> >>> +

> >>>     if (arm_pic_register_string != NULL)

> >>>       {

> >>>         int pic_register = decode_reg_name (arm_pic_register_string);

> >>> @@ -7251,6 +7259,21 @@ arm_function_ok_for_sibcall (tree decl, tree exp)

> >>>     if (cfun->machine->sibcall_blocked)

> >>>       return false;

> >>>   +  if (TARGET_FDPIC)

> >>> +    {

> >>> +      /* In FDPIC, never tailcall something for which we have no decl:

> >>> +     the target function could be in a different module, requiring

> >>> +     a different FDPIC register value.  */

> >>> +      if (decl == NULL)

> >>> +    return false;

> >>> +

> >>> +      /* Don't tailcall if we go through the PLT since the FDPIC

> >>> +     register is then corrupted and we don't restore it after

> >>> +     static function calls.  */

> >>> +      if (!targetm.binds_local_p (decl))

> >>> +    return false;

> >>> +    }

> >>> +

> >>>     /* Never tailcall something if we are generating code for

> >>> Thumb-1.  */

> >>>     if (TARGET_THUMB1)

> >>>       return false;

> >>> @@ -7629,7 +7652,9 @@ arm_load_pic_register (unsigned long saved_regs

> >>> ATTRIBUTE_UNUSED)

> >>>   {

> >>>     rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

> >>>   -  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)

> >>> +  if (crtl->uses_pic_offset_table == 0

> >>> +      || TARGET_SINGLE_PIC_BASE

> >>> +      || TARGET_FDPIC)

> >>>       return;

> >>>       gcc_assert (flag_pic);

> >>> @@ -7697,28 +7722,140 @@ arm_load_pic_register (unsigned long

> >>> saved_regs ATTRIBUTE_UNUSED)

> >>>     emit_use (pic_reg);

> >>>   }

> >>>   +/* Try to know if the object will go in text or data segment. This is

> >>

> >> "Try to determine whether an object, referenced via ORIG, will be placed

> >> in the text or data segment."

> >>> +   used in FDPIC mode, to decide which relocations to use when

> >>> +   accessing ORIG. IS_READONLY is set to true if ORIG is a read-only

> >>

> >> Two spaces after a period.

> >>

> >>> +   location, false otherwise.  */

> >>

> >> You've missed the documentation of the return value: does returning true

> >> mean text vs data, or does it mean we know which it will go in, but

> >> don't have to return that information here.

> >>

> >> Generally, won't this break big time if users compile with

> >> -ffunction-sections or -fdata-sections?  Is it sufficient to match

> >> .text.* as being text and .data.* for data?

> >>

> >

> > I compiled a small testcase with -ffunction-sections and -fdata-sections

> > and noticed no problem.

> > The code below does not attempt to match section names, I'm not sure to

> > understand your question?

> >

> >>

> >>> +static bool

> >>> +arm_is_segment_info_known (rtx orig, bool *is_readonly)

> >>> +{

> >>> +  bool res = false;

> >>> +

> >>> +  *is_readonly = false;

> >>> +

> >>> +  if (GET_CODE (orig) == LABEL_REF)

> >>> +    {

> >>> +      res = true;

> >>> +      *is_readonly = true;

> >>> +    }

> >>> +  else if (SYMBOL_REF_P (orig))

> >>> +    {

> >>> +      if (CONSTANT_POOL_ADDRESS_P (orig))

> >>> +    {

> >>> +      res = true;

> >>> +      *is_readonly = true;

> >>> +    }

> >>> +      else if (SYMBOL_REF_LOCAL_P (orig)

> >>> +           && !SYMBOL_REF_EXTERNAL_P (orig)

> >>> +           && SYMBOL_REF_DECL (orig)

> >>> +           && (!DECL_P (SYMBOL_REF_DECL (orig))

> >>> +           || !DECL_COMMON (SYMBOL_REF_DECL (orig))))

> >>> +    {

> >>> +      tree decl = SYMBOL_REF_DECL (orig);

> >>> +      tree init = (TREE_CODE (decl) == VAR_DECL)

> >>> +        ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)

> >>> +        ? decl : 0;

> >>> +      int reloc = 0;

> >>> +      bool named_section, readonly;

> >>> +

> >>> +      if (init && init != error_mark_node)

> >>> +        reloc = compute_reloc_for_constant (init);

> >>> +

> >>> +      named_section = TREE_CODE (decl) == VAR_DECL

> >>> +        && lookup_attribute ("section", DECL_ATTRIBUTES (decl));

> >>> +      readonly = decl_readonly_section (decl, reloc);

> >>> +

> >>> +      /* We don't know where the link script will put a named

> >>> +         section, so return false in such a case.  */

> >>> +      res = !named_section;

> >>> +

> >>> +      if (!named_section)

> >>> +        *is_readonly = readonly;

> >>> +    }

> >>> +      else

> >>> +    {

> >>> +      /* We don't know.  */

> >>> +      res = false;

> >>> +    }

> >>> +    }

> >>> +  else

> >>> +    gcc_unreachable ();

> >>> +

> >>> +  return res;

> >>> +}

> >>> +

> >>>   /* Generate code to load the address of a static var when flag_pic

> >>> is set.  */

> >>>   static rtx_insn *

> >>>   arm_pic_static_addr (rtx orig, rtx reg)

> >>>   {

> >>>     rtx l1, labelno, offset_rtx;

> >>> +  rtx_insn *insn;

> >>>       gcc_assert (flag_pic);

> >>>   -  /* We use an UNSPEC rather than a LABEL_REF because this label

> >>> -     never appears in the code stream.  */

> >>> -  labelno = GEN_INT (pic_labelno++);

> >>> -  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno),

> >>> UNSPEC_PIC_LABEL);

> >>> -  l1 = gen_rtx_CONST (VOIDmode, l1);

> >>> +  bool is_readonly = false;

> >>> +  bool info_known = false;

> >>>   -  /* On the ARM the PC register contains 'dot + 8' at the time of the

> >>> -     addition, on the Thumb it is 'dot + 4'.  */

> >>> -  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);

> >>> -  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),

> >>> -                               UNSPEC_SYMBOL_OFFSET);

> >>> -  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

> >>> +  if (TARGET_FDPIC

> >>> +      && SYMBOL_REF_P (orig)

> >>> +      && !SYMBOL_REF_FUNCTION_P (orig))

> >>> +      info_known = arm_is_segment_info_known (orig, &is_readonly);

> >>>   -  return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,

> >>> labelno));

> >>> +  if (TARGET_FDPIC

> >>> +      && SYMBOL_REF_P (orig)

> >>> +      && !SYMBOL_REF_FUNCTION_P (orig)

> >>> +      && !info_known)

> >>> +    {

> >>> +      /* We don't know where orig is stored, so we have be

> >>> +     pessimistic and use a GOT relocation.  */

> >>> +      rtx pat;

> >>> +      rtx mem;

> >>> +      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

> >>> +

> >>> +      pat = gen_calculate_pic_address (reg, pic_reg, orig);

> >>> +

> >>> +      /* Make the MEM as close to a constant as possible.  */

> >>> +      mem = SET_SRC (pat);

> >>> +      gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));

> >>> +      MEM_READONLY_P (mem) = 1;

> >>> +      MEM_NOTRAP_P (mem) = 1;

> >>> +

> >>> +      insn = emit_insn (pat);

> >>> +    }

> >>> +  else if (TARGET_FDPIC

> >>> +       && SYMBOL_REF_P (orig)

> >>> +       && (SYMBOL_REF_FUNCTION_P (orig)

> >>> +           || (info_known && !is_readonly)))

> >>> +    {

> >>> +      /* We use the GOTOFF relocation.  */

> >>> +      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

> >>> +

> >>> +      rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig),

> >>> UNSPEC_PIC_SYM);

> >>> +      emit_insn (gen_movsi (reg, l1));

> >>> +      insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));

> >>> +    }

> >>> +  else

> >>> +    {

> >>> +      /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use

> >>> +     PC-relative access.  */

> >>> +      /* We use an UNSPEC rather than a LABEL_REF because this label

> >>> +     never appears in the code stream.  */

> >>> +      labelno = GEN_INT (pic_labelno++);

> >>> +      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno),

> >>> UNSPEC_PIC_LABEL);

> >>> +      l1 = gen_rtx_CONST (VOIDmode, l1);

> >>> +

> >>> +      /* On the ARM the PC register contains 'dot + 8' at the time

> >>> of the

> >>> +     addition, on the Thumb it is 'dot + 4'.  */

> >>> +      offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);

> >>> +      offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig,

> >>> offset_rtx),

> >>> +                   UNSPEC_SYMBOL_OFFSET);

> >>> +      offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

> >>> +

> >>> +      insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,

> >>> +                           labelno));

> >>> +    }

> >>> +

> >>> +  return insn;

> >>>   }

> >>>     /* Return nonzero if X is valid as an ARM state addressing

> >>> register.  */

> >>> @@ -15933,9 +16070,36 @@ get_jump_table_size (rtx_jump_table_data *insn)

> >>>     return 0;

> >>>   }

> >>>   +/* Emit insns to load the function address from FUNCDESC (an FDPIC

> >>> +   function descriptor) into a register and the GOT address into the

> >>> +   FDPIC register, returning an rtx for the register holding the

> >>> +   function address.  */

> >>> +

> >>> +rtx

> >>> +arm_load_function_descriptor (rtx funcdesc)

> >>> +{

> >>> +  rtx fnaddr_reg = gen_reg_rtx (Pmode);

> >>> +  rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

> >>> +  rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);

> >>> +  rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc,

> >>> 4));

> >>> +  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));

> >>> +

> >>> +  emit_move_insn (fnaddr_reg, fnaddr);

> >>> +  /* The ABI requires the entry point address to be loaded first, so

> >>> +     prevent the load from being moved after that of the GOT

> >>> +     address.  */

> >>> +  XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,

> >>> +                    gen_rtvec (2, pic_reg, gotaddr),

> >>> +                    UNSPEC_PIC_RESTORE);

> >>> +  XVECEXP (par, 0, 1) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode,

> >>> FDPIC_REGNUM))

> >>> +  XVECEXP (par, 0, 2) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG

> >>> (Pmode, FDPIC_REGNUM));

> >>

> >> Shouldn't one of these be fnaddr_reg and the other pic_reg?

> > I think the USE should be gotaddr, and CLOBBER should be pic_reg, thanks.

> >

> >>

> >>> +  emit_insn (par);

> >>> +

> >>> +  return fnaddr_reg;

> >>> +}

> >>> +

> >>>   /* Return the maximum amount of padding that will be inserted before

> >>>      label LABEL.  */

> >>> -

> >>>   static HOST_WIDE_INT

> >>>   get_label_padding (rtx label)

> >>>   {

> >>> @@ -22890,9 +23054,37 @@ arm_assemble_integer (rtx x, unsigned int

> >>> size, int aligned_p)

> >>>             && (!SYMBOL_REF_LOCAL_P (x)

> >>>                 || (SYMBOL_REF_DECL (x)

> >>>                 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))

> >>> -        fputs ("(GOT)", asm_out_file);

> >>> +        {

> >>> +          if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))

> >>> +        fputs ("(GOTFUNCDESC)", asm_out_file);

> >>> +          else

> >>> +        fputs ("(GOT)", asm_out_file);

> >>> +        }

> >>>         else

> >>> -        fputs ("(GOTOFF)", asm_out_file);

> >>> +        {

> >>> +          if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))

> >>> +        fputs ("(GOTOFFFUNCDESC)", asm_out_file);

> >>> +          else

> >>> +        {

> >>> +          bool is_readonly;

> >>> +

> >>> +          if (arm_is_segment_info_known (x, &is_readonly))

> >>> +            fputs ("(GOTOFF)", asm_out_file);

> >>> +          else

> >>> +            fputs ("(GOT)", asm_out_file);

> >>> +        }

> >>> +        }

> >>> +    }

> >>> +

> >>> +      /* For FDPIC we also have to mark symbol for .data section.  */

> >>> +      if (TARGET_FDPIC

> >>> +      && NEED_GOT_RELOC

> >>> +      && flag_pic

> >>> +      && !making_const_table

> >>> +      && SYMBOL_REF_P (x))

> >>> +    {

> >>> +      if (SYMBOL_REF_FUNCTION_P (x))

> >>> +        fputs ("(FUNCDESC)", asm_out_file);

> >>>       }

> >>>         fputc ('\n', asm_out_file);

> >>>         return true;

> >>> diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h

> >>> index 34894c0..4671d64 100644

> >>> --- a/gcc/config/arm/arm.h

> >>> +++ b/gcc/config/arm/arm.h

> >>> @@ -871,6 +871,9 @@ extern int arm_arch_cmse;

> >>>      Pascal), so the following is not true.  */

> >>>   #define STATIC_CHAIN_REGNUM    12

> >>>   +/* r9 is the FDPIC register (base register for GOT and FUNCDESC

> >>> accesses).  */

> >>> +#define FDPIC_REGNUM        9

> >>> +

> >>>   /* Define this to be where the real frame pointer is if it is not

> >>> possible to

> >>>      work out the offset between the frame pointer and the automatic

> >>> variables

> >>>      until after register allocation has taken place.

> >>> FRAME_POINTER_REGNUM

> >>> @@ -1927,6 +1930,10 @@ extern unsigned arm_pic_register;

> >>>      data addresses in memory.  */

> >>>   #define PIC_OFFSET_TABLE_REGNUM arm_pic_register

> >>>   +/* For FDPIC, the FDPIC register is call-clobbered (otherwise PLT

> >>> +   entries would need to handle saving and restoring it).  */

> >>> +#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED TARGET_FDPIC

> >>> +

> >>>   /* We can't directly access anything that contains a symbol,

> >>>      nor can we indirect via the constant pool.  One exception is

> >>>      UNSPEC_TLS, which is always PIC.  */

> >>> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md

> >>> index 270b8e4..09a0701 100644

> >>> --- a/gcc/config/arm/arm.md

> >>> +++ b/gcc/config/arm/arm.md

> >>> @@ -8031,6 +8031,23 @@

> >>>       rtx callee, pat;

> >>>       tree addr = MEM_EXPR (operands[0]);

> >>>       +    /* Force FDPIC register (r9) before call.  */

> >>> +    if (TARGET_FDPIC)

> >>> +      {

> >>> +    /* No need to update r9 if calling a static function.

> >>> +       In other words: set r9 for indirect or non-local calls.  */

> >>> +    callee = XEXP (operands[0], 0);

> >>> +    if (!SYMBOL_REF_P (callee)

> >>> +        || !SYMBOL_REF_LOCAL_P (callee)

> >>> +        || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

> >>> +      {

> >>> +        emit_insn (gen_blockage ());

> >>> +        rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

> >>> +        emit_move_insn (pic_reg, get_hard_reg_initial_val (Pmode,

> >>> FDPIC_REGNUM));

> >>> +        emit_insn (gen_rtx_USE (VOIDmode, pic_reg));

> >>> +     }

> >>> +      }

> >>> +

> >>>       /* In an untyped call, we can get NULL for operand 2.  */

> >>>       if (operands[2] == NULL_RTX)

> >>>         operands[2] = const0_rtx;

> >>> @@ -8044,6 +8061,13 @@

> >>>       : !REG_P (callee))

> >>>         XEXP (operands[0], 0) = force_reg (Pmode, callee);

> >>>   +    if (TARGET_FDPIC && !SYMBOL_REF_P (XEXP (operands[0], 0)))

> >>> +      {

> >>> +    /* Indirect call: set r9 with FDPIC value of callee.  */

> >>> +    XEXP (operands[0], 0)

> >>> +      = arm_load_function_descriptor (XEXP (operands[0], 0));

> >>> +      }

> >>> +

> >>>       if (detect_cmse_nonsecure_call (addr))

> >>>         {

> >>>       pat = gen_nonsecure_call_internal (operands[0], operands[1],

> >>> @@ -8055,10 +8079,38 @@

> >>>       pat = gen_call_internal (operands[0], operands[1], operands[2]);

> >>>       arm_emit_call_insn (pat, XEXP (operands[0], 0), false);

> >>>         }

> >>> +

> >>> +    /* Restore FDPIC register (r9) after call.  */

> >>> +    if (TARGET_FDPIC)

> >>> +      {

> >>> +    /* No need to update r9 if calling a static function.  */

> >>> +    if (!SYMBOL_REF_P (callee)

> >>> +        || !SYMBOL_REF_LOCAL_P (callee)

> >>> +        || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

> >>> +      {

> >>> +        rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

> >>> +        emit_move_insn (pic_reg, get_hard_reg_initial_val (Pmode,

> >>> FDPIC_REGNUM));

> >>> +        emit_insn (gen_rtx_USE (VOIDmode, pic_reg));

> >>> +        emit_insn (gen_blockage ());

> >>> +      }

> >>> +      }

> >>>       DONE;

> >>>     }"

> >>>   )

> >>>   +(define_insn "*restore_pic_register_after_call"

> >>> +  [(parallel [(unspec [(match_operand:SI 0 "s_register_operand" "=r,r")

> >>> +               (match_operand:SI 1 "nonimmediate_operand" "r,m")]

> >>> +           UNSPEC_PIC_RESTORE)

> >>> +          (use (match_dup 0))

> >>> +          (clobber (match_dup 0))])

> >>> +  ]

> >>> +  ""

> >>> +  "@

> >>> +  mov\t%0, %1

> >>> +  ldr\t%0, %1"

> >>> +)

> >>> +

> >>>   (define_expand "call_internal"

> >>>     [(parallel [(call (match_operand 0 "memory_operand" "")

> >>>                   (match_operand 1 "general_operand" ""))

> >>> @@ -8119,6 +8171,30 @@

> >>>       rtx pat, callee;

> >>>       tree addr = MEM_EXPR (operands[1]);

> >>>       +    /* Force FDPIC register (r9) before call.  */

> >>> +    if (TARGET_FDPIC)

> >>> +      {

> >>> +    /* No need to update the FDPIC register (r9) if calling a static

> >>> function.

> >>> +       In other words: set r9 for indirect or non-local calls.  */

> >>> +    callee = XEXP (operands[1], 0);

> >>> +    if (!SYMBOL_REF_P (callee)

> >>> +        || !SYMBOL_REF_LOCAL_P (callee)

> >>> +        || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

> >>> +      {

> >>> +        rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));

> >>> +

> >>> +        XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,

> >>> +        gen_rtvec (2, gen_rtx_REG (Pmode, FDPIC_REGNUM),

> >>> +               get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)),

> >>> +        UNSPEC_PIC_RESTORE);

> >>> +        XVECEXP (par, 0, 1)

> >>> +          = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));

> >>> +        XVECEXP (par, 0, 2)

> >>> +          = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode,

> >>> FDPIC_REGNUM));

> >>

> >> Again, this looks suspicious.

> >>

> > Yes, fixed for follow-up patch, with

> > USE for get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)

> > CLOBBER for gen_rtx_REG (Pmode, FDPIC_REGNUM)

> >

> >>> +        emit_insn (par);

> >>> +      }

> >>> +      }

> >>> +

> >>>       /* In an untyped call, we can get NULL for operand 2.  */

> >>>       if (operands[3] == 0)

> >>>         operands[3] = const0_rtx;

> >>> @@ -8132,6 +8208,14 @@

> >>>       : !REG_P (callee))

> >>>         XEXP (operands[1], 0) = force_reg (Pmode, callee);

> >>>   +    if (TARGET_FDPIC

> >>> +    && !SYMBOL_REF_P (XEXP (operands[1], 0)))

> >>> +      {

> >>> +    /* Indirect call: set r9 with FDPIC value of callee.  */

> >>> +    XEXP (operands[1], 0)

> >>> +      = arm_load_function_descriptor (XEXP (operands[1], 0));

> >>> +      }

> >>> +

> >>>       if (detect_cmse_nonsecure_call (addr))

> >>>         {

> >>>       pat = gen_nonsecure_call_value_internal (operands[0], operands[1],

> >>> @@ -8144,6 +8228,28 @@

> >>>                          operands[2], operands[3]);

> >>>       arm_emit_call_insn (pat, XEXP (operands[1], 0), false);

> >>>         }

> >>> +    /* Restore FDPIC register (r9) after call.  */

> >>> +    if (TARGET_FDPIC)

> >>> +      {

> >>> +    /* No need to update r9 if calling a static function.  */

> >>> +    if (!SYMBOL_REF_P (callee)

> >>> +        || !SYMBOL_REF_LOCAL_P (callee)

> >>> +        || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

> >>> +      {

> >>> +        rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));

> >>> +

> >>> +        XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,

> >>> +        gen_rtvec (2, gen_rtx_REG (Pmode, FDPIC_REGNUM),

> >>> +               get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)),

> >>> +        UNSPEC_PIC_RESTORE);

> >>> +        XVECEXP (par, 0, 1)

> >>> +          = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));

> >>> +        XVECEXP (par, 0, 2)

> >>> +          = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode,

> >>> FDPIC_REGNUM));

> >>

> >> And again.

> > Yes

> >

> >>

> >>> +        emit_insn (par);

> >>> +      }

> >>> +      }

> >>> +

> >>>       DONE;

> >>>     }"

> >>>   )

> >>> @@ -8486,7 +8592,7 @@

> >>>               (const_int 0))

> >>>             (match_operand 1 "" "")

> >>>             (match_operand 2 "" "")])]

> >>> -  "TARGET_EITHER"

> >>> +  "TARGET_EITHER && !TARGET_FDPIC"

> >>>     "

> >>>     {

> >>>       int i;

> >>> @@ -8553,7 +8659,7 @@

> >>>   (define_expand "untyped_return"

> >>>     [(match_operand:BLK 0 "memory_operand" "")

> >>>      (match_operand 1 "" "")]

> >>> -  "TARGET_EITHER"

> >>> +  "TARGET_EITHER && !TARGET_FDPIC"

> >>>     "

> >>>     {

> >>>       int i;

> >>> diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md

> >>> index 1941673..349ae0e 100644

> >>> --- a/gcc/config/arm/unspecs.md

> >>> +++ b/gcc/config/arm/unspecs.md

> >>> @@ -86,6 +86,7 @@

> >>>     UNSPEC_PROBE_STACK    ; Probe stack memory reference

> >>>     UNSPEC_NONSECURE_MEM    ; Represent non-secure memory in ARMv8-M

> >>> with

> >>>               ; security extension

> >>> +  UNSPEC_PIC_RESTORE    ; Use to restore fdpic register

> >>>   ])

> >>>     (define_c_enum "unspec" [

> >>>

> >>

> >> .

> >>

> >

>
Richard Earnshaw (lists) Oct. 26, 2018, 3:42 p.m. UTC | #5
On 26/10/2018 16:25, Christophe Lyon wrote:
> On Tue, 23 Oct 2018 at 16:07, Richard Earnshaw (lists)

> <Richard.Earnshaw@arm.com> wrote:

>>

>> On 19/10/2018 14:40, Christophe Lyon wrote:

>>> On 12/10/2018 12:45, Richard Earnshaw (lists) wrote:

>>>> On 11/10/18 14:34, Christophe Lyon wrote:

>>>>> The FDPIC register is hard-coded to r9, as defined in the ABI.

>>>>>

>>>>> We have to disable tailcall optimizations if we don't know if the

>>>>> target function is in the same module. If not, we have to set r9 to

>>>>> the value associated with the target module.

>>>>>

>>>>> When generating a symbol address, we have to take into account whether

>>>>> it is a pointer to data or to a function, because different

>>>>> relocations are needed.

>>>>>

>>>>> 2018-XX-XX  Christophe Lyon  <christophe.lyon@st.com>

>>>>>     Mickaël Guêné <mickael.guene@st.com>

>>>>>

>>>>>     * config/arm/arm-c.c (__FDPIC__): Define new pre-processor macro

>>>>>     in FDPIC mode.

>>>>>     * config/arm/arm-protos.h (arm_load_function_descriptor): Declare

>>>>>     new function.

>>>>>     * config/arm/arm.c (arm_option_override): Define pic register to

>>>>>     FDPIC_REGNUM.

>>>>>     (arm_function_ok_for_sibcall) Disable sibcall optimization if we

>>>>

>>>> Missing colon.

>>>>

>>>>>     have no decl or go through PLT.

>>>>>     (arm_load_pic_register): Handle TARGET_FDPIC.

>>>>>     (arm_is_segment_info_known): New function.

>>>>>     (arm_pic_static_addr): Add support for FDPIC.

>>>>>     (arm_load_function_descriptor): New function.

>>>>>     (arm_assemble_integer): Add support for FDPIC.

>>>>>     * config/arm/arm.h (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED):

>>>>>     Define. (FDPIC_REGNUM): New define.

>>>>>     * config/arm/arm.md (call): Add support for FDPIC.

>>>>>     (call_value): Likewise.

>>>>>     (*restore_pic_register_after_call): New pattern.

>>>>>     (untyped_call): Disable if FDPIC.

>>>>>     (untyped_return): Likewise.

>>>>>     * config/arm/unspecs.md (UNSPEC_PIC_RESTORE): New.

>>>>>

>>>>

>>>> Other comments inline.

>>>>

>>>>> diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c

>>>>> index 4471f79..90733cc 100644

>>>>> --- a/gcc/config/arm/arm-c.c

>>>>> +++ b/gcc/config/arm/arm-c.c

>>>>> @@ -202,6 +202,8 @@ arm_cpu_builtins (struct cpp_reader* pfile)

>>>>>         builtin_define ("__ARM_EABI__");

>>>>>       }

>>>>>   +  def_or_undef_macro (pfile, "__FDPIC__", TARGET_FDPIC);

>>>>> +

>>>>>     def_or_undef_macro (pfile, "__ARM_ARCH_EXT_IDIV__", TARGET_IDIV);

>>>>>     def_or_undef_macro (pfile, "__ARM_FEATURE_IDIV", TARGET_IDIV);

>>>>>   diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h

>>>>> index 0dfb3ac..28cafa8 100644

>>>>> --- a/gcc/config/arm/arm-protos.h

>>>>> +++ b/gcc/config/arm/arm-protos.h

>>>>> @@ -136,6 +136,7 @@ extern int arm_max_const_double_inline_cost (void);

>>>>>   extern int arm_const_double_inline_cost (rtx);

>>>>>   extern bool arm_const_double_by_parts (rtx);

>>>>>   extern bool arm_const_double_by_immediates (rtx);

>>>>> +extern rtx arm_load_function_descriptor (rtx funcdesc);

>>>>>   extern void arm_emit_call_insn (rtx, rtx, bool);

>>>>>   bool detect_cmse_nonsecure_call (tree);

>>>>>   extern const char *output_call (rtx *);

>>>>> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c

>>>>> index 8810df5..92ae24b 100644

>>>>> --- a/gcc/config/arm/arm.c

>>>>> +++ b/gcc/config/arm/arm.c

>>>>> @@ -3470,6 +3470,14 @@ arm_option_override (void)

>>>>>     if (flag_pic && TARGET_VXWORKS_RTP)

>>>>>       arm_pic_register = 9;

>>>>>   +  /* If in FDPIC mode then force arm_pic_register to be r9.  */

>>>>> +  if (TARGET_FDPIC)

>>>>> +    {

>>>>> +      arm_pic_register = FDPIC_REGNUM;

>>>>> +      if (TARGET_ARM_ARCH < 7)

>>>>> +    error ("FDPIC mode is not supported on architectures older than

>>>>> Armv7");

>>>>

>>>> What properties of FDPIC impose this requirement?  Does it also apply to

>>>> Armv8-m.baseline?

>>>>

>>> In fact, there was miscommunication on my side, resulting in a

>>> misunderstanding between Kyrill and myself, which I badly translated

>>> into this condition.

>>>

>>> My initial plan was to submit a patch series tested on v7, and send the

>>> patches needed to support older architectures as a follow-up. The proper

>>> restriction is actually "CPUs that do not support ARM or Thumb2". As you

>>> may have noticed during the iterations of this patch series, I had

>>> failed to remove partial Thumb1 support hunks.

>>>

>>> So really this should be rephrased, and rewritten as "FDPIC mode is

>>> supported on architecture versions that support ARM or Thumb-2", if that

>>> suits you. And the condition should thus be:

>>> if (! TARGET_ARM && ! TARGET_THUMB2)

>>>   error ("...")

>>>

>>> This would also exclude Armv8-m.baseline, since it doesn't support Thumb2.

>>

>> When we get to v8-m.baseline the thumb1/2 distinction starts to become a

>> lot more blurred.  A lot of thumb2 features needed for stand-alone

>> systems are then available.  So what feature is it that you require in

>> order to make fdpic work in (traditional) thumb2 that isn't in

>> (traditional) thumb1?

>>

> At the moment I'm not sure about what feature is missing. It's rather

> that we haven't made it work, although there were preliminary attempts.

> 

> Since building GCC --with-cpu=cortex-m{0,23} --target arm-linux-gnueabi

> currently fails, I tried using a fdpic toolchain built --with-cpu=cortex-m4,

> forcing -mcpu=cortex-m{0,23} while building uClibc-ng. I noticed two kinds

> of failures:

> - parts of assembly files do not support Thumb-1, so they need porting at least

> (ldso/ldso/arm/dl-startup.h)

> - ICEs for lack of .md patterns (cortex-m4 uses pic_load_addr_32bit which is

> missing for m{0,23})

> 

> There are probably other problems that would be discovered at runtime.

> 

> So it can probably be made to work, but I think that would be an enhancement

> for later (not sure there's a real need: can we reasonably think about

> running Linux on such small cores?)


So would a sorry() call be more appropriate?

R.

> 

>>

>>

>>> As a side note, I tried to build GCC master (without my patches)

>>> --with-cpu=cortex-m23, and both targets arm-eabi and arm-linux-gnueabi

>>> failed to build.

>>>

>>> For arm-eabi, there are problems in newlib:

>>> newlib/libc/sys/arm/crt0.S:145: Error: lo register required -- `add

>>> sl,r2,#256'

>>> newlib/libc/sys/arm/trap.S:88: Error: lo register required -- `sub

>>> ip,sp,ip'

>>>

>>

>> These all sound like basic CPU detection issues in newlib and need to be

>> fixed at some point (it's probably still using some pre-ACLE macros to

>> detect system capabilities).

>>

>> R.

>>

>>> For arm-linux-gnueabi, the failure happens while building libgcc:

>>> /home/christophe.lyon/src/GCC/sources/newlib/newlib/libc/machine/arm/setjmp.S:169:

>>> Error: selected processor does not support ARM opcodes

>>> /newlib/newlib/libc/machine/arm/setjmp.S:176: Error: attempt to use an

>>> ARM instruction on a Thumb-only processor -- `stmea a1!,{

>>> v1-v7,fp,ip,sp,lr }'

>>> /newlib/newlib/libc/machine/arm/setjmp.S:186: Error: attempt to use an

>>> ARM instruction on a Thumb-only processor -- `mov a1,#0'

>>> /newlib/newlib/libc/machine/arm/setjmp.S:188: Error: attempt to use an

>>> ARM instruction on a Thumb-only processor -- `tst lr,#1'

>>> /newlib/newlib/libc/machine/arm/setjmp.S:188: Error: attempt to use an

>>> ARM instruction on a Thumb-only processor -- `moveq pc,lr'

>>> /newlib/newlib/libc/machine/arm/setjmp.S:194: Error: selected processor

>>> does not support ARM opcodes

>>> /newlib/newlib/libc/machine/arm/setjmp.S:203: Error: attempt to use an

>>> ARM instruction on a Thumb-only processor -- `ldmfd a1!,{

>>> v1-v7,fp,ip,sp,lr }'

>>> /newlib/newlib/libc/machine/arm/setjmp.S:214: Error: attempt to use an

>>> ARM instruction on a Thumb-only processor -- `movs a1,a2'

>>> /newlib/newlib/libc/machine/arm/setjmp.S:218: Error: attempt to use an

>>> ARM instruction on a Thumb-only processor -- `moveq a1,#1'

>>> /newlib/newlib/libc/machine/arm/setjmp.S:220: Error: attempt to use an

>>> ARM instruction on a Thumb-only processor -- `tst lr,#1'

>>> /newlib/newlib/libc/machine/arm/setjmp.S:220: Error: attempt to use an

>>> ARM instruction on a Thumb-only processor -- `moveq pc,lr'

>>>

>>>

>>>>> +    }

>>>>> +

>>>>>     if (arm_pic_register_string != NULL)

>>>>>       {

>>>>>         int pic_register = decode_reg_name (arm_pic_register_string);

>>>>> @@ -7251,6 +7259,21 @@ arm_function_ok_for_sibcall (tree decl, tree exp)

>>>>>     if (cfun->machine->sibcall_blocked)

>>>>>       return false;

>>>>>   +  if (TARGET_FDPIC)

>>>>> +    {

>>>>> +      /* In FDPIC, never tailcall something for which we have no decl:

>>>>> +     the target function could be in a different module, requiring

>>>>> +     a different FDPIC register value.  */

>>>>> +      if (decl == NULL)

>>>>> +    return false;

>>>>> +

>>>>> +      /* Don't tailcall if we go through the PLT since the FDPIC

>>>>> +     register is then corrupted and we don't restore it after

>>>>> +     static function calls.  */

>>>>> +      if (!targetm.binds_local_p (decl))

>>>>> +    return false;

>>>>> +    }

>>>>> +

>>>>>     /* Never tailcall something if we are generating code for

>>>>> Thumb-1.  */

>>>>>     if (TARGET_THUMB1)

>>>>>       return false;

>>>>> @@ -7629,7 +7652,9 @@ arm_load_pic_register (unsigned long saved_regs

>>>>> ATTRIBUTE_UNUSED)

>>>>>   {

>>>>>     rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

>>>>>   -  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)

>>>>> +  if (crtl->uses_pic_offset_table == 0

>>>>> +      || TARGET_SINGLE_PIC_BASE

>>>>> +      || TARGET_FDPIC)

>>>>>       return;

>>>>>       gcc_assert (flag_pic);

>>>>> @@ -7697,28 +7722,140 @@ arm_load_pic_register (unsigned long

>>>>> saved_regs ATTRIBUTE_UNUSED)

>>>>>     emit_use (pic_reg);

>>>>>   }

>>>>>   +/* Try to know if the object will go in text or data segment. This is

>>>>

>>>> "Try to determine whether an object, referenced via ORIG, will be placed

>>>> in the text or data segment."

>>>>> +   used in FDPIC mode, to decide which relocations to use when

>>>>> +   accessing ORIG. IS_READONLY is set to true if ORIG is a read-only

>>>>

>>>> Two spaces after a period.

>>>>

>>>>> +   location, false otherwise.  */

>>>>

>>>> You've missed the documentation of the return value: does returning true

>>>> mean text vs data, or does it mean we know which it will go in, but

>>>> don't have to return that information here.

>>>>

>>>> Generally, won't this break big time if users compile with

>>>> -ffunction-sections or -fdata-sections?  Is it sufficient to match

>>>> .text.* as being text and .data.* for data?

>>>>

>>>

>>> I compiled a small testcase with -ffunction-sections and -fdata-sections

>>> and noticed no problem.

>>> The code below does not attempt to match section names; I'm not sure I

>>> understand your question.

>>>

>>>>

>>>>> +static bool

>>>>> +arm_is_segment_info_known (rtx orig, bool *is_readonly)

>>>>> +{

>>>>> +  bool res = false;

>>>>> +

>>>>> +  *is_readonly = false;

>>>>> +

>>>>> +  if (GET_CODE (orig) == LABEL_REF)

>>>>> +    {

>>>>> +      res = true;

>>>>> +      *is_readonly = true;

>>>>> +    }

>>>>> +  else if (SYMBOL_REF_P (orig))

>>>>> +    {

>>>>> +      if (CONSTANT_POOL_ADDRESS_P (orig))

>>>>> +    {

>>>>> +      res = true;

>>>>> +      *is_readonly = true;

>>>>> +    }

>>>>> +      else if (SYMBOL_REF_LOCAL_P (orig)

>>>>> +           && !SYMBOL_REF_EXTERNAL_P (orig)

>>>>> +           && SYMBOL_REF_DECL (orig)

>>>>> +           && (!DECL_P (SYMBOL_REF_DECL (orig))

>>>>> +           || !DECL_COMMON (SYMBOL_REF_DECL (orig))))

>>>>> +    {

>>>>> +      tree decl = SYMBOL_REF_DECL (orig);

>>>>> +      tree init = (TREE_CODE (decl) == VAR_DECL)

>>>>> +        ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)

>>>>> +        ? decl : 0;

>>>>> +      int reloc = 0;

>>>>> +      bool named_section, readonly;

>>>>> +

>>>>> +      if (init && init != error_mark_node)

>>>>> +        reloc = compute_reloc_for_constant (init);

>>>>> +

>>>>> +      named_section = TREE_CODE (decl) == VAR_DECL

>>>>> +        && lookup_attribute ("section", DECL_ATTRIBUTES (decl));

>>>>> +      readonly = decl_readonly_section (decl, reloc);

>>>>> +

>>>>> +      /* We don't know where the link script will put a named

>>>>> +         section, so return false in such a case.  */

>>>>> +      res = !named_section;

>>>>> +

>>>>> +      if (!named_section)

>>>>> +        *is_readonly = readonly;

>>>>> +    }

>>>>> +      else

>>>>> +    {

>>>>> +      /* We don't know.  */

>>>>> +      res = false;

>>>>> +    }

>>>>> +    }

>>>>> +  else

>>>>> +    gcc_unreachable ();

>>>>> +

>>>>> +  return res;

>>>>> +}

>>>>> +

>>>>>   /* Generate code to load the address of a static var when flag_pic

>>>>> is set.  */

>>>>>   static rtx_insn *

>>>>>   arm_pic_static_addr (rtx orig, rtx reg)

>>>>>   {

>>>>>     rtx l1, labelno, offset_rtx;

>>>>> +  rtx_insn *insn;

>>>>>       gcc_assert (flag_pic);

>>>>>   -  /* We use an UNSPEC rather than a LABEL_REF because this label

>>>>> -     never appears in the code stream.  */

>>>>> -  labelno = GEN_INT (pic_labelno++);

>>>>> -  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno),

>>>>> UNSPEC_PIC_LABEL);

>>>>> -  l1 = gen_rtx_CONST (VOIDmode, l1);

>>>>> +  bool is_readonly = false;

>>>>> +  bool info_known = false;

>>>>>   -  /* On the ARM the PC register contains 'dot + 8' at the time of the

>>>>> -     addition, on the Thumb it is 'dot + 4'.  */

>>>>> -  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);

>>>>> -  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),

>>>>> -                               UNSPEC_SYMBOL_OFFSET);

>>>>> -  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

>>>>> +  if (TARGET_FDPIC

>>>>> +      && SYMBOL_REF_P (orig)

>>>>> +      && !SYMBOL_REF_FUNCTION_P (orig))

>>>>> +      info_known = arm_is_segment_info_known (orig, &is_readonly);

>>>>>   -  return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,

>>>>> labelno));

>>>>> +  if (TARGET_FDPIC

>>>>> +      && SYMBOL_REF_P (orig)

>>>>> +      && !SYMBOL_REF_FUNCTION_P (orig)

>>>>> +      && !info_known)

>>>>> +    {

>>>>> +      /* We don't know where orig is stored, so we have be

>>>>> +     pessimistic and use a GOT relocation.  */

>>>>> +      rtx pat;

>>>>> +      rtx mem;

>>>>> +      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

>>>>> +

>>>>> +      pat = gen_calculate_pic_address (reg, pic_reg, orig);

>>>>> +

>>>>> +      /* Make the MEM as close to a constant as possible.  */

>>>>> +      mem = SET_SRC (pat);

>>>>> +      gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));

>>>>> +      MEM_READONLY_P (mem) = 1;

>>>>> +      MEM_NOTRAP_P (mem) = 1;

>>>>> +

>>>>> +      insn = emit_insn (pat);

>>>>> +    }

>>>>> +  else if (TARGET_FDPIC

>>>>> +       && SYMBOL_REF_P (orig)

>>>>> +       && (SYMBOL_REF_FUNCTION_P (orig)

>>>>> +           || (info_known && !is_readonly)))

>>>>> +    {

>>>>> +      /* We use the GOTOFF relocation.  */

>>>>> +      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

>>>>> +

>>>>> +      rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig),

>>>>> UNSPEC_PIC_SYM);

>>>>> +      emit_insn (gen_movsi (reg, l1));

>>>>> +      insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));

>>>>> +    }

>>>>> +  else

>>>>> +    {

>>>>> +      /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use

>>>>> +     PC-relative access.  */

>>>>> +      /* We use an UNSPEC rather than a LABEL_REF because this label

>>>>> +     never appears in the code stream.  */

>>>>> +      labelno = GEN_INT (pic_labelno++);

>>>>> +      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno),

>>>>> UNSPEC_PIC_LABEL);

>>>>> +      l1 = gen_rtx_CONST (VOIDmode, l1);

>>>>> +

>>>>> +      /* On the ARM the PC register contains 'dot + 8' at the time

>>>>> of the

>>>>> +     addition, on the Thumb it is 'dot + 4'.  */

>>>>> +      offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);

>>>>> +      offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig,

>>>>> offset_rtx),

>>>>> +                   UNSPEC_SYMBOL_OFFSET);

>>>>> +      offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

>>>>> +

>>>>> +      insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,

>>>>> +                           labelno));

>>>>> +    }

>>>>> +

>>>>> +  return insn;

>>>>>   }

>>>>>     /* Return nonzero if X is valid as an ARM state addressing

>>>>> register.  */

>>>>> @@ -15933,9 +16070,36 @@ get_jump_table_size (rtx_jump_table_data *insn)

>>>>>     return 0;

>>>>>   }

>>>>>   +/* Emit insns to load the function address from FUNCDESC (an FDPIC

>>>>> +   function descriptor) into a register and the GOT address into the

>>>>> +   FDPIC register, returning an rtx for the register holding the

>>>>> +   function address.  */

>>>>> +

>>>>> +rtx

>>>>> +arm_load_function_descriptor (rtx funcdesc)

>>>>> +{

>>>>> +  rtx fnaddr_reg = gen_reg_rtx (Pmode);

>>>>> +  rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

>>>>> +  rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);

>>>>> +  rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc,

>>>>> 4));

>>>>> +  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));

>>>>> +

>>>>> +  emit_move_insn (fnaddr_reg, fnaddr);

>>>>> +  /* The ABI requires the entry point address to be loaded first, so

>>>>> +     prevent the load from being moved after that of the GOT

>>>>> +     address.  */

>>>>> +  XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,

>>>>> +                    gen_rtvec (2, pic_reg, gotaddr),

>>>>> +                    UNSPEC_PIC_RESTORE);

>>>>> +  XVECEXP (par, 0, 1) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode,

>>>>> FDPIC_REGNUM))

>>>>> +  XVECEXP (par, 0, 2) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG

>>>>> (Pmode, FDPIC_REGNUM));

>>>>

>>>> Shouldn't one of these be fnaddr_reg and the other pic_reg?

>>> I think the USE should be gotaddr, and CLOBBER should be pic_reg, thanks.

>>>

>>>>

>>>>> +  emit_insn (par);

>>>>> +

>>>>> +  return fnaddr_reg;

>>>>> +}

>>>>> +

>>>>>   /* Return the maximum amount of padding that will be inserted before

>>>>>      label LABEL.  */

>>>>> -

>>>>>   static HOST_WIDE_INT

>>>>>   get_label_padding (rtx label)

>>>>>   {

>>>>> @@ -22890,9 +23054,37 @@ arm_assemble_integer (rtx x, unsigned int

>>>>> size, int aligned_p)

>>>>>             && (!SYMBOL_REF_LOCAL_P (x)

>>>>>                 || (SYMBOL_REF_DECL (x)

>>>>>                 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))

>>>>> -        fputs ("(GOT)", asm_out_file);

>>>>> +        {

>>>>> +          if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))

>>>>> +        fputs ("(GOTFUNCDESC)", asm_out_file);

>>>>> +          else

>>>>> +        fputs ("(GOT)", asm_out_file);

>>>>> +        }

>>>>>         else

>>>>> -        fputs ("(GOTOFF)", asm_out_file);

>>>>> +        {

>>>>> +          if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))

>>>>> +        fputs ("(GOTOFFFUNCDESC)", asm_out_file);

>>>>> +          else

>>>>> +        {

>>>>> +          bool is_readonly;

>>>>> +

>>>>> +          if (arm_is_segment_info_known (x, &is_readonly))

>>>>> +            fputs ("(GOTOFF)", asm_out_file);

>>>>> +          else

>>>>> +            fputs ("(GOT)", asm_out_file);

>>>>> +        }

>>>>> +        }

>>>>> +    }

>>>>> +

>>>>> +      /* For FDPIC we also have to mark symbol for .data section.  */

>>>>> +      if (TARGET_FDPIC

>>>>> +      && NEED_GOT_RELOC

>>>>> +      && flag_pic

>>>>> +      && !making_const_table

>>>>> +      && SYMBOL_REF_P (x))

>>>>> +    {

>>>>> +      if (SYMBOL_REF_FUNCTION_P (x))

>>>>> +        fputs ("(FUNCDESC)", asm_out_file);

>>>>>       }

>>>>>         fputc ('\n', asm_out_file);

>>>>>         return true;

>>>>> diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h

>>>>> index 34894c0..4671d64 100644

>>>>> --- a/gcc/config/arm/arm.h

>>>>> +++ b/gcc/config/arm/arm.h

>>>>> @@ -871,6 +871,9 @@ extern int arm_arch_cmse;

>>>>>      Pascal), so the following is not true.  */

>>>>>   #define STATIC_CHAIN_REGNUM    12

>>>>>   +/* r9 is the FDPIC register (base register for GOT and FUNCDESC

>>>>> accesses).  */

>>>>> +#define FDPIC_REGNUM        9

>>>>> +

>>>>>   /* Define this to be where the real frame pointer is if it is not

>>>>> possible to

>>>>>      work out the offset between the frame pointer and the automatic

>>>>> variables

>>>>>      until after register allocation has taken place.

>>>>> FRAME_POINTER_REGNUM

>>>>> @@ -1927,6 +1930,10 @@ extern unsigned arm_pic_register;

>>>>>      data addresses in memory.  */

>>>>>   #define PIC_OFFSET_TABLE_REGNUM arm_pic_register

>>>>>   +/* For FDPIC, the FDPIC register is call-clobbered (otherwise PLT

>>>>> +   entries would need to handle saving and restoring it).  */

>>>>> +#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED TARGET_FDPIC

>>>>> +

>>>>>   /* We can't directly access anything that contains a symbol,

>>>>>      nor can we indirect via the constant pool.  One exception is

>>>>>      UNSPEC_TLS, which is always PIC.  */

>>>>> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md

>>>>> index 270b8e4..09a0701 100644

>>>>> --- a/gcc/config/arm/arm.md

>>>>> +++ b/gcc/config/arm/arm.md

>>>>> @@ -8031,6 +8031,23 @@

>>>>>       rtx callee, pat;

>>>>>       tree addr = MEM_EXPR (operands[0]);

>>>>>       +    /* Force FDPIC register (r9) before call.  */

>>>>> +    if (TARGET_FDPIC)

>>>>> +      {

>>>>> +    /* No need to update r9 if calling a static function.

>>>>> +       In other words: set r9 for indirect or non-local calls.  */

>>>>> +    callee = XEXP (operands[0], 0);

>>>>> +    if (!SYMBOL_REF_P (callee)

>>>>> +        || !SYMBOL_REF_LOCAL_P (callee)

>>>>> +        || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

>>>>> +      {

>>>>> +        emit_insn (gen_blockage ());

>>>>> +        rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

>>>>> +        emit_move_insn (pic_reg, get_hard_reg_initial_val (Pmode,

>>>>> FDPIC_REGNUM));

>>>>> +        emit_insn (gen_rtx_USE (VOIDmode, pic_reg));

>>>>> +     }

>>>>> +      }

>>>>> +

>>>>>       /* In an untyped call, we can get NULL for operand 2.  */

>>>>>       if (operands[2] == NULL_RTX)

>>>>>         operands[2] = const0_rtx;

>>>>> @@ -8044,6 +8061,13 @@

>>>>>       : !REG_P (callee))

>>>>>         XEXP (operands[0], 0) = force_reg (Pmode, callee);

>>>>>   +    if (TARGET_FDPIC && !SYMBOL_REF_P (XEXP (operands[0], 0)))

>>>>> +      {

>>>>> +    /* Indirect call: set r9 with FDPIC value of callee.  */

>>>>> +    XEXP (operands[0], 0)

>>>>> +      = arm_load_function_descriptor (XEXP (operands[0], 0));

>>>>> +      }

>>>>> +

>>>>>       if (detect_cmse_nonsecure_call (addr))

>>>>>         {

>>>>>       pat = gen_nonsecure_call_internal (operands[0], operands[1],

>>>>> @@ -8055,10 +8079,38 @@

>>>>>       pat = gen_call_internal (operands[0], operands[1], operands[2]);

>>>>>       arm_emit_call_insn (pat, XEXP (operands[0], 0), false);

>>>>>         }

>>>>> +

>>>>> +    /* Restore FDPIC register (r9) after call.  */

>>>>> +    if (TARGET_FDPIC)

>>>>> +      {

>>>>> +    /* No need to update r9 if calling a static function.  */

>>>>> +    if (!SYMBOL_REF_P (callee)

>>>>> +        || !SYMBOL_REF_LOCAL_P (callee)

>>>>> +        || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

>>>>> +      {

>>>>> +        rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

>>>>> +        emit_move_insn (pic_reg, get_hard_reg_initial_val (Pmode,

>>>>> FDPIC_REGNUM));

>>>>> +        emit_insn (gen_rtx_USE (VOIDmode, pic_reg));

>>>>> +        emit_insn (gen_blockage ());

>>>>> +      }

>>>>> +      }

>>>>>       DONE;

>>>>>     }"

>>>>>   )

>>>>>   +(define_insn "*restore_pic_register_after_call"

>>>>> +  [(parallel [(unspec [(match_operand:SI 0 "s_register_operand" "=r,r")

>>>>> +               (match_operand:SI 1 "nonimmediate_operand" "r,m")]

>>>>> +           UNSPEC_PIC_RESTORE)

>>>>> +          (use (match_dup 0))

>>>>> +          (clobber (match_dup 0))])

>>>>> +  ]

>>>>> +  ""

>>>>> +  "@

>>>>> +  mov\t%0, %1

>>>>> +  ldr\t%0, %1"

>>>>> +)

>>>>> +

>>>>>   (define_expand "call_internal"

>>>>>     [(parallel [(call (match_operand 0 "memory_operand" "")

>>>>>                   (match_operand 1 "general_operand" ""))

>>>>> @@ -8119,6 +8171,30 @@

>>>>>       rtx pat, callee;

>>>>>       tree addr = MEM_EXPR (operands[1]);

>>>>>       +    /* Force FDPIC register (r9) before call.  */

>>>>> +    if (TARGET_FDPIC)

>>>>> +      {

>>>>> +    /* No need to update the FDPIC register (r9) if calling a static

>>>>> function.

>>>>> +       In other words: set r9 for indirect or non-local calls.  */

>>>>> +    callee = XEXP (operands[1], 0);

>>>>> +    if (!SYMBOL_REF_P (callee)

>>>>> +        || !SYMBOL_REF_LOCAL_P (callee)

>>>>> +        || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

>>>>> +      {

>>>>> +        rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));

>>>>> +

>>>>> +        XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,

>>>>> +        gen_rtvec (2, gen_rtx_REG (Pmode, FDPIC_REGNUM),

>>>>> +               get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)),

>>>>> +        UNSPEC_PIC_RESTORE);

>>>>> +        XVECEXP (par, 0, 1)

>>>>> +          = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));

>>>>> +        XVECEXP (par, 0, 2)

>>>>> +          = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode,

>>>>> FDPIC_REGNUM));

>>>>

>>>> Again, this looks suspicious.

>>>>

>>> Yes, fixed for follow-up patch, with

>>> USE for get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)

>>> CLOBBER for gen_rtx_REG (Pmode, FDPIC_REGNUM)

>>>

>>>>> +        emit_insn (par);

>>>>> +      }

>>>>> +      }

>>>>> +

>>>>>       /* In an untyped call, we can get NULL for operand 2.  */

>>>>>       if (operands[3] == 0)

>>>>>         operands[3] = const0_rtx;

>>>>> @@ -8132,6 +8208,14 @@

>>>>>       : !REG_P (callee))

>>>>>         XEXP (operands[1], 0) = force_reg (Pmode, callee);

>>>>>   +    if (TARGET_FDPIC

>>>>> +    && !SYMBOL_REF_P (XEXP (operands[1], 0)))

>>>>> +      {

>>>>> +    /* Indirect call: set r9 with FDPIC value of callee.  */

>>>>> +    XEXP (operands[1], 0)

>>>>> +      = arm_load_function_descriptor (XEXP (operands[1], 0));

>>>>> +      }

>>>>> +

>>>>>       if (detect_cmse_nonsecure_call (addr))

>>>>>         {

>>>>>       pat = gen_nonsecure_call_value_internal (operands[0], operands[1],

>>>>> @@ -8144,6 +8228,28 @@

>>>>>                          operands[2], operands[3]);

>>>>>       arm_emit_call_insn (pat, XEXP (operands[1], 0), false);

>>>>>         }

>>>>> +    /* Restore FDPIC register (r9) after call.  */

>>>>> +    if (TARGET_FDPIC)

>>>>> +      {

>>>>> +    /* No need to update r9 if calling a static function.  */

>>>>> +    if (!SYMBOL_REF_P (callee)

>>>>> +        || !SYMBOL_REF_LOCAL_P (callee)

>>>>> +        || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

>>>>> +      {

>>>>> +        rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));

>>>>> +

>>>>> +        XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,

>>>>> +        gen_rtvec (2, gen_rtx_REG (Pmode, FDPIC_REGNUM),

>>>>> +               get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)),

>>>>> +        UNSPEC_PIC_RESTORE);

>>>>> +        XVECEXP (par, 0, 1)

>>>>> +          = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));

>>>>> +        XVECEXP (par, 0, 2)

>>>>> +          = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode,

>>>>> FDPIC_REGNUM));

>>>>

>>>> And again.

>>> Yes

>>>

>>>>

>>>>> +        emit_insn (par);

>>>>> +      }

>>>>> +      }

>>>>> +

>>>>>       DONE;

>>>>>     }"

>>>>>   )

>>>>> @@ -8486,7 +8592,7 @@

>>>>>               (const_int 0))

>>>>>             (match_operand 1 "" "")

>>>>>             (match_operand 2 "" "")])]

>>>>> -  "TARGET_EITHER"

>>>>> +  "TARGET_EITHER && !TARGET_FDPIC"

>>>>>     "

>>>>>     {

>>>>>       int i;

>>>>> @@ -8553,7 +8659,7 @@

>>>>>   (define_expand "untyped_return"

>>>>>     [(match_operand:BLK 0 "memory_operand" "")

>>>>>      (match_operand 1 "" "")]

>>>>> -  "TARGET_EITHER"

>>>>> +  "TARGET_EITHER && !TARGET_FDPIC"

>>>>>     "

>>>>>     {

>>>>>       int i;

>>>>> diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md

>>>>> index 1941673..349ae0e 100644

>>>>> --- a/gcc/config/arm/unspecs.md

>>>>> +++ b/gcc/config/arm/unspecs.md

>>>>> @@ -86,6 +86,7 @@

>>>>>     UNSPEC_PROBE_STACK    ; Probe stack memory reference

>>>>>     UNSPEC_NONSECURE_MEM    ; Represent non-secure memory in ARMv8-M

>>>>> with

>>>>>               ; security extension

>>>>> +  UNSPEC_PIC_RESTORE    ; Use to restore fdpic register

>>>>>   ])

>>>>>     (define_c_enum "unspec" [

>>>>>

>>>>

>>>> .

>>>>

>>>

>>
Christophe Lyon Oct. 29, 2018, 8:16 a.m. UTC | #6
On Fri, 26 Oct 2018 at 17:42, Richard Earnshaw (lists)
<Richard.Earnshaw@arm.com> wrote:
>

> On 26/10/2018 16:25, Christophe Lyon wrote:

> > On Tue, 23 Oct 2018 at 16:07, Richard Earnshaw (lists)

> > <Richard.Earnshaw@arm.com> wrote:

> >>

> >> On 19/10/2018 14:40, Christophe Lyon wrote:

> >>> On 12/10/2018 12:45, Richard Earnshaw (lists) wrote:

> >>>> On 11/10/18 14:34, Christophe Lyon wrote:

> >>>>> The FDPIC register is hard-coded to r9, as defined in the ABI.

> >>>>>

> >>>>> We have to disable tailcall optimizations if we don't know if the

> >>>>> target function is in the same module. If not, we have to set r9 to

> >>>>> the value associated with the target module.

> >>>>>

> >>>>> When generating a symbol address, we have to take into account whether

> >>>>> it is a pointer to data or to a function, because different

> >>>>> relocations are needed.

> >>>>>

> >>>>> 2018-XX-XX  Christophe Lyon  <christophe.lyon@st.com>

> >>>>>     Mickaël Guêné <mickael.guene@st.com>

> >>>>>

> >>>>>     * config/arm/arm-c.c (__FDPIC__): Define new pre-processor macro

> >>>>>     in FDPIC mode.

> >>>>>     * config/arm/arm-protos.h (arm_load_function_descriptor): Declare

> >>>>>     new function.

> >>>>>     * config/arm/arm.c (arm_option_override): Define pic register to

> >>>>>     FDPIC_REGNUM.

> >>>>>     (arm_function_ok_for_sibcall) Disable sibcall optimization if we

> >>>>

> >>>> Missing colon.

> >>>>

> >>>>>     have no decl or go through PLT.

> >>>>>     (arm_load_pic_register): Handle TARGET_FDPIC.

> >>>>>     (arm_is_segment_info_known): New function.

> >>>>>     (arm_pic_static_addr): Add support for FDPIC.

> >>>>>     (arm_load_function_descriptor): New function.

> >>>>>     (arm_assemble_integer): Add support for FDPIC.

> >>>>>     * config/arm/arm.h (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED):

> >>>>>     Define. (FDPIC_REGNUM): New define.

> >>>>>     * config/arm/arm.md (call): Add support for FDPIC.

> >>>>>     (call_value): Likewise.

> >>>>>     (*restore_pic_register_after_call): New pattern.

> >>>>>     (untyped_call): Disable if FDPIC.

> >>>>>     (untyped_return): Likewise.

> >>>>>     * config/arm/unspecs.md (UNSPEC_PIC_RESTORE): New.

> >>>>>

> >>>>

> >>>> Other comments inline.

> >>>>

> >>>>> diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c

> >>>>> index 4471f79..90733cc 100644

> >>>>> --- a/gcc/config/arm/arm-c.c

> >>>>> +++ b/gcc/config/arm/arm-c.c

> >>>>> @@ -202,6 +202,8 @@ arm_cpu_builtins (struct cpp_reader* pfile)

> >>>>>         builtin_define ("__ARM_EABI__");

> >>>>>       }

> >>>>>   +  def_or_undef_macro (pfile, "__FDPIC__", TARGET_FDPIC);

> >>>>> +

> >>>>>     def_or_undef_macro (pfile, "__ARM_ARCH_EXT_IDIV__", TARGET_IDIV);

> >>>>>     def_or_undef_macro (pfile, "__ARM_FEATURE_IDIV", TARGET_IDIV);

> >>>>>   diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h

> >>>>> index 0dfb3ac..28cafa8 100644

> >>>>> --- a/gcc/config/arm/arm-protos.h

> >>>>> +++ b/gcc/config/arm/arm-protos.h

> >>>>> @@ -136,6 +136,7 @@ extern int arm_max_const_double_inline_cost (void);

> >>>>>   extern int arm_const_double_inline_cost (rtx);

> >>>>>   extern bool arm_const_double_by_parts (rtx);

> >>>>>   extern bool arm_const_double_by_immediates (rtx);

> >>>>> +extern rtx arm_load_function_descriptor (rtx funcdesc);

> >>>>>   extern void arm_emit_call_insn (rtx, rtx, bool);

> >>>>>   bool detect_cmse_nonsecure_call (tree);

> >>>>>   extern const char *output_call (rtx *);

> >>>>> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c

> >>>>> index 8810df5..92ae24b 100644

> >>>>> --- a/gcc/config/arm/arm.c

> >>>>> +++ b/gcc/config/arm/arm.c

> >>>>> @@ -3470,6 +3470,14 @@ arm_option_override (void)

> >>>>>     if (flag_pic && TARGET_VXWORKS_RTP)

> >>>>>       arm_pic_register = 9;

> >>>>>   +  /* If in FDPIC mode then force arm_pic_register to be r9.  */

> >>>>> +  if (TARGET_FDPIC)

> >>>>> +    {

> >>>>> +      arm_pic_register = FDPIC_REGNUM;

> >>>>> +      if (TARGET_ARM_ARCH < 7)

> >>>>> +    error ("FDPIC mode is not supported on architectures older than

> >>>>> Armv7");

> >>>>

> >>>> What properties of FDPIC impose this requirement?  Does it also apply to

> >>>> Armv8-m.baseline?

> >>>>

> >>> In fact, there was miscommunication on my side, resulting in a

> >>> misunderstanding between Kyrill and myself, which I badly translated

> >>> into this condition.

> >>>

> >>> My initial plan was to submit a patch series tested on v7, and send the

> >>> patches needed to support older architectures as a follow-up. The proper

> >>> restriction is actually "CPUs that do not support ARM or Thumb2". As you

> >>> may have noticed during the iterations of this patch series, I had

> >>> failed to remove partial Thumb1 support hunks.

> >>>

> >>> So really this should be rephrased, and rewritten as "FDPIC mode is

> >>> supported on architecture versions that support ARM or Thumb-2", if that

> >>> suits you. And the condition should thus be:

> >>> if (! TARGET_ARM && ! TARGET_THUMB2)

> >>>   error ("...")

> >>>

> >>> This would also exclude Armv8-m.baseline, since it doesn't support Thumb2.

> >>

> >> When we get to v8-m.baseline the thumb1/2 distinction starts to become a

> >> lot more blurred.  A lot of thumb2 features needed for stand-alone

> >> systems are then available.  So what feature is it that you require in

> >> order to make fdpic work in (traditional) thumb2 that isn't in

> >> (traditional) thumb1?

> >>

> > At the moment I'm not sure about what feature is missing. It's rather

> > that we haven't made it work, although there were preliminary attempts.

> >

> > Since building GCC --with-cpu=cortex-m{0,23} --target arm-linux-gnueabi

> > currently fails, I tried using a fdpic toolchain built --with-cpu=cortex-m4,

> > forcing -mcpu=cortex-m{0,23} while building uClibc-ng. I noticed two kinds

> > of failures:

> > - parts of assembly files do not support Thumb-1, so they need porting at least

> > (ldso/ldso/arm/dl-startup.h)

> > - ICEs for lack of .md patterns (cortex-m4 uses pic_load_addr_32bit which is

> > missing for m{0,23})

> >

> > There are probably other problems that would be discovered at runtime.

> >

> > So it can probably be made to work, but I think that would be an enhancement

> > for later (not sure there's a real need: can we reasonably think about

> > running Linux on such small cores?)

>

> So would a sorry() call be more appropriate?

>

Yes, you are right. I'll do that.

> R.

>

> >

> >>

> >>

> >>> As a side note, I tried to build GCC master (without my patches)

> >>> --with-cpu=cortex-m23, and both targets arm-eabi and arm-linux-gnueabi

> >>> failed to build.

> >>>

> >>> For arm-eabi, there are problems in newlib:

> >>> newlib/libc/sys/arm/crt0.S:145: Error: lo register required -- `add

> >>> sl,r2,#256'

> >>> newlib/libc/sys/arm/trap.S:88: Error: lo register required -- `sub

> >>> ip,sp,ip'

> >>>

> >>

> >> These all sound like basic CPU detection issues in newlib and need to be

> >> fixed at some point (it's probably still using some pre-ACLE macros to

> >> detect system capabilities).

> >>

> >> R.

> >>

> >>> For arm-linux-gnueabi, the failure happens while building libgcc:

> >>> /home/christophe.lyon/src/GCC/sources/newlib/newlib/libc/machine/arm/setjmp.S:169:

> >>> Error: selected processor does not support ARM opcodes

> >>> /newlib/newlib/libc/machine/arm/setjmp.S:176: Error: attempt to use an

> >>> ARM instruction on a Thumb-only processor -- `stmea a1!,{

> >>> v1-v7,fp,ip,sp,lr }'

> >>> /newlib/newlib/libc/machine/arm/setjmp.S:186: Error: attempt to use an

> >>> ARM instruction on a Thumb-only processor -- `mov a1,#0'

> >>> /newlib/newlib/libc/machine/arm/setjmp.S:188: Error: attempt to use an

> >>> ARM instruction on a Thumb-only processor -- `tst lr,#1'

> >>> /newlib/newlib/libc/machine/arm/setjmp.S:188: Error: attempt to use an

> >>> ARM instruction on a Thumb-only processor -- `moveq pc,lr'

> >>> /newlib/newlib/libc/machine/arm/setjmp.S:194: Error: selected processor

> >>> does not support ARM opcodes

> >>> /newlib/newlib/libc/machine/arm/setjmp.S:203: Error: attempt to use an

> >>> ARM instruction on a Thumb-only processor -- `ldmfd a1!,{

> >>> v1-v7,fp,ip,sp,lr }'

> >>> /newlib/newlib/libc/machine/arm/setjmp.S:214: Error: attempt to use an

> >>> ARM instruction on a Thumb-only processor -- `movs a1,a2'

> >>> /newlib/newlib/libc/machine/arm/setjmp.S:218: Error: attempt to use an

> >>> ARM instruction on a Thumb-only processor -- `moveq a1,#1'

> >>> /newlib/newlib/libc/machine/arm/setjmp.S:220: Error: attempt to use an

> >>> ARM instruction on a Thumb-only processor -- `tst lr,#1'

> >>> /newlib/newlib/libc/machine/arm/setjmp.S:220: Error: attempt to use an

> >>> ARM instruction on a Thumb-only processor -- `moveq pc,lr'

> >>>

> >>>

> >>>>> +    }

> >>>>> +

> >>>>>     if (arm_pic_register_string != NULL)

> >>>>>       {

> >>>>>         int pic_register = decode_reg_name (arm_pic_register_string);

> >>>>> @@ -7251,6 +7259,21 @@ arm_function_ok_for_sibcall (tree decl, tree exp)

> >>>>>     if (cfun->machine->sibcall_blocked)

> >>>>>       return false;

> >>>>>   +  if (TARGET_FDPIC)

> >>>>> +    {

> >>>>> +      /* In FDPIC, never tailcall something for which we have no decl:

> >>>>> +     the target function could be in a different module, requiring

> >>>>> +     a different FDPIC register value.  */

> >>>>> +      if (decl == NULL)

> >>>>> +    return false;

> >>>>> +

> >>>>> +      /* Don't tailcall if we go through the PLT since the FDPIC

> >>>>> +     register is then corrupted and we don't restore it after

> >>>>> +     static function calls.  */

> >>>>> +      if (!targetm.binds_local_p (decl))

> >>>>> +    return false;

> >>>>> +    }

> >>>>> +

> >>>>>     /* Never tailcall something if we are generating code for

> >>>>> Thumb-1.  */

> >>>>>     if (TARGET_THUMB1)

> >>>>>       return false;

> >>>>> @@ -7629,7 +7652,9 @@ arm_load_pic_register (unsigned long saved_regs

> >>>>> ATTRIBUTE_UNUSED)

> >>>>>   {

> >>>>>     rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

> >>>>>   -  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)

> >>>>> +  if (crtl->uses_pic_offset_table == 0

> >>>>> +      || TARGET_SINGLE_PIC_BASE

> >>>>> +      || TARGET_FDPIC)

> >>>>>       return;

> >>>>>       gcc_assert (flag_pic);

> >>>>> @@ -7697,28 +7722,140 @@ arm_load_pic_register (unsigned long

> >>>>> saved_regs ATTRIBUTE_UNUSED)

> >>>>>     emit_use (pic_reg);

> >>>>>   }

> >>>>>   +/* Try to know if the object will go in text or data segment. This is

> >>>>

> >>>> "Try to determine whether an object, referenced via ORIG, will be placed

> >>>> in the text or data segment."

> >>>>> +   used in FDPIC mode, to decide which relocations to use when

> >>>>> +   accessing ORIG. IS_READONLY is set to true if ORIG is a read-only

> >>>>

> >>>> Two spaces after a period.

> >>>>

> >>>>> +   location, false otherwise.  */

> >>>>

> >>>> You've missed the documentation of the return value: does returning true

> >>>> mean text vs data, or does it mean we know which it will go in, but

> >>>> don't have to return that information here.

> >>>>

> >>>> Generally, won't this break big time if users compile with

> >>>> -ffunction-sections or -fdata-sections?  Is it sufficient to match

> >>>> .text.* as being text and .data.* for data?

> >>>>

> >>>

> >>> I compiled a small testcase with -ffunction-sections and -fdata-sections

> >>> and noticed no problem.

> >>> The code below does not attempt to match section names; I'm not sure I

> >>> understand your question?

> >>>

> >>>>

> >>>>> +static bool

> >>>>> +arm_is_segment_info_known (rtx orig, bool *is_readonly)

> >>>>> +{

> >>>>> +  bool res = false;

> >>>>> +

> >>>>> +  *is_readonly = false;

> >>>>> +

> >>>>> +  if (GET_CODE (orig) == LABEL_REF)

> >>>>> +    {

> >>>>> +      res = true;

> >>>>> +      *is_readonly = true;

> >>>>> +    }

> >>>>> +  else if (SYMBOL_REF_P (orig))

> >>>>> +    {

> >>>>> +      if (CONSTANT_POOL_ADDRESS_P (orig))

> >>>>> +    {

> >>>>> +      res = true;

> >>>>> +      *is_readonly = true;

> >>>>> +    }

> >>>>> +      else if (SYMBOL_REF_LOCAL_P (orig)

> >>>>> +           && !SYMBOL_REF_EXTERNAL_P (orig)

> >>>>> +           && SYMBOL_REF_DECL (orig)

> >>>>> +           && (!DECL_P (SYMBOL_REF_DECL (orig))

> >>>>> +           || !DECL_COMMON (SYMBOL_REF_DECL (orig))))

> >>>>> +    {

> >>>>> +      tree decl = SYMBOL_REF_DECL (orig);

> >>>>> +      tree init = (TREE_CODE (decl) == VAR_DECL)

> >>>>> +        ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)

> >>>>> +        ? decl : 0;

> >>>>> +      int reloc = 0;

> >>>>> +      bool named_section, readonly;

> >>>>> +

> >>>>> +      if (init && init != error_mark_node)

> >>>>> +        reloc = compute_reloc_for_constant (init);

> >>>>> +

> >>>>> +      named_section = TREE_CODE (decl) == VAR_DECL

> >>>>> +        && lookup_attribute ("section", DECL_ATTRIBUTES (decl));

> >>>>> +      readonly = decl_readonly_section (decl, reloc);

> >>>>> +

> >>>>> +      /* We don't know where the link script will put a named

> >>>>> +         section, so return false in such a case.  */

> >>>>> +      res = !named_section;

> >>>>> +

> >>>>> +      if (!named_section)

> >>>>> +        *is_readonly = readonly;

> >>>>> +    }

> >>>>> +      else

> >>>>> +    {

> >>>>> +      /* We don't know.  */

> >>>>> +      res = false;

> >>>>> +    }

> >>>>> +    }

> >>>>> +  else

> >>>>> +    gcc_unreachable ();

> >>>>> +

> >>>>> +  return res;

> >>>>> +}

> >>>>> +

> >>>>>   /* Generate code to load the address of a static var when flag_pic

> >>>>> is set.  */

> >>>>>   static rtx_insn *

> >>>>>   arm_pic_static_addr (rtx orig, rtx reg)

> >>>>>   {

> >>>>>     rtx l1, labelno, offset_rtx;

> >>>>> +  rtx_insn *insn;

> >>>>>       gcc_assert (flag_pic);

> >>>>>   -  /* We use an UNSPEC rather than a LABEL_REF because this label

> >>>>> -     never appears in the code stream.  */

> >>>>> -  labelno = GEN_INT (pic_labelno++);

> >>>>> -  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno),

> >>>>> UNSPEC_PIC_LABEL);

> >>>>> -  l1 = gen_rtx_CONST (VOIDmode, l1);

> >>>>> +  bool is_readonly = false;

> >>>>> +  bool info_known = false;

> >>>>>   -  /* On the ARM the PC register contains 'dot + 8' at the time of the

> >>>>> -     addition, on the Thumb it is 'dot + 4'.  */

> >>>>> -  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);

> >>>>> -  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),

> >>>>> -                               UNSPEC_SYMBOL_OFFSET);

> >>>>> -  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

> >>>>> +  if (TARGET_FDPIC

> >>>>> +      && SYMBOL_REF_P (orig)

> >>>>> +      && !SYMBOL_REF_FUNCTION_P (orig))

> >>>>> +      info_known = arm_is_segment_info_known (orig, &is_readonly);

> >>>>>   -  return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,

> >>>>> labelno));

> >>>>> +  if (TARGET_FDPIC

> >>>>> +      && SYMBOL_REF_P (orig)

> >>>>> +      && !SYMBOL_REF_FUNCTION_P (orig)

> >>>>> +      && !info_known)

> >>>>> +    {

> >>>>> +      /* We don't know where orig is stored, so we have to be

> >>>>> +     pessimistic and use a GOT relocation.  */

> >>>>> +      rtx pat;

> >>>>> +      rtx mem;

> >>>>> +      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

> >>>>> +

> >>>>> +      pat = gen_calculate_pic_address (reg, pic_reg, orig);

> >>>>> +

> >>>>> +      /* Make the MEM as close to a constant as possible.  */

> >>>>> +      mem = SET_SRC (pat);

> >>>>> +      gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));

> >>>>> +      MEM_READONLY_P (mem) = 1;

> >>>>> +      MEM_NOTRAP_P (mem) = 1;

> >>>>> +

> >>>>> +      insn = emit_insn (pat);

> >>>>> +    }

> >>>>> +  else if (TARGET_FDPIC

> >>>>> +       && SYMBOL_REF_P (orig)

> >>>>> +       && (SYMBOL_REF_FUNCTION_P (orig)

> >>>>> +           || (info_known && !is_readonly)))

> >>>>> +    {

> >>>>> +      /* We use the GOTOFF relocation.  */

> >>>>> +      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

> >>>>> +

> >>>>> +      rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig),

> >>>>> UNSPEC_PIC_SYM);

> >>>>> +      emit_insn (gen_movsi (reg, l1));

> >>>>> +      insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));

> >>>>> +    }

> >>>>> +  else

> >>>>> +    {

> >>>>> +      /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use

> >>>>> +     PC-relative access.  */

> >>>>> +      /* We use an UNSPEC rather than a LABEL_REF because this label

> >>>>> +     never appears in the code stream.  */

> >>>>> +      labelno = GEN_INT (pic_labelno++);

> >>>>> +      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno),

> >>>>> UNSPEC_PIC_LABEL);

> >>>>> +      l1 = gen_rtx_CONST (VOIDmode, l1);

> >>>>> +

> >>>>> +      /* On the ARM the PC register contains 'dot + 8' at the time

> >>>>> of the

> >>>>> +     addition, on the Thumb it is 'dot + 4'.  */

> >>>>> +      offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);

> >>>>> +      offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig,

> >>>>> offset_rtx),

> >>>>> +                   UNSPEC_SYMBOL_OFFSET);

> >>>>> +      offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

> >>>>> +

> >>>>> +      insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,

> >>>>> +                           labelno));

> >>>>> +    }

> >>>>> +

> >>>>> +  return insn;

> >>>>>   }

> >>>>>     /* Return nonzero if X is valid as an ARM state addressing

> >>>>> register.  */

> >>>>> @@ -15933,9 +16070,36 @@ get_jump_table_size (rtx_jump_table_data *insn)

> >>>>>     return 0;

> >>>>>   }

> >>>>>   +/* Emit insns to load the function address from FUNCDESC (an FDPIC

> >>>>> +   function descriptor) into a register and the GOT address into the

> >>>>> +   FDPIC register, returning an rtx for the register holding the

> >>>>> +   function address.  */

> >>>>> +

> >>>>> +rtx

> >>>>> +arm_load_function_descriptor (rtx funcdesc)

> >>>>> +{

> >>>>> +  rtx fnaddr_reg = gen_reg_rtx (Pmode);

> >>>>> +  rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

> >>>>> +  rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);

> >>>>> +  rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc,

> >>>>> 4));

> >>>>> +  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));

> >>>>> +

> >>>>> +  emit_move_insn (fnaddr_reg, fnaddr);

> >>>>> +  /* The ABI requires the entry point address to be loaded first, so

> >>>>> +     prevent the load from being moved after that of the GOT

> >>>>> +     address.  */

> >>>>> +  XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,

> >>>>> +                    gen_rtvec (2, pic_reg, gotaddr),

> >>>>> +                    UNSPEC_PIC_RESTORE);

> >>>>> +  XVECEXP (par, 0, 1) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode,

> >>>>> FDPIC_REGNUM))

> >>>>> +  XVECEXP (par, 0, 2) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG

> >>>>> (Pmode, FDPIC_REGNUM));

> >>>>

> >>>> Shouldn't one of these be fnaddr_reg and the other pic_reg?

> >>> I think the USE should be gotaddr, and CLOBBER should be pic_reg, thanks.

> >>>

> >>>>

> >>>>> +  emit_insn (par);

> >>>>> +

> >>>>> +  return fnaddr_reg;

> >>>>> +}

> >>>>> +

> >>>>>   /* Return the maximum amount of padding that will be inserted before

> >>>>>      label LABEL.  */

> >>>>> -

> >>>>>   static HOST_WIDE_INT

> >>>>>   get_label_padding (rtx label)

> >>>>>   {

> >>>>> @@ -22890,9 +23054,37 @@ arm_assemble_integer (rtx x, unsigned int

> >>>>> size, int aligned_p)

> >>>>>             && (!SYMBOL_REF_LOCAL_P (x)

> >>>>>                 || (SYMBOL_REF_DECL (x)

> >>>>>                 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))

> >>>>> -        fputs ("(GOT)", asm_out_file);

> >>>>> +        {

> >>>>> +          if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))

> >>>>> +        fputs ("(GOTFUNCDESC)", asm_out_file);

> >>>>> +          else

> >>>>> +        fputs ("(GOT)", asm_out_file);

> >>>>> +        }

> >>>>>         else

> >>>>> -        fputs ("(GOTOFF)", asm_out_file);

> >>>>> +        {

> >>>>> +          if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))

> >>>>> +        fputs ("(GOTOFFFUNCDESC)", asm_out_file);

> >>>>> +          else

> >>>>> +        {

> >>>>> +          bool is_readonly;

> >>>>> +

> >>>>> +          if (arm_is_segment_info_known (x, &is_readonly))

> >>>>> +            fputs ("(GOTOFF)", asm_out_file);

> >>>>> +          else

> >>>>> +            fputs ("(GOT)", asm_out_file);

> >>>>> +        }

> >>>>> +        }

> >>>>> +    }

> >>>>> +

> >>>>> +      /* For FDPIC we also have to mark symbol for .data section.  */

> >>>>> +      if (TARGET_FDPIC

> >>>>> +      && NEED_GOT_RELOC

> >>>>> +      && flag_pic

> >>>>> +      && !making_const_table

> >>>>> +      && SYMBOL_REF_P (x))

> >>>>> +    {

> >>>>> +      if (SYMBOL_REF_FUNCTION_P (x))

> >>>>> +        fputs ("(FUNCDESC)", asm_out_file);

> >>>>>       }

> >>>>>         fputc ('\n', asm_out_file);

> >>>>>         return true;

> >>>>> diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h

> >>>>> index 34894c0..4671d64 100644

> >>>>> --- a/gcc/config/arm/arm.h

> >>>>> +++ b/gcc/config/arm/arm.h

> >>>>> @@ -871,6 +871,9 @@ extern int arm_arch_cmse;

> >>>>>      Pascal), so the following is not true.  */

> >>>>>   #define STATIC_CHAIN_REGNUM    12

> >>>>>   +/* r9 is the FDPIC register (base register for GOT and FUNCDESC

> >>>>> accesses).  */

> >>>>> +#define FDPIC_REGNUM        9

> >>>>> +

> >>>>>   /* Define this to be where the real frame pointer is if it is not

> >>>>> possible to

> >>>>>      work out the offset between the frame pointer and the automatic

> >>>>> variables

> >>>>>      until after register allocation has taken place.

> >>>>> FRAME_POINTER_REGNUM

> >>>>> @@ -1927,6 +1930,10 @@ extern unsigned arm_pic_register;

> >>>>>      data addresses in memory.  */

> >>>>>   #define PIC_OFFSET_TABLE_REGNUM arm_pic_register

> >>>>>   +/* For FDPIC, the FDPIC register is call-clobbered (otherwise PLT

> >>>>> +   entries would need to handle saving and restoring it).  */

> >>>>> +#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED TARGET_FDPIC

> >>>>> +

> >>>>>   /* We can't directly access anything that contains a symbol,

> >>>>>      nor can we indirect via the constant pool.  One exception is

> >>>>>      UNSPEC_TLS, which is always PIC.  */

> >>>>> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md

> >>>>> index 270b8e4..09a0701 100644

> >>>>> --- a/gcc/config/arm/arm.md

> >>>>> +++ b/gcc/config/arm/arm.md

> >>>>> @@ -8031,6 +8031,23 @@

> >>>>>       rtx callee, pat;

> >>>>>       tree addr = MEM_EXPR (operands[0]);

> >>>>>       +    /* Force FDPIC register (r9) before call.  */

> >>>>> +    if (TARGET_FDPIC)

> >>>>> +      {

> >>>>> +    /* No need to update r9 if calling a static function.

> >>>>> +       In other words: set r9 for indirect or non-local calls.  */

> >>>>> +    callee = XEXP (operands[0], 0);

> >>>>> +    if (!SYMBOL_REF_P (callee)

> >>>>> +        || !SYMBOL_REF_LOCAL_P (callee)

> >>>>> +        || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

> >>>>> +      {

> >>>>> +        emit_insn (gen_blockage ());

> >>>>> +        rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

> >>>>> +        emit_move_insn (pic_reg, get_hard_reg_initial_val (Pmode,

> >>>>> FDPIC_REGNUM));

> >>>>> +        emit_insn (gen_rtx_USE (VOIDmode, pic_reg));

> >>>>> +     }

> >>>>> +      }

> >>>>> +

> >>>>>       /* In an untyped call, we can get NULL for operand 2.  */

> >>>>>       if (operands[2] == NULL_RTX)

> >>>>>         operands[2] = const0_rtx;

> >>>>> @@ -8044,6 +8061,13 @@

> >>>>>       : !REG_P (callee))

> >>>>>         XEXP (operands[0], 0) = force_reg (Pmode, callee);

> >>>>>   +    if (TARGET_FDPIC && !SYMBOL_REF_P (XEXP (operands[0], 0)))

> >>>>> +      {

> >>>>> +    /* Indirect call: set r9 with FDPIC value of callee.  */

> >>>>> +    XEXP (operands[0], 0)

> >>>>> +      = arm_load_function_descriptor (XEXP (operands[0], 0));

> >>>>> +      }

> >>>>> +

> >>>>>       if (detect_cmse_nonsecure_call (addr))

> >>>>>         {

> >>>>>       pat = gen_nonsecure_call_internal (operands[0], operands[1],

> >>>>> @@ -8055,10 +8079,38 @@

> >>>>>       pat = gen_call_internal (operands[0], operands[1], operands[2]);

> >>>>>       arm_emit_call_insn (pat, XEXP (operands[0], 0), false);

> >>>>>         }

> >>>>> +

> >>>>> +    /* Restore FDPIC register (r9) after call.  */

> >>>>> +    if (TARGET_FDPIC)

> >>>>> +      {

> >>>>> +    /* No need to update r9 if calling a static function.  */

> >>>>> +    if (!SYMBOL_REF_P (callee)

> >>>>> +        || !SYMBOL_REF_LOCAL_P (callee)

> >>>>> +        || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

> >>>>> +      {

> >>>>> +        rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

> >>>>> +        emit_move_insn (pic_reg, get_hard_reg_initial_val (Pmode,

> >>>>> FDPIC_REGNUM));

> >>>>> +        emit_insn (gen_rtx_USE (VOIDmode, pic_reg));

> >>>>> +        emit_insn (gen_blockage ());

> >>>>> +      }

> >>>>> +      }

> >>>>>       DONE;

> >>>>>     }"

> >>>>>   )

> >>>>>   +(define_insn "*restore_pic_register_after_call"

> >>>>> +  [(parallel [(unspec [(match_operand:SI 0 "s_register_operand" "=r,r")

> >>>>> +               (match_operand:SI 1 "nonimmediate_operand" "r,m")]

> >>>>> +           UNSPEC_PIC_RESTORE)

> >>>>> +          (use (match_dup 0))

> >>>>> +          (clobber (match_dup 0))])

> >>>>> +  ]

> >>>>> +  ""

> >>>>> +  "@

> >>>>> +  mov\t%0, %1

> >>>>> +  ldr\t%0, %1"

> >>>>> +)

> >>>>> +

> >>>>>   (define_expand "call_internal"

> >>>>>     [(parallel [(call (match_operand 0 "memory_operand" "")

> >>>>>                   (match_operand 1 "general_operand" ""))

> >>>>> @@ -8119,6 +8171,30 @@

> >>>>>       rtx pat, callee;

> >>>>>       tree addr = MEM_EXPR (operands[1]);

> >>>>>       +    /* Force FDPIC register (r9) before call.  */

> >>>>> +    if (TARGET_FDPIC)

> >>>>> +      {

> >>>>> +    /* No need to update the FDPIC register (r9) if calling a static

> >>>>> function.

> >>>>> +       In other words: set r9 for indirect or non-local calls.  */

> >>>>> +    callee = XEXP (operands[1], 0);

> >>>>> +    if (!SYMBOL_REF_P (callee)

> >>>>> +        || !SYMBOL_REF_LOCAL_P (callee)

> >>>>> +        || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

> >>>>> +      {

> >>>>> +        rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));

> >>>>> +

> >>>>> +        XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,

> >>>>> +        gen_rtvec (2, gen_rtx_REG (Pmode, FDPIC_REGNUM),

> >>>>> +               get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)),

> >>>>> +        UNSPEC_PIC_RESTORE);

> >>>>> +        XVECEXP (par, 0, 1)

> >>>>> +          = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));

> >>>>> +        XVECEXP (par, 0, 2)

> >>>>> +          = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode,

> >>>>> FDPIC_REGNUM));

> >>>>

> >>>> Again, this looks suspicious.

> >>>>

> >>> Yes, fixed for follow-up patch, with

> >>> USE for get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)

> >>> CLOBBER for gen_rtx_REG (Pmode, FDPIC_REGNUM)

> >>>

> >>>>> +        emit_insn (par);

> >>>>> +      }

> >>>>> +      }

> >>>>> +

> >>>>>       /* In an untyped call, we can get NULL for operand 2.  */

> >>>>>       if (operands[3] == 0)

> >>>>>         operands[3] = const0_rtx;

> >>>>> @@ -8132,6 +8208,14 @@

> >>>>>       : !REG_P (callee))

> >>>>>         XEXP (operands[1], 0) = force_reg (Pmode, callee);

> >>>>>   +    if (TARGET_FDPIC

> >>>>> +    && !SYMBOL_REF_P (XEXP (operands[1], 0)))

> >>>>> +      {

> >>>>> +    /* Indirect call: set r9 with FDPIC value of callee.  */

> >>>>> +    XEXP (operands[1], 0)

> >>>>> +      = arm_load_function_descriptor (XEXP (operands[1], 0));

> >>>>> +      }

> >>>>> +

> >>>>>       if (detect_cmse_nonsecure_call (addr))

> >>>>>         {

> >>>>>       pat = gen_nonsecure_call_value_internal (operands[0], operands[1],

> >>>>> @@ -8144,6 +8228,28 @@

> >>>>>                          operands[2], operands[3]);

> >>>>>       arm_emit_call_insn (pat, XEXP (operands[1], 0), false);

> >>>>>         }

> >>>>> +    /* Restore FDPIC register (r9) after call.  */

> >>>>> +    if (TARGET_FDPIC)

> >>>>> +      {

> >>>>> +    /* No need to update r9 if calling a static function.  */

> >>>>> +    if (!SYMBOL_REF_P (callee)

> >>>>> +        || !SYMBOL_REF_LOCAL_P (callee)

> >>>>> +        || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))

> >>>>> +      {

> >>>>> +        rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));

> >>>>> +

> >>>>> +        XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,

> >>>>> +        gen_rtvec (2, gen_rtx_REG (Pmode, FDPIC_REGNUM),

> >>>>> +               get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)),

> >>>>> +        UNSPEC_PIC_RESTORE);

> >>>>> +        XVECEXP (par, 0, 1)

> >>>>> +          = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));

> >>>>> +        XVECEXP (par, 0, 2)

> >>>>> +          = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode,

> >>>>> FDPIC_REGNUM));

> >>>>

> >>>> And again.

> >>> Yes

> >>>

> >>>>

> >>>>> +        emit_insn (par);

> >>>>> +      }

> >>>>> +      }

> >>>>> +

> >>>>>       DONE;

> >>>>>     }"

> >>>>>   )

> >>>>> @@ -8486,7 +8592,7 @@

> >>>>>               (const_int 0))

> >>>>>             (match_operand 1 "" "")

> >>>>>             (match_operand 2 "" "")])]

> >>>>> -  "TARGET_EITHER"

> >>>>> +  "TARGET_EITHER && !TARGET_FDPIC"

> >>>>>     "

> >>>>>     {

> >>>>>       int i;

> >>>>> @@ -8553,7 +8659,7 @@

> >>>>>   (define_expand "untyped_return"

> >>>>>     [(match_operand:BLK 0 "memory_operand" "")

> >>>>>      (match_operand 1 "" "")]

> >>>>> -  "TARGET_EITHER"

> >>>>> +  "TARGET_EITHER && !TARGET_FDPIC"

> >>>>>     "

> >>>>>     {

> >>>>>       int i;

> >>>>> diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md

> >>>>> index 1941673..349ae0e 100644

> >>>>> --- a/gcc/config/arm/unspecs.md

> >>>>> +++ b/gcc/config/arm/unspecs.md

> >>>>> @@ -86,6 +86,7 @@

> >>>>>     UNSPEC_PROBE_STACK    ; Probe stack memory reference

> >>>>>     UNSPEC_NONSECURE_MEM    ; Represent non-secure memory in ARMv8-M

> >>>>> with

> >>>>>               ; security extension

> >>>>> +  UNSPEC_PIC_RESTORE    ; Use to restore fdpic register

> >>>>>   ])

> >>>>>     (define_c_enum "unspec" [

> >>>>>

> >>>>

> >>>> .

> >>>>

> >>>

> >>

>
diff mbox series

Patch

diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c
index 4471f79..90733cc 100644
--- a/gcc/config/arm/arm-c.c
+++ b/gcc/config/arm/arm-c.c
@@ -202,6 +202,8 @@  arm_cpu_builtins (struct cpp_reader* pfile)
       builtin_define ("__ARM_EABI__");
     }
 
+  def_or_undef_macro (pfile, "__FDPIC__", TARGET_FDPIC);
+
   def_or_undef_macro (pfile, "__ARM_ARCH_EXT_IDIV__", TARGET_IDIV);
   def_or_undef_macro (pfile, "__ARM_FEATURE_IDIV", TARGET_IDIV);
 
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 0dfb3ac..28cafa8 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -136,6 +136,7 @@  extern int arm_max_const_double_inline_cost (void);
 extern int arm_const_double_inline_cost (rtx);
 extern bool arm_const_double_by_parts (rtx);
 extern bool arm_const_double_by_immediates (rtx);
+extern rtx arm_load_function_descriptor (rtx funcdesc);
 extern void arm_emit_call_insn (rtx, rtx, bool);
 bool detect_cmse_nonsecure_call (tree);
 extern const char *output_call (rtx *);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 8810df5..92ae24b 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -3470,6 +3470,14 @@  arm_option_override (void)
   if (flag_pic && TARGET_VXWORKS_RTP)
     arm_pic_register = 9;
 
+  /* If in FDPIC mode then force arm_pic_register to be r9.  */
+  if (TARGET_FDPIC)
+    {
+      arm_pic_register = FDPIC_REGNUM;
+      if (TARGET_ARM_ARCH < 7)
+	error ("FDPIC mode is not supported on architectures older than Armv7");
+    }
+
   if (arm_pic_register_string != NULL)
     {
       int pic_register = decode_reg_name (arm_pic_register_string);
@@ -7251,6 +7259,21 @@  arm_function_ok_for_sibcall (tree decl, tree exp)
   if (cfun->machine->sibcall_blocked)
     return false;
 
+  if (TARGET_FDPIC)
+    {
+      /* In FDPIC, never tailcall something for which we have no decl:
+	 the target function could be in a different module, requiring
+	 a different FDPIC register value.  */
+      if (decl == NULL)
+	return false;
+
+      /* Don't tailcall if we go through the PLT since the FDPIC
+	 register is then corrupted and we don't restore it after
+	 static function calls.  */
+      if (!targetm.binds_local_p (decl))
+	return false;
+    }
+
   /* Never tailcall something if we are generating code for Thumb-1.  */
   if (TARGET_THUMB1)
     return false;
@@ -7629,7 +7652,9 @@  arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
 {
   rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
 
-  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
+  if (crtl->uses_pic_offset_table == 0
+      || TARGET_SINGLE_PIC_BASE
+      || TARGET_FDPIC)
     return;
 
   gcc_assert (flag_pic);
@@ -7697,28 +7722,140 @@  arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
   emit_use (pic_reg);
 }
 
+/* Determine whether ORIG will be placed in the text or data segment.
+   This is used in FDPIC mode to decide which relocations to use when
+   accessing ORIG.  Return true if that is known; *IS_READONLY is then
+   set to true if ORIG is a read-only location, false otherwise.  */
+static bool
+arm_is_segment_info_known (rtx orig, bool *is_readonly)
+{
+  bool res = false;
+
+  *is_readonly = false;
+
+  if (GET_CODE (orig) == LABEL_REF)
+    {
+      res = true;
+      *is_readonly = true;
+    }
+  else if (SYMBOL_REF_P (orig))
+    {
+      if (CONSTANT_POOL_ADDRESS_P (orig))
+	{
+	  res = true;
+	  *is_readonly = true;
+	}
+      else if (SYMBOL_REF_LOCAL_P (orig)
+	       && !SYMBOL_REF_EXTERNAL_P (orig)
+	       && SYMBOL_REF_DECL (orig)
+	       && (!DECL_P (SYMBOL_REF_DECL (orig))
+		   || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
+	{
+	  tree decl = SYMBOL_REF_DECL (orig);
+	  tree init = (TREE_CODE (decl) == VAR_DECL)
+	    ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
+	    ? decl : 0;
+	  int reloc = 0;
+	  bool named_section, readonly;
+
+	  if (init && init != error_mark_node)
+	    reloc = compute_reloc_for_constant (init);
+
+	  named_section = TREE_CODE (decl) == VAR_DECL
+	    && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
+	  readonly = decl_readonly_section (decl, reloc);
+
+	  /* We don't know where the link script will put a named
+	     section, so return false in such a case.  */
+	  res = !named_section;
+
+	  if (!named_section)
+	    *is_readonly = readonly;
+	}
+      else
+	{
+	  /* We don't know.  */
+	  res = false;
+	}
+    }
+  else
+    gcc_unreachable ();
+
+  return res;
+}
+
 /* Generate code to load the address of a static var when flag_pic is set.  */
 static rtx_insn *
 arm_pic_static_addr (rtx orig, rtx reg)
 {
   rtx l1, labelno, offset_rtx;
+  rtx_insn *insn;
 
   gcc_assert (flag_pic);
 
-  /* We use an UNSPEC rather than a LABEL_REF because this label
-     never appears in the code stream.  */
-  labelno = GEN_INT (pic_labelno++);
-  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
-  l1 = gen_rtx_CONST (VOIDmode, l1);
+  bool is_readonly = false;
+  bool info_known = false;
 
-  /* On the ARM the PC register contains 'dot + 8' at the time of the
-     addition, on the Thumb it is 'dot + 4'.  */
-  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
-  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
-                               UNSPEC_SYMBOL_OFFSET);
-  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
+  if (TARGET_FDPIC
+      && SYMBOL_REF_P (orig)
+      && !SYMBOL_REF_FUNCTION_P (orig))
+      info_known = arm_is_segment_info_known (orig, &is_readonly);
 
-  return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
+  if (TARGET_FDPIC
+      && SYMBOL_REF_P (orig)
+      && !SYMBOL_REF_FUNCTION_P (orig)
+      && !info_known)
+    {
+      /* We don't know where orig is stored, so we have to be
+	 pessimistic and use a GOT relocation.  */
+      rtx pat;
+      rtx mem;
+      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
+
+      pat = gen_calculate_pic_address (reg, pic_reg, orig);
+
+      /* Make the MEM as close to a constant as possible.  */
+      mem = SET_SRC (pat);
+      gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
+      MEM_READONLY_P (mem) = 1;
+      MEM_NOTRAP_P (mem) = 1;
+
+      insn = emit_insn (pat);
+    }
+  else if (TARGET_FDPIC
+	   && SYMBOL_REF_P (orig)
+	   && (SYMBOL_REF_FUNCTION_P (orig)
+	       || (info_known && !is_readonly)))
+    {
+      /* We use the GOTOFF relocation.  */
+      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
+
+      rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
+      emit_insn (gen_movsi (reg, l1));
+      insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
+    }
+  else
+    {
+      /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
+	 PC-relative access.  */
+      /* We use an UNSPEC rather than a LABEL_REF because this label
+	 never appears in the code stream.  */
+      labelno = GEN_INT (pic_labelno++);
+      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
+      l1 = gen_rtx_CONST (VOIDmode, l1);
+
+      /* On the ARM the PC register contains 'dot + 8' at the time of the
+	 addition, on the Thumb it is 'dot + 4'.  */
+      offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
+      offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
+				   UNSPEC_SYMBOL_OFFSET);
+      offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
+
+      insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
+						   labelno));
+    }
+
+  return insn;
 }
 
 /* Return nonzero if X is valid as an ARM state addressing register.  */
@@ -15933,9 +16070,36 @@  get_jump_table_size (rtx_jump_table_data *insn)
   return 0;
 }
 
+/* Emit insns to load the function address from FUNCDESC (an FDPIC
+   function descriptor) into a register and the GOT address into the
+   FDPIC register, returning an rtx for the register holding the
+   function address.  */
+
+rtx
+arm_load_function_descriptor (rtx funcdesc)
+{
+  rtx fnaddr_reg = gen_reg_rtx (Pmode);
+  rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
+  rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
+  rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
+  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
+
+  emit_move_insn (fnaddr_reg, fnaddr);
+  /* The ABI requires the entry point address to be loaded first, so
+     prevent the load from being moved after that of the GOT
+     address.  */
+  XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,
+					gen_rtvec (2, pic_reg, gotaddr),
+					UNSPEC_PIC_RESTORE);
+  XVECEXP (par, 0, 1) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));
+  XVECEXP (par, 0, 2) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));
+  emit_insn (par);
+
+  return fnaddr_reg;
+}
+
 /* Return the maximum amount of padding that will be inserted before
    label LABEL.  */
-
 static HOST_WIDE_INT
 get_label_padding (rtx label)
 {
@@ -22890,9 +23054,37 @@  arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
 		  && (!SYMBOL_REF_LOCAL_P (x)
 		      || (SYMBOL_REF_DECL (x)
 			  ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
-	    fputs ("(GOT)", asm_out_file);
+	    {
+	      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
+		fputs ("(GOTFUNCDESC)", asm_out_file);
+	      else
+		fputs ("(GOT)", asm_out_file);
+	    }
 	  else
-	    fputs ("(GOTOFF)", asm_out_file);
+	    {
+	      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
+		fputs ("(GOTOFFFUNCDESC)", asm_out_file);
+	      else
+		{
+		  bool is_readonly;
+
+		  if (arm_is_segment_info_known (x, &is_readonly))
+		    fputs ("(GOTOFF)", asm_out_file);
+		  else
+		    fputs ("(GOT)", asm_out_file);
+		}
+	    }
+	}
+
+      /* For FDPIC we also have to mark the symbol for the .data section.  */
+      if (TARGET_FDPIC
+	  && NEED_GOT_RELOC
+	  && flag_pic
+	  && !making_const_table
+	  && SYMBOL_REF_P (x))
+	{
+	  if (SYMBOL_REF_FUNCTION_P (x))
+	    fputs ("(FUNCDESC)", asm_out_file);
 	}
       fputc ('\n', asm_out_file);
       return true;
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 34894c0..4671d64 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -871,6 +871,9 @@  extern int arm_arch_cmse;
    Pascal), so the following is not true.  */
 #define STATIC_CHAIN_REGNUM	12
 
+/* r9 is the FDPIC register (base register for GOT and FUNCDESC accesses).  */
+#define FDPIC_REGNUM		9
+
 /* Define this to be where the real frame pointer is if it is not possible to
    work out the offset between the frame pointer and the automatic variables
    until after register allocation has taken place.  FRAME_POINTER_REGNUM
@@ -1927,6 +1930,10 @@  extern unsigned arm_pic_register;
    data addresses in memory.  */
 #define PIC_OFFSET_TABLE_REGNUM arm_pic_register
 
+/* For FDPIC, the FDPIC register is call-clobbered (otherwise PLT
+   entries would need to handle saving and restoring it).  */
+#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED TARGET_FDPIC
+
 /* We can't directly access anything that contains a symbol,
    nor can we indirect via the constant pool.  One exception is
    UNSPEC_TLS, which is always PIC.  */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 270b8e4..09a0701 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -8031,6 +8031,23 @@ 
     rtx callee, pat;
     tree addr = MEM_EXPR (operands[0]);
     
+    /* Force FDPIC register (r9) before call.  */
+    if (TARGET_FDPIC)
+      {
+	/* No need to update r9 if calling a static function.
+	   In other words: set r9 for indirect or non-local calls.  */
+	callee = XEXP (operands[0], 0);
+	if (!SYMBOL_REF_P (callee)
+	    || !SYMBOL_REF_LOCAL_P (callee)
+	    || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))
+	  {
+	    emit_insn (gen_blockage ());
+	    rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
+	    emit_move_insn (pic_reg, get_hard_reg_initial_val (Pmode, FDPIC_REGNUM));
+	    emit_insn (gen_rtx_USE (VOIDmode, pic_reg));
+	 }
+      }
+
     /* In an untyped call, we can get NULL for operand 2.  */
     if (operands[2] == NULL_RTX)
       operands[2] = const0_rtx;
@@ -8044,6 +8061,13 @@ 
 	: !REG_P (callee))
       XEXP (operands[0], 0) = force_reg (Pmode, callee);
 
+    if (TARGET_FDPIC && !SYMBOL_REF_P (XEXP (operands[0], 0)))
+      {
+	/* Indirect call: set r9 with FDPIC value of callee.  */
+	XEXP (operands[0], 0)
+	  = arm_load_function_descriptor (XEXP (operands[0], 0));
+      }
+
     if (detect_cmse_nonsecure_call (addr))
       {
 	pat = gen_nonsecure_call_internal (operands[0], operands[1],
@@ -8055,10 +8079,38 @@ 
 	pat = gen_call_internal (operands[0], operands[1], operands[2]);
 	arm_emit_call_insn (pat, XEXP (operands[0], 0), false);
       }
+
+    /* Restore FDPIC register (r9) after call.  */
+    if (TARGET_FDPIC)
+      {
+	/* No need to update r9 if calling a static function.  */
+	if (!SYMBOL_REF_P (callee)
+	    || !SYMBOL_REF_LOCAL_P (callee)
+	    || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))
+	  {
+	    rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
+	    emit_move_insn (pic_reg, get_hard_reg_initial_val (Pmode, FDPIC_REGNUM));
+	    emit_insn (gen_rtx_USE (VOIDmode, pic_reg));
+	    emit_insn (gen_blockage ());
+	  }
+      }
     DONE;
   }"
 )
 
+(define_insn "*restore_pic_register_after_call"
+  [(parallel [(unspec [(match_operand:SI 0 "s_register_operand" "=r,r")
+		       (match_operand:SI 1 "nonimmediate_operand" "r,m")]
+	       UNSPEC_PIC_RESTORE)
+	      (use (match_dup 0))
+	      (clobber (match_dup 0))])
+  ]
+  ""
+  "@
+  mov\t%0, %1
+  ldr\t%0, %1"
+)
+
 (define_expand "call_internal"
   [(parallel [(call (match_operand 0 "memory_operand" "")
 	            (match_operand 1 "general_operand" ""))
@@ -8119,6 +8171,30 @@ 
     rtx pat, callee;
     tree addr = MEM_EXPR (operands[1]);
     
+    /* Force FDPIC register (r9) before call.  */
+    if (TARGET_FDPIC)
+      {
+	/* No need to update the FDPIC register (r9) if calling a static function.
+	   In other words: set r9 for indirect or non-local calls.  */
+	callee = XEXP (operands[1], 0);
+	if (!SYMBOL_REF_P (callee)
+	    || !SYMBOL_REF_LOCAL_P (callee)
+	    || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))
+	  {
+	    rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
+
+	    XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,
+		gen_rtvec (2, gen_rtx_REG (Pmode, FDPIC_REGNUM),
+			   get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)),
+		UNSPEC_PIC_RESTORE);
+	    XVECEXP (par, 0, 1)
+	      = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));
+	    XVECEXP (par, 0, 2)
+	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));
+	    emit_insn (par);
+	  }
+      }
+
     /* In an untyped call, we can get NULL for operand 2.  */
     if (operands[3] == 0)
       operands[3] = const0_rtx;
@@ -8132,6 +8208,14 @@ 
 	: !REG_P (callee))
       XEXP (operands[1], 0) = force_reg (Pmode, callee);
 
+    if (TARGET_FDPIC
+	&& !SYMBOL_REF_P (XEXP (operands[1], 0)))
+      {
+	/* Indirect call: set r9 with FDPIC value of callee.  */
+	XEXP (operands[1], 0)
+	  = arm_load_function_descriptor (XEXP (operands[1], 0));
+      }
+
     if (detect_cmse_nonsecure_call (addr))
       {
 	pat = gen_nonsecure_call_value_internal (operands[0], operands[1],
@@ -8144,6 +8228,28 @@ 
 				       operands[2], operands[3]);
 	arm_emit_call_insn (pat, XEXP (operands[1], 0), false);
       }
+    /* Restore FDPIC register (r9) after call.  */
+    if (TARGET_FDPIC)
+      {
+	/* No need to update r9 if calling a static function.  */
+	if (!SYMBOL_REF_P (callee)
+	    || !SYMBOL_REF_LOCAL_P (callee)
+	    || arm_is_long_call_p (SYMBOL_REF_DECL (callee)))
+	  {
+	    rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
+
+	    XVECEXP (par, 0, 0) = gen_rtx_UNSPEC (VOIDmode,
+		gen_rtvec (2, gen_rtx_REG (Pmode, FDPIC_REGNUM),
+			   get_hard_reg_initial_val (Pmode, FDPIC_REGNUM)),
+		UNSPEC_PIC_RESTORE);
+	    XVECEXP (par, 0, 1)
+	      = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));
+	    XVECEXP (par, 0, 2)
+	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, FDPIC_REGNUM));
+	    emit_insn (par);
+	  }
+      }
+
     DONE;
   }"
 )
@@ -8486,7 +8592,7 @@ 
 		    (const_int 0))
 	      (match_operand 1 "" "")
 	      (match_operand 2 "" "")])]
-  "TARGET_EITHER"
+  "TARGET_EITHER && !TARGET_FDPIC"
   "
   {
     int i;
@@ -8553,7 +8659,7 @@ 
 (define_expand "untyped_return"
   [(match_operand:BLK 0 "memory_operand" "")
    (match_operand 1 "" "")]
-  "TARGET_EITHER"
+  "TARGET_EITHER && !TARGET_FDPIC"
   "
   {
     int i;
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 1941673..349ae0e 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -86,6 +86,7 @@ 
   UNSPEC_PROBE_STACK    ; Probe stack memory reference
   UNSPEC_NONSECURE_MEM	; Represent non-secure memory in ARMv8-M with
 			; security extension
+  UNSPEC_PIC_RESTORE	; Used to restore the FDPIC register
 ])
 
 (define_c_enum "unspec" [