diff mbox series

[v2,2/2,Arm] Stack Pointer and Stack Limit initialization refactored.

Message ID 20190719132218.3108-2-alfedotov@gmail.com
State Superseded
Headers show
Series [v2,1/2] Align comments and spaces in libgloss/arm/crt0.S and newlib/libc/sys/arm/crt0.S to ease further code alignment. | expand

Commit Message

Alexander Fedotov July 19, 2019, 1:22 p.m. UTC
From: Alexander Fedotov <alfedotov@gmail.com>


SP initialization changes:
  1. set a default SP value in the semihosting case as well
  2. move the existing SP & SL init code for processor modes into a separate routine and make it a weak "hook"
  3. initialize SP for processor modes in Thumb mode as well

Add new macros FN_RETURN, FN_EH_START and FN_EH_END.
---
 libgloss/arm/arm.h         |  26 ++++
 libgloss/arm/crt0.S        | 281 ++++++++++++++++++++++++++-----------
 newlib/libc/sys/arm/arm.h  |  26 ++++
 newlib/libc/sys/arm/crt0.S | 281 ++++++++++++++++++++++++++-----------
 4 files changed, 444 insertions(+), 170 deletions(-)

-- 
2.17.1
diff mbox series

Patch

diff --git a/libgloss/arm/arm.h b/libgloss/arm/arm.h
index 0489f2d92..10e5b0509 100644
--- a/libgloss/arm/arm.h
+++ b/libgloss/arm/arm.h
@@ -61,4 +61,30 @@ 
 # define HAVE_CALL_INDIRECT
 #endif
 
+/* A and R profiles (and legacy Arm).
+	Current Program Status Register (CPSR)
+	M[4:0]		Mode bits. M[4] is always 1 for 32-bit modes.
+	T[5]			1: Thumb, 0: ARM instruction set
+	F[6]			1: disables FIQ
+	I[7]			1: disables IRQ
+	A[8]			1: disables imprecise aborts
+	E[9]			0: Little-endian, 1: Big-endian
+	J[24]			1: Jazelle instruction set
+ */
+#define CPSR_M_USR			0x00	/* User mode.  */
+#define CPSR_M_FIQ			0x01	/* Fast Interrupt mode.  */
+#define CPSR_M_IRQ			0x02	/* Interrupt mode.  */
+#define CPSR_M_SVR			0x03	/* Supervisor mode.  */
+#define CPSR_M_MON			0x06	/* Monitor mode.  */
+#define CPSR_M_ABT			0x07	/* Abort mode.  */
+#define CPSR_M_HYP			0x0A	/* Hypervisor mode.  */
+#define CPSR_M_UND			0x0B	/* Undefined mode.  */
+#define CPSR_M_SYS			0x0F	/* System mode.  */
+#define CPSR_M_32BIT		0x10	/* 32-bit mode.  */
+#define CPSR_T_BIT			0x20	/* Thumb bit.  */
+#define CPSR_F_MASK			0x40	/* FIQ bit.  */
+#define CPSR_I_MASK			0x80	/* IRQ bit.  */
+
+#define CPSR_M_MASK			0x0F	/* Mode mask except M[4].  */
+
 #endif /* _LIBGLOSS_ARM_H */
diff --git a/libgloss/arm/crt0.S b/libgloss/arm/crt0.S
index f21add8bc..7b3270c33 100644
--- a/libgloss/arm/crt0.S
+++ b/libgloss/arm/crt0.S
@@ -59,6 +59,21 @@ 
 .endm
 #endif
 
+/* Annotation for EABI unwinding tables.  */
+.macro FN_EH_START
+#if defined(__ELF__) && !defined(__USING_SJLJ_EXCEPTIONS__)
+	.fnstart
+#endif
+.endm
+
+.macro FN_EH_END
+#if defined(__ELF__) && !defined(__USING_SJLJ_EXCEPTIONS__)
+	/* Protect against unhandled exceptions.  */
+	.cantunwind
+	.fnend
+#endif
+.endm
+
 .macro indirect_call reg
 #ifdef HAVE_CALL_INDIRECT
 	blx \reg
@@ -68,16 +83,170 @@ 
 #endif
 .endm
 
+/* For armv4t and newer, toolchains will transparently convert
+   'bx lr' to 'mov pc, lr' if needed. GCC has deprecated support
+   for anything older than armv4t, but this should handle that
+   corner case in case anyone needs it anyway.  */
+.macro  FN_RETURN
+#if __ARM_ARCH <= 4 && __ARM_ARCH_ISA_THUMB == 0
+	mov	pc, lr
+#else
+	bx	lr
+#endif
+.endm
+
+
+
+/******************************************************************************
+* User mode only:           This routine makes default target specific Stack
+*   +-----+ <- SL_sys,    Pointer initialization for different processor modes:
+*   |     |    SL_usr     FIQ, Abort, IRQ, Undefined, Supervisor, System (User)
+*   | SYS |               and sets up a default Stack Limit in case the code has
+*   | USR | -=0x10000     been compiled with "-mapcs-stack-check" for FIQ and
+*   |     |               System (User) modes.
+*   |     |
+*   +-----+ <- initial SP,
+*           becomes SP_sys   Hard-wiring SL value is not ideal, since there is
+*           and SL_usr     currently no support for checking that the heap and
+*                          stack have not collided, or that this default 64k is
+* All modes:               enough for the program being executed. However,
+*   +-----+ <- SL_sys,     it ensures that this simple crt0 world will not
+*   |     |    SL_usr      immediately cause an overflow event.
+*   | SYS |
+*   | USR | -=0x10000        We go through all execution modes and set up SP
+*   |     |                for each of them.
+*   +-----+ <- SP_sys,
+*   |     |    SP_usr      Note:
+*   | SVC | -= 0x8000        Mode switch via CPSR is not allowed once in
+*   |     |                  non-privileged mode, so we take care not to enter
+*   +-----+ <- SP_svc        "User" to set up its sp, and also skip most
+*   |     |                  operations if already in that mode.
+*   | IRQ | -= 0x2000
+*   |     |                Input parameters:
+* ^ +-----+ <- SP_irq       - sp - Initialized SP
+* s |     |                 - r2 - May contain SL value from semihosting
+* t | UND | -= 0x1000              SYS_HEAPINFO call
+* a |     |                Scratch registers:
+* c +-----+ <- SP_und       - r1 - new value of CPSR
+* k |     |                 - r2 - intermediate value (in standalone mode)
+*   | ABT | -= 0x1000       - r3 - new SP value
+* g |     |                 - r4 - save/restore CPSR on entry/exit
+* r +-----+ <- SP_abt,
+* o |     |    SL_fiq        Declared as "weak" so that user can write and use
+* w | FIQ | -= 0x1000      his own implementation if current doesn't fit.
+* t |     |
+* h +-----+ <- initial SP,
+*           becomes SP_fiq
+*
+******************************************************************************/
+	.align	0
+	FUNC_START	_stack_init
+	.weak FUNCTION (_stack_init)
+	FN_EH_START
+	/* Weak default: caller passes the initial SP in sp (and, for semihosting, SL in r2).  */
+	/* M profile has no CPSR register, so the per-mode setup is compiled out there.  */
+#if (__ARM_ARCH_PROFILE != 'M')
+	/* Intended to assemble as both ARM and Thumb code.  NOTE(review): presumably needs Thumb-2 for these encodings - confirm for Thumb-1 non-M targets.  */
+	mrs	r4, CPSR	/* r4 = CPSR on entry; used below to return to the original mode.  */
+	/* Test mode bits - in User mode all of them are 0.  */
+	tst	r4, #(CPSR_M_MASK)
+	/* "eq" means r4 AND CPSR_M_MASK is 0, so the mode switching below is skipped.  */
+	beq	.Lskip_cpu_modes
+
+	mov	r3, sp /* Save input SP value; r3 is the running stack top from here on.  */
+
+	/* FIQ mode, interrupts disabled.  */
+	mov	r1, #(CPSR_M_FIQ|CPSR_M_32BIT|CPSR_I_MASK|CPSR_F_MASK)
+	msr	CPSR_c, r1
+	mov	sp, r3
+	sub	sl, sp, #0x1000	/* FIQ mode has its own SL; this reserves 0x1000 bytes for FIQ.  */
+
+	/* Abort mode, interrupts disabled.  */
+	mov	r3, sl	/* Abort stack starts at the FIQ stack limit.  */
+	mov	r1, #(CPSR_M_ABT|CPSR_M_32BIT|CPSR_I_MASK|CPSR_F_MASK)
+	msr	CPSR_c, r1
+	mov	sp, r3
+	sub	r3, r3, #0x1000	/* Reserve 0x1000 bytes for the Abort stack.  */
+
+	/* Undefined mode, interrupts disabled.  */
+	mov	r1, #(CPSR_M_UND|CPSR_M_32BIT|CPSR_I_MASK|CPSR_F_MASK)
+	msr	CPSR_c, r1
+	mov	sp, r3
+	sub	r3, r3, #0x1000	/* Reserve 0x1000 bytes for the Undefined stack.  */
+
+	/* IRQ mode, interrupts disabled.  */
+	mov	r1, #(CPSR_M_IRQ|CPSR_M_32BIT|CPSR_I_MASK|CPSR_F_MASK)
+	msr	CPSR_c, r1
+	mov	sp, r3
+	sub	r3, r3, #0x2000	/* Reserve 0x2000 bytes for the IRQ stack.  */
+
+	/* Supervisor mode, interrupts disabled.  */
+	mov	r1, #(CPSR_M_SVR|CPSR_M_32BIT|CPSR_I_MASK|CPSR_F_MASK)
+	msr	CPSR_c, r1
+	mov	sp, r3
+
+	sub	r3, r3, #0x8000	/* Min size 32k for the Supervisor stack.  */
+	bic	r3, r3, #0x00FF	/* Clear bits [15:0]: round down to a 64k boundary.  */
+	bic	r3, r3, #0xFF00
+
+# if __ARM_ARCH >= 4
+	/* System (shares regs with User) mode, interrupts disabled.  */
+	mov	r1, #(CPSR_M_SYS|CPSR_M_32BIT|CPSR_I_MASK|CPSR_F_MASK)
+	msr	CPSR_c, r1
+	mov	sp, r3
+# else
+	/* Keep this for ARMv3, but GCC actually dropped it.  */
+	/* Move value into user mode sp without changing modes,  */
+	/* via '^' form of ldm.  */
+	str	r3, [r3, #-4]
+	ldmdb	r3, {sp}^
+# endif
+
+	/* Back to original mode, presumably SVC, with disabled FIQ/IRQ.  */
+	orr	r4, r4, #(CPSR_I_MASK|CPSR_F_MASK)
+	msr	CPSR_c, r4
+
+.Lskip_cpu_modes:
+#endif
+
+	/* Set SL register.  */
+#if defined (ARM_RDI_MONITOR) /* semihosting */
+	cmp	r2, #0	/* r2 == 0: no SL value supplied, leave sl untouched.  */
+	beq	.Lsl_forced_zero
+	/* Allow slop for stack overflow handling and small frames.  */
+# ifdef THUMB1_ONLY
+	adds	r2, #128	/* Add 256 in two steps of 128.  */
+	adds	r2, #128
+	mov	sl, r2
+# else
+	add	sl, r2, #256
+# endif
+.Lsl_forced_zero:
+
+#else /* standalone */
+	/* r3 contains SP for System/User mode (left there by the caller when the mode setup is skipped). Set SL = SP - 0x10000.  */
+	#ifdef THUMB1_ONLY
+	movs	r2, #64	/* r2 = 64 << 10 = 0x10000, built in two steps.  */
+	lsls	r2, r2, #10
+	subs	r2, r3, r2
+	mov	sl, r2
+	#else
+	/* Still assumes 256bytes below SL.  */
+	sub	sl, r3, #64 << 10
+	#endif
+#endif
+
+	FN_RETURN
+	FN_EH_END
+
+
 /*******************************************************************************
 * Main library startup code.
 *******************************************************************************/
 	.align 	0
 	FUNC_START	_mainCRTStartup
 	FUNC_START	_start
-#if defined(__ELF__) && !defined(__USING_SJLJ_EXCEPTIONS__)
-	/* Annotation for EABI unwinding tables.  */
-	.fnstart
-#endif
+	FN_EH_START
 
 	/* __ARM_ARCH_PROFILE is defined from GCC 4.8 onwards, however __ARM_ARCH_7A
 	has been defined since 4.2 onwards, which is when v7-a support was added
@@ -144,42 +313,33 @@ 
 .LC32:	
 	ldr	r1, [r0, #8]
 	ldr	r2, [r0, #12]
-	/*  We skip setting sp/sl if 0 returned from semihosting. 
+	/*  We skip setting SP/SL if 0 returned from semihosting.
 	    - According to semihosting docs, if 0 returned from semihosting,
 	      the system was unable to calculate the real value, so it's ok
-	      to skip setting sp/sl to 0 here.
+	      to skip setting SP/SL to 0 here.
 	    - Considering M-profile processors, We might want to initialize
 	      sp by the first entry of vector table and return 0 to SYS_HEAPINFO
-	      semihosting call, which will be skipped here.  */
+	      semihosting call, which will be skipped here.
+	    - On R-profile processors the hardware does not initialize SP automatically,
+	      so we need to initialize it with a default value.  */
+	ldr	r3, .Lstack
 	cmp	r1, #0
 	beq	.LC26
-	mov	sp, r1
+	mov	r3, r1
 .LC26:
-	cmp	r2, #0
-	beq	.LC27
+	mov	sp, r3
 
-	/*  Allow slop for stack overflow handling and small frames.  */
-#ifdef THUMB1_ONLY
-	adds	r2, #128
-	adds	r2, #128
-	mov	sl, r2
-#else
-	add	sl, r2, #256
-#endif
+	/* r2 (SL value) will be used in _stack_init.  */
+	bl FUNCTION (_stack_init)
 
-.LC27:
-#else
-	/*  Set up the stack pointer to a fixed value.  */
+
+#else /* standalone */
+	/*  Set up the stack pointer to a fixed value. */
 	/*  Changes by toralf:
 	    - Allow linker script to provide stack via __stack symbol - see
 	      defintion of .Lstack
 	    - Provide "hooks" that may be used by the application to add
-	      custom init code - see .Lhwinit and .Lswinit	
-	    - Go through all execution modes and set up stack for each of them.
-	      Loosely based on init.s from ARM/Motorola example code.
-              Note: Mode switch via CPSR is not allowed once in non-privileged
-		    mode, so we take care not to enter "User" to set up its sp,
-		    and also skip most operations if already in that mode.  */
+	      custom init code - see .Lhwinit and .Lswinit.  */
 
 	ldr	r3, .Lstack
 	cmp	r3, #0
@@ -198,57 +358,10 @@ 
 		 have somehow missed it below (in which case it gets the same
 		 value as FIQ - not ideal, but better than nothing).  */
 	mov	sp, r3
-#ifdef PREFER_THUMB
-	/* XXX Fill in stack assignments for interrupt modes.  */
-#else
-	mrs	r2, CPSR
-	tst	r2, #0x0F	/* Test mode bits - in User of all are 0.  */
-	beq	.LC23		/* "eq" means r2 AND #0x0F is 0.  */
-	msr     CPSR_c, #0xD1	/* FIRQ mode, interrupts disabled.  */
-	mov 	sp, r3
-	sub	sl, sp, #0x1000	/* This mode also has its own sl (see below).  */
-	
-	mov	r3, sl	
-	msr     CPSR_c, #0xD7	/* Abort mode, interrupts disabled.  */
-	mov	sp, r3
-	sub	r3, r3, #0x1000
-
-	msr     CPSR_c, #0xDB	/* Undefined mode, interrupts disabled.  */
-	mov	sp, r3
-	sub	r3, r3, #0x1000
-
-	msr     CPSR_c, #0xD2	/* IRQ mode, interrupts disabled.  */
-	mov	sp, r3
-	sub	r3, r3, #0x2000
-		
-	msr     CPSR_c, #0xD3	/* Supervisory mode, interrupts disabled.  */
 
-	mov	sp, r3
-	sub	r3, r3, #0x8000	/* Min size 32k.  */
-	bic	r3, r3, #0x00FF	/* Align with current 64k block.  */
-	bic	r3, r3, #0xFF00
+	/* We don't care about the value of r2 in standalone mode.  */
+	bl FUNCTION (_stack_init)
 
-	str	r3, [r3, #-4]	/* Move value into user mode sp without */ 
-	ldmdb	r3, {sp}^       /* changing modes, via '^' form of ldm.  */ 
-	orr	r2, r2, #0xC0	/* Back to original mode, presumably SVC, */
-	msr	CPSR_c, r2	/* with FIQ/IRQ disable bits forced to 1.  */
-#endif	
-.LC23:
-	/* Setup a default stack-limit in-case the code has been
-	   compiled with "-mapcs-stack-check".  Hard-wiring this value
-	   is not ideal, since there is currently no support for
-	   checking that the heap and stack have not collided, or that
-	   this default 64k is enough for the program being executed.
-	   However, it ensures that this simple crt0 world will not
-	   immediately cause an overflow event:  */
-#ifdef THUMB1_ONLY
-	movs	r2, #64
-	lsls	r2, r2, #10
-	subs	r2, r3, r2
-	mov	sl, r2
-#else
-	sub	sl, r3, #64 << 10	/* Still assumes 256bytes below sl.  */
-#endif
 #endif
 #endif
 	/* Zero the memory in the .bss section.  */
@@ -447,6 +560,8 @@  change_back:
 	swi	SWI_Exit
 #endif
 	
+	FN_EH_END
+
 	/* For Thumb, constants must be after the code since only 
 	   positive offsets are supported for PC relative addresses.  */
 	.align 0
@@ -464,9 +579,6 @@  change_back:
 #else
 	.word	0x80000			/* Top of RAM on the PIE board.  */
 #endif
-
-.Lstack:	
-	.word	__stack
 .Lhwinit:	
 	.word	FUNCTION (hardware_init_hook)
 .Lswinit:
@@ -479,17 +591,16 @@  change_back:
 	   and only if, a normal version of the same symbol isn't provided
 	   e.g. by a linker script or another object file.)  */
 
-	.weak __stack
 	.weak FUNCTION (hardware_init_hook) 
 	.weak FUNCTION (software_init_hook)
 #endif
 	
 #endif
-#if defined(__ELF__) && !defined(__USING_SJLJ_EXCEPTIONS__)
-	/* Protect against unhandled exceptions.  */
-	.cantunwind
-	.fnend
-#endif
+
+.Lstack:
+	.word	__stack
+	.weak	__stack
+
 .LC1:
 	.word	__bss_start__
 .LC2:
diff --git a/newlib/libc/sys/arm/arm.h b/newlib/libc/sys/arm/arm.h
index 0489f2d92..dbed81750 100644
--- a/newlib/libc/sys/arm/arm.h
+++ b/newlib/libc/sys/arm/arm.h
@@ -61,4 +61,30 @@ 
 # define HAVE_CALL_INDIRECT
 #endif
 
+/* A and R profiles (and legacy Arm).
+	Current Program Status Register (CPSR)
+	M[4:0]		Mode bits. M[4] is always 1 for 32-bit modes.
+	T[5]			1: Thumb, 0: ARM instruction set
+	F[6]			1: disables FIQ
+	I[7]			1: disables IRQ
+	A[8]			1: disables imprecise aborts
+	E[9]			0: Little-endian, 1: Big-endian
+	J[24]			1: Jazelle instruction set
+ */
+#define CPSR_M_USR			0x00	/* User mode.  */
+#define CPSR_M_FIQ			0x01	/* Fast Interrupt mode.  */
+#define CPSR_M_IRQ			0x02	/* Interrupt mode.  */
+#define CPSR_M_SVR			0x03	/* Supervisor mode.  */
+#define CPSR_M_MON			0x06	/* Monitor mode.  */
+#define CPSR_M_ABT			0x07	/* Abort mode.  */
+#define CPSR_M_HYP			0x0A	/* Hypervisor mode.  */
+#define CPSR_M_UND			0x0B	/* Undefined mode.  */
+#define CPSR_M_SYS			0x0F	/* System mode.  */
+#define CPSR_M_32BIT		0x10	/* 32-bit mode.  */
+#define CPSR_T_BIT			0x20	/* Thumb bit.  */
+#define CPSR_F_MASK			0x40	/* FIQ bit.  */
+#define CPSR_I_MASK			0x80	/* IRQ bit.  */
+
+#define CPSR_M_MASK			0x0F	/* Mode mask except M[4] */
+
 #endif /* _LIBGLOSS_ARM_H */
diff --git a/newlib/libc/sys/arm/crt0.S b/newlib/libc/sys/arm/crt0.S
index 3358d7014..24ccb0b04 100644
--- a/newlib/libc/sys/arm/crt0.S
+++ b/newlib/libc/sys/arm/crt0.S
@@ -59,6 +59,21 @@ 
 .endm
 #endif
 
+/* Annotation for EABI unwinding tables.  */
+.macro FN_EH_START
+#if defined(__ELF__) && !defined(__USING_SJLJ_EXCEPTIONS__)
+	.fnstart
+#endif
+.endm
+
+.macro FN_EH_END
+#if defined(__ELF__) && !defined(__USING_SJLJ_EXCEPTIONS__)
+	/* Protect against unhandled exceptions.  */
+	.cantunwind
+	.fnend
+#endif
+.endm
+
 .macro indirect_call reg
 #ifdef HAVE_CALL_INDIRECT
 	blx \reg
@@ -68,16 +83,170 @@ 
 #endif
 .endm
 
+/* For armv4t and newer, toolchains will transparently convert
+   'bx lr' to 'mov pc, lr' if needed. GCC has deprecated support
+   for anything older than armv4t, but this should handle that
+   corner case in case anyone needs it anyway.  */
+.macro  FN_RETURN
+#if __ARM_ARCH <= 4 && __ARM_ARCH_ISA_THUMB == 0
+	mov	pc, lr
+#else
+	bx	lr
+#endif
+.endm
+
+
+
+/******************************************************************************
+* User mode only:           This routine makes default target specific Stack
+*   +-----+ <- SL_sys,    Pointer initialization for different processor modes:
+*   |     |    SL_usr     FIQ, Abort, IRQ, Undefined, Supervisor, System (User)
+*   | SYS |               and sets up a default Stack Limit in case the code has
+*   | USR | -=0x10000     been compiled with "-mapcs-stack-check" for FIQ and
+*   |     |               System (User) modes.
+*   |     |
+*   +-----+ <- initial SP,
+*           becomes SP_sys   Hard-wiring SL value is not ideal, since there is
+*           and SL_usr     currently no support for checking that the heap and
+*                          stack have not collided, or that this default 64k is
+* All modes:               enough for the program being executed. However,
+*   +-----+ <- SL_sys,     it ensures that this simple crt0 world will not
+*   |     |    SL_usr      immediately cause an overflow event.
+*   | SYS |
+*   | USR | -=0x10000        We go through all execution modes and set up SP
+*   |     |                for each of them.
+*   +-----+ <- SP_sys,
+*   |     |    SP_usr      Note:
+*   | SVC | -= 0x8000        Mode switch via CPSR is not allowed once in
+*   |     |                  non-privileged mode, so we take care not to enter
+*   +-----+ <- SP_svc        "User" to set up its sp, and also skip most
+*   |     |                  operations if already in that mode.
+*   | IRQ | -= 0x2000
+*   |     |                Input parameters:
+* ^ +-----+ <- SP_irq       - sp - Initialized SP
+* s |     |                 - r2 - May contain SL value from semihosting
+* t | UND | -= 0x1000              SYS_HEAPINFO call
+* a |     |                Scratch registers:
+* c +-----+ <- SP_und       - r1 - new value of CPSR
+* k |     |                 - r2 - intermediate value (in standalone mode)
+*   | ABT | -= 0x1000       - r3 - new SP value
+* g |     |                 - r4 - save/restore CPSR on entry/exit
+* r +-----+ <- SP_abt,
+* o |     |    SL_fiq        Declared as "weak" so that user can write and use
+* w | FIQ | -= 0x1000      his own implementation if current doesn't fit.
+* t |     |
+* h +-----+ <- initial SP,
+*           becomes SP_fiq
+*
+******************************************************************************/
+	.align	0
+	FUNC_START	_stack_init
+	.weak FUNCTION (_stack_init)
+	FN_EH_START
+	/* Weak default: caller passes the initial SP in sp (and, for semihosting, SL in r2).  */
+	/* M profile has no CPSR register, so the per-mode setup is compiled out there.  */
+#if (__ARM_ARCH_PROFILE != 'M')
+	/* Intended to assemble as both ARM and Thumb code.  NOTE(review): presumably needs Thumb-2 for these encodings - confirm for Thumb-1 non-M targets.  */
+	mrs	r4, CPSR	/* r4 = CPSR on entry; used below to return to the original mode.  */
+	/* Test mode bits - in User mode all of them are 0.  */
+	tst	r4, #(CPSR_M_MASK)
+	/* "eq" means r4 AND CPSR_M_MASK is 0, so the mode switching below is skipped.  */
+	beq	.Lskip_cpu_modes
+
+	mov	r3, sp /* Save input SP value; r3 is the running stack top from here on.  */
+
+	/* FIQ mode, interrupts disabled.  */
+	mov	r1, #(CPSR_M_FIQ|CPSR_M_32BIT|CPSR_I_MASK|CPSR_F_MASK)
+	msr	CPSR_c, r1
+	mov	sp, r3
+	sub	sl, sp, #0x1000	/* FIQ mode has its own SL; this reserves 0x1000 bytes for FIQ.  */
+
+	/* Abort mode, interrupts disabled.  */
+	mov	r3, sl	/* Abort stack starts at the FIQ stack limit.  */
+	mov	r1, #(CPSR_M_ABT|CPSR_M_32BIT|CPSR_I_MASK|CPSR_F_MASK)
+	msr	CPSR_c, r1
+	mov	sp, r3
+	sub	r3, r3, #0x1000	/* Reserve 0x1000 bytes for the Abort stack.  */
+
+	/* Undefined mode, interrupts disabled.  */
+	mov	r1, #(CPSR_M_UND|CPSR_M_32BIT|CPSR_I_MASK|CPSR_F_MASK)
+	msr	CPSR_c, r1
+	mov	sp, r3
+	sub	r3, r3, #0x1000	/* Reserve 0x1000 bytes for the Undefined stack.  */
+
+	/* IRQ mode, interrupts disabled.  */
+	mov	r1, #(CPSR_M_IRQ|CPSR_M_32BIT|CPSR_I_MASK|CPSR_F_MASK)
+	msr	CPSR_c, r1
+	mov	sp, r3
+	sub	r3, r3, #0x2000	/* Reserve 0x2000 bytes for the IRQ stack.  */
+
+	/* Supervisor mode, interrupts disabled.  */
+	mov	r1, #(CPSR_M_SVR|CPSR_M_32BIT|CPSR_I_MASK|CPSR_F_MASK)
+	msr	CPSR_c, r1
+	mov	sp, r3
+
+	sub	r3, r3, #0x8000	/* Min size 32k for the Supervisor stack.  */
+	bic	r3, r3, #0x00FF	/* Clear bits [15:0]: round down to a 64k boundary.  */
+	bic	r3, r3, #0xFF00
+
+# if __ARM_ARCH >= 4
+	/* System (shares regs with User) mode, interrupts disabled.  */
+	mov	r1, #(CPSR_M_SYS|CPSR_M_32BIT|CPSR_I_MASK|CPSR_F_MASK)
+	msr	CPSR_c, r1
+	mov	sp, r3
+# else
+	/* Keep this for ARMv3, but GCC actually dropped it.  */
+	/* Move value into user mode sp without changing modes,  */
+	/* via '^' form of ldm.  */
+	str	r3, [r3, #-4]
+	ldmdb	r3, {sp}^
+# endif
+
+	/* Back to original mode, presumably SVC, with disabled FIQ/IRQ.  */
+	orr	r4, r4, #(CPSR_I_MASK|CPSR_F_MASK)
+	msr	CPSR_c, r4
+
+.Lskip_cpu_modes:
+#endif
+
+	/* Set SL register.  */
+#if defined (ARM_RDI_MONITOR) /* semihosting */
+	cmp	r2, #0	/* r2 == 0: no SL value supplied, leave sl untouched.  */
+	beq	.Lsl_forced_zero
+	/* Allow slop for stack overflow handling and small frames.  */
+# ifdef THUMB1_ONLY
+	adds	r2, #128	/* Add 256 in two steps of 128.  */
+	adds	r2, #128
+	mov	sl, r2
+# else
+	add	sl, r2, #256
+# endif
+.Lsl_forced_zero:
+
+#else /* standalone */
+	/* r3 contains SP for System/User mode (left there by the caller when the mode setup is skipped). Set SL = SP - 0x10000.  */
+	#ifdef THUMB1_ONLY
+	movs	r2, #64	/* r2 = 64 << 10 = 0x10000, built in two steps.  */
+	lsls	r2, r2, #10
+	subs	r2, r3, r2
+	mov	sl, r2
+	#else
+	/* Still assumes 256bytes below SL.  */
+	sub	sl, r3, #64 << 10
+	#endif
+#endif
+
+	FN_RETURN
+	FN_EH_END
+
+
 /*******************************************************************************
 * Main library startup code.
 *******************************************************************************/
 	.align 	0
 	FUNC_START	_mainCRTStartup
 	FUNC_START	_start
-#if defined(__ELF__) && !defined(__USING_SJLJ_EXCEPTIONS__)
-	/* Annotation for EABI unwinding tables.  */
-	.fnstart
-#endif
+	FN_EH_START
 
 /* Start by setting up a stack.  */
 #ifdef ARM_RDP_MONITOR
@@ -124,42 +293,33 @@ 
 .LC32:	
 	ldr	r1, [r0, #8]
 	ldr	r2, [r0, #12]
-	/*  We skip setting sp/sl if 0 returned from semihosting. 
+	/*  We skip setting SP/SL if 0 returned from semihosting.
 	    - According to semihosting docs, if 0 returned from semihosting,
 	      the system was unable to calculate the real value, so it's ok
-	      to skip setting sp/sl to 0 here.
+	      to skip setting SP/SL to 0 here.
 	    - Considering M-profile processors, We might want to initialize
 	      sp by the first entry of vector table and return 0 to SYS_HEAPINFO
-	      semihosting call, which will be skipped here.  */
+	      semihosting call, which will be skipped here.
+	    - On R-profile processors the hardware does not initialize SP automatically,
+	      so we need to initialize it with a default value.  */
+	ldr	r3, .Lstack
 	cmp	r1, #0
 	beq	.LC26
-	mov	sp, r1
+	mov	r3, r1
 .LC26:
-	cmp	r2, #0
-	beq	.LC27
+	mov	sp, r3
 
-	/*  Allow slop for stack overflow handling and small frames.  */
-#ifdef THUMB1_ONLY
-	adds	r2, #128
-	adds	r2, #128
-	mov	sl, r2
-#else
-	add	sl, r2, #256
-#endif
+	/* r2 (SL value) will be used in _stack_init.  */
+	bl FUNCTION (_stack_init)
 
-.LC27:
-#else
-	/*  Set up the stack pointer to a fixed value.  */
+
+#else /* standalone */
+	/*  Set up the stack pointer to a fixed value. */
 	/*  Changes by toralf:
 	    - Allow linker script to provide stack via __stack symbol - see
 	      defintion of .Lstack
 	    - Provide "hooks" that may be used by the application to add
-	      custom init code - see .Lhwinit and .Lswinit	
-	    - Go through all execution modes and set up stack for each of them.
-	      Loosely based on init.s from ARM/Motorola example code.
-              Note: Mode switch via CPSR is not allowed once in non-privileged
-		    mode, so we take care not to enter "User" to set up its sp,
-		    and also skip most operations if already in that mode.  */
+	      custom init code - see .Lhwinit and .Lswinit.  */
 
 	ldr	r3, .Lstack
 	cmp	r3, #0
@@ -178,57 +338,10 @@ 
 		 have somehow missed it below (in which case it gets the same
 		 value as FIQ - not ideal, but better than nothing).  */
 	mov	sp, r3
-#ifdef PREFER_THUMB
-	/* XXX Fill in stack assignments for interrupt modes.  */
-#else
-	mrs	r2, CPSR
-	tst	r2, #0x0F	/* Test mode bits - in User of all are 0.  */
-	beq	.LC23		/* "eq" means r2 AND #0x0F is 0.  */
-	msr     CPSR_c, #0xD1	/* FIRQ mode, interrupts disabled.  */
-	mov 	sp, r3
-	sub	sl, sp, #0x1000	/* This mode also has its own sl (see below).  */
-	
-	mov	r3, sl	
-	msr     CPSR_c, #0xD7	/* Abort mode, interrupts disabled.  */
-	mov	sp, r3
-	sub	r3, r3, #0x1000
-
-	msr     CPSR_c, #0xDB	/* Undefined mode, interrupts disabled.  */
-	mov	sp, r3
-	sub	r3, r3, #0x1000
-
-	msr     CPSR_c, #0xD2	/* IRQ mode, interrupts disabled.  */
-	mov	sp, r3
-	sub	r3, r3, #0x2000
-		
-	msr     CPSR_c, #0xD3	/* Supervisory mode, interrupts disabled.  */
 
-	mov	sp, r3
-	sub	r3, r3, #0x8000	/* Min size 32k.  */
-	bic	r3, r3, #0x00FF	/* Align with current 64k block.  */
-	bic	r3, r3, #0xFF00
+	/* We don't care about the value of r2 in standalone mode.  */
+	bl FUNCTION (_stack_init)
 
-	str	r3, [r3, #-4]	/* Move value into user mode sp without */ 
-	ldmdb	r3, {sp}^       /* changing modes, via '^' form of ldm.  */ 
-	orr	r2, r2, #0xC0	/* Back to original mode, presumably SVC, */
-	msr	CPSR_c, r2	/* with FIQ/IRQ disable bits forced to 1.  */
-#endif	
-.LC23:
-	/* Setup a default stack-limit in-case the code has been
-	   compiled with "-mapcs-stack-check".  Hard-wiring this value
-	   is not ideal, since there is currently no support for
-	   checking that the heap and stack have not collided, or that
-	   this default 64k is enough for the program being executed.
-	   However, it ensures that this simple crt0 world will not
-	   immediately cause an overflow event:  */
-#ifdef THUMB1_ONLY
-	movs	r2, #64
-	lsls	r2, r2, #10
-	subs	r2, r3, r2
-	mov	sl, r2
-#else
-	sub	sl, r3, #64 << 10	/* Still assumes 256bytes below sl.  */
-#endif
 #endif
 #endif
 	/* Zero the memory in the .bss section.  */
@@ -421,6 +534,8 @@  change_back:
 	swi	SWI_Exit
 #endif
 	
+	FN_EH_END
+
 	/* For Thumb, constants must be after the code since only 
 	   positive offsets are supported for PC relative addresses.  */
 	.align 0
@@ -438,9 +553,6 @@  change_back:
 #else
 	.word	0x80000			/* Top of RAM on the PIE board.  */
 #endif
-
-.Lstack:	
-	.word	__stack
 .Lhwinit:	
 	.word	FUNCTION (hardware_init_hook)
 .Lswinit:
@@ -453,17 +565,16 @@  change_back:
 	   and only if, a normal version of the same symbol isn't provided
 	   e.g. by a linker script or another object file.)  */
 
-	.weak __stack
 	.weak FUNCTION (hardware_init_hook) 
 	.weak FUNCTION (software_init_hook)
 #endif
 	
 #endif
-#if defined(__ELF__) && !defined(__USING_SJLJ_EXCEPTIONS__)
-	/* Protect against unhandled exceptions.  */
-	.cantunwind
-	.fnend
-#endif
+
+.Lstack:
+	.word	__stack
+	.weak	__stack
+
 .LC1:
 	.word	__bss_start__
 .LC2: