diff mbox

[1/8] ARM: assembler: introduce adr_l, ldr_l and str_l macros

Message ID 1470238730-30038-2-git-send-email-ard.biesheuvel@linaro.org
State New
Headers show

Commit Message

Ard Biesheuvel Aug. 3, 2016, 3:38 p.m. UTC
Like arm64, ARM supports position independent code sequences that
produce symbol references with a greater reach than the ordinary
adr/ldr instructions.

Introduce adr_l, which takes the address of a symbol in a PC-relative
manner, and ldr_l/str_l, which perform 32-bit loads/stores from a
PC-relative offset.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

---
 arch/arm/include/asm/assembler.h | 59 ++++++++++++++++++++
 1 file changed, 59 insertions(+)

-- 
2.7.4


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

Comments

Ard Biesheuvel Aug. 4, 2016, 7:40 a.m. UTC | #1
On 3 August 2016 at 18:49, Dave Martin <Dave.Martin@arm.com> wrote:
> On Wed, Aug 03, 2016 at 05:38:43PM +0200, Ard Biesheuvel wrote:

>> Like arm64, ARM supports position independent code sequences that

>> produce symbol references with a greater reach than the ordinary

>> adr/ldr instructions.

>>

>> Introduce adr_l, that takes the address of a symbol in a PC relative

>> manner, and ldr_l/str_l that perform 32-bit loads/stores from a

>> PC-relative offset.

>>

>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

>> ---

>>  arch/arm/include/asm/assembler.h | 59 ++++++++++++++++++++

>>  1 file changed, 59 insertions(+)

>>

>> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h

>> index 4eaea2173bf8..e1450889f96b 100644

>> --- a/arch/arm/include/asm/assembler.h

>> +++ b/arch/arm/include/asm/assembler.h

>> @@ -512,4 +512,63 @@ THUMB(   orr     \reg , \reg , #PSR_T_BIT        )

>>  #endif

>>       .endm

>>

>> +/*

>> + * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> operations

>> + */

>> +

>> +     /*

>> +      * @dst: destination register

>> +      * @sym: name of the symbol

>> +      */

>> +     .macro  adr_l, dst, sym

>> +#ifdef CONFIG_THUMB2_KERNEL

>> +     movw    \dst, #:lower16:(\sym) - (. + 12)

>> +     movt    \dst, #:upper16:(\sym) - (. + 8)

>> +     add     \dst, \dst, pc

>

> pc always reads as the address of that add plus 4, right?  I remember

> some special case where it gets rounded down to a 4-byte boundary, but

> IIRC that only applies to certain ldr ..., [pc, ...] forms.

>


The 4-byte rounding occurs when the linker (or the linux module
loader) encounters a bl instruction in Thumb that needs to be fixed up
to blx if the target is ARM.

>> +#else

>> +     add     \dst, pc, #:pc_g0_nc:(\sym) - 8

>> +     add     \dst, \dst, #:pc_g1_nc:(\sym) - 4

>> +     add     \dst, \dst, #:pc_g2:(\sym)

>

> Whoah.  I've never seen this syntax before...  does this work for any

> named reloc, or just for certain blessed relocs?  (I'm also _assuming_

> the assembler support for this functionality is ancient -- if not,

> there may be toolchain compatibility issues.)

>

> Based on my understanding of how these relocs work, this should do the

> right thing, though.

>

>

> Second question: for arm, this reduces the range addressable to

> pc +/- 26-bit offset (24-bit if sym is not word aligned, but that

> probably never happens).

>

> I can't remember the de facto limit on the size of vmlinux for arm --

> are you sure this extra limitation won't break some cases of huge

> initramfs where adr_l gets used for cross-section references?

>


Yes, that seems a valid concern. We have +/- 64 MB for the adr_l
variant (as you say, for word aligned symbols), but this may be
insufficient. The ldr/str variants don't have the same limitation.

> (For Thumb2, :lower16:/:upper16: give a full 32-bit range, so no problem

> there -- sad that this isn't available before ARMv7).

>


Indeed.

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Ard Biesheuvel Aug. 4, 2016, 7:40 a.m. UTC | #2
On 3 August 2016 at 20:09, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
> On Wed, Aug 03, 2016 at 05:38:43PM +0200, Ard Biesheuvel wrote:

>> +     add     \dst, pc, #:pc_g0_nc:(\sym) - 8

>> +     add     \dst, \dst, #:pc_g1_nc:(\sym) - 4

>> +     add     \dst, \dst, #:pc_g2:(\sym)

>

> What's this :pc_g0_nc: stuff?  What binutils versions is it supported

> in?  It doesn't appear documented in gas 2.23.91, so I don't think we

> can use it.

>


binutils 2.18 and up

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Ard Biesheuvel Aug. 4, 2016, 7:43 a.m. UTC | #3
On 3 August 2016 at 18:49, Dave Martin <Dave.Martin@arm.com> wrote:
> On Wed, Aug 03, 2016 at 05:38:43PM +0200, Ard Biesheuvel wrote:

>> Like arm64, ARM supports position independent code sequences that

>> produce symbol references with a greater reach than the ordinary

>> adr/ldr instructions.

>>

>> Introduce adr_l, that takes the address of a symbol in a PC relative

>> manner, and ldr_l/str_l that perform 32-bit loads/stores from a

>> PC-relative offset.

>>

>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

>> ---

>>  arch/arm/include/asm/assembler.h | 59 ++++++++++++++++++++

>>  1 file changed, 59 insertions(+)

>>

>> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h

>> index 4eaea2173bf8..e1450889f96b 100644

>> --- a/arch/arm/include/asm/assembler.h

>> +++ b/arch/arm/include/asm/assembler.h

>> @@ -512,4 +512,63 @@ THUMB(   orr     \reg , \reg , #PSR_T_BIT        )

>>  #endif

>>       .endm

>>

>> +/*

>> + * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> operations

>> + */

>> +

>> +     /*

>> +      * @dst: destination register

>> +      * @sym: name of the symbol

>> +      */

>> +     .macro  adr_l, dst, sym

>> +#ifdef CONFIG_THUMB2_KERNEL

>> +     movw    \dst, #:lower16:(\sym) - (. + 12)

>> +     movt    \dst, #:upper16:(\sym) - (. + 8)

>> +     add     \dst, \dst, pc

>

> pc always reads as the address of that add plus 4, right?  I remember

> some special case where it gets rounded down to a 4-byte boundary, but

> IIRC that only applies to certain ldr ..., [pc, ...] forms.

>

>> +#else

>> +     add     \dst, pc, #:pc_g0_nc:(\sym) - 8

>> +     add     \dst, \dst, #:pc_g1_nc:(\sym) - 4

>> +     add     \dst, \dst, #:pc_g2:(\sym)

>

> Whoah.  I've never seen this syntax before...  does this work for any

> named reloc, or just for certain blessed relocs?  (I'm also _assuming_

> the assembler support for this functionality is ancient -- if not,

> there may be toolchain compatibility issues.)

>


(Missed this question in my first reply)

There is no 1:1 mapping between these tokens and the actual relocs.
For instance, pc_g2 will be converted into

R_ARM_ALU_PC_G2 for an add instruction
R_ARM_LDR_PC_G2 for an ldr/str instruction
etc etc

so that means relocations have to be 'blessed' in some way, indeed.

You can still emit arbitrary relocs if you wanted, though, using the
.reloc pseudo op in GAS

-- 
Ard.

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Ard Biesheuvel Aug. 4, 2016, 10:12 a.m. UTC | #4
On 4 August 2016 at 11:38, Dave Martin <Dave.Martin@arm.com> wrote:
> On Thu, Aug 04, 2016 at 09:40:50AM +0200, Ard Biesheuvel wrote:

>> On 3 August 2016 at 20:09, Russell King - ARM Linux

>> <linux@armlinux.org.uk> wrote:

>> > On Wed, Aug 03, 2016 at 05:38:43PM +0200, Ard Biesheuvel wrote:

>> >> +     add     \dst, pc, #:pc_g0_nc:(\sym) - 8

>> >> +     add     \dst, \dst, #:pc_g1_nc:(\sym) - 4

>> >> +     add     \dst, \dst, #:pc_g2:(\sym)

>> >

>> > What's this :pc_g0_nc: stuff?  What binutils versions is it supported

>> > in?  It doesn't appear documented in gas 2.23.91, so I don't think we

>> > can use it.

>> >

>>

>> binutils 2.18 and up

>

> I think this was contemporary with GCC 4.<some middling version>, which

> may be newer than the minimum compiler we require for the kernel,

> particularly for earlier arch versions.

>

> Using .reloc probably allows the same thing to be done in a more

> backwards-compatible way.

>


I don't see how LD would know how to handle these relocations if we do
manage to emit them from GAS in this way.

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Ard Biesheuvel Aug. 4, 2016, 10:34 a.m. UTC | #5
On 4 August 2016 at 11:44, Dave Martin <Dave.Martin@arm.com> wrote:
> On Thu, Aug 04, 2016 at 09:40:31AM +0200, Ard Biesheuvel wrote:

>> On 3 August 2016 at 18:49, Dave Martin <Dave.Martin@arm.com> wrote:

>> > On Wed, Aug 03, 2016 at 05:38:43PM +0200, Ard Biesheuvel wrote:

[...]
>> >> +#else

>> >> +     add     \dst, pc, #:pc_g0_nc:(\sym) - 8

>> >> +     add     \dst, \dst, #:pc_g1_nc:(\sym) - 4

>> >> +     add     \dst, \dst, #:pc_g2:(\sym)

>> >

>> > Whoah.  I've never seen this syntax before...  does this work for any

>> > named reloc, or just for certain blessed relocs?  (I'm also _assuming_

>> > the assembler support for this functionality is ancient -- if not,

>> > there may be toolchain compatibility issues.)

>> >

>> > Based on my understanding of how these relocs work, this should do the

>> > right thing, though.

>> >

>> >

>> > Second question: for arm, this reduces the range addressable to

>> > pc +/- 26-bit offset (24-bit if sym is not word aligned, but that

>> > probably never happens).

>> >

>> > I can't remember the de facto limit on the size of vmlinux for arm --

>> > are you sure this extra limitation won't break some cases of huge

>> > initramfs where adr_l gets used for cross-section references?

>> >

>>

>> Yes, that seems a valid concern. We have +/- 64 MB for the adr_l

>> variant (as you say, for word aligned symbols), but this may be

>> insufficient. The ldr/str variants don't have the same limitation.

>

> True, but they're still limited, I think in effect to +/- 256 MB.

>


Yes. I managed to hack around the limitation by doing this

/* Hack: only works for forward references to <sym>; clobbers <tmp> */
.macro adr_fwd, dst, sym, tmp
add \dst, pc, #:pc_g0_nc:(\sym) - 8
add \dst, \dst, #:pc_g1_nc:(\sym) - 4
.reloc ., R_ARM_LDR_PC_G2, \sym /* attached to the mov that follows */
mov \tmp, #0
add \dst, \dst, \tmp
.endm

but it is becoming very hacky, and this only works for forward
references anyway.

So I think this is a dead end. Series withdrawn.

Thanks for your time,
Ard.

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Ard Biesheuvel Aug. 4, 2016, 11:10 a.m. UTC | #6
On 4 August 2016 at 13:08, Dave Martin <Dave.Martin@arm.com> wrote:
> On Thu, Aug 04, 2016 at 12:12:07PM +0200, Ard Biesheuvel wrote:

>> On 4 August 2016 at 11:38, Dave Martin <Dave.Martin@arm.com> wrote:

>> > On Thu, Aug 04, 2016 at 09:40:50AM +0200, Ard Biesheuvel wrote:

>> >> On 3 August 2016 at 20:09, Russell King - ARM Linux

>> >> <linux@armlinux.org.uk> wrote:

>> >> > On Wed, Aug 03, 2016 at 05:38:43PM +0200, Ard Biesheuvel wrote:

>> >> >> +     add     \dst, pc, #:pc_g0_nc:(\sym) - 8

>> >> >> +     add     \dst, \dst, #:pc_g1_nc:(\sym) - 4

>> >> >> +     add     \dst, \dst, #:pc_g2:(\sym)

>> >> >

>> >> > What's this :pc_g0_nc: stuff?  What binutils versions is it supported

>> >> > in?  It doesn't appear documented in gas 2.23.91, so I don't think we

>> >> > can use it.

>> >> >

>> >>

>> >> binutils 2.18 and up

>> >

>> > I think this was contemporary with GCC 4.<some middling version>, which

>> > may be newer than the minimum compiler we require for the kernel,

>> > particularly for earlier arch versions.

>> >

>> > Using .reloc probably allows the same thing to be done in a more

>> > backwards-compatible way.

>> >

>>

>> I don't see how LD would know how to handle these relocations if we do

>> manage to emit them from GAS in this way.

>

> 0:      add     \dst, pc, #-8

> 1:      add     \dst, \dst, #-4

> 2:      add     \dst, \dst, #0

>

> .reloc  0b, R_ARM_ALU_PC_G0_NC, \sym

> .reloc  1b, R_ARM_ALU_PC_G1_NC, \sym

> .reloc  2b, R_ARM_ALU_PC_G2, \sym

>

> or, for ldr_l:

>

> 0:      add     \dst, pc, #-8

> 1:      add     \dst, \dst, #-4

> 2:      ldr     [\dst, #0]

>

> .reloc  0b, R_ARM_ALU_PC_G0_NC, \sym

> .reloc  1b, R_ARM_ALU_PC_G1_NC, \sym

> .reloc  2b, R_ARM_LDR_PC_G2, \sym

>

> ... should produce precisely the same result at the .o stage.

>


Yes, but how is LD going to perform the arithmetic involved in
handling these relocations? That is the more interesting part, and
that is not implemented either in binutils < 2.18

>

> This #:reloc: stuff is mostly a shorthand/convenience feature as I

> understand it -- however, you do have to know how to force the correct

> instruction encoding to be emitted to match the reloc type -- with

> #:reloc:, the assembler should take care of that.

>

> The reloc/insn match issue also means that the Thumb2 relocs are

> totally different (R_ARM_THM_<blah>).

>

>

> For OABI, it would no doubt be different again.  I know nothing about

> OABI relocs.

>

> And we're still limited to <32-bit offset ranges.

>


Indeed. As I said, a dead end ...

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Ard Biesheuvel Aug. 4, 2016, 11:34 a.m. UTC | #7
On 4 August 2016 at 13:30, Dave Martin <Dave.Martin@arm.com> wrote:
> On Thu, Aug 04, 2016 at 01:10:55PM +0200, Ard Biesheuvel wrote:

>> On 4 August 2016 at 13:08, Dave Martin <Dave.Martin@arm.com> wrote:

>

> [...]

>

>> > or, for ldr_l:

>> >

>> > 0:      add     \dst, pc, #-8

>> > 1:      add     \dst, \dst, #-4

>> > 2:      ldr     [\dst, #0]

>> >

>> > .reloc  0b, R_ARM_ALU_PC_G0_NC, \sym

>> > .reloc  1b, R_ARM_ALU_PC_G1_NC, \sym

>> > .reloc  2b, R_ARM_LDR_PC_G2, \sym

>> >

>> > ... should produce precisely the same result at the .o stage.

>> >

>>

>> Yes, but how is LD going to perform the arithmetic involved in

>

> What arithmetic?

>


The arithmetic involved in populating the immediate fields of these
instructions based on the actual offset between the Place and the
Symbol in the final image.

>> handling these relocations? That is the more interesting part, and

>> that is not implemented either in binutils < 2.18

>

> You mean .reloc is not implemented < 2.18?

>


Yes, .reloc is implemented, but that is not sufficient.

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Ard Biesheuvel Aug. 4, 2016, 1:46 p.m. UTC | #8
On 4 August 2016 at 15:31, Dave Martin <Dave.Martin@arm.com> wrote:
> On Thu, Aug 04, 2016 at 01:34:03PM +0200, Ard Biesheuvel wrote:

>> On 4 August 2016 at 13:30, Dave Martin <Dave.Martin@arm.com> wrote:

>> > On Thu, Aug 04, 2016 at 01:10:55PM +0200, Ard Biesheuvel wrote:

>> >> On 4 August 2016 at 13:08, Dave Martin <Dave.Martin@arm.com> wrote:

>> >

>> > [...]

>> >

>> >> > or, for ldr_l:

>> >> >

>> >> > 0:      add     \dst, pc, #-8

>> >> > 1:      add     \dst, \dst, #-4

>> >> > 2:      ldr     [\dst, #0]

>> >> >

>> >> > .reloc  0b, R_ARM_ALU_PC_G0_NC, \sym

>> >> > .reloc  1b, R_ARM_ALU_PC_G1_NC, \sym

>> >> > .reloc  2b, R_ARM_LDR_PC_G2, \sym

>> >> >

>> >> > ... should produce precisely the same result at the .o stage.

>> >> >

>> >>

>> >> Yes, but how is LD going to perform the arithmetic involved in

>

> [...]

>

>> >> handling these relocations? That is the more interesting part, and

>> >> that is not implemented either in binutils < 2.18

>> >

>> > What arithmetic?

>> >

>>

>> The arithmetic involved in populating the immediate fields of these

>> instructions based on the actual offset between the Place and the

>> Symbol in the final image.

>

> <digression>

>

> Just for interest...

>

>

> For the linker this is just ordinary relocation processing -- there's

> nothing unusual going on, except that neither GCC nor gas usually

> emit these particular insn relocs automatically.

>


There is no such thing as 'ordinary' relocation processing. Each
relocation type requires its own specific handling, and pre-2.18 LD
simply does not come equipped with the routines to perform the
calculations that the ARM/ELF spec defines for these particular
relocation types. Whether GAS or any other assembler can produce them
is irrelevant, my claim is that pre-2.18 LD does not know how to
/consume/ them.


> I think the ARM RVCT compiler could generate them for producing

> ROM-able position independent code in some configurations.  I suspect

> they were supported by ld from the start though, or at least pretty

> early on.

>

>

> When you write

>

>         add     \dst, pc, #:pc_g0_nc:\sym - (. + 8)

>

> the arithmetic is somewhat bogus -- the assembler does not (and can't)

> do it, because neither the value of \sym, nor of ., is known.  Only the

> invariant bit (the - 8) can be processed at assembly time.  The

> irreducible part (\sym - .) has to be emitted as a reloc.

>

> Thus, the assembler really does emit

>

> .reloc  ., R_ARM_ALU_PC_G0_NC, \sym

>         add     \dst, pc, #-8

>

> (The "- ." is effectively part of the definition of R_ARM_ALU_PC_G0_NC

> here).

>

>

> For comparison:

>

> $ as <<EOF -o a.o

>         .reloc ., R_ARM_ALU_PC_G0_NC, foo

>         add     r0, pc, #-8

>         .reloc ., R_ARM_ALU_PC_G1_NC, foo

>         add     r0, r0, #-4

>         .reloc ., R_ARM_ALU_PC_G2, foo

>         add     r0, r0, #0

>

>         add     r0, pc, #:pc_g0_nc:foo - . - 8

>         add     r0, r0, #:pc_g1_nc:foo - . - 4

>         add     r0, r0, #:pc_g2:foo - .

> EOF

>

> $ objdump -dr a.o

> 00000000 <.text>:

>    0:   e24f0008        sub     r0, pc, #8

>                         0: R_ARM_ALU_PC_G0_NC   foo

>    4:   e2400004        sub     r0, r0, #4

>                         4: R_ARM_ALU_PC_G1_NC   foo

>    8:   e2800000        add     r0, r0, #0

>                         8: R_ARM_ALU_PC_G2      foo

>    c:   e24f0008        sub     r0, pc, #8

>                         c: R_ARM_ALU_PC_G0_NC   foo

>   10:   e2400004        sub     r0, r0, #4

>                         10: R_ARM_ALU_PC_G1_NC  foo

>   14:   e2800000        add     r0, r0, #0

>                         14: R_ARM_ALU_PC_G2     foo

>

> $ ld --defsym foo=0x4000000 -o a a.o

> $ objdump -dr a

> 00008054 <__bss_end__-0x8018>:

>     8054:       e28f07ff        add     r0, pc, #66846720       ; 0x3fc0000

>     8058:       e2800bdf        add     r0, r0, #228352 ; 0x37c00

>     805c:       e2800fe9        add     r0, r0, #932    ; 0x3a4

>     8060:       e28f07ff        add     r0, pc, #66846720       ; 0x3fc0000

>     8064:       e2800bdf        add     r0, r0, #228352 ; 0x37c00

>     8068:       e2800fe6        add     r0, r0, #920    ; 0x398

>

>

>> Yes, .reloc is implemented, but that is not sufficient.

>

> </digression>

>

> Cheers

> ---Dave


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Ard Biesheuvel Aug. 4, 2016, 1:58 p.m. UTC | #9
On 4 August 2016 at 14:16, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
> On Thu, Aug 04, 2016 at 12:22:29PM +0100, Dave Martin wrote:

>> The more conventional literal-.long approach could still be macro-ised

>> along the same lines, which might make the affected code more readable,

>> but the idiom you'd be replacing is well-understood and not very common.

>

> I don't see how it could be.  You can't efficiently place the literal

> data alongside the instructions dealing with it.

>

> The only alternative is to use ldr rd, =foo, but that gets very stupid

> when you want to calculate the relative offset, and you end up with

> something like this for every relative load:

>

>         ldr rd, =.

>         sub rd, rd, #. - 4

>         ldr r1, =foo

>         add r1, r1, rd

>


This does the trick

/* Distance from the address of an instruction to the pc value it reads */
#ifdef CONFIG_THUMB2_KERNEL
#define PC_BIAS 4
#else
#define PC_BIAS 8
#endif

/*
* @dst: destination register
* @sym: name of the symbol
* @tmp: scratch register, only used when __LINUX_ARM_ARCH__ < 7
*/
.macro adr_l, dst, sym, tmp:req
#if __LINUX_ARM_ARCH__ >= 7
movw \dst, #:lower16:(\sym) - (. + 8 + PC_BIAS)
movt \dst, #:upper16:(\sym) - (. + 4 + PC_BIAS)
add \dst, \dst, pc
#else
ldr \tmp, =\sym /* link-time address of <sym> */
ldr \dst, =. + 12 /* link-time value of the pc read by the next insn */
sub \dst, \dst, pc /* link-time minus run-time address */
sub \dst, \tmp, \dst /* run-time address of <sym> */
#endif
.endm

but it is suboptimal for v6 and earlier, and now the macro requires a
temp register.

> As I've already said, I prefer the existing solution.  It works, it's

> been known to work for the last 22 years.

>

> If it isn't broken, don't try to fix it.

>


It seemed like low hanging fruit, but obviously not ...

Thanks for your time,
Ard.

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
diff mbox

Patch

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 4eaea2173bf8..e1450889f96b 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -512,4 +512,63 @@  THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 #endif
 	.endm
 
+/*
+ * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> operations
+ */
+
+	/*
+	 * @dst: destination register, receives the PC-relative address of <sym>
+	 * @sym: name of the symbol
+	 */
+	.macro	adr_l, dst, sym
+#ifdef CONFIG_THUMB2_KERNEL
+	movw	\dst, #:lower16:(\sym) - (. + 12)
+	movt	\dst, #:upper16:(\sym) - (. + 8)
+	add	\dst, \dst, pc		/* pc reads as this insn + 4 */
+#else	/* ARM: group relocs (binutils >= 2.18), +/- 64 MB for aligned <sym> */
+	add	\dst, pc, #:pc_g0_nc:(\sym) - 8
+	add	\dst, \dst, #:pc_g1_nc:(\sym) - 4
+	add	\dst, \dst, #:pc_g2:(\sym)
+#endif
+	.endm
+
+	/*
+	 * @dst: destination register, loaded with the 32-bit word at <sym>
+	 * @sym: name of the symbol
+	 * @tmp: optional scratch register to be used if <dst> == sp, which
+	 *       is not allowed in a Thumb2 ldr instruction
+	 */
+	.macro	ldr_l, dst, sym, tmp
+#ifdef CONFIG_THUMB2_KERNEL
+	.ifnb	\tmp
+	adr_l	\tmp, \sym		/* address goes via <tmp> */
+	ldr	\dst, [\tmp]
+	.else
+	adr_l	\dst, \sym		/* <dst> doubles as address register */
+	ldr	\dst, [\dst]
+	.endif
+#else	/* ARM: <tmp> is ignored; reach is about +/- 256 MB */
+	add	\dst, pc, #:pc_g0_nc:(\sym) - 8
+	add	\dst, \dst, #:pc_g1_nc:(\sym) - 4
+	ldr	\dst, [\dst, #:pc_g2:(\sym)]
+#endif
+	.endm
+
+	/*
+	 * @src: source register, stored as a 32-bit word at <sym>
+	 * @sym: name of the symbol
+	 * @tmp: mandatory scratch register, clobbered: it holds the address
+	 *       so that <src> is preserved.
+	 */
+	.macro	str_l, src, sym, tmp:req
+#ifdef CONFIG_THUMB2_KERNEL
+	adr_l	\tmp, \sym
+	str	\src, [\tmp]
+#else	/* ARM: group relocations, reach is about +/- 256 MB */
+	add	\tmp, pc, #:pc_g0_nc:(\sym) - 8
+	add	\tmp, \tmp, #:pc_g1_nc:(\sym) - 4
+	str	\src, [\tmp, #:pc_g2:(\sym)]
+#endif
+	.endm
+
 #endif /* __ASM_ASSEMBLER_H__ */