diff mbox

[v4,2/5] arm: add new asm macro update_sctlr

Message ID 1389445524-30623-3-git-send-email-leif.lindholm@linaro.org
State New
Headers show

Commit Message

Leif Lindholm Jan. 11, 2014, 1:05 p.m. UTC
A new macro for setting/clearing bits in the SCTLR.

Signed-off-by: Leif Lindholm <leif.lindholm@linaro.org>
Suggested-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/assembler.h |   13 +++++++++++++
 1 file changed, 13 insertions(+)

Comments

Leif Lindholm Jan. 29, 2014, 6:28 p.m. UTC | #1
On Wed, Jan 22, 2014 at 11:20:55AM +0000, Will Deacon wrote:
> > +#ifdef CONFIG_CPU_CP15
> > +/* Macro for setting/clearing bits in sctlr */
> > +	.macro update_sctlr, set:req, clear:req, tmp:req, tmp2:req
> > +	mrc	p15, 0, \tmp, c1, c0, 0
> > +	ldr	\tmp2, =\set
> > +	orr	\tmp, \tmp, \tmp2
> > +	ldr	\tmp2, =\clear
> > +	mvn	\tmp2, \tmp2
> > +	and	\tmp, \tmp, \tmp2
> > +	mcr	p15, 0, \tmp, c1, c0, 0
> 
> I think this would be cleaner if you force the caller to put set and clear
> into registers beforehand, rather than have to do the literal load every
> time. Also, I don't think set and clear should be required (and then you can
> lose tmp2 as well).

I can't figure out how to make register-parameters non-required
(i.e. conditionalise on whether an optional parameter was provided),
so my attempt at refactoring actually ends up using an additional
register:

#ifdef CONFIG_CPU_CP15
/*
 * update_sctlr - set and clear bits in the SCTLR (cp15 c1, c0, 0).
 *
 *   set:   register holding the bits to set
 *   clear: register holding the bits to clear (left unmodified)
 *   tmp:   scratch register; holds the value written back on exit
 *
 * Bits are set first and cleared second, so a bit present in both
 * masks ends up cleared.
 */
	.macro	update_sctlr, set:req, clear:req, tmp:req
	mrc	p15, 0, \tmp, c1, c0, 0		/* tmp = current SCTLR */
	orr	\tmp, \tmp, \set
	bic	\tmp, \tmp, \clear		/* tmp &= ~clear, clear itself is preserved */
	mcr	p15, 0, \tmp, c1, c0, 0		/* write the updated value back */
	.endm
#endif

If you think that's an improvement I can do that, and I have (just)
enough registers to spare.
If I'm being daft with my macro issues, do point it out.

/
    Leif
Mark Salter Jan. 29, 2014, 8:58 p.m. UTC | #2
On Wed, 2014-01-29 at 18:28 +0000, Leif Lindholm wrote:
> On Wed, Jan 22, 2014 at 11:20:55AM +0000, Will Deacon wrote:
> > > +#ifdef CONFIG_CPU_CP15
> > > +/* Macro for setting/clearing bits in sctlr */
> > > +   .macro update_sctlr, set:req, clear:req, tmp:req, tmp2:req
> > > +   mrc     p15, 0, \tmp, c1, c0, 0
> > > +   ldr     \tmp2, =\set
> > > +   orr     \tmp, \tmp, \tmp2
> > > +   ldr     \tmp2, =\clear
> > > +   mvn     \tmp2, \tmp2
> > > +   and     \tmp, \tmp, \tmp2
> > > +   mcr     p15, 0, \tmp, c1, c0, 0
> > 
> > I think this would be cleaner if you force the caller to put set and clear
> > into registers beforehand, rather than have to do the literal load every
> > time. Also, I don't think set and clear should be required (and then you can
> > lose tmp2 as well).
> 
> I can't figure out how to make register-parameters non-required
> (i.e. conditionalise on whether an optional parameter was provided),
> so my attempt of refactoring actually ends up using an additional
> register:
> 

Register parameters are just strings, so how about this:

	/*
	 * Example of optional macro arguments. bar and baz default to the
	 * empty string; .ifnc assembles its section only when its two
	 * operands differ, so each mov is emitted only when the matching
	 * argument was actually supplied.
	 */
	.macro foo bar=, baz=
	.ifnc \bar,
	mov \bar,#0
	.endif
	.ifnc \baz,
	mov \baz,#1
	.endif
	.endm

	/* bar only: emits mov x0, #0 */
	foo x0
	/* no arguments: emits nothing */
	foo
	/* both arguments: emits mov x1, #0 and mov x2, #1 */
	foo x1, x2
	/* baz only (bar left empty): emits mov x3, #1 */
	foo ,x3

Results in:

0000000000000000 <.text>:
   0:	d2800000 	mov	x0, #0x0                   	// #0
   4:	d2800001 	mov	x1, #0x0                   	// #0
   8:	d2800022 	mov	x2, #0x1                   	// #1
   c:	d2800023 	mov	x3, #0x1                   	// #1
Leif Lindholm Jan. 30, 2014, 1:12 p.m. UTC | #3
On Wed, Jan 29, 2014 at 03:58:44PM -0500, Mark Salter wrote:
> > (i.e. conditionalise on whether an optional parameter was provided),
> > so my attempt of refactoring actually ends up using an additional
> > register:
> > 
> 
> Register parameters are just strings, so how about this:
> 
> 	.macro foo bar=, baz=
> 	.ifnc \bar,
> 	mov \bar,#0
> 	.endif
> 	.ifnc \baz,
> 	mov \baz,#1
> 	.endif
> 	.endm
> 
> 	foo x0
> 	foo
> 	foo x1, x2
> 	foo ,x3
> 
> Results in:
> 
> 0000000000000000 <.text>:
>    0:	d2800000 	mov	x0, #0x0                   	// #0
>    4:	d2800001 	mov	x1, #0x0                   	// #0
>    8:	d2800022 	mov	x2, #0x1                   	// #1
>    c:	d2800023 	mov	x3, #0x1                   	// #1

Oh, that's neat - thanks!

Well, given that, I can think of two less horrible options:
1)
	/*
	 * update_sctlr - set and/or clear bits in the SCTLR (cp15 c1, c0, 0).
	 *
	 *   tmp:   required scratch register; holds the value written back
	 *   set:   optional register holding the bits to set
	 *   clear: optional register holding the bits to clear
	 *
	 * An omitted set/clear argument expands to the empty string, so the
	 * corresponding .ifnc section is not assembled.
	 *
	 * NOTE(review): mvn inverts the clear register in place, so the
	 * caller's copy of the mask is destroyed; "bic tmp, tmp, clear"
	 * would clear the same bits without modifying it.
	 */
	.macro  update_sctlr, tmp:req, set=, clear=
        mrc	p15, 0, \tmp, c1, c0, 0
	.ifnc	\set,
        orr	\tmp, \set
	.endif
	.ifnc	\clear,
	mvn	\clear, \clear
	and	\tmp, \tmp, \clear
	.endif
        mcr	p15, 0, \tmp, c1, c0, 0
	.endm

With the two call sites in uefi_phys.S as:

	ldr	r5, =(CR_M)
	update_sctlr	r12, , r5
and
	ldr	r4, =(CR_I | CR_C | CR_M)
	update_sctlr	r12, r4

Which disassembles as:

  2c:   e3a05001        mov     r5, #1
  30:   ee11cf10        mrc     15, 0, ip, cr1, cr0, {0}
  34:   e1e05005        mvn     r5, r5
  38:   e00cc005        and     ip, ip, r5
  3c:   ee01cf10        mcr     15, 0, ip, cr1, cr0, {0}
and
  48:   e59f4034        ldr     r4, [pc, #52]   ; 84 <tmpstack+0x4>
  4c:   ee11cf10        mrc     15, 0, ip, cr1, cr0, {0}
  50:   e18cc004        orr     ip, ip, r4
  54:   ee01cf10        mcr     15, 0, ip, cr1, cr0, {0}


2)
	/*
	 * update_sctlr - set and/or clear bits in the SCTLR (cp15 c1, c0, 0).
	 *
	 *   tmp:   required register; holds the SCTLR value being modified
	 *   tmp2:  required scratch register used to materialise each mask
	 *   set:   optional expression giving the bits to set
	 *   clear: optional expression giving the bits to clear
	 *
	 * Unlike a register-based variant, set and clear are expressions
	 * loaded inside the macro with "ldr tmp2, =expr". An omitted
	 * argument expands to the empty string, so its .ifnc section is
	 * not assembled.
	 */
	.macro update_sctlr, tmp:req, tmp2:req, set=, clear=
	mrc	p15, 0, \tmp, c1, c0, 0
	.ifnc	\set,
	ldr	\tmp2, =\set
	orr	\tmp, \tmp, \tmp2
	.endif
	.ifnc	\clear,
	ldr	\tmp2, =\clear
	mvn	\tmp2, \tmp2
	and	\tmp, \tmp, \tmp2
	.endif
	mcr	p15, 0, \tmp, c1, c0, 0
	.endm

With the two call sites in uefi_phys.S as: 

	update_sctlr	r4, r5, , (CR_M)
and
	update_sctlr	r4, r5, (CR_I | CR_C | CR_M)

Which disassembles as:

  2c:   ee114f10        mrc     15, 0, r4, cr1, cr0, {0}
  30:   e3a05001        mov     r5, #1
  34:   e1e05005        mvn     r5, r5
  38:   e0044005        and     r4, r4, r5
  3c:   ee014f10        mcr     15, 0, r4, cr1, cr0, {0}
and
  48:   ee114f10        mrc     15, 0, r4, cr1, cr0, {0}
  4c:   e59f5030        ldr     r5, [pc, #48]   ; 84 <tmpstack+0x4>
  50:   e1844005        orr     r4, r4, r5
  54:   ee014f10        mcr     15, 0, r4, cr1, cr0, {0}


The benefit of 2) is a cleaner call site, and one fewer register
used if setting and clearing simultaneously.

The benefit of 1) is that the macro could then easily be used with
the crval mask in mm/proc*.S

So, Will, which one do you want?

/
    Leif
Leif Lindholm Feb. 3, 2014, 3:55 p.m. UTC | #4
On Mon, Feb 03, 2014 at 10:34:15AM +0000, Will Deacon wrote:
> On Thu, Jan 30, 2014 at 01:12:47PM +0000, Leif Lindholm wrote:
> > Oh, that's neat - thanks!
> > 
> > Well, given that, I can think of two less horrible options:
> > 1)
> > 	.macro  update_sctlr, tmp:req, set=, clear=
> >         mrc	p15, 0, \tmp, c1, c0, 0
> > 	.ifnc	\set,
> >         orr	\tmp, \set
> > 	.endif
> > 	.ifnc	\clear,
> > 	mvn	\clear, \clear
> > 	and	\tmp, \tmp, \clear
> 
> Can't you use bic here?

Yeah.

> > 	.endif
> >         mcr	p15, 0, \tmp, c1, c0, 0
> > 	.endm
> > 
> > With the two call sites in uefi_phys.S as:
> > 
> > 	ldr	r5, =(CR_M)
> > 	update_sctlr	r12, , r5
> > and
> > 	ldr	r4, =(CR_I | CR_C | CR_M)
> > 	update_sctlr	r12, r4
> 
> These ldr= could be movs, right?

The first one could.
The second one could be movw on armv7+.

> If so, I definitely prefer this to putting an ldr = into the macro itself
> (option 2).

And your preference between 1) and 2) is?

/
    Leif
Leif Lindholm Feb. 3, 2014, 4:46 p.m. UTC | #5
On Mon, Feb 03, 2014 at 04:00:51PM +0000, Will Deacon wrote:
> > > > With the two call sites in uefi_phys.S as:
> > > > 
> > > > 	ldr	r5, =(CR_M)
> > > > 	update_sctlr	r12, , r5
> > > > and
> > > > 	ldr	r4, =(CR_I | CR_C | CR_M)
> > > > 	update_sctlr	r12, r4
> > > 
> > > These ldr= could be movs, right?
> > 
> > The first one could.
> > The second one could be movw on armv7+.
> > 
> > > If so, I definitely prefer this to putting an ldr = into the macro itself
> > > (option 2).
> > 
> > And your preference between 1) and 2) is?
> 
> (1), using bic and mov[tw] where possible.

(1): ok, thanks.

bic: sure, that was an oversight.

mov[tw]: why?
Then we end up battling different available immediate fields in A32/T32
instruction sets and v5/v6/v7 architecture versions.

/
    Leif
Leif Lindholm Feb. 3, 2014, 6:15 p.m. UTC | #6
On Mon, Feb 03, 2014 at 04:57:18PM +0000, Will Deacon wrote:
> > mov[tw]: why?
> > Then we end up battling different available immediate fields in A32/T32
> > instruction sets and v5/v6/v7 architecture versions.
> 
> I was making the assumption that UEFI was going to be v7 only... is this not
> true?

There is no such requirement in the specification.
It even mentions requirements for ARMv4 in one place :)

But I also don't understand why ldr= should be avoided.
This is not performance sensitive (called once on system boot), and
it's already executing with the caches off, so even if it ends up
being a literal load it does not pollute.

/
    Leif
diff mbox

Patch

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 5c22851..aba6458 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -383,4 +383,17 @@  THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 #endif
 	.endm
 
+#ifdef CONFIG_CPU_CP15
+/* update_sctlr: SCTLR = (SCTLR | set) & ~clear; clobbers tmp and tmp2 */
+	.macro update_sctlr, set:req, clear:req, tmp:req, tmp2:req
+	mrc	p15, 0, \tmp, c1, c0, 0		/* read SCTLR into tmp */
+	ldr	\tmp2, =\set			/* set is an expression, not a register */
+	orr	\tmp, \tmp, \tmp2		/* set the requested bits */
+	ldr	\tmp2, =\clear			/* clear is an expression, not a register */
+	mvn	\tmp2, \tmp2			/* tmp2 = ~clear */
+	and	\tmp, \tmp, \tmp2		/* clear the requested bits */
+	mcr	p15, 0, \tmp, c1, c0, 0		/* write the result back to SCTLR */
+	.endm
+#endif
+
 #endif /* __ASM_ASSEMBLER_H__ */