
[PATCHv2] arm64/cpufeature: don't use mutex in bringup path

Message ID 1494514878-26878-1-git-send-email-mark.rutland@arm.com
State Superseded

Commit Message

Mark Rutland May 11, 2017, 3:01 p.m. UTC
Currently, cpus_set_cap() calls static_branch_enable_cpuslocked(), which
must take the jump_label mutex.

We call cpus_set_cap() in the secondary bringup path, from the idle
thread where interrupts are disabled. Taking a mutex in this path "is a
NONO" regardless of whether it's contended, and something we must avoid.
Additionally, the secondary CPU doesn't hold the percpu rwsem (as this
is held by the primary CPU), so this triggers a lockdep splat.

This patch fixes both issues by moving the static_key poking from
cpus_set_cap() into enable_cpu_capabilities(). To account for the static
keys being set later, cpus_have_const_cap() is updated to use another
static key to check whether the const cap keys have been initialised.

This means that users of cpus_have_const_cap() should only gain a
single additional NOP in the fast path once the const caps are
initialised, but should always see the current cap value.

This rework means that we can remove the *_cpuslocked() helpers added in
commit d54bb72551b999dd ("arm64/cpufeature: Use
static_branch_enable_cpuslocked()").

Fixes: efd9e03facd075f5 ("arm64: Use static keys for CPU features")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Suzuki Poulose <suzuki.poulose@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/cpufeature.h | 13 ++++++++++---
 arch/arm64/kernel/cpu_errata.c      |  9 +--------
 arch/arm64/kernel/cpufeature.c      | 25 ++++++++++++++++++++++---
 3 files changed, 33 insertions(+), 14 deletions(-)

Catalin, Will, assuming you're happy with the patch, it will need to go via the
tip tree.

Since v1 [1]:
* Kill redundant update_cpu_errata_workarounds() prototype
* Introduce arm64_const_caps_ready

[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2017-May/505731.html

-- 
1.9.1

Comments

Suzuki K Poulose May 11, 2017, 3:15 p.m. UTC | #1
On 11/05/17 16:01, Mark Rutland wrote:
> Currently, cpus_set_cap() calls static_branch_enable_cpuslocked(), which
> must take the jump_label mutex.
>
> We call cpus_set_cap() in the secondary bringup path, from the idle
> thread where interrupts are disabled. Taking a mutex in this path "is a
> NONO" regardless of whether it's contended, and something we must avoid.
> Additionally, the secondary CPU doesn't hold the percpu rwsem (as this
> is held by the primary CPU), so this triggers a lockdep splat.
>
> This patch fixes both issues by moving the static_key poking from
> cpus_set_cap() into enable_cpu_capabilities(). To account for the static
> keys being set later, cpus_have_const_cap() is updated to use another
> static key to check whether the const cap keys have been initialised.
>
> This means that users of cpus_have_const_cap() should only gain a
> single additional NOP in the fast path once the const caps are
> initialised, but should always see the current cap value.
>
> This rework means that we can remove the *_cpuslocked() helpers added in
> commit d54bb72551b999dd ("arm64/cpufeature: Use
> static_branch_enable_cpuslocked()").
>
> Fixes: efd9e03facd075f5 ("arm64: Use static keys for CPU features")
> Signed-off-by: Mark Rutland <mark.rutland@arm.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Marc Zyngier <marc.zyngier@arm.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Sebastian Siewior <bigeasy@linutronix.de>
> Cc: Suzuki Poulose <suzuki.poulose@arm.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Will Deacon <will.deacon@arm.com>
> ---
>  arch/arm64/include/asm/cpufeature.h | 13 ++++++++++---
>  arch/arm64/kernel/cpu_errata.c      |  9 +--------
>  arch/arm64/kernel/cpufeature.c      | 25 ++++++++++++++++++++++---
>  3 files changed, 33 insertions(+), 14 deletions(-)
>
> Catalin, Will, assuming you're happy with the patch, it will need to go via the
> tip tree.
>
> Since v1 [1]:
> * Kill redundant update_cpu_errata_workarounds() prototype
> * Introduce arm64_const_caps_ready
>
> [1] http://lists.infradead.org/pipermail/linux-arm-kernel/2017-May/505731.html
>
> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> index 8a7ff73..428ee1f 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -115,6 +115,7 @@ struct arm64_cpu_capabilities {
>
>  extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
>  extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
> +extern struct static_key_false arm64_const_caps_ready;
>
>  bool this_cpu_has_cap(unsigned int cap);
>
> @@ -124,7 +125,7 @@ static inline bool cpu_have_feature(unsigned int num)
>  }
>
>  /* System capability check for constant caps */
> -static inline bool cpus_have_const_cap(int num)
> +static inline bool __cpus_have_const_cap(int num)
>  {
>  	if (num >= ARM64_NCAPS)
>  		return false;
> @@ -138,6 +139,14 @@ static inline bool cpus_have_cap(unsigned int num)
>  	return test_bit(num, cpu_hwcaps);
>  }
>
> +static inline bool cpus_have_const_cap(int num)
> +{
> +	if (static_branch_likely(&arm64_const_caps_ready))
> +		return __cpus_have_const_cap(num);
> +	else
> +		return cpus_have_cap(num);

We use cpus_have_const_cap() from hyp code, via has_vhe(), and we could potentially
try to access unmapped kernel data from hyp if we fall back to cpus_have_cap().
However, it looks like we have already set arm64_const_caps_ready, so we should not
hit it in practice. Maybe we could add a stricter version of the helper?

static inline bool cpus_have_const_cap_strict(int num)
{
	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
	return __cpus_have_const_cap(num);
}


Suzuki
Mark Rutland May 11, 2017, 3:37 p.m. UTC | #2
On Thu, May 11, 2017 at 04:15:38PM +0100, Suzuki K Poulose wrote:
> On 11/05/17 16:01, Mark Rutland wrote:
> >+static inline bool cpus_have_const_cap(int num)
> >+{
> >+	if (static_branch_likely(&arm64_const_caps_ready))
> >+		return __cpus_have_const_cap(num);
> >+	else
> >+		return cpus_have_cap(num);
> 
> We use cpus_have_const_cap() from hyp code, via has_vhe(), and we could potentially
> try to access unmapped kernel data from hyp if we fall back to cpus_have_cap().
> However, it looks like we have already set arm64_const_caps_ready, so we should not
> hit it in practice. Maybe we could add a stricter version of the helper?
> 
> static inline bool cpus_have_const_cap_strict(int num)
> {
> 	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
> 	return __cpus_have_const_cap(num);
> }

Just to check, is that the only user of cpus_have_const_cap() at hyp?

If so, I can do something like the above, patching <asm/virt.h> to use
it for has_vhe().
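
Roughly, assuming your cpus_have_const_cap_strict() helper and the existing
ARM64_HAS_VIRT_HOST_EXTN cap, that change would look something like:

static inline bool has_vhe(void)
{
	return cpus_have_const_cap_strict(ARM64_HAS_VIRT_HOST_EXTN);
}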

We don't have a BUG handler at hyp, but that should trigger a hyp panic,
which I guess is good enough.

Marc, thoughts?

Thanks,
Mark.
Mark Rutland May 11, 2017, 3:42 p.m. UTC | #3
On Thu, May 11, 2017 at 04:37:20PM +0100, Mark Rutland wrote:
> On Thu, May 11, 2017 at 04:15:38PM +0100, Suzuki K Poulose wrote:
> > On 11/05/17 16:01, Mark Rutland wrote:
> > >+static inline bool cpus_have_const_cap(int num)
> > >+{
> > >+	if (static_branch_likely(&arm64_const_caps_ready))
> > >+		return __cpus_have_const_cap(num);
> > >+	else
> > >+		return cpus_have_cap(num);
> > 
> > We use cpus_have_const_cap() from hyp code, via has_vhe(), and we could potentially
> > try to access unmapped kernel data from hyp if we fall back to cpus_have_cap().
> > However, it looks like we have already set arm64_const_caps_ready, so we should not
> > hit it in practice. Maybe we could add a stricter version of the helper?
> > 
> > static inline bool cpus_have_const_cap_strict(int num)
> > {
> > 	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
> > 	return __cpus_have_const_cap(num);
> > }
> 
> Just to check, is that the only user of cpus_have_const_cap() at hyp?
> 
> If so, I can do something like the above, patching <asm/virt.h> to use
> it for has_vhe().
> 
> We don't have a BUG handler at hyp, but that should trigger a hyp panic,
> which I guess is good enough.
> 
> Marc, thoughts?

The other option, given this is *only* used at hyp, is:

static inline bool has_vhe(void)
{
	return !!(read_sysreg(HCR_EL2) & HCR_E2H);
}

... though I assume we may have avoided that deliberately.

Thanks,
Mark.
Suzuki K Poulose May 11, 2017, 3:54 p.m. UTC | #4
On 11/05/17 16:37, Mark Rutland wrote:
> On Thu, May 11, 2017 at 04:15:38PM +0100, Suzuki K Poulose wrote:
>> On 11/05/17 16:01, Mark Rutland wrote:
>>> +static inline bool cpus_have_const_cap(int num)
>>> +{
>>> +	if (static_branch_likely(&arm64_const_caps_ready))
>>> +		return __cpus_have_const_cap(num);
>>> +	else
>>> +		return cpus_have_cap(num);
>>
>> We use cpus_have_const_cap() from hyp code, via has_vhe(), and we could potentially
>> try to access unmapped kernel data from hyp if we fall back to cpus_have_cap().
>> However, it looks like we have already set arm64_const_caps_ready, so we should not
>> hit it in practice. Maybe we could add a stricter version of the helper?
>>
>> static inline bool cpus_have_const_cap_strict(int num)
>> {
>> 	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
>> 	return __cpus_have_const_cap(num);
>> }
>
> Just to check, is that the only user of cpus_have_const_cap() at hyp?

Uh, no, we have one more, via system_supports_fpsimd() in __activate_traps.

Suzuki

>
> If so, I can do something like the above, patching <asm/virt.h> to use
> it for has_vhe().
>
> We don't have a BUG handler at hyp, but that should trigger a hyp panic,
> which I guess is good enough.
>
> Marc, thoughts?
>
> Thanks,
> Mark.
Marc Zyngier May 11, 2017, 4:08 p.m. UTC | #5
On 11/05/17 16:54, Suzuki K Poulose wrote:
> On 11/05/17 16:37, Mark Rutland wrote:
>> On Thu, May 11, 2017 at 04:15:38PM +0100, Suzuki K Poulose wrote:
>>> On 11/05/17 16:01, Mark Rutland wrote:
>>>> +static inline bool cpus_have_const_cap(int num)
>>>> +{
>>>> +	if (static_branch_likely(&arm64_const_caps_ready))
>>>> +		return __cpus_have_const_cap(num);
>>>> +	else
>>>> +		return cpus_have_cap(num);
>>>
>>> We use cpus_have_const_cap() from hyp code, via has_vhe(), and we could potentially
>>> try to access unmapped kernel data from hyp if we fall back to cpus_have_cap().
>>> However, it looks like we have already set arm64_const_caps_ready, so we should not
>>> hit it in practice. Maybe we could add a stricter version of the helper?
>>>
>>> static inline bool cpus_have_const_cap_strict(int num)
>>> {
>>> 	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
>>> 	return __cpus_have_const_cap(num);
>>> }
>>
>> Just to check, is that the only user of cpus_have_const_cap() at hyp?
> 
> Uh, no, we have one more, via system_supports_fpsimd() in __activate_traps.

Indeed, and I'd definitely expect to see more of that trickling in (if
only to deal with errata).

I'm OK with the BUG_ON version, TBH. It's not pretty, but it will be
perfectly visible if it fires.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...
Mark Rutland May 11, 2017, 5:53 p.m. UTC | #6
On Thu, May 11, 2017 at 05:08:19PM +0100, Marc Zyngier wrote:
> On 11/05/17 16:54, Suzuki K Poulose wrote:
> > On 11/05/17 16:37, Mark Rutland wrote:
> >> On Thu, May 11, 2017 at 04:15:38PM +0100, Suzuki K Poulose wrote:
> >>> On 11/05/17 16:01, Mark Rutland wrote:
> >>>> +static inline bool cpus_have_const_cap(int num)
> >>>> +{
> >>>> +	if (static_branch_likely(&arm64_const_caps_ready))
> >>>> +		return __cpus_have_const_cap(num);
> >>>> +	else
> >>>> +		return cpus_have_cap(num);
> >>>
> >>> We use cpus_have_const_cap() from hyp code, via has_vhe(), and we could potentially
> >>> try to access unmapped kernel data from hyp if we fall back to cpus_have_cap().
> >>> However, it looks like we have already set arm64_const_caps_ready, so we should not
> >>> hit it in practice. Maybe we could add a stricter version of the helper?
> >>>
> >>> static inline bool cpus_have_const_cap_strict(int num)
> >>> {
> >>> 	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
> >>> 	return __cpus_have_const_cap(num);
> >>> }
> >>
> >> Just to check, is that the only user of cpus_have_const_cap() at hyp?
> > 
> > Uh, no, we have one more, via system_supports_fpsimd() in __activate_traps.
> 
> Indeed, and I'd definitely expect to see more of that trickling in (if
> only to deal with errata).
> 
> I'm OK with the BUG_ON version, TBH. It's not pretty, but it will be
> perfectly visible if it fires.

We can't make system_supports_fpsimd() BUG_ON(), because that will fire
the first time the boot CPU tries to switch thread, due to
fpsimd_thread_switch().
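
(For context, a rough sketch of that call site in arch/arm64/kernel/fpsimd.c,
which runs on every context switch, including early on the boot CPU before the
caps are finalised:)

void fpsimd_thread_switch(struct task_struct *next)
{
	if (!system_supports_fpsimd())
		return;
	/* FPSIMD state save/restore follows */
}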

However, thinking about it, there's no risk that this code runs at hyp
before we've initialised the caps.

We initialise hyp from kvm_arch_init(), which is a module initcall. As
it's built-in, that's actually a device initcall, which happens long
after we've finalised the cpucaps.
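
(Roughly, for built-in code, from include/linux/module.h and include/linux/init.h:)

/* !MODULE: module_init() is just another initcall... */
#define module_init(x)	__initcall(x);

/* ...which is the level-6 (device) initcall */
#define __initcall(fn)			device_initcall(fn)
#define device_initcall(fn)		__define_initcall(fn, 6)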

So the v2 patch should be safe, though we can make that a little clearer
with the below, which I'll fold into v3.

Thanks,
Mark.

---->8----
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5e19165..28bf4ea 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -24,6 +24,7 @@
 
 #include <linux/types.h>
 #include <linux/kvm_types.h>
+#include <asm/cpufeature.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
@@ -356,8 +357,10 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 {
        /*
         * Call initialization code, and switch to the full blown
-        * HYP code.
+        * HYP code. If the cpucaps haven't been finalized yet,
+        * something has gone very wrong, and hyp will crash and burn.
         */
+       BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
        __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
 }



Patch

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 8a7ff73..428ee1f 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -115,6 +115,7 @@  struct arm64_cpu_capabilities {
 
 extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
+extern struct static_key_false arm64_const_caps_ready;
 
 bool this_cpu_has_cap(unsigned int cap);
 
@@ -124,7 +125,7 @@  static inline bool cpu_have_feature(unsigned int num)
 }
 
 /* System capability check for constant caps */
-static inline bool cpus_have_const_cap(int num)
+static inline bool __cpus_have_const_cap(int num)
 {
 	if (num >= ARM64_NCAPS)
 		return false;
@@ -138,6 +139,14 @@  static inline bool cpus_have_cap(unsigned int num)
 	return test_bit(num, cpu_hwcaps);
 }
 
+static inline bool cpus_have_const_cap(int num)
+{
+	if (static_branch_likely(&arm64_const_caps_ready))
+		return __cpus_have_const_cap(num);
+	else
+		return cpus_have_cap(num);
+}
+
 static inline void cpus_set_cap(unsigned int num)
 {
 	if (num >= ARM64_NCAPS) {
@@ -145,7 +154,6 @@  static inline void cpus_set_cap(unsigned int num)
 			num, ARM64_NCAPS);
 	} else {
 		__set_bit(num, cpu_hwcaps);
-		static_branch_enable_cpuslocked(&cpu_hwcap_keys[num]);
 	}
 }
 
@@ -223,7 +231,6 @@  void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 void check_local_cpu_capabilities(void);
 
 void update_cpu_errata_workarounds(void);
-void update_cpu_errata_workarounds_cpuslocked(void);
 void __init enable_errata_workarounds(void);
 void verify_local_cpu_errata_workarounds(void);
 
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 57d60fa..2ed2a76 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -190,16 +190,9 @@  void verify_local_cpu_errata_workarounds(void)
 		}
 }
 
-void update_cpu_errata_workarounds_cpuslocked(void)
-{
-	update_cpu_capabilities(arm64_errata, "enabling workaround for");
-}
-
 void update_cpu_errata_workarounds(void)
 {
-	get_online_cpus();
-	update_cpu_errata_workarounds_cpuslocked();
-	put_online_cpus();
+	update_cpu_capabilities(arm64_errata, "enabling workaround for");
 }
 
 void __init enable_errata_workarounds(void)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 803afae..4a89f59 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -986,8 +986,16 @@  void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
  */
 void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 {
-	for (; caps->matches; caps++)
-		if (caps->enable && cpus_have_cap(caps->capability))
+	for (; caps->matches; caps++) {
+		unsigned int num = caps->capability;
+
+		if (!cpus_have_cap(num))
+			continue;
+
+		/* Ensure cpus_have_const_cap(num) works */
+		static_branch_enable(&cpu_hwcap_keys[num]);
+
+		if (caps->enable) {
 			/*
 			 * Use stop_machine() as it schedules the work allowing
 			 * us to modify PSTATE, instead of on_each_cpu() which
@@ -995,6 +1003,8 @@  void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 			 * we return.
 			 */
 			stop_machine(caps->enable, NULL, cpu_online_mask);
+		}
+	}
 }
 
 /*
@@ -1086,7 +1096,7 @@  void check_local_cpu_capabilities(void)
 	 * advertised capabilities.
 	 */
 	if (!sys_caps_initialised)
-		update_cpu_errata_workarounds_cpuslocked();
+		update_cpu_errata_workarounds();
 	else
 		verify_local_cpu_capabilities();
 }
@@ -1099,6 +1109,14 @@  static void __init setup_feature_capabilities(void)
 	enable_cpu_capabilities(arm64_features);
 }
 
+DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready);
+EXPORT_SYMBOL(arm64_const_caps_ready);
+
+static void __init mark_const_caps_ready(void)
+{
+	static_branch_enable(&arm64_const_caps_ready);
+}
+
 /*
  * Check if the current CPU has a given feature capability.
  * Should be called from non-preemptible context.
@@ -1134,6 +1152,7 @@  void __init setup_cpu_features(void)
 	/* Set the CPU feature capabilies */
 	setup_feature_capabilities();
 	enable_errata_workarounds();
+	mark_const_caps_ready();
 	setup_elf_hwcaps(arm64_elf_hwcaps);
 
 	if (system_supports_32bit_el0())