diff mbox series

[v5,03/27] arm64: alternative: Apply alternatives early in boot process

Message ID 1535471497-38854-4-git-send-email-julien.thierry@arm.com
State New
Headers show
Series None | expand

Commit Message

Julien Thierry Aug. 28, 2018, 3:51 p.m. UTC
From: Daniel Thompson <daniel.thompson@linaro.org>


Currently alternatives are applied very late in the boot process (and
a long time after we enable scheduling). Some alternative sequences,
such as those that alter the way CPU context is stored, must be applied
much earlier in the boot sequence.

Introduce apply_boot_alternatives() to allow some alternatives to be
applied immediately after we detect the CPU features of the boot CPU.

Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>

[julien.thierry@arm.com: rename to fit new cpufeature framework better,
			 apply BOOT_SCOPE feature early in boot]
Signed-off-by: Julien Thierry <julien.thierry@arm.com>

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Christoffer Dall <christoffer.dall@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
---
 arch/arm64/include/asm/alternative.h |  3 +--
 arch/arm64/include/asm/cpufeature.h  |  2 ++
 arch/arm64/kernel/alternative.c      | 28 +++++++++++++++++++++++++---
 arch/arm64/kernel/cpufeature.c       |  5 +++++
 arch/arm64/kernel/smp.c              |  7 +++++++
 5 files changed, 40 insertions(+), 5 deletions(-)

-- 
1.9.1

Comments

James Morse Sept. 12, 2018, 10:29 a.m. UTC | #1
Hi Julien,

On 28/08/18 16:51, Julien Thierry wrote:
> From: Daniel Thompson <daniel.thompson@linaro.org>

> 

> Currently alternatives are applied very late in the boot process (and

> a long time after we enable scheduling). Some alternative sequences,

> such as those that alter the way CPU context is stored, must be applied

> much earlier in the boot sequence.

> 

> Introduce apply_boot_alternatives() to allow some alternatives to be

> applied immediately after we detect the CPU features of the boot CPU.



> diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c

> index b5d6039..70c2604 100644

> --- a/arch/arm64/kernel/alternative.c

> +++ b/arch/arm64/kernel/alternative.c

> @@ -145,7 +145,8 @@ static void clean_dcache_range_nopatch(u64 start, u64 end)

>  	} while (cur += d_size, cur < end);

>  }

>  

> -static void __apply_alternatives(void *alt_region, bool is_module)

> +static void __apply_alternatives(void *alt_region,  bool is_module,

> +				 unsigned long feature_mask)


Shouldn't feature_mask be a DECLARE_BITMAP() maybe-array like cpu_hwcaps?
This means it keeps working when NR_CAPS grows over 64, which might happen
sooner than we think for backported errata...


> @@ -155,6 +156,9 @@ static void __apply_alternatives(void *alt_region, bool is_module)

>  	for (alt = region->begin; alt < region->end; alt++) {

>  		int nr_inst;

>  

> +		if ((BIT(alt->cpufeature) & feature_mask) == 0)

> +			continue;

> +

>  		/* Use ARM64_CB_PATCH as an unconditional patch */

>  		if (alt->cpufeature < ARM64_CB_PATCH &&

>  		    !cpus_have_cap(alt->cpufeature))

> @@ -213,7 +217,7 @@ static int __apply_alternatives_multi_stop(void *unused)

>  		isb();

>  	} else {

>  		BUG_ON(alternatives_applied);

> -		__apply_alternatives(&region, false);

> +		__apply_alternatives(&region, false, ~boot_capabilities);


Ah, this is tricky. There is a bitmap_complement() for the DECLARE_BITMAP()
stuff, but we'd need a second array...

We could pass the scope around, but then __apply_alternatives() would need to
look up the struct arm64_cpu_capabilities every time. This is only a problem
as we have one cap-number-space for errata/features, but separate sparse lists.

(I think applying the alternatives one cap at a time is a bad idea as we would
need to walk the alternative region NR_CAPS times)


> @@ -227,6 +231,24 @@ void __init apply_alternatives_all(void)

>  	stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask);

>  }

>  

> +/*

> + * This is called very early in the boot process (directly after we run

> + * a feature detect on the boot CPU). No need to worry about other CPUs

> + * here.

> + */

> +void __init apply_boot_alternatives(void)

> +{

> +	struct alt_region region = {

> +		.begin	= (struct alt_instr *)__alt_instructions,

> +		.end	= (struct alt_instr *)__alt_instructions_end,

> +	};

> +

> +	/* If called on non-boot cpu things could go wrong */

> +	WARN_ON(smp_processor_id() != 0);


Isn't the problem if there are multiple CPUs online?


> +	__apply_alternatives(&region, false, boot_capabilities);

> +}

> +

>  #ifdef CONFIG_MODULES

>  void apply_alternatives_module(void *start, size_t length)

>  {


> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c

> index 3bc1c8b..0d1e41e 100644

> --- a/arch/arm64/kernel/cpufeature.c

> +++ b/arch/arm64/kernel/cpufeature.c

> @@ -52,6 +52,8 @@

>  DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);

>  EXPORT_SYMBOL(cpu_hwcaps);

>  

> +unsigned long boot_capabilities;

> +

>  /*

>   * Flag to indicate if we have computed the system wide

>   * capabilities based on the boot time active CPUs. This

> @@ -1375,6 +1377,9 @@ static void __update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,

>  		if (!cpus_have_cap(caps->capability) && caps->desc)

>  			pr_info("%s %s\n", info, caps->desc);

>  		cpus_set_cap(caps->capability);


Hmm, the bitmap behind cpus_set_cap() is what cpus_have_cap() in
__apply_alternatives() looks at. If you had a call to __apply_alternatives after
update_cpu_capabilities(SCOPE_BOOT_CPU), but before any others, it would only
apply those alternatives...

(I don't think there is a problem re-applying the same alternative, but I
haven't checked).


> +

> +		if (caps->type & SCOPE_BOOT_CPU)

> +			__set_bit(caps->capability, &boot_capabilities);

>  	}

>  }



Thanks,

James
Julien Thierry Sept. 12, 2018, 4:49 p.m. UTC | #2
Hi James,

On 12/09/18 11:29, James Morse wrote:
> Hi Julien,

> 

> On 28/08/18 16:51, Julien Thierry wrote:

>> From: Daniel Thompson <daniel.thompson@linaro.org>

>>

>> Currently alternatives are applied very late in the boot process (and

>> a long time after we enable scheduling). Some alternative sequences,

>> such as those that alter the way CPU context is stored, must be applied

>> much earlier in the boot sequence.

>>

>> Introduce apply_boot_alternatives() to allow some alternatives to be

>> applied immediately after we detect the CPU features of the boot CPU.

> 

> 

>> diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c

>> index b5d6039..70c2604 100644

>> --- a/arch/arm64/kernel/alternative.c

>> +++ b/arch/arm64/kernel/alternative.c

>> @@ -145,7 +145,8 @@ static void clean_dcache_range_nopatch(u64 start, u64 end)

>>   	} while (cur += d_size, cur < end);

>>   }

>>   

>> -static void __apply_alternatives(void *alt_region, bool is_module)

>> +static void __apply_alternatives(void *alt_region,  bool is_module,

>> +				 unsigned long feature_mask)

> 

> Shouldn't feature_mask be a DECLARE_BITMAP() maybe-array like cpu_hwcaps?

> This means it keeps working when NR_CAPS grows over 64, which might happen

> sooner than we think for backported errata...

> 

> 

>> @@ -155,6 +156,9 @@ static void __apply_alternatives(void *alt_region, bool is_module)

>>   	for (alt = region->begin; alt < region->end; alt++) {

>>   		int nr_inst;

>>   

>> +		if ((BIT(alt->cpufeature) & feature_mask) == 0)

>> +			continue;

>> +

>>   		/* Use ARM64_CB_PATCH as an unconditional patch */

>>   		if (alt->cpufeature < ARM64_CB_PATCH &&

>>   		    !cpus_have_cap(alt->cpufeature))

>> @@ -213,7 +217,7 @@ static int __apply_alternatives_multi_stop(void *unused)

>>   		isb();

>>   	} else {

>>   		BUG_ON(alternatives_applied);

>> -		__apply_alternatives(&region, false);

>> +		__apply_alternatives(&region, false, ~boot_capabilities);

> 

> Ah, this is tricky. There is a bitmap_complement() for the DECLARE_BITMAP()

> stuff, but we'd need a second array...

> 

> We could pass the scope around, but then __apply_alternatives() would need to

> lookup the struct arm64_cpu_capabilities up every time. This is only a problem

> as we have one cap-number-space for errata/features, but separate sparse lists.

> 


Since for each alternative we know the cpufeature associated with it, 
the "lookup" is really just accessing an array with the given index, so 
that could be an option.

> (I think applying the alternatives one cap at a time is a bad idea as we would

> need to walk the alternative region NR_CAPS times)

> 

> 

>> @@ -227,6 +231,24 @@ void __init apply_alternatives_all(void)

>>   	stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask);

>>   }

>>   

>> +/*

>> + * This is called very early in the boot process (directly after we run

>> + * a feature detect on the boot CPU). No need to worry about other CPUs

>> + * here.

>> + */

>> +void __init apply_boot_alternatives(void)

>> +{

>> +	struct alt_region region = {

>> +		.begin	= (struct alt_instr *)__alt_instructions,

>> +		.end	= (struct alt_instr *)__alt_instructions_end,

>> +	};

>> +

>> +	/* If called on non-boot cpu things could go wrong */

>> +	WARN_ON(smp_processor_id() != 0);

> 

> Isn't the problem if there are multiple CPUs online?

> 


Yes, that makes more sense. I'll change this.

> 

>> +	__apply_alternatives(&region, false, boot_capabilities);

>> +}

>> +

>>   #ifdef CONFIG_MODULES

>>   void apply_alternatives_module(void *start, size_t length)

>>   {

> 

>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c

>> index 3bc1c8b..0d1e41e 100644

>> --- a/arch/arm64/kernel/cpufeature.c

>> +++ b/arch/arm64/kernel/cpufeature.c

>> @@ -52,6 +52,8 @@

>>   DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);

>>   EXPORT_SYMBOL(cpu_hwcaps);

>>   

>> +unsigned long boot_capabilities;

>> +

>>   /*

>>    * Flag to indicate if we have computed the system wide

>>    * capabilities based on the boot time active CPUs. This

>> @@ -1375,6 +1377,9 @@ static void __update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,

>>   		if (!cpus_have_cap(caps->capability) && caps->desc)

>>   			pr_info("%s %s\n", info, caps->desc);

>>   		cpus_set_cap(caps->capability);

> 

> Hmm, the bitmap behind cpus_set_cap() is what cpus_have_cap() in

> __apply_alternatives() looks at. If you had a call to __apply_alternatives after

> update_cpu_capabilities(SCOPE_BOOT_CPU), but before any others, it would only

> apply those alternatives...

> 

> (I don't think there is a problem re-applying the same alternative, but I

> haven't checked).

> 


Interesting idea. If someone can confirm that patching alternatives 
twice is safe, I think it would make things simpler.

Thanks,

-- 
Julien Thierry
Daniel Thompson Sept. 17, 2018, 11:44 p.m. UTC | #3
On Wed, Sep 12, 2018 at 05:49:09PM +0100, Julien Thierry wrote:
> > > +	__apply_alternatives(&region, false, boot_capabilities);

> > > +}

> > > +

> > >   #ifdef CONFIG_MODULES

> > >   void apply_alternatives_module(void *start, size_t length)

> > >   {

> > 

> > > diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c

> > > index 3bc1c8b..0d1e41e 100644

> > > --- a/arch/arm64/kernel/cpufeature.c

> > > +++ b/arch/arm64/kernel/cpufeature.c

> > > @@ -52,6 +52,8 @@

> > >   DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);

> > >   EXPORT_SYMBOL(cpu_hwcaps);

> > > +unsigned long boot_capabilities;

> > > +

> > >   /*

> > >    * Flag to indicate if we have computed the system wide

> > >    * capabilities based on the boot time active CPUs. This

> > > @@ -1375,6 +1377,9 @@ static void __update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,

> > >   		if (!cpus_have_cap(caps->capability) && caps->desc)

> > >   			pr_info("%s %s\n", info, caps->desc);

> > >   		cpus_set_cap(caps->capability);

> > 

> > Hmm, the bitmap behind cpus_set_cap() is what cpus_have_cap() in

> > __apply_alternatives() looks at. If you had a call to __apply_alternatives after

> > update_cpu_capabilities(SCOPE_BOOT_CPU), but before any others, it would only

> > apply those alternatives...

> > 

> > (I don't think there is a problem re-applying the same alternative, but I

> > haven't checked).

> > 

> 

> Interesting idea. If someone can confirm that patching alternatives twice is

> safe, I think it would make things simpler.


Early versions of this patch applied the alternatives twice. I never
noticed any problems with double patching (second time round it will
write out code that is identical to what is already there so it is
merely inefficient rather than unsafe).


Daniel.
Julien Thierry Sept. 18, 2018, 7:37 a.m. UTC | #4
On 18/09/18 00:44, Daniel Thompson wrote:
> On Wed, Sep 12, 2018 at 05:49:09PM +0100, Julien Thierry wrote:

>>>> +	__apply_alternatives(&region, false, boot_capabilities);

>>>> +}

>>>> +

>>>>    #ifdef CONFIG_MODULES

>>>>    void apply_alternatives_module(void *start, size_t length)

>>>>    {

>>>

>>>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c

>>>> index 3bc1c8b..0d1e41e 100644

>>>> --- a/arch/arm64/kernel/cpufeature.c

>>>> +++ b/arch/arm64/kernel/cpufeature.c

>>>> @@ -52,6 +52,8 @@

>>>>    DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);

>>>>    EXPORT_SYMBOL(cpu_hwcaps);

>>>> +unsigned long boot_capabilities;

>>>> +

>>>>    /*

>>>>     * Flag to indicate if we have computed the system wide

>>>>     * capabilities based on the boot time active CPUs. This

>>>> @@ -1375,6 +1377,9 @@ static void __update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,

>>>>    		if (!cpus_have_cap(caps->capability) && caps->desc)

>>>>    			pr_info("%s %s\n", info, caps->desc);

>>>>    		cpus_set_cap(caps->capability);

>>>

>>> Hmm, the bitmap behind cpus_set_cap() is what cpus_have_cap() in

>>> __apply_alternatives() looks at. If you had a call to __apply_alternatives after

>>> update_cpu_capabilities(SCOPE_BOOT_CPU), but before any others, it would only

>>> apply those alternatives...

>>>

>>> (I don't think there is a problem re-applying the same alternative, but I

>>> haven't checked).

>>>

>>

>> Interesting idea. If someone can confirm that patching alternatives twice is

>> safe, I think it would make things simpler.

> 

> Early versions of this patch applied the alternatives twice. I never

> noticed any problems with double patching (second time round it will

> write out code that is identical to what is already there so it is

> merely inefficient rather than unsafe.

> 


When you say early version, do you mean the first ones you did? Because 
the one I picked up (v4 I believe) had a feature mask to select which 
ones to apply early and then which ones to exclude when applying the 
rest of the features.

But I admit I have not looked at the previous versions.

Cheers,

-- 
Julien Thierry
James Morse Sept. 18, 2018, 5:47 p.m. UTC | #5
Hi Daniel, Julien,

On 09/18/2018 12:44 AM, Daniel Thompson wrote:
> On Wed, Sep 12, 2018 at 05:49:09PM +0100, Julien Thierry wrote:

>>>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c

>>>> index 3bc1c8b..0d1e41e 100644

>>>> --- a/arch/arm64/kernel/cpufeature.c

>>>> +++ b/arch/arm64/kernel/cpufeature.c

>>>> @@ -52,6 +52,8 @@

>>>>    DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);

>>>>    EXPORT_SYMBOL(cpu_hwcaps);

>>>> +unsigned long boot_capabilities;

>>>> +

>>>>    /*

>>>>     * Flag to indicate if we have computed the system wide

>>>>     * capabilities based on the boot time active CPUs. This

>>>> @@ -1375,6 +1377,9 @@ static void __update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,

>>>>    		if (!cpus_have_cap(caps->capability) && caps->desc)

>>>>    			pr_info("%s %s\n", info, caps->desc);

>>>>    		cpus_set_cap(caps->capability);

>>>

>>> Hmm, the bitmap behind cpus_set_cap() is what cpus_have_cap() in

>>> __apply_alternatives() looks at. If you had a call to __apply_alternatives after

>>> update_cpu_capabilities(SCOPE_BOOT_CPU), but before any others, it would only

>>> apply those alternatives...

>>>

>>> (I don't think there is a problem re-applying the same alternative, but I

>>> haven't checked).


>> Interesting idea. If someone can confirm that patching alternatives twice is

>> safe, I think it would make things simpler.


Sounds good, I think we need to avoid adding a limit to the number of caps.

The extra work is inefficient, but if it saves merging those lists as part of this 
series it's probably fine. (we only do this stuff once during boot)



> Early versions of this patch applied the alternatives twice. I never

> noticed any problems with double patching (second time round it will

> write out code that is identical to what is already there so it is

> merely inefficient rather than unsafe.


For the regular kind, I agree. But we've recently grown some fancy dynamic patching 
where the code is generated at runtime, instead of swapping in an alternative 
sequence. Details in commit dea5e2a4 ("arm64: alternatives: Add dynamic patching 
feature"). It's unlikely we would ever apply these twice as they can't have a scope, 
... and they all look safe.


Thanks,

James
Marc Zyngier Sept. 21, 2018, 4:05 p.m. UTC | #6
On Wed, 12 Sep 2018 17:49:09 +0100,
Julien Thierry <julien.thierry@arm.com> wrote:
> 

> Hi James,

> 

> On 12/09/18 11:29, James Morse wrote:

> > Hi Julien,

> > 

> > On 28/08/18 16:51, Julien Thierry wrote:

> >> From: Daniel Thompson <daniel.thompson@linaro.org>

> >> 

> >> Currently alternatives are applied very late in the boot process (and

> >> a long time after we enable scheduling). Some alternative sequences,

> >> such as those that alter the way CPU context is stored, must be applied

> >> much earlier in the boot sequence.

> >> 

> >> Introduce apply_boot_alternatives() to allow some alternatives to be

> >> applied immediately after we detect the CPU features of the boot CPU.

> > 

> > 

> >> diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c

> >> index b5d6039..70c2604 100644

> >> --- a/arch/arm64/kernel/alternative.c

> >> +++ b/arch/arm64/kernel/alternative.c

> >> @@ -145,7 +145,8 @@ static void clean_dcache_range_nopatch(u64 start, u64 end)

> >>   	} while (cur += d_size, cur < end);

> >>   }

> >>   -static void __apply_alternatives(void *alt_region, bool

> >> is_module)

> >> +static void __apply_alternatives(void *alt_region,  bool is_module,

> >> +				 unsigned long feature_mask)

> > 

> > Shouldn't feature_mask be a DECLARE_BITMAP() maybe-array like cpu_hwcaps?

> > This means it keeps working when NR_CAPS grows over 64, which might happen

> > sooner than we think for backported errata...

> > 

> > 

> >> @@ -155,6 +156,9 @@ static void __apply_alternatives(void *alt_region, bool is_module)

> >>   	for (alt = region->begin; alt < region->end; alt++) {

> >>   		int nr_inst;

> >>   +		if ((BIT(alt->cpufeature) & feature_mask) == 0)

> >> +			continue;

> >> +

> >>   		/* Use ARM64_CB_PATCH as an unconditional patch */

> >>   		if (alt->cpufeature < ARM64_CB_PATCH &&

> >>   		    !cpus_have_cap(alt->cpufeature))

> >> @@ -213,7 +217,7 @@ static int __apply_alternatives_multi_stop(void *unused)

> >>   		isb();

> >>   	} else {

> >>   		BUG_ON(alternatives_applied);

> >> -		__apply_alternatives(&region, false);

> >> +		__apply_alternatives(&region, false, ~boot_capabilities);

> > 

> > Ah, this is tricky. There is a bitmap_complement() for the DECLARE_BITMAP()

> > stuff, but we'd need a second array...

> > 

> > We could pass the scope around, but then __apply_alternatives() would need to

> > lookup the struct arm64_cpu_capabilities up every time. This is only a problem

> > as we have one cap-number-space for errata/features, but separate sparse lists.

> > 

> 

> Since for each alternative we know the cpufeature associated with it,

> the "lookup" is really just accessing an array with the given index,

> so that could be an option.

> 

> > (I think applying the alternatives one cap at a time is a bad idea as we would

> > need to walk the alternative region NR_CAPS times)

> > 

> > 

> >> @@ -227,6 +231,24 @@ void __init apply_alternatives_all(void)

> >>   	stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask);

> >>   }

> >>   +/*

> >> + * This is called very early in the boot process (directly after we run

> >> + * a feature detect on the boot CPU). No need to worry about other CPUs

> >> + * here.

> >> + */

> >> +void __init apply_boot_alternatives(void)

> >> +{

> >> +	struct alt_region region = {

> >> +		.begin	= (struct alt_instr *)__alt_instructions,

> >> +		.end	= (struct alt_instr *)__alt_instructions_end,

> >> +	};

> >> +

> >> +	/* If called on non-boot cpu things could go wrong */

> >> +	WARN_ON(smp_processor_id() != 0);

> > 

> > Isn't the problem if there are multiple CPUs online?

> > 

> 

> Yes, that makes more sense. I'll change this.

> 

> > 

> >> +	__apply_alternatives(&region, false, boot_capabilities);

> >> +}

> >> +

> >>   #ifdef CONFIG_MODULES

> >>   void apply_alternatives_module(void *start, size_t length)

> >>   {

> > 

> >> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c

> >> index 3bc1c8b..0d1e41e 100644

> >> --- a/arch/arm64/kernel/cpufeature.c

> >> +++ b/arch/arm64/kernel/cpufeature.c

> >> @@ -52,6 +52,8 @@

> >>   DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);

> >>   EXPORT_SYMBOL(cpu_hwcaps);

> >>   +unsigned long boot_capabilities;

> >> +

> >>   /*

> >>    * Flag to indicate if we have computed the system wide

> >>    * capabilities based on the boot time active CPUs. This

> >> @@ -1375,6 +1377,9 @@ static void __update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,

> >>   		if (!cpus_have_cap(caps->capability) && caps->desc)

> >>   			pr_info("%s %s\n", info, caps->desc);

> >>   		cpus_set_cap(caps->capability);

> > 

> > Hmm, the bitmap behind cpus_set_cap() is what cpus_have_cap() in

> > __apply_alternatives() looks at. If you had a call to __apply_alternatives after

> > update_cpu_capabilities(SCOPE_BOOT_CPU), but before any others, it would only

> > apply those alternatives...

> > 

> > (I don't think there is a problem re-applying the same alternative, but I

> > haven't checked).

> > 

> 

> Interesting idea. If someone can confirm that patching alternatives

> twice is safe, I think it would make things simpler.


It may not be safe for dynamic alternatives, where the patch code is
generated at runtime and may rely on the original text (to extract a
register number, for example -- see kvm_update_va_mask).

Thanks,

	M.

-- 
Jazz is not dead, it just smell funny.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 4b650ec..17f4554 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -14,8 +14,6 @@ 
 #include <linux/stddef.h>
 #include <linux/stringify.h>
 
-extern int alternatives_applied;
-
 struct alt_instr {
 	s32 orig_offset;	/* offset to original instruction */
 	s32 alt_offset;		/* offset to replacement instruction */
@@ -27,6 +25,7 @@  struct alt_instr {
 typedef void (*alternative_cb_t)(struct alt_instr *alt,
 				 __le32 *origptr, __le32 *updptr, int nr_inst);
 
+void __init apply_boot_alternatives(void);
 void __init apply_alternatives_all(void);
 
 #ifdef CONFIG_MODULES
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 1717ba1..e6c030a 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -357,6 +357,8 @@  static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap)
 extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
 extern struct static_key_false arm64_const_caps_ready;
 
+extern unsigned long boot_capabilities;
+
 bool this_cpu_has_cap(unsigned int cap);
 
 static inline bool cpu_have_feature(unsigned int num)
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index b5d6039..70c2604 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -145,7 +145,8 @@  static void clean_dcache_range_nopatch(u64 start, u64 end)
 	} while (cur += d_size, cur < end);
 }
 
-static void __apply_alternatives(void *alt_region, bool is_module)
+static void __apply_alternatives(void *alt_region,  bool is_module,
+				 unsigned long feature_mask)
 {
 	struct alt_instr *alt;
 	struct alt_region *region = alt_region;
@@ -155,6 +156,9 @@  static void __apply_alternatives(void *alt_region, bool is_module)
 	for (alt = region->begin; alt < region->end; alt++) {
 		int nr_inst;
 
+		if ((BIT(alt->cpufeature) & feature_mask) == 0)
+			continue;
+
 		/* Use ARM64_CB_PATCH as an unconditional patch */
 		if (alt->cpufeature < ARM64_CB_PATCH &&
 		    !cpus_have_cap(alt->cpufeature))
@@ -213,7 +217,7 @@  static int __apply_alternatives_multi_stop(void *unused)
 		isb();
 	} else {
 		BUG_ON(alternatives_applied);
-		__apply_alternatives(&region, false);
+		__apply_alternatives(&region, false, ~boot_capabilities);
 		/* Barriers provided by the cache flushing */
 		WRITE_ONCE(alternatives_applied, 1);
 	}
@@ -227,6 +231,24 @@  void __init apply_alternatives_all(void)
 	stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask);
 }
 
+/*
+ * This is called very early in the boot process (directly after we run
+ * a feature detect on the boot CPU). No need to worry about other CPUs
+ * here.
+ */
+void __init apply_boot_alternatives(void)
+{
+	struct alt_region region = {
+		.begin	= (struct alt_instr *)__alt_instructions,
+		.end	= (struct alt_instr *)__alt_instructions_end,
+	};
+
+	/* If called on non-boot cpu things could go wrong */
+	WARN_ON(smp_processor_id() != 0);
+
+	__apply_alternatives(&region, false, boot_capabilities);
+}
+
 #ifdef CONFIG_MODULES
 void apply_alternatives_module(void *start, size_t length)
 {
@@ -235,6 +257,6 @@  void apply_alternatives_module(void *start, size_t length)
 		.end	= start + length,
 	};
 
-	__apply_alternatives(&region, true);
+	__apply_alternatives(&region, true, -1);
 }
 #endif
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 3bc1c8b..0d1e41e 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -52,6 +52,8 @@ 
 DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 EXPORT_SYMBOL(cpu_hwcaps);
 
+unsigned long boot_capabilities;
+
 /*
  * Flag to indicate if we have computed the system wide
  * capabilities based on the boot time active CPUs. This
@@ -1375,6 +1377,9 @@  static void __update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 		if (!cpus_have_cap(caps->capability) && caps->desc)
 			pr_info("%s %s\n", info, caps->desc);
 		cpus_set_cap(caps->capability);
+
+		if (caps->type & SCOPE_BOOT_CPU)
+			__set_bit(caps->capability, &boot_capabilities);
 	}
 }
 
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 25fcd22..22c9a0a 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -414,6 +414,13 @@  void __init smp_prepare_boot_cpu(void)
 	 */
 	jump_label_init();
 	cpuinfo_store_boot_cpu();
+
+	/*
+	 * We now know enough about the boot CPU to apply the
+	 * alternatives that cannot wait until interrupt handling
+	 * and/or scheduling is enabled.
+	 */
+	apply_boot_alternatives();
 }
 
 static u64 __init of_get_cpu_mpidr(struct device_node *dn)