Message ID | 9dba462b4d09a1a8a9fbb75740b74bf91a09a3e1.1623825725.git.viresh.kumar@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | cpufreq: cppc: Add support for frequency invariance | expand |
On Wed, Jun 16, 2021 at 12:18:08PM +0530, Viresh Kumar wrote: > Currently topology_scale_freq_tick() may end up using a pointer to > struct scale_freq_data, which was previously cleared by > topology_clear_scale_freq_source(), as there is no protection in place > here. The users of topology_clear_scale_freq_source() though needs a > guarantee that the previous scale_freq_data isn't used anymore. > > Since topology_scale_freq_tick() is called from scheduler tick, we don't > want to add locking in there. Use the RCU update mechanism instead > (which is already used by the scheduler's utilization update path) to > guarantee race free updates here. > > Cc: Paul E. McKenney <paulmck@kernel.org> > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> So this is a bugfix for problems in the current codebase? What commit does this fix? Should it go to the stable kernels? > --- > drivers/base/arch_topology.c | 27 +++++++++++++++++++++------ > 1 file changed, 21 insertions(+), 6 deletions(-) > > diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c > index c1179edc0f3b..921312a8d957 100644 > --- a/drivers/base/arch_topology.c > +++ b/drivers/base/arch_topology.c > @@ -18,10 +18,11 @@ > #include <linux/cpumask.h> > #include <linux/init.h> > #include <linux/percpu.h> > +#include <linux/rcupdate.h> > #include <linux/sched.h> > #include <linux/smp.h> > > -static DEFINE_PER_CPU(struct scale_freq_data *, sft_data); > +static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data); > static struct cpumask scale_freq_counters_mask; > static bool scale_freq_invariant; > > @@ -66,16 +67,20 @@ void topology_set_scale_freq_source(struct scale_freq_data *data, > if (cpumask_empty(&scale_freq_counters_mask)) > scale_freq_invariant = topology_scale_freq_invariant(); > > + rcu_read_lock(); > + > for_each_cpu(cpu, cpus) { > - sfd = per_cpu(sft_data, cpu); > + sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu)); > > /* Use ARCH provided counters whenever possible */ > if 
(!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) { > - per_cpu(sft_data, cpu) = data; > + rcu_assign_pointer(per_cpu(sft_data, cpu), data); > cpumask_set_cpu(cpu, &scale_freq_counters_mask); > } > } > > + rcu_read_unlock(); > + > update_scale_freq_invariant(true); > } > EXPORT_SYMBOL_GPL(topology_set_scale_freq_source); > @@ -86,22 +91,32 @@ void topology_clear_scale_freq_source(enum scale_freq_source source, > struct scale_freq_data *sfd; > int cpu; > > + rcu_read_lock(); > + > for_each_cpu(cpu, cpus) { > - sfd = per_cpu(sft_data, cpu); > + sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu)); > > if (sfd && sfd->source == source) { > - per_cpu(sft_data, cpu) = NULL; > + rcu_assign_pointer(per_cpu(sft_data, cpu), NULL); > cpumask_clear_cpu(cpu, &scale_freq_counters_mask); > } > } > > + rcu_read_unlock(); > + > + /* > + * Make sure all references to previous sft_data are dropped to avoid > + * use-after-free races. > + */ > + synchronize_rcu(); What race is happening? How could the current code race? Only when a cpu is removed? thanks, greg k-h
On 16-06-21, 09:57, Greg Kroah-Hartman wrote: > On Wed, Jun 16, 2021 at 12:18:08PM +0530, Viresh Kumar wrote: > > Currently topology_scale_freq_tick() may end up using a pointer to > > struct scale_freq_data, which was previously cleared by > > topology_clear_scale_freq_source(), as there is no protection in place > > here. The users of topology_clear_scale_freq_source() though needs a > > guarantee that the previous scale_freq_data isn't used anymore. > > > > Since topology_scale_freq_tick() is called from scheduler tick, we don't > > want to add locking in there. Use the RCU update mechanism instead > > (which is already used by the scheduler's utilization update path) to > > guarantee race free updates here. > > > > Cc: Paul E. McKenney <paulmck@kernel.org> > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > > So this is a bugfix for problems in the current codebase? What commit > does this fix? Should it go to the stable kernels? There is only one user of topology_clear_scale_freq_source() (cppc-cpufreq driver, which is already reverted in pm/linux-next). So in the upcoming 5.13 kernel release, there will be no one using this API and so no one will break. And so I skipped the fixes tag, I can add it though. 
> > --- > > drivers/base/arch_topology.c | 27 +++++++++++++++++++++------ > > 1 file changed, 21 insertions(+), 6 deletions(-) > > > > diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c > > index c1179edc0f3b..921312a8d957 100644 > > --- a/drivers/base/arch_topology.c > > +++ b/drivers/base/arch_topology.c > > @@ -18,10 +18,11 @@ > > #include <linux/cpumask.h> > > #include <linux/init.h> > > #include <linux/percpu.h> > > +#include <linux/rcupdate.h> > > #include <linux/sched.h> > > #include <linux/smp.h> > > > > -static DEFINE_PER_CPU(struct scale_freq_data *, sft_data); > > +static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data); > > static struct cpumask scale_freq_counters_mask; > > static bool scale_freq_invariant; > > > > @@ -66,16 +67,20 @@ void topology_set_scale_freq_source(struct scale_freq_data *data, > > if (cpumask_empty(&scale_freq_counters_mask)) > > scale_freq_invariant = topology_scale_freq_invariant(); > > > > + rcu_read_lock(); > > + > > for_each_cpu(cpu, cpus) { > > - sfd = per_cpu(sft_data, cpu); > > + sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu)); > > > > /* Use ARCH provided counters whenever possible */ > > if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) { > > - per_cpu(sft_data, cpu) = data; > > + rcu_assign_pointer(per_cpu(sft_data, cpu), data); > > cpumask_set_cpu(cpu, &scale_freq_counters_mask); > > } > > } > > > > + rcu_read_unlock(); > > + > > update_scale_freq_invariant(true); > > } > > EXPORT_SYMBOL_GPL(topology_set_scale_freq_source); > > @@ -86,22 +91,32 @@ void topology_clear_scale_freq_source(enum scale_freq_source source, > > struct scale_freq_data *sfd; > > int cpu; > > > > + rcu_read_lock(); > > + > > for_each_cpu(cpu, cpus) { > > - sfd = per_cpu(sft_data, cpu); > > + sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu)); > > > > if (sfd && sfd->source == source) { > > - per_cpu(sft_data, cpu) = NULL; > > + rcu_assign_pointer(per_cpu(sft_data, cpu), NULL); > > cpumask_clear_cpu(cpu, 
&scale_freq_counters_mask); > > } > > } > > > > + rcu_read_unlock(); > > + > > + /* > > + * Make sure all references to previous sft_data are dropped to avoid > > + * use-after-free races. > > + */ > > + synchronize_rcu(); > > What race is happening? How could the current code race? Only when a > cpu is removed? topology_scale_freq_tick() is called by the scheduler for each CPU from scheduler_tick(). It is possible that topology_scale_freq_tick() ends up using an older copy of sft_data pointer, while it is being removed by topology_clear_scale_freq_source() because a CPU went away or a cpufreq driver went away, or during normal suspend/resume (where CPUs are hot-unplugged). synchronize_rcu() makes sure that all RCU critical sections that started before it is called, will finish before it returns. And so the callers of topology_clear_scale_freq_source() don't need to worry about their callback getting called anymore. -- viresh
On Wed, Jun 16, 2021 at 01:48:59PM +0530, Viresh Kumar wrote: > On 16-06-21, 09:57, Greg Kroah-Hartman wrote: > > On Wed, Jun 16, 2021 at 12:18:08PM +0530, Viresh Kumar wrote: > > > Currently topology_scale_freq_tick() may end up using a pointer to > > > struct scale_freq_data, which was previously cleared by > > > topology_clear_scale_freq_source(), as there is no protection in place > > > here. The users of topology_clear_scale_freq_source() though needs a > > > guarantee that the previous scale_freq_data isn't used anymore. > > > > > > Since topology_scale_freq_tick() is called from scheduler tick, we don't > > > want to add locking in there. Use the RCU update mechanism instead > > > (which is already used by the scheduler's utilization update path) to > > > guarantee race free updates here. > > > > > > Cc: Paul E. McKenney <paulmck@kernel.org> > > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > > > > So this is a bugfix for problems in the current codebase? What commit > > does this fix? Should it go to the stable kernels? > > There is only one user of topology_clear_scale_freq_source() > (cppc-cpufreq driver, which is already reverted in pm/linux-next). So > in the upcoming 5.13 kernel release, there will be no one using this > API and so no one will break. > > And so I skipped the fixes tag, I can add it though. It would be nice to have to answer this type of question, otherwise you will have automated scripts trying to backport this to kernels where it does not belong :) thanks, greg k-h
On 16-06-21, 10:31, Greg Kroah-Hartman wrote: > On Wed, Jun 16, 2021 at 01:48:59PM +0530, Viresh Kumar wrote: > > On 16-06-21, 09:57, Greg Kroah-Hartman wrote: > > > On Wed, Jun 16, 2021 at 12:18:08PM +0530, Viresh Kumar wrote: > > > > Currently topology_scale_freq_tick() may end up using a pointer to > > > > struct scale_freq_data, which was previously cleared by > > > > topology_clear_scale_freq_source(), as there is no protection in place > > > > here. The users of topology_clear_scale_freq_source() though needs a > > > > guarantee that the previous scale_freq_data isn't used anymore. > > > > > > > > Since topology_scale_freq_tick() is called from scheduler tick, we don't > > > > want to add locking in there. Use the RCU update mechanism instead > > > > (which is already used by the scheduler's utilization update path) to > > > > guarantee race free updates here. > > > > > > > > Cc: Paul E. McKenney <paulmck@kernel.org> > > > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > > > > > > So this is a bugfix for problems in the current codebase? What commit > > > does this fix? Should it go to the stable kernels? > > > > There is only one user of topology_clear_scale_freq_source() > > (cppc-cpufreq driver, which is already reverted in pm/linux-next). So > > in the upcoming 5.13 kernel release, there will be no one using this > > API and so no one will break. > > > > And so I skipped the fixes tag, I can add it though. > > It would be nice to have to answer this type of question, otherwise you > will have automated scripts trying to backport this to kernels where it > does not belong :) Sure, I will add these to the next version. -- viresh
Hey, On Wednesday 16 Jun 2021 at 12:18:08 (+0530), Viresh Kumar wrote: > Currently topology_scale_freq_tick() may end up using a pointer to > struct scale_freq_data, which was previously cleared by > topology_clear_scale_freq_source(), as there is no protection in place > here. The users of topology_clear_scale_freq_source() though needs a > guarantee that the previous scale_freq_data isn't used anymore. > Please correct me if I'm wrong, but my understanding is that this is only a problem for the cppc cpufreq invariance functionality. Let's consider a scenario where CPUs are either hotplugged out or the cpufreq CPPC driver module is removed; topology_clear_scale_freq_source() would get called and the sfd_data will be set to NULL. But if at the same time topology_scale_freq_tick() got an old reference of sfd_data, set_freq_scale() will be called. This is only a problem for CPPC cpufreq as cppc_scale_freq_tick() will end up using driver internal data that might have been freed during the hotplug callbacks or the exit path. If this is the case, wouldn't the synchronisation issue be better resolved in the CPPC cpufreq driver, rather than here? Thank you, Ionela. > Since topology_scale_freq_tick() is called from scheduler tick, we don't > want to add locking in there. Use the RCU update mechanism instead > (which is already used by the scheduler's utilization update path) to > guarantee race free updates here. > > Cc: Paul E. 
McKenney <paulmck@kernel.org> > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > --- > drivers/base/arch_topology.c | 27 +++++++++++++++++++++------ > 1 file changed, 21 insertions(+), 6 deletions(-) > > diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c > index c1179edc0f3b..921312a8d957 100644 > --- a/drivers/base/arch_topology.c > +++ b/drivers/base/arch_topology.c > @@ -18,10 +18,11 @@ > #include <linux/cpumask.h> > #include <linux/init.h> > #include <linux/percpu.h> > +#include <linux/rcupdate.h> > #include <linux/sched.h> > #include <linux/smp.h> > > -static DEFINE_PER_CPU(struct scale_freq_data *, sft_data); > +static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data); > static struct cpumask scale_freq_counters_mask; > static bool scale_freq_invariant; > > @@ -66,16 +67,20 @@ void topology_set_scale_freq_source(struct scale_freq_data *data, > if (cpumask_empty(&scale_freq_counters_mask)) > scale_freq_invariant = topology_scale_freq_invariant(); > > + rcu_read_lock(); > + > for_each_cpu(cpu, cpus) { > - sfd = per_cpu(sft_data, cpu); > + sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu)); > > /* Use ARCH provided counters whenever possible */ > if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) { > - per_cpu(sft_data, cpu) = data; > + rcu_assign_pointer(per_cpu(sft_data, cpu), data); > cpumask_set_cpu(cpu, &scale_freq_counters_mask); > } > } > > + rcu_read_unlock(); > + > update_scale_freq_invariant(true); > } > EXPORT_SYMBOL_GPL(topology_set_scale_freq_source); > @@ -86,22 +91,32 @@ void topology_clear_scale_freq_source(enum scale_freq_source source, > struct scale_freq_data *sfd; > int cpu; > > + rcu_read_lock(); > + > for_each_cpu(cpu, cpus) { > - sfd = per_cpu(sft_data, cpu); > + sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu)); > > if (sfd && sfd->source == source) { > - per_cpu(sft_data, cpu) = NULL; > + rcu_assign_pointer(per_cpu(sft_data, cpu), NULL); > cpumask_clear_cpu(cpu, &scale_freq_counters_mask); > } > } > 
> + rcu_read_unlock(); > + > + /* > + * Make sure all references to previous sft_data are dropped to avoid > + * use-after-free races. > + */ > + synchronize_rcu(); > + > update_scale_freq_invariant(false); > } > EXPORT_SYMBOL_GPL(topology_clear_scale_freq_source); > > void topology_scale_freq_tick(void) > { > - struct scale_freq_data *sfd = *this_cpu_ptr(&sft_data); > + struct scale_freq_data *sfd = rcu_dereference_sched(*this_cpu_ptr(&sft_data)); > > if (sfd) > sfd->set_freq_scale(); > -- > 2.31.1.272.g89b43f80a514 >
Hi Ionela, On 16-06-21, 12:25, Ionela Voinescu wrote: > Please correct me if I'm wrong, but my understanding is that this is > only a problem for the cppc cpufreq invariance functionality. Let's > consider a scenario where CPUs are either hotplugged out or the cpufreq > CPPC driver module is removed; topology_clear_scale_freq_source() would > get called and the sfd_data will be set to NULL. But if at the same > time topology_scale_freq_tick() got an old reference of sfd_data, > set_freq_scale() will be called. This is only a problem for CPPC cpufreq > as cppc_scale_freq_tick() will end up using driver internal data that > might have been freed during the hotplug callbacks or the exit path. For now, yes, CPPC is the only one affected. > If this is the case, wouldn't the synchronisation issue be better > resolved in the CPPC cpufreq driver, rather than here? Hmm, the way I see it is that topology_clear_scale_freq_source() is an API provided by topology core and the topology core needs to guarantee that it doesn't use the data any longer after topology_clear_scale_freq_source() is called. The same is true for other APIs, like: irq_work_sync(); kthread_cancel_work_sync(); It isn't the user which needs to take this into account, but the API provider. There may be more users of this in the future, lets say another cpufreq driver, and so keeping this synchronization at the API provider is the right thing to do IMHO. And from the user's perspective, like cppc, it doesn't have any control over who is using its callback and how and when. It is very very difficult to provide something like this at the users, redundant anyway. For example cppc won't ever know when topology_scale_freq_tick() has stopped calling its callback. 
For example this is what cppc driver needs to do now: +static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy, + unsigned int cpu) +{ + struct cppc_freq_invariance *cppc_fi = &per_cpu(cppc_freq_inv, cpu); + + topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, cpumask_of(cpu)); + irq_work_sync(&cppc_fi->irq_work); + kthread_cancel_work_sync(&cppc_fi->work); +} The driver uses APIs provided by 3 layers, topology, irq-work, kthread and all must provide these guarantees. A very similar thing is implemented in kernel/sched/cpufreq.c for example. -- viresh
On Wednesday 16 Jun 2021 at 17:06:04 (+0530), Viresh Kumar wrote: > Hi Ionela, > > On 16-06-21, 12:25, Ionela Voinescu wrote: > > Please correct me if I'm wrong, but my understanding is that this is > > only a problem for the cppc cpufreq invariance functionality. Let's > > consider a scenario where CPUs are either hotplugged out or the cpufreq > > CPPC driver module is removed; topology_clear_scale_freq_source() would > > get called and the sfd_data will be set to NULL. But if at the same > > time topology_scale_freq_tick() got an old reference of sfd_data, > > set_freq_scale() will be called. This is only a problem for CPPC cpufreq > > as cppc_scale_freq_tick() will end up using driver internal data that > > might have been freed during the hotplug callbacks or the exit path. > > For now, yes, CPPC is the only one affected. > > > If this is the case, wouldn't the synchronisation issue be better > > resolved in the CPPC cpufreq driver, rather than here? > > Hmm, the way I see it is that topology_clear_scale_freq_source() is an API > provided by topology core and the topology core needs to guarantee that it > doesn't use the data any longer after topology_clear_scale_freq_source() is > called. > > The same is true for other APIs, like: > > irq_work_sync(); > kthread_cancel_work_sync(); > > It isn't the user which needs to take this into account, but the API provider. > I would agree if it wasn't for the fact that the driver provides the set_freq_scale() implementation that ends up using driver internal data which could have been freed by the driver's own .exit()/stop_cpu() callbacks. The API and the generic implementation has the responsibility of making sure of sane access to its own structures. Even if we would want to keep drivers from shooting themselves in the foot, I would prefer we postpone it until we have more users for this, before we add any synchronisation mechanisms to functionality called on the tick. 
Let's see if there's a less invasive solution to fix CPPC for now, what do you think? Thanks, Ionela. > There may be more users of this in the future, lets say another cpufreq driver, > and so keeping this synchronization at the API provider is the right thing to do > IMHO. > > And from the user's perspective, like cppc, it doesn't have any control over who > is using its callback and how and when. It is very very difficult to provide > something like this at the users, redundant anyway. For example cppc won't ever > know when topology_scale_freq_tick() has stopped calling its callback. > > For example this is what cppc driver needs to do now: > > +static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy, > + unsigned int cpu) > +{ > + struct cppc_freq_invariance *cppc_fi = &per_cpu(cppc_freq_inv, cpu); > + > + topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, cpumask_of(cpu)); > + irq_work_sync(&cppc_fi->irq_work); > + kthread_cancel_work_sync(&cppc_fi->work); > +} > > The driver uses APIs provided by 3 layers, topology, irq-work, kthread and all > must provide these guarantees. > > A very similar thing is implemented in kernel/sched/cpufreq.c for example. > > -- > viresh
On 16-06-21, 13:00, Ionela Voinescu wrote: > I would agree if it wasn't for the fact that the driver provides the > set_freq_scale() implementation that ends up using driver internal data > which could have been freed by the driver's own .exit()/stop_cpu() > callbacks. The API and the generic implementation has the responsibility > of making sure of sane access to its own structures. How do you see timer core or workqueue core then ? Why do they make sure they don't end up calling user's function once we ask them not to ? And also scheduler's own util update mechanism, the exact thing happens there. Users (cpufreq governors) call cpufreq_add_update_util_hook() and cpufreq_remove_update_util_hook() to pass their own data structure "struct update_util_data", which has ia callback within. This is what happens from scheduler's context in those cases. static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) { struct update_util_data *data; data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, cpu_of(rq))); if (data) data->func(data, rq_clock(rq), flags); } Also note that some kind of synchronisation mechanism is indeed required between topology_set_scale_freq_source() and topology_clear_scale_freq_source(), there is no race there today, sure, but this is an API which is made generic. > Even if we would want to keep drivers from shooting themselves in the > foot, I would prefer we postpone it until we have more users for this, > before we add any synchronisation mechanisms to functionality called > on the tick. The rcu mechanism is very much used in the scheduler itself because it is lightweight. Honestly I don't even see any other way (w.r.t. locking) users can fix it at their end. They don't know which was the last tick that used their callback. > Let's see if there's a less invasive solution to fix CPPC for now, what > do you think? For me, this change is required in the API despite how CPPC ends up using it. 
Else we are failing at defining the API itself IMHO. -- viresh
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index c1179edc0f3b..921312a8d957 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -18,10 +18,11 @@ #include <linux/cpumask.h> #include <linux/init.h> #include <linux/percpu.h> +#include <linux/rcupdate.h> #include <linux/sched.h> #include <linux/smp.h> -static DEFINE_PER_CPU(struct scale_freq_data *, sft_data); +static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data); static struct cpumask scale_freq_counters_mask; static bool scale_freq_invariant; @@ -66,16 +67,20 @@ void topology_set_scale_freq_source(struct scale_freq_data *data, if (cpumask_empty(&scale_freq_counters_mask)) scale_freq_invariant = topology_scale_freq_invariant(); + rcu_read_lock(); + for_each_cpu(cpu, cpus) { - sfd = per_cpu(sft_data, cpu); + sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu)); /* Use ARCH provided counters whenever possible */ if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) { - per_cpu(sft_data, cpu) = data; + rcu_assign_pointer(per_cpu(sft_data, cpu), data); cpumask_set_cpu(cpu, &scale_freq_counters_mask); } } + rcu_read_unlock(); + update_scale_freq_invariant(true); } EXPORT_SYMBOL_GPL(topology_set_scale_freq_source); @@ -86,22 +91,32 @@ void topology_clear_scale_freq_source(enum scale_freq_source source, struct scale_freq_data *sfd; int cpu; + rcu_read_lock(); + for_each_cpu(cpu, cpus) { - sfd = per_cpu(sft_data, cpu); + sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu)); if (sfd && sfd->source == source) { - per_cpu(sft_data, cpu) = NULL; + rcu_assign_pointer(per_cpu(sft_data, cpu), NULL); cpumask_clear_cpu(cpu, &scale_freq_counters_mask); } } + rcu_read_unlock(); + + /* + * Make sure all references to previous sft_data are dropped to avoid + * use-after-free races. 
+ */ + synchronize_rcu(); + update_scale_freq_invariant(false); } EXPORT_SYMBOL_GPL(topology_clear_scale_freq_source); void topology_scale_freq_tick(void) { - struct scale_freq_data *sfd = *this_cpu_ptr(&sft_data); + struct scale_freq_data *sfd = rcu_dereference_sched(*this_cpu_ptr(&sft_data)); if (sfd) sfd->set_freq_scale();
Currently topology_scale_freq_tick() may end up using a pointer to struct scale_freq_data, which was previously cleared by topology_clear_scale_freq_source(), as there is no protection in place here. The users of topology_clear_scale_freq_source() though need a guarantee that the previous scale_freq_data isn't used anymore. Since topology_scale_freq_tick() is called from scheduler tick, we don't want to add locking in there. Use the RCU update mechanism instead (which is already used by the scheduler's utilization update path) to guarantee race-free updates here. Cc: Paul E. McKenney <paulmck@kernel.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> --- drivers/base/arch_topology.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) -- 2.31.1.272.g89b43f80a514