diff mbox series

[v3,1/3] cpufreq: mediatek: using global lock avoid race condition

Message ID 20250214074353.1169864-2-chun-jen.tseng@mediatek.com
State New
Headers show
Series fixed mediatek-cpufreq has multi policy concurrency issue | expand

Commit Message

Chun-Jen Tseng (曾俊仁) Feb. 14, 2025, 7:43 a.m. UTC
In mtk_cpufreq_set_target() is re-enter function but the mutex lock
decalre in mtk_cpu_dvfs_info structure for each policy. It should
change to global variable for critical session avoid race condition
with 2 or more policy.

add mtk_cpufreq_get() replace cpufreq_generic_get() and use global lock
avoid return wrong clock.

Signed-off-by: Mark Tseng <chun-jen.tseng@mediatek.com>
---
 drivers/cpufreq/mediatek-cpufreq.c | 39 ++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 10 deletions(-)

Comments

Viresh Kumar Feb. 19, 2025, 5:42 a.m. UTC | #1
On 14-02-25, 15:43, Mark Tseng wrote:
> In mtk_cpufreq_set_target() is re-enter function but the mutex lock
> decalre in mtk_cpu_dvfs_info structure for each policy. It should
> change to global variable for critical session avoid race condition
> with 2 or more policy.

And what exactly is the race condition here ? Can you please explain that ?
Since the struct mtk_cpu_dvfs_info instance is per-policy, I don't think there
is any race here.

The lock was introduced earlier to avoid a potential race with notifiers, but it
has nothing to do with calling target simultaneously.

commit c210063b40ac ("cpufreq: mediatek: Add opp notification support")
Dan Carpenter Feb. 19, 2025, 7:23 a.m. UTC | #2
Hi Mark,

kernel test robot noticed the following build warnings:

https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Mark-Tseng/cpufreq-mediatek-using-global-lock-avoid-race-condition/20250214-154521
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
patch link:    https://lore.kernel.org/r/20250214074353.1169864-2-chun-jen.tseng%40mediatek.com
patch subject: [PATCH v3 1/3] cpufreq: mediatek: using global lock avoid race condition
config: sparc-randconfig-r071-20250218 (https://download.01.org/0day-ci/archive/20250219/202502190807.fz6fs2jz-lkp@intel.com/config)
compiler: sparc64-linux-gcc (GCC) 14.2.0

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
| Closes: https://lore.kernel.org/r/202502190807.fz6fs2jz-lkp@intel.com/

smatch warnings:
drivers/cpufreq/mediatek-cpufreq.c:367 mtk_cpufreq_opp_notifier() warn: inconsistent returns 'global &mtk_policy_lock'.

vim +367 drivers/cpufreq/mediatek-cpufreq.c

c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  317  static int mtk_cpufreq_opp_notifier(struct notifier_block *nb,
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  318  				    unsigned long event, void *data)
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  319  {
5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng    2025-02-14  320  	struct dev_pm_opp *opp;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  321  	struct dev_pm_opp *new_opp;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  322  	struct mtk_cpu_dvfs_info *info;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  323  	unsigned long freq, volt;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  324  	struct cpufreq_policy *policy;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  325  	int ret = 0;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  326  
5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng    2025-02-14  327  	mutex_lock(&mtk_policy_lock);
5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng    2025-02-14  328  	opp = data;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  329  	info = container_of(nb, struct mtk_cpu_dvfs_info, opp_nb);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  330  
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  331  	if (event == OPP_EVENT_ADJUST_VOLTAGE) {
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  332  		freq = dev_pm_opp_get_freq(opp);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  333  
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  334  		if (info->current_freq == freq) {
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  335  			volt = dev_pm_opp_get_voltage(opp);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  336  			ret = mtk_cpufreq_set_voltage(info, volt);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  337  			if (ret)
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  338  				dev_err(info->cpu_dev,
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  339  					"failed to scale voltage: %d\n", ret);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  340  		}
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  341  	} else if (event == OPP_EVENT_DISABLE) {
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  342  		freq = dev_pm_opp_get_freq(opp);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  343  
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  344  		/* case of current opp item is disabled */
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  345  		if (info->current_freq == freq) {
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  346  			freq = 1;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  347  			new_opp = dev_pm_opp_find_freq_ceil(info->cpu_dev,
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  348  							    &freq);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  349  			if (IS_ERR(new_opp)) {
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  350  				dev_err(info->cpu_dev,
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  351  					"all opp items are disabled\n");
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  352  				ret = PTR_ERR(new_opp);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  353  				return notifier_from_errno(ret);

mutex_unlock(&mtk_policy_lock) before returning.

c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  354  			}
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  355  
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  356  			dev_pm_opp_put(new_opp);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  357  			policy = cpufreq_cpu_get(info->opp_cpu);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  358  			if (policy) {
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  359  				cpufreq_driver_target(policy, freq / 1000,
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  360  						      CPUFREQ_RELATION_L);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  361  				cpufreq_cpu_put(policy);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  362  			}
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  363  		}
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  364  	}
5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng    2025-02-14  365  	mutex_unlock(&mtk_policy_lock);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  366  
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05 @367  	return notifier_from_errno(ret);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  368  }
Chun-Jen Tseng (曾俊仁) March 20, 2025, 8:22 a.m. UTC | #3
Hi viresh,

Thanks your review and reply.

The struct mtk_cpu_dvfs_info instance is per-policy and the reg_lock is
also in this structure. when I have two "policy-0" and "policy-6" use
the same mtk_cpufreq_set_target() function but the info->reg_lock 
is from 2 instance(policy-0 and policy-6). when the policy-0 and
policy-6 call set_target target, the mutex_lock is locked by per-
policy. So, I change to global lock avoid per-policy lock.

BRs,

Mark Tseng

On Wed, 2025-02-19 at 11:12 +0530, Viresh Kumar wrote:
> 
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
> 
> 
> On 14-02-25, 15:43, Mark Tseng wrote:
> > In mtk_cpufreq_set_target() is re-enter function but the mutex lock
> > decalre in mtk_cpu_dvfs_info structure for each policy. It should
> > change to global variable for critical session avoid race condition
> > with 2 or more policy.
> 
> And what exactly is the race condition here ? Can you please explain
> that ?
> Since the struct mtk_cpu_dvfs_info instance is per-policy, I don't
> think there
> is any race here.
> 
> The lock was introduced earlier to avoid a potential race with
> notifiers, but it
> has nothing to do with calling target simultaneously.
> 
> commit c210063b40ac ("cpufreq: mediatek: Add opp notification
> support")
> 
> --
> viresh
Chun-Jen Tseng (曾俊仁) March 20, 2025, 8:25 a.m. UTC | #4
Hi Dan,

Thanks your remind. I will be careful and fix it before submit new
patch.

BRs,

Mark Tseng


On Wed, 2025-02-19 at 10:23 +0300, Dan Carpenter wrote:
> 
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
> 
> 
> Hi Mark,
> 
> kernel test robot noticed the following build warnings:
> 
> https://urldefense.com/v3/__https://git-scm.com/docs/git-format-patch*_base_tree_information__;Iw!!CTRNKA9wMg0ARbw!ixPI3aPvvJSSIiRSm4TttWZyeSNnZsdzX-4zYkY7Ax1faRLZBJ7JW3Fmf7O8BYXiyHlxDC6aauOslVt8KEW_jViJAkm7uw$
>  ]
> 
> url:   
> https://urldefense.com/v3/__https://github.com/intel-lab-lkp/linux/commits/Mark-Tseng/cpufreq-mediatek-using-global-lock-avoid-race-condition/20250214-154521__;!!CTRNKA9wMg0ARbw!ixPI3aPvvJSSIiRSm4TttWZyeSNnZsdzX-4zYkY7Ax1faRLZBJ7JW3Fmf7O8BYXiyHlxDC6aauOslVt8KEW_jVj9Egobdw$
> base:  
> https://urldefense.com/v3/__https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git__;!!CTRNKA9wMg0ARbw!ixPI3aPvvJSSIiRSm4TttWZyeSNnZsdzX-4zYkY7Ax1faRLZBJ7JW3Fmf7O8BYXiyHlxDC6aauOslVt8KEW_jVg7ujqlQw$
>   linux-next
> patch link:   
> https://lore.kernel.org/r/20250214074353.1169864-2-chun-jen.tseng%40mediatek.com
> patch subject: [PATCH v3 1/3] cpufreq: mediatek: using global lock
> avoid race condition
> config: sparc-randconfig-r071-20250218
> (https://urldefense.com/v3/__https://download.01.org/0day-ci/archive/
> 20250219/202502190807.fz6fs2jz-lkp@intel.com/config__;!!CTRNKA9wMg0AR
> bw!ixPI3aPvvJSSIiRSm4TttWZyeSNnZsdzX-
> 4zYkY7Ax1faRLZBJ7JW3Fmf7O8BYXiyHlxDC6aauOslVt8KEW_jVhUidk_DA$ )
> compiler: sparc64-linux-gcc (GCC) 14.2.0
> 
> If you fix the issue in a separate patch/commit (i.e. not just a new
> version of
> the same patch/commit), kindly add following tags
> > Reported-by: kernel test robot <lkp@intel.com>
> > Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
> > Closes:
> > https://lore.kernel.org/r/202502190807.fz6fs2jz-lkp@intel.com/
> 
> smatch warnings:
> drivers/cpufreq/mediatek-cpufreq.c:367 mtk_cpufreq_opp_notifier()
> warn: inconsistent returns 'global &mtk_policy_lock'.
> 
> vim +367 drivers/cpufreq/mediatek-cpufreq.c
> 
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  317  static int mtk_cpufreq_opp_notifier(struct notifier_block
> *nb,
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  318                                     unsigned long event,
> void *data)
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  319  {
> 5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng    2025-
> 02-14  320         struct dev_pm_opp *opp;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  321         struct dev_pm_opp *new_opp;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  322         struct mtk_cpu_dvfs_info *info;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  323         unsigned long freq, volt;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  324         struct cpufreq_policy *policy;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  325         int ret = 0;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  326
> 5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng    2025-
> 02-14  327         mutex_lock(&mtk_policy_lock);
> 5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng    2025-
> 02-14  328         opp = data;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  329         info = container_of(nb, struct mtk_cpu_dvfs_info,
> opp_nb);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  330
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  331         if (event == OPP_EVENT_ADJUST_VOLTAGE) {
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  332                 freq = dev_pm_opp_get_freq(opp);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  333
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  334                 if (info->current_freq == freq) {
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  335                         volt =
> dev_pm_opp_get_voltage(opp);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  336                         ret =
> mtk_cpufreq_set_voltage(info, volt);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  337                         if (ret)
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  338                                 dev_err(info->cpu_dev,
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  339                                         "failed to scale
> voltage: %d\n", ret);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  340                 }
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  341         } else if (event == OPP_EVENT_DISABLE) {
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  342                 freq = dev_pm_opp_get_freq(opp);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  343
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  344                 /* case of current opp item is disabled */
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  345                 if (info->current_freq == freq) {
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  346                         freq = 1;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  347                         new_opp =
> dev_pm_opp_find_freq_ceil(info->cpu_dev,
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05 
> 348                                                            
> &freq);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  349                         if (IS_ERR(new_opp)) {
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  350                                 dev_err(info->cpu_dev,
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  351                                         "all opp items are
> disabled\n");
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  352                                 ret = PTR_ERR(new_opp);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  353                                 return
> notifier_from_errno(ret);
> 
> mutex_unlock(&mtk_policy_lock) before returning.
> 
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  354                         }
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  355
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  356                         dev_pm_opp_put(new_opp);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  357                         policy = cpufreq_cpu_get(info-
> >opp_cpu);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  358                         if (policy) {
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  359                                
> cpufreq_driver_target(policy, freq / 1000,
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  360                                                      
> CPUFREQ_RELATION_L);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  361                                 cpufreq_cpu_put(policy);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  362                         }
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  363                 }
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  364         }
> 5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng    2025-
> 02-14  365         mutex_unlock(&mtk_policy_lock);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  366
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05 @367         return notifier_from_errno(ret);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  368  }
> 
> --
> 0-DAY CI Kernel Test Service
> https://urldefense.com/v3/__https://github.com/intel/lkp-tests/wiki__;!!CTRNKA9wMg0ARbw!ixPI3aPvvJSSIiRSm4TttWZyeSNnZsdzX-4zYkY7Ax1faRLZBJ7JW3Fmf7O8BYXiyHlxDC6aauOslVt8KEW_jVii1cx6tQ$
>
Viresh Kumar March 21, 2025, 4:56 a.m. UTC | #5
On 20-03-25, 08:22, Chun-Jen Tseng (曾俊仁) wrote:
> The struct mtk_cpu_dvfs_info instance is per-policy and the reg_lock is
> also in this structure. when I have two "policy-0" and "policy-6" use
> the same mtk_cpufreq_set_target() function but the info->reg_lock 
> is from 2 instance(policy-0 and policy-6). when the policy-0 and
> policy-6 call set_target target, the mutex_lock is locked by per-
> policy. So, I change to global lock avoid per-policy lock.

Yes, that's what you are doing. I am asking why a global lock is required here ?
I think the per-policy lock is all you need.
Chun-Jen Tseng (曾俊仁) March 21, 2025, 5:32 a.m. UTC | #6
Hi viresh,

I add a global lock related to the CCI driver.

This is because the CCI needs to obtain the frequencies of policy-0 and
policy-6 to determine its own frequency.

If policy-0 and policy-6 are set simultaneously, it may cause the CCI
to select the wrong frequency.

Therefore, I hope to change the setting flow to the following:
   policy-0 or policy-6 -> set frequency -> CCI receives notification -
> set CCI frequency

BRs,

Mark Tseng

On Fri, 2025-03-21 at 10:26 +0530, Viresh Kumar wrote:
> 
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
> 
> 
> On 20-03-25, 08:22, Chun-Jen Tseng (曾俊仁) wrote:
> > The struct mtk_cpu_dvfs_info instance is per-policy and the
> > reg_lock is
> > also in this structure. when I have two "policy-0" and "policy-6"
> > use
> > the same mtk_cpufreq_set_target() function but the info->reg_lock
> > is from 2 instance(policy-0 and policy-6). when the policy-0 and
> > policy-6 call set_target target, the mutex_lock is locked by per-
> > policy. So, I change to global lock avoid per-policy lock.
> 
> Yes, that's what you are doing. I am asking why a global lock is
> required here ?
> I think the per-policy lock is all you need.
> 
> --
> viresh
Viresh Kumar March 21, 2025, 6:01 a.m. UTC | #7
On 21-03-25, 05:32, Chun-Jen Tseng (曾俊仁) wrote:
> I add a global lock related to the CCI driver.
> 
> This is because the CCI needs to obtain the frequencies of policy-0 and
> policy-6 to determine its own frequency.
> 
> If policy-0 and policy-6 are set simultaneously, it may cause the CCI
> to select the wrong frequency.
> 
> Therefore, I hope to change the setting flow to the following:
>    policy-0 or policy-6 -> set frequency -> CCI receives notification -
> set CCI frequency

Can you please point to the code where this race exists ? I am not
sure I fully understand it as of now. If the race is present in
another driver, CCI ?, then shouldn't it be fixed there instead ?
Chun-Jen Tseng (曾俊仁) April 14, 2025, 8:42 a.m. UTC | #8
Hi Viresh,

The CCI level choose by Max_Level(LCPU & BCPU frequency) in devfreq
driver.
without global lock, It may choose wrong CCI level and cause system
stall.

I hope this flow is serial setting like, BCPU / LCPU set frequency ->
set CCI level -> BCPU / LCPU set frequency -> set CCI level -> ......

without global lock, it could be LCPU / BCPU set frequency -> set CCI
level(during this time, it may change BCPU / LCPU frequency and cause
system stall.

I also can only do global lock on ccifreq_support SoC.

BRs,

Mark Tseng

On Mon, 2025-03-24 at 11:13 +0530, Viresh Kumar wrote:
> 
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
> 
> 
> Hi,
> 
> Thanks for sharing the details this time, it makes it much clearer
> now.
> 
> On 24-03-25, 03:21, Chun-Jen Tseng (曾俊仁) wrote:
> > I think the best configuration sequence is as follows:
> >   cpufreq policy -> set frequency -> CCI governor get
> > CPUFREQ_POSTCHANGE NB -> choose CCI frequency -> set CCI frequency
> > 
> > However, in drivers/devfreq/governor_passive.c#L77,
> > get_target_freq_with_cpufreq() retrieves the current frequency of
> > each
> > policy,
> > and it determines the CCI frequency based on the frequency of each
> > policy.
> > 
> > But if policy-0 and policy-6 enter simultaneously, the CCI governor
> > might get an incorrect frequency.
> 
> Yes it may fetch the current frequency (or last known one), but that
> shouldn't be a problem as the postchange notification for policy-6
> should get called right after and should fix the issue. Right ?
> 
> I don't think this is a race and if this requires fixing. clk_get()
> for any device, will always return the last configured value, while
> the clock might be changing at the same time.
> 
> What's important is that you don't get an incorrect frequency (as in
> based on intermediate values of registers, etc). Note that the last
> configured frequency isn't an incorrect frequency.
> 
> > cpufreq policy-0 -> set frequency -> CCI governor get
> > CPUFREQ_POSTCHANGE NB -> choose CCI frequency -> set CCI frequency
> > => during this time, the CCI governor gets policy-0 and policy-6,
> > BUT
> > policy-6 may change frequency by cpufreq driver at the same time.
> 
> Sure, and I don't see a problem with that. The issue is there only if
> we can reach a state where CCI is left configured in the wrong state.
> Which I don't think would happen here as the postchange notifier will
> get called again, forcing a switch of frequency again.
> 
> --
> viresh
Viresh Kumar April 16, 2025, 8:05 a.m. UTC | #9
On 14-04-25, 08:42, Chun-Jen Tseng (曾俊仁) wrote:
> Hi Viresh,
> 
> The CCI level choose by Max_Level(LCPU & BCPU frequency) in devfreq
> driver.
> without global lock, It may choose wrong CCI level and cause system
> stall.
> 
> I hope this flow is serial setting like, BCPU / LCPU set frequency ->
> set CCI level -> BCPU / LCPU set frequency -> set CCI level -> ......
> 
> without global lock, it could be LCPU / BCPU set frequency -> set CCI
> level(during this time, it may change BCPU / LCPU frequency and cause
> system stall.
> 
> I also can only do global lock on ccifreq_support SoC.

As explained earlier, I don't think there is a race here. May be I am
wrong. And so I need a clear code path example from you, which proves
that there is a race here.
diff mbox series

Patch

diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c
index 2656b88db378..07d5958e106a 100644
--- a/drivers/cpufreq/mediatek-cpufreq.c
+++ b/drivers/cpufreq/mediatek-cpufreq.c
@@ -49,8 +49,6 @@  struct mtk_cpu_dvfs_info {
 	bool need_voltage_tracking;
 	int vproc_on_boot;
 	int pre_vproc;
-	/* Avoid race condition for regulators between notify and policy */
-	struct mutex reg_lock;
 	struct notifier_block opp_nb;
 	unsigned int opp_cpu;
 	unsigned long current_freq;
@@ -59,6 +57,9 @@  struct mtk_cpu_dvfs_info {
 	bool ccifreq_bound;
 };
 
+/* Avoid race condition for regulators between notify and policy */
+static DEFINE_MUTEX(mtk_policy_lock);
+
 static struct platform_device *cpufreq_pdev;
 
 static LIST_HEAD(dvfs_info_list);
@@ -209,12 +210,12 @@  static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
 	long freq_hz, pre_freq_hz;
 	int vproc, pre_vproc, inter_vproc, target_vproc, ret;
 
+	mutex_lock(&mtk_policy_lock);
+
 	inter_vproc = info->intermediate_voltage;
 
 	pre_freq_hz = clk_get_rate(cpu_clk);
 
-	mutex_lock(&info->reg_lock);
-
 	if (unlikely(info->pre_vproc <= 0))
 		pre_vproc = regulator_get_voltage(info->proc_reg);
 	else
@@ -308,7 +309,7 @@  static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
 	info->current_freq = freq_hz;
 
 out:
-	mutex_unlock(&info->reg_lock);
+	mutex_unlock(&mtk_policy_lock);
 
 	return ret;
 }
@@ -316,19 +317,20 @@  static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
 static int mtk_cpufreq_opp_notifier(struct notifier_block *nb,
 				    unsigned long event, void *data)
 {
-	struct dev_pm_opp *opp = data;
+	struct dev_pm_opp *opp;
 	struct dev_pm_opp *new_opp;
 	struct mtk_cpu_dvfs_info *info;
 	unsigned long freq, volt;
 	struct cpufreq_policy *policy;
 	int ret = 0;
 
+	mutex_lock(&mtk_policy_lock);
+	opp = data;
 	info = container_of(nb, struct mtk_cpu_dvfs_info, opp_nb);
 
 	if (event == OPP_EVENT_ADJUST_VOLTAGE) {
 		freq = dev_pm_opp_get_freq(opp);
 
-		mutex_lock(&info->reg_lock);
 		if (info->current_freq == freq) {
 			volt = dev_pm_opp_get_voltage(opp);
 			ret = mtk_cpufreq_set_voltage(info, volt);
@@ -336,7 +338,6 @@  static int mtk_cpufreq_opp_notifier(struct notifier_block *nb,
 				dev_err(info->cpu_dev,
 					"failed to scale voltage: %d\n", ret);
 		}
-		mutex_unlock(&info->reg_lock);
 	} else if (event == OPP_EVENT_DISABLE) {
 		freq = dev_pm_opp_get_freq(opp);
 
@@ -361,6 +362,7 @@  static int mtk_cpufreq_opp_notifier(struct notifier_block *nb,
 			}
 		}
 	}
+	mutex_unlock(&mtk_policy_lock);
 
 	return notifier_from_errno(ret);
 }
@@ -495,7 +497,6 @@  static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu)
 	info->intermediate_voltage = dev_pm_opp_get_voltage(opp);
 	dev_pm_opp_put(opp);
 
-	mutex_init(&info->reg_lock);
 	info->current_freq = clk_get_rate(info->cpu_clk);
 
 	info->opp_cpu = cpu;
@@ -607,13 +608,31 @@  static void mtk_cpufreq_exit(struct cpufreq_policy *policy)
 	dev_pm_opp_free_cpufreq_table(info->cpu_dev, &policy->freq_table);
 }
 
+static unsigned int mtk_cpufreq_get(unsigned int cpu)
+{
+	struct mtk_cpu_dvfs_info *info;
+	unsigned long current_freq;
+
+	mutex_lock(&mtk_policy_lock);
+	info = mtk_cpu_dvfs_info_lookup(cpu);
+	if (!info) {
+		mutex_unlock(&mtk_policy_lock);
+		return 0;
+	}
+
+	current_freq = info->current_freq / 1000;
+	mutex_unlock(&mtk_policy_lock);
+
+	return current_freq;
+}
+
 static struct cpufreq_driver mtk_cpufreq_driver = {
 	.flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK |
 		 CPUFREQ_HAVE_GOVERNOR_PER_POLICY |
 		 CPUFREQ_IS_COOLING_DEV,
 	.verify = cpufreq_generic_frequency_table_verify,
 	.target_index = mtk_cpufreq_set_target,
-	.get = cpufreq_generic_get,
+	.get = mtk_cpufreq_get,
 	.init = mtk_cpufreq_init,
 	.exit = mtk_cpufreq_exit,
 	.register_em = cpufreq_register_em_with_opp,