diff mbox series

[v3,3/6] cpufreq: qcom-cpufreq-hw: Add dcvs interrupt support

Message ID 20210708120656.663851-4-thara.gopinath@linaro.org
State New
Headers show
Series Introduce LMh driver for Qualcomm SoCs | expand

Commit Message

Thara Gopinath July 8, 2021, 12:06 p.m. UTC
Add interrupt support to notify the kernel of h/w initiated frequency
throttling by LMh. Convey this to scheduler via thermal presssure
interface.

Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>
---

v2->v3:
	- Cosmetic fixes from review comments on the list.
	- Moved all LMh initializations to qcom_cpufreq_hw_lmh_init.
	- Added freeing of LMh interrupt and cancelling the polling worker to
	  qcom_cpufreq_hw_cpu_exit as per Viresh's suggestion.
	- LMh interrupts are now tied to cpu dev and not cpufreq dev. This will be
	  useful for further generation of SoCs where the same interrupt signals
	  multiple cpu clusters.

v1->v2:
	- Introduced qcom_cpufreq_hw_lmh_init to consolidate LMh related initializations
	  as per Viresh's review comment.
	- Moved the piece of code restarting polling/re-enabling LMh interrupt to
	  qcom_lmh_dcvs_notify therby simplifying isr and timer callback as per Viresh's
	  suggestion.
	- Droped cpus from qcom_cpufreq_data and instead using cpus from cpufreq_policy in
	  qcom_lmh_dcvs_notify as per Viresh's review comment.
	- Dropped dt property qcom,support-lmh as per Bjorn's suggestion.
	- Other minor/cosmetic fixes

 drivers/cpufreq/qcom-cpufreq-hw.c | 118 ++++++++++++++++++++++++++++++
 1 file changed, 118 insertions(+)

Comments

kernel test robot July 8, 2021, 8 p.m. UTC | #1
Hi Thara,

I love your patch! Yet something to improve:

[auto build test ERROR on robh/for-next]
[also build test ERROR on pm/linux-next linus/master v5.13 next-20210708]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Thara-Gopinath/Introduce-LMh-driver-for-Qualcomm-SoCs/20210708-200856
base:   https://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git for-next
config: arm64-randconfig-r015-20210707 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 8d69635ed9ecf36fd0ca85906bfde17949671cbe)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install arm64 cross compiling tool for clang build
        # apt-get install binutils-aarch64-linux-gnu
        # https://github.com/0day-ci/linux/commit/11b0cda3b67f03e4a29b6d2260edb6274fd1d806
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Thara-Gopinath/Introduce-LMh-driver-for-Qualcomm-SoCs/20210708-200856
        git checkout 11b0cda3b67f03e4a29b6d2260edb6274fd1d806
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>, old ones prefixed by <<):

>> ERROR: modpost: "topology_set_thermal_pressure" [drivers/cpufreq/qcom-cpufreq-hw.ko] undefined!
>> ERROR: modpost: "cpu_scale" [drivers/cpufreq/qcom-cpufreq-hw.ko] undefined!

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Viresh Kumar July 9, 2021, 6:46 a.m. UTC | #2
On 08-07-21, 08:06, Thara Gopinath wrote:
>  static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
>  {
>  	struct platform_device *pdev = cpufreq_get_driver_data();
> @@ -370,6 +480,10 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
>  			dev_warn(cpu_dev, "failed to enable boost: %d\n", ret);
>  	}
>  
> +	ret = qcom_cpufreq_hw_lmh_init(policy, index);

You missed unregistering EM here (which is also missing from exit,
which you need to fix first in a separate patch).

> +	if (ret)
> +		goto error;
> +
>  	return 0;
>  error:
>  	kfree(data);
> @@ -389,6 +503,10 @@ static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy)
>  
>  	dev_pm_opp_remove_all_dynamic(cpu_dev);
>  	dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
> +	if (data->lmh_dcvs_irq > 0) {
> +		devm_free_irq(cpu_dev, data->lmh_dcvs_irq, data);

Why using devm variants here and while requesting the irq ? 

> +		cancel_delayed_work_sync(&data->lmh_dcvs_poll_work);
> +	}

Please move this to qcom_cpufreq_hw_lmh_exit() or something.

Now with sequence of disabling interrupt, etc, I see a potential
problem.

CPU0                                    CPU1

qcom_cpufreq_hw_cpu_exit()
-> devm_free_irq();
                                        qcom_lmh_dcvs_poll()
                                        -> qcom_lmh_dcvs_notify()
                                          -> enable_irq()

-> cancel_delayed_work_sync();


What will happen if enable_irq() gets called after freeing the irq ?
Not sure, but it looks like you will hit this then from manage.c:

WARN(!desc->irq_data.chip, KERN_ERR "enable_irq before
                                     setup/request_irq: irq %u\n", irq))

?

You got a chicken n egg problem :)
Thara Gopinath July 9, 2021, 3:37 p.m. UTC | #3
On 7/9/21 2:46 AM, Viresh Kumar wrote:
> On 08-07-21, 08:06, Thara Gopinath wrote:
>>   static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
>>   {
>>   	struct platform_device *pdev = cpufreq_get_driver_data();
>> @@ -370,6 +480,10 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
>>   			dev_warn(cpu_dev, "failed to enable boost: %d\n", ret);
>>   	}
>>   
>> +	ret = qcom_cpufreq_hw_lmh_init(policy, index);
> 
> You missed unregistering EM here (which is also missing from exit,
> which you need to fix first in a separate patch).

Hi!

So how exactly do you do this? I checked other users of the api and I do 
not see any free. I would say if needed, it should be a separate patch 
and outside of this series.

> 
>> +	if (ret)
>> +		goto error;
>> +
>>   	return 0;
>>   error:
>>   	kfree(data);
>> @@ -389,6 +503,10 @@ static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy)
>>   
>>   	dev_pm_opp_remove_all_dynamic(cpu_dev);
>>   	dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
>> +	if (data->lmh_dcvs_irq > 0) {
>> +		devm_free_irq(cpu_dev, data->lmh_dcvs_irq, data);
> 
> Why using devm variants here and while requesting the irq ?
> 
>> +		cancel_delayed_work_sync(&data->lmh_dcvs_poll_work);
>> +	}
> 
> Please move this to qcom_cpufreq_hw_lmh_exit() or something.

Ok.

> 
> Now with sequence of disabling interrupt, etc, I see a potential
> problem.
> 
> CPU0                                    CPU1
> 
> qcom_cpufreq_hw_cpu_exit()
> -> devm_free_irq();
>                                          qcom_lmh_dcvs_poll()
>                                          -> qcom_lmh_dcvs_notify()
>                                            -> enable_irq()
> 
> -> cancel_delayed_work_sync();
> 
> 
> What will happen if enable_irq() gets called after freeing the irq ?
> Not sure, but it looks like you will hit this then from manage.c:
> 
> WARN(!desc->irq_data.chip, KERN_ERR "enable_irq before
>                                       setup/request_irq: irq %u\n", irq))
> 
> ?
> 
> You got a chicken n egg problem :)

Yes indeed! But also it is a very rare chicken and egg problem.
The scenario here is that the cpus are busy and running load causing a 
thermal overrun and lmh is engaged. At the same time for this issue to 
be hit the cpu is trying to exit/disable cpufreq. Calling 
cancel_delayed_work_sync first could solve this issue, right ? 
cancel_delayed_work_sync guarantees the work not to be pending even if
it requeues itself on return. So once the delayed work is cancelled, the 
interrupts can be safely disabled. Thoughts ?


>
Viresh Kumar July 12, 2021, 4:35 a.m. UTC | #4
+Lukasz,

On 09-07-21, 11:37, Thara Gopinath wrote:
> On 7/9/21 2:46 AM, Viresh Kumar wrote:

> > On 08-07-21, 08:06, Thara Gopinath wrote:

> > >   static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)

> > >   {

> > >   	struct platform_device *pdev = cpufreq_get_driver_data();

> > > @@ -370,6 +480,10 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)

> > >   			dev_warn(cpu_dev, "failed to enable boost: %d\n", ret);

> > >   	}

> > > +	ret = qcom_cpufreq_hw_lmh_init(policy, index);

> > 

> > You missed unregistering EM here (which is also missing from exit,

> > which you need to fix first in a separate patch).

> 

> Hi!

> 

> So how exactly do you do this? I checked other users of the api and I do not

> see any free.


Lukasz,

I don't see the cpufreq drivers ever calling dev_pm_opp_of_unregister_em(), and
even if they called, it would translate to em_dev_unregister_perf_domain(),
which has this:

	if (_is_cpu_device(dev))
		return;

I am not sure what's going on here, can you help ?

-- 
viresh
Thara Gopinath July 13, 2021, 1:18 a.m. UTC | #5
On 7/12/21 12:41 AM, Viresh Kumar wrote:
> On 09-07-21, 11:37, Thara Gopinath wrote:

>> On 7/9/21 2:46 AM, Viresh Kumar wrote:

>>>> @@ -389,6 +503,10 @@ static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy)

>>>>    	dev_pm_opp_remove_all_dynamic(cpu_dev);

>>>>    	dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);

>>>> +	if (data->lmh_dcvs_irq > 0) {

>>>> +		devm_free_irq(cpu_dev, data->lmh_dcvs_irq, data);

>>>

>>> Why using devm variants here and while requesting the irq ?

> 

> Missed this one ?


Yep. I just replied to Bjorn's email on this. I will move to non devm 
version.

> 

>>>

>>>> +		cancel_delayed_work_sync(&data->lmh_dcvs_poll_work);

>>>> +	}

>>>

>>> Please move this to qcom_cpufreq_hw_lmh_exit() or something.

>>

>> Ok.

>>

>>>

>>> Now with sequence of disabling interrupt, etc, I see a potential

>>> problem.

>>>

>>> CPU0                                    CPU1

>>>

>>> qcom_cpufreq_hw_cpu_exit()

>>> -> devm_free_irq();

>>>                                           qcom_lmh_dcvs_poll()

>>>                                           -> qcom_lmh_dcvs_notify()

>>>                                             -> enable_irq()

>>>

>>> -> cancel_delayed_work_sync();

>>>

>>>

>>> What will happen if enable_irq() gets called after freeing the irq ?

>>> Not sure, but it looks like you will hit this then from manage.c:

>>>

>>> WARN(!desc->irq_data.chip, KERN_ERR "enable_irq before

>>>                                        setup/request_irq: irq %u\n", irq))

>>>

>>> ?

>>>

>>> You got a chicken n egg problem :)

>>

>> Yes indeed! But also it is a very rare chicken and egg problem.

>> The scenario here is that the cpus are busy and running load causing a

>> thermal overrun and lmh is engaged. At the same time for this issue to be

>> hit the cpu is trying to exit/disable cpufreq.

> 

> Yes, it is a very specific case but it needs to be resolved anyway. You don't

> want to get this ever :)

> 

>> Calling

>> cancel_delayed_work_sync first could solve this issue, right ?

>> cancel_delayed_work_sync guarantees the work not to be pending even if

>> it requeues itself on return. So once the delayed work is cancelled, the

>> interrupts can be safely disabled. Thoughts ?

> 

> I don't think even that would provide such guarantees to you here, as there is

> a chance the work gets queued again because of an interrupt that triggers right

> after you cancel the work.

> 

> The basic way of solving such issues is that once you cancel something, you need

> to guarantee that it doesn't get triggered again, no matter what.

> 

> The problem here I see is with your design itself, both delayed work and irq can

> enable each other, so no matter which one you disable first, won't be

> sufficient. You need to fix that design somehow.


So I really need the interrupt to fire and then the timer to kick in and 
take up the monitoring. I can think of introducing a variable 
is_disabled which is updated and read under a spinlock. 
qcom_cpufreq_hw_cpu_exit can hold the spinlock and set is_disabled to 
true prior to cancelling the work queue or disabling the interrupt. 
Before re-enabling the interrupt or re-queuing the work in 
qcom_lmh_dcvs_notify, is_disabled can be read and checked.

But does this problem not exist in target_index , fast_switch etc also ? 
One cpu can be disabling and the other one can be updating the target 
right?

> 


-- 
Warm Regards
Thara (She/Her/Hers)
Thara Gopinath July 14, 2021, 12:37 p.m. UTC | #6
On 7/12/21 11:18 PM, Viresh Kumar wrote:
> On 12-07-21, 21:18, Thara Gopinath wrote:

>> So I really need the interrupt to fire and then the timer to kick in and

>> take up the monitoring. I can think of introducing a variable is_disabled

>> which is updated and read under a spinlock. qcom_cpufreq_hw_cpu_exit can

>> hold the spinlock and set is_disabled to true prior to cancelling the work

>> queue or disabling the interrupt. Before re-enabling the interrupt or

>> re-queuing the work in qcom_lmh_dcvs_notify, is_disabled can be read and

>> checked.

> 

> Or you can make the lmh_dcvs_poll_work item a pointer and mark it NULL in exit,

> with proper locking etc.


Yes it could work. I will spin the next version with either this or 
introducing a new variable with locking.

> 

>> But does this problem not exist in target_index , fast_switch etc also ? One

>> cpu can be disabling and the other one can be updating the target right?

> 

> The race doesn't happen there as cpufreq_unregister_driver() takes care of

> stopping everything before removing the policy. To be more precise, governor's

> ->stop() function is responsible for making sure that frequency won't be updated

> any further.

> 


-- 
Warm Regards
Thara (She/Her/Hers)
diff mbox series

Patch

diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
index f86859bf76f1..bb5fc700d913 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -7,6 +7,7 @@ 
 #include <linux/cpufreq.h>
 #include <linux/init.h>
 #include <linux/interconnect.h>
+#include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
@@ -22,10 +23,13 @@ 
 #define CLK_HW_DIV			2
 #define LUT_TURBO_IND			1
 
+#define HZ_PER_KHZ			1000
+
 struct qcom_cpufreq_soc_data {
 	u32 reg_enable;
 	u32 reg_freq_lut;
 	u32 reg_volt_lut;
+	u32 reg_current_vote;
 	u32 reg_perf_state;
 	u8 lut_row_size;
 };
@@ -33,7 +37,10 @@  struct qcom_cpufreq_soc_data {
 struct qcom_cpufreq_data {
 	void __iomem *base;
 	struct resource *res;
+	struct delayed_work lmh_dcvs_poll_work;
 	const struct qcom_cpufreq_soc_data *soc_data;
+	struct cpufreq_policy *policy;
+	int lmh_dcvs_irq;
 };
 
 static unsigned long cpu_hw_rate, xo_rate;
@@ -251,10 +258,84 @@  static void qcom_get_related_cpus(int index, struct cpumask *m)
 	}
 }
 
+static inline unsigned long qcom_lmh_vote_to_freq(u32 val)
+{
+	return (val & 0x3FF) * 19200;
+}
+
+static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
+{
+	struct cpufreq_policy *policy = data->policy;
+	struct dev_pm_opp *opp;
+	struct device *dev;
+	unsigned long max_capacity, capacity, freq_hz, throttled_freq;
+	unsigned int val, freq;
+
+	/*
+	 * Get the h/w throttled frequency, normalize it using the
+	 * registered opp table and use it to calculate thermal pressure.
+	 */
+	val = readl_relaxed(data->base + data->soc_data->reg_current_vote);
+	freq = qcom_lmh_vote_to_freq(val);
+	freq_hz = freq * HZ_PER_KHZ;
+
+	dev = get_cpu_device(cpumask_first(policy->cpus));
+	opp = dev_pm_opp_find_freq_floor(dev, &freq_hz);
+	if (IS_ERR(opp) && PTR_ERR(opp) == -ERANGE)
+		opp = dev_pm_opp_find_freq_ceil(dev, &freq_hz);
+
+	throttled_freq = freq_hz / HZ_PER_KHZ;
+
+	/* Update thermal pressure */
+
+	max_capacity = arch_scale_cpu_capacity(cpumask_first(policy->cpus));
+	capacity = throttled_freq * max_capacity;
+	capacity /= policy->cpuinfo.max_freq;
+
+	/* Don't pass boost capacity to scheduler */
+	if (capacity > max_capacity)
+		capacity = max_capacity;
+
+	arch_set_thermal_pressure(policy->cpus, max_capacity - capacity);
+
+	/*
+	 * If h/w throttled frequency is higher than what cpufreq has requested for, stop
+	 * polling and switch back to interrupt mechanism
+	 */
+
+	if (throttled_freq >= qcom_cpufreq_hw_get(cpumask_first(policy->cpus)))
+		/* Clear the existing interrupts and enable it back */
+		enable_irq(data->lmh_dcvs_irq);
+	else
+		mod_delayed_work(system_highpri_wq, &data->lmh_dcvs_poll_work,
+				 msecs_to_jiffies(10));
+}
+
+static void qcom_lmh_dcvs_poll(struct work_struct *work)
+{
+	struct qcom_cpufreq_data *data;
+
+	data = container_of(work, struct qcom_cpufreq_data, lmh_dcvs_poll_work.work);
+
+	qcom_lmh_dcvs_notify(data);
+}
+
+static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data)
+{
+	struct qcom_cpufreq_data *c_data = data;
+
+	/* Disable interrupt and enable polling */
+	disable_irq_nosync(c_data->lmh_dcvs_irq);
+	qcom_lmh_dcvs_notify(c_data);
+
+	return 0;
+}
+
 static const struct qcom_cpufreq_soc_data qcom_soc_data = {
 	.reg_enable = 0x0,
 	.reg_freq_lut = 0x110,
 	.reg_volt_lut = 0x114,
+	.reg_current_vote = 0x704,
 	.reg_perf_state = 0x920,
 	.lut_row_size = 32,
 };
@@ -274,6 +355,35 @@  static const struct of_device_id qcom_cpufreq_hw_match[] = {
 };
 MODULE_DEVICE_TABLE(of, qcom_cpufreq_hw_match);
 
+static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index)
+{
+	struct qcom_cpufreq_data *data = policy->driver_data;
+	struct platform_device *pdev = cpufreq_get_driver_data();
+	struct device *cpu_dev = get_cpu_device(policy->cpu);
+	char irq_name[15];
+	int ret;
+
+	/*
+	 * Look for LMh interrupt. If no interrupt line is specified /
+	 * if there is an error, allow cpufreq to be enabled as usual.
+	 */
+	data->lmh_dcvs_irq = platform_get_irq(pdev, index);
+	if (data->lmh_dcvs_irq <= 0)
+		return data->lmh_dcvs_irq == -EPROBE_DEFER ? -EPROBE_DEFER : 0;
+
+	snprintf(irq_name, sizeof(irq_name), "dcvsh-irq-%u", policy->cpu);
+	ret = devm_request_irq(cpu_dev, data->lmh_dcvs_irq, qcom_lmh_dcvs_handle_irq,
+			       0, irq_name, data);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering irq %x\n", ret, data->lmh_dcvs_irq);
+		return 0;
+	}
+	data->policy = policy;
+	INIT_DEFERRABLE_WORK(&data->lmh_dcvs_poll_work, qcom_lmh_dcvs_poll);
+
+	return 0;
+}
+
 static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
 {
 	struct platform_device *pdev = cpufreq_get_driver_data();
@@ -370,6 +480,10 @@  static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
 			dev_warn(cpu_dev, "failed to enable boost: %d\n", ret);
 	}
 
+	ret = qcom_cpufreq_hw_lmh_init(policy, index);
+	if (ret)
+		goto error;
+
 	return 0;
 error:
 	kfree(data);
@@ -389,6 +503,10 @@  static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy)
 
 	dev_pm_opp_remove_all_dynamic(cpu_dev);
 	dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
+	if (data->lmh_dcvs_irq > 0) {
+		devm_free_irq(cpu_dev, data->lmh_dcvs_irq, data);
+		cancel_delayed_work_sync(&data->lmh_dcvs_poll_work);
+	}
 	kfree(policy->freq_table);
 	kfree(data);
 	iounmap(base);