diff mbox series

[3/3] cpufreq: scmi: Register for limit change notifications

Message ID 20240108140118.1596-4-quic_sibis@quicinc.com
State Superseded
Headers show
Series firmware: arm_scmi: Register and handle limits change notification | expand

Commit Message

Sibi Sankar Jan. 8, 2024, 2:01 p.m. UTC
Register for limit change notifications if supported with the help of
perf_notify_support interface and determine the throttled frequency
using the perf_opp_xlate to apply HW pressure.

Signed-off-by: Sibi Sankar <quic_sibis@quicinc.com>
---
 drivers/cpufreq/scmi-cpufreq.c | 42 +++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

Comments

Lukasz Luba Jan. 10, 2024, 8:26 a.m. UTC | #1
Hi Sibi,

+ Morten and Dietmar on CC

On 1/8/24 14:01, Sibi Sankar wrote:
> Register for limit change notifications if supported with the help of
> perf_notify_support interface and determine the throttled frequency
> using the perf_opp_xlate to apply HW pressure.
> 
> Signed-off-by: Sibi Sankar <quic_sibis@quicinc.com>
> ---
>   drivers/cpufreq/scmi-cpufreq.c | 42 +++++++++++++++++++++++++++++++++-
>   1 file changed, 41 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
> index 4ee23f4ebf4a..53bc8868455d 100644
> --- a/drivers/cpufreq/scmi-cpufreq.c
> +++ b/drivers/cpufreq/scmi-cpufreq.c
> @@ -25,9 +25,13 @@ struct scmi_data {
>   	int domain_id;
>   	int nr_opp;
>   	struct device *cpu_dev;
> +	struct cpufreq_policy *policy;
>   	cpumask_var_t opp_shared_cpus;
> +	struct notifier_block limit_notify_nb;
>   };
>   
> +const struct scmi_handle *handle;
> +static struct scmi_device *scmi_dev;
>   static struct scmi_protocol_handle *ph;
>   static const struct scmi_perf_proto_ops *perf_ops;
>   
> @@ -144,6 +148,22 @@ scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power,
>   	return 0;
>   }
>   
> +static int scmi_limit_notify_cb(struct notifier_block *nb, unsigned long event, void *data)
> +{
> +	unsigned long freq_hz;
> +	struct scmi_perf_limits_report *limit_notify = data;
> +	struct scmi_data *priv = container_of(nb, struct scmi_data, limit_notify_nb);
> +	struct cpufreq_policy *policy = priv->policy;
> +
> +	if (perf_ops->perf_opp_xlate(ph, priv->domain_id, limit_notify->range_max, &freq_hz))
> +		return NOTIFY_OK;
> +
> +	/* Update HW pressure (the boost frequencies are accepted) */
> +	arch_update_hw_pressure(policy->related_cpus, (freq_hz / HZ_PER_KHZ));

This is wrong. The whole idea of the new HW pressure was that I wanted
to get rid of the 'signal smoothing' mechanism in order to get
instantaneous value from FW to task scheduler. Vincent created
2 interfaces in that new HW pressure:
1. cpufreq_update_pressure(policy) - raw variable
2. arch_update_hw_pressure(policy->related_cpus, (freq_hz / HZ_PER_KHZ))
    - smoothing PELT mechanism, good for raw IRQ in drivers

In our SCMI cpufreq driver we need the 1st one:
cpufreq_update_pressure(policy)

The FW will do the 'signal smoothing or filtering' and won't
flood the kernel with hundreds of notifications.

So, please change that bit and add me, Morten and Dietmar on CC.
I would like to review it.

Regards,
Lukasz
Sibi Sankar Jan. 17, 2024, 2:58 a.m. UTC | #2
On 1/10/24 13:56, Lukasz Luba wrote:
> Hi Sibi,
> 

Hey Lukasz,
Thanks for taking time to review the series!

> + Morten and Dietmar on CC
> 
> On 1/8/24 14:01, Sibi Sankar wrote:
>> Register for limit change notifications if supported with the help of
>> perf_notify_support interface and determine the throttled frequency
>> using the perf_opp_xlate to apply HW pressure.
>>
>> Signed-off-by: Sibi Sankar <quic_sibis@quicinc.com>
>> ---
>>   drivers/cpufreq/scmi-cpufreq.c | 42 +++++++++++++++++++++++++++++++++-
>>   1 file changed, 41 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/cpufreq/scmi-cpufreq.c 
>> b/drivers/cpufreq/scmi-cpufreq.c
>> index 4ee23f4ebf4a..53bc8868455d 100644
>> --- a/drivers/cpufreq/scmi-cpufreq.c
>> +++ b/drivers/cpufreq/scmi-cpufreq.c
>> @@ -25,9 +25,13 @@ struct scmi_data {
>>       int domain_id;
>>       int nr_opp;
>>       struct device *cpu_dev;
>> +    struct cpufreq_policy *policy;
>>       cpumask_var_t opp_shared_cpus;
>> +    struct notifier_block limit_notify_nb;
>>   };
>> +const struct scmi_handle *handle;
>> +static struct scmi_device *scmi_dev;
>>   static struct scmi_protocol_handle *ph;
>>   static const struct scmi_perf_proto_ops *perf_ops;
>> @@ -144,6 +148,22 @@ scmi_get_cpu_power(struct device *cpu_dev, 
>> unsigned long *power,
>>       return 0;
>>   }
>> +static int scmi_limit_notify_cb(struct notifier_block *nb, unsigned 
>> long event, void *data)
>> +{
>> +    unsigned long freq_hz;
>> +    struct scmi_perf_limits_report *limit_notify = data;
>> +    struct scmi_data *priv = container_of(nb, struct scmi_data, 
>> limit_notify_nb);
>> +    struct cpufreq_policy *policy = priv->policy;
>> +
>> +    if (perf_ops->perf_opp_xlate(ph, priv->domain_id, 
>> limit_notify->range_max, &freq_hz))
>> +        return NOTIFY_OK;
>> +
>> +    /* Update HW pressure (the boost frequencies are accepted) */
>> +    arch_update_hw_pressure(policy->related_cpus, (freq_hz / 
>> HZ_PER_KHZ));
> 
> This is wrong. The whole idea of the new HW pressure was that I wanted
> to get rid of the 'signal smoothing' mechanism in order to get
> instantaneous value from FW to task scheduler. Vincent created
> 2 interfaces in that new HW pressure:
> 1. cpufreq_update_pressure(policy) - raw variable
> 2. arch_update_hw_pressure(policy->related_cpus, (freq_hz / HZ_PER_KHZ))
>     - smoothing PELT mechanism, good for raw IRQ in drivers
> 
> In our SCMI cpufreq driver we need the 1st one:
> cpufreq_update_pressure(policy)
> 
> The FW will do the 'signal smoothing or filtering' and won't
> flood the kernel with hundreds of notifications.

Ack, even though I see no mention of filtering being mandated in the 
SCMI specification, the scmi notification by itself will serve as a
rate limiter I guess.

> 
> So, please change that bit and add me, Morten and Dietmar on CC.
> I would like to review it.

ack

-Sibi

> 
> Regards,
> Lukasz
Lukasz Luba Jan. 17, 2024, 8:03 a.m. UTC | #3
On 1/17/24 02:58, Sibi Sankar wrote:
> 
> 
> On 1/10/24 13:56, Lukasz Luba wrote:
>> Hi Sibi,
>>
> 
> Hey Lukasz,
> Thanks for taking time to review the series!
> 
>> + Morten and Dietmar on CC
>>
>> On 1/8/24 14:01, Sibi Sankar wrote:
>>> Register for limit change notifications if supported with the help of
>>> perf_notify_support interface and determine the throttled frequency
>>> using the perf_opp_xlate to apply HW pressure.
>>>
>>> Signed-off-by: Sibi Sankar <quic_sibis@quicinc.com>
>>> ---
>>>   drivers/cpufreq/scmi-cpufreq.c | 42 +++++++++++++++++++++++++++++++++-
>>>   1 file changed, 41 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/cpufreq/scmi-cpufreq.c 
>>> b/drivers/cpufreq/scmi-cpufreq.c
>>> index 4ee23f4ebf4a..53bc8868455d 100644
>>> --- a/drivers/cpufreq/scmi-cpufreq.c
>>> +++ b/drivers/cpufreq/scmi-cpufreq.c
>>> @@ -25,9 +25,13 @@ struct scmi_data {
>>>       int domain_id;
>>>       int nr_opp;
>>>       struct device *cpu_dev;
>>> +    struct cpufreq_policy *policy;
>>>       cpumask_var_t opp_shared_cpus;
>>> +    struct notifier_block limit_notify_nb;
>>>   };
>>> +const struct scmi_handle *handle;
>>> +static struct scmi_device *scmi_dev;
>>>   static struct scmi_protocol_handle *ph;
>>>   static const struct scmi_perf_proto_ops *perf_ops;
>>> @@ -144,6 +148,22 @@ scmi_get_cpu_power(struct device *cpu_dev, 
>>> unsigned long *power,
>>>       return 0;
>>>   }
>>> +static int scmi_limit_notify_cb(struct notifier_block *nb, unsigned 
>>> long event, void *data)
>>> +{
>>> +    unsigned long freq_hz;
>>> +    struct scmi_perf_limits_report *limit_notify = data;
>>> +    struct scmi_data *priv = container_of(nb, struct scmi_data, 
>>> limit_notify_nb);
>>> +    struct cpufreq_policy *policy = priv->policy;
>>> +
>>> +    if (perf_ops->perf_opp_xlate(ph, priv->domain_id, 
>>> limit_notify->range_max, &freq_hz))
>>> +        return NOTIFY_OK;
>>> +
>>> +    /* Update HW pressure (the boost frequencies are accepted) */
>>> +    arch_update_hw_pressure(policy->related_cpus, (freq_hz / 
>>> HZ_PER_KHZ));
>>
>> This is wrong. The whole idea of the new HW pressure was that I wanted
>> to get rid of the 'signal smoothing' mechanism in order to get
>> instantaneous value from FW to task scheduler. Vincent created
>> 2 interfaces in that new HW pressure:
>> 1. cpufreq_update_pressure(policy) - raw variable
>> 2. arch_update_hw_pressure(policy->related_cpus, (freq_hz / HZ_PER_KHZ))
>>     - smoothing PELT mechanism, good for raw IRQ in drivers
>>
>> In our SCMI cpufreq driver we need the 1st one:
>> cpufreq_update_pressure(policy)
>>
>> The FW will do the 'signal smoothing or filtering' and won't
>> flood the kernel with hundreds of notifications.
> 
> Ack, even though I see no mention of filtering being mandated in the 
> SCMI specification, the scmi notification by itself will serve as a
> rate limiter I guess.
> 
>>
>> So, please change that bit and add me, Morten and Dietmar on CC.
>> I would like to review it.
> 

True, the SCMI protocol doesn't describe the rate or limits of
often these performance limit notifications can be sent.
It's too HW specific and some balance has to made to not
flood the kernel with hundreds or thousands of notifications
per second. That could overload the SCMI channel.

The FW implementation has to combine the perf. limit
restrictions from different areas: thermal, power
conditions, MPMM, etc. Some smarter approach in FW
to the processing and filtering of perf limit notification
would be needed. The kernel is not able to do the job at the same
quality as FW in those areas.

Therefore, in the kernel HW pressure signal we don't need
another 'filtering or smoothing' on already processed
SCMI notification information. That could even harm us
if we don't get that FW information in kernel at right time
due to convergence delays of the HW pressure w/ PELT smoothing.
diff mbox series

Patch

diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
index 4ee23f4ebf4a..53bc8868455d 100644
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -25,9 +25,13 @@  struct scmi_data {
 	int domain_id;
 	int nr_opp;
 	struct device *cpu_dev;
+	struct cpufreq_policy *policy;
 	cpumask_var_t opp_shared_cpus;
+	struct notifier_block limit_notify_nb;
 };
 
+const struct scmi_handle *handle;
+static struct scmi_device *scmi_dev;
 static struct scmi_protocol_handle *ph;
 static const struct scmi_perf_proto_ops *perf_ops;
 
@@ -144,6 +148,22 @@  scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power,
 	return 0;
 }
 
+static int scmi_limit_notify_cb(struct notifier_block *nb, unsigned long event, void *data)
+{
+	unsigned long freq_hz;
+	struct scmi_perf_limits_report *limit_notify = data;
+	struct scmi_data *priv = container_of(nb, struct scmi_data, limit_notify_nb);
+	struct cpufreq_policy *policy = priv->policy;
+
+	if (perf_ops->perf_opp_xlate(ph, priv->domain_id, limit_notify->range_max, &freq_hz))
+		return NOTIFY_OK;
+
+	/* Update HW pressure (the boost frequencies are accepted) */
+	arch_update_hw_pressure(policy->related_cpus, (freq_hz / HZ_PER_KHZ));
+
+	return NOTIFY_OK;
+}
+
 static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 {
 	int ret, nr_opp, domain;
@@ -151,6 +171,7 @@  static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 	struct device *cpu_dev;
 	struct scmi_data *priv;
 	struct cpufreq_frequency_table *freq_table;
+	struct scmi_perf_notify_info info = {};
 
 	cpu_dev = get_cpu_device(policy->cpu);
 	if (!cpu_dev) {
@@ -250,6 +271,25 @@  static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 	policy->fast_switch_possible =
 		perf_ops->fast_switch_possible(ph, domain);
 
+	ret = perf_ops->perf_notify_support(ph, domain, &info);
+	if (ret)
+		dev_warn(cpu_dev, "failed to get supported notifications: %d\n", ret);
+
+	if (info.perf_limit_notify) {
+		priv->limit_notify_nb.notifier_call = scmi_limit_notify_cb;
+		ret = handle->notify_ops->devm_event_notifier_register(scmi_dev, SCMI_PROTOCOL_PERF,
+							SCMI_EVENT_PERFORMANCE_LIMITS_CHANGED,
+							&domain,
+							&priv->limit_notify_nb);
+		if (ret) {
+			dev_err(cpu_dev, "Error in registering limit change notifier for domain %d\n",
+				domain);
+			return ret;
+		}
+	}
+
+	priv->policy = policy;
+
 	return 0;
 
 out_free_opp:
@@ -321,8 +361,8 @@  static int scmi_cpufreq_probe(struct scmi_device *sdev)
 {
 	int ret;
 	struct device *dev = &sdev->dev;
-	const struct scmi_handle *handle;
 
+	scmi_dev = sdev;
 	handle = sdev->handle;
 
 	if (!handle)