diff mbox series

[v2,1/5] PM: domains: Add GENPD_FLAG_RT_SAFE for PREEMPT_RT

Message ID 20221219151503.385816-2-krzysztof.kozlowski@linaro.org
State New
Headers show
Series [v2,1/5] PM: domains: Add GENPD_FLAG_RT_SAFE for PREEMPT_RT | expand

Commit Message

Krzysztof Kozlowski Dec. 19, 2022, 3:14 p.m. UTC
Realtime kernels with PREEMPT_RT must use raw_spinlock_t for domains
which are invoked from CPU idle (thus from atomic section).  Example is
cpuidle PSCI domain driver which itself is PREEMPT_RT safe, but is being
called as part of cpuidle.

Add a flag allowing a power domain provider to indicate it is RT safe.
The flag is supposed to be used with existing GENPD_FLAG_IRQ_SAFE.

Cc: Adrien Thierry <athierry@redhat.com>
Cc: Brian Masney <bmasney@redhat.com>
Cc: linux-rt-users@vger.kernel.org
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>

---

Independently from Adrien, I encountered the same problem around genpd
when using PREEMPT_RT kernel.

Previous patch by Adrien:
https://lore.kernel.org/all/20220615203605.1068453-1-athierry@redhat.com/
---
 drivers/base/power/domain.c | 59 +++++++++++++++++++++++++++++++++++--
 include/linux/pm_domain.h   | 13 ++++++++
 2 files changed, 70 insertions(+), 2 deletions(-)

Comments

Ulf Hansson Jan. 4, 2023, 3:45 p.m. UTC | #1
On Mon, 19 Dec 2022 at 16:15, Krzysztof Kozlowski
<krzysztof.kozlowski@linaro.org> wrote:
>
> Realtime kernels with PREEMPT_RT must use raw_spinlock_t for domains
> which are invoked from CPU idle (thus from atomic section).  Example is
> cpuidle PSCI domain driver which itself is PREEMPT_RT safe, but is being
> called as part of cpuidle.

Just so I don't get this wrong, since the cpuidle-psci also calls
pm_runtime_* functions so it isn't PREEMPT_RT safe, at least not yet?

>
> Add a flag allowing a power domain provider to indicate it is RT safe.
> The flag is supposed to be used with existing GENPD_FLAG_IRQ_SAFE.
>
> Cc: Adrien Thierry <athierry@redhat.com>
> Cc: Brian Masney <bmasney@redhat.com>
> Cc: linux-rt-users@vger.kernel.org
> Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>

For genpd, overall, I think this looks like an okay approach to me.
Although, let me check the whole series (I need some more time to do
that) before I give this my blessing.

Kind regards
Uffe

>
> ---
>
> Independently from Adrien, I encountered the same problem around genpd
> when using PREEMPT_RT kernel.
>
> Previous patch by Adrien:
> https://lore.kernel.org/all/20220615203605.1068453-1-athierry@redhat.com/
> ---
>  drivers/base/power/domain.c | 59 +++++++++++++++++++++++++++++++++++--
>  include/linux/pm_domain.h   | 13 ++++++++
>  2 files changed, 70 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
> index 967bcf9d415e..4dfce1d476f4 100644
> --- a/drivers/base/power/domain.c
> +++ b/drivers/base/power/domain.c
> @@ -119,6 +119,48 @@ static const struct genpd_lock_ops genpd_spin_ops = {
>         .unlock = genpd_unlock_spin,
>  };
>
> +static void genpd_lock_rawspin(struct generic_pm_domain *genpd)
> +       __acquires(&genpd->rslock)
> +{
> +       unsigned long flags;
> +
> +       raw_spin_lock_irqsave(&genpd->rslock, flags);
> +       genpd->rlock_flags = flags;
> +}
> +
> +static void genpd_lock_nested_rawspin(struct generic_pm_domain *genpd,
> +                                       int depth)
> +       __acquires(&genpd->rslock)
> +{
> +       unsigned long flags;
> +
> +       raw_spin_lock_irqsave_nested(&genpd->rslock, flags, depth);
> +       genpd->rlock_flags = flags;
> +}
> +
> +static int genpd_lock_interruptible_rawspin(struct generic_pm_domain *genpd)
> +       __acquires(&genpd->rslock)
> +{
> +       unsigned long flags;
> +
> +       raw_spin_lock_irqsave(&genpd->rslock, flags);
> +       genpd->rlock_flags = flags;
> +       return 0;
> +}
> +
> +static void genpd_unlock_rawspin(struct generic_pm_domain *genpd)
> +       __releases(&genpd->rslock)
> +{
> +       raw_spin_unlock_irqrestore(&genpd->rslock, genpd->rlock_flags);
> +}
> +
> +static const struct genpd_lock_ops genpd_rawspin_ops = {
> +       .lock = genpd_lock_rawspin,
> +       .lock_nested = genpd_lock_nested_rawspin,
> +       .lock_interruptible = genpd_lock_interruptible_rawspin,
> +       .unlock = genpd_unlock_rawspin,
> +};
> +
>  #define genpd_lock(p)                  p->lock_ops->lock(p)
>  #define genpd_lock_nested(p, d)                p->lock_ops->lock_nested(p, d)
>  #define genpd_lock_interruptible(p)    p->lock_ops->lock_interruptible(p)
> @@ -126,6 +168,8 @@ static const struct genpd_lock_ops genpd_spin_ops = {
>
>  #define genpd_status_on(genpd)         (genpd->status == GENPD_STATE_ON)
>  #define genpd_is_irq_safe(genpd)       (genpd->flags & GENPD_FLAG_IRQ_SAFE)
> +#define genpd_is_rt_safe(genpd)                (genpd_is_irq_safe(genpd) && \
> +                                        (genpd->flags & GENPD_FLAG_RT_SAFE))
>  #define genpd_is_always_on(genpd)      (genpd->flags & GENPD_FLAG_ALWAYS_ON)
>  #define genpd_is_active_wakeup(genpd)  (genpd->flags & GENPD_FLAG_ACTIVE_WAKEUP)
>  #define genpd_is_cpu_domain(genpd)     (genpd->flags & GENPD_FLAG_CPU_DOMAIN)
> @@ -1838,6 +1882,12 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd,
>                 return -EINVAL;
>         }
>
> +       if (!genpd_is_rt_safe(genpd) && genpd_is_rt_safe(subdomain)) {
> +               WARN(1, "Parent %s of subdomain %s must be RT safe\n",
> +                    genpd->name, subdomain->name);
> +               return -EINVAL;
> +       }
> +
>         link = kzalloc(sizeof(*link), GFP_KERNEL);
>         if (!link)
>                 return -ENOMEM;
> @@ -2008,8 +2058,13 @@ static void genpd_free_data(struct generic_pm_domain *genpd)
>  static void genpd_lock_init(struct generic_pm_domain *genpd)
>  {
>         if (genpd->flags & GENPD_FLAG_IRQ_SAFE) {
> -               spin_lock_init(&genpd->slock);
> -               genpd->lock_ops = &genpd_spin_ops;
> +               if (genpd->flags & GENPD_FLAG_RT_SAFE) {
> +                       raw_spin_lock_init(&genpd->rslock);
> +                       genpd->lock_ops = &genpd_rawspin_ops;
> +               } else {
> +                       spin_lock_init(&genpd->slock);
> +                       genpd->lock_ops = &genpd_spin_ops;
> +               }
>         } else {
>                 mutex_init(&genpd->mlock);
>                 genpd->lock_ops = &genpd_mtx_ops;
> diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
> index 1cd41bdf73cf..0a1600244963 100644
> --- a/include/linux/pm_domain.h
> +++ b/include/linux/pm_domain.h
> @@ -61,6 +61,14 @@
>   * GENPD_FLAG_MIN_RESIDENCY:   Enable the genpd governor to consider its
>   *                             components' next wakeup when determining the
>   *                             optimal idle state.
> + *
> + * GENPD_FLAG_RT_SAFE:         When used with GENPD_FLAG_IRQ_SAFE, this informs
> + *                             genpd that its backend callbacks, ->power_on|off(),
> + *                             do not use other spinlocks. They might use
> + *                             raw_spinlocks or other pre-emption-disable
> + *                             methods, all of which are PREEMPT_RT safe. Note
> + *                             that, a genpd having this flag set, requires its
> + *                             masterdomains to also have it set.
>   */
>  #define GENPD_FLAG_PM_CLK       (1U << 0)
>  #define GENPD_FLAG_IRQ_SAFE     (1U << 1)
> @@ -69,6 +77,7 @@
>  #define GENPD_FLAG_CPU_DOMAIN   (1U << 4)
>  #define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5)
>  #define GENPD_FLAG_MIN_RESIDENCY (1U << 6)
> +#define GENPD_FLAG_RT_SAFE      (1U << 7)
>
>  enum gpd_status {
>         GENPD_STATE_ON = 0,     /* PM domain is on */
> @@ -164,6 +173,10 @@ struct generic_pm_domain {
>                         spinlock_t slock;
>                         unsigned long lock_flags;
>                 };
> +               struct {
> +                       raw_spinlock_t rslock;
> +                       unsigned long rlock_flags;
> +               };
>         };
>
>  };
> --
> 2.34.1
>
Krzysztof Kozlowski Jan. 6, 2023, 2:52 p.m. UTC | #2
On 04/01/2023 16:45, Ulf Hansson wrote:
> On Mon, 19 Dec 2022 at 16:15, Krzysztof Kozlowski
> <krzysztof.kozlowski@linaro.org> wrote:
>>
>> Realtime kernels with PREEMPT_RT must use raw_spinlock_t for domains
>> which are invoked from CPU idle (thus from atomic section).  Example is
>> cpuidle PSCI domain driver which itself is PREEMPT_RT safe, but is being
>> called as part of cpuidle.
> 
> Just so I don't get this wrong, since the cpuidle-psci also calls
> pm_runtime_* functions so it isn't PREEMPT_RT safe, at least not yet?

You are correct. Patch 3 here addresses it by... just not doing runtime
PM. This is a hacky workaround but:
1. I don't have any other idea,
2. It's not a big problem because RT systems are not supposed to have
any CPU idle (one of first things during RT system tuning is to disable
cpuidle).

> 
>>
>> Add a flag allowing a power domain provider to indicate it is RT safe.
>> The flag is supposed to be used with existing GENPD_FLAG_IRQ_SAFE.
>>
>> Cc: Adrien Thierry <athierry@redhat.com>
>> Cc: Brian Masney <bmasney@redhat.com>
>> Cc: linux-rt-users@vger.kernel.org
>> Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
> 
> For genpd, overall, I think this looks like an okay approach to me.
> Although, let me check the whole series (I need some more time to do
> that) before I give this my blessing.

Sure, we are all have too many mails in inbox. :)

Best regards,
Krzysztof
Sebastian Andrzej Siewior Jan. 12, 2023, 10:32 a.m. UTC | #3
On 2022-12-19 16:14:59 [+0100], Krzysztof Kozlowski wrote:
> Realtime kernels with PREEMPT_RT must use raw_spinlock_t for domains
> which are invoked from CPU idle (thus from atomic section).  Example is
> cpuidle PSCI domain driver which itself is PREEMPT_RT safe, but is being
> called as part of cpuidle.

I think it needs to be clarified what PREEMPT_RT safe means. PSCI is an
external interface which does not inform us what it does and how long
the operation will take.
The ACPI table for instance populate several idle states and their
entry/exit time. Then you can decide if and when an entry/exit latency
of 500us is PREEMPT_RT safe.

> Add a flag allowing a power domain provider to indicate it is RT safe.
> The flag is supposed to be used with existing GENPD_FLAG_IRQ_SAFE.
> 
> Cc: Adrien Thierry <athierry@redhat.com>
> Cc: Brian Masney <bmasney@redhat.com>
> Cc: linux-rt-users@vger.kernel.org
> Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
> > index 1cd41bdf73cf..0a1600244963 100644
> --- a/include/linux/pm_domain.h
> +++ b/include/linux/pm_domain.h
> @@ -61,6 +61,14 @@
>   * GENPD_FLAG_MIN_RESIDENCY:	Enable the genpd governor to consider its
>   *				components' next wakeup when determining the
>   *				optimal idle state.
> + *
> + * GENPD_FLAG_RT_SAFE:		When used with GENPD_FLAG_IRQ_SAFE, this informs
> + *				genpd that its backend callbacks, ->power_on|off(),
> + *				do not use other spinlocks. They might use
> + *				raw_spinlocks or other pre-emption-disable
> + *				methods, all of which are PREEMPT_RT safe. Note

Please use spinlock_t and raw_spinlock_t. Wouldn't it be better to write
"preemption" instead "pre-emption"?
The important part is probably that once a raw_spinlock_t has been
acquired, it is not possible to invoke any function that acquries
sleeping locks (which includes memory allocations). While even without
that flag it is possible to invoke a function which disables and enables
preemption on its own.

> + *				that, a genpd having this flag set, requires its
> + *				masterdomains to also have it set.

This could be verified upon registration, no?
It might be worth noting that preemption-off section during PM
operations contribute to the system's max latency. Depending on how low
the operation is, it may or may not be a problem.
The ->power_on|off() refers to the sate of the component, right?

>   */
>  #define GENPD_FLAG_PM_CLK	 (1U << 0)
>  #define GENPD_FLAG_IRQ_SAFE	 (1U << 1)

Sebastian
Sebastian Andrzej Siewior Jan. 12, 2023, 10:36 a.m. UTC | #4
On 2023-01-06 15:52:57 [+0100], Krzysztof Kozlowski wrote:
> > Just so I don't get this wrong, since the cpuidle-psci also calls
> > pm_runtime_* functions so it isn't PREEMPT_RT safe, at least not yet?
> 
> You are correct. Patch 3 here addresses it by... just not doing runtime
> PM. This is a hacky workaround but:
> 1. I don't have any other idea,
> 2. It's not a big problem because RT systems are not supposed to have
> any CPU idle (one of first things during RT system tuning is to disable
> cpuidle).

so you say you use idle=poll instead? 

> Best regards,
> Krzysztof

Sebastian
Krzysztof Kozlowski Jan. 12, 2023, 11:27 a.m. UTC | #5
On 12/01/2023 11:36, Sebastian Andrzej Siewior wrote:
> On 2023-01-06 15:52:57 [+0100], Krzysztof Kozlowski wrote:
>>> Just so I don't get this wrong, since the cpuidle-psci also calls
>>> pm_runtime_* functions so it isn't PREEMPT_RT safe, at least not yet?
>>
>> You are correct. Patch 3 here addresses it by... just not doing runtime
>> PM. This is a hacky workaround but:
>> 1. I don't have any other idea,
>> 2. It's not a big problem because RT systems are not supposed to have
>> any CPU idle (one of first things during RT system tuning is to disable
>> cpuidle).
> 
> so you say you use idle=poll instead? 

This was generic comment that system is not supposed to go into deeper
idle states.

Best regards,
Krzysztof
Krzysztof Kozlowski Jan. 12, 2023, 11:31 a.m. UTC | #6
On 12/01/2023 11:32, Sebastian Andrzej Siewior wrote:
> On 2022-12-19 16:14:59 [+0100], Krzysztof Kozlowski wrote:
>> Realtime kernels with PREEMPT_RT must use raw_spinlock_t for domains
>> which are invoked from CPU idle (thus from atomic section).  Example is
>> cpuidle PSCI domain driver which itself is PREEMPT_RT safe, but is being
>> called as part of cpuidle.
> 
> I think it needs to be clarified what PREEMPT_RT safe means. 

OK

> PSCI is an
> external interface which does not inform us what it does and how long
> the operation will take.
> The ACPI table for instance populate several idle states and their
> entry/exit time. Then you can decide if and when an entry/exit latency
> of 500us is PREEMPT_RT safe.
> 
>> Add a flag allowing a power domain provider to indicate it is RT safe.
>> The flag is supposed to be used with existing GENPD_FLAG_IRQ_SAFE.
>>
>> Cc: Adrien Thierry <athierry@redhat.com>
>> Cc: Brian Masney <bmasney@redhat.com>
>> Cc: linux-rt-users@vger.kernel.org
>> Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
>>
> …
>> index 1cd41bdf73cf..0a1600244963 100644
>> --- a/include/linux/pm_domain.h
>> +++ b/include/linux/pm_domain.h
>> @@ -61,6 +61,14 @@
>>   * GENPD_FLAG_MIN_RESIDENCY:	Enable the genpd governor to consider its
>>   *				components' next wakeup when determining the
>>   *				optimal idle state.
>> + *
>> + * GENPD_FLAG_RT_SAFE:		When used with GENPD_FLAG_IRQ_SAFE, this informs
>> + *				genpd that its backend callbacks, ->power_on|off(),
>> + *				do not use other spinlocks. They might use
>> + *				raw_spinlocks or other pre-emption-disable
>> + *				methods, all of which are PREEMPT_RT safe. Note
> 
> Please use spinlock_t and raw_spinlock_t. Wouldn't it be better to write
> "preemption" instead "pre-emption"?

Sure.

> The important part is probably that once a raw_spinlock_t has been
> acquired, it is not possible to invoke any function that acquries
> sleeping locks (which includes memory allocations). While even without
> that flag it is possible to invoke a function which disables and enables
> preemption on its own.
> 
>> + *				that, a genpd having this flag set, requires its
>> + *				masterdomains to also have it set.
> 
> This could be verified upon registration, no?

It is, just like the IRQ_SAFE flag. The code is symmetrical to IRQ_SAFE.

> It might be worth noting that preemption-off section during PM
> operations contribute to the system's max latency.

You mean in the commit msg? In the doc, I don't want to deviate from
IRQ_SAFE. It's not really related to the flag.

> Depending on how low
> the operation is, it may or may not be a problem.
> The ->power_on|off() refers to the sate of the component, right?

It refers to genpd framework.

Best regards,
Krzysztof
diff mbox series

Patch

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 967bcf9d415e..4dfce1d476f4 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -119,6 +119,48 @@  static const struct genpd_lock_ops genpd_spin_ops = {
 	.unlock = genpd_unlock_spin,
 };
 
+static void genpd_lock_rawspin(struct generic_pm_domain *genpd)
+	__acquires(&genpd->rslock)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&genpd->rslock, flags);
+	genpd->rlock_flags = flags;
+}
+
+static void genpd_lock_nested_rawspin(struct generic_pm_domain *genpd,
+					int depth)
+	__acquires(&genpd->rslock)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave_nested(&genpd->rslock, flags, depth);
+	genpd->rlock_flags = flags;
+}
+
+static int genpd_lock_interruptible_rawspin(struct generic_pm_domain *genpd)
+	__acquires(&genpd->rslock)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&genpd->rslock, flags);
+	genpd->rlock_flags = flags;
+	return 0;
+}
+
+static void genpd_unlock_rawspin(struct generic_pm_domain *genpd)
+	__releases(&genpd->rslock)
+{
+	raw_spin_unlock_irqrestore(&genpd->rslock, genpd->rlock_flags);
+}
+
+static const struct genpd_lock_ops genpd_rawspin_ops = {
+	.lock = genpd_lock_rawspin,
+	.lock_nested = genpd_lock_nested_rawspin,
+	.lock_interruptible = genpd_lock_interruptible_rawspin,
+	.unlock = genpd_unlock_rawspin,
+};
+
 #define genpd_lock(p)			p->lock_ops->lock(p)
 #define genpd_lock_nested(p, d)		p->lock_ops->lock_nested(p, d)
 #define genpd_lock_interruptible(p)	p->lock_ops->lock_interruptible(p)
@@ -126,6 +168,8 @@  static const struct genpd_lock_ops genpd_spin_ops = {
 
 #define genpd_status_on(genpd)		(genpd->status == GENPD_STATE_ON)
 #define genpd_is_irq_safe(genpd)	(genpd->flags & GENPD_FLAG_IRQ_SAFE)
+#define genpd_is_rt_safe(genpd)		(genpd_is_irq_safe(genpd) && \
+					 (genpd->flags & GENPD_FLAG_RT_SAFE))
 #define genpd_is_always_on(genpd)	(genpd->flags & GENPD_FLAG_ALWAYS_ON)
 #define genpd_is_active_wakeup(genpd)	(genpd->flags & GENPD_FLAG_ACTIVE_WAKEUP)
 #define genpd_is_cpu_domain(genpd)	(genpd->flags & GENPD_FLAG_CPU_DOMAIN)
@@ -1838,6 +1882,12 @@  static int genpd_add_subdomain(struct generic_pm_domain *genpd,
 		return -EINVAL;
 	}
 
+	if (!genpd_is_rt_safe(genpd) && genpd_is_rt_safe(subdomain)) {
+		WARN(1, "Parent %s of subdomain %s must be RT safe\n",
+		     genpd->name, subdomain->name);
+		return -EINVAL;
+	}
+
 	link = kzalloc(sizeof(*link), GFP_KERNEL);
 	if (!link)
 		return -ENOMEM;
@@ -2008,8 +2058,13 @@  static void genpd_free_data(struct generic_pm_domain *genpd)
 static void genpd_lock_init(struct generic_pm_domain *genpd)
 {
 	if (genpd->flags & GENPD_FLAG_IRQ_SAFE) {
-		spin_lock_init(&genpd->slock);
-		genpd->lock_ops = &genpd_spin_ops;
+		if (genpd->flags & GENPD_FLAG_RT_SAFE) {
+			raw_spin_lock_init(&genpd->rslock);
+			genpd->lock_ops = &genpd_rawspin_ops;
+		} else {
+			spin_lock_init(&genpd->slock);
+			genpd->lock_ops = &genpd_spin_ops;
+		}
 	} else {
 		mutex_init(&genpd->mlock);
 		genpd->lock_ops = &genpd_mtx_ops;
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 1cd41bdf73cf..0a1600244963 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -61,6 +61,14 @@ 
  * GENPD_FLAG_MIN_RESIDENCY:	Enable the genpd governor to consider its
  *				components' next wakeup when determining the
  *				optimal idle state.
+ *
+ * GENPD_FLAG_RT_SAFE:		When used with GENPD_FLAG_IRQ_SAFE, this informs
+ *				genpd that its backend callbacks, ->power_on|off(),
+ *				do not use other spinlocks. They might use
+ *				raw_spinlocks or other pre-emption-disable
+ *				methods, all of which are PREEMPT_RT safe. Note
+ *				that, a genpd having this flag set, requires its
+ *				masterdomains to also have it set.
  */
 #define GENPD_FLAG_PM_CLK	 (1U << 0)
 #define GENPD_FLAG_IRQ_SAFE	 (1U << 1)
@@ -69,6 +77,7 @@ 
 #define GENPD_FLAG_CPU_DOMAIN	 (1U << 4)
 #define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5)
 #define GENPD_FLAG_MIN_RESIDENCY (1U << 6)
+#define GENPD_FLAG_RT_SAFE	 (1U << 7)
 
 enum gpd_status {
 	GENPD_STATE_ON = 0,	/* PM domain is on */
@@ -164,6 +173,10 @@  struct generic_pm_domain {
 			spinlock_t slock;
 			unsigned long lock_flags;
 		};
+		struct {
+			raw_spinlock_t rslock;
+			unsigned long rlock_flags;
+		};
 	};
 
 };