diff mbox series

[1/3,RFC] PM: domains: Introduce .power_pre/post_on/off callbacks

Message ID 20221108013517.749665-1-marex@denx.de
State New
Headers show
Series [1/3,RFC] PM: domains: Introduce .power_pre/post_on/off callbacks | expand

Commit Message

Marek Vasut Nov. 8, 2022, 1:35 a.m. UTC
Currently it is possible that a power domain power on or off would claim
the genpd lock first and clock core prepare_lock second, while another
thread could do the reverse, and this would trigger lockdep warning.

Introduce new callbacks, .power_pre/post_on() and .power_off_pre/post(), which
are triggered before the genpd_lock() and after genpd_unlock() respectively in
case the domain is powered on and off. Those are meant to let drivers claim
clock core prepare_lock via clk_*prepare() call and release the lock via
clk_*unprepare() call to always assure that the clock and genpd lock ordering
is correct.

Signed-off-by: Marek Vasut <marex@denx.de>
---
Cc: Adam Ford <aford173@gmail.com>
Cc: Fabio Estevam <festevam@denx.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jacky Bai <ping.bai@nxp.com>
Cc: Kevin Hilman <khilman@kernel.org>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Liam Girdwood <lgirdwood@gmail.com>
Cc: Lucas Stach <l.stach@pengutronix.de>
Cc: Marek Vasut <marex@denx.de>
Cc: Mark Brown <broonie@kernel.org>
Cc: Martin Kepplinger <martink@posteo.de>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Peng Fan <peng.fan@nxp.com>
Cc: Pengutronix Kernel Team <kernel@pengutronix.de>
Cc: Philipp Zabel <p.zabel@pengutronix.de>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Shawn Guo <shawnguo@kernel.org>
Cc: Shengjiu Wang <shengjiu.wang@nxp.com>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: linux-clk@vger.kernel.org
Cc: linux-imx@nxp.com
Cc: linux-pm@vger.kernel.org
To: linux-arm-kernel@lists.infradead.org
---
 drivers/base/power/domain.c | 103 ++++++++++++++++++++++++++++++++----
 include/linux/pm_domain.h   |   4 ++
 2 files changed, 97 insertions(+), 10 deletions(-)

Comments

Laurent Pinchart Nov. 9, 2022, 1:19 p.m. UTC | #1
Hi Marek,

Thank you for the patch.

On Tue, Nov 08, 2022 at 02:35:15AM +0100, Marek Vasut wrote:
> Currently it is possible that a power domain power on or off would claim
> the genpd lock first and clock core prepare_lock second, while another
> thread could do the reverse, and this would trigger lockdep warning.
> 
> Introduce new callbacks, .power_pre/post_on() and .power_off_pre/post(), which
> are triggered before the genpd_lock() and after genpd_unlock() respectively in
> case the domain is powered on and off. Those are meant to let drivers claim
> clock core prepare_lock via clk_*prepare() call and release the lock via
> clk_*unprepare() call to always assure that the clock and genpd lock ordering
> is correct.
> 
> Signed-off-by: Marek Vasut <marex@denx.de>
> ---
> Cc: Adam Ford <aford173@gmail.com>
> Cc: Fabio Estevam <festevam@denx.de>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Cc: Jacky Bai <ping.bai@nxp.com>
> Cc: Kevin Hilman <khilman@kernel.org>
> Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
> Cc: Len Brown <len.brown@intel.com>
> Cc: Liam Girdwood <lgirdwood@gmail.com>
> Cc: Lucas Stach <l.stach@pengutronix.de>
> Cc: Marek Vasut <marex@denx.de>
> Cc: Mark Brown <broonie@kernel.org>
> Cc: Martin Kepplinger <martink@posteo.de>
> Cc: Pavel Machek <pavel@ucw.cz>
> Cc: Peng Fan <peng.fan@nxp.com>
> Cc: Pengutronix Kernel Team <kernel@pengutronix.de>
> Cc: Philipp Zabel <p.zabel@pengutronix.de>
> Cc: Rafael J. Wysocki <rafael@kernel.org>
> Cc: Sascha Hauer <s.hauer@pengutronix.de>
> Cc: Shawn Guo <shawnguo@kernel.org>
> Cc: Shengjiu Wang <shengjiu.wang@nxp.com>
> Cc: Stephen Boyd <sboyd@kernel.org>
> Cc: Ulf Hansson <ulf.hansson@linaro.org>
> Cc: linux-clk@vger.kernel.org
> Cc: linux-imx@nxp.com
> Cc: linux-pm@vger.kernel.org
> To: linux-arm-kernel@lists.infradead.org
> ---
>  drivers/base/power/domain.c | 103 ++++++++++++++++++++++++++++++++----
>  include/linux/pm_domain.h   |   4 ++
>  2 files changed, 97 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
> index 6471b559230e9..df2a93d0674e4 100644
> --- a/drivers/base/power/domain.c
> +++ b/drivers/base/power/domain.c
> @@ -494,6 +494,22 @@ void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next)
>  }
>  EXPORT_SYMBOL_GPL(dev_pm_genpd_set_next_wakeup);
>  
> +static int genpd_power_pre_on(struct generic_pm_domain *genpd)
> +{
> +	if (!genpd->power_pre_on)
> +		return 0;
> +
> +	return genpd->power_pre_on(genpd);
> +}
> +
> +static int genpd_power_post_on(struct generic_pm_domain *genpd)
> +{
> +	if (!genpd->power_post_on)
> +		return 0;
> +
> +	return genpd->power_post_on(genpd);
> +}
> +
>  static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed)
>  {
>  	unsigned int state_idx = genpd->state_idx;
> @@ -544,6 +560,22 @@ static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed)
>  	return ret;
>  }
>  
> +static int genpd_power_off_pre(struct generic_pm_domain *genpd)
> +{
> +	if (!genpd->power_off_pre)
> +		return 0;
> +
> +	return genpd->power_off_pre(genpd);
> +}
> +
> +static int genpd_power_off_post(struct generic_pm_domain *genpd)
> +{
> +	if (!genpd->power_off_post)
> +		return 0;
> +
> +	return genpd->power_off_post(genpd);
> +}
> +
>  static int _genpd_power_off(struct generic_pm_domain *genpd, bool timed)
>  {
>  	unsigned int state_idx = genpd->state_idx;
> @@ -816,12 +848,18 @@ static int genpd_dev_pm_qos_notifier(struct notifier_block *nb,
>  static void genpd_power_off_work_fn(struct work_struct *work)
>  {
>  	struct generic_pm_domain *genpd;
> +	int ret;
>  
>  	genpd = container_of(work, struct generic_pm_domain, power_off_work);
>  
> +	ret = genpd_power_off_pre(genpd);
> +	if (ret)
> +		return;
>  	genpd_lock(genpd);
>  	genpd_power_off(genpd, false, 0);
>  	genpd_unlock(genpd);
> +	ret = genpd_power_off_post(genpd);
> +	WARN_ON_ONCE(ret);
>  }
>  
>  /**
> @@ -938,12 +976,14 @@ static int genpd_runtime_suspend(struct device *dev)
>  	if (irq_safe_dev_in_sleep_domain(dev, genpd))
>  		return 0;
>  
> +	ret = genpd_power_off_pre(genpd);
> +	if (ret)
> +		return ret;
>  	genpd_lock(genpd);
>  	gpd_data->rpm_pstate = genpd_drop_performance_state(dev);
>  	genpd_power_off(genpd, true, 0);
>  	genpd_unlock(genpd);
> -
> -	return 0;
> +	return genpd_power_off_post(genpd);
>  }
>  
>  /**
> @@ -977,12 +1017,21 @@ static int genpd_runtime_resume(struct device *dev)
>  	if (irq_safe_dev_in_sleep_domain(dev, genpd))
>  		goto out;
>  
> +	ret = genpd_power_pre_on(genpd);
> +	if (ret)
> +		return ret;
>  	genpd_lock(genpd);
>  	ret = genpd_power_on(genpd, 0);
>  	if (!ret)
>  		genpd_restore_performance_state(dev, gpd_data->rpm_pstate);
>  	genpd_unlock(genpd);
>  
> +	if (ret) {
> +		genpd_power_post_on(genpd);
> +		return ret;
> +	}
> +
> +	ret = genpd_power_post_on(genpd);
>  	if (ret)
>  		return ret;
>  
> @@ -1017,10 +1066,13 @@ static int genpd_runtime_resume(struct device *dev)
>  	genpd_stop_dev(genpd, dev);
>  err_poweroff:
>  	if (!pm_runtime_is_irq_safe(dev) || genpd_is_irq_safe(genpd)) {
> -		genpd_lock(genpd);
> -		gpd_data->rpm_pstate = genpd_drop_performance_state(dev);
> -		genpd_power_off(genpd, true, 0);
> -		genpd_unlock(genpd);
> +		if (!genpd_power_off_pre(genpd)) {
> +			genpd_lock(genpd);
> +			gpd_data->rpm_pstate = genpd_drop_performance_state(dev);
> +			genpd_power_off(genpd, true, 0);
> +			genpd_unlock(genpd);
> +			genpd_power_off_post(genpd);
> +		}
>  	}
>  
>  	return ret;
> @@ -1225,12 +1277,14 @@ static int genpd_finish_suspend(struct device *dev, bool poweroff)
>  		}
>  	}
>  
> +	ret = genpd_power_off_pre(genpd);
> +	if (ret)
> +		return ret;
>  	genpd_lock(genpd);
>  	genpd->suspended_count++;
>  	genpd_sync_power_off(genpd, true, 0);
>  	genpd_unlock(genpd);
> -
> -	return 0;
> +	return genpd_power_off_post(genpd);
>  }
>  
>  /**
> @@ -1267,10 +1321,16 @@ static int genpd_resume_noirq(struct device *dev)
>  	if (device_wakeup_path(dev) && genpd_is_active_wakeup(genpd))
>  		return pm_generic_resume_noirq(dev);
>  
> +	ret = genpd_power_pre_on(genpd);
> +	if (ret)
> +		return ret;
>  	genpd_lock(genpd);
>  	genpd_sync_power_on(genpd, true, 0);
>  	genpd->suspended_count--;
>  	genpd_unlock(genpd);
> +	ret = genpd_power_post_on(genpd);
> +	if (ret)
> +		return ret;
>  
>  	if (genpd->dev_ops.stop && genpd->dev_ops.start &&
>  	    !pm_runtime_status_suspended(dev)) {
> @@ -1378,6 +1438,9 @@ static int genpd_restore_noirq(struct device *dev)
>  	 * At this point suspended_count == 0 means we are being run for the
>  	 * first time for the given domain in the present cycle.
>  	 */
> +	ret = genpd_power_pre_on(genpd);
> +	if (ret)
> +		return ret;
>  	genpd_lock(genpd);
>  	if (genpd->suspended_count++ == 0) {
>  		/*
> @@ -1390,6 +1453,9 @@ static int genpd_restore_noirq(struct device *dev)
>  
>  	genpd_sync_power_on(genpd, true, 0);
>  	genpd_unlock(genpd);
> +	ret = genpd_power_post_on(genpd);
> +	if (ret)
> +		return ret;
>  
>  	if (genpd->dev_ops.stop && genpd->dev_ops.start &&
>  	    !pm_runtime_status_suspended(dev)) {
> @@ -1413,6 +1479,7 @@ static int genpd_restore_noirq(struct device *dev)
>  static void genpd_complete(struct device *dev)
>  {
>  	struct generic_pm_domain *genpd;
> +	int ret;

This variable is unused, causing a compilation error.

>  
>  	dev_dbg(dev, "%s()\n", __func__);
>  
> @@ -1435,6 +1502,7 @@ static void genpd_switch_state(struct device *dev, bool suspend)
>  {
>  	struct generic_pm_domain *genpd;
>  	bool use_lock;
> +	int ret;
>  
>  	genpd = dev_to_genpd_safe(dev);
>  	if (!genpd)
> @@ -1442,8 +1510,13 @@ static void genpd_switch_state(struct device *dev, bool suspend)
>  
>  	use_lock = genpd_is_irq_safe(genpd);
>  
> -	if (use_lock)
> +	if (use_lock) {
> +		ret = suspend ? genpd_power_off_pre(genpd) :
> +				genpd_power_pre_on(genpd);
> +		if (ret)
> +			return;
>  		genpd_lock(genpd);
> +	}
>  
>  	if (suspend) {
>  		genpd->suspended_count++;
> @@ -1453,8 +1526,12 @@ static void genpd_switch_state(struct device *dev, bool suspend)
>  		genpd->suspended_count--;
>  	}
>  
> -	if (use_lock)
> +	if (use_lock) {
>  		genpd_unlock(genpd);
> +		ret = suspend ? genpd_power_off_post(genpd) :
> +				genpd_power_post_on(genpd);
> +		WARN_ON_ONCE(ret);
> +	}
>  }
>  
>  /**
> @@ -2750,9 +2827,15 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev,
>  	dev->pm_domain->sync = genpd_dev_pm_sync;
>  
>  	if (power_on) {
> +		ret = genpd_power_pre_on(pd);
> +		if (ret)
> +			return ret;
>  		genpd_lock(pd);
>  		ret = genpd_power_on(pd, 0);
>  		genpd_unlock(pd);
> +		ret = genpd_power_post_on(pd);
> +		if (ret)
> +			return ret;
>  	}
>  
>  	if (ret) {
> diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
> index ebc3516980907..3cf231a27cb1b 100644
> --- a/include/linux/pm_domain.h
> +++ b/include/linux/pm_domain.h
> @@ -134,8 +134,12 @@ struct generic_pm_domain {
>  	unsigned int prepared_count;	/* Suspend counter of prepared devices */
>  	unsigned int performance_state;	/* Aggregated max performance state */
>  	cpumask_var_t cpus;		/* A cpumask of the attached CPUs */
> +	int (*power_off_pre)(struct generic_pm_domain *domain);
>  	int (*power_off)(struct generic_pm_domain *domain);
> +	int (*power_off_post)(struct generic_pm_domain *domain);
> +	int (*power_pre_on)(struct generic_pm_domain *domain);
>  	int (*power_on)(struct generic_pm_domain *domain);
> +	int (*power_post_on)(struct generic_pm_domain *domain);
>  	struct raw_notifier_head power_notifiers; /* Power on/off notifiers */
>  	struct opp_table *opp_table;	/* OPP table of the genpd */
>  	unsigned int (*opp_to_performance_state)(struct generic_pm_domain *genpd,
Marek Vasut Nov. 9, 2022, 1:25 p.m. UTC | #2
On 11/9/22 14:19, Laurent Pinchart wrote:

Hi,

[...]

>> @@ -1413,6 +1479,7 @@ static int genpd_restore_noirq(struct device *dev)
>>   static void genpd_complete(struct device *dev)
>>   {
>>   	struct generic_pm_domain *genpd;
>> +	int ret;
> 
> This variable is unused, causing a compilation error.

I suspect only with -Werror, but anyway, already fixed locally, it's a 
rebase on latest next artifact.

[...]

>> diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
>> index ebc3516980907..3cf231a27cb1b 100644
>> --- a/include/linux/pm_domain.h
>> +++ b/include/linux/pm_domain.h
>> @@ -134,8 +134,12 @@ struct generic_pm_domain {
>>   	unsigned int prepared_count;	/* Suspend counter of prepared devices */
>>   	unsigned int performance_state;	/* Aggregated max performance state */
>>   	cpumask_var_t cpus;		/* A cpumask of the attached CPUs */
>> +	int (*power_off_pre)(struct generic_pm_domain *domain);
>>   	int (*power_off)(struct generic_pm_domain *domain);
>> +	int (*power_off_post)(struct generic_pm_domain *domain);
>> +	int (*power_pre_on)(struct generic_pm_domain *domain);
>>   	int (*power_on)(struct generic_pm_domain *domain);
>> +	int (*power_post_on)(struct generic_pm_domain *domain);
>>   	struct raw_notifier_head power_notifiers; /* Power on/off notifiers */
>>   	struct opp_table *opp_table;	/* OPP table of the genpd */
>>   	unsigned int (*opp_to_performance_state)(struct generic_pm_domain *genpd,

I am looking more for a feedback on this extension of the callbacks, and 
on the overall approach. Is this something which looks OK, or would 
there be better way to handle this ?
Peng Fan (OSS) Nov. 11, 2022, 8:27 a.m. UTC | #3
Hi Marek,


On 11/8/2022 9:35 AM, Marek Vasut wrote:
> It is possible for clk_disable_unused() to trigger lockdep warning
> regarding lock ordering in this driver. This happens in case of the
> following conditions:
> 
> A) clock core clk_disable_unused() triggers the following sequence in a
>     driver which also uses GPCv2 domain:
>     - clk_prepare_lock() -> obtains clock core prepare_lock
>     - pm_runtime_get*() -> obtains &blk_ctrl_genpd_lock_class
> 
> B) driver powers up a power domain and triggers the following sequence
>     in GPCv2:
>     - pm_runtime_get_sync() -> obtains &blk_ctrl_genpd_lock_class
>     - clk_bulk_prepare_enable() -> obtains clock core prepare_lock
> 
> This can lead to a deadlock in case A and B runs on separate CPUs.
> 
> To avoid the deadlock, split clk_*prepare() from clk_*enable() and call
> the former in power_pre_on() callback, before pm_runtime_get_sync(). The
> reverse is implemented in the power_off_post() callback in the same way.
> This way, the GPCv2 driver always claims the prepare_lock before
> blk_ctrl_genpd_lock_class and the deadlock is avoided.


How about using notifier such as, GENPD_NOTIFY_PRE_ON?

Regards,

> 
> Signed-off-by: Marek Vasut <marex@denx.de>
> ---
> Cc: Adam Ford <aford173@gmail.com>
> Cc: Fabio Estevam <festevam@denx.de>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Cc: Jacky Bai <ping.bai@nxp.com>
> Cc: Kevin Hilman <khilman@kernel.org>
> Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
> Cc: Len Brown <len.brown@intel.com>
> Cc: Liam Girdwood <lgirdwood@gmail.com>
> Cc: Lucas Stach <l.stach@pengutronix.de>
> Cc: Marek Vasut <marex@denx.de>
> Cc: Mark Brown <broonie@kernel.org>
> Cc: Martin Kepplinger <martink@posteo.de>
> Cc: Pavel Machek <pavel@ucw.cz>
> Cc: Peng Fan <peng.fan@nxp.com>
> Cc: Pengutronix Kernel Team <kernel@pengutronix.de>
> Cc: Philipp Zabel <p.zabel@pengutronix.de>
> Cc: Rafael J. Wysocki <rafael@kernel.org>
> Cc: Sascha Hauer <s.hauer@pengutronix.de>
> Cc: Shawn Guo <shawnguo@kernel.org>
> Cc: Shengjiu Wang <shengjiu.wang@nxp.com>
> Cc: Stephen Boyd <sboyd@kernel.org>
> Cc: Ulf Hansson <ulf.hansson@linaro.org>
> Cc: linux-clk@vger.kernel.org
> Cc: linux-imx@nxp.com
> Cc: linux-pm@vger.kernel.org
> To: linux-arm-kernel@lists.infradead.org
> ---
>   drivers/soc/imx/gpcv2.c | 74 ++++++++++++++++++++++++++++++++++++-----
>   1 file changed, 66 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/soc/imx/gpcv2.c b/drivers/soc/imx/gpcv2.c
> index 7a47d14fde445..8d27a227ba02d 100644
> --- a/drivers/soc/imx/gpcv2.c
> +++ b/drivers/soc/imx/gpcv2.c
> @@ -298,6 +298,8 @@ struct imx_pgc_domain {
>   
>   	unsigned int pgc_sw_pup_reg;
>   	unsigned int pgc_sw_pdn_reg;
> +
> +	int enabled;
>   };
>   
>   struct imx_pgc_domain_data {
> @@ -313,6 +315,52 @@ to_imx_pgc_domain(struct generic_pm_domain *genpd)
>   	return container_of(genpd, struct imx_pgc_domain, genpd);
>   }
>   
> +static int imx_pgc_power_pre_up(struct generic_pm_domain *genpd)
> +{
> +	struct imx_pgc_domain *domain = to_imx_pgc_domain(genpd);
> +	int ret;
> +
> +	ret = clk_bulk_prepare(domain->num_clks, domain->clks);
> +	if (ret)
> +		dev_err(domain->dev, "failed to prepare reset clocks\n");
> +
> +	return ret;
> +}
> +
> +static int imx_pgc_power_post_up(struct generic_pm_domain *genpd)
> +{
> +	struct imx_pgc_domain *domain = to_imx_pgc_domain(genpd);
> +
> +	if (!domain->keep_clocks && domain->enabled)
> +		clk_bulk_unprepare(domain->num_clks, domain->clks);
> +
> +	return 0;
> +}
> +
> +static int imx_pgc_power_down_pre(struct generic_pm_domain *genpd)
> +{
> +	struct imx_pgc_domain *domain = to_imx_pgc_domain(genpd);
> +	int ret;
> +
> +	if (!domain->keep_clocks || !domain->enabled) {
> +		ret = clk_bulk_prepare(domain->num_clks, domain->clks);
> +		if (ret)
> +			dev_err(domain->dev, "failed to prepare reset clocks\n");
> +	}
> +
> +	return ret;
> +}
> +
> +static int imx_pgc_power_down_post(struct generic_pm_domain *genpd)
> +{
> +	struct imx_pgc_domain *domain = to_imx_pgc_domain(genpd);
> +
> +	if (!domain->keep_clocks || !domain->enabled)
> +		clk_bulk_unprepare(domain->num_clks, domain->clks);
> +
> +	return 0;
> +}
> +
>   static int imx_pgc_power_up(struct generic_pm_domain *genpd)
>   {
>   	struct imx_pgc_domain *domain = to_imx_pgc_domain(genpd);
> @@ -338,7 +386,7 @@ static int imx_pgc_power_up(struct generic_pm_domain *genpd)
>   	reset_control_assert(domain->reset);
>   
>   	/* Enable reset clocks for all devices in the domain */
> -	ret = clk_bulk_prepare_enable(domain->num_clks, domain->clks);
> +	ret = clk_bulk_enable(domain->num_clks, domain->clks);
>   	if (ret) {
>   		dev_err(domain->dev, "failed to enable reset clocks\n");
>   		goto out_regulator_disable;
> @@ -397,12 +445,14 @@ static int imx_pgc_power_up(struct generic_pm_domain *genpd)
>   
>   	/* Disable reset clocks for all devices in the domain */
>   	if (!domain->keep_clocks)
> -		clk_bulk_disable_unprepare(domain->num_clks, domain->clks);
> +		clk_bulk_disable(domain->num_clks, domain->clks);
> +
> +	domain->enabled++;
>   
>   	return 0;
>   
>   out_clk_disable:
> -	clk_bulk_disable_unprepare(domain->num_clks, domain->clks);
> +	clk_bulk_disable(domain->num_clks, domain->clks);
>   out_regulator_disable:
>   	if (!IS_ERR(domain->regulator))
>   		regulator_disable(domain->regulator);
> @@ -420,7 +470,7 @@ static int imx_pgc_power_down(struct generic_pm_domain *genpd)
>   
>   	/* Enable reset clocks for all devices in the domain */
>   	if (!domain->keep_clocks) {
> -		ret = clk_bulk_prepare_enable(domain->num_clks, domain->clks);
> +		ret = clk_bulk_enable(domain->num_clks, domain->clks);
>   		if (ret) {
>   			dev_err(domain->dev, "failed to enable reset clocks\n");
>   			return ret;
> @@ -467,7 +517,7 @@ static int imx_pgc_power_down(struct generic_pm_domain *genpd)
>   	}
>   
>   	/* Disable reset clocks for all devices in the domain */
> -	clk_bulk_disable_unprepare(domain->num_clks, domain->clks);
> +	clk_bulk_disable(domain->num_clks, domain->clks);
>   
>   	if (!IS_ERR(domain->regulator)) {
>   		ret = regulator_disable(domain->regulator);
> @@ -479,13 +529,17 @@ static int imx_pgc_power_down(struct generic_pm_domain *genpd)
>   		}
>   	}
>   
> +	domain->enabled--;
> +
>   	pm_runtime_put_sync_suspend(domain->dev);
>   
>   	return 0;
>   
>   out_clk_disable:
>   	if (!domain->keep_clocks)
> -		clk_bulk_disable_unprepare(domain->num_clks, domain->clks);
> +		clk_bulk_disable(domain->num_clks, domain->clks);
> +
> +	domain->enabled--;
>   
>   	return ret;
>   }
> @@ -1514,8 +1568,12 @@ static int imx_gpcv2_probe(struct platform_device *pdev)
>   		domain->regmap = regmap;
>   		domain->regs = domain_data->pgc_regs;
>   
> -		domain->genpd.power_on  = imx_pgc_power_up;
> -		domain->genpd.power_off = imx_pgc_power_down;
> +		domain->genpd.power_pre_on	= imx_pgc_power_pre_up;
> +		domain->genpd.power_on		= imx_pgc_power_up;
> +		domain->genpd.power_post_on	= imx_pgc_power_post_up;
> +		domain->genpd.power_off_pre	= imx_pgc_power_down_pre;
> +		domain->genpd.power_off		= imx_pgc_power_down;
> +		domain->genpd.power_off_post	= imx_pgc_power_down_post;
>   
>   		pd_pdev->dev.parent = dev;
>   		pd_pdev->dev.of_node = np;
Ulf Hansson Nov. 14, 2022, 7:40 p.m. UTC | #4
On Tue, 8 Nov 2022 at 02:35, Marek Vasut <marex@denx.de> wrote:
>
> Currently it is possible that a power domain power on or off would claim
> the genpd lock first and clock core prepare_lock second, while another
> thread could do the reverse, and this would trigger lockdep warning.

I am not quite sure I fully understand. In this case is the lockdep
warning relevant or just something that we want to silence?

>
> Introduce new callbacks, .power_pre/post_on() and .power_off_pre/post(), which
> are triggered before the genpd_lock() and after genpd_unlock() respectively in
> case the domain is powered on and off. Those are meant to let drivers claim
> clock core prepare_lock via clk_*prepare() call and release the lock via
> clk_*unprepare() call to always assure that the clock and genpd lock ordering
> is correct.

To me, this sounds like a problem that may be better fixed by trying
to model the parent/child-domains in a more strict way, through genpd.

There is a comment in the code in imx8mp_blk_ctrl_probe() that seems
to be pointing in this direction too.

"* We use runtime PM to trigger power on/off of the upstream GPC
  * domain, as a strict hierarchical parent/child power domain
  * setup doesn't allow us to meet the sequencing requirements......"

I am wondering about what those "sequencing requirements" are - and
whether it could make better sense to fix these issues instead?

Kind regards
Uffe

>
> Signed-off-by: Marek Vasut <marex@denx.de>
> ---
> Cc: Adam Ford <aford173@gmail.com>
> Cc: Fabio Estevam <festevam@denx.de>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Cc: Jacky Bai <ping.bai@nxp.com>
> Cc: Kevin Hilman <khilman@kernel.org>
> Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
> Cc: Len Brown <len.brown@intel.com>
> Cc: Liam Girdwood <lgirdwood@gmail.com>
> Cc: Lucas Stach <l.stach@pengutronix.de>
> Cc: Marek Vasut <marex@denx.de>
> Cc: Mark Brown <broonie@kernel.org>
> Cc: Martin Kepplinger <martink@posteo.de>
> Cc: Pavel Machek <pavel@ucw.cz>
> Cc: Peng Fan <peng.fan@nxp.com>
> Cc: Pengutronix Kernel Team <kernel@pengutronix.de>
> Cc: Philipp Zabel <p.zabel@pengutronix.de>
> Cc: Rafael J. Wysocki <rafael@kernel.org>
> Cc: Sascha Hauer <s.hauer@pengutronix.de>
> Cc: Shawn Guo <shawnguo@kernel.org>
> Cc: Shengjiu Wang <shengjiu.wang@nxp.com>
> Cc: Stephen Boyd <sboyd@kernel.org>
> Cc: Ulf Hansson <ulf.hansson@linaro.org>
> Cc: linux-clk@vger.kernel.org
> Cc: linux-imx@nxp.com
> Cc: linux-pm@vger.kernel.org
> To: linux-arm-kernel@lists.infradead.org
> ---
>  drivers/base/power/domain.c | 103 ++++++++++++++++++++++++++++++++----
>  include/linux/pm_domain.h   |   4 ++
>  2 files changed, 97 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
> index 6471b559230e9..df2a93d0674e4 100644
> --- a/drivers/base/power/domain.c
> +++ b/drivers/base/power/domain.c
> @@ -494,6 +494,22 @@ void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next)
>  }
>  EXPORT_SYMBOL_GPL(dev_pm_genpd_set_next_wakeup);
>
> +static int genpd_power_pre_on(struct generic_pm_domain *genpd)
> +{
> +       if (!genpd->power_pre_on)
> +               return 0;
> +
> +       return genpd->power_pre_on(genpd);
> +}
> +
> +static int genpd_power_post_on(struct generic_pm_domain *genpd)
> +{
> +       if (!genpd->power_post_on)
> +               return 0;
> +
> +       return genpd->power_post_on(genpd);
> +}
> +
>  static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed)
>  {
>         unsigned int state_idx = genpd->state_idx;
> @@ -544,6 +560,22 @@ static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed)
>         return ret;
>  }
>
> +static int genpd_power_off_pre(struct generic_pm_domain *genpd)
> +{
> +       if (!genpd->power_off_pre)
> +               return 0;
> +
> +       return genpd->power_off_pre(genpd);
> +}
> +
> +static int genpd_power_off_post(struct generic_pm_domain *genpd)
> +{
> +       if (!genpd->power_off_post)
> +               return 0;
> +
> +       return genpd->power_off_post(genpd);
> +}
> +
>  static int _genpd_power_off(struct generic_pm_domain *genpd, bool timed)
>  {
>         unsigned int state_idx = genpd->state_idx;
> @@ -816,12 +848,18 @@ static int genpd_dev_pm_qos_notifier(struct notifier_block *nb,
>  static void genpd_power_off_work_fn(struct work_struct *work)
>  {
>         struct generic_pm_domain *genpd;
> +       int ret;
>
>         genpd = container_of(work, struct generic_pm_domain, power_off_work);
>
> +       ret = genpd_power_off_pre(genpd);
> +       if (ret)
> +               return;
>         genpd_lock(genpd);
>         genpd_power_off(genpd, false, 0);
>         genpd_unlock(genpd);
> +       ret = genpd_power_off_post(genpd);
> +       WARN_ON_ONCE(ret);
>  }
>
>  /**
> @@ -938,12 +976,14 @@ static int genpd_runtime_suspend(struct device *dev)
>         if (irq_safe_dev_in_sleep_domain(dev, genpd))
>                 return 0;
>
> +       ret = genpd_power_off_pre(genpd);
> +       if (ret)
> +               return ret;
>         genpd_lock(genpd);
>         gpd_data->rpm_pstate = genpd_drop_performance_state(dev);
>         genpd_power_off(genpd, true, 0);
>         genpd_unlock(genpd);
> -
> -       return 0;
> +       return genpd_power_off_post(genpd);
>  }
>
>  /**
> @@ -977,12 +1017,21 @@ static int genpd_runtime_resume(struct device *dev)
>         if (irq_safe_dev_in_sleep_domain(dev, genpd))
>                 goto out;
>
> +       ret = genpd_power_pre_on(genpd);
> +       if (ret)
> +               return ret;
>         genpd_lock(genpd);
>         ret = genpd_power_on(genpd, 0);
>         if (!ret)
>                 genpd_restore_performance_state(dev, gpd_data->rpm_pstate);
>         genpd_unlock(genpd);
>
> +       if (ret) {
> +               genpd_power_post_on(genpd);
> +               return ret;
> +       }
> +
> +       ret = genpd_power_post_on(genpd);
>         if (ret)
>                 return ret;
>
> @@ -1017,10 +1066,13 @@ static int genpd_runtime_resume(struct device *dev)
>         genpd_stop_dev(genpd, dev);
>  err_poweroff:
>         if (!pm_runtime_is_irq_safe(dev) || genpd_is_irq_safe(genpd)) {
> -               genpd_lock(genpd);
> -               gpd_data->rpm_pstate = genpd_drop_performance_state(dev);
> -               genpd_power_off(genpd, true, 0);
> -               genpd_unlock(genpd);
> +               if (!genpd_power_off_pre(genpd)) {
> +                       genpd_lock(genpd);
> +                       gpd_data->rpm_pstate = genpd_drop_performance_state(dev);
> +                       genpd_power_off(genpd, true, 0);
> +                       genpd_unlock(genpd);
> +                       genpd_power_off_post(genpd);
> +               }
>         }
>
>         return ret;
> @@ -1225,12 +1277,14 @@ static int genpd_finish_suspend(struct device *dev, bool poweroff)
>                 }
>         }
>
> +       ret = genpd_power_off_pre(genpd);
> +       if (ret)
> +               return ret;
>         genpd_lock(genpd);
>         genpd->suspended_count++;
>         genpd_sync_power_off(genpd, true, 0);
>         genpd_unlock(genpd);
> -
> -       return 0;
> +       return genpd_power_off_post(genpd);
>  }
>
>  /**
> @@ -1267,10 +1321,16 @@ static int genpd_resume_noirq(struct device *dev)
>         if (device_wakeup_path(dev) && genpd_is_active_wakeup(genpd))
>                 return pm_generic_resume_noirq(dev);
>
> +       ret = genpd_power_pre_on(genpd);
> +       if (ret)
> +               return ret;
>         genpd_lock(genpd);
>         genpd_sync_power_on(genpd, true, 0);
>         genpd->suspended_count--;
>         genpd_unlock(genpd);
> +       ret = genpd_power_post_on(genpd);
> +       if (ret)
> +               return ret;
>
>         if (genpd->dev_ops.stop && genpd->dev_ops.start &&
>             !pm_runtime_status_suspended(dev)) {
> @@ -1378,6 +1438,9 @@ static int genpd_restore_noirq(struct device *dev)
>          * At this point suspended_count == 0 means we are being run for the
>          * first time for the given domain in the present cycle.
>          */
> +       ret = genpd_power_pre_on(genpd);
> +       if (ret)
> +               return ret;
>         genpd_lock(genpd);
>         if (genpd->suspended_count++ == 0) {
>                 /*
> @@ -1390,6 +1453,9 @@ static int genpd_restore_noirq(struct device *dev)
>
>         genpd_sync_power_on(genpd, true, 0);
>         genpd_unlock(genpd);
> +       ret = genpd_power_post_on(genpd);
> +       if (ret)
> +               return ret;
>
>         if (genpd->dev_ops.stop && genpd->dev_ops.start &&
>             !pm_runtime_status_suspended(dev)) {
> @@ -1413,6 +1479,7 @@ static int genpd_restore_noirq(struct device *dev)
>  static void genpd_complete(struct device *dev)
>  {
>         struct generic_pm_domain *genpd;
> +       int ret;
>
>         dev_dbg(dev, "%s()\n", __func__);
>
> @@ -1435,6 +1502,7 @@ static void genpd_switch_state(struct device *dev, bool suspend)
>  {
>         struct generic_pm_domain *genpd;
>         bool use_lock;
> +       int ret;
>
>         genpd = dev_to_genpd_safe(dev);
>         if (!genpd)
> @@ -1442,8 +1510,13 @@ static void genpd_switch_state(struct device *dev, bool suspend)
>
>         use_lock = genpd_is_irq_safe(genpd);
>
> -       if (use_lock)
> +       if (use_lock) {
> +               ret = suspend ? genpd_power_off_pre(genpd) :
> +                               genpd_power_pre_on(genpd);
> +               if (ret)
> +                       return;
>                 genpd_lock(genpd);
> +       }
>
>         if (suspend) {
>                 genpd->suspended_count++;
> @@ -1453,8 +1526,12 @@ static void genpd_switch_state(struct device *dev, bool suspend)
>                 genpd->suspended_count--;
>         }
>
> -       if (use_lock)
> +       if (use_lock) {
>                 genpd_unlock(genpd);
> +               ret = suspend ? genpd_power_off_post(genpd) :
> +                               genpd_power_post_on(genpd);
> +               WARN_ON_ONCE(ret);
> +       }
>  }
>
>  /**
> @@ -2750,9 +2827,15 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev,
>         dev->pm_domain->sync = genpd_dev_pm_sync;
>
>         if (power_on) {
> +               ret = genpd_power_pre_on(pd);
> +               if (ret)
> +                       return ret;
>                 genpd_lock(pd);
>                 ret = genpd_power_on(pd, 0);
>                 genpd_unlock(pd);
> +               ret = genpd_power_post_on(pd);
> +               if (ret)
> +                       return ret;
>         }
>
>         if (ret) {
> diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
> index ebc3516980907..3cf231a27cb1b 100644
> --- a/include/linux/pm_domain.h
> +++ b/include/linux/pm_domain.h
> @@ -134,8 +134,12 @@ struct generic_pm_domain {
>         unsigned int prepared_count;    /* Suspend counter of prepared devices */
>         unsigned int performance_state; /* Aggregated max performance state */
>         cpumask_var_t cpus;             /* A cpumask of the attached CPUs */
> +       int (*power_off_pre)(struct generic_pm_domain *domain);
>         int (*power_off)(struct generic_pm_domain *domain);
> +       int (*power_off_post)(struct generic_pm_domain *domain);
> +       int (*power_pre_on)(struct generic_pm_domain *domain);
>         int (*power_on)(struct generic_pm_domain *domain);
> +       int (*power_post_on)(struct generic_pm_domain *domain);
>         struct raw_notifier_head power_notifiers; /* Power on/off notifiers */
>         struct opp_table *opp_table;    /* OPP table of the genpd */
>         unsigned int (*opp_to_performance_state)(struct generic_pm_domain *genpd,
> --
> 2.35.1
>
Ulf Hansson Nov. 16, 2022, 4:30 p.m. UTC | #5
On Wed, 16 Nov 2022 at 14:25, Lucas Stach <l.stach@pengutronix.de> wrote:
>
> Hi Ulf,
>
> Am Mittwoch, dem 16.11.2022 um 13:41 +0100 schrieb Ulf Hansson:
> > + Stephen Boyd
> >
> > On Mon, 14 Nov 2022 at 21:32, Marek Vasut <marex@denx.de> wrote:
> > >
> > > On 11/14/22 20:40, Ulf Hansson wrote:
> > > > On Tue, 8 Nov 2022 at 02:35, Marek Vasut <marex@denx.de> wrote:
> > > > >
> > > > > Currently it is possible that a power domain power on or off would claim
> > > > > the genpd lock first and clock core prepare_lock second, while another
> > > > > thread could do the reverse, and this would trigger lockdep warning.
> > > >
> > > > I am not quite sure I fully understand. In this case is the lockdep
> > > > warning relevant or just something that we want to silence?
> > >
> > > This is a valid problem, see patches 2/3 and 3/3 for details too.
> > >
> > > > > Introduce new callbacks, .power_pre/post_on() and .power_off_pre/post(), which
> > > > > are triggered before the genpd_lock() and after genpd_unlock() respectively in
> > > > > case the domain is powered on and off. Those are meant to let drivers claim
> > > > > clock core prepare_lock via clk_*prepare() call and release the lock via
> > > > > clk_*unprepare() call to always assure that the clock and genpd lock ordering
> > > > > is correct.
> > > >
> > > > To me, this sounds like a problem that may be better fixed by trying
> > > > to model the parent/child-domains in a more strict way, through genpd.
> > > >
> > > > There is a comment in the code in imx8mp_blk_ctrl_probe() that seems
> > > > to be pointing in this direction too.
> > > >
> > > > "* We use runtime PM to trigger power on/off of the upstream GPC
> > > >    * domain, as a strict hierarchical parent/child power domain
> > > >    * setup doesn't allow us to meet the sequencing requirements......"
> > > >
> > > > I am wondering about what those "sequencing requirements" are - and
> > > > whether it could make better sense to fix these issues instead?
> > >
> > > Here is the lockdep splat:
> > >
> > > https://lore.kernel.org/all/Y1cs++TV2GCuh4tS@pendragon.ideasonboard.com/
> >
> > Yes, that certainly helped!
> >
> > >
> > > It really is a problem between the clock and genpd subsystem locks, they
> > > can be claimed in arbitrary order, see patch 2/3 and 3/3.
> > >
> > > I think that might clarify what I am attempting to solve here.
> >
> > Let me try to put some more words behind this, to make sure I have
> > understood correctly, but also to easier allow more people to chim in.
> >
> > Note that, in your commit messages in patch2 and patch3, you are
> > mentioning clk_disable_unused(), but that's not what the lockdep splat
> > above is pointing at. Although, it seems the clk_disable_unused()
> > thingy, would trigger a similar problem for this configuration for the
> > imx8mp platform.
> >
> > Case #1:
> > Triggered from the workqueue, the genpd_power_off_work_fn() ends up
> > calling clk_bulk_unprepare(), from a genpd's ->power_off() callback(),
> > which has been assigned to imx8mp_blk_ctrl_power_off(). Before genpd's
> > ->power_off() is called, the genpd-lock(s) have been acquired, thus we
> > are trying to acquire the global clk-prepare lock via
> > clk_bulk_unprepare() while holding the genpd-lock(s).
> >
> > Case #0:
> > The "drm driver" calls clk_set_rate(), thus we start by acquiring the
> > global clk-prepare lock. Internally in the clock frameworks, the
> > clk_set_rate() path continues to call clk_pm_runtime_get(). In this
> > case, the corresponding clock provider's struct *device, seems to be
> > attached to a genpd too. This means the call to clk_pm_runtime_get()
> > ends up in genpd_runtime_resume(), which needs to acquire the
> > genpd-lock(s) before it continues to call genpd_power_on() to power-on
> > the PM domain. In other words, we try to acquire genpd-lock(s) while
> > holding the global clk-prepare lock.
> >
> > The solution to fix these problems that you suggest in the $subject
> > patch, isn't the direction I want this to take. The new callbacks are
> > prone to be abused and it would also require genpd provider specific
> > code to fix the problems. Of course, we need things to work, but let's
> > look at a couple of options first. See below.
> >
> > 1)
> > In a way, it looks like we have a circular description in DT of the
> > hierarchy of the clock- and genpd providers, which is a bit awkward in
> > my opinion. I was browsing the imx8mp DTS files to find this, but I
> > couldn't. Can you perhaps point me to the DTS file(s) you are using? I
> > can try to have a look so see if this could be arranged differently.
>
> The dependency chain isn't circular, it just happens to converge in the
> clock framework and its single big hammer lock. The chain looks some
> thing like this:
>
> 1. DRM driver request pixel clock (clk_prepare_enable ->
> clk_prepare_mutex)
> 2. Pixel clock is supplied from the PHY, which is in a power domain, so
> in order to supply the clock it needs to runtime resume
> 3. genpd powers up the PHY blk-ctrl domain, which again is inside a
> GPCv2 power domain
> 4. genpd powers up GPCv2 domain, which needs a specific clock to be
> running in order to power up the domain, so it does a
> clk_prepare_enable, which now happens to hit the clk_prepare_mutex
> taken in step 1.
>
> As the runtime resume/suspend for the PHY may go through a workqueue we
> have two different contexts trying to take the clk_prepare_mutex, which
> is what lockdep complains about.

I see. Thanks for bringing some more clarity in this.

So the above is described in some of the in-kernel DTS(i) files too?

>
> >
> > 2)
> > What we have seen from other use cases [1], is that calling
> > pm_runtime_get|put*(), while holding subsystem specific locks (like
> > the genpd-lock(s) and clk-prepare lock), isn't working very well. So,
> > I am thinking that we could have a look at the runtime PM deployment
> > in the clock framework, to see if we can avoid holding the global
> > clk-prepare lock, while calling into runtime PM. I believe that should
> > fix these problems too.
>
> I don't see any straight forward way to avoid the clock framework calls
> in the chain laid out above. I would be happy if anyone has some good
> suggestions.

I think you misunderstood what I proposed above. What I was suggesting
is that we could re-work the runtime PM deployment in the clock
framework.

In this way, we would not be holding the global clk-prepare lock while
trying to power-on the phy and its PM domain. Wouldn't that work?

Kind regards
Uffe
Peng Fan (OSS) Jan. 4, 2023, 8:37 a.m. UTC | #6
Hi Ulf,

On 11/17/2022 12:30 AM, Ulf Hansson wrote:
> On Wed, 16 Nov 2022 at 14:25, Lucas Stach <l.stach@pengutronix.de> wrote:
>>
>> Hi Ulf,
>>
>> Am Mittwoch, dem 16.11.2022 um 13:41 +0100 schrieb Ulf Hansson:
>>> + Stephen Boyd
>>>
>>> On Mon, 14 Nov 2022 at 21:32, Marek Vasut <marex@denx.de> wrote:
>>>>
>>>> On 11/14/22 20:40, Ulf Hansson wrote:
>>>>> On Tue, 8 Nov 2022 at 02:35, Marek Vasut <marex@denx.de> wrote:
>>>>>>
>>>>>> Currently it is possible that a power domain power on or off would claim
>>>>>> the genpd lock first and clock core prepare_lock second, while another
>>>>>> thread could do the reverse, and this would trigger lockdep warning.
>>>>>
>>>>> I am not quite sure I fully understand. In this case is the lockdep
>>>>> warning relevant or just something that we want to silence?
>>>>
>>>> This is a valid problem, see patches 2/3 and 3/3 for details too.
>>>>
>>>>>> Introduce new callbacks, .power_pre/post_on() and .power_off_pre/post(), which
>>>>>> are triggered before the genpd_lock() and after genpd_unlock() respectively in
>>>>>> case the domain is powered on and off. Those are meant to let drivers claim
>>>>>> clock core prepare_lock via clk_*prepare() call and release the lock via
>>>>>> clk_*unprepare() call to always assure that the clock and genpd lock ordering
>>>>>> is correct.
>>>>>
>>>>> To me, this sounds like a problem that may be better fixed by trying
>>>>> to model the parent/child-domains in a more strict way, through genpd.
>>>>>
>>>>> There is a comment in the code in imx8mp_blk_ctrl_probe() that seems
>>>>> to be pointing in this direction too.
>>>>>
>>>>> "* We use runtime PM to trigger power on/off of the upstream GPC
>>>>>     * domain, as a strict hierarchical parent/child power domain
>>>>>     * setup doesn't allow us to meet the sequencing requirements......"
>>>>>
>>>>> I am wondering about what those "sequencing requirements" are - and
>>>>> whether it could make better sense to fix these issues instead?
>>>>
>>>> Here is the lockdep splat:
>>>>
>>>> https://lore.kernel.org/all/Y1cs++TV2GCuh4tS@pendragon.ideasonboard.com/
>>>
>>> Yes, that certainly helped!
>>>
>>>>
>>>> It really is a problem between the clock and genpd subsystem locks, they
>>>> can be claimed in arbitrary order, see patch 2/3 and 3/3.
>>>>
>>>> I think that might clarify what I am attempting to solve here.
>>>
>>> Let me try to put some more words behind this, to make sure I have
>>> understood correctly, but also to easier allow more people to chim in.
>>>
>>> Note that, in your commit messages in patch2 and patch3, you are
>>> mentioning clk_disable_unused(), but that's not what the lockdep splat
>>> above is pointing at. Although, it seems the clk_disable_unused()
>>> thingy, would trigger a similar problem for this configuration for the
>>> imx8mp platform.
>>>
>>> Case #1:
>>> Triggered from the workqueue, the genpd_power_off_work_fn() ends up
>>> calling clk_bulk_unprepare(), from a genpd's ->power_off() callback(),
>>> which has been assigned to imx8mp_blk_ctrl_power_off(). Before genpd's
>>> ->power_off() is called, the genpd-lock(s) have been acquired, thus we
>>> are trying to acquire the global clk-prepare lock via
>>> clk_bulk_unprepare() while holding the genpd-lock(s).
>>>
>>> Case #0:
>>> The "drm driver" calls clk_set_rate(), thus we start by acquiring the
>>> global clk-prepare lock. Internally in the clock frameworks, the
>>> clk_set_rate() path continues to call clk_pm_runtime_get(). In this
>>> case, the corresponding clock provider's struct *device, seems to be
>>> attached to a genpd too. This means the call to clk_pm_runtime_get()
>>> ends up in genpd_runtime_resume(), which needs to acquire the
>>> genpd-lock(s) before it continues to call genpd_power_on() to power-on
>>> the PM domain. In other words, we try to acquire genpd-lock(s) while
>>> holding the global clk-prepare lock.
>>>
>>> The solution to fix these problems that you suggest in the $subject
>>> patch, isn't the direction I want this to take. The new callbacks are
>>> prone to be abused and it would also require genpd provider specific
>>> code to fix the problems. Of course, we need things to work, but let's
>>> look at a couple of options first. See below.
>>>
>>> 1)
>>> In a way, it looks like we have a circular description in DT of the
>>> hierarchy of the clock- and genpd providers, which is a bit awkward in
>>> my opinion. I was browsing the imx8mp DTS files to find this, but I
>>> couldn't. Can you perhaps point me to the DTS file(s) you are using? I
>>> can try to have a look so see if this could be arranged differently.
>>
>> The dependency chain isn't circular, it just happens to converge in the
>> clock framework and its single big hammer lock. The chain looks some
>> thing like this:
>>
>> 1. DRM driver request pixel clock (clk_prepare_enable ->
>> clk_prepare_mutex)
>> 2. Pixel clock is supplied from the PHY, which is in a power domain, so
>> in order to supply the clock it needs to runtime resume
>> 3. genpd powers up the PHY blk-ctrl domain, which again is inside a
>> GPCv2 power domain
>> 4. genpd powers up GPCv2 domain, which needs a specific clock to be
>> running in order to power up the domain, so it does a
>> clk_prepare_enable, which now happens to hit the clk_prepare_mutex
>> taken in step 1.
>>
>> As the runtime resume/suspend for the PHY may go through a workqueue we
>> have two different contexts trying to take the clk_prepare_mutex, which
>> is what lockdep complains about.
> 
> I see. Thanks for bringing some more clarity in this.
> 
> So the above is described in some of the in-kernel DTS(i) files too?
> 
>>
>>>
>>> 2)
>>> What we have seen from other use cases [1], is that calling
>>> pm_runtime_get|put*(), while holding subsystem specific locks (like
>>> the genpd-lock(s) and clk-prepare lock), isn't working very well. So,
>>> I am thinking that we could have a look at the runtime PM deployment
>>> in the clock framework, to see if we can avoid holding the global
>>> clk-prepare lock, while calling into runtime PM. I believe that should
>>> fix these problems too.
>>
>> I don't see any straight forward way to avoid the clock framework calls
>> in the chain laid out above. I would be happy if anyone has some good
>> suggestions.
> 
> I think you misunderstood what I proposed above. What I was suggesting
> is that we could re-work the runtime PM deployment in the clock
> framework.
> 
> In this way, we would not be holding the global clk-prepare lock while
> trying to power-on the phy and its PM domain. Wouldn't that work?


Do you have time to give a look at NXP downstream patch to address the 
lock issue in genpd side [1]?

Honestly I not have idea on how to rework the clk part to avoid holding
the clk-prepare lock when power on the phy and its power domain.

[1]https://github.com/nxp-imx/linux-imx/commit/c85126180a10439a8c7db1800529b4857d91c609

Thanks,
Peng.
> 
> Kind regards
> Uffe
Ulf Hansson Jan. 18, 2023, 12:55 p.m. UTC | #7
On Wed, 4 Jan 2023 at 09:38, Peng Fan <peng.fan@oss.nxp.com> wrote:
>
> Hi Ulf,
>
> On 11/17/2022 12:30 AM, Ulf Hansson wrote:
> > On Wed, 16 Nov 2022 at 14:25, Lucas Stach <l.stach@pengutronix.de> wrote:
> >>
> >> Hi Ulf,
> >>
> >> Am Mittwoch, dem 16.11.2022 um 13:41 +0100 schrieb Ulf Hansson:
> >>> + Stephen Boyd
> >>>
> >>> On Mon, 14 Nov 2022 at 21:32, Marek Vasut <marex@denx.de> wrote:
> >>>>
> >>>> On 11/14/22 20:40, Ulf Hansson wrote:
> >>>>> On Tue, 8 Nov 2022 at 02:35, Marek Vasut <marex@denx.de> wrote:
> >>>>>>
> >>>>>> Currently it is possible that a power domain power on or off would claim
> >>>>>> the genpd lock first and clock core prepare_lock second, while another
> >>>>>> thread could do the reverse, and this would trigger lockdep warning.
> >>>>>
> >>>>> I am not quite sure I fully understand. In this case is the lockdep
> >>>>> warning relevant or just something that we want to silence?
> >>>>
> >>>> This is a valid problem, see patches 2/3 and 3/3 for details too.
> >>>>
> >>>>>> Introduce new callbacks, .power_pre/post_on() and .power_off_pre/post(), which
> >>>>>> are triggered before the genpd_lock() and after genpd_unlock() respectively in
> >>>>>> case the domain is powered on and off. Those are meant to let drivers claim
> >>>>>> clock core prepare_lock via clk_*prepare() call and release the lock via
> >>>>>> clk_*unprepare() call to always assure that the clock and genpd lock ordering
> >>>>>> is correct.
> >>>>>
> >>>>> To me, this sounds like a problem that may be better fixed by trying
> >>>>> to model the parent/child-domains in a more strict way, through genpd.
> >>>>>
> >>>>> There is a comment in the code in imx8mp_blk_ctrl_probe() that seems
> >>>>> to be pointing in this direction too.
> >>>>>
> >>>>> "* We use runtime PM to trigger power on/off of the upstream GPC
> >>>>>     * domain, as a strict hierarchical parent/child power domain
> >>>>>     * setup doesn't allow us to meet the sequencing requirements......"
> >>>>>
> >>>>> I am wondering about what those "sequencing requirements" are - and
> >>>>> whether it could make better sense to fix these issues instead?
> >>>>
> >>>> Here is the lockdep splat:
> >>>>
> >>>> https://lore.kernel.org/all/Y1cs++TV2GCuh4tS@pendragon.ideasonboard.com/
> >>>
> >>> Yes, that certainly helped!
> >>>
> >>>>
> >>>> It really is a problem between the clock and genpd subsystem locks, they
> >>>> can be claimed in arbitrary order, see patch 2/3 and 3/3.
> >>>>
> >>>> I think that might clarify what I am attempting to solve here.
> >>>
> >>> Let me try to put some more words behind this, to make sure I have
> >>> understood correctly, but also to easier allow more people to chim in.
> >>>
> >>> Note that, in your commit messages in patch2 and patch3, you are
> >>> mentioning clk_disable_unused(), but that's not what the lockdep splat
> >>> above is pointing at. Although, it seems the clk_disable_unused()
> >>> thingy, would trigger a similar problem for this configuration for the
> >>> imx8mp platform.
> >>>
> >>> Case #1:
> >>> Triggered from the workqueue, the genpd_power_off_work_fn() ends up
> >>> calling clk_bulk_unprepare(), from a genpd's ->power_off() callback(),
> >>> which has been assigned to imx8mp_blk_ctrl_power_off(). Before genpd's
> >>> ->power_off() is called, the genpd-lock(s) have been acquired, thus we
> >>> are trying to acquire the global clk-prepare lock via
> >>> clk_bulk_unprepare() while holding the genpd-lock(s).
> >>>
> >>> Case #0:
> >>> The "drm driver" calls clk_set_rate(), thus we start by acquiring the
> >>> global clk-prepare lock. Internally in the clock frameworks, the
> >>> clk_set_rate() path continues to call clk_pm_runtime_get(). In this
> >>> case, the corresponding clock provider's struct *device, seems to be
> >>> attached to a genpd too. This means the call to clk_pm_runtime_get()
> >>> ends up in genpd_runtime_resume(), which needs to acquire the
> >>> genpd-lock(s) before it continues to call genpd_power_on() to power-on
> >>> the PM domain. In other words, we try to acquire genpd-lock(s) while
> >>> holding the global clk-prepare lock.
> >>>
> >>> The solution to fix these problems that you suggest in the $subject
> >>> patch, isn't the direction I want this to take. The new callbacks are
> >>> prone to be abused and it would also require genpd provider specific
> >>> code to fix the problems. Of course, we need things to work, but let's
> >>> look at a couple of options first. See below.
> >>>
> >>> 1)
> >>> In a way, it looks like we have a circular description in DT of the
> >>> hierarchy of the clock- and genpd providers, which is a bit awkward in
> >>> my opinion. I was browsing the imx8mp DTS files to find this, but I
> >>> couldn't. Can you perhaps point me to the DTS file(s) you are using? I
> >>> can try to have a look so see if this could be arranged differently.
> >>
> >> The dependency chain isn't circular, it just happens to converge in the
> >> clock framework and its single big hammer lock. The chain looks some
> >> thing like this:
> >>
> >> 1. DRM driver request pixel clock (clk_prepare_enable ->
> >> clk_prepare_mutex)
> >> 2. Pixel clock is supplied from the PHY, which is in a power domain, so
> >> in order to supply the clock it needs to runtime resume
> >> 3. genpd powers up the PHY blk-ctrl domain, which again is inside a
> >> GPCv2 power domain
> >> 4. genpd powers up GPCv2 domain, which needs a specific clock to be
> >> running in order to power up the domain, so it does a
> >> clk_prepare_enable, which now happens to hit the clk_prepare_mutex
> >> taken in step 1.
> >>
> >> As the runtime resume/suspend for the PHY may go through a workqueue we
> >> have two different contexts trying to take the clk_prepare_mutex, which
> >> is what lockdep complains about.
> >
> > I see. Thanks for bringing some more clarity in this.
> >
> > So the above is described in some of the in-kernel DTS(i) files too?
> >
> >>
> >>>
> >>> 2)
> >>> What we have seen from other use cases [1], is that calling
> >>> pm_runtime_get|put*(), while holding subsystem specific locks (like
> >>> the genpd-lock(s) and clk-prepare lock), isn't working very well. So,
> >>> I am thinking that we could have a look at the runtime PM deployment
> >>> in the clock framework, to see if we can avoid holding the global
> >>> clk-prepare lock, while calling into runtime PM. I believe that should
> >>> fix these problems too.
> >>
> >> I don't see any straight forward way to avoid the clock framework calls
> >> in the chain laid out above. I would be happy if anyone has some good
> >> suggestions.
> >
> > I think you misunderstood what I proposed above. What I was suggesting
> > is that we could re-work the runtime PM deployment in the clock
> > framework.
> >
> > In this way, we would not be holding the global clk-prepare lock while
> > trying to power-on the phy and its PM domain. Wouldn't that work?
>
>
> Do you have time to give a look at NXP downstream patch to address the
> lock issue in genpd side [1]?

Thanks for sharing this! I had a look and the approach seems like it
could work for you. However, I am still not sure it's the correct
thing to do.

>
> Honestly I not have idea on how to rework the clk part to avoid holding
> the clk-prepare lock when power on the phy and its power domain.

I understand. Let me try to prepare a hackish patch that we can use to
run some tests.

Unfortunately, I can't make any promises for when, but I will do my best.

>
> [1]https://github.com/nxp-imx/linux-imx/commit/c85126180a10439a8c7db1800529b4857d91c609
>
> Thanks,
> Peng.
> >
> > Kind regards
> > Uffe

Kind regards
Uffe
Marek Vasut Jan. 18, 2023, 1:07 p.m. UTC | #8
On 1/18/23 13:55, Ulf Hansson wrote:
> On Wed, 4 Jan 2023 at 09:38, Peng Fan <peng.fan@oss.nxp.com> wrote:
>>
>> Hi Ulf,
>>
>> On 11/17/2022 12:30 AM, Ulf Hansson wrote:
>>> On Wed, 16 Nov 2022 at 14:25, Lucas Stach <l.stach@pengutronix.de> wrote:
>>>>
>>>> Hi Ulf,
>>>>
>>>> Am Mittwoch, dem 16.11.2022 um 13:41 +0100 schrieb Ulf Hansson:
>>>>> + Stephen Boyd
>>>>>
>>>>> On Mon, 14 Nov 2022 at 21:32, Marek Vasut <marex@denx.de> wrote:
>>>>>>
>>>>>> On 11/14/22 20:40, Ulf Hansson wrote:
>>>>>>> On Tue, 8 Nov 2022 at 02:35, Marek Vasut <marex@denx.de> wrote:
>>>>>>>>
>>>>>>>> Currently it is possible that a power domain power on or off would claim
>>>>>>>> the genpd lock first and clock core prepare_lock second, while another
>>>>>>>> thread could do the reverse, and this would trigger lockdep warning.
>>>>>>>
>>>>>>> I am not quite sure I fully understand. In this case is the lockdep
>>>>>>> warning relevant or just something that we want to silence?
>>>>>>
>>>>>> This is a valid problem, see patches 2/3 and 3/3 for details too.
>>>>>>
>>>>>>>> Introduce new callbacks, .power_pre/post_on() and .power_off_pre/post(), which
>>>>>>>> are triggered before the genpd_lock() and after genpd_unlock() respectively in
>>>>>>>> case the domain is powered on and off. Those are meant to let drivers claim
>>>>>>>> clock core prepare_lock via clk_*prepare() call and release the lock via
>>>>>>>> clk_*unprepare() call to always assure that the clock and genpd lock ordering
>>>>>>>> is correct.
>>>>>>>
>>>>>>> To me, this sounds like a problem that may be better fixed by trying
>>>>>>> to model the parent/child-domains in a more strict way, through genpd.
>>>>>>>
>>>>>>> There is a comment in the code in imx8mp_blk_ctrl_probe() that seems
>>>>>>> to be pointing in this direction too.
>>>>>>>
>>>>>>> "* We use runtime PM to trigger power on/off of the upstream GPC
>>>>>>>      * domain, as a strict hierarchical parent/child power domain
>>>>>>>      * setup doesn't allow us to meet the sequencing requirements......"
>>>>>>>
>>>>>>> I am wondering about what those "sequencing requirements" are - and
>>>>>>> whether it could make better sense to fix these issues instead?
>>>>>>
>>>>>> Here is the lockdep splat:
>>>>>>
>>>>>> https://lore.kernel.org/all/Y1cs++TV2GCuh4tS@pendragon.ideasonboard.com/
>>>>>
>>>>> Yes, that certainly helped!
>>>>>
>>>>>>
>>>>>> It really is a problem between the clock and genpd subsystem locks, they
>>>>>> can be claimed in arbitrary order, see patch 2/3 and 3/3.
>>>>>>
>>>>>> I think that might clarify what I am attempting to solve here.
>>>>>
>>>>> Let me try to put some more words behind this, to make sure I have
>>>>> understood correctly, but also to easier allow more people to chim in.
>>>>>
>>>>> Note that, in your commit messages in patch2 and patch3, you are
>>>>> mentioning clk_disable_unused(), but that's not what the lockdep splat
>>>>> above is pointing at. Although, it seems the clk_disable_unused()
>>>>> thingy, would trigger a similar problem for this configuration for the
>>>>> imx8mp platform.
>>>>>
>>>>> Case #1:
>>>>> Triggered from the workqueue, the genpd_power_off_work_fn() ends up
>>>>> calling clk_bulk_unprepare(), from a genpd's ->power_off() callback(),
>>>>> which has been assigned to imx8mp_blk_ctrl_power_off(). Before genpd's
>>>>> ->power_off() is called, the genpd-lock(s) have been acquired, thus we
>>>>> are trying to acquire the global clk-prepare lock via
>>>>> clk_bulk_unprepare() while holding the genpd-lock(s).
>>>>>
>>>>> Case #0:
>>>>> The "drm driver" calls clk_set_rate(), thus we start by acquiring the
>>>>> global clk-prepare lock. Internally in the clock frameworks, the
>>>>> clk_set_rate() path continues to call clk_pm_runtime_get(). In this
>>>>> case, the corresponding clock provider's struct *device, seems to be
>>>>> attached to a genpd too. This means the call to clk_pm_runtime_get()
>>>>> ends up in genpd_runtime_resume(), which needs to acquire the
>>>>> genpd-lock(s) before it continues to call genpd_power_on() to power-on
>>>>> the PM domain. In other words, we try to acquire genpd-lock(s) while
>>>>> holding the global clk-prepare lock.
>>>>>
>>>>> The solution to fix these problems that you suggest in the $subject
>>>>> patch, isn't the direction I want this to take. The new callbacks are
>>>>> prone to be abused and it would also require genpd provider specific
>>>>> code to fix the problems. Of course, we need things to work, but let's
>>>>> look at a couple of options first. See below.
>>>>>
>>>>> 1)
>>>>> In a way, it looks like we have a circular description in DT of the
>>>>> hierarchy of the clock- and genpd providers, which is a bit awkward in
>>>>> my opinion. I was browsing the imx8mp DTS files to find this, but I
>>>>> couldn't. Can you perhaps point me to the DTS file(s) you are using? I
>>>>> can try to have a look so see if this could be arranged differently.
>>>>
>>>> The dependency chain isn't circular, it just happens to converge in the
>>>> clock framework and its single big hammer lock. The chain looks some
>>>> thing like this:
>>>>
>>>> 1. DRM driver request pixel clock (clk_prepare_enable ->
>>>> clk_prepare_mutex)
>>>> 2. Pixel clock is supplied from the PHY, which is in a power domain, so
>>>> in order to supply the clock it needs to runtime resume
>>>> 3. genpd powers up the PHY blk-ctrl domain, which again is inside a
>>>> GPCv2 power domain
>>>> 4. genpd powers up GPCv2 domain, which needs a specific clock to be
>>>> running in order to power up the domain, so it does a
>>>> clk_prepare_enable, which now happens to hit the clk_prepare_mutex
>>>> taken in step 1.
>>>>
>>>> As the runtime resume/suspend for the PHY may go through a workqueue we
>>>> have two different contexts trying to take the clk_prepare_mutex, which
>>>> is what lockdep complains about.
>>>
>>> I see. Thanks for bringing some more clarity in this.
>>>
>>> So the above is described in some of the in-kernel DTS(i) files too?
>>>
>>>>
>>>>>
>>>>> 2)
>>>>> What we have seen from other use cases [1], is that calling
>>>>> pm_runtime_get|put*(), while holding subsystem specific locks (like
>>>>> the genpd-lock(s) and clk-prepare lock), isn't working very well. So,
>>>>> I am thinking that we could have a look at the runtime PM deployment
>>>>> in the clock framework, to see if we can avoid holding the global
>>>>> clk-prepare lock, while calling into runtime PM. I believe that should
>>>>> fix these problems too.
>>>>
>>>> I don't see any straight forward way to avoid the clock framework calls
>>>> in the chain laid out above. I would be happy if anyone has some good
>>>> suggestions.
>>>
>>> I think you misunderstood what I proposed above. What I was suggesting
>>> is that we could re-work the runtime PM deployment in the clock
>>> framework.
>>>
>>> In this way, we would not be holding the global clk-prepare lock while
>>> trying to power-on the phy and its PM domain. Wouldn't that work?
>>
>>
>> Do you have time to give a look at NXP downstream patch to address the
>> lock issue in genpd side [1]?
> 
> Thanks for sharing this! I had a look and the approach seems like it
> could work for you. However, I am still not sure it's the correct
> thing to do.
> 
>>
>> Honestly I not have idea on how to rework the clk part to avoid holding
>> the clk-prepare lock when power on the phy and its power domain.
> 
> I understand. Let me try to prepare a hackish patch that we can use to
> run some tests.
> 
> Unfortunately, I can't make any promises for when, but I will do my best.

Thanks !
Peng Fan (OSS) Feb. 16, 2023, 1:47 a.m. UTC | #9
Hi Ulf,

On 1/18/2023 8:55 PM, Ulf Hansson wrote:
...
>>
>> Honestly I not have idea on how to rework the clk part to avoid holding
>> the clk-prepare lock when power on the phy and its power domain.
> 
> I understand. Let me try to prepare a hackish patch that we can use to
> run some tests.
> 
> Unfortunately, I can't make any promises for when, but I will do my best.
> 

Have you got time to give a look on this part? Or do you have any 
suggestions that I could help with?

Thanks,
Peng.

>>
>> [1]https://github.com/nxp-imx/linux-imx/commit/c85126180a10439a8c7db1800529b4857d91c609
>>
>> Thanks,
>> Peng.
>>>
>>> Kind regards
>>> Uffe
> 
> Kind regards
> Uffe
Ulf Hansson Feb. 16, 2023, 10:48 a.m. UTC | #10
On Thu, 16 Feb 2023 at 02:47, Peng Fan <peng.fan@oss.nxp.com> wrote:
>
> Hi Ulf,
>
> On 1/18/2023 8:55 PM, Ulf Hansson wrote:
> ...
> >>
> >> Honestly I not have idea on how to rework the clk part to avoid holding
> >> the clk-prepare lock when power on the phy and its power domain.
> >
> > I understand. Let me try to prepare a hackish patch that we can use to
> > run some tests.
> >
> > Unfortunately, I can't make any promises for when, but I will do my best.
> >
>
> Have you got time to give a look on this part? Or do you have any
> suggestions that I could help with?

I have looked closer at this, but haven't yet got the time to actually
post a patch. Let me see if I can do some re-prioritization of things
at my side...

Anyway, my plan is to post a "hackish" patch that we should use solely
for testing, to make sure that the approach works. Then, I intend to
continue to work on the complete solution, which seems to be requiring
some reworks of locking parts in the clock framework. Although, it's
too early for me, to say exactly how, so I can't really give you any
pointers in this regard, sorry.

>
> Thanks,
> Peng.
>
> >>
> >> [1]https://github.com/nxp-imx/linux-imx/commit/c85126180a10439a8c7db1800529b4857d91c609
> >>

Kind regards
Uffe
diff mbox series

Patch

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 6471b559230e9..df2a93d0674e4 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -494,6 +494,22 @@  void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next)
 }
 EXPORT_SYMBOL_GPL(dev_pm_genpd_set_next_wakeup);
 
+static int genpd_power_pre_on(struct generic_pm_domain *genpd)
+{
+	if (!genpd->power_pre_on)
+		return 0;
+
+	return genpd->power_pre_on(genpd);
+}
+
+static int genpd_power_post_on(struct generic_pm_domain *genpd)
+{
+	if (!genpd->power_post_on)
+		return 0;
+
+	return genpd->power_post_on(genpd);
+}
+
 static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed)
 {
 	unsigned int state_idx = genpd->state_idx;
@@ -544,6 +560,22 @@  static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed)
 	return ret;
 }
 
+static int genpd_power_off_pre(struct generic_pm_domain *genpd)
+{
+	if (!genpd->power_off_pre)
+		return 0;
+
+	return genpd->power_off_pre(genpd);
+}
+
+static int genpd_power_off_post(struct generic_pm_domain *genpd)
+{
+	if (!genpd->power_off_post)
+		return 0;
+
+	return genpd->power_off_post(genpd);
+}
+
 static int _genpd_power_off(struct generic_pm_domain *genpd, bool timed)
 {
 	unsigned int state_idx = genpd->state_idx;
@@ -816,12 +848,18 @@  static int genpd_dev_pm_qos_notifier(struct notifier_block *nb,
 static void genpd_power_off_work_fn(struct work_struct *work)
 {
 	struct generic_pm_domain *genpd;
+	int ret;
 
 	genpd = container_of(work, struct generic_pm_domain, power_off_work);
 
+	ret = genpd_power_off_pre(genpd);
+	if (ret)
+		return;
 	genpd_lock(genpd);
 	genpd_power_off(genpd, false, 0);
 	genpd_unlock(genpd);
+	ret = genpd_power_off_post(genpd);
+	WARN_ON_ONCE(ret);
 }
 
 /**
@@ -938,12 +976,14 @@  static int genpd_runtime_suspend(struct device *dev)
 	if (irq_safe_dev_in_sleep_domain(dev, genpd))
 		return 0;
 
+	ret = genpd_power_off_pre(genpd);
+	if (ret)
+		return ret;
 	genpd_lock(genpd);
 	gpd_data->rpm_pstate = genpd_drop_performance_state(dev);
 	genpd_power_off(genpd, true, 0);
 	genpd_unlock(genpd);
-
-	return 0;
+	return genpd_power_off_post(genpd);
 }
 
 /**
@@ -977,12 +1017,21 @@  static int genpd_runtime_resume(struct device *dev)
 	if (irq_safe_dev_in_sleep_domain(dev, genpd))
 		goto out;
 
+	ret = genpd_power_pre_on(genpd);
+	if (ret)
+		return ret;
 	genpd_lock(genpd);
 	ret = genpd_power_on(genpd, 0);
 	if (!ret)
 		genpd_restore_performance_state(dev, gpd_data->rpm_pstate);
 	genpd_unlock(genpd);
 
+	if (ret) {
+		genpd_power_post_on(genpd);
+		return ret;
+	}
+
+	ret = genpd_power_post_on(genpd);
 	if (ret)
 		return ret;
 
@@ -1017,10 +1066,13 @@  static int genpd_runtime_resume(struct device *dev)
 	genpd_stop_dev(genpd, dev);
 err_poweroff:
 	if (!pm_runtime_is_irq_safe(dev) || genpd_is_irq_safe(genpd)) {
-		genpd_lock(genpd);
-		gpd_data->rpm_pstate = genpd_drop_performance_state(dev);
-		genpd_power_off(genpd, true, 0);
-		genpd_unlock(genpd);
+		if (!genpd_power_off_pre(genpd)) {
+			genpd_lock(genpd);
+			gpd_data->rpm_pstate = genpd_drop_performance_state(dev);
+			genpd_power_off(genpd, true, 0);
+			genpd_unlock(genpd);
+			genpd_power_off_post(genpd);
+		}
 	}
 
 	return ret;
@@ -1225,12 +1277,14 @@  static int genpd_finish_suspend(struct device *dev, bool poweroff)
 		}
 	}
 
+	ret = genpd_power_off_pre(genpd);
+	if (ret)
+		return ret;
 	genpd_lock(genpd);
 	genpd->suspended_count++;
 	genpd_sync_power_off(genpd, true, 0);
 	genpd_unlock(genpd);
-
-	return 0;
+	return genpd_power_off_post(genpd);
 }
 
 /**
@@ -1267,10 +1321,16 @@  static int genpd_resume_noirq(struct device *dev)
 	if (device_wakeup_path(dev) && genpd_is_active_wakeup(genpd))
 		return pm_generic_resume_noirq(dev);
 
+	ret = genpd_power_pre_on(genpd);
+	if (ret)
+		return ret;
 	genpd_lock(genpd);
 	genpd_sync_power_on(genpd, true, 0);
 	genpd->suspended_count--;
 	genpd_unlock(genpd);
+	ret = genpd_power_post_on(genpd);
+	if (ret)
+		return ret;
 
 	if (genpd->dev_ops.stop && genpd->dev_ops.start &&
 	    !pm_runtime_status_suspended(dev)) {
@@ -1378,6 +1438,9 @@  static int genpd_restore_noirq(struct device *dev)
 	 * At this point suspended_count == 0 means we are being run for the
 	 * first time for the given domain in the present cycle.
 	 */
+	ret = genpd_power_pre_on(genpd);
+	if (ret)
+		return ret;
 	genpd_lock(genpd);
 	if (genpd->suspended_count++ == 0) {
 		/*
@@ -1390,6 +1453,9 @@  static int genpd_restore_noirq(struct device *dev)
 
 	genpd_sync_power_on(genpd, true, 0);
 	genpd_unlock(genpd);
+	ret = genpd_power_post_on(genpd);
+	if (ret)
+		return ret;
 
 	if (genpd->dev_ops.stop && genpd->dev_ops.start &&
 	    !pm_runtime_status_suspended(dev)) {
@@ -1413,6 +1479,7 @@  static int genpd_restore_noirq(struct device *dev)
 static void genpd_complete(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
+	int ret;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
@@ -1435,6 +1502,7 @@  static void genpd_switch_state(struct device *dev, bool suspend)
 {
 	struct generic_pm_domain *genpd;
 	bool use_lock;
+	int ret;
 
 	genpd = dev_to_genpd_safe(dev);
 	if (!genpd)
@@ -1442,8 +1510,13 @@  static void genpd_switch_state(struct device *dev, bool suspend)
 
 	use_lock = genpd_is_irq_safe(genpd);
 
-	if (use_lock)
+	if (use_lock) {
+		ret = suspend ? genpd_power_off_pre(genpd) :
+				genpd_power_pre_on(genpd);
+		if (ret)
+			return;
 		genpd_lock(genpd);
+	}
 
 	if (suspend) {
 		genpd->suspended_count++;
@@ -1453,8 +1526,12 @@  static void genpd_switch_state(struct device *dev, bool suspend)
 		genpd->suspended_count--;
 	}
 
-	if (use_lock)
+	if (use_lock) {
 		genpd_unlock(genpd);
+		ret = suspend ? genpd_power_off_post(genpd) :
+				genpd_power_post_on(genpd);
+		WARN_ON_ONCE(ret);
+	}
 }
 
 /**
@@ -2750,9 +2827,15 @@  static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev,
 	dev->pm_domain->sync = genpd_dev_pm_sync;
 
 	if (power_on) {
+		ret = genpd_power_pre_on(pd);
+		if (ret)
+			return ret;
 		genpd_lock(pd);
 		ret = genpd_power_on(pd, 0);
 		genpd_unlock(pd);
+		ret = genpd_power_post_on(pd);
+		if (ret)
+			return ret;
 	}
 
 	if (ret) {
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index ebc3516980907..3cf231a27cb1b 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -134,8 +134,12 @@  struct generic_pm_domain {
 	unsigned int prepared_count;	/* Suspend counter of prepared devices */
 	unsigned int performance_state;	/* Aggregated max performance state */
 	cpumask_var_t cpus;		/* A cpumask of the attached CPUs */
+	int (*power_off_pre)(struct generic_pm_domain *domain);
 	int (*power_off)(struct generic_pm_domain *domain);
+	int (*power_off_post)(struct generic_pm_domain *domain);
+	int (*power_pre_on)(struct generic_pm_domain *domain);
 	int (*power_on)(struct generic_pm_domain *domain);
+	int (*power_post_on)(struct generic_pm_domain *domain);
 	struct raw_notifier_head power_notifiers; /* Power on/off notifiers */
 	struct opp_table *opp_table;	/* OPP table of the genpd */
 	unsigned int (*opp_to_performance_state)(struct generic_pm_domain *genpd,