Message ID | 20210701232728.23591-3-digetx@gmail.com |
---|---|
State | New |
Headers | show |
Series | NVIDIA Tegra power management patches for 5.15 | expand |
On Fri, 2 Jul 2021 at 01:28, Dmitry Osipenko <digetx@gmail.com> wrote: > > Implement attach_dev() callback of power domain drivers that initializes > the domain's performance state. GENPD core will apply the performance > state on the first runtime PM resume of the attached device. > > Signed-off-by: Dmitry Osipenko <digetx@gmail.com> > --- > drivers/soc/tegra/pmc.c | 147 ++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 147 insertions(+) > > diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c > index f63dfb2ca3f9..ebafb818b08e 100644 > --- a/drivers/soc/tegra/pmc.c > +++ b/drivers/soc/tegra/pmc.c > @@ -506,6 +506,151 @@ static void tegra_pmc_scratch_writel(struct tegra_pmc *pmc, u32 value, > writel(value, pmc->scratch + offset); > } > > +static const char * const tegra_emc_compats[] = { > + "nvidia,tegra20-emc", > + "nvidia,tegra30-emc", > + NULL, > +}; > + > +/* > + * This GENPD callback is used by both powergate and core domains. > + * > + * We retrieve clock rate of the attached device and initialize domain's > + * performance state in accordance to the clock rate. > + */ > +static int tegra_pmc_pd_attach_dev(struct generic_pm_domain *genpd, > + struct device *dev) > +{ > + struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev); > + struct opp_table *opp_table, *pd_opp_table; > + struct generic_pm_domain *core_genpd; > + struct dev_pm_opp *opp, *pd_opp; > + unsigned long rate, state; > + struct gpd_link *link; > + struct clk *clk; > + u32 hw_version; > + int ret; > + > + /* > + * Tegra114+ SocS don't support OPP yet. But if they will get OPP > + * support, then we want to skip OPP for older kernels to preserve > + * compatibility of newer DTBs with older kernels. > + */ > + if (!pmc->soc->supports_core_domain) > + return 0; > + > + /* > + * The EMC devices are a special case because we have a protection > + * from non-EMC drivers getting clock handle before EMC driver is > + * fully initialized. The goal of the protection is to prevent > + * devfreq driver from getting failures if it will try to change > + * EMC clock rate until clock is fully initialized. The EMC drivers > + * will initialize the performance state by themselves. > + */ > + if (of_device_compatible_match(dev->of_node, tegra_emc_compats)) > + return 0; > + > + clk = clk_get(dev, NULL); > + if (IS_ERR(clk)) { > + dev_err(&genpd->dev, "failed to get clk of %s: %pe\n", > + dev_name(dev), clk); > + return PTR_ERR(clk); > + } > + > + rate = clk_get_rate(clk); > + if (!rate) { > + dev_err(&genpd->dev, "failed to get clk rate of %s\n", > + dev_name(dev)); > + ret = -EINVAL; > + goto put_clk; > + } > + > + if (of_machine_is_compatible("nvidia,tegra20")) > + hw_version = BIT(tegra_sku_info.soc_process_id); > + else > + hw_version = BIT(tegra_sku_info.soc_speedo_id); > + > + opp_table = dev_pm_opp_set_supported_hw(dev, &hw_version, 1); > + if (IS_ERR(opp_table)) { > + dev_err(&genpd->dev, "failed to set OPP supported HW for %s: %d\n", > + dev_name(dev), ret); > + ret = PTR_ERR(opp_table); > + goto put_clk; > + } > + > + ret = dev_pm_opp_of_add_table(dev); > + if (ret) { > + /* older DTBs that don't have OPPs will get -ENODEV here */ > + if (ret != -ENODEV) > + dev_err(&genpd->dev, "failed to get OPP table of %s: %d\n", > + dev_name(dev), ret); > + else > + ret = 0; > + > + goto put_supported_hw; > + } > + > + /* find suitable OPP for the rate */ > + opp = dev_pm_opp_find_freq_ceil(dev, &rate); > + > + if (opp == ERR_PTR(-ERANGE)) > + opp = dev_pm_opp_find_freq_floor(dev, &rate); > + > + if (IS_ERR(opp)) { > + dev_err(&genpd->dev, "failed to find OPP for %luHz of %s: %pe\n", > + rate, dev_name(dev), opp); > + ret = PTR_ERR(opp); > + goto remove_dev_table; > + } > + > + if (!list_empty(&genpd->child_links)) { > + link = list_first_entry(&genpd->child_links, struct gpd_link, > + child_node); > + core_genpd = link->parent; > + } else { > + core_genpd = genpd; > + } This looks a bit odd to me. A genpd provider shouldn't need to walk these links as these are considered internals to genpd. Normally this needs lockings, etc. Why exactly do you need this? > + > + pd_opp_table = dev_pm_opp_get_opp_table(&core_genpd->dev); > + if (IS_ERR(pd_opp_table)) { > + dev_err(&genpd->dev, "failed to get OPP table of %s: %pe\n", > + dev_name(&core_genpd->dev), pd_opp_table); > + ret = PTR_ERR(pd_opp_table); > + goto put_dev_opp; > + } > + > + pd_opp = dev_pm_opp_xlate_required_opp(opp_table, pd_opp_table, opp); > + if (IS_ERR(pd_opp)) { > + dev_err(&genpd->dev, > + "failed to xlate required OPP for %luHz of %s: %pe\n", > + rate, dev_name(dev), pd_opp); > + ret = PTR_ERR(pd_opp); > + goto put_pd_opp_table; > + } > + > + /* > + * The initialized state will be applied by GENPD core on the first > + * RPM-resume of the device. This means that drivers don't need to > + * explicitly initialize performance state. > + */ > + state = pm_genpd_opp_to_performance_state(&core_genpd->dev, pd_opp); > + gpd_data->rpm_pstate = state; Could the above be replaced with Rajendra's suggestion [1], which changes genpd to internally during attach, to set a default performance state when there is a "required-opp" specified in the device node? [...] Kind regards Uffe [1] https://lkml.org/lkml/2021/7/20/99
02.08.2021 17:48, Ulf Hansson пишет: ... >> + if (!list_empty(&genpd->child_links)) { >> + link = list_first_entry(&genpd->child_links, struct gpd_link, >> + child_node); >> + core_genpd = link->parent; >> + } else { >> + core_genpd = genpd; >> + } > > This looks a bit odd to me. A genpd provider shouldn't need to walk > these links as these are considered internals to genpd. Normally this > needs lockings, etc. > > Why exactly do you need this? We have a chain of PMC domain -> core domain, both domains are created and liked together by this PMC driver. Devices are attached to either PMC domain or to core domain. PMC domain doesn't handle the performance changes, performance requests go down to the core domain. This is needed in order to translate the device's OPP into performance state of the core domain, based on the domain to which device is attached. >> + >> + pd_opp_table = dev_pm_opp_get_opp_table(&core_genpd->dev); >> + if (IS_ERR(pd_opp_table)) { >> + dev_err(&genpd->dev, "failed to get OPP table of %s: %pe\n", >> + dev_name(&core_genpd->dev), pd_opp_table); >> + ret = PTR_ERR(pd_opp_table); >> + goto put_dev_opp; >> + } >> + >> + pd_opp = dev_pm_opp_xlate_required_opp(opp_table, pd_opp_table, opp); >> + if (IS_ERR(pd_opp)) { >> + dev_err(&genpd->dev, >> + "failed to xlate required OPP for %luHz of %s: %pe\n", >> + rate, dev_name(dev), pd_opp); >> + ret = PTR_ERR(pd_opp); >> + goto put_pd_opp_table; >> + } >> + >> + /* >> + * The initialized state will be applied by GENPD core on the first >> + * RPM-resume of the device. This means that drivers don't need to >> + * explicitly initialize performance state. >> + */ >> + state = pm_genpd_opp_to_performance_state(&core_genpd->dev, pd_opp); >> + gpd_data->rpm_pstate = state; > > Could the above be replaced with Rajendra's suggestion [1], which > changes genpd to internally during attach, to set a default > performance state when there is a "required-opp" specified in the > device node? It's not a "static" performance level here, but any level depending on h/w state left from bootloader and etc. The performance level corresponds to the voltage of the core domain, hence we need to initialize the voltage vote before device is resumed.
On Mon, 2 Aug 2021 at 20:23, Dmitry Osipenko <digetx@gmail.com> wrote: > > 02.08.2021 17:48, Ulf Hansson пишет: > ... > >> + if (!list_empty(&genpd->child_links)) { > >> + link = list_first_entry(&genpd->child_links, struct gpd_link, > >> + child_node); > >> + core_genpd = link->parent; > >> + } else { > >> + core_genpd = genpd; > >> + } > > > > This looks a bit odd to me. A genpd provider shouldn't need to walk > > these links as these are considered internals to genpd. Normally this > > needs lockings, etc. > > > > Why exactly do you need this? > > We have a chain of PMC domain -> core domain, both domains are created > and liked together by this PMC driver. Devices are attached to either > PMC domain or to core domain. PMC domain doesn't handle the performance > changes, performance requests go down to the core domain. Did I get this right? The core domain is the parent to the PMC domain? > > This is needed in order to translate the device's OPP into performance > state of the core domain, based on the domain to which device is attached. So, the PMC domain doesn't have an OPP table associated with it, but some of its attached devices may still have available OPPs, which should be managed through the parent domain (core domain). Correct? Is there a DT patch in the series that I can look at that shows how this is encoded? Hmm, I have the feeling that we should try to manage in some generic way in genpd, rather than having to deal with it here. > > >> + > >> + pd_opp_table = dev_pm_opp_get_opp_table(&core_genpd->dev); > >> + if (IS_ERR(pd_opp_table)) { > >> + dev_err(&genpd->dev, "failed to get OPP table of %s: %pe\n", > >> + dev_name(&core_genpd->dev), pd_opp_table); > >> + ret = PTR_ERR(pd_opp_table); > >> + goto put_dev_opp; > >> + } > >> + > >> + pd_opp = dev_pm_opp_xlate_required_opp(opp_table, pd_opp_table, opp); > >> + if (IS_ERR(pd_opp)) { > >> + dev_err(&genpd->dev, > >> + "failed to xlate required OPP for %luHz of %s: %pe\n", > >> + rate, dev_name(dev), pd_opp); > >> + ret = PTR_ERR(pd_opp); > >> + goto put_pd_opp_table; > >> + } > >> + > >> + /* > >> + * The initialized state will be applied by GENPD core on the first > >> + * RPM-resume of the device. This means that drivers don't need to > >> + * explicitly initialize performance state. > >> + */ > >> + state = pm_genpd_opp_to_performance_state(&core_genpd->dev, pd_opp); > >> + gpd_data->rpm_pstate = state; > > > > Could the above be replaced with Rajendra's suggestion [1], which > > changes genpd to internally during attach, to set a default > > performance state when there is a "required-opp" specified in the > > device node? > > It's not a "static" performance level here, but any level depending on > h/w state left from bootloader and etc. The performance level > corresponds to the voltage of the core domain, hence we need to > initialize the voltage vote before device is resumed. Why not let the driver deal with this instead? It should be able to probe its device, no matter what state the bootloader has put the device into. To me, it sounds like a call to dev_pm_genpd_set_performance_state() (perhaps via dev_pm_opp_set_opp() or dev_pm_opp_set_rate()) from the driver itself, should be sufficient? I understand that it means the domain may change the OPP during boot, without respecting a vote for a device that has not been probed yet. But is there a problem with this? Kind regards Uffe
04.08.2021 12:59, Ulf Hansson пишет: > On Mon, 2 Aug 2021 at 20:23, Dmitry Osipenko <digetx@gmail.com> wrote: >> >> 02.08.2021 17:48, Ulf Hansson пишет: >> ... >>>> + if (!list_empty(&genpd->child_links)) { >>>> + link = list_first_entry(&genpd->child_links, struct gpd_link, >>>> + child_node); >>>> + core_genpd = link->parent; >>>> + } else { >>>> + core_genpd = genpd; >>>> + } >>> >>> This looks a bit odd to me. A genpd provider shouldn't need to walk >>> these links as these are considered internals to genpd. Normally this >>> needs lockings, etc. >>> >>> Why exactly do you need this? >> >> We have a chain of PMC domain -> core domain, both domains are created >> and liked together by this PMC driver. Devices are attached to either >> PMC domain or to core domain. PMC domain doesn't handle the performance >> changes, performance requests go down to the core domain. > > Did I get this right? The core domain is the parent to the PMC domain? You got this right. >> >> This is needed in order to translate the device's OPP into performance >> state of the core domain, based on the domain to which device is attached. > > So, the PMC domain doesn't have an OPP table associated with it, but > some of its attached devices may still have available OPPs, which > should be managed through the parent domain (core domain). Correct? Yes, the OPPs are specified only for the core domain. > Is there a DT patch in the series that I can look at that shows how > this is encoded? See patches which are adding domains and OPPs to DTs: https://patchwork.ozlabs.org/project/linux-tegra/patch/20210701232728.23591-34-digetx@gmail.com/ https://patchwork.ozlabs.org/project/linux-tegra/patch/20210701232728.23591-35-digetx@gmail.com/ > Hmm, I have the feeling that we should try to manage in some generic > way in genpd, rather than having to deal with it here. Still it requires a platform-specific knowledge. It could be some new genpd hook for the initialization. But I don't know what other platforms may want to initialize, so it's not clear to me how to make it generic. >>>> + >>>> + pd_opp_table = dev_pm_opp_get_opp_table(&core_genpd->dev); >>>> + if (IS_ERR(pd_opp_table)) { >>>> + dev_err(&genpd->dev, "failed to get OPP table of %s: %pe\n", >>>> + dev_name(&core_genpd->dev), pd_opp_table); >>>> + ret = PTR_ERR(pd_opp_table); >>>> + goto put_dev_opp; >>>> + } >>>> + >>>> + pd_opp = dev_pm_opp_xlate_required_opp(opp_table, pd_opp_table, opp); >>>> + if (IS_ERR(pd_opp)) { >>>> + dev_err(&genpd->dev, >>>> + "failed to xlate required OPP for %luHz of %s: %pe\n", >>>> + rate, dev_name(dev), pd_opp); >>>> + ret = PTR_ERR(pd_opp); >>>> + goto put_pd_opp_table; >>>> + } >>>> + >>>> + /* >>>> + * The initialized state will be applied by GENPD core on the first >>>> + * RPM-resume of the device. This means that drivers don't need to >>>> + * explicitly initialize performance state. >>>> + */ >>>> + state = pm_genpd_opp_to_performance_state(&core_genpd->dev, pd_opp); >>>> + gpd_data->rpm_pstate = state; >>> >>> Could the above be replaced with Rajendra's suggestion [1], which >>> changes genpd to internally during attach, to set a default >>> performance state when there is a "required-opp" specified in the >>> device node? >> >> It's not a "static" performance level here, but any level depending on >> h/w state left from bootloader and etc. The performance level >> corresponds to the voltage of the core domain, hence we need to >> initialize the voltage vote before device is resumed. > > Why not let the driver deal with this instead? It should be able to > probe its device, no matter what state the bootloader has put the > device into. > > To me, it sounds like a call to dev_pm_genpd_set_performance_state() > (perhaps via dev_pm_opp_set_opp() or dev_pm_opp_set_rate()) from the > driver itself, should be sufficient? We did that in a previous versions of this series where drivers were calling devm_tegra_core_dev_init_opp_table() helper during the probe to initialize performance state of the domain. Moving OPP state initialization into central place made drivers cleaner by removing the boilerplate code. I can revert back to the previous variant, although this variant works well too. > I understand that it means the domain may change the OPP during boot, > without respecting a vote for a device that has not been probed yet. > But is there a problem with this? Domains themselves don't change OPP, there is no problem with that. The point is to have cleaner code in the drivers.
On Wed, 4 Aug 2021 at 23:17, Dmitry Osipenko <digetx@gmail.com> wrote: > > 04.08.2021 12:59, Ulf Hansson пишет: > > On Mon, 2 Aug 2021 at 20:23, Dmitry Osipenko <digetx@gmail.com> wrote: > >> > >> 02.08.2021 17:48, Ulf Hansson пишет: > >> ... > >>>> + if (!list_empty(&genpd->child_links)) { > >>>> + link = list_first_entry(&genpd->child_links, struct gpd_link, > >>>> + child_node); > >>>> + core_genpd = link->parent; > >>>> + } else { > >>>> + core_genpd = genpd; > >>>> + } > >>> > >>> This looks a bit odd to me. A genpd provider shouldn't need to walk > >>> these links as these are considered internals to genpd. Normally this > >>> needs lockings, etc. > >>> > >>> Why exactly do you need this? > >> > >> We have a chain of PMC domain -> core domain, both domains are created > >> and liked together by this PMC driver. Devices are attached to either > >> PMC domain or to core domain. PMC domain doesn't handle the performance > >> changes, performance requests go down to the core domain. > > > > Did I get this right? The core domain is the parent to the PMC domain? > > You got this right. > > >> > >> This is needed in order to translate the device's OPP into performance > >> state of the core domain, based on the domain to which device is attached. > > > > So, the PMC domain doesn't have an OPP table associated with it, but > > some of its attached devices may still have available OPPs, which > > should be managed through the parent domain (core domain). Correct? > > Yes, the OPPs are specified only for the core domain. > > > Is there a DT patch in the series that I can look at that shows how > > this is encoded? > > See patches which are adding domains and OPPs to DTs: > > https://patchwork.ozlabs.org/project/linux-tegra/patch/20210701232728.23591-34-digetx@gmail.com/ > > https://patchwork.ozlabs.org/project/linux-tegra/patch/20210701232728.23591-35-digetx@gmail.com/ Thanks, this looks sane to me! > > > Hmm, I have the feeling that we should try to manage in some generic > > way in genpd, rather than having to deal with it here. > > Still it requires a platform-specific knowledge. It could be some new > genpd hook for the initialization. But I don't know what other platforms > may want to initialize, so it's not clear to me how to make it generic. Right. We may need some more thinking around this, if needed at all (see below). > > >>>> + > >>>> + pd_opp_table = dev_pm_opp_get_opp_table(&core_genpd->dev); > >>>> + if (IS_ERR(pd_opp_table)) { > >>>> + dev_err(&genpd->dev, "failed to get OPP table of %s: %pe\n", > >>>> + dev_name(&core_genpd->dev), pd_opp_table); > >>>> + ret = PTR_ERR(pd_opp_table); > >>>> + goto put_dev_opp; > >>>> + } > >>>> + > >>>> + pd_opp = dev_pm_opp_xlate_required_opp(opp_table, pd_opp_table, opp); > >>>> + if (IS_ERR(pd_opp)) { > >>>> + dev_err(&genpd->dev, > >>>> + "failed to xlate required OPP for %luHz of %s: %pe\n", > >>>> + rate, dev_name(dev), pd_opp); > >>>> + ret = PTR_ERR(pd_opp); > >>>> + goto put_pd_opp_table; > >>>> + } > >>>> + > >>>> + /* > >>>> + * The initialized state will be applied by GENPD core on the first > >>>> + * RPM-resume of the device. This means that drivers don't need to > >>>> + * explicitly initialize performance state. > >>>> + */ > >>>> + state = pm_genpd_opp_to_performance_state(&core_genpd->dev, pd_opp); > >>>> + gpd_data->rpm_pstate = state; > >>> > >>> Could the above be replaced with Rajendra's suggestion [1], which > >>> changes genpd to internally during attach, to set a default > >>> performance state when there is a "required-opp" specified in the > >>> device node? > >> > >> It's not a "static" performance level here, but any level depending on > >> h/w state left from bootloader and etc. The performance level > >> corresponds to the voltage of the core domain, hence we need to > >> initialize the voltage vote before device is resumed. > > > > Why not let the driver deal with this instead? It should be able to > > probe its device, no matter what state the bootloader has put the > > device into. > > > > To me, it sounds like a call to dev_pm_genpd_set_performance_state() > > (perhaps via dev_pm_opp_set_opp() or dev_pm_opp_set_rate()) from the > > driver itself, should be sufficient? > > We did that in a previous versions of this series where drivers were > calling devm_tegra_core_dev_init_opp_table() helper during the probe to > initialize performance state of the domain. Moving OPP state > initialization into central place made drivers cleaner by removing the > boilerplate code. I am not against doing this in a central place, like $subject patch suggests. As a matter of fact, it makes perfect sense to me. However, what I am concerned about, is that you require to use genpd internal data structures to do it. I think we should try to avoid that. > > I can revert back to the previous variant, although this variant works > well too. I looked at that code and in that path we end up calling dev_pm_opp_set_rate(), after it has initialized the opp table for the device. Rather than doing the OF parsing above to find out the current state for the device, why can't you just call dev_pm_opp_set_rate() to initialize a proper vote instead? > > > I understand that it means the domain may change the OPP during boot, > > without respecting a vote for a device that has not been probed yet. > > But is there a problem with this? > > Domains themselves don't change OPP, there is no problem with that. The > point is to have cleaner code in the drivers. Alright, this makes sense to me as well. Kind regards Uffe
09.08.2021 17:15, Ulf Hansson пишет: >> We did that in a previous versions of this series where drivers were >> calling devm_tegra_core_dev_init_opp_table() helper during the probe to >> initialize performance state of the domain. Moving OPP state >> initialization into central place made drivers cleaner by removing the >> boilerplate code. > I am not against doing this in a central place, like $subject patch > suggests. As a matter of fact, it makes perfect sense to me. > > However, what I am concerned about, is that you require to use genpd > internal data structures to do it. I think we should try to avoid > that. Alright, what do you think about this: diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index a934c679e6ce..5faed62075e9 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2669,12 +2669,37 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, dev->pm_domain->detach = genpd_dev_pm_detach; dev->pm_domain->sync = genpd_dev_pm_sync; + if (pd->default_performance_state) { + unsigned int default_pstate; + + ret = pd->default_performance_state(pd, dev); + if (ret < 0) { + dev_err(dev, "failed to get default performance state for PM domain %s: %d\n", + pd->name, ret); + goto out; + } + + default_pstate = ret; + + if (power_on) { + ret = dev_pm_genpd_set_performance_state(dev, default_pstate); + if (ret) { + dev_err(dev, "failed to set default performance state %u for PM domain %s: %d\n", + default_pstate, pd->name, ret); + goto out; + } + } else { + dev_gpd_data(dev)->rpm_pstate = default_pstate; + } + } + if (power_on) { genpd_lock(pd); ret = genpd_power_on(pd, 0); genpd_unlock(pd); } +out: if (ret) genpd_remove_device(pd, dev); diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index 81d1f019fa0c..9efb55f52462 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -518,15 +518,14 @@ static const char * const tegra_emc_compats[] = { * We retrieve clock rate of the attached device and initialize domain's * performance state in accordance to the clock rate. */ -static int tegra_pmc_pd_attach_dev(struct generic_pm_domain *genpd, - struct device *dev) +static int tegra_pmc_genpd_default_perf_state(struct generic_pm_domain *genpd, + struct device *dev) { - struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev); struct opp_table *opp_table, *pd_opp_table; struct generic_pm_domain *core_genpd; struct dev_pm_opp *opp, *pd_opp; - unsigned long rate, state; struct gpd_link *link; + unsigned long rate; struct clk *clk; u32 hw_version; int ret; @@ -633,8 +632,7 @@ static int tegra_pmc_pd_attach_dev(struct generic_pm_domain *genpd, * RPM-resume of the device. This means that drivers don't need to * explicitly initialize performance state. */ - state = pm_genpd_opp_to_performance_state(&core_genpd->dev, pd_opp); - gpd_data->rpm_pstate = state; + ret = pm_genpd_opp_to_performance_state(&core_genpd->dev, pd_opp); dev_pm_opp_put(pd_opp); put_pd_opp_table: @@ -1383,7 +1381,7 @@ static int tegra_powergate_add(struct tegra_pmc *pmc, struct device_node *np) pg->id = id; pg->genpd.name = np->name; - pg->genpd.attach_dev = tegra_pmc_pd_attach_dev; + pg->genpd.default_performance_state = tegra_pmc_genpd_default_perf_state; pg->genpd.power_off = tegra_genpd_power_off; pg->genpd.power_on = tegra_genpd_power_on; pg->pmc = pmc; @@ -1500,7 +1498,7 @@ static int tegra_pmc_core_pd_add(struct tegra_pmc *pmc, struct device_node *np) return -ENOMEM; genpd->name = np->name; - genpd->attach_dev = tegra_pmc_pd_attach_dev; + genpd->default_performance_state = tegra_pmc_genpd_default_perf_state; genpd->set_performance_state = tegra_pmc_core_pd_set_performance_state; genpd->opp_to_performance_state = tegra_pmc_core_pd_opp_to_performance_state; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 21a0577305ef..cd4867817ca5 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -143,6 +143,8 @@ struct generic_pm_domain { struct device *dev); void (*detach_dev)(struct generic_pm_domain *domain, struct device *dev); + int (*default_performance_state)(struct generic_pm_domain *domain, + struct device *dev); unsigned int flags; /* Bit field of configs for genpd */ struct genpd_power_state *states; void (*free_states)(struct genpd_power_state *states, >> I can revert back to the previous variant, although this variant works >> well too. > I looked at that code and in that path we end up calling > dev_pm_opp_set_rate(), after it has initialized the opp table for the > device. > > Rather than doing the OF parsing above to find out the current state > for the device, why can't you just call dev_pm_opp_set_rate() to > initialize a proper vote instead? > For some devices clock rate is either preset by bootloader, or by clk driver, or by assigned-clocks in a device-tree. And then I don't see what's the difference in comparison to initialization for the current rate. For some devices, like memory controller, we can't just change the clock rate because it's a complex procedure and some boards will use fixed rate, but the power vote still must be initialized.
On Tue, 10 Aug 2021 at 01:56, Dmitry Osipenko <digetx@gmail.com> wrote: > > 09.08.2021 17:15, Ulf Hansson пишет: > >> We did that in a previous versions of this series where drivers were > >> calling devm_tegra_core_dev_init_opp_table() helper during the probe to > >> initialize performance state of the domain. Moving OPP state > >> initialization into central place made drivers cleaner by removing the > >> boilerplate code. > > I am not against doing this in a central place, like $subject patch > > suggests. As a matter of fact, it makes perfect sense to me. > > > > However, what I am concerned about, is that you require to use genpd > > internal data structures to do it. I think we should try to avoid > > that. > > Alright, what do you think about this: > > diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c > index a934c679e6ce..5faed62075e9 100644 > --- a/drivers/base/power/domain.c > +++ b/drivers/base/power/domain.c > @@ -2669,12 +2669,37 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, > dev->pm_domain->detach = genpd_dev_pm_detach; > dev->pm_domain->sync = genpd_dev_pm_sync; > > + if (pd->default_performance_state) { > + unsigned int default_pstate; > + > + ret = pd->default_performance_state(pd, dev); > + if (ret < 0) { > + dev_err(dev, "failed to get default performance state for PM domain %s: %d\n", > + pd->name, ret); > + goto out; > + } Adding a new callback seems reasonable to support this. > + > + default_pstate = ret; > + > + if (power_on) { > + ret = dev_pm_genpd_set_performance_state(dev, default_pstate); However, this is more questionable to me. First, I don't think we should care about whether this is "power_on" or not. At this point, performance states are treated orthogonal to idle states in genpd. We may decide to change that in some way, but that deserves a different change. Second, I don't think we should call dev_pm_genpd_set_performance_state() from here. It's probably better handled from the genpd callback itself, if/when needed. That said, perhaps the new callback should just return a regular error code and zero on success, rather than the current performance state. See more below. > + if (ret) { > + dev_err(dev, "failed to set default performance state %u for PM domain %s: %d\n", > + default_pstate, pd->name, ret); > + goto out; > + } > + } else { > + dev_gpd_data(dev)->rpm_pstate = default_pstate; No, this isn't the right thing to do. It looks like you are trying to use the ->rpm_pstate for synchronization with runtime PM for consumer drivers. This is fragile as it depends on the runtime PM deployment in the consumer driver. I think you should look at ->rpm_pstate as a variable solely for managing save/restore of the performance state for the device, during runtime suspend/resume in genpd. Synchronization of a vote for a performance state for a device, needs to be managed by calling dev_pm_genpd_set_performance_state() - or by calling an OPP function that calls it, like dev_pm_opp_set_rate(), for example. > + } > + } > + > if (power_on) { > genpd_lock(pd); > ret = genpd_power_on(pd, 0); > genpd_unlock(pd); > } > > +out: > if (ret) > genpd_remove_device(pd, dev); > > diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c > index 81d1f019fa0c..9efb55f52462 100644 > --- a/drivers/soc/tegra/pmc.c > +++ b/drivers/soc/tegra/pmc.c > @@ -518,15 +518,14 @@ static const char * const tegra_emc_compats[] = { > * We retrieve clock rate of the attached device and initialize domain's > * performance state in accordance to the clock rate. > */ > -static int tegra_pmc_pd_attach_dev(struct generic_pm_domain *genpd, > - struct device *dev) > +static int tegra_pmc_genpd_default_perf_state(struct generic_pm_domain *genpd, > + struct device *dev) > { > - struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev); > struct opp_table *opp_table, *pd_opp_table; > struct generic_pm_domain *core_genpd; > struct dev_pm_opp *opp, *pd_opp; > - unsigned long rate, state; > struct gpd_link *link; > + unsigned long rate; > struct clk *clk; > u32 hw_version; > int ret; > @@ -633,8 +632,7 @@ static int tegra_pmc_pd_attach_dev(struct generic_pm_domain *genpd, > * RPM-resume of the device. This means that drivers don't need to > * explicitly initialize performance state. > */ > - state = pm_genpd_opp_to_performance_state(&core_genpd->dev, pd_opp); > - gpd_data->rpm_pstate = state; > + ret = pm_genpd_opp_to_performance_state(&core_genpd->dev, pd_opp); I don't see how this avoids tegra_pmc_genpd_default_perf_state() from having to walk &genpd->child_links. That's still an issue, right? > dev_pm_opp_put(pd_opp); > > put_pd_opp_table: > @@ -1383,7 +1381,7 @@ static int tegra_powergate_add(struct tegra_pmc *pmc, struct device_node *np) > > pg->id = id; > pg->genpd.name = np->name; > - pg->genpd.attach_dev = tegra_pmc_pd_attach_dev; > + pg->genpd.default_performance_state = tegra_pmc_genpd_default_perf_state; > pg->genpd.power_off = tegra_genpd_power_off; > pg->genpd.power_on = tegra_genpd_power_on; > pg->pmc = pmc; > @@ -1500,7 +1498,7 @@ static int tegra_pmc_core_pd_add(struct tegra_pmc *pmc, struct device_node *np) > return -ENOMEM; > > genpd->name = np->name; > - genpd->attach_dev = tegra_pmc_pd_attach_dev; > + genpd->default_performance_state = tegra_pmc_genpd_default_perf_state; > genpd->set_performance_state = tegra_pmc_core_pd_set_performance_state; > genpd->opp_to_performance_state = tegra_pmc_core_pd_opp_to_performance_state; > > diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h > index 21a0577305ef..cd4867817ca5 100644 > --- a/include/linux/pm_domain.h > +++ b/include/linux/pm_domain.h > @@ -143,6 +143,8 @@ struct generic_pm_domain { > struct device *dev); > void (*detach_dev)(struct generic_pm_domain *domain, > struct device *dev); > + int (*default_performance_state)(struct generic_pm_domain *domain, > + struct device *dev); > unsigned int flags; /* Bit field of configs for genpd */ > struct genpd_power_state *states; > void (*free_states)(struct genpd_power_state *states, > > >> I can revert back to the previous variant, although this variant works > >> well too. > > I looked at that code and in that path we end up calling > > dev_pm_opp_set_rate(), after it has initialized the opp table for the > > device. > > > > Rather than doing the OF parsing above to find out the current state > > for the device, why can't you just call dev_pm_opp_set_rate() to > > initialize a proper vote instead? > > > > For some devices clock rate is either preset by bootloader, or by clk driver, or by assigned-clocks in a device-tree. And then I don't see what's the difference in comparison to initialization for the current rate. > > For some devices, like memory controller, we can't just change the clock rate because it's a complex procedure and some boards will use fixed rate, but the power vote still must be initialized. I am not saying you should change the clock rate. The current code path that runs via devm_tegra_core_dev_init_opp_table() just calls clk_get_rate and then dev_pm_opp_set_rate() with the current rate to vote for the corresponding OPP level. Right? Isn't this exactly what you want? No? Kind regards Uffe
10.08.2021 13:51, Ulf Hansson пишет: ... >> + if (power_on) { >> + ret = dev_pm_genpd_set_performance_state(dev, default_pstate); > > However, this is more questionable to me. > > First, I don't think we should care about whether this is "power_on" > or not. At this point, performance states are treated orthogonal to > idle states in genpd. We may decide to change that in some way, but > that deserves a different change. Okay > Second, I don't think we should call > dev_pm_genpd_set_performance_state() from here. It's probably better > handled from the genpd callback itself, if/when needed. Indeed > That said, perhaps the new callback should just return a regular error > code and zero on success, rather than the current performance state. > See more below. > >> + if (ret) { >> + dev_err(dev, "failed to set default performance state %u for PM domain %s: %d\n", >> + default_pstate, pd->name, ret); >> + goto out; >> + } >> + } else { >> + dev_gpd_data(dev)->rpm_pstate = default_pstate; > > No, this isn't the right thing to do. > > It looks like you are trying to use the ->rpm_pstate for > synchronization with runtime PM for consumer drivers. This is fragile > as it depends on the runtime PM deployment in the consumer driver. I > think you should look at ->rpm_pstate as a variable solely for > managing save/restore of the performance state for the device, during > runtime suspend/resume in genpd. > > Synchronization of a vote for a performance state for a device, needs > to be managed by calling dev_pm_genpd_set_performance_state() - or by > calling an OPP function that calls it, like dev_pm_opp_set_rate(), for > example. The !power_on case should be skipped at all because common core code can't handle it properly, hence rpm_pstate shouldn't be touched. I realized it only after sending out this email. ... >>> Rather than doing the OF parsing above to find out the current state >>> for the device, why can't you just call dev_pm_opp_set_rate() to >>> initialize a proper vote instead? >>> >> >> For some devices clock rate is either preset by bootloader, or by clk driver, or by assigned-clocks in a device-tree. And then I don't see what's the difference in comparison to initialization for the current rate. >> >> For some devices, like memory controller, we can't just change the clock rate because it's a complex procedure and some boards will use fixed rate, but the power vote still must be initialized. > > I am not saying you should change the clock rate. The current code > path that runs via devm_tegra_core_dev_init_opp_table() just calls > clk_get_rate and then dev_pm_opp_set_rate() with the current rate to > vote for the corresponding OPP level. Right? > > Isn't this exactly what you want? No? I see now what you meant, it's actually a simpler variant and it works too. Thank you for the suggestion, I'll prepare v8.
11.08.2021 22:30, Dmitry Osipenko пишет: > 10.08.2021 13:51, Ulf Hansson пишет: > ... >>> + if (power_on) { >>> + ret = dev_pm_genpd_set_performance_state(dev, default_pstate); >> >> However, this is more questionable to me. >> >> First, I don't think we should care about whether this is "power_on" >> or not. At this point, performance states are treated orthogonal to >> idle states in genpd. We may decide to change that in some way, but >> that deserves a different change. > > Okay > >> Second, I don't think we should call >> dev_pm_genpd_set_performance_state() from here. It's probably better >> handled from the genpd callback itself, if/when needed. > Indeed > >> That said, perhaps the new callback should just return a regular error >> code and zero on success, rather than the current performance state. >> See more below. >> >>> + if (ret) { >>> + dev_err(dev, "failed to set default performance state %u for PM domain %s: %d\n", >>> + default_pstate, pd->name, ret); >>> + goto out; >>> + } >>> + } else { >>> + dev_gpd_data(dev)->rpm_pstate = default_pstate; >> >> No, this isn't the right thing to do. >> >> It looks like you are trying to use the ->rpm_pstate for >> synchronization with runtime PM for consumer drivers. This is fragile >> as it depends on the runtime PM deployment in the consumer driver. I >> think you should look at ->rpm_pstate as a variable solely for >> managing save/restore of the performance state for the device, during >> runtime suspend/resume in genpd. >> >> Synchronization of a vote for a performance state for a device, needs >> to be managed by calling dev_pm_genpd_set_performance_state() - or by >> calling an OPP function that calls it, like dev_pm_opp_set_rate(), for >> example. > > The !power_on case should be skipped at all because common core code > can't handle it properly, hence rpm_pstate shouldn't be touched. I > realized it only after sending out this email. > > ... >>>> Rather than doing the OF parsing above to find out the current state >>>> for the device, why can't you just call dev_pm_opp_set_rate() to >>>> initialize a proper vote instead? >>>> >>> >>> For some devices clock rate is either preset by bootloader, or by clk driver, or by assigned-clocks in a device-tree. And then I don't see what's the difference in comparison to initialization for the current rate. >>> >>> For some devices, like memory controller, we can't just change the clock rate because it's a complex procedure and some boards will use fixed rate, but the power vote still must be initialized. >> >> I am not saying you should change the clock rate. The current code >> path that runs via devm_tegra_core_dev_init_opp_table() just calls >> clk_get_rate and then dev_pm_opp_set_rate() with the current rate to >> vote for the corresponding OPP level. Right? >> >> Isn't this exactly what you want? No? > > I see now what you meant, it's actually a simpler variant and it works > too. Thank you for the suggestion, I'll prepare v8. > My bad, it doesn't work at all. I actually need to use the rpm_pstate or something else because performance state is coupled with the enable state of the device. If device is never rpm-suspended by consumer driver, then the initialized performance state is never dropped. Hence I want to initialize the state which is set only when device is resumed. I'll need to think more about it.
12.08.2021 01:41, Dmitry Osipenko пишет: >>> I am not saying you should change the clock rate. The current code >>> path that runs via devm_tegra_core_dev_init_opp_table() just calls >>> clk_get_rate and then dev_pm_opp_set_rate() with the current rate to >>> vote for the corresponding OPP level. Right? >>> >>> Isn't this exactly what you want? No? >> I see now what you meant, it's actually a simpler variant and it works >> too. Thank you for the suggestion, I'll prepare v8. >> > My bad, it doesn't work at all. I actually need to use the rpm_pstate or > something else because performance state is coupled with the enable > state of the device. If device is never rpm-suspended by consumer > driver, then the initialized performance state is never dropped. Hence I > want to initialize the state which is set only when device is resumed. > > I'll need to think more about it. GENPD core has these false assumptions: 1. It assumes that by default all devices are at zero performance level at a boot time. This is not true for Tegra because hardware is pre-initialized independently from GENPD. 2. It assumes that nothing depends on performance level and devices can operate at any level at any time. Not true for Tegra and other platforms where performance level is coupled with clocks state of attached devices. OPP framework glues clock and performance level together for us, which works good so far. Hence I either need to patch every driver to use dev_pm_opp_set_rate in order to sync clk rate with the perf level at device runtime, or I need to preset the rpm perf level to allow GENPD core to set the level before device is resumed.
On Thu, 12 Aug 2021 at 03:40, Dmitry Osipenko <digetx@gmail.com> wrote: > > 12.08.2021 01:41, Dmitry Osipenko пишет: > >>> I am not saying you should change the clock rate. The current code > >>> path that runs via devm_tegra_core_dev_init_opp_table() just calls > >>> clk_get_rate and then dev_pm_opp_set_rate() with the current rate to > >>> vote for the corresponding OPP level. Right? > >>> > >>> Isn't this exactly what you want? No? > >> I see now what you meant, it's actually a simpler variant and it works > >> too. Thank you for the suggestion, I'll prepare v8. > >> > > My bad, it doesn't work at all. I actually need to use the rpm_pstate or > > something else because performance state is coupled with the enable > > state of the device. If device is never rpm-suspended by consumer > > driver, then the initialized performance state is never dropped. Hence I > > want to initialize the state which is set only when device is resumed. > > > > I'll need to think more about it. > > GENPD core has these false assumptions: > > 1. It assumes that by default all devices are at zero performance level > at a boot time. This is not true for Tegra because hardware is > pre-initialized independently from GENPD. Right, which is similar to other SoCs. > > 2. It assumes that nothing depends on performance level and devices can > operate at any level at any time. Not true for Tegra and other platforms > where performance level is coupled with clocks state of attached > devices. OPP framework glues clock and performance level together for > us, which works good so far. Right, OPPs need to be managed differently depending on the SoC. That's why genpd is there to help and to model this as "performance states" and to allow operations to be set through SoC specific callabacks, for example. More importantly, the assumption is that in general, consumer drivers should use the OPP library to vote/set OPP levels, they shouldn't call dev_pm_genpd_set_performance_state() - unless they know exactly what they are doing. > > Hence I either need to patch every driver to use dev_pm_opp_set_rate in > order to sync clk rate with the perf level at device runtime, or I need > to preset the rpm perf level to allow GENPD core to set the level before > device is resumed. When the device is getting attached to its genpd (during the probe sequence and for a single PM domain case), runtime PM is disabled for the device. If you would call dev_pm_opp_set_rate() from a genpd callback during attach (without changing the rate), this means you would update/synchronize the vote. In this way, the vote is set before the device is runtime resumed by the driver, right? On the other hand, patching the driver should also be quite simple and you need to do that anyways, don't you? Kind regards Uffe
12.08.2021 14:17, Ulf Hansson пишет: > On Thu, 12 Aug 2021 at 03:40, Dmitry Osipenko <digetx@gmail.com> wrote: >> >> 12.08.2021 01:41, Dmitry Osipenko пишет: >>>>> I am not saying you should change the clock rate. The current code >>>>> path that runs via devm_tegra_core_dev_init_opp_table() just calls >>>>> clk_get_rate and then dev_pm_opp_set_rate() with the current rate to >>>>> vote for the corresponding OPP level. Right? >>>>> >>>>> Isn't this exactly what you want? No? >>>> I see now what you meant, it's actually a simpler variant and it works >>>> too. Thank you for the suggestion, I'll prepare v8. >>>> >>> My bad, it doesn't work at all. I actually need to use the rpm_pstate or >>> something else because performance state is coupled with the enable >>> state of the device. If device is never rpm-suspended by consumer >>> driver, then the initialized performance state is never dropped. Hence I >>> want to initialize the state which is set only when device is resumed. >>> >>> I'll need to think more about it. >> >> GENPD core has these false assumptions: >> >> 1. It assumes that by default all devices are at zero performance level >> at a boot time. This is not true for Tegra because hardware is >> pre-initialized independently from GENPD. > > Right, which is similar to other SoCs. > >> >> 2. It assumes that nothing depends on performance level and devices can >> operate at any level at any time. Not true for Tegra and other platforms >> where performance level is coupled with clocks state of attached >> devices. OPP framework glues clock and performance level together for >> us, which works good so far. > > Right, OPPs need to be managed differently depending on the SoC. > That's why genpd is there to help and to model this as "performance > states" and to allow operations to be set through SoC specific > callabacks, for example. > > More importantly, the assumption is that in general, consumer drivers > should use the OPP library to vote/set OPP levels, they shouldn't call > dev_pm_genpd_set_performance_state() - unless they know exactly what > they are doing. > >> >> Hence I either need to patch every driver to use dev_pm_opp_set_rate in >> order to sync clk rate with the perf level at device runtime, or I need >> to preset the rpm perf level to allow GENPD core to set the level before >> device is resumed. > > When the device is getting attached to its genpd (during the probe > sequence and for a single PM domain case), runtime PM is disabled for > the device. If you would call dev_pm_opp_set_rate() from a genpd > callback during attach (without changing the rate), this means you > would update/synchronize the vote. In this way, the vote is set before > the device is runtime resumed by the driver, right? I was doing that previously, but then realized that for some devices the state needs to be synced only once device is rpm-resumed. Making RPM driver to perform the syncing worked well. > On the other hand, patching the driver should also be quite simple and > you need to do that anyways, don't you? It will be extra boilerplate code in the drivers, but perhaps it's the best option for today, I'll consider it for v8. Thank you for helping with the review.
diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index f63dfb2ca3f9..ebafb818b08e 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -506,6 +506,151 @@ static void tegra_pmc_scratch_writel(struct tegra_pmc *pmc, u32 value, writel(value, pmc->scratch + offset); } +static const char * const tegra_emc_compats[] = { + "nvidia,tegra20-emc", + "nvidia,tegra30-emc", + NULL, +}; + +/* + * This GENPD callback is used by both powergate and core domains. + * + * We retrieve clock rate of the attached device and initialize domain's + * performance state in accordance to the clock rate. + */ +static int tegra_pmc_pd_attach_dev(struct generic_pm_domain *genpd, + struct device *dev) +{ + struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev); + struct opp_table *opp_table, *pd_opp_table; + struct generic_pm_domain *core_genpd; + struct dev_pm_opp *opp, *pd_opp; + unsigned long rate, state; + struct gpd_link *link; + struct clk *clk; + u32 hw_version; + int ret; + + /* + * Tegra114+ SocS don't support OPP yet. But if they will get OPP + * support, then we want to skip OPP for older kernels to preserve + * compatibility of newer DTBs with older kernels. + */ + if (!pmc->soc->supports_core_domain) + return 0; + + /* + * The EMC devices are a special case because we have a protection + * from non-EMC drivers getting clock handle before EMC driver is + * fully initialized. The goal of the protection is to prevent + * devfreq driver from getting failures if it will try to change + * EMC clock rate until clock is fully initialized. The EMC drivers + * will initialize the performance state by themselves. + */ + if (of_device_compatible_match(dev->of_node, tegra_emc_compats)) + return 0; + + clk = clk_get(dev, NULL); + if (IS_ERR(clk)) { + dev_err(&genpd->dev, "failed to get clk of %s: %pe\n", + dev_name(dev), clk); + return PTR_ERR(clk); + } + + rate = clk_get_rate(clk); + if (!rate) { + dev_err(&genpd->dev, "failed to get clk rate of %s\n", + dev_name(dev)); + ret = -EINVAL; + goto put_clk; + } + + if (of_machine_is_compatible("nvidia,tegra20")) + hw_version = BIT(tegra_sku_info.soc_process_id); + else + hw_version = BIT(tegra_sku_info.soc_speedo_id); + + opp_table = dev_pm_opp_set_supported_hw(dev, &hw_version, 1); + if (IS_ERR(opp_table)) { + dev_err(&genpd->dev, "failed to set OPP supported HW for %s: %d\n", + dev_name(dev), ret); + ret = PTR_ERR(opp_table); + goto put_clk; + } + + ret = dev_pm_opp_of_add_table(dev); + if (ret) { + /* older DTBs that don't have OPPs will get -ENODEV here */ + if (ret != -ENODEV) + dev_err(&genpd->dev, "failed to get OPP table of %s: %d\n", + dev_name(dev), ret); + else + ret = 0; + + goto put_supported_hw; + } + + /* find suitable OPP for the rate */ + opp = dev_pm_opp_find_freq_ceil(dev, &rate); + + if (opp == ERR_PTR(-ERANGE)) + opp = dev_pm_opp_find_freq_floor(dev, &rate); + + if (IS_ERR(opp)) { + dev_err(&genpd->dev, "failed to find OPP for %luHz of %s: %pe\n", + rate, dev_name(dev), opp); + ret = PTR_ERR(opp); + goto remove_dev_table; + } + + if (!list_empty(&genpd->child_links)) { + link = list_first_entry(&genpd->child_links, struct gpd_link, + child_node); + core_genpd = link->parent; + } else { + core_genpd = genpd; + } + + pd_opp_table = dev_pm_opp_get_opp_table(&core_genpd->dev); + if (IS_ERR(pd_opp_table)) { + dev_err(&genpd->dev, "failed to get OPP table of %s: %pe\n", + dev_name(&core_genpd->dev), pd_opp_table); + ret = PTR_ERR(pd_opp_table); + goto put_dev_opp; + } + + pd_opp = dev_pm_opp_xlate_required_opp(opp_table, pd_opp_table, opp); + if (IS_ERR(pd_opp)) { + dev_err(&genpd->dev, + "failed to xlate required OPP for %luHz of %s: %pe\n", + rate, dev_name(dev), pd_opp); + ret = PTR_ERR(pd_opp); + goto put_pd_opp_table; + } + + /* + * The initialized state will be applied by GENPD core on the first + * RPM-resume of the device. This means that drivers don't need to + * explicitly initialize performance state. + */ + state = pm_genpd_opp_to_performance_state(&core_genpd->dev, pd_opp); + gpd_data->rpm_pstate = state; + dev_pm_opp_put(pd_opp); + +put_pd_opp_table: + dev_pm_opp_put_opp_table(pd_opp_table); +put_dev_opp: + dev_pm_opp_put(opp); +remove_dev_table: + dev_pm_opp_of_remove_table(dev); +put_supported_hw: + dev_pm_opp_put_supported_hw(opp_table); +put_clk: + clk_put(clk); + + return ret; +} + /* * TODO Figure out a way to call this with the struct tegra_pmc * passed in. * This currently doesn't work because readx_poll_timeout() can only operate @@ -1238,6 +1383,7 @@ static int tegra_powergate_add(struct tegra_pmc *pmc, struct device_node *np) pg->id = id; pg->genpd.name = np->name; + pg->genpd.attach_dev = tegra_pmc_pd_attach_dev; pg->genpd.power_off = tegra_genpd_power_off; pg->genpd.power_on = tegra_genpd_power_on; pg->pmc = pmc; @@ -1354,6 +1500,7 @@ static int tegra_pmc_core_pd_add(struct tegra_pmc *pmc, struct device_node *np) return -ENOMEM; genpd->name = np->name; + genpd->attach_dev = tegra_pmc_pd_attach_dev; genpd->set_performance_state = tegra_pmc_core_pd_set_performance_state; genpd->opp_to_performance_state = tegra_pmc_core_pd_opp_to_performance_state;
Implement attach_dev() callback of power domain drivers that initializes the domain's performance state. GENPD core will apply the performance state on the first runtime PM resume of the attached device. Signed-off-by: Dmitry Osipenko <digetx@gmail.com> --- drivers/soc/tegra/pmc.c | 147 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+)