diff mbox series

[V2,2/2] cpufreq: dt: Refactor initialization to handle probe deferral properly

Message ID f75c61f193f396608d592ae2a9938264d582c038.1598260050.git.viresh.kumar@linaro.org
State Accepted
Commit dc279ac6e5b4e06ec9c15b82e30e8bf2576b14f9
Headers show
Series [V2,1/2] opp: Allow dev_pm_opp_get_opp_table() to return -EPROBE_DEFER | expand

Commit Message

Viresh Kumar Aug. 24, 2020, 9:09 a.m. UTC
From: Stephan Gerhold <stephan@gerhold.net>


cpufreq-dt is currently unable to handle -EPROBE_DEFER properly
because the error code is not propagated for the cpufreq_driver->init()
callback. Instead, it attempts to avoid the situation by temporarily
requesting all resources within resources_available() and releasing them
again immediately after. This has several disadvantages:

  - Whenever we add something like interconnect handling to the OPP core
    we need to patch cpufreq-dt to request these resources early.

  - resources_available() is only run for CPU0, but other clusters may
    eventually depend on other resources that are not available yet.
    (See FIXME comment removed by this commit...)

  - All resources need to be looked up several times.

Now that the OPP core can propagate -EPROBE_DEFER during initialization,
it would be nice to avoid all that trouble and just propagate its error
code when necessary.

This commit refactors the cpufreq-dt driver to initialize private_data
before registering the cpufreq driver. We do this by iterating over
all possible CPUs and ensure that all resources are initialized:

  1. dev_pm_opp_get_opp_table() ensures the OPP table is allocated
     and initialized with clock and interconnects.

  2. dev_pm_opp_set_regulators() requests the regulators and assigns
     them to the OPP table.

  3. We call dev_pm_opp_of_get_sharing_cpus() early so that we only
     initialize the OPP table once for each shared policy.

With these changes, we actually end up saving a few lines of code,
the resources are no longer looked up multiple times and everything
should be much more robust.

Signed-off-by: Stephan Gerhold <stephan@gerhold.net>

[ Viresh: Use list_head structure for maintaining the list and minor
	  changes ]
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>

---
 drivers/cpufreq/cpufreq-dt.c | 286 +++++++++++++++++------------------
 1 file changed, 143 insertions(+), 143 deletions(-)

-- 
2.25.0.rc1.19.g042ed3e048af

Comments

Marek Szyprowski Sept. 1, 2020, 8:57 a.m. UTC | #1
Hi Viresh

On 24.08.2020 11:09, Viresh Kumar wrote:
> From: Stephan Gerhold <stephan@gerhold.net>
>
> cpufreq-dt is currently unable to handle -EPROBE_DEFER properly
> because the error code is not propagated for the cpufreq_driver->init()
> callback. Instead, it attempts to avoid the situation by temporarily
> requesting all resources within resources_available() and releasing them
> again immediately after. This has several disadvantages:
>
>    - Whenever we add something like interconnect handling to the OPP core
>      we need to patch cpufreq-dt to request these resources early.
>
>    - resources_available() is only run for CPU0, but other clusters may
>      eventually depend on other resources that are not available yet.
>      (See FIXME comment removed by this commit...)
>
>    - All resources need to be looked up several times.
>
> Now that the OPP core can propagate -EPROBE_DEFER during initialization,
> it would be nice to avoid all that trouble and just propagate its error
> code when necessary.
>
> This commit refactors the cpufreq-dt driver to initialize private_data
> before registering the cpufreq driver. We do this by iterating over
> all possible CPUs and ensure that all resources are initialized:
>
>    1. dev_pm_opp_get_opp_table() ensures the OPP table is allocated
>       and initialized with clock and interconnects.
>
>    2. dev_pm_opp_set_regulators() requests the regulators and assigns
>       them to the OPP table.
>
>    3. We call dev_pm_opp_of_get_sharing_cpus() early so that we only
>       initialize the OPP table once for each shared policy.
>
> With these changes, we actually end up saving a few lines of code,
> the resources are no longer looked up multiple times and everything
> should be much more robust.
>
> Signed-off-by: Stephan Gerhold <stephan@gerhold.net>
> [ Viresh: Use list_head structure for maintaining the list and minor
> 	  changes ]
> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>

This patch landed in linux-next about a week ago. It introduces a 
following warning on Samsung Exnyos3250 SoC:

cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq: 
1000000000, volt: 1150000, enabled: 1. New: freq: 1000000000, volt: 
1150000, enabled: 1
cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq: 
900000000, volt: 1112500, enabled: 1. New: freq: 900000000, volt: 
1112500, enabled: 1
cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq: 
800000000, volt: 1075000, enabled: 1. New: freq: 800000000, volt: 
1075000, enabled: 1
cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq: 
700000000, volt: 1037500, enabled: 1. New: freq: 700000000, volt: 
1037500, enabled: 1
cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq: 
600000000, volt: 1000000, enabled: 1. New: freq: 600000000, volt: 
1000000, enabled: 1
cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq: 
500000000, volt: 962500, enabled: 1. New: freq: 500000000, volt: 962500, 
enabled: 1
cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq: 
400000000, volt: 925000, enabled: 1. New: freq: 400000000, volt: 925000, 
enabled: 1
cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq: 
300000000, volt: 887500, enabled: 1. New: freq: 300000000, volt: 887500, 
enabled: 1
cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq: 
200000000, volt: 850000, enabled: 1. New: freq: 200000000, volt: 850000, 
enabled: 1
cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq: 
100000000, volt: 850000, enabled: 1. New: freq: 100000000, volt: 850000, 
enabled: 1

I've checked a bit and this is related to the fact that Exynos3250 SoC 
use OPP-v1 table. Is this intentional? It is not a problem to convert it 
to OPP-v2 and mark OPP table as shared, but this is a kind of a regression.

> ...

Best regards
Marek Szyprowski Sept. 1, 2020, 10:05 a.m. UTC | #2
Hi Viresh,

On 01.09.2020 11:45, Viresh Kumar wrote:
> On 01-09-20, 10:57, Marek Szyprowski wrote:
>> This patch landed in linux-next about a week ago. It introduces a
>> following warning on Samsung Exnyos3250 SoC:
>>
>> cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq:
>> 1000000000, volt: 1150000, enabled: 1. New: freq: 1000000000, volt:
>> 1150000, enabled: 1
>> cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq:
>> 900000000, volt: 1112500, enabled: 1. New: freq: 900000000, volt:
>> 1112500, enabled: 1
>> cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq:
>> 800000000, volt: 1075000, enabled: 1. New: freq: 800000000, volt:
>> 1075000, enabled: 1
>> cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq:
>> 700000000, volt: 1037500, enabled: 1. New: freq: 700000000, volt:
>> 1037500, enabled: 1
>> cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq:
>> 600000000, volt: 1000000, enabled: 1. New: freq: 600000000, volt:
>> 1000000, enabled: 1
>> cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq:
>> 500000000, volt: 962500, enabled: 1. New: freq: 500000000, volt: 962500,
>> enabled: 1
>> cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq:
>> 400000000, volt: 925000, enabled: 1. New: freq: 400000000, volt: 925000,
>> enabled: 1
>> cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq:
>> 300000000, volt: 887500, enabled: 1. New: freq: 300000000, volt: 887500,
>> enabled: 1
>> cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq:
>> 200000000, volt: 850000, enabled: 1. New: freq: 200000000, volt: 850000,
>> enabled: 1
>> cpu cpu1: _opp_is_duplicate: duplicate OPPs detected. Existing: freq:
>> 100000000, volt: 850000, enabled: 1. New: freq: 100000000, volt: 850000,
>> enabled: 1
>>
>> I've checked a bit and this is related to the fact that Exynos3250 SoC
>> use OPP-v1 table. Is this intentional? It is not a problem to convert it
>> to OPP-v2 and mark OPP table as shared, but this is a kind of a regression.
> It took me 20 minutes for me to see "where has my patch gone" :(
>
> I wrote a small patch for that to work without any issues, but not
> sure how I missed or abandoned it. Anyway, here is the diff again and
> I will send it out again once you confirm it fixes the issue. Can you
> please also test your driver as a module with multiple insertion/removals ?

Indeed, this patch seems to fix/hide that warning. Feel free to add:

Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>

Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>

> diff --git a/drivers/opp/of.c b/drivers/opp/of.c
> index 5dac8bffd68c..e72753be7dc7 100644
> --- a/drivers/opp/of.c
> +++ b/drivers/opp/of.c
> @@ -905,6 +905,16 @@ static int _of_add_opp_table_v1(struct device *dev, struct opp_table *opp_table)
>          const __be32 *val;
>          int nr, ret = 0;
>   
> +       mutex_lock(&opp_table->lock);
> +       if (opp_table->parsed_static_opps) {
> +               opp_table->parsed_static_opps++;
> +               mutex_unlock(&opp_table->lock);
> +               return 0;
> +       }
> +
> +       opp_table->parsed_static_opps = 1;
> +       mutex_unlock(&opp_table->lock);
> +
>          prop = of_find_property(dev->of_node, "operating-points", NULL);
>          if (!prop)
>                  return -ENODEV;
> @@ -921,10 +931,6 @@ static int _of_add_opp_table_v1(struct device *dev, struct opp_table *opp_table)
>                  return -EINVAL;
>          }
>   
> -       mutex_lock(&opp_table->lock);
> -       opp_table->parsed_static_opps = 1;
> -       mutex_unlock(&opp_table->lock);
> -
>          val = prop->value;
>          while (nr) {
>                  unsigned long freq = be32_to_cpup(val++) * 1000;
>
Best regards
Viresh Kumar Sept. 1, 2020, 10:12 a.m. UTC | #3
On 01-09-20, 12:05, Marek Szyprowski wrote:
> Indeed, this patch seems to fix/hide that warning. Feel free to add:


s/hide/really-fix/ :)

I assumed that this problem is going to be there, when I applied the
other patch and so was trying to write a fix but somehow skipped doing
that :(

-- 
viresh
Geert Uytterhoeven Oct. 13, 2020, 9:47 a.m. UTC | #4
Hi Viresh, Stephan,

On Mon, Aug 24, 2020 at 11:12 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> From: Stephan Gerhold <stephan@gerhold.net>
>
> cpufreq-dt is currently unable to handle -EPROBE_DEFER properly
> because the error code is not propagated for the cpufreq_driver->init()
> callback. Instead, it attempts to avoid the situation by temporarily
> requesting all resources within resources_available() and releasing them
> again immediately after. This has several disadvantages:
>
>   - Whenever we add something like interconnect handling to the OPP core
>     we need to patch cpufreq-dt to request these resources early.
>
>   - resources_available() is only run for CPU0, but other clusters may
>     eventually depend on other resources that are not available yet.
>     (See FIXME comment removed by this commit...)
>
>   - All resources need to be looked up several times.
>
> Now that the OPP core can propagate -EPROBE_DEFER during initialization,
> it would be nice to avoid all that trouble and just propagate its error
> code when necessary.
>
> This commit refactors the cpufreq-dt driver to initialize private_data
> before registering the cpufreq driver. We do this by iterating over
> all possible CPUs and ensure that all resources are initialized:
>
>   1. dev_pm_opp_get_opp_table() ensures the OPP table is allocated
>      and initialized with clock and interconnects.
>
>   2. dev_pm_opp_set_regulators() requests the regulators and assigns
>      them to the OPP table.
>
>   3. We call dev_pm_opp_of_get_sharing_cpus() early so that we only
>      initialize the OPP table once for each shared policy.
>
> With these changes, we actually end up saving a few lines of code,
> the resources are no longer looked up multiple times and everything
> should be much more robust.
>
> Signed-off-by: Stephan Gerhold <stephan@gerhold.net>
> [ Viresh: Use list_head structure for maintaining the list and minor
>           changes ]
> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>

Thanks for your patch, which is now commit dc279ac6e5b4e06e ("cpufreq:
dt: Refactor initialization to handle probe deferral properly") in
pm/linux-next, and to which I bisected a regression.
Reverting this commit fixes the issue.

On r8a7791/koelsch, during resume from s2ram:

     PM: suspend entry (deep)
     Filesystems sync: 0.000 seconds
     Freezing user space processes ... (elapsed 0.003 seconds) done.
     OOM killer disabled.
     Freezing remaining freezable tasks ... (elapsed 0.009 seconds) done.
     Disabling non-boot CPUs ...
     Enabling non-boot CPUs ...
    +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    +cpu cpu0: OPP table can't be empty
     CPU1 is up
     rcar-pcie fe000000.pcie: PCIe x1: link up

The cpufreq code tries to talk to the PMIC, while the I2C controller
that hosts the PMIC is suspended, and thus any communication attempt
times out.  __i2c_check_suspended() fails to notice that, as the
i2c_shmobile_i2c driver doesn't have a suspend callback calling
i2c_mark_adapter_suspended() yet.  After fixing that (will send a patch
soon), the I2C core rightfully complains with:

    WARNING: CPU: 1 PID: 13 at drivers/i2c/i2c-core.h:54
__i2c_transfer+0x4a4/0x4e4
    i2c i2c-6: Transfer while suspended
    CPU: 1 PID: 13 Comm: cpuhp/1 Not tainted
5.9.0-shmobile-09581-g05a3e5886c7615b1-dirty #718
    Hardware name: Generic R-Car Gen2 (Flattened Device Tree)
    [<c010dcec>] (unwind_backtrace) from [<c0109b18>] (show_stack+0x10/0x14)
    [<c0109b18>] (show_stack) from [<c075e928>] (dump_stack+0x8c/0xac)
    [<c075e928>] (dump_stack) from [<c011c23c>] (__warn+0xd0/0xe8)
    [<c011c23c>] (__warn) from [<c011c2c4>] (warn_slowpath_fmt+0x70/0x9c)
    [<c011c2c4>] (warn_slowpath_fmt) from [<c0548be8>]
(__i2c_transfer+0x4a4/0x4e4)
    [<c0548be8>] (__i2c_transfer) from [<c0548cd8>] (i2c_transfer+0xb0/0xf8)
    [<c0548cd8>] (i2c_transfer) from [<c046f884>] (regmap_i2c_read+0x54/0x88)
    [<c046f884>] (regmap_i2c_read) from [<c046b69c>]
(_regmap_raw_read+0x118/0x1f0)
    [<c046b69c>] (_regmap_raw_read) from [<c046b7b8>]
(_regmap_bus_read+0x44/0x68)
    [<c046b7b8>] (_regmap_bus_read) from [<c04698a0>] (_regmap_read+0x84/0x110)
    [<c04698a0>] (_regmap_read) from [<c046c10c>] (regmap_read+0x40/0x58)
    [<c046c10c>] (regmap_read) from [<c03cf0a8>]
(regulator_get_voltage_sel_regmap+0x28/0x74)
    [<c03cf0a8>] (regulator_get_voltage_sel_regmap) from [<c03cb29c>]
(regulator_get_voltage_rdev+0xa4/0x14c)
    [<c03cb29c>] (regulator_get_voltage_rdev) from [<c03cc658>]
(regulator_get_voltage+0x2c/0x60)
    [<c03cc658>] (regulator_get_voltage) from [<c03cd994>]
(regulator_is_supported_voltage+0x30/0xd8)
    [<c03cd994>] (regulator_is_supported_voltage) from [<c05a32b4>]
(_opp_add+0x164/0x1b8)
    [<c05a32b4>] (_opp_add) from [<c05a3388>] (_opp_add_v1+0x80/0xb8)
    [<c05a3388>] (_opp_add_v1) from [<c05a5220>]
(dev_pm_opp_of_add_table+0x130/0x168)
    [<c05a5220>] (dev_pm_opp_of_add_table) from [<c05a5330>]
(dev_pm_opp_of_cpumask_add_table+0x60/0xac)
    [<c05a5330>] (dev_pm_opp_of_cpumask_add_table) from [<c05ab88c>]
(cpufreq_init+0x94/0x1c4)
    [<c05ab88c>] (cpufreq_init) from [<c05a8190>] (cpufreq_online+0x148/0x7ac)
    [<c05a8190>] (cpufreq_online) from [<c05a87fc>]
(cpuhp_cpufreq_online+0x8/0x10)
    [<c05a87fc>] (cpuhp_cpufreq_online) from [<c011d088>]
(cpuhp_invoke_callback+0xf8/0x2e4)
    [<c011d088>] (cpuhp_invoke_callback) from [<c011d418>]
(cpuhp_thread_fun+0xac/0x244)
    [<c011d418>] (cpuhp_thread_fun) from [<c013c090>]
(smpboot_thread_fn+0x19c/0x1a8)
    [<c013c090>] (smpboot_thread_fn) from [<c0138408>] (kthread+0x104/0x110)
    [<c0138408>] (kthread) from [<c0100148>] (ret_from_fork+0x14/0x2c)

Gr{oetje,eeting}s,

                        Geert
Viresh Kumar Oct. 13, 2020, 9:56 a.m. UTC | #5
On 13-10-20, 11:47, Geert Uytterhoeven wrote:
> Thanks for your patch, which is now commit dc279ac6e5b4e06e ("cpufreq:
> dt: Refactor initialization to handle probe deferral properly") in
> pm/linux-next, and to which I bisected a regression.
> Reverting this commit fixes the issue.

Strange. I had a look again and I failed to understand why it would
happen this way.

> On r8a7791/koelsch, during resume from s2ram:
> 
>      PM: suspend entry (deep)
>      Filesystems sync: 0.000 seconds
>      Freezing user space processes ... (elapsed 0.003 seconds) done.
>      OOM killer disabled.
>      Freezing remaining freezable tasks ... (elapsed 0.009 seconds) done.
>      Disabling non-boot CPUs ...
>      Enabling non-boot CPUs ...
>     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
>     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
>     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
>     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
>     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
>     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
>     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
>     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
>     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
>     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
>     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
>     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
>     +cpu cpu0: OPP table can't be empty
>      CPU1 is up
>      rcar-pcie fe000000.pcie: PCIe x1: link up
> 
> The cpufreq code tries to talk to the PMIC, while the I2C controller
> that hosts the PMIC is suspended, and thus any communication attempt
> times out.  __i2c_check_suspended() fails to notice that, as the
> i2c_shmobile_i2c driver doesn't have a suspend callback calling
> i2c_mark_adapter_suspended() yet.  After fixing that (will send a patch
> soon), the I2C core rightfully complains with:
> 
>     WARNING: CPU: 1 PID: 13 at drivers/i2c/i2c-core.h:54
> __i2c_transfer+0x4a4/0x4e4
>     i2c i2c-6: Transfer while suspended
>     CPU: 1 PID: 13 Comm: cpuhp/1 Not tainted
> 5.9.0-shmobile-09581-g05a3e5886c7615b1-dirty #718
>     Hardware name: Generic R-Car Gen2 (Flattened Device Tree)
>     [<c010dcec>] (unwind_backtrace) from [<c0109b18>] (show_stack+0x10/0x14)
>     [<c0109b18>] (show_stack) from [<c075e928>] (dump_stack+0x8c/0xac)
>     [<c075e928>] (dump_stack) from [<c011c23c>] (__warn+0xd0/0xe8)
>     [<c011c23c>] (__warn) from [<c011c2c4>] (warn_slowpath_fmt+0x70/0x9c)
>     [<c011c2c4>] (warn_slowpath_fmt) from [<c0548be8>]
> (__i2c_transfer+0x4a4/0x4e4)
>     [<c0548be8>] (__i2c_transfer) from [<c0548cd8>] (i2c_transfer+0xb0/0xf8)
>     [<c0548cd8>] (i2c_transfer) from [<c046f884>] (regmap_i2c_read+0x54/0x88)
>     [<c046f884>] (regmap_i2c_read) from [<c046b69c>]
> (_regmap_raw_read+0x118/0x1f0)
>     [<c046b69c>] (_regmap_raw_read) from [<c046b7b8>]
> (_regmap_bus_read+0x44/0x68)
>     [<c046b7b8>] (_regmap_bus_read) from [<c04698a0>] (_regmap_read+0x84/0x110)
>     [<c04698a0>] (_regmap_read) from [<c046c10c>] (regmap_read+0x40/0x58)
>     [<c046c10c>] (regmap_read) from [<c03cf0a8>]
> (regulator_get_voltage_sel_regmap+0x28/0x74)
>     [<c03cf0a8>] (regulator_get_voltage_sel_regmap) from [<c03cb29c>]
> (regulator_get_voltage_rdev+0xa4/0x14c)
>     [<c03cb29c>] (regulator_get_voltage_rdev) from [<c03cc658>]
> (regulator_get_voltage+0x2c/0x60)
>     [<c03cc658>] (regulator_get_voltage) from [<c03cd994>]
> (regulator_is_supported_voltage+0x30/0xd8)
>     [<c03cd994>] (regulator_is_supported_voltage) from [<c05a32b4>]
> (_opp_add+0x164/0x1b8)
>     [<c05a32b4>] (_opp_add) from [<c05a3388>] (_opp_add_v1+0x80/0xb8)
>     [<c05a3388>] (_opp_add_v1) from [<c05a5220>]
> (dev_pm_opp_of_add_table+0x130/0x168)
>     [<c05a5220>] (dev_pm_opp_of_add_table) from [<c05a5330>]
> (dev_pm_opp_of_cpumask_add_table+0x60/0xac)
>     [<c05a5330>] (dev_pm_opp_of_cpumask_add_table) from [<c05ab88c>]
> (cpufreq_init+0x94/0x1c4)
>     [<c05ab88c>] (cpufreq_init) from [<c05a8190>] (cpufreq_online+0x148/0x7ac)
>     [<c05a8190>] (cpufreq_online) from [<c05a87fc>]
> (cpuhp_cpufreq_online+0x8/0x10)
>     [<c05a87fc>] (cpuhp_cpufreq_online) from [<c011d088>]
> (cpuhp_invoke_callback+0xf8/0x2e4)
>     [<c011d088>] (cpuhp_invoke_callback) from [<c011d418>]
> (cpuhp_thread_fun+0xac/0x244)
>     [<c011d418>] (cpuhp_thread_fun) from [<c013c090>]
> (smpboot_thread_fn+0x19c/0x1a8)
>     [<c013c090>] (smpboot_thread_fn) from [<c0138408>] (kthread+0x104/0x110)
>     [<c0138408>] (kthread) from [<c0100148>] (ret_from_fork+0x14/0x2c)

So there is a dependency chain here. cpufreq->regulator->i2c,
specially with the patch you pointed out, we handle defer-probing in a
better way and so I would expect i2c driver to get in first and
cpufreq should get registered later only. And I would have expected
the same during suspend/resume.

Can you see why this happens ?
Geert Uytterhoeven Oct. 14, 2020, 4:40 p.m. UTC | #6
Hi Viresh,

On Tue, Oct 13, 2020 at 11:56 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 13-10-20, 11:47, Geert Uytterhoeven wrote:
> > Thanks for your patch, which is now commit dc279ac6e5b4e06e ("cpufreq:
> > dt: Refactor initialization to handle probe deferral properly") in
> > pm/linux-next, and to which I bisected a regression.
> > Reverting this commit fixes the issue.
>
> Strange. I had a look again and I failed to understand why it would
> happen this way.
>
> > On r8a7791/koelsch, during resume from s2ram:
> >
> >      PM: suspend entry (deep)
> >      Filesystems sync: 0.000 seconds
> >      Freezing user space processes ... (elapsed 0.003 seconds) done.
> >      OOM killer disabled.
> >      Freezing remaining freezable tasks ... (elapsed 0.009 seconds) done.
> >      Disabling non-boot CPUs ...
> >      Enabling non-boot CPUs ...
> >     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     +i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     +cpu cpu0: OPP table can't be empty
> >      CPU1 is up
> >      rcar-pcie fe000000.pcie: PCIe x1: link up
> >
> > The cpufreq code tries to talk to the PMIC, while the I2C controller
> > that hosts the PMIC is suspended, and thus any communication attempt
> > times out.  __i2c_check_suspended() fails to notice that, as the
> > i2c_shmobile_i2c driver doesn't have a suspend callback calling
> > i2c_mark_adapter_suspended() yet.  After fixing that (will send a patch
> > soon), the I2C core rightfully complains with:
> >
> >     WARNING: CPU: 1 PID: 13 at drivers/i2c/i2c-core.h:54
> > __i2c_transfer+0x4a4/0x4e4
> >     i2c i2c-6: Transfer while suspended
> >     CPU: 1 PID: 13 Comm: cpuhp/1 Not tainted
> > 5.9.0-shmobile-09581-g05a3e5886c7615b1-dirty #718
> >     Hardware name: Generic R-Car Gen2 (Flattened Device Tree)
> >     [<c010dcec>] (unwind_backtrace) from [<c0109b18>] (show_stack+0x10/0x14)
> >     [<c0109b18>] (show_stack) from [<c075e928>] (dump_stack+0x8c/0xac)
> >     [<c075e928>] (dump_stack) from [<c011c23c>] (__warn+0xd0/0xe8)
> >     [<c011c23c>] (__warn) from [<c011c2c4>] (warn_slowpath_fmt+0x70/0x9c)
> >     [<c011c2c4>] (warn_slowpath_fmt) from [<c0548be8>]
> > (__i2c_transfer+0x4a4/0x4e4)
> >     [<c0548be8>] (__i2c_transfer) from [<c0548cd8>] (i2c_transfer+0xb0/0xf8)
> >     [<c0548cd8>] (i2c_transfer) from [<c046f884>] (regmap_i2c_read+0x54/0x88)
> >     [<c046f884>] (regmap_i2c_read) from [<c046b69c>]
> > (_regmap_raw_read+0x118/0x1f0)
> >     [<c046b69c>] (_regmap_raw_read) from [<c046b7b8>]
> > (_regmap_bus_read+0x44/0x68)
> >     [<c046b7b8>] (_regmap_bus_read) from [<c04698a0>] (_regmap_read+0x84/0x110)
> >     [<c04698a0>] (_regmap_read) from [<c046c10c>] (regmap_read+0x40/0x58)
> >     [<c046c10c>] (regmap_read) from [<c03cf0a8>]
> > (regulator_get_voltage_sel_regmap+0x28/0x74)
> >     [<c03cf0a8>] (regulator_get_voltage_sel_regmap) from [<c03cb29c>]
> > (regulator_get_voltage_rdev+0xa4/0x14c)
> >     [<c03cb29c>] (regulator_get_voltage_rdev) from [<c03cc658>]
> > (regulator_get_voltage+0x2c/0x60)
> >     [<c03cc658>] (regulator_get_voltage) from [<c03cd994>]
> > (regulator_is_supported_voltage+0x30/0xd8)
> >     [<c03cd994>] (regulator_is_supported_voltage) from [<c05a32b4>]
> > (_opp_add+0x164/0x1b8)
> >     [<c05a32b4>] (_opp_add) from [<c05a3388>] (_opp_add_v1+0x80/0xb8)
> >     [<c05a3388>] (_opp_add_v1) from [<c05a5220>]
> > (dev_pm_opp_of_add_table+0x130/0x168)
> >     [<c05a5220>] (dev_pm_opp_of_add_table) from [<c05a5330>]
> > (dev_pm_opp_of_cpumask_add_table+0x60/0xac)
> >     [<c05a5330>] (dev_pm_opp_of_cpumask_add_table) from [<c05ab88c>]
> > (cpufreq_init+0x94/0x1c4)
> >     [<c05ab88c>] (cpufreq_init) from [<c05a8190>] (cpufreq_online+0x148/0x7ac)
> >     [<c05a8190>] (cpufreq_online) from [<c05a87fc>]
> > (cpuhp_cpufreq_online+0x8/0x10)
> >     [<c05a87fc>] (cpuhp_cpufreq_online) from [<c011d088>]
> > (cpuhp_invoke_callback+0xf8/0x2e4)
> >     [<c011d088>] (cpuhp_invoke_callback) from [<c011d418>]
> > (cpuhp_thread_fun+0xac/0x244)
> >     [<c011d418>] (cpuhp_thread_fun) from [<c013c090>]
> > (smpboot_thread_fn+0x19c/0x1a8)
> >     [<c013c090>] (smpboot_thread_fn) from [<c0138408>] (kthread+0x104/0x110)
> >     [<c0138408>] (kthread) from [<c0100148>] (ret_from_fork+0x14/0x2c)
>
> So there is a dependency chain here. cpufreq->regulator->i2c,
> specially with the patch you pointed out, we handle defer-probing in a
> better way and so I would expect i2c driver to get in first and
> cpufreq should get registered later only. And I would have expected
> the same during suspend/resume.
>
> Can you see why this happens ?

On this platform (r8a7791-koelsch.dts), there is no opp table in DT.

  Before:
    boot:
      cpufreq-dt cpufreq-dt: dt_cpufreq_probe:362
      cpu cpu0: resources_available:95
      cpu cpu0: resources_available:102: clk_get() returned z
      cpu cpu0: resources_available:120:
dev_pm_opp_of_find_icc_paths() returned 0
      cpu cpu0: resources_available:125: find_supply_name() returned cpu0
      cpu cpu0: resources_available:132: regulator_get_optional()
returned -EPROBE_DEFER
      cpu cpu0: cpu0 regulator not ready, retry
      cpufreq-dt cpufreq-dt: dt_cpufreq_probe:371:
resources_available() returned -517
      ...
      cpufreq-dt cpufreq-dt: dt_cpufreq_probe:362
      cpu cpu0: resources_available:95
      cpu cpu0: resources_available:102: clk_get() returned z
      cpu cpu0: resources_available:120:
dev_pm_opp_of_find_icc_paths() returned 0
      cpu cpu0: resources_available:125: find_supply_name() returned cpu0
      cpu cpu0: resources_available:132: regulator_get_optional()
returned (ptrval)
      cpufreq-dt cpufreq-dt: dt_cpufreq_probe:371:
resources_available() returned 0
      cpufreq-dt cpufreq-dt: dt_cpufreq_probe:375
      cpufreq_dt: cpufreq_init:162
      cpu cpu0: cpufreq_init:170: clk_get() returned z
      cpu cpu0: cpufreq_init:179: dev_pm_opp_of_get_sharing_cpus() returned -2
      cpu cpu0: cpufreq_init:198: find_supply_name() returned cpu0
      <i2c comm>
      cpu cpu0: cpufreq_init:201: dev_pm_opp_set_regulators() returned (ptrval)
      <i2c comm>
      cpu cpu0: cpufreq_init:230: dev_pm_opp_of_cpumask_add_table() returned 0
      cpu cpu0: cpufreq_init:239: dev_pm_opp_get_opp_count() returned 0
      cpu cpu0: OPP table is not ready, deferring probe
      cpufreq_dt: cpufreq_init:162
      cpu cpu1: cpufreq_init:170: clk_get() returned z
      cpu cpu1: cpufreq_init:179: dev_pm_opp_of_get_sharing_cpus() returned -2
      cpu cpu1: no regulator for cpu1
      cpu cpu1: cpufreq_init:198: find_supply_name() returned (null)
      cpu cpu1: cpufreq_init:230: dev_pm_opp_of_cpumask_add_table() returned 0
      cpu cpu1: cpufreq_init:239: dev_pm_opp_get_opp_count() returned 0
      cpu cpu1: OPP table is not ready, deferring probe

    s2ram:
      cpufreq_dt: cpufreq_init:162
      cpu cpu1: cpufreq_init:170: clk_get() returned z
      cpu cpu1: cpufreq_init:179: dev_pm_opp_of_get_sharing_cpus() returned -2
      cpu cpu1: no regulator for cpu1
      cpu cpu1: cpufreq_init:198: find_supply_name() returned (null)
      cpu cpu1: cpufreq_init:230: dev_pm_opp_of_cpumask_add_table() returned 0
      cpu cpu1: cpufreq_init:239: dev_pm_opp_get_opp_count() returned 0
      cpu cpu1: OPP table is not ready, deferring probe
      CPU1 is up

  After:
    boot:
      cpufreq-dt cpufreq-dt: dt_cpufreq_probe:356
      cpufreq_dt: dt_cpufreq_early_init:251
      cpu cpu0: dt_cpufreq_early_init:256
      cpu cpu0: dt_cpufreq_early_init:271: dev_pm_opp_get_opp_table()
returned (ptrval)
      cpu cpu0: dt_cpufreq_early_init:284: find_supply_name() returned cpu0
      cpu cpu0: dt_cpufreq_early_init:288: dev_pm_opp_set_regulators()
returned -EPROBE_DEFER
      cpufreq-dt cpufreq-dt: dt_cpufreq_probe:360:
dt_cpufreq_early_init() returned -517
      ...
      cpufreq-dt cpufreq-dt: dt_cpufreq_probe:356
      cpufreq_dt: dt_cpufreq_early_init:251
      cpu cpu0: dt_cpufreq_early_init:256
      cpu cpu0: dt_cpufreq_early_init:271: dev_pm_opp_get_opp_table()
returned (ptrval)
      cpu cpu0: dt_cpufreq_early_init:284: find_supply_name() returned cpu0
      cpu cpu0: dt_cpufreq_early_init:288: dev_pm_opp_set_regulators()
returned (ptrval)
      cpu cpu0: dt_cpufreq_early_init:301:
dev_pm_opp_of_get_sharing_cpus() returned -2
      cpufreq-dt cpufreq-dt: dt_cpufreq_probe:360:
dt_cpufreq_early_init() returned 0
      cpufreq_dt: dt_cpufreq_early_init:251
      cpufreq-dt cpufreq-dt: dt_cpufreq_probe:360:
dt_cpufreq_early_init() returned 0
      cpufreq-dt cpufreq-dt: dt_cpufreq_probe:365
      cpufreq_dt: cpufreq_init:114
      cpu cpu0: cpufreq_init:124: clk_get() returned z
      cpu cpu0: cpufreq_init:142: dev_pm_opp_of_cpumask_add_table() returned 0
      cpu cpu0: cpufreq_init:151: dev_pm_opp_get_opp_count() returned 0
      cpu cpu0: OPP table can't be empty
      cpufreq_dt: cpufreq_init:114
      cpu cpu0: cpufreq_init:124: clk_get() returned z
      <i2c comm>
      cpu cpu0: cpufreq_init:142: dev_pm_opp_of_cpumask_add_table() returned 0
      cpu cpu0: cpufreq_init:151: dev_pm_opp_get_opp_count() returned 0

    s2ram:

      cpufreq_dt: cpufreq_init:114
      cpu cpu0: cpufreq_init:124: clk_get() returned z
      WARNING: CPU: 1 PID: 14 at drivers/i2c/i2c-core.h:54
__i2c_transfer+0x2d8/0x310
      i2c i2c-6: Transfer while suspended
      cpu cpu0: cpufreq_init:142: dev_pm_opp_of_cpumask_add_table() returned 0
      cpu cpu0: cpufreq_init:151: dev_pm_opp_get_opp_count() returned 0
      cpu cpu0: OPP table can't be empty
      CPU1 is up

I hope this helps.
Thanks!

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Viresh Kumar Oct. 16, 2020, 8:07 a.m. UTC | #7
On 16-10-20, 08:44, Geert Uytterhoeven wrote:
> On Fri, Oct 16, 2020 at 7:03 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> > On 14-10-20, 18:40, Geert Uytterhoeven wrote:
> > > On this platform (r8a7791-koelsch.dts), there is no opp table in DT.
> 
> I think you missed the clue above:

I read it earlier as well.

> this DTS does not have an opp-table
> with operating-points-v2, but cpu0 does have the operating-points (v1)
> property (note the latter is something I missed before).

This is different than having no OPP table in DT.

> > >
> > >   Before:
> >
> > I assume this means before this patchset came in..
> 
> Indeed.
> 
> > >     boot:
> > >       cpufreq-dt cpufreq-dt: dt_cpufreq_probe:362
> > >       cpu cpu0: resources_available:95
> > >       cpu cpu0: resources_available:102: clk_get() returned z
> > >       cpu cpu0: resources_available:120:
> > > dev_pm_opp_of_find_icc_paths() returned 0
> > >       cpu cpu0: resources_available:125: find_supply_name() returned cpu0
> > >       cpu cpu0: resources_available:132: regulator_get_optional()
> > > returned -EPROBE_DEFER
> > >       cpu cpu0: cpu0 regulator not ready, retry
> > >       cpufreq-dt cpufreq-dt: dt_cpufreq_probe:371:
> > > resources_available() returned -517
> >
> > we deferred probe once.
> >
> > >       ...
> > >       cpufreq-dt cpufreq-dt: dt_cpufreq_probe:362
> > >       cpu cpu0: resources_available:95
> > >       cpu cpu0: resources_available:102: clk_get() returned z
> > >       cpu cpu0: resources_available:120:
> > > dev_pm_opp_of_find_icc_paths() returned 0
> > >       cpu cpu0: resources_available:125: find_supply_name() returned cpu0
> > >       cpu cpu0: resources_available:132: regulator_get_optional()
> > > returned (ptrval)
> >
> > found regulator next time.
> >
> > >       cpufreq-dt cpufreq-dt: dt_cpufreq_probe:371:
> > > resources_available() returned 0
> > >       cpufreq-dt cpufreq-dt: dt_cpufreq_probe:375
> > >       cpufreq_dt: cpufreq_init:162
> > >       cpu cpu0: cpufreq_init:170: clk_get() returned z
> > >       cpu cpu0: cpufreq_init:179: dev_pm_opp_of_get_sharing_cpus() returned -2
> > >       cpu cpu0: cpufreq_init:198: find_supply_name() returned cpu0
> > >       <i2c comm>
> > >       cpu cpu0: cpufreq_init:201: dev_pm_opp_set_regulators() returned (ptrval)
> > >       <i2c comm>
> > >       cpu cpu0: cpufreq_init:230: dev_pm_opp_of_cpumask_add_table() returned 0
> > >       cpu cpu0: cpufreq_init:239: dev_pm_opp_get_opp_count() returned 0
> > >       cpu cpu0: OPP table is not ready, deferring probe
> >
> > This failed, as we couldn't have deferred probe from cpufreq_init.
> > Which means that cpufreq didn't work here.
> 
> No opp-table in DT.

V1 is also an OPP table.

> Shouldn't it use operating-points v1 instead?

Both v1 and v2 are considered as OPP tables. When we say that the
opp-count is 0, it means that it failed to find any of them.

> > >       cpufreq_dt: cpufreq_init:162
> > >       cpu cpu1: cpufreq_init:170: clk_get() returned z
> > >       cpu cpu1: cpufreq_init:179: dev_pm_opp_of_get_sharing_cpus() returned -2
> > >       cpu cpu1: no regulator for cpu1
> > >       cpu cpu1: cpufreq_init:198: find_supply_name() returned (null)
> > >       cpu cpu1: cpufreq_init:230: dev_pm_opp_of_cpumask_add_table() returned 0
> > >       cpu cpu1: cpufreq_init:239: dev_pm_opp_get_opp_count() returned 0
> > >       cpu cpu1: OPP table is not ready, deferring probe
> >
> > Same for CPU1.
> 
> Note that only CPU0 has operating-points v1.

Both should have it ideally, though it works if CPU0 gets probed
first. But if CPU0 is hotplugged out and we try to probe CPU1, then it
will fail.

The fact that cpufreq core tried to probe CPU1 means that it failed
for CPU0. And this is before the patchset in question came in.

I don't think cpufreq was working earlier for your platform, please
check why.

> >
> > >
> > >     s2ram:
> > >       cpufreq_dt: cpufreq_init:162
> > >       cpu cpu1: cpufreq_init:170: clk_get() returned z
> > >       cpu cpu1: cpufreq_init:179: dev_pm_opp_of_get_sharing_cpus() returned -2
> > >       cpu cpu1: no regulator for cpu1
> > >       cpu cpu1: cpufreq_init:198: find_supply_name() returned (null)
> > >       cpu cpu1: cpufreq_init:230: dev_pm_opp_of_cpumask_add_table() returned 0
> > >       cpu cpu1: cpufreq_init:239: dev_pm_opp_get_opp_count() returned 0
> > >       cpu cpu1: OPP table is not ready, deferring probe
> >
> > And same here.
> >
> > >       CPU1 is up
> > >
> > >   After:
> > >     boot:
> > >       cpufreq-dt cpufreq-dt: dt_cpufreq_probe:356
> > >       cpufreq_dt: dt_cpufreq_early_init:251
> > >       cpu cpu0: dt_cpufreq_early_init:256
> > >       cpu cpu0: dt_cpufreq_early_init:271: dev_pm_opp_get_opp_table()
> > > returned (ptrval)
> > >       cpu cpu0: dt_cpufreq_early_init:284: find_supply_name() returned cpu0
> > >       cpu cpu0: dt_cpufreq_early_init:288: dev_pm_opp_set_regulators()
> > > returned -EPROBE_DEFER
> > >       cpufreq-dt cpufreq-dt: dt_cpufreq_probe:360:
> > > dt_cpufreq_early_init() returned -517
> > >       ...
> > >       cpufreq-dt cpufreq-dt: dt_cpufreq_probe:356
> > >       cpufreq_dt: dt_cpufreq_early_init:251
> > >       cpu cpu0: dt_cpufreq_early_init:256
> > >       cpu cpu0: dt_cpufreq_early_init:271: dev_pm_opp_get_opp_table()
> > > returned (ptrval)
> > >       cpu cpu0: dt_cpufreq_early_init:284: find_supply_name() returned cpu0
> > >       cpu cpu0: dt_cpufreq_early_init:288: dev_pm_opp_set_regulators()
> > > returned (ptrval)
> > >       cpu cpu0: dt_cpufreq_early_init:301:
> > > dev_pm_opp_of_get_sharing_cpus() returned -2
> > >       cpufreq-dt cpufreq-dt: dt_cpufreq_probe:360:
> > > dt_cpufreq_early_init() returned 0
> > >       cpufreq_dt: dt_cpufreq_early_init:251
> > >       cpufreq-dt cpufreq-dt: dt_cpufreq_probe:360:
> > > dt_cpufreq_early_init() returned 0
> > >       cpufreq-dt cpufreq-dt: dt_cpufreq_probe:365
> > >       cpufreq_dt: cpufreq_init:114
> > >       cpu cpu0: cpufreq_init:124: clk_get() returned z
> > >       cpu cpu0: cpufreq_init:142: dev_pm_opp_of_cpumask_add_table() returned 0
> > >       cpu cpu0: cpufreq_init:151: dev_pm_opp_get_opp_count() returned 0
> > >       cpu cpu0: OPP table can't be empty
> >
> > Same issue here.
> >
> > >       cpufreq_dt: cpufreq_init:114
> > >       cpu cpu0: cpufreq_init:124: clk_get() returned z
> > >       <i2c comm>
> > >       cpu cpu0: cpufreq_init:142: dev_pm_opp_of_cpumask_add_table() returned 0
> > >       cpu cpu0: cpufreq_init:151: dev_pm_opp_get_opp_count() returned 0
> > >
> > >     s2ram:
> > >
> > >       cpufreq_dt: cpufreq_init:114
> > >       cpu cpu0: cpufreq_init:124: clk_get() returned z
> > >       WARNING: CPU: 1 PID: 14 at drivers/i2c/i2c-core.h:54
> > > __i2c_transfer+0x2d8/0x310
> > >       i2c i2c-6: Transfer while suspended
> > >       cpu cpu0: cpufreq_init:142: dev_pm_opp_of_cpumask_add_table() returned 0
> > >       cpu cpu0: cpufreq_init:151: dev_pm_opp_get_opp_count() returned 0
> > >       cpu cpu0: OPP table can't be empty
> > >       CPU1 is up
> > >
> > > I hope this helps.
> >
> > Unfortunately it raised more questions than what it answered :(
> 
> Before, it bailed out before talking to the regulator during s2ram,
> After, it talks to the regulator before bailing out, triggering the WARN().

It wasn't working before and it isn't working now. Though I do see a
problem with cpufreq core where it tries suspend/resume even though
->init() failed for all CPUs earlier. I will fix that separately.

I think someone needs to see why it wasn't working earlier and then we
can see if we have pending issues.
Geert Uytterhoeven Oct. 27, 2020, 4:29 p.m. UTC | #8
Hi Viresh,

On Fri, Oct 16, 2020 at 10:07 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 16-10-20, 08:44, Geert Uytterhoeven wrote:

> > On Fri, Oct 16, 2020 at 7:03 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:

> > > On 14-10-20, 18:40, Geert Uytterhoeven wrote:

> > > > On this platform (r8a7791-koelsch.dts), there is no opp table in DT.

> >

> > I think you missed the clue above:

>

> I read it earlier as well.

>

> > this DTS does not have an opp-table

> > with operating-points-v2, but cpu0 does have the operating-points (v1)

> > property (note the latter is something I missed before).

>

> This is different than having no OPP table in DT.

>

> > > >

> > > >   Before:

> > >

> > > I assume this means before this patchset came in..

> >

> > Indeed.

> >

> > > >     boot:

> > > >       cpufreq-dt cpufreq-dt: dt_cpufreq_probe:362

> > > >       cpu cpu0: resources_available:95

> > > >       cpu cpu0: resources_available:102: clk_get() returned z

> > > >       cpu cpu0: resources_available:120:

> > > > dev_pm_opp_of_find_icc_paths() returned 0

> > > >       cpu cpu0: resources_available:125: find_supply_name() returned cpu0

> > > >       cpu cpu0: resources_available:132: regulator_get_optional()

> > > > returned -EPROBE_DEFER

> > > >       cpu cpu0: cpu0 regulator not ready, retry

> > > >       cpufreq-dt cpufreq-dt: dt_cpufreq_probe:371:

> > > > resources_available() returned -517

> > >

> > > we deferred probe once.

> > >

> > > >       ...

> > > >       cpufreq-dt cpufreq-dt: dt_cpufreq_probe:362

> > > >       cpu cpu0: resources_available:95

> > > >       cpu cpu0: resources_available:102: clk_get() returned z

> > > >       cpu cpu0: resources_available:120:

> > > > dev_pm_opp_of_find_icc_paths() returned 0

> > > >       cpu cpu0: resources_available:125: find_supply_name() returned cpu0

> > > >       cpu cpu0: resources_available:132: regulator_get_optional()

> > > > returned (ptrval)

> > >

> > > found regulator next time.

> > >

> > > >       cpufreq-dt cpufreq-dt: dt_cpufreq_probe:371:

> > > > resources_available() returned 0

> > > >       cpufreq-dt cpufreq-dt: dt_cpufreq_probe:375

> > > >       cpufreq_dt: cpufreq_init:162

> > > >       cpu cpu0: cpufreq_init:170: clk_get() returned z

> > > >       cpu cpu0: cpufreq_init:179: dev_pm_opp_of_get_sharing_cpus() returned -2

> > > >       cpu cpu0: cpufreq_init:198: find_supply_name() returned cpu0

> > > >       <i2c comm>

> > > >       cpu cpu0: cpufreq_init:201: dev_pm_opp_set_regulators() returned (ptrval)

> > > >       <i2c comm>

> > > >       cpu cpu0: cpufreq_init:230: dev_pm_opp_of_cpumask_add_table() returned 0

> > > >       cpu cpu0: cpufreq_init:239: dev_pm_opp_get_opp_count() returned 0

> > > >       cpu cpu0: OPP table is not ready, deferring probe

> > >

> > > This failed, as we couldn't have deferred probe from cpufreq_init.

> > > Which means that cpufreq didn't work here.

> >

> > No opp-table in DT.

>

> V1 is also an OPP table.


OK.

> > Shouldn't it use operating-points v1 instead?

>

> Both v1 and v2 are considered as OPP tables. When we say that the

> opp-count is 0, it means that it failed to find any of them.


Interestingly, first the v1 table is found, later opp-count is 0 (see below).

> > > >       cpufreq_dt: cpufreq_init:162

> > > >       cpu cpu1: cpufreq_init:170: clk_get() returned z

> > > >       cpu cpu1: cpufreq_init:179: dev_pm_opp_of_get_sharing_cpus() returned -2

> > > >       cpu cpu1: no regulator for cpu1

> > > >       cpu cpu1: cpufreq_init:198: find_supply_name() returned (null)

> > > >       cpu cpu1: cpufreq_init:230: dev_pm_opp_of_cpumask_add_table() returned 0

> > > >       cpu cpu1: cpufreq_init:239: dev_pm_opp_get_opp_count() returned 0

> > > >       cpu cpu1: OPP table is not ready, deferring probe

> > >

> > > Same for CPU1.

> >

> > Note that only CPU0 has operating-points v1.

>

> Both should have it ideally, though it works if CPU0 gets probed

> first. But if CPU0 is hotplugged out and we try to probe CPU1, then it

> will fail.

>

> The fact that cpufreq core tried to probe CPU1 means that it failed

> for CPU0. And this is before the patchset in question came in.

>

> I don't think cpufreq was working earlier for your platform, please

> check why.


[...]

> I think someone needs to see why it wasn't working earlier and then we

> can see if we have pending issues.


On plain v5.9, with #define DEBUG and a few extra debug prints
added, I get:

    cpufreq_dt: cpufreq_init:164: policy->cpu = 0
    cpufreq_dt: cpufreq_init:165: policy->cpus = 0
    cpufreq_dt: cpufreq_init:166: policy->related_cpus =
    cpufreq_dt: cpufreq_init:167: policy->real_cpus =
    cpu cpu0: dev_pm_opp_of_get_sharing_cpus: Couldn't find opp node.
    of: dev_pm_opp_of_cpumask_add_table:1049
    of: dev_pm_opp_of_cpumask_add_table:1054: cpu 0
    cpu cpu0: dev_pm_opp_of_add_table:954
    cpu cpu0: dev_pm_opp_of_add_table:956:
dev_pm_opp_get_opp_table_indexed() returned (ptrval)
    cpu cpu0: _of_add_opp_table_v1:891
    cpu cpu0: _of_add_opp_table_v1:893: _find_opp_table() returned (ptrval)
    cpu cpu0: _of_add_opp_table_v1:909: 6 entries
    cpu cpu0: dev_pm_opp_get_opp_count:331
    cpu cpu0: dev_pm_opp_get_opp_count:333: _find_opp_table() returned (ptrval)
    cpu cpu0: dev_pm_opp_get_opp_count:342: _get_opp_count() returned 6
    cpu cpu0: dev_pm_opp_get_opp_count:331
    cpu cpu0: dev_pm_opp_get_opp_count:333: _find_opp_table() returned (ptrval)
    cpu cpu0: dev_pm_opp_get_opp_count:342: _get_opp_count() returned 6
    cpu cpu0: dev_pm_opp_get_opp_count:331
    cpu cpu0: dev_pm_opp_get_opp_count:333: _find_opp_table() returned (ptrval)
    cpu cpu0: dev_pm_opp_get_opp_count:342: _get_opp_count() returned 6
    cpu cpu0: Couldn't find proper 'dynamic-power-coefficient' in DT
    cpu cpu0: Couldn't register Energy Model -22

This happens quite late in the boot sequence, long after cpu1 has been
brought online.
So it finds the v1 opp table for cpu0, which has 6 entries.
The last two messages should be harmless, right?
So you say cpufreq is not working? How can I verify that?

Note that it never tries to do anything for cpu1.
Note that during s2ram, nothing related is printed.

On v5.10, with similar debug code, things are different. During boot:

    cpu cpu0: dev_pm_opp_of_get_sharing_cpus: Couldn't find opp node.
    cpufreq_dt: cpufreq_init:112: policy->cpu = 0
    cpufreq_dt: cpufreq_init:113: policy->cpus = 0
    cpufreq_dt: cpufreq_init:114: policy->related_cpus =
    cpufreq_dt: cpufreq_init:115: policy->real_cpus =
    of: dev_pm_opp_of_cpumask_add_table:1075
    of: dev_pm_opp_of_cpumask_add_table:1080: cpu 0
    cpu cpu0: dev_pm_opp_of_add_table:980
    cpu cpu0: dev_pm_opp_of_add_table:982:
dev_pm_opp_get_opp_table_indexed() returned (ptrval)
    cpu cpu0: _of_add_opp_table_v1:914
    cpu cpu0: _of_add_opp_table_v1:916: _find_opp_table() returned (ptrval)
    cpu cpu0: _of_add_opp_table_v1:937: 6 entries

Good, found the table with 6 entries.

    of: dev_pm_opp_of_cpumask_add_table:1080: cpu 1
    cpu cpu1: dev_pm_opp_of_add_table:980
    cpu cpu1: dev_pm_opp_of_add_table:982:
dev_pm_opp_get_opp_table_indexed() returned (ptrval)
    cpu cpu1: _of_add_opp_table_v1:914
    cpu cpu1: _of_add_opp_table_v1:916: _find_opp_table() returned (ptrval)
    cpu cpu1: _of_add_opp_table_v1:937: 6 entries

Oh, this time it checked cpu1, too (why?), and found 6 entries, good.

    cpu cpu0: dev_pm_opp_get_opp_count:331
    cpu cpu0: dev_pm_opp_get_opp_count:333: _find_opp_table() returned (ptrval)
    cpu cpu0: dev_pm_opp_get_opp_count:342: _get_opp_count() returned 0
    cpu cpu0: OPP table can't be empty

Wait, _get_opp_count() returns 0?

    cpufreq_dt: cpufreq_init:112: policy->cpu = 1
    cpufreq_dt: cpufreq_init:113: policy->cpus = 1
    cpufreq_dt: cpufreq_init:114: policy->related_cpus =
    cpufreq_dt: cpufreq_init:115: policy->real_cpus =

Oh, this time cpufreq_init() is called for cpu1, too.

    of: dev_pm_opp_of_cpumask_add_table:1075
    of: dev_pm_opp_of_cpumask_add_table:1080: cpu 0
    cpu cpu0: dev_pm_opp_of_add_table:980
    cpu cpu0: dev_pm_opp_of_add_table:982:
dev_pm_opp_get_opp_table_indexed() returned (ptrval)
    cpu cpu0: _of_add_opp_table_v1:914
    cpu cpu0: _of_add_opp_table_v1:916: _find_opp_table() returned (ptrval)
    cpu cpu0: _of_add_opp_table_v1:937: 6 entries

Checking cpu0 again (why?), 6 entries.

    of: dev_pm_opp_of_cpumask_add_table:1080: cpu 1
    cpu cpu1: dev_pm_opp_of_add_table:980
    cpu cpu1: dev_pm_opp_of_add_table:982:
dev_pm_opp_get_opp_table_indexed() returned (ptrval)
    cpu cpu1: _of_add_opp_table_v1:914
    cpu cpu1: _of_add_opp_table_v1:916: _find_opp_table() returned (ptrval)
    cpu cpu1: _of_add_opp_table_v1:937: 6 entries

Checking cpu1 again, 6 entries.

    cpu cpu0: dev_pm_opp_get_opp_count:331
    cpu cpu0: dev_pm_opp_get_opp_count:333: _find_opp_table() returned (ptrval)
    cpu cpu0: dev_pm_opp_get_opp_count:342: _get_opp_count() returned 0
    cpu cpu0: OPP table can't be empty

Huh, cpu0 again, not cpu1? And _get_opp_count() returns 0?

During s2ram, v5.10-rc1, it redoes most of the above, incl. touching the
PMIC, which it shouldn't due in this phase of system resume:

    Disabling non-boot CPUs ...
    Enabling non-boot CPUs ...
    cpufreq_dt: cpufreq_init:112: policy->cpu = 1
    cpufreq_dt: cpufreq_init:113: policy->cpus = 1
    cpufreq_dt: cpufreq_init:114: policy->related_cpus =
    cpufreq_dt: cpufreq_init:115: policy->real_cpus =
    of: dev_pm_opp_of_cpumask_add_table:1075
    of: dev_pm_opp_of_cpumask_add_table:1080: cpu 0
    cpu cpu0: dev_pm_opp_of_add_table:980
    cpu cpu0: dev_pm_opp_of_add_table:982:
dev_pm_opp_get_opp_table_indexed() returned f680980b
    cpu cpu0: _of_add_opp_table_v1:914
    cpu cpu0: _of_add_opp_table_v1:916: _find_opp_table() returned a4afd426
    cpu cpu0: _of_add_opp_table_v1:937: 6 entries
    i2c-sh_mobile e60b0000.i2c: Transfer request timed out

The i2c controller is suspended, this could go boom...

    i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    of: dev_pm_opp_of_cpumask_add_table:1080: cpu 1
    cpu cpu1: dev_pm_opp_of_add_table:980
    cpu cpu1: dev_pm_opp_of_add_table:982:
dev_pm_opp_get_opp_table_indexed() returned f680980b
    cpu cpu1: _of_add_opp_table_v1:914
    cpu cpu1: _of_add_opp_table_v1:916: _find_opp_table() returned 9087c76d
    cpu cpu1: _of_add_opp_table_v1:937: 6 entries
    i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    i2c-sh_mobile e60b0000.i2c: Transfer request timed out
    cpu cpu0: dev_pm_opp_get_opp_count:331
    cpu cpu0: dev_pm_opp_get_opp_count:333: _find_opp_table() returned f680980b
    cpu cpu0: dev_pm_opp_get_opp_count:342: _get_opp_count() returned 0
    cpu cpu0: OPP table can't be empty
    CPU1 is up

Thanks for your comments!

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Geert Uytterhoeven Oct. 28, 2020, 9:49 a.m. UTC | #9
Hi Viresh,

On Wed, Oct 28, 2020 at 6:48 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 27-10-20, 17:29, Geert Uytterhoeven wrote:
> > On plain v5.9, with #define DEBUG and a few extra debug prints
> > added, I get:
> >
> >     cpufreq_dt: cpufreq_init:164: policy->cpu = 0
> >     cpufreq_dt: cpufreq_init:165: policy->cpus = 0
> >     cpufreq_dt: cpufreq_init:166: policy->related_cpus =
> >     cpufreq_dt: cpufreq_init:167: policy->real_cpus =
> >     cpu cpu0: dev_pm_opp_of_get_sharing_cpus: Couldn't find opp node.
> >     of: dev_pm_opp_of_cpumask_add_table:1049
> >     of: dev_pm_opp_of_cpumask_add_table:1054: cpu 0
> >     cpu cpu0: dev_pm_opp_of_add_table:954
> >     cpu cpu0: dev_pm_opp_of_add_table:956:
> > dev_pm_opp_get_opp_table_indexed() returned (ptrval)
> >     cpu cpu0: _of_add_opp_table_v1:891
> >     cpu cpu0: _of_add_opp_table_v1:893: _find_opp_table() returned (ptrval)
> >     cpu cpu0: _of_add_opp_table_v1:909: 6 entries
> >     cpu cpu0: dev_pm_opp_get_opp_count:331
> >     cpu cpu0: dev_pm_opp_get_opp_count:333: _find_opp_table() returned (ptrval)
> >     cpu cpu0: dev_pm_opp_get_opp_count:342: _get_opp_count() returned 6
> >     cpu cpu0: dev_pm_opp_get_opp_count:331
> >     cpu cpu0: dev_pm_opp_get_opp_count:333: _find_opp_table() returned (ptrval)
> >     cpu cpu0: dev_pm_opp_get_opp_count:342: _get_opp_count() returned 6
> >     cpu cpu0: dev_pm_opp_get_opp_count:331
> >     cpu cpu0: dev_pm_opp_get_opp_count:333: _find_opp_table() returned (ptrval)
> >     cpu cpu0: dev_pm_opp_get_opp_count:342: _get_opp_count() returned 6
> >     cpu cpu0: Couldn't find proper 'dynamic-power-coefficient' in DT
> >     cpu cpu0: Couldn't register Energy Model -22
> >
> > This happens quite late in the boot sequence, long after cpu1 has been
> > brought online.
> > So it finds the v1 opp table for cpu0, which has 6 entries.
> > The last two messages should be harmless, right?
>
> Yes.
>
> > So you say cpufreq is not working? How can I verify that?
>
> I said it because your earlier logs showed that we defered probed
> again or the count was 0 and we failed. Something like that.
>
> Give output of this to verify if cpufreq is working or not:
>
> grep . /sys/devices/system/cpu/cpufreq/policy*/*
>
> This will be empty if there is no cpufreq.

/sys/devices/system/cpu/cpufreq/policy0/affected_cpus:0 1
/sys/devices/system/cpu/cpufreq/policy0/cpuinfo_cur_freq:375000
/sys/devices/system/cpu/cpufreq/policy0/cpuinfo_max_freq:1500000
/sys/devices/system/cpu/cpufreq/policy0/cpuinfo_min_freq:375000
/sys/devices/system/cpu/cpufreq/policy0/cpuinfo_transition_latency:300000
/sys/devices/system/cpu/cpufreq/policy0/related_cpus:0 1
/sys/devices/system/cpu/cpufreq/policy0/scaling_available_frequencies:375000
750000 937500 1125000 1312500 1500000
/sys/devices/system/cpu/cpufreq/policy0/scaling_available_governors:conservative
ondemand userspace powersave performance schedutil
/sys/devices/system/cpu/cpufreq/policy0/scaling_cur_freq:375000
/sys/devices/system/cpu/cpufreq/policy0/scaling_driver:cpufreq-dt
/sys/devices/system/cpu/cpufreq/policy0/scaling_governor:schedutil
/sys/devices/system/cpu/cpufreq/policy0/scaling_max_freq:1500000
/sys/devices/system/cpu/cpufreq/policy0/scaling_min_freq:375000
/sys/devices/system/cpu/cpufreq/policy0/scaling_setspeed:<unsupported>

So it works in v5.9, but not in v5.10-rc1.

Bisection says it was broken by commit 90d46d71cce279d8 ("opp: Handle
multiple calls for same OPP table in _of_add_opp_table_v1()").

> >     cpu cpu0: dev_pm_opp_get_opp_count:331
> >     cpu cpu0: dev_pm_opp_get_opp_count:333: _find_opp_table() returned (ptrval)
> >     cpu cpu0: dev_pm_opp_get_opp_count:342: _get_opp_count() returned 0
> >     cpu cpu0: OPP table can't be empty
> >
> > Wait, _get_opp_count() returns 0?
>
> Does this fix it for you as well ?
>
> https://lore.kernel.org/lkml/2c73ab54717ef358b118ea0cfb727b1427e7730a.1602648719.git.viresh.kumar@linaro.org/

Thanks, it does. I had arrived at the same conclusion after bisection.

> I didn't point you to this earlier as your logs said something else.

All my logs said _get_opp_count() returns 0.

> > During s2ram, v5.10-rc1, it redoes most of the above, incl. touching the
> > PMIC, which it shouldn't due in this phase of system resume:
> >
> >     Disabling non-boot CPUs ...
> >     Enabling non-boot CPUs ...
> >     cpufreq_dt: cpufreq_init:112: policy->cpu = 1
> >     cpufreq_dt: cpufreq_init:113: policy->cpus = 1
> >     cpufreq_dt: cpufreq_init:114: policy->related_cpus =
> >     cpufreq_dt: cpufreq_init:115: policy->real_cpus =
> >     of: dev_pm_opp_of_cpumask_add_table:1075
> >     of: dev_pm_opp_of_cpumask_add_table:1080: cpu 0
> >     cpu cpu0: dev_pm_opp_of_add_table:980
> >     cpu cpu0: dev_pm_opp_of_add_table:982:
> > dev_pm_opp_get_opp_table_indexed() returned f680980b
> >     cpu cpu0: _of_add_opp_table_v1:914
> >     cpu cpu0: _of_add_opp_table_v1:916: _find_opp_table() returned a4afd426
> >     cpu cpu0: _of_add_opp_table_v1:937: 6 entries
> >     i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >
> > The i2c controller is suspended, this could go boom...
> >
> >     i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     of: dev_pm_opp_of_cpumask_add_table:1080: cpu 1
> >     cpu cpu1: dev_pm_opp_of_add_table:980
> >     cpu cpu1: dev_pm_opp_of_add_table:982:
> > dev_pm_opp_get_opp_table_indexed() returned f680980b
> >     cpu cpu1: _of_add_opp_table_v1:914
> >     cpu cpu1: _of_add_opp_table_v1:916: _find_opp_table() returned 9087c76d
> >     cpu cpu1: _of_add_opp_table_v1:937: 6 entries
> >     i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     i2c-sh_mobile e60b0000.i2c: Transfer request timed out
> >     cpu cpu0: dev_pm_opp_get_opp_count:331
> >     cpu cpu0: dev_pm_opp_get_opp_count:333: _find_opp_table() returned f680980b
> >     cpu cpu0: dev_pm_opp_get_opp_count:342: _get_opp_count() returned 0
> >     cpu cpu0: OPP table can't be empty
> >     CPU1 is up
>
> Lets make the normal boot work first and see about this later.

This is also fixed by your patch: the PMIC is no longer accessed while
suspended.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
diff mbox series

Patch

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 944d7b45afe9..d29cd93045fa 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -13,6 +13,7 @@ 
 #include <linux/cpufreq.h>
 #include <linux/cpumask.h>
 #include <linux/err.h>
+#include <linux/list.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/pm_opp.h>
@@ -24,18 +25,35 @@ 
 #include "cpufreq-dt.h"
 
 struct private_data {
-	struct opp_table *opp_table;
+	struct list_head node;
+
+	cpumask_var_t cpus;
 	struct device *cpu_dev;
-	const char *reg_name;
+	struct opp_table *opp_table;
+	struct opp_table *reg_opp_table;
 	bool have_static_opps;
 };
 
+static LIST_HEAD(priv_list);
+
 static struct freq_attr *cpufreq_dt_attr[] = {
 	&cpufreq_freq_attr_scaling_available_freqs,
 	NULL,   /* Extra space for boost-attr if required */
 	NULL,
 };
 
+static struct private_data *cpufreq_dt_find_data(int cpu)
+{
+	struct private_data *priv;
+
+	list_for_each_entry(priv, &priv_list, node) {
+		if (cpumask_test_cpu(cpu, priv->cpus))
+			return priv;
+	}
+
+	return NULL;
+}
+
 static int set_target(struct cpufreq_policy *policy, unsigned int index)
 {
 	struct private_data *priv = policy->driver_data;
@@ -90,83 +108,24 @@  static const char *find_supply_name(struct device *dev)
 	return name;
 }
 
-static int resources_available(void)
-{
-	struct device *cpu_dev;
-	struct regulator *cpu_reg;
-	struct clk *cpu_clk;
-	int ret = 0;
-	const char *name;
-
-	cpu_dev = get_cpu_device(0);
-	if (!cpu_dev) {
-		pr_err("failed to get cpu0 device\n");
-		return -ENODEV;
-	}
-
-	cpu_clk = clk_get(cpu_dev, NULL);
-	ret = PTR_ERR_OR_ZERO(cpu_clk);
-	if (ret) {
-		/*
-		 * If cpu's clk node is present, but clock is not yet
-		 * registered, we should try defering probe.
-		 */
-		if (ret == -EPROBE_DEFER)
-			dev_dbg(cpu_dev, "clock not ready, retry\n");
-		else
-			dev_err(cpu_dev, "failed to get clock: %d\n", ret);
-
-		return ret;
-	}
-
-	clk_put(cpu_clk);
-
-	ret = dev_pm_opp_of_find_icc_paths(cpu_dev, NULL);
-	if (ret)
-		return ret;
-
-	name = find_supply_name(cpu_dev);
-	/* Platform doesn't require regulator */
-	if (!name)
-		return 0;
-
-	cpu_reg = regulator_get_optional(cpu_dev, name);
-	ret = PTR_ERR_OR_ZERO(cpu_reg);
-	if (ret) {
-		/*
-		 * If cpu's regulator supply node is present, but regulator is
-		 * not yet registered, we should try defering probe.
-		 */
-		if (ret == -EPROBE_DEFER)
-			dev_dbg(cpu_dev, "cpu0 regulator not ready, retry\n");
-		else
-			dev_dbg(cpu_dev, "no regulator for cpu0: %d\n", ret);
-
-		return ret;
-	}
-
-	regulator_put(cpu_reg);
-	return 0;
-}
-
 static int cpufreq_init(struct cpufreq_policy *policy)
 {
 	struct cpufreq_frequency_table *freq_table;
-	struct opp_table *opp_table = NULL;
 	struct private_data *priv;
 	struct device *cpu_dev;
 	struct clk *cpu_clk;
 	unsigned int transition_latency;
-	bool fallback = false;
-	const char *name;
 	int ret;
 
-	cpu_dev = get_cpu_device(policy->cpu);
-	if (!cpu_dev) {
-		pr_err("failed to get cpu%d device\n", policy->cpu);
+	priv = cpufreq_dt_find_data(policy->cpu);
+	if (!priv) {
+		pr_err("failed to find data for cpu%d\n", policy->cpu);
 		return -ENODEV;
 	}
 
+	cpu_dev = priv->cpu_dev;
+	cpumask_copy(policy->cpus, priv->cpus);
+
 	cpu_clk = clk_get(cpu_dev, NULL);
 	if (IS_ERR(cpu_clk)) {
 		ret = PTR_ERR(cpu_clk);
@@ -174,45 +133,6 @@  static int cpufreq_init(struct cpufreq_policy *policy)
 		return ret;
 	}
 
-	/* Get OPP-sharing information from "operating-points-v2" bindings */
-	ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, policy->cpus);
-	if (ret) {
-		if (ret != -ENOENT)
-			goto out_put_clk;
-
-		/*
-		 * operating-points-v2 not supported, fallback to old method of
-		 * finding shared-OPPs for backward compatibility if the
-		 * platform hasn't set sharing CPUs.
-		 */
-		if (dev_pm_opp_get_sharing_cpus(cpu_dev, policy->cpus))
-			fallback = true;
-	}
-
-	/*
-	 * OPP layer will be taking care of regulators now, but it needs to know
-	 * the name of the regulator first.
-	 */
-	name = find_supply_name(cpu_dev);
-	if (name) {
-		opp_table = dev_pm_opp_set_regulators(cpu_dev, &name, 1);
-		if (IS_ERR(opp_table)) {
-			ret = PTR_ERR(opp_table);
-			dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n",
-				policy->cpu, ret);
-			goto out_put_clk;
-		}
-	}
-
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv) {
-		ret = -ENOMEM;
-		goto out_put_regulator;
-	}
-
-	priv->reg_name = name;
-	priv->opp_table = opp_table;
-
 	/*
 	 * Initialize OPP tables for all policy->cpus. They will be shared by
 	 * all CPUs which have marked their CPUs shared with OPP bindings.
@@ -232,31 +152,17 @@  static int cpufreq_init(struct cpufreq_policy *policy)
 	 */
 	ret = dev_pm_opp_get_opp_count(cpu_dev);
 	if (ret <= 0) {
-		dev_dbg(cpu_dev, "OPP table is not ready, deferring probe\n");
-		ret = -EPROBE_DEFER;
+		dev_err(cpu_dev, "OPP table can't be empty\n");
+		ret = -ENODEV;
 		goto out_free_opp;
 	}
 
-	if (fallback) {
-		cpumask_setall(policy->cpus);
-
-		/*
-		 * OPP tables are initialized only for policy->cpu, do it for
-		 * others as well.
-		 */
-		ret = dev_pm_opp_set_sharing_cpus(cpu_dev, policy->cpus);
-		if (ret)
-			dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
-				__func__, ret);
-	}
-
 	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
 	if (ret) {
 		dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret);
 		goto out_free_opp;
 	}
 
-	priv->cpu_dev = cpu_dev;
 	policy->driver_data = priv;
 	policy->clk = cpu_clk;
 	policy->freq_table = freq_table;
@@ -288,11 +194,6 @@  static int cpufreq_init(struct cpufreq_policy *policy)
 out_free_opp:
 	if (priv->have_static_opps)
 		dev_pm_opp_of_cpumask_remove_table(policy->cpus);
-	kfree(priv);
-out_put_regulator:
-	if (name)
-		dev_pm_opp_put_regulators(opp_table);
-out_put_clk:
 	clk_put(cpu_clk);
 
 	return ret;
@@ -320,12 +221,7 @@  static int cpufreq_exit(struct cpufreq_policy *policy)
 	dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table);
 	if (priv->have_static_opps)
 		dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
-	if (priv->reg_name)
-		dev_pm_opp_put_regulators(priv->opp_table);
-
 	clk_put(policy->clk);
-	kfree(priv);
-
 	return 0;
 }
 
@@ -344,21 +240,119 @@  static struct cpufreq_driver dt_cpufreq_driver = {
 	.suspend = cpufreq_generic_suspend,
 };
 
-static int dt_cpufreq_probe(struct platform_device *pdev)
+static int dt_cpufreq_early_init(struct device *dev, int cpu)
 {
-	struct cpufreq_dt_platform_data *data = dev_get_platdata(&pdev->dev);
+	struct private_data *priv;
+	struct device *cpu_dev;
+	const char *reg_name;
 	int ret;
 
+	/* Check if this CPU is already covered by some other policy */
+	if (cpufreq_dt_find_data(cpu))
+		return 0;
+
+	cpu_dev = get_cpu_device(cpu);
+	if (!cpu_dev)
+		return -EPROBE_DEFER;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	if (!alloc_cpumask_var(&priv->cpus, GFP_KERNEL))
+		return -ENOMEM;
+
+	priv->cpu_dev = cpu_dev;
+
+	/* Try to get OPP table early to ensure resources are available */
+	priv->opp_table = dev_pm_opp_get_opp_table(cpu_dev);
+	if (IS_ERR(priv->opp_table)) {
+		ret = PTR_ERR(priv->opp_table);
+		if (ret != -EPROBE_DEFER)
+			dev_err(cpu_dev, "failed to get OPP table: %d\n", ret);
+		goto free_cpumask;
+	}
+
 	/*
-	 * All per-cluster (CPUs sharing clock/voltages) initialization is done
-	 * from ->init(). In probe(), we just need to make sure that clk and
-	 * regulators are available. Else defer probe and retry.
-	 *
-	 * FIXME: Is checking this only for CPU0 sufficient ?
+	 * OPP layer will be taking care of regulators now, but it needs to know
+	 * the name of the regulator first.
 	 */
-	ret = resources_available();
-	if (ret)
-		return ret;
+	reg_name = find_supply_name(cpu_dev);
+	if (reg_name) {
+		priv->reg_opp_table = dev_pm_opp_set_regulators(cpu_dev,
+								&reg_name, 1);
+		if (IS_ERR(priv->reg_opp_table)) {
+			ret = PTR_ERR(priv->reg_opp_table);
+			if (ret != -EPROBE_DEFER)
+				dev_err(cpu_dev, "failed to set regulators: %d\n",
+					ret);
+			goto put_table;
+		}
+	}
+
+	/* Find OPP sharing information so we can fill pri->cpus here */
+	/* Get OPP-sharing information from "operating-points-v2" bindings */
+	ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, priv->cpus);
+	if (ret) {
+		if (ret != -ENOENT)
+			goto put_reg;
+
+		/*
+		 * operating-points-v2 not supported, fallback to all CPUs share
+		 * OPP for backward compatibility if the platform hasn't set
+		 * sharing CPUs.
+		 */
+		if (dev_pm_opp_get_sharing_cpus(cpu_dev, priv->cpus)) {
+			cpumask_setall(priv->cpus);
+
+			/*
+			 * OPP tables are initialized only for cpu, do it for
+			 * others as well.
+			 */
+			ret = dev_pm_opp_set_sharing_cpus(cpu_dev, priv->cpus);
+			if (ret)
+				dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
+					__func__, ret);
+		}
+	}
+
+	list_add(&priv->node, &priv_list);
+	return 0;
+
+put_reg:
+	if (priv->reg_opp_table)
+		dev_pm_opp_put_regulators(priv->reg_opp_table);
+put_table:
+	dev_pm_opp_put_opp_table(priv->opp_table);
+free_cpumask:
+	free_cpumask_var(priv->cpus);
+	return ret;
+}
+
+static void dt_cpufreq_release(void)
+{
+	struct private_data *priv, *tmp;
+
+	list_for_each_entry_safe(priv, tmp, &priv_list, node) {
+		if (priv->reg_opp_table)
+			dev_pm_opp_put_regulators(priv->reg_opp_table);
+		dev_pm_opp_put_opp_table(priv->opp_table);
+		free_cpumask_var(priv->cpus);
+		list_del(&priv->node);
+	}
+}
+
+static int dt_cpufreq_probe(struct platform_device *pdev)
+{
+	struct cpufreq_dt_platform_data *data = dev_get_platdata(&pdev->dev);
+	int ret, cpu;
+
+	/* Request resources early so we can return in case of -EPROBE_DEFER */
+	for_each_possible_cpu(cpu) {
+		ret = dt_cpufreq_early_init(&pdev->dev, cpu);
+		if (ret)
+			goto err;
+	};
 
 	if (data) {
 		if (data->have_governor_per_policy)
@@ -374,15 +368,21 @@  static int dt_cpufreq_probe(struct platform_device *pdev)
 	}
 
 	ret = cpufreq_register_driver(&dt_cpufreq_driver);
-	if (ret)
+	if (ret) {
 		dev_err(&pdev->dev, "failed register driver: %d\n", ret);
+		goto err;
+	}
 
+	return 0;
+err:
+	dt_cpufreq_release();
 	return ret;
 }
 
 static int dt_cpufreq_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&dt_cpufreq_driver);
+	dt_cpufreq_release();
 	return 0;
 }