[v1,11/30] drm/tegra: dc: Support OPP and SoC core voltage scaling

Message ID 20201104234427.26477-12-digetx@gmail.com
State New
Headers show
Series
  • Introduce core voltage scaling for NVIDIA Tegra20/30 SoCs
Related show

Commit Message

Dmitry Osipenko Nov. 4, 2020, 11:44 p.m.
Add OPP and SoC core voltage scaling support to the display controller
driver. This is required for enabling system-wide DVFS on older Tegra
SoCs.

Tested-by: Peter Geis <pgwipeout@gmail.com>
Tested-by: Nicolas Chauvet <kwizart@gmail.com>
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
---
 drivers/gpu/drm/tegra/Kconfig |   1 +
 drivers/gpu/drm/tegra/dc.c    | 138 +++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/tegra/dc.h    |   5 ++
 3 files changed, 143 insertions(+), 1 deletion(-)

Comments

Thierry Reding Nov. 10, 2020, 8:29 p.m. | #1
On Thu, Nov 05, 2020 at 02:44:08AM +0300, Dmitry Osipenko wrote:
> Add OPP and SoC core voltage scaling support to the display controller
> driver. This is required for enabling system-wide DVFS on older Tegra
> SoCs.
> 
> Tested-by: Peter Geis <pgwipeout@gmail.com>
> Tested-by: Nicolas Chauvet <kwizart@gmail.com>
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> ---
>  drivers/gpu/drm/tegra/Kconfig |   1 +
>  drivers/gpu/drm/tegra/dc.c    | 138 +++++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/tegra/dc.h    |   5 ++
>  3 files changed, 143 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig
> index 1650a448eabd..9eec4c3fbd3b 100644
> --- a/drivers/gpu/drm/tegra/Kconfig
> +++ b/drivers/gpu/drm/tegra/Kconfig
> @@ -12,6 +12,7 @@ config DRM_TEGRA
>  	select INTERCONNECT
>  	select IOMMU_IOVA
>  	select CEC_CORE if CEC_NOTIFIER
> +	select PM_OPP
>  	help
>  	  Choose this option if you have an NVIDIA Tegra SoC.
>  
> diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
> index fd7c8828652d..babcb66a335b 100644
> --- a/drivers/gpu/drm/tegra/dc.c
> +++ b/drivers/gpu/drm/tegra/dc.c
> @@ -11,9 +11,13 @@
>  #include <linux/interconnect.h>
>  #include <linux/module.h>
>  #include <linux/of_device.h>
> +#include <linux/pm_opp.h>
>  #include <linux/pm_runtime.h>
> +#include <linux/regulator/consumer.h>
>  #include <linux/reset.h>
>  
> +#include <soc/tegra/common.h>
> +#include <soc/tegra/fuse.h>
>  #include <soc/tegra/pmc.h>
>  
>  #include <drm/drm_atomic.h>
> @@ -1699,6 +1703,55 @@ int tegra_dc_state_setup_clock(struct tegra_dc *dc,
>  	return 0;
>  }
>  
> +static void tegra_dc_update_voltage_state(struct tegra_dc *dc,
> +					  struct tegra_dc_state *state)
> +{
> +	struct dev_pm_opp *opp;
> +	unsigned long rate;
> +	int err, min_uV;
> +
> +	/* OPP usage is optional */
> +	if (!dc->opp_table)
> +		return;
> +
> +	/* calculate actual pixel clock rate which depends on internal divider */
> +	rate = DIV_ROUND_UP(clk_get_rate(dc->clk) * 2, state->div + 2);
> +
> +	/* find suitable OPP for the rate */
> +	opp = dev_pm_opp_find_freq_ceil(dc->dev, &rate);
> +
> +	if (opp == ERR_PTR(-ERANGE))
> +		opp = dev_pm_opp_find_freq_floor(dc->dev, &rate);
> +
> +	if (IS_ERR(opp)) {
> +		dev_err(dc->dev, "failed to find OPP for %lu Hz: %ld\n",
> +			rate, PTR_ERR(opp));
> +		return;
> +	}
> +
> +	min_uV = dev_pm_opp_get_voltage(opp);
> +	dev_pm_opp_put(opp);
> +
> +	/*
> +	 * Voltage scaling is optional and trying to set voltage for a dummy
> +	 * regulator will error out.
> +	 */
> +	if (!device_property_present(dc->dev, "core-supply"))
> +		return;

This is a potentially heavy operation, so I think we should avoid that
here. How about you use devm_regulator_get_optional() in ->probe()? That
returns -ENODEV if no regulator was specified, in which case you can set
dc->core_reg = NULL and use that as the condition here.

> +
> +	/*
> +	 * Note that the minimum core voltage depends on the pixel clock
> +	 * rate (which depends on internal clock divider of CRTC) and not on
> +	 * the rate of the display controller clock. This is why we're not
> +	 * using dev_pm_opp_set_rate() API and instead are managing the
> +	 * voltage by ourselves.
> +	 */
> +	err = regulator_set_voltage(dc->core_reg, min_uV, INT_MAX);
> +	if (err)
> +		dev_err(dc->dev, "failed to set CORE voltage to %duV: %d\n",
> +			min_uV, err);
> +}

Also, I'd prefer if the flow here was more linear, such as:

	if (dc->core_reg) {
		err = regulator_set_voltage(...);
		...
	}

> +
>  static void tegra_dc_commit_state(struct tegra_dc *dc,
>  				  struct tegra_dc_state *state)
>  {
> @@ -1738,6 +1791,8 @@ static void tegra_dc_commit_state(struct tegra_dc *dc,
>  	if (err < 0)
>  		dev_err(dc->dev, "failed to set clock %pC to %lu Hz: %d\n",
>  			dc->clk, state->pclk, err);
> +
> +	tegra_dc_update_voltage_state(dc, state);
>  }
>  
>  static void tegra_dc_stop(struct tegra_dc *dc)
> @@ -2521,6 +2576,7 @@ static int tegra_dc_runtime_suspend(struct host1x_client *client)
>  
>  	clk_disable_unprepare(dc->clk);
>  	pm_runtime_put_sync(dev);
> +	regulator_disable(dc->core_reg);
>  
>  	return 0;
>  }
> @@ -2531,10 +2587,16 @@ static int tegra_dc_runtime_resume(struct host1x_client *client)
>  	struct device *dev = client->dev;
>  	int err;
>  
> +	err = regulator_enable(dc->core_reg);
> +	if (err < 0) {
> +		dev_err(dev, "failed to enable CORE regulator: %d\n", err);
> +		return err;
> +	}
> +
>  	err = pm_runtime_get_sync(dev);
>  	if (err < 0) {
>  		dev_err(dev, "failed to get runtime PM: %d\n", err);
> -		return err;
> +		goto disable_regulator;
>  	}
>  
>  	if (dc->soc->has_powergate) {
> @@ -2564,6 +2626,9 @@ static int tegra_dc_runtime_resume(struct host1x_client *client)
>  	clk_disable_unprepare(dc->clk);
>  put_rpm:
>  	pm_runtime_put_sync(dev);
> +disable_regulator:
> +	regulator_disable(dc->core_reg);
> +
>  	return err;
>  }
>  
> @@ -2879,6 +2944,72 @@ static int tegra_dc_couple(struct tegra_dc *dc)
>  	return 0;
>  }
>  
> +static void tegra_dc_deinit_opp_table(void *data)
> +{
> +	struct tegra_dc *dc = data;
> +
> +	dev_pm_opp_of_remove_table(dc->dev);
> +	dev_pm_opp_put_supported_hw(dc->opp_table);
> +	dev_pm_opp_put_regulators(dc->opp_table);
> +}
> +
> +static int devm_tegra_dc_opp_table_init(struct tegra_dc *dc)
> +{
> +	struct opp_table *hw_opp_table;
> +	u32 hw_version;
> +	int err;
> +
> +	/* voltage scaling is optional */
> +	dc->core_reg = devm_regulator_get(dc->dev, "core");
> +	if (IS_ERR(dc->core_reg))
> +		return dev_err_probe(dc->dev, PTR_ERR(dc->core_reg),
> +				     "failed to get CORE regulator\n");
> +
> +	/* legacy device-trees don't have OPP table */
> +	if (!device_property_present(dc->dev, "operating-points-v2"))
> +		return 0;

"Legacy" is a bit confusing here. For one, no device trees currently
have these tables and secondly, for newer SoCs we may never need them.

> +
> +	dc->opp_table = dev_pm_opp_get_opp_table(dc->dev);
> +	if (IS_ERR(dc->opp_table))
> +		return dev_err_probe(dc->dev, PTR_ERR(dc->opp_table),
> +				     "failed to prepare OPP table\n");
> +
> +	if (of_machine_is_compatible("nvidia,tegra20"))
> +		hw_version = BIT(tegra_sku_info.soc_process_id);
> +	else
> +		hw_version = BIT(tegra_sku_info.soc_speedo_id);
> +
> +	hw_opp_table = dev_pm_opp_set_supported_hw(dc->dev, &hw_version, 1);
> +	err = PTR_ERR_OR_ZERO(hw_opp_table);

What's the point of this? A more canonical version would be:

	if (IS_ERR(hw_opp_table)) {
		err = PTR_ERR(hw_opp_table);
		dev_err(dc->dev, ...);
		goto put_table;
	}

That uses the same number of lines but is much easier to read, in my
opinion, because it is the canonical form.

> +	if (err) {
> +		dev_err(dc->dev, "failed to set supported HW: %d\n", err);
> +		goto put_table;
> +	}
> +
> +	err = dev_pm_opp_of_add_table(dc->dev);
> +	if (err) {
> +		dev_err(dc->dev, "failed to add OPP table: %d\n", err);
> +		goto put_hw;
> +	}
> +
> +	err = devm_add_action(dc->dev, tegra_dc_deinit_opp_table, dc);
> +	if (err)
> +		goto remove_table;

Do these functions return positive values? If not, I'd prefer if this
check was more explicit (i.e. err < 0) for consistency with the rest of
this code.

> +
> +	dev_info(dc->dev, "OPP HW ver. 0x%x\n", hw_version);
> +
> +	return 0;
> +
> +remove_table:
> +	dev_pm_opp_of_remove_table(dc->dev);
> +put_hw:
> +	dev_pm_opp_put_supported_hw(dc->opp_table);
> +put_table:
> +	dev_pm_opp_put_opp_table(dc->opp_table);
> +
> +	return err;
> +}
> +
>  static int tegra_dc_probe(struct platform_device *pdev)
>  {
>  	struct tegra_dc *dc;
> @@ -2937,6 +3068,10 @@ static int tegra_dc_probe(struct platform_device *pdev)
>  		tegra_powergate_power_off(dc->powergate);
>  	}
>  
> +	err = devm_tegra_dc_opp_table_init(dc);
> +	if (err < 0)
> +		return err;
> +
>  	dc->regs = devm_platform_ioremap_resource(pdev, 0);
>  	if (IS_ERR(dc->regs))
>  		return PTR_ERR(dc->regs);
> @@ -3007,6 +3142,7 @@ struct platform_driver tegra_dc_driver = {
>  	.driver = {
>  		.name = "tegra-dc",
>  		.of_match_table = tegra_dc_of_match,
> +		.sync_state = tegra_soc_device_sync_state,
>  	},
>  	.probe = tegra_dc_probe,
>  	.remove = tegra_dc_remove,
> diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
> index ba4ed35139fb..fd774fc5c2e4 100644
> --- a/drivers/gpu/drm/tegra/dc.h
> +++ b/drivers/gpu/drm/tegra/dc.h
> @@ -13,6 +13,8 @@
>  
>  #include "drm.h"
>  
> +struct opp_table;
> +struct regulator;
>  struct tegra_output;
>  
>  #define TEGRA_DC_LEGACY_PLANES_NUM	6
> @@ -107,6 +109,9 @@ struct tegra_dc {
>  	struct drm_info_list *debugfs_files;
>  
>  	const struct tegra_dc_soc_info *soc;
> +
> +	struct opp_table *opp_table;
> +	struct regulator *core_reg;

We typically use a _supply suffix on regulators to avoid confusing this
with "register".

Thierry
Dmitry Osipenko Nov. 10, 2020, 9:17 p.m. | #2
10.11.2020 23:29, Thierry Reding пишет:
>> +

>> +	dc->opp_table = dev_pm_opp_get_opp_table(dc->dev);

>> +	if (IS_ERR(dc->opp_table))

>> +		return dev_err_probe(dc->dev, PTR_ERR(dc->opp_table),

>> +				     "failed to prepare OPP table\n");

>> +

>> +	if (of_machine_is_compatible("nvidia,tegra20"))

>> +		hw_version = BIT(tegra_sku_info.soc_process_id);

>> +	else

>> +		hw_version = BIT(tegra_sku_info.soc_speedo_id);

>> +

>> +	hw_opp_table = dev_pm_opp_set_supported_hw(dc->dev, &hw_version, 1);

>> +	err = PTR_ERR_OR_ZERO(hw_opp_table);

> What's the point of this? A more canonical version would be:

> 

> 	if (IS_ERR(hw_opp_table)) {

> 		err = PTR_ERR(hw_opp_table);

> 		dev_err(dc->dev, ...);

> 		goto put_table;

> 	}

> 

> That uses the same number of lines but is much easier to read, in my

> opinion, because it is the canonical form.

> 


Your variant is much more difficult to read for me :/

I guess the only reason it could be "canonical" is because
PTR_ERR_OR_ZERO was added not so long time ago.

But don't worry, this code will be moved out in a v2 and it won't use
PTR_ERR_OR_ZERO.
Dan Carpenter Nov. 11, 2020, 9:28 a.m. | #3
On Tue, Nov 10, 2020 at 09:29:45PM +0100, Thierry Reding wrote:
> > +	err = dev_pm_opp_of_add_table(dc->dev);
> > +	if (err) {
> > +		dev_err(dc->dev, "failed to add OPP table: %d\n", err);
> > +		goto put_hw;
> > +	}
> > +
> > +	err = devm_add_action(dc->dev, tegra_dc_deinit_opp_table, dc);
> > +	if (err)
> > +		goto remove_table;
> 
> Do these functions return positive values? If not, I'd prefer if this
> check was more explicit (i.e. err < 0) for consistency with the rest of
> this code.
> 

Isn't it the other way around?  It's only when the check is explicitly
for "if (ret < 0)" that we have to wonder about positives. If the codes
says "if (ret)" then we know that it doesn't return positive values and
every non-zero is an error.

In the kernel "if (ret)" is way more popular than "if (ret < 0)":

    $ git grep 'if (\(ret\|rc\|err\))' | wc -l
    92927
    $ git grep 'if (\(ret\|rc\|err\) < 0)' | wc -l
    36577

And some of those are places where "ret" can be positive so we are
forced to use the "if (ret < 0)" format.

Checking for "if (ret)" is easier from a static analysis perspective.
If it's one style is used consistently then they're the same but when
there is a mismatch the "if (ret < 0) " will trigger a false positive
and the "if (ret) " will not.

	int var;

	ret = frob(&var);
	if (ret < 0)
		return ret;

Smatch thinks positive returns are not handled so it complains that
"var can be uninitialized".

regards,
dan carpenter
Mark Brown Nov. 11, 2020, 11:55 a.m. | #4
On Wed, Nov 11, 2020 at 12:23:41AM +0300, Dmitry Osipenko wrote:
> 10.11.2020 23:32, Mark Brown пишет:

> >>> +	if (!device_property_present(dc->dev, "core-supply"))
> >>> +		return;

> >> This is a potentially heavy operation, so I think we should avoid that
> >> here. How about you use devm_regulator_get_optional() in ->probe()? That
> >> returns -ENODEV if no regulator was specified, in which case you can set
> >> dc->core_reg = NULL and use that as the condition here.

> > Or enumerate the configurable voltages after getting the regulator and
> > handle that appropriately which would be more robust in case there's
> > missing or unusual constraints.

> I already changed that code to use regulator_get_optional() for v2.

That doesn't look entirely appropriate given that the core does most
likely require some kind of power to operate.

> Regarding the enumerating supported voltage.. I think this should be
> done by the OPP core, but regulator core doesn't work well if
> regulator_get() is invoked more than one time for the same device, at
> least there is a loud debugfs warning about an already existing

I don't understand why this would be an issue - if nothing else the core
could just offer an interface to trigger the check.

> directory for a regulator. It's easy to check whether the debug
> directory exists before creating it, like thermal framework does it for
> example, but then there were some other more difficult issues.. I don't
> recall what they were right now. Perhaps will be easier to simply get a
> error from regulator_set_voltage() for now because it shouldn't ever
> happen in practice, unless device-tree has wrong constraints.

The constraints might not be wrong, there might be some board which has
a constraint somewhere for
Mark Brown Nov. 12, 2020, 8:01 p.m. | #5
On Thu, Nov 12, 2020 at 10:16:14PM +0300, Dmitry Osipenko wrote:
> 12.11.2020 20:16, Mark Brown пишет:

> > On Thu, Nov 12, 2020 at 07:59:36PM +0300, Dmitry Osipenko wrote:


> >> Also, some device-trees won't have that regulator anyways because board

> >> schematics isn't available, and thus, we can't fix them.


> > This is what dummy supplies are for?


> But it's not allowed to change voltage of a dummy regulator, is it

> intentional?


Of course not, we can't know if the requested new voltage is valid - the
driver would have to have explict support for handling situations where
it's not possible to change the voltage (which it can detect through
enumerating the values it wants to set at startup).

[Requesting the same supply multiple times]
> > So there's no known obstacle to putting enumeration of supported

> > voltages into the OPP core then?  I'm a bit confused here.


> It's an obstacle if both OPP and device driver need to get the same

> regulator. Like in the case of this DRM driver, which need to control

> the voltage instead of allowing OPP core to do it.


It wasn't entirely deliberate but the warnings have proven useful in
catching bugs (eg, leaked regulator requests).  The general thought is
that if two things both claim to control the same supply on a consumer
then they really ought to be coordinating with each other.

> Please notice that devm_tegra_dc_opp_table_init() of this patch doesn't

> use dev_pm_opp_set_regulators(), which would allow OPP core to filter

> out unsupported OPPs. But then OPP core will need need to get an already

> requested regulator and this doesn't work well.


There is nothing stopping us adding a variant of that call which passes
in the regulators that were acquired by the caller rather than having
the OPP core do the requesting, or equally the OPP core could provide a
mechanism for the caller to get the regulators that were requested.

> > Ah, so each board duplicates the OPP tables then, or there's an

> > expectation that if there's some limit then they'll copy and modify the

> > table?  If that's the case then it's a bit redundant to do filtering

> > indeed.


> I think this is not strictly defined. Either way will work, although

> perhaps it should be more preferred that unsupported OPPs aren't present

> in a device-tree.


OTOH that does mean that if there's an updated information on OPPs (new
ones added, old ones determined to be unstable) then you can't just
update a central place.  It depends if the OPPs are thought of as
describing the SoC or the system as a whole I guess.
Dmitry Osipenko Nov. 13, 2020, 3:55 p.m. | #6
13.11.2020 17:29, Mark Brown пишет:
> On Fri, Nov 13, 2020 at 01:37:01AM +0300, Dmitry Osipenko wrote:

>> 12.11.2020 23:01, Mark Brown пишет:

>>>> But it's not allowed to change voltage of a dummy regulator, is it

>>>> intentional?

> 

>>> Of course not, we can't know if the requested new voltage is valid - the

>>> driver would have to have explict support for handling situations where

>>> it's not possible to change the voltage (which it can detect through

>>> enumerating the values it wants to set at startup).

> 

>>> [Requesting the same supply multiple times]

> 

>> But how driver is supposed to recognize that it's a dummy or buggy

>> regulator if it rejects all voltages?

> 

> It's not clear if it matters - it's more a policy decision on the part

> of the driver about what it thinks safe error handling is.  If it's not

> possible to read voltages from the regulator the consumer driver has to

> decide what it thinks it's safe for it to do, either way it has no idea

> what the actual current voltage is.  It could assume that it's something

> that supports all the use cases it wants to use and just carry on with

> no configuration of voltages, it could decide that it might not support

> everything and not make any changes to be safe, or do something like

> try to figure out that if we're currently at a given OPP that's the top

> OPP possible.  Historically when we've not had regulator control in

> these drivers so they have effectively gone with the first option of

> just assuming it's a generally safe value, this often aligns with what

> the power on requirements for SoCs are so it's not unreasonable.


I don't think that in a case of this particular driver there is a way to
make any decisions other than to assume that all changes are safe to be
done if regulator isn't specified in a device-tree.

If regulator_get() returns a dummy regulator, then this means that
regulator isn't specified in a device-tree. But then the only way for a
consumer driver to check whether regulator is dummy, is to check
presence of the supply property in a device-tree.

We want to emit error messages when something goes wrong, for example
when regulator voltage fails to change. It's fine that voltage changes
are failing for a dummy regulator, but then consumer driver shouldn't
recognize it as a error condition.

The regulator_get_optional() provides a more consistent and
straightforward way for consumer drivers to check presence of a physical
voltage regulator in comparison to dealing with a regulator_get(). The
dummy regulator is nice to use when there is no need to change
regulator's voltage, which doesn't work for a dummy regulator.
Dmitry Osipenko Nov. 19, 2020, 2:22 p.m. | #7
16.11.2020 16:33, Mark Brown пишет:
> On Sun, Nov 15, 2020 at 08:42:10PM +0300, Dmitry Osipenko wrote:
>> 13.11.2020 20:28, Mark Brown пишет:
> 
>>>> What should we do?
> 
>>> As I keep saying the consumer driver should be enumerating the voltages
>>> it can set, if it can't find any and wants to continue then it can just
>>> skip setting voltages later on.  If only some are unavailable then it
>>> probably wants to eliminate those specific OPPs instead.
> 
>> I'm seeing a dummy regulator as a helper for consumer drivers which
>> removes burden of handling an absent (optional) regulator. Is this a
>> correct understanding of a dummy regulator?
> 
>> Older DTBs don't have a regulator and we want to keep them working. This
>> is equal to a physically absent regulator and in this case it's an
>> optional regulator, IMO.
> 
> No, you are failing to understand the purpose of this code.  To
> reiterate unless the device supports operating with the supply
> physically absent then the driver should not be attempting to use
> regulator_get_optional().  That exists specifically for the case where
> the supply may be absent, nothing else.  The dummy regulator is there
> precisely for the case where the system does not describe supplies that
> we know are required for the device to function, it fixes up that
> omission so we don't need to open code handling of this in every single
> consumer driver.

The original intention of regulator_get_optional() is clear to me, but
nothing really stops drivers from slightly re-purposing this API, IMO.

Drivers should be free to assume that if regulator isn't defined by
firmware, then it's physically absent if this doesn't break anything. Of
course in some cases it's unsafe to make such assumptions. I think it's
a bit unpractical to artificially limit API usage without a good reason,
i.e. if nothing breaks underneath of a driver.

> Regulators that are present but not described by the firmware are a
> clearly different case to regulators that are not physically there,
> hardware with actually optional regulators will generally require some
> configuration for this case.
> 

I have good news. After spending some more time on trying out different
things, I found that my previous assumption about the fixed-regulator
was wrong, it actually accepts voltage changes, i.e. regulator consumer
doesn't get a error on a voltage-change. This is exactly what is needed
for the OPP core to work properly.

This means that there is no need to add special quirks to work around
absent regulators, we will just add a fixed regulator to the DTs which
don't specify a real regulator. The OPP core will perform voltage
checking and filter out unsupported OPPs. The older DTBs will continue
to work as well.
Mark Brown Nov. 19, 2020, 3:19 p.m. | #8
On Thu, Nov 19, 2020 at 05:22:43PM +0300, Dmitry Osipenko wrote:
> 16.11.2020 16:33, Mark Brown пишет:

> > No, you are failing to understand the purpose of this code.  To
> > reiterate unless the device supports operating with the supply
> > physically absent then the driver should not be attempting to use
> > regulator_get_optional().  That exists specifically for the case where

> The original intention of regulator_get_optional() is clear to me, but
> nothing really stops drivers from slightly re-purposing this API, IMO.

> Drivers should be free to assume that if regulator isn't defined by
> firmware, then it's physically absent if this doesn't break anything. Of
> course in some cases it's unsafe to make such assumptions. I think it's
> a bit unpractical to artificially limit API usage without a good reason,
> i.e. if nothing breaks underneath of a driver.

If the supply can be physically absent without breaking anything then
this is the intended use case for optional regulators.  This is a *very*
uncommon.

> > Regulators that are present but not described by the firmware are a
> > clearly different case to regulators that are not physically there,
> > hardware with actually optional regulators will generally require some
> > configuration for this case.

> I have good news. After spending some more time on trying out different
> things, I found that my previous assumption about the fixed-regulator
> was wrong, it actually accepts voltage changes, i.e. regulator consumer
> doesn't get a error on a voltage-change. This is exactly what is needed
> for the OPP core to work properly.

To be clear when you set a voltage range you will get the minimum
voltage that can be supported within that range on the system given all
the other constraints the system has.  For fixed voltage regulators or
regulators constraints to not change voltage this means that if whatever
voltage they are fixed at is in the range requested then the API will
report success.

Patch

diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig
index 1650a448eabd..9eec4c3fbd3b 100644
--- a/drivers/gpu/drm/tegra/Kconfig
+++ b/drivers/gpu/drm/tegra/Kconfig
@@ -12,6 +12,7 @@  config DRM_TEGRA
 	select INTERCONNECT
 	select IOMMU_IOVA
 	select CEC_CORE if CEC_NOTIFIER
+	select PM_OPP
 	help
 	  Choose this option if you have an NVIDIA Tegra SoC.
 
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index fd7c8828652d..babcb66a335b 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -11,9 +11,13 @@ 
 #include <linux/interconnect.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
+#include <linux/pm_opp.h>
 #include <linux/pm_runtime.h>
+#include <linux/regulator/consumer.h>
 #include <linux/reset.h>
 
+#include <soc/tegra/common.h>
+#include <soc/tegra/fuse.h>
 #include <soc/tegra/pmc.h>
 
 #include <drm/drm_atomic.h>
@@ -1699,6 +1703,55 @@  int tegra_dc_state_setup_clock(struct tegra_dc *dc,
 	return 0;
 }
 
+static void tegra_dc_update_voltage_state(struct tegra_dc *dc,
+					  struct tegra_dc_state *state)
+{
+	struct dev_pm_opp *opp;
+	unsigned long rate;
+	int err, min_uV;
+
+	/* OPP usage is optional */
+	if (!dc->opp_table)
+		return;
+
+	/* calculate actual pixel clock rate which depends on internal divider */
+	rate = DIV_ROUND_UP(clk_get_rate(dc->clk) * 2, state->div + 2);
+
+	/* find suitable OPP for the rate */
+	opp = dev_pm_opp_find_freq_ceil(dc->dev, &rate);
+
+	if (opp == ERR_PTR(-ERANGE))
+		opp = dev_pm_opp_find_freq_floor(dc->dev, &rate);
+
+	if (IS_ERR(opp)) {
+		dev_err(dc->dev, "failed to find OPP for %lu Hz: %ld\n",
+			rate, PTR_ERR(opp));
+		return;
+	}
+
+	min_uV = dev_pm_opp_get_voltage(opp);
+	dev_pm_opp_put(opp);
+
+	/*
+	 * Voltage scaling is optional and trying to set voltage for a dummy
+	 * regulator will error out.
+	 */
+	if (!device_property_present(dc->dev, "core-supply"))
+		return;
+
+	/*
+	 * Note that the minimum core voltage depends on the pixel clock
+	 * rate (which depends on internal clock divider of CRTC) and not on
+	 * the rate of the display controller clock. This is why we're not
+	 * using dev_pm_opp_set_rate() API and instead are managing the
+	 * voltage by ourselves.
+	 */
+	err = regulator_set_voltage(dc->core_reg, min_uV, INT_MAX);
+	if (err)
+		dev_err(dc->dev, "failed to set CORE voltage to %duV: %d\n",
+			min_uV, err);
+}
+
 static void tegra_dc_commit_state(struct tegra_dc *dc,
 				  struct tegra_dc_state *state)
 {
@@ -1738,6 +1791,8 @@  static void tegra_dc_commit_state(struct tegra_dc *dc,
 	if (err < 0)
 		dev_err(dc->dev, "failed to set clock %pC to %lu Hz: %d\n",
 			dc->clk, state->pclk, err);
+
+	tegra_dc_update_voltage_state(dc, state);
 }
 
 static void tegra_dc_stop(struct tegra_dc *dc)
@@ -2521,6 +2576,7 @@  static int tegra_dc_runtime_suspend(struct host1x_client *client)
 
 	clk_disable_unprepare(dc->clk);
 	pm_runtime_put_sync(dev);
+	regulator_disable(dc->core_reg);
 
 	return 0;
 }
@@ -2531,10 +2587,16 @@  static int tegra_dc_runtime_resume(struct host1x_client *client)
 	struct device *dev = client->dev;
 	int err;
 
+	err = regulator_enable(dc->core_reg);
+	if (err < 0) {
+		dev_err(dev, "failed to enable CORE regulator: %d\n", err);
+		return err;
+	}
+
 	err = pm_runtime_get_sync(dev);
 	if (err < 0) {
 		dev_err(dev, "failed to get runtime PM: %d\n", err);
-		return err;
+		goto disable_regulator;
 	}
 
 	if (dc->soc->has_powergate) {
@@ -2564,6 +2626,9 @@  static int tegra_dc_runtime_resume(struct host1x_client *client)
 	clk_disable_unprepare(dc->clk);
 put_rpm:
 	pm_runtime_put_sync(dev);
+disable_regulator:
+	regulator_disable(dc->core_reg);
+
 	return err;
 }
 
@@ -2879,6 +2944,72 @@  static int tegra_dc_couple(struct tegra_dc *dc)
 	return 0;
 }
 
+static void tegra_dc_deinit_opp_table(void *data)
+{
+	struct tegra_dc *dc = data;
+
+	dev_pm_opp_of_remove_table(dc->dev);
+	dev_pm_opp_put_supported_hw(dc->opp_table);
+	dev_pm_opp_put_regulators(dc->opp_table);
+}
+
+static int devm_tegra_dc_opp_table_init(struct tegra_dc *dc)
+{
+	struct opp_table *hw_opp_table;
+	u32 hw_version;
+	int err;
+
+	/* voltage scaling is optional */
+	dc->core_reg = devm_regulator_get(dc->dev, "core");
+	if (IS_ERR(dc->core_reg))
+		return dev_err_probe(dc->dev, PTR_ERR(dc->core_reg),
+				     "failed to get CORE regulator\n");
+
+	/* legacy device-trees don't have OPP table */
+	if (!device_property_present(dc->dev, "operating-points-v2"))
+		return 0;
+
+	dc->opp_table = dev_pm_opp_get_opp_table(dc->dev);
+	if (IS_ERR(dc->opp_table))
+		return dev_err_probe(dc->dev, PTR_ERR(dc->opp_table),
+				     "failed to prepare OPP table\n");
+
+	if (of_machine_is_compatible("nvidia,tegra20"))
+		hw_version = BIT(tegra_sku_info.soc_process_id);
+	else
+		hw_version = BIT(tegra_sku_info.soc_speedo_id);
+
+	hw_opp_table = dev_pm_opp_set_supported_hw(dc->dev, &hw_version, 1);
+	err = PTR_ERR_OR_ZERO(hw_opp_table);
+	if (err) {
+		dev_err(dc->dev, "failed to set supported HW: %d\n", err);
+		goto put_table;
+	}
+
+	err = dev_pm_opp_of_add_table(dc->dev);
+	if (err) {
+		dev_err(dc->dev, "failed to add OPP table: %d\n", err);
+		goto put_hw;
+	}
+
+	err = devm_add_action(dc->dev, tegra_dc_deinit_opp_table, dc);
+	if (err)
+		goto remove_table;
+
+	dev_info(dc->dev, "OPP HW ver. 0x%x\n", hw_version);
+
+	return 0;
+
+remove_table:
+	dev_pm_opp_of_remove_table(dc->dev);
+put_hw:
+	dev_pm_opp_put_supported_hw(dc->opp_table);
+put_table:
+	dev_pm_opp_put_opp_table(dc->opp_table);
+
+	return err;
+}
+
 static int tegra_dc_probe(struct platform_device *pdev)
 {
 	struct tegra_dc *dc;
@@ -2937,6 +3068,10 @@  static int tegra_dc_probe(struct platform_device *pdev)
 		tegra_powergate_power_off(dc->powergate);
 	}
 
+	err = devm_tegra_dc_opp_table_init(dc);
+	if (err < 0)
+		return err;
+
 	dc->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(dc->regs))
 		return PTR_ERR(dc->regs);
@@ -3007,6 +3142,7 @@  struct platform_driver tegra_dc_driver = {
 	.driver = {
 		.name = "tegra-dc",
 		.of_match_table = tegra_dc_of_match,
+		.sync_state = tegra_soc_device_sync_state,
 	},
 	.probe = tegra_dc_probe,
 	.remove = tegra_dc_remove,
diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
index ba4ed35139fb..fd774fc5c2e4 100644
--- a/drivers/gpu/drm/tegra/dc.h
+++ b/drivers/gpu/drm/tegra/dc.h
@@ -13,6 +13,8 @@ 
 
 #include "drm.h"
 
+struct opp_table;
+struct regulator;
 struct tegra_output;
 
 #define TEGRA_DC_LEGACY_PLANES_NUM	6
@@ -107,6 +109,9 @@  struct tegra_dc {
 	struct drm_info_list *debugfs_files;
 
 	const struct tegra_dc_soc_info *soc;
+
+	struct opp_table *opp_table;
+	struct regulator *core_reg;
 };
 
 static inline struct tegra_dc *