[2/2] cpufreq: mediatek: Add MT8173 cpufreq driver

Message ID 1433766561-1330-3-git-send-email-pi-cheng.chen@linaro.org
State New
Headers show

Commit Message

pi-cheng.chen June 8, 2015, 12:29 p.m.
This patch implements MT8173 cpufreq driver.

Signed-off-by: Pi-Cheng Chen <pi-cheng.chen@linaro.org>
---
 drivers/cpufreq/Kconfig.arm      |   7 +
 drivers/cpufreq/Makefile         |   1 +
 drivers/cpufreq/mt8173-cpufreq.c | 550 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 558 insertions(+)
 create mode 100644 drivers/cpufreq/mt8173-cpufreq.c

Comments

pi-cheng.chen June 10, 2015, 3:37 a.m. | #1
On Tue, Jun 9, 2015 at 5:17 PM, Paul Bolle <pebolle@tiscali.nl> wrote:
> On Mon, 2015-06-08 at 20:29 +0800, Pi-Cheng Chen wrote:
>> --- /dev/null
>> +++ b/drivers/cpufreq/mt8173-cpufreq.c
>
>> +#include <linux/module.h>
>
> Weren't you going to drop this include?

Sorry I forget to merge that part of fix into this patch.
Will fix it.

>
>> +module_init(mt8173_cpufreq_driver_init);
>
> For built-in code this is equivalent to, speaking from memory:
>     device_initcall(mt8173_cpufreq_driver_init);
>
> Why don't you just use that directly?

Yes. Will do it.
Thanks for reviewing.

Best Regards,
Pi-Cheng

>
> Thanks,
>
>
> Paul Bolle
>
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Viresh Kumar June 22, 2015, 11:45 a.m. | #2
On 08-06-15, 20:29, Pi-Cheng Chen wrote:

Sorry for the delay, I have been quite busy recently.

> +++ b/drivers/cpufreq/mt8173-cpufreq.c
> +static LIST_HEAD(cpu_dvfs_info_list);
> +
> +static inline struct mtk_cpu_dvfs_info *to_mtk_cpu_dvfs_info(
> +			struct list_head *list)
> +{
> +	return list_entry(list, struct mtk_cpu_dvfs_info, node);
> +}
> +
> +static inline void mtk_cpu_dvfs_info_add(struct mtk_cpu_dvfs_info *info)
> +{
> +	list_add(&info->node, &cpu_dvfs_info_list);
> +}

No stupid wrappers please. Doesn't make anything better.

> +
> +static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_get(int cpu)

A very bad name to a routine with very specific functionality.

> +{
> +	struct mtk_cpu_dvfs_info *info;
> +	struct list_head *list;
> +
> +	list_for_each(list, &cpu_dvfs_info_list) {
> +		info = to_mtk_cpu_dvfs_info(list);
> +
> +		if (cpumask_test_cpu(cpu, info->cpus))
> +			return info;
> +	}
> +
> +	return NULL;
> +}
> +
> +static void mtk_cpu_dvfs_info_release(void)
> +{
> +	struct list_head *list, *tmp;
> +	struct mtk_cpu_dvfs_info *info;
> +
> +	list_for_each_safe(list, tmp, &cpu_dvfs_info_list) {
> +		info = to_mtk_cpu_dvfs_info(list);
> +
> +		dev_pm_opp_free_cpufreq_table(info->cpu_dev,
> +					      &info->freq_table);
> +
> +		if (!IS_ERR(info->proc_reg))
> +			regulator_put(info->proc_reg);
> +		if (!IS_ERR(info->sram_reg))
> +			regulator_put(info->sram_reg);
> +		if (!IS_ERR(info->cpu_clk))
> +			clk_put(info->cpu_clk);
> +		if (!IS_ERR(info->inter_clk))
> +			clk_put(info->inter_clk);
> +
> +		of_free_opp_table(info->cpu_dev);
> +
> +		list_del(list);
> +		kfree(info);
> +	}
> +}
> +
> +#define MIN(a, b) ((a) < (b) ? (a) : (b))
> +#define MAX(a, b) ((a) > (b) ? (a) : (b))

Look for these in kernel.h

> +static int mtk_cpufreq_set_voltage(struct mtk_cpu_dvfs_info *info, int vproc)
> +{
> +	if (info->need_voltage_trace)
> +		return mtk_cpufreq_voltage_trace(info, vproc);
> +	else
> +		return regulator_set_voltage(info->proc_reg, vproc,
> +					     vproc + VOLT_TOL);
> +}
> +
> +static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
> +				  unsigned int index)
> +{
> +	struct cpufreq_frequency_table *freq_table = policy->freq_table;
> +	struct clk *cpu_clk = policy->clk;
> +	struct clk *armpll = clk_get_parent(cpu_clk);
> +	struct mtk_cpu_dvfs_info *info = policy->driver_data;
> +	struct device *cpu_dev = info->cpu_dev;
> +	struct dev_pm_opp *opp;
> +	long freq_hz, old_freq_hz;
> +	int vproc, old_vproc, inter_vproc, target_vproc, ret;
> +
> +	inter_vproc = info->intermediate_voltage;
> +
> +	old_freq_hz = clk_get_rate(cpu_clk);
> +	old_vproc = regulator_get_voltage(info->proc_reg);
> +
> +	freq_hz = freq_table[index].frequency * 1000;

A blank line here.

> +	rcu_read_lock();
> +	opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_hz);
> +	if (IS_ERR(opp)) {
> +		rcu_read_unlock();
> +		pr_err("cpu%d: failed to find OPP for %ld\n",
> +		       policy->cpu, freq_hz);
> +		return PTR_ERR(opp);
> +	}
> +	vproc = dev_pm_opp_get_voltage(opp);
> +	rcu_read_unlock();
> +
> +	/*
> +	 * If the new voltage or the intermediate voltage is higher than the
> +	 * current voltage, scale up voltage first.
> +	 */
> +	target_vproc = (inter_vproc > vproc) ? inter_vproc : vproc;
> +	if (old_vproc < target_vproc) {
> +		ret = mtk_cpufreq_set_voltage(info, target_vproc);
> +		if (ret) {
> +			pr_err("cpu%d: failed to scale up voltage!\n",
> +			       policy->cpu);
> +			mtk_cpufreq_set_voltage(info, old_vproc);
> +			return ret;
> +		}
> +	}
> +
> +	/* Reparent the CPU clock to intermediate clock. */
> +	ret = clk_set_parent(cpu_clk, info->inter_clk);
> +	if (ret) {
> +		pr_err("cpu%d: failed to re-parent cpu clock!\n",
> +		       policy->cpu);
> +		mtk_cpufreq_set_voltage(info, old_vproc);
> +		WARN_ON(1);
> +		return ret;
> +	}
> +
> +	/* Set the original PLL to target rate. */
> +	ret = clk_set_rate(armpll, freq_hz);
> +	if (ret) {
> +		pr_err("cpu%d: failed to scale cpu clock rate!\n",
> +		       policy->cpu);
> +		clk_set_parent(cpu_clk, armpll);
> +		mtk_cpufreq_set_voltage(info, old_vproc);
> +		return ret;
> +	}
> +
> +	/* Set parent of CPU clock back to the original PLL. */
> +	ret = clk_set_parent(cpu_clk, armpll);
> +	if (ret) {
> +		pr_err("cpu%d: failed to re-parent cpu clock!\n",
> +		       policy->cpu);
> +		mtk_cpufreq_set_voltage(info, inter_vproc);
> +		WARN_ON(1);
> +		return ret;
> +	}
> +
> +	/*
> +	 * If the new voltage is lower than the intermediate voltage or the
> +	 * original voltage, scale down to the new voltage.
> +	 */
> +	if (vproc < inter_vproc || vproc < old_vproc) {
> +		ret = mtk_cpufreq_set_voltage(info, vproc);
> +		if (ret) {
> +			pr_err("cpu%d: failed to scale down voltage!\n",
> +			       policy->cpu);
> +			clk_set_parent(cpu_clk, info->inter_clk);
> +			clk_set_rate(armpll, old_freq_hz);
> +			clk_set_parent(cpu_clk, armpll);
> +			return ret;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int mtk_cpufreq_init(struct cpufreq_policy *policy)
> +{
> +	struct mtk_cpu_dvfs_info *info;
> +	int ret;
> +
> +	info = mtk_cpu_dvfs_info_get(policy->cpu);
> +	if (!info) {
> +		pr_err("%s: mtk cpu dvfs info for cpu%d is not initialized\n",
> +		       __func__, policy->cpu);
> +		return -ENODEV;
> +	}
> +
> +	ret = cpufreq_table_validate_and_show(policy, info->freq_table);
> +	if (ret) {
> +		pr_err("%s: invalid frequency table: %d\n", __func__, ret);
> +		return ret;
> +	}
> +
> +	cpumask_copy(policy->cpus, info->cpus);
> +	policy->driver_data = info;
> +	policy->clk = info->cpu_clk;
> +
> +	return 0;
> +}
> +
> +static struct cpufreq_driver mt8173_cpufreq_driver = {
> +	.flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK,
> +	.verify = cpufreq_generic_frequency_table_verify,
> +	.target_index = mtk_cpufreq_set_target,
> +	.get = cpufreq_generic_get,
> +	.init = mtk_cpufreq_init,
> +	.name = "mtk-cpufreq",
> +	.attr = cpufreq_generic_attr,
> +};
> +
> +static int mtk_cpu_dvfs_info_init(int cpu)
> +{
> +	struct device *cpu_dev;
> +	struct regulator *proc_reg = ERR_PTR(-ENODEV);
> +	struct regulator *sram_reg = ERR_PTR(-ENODEV);
> +	struct clk *cpu_clk = ERR_PTR(-ENODEV);
> +	struct clk *inter_clk = ERR_PTR(-ENODEV);
> +	struct mtk_cpu_dvfs_info *info;
> +	struct cpufreq_frequency_table *freq_table;
> +	struct dev_pm_opp *opp;
> +	unsigned long rate;
> +	int ret;
> +
> +	cpu_dev = get_cpu_device(cpu);
> +	if (!cpu_dev) {
> +		pr_err("failed to get cpu%d device\n", cpu);
> +		return -ENODEV;
> +	}
> +
> +	ret = of_init_opp_table(cpu_dev);
> +	if (ret) {
> +		pr_warn("no OPP table for cpu%d\n", cpu);
> +		return ret;
> +	}
> +
> +	cpu_clk = clk_get(cpu_dev, "cpu");
> +	if (IS_ERR(cpu_clk)) {
> +		if (PTR_ERR(cpu_clk) == -EPROBE_DEFER)
> +			pr_warn("cpu clk for cpu%d not ready, retry.\n", cpu);
> +		else
> +			pr_err("failed to get cpu clk for cpu%d\n", cpu);
> +
> +		ret = PTR_ERR(cpu_clk);
> +		goto out_free_opp_table;
> +	}
> +
> +	inter_clk = clk_get(cpu_dev, "intermediate");
> +	if (IS_ERR(inter_clk)) {
> +		if (PTR_ERR(inter_clk) == -EPROBE_DEFER)
> +			pr_warn("intermediate clk for cpu%d not ready, retry.\n",
> +				cpu);
> +		else
> +			pr_err("failed to get intermediate clk for cpu%d\n",
> +			       cpu);
> +
> +		ret = PTR_ERR(cpu_clk);
> +		goto out_free_resources;
> +	}
> +
> +	proc_reg = regulator_get_exclusive(cpu_dev, "proc");
> +	if (IS_ERR(proc_reg)) {
> +		if (PTR_ERR(proc_reg) == -EPROBE_DEFER)
> +			pr_warn("proc regulator for cpu%d not ready, retry.\n",
> +				cpu);
> +		else
> +			pr_err("failed to get proc regulator for cpu%d\n",
> +			       cpu);
> +
> +		ret = PTR_ERR(proc_reg);
> +		goto out_free_resources;
> +	}
> +
> +	/* Both presence and absence of sram regulator are valid cases. */
> +	sram_reg = regulator_get_exclusive(cpu_dev, "sram");
> +
> +	info = kzalloc(sizeof(*info), GFP_KERNEL);
> +	if (!info) {
> +		ret = -ENOMEM;
> +		goto out_free_resources;
> +	}
> +
> +	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
> +	if (ret) {
> +		pr_err("failed to init cpufreq table for cpu%d: %d\n",
> +		       cpu, ret);
> +		goto out_free_mtk_cpu_dvfs_info;
> +	}
> +
> +	if (!alloc_cpumask_var(&info->cpus, GFP_KERNEL))

Why getting into such trouble? What about just saving policy's pointer
in info and use it for freq_table, cpus mask, etc ?

> +		goto out_free_cpufreq_table;
> +

> +}
> +
> +static int mt8173_cpufreq_probe(struct platform_device *pdev)
> +{
> +	int cpu, ret;
> +
> +	for_each_possible_cpu(cpu) {
> +		/*
> +		 * If the struct mtk_cpu_dvfs_info for the cpu power domain
> +		 * is already initialized, skip this CPU.
> +		 */
> +		if (!mtk_cpu_dvfs_info_get(cpu)) {
> +			ret = mtk_cpu_dvfs_info_init(cpu);
> +			if (ret) {
> +				if (ret != -EPROBE_DEFER)
> +					pr_err("%s probe fail\n", __func__);
> +
> +				mtk_cpu_dvfs_info_release();
> +				return ret;
> +			}
> +		}
> +	}
> +
> +	ret = cpufreq_register_driver(&mt8173_cpufreq_driver);
> +	if (ret) {
> +		pr_err("failed to register mtk cpufreq driver\n");
> +		mtk_cpu_dvfs_info_release();
> +	}
> +
> +	return ret;
> +}
> +
> +static struct platform_driver mt8173_cpufreq_platdrv = {
> +	.driver = {
> +		.name	= "mt8173-cpufreq",
> +	},
> +	.probe		= mt8173_cpufreq_probe,
> +};
> +module_platform_driver(mt8173_cpufreq_platdrv);
> +
> +static int mt8173_cpufreq_driver_init(void)
> +{
> +	struct platform_device *pdev;
> +
> +	if (!of_machine_is_compatible("mediatek,mt8173"))
> +		return -ENODEV;
> +
> +	pdev = platform_device_register_simple("mt8173-cpufreq", -1, NULL, 0);
> +	if (IS_ERR(pdev)) {
> +		pr_err("failed to register mtk-cpufreq platform device\n");
> +		return PTR_ERR(pdev);
> +	}
> +
> +	return 0;
> +}
> +module_init(mt8173_cpufreq_driver_init);

Can this driver be built as module? Why this module* crap all over the
place?
pi-cheng.chen June 23, 2015, 3:25 p.m. | #3
Hi Viresh,

On Mon, Jun 22, 2015 at 7:45 PM, Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 08-06-15, 20:29, Pi-Cheng Chen wrote:
>
> Sorry for the delay, I have been quite busy recently.

That's fine. Thanks for reviewing.

>
>> +++ b/drivers/cpufreq/mt8173-cpufreq.c
>> +static LIST_HEAD(cpu_dvfs_info_list);
>> +
>> +static inline struct mtk_cpu_dvfs_info *to_mtk_cpu_dvfs_info(
>> +                     struct list_head *list)
>> +{
>> +     return list_entry(list, struct mtk_cpu_dvfs_info, node);
>> +}
>> +
>> +static inline void mtk_cpu_dvfs_info_add(struct mtk_cpu_dvfs_info *info)
>> +{
>> +     list_add(&info->node, &cpu_dvfs_info_list);
>> +}
>
> No stupid wrappers please. Doesn't make anything better.

Will remove it.

>
>> +
>> +static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_get(int cpu)
>
> A very bad name to a routine with very specific functionality.

Would get_mtk_cpu_dvfs_info() or cpu_to_mtk_cpu_dvfs_info() be better?

>
>> +{
>> +     struct mtk_cpu_dvfs_info *info;
>> +     struct list_head *list;
>> +
>> +     list_for_each(list, &cpu_dvfs_info_list) {
>> +             info = to_mtk_cpu_dvfs_info(list);
>> +
>> +             if (cpumask_test_cpu(cpu, info->cpus))
>> +                     return info;
>> +     }
>> +
>> +     return NULL;
>> +}
>> +
>> +static void mtk_cpu_dvfs_info_release(void)
>> +{
>> +     struct list_head *list, *tmp;
>> +     struct mtk_cpu_dvfs_info *info;
>> +
>> +     list_for_each_safe(list, tmp, &cpu_dvfs_info_list) {
>> +             info = to_mtk_cpu_dvfs_info(list);
>> +
>> +             dev_pm_opp_free_cpufreq_table(info->cpu_dev,
>> +                                           &info->freq_table);
>> +
>> +             if (!IS_ERR(info->proc_reg))
>> +                     regulator_put(info->proc_reg);
>> +             if (!IS_ERR(info->sram_reg))
>> +                     regulator_put(info->sram_reg);
>> +             if (!IS_ERR(info->cpu_clk))
>> +                     clk_put(info->cpu_clk);
>> +             if (!IS_ERR(info->inter_clk))
>> +                     clk_put(info->inter_clk);
>> +
>> +             of_free_opp_table(info->cpu_dev);
>> +
>> +             list_del(list);
>> +             kfree(info);
>> +     }
>> +}
>> +
>> +#define MIN(a, b) ((a) < (b) ? (a) : (b))
>> +#define MAX(a, b) ((a) > (b) ? (a) : (b))
>
> Look for these in kernel.h

Thanks for pointing me out this file.
I tried to looking for those macro in the kernel tree but didn't find them.
Will use those in kernel.h instead.

>
>> +static int mtk_cpufreq_set_voltage(struct mtk_cpu_dvfs_info *info, int vproc)
>> +{
>> +     if (info->need_voltage_trace)
>> +             return mtk_cpufreq_voltage_trace(info, vproc);
>> +     else
>> +             return regulator_set_voltage(info->proc_reg, vproc,
>> +                                          vproc + VOLT_TOL);
>> +}
>> +
>> +static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
>> +                               unsigned int index)
>> +{
>> +     struct cpufreq_frequency_table *freq_table = policy->freq_table;
>> +     struct clk *cpu_clk = policy->clk;
>> +     struct clk *armpll = clk_get_parent(cpu_clk);
>> +     struct mtk_cpu_dvfs_info *info = policy->driver_data;
>> +     struct device *cpu_dev = info->cpu_dev;
>> +     struct dev_pm_opp *opp;
>> +     long freq_hz, old_freq_hz;
>> +     int vproc, old_vproc, inter_vproc, target_vproc, ret;
>> +
>> +     inter_vproc = info->intermediate_voltage;
>> +
>> +     old_freq_hz = clk_get_rate(cpu_clk);
>> +     old_vproc = regulator_get_voltage(info->proc_reg);
>> +
>> +     freq_hz = freq_table[index].frequency * 1000;
>
> A blank line here.

Will remove it.

>
>> +     rcu_read_lock();
>> +     opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_hz);
>> +     if (IS_ERR(opp)) {
>> +             rcu_read_unlock();
>> +             pr_err("cpu%d: failed to find OPP for %ld\n",
>> +                    policy->cpu, freq_hz);
>> +             return PTR_ERR(opp);
>> +     }
>> +     vproc = dev_pm_opp_get_voltage(opp);
>> +     rcu_read_unlock();
>> +
>> +     /*
>> +      * If the new voltage or the intermediate voltage is higher than the
>> +      * current voltage, scale up voltage first.
>> +      */
>> +     target_vproc = (inter_vproc > vproc) ? inter_vproc : vproc;
>> +     if (old_vproc < target_vproc) {
>> +             ret = mtk_cpufreq_set_voltage(info, target_vproc);
>> +             if (ret) {
>> +                     pr_err("cpu%d: failed to scale up voltage!\n",
>> +                            policy->cpu);
>> +                     mtk_cpufreq_set_voltage(info, old_vproc);
>> +                     return ret;
>> +             }
>> +     }
>> +
>> +     /* Reparent the CPU clock to intermediate clock. */
>> +     ret = clk_set_parent(cpu_clk, info->inter_clk);
>> +     if (ret) {
>> +             pr_err("cpu%d: failed to re-parent cpu clock!\n",
>> +                    policy->cpu);
>> +             mtk_cpufreq_set_voltage(info, old_vproc);
>> +             WARN_ON(1);
>> +             return ret;
>> +     }
>> +
>> +     /* Set the original PLL to target rate. */
>> +     ret = clk_set_rate(armpll, freq_hz);
>> +     if (ret) {
>> +             pr_err("cpu%d: failed to scale cpu clock rate!\n",
>> +                    policy->cpu);
>> +             clk_set_parent(cpu_clk, armpll);
>> +             mtk_cpufreq_set_voltage(info, old_vproc);
>> +             return ret;
>> +     }
>> +
>> +     /* Set parent of CPU clock back to the original PLL. */
>> +     ret = clk_set_parent(cpu_clk, armpll);
>> +     if (ret) {
>> +             pr_err("cpu%d: failed to re-parent cpu clock!\n",
>> +                    policy->cpu);
>> +             mtk_cpufreq_set_voltage(info, inter_vproc);
>> +             WARN_ON(1);
>> +             return ret;
>> +     }
>> +
>> +     /*
>> +      * If the new voltage is lower than the intermediate voltage or the
>> +      * original voltage, scale down to the new voltage.
>> +      */
>> +     if (vproc < inter_vproc || vproc < old_vproc) {
>> +             ret = mtk_cpufreq_set_voltage(info, vproc);
>> +             if (ret) {
>> +                     pr_err("cpu%d: failed to scale down voltage!\n",
>> +                            policy->cpu);
>> +                     clk_set_parent(cpu_clk, info->inter_clk);
>> +                     clk_set_rate(armpll, old_freq_hz);
>> +                     clk_set_parent(cpu_clk, armpll);
>> +                     return ret;
>> +             }
>> +     }
>> +
>> +     return 0;
>> +}
>> +
>> +static int mtk_cpufreq_init(struct cpufreq_policy *policy)
>> +{
>> +     struct mtk_cpu_dvfs_info *info;
>> +     int ret;
>> +
>> +     info = mtk_cpu_dvfs_info_get(policy->cpu);
>> +     if (!info) {
>> +             pr_err("%s: mtk cpu dvfs info for cpu%d is not initialized\n",
>> +                    __func__, policy->cpu);
>> +             return -ENODEV;
>> +     }
>> +
>> +     ret = cpufreq_table_validate_and_show(policy, info->freq_table);
>> +     if (ret) {
>> +             pr_err("%s: invalid frequency table: %d\n", __func__, ret);
>> +             return ret;
>> +     }
>> +
>> +     cpumask_copy(policy->cpus, info->cpus);
>> +     policy->driver_data = info;
>> +     policy->clk = info->cpu_clk;
>> +
>> +     return 0;
>> +}
>> +
>> +static struct cpufreq_driver mt8173_cpufreq_driver = {
>> +     .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK,
>> +     .verify = cpufreq_generic_frequency_table_verify,
>> +     .target_index = mtk_cpufreq_set_target,
>> +     .get = cpufreq_generic_get,
>> +     .init = mtk_cpufreq_init,
>> +     .name = "mtk-cpufreq",
>> +     .attr = cpufreq_generic_attr,
>> +};
>> +
>> +static int mtk_cpu_dvfs_info_init(int cpu)
>> +{
>> +     struct device *cpu_dev;
>> +     struct regulator *proc_reg = ERR_PTR(-ENODEV);
>> +     struct regulator *sram_reg = ERR_PTR(-ENODEV);
>> +     struct clk *cpu_clk = ERR_PTR(-ENODEV);
>> +     struct clk *inter_clk = ERR_PTR(-ENODEV);
>> +     struct mtk_cpu_dvfs_info *info;
>> +     struct cpufreq_frequency_table *freq_table;
>> +     struct dev_pm_opp *opp;
>> +     unsigned long rate;
>> +     int ret;
>> +
>> +     cpu_dev = get_cpu_device(cpu);
>> +     if (!cpu_dev) {
>> +             pr_err("failed to get cpu%d device\n", cpu);
>> +             return -ENODEV;
>> +     }
>> +
>> +     ret = of_init_opp_table(cpu_dev);
>> +     if (ret) {
>> +             pr_warn("no OPP table for cpu%d\n", cpu);
>> +             return ret;
>> +     }
>> +
>> +     cpu_clk = clk_get(cpu_dev, "cpu");
>> +     if (IS_ERR(cpu_clk)) {
>> +             if (PTR_ERR(cpu_clk) == -EPROBE_DEFER)
>> +                     pr_warn("cpu clk for cpu%d not ready, retry.\n", cpu);
>> +             else
>> +                     pr_err("failed to get cpu clk for cpu%d\n", cpu);
>> +
>> +             ret = PTR_ERR(cpu_clk);
>> +             goto out_free_opp_table;
>> +     }
>> +
>> +     inter_clk = clk_get(cpu_dev, "intermediate");
>> +     if (IS_ERR(inter_clk)) {
>> +             if (PTR_ERR(inter_clk) == -EPROBE_DEFER)
>> +                     pr_warn("intermediate clk for cpu%d not ready, retry.\n",
>> +                             cpu);
>> +             else
>> +                     pr_err("failed to get intermediate clk for cpu%d\n",
>> +                            cpu);
>> +
>> +             ret = PTR_ERR(cpu_clk);
>> +             goto out_free_resources;
>> +     }
>> +
>> +     proc_reg = regulator_get_exclusive(cpu_dev, "proc");
>> +     if (IS_ERR(proc_reg)) {
>> +             if (PTR_ERR(proc_reg) == -EPROBE_DEFER)
>> +                     pr_warn("proc regulator for cpu%d not ready, retry.\n",
>> +                             cpu);
>> +             else
>> +                     pr_err("failed to get proc regulator for cpu%d\n",
>> +                            cpu);
>> +
>> +             ret = PTR_ERR(proc_reg);
>> +             goto out_free_resources;
>> +     }
>> +
>> +     /* Both presence and absence of sram regulator are valid cases. */
>> +     sram_reg = regulator_get_exclusive(cpu_dev, "sram");
>> +
>> +     info = kzalloc(sizeof(*info), GFP_KERNEL);
>> +     if (!info) {
>> +             ret = -ENOMEM;
>> +             goto out_free_resources;
>> +     }
>> +
>> +     ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
>> +     if (ret) {
>> +             pr_err("failed to init cpufreq table for cpu%d: %d\n",
>> +                    cpu, ret);
>> +             goto out_free_mtk_cpu_dvfs_info;
>> +     }
>> +
>> +     if (!alloc_cpumask_var(&info->cpus, GFP_KERNEL))
>
> Why getting into such trouble? What about just saving policy's pointer
> in info and use it for freq_table, cpus mask, etc ?

This function is called from driver probe function. At this moment, cpufreq
driver is not yet registered and the policy struct for each cluster is not yet
allocated.

Also, the mtk_cpu_dvfs_info structs for all clusters are stored in a list
after probe done. cpus mask is used to find out the proper info struct
for corresponding cpu.

For freq_table, I simply don't want to free and allocate the table
every time when cpu is unplugged and plugged so I just allocate it
in the probe function call path, and cache it in the info struct.

>
>> +             goto out_free_cpufreq_table;
>> +
>
>> +}
>> +
>> +static int mt8173_cpufreq_probe(struct platform_device *pdev)
>> +{
>> +     int cpu, ret;
>> +
>> +     for_each_possible_cpu(cpu) {
>> +             /*
>> +              * If the struct mtk_cpu_dvfs_info for the cpu power domain
>> +              * is already initialized, skip this CPU.
>> +              */
>> +             if (!mtk_cpu_dvfs_info_get(cpu)) {
>> +                     ret = mtk_cpu_dvfs_info_init(cpu);
>> +                     if (ret) {
>> +                             if (ret != -EPROBE_DEFER)
>> +                                     pr_err("%s probe fail\n", __func__);
>> +
>> +                             mtk_cpu_dvfs_info_release();
>> +                             return ret;
>> +                     }
>> +             }
>> +     }
>> +
>> +     ret = cpufreq_register_driver(&mt8173_cpufreq_driver);
>> +     if (ret) {
>> +             pr_err("failed to register mtk cpufreq driver\n");
>> +             mtk_cpu_dvfs_info_release();
>> +     }
>> +
>> +     return ret;
>> +}
>> +
>> +static struct platform_driver mt8173_cpufreq_platdrv = {
>> +     .driver = {
>> +             .name   = "mt8173-cpufreq",
>> +     },
>> +     .probe          = mt8173_cpufreq_probe,
>> +};
>> +module_platform_driver(mt8173_cpufreq_platdrv);
>> +
>> +static int mt8173_cpufreq_driver_init(void)
>> +{
>> +     struct platform_device *pdev;
>> +
>> +     if (!of_machine_is_compatible("mediatek,mt8173"))
>> +             return -ENODEV;
>> +
>> +     pdev = platform_device_register_simple("mt8173-cpufreq", -1, NULL, 0);
>> +     if (IS_ERR(pdev)) {
>> +             pr_err("failed to register mtk-cpufreq platform device\n");
>> +             return PTR_ERR(pdev);
>> +     }
>> +
>> +     return 0;
>> +}
>> +module_init(mt8173_cpufreq_driver_init);
>
> Can this driver be built as module? Why this module* crap all over the
> place?

As mentioned in previous comment from Paul Bolle, it should be better
to do device_initcall(mt8173_cpufreq_driver_inint) instead of
module_init(mt8173_cpufreq_driver_init). I will do it.

But for module_platform_driver(mt8173_cpufreq_platdrv), I saw many
built-in drivers in kernel call module_platform_driver() also so I use it.
Would it be better to use platform_driver_register() instead?

Thanks.
Pi-Cheng

>
> --
> viresh
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Viresh Kumar June 24, 2015, 12:57 a.m. | #4
On 23-06-15, 23:25, Pi-Cheng Chen wrote:
> On Mon, Jun 22, 2015 at 7:45 PM, Viresh Kumar <viresh.kumar@linaro.org> wrote:
> >> +static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_get(int cpu)
> >
> > A very bad name to a routine with very specific functionality.
> 
> Would get_mtk_cpu_dvfs_info() or cpu_to_mtk_cpu_dvfs_info() be better?
> 
> >
> >> +{
> >> +     struct mtk_cpu_dvfs_info *info;
> >> +     struct list_head *list;
> >> +
> >> +     list_for_each(list, &cpu_dvfs_info_list) {
> >> +             info = to_mtk_cpu_dvfs_info(list);
> >> +
> >> +             if (cpumask_test_cpu(cpu, info->cpus))
> >> +                     return info;
> >> +     }

Firstly there is no need of extra long name for static routines. Just
keep them simple. i.e. no need of mtk_cpu_.

And then I was a bit confused about the functionality earlier and your
initial name wasn't that bad. Sorry :(

So maybe just, get_dvfs_info().

> >> +     freq_hz = freq_table[index].frequency * 1000;
> >
> > A blank line here.
> 
> Will remove it.
> 

I was asking you to add it :)

> >> +     rcu_read_lock();


> >> +static int mtk_cpu_dvfs_info_init(int cpu)
> >> +{
> >> +     struct device *cpu_dev;
> >> +     struct regulator *proc_reg = ERR_PTR(-ENODEV);
> >> +     struct regulator *sram_reg = ERR_PTR(-ENODEV);
> >> +     struct clk *cpu_clk = ERR_PTR(-ENODEV);
> >> +     struct clk *inter_clk = ERR_PTR(-ENODEV);
> >> +     struct mtk_cpu_dvfs_info *info;
> >> +     struct cpufreq_frequency_table *freq_table;
> >> +     struct dev_pm_opp *opp;
> >> +     unsigned long rate;
> >> +     int ret;
> >> +
> >> +     cpu_dev = get_cpu_device(cpu);
> >> +     if (!cpu_dev) {
> >> +             pr_err("failed to get cpu%d device\n", cpu);
> >> +             return -ENODEV;
> >> +     }
> >> +
> >> +     ret = of_init_opp_table(cpu_dev);
> >> +     if (ret) {
> >> +             pr_warn("no OPP table for cpu%d\n", cpu);
> >> +             return ret;
> >> +     }
> >> +
> >> +     cpu_clk = clk_get(cpu_dev, "cpu");
> >> +     if (IS_ERR(cpu_clk)) {
> >> +             if (PTR_ERR(cpu_clk) == -EPROBE_DEFER)
> >> +                     pr_warn("cpu clk for cpu%d not ready, retry.\n", cpu);
> >> +             else
> >> +                     pr_err("failed to get cpu clk for cpu%d\n", cpu);
> >> +
> >> +             ret = PTR_ERR(cpu_clk);
> >> +             goto out_free_opp_table;
> >> +     }
> >> +
> >> +     inter_clk = clk_get(cpu_dev, "intermediate");
> >> +     if (IS_ERR(inter_clk)) {
> >> +             if (PTR_ERR(inter_clk) == -EPROBE_DEFER)
> >> +                     pr_warn("intermediate clk for cpu%d not ready, retry.\n",
> >> +                             cpu);
> >> +             else
> >> +                     pr_err("failed to get intermediate clk for cpu%d\n",
> >> +                            cpu);
> >> +
> >> +             ret = PTR_ERR(cpu_clk);
> >> +             goto out_free_resources;
> >> +     }
> >> +
> >> +     proc_reg = regulator_get_exclusive(cpu_dev, "proc");
> >> +     if (IS_ERR(proc_reg)) {
> >> +             if (PTR_ERR(proc_reg) == -EPROBE_DEFER)
> >> +                     pr_warn("proc regulator for cpu%d not ready, retry.\n",
> >> +                             cpu);
> >> +             else
> >> +                     pr_err("failed to get proc regulator for cpu%d\n",
> >> +                            cpu);
> >> +
> >> +             ret = PTR_ERR(proc_reg);
> >> +             goto out_free_resources;
> >> +     }
> >> +
> >> +     /* Both presence and absence of sram regulator are valid cases. */
> >> +     sram_reg = regulator_get_exclusive(cpu_dev, "sram");
> >> +
> >> +     info = kzalloc(sizeof(*info), GFP_KERNEL);
> >> +     if (!info) {
> >> +             ret = -ENOMEM;
> >> +             goto out_free_resources;
> >> +     }
> >> +
> >> +     ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
> >> +     if (ret) {
> >> +             pr_err("failed to init cpufreq table for cpu%d: %d\n",
> >> +                    cpu, ret);
> >> +             goto out_free_mtk_cpu_dvfs_info;
> >> +     }
> >> +
> >> +     if (!alloc_cpumask_var(&info->cpus, GFP_KERNEL))
> >
> > Why getting into such trouble? What about just saving policy's pointer
> > in info and use it for freq_table, cpus mask, etc ?
> 
> This function is called from driver probe function. At this moment, cpufreq
> driver is not yet registered and the policy struct for each cluster is not yet
> allocated.

This should be called from ->init() callback of the cpufreq driver.

> Also, the mtk_cpu_dvfs_info structs for all clusters are stored in a list
> after probe done. cpus mask is used to find out the proper info struct
> for corresponding cpu.

Yeah, but this will be exactly same to policy->related_cpus, just use
that. Why allocate another mask for same purpose?

> For freq_table, I simply don't want to free and allocate the table
> every time when cpu is unplugged and plugged so I just allocate it
> in the probe function call path, and cache it in the info struct.

->exit() will be called only when all the CPUs from a policy are
hotplugged out. Not on every hotplug. So you probably have 4 big and 4
little cores. So policy for big cluster will be removed only when all
the CPUs are down. And so no point storing things from probe.

Or is there anything special with your usecase?

> But for module_platform_driver(mt8173_cpufreq_platdrv), I saw many
> built-in drivers in kernel call module_platform_driver() also so I use it.

That was wrong for them as well :)

> Would it be better to use platform_driver_register() instead?

Hmm..
pi-cheng.chen June 24, 2015, 8:44 a.m. | #5
Hi Viresh,

On Wed, Jun 24, 2015 at 8:57 AM, Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 23-06-15, 23:25, Pi-Cheng Chen wrote:
>> On Mon, Jun 22, 2015 at 7:45 PM, Viresh Kumar <viresh.kumar@linaro.org> wrote:
>> >> +static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_get(int cpu)
>> >
>> > A very bad name to a routine with very specific functionality.
>>
>> Would get_mtk_cpu_dvfs_info() or cpu_to_mtk_cpu_dvfs_info() be better?
>>
>> >
>> >> +{
>> >> +     struct mtk_cpu_dvfs_info *info;
>> >> +     struct list_head *list;
>> >> +
>> >> +     list_for_each(list, &cpu_dvfs_info_list) {
>> >> +             info = to_mtk_cpu_dvfs_info(list);
>> >> +
>> >> +             if (cpumask_test_cpu(cpu, info->cpus))
>> >> +                     return info;
>> >> +     }
>
> Firstly there is no need of extra long name for static routines. Just
> keep them simple. i.e. no need of mtk_cpu_.
>
> And then I was a bit confused about the functionality earlier and your
> initial name wasn't that bad. Sorry :(

That's ok. :)

>
> So maybe just, get_dvfs_info().

Yes. Thanks. I'll pick this one.

>
>> >> +     freq_hz = freq_table[index].frequency * 1000;
>> >
>> > A blank line here.
>>
>> Will remove it.
>>
>
> I was asking you to add it :)

I thought you were asking to remove the previous one originally.
Sorry for my misunderstanding.
Will add it.

>
>> >> +     rcu_read_lock();
>
>
>> >> +static int mtk_cpu_dvfs_info_init(int cpu)
>> >> +{
>> >> +     struct device *cpu_dev;
>> >> +     struct regulator *proc_reg = ERR_PTR(-ENODEV);
>> >> +     struct regulator *sram_reg = ERR_PTR(-ENODEV);
>> >> +     struct clk *cpu_clk = ERR_PTR(-ENODEV);
>> >> +     struct clk *inter_clk = ERR_PTR(-ENODEV);
>> >> +     struct mtk_cpu_dvfs_info *info;
>> >> +     struct cpufreq_frequency_table *freq_table;
>> >> +     struct dev_pm_opp *opp;
>> >> +     unsigned long rate;
>> >> +     int ret;
>> >> +
>> >> +     cpu_dev = get_cpu_device(cpu);
>> >> +     if (!cpu_dev) {
>> >> +             pr_err("failed to get cpu%d device\n", cpu);
>> >> +             return -ENODEV;
>> >> +     }
>> >> +
>> >> +     ret = of_init_opp_table(cpu_dev);
>> >> +     if (ret) {
>> >> +             pr_warn("no OPP table for cpu%d\n", cpu);
>> >> +             return ret;
>> >> +     }
>> >> +
>> >> +     cpu_clk = clk_get(cpu_dev, "cpu");
>> >> +     if (IS_ERR(cpu_clk)) {
>> >> +             if (PTR_ERR(cpu_clk) == -EPROBE_DEFER)
>> >> +                     pr_warn("cpu clk for cpu%d not ready, retry.\n", cpu);
>> >> +             else
>> >> +                     pr_err("failed to get cpu clk for cpu%d\n", cpu);
>> >> +
>> >> +             ret = PTR_ERR(cpu_clk);
>> >> +             goto out_free_opp_table;
>> >> +     }
>> >> +
>> >> +     inter_clk = clk_get(cpu_dev, "intermediate");
>> >> +     if (IS_ERR(inter_clk)) {
>> >> +             if (PTR_ERR(inter_clk) == -EPROBE_DEFER)
>> >> +                     pr_warn("intermediate clk for cpu%d not ready, retry.\n",
>> >> +                             cpu);
>> >> +             else
>> >> +                     pr_err("failed to get intermediate clk for cpu%d\n",
>> >> +                            cpu);
>> >> +
>> >> +             ret = PTR_ERR(cpu_clk);
>> >> +             goto out_free_resources;
>> >> +     }
>> >> +
>> >> +     proc_reg = regulator_get_exclusive(cpu_dev, "proc");
>> >> +     if (IS_ERR(proc_reg)) {
>> >> +             if (PTR_ERR(proc_reg) == -EPROBE_DEFER)
>> >> +                     pr_warn("proc regulator for cpu%d not ready, retry.\n",
>> >> +                             cpu);
>> >> +             else
>> >> +                     pr_err("failed to get proc regulator for cpu%d\n",
>> >> +                            cpu);
>> >> +
>> >> +             ret = PTR_ERR(proc_reg);
>> >> +             goto out_free_resources;
>> >> +     }
>> >> +
>> >> +     /* Both presence and absence of sram regulator are valid cases. */
>> >> +     sram_reg = regulator_get_exclusive(cpu_dev, "sram");
>> >> +
>> >> +     info = kzalloc(sizeof(*info), GFP_KERNEL);
>> >> +     if (!info) {
>> >> +             ret = -ENOMEM;
>> >> +             goto out_free_resources;
>> >> +     }
>> >> +
>> >> +     ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
>> >> +     if (ret) {
>> >> +             pr_err("failed to init cpufreq table for cpu%d: %d\n",
>> >> +                    cpu, ret);
>> >> +             goto out_free_mtk_cpu_dvfs_info;
>> >> +     }
>> >> +
>> >> +     if (!alloc_cpumask_var(&info->cpus, GFP_KERNEL))
>> >
>> > Why getting into such trouble? What about just saving policy's pointer
>> > in info and use it for freq_table, cpus mask, etc ?
>>
>> This function is called from driver probe function. At this moment, cpufreq
>> driver is not yet registered and the policy struct for each cluster is not yet
>> allocated.
>
> This should be called from ->init() callback of the cpufreq driver.

Some reasons I did this in probe(). Please see below.

>
>> Also, the mtk_cpu_dvfs_info structs for all clusters are stored in a list
>> after probe done. cpus mask is used to find out the proper info struct
>> for corresponding cpu.
>
> Yeah, but this will be exactly same to policy->related_cpus, just use
> that. Why allocate another mask for same purpose?

Yes. I agree. Please see the reasons why I did it below.

>
>> For freq_table, I simply don't want to free and allocate the table
>> every time when cpu is unplugged and plugged so I just allocate it
>> in the probe function call path, and cache it in the info struct.
>
> ->exit() will be called only when all the CPUs from a policy are
> hotplugged out. Not on every hotplug. So you probably have 4 big and 4
> little cores. So policy for big cluster will be removed only when all
> the CPUs are down. And so no point storing things from probe.
>
> Or is there anything special with your usecase?

One reason to put those initialization and resource allocation in probe is
that it's easier to handle the return value -PROBE_DEFER from clock
and regulator framework when trying to get clocks and regulators
consumed by cpufreq driver.

The other reason is when the whole system is resuming from suspend,
the other subsystem e.g. i2c on which cpufreq driver depends might not
ready yet during cpufreq driver initialization. In this case, the cpufreq
driver will be blocked when trying to get resources e.g. regulator on i2c
bus, and the whole system will stuck for seconds.

For the two reasons above, I try to do all resource initialization and
allocation in probe(), and leave less things to do in init() as possible.
Do you have any suggestion for this?

>
>> But for module_platform_driver(mt8173_cpufreq_platdrv), I saw many
>> built-in drivers in kernel call module_platform_driver() also so I use it.
>
> That was wrong for them as well :)
>
>> Would it be better to use platform_driver_register() instead?
>
> Hmm..

Will fix it.

Thanks.
Pi-Cheng
>
> --
> viresh
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Viresh Kumar June 24, 2015, 8:56 a.m. | #6
On 24-06-15, 16:44, Pi-Cheng Chen wrote:
> One reason to put those initialization and resource allocation in probe is
> that it's easier to handle the return value -PROBE_DEFER from clock
> and regulator framework when trying to get clocks and regulators
> consumed by cpufreq driver.

This is the sequence of events if you move these things to ->init().

- your driver's probe()
  -> cpufreq_register_driver()
    -> init()
      -> clk/reg get, failed, return -EPROBE_DEFER

And the failure here will be propagated to probe(). So, it should
work IMHO.

> The other reason is when the whole system is resuming from suspend,
> the other subsystem e.g. i2c on which cpufreq driver depends might not
> ready yet during cpufreq driver initialization. In this case, the cpufreq
> driver will be blocked when trying to get resources e.g. regulator on i2c
> bus, and the whole system will stuck for seconds.

That's something else you must investigate on. This dependency should
be resolved in some other way, I thought DT might have taken care of
such dependencies.
pi-cheng.chen June 24, 2015, 9:09 a.m. | #7
On Wed, Jun 24, 2015 at 4:56 PM, Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 24-06-15, 16:44, Pi-Cheng Chen wrote:
>> One reason to put those initialization and resource allocation in probe is
>> that it's easier to handle the return value -PROBE_DEFER from clock
>> and regulator framework when trying to get clocks and regulators
>> consumed by cpufreq driver.
>
> This is the sequence of events if you move these things to ->init().
>
> - your driver's probe()
>   -> cpufreq_register_driver()
>     -> init()
>       -> clk/reg get, failed, return -EPROBE_DEFER
>
> And the failure here will be propagated to probe(). So, it should
> work IMHO.

I will try it.

>
>> The other reason is when the whole system is resuming from suspend,
>> the other subsystem e.g. i2c on which cpufreq driver depends might not
>> ready yet during cpufreq driver initialization. In this case, the cpufreq
>> driver will be blocked when trying to get resources e.g. regulator on i2c
>> bus, and the whole system will stuck for seconds.
>
> That's something else you must investigate on. This dependency should
> be resolved in some other way, I thought DT might have taken care of
> such dependencies.

I will check if it could be solved in some other way.
Thanks.

Pi-Cheng

>
> --
> viresh
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 4f3dbc8..350752b 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -141,6 +141,13 @@  config ARM_KIRKWOOD_CPUFREQ
 	  This adds the CPUFreq driver for Marvell Kirkwood
 	  SoCs.
 
+config ARM_MT8173_CPUFREQ
+	bool "Mediatek MT8173 CPUFreq support"
+	depends on ARCH_MEDIATEK && REGULATOR
+	select PM_OPP
+	help
+	  This adds the CPUFreq driver support for Mediatek MT8173 SoC.
+
 config ARM_OMAP2PLUS_CPUFREQ
 	bool "TI OMAP2+"
 	depends on ARCH_OMAP2PLUS
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index cdce92a..97f9a9b 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -63,6 +63,7 @@  obj-$(CONFIG_ARM_HISI_ACPU_CPUFREQ)	+= hisi-acpu-cpufreq.o
 obj-$(CONFIG_ARM_IMX6Q_CPUFREQ)		+= imx6q-cpufreq.o
 obj-$(CONFIG_ARM_INTEGRATOR)		+= integrator-cpufreq.o
 obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ)	+= kirkwood-cpufreq.o
+obj-$(CONFIG_ARM_MT8173_CPUFREQ)	+= mt8173-cpufreq.o
 obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ)	+= omap-cpufreq.o
 obj-$(CONFIG_ARM_PXA2xx_CPUFREQ)	+= pxa2xx-cpufreq.o
 obj-$(CONFIG_PXA3xx)			+= pxa3xx-cpufreq.o
diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c
new file mode 100644
index 0000000..d539e7b
--- /dev/null
+++ b/drivers/cpufreq/mt8173-cpufreq.c
@@ -0,0 +1,550 @@ 
+/*
+ * Copyright (c) 2015 Linaro Ltd.
+ * Author: Pi-Cheng Chen <pi-cheng.chen@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/cpu.h>
+#include <linux/cpufreq.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+#include <linux/regulator/consumer.h>
+#include <linux/slab.h>
+
+#define MIN_VOLT_SHIFT		(100000)
+#define MAX_VOLT_SHIFT		(200000)
+#define MAX_VOLT_LIMIT		(1150000)
+#define VOLT_TOL		(10000)
+
+/*
+ * The struct mtk_cpu_dvfs_info holds necessary information for doing CPU DVFS
+ * on each CPU power/clock domain of Mediatek SoCs. Each CPU cluster in
+ * Mediatek SoCs has two voltage inputs, Vproc and Vsram. In some cases the two
+ * voltage inputs need to be controlled under a hardware limitation:
+ * 100mV < Vsram - Vproc < 200mV
+ *
+ * When scaling the clock frequency of a CPU clock domain, the clock source
+ * needs to be switched to another stable PLL clock temporarily until
+ * the original PLL becomes stable at target frequency.
+ */
+struct mtk_cpu_dvfs_info {
+	struct list_head node;
+	cpumask_var_t cpus;
+	struct cpufreq_frequency_table *freq_table;
+	struct device *cpu_dev;
+	struct regulator *proc_reg;
+	struct regulator *sram_reg;
+	struct clk *cpu_clk;
+	struct clk *inter_clk;
+	int intermediate_voltage;
+	bool need_voltage_trace;
+};
+
+static LIST_HEAD(cpu_dvfs_info_list);
+
+static inline struct mtk_cpu_dvfs_info *to_mtk_cpu_dvfs_info(
+			struct list_head *list)
+{
+	return list_entry(list, struct mtk_cpu_dvfs_info, node);
+}
+
+static inline void mtk_cpu_dvfs_info_add(struct mtk_cpu_dvfs_info *info)
+{
+	list_add(&info->node, &cpu_dvfs_info_list);
+}
+
+static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_get(int cpu)
+{
+	struct mtk_cpu_dvfs_info *info;
+	struct list_head *list;
+
+	list_for_each(list, &cpu_dvfs_info_list) {
+		info = to_mtk_cpu_dvfs_info(list);
+
+		if (cpumask_test_cpu(cpu, info->cpus))
+			return info;
+	}
+
+	return NULL;
+}
+
+static void mtk_cpu_dvfs_info_release(void)
+{
+	struct list_head *list, *tmp;
+	struct mtk_cpu_dvfs_info *info;
+
+	list_for_each_safe(list, tmp, &cpu_dvfs_info_list) {
+		info = to_mtk_cpu_dvfs_info(list);
+
+		dev_pm_opp_free_cpufreq_table(info->cpu_dev,
+					      &info->freq_table);
+
+		if (!IS_ERR(info->proc_reg))
+			regulator_put(info->proc_reg);
+		if (!IS_ERR(info->sram_reg))
+			regulator_put(info->sram_reg);
+		if (!IS_ERR(info->cpu_clk))
+			clk_put(info->cpu_clk);
+		if (!IS_ERR(info->inter_clk))
+			clk_put(info->inter_clk);
+
+		of_free_opp_table(info->cpu_dev);
+
+		list_del(list);
+		kfree(info);
+	}
+}
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+static int mtk_cpufreq_voltage_trace(struct mtk_cpu_dvfs_info *info,
+				     int new_vproc)
+{
+	struct regulator *proc_reg = info->proc_reg;
+	struct regulator *sram_reg = info->sram_reg;
+	int old_vproc, old_vsram, new_vsram, vsram, vproc, ret;
+
+	old_vproc = regulator_get_voltage(proc_reg);
+	old_vsram = regulator_get_voltage(sram_reg);
+	/* Vsram should not exceed the maximum allowed voltage of SoC. */
+	new_vsram = min(new_vproc + MIN_VOLT_SHIFT, MAX_VOLT_LIMIT);
+
+	if (old_vproc < new_vproc) {
+		/*
+		 * When scaling up voltages, Vsram and Vproc scale up step
+		 * by step. At each step, set Vsram to (Vproc + 200mV) first,
+		 * then set Vproc to (Vsram - 100mV).
+		 * Keep doing it until Vsram and Vproc hit target voltages.
+		 */
+		do {
+			old_vsram = regulator_get_voltage(sram_reg);
+			old_vproc = regulator_get_voltage(proc_reg);
+
+			vsram = MIN(new_vsram, old_vproc + MAX_VOLT_SHIFT);
+
+			if (vsram + VOLT_TOL >= MAX_VOLT_LIMIT) {
+				vsram = MAX_VOLT_LIMIT;
+
+				/*
+				 * If the target Vsram hits the maximum voltage,
+				 * try to set the exact voltage value first.
+				 */
+				ret = regulator_set_voltage(sram_reg, vsram,
+							    vsram);
+				if (ret)
+					ret = regulator_set_voltage(sram_reg,
+							vsram - VOLT_TOL,
+							vsram);
+
+				vproc = new_vproc;
+			} else {
+				ret = regulator_set_voltage(sram_reg, vsram,
+							    vsram + VOLT_TOL);
+
+				vproc = vsram - MIN_VOLT_SHIFT;
+			}
+			if (ret)
+				return ret;
+
+			ret = regulator_set_voltage(proc_reg, vproc,
+						    vproc + VOLT_TOL);
+			if (ret) {
+				regulator_set_voltage(sram_reg, old_vsram,
+						      old_vsram);
+				return ret;
+			}
+		} while (vproc < new_vproc || vsram < new_vsram);
+	} else if (old_vproc > new_vproc) {
+		/*
+		 * When scaling down voltages, Vsram and Vproc scale down step
+		 * by step. At each step, set Vproc to (Vsram - 200mV) first,
+		 * then set Vproc to (Vproc + 100mV).
+		 * Keep doing it until Vsram and Vproc hit target voltages.
+		 */
+		do {
+			old_vproc = regulator_get_voltage(proc_reg);
+			old_vsram = regulator_get_voltage(sram_reg);
+
+			vproc = MAX(new_vproc, old_vsram - MAX_VOLT_SHIFT);
+			ret = regulator_set_voltage(proc_reg, vproc,
+						    vproc + VOLT_TOL);
+			if (ret)
+				return ret;
+
+			if (vproc == new_vproc)
+				vsram = new_vsram;
+			else
+				vsram = MAX(new_vsram, vproc + MIN_VOLT_SHIFT);
+
+			if (vsram + VOLT_TOL >= MAX_VOLT_LIMIT) {
+				vsram = MAX_VOLT_LIMIT;
+
+				/*
+				 * If the target Vsram hits the maximum voltage,
+				 * try to set the exact voltage value first.
+				 */
+				ret = regulator_set_voltage(sram_reg, vsram,
+							    vsram);
+				if (ret)
+					ret = regulator_set_voltage(sram_reg,
+							vsram - VOLT_TOL,
+							vsram);
+			} else {
+				ret = regulator_set_voltage(sram_reg, vsram,
+							    vsram + VOLT_TOL);
+			}
+
+			if (ret) {
+				regulator_set_voltage(proc_reg, old_vproc,
+						      old_vproc);
+				return ret;
+			}
+		} while (vproc > new_vproc + VOLT_TOL ||
+			 vsram > new_vsram + VOLT_TOL);
+	}
+
+	return 0;
+}
+
+static int mtk_cpufreq_set_voltage(struct mtk_cpu_dvfs_info *info, int vproc)
+{
+	if (info->need_voltage_trace)
+		return mtk_cpufreq_voltage_trace(info, vproc);
+	else
+		return regulator_set_voltage(info->proc_reg, vproc,
+					     vproc + VOLT_TOL);
+}
+
+static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
+				  unsigned int index)
+{
+	struct cpufreq_frequency_table *freq_table = policy->freq_table;
+	struct clk *cpu_clk = policy->clk;
+	struct clk *armpll = clk_get_parent(cpu_clk);
+	struct mtk_cpu_dvfs_info *info = policy->driver_data;
+	struct device *cpu_dev = info->cpu_dev;
+	struct dev_pm_opp *opp;
+	long freq_hz, old_freq_hz;
+	int vproc, old_vproc, inter_vproc, target_vproc, ret;
+
+	inter_vproc = info->intermediate_voltage;
+
+	old_freq_hz = clk_get_rate(cpu_clk);
+	old_vproc = regulator_get_voltage(info->proc_reg);
+
+	freq_hz = freq_table[index].frequency * 1000;
+	rcu_read_lock();
+	opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_hz);
+	if (IS_ERR(opp)) {
+		rcu_read_unlock();
+		pr_err("cpu%d: failed to find OPP for %ld\n",
+		       policy->cpu, freq_hz);
+		return PTR_ERR(opp);
+	}
+	vproc = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+
+	/*
+	 * If the new voltage or the intermediate voltage is higher than the
+	 * current voltage, scale up voltage first.
+	 */
+	target_vproc = (inter_vproc > vproc) ? inter_vproc : vproc;
+	if (old_vproc < target_vproc) {
+		ret = mtk_cpufreq_set_voltage(info, target_vproc);
+		if (ret) {
+			pr_err("cpu%d: failed to scale up voltage!\n",
+			       policy->cpu);
+			mtk_cpufreq_set_voltage(info, old_vproc);
+			return ret;
+		}
+	}
+
+	/* Reparent the CPU clock to intermediate clock. */
+	ret = clk_set_parent(cpu_clk, info->inter_clk);
+	if (ret) {
+		pr_err("cpu%d: failed to re-parent cpu clock!\n",
+		       policy->cpu);
+		mtk_cpufreq_set_voltage(info, old_vproc);
+		WARN_ON(1);
+		return ret;
+	}
+
+	/* Set the original PLL to target rate. */
+	ret = clk_set_rate(armpll, freq_hz);
+	if (ret) {
+		pr_err("cpu%d: failed to scale cpu clock rate!\n",
+		       policy->cpu);
+		clk_set_parent(cpu_clk, armpll);
+		mtk_cpufreq_set_voltage(info, old_vproc);
+		return ret;
+	}
+
+	/* Set parent of CPU clock back to the original PLL. */
+	ret = clk_set_parent(cpu_clk, armpll);
+	if (ret) {
+		pr_err("cpu%d: failed to re-parent cpu clock!\n",
+		       policy->cpu);
+		mtk_cpufreq_set_voltage(info, inter_vproc);
+		WARN_ON(1);
+		return ret;
+	}
+
+	/*
+	 * If the new voltage is lower than the intermediate voltage or the
+	 * original voltage, scale down to the new voltage.
+	 */
+	if (vproc < inter_vproc || vproc < old_vproc) {
+		ret = mtk_cpufreq_set_voltage(info, vproc);
+		if (ret) {
+			pr_err("cpu%d: failed to scale down voltage!\n",
+			       policy->cpu);
+			clk_set_parent(cpu_clk, info->inter_clk);
+			clk_set_rate(armpll, old_freq_hz);
+			clk_set_parent(cpu_clk, armpll);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int mtk_cpufreq_init(struct cpufreq_policy *policy)
+{
+	struct mtk_cpu_dvfs_info *info;
+	int ret;
+
+	info = mtk_cpu_dvfs_info_get(policy->cpu);
+	if (!info) {
+		pr_err("%s: mtk cpu dvfs info for cpu%d is not initialized\n",
+		       __func__, policy->cpu);
+		return -ENODEV;
+	}
+
+	ret = cpufreq_table_validate_and_show(policy, info->freq_table);
+	if (ret) {
+		pr_err("%s: invalid frequency table: %d\n", __func__, ret);
+		return ret;
+	}
+
+	cpumask_copy(policy->cpus, info->cpus);
+	policy->driver_data = info;
+	policy->clk = info->cpu_clk;
+
+	return 0;
+}
+
+static struct cpufreq_driver mt8173_cpufreq_driver = {
+	.flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK,
+	.verify = cpufreq_generic_frequency_table_verify,
+	.target_index = mtk_cpufreq_set_target,
+	.get = cpufreq_generic_get,
+	.init = mtk_cpufreq_init,
+	.name = "mtk-cpufreq",
+	.attr = cpufreq_generic_attr,
+};
+
+static int mtk_cpu_dvfs_info_init(int cpu)
+{
+	struct device *cpu_dev;
+	struct regulator *proc_reg = ERR_PTR(-ENODEV);
+	struct regulator *sram_reg = ERR_PTR(-ENODEV);
+	struct clk *cpu_clk = ERR_PTR(-ENODEV);
+	struct clk *inter_clk = ERR_PTR(-ENODEV);
+	struct mtk_cpu_dvfs_info *info;
+	struct cpufreq_frequency_table *freq_table;
+	struct dev_pm_opp *opp;
+	unsigned long rate;
+	int ret;
+
+	cpu_dev = get_cpu_device(cpu);
+	if (!cpu_dev) {
+		pr_err("failed to get cpu%d device\n", cpu);
+		return -ENODEV;
+	}
+
+	ret = of_init_opp_table(cpu_dev);
+	if (ret) {
+		pr_warn("no OPP table for cpu%d\n", cpu);
+		return ret;
+	}
+
+	cpu_clk = clk_get(cpu_dev, "cpu");
+	if (IS_ERR(cpu_clk)) {
+		if (PTR_ERR(cpu_clk) == -EPROBE_DEFER)
+			pr_warn("cpu clk for cpu%d not ready, retry.\n", cpu);
+		else
+			pr_err("failed to get cpu clk for cpu%d\n", cpu);
+
+		ret = PTR_ERR(cpu_clk);
+		goto out_free_opp_table;
+	}
+
+	inter_clk = clk_get(cpu_dev, "intermediate");
+	if (IS_ERR(inter_clk)) {
+		if (PTR_ERR(inter_clk) == -EPROBE_DEFER)
+			pr_warn("intermediate clk for cpu%d not ready, retry.\n",
+				cpu);
+		else
+			pr_err("failed to get intermediate clk for cpu%d\n",
+			       cpu);
+
+		ret = PTR_ERR(cpu_clk);
+		goto out_free_resources;
+	}
+
+	proc_reg = regulator_get_exclusive(cpu_dev, "proc");
+	if (IS_ERR(proc_reg)) {
+		if (PTR_ERR(proc_reg) == -EPROBE_DEFER)
+			pr_warn("proc regulator for cpu%d not ready, retry.\n",
+				cpu);
+		else
+			pr_err("failed to get proc regulator for cpu%d\n",
+			       cpu);
+
+		ret = PTR_ERR(proc_reg);
+		goto out_free_resources;
+	}
+
+	/* Both presence and absence of sram regulator are valid cases. */
+	sram_reg = regulator_get_exclusive(cpu_dev, "sram");
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		ret = -ENOMEM;
+		goto out_free_resources;
+	}
+
+	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
+	if (ret) {
+		pr_err("failed to init cpufreq table for cpu%d: %d\n",
+		       cpu, ret);
+		goto out_free_mtk_cpu_dvfs_info;
+	}
+
+	if (!alloc_cpumask_var(&info->cpus, GFP_KERNEL))
+		goto out_free_cpufreq_table;
+
+	/* Search a safe voltage for intermediate frequency. */
+	rate = clk_get_rate(inter_clk);
+	rcu_read_lock();
+	opp = dev_pm_opp_find_freq_ceil(cpu_dev, &rate);
+	if (IS_ERR(opp)) {
+		pr_err("failed to get intermediate opp for cpu%d\n", cpu);
+		ret = PTR_ERR(opp);
+		goto out_free_cpumask;
+	}
+	info->intermediate_voltage = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+
+	/* CPUs in the same cluster share a clock and power domain. */
+	cpumask_copy(info->cpus, &cpu_topology[cpu].core_sibling);
+
+	info->cpu_dev = cpu_dev;
+	info->proc_reg = proc_reg;
+	info->sram_reg = IS_ERR(sram_reg) ? NULL : sram_reg;
+	info->cpu_clk = cpu_clk;
+	info->inter_clk = inter_clk;
+	info->freq_table = freq_table;
+
+	/*
+	 * If SRAM regulator is present, software "voltage trace" is needed
+	 * for this CPU power domain.
+	 */
+	info->need_voltage_trace = !IS_ERR(sram_reg);
+
+	mtk_cpu_dvfs_info_add(info);
+
+	return 0;
+
+out_free_cpumask:
+	free_cpumask_var(info->cpus);
+
+out_free_cpufreq_table:
+	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table);
+
+out_free_mtk_cpu_dvfs_info:
+	kfree(info);
+
+out_free_resources:
+	if (!IS_ERR(proc_reg))
+		regulator_put(proc_reg);
+	if (!IS_ERR(sram_reg))
+		regulator_put(sram_reg);
+	if (!IS_ERR(cpu_clk))
+		clk_put(cpu_clk);
+	if (!IS_ERR(inter_clk))
+		clk_put(inter_clk);
+
+out_free_opp_table:
+	of_free_opp_table(cpu_dev);
+
+	return ret;
+}
+
+static int mt8173_cpufreq_probe(struct platform_device *pdev)
+{
+	int cpu, ret;
+
+	for_each_possible_cpu(cpu) {
+		/*
+		 * If the struct mtk_cpu_dvfs_info for the cpu power domain
+		 * is already initialized, skip this CPU.
+		 */
+		if (!mtk_cpu_dvfs_info_get(cpu)) {
+			ret = mtk_cpu_dvfs_info_init(cpu);
+			if (ret) {
+				if (ret != -EPROBE_DEFER)
+					pr_err("%s probe fail\n", __func__);
+
+				mtk_cpu_dvfs_info_release();
+				return ret;
+			}
+		}
+	}
+
+	ret = cpufreq_register_driver(&mt8173_cpufreq_driver);
+	if (ret) {
+		pr_err("failed to register mtk cpufreq driver\n");
+		mtk_cpu_dvfs_info_release();
+	}
+
+	return ret;
+}
+
+static struct platform_driver mt8173_cpufreq_platdrv = {
+	.driver = {
+		.name	= "mt8173-cpufreq",
+	},
+	.probe		= mt8173_cpufreq_probe,
+};
+module_platform_driver(mt8173_cpufreq_platdrv);
+
+static int mt8173_cpufreq_driver_init(void)
+{
+	struct platform_device *pdev;
+
+	if (!of_machine_is_compatible("mediatek,mt8173"))
+		return -ENODEV;
+
+	pdev = platform_device_register_simple("mt8173-cpufreq", -1, NULL, 0);
+	if (IS_ERR(pdev)) {
+		pr_err("failed to register mtk-cpufreq platform device\n");
+		return PTR_ERR(pdev);
+	}
+
+	return 0;
+}
+module_init(mt8173_cpufreq_driver_init);