diff mbox series

[v4,2/2] PM / devfreq: Add HiSilicon uncore frequency scaling driver

Message ID 20250530081722.280776-3-zhanjie9@hisilicon.com
State New
Headers show
Series PM / devfreq: Add HiSilicon uncore frequency scaling driver | expand

Commit Message

Jie Zhan May 30, 2025, 8:17 a.m. UTC
Add the HiSilicon uncore frequency scaling driver for Kunpeng SoCs based on
the devfreq framework.  The uncore domain contains shared computing
resources, including system interconnects and L3 cache.  The uncore
frequency significantly impacts the system-wide performance as well as
power consumption.  This driver adds support for runtime management of
uncore frequency from kernel and userspace.  The main functions include
setting and getting frequencies, changing frequency scaling policies, and
querying the list of CPUs whose performance is significantly related to
this uncore frequency domain, etc.  The driver communicates with a platform
controller through an ACPI PCC mailbox to take the actual actions of
frequency scaling.

Co-developed-by: Lifeng Zheng <zhenglifeng1@huawei.com>
Signed-off-by: Lifeng Zheng <zhenglifeng1@huawei.com>
Signed-off-by: Jie Zhan <zhanjie9@hisilicon.com>
---
 Documentation/ABI/testing/sysfs-class-devfreq |   9 +
 drivers/devfreq/Kconfig                       |  11 +
 drivers/devfreq/Makefile                      |   1 +
 drivers/devfreq/hisi_uncore_freq.c            | 656 ++++++++++++++++++
 4 files changed, 677 insertions(+)
 create mode 100644 drivers/devfreq/hisi_uncore_freq.c

Comments

Jonathan Cameron June 16, 2025, 9:35 a.m. UTC | #1
On Fri, 30 May 2025 16:17:22 +0800
Jie Zhan <zhanjie9@hisilicon.com> wrote:

> Add the HiSilicon uncore frequency scaling driver for Kunpeng SoCs based on
> the devfreq framework.  The uncore domain contains shared computing
> resources, including system interconnects and L3 cache.  The uncore
> frequency significantly impacts the system-wide performance as well as
> power consumption.  This driver adds support for runtime management of
> uncore frequency from kernel and userspace.  The main function includes
> setting and getting frequencies, changing frequency scaling policies, and
> querying the list of CPUs whose performance is significantly related to
> this uncore frequency domain, etc.  The driver communicates with a platform
> controller through an ACPI PCC mailbox to take the actual actions of
> frequency scaling.
> 
> Co-developed-by: Lifeng Zheng <zhenglifeng1@huawei.com>
> Signed-off-by: Lifeng Zheng <zhenglifeng1@huawei.com>
> Signed-off-by: Jie Zhan <zhanjie9@hisilicon.com>
Hi Zhanjie,

A few comments inline.  In general nice and clean.

I think the only one that really needs a change is the one around the
CPU association firmware handling.

Jonathan


> ---
>  Documentation/ABI/testing/sysfs-class-devfreq |   9 +
>  drivers/devfreq/Kconfig                       |  11 +
>  drivers/devfreq/Makefile                      |   1 +
>  drivers/devfreq/hisi_uncore_freq.c            | 656 ++++++++++++++++++
>  4 files changed, 677 insertions(+)
>  create mode 100644 drivers/devfreq/hisi_uncore_freq.c
> 
> diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
> index 1e7e0bb4c14e..fed95bb0bb6d 100644
> --- a/Documentation/ABI/testing/sysfs-class-devfreq
> +++ b/Documentation/ABI/testing/sysfs-class-devfreq
> @@ -132,3 +132,12 @@ Description:
>  
>  		A list of governors that support the node:
>  		- simple_ondemand
> +
> +What:		/sys/class/devfreq/.../related_cpus
> +Date:		June 2025
> +Contact:	Linux power management list <linux-pm@vger.kernel.org>
> +Description:	The list of CPUs whose performance is closely related to the
> +		frequency of this devfreq domain.
> +
> +		This file is only present if the hisi_uncore_freq driver is in
> +		use.

Unless this last statement was requested by another reviewer, I'd change it for
something more generic to allow it to be used by other drivers.  Something like

		This file is only present if a specific device has a close association
		with a subset of the CPUs.


> diff --git a/drivers/devfreq/hisi_uncore_freq.c b/drivers/devfreq/hisi_uncore_freq.c
> new file mode 100644
> index 000000000000..7e1b7f48d0f4
> --- /dev/null
> +++ b/drivers/devfreq/hisi_uncore_freq.c


> +enum hisi_uncore_freq_mode {
> +	HUCF_MODE_PLATFORM = 0,
> +	HUCF_MODE_OS,
> +	HUCF_MODE_MAX,

I assume these max entries are terminators?  I.e. nothing should ever
come after them?  If so you could drop the commas to make that explicit.

> +};
> +
> +#define HUCF_CAP_PLATFORM_CTRL	BIT(0)

> +static int hisi_uncore_cmd_send(struct hisi_uncore_freq *uncore,
> +				u8 cmd, u32 *data)
> +{
> +	struct hisi_uncore_pcc_shmem __iomem *addr;
> +	struct hisi_uncore_pcc_shmem shmem;
> +	struct pcc_mbox_chan *pchan;
> +	unsigned int mrtt;
> +	s64 time_delta;
> +	u16 status;
> +	int rc;
> +
> +	guard(mutex)(&uncore->pcc_lock);
> +
> +	pchan = uncore->pchan;
> +	if (!pchan)
> +		return -ENODEV;
> +
> +	addr = (struct hisi_uncore_pcc_shmem __iomem *)pchan->shmem;
> +	if (!addr)
> +		return -EINVAL;
> +
> +	/* Handle the Minimum Request Turnaround Time (MRTT) */
> +	mrtt = pchan->min_turnaround_time;
> +	time_delta = ktime_us_delta(ktime_get(),
> +				    uncore->last_cmd_cmpl_time);

Fits on one line under 80 chars.

> +	if (mrtt > time_delta)
> +		udelay(mrtt - time_delta);


> +
> +static int hisi_uncore_init_opp(struct hisi_uncore_freq *uncore)
> +{
> +	struct device *dev = uncore->dev;
> +	u32 data = 0, num, index;

Trivial personal preference. Don't mix declarations with and without
initializers.  It's ever so slightly harder to read.

> +	unsigned long freq_mhz;
> +	int rc;
> +
> +	rc = hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_GET_PLAT_FREQ_NUM,
> +				  &data);
> +	if (rc)
> +		return dev_err_probe(dev, rc, "Failed to get plat freq num\n");
> +
> +	num = data;
> +
> +	for (index = 0; index < num; index++) {
> +		data = index;
> +		rc = hisi_uncore_cmd_send(uncore,
> +					  HUCF_PCC_CMD_GET_PLAT_FREQ_BY_IDX,
> +					  &data);
> +		if (rc) {
> +			dev_pm_opp_remove_all_dynamic(dev);
> +			return dev_err_probe(dev, rc,
> +				"Failed to get plat freq at index %u\n", index);
> +		}
> +		freq_mhz = data;
> +
> +		/* Don't care OPP votlage, take 1V as default */
voltage

Spell check in case I missed others.  A W=1 build tends to catch the simple ones
like this.

> +		rc = dev_pm_opp_add(dev, freq_mhz * HZ_PER_MHZ, 1000000);
> +		if (rc) {
> +			dev_pm_opp_remove_all_dynamic(dev);
> +			return dev_err_probe(dev, rc,
> +				"Add OPP %lu failed\n", freq_mhz);
> +		}
> +	}
> +
> +	return devm_add_action_or_reset(dev, devm_hisi_uncore_remove_opp, uncore);
Hmm. I'm normally a fan of registering these after the calls, but this is one
of the rare cases where pushing it before (with a comment) cleans up the code.

If you do that, then all the error cases in the loop just need to return and not
call the cleanup manually.

> +}

> +
> +static int hisi_platform_gov_handler(struct devfreq *df, unsigned int event,
> +				     void *val)
> +{
> +	struct hisi_uncore_freq *uncore = dev_get_drvdata(df->dev.parent);
> +	int rc = 0;
> +	u32 data;
> +
> +	if (WARN_ON(!uncore || !uncore->pchan))
> +		return -ENODEV;
> +
> +	switch (event) {
> +	case DEVFREQ_GOV_START:
> +		data = HUCF_MODE_PLATFORM;
> +		rc = hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_SET_MODE, &data);
> +		break;
> +	case DEVFREQ_GOV_STOP:
> +		data = HUCF_MODE_OS;
> +		rc = hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_SET_MODE, &data);
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (rc)
> +		dev_err(uncore->dev, "Failed to set operate mode (%d)\n", rc);

Trivial: I'd push this up into the two case statements (with early returns) as then
you can also report what mode you were trying to set in the string.

> +
> +	return rc;
> +}

> +
> +static int hisi_uncore_add_platform_gov(struct hisi_uncore_freq *uncore)
> +{
> +	int rc = 0;

Initialized only in the one path where it's used.  Maybe push this
declaration down into the if (hisi_platform_gov_usage == 0) block.

> +
> +	if (!(uncore->cap & HUCF_CAP_PLATFORM_CTRL))
> +		return 0;
> +
> +	guard(mutex)(&hisi_platform_gov_usage_lock);
> +
> +	if (hisi_platform_gov_usage == 0) {
> +		rc = devfreq_add_governor(&hisi_platform_governor);

		int rc = devfreq....

> +		if (rc)
> +			return rc;
> +	}
> +	hisi_platform_gov_usage++;
> +
> +	return devm_add_action_or_reset(uncore->dev,
> +					devm_hisi_uncore_remove_platform_gov,
> +					uncore);
> +}
> +
> +static int hisi_uncore_mark_related_cpus(struct hisi_uncore_freq *uncore,
> +				 char *property, int (*get_topo_id)(int cpu),
> +				 const struct cpumask *(*get_cpumask)(int cpu))
> +{
> +	unsigned int i, cpu;
> +	size_t len;
> +	int rc;
> +
> +	rc = device_property_count_u32(uncore->dev, property);
> +	if (rc < 0)
> +		return rc;
Most of the errors here don't reflect the property not being found; they are
cases where we should probably fail the driver probe (so someone can fix
whatever is wrong with the firmware).  I think only -EINVAL means "not here"
(technically, that the arguments are not valid).

> +	if (rc == 0)
> +		return -EINVAL;
> +
> +	len = rc;
> +	u32 *num __free(kfree) = kcalloc(len, sizeof(*num), GFP_KERNEL);
> +	if (!num)
> +		return -ENOMEM;

On this failure path, falling over to the other route below doesn't make much sense.

> +
> +	rc = device_property_read_u32_array(uncore->dev, property, num, len);
> +	if (rc)
> +		return rc;
> +
> +	for (i = 0; i < len; i++) {
> +		for_each_possible_cpu(cpu) {
> +			if (get_topo_id(cpu) == num[i]) {

You could flip to reduce indent.  Marginal though so up to you and definitely
not worth another spin.

			if (get_topo_id(cpu) != num[i])
				continue;

			cpumask_or(&uncore->related_cpus, &uncore->related_cpus,
				   get_cpumask(cpu));
			break;

> +				cpumask_or(&uncore->related_cpus,
> +					   &uncore->related_cpus,
> +					   get_cpumask(cpu));
> +				break;
> +			}
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int get_package_id(int cpu)
> +{
> +	return topology_physical_package_id(cpu);
> +}
> +
> +static const struct cpumask *get_package_cpumask(int cpu)
> +{
> +	return topology_core_cpumask(cpu);
> +}
> +
> +static int get_cluster_id(int cpu)
> +{
> +	return topology_cluster_id(cpu);
> +}
> +
> +static const struct cpumask *get_cluster_cpumask(int cpu)
> +{
> +	return topology_cluster_cpumask(cpu);
> +}
> +
> +static int hisi_uncore_mark_related_cpus_wrap(struct hisi_uncore_freq *uncore)
> +{
> +	int rc;
> +
> +	cpumask_clear(&uncore->related_cpus);
> +
> +	rc = hisi_uncore_mark_related_cpus(uncore, "related-package",
> +					   get_package_id,
> +					   get_package_cpumask);
> +	if (rc == 0)
> +		return rc;

return 0;  might make it a tiny bit more explicit that this is a good path.
Maybe a comment in here on why that is a good path.
I think this is trying one then the other and only one is expected to be
present?  Perhaps be a little more paranoid here and check what was seen was
-EINVAL.

	if (rc != -EINVAL)
		return rc; /* May be 0 or another error code */

Perhaps with an ACPI snippet in the patch description to illustrate what
is going on here.


> +
> +	return hisi_uncore_mark_related_cpus(uncore, "related-cluster",
> +					     get_cluster_id,
> +					     get_cluster_cpumask);
> +}

> +static int hisi_uncore_devfreq_register(struct hisi_uncore_freq *uncore)
> +{
> +	struct devfreq_dev_profile *profile;
> +	struct device *dev = uncore->dev;
> +	unsigned long freq;
> +	u32 data;
> +	int rc;
> +
> +	rc = hisi_uncore_get_cur_freq(dev, &freq);
One for another day:
Whilst we do indeed need to do this, it seems like a small optimization to
devfreq would be to check for initial_freq == 0 and if it is try get_cur_freq()
after registration. Mind you I checked and this only seems to apply to
the imx drivers and this one.

> +	if (rc)
> +		return dev_err_probe(dev, rc, "Failed to get plat init freq\n");
> +
> +	profile = devm_kzalloc(dev, sizeof(*profile), GFP_KERNEL);
> +	if (!profile)
> +		return -ENOMEM;
> +
> +	*profile = (struct devfreq_dev_profile) {
> +		.initial_freq = freq,
> +		.polling_ms = HUCF_DEFAULT_POLLING_MS,
> +		.timer = DEVFREQ_TIMER_DELAYED,
> +		.target = hisi_uncore_target,
> +		.get_dev_status = hisi_uncore_get_dev_status,
> +		.get_cur_freq = hisi_uncore_get_cur_freq,
> +		.dev_groups = hisi_uncore_freq_groups,
> +	};
> +
> +	rc = hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_GET_MODE, &data);
> +	if (rc)
> +		return dev_err_probe(dev, rc, "Failed to get operate mode\n");
> +
> +	if (data == HUCF_MODE_PLATFORM)
> +		uncore->devfreq = devm_devfreq_add_device(dev, profile,
> +					  hisi_platform_governor.name, NULL);
> +	else
> +		uncore->devfreq = devm_devfreq_add_device(dev, profile,
> +					  DEVFREQ_GOV_PERFORMANCE, NULL);
> +	if (IS_ERR(uncore->devfreq))
> +		return dev_err_probe(dev, PTR_ERR(uncore->devfreq),
> +			"Failed to add devfreq device\n");
> +
> +	return 0;
> +}
diff mbox series

Patch

diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
index 1e7e0bb4c14e..fed95bb0bb6d 100644
--- a/Documentation/ABI/testing/sysfs-class-devfreq
+++ b/Documentation/ABI/testing/sysfs-class-devfreq
@@ -132,3 +132,12 @@  Description:
 
 		A list of governors that support the node:
 		- simple_ondemand
+
+What:		/sys/class/devfreq/.../related_cpus
+Date:		June 2025
+Contact:	Linux power management list <linux-pm@vger.kernel.org>
+Description:	The list of CPUs whose performance is closely related to the
+		frequency of this devfreq domain.
+
+		This file is only present if the hisi_uncore_freq driver is in
+		use.
diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index 3c4862a752b5..c999c4a1e567 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -90,6 +90,17 @@  config ARM_EXYNOS_BUS_DEVFREQ
 	  and adjusts the operating frequencies and voltages with OPP support.
 	  This does not yet operate with optimal voltages.
 
+config ARM_HISI_UNCORE_DEVFREQ
+	tristate "HiSilicon uncore DEVFREQ Driver"
+	depends on ACPI && ACPI_PPTT && PCC
+	select DEVFREQ_GOV_PERFORMANCE
+	select DEVFREQ_GOV_USERSPACE
+	help
+	  This adds a DEVFREQ driver that manages uncore frequency scaling for
+	  HiSilicon Kunpeng SoCs. This enables runtime management of uncore
+	  frequency scaling from kernel and userspace. The uncore domain
+	  contains system interconnects and L3 cache.
+
 config ARM_IMX_BUS_DEVFREQ
 	tristate "i.MX Generic Bus DEVFREQ Driver"
 	depends on ARCH_MXC || COMPILE_TEST
diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile
index bf40d04928d0..404179d79a9d 100644
--- a/drivers/devfreq/Makefile
+++ b/drivers/devfreq/Makefile
@@ -9,6 +9,7 @@  obj-$(CONFIG_DEVFREQ_GOV_PASSIVE)	+= governor_passive.o
 
 # DEVFREQ Drivers
 obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ)	+= exynos-bus.o
+obj-$(CONFIG_ARM_HISI_UNCORE_DEVFREQ)	+= hisi_uncore_freq.o
 obj-$(CONFIG_ARM_IMX_BUS_DEVFREQ)	+= imx-bus.o
 obj-$(CONFIG_ARM_IMX8M_DDRC_DEVFREQ)	+= imx8m-ddrc.o
 obj-$(CONFIG_ARM_MEDIATEK_CCI_DEVFREQ)	+= mtk-cci-devfreq.o
diff --git a/drivers/devfreq/hisi_uncore_freq.c b/drivers/devfreq/hisi_uncore_freq.c
new file mode 100644
index 000000000000..7e1b7f48d0f4
--- /dev/null
+++ b/drivers/devfreq/hisi_uncore_freq.c
@@ -0,0 +1,656 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon uncore frequency scaling driver
+ *
+ * Copyright (c) 2025 HiSilicon Co., Ltd
+ */
+
+#include <linux/acpi.h>
+#include <linux/bits.h>
+#include <linux/cleanup.h>
+#include <linux/devfreq.h>
+#include <linux/device.h>
+#include <linux/dev_printk.h>
+#include <linux/errno.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/ktime.h>
+#include <linux/mailbox_client.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+#include <linux/property.h>
+#include <linux/topology.h>
+#include <linux/units.h>
+#include <acpi/pcc.h>
+
+#include "governor.h"
+
+struct hisi_uncore_pcc_data {
+	u16 status;
+	u16 resv;
+	u32 data;
+};
+
+struct hisi_uncore_pcc_shmem {
+	struct acpi_pcct_shared_memory head;
+	struct hisi_uncore_pcc_data pcc_data;
+};
+
+enum hisi_uncore_pcc_cmd_type {
+	HUCF_PCC_CMD_GET_CAP = 0,
+	HUCF_PCC_CMD_GET_FREQ,
+	HUCF_PCC_CMD_SET_FREQ,
+	HUCF_PCC_CMD_GET_MODE,
+	HUCF_PCC_CMD_SET_MODE,
+	HUCF_PCC_CMD_GET_PLAT_FREQ_NUM,
+	HUCF_PCC_CMD_GET_PLAT_FREQ_BY_IDX,
+	HUCF_PCC_CMD_MAX = 256,
+};
+
+static int hisi_platform_gov_usage;
+static DEFINE_MUTEX(hisi_platform_gov_usage_lock);
+
+enum hisi_uncore_freq_mode {
+	HUCF_MODE_PLATFORM = 0,
+	HUCF_MODE_OS,
+	HUCF_MODE_MAX,
+};
+
+#define HUCF_CAP_PLATFORM_CTRL	BIT(0)
+
+/**
+ * struct hisi_uncore_freq - hisi uncore frequency scaling device data
+ * @dev:		device of this frequency scaling driver
+ * @cl:			mailbox client object
+ * @pchan:		PCC mailbox channel
+ * @chan_id:		PCC channel ID
+ * @last_cmd_cmpl_time:	timestamp of the last completed PCC command
+ * @pcc_lock:		PCC channel lock
+ * @devfreq:		devfreq data of this hisi_uncore_freq device
+ * @related_cpus:	CPUs whose performance is majorly affected by this
+ *			uncore frequency domain
+ * @cap:		capability flag
+ */
+struct hisi_uncore_freq {
+	struct device *dev;
+	struct mbox_client cl;
+	struct pcc_mbox_chan *pchan;
+	int chan_id;
+	ktime_t last_cmd_cmpl_time;
+	struct mutex pcc_lock;
+	struct devfreq *devfreq;
+	struct cpumask related_cpus;
+	u32 cap;
+};
+
+/* PCC channel timeout = PCC nominal latency * NUM */
+#define HUCF_PCC_POLL_TIMEOUT_NUM	1000
+#define HUCF_PCC_POLL_INTERVAL_US	5
+
+/* Default polling interval in ms for devfreq governors */
+#define HUCF_DEFAULT_POLLING_MS 100
+
+static int hisi_uncore_request_pcc_chan(struct hisi_uncore_freq *uncore)
+{
+	struct device *dev = uncore->dev;
+	struct pcc_mbox_chan *pcc_chan;
+	int rc;
+
+	uncore->cl = (struct mbox_client) {
+		.dev = dev,
+		.tx_block = false,
+		.knows_txdone = true,
+	};
+
+	pcc_chan = pcc_mbox_request_channel(&uncore->cl, uncore->chan_id);
+	if (IS_ERR(pcc_chan))
+		return dev_err_probe(dev, PTR_ERR(pcc_chan),
+			"Failed to request PCC channel %u\n", uncore->chan_id);
+
+	if (!pcc_chan->shmem_base_addr) {
+		pcc_mbox_free_channel(pcc_chan);
+		return dev_err_probe(dev, -EINVAL,
+			"Invalid PCC shared memory address\n");
+	}
+
+	if (pcc_chan->shmem_size < sizeof(struct hisi_uncore_pcc_shmem)) {
+		pcc_mbox_free_channel(pcc_chan);
+		return dev_err_probe(dev, -EINVAL,
+			"Invalid PCC shared memory size (%lluB)\n",
+			pcc_chan->shmem_size);
+	}
+
+	rc = devm_mutex_init(dev, &uncore->pcc_lock);
+	if (rc) {
+		pcc_mbox_free_channel(pcc_chan);
+		return rc;
+	}
+
+	uncore->pchan = pcc_chan;
+
+	return 0;
+}
+
+static void hisi_uncore_free_pcc_chan(struct hisi_uncore_freq *uncore)
+{
+	guard(mutex)(&uncore->pcc_lock);
+	pcc_mbox_free_channel(uncore->pchan);
+	uncore->pchan = NULL;
+}
+
+static void devm_hisi_uncore_free_pcc_chan(void *data)
+{
+	hisi_uncore_free_pcc_chan(data);
+}
+
+static acpi_status hisi_uncore_pcc_reg_scan(struct acpi_resource *res,
+					    void *ctx)
+{
+	struct acpi_resource_generic_register *reg;
+	struct hisi_uncore_freq *uncore;
+
+	if (!res || res->type != ACPI_RESOURCE_TYPE_GENERIC_REGISTER)
+		return AE_OK;
+
+	reg = &res->data.generic_reg;
+	if (reg->space_id != ACPI_ADR_SPACE_PLATFORM_COMM)
+		return AE_OK;
+
+	if (!ctx)
+		return AE_ERROR;
+
+	uncore = ctx;
+	/* PCC subspace ID stored in Access Size */
+	uncore->chan_id = reg->access_size;
+
+	return AE_CTRL_TERMINATE;
+}
+
+static int hisi_uncore_init_pcc_chan(struct hisi_uncore_freq *uncore)
+{
+	acpi_handle handle = ACPI_HANDLE(uncore->dev);
+	acpi_status status;
+	int rc;
+
+	uncore->chan_id = -1;
+	status = acpi_walk_resources(handle, METHOD_NAME__CRS,
+				     hisi_uncore_pcc_reg_scan, uncore);
+	if (ACPI_FAILURE(status) || uncore->chan_id < 0)
+		return dev_err_probe(uncore->dev, -ENODEV,
+			"Failed to get a PCC channel\n");
+
+	rc = hisi_uncore_request_pcc_chan(uncore);
+	if (rc)
+		return rc;
+
+	return devm_add_action_or_reset(uncore->dev,
+					devm_hisi_uncore_free_pcc_chan,
+					uncore);
+}
+
+static int hisi_uncore_cmd_send(struct hisi_uncore_freq *uncore,
+				u8 cmd, u32 *data)
+{
+	struct hisi_uncore_pcc_shmem __iomem *addr;
+	struct hisi_uncore_pcc_shmem shmem;
+	struct pcc_mbox_chan *pchan;
+	unsigned int mrtt;
+	s64 time_delta;
+	u16 status;
+	int rc;
+
+	guard(mutex)(&uncore->pcc_lock);
+
+	pchan = uncore->pchan;
+	if (!pchan)
+		return -ENODEV;
+
+	addr = (struct hisi_uncore_pcc_shmem __iomem *)pchan->shmem;
+	if (!addr)
+		return -EINVAL;
+
+	/* Handle the Minimum Request Turnaround Time (MRTT) */
+	mrtt = pchan->min_turnaround_time;
+	time_delta = ktime_us_delta(ktime_get(),
+				    uncore->last_cmd_cmpl_time);
+	if (mrtt > time_delta)
+		udelay(mrtt - time_delta);
+
+	/* Build a fully initialised request and copy it to shared memory */
+	shmem.head = (struct acpi_pcct_shared_memory) {
+		.signature = PCC_SIGNATURE | uncore->chan_id,
+		.command = cmd,
+	};
+	shmem.pcc_data = (struct hisi_uncore_pcc_data) { .data = *data };
+	memcpy_toio(addr, &shmem, sizeof(shmem));
+
+	/* Ring doorbell */
+	rc = mbox_send_message(pchan->mchan, &cmd);
+	if (rc < 0) {
+		dev_err(uncore->dev, "Failed to send mbox message, %d\n", rc);
+		return rc;
+	}
+
+	/* Wait status */
+	rc = readw_poll_timeout(&addr->head.status, status,
+				status & (PCC_STATUS_CMD_COMPLETE |
+					  PCC_STATUS_ERROR),
+				HUCF_PCC_POLL_INTERVAL_US,
+				pchan->latency * HUCF_PCC_POLL_TIMEOUT_NUM);
+	if (rc) {
+		dev_err(uncore->dev, "PCC channel response timeout, cmd=%u\n", cmd);
+	} else if (status & PCC_STATUS_ERROR) {
+		dev_err(uncore->dev, "PCC cmd error, cmd=%u\n", cmd);
+		rc = -EIO;
+	}
+
+	uncore->last_cmd_cmpl_time = ktime_get();
+
+	/* Copy data back */
+	memcpy_fromio(data, &addr->pcc_data.data, sizeof(*data));
+
+	/* Clear mailbox active req */
+	mbox_client_txdone(pchan->mchan, rc);
+
+	return rc;
+}
+
+static int hisi_uncore_target(struct device *dev, unsigned long *freq,
+			      u32 flags)
+{
+	struct hisi_uncore_freq *uncore = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	u32 data;
+
+	if (WARN_ON(!uncore || !uncore->pchan))
+		return -ENODEV;
+
+	opp = devfreq_recommended_opp(dev, freq, flags);
+	if (IS_ERR(opp)) {
+		dev_err(dev, "Failed to get opp for freq %lu hz\n", *freq);
+		return PTR_ERR(opp);
+	}
+	/* Read the frequency while the OPP reference is still held */
+	data = (u32)(dev_pm_opp_get_freq(opp) / HZ_PER_MHZ);
+	dev_pm_opp_put(opp);
+
+	return hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_SET_FREQ, &data);
+}
+
+static int hisi_uncore_get_dev_status(struct device *dev,
+				      struct devfreq_dev_status *stat)
+{
+	/* Not used */
+	return 0;
+}
+
+static int hisi_uncore_get_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct hisi_uncore_freq *uncore = dev_get_drvdata(dev);
+	u32 data = 0;
+	int rc;
+
+	if (WARN_ON(!uncore || !uncore->pchan))
+		return -ENODEV;
+
+	rc = hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_GET_FREQ, &data);
+
+	/*
+	 * The command may copy shared memory back into 'data' even when it
+	 * fails, so force 'freq' to 0 then.  devfreq shouldn't use it anyway.
+	 */
+	*freq = rc ? 0 : data * HZ_PER_MHZ;
+
+	return rc;
+}
+
+static void devm_hisi_uncore_remove_opp(void *data)
+{
+	struct hisi_uncore_freq *uncore = data;
+
+	dev_pm_opp_remove_all_dynamic(uncore->dev);
+}
+
+static int hisi_uncore_init_opp(struct hisi_uncore_freq *uncore)
+{
+	struct device *dev = uncore->dev;
+	u32 data = 0, num, index;
+	unsigned long freq_mhz;
+	int rc;
+
+	rc = hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_GET_PLAT_FREQ_NUM,
+				  &data);
+	if (rc)
+		return dev_err_probe(dev, rc, "Failed to get plat freq num\n");
+
+	num = data;
+
+	for (index = 0; index < num; index++) {
+		data = index;
+		rc = hisi_uncore_cmd_send(uncore,
+					  HUCF_PCC_CMD_GET_PLAT_FREQ_BY_IDX,
+					  &data);
+		if (rc) {
+			dev_pm_opp_remove_all_dynamic(dev);
+			return dev_err_probe(dev, rc,
+				"Failed to get plat freq at index %u\n", index);
+		}
+		freq_mhz = data;
+
+		/* Don't care about OPP voltage, take 1V as default */
+		rc = dev_pm_opp_add(dev, freq_mhz * HZ_PER_MHZ, 1000000);
+		if (rc) {
+			dev_pm_opp_remove_all_dynamic(dev);
+			return dev_err_probe(dev, rc,
+				"Add OPP %lu failed\n", freq_mhz);
+		}
+	}
+
+	return devm_add_action_or_reset(dev, devm_hisi_uncore_remove_opp, uncore);
+}
+
+static int hisi_platform_gov_func(struct devfreq *df, unsigned long *freq)
+{
+	/*
+	 * Platform-controlled mode doesn't care the frequency issued from
+	 * devfreq, so just pick the max freq.
+	 */
+	*freq = DEVFREQ_MAX_FREQ;
+
+	return 0;
+}
+
+static int hisi_platform_gov_handler(struct devfreq *df, unsigned int event,
+				     void *val)
+{
+	struct hisi_uncore_freq *uncore = dev_get_drvdata(df->dev.parent);
+	int rc = 0;
+	u32 data;
+
+	if (WARN_ON(!uncore || !uncore->pchan))
+		return -ENODEV;
+
+	switch (event) {
+	case DEVFREQ_GOV_START:
+		data = HUCF_MODE_PLATFORM;
+		rc = hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_SET_MODE, &data);
+		break;
+	case DEVFREQ_GOV_STOP:
+		data = HUCF_MODE_OS;
+		rc = hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_SET_MODE, &data);
+		break;
+	default:
+		break;
+	}
+
+	if (rc)
+		dev_err(uncore->dev, "Failed to set operate mode (%d)\n", rc);
+
+	return rc;
+}
+
+/*
+ * In the platform-controlled mode, the platform decides the uncore frequency
+ * and ignores the frequency issued from the driver.
+ * Thus, create a pseudo 'hisi_platform' governor that stops devfreq monitor
+ * from working so as to save meaningless overhead.
+ */
+static struct devfreq_governor hisi_platform_governor = {
+	.name = "hisi_platform",
+	/*
+	 * Set interrupt_driven to skip the devfreq monitor mechanism, though
+	 * this governor is not interrupt-driven.
+	 */
+	.flags = DEVFREQ_GOV_FLAG_IRQ_DRIVEN,
+	.get_target_freq = hisi_platform_gov_func,
+	.event_handler = hisi_platform_gov_handler,
+};
+
+static void hisi_uncore_remove_platform_gov(struct hisi_uncore_freq *uncore)
+{
+	u32 data = HUCF_MODE_PLATFORM;
+	int rc;
+
+	if (!(uncore->cap & HUCF_CAP_PLATFORM_CTRL))
+		return;
+
+	guard(mutex)(&hisi_platform_gov_usage_lock);
+
+	if (--hisi_platform_gov_usage == 0) {
+		rc = devfreq_remove_governor(&hisi_platform_governor);
+		if (rc)
+			dev_err(uncore->dev, "Failed to remove hisi_platform gov (%d)\n", rc);
+	}
+
+	/*
+	 * Set to the platform-controlled mode, if supported, so as to have a
+	 * certain behaviour when the driver is detached.
+	 */
+	rc = hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_SET_MODE, &data);
+	if (rc)
+		dev_err(uncore->dev, "Failed to set platform mode on exit (%d)\n", rc);
+}
+
+static void devm_hisi_uncore_remove_platform_gov(void *data)
+{
+	hisi_uncore_remove_platform_gov(data);
+}
+
+static int hisi_uncore_add_platform_gov(struct hisi_uncore_freq *uncore)
+{
+	int rc = 0;
+
+	if (!(uncore->cap & HUCF_CAP_PLATFORM_CTRL))
+		return 0;
+
+	guard(mutex)(&hisi_platform_gov_usage_lock);
+
+	if (hisi_platform_gov_usage == 0) {
+		rc = devfreq_add_governor(&hisi_platform_governor);
+		if (rc)
+			return rc;
+	}
+	hisi_platform_gov_usage++;
+
+	return devm_add_action_or_reset(uncore->dev,
+					devm_hisi_uncore_remove_platform_gov,
+					uncore);
+}
+
+static int hisi_uncore_mark_related_cpus(struct hisi_uncore_freq *uncore,
+				 char *property, int (*get_topo_id)(int cpu),
+				 const struct cpumask *(*get_cpumask)(int cpu))
+{
+	unsigned int i, cpu;
+	size_t len;
+	int rc;
+
+	rc = device_property_count_u32(uncore->dev, property);
+	if (rc < 0)
+		return rc;
+	if (rc == 0)
+		return -EINVAL;
+
+	len = rc;
+	u32 *num __free(kfree) = kcalloc(len, sizeof(*num), GFP_KERNEL);
+	if (!num)
+		return -ENOMEM;
+
+	rc = device_property_read_u32_array(uncore->dev, property, num, len);
+	if (rc)
+		return rc;
+
+	for (i = 0; i < len; i++) {
+		for_each_possible_cpu(cpu) {
+			if (get_topo_id(cpu) == num[i]) {
+				cpumask_or(&uncore->related_cpus,
+					   &uncore->related_cpus,
+					   get_cpumask(cpu));
+				break;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int get_package_id(int cpu)
+{
+	return topology_physical_package_id(cpu);
+}
+
+static const struct cpumask *get_package_cpumask(int cpu)
+{
+	return topology_core_cpumask(cpu);
+}
+
+static int get_cluster_id(int cpu)
+{
+	return topology_cluster_id(cpu);
+}
+
+static const struct cpumask *get_cluster_cpumask(int cpu)
+{
+	return topology_cluster_cpumask(cpu);
+}
+
+static int hisi_uncore_mark_related_cpus_wrap(struct hisi_uncore_freq *uncore)
+{
+	int rc;
+
+	cpumask_clear(&uncore->related_cpus);
+
+	rc = hisi_uncore_mark_related_cpus(uncore, "related-package",
+					   get_package_id,
+					   get_package_cpumask);
+	if (rc != -EINVAL)
+		return rc;	/* 0, or a real error; only -EINVAL falls back */
+
+	return hisi_uncore_mark_related_cpus(uncore, "related-cluster",
+					     get_cluster_id,
+					     get_cluster_cpumask);
+}
+
+static ssize_t related_cpus_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct hisi_uncore_freq *uncore = dev_get_drvdata(dev->parent);
+
+	return cpumap_print_to_pagebuf(true, buf, &uncore->related_cpus);
+}
+
+static DEVICE_ATTR_RO(related_cpus);
+
+static struct attribute *hisi_uncore_freq_attrs[] = {
+	&dev_attr_related_cpus.attr,
+	NULL
+};
+ATTRIBUTE_GROUPS(hisi_uncore_freq);
+
+static int hisi_uncore_devfreq_register(struct hisi_uncore_freq *uncore)
+{
+	struct devfreq_dev_profile *profile;
+	struct device *dev = uncore->dev;
+	unsigned long freq;
+	u32 data = 0;	/* also sent as the command argument, keep it defined */
+	int rc;
+
+	rc = hisi_uncore_get_cur_freq(dev, &freq);
+	if (rc)
+		return dev_err_probe(dev, rc, "Failed to get plat init freq\n");
+
+	profile = devm_kzalloc(dev, sizeof(*profile), GFP_KERNEL);
+	if (!profile)
+		return -ENOMEM;
+
+	*profile = (struct devfreq_dev_profile) {
+		.initial_freq = freq,
+		.polling_ms = HUCF_DEFAULT_POLLING_MS,
+		.timer = DEVFREQ_TIMER_DELAYED,
+		.target = hisi_uncore_target,
+		.get_dev_status = hisi_uncore_get_dev_status,
+		.get_cur_freq = hisi_uncore_get_cur_freq,
+		.dev_groups = hisi_uncore_freq_groups,
+	};
+
+	rc = hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_GET_MODE, &data);
+	if (rc)
+		return dev_err_probe(dev, rc, "Failed to get operate mode\n");
+
+	if (data == HUCF_MODE_PLATFORM)
+		uncore->devfreq = devm_devfreq_add_device(dev, profile,
+					  hisi_platform_governor.name, NULL);
+	else
+		uncore->devfreq = devm_devfreq_add_device(dev, profile,
+					  DEVFREQ_GOV_PERFORMANCE, NULL);
+	if (IS_ERR(uncore->devfreq))
+		return dev_err_probe(dev, PTR_ERR(uncore->devfreq),
+			"Failed to add devfreq device\n");
+
+	return 0;
+}
+
+static int hisi_uncore_freq_probe(struct platform_device *pdev)
+{
+	struct hisi_uncore_freq *uncore;
+	struct device *dev = &pdev->dev;
+	u32 cap = 0;	/* also sent as the command argument, keep it defined */
+	int rc;
+
+	uncore = devm_kzalloc(dev, sizeof(*uncore), GFP_KERNEL);
+	if (!uncore)
+		return -ENOMEM;
+
+	uncore->dev = dev;
+	platform_set_drvdata(pdev, uncore);
+
+	rc = hisi_uncore_init_pcc_chan(uncore);
+	if (rc)
+		return dev_err_probe(dev, rc, "Failed to init PCC channel\n");
+
+	rc = hisi_uncore_init_opp(uncore);
+	if (rc)
+		return dev_err_probe(dev, rc, "Failed to init OPP\n");
+
+	rc = hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_GET_CAP, &cap);
+	if (rc)
+		return dev_err_probe(dev, rc, "Failed to get capability\n");
+
+	uncore->cap = cap;
+
+	rc = hisi_uncore_add_platform_gov(uncore);
+	if (rc)
+		return dev_err_probe(dev, rc, "Failed to add hisi_platform governor\n");
+
+	rc = hisi_uncore_mark_related_cpus_wrap(uncore);
+	if (rc)
+		return dev_err_probe(dev, rc, "Failed to mark related cpus\n");
+
+	rc = hisi_uncore_devfreq_register(uncore);
+	if (rc)
+		return dev_err_probe(dev, rc, "Failed to register devfreq\n");
+
+	return 0;
+}
+
+static const struct acpi_device_id hisi_uncore_freq_acpi_match[] = {
+	{ "HISI04F1", },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, hisi_uncore_freq_acpi_match);
+
+static struct platform_driver hisi_uncore_freq_drv = {
+	.probe	= hisi_uncore_freq_probe,
+	.driver = {
+		.name = "hisi_uncore_freq",
+		.acpi_match_table = hisi_uncore_freq_acpi_match,
+	},
+};
+module_platform_driver(hisi_uncore_freq_drv);
+
+MODULE_DESCRIPTION("HiSilicon uncore frequency scaling driver");
+MODULE_AUTHOR("Jie Zhan <zhanjie9@hisilicon.com>");
+MODULE_LICENSE("GPL");