diff mbox series

[v4,13/24] x86/sched: Update the IPC class of the current task

Message ID 20230613042422.5344-14-ricardo.neri-calderon@linux.intel.com
State New
Headers show
Series sched: Introduce classes of tasks for load balance | expand

Commit Message

Ricardo Neri June 13, 2023, 4:24 a.m. UTC
Intel Thread Director provides a classification value based on the type of
instructions that CPU is currently executing. Use this classification to
update the IPC class of the current task.

The responsibility for configuring and enabling both the Hardware Feedback
Interface and Intel Thread Director lies with the HFI driver, but it should
not directly handle tasks.

Update the HFI driver to read the register that provides the classification
result. Implement the arch_update_ipcc() interface of the scheduler under
arch/x86 code to update the IPC class of individual tasks.

Task classification only makes sense when used along with the HFI driver.
Make HFI driver select CONFIG_IPC_CLASSES. However, users may still select
CONFIG_IPC_CLASSES. Add function stubs to prevent build errors.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Perry Yuan <Perry.Yuan@amd.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Zhao Liu <zhao1.liu@linux.intel.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
---
Changes since v3:
 * Relocated the implementation of arch_update_ipcc() from drivers/thermal
   to arch/x86. (Rafael)
 * Moved the definition of MSR_IA32_HW_FEEDBACK_CHAR into this patch.
   (Reported-by: kernel test robot <lkp@intel.com>)
 * Select CONFIG_IPC_CLASSES when CONFIG_INTEL_HFI_THERMAL is selected to
   reduce the configuration burden of the user/administrator. (Srinivas)

Changes since v2:
 * Removed the implementation of arch_has_ipc_classes().

Changes since v1:
 * Adjusted the result the classification of Intel Thread Director to start
   at class 1. Class 0 for the scheduler means that the task is
   unclassified.
 * Redefined union hfi_thread_feedback_char_msr to ensure all
   bit-fields are packed. (PeterZ)
 * Removed CONFIG_INTEL_THREAD_DIRECTOR. (PeterZ)
 * Shortened the names of the functions that implement IPC classes.
 * Removed argument smt_siblings_idle from intel_hfi_update_ipcc().
   (PeterZ)
---
 arch/x86/include/asm/msr-index.h  |  1 +
 arch/x86/include/asm/topology.h   | 11 +++++++++
 arch/x86/kernel/Makefile          |  2 ++
 arch/x86/kernel/sched_ipcc.c      | 35 +++++++++++++++++++++++++++++
 drivers/thermal/intel/Kconfig     |  1 +
 drivers/thermal/intel/intel_hfi.c | 37 +++++++++++++++++++++++++++++++
 6 files changed, 87 insertions(+)
 create mode 100644 arch/x86/kernel/sched_ipcc.c
diff mbox series

Patch

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 3aedae61af4f..0bc4ed0ff787 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1108,6 +1108,7 @@ 
 /* Hardware Feedback Interface */
 #define MSR_IA32_HW_FEEDBACK_PTR        0x17d0
 #define MSR_IA32_HW_FEEDBACK_CONFIG     0x17d1
+#define MSR_IA32_HW_FEEDBACK_CHAR	0x17d2
 
 /* x2APIC locked status */
 #define MSR_IA32_XAPIC_DISABLE_STATUS	0xBD
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index caf41c4869a0..7d2ed7f7bb8c 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -235,4 +235,15 @@  void init_freq_invariance_cppc(void);
 #define arch_init_invariance_cppc init_freq_invariance_cppc
 #endif
 
+#ifdef CONFIG_INTEL_HFI_THERMAL
+int intel_hfi_read_classid(u8 *classid);
+#else
+static inline int intel_hfi_read_classid(u8 *classid) { return -ENODEV; }
+#endif
+
+#ifdef CONFIG_IPC_CLASSES
+void intel_update_ipcc(struct task_struct *curr);
+#define arch_update_ipcc intel_update_ipcc
+#endif
+
 #endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 4070a01c11b7..f9b9d213ddaa 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -145,6 +145,8 @@  obj-$(CONFIG_CFI_CLANG)			+= cfi.o
 
 obj-$(CONFIG_CALL_THUNKS)		+= callthunks.o
 
+obj-$(CONFIG_IPC_CLASSES)		+= sched_ipcc.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/sched_ipcc.c b/arch/x86/kernel/sched_ipcc.c
new file mode 100644
index 000000000000..685e7b3b5375
--- /dev/null
+++ b/arch/x86/kernel/sched_ipcc.c
@@ -0,0 +1,35 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel support for scheduler IPC classes
+ *
+ * Copyright (c) 2023, Intel Corporation.
+ *
+ * Author: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+ *
+ * On hybrid processors, the architecture differences between types of CPUs
+ * lead to different number of retired instructions per cycle (IPC). IPCs may
+ * differ further by classes of instructions.
+ *
+ * The scheduler assigns an IPC class to every task with arch_update_ipcc()
+ * from data that hardware provides. Implement this interface for x86.
+ *
+ * See kernel/sched/sched.h for details.
+ */
+
+#include <linux/sched.h>
+
+#include <asm/topology.h>
+
+void intel_update_ipcc(struct task_struct *curr)
+{
+	u8 hfi_class;
+
+	if (intel_hfi_read_classid(&hfi_class))
+		return;
+
+	/*
+	 * 0 is a valid classification for Intel Thread Director. A scheduler
+	 * IPCC class of 0 means that the task is unclassified. Adjust.
+	 */
+	curr->ipcc = hfi_class + 1;
+}
diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig
index ecd7e07eece0..418db04dc876 100644
--- a/drivers/thermal/intel/Kconfig
+++ b/drivers/thermal/intel/Kconfig
@@ -109,6 +109,7 @@  config INTEL_HFI_THERMAL
 	depends on CPU_SUP_INTEL
 	depends on X86_THERMAL_VECTOR
 	select THERMAL_NETLINK
+	select IPC_CLASSES
 	help
 	  Select this option to enable the Hardware Feedback Interface. If
 	  selected, hardware provides guidance to the operating system on
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index b41547912fda..20ee4264dcd4 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -72,6 +72,15 @@  union cpuid6_edx {
 	u32 full;
 };
 
+union hfi_thread_feedback_char_msr {
+	struct {
+		u64	classid : 8;
+		u64	__reserved : 55;
+		u64	valid : 1;
+	} split;
+	u64 full;
+};
+
 /**
  * struct hfi_cpu_data - HFI capabilities per CPU
  * @perf_cap:		Performance capability
@@ -171,6 +180,34 @@  static struct workqueue_struct *hfi_updates_wq;
 #define HFI_UPDATE_INTERVAL		HZ
 #define HFI_MAX_THERM_NOTIFY_COUNT	16
 
+/**
+ * intel_hfi_read_classid() - Read the currrent classid
+ * @classid:	Variable to which the classid will be written.
+ *
+ * Read the classification that Intel Thread Director has produced when this
+ * function is called. Thread classification must be enabled before calling
+ * this function.
+ *
+ * Return: 0 if the produced classification is valid. Error otherwise.
+ */
+int intel_hfi_read_classid(u8 *classid)
+{
+	union hfi_thread_feedback_char_msr msr;
+
+	/* We should not be here if ITD is not supported. */
+	if (!cpu_feature_enabled(X86_FEATURE_ITD)) {
+		pr_warn_once("task classification requested but not supported!");
+		return -ENODEV;
+	}
+
+	rdmsrl(MSR_IA32_HW_FEEDBACK_CHAR, msr.full);
+	if (!msr.split.valid)
+		return -EINVAL;
+
+	*classid = msr.split.classid;
+	return 0;
+}
+
 static void get_hfi_caps(struct hfi_instance *hfi_instance,
 			 struct thermal_genl_cpu_caps *cpu_caps)
 {