@@ -44,6 +44,8 @@
#define INTEL_CPUFREQ_TRANSITION_DELAY_HWP 5000
#define INTEL_CPUFREQ_TRANSITION_DELAY 500
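+/* Default P-state-to-frequency scaling factor: 100000 kHz (100 MHz) per unit. */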
+#define INTEL_PSTATE_CORE_SCALING 100000
+
#ifdef CONFIG_ACPI
#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>
@@ -221,6 +223,7 @@
* @sched_flags: Store scheduler flags for possible cross CPU update
* @hwp_boost_min: Last HWP boosted min performance
* @suspended: Whether or not the driver has been suspended.
+ * @em_registered: Set if an energy model has been registered for this CPU.
* @hwp_notify_work: workqueue for HWP notifications.
*
* This structure stores per CPU instance data for all CPUs.
@@ -260,6 +263,9 @@
unsigned int sched_flags;
u32 hwp_boost_min;
bool suspended;
+#ifdef CONFIG_ENERGY_MODEL
+ bool em_registered;
+#endif
struct delayed_work hwp_notify_work;
};
@@ -311,7 +317,7 @@
static inline int core_get_scaling(void)
{
- return 100000;
+ return INTEL_PSTATE_CORE_SCALING;
}

#ifdef CONFIG_ACPI
@@ -945,12 +951,105 @@
*/
static DEFINE_MUTEX(hybrid_capacity_lock);

+#ifdef CONFIG_ENERGY_MODEL
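+/* One EM performance state per "utilization bin" (see hybrid_active_power()). */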
+#define HYBRID_EM_STATE_COUNT 4
+
+static int hybrid_active_power(struct device *dev, unsigned long *power,
+ unsigned long *freq)
+{
+ /*
+ * Create "utilization bins" of 0-40%, 40%-60%, 60%-80%, and 80%-100%
+ * of the maximum capacity such that two CPUs of the same type will be
+ * regarded as equally attractive if the utilization of each of them
+ * falls into the same bin, which should prevent tasks from being
+ * migrated between them too often.
+ *
+ * For this purpose, return 2 as the "frequency" of the first
+ * performance state and otherwise leave the value set by the caller.
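+ *
+ * The EM core requests each subsequent state at the previous "frequency"
+ * plus 1, so the four states land at 2, 3, 4, and 5, putting the bin
+ * boundaries at 40%, 60%, and 80% of the maximum.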
+ */
+ if (!*freq)
+ *freq = 2;
+
+ /* No power information, so use the same value for all perf states. */
+ *power = EM_MAX_POWER;
+
+ return 0;
+}
+
+static int hybrid_get_cost(struct device *dev, unsigned long freq,
+ unsigned long *cost)
+{
+ struct pstate_data *pstate = &all_cpu_data[dev->id]->pstate;
+
+ /*
+ * The smaller the perf-to-frequency scaling factor, the larger the IPC
+ * ratio between the given CPU and the least capable CPU in the system.
+ * Regard that IPC ratio as the primary cost component and assume that
+ * the scaling factors for different CPU types will differ by at least
+ * 5% and that they will not exceed INTEL_PSTATE_CORE_SCALING.
+ *
+ * Add the freq value to the cost, so that the cost of running on CPUs
+ * of the same type in different "utilization bins" is different.
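+ *
+ * For example, assuming a P-core scaling factor of 78741 (as used on
+ * some hybrid systems) and the E-core scaling factor of 100000, the IPC
+ * component is 100 * 100000 / 78741 = 126 for the P-core vs 100 for the
+ * E-core, so the CPU type dominates the cost and the freq component
+ * (at most HYBRID_EM_STATE_COUNT + 1) only differentiates "bins".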
+ */
+ *cost = div_u64(100ULL * INTEL_PSTATE_CORE_SCALING, pstate->scaling) + freq;
+
+ return 0;
+}
+
+static bool hybrid_register_perf_domain(unsigned int cpu)
+{
+ static const struct em_data_callback cb
+ = EM_ADV_DATA_CB(hybrid_active_power, hybrid_get_cost);
+ struct cpudata *cpudata = all_cpu_data[cpu];
+ struct device *cpu_dev;
+
+ /*
+ * Registering EM perf domains without enabling asymmetric CPU capacity
+ * support is not really useful and one domain should not be registered
+ * more than once.
+ */
+ if (!hybrid_max_perf_cpu || cpudata->em_registered)
+ return false;
+
+ cpu_dev = get_cpu_device(cpu);
+ if (!cpu_dev)
+ return false;
+
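+ /* Last argument: power values are in an abstract scale, not microwatts. */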
+ if (em_dev_register_perf_domain(cpu_dev, HYBRID_EM_STATE_COUNT, &cb,
+ cpumask_of(cpu), false))
+ return false;
+
+ cpudata->em_registered = true;
+
+ return true;
+}
+
+static void hybrid_register_all_perf_domains(void)
+{
+ unsigned int cpu;
+
+ for_each_online_cpu(cpu)
+ hybrid_register_perf_domain(cpu);
+}
+
+static void hybrid_update_perf_domain(struct cpudata *cpu)
+{
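+ /* Let the EM core know that the capacity of the given CPU has changed. */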
+ if (cpu->em_registered)
+ em_adjust_cpu_capacity(cpu->cpu);
+}
+#else /* !CONFIG_ENERGY_MODEL */
+static inline bool hybrid_register_perf_domain(unsigned int cpu) { return false; }
+static inline void hybrid_register_all_perf_domains(void) {}
+static inline void hybrid_update_perf_domain(struct cpudata *cpu) {}
+#endif /* CONFIG_ENERGY_MODEL */
+
static void hybrid_set_cpu_capacity(struct cpudata *cpu)
{
arch_set_cpu_capacity(cpu->cpu, cpu->capacity_perf,
hybrid_max_perf_cpu->capacity_perf,
cpu->capacity_perf,
cpu->pstate.max_pstate_physical);
+ hybrid_update_perf_domain(cpu);

pr_debug("CPU%d: perf = %u, max. perf = %u, base perf = %d\n", cpu->cpu,
cpu->capacity_perf, hybrid_max_perf_cpu->capacity_perf,
@@ -1039,6 +1138,11 @@
guard(mutex)(&hybrid_capacity_lock);

__hybrid_refresh_cpu_capacity_scaling();
+ /*
+ * Perf domains are not registered before setting hybrid_max_perf_cpu,
+ * so register them all after setting up CPU capacity scaling.
+ */
+ hybrid_register_all_perf_domains();
}

static void hybrid_init_cpu_capacity_scaling(bool refresh)
@@ -1066,7 +1170,7 @@
hybrid_refresh_cpu_capacity_scaling();
/*
* Disabling ITMT causes sched domains to be rebuilt to disable asym
- * packing and enable asym capacity.
+ * packing and enable asym capacity and EAS.
*/
sched_clear_itmt_support();
}
@@ -1144,6 +1248,14 @@
}

hybrid_set_cpu_capacity(cpu);

+ /*
+ * If the CPU was offline to start with and it is going online for the
+ * first time, a perf domain needs to be registered for it if hybrid
+ * capacity scaling has been enabled already. In that case, sched
+ * domains need to be rebuilt to take the new perf domain into account.
+ */
+ if (hybrid_register_perf_domain(cpu->cpu))
+ em_rebuild_sched_domains();
unlock:
mutex_unlock(&hybrid_capacity_lock);
@@ -3416,6 +3528,8 @@
static int intel_pstate_update_status(const char *buf, size_t size)
{
+ int ret = -EINVAL;
+
if (size == 3 && !strncmp(buf, "off", size)) {
if (!intel_pstate_driver)
return -EINVAL;
@@ -3425,6 +3539,8 @@
cpufreq_unregister_driver(intel_pstate_driver);
intel_pstate_driver_cleanup();
+ /* Trigger EAS support reconfiguration in case it was used. */
+ rebuild_sched_domains_energy();
return 0;
}
@@ -3436,7 +3552,13 @@
cpufreq_unregister_driver(intel_pstate_driver);
}

- return intel_pstate_register_driver(&intel_pstate);
+ ret = intel_pstate_register_driver(&intel_pstate);
+ /*
+ * If the previous status was "passive" and the schedutil governor
+ * was used, it disabled EAS on exit, so trigger a sched domains
+ * rebuild in case EAS needs to be enabled again.
+ */
+ rebuild_sched_domains_energy();
}

if (size == 7 && !strncmp(buf, "passive", size)) {
@@ -3448,10 +3570,10 @@
intel_pstate_sysfs_hide_hwp_dynamic_boost();
}

- return intel_pstate_register_driver(&intel_cpufreq);
+ ret = intel_pstate_register_driver(&intel_cpufreq);
}

- return -EINVAL;
+ return ret;
}

static int no_load __initdata;