diff mbox series

[RFC,12/22] KVM: x86: Load guest [am]perf into hardware MSRs at vcpu_load()

Message ID 20241121185315.3416855-13-mizhang@google.com
State New
Headers show
Series KVM: x86: Virtualize IA32_APERF and IA32_MPERF MSRs | expand

Commit Message

Mingwei Zhang Nov. 21, 2024, 6:53 p.m. UTC
For vCPUs with APERFMPERF and in KVM_RUN, load the guest IA32_APERF
and IA32_MPERF values into the hardware MSRs when loading the vCPU,
but only if the vCPU is not halted. For running vCPUs, first add in
any "background" C0 cycles accumulated since the last checkpoint.

Note that for host TSC measurements of background C0 cycles, we assume
IA32_MPERF increments at the same frequency as TSC. While this is true
for all known processors with these MSRs, it is not architecturally
guaranteed.

Signed-off-by: Mingwei Zhang <mizhang@google.com>
Co-developed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Jim Mattson <jmattson@google.com>
---
 arch/x86/kvm/x86.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
diff mbox series

Patch

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d66cccff13347..b914578718d9c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1918,6 +1918,22 @@  static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
 				 _kvm_set_msr);
 }
 
+/*
+ * Add elapsed TSC ticks to guest IA32_MPERF while vCPU is in C0 but
+ * not running. Uses TSC instead of host MPERF to include time when
+ * physical CPU is in lower C-states, as guest MPERF should count
+ * whenever vCPU is in C0.  Assumes TSC and MPERF frequencies match.
+ */
+static void kvm_accumulate_background_guest_mperf(struct kvm_vcpu *vcpu)
+{
+	u64 now = rdtsc();
+	s64 tsc_delta = now - vcpu->arch.aperfmperf.host_tsc;
+
+	if (tsc_delta > 0)
+		vcpu->arch.aperfmperf.guest_mperf += tsc_delta;
+	vcpu->arch.aperfmperf.host_tsc = now;
+}
+
 /*
  * Read the MSR specified by @index into @data.  Select MSR specific fault
  * checks are bypassed if @host_initiated is %true.
@@ -4980,6 +4996,19 @@  static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
 	return kvm_arch_has_noncoherent_dma(vcpu->kvm);
 }
 
+static void kvm_load_guest_aperfmperf(struct kvm_vcpu *vcpu, bool update_mperf)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	if (update_mperf)
+		kvm_accumulate_background_guest_mperf(vcpu);
+	set_guest_aperf(vcpu->arch.aperfmperf.guest_aperf);
+	set_guest_mperf(vcpu->arch.aperfmperf.guest_mperf);
+	vcpu->arch.aperfmperf.loaded_while_running = true;
+	local_irq_restore(flags);
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -5039,6 +5068,12 @@  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		vcpu->cpu = cpu;
 	}
 
+	if (vcpu->wants_to_run &&
+	    guest_can_use(vcpu, X86_FEATURE_APERFMPERF) &&
+	    (vcpu->scheduled_out ? vcpu->arch.aperfmperf.loaded_while_running :
+	     (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)))
+		kvm_load_guest_aperfmperf(vcpu, true);
+
 	kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
 }