diff mbox series

[2/2] KVM: x86: notify user space about guest entering s2idle

Message ID 20220609110337.1238762-3-jaz@semihalf.com
State New
Headers show
Series x86: notify hypervisor/VMM about guest entering s2idle | expand

Commit Message

Grzegorz Jaszczyk June 9, 2022, 11:03 a.m. UTC
From: Zide Chen <zide.chen@intel.corp-partner.google.com>

Upon exiting to user space, the kvm_run structure contains a system_event
of type KVM_SYSTEM_EVENT_S2IDLE to notify user space that the guest is
entering the s2idle suspend state.

Userspace can choose to:
- ignore it
- start the suspend flow in host (if notified from privileged VM,
  capable of suspending the host machine)
- take advantage of this event to make sure that the VM is suspended

The last one is especially useful when some devices are passed through
to the VM: to perform a full system suspension, the guest needs to
finish its own suspension process first (e.g. calling the suspend hooks
of the drivers/subsystems which reside in the guest).
In such a case, the host user-space power daemon (e.g. powerd) could first
notify the VMM about the imminent suspension. Next, the VMM could trigger
the suspension process in the guest VM and block until it receives the
KVM_SYSTEM_EVENT_S2IDLE notification, after which the suspension of the
host can continue.

Additionally, to avoid introducing a regression for existing VMMs which do
not support KVM_SYSTEM_EVENT_S2IDLE exits, these exits are opt-in and must
be enabled through the KVM_CAP_X86_SYSTEM_S2IDLE VM capability.

Co-developed-by: Peter Fang <peter.fang@intel.corp-partner.google.com>
Signed-off-by: Peter Fang <peter.fang@intel.corp-partner.google.com>
Signed-off-by: Zide Chen <zide.chen@intel.corp-partner.google.com>
Co-developed-by: Grzegorz Jaszczyk <jaz@semihalf.com>
Signed-off-by: Grzegorz Jaszczyk <jaz@semihalf.com>
---
 Documentation/virt/kvm/api.rst  | 21 +++++++++++++++++++++
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/x86.c              | 15 +++++++++++++++
 include/uapi/linux/kvm.h        |  2 ++
 tools/include/uapi/linux/kvm.h  |  1 +
 5 files changed, 41 insertions(+)
diff mbox series

Patch

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 11e00a46c610..670dada87f50 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6146,6 +6146,8 @@  should put the acknowledged interrupt vector into the 'epr' field.
   #define KVM_SYSTEM_EVENT_WAKEUP         4
   #define KVM_SYSTEM_EVENT_SUSPEND        5
   #define KVM_SYSTEM_EVENT_SEV_TERM       6
+  #define KVM_SYSTEM_EVENT_S2IDLE         7
+
 			__u32 type;
                         __u32 ndata;
                         __u64 data[16];
@@ -6177,6 +6179,15 @@  Valid values for 'type' are:
    marking the exiting vCPU as runnable, or deny it and call KVM_RUN again.
  - KVM_SYSTEM_EVENT_SUSPEND -- the guest has requested a suspension of
    the VM.
+ - KVM_SYSTEM_EVENT_S2IDLE -- the guest has notified the host that it is
+   entering the s2idle state. Userspace can choose to:
+   - ignore it
+   - start the suspend flow in host (if notified from a privileged VM, capable
+     of suspending the host machine)
+   - take advantage of this event to make sure that the VM is suspended - used
+     for full system suspension, where the host waits for guest suspension
+     before continuing with its own suspension process.
+   This is available on x86 only.
 
 If KVM_CAP_SYSTEM_EVENT_DATA is present, the 'data' field can contain
 architecture specific information for the system-level event.  Only
@@ -7956,6 +7967,16 @@  should adjust CPUID leaf 0xA to reflect that the PMU is disabled.
 When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
 type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.
 
+8.37 KVM_CAP_X86_SYSTEM_S2IDLE
+-------------------------------
+
+:Capability: KVM_CAP_X86_SYSTEM_S2IDLE
+:Architectures: x86
+:Type: vm
+
+When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
+type KVM_SYSTEM_EVENT_S2IDLE to process the guest s2idle notification.
+
 9. Known KVM API problems
 =========================
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 959d66b9be94..85966da56c75 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -105,6 +105,7 @@ 
 	KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \
 	KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_HV_S2IDLE		KVM_ARCH_REQ(32)
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -1160,6 +1161,7 @@  struct kvm_arch {
 
 	bool bus_lock_detection_enabled;
 	bool enable_pmu;
+	bool s2idle_notification;
 	/*
 	 * If exit_on_emulation_error is set, and the in-kernel instruction
 	 * emulator fails to emulate an instruction, allow userspace
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6ed4bd6e762b..651ebac025c1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4291,6 +4291,7 @@  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_SYS_ATTRIBUTES:
 	case KVM_CAP_VAPIC:
 	case KVM_CAP_ENABLE_CAP:
+	case KVM_CAP_X86_SYSTEM_S2IDLE:
 		r = 1;
 		break;
 	case KVM_CAP_EXIT_HYPERCALL:
@@ -6084,6 +6085,10 @@  int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		}
 		mutex_unlock(&kvm->lock);
 		break;
+	case KVM_CAP_X86_SYSTEM_S2IDLE:
+		kvm->arch.s2idle_notification = true;
+		r = 0;
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -9307,6 +9312,10 @@  int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		return 0;
 	}
 	case KVM_HC_SYSTEM_S2IDLE:
+		if (!vcpu->kvm->arch.s2idle_notification)
+			break;
+
+		kvm_make_request(KVM_REQ_HV_S2IDLE, vcpu);
 		ret = 0;
 		break;
 	default:
@@ -10114,6 +10123,12 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			r = 0;
 			goto out;
 		}
+		if (kvm_check_request(KVM_REQ_HV_S2IDLE, vcpu)) {
+			vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+			vcpu->run->system_event.type = KVM_SYSTEM_EVENT_S2IDLE;
+			r = 0;
+			goto out;
+		}
 
 		/*
 		 * KVM_REQ_HV_STIMER has to be processed after
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 5088bd9f1922..dd71ccf8fce4 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -447,6 +447,7 @@  struct kvm_run {
 #define KVM_SYSTEM_EVENT_WAKEUP         4
 #define KVM_SYSTEM_EVENT_SUSPEND        5
 #define KVM_SYSTEM_EVENT_SEV_TERM       6
+#define KVM_SYSTEM_EVENT_S2IDLE         7
 			__u32 type;
 			__u32 ndata;
 			union {
@@ -1157,6 +1158,7 @@  struct kvm_ppc_resize_hpt {
 #define KVM_CAP_VM_TSC_CONTROL 214
 #define KVM_CAP_SYSTEM_EVENT_DATA 215
 #define KVM_CAP_ARM_SYSTEM_SUSPEND 216
+#define KVM_CAP_X86_SYSTEM_S2IDLE 217
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 6a184d260c7f..f8db91439c41 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -444,6 +444,7 @@  struct kvm_run {
 #define KVM_SYSTEM_EVENT_SHUTDOWN       1
 #define KVM_SYSTEM_EVENT_RESET          2
 #define KVM_SYSTEM_EVENT_CRASH          3
+#define KVM_SYSTEM_EVENT_S2IDLE         7
 			__u32 type;
 			__u32 ndata;
 			union {