Message ID | 20240528095522.509667-11-kirill.shutemov@linux.intel.com |
---|---|
State | Superseded |
Series | x86/tdx: Add kexec support |
On Tue, May 28, 2024 at 12:55:13PM +0300, Kirill A. Shutemov wrote:
> diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
> index 28ac3cb9b987..6cade48811cc 100644
> --- a/arch/x86/include/asm/x86_init.h
> +++ b/arch/x86/include/asm/x86_init.h
> @@ -149,12 +149,21 @@ struct x86_init_acpi {
>   * @enc_status_change_finish	Notify HV after the encryption status of a range is changed
>   * @enc_tlb_flush_required	Returns true if a TLB flush is needed before changing page encryption status
>   * @enc_cache_flush_required	Returns true if a cache flush is needed before changing page encryption status
> + * @enc_kexec_begin		Begin the two-step process of conversion shared memory back

s/conversion/converting/

> + *				to private. It stops the new conversions from being started
> + *				and waits in-flight conversions to finish, if possible.

Good. Now add "The @crash parameter denotes whether the function is
being called in the crash shutdown path."

> + * @enc_kexec_finish		Finish the two-step process of conversion shared memory to

s/conversion/converting/

> + *				private. All memory is private after the call.

"... when the function returns."

> + *				It called with all CPUs but one shutdown and interrupts
> + *				disabled.

"It is called on only one CPU while the others are shut down and with
interrupts disabled."

> + */
>  struct x86_guest {
>  	int (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
>  	int (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
>  	bool (*enc_tlb_flush_required)(bool enc);
>  	bool (*enc_cache_flush_required)(void);
> +	void (*enc_kexec_begin)(bool crash);
> +	void (*enc_kexec_finish)(void);
>  };
>  
>  /**
> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
> index f06501445cd9..74f6305eb9ec 100644
> --- a/arch/x86/kernel/crash.c
> +++ b/arch/x86/kernel/crash.c
> @@ -128,6 +128,18 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
>  #ifdef CONFIG_HPET_TIMER
>  	hpet_disable();
>  #endif
> +
> +	/*
> +	 * Non-crash kexec calls enc_kexec_begin() while scheduling is still
> +	 * active. This allows the callback to wait until all in-flight
> +	 * shared<->private conversions are complete. In a crash scenario,
> +	 * enc_kexec_begin() get call after all but one CPU has been shut down

"gets called" ... "have been shut down"

> +	 * and interrupts have been disabled. This only allows the callback to

only?

> +	 * detect a race with the conversion and report it.
> +	 */
> +	x86_platform.guest.enc_kexec_begin(true);
> +	x86_platform.guest.enc_kexec_finish();
> +

...
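The contract described in the kernel-doc above (stop new conversions, drain the in-flight ones when possible, and only detect and report a race in the crash path) is easier to see in code. Below is a minimal sketch, not taken from this series; the rwsem-based gating and the walk_shared_ranges_and_make_private() helper are purely illustrative assumptions about how a guest backend might keep its bookkeeping.

	#include <linux/rwsem.h>
	#include <linux/printk.h>

	/* Hypothetical helper: converts every remaining shared range back to private. */
	void walk_shared_ranges_and_make_private(void);

	/* Conversion paths would hold this for read; taking it for write gates new ones. */
	static DECLARE_RWSEM(conversion_lock);

	static void example_enc_kexec_begin(bool crash)
	{
		if (crash) {
			/*
			 * Crash path: only one CPU is left and interrupts are off,
			 * so waiting is not an option. Detect a race and report it.
			 */
			if (!down_write_trylock(&conversion_lock))
				pr_warn("kexec: raced with an in-flight memory conversion\n");
			return;
		}

		/* Normal kexec: scheduling still works, wait for conversions to drain. */
		down_write(&conversion_lock);
	}

	static void example_enc_kexec_finish(void)
	{
		/* Runs last, with interrupts disabled: make all memory private again. */
		walk_shared_ranges_and_make_private();
	}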
On 5/28/24 02:55, Kirill A. Shutemov wrote:
> +	x86_platform.guest.enc_kexec_begin(true);
> +	x86_platform.guest.enc_kexec_finish();

I really despise the random, unlabeled true/false/0/1 arguments to
functions like this.  I'll bring it up in the non-noop patch though.
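The readability concern raised above can be shown with a tiny sketch (not part of the patch; the wrapper function is made up). The prototype keeps the bool, but the call site labels what the literal means:

	#include <asm/x86_init.h>

	/* Illustration only: compare the two call styles for the same hook. */
	static void example_crash_shutdown(void)
	{
		/* As posted: the reader has to look up what "true" means. */
		x86_platform.guest.enc_kexec_begin(true);

		/* Labelling the literal documents the intent at the call site. */
		x86_platform.guest.enc_kexec_begin(/* crash= */ true);
	}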
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 28ac3cb9b987..6cade48811cc 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -149,12 +149,21 @@ struct x86_init_acpi {
  * @enc_status_change_finish	Notify HV after the encryption status of a range is changed
  * @enc_tlb_flush_required	Returns true if a TLB flush is needed before changing page encryption status
  * @enc_cache_flush_required	Returns true if a cache flush is needed before changing page encryption status
+ * @enc_kexec_begin		Begin the two-step process of conversion shared memory back
+ *				to private. It stops the new conversions from being started
+ *				and waits in-flight conversions to finish, if possible.
+ * @enc_kexec_finish		Finish the two-step process of conversion shared memory to
+ *				private. All memory is private after the call.
+ *				It called with all CPUs but one shutdown and interrupts
+ *				disabled.
  */
 struct x86_guest {
 	int (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
 	int (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
 	bool (*enc_tlb_flush_required)(bool enc);
 	bool (*enc_cache_flush_required)(void);
+	void (*enc_kexec_begin)(bool crash);
+	void (*enc_kexec_finish)(void);
 };
 
 /**
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index f06501445cd9..74f6305eb9ec 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -128,6 +128,18 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 #ifdef CONFIG_HPET_TIMER
 	hpet_disable();
 #endif
+
+	/*
+	 * Non-crash kexec calls enc_kexec_begin() while scheduling is still
+	 * active. This allows the callback to wait until all in-flight
+	 * shared<->private conversions are complete. In a crash scenario,
+	 * enc_kexec_begin() get call after all but one CPU has been shut down
+	 * and interrupts have been disabled. This only allows the callback to
+	 * detect a race with the conversion and report it.
+	 */
+	x86_platform.guest.enc_kexec_begin(true);
+	x86_platform.guest.enc_kexec_finish();
+
 	crash_save_cpu(regs, safe_smp_processor_id());
 }
 
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index f3130f762784..097313147ad3 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,7 @@
 #include <linux/delay.h>
 #include <linux/objtool.h>
 #include <linux/pgtable.h>
+#include <linux/kexec.h>
 #include <acpi/reboot.h>
 #include <asm/io.h>
 #include <asm/apic.h>
@@ -716,6 +717,14 @@ static void native_machine_emergency_restart(void)
 
 void native_machine_shutdown(void)
 {
+	/*
+	 * Call enc_kexec_begin() while all CPUs are still active and
+	 * interrupts are enabled. This will allow all in-flight memory
+	 * conversions to finish cleanly.
+	 */
+	if (kexec_in_progress)
+		x86_platform.guest.enc_kexec_begin(false);
+
 	/* Stop the cpus and apics */
 #ifdef CONFIG_X86_IO_APIC
 	/*
@@ -752,6 +761,9 @@ void native_machine_shutdown(void)
 #ifdef CONFIG_X86_64
 	x86_platform.iommu_shutdown();
 #endif
+
+	if (kexec_in_progress)
+		x86_platform.guest.enc_kexec_finish();
 }
 
 static void __machine_emergency_restart(int emergency)
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index a7143bb7dd93..8a79fb505303 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -138,6 +138,8 @@ static int enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool
 static int enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return 0; }
 static bool enc_tlb_flush_required_noop(bool enc) { return false; }
 static bool enc_cache_flush_required_noop(void) { return false; }
+static void enc_kexec_begin_noop(bool crash) {}
+static void enc_kexec_finish_noop(void) {}
 static bool is_private_mmio_noop(u64 addr) {return false; }
 
 struct x86_platform_ops x86_platform __ro_after_init = {
@@ -161,6 +163,8 @@ struct x86_platform_ops x86_platform __ro_after_init = {
 		.enc_status_change_finish = enc_status_change_finish_noop,
 		.enc_tlb_flush_required = enc_tlb_flush_required_noop,
 		.enc_cache_flush_required = enc_cache_flush_required_noop,
+		.enc_kexec_begin = enc_kexec_begin_noop,
+		.enc_kexec_finish = enc_kexec_finish_noop,
 	},
 };
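A note on how this noop scaffolding is meant to be used: a confidential-guest backend would later override the defaults during its early setup, along the lines of the sketch below. The tdx_kexec_begin()/tdx_kexec_finish() names and the setup function are placeholders standing in for whatever the non-noop follow-up patch actually provides.

	#include <asm/x86_init.h>
	#include <linux/init.h>

	/* Placeholder prototypes; real implementations come with the guest backend. */
	void tdx_kexec_begin(bool crash);
	void tdx_kexec_finish(void);

	void __init example_guest_setup(void)
	{
		/* Replace the noop defaults installed in x86_init.c above. */
		x86_platform.guest.enc_kexec_begin  = tdx_kexec_begin;
		x86_platform.guest.enc_kexec_finish = tdx_kexec_finish;
	}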