diff mbox series

[Part1,RFC,v3,11/22] x86/sev: Add helper for validating pages in early enc attribute changes

Message ID 20210602140416.23573-12-brijesh.singh@amd.com
State New
Headers show
Series Add AMD Secure Nested Paging (SEV-SNP) Guest Support | expand

Commit Message

Brijesh Singh June 2, 2021, 2:04 p.m. UTC
The early_set_memory_{encrypt,decrypt}() are used for changing the
page from decrypted (shared) to encrypted (private) and vice versa.
When SEV-SNP is active, the page state transition needs to go through
additional steps.

If the page is transitioned from shared to private, then perform the
following after the encryption attribute is set in the page table:

1. Issue the page state change VMGEXIT to add the page as a private
   in the RMP table.
2. Validate the page after its successfully added in the RMP table.

To maintain the security guarantees, if the page is transitioned from
private to shared, then perform the following before clearing the
encryption attribute from the page table.

1. Invalidate the page.
2. Issue the page state change VMGEXIT to make the page shared in the
   RMP table.

The early_set_memory_{encrypt,decrypt} can be called before the GHCB
is setup, use the SNP page state MSR protocol VMGEXIT defined in the GHCB
specification to request the page state change in the RMP table.

While at it, add a helper snp_prep_memory() that can be used outside
the sev specific files to change the page state for a specified memory
range.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
 arch/x86/include/asm/sev.h |  20 +++++++
 arch/x86/kernel/sev.c      | 105 +++++++++++++++++++++++++++++++++++++
 arch/x86/mm/mem_encrypt.c  |  47 ++++++++++++++++-
 3 files changed, 170 insertions(+), 2 deletions(-)

Comments

Borislav Petkov June 10, 2021, 3:50 p.m. UTC | #1
On Wed, Jun 02, 2021 at 09:04:05AM -0500, Brijesh Singh wrote:
> @@ -65,6 +65,12 @@ extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
>  /* RMP page size */
>  #define RMP_PG_SIZE_4K			0
>  
> +/* Memory opertion for snp_prep_memory() */
> +enum snp_mem_op {
> +	MEMORY_PRIVATE,
> +	MEMORY_SHARED

See below.

> +};
> +
>  #ifdef CONFIG_AMD_MEM_ENCRYPT
>  extern struct static_key_false sev_es_enable_key;
>  extern void __sev_es_ist_enter(struct pt_regs *regs);
> @@ -103,6 +109,11 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
>  
>  	return rc;
>  }
> +void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
> +		unsigned int npages);
> +void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
> +		unsigned int npages);

Align arguments on the opening brace.

> +void __init snp_prep_memory(unsigned long paddr, unsigned int sz, int op);
>  #else
>  static inline void sev_es_ist_enter(struct pt_regs *regs) { }
>  static inline void sev_es_ist_exit(void) { }
> @@ -110,6 +121,15 @@ static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { ret
>  static inline void sev_es_nmi_complete(void) { }
>  static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
>  static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; }
> +static inline void __init
> +early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages)

Put those { } at the end of the line:

early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages) { }

no need for separate lines. Ditto below.

> +{
> +}
> +static inline void __init
> +early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned int npages)
> +{
> +}
> +static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, int op) { }
>  #endif
>  
>  #endif
> diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
> index 455c09a9b2c2..6e9b45bb38ab 100644
> --- a/arch/x86/kernel/sev.c
> +++ b/arch/x86/kernel/sev.c
> @@ -532,6 +532,111 @@ static u64 get_jump_table_addr(void)
>  	return ret;
>  }
>  
> +static void pvalidate_pages(unsigned long vaddr, unsigned int npages, bool validate)
> +{
> +	unsigned long vaddr_end;
> +	int rc;
> +
> +	vaddr = vaddr & PAGE_MASK;
> +	vaddr_end = vaddr + (npages << PAGE_SHIFT);
> +
> +	while (vaddr < vaddr_end) {
> +		rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
> +		if (WARN(rc, "Failed to validate address 0x%lx ret %d", vaddr, rc))
> +			sev_es_terminate(1, GHCB_TERM_PVALIDATE);
					^^

I guess that 1 should be a define too, if we have to be correct:

			sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);

or so. Ditto for all other calls of this.

> +
> +		vaddr = vaddr + PAGE_SIZE;
> +	}
> +}
> +
> +static void __init early_set_page_state(unsigned long paddr, unsigned int npages, int op)
> +{
> +	unsigned long paddr_end;
> +	u64 val;
> +
> +	paddr = paddr & PAGE_MASK;
> +	paddr_end = paddr + (npages << PAGE_SHIFT);
> +
> +	while (paddr < paddr_end) {
> +		/*
> +		 * Use the MSR protocol because this function can be called before the GHCB
> +		 * is established.
> +		 */
> +		sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
> +		VMGEXIT();
> +
> +		val = sev_es_rd_ghcb_msr();
> +
> +		if (GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP)
Brijesh Singh June 14, 2021, 12:45 p.m. UTC | #2
On 6/10/21 10:50 AM, Borislav Petkov wrote:
> On Wed, Jun 02, 2021 at 09:04:05AM -0500, Brijesh Singh wrote:
>> @@ -65,6 +65,12 @@ extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
>>  /* RMP page size */
>>  #define RMP_PG_SIZE_4K			0
>>  
>> +/* Memory opertion for snp_prep_memory() */
>> +enum snp_mem_op {
>> +	MEMORY_PRIVATE,
>> +	MEMORY_SHARED
> See below.
>
>> +};
>> +
>>  #ifdef CONFIG_AMD_MEM_ENCRYPT
>>  extern struct static_key_false sev_es_enable_key;
>>  extern void __sev_es_ist_enter(struct pt_regs *regs);
>> @@ -103,6 +109,11 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
>>  
>>  	return rc;
>>  }
>> +void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
>> +		unsigned int npages);
>> +void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
>> +		unsigned int npages);
> Align arguments on the opening brace.

Noted.


>
>> +void __init snp_prep_memory(unsigned long paddr, unsigned int sz, int op);
>>  #else
>>  static inline void sev_es_ist_enter(struct pt_regs *regs) { }
>>  static inline void sev_es_ist_exit(void) { }
>> @@ -110,6 +121,15 @@ static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { ret
>>  static inline void sev_es_nmi_complete(void) { }
>>  static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
>>  static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; }
>> +static inline void __init
>> +early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages)
> Put those { } at the end of the line:
>
> early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages) { }
>
> no need for separate lines. Ditto below.

Noted.


>
>> +{
>> +}
>> +static inline void __init
>> +early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned int npages)
>> +{
>> +}
>> +static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, int op) { }
>>  #endif
>>  
>>  #endif
>> diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
>> index 455c09a9b2c2..6e9b45bb38ab 100644
>> --- a/arch/x86/kernel/sev.c
>> +++ b/arch/x86/kernel/sev.c
>> @@ -532,6 +532,111 @@ static u64 get_jump_table_addr(void)
>>  	return ret;
>>  }
>>  
>> +static void pvalidate_pages(unsigned long vaddr, unsigned int npages, bool validate)
>> +{
>> +	unsigned long vaddr_end;
>> +	int rc;
>> +
>> +	vaddr = vaddr & PAGE_MASK;
>> +	vaddr_end = vaddr + (npages << PAGE_SHIFT);
>> +
>> +	while (vaddr < vaddr_end) {
>> +		rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
>> +		if (WARN(rc, "Failed to validate address 0x%lx ret %d", vaddr, rc))
>> +			sev_es_terminate(1, GHCB_TERM_PVALIDATE);
> 					^^
>
> I guess that 1 should be a define too, if we have to be correct:
>
> 			sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
>
> or so. Ditto for all other calls of this.

Sure, I will define a macro for it.


>
>> +
>> +		vaddr = vaddr + PAGE_SIZE;
>> +	}
>> +}
>> +
>> +static void __init early_set_page_state(unsigned long paddr, unsigned int npages, int op)
>> +{
>> +	unsigned long paddr_end;
>> +	u64 val;
>> +
>> +	paddr = paddr & PAGE_MASK;
>> +	paddr_end = paddr + (npages << PAGE_SHIFT);
>> +
>> +	while (paddr < paddr_end) {
>> +		/*
>> +		 * Use the MSR protocol because this function can be called before the GHCB
>> +		 * is established.
>> +		 */
>> +		sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
>> +		VMGEXIT();
>> +
>> +		val = sev_es_rd_ghcb_msr();
>> +
>> +		if (GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP)
> From a previous review:
>
> Does that one need a warning too or am I being too paranoid?

IMO, there is no need to add a warning. This case should happen if its
either a hypervisor bug or hypervisor does not follow the GHCB
specification. I followed the SEV-ES vmgexit handling  and it does not
warn if the hypervisor returns a wrong response code. We simply
terminate the guest.


>
>> +			goto e_term;
>> +
>> +		if (WARN(GHCB_MSR_PSC_RESP_VAL(val),
>> +			 "Failed to change page state to '%s' paddr 0x%lx error 0x%llx\n",
>> +			 op == SNP_PAGE_STATE_PRIVATE ? "private" : "shared",
>> +			 paddr, GHCB_MSR_PSC_RESP_VAL(val)))
>> +			goto e_term;
>> +
>> +		paddr = paddr + PAGE_SIZE;
>> +	}
>> +
>> +	return;
>> +
>> +e_term:
>> +	sev_es_terminate(1, GHCB_TERM_PSC);
>> +}
>> +
>> +void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
>> +					 unsigned int npages)
>> +{
>> +	if (!sev_feature_enabled(SEV_SNP))
>> +		return;
>> +
>> +	 /* Ask hypervisor to add the memory pages in RMP table as a 'private'. */
> 	    Ask the hypervisor to mark the memory pages as private in the RMP table.

Noted.


>
>> +	early_set_page_state(paddr, npages, SNP_PAGE_STATE_PRIVATE);
>> +
>> +	/* Validate the memory pages after they've been added in the RMP table. */
>> +	pvalidate_pages(vaddr, npages, 1);
>> +}
>> +
>> +void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
>> +					unsigned int npages)
>> +{
>> +	if (!sev_feature_enabled(SEV_SNP))
>> +		return;
>> +
>> +	/*
>> +	 * Invalidate the memory pages before they are marked shared in the
>> +	 * RMP table.
>> +	 */
>> +	pvalidate_pages(vaddr, npages, 0);
>> +
>> +	 /* Ask hypervisor to make the memory pages shared in the RMP table. */
> 			      mark

Noted.


>> +	early_set_page_state(paddr, npages, SNP_PAGE_STATE_SHARED);
>> +}
>> +
>> +void __init snp_prep_memory(unsigned long paddr, unsigned int sz, int op)
>> +{
>> +	unsigned long vaddr, npages;
>> +
>> +	vaddr = (unsigned long)__va(paddr);
>> +	npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
>> +
>> +	switch (op) {
>> +	case MEMORY_PRIVATE: {
>> +		early_snp_set_memory_private(vaddr, paddr, npages);
>> +		return;
>> +	}
>> +	case MEMORY_SHARED: {
>> +		early_snp_set_memory_shared(vaddr, paddr, npages);
>> +		return;
>> +	}
>> +	default:
>> +		break;
>> +	}
>> +
>> +	WARN(1, "invalid memory op %d\n", op);
> A lot easier, diff ontop of your patch:

thanks. I will apply it.

I did thought about reusing the VMGEXIT defined macro
SNP_PAGE_STATE_{PRIVATE, SHARED} but I was not sure if you will be okay
with that. Additionally now both the function name and macro name will
include the "SNP". The call will look like this:

snp_prep_memory(paddr, SNP_PAGE_STATE_PRIVATE)

>
> ---
> diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
> index 7c2cb5300e43..2ad4b5ab3f6c 100644
> --- a/arch/x86/include/asm/sev.h
> +++ b/arch/x86/include/asm/sev.h
> @@ -65,12 +65,6 @@ extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
>  /* RMP page size */
>  #define RMP_PG_SIZE_4K			0
>  
> -/* Memory opertion for snp_prep_memory() */
> -enum snp_mem_op {
> -	MEMORY_PRIVATE,
> -	MEMORY_SHARED
> -};
> -
>  #ifdef CONFIG_AMD_MEM_ENCRYPT
>  extern struct static_key_false sev_es_enable_key;
>  extern void __sev_es_ist_enter(struct pt_regs *regs);
> diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
> index 2a5dce42af35..991d7964cee9 100644
> --- a/arch/x86/kernel/sev.c
> +++ b/arch/x86/kernel/sev.c
> @@ -662,20 +662,13 @@ void __init snp_prep_memory(unsigned long paddr, unsigned int sz, int op)
>  	vaddr = (unsigned long)__va(paddr);
>  	npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
>  
> -	switch (op) {
> -	case MEMORY_PRIVATE: {
> +	if (op == SNP_PAGE_STATE_PRIVATE)
>  		early_snp_set_memory_private(vaddr, paddr, npages);
> -		return;
> -	}
> -	case MEMORY_SHARED: {
> +	else if (op == SNP_PAGE_STATE_SHARED)
>  		early_snp_set_memory_shared(vaddr, paddr, npages);
> -		return;
> +	else {
> +		WARN(1, "invalid memory page op %d\n", op);
>  	}
> -	default:
> -		break;
> -	}
> -
> -	WARN(1, "invalid memory op %d\n", op);
>  }
>  
>  int sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
> ---
>
>>  static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);
>>  
>> +/*
>> + * When SNP is active, changes the page state from private to shared before
> s/changes/change/

Noted.


>
>> + * copying the data from the source to destination and restore after the copy.
>> + * This is required because the source address is mapped as decrypted by the
>> + * caller of the routine.
>> + */
>> +static inline void __init snp_memcpy(void *dst, void *src, size_t sz,
>> +				     unsigned long paddr, bool decrypt)
>> +{
>> +	unsigned long npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
>> +
>> +	if (!sev_feature_enabled(SEV_SNP) || !decrypt) {
>> +		memcpy(dst, src, sz);
>> +		return;
>> +	}
>> +
>> +	/*
>> +	 * If the paddr needs to be accessed decrypted, mark the page
> What do you mean "If" - this is the SNP version of memcpy. Just say:
>
> 	/*
> 	 * With SNP, the page address needs to be ...
> 	 */
>
>> +	 * shared in the RMP table before copying it.
>> +	 */
>> +	early_snp_set_memory_shared((unsigned long)__va(paddr), paddr, npages);
>> +
>> +	memcpy(dst, src, sz);
>> +
>> +	/* Restore the page state after the memcpy. */
>> +	early_snp_set_memory_private((unsigned long)__va(paddr), paddr, npages);
>> +}
>> +
>>  /*
>>   * This routine does not change the underlying encryption setting of the
>>   * page(s) that map this memory. It assumes that eventually the memory is
>> @@ -96,8 +125,8 @@ static void __init __sme_early_enc_dec(resource_size_t paddr,
>>  		 * Use a temporary buffer, of cache-line multiple size, to
>>  		 * avoid data corruption as documented in the APM.
>>  		 */
>> -		memcpy(sme_early_buffer, src, len);
>> -		memcpy(dst, sme_early_buffer, len);
>> +		snp_memcpy(sme_early_buffer, src, len, paddr, enc);
>> +		snp_memcpy(dst, sme_early_buffer, len, paddr, !enc);
>>  
>>  		early_memunmap(dst, len);
>>  		early_memunmap(src, len);
>> @@ -277,9 +306,23 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
>>  	else
>>  		sme_early_decrypt(pa, size);
>>  
>> +	/*
>> +	 * If page is getting mapped decrypted in the page table, then the page state
>> +	 * change in the RMP table must happen before the page table updates.
>> +	 */
>> +	if (!enc)
>> +		early_snp_set_memory_shared((unsigned long)__va(pa), pa, 1);
> Merge the two branches:

Noted.


>
> 	/* Encrypt/decrypt the contents in-place */
>         if (enc) {
>                 sme_early_encrypt(pa, size);
>         } else {
>                 sme_early_decrypt(pa, size);
>
>                 /*
>                  * On SNP, the page state change in the RMP table must happen
>                  * before the page table updates.
>                  */
>                 early_snp_set_memory_shared((unsigned long)__va(pa), pa, 1);
>         }

- Brijesh
Borislav Petkov June 14, 2021, 7:03 p.m. UTC | #3
On Mon, Jun 14, 2021 at 07:45:11AM -0500, Brijesh Singh wrote:
> IMO, there is no need to add a warning. This case should happen if its

> either a hypervisor bug or hypervisor does not follow the GHCB

> specification. I followed the SEV-ES vmgexit handling  and it does not

> warn if the hypervisor returns a wrong response code. We simply

> terminate the guest.


This brings my regular user-friendliness question: will the guest user
know what happened or will the guest simply disappear/freeze without any
hint as to what has happened so that a post-mortem analysis would turn
out hard to decipher?

> I did thought about reusing the VMGEXIT defined macro

> SNP_PAGE_STATE_{PRIVATE, SHARED} but I was not sure if you will be okay

> with that.


Yeah, I think that makes stuff simpler. Unless there's something
speaking against it which we both are not thinking of right now.

> Additionally now both the function name and macro name will

> include the "SNP". The call will look like this:

> 

> snp_prep_memory(paddr, SNP_PAGE_STATE_PRIVATE)


Yap, looks ok to me.

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Brijesh Singh June 14, 2021, 9:01 p.m. UTC | #4
On 6/14/21 2:03 PM, Borislav Petkov wrote:
> On Mon, Jun 14, 2021 at 07:45:11AM -0500, Brijesh Singh wrote:
>> IMO, there is no need to add a warning. This case should happen if its
>> either a hypervisor bug or hypervisor does not follow the GHCB
>> specification. I followed the SEV-ES vmgexit handling  and it does not
>> warn if the hypervisor returns a wrong response code. We simply
>> terminate the guest.
> This brings my regular user-friendliness question: will the guest user
> know what happened or will the guest simply disappear/freeze without any
> hint as to what has happened so that a post-mortem analysis would turn
> out hard to decipher?

When a guest requests to terminate then guest user (aka VMM) will be
notified through the hypervisor that guest has requested the
termination. KVM defines a fixed set of reason code that is passed to
the guest user, see
https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/kvm.h#L237.
In this particular case guest user probably get the KVM_EXIT_SHUTDOWN --
i.e guest asked to be terminated. If user wants to see the actual GHCB
reason code then they must look into the KVM log.

Now that we have to defined a Linux specific reason set, we could
potentially define a new error code "Invalid response code" and return
that instead of generic termination error in this particular case. So
that when user looks at KVM log they see the "invalid response code"
instead of the generic GHCB error.

If we go with that approach then I think it makes sense to cover it for
SEV-ES guests too.


>> I did thought about reusing the VMGEXIT defined macro
>> SNP_PAGE_STATE_{PRIVATE, SHARED} but I was not sure if you will be okay
>> with that.
> Yeah, I think that makes stuff simpler. Unless there's something
> speaking against it which we both are not thinking of right now.
>
>> Additionally now both the function name and macro name will
>> include the "SNP". The call will look like this:
>>
>> snp_prep_memory(paddr, SNP_PAGE_STATE_PRIVATE)
> Yap, looks ok to me.
>
> Thx.
>
Borislav Petkov June 16, 2021, 10:07 a.m. UTC | #5
On Mon, Jun 14, 2021 at 04:01:44PM -0500, Brijesh Singh wrote:
> Now that we have to defined a Linux specific reason set, we could
> potentially define a new error code "Invalid response code" and return

I don't understand - you have a WARN for the following check:

                if (WARN(GHCB_MSR_PSC_RESP_VAL(val),
                         "Failed to change page state to '%s' paddr 0x%lx error 0x%llx\n",

what's wrong with doing:

                if (WARN(GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP,
                         "Wrong PSC response code: 0x%x\n",
                         (unsigned int)GHCB_RESP_CODE(val)))
                        goto e_term;


above it too?
Brijesh Singh June 16, 2021, 11 a.m. UTC | #6
On 6/16/21 5:07 AM, Borislav Petkov wrote:
> On Mon, Jun 14, 2021 at 04:01:44PM -0500, Brijesh Singh wrote:
>> Now that we have to defined a Linux specific reason set, we could
>> potentially define a new error code "Invalid response code" and return
> I don't understand - you have a WARN for the following check:
>
>                 if (WARN(GHCB_MSR_PSC_RESP_VAL(val),
>                          "Failed to change page state to '%s' paddr 0x%lx error 0x%llx\n",


This WARN indicates that command execution failed but it does not mean
that hypervisor violated the GHCB protocol.


>
> what's wrong with doing:
>
>                 if (WARN(GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP,
>                          "Wrong PSC response code: 0x%x\n",
>                          (unsigned int)GHCB_RESP_CODE(val)))
>                         goto e_term;

There are around 6 MSR based VMGEXITs and every VMEXIT are paired with
request and response. Guest uses the request code while submitting the
command, and hypervisor provides result through the response code. If
the guest sees hypervisor didn't use the response code (i.e violates the
GHCB protocol) then it terminates the guest with reason code set to
"General termination".  e.g look at the do_vc_no_ghcb()

https://elixir.bootlin.com/linux/v5.13-rc6/source/arch/x86/kernel/sev-shared.c#L157

I am trying to be consistent with previous VMGEXIT implementations. If
the command itself failed then use the command specific error code to
tell hypervisor why we terminated but if the hypervisor violated the
GHCB specification then use the "general request termination".


>
> above it too?
>
Borislav Petkov June 16, 2021, 12:03 p.m. UTC | #7
On Wed, Jun 16, 2021 at 06:00:09AM -0500, Brijesh Singh wrote:
> I am trying to be consistent with previous VMGEXIT implementations. If
> the command itself failed then use the command specific error code to
> tell hypervisor why we terminated but if the hypervisor violated the
> GHCB specification then use the "general request termination".

I feel like we're running in circles here: I ask about debuggability
and telling the user what exactly failed and you're giving me some
explanation about what the error codes mean. I can see what they mean.

So let me try again:

Imagine you're a guest owner and you haven't written the SNP code and
you don't know how it works.

You start a guest in the public cloud and it fails because the
hypervisor violates the GHCB protocol and all that guest prints before
it dies is

"general request termination"

How are you - the guest owner - going to find out what exactly happened?

Call support?
Brijesh Singh June 16, 2021, 12:49 p.m. UTC | #8
On 6/16/21 7:03 AM, Borislav Petkov wrote:
> On Wed, Jun 16, 2021 at 06:00:09AM -0500, Brijesh Singh wrote:

>> I am trying to be consistent with previous VMGEXIT implementations. If

>> the command itself failed then use the command specific error code to

>> tell hypervisor why we terminated but if the hypervisor violated the

>> GHCB specification then use the "general request termination".

> I feel like we're running in circles here: I ask about debuggability

> and telling the user what exactly failed and you're giving me some

> explanation about what the error codes mean. I can see what they mean.

>

> So let me try again:

>

> Imagine you're a guest owner and you haven't written the SNP code and

> you don't know how it works.

>

> You start a guest in the public cloud and it fails because the

> hypervisor violates the GHCB protocol and all that guest prints before

> it dies is

>

> "general request termination"



The GHCB specification does not define a unique error code for every
possible condition. Now that we have reserved reason set 1 for the
Linux-specific error code, we could add a new error code to cover the
cases for the protocol violation. I was highlighting that we should not
overload the meaning of GHCB_TERM_PSC. In my mind, the GHCB_TERM_PSC
error code is used when the guest sees that the hypervisor failed to
change the state . The failure maybe because the guest provided a bogus
GPA or invalid operation code, or RMPUPDATE failure or HV does not
support SNP feature etc etc. But in this case, the failure was due to
the protocol error, and IMO we should not use the GHCB_TERM_PSC.
Additionally, we should also update CPUID and other VMGEXITs to use the
new error code instead of "general request termination" so that its
consistent.


If you still think that GHCB_TERM_PSC is valid here, then I am okay with it.

-Brijesh
Borislav Petkov June 16, 2021, 1:02 p.m. UTC | #9
On Wed, Jun 16, 2021 at 07:49:25AM -0500, Brijesh Singh wrote:
> If you still think ...


I think you should answer my question first:

> Imagine you're a guest owner and you haven't written the SNP code and

> you don't know how it works.

>

> You start a guest in the public cloud and it fails because the

> hypervisor violates the GHCB protocol and all that guest prints before

> it dies is

>

> "general request termination"

>

> How are you - the guest owner - going to find out what exactly happened?

>

> Call support?


And let me paraphrase it again: if the error condition with which the
guest terminates is not uniquely identifiable but simply a "general
request", how are such conditions going to be debugged?

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Dr. David Alan Gilbert June 16, 2021, 1:06 p.m. UTC | #10
* Brijesh Singh (brijesh.singh@amd.com) wrote:
> 
> On 6/16/21 7:03 AM, Borislav Petkov wrote:
> > On Wed, Jun 16, 2021 at 06:00:09AM -0500, Brijesh Singh wrote:
> >> I am trying to be consistent with previous VMGEXIT implementations. If
> >> the command itself failed then use the command specific error code to
> >> tell hypervisor why we terminated but if the hypervisor violated the
> >> GHCB specification then use the "general request termination".
> > I feel like we're running in circles here: I ask about debuggability
> > and telling the user what exactly failed and you're giving me some
> > explanation about what the error codes mean. I can see what they mean.
> >
> > So let me try again:
> >
> > Imagine you're a guest owner and you haven't written the SNP code and
> > you don't know how it works.
> >
> > You start a guest in the public cloud and it fails because the
> > hypervisor violates the GHCB protocol and all that guest prints before
> > it dies is
> >
> > "general request termination"
> 
> 
> The GHCB specification does not define a unique error code for every
> possible condition. Now that we have reserved reason set 1 for the
> Linux-specific error code, we could add a new error code to cover the
> cases for the protocol violation. I was highlighting that we should not
> overload the meaning of GHCB_TERM_PSC. In my mind, the GHCB_TERM_PSC
> error code is used when the guest sees that the hypervisor failed to
> change the state . The failure maybe because the guest provided a bogus
> GPA or invalid operation code, or RMPUPDATE failure or HV does not
> support SNP feature etc etc. But in this case, the failure was due to
> the protocol error, and IMO we should not use the GHCB_TERM_PSC.
> Additionally, we should also update CPUID and other VMGEXITs to use the
> new error code instead of "general request termination" so that its
> consistent.
> 
> 
> If you still think that GHCB_TERM_PSC is valid here, then I am okay with it.

I'd kind of agree with Borislav, the more hints we can have as to the
actual failure reason the better - so if you've got multiple cases
where the guest thinks the hypervisor has screwed up, find a way to give
an error code to tell us which one.

Dave

> -Brijesh
> 
> 
>
Brijesh Singh June 16, 2021, 1:10 p.m. UTC | #11
On 6/16/21 8:02 AM, Borislav Petkov wrote:
> On Wed, Jun 16, 2021 at 07:49:25AM -0500, Brijesh Singh wrote:

>> If you still think ...

> I think you should answer my question first:

>

>> Imagine you're a guest owner and you haven't written the SNP code and

>> you don't know how it works.

>>

>> You start a guest in the public cloud and it fails because the

>> hypervisor violates the GHCB protocol and all that guest prints before

>> it dies is

>>

>> "general request termination"

>>

>> How are you - the guest owner - going to find out what exactly happened?

>>

>> Call support?

> And let me paraphrase it again: if the error condition with which the

> guest terminates is not uniquely identifiable but simply a "general

> request", how are such conditions going to be debugged?


I thought I said it somewhere in our previous conversation, I would look
at the KVM trace log, each vmgexit entry and exit are logged. The log
contains full GHCB MSR value, and in it you can see both the request and
response code and decode the failure reason.

-Brijesh
Brijesh Singh June 16, 2021, 2:36 p.m. UTC | #12
On 6/16/2021 8:10 AM, Brijesh Singh wrote:
> 

> On 6/16/21 8:02 AM, Borislav Petkov wrote:

>> On Wed, Jun 16, 2021 at 07:49:25AM -0500, Brijesh Singh wrote:

>>> If you still think ...

>> I think you should answer my question first:

>>

>>> Imagine you're a guest owner and you haven't written the SNP code and

>>> you don't know how it works.

>>>

>>> You start a guest in the public cloud and it fails because the

>>> hypervisor violates the GHCB protocol and all that guest prints before

>>> it dies is

>>>

>>> "general request termination"

>>>

>>> How are you - the guest owner - going to find out what exactly happened?

>>>

>>> Call support?

>> And let me paraphrase it again: if the error condition with which the

>> guest terminates is not uniquely identifiable but simply a "general

>> request", how are such conditions going to be debugged?

> > I thought I said it somewhere in our previous conversation, I would look

> at the KVM trace log, each vmgexit entry and exit are logged. The log

> contains full GHCB MSR value, and in it you can see both the request and

> response code and decode the failure reason.

> 


I now realized that in this case we may not have the trace's. It's
a production environment and my development machine :(. I will go ahead and
add the error message when guest sees an invalid response code before
terminating it. Will do the similar error message in the decompression
path.

-Brijesh
Brijesh Singh June 16, 2021, 2:37 p.m. UTC | #13
On 6/16/2021 9:36 AM, Brijesh Singh wrote:
> 

> 

> On 6/16/2021 8:10 AM, Brijesh Singh wrote:

>>

>> On 6/16/21 8:02 AM, Borislav Petkov wrote:

>>> On Wed, Jun 16, 2021 at 07:49:25AM -0500, Brijesh Singh wrote:

>>>> If you still think ...

>>> I think you should answer my question first:

>>>

>>>> Imagine you're a guest owner and you haven't written the SNP code and

>>>> you don't know how it works.

>>>>

>>>> You start a guest in the public cloud and it fails because the

>>>> hypervisor violates the GHCB protocol and all that guest prints before

>>>> it dies is

>>>>

>>>> "general request termination"

>>>>

>>>> How are you - the guest owner - going to find out what exactly happened?

>>>>

>>>> Call support?

>>> And let me paraphrase it again: if the error condition with which the

>>> guest terminates is not uniquely identifiable but simply a "general

>>> request", how are such conditions going to be debugged?

>>> I thought I said it somewhere in our previous conversation, I would look

>> at the KVM trace log, each vmgexit entry and exit are logged. The log

>> contains full GHCB MSR value, and in it you can see both the request and

>> response code and decode the failure reason.

>>

> 

> I now realized that in this case we may not have the trace's. It's

> a production environment and my development machine :(.


I am mean to say *not* my development machine
diff mbox series

Patch

diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index c41c786d69fe..7c2cb5300e43 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -65,6 +65,12 @@  extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
 /* RMP page size */
 #define RMP_PG_SIZE_4K			0
 
+/* Memory opertion for snp_prep_memory() */
+enum snp_mem_op {
+	MEMORY_PRIVATE,
+	MEMORY_SHARED
+};
+
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 extern struct static_key_false sev_es_enable_key;
 extern void __sev_es_ist_enter(struct pt_regs *regs);
@@ -103,6 +109,11 @@  static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
 
 	return rc;
 }
+void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+		unsigned int npages);
+void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+		unsigned int npages);
+void __init snp_prep_memory(unsigned long paddr, unsigned int sz, int op);
 #else
 static inline void sev_es_ist_enter(struct pt_regs *regs) { }
 static inline void sev_es_ist_exit(void) { }
@@ -110,6 +121,15 @@  static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { ret
 static inline void sev_es_nmi_complete(void) { }
 static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
 static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; }
+static inline void __init
+early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages)
+{
+}
+static inline void __init
+early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned int npages)
+{
+}
+static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, int op) { }
 #endif
 
 #endif
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 455c09a9b2c2..6e9b45bb38ab 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -532,6 +532,111 @@  static u64 get_jump_table_addr(void)
 	return ret;
 }
 
+static void pvalidate_pages(unsigned long vaddr, unsigned int npages, bool validate)
+{
+	unsigned long vaddr_end;
+	int rc;
+
+	vaddr = vaddr & PAGE_MASK;
+	vaddr_end = vaddr + (npages << PAGE_SHIFT);
+
+	while (vaddr < vaddr_end) {
+		rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
+		if (WARN(rc, "Failed to validate address 0x%lx ret %d", vaddr, rc))
+			sev_es_terminate(1, GHCB_TERM_PVALIDATE);
+
+		vaddr = vaddr + PAGE_SIZE;
+	}
+}
+
+static void __init early_set_page_state(unsigned long paddr, unsigned int npages, int op)
+{
+	unsigned long paddr_end;
+	u64 val;
+
+	paddr = paddr & PAGE_MASK;
+	paddr_end = paddr + (npages << PAGE_SHIFT);
+
+	while (paddr < paddr_end) {
+		/*
+		 * Use the MSR protocol because this function can be called before the GHCB
+		 * is established.
+		 */
+		sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
+		VMGEXIT();
+
+		val = sev_es_rd_ghcb_msr();
+
+		if (GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP)
+			goto e_term;
+
+		if (WARN(GHCB_MSR_PSC_RESP_VAL(val),
+			 "Failed to change page state to '%s' paddr 0x%lx error 0x%llx\n",
+			 op == SNP_PAGE_STATE_PRIVATE ? "private" : "shared",
+			 paddr, GHCB_MSR_PSC_RESP_VAL(val)))
+			goto e_term;
+
+		paddr = paddr + PAGE_SIZE;
+	}
+
+	return;
+
+e_term:
+	sev_es_terminate(1, GHCB_TERM_PSC);
+}
+
+void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+					 unsigned int npages)
+{
+	if (!sev_feature_enabled(SEV_SNP))
+		return;
+
+	 /* Ask hypervisor to add the memory pages in RMP table as a 'private'. */
+	early_set_page_state(paddr, npages, SNP_PAGE_STATE_PRIVATE);
+
+	/* Validate the memory pages after they've been added in the RMP table. */
+	pvalidate_pages(vaddr, npages, 1);
+}
+
+void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+					unsigned int npages)
+{
+	if (!sev_feature_enabled(SEV_SNP))
+		return;
+
+	/*
+	 * Invalidate the memory pages before they are marked shared in the
+	 * RMP table.
+	 */
+	pvalidate_pages(vaddr, npages, 0);
+
+	 /* Ask hypervisor to make the memory pages shared in the RMP table. */
+	early_set_page_state(paddr, npages, SNP_PAGE_STATE_SHARED);
+}
+
+void __init snp_prep_memory(unsigned long paddr, unsigned int sz, int op)
+{
+	unsigned long vaddr, npages;
+
+	vaddr = (unsigned long)__va(paddr);
+	npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
+
+	switch (op) {
+	case MEMORY_PRIVATE: {
+		early_snp_set_memory_private(vaddr, paddr, npages);
+		return;
+	}
+	case MEMORY_SHARED: {
+		early_snp_set_memory_shared(vaddr, paddr, npages);
+		return;
+	}
+	default:
+		break;
+	}
+
+	WARN(1, "invalid memory op %d\n", op);
+}
+
 int sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
 {
 	u16 startup_cs, startup_ip;
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 63e7799a9a86..45d9feb0151a 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -30,6 +30,7 @@ 
 #include <asm/processor-flags.h>
 #include <asm/msr.h>
 #include <asm/cmdline.h>
+#include <asm/sev.h>
 
 #include "mm_internal.h"
 
@@ -48,6 +49,34 @@  EXPORT_SYMBOL_GPL(sev_enable_key);
 /* Buffer used for early in-place encryption by BSP, no locking needed */
 static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);
 
+/*
+ * When SNP is active, changes the page state from private to shared before
+ * copying the data from the source to destination and restore after the copy.
+ * This is required because the source address is mapped as decrypted by the
+ * caller of the routine.
+ */
+static inline void __init snp_memcpy(void *dst, void *src, size_t sz,
+				     unsigned long paddr, bool decrypt)
+{
+	unsigned long npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
+
+	if (!sev_feature_enabled(SEV_SNP) || !decrypt) {
+		memcpy(dst, src, sz);
+		return;
+	}
+
+	/*
+	 * If the paddr needs to be accessed decrypted, mark the page
+	 * shared in the RMP table before copying it.
+	 */
+	early_snp_set_memory_shared((unsigned long)__va(paddr), paddr, npages);
+
+	memcpy(dst, src, sz);
+
+	/* Restore the page state after the memcpy. */
+	early_snp_set_memory_private((unsigned long)__va(paddr), paddr, npages);
+}
+
 /*
  * This routine does not change the underlying encryption setting of the
  * page(s) that map this memory. It assumes that eventually the memory is
@@ -96,8 +125,8 @@  static void __init __sme_early_enc_dec(resource_size_t paddr,
 		 * Use a temporary buffer, of cache-line multiple size, to
 		 * avoid data corruption as documented in the APM.
 		 */
-		memcpy(sme_early_buffer, src, len);
-		memcpy(dst, sme_early_buffer, len);
+		snp_memcpy(sme_early_buffer, src, len, paddr, enc);
+		snp_memcpy(dst, sme_early_buffer, len, paddr, !enc);
 
 		early_memunmap(dst, len);
 		early_memunmap(src, len);
@@ -277,9 +306,23 @@  static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
 	else
 		sme_early_decrypt(pa, size);
 
+	/*
+	 * If page is getting mapped decrypted in the page table, then the page state
+	 * change in the RMP table must happen before the page table updates.
+	 */
+	if (!enc)
+		early_snp_set_memory_shared((unsigned long)__va(pa), pa, 1);
+
 	/* Change the page encryption mask. */
 	new_pte = pfn_pte(pfn, new_prot);
 	set_pte_atomic(kpte, new_pte);
+
+	/*
+	 * If page is set encrypted in the page table, then update the RMP table to
+	 * add this page as private.
+	 */
+	if (enc)
+		early_snp_set_memory_private((unsigned long)__va(pa), pa, 1);
 }
 
 static int __init early_set_memory_enc_dec(unsigned long vaddr,