diff mbox series

[Part2,RFC,v4,23/40] KVM: SVM: Add KVM_SEV_SNP_LAUNCH_START command

Message ID 20210707183616.5620-24-brijesh.singh@amd.com
State New
Headers show
Series [Part2,RFC,v4,01/40] KVM: SVM: Add support to handle AP reset MSR protocol | expand

Commit Message

Brijesh Singh July 7, 2021, 6:35 p.m. UTC
KVM_SEV_SNP_LAUNCH_START begins the launch process for an SEV-SNP guest.
The command initializes a cryptographic digest context used to construct
the measurement of the guest. If the guest is expected to be migrated,
the command also binds a migration agent (MA) to the guest.

For more information see the SEV-SNP specification.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
 .../virt/kvm/amd-memory-encryption.rst        |  25 ++++
 arch/x86/kvm/svm/sev.c                        | 132 +++++++++++++++++-
 arch/x86/kvm/svm/svm.h                        |   1 +
 include/uapi/linux/kvm.h                      |   9 ++
 4 files changed, 166 insertions(+), 1 deletion(-)

Comments

Peter Gonda July 12, 2021, 6:45 p.m. UTC | #1
>
> +static int snp_decommission_context(struct kvm *kvm)
> +{
> +       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> +       struct sev_data_snp_decommission data = {};
> +       int ret;
> +
> +       /* If context is not created then do nothing */
> +       if (!sev->snp_context)
> +               return 0;
> +
> +       data.gctx_paddr = __sme_pa(sev->snp_context);
> +       ret = snp_guest_decommission(&data, NULL);
> +       if (ret)
> +               return ret;

Should we WARN or pr_err here? I see in the case of
snp_launch_start's e_free_context we do not warn the user they have
leaked a firmware page.

>
> +
> +       /* free the context page now */
> +       snp_free_firmware_page(sev->snp_context);
> +       sev->snp_context = NULL;
> +
> +       return 0;
> +}
> +
>  void sev_vm_destroy(struct kvm *kvm)
>  {
>         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> @@ -1847,7 +1969,15 @@ void sev_vm_destroy(struct kvm *kvm)
>
>         mutex_unlock(&kvm->lock);
>
> -       sev_unbind_asid(kvm, sev->handle);
> +       if (sev_snp_guest(kvm)) {
> +               if (snp_decommission_context(kvm)) {
> +                       pr_err("Failed to free SNP guest context, leaking asid!\n");

Should these errors be a WARN since we are leaking some state?


> +                       return;
> +               }
> +       } else {
> +               sev_unbind_asid(kvm, sev->handle);
> +       }
> +
>         sev_asid_free(sev);
>  }
>
Sean Christopherson July 16, 2021, 7:43 p.m. UTC | #2
On Wed, Jul 07, 2021, Brijesh Singh wrote:
> @@ -1527,6 +1530,100 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)

>  	return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);

>  }

>  

> +static void *snp_context_create(struct kvm *kvm, struct kvm_sev_cmd *argp)

> +{

> +	struct sev_data_snp_gctx_create data = {};

> +	void *context;

> +	int rc;

> +

> +	/* Allocate memory for context page */


Eh, I'd drop this comment.  It's quite obvious that a page is being allocated
and that it's being assigned to the context.

> +	context = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT);

> +	if (!context)

> +		return NULL;

> +

> +	data.gctx_paddr = __psp_pa(context);

> +	rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_GCTX_CREATE, &data, &argp->error);

> +	if (rc) {

> +		snp_free_firmware_page(context);

> +		return NULL;

> +	}

> +

> +	return context;

> +}

> +

> +static int snp_bind_asid(struct kvm *kvm, int *error)

> +{

> +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

> +	struct sev_data_snp_activate data = {};

> +	int asid = sev_get_asid(kvm);

> +	int ret, retry_count = 0;

> +

> +	/* Activate ASID on the given context */

> +	data.gctx_paddr = __psp_pa(sev->snp_context);

> +	data.asid   = asid;

> +again:

> +	ret = sev_issue_cmd(kvm, SEV_CMD_SNP_ACTIVATE, &data, error);

> +

> +	/* Check if the DF_FLUSH is required, and try again */


Please provide more info on why this may be necessary.  I can see from the code
that it does a flush and retries, but I have no idea why a flush would be required
in the first place, e.g. why can't KVM guarantee that everything is in the proper
state before attempting to bind an ASID?

> +	if (ret && (*error == SEV_RET_DFFLUSH_REQUIRED) && (!retry_count)) {

> +		/* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */

> +		down_read(&sev_deactivate_lock);

> +		wbinvd_on_all_cpus();

> +		ret = snp_guest_df_flush(error);

> +		up_read(&sev_deactivate_lock);

> +

> +		if (ret)

> +			return ret;

> +

> +		/* only one retry */


Again, please explain why.  Is this arbitrary?  Is retrying more than once
guaranteed to be useless?

> +		retry_count = 1;

> +

> +		goto again;

> +	}

> +

> +	return ret;

> +}


...

>  void sev_vm_destroy(struct kvm *kvm)

>  {

>  	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

> @@ -1847,7 +1969,15 @@ void sev_vm_destroy(struct kvm *kvm)

>  

>  	mutex_unlock(&kvm->lock);

>  

> -	sev_unbind_asid(kvm, sev->handle);

> +	if (sev_snp_guest(kvm)) {

> +		if (snp_decommission_context(kvm)) {

> +			pr_err("Failed to free SNP guest context, leaking asid!\n");


I agree with Peter that this likely warrants a WARN.  If a WARN isn't justified,
e.g. this can happen without a KVM/CPU bug, then there absolutely needs to be a
massive comment explaining why we have code that result in memory leaks.

> +			return;

> +		}

> +	} else {

> +		sev_unbind_asid(kvm, sev->handle);

> +	}

> +

>  	sev_asid_free(sev);

>  }

>  

> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h

> index b9ea99f8579e..bc5582b44356 100644

> --- a/arch/x86/kvm/svm/svm.h

> +++ b/arch/x86/kvm/svm/svm.h

> @@ -67,6 +67,7 @@ struct kvm_sev_info {

>  	u64 ap_jump_table;	/* SEV-ES AP Jump Table address */

>  	struct kvm *enc_context_owner; /* Owner of copied encryption context */

>  	struct misc_cg *misc_cg; /* For misc cgroup accounting */

> +	void *snp_context;      /* SNP guest context page */

>  };

>  

>  struct kvm_svm {

> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h

> index 989a64aa1ae5..dbd05179d8fa 100644

> --- a/include/uapi/linux/kvm.h

> +++ b/include/uapi/linux/kvm.h

> @@ -1680,6 +1680,7 @@ enum sev_cmd_id {

>  

>  	/* SNP specific commands */

>  	KVM_SEV_SNP_INIT = 256,

> +	KVM_SEV_SNP_LAUNCH_START,

>  

>  	KVM_SEV_NR_MAX,

>  };

> @@ -1781,6 +1782,14 @@ struct kvm_snp_init {

>  	__u64 flags;

>  };

>  

> +struct kvm_sev_snp_launch_start {

> +	__u64 policy;

> +	__u64 ma_uaddr;

> +	__u8 ma_en;

> +	__u8 imi_en;

> +	__u8 gosvw[16];


Hmm, I'd prefer to pad this out to be 8-byte sized.

> +};

> +

>  #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)

>  #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)

>  #define KVM_DEV_ASSIGN_MASK_INTX	(1 << 2)

> -- 

> 2.17.1

>
Brijesh Singh July 16, 2021, 9:42 p.m. UTC | #3
On 7/16/21 2:43 PM, Sean Christopherson wrote:
> On Wed, Jul 07, 2021, Brijesh Singh wrote:

>> @@ -1527,6 +1530,100 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)

>>  	return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);

>>  }

>>  

>> +static void *snp_context_create(struct kvm *kvm, struct kvm_sev_cmd *argp)

>> +{

>> +	struct sev_data_snp_gctx_create data = {};

>> +	void *context;

>> +	int rc;

>> +

>> +	/* Allocate memory for context page */

> Eh, I'd drop this comment.  It's quite obvious that a page is being allocated

> and that it's being assigned to the context.

>

>> +	context = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT);

>> +	if (!context)

>> +		return NULL;

>> +

>> +	data.gctx_paddr = __psp_pa(context);

>> +	rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_GCTX_CREATE, &data, &argp->error);

>> +	if (rc) {

>> +		snp_free_firmware_page(context);

>> +		return NULL;

>> +	}

>> +

>> +	return context;

>> +}

>> +

>> +static int snp_bind_asid(struct kvm *kvm, int *error)

>> +{

>> +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

>> +	struct sev_data_snp_activate data = {};

>> +	int asid = sev_get_asid(kvm);

>> +	int ret, retry_count = 0;

>> +

>> +	/* Activate ASID on the given context */

>> +	data.gctx_paddr = __psp_pa(sev->snp_context);

>> +	data.asid   = asid;

>> +again:

>> +	ret = sev_issue_cmd(kvm, SEV_CMD_SNP_ACTIVATE, &data, error);

>> +

>> +	/* Check if the DF_FLUSH is required, and try again */

> Please provide more info on why this may be necessary.  I can see from the code

> that it does a flush and retries, but I have no idea why a flush would be required

> in the first place, e.g. why can't KVM guarantee that everything is in the proper

> state before attempting to bind an ASID?



Ah good question, we already have function to recycle the ASIDs. The
recycle happen during the ASID allocation. While recycling it issues the
SEV/ES DF_FLUSH command. That function need to be enhanced to use the
SNP specific DF_FLUSH command when ASID's are getting reused. I wish we
had one DF_FLUSH which internally takes care of both the cases. Thinking
loud, maybe firmware team decided to add a new one because what if
someone is not using the SEV and SEV-ES or some fw does not support
legacy SEV commands. I will fix it and remove the DF_FLUSH from the
launch_start.


>

>> +	if (ret && (*error == SEV_RET_DFFLUSH_REQUIRED) && (!retry_count)) {

>> +		/* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */

>> +		down_read(&sev_deactivate_lock);

>> +		wbinvd_on_all_cpus();

>> +		ret = snp_guest_df_flush(error);

>> +		up_read(&sev_deactivate_lock);

>> +

>> +		if (ret)

>> +			return ret;

>> +

>> +		/* only one retry */

> Again, please explain why.  Is this arbitrary?  Is retrying more than once

> guaranteed to be useless?

>

>> +		retry_count = 1;

>> +

>> +		goto again;

>> +	}

>> +

>> +	return ret;

>> +}

> ...

>

>>  void sev_vm_destroy(struct kvm *kvm)

>>  {

>>  	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

>> @@ -1847,7 +1969,15 @@ void sev_vm_destroy(struct kvm *kvm)

>>  

>>  	mutex_unlock(&kvm->lock);

>>  

>> -	sev_unbind_asid(kvm, sev->handle);

>> +	if (sev_snp_guest(kvm)) {

>> +		if (snp_decommission_context(kvm)) {

>> +			pr_err("Failed to free SNP guest context, leaking asid!\n");

> I agree with Peter that this likely warrants a WARN.  If a WARN isn't justified,

> e.g. this can happen without a KVM/CPU bug, then there absolutely needs to be a

> massive comment explaining why we have code that result in memory leaks.



Ack.

>

>> +			return;

>> +		}

>> +	} else {

>> +		sev_unbind_asid(kvm, sev->handle);

>> +	}

>> +

>>  	sev_asid_free(sev);

>>  }

>>  

>> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h

>> index b9ea99f8579e..bc5582b44356 100644

>> --- a/arch/x86/kvm/svm/svm.h

>> +++ b/arch/x86/kvm/svm/svm.h

>> @@ -67,6 +67,7 @@ struct kvm_sev_info {

>>  	u64 ap_jump_table;	/* SEV-ES AP Jump Table address */

>>  	struct kvm *enc_context_owner; /* Owner of copied encryption context */

>>  	struct misc_cg *misc_cg; /* For misc cgroup accounting */

>> +	void *snp_context;      /* SNP guest context page */

>>  };

>>  

>>  struct kvm_svm {

>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h

>> index 989a64aa1ae5..dbd05179d8fa 100644

>> --- a/include/uapi/linux/kvm.h

>> +++ b/include/uapi/linux/kvm.h

>> @@ -1680,6 +1680,7 @@ enum sev_cmd_id {

>>  

>>  	/* SNP specific commands */

>>  	KVM_SEV_SNP_INIT = 256,

>> +	KVM_SEV_SNP_LAUNCH_START,

>>  

>>  	KVM_SEV_NR_MAX,

>>  };

>> @@ -1781,6 +1782,14 @@ struct kvm_snp_init {

>>  	__u64 flags;

>>  };

>>  

>> +struct kvm_sev_snp_launch_start {

>> +	__u64 policy;

>> +	__u64 ma_uaddr;

>> +	__u8 ma_en;

>> +	__u8 imi_en;

>> +	__u8 gosvw[16];

> Hmm, I'd prefer to pad this out to be 8-byte sized.


Noted.


>> +};

>> +

>>  #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)

>>  #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)

>>  #define KVM_DEV_ASSIGN_MASK_INTX	(1 << 2)

>> -- 

>> 2.17.1

>>
diff mbox series

Patch

diff --git a/Documentation/virt/kvm/amd-memory-encryption.rst b/Documentation/virt/kvm/amd-memory-encryption.rst
index 75ca60b6d40a..8620383d405a 100644
--- a/Documentation/virt/kvm/amd-memory-encryption.rst
+++ b/Documentation/virt/kvm/amd-memory-encryption.rst
@@ -443,6 +443,31 @@  Returns: 0 on success, -negative on error
                 __u64 flags;    /* must be zero */
         };
 
+
+19. KVM_SNP_LAUNCH_START
+------------------------
+
+The KVM_SNP_LAUNCH_START command is used for creating the memory encryption
+context for the SEV-SNP guest. To create the encryption context, user must
+provide a guest policy, migration agent (if any) and guest OS visible
+workarounds value as defined SEV-SNP specification.
+
+Parameters (in): struct  kvm_snp_launch_start
+
+Returns: 0 on success, -negative on error
+
+::
+
+        struct kvm_sev_snp_launch_start {
+                __u64 policy;           /* Guest policy to use. */
+                __u64 ma_uaddr;         /* userspace address of migration agent */
+                __u8 ma_en;             /* 1 if the migtation agent is enabled */
+                __u8 imi_en;            /* set IMI to 1. */
+                __u8 gosvw[16];         /* guest OS visible workarounds */
+        };
+
+See the SEV-SNP specification for further detail on the launch input.
+
 References
 ==========
 
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index be31221f0a47..f44a657e8912 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -20,6 +20,7 @@ 
 #include <asm/fpu/internal.h>
 
 #include <asm/trapnr.h>
+#include <asm/sev.h>
 
 #include "x86.h"
 #include "svm.h"
@@ -75,6 +76,8 @@  static unsigned long sev_me_mask;
 static unsigned long *sev_asid_bitmap;
 static unsigned long *sev_reclaim_asid_bitmap;
 
+static int snp_decommission_context(struct kvm *kvm);
+
 struct enc_region {
 	struct list_head list;
 	unsigned long npages;
@@ -1527,6 +1530,100 @@  static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
 }
 
+static void *snp_context_create(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+	struct sev_data_snp_gctx_create data = {};
+	void *context;
+	int rc;
+
+	/* Allocate memory for context page */
+	context = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT);
+	if (!context)
+		return NULL;
+
+	data.gctx_paddr = __psp_pa(context);
+	rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_GCTX_CREATE, &data, &argp->error);
+	if (rc) {
+		snp_free_firmware_page(context);
+		return NULL;
+	}
+
+	return context;
+}
+
+static int snp_bind_asid(struct kvm *kvm, int *error)
+{
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+	struct sev_data_snp_activate data = {};
+	int asid = sev_get_asid(kvm);
+	int ret, retry_count = 0;
+
+	/* Activate ASID on the given context */
+	data.gctx_paddr = __psp_pa(sev->snp_context);
+	data.asid   = asid;
+again:
+	ret = sev_issue_cmd(kvm, SEV_CMD_SNP_ACTIVATE, &data, error);
+
+	/* Check if the DF_FLUSH is required, and try again */
+	if (ret && (*error == SEV_RET_DFFLUSH_REQUIRED) && (!retry_count)) {
+		/* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */
+		down_read(&sev_deactivate_lock);
+		wbinvd_on_all_cpus();
+		ret = snp_guest_df_flush(error);
+		up_read(&sev_deactivate_lock);
+
+		if (ret)
+			return ret;
+
+		/* only one retry */
+		retry_count = 1;
+
+		goto again;
+	}
+
+	return ret;
+}
+
+static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+	struct sev_data_snp_launch_start start = {};
+	struct kvm_sev_snp_launch_start params;
+	int rc;
+
+	if (!sev_snp_guest(kvm))
+		return -ENOTTY;
+
+	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+		return -EFAULT;
+
+	/* Initialize the guest context */
+	sev->snp_context = snp_context_create(kvm, argp);
+	if (!sev->snp_context)
+		return -ENOTTY;
+
+	/* Issue the LAUNCH_START command */
+	start.gctx_paddr = __psp_pa(sev->snp_context);
+	start.policy = params.policy;
+	memcpy(start.gosvw, params.gosvw, sizeof(params.gosvw));
+	rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_START, &start, &argp->error);
+	if (rc)
+		goto e_free_context;
+
+	/* Bind ASID to this guest */
+	sev->fd = argp->sev_fd;
+	rc = snp_bind_asid(kvm, &argp->error);
+	if (rc)
+		goto e_free_context;
+
+	return 0;
+
+e_free_context:
+	snp_decommission_context(kvm);
+
+	return rc;
+}
+
 int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
 {
 	struct kvm_sev_cmd sev_cmd;
@@ -1616,6 +1713,9 @@  int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
 	case KVM_SEV_RECEIVE_FINISH:
 		r = sev_receive_finish(kvm, &sev_cmd);
 		break;
+	case KVM_SEV_SNP_LAUNCH_START:
+		r = snp_launch_start(kvm, &sev_cmd);
+		break;
 	default:
 		r = -EINVAL;
 		goto out;
@@ -1809,6 +1909,28 @@  int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
 	return ret;
 }
 
+static int snp_decommission_context(struct kvm *kvm)
+{
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+	struct sev_data_snp_decommission data = {};
+	int ret;
+
+	/* If context is not created then do nothing */
+	if (!sev->snp_context)
+		return 0;
+
+	data.gctx_paddr = __sme_pa(sev->snp_context);
+	ret = snp_guest_decommission(&data, NULL);
+	if (ret)
+		return ret;
+
+	/* free the context page now */
+	snp_free_firmware_page(sev->snp_context);
+	sev->snp_context = NULL;
+
+	return 0;
+}
+
 void sev_vm_destroy(struct kvm *kvm)
 {
 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
@@ -1847,7 +1969,15 @@  void sev_vm_destroy(struct kvm *kvm)
 
 	mutex_unlock(&kvm->lock);
 
-	sev_unbind_asid(kvm, sev->handle);
+	if (sev_snp_guest(kvm)) {
+		if (snp_decommission_context(kvm)) {
+			pr_err("Failed to free SNP guest context, leaking asid!\n");
+			return;
+		}
+	} else {
+		sev_unbind_asid(kvm, sev->handle);
+	}
+
 	sev_asid_free(sev);
 }
 
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index b9ea99f8579e..bc5582b44356 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -67,6 +67,7 @@  struct kvm_sev_info {
 	u64 ap_jump_table;	/* SEV-ES AP Jump Table address */
 	struct kvm *enc_context_owner; /* Owner of copied encryption context */
 	struct misc_cg *misc_cg; /* For misc cgroup accounting */
+	void *snp_context;      /* SNP guest context page */
 };
 
 struct kvm_svm {
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 989a64aa1ae5..dbd05179d8fa 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1680,6 +1680,7 @@  enum sev_cmd_id {
 
 	/* SNP specific commands */
 	KVM_SEV_SNP_INIT = 256,
+	KVM_SEV_SNP_LAUNCH_START,
 
 	KVM_SEV_NR_MAX,
 };
@@ -1781,6 +1782,14 @@  struct kvm_snp_init {
 	__u64 flags;
 };
 
+struct kvm_sev_snp_launch_start {
+	__u64 policy;
+	__u64 ma_uaddr;
+	__u8 ma_en;
+	__u8 imi_en;
+	__u8 gosvw[16];
+};
+
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
 #define KVM_DEV_ASSIGN_MASK_INTX	(1 << 2)