diff mbox series

[Part2,v6,42/49] KVM: SVM: Provide support for SNP_GUEST_REQUEST NAE event

Message ID 5d05799fc61994684aa2b2ddb8c5b326a3279e25.1655761627.git.ashish.kalra@amd.com
State New
Headers show
Series Add AMD Secure Nested Paging (SEV-SNP) | expand

Commit Message

Ashish Kalra June 20, 2022, 11:13 p.m. UTC
From: Brijesh Singh <brijesh.singh@amd.com>

Version 2 of GHCB specification added the support for two SNP Guest
Request Message NAE events. The events allows for an SEV-SNP guest to
make request to the SEV-SNP firmware through hypervisor using the
SNP_GUEST_REQUEST API define in the SEV-SNP firmware specification.

The SNP_EXT_GUEST_REQUEST is similar to SNP_GUEST_REQUEST with the
difference of an additional certificate blob that can be passed through
the SNP_SET_CONFIG ioctl defined in the CCP driver. The CCP driver
provides snp_guest_ext_guest_request() that is used by the KVM to get
both the report and certificate data at once.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
 arch/x86/kvm/svm/sev.c | 196 +++++++++++++++++++++++++++++++++++++++--
 arch/x86/kvm/svm/svm.h |   2 +
 2 files changed, 192 insertions(+), 6 deletions(-)

Comments

Peter Gonda June 24, 2022, 4:25 p.m. UTC | #1
On Mon, Jun 20, 2022 at 5:13 PM Ashish Kalra <Ashish.Kalra@amd.com> wrote:
>
> From: Brijesh Singh <brijesh.singh@amd.com>
>
> Version 2 of GHCB specification added the support for two SNP Guest
> Request Message NAE events. The events allows for an SEV-SNP guest to
> make request to the SEV-SNP firmware through hypervisor using the
> SNP_GUEST_REQUEST API define in the SEV-SNP firmware specification.
>
> The SNP_EXT_GUEST_REQUEST is similar to SNP_GUEST_REQUEST with the
> difference of an additional certificate blob that can be passed through
> the SNP_SET_CONFIG ioctl defined in the CCP driver. The CCP driver
> provides snp_guest_ext_guest_request() that is used by the KVM to get
> both the report and certificate data at once.
>
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
>  arch/x86/kvm/svm/sev.c | 196 +++++++++++++++++++++++++++++++++++++++--
>  arch/x86/kvm/svm/svm.h |   2 +
>  2 files changed, 192 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 7fc0fad87054..089af21a4efe 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -343,6 +343,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
>
>                 spin_lock_init(&sev->psc_lock);
>                 ret = sev_snp_init(&argp->error);
> +               mutex_init(&sev->guest_req_lock);
>         } else {
>                 ret = sev_platform_init(&argp->error);
>         }
> @@ -1884,23 +1885,39 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
>
>  static void *snp_context_create(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  {
> +       void *context = NULL, *certs_data = NULL, *resp_page = NULL;

Is the NULL setting here unnecessary since all of these are set via
functions snp_alloc_firmware_page(), kmalloc(), and
snp_alloc_firmware_page() respectively?

> +       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
>         struct sev_data_snp_gctx_create data = {};
> -       void *context;
>         int rc;
>
> +       /* Allocate memory used for the certs data in SNP guest request */
> +       certs_data = kmalloc(SEV_FW_BLOB_MAX_SIZE, GFP_KERNEL_ACCOUNT);
> +       if (!certs_data)
> +               return NULL;

I think we want to use kzalloc() here to ensure we never give the
guest uninitialized kernel memory.

> +
>         /* Allocate memory for context page */
>         context = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT);
>         if (!context)
> -               return NULL;
> +               goto e_free;
> +
> +       /* Allocate a firmware buffer used during the guest command handling. */
> +       resp_page = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT);
> +       if (!resp_page)
> +               goto e_free;

|resp_page| doesn't appear to be used anywhere?

>
>         data.gctx_paddr = __psp_pa(context);
>         rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_GCTX_CREATE, &data, &argp->error);
> -       if (rc) {
> -               snp_free_firmware_page(context);
> -               return NULL;
> -       }
> +       if (rc)
> +               goto e_free;
> +
> +       sev->snp_certs_data = certs_data;
>
>         return context;
> +
> +e_free:
> +       snp_free_firmware_page(context);
> +       kfree(certs_data);
> +       return NULL;
>  }
>
>  static int snp_bind_asid(struct kvm *kvm, int *error)
> @@ -2565,6 +2582,8 @@ static int snp_decommission_context(struct kvm *kvm)
>         snp_free_firmware_page(sev->snp_context);
>         sev->snp_context = NULL;
>
> +       kfree(sev->snp_certs_data);
> +
>         return 0;
>  }
>
> @@ -3077,6 +3096,8 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code)
>         case SVM_VMGEXIT_UNSUPPORTED_EVENT:
>         case SVM_VMGEXIT_HV_FEATURES:
>         case SVM_VMGEXIT_PSC:
> +       case SVM_VMGEXIT_GUEST_REQUEST:
> +       case SVM_VMGEXIT_EXT_GUEST_REQUEST:
>                 break;
>         default:
>                 reason = GHCB_ERR_INVALID_EVENT;
> @@ -3502,6 +3523,155 @@ static unsigned long snp_handle_page_state_change(struct vcpu_svm *svm)
>         return rc ? map_to_psc_vmgexit_code(rc) : 0;
>  }
>
> +static unsigned long snp_setup_guest_buf(struct vcpu_svm *svm,
> +                                        struct sev_data_snp_guest_request *data,
> +                                        gpa_t req_gpa, gpa_t resp_gpa)
> +{
> +       struct kvm_vcpu *vcpu = &svm->vcpu;
> +       struct kvm *kvm = vcpu->kvm;
> +       kvm_pfn_t req_pfn, resp_pfn;
> +       struct kvm_sev_info *sev;
> +
> +       sev = &to_kvm_svm(kvm)->sev_info;

This is normally done at declaration in this file. Why not here?

      struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

> +
> +       if (!IS_ALIGNED(req_gpa, PAGE_SIZE) || !IS_ALIGNED(resp_gpa, PAGE_SIZE))
> +               return SEV_RET_INVALID_PARAM;
> +
> +       req_pfn = gfn_to_pfn(kvm, gpa_to_gfn(req_gpa));
> +       if (is_error_noslot_pfn(req_pfn))
> +               return SEV_RET_INVALID_ADDRESS;
> +
> +       resp_pfn = gfn_to_pfn(kvm, gpa_to_gfn(resp_gpa));
> +       if (is_error_noslot_pfn(resp_pfn))
> +               return SEV_RET_INVALID_ADDRESS;
> +
> +       if (rmp_make_private(resp_pfn, 0, PG_LEVEL_4K, 0, true))
> +               return SEV_RET_INVALID_ADDRESS;
> +
> +       data->gctx_paddr = __psp_pa(sev->snp_context);
> +       data->req_paddr = __sme_set(req_pfn << PAGE_SHIFT);
> +       data->res_paddr = __sme_set(resp_pfn << PAGE_SHIFT);
> +
> +       return 0;
> +}
> +
> +static void snp_cleanup_guest_buf(struct sev_data_snp_guest_request *data, unsigned long *rc)
> +{
> +       u64 pfn = __sme_clr(data->res_paddr) >> PAGE_SHIFT;
> +       int ret;
> +
> +       ret = snp_page_reclaim(pfn);
> +       if (ret)
> +               *rc = SEV_RET_INVALID_ADDRESS;

Do we need a diff error code here? This means the page the guest gives
us is now "stuck" in the FW owned state. How would the guest know this
is the case? We return the exact same error in snp_setup_guest_buf()
if the resp_gpa isn't page aligned so now if the guest ever sees a
SEV_RET_INVALID_ADDRESS I think its only safe option is to either try
and page_state_change it to a know state or mark it as unusable
memory.

> +
> +       ret = rmp_make_shared(pfn, PG_LEVEL_4K);
> +       if (ret)
> +               *rc = SEV_RET_INVALID_ADDRESS;

Ditto here I think we need some way to signal to the guest what state
this page is on return to guest execution.

Also these errors shadow over FW successes, this means the guest's
guest-request-sequence-numbers are now out of sync meaning this VMPCK
is unusable less the guest risk reusing the AES IV (which would break
the confidentiality/integrity). Should we have a way to signal to the
guest the FW has successfully run your command but we could not change
the page states back correctly, so the guest should increment their
sequence numbers.

> +}
> +
> +static void snp_handle_guest_request(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
> +{
> +       struct sev_data_snp_guest_request data = {0};
> +       struct kvm_vcpu *vcpu = &svm->vcpu;
> +       struct kvm *kvm = vcpu->kvm;
> +       struct kvm_sev_info *sev;
> +       unsigned long rc;
> +       int err;
> +
> +       if (!sev_snp_guest(vcpu->kvm)) {
> +               rc = SEV_RET_INVALID_GUEST;
> +               goto e_fail;
> +       }
> +
> +       sev = &to_kvm_svm(kvm)->sev_info;

Ditto why not due this above?

> +
> +       mutex_lock(&sev->guest_req_lock);
> +
> +       rc = snp_setup_guest_buf(svm, &data, req_gpa, resp_gpa);
> +       if (rc)
> +               goto unlock;
> +
> +       rc = sev_issue_cmd(kvm, SEV_CMD_SNP_GUEST_REQUEST, &data, &err);
> +       if (rc)
> +               /* use the firmware error code */
> +               rc = err;
> +
> +       snp_cleanup_guest_buf(&data, &rc);
> +
> +unlock:
> +       mutex_unlock(&sev->guest_req_lock);
> +
> +e_fail:
> +       svm_set_ghcb_sw_exit_info_2(vcpu, rc);
> +}
> +
> +static void snp_handle_ext_guest_request(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
> +{
> +       struct sev_data_snp_guest_request req = {0};
> +       struct kvm_vcpu *vcpu = &svm->vcpu;
> +       struct kvm *kvm = vcpu->kvm;
> +       unsigned long data_npages;
> +       struct kvm_sev_info *sev;
> +       unsigned long rc, err;
> +       u64 data_gpa;
> +
> +       if (!sev_snp_guest(vcpu->kvm)) {
> +               rc = SEV_RET_INVALID_GUEST;
> +               goto e_fail;
> +       }
> +
> +       sev = &to_kvm_svm(kvm)->sev_info;
> +
> +       data_gpa = vcpu->arch.regs[VCPU_REGS_RAX];
> +       data_npages = vcpu->arch.regs[VCPU_REGS_RBX];
> +
> +       if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) {
> +               rc = SEV_RET_INVALID_ADDRESS;
> +               goto e_fail;
> +       }
> +
> +       /* Verify that requested blob will fit in certificate buffer */
> +       if ((data_npages << PAGE_SHIFT) > SEV_FW_BLOB_MAX_SIZE) {
> +               rc = SEV_RET_INVALID_PARAM;
> +               goto e_fail;
> +       }
> +
> +       mutex_lock(&sev->guest_req_lock);
> +
> +       rc = snp_setup_guest_buf(svm, &req, req_gpa, resp_gpa);
> +       if (rc)
> +               goto unlock;
> +
> +       rc = snp_guest_ext_guest_request(&req, (unsigned long)sev->snp_certs_data,
> +                                        &data_npages, &err);
> +       if (rc) {
> +               /*
> +                * If buffer length is small then return the expected
> +                * length in rbx.
> +                */
> +               if (err == SNP_GUEST_REQ_INVALID_LEN)
> +                       vcpu->arch.regs[VCPU_REGS_RBX] = data_npages;
> +
> +               /* pass the firmware error code */
> +               rc = err;
> +               goto cleanup;
> +       }
> +
> +       /* Copy the certificate blob in the guest memory */
> +       if (data_npages &&
> +           kvm_write_guest(kvm, data_gpa, sev->snp_certs_data, data_npages << PAGE_SHIFT))
> +               rc = SEV_RET_INVALID_ADDRESS;

Since at this point the PSP FW has correctly executed the command and
incremented the VMPCK sequence number I think we need another error
signal here since this will tell the guest the PSP had an error so it
will not know if the VMPCK sequence number should be incremented.

> +
> +cleanup:
> +       snp_cleanup_guest_buf(&req, &rc);
> +
> +unlock:
> +       mutex_unlock(&sev->guest_req_lock);
> +
> +e_fail:
> +       svm_set_ghcb_sw_exit_info_2(vcpu, rc);
> +}
> +
>  static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
>  {
>         struct vmcb_control_area *control = &svm->vmcb->control;
> @@ -3753,6 +3923,20 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
>                 svm_set_ghcb_sw_exit_info_2(vcpu, rc);
>                 break;
>         }
> +       case SVM_VMGEXIT_GUEST_REQUEST: {
> +               snp_handle_guest_request(svm, control->exit_info_1, control->exit_info_2);
> +
> +               ret = 1;
> +               break;
> +       }
> +       case SVM_VMGEXIT_EXT_GUEST_REQUEST: {
> +               snp_handle_ext_guest_request(svm,
> +                                            control->exit_info_1,
> +                                            control->exit_info_2);
> +
> +               ret = 1;
> +               break;
> +       }
>         case SVM_VMGEXIT_UNSUPPORTED_EVENT:
>                 vcpu_unimpl(vcpu,
>                             "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 3fd95193ed8d..3be24da1a743 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -98,6 +98,8 @@ struct kvm_sev_info {
>         u64 snp_init_flags;
>         void *snp_context;      /* SNP guest context page */
>         spinlock_t psc_lock;
> +       void *snp_certs_data;
> +       struct mutex guest_req_lock;
>  };
>
>  struct kvm_svm {
> --
> 2.25.1
>
Ashish Kalra June 27, 2022, 7:03 p.m. UTC | #2
[Public]

Hello Peter,

-----Original Message-----
From: Peter Gonda <pgonda@google.com> 
Sent: Friday, June 24, 2022 11:25 AM
To: Kalra, Ashish <Ashish.Kalra@amd.com>
Cc: the arch/x86 maintainers <x86@kernel.org>; LKML <linux-kernel@vger.kernel.org>; kvm list <kvm@vger.kernel.org>; linux-coco@lists.linux.dev; linux-mm@kvack.org; Linux Crypto Mailing List <linux-crypto@vger.kernel.org>; Thomas Gleixner <tglx@linutronix.de>; Ingo Molnar <mingo@redhat.com>; Joerg Roedel <jroedel@suse.de>; Lendacky, Thomas <Thomas.Lendacky@amd.com>; H. Peter Anvin <hpa@zytor.com>; Ard Biesheuvel <ardb@kernel.org>; Paolo Bonzini <pbonzini@redhat.com>; Sean Christopherson <seanjc@google.com>; Vitaly Kuznetsov <vkuznets@redhat.com>; Jim Mattson <jmattson@google.com>; Andy Lutomirski <luto@kernel.org>; Dave Hansen <dave.hansen@linux.intel.com>; Sergio Lopez <slp@redhat.com>; Peter Zijlstra <peterz@infradead.org>; Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>; David Rientjes <rientjes@google.com>; Dov Murik <dovmurik@linux.ibm.com>; Tobin Feldman-Fitzthum <tobin@ibm.com>; Borislav Petkov <bp@alien8.de>; Roth, Michael <Michael.Roth@amd.com>; Vlastimil Babka <vbabka@suse.cz>; Kirill A . Shutemov <kirill@shutemov.name>; Andi Kleen <ak@linux.intel.com>; Tony Luck <tony.luck@intel.com>; Marc Orr <marcorr@google.com>; Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@linux.intel.com>; Alper Gun <alpergun@google.com>; Dr. David Alan Gilbert <dgilbert@redhat.com>; jarkko@kernel.org
Subject: Re: [PATCH Part2 v6 42/49] KVM: SVM: Provide support for SNP_GUEST_REQUEST NAE event

On Mon, Jun 20, 2022 at 5:13 PM Ashish Kalra <Ashish.Kalra@amd.com> wrote:
>
> From: Brijesh Singh <brijesh.singh@amd.com>
>
> Version 2 of GHCB specification added the support for two SNP Guest 
> Request Message NAE events. The events allows for an SEV-SNP guest to 
> make request to the SEV-SNP firmware through hypervisor using the 
> SNP_GUEST_REQUEST API define in the SEV-SNP firmware specification.
>
> The SNP_EXT_GUEST_REQUEST is similar to SNP_GUEST_REQUEST with the 
> difference of an additional certificate blob that can be passed 
> through the SNP_SET_CONFIG ioctl defined in the CCP driver. The CCP 
> driver provides snp_guest_ext_guest_request() that is used by the KVM 
> to get both the report and certificate data at once.
>
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
>  arch/x86/kvm/svm/sev.c | 196 +++++++++++++++++++++++++++++++++++++++--
>  arch/x86/kvm/svm/svm.h |   2 +
>  2 files changed, 192 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 
> 7fc0fad87054..089af21a4efe 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -343,6 +343,7 @@ static int sev_guest_init(struct kvm *kvm, struct 
> kvm_sev_cmd *argp)
>
>                 spin_lock_init(&sev->psc_lock);
>                 ret = sev_snp_init(&argp->error);
> +               mutex_init(&sev->guest_req_lock);
>         } else {
>                 ret = sev_platform_init(&argp->error);
>         }
> @@ -1884,23 +1885,39 @@ int sev_vm_move_enc_context_from(struct kvm 
> *kvm, unsigned int source_fd)
>
>  static void *snp_context_create(struct kvm *kvm, struct kvm_sev_cmd 
> *argp)  {
> +       void *context = NULL, *certs_data = NULL, *resp_page = NULL;

>Is the NULL setting here unnecessary since all of these are set via functions snp_alloc_firmware_page(), kmalloc(), and
>snp_alloc_firmware_page() respectively?

Yes, they don't need to be set to NULL.

> +       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
>         struct sev_data_snp_gctx_create data = {};
> -       void *context;
>         int rc;
>
> +       /* Allocate memory used for the certs data in SNP guest request */
> +       certs_data = kmalloc(SEV_FW_BLOB_MAX_SIZE, GFP_KERNEL_ACCOUNT);
> +       if (!certs_data)
> +               return NULL;

>I think we want to use kzalloc() here to ensure we never give the guest uninitialized kernel memory.

Yes.

> +
>         /* Allocate memory for context page */
>         context = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT);
>         if (!context)
> -               return NULL;
> +               goto e_free;
> +
> +       /* Allocate a firmware buffer used during the guest command handling. */
> +       resp_page = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT);
> +       if (!resp_page)
> +               goto e_free;

>|resp_page| doesn't appear to be used anywhere?

>
>         data.gctx_paddr = __psp_pa(context);
>         rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_GCTX_CREATE, &data, &argp->error);
> -       if (rc) {
> -               snp_free_firmware_page(context);
> -               return NULL;
> -       }
> +       if (rc)
> +               goto e_free;
> +
> +       sev->snp_certs_data = certs_data;
>
>         return context;
> +
> +e_free:
> +       snp_free_firmware_page(context);
> +       kfree(certs_data);
> +       return NULL;
>  }
>
>  static int snp_bind_asid(struct kvm *kvm, int *error) @@ -2565,6 
> +2582,8 @@ static int snp_decommission_context(struct kvm *kvm)
>         snp_free_firmware_page(sev->snp_context);
>         sev->snp_context = NULL;
>
> +       kfree(sev->snp_certs_data);
> +
>         return 0;
>  }
>
> @@ -3077,6 +3096,8 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code)
>         case SVM_VMGEXIT_UNSUPPORTED_EVENT:
>         case SVM_VMGEXIT_HV_FEATURES:
>         case SVM_VMGEXIT_PSC:
> +       case SVM_VMGEXIT_GUEST_REQUEST:
> +       case SVM_VMGEXIT_EXT_GUEST_REQUEST:
>                 break;
>         default:
>                 reason = GHCB_ERR_INVALID_EVENT; @@ -3502,6 +3523,155 
> @@ static unsigned long snp_handle_page_state_change(struct vcpu_svm *svm)
>         return rc ? map_to_psc_vmgexit_code(rc) : 0;  }
>
> +static unsigned long snp_setup_guest_buf(struct vcpu_svm *svm,
> +                                        struct sev_data_snp_guest_request *data,
> +                                        gpa_t req_gpa, gpa_t 
> +resp_gpa) {
> +       struct kvm_vcpu *vcpu = &svm->vcpu;
> +       struct kvm *kvm = vcpu->kvm;
> +       kvm_pfn_t req_pfn, resp_pfn;
> +       struct kvm_sev_info *sev;
> +
> +       sev = &to_kvm_svm(kvm)->sev_info;

>This is normally done at declaration in this file. Why not here?

>      struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
Ok.

> +
> +       if (!IS_ALIGNED(req_gpa, PAGE_SIZE) || !IS_ALIGNED(resp_gpa, PAGE_SIZE))
> +               return SEV_RET_INVALID_PARAM;
> +
> +       req_pfn = gfn_to_pfn(kvm, gpa_to_gfn(req_gpa));
> +       if (is_error_noslot_pfn(req_pfn))
> +               return SEV_RET_INVALID_ADDRESS;
> +
> +       resp_pfn = gfn_to_pfn(kvm, gpa_to_gfn(resp_gpa));
> +       if (is_error_noslot_pfn(resp_pfn))
> +               return SEV_RET_INVALID_ADDRESS;
> +
> +       if (rmp_make_private(resp_pfn, 0, PG_LEVEL_4K, 0, true))
> +               return SEV_RET_INVALID_ADDRESS;
> +
> +       data->gctx_paddr = __psp_pa(sev->snp_context);
> +       data->req_paddr = __sme_set(req_pfn << PAGE_SHIFT);
> +       data->res_paddr = __sme_set(resp_pfn << PAGE_SHIFT);
> +
> +       return 0;
> +}
> +
> +static void snp_cleanup_guest_buf(struct sev_data_snp_guest_request 
> +*data, unsigned long *rc) {
> +       u64 pfn = __sme_clr(data->res_paddr) >> PAGE_SHIFT;
> +       int ret;
> +
> +       ret = snp_page_reclaim(pfn);
> +       if (ret)
> +               *rc = SEV_RET_INVALID_ADDRESS;

>Do we need a diff error code here? This means the page the guest gives us is now "stuck" in the FW owned state. How would the guest know this is the case? We return the exact same error in snp_setup_guest_buf() if the resp_gpa isn't page aligned >so now if the guest ever sees a SEV_RET_INVALID_ADDRESS I think its only safe option is to either try and page_state_change it to a know state or mark it as unusable memory.

If snp_page_reclaim() fails, it will invoke snp_leak_pages() which will indicate memory failure and trigger memory recovery mechanisms and that should drop the pages or mark them unusable.

> +
> +       ret = rmp_make_shared(pfn, PG_LEVEL_4K);
> +       if (ret)
> +               *rc = SEV_RET_INVALID_ADDRESS;

>Ditto here I think we need some way to signal to the guest what state this page is on return to guest execution.

>Also these errors shadow over FW successes, this means the guest's guest-request-sequence-numbers are now out of sync meaning this VMPCK is unusable less the guest risk reusing the AES IV (which would break the confidentiality/integrity). >Should we have a way to signal to the guest the FW has successfully run your command but we could not change the page states back correctly, so the guest should increment their sequence numbers.

Yes, that is an important observation, and the sequence numbers are now out of sync.
But as this is an error path, so what's the guarantee that the next guest message request will succeed completely,  isn’t it better to let the 
FW reject any subsequent guest messages once it has detected that the sequence numbers are out of sync ? 

> +}
> +
> +static void snp_handle_guest_request(struct vcpu_svm *svm, gpa_t 
> +req_gpa, gpa_t resp_gpa) {
> +       struct sev_data_snp_guest_request data = {0};
> +       struct kvm_vcpu *vcpu = &svm->vcpu;
> +       struct kvm *kvm = vcpu->kvm;
> +       struct kvm_sev_info *sev;
> +       unsigned long rc;
> +       int err;
> +
> +       if (!sev_snp_guest(vcpu->kvm)) {
> +               rc = SEV_RET_INVALID_GUEST;
> +               goto e_fail;
> +       }
> +
> +       sev = &to_kvm_svm(kvm)->sev_info;

>Ditto why not due this above?
Ok.

> +
> +       mutex_lock(&sev->guest_req_lock);
> +
> +       rc = snp_setup_guest_buf(svm, &data, req_gpa, resp_gpa);
> +       if (rc)
> +               goto unlock;
> +
> +       rc = sev_issue_cmd(kvm, SEV_CMD_SNP_GUEST_REQUEST, &data, &err);
> +       if (rc)
> +               /* use the firmware error code */
> +               rc = err;
> +
> +       snp_cleanup_guest_buf(&data, &rc);
> +
> +unlock:
> +       mutex_unlock(&sev->guest_req_lock);
> +
> +e_fail:
> +       svm_set_ghcb_sw_exit_info_2(vcpu, rc); }
> +
> +static void snp_handle_ext_guest_request(struct vcpu_svm *svm, gpa_t 
> +req_gpa, gpa_t resp_gpa) {
> +       struct sev_data_snp_guest_request req = {0};
> +       struct kvm_vcpu *vcpu = &svm->vcpu;
> +       struct kvm *kvm = vcpu->kvm;
> +       unsigned long data_npages;
> +       struct kvm_sev_info *sev;
> +       unsigned long rc, err;
> +       u64 data_gpa;
> +
> +       if (!sev_snp_guest(vcpu->kvm)) {
> +               rc = SEV_RET_INVALID_GUEST;
> +               goto e_fail;
> +       }
> +
> +       sev = &to_kvm_svm(kvm)->sev_info;
> +
> +       data_gpa = vcpu->arch.regs[VCPU_REGS_RAX];
> +       data_npages = vcpu->arch.regs[VCPU_REGS_RBX];
> +
> +       if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) {
> +               rc = SEV_RET_INVALID_ADDRESS;
> +               goto e_fail;
> +       }
> +
> +       /* Verify that requested blob will fit in certificate buffer */
> +       if ((data_npages << PAGE_SHIFT) > SEV_FW_BLOB_MAX_SIZE) {
> +               rc = SEV_RET_INVALID_PARAM;
> +               goto e_fail;
> +       }
> +
> +       mutex_lock(&sev->guest_req_lock);
> +
> +       rc = snp_setup_guest_buf(svm, &req, req_gpa, resp_gpa);
> +       if (rc)
> +               goto unlock;
> +
> +       rc = snp_guest_ext_guest_request(&req, (unsigned long)sev->snp_certs_data,
> +                                        &data_npages, &err);
> +       if (rc) {
> +               /*
> +                * If buffer length is small then return the expected
> +                * length in rbx.
> +                */
> +               if (err == SNP_GUEST_REQ_INVALID_LEN)
> +                       vcpu->arch.regs[VCPU_REGS_RBX] = data_npages;
> +
> +               /* pass the firmware error code */
> +               rc = err;
> +               goto cleanup;
> +       }
> +
> +       /* Copy the certificate blob in the guest memory */
> +       if (data_npages &&
> +           kvm_write_guest(kvm, data_gpa, sev->snp_certs_data, data_npages << PAGE_SHIFT))
> +               rc = SEV_RET_INVALID_ADDRESS;

>Since at this point the PSP FW has correctly executed the command and incremented the VMPCK sequence number I think we need another error signal here since this will tell the guest the PSP had an error so it will not know if the VMPCK sequence >number should be incremented.

Similarly as above, as this is an error path, so what's the guarantee that the next guest message request will succeed completely,  isn’t it better to let the 
FW reject any subsequent guest messages once it has detected that the sequence numbers are out of sync ?

> +
> +cleanup:
> +       snp_cleanup_guest_buf(&req, &rc);
> +
> +unlock:
> +       mutex_unlock(&sev->guest_req_lock);
> +
> +e_fail:
> +       svm_set_ghcb_sw_exit_info_2(vcpu, rc); }
> +
>  static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)  {
>         struct vmcb_control_area *control = &svm->vmcb->control; @@ 
> -3753,6 +3923,20 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
>                 svm_set_ghcb_sw_exit_info_2(vcpu, rc);
>                 break;
>         }
> +       case SVM_VMGEXIT_GUEST_REQUEST: {
> +               snp_handle_guest_request(svm, control->exit_info_1, 
> + control->exit_info_2);
> +
> +               ret = 1;
> +               break;
> +       }
> +       case SVM_VMGEXIT_EXT_GUEST_REQUEST: {
> +               snp_handle_ext_guest_request(svm,
> +                                            control->exit_info_1,
> +                                            control->exit_info_2);
> +
> +               ret = 1;
> +               break;
> +       }
>         case SVM_VMGEXIT_UNSUPPORTED_EVENT:
>                 vcpu_unimpl(vcpu,
>                             "vmgexit: unsupported event - 
> exit_info_1=%#llx, exit_info_2=%#llx\n", diff --git 
> a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 
> 3fd95193ed8d..3be24da1a743 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -98,6 +98,8 @@ struct kvm_sev_info {
>         u64 snp_init_flags;
>         void *snp_context;      /* SNP guest context page */
>         spinlock_t psc_lock;
> +       void *snp_certs_data;
> +       struct mutex guest_req_lock;
>  };
>
>  struct kvm_svm {
> --
> 2.25.1
>
Tom Lendacky Oct. 21, 2022, 7:06 p.m. UTC | #3
On 6/20/22 18:13, Ashish Kalra wrote:
> From: Brijesh Singh <brijesh.singh@amd.com>
> 
> Version 2 of GHCB specification added the support for two SNP Guest
> Request Message NAE events. The events allows for an SEV-SNP guest to
> make request to the SEV-SNP firmware through hypervisor using the
> SNP_GUEST_REQUEST API define in the SEV-SNP firmware specification.
> 
> The SNP_EXT_GUEST_REQUEST is similar to SNP_GUEST_REQUEST with the
> difference of an additional certificate blob that can be passed through
> the SNP_SET_CONFIG ioctl defined in the CCP driver. The CCP driver
> provides snp_guest_ext_guest_request() that is used by the KVM to get
> both the report and certificate data at once.
> 
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
>   arch/x86/kvm/svm/sev.c | 196 +++++++++++++++++++++++++++++++++++++++--
>   arch/x86/kvm/svm/svm.h |   2 +
>   2 files changed, 192 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 7fc0fad87054..089af21a4efe 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c

> +static void snp_handle_ext_guest_request(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
> +{
> +	struct sev_data_snp_guest_request req = {0};
> +	struct kvm_vcpu *vcpu = &svm->vcpu;
> +	struct kvm *kvm = vcpu->kvm;
> +	unsigned long data_npages;
> +	struct kvm_sev_info *sev;
> +	unsigned long rc, err;
> +	u64 data_gpa;
> +
> +	if (!sev_snp_guest(vcpu->kvm)) {
> +		rc = SEV_RET_INVALID_GUEST;
> +		goto e_fail;
> +	}
> +
> +	sev = &to_kvm_svm(kvm)->sev_info;
> +
> +	data_gpa = vcpu->arch.regs[VCPU_REGS_RAX];
> +	data_npages = vcpu->arch.regs[VCPU_REGS_RBX];
> +
> +	if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) {
> +		rc = SEV_RET_INVALID_ADDRESS;
> +		goto e_fail;
> +	}
> +
> +	/* Verify that requested blob will fit in certificate buffer */
> +	if ((data_npages << PAGE_SHIFT) > SEV_FW_BLOB_MAX_SIZE) {

Not sure this is a valid check...  Isn't it OK if the guest has supplied 
more room than is required? If the guest supplies 8 pages and the 
hypervisor only needs to copy 1 page of data (or the SEV_FW_BLOB_MAX_SIZE 
number of pages) that shouldn't be an error. I think this check can go, right?

Thanks,
Tom

> +		rc = SEV_RET_INVALID_PARAM;
> +		goto e_fail;
> +	}
> +
> +	mutex_lock(&sev->guest_req_lock);
> +
> +	rc = snp_setup_guest_buf(svm, &req, req_gpa, resp_gpa);
> +	if (rc)
> +		goto unlock;
> +
> +	rc = snp_guest_ext_guest_request(&req, (unsigned long)sev->snp_certs_data,
> +					 &data_npages, &err);
> +	if (rc) {
> +		/*
> +		 * If buffer length is small then return the expected
> +		 * length in rbx.
> +		 */
> +		if (err == SNP_GUEST_REQ_INVALID_LEN)
> +			vcpu->arch.regs[VCPU_REGS_RBX] = data_npages;
> +
> +		/* pass the firmware error code */
> +		rc = err;
> +		goto cleanup;
> +	}
> +
> +	/* Copy the certificate blob in the guest memory */
> +	if (data_npages &&
> +	    kvm_write_guest(kvm, data_gpa, sev->snp_certs_data, data_npages << PAGE_SHIFT))
> +		rc = SEV_RET_INVALID_ADDRESS;
> +
> +cleanup:
> +	snp_cleanup_guest_buf(&req, &rc);
> +
> +unlock:
> +	mutex_unlock(&sev->guest_req_lock);
> +
> +e_fail:
> +	svm_set_ghcb_sw_exit_info_2(vcpu, rc);
> +}
> +
Ashish Kalra Oct. 21, 2022, 9:12 p.m. UTC | #4
Hello Tom,

On 10/21/2022 2:06 PM, Tom Lendacky wrote:
> On 6/20/22 18:13, Ashish Kalra wrote:
>> From: Brijesh Singh <brijesh.singh@amd.com>
>>
>> Version 2 of GHCB specification added the support for two SNP Guest
>> Request Message NAE events. The events allows for an SEV-SNP guest to
>> make request to the SEV-SNP firmware through hypervisor using the
>> SNP_GUEST_REQUEST API define in the SEV-SNP firmware specification.
>>
>> The SNP_EXT_GUEST_REQUEST is similar to SNP_GUEST_REQUEST with the
>> difference of an additional certificate blob that can be passed through
>> the SNP_SET_CONFIG ioctl defined in the CCP driver. The CCP driver
>> provides snp_guest_ext_guest_request() that is used by the KVM to get
>> both the report and certificate data at once.
>>
>> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
>> ---
>>   arch/x86/kvm/svm/sev.c | 196 +++++++++++++++++++++++++++++++++++++++--
>>   arch/x86/kvm/svm/svm.h |   2 +
>>   2 files changed, 192 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
>> index 7fc0fad87054..089af21a4efe 100644
>> --- a/arch/x86/kvm/svm/sev.c
>> +++ b/arch/x86/kvm/svm/sev.c
> 
>> +static void snp_handle_ext_guest_request(struct vcpu_svm *svm, gpa_t 
>> req_gpa, gpa_t resp_gpa)
>> +{
>> +    struct sev_data_snp_guest_request req = {0};
>> +    struct kvm_vcpu *vcpu = &svm->vcpu;
>> +    struct kvm *kvm = vcpu->kvm;
>> +    unsigned long data_npages;
>> +    struct kvm_sev_info *sev;
>> +    unsigned long rc, err;
>> +    u64 data_gpa;
>> +
>> +    if (!sev_snp_guest(vcpu->kvm)) {
>> +        rc = SEV_RET_INVALID_GUEST;
>> +        goto e_fail;
>> +    }
>> +
>> +    sev = &to_kvm_svm(kvm)->sev_info;
>> +
>> +    data_gpa = vcpu->arch.regs[VCPU_REGS_RAX];
>> +    data_npages = vcpu->arch.regs[VCPU_REGS_RBX];
>> +
>> +    if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) {
>> +        rc = SEV_RET_INVALID_ADDRESS;
>> +        goto e_fail;
>> +    }
>> +
>> +    /* Verify that requested blob will fit in certificate buffer */
>> +    if ((data_npages << PAGE_SHIFT) > SEV_FW_BLOB_MAX_SIZE) {
> 
> Not sure this is a valid check...  Isn't it OK if the guest has supplied 
> more room than is required? If the guest supplies 8 pages and the 
> hypervisor only needs to copy 1 page of data (or the 
> SEV_FW_BLOB_MAX_SIZE number of pages) that shouldn't be an error. I 
> think this check can go, right?
> 

Agreed.

The check should probably be
  if ((data_npages << PAGE_SHIFT) < SEV_FW_BLOB_MAX_SIZE)

and that check already exists in:

snp_guest_ext_guest_request(...)
{
...
...
    /*
          * Check if there is enough space to copy the certificate 
chain. Otherwise
          * return ERROR code defined in the GHCB specification.
          */
         expected_npages = sev->snp_certs_len >> PAGE_SHIFT;
         if (*npages < expected_npages) {
                 *npages = expected_npages;
                 *fw_err = SNP_GUEST_REQ_INVALID_LEN;
                 return -EINVAL;
         }
...

Thanks,
Ashish

> Thanks,
> Tom
> 
>> +        rc = SEV_RET_INVALID_PARAM;
>> +        goto e_fail;
>> +    }
>> +
>> +    mutex_lock(&sev->guest_req_lock);
>> +
>> +    rc = snp_setup_guest_buf(svm, &req, req_gpa, resp_gpa);
>> +    if (rc)
>> +        goto unlock;
>> +
>> +    rc = snp_guest_ext_guest_request(&req, (unsigned 
>> long)sev->snp_certs_data,
>> +                     &data_npages, &err);
>> +    if (rc) {
>> +        /*
>> +         * If buffer length is small then return the expected
>> +         * length in rbx.
>> +         */
>> +        if (err == SNP_GUEST_REQ_INVALID_LEN)
>> +            vcpu->arch.regs[VCPU_REGS_RBX] = data_npages;
>> +
>> +        /* pass the firmware error code */
>> +        rc = err;
>> +        goto cleanup;
>> +    }
>> +
>> +    /* Copy the certificate blob in the guest memory */
>> +    if (data_npages &&
>> +        kvm_write_guest(kvm, data_gpa, sev->snp_certs_data, 
>> data_npages << PAGE_SHIFT))
>> +        rc = SEV_RET_INVALID_ADDRESS;
>> +
>> +cleanup:
>> +    snp_cleanup_guest_buf(&req, &rc);
>> +
>> +unlock:
>> +    mutex_unlock(&sev->guest_req_lock);
>> +
>> +e_fail:
>> +    svm_set_ghcb_sw_exit_info_2(vcpu, rc);
>> +}
>> +
Tom Lendacky Oct. 21, 2022, 9:30 p.m. UTC | #5
On 10/21/22 16:12, Kalra, Ashish wrote:
> Hello Tom,
> 
> On 10/21/2022 2:06 PM, Tom Lendacky wrote:
>> On 6/20/22 18:13, Ashish Kalra wrote:
>>> From: Brijesh Singh <brijesh.singh@amd.com>
>>>
>>> Version 2 of GHCB specification added the support for two SNP Guest
>>> Request Message NAE events. The events allows for an SEV-SNP guest to
>>> make request to the SEV-SNP firmware through hypervisor using the
>>> SNP_GUEST_REQUEST API define in the SEV-SNP firmware specification.
>>>
>>> The SNP_EXT_GUEST_REQUEST is similar to SNP_GUEST_REQUEST with the
>>> difference of an additional certificate blob that can be passed through
>>> the SNP_SET_CONFIG ioctl defined in the CCP driver. The CCP driver
>>> provides snp_guest_ext_guest_request() that is used by the KVM to get
>>> both the report and certificate data at once.
>>>
>>> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
>>> ---
>>>   arch/x86/kvm/svm/sev.c | 196 +++++++++++++++++++++++++++++++++++++++--
>>>   arch/x86/kvm/svm/svm.h |   2 +
>>>   2 files changed, 192 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
>>> index 7fc0fad87054..089af21a4efe 100644
>>> --- a/arch/x86/kvm/svm/sev.c
>>> +++ b/arch/x86/kvm/svm/sev.c
>>
>>> +static void snp_handle_ext_guest_request(struct vcpu_svm *svm, gpa_t 
>>> req_gpa, gpa_t resp_gpa)
>>> +{
>>> +    struct sev_data_snp_guest_request req = {0};
>>> +    struct kvm_vcpu *vcpu = &svm->vcpu;
>>> +    struct kvm *kvm = vcpu->kvm;
>>> +    unsigned long data_npages;
>>> +    struct kvm_sev_info *sev;
>>> +    unsigned long rc, err;
>>> +    u64 data_gpa;
>>> +
>>> +    if (!sev_snp_guest(vcpu->kvm)) {
>>> +        rc = SEV_RET_INVALID_GUEST;
>>> +        goto e_fail;
>>> +    }
>>> +
>>> +    sev = &to_kvm_svm(kvm)->sev_info;
>>> +
>>> +    data_gpa = vcpu->arch.regs[VCPU_REGS_RAX];
>>> +    data_npages = vcpu->arch.regs[VCPU_REGS_RBX];
>>> +
>>> +    if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) {
>>> +        rc = SEV_RET_INVALID_ADDRESS;
>>> +        goto e_fail;
>>> +    }
>>> +
>>> +    /* Verify that requested blob will fit in certificate buffer */
>>> +    if ((data_npages << PAGE_SHIFT) > SEV_FW_BLOB_MAX_SIZE) {
>>
>> Not sure this is a valid check...  Isn't it OK if the guest has supplied 
>> more room than is required? If the guest supplies 8 pages and the 
>> hypervisor only needs to copy 1 page of data (or the 
>> SEV_FW_BLOB_MAX_SIZE number of pages) that shouldn't be an error. I 
>> think this check can go, right?
>>
> 
> Agreed.
> 
> The check should probably be
>   if ((data_npages << PAGE_SHIFT) < SEV_FW_BLOB_MAX_SIZE)

No, the check should just be removed. If the number of pages required to 
hold the cert data is only 1, then a data_npages value of 1 is just fine 
(see below).

> 
> and that check already exists in:
> 
> snp_guest_ext_guest_request(...)
> {
> ...
> ...
>     /*
>           * Check if there is enough space to copy the certificate chain. 
> Otherwise
>           * return ERROR code defined in the GHCB specification.
>           */
>          expected_npages = sev->snp_certs_len >> PAGE_SHIFT;
>          if (*npages < expected_npages) {

If expected_npages is 1, then an *npages value of 1 is OK. But if you put 
the check in above that you want, you would never get here with an *npages 
value of 1.

Thanks,
Tom

>                  *npages = expected_npages;
>                  *fw_err = SNP_GUEST_REQ_INVALID_LEN;
>                  return -EINVAL;
>          }
> ...
> 
> Thanks,
> Ashish
> 
>> Thanks,
>> Tom
>>
>>> +        rc = SEV_RET_INVALID_PARAM;
>>> +        goto e_fail;
>>> +    }
>>> +
>>> +    mutex_lock(&sev->guest_req_lock);
>>> +
>>> +    rc = snp_setup_guest_buf(svm, &req, req_gpa, resp_gpa);
>>> +    if (rc)
>>> +        goto unlock;
>>> +
>>> +    rc = snp_guest_ext_guest_request(&req, (unsigned 
>>> long)sev->snp_certs_data,
>>> +                     &data_npages, &err);
>>> +    if (rc) {
>>> +        /*
>>> +         * If buffer length is small then return the expected
>>> +         * length in rbx.
>>> +         */
>>> +        if (err == SNP_GUEST_REQ_INVALID_LEN)
>>> +            vcpu->arch.regs[VCPU_REGS_RBX] = data_npages;
>>> +
>>> +        /* pass the firmware error code */
>>> +        rc = err;
>>> +        goto cleanup;
>>> +    }
>>> +
>>> +    /* Copy the certificate blob in the guest memory */
>>> +    if (data_npages &&
>>> +        kvm_write_guest(kvm, data_gpa, sev->snp_certs_data, 
>>> data_npages << PAGE_SHIFT))
>>> +        rc = SEV_RET_INVALID_ADDRESS;
>>> +
>>> +cleanup:
>>> +    snp_cleanup_guest_buf(&req, &rc);
>>> +
>>> +unlock:
>>> +    mutex_unlock(&sev->guest_req_lock);
>>> +
>>> +e_fail:
>>> +    svm_set_ghcb_sw_exit_info_2(vcpu, rc);
>>> +}
>>> +
Ashish Kalra Oct. 21, 2022, 9:56 p.m. UTC | #6
On 10/21/2022 4:30 PM, Tom Lendacky wrote:
> On 10/21/22 16:12, Kalra, Ashish wrote:
>> Hello Tom,
>>
>> On 10/21/2022 2:06 PM, Tom Lendacky wrote:
>>> On 6/20/22 18:13, Ashish Kalra wrote:
>>>> From: Brijesh Singh <brijesh.singh@amd.com>
>>>>
>>>> Version 2 of GHCB specification added the support for two SNP Guest
>>>> Request Message NAE events. The events allows for an SEV-SNP guest to
>>>> make request to the SEV-SNP firmware through hypervisor using the
>>>> SNP_GUEST_REQUEST API define in the SEV-SNP firmware specification.
>>>>
>>>> The SNP_EXT_GUEST_REQUEST is similar to SNP_GUEST_REQUEST with the
>>>> difference of an additional certificate blob that can be passed through
>>>> the SNP_SET_CONFIG ioctl defined in the CCP driver. The CCP driver
>>>> provides snp_guest_ext_guest_request() that is used by the KVM to get
>>>> both the report and certificate data at once.
>>>>
>>>> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
>>>> ---
>>>>   arch/x86/kvm/svm/sev.c | 196 
>>>> +++++++++++++++++++++++++++++++++++++++--
>>>>   arch/x86/kvm/svm/svm.h |   2 +
>>>>   2 files changed, 192 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
>>>> index 7fc0fad87054..089af21a4efe 100644
>>>> --- a/arch/x86/kvm/svm/sev.c
>>>> +++ b/arch/x86/kvm/svm/sev.c
>>>
>>>> +static void snp_handle_ext_guest_request(struct vcpu_svm *svm, 
>>>> gpa_t req_gpa, gpa_t resp_gpa)
>>>> +{
>>>> +    struct sev_data_snp_guest_request req = {0};
>>>> +    struct kvm_vcpu *vcpu = &svm->vcpu;
>>>> +    struct kvm *kvm = vcpu->kvm;
>>>> +    unsigned long data_npages;
>>>> +    struct kvm_sev_info *sev;
>>>> +    unsigned long rc, err;
>>>> +    u64 data_gpa;
>>>> +
>>>> +    if (!sev_snp_guest(vcpu->kvm)) {
>>>> +        rc = SEV_RET_INVALID_GUEST;
>>>> +        goto e_fail;
>>>> +    }
>>>> +
>>>> +    sev = &to_kvm_svm(kvm)->sev_info;
>>>> +
>>>> +    data_gpa = vcpu->arch.regs[VCPU_REGS_RAX];
>>>> +    data_npages = vcpu->arch.regs[VCPU_REGS_RBX];
>>>> +
>>>> +    if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) {
>>>> +        rc = SEV_RET_INVALID_ADDRESS;
>>>> +        goto e_fail;
>>>> +    }
>>>> +
>>>> +    /* Verify that requested blob will fit in certificate buffer */
>>>> +    if ((data_npages << PAGE_SHIFT) > SEV_FW_BLOB_MAX_SIZE) {
>>>
>>> Not sure this is a valid check...  Isn't it OK if the guest has 
>>> supplied more room than is required? If the guest supplies 8 pages 
>>> and the hypervisor only needs to copy 1 page of data (or the 
>>> SEV_FW_BLOB_MAX_SIZE number of pages) that shouldn't be an error. I 
>>> think this check can go, right?
>>>
>>
>> Agreed.
>>
>> The check should probably be
>>   if ((data_npages << PAGE_SHIFT) < SEV_FW_BLOB_MAX_SIZE)
> 
> No, the check should just be removed. If the number of pages required to 
> hold the cert data is only 1, then a data_npages value of 1 is just fine 
> (see below).
> 

Ok.

>>
>> and that check already exists in:
>>
>> snp_guest_ext_guest_request(...)
>> {
>> ...
>> ...
>>     /*
>>           * Check if there is enough space to copy the certificate 
>> chain. Otherwise
>>           * return ERROR code defined in the GHCB specification.
>>           */
>>          expected_npages = sev->snp_certs_len >> PAGE_SHIFT;
>>          if (*npages < expected_npages) {
> 
> If expected_npages is 1, then an *npages value of 1 is OK. But if you 
> put the check in above that you want, you would never get here with an 
> *npages value of 1.

Yes that is correct, i incorrectly assumed that
sev->snp_certs_len == SEV_FW_BLOB_MAX_SIZE

Thanks,
Ashish

> 
> Thanks,
> Tom
> 
>>                  *npages = expected_npages;
>>                  *fw_err = SNP_GUEST_REQ_INVALID_LEN;
>>                  return -EINVAL;
>>          }
>> ...
>>
>> Thanks,
>> Ashish
>>
>>> Thanks,
>>> Tom
>>>
>>>> +        rc = SEV_RET_INVALID_PARAM;
>>>> +        goto e_fail;
>>>> +    }
>>>> +
>>>> +    mutex_lock(&sev->guest_req_lock);
>>>> +
>>>> +    rc = snp_setup_guest_buf(svm, &req, req_gpa, resp_gpa);
>>>> +    if (rc)
>>>> +        goto unlock;
>>>> +
>>>> +    rc = snp_guest_ext_guest_request(&req, (unsigned 
>>>> long)sev->snp_certs_data,
>>>> +                     &data_npages, &err);
>>>> +    if (rc) {
>>>> +        /*
>>>> +         * If buffer length is small then return the expected
>>>> +         * length in rbx.
>>>> +         */
>>>> +        if (err == SNP_GUEST_REQ_INVALID_LEN)
>>>> +            vcpu->arch.regs[VCPU_REGS_RBX] = data_npages;
>>>> +
>>>> +        /* pass the firmware error code */
>>>> +        rc = err;
>>>> +        goto cleanup;
>>>> +    }
>>>> +
>>>> +    /* Copy the certificate blob in the guest memory */
>>>> +    if (data_npages &&
>>>> +        kvm_write_guest(kvm, data_gpa, sev->snp_certs_data, 
>>>> data_npages << PAGE_SHIFT))
>>>> +        rc = SEV_RET_INVALID_ADDRESS;
>>>> +
>>>> +cleanup:
>>>> +    snp_cleanup_guest_buf(&req, &rc);
>>>> +
>>>> +unlock:
>>>> +    mutex_unlock(&sev->guest_req_lock);
>>>> +
>>>> +e_fail:
>>>> +    svm_set_ghcb_sw_exit_info_2(vcpu, rc);
>>>> +}
>>>> +
diff mbox series

Patch

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 7fc0fad87054..089af21a4efe 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -343,6 +343,7 @@  static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
 
 		spin_lock_init(&sev->psc_lock);
 		ret = sev_snp_init(&argp->error);
+		mutex_init(&sev->guest_req_lock);
 	} else {
 		ret = sev_platform_init(&argp->error);
 	}
@@ -1884,23 +1885,39 @@  int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
 
 static void *snp_context_create(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
+	void *context = NULL, *certs_data = NULL, *resp_page = NULL;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 	struct sev_data_snp_gctx_create data = {};
-	void *context;
 	int rc;
 
+	/* Allocate memory used for the certs data in SNP guest request */
+	certs_data = kmalloc(SEV_FW_BLOB_MAX_SIZE, GFP_KERNEL_ACCOUNT);
+	if (!certs_data)
+		return NULL;
+
 	/* Allocate memory for context page */
 	context = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT);
 	if (!context)
-		return NULL;
+		goto e_free;
+
+	/* Allocate a firmware buffer used during the guest command handling. */
+	resp_page = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT);
+	if (!resp_page)
+		goto e_free;
 
 	data.gctx_paddr = __psp_pa(context);
 	rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_GCTX_CREATE, &data, &argp->error);
-	if (rc) {
-		snp_free_firmware_page(context);
-		return NULL;
-	}
+	if (rc)
+		goto e_free;
+
+	sev->snp_certs_data = certs_data;
 
 	return context;
+
+e_free:
+	snp_free_firmware_page(context);
+	kfree(certs_data);
+	return NULL;
 }
 
 static int snp_bind_asid(struct kvm *kvm, int *error)
@@ -2565,6 +2582,8 @@  static int snp_decommission_context(struct kvm *kvm)
 	snp_free_firmware_page(sev->snp_context);
 	sev->snp_context = NULL;
 
+	kfree(sev->snp_certs_data);
+
 	return 0;
 }
 
@@ -3077,6 +3096,8 @@  static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code)
 	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
 	case SVM_VMGEXIT_HV_FEATURES:
 	case SVM_VMGEXIT_PSC:
+	case SVM_VMGEXIT_GUEST_REQUEST:
+	case SVM_VMGEXIT_EXT_GUEST_REQUEST:
 		break;
 	default:
 		reason = GHCB_ERR_INVALID_EVENT;
@@ -3502,6 +3523,155 @@  static unsigned long snp_handle_page_state_change(struct vcpu_svm *svm)
 	return rc ? map_to_psc_vmgexit_code(rc) : 0;
 }
 
+static unsigned long snp_setup_guest_buf(struct vcpu_svm *svm,
+					 struct sev_data_snp_guest_request *data,
+					 gpa_t req_gpa, gpa_t resp_gpa)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+	struct kvm *kvm = vcpu->kvm;
+	kvm_pfn_t req_pfn, resp_pfn;
+	struct kvm_sev_info *sev;
+
+	sev = &to_kvm_svm(kvm)->sev_info;
+
+	if (!IS_ALIGNED(req_gpa, PAGE_SIZE) || !IS_ALIGNED(resp_gpa, PAGE_SIZE))
+		return SEV_RET_INVALID_PARAM;
+
+	req_pfn = gfn_to_pfn(kvm, gpa_to_gfn(req_gpa));
+	if (is_error_noslot_pfn(req_pfn))
+		return SEV_RET_INVALID_ADDRESS;
+
+	resp_pfn = gfn_to_pfn(kvm, gpa_to_gfn(resp_gpa));
+	if (is_error_noslot_pfn(resp_pfn))
+		return SEV_RET_INVALID_ADDRESS;
+
+	if (rmp_make_private(resp_pfn, 0, PG_LEVEL_4K, 0, true))
+		return SEV_RET_INVALID_ADDRESS;
+
+	data->gctx_paddr = __psp_pa(sev->snp_context);
+	data->req_paddr = __sme_set(req_pfn << PAGE_SHIFT);
+	data->res_paddr = __sme_set(resp_pfn << PAGE_SHIFT);
+
+	return 0;
+}
+
+static void snp_cleanup_guest_buf(struct sev_data_snp_guest_request *data, unsigned long *rc)
+{
+	u64 pfn = __sme_clr(data->res_paddr) >> PAGE_SHIFT;
+	int ret;
+
+	ret = snp_page_reclaim(pfn);
+	if (ret)
+		*rc = SEV_RET_INVALID_ADDRESS;
+
+	ret = rmp_make_shared(pfn, PG_LEVEL_4K);
+	if (ret)
+		*rc = SEV_RET_INVALID_ADDRESS;
+}
+
+static void snp_handle_guest_request(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
+{
+	struct sev_data_snp_guest_request data = {0};
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_sev_info *sev;
+	unsigned long rc;
+	int err;
+
+	if (!sev_snp_guest(vcpu->kvm)) {
+		rc = SEV_RET_INVALID_GUEST;
+		goto e_fail;
+	}
+
+	sev = &to_kvm_svm(kvm)->sev_info;
+
+	mutex_lock(&sev->guest_req_lock);
+
+	rc = snp_setup_guest_buf(svm, &data, req_gpa, resp_gpa);
+	if (rc)
+		goto unlock;
+
+	rc = sev_issue_cmd(kvm, SEV_CMD_SNP_GUEST_REQUEST, &data, &err);
+	if (rc)
+		/* use the firmware error code */
+		rc = err;
+
+	snp_cleanup_guest_buf(&data, &rc);
+
+unlock:
+	mutex_unlock(&sev->guest_req_lock);
+
+e_fail:
+	svm_set_ghcb_sw_exit_info_2(vcpu, rc);
+}
+
+static void snp_handle_ext_guest_request(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
+{
+	struct sev_data_snp_guest_request req = {0};
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long data_npages;
+	struct kvm_sev_info *sev;
+	unsigned long rc, err;
+	u64 data_gpa;
+
+	if (!sev_snp_guest(vcpu->kvm)) {
+		rc = SEV_RET_INVALID_GUEST;
+		goto e_fail;
+	}
+
+	sev = &to_kvm_svm(kvm)->sev_info;
+
+	data_gpa = vcpu->arch.regs[VCPU_REGS_RAX];
+	data_npages = vcpu->arch.regs[VCPU_REGS_RBX];
+
+	if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) {
+		rc = SEV_RET_INVALID_ADDRESS;
+		goto e_fail;
+	}
+
+	/* Verify that requested blob will fit in certificate buffer */
+	if ((data_npages << PAGE_SHIFT) > SEV_FW_BLOB_MAX_SIZE) {
+		rc = SEV_RET_INVALID_PARAM;
+		goto e_fail;
+	}
+
+	mutex_lock(&sev->guest_req_lock);
+
+	rc = snp_setup_guest_buf(svm, &req, req_gpa, resp_gpa);
+	if (rc)
+		goto unlock;
+
+	rc = snp_guest_ext_guest_request(&req, (unsigned long)sev->snp_certs_data,
+					 &data_npages, &err);
+	if (rc) {
+		/*
+		 * If buffer length is small then return the expected
+		 * length in rbx.
+		 */
+		if (err == SNP_GUEST_REQ_INVALID_LEN)
+			vcpu->arch.regs[VCPU_REGS_RBX] = data_npages;
+
+		/* pass the firmware error code */
+		rc = err;
+		goto cleanup;
+	}
+
+	/* Copy the certificate blob in the guest memory */
+	if (data_npages &&
+	    kvm_write_guest(kvm, data_gpa, sev->snp_certs_data, data_npages << PAGE_SHIFT))
+		rc = SEV_RET_INVALID_ADDRESS;
+
+cleanup:
+	snp_cleanup_guest_buf(&req, &rc);
+
+unlock:
+	mutex_unlock(&sev->guest_req_lock);
+
+e_fail:
+	svm_set_ghcb_sw_exit_info_2(vcpu, rc);
+}
+
 static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
@@ -3753,6 +3923,20 @@  int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 		svm_set_ghcb_sw_exit_info_2(vcpu, rc);
 		break;
 	}
+	case SVM_VMGEXIT_GUEST_REQUEST: {
+		snp_handle_guest_request(svm, control->exit_info_1, control->exit_info_2);
+
+		ret = 1;
+		break;
+	}
+	case SVM_VMGEXIT_EXT_GUEST_REQUEST: {
+		snp_handle_ext_guest_request(svm,
+					     control->exit_info_1,
+					     control->exit_info_2);
+
+		ret = 1;
+		break;
+	}
 	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
 		vcpu_unimpl(vcpu,
 			    "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 3fd95193ed8d..3be24da1a743 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -98,6 +98,8 @@  struct kvm_sev_info {
 	u64 snp_init_flags;
 	void *snp_context;      /* SNP guest context page */
 	spinlock_t psc_lock;
+	void *snp_certs_data;
+	struct mutex guest_req_lock;
 };
 
 struct kvm_svm {