diff mbox series

[3/5] crypto: ccp: Play nice with vmalloc'd memory for SEV command structs

Message ID 20210402233702.3291792-4-seanjc@google.com
State New
Headers show
Series ccp: KVM: SVM: Use stack for SEV command buffers | expand

Commit Message

Sean Christopherson April 2, 2021, 11:37 p.m. UTC
Copy vmalloc'd data to an internal buffer instead of rejecting outright
so that callers can put SEV command buffers on the stack without running
afoul of CONFIG_VMAP_STACK=y.  Currently, the largest supported command
takes a 68 byte buffer, i.e. pretty much every command can be put on the
stack.  Because sev_cmd_mutex is held for the entirety of a transaction,
only a single bounce buffer is required.

Use a flexible array for the buffer, sized to hold the largest known
command.   Alternatively, the buffer could be a union of all known
command structs, but that would incur a higher maintenance cost due to
the need to update the union for every command in addition to updating
the existing sev_cmd_buffer_len().

Align the buffer to an 8-byte boundary, mimicking the alignment that
would be provided by the compiler if any of the structs were embedded
directly.  Note, sizeof() correctly incorporates this alignment.

Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 drivers/crypto/ccp/sev-dev.c | 33 +++++++++++++++++++++++++++------
 drivers/crypto/ccp/sev-dev.h |  7 +++++++
 2 files changed, 34 insertions(+), 6 deletions(-)

Comments

Christophe Leroy April 3, 2021, 5:05 p.m. UTC | #1
Le 03/04/2021 à 01:37, Sean Christopherson a écrit :
> Copy vmalloc'd data to an internal buffer instead of rejecting outright

> so that callers can put SEV command buffers on the stack without running

> afoul of CONFIG_VMAP_STACK=y.  Currently, the largest supported command

> takes a 68 byte buffer, i.e. pretty much every command can be put on the

> stack.  Because sev_cmd_mutex is held for the entirety of a transaction,

> only a single bounce buffer is required.

> 

> Use a flexible array for the buffer, sized to hold the largest known

> command.   Alternatively, the buffer could be a union of all known

> command structs, but that would incur a higher maintenance cost due to

> the need to update the union for every command in addition to updating

> the existing sev_cmd_buffer_len().

> 

> Align the buffer to an 8-byte boundary, mimicking the alignment that

> would be provided by the compiler if any of the structs were embedded

> directly.  Note, sizeof() correctly incorporates this alignment.

> 

> Cc: Brijesh Singh <brijesh.singh@amd.com>

> Cc: Borislav Petkov <bp@suse.de>

> Cc: Tom Lendacky <thomas.lendacky@amd.com>

> Signed-off-by: Sean Christopherson <seanjc@google.com>

> ---

>   drivers/crypto/ccp/sev-dev.c | 33 +++++++++++++++++++++++++++------

>   drivers/crypto/ccp/sev-dev.h |  7 +++++++

>   2 files changed, 34 insertions(+), 6 deletions(-)

> 

> diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c

> index 4c513318f16a..6d5882290cfc 100644

> --- a/drivers/crypto/ccp/sev-dev.c

> +++ b/drivers/crypto/ccp/sev-dev.c

> @@ -135,13 +135,14 @@ static int sev_cmd_buffer_len(int cmd)

>   	return 0;

>   }

>   

> -static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)

> +static int __sev_do_cmd_locked(int cmd, void *__data, int *psp_ret)

>   {

>   	struct psp_device *psp = psp_master;

>   	struct sev_device *sev;

>   	unsigned int phys_lsb, phys_msb;

>   	unsigned int reg, ret = 0;

>   	int buf_len;

> +	void *data;

>   

>   	if (!psp || !psp->sev_data)

>   		return -ENODEV;

> @@ -152,11 +153,21 @@ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)

>   	sev = psp->sev_data;

>   

>   	buf_len = sev_cmd_buffer_len(cmd);

> -	if (WARN_ON_ONCE(!!data != !!buf_len))

> +	if (WARN_ON_ONCE(!!__data != !!buf_len))

>   		return -EINVAL;

>   

> -	if (WARN_ON_ONCE(data && is_vmalloc_addr(data)))

> -		return -EINVAL;

> +	if (__data && is_vmalloc_addr(__data)) {


I think you want to use !virt_addr_valid() here, because vmalloc addresses are not the only problem. 
For instance, module addresses are a problem as well.

> +		/*

> +		 * If the incoming buffer is virtually allocated, copy it to

> +		 * the driver's scratch buffer as __pa() will not work for such

> +		 * addresses, vmalloc_to_page() is not guaranteed to succeed,

> +		 * and vmalloc'd data may not be physically contiguous.

> +		 */

> +		data = sev->cmd_buf;

> +		memcpy(data, __data, buf_len);

> +	} else {

> +		data = __data;

> +	}

>   

>   	/* Get the physical address of the command buffer */

>   	phys_lsb = data ? lower_32_bits(__psp_pa(data)) : 0;

> @@ -204,6 +215,13 @@ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)

>   	print_hex_dump_debug("(out): ", DUMP_PREFIX_OFFSET, 16, 2, data,

>   			     buf_len, false);

>   

> +	/*

> +	 * Copy potential output from the PSP back to __data.  Do this even on

> +	 * failure in case the caller wants to glean something from the error.

> +	 */

> +	if (__data && data != __data)

> +		memcpy(__data, data, buf_len);

> +

>   	return ret;

>   }

>   

> @@ -978,9 +996,12 @@ int sev_dev_init(struct psp_device *psp)

>   {

>   	struct device *dev = psp->dev;

>   	struct sev_device *sev;

> -	int ret = -ENOMEM;

> +	int ret = -ENOMEM, cmd_buf_size = 0, i;

>   

> -	sev = devm_kzalloc(dev, sizeof(*sev), GFP_KERNEL);

> +	for (i = 0; i < SEV_CMD_MAX; i++)

> +		cmd_buf_size = max(cmd_buf_size, sev_cmd_buffer_len(i));

> +

> +	sev = devm_kzalloc(dev, sizeof(*sev) + cmd_buf_size, GFP_KERNEL);

>   	if (!sev)

>   		goto e_err;

>   

> diff --git a/drivers/crypto/ccp/sev-dev.h b/drivers/crypto/ccp/sev-dev.h

> index dd5c4fe82914..b43283ce2d73 100644

> --- a/drivers/crypto/ccp/sev-dev.h

> +++ b/drivers/crypto/ccp/sev-dev.h

> @@ -52,6 +52,13 @@ struct sev_device {

>   	u8 api_major;

>   	u8 api_minor;

>   	u8 build;

> +

> +	/*

> +	 * Buffer used for incoming commands whose physical address cannot be

> +	 * resolved via __pa(), e.g. stack pointers when CONFIG_VMAP_STACK=y.

> +	 * Note, alignment isn't strictly required.

> +	 */

> +	u8 cmd_buf[] __aligned(8);

>   };

>   

>   int sev_dev_init(struct psp_device *psp);

>
Christophe Leroy April 3, 2021, 5:13 p.m. UTC | #2
Le 03/04/2021 à 01:37, Sean Christopherson a écrit :
> Copy vmalloc'd data to an internal buffer instead of rejecting outright

> so that callers can put SEV command buffers on the stack without running

> afoul of CONFIG_VMAP_STACK=y.  Currently, the largest supported command

> takes a 68 byte buffer, i.e. pretty much every command can be put on the

> stack.  Because sev_cmd_mutex is held for the entirety of a transaction,

> only a single bounce buffer is required.

> 

> Use a flexible array for the buffer, sized to hold the largest known

> command.   Alternatively, the buffer could be a union of all known

> command structs, but that would incur a higher maintenance cost due to

> the need to update the union for every command in addition to updating

> the existing sev_cmd_buffer_len().

> 

> Align the buffer to an 8-byte boundary, mimicking the alignment that

> would be provided by the compiler if any of the structs were embedded

> directly.  Note, sizeof() correctly incorporates this alignment.

> 

> Cc: Brijesh Singh <brijesh.singh@amd.com>

> Cc: Borislav Petkov <bp@suse.de>

> Cc: Tom Lendacky <thomas.lendacky@amd.com>

> Signed-off-by: Sean Christopherson <seanjc@google.com>

> ---

>   drivers/crypto/ccp/sev-dev.c | 33 +++++++++++++++++++++++++++------

>   drivers/crypto/ccp/sev-dev.h |  7 +++++++

>   2 files changed, 34 insertions(+), 6 deletions(-)

> 

> diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c

> index 4c513318f16a..6d5882290cfc 100644

> --- a/drivers/crypto/ccp/sev-dev.c

> +++ b/drivers/crypto/ccp/sev-dev.c

> @@ -135,13 +135,14 @@ static int sev_cmd_buffer_len(int cmd)

>   	return 0;

>   }

>   

> -static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)

> +static int __sev_do_cmd_locked(int cmd, void *__data, int *psp_ret)

>   {

>   	struct psp_device *psp = psp_master;

>   	struct sev_device *sev;

>   	unsigned int phys_lsb, phys_msb;

>   	unsigned int reg, ret = 0;

>   	int buf_len;

> +	void *data;

>   

>   	if (!psp || !psp->sev_data)

>   		return -ENODEV;

> @@ -152,11 +153,21 @@ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)

>   	sev = psp->sev_data;

>   

>   	buf_len = sev_cmd_buffer_len(cmd);

> -	if (WARN_ON_ONCE(!!data != !!buf_len))

> +	if (WARN_ON_ONCE(!!__data != !!buf_len))


Why do you need a double !! ?
I think !__data != !buf_len should be enough.

>   		return -EINVAL;

>   

> -	if (WARN_ON_ONCE(data && is_vmalloc_addr(data)))

> -		return -EINVAL;

> +	if (__data && is_vmalloc_addr(__data)) {

> +		/*

> +		 * If the incoming buffer is virtually allocated, copy it to

> +		 * the driver's scratch buffer as __pa() will not work for such

> +		 * addresses, vmalloc_to_page() is not guaranteed to succeed,

> +		 * and vmalloc'd data may not be physically contiguous.

> +		 */

> +		data = sev->cmd_buf;

> +		memcpy(data, __data, buf_len);

> +	} else {

> +		data = __data;

> +	}

>   

>   	/* Get the physical address of the command buffer */

>   	phys_lsb = data ? lower_32_bits(__psp_pa(data)) : 0;

> @@ -204,6 +215,13 @@ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)

>   	print_hex_dump_debug("(out): ", DUMP_PREFIX_OFFSET, 16, 2, data,

>   			     buf_len, false);

>   

> +	/*

> +	 * Copy potential output from the PSP back to __data.  Do this even on

> +	 * failure in case the caller wants to glean something from the error.

> +	 */

> +	if (__data && data != __data)


IIUC, when __data is NULL, data is also NULL, so this double test is useless.

Checking data != __data should be enough

> +		memcpy(__data, data, buf_len);

> +

>   	return ret;

>   }

>   

> @@ -978,9 +996,12 @@ int sev_dev_init(struct psp_device *psp)

>   {

>   	struct device *dev = psp->dev;

>   	struct sev_device *sev;

> -	int ret = -ENOMEM;

> +	int ret = -ENOMEM, cmd_buf_size = 0, i;

>   

> -	sev = devm_kzalloc(dev, sizeof(*sev), GFP_KERNEL);

> +	for (i = 0; i < SEV_CMD_MAX; i++)

> +		cmd_buf_size = max(cmd_buf_size, sev_cmd_buffer_len(i));

> +

> +	sev = devm_kzalloc(dev, sizeof(*sev) + cmd_buf_size, GFP_KERNEL);

>   	if (!sev)

>   		goto e_err;

>   

> diff --git a/drivers/crypto/ccp/sev-dev.h b/drivers/crypto/ccp/sev-dev.h

> index dd5c4fe82914..b43283ce2d73 100644

> --- a/drivers/crypto/ccp/sev-dev.h

> +++ b/drivers/crypto/ccp/sev-dev.h

> @@ -52,6 +52,13 @@ struct sev_device {

>   	u8 api_major;

>   	u8 api_minor;

>   	u8 build;

> +

> +	/*

> +	 * Buffer used for incoming commands whose physical address cannot be

> +	 * resolved via __pa(), e.g. stack pointers when CONFIG_VMAP_STACK=y.

> +	 * Note, alignment isn't strictly required.

> +	 */

> +	u8 cmd_buf[] __aligned(8);

>   };

>   

>   int sev_dev_init(struct psp_device *psp);

>
Christophe Leroy April 4, 2021, 6:48 a.m. UTC | #3
Le 03/04/2021 à 01:37, Sean Christopherson a écrit :
> Copy vmalloc'd data to an internal buffer instead of rejecting outright

> so that callers can put SEV command buffers on the stack without running

> afoul of CONFIG_VMAP_STACK=y.  Currently, the largest supported command

> takes a 68 byte buffer, i.e. pretty much every command can be put on the

> stack.  Because sev_cmd_mutex is held for the entirety of a transaction,

> only a single bounce buffer is required.

> 

> Use a flexible array for the buffer, sized to hold the largest known

> command.   Alternatively, the buffer could be a union of all known

> command structs, but that would incur a higher maintenance cost due to

> the need to update the union for every command in addition to updating

> the existing sev_cmd_buffer_len().

> 

> Align the buffer to an 8-byte boundary, mimicking the alignment that

> would be provided by the compiler if any of the structs were embedded

> directly.  Note, sizeof() correctly incorporates this alignment.

> 

> Cc: Brijesh Singh <brijesh.singh@amd.com>

> Cc: Borislav Petkov <bp@suse.de>

> Cc: Tom Lendacky <thomas.lendacky@amd.com>

> Signed-off-by: Sean Christopherson <seanjc@google.com>

> ---

>   drivers/crypto/ccp/sev-dev.c | 33 +++++++++++++++++++++++++++------

>   drivers/crypto/ccp/sev-dev.h |  7 +++++++

>   2 files changed, 34 insertions(+), 6 deletions(-)

> 

> diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c

> index 4c513318f16a..6d5882290cfc 100644

> --- a/drivers/crypto/ccp/sev-dev.c

> +++ b/drivers/crypto/ccp/sev-dev.c

> @@ -135,13 +135,14 @@ static int sev_cmd_buffer_len(int cmd)

>   	return 0;

>   }

>   

> -static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)

> +static int __sev_do_cmd_locked(int cmd, void *__data, int *psp_ret)

>   {

>   	struct psp_device *psp = psp_master;

>   	struct sev_device *sev;

>   	unsigned int phys_lsb, phys_msb;

>   	unsigned int reg, ret = 0;

>   	int buf_len;

> +	void *data;

>   

>   	if (!psp || !psp->sev_data)

>   		return -ENODEV;

> @@ -152,11 +153,21 @@ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)

>   	sev = psp->sev_data;

>   

>   	buf_len = sev_cmd_buffer_len(cmd);

> -	if (WARN_ON_ONCE(!!data != !!buf_len))

> +	if (WARN_ON_ONCE(!!__data != !!buf_len))

>   		return -EINVAL;

>   

> -	if (WARN_ON_ONCE(data && is_vmalloc_addr(data)))

> -		return -EINVAL;

> +	if (__data && is_vmalloc_addr(__data)) {

> +		/*

> +		 * If the incoming buffer is virtually allocated, copy it to

> +		 * the driver's scratch buffer as __pa() will not work for such

> +		 * addresses, vmalloc_to_page() is not guaranteed to succeed,

> +		 * and vmalloc'd data may not be physically contiguous.

> +		 */

> +		data = sev->cmd_buf;

> +		memcpy(data, __data, buf_len);

> +	} else {

> +		data = __data;

> +	}


I don't know how big commands are, but if they are small, it would probably be more efficient to 
unconditionally copy them to the buffer rather than doing the test.

>   

>   	/* Get the physical address of the command buffer */

>   	phys_lsb = data ? lower_32_bits(__psp_pa(data)) : 0;

> @@ -204,6 +215,13 @@ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)

>   	print_hex_dump_debug("(out): ", DUMP_PREFIX_OFFSET, 16, 2, data,

>   			     buf_len, false);

>   

> +	/*

> +	 * Copy potential output from the PSP back to __data.  Do this even on

> +	 * failure in case the caller wants to glean something from the error.

> +	 */

> +	if (__data && data != __data)

> +		memcpy(__data, data, buf_len);

> +

>   	return ret;

>   }

>   

> @@ -978,9 +996,12 @@ int sev_dev_init(struct psp_device *psp)

>   {

>   	struct device *dev = psp->dev;

>   	struct sev_device *sev;

> -	int ret = -ENOMEM;

> +	int ret = -ENOMEM, cmd_buf_size = 0, i;

>   

> -	sev = devm_kzalloc(dev, sizeof(*sev), GFP_KERNEL);

> +	for (i = 0; i < SEV_CMD_MAX; i++)

> +		cmd_buf_size = max(cmd_buf_size, sev_cmd_buffer_len(i));

> +

> +	sev = devm_kzalloc(dev, sizeof(*sev) + cmd_buf_size, GFP_KERNEL);

>   	if (!sev)

>   		goto e_err;

>   

> diff --git a/drivers/crypto/ccp/sev-dev.h b/drivers/crypto/ccp/sev-dev.h

> index dd5c4fe82914..b43283ce2d73 100644

> --- a/drivers/crypto/ccp/sev-dev.h

> +++ b/drivers/crypto/ccp/sev-dev.h

> @@ -52,6 +52,13 @@ struct sev_device {

>   	u8 api_major;

>   	u8 api_minor;

>   	u8 build;

> +

> +	/*

> +	 * Buffer used for incoming commands whose physical address cannot be

> +	 * resolved via __pa(), e.g. stack pointers when CONFIG_VMAP_STACK=y.

> +	 * Note, alignment isn't strictly required.

> +	 */

> +	u8 cmd_buf[] __aligned(8);

>   };

>   

>   int sev_dev_init(struct psp_device *psp);

>
Sean Christopherson April 5, 2021, 3:06 p.m. UTC | #4
On Sun, Apr 04, 2021, Christophe Leroy wrote:
> 

> Le 03/04/2021 à 01:37, Sean Christopherson a écrit :

> > @@ -152,11 +153,21 @@ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)

> >   	sev = psp->sev_data;

> >   	buf_len = sev_cmd_buffer_len(cmd);

> > -	if (WARN_ON_ONCE(!!data != !!buf_len))

> > +	if (WARN_ON_ONCE(!!__data != !!buf_len))

> >   		return -EINVAL;

> > -	if (WARN_ON_ONCE(data && is_vmalloc_addr(data)))

> > -		return -EINVAL;

> > +	if (__data && is_vmalloc_addr(__data)) {

> > +		/*

> > +		 * If the incoming buffer is virtually allocated, copy it to

> > +		 * the driver's scratch buffer as __pa() will not work for such

> > +		 * addresses, vmalloc_to_page() is not guaranteed to succeed,

> > +		 * and vmalloc'd data may not be physically contiguous.

> > +		 */

> > +		data = sev->cmd_buf;

> > +		memcpy(data, __data, buf_len);

> > +	} else {

> > +		data = __data;

> > +	}

> 

> I don't know how big commands are, but if they are small, it would probably

> be more efficient to unconditionally copy them to the buffer rather than

> doing the test.


Brijesh, I assume SNP support will need to copy the commands unconditionally? If
yes, it probably makes sense to do so now and avoid vmalloc dependencies
completely.  And I think that would allow for the removal of status_cmd_buf and
init_cmd_buf, or is there another reason those dedicated buffers exist?
Brijesh Singh April 5, 2021, 4:01 p.m. UTC | #5
On 4/5/21 10:06 AM, Sean Christopherson wrote:
> On Sun, Apr 04, 2021, Christophe Leroy wrote:

>> Le 03/04/2021 à 01:37, Sean Christopherson a écrit :

>>> @@ -152,11 +153,21 @@ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)

>>>   	sev = psp->sev_data;

>>>   	buf_len = sev_cmd_buffer_len(cmd);

>>> -	if (WARN_ON_ONCE(!!data != !!buf_len))

>>> +	if (WARN_ON_ONCE(!!__data != !!buf_len))

>>>   		return -EINVAL;

>>> -	if (WARN_ON_ONCE(data && is_vmalloc_addr(data)))

>>> -		return -EINVAL;

>>> +	if (__data && is_vmalloc_addr(__data)) {

>>> +		/*

>>> +		 * If the incoming buffer is virtually allocated, copy it to

>>> +		 * the driver's scratch buffer as __pa() will not work for such

>>> +		 * addresses, vmalloc_to_page() is not guaranteed to succeed,

>>> +		 * and vmalloc'd data may not be physically contiguous.

>>> +		 */

>>> +		data = sev->cmd_buf;

>>> +		memcpy(data, __data, buf_len);

>>> +	} else {

>>> +		data = __data;

>>> +	}

>> I don't know how big commands are, but if they are small, it would probably

>> be more efficient to unconditionally copy them to the buffer rather than

>> doing the test.

> Brijesh, I assume SNP support will need to copy the commands unconditionally? If

> yes, it probably makes sense to do so now and avoid vmalloc dependencies

> completely.  And I think that would allow for the removal of status_cmd_buf and

> init_cmd_buf, or is there another reason those dedicated buffers exist?



Yes, we need to copy the commands unconditionally for the SNP support.
It makes sense to avoid the vmalloc dependencies. I can't think of any
reason why we would need the status_cmd_buf and init_cmd_buf after those
changes.
diff mbox series

Patch

diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 4c513318f16a..6d5882290cfc 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -135,13 +135,14 @@  static int sev_cmd_buffer_len(int cmd)
 	return 0;
 }
 
-static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
+static int __sev_do_cmd_locked(int cmd, void *__data, int *psp_ret)
 {
 	struct psp_device *psp = psp_master;
 	struct sev_device *sev;
 	unsigned int phys_lsb, phys_msb;
 	unsigned int reg, ret = 0;
 	int buf_len;
+	void *data;
 
 	if (!psp || !psp->sev_data)
 		return -ENODEV;
@@ -152,11 +153,21 @@  static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
 	sev = psp->sev_data;
 
 	buf_len = sev_cmd_buffer_len(cmd);
-	if (WARN_ON_ONCE(!!data != !!buf_len))
+	if (WARN_ON_ONCE(!!__data != !!buf_len))
 		return -EINVAL;
 
-	if (WARN_ON_ONCE(data && is_vmalloc_addr(data)))
-		return -EINVAL;
+	if (__data && is_vmalloc_addr(__data)) {
+		/*
+		 * If the incoming buffer is virtually allocated, copy it to
+		 * the driver's scratch buffer as __pa() will not work for such
+		 * addresses, vmalloc_to_page() is not guaranteed to succeed,
+		 * and vmalloc'd data may not be physically contiguous.
+		 */
+		data = sev->cmd_buf;
+		memcpy(data, __data, buf_len);
+	} else {
+		data = __data;
+	}
 
 	/* Get the physical address of the command buffer */
 	phys_lsb = data ? lower_32_bits(__psp_pa(data)) : 0;
@@ -204,6 +215,13 @@  static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
 	print_hex_dump_debug("(out): ", DUMP_PREFIX_OFFSET, 16, 2, data,
 			     buf_len, false);
 
+	/*
+	 * Copy potential output from the PSP back to __data.  Do this even on
+	 * failure in case the caller wants to glean something from the error.
+	 */
+	if (__data && data != __data)
+		memcpy(__data, data, buf_len);
+
 	return ret;
 }
 
@@ -978,9 +996,12 @@  int sev_dev_init(struct psp_device *psp)
 {
 	struct device *dev = psp->dev;
 	struct sev_device *sev;
-	int ret = -ENOMEM;
+	int ret = -ENOMEM, cmd_buf_size = 0, i;
 
-	sev = devm_kzalloc(dev, sizeof(*sev), GFP_KERNEL);
+	for (i = 0; i < SEV_CMD_MAX; i++)
+		cmd_buf_size = max(cmd_buf_size, sev_cmd_buffer_len(i));
+
+	sev = devm_kzalloc(dev, sizeof(*sev) + cmd_buf_size, GFP_KERNEL);
 	if (!sev)
 		goto e_err;
 
diff --git a/drivers/crypto/ccp/sev-dev.h b/drivers/crypto/ccp/sev-dev.h
index dd5c4fe82914..b43283ce2d73 100644
--- a/drivers/crypto/ccp/sev-dev.h
+++ b/drivers/crypto/ccp/sev-dev.h
@@ -52,6 +52,13 @@  struct sev_device {
 	u8 api_major;
 	u8 api_minor;
 	u8 build;
+
+	/*
+	 * Buffer used for incoming commands whose physical address cannot be
+	 * resolved via __pa(), e.g. stack pointers when CONFIG_VMAP_STACK=y.
+	 * Note, alignment isn't strictly required.
+	 */
+	u8 cmd_buf[] __aligned(8);
 };
 
 int sev_dev_init(struct psp_device *psp);