Message ID | 43568d5e6395fcab48262fa5b3d1a5112918fbe8.1669372199.git.Rijo-john.Thomas@amd.com |
---|---|
State | New |
Headers | show |
Series | [1/1] crypto: ccp - Allocate TEE ring and cmd buffer using DMA APIs | expand |
On 12/6/22 06:30, Rijo Thomas wrote: > For AMD Secure Processor (ASP) to map and access TEE ring buffer, the > ring buffer address sent by host to ASP must be a real physical > address and the pages must be physically contiguous. > > In a virtualized environment though, when the driver is running in a > guest VM, the pages allocated by __get_free_pages() may not be > contiguous in the host (or machine) physical address space. Guests > will see a guest (or pseudo) physical address and not the actual host > (or machine) physical address. The TEE running on ASP cannot decipher > pseudo physical addresses. It needs host or machine physical address. > > To resolve this problem, use DMA APIs for allocating buffers that must > be shared with TEE. This will ensure that the pages are contiguous in > host (or machine) address space. If the DMA handle is an IOVA, > translate it into a physical address before sending it to ASP. > > This patch also exports two APIs (one for buffer allocation and > another to free the buffer). This API can be used by AMD-TEE driver to > share buffers with TEE. > > Fixes: 33960acccfbd ("crypto: ccp - add TEE support for Raven Ridge") > Cc: Tom Lendacky <thomas.lendacky@amd.com> > Cc: stable@vger.kernel.org > Signed-off-by: Rijo Thomas <Rijo-john.Thomas@amd.com> > Co-developed-by: Jeshwanth <JESHWANTHKUMAR.NK@amd.com> > Signed-off-by: Jeshwanth <JESHWANTHKUMAR.NK@amd.com> > Reviewed-by: Devaraj Rangasamy <Devaraj.Rangasamy@amd.com> > --- > drivers/crypto/ccp/psp-dev.c | 6 +- > drivers/crypto/ccp/tee-dev.c | 116 ++++++++++++++++++++++------------- > drivers/crypto/ccp/tee-dev.h | 9 +-- > include/linux/psp-tee.h | 47 ++++++++++++++ > 4 files changed, 127 insertions(+), 51 deletions(-) > > diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c > index c9c741ac8442..2b86158d7435 100644 > --- a/drivers/crypto/ccp/psp-dev.c > +++ b/drivers/crypto/ccp/psp-dev.c > @@ -161,13 +161,13 @@ int psp_dev_init(struct sp_device *sp) > goto e_err; > } > > + if (sp->set_psp_master_device) > + sp->set_psp_master_device(sp); > + This worries me a bit... if psp_init() fails, it may still be referenced as the master device. What's the reason for moving it? > ret = psp_init(psp); > if (ret) > goto e_irq; > > - if (sp->set_psp_master_device) > - sp->set_psp_master_device(sp); > - > /* Enable interrupt */ > iowrite32(-1, psp->io_regs + psp->vdata->inten_reg); > > diff --git a/drivers/crypto/ccp/tee-dev.c b/drivers/crypto/ccp/tee-dev.c > index 5c9d47f3be37..1631d9851e54 100644 > --- a/drivers/crypto/ccp/tee-dev.c > +++ b/drivers/crypto/ccp/tee-dev.c > @@ -12,8 +12,9 @@ > #include <linux/mutex.h> > #include <linux/delay.h> > #include <linux/slab.h> > +#include <linux/dma-direct.h> > +#include <linux/iommu.h> > #include <linux/gfp.h> > -#include <linux/psp-sev.h> > #include <linux/psp-tee.h> > > #include "psp-dev.h" > @@ -21,25 +22,64 @@ > > static bool psp_dead; > > +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp) It looks like both calls to this use the same gfp_t values, you can probably eliminate from the call and just specify them in here. > +{ > + struct psp_device *psp = psp_get_master_device(); > + struct dma_buffer *dma_buf; > + struct iommu_domain *dom; > + > + if (!psp || !size) > + return NULL; > + > + dma_buf = kzalloc(sizeof(*dma_buf), GFP_KERNEL); > + if (!dma_buf) > + return NULL; > + > + dma_buf->vaddr = dma_alloc_coherent(psp->dev, size, &dma_buf->dma, gfp); > + if (!dma_buf->vaddr || !dma_buf->dma) { I don't think you can have one of these be NULL without both being NULL, but I guess it doesn't hurt to check. > + kfree(dma_buf); > + return NULL; > + } > + > + dma_buf->size = size; > + > + dom = iommu_get_domain_for_dev(psp->dev); > + if (dom) You need a nice comment above this all explaining that. I guess you're using the presence of a domain to determine whether you're running on bare-metal vs within a hypervisor? I'm not sure what will happen if the guest ever gets an emulated IOMMU... > + dma_buf->paddr = iommu_iova_to_phys(dom, dma_buf->dma); If you're just looking to get the physical address, why not just to an __pa(dma_buf->vaddr)? Also, paddr might not be the best name, since it isn't always a physical address, but I can't really think of something right now. Thanks, Tom > + else > + dma_buf->paddr = dma_buf->dma; > + > + return dma_buf; > +} > +EXPORT_SYMBOL(psp_tee_alloc_dmabuf); > + > +void psp_tee_free_dmabuf(struct dma_buffer *dma_buf) > +{ > + struct psp_device *psp = psp_get_master_device(); > + > + if (!psp || !dma_buf) > + return; > + > + dma_free_coherent(psp->dev, dma_buf->size, > + dma_buf->vaddr, dma_buf->dma); > + > + kfree(dma_buf); > +} > +EXPORT_SYMBOL(psp_tee_free_dmabuf); > + > static int tee_alloc_ring(struct psp_tee_device *tee, int ring_size) > { > struct ring_buf_manager *rb_mgr = &tee->rb_mgr; > - void *start_addr; > > if (!ring_size) > return -EINVAL; > > - /* We need actual physical address instead of DMA address, since > - * Trusted OS running on AMD Secure Processor will map this region > - */ > - start_addr = (void *)__get_free_pages(GFP_KERNEL, get_order(ring_size)); > - if (!start_addr) > + rb_mgr->ring_buf = psp_tee_alloc_dmabuf(ring_size, > + GFP_KERNEL | __GFP_ZERO); > + if (!rb_mgr->ring_buf) { > + dev_err(tee->dev, "ring allocation failed\n"); > return -ENOMEM; > - > - memset(start_addr, 0x0, ring_size); > - rb_mgr->ring_start = start_addr; > - rb_mgr->ring_size = ring_size; > - rb_mgr->ring_pa = __psp_pa(start_addr); > + } > mutex_init(&rb_mgr->mutex); > > return 0; > @@ -49,15 +89,8 @@ static void tee_free_ring(struct psp_tee_device *tee) > { > struct ring_buf_manager *rb_mgr = &tee->rb_mgr; > > - if (!rb_mgr->ring_start) > - return; > + psp_tee_free_dmabuf(rb_mgr->ring_buf); > > - free_pages((unsigned long)rb_mgr->ring_start, > - get_order(rb_mgr->ring_size)); > - > - rb_mgr->ring_start = NULL; > - rb_mgr->ring_size = 0; > - rb_mgr->ring_pa = 0; > mutex_destroy(&rb_mgr->mutex); > } > > @@ -81,35 +114,36 @@ static int tee_wait_cmd_poll(struct psp_tee_device *tee, unsigned int timeout, > return -ETIMEDOUT; > } > > -static > -struct tee_init_ring_cmd *tee_alloc_cmd_buffer(struct psp_tee_device *tee) > +struct dma_buffer *tee_alloc_cmd_buffer(struct psp_tee_device *tee) > { > struct tee_init_ring_cmd *cmd; > + struct dma_buffer *cmd_buffer; > > - cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); > - if (!cmd) > + cmd_buffer = psp_tee_alloc_dmabuf(sizeof(*cmd), > + GFP_KERNEL | __GFP_ZERO); > + if (!cmd_buffer) > return NULL; > > - cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_pa); > - cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_pa); > - cmd->size = tee->rb_mgr.ring_size; > + cmd = (struct tee_init_ring_cmd *)cmd_buffer->vaddr; > + cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_buf->paddr); > + cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_buf->paddr); > + cmd->size = tee->rb_mgr.ring_buf->size; > > dev_dbg(tee->dev, "tee: ring address: high = 0x%x low = 0x%x size = %u\n", > cmd->hi_addr, cmd->low_addr, cmd->size); > > - return cmd; > + return cmd_buffer; > } > > -static inline void tee_free_cmd_buffer(struct tee_init_ring_cmd *cmd) > +static inline void tee_free_cmd_buffer(struct dma_buffer *cmd_buffer) > { > - kfree(cmd); > + psp_tee_free_dmabuf(cmd_buffer); > } > > static int tee_init_ring(struct psp_tee_device *tee) > { > int ring_size = MAX_RING_BUFFER_ENTRIES * sizeof(struct tee_ring_cmd); > - struct tee_init_ring_cmd *cmd; > - phys_addr_t cmd_buffer; > + struct dma_buffer *cmd_buffer; > unsigned int reg; > int ret; > > @@ -123,21 +157,19 @@ static int tee_init_ring(struct psp_tee_device *tee) > > tee->rb_mgr.wptr = 0; > > - cmd = tee_alloc_cmd_buffer(tee); > - if (!cmd) { > + cmd_buffer = tee_alloc_cmd_buffer(tee); > + if (!cmd_buffer) { > tee_free_ring(tee); > return -ENOMEM; > } > > - cmd_buffer = __psp_pa((void *)cmd); > - > /* Send command buffer details to Trusted OS by writing to > * CPU-PSP message registers > */ > > - iowrite32(lower_32_bits(cmd_buffer), > + iowrite32(lower_32_bits(cmd_buffer->paddr), > tee->io_regs + tee->vdata->cmdbuff_addr_lo_reg); > - iowrite32(upper_32_bits(cmd_buffer), > + iowrite32(upper_32_bits(cmd_buffer->paddr), > tee->io_regs + tee->vdata->cmdbuff_addr_hi_reg); > iowrite32(TEE_RING_INIT_CMD, > tee->io_regs + tee->vdata->cmdresp_reg); > @@ -157,7 +189,7 @@ static int tee_init_ring(struct psp_tee_device *tee) > } > > free_buf: > - tee_free_cmd_buffer(cmd); > + tee_free_cmd_buffer(cmd_buffer); > > return ret; > } > @@ -167,7 +199,7 @@ static void tee_destroy_ring(struct psp_tee_device *tee) > unsigned int reg; > int ret; > > - if (!tee->rb_mgr.ring_start) > + if (!tee->rb_mgr.ring_buf->vaddr) > return; > > if (psp_dead) > @@ -256,7 +288,7 @@ static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id, > do { > /* Get pointer to ring buffer command entry */ > cmd = (struct tee_ring_cmd *) > - (tee->rb_mgr.ring_start + tee->rb_mgr.wptr); > + (tee->rb_mgr.ring_buf->vaddr + tee->rb_mgr.wptr); > > rptr = ioread32(tee->io_regs + tee->vdata->ring_rptr_reg); > > @@ -305,7 +337,7 @@ static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id, > > /* Update local copy of write pointer */ > tee->rb_mgr.wptr += sizeof(struct tee_ring_cmd); > - if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_size) > + if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_buf->size) > tee->rb_mgr.wptr = 0; > > /* Trigger interrupt to Trusted OS */ > diff --git a/drivers/crypto/ccp/tee-dev.h b/drivers/crypto/ccp/tee-dev.h > index 49d26158b71e..9238487ee8bf 100644 > --- a/drivers/crypto/ccp/tee-dev.h > +++ b/drivers/crypto/ccp/tee-dev.h > @@ -16,6 +16,7 @@ > > #include <linux/device.h> > #include <linux/mutex.h> > +#include <linux/psp-tee.h> > > #define TEE_DEFAULT_TIMEOUT 10 > #define MAX_BUFFER_SIZE 988 > @@ -48,17 +49,13 @@ struct tee_init_ring_cmd { > > /** > * struct ring_buf_manager - Helper structure to manage ring buffer. > - * @ring_start: starting address of ring buffer > - * @ring_size: size of ring buffer in bytes > - * @ring_pa: physical address of ring buffer > * @wptr: index to the last written entry in ring buffer > + * @ring_buf: ring buffer allocated using DMA api > */ > struct ring_buf_manager { > struct mutex mutex; /* synchronizes access to ring buffer */ > - void *ring_start; > - u32 ring_size; > - phys_addr_t ring_pa; > u32 wptr; > + struct dma_buffer *ring_buf; > }; > > struct psp_tee_device { > diff --git a/include/linux/psp-tee.h b/include/linux/psp-tee.h > index cb0c95d6d76b..c0fa922f24d4 100644 > --- a/include/linux/psp-tee.h > +++ b/include/linux/psp-tee.h > @@ -13,6 +13,7 @@ > > #include <linux/types.h> > #include <linux/errno.h> > +#include <linux/dma-mapping.h> > > /* This file defines the Trusted Execution Environment (TEE) interface commands > * and the API exported by AMD Secure Processor driver to communicate with > @@ -40,6 +41,20 @@ enum tee_cmd_id { > TEE_CMD_ID_UNMAP_SHARED_MEM, > }; > > +/** > + * struct dma_buffer - Structure for a DMA buffer. > + * @dma: DMA buffer address > + * @paddr: Physical address of DMA buffer > + * @vaddr: CPU virtual address of DMA buffer > + * @size: Size of DMA buffer in bytes > + */ > +struct dma_buffer { > + dma_addr_t dma; > + phys_addr_t paddr; > + void *vaddr; > + unsigned long size; > +}; > + > #ifdef CONFIG_CRYPTO_DEV_SP_PSP > /** > * psp_tee_process_cmd() - Process command in Trusted Execution Environment > @@ -75,6 +90,28 @@ int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, size_t len, > */ > int psp_check_tee_status(void); > > +/** > + * psp_tee_alloc_dmabuf() - Allocates memory of requested size and flags using > + * dma_alloc_coherent() API. > + * > + * This function can be used to allocate a shared memory region between the > + * host and PSP TEE. > + * > + * Returns: > + * non-NULL a valid pointer to struct dma_buffer > + * NULL on failure > + */ > +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp); > + > +/** > + * psp_tee_free_dmabuf() - Deallocates memory using dma_free_coherent() API. > + * > + * This function can be used to release shared memory region between host > + * and PSP TEE. > + * > + */ > +void psp_tee_free_dmabuf(struct dma_buffer *dma_buffer); > + > #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ > > static inline int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, > @@ -87,5 +124,15 @@ static inline int psp_check_tee_status(void) > { > return -ENODEV; > } > + > +static inline > +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp) > +{ > + return NULL; > +} > + > +static inline void psp_tee_free_dmabuf(struct dma_buffer *dma_buffer) > +{ > +} > #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ > #endif /* __PSP_TEE_H_ */
On 12/10/2022 2:31 AM, Tom Lendacky wrote: > On 12/6/22 06:30, Rijo Thomas wrote: >> For AMD Secure Processor (ASP) to map and access TEE ring buffer, the >> ring buffer address sent by host to ASP must be a real physical >> address and the pages must be physically contiguous. >> >> In a virtualized environment though, when the driver is running in a >> guest VM, the pages allocated by __get_free_pages() may not be >> contiguous in the host (or machine) physical address space. Guests >> will see a guest (or pseudo) physical address and not the actual host >> (or machine) physical address. The TEE running on ASP cannot decipher >> pseudo physical addresses. It needs host or machine physical address. >> >> To resolve this problem, use DMA APIs for allocating buffers that must >> be shared with TEE. This will ensure that the pages are contiguous in >> host (or machine) address space. If the DMA handle is an IOVA, >> translate it into a physical address before sending it to ASP. >> >> This patch also exports two APIs (one for buffer allocation and >> another to free the buffer). This API can be used by AMD-TEE driver to >> share buffers with TEE. >> >> Fixes: 33960acccfbd ("crypto: ccp - add TEE support for Raven Ridge") >> Cc: Tom Lendacky <thomas.lendacky@amd.com> >> Cc: stable@vger.kernel.org >> Signed-off-by: Rijo Thomas <Rijo-john.Thomas@amd.com> >> Co-developed-by: Jeshwanth <JESHWANTHKUMAR.NK@amd.com> >> Signed-off-by: Jeshwanth <JESHWANTHKUMAR.NK@amd.com> >> Reviewed-by: Devaraj Rangasamy <Devaraj.Rangasamy@amd.com> >> --- >> drivers/crypto/ccp/psp-dev.c | 6 +- >> drivers/crypto/ccp/tee-dev.c | 116 ++++++++++++++++++++++------------- >> drivers/crypto/ccp/tee-dev.h | 9 +-- >> include/linux/psp-tee.h | 47 ++++++++++++++ >> 4 files changed, 127 insertions(+), 51 deletions(-) >> >> diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c >> index c9c741ac8442..2b86158d7435 100644 >> --- a/drivers/crypto/ccp/psp-dev.c >> +++ b/drivers/crypto/ccp/psp-dev.c >> @@ -161,13 +161,13 @@ int psp_dev_init(struct sp_device *sp) >> goto e_err; >> } >> + if (sp->set_psp_master_device) >> + sp->set_psp_master_device(sp); >> + > > This worries me a bit... if psp_init() fails, it may still be referenced as the master device. What's the reason for moving it? Hmm. Okay, I see your point. In psp_tee_alloc_dmabuf(), we call psp_get_master_device(). Without above change, psp_get_master_device() returns NULL. I think in psp_dev_init(), we can add below error handling: ret = psp_init(psp); if (ret) goto e_init; ... e_init: if (sp->clear_psp_master_device) sp->clear_psp_master_device(sp); Will this help address your concern? > >> ret = psp_init(psp); >> if (ret) >> goto e_irq; >> - if (sp->set_psp_master_device) >> - sp->set_psp_master_device(sp); >> - >> /* Enable interrupt */ >> iowrite32(-1, psp->io_regs + psp->vdata->inten_reg); >> diff --git a/drivers/crypto/ccp/tee-dev.c b/drivers/crypto/ccp/tee-dev.c >> index 5c9d47f3be37..1631d9851e54 100644 >> --- a/drivers/crypto/ccp/tee-dev.c >> +++ b/drivers/crypto/ccp/tee-dev.c >> @@ -12,8 +12,9 @@ >> #include <linux/mutex.h> >> #include <linux/delay.h> >> #include <linux/slab.h> >> +#include <linux/dma-direct.h> >> +#include <linux/iommu.h> >> #include <linux/gfp.h> >> -#include <linux/psp-sev.h> >> #include <linux/psp-tee.h> >> #include "psp-dev.h" >> @@ -21,25 +22,64 @@ >> static bool psp_dead; >> +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp) > > It looks like both calls to this use the same gfp_t values, you can probably eliminate from the call and just specify them in here. > Okay, I will remove gfp_t flag from the function argument. >> +{ >> + struct psp_device *psp = psp_get_master_device(); >> + struct dma_buffer *dma_buf; >> + struct iommu_domain *dom; >> + >> + if (!psp || !size) >> + return NULL; >> + >> + dma_buf = kzalloc(sizeof(*dma_buf), GFP_KERNEL); >> + if (!dma_buf) >> + return NULL; >> + >> + dma_buf->vaddr = dma_alloc_coherent(psp->dev, size, &dma_buf->dma, gfp); >> + if (!dma_buf->vaddr || !dma_buf->dma) { > > I don't think you can have one of these be NULL without both being NULL, but I guess it doesn't hurt to check. > Okay, we will keep both checks for now. >> + kfree(dma_buf); >> + return NULL; >> + } >> + >> + dma_buf->size = size; >> + > + dom = iommu_get_domain_for_dev(psp->dev); >> + if (dom) > > You need a nice comment above this all explaining that. I guess you're using the presence of a domain to determine whether you're running on bare-metal vs within a hypervisor? I'm not sure what will happen if the guest ever gets an emulated IOMMU... Sure we will add a comment. We are not trying to determive bare-metal vs hypervisor here, but determine whether DMA handle returned by dma_alloc_coherent() is an IOVA or not. If the address is an IOVA, we convert IOVA to physical address using iommu_iova_to_phys(). This was our intention. > >> + dma_buf->paddr = iommu_iova_to_phys(dom, dma_buf->dma); > > If you're just looking to get the physical address, why not just to an __pa(dma_buf->vaddr)? > > Also, paddr might not be the best name, since it isn't always a physical address, but I can't really think of something right now. > We can use __pa(dma_buf->vaddr) only on bare-metal. In hypervisor, __pa(dma_buf->vaddr) gives pseudo-physical address; pseudo-physical address cannot be understood by ASP. ASP needs real physical address (aka machine address). Please see commit message. We chose the name paddr, since it's a (real) physical address that we want to send across to ASP. I am not sure, why you say - it isn't always a physical address. Thanks, Rijo > Thanks, > Tom > >> + else >> + dma_buf->paddr = dma_buf->dma; >> + >> + return dma_buf; >> +} >> +EXPORT_SYMBOL(psp_tee_alloc_dmabuf); >> + >> +void psp_tee_free_dmabuf(struct dma_buffer *dma_buf) >> +{ >> + struct psp_device *psp = psp_get_master_device(); >> + >> + if (!psp || !dma_buf) >> + return; >> + >> + dma_free_coherent(psp->dev, dma_buf->size, >> + dma_buf->vaddr, dma_buf->dma); >> + >> + kfree(dma_buf); >> +} >> +EXPORT_SYMBOL(psp_tee_free_dmabuf); >> + >> static int tee_alloc_ring(struct psp_tee_device *tee, int ring_size) >> { >> struct ring_buf_manager *rb_mgr = &tee->rb_mgr; >> - void *start_addr; >> if (!ring_size) >> return -EINVAL; >> - /* We need actual physical address instead of DMA address, since >> - * Trusted OS running on AMD Secure Processor will map this region >> - */ >> - start_addr = (void *)__get_free_pages(GFP_KERNEL, get_order(ring_size)); >> - if (!start_addr) >> + rb_mgr->ring_buf = psp_tee_alloc_dmabuf(ring_size, >> + GFP_KERNEL | __GFP_ZERO); >> + if (!rb_mgr->ring_buf) { >> + dev_err(tee->dev, "ring allocation failed\n"); >> return -ENOMEM; >> - >> - memset(start_addr, 0x0, ring_size); >> - rb_mgr->ring_start = start_addr; >> - rb_mgr->ring_size = ring_size; >> - rb_mgr->ring_pa = __psp_pa(start_addr); >> + } >> mutex_init(&rb_mgr->mutex); >> return 0; >> @@ -49,15 +89,8 @@ static void tee_free_ring(struct psp_tee_device *tee) >> { >> struct ring_buf_manager *rb_mgr = &tee->rb_mgr; >> - if (!rb_mgr->ring_start) >> - return; >> + psp_tee_free_dmabuf(rb_mgr->ring_buf); >> - free_pages((unsigned long)rb_mgr->ring_start, >> - get_order(rb_mgr->ring_size)); >> - >> - rb_mgr->ring_start = NULL; >> - rb_mgr->ring_size = 0; >> - rb_mgr->ring_pa = 0; >> mutex_destroy(&rb_mgr->mutex); >> } >> @@ -81,35 +114,36 @@ static int tee_wait_cmd_poll(struct psp_tee_device *tee, unsigned int timeout, >> return -ETIMEDOUT; >> } >> -static >> -struct tee_init_ring_cmd *tee_alloc_cmd_buffer(struct psp_tee_device *tee) >> +struct dma_buffer *tee_alloc_cmd_buffer(struct psp_tee_device *tee) >> { >> struct tee_init_ring_cmd *cmd; >> + struct dma_buffer *cmd_buffer; >> - cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); >> - if (!cmd) >> + cmd_buffer = psp_tee_alloc_dmabuf(sizeof(*cmd), >> + GFP_KERNEL | __GFP_ZERO); >> + if (!cmd_buffer) >> return NULL; >> - cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_pa); >> - cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_pa); >> - cmd->size = tee->rb_mgr.ring_size; >> + cmd = (struct tee_init_ring_cmd *)cmd_buffer->vaddr; >> + cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_buf->paddr); >> + cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_buf->paddr); >> + cmd->size = tee->rb_mgr.ring_buf->size; >> dev_dbg(tee->dev, "tee: ring address: high = 0x%x low = 0x%x size = %u\n", >> cmd->hi_addr, cmd->low_addr, cmd->size); >> - return cmd; >> + return cmd_buffer; >> } >> -static inline void tee_free_cmd_buffer(struct tee_init_ring_cmd *cmd) >> +static inline void tee_free_cmd_buffer(struct dma_buffer *cmd_buffer) >> { >> - kfree(cmd); >> + psp_tee_free_dmabuf(cmd_buffer); >> } >> static int tee_init_ring(struct psp_tee_device *tee) >> { >> int ring_size = MAX_RING_BUFFER_ENTRIES * sizeof(struct tee_ring_cmd); >> - struct tee_init_ring_cmd *cmd; >> - phys_addr_t cmd_buffer; >> + struct dma_buffer *cmd_buffer; >> unsigned int reg; >> int ret; >> @@ -123,21 +157,19 @@ static int tee_init_ring(struct psp_tee_device *tee) >> tee->rb_mgr.wptr = 0; >> - cmd = tee_alloc_cmd_buffer(tee); >> - if (!cmd) { >> + cmd_buffer = tee_alloc_cmd_buffer(tee); >> + if (!cmd_buffer) { >> tee_free_ring(tee); >> return -ENOMEM; >> } >> - cmd_buffer = __psp_pa((void *)cmd); >> - >> /* Send command buffer details to Trusted OS by writing to >> * CPU-PSP message registers >> */ >> - iowrite32(lower_32_bits(cmd_buffer), >> + iowrite32(lower_32_bits(cmd_buffer->paddr), >> tee->io_regs + tee->vdata->cmdbuff_addr_lo_reg); >> - iowrite32(upper_32_bits(cmd_buffer), >> + iowrite32(upper_32_bits(cmd_buffer->paddr), >> tee->io_regs + tee->vdata->cmdbuff_addr_hi_reg); >> iowrite32(TEE_RING_INIT_CMD, >> tee->io_regs + tee->vdata->cmdresp_reg); >> @@ -157,7 +189,7 @@ static int tee_init_ring(struct psp_tee_device *tee) >> } >> free_buf: >> - tee_free_cmd_buffer(cmd); >> + tee_free_cmd_buffer(cmd_buffer); >> return ret; >> } >> @@ -167,7 +199,7 @@ static void tee_destroy_ring(struct psp_tee_device *tee) >> unsigned int reg; >> int ret; >> - if (!tee->rb_mgr.ring_start) >> + if (!tee->rb_mgr.ring_buf->vaddr) >> return; >> if (psp_dead) >> @@ -256,7 +288,7 @@ static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id, >> do { >> /* Get pointer to ring buffer command entry */ >> cmd = (struct tee_ring_cmd *) >> - (tee->rb_mgr.ring_start + tee->rb_mgr.wptr); >> + (tee->rb_mgr.ring_buf->vaddr + tee->rb_mgr.wptr); >> rptr = ioread32(tee->io_regs + tee->vdata->ring_rptr_reg); >> @@ -305,7 +337,7 @@ static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id, >> /* Update local copy of write pointer */ >> tee->rb_mgr.wptr += sizeof(struct tee_ring_cmd); >> - if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_size) >> + if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_buf->size) >> tee->rb_mgr.wptr = 0; >> /* Trigger interrupt to Trusted OS */ >> diff --git a/drivers/crypto/ccp/tee-dev.h b/drivers/crypto/ccp/tee-dev.h >> index 49d26158b71e..9238487ee8bf 100644 >> --- a/drivers/crypto/ccp/tee-dev.h >> +++ b/drivers/crypto/ccp/tee-dev.h >> @@ -16,6 +16,7 @@ >> #include <linux/device.h> >> #include <linux/mutex.h> >> +#include <linux/psp-tee.h> >> #define TEE_DEFAULT_TIMEOUT 10 >> #define MAX_BUFFER_SIZE 988 >> @@ -48,17 +49,13 @@ struct tee_init_ring_cmd { >> /** >> * struct ring_buf_manager - Helper structure to manage ring buffer. >> - * @ring_start: starting address of ring buffer >> - * @ring_size: size of ring buffer in bytes >> - * @ring_pa: physical address of ring buffer >> * @wptr: index to the last written entry in ring buffer >> + * @ring_buf: ring buffer allocated using DMA api >> */ >> struct ring_buf_manager { >> struct mutex mutex; /* synchronizes access to ring buffer */ >> - void *ring_start; >> - u32 ring_size; >> - phys_addr_t ring_pa; >> u32 wptr; >> + struct dma_buffer *ring_buf; >> }; >> struct psp_tee_device { >> diff --git a/include/linux/psp-tee.h b/include/linux/psp-tee.h >> index cb0c95d6d76b..c0fa922f24d4 100644 >> --- a/include/linux/psp-tee.h >> +++ b/include/linux/psp-tee.h >> @@ -13,6 +13,7 @@ >> #include <linux/types.h> >> #include <linux/errno.h> >> +#include <linux/dma-mapping.h> >> /* This file defines the Trusted Execution Environment (TEE) interface commands >> * and the API exported by AMD Secure Processor driver to communicate with >> @@ -40,6 +41,20 @@ enum tee_cmd_id { >> TEE_CMD_ID_UNMAP_SHARED_MEM, >> }; >> +/** >> + * struct dma_buffer - Structure for a DMA buffer. >> + * @dma: DMA buffer address >> + * @paddr: Physical address of DMA buffer >> + * @vaddr: CPU virtual address of DMA buffer >> + * @size: Size of DMA buffer in bytes >> + */ >> +struct dma_buffer { >> + dma_addr_t dma; >> + phys_addr_t paddr; >> + void *vaddr; >> + unsigned long size; >> +}; >> + >> #ifdef CONFIG_CRYPTO_DEV_SP_PSP >> /** >> * psp_tee_process_cmd() - Process command in Trusted Execution Environment >> @@ -75,6 +90,28 @@ int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, size_t len, >> */ >> int psp_check_tee_status(void); >> +/** >> + * psp_tee_alloc_dmabuf() - Allocates memory of requested size and flags using >> + * dma_alloc_coherent() API. >> + * >> + * This function can be used to allocate a shared memory region between the >> + * host and PSP TEE. >> + * >> + * Returns: >> + * non-NULL a valid pointer to struct dma_buffer >> + * NULL on failure >> + */ >> +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp); >> + >> +/** >> + * psp_tee_free_dmabuf() - Deallocates memory using dma_free_coherent() API. >> + * >> + * This function can be used to release shared memory region between host >> + * and PSP TEE. >> + * >> + */ >> +void psp_tee_free_dmabuf(struct dma_buffer *dma_buffer); >> + >> #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ >> static inline int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, >> @@ -87,5 +124,15 @@ static inline int psp_check_tee_status(void) >> { >> return -ENODEV; >> } >> + >> +static inline >> +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp) >> +{ >> + return NULL; >> +} >> + >> +static inline void psp_tee_free_dmabuf(struct dma_buffer *dma_buffer) >> +{ >> +} >> #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ >> #endif /* __PSP_TEE_H_ */
On 12/12/22 05:21, Rijo Thomas wrote: > On 12/10/2022 2:31 AM, Tom Lendacky wrote: >> On 12/6/22 06:30, Rijo Thomas wrote: >>> For AMD Secure Processor (ASP) to map and access TEE ring buffer, the >>> ring buffer address sent by host to ASP must be a real physical >>> address and the pages must be physically contiguous. >>> >>> In a virtualized environment though, when the driver is running in a >>> guest VM, the pages allocated by __get_free_pages() may not be >>> contiguous in the host (or machine) physical address space. Guests >>> will see a guest (or pseudo) physical address and not the actual host >>> (or machine) physical address. The TEE running on ASP cannot decipher >>> pseudo physical addresses. It needs host or machine physical address. >>> >>> To resolve this problem, use DMA APIs for allocating buffers that must >>> be shared with TEE. This will ensure that the pages are contiguous in >>> host (or machine) address space. If the DMA handle is an IOVA, >>> translate it into a physical address before sending it to ASP. >>> >>> This patch also exports two APIs (one for buffer allocation and >>> another to free the buffer). This API can be used by AMD-TEE driver to >>> share buffers with TEE. >>> >>> Fixes: 33960acccfbd ("crypto: ccp - add TEE support for Raven Ridge") >>> Cc: Tom Lendacky <thomas.lendacky@amd.com> >>> Cc: stable@vger.kernel.org >>> Signed-off-by: Rijo Thomas <Rijo-john.Thomas@amd.com> >>> Co-developed-by: Jeshwanth <JESHWANTHKUMAR.NK@amd.com> >>> Signed-off-by: Jeshwanth <JESHWANTHKUMAR.NK@amd.com> >>> Reviewed-by: Devaraj Rangasamy <Devaraj.Rangasamy@amd.com> >>> --- >>> drivers/crypto/ccp/psp-dev.c | 6 +- >>> drivers/crypto/ccp/tee-dev.c | 116 ++++++++++++++++++++++------------- >>> drivers/crypto/ccp/tee-dev.h | 9 +-- >>> include/linux/psp-tee.h | 47 ++++++++++++++ >>> 4 files changed, 127 insertions(+), 51 deletions(-) >>> >>> diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c >>> index c9c741ac8442..2b86158d7435 100644 >>> --- a/drivers/crypto/ccp/psp-dev.c >>> +++ b/drivers/crypto/ccp/psp-dev.c >>> @@ -161,13 +161,13 @@ int psp_dev_init(struct sp_device *sp) >>> goto e_err; >>> } >>> + if (sp->set_psp_master_device) >>> + sp->set_psp_master_device(sp); >>> + >> >> This worries me a bit... if psp_init() fails, it may still be referenced as the master device. What's the reason for moving it? > > Hmm. Okay, I see your point. > > In psp_tee_alloc_dmabuf(), we call psp_get_master_device(). Without above change, psp_get_master_device() returns NULL. > > I think in psp_dev_init(), we can add below error handling: > > ret = psp_init(psp); > if (ret) > goto e_init; > ... > > e_init: > if (sp->clear_psp_master_device) > sp->clear_psp_master_device(sp); > > Will this help address your concern? Yes, that works. > >> >>> ret = psp_init(psp); >>> if (ret) >>> goto e_irq; >>> - if (sp->set_psp_master_device) >>> - sp->set_psp_master_device(sp); >>> - >>> /* Enable interrupt */ >>> iowrite32(-1, psp->io_regs + psp->vdata->inten_reg); >>> diff --git a/drivers/crypto/ccp/tee-dev.c b/drivers/crypto/ccp/tee-dev.c >>> index 5c9d47f3be37..1631d9851e54 100644 >>> --- a/drivers/crypto/ccp/tee-dev.c >>> +++ b/drivers/crypto/ccp/tee-dev.c >>> @@ -12,8 +12,9 @@ >>> #include <linux/mutex.h> >>> #include <linux/delay.h> >>> #include <linux/slab.h> >>> +#include <linux/dma-direct.h> >>> +#include <linux/iommu.h> >>> #include <linux/gfp.h> >>> -#include <linux/psp-sev.h> >>> #include <linux/psp-tee.h> >>> #include "psp-dev.h" >>> @@ -21,25 +22,64 @@ >>> static bool psp_dead; >>> +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp) >> >> It looks like both calls to this use the same gfp_t values, you can probably eliminate from the call and just specify them in here. >> > > Okay, I will remove gfp_t flag from the function argument. > >>> +{ >>> + struct psp_device *psp = psp_get_master_device(); >>> + struct dma_buffer *dma_buf; >>> + struct iommu_domain *dom; >>> + >>> + if (!psp || !size) >>> + return NULL; >>> + >>> + dma_buf = kzalloc(sizeof(*dma_buf), GFP_KERNEL); >>> + if (!dma_buf) >>> + return NULL; >>> + >>> + dma_buf->vaddr = dma_alloc_coherent(psp->dev, size, &dma_buf->dma, gfp); >>> + if (!dma_buf->vaddr || !dma_buf->dma) { >> >> I don't think you can have one of these be NULL without both being NULL, but I guess it doesn't hurt to check. >> > > Okay, we will keep both checks for now. > >>> + kfree(dma_buf); >>> + return NULL; >>> + } >>> + >>> + dma_buf->size = size; >>> + > + dom = iommu_get_domain_for_dev(psp->dev); >>> + if (dom) >> >> You need a nice comment above this all explaining that. I guess you're using the presence of a domain to determine whether you're running on bare-metal vs within a hypervisor? I'm not sure what will happen if the guest ever gets an emulated IOMMU... > > Sure we will add a comment. > > We are not trying to determive bare-metal vs hypervisor here, but determine whether DMA handle returned by dma_alloc_coherent() is an IOVA or not. > If the address is an IOVA, we convert IOVA to physical address using iommu_iova_to_phys(). This was our intention. Ok, but if a VM gets an emulated IOMMU and ends up with an IOVA, that IOVA points to a GPA - and if the size is over a page, then you aren't guaranteed to have contiguous physical memory. Probably not a concern at the moment, but not sure about what happens in the future. > >> >>> + dma_buf->paddr = iommu_iova_to_phys(dom, dma_buf->dma); >> >> If you're just looking to get the physical address, why not just to an __pa(dma_buf->vaddr)? >> >> Also, paddr might not be the best name, since it isn't always a physical address, but I can't really think of something right now. >> > > We can use __pa(dma_buf->vaddr) only on bare-metal. In hypervisor, __pa(dma_buf->vaddr) gives pseudo-physical address; pseudo-physical address cannot be understood by ASP. > ASP needs real physical address (aka machine address). Please see commit message. > > We chose the name paddr, since it's a (real) physical address that we want to send across to ASP. I am not sure, why you say - it isn't always a physical address. Then a nice comment above this explaining all that and the fact that when there is no IOMMU the DMA address is actually the physical address, would be appropriate. A number of years from now, this will raise questions without thorough documentation. Thanks, Tom > > Thanks, > Rijo > >> Thanks, >> Tom >> >>> + else >>> + dma_buf->paddr = dma_buf->dma; >>> + >>> + return dma_buf; >>> +} >>> +EXPORT_SYMBOL(psp_tee_alloc_dmabuf); >>> + >>> +void psp_tee_free_dmabuf(struct dma_buffer *dma_buf) >>> +{ >>> + struct psp_device *psp = psp_get_master_device(); >>> + >>> + if (!psp || !dma_buf) >>> + return; >>> + >>> + dma_free_coherent(psp->dev, dma_buf->size, >>> + dma_buf->vaddr, dma_buf->dma); >>> + >>> + kfree(dma_buf); >>> +} >>> +EXPORT_SYMBOL(psp_tee_free_dmabuf); >>> + >>> static int tee_alloc_ring(struct psp_tee_device *tee, int ring_size) >>> { >>> struct ring_buf_manager *rb_mgr = &tee->rb_mgr; >>> - void *start_addr; >>> if (!ring_size) >>> return -EINVAL; >>> - /* We need actual physical address instead of DMA address, since >>> - * Trusted OS running on AMD Secure Processor will map this region >>> - */ >>> - start_addr = (void *)__get_free_pages(GFP_KERNEL, get_order(ring_size)); >>> - if (!start_addr) >>> + rb_mgr->ring_buf = psp_tee_alloc_dmabuf(ring_size, >>> + GFP_KERNEL | __GFP_ZERO); >>> + if (!rb_mgr->ring_buf) { >>> + dev_err(tee->dev, "ring allocation failed\n"); >>> return -ENOMEM; >>> - >>> - memset(start_addr, 0x0, ring_size); >>> - rb_mgr->ring_start = start_addr; >>> - rb_mgr->ring_size = ring_size; >>> - rb_mgr->ring_pa = __psp_pa(start_addr); >>> + } >>> mutex_init(&rb_mgr->mutex); >>> return 0; >>> @@ -49,15 +89,8 @@ static void tee_free_ring(struct psp_tee_device *tee) >>> { >>> struct ring_buf_manager *rb_mgr = &tee->rb_mgr; >>> - if (!rb_mgr->ring_start) >>> - return; >>> + psp_tee_free_dmabuf(rb_mgr->ring_buf); >>> - free_pages((unsigned long)rb_mgr->ring_start, >>> - get_order(rb_mgr->ring_size)); >>> - >>> - rb_mgr->ring_start = NULL; >>> - rb_mgr->ring_size = 0; >>> - rb_mgr->ring_pa = 0; >>> mutex_destroy(&rb_mgr->mutex); >>> } >>> @@ -81,35 +114,36 @@ static int tee_wait_cmd_poll(struct psp_tee_device *tee, unsigned int timeout, >>> return -ETIMEDOUT; >>> } >>> -static >>> -struct tee_init_ring_cmd *tee_alloc_cmd_buffer(struct psp_tee_device *tee) >>> +struct dma_buffer *tee_alloc_cmd_buffer(struct psp_tee_device *tee) >>> { >>> struct tee_init_ring_cmd *cmd; >>> + struct dma_buffer *cmd_buffer; >>> - cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); >>> - if (!cmd) >>> + cmd_buffer = psp_tee_alloc_dmabuf(sizeof(*cmd), >>> + GFP_KERNEL | __GFP_ZERO); >>> + if (!cmd_buffer) >>> return NULL; >>> - cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_pa); >>> - cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_pa); >>> - cmd->size = tee->rb_mgr.ring_size; >>> + cmd = (struct tee_init_ring_cmd *)cmd_buffer->vaddr; >>> + cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_buf->paddr); >>> + cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_buf->paddr); >>> + cmd->size = tee->rb_mgr.ring_buf->size; >>> dev_dbg(tee->dev, "tee: ring address: high = 0x%x low = 0x%x size = %u\n", >>> cmd->hi_addr, cmd->low_addr, cmd->size); >>> - return cmd; >>> + return cmd_buffer; >>> } >>> -static inline void tee_free_cmd_buffer(struct tee_init_ring_cmd *cmd) >>> +static inline void tee_free_cmd_buffer(struct dma_buffer *cmd_buffer) >>> { >>> - kfree(cmd); >>> + psp_tee_free_dmabuf(cmd_buffer); >>> } >>> static int tee_init_ring(struct psp_tee_device *tee) >>> { >>> int ring_size = MAX_RING_BUFFER_ENTRIES * sizeof(struct tee_ring_cmd); >>> - struct tee_init_ring_cmd *cmd; >>> - phys_addr_t cmd_buffer; >>> + struct dma_buffer *cmd_buffer; >>> unsigned int reg; >>> int ret; >>> @@ -123,21 +157,19 @@ static int tee_init_ring(struct psp_tee_device *tee) >>> tee->rb_mgr.wptr = 0; >>> - cmd = tee_alloc_cmd_buffer(tee); >>> - if (!cmd) { >>> + cmd_buffer = tee_alloc_cmd_buffer(tee); >>> + if (!cmd_buffer) { >>> tee_free_ring(tee); >>> return -ENOMEM; >>> } >>> - cmd_buffer = __psp_pa((void *)cmd); >>> - >>> /* Send command buffer details to Trusted OS by writing to >>> * CPU-PSP message registers >>> */ >>> - iowrite32(lower_32_bits(cmd_buffer), >>> + iowrite32(lower_32_bits(cmd_buffer->paddr), >>> tee->io_regs + tee->vdata->cmdbuff_addr_lo_reg); >>> - iowrite32(upper_32_bits(cmd_buffer), >>> + iowrite32(upper_32_bits(cmd_buffer->paddr), >>> tee->io_regs + tee->vdata->cmdbuff_addr_hi_reg); >>> iowrite32(TEE_RING_INIT_CMD, >>> tee->io_regs + tee->vdata->cmdresp_reg); >>> @@ -157,7 +189,7 @@ static int tee_init_ring(struct psp_tee_device *tee) >>> } >>> free_buf: >>> - tee_free_cmd_buffer(cmd); >>> + tee_free_cmd_buffer(cmd_buffer); >>> return ret; >>> } >>> @@ -167,7 +199,7 @@ static void tee_destroy_ring(struct psp_tee_device *tee) >>> unsigned int reg; >>> int ret; >>> - if (!tee->rb_mgr.ring_start) >>> + if (!tee->rb_mgr.ring_buf->vaddr) >>> return; >>> if (psp_dead) >>> @@ -256,7 +288,7 @@ static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id, >>> do { >>> /* Get pointer to ring buffer command entry */ >>> cmd = (struct tee_ring_cmd *) >>> - (tee->rb_mgr.ring_start + tee->rb_mgr.wptr); >>> + (tee->rb_mgr.ring_buf->vaddr + tee->rb_mgr.wptr); >>> rptr = ioread32(tee->io_regs + tee->vdata->ring_rptr_reg); >>> @@ -305,7 +337,7 @@ static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id, >>> /* Update local copy of write pointer */ >>> tee->rb_mgr.wptr += sizeof(struct tee_ring_cmd); >>> - if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_size) >>> + if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_buf->size) >>> tee->rb_mgr.wptr = 0; >>> /* Trigger interrupt to Trusted OS */ >>> diff --git a/drivers/crypto/ccp/tee-dev.h b/drivers/crypto/ccp/tee-dev.h >>> index 49d26158b71e..9238487ee8bf 100644 >>> --- a/drivers/crypto/ccp/tee-dev.h >>> +++ b/drivers/crypto/ccp/tee-dev.h >>> @@ -16,6 +16,7 @@ >>> #include <linux/device.h> >>> #include <linux/mutex.h> >>> +#include <linux/psp-tee.h> >>> #define TEE_DEFAULT_TIMEOUT 10 >>> #define MAX_BUFFER_SIZE 988 >>> @@ -48,17 +49,13 @@ struct tee_init_ring_cmd { >>> /** >>> * struct ring_buf_manager - Helper structure to manage ring buffer. >>> - * @ring_start: starting address of ring buffer >>> - * @ring_size: size of ring buffer in bytes >>> - * @ring_pa: physical address of ring buffer >>> * @wptr: index to the last written entry in ring buffer >>> + * @ring_buf: ring buffer allocated using DMA api >>> */ >>> struct ring_buf_manager { >>> struct mutex mutex; /* synchronizes access to ring buffer */ >>> - void *ring_start; >>> - u32 ring_size; >>> - phys_addr_t ring_pa; >>> u32 wptr; >>> + struct dma_buffer *ring_buf; >>> }; >>> struct psp_tee_device { >>> diff --git a/include/linux/psp-tee.h b/include/linux/psp-tee.h >>> index cb0c95d6d76b..c0fa922f24d4 100644 >>> --- a/include/linux/psp-tee.h >>> +++ b/include/linux/psp-tee.h >>> @@ -13,6 +13,7 @@ >>> #include <linux/types.h> >>> #include <linux/errno.h> >>> +#include <linux/dma-mapping.h> >>> /* This file defines the Trusted Execution Environment (TEE) interface commands >>> * and the API exported by AMD Secure Processor driver to communicate with >>> @@ -40,6 +41,20 @@ enum tee_cmd_id { >>> TEE_CMD_ID_UNMAP_SHARED_MEM, >>> }; >>> +/** >>> + * struct dma_buffer - Structure for a DMA buffer. >>> + * @dma: DMA buffer address >>> + * @paddr: Physical address of DMA buffer >>> + * @vaddr: CPU virtual address of DMA buffer >>> + * @size: Size of DMA buffer in bytes >>> + */ >>> +struct dma_buffer { >>> + dma_addr_t dma; >>> + phys_addr_t paddr; >>> + void *vaddr; >>> + unsigned long size; >>> +}; >>> + >>> #ifdef CONFIG_CRYPTO_DEV_SP_PSP >>> /** >>> * psp_tee_process_cmd() - Process command in Trusted Execution Environment >>> @@ -75,6 +90,28 @@ int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, size_t len, >>> */ >>> int psp_check_tee_status(void); >>> +/** >>> + * psp_tee_alloc_dmabuf() - Allocates memory of requested size and flags using >>> + * dma_alloc_coherent() API. >>> + * >>> + * This function can be used to allocate a shared memory region between the >>> + * host and PSP TEE. >>> + * >>> + * Returns: >>> + * non-NULL a valid pointer to struct dma_buffer >>> + * NULL on failure >>> + */ >>> +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp); >>> + >>> +/** >>> + * psp_tee_free_dmabuf() - Deallocates memory using dma_free_coherent() API. >>> + * >>> + * This function can be used to release shared memory region between host >>> + * and PSP TEE. >>> + * >>> + */ >>> +void psp_tee_free_dmabuf(struct dma_buffer *dma_buffer); >>> + >>> #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ >>> static inline int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, >>> @@ -87,5 +124,15 @@ static inline int psp_check_tee_status(void) >>> { >>> return -ENODEV; >>> } >>> + >>> +static inline >>> +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp) >>> +{ >>> + return NULL; >>> +} >>> + >>> +static inline void psp_tee_free_dmabuf(struct dma_buffer *dma_buffer) >>> +{ >>> +} >>> #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ >>> #endif /* __PSP_TEE_H_ */
On 12/12/2022 9:13 PM, Tom Lendacky wrote: > On 12/12/22 05:21, Rijo Thomas wrote: >> On 12/10/2022 2:31 AM, Tom Lendacky wrote: >>> On 12/6/22 06:30, Rijo Thomas wrote: >>>> For AMD Secure Processor (ASP) to map and access TEE ring buffer, the >>>> ring buffer address sent by host to ASP must be a real physical >>>> address and the pages must be physically contiguous. >>>> >>>> In a virtualized environment though, when the driver is running in a >>>> guest VM, the pages allocated by __get_free_pages() may not be >>>> contiguous in the host (or machine) physical address space. Guests >>>> will see a guest (or pseudo) physical address and not the actual host >>>> (or machine) physical address. The TEE running on ASP cannot decipher >>>> pseudo physical addresses. It needs host or machine physical address. >>>> >>>> To resolve this problem, use DMA APIs for allocating buffers that must >>>> be shared with TEE. This will ensure that the pages are contiguous in >>>> host (or machine) address space. If the DMA handle is an IOVA, >>>> translate it into a physical address before sending it to ASP. >>>> >>>> This patch also exports two APIs (one for buffer allocation and >>>> another to free the buffer). This API can be used by AMD-TEE driver to >>>> share buffers with TEE. >>>> >>>> Fixes: 33960acccfbd ("crypto: ccp - add TEE support for Raven Ridge") >>>> Cc: Tom Lendacky <thomas.lendacky@amd.com> >>>> Cc: stable@vger.kernel.org >>>> Signed-off-by: Rijo Thomas <Rijo-john.Thomas@amd.com> >>>> Co-developed-by: Jeshwanth <JESHWANTHKUMAR.NK@amd.com> >>>> Signed-off-by: Jeshwanth <JESHWANTHKUMAR.NK@amd.com> >>>> Reviewed-by: Devaraj Rangasamy <Devaraj.Rangasamy@amd.com> >>>> --- >>>> drivers/crypto/ccp/psp-dev.c | 6 +- >>>> drivers/crypto/ccp/tee-dev.c | 116 ++++++++++++++++++++++------------- >>>> drivers/crypto/ccp/tee-dev.h | 9 +-- >>>> include/linux/psp-tee.h | 47 ++++++++++++++ >>>> 4 files changed, 127 insertions(+), 51 deletions(-) >>>> >>>> diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c >>>> index c9c741ac8442..2b86158d7435 100644 >>>> --- a/drivers/crypto/ccp/psp-dev.c >>>> +++ b/drivers/crypto/ccp/psp-dev.c >>>> @@ -161,13 +161,13 @@ int psp_dev_init(struct sp_device *sp) >>>> goto e_err; >>>> } >>>> + if (sp->set_psp_master_device) >>>> + sp->set_psp_master_device(sp); >>>> + >>> >>> This worries me a bit... if psp_init() fails, it may still be referenced as the master device. What's the reason for moving it? >> >> Hmm. Okay, I see your point. >> >> In psp_tee_alloc_dmabuf(), we call psp_get_master_device(). Without above change, psp_get_master_device() returns NULL. >> >> I think in psp_dev_init(), we can add below error handling: >> >> ret = psp_init(psp); >> if (ret) >> goto e_init; >> ... >> >> e_init: >> if (sp->clear_psp_master_device) >> sp->clear_psp_master_device(sp); >> >> Will this help address your concern? > > Yes, that works. > >> >>> >>>> ret = psp_init(psp); >>>> if (ret) >>>> goto e_irq; >>>> - if (sp->set_psp_master_device) >>>> - sp->set_psp_master_device(sp); >>>> - >>>> /* Enable interrupt */ >>>> iowrite32(-1, psp->io_regs + psp->vdata->inten_reg); >>>> diff --git a/drivers/crypto/ccp/tee-dev.c b/drivers/crypto/ccp/tee-dev.c >>>> index 5c9d47f3be37..1631d9851e54 100644 >>>> --- a/drivers/crypto/ccp/tee-dev.c >>>> +++ b/drivers/crypto/ccp/tee-dev.c >>>> @@ -12,8 +12,9 @@ >>>> #include <linux/mutex.h> >>>> #include <linux/delay.h> >>>> #include <linux/slab.h> >>>> +#include <linux/dma-direct.h> >>>> +#include <linux/iommu.h> >>>> #include <linux/gfp.h> >>>> -#include <linux/psp-sev.h> >>>> #include <linux/psp-tee.h> >>>> #include "psp-dev.h" >>>> @@ -21,25 +22,64 @@ >>>> static bool psp_dead; >>>> +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp) >>> >>> It looks like both calls to this use the same gfp_t values, you can probably eliminate from the call and just specify them in here. >>> >> >> Okay, I will remove gfp_t flag from the function argument. >> >>>> +{ >>>> + struct psp_device *psp = psp_get_master_device(); >>>> + struct dma_buffer *dma_buf; >>>> + struct iommu_domain *dom; >>>> + >>>> + if (!psp || !size) >>>> + return NULL; >>>> + >>>> + dma_buf = kzalloc(sizeof(*dma_buf), GFP_KERNEL); >>>> + if (!dma_buf) >>>> + return NULL; >>>> + >>>> + dma_buf->vaddr = dma_alloc_coherent(psp->dev, size, &dma_buf->dma, gfp); >>>> + if (!dma_buf->vaddr || !dma_buf->dma) { >>> >>> I don't think you can have one of these be NULL without both being NULL, but I guess it doesn't hurt to check. >>> >> >> Okay, we will keep both checks for now. >> >>>> + kfree(dma_buf); >>>> + return NULL; >>>> + } >>>> + >>>> + dma_buf->size = size; >>>> + > + dom = iommu_get_domain_for_dev(psp->dev); >>>> + if (dom) >>> >>> You need a nice comment above this all explaining that. I guess you're using the presence of a domain to determine whether you're running on bare-metal vs within a hypervisor? I'm not sure what will happen if the guest ever gets an emulated IOMMU... >> >> Sure we will add a comment. >> >> We are not trying to determive bare-metal vs hypervisor here, but determine whether DMA handle returned by dma_alloc_coherent() is an IOVA or not. >> If the address is an IOVA, we convert IOVA to physical address using iommu_iova_to_phys(). This was our intention. > > Ok, but if a VM gets an emulated IOMMU and ends up with an IOVA, that IOVA points to a GPA - and if the size is over a page, then you aren't guaranteed to have contiguous physical memory. > > Probably not a concern at the moment, but not sure about what happens in the future. > We are not having a VM with an emulated IOMMU. We will have to handle it in case a need arises in future. If iommu_iova_to_phys() returns a GPA, then this patch will not be functional. We ran the VM as a paravirtualized guest. So, the guest allocations using dma_alloc_coherent() gave us memory that was contiguous in host or machine address space. >> >>> >>>> + dma_buf->paddr = iommu_iova_to_phys(dom, dma_buf->dma); >>> >>> If you're just looking to get the physical address, why not just to an __pa(dma_buf->vaddr)? >>> >>> Also, paddr might not be the best name, since it isn't always a physical address, but I can't really think of something right now. >>> >> >> We can use __pa(dma_buf->vaddr) only on bare-metal. In hypervisor, __pa(dma_buf->vaddr) gives pseudo-physical address; pseudo-physical address cannot be understood by ASP. >> ASP needs real physical address (aka machine address). Please see commit message. >> >> We chose the name paddr, since it's a (real) physical address that we want to send across to ASP. I am not sure, why you say - it isn't always a physical address. > > Then a nice comment above this explaining all that and the fact that when there is no IOMMU the DMA address is actually the physical address, would be appropriate. A number of years from now, this will raise questions without thorough documentation. Agreed. We will add proper comment. We will keep the name paddr for now. Thanks, Rijo > > Thanks, > Tom > >> >> Thanks, >> Rijo >> >>> Thanks, >>> Tom >>> >>>> + else >>>> + dma_buf->paddr = dma_buf->dma; >>>> + >>>> + return dma_buf; >>>> +} >>>> +EXPORT_SYMBOL(psp_tee_alloc_dmabuf); >>>> + >>>> +void psp_tee_free_dmabuf(struct dma_buffer *dma_buf) >>>> +{ >>>> + struct psp_device *psp = psp_get_master_device(); >>>> + >>>> + if (!psp || !dma_buf) >>>> + return; >>>> + >>>> + dma_free_coherent(psp->dev, dma_buf->size, >>>> + dma_buf->vaddr, dma_buf->dma); >>>> + >>>> + kfree(dma_buf); >>>> +} >>>> +EXPORT_SYMBOL(psp_tee_free_dmabuf); >>>> + >>>> static int tee_alloc_ring(struct psp_tee_device *tee, int ring_size) >>>> { >>>> struct ring_buf_manager *rb_mgr = &tee->rb_mgr; >>>> - void *start_addr; >>>> if (!ring_size) >>>> return -EINVAL; >>>> - /* We need actual physical address instead of DMA address, since >>>> - * Trusted OS running on AMD Secure Processor will map this region >>>> - */ >>>> - start_addr = (void *)__get_free_pages(GFP_KERNEL, get_order(ring_size)); >>>> - if (!start_addr) >>>> + rb_mgr->ring_buf = psp_tee_alloc_dmabuf(ring_size, >>>> + GFP_KERNEL | __GFP_ZERO); >>>> + if (!rb_mgr->ring_buf) { >>>> + dev_err(tee->dev, "ring allocation failed\n"); >>>> return -ENOMEM; >>>> - >>>> - memset(start_addr, 0x0, ring_size); >>>> - rb_mgr->ring_start = start_addr; >>>> - rb_mgr->ring_size = ring_size; >>>> - rb_mgr->ring_pa = __psp_pa(start_addr); >>>> + } >>>> mutex_init(&rb_mgr->mutex); >>>> return 0; >>>> @@ -49,15 +89,8 @@ static void tee_free_ring(struct psp_tee_device *tee) >>>> { >>>> struct ring_buf_manager *rb_mgr = &tee->rb_mgr; >>>> - if (!rb_mgr->ring_start) >>>> - return; >>>> + psp_tee_free_dmabuf(rb_mgr->ring_buf); >>>> - free_pages((unsigned long)rb_mgr->ring_start, >>>> - get_order(rb_mgr->ring_size)); >>>> - >>>> - rb_mgr->ring_start = NULL; >>>> - rb_mgr->ring_size = 0; >>>> - rb_mgr->ring_pa = 0; >>>> mutex_destroy(&rb_mgr->mutex); >>>> } >>>> @@ -81,35 +114,36 @@ static int tee_wait_cmd_poll(struct psp_tee_device *tee, unsigned int timeout, >>>> return -ETIMEDOUT; >>>> } >>>> -static >>>> -struct tee_init_ring_cmd *tee_alloc_cmd_buffer(struct psp_tee_device *tee) >>>> +struct dma_buffer *tee_alloc_cmd_buffer(struct psp_tee_device *tee) >>>> { >>>> struct tee_init_ring_cmd *cmd; >>>> + struct dma_buffer *cmd_buffer; >>>> - cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); >>>> - if (!cmd) >>>> + cmd_buffer = psp_tee_alloc_dmabuf(sizeof(*cmd), >>>> + GFP_KERNEL | __GFP_ZERO); >>>> + if (!cmd_buffer) >>>> return NULL; >>>> - cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_pa); >>>> - cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_pa); >>>> - cmd->size = tee->rb_mgr.ring_size; >>>> + cmd = (struct tee_init_ring_cmd *)cmd_buffer->vaddr; >>>> + cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_buf->paddr); >>>> + cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_buf->paddr); >>>> + cmd->size = tee->rb_mgr.ring_buf->size; >>>> dev_dbg(tee->dev, "tee: ring address: high = 0x%x low = 0x%x size = %u\n", >>>> cmd->hi_addr, cmd->low_addr, cmd->size); >>>> - return cmd; >>>> + return cmd_buffer; >>>> } >>>> -static inline void tee_free_cmd_buffer(struct tee_init_ring_cmd *cmd) >>>> +static inline void tee_free_cmd_buffer(struct dma_buffer *cmd_buffer) >>>> { >>>> - kfree(cmd); >>>> + psp_tee_free_dmabuf(cmd_buffer); >>>> } >>>> static int tee_init_ring(struct psp_tee_device *tee) >>>> { >>>> int ring_size = MAX_RING_BUFFER_ENTRIES * sizeof(struct tee_ring_cmd); >>>> - struct tee_init_ring_cmd *cmd; >>>> - phys_addr_t cmd_buffer; >>>> + struct dma_buffer *cmd_buffer; >>>> unsigned int reg; >>>> int ret; >>>> @@ -123,21 +157,19 @@ static int tee_init_ring(struct psp_tee_device *tee) >>>> tee->rb_mgr.wptr = 0; >>>> - cmd = tee_alloc_cmd_buffer(tee); >>>> - if (!cmd) { >>>> + cmd_buffer = tee_alloc_cmd_buffer(tee); >>>> + if (!cmd_buffer) { >>>> tee_free_ring(tee); >>>> return -ENOMEM; >>>> } >>>> - cmd_buffer = __psp_pa((void *)cmd); >>>> - >>>> /* Send command buffer details to Trusted OS by writing to >>>> * CPU-PSP message registers >>>> */ >>>> - iowrite32(lower_32_bits(cmd_buffer), >>>> + iowrite32(lower_32_bits(cmd_buffer->paddr), >>>> tee->io_regs + tee->vdata->cmdbuff_addr_lo_reg); >>>> - iowrite32(upper_32_bits(cmd_buffer), >>>> + iowrite32(upper_32_bits(cmd_buffer->paddr), >>>> tee->io_regs + tee->vdata->cmdbuff_addr_hi_reg); >>>> iowrite32(TEE_RING_INIT_CMD, >>>> tee->io_regs + tee->vdata->cmdresp_reg); >>>> @@ -157,7 +189,7 @@ static int tee_init_ring(struct psp_tee_device *tee) >>>> } >>>> free_buf: >>>> - tee_free_cmd_buffer(cmd); >>>> + tee_free_cmd_buffer(cmd_buffer); >>>> return ret; >>>> } >>>> @@ -167,7 +199,7 @@ static void tee_destroy_ring(struct psp_tee_device *tee) >>>> unsigned int reg; >>>> int ret; >>>> - if (!tee->rb_mgr.ring_start) >>>> + if (!tee->rb_mgr.ring_buf->vaddr) >>>> return; >>>> if (psp_dead) >>>> @@ -256,7 +288,7 @@ static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id, >>>> do { >>>> /* Get pointer to ring buffer command entry */ >>>> cmd = (struct tee_ring_cmd *) >>>> - (tee->rb_mgr.ring_start + tee->rb_mgr.wptr); >>>> + (tee->rb_mgr.ring_buf->vaddr + tee->rb_mgr.wptr); >>>> rptr = ioread32(tee->io_regs + tee->vdata->ring_rptr_reg); >>>> @@ -305,7 +337,7 @@ static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id, >>>> /* Update local copy of write pointer */ >>>> tee->rb_mgr.wptr += sizeof(struct tee_ring_cmd); >>>> - if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_size) >>>> + if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_buf->size) >>>> tee->rb_mgr.wptr = 0; >>>> /* Trigger interrupt to Trusted OS */ >>>> diff --git a/drivers/crypto/ccp/tee-dev.h b/drivers/crypto/ccp/tee-dev.h >>>> index 49d26158b71e..9238487ee8bf 100644 >>>> --- a/drivers/crypto/ccp/tee-dev.h >>>> +++ b/drivers/crypto/ccp/tee-dev.h >>>> @@ -16,6 +16,7 @@ >>>> #include <linux/device.h> >>>> #include <linux/mutex.h> >>>> +#include <linux/psp-tee.h> >>>> #define TEE_DEFAULT_TIMEOUT 10 >>>> #define MAX_BUFFER_SIZE 988 >>>> @@ -48,17 +49,13 @@ struct tee_init_ring_cmd { >>>> /** >>>> * struct ring_buf_manager - Helper structure to manage ring buffer. >>>> - * @ring_start: starting address of ring buffer >>>> - * @ring_size: size of ring buffer in bytes >>>> - * @ring_pa: physical address of ring buffer >>>> * @wptr: index to the last written entry in ring buffer >>>> + * @ring_buf: ring buffer allocated using DMA api >>>> */ >>>> struct ring_buf_manager { >>>> struct mutex mutex; /* synchronizes access to ring buffer */ >>>> - void *ring_start; >>>> - u32 ring_size; >>>> - phys_addr_t ring_pa; >>>> u32 wptr; >>>> + struct dma_buffer *ring_buf; >>>> }; >>>> struct psp_tee_device { >>>> diff --git a/include/linux/psp-tee.h b/include/linux/psp-tee.h >>>> index cb0c95d6d76b..c0fa922f24d4 100644 >>>> --- a/include/linux/psp-tee.h >>>> +++ b/include/linux/psp-tee.h >>>> @@ -13,6 +13,7 @@ >>>> #include <linux/types.h> >>>> #include <linux/errno.h> >>>> +#include <linux/dma-mapping.h> >>>> /* This file defines the Trusted Execution Environment (TEE) interface commands >>>> * and the API exported by AMD Secure Processor driver to communicate with >>>> @@ -40,6 +41,20 @@ enum tee_cmd_id { >>>> TEE_CMD_ID_UNMAP_SHARED_MEM, >>>> }; >>>> +/** >>>> + * struct dma_buffer - Structure for a DMA buffer. >>>> + * @dma: DMA buffer address >>>> + * @paddr: Physical address of DMA buffer >>>> + * @vaddr: CPU virtual address of DMA buffer >>>> + * @size: Size of DMA buffer in bytes >>>> + */ >>>> +struct dma_buffer { >>>> + dma_addr_t dma; >>>> + phys_addr_t paddr; >>>> + void *vaddr; >>>> + unsigned long size; >>>> +}; >>>> + >>>> #ifdef CONFIG_CRYPTO_DEV_SP_PSP >>>> /** >>>> * psp_tee_process_cmd() - Process command in Trusted Execution Environment >>>> @@ -75,6 +90,28 @@ int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, size_t len, >>>> */ >>>> int psp_check_tee_status(void); >>>> +/** >>>> + * psp_tee_alloc_dmabuf() - Allocates memory of requested size and flags using >>>> + * dma_alloc_coherent() API. >>>> + * >>>> + * This function can be used to allocate a shared memory region between the >>>> + * host and PSP TEE. >>>> + * >>>> + * Returns: >>>> + * non-NULL a valid pointer to struct dma_buffer >>>> + * NULL on failure >>>> + */ >>>> +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp); >>>> + >>>> +/** >>>> + * psp_tee_free_dmabuf() - Deallocates memory using dma_free_coherent() API. >>>> + * >>>> + * This function can be used to release shared memory region between host >>>> + * and PSP TEE. >>>> + * >>>> + */ >>>> +void psp_tee_free_dmabuf(struct dma_buffer *dma_buffer); >>>> + >>>> #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ >>>> static inline int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, >>>> @@ -87,5 +124,15 @@ static inline int psp_check_tee_status(void) >>>> { >>>> return -ENODEV; >>>> } >>>> + >>>> +static inline >>>> +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp) >>>> +{ >>>> + return NULL; >>>> +} >>>> + >>>> +static inline void psp_tee_free_dmabuf(struct dma_buffer *dma_buffer) >>>> +{ >>>> +} >>>> #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ >>>> #endif /* __PSP_TEE_H_ */
diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c index c9c741ac8442..2b86158d7435 100644 --- a/drivers/crypto/ccp/psp-dev.c +++ b/drivers/crypto/ccp/psp-dev.c @@ -161,13 +161,13 @@ int psp_dev_init(struct sp_device *sp) goto e_err; } + if (sp->set_psp_master_device) + sp->set_psp_master_device(sp); + ret = psp_init(psp); if (ret) goto e_irq; - if (sp->set_psp_master_device) - sp->set_psp_master_device(sp); - /* Enable interrupt */ iowrite32(-1, psp->io_regs + psp->vdata->inten_reg); diff --git a/drivers/crypto/ccp/tee-dev.c b/drivers/crypto/ccp/tee-dev.c index 5c9d47f3be37..1631d9851e54 100644 --- a/drivers/crypto/ccp/tee-dev.c +++ b/drivers/crypto/ccp/tee-dev.c @@ -12,8 +12,9 @@ #include <linux/mutex.h> #include <linux/delay.h> #include <linux/slab.h> +#include <linux/dma-direct.h> +#include <linux/iommu.h> #include <linux/gfp.h> -#include <linux/psp-sev.h> #include <linux/psp-tee.h> #include "psp-dev.h" @@ -21,25 +22,64 @@ static bool psp_dead; +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp) +{ + struct psp_device *psp = psp_get_master_device(); + struct dma_buffer *dma_buf; + struct iommu_domain *dom; + + if (!psp || !size) + return NULL; + + dma_buf = kzalloc(sizeof(*dma_buf), GFP_KERNEL); + if (!dma_buf) + return NULL; + + dma_buf->vaddr = dma_alloc_coherent(psp->dev, size, &dma_buf->dma, gfp); + if (!dma_buf->vaddr || !dma_buf->dma) { + kfree(dma_buf); + return NULL; + } + + dma_buf->size = size; + + dom = iommu_get_domain_for_dev(psp->dev); + if (dom) + dma_buf->paddr = iommu_iova_to_phys(dom, dma_buf->dma); + else + dma_buf->paddr = dma_buf->dma; + + return dma_buf; +} +EXPORT_SYMBOL(psp_tee_alloc_dmabuf); + +void psp_tee_free_dmabuf(struct dma_buffer *dma_buf) +{ + struct psp_device *psp = psp_get_master_device(); + + if (!psp || !dma_buf) + return; + + dma_free_coherent(psp->dev, dma_buf->size, + dma_buf->vaddr, dma_buf->dma); + + kfree(dma_buf); +} +EXPORT_SYMBOL(psp_tee_free_dmabuf); + static int tee_alloc_ring(struct psp_tee_device *tee, int ring_size) { struct ring_buf_manager *rb_mgr = &tee->rb_mgr; - void *start_addr; if (!ring_size) return -EINVAL; - /* We need actual physical address instead of DMA address, since - * Trusted OS running on AMD Secure Processor will map this region - */ - start_addr = (void *)__get_free_pages(GFP_KERNEL, get_order(ring_size)); - if (!start_addr) + rb_mgr->ring_buf = psp_tee_alloc_dmabuf(ring_size, + GFP_KERNEL | __GFP_ZERO); + if (!rb_mgr->ring_buf) { + dev_err(tee->dev, "ring allocation failed\n"); return -ENOMEM; - - memset(start_addr, 0x0, ring_size); - rb_mgr->ring_start = start_addr; - rb_mgr->ring_size = ring_size; - rb_mgr->ring_pa = __psp_pa(start_addr); + } mutex_init(&rb_mgr->mutex); return 0; @@ -49,15 +89,8 @@ static void tee_free_ring(struct psp_tee_device *tee) { struct ring_buf_manager *rb_mgr = &tee->rb_mgr; - if (!rb_mgr->ring_start) - return; + psp_tee_free_dmabuf(rb_mgr->ring_buf); - free_pages((unsigned long)rb_mgr->ring_start, - get_order(rb_mgr->ring_size)); - - rb_mgr->ring_start = NULL; - rb_mgr->ring_size = 0; - rb_mgr->ring_pa = 0; mutex_destroy(&rb_mgr->mutex); } @@ -81,35 +114,36 @@ static int tee_wait_cmd_poll(struct psp_tee_device *tee, unsigned int timeout, return -ETIMEDOUT; } -static -struct tee_init_ring_cmd *tee_alloc_cmd_buffer(struct psp_tee_device *tee) +struct dma_buffer *tee_alloc_cmd_buffer(struct psp_tee_device *tee) { struct tee_init_ring_cmd *cmd; + struct dma_buffer *cmd_buffer; - cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); - if (!cmd) + cmd_buffer = psp_tee_alloc_dmabuf(sizeof(*cmd), + GFP_KERNEL | __GFP_ZERO); + if (!cmd_buffer) return NULL; - cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_pa); - cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_pa); - cmd->size = tee->rb_mgr.ring_size; + cmd = (struct tee_init_ring_cmd *)cmd_buffer->vaddr; + cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_buf->paddr); + cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_buf->paddr); + cmd->size = tee->rb_mgr.ring_buf->size; dev_dbg(tee->dev, "tee: ring address: high = 0x%x low = 0x%x size = %u\n", cmd->hi_addr, cmd->low_addr, cmd->size); - return cmd; + return cmd_buffer; } -static inline void tee_free_cmd_buffer(struct tee_init_ring_cmd *cmd) +static inline void tee_free_cmd_buffer(struct dma_buffer *cmd_buffer) { - kfree(cmd); + psp_tee_free_dmabuf(cmd_buffer); } static int tee_init_ring(struct psp_tee_device *tee) { int ring_size = MAX_RING_BUFFER_ENTRIES * sizeof(struct tee_ring_cmd); - struct tee_init_ring_cmd *cmd; - phys_addr_t cmd_buffer; + struct dma_buffer *cmd_buffer; unsigned int reg; int ret; @@ -123,21 +157,19 @@ static int tee_init_ring(struct psp_tee_device *tee) tee->rb_mgr.wptr = 0; - cmd = tee_alloc_cmd_buffer(tee); - if (!cmd) { + cmd_buffer = tee_alloc_cmd_buffer(tee); + if (!cmd_buffer) { tee_free_ring(tee); return -ENOMEM; } - cmd_buffer = __psp_pa((void *)cmd); - /* Send command buffer details to Trusted OS by writing to * CPU-PSP message registers */ - iowrite32(lower_32_bits(cmd_buffer), + iowrite32(lower_32_bits(cmd_buffer->paddr), tee->io_regs + tee->vdata->cmdbuff_addr_lo_reg); - iowrite32(upper_32_bits(cmd_buffer), + iowrite32(upper_32_bits(cmd_buffer->paddr), tee->io_regs + tee->vdata->cmdbuff_addr_hi_reg); iowrite32(TEE_RING_INIT_CMD, tee->io_regs + tee->vdata->cmdresp_reg); @@ -157,7 +189,7 @@ static int tee_init_ring(struct psp_tee_device *tee) } free_buf: - tee_free_cmd_buffer(cmd); + tee_free_cmd_buffer(cmd_buffer); return ret; } @@ -167,7 +199,7 @@ static void tee_destroy_ring(struct psp_tee_device *tee) unsigned int reg; int ret; - if (!tee->rb_mgr.ring_start) + if (!tee->rb_mgr.ring_buf->vaddr) return; if (psp_dead) @@ -256,7 +288,7 @@ static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id, do { /* Get pointer to ring buffer command entry */ cmd = (struct tee_ring_cmd *) - (tee->rb_mgr.ring_start + tee->rb_mgr.wptr); + (tee->rb_mgr.ring_buf->vaddr + tee->rb_mgr.wptr); rptr = ioread32(tee->io_regs + tee->vdata->ring_rptr_reg); @@ -305,7 +337,7 @@ static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id, /* Update local copy of write pointer */ tee->rb_mgr.wptr += sizeof(struct tee_ring_cmd); - if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_size) + if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_buf->size) tee->rb_mgr.wptr = 0; /* Trigger interrupt to Trusted OS */ diff --git a/drivers/crypto/ccp/tee-dev.h b/drivers/crypto/ccp/tee-dev.h index 49d26158b71e..9238487ee8bf 100644 --- a/drivers/crypto/ccp/tee-dev.h +++ b/drivers/crypto/ccp/tee-dev.h @@ -16,6 +16,7 @@ #include <linux/device.h> #include <linux/mutex.h> +#include <linux/psp-tee.h> #define TEE_DEFAULT_TIMEOUT 10 #define MAX_BUFFER_SIZE 988 @@ -48,17 +49,13 @@ struct tee_init_ring_cmd { /** * struct ring_buf_manager - Helper structure to manage ring buffer. - * @ring_start: starting address of ring buffer - * @ring_size: size of ring buffer in bytes - * @ring_pa: physical address of ring buffer * @wptr: index to the last written entry in ring buffer + * @ring_buf: ring buffer allocated using DMA api */ struct ring_buf_manager { struct mutex mutex; /* synchronizes access to ring buffer */ - void *ring_start; - u32 ring_size; - phys_addr_t ring_pa; u32 wptr; + struct dma_buffer *ring_buf; }; struct psp_tee_device { diff --git a/include/linux/psp-tee.h b/include/linux/psp-tee.h index cb0c95d6d76b..c0fa922f24d4 100644 --- a/include/linux/psp-tee.h +++ b/include/linux/psp-tee.h @@ -13,6 +13,7 @@ #include <linux/types.h> #include <linux/errno.h> +#include <linux/dma-mapping.h> /* This file defines the Trusted Execution Environment (TEE) interface commands * and the API exported by AMD Secure Processor driver to communicate with @@ -40,6 +41,20 @@ enum tee_cmd_id { TEE_CMD_ID_UNMAP_SHARED_MEM, }; +/** + * struct dma_buffer - Structure for a DMA buffer. + * @dma: DMA buffer address + * @paddr: Physical address of DMA buffer + * @vaddr: CPU virtual address of DMA buffer + * @size: Size of DMA buffer in bytes + */ +struct dma_buffer { + dma_addr_t dma; + phys_addr_t paddr; + void *vaddr; + unsigned long size; +}; + #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** * psp_tee_process_cmd() - Process command in Trusted Execution Environment @@ -75,6 +90,28 @@ int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, size_t len, */ int psp_check_tee_status(void); +/** + * psp_tee_alloc_dmabuf() - Allocates memory of requested size and flags using + * dma_alloc_coherent() API. + * + * This function can be used to allocate a shared memory region between the + * host and PSP TEE. + * + * Returns: + * non-NULL a valid pointer to struct dma_buffer + * NULL on failure + */ +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp); + +/** + * psp_tee_free_dmabuf() - Deallocates memory using dma_free_coherent() API. + * + * This function can be used to release shared memory region between host + * and PSP TEE. + * + */ +void psp_tee_free_dmabuf(struct dma_buffer *dma_buffer); + #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ static inline int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, @@ -87,5 +124,15 @@ static inline int psp_check_tee_status(void) { return -ENODEV; } + +static inline +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp) +{ + return NULL; +} + +static inline void psp_tee_free_dmabuf(struct dma_buffer *dma_buffer) +{ +} #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ #endif /* __PSP_TEE_H_ */