diff mbox series

[Resend,RFC,V4,09/13] x86/Swiotlb/HV: Add Swiotlb bounce buffer remap function for HV IVM

Message ID 20210707154629.3977369-10-ltykernel@gmail.com
State Superseded
Headers show
Series x86/Hyper-V: Add Hyper-V Isolation VM support | expand

Commit Message

Tianyu Lan July 7, 2021, 3:46 p.m. UTC
From: Tianyu Lan <Tianyu.Lan@microsoft.com>

In Isolation VM with AMD SEV, bounce buffer needs to be accessed via
extra address space which is above shared_gpa_boundary
(E.G 39 bit address line) reported by Hyper-V CPUID ISOLATION_CONFIG.
The access physical address will be original physical address +
shared_gpa_boundary. The shared_gpa_boundary in the AMD SEV SNP
spec is called virtual top of memory(vTOM). Memory addresses below
vTOM are automatically treated as private while memory above
vTOM is treated as shared.

Introduce set_memory_decrypted_map() function to decrypt memory and
remap memory with platform callback. Use set_memory_decrypted_
map() in the swiotlb code, store remap address returned by the new
API and use the remap address to copy data from/to swiotlb bounce buffer.

Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
 arch/x86/hyperv/ivm.c             | 30 ++++++++++++++++++++++++++++++
 arch/x86/include/asm/mshyperv.h   |  2 ++
 arch/x86/include/asm/set_memory.h |  2 ++
 arch/x86/mm/pat/set_memory.c      | 28 ++++++++++++++++++++++++++++
 include/linux/swiotlb.h           |  4 ++++
 kernel/dma/swiotlb.c              | 11 ++++++++---
 6 files changed, 74 insertions(+), 3 deletions(-)

Comments

Tianyu Lan July 20, 2021, 10:42 a.m. UTC | #1
Hi Christoph & Konrad:
     Could you review this patch and make sure this is right way to 
resolve the memory remap request from AMD SEV-SNP vTOM case?

Thanks.

On 7/7/2021 11:46 PM, Tianyu Lan wrote:
> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
> 
> In Isolation VM with AMD SEV, bounce buffer needs to be accessed via
> extra address space which is above shared_gpa_boundary
> (E.G 39 bit address line) reported by Hyper-V CPUID ISOLATION_CONFIG.
> The access physical address will be original physical address +
> shared_gpa_boundary. The shared_gpa_boundary in the AMD SEV SNP
> spec is called virtual top of memory(vTOM). Memory addresses below
> vTOM are automatically treated as private while memory above
> vTOM is treated as shared.
> 
> Introduce set_memory_decrypted_map() function to decrypt memory and
> remap memory with platform callback. Use set_memory_decrypted_
> map() in the swiotlb code, store remap address returned by the new
> API and use the remap address to copy data from/to swiotlb bounce buffer.
> 
> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
> ---
>   arch/x86/hyperv/ivm.c             | 30 ++++++++++++++++++++++++++++++
>   arch/x86/include/asm/mshyperv.h   |  2 ++
>   arch/x86/include/asm/set_memory.h |  2 ++
>   arch/x86/mm/pat/set_memory.c      | 28 ++++++++++++++++++++++++++++
>   include/linux/swiotlb.h           |  4 ++++
>   kernel/dma/swiotlb.c              | 11 ++++++++---
>   6 files changed, 74 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> index 8a6f4e9e3d6c..ea33935e0c17 100644
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -267,3 +267,33 @@ int hv_set_mem_enc(unsigned long addr, int numpages, bool enc)
>   			enc ? VMBUS_PAGE_NOT_VISIBLE
>   			: VMBUS_PAGE_VISIBLE_READ_WRITE);
>   }
> +
> +/*
> + * hv_map_memory - map memory to extra space in the AMD SEV-SNP Isolation VM.
> + */
> +unsigned long hv_map_memory(unsigned long addr, unsigned long size)
> +{
> +	unsigned long *pfns = kcalloc(size / HV_HYP_PAGE_SIZE,
> +				      sizeof(unsigned long),
> +		       GFP_KERNEL);
> +	unsigned long vaddr;
> +	int i;
> +
> +	if (!pfns)
> +		return (unsigned long)NULL;
> +
> +	for (i = 0; i < size / HV_HYP_PAGE_SIZE; i++)
> +		pfns[i] = virt_to_hvpfn((void *)addr + i * HV_HYP_PAGE_SIZE) +
> +			(ms_hyperv.shared_gpa_boundary >> HV_HYP_PAGE_SHIFT);
> +
> +	vaddr = (unsigned long)vmap_pfn(pfns, size / HV_HYP_PAGE_SIZE,
> +					PAGE_KERNEL_IO);
> +	kfree(pfns);
> +
> +	return vaddr;
> +}
> +
> +void hv_unmap_memory(unsigned long addr)
> +{
> +	vunmap((void *)addr);
> +}
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index fe03e3e833ac..ba3cb9e32fdb 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -253,6 +253,8 @@ int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
>   int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
>   int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility);
>   int hv_set_mem_enc(unsigned long addr, int numpages, bool enc);
> +unsigned long hv_map_memory(unsigned long addr, unsigned long size);
> +void hv_unmap_memory(unsigned long addr);
>   void hv_sint_wrmsrl_ghcb(u64 msr, u64 value);
>   void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value);
>   void hv_signal_eom_ghcb(void);
> diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
> index 43fa081a1adb..7a2117931830 100644
> --- a/arch/x86/include/asm/set_memory.h
> +++ b/arch/x86/include/asm/set_memory.h
> @@ -49,6 +49,8 @@ int set_memory_decrypted(unsigned long addr, int numpages);
>   int set_memory_np_noalias(unsigned long addr, int numpages);
>   int set_memory_nonglobal(unsigned long addr, int numpages);
>   int set_memory_global(unsigned long addr, int numpages);
> +unsigned long set_memory_decrypted_map(unsigned long addr, unsigned long size);
> +int set_memory_encrypted_unmap(unsigned long addr, unsigned long size);
>   
>   int set_pages_array_uc(struct page **pages, int addrinarray);
>   int set_pages_array_wc(struct page **pages, int addrinarray);
> diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
> index 6cc83c57383d..5d4d3963f4a2 100644
> --- a/arch/x86/mm/pat/set_memory.c
> +++ b/arch/x86/mm/pat/set_memory.c
> @@ -2039,6 +2039,34 @@ int set_memory_decrypted(unsigned long addr, int numpages)
>   }
>   EXPORT_SYMBOL_GPL(set_memory_decrypted);
>   
> +static unsigned long __map_memory(unsigned long addr, unsigned long size)
> +{
> +	if (hv_is_isolation_supported())
> +		return hv_map_memory(addr, size);
> +
> +	return addr;
> +}
> +
> +static void __unmap_memory(unsigned long addr)
> +{
> +	if (hv_is_isolation_supported())
> +		hv_unmap_memory(addr);
> +}
> +
> +unsigned long set_memory_decrypted_map(unsigned long addr, unsigned long size)
> +{
> +	if (__set_memory_enc_dec(addr, size / PAGE_SIZE, false))
> +		return (unsigned long)NULL;
> +
> +	return __map_memory(addr, size);
> +}
> +
> +int set_memory_encrypted_unmap(unsigned long addr, unsigned long size)
> +{
> +	__unmap_memory(addr);
> +	return __set_memory_enc_dec(addr, size / PAGE_SIZE, true);
> +}
> +
>   int set_pages_uc(struct page *page, int numpages)
>   {
>   	unsigned long addr = (unsigned long)page_address(page);
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index f507e3eacbea..5c6f6c7380ef 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -72,6 +72,9 @@ extern enum swiotlb_force swiotlb_force;
>    * @end:	The end address of the swiotlb memory pool. Used to do a quick
>    *		range check to see if the memory was in fact allocated by this
>    *		API.
> + * @vstart:	The virtual start address of the swiotlb memory pool. The swiotlb
> + *		memory pool may be remapped in the memory encrypted case and store
> + *		virtual address for bounce buffer operation.
>    * @nslabs:	The number of IO TLB blocks (in groups of 64) between @start and
>    *		@end. For default swiotlb, this is command line adjustable via
>    *		setup_io_tlb_npages.
> @@ -89,6 +92,7 @@ extern enum swiotlb_force swiotlb_force;
>   struct io_tlb_mem {
>   	phys_addr_t start;
>   	phys_addr_t end;
> +	void *vstart;
>   	unsigned long nslabs;
>   	unsigned long used;
>   	unsigned int index;
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index d3fa4669229b..9911817250a8 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -194,8 +194,13 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
>   		mem->slots[i].alloc_size = 0;
>   	}
>   
> -	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
> -	memset(vaddr, 0, bytes);
> +	mem->vstart = (void *)set_memory_decrypted_map((unsigned long)vaddr, bytes);
> +	if (!mem->vstart) {
> +		pr_err("Failed to decrypt memory.\n");
> +		return;
> +	}
> +
> +	memset(mem->vstart, 0, bytes);
>   }
>   
>   int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
> @@ -352,7 +357,7 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
>   	phys_addr_t orig_addr = mem->slots[index].orig_addr;
>   	size_t alloc_size = mem->slots[index].alloc_size;
>   	unsigned long pfn = PFN_DOWN(orig_addr);
> -	unsigned char *vaddr = phys_to_virt(tlb_addr);
> +	unsigned char *vaddr = mem->vstart + tlb_addr - mem->start;
>   
>   	if (orig_addr == INVALID_PHYS_ADDR)
>   		return;
>
Christoph Hellwig July 20, 2021, 1:54 p.m. UTC | #2
Please split the swiotlb changes into a separate patch from the
consumer.

>  }
> +
> +/*
> + * hv_map_memory - map memory to extra space in the AMD SEV-SNP Isolation VM.
> + */
> +unsigned long hv_map_memory(unsigned long addr, unsigned long size)
> +{
> +	unsigned long *pfns = kcalloc(size / HV_HYP_PAGE_SIZE,
> +				      sizeof(unsigned long),
> +		       GFP_KERNEL);
> +	unsigned long vaddr;
> +	int i;
> +
> +	if (!pfns)
> +		return (unsigned long)NULL;
> +
> +	for (i = 0; i < size / HV_HYP_PAGE_SIZE; i++)
> +		pfns[i] = virt_to_hvpfn((void *)addr + i * HV_HYP_PAGE_SIZE) +
> +			(ms_hyperv.shared_gpa_boundary >> HV_HYP_PAGE_SHIFT);
> +
> +	vaddr = (unsigned long)vmap_pfn(pfns, size / HV_HYP_PAGE_SIZE,
> +					PAGE_KERNEL_IO);
> +	kfree(pfns);
> +
> +	return vaddr;

This seems to miss a 'select VMAP_PFN'.  But more importantly I don't
think this actually works.  Various DMA APIs do expect a struct page
backing, so how is this going to work with say dma_mmap_attrs or
dma_get_sgtable_attrs?

> +static unsigned long __map_memory(unsigned long addr, unsigned long size)
> +{
> +	if (hv_is_isolation_supported())
> +		return hv_map_memory(addr, size);
> +
> +	return addr;
> +}
> +
> +static void __unmap_memory(unsigned long addr)
> +{
> +	if (hv_is_isolation_supported())
> +		hv_unmap_memory(addr);
> +}
> +
> +unsigned long set_memory_decrypted_map(unsigned long addr, unsigned long size)
> +{
> +	if (__set_memory_enc_dec(addr, size / PAGE_SIZE, false))
> +		return (unsigned long)NULL;
> +
> +	return __map_memory(addr, size);
> +}
> +
> +int set_memory_encrypted_unmap(unsigned long addr, unsigned long size)
> +{
> +	__unmap_memory(addr);
> +	return __set_memory_enc_dec(addr, size / PAGE_SIZE, true);
> +}

Why this obsfucation into all kinds of strange helpers?  Also I think
we want an ops vectors (or alternative calls) instead of the random
if checks here.

> + * @vstart:	The virtual start address of the swiotlb memory pool. The swiotlb
> + *		memory pool may be remapped in the memory encrypted case and store

Normall we'd call this vaddr or cpu_addr.

> -	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
> -	memset(vaddr, 0, bytes);
> +	mem->vstart = (void *)set_memory_decrypted_map((unsigned long)vaddr, bytes);

Please always pass kernel virtual addresses as pointers.

And I think these APIs might need better names, e.g.

arch_dma_map_decrypted and arch_dma_unmap_decrypted.

Also these will need fallback versions for non-x86 architectures that
currently use memory encryption.
Christoph Hellwig July 21, 2021, 2:33 p.m. UTC | #3
On Wed, Jul 21, 2021 at 06:28:48PM +0800, Tianyu Lan wrote:
> dma_mmap_attrs() and dma_get_sgtable_attrs() get input virtual address

> belonging to backing memory with struct page and returns bounce buffer

> dma physical address which is below shared_gpa_boundary(vTOM) and passed

> to Hyper-V via vmbus protocol.

>

> The new map virtual address is only to access bounce buffer in swiotlb

> code and will not be used other places. It's stored in the mem->vstart.

> So the new API set_memory_decrypted_map() in this series is only called

> in the swiotlb code. Other platforms may replace set_memory_decrypted()

> with set_memory_decrypted_map() as requested.


It seems like you are indeed not using these new helpers in
dma_direct_alloc.  How is dma_alloc_attrs/dma_alloc_coherent going to
work on these systems?
Tianyu Lan Aug. 13, 2021, 4:43 p.m. UTC | #4
Hi Christoph:
       I followed your this suggestion to rework the latest
version(https://lkml.org/lkml/2021/8/9/805). I just remove the arch
prefix from your suggested name arch_dma_map_decrypted because the 
platform may populate their map/umap callback in the ops. But from your 
latest comment(https://lkml.org/lkml/2021/8/12/149), these names confuse 
vs the actual dma_map_* calls... Could you help to give the right 
function name? The new map function is to map bounce buffer in the 
trust/Isolation VM and these buffers are DMA memory.



On 7/20/2021 9:54 PM, Christoph Hellwig wrote:
>> -	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
>> -	memset(vaddr, 0, bytes);
>> +	mem->vstart = (void *)set_memory_decrypted_map((unsigned long)vaddr, bytes);
> Please always pass kernel virtual addresses as pointers.
> 
> And I think these APIs might need better names, e.g.
> 
> arch_dma_map_decrypted and arch_dma_unmap_decrypted.
> 
> Also these will need fallback versions for non-x86 architectures that
> currently use memory encryption.
diff mbox series

Patch

diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 8a6f4e9e3d6c..ea33935e0c17 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -267,3 +267,33 @@  int hv_set_mem_enc(unsigned long addr, int numpages, bool enc)
 			enc ? VMBUS_PAGE_NOT_VISIBLE
 			: VMBUS_PAGE_VISIBLE_READ_WRITE);
 }
+
+/*
+ * hv_map_memory - map memory to extra space in the AMD SEV-SNP Isolation VM.
+ */
+unsigned long hv_map_memory(unsigned long addr, unsigned long size)
+{
+	unsigned long *pfns = kcalloc(size / HV_HYP_PAGE_SIZE,
+				      sizeof(unsigned long),
+		       GFP_KERNEL);
+	unsigned long vaddr;
+	int i;
+
+	if (!pfns)
+		return (unsigned long)NULL;
+
+	for (i = 0; i < size / HV_HYP_PAGE_SIZE; i++)
+		pfns[i] = virt_to_hvpfn((void *)addr + i * HV_HYP_PAGE_SIZE) +
+			(ms_hyperv.shared_gpa_boundary >> HV_HYP_PAGE_SHIFT);
+
+	vaddr = (unsigned long)vmap_pfn(pfns, size / HV_HYP_PAGE_SIZE,
+					PAGE_KERNEL_IO);
+	kfree(pfns);
+
+	return vaddr;
+}
+
+void hv_unmap_memory(unsigned long addr)
+{
+	vunmap((void *)addr);
+}
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index fe03e3e833ac..ba3cb9e32fdb 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -253,6 +253,8 @@  int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
 int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
 int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility);
 int hv_set_mem_enc(unsigned long addr, int numpages, bool enc);
+unsigned long hv_map_memory(unsigned long addr, unsigned long size);
+void hv_unmap_memory(unsigned long addr);
 void hv_sint_wrmsrl_ghcb(u64 msr, u64 value);
 void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value);
 void hv_signal_eom_ghcb(void);
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 43fa081a1adb..7a2117931830 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -49,6 +49,8 @@  int set_memory_decrypted(unsigned long addr, int numpages);
 int set_memory_np_noalias(unsigned long addr, int numpages);
 int set_memory_nonglobal(unsigned long addr, int numpages);
 int set_memory_global(unsigned long addr, int numpages);
+unsigned long set_memory_decrypted_map(unsigned long addr, unsigned long size);
+int set_memory_encrypted_unmap(unsigned long addr, unsigned long size);
 
 int set_pages_array_uc(struct page **pages, int addrinarray);
 int set_pages_array_wc(struct page **pages, int addrinarray);
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 6cc83c57383d..5d4d3963f4a2 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2039,6 +2039,34 @@  int set_memory_decrypted(unsigned long addr, int numpages)
 }
 EXPORT_SYMBOL_GPL(set_memory_decrypted);
 
+static unsigned long __map_memory(unsigned long addr, unsigned long size)
+{
+	if (hv_is_isolation_supported())
+		return hv_map_memory(addr, size);
+
+	return addr;
+}
+
+static void __unmap_memory(unsigned long addr)
+{
+	if (hv_is_isolation_supported())
+		hv_unmap_memory(addr);
+}
+
+unsigned long set_memory_decrypted_map(unsigned long addr, unsigned long size)
+{
+	if (__set_memory_enc_dec(addr, size / PAGE_SIZE, false))
+		return (unsigned long)NULL;
+
+	return __map_memory(addr, size);
+}
+
+int set_memory_encrypted_unmap(unsigned long addr, unsigned long size)
+{
+	__unmap_memory(addr);
+	return __set_memory_enc_dec(addr, size / PAGE_SIZE, true);
+}
+
 int set_pages_uc(struct page *page, int numpages)
 {
 	unsigned long addr = (unsigned long)page_address(page);
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index f507e3eacbea..5c6f6c7380ef 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -72,6 +72,9 @@  extern enum swiotlb_force swiotlb_force;
  * @end:	The end address of the swiotlb memory pool. Used to do a quick
  *		range check to see if the memory was in fact allocated by this
  *		API.
+ * @vstart:	The virtual start address of the swiotlb memory pool. The swiotlb
+ *		memory pool may be remapped in the memory encrypted case and store
+ *		virtual address for bounce buffer operation.
  * @nslabs:	The number of IO TLB blocks (in groups of 64) between @start and
  *		@end. For default swiotlb, this is command line adjustable via
  *		setup_io_tlb_npages.
@@ -89,6 +92,7 @@  extern enum swiotlb_force swiotlb_force;
 struct io_tlb_mem {
 	phys_addr_t start;
 	phys_addr_t end;
+	void *vstart;
 	unsigned long nslabs;
 	unsigned long used;
 	unsigned int index;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d3fa4669229b..9911817250a8 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -194,8 +194,13 @@  static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
 		mem->slots[i].alloc_size = 0;
 	}
 
-	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
-	memset(vaddr, 0, bytes);
+	mem->vstart = (void *)set_memory_decrypted_map((unsigned long)vaddr, bytes);
+	if (!mem->vstart) {
+		pr_err("Failed to decrypt memory.\n");
+		return;
+	}
+
+	memset(mem->vstart, 0, bytes);
 }
 
 int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
@@ -352,7 +357,7 @@  static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
 	phys_addr_t orig_addr = mem->slots[index].orig_addr;
 	size_t alloc_size = mem->slots[index].alloc_size;
 	unsigned long pfn = PFN_DOWN(orig_addr);
-	unsigned char *vaddr = phys_to_virt(tlb_addr);
+	unsigned char *vaddr = mem->vstart + tlb_addr - mem->start;
 
 	if (orig_addr == INVALID_PHYS_ADDR)
 		return;