[5/6] dma-mapping/iommu: Add dma_set_max_opt_size()

Message ID 1616160348-29451-6-git-send-email-john.garry@huawei.com
State New
Series dma mapping/iommu: Allow IOMMU IOVA rcache range to be configured

Commit Message

John Garry March 19, 2021, 1:25 p.m. UTC
Add a function to allow setting the max size for which we want to optimise
DMA mappings.

Signed-off-by: John Garry <john.garry@huawei.com>

---
 drivers/iommu/dma-iommu.c   |  2 +-
 include/linux/dma-map-ops.h |  1 +
 include/linux/dma-mapping.h |  5 +++++
 kernel/dma/mapping.c        | 11 +++++++++++
 4 files changed, 18 insertions(+), 1 deletion(-)

-- 
2.26.2
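
For illustration, a minimal sketch of how a driver might call the new API once
it knows its largest expected mapping; the driver and probe function below are
hypothetical and not part of this series:

/*
 * Hypothetical caller (illustration only): hint the largest expected
 * mapping to the DMA layer at probe time, so the IOMMU IOVA rcaches
 * can be sized to cover it.
 */
#include <linux/dma-mapping.h>

static int foo_probe(struct device *dev)
{
	/* e.g. at most 124 scatter-gather entries of PAGE_SIZE each */
	dma_set_max_opt_size(dev, 124 * PAGE_SIZE);

	return 0;
}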

Comments

Robin Murphy March 19, 2021, 5 p.m. UTC | #1
On 2021-03-19 13:25, John Garry wrote:
> Add a function to allow setting the max size for which we want to
> optimise DMA mappings.

It seems neat in theory - particularly for packet-based interfaces that 
might have a known fixed size of data unit that they're working on at 
any given time - but aren't there going to be many cases where the 
driver has no idea because it depends on whatever size(s) of request 
userspace happens to throw at it? Even if it does know the absolute 
maximum size of thing it could ever transfer, that could be 
impractically large in areas like video/AI/etc., so it could still be 
hard to make a reasonable decision.

Being largely workload-dependent is why I still think this should be a 
command-line or sysfs tuneable - we could set the default based on how 
much total memory is available, but ultimately it's the end user who 
knows what the workload is going to be and what they care about 
optimising for.
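
(As a concrete illustration of the sysfs idea, a minimal sketch; the attribute
name is invented and nothing like it appears in this series:

/*
 * Hypothetical write-only sysfs attribute (illustration only) that
 * forwards a user-supplied size to dma_set_max_opt_size().
 */
static ssize_t max_opt_size_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t count)
{
	unsigned long size;
	int ret;

	ret = kstrtoul(buf, 0, &size);
	if (ret)
		return ret;

	dma_set_max_opt_size(dev, size);

	return count;
}
static DEVICE_ATTR_WO(max_opt_size);
)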

Another thought (which I'm almost reluctant to share) is that I would 
*love* to try implementing a self-tuning strategy that can detect high 
contention on particular allocation sizes and adjust the caches on the 
fly, but I can easily imagine that having enough inherent overhead to 
end up being an impractical (but fun) waste of time.

Robin.

> [...]

John Garry March 19, 2021, 6:02 p.m. UTC | #2
On 19/03/2021 17:00, Robin Murphy wrote:
> On 2021-03-19 13:25, John Garry wrote:
>> Add a function to allow setting the max size for which we want to
>> optimise DMA mappings.
> 
> It seems neat in theory - particularly for packet-based interfaces that
> might have a known fixed size of data unit that they're working on at
> any given time - but aren't there going to be many cases where the
> driver has no idea because it depends on whatever size(s) of request
> userspace happens to throw at it? Even if it does know the absolute
> maximum size of thing it could ever transfer, that could be
> impractically large in areas like video/AI/etc., so it could still be
> hard to make a reasonable decision.

So if you consider the SCSI stack, which is my interest, we know the max 
segment size and we know the max number of segments per request, so we 
should know the theoretical upper limit of the actual IOVA length we can 
get.

Indeed, from my experiment on my SCSI host, the max IOVA len is found to
be 507904, which is PAGE_SIZE * 124 (124 being the max sg ents there).
Incidentally, that means we want an RCACHE RANGE MAX of 8, not 6.
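
(Spelling out that arithmetic, assuming 4K pages and that the rcaches cover 
allocation orders 0 through range max - 1: 507904 / 4096 = 124 pages; 
order_base_2(124) = 7, since 2^6 = 64 < 124 <= 128 = 2^7; so caching order-7 
allocations needs a range max of 8, while the current 6 only covers up to 
order 5, i.e. 32 pages or 128K.)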

> Being largely workload-dependent is why I still think this should be a
> command-line or sysfs tuneable - we could set the default based on how
> much total memory is available, but ultimately it's the end user who
> knows what the workload is going to be and what they care about
> optimising for.

If that hardware is only found in servers, then the extra memory cost
would be trivial, so setting it to the max is the standard approach.

> Another thought (which I'm almost reluctant to share) is that I would
> *love* to try implementing a self-tuning strategy that can detect high
> contention on particular allocation sizes and adjust the caches on the
> fly, but I can easily imagine that having enough inherent overhead to
> end up being an impractical (but fun) waste of time.

For now, I just want to recover the performance lost recently :)

Thanks,
John
Salil Mehta March 31, 2021, 8:01 a.m. UTC | #3
> From: iommu [mailto:iommu-bounces@lists.linux-foundation.org] On Behalf Of
> Robin Murphy
> Sent: Friday, March 19, 2021 5:00 PM
> To: John Garry <john.garry@huawei.com>; joro@8bytes.org; will@kernel.org;
> jejb@linux.ibm.com; martin.petersen@oracle.com; hch@lst.de;
> m.szyprowski@samsung.com
> Cc: iommu@lists.linux-foundation.org; linux-kernel@vger.kernel.org;
> linux-scsi@vger.kernel.org; Linuxarm <linuxarm@huawei.com>
> Subject: Re: [PATCH 5/6] dma-mapping/iommu: Add dma_set_max_opt_size()
> 
> On 2021-03-19 13:25, John Garry wrote:
> > Add a function to allow setting the max size for which we want to
> > optimise DMA mappings.
> 
> It seems neat in theory - particularly for packet-based interfaces that
> might have a known fixed size of data unit that they're working on at
> any given time - but aren't there going to be many cases where the
> driver has no idea because it depends on whatever size(s) of request
> userspace happens to throw at it? Even if it does know the absolute
> maximum size of thing it could ever transfer, that could be
> impractically large in areas like video/AI/etc., so it could still be
> hard to make a reasonable decision.

This is also the case in networking workloads where we have MTU set but
actual packet sizes might vary.


> Being largely workload-dependent is why I still think this should be a
> command-line or sysfs tuneable - we could set the default based on how
> much total memory is available, but ultimately it's the end user who
> knows what the workload is going to be and what they care about
> optimising for.
> 
> Another thought (which I'm almost reluctant to share) is that I would
> *love* to try implementing a self-tuning strategy that can detect high
> contention on particular allocation sizes and adjust the caches on the
> fly, but I can easily imagine that having enough inherent overhead to
> end up being an impractical (but fun) waste of time.

This might be particularly useful for the NICs where packet sizes vary
from 64K to 9K. Hence, without optimal strategy this can affect the
performance of networking workloads.


> Robin.
> 
> [...]

Salil Mehta March 31, 2021, 8:08 a.m. UTC | #4
(+) Correction below; sorry for the typo in the earlier post.

> From: iommu [mailto:iommu-bounces@lists.linux-foundation.org] On Behalf Of
> Robin Murphy
> Sent: Friday, March 19, 2021 5:00 PM
> To: John Garry <john.garry@huawei.com>; joro@8bytes.org; will@kernel.org;
> jejb@linux.ibm.com; martin.petersen@oracle.com; hch@lst.de;
> m.szyprowski@samsung.com
> Cc: iommu@lists.linux-foundation.org; linux-kernel@vger.kernel.org;
> linux-scsi@vger.kernel.org; Linuxarm <linuxarm@huawei.com>
> Subject: Re: [PATCH 5/6] dma-mapping/iommu: Add dma_set_max_opt_size()
> 
> On 2021-03-19 13:25, John Garry wrote:
> > Add a function to allow setting the max size for which we want to
> > optimise DMA mappings.
> 
> It seems neat in theory - particularly for packet-based interfaces that
> might have a known fixed size of data unit that they're working on at
> any given time - but aren't there going to be many cases where the
> driver has no idea because it depends on whatever size(s) of request
> userspace happens to throw at it? Even if it does know the absolute
> maximum size of thing it could ever transfer, that could be
> impractically large in areas like video/AI/etc., so it could still be
> hard to make a reasonable decision.

This is also the case for networking workloads where we have MTU set but
actual packet sizes might vary.

> Being largely workload-dependent is why I still think this should be a
> command-line or sysfs tuneable - we could set the default based on how
> much total memory is available, but ultimately it's the end user who
> knows what the workload is going to be and what they care about
> optimising for.
> 
> Another thought (which I'm almost reluctant to share) is that I would
> *love* to try implementing a self-tuning strategy that can detect high
> contention on particular allocation sizes and adjust the caches on the
> fly, but I can easily imagine that having enough inherent overhead to
> end up being an impractical (but fun) waste of time.

This might be particularly useful for the NICs where packet sizes vary
from 64B to 9K. But without optimal strategy this can affect the
performance of networking workloads.


> Robin.
> 
> [...]

Patch

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index a5dfbd6c0496..d35881fcfb9c 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -447,7 +447,6 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 	return (dma_addr_t)iova << shift;
 }
 
-__maybe_unused
 static void iommu_dma_set_opt_size(struct device *dev, size_t size)
 {
 	struct iommu_domain *domain = iommu_get_dma_domain(dev);
@@ -1278,6 +1277,7 @@ static const struct dma_map_ops iommu_dma_ops = {
 	.map_resource		= iommu_dma_map_resource,
 	.unmap_resource		= iommu_dma_unmap_resource,
 	.get_merge_boundary	= iommu_dma_get_merge_boundary,
+	.set_max_opt_size	= iommu_dma_set_opt_size,
 };
 
 /*
diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index 51872e736e7b..fed7a183b3b9 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -64,6 +64,7 @@ struct dma_map_ops {
 	u64 (*get_required_mask)(struct device *dev);
 	size_t (*max_mapping_size)(struct device *dev);
 	unsigned long (*get_merge_boundary)(struct device *dev);
+	void (*set_max_opt_size)(struct device *dev, size_t size);
 };
 
 #ifdef CONFIG_DMA_OPS
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 2a984cb4d1e0..91fe770145d4 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -144,6 +144,7 @@ u64 dma_get_required_mask(struct device *dev);
 size_t dma_max_mapping_size(struct device *dev);
 bool dma_need_sync(struct device *dev, dma_addr_t dma_addr);
 unsigned long dma_get_merge_boundary(struct device *dev);
+void dma_set_max_opt_size(struct device *dev, size_t size);
 #else /* CONFIG_HAS_DMA */
 static inline dma_addr_t dma_map_page_attrs(struct device *dev,
 		struct page *page, size_t offset, size_t size,
@@ -257,6 +258,10 @@ static inline unsigned long dma_get_merge_boundary(struct device *dev)
 {
 	return 0;
 }
+static inline void dma_set_max_opt_size(struct device *dev, size_t size)
+{
+}
+
 #endif /* CONFIG_HAS_DMA */
 
 struct page *dma_alloc_pages(struct device *dev, size_t size,
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index b6a633679933..59e6acb1c471 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -608,3 +608,14 @@ unsigned long dma_get_merge_boundary(struct device *dev)
 	return ops->get_merge_boundary(dev);
 }
 EXPORT_SYMBOL_GPL(dma_get_merge_boundary);
+
+void dma_set_max_opt_size(struct device *dev, size_t size)
+{
+	const struct dma_map_ops *ops = get_dma_ops(dev);
+
+	if (!ops || !ops->set_max_opt_size)
+		return;
+
+	ops->set_max_opt_size(dev, size);
+}
+EXPORT_SYMBOL_GPL(dma_set_max_opt_size);