diff mbox series

[PATCHv2,1/3] iommu/io-pgtable: Add a quirk to use tlb_flush_all() for partial walk flush

Message ID b099af10926b34249f4a30262db37f50491bebe7.1623981933.git.saiprakash.ranjan@codeaurora.org
State New
Headers show
Series iommu/io-pgtable: Optimize partial walk flush for large scatter-gather list | expand

Commit Message

Sai Prakash Ranjan June 18, 2021, 2:51 a.m. UTC
Add a quirk, IO_PGTABLE_QUIRK_TLB_INV_ALL, to invalidate the entire context
with the tlb_flush_all() callback in partial walk flush, to improve unmap
performance on a select few platforms where the cost of over-invalidation
is less than the unmap latency.

Signed-off-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
---
 drivers/iommu/io-pgtable-arm.c | 3 ++-
 include/linux/io-pgtable.h     | 5 +++++
 2 files changed, 7 insertions(+), 1 deletion(-)

Comments

Robin Murphy June 21, 2021, 3:45 p.m. UTC | #1
On 2021-06-18 03:51, Sai Prakash Ranjan wrote:
> Add a quirk IO_PGTABLE_QUIRK_TLB_INV_ALL to invalidate entire context

> with tlb_flush_all() callback in partial walk flush to improve unmap

> performance on select few platforms where the cost of over-invalidation

> is less than the unmap latency.


I still think this doesn't belong anywhere near io-pgtable at all. It's 
a driver-internal decision how exactly it implements a non-leaf 
invalidation, and that may be more complex than a predetermined boolean 
decision. For example, I've just realised for SMMUv3 we can't invalidate 
multiple levels of table at once with a range command, since if we 
assume the whole thing is mapped at worst-case page granularity we may 
fail to invalidate any parts which are mapped as intermediate-level 
blocks. If invalidating a 1GB region (with 4KB granule) means having to 
fall back to 256K non-range commands, we may not want to invalidate by 
VA then, even though doing so for a 2MB region is still optimal.

It's also quite feasible that drivers might want to do this for leaf 
invalidations too - if you don't like issuing 512 commands to invalidate 
2MB, do you like issuing 511 commands to invalidate 2044KB? - and at 
that point the logic really has to be in the driver anyway.

Robin.

> Signed-off-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>

> ---

>   drivers/iommu/io-pgtable-arm.c | 3 ++-

>   include/linux/io-pgtable.h     | 5 +++++

>   2 files changed, 7 insertions(+), 1 deletion(-)

> 

> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c

> index 87def58e79b5..5d362f2214bd 100644

> --- a/drivers/iommu/io-pgtable-arm.c

> +++ b/drivers/iommu/io-pgtable-arm.c

> @@ -768,7 +768,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)

>   	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |

>   			    IO_PGTABLE_QUIRK_NON_STRICT |

>   			    IO_PGTABLE_QUIRK_ARM_TTBR1 |

> -			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))

> +			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA |

> +			    IO_PGTABLE_QUIRK_TLB_INV_ALL))

>   		return NULL;

>   

>   	data = arm_lpae_alloc_pgtable(cfg);

> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h

> index 4d40dfa75b55..45441592a0e6 100644

> --- a/include/linux/io-pgtable.h

> +++ b/include/linux/io-pgtable.h

> @@ -82,6 +82,10 @@ struct io_pgtable_cfg {

>   	 *

>   	 * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability

>   	 *	attributes set in the TCR for a non-coherent page-table walker.

> +	 *

> +	 * IO_PGTABLE_QUIRK_TLB_INV_ALL: Use TLBIALL/TLBIASID to invalidate

> +	 *	entire context for partial walk flush to increase unmap

> +	 *	performance on select few platforms.

>   	 */

>   	#define IO_PGTABLE_QUIRK_ARM_NS		BIT(0)

>   	#define IO_PGTABLE_QUIRK_NO_PERMS	BIT(1)

> @@ -89,6 +93,7 @@ struct io_pgtable_cfg {

>   	#define IO_PGTABLE_QUIRK_NON_STRICT	BIT(4)

>   	#define IO_PGTABLE_QUIRK_ARM_TTBR1	BIT(5)

>   	#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA	BIT(6)

> +	#define IO_PGTABLE_QUIRK_TLB_INV_ALL	BIT(7)

>   	unsigned long			quirks;

>   	unsigned long			pgsize_bitmap;

>   	unsigned int			ias;

>
Sai Prakash Ranjan June 22, 2021, 7:11 a.m. UTC | #2
Hi Robin,

On 2021-06-21 21:15, Robin Murphy wrote:
> On 2021-06-18 03:51, Sai Prakash Ranjan wrote:

>> Add a quirk IO_PGTABLE_QUIRK_TLB_INV_ALL to invalidate entire context

>> with tlb_flush_all() callback in partial walk flush to improve unmap

>> performance on select few platforms where the cost of 

>> over-invalidation

>> is less than the unmap latency.

> 

> I still think this doesn't belong anywhere near io-pgtable at all.

> It's a driver-internal decision how exactly it implements a non-leaf

> invalidation, and that may be more complex than a predetermined

> boolean decision. For example, I've just realised for SMMUv3 we can't

> invalidate multiple levels of table at once with a range command,

> since if we assume the whole thing is mapped at worst-case page

> granularity we may fail to invalidate any parts which are mapped as

> intermediate-level blocks. If invalidating a 1GB region (with 4KB

> granule) means having to fall back to 256K non-range commands, we may

> not want to invalidate by VA then, even though doing so for a 2MB

> region is still optimal.

> 

> It's also quite feasible that drivers might want to do this for leaf

> invalidations too - if you don't like issuing 512 commands to

> invalidate 2MB, do you like issuing 511 commands to invalidate 2044KB?

> - and at that point the logic really has to be in the driver anyway.

> 


Ok, I will move this to the tlb_flush_walk() functions in the drivers. In the
previous v1 thread, you suggested making the choice in the
iommu_get_dma_strict() test. I assume you meant the test in
iommu_dma_init_domain() with a flag, or was it the leaf driver
(e.g. arm-smmu.c) test of iommu_get_dma_strict() in init_domain?

I am still a bit confused about where this flag would live. Should it be
part of struct iommu_domain?

Thanks,
Sai

> 

>> Signed-off-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>

>> ---

>>   drivers/iommu/io-pgtable-arm.c | 3 ++-

>>   include/linux/io-pgtable.h     | 5 +++++

>>   2 files changed, 7 insertions(+), 1 deletion(-)

>> 

>> diff --git a/drivers/iommu/io-pgtable-arm.c 

>> b/drivers/iommu/io-pgtable-arm.c

>> index 87def58e79b5..5d362f2214bd 100644

>> --- a/drivers/iommu/io-pgtable-arm.c

>> +++ b/drivers/iommu/io-pgtable-arm.c

>> @@ -768,7 +768,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg 

>> *cfg, void *cookie)

>>   	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |

>>   			    IO_PGTABLE_QUIRK_NON_STRICT |

>>   			    IO_PGTABLE_QUIRK_ARM_TTBR1 |

>> -			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))

>> +			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA |

>> +			    IO_PGTABLE_QUIRK_TLB_INV_ALL))

>>   		return NULL;

>>     	data = arm_lpae_alloc_pgtable(cfg);

>> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h

>> index 4d40dfa75b55..45441592a0e6 100644

>> --- a/include/linux/io-pgtable.h

>> +++ b/include/linux/io-pgtable.h

>> @@ -82,6 +82,10 @@ struct io_pgtable_cfg {

>>   	 *

>>   	 * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability

>>   	 *	attributes set in the TCR for a non-coherent page-table walker.

>> +	 *

>> +	 * IO_PGTABLE_QUIRK_TLB_INV_ALL: Use TLBIALL/TLBIASID to invalidate

>> +	 *	entire context for partial walk flush to increase unmap

>> +	 *	performance on select few platforms.

>>   	 */

>>   	#define IO_PGTABLE_QUIRK_ARM_NS		BIT(0)

>>   	#define IO_PGTABLE_QUIRK_NO_PERMS	BIT(1)

>> @@ -89,6 +93,7 @@ struct io_pgtable_cfg {

>>   	#define IO_PGTABLE_QUIRK_NON_STRICT	BIT(4)

>>   	#define IO_PGTABLE_QUIRK_ARM_TTBR1	BIT(5)

>>   	#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA	BIT(6)

>> +	#define IO_PGTABLE_QUIRK_TLB_INV_ALL	BIT(7)

>>   	unsigned long			quirks;

>>   	unsigned long			pgsize_bitmap;

>>   	unsigned int			ias;

>> 


-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
Robin Murphy June 22, 2021, 12:11 p.m. UTC | #3
On 2021-06-22 08:11, Sai Prakash Ranjan wrote:
> Hi Robin,

> 

> On 2021-06-21 21:15, Robin Murphy wrote:

>> On 2021-06-18 03:51, Sai Prakash Ranjan wrote:

>>> Add a quirk IO_PGTABLE_QUIRK_TLB_INV_ALL to invalidate entire context

>>> with tlb_flush_all() callback in partial walk flush to improve unmap

>>> performance on select few platforms where the cost of over-invalidation

>>> is less than the unmap latency.

>>

>> I still think this doesn't belong anywhere near io-pgtable at all.

>> It's a driver-internal decision how exactly it implements a non-leaf

>> invalidation, and that may be more complex than a predetermined

>> boolean decision. For example, I've just realised for SMMUv3 we can't

>> invalidate multiple levels of table at once with a range command,

>> since if we assume the whole thing is mapped at worst-case page

>> granularity we may fail to invalidate any parts which are mapped as

>> intermediate-level blocks. If invalidating a 1GB region (with 4KB

>> granule) means having to fall back to 256K non-range commands, we may

>> not want to invalidate by VA then, even though doing so for a 2MB

>> region is still optimal.

>>

>> It's also quite feasible that drivers might want to do this for leaf

>> invalidations too - if you don't like issuing 512 commands to

>> invalidate 2MB, do you like issuing 511 commands to invalidate 2044KB?

>> - and at that point the logic really has to be in the driver anyway.

>>

> 

> Ok I will move this to tlb_flush_walk() functions in the drivers. In the 

> previous

> v1 thread, you suggested to make the choice in iommu_get_dma_strict() test,

> I assume you meant the test in iommu_dma_init_domain() with a flag or 

> was it

> the leaf driver(ex:arm-smmu.c) test of iommu_get_dma_strict() in 

> init_domain?


Yes, I meant literally inside the same condition where we currently set 
"pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;" in 
arm_smmu_init_domain_context().

> I am still a bit confused on where this flag would be? Should this be a 

> part

> of struct iommu_domain?


Well, if you were to rewrite the config with an alternative set of 
flush_ops at that point it would be implicit. For a flag, probably 
either in arm_smmu_domain or arm_smmu_impl. Maybe a flag would be less 
useful than generalising straight to a "maximum number of by-VA 
invalidations it's worth sending individually" threshold value? It's 
clear to me what overall shape and separation of responsibility is most 
logical, but beyond that I don't have a particularly strong opinion on 
the exact implementation; I've just been chucking ideas around :)

Cheers,
Robin.
Sai Prakash Ranjan June 22, 2021, 2:27 p.m. UTC | #4
Hi Robin,

On 2021-06-22 17:41, Robin Murphy wrote:
> On 2021-06-22 08:11, Sai Prakash Ranjan wrote:

>> Hi Robin,

>> 

>> On 2021-06-21 21:15, Robin Murphy wrote:

>>> On 2021-06-18 03:51, Sai Prakash Ranjan wrote:

>>>> Add a quirk IO_PGTABLE_QUIRK_TLB_INV_ALL to invalidate entire 

>>>> context

>>>> with tlb_flush_all() callback in partial walk flush to improve unmap

>>>> performance on select few platforms where the cost of 

>>>> over-invalidation

>>>> is less than the unmap latency.

>>> 

>>> I still think this doesn't belong anywhere near io-pgtable at all.

>>> It's a driver-internal decision how exactly it implements a non-leaf

>>> invalidation, and that may be more complex than a predetermined

>>> boolean decision. For example, I've just realised for SMMUv3 we can't

>>> invalidate multiple levels of table at once with a range command,

>>> since if we assume the whole thing is mapped at worst-case page

>>> granularity we may fail to invalidate any parts which are mapped as

>>> intermediate-level blocks. If invalidating a 1GB region (with 4KB

>>> granule) means having to fall back to 256K non-range commands, we may

>>> not want to invalidate by VA then, even though doing so for a 2MB

>>> region is still optimal.

>>> 

>>> It's also quite feasible that drivers might want to do this for leaf

>>> invalidations too - if you don't like issuing 512 commands to

>>> invalidate 2MB, do you like issuing 511 commands to invalidate 

>>> 2044KB?

>>> - and at that point the logic really has to be in the driver anyway.

>>> 

>> 

>> Ok I will move this to tlb_flush_walk() functions in the drivers. In 

>> the previous

>> v1 thread, you suggested to make the choice in iommu_get_dma_strict() 

>> test,

>> I assume you meant the test in iommu_dma_init_domain() with a flag or 

>> was it

>> the leaf driver(ex:arm-smmu.c) test of iommu_get_dma_strict() in 

>> init_domain?

> 

> Yes, I meant literally inside the same condition where we currently

> set "pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;" in

> arm_smmu_init_domain_context().

> 


Ok got it, thanks.

>> I am still a bit confused on where this flag would be? Should this be 

>> a part

>> of struct iommu_domain?

> 

> Well, if you were to rewrite the config with an alternative set of

> flush_ops at that point it would be implicit. For a flag, probably

> either in arm_smmu_domain or arm_smmu_impl. Maybe a flag would be less

> useful than generalising straight to a "maximum number of by-VA

> invalidations it's worth sending individually" threshold value?


But then we would still need some flag to make this implementation-specific
(qcom-specific for now), and this threshold would just be another
condition, although it would have been useful if this were generic enough.

> It's clear to me what overall shape and separation of responsibility is

> most logical, but beyond that I don't have a particularly strong

> opinion on the exact implementation; I've just been chucking ideas

> around :)

> 


Your ideas are very informative and useful :)

Thanks,
Sai

-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
Robin Murphy June 22, 2021, 6:37 p.m. UTC | #5
On 2021-06-22 15:27, Sai Prakash Ranjan wrote:
> Hi Robin,

> 

> On 2021-06-22 17:41, Robin Murphy wrote:

>> On 2021-06-22 08:11, Sai Prakash Ranjan wrote:

>>> Hi Robin,

>>>

>>> On 2021-06-21 21:15, Robin Murphy wrote:

>>>> On 2021-06-18 03:51, Sai Prakash Ranjan wrote:

>>>>> Add a quirk IO_PGTABLE_QUIRK_TLB_INV_ALL to invalidate entire context

>>>>> with tlb_flush_all() callback in partial walk flush to improve unmap

>>>>> performance on select few platforms where the cost of 

>>>>> over-invalidation

>>>>> is less than the unmap latency.

>>>>

>>>> I still think this doesn't belong anywhere near io-pgtable at all.

>>>> It's a driver-internal decision how exactly it implements a non-leaf

>>>> invalidation, and that may be more complex than a predetermined

>>>> boolean decision. For example, I've just realised for SMMUv3 we can't

>>>> invalidate multiple levels of table at once with a range command,

>>>> since if we assume the whole thing is mapped at worst-case page

>>>> granularity we may fail to invalidate any parts which are mapped as

>>>> intermediate-level blocks. If invalidating a 1GB region (with 4KB

>>>> granule) means having to fall back to 256K non-range commands, we may

>>>> not want to invalidate by VA then, even though doing so for a 2MB

>>>> region is still optimal.

>>>>

>>>> It's also quite feasible that drivers might want to do this for leaf

>>>> invalidations too - if you don't like issuing 512 commands to

>>>> invalidate 2MB, do you like issuing 511 commands to invalidate 2044KB?

>>>> - and at that point the logic really has to be in the driver anyway.

>>>>

>>>

>>> Ok I will move this to tlb_flush_walk() functions in the drivers. In 

>>> the previous

>>> v1 thread, you suggested to make the choice in iommu_get_dma_strict() 

>>> test,

>>> I assume you meant the test in iommu_dma_init_domain() with a flag or 

>>> was it

>>> the leaf driver(ex:arm-smmu.c) test of iommu_get_dma_strict() in 

>>> init_domain?

>>

>> Yes, I meant literally inside the same condition where we currently

>> set "pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;" in

>> arm_smmu_init_domain_context().

>>

> 

> Ok got it, thanks.

> 

>>> I am still a bit confused on where this flag would be? Should this be 

>>> a part

>>> of struct iommu_domain?

>>

>> Well, if you were to rewrite the config with an alternative set of

>> flush_ops at that point it would be implicit. For a flag, probably

>> either in arm_smmu_domain or arm_smmu_impl. Maybe a flag would be less

>> useful than generalising straight to a "maximum number of by-VA

>> invalidations it's worth sending individually" threshold value?

> 

> But then we would still need some flag to make this implementation

> specific (qcom specific for now) and this threshold would just be

> another condition although it would have been useful if this was

> generic enough.


Well, for that approach I assume we could do something like special-case 
0, or if it's a mutable per-domain value maybe just initialise it to 
SIZE_MAX or whatever such that it would never be reached in practice. 
Whichever way, it was meant to be implied that anything at the domain 
level would still be subject to final adjustment by the init_context hook.

Robin.

>> It's clear to me what overall shape and separation of responsibility is

>> most logical, but beyond that I don't have a particularly strong

>> opinion on the exact implementation; I've just been chucking ideas

>> around :)

>>

> 

> Your ideas are very informative and useful :)

> 

> Thanks,

> Sai

>
Sai Prakash Ranjan June 23, 2021, 1:43 p.m. UTC | #6
Hi Robin,

On 2021-06-23 00:07, Robin Murphy wrote:
> On 2021-06-22 15:27, Sai Prakash Ranjan wrote:

>> Hi Robin,

>> 

>> On 2021-06-22 17:41, Robin Murphy wrote:

>>> On 2021-06-22 08:11, Sai Prakash Ranjan wrote:

>>>> Hi Robin,

>>>> 

>>>> On 2021-06-21 21:15, Robin Murphy wrote:

>>>>> On 2021-06-18 03:51, Sai Prakash Ranjan wrote:

>>>>>> Add a quirk IO_PGTABLE_QUIRK_TLB_INV_ALL to invalidate entire 

>>>>>> context

>>>>>> with tlb_flush_all() callback in partial walk flush to improve 

>>>>>> unmap

>>>>>> performance on select few platforms where the cost of 

>>>>>> over-invalidation

>>>>>> is less than the unmap latency.

>>>>> 

>>>>> I still think this doesn't belong anywhere near io-pgtable at all.

>>>>> It's a driver-internal decision how exactly it implements a 

>>>>> non-leaf

>>>>> invalidation, and that may be more complex than a predetermined

>>>>> boolean decision. For example, I've just realised for SMMUv3 we 

>>>>> can't

>>>>> invalidate multiple levels of table at once with a range command,

>>>>> since if we assume the whole thing is mapped at worst-case page

>>>>> granularity we may fail to invalidate any parts which are mapped as

>>>>> intermediate-level blocks. If invalidating a 1GB region (with 4KB

>>>>> granule) means having to fall back to 256K non-range commands, we 

>>>>> may

>>>>> not want to invalidate by VA then, even though doing so for a 2MB

>>>>> region is still optimal.

>>>>> 

>>>>> It's also quite feasible that drivers might want to do this for 

>>>>> leaf

>>>>> invalidations too - if you don't like issuing 512 commands to

>>>>> invalidate 2MB, do you like issuing 511 commands to invalidate 

>>>>> 2044KB?

>>>>> - and at that point the logic really has to be in the driver 

>>>>> anyway.

>>>>> 

>>>> 

>>>> Ok I will move this to tlb_flush_walk() functions in the drivers. In 

>>>> the previous

>>>> v1 thread, you suggested to make the choice in 

>>>> iommu_get_dma_strict() test,

>>>> I assume you meant the test in iommu_dma_init_domain() with a flag 

>>>> or was it

>>>> the leaf driver(ex:arm-smmu.c) test of iommu_get_dma_strict() in 

>>>> init_domain?

>>> 

>>> Yes, I meant literally inside the same condition where we currently

>>> set "pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;" in

>>> arm_smmu_init_domain_context().

>>> 

>> 

>> Ok got it, thanks.

>> 

>>>> I am still a bit confused on where this flag would be? Should this 

>>>> be a part

>>>> of struct iommu_domain?

>>> 

>>> Well, if you were to rewrite the config with an alternative set of

>>> flush_ops at that point it would be implicit. For a flag, probably

>>> either in arm_smmu_domain or arm_smmu_impl. Maybe a flag would be 

>>> less

>>> useful than generalising straight to a "maximum number of by-VA

>>> invalidations it's worth sending individually" threshold value?

>> 

>> But then we would still need some flag to make this implementation

>> specific (qcom specific for now) and this threshold would just be

>> another condition although it would have been useful if this was

>> generic enough.

> 

> Well, for that approach I assume we could do something like

> special-case 0, or if it's a mutable per-domain value maybe just

> initialise it to SIZE_MAX or whatever such that it would never be

> reached in practice. Whichever way, it was meant to be implied that

> anything at the domain level would still be subject to final

> adjustment by the init_context hook.

> 


Ok, that should work, so I went ahead with another set of flush_ops
and posted v3.

Thanks,
Sai

> 

>>> It's clear to me what overall shape and separation of responsibility 

>>> is

>>> most logical, but beyond that I don't have a particularly strong

>>> opinion on the exact implementation; I've just been chucking ideas

>>> around :)

>>> 

>> 

>> Your ideas are very informative and useful :)

>> 

>> Thanks,

>> Sai

>> 


-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
diff mbox series

Patch

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 87def58e79b5..5d362f2214bd 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -768,7 +768,8 @@  arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
 	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
 			    IO_PGTABLE_QUIRK_NON_STRICT |
 			    IO_PGTABLE_QUIRK_ARM_TTBR1 |
-			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
+			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA |
+			    IO_PGTABLE_QUIRK_TLB_INV_ALL))
 		return NULL;
 
 	data = arm_lpae_alloc_pgtable(cfg);
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 4d40dfa75b55..45441592a0e6 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -82,6 +82,10 @@  struct io_pgtable_cfg {
 	 *
 	 * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability
 	 *	attributes set in the TCR for a non-coherent page-table walker.
+	 *
+	 * IO_PGTABLE_QUIRK_TLB_INV_ALL: Use TLBIALL/TLBIASID to invalidate
+	 *	entire context for partial walk flush to increase unmap
+	 *	performance on select few platforms.
 	 */
 	#define IO_PGTABLE_QUIRK_ARM_NS		BIT(0)
 	#define IO_PGTABLE_QUIRK_NO_PERMS	BIT(1)
@@ -89,6 +93,7 @@  struct io_pgtable_cfg {
 	#define IO_PGTABLE_QUIRK_NON_STRICT	BIT(4)
 	#define IO_PGTABLE_QUIRK_ARM_TTBR1	BIT(5)
 	#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA	BIT(6)
+	#define IO_PGTABLE_QUIRK_TLB_INV_ALL	BIT(7)
 	unsigned long			quirks;
 	unsigned long			pgsize_bitmap;
 	unsigned int			ias;