Message ID | 20220822184742.32076-1-robdclark@gmail.com |
---|---|
State | Superseded |
Series | drm/msm/iommu: optimize map/unmap |
Hi Rob,

On 8/23/2022 12:17 AM, Rob Clark wrote:
> From: Rob Clark <robdclark@chromium.org>
>
> Using map_pages/unmap_pages cuts down on the # of pgtable walks needed
> in the process of finding where to insert/remove an entry. The end
> result is ~5-10x faster than mapping a single page at a time.
>
> Signed-off-by: Rob Clark <robdclark@chromium.org>
> ---
>  drivers/gpu/drm/msm/msm_iommu.c | 91 ++++++++++++++++++++++++++++-----
>  1 file changed, 79 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
> index a54ed354578b..0f3f60da3314 100644
> --- a/drivers/gpu/drm/msm/msm_iommu.c
> +++ b/drivers/gpu/drm/msm/msm_iommu.c
> @@ -21,6 +21,7 @@ struct msm_iommu_pagetable {
>  	struct msm_mmu base;
>  	struct msm_mmu *parent;
>  	struct io_pgtable_ops *pgtbl_ops;
> +	unsigned long pgsize_bitmap;	/* Bitmap of page sizes in use */
>  	phys_addr_t ttbr;
>  	u32 asid;
>  };
> @@ -29,23 +30,85 @@ static struct msm_iommu_pagetable *to_pagetable(struct msm_mmu *mmu)
>  	return container_of(mmu, struct msm_iommu_pagetable, base);
>  }
>
> +/* based on iommu_pgsize() in iommu.c: */
> +static size_t iommu_pgsize(struct msm_iommu_pagetable *pagetable,

Maybe call this msm_iommu_pgsize? There won't be any namespace conflict
since it is static in both places, but it would still be better.

> +		unsigned long iova, phys_addr_t paddr,
> +		size_t size, size_t *count)
> +{
> +	unsigned int pgsize_idx, pgsize_idx_next;
> +	unsigned long pgsizes;
> +	size_t offset, pgsize, pgsize_next;
> +	unsigned long addr_merge = paddr | iova;
> +
> +	/* Page sizes supported by the hardware and small enough for @size */
> +	pgsizes = pagetable->pgsize_bitmap & GENMASK(__fls(size), 0);
> +
> +	/* Constrain the page sizes further based on the maximum alignment */
> +	if (likely(addr_merge))
> +		pgsizes &= GENMASK(__ffs(addr_merge), 0);
> +
> +	/* Make sure we have at least one suitable page size */
> +	BUG_ON(!pgsizes);
> +
> +	/* Pick the biggest page size remaining */
> +	pgsize_idx = __fls(pgsizes);
> +	pgsize = BIT(pgsize_idx);
> +	if (!count)
> +		return pgsize;
> +
> +	/* Find the next biggest support page size, if it exists */
> +	pgsizes = pagetable->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
> +	if (!pgsizes)
> +		goto out_set_count;
> +
> +	pgsize_idx_next = __ffs(pgsizes);
> +	pgsize_next = BIT(pgsize_idx_next);
> +
> +	/*
> +	 * There's no point trying a bigger page size unless the virtual
> +	 * and physical addresses are similarly offset within the larger page.
> +	 */
> +	if ((iova ^ paddr) & (pgsize_next - 1))
> +		goto out_set_count;
> +
> +	/* Calculate the offset to the next page size alignment boundary */
> +	offset = pgsize_next - (addr_merge & (pgsize_next - 1));
> +
> +	/*
> +	 * If size is big enough to accommodate the larger page, reduce
> +	 * the number of smaller pages.
> +	 */
> +	if (offset + pgsize_next <= size)
> +		size = offset;
> +
> +out_set_count:
> +	*count = size >> pgsize_idx;
> +	return pgsize;
> +}
> +
>  static int msm_iommu_pagetable_unmap(struct msm_mmu *mmu, u64 iova,
>  		size_t size)
>  {
>  	struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
>  	struct io_pgtable_ops *ops = pagetable->pgtbl_ops;
> -	size_t unmapped = 0;
>
>  	/* Unmap the block one page at a time */

This comment will need an update.

>  	while (size) {
> -		unmapped += ops->unmap(ops, iova, 4096, NULL);
> -		iova += 4096;
> -		size -= 4096;
> +		size_t unmapped, pgsize, count;
> +
> +		pgsize = iommu_pgsize(pagetable, iova, iova, size, &count);
> +
> +		unmapped = ops->unmap_pages(ops, iova, pgsize, count, NULL);
> +		if (!unmapped)
> +			break;
> +
> +		iova += unmapped;
> +		size -= unmapped;
>  	}
>
>  	iommu_flush_iotlb_all(to_msm_iommu(pagetable->parent)->domain);
>
> -	return (unmapped == size) ? 0 : -EINVAL;
> +	return (size == 0) ? 0 : -EINVAL;
>  }
>
>  static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
> @@ -54,7 +117,6 @@ static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
>  	struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
>  	struct io_pgtable_ops *ops = pagetable->pgtbl_ops;
>  	struct scatterlist *sg;
> -	size_t mapped = 0;
>  	u64 addr = iova;
>  	unsigned int i;
>
> @@ -64,15 +126,19 @@ static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
>
>  		/* Map the block one page at a time */

This comment will need an update.

>  		while (size) {
> -			if (ops->map(ops, addr, phys, 4096, prot, GFP_KERNEL)) {
> -				msm_iommu_pagetable_unmap(mmu, iova, mapped);
> +			size_t pgsize, count, mapped;
> +
> +			pgsize = iommu_pgsize(pagetable, addr, phys, size, &count);
> +
> +			if (ops->map_pages(ops, addr, phys, pgsize, count,
> +					prot, GFP_KERNEL, &mapped)) {
> +				msm_iommu_pagetable_unmap(mmu, iova, addr - iova);

On ->map_pages() failure, some pages can still be mapped and would need to
be accounted for when unmapping, so maybe follow the logic in __iommu_map()
and account for the mapped size instead of using addr - iova, since addr is
not updated when mapping some of the pages fails.

Thanks,
Sai

>  				return -EINVAL;
>  			}
>
> -			phys += 4096;
> -			addr += 4096;
> -			size -= 4096;
> -			mapped += 4096;
> +			phys += mapped;
> +			addr += mapped;
> +			size -= mapped;
>  		}
>  	}
>
> @@ -207,6 +273,7 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)
>
>  	/* Needed later for TLB flush */
>  	pagetable->parent = parent;
> +	pagetable->pgsize_bitmap = ttbr0_cfg.pgsize_bitmap;
>  	pagetable->ttbr = ttbr0_cfg.arm_lpae_s1_cfg.ttbr;
>
>  	/*
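On Sai's last point, a minimal, untested sketch of what that accounting could look like in msm_iommu_pagetable_map(), modelled on the way __iommu_map() in drivers/iommu/iommu.c counts bytes that were mapped before a failure. `ret` and `mapped_total` are hypothetical locals added for the sketch, not part of the posted patch:

```c
	/* Sketch of the inner mapping loop only, not a complete function.
	 * 'mapped_total' accumulates the bytes mapped so far relative to the
	 * original iova, across all scatterlist entries.
	 */
	while (size) {
		size_t pgsize, count, mapped = 0;
		int ret;

		pgsize = iommu_pgsize(pagetable, addr, phys, size, &count);

		ret = ops->map_pages(ops, addr, phys, pgsize, count,
				     prot, GFP_KERNEL, &mapped);

		/* Some pages may have been mapped even if an error was
		 * returned, and 'mapped' reflects that partial progress,
		 * so account for it before acting on the error. */
		mapped_total += mapped;
		phys += mapped;
		addr += mapped;
		size -= mapped;

		if (ret) {
			/* undo everything mapped so far, including the
			 * partially completed call above */
			msm_iommu_pagetable_unmap(mmu, iova, mapped_total);
			return -EINVAL;
		}
	}
```

Whether the running total lives in a dedicated local like this or is derived from the existing addr/iova arithmetic is a detail; the point of the suggestion is that the failing call's partial progress must be included in the size handed to the unmap path.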
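Separately, to make concrete what the iommu_pgsize() logic copied into msm_iommu.c buys, here is a small userspace sketch of the same bit math. The helper name pick_pgsize(), the 4K/2M/1G bitmap, and the example region are illustrative assumptions, not taken from the patch or the driver:

```c
/* pgsize_demo.c: userspace illustration of the page-size selection used by
 * the patch.  Build with: cc -Wall pgsize_demo.c -o pgsize_demo
 */
#include <stdio.h>
#include <stddef.h>

#define BIT(n)		(1UL << (n))
/* assumes 64-bit unsigned long; good enough for a demo */
#define GENMASK(h, l)	(((~0UL) >> (63 - (h))) & ((~0UL) << (l)))
#define SZ_4K		0x00001000UL
#define SZ_2M		0x00200000UL
#define SZ_1G		0x40000000UL

static unsigned int fls_long(unsigned long x) { return 63 - __builtin_clzl(x); }
static unsigned int ffs_long(unsigned long x) { return __builtin_ctzl(x); }

/* Condensed version of the patch's iommu_pgsize() selection (BUG_ON and the
 * !count early return dropped): pick the biggest page size that fits both the
 * remaining size and the iova/paddr alignment, and how many of them to map. */
static size_t pick_pgsize(unsigned long bitmap, unsigned long iova,
			  unsigned long paddr, size_t size, size_t *count)
{
	unsigned long addr_merge = paddr | iova;
	unsigned long pgsizes, pgsize_next, offset;
	unsigned int pgsize_idx;

	/* sizes supported by the pgtable and no bigger than the request */
	pgsizes = bitmap & GENMASK(fls_long(size), 0);
	/* ...and no bigger than the iova/paddr alignment */
	if (addr_merge)
		pgsizes &= GENMASK(ffs_long(addr_merge), 0);

	pgsize_idx = fls_long(pgsizes);	/* biggest usable page size */

	/* if a larger supported size is reachable, stop this run of pages at
	 * that alignment boundary so the next call can use the bigger size */
	pgsizes = bitmap & ~GENMASK(pgsize_idx, 0);
	if (pgsizes && !((iova ^ paddr) & (BIT(ffs_long(pgsizes)) - 1))) {
		pgsize_next = BIT(ffs_long(pgsizes));
		offset = pgsize_next - (addr_merge & (pgsize_next - 1));
		if (offset + pgsize_next <= size)
			size = offset;
	}

	*count = size >> pgsize_idx;
	return BIT(pgsize_idx);
}

int main(void)
{
	/* assumed ARM LPAE 4K-granule bitmap: 4K, 2M and 1G pages */
	unsigned long bitmap = SZ_4K | SZ_2M | SZ_1G;
	size_t pgsize, count;

	/* hypothetical 4MB, 2MB-aligned buffer: the old loop issued 1024
	 * individual 4KB ops->map() calls; the new loop asks for two 2MB
	 * blocks in a single ops->map_pages() call */
	pgsize = pick_pgsize(bitmap, 0x10000000, 0x10000000, 0x400000, &count);
	printf("pgsize=0x%zx count=%zu\n", pgsize, count); /* 0x200000, 2 */
	return 0;
}
```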