[v2,0/6] iommu-arm-smmu: Add auxiliary domains and per-instance pagetables

Message ID 20200626200414.14382-1-jcrouse@codeaurora.org

Message

Jordan Crouse June 26, 2020, 8:04 p.m. UTC
This is a new refresh of support for auxiliary domains for arm-smmu-v2
and per-instance pagetables for drm/msm. The big change from past efforts
is that, outside of creating a single aux domain to enable TTBR0, all of
the per-instance pagetables are created and managed exclusively in drm/msm
without involving the arm-smmu driver. This fits the suggested model of
letting the GPU hardware do what it needs and leaving the arm-smmu driver
blissfully unaware.

Almost. In order to set up the io-pgtable properly in drm/msm we need to
query the pagetable configuration from the currently active domain, and we
need to rely on the iommu API to flush TLBs after an unmap. In the future we
can optimize this in the drm/msm driver to track the state of the TLBs, but
for now the big hammer lets us get off the ground.
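
As a rough sketch, the flow boils down to this (condensed from patches 2-4
below; the example_* names are purely illustrative and are not functions in
the series):

#include <linux/io-pgtable.h>
#include <linux/iommu.h>

static struct io_pgtable_ops *example_pagetable_create(struct iommu_domain *aux_domain)
{
	struct io_pgtable_cfg cfg;

	/* DOMAIN_ATTR_PGTABLE_CFG is introduced in patch 3 of this series */
	if (iommu_domain_get_attr(aux_domain, DOMAIN_ATTR_PGTABLE_CFG, &cfg))
		return NULL;

	/* Patch 2 lets a NULL ->tlb skip the io-pgtable TLB helpers */
	cfg.tlb = NULL;

	return alloc_io_pgtable_ops(ARM_64_LPAE_S1, &cfg, aux_domain);
}

/* After ops->unmap(), drm/msm flushes everything on the parent domain */
static void example_tlb_big_hammer(struct iommu_domain *parent_domain)
{
	iommu_flush_tlb_all(parent_domain);
}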

This series is built on the split pagetable support [1].

[1] https://patchwork.kernel.org/patch/11628543/

v2: Remove unneeded cruft in the a6xx page switch sequence

Jordan Crouse (6):
  iommu/arm-smmu: Add auxiliary domain support for arm-smmuv2
  iommu/io-pgtable: Allow a pgtable implementation to skip TLB
    operations
  iommu/arm-smmu: Add a domain attribute to pass the pagetable config
  drm/msm: Add support to create a local pagetable
  drm/msm: Add support for address space instances
  drm/msm/a6xx: Add support for per-instance pagetables

 drivers/gpu/drm/msm/adreno/a6xx_gpu.c |  43 +++++
 drivers/gpu/drm/msm/msm_drv.c         |  15 +-
 drivers/gpu/drm/msm/msm_drv.h         |   4 +
 drivers/gpu/drm/msm/msm_gem_vma.c     |   9 +
 drivers/gpu/drm/msm/msm_gpu.c         |  17 ++
 drivers/gpu/drm/msm/msm_gpu.h         |   5 +
 drivers/gpu/drm/msm/msm_gpummu.c      |   2 +-
 drivers/gpu/drm/msm/msm_iommu.c       | 180 +++++++++++++++++++-
 drivers/gpu/drm/msm/msm_mmu.h         |  16 +-
 drivers/gpu/drm/msm/msm_ringbuffer.h  |   1 +
 drivers/iommu/arm-smmu.c              | 231 ++++++++++++++++++++++++--
 drivers/iommu/arm-smmu.h              |   1 +
 include/linux/io-pgtable.h            |  11 +-
 include/linux/iommu.h                 |   1 +
 14 files changed, 507 insertions(+), 29 deletions(-)

Comments

Robin Murphy July 7, 2020, 11:34 a.m. UTC | #1
On 2020-06-26 21:04, Jordan Crouse wrote:
> Allow a io-pgtable implementation to skip TLB operations by checking for

> NULL pointers in the helper functions. It will be up to to the owner

> of the io-pgtable instance to make sure that they independently handle

> the TLB correctly.


I don't really understand what this is for - tricking the IOMMU driver 
into not performing its TLB maintenance at points when that maintenance 
has been deemed necessary doesn't seem like the appropriate way to 
achieve anything good :/

Robin.

> Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>

> ---

> 

>   include/linux/io-pgtable.h | 11 +++++++----

>   1 file changed, 7 insertions(+), 4 deletions(-)

> 

> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h

> index 53d53c6c2be9..bbed1d3925ba 100644

> --- a/include/linux/io-pgtable.h

> +++ b/include/linux/io-pgtable.h

> @@ -210,21 +210,24 @@ struct io_pgtable {

>   

>   static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop)

>   {

> -	iop->cfg.tlb->tlb_flush_all(iop->cookie);

> +	if (iop->cfg.tlb)

> +		iop->cfg.tlb->tlb_flush_all(iop->cookie);

>   }

>   

>   static inline void

>   io_pgtable_tlb_flush_walk(struct io_pgtable *iop, unsigned long iova,

>   			  size_t size, size_t granule)

>   {

> -	iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie);

> +	if (iop->cfg.tlb)

> +		iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie);

>   }

>   

>   static inline void

>   io_pgtable_tlb_flush_leaf(struct io_pgtable *iop, unsigned long iova,

>   			  size_t size, size_t granule)

>   {

> -	iop->cfg.tlb->tlb_flush_leaf(iova, size, granule, iop->cookie);

> +	if (iop->cfg.tlb)

> +		iop->cfg.tlb->tlb_flush_leaf(iova, size, granule, iop->cookie);

>   }

>   

>   static inline void

> @@ -232,7 +235,7 @@ io_pgtable_tlb_add_page(struct io_pgtable *iop,

>   			struct iommu_iotlb_gather * gather, unsigned long iova,

>   			size_t granule)

>   {

> -	if (iop->cfg.tlb->tlb_add_page)

> +	if (iop->cfg.tlb && iop->cfg.tlb->tlb_add_page)

>   		iop->cfg.tlb->tlb_add_page(gather, iova, granule, iop->cookie);

>   }

>   

>
Robin Murphy July 7, 2020, 11:36 a.m. UTC | #2
On 2020-06-26 21:04, Jordan Crouse wrote:
> Add support to create a io-pgtable for use by targets that support

> per-instance pagetables.  In order to support per-instance pagetables the

> GPU SMMU device needs to have the qcom,adreno-smmu compatible string and

> split pagetables and auxiliary domains need to be supported and enabled.

> 

> Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>

> ---

> 

>   drivers/gpu/drm/msm/msm_gpummu.c |   2 +-

>   drivers/gpu/drm/msm/msm_iommu.c  | 180 ++++++++++++++++++++++++++++++-

>   drivers/gpu/drm/msm/msm_mmu.h    |  16 ++-

>   3 files changed, 195 insertions(+), 3 deletions(-)

> 

> diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c

> index 310a31b05faa..aab121f4beb7 100644

> --- a/drivers/gpu/drm/msm/msm_gpummu.c

> +++ b/drivers/gpu/drm/msm/msm_gpummu.c

> @@ -102,7 +102,7 @@ struct msm_mmu *msm_gpummu_new(struct device *dev, struct msm_gpu *gpu)

>   	}

>   

>   	gpummu->gpu = gpu;

> -	msm_mmu_init(&gpummu->base, dev, &funcs);

> +	msm_mmu_init(&gpummu->base, dev, &funcs, MSM_MMU_GPUMMU);

>   

>   	return &gpummu->base;

>   }

> diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c

> index 1b6635504069..f455c597f76d 100644

> --- a/drivers/gpu/drm/msm/msm_iommu.c

> +++ b/drivers/gpu/drm/msm/msm_iommu.c

> @@ -4,15 +4,192 @@

>    * Author: Rob Clark <robdclark@gmail.com>

>    */

>   

> +#include <linux/io-pgtable.h>

>   #include "msm_drv.h"

>   #include "msm_mmu.h"

>   

>   struct msm_iommu {

>   	struct msm_mmu base;

>   	struct iommu_domain *domain;

> +	struct iommu_domain *aux_domain;

>   };

> +

>   #define to_msm_iommu(x) container_of(x, struct msm_iommu, base)

>   

> +struct msm_iommu_pagetable {

> +	struct msm_mmu base;

> +	struct msm_mmu *parent;

> +	struct io_pgtable_ops *pgtbl_ops;

> +	phys_addr_t ttbr;

> +	u32 asid;

> +};

> +

> +static struct msm_iommu_pagetable *to_pagetable(struct msm_mmu *mmu)

> +{

> +	return container_of(mmu, struct msm_iommu_pagetable, base);

> +}

> +

> +static int msm_iommu_pagetable_unmap(struct msm_mmu *mmu, u64 iova,

> +		size_t size)

> +{

> +	struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);

> +	struct io_pgtable_ops *ops = pagetable->pgtbl_ops;

> +	size_t unmapped = 0;

> +

> +	/* Unmap the block one page at a time */

> +	while (size) {

> +		unmapped += ops->unmap(ops, iova, 4096, NULL);

> +		iova += 4096;

> +		size -= 4096;

> +	}

> +

> +	iommu_flush_tlb_all(to_msm_iommu(pagetable->parent)->domain);

> +

> +	return (unmapped == size) ? 0 : -EINVAL;

> +}


Remember in patch #1 when you said "Then 'domain' can be used like any 
other iommu domain to map and unmap iova addresses in the pagetable."?

This appears to be very much not that :/

Robin.

> +

> +static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,

> +		struct sg_table *sgt, size_t len, int prot)

> +{

> +	struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);

> +	struct io_pgtable_ops *ops = pagetable->pgtbl_ops;

> +	struct scatterlist *sg;

> +	size_t mapped = 0;

> +	u64 addr = iova;

> +	unsigned int i;

> +

> +	for_each_sg(sgt->sgl, sg, sgt->nents, i) {

> +		size_t size = sg->length;

> +		phys_addr_t phys = sg_phys(sg);

> +

> +		/* Map the block one page at a time */

> +		while (size) {

> +			if (ops->map(ops, addr, phys, 4096, prot)) {

> +				msm_iommu_pagetable_unmap(mmu, iova, mapped);

> +				return -EINVAL;

> +			}

> +

> +			phys += 4096;

> +			addr += 4096;

> +			size -= 4096;

> +			mapped += 4096;

> +		}

> +	}

> +

> +	return 0;

> +}

> +

> +static void msm_iommu_pagetable_destroy(struct msm_mmu *mmu)

> +{

> +	struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);

> +

> +	free_io_pgtable_ops(pagetable->pgtbl_ops);

> +	kfree(pagetable);

> +}

> +

> +/*

> + * Given a parent device, create and return an aux domain. This will enable the

> + * TTBR0 region

> + */

> +static struct iommu_domain *msm_iommu_get_aux_domain(struct msm_mmu *parent)

> +{

> +	struct msm_iommu *iommu = to_msm_iommu(parent);

> +	struct iommu_domain *domain;

> +	int ret;

> +

> +	if (iommu->aux_domain)

> +		return iommu->aux_domain;

> +

> +	if (!iommu_dev_has_feature(parent->dev, IOMMU_DEV_FEAT_AUX))

> +		return ERR_PTR(-ENODEV);

> +

> +	domain = iommu_domain_alloc(&platform_bus_type);

> +	if (!domain)

> +		return ERR_PTR(-ENODEV);

> +

> +	ret = iommu_aux_attach_device(domain, parent->dev);

> +	if (ret) {

> +		iommu_domain_free(domain);

> +		return ERR_PTR(ret);

> +	}

> +

> +	iommu->aux_domain = domain;

> +	return domain;

> +}

> +

> +int msm_iommu_pagetable_params(struct msm_mmu *mmu,

> +		phys_addr_t *ttbr, int *asid)

> +{

> +	struct msm_iommu_pagetable *pagetable;

> +

> +	if (mmu->type != MSM_MMU_IOMMU_PAGETABLE)

> +		return -EINVAL;

> +

> +	pagetable = to_pagetable(mmu);

> +

> +	if (ttbr)

> +		*ttbr = pagetable->ttbr;

> +

> +	if (asid)

> +		*asid = pagetable->asid;

> +

> +	return 0;

> +}

> +

> +static const struct msm_mmu_funcs pagetable_funcs = {

> +		.map = msm_iommu_pagetable_map,

> +		.unmap = msm_iommu_pagetable_unmap,

> +		.destroy = msm_iommu_pagetable_destroy,

> +};

> +

> +struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)

> +{

> +	static int next_asid = 16;

> +	struct msm_iommu_pagetable *pagetable;

> +	struct iommu_domain *aux_domain;

> +	struct io_pgtable_cfg cfg;

> +	int ret;

> +

> +	/* Make sure that the parent has a aux domain attached */

> +	aux_domain = msm_iommu_get_aux_domain(parent);

> +	if (IS_ERR(aux_domain))

> +		return ERR_CAST(aux_domain);

> +

> +	/* Get the pagetable configuration from the aux domain */

> +	ret = iommu_domain_get_attr(aux_domain, DOMAIN_ATTR_PGTABLE_CFG, &cfg);

> +	if (ret)

> +		return ERR_PTR(ret);

> +

> +	pagetable = kzalloc(sizeof(*pagetable), GFP_KERNEL);

> +	if (!pagetable)

> +		return ERR_PTR(-ENOMEM);

> +

> +	msm_mmu_init(&pagetable->base, parent->dev, &pagetable_funcs,

> +		MSM_MMU_IOMMU_PAGETABLE);

> +

> +	cfg.tlb = NULL;

> +

> +	pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1,

> +		&cfg, aux_domain);

> +

> +	if (!pagetable->pgtbl_ops) {

> +		kfree(pagetable);

> +		return ERR_PTR(-ENOMEM);

> +	}

> +

> +

> +	/* Needed later for TLB flush */

> +	pagetable->parent = parent;

> +	pagetable->ttbr = cfg.arm_lpae_s1_cfg.ttbr;

> +

> +	pagetable->asid = next_asid;

> +	next_asid = (next_asid + 1)  % 255;

> +	if (next_asid < 16)

> +		next_asid = 16;

> +

> +	return &pagetable->base;

> +}

> +

>   static int msm_fault_handler(struct iommu_domain *domain, struct device *dev,

>   		unsigned long iova, int flags, void *arg)

>   {

> @@ -40,6 +217,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova,

>   	if (iova & BIT_ULL(48))

>   		iova |= GENMASK_ULL(63, 49);

>   

> +

>   	ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot);

>   	WARN_ON(!ret);

>   

> @@ -85,7 +263,7 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain)

>   		return ERR_PTR(-ENOMEM);

>   

>   	iommu->domain = domain;

> -	msm_mmu_init(&iommu->base, dev, &funcs);

> +	msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU);

>   	iommu_set_fault_handler(domain, msm_fault_handler, iommu);

>   

>   	ret = iommu_attach_device(iommu->domain, dev);

> diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h

> index 3a534ee59bf6..61ade89d9e48 100644

> --- a/drivers/gpu/drm/msm/msm_mmu.h

> +++ b/drivers/gpu/drm/msm/msm_mmu.h

> @@ -17,18 +17,26 @@ struct msm_mmu_funcs {

>   	void (*destroy)(struct msm_mmu *mmu);

>   };

>   

> +enum msm_mmu_type {

> +	MSM_MMU_GPUMMU,

> +	MSM_MMU_IOMMU,

> +	MSM_MMU_IOMMU_PAGETABLE,

> +};

> +

>   struct msm_mmu {

>   	const struct msm_mmu_funcs *funcs;

>   	struct device *dev;

>   	int (*handler)(void *arg, unsigned long iova, int flags);

>   	void *arg;

> +	enum msm_mmu_type type;

>   };

>   

>   static inline void msm_mmu_init(struct msm_mmu *mmu, struct device *dev,

> -		const struct msm_mmu_funcs *funcs)

> +		const struct msm_mmu_funcs *funcs, enum msm_mmu_type type)

>   {

>   	mmu->dev = dev;

>   	mmu->funcs = funcs;

> +	mmu->type = type;

>   }

>   

>   struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain);

> @@ -41,7 +49,13 @@ static inline void msm_mmu_set_fault_handler(struct msm_mmu *mmu, void *arg,

>   	mmu->handler = handler;

>   }

>   

> +struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent);

> +

>   void msm_gpummu_params(struct msm_mmu *mmu, dma_addr_t *pt_base,

>   		dma_addr_t *tran_error);

>   

> +

> +int msm_iommu_pagetable_params(struct msm_mmu *mmu, phys_addr_t *ttbr,

> +		int *asid);

> +

>   #endif /* __MSM_MMU_H__ */

>
Rob Clark July 7, 2020, 2:25 p.m. UTC | #3
On Tue, Jul 7, 2020 at 4:34 AM Robin Murphy <robin.murphy@arm.com> wrote:
>

> On 2020-06-26 21:04, Jordan Crouse wrote:

> > Allow a io-pgtable implementation to skip TLB operations by checking for

> > NULL pointers in the helper functions. It will be up to to the owner

> > of the io-pgtable instance to make sure that they independently handle

> > the TLB correctly.

>

> I don't really understand what this is for - tricking the IOMMU driver

> into not performing its TLB maintenance at points when that maintenance

> has been deemed necessary doesn't seem like the appropriate way to

> achieve anything good :/


No, for getting the io-pgtable helpers to skip TLB maintenance.  But
seriously, since we are creating pgtables ourselves, and we don't want
to be ioremap'ing the GPU's SMMU instance, the alternative is plugging
in no-op helpers, which amounts to the same thing.
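
Roughly, the no-op variant would just be an empty set of iommu_flush_ops
(a hypothetical sketch, not something in this series; the existing
tlb_add_page helper already tolerates a NULL hook):

#include <linux/io-pgtable.h>

static void null_tlb_flush_all(void *cookie)
{
}

static void null_tlb_flush_walk(unsigned long iova, size_t size,
		size_t granule, void *cookie)
{
}

static void null_tlb_flush_leaf(unsigned long iova, size_t size,
		size_t granule, void *cookie)
{
}

static const struct iommu_flush_ops null_tlb_ops = {
	.tlb_flush_all	= null_tlb_flush_all,
	.tlb_flush_walk	= null_tlb_flush_walk,
	.tlb_flush_leaf	= null_tlb_flush_leaf,
	/* .tlb_add_page left NULL, io_pgtable_tlb_add_page() checks it */
};

...and then "cfg.tlb = &null_tlb_ops;" instead of the NULL checks.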

Currently (in a later patch in the series) we are using
iommu_flush_tlb_all() when unmapping, which is a bit of a big hammer.
Although I think we could be a bit more clever and do the TLB ops on
the GPU (since the GPU knows if pagetables we are unmapping from are
in-use and could skip the TLB ops otherwise).

On that topic, if we are using unique ASID values per set of
pagetables, how expensive is a TLB invalidate for an ASID that has no
entries in the TLB?

BR,
-R

>

> Robin.

>

> > Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>

> > ---

> >

> >   include/linux/io-pgtable.h | 11 +++++++----

> >   1 file changed, 7 insertions(+), 4 deletions(-)

> >

> > diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h

> > index 53d53c6c2be9..bbed1d3925ba 100644

> > --- a/include/linux/io-pgtable.h

> > +++ b/include/linux/io-pgtable.h

> > @@ -210,21 +210,24 @@ struct io_pgtable {

> >

> >   static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop)

> >   {

> > -     iop->cfg.tlb->tlb_flush_all(iop->cookie);

> > +     if (iop->cfg.tlb)

> > +             iop->cfg.tlb->tlb_flush_all(iop->cookie);

> >   }

> >

> >   static inline void

> >   io_pgtable_tlb_flush_walk(struct io_pgtable *iop, unsigned long iova,

> >                         size_t size, size_t granule)

> >   {

> > -     iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie);

> > +     if (iop->cfg.tlb)

> > +             iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie);

> >   }

> >

> >   static inline void

> >   io_pgtable_tlb_flush_leaf(struct io_pgtable *iop, unsigned long iova,

> >                         size_t size, size_t granule)

> >   {

> > -     iop->cfg.tlb->tlb_flush_leaf(iova, size, granule, iop->cookie);

> > +     if (iop->cfg.tlb)

> > +             iop->cfg.tlb->tlb_flush_leaf(iova, size, granule, iop->cookie);

> >   }

> >

> >   static inline void

> > @@ -232,7 +235,7 @@ io_pgtable_tlb_add_page(struct io_pgtable *iop,

> >                       struct iommu_iotlb_gather * gather, unsigned long iova,

> >                       size_t granule)

> >   {

> > -     if (iop->cfg.tlb->tlb_add_page)

> > +     if (iop->cfg.tlb && iop->cfg.tlb->tlb_add_page)

> >               iop->cfg.tlb->tlb_add_page(gather, iova, granule, iop->cookie);

> >   }

> >

> >

Rob Clark July 7, 2020, 2:41 p.m. UTC | #4
On Tue, Jul 7, 2020 at 4:36 AM Robin Murphy <robin.murphy@arm.com> wrote:
>

> On 2020-06-26 21:04, Jordan Crouse wrote:

> > Add support to create a io-pgtable for use by targets that support

> > per-instance pagetables.  In order to support per-instance pagetables the

> > GPU SMMU device needs to have the qcom,adreno-smmu compatible string and

> > split pagetables and auxiliary domains need to be supported and enabled.

> >

> > Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>

> > ---

> >

> >   drivers/gpu/drm/msm/msm_gpummu.c |   2 +-

> >   drivers/gpu/drm/msm/msm_iommu.c  | 180 ++++++++++++++++++++++++++++++-

> >   drivers/gpu/drm/msm/msm_mmu.h    |  16 ++-

> >   3 files changed, 195 insertions(+), 3 deletions(-)

> >

> > diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c

> > index 310a31b05faa..aab121f4beb7 100644

> > --- a/drivers/gpu/drm/msm/msm_gpummu.c

> > +++ b/drivers/gpu/drm/msm/msm_gpummu.c

> > @@ -102,7 +102,7 @@ struct msm_mmu *msm_gpummu_new(struct device *dev, struct msm_gpu *gpu)

> >       }

> >

> >       gpummu->gpu = gpu;

> > -     msm_mmu_init(&gpummu->base, dev, &funcs);

> > +     msm_mmu_init(&gpummu->base, dev, &funcs, MSM_MMU_GPUMMU);

> >

> >       return &gpummu->base;

> >   }

> > diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c

> > index 1b6635504069..f455c597f76d 100644

> > --- a/drivers/gpu/drm/msm/msm_iommu.c

> > +++ b/drivers/gpu/drm/msm/msm_iommu.c

> > @@ -4,15 +4,192 @@

> >    * Author: Rob Clark <robdclark@gmail.com>

> >    */

> >

> > +#include <linux/io-pgtable.h>

> >   #include "msm_drv.h"

> >   #include "msm_mmu.h"

> >

> >   struct msm_iommu {

> >       struct msm_mmu base;

> >       struct iommu_domain *domain;

> > +     struct iommu_domain *aux_domain;

> >   };

> > +

> >   #define to_msm_iommu(x) container_of(x, struct msm_iommu, base)

> >

> > +struct msm_iommu_pagetable {

> > +     struct msm_mmu base;

> > +     struct msm_mmu *parent;

> > +     struct io_pgtable_ops *pgtbl_ops;

> > +     phys_addr_t ttbr;

> > +     u32 asid;

> > +};

> > +

> > +static struct msm_iommu_pagetable *to_pagetable(struct msm_mmu *mmu)

> > +{

> > +     return container_of(mmu, struct msm_iommu_pagetable, base);

> > +}

> > +

> > +static int msm_iommu_pagetable_unmap(struct msm_mmu *mmu, u64 iova,

> > +             size_t size)

> > +{

> > +     struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);

> > +     struct io_pgtable_ops *ops = pagetable->pgtbl_ops;

> > +     size_t unmapped = 0;

> > +

> > +     /* Unmap the block one page at a time */

> > +     while (size) {

> > +             unmapped += ops->unmap(ops, iova, 4096, NULL);

> > +             iova += 4096;

> > +             size -= 4096;

> > +     }

> > +

> > +     iommu_flush_tlb_all(to_msm_iommu(pagetable->parent)->domain);

> > +

> > +     return (unmapped == size) ? 0 : -EINVAL;

> > +}

>

> Remember in patch #1 when you said "Then 'domain' can be used like any

> other iommu domain to map and unmap iova addresses in the pagetable."?

>

> This appears to be very much not that :/

>


I guess that comment is a bit stale... the original plan was to create
an iommu_domain per set of pgtables, but at some point we realized
that by using the io-pgtable helpers directly, we would inflict a lot
less GPU-crazy on the iommu drivers.

BR,
-R

> Robin.

>

> > +

> > +static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,

> > +             struct sg_table *sgt, size_t len, int prot)

> > +{

> > +     struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);

> > +     struct io_pgtable_ops *ops = pagetable->pgtbl_ops;

> > +     struct scatterlist *sg;

> > +     size_t mapped = 0;

> > +     u64 addr = iova;

> > +     unsigned int i;

> > +

> > +     for_each_sg(sgt->sgl, sg, sgt->nents, i) {

> > +             size_t size = sg->length;

> > +             phys_addr_t phys = sg_phys(sg);

> > +

> > +             /* Map the block one page at a time */

> > +             while (size) {

> > +                     if (ops->map(ops, addr, phys, 4096, prot)) {

> > +                             msm_iommu_pagetable_unmap(mmu, iova, mapped);

> > +                             return -EINVAL;

> > +                     }

> > +

> > +                     phys += 4096;

> > +                     addr += 4096;

> > +                     size -= 4096;

> > +                     mapped += 4096;

> > +             }

> > +     }

> > +

> > +     return 0;

> > +}

> > +

> > +static void msm_iommu_pagetable_destroy(struct msm_mmu *mmu)

> > +{

> > +     struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);

> > +

> > +     free_io_pgtable_ops(pagetable->pgtbl_ops);

> > +     kfree(pagetable);

> > +}

> > +

> > +/*

> > + * Given a parent device, create and return an aux domain. This will enable the

> > + * TTBR0 region

> > + */

> > +static struct iommu_domain *msm_iommu_get_aux_domain(struct msm_mmu *parent)

> > +{

> > +     struct msm_iommu *iommu = to_msm_iommu(parent);

> > +     struct iommu_domain *domain;

> > +     int ret;

> > +

> > +     if (iommu->aux_domain)

> > +             return iommu->aux_domain;

> > +

> > +     if (!iommu_dev_has_feature(parent->dev, IOMMU_DEV_FEAT_AUX))

> > +             return ERR_PTR(-ENODEV);

> > +

> > +     domain = iommu_domain_alloc(&platform_bus_type);

> > +     if (!domain)

> > +             return ERR_PTR(-ENODEV);

> > +

> > +     ret = iommu_aux_attach_device(domain, parent->dev);

> > +     if (ret) {

> > +             iommu_domain_free(domain);

> > +             return ERR_PTR(ret);

> > +     }

> > +

> > +     iommu->aux_domain = domain;

> > +     return domain;

> > +}

> > +

> > +int msm_iommu_pagetable_params(struct msm_mmu *mmu,

> > +             phys_addr_t *ttbr, int *asid)

> > +{

> > +     struct msm_iommu_pagetable *pagetable;

> > +

> > +     if (mmu->type != MSM_MMU_IOMMU_PAGETABLE)

> > +             return -EINVAL;

> > +

> > +     pagetable = to_pagetable(mmu);

> > +

> > +     if (ttbr)

> > +             *ttbr = pagetable->ttbr;

> > +

> > +     if (asid)

> > +             *asid = pagetable->asid;

> > +

> > +     return 0;

> > +}

> > +

> > +static const struct msm_mmu_funcs pagetable_funcs = {

> > +             .map = msm_iommu_pagetable_map,

> > +             .unmap = msm_iommu_pagetable_unmap,

> > +             .destroy = msm_iommu_pagetable_destroy,

> > +};

> > +

> > +struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)

> > +{

> > +     static int next_asid = 16;

> > +     struct msm_iommu_pagetable *pagetable;

> > +     struct iommu_domain *aux_domain;

> > +     struct io_pgtable_cfg cfg;

> > +     int ret;

> > +

> > +     /* Make sure that the parent has a aux domain attached */

> > +     aux_domain = msm_iommu_get_aux_domain(parent);

> > +     if (IS_ERR(aux_domain))

> > +             return ERR_CAST(aux_domain);

> > +

> > +     /* Get the pagetable configuration from the aux domain */

> > +     ret = iommu_domain_get_attr(aux_domain, DOMAIN_ATTR_PGTABLE_CFG, &cfg);

> > +     if (ret)

> > +             return ERR_PTR(ret);

> > +

> > +     pagetable = kzalloc(sizeof(*pagetable), GFP_KERNEL);

> > +     if (!pagetable)

> > +             return ERR_PTR(-ENOMEM);

> > +

> > +     msm_mmu_init(&pagetable->base, parent->dev, &pagetable_funcs,

> > +             MSM_MMU_IOMMU_PAGETABLE);

> > +

> > +     cfg.tlb = NULL;

> > +

> > +     pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1,

> > +             &cfg, aux_domain);

> > +

> > +     if (!pagetable->pgtbl_ops) {

> > +             kfree(pagetable);

> > +             return ERR_PTR(-ENOMEM);

> > +     }

> > +

> > +

> > +     /* Needed later for TLB flush */

> > +     pagetable->parent = parent;

> > +     pagetable->ttbr = cfg.arm_lpae_s1_cfg.ttbr;

> > +

> > +     pagetable->asid = next_asid;

> > +     next_asid = (next_asid + 1)  % 255;

> > +     if (next_asid < 16)

> > +             next_asid = 16;

> > +

> > +     return &pagetable->base;

> > +}

> > +

> >   static int msm_fault_handler(struct iommu_domain *domain, struct device *dev,

> >               unsigned long iova, int flags, void *arg)

> >   {

> > @@ -40,6 +217,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova,

> >       if (iova & BIT_ULL(48))

> >               iova |= GENMASK_ULL(63, 49);

> >

> > +

> >       ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot);

> >       WARN_ON(!ret);

> >

> > @@ -85,7 +263,7 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain)

> >               return ERR_PTR(-ENOMEM);

> >

> >       iommu->domain = domain;

> > -     msm_mmu_init(&iommu->base, dev, &funcs);

> > +     msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU);

> >       iommu_set_fault_handler(domain, msm_fault_handler, iommu);

> >

> >       ret = iommu_attach_device(iommu->domain, dev);

> > diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h

> > index 3a534ee59bf6..61ade89d9e48 100644

> > --- a/drivers/gpu/drm/msm/msm_mmu.h

> > +++ b/drivers/gpu/drm/msm/msm_mmu.h

> > @@ -17,18 +17,26 @@ struct msm_mmu_funcs {

> >       void (*destroy)(struct msm_mmu *mmu);

> >   };

> >

> > +enum msm_mmu_type {

> > +     MSM_MMU_GPUMMU,

> > +     MSM_MMU_IOMMU,

> > +     MSM_MMU_IOMMU_PAGETABLE,

> > +};

> > +

> >   struct msm_mmu {

> >       const struct msm_mmu_funcs *funcs;

> >       struct device *dev;

> >       int (*handler)(void *arg, unsigned long iova, int flags);

> >       void *arg;

> > +     enum msm_mmu_type type;

> >   };

> >

> >   static inline void msm_mmu_init(struct msm_mmu *mmu, struct device *dev,

> > -             const struct msm_mmu_funcs *funcs)

> > +             const struct msm_mmu_funcs *funcs, enum msm_mmu_type type)

> >   {

> >       mmu->dev = dev;

> >       mmu->funcs = funcs;

> > +     mmu->type = type;

> >   }

> >

> >   struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain);

> > @@ -41,7 +49,13 @@ static inline void msm_mmu_set_fault_handler(struct msm_mmu *mmu, void *arg,

> >       mmu->handler = handler;

> >   }

> >

> > +struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent);

> > +

> >   void msm_gpummu_params(struct msm_mmu *mmu, dma_addr_t *pt_base,

> >               dma_addr_t *tran_error);

> >

> > +

> > +int msm_iommu_pagetable_params(struct msm_mmu *mmu, phys_addr_t *ttbr,

> > +             int *asid);

> > +

> >   #endif /* __MSM_MMU_H__ */

> >

Rob Clark July 7, 2020, 2:58 p.m. UTC | #5
On Tue, Jul 7, 2020 at 7:25 AM Rob Clark <robdclark@gmail.com> wrote:
>

> On Tue, Jul 7, 2020 at 4:34 AM Robin Murphy <robin.murphy@arm.com> wrote:

> >

> > On 2020-06-26 21:04, Jordan Crouse wrote:

> > > Allow a io-pgtable implementation to skip TLB operations by checking for

> > > NULL pointers in the helper functions. It will be up to to the owner

> > > of the io-pgtable instance to make sure that they independently handle

> > > the TLB correctly.

> >

> > I don't really understand what this is for - tricking the IOMMU driver

> > into not performing its TLB maintenance at points when that maintenance

> > has been deemed necessary doesn't seem like the appropriate way to

> > achieve anything good :/

>

> No, for triggering the io-pgtable helpers into not performing TLB

> maintenance.  But seriously, since we are creating pgtables ourselves,

> and we don't want to be ioremap'ing the GPU's SMMU instance, the

> alternative is plugging in no-op helpers.  Which amounts to the same

> thing.


Hmm, that said, since we are just memcpy'ing the io_pgtable_cfg from
arm-smmu, it will already be populated with arm-smmu's fxn ptrs.  I
guess we could maybe make it work without no-op helpers, although in
that case it looks like we need to fix something about aux-domain vs
TLB helpers:

[  +0.004373] Unable to handle kernel NULL pointer dereference at
virtual address 0000000000000019
[  +0.004086] Mem abort info:
[  +0.004319]   ESR = 0x96000004
[  +0.003462]   EC = 0x25: DABT (current EL), IL = 32 bits
[  +0.003494]   SET = 0, FnV = 0
[  +0.002812]   EA = 0, S1PTW = 0
[  +0.002873] Data abort info:
[  +0.003031]   ISV = 0, ISS = 0x00000004
[  +0.003785]   CM = 0, WnR = 0
[  +0.003641] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000261d65000
[  +0.003383] [0000000000000019] pgd=0000000000000000, p4d=0000000000000000
[  +0.003715] Internal error: Oops: 96000004 [#1] PREEMPT SMP
[  +0.002744] Modules linked in: xt_CHECKSUM xt_MASQUERADE
xt_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp ip6table_mangle
ip6table_nat iptable_mangle iptable_nat nf_nat nf_conntrack
nf_defrag_ipv4 libcrc32c bridge stp llc ip6table_filter ip6_tables
iptable_filter ax88179_178a usbnet uvcvideo videobuf2_vmalloc
videobuf2_memops videobuf2_v4l2 videobuf2_common videodev mc
hid_multitouch i2c_hid some_battery ti_sn65dsi86 hci_uart btqca btbcm
qcom_spmi_adc5 bluetooth qcom_spmi_temp_alarm qcom_vadc_common
ecdh_generic ecc snd_soc_sdm845 snd_soc_rt5663 snd_soc_qcom_common
ath10k_snoc ath10k_core crct10dif_ce ath mac80211 snd_soc_rl6231
soundwire_bus i2c_qcom_geni libarc4 qcom_rng msm phy_qcom_qusb2
reset_qcom_pdc drm_kms_helper cfg80211 rfkill qcom_q6v5_mss
qcom_q6v5_ipa_notify socinfo qrtr ns panel_simple qcom_q6v5_pas
qcom_common qcom_glink_smem slim_qcom_ngd_ctrl qcom_sysmon drm
qcom_q6v5 slimbus qmi_helpers qcom_wdt mdt_loader rmtfs_mem be2iscsi
bnx2i cnic uio cxgb4i cxgb4 cxgb3i cxgb3 mdio
[  +0.000139]  libcxgbi libcxgb qla4xxx iscsi_boot_sysfs iscsi_tcp
libiscsi_tcp libiscsi scsi_transport_iscsi fuse ip_tables x_tables
ipv6 nf_defrag_ipv6
[  +0.020933] CPU: 3 PID: 168 Comm: kworker/u16:7 Not tainted
5.8.0-rc1-c630+ #31
[  +0.003828] Hardware name: LENOVO 81JL/LNVNB161216, BIOS
9UCN33WW(V2.06) 06/ 4/2019
[  +0.004039] Workqueue: msm msm_gem_free_work [msm]
[  +0.003885] pstate: 60c00005 (nZCv daif +PAN +UAO BTYPE=--)
[  +0.003859] pc : arm_smmu_tlb_inv_range_s1+0x30/0x148
[  +0.003742] lr : arm_smmu_tlb_add_page_s1+0x1c/0x28
[  +0.003887] sp : ffff800011cdb970
[  +0.003868] x29: ffff800011cdb970 x28: 0000000000000003
[  +0.003930] x27: ffff0001f1882f80 x26: 0000000000000001
[  +0.003886] x25: 0000000000000003 x24: 0000000000000620
[  +0.003932] x23: 0000000000000000 x22: 0000000000001000
[  +0.003886] x21: 0000000000001000 x20: ffff0001cf857300
[  +0.003916] x19: 0000000000000001 x18: 00000000ffffffff
[  +0.003921] x17: ffffd9e6a24ae0e8 x16: 0000000000012577
[  +0.003843] x15: 0000000000012578 x14: 0000000000000000
[  +0.003884] x13: 0000000000012574 x12: ffffd9e6a2550180
[  +0.003834] x11: 0000000000083f80 x10: 0000000000000000
[  +0.003889] x9 : 0000000000000000 x8 : ffff0001f1882f80
[  +0.003812] x7 : 0000000000000001 x6 : 0000000000000048
[  +0.003807] x5 : ffff0001c86e1000 x4 : 0000000000000620
[  +0.003802] x3 : ffff0001ddb57700 x2 : 0000000000001000
[  +0.003809] x1 : 0000000000001000 x0 : 0000000101048000
[  +0.003768] Call trace:
[  +0.003665]  arm_smmu_tlb_inv_range_s1+0x30/0x148
[  +0.003769]  arm_smmu_tlb_add_page_s1+0x1c/0x28
[  +0.003760]  __arm_lpae_unmap+0x3c4/0x498
[  +0.003821]  __arm_lpae_unmap+0xfc/0x498
[  +0.003693]  __arm_lpae_unmap+0xfc/0x498
[  +0.003704]  __arm_lpae_unmap+0xfc/0x498
[  +0.003608]  arm_lpae_unmap+0x60/0x78
[  +0.003653]  msm_iommu_pagetable_unmap+0x5c/0xa0 [msm]
[  +0.003711]  msm_gem_purge_vma+0x48/0x70 [msm]
[  +0.003716]  put_iova+0x68/0xc8 [msm]
[  +0.003792]  msm_gem_free_work+0x118/0x190 [msm]
[  +0.003739]  process_one_work+0x28c/0x6e8
[  +0.003595]  worker_thread+0x4c/0x420
[  +0.003546]  kthread+0x148/0x168
[  +0.003675]  ret_from_fork+0x10/0x1c
[  +0.003596] Code: 2a0403f8 a9046bf9 f9400073 39406077 (b9401a61)

BR,
-R

>

> Currently (in a later patch in the series) we are using

> iommu_flush_tlb_all() when unmapping, which is a bit of a big hammer.

> Although I think we could be a bit more clever and do the TLB ops on

> the GPU (since the GPU knows if pagetables we are unmapping from are

> in-use and could skip the TLB ops otherwise).

>

> On the topic, if we are using unique ASID values per set of

> pagetables, how expensive is tlb invalidate for an ASID that has no

> entries in the TLB?

>

> BR,

> -R

>

> >

> > Robin.

> >

> > > Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>

> > > ---

> > >

> > >   include/linux/io-pgtable.h | 11 +++++++----

> > >   1 file changed, 7 insertions(+), 4 deletions(-)

> > >

> > > diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h

> > > index 53d53c6c2be9..bbed1d3925ba 100644

> > > --- a/include/linux/io-pgtable.h

> > > +++ b/include/linux/io-pgtable.h

> > > @@ -210,21 +210,24 @@ struct io_pgtable {

> > >

> > >   static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop)

> > >   {

> > > -     iop->cfg.tlb->tlb_flush_all(iop->cookie);

> > > +     if (iop->cfg.tlb)

> > > +             iop->cfg.tlb->tlb_flush_all(iop->cookie);

> > >   }

> > >

> > >   static inline void

> > >   io_pgtable_tlb_flush_walk(struct io_pgtable *iop, unsigned long iova,

> > >                         size_t size, size_t granule)

> > >   {

> > > -     iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie);

> > > +     if (iop->cfg.tlb)

> > > +             iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie);

> > >   }

> > >

> > >   static inline void

> > >   io_pgtable_tlb_flush_leaf(struct io_pgtable *iop, unsigned long iova,

> > >                         size_t size, size_t granule)

> > >   {

> > > -     iop->cfg.tlb->tlb_flush_leaf(iova, size, granule, iop->cookie);

> > > +     if (iop->cfg.tlb)

> > > +             iop->cfg.tlb->tlb_flush_leaf(iova, size, granule, iop->cookie);

> > >   }

> > >

> > >   static inline void

> > > @@ -232,7 +235,7 @@ io_pgtable_tlb_add_page(struct io_pgtable *iop,

> > >                       struct iommu_iotlb_gather * gather, unsigned long iova,

> > >                       size_t granule)

> > >   {

> > > -     if (iop->cfg.tlb->tlb_add_page)

> > > +     if (iop->cfg.tlb && iop->cfg.tlb->tlb_add_page)

> > >               iop->cfg.tlb->tlb_add_page(gather, iova, granule, iop->cookie);

> > >   }

> > >

> > >

Jordan Crouse July 8, 2020, 7:19 p.m. UTC | #6
On Tue, Jul 07, 2020 at 07:58:18AM -0700, Rob Clark wrote:
> On Tue, Jul 7, 2020 at 7:25 AM Rob Clark <robdclark@gmail.com> wrote:

> >

> > On Tue, Jul 7, 2020 at 4:34 AM Robin Murphy <robin.murphy@arm.com> wrote:

> > >

> > > On 2020-06-26 21:04, Jordan Crouse wrote:

> > > > Allow a io-pgtable implementation to skip TLB operations by checking for

> > > > NULL pointers in the helper functions. It will be up to to the owner

> > > > of the io-pgtable instance to make sure that they independently handle

> > > > the TLB correctly.

> > >

> > > I don't really understand what this is for - tricking the IOMMU driver

> > > into not performing its TLB maintenance at points when that maintenance

> > > has been deemed necessary doesn't seem like the appropriate way to

> > > achieve anything good :/

> >

> > No, for triggering the io-pgtable helpers into not performing TLB

> > maintenance.  But seriously, since we are creating pgtables ourselves,

> > and we don't want to be ioremap'ing the GPU's SMMU instance, the

> > alternative is plugging in no-op helpers.  Which amounts to the same

> > thing.

> 

> Hmm, that said, since we are just memcpy'ing the io_pgtable_cfg from

> arm-smmu, it will already be populated with arm-smmu's fxn ptrs.  I

> guess we could maybe make it work without no-op helpers, although in

> that case it looks like we need to fix something about aux-domain vs

> tlb helpers:


I had a change that handled these correctly, but I abandoned it because the
TLB functions didn't kick the power and I didn't think that would be
desirable at the generic level for performance reasons. Since the GPU SMMU
is on the same power domain as the GMU, we could enable it in the GPU driver
before calling the TLB operations, but we would need to be clever about it
to prevent bringing up the GMU just to unmap memory.
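
Something along these lines, purely illustrative (the example_* naming and
the choice of pm_runtime helpers are assumptions on my part, not code from
this series): only do the invalidate when the power domain is already up,
since a powered-down SMMU shouldn't be holding stale entries anyway.

#include <linux/pm_runtime.h>
#include <linux/iommu.h>

static void example_flush_iotlb(struct device *smmu_power_dev,
		struct iommu_domain *domain)
{
	/* Only take a runtime PM vote if the device already has users */
	if (pm_runtime_get_if_in_use(smmu_power_dev) <= 0)
		return;

	iommu_flush_tlb_all(domain);

	pm_runtime_put_autosuspend(smmu_power_dev);
}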

Jordan

> [  +0.004373] Unable to handle kernel NULL pointer dereference at

> virtual address 0000000000000019

> [  +0.004086] Mem abort info:

> [  +0.004319]   ESR = 0x96000004

> [  +0.003462]   EC = 0x25: DABT (current EL), IL = 32 bits

> [  +0.003494]   SET = 0, FnV = 0

> [  +0.002812]   EA = 0, S1PTW = 0

> [  +0.002873] Data abort info:

> [  +0.003031]   ISV = 0, ISS = 0x00000004

> [  +0.003785]   CM = 0, WnR = 0

> [  +0.003641] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000261d65000

> [  +0.003383] [0000000000000019] pgd=0000000000000000, p4d=0000000000000000

> [  +0.003715] Internal error: Oops: 96000004 [#1] PREEMPT SMP

> [  +0.002744] Modules linked in: xt_CHECKSUM xt_MASQUERADE

> xt_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp ip6table_mangle

> ip6table_nat iptable_mangle iptable_nat nf_nat nf_conntrack

> nf_defrag_ipv4 libcrc32c bridge stp llc ip6table_filter ip6_tables

> iptable_filter ax88179_178a usbnet uvcvideo videobuf2_vmalloc

> videobuf2_memops videobuf2_v4l2 videobuf2_common videodev mc

> hid_multitouch i2c_hid some_battery ti_sn65dsi86 hci_uart btqca btbcm

> qcom_spmi_adc5 bluetooth qcom_spmi_temp_alarm qcom_vadc_common

> ecdh_generic ecc snd_soc_sdm845 snd_soc_rt5663 snd_soc_qcom_common

> ath10k_snoc ath10k_core crct10dif_ce ath mac80211 snd_soc_rl6231

> soundwire_bus i2c_qcom_geni libarc4 qcom_rng msm phy_qcom_qusb2

> reset_qcom_pdc drm_kms_helper cfg80211 rfkill qcom_q6v5_mss

> qcom_q6v5_ipa_notify socinfo qrtr ns panel_simple qcom_q6v5_pas

> qcom_common qcom_glink_smem slim_qcom_ngd_ctrl qcom_sysmon drm

> qcom_q6v5 slimbus qmi_helpers qcom_wdt mdt_loader rmtfs_mem be2iscsi

> bnx2i cnic uio cxgb4i cxgb4 cxgb3i cxgb3 mdio

> [  +0.000139]  libcxgbi libcxgb qla4xxx iscsi_boot_sysfs iscsi_tcp

> libiscsi_tcp libiscsi scsi_transport_iscsi fuse ip_tables x_tables

> ipv6 nf_defrag_ipv6

> [  +0.020933] CPU: 3 PID: 168 Comm: kworker/u16:7 Not tainted

> 5.8.0-rc1-c630+ #31

> [  +0.003828] Hardware name: LENOVO 81JL/LNVNB161216, BIOS

> 9UCN33WW(V2.06) 06/ 4/2019

> [  +0.004039] Workqueue: msm msm_gem_free_work [msm]

> [  +0.003885] pstate: 60c00005 (nZCv daif +PAN +UAO BTYPE=--)

> [  +0.003859] pc : arm_smmu_tlb_inv_range_s1+0x30/0x148

> [  +0.003742] lr : arm_smmu_tlb_add_page_s1+0x1c/0x28

> [  +0.003887] sp : ffff800011cdb970

> [  +0.003868] x29: ffff800011cdb970 x28: 0000000000000003

> [  +0.003930] x27: ffff0001f1882f80 x26: 0000000000000001

> [  +0.003886] x25: 0000000000000003 x24: 0000000000000620

> [  +0.003932] x23: 0000000000000000 x22: 0000000000001000

> [  +0.003886] x21: 0000000000001000 x20: ffff0001cf857300

> [  +0.003916] x19: 0000000000000001 x18: 00000000ffffffff

> [  +0.003921] x17: ffffd9e6a24ae0e8 x16: 0000000000012577

> [  +0.003843] x15: 0000000000012578 x14: 0000000000000000

> [  +0.003884] x13: 0000000000012574 x12: ffffd9e6a2550180

> [  +0.003834] x11: 0000000000083f80 x10: 0000000000000000

> [  +0.003889] x9 : 0000000000000000 x8 : ffff0001f1882f80

> [  +0.003812] x7 : 0000000000000001 x6 : 0000000000000048

> [  +0.003807] x5 : ffff0001c86e1000 x4 : 0000000000000620

> [  +0.003802] x3 : ffff0001ddb57700 x2 : 0000000000001000

> [  +0.003809] x1 : 0000000000001000 x0 : 0000000101048000

> [  +0.003768] Call trace:

> [  +0.003665]  arm_smmu_tlb_inv_range_s1+0x30/0x148

> [  +0.003769]  arm_smmu_tlb_add_page_s1+0x1c/0x28

> [  +0.003760]  __arm_lpae_unmap+0x3c4/0x498

> [  +0.003821]  __arm_lpae_unmap+0xfc/0x498

> [  +0.003693]  __arm_lpae_unmap+0xfc/0x498

> [  +0.003704]  __arm_lpae_unmap+0xfc/0x498

> [  +0.003608]  arm_lpae_unmap+0x60/0x78

> [  +0.003653]  msm_iommu_pagetable_unmap+0x5c/0xa0 [msm]

> [  +0.003711]  msm_gem_purge_vma+0x48/0x70 [msm]

> [  +0.003716]  put_iova+0x68/0xc8 [msm]

> [  +0.003792]  msm_gem_free_work+0x118/0x190 [msm]

> [  +0.003739]  process_one_work+0x28c/0x6e8

> [  +0.003595]  worker_thread+0x4c/0x420

> [  +0.003546]  kthread+0x148/0x168

> [  +0.003675]  ret_from_fork+0x10/0x1c

> [  +0.003596] Code: 2a0403f8 a9046bf9 f9400073 39406077 (b9401a61)

> 

> BR,

> -R

> 

> >

> > Currently (in a later patch in the series) we are using

> > iommu_flush_tlb_all() when unmapping, which is a bit of a big hammer.

> > Although I think we could be a bit more clever and do the TLB ops on

> > the GPU (since the GPU knows if pagetables we are unmapping from are

> > in-use and could skip the TLB ops otherwise).

> >

> > On the topic, if we are using unique ASID values per set of

> > pagetables, how expensive is tlb invalidate for an ASID that has no

> > entries in the TLB?

> >

> > BR,

> > -R

> >

> > >

> > > Robin.

> > >

> > > > Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>

> > > > ---

> > > >

> > > >   include/linux/io-pgtable.h | 11 +++++++----

> > > >   1 file changed, 7 insertions(+), 4 deletions(-)

> > > >

> > > > diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h

> > > > index 53d53c6c2be9..bbed1d3925ba 100644

> > > > --- a/include/linux/io-pgtable.h

> > > > +++ b/include/linux/io-pgtable.h

> > > > @@ -210,21 +210,24 @@ struct io_pgtable {

> > > >

> > > >   static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop)

> > > >   {

> > > > -     iop->cfg.tlb->tlb_flush_all(iop->cookie);

> > > > +     if (iop->cfg.tlb)

> > > > +             iop->cfg.tlb->tlb_flush_all(iop->cookie);

> > > >   }

> > > >

> > > >   static inline void

> > > >   io_pgtable_tlb_flush_walk(struct io_pgtable *iop, unsigned long iova,

> > > >                         size_t size, size_t granule)

> > > >   {

> > > > -     iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie);

> > > > +     if (iop->cfg.tlb)

> > > > +             iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie);

> > > >   }

> > > >

> > > >   static inline void

> > > >   io_pgtable_tlb_flush_leaf(struct io_pgtable *iop, unsigned long iova,

> > > >                         size_t size, size_t granule)

> > > >   {

> > > > -     iop->cfg.tlb->tlb_flush_leaf(iova, size, granule, iop->cookie);

> > > > +     if (iop->cfg.tlb)

> > > > +             iop->cfg.tlb->tlb_flush_leaf(iova, size, granule, iop->cookie);

> > > >   }

> > > >

> > > >   static inline void

> > > > @@ -232,7 +235,7 @@ io_pgtable_tlb_add_page(struct io_pgtable *iop,

> > > >                       struct iommu_iotlb_gather * gather, unsigned long iova,

> > > >                       size_t granule)

> > > >   {

> > > > -     if (iop->cfg.tlb->tlb_add_page)

> > > > +     if (iop->cfg.tlb && iop->cfg.tlb->tlb_add_page)

> > > >               iop->cfg.tlb->tlb_add_page(gather, iova, granule, iop->cookie);

> > > >   }

> > > >

> > > >



-- 
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
Jordan Crouse July 8, 2020, 7:35 p.m. UTC | #7
On Tue, Jul 07, 2020 at 12:36:42PM +0100, Robin Murphy wrote:
> On 2020-06-26 21:04, Jordan Crouse wrote:

> >Add support to create a io-pgtable for use by targets that support

> >per-instance pagetables.  In order to support per-instance pagetables the

> >GPU SMMU device needs to have the qcom,adreno-smmu compatible string and

> >split pagetables and auxiliary domains need to be supported and enabled.

> >

> >Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>

> >---

> >

> >  drivers/gpu/drm/msm/msm_gpummu.c |   2 +-

> >  drivers/gpu/drm/msm/msm_iommu.c  | 180 ++++++++++++++++++++++++++++++-

> >  drivers/gpu/drm/msm/msm_mmu.h    |  16 ++-

> >  3 files changed, 195 insertions(+), 3 deletions(-)

> >

> >diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c

> >index 310a31b05faa..aab121f4beb7 100644

> >--- a/drivers/gpu/drm/msm/msm_gpummu.c

> >+++ b/drivers/gpu/drm/msm/msm_gpummu.c

> >@@ -102,7 +102,7 @@ struct msm_mmu *msm_gpummu_new(struct device *dev, struct msm_gpu *gpu)

> >  	}

> >  	gpummu->gpu = gpu;

> >-	msm_mmu_init(&gpummu->base, dev, &funcs);

> >+	msm_mmu_init(&gpummu->base, dev, &funcs, MSM_MMU_GPUMMU);

> >  	return &gpummu->base;

> >  }

> >diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c

> >index 1b6635504069..f455c597f76d 100644

> >--- a/drivers/gpu/drm/msm/msm_iommu.c

> >+++ b/drivers/gpu/drm/msm/msm_iommu.c

> >@@ -4,15 +4,192 @@

> >   * Author: Rob Clark <robdclark@gmail.com>

> >   */

> >+#include <linux/io-pgtable.h>

> >  #include "msm_drv.h"

> >  #include "msm_mmu.h"

> >  struct msm_iommu {

> >  	struct msm_mmu base;

> >  	struct iommu_domain *domain;

> >+	struct iommu_domain *aux_domain;

> >  };

> >+

> >  #define to_msm_iommu(x) container_of(x, struct msm_iommu, base)

> >+struct msm_iommu_pagetable {

> >+	struct msm_mmu base;

> >+	struct msm_mmu *parent;

> >+	struct io_pgtable_ops *pgtbl_ops;

> >+	phys_addr_t ttbr;

> >+	u32 asid;

> >+};

> >+

> >+static struct msm_iommu_pagetable *to_pagetable(struct msm_mmu *mmu)

> >+{

> >+	return container_of(mmu, struct msm_iommu_pagetable, base);

> >+}

> >+

> >+static int msm_iommu_pagetable_unmap(struct msm_mmu *mmu, u64 iova,

> >+		size_t size)

> >+{

> >+	struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);

> >+	struct io_pgtable_ops *ops = pagetable->pgtbl_ops;

> >+	size_t unmapped = 0;

> >+

> >+	/* Unmap the block one page at a time */

> >+	while (size) {

> >+		unmapped += ops->unmap(ops, iova, 4096, NULL);

> >+		iova += 4096;

> >+		size -= 4096;

> >+	}

> >+

> >+	iommu_flush_tlb_all(to_msm_iommu(pagetable->parent)->domain);

> >+

> >+	return (unmapped == size) ? 0 : -EINVAL;

> >+}

> 

> Remember in patch #1 when you said "Then 'domain' can be used like any other

> iommu domain to map and unmap iova addresses in the pagetable."?

> 

> This appears to be very much not that :/

 
The code changed but the commit log stayed the same.  I'll reword.

Jordan

> Robin.

> 

> >+

> >+static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,

> >+		struct sg_table *sgt, size_t len, int prot)

> >+{

> >+	struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);

> >+	struct io_pgtable_ops *ops = pagetable->pgtbl_ops;

> >+	struct scatterlist *sg;

> >+	size_t mapped = 0;

> >+	u64 addr = iova;

> >+	unsigned int i;

> >+

> >+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {

> >+		size_t size = sg->length;

> >+		phys_addr_t phys = sg_phys(sg);

> >+

> >+		/* Map the block one page at a time */

> >+		while (size) {

> >+			if (ops->map(ops, addr, phys, 4096, prot)) {

> >+				msm_iommu_pagetable_unmap(mmu, iova, mapped);

> >+				return -EINVAL;

> >+			}

> >+

> >+			phys += 4096;

> >+			addr += 4096;

> >+			size -= 4096;

> >+			mapped += 4096;

> >+		}

> >+	}

> >+

> >+	return 0;

> >+}

> >+

> >+static void msm_iommu_pagetable_destroy(struct msm_mmu *mmu)

> >+{

> >+	struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);

> >+

> >+	free_io_pgtable_ops(pagetable->pgtbl_ops);

> >+	kfree(pagetable);

> >+}

> >+

> >+/*

> >+ * Given a parent device, create and return an aux domain. This will enable the

> >+ * TTBR0 region

> >+ */

> >+static struct iommu_domain *msm_iommu_get_aux_domain(struct msm_mmu *parent)

> >+{

> >+	struct msm_iommu *iommu = to_msm_iommu(parent);

> >+	struct iommu_domain *domain;

> >+	int ret;

> >+

> >+	if (iommu->aux_domain)

> >+		return iommu->aux_domain;

> >+

> >+	if (!iommu_dev_has_feature(parent->dev, IOMMU_DEV_FEAT_AUX))

> >+		return ERR_PTR(-ENODEV);

> >+

> >+	domain = iommu_domain_alloc(&platform_bus_type);

> >+	if (!domain)

> >+		return ERR_PTR(-ENODEV);

> >+

> >+	ret = iommu_aux_attach_device(domain, parent->dev);

> >+	if (ret) {

> >+		iommu_domain_free(domain);

> >+		return ERR_PTR(ret);

> >+	}

> >+

> >+	iommu->aux_domain = domain;

> >+	return domain;

> >+}

> >+

> >+int msm_iommu_pagetable_params(struct msm_mmu *mmu,

> >+		phys_addr_t *ttbr, int *asid)

> >+{

> >+	struct msm_iommu_pagetable *pagetable;

> >+

> >+	if (mmu->type != MSM_MMU_IOMMU_PAGETABLE)

> >+		return -EINVAL;

> >+

> >+	pagetable = to_pagetable(mmu);

> >+

> >+	if (ttbr)

> >+		*ttbr = pagetable->ttbr;

> >+

> >+	if (asid)

> >+		*asid = pagetable->asid;

> >+

> >+	return 0;

> >+}

> >+

> >+static const struct msm_mmu_funcs pagetable_funcs = {

> >+		.map = msm_iommu_pagetable_map,

> >+		.unmap = msm_iommu_pagetable_unmap,

> >+		.destroy = msm_iommu_pagetable_destroy,

> >+};

> >+

> >+struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)

> >+{

> >+	static int next_asid = 16;

> >+	struct msm_iommu_pagetable *pagetable;

> >+	struct iommu_domain *aux_domain;

> >+	struct io_pgtable_cfg cfg;

> >+	int ret;

> >+

> >+	/* Make sure that the parent has a aux domain attached */

> >+	aux_domain = msm_iommu_get_aux_domain(parent);

> >+	if (IS_ERR(aux_domain))

> >+		return ERR_CAST(aux_domain);

> >+

> >+	/* Get the pagetable configuration from the aux domain */

> >+	ret = iommu_domain_get_attr(aux_domain, DOMAIN_ATTR_PGTABLE_CFG, &cfg);

> >+	if (ret)

> >+		return ERR_PTR(ret);

> >+

> >+	pagetable = kzalloc(sizeof(*pagetable), GFP_KERNEL);

> >+	if (!pagetable)

> >+		return ERR_PTR(-ENOMEM);

> >+

> >+	msm_mmu_init(&pagetable->base, parent->dev, &pagetable_funcs,

> >+		MSM_MMU_IOMMU_PAGETABLE);

> >+

> >+	cfg.tlb = NULL;

> >+

> >+	pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1,

> >+		&cfg, aux_domain);

> >+

> >+	if (!pagetable->pgtbl_ops) {

> >+		kfree(pagetable);

> >+		return ERR_PTR(-ENOMEM);

> >+	}

> >+

> >+

> >+	/* Needed later for TLB flush */

> >+	pagetable->parent = parent;

> >+	pagetable->ttbr = cfg.arm_lpae_s1_cfg.ttbr;

> >+

> >+	pagetable->asid = next_asid;

> >+	next_asid = (next_asid + 1)  % 255;

> >+	if (next_asid < 16)

> >+		next_asid = 16;

> >+

> >+	return &pagetable->base;

> >+}

> >+

> >  static int msm_fault_handler(struct iommu_domain *domain, struct device *dev,

> >  		unsigned long iova, int flags, void *arg)

> >  {

> >@@ -40,6 +217,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova,

> >  	if (iova & BIT_ULL(48))

> >  		iova |= GENMASK_ULL(63, 49);

> >+

> >  	ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot);

> >  	WARN_ON(!ret);

> >@@ -85,7 +263,7 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain)

> >  		return ERR_PTR(-ENOMEM);

> >  	iommu->domain = domain;

> >-	msm_mmu_init(&iommu->base, dev, &funcs);

> >+	msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU);

> >  	iommu_set_fault_handler(domain, msm_fault_handler, iommu);

> >  	ret = iommu_attach_device(iommu->domain, dev);

> >diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h

> >index 3a534ee59bf6..61ade89d9e48 100644

> >--- a/drivers/gpu/drm/msm/msm_mmu.h

> >+++ b/drivers/gpu/drm/msm/msm_mmu.h

> >@@ -17,18 +17,26 @@ struct msm_mmu_funcs {

> >  	void (*destroy)(struct msm_mmu *mmu);

> >  };

> >+enum msm_mmu_type {

> >+	MSM_MMU_GPUMMU,

> >+	MSM_MMU_IOMMU,

> >+	MSM_MMU_IOMMU_PAGETABLE,

> >+};

> >+

> >  struct msm_mmu {

> >  	const struct msm_mmu_funcs *funcs;

> >  	struct device *dev;

> >  	int (*handler)(void *arg, unsigned long iova, int flags);

> >  	void *arg;

> >+	enum msm_mmu_type type;

> >  };

> >  static inline void msm_mmu_init(struct msm_mmu *mmu, struct device *dev,

> >-		const struct msm_mmu_funcs *funcs)

> >+		const struct msm_mmu_funcs *funcs, enum msm_mmu_type type)

> >  {

> >  	mmu->dev = dev;

> >  	mmu->funcs = funcs;

> >+	mmu->type = type;

> >  }

> >  struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain);

> >@@ -41,7 +49,13 @@ static inline void msm_mmu_set_fault_handler(struct msm_mmu *mmu, void *arg,

> >  	mmu->handler = handler;

> >  }

> >+struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent);

> >+

> >  void msm_gpummu_params(struct msm_mmu *mmu, dma_addr_t *pt_base,

> >  		dma_addr_t *tran_error);

> >+

> >+int msm_iommu_pagetable_params(struct msm_mmu *mmu, phys_addr_t *ttbr,

> >+		int *asid);

> >+

> >  #endif /* __MSM_MMU_H__ */

> >


-- 
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project