| Message ID | 20240523175227.117984-2-robdclark@gmail.com |
| --- | --- |
| State | Superseded |
| Headers | show |
| Series | [v4,1/2] iommu/io-pgtable-arm: Add way to debug pgtable walk \| expand |
On Thu, May 23, 2024 at 10:52:21AM -0700, Rob Clark wrote: > From: Rob Clark <robdclark@chromium.org> > > Add an io-pgtable method to walk the pgtable returning the raw PTEs that > would be traversed for a given iova access. > > Signed-off-by: Rob Clark <robdclark@chromium.org> > --- > drivers/iommu/io-pgtable-arm.c | 51 ++++++++++++++++++++++++++++------ > include/linux/io-pgtable.h | 4 +++ > 2 files changed, 46 insertions(+), 9 deletions(-) Acked-by: Joerg Roedel <jroedel@suse.de>
On 23/05/2024 6:52 pm, Rob Clark wrote: > From: Rob Clark <robdclark@chromium.org> > > Add an io-pgtable method to walk the pgtable returning the raw PTEs that > would be traversed for a given iova access. Have to say I'm a little torn here - with my iommu-dma hat on I'm not super enthusiastic about adding any more overhead to iova_to_phys, but in terms of maintaining io-pgtable I do like the overall shape of the implementation... Will, how much would you hate a compromise of inlining iova_to_phys as the default walk behaviour if cb is NULL? :) That said, looking at the unmap figures for dma_map_benchmark on a Neoverse N1, any difference I think I see is still well within the noise, so maybe a handful of extra indirect calls isn't really enough to worry about? Cheers, Robin. > Signed-off-by: Rob Clark <robdclark@chromium.org> > --- > drivers/iommu/io-pgtable-arm.c | 51 ++++++++++++++++++++++++++++------ > include/linux/io-pgtable.h | 4 +++ > 2 files changed, 46 insertions(+), 9 deletions(-) > > diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c > index f7828a7aad41..f47a0e64bb35 100644 > --- a/drivers/iommu/io-pgtable-arm.c > +++ b/drivers/iommu/io-pgtable-arm.c > @@ -693,17 +693,19 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov > data->start_level, ptep); > } > > -static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, > - unsigned long iova) > +static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops, unsigned long iova, > + int (*cb)(void *cb_data, void *pte, int level), > + void *cb_data) > { > struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); > arm_lpae_iopte pte, *ptep = data->pgd; > int lvl = data->start_level; > + int ret; > > do { > /* Valid IOPTE pointer? 
*/ > if (!ptep) > - return 0; > + return -EFAULT; > > /* Grab the IOPTE we're interested in */ > ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); > @@ -711,22 +713,52 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, > > /* Valid entry? */ > if (!pte) > - return 0; > + return -EFAULT; > + > + ret = cb(cb_data, &pte, lvl); > + if (ret) > + return ret; > > - /* Leaf entry? */ > + /* Leaf entry? If so, we've found the translation */ > if (iopte_leaf(pte, lvl, data->iop.fmt)) > - goto found_translation; > + return 0; > > /* Take it to the next level */ > ptep = iopte_deref(pte, data); > } while (++lvl < ARM_LPAE_MAX_LEVELS); > > /* Ran out of page tables to walk */ > + return -EFAULT; > +} > + > +struct iova_to_phys_walk_data { > + arm_lpae_iopte pte; > + int level; > +}; > + > +static int iova_to_phys_walk_cb(void *cb_data, void *pte, int level) > +{ > + struct iova_to_phys_walk_data *d = cb_data; > + > + d->pte = *(arm_lpae_iopte *)pte; > + d->level = level; > + > return 0; > +} > + > +static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, > + unsigned long iova) > +{ > + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); > + struct iova_to_phys_walk_data d; > + int ret; > + > + ret = arm_lpae_pgtable_walk(ops, iova, iova_to_phys_walk_cb, &d); > + if (ret) > + return 0; > > -found_translation: > - iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1); > - return iopte_to_paddr(pte, data) | iova; > + iova &= (ARM_LPAE_BLOCK_SIZE(d.level, data) - 1); > + return iopte_to_paddr(d.pte, data) | iova; > } > > static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) > @@ -807,6 +839,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) > .map_pages = arm_lpae_map_pages, > .unmap_pages = arm_lpae_unmap_pages, > .iova_to_phys = arm_lpae_iova_to_phys, > + .pgtable_walk = arm_lpae_pgtable_walk, > }; > > return data; > diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h > index 86cf1f7ae389..261b48af068a 100644 > --- 
a/include/linux/io-pgtable.h > +++ b/include/linux/io-pgtable.h > @@ -177,6 +177,7 @@ struct io_pgtable_cfg { > * @map_pages: Map a physically contiguous range of pages of the same size. > * @unmap_pages: Unmap a range of virtually contiguous pages of the same size. > * @iova_to_phys: Translate iova to physical address. > + * @pgtable_walk: (optional) Perform a page table walk for a given iova. > * > * These functions map directly onto the iommu_ops member functions with > * the same names. > @@ -190,6 +191,9 @@ struct io_pgtable_ops { > struct iommu_iotlb_gather *gather); > phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops, > unsigned long iova); > + int (*pgtable_walk)(struct io_pgtable_ops *ops, unsigned long iova, > + int (*cb)(void *cb_data, void *pte, int level), > + void *cb_data); > int (*read_and_clear_dirty)(struct io_pgtable_ops *ops, > unsigned long iova, size_t size, > unsigned long flags,
On Tue, Jun 25, 2024 at 4:27 AM Will Deacon <will@kernel.org> wrote: > > On Mon, Jun 24, 2024 at 08:37:26AM -0700, Rob Clark wrote: > > On Mon, Jun 24, 2024 at 8:14 AM Will Deacon <will@kernel.org> wrote: > > > > > > On Thu, May 23, 2024 at 10:52:21AM -0700, Rob Clark wrote: > > > > From: Rob Clark <robdclark@chromium.org> > > > > > > > > Add an io-pgtable method to walk the pgtable returning the raw PTEs that > > > > would be traversed for a given iova access. > > > > > > > > Signed-off-by: Rob Clark <robdclark@chromium.org> > > > > --- > > > > drivers/iommu/io-pgtable-arm.c | 51 ++++++++++++++++++++++++++++------ > > > > include/linux/io-pgtable.h | 4 +++ > > > > 2 files changed, 46 insertions(+), 9 deletions(-) > > > > > > > > diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c > > > > index f7828a7aad41..f47a0e64bb35 100644 > > > > --- a/drivers/iommu/io-pgtable-arm.c > > > > +++ b/drivers/iommu/io-pgtable-arm.c > > > > @@ -693,17 +693,19 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov > > > > data->start_level, ptep); > > > > } > > > > > > > > -static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, > > > > - unsigned long iova) > > > > +static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops, unsigned long iova, > > > > + int (*cb)(void *cb_data, void *pte, int level), > > > > + void *cb_data) > > > > { > > > > struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); > > > > arm_lpae_iopte pte, *ptep = data->pgd; > > > > int lvl = data->start_level; > > > > + int ret; > > > > > > > > do { > > > > /* Valid IOPTE pointer? */ > > > > if (!ptep) > > > > - return 0; > > > > + return -EFAULT; > > > > > > nit: -ENOENT might be a little better, as we're only checking against a > > > NULL entry rather than strictly any faulting entry. 
> > > > > > > /* Grab the IOPTE we're interested in */ > > > > ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); > > > > @@ -711,22 +713,52 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, > > > > > > > > /* Valid entry? */ > > > > if (!pte) > > > > - return 0; > > > > + return -EFAULT; > > > > > > Same here (and at the end of the function). > > > > > > > + > > > > + ret = cb(cb_data, &pte, lvl); > > > > > > Since pte is on the stack, rather than pointing into the actual pgtable, > > > I think it would be clearer to pass it by value to the callback. > > > > fwiw, I passed it as a void* to avoid the pte size.. although I guess > > it could be a union of all the possible pte types > > Can you just get away with a u64? yeah, that wfm if you're ok with it BR, -R
On Mon, Jun 24, 2024 at 8:14 AM Will Deacon <will@kernel.org> wrote: > > On Thu, May 23, 2024 at 10:52:21AM -0700, Rob Clark wrote: > > From: Rob Clark <robdclark@chromium.org> > > > > Add an io-pgtable method to walk the pgtable returning the raw PTEs that > > would be traversed for a given iova access. > > > > Signed-off-by: Rob Clark <robdclark@chromium.org> > > --- > > drivers/iommu/io-pgtable-arm.c | 51 ++++++++++++++++++++++++++++------ > > include/linux/io-pgtable.h | 4 +++ > > 2 files changed, 46 insertions(+), 9 deletions(-) > > > > diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c > > index f7828a7aad41..f47a0e64bb35 100644 > > --- a/drivers/iommu/io-pgtable-arm.c > > +++ b/drivers/iommu/io-pgtable-arm.c > > @@ -693,17 +693,19 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov > > data->start_level, ptep); > > } > > > > -static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, > > - unsigned long iova) > > +static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops, unsigned long iova, > > + int (*cb)(void *cb_data, void *pte, int level), > > + void *cb_data) > > { > > struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); > > arm_lpae_iopte pte, *ptep = data->pgd; > > int lvl = data->start_level; > > + int ret; > > > > do { > > /* Valid IOPTE pointer? */ > > if (!ptep) > > - return 0; > > + return -EFAULT; > > nit: -ENOENT might be a little better, as we're only checking against a > NULL entry rather than strictly any faulting entry. > > > /* Grab the IOPTE we're interested in */ > > ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); > > @@ -711,22 +713,52 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, > > > > /* Valid entry? */ > > if (!pte) > > - return 0; > > + return -EFAULT; > > Same here (and at the end of the function). 
> > > + > > + ret = cb(cb_data, &pte, lvl); > > Since pte is on the stack, rather than pointing into the actual pgtable, > I think it would be clearer to pass it by value to the callback. > > > + if (ret) > > + return ret; > > > > - /* Leaf entry? */ > > + /* Leaf entry? If so, we've found the translation */ > > if (iopte_leaf(pte, lvl, data->iop.fmt)) > > - goto found_translation; > > + return 0; > > > > /* Take it to the next level */ > > ptep = iopte_deref(pte, data); > > } while (++lvl < ARM_LPAE_MAX_LEVELS); > > > > /* Ran out of page tables to walk */ > > + return -EFAULT; > > +} > > + > > +struct iova_to_phys_walk_data { > > + arm_lpae_iopte pte; > > + int level; > > +}; > > Expanding a little on Robin's suggestion, why don't we drop this structure > in favour of something more generic: > > struct arm_lpae_walk_data { > arm_lpae_iopte ptes[ARM_LPAE_MAX_LEVELS]; > }; > > and then do something in the walker like: > > if (cb && !cb(pte, lvl)) > walk_data->ptes[lvl] = pte; > So thinking about this some more... if I use a walk_data struct to return the PTEs, I can just get rid of the callback entirely. That ends up looking more like my first version. The callback taking void* was mainly to avoid coding the PTE size in the generic io_pgtable interface. But if we just go with u64, because that is the biggest PTE size we need to deal with, then it all gets simpler. (The callback was actually a semi-awkward interface to use from drm/msm.) BR, -R > which could return the physical address at the end, if it reaches a leaf > entry. That way arm_lpae_iova_to_phys() is just passing a NULL callback > to the walker and your debug callback just needs to return 0 (i.e. the > callback is basically just saying whether or not to continue the walk). > > Will
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index f7828a7aad41..f47a0e64bb35 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -693,17 +693,19 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov data->start_level, ptep); } -static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, - unsigned long iova) +static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops, unsigned long iova, + int (*cb)(void *cb_data, void *pte, int level), + void *cb_data) { struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); arm_lpae_iopte pte, *ptep = data->pgd; int lvl = data->start_level; + int ret; do { /* Valid IOPTE pointer? */ if (!ptep) - return 0; + return -EFAULT; /* Grab the IOPTE we're interested in */ ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); @@ -711,22 +713,52 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, /* Valid entry? */ if (!pte) - return 0; + return -EFAULT; + + ret = cb(cb_data, &pte, lvl); + if (ret) + return ret; - /* Leaf entry? */ + /* Leaf entry? 
If so, we've found the translation */ if (iopte_leaf(pte, lvl, data->iop.fmt)) - goto found_translation; + return 0; /* Take it to the next level */ ptep = iopte_deref(pte, data); } while (++lvl < ARM_LPAE_MAX_LEVELS); /* Ran out of page tables to walk */ + return -EFAULT; +} + +struct iova_to_phys_walk_data { + arm_lpae_iopte pte; + int level; +}; + +static int iova_to_phys_walk_cb(void *cb_data, void *pte, int level) +{ + struct iova_to_phys_walk_data *d = cb_data; + + d->pte = *(arm_lpae_iopte *)pte; + d->level = level; + return 0; +} + +static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, + unsigned long iova) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct iova_to_phys_walk_data d; + int ret; + + ret = arm_lpae_pgtable_walk(ops, iova, iova_to_phys_walk_cb, &d); + if (ret) + return 0; -found_translation: - iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1); - return iopte_to_paddr(pte, data) | iova; + iova &= (ARM_LPAE_BLOCK_SIZE(d.level, data) - 1); + return iopte_to_paddr(d.pte, data) | iova; } static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) @@ -807,6 +839,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) .map_pages = arm_lpae_map_pages, .unmap_pages = arm_lpae_unmap_pages, .iova_to_phys = arm_lpae_iova_to_phys, + .pgtable_walk = arm_lpae_pgtable_walk, }; return data; diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 86cf1f7ae389..261b48af068a 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -177,6 +177,7 @@ struct io_pgtable_cfg { * @map_pages: Map a physically contiguous range of pages of the same size. * @unmap_pages: Unmap a range of virtually contiguous pages of the same size. * @iova_to_phys: Translate iova to physical address. + * @pgtable_walk: (optional) Perform a page table walk for a given iova. * * These functions map directly onto the iommu_ops member functions with * the same names. 
@@ -190,6 +191,9 @@ struct io_pgtable_ops { struct iommu_iotlb_gather *gather); phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops, unsigned long iova); + int (*pgtable_walk)(struct io_pgtable_ops *ops, unsigned long iova, + int (*cb)(void *cb_data, void *pte, int level), + void *cb_data); int (*read_and_clear_dirty)(struct io_pgtable_ops *ops, unsigned long iova, size_t size, unsigned long flags,