diff mbox series

[v9,2/5] iommu: Implement of_iommu_get_resv_regions()

Message ID 20220923123557.866972-3-thierry.reding@gmail.com
State New
Headers show
Series iommu: Support mappings/reservations in reserved-memory regions | expand

Commit Message

Thierry Reding Sept. 23, 2022, 12:35 p.m. UTC
From: Thierry Reding <treding@nvidia.com>

This is an implementation that IOMMU drivers can use to obtain reserved
memory regions from a device tree node. It uses the reserved-memory DT
bindings to find the regions associated with a given device. If these
regions are marked accordingly, identity mappings will be created for
them in the IOMMU domain that the devices will be attached to.

Cc: Frank Rowand <frowand.list@gmail.com>
Cc: devicetree@vger.kernel.org
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
Changes in v9:
- address review comments by Robin Murphy:
  - warn about non-direct mappings since they are not supported yet
  - cleanup code to require less indentation
  - narrow scope of variables

Changes in v8:
- cleanup set-but-unused variables

Changes in v6:
- remove reference to now unused dt-bindings/reserved-memory.h include

Changes in v5:
- update for new "iommu-addresses" device tree bindings

Changes in v4:
- fix build failure on !CONFIG_OF_ADDRESS

Changes in v3:
- change "active" property to identity mapping flag that is part of the
  memory region specifier (as defined by #memory-region-cells) to allow
  per-reference flags to be used

Changes in v2:
- use "active" property to determine whether direct mappings are needed

 drivers/iommu/of_iommu.c | 104 +++++++++++++++++++++++++++++++++++++++
 include/linux/of_iommu.h |   8 +++
 2 files changed, 112 insertions(+)

Comments

Robin Murphy Oct. 7, 2022, 1:47 p.m. UTC | #1
On 2022-09-23 13:35, Thierry Reding wrote:
> From: Thierry Reding <treding@nvidia.com>
> 
> This is an implementation that IOMMU drivers can use to obtain reserved
> memory regions from a device tree node. It uses the reserved-memory DT
> bindings to find the regions associated with a given device. If these
> regions are marked accordingly, identity mappings will be created for
> them in the IOMMU domain that the devices will be attached to.
> 
> Cc: Frank Rowand <frowand.list@gmail.com>
> Cc: devicetree@vger.kernel.org
> Reviewed-by: Rob Herring <robh@kernel.org>
> Signed-off-by: Thierry Reding <treding@nvidia.com>
> ---
> Changes in v9:
> - address review comments by Robin Murphy:
>    - warn about non-direct mappings since they are not supported yet
>    - cleanup code to require less indentation
>    - narrow scope of variables
> 
> Changes in v8:
> - cleanup set-but-unused variables
> 
> Changes in v6:
> - remove reference to now unused dt-bindings/reserved-memory.h include
> 
> Changes in v5:
> - update for new "iommu-addresses" device tree bindings
> 
> Changes in v4:
> - fix build failure on !CONFIG_OF_ADDRESS
> 
> Changes in v3:
> - change "active" property to identity mapping flag that is part of the
>    memory region specifier (as defined by #memory-region-cells) to allow
>    per-reference flags to be used
> 
> Changes in v2:
> - use "active" property to determine whether direct mappings are needed
> 
>   drivers/iommu/of_iommu.c | 104 +++++++++++++++++++++++++++++++++++++++
>   include/linux/of_iommu.h |   8 +++
>   2 files changed, 112 insertions(+)
> 
> diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
> index 5696314ae69e..0bf2b08bca0a 100644
> --- a/drivers/iommu/of_iommu.c
> +++ b/drivers/iommu/of_iommu.c
> @@ -11,6 +11,7 @@
>   #include <linux/module.h>
>   #include <linux/msi.h>
>   #include <linux/of.h>
> +#include <linux/of_address.h>
>   #include <linux/of_iommu.h>
>   #include <linux/of_pci.h>
>   #include <linux/pci.h>
> @@ -172,3 +173,106 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
>   
>   	return ops;
>   }
> +
> +static inline bool check_direct_mapping(struct device *dev, struct resource *phys,

Where "phys" is the virtual address, right? :(

> +					phys_addr_t start, phys_addr_t end)
> +{
> +	if (start != phys->start || end != phys->end) {
> +		dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n",
> +			 &phys, &start, &end);
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +/**
> + * of_iommu_get_resv_regions - reserved region driver helper for device tree
> + * @dev: device for which to get reserved regions
> + * @list: reserved region list
> + *
> + * IOMMU drivers can use this to implement their .get_resv_regions() callback
> + * for memory regions attached to a device tree node. See the reserved-memory
> + * device tree bindings on how to use these:
> + *
> + *   Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> + */
> +void of_iommu_get_resv_regions(struct device *dev, struct list_head *list)
> +{
> +#if IS_ENABLED(CONFIG_OF_ADDRESS)
> +	struct of_phandle_iterator it;
> +	int err;
> +
> +	of_for_each_phandle(&it, err, dev->of_node, "memory-region", NULL, 0) {
> +		const __be32 *maps, *end;
> +		struct resource res;
> +		int size;
> +
> +		memset(&res, 0, sizeof(res));
> +
> +		/*
> +		 * The "reg" property is optional and can be omitted by reserved-memory regions
> +		 * that represent reservations in the IOVA space, which are regions that should
> +		 * not be mapped.
> +		 */
> +		if (of_find_property(it.node, "reg", NULL)) {
> +			err = of_address_to_resource(it.node, 0, &res);
> +			if (err < 0) {
> +				dev_err(dev, "failed to parse memory region %pOF: %d\n",
> +					it.node, err);
> +				continue;
> +			}
> +		}
> +
> +		maps = of_get_property(it.node, "iommu-addresses", &size);
> +		if (!maps)
> +			continue;
> +
> +		end = maps + size / sizeof(__be32);
> +
> +		while (maps < end) {
> +			struct device_node *np;
> +			u32 phandle;
> +			int na, ns;
> +
> +			phandle = be32_to_cpup(maps++);
> +			np = of_find_node_by_phandle(phandle);
> +			na = of_n_addr_cells(np);
> +			ns = of_n_size_cells(np);
> +
> +			if (np == dev->of_node) {
> +				int prot = IOMMU_READ | IOMMU_WRITE;
> +				struct iommu_resv_region *region;
> +				enum iommu_resv_type type;
> +				phys_addr_t start;
> +				size_t length;
> +
> +				start = of_translate_dma_address(np, maps);
> +				length = of_read_number(maps + na, ns);
> +
> +				/*
> +				 * IOMMU regions without an associated physical region cannot be
> +				 * mapped and are simply reservations.
> +				 */
> +				if (res.end > res.start) {
> +					phys_addr_t end = start + length - 1;
> +
> +					if (check_direct_mapping(dev, &res, start, end))
> +						type = IOMMU_RESV_DIRECT_RELAXABLE;

Again I really don't think we should assume relaxable by default.

Looking at the shape of things now, it seems like 
check_direct_mapping() wants to subsume the check on res as well and
grow into a more general function for determining the iommu_resv_type.
Then we've got a clear place to start special-casing things like 
simple-framebuffer that we do know a bit more about.

Thanks,
Robin.

> +					else
> +						type = IOMMU_RESV_RESERVED;
> +				} else {
> +					type = IOMMU_RESV_RESERVED;
> +				}
> +
> +				region = iommu_alloc_resv_region(start, length, prot, type);
> +				if (region)
> +					list_add_tail(&region->list, list);
> +			}
> +
> +			maps += na + ns;
> +		}
> +	}
> +#endif
> +}
> +EXPORT_SYMBOL(of_iommu_get_resv_regions);
> diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h
> index 55c1eb300a86..9a5e6b410dd2 100644
> --- a/include/linux/of_iommu.h
> +++ b/include/linux/of_iommu.h
> @@ -12,6 +12,9 @@ extern const struct iommu_ops *of_iommu_configure(struct device *dev,
>   					struct device_node *master_np,
>   					const u32 *id);
>   
> +extern void of_iommu_get_resv_regions(struct device *dev,
> +				      struct list_head *list);
> +
>   #else
>   
>   static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
> @@ -21,6 +24,11 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
>   	return NULL;
>   }
>   
> +static inline void of_iommu_get_resv_regions(struct device *dev,
> +					     struct list_head *list)
> +{
> +}
> +
>   #endif	/* CONFIG_OF_IOMMU */
>   
>   #endif /* __OF_IOMMU_H */
Thierry Reding Oct. 7, 2022, 3:28 p.m. UTC | #2
On Fri, Oct 07, 2022 at 02:47:23PM +0100, Robin Murphy wrote:
> On 2022-09-23 13:35, Thierry Reding wrote:
> > From: Thierry Reding <treding@nvidia.com>
> > 
> > This is an implementation that IOMMU drivers can use to obtain reserved
> > memory regions from a device tree node. It uses the reserved-memory DT
> > bindings to find the regions associated with a given device. If these
> > regions are marked accordingly, identity mappings will be created for
> > them in the IOMMU domain that the devices will be attached to.
> > 
> > Cc: Frank Rowand <frowand.list@gmail.com>
> > Cc: devicetree@vger.kernel.org
> > Reviewed-by: Rob Herring <robh@kernel.org>
> > Signed-off-by: Thierry Reding <treding@nvidia.com>
> > ---
> > Changes in v9:
> > - address review comments by Robin Murphy:
> >    - warn about non-direct mappings since they are not supported yet
> >    - cleanup code to require less indentation
> >    - narrow scope of variables
> > 
> > Changes in v8:
> > - cleanup set-but-unused variables
> > 
> > Changes in v6:
> > - remove reference to now unused dt-bindings/reserved-memory.h include
> > 
> > Changes in v5:
> > - update for new "iommu-addresses" device tree bindings
> > 
> > Changes in v4:
> > - fix build failure on !CONFIG_OF_ADDRESS
> > 
> > Changes in v3:
> > - change "active" property to identity mapping flag that is part of the
> >    memory region specifier (as defined by #memory-region-cells) to allow
> >    per-reference flags to be used
> > 
> > Changes in v2:
> > - use "active" property to determine whether direct mappings are needed
> > 
> >   drivers/iommu/of_iommu.c | 104 +++++++++++++++++++++++++++++++++++++++
> >   include/linux/of_iommu.h |   8 +++
> >   2 files changed, 112 insertions(+)
> > 
> > diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
> > index 5696314ae69e..0bf2b08bca0a 100644
> > --- a/drivers/iommu/of_iommu.c
> > +++ b/drivers/iommu/of_iommu.c
> > @@ -11,6 +11,7 @@
> >   #include <linux/module.h>
> >   #include <linux/msi.h>
> >   #include <linux/of.h>
> > +#include <linux/of_address.h>
> >   #include <linux/of_iommu.h>
> >   #include <linux/of_pci.h>
> >   #include <linux/pci.h>
> > @@ -172,3 +173,106 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
> >   	return ops;
> >   }
> > +
> > +static inline bool check_direct_mapping(struct device *dev, struct resource *phys,
> 
> Where "phys" is the virtual address, right? :(

No, phys is actually res passed in from of_iommu_get_resv_regions()
where it is the address read from the "reg" property. So that's the
physical address of the reserved region. Perhaps it'd be useful to
rename "res" to "phys" in that function to be a little more consistent.
It's actually the "start" and "end" values that are passed into this
function that refer to the I/O virtual addresses from iommu-addresses.

> 
> > +					phys_addr_t start, phys_addr_t end)
> > +{
> > +	if (start != phys->start || end != phys->end) {
> > +		dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n",
> > +			 &phys, &start, &end);
> > +		return false;
> > +	}
> > +
> > +	return true;
> > +}
> > +
> > +/**
> > + * of_iommu_get_resv_regions - reserved region driver helper for device tree
> > + * @dev: device for which to get reserved regions
> > + * @list: reserved region list
> > + *
> > + * IOMMU drivers can use this to implement their .get_resv_regions() callback
> > + * for memory regions attached to a device tree node. See the reserved-memory
> > + * device tree bindings on how to use these:
> > + *
> > + *   Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> > + */
> > +void of_iommu_get_resv_regions(struct device *dev, struct list_head *list)
> > +{
> > +#if IS_ENABLED(CONFIG_OF_ADDRESS)
> > +	struct of_phandle_iterator it;
> > +	int err;
> > +
> > +	of_for_each_phandle(&it, err, dev->of_node, "memory-region", NULL, 0) {
> > +		const __be32 *maps, *end;
> > +		struct resource res;
> > +		int size;
> > +
> > +		memset(&res, 0, sizeof(res));
> > +
> > +		/*
> > +		 * The "reg" property is optional and can be omitted by reserved-memory regions
> > +		 * that represent reservations in the IOVA space, which are regions that should
> > +		 * not be mapped.
> > +		 */
> > +		if (of_find_property(it.node, "reg", NULL)) {
> > +			err = of_address_to_resource(it.node, 0, &res);
> > +			if (err < 0) {
> > +				dev_err(dev, "failed to parse memory region %pOF: %d\n",
> > +					it.node, err);
> > +				continue;
> > +			}
> > +		}
> > +
> > +		maps = of_get_property(it.node, "iommu-addresses", &size);
> > +		if (!maps)
> > +			continue;
> > +
> > +		end = maps + size / sizeof(__be32);
> > +
> > +		while (maps < end) {
> > +			struct device_node *np;
> > +			u32 phandle;
> > +			int na, ns;
> > +
> > +			phandle = be32_to_cpup(maps++);
> > +			np = of_find_node_by_phandle(phandle);
> > +			na = of_n_addr_cells(np);
> > +			ns = of_n_size_cells(np);
> > +
> > +			if (np == dev->of_node) {
> > +				int prot = IOMMU_READ | IOMMU_WRITE;
> > +				struct iommu_resv_region *region;
> > +				enum iommu_resv_type type;
> > +				phys_addr_t start;
> > +				size_t length;
> > +
> > +				start = of_translate_dma_address(np, maps);
> > +				length = of_read_number(maps + na, ns);
> > +
> > +				/*
> > +				 * IOMMU regions without an associated physical region cannot be
> > +				 * mapped and are simply reservations.
> > +				 */
> > +				if (res.end > res.start) {
> > +					phys_addr_t end = start + length - 1;
> > +
> > +					if (check_direct_mapping(dev, &res, start, end))
> > +						type = IOMMU_RESV_DIRECT_RELAXABLE;
> 
> Again I really don't think we should assume relaxable by default.
> 
> Looking at the shape of things now, it seems like check_direct_mappings()
> wants to subsume the check on res as well and grow in to a more general
> function for determining the iommu_resv_type. Then we've got a clear place
> to start special-casing things like simple-framebuffer that we do know a bit
> more about.

Okay, I think I know where you're going with this. Let me see what I can
come up with.

Thierry

> 
> Thanks,
> Robin.
> 
> > +					else
> > +						type = IOMMU_RESV_RESERVED;
> > +				} else {
> > +					type = IOMMU_RESV_RESERVED;
> > +				}
> > +
> > +				region = iommu_alloc_resv_region(start, length, prot, type);
> > +				if (region)
> > +					list_add_tail(&region->list, list);
> > +			}
> > +
> > +			maps += na + ns;
> > +		}
> > +	}
> > +#endif
> > +}
> > +EXPORT_SYMBOL(of_iommu_get_resv_regions);
> > diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h
> > index 55c1eb300a86..9a5e6b410dd2 100644
> > --- a/include/linux/of_iommu.h
> > +++ b/include/linux/of_iommu.h
> > @@ -12,6 +12,9 @@ extern const struct iommu_ops *of_iommu_configure(struct device *dev,
> >   					struct device_node *master_np,
> >   					const u32 *id);
> > +extern void of_iommu_get_resv_regions(struct device *dev,
> > +				      struct list_head *list);
> > +
> >   #else
> >   static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
> > @@ -21,6 +24,11 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
> >   	return NULL;
> >   }
> > +static inline void of_iommu_get_resv_regions(struct device *dev,
> > +					     struct list_head *list)
> > +{
> > +}
> > +
> >   #endif	/* CONFIG_OF_IOMMU */
> >   #endif /* __OF_IOMMU_H */
Robin Murphy Oct. 7, 2022, 4:35 p.m. UTC | #3
On 2022-10-07 16:28, Thierry Reding wrote:
[...]
>>> @@ -172,3 +173,106 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
>>>    	return ops;
>>>    }
>>> +
>>> +static inline bool check_direct_mapping(struct device *dev, struct resource *phys,
>>
>> Where "phys" is the virtual address, right? :(
> 
> No, phys is actually res passed in from of_iommu_get_resv_regions()
> where it is the address read from the "reg" property. So that's the
> physical address of the reserved region. Perhaps it'd be useful to
> rename "res" to "phys" in that function to be a little more consistent.
> It's actually the "start" and "end" values that are passed into this
> function that refer to the I/O virtual addresses from iommu-addresses.

Oh, so it's the phys_addr_t's that aren't physical addresses - well, it 
had to be wrong one way or the other :)

I agree that s/res/phys/ in the main function, and maybe s/start/iova/ 
too, would be helpful.

Thanks,
Robin.
Thierry Reding Oct. 19, 2022, 6:03 p.m. UTC | #4
On Fri, Sep 23, 2022 at 02:35:54PM +0200, Thierry Reding wrote:
> From: Thierry Reding <treding@nvidia.com>
> 
> This is an implementation that IOMMU drivers can use to obtain reserved
> memory regions from a device tree node. It uses the reserved-memory DT
> bindings to find the regions associated with a given device. If these
> regions are marked accordingly, identity mappings will be created for
> them in the IOMMU domain that the devices will be attached to.
> 
> Cc: Frank Rowand <frowand.list@gmail.com>
> Cc: devicetree@vger.kernel.org
> Reviewed-by: Rob Herring <robh@kernel.org>
> Signed-off-by: Thierry Reding <treding@nvidia.com>
> ---
> Changes in v9:
> - address review comments by Robin Murphy:
>   - warn about non-direct mappings since they are not supported yet
>   - cleanup code to require less indentation
>   - narrow scope of variables
> 
> Changes in v8:
> - cleanup set-but-unused variables
> 
> Changes in v6:
> - remove reference to now unused dt-bindings/reserved-memory.h include
> 
> Changes in v5:
> - update for new "iommu-addresses" device tree bindings
> 
> Changes in v4:
> - fix build failure on !CONFIG_OF_ADDRESS
> 
> Changes in v3:
> - change "active" property to identity mapping flag that is part of the
>   memory region specifier (as defined by #memory-region-cells) to allow
>   per-reference flags to be used
> 
> Changes in v2:
> - use "active" property to determine whether direct mappings are needed
> 
>  drivers/iommu/of_iommu.c | 104 +++++++++++++++++++++++++++++++++++++++
>  include/linux/of_iommu.h |   8 +++
>  2 files changed, 112 insertions(+)
> 
> diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
> index 5696314ae69e..0bf2b08bca0a 100644
> --- a/drivers/iommu/of_iommu.c
> +++ b/drivers/iommu/of_iommu.c
> @@ -11,6 +11,7 @@
>  #include <linux/module.h>
>  #include <linux/msi.h>
>  #include <linux/of.h>
> +#include <linux/of_address.h>
>  #include <linux/of_iommu.h>
>  #include <linux/of_pci.h>
>  #include <linux/pci.h>
> @@ -172,3 +173,106 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
>  
>  	return ops;
>  }
> +
> +static inline bool check_direct_mapping(struct device *dev, struct resource *phys,
> +					phys_addr_t start, phys_addr_t end)
> +{
> +	if (start != phys->start || end != phys->end) {
> +		dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n",
> +			 &phys, &start, &end);
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +/**
> + * of_iommu_get_resv_regions - reserved region driver helper for device tree
> + * @dev: device for which to get reserved regions
> + * @list: reserved region list
> + *
> + * IOMMU drivers can use this to implement their .get_resv_regions() callback
> + * for memory regions attached to a device tree node. See the reserved-memory
> + * device tree bindings on how to use these:
> + *
> + *   Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> + */
> +void of_iommu_get_resv_regions(struct device *dev, struct list_head *list)
> +{
> +#if IS_ENABLED(CONFIG_OF_ADDRESS)
> +	struct of_phandle_iterator it;
> +	int err;
> +
> +	of_for_each_phandle(&it, err, dev->of_node, "memory-region", NULL, 0) {
> +		const __be32 *maps, *end;
> +		struct resource res;
> +		int size;
> +
> +		memset(&res, 0, sizeof(res));
> +
> +		/*
> +		 * The "reg" property is optional and can be omitted by reserved-memory regions
> +		 * that represent reservations in the IOVA space, which are regions that should
> +		 * not be mapped.
> +		 */
> +		if (of_find_property(it.node, "reg", NULL)) {
> +			err = of_address_to_resource(it.node, 0, &res);
> +			if (err < 0) {
> +				dev_err(dev, "failed to parse memory region %pOF: %d\n",
> +					it.node, err);
> +				continue;
> +			}
> +		}
> +
> +		maps = of_get_property(it.node, "iommu-addresses", &size);
> +		if (!maps)
> +			continue;
> +
> +		end = maps + size / sizeof(__be32);
> +
> +		while (maps < end) {
> +			struct device_node *np;
> +			u32 phandle;
> +			int na, ns;
> +
> +			phandle = be32_to_cpup(maps++);
> +			np = of_find_node_by_phandle(phandle);
> +			na = of_n_addr_cells(np);
> +			ns = of_n_size_cells(np);
> +
> +			if (np == dev->of_node) {
> +				int prot = IOMMU_READ | IOMMU_WRITE;
> +				struct iommu_resv_region *region;
> +				enum iommu_resv_type type;
> +				phys_addr_t start;
> +				size_t length;
> +
> +				start = of_translate_dma_address(np, maps);

I just came across an issue when extending the testing from simple-
framebuffer to the full display engine, with the main difference being
that the full display engine is hooked up both to the IOMMU and to the
memory controller via the interconnects property ("dma-mem").

The latter seems to throw off the of_translate_dma_address() because we
have a top-level bus@0 node that sets #address-cells = <1> and #size-
cells = <1>, which is sufficient to represent the "reg" entries for the
devices. However, the reserved-memory node needs #address-cells =
<2> and #size-cells = <2> to make sure we can describe memory regions
above the 4 GiB boundary (and potentially larger than 4 GiB, too).

What happens now is that of_translate_dma_address() will find the DMA
parent for the display engine, which is the memory controller, which
also has #address-cells = <2> and #size-cells = <2> for the same reason
as the reserved-memory node. In other words, what this tries to model is
that for DMA accesses, we span more than the 4 GiB range that is
sufficient to address registers for IP blocks.

However, of_translate_dma_address() then ends up getting #address-cells
and #size-cells from the *parent* of the DMA parent. And then everything
falls apart during translation.

Any idea if I'm doing something wrong? Or is the code wrong and it's not
actually using the right cell counts? Should it be using the cell counts
from the DMA parent rather than its parent bus?

Thierry
Thierry Reding Oct. 20, 2022, 2:34 p.m. UTC | #5
On Wed, Oct 19, 2022 at 08:03:31PM +0200, Thierry Reding wrote:
> On Fri, Sep 23, 2022 at 02:35:54PM +0200, Thierry Reding wrote:
> > From: Thierry Reding <treding@nvidia.com>
> > 
> > This is an implementation that IOMMU drivers can use to obtain reserved
> > memory regions from a device tree node. It uses the reserved-memory DT
> > bindings to find the regions associated with a given device. If these
> > regions are marked accordingly, identity mappings will be created for
> > them in the IOMMU domain that the devices will be attached to.
> > 
> > Cc: Frank Rowand <frowand.list@gmail.com>
> > Cc: devicetree@vger.kernel.org
> > Reviewed-by: Rob Herring <robh@kernel.org>
> > Signed-off-by: Thierry Reding <treding@nvidia.com>
> > ---
> > Changes in v9:
> > - address review comments by Robin Murphy:
> >   - warn about non-direct mappings since they are not supported yet
> >   - cleanup code to require less indentation
> >   - narrow scope of variables
> > 
> > Changes in v8:
> > - cleanup set-but-unused variables
> > 
> > Changes in v6:
> > - remove reference to now unused dt-bindings/reserved-memory.h include
> > 
> > Changes in v5:
> > - update for new "iommu-addresses" device tree bindings
> > 
> > Changes in v4:
> > - fix build failure on !CONFIG_OF_ADDRESS
> > 
> > Changes in v3:
> > - change "active" property to identity mapping flag that is part of the
> >   memory region specifier (as defined by #memory-region-cells) to allow
> >   per-reference flags to be used
> > 
> > Changes in v2:
> > - use "active" property to determine whether direct mappings are needed
> > 
> >  drivers/iommu/of_iommu.c | 104 +++++++++++++++++++++++++++++++++++++++
> >  include/linux/of_iommu.h |   8 +++
> >  2 files changed, 112 insertions(+)
> > 
> > diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
> > index 5696314ae69e..0bf2b08bca0a 100644
> > --- a/drivers/iommu/of_iommu.c
> > +++ b/drivers/iommu/of_iommu.c
> > @@ -11,6 +11,7 @@
> >  #include <linux/module.h>
> >  #include <linux/msi.h>
> >  #include <linux/of.h>
> > +#include <linux/of_address.h>
> >  #include <linux/of_iommu.h>
> >  #include <linux/of_pci.h>
> >  #include <linux/pci.h>
> > @@ -172,3 +173,106 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
> >  
> >  	return ops;
> >  }
> > +
> > +static inline bool check_direct_mapping(struct device *dev, struct resource *phys,
> > +					phys_addr_t start, phys_addr_t end)
> > +{
> > +	if (start != phys->start || end != phys->end) {
> > +		dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n",
> > +			 &phys, &start, &end);
> > +		return false;
> > +	}
> > +
> > +	return true;
> > +}
> > +
> > +/**
> > + * of_iommu_get_resv_regions - reserved region driver helper for device tree
> > + * @dev: device for which to get reserved regions
> > + * @list: reserved region list
> > + *
> > + * IOMMU drivers can use this to implement their .get_resv_regions() callback
> > + * for memory regions attached to a device tree node. See the reserved-memory
> > + * device tree bindings on how to use these:
> > + *
> > + *   Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> > + */
> > +void of_iommu_get_resv_regions(struct device *dev, struct list_head *list)
> > +{
> > +#if IS_ENABLED(CONFIG_OF_ADDRESS)
> > +	struct of_phandle_iterator it;
> > +	int err;
> > +
> > +	of_for_each_phandle(&it, err, dev->of_node, "memory-region", NULL, 0) {
> > +		const __be32 *maps, *end;
> > +		struct resource res;
> > +		int size;
> > +
> > +		memset(&res, 0, sizeof(res));
> > +
> > +		/*
> > +		 * The "reg" property is optional and can be omitted by reserved-memory regions
> > +		 * that represent reservations in the IOVA space, which are regions that should
> > +		 * not be mapped.
> > +		 */
> > +		if (of_find_property(it.node, "reg", NULL)) {
> > +			err = of_address_to_resource(it.node, 0, &res);
> > +			if (err < 0) {
> > +				dev_err(dev, "failed to parse memory region %pOF: %d\n",
> > +					it.node, err);
> > +				continue;
> > +			}
> > +		}
> > +
> > +		maps = of_get_property(it.node, "iommu-addresses", &size);
> > +		if (!maps)
> > +			continue;
> > +
> > +		end = maps + size / sizeof(__be32);
> > +
> > +		while (maps < end) {
> > +			struct device_node *np;
> > +			u32 phandle;
> > +			int na, ns;
> > +
> > +			phandle = be32_to_cpup(maps++);
> > +			np = of_find_node_by_phandle(phandle);
> > +			na = of_n_addr_cells(np);
> > +			ns = of_n_size_cells(np);
> > +
> > +			if (np == dev->of_node) {
> > +				int prot = IOMMU_READ | IOMMU_WRITE;
> > +				struct iommu_resv_region *region;
> > +				enum iommu_resv_type type;
> > +				phys_addr_t start;
> > +				size_t length;
> > +
> > +				start = of_translate_dma_address(np, maps);
> 
> I just came across an issue when extending the testing from simple-
> framebuffer to the full display engine, with the main difference being
> that the fill display engine is hooked up both to the IOMMU and to the
> memory controller via the interconnects property ("dma-mem").
> 
> The latter seems to throw off the of_translate_dma_address() because we
> have a top-level bus@0 node that sets #address-cells = <1> and #size-
> cells = <1>, which is sufficient to represent the "reg" entries for the
> devices. However, for the reserved-memory node needs #address-cells =
> <2> and #size-cells = <2> to make sure we can describe memory regions
> above the 4 GiB boundary (and potentially larger than 4 GiB, too).
> 
> What happens now is that of_translate_dma_address() will find the DMA
> parent for the display engine, which is the memory controller, which
> also has #address-cells = <2> and #size-cells = <2> for the same reason
> as the reserved-memory node. In other words, what this tries to model is
> that for DMA accesses, we span more than the 4 GiB range that is
> sufficient to address registers for IP blocks.
> 
> However, of_translate_dma_address() then ends up getting #address-cells
> and #size-cells from the *parent* of the DMA parent. And then everything
> falls apart during translation.
> 
> Any idea if I'm doing something wrong? Or is the code wrong and it's not
> actually using the right cell counts? Should it be using the cell counts
> from the DMA parent rather than its parent bus?

I came up with the attached patch. This works for my case, but will
abort the DMA parent traversal early on some devices. I'm not sure how
much this would matter in practice.

A safer way would be to create a new variant of __of_get_dma_parent()
that doesn't have the of_get_parent() fallback. That's assuming that we
agree on the concept of having potentially different cell counts, and
effectively DMA busses that are separate from the traditional control
busses in DT.

Do we also need separate DMA cell counts so that one node can be a DMA
bus and a control bus at the same time? Or is this overcomplicating
things and a simpler approach would be to propagate the cell counts all
the way to the top level? I think this all might work with the existing
code if I make bus@0's cell count 2 & 2 for Tegra SoC DTSI files. It's a
lot of churn and seems more like a workaround rather than a correct
model of the busses.

Thierry
From 7f63e7c86fa43f6c7d9254323606daeeb442cf48 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 20 Oct 2022 15:21:10 +0200
Subject: [PATCH] of: Stop DMA translation at last DMA parent

DMA parent devices can define separate DMA busses via the "dma-ranges"
and "#address-cells" and "#size-cells" properties. If the DMA bus has
different cell counts than its parent, this can cause the translation
of DMA addresses to fail (e.g. truncation from 2 to 1 address cells).

Avoid this by stopping the search for DMA parents when a parent without
a "dma-ranges" property is encountered. Also, since it is the DMA parent
that defines the DMA bus, use the bus's cell counts instead of its parent's
cell counts.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/of/address.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/of/address.c b/drivers/of/address.c
index 14f137a21b0c..e2f45bdbc41a 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -475,6 +475,7 @@ static u64 __of_translate_address(struct device_node *dev,
 				  const __be32 *in_addr, const char *rprop,
 				  struct device_node **host)
 {
+	bool dma = rprop && !strcmp(rprop, "dma-ranges");
 	struct device_node *parent = NULL;
 	struct of_bus *bus, *pbus;
 	__be32 addr[OF_MAX_ADDR_CELLS];
@@ -494,7 +495,12 @@ static u64 __of_translate_address(struct device_node *dev,
 	bus = of_match_bus(parent);
 
 	/* Count address cells & copy address locally */
-	bus->count_cells(dev, &na, &ns);
+	if (dma) {
+		na = of_bus_n_addr_cells(parent);
+		ns = of_bus_n_size_cells(parent);
+	} else {
+		bus->count_cells(dev, &na, &ns);
+	}
 	if (!OF_CHECK_COUNTS(na, ns)) {
 		pr_debug("Bad cell count for %pOF\n", dev);
 		goto bail;
@@ -515,7 +521,7 @@ static u64 __of_translate_address(struct device_node *dev,
 		parent = get_parent(dev);
 
 		/* If root, we have finished */
-		if (parent == NULL) {
+		if (parent == NULL || (dma && !of_get_property(parent, "dma-ranges", NULL))) {
 			pr_debug("reached root node\n");
 			result = of_read_number(addr, na);
 			break;
@@ -536,7 +542,12 @@ static u64 __of_translate_address(struct device_node *dev,
 
 		/* Get new parent bus and counts */
 		pbus = of_match_bus(parent);
-		pbus->count_cells(dev, &pna, &pns);
+		if (dma) {
+			pna = of_bus_n_addr_cells(parent);
+			pns = of_bus_n_size_cells(parent);
+		} else {
+			pbus->count_cells(dev, &pna, &pns);
+		}
 		if (!OF_CHECK_COUNTS(pna, pns)) {
 			pr_err("Bad cell count for %pOF\n", dev);
 			break;
diff mbox series

Patch

diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 5696314ae69e..0bf2b08bca0a 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -11,6 +11,7 @@ 
 #include <linux/module.h>
 #include <linux/msi.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/of_iommu.h>
 #include <linux/of_pci.h>
 #include <linux/pci.h>
@@ -172,3 +173,106 @@  const struct iommu_ops *of_iommu_configure(struct device *dev,
 
 	return ops;
 }
+
+/* Return true if [start, end] equals @phys (a 1:1 mapping); warn and return false if not. */
+static inline bool check_direct_mapping(struct device *dev, struct resource *phys,
+					phys_addr_t start, phys_addr_t end)
+{
+	if (start == phys->start && end == phys->end)
+		return true;
+	/* phys already is the struct resource pointer that %pr expects. */
+	dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n",
+		 phys, &start, &end);
+	return false;
+}
+
+/**
+ * of_iommu_get_resv_regions - reserved region driver helper for device tree
+ * @dev: device for which to get reserved regions
+ * @list: reserved region list
+ *
+ * IOMMU drivers can use this to implement their .get_resv_regions() callback
+ * for memory regions attached to a device tree node. Every "iommu-addresses"
+ * entry that references @dev is appended to @list: as a direct mapping if it
+ * matches the region's "reg", otherwise as a reservation. See the
+ * reserved-memory device tree bindings on how to use these:
+ *
+ *   Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
+ */
+void of_iommu_get_resv_regions(struct device *dev, struct list_head *list)
+{
+#if IS_ENABLED(CONFIG_OF_ADDRESS)
+	struct of_phandle_iterator it;
+	int err;
+
+	of_for_each_phandle(&it, err, dev->of_node, "memory-region", NULL, 0) {
+		const __be32 *maps, *end;
+		struct resource res = {};
+		int size;
+
+		/*
+		 * The "reg" property is optional and can be omitted by reserved-memory regions
+		 * that represent reservations in the IOVA space, which are regions that should
+		 * not be mapped.
+		 */
+		if (of_find_property(it.node, "reg", NULL)) {
+			err = of_address_to_resource(it.node, 0, &res);
+			if (err < 0) {
+				dev_err(dev, "failed to parse memory region %pOF: %d\n",
+					it.node, err);
+				continue;
+			}
+		}
+
+		maps = of_get_property(it.node, "iommu-addresses", &size);
+		if (!maps)
+			continue;
+
+		end = maps + size / sizeof(__be32);
+
+		while (maps < end) {
+			struct device_node *np;
+			int na, ns;
+
+			/* Takes a reference on np; dropped once the entry is consumed. */
+			np = of_find_node_by_phandle(be32_to_cpup(maps++));
+			if (!np)
+				break;	/* entry length unknown, cannot skip it */
+
+			na = of_n_addr_cells(np);
+			ns = of_n_size_cells(np);
+			if (end - maps < na + ns) {
+				of_node_put(np);
+				break;	/* truncated entry */
+			}
+
+			if (np == dev->of_node) {
+				int prot = IOMMU_READ | IOMMU_WRITE;
+				enum iommu_resv_type type = IOMMU_RESV_RESERVED;
+				struct iommu_resv_region *region;
+				phys_addr_t start;
+				size_t length;
+
+				start = of_translate_dma_address(np, maps);
+				length = of_read_number(maps + na, ns);
+
+				/*
+				 * IOMMU regions without an associated physical region cannot be
+				 * mapped and are simply reservations.
+				 */
+				if (res.end > res.start &&
+				    check_direct_mapping(dev, &res, start, start + length - 1))
+					type = IOMMU_RESV_DIRECT_RELAXABLE;
+
+				region = iommu_alloc_resv_region(start, length, prot, type);
+				if (region)
+					list_add_tail(&region->list, list);
+			}
+
+			maps += na + ns;
+			of_node_put(np);
+		}
+	}
+#endif
+}
+EXPORT_SYMBOL(of_iommu_get_resv_regions);
diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h
index 55c1eb300a86..9a5e6b410dd2 100644
--- a/include/linux/of_iommu.h
+++ b/include/linux/of_iommu.h
@@ -12,6 +12,9 @@  extern const struct iommu_ops *of_iommu_configure(struct device *dev,
 					struct device_node *master_np,
 					const u32 *id);
 
+/* Append the device-tree reserved regions of @dev to @list. */
+extern void of_iommu_get_resv_regions(struct device *dev, struct list_head *list);
+
 #else
 
 static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
@@ -21,6 +24,11 @@  static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
 	return NULL;
 }
 
+/* !CONFIG_OF_IOMMU stub: does nothing and leaves @list unchanged. */
+static inline void of_iommu_get_resv_regions(struct device *dev, struct list_head *list)
+{
+}
+
 #endif	/* CONFIG_OF_IOMMU */
 
 #endif /* __OF_IOMMU_H */