diff mbox

[v7,2/6] pci: OF: Fix the conversion of IO ranges into IO resources.

Message ID 20140314171921.GY6457@e106497-lin.cambridge.arm.com
State New
Headers show

Commit Message

Liviu Dudau March 14, 2014, 5:19 p.m. UTC
On Fri, Mar 14, 2014 at 05:05:28PM +0000, Arnd Bergmann wrote:
> On Friday 14 March 2014, Liviu Dudau wrote:
> > +int of_pci_range_to_resource(struct of_pci_range *range,
> > +       struct device_node *np, struct resource *res)
> > +{
> > +       res->flags = range->flags;
> > +       if (res->flags & IORESOURCE_IO) {
> > +               unsigned long port = -1;
> > +               int err = pci_register_io_range(range->cpu_addr, range->size);
> > +               if (err)
> > +                       return err;
> > +               port = pci_address_to_pio(range->cpu_addr);
> > +               if (port == (unsigned long)-1) {
> > +                       res->start = (resource_size_t)OF_BAD_ADDR;
> > +                       res->end = (resource_size_t)OF_BAD_ADDR;
> > +                       return -EINVAL;
> > +               }
> 
> The error handling is inconsistent here: in one case you set the resource
> to OF_BAD_ADDR, in the other one you don't.
> 
> 	Arnd
> 

You are right, that was lazy of me. What about this version?

8<----------------------------------------------------

From acfd63b5c48b4a9066ab0b373633c5bb4feaadf5 Mon Sep 17 00:00:00 2001
From: Liviu Dudau <Liviu.Dudau@arm.com>
Date: Fri, 28 Feb 2014 12:40:46 +0000
Subject: [PATCH v7 2/6] pci: OF: Fix the conversion of IO ranges into IO
 resources.

The ranges property for a host bridge controller in DT describes
the mapping between the PCI bus address and the CPU physical address.
The resources framework however expects that the IO resources start
at a pseudo "port" address 0 (zero) and have a maximum size of IO_SPACE_LIMIT.
The conversion from pci ranges to resources failed to take that into account.

In the process move the function into drivers/of/address.c as it now
depends on pci_address_to_pio() code and make it return an error message.

Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Tested-by: Tanmay Inamdar <tinamdar@apm.com>
---
 drivers/of/address.c       | 45 ++++++++++++++++++++++++++++++++++++
 include/linux/of_address.h | 13 ++---------
 2 files changed, 47 insertions(+), 11 deletions(-)

Comments

Arnd Bergmann March 14, 2014, 6:46 p.m. UTC | #1
On Friday 14 March 2014, Liviu Dudau wrote:
> You are right, that was lazy of me. What about this version?

Yes, that seems better. Thanks for fixing it up.

But back to the more important question that I realized we have
not resolved yet:

You now have two completely independent allocation functions for
logical I/O space numbers, and they will return different numbers
for any nontrivial scenario.

> +int of_pci_range_to_resource(struct of_pci_range *range,
> +       struct device_node *np, struct resource *res)
> +{
> +       res->flags = range->flags;
> +       res->parent = res->child = res->sibling = NULL;
> +       res->name = np->full_name;
> +
> +       if (res->flags & IORESOURCE_IO) {
> +               unsigned long port = -1;
> +               int err = pci_register_io_range(range->cpu_addr, range->size);
> +               if (err)
> +                       goto invalid_range;
> +               port = pci_address_to_pio(range->cpu_addr);
> +               if (port == (unsigned long)-1) {
> +                       err = -EINVAL;
> +                       goto invalid_range;
> +               }
> +               res->start = port;
> +       } else {

This one concatenates the I/O spaces and assumes that each space starts
at bus address zero, and takes little precaution to avoid filling up IO_SPACE_LIMIT
if the sizes are too big.

>+unsigned long pci_ioremap_io(const struct resource *res, phys_addr_t phys_addr)
>+{
>+       unsigned long start, len, virt_start;
>+       int err;
>+
>+       if (res->end > IO_SPACE_LIMIT)
>+               return -EINVAL;
>+
>+       /*
>+        * try finding free space for the whole size first,
>+        * fall back to 64K if not available
>+        */
>+       len = resource_size(res);
>+       start = bitmap_find_next_zero_area(pci_iospace, IO_SPACE_PAGES,
>+                               res->start / PAGE_SIZE, len / PAGE_SIZE, 0);
>+       if (start == IO_SPACE_PAGES && len > SZ_64K) {
>+               len = SZ_64K;
>+               start = 0;
>+               start = bitmap_find_next_zero_area(pci_iospace, IO_SPACE_PAGES,
>+                                       start, len / PAGE_SIZE, 0);
>+       }
>+
>+       /* no 64K area found */
>+       if (start == IO_SPACE_PAGES)
>+               return -ENOMEM;
>+
>+       /* ioremap physical aperture to virtual aperture */
>+       virt_start = start * PAGE_SIZE + (unsigned long)PCI_IOBASE;
>+       err = ioremap_page_range(virt_start, virt_start + len,
>+                               phys_addr, __pgprot(PROT_DEVICE_nGnRE));
>+       if (err)
>+               return err;
>+
>+       bitmap_set(pci_iospace, start, len / PAGE_SIZE);
>+
>+       /* return io_offset */
>+       return start * PAGE_SIZE - res->start;
>+}

While this one will try to fall back to smaller sizes, and will honor nonzero
bus addresses.

I think we shouldn't even try to do the same thing twice, but instead just
use a single allocator. I'd prefer the one I came up with, but I realize
that I am biased here ;-)

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Liviu Dudau March 14, 2014, 7 p.m. UTC | #2
On Fri, Mar 14, 2014 at 06:46:23PM +0000, Arnd Bergmann wrote:
> On Friday 14 March 2014, Liviu Dudau wrote:
> > You are right, that was lazy of me. What about this version?
> 
> Yes, that seems better. Thanks for fixing it up.
> 
> But back to the more important question that I realized we have
> not resolved yet:
> 
> You now have two completely independent allocation functions for
> logical I/O space numbers, and they will return different numbers
> for any nontrivial scenario.
> 
> > +int of_pci_range_to_resource(struct of_pci_range *range,
> > +       struct device_node *np, struct resource *res)
> > +{
> > +       res->flags = range->flags;
> > +       res->parent = res->child = res->sibling = NULL;
> > +       res->name = np->full_name;
> > +
> > +       if (res->flags & IORESOURCE_IO) {
> > +               unsigned long port = -1;
> > +               int err = pci_register_io_range(range->cpu_addr, range->size);
> > +               if (err)
> > +                       goto invalid_range;
> > +               port = pci_address_to_pio(range->cpu_addr);
> > +               if (port == (unsigned long)-1) {
> > +                       err = -EINVAL;
> > +                       goto invalid_range;
> > +               }
> > +               res->start = port;
> > +       } else {
> 
> This one concatenates the I/O spaces and assumes that each space starts
> at bus address zero, and takes little precaution to avoid filling up IO_SPACE_LIMIT
> if the sizes are too big.

Actually, you are attaching too much meaning to this one. pci_register_io_range()
only tries to remember the ranges, nothing else. And it *does* check that the
total sum of registered ranges does not exceed IO_SPACE_LIMIT. This is used before
we have any resource mapped for that range (actually it is used to *create* the
resource for the range) so there is no other helping hand.

It doesn't assume that space starts at bus address zero, it ignores the bus address.
It only handles CPU addresses for the range, to help with generating logical IO ports.
If you have overlapping CPU addresses with different bus addresses it will not work,
but then I guess you will have different problems then.

> 
> >+unsigned long pci_ioremap_io(const struct resource *res, phys_addr_t phys_addr)
> >+{
> >+       unsigned long start, len, virt_start;
> >+       int err;
> >+
> >+       if (res->end > IO_SPACE_LIMIT)
> >+               return -EINVAL;
> >+
> >+       /*
> >+        * try finding free space for the whole size first,
> >+        * fall back to 64K if not available
> >+        */
> >+       len = resource_size(res);
> >+       start = bitmap_find_next_zero_area(pci_iospace, IO_SPACE_PAGES,
> >+                               res->start / PAGE_SIZE, len / PAGE_SIZE, 0);
> >+       if (start == IO_SPACE_PAGES && len > SZ_64K) {
> >+               len = SZ_64K;
> >+               start = 0;
> >+               start = bitmap_find_next_zero_area(pci_iospace, IO_SPACE_PAGES,
> >+                                       start, len / PAGE_SIZE, 0);
> >+       }
> >+
> >+       /* no 64K area found */
> >+       if (start == IO_SPACE_PAGES)
> >+               return -ENOMEM;
> >+
> >+       /* ioremap physical aperture to virtual aperture */
> >+       virt_start = start * PAGE_SIZE + (unsigned long)PCI_IOBASE;
> >+       err = ioremap_page_range(virt_start, virt_start + len,
> >+                               phys_addr, __pgprot(PROT_DEVICE_nGnRE));
> >+       if (err)
> >+               return err;
> >+
> >+       bitmap_set(pci_iospace, start, len / PAGE_SIZE);
> >+
> >+       /* return io_offset */
> >+       return start * PAGE_SIZE - res->start;
> >+}
> 
> While this one will try to fall back to smaller sizes, and will honor nonzero
> bus addresses.

Yes, because this one does the actual allocation. And it needs to know how
the mapping from CPU to bus works.


> 
> I think we shouldn't even try to do the same thing twice, but instead just
> use a single allocator. I'd prefer the one I came up with, but I realize
> that I am biased here ;-)

I agree, but I think the two functions serve two different and distinct roles.

Best regards,
Liviu


> 
> 	Arnd
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Arnd Bergmann March 14, 2014, 7:16 p.m. UTC | #3
On Friday 14 March 2014, Liviu Dudau wrote:
> On Fri, Mar 14, 2014 at 06:46:23PM +0000, Arnd Bergmann wrote:
> > On Friday 14 March 2014, Liviu Dudau wrote:
> > 
> > > +int of_pci_range_to_resource(struct of_pci_range *range,
> > > +       struct device_node *np, struct resource *res)
> > > +{
> > > +       res->flags = range->flags;
> > > +       res->parent = res->child = res->sibling = NULL;
> > > +       res->name = np->full_name;
> > > +
> > > +       if (res->flags & IORESOURCE_IO) {
> > > +               unsigned long port = -1;
> > > +               int err = pci_register_io_range(range->cpu_addr, range->size);
> > > +               if (err)
> > > +                       goto invalid_range;
> > > +               port = pci_address_to_pio(range->cpu_addr);
> > > +               if (port == (unsigned long)-1) {
> > > +                       err = -EINVAL;
> > > +                       goto invalid_range;
> > > +               }
> > > +               res->start = port;
> > > +       } else {
> > 
> > This one concatenates the I/O spaces and assumes that each space starts
> > at bus address zero, and takes little precaution to avoid filling up IO_SPACE_LIMIT
> > if the sizes are too big.
> 
> Actually, you are attaching too much meaning to this one. pci_register_io_range()
> only tries to remember the ranges, nothing else. And it *does* check that the
> total sum of registered ranges does not exceed IO_SPACE_LIMIT. This is used before
> we have any resource mapped for that range (actually it is used to *create* the
> resource for the range) so there is no other helping hand.
> 
> It doesn't assume that space starts at bus address zero, it ignores the bus address.
> It only handles CPU addresses for the range, to help with generating logical IO ports.
> If you have overlapping CPU addresses with different bus addresses it will not work,
> but then I guess you will have different problems then.

The problem is that it tries to set up a mapping so that pci_address_to_pio
returns the actual port number, but the port that you assign to res->start
above is assumed to match the 'start' variable below. If the value ends
up different, the BARs that get set by the PCI bus scan are not in the
same place that got ioremapped into the virtual I/O aperture.

> > >+unsigned long pci_ioremap_io(const struct resource *res, phys_addr_t phys_addr)
> > >+{
> > >+       unsigned long start, len, virt_start;
> > >+       int err;
> > >+
> > >+       if (res->end > IO_SPACE_LIMIT)
> > >+               return -EINVAL;
> > >+
> > >+       /*
> > >+        * try finding free space for the whole size first,
> > >+        * fall back to 64K if not available
> > >+        */
> > >+       len = resource_size(res);
> > >+       start = bitmap_find_next_zero_area(pci_iospace, IO_SPACE_PAGES,
> > >+                               res->start / PAGE_SIZE, len / PAGE_SIZE, 0);
> > >+       if (start == IO_SPACE_PAGES && len > SZ_64K) {
> > >+               len = SZ_64K;
> > >+               start = 0;
> > >+               start = bitmap_find_next_zero_area(pci_iospace, IO_SPACE_PAGES,
> > >+                                       start, len / PAGE_SIZE, 0);
> > >+       }
> > >+
> > >+       /* no 64K area found */
> > >+       if (start == IO_SPACE_PAGES)
> > >+               return -ENOMEM;
> > >+
> > >+       /* ioremap physical aperture to virtual aperture */
> > >+       virt_start = start * PAGE_SIZE + (unsigned long)PCI_IOBASE;
> > >+       err = ioremap_page_range(virt_start, virt_start + len,
> > >+                               phys_addr, __pgprot(PROT_DEVICE_nGnRE));
> > >+       if (err)
> > >+               return err;
> > >+
> > >+       bitmap_set(pci_iospace, start, len / PAGE_SIZE);
> > >+
> > >+       /* return io_offset */
> > >+       return start * PAGE_SIZE - res->start;
> > >+}

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Liviu Dudau March 17, 2014, 1:41 p.m. UTC | #4
On Fri, Mar 14, 2014 at 07:16:10PM +0000, Arnd Bergmann wrote:
> On Friday 14 March 2014, Liviu Dudau wrote:
> > On Fri, Mar 14, 2014 at 06:46:23PM +0000, Arnd Bergmann wrote:
> > > On Friday 14 March 2014, Liviu Dudau wrote:
> > > 
> > > > +int of_pci_range_to_resource(struct of_pci_range *range,
> > > > +       struct device_node *np, struct resource *res)
> > > > +{
> > > > +       res->flags = range->flags;
> > > > +       res->parent = res->child = res->sibling = NULL;
> > > > +       res->name = np->full_name;
> > > > +
> > > > +       if (res->flags & IORESOURCE_IO) {
> > > > +               unsigned long port = -1;
> > > > +               int err = pci_register_io_range(range->cpu_addr, range->size);
> > > > +               if (err)
> > > > +                       goto invalid_range;
> > > > +               port = pci_address_to_pio(range->cpu_addr);
> > > > +               if (port == (unsigned long)-1) {
> > > > +                       err = -EINVAL;
> > > > +                       goto invalid_range;
> > > > +               }
> > > > +               res->start = port;
> > > > +       } else {
> > > 
> > > This one concatenates the I/O spaces and assumes that each space starts
> > > at bus address zero, and takes little precaution to avoid filling up IO_SPACE_LIMIT
> > > if the sizes are too big.
> > 
> > Actually, you are attaching too much meaning to this one. pci_register_io_range()
> > only tries to remember the ranges, nothing else. And it *does* check that the
> > total sum of registered ranges does not exceed IO_SPACE_LIMIT. This is used before
> > we have any resource mapped for that range (actually it is used to *create* the
> > resource for the range) so there is no other helping hand.
> > 
> > It doesn't assume that space starts at bus address zero, it ignores the bus address.
> > It only handles CPU addresses for the range, to help with generating logical IO ports.
> > If you have overlapping CPU addresses with different bus addresses it will not work,
> > but then I guess you will have different problems then.
> 
> The problem is that it tries to set up a mapping so that pci_address_to_pio
> returns the actual port number, but the port that you assign to res->start
> above is assumed to match the 'start' variable below. If the value ends
> up different, the BARs that get set by the PCI bus scan are not in the
> same place that got ioremapped into the virtual I/O aperture.

Yes, after writting a reply trying to justify why it would actually work I've realised
where the fault of my logic stands (short version, two host controllers will get different
io_offsets but the ports numbers will start from zero leading to confusion about which
host controller the resource belongs to).

I will try to split your function into two parts, one that calculates the io_offset and
another that does the ioremap_page_range() side and replace my cooked function.

Best regards,
Liviu

> 
> > > >+unsigned long pci_ioremap_io(const struct resource *res, phys_addr_t phys_addr)
> > > >+{
> > > >+       unsigned long start, len, virt_start;
> > > >+       int err;
> > > >+
> > > >+       if (res->end > IO_SPACE_LIMIT)
> > > >+               return -EINVAL;
> > > >+
> > > >+       /*
> > > >+        * try finding free space for the whole size first,
> > > >+        * fall back to 64K if not available
> > > >+        */
> > > >+       len = resource_size(res);
> > > >+       start = bitmap_find_next_zero_area(pci_iospace, IO_SPACE_PAGES,
> > > >+                               res->start / PAGE_SIZE, len / PAGE_SIZE, 0);
> > > >+       if (start == IO_SPACE_PAGES && len > SZ_64K) {
> > > >+               len = SZ_64K;
> > > >+               start = 0;
> > > >+               start = bitmap_find_next_zero_area(pci_iospace, IO_SPACE_PAGES,
> > > >+                                       start, len / PAGE_SIZE, 0);
> > > >+       }
> > > >+
> > > >+       /* no 64K area found */
> > > >+       if (start == IO_SPACE_PAGES)
> > > >+               return -ENOMEM;
> > > >+
> > > >+       /* ioremap physical aperture to virtual aperture */
> > > >+       virt_start = start * PAGE_SIZE + (unsigned long)PCI_IOBASE;
> > > >+       err = ioremap_page_range(virt_start, virt_start + len,
> > > >+                               phys_addr, __pgprot(PROT_DEVICE_nGnRE));
> > > >+       if (err)
> > > >+               return err;
> > > >+
> > > >+       bitmap_set(pci_iospace, start, len / PAGE_SIZE);
> > > >+
> > > >+       /* return io_offset */
> > > >+       return start * PAGE_SIZE - res->start;
> > > >+}
> 
> 	Arnd
>
diff mbox

Patch

diff --git a/drivers/of/address.c b/drivers/of/address.c
index be958ed..673c050 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -728,3 +728,48 @@  void __iomem *of_iomap(struct device_node *np, int index)
 	return ioremap(res.start, resource_size(&res));
 }
 EXPORT_SYMBOL(of_iomap);
+
+/**
+ * of_pci_range_to_resource - Create a resource from an of_pci_range
+ * @range:	the PCI range that describes the resource
+ * @np:		device node where the range belongs to
+ * @res:	pointer to a valid resource that will be updated to
+ *              reflect the values contained in the range.
+ *
+ * Returns EINVAL if the range cannot be converted to resource.
+ *
+ * Note that if the range is an IO range, the resource will be converted
+ * using pci_address_to_pio() which can fail if it is called too early or
+ * if the range cannot be matched to any host bridge IO space (our case here).
+ * To guard against that we try to register the IO range first.
+ * If that fails we know that pci_address_to_pio() will do too.
+ */
+int of_pci_range_to_resource(struct of_pci_range *range,
+	struct device_node *np, struct resource *res)
+{
+	res->flags = range->flags;
+	res->parent = res->child = res->sibling = NULL;
+	res->name = np->full_name;
+
+	if (res->flags & IORESOURCE_IO) {
+		unsigned long port = -1;
+		int err = pci_register_io_range(range->cpu_addr, range->size);
+		if (err)
+			goto invalid_range;
+		port = pci_address_to_pio(range->cpu_addr);
+		if (port == (unsigned long)-1) {
+			err = -EINVAL;
+			goto invalid_range;
+		}
+		res->start = port;
+	} else {
+		res->start = range->cpu_addr;
+	}
+	res->end = res->start + range->size - 1;
+	return 0;
+
+invalid_range:
+	res->start = (resource_size_t)OF_BAD_ADDR;
+	res->end = (resource_size_t)OF_BAD_ADDR;
+	return err;
+}
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index 40c418d..a4b400d 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -23,17 +23,8 @@  struct of_pci_range {
 #define for_each_of_pci_range(parser, range) \
 	for (; of_pci_range_parser_one(parser, range);)
 
-static inline void of_pci_range_to_resource(struct of_pci_range *range,
-					    struct device_node *np,
-					    struct resource *res)
-{
-	res->flags = range->flags;
-	res->start = range->cpu_addr;
-	res->end = range->cpu_addr + range->size - 1;
-	res->parent = res->child = res->sibling = NULL;
-	res->name = np->full_name;
-}
-
+extern int of_pci_range_to_resource(struct of_pci_range *range,
+		struct device_node *np, struct resource *res);
 /* Translate a DMA address from device space to CPU space */
 extern u64 of_translate_dma_address(struct device_node *dev,
 				    const __be32 *in_addr);