pci: Add support for creating a generic host_bridge from device tree

Message ID 1391452428-22917-2-git-send-email-Liviu.Dudau@arm.com
State New

Commit Message

Liviu Dudau Feb. 3, 2014, 6:33 p.m. UTC
Several platforms use a rather generic version of parsing
the device tree to find the host bridge ranges. Move that
into the generic PCI code and use it to create a pci_host_bridge
structure that can be used by arch code.

Based on early attempts by Andrew Murray to unify the code.
Used powerpc and microblaze PCI code as starting point.

Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Cc: Catalin Marinas <Catalin.Marinas@arm.com>
Cc: Will Deacon <Will.Deacon@arm.com>
---
 drivers/pci/host-bridge.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/pci/probe.c       | 11 ++++++
 include/linux/pci.h       | 14 ++++++++
 3 files changed, 117 insertions(+)

Comments

Liviu Dudau Feb. 3, 2014, 7:06 p.m. UTC | #1
Hi Arnd,

First of all, thanks for reviewing this!

On Mon, Feb 03, 2014 at 06:46:10PM +0000, Arnd Bergmann wrote:
> On Monday 03 February 2014 18:33:48 Liviu Dudau wrote:
> > +/**
> > + * pci_host_bridge_of_get_ranges - Parse PCI host bridge resources from DT
> > + * @dev: device node of the host bridge having the range property
> > + * @resources: list where the range of resources will be added after DT parsing
> > + *
> > + * This function will parse the "ranges" property of a PCI host bridge device
> > + * node and setup the resource mapping based on its content. It is expected
> > + * that the property conforms with the Power ePAPR document.
> > + *
> > + * Each architecture will then apply their filtering based on the limitations
> > + * of each platform. One general restriction seems to be the number of IO space
> > + * ranges, the PCI framework makes intensive use of struct resource management,
> > + * and for IORESOURCE_IO types they can only be requested if they are contained
> > + * within the global ioport_resource, so that should be limited to one IO space
> > + * range.
> 
> Actually we have quite a different set of restrictions around I/O space on ARM32
> at the moment: Each host bridge can have its own 64KB range in an arbitrary
> location on MMIO space, and the total must not exceed 2MB of I/O space.

And that is why the filtering is not (yet) imposed in the generic code. But once
you use pci_request_region, that will call request_region, which checks against
ioport_resource as the parent of the requested resource. That should fail if it
is not in the correct range, so I don't know how the ARM arch code manages
multiple IO ranges.
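
For reference, here is a rough sketch of the call chain I mean, condensed from
the mainline sources of the time (heavily simplified, so take the exact
signatures with a grain of salt):

/* include/linux/ioport.h */
#define request_region(start, n, name) \
	__request_region(&ioport_resource, (start), (n), (name), 0)

/* drivers/pci/pci.c, heavily simplified */
static int __pci_request_region(struct pci_dev *pdev, int bar,
				const char *res_name, int exclusive)
{
	if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) {
		/* The BAR's logical port range must fall inside
		 * ioport_resource, i.e. inside 0..IO_SPACE_LIMIT. */
		if (!request_region(pci_resource_start(pdev, bar),
				    pci_resource_len(pdev, bar), res_name))
			return -EBUSY;
	}
	return 0;
}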

> 
> > + */
> > +static int pci_host_bridge_of_get_ranges(struct device_node *dev,
> > +					struct list_head *resources)
> > +{
> > +	struct resource *res;
> > +	struct of_pci_range range;
> > +	struct of_pci_range_parser parser;
> > +	int err;
> > +
> > +	pr_info("PCI host bridge %s ranges:\n", dev->full_name);
> > +
> > +	/* Check for ranges property */
> > +	err = of_pci_range_parser_init(&parser, dev);
> > +	if (err)
> > +		return err;
> > +
> > +	pr_debug("Parsing ranges property...\n");
> > +	for_each_of_pci_range(&parser, &range) {
> > +		/* Read next ranges element */
> > +		pr_debug("pci_space: 0x%08x pci_addr:0x%016llx ",
> > +				range.pci_space, range.pci_addr);
> > +		pr_debug("cpu_addr:0x%016llx size:0x%016llx\n",
> > +					range.cpu_addr, range.size);
> > +
> > +		/* If we failed translation or got a zero-sized region
> > +		 * (some FW try to feed us with non sensical zero sized regions
> > +		 * such as power3 which look like some kind of attempt
> > +		 * at exposing the VGA memory hole) then skip this range
> > +		 */
> > +		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
> > +			continue;
> > +
> > +		res = kzalloc(sizeof(struct resource), GFP_KERNEL);
> > +		if (!res) {
> > +			err = -ENOMEM;
> > +			goto bridge_ranges_nomem;
> > +		}
> > +
> > +		of_pci_range_to_resource(&range, dev, res);
> > +
> > +		pci_add_resource_offset(resources, res,
> > +				range.cpu_addr - range.pci_addr);
> > +	}
> 
> I believe of_pci_range_to_resource() will return the MMIO aperture for the
> I/O space window here, which is not what you are supposed to pass into
> pci_add_resource_offset.

And that is why the code in probe.c has been added to deal with that. It is
too early to do the adjustments here as all we have is the list of resources
and that might get culled by the architecture fixup code. Remembering the
io_offset will happen once the pci_host_bridge gets created, and the resources
are then adjusted.

> 
> > +EXPORT_SYMBOL(pci_host_bridge_of_init);
> 
> EXPORT_SYMBOL_GPL

Will change for v2, thanks!

> 
> > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> > index 6e34498..16febae 100644
> > --- a/drivers/pci/probe.c
> > +++ b/drivers/pci/probe.c
> > @@ -1787,6 +1787,17 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
> >  	list_for_each_entry_safe(window, n, resources, list) {
> >  		list_move_tail(&window->list, &bridge->windows);
> >  		res = window->res;
> > +		/*
> > +		 * IO resources are stored in the kernel with a CPU start
> > +		 * address of zero. Adjust the data accordingly and remember
> > +		 * the offset
> > +		 */
> > +		if (resource_type(res) == IORESOURCE_IO) {
> > +			bridge->io_offset = res->start;
> > +			res->end -= res->start;
> > +			window->offset -= res->start;
> > +			res->start = 0;
> > +		}
> >  		offset = window->offset;
> >  		if (res->flags & IORESOURCE_BUS)
> 
> Won't this break all existing host bridges?

I am not sure. I believe not, due to what I've explained earlier, but you might be right.

The adjustment happens before the resource is added to the host bridge windows and translates
it from MMIO range into IO range.

Best regards,
Liviu

> 
> 	Arnd
Liviu Dudau Feb. 3, 2014, 10:17 p.m. UTC | #2
On Mon, Feb 03, 2014 at 07:31:31PM +0000, Arnd Bergmann wrote:
> On Monday 03 February 2014 19:06:49 Liviu Dudau wrote:
> > On Mon, Feb 03, 2014 at 06:46:10PM +0000, Arnd Bergmann wrote:
> > > On Monday 03 February 2014 18:33:48 Liviu Dudau wrote:
> > > > +/**
> > > > + * pci_host_bridge_of_get_ranges - Parse PCI host bridge resources from DT
> > > > + * @dev: device node of the host bridge having the range property
> > > > + * @resources: list where the range of resources will be added after DT parsing
> > > > + *
> > > > + * This function will parse the "ranges" property of a PCI host bridge device
> > > > + * node and setup the resource mapping based on its content. It is expected
> > > > + * that the property conforms with the Power ePAPR document.
> > > > + *
> > > > + * Each architecture will then apply their filtering based on the limitations
> > > > + * of each platform. One general restriction seems to be the number of IO space
> > > > + * ranges, the PCI framework makes intensive use of struct resource management,
> > > > + * and for IORESOURCE_IO types they can only be requested if they are contained
> > > > + * within the global ioport_resource, so that should be limited to one IO space
> > > > + * range.
> > >
> > > Actually we have quite a different set of restrictions around I/O space on ARM32
> > > at the moment: Each host bridge can have its own 64KB range in an arbitrary
> > > location on MMIO space, and the total must not exceed 2MB of I/O space.
> > 
> > And that is why the filtering is not (yet) imposed in the generic code. But once
> > you use pci_request_region, that will call request_region, which checks against
> > ioport_resource as the parent of the requested resource. That should fail if it
> > is not in the correct range, so I don't know how the ARM arch code manages
> > multiple IO ranges.
> 
> Let's try to come up with nomenclature so we can talk about this better
> 
> The ioport_resource is in "logical I/O space", which is a Linux fiction,
> it goes from 0 to IO_SPACE_LIMIT (2MB on ARM) and is mapped into "virtual
> I/O space", which start at (void __iomem *)PCI_IO_VIRT_BASE.
> 
> Each PCI domain can have its own "bus I/O aperture", which is typically
> between 0x1000 and 0xffff and reflects the address that is used in PCI
> transactions and in BARs. 

Actually, the bus I/O aperture can start from 0x0000 if you are talking about
PCI bus addresses.

> The aperture here reflects the subset of the
> 4GB bus I/O space that is actually mapped into a CPU visible "physical
> I/O aperture" using an inbound mapping of the host bridge. The physical
> I/O aperture in turn gets mapped to the virtual I/O space using 
> pci_ioremap_io. 

Agree.

> The difference between a bus I/O address and a logical
> I/O address is stored in the io_offset.

Not exactly. If that were true, it would mean that for an I/O range that
starts at bus I/O address zero but whose physical I/O aperture starts at
0x40000000, the io_offset is zero. For me, the io_offset should be 0x40000000.

Let me see if I can summarise this correctly, using only CPU addresses:

0x0000 - IO_SPACE_LIMIT           <-  logical I/O address
0xPPPPPPPP - 0xPPPPPPPP+IO_SIZE   <-  physical address for PCI I/O space
0xVVVVVVVV - 0xVVVVVVVV+IO_SPACE_LIMIT <- virtual address for I/O

The io_offset then is 0xPPPPPPPP - logical I/O address. At least that is
the intent of the io_offset variable that I introduced in pci_host_bridge.

The bus I/O address is generated by the host bridge, I think we can ignore
it here as it tends to confuse the message.
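
To put concrete (made-up) numbers on it, taking a bridge whose physical I/O
aperture sits at 0x40000000 and assuming arm32's PCI_IO_VIRT_BASE of 0xfee00000:

0x00000000 - 0x0000ffff   <-  logical I/O addresses handed to drivers
0x40000000 - 0x4000ffff   <-  physical address of the bridge's I/O aperture
0xfee00000 - 0xfee0ffff   <-  virtual mapping of that aperture

With my definition, io_offset = 0x40000000 - 0x0000 = 0x40000000.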

> 
> So much for basic definitions. When a device driver calls pci_request_region,
> the port number it sees is the bus I/O port number adjusted using the
> io_offset to turn it into a logical I/O port number, which should
> always be within the host bridge window, which in turn is a subset
> of the ioport_resource.

My understanding is that device drivers all use port numbers that are logical
I/O numbers, so no io_offset needs to be applied here. It is only when one
wants to access the port that the translation happens. First, inb or outb
will add PCI_IO_VIRT_BASE to generate the virtual address, the MMU will
then convert that address to a physical address, and the host bridge will
then translate the physical address into a bus address.
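
In arm32 terms, the chain looks roughly like this (a heavily simplified sketch
of the asm/io.h definitions, for illustration only):

/* arch/arm/include/asm/io.h, heavily simplified */
#define PCI_IO_VIRT_BASE	0xfee00000UL
#define __io(p)		((void __iomem *)(PCI_IO_VIRT_BASE + (p)))
#define inb(p)		readb(__io(p))	/* logical port -> virtual address */

/* The MMU then maps that virtual address onto the physical I/O aperture
 * (set up earlier via pci_ioremap_io()), and the host bridge turns the
 * resulting physical access into a PCI I/O transaction at the bus address. */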



> 
> > > > +static int pci_host_bridge_of_get_ranges(struct device_node *dev,
> > > > +                                   struct list_head *resources)
> > > > +{
> > > > +   struct resource *res;
> > > > +   struct of_pci_range range;
> > > > +   struct of_pci_range_parser parser;
> > > > +   int err;
> > > > +
> > > > +   pr_info("PCI host bridge %s ranges:\n", dev->full_name);
> > > > +
> > > > +   /* Check for ranges property */
> > > > +   err = of_pci_range_parser_init(&parser, dev);
> > > > +   if (err)
> > > > +           return err;
> > > > +
> > > > +   pr_debug("Parsing ranges property...\n");
> > > > +   for_each_of_pci_range(&parser, &range) {
> > > > +           /* Read next ranges element */
> > > > +           pr_debug("pci_space: 0x%08x pci_addr:0x%016llx ",
> > > > +                           range.pci_space, range.pci_addr);
> > > > +           pr_debug("cpu_addr:0x%016llx size:0x%016llx\n",
> > > > +                                   range.cpu_addr, range.size);
> > > > +
> > > > +           /* If we failed translation or got a zero-sized region
> > > > +            * (some FW try to feed us with non sensical zero sized regions
> > > > +            * such as power3 which look like some kind of attempt
> > > > +            * at exposing the VGA memory hole) then skip this range
> > > > +            */
> > > > +           if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
> > > > +                   continue;
> > > > +
> > > > +           res = kzalloc(sizeof(struct resource), GFP_KERNEL);
> > > > +           if (!res) {
> > > > +                   err = -ENOMEM;
> > > > +                   goto bridge_ranges_nomem;
> > > > +           }
> > > > +
> > > > +           of_pci_range_to_resource(&range, dev, res);
> > > > +
> > > > +           pci_add_resource_offset(resources, res,
> > > > +                           range.cpu_addr - range.pci_addr);
> > > > +   }
> > >
> > > I believe of_pci_range_to_resource() will return the MMIO aperture for the
> > > I/O space window here, which is not what you are supposed to pass into
> > > pci_add_resource_offset.
> > 
> > And that is why the code in probe.c has been added to deal with that. It is
> > too early to do the adjustments here as all we have is the list of resources
> > and that might get culled by the architecture fixup code. Remembering the
> > io_offset will happen once the pci_host_bridge gets created, and the resources
> > are then adjusted.
> 
> So you want to register an incorrect I/O resource first and then
> have it fixed up later, rather than registering the correct
> one from the start as everyone else?

The incorrect I/O resource is added to a temporary list of resources; it has not
yet been attached to the list of windows in the bridge. What gets added is the
I/O resource as it would be described if it were an ordinary resource.

> 
> > > > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> > > > index 6e34498..16febae 100644
> > > > --- a/drivers/pci/probe.c
> > > > +++ b/drivers/pci/probe.c
> > > > @@ -1787,6 +1787,17 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
> > > >     list_for_each_entry_safe(window, n, resources, list) {
> > > >             list_move_tail(&window->list, &bridge->windows);
> > > >             res = window->res;
> > > > +           /*
> > > > +            * IO resources are stored in the kernel with a CPU start
> > > > +            * address of zero. Adjust the data accordingly and remember
> > > > +            * the offset
> > > > +            */
> > > > +           if (resource_type(res) == IORESOURCE_IO) {
> > > > +                   bridge->io_offset = res->start;
> > > > +                   res->end -= res->start;
> > > > +                   window->offset -= res->start;
> > > > +                   res->start = 0;
> > > > +           }

Here we correct for the fact that IORESOURCE_IO is not a normal resource: Linux
wants a logical I/O address as the start and end, not the physical CPU address.
We adjust to that and remember the offset.
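
With made-up numbers, for a bridge whose I/O window sits at CPU address
0x40000000 and maps bus I/O ports 0x0000-0xffff:

before:	res = { flags = IORESOURCE_IO, start = 0x40000000, end = 0x4000ffff }
	window->offset = 0x40000000	/* cpu_addr - pci_addr from the ranges parsing */

after:	bridge->io_offset = 0x40000000
	res = { flags = IORESOURCE_IO, start = 0x0000, end = 0xffff }
	window->offset = 0x0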


> > > >             offset = window->offset;
> > > >             if (res->flags & IORESOURCE_BUS)
> > >
> > > Won't this break all existing host bridges?
> > 
> > I am not sure. I believe not, due to what I've explained earlier, but you might be right.
> > 
> > The adjustment happens before the resource is added to the host bridge windows and translates
> > it from MMIO range into IO range.
> 
> AFAICT, the resource_type of the resource you register above should be
> IORESOURCE_MEM, so you are not actually matching it here.

No, all resources are added here. For IORESOURCE_IO we do an adjustment.

Best regards,
Liviu

> 
> 	Arnd
Liviu Dudau Feb. 4, 2014, 12:08 p.m. UTC | #3
On Tue, Feb 04, 2014 at 10:09:44AM +0000, Arnd Bergmann wrote:
> On Monday 03 February 2014 22:17:44 Liviu Dudau wrote:
> > On Mon, Feb 03, 2014 at 07:31:31PM +0000, Arnd Bergmann wrote:
> > > Let's try to come up with nomenclature so we can talk about this better
> > >
> > > The ioport_resource is in "logical I/O space", which is a Linux fiction,
> > > it goes from 0 to IO_SPACE_LIMIT (2MB on ARM) and is mapped into "virtual
> > > I/O space", which start at (void __iomem *)PCI_IO_VIRT_BASE.
> > >
> > > Each PCI domain can have its own "bus I/O aperture", which is typically
> > > between 0x1000 and 0xffff and reflects the address that is used in PCI
> > > transactions and in BARs.
> > 
> > Actually, the bus I/O aperture can start from 0x0000 if you are talking about
> > PCI bus addresses.
> 
> Right.
> 
> > > The aperture here reflects the subset of the
> > > 4GB bus I/O space that is actually mapped into a CPU visible "physical
> > > I/O aperture" using an inbound mapping of the host bridge. The physical
> > > I/O aperture in turn gets mapped to the virtual I/O space using
> > > pci_ioremap_io.
> > 
> > Agree.
> > 
> > > The difference between a bus I/O address and a logical
> > > I/O address is stored in the io_offset.
> > 
> > Not exactly. If that were true, it would mean that for an I/O range that
> > starts at bus I/O address zero but whose physical I/O aperture starts at
> > 0x40000000, the io_offset is zero. For me, the io_offset should be 0x40000000.
> 
> That's not how we do it on any of the existing host controllers.
> Typically the io_offset is zero for the first one, and may be
> either zero for all the others (meaning BARs get > 64KB values
> for secondary buses) or between 64KB and 2MB (meaning each bus
> starts at I/O port number 0).

In that case it is probably worth renaming my variable to phys_io_offset.

I need to go back over my driver code. My assumptions were probably wrong
with regard to the meaning of io_offset.

> 
> > Let me see if I can summarise this correctly, using only CPU addresses:
> > 
> > 0x0000 - IO_SPACE_LIMIT           <-  logical I/O address
> > 0xPPPPPPPP - 0xPPPPPPPP+IO_SIZE   <-  physical address for PCI I/O space
> > 0xVVVVVVVV - 0xVVVVVVVV+IO_SPACE_LIMIT <- virtual address for I/O
> > 
> > The io_offset then is 0xPPPPPPPP - logical I/O address. At least that is
> > the intent of the io_offset variable that I introduced in pci_host_bridge.
> 
> That is highly confusing then, because we already have something called
> io_offset with a different meaning. I would call 0xPPPPPPPP the io_phys_base
> if I had to come up with a variable name for it.
> 
> > The bus I/O address is generated by the host bridge, I think we can ignore
> > it here as it tends to confuse the message.
> 
> No, it's important because the PCI core code has to transform between
> bus I/O address and logical I/O address when accessing the BARs.
> 
> > > So much for basic definitions. When a device driver calls pci_request_region,
> > > the port number it sees is the bus I/O port number adjusted using the
> > > io_offset to turn it into a logical I/O port number, which should
> > > always be within the host bridge window, which in turn is a subset
> > > of the ioport_resource.
> > 
> > My understanding is that device drivers all use port numbers that are logical
> > I/O numbers, so no io_offset needs to be applied here. It is only when one
> > wants to access the port that the translation happens. First, inb or outb
> > will add PCI_IO_VIRT_BASE to generate the virtual address, the MMU will
> > then convert that address to a physical address, and the host bridge will
> > then translate the physical address into a bus address.
> 
> This is correct. The bus I/O number is not visible to the device driver,
> but it is what you put into the 'ranges' property in DT, and it gets
> used during PCI resource scanning.
> 
> 
> > > > And that is why the code in probe.c has been added to deal with that. It is
> > > > too early to do the adjustments here as all we have is the list of resources
> > > > and that might get culled by the architecture fixup code. Remembering the
> > > > io_offset will happen once the pci_host_bridge gets created, and the resources
> > > > are then adjusted.
> > >
> > > So you want to register an incorrect I/O resource first and then
> > > have it fixed up later, rather than registering the correct
> > > one from the start as everyone else?
> > 
> > The incorrect I/O resource is added to a temporary list of resources; it has not
> > yet been attached to the list of windows in the bridge. What gets added is the
> > I/O resource as it would be described if it were an ordinary resource.
> 
> I'm not completely sure I'm following here, but let's work out the
> other things first, this will probably get clearer then.
> 
> > > > > > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> > > > > > index 6e34498..16febae 100644
> > > > > > --- a/drivers/pci/probe.c
> > > > > > +++ b/drivers/pci/probe.c
> > > > > > @@ -1787,6 +1787,17 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
> > > > > >     list_for_each_entry_safe(window, n, resources, list) {
> > > > > >             list_move_tail(&window->list, &bridge->windows);
> > > > > >             res = window->res;
> > > > > > +           /*
> > > > > > +            * IO resources are stored in the kernel with a CPU start
> > > > > > +            * address of zero. Adjust the data accordingly and remember
> > > > > > +            * the offset
> > > > > > +            */
> > > > > > +           if (resource_type(res) == IORESOURCE_IO) {
> > > > > > +                   bridge->io_offset = res->start;
> > > > > > +                   res->end -= res->start;
> > > > > > +                   window->offset -= res->start;
> > > > > > +                   res->start = 0;
> > > > > > +           }
> > 
> > Here we correct for the fact that IORESOURCE_IO is not a normal resource: Linux
> > wants a logical I/O address as the start and end, not the physical CPU address.
> > We adjust to that and remember the offset.
> 
> But the offset (phys_base) doesn't actually matter to the PCI core or
> the driver. Why save it?

Because I need it later for the host bridge ATR setup.

> 
> > > > > >             offset = window->offset;
> > > > > >             if (res->flags & IORESOURCE_BUS)
> > > > >
> > > > > Won't this break all existing host bridges?
> > > >
> > > > I am not sure. I believe not, due to what I've explained earlier, but you might be right.
> > > >
> > > > The adjustment happens before the resource is added to the host bridge windows and translates
> > > > it from MMIO range into IO range.
> > >
> > > AFAICT, the resource_type of the resource you register above should be
> > > IORESOURCE_MEM, so you are not actually matching it here.
> > 
> > No, all resources are added here. For IORESOURCE_IO we do an adjustment.
> 
> But there should never be an IORESOURCE_IO resource structure that is
> not in IO space, i.e. within ioport_resource. Doing an "adjustment"
> is not an operation defined on this structure. What I meant above is that
> the pci range parser gets this right and gives you a resource that looks
> like { .flags = IORESOURCE_MEM, .start = phys_base, .end = phys_base +
> size - 1}, while the resource we want to register is { .flags = IORESOURCE_IO,
> .start = log_base, .end = log_base + size -1}. In the of_pci_range struct for
> the I/O space, the "pci_space" is IORESOURCE_IO (for the pci_addr), while the
> "flags" are IORESOURCE_MEM, to go along with the cpu_addr.

The pci range parser gives me a range with .flags = IORESOURCE_IO for IO space. It
does not convert it to IORESOURCE_MEM. Hence the need for adjustment.
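
As an illustration of the mismatch (made-up base address, and the exact field
handling depends on of_pci_range_to_resource()):

/* What I get back for the I/O window after parsing the ranges: */
struct resource parsed = {
	.flags	= IORESOURCE_IO,
	.start	= 0x40000000,	/* CPU/physical address of the aperture */
	.end	= 0x4000ffff,
};

/* What the PCI core expects an IORESOURCE_IO window to look like: */
struct resource wanted = {
	.flags	= IORESOURCE_IO,
	.start	= 0x0000,	/* logical I/O port number */
	.end	= 0xffff,
};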

Best regards,
Liviu

> 
> 	Arnd

Patch

diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c
index 06ace62..9d11deb 100644
--- a/drivers/pci/host-bridge.c
+++ b/drivers/pci/host-bridge.c
@@ -6,6 +6,7 @@ 
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/module.h>
+#include <linux/of_address.h>
 
 #include "pci.h"
 
@@ -91,3 +92,94 @@  void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res,
 	res->end = region->end + offset;
 }
 EXPORT_SYMBOL(pcibios_bus_to_resource);
+
+/**
+ * pci_host_bridge_of_get_ranges - Parse PCI host bridge resources from DT
+ * @dev: device node of the host bridge having the range property
+ * @resources: list where the range of resources will be added after DT parsing
+ *
+ * This function will parse the "ranges" property of a PCI host bridge device
+ * node and setup the resource mapping based on its content. It is expected
+ * that the property conforms with the Power ePAPR document.
+ *
+ * Each architecture will then apply their filtering based on the limitations
+ * of each platform. One general restriction seems to be the number of IO space
+ * ranges, the PCI framework makes intensive use of struct resource management,
+ * and for IORESOURCE_IO types they can only be requested if they are contained
+ * within the global ioport_resource, so that should be limited to one IO space
+ * range.
+ */
+static int pci_host_bridge_of_get_ranges(struct device_node *dev,
+					struct list_head *resources)
+{
+	struct resource *res;
+	struct of_pci_range range;
+	struct of_pci_range_parser parser;
+	int err;
+
+	pr_info("PCI host bridge %s ranges:\n", dev->full_name);
+
+	/* Check for ranges property */
+	err = of_pci_range_parser_init(&parser, dev);
+	if (err)
+		return err;
+
+	pr_debug("Parsing ranges property...\n");
+	for_each_of_pci_range(&parser, &range) {
+		/* Read next ranges element */
+		pr_debug("pci_space: 0x%08x pci_addr:0x%016llx ",
+				range.pci_space, range.pci_addr);
+		pr_debug("cpu_addr:0x%016llx size:0x%016llx\n",
+					range.cpu_addr, range.size);
+
+		/* If we failed translation or got a zero-sized region
+		 * (some FW try to feed us with non sensical zero sized regions
+		 * such as power3 which look like some kind of attempt
+		 * at exposing the VGA memory hole) then skip this range
+		 */
+		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
+			continue;
+
+		res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+		if (!res) {
+			err = -ENOMEM;
+			goto bridge_ranges_nomem;
+		}
+
+		of_pci_range_to_resource(&range, dev, res);
+
+		pci_add_resource_offset(resources, res,
+				range.cpu_addr - range.pci_addr);
+	}
+
+	/* Apply architecture specific fixups for the ranges */
+	pcibios_fixup_bridge_ranges(resources);
+
+	return 0;
+
+bridge_ranges_nomem:
+	pci_free_resource_list(resources);
+	return err;
+}
+
+struct pci_host_bridge *
+pci_host_bridge_of_init(struct device *parent, int busno, struct pci_ops *ops,
+			void *host_data, struct list_head *resources)
+{
+	struct pci_bus *root_bus;
+	struct pci_host_bridge *bridge;
+
+	/* first parse the host bridge bus ranges */
+	if (pci_host_bridge_of_get_ranges(parent->of_node, resources))
+		return NULL;
+
+	/* then create the root bus */
+	root_bus = pci_create_root_bus(parent, busno, ops, host_data, resources);
+	if (!root_bus)
+		return NULL;
+
+	bridge = to_pci_host_bridge(root_bus->bridge);
+
+	return bridge;
+}
+EXPORT_SYMBOL(pci_host_bridge_of_init);
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 6e34498..16febae 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1787,6 +1787,17 @@  struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 	list_for_each_entry_safe(window, n, resources, list) {
 		list_move_tail(&window->list, &bridge->windows);
 		res = window->res;
+		/*
+		 * IO resources are stored in the kernel with a CPU start
+		 * address of zero. Adjust the data accordingly and remember
+		 * the offset
+		 */
+		if (resource_type(res) == IORESOURCE_IO) {
+			bridge->io_offset = res->start;
+			res->end -= res->start;
+			window->offset -= res->start;
+			res->start = 0;
+		}
 		offset = window->offset;
 		if (res->flags & IORESOURCE_BUS)
 			pci_bus_insert_busn_res(b, bus, res->end);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index fb57c89..8953997 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -394,6 +394,8 @@  struct pci_host_bridge_window {
 struct pci_host_bridge {
 	struct device dev;
 	struct pci_bus *bus;		/* root bus */
+	resource_size_t io_offset;	/* CPU address offset for io resources */
+	int domain_nr;
 	struct list_head windows;	/* pci_host_bridge_windows */
 	void (*release_fn)(struct pci_host_bridge *);
 	void *release_data;
@@ -1762,11 +1764,23 @@  static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
 	return bus ? bus->dev.of_node : NULL;
 }
 
+struct pci_host_bridge *
+pci_host_bridge_of_init(struct device *parent, int busno, struct pci_ops *ops,
+			void *host_data, struct list_head *resources);
+
+void pcibios_fixup_bridge_ranges(struct list_head *resources);
 #else /* CONFIG_OF */
 static inline void pci_set_of_node(struct pci_dev *dev) { }
 static inline void pci_release_of_node(struct pci_dev *dev) { }
 static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
 static inline void pci_release_bus_of_node(struct pci_bus *bus) { }
+
+static inline struct pci_host_bridge *
+pci_host_bridge_of_init(struct device *parent, struct pci_ops *ops,
+			void *host_data, struct list_head *resources)
+{
+	return NULL;
+}
 #endif  /* CONFIG_OF */
 
 #ifdef CONFIG_EEH
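
For context, a hypothetical sketch of how architecture code might consume the
proposed interface; only pci_host_bridge_of_init() and
pcibios_fixup_bridge_ranges() come from this patch, everything else (names,
ops, probe glue) is made up:

/* Arch hook called from pci_host_bridge_of_get_ranges(): e.g. trim or
 * reject I/O windows that cannot fit inside ioport_resource. */
void pcibios_fixup_bridge_ranges(struct list_head *resources)
{
	/* no-op on a platform with a single, well-formed I/O range */
}

static int myplat_pci_probe(struct platform_device *pdev)
{
	LIST_HEAD(resources);
	struct pci_host_bridge *bridge;

	bridge = pci_host_bridge_of_init(&pdev->dev, 0, &myplat_pci_ops,
					 NULL, &resources);
	if (!bridge)
		return -ENODEV;

	/* bridge->io_offset now holds the CPU (physical) base of the I/O
	 * aperture and can be used to program the bridge's translation. */
	return 0;
}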