diff mbox

[1/7] hw/misc/platform_devices: helpers for dynamic instantiation of platform devices

Message ID 1404716892-15600-2-git-send-email-eric.auger@linaro.org
State New
Headers show

Commit Message

Auger Eric July 7, 2014, 7:08 a.m. UTC
This new module implements routines which help in dynamic instantiation
of sysbus devices. Machine files can use those generic routines.

---

Dynamic sysbus device allocation fully written by Alex Graf.

[Eric Auger]
Those functions were initially in ppc e500 machine file. Now moved to a
separate module.

PPCE500Params is replaced by a generic struct named PlatformParams

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Eric Auger <eric.auger@linaro.org>
---
 hw/misc/Makefile.objs              |   1 +
 hw/misc/platform_devices.c         | 217 +++++++++++++++++++++++++++++++++++++
 include/hw/misc/platform_devices.h |  61 +++++++++++
 3 files changed, 279 insertions(+)
 create mode 100644 hw/misc/platform_devices.c
 create mode 100644 include/hw/misc/platform_devices.h

Comments

Alexander Graf July 8, 2014, 1:43 p.m. UTC | #1
On 07.07.14 09:08, Eric Auger wrote:
> This new module implements routines which help in dynamic instantiation
> of sysbus devices. Machine files can use those generic routines.
>
> ---
>
> Dynamic sysbus device allocation fully written by Alex Graf.
>
> [Eric Auger]
> Those functions were initially in ppc e500 machine file. Now moved to a
> separate module.
>
> PPCE500Params is replaced by a generic struct named PlatformParams
>
> Signed-off-by: Alexander Graf <agraf@suse.de>
> Signed-off-by: Eric Auger <eric.auger@linaro.org>
> ---
>   hw/misc/Makefile.objs              |   1 +
>   hw/misc/platform_devices.c         | 217 +++++++++++++++++++++++++++++++++++++
>   include/hw/misc/platform_devices.h |  61 +++++++++++
>   3 files changed, 279 insertions(+)
>   create mode 100644 hw/misc/platform_devices.c
>   create mode 100644 include/hw/misc/platform_devices.h
>
> diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
> index e47fea8..d081606 100644
> --- a/hw/misc/Makefile.objs
> +++ b/hw/misc/Makefile.objs
> @@ -40,3 +40,4 @@ obj-$(CONFIG_SLAVIO) += slavio_misc.o
>   obj-$(CONFIG_ZYNQ) += zynq_slcr.o
>   
>   obj-$(CONFIG_PVPANIC) += pvpanic.o
> +obj-y += platform_devices.o
> diff --git a/hw/misc/platform_devices.c b/hw/misc/platform_devices.c
> new file mode 100644
> index 0000000..96ab272
> --- /dev/null
> +++ b/hw/misc/platform_devices.c
> @@ -0,0 +1,217 @@
> +#include "hw/misc/platform_devices.h"
> +#include "hw/sysbus.h"
> +#include "qemu/error-report.h"
> +
> +#define PAGE_SHIFT 12
> +
> +int sysbus_device_create_devtree(Object *obj, void *opaque)
> +{
> +    PlatformDevtreeData *data = opaque;
> +    Object *dev;
> +    SysBusDevice *sbdev;
> +    bool matched = false;
> +
> +    dev = object_dynamic_cast(obj, TYPE_SYS_BUS_DEVICE);
> +    sbdev = (SysBusDevice *)dev;
> +
> +    if (!sbdev) {
> +        /* Container, traverse it for children */
> +        return object_child_foreach(obj, sysbus_device_create_devtree, data);
> +    }
> +
> +    if (!matched) {
> +        error_report("Device %s is not supported by this machine yet.",
> +                     qdev_fw_name(DEVICE(dev)));
> +        exit(1);
> +    }
> +
> +    return 0;
> +}
> +
> +void platform_bus_create_devtree(PlatformParams *params, void *fdt,
> +                                        const char *mpic)
> +{
> +    gchar *node = g_strdup_printf("/platform@%"PRIx64,
> +                                  params->platform_bus_base);
> +    const char platcomp[] = "qemu,platform\0simple-bus";
> +    PlatformDevtreeData data;
> +    Object *container;
> +    uint64_t addr = params->platform_bus_base;
> +    uint64_t size = params->platform_bus_size;
> +    int irq_start = params->platform_bus_first_irq;
> +
> +    /* Create a /platform node that we can put all devices into */
> +
> +    qemu_fdt_add_subnode(fdt, node);
> +    qemu_fdt_setprop(fdt, node, "compatible", platcomp, sizeof(platcomp));
> +
> +    /* Our platform bus region is less than 32bit big, so 1 cell is enough for
> +       address and size */
> +    qemu_fdt_setprop_cells(fdt, node, "#size-cells", 1);
> +    qemu_fdt_setprop_cells(fdt, node, "#address-cells", 1);
> +    qemu_fdt_setprop_cells(fdt, node, "ranges", 0, addr >> 32, addr, size);
> +
> +    qemu_fdt_setprop_phandle(fdt, node, "interrupt-parent", mpic);
> +
> +    /* Loop through all devices and create nodes for known ones */
> +    data.fdt = fdt;
> +    data.mpic = mpic;
> +    data.irq_start = irq_start;
> +    data.node = node;
> +
> +    container = container_get(qdev_get_machine(), "/peripheral");
> +    sysbus_device_create_devtree(container, &data);
> +    container = container_get(qdev_get_machine(), "/peripheral-anon");
> +    sysbus_device_create_devtree(container, &data);
> +
> +    g_free(node);
> +}

Device trees are pretty platform (and even machine) specific. Just to 
give you an example - the interrupt specifier on most e500 systems 
really is 4 cells big:

https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt#n80

|   Interrupt specifiers consists of 4 cells encoded as
   follows:

    <1st-cell>   interrupt-number

                 Identifies the interrupt source.  The meaning
                 depends on the type of interrupt.

                 Note: If the interrupt-type cell is undefined
                 (i.e. #interrupt-cells = 2), this cell
                 should be interpreted the same as for
                 interrupt-type 0-- i.e. an external or
                 normal SoC device interrupt.

    <2nd-cell>   level-sense information, encoded as follows:
                     0 = low-to-high edge triggered
                     1 = active low level-sensitive
                     2 = active high level-sensitive
                     3 = high-to-low edge triggered

    <3rd-cell>   interrupt-type

                 The following types are supported:

                   0 = external or normal SoC device interrupt

                       The interrupt-number cell contains
                       the SoC device interrupt number.  The
                       type-specific cell is undefined.  The
                       interrupt-number is derived from the
                       MPIC a block of registers referred to as
                       the "Interrupt Source Configuration Registers".
                       Each source has 32-bytes of registers
                       (vector/priority and destination) in this
                       region.   So interrupt 0 is at offset 0x0,
                       interrupt 1 is at offset 0x20, and so on.

                   1 = error interrupt

                       The interrupt-number cell contains
                       the SoC device interrupt number for
                       the error interrupt.  The type-specific
                       cell identifies the specific error
                       interrupt number.

                   2 = MPIC inter-processor interrupt (IPI)

                       The interrupt-number cell identifies
                       the MPIC IPI number.  The type-specific
                       cell is undefined.

                   3 = MPIC timer interrupt

                       The interrupt-number cell identifies
                       the MPIC timer number.  The type-specific
                       cell is undefined.

    <4th-cell>   type-specific information

                 The type-specific cell is encoded as follows:

                  - For interrupt-type 1 (error interrupt),
                    the type-specific cell contains the
                    bit number of the error interrupt in the
                    Error Interrupt Summary Register.
|




while on ARM you have a GIC which works like this:

https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/arm/gic.txt#n20

|- #interrupt-cells : Specifies the number of cells needed to encode an
   interrupt source.  The type shall be a <u32> and the value shall be 3.

   The 1st cell is the interrupt type; 0 for SPI interrupts, 1 for PPI
   interrupts.

   The 2nd cell contains the interrupt number for the interrupt type.
   SPI interrupts are in the range [0-987].  PPI interrupts are in the
   range [0-15].

   The 3rd cell is the flags, encoded as follows:
	bits[3:0] trigger type and level flags.
		1 = low-to-high edge triggered
		2 = high-to-low edge triggered
		4 = active high level-sensitive
		8 = active low level-sensitive
	bits[15:8] PPI interrupt cpu mask.  Each bit corresponds to each of
	the 8 possible cpus attached to the GIC.  A bit set to '1' indicated
	the interrupt is wired to that CPU.  Only valid for PPI interrupts.
|



Both have vastly different semantics. The number of cells is different, 
the value of the cells is different. Even the definition how to 
represent edge vs level triggered interrupts differs.

I don't think this will stop with interrupts. Maybe someone wants to add 
a special machine check flag to addresses on a platform and then 
"ranges" and "regs" will have different semantics on different 
platforms. There is a lot that can go wrong when you try to unify this code.

> +
> +int platform_bus_map_irq(PlatformParams *params, SysBusDevice *sbdev,
> +                         int n, unsigned long *used_irqs,
> +                         qemu_irq *platform_irqs)
> +{
> +    int max_irqs = params->platform_bus_num_irqs;
> +    char *prop = g_strdup_printf("irq[%d]", n);
> +    int irqn = object_property_get_int(OBJECT(sbdev), prop, NULL);
> +
> +    if (irqn == SYSBUS_DYNAMIC) {
> +        /* Find the first available IRQ */
> +        irqn = find_first_zero_bit(used_irqs, max_irqs);
> +    }
> +
> +    if ((irqn >= max_irqs) || test_and_set_bit(irqn, used_irqs)) {
> +        hw_error("IRQ %d is already allocated or no free IRQ left", irqn);
> +    }
> +
> +    sysbus_connect_irq(sbdev, n, platform_irqs[irqn]);
> +    object_property_set_int(OBJECT(sbdev), irqn, prop, NULL);
> +
> +    g_free(prop);
> +    return 0;
> +}
> +
> +int platform_bus_map_mmio(PlatformParams *params, SysBusDevice *sbdev,
> +                          int n, unsigned long *used_mem,
> +                          MemoryRegion *pmem)
> +{
> +    MemoryRegion *device_mem = sbdev->mmio[n].memory;
> +    uint64_t size = memory_region_size(device_mem);
> +    uint64_t page_size = (1 << PAGE_SHIFT);
> +    uint64_t page_mask = page_size - 1;
> +    uint64_t size_pages = (size + page_mask) >> PAGE_SHIFT;
> +    uint64_t max_size = params->platform_bus_size;
> +    uint64_t max_pages = max_size >> PAGE_SHIFT;
> +    char *prop = g_strdup_printf("mmio[%d]", n);
> +    hwaddr addr = object_property_get_int(OBJECT(sbdev), prop, NULL);
> +    int page;
> +    int i;
> +
> +    page = addr >> PAGE_SHIFT;
> +    if (addr == SYSBUS_DYNAMIC) {
> +        uint64_t size_pages_align;
> +
> +        /* Align the region to at least its own size granularity */
> +        if (is_power_of_2(size_pages)) {
> +            size_pages_align = size_pages;
> +        } else {
> +            size_pages_align = pow2floor(size_pages) << 1;
> +        }
> +
> +        /* Find the first available region that fits */
> +        page = bitmap_find_next_zero_area(used_mem, max_pages, 0, size_pages,
> +                                          size_pages_align);
> +
> +        addr = (uint64_t)page << PAGE_SHIFT;
> +    }
> +
> +    if (page >= max_pages || test_bit(page, used_mem) ||
> +        (find_next_bit(used_mem, max_pages, page) < size_pages)) {
> +        hw_error("Memory [%"PRIx64":%"PRIx64" is already allocated or "
> +                 "no slot left", addr, size);
> +    }
> +
> +    for (i = page; i < (page + size_pages); i++) {
> +        set_bit(i, used_mem);
> +    }
> +
> +    memory_region_add_subregion(pmem, addr, device_mem);
> +    sbdev->mmio[n].addr = addr;
> +    object_property_set_int(OBJECT(sbdev), addr, prop, NULL);
> +
> +    g_free(prop);
> +    return 0;
> +}
> +
> +int sysbus_device_check(Object *obj, void *opaque)
> +{
> +    PlatformBusInitData *init = opaque;
> +    Object *dev;
> +    SysBusDevice *sbdev;
> +    int i;
> +
> +    dev = object_dynamic_cast(obj, TYPE_SYS_BUS_DEVICE);
> +    sbdev = (SysBusDevice *)dev;
> +
> +    if (!sbdev) {
> +        /* Container, traverse it for children */
> +        return object_child_foreach(obj, sysbus_device_check, opaque);
> +    }
> +
> +    /* Connect sysbus device to virtual platform bus */
> +    for (i = 0; i < sbdev->num_irq; i++) {
> +        if (!sbdev->irqp[i]) {
> +            /* This IRQ is an incoming IRQ, we can't wire those here */
> +            continue;
> +        }
> +        platform_bus_map_irq(init->params, sbdev, i,
> +                             init->used_irqs, init->irqs);
> +    }
> +
> +    for (i = 0; i < sbdev->num_mmio; i++) {
> +        platform_bus_map_mmio(init->params, sbdev, i,
> +                              init->used_mem, init->mem);
> +    }
> +
> +    return 0;
> +}
> +
> +void platform_bus_init(PlatformParams *params,
> +                       MemoryRegion *address_space_mem,
> +                       qemu_irq *mpic)
> +{
> +    uint64_t max_size = params->platform_bus_size;
> +    uint64_t max_pages = max_size >> PAGE_SHIFT;
> +    DECLARE_BITMAP(used_irqs, params->platform_bus_num_irqs);
> +    DECLARE_BITMAP(used_mem, max_pages);
> +    MemoryRegion *platform_region = g_new(MemoryRegion, 1);
> +    Object *container;
> +    PlatformBusInitData init = {
> +        .used_irqs = used_irqs,
> +        .used_mem = used_mem,
> +        .mem = platform_region,
> +        .irqs = &mpic[params->platform_bus_first_irq],
> +        .params = params,
> +    };
> +
> +    memory_region_init(platform_region, NULL, "platform devices",
> +                       params->platform_bus_size);
> +
> +    bitmap_clear(used_irqs, 0, params->platform_bus_num_irqs);
> +    bitmap_clear(used_mem, 0, max_pages);
> +
> +    /* Loop through all sysbus devices that were spawened outside the machine */
> +    container = container_get(qdev_get_machine(), "/peripheral");
> +    sysbus_device_check(container, &init);
> +    container = container_get(qdev_get_machine(), "/peripheral-anon");
> +    sysbus_device_check(container, &init);
> +
> +    memory_region_add_subregion(address_space_mem, params->platform_bus_base,
> +                                platform_region);
> +}

However, I do think it's a good idea to generalize the "platform bus" 
device if you want to reuse it on ARM. The mmio / irq allocator is 
pretty straight forward and should be generic enough for you to use.

If you do this, please don't duplicate the code but rather move it from 
the e500 file into your new one :).


Alex
Auger Eric July 23, 2014, 2:58 p.m. UTC | #2
On 07/08/2014 03:43 PM, Alexander Graf wrote:
> 
> On 07.07.14 09:08, Eric Auger wrote:
>> This new module implements routines which help in dynamic instantiation
>> of sysbus devices. Machine files can use those generic routines.
>>
>> ---
>>
>> Dynamic sysbus device allocation fully written by Alex Graf.
>>
>> [Eric Auger]
>> Those functions were initially in ppc e500 machine file. Now moved to a
>> separate module.
>>
>> PPCE500Params is replaced by a generic struct named PlatformParams
>>
>> Signed-off-by: Alexander Graf <agraf@suse.de>
>> Signed-off-by: Eric Auger <eric.auger@linaro.org>
>> ---
>>   hw/misc/Makefile.objs              |   1 +
>>   hw/misc/platform_devices.c         | 217
>> +++++++++++++++++++++++++++++++++++++
>>   include/hw/misc/platform_devices.h |  61 +++++++++++
>>   3 files changed, 279 insertions(+)
>>   create mode 100644 hw/misc/platform_devices.c
>>   create mode 100644 include/hw/misc/platform_devices.h
>>
>> diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
>> index e47fea8..d081606 100644
>> --- a/hw/misc/Makefile.objs
>> +++ b/hw/misc/Makefile.objs
>> @@ -40,3 +40,4 @@ obj-$(CONFIG_SLAVIO) += slavio_misc.o
>>   obj-$(CONFIG_ZYNQ) += zynq_slcr.o
>>     obj-$(CONFIG_PVPANIC) += pvpanic.o
>> +obj-y += platform_devices.o
>> diff --git a/hw/misc/platform_devices.c b/hw/misc/platform_devices.c
>> new file mode 100644
>> index 0000000..96ab272
>> --- /dev/null
>> +++ b/hw/misc/platform_devices.c
>> @@ -0,0 +1,217 @@
>> +#include "hw/misc/platform_devices.h"
>> +#include "hw/sysbus.h"
>> +#include "qemu/error-report.h"
>> +
>> +#define PAGE_SHIFT 12
>> +
>> +int sysbus_device_create_devtree(Object *obj, void *opaque)
>> +{
>> +    PlatformDevtreeData *data = opaque;
>> +    Object *dev;
>> +    SysBusDevice *sbdev;
>> +    bool matched = false;
>> +
>> +    dev = object_dynamic_cast(obj, TYPE_SYS_BUS_DEVICE);
>> +    sbdev = (SysBusDevice *)dev;
>> +
>> +    if (!sbdev) {
>> +        /* Container, traverse it for children */
>> +        return object_child_foreach(obj,
>> sysbus_device_create_devtree, data);
>> +    }
>> +
>> +    if (!matched) {
>> +        error_report("Device %s is not supported by this machine yet.",
>> +                     qdev_fw_name(DEVICE(dev)));
>> +        exit(1);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +void platform_bus_create_devtree(PlatformParams *params, void *fdt,
>> +                                        const char *mpic)
>> +{
>> +    gchar *node = g_strdup_printf("/platform@%"PRIx64,
>> +                                  params->platform_bus_base);
>> +    const char platcomp[] = "qemu,platform\0simple-bus";
>> +    PlatformDevtreeData data;
>> +    Object *container;
>> +    uint64_t addr = params->platform_bus_base;
>> +    uint64_t size = params->platform_bus_size;
>> +    int irq_start = params->platform_bus_first_irq;
>> +
>> +    /* Create a /platform node that we can put all devices into */
>> +
>> +    qemu_fdt_add_subnode(fdt, node);
>> +    qemu_fdt_setprop(fdt, node, "compatible", platcomp,
>> sizeof(platcomp));
>> +
>> +    /* Our platform bus region is less than 32bit big, so 1 cell is
>> enough for
>> +       address and size */
>> +    qemu_fdt_setprop_cells(fdt, node, "#size-cells", 1);
>> +    qemu_fdt_setprop_cells(fdt, node, "#address-cells", 1);
>> +    qemu_fdt_setprop_cells(fdt, node, "ranges", 0, addr >> 32, addr,
>> size);
>> +
>> +    qemu_fdt_setprop_phandle(fdt, node, "interrupt-parent", mpic);
>> +
>> +    /* Loop through all devices and create nodes for known ones */
>> +    data.fdt = fdt;
>> +    data.mpic = mpic;
>> +    data.irq_start = irq_start;
>> +    data.node = node;
>> +
>> +    container = container_get(qdev_get_machine(), "/peripheral");
>> +    sysbus_device_create_devtree(container, &data);
>> +    container = container_get(qdev_get_machine(), "/peripheral-anon");
>> +    sysbus_device_create_devtree(container, &data);
>> +
>> +    g_free(node);
>> +}
> 
> Device trees are pretty platform (and even machine) specific. Just to
> give you an example - the interrupt specifier on most e500 systems
> really is 4 cells big:
> 
> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt#n80
> 
> 
> |   Interrupt specifiers consists of 4 cells encoded as
>   follows:
> 
>    <1st-cell>   interrupt-number
> 
>                 Identifies the interrupt source.  The meaning
>                 depends on the type of interrupt.
> 
>                 Note: If the interrupt-type cell is undefined
>                 (i.e. #interrupt-cells = 2), this cell
>                 should be interpreted the same as for
>                 interrupt-type 0-- i.e. an external or
>                 normal SoC device interrupt.
> 
>    <2nd-cell>   level-sense information, encoded as follows:
>                     0 = low-to-high edge triggered
>                     1 = active low level-sensitive
>                     2 = active high level-sensitive
>                     3 = high-to-low edge triggered
> 
>    <3rd-cell>   interrupt-type
> 
>                 The following types are supported:
> 
>                   0 = external or normal SoC device interrupt
> 
>                       The interrupt-number cell contains
>                       the SoC device interrupt number.  The
>                       type-specific cell is undefined.  The
>                       interrupt-number is derived from the
>                       MPIC a block of registers referred to as
>                       the "Interrupt Source Configuration Registers".
>                       Each source has 32-bytes of registers
>                       (vector/priority and destination) in this
>                       region.   So interrupt 0 is at offset 0x0,
>                       interrupt 1 is at offset 0x20, and so on.
> 
>                   1 = error interrupt
> 
>                       The interrupt-number cell contains
>                       the SoC device interrupt number for
>                       the error interrupt.  The type-specific
>                       cell identifies the specific error
>                       interrupt number.
> 
>                   2 = MPIC inter-processor interrupt (IPI)
> 
>                       The interrupt-number cell identifies
>                       the MPIC IPI number.  The type-specific
>                       cell is undefined.
> 
>                   3 = MPIC timer interrupt
> 
>                       The interrupt-number cell identifies
>                       the MPIC timer number.  The type-specific
>                       cell is undefined.
> 
>    <4th-cell>   type-specific information
> 
>                 The type-specific cell is encoded as follows:
> 
>                  - For interrupt-type 1 (error interrupt),
>                    the type-specific cell contains the
>                    bit number of the error interrupt in the
>                    Error Interrupt Summary Register.
> |
> 
> 
> 
> 
> while on ARM you have a GIC which works like this:
> 
> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/arm/gic.txt#n20
> 
> 
> |- #interrupt-cells : Specifies the number of cells needed to encode an
>   interrupt source.  The type shall be a <u32> and the value shall be 3.
> 
>   The 1st cell is the interrupt type; 0 for SPI interrupts, 1 for PPI
>   interrupts.
> 
>   The 2nd cell contains the interrupt number for the interrupt type.
>   SPI interrupts are in the range [0-987].  PPI interrupts are in the
>   range [0-15].
> 
>   The 3rd cell is the flags, encoded as follows:
>     bits[3:0] trigger type and level flags.
>         1 = low-to-high edge triggered
>         2 = high-to-low edge triggered
>         4 = active high level-sensitive
>         8 = active low level-sensitive
>     bits[15:8] PPI interrupt cpu mask.  Each bit corresponds to each of
>     the 8 possible cpus attached to the GIC.  A bit set to '1' indicated
>     the interrupt is wired to that CPU.  Only valid for PPI interrupts.
> |
> 
> 
> 
> Both have vastly different semantics. The number of cells is different,
> the value of the cells is different. Even the definition how to
> represent edge vs level triggered interrupts differs.
> 
> I don't think this will stop with interrupts. Maybe someone wants to add
> a special machine check flag to addresses on a platform and then
> "ranges" and "regs" will have different semantics on different
> platforms. There is a lot that can go wrong when you try to unify this
> code.

Hi Alex,

thank you for giving such an example. Indeed I was not aware there were
such huge discrepancies. I guess this comment mostly holds for the
actual device node generation (what I specialized in the parent QEMU
device) and not for the "qemu, platform simple-bus" node generation?

> 
>> +
>> +int platform_bus_map_irq(PlatformParams *params, SysBusDevice *sbdev,
>> +                         int n, unsigned long *used_irqs,
>> +                         qemu_irq *platform_irqs)
>> +{
>> +    int max_irqs = params->platform_bus_num_irqs;
>> +    char *prop = g_strdup_printf("irq[%d]", n);
>> +    int irqn = object_property_get_int(OBJECT(sbdev), prop, NULL);
>> +
>> +    if (irqn == SYSBUS_DYNAMIC) {
>> +        /* Find the first available IRQ */
>> +        irqn = find_first_zero_bit(used_irqs, max_irqs);
>> +    }
>> +
>> +    if ((irqn >= max_irqs) || test_and_set_bit(irqn, used_irqs)) {
>> +        hw_error("IRQ %d is already allocated or no free IRQ left",
>> irqn);
>> +    }
>> +
>> +    sysbus_connect_irq(sbdev, n, platform_irqs[irqn]);
>> +    object_property_set_int(OBJECT(sbdev), irqn, prop, NULL);
>> +
>> +    g_free(prop);
>> +    return 0;
>> +}
>> +
>> +int platform_bus_map_mmio(PlatformParams *params, SysBusDevice *sbdev,
>> +                          int n, unsigned long *used_mem,
>> +                          MemoryRegion *pmem)
>> +{
>> +    MemoryRegion *device_mem = sbdev->mmio[n].memory;
>> +    uint64_t size = memory_region_size(device_mem);
>> +    uint64_t page_size = (1 << PAGE_SHIFT);
>> +    uint64_t page_mask = page_size - 1;
>> +    uint64_t size_pages = (size + page_mask) >> PAGE_SHIFT;
>> +    uint64_t max_size = params->platform_bus_size;
>> +    uint64_t max_pages = max_size >> PAGE_SHIFT;
>> +    char *prop = g_strdup_printf("mmio[%d]", n);
>> +    hwaddr addr = object_property_get_int(OBJECT(sbdev), prop, NULL);
>> +    int page;
>> +    int i;
>> +
>> +    page = addr >> PAGE_SHIFT;
>> +    if (addr == SYSBUS_DYNAMIC) {
>> +        uint64_t size_pages_align;
>> +
>> +        /* Align the region to at least its own size granularity */
>> +        if (is_power_of_2(size_pages)) {
>> +            size_pages_align = size_pages;
>> +        } else {
>> +            size_pages_align = pow2floor(size_pages) << 1;
>> +        }
>> +
>> +        /* Find the first available region that fits */
>> +        page = bitmap_find_next_zero_area(used_mem, max_pages, 0,
>> size_pages,
>> +                                          size_pages_align);
>> +
>> +        addr = (uint64_t)page << PAGE_SHIFT;
>> +    }
>> +
>> +    if (page >= max_pages || test_bit(page, used_mem) ||
>> +        (find_next_bit(used_mem, max_pages, page) < size_pages)) {
>> +        hw_error("Memory [%"PRIx64":%"PRIx64" is already allocated or "
>> +                 "no slot left", addr, size);
>> +    }
>> +
>> +    for (i = page; i < (page + size_pages); i++) {
>> +        set_bit(i, used_mem);
>> +    }
>> +
>> +    memory_region_add_subregion(pmem, addr, device_mem);
>> +    sbdev->mmio[n].addr = addr;
>> +    object_property_set_int(OBJECT(sbdev), addr, prop, NULL);
>> +
>> +    g_free(prop);
>> +    return 0;
>> +}
>> +
>> +int sysbus_device_check(Object *obj, void *opaque)
>> +{
>> +    PlatformBusInitData *init = opaque;
>> +    Object *dev;
>> +    SysBusDevice *sbdev;
>> +    int i;
>> +
>> +    dev = object_dynamic_cast(obj, TYPE_SYS_BUS_DEVICE);
>> +    sbdev = (SysBusDevice *)dev;
>> +
>> +    if (!sbdev) {
>> +        /* Container, traverse it for children */
>> +        return object_child_foreach(obj, sysbus_device_check, opaque);
>> +    }
>> +
>> +    /* Connect sysbus device to virtual platform bus */
>> +    for (i = 0; i < sbdev->num_irq; i++) {
>> +        if (!sbdev->irqp[i]) {
>> +            /* This IRQ is an incoming IRQ, we can't wire those here */
>> +            continue;
>> +        }
>> +        platform_bus_map_irq(init->params, sbdev, i,
>> +                             init->used_irqs, init->irqs);
>> +    }
>> +
>> +    for (i = 0; i < sbdev->num_mmio; i++) {
>> +        platform_bus_map_mmio(init->params, sbdev, i,
>> +                              init->used_mem, init->mem);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +void platform_bus_init(PlatformParams *params,
>> +                       MemoryRegion *address_space_mem,
>> +                       qemu_irq *mpic)
>> +{
>> +    uint64_t max_size = params->platform_bus_size;
>> +    uint64_t max_pages = max_size >> PAGE_SHIFT;
>> +    DECLARE_BITMAP(used_irqs, params->platform_bus_num_irqs);
>> +    DECLARE_BITMAP(used_mem, max_pages);
>> +    MemoryRegion *platform_region = g_new(MemoryRegion, 1);
>> +    Object *container;
>> +    PlatformBusInitData init = {
>> +        .used_irqs = used_irqs,
>> +        .used_mem = used_mem,
>> +        .mem = platform_region,
>> +        .irqs = &mpic[params->platform_bus_first_irq],
>> +        .params = params,
>> +    };
>> +
>> +    memory_region_init(platform_region, NULL, "platform devices",
>> +                       params->platform_bus_size);
>> +
>> +    bitmap_clear(used_irqs, 0, params->platform_bus_num_irqs);
>> +    bitmap_clear(used_mem, 0, max_pages);
>> +
>> +    /* Loop through all sysbus devices that were spawened outside the
>> machine */
>> +    container = container_get(qdev_get_machine(), "/peripheral");
>> +    sysbus_device_check(container, &init);
>> +    container = container_get(qdev_get_machine(), "/peripheral-anon");
>> +    sysbus_device_check(container, &init);
>> +
>> +    memory_region_add_subregion(address_space_mem,
>> params->platform_bus_base,
>> +                                platform_region);
>> +}
> 
> However, I do think it's a good idea to generalize the "platform bus"
> device if you want to reuse it on ARM. The mmio / irq allocator is
> pretty straight forward and should be generic enough for you to use.
I need clarification here: do you talk about your very first patch
"Platform Device Support" or the code above with a proper solution for
device tree generation?
> 
> If you do this, please don't duplicate the code but rather move it from
> the e500 file into your new one :).
OK. do you mean modifying the e500.c code to call those routines? My
concern is about testing.

Thanks again for your comments

Best Regards

Eric
> 
> 
> Alex
>
Alexander Graf July 23, 2014, 11:07 p.m. UTC | #3
On 23.07.14 16:58, Eric Auger wrote:
> On 07/08/2014 03:43 PM, Alexander Graf wrote:
>> On 07.07.14 09:08, Eric Auger wrote:
>>> This new module implements routines which help in dynamic instantiation
>>> of sysbus devices. Machine files can use those generic routines.
>>>
>>> ---
>>>
>>> Dynamic sysbus device allocation fully written by Alex Graf.
>>>
>>> [Eric Auger]
>>> Those functions were initially in ppc e500 machine file. Now moved to a
>>> separate module.
>>>
>>> PPCE500Params is replaced by a generic struct named PlatformParams
>>>
>>> Signed-off-by: Alexander Graf <agraf@suse.de>
>>> Signed-off-by: Eric Auger <eric.auger@linaro.org>
>>> ---
>>>    hw/misc/Makefile.objs              |   1 +
>>>    hw/misc/platform_devices.c         | 217
>>> +++++++++++++++++++++++++++++++++++++
>>>    include/hw/misc/platform_devices.h |  61 +++++++++++
>>>    3 files changed, 279 insertions(+)
>>>    create mode 100644 hw/misc/platform_devices.c
>>>    create mode 100644 include/hw/misc/platform_devices.h
>>>
>>> diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
>>> index e47fea8..d081606 100644
>>> --- a/hw/misc/Makefile.objs
>>> +++ b/hw/misc/Makefile.objs
>>> @@ -40,3 +40,4 @@ obj-$(CONFIG_SLAVIO) += slavio_misc.o
>>>    obj-$(CONFIG_ZYNQ) += zynq_slcr.o
>>>      obj-$(CONFIG_PVPANIC) += pvpanic.o
>>> +obj-y += platform_devices.o
>>> diff --git a/hw/misc/platform_devices.c b/hw/misc/platform_devices.c
>>> new file mode 100644
>>> index 0000000..96ab272
>>> --- /dev/null
>>> +++ b/hw/misc/platform_devices.c
>>> @@ -0,0 +1,217 @@
>>> +#include "hw/misc/platform_devices.h"
>>> +#include "hw/sysbus.h"
>>> +#include "qemu/error-report.h"
>>> +
>>> +#define PAGE_SHIFT 12
>>> +
>>> +int sysbus_device_create_devtree(Object *obj, void *opaque)
>>> +{
>>> +    PlatformDevtreeData *data = opaque;
>>> +    Object *dev;
>>> +    SysBusDevice *sbdev;
>>> +    bool matched = false;
>>> +
>>> +    dev = object_dynamic_cast(obj, TYPE_SYS_BUS_DEVICE);
>>> +    sbdev = (SysBusDevice *)dev;
>>> +
>>> +    if (!sbdev) {
>>> +        /* Container, traverse it for children */
>>> +        return object_child_foreach(obj,
>>> sysbus_device_create_devtree, data);
>>> +    }
>>> +
>>> +    if (!matched) {
>>> +        error_report("Device %s is not supported by this machine yet.",
>>> +                     qdev_fw_name(DEVICE(dev)));
>>> +        exit(1);
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +void platform_bus_create_devtree(PlatformParams *params, void *fdt,
>>> +                                        const char *mpic)
>>> +{
>>> +    gchar *node = g_strdup_printf("/platform@%"PRIx64,
>>> +                                  params->platform_bus_base);
>>> +    const char platcomp[] = "qemu,platform\0simple-bus";
>>> +    PlatformDevtreeData data;
>>> +    Object *container;
>>> +    uint64_t addr = params->platform_bus_base;
>>> +    uint64_t size = params->platform_bus_size;
>>> +    int irq_start = params->platform_bus_first_irq;
>>> +
>>> +    /* Create a /platform node that we can put all devices into */
>>> +
>>> +    qemu_fdt_add_subnode(fdt, node);
>>> +    qemu_fdt_setprop(fdt, node, "compatible", platcomp,
>>> sizeof(platcomp));
>>> +
>>> +    /* Our platform bus region is less than 32bit big, so 1 cell is
>>> enough for
>>> +       address and size */
>>> +    qemu_fdt_setprop_cells(fdt, node, "#size-cells", 1);
>>> +    qemu_fdt_setprop_cells(fdt, node, "#address-cells", 1);
>>> +    qemu_fdt_setprop_cells(fdt, node, "ranges", 0, addr >> 32, addr,
>>> size);
>>> +
>>> +    qemu_fdt_setprop_phandle(fdt, node, "interrupt-parent", mpic);
>>> +
>>> +    /* Loop through all devices and create nodes for known ones */
>>> +    data.fdt = fdt;
>>> +    data.mpic = mpic;
>>> +    data.irq_start = irq_start;
>>> +    data.node = node;
>>> +
>>> +    container = container_get(qdev_get_machine(), "/peripheral");
>>> +    sysbus_device_create_devtree(container, &data);
>>> +    container = container_get(qdev_get_machine(), "/peripheral-anon");
>>> +    sysbus_device_create_devtree(container, &data);
>>> +
>>> +    g_free(node);
>>> +}
>> Device trees are pretty platform (and even machine) specific. Just to
>> give you an example - the interrupt specifier on most e500 systems
>> really is 4 cells big:
>>
>> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt#n80
>>
>>
>> |   Interrupt specifiers consists of 4 cells encoded as
>>    follows:
>>
>>     <1st-cell>   interrupt-number
>>
>>                  Identifies the interrupt source.  The meaning
>>                  depends on the type of interrupt.
>>
>>                  Note: If the interrupt-type cell is undefined
>>                  (i.e. #interrupt-cells = 2), this cell
>>                  should be interpreted the same as for
>>                  interrupt-type 0-- i.e. an external or
>>                  normal SoC device interrupt.
>>
>>     <2nd-cell>   level-sense information, encoded as follows:
>>                      0 = low-to-high edge triggered
>>                      1 = active low level-sensitive
>>                      2 = active high level-sensitive
>>                      3 = high-to-low edge triggered
>>
>>     <3rd-cell>   interrupt-type
>>
>>                  The following types are supported:
>>
>>                    0 = external or normal SoC device interrupt
>>
>>                        The interrupt-number cell contains
>>                        the SoC device interrupt number.  The
>>                        type-specific cell is undefined.  The
>>                        interrupt-number is derived from the
>>                        MPIC a block of registers referred to as
>>                        the "Interrupt Source Configuration Registers".
>>                        Each source has 32-bytes of registers
>>                        (vector/priority and destination) in this
>>                        region.   So interrupt 0 is at offset 0x0,
>>                        interrupt 1 is at offset 0x20, and so on.
>>
>>                    1 = error interrupt
>>
>>                        The interrupt-number cell contains
>>                        the SoC device interrupt number for
>>                        the error interrupt.  The type-specific
>>                        cell identifies the specific error
>>                        interrupt number.
>>
>>                    2 = MPIC inter-processor interrupt (IPI)
>>
>>                        The interrupt-number cell identifies
>>                        the MPIC IPI number.  The type-specific
>>                        cell is undefined.
>>
>>                    3 = MPIC timer interrupt
>>
>>                        The interrupt-number cell identifies
>>                        the MPIC timer number.  The type-specific
>>                        cell is undefined.
>>
>>     <4th-cell>   type-specific information
>>
>>                  The type-specific cell is encoded as follows:
>>
>>                   - For interrupt-type 1 (error interrupt),
>>                     the type-specific cell contains the
>>                     bit number of the error interrupt in the
>>                     Error Interrupt Summary Register.
>> |
>>
>>
>>
>>
>> while on ARM you have a GIC which works like this:
>>
>> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/arm/gic.txt#n20
>>
>>
>> |- #interrupt-cells : Specifies the number of cells needed to encode an
>>    interrupt source.  The type shall be a <u32> and the value shall be 3.
>>
>>    The 1st cell is the interrupt type; 0 for SPI interrupts, 1 for PPI
>>    interrupts.
>>
>>    The 2nd cell contains the interrupt number for the interrupt type.
>>    SPI interrupts are in the range [0-987].  PPI interrupts are in the
>>    range [0-15].
>>
>>    The 3rd cell is the flags, encoded as follows:
>>      bits[3:0] trigger type and level flags.
>>          1 = low-to-high edge triggered
>>          2 = high-to-low edge triggered
>>          4 = active high level-sensitive
>>          8 = active low level-sensitive
>>      bits[15:8] PPI interrupt cpu mask.  Each bit corresponds to each of
>>      the 8 possible cpus attached to the GIC.  A bit set to '1' indicated
>>      the interrupt is wired to that CPU.  Only valid for PPI interrupts.
>> |
>>
>>
>>
>> Both have vastly different semantics. The number of cells is different,
>> the value of the cells is different. Even the definition how to
>> represent edge vs level triggered interrupts differs.
>>
>> I don't think this will stop with interrupts. Maybe someone wants to add
>> a special machine check flag to addresses on a platform and then
>> "ranges" and "regs" will have different semantics on different
>> platforms. There is a lot that can go wrong when you try to unify this
>> code.
> Hi Alex,
>
> thank you for giving such an example. Indeed I was not aware there were
> such huge discrepancies. I guess this comment mostly holds for the
> actual device node generation (what I specialized in the parent QEMU
> device) and not for the "qemu, platform simple-bus" node generation?

Is the dt generation that much code? Just duplicate it for now - we can 
generalize it later if we see how things work out.

>
>>> +
>>> +int platform_bus_map_irq(PlatformParams *params, SysBusDevice *sbdev,
>>> +                         int n, unsigned long *used_irqs,
>>> +                         qemu_irq *platform_irqs)
>>> +{
>>> +    int max_irqs = params->platform_bus_num_irqs;
>>> +    char *prop = g_strdup_printf("irq[%d]", n);
>>> +    int irqn = object_property_get_int(OBJECT(sbdev), prop, NULL);
>>> +
>>> +    if (irqn == SYSBUS_DYNAMIC) {
>>> +        /* Find the first available IRQ */
>>> +        irqn = find_first_zero_bit(used_irqs, max_irqs);
>>> +    }
>>> +
>>> +    if ((irqn >= max_irqs) || test_and_set_bit(irqn, used_irqs)) {
>>> +        hw_error("IRQ %d is already allocated or no free IRQ left",
>>> irqn);
>>> +    }
>>> +
>>> +    sysbus_connect_irq(sbdev, n, platform_irqs[irqn]);
>>> +    object_property_set_int(OBJECT(sbdev), irqn, prop, NULL);
>>> +
>>> +    g_free(prop);
>>> +    return 0;
>>> +}
>>> +
>>> +int platform_bus_map_mmio(PlatformParams *params, SysBusDevice *sbdev,
>>> +                          int n, unsigned long *used_mem,
>>> +                          MemoryRegion *pmem)
>>> +{
>>> +    MemoryRegion *device_mem = sbdev->mmio[n].memory;
>>> +    uint64_t size = memory_region_size(device_mem);
>>> +    uint64_t page_size = (1 << PAGE_SHIFT);
>>> +    uint64_t page_mask = page_size - 1;
>>> +    uint64_t size_pages = (size + page_mask) >> PAGE_SHIFT;
>>> +    uint64_t max_size = params->platform_bus_size;
>>> +    uint64_t max_pages = max_size >> PAGE_SHIFT;
>>> +    char *prop = g_strdup_printf("mmio[%d]", n);
>>> +    hwaddr addr = object_property_get_int(OBJECT(sbdev), prop, NULL);
>>> +    int page;
>>> +    int i;
>>> +
>>> +    page = addr >> PAGE_SHIFT;
>>> +    if (addr == SYSBUS_DYNAMIC) {
>>> +        uint64_t size_pages_align;
>>> +
>>> +        /* Align the region to at least its own size granularity */
>>> +        if (is_power_of_2(size_pages)) {
>>> +            size_pages_align = size_pages;
>>> +        } else {
>>> +            size_pages_align = pow2floor(size_pages) << 1;
>>> +        }
>>> +
>>> +        /* Find the first available region that fits */
>>> +        page = bitmap_find_next_zero_area(used_mem, max_pages, 0,
>>> size_pages,
>>> +                                          size_pages_align);
>>> +
>>> +        addr = (uint64_t)page << PAGE_SHIFT;
>>> +    }
>>> +
>>> +    if (page >= max_pages || test_bit(page, used_mem) ||
>>> +        (find_next_bit(used_mem, max_pages, page) < size_pages)) {
>>> +        hw_error("Memory [%"PRIx64":%"PRIx64" is already allocated or "
>>> +                 "no slot left", addr, size);
>>> +    }
>>> +
>>> +    for (i = page; i < (page + size_pages); i++) {
>>> +        set_bit(i, used_mem);
>>> +    }
>>> +
>>> +    memory_region_add_subregion(pmem, addr, device_mem);
>>> +    sbdev->mmio[n].addr = addr;
>>> +    object_property_set_int(OBJECT(sbdev), addr, prop, NULL);
>>> +
>>> +    g_free(prop);
>>> +    return 0;
>>> +}
>>> +
>>> +int sysbus_device_check(Object *obj, void *opaque)
>>> +{
>>> +    PlatformBusInitData *init = opaque;
>>> +    Object *dev;
>>> +    SysBusDevice *sbdev;
>>> +    int i;
>>> +
>>> +    dev = object_dynamic_cast(obj, TYPE_SYS_BUS_DEVICE);
>>> +    sbdev = (SysBusDevice *)dev;
>>> +
>>> +    if (!sbdev) {
>>> +        /* Container, traverse it for children */
>>> +        return object_child_foreach(obj, sysbus_device_check, opaque);
>>> +    }
>>> +
>>> +    /* Connect sysbus device to virtual platform bus */
>>> +    for (i = 0; i < sbdev->num_irq; i++) {
>>> +        if (!sbdev->irqp[i]) {
>>> +            /* This IRQ is an incoming IRQ, we can't wire those here */
>>> +            continue;
>>> +        }
>>> +        platform_bus_map_irq(init->params, sbdev, i,
>>> +                             init->used_irqs, init->irqs);
>>> +    }
>>> +
>>> +    for (i = 0; i < sbdev->num_mmio; i++) {
>>> +        platform_bus_map_mmio(init->params, sbdev, i,
>>> +                              init->used_mem, init->mem);
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +void platform_bus_init(PlatformParams *params,
>>> +                       MemoryRegion *address_space_mem,
>>> +                       qemu_irq *mpic)
>>> +{
>>> +    uint64_t max_size = params->platform_bus_size;
>>> +    uint64_t max_pages = max_size >> PAGE_SHIFT;
>>> +    DECLARE_BITMAP(used_irqs, params->platform_bus_num_irqs);
>>> +    DECLARE_BITMAP(used_mem, max_pages);
>>> +    MemoryRegion *platform_region = g_new(MemoryRegion, 1);
>>> +    Object *container;
>>> +    PlatformBusInitData init = {
>>> +        .used_irqs = used_irqs,
>>> +        .used_mem = used_mem,
>>> +        .mem = platform_region,
>>> +        .irqs = &mpic[params->platform_bus_first_irq],
>>> +        .params = params,
>>> +    };
>>> +
>>> +    memory_region_init(platform_region, NULL, "platform devices",
>>> +                       params->platform_bus_size);
>>> +
>>> +    bitmap_clear(used_irqs, 0, params->platform_bus_num_irqs);
>>> +    bitmap_clear(used_mem, 0, max_pages);
>>> +
>>> +    /* Loop through all sysbus devices that were spawened outside the
>>> machine */
>>> +    container = container_get(qdev_get_machine(), "/peripheral");
>>> +    sysbus_device_check(container, &init);
>>> +    container = container_get(qdev_get_machine(), "/peripheral-anon");
>>> +    sysbus_device_check(container, &init);
>>> +
>>> +    memory_region_add_subregion(address_space_mem,
>>> params->platform_bus_base,
>>> +                                platform_region);
>>> +}
>> However, I do think it's a good idea to generalize the "platform bus"
>> device if you want to reuse it on ARM. The mmio / irq allocator is
>> pretty straight forward and should be generic enough for you to use.
> I need clarification here: do you talk about your very first patch
> "Platform Device Support" or the code above with a proper solution for
> device tree generation?

I'm talking about the actual implementation of the allocation logic. I 
think we're better off to keep all the device tree logic purely in the 
machine files for now.

>> If you do this, please don't duplicate the code but rather move it from
>> the e500 file into your new one :).
> OK. do you mean modifying the e500.c code to call those routines? My
> concern is about testing.

Why? We have a u-boot binary that starts up based on the device tree 
with TCG if you just start the e500plat machine and if you like I can 
easily give you a nice guest kernel and rootfs as well ;).


Alex
Auger Eric July 24, 2014, 8:01 a.m. UTC | #4
On 07/24/2014 01:07 AM, Alexander Graf wrote:
> 
> On 23.07.14 16:58, Eric Auger wrote:
>> On 07/08/2014 03:43 PM, Alexander Graf wrote:
>>> On 07.07.14 09:08, Eric Auger wrote:
>>>> This new module implements routines which help in dynamic instantiation
>>>> of sysbus devices. Machine files can use those generic routines.
>>>>
>>>> ---
>>>>
>>>> Dynamic sysbus device allocation fully written by Alex Graf.
>>>>
>>>> [Eric Auger]
>>>> Those functions were initially in ppc e500 machine file. Now moved to a
>>>> separate module.
>>>>
>>>> PPCE500Params is replaced by a generic struct named PlatformParams
>>>>
>>>> Signed-off-by: Alexander Graf <agraf@suse.de>
>>>> Signed-off-by: Eric Auger <eric.auger@linaro.org>
>>>> ---
>>>>    hw/misc/Makefile.objs              |   1 +
>>>>    hw/misc/platform_devices.c         | 217
>>>> +++++++++++++++++++++++++++++++++++++
>>>>    include/hw/misc/platform_devices.h |  61 +++++++++++
>>>>    3 files changed, 279 insertions(+)
>>>>    create mode 100644 hw/misc/platform_devices.c
>>>>    create mode 100644 include/hw/misc/platform_devices.h
>>>>
>>>> diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
>>>> index e47fea8..d081606 100644
>>>> --- a/hw/misc/Makefile.objs
>>>> +++ b/hw/misc/Makefile.objs
>>>> @@ -40,3 +40,4 @@ obj-$(CONFIG_SLAVIO) += slavio_misc.o
>>>>    obj-$(CONFIG_ZYNQ) += zynq_slcr.o
>>>>      obj-$(CONFIG_PVPANIC) += pvpanic.o
>>>> +obj-y += platform_devices.o
>>>> diff --git a/hw/misc/platform_devices.c b/hw/misc/platform_devices.c
>>>> new file mode 100644
>>>> index 0000000..96ab272
>>>> --- /dev/null
>>>> +++ b/hw/misc/platform_devices.c
>>>> @@ -0,0 +1,217 @@
>>>> +#include "hw/misc/platform_devices.h"
>>>> +#include "hw/sysbus.h"
>>>> +#include "qemu/error-report.h"
>>>> +
>>>> +#define PAGE_SHIFT 12
>>>> +
>>>> +int sysbus_device_create_devtree(Object *obj, void *opaque)
>>>> +{
>>>> +    PlatformDevtreeData *data = opaque;
>>>> +    Object *dev;
>>>> +    SysBusDevice *sbdev;
>>>> +    bool matched = false;
>>>> +
>>>> +    dev = object_dynamic_cast(obj, TYPE_SYS_BUS_DEVICE);
>>>> +    sbdev = (SysBusDevice *)dev;
>>>> +
>>>> +    if (!sbdev) {
>>>> +        /* Container, traverse it for children */
>>>> +        return object_child_foreach(obj,
>>>> sysbus_device_create_devtree, data);
>>>> +    }
>>>> +
>>>> +    if (!matched) {
>>>> +        error_report("Device %s is not supported by this machine
>>>> yet.",
>>>> +                     qdev_fw_name(DEVICE(dev)));
>>>> +        exit(1);
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +void platform_bus_create_devtree(PlatformParams *params, void *fdt,
>>>> +                                        const char *mpic)
>>>> +{
>>>> +    gchar *node = g_strdup_printf("/platform@%"PRIx64,
>>>> +                                  params->platform_bus_base);
>>>> +    const char platcomp[] = "qemu,platform\0simple-bus";
>>>> +    PlatformDevtreeData data;
>>>> +    Object *container;
>>>> +    uint64_t addr = params->platform_bus_base;
>>>> +    uint64_t size = params->platform_bus_size;
>>>> +    int irq_start = params->platform_bus_first_irq;
>>>> +
>>>> +    /* Create a /platform node that we can put all devices into */
>>>> +
>>>> +    qemu_fdt_add_subnode(fdt, node);
>>>> +    qemu_fdt_setprop(fdt, node, "compatible", platcomp,
>>>> sizeof(platcomp));
>>>> +
>>>> +    /* Our platform bus region is less than 32bit big, so 1 cell is
>>>> enough for
>>>> +       address and size */
>>>> +    qemu_fdt_setprop_cells(fdt, node, "#size-cells", 1);
>>>> +    qemu_fdt_setprop_cells(fdt, node, "#address-cells", 1);
>>>> +    qemu_fdt_setprop_cells(fdt, node, "ranges", 0, addr >> 32, addr,
>>>> size);
>>>> +
>>>> +    qemu_fdt_setprop_phandle(fdt, node, "interrupt-parent", mpic);
>>>> +
>>>> +    /* Loop through all devices and create nodes for known ones */
>>>> +    data.fdt = fdt;
>>>> +    data.mpic = mpic;
>>>> +    data.irq_start = irq_start;
>>>> +    data.node = node;
>>>> +
>>>> +    container = container_get(qdev_get_machine(), "/peripheral");
>>>> +    sysbus_device_create_devtree(container, &data);
>>>> +    container = container_get(qdev_get_machine(), "/peripheral-anon");
>>>> +    sysbus_device_create_devtree(container, &data);
>>>> +
>>>> +    g_free(node);
>>>> +}
>>> Device trees are pretty platform (and even machine) specific. Just to
>>> give you an example - the interrupt specifier on most e500 systems
>>> really is 4 cells big:
>>>
>>> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt#n80
>>>
>>>
>>>
>>> |   Interrupt specifiers consists of 4 cells encoded as
>>>    follows:
>>>
>>>     <1st-cell>   interrupt-number
>>>
>>>                  Identifies the interrupt source.  The meaning
>>>                  depends on the type of interrupt.
>>>
>>>                  Note: If the interrupt-type cell is undefined
>>>                  (i.e. #interrupt-cells = 2), this cell
>>>                  should be interpreted the same as for
>>>                  interrupt-type 0-- i.e. an external or
>>>                  normal SoC device interrupt.
>>>
>>>     <2nd-cell>   level-sense information, encoded as follows:
>>>                      0 = low-to-high edge triggered
>>>                      1 = active low level-sensitive
>>>                      2 = active high level-sensitive
>>>                      3 = high-to-low edge triggered
>>>
>>>     <3rd-cell>   interrupt-type
>>>
>>>                  The following types are supported:
>>>
>>>                    0 = external or normal SoC device interrupt
>>>
>>>                        The interrupt-number cell contains
>>>                        the SoC device interrupt number.  The
>>>                        type-specific cell is undefined.  The
>>>                        interrupt-number is derived from the
>>>                        MPIC a block of registers referred to as
>>>                        the "Interrupt Source Configuration Registers".
>>>                        Each source has 32-bytes of registers
>>>                        (vector/priority and destination) in this
>>>                        region.   So interrupt 0 is at offset 0x0,
>>>                        interrupt 1 is at offset 0x20, and so on.
>>>
>>>                    1 = error interrupt
>>>
>>>                        The interrupt-number cell contains
>>>                        the SoC device interrupt number for
>>>                        the error interrupt.  The type-specific
>>>                        cell identifies the specific error
>>>                        interrupt number.
>>>
>>>                    2 = MPIC inter-processor interrupt (IPI)
>>>
>>>                        The interrupt-number cell identifies
>>>                        the MPIC IPI number.  The type-specific
>>>                        cell is undefined.
>>>
>>>                    3 = MPIC timer interrupt
>>>
>>>                        The interrupt-number cell identifies
>>>                        the MPIC timer number.  The type-specific
>>>                        cell is undefined.
>>>
>>>     <4th-cell>   type-specific information
>>>
>>>                  The type-specific cell is encoded as follows:
>>>
>>>                   - For interrupt-type 1 (error interrupt),
>>>                     the type-specific cell contains the
>>>                     bit number of the error interrupt in the
>>>                     Error Interrupt Summary Register.
>>> |
>>>
>>>
>>>
>>>
>>> while on ARM you have a GIC which works like this:
>>>
>>> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/arm/gic.txt#n20
>>>
>>>
>>>
>>> |- #interrupt-cells : Specifies the number of cells needed to encode an
>>>    interrupt source.  The type shall be a <u32> and the value shall
>>> be 3.
>>>
>>>    The 1st cell is the interrupt type; 0 for SPI interrupts, 1 for PPI
>>>    interrupts.
>>>
>>>    The 2nd cell contains the interrupt number for the interrupt type.
>>>    SPI interrupts are in the range [0-987].  PPI interrupts are in the
>>>    range [0-15].
>>>
>>>    The 3rd cell is the flags, encoded as follows:
>>>      bits[3:0] trigger type and level flags.
>>>          1 = low-to-high edge triggered
>>>          2 = high-to-low edge triggered
>>>          4 = active high level-sensitive
>>>          8 = active low level-sensitive
>>>      bits[15:8] PPI interrupt cpu mask.  Each bit corresponds to each of
>>>      the 8 possible cpus attached to the GIC.  A bit set to '1'
>>> indicated
>>>      the interrupt is wired to that CPU.  Only valid for PPI interrupts.
>>> |
>>>
>>>
>>>
>>> Both have vastly different semantics. The number of cells is different,
>>> the value of the cells is different. Even the definition how to
>>> represent edge vs level triggered interrupts differs.
>>>
>>> I don't think this will stop with interrupts. Maybe someone wants to add
>>> a special machine check flag to addresses on a platform and then
>>> "ranges" and "regs" will have different semantics on different
>>> platforms. There is a lot that can go wrong when you try to unify this
>>> code.
>> Hi Alex,
>>
>> thank you for giving such an example. Indeed I was not aware there were
>> such huge discrepancies. I guess this comment mostly holds for the
>> actual device node generation (what I specialized in the parent QEMU
>> device) and not for the "qemu, platform simple-bus" node generation?
> 
> Is the dt generation that much code? Just duplicate it for now - we can
> generalize it later if we see how things work out.
> 
>>
>>>> +
>>>> +int platform_bus_map_irq(PlatformParams *params, SysBusDevice *sbdev,
>>>> +                         int n, unsigned long *used_irqs,
>>>> +                         qemu_irq *platform_irqs)
>>>> +{
>>>> +    int max_irqs = params->platform_bus_num_irqs;
>>>> +    char *prop = g_strdup_printf("irq[%d]", n);
>>>> +    int irqn = object_property_get_int(OBJECT(sbdev), prop, NULL);
>>>> +
>>>> +    if (irqn == SYSBUS_DYNAMIC) {
>>>> +        /* Find the first available IRQ */
>>>> +        irqn = find_first_zero_bit(used_irqs, max_irqs);
>>>> +    }
>>>> +
>>>> +    if ((irqn >= max_irqs) || test_and_set_bit(irqn, used_irqs)) {
>>>> +        hw_error("IRQ %d is already allocated or no free IRQ left",
>>>> irqn);
>>>> +    }
>>>> +
>>>> +    sysbus_connect_irq(sbdev, n, platform_irqs[irqn]);
>>>> +    object_property_set_int(OBJECT(sbdev), irqn, prop, NULL);
>>>> +
>>>> +    g_free(prop);
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +int platform_bus_map_mmio(PlatformParams *params, SysBusDevice *sbdev,
>>>> +                          int n, unsigned long *used_mem,
>>>> +                          MemoryRegion *pmem)
>>>> +{
>>>> +    MemoryRegion *device_mem = sbdev->mmio[n].memory;
>>>> +    uint64_t size = memory_region_size(device_mem);
>>>> +    uint64_t page_size = (1 << PAGE_SHIFT);
>>>> +    uint64_t page_mask = page_size - 1;
>>>> +    uint64_t size_pages = (size + page_mask) >> PAGE_SHIFT;
>>>> +    uint64_t max_size = params->platform_bus_size;
>>>> +    uint64_t max_pages = max_size >> PAGE_SHIFT;
>>>> +    char *prop = g_strdup_printf("mmio[%d]", n);
>>>> +    hwaddr addr = object_property_get_int(OBJECT(sbdev), prop, NULL);
>>>> +    int page;
>>>> +    int i;
>>>> +
>>>> +    page = addr >> PAGE_SHIFT;
>>>> +    if (addr == SYSBUS_DYNAMIC) {
>>>> +        uint64_t size_pages_align;
>>>> +
>>>> +        /* Align the region to at least its own size granularity */
>>>> +        if (is_power_of_2(size_pages)) {
>>>> +            size_pages_align = size_pages;
>>>> +        } else {
>>>> +            size_pages_align = pow2floor(size_pages) << 1;
>>>> +        }
>>>> +
>>>> +        /* Find the first available region that fits */
>>>> +        page = bitmap_find_next_zero_area(used_mem, max_pages, 0,
>>>> size_pages,
>>>> +                                          size_pages_align);
>>>> +
>>>> +        addr = (uint64_t)page << PAGE_SHIFT;
>>>> +    }
>>>> +
>>>> +    if (page >= max_pages || test_bit(page, used_mem) ||
>>>> +        (find_next_bit(used_mem, max_pages, page) < size_pages)) {
>>>> +        hw_error("Memory [%"PRIx64":%"PRIx64" is already allocated
>>>> or "
>>>> +                 "no slot left", addr, size);
>>>> +    }
>>>> +
>>>> +    for (i = page; i < (page + size_pages); i++) {
>>>> +        set_bit(i, used_mem);
>>>> +    }
>>>> +
>>>> +    memory_region_add_subregion(pmem, addr, device_mem);
>>>> +    sbdev->mmio[n].addr = addr;
>>>> +    object_property_set_int(OBJECT(sbdev), addr, prop, NULL);
>>>> +
>>>> +    g_free(prop);
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +int sysbus_device_check(Object *obj, void *opaque)
>>>> +{
>>>> +    PlatformBusInitData *init = opaque;
>>>> +    Object *dev;
>>>> +    SysBusDevice *sbdev;
>>>> +    int i;
>>>> +
>>>> +    dev = object_dynamic_cast(obj, TYPE_SYS_BUS_DEVICE);
>>>> +    sbdev = (SysBusDevice *)dev;
>>>> +
>>>> +    if (!sbdev) {
>>>> +        /* Container, traverse it for children */
>>>> +        return object_child_foreach(obj, sysbus_device_check, opaque);
>>>> +    }
>>>> +
>>>> +    /* Connect sysbus device to virtual platform bus */
>>>> +    for (i = 0; i < sbdev->num_irq; i++) {
>>>> +        if (!sbdev->irqp[i]) {
>>>> +            /* This IRQ is an incoming IRQ, we can't wire those
>>>> here */
>>>> +            continue;
>>>> +        }
>>>> +        platform_bus_map_irq(init->params, sbdev, i,
>>>> +                             init->used_irqs, init->irqs);
>>>> +    }
>>>> +
>>>> +    for (i = 0; i < sbdev->num_mmio; i++) {
>>>> +        platform_bus_map_mmio(init->params, sbdev, i,
>>>> +                              init->used_mem, init->mem);
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +void platform_bus_init(PlatformParams *params,
>>>> +                       MemoryRegion *address_space_mem,
>>>> +                       qemu_irq *mpic)
>>>> +{
>>>> +    uint64_t max_size = params->platform_bus_size;
>>>> +    uint64_t max_pages = max_size >> PAGE_SHIFT;
>>>> +    DECLARE_BITMAP(used_irqs, params->platform_bus_num_irqs);
>>>> +    DECLARE_BITMAP(used_mem, max_pages);
>>>> +    MemoryRegion *platform_region = g_new(MemoryRegion, 1);
>>>> +    Object *container;
>>>> +    PlatformBusInitData init = {
>>>> +        .used_irqs = used_irqs,
>>>> +        .used_mem = used_mem,
>>>> +        .mem = platform_region,
>>>> +        .irqs = &mpic[params->platform_bus_first_irq],
>>>> +        .params = params,
>>>> +    };
>>>> +
>>>> +    memory_region_init(platform_region, NULL, "platform devices",
>>>> +                       params->platform_bus_size);
>>>> +
>>>> +    bitmap_clear(used_irqs, 0, params->platform_bus_num_irqs);
>>>> +    bitmap_clear(used_mem, 0, max_pages);
>>>> +
>>>> +    /* Loop through all sysbus devices that were spawened outside the
>>>> machine */
>>>> +    container = container_get(qdev_get_machine(), "/peripheral");
>>>> +    sysbus_device_check(container, &init);
>>>> +    container = container_get(qdev_get_machine(), "/peripheral-anon");
>>>> +    sysbus_device_check(container, &init);
>>>> +
>>>> +    memory_region_add_subregion(address_space_mem,
>>>> params->platform_bus_base,
>>>> +                                platform_region);
>>>> +}
>>> However, I do think it's a good idea to generalize the "platform bus"
>>> device if you want to reuse it on ARM. The mmio / irq allocator is
>>> pretty straight forward and should be generic enough for you to use.
>> I need clarification here: do you talk about your very first patch
>> "Platform Device Support" or the code above with a proper solution for
>> device tree generation?
> 
> I'm talking about the actual implementation of the allocation logic. I
> think we're better off to keep all the device tree logic purely in the
> machine files for now.
ok
> 
>>> If you do this, please don't duplicate the code but rather move it from
>>> the e500 file into your new one :).
>> OK. do you mean modifying the e500.c code to call those routines? My
>> concern is about testing.
> 
> Why? We have a u-boot binary that starts up based on the device tree
> with TCG if you just start the e500plat machine and if you like I can
> easily give you a nice guest kernel and rootfs as well ;).
Definitively if you can afford to give this support, no problem.

Best Regards

Eric
> 
> 
> Alex
>
diff mbox

Patch

diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
index e47fea8..d081606 100644
--- a/hw/misc/Makefile.objs
+++ b/hw/misc/Makefile.objs
@@ -40,3 +40,4 @@  obj-$(CONFIG_SLAVIO) += slavio_misc.o
 obj-$(CONFIG_ZYNQ) += zynq_slcr.o
 
 obj-$(CONFIG_PVPANIC) += pvpanic.o
+obj-y += platform_devices.o
diff --git a/hw/misc/platform_devices.c b/hw/misc/platform_devices.c
new file mode 100644
index 0000000..96ab272
--- /dev/null
+++ b/hw/misc/platform_devices.c
@@ -0,0 +1,217 @@ 
+#include "hw/misc/platform_devices.h"
+#include "hw/sysbus.h"
+#include "qemu/error-report.h"
+
+#define PAGE_SHIFT 12
+
+int sysbus_device_create_devtree(Object *obj, void *opaque)
+{
+    PlatformDevtreeData *data = opaque;
+    Object *dev;
+    SysBusDevice *sbdev;
+    bool matched = false;
+
+    dev = object_dynamic_cast(obj, TYPE_SYS_BUS_DEVICE);
+    sbdev = (SysBusDevice *)dev;
+
+    if (!sbdev) {
+        /* Container, traverse it for children */
+        return object_child_foreach(obj, sysbus_device_create_devtree, data);
+    }
+
+    if (!matched) {
+        error_report("Device %s is not supported by this machine yet.",
+                     qdev_fw_name(DEVICE(dev)));
+        exit(1);
+    }
+
+    return 0;
+}
+
+void platform_bus_create_devtree(PlatformParams *params, void *fdt,
+                                        const char *mpic)
+{
+    gchar *node = g_strdup_printf("/platform@%"PRIx64,
+                                  params->platform_bus_base);
+    const char platcomp[] = "qemu,platform\0simple-bus";
+    PlatformDevtreeData data;
+    Object *container;
+    uint64_t addr = params->platform_bus_base;
+    uint64_t size = params->platform_bus_size;
+    int irq_start = params->platform_bus_first_irq;
+
+    /* Create a /platform node that we can put all devices into */
+
+    qemu_fdt_add_subnode(fdt, node);
+    qemu_fdt_setprop(fdt, node, "compatible", platcomp, sizeof(platcomp));
+
+    /* Our platform bus region is less than 32bit big, so 1 cell is enough for
+       address and size */
+    qemu_fdt_setprop_cells(fdt, node, "#size-cells", 1);
+    qemu_fdt_setprop_cells(fdt, node, "#address-cells", 1);
+    qemu_fdt_setprop_cells(fdt, node, "ranges", 0, addr >> 32, addr, size);
+
+    qemu_fdt_setprop_phandle(fdt, node, "interrupt-parent", mpic);
+
+    /* Loop through all devices and create nodes for known ones */
+    data.fdt = fdt;
+    data.mpic = mpic;
+    data.irq_start = irq_start;
+    data.node = node;
+
+    container = container_get(qdev_get_machine(), "/peripheral");
+    sysbus_device_create_devtree(container, &data);
+    container = container_get(qdev_get_machine(), "/peripheral-anon");
+    sysbus_device_create_devtree(container, &data);
+
+    g_free(node);
+}
+
+int platform_bus_map_irq(PlatformParams *params, SysBusDevice *sbdev,
+                         int n, unsigned long *used_irqs,
+                         qemu_irq *platform_irqs)
+{
+    int max_irqs = params->platform_bus_num_irqs;
+    char *prop = g_strdup_printf("irq[%d]", n);
+    int irqn = object_property_get_int(OBJECT(sbdev), prop, NULL);
+
+    if (irqn == SYSBUS_DYNAMIC) {
+        /* Find the first available IRQ */
+        irqn = find_first_zero_bit(used_irqs, max_irqs);
+    }
+
+    if ((irqn >= max_irqs) || test_and_set_bit(irqn, used_irqs)) {
+        hw_error("IRQ %d is already allocated or no free IRQ left", irqn);
+    }
+
+    sysbus_connect_irq(sbdev, n, platform_irqs[irqn]);
+    object_property_set_int(OBJECT(sbdev), irqn, prop, NULL);
+
+    g_free(prop);
+    return 0;
+}
+
+int platform_bus_map_mmio(PlatformParams *params, SysBusDevice *sbdev,
+                          int n, unsigned long *used_mem,
+                          MemoryRegion *pmem)
+{
+    MemoryRegion *device_mem = sbdev->mmio[n].memory;
+    uint64_t size = memory_region_size(device_mem);
+    uint64_t page_size = (1 << PAGE_SHIFT);
+    uint64_t page_mask = page_size - 1;
+    uint64_t size_pages = (size + page_mask) >> PAGE_SHIFT;
+    uint64_t max_size = params->platform_bus_size;
+    uint64_t max_pages = max_size >> PAGE_SHIFT;
+    char *prop = g_strdup_printf("mmio[%d]", n);
+    hwaddr addr = object_property_get_int(OBJECT(sbdev), prop, NULL);
+    int page;
+    int i;
+
+    page = addr >> PAGE_SHIFT;
+    if (addr == SYSBUS_DYNAMIC) {
+        uint64_t size_pages_align;
+
+        /* Align the region to at least its own size granularity */
+        if (is_power_of_2(size_pages)) {
+            size_pages_align = size_pages;
+        } else {
+            size_pages_align = pow2floor(size_pages) << 1;
+        }
+
+        /* Find the first available region that fits */
+        page = bitmap_find_next_zero_area(used_mem, max_pages, 0, size_pages,
+                                          size_pages_align);
+
+        addr = (uint64_t)page << PAGE_SHIFT;
+    }
+
+    if (page >= max_pages || test_bit(page, used_mem) ||
+        (find_next_bit(used_mem, max_pages, page) < size_pages)) {
+        hw_error("Memory [%"PRIx64":%"PRIx64" is already allocated or "
+                 "no slot left", addr, size);
+    }
+
+    for (i = page; i < (page + size_pages); i++) {
+        set_bit(i, used_mem);
+    }
+
+    memory_region_add_subregion(pmem, addr, device_mem);
+    sbdev->mmio[n].addr = addr;
+    object_property_set_int(OBJECT(sbdev), addr, prop, NULL);
+
+    g_free(prop);
+    return 0;
+}
+
+int sysbus_device_check(Object *obj, void *opaque)
+{
+    PlatformBusInitData *init = opaque;
+    Object *dev;
+    SysBusDevice *sbdev;
+    int i;
+
+    dev = object_dynamic_cast(obj, TYPE_SYS_BUS_DEVICE);
+    sbdev = (SysBusDevice *)dev;
+
+    if (!sbdev) {
+        /* Container, traverse it for children */
+        return object_child_foreach(obj, sysbus_device_check, opaque);
+    }
+
+    /* Connect sysbus device to virtual platform bus */
+    for (i = 0; i < sbdev->num_irq; i++) {
+        if (!sbdev->irqp[i]) {
+            /* This IRQ is an incoming IRQ, we can't wire those here */
+            continue;
+        }
+        platform_bus_map_irq(init->params, sbdev, i,
+                             init->used_irqs, init->irqs);
+    }
+
+    for (i = 0; i < sbdev->num_mmio; i++) {
+        platform_bus_map_mmio(init->params, sbdev, i,
+                              init->used_mem, init->mem);
+    }
+
+    return 0;
+}
+
+void platform_bus_init(PlatformParams *params,
+                       MemoryRegion *address_space_mem,
+                       qemu_irq *mpic)
+{
+    uint64_t max_size = params->platform_bus_size;
+    uint64_t max_pages = max_size >> PAGE_SHIFT;
+    DECLARE_BITMAP(used_irqs, params->platform_bus_num_irqs);
+    DECLARE_BITMAP(used_mem, max_pages);
+    MemoryRegion *platform_region = g_new(MemoryRegion, 1);
+    Object *container;
+    PlatformBusInitData init = {
+        .used_irqs = used_irqs,
+        .used_mem = used_mem,
+        .mem = platform_region,
+        .irqs = &mpic[params->platform_bus_first_irq],
+        .params = params,
+    };
+
+    memory_region_init(platform_region, NULL, "platform devices",
+                       params->platform_bus_size);
+
+    bitmap_clear(used_irqs, 0, params->platform_bus_num_irqs);
+    bitmap_clear(used_mem, 0, max_pages);
+
+    /* Loop through all sysbus devices that were spawened outside the machine */
+    container = container_get(qdev_get_machine(), "/peripheral");
+    sysbus_device_check(container, &init);
+    container = container_get(qdev_get_machine(), "/peripheral-anon");
+    sysbus_device_check(container, &init);
+
+    memory_region_add_subregion(address_space_mem, params->platform_bus_base,
+                                platform_region);
+}
+
+void platform_bus_init_notify(Notifier *notifier, void *data)
+{
+    PlatformBusNotifier *pn = (PlatformBusNotifier *)notifier;
+    platform_bus_init(&pn->params, pn->address_space_mem, pn->mpic);
+}
diff --git a/include/hw/misc/platform_devices.h b/include/hw/misc/platform_devices.h
new file mode 100644
index 0000000..ab79346
--- /dev/null
+++ b/include/hw/misc/platform_devices.h
@@ -0,0 +1,61 @@ 
+#ifndef HW_MISC_PLATFORM_DEVICES_H
+#define HW_MISC_PLATFORM_DEVICES_H
+
+#include "qemu-common.h"
+#include "sysemu/device_tree.h"
+#include "hw/hw.h"
+#include "hw/sysbus.h"
+
+typedef struct PlatformDevtreeData {
+    void *fdt;
+    const char *mpic;
+    int irq_start;
+    const char *node;
+} PlatformDevtreeData;
+
+typedef struct {
+    bool has_platform_bus;
+    hwaddr platform_bus_base;
+    hwaddr platform_bus_size;
+    int platform_bus_first_irq;
+    int platform_bus_num_irqs;
+} PlatformParams;
+
+typedef struct PlatformBusNotifier {
+    Notifier notifier;
+    MemoryRegion *address_space_mem;
+    qemu_irq *mpic;
+    PlatformParams params;
+} PlatformBusNotifier;
+
+typedef struct PlatformBusInitData {
+    unsigned long *used_irqs;
+    unsigned long *used_mem;
+    MemoryRegion *mem;
+    qemu_irq *irqs;
+    int device_count;
+    PlatformParams *params;
+} PlatformBusInitData;
+
+
+int sysbus_device_create_devtree(Object *obj, void *opaque);
+
+void platform_bus_create_devtree(PlatformParams *params, void *fdt,
+                                        const char *mpic);
+
+int platform_bus_map_irq(PlatformParams *params, SysBusDevice *sbdev,
+                         int n, unsigned long *used_irqs,
+                         qemu_irq *platform_irqs);
+
+int platform_bus_map_mmio(PlatformParams *params, SysBusDevice *sbdev,
+                          int n, unsigned long *used_mem,
+                          MemoryRegion *pmem);
+
+int sysbus_device_check(Object *obj, void *opaque);
+
+void platform_bus_init(PlatformParams *params,
+                       MemoryRegion *address_space_mem,
+                       qemu_irq *mpic);
+
+void platform_bus_init_notify(Notifier *notifier, void *data);
+#endif