[Xen-devel,v2,08/15] xen/passthrough: iommu: Split generic IOMMU code

Message ID 1393193792-20008-9-git-send-email-julien.grall@linaro.org
State Superseded
Headers show

Commit Message

Julien Grall Feb. 23, 2014, 10:16 p.m.
The generic IOMMU framework code (xen/drivers/passthrough/iommu.c) contains
functions specific to x86 and PCI.

Split the framework in 3 distincts files:
    - iommu.c: contains generic functions shared between x86 and ARM
               (when it will be supported)
    - pci.c: contains specific functions for PCI passthrough
    - x86/iommu.c: contains specific functions for x86

io.c contains x86 HVM specific code. Only compile for x86.

This patch is mostly code movement in new files.

Signed-off-by: Julien Grall <julien.grall@linaro.org>
Cc: Xiantao Zhang <xiantao.zhang@intel.com>
Cc: Jan Beulich <jbeulich@suse.com>

---
    Changes in v2:
        - Update commit message
        - Removing spurious change in drivers/passthrough/vtd/iommu.c
        - Move iommu_x86.c in x86/iommu.c
        - Merge iommu_pci.c in pci.c
        - Introduce iommu_do_pci_domctl
---
 xen/drivers/passthrough/Makefile     |    4 +-
 xen/drivers/passthrough/iommu.c      |  493 ++--------------------------------
 xen/drivers/passthrough/pci.c        |  437 ++++++++++++++++++++++++++++++
 xen/drivers/passthrough/x86/Makefile |    1 +
 xen/drivers/passthrough/x86/iommu.c  |   65 +++++
 xen/include/asm-x86/iommu.h          |   46 ++++
 xen/include/xen/hvm/iommu.h          |    1 +
 xen/include/xen/iommu.h              |   43 ++-
 8 files changed, 597 insertions(+), 493 deletions(-)
 create mode 100644 xen/drivers/passthrough/x86/iommu.c
 create mode 100644 xen/include/asm-x86/iommu.h

Comments

Julien Grall Feb. 24, 2014, 12:46 p.m. | #1
Hi Jan,

On 02/24/2014 10:39 AM, Jan Beulich wrote:
>>>> On 23.02.14 at 23:16, Julien Grall <julien.grall@linaro.org> wrote:
>> +int iommu_do_domctl(
>> +    struct xen_domctl *domctl, struct domain *d,
>> +    XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
>>  {
>> -    const struct iommu_ops *ops = iommu_get_ops();
>> -    if ( iommu_intremap )
>> -        ops->read_msi_from_ire(msi_desc, msg);
>> -}
>> +    int ret = 0;
>>  
>> -unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg)
>> -{
>> -    const struct iommu_ops *ops = iommu_get_ops();
>> -    return ops->read_apic_from_ire(apic, reg);
>> -}
>> +    if ( !iommu_enabled )
>> +        return -ENOSYS;
>>  
>> -int __init iommu_setup_hpet_msi(struct msi_desc *msi)
>> -{
>> -    const struct iommu_ops *ops = iommu_get_ops();
>> -    return ops->setup_hpet_msi ? ops->setup_hpet_msi(msi) : -ENODEV;
>> -}
>> +    switch ( domctl->cmd )
>> +    {
>> +#ifdef HAS_PCI
>> +    case XEN_DOMCTL_get_device_group:
>> +    case XEN_DOMCTL_test_assign_device:
>> +    case XEN_DOMCTL_assign_device:
>> +    case XEN_DOMCTL_deassign_device:
>> +        ret = iommu_do_pci_domctl(domctl, d, u_domctl);
>> +        break;
>> +#endif
>> +    default:
>> +        ret = -ENOSYS;
>> +    }
> 
> Please simply have the default case chain to iommu_do_pci_domctl(),
> avoiding the need to change two source files when new sub-ops get
> added.

I wrote in this manner because we will add soon "iommu_do_dt_domctl" to
handle DOMCTL for device tree assignment.

For one of this function we will have to deal with this trick. Or ... we
can do:

   ret = iommu_do_pci_domctl(...)
   if ( ret != ENOSYS )
     return ret;
   ret = iommu_do_dt_domctl(...)

IHMO, I would prefer the former solution.

> 
> Also, last case in the set of case statements or not - the default
> case should also have a break statement.

I will add it.

>> --- a/xen/drivers/passthrough/pci.c
>> +++ b/xen/drivers/passthrough/pci.c
>> ...
>> @@ -980,6 +983,440 @@ static int __init setup_dump_pcidevs(void)
>>  }
>>  __initcall(setup_dump_pcidevs);
>>  
>> +static int iommu_populate_page_table(struct domain *d)
>> +{
> 
> Now why is this function being moved here? It doesn't appear to
> have anything PCI specific at all.

Because this function is only used in assign_device.

I also remember this function can't work on ARM (arch.relmem_list
doesn't exists). I will move this code in x86/iommu.c because it's seems
architecture initialization.

> 
>> --- /dev/null
>> +++ b/xen/drivers/passthrough/x86/iommu.c
>> @@ -0,0 +1,65 @@
>> +/*
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms and conditions of the GNU General Public License,
>> + * version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
>> + * more details.
>> + *
>> + * You should have received a copy of the GNU General Public License along with
>> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
>> + * Place - Suite 330, Boston, MA 02111-1307 USA.
>> + */
>> +
>> +#include <xen/sched.h>
>> +#include <xen/iommu.h>
>> +#include <xen/paging.h>
>> +#include <xen/guest_access.h>
>> +#include <xen/event.h>
>> +#include <xen/softirq.h>
>> +#include <xsm/xsm.h>
>> +
>> +void iommu_update_ire_from_apic(
>> +    unsigned int apic, unsigned int reg, unsigned int value)
>> +{
>> +    const struct iommu_ops *ops = iommu_get_ops();
>> +    ops->update_ire_from_apic(apic, reg, value);
>> +}
> 
> While one might argue that this one is x86-specific (albeit from past
> IA64 days we know it isn't entirely), ...
> 
>> +
>> +int iommu_update_ire_from_msi(
>> +    struct msi_desc *msi_desc, struct msi_msg *msg)
>> +{
>> +    const struct iommu_ops *ops = iommu_get_ops();
>> +    return iommu_intremap ? ops->update_ire_from_msi(msi_desc, msg) : 0;
>> +}
> 
> ... this one clearly isn't - I'm sure once you support PCI on ARM, you
> will also want/need to support MSI. (The same then of course goes
> for the respective functions' declarations.)

Right, I guess it's the same for iommu_read_msi_from_ire.

>> --- /dev/null
>> +++ b/xen/include/asm-x86/iommu.h
>> @@ -0,0 +1,46 @@
>> +/*
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms and conditions of the GNU General Public License,
>> + * version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
>> + * more details.
>> + *
>> + * You should have received a copy of the GNU General Public License along with
>> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
>> + * Place - Suite 330, Boston, MA 02111-1307 USA.
>> +*/
>> +#ifndef __ARCH_X86_IOMMU_H__
>> +#define __ARCH_X86_IOMMU_H__
>> +
>> +#define MAX_IOMMUS 32
>> +
>> +#include <asm/msi.h>
> 
> Please don't, if at all possible.

I'm not sure to understand ... what do you mean? Don't include "asm/msi.h"?

>> @@ -84,52 +82,56 @@ void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci);
>>  bool_t pt_irq_need_timer(uint32_t flags);
>>  
>>  #define PT_IRQ_TIME_OUT MILLISECS(8)
>> +#endif /* HAS_PCI */
>>  
>> +#ifdef CONFIG_X86
>>  struct msi_desc;
>>  struct msi_msg;
>> +#endif /* CONFIG_X86 */
> 
> Hardly - this again is a direct descendant from PCI.

I will #ifdef HAS_PCI.

>> +#ifdef CONFIG_X86
>>      void (*update_ire_from_apic)(unsigned int apic, unsigned int reg, unsigned int value);
>>      int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg);
>>      void (*read_msi_from_ire)(struct msi_desc *msi_desc, struct msi_msg *msg);
>>      unsigned int (*read_apic_from_ire)(unsigned int apic, unsigned int reg);
>>      int (*setup_hpet_msi)(struct msi_desc *);
>> +    void (*share_p2m)(struct domain *d);
>> +#endif /* CONFIG_X86 */
> 
> Is that last one really x86-specific in any way?

On ARM, P2M are share by default. You don't need to call this function
explicitly. So I think we can safely say it's x86-specific.

Developper won't call this function by mistake.

Cheers,

Patch

diff --git a/xen/drivers/passthrough/Makefile b/xen/drivers/passthrough/Makefile
index 7c40fa5..6e08f89 100644
--- a/xen/drivers/passthrough/Makefile
+++ b/xen/drivers/passthrough/Makefile
@@ -3,5 +3,5 @@  subdir-$(x86) += amd
 subdir-$(x86_64) += x86
 
 obj-y += iommu.o
-obj-y += io.o
-obj-y += pci.o
+obj-$(x86) += io.o
+obj-$(HAS_PCI) += pci.o
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 3c63f87..6893cf3 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -24,7 +24,6 @@ 
 #include <xsm/xsm.h>
 
 static void parse_iommu_param(char *s);
-static int iommu_populate_page_table(struct domain *d);
 static void iommu_dump_p2m_table(unsigned char key);
 
 /*
@@ -179,86 +178,7 @@  void __init iommu_dom0_init(struct domain *d)
     return hd->platform_ops->dom0_init(d);
 }
 
-int iommu_add_device(struct pci_dev *pdev)
-{
-    struct hvm_iommu *hd;
-    int rc;
-    u8 devfn;
-
-    if ( !pdev->domain )
-        return -EINVAL;
-
-    ASSERT(spin_is_locked(&pcidevs_lock));
-
-    hd = domain_hvm_iommu(pdev->domain);
-    if ( !iommu_enabled || !hd->platform_ops )
-        return 0;
-
-    rc = hd->platform_ops->add_device(pdev->devfn, pdev);
-    if ( rc || !pdev->phantom_stride )
-        return rc;
-
-    for ( devfn = pdev->devfn ; ; )
-    {
-        devfn += pdev->phantom_stride;
-        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
-            return 0;
-        rc = hd->platform_ops->add_device(devfn, pdev);
-        if ( rc )
-            printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n",
-                   pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
-    }
-}
-
-int iommu_enable_device(struct pci_dev *pdev)
-{
-    struct hvm_iommu *hd;
-
-    if ( !pdev->domain )
-        return -EINVAL;
-
-    ASSERT(spin_is_locked(&pcidevs_lock));
-
-    hd = domain_hvm_iommu(pdev->domain);
-    if ( !iommu_enabled || !hd->platform_ops ||
-         !hd->platform_ops->enable_device )
-        return 0;
-
-    return hd->platform_ops->enable_device(pdev);
-}
-
-int iommu_remove_device(struct pci_dev *pdev)
-{
-    struct hvm_iommu *hd;
-    u8 devfn;
-
-    if ( !pdev->domain )
-        return -EINVAL;
-
-    hd = domain_hvm_iommu(pdev->domain);
-    if ( !iommu_enabled || !hd->platform_ops )
-        return 0;
-
-    for ( devfn = pdev->devfn ; pdev->phantom_stride; )
-    {
-        int rc;
-
-        devfn += pdev->phantom_stride;
-        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
-            break;
-        rc = hd->platform_ops->remove_device(devfn, pdev);
-        if ( !rc )
-            continue;
-
-        printk(XENLOG_ERR "IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n",
-               pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
-        return rc;
-    }
-
-    return hd->platform_ops->remove_device(pdev->devfn, pdev);
-}
-
-static void iommu_teardown(struct domain *d)
+void iommu_teardown(struct domain *d)
 {
     const struct hvm_iommu *hd = domain_hvm_iommu(d);
 
@@ -267,151 +187,6 @@  static void iommu_teardown(struct domain *d)
     tasklet_schedule(&iommu_pt_cleanup_tasklet);
 }
 
-/*
- * If the device isn't owned by dom0, it means it already
- * has been assigned to other domain, or it doesn't exist.
- */
-static int device_assigned(u16 seg, u8 bus, u8 devfn)
-{
-    struct pci_dev *pdev;
-
-    spin_lock(&pcidevs_lock);
-    pdev = pci_get_pdev_by_domain(dom0, seg, bus, devfn);
-    spin_unlock(&pcidevs_lock);
-
-    return pdev ? 0 : -EBUSY;
-}
-
-static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
-{
-    struct hvm_iommu *hd = domain_hvm_iommu(d);
-    struct pci_dev *pdev;
-    int rc = 0;
-
-    if ( !iommu_enabled || !hd->platform_ops )
-        return 0;
-
-    /* Prevent device assign if mem paging or mem sharing have been 
-     * enabled for this domain */
-    if ( unlikely(!need_iommu(d) &&
-            (d->arch.hvm_domain.mem_sharing_enabled ||
-             d->mem_event->paging.ring_page)) )
-        return -EXDEV;
-
-    if ( !spin_trylock(&pcidevs_lock) )
-        return -ERESTART;
-
-    if ( need_iommu(d) <= 0 )
-    {
-        if ( !iommu_use_hap_pt(d) )
-        {
-            rc = iommu_populate_page_table(d);
-            if ( rc )
-            {
-                spin_unlock(&pcidevs_lock);
-                return rc;
-            }
-        }
-        d->need_iommu = 1;
-    }
-
-    pdev = pci_get_pdev_by_domain(dom0, seg, bus, devfn);
-    if ( !pdev )
-    {
-        rc = pci_get_pdev(seg, bus, devfn) ? -EBUSY : -ENODEV;
-        goto done;
-    }
-
-    pdev->fault.count = 0;
-
-    if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) )
-        goto done;
-
-    for ( ; pdev->phantom_stride; rc = 0 )
-    {
-        devfn += pdev->phantom_stride;
-        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
-            break;
-        rc = hd->platform_ops->assign_device(d, devfn, pdev);
-        if ( rc )
-            printk(XENLOG_G_WARNING "d%d: assign %04x:%02x:%02x.%u failed (%d)\n",
-                   d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
-                   rc);
-    }
-
- done:
-    if ( !has_arch_pdevs(d) && need_iommu(d) )
-        iommu_teardown(d);
-    spin_unlock(&pcidevs_lock);
-
-    return rc;
-}
-
-static int iommu_populate_page_table(struct domain *d)
-{
-    struct hvm_iommu *hd = domain_hvm_iommu(d);
-    struct page_info *page;
-    int rc = 0, n = 0;
-
-    d->need_iommu = -1;
-
-    this_cpu(iommu_dont_flush_iotlb) = 1;
-    spin_lock(&d->page_alloc_lock);
-
-    if ( unlikely(d->is_dying) )
-        rc = -ESRCH;
-
-    while ( !rc && (page = page_list_remove_head(&d->page_list)) )
-    {
-        if ( is_hvm_domain(d) ||
-            (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page )
-        {
-            BUG_ON(SHARED_M2P(mfn_to_gmfn(d, page_to_mfn(page))));
-            rc = hd->platform_ops->map_page(
-                d, mfn_to_gmfn(d, page_to_mfn(page)), page_to_mfn(page),
-                IOMMUF_readable|IOMMUF_writable);
-            if ( rc )
-            {
-                page_list_add(page, &d->page_list);
-                break;
-            }
-        }
-        page_list_add_tail(page, &d->arch.relmem_list);
-        if ( !(++n & 0xff) && !page_list_empty(&d->page_list) &&
-             hypercall_preempt_check() )
-            rc = -ERESTART;
-    }
-
-    if ( !rc )
-    {
-        /*
-         * The expectation here is that generally there are many normal pages
-         * on relmem_list (the ones we put there) and only few being in an
-         * offline/broken state. The latter ones are always at the head of the
-         * list. Hence we first move the whole list, and then move back the
-         * first few entries.
-         */
-        page_list_move(&d->page_list, &d->arch.relmem_list);
-        while ( (page = page_list_first(&d->page_list)) != NULL &&
-                (page->count_info & (PGC_state|PGC_broken)) )
-        {
-            page_list_del(page, &d->page_list);
-            page_list_add_tail(page, &d->arch.relmem_list);
-        }
-    }
-
-    spin_unlock(&d->page_alloc_lock);
-    this_cpu(iommu_dont_flush_iotlb) = 0;
-
-    if ( !rc )
-        iommu_iotlb_flush_all(d);
-    else if ( rc != -ERESTART )
-        iommu_teardown(d);
-
-    return rc;
-}
-
-
 void iommu_domain_destroy(struct domain *d)
 {
     struct hvm_iommu *hd  = domain_hvm_iommu(d);
@@ -498,53 +273,6 @@  void iommu_iotlb_flush_all(struct domain *d)
     hd->platform_ops->iotlb_flush_all(d);
 }
 
-/* caller should hold the pcidevs_lock */
-int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
-{
-    struct hvm_iommu *hd = domain_hvm_iommu(d);
-    struct pci_dev *pdev = NULL;
-    int ret = 0;
-
-    if ( !iommu_enabled || !hd->platform_ops )
-        return -EINVAL;
-
-    ASSERT(spin_is_locked(&pcidevs_lock));
-    pdev = pci_get_pdev_by_domain(d, seg, bus, devfn);
-    if ( !pdev )
-        return -ENODEV;
-
-    while ( pdev->phantom_stride )
-    {
-        devfn += pdev->phantom_stride;
-        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
-            break;
-        ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
-        if ( !ret )
-            continue;
-
-        printk(XENLOG_G_ERR "d%d: deassign %04x:%02x:%02x.%u failed (%d)\n",
-               d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret);
-        return ret;
-    }
-
-    devfn = pdev->devfn;
-    ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
-    if ( ret )
-    {
-        dprintk(XENLOG_G_ERR,
-                "d%d: deassign device (%04x:%02x:%02x.%u) failed\n",
-                d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
-        return ret;
-    }
-
-    pdev->fault.count = 0;
-
-    if ( !has_arch_pdevs(d) && need_iommu(d) )
-        iommu_teardown(d);
-
-    return ret;
-}
-
 int __init iommu_setup(void)
 {
     int rc = -ENODEV;
@@ -585,91 +313,37 @@  int __init iommu_setup(void)
     return rc;
 }
 
-static int iommu_get_device_group(
-    struct domain *d, u16 seg, u8 bus, u8 devfn,
-    XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs)
-{
-    struct hvm_iommu *hd = domain_hvm_iommu(d);
-    struct pci_dev *pdev;
-    int group_id, sdev_id;
-    u32 bdf;
-    int i = 0;
-    const struct iommu_ops *ops = hd->platform_ops;
-
-    if ( !iommu_enabled || !ops || !ops->get_device_group_id )
-        return 0;
-
-    group_id = ops->get_device_group_id(seg, bus, devfn);
-
-    spin_lock(&pcidevs_lock);
-    for_each_pdev( d, pdev )
-    {
-        if ( (pdev->seg != seg) ||
-             ((pdev->bus == bus) && (pdev->devfn == devfn)) )
-            continue;
-
-        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) )
-            continue;
-
-        sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn);
-        if ( (sdev_id == group_id) && (i < max_sdevs) )
-        {
-            bdf = 0;
-            bdf |= (pdev->bus & 0xff) << 16;
-            bdf |= (pdev->devfn & 0xff) << 8;
-
-            if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
-            {
-                spin_unlock(&pcidevs_lock);
-                return -1;
-            }
-            i++;
-        }
-    }
-    spin_unlock(&pcidevs_lock);
-
-    return i;
-}
-
-void iommu_update_ire_from_apic(
-    unsigned int apic, unsigned int reg, unsigned int value)
-{
-    const struct iommu_ops *ops = iommu_get_ops();
-    ops->update_ire_from_apic(apic, reg, value);
-}
-
-int iommu_update_ire_from_msi(
-    struct msi_desc *msi_desc, struct msi_msg *msg)
+void iommu_resume()
 {
     const struct iommu_ops *ops = iommu_get_ops();
-    return iommu_intremap ? ops->update_ire_from_msi(msi_desc, msg) : 0;
+    if ( iommu_enabled )
+        ops->resume();
 }
 
-void iommu_read_msi_from_ire(
-    struct msi_desc *msi_desc, struct msi_msg *msg)
+int iommu_do_domctl(
+    struct xen_domctl *domctl, struct domain *d,
+    XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
 {
-    const struct iommu_ops *ops = iommu_get_ops();
-    if ( iommu_intremap )
-        ops->read_msi_from_ire(msi_desc, msg);
-}
+    int ret = 0;
 
-unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg)
-{
-    const struct iommu_ops *ops = iommu_get_ops();
-    return ops->read_apic_from_ire(apic, reg);
-}
+    if ( !iommu_enabled )
+        return -ENOSYS;
 
-int __init iommu_setup_hpet_msi(struct msi_desc *msi)
-{
-    const struct iommu_ops *ops = iommu_get_ops();
-    return ops->setup_hpet_msi ? ops->setup_hpet_msi(msi) : -ENODEV;
-}
+    switch ( domctl->cmd )
+    {
+#ifdef HAS_PCI
+    case XEN_DOMCTL_get_device_group:
+    case XEN_DOMCTL_test_assign_device:
+    case XEN_DOMCTL_assign_device:
+    case XEN_DOMCTL_deassign_device:
+        ret = iommu_do_pci_domctl(domctl, d, u_domctl);
+        break;
+#endif
+    default:
+        ret = -ENOSYS;
+    }
 
-void iommu_resume()
-{
-    const struct iommu_ops *ops = iommu_get_ops();
-    if ( iommu_enabled )
-        ops->resume();
+    return ret;
 }
 
 void iommu_suspend()
@@ -695,125 +369,6 @@  void iommu_crash_shutdown(void)
     iommu_enabled = iommu_intremap = 0;
 }
 
-int iommu_do_domctl(
-    struct xen_domctl *domctl, struct domain *d,
-    XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
-{
-    u16 seg;
-    u8 bus, devfn;
-    int ret = 0;
-
-    if ( !iommu_enabled )
-        return -ENOSYS;
-
-    switch ( domctl->cmd )
-    {
-    case XEN_DOMCTL_get_device_group:
-    {
-        u32 max_sdevs;
-        XEN_GUEST_HANDLE_64(uint32) sdevs;
-
-        ret = xsm_get_device_group(XSM_HOOK, domctl->u.get_device_group.machine_sbdf);
-        if ( ret )
-            break;
-
-        seg = domctl->u.get_device_group.machine_sbdf >> 16;
-        bus = (domctl->u.get_device_group.machine_sbdf >> 8) & 0xff;
-        devfn = domctl->u.get_device_group.machine_sbdf & 0xff;
-        max_sdevs = domctl->u.get_device_group.max_sdevs;
-        sdevs = domctl->u.get_device_group.sdev_array;
-
-        ret = iommu_get_device_group(d, seg, bus, devfn, sdevs, max_sdevs);
-        if ( ret < 0 )
-        {
-            dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n");
-            ret = -EFAULT;
-            domctl->u.get_device_group.num_sdevs = 0;
-        }
-        else
-        {
-            domctl->u.get_device_group.num_sdevs = ret;
-            ret = 0;
-        }
-        if ( __copy_field_to_guest(u_domctl, domctl, u.get_device_group) )
-            ret = -EFAULT;
-    }
-    break;
-
-    case XEN_DOMCTL_test_assign_device:
-        ret = xsm_test_assign_device(XSM_HOOK, domctl->u.assign_device.machine_sbdf);
-        if ( ret )
-            break;
-
-        seg = domctl->u.assign_device.machine_sbdf >> 16;
-        bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
-        devfn = domctl->u.assign_device.machine_sbdf & 0xff;
-
-        if ( device_assigned(seg, bus, devfn) )
-        {
-            printk(XENLOG_G_INFO
-                   "%04x:%02x:%02x.%u already assigned, or non-existent\n",
-                   seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
-            ret = -EINVAL;
-        }
-        break;
-
-    case XEN_DOMCTL_assign_device:
-        if ( unlikely(d->is_dying) )
-        {
-            ret = -EINVAL;
-            break;
-        }
-
-        ret = xsm_assign_device(XSM_HOOK, d, domctl->u.assign_device.machine_sbdf);
-        if ( ret )
-            break;
-
-        seg = domctl->u.assign_device.machine_sbdf >> 16;
-        bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
-        devfn = domctl->u.assign_device.machine_sbdf & 0xff;
-
-        ret = device_assigned(seg, bus, devfn) ?:
-              assign_device(d, seg, bus, devfn);
-        if ( ret == -ERESTART )
-            ret = hypercall_create_continuation(__HYPERVISOR_domctl,
-                                                "h", u_domctl);
-        else if ( ret )
-            printk(XENLOG_G_ERR "XEN_DOMCTL_assign_device: "
-                   "assign %04x:%02x:%02x.%u to dom%d failed (%d)\n",
-                   seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
-                   d->domain_id, ret);
-
-        break;
-
-    case XEN_DOMCTL_deassign_device:
-        ret = xsm_deassign_device(XSM_HOOK, d, domctl->u.assign_device.machine_sbdf);
-        if ( ret )
-            break;
-
-        seg = domctl->u.assign_device.machine_sbdf >> 16;
-        bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
-        devfn = domctl->u.assign_device.machine_sbdf & 0xff;
-
-        spin_lock(&pcidevs_lock);
-        ret = deassign_device(d, seg, bus, devfn);
-        spin_unlock(&pcidevs_lock);
-        if ( ret )
-            printk(XENLOG_G_ERR
-                   "deassign %04x:%02x:%02x.%u from dom%d failed (%d)\n",
-                   seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
-                   d->domain_id, ret);
-
-        break;
-
-    default:
-        ret = -ENOSYS;
-        break;
-    }
-
-    return ret;
-}
-
 static void iommu_dump_p2m_table(unsigned char key)
 {
     struct domain *d;
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index c5c8344..0108f44 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -26,6 +26,9 @@ 
 #include <asm/hvm/irq.h>
 #include <xen/delay.h>
 #include <xen/keyhandler.h>
+#include <xen/event.h>
+#include <xen/guest_access.h>
+#include <xen/paging.h>
 #include <xen/radix-tree.h>
 #include <xen/tasklet.h>
 #include <xsm/xsm.h>
@@ -980,6 +983,440 @@  static int __init setup_dump_pcidevs(void)
 }
 __initcall(setup_dump_pcidevs);
 
+static int iommu_populate_page_table(struct domain *d)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+    struct page_info *page;
+    int rc = 0, n = 0;
+
+    d->need_iommu = -1;
+
+    this_cpu(iommu_dont_flush_iotlb) = 1;
+    spin_lock(&d->page_alloc_lock);
+
+    if ( unlikely(d->is_dying) )
+        rc = -ESRCH;
+
+
+    while ( !rc && (page = page_list_remove_head(&d->page_list)) )
+    {
+        if ( is_hvm_domain(d) ||
+            (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page )
+        {
+            BUG_ON(SHARED_M2P(mfn_to_gmfn(d, page_to_mfn(page))));
+            rc = hd->platform_ops->map_page(
+                d, mfn_to_gmfn(d, page_to_mfn(page)), page_to_mfn(page),
+                IOMMUF_readable|IOMMUF_writable);
+            if ( rc )
+            {
+                page_list_add(page, &d->page_list);
+                break;
+            }
+        }
+        page_list_add_tail(page, &d->arch.relmem_list);
+        if ( !(++n & 0xff) && !page_list_empty(&d->page_list) &&
+             hypercall_preempt_check() )
+            rc = -ERESTART;
+    }
+
+    if ( !rc )
+    {
+        /*
+         * The expectation here is that generally there are many normal pages
+         * on relmem_list (the ones we put there) and only few being in an
+         * offline/broken state. The latter ones are always at the head of the
+         * list. Hence we first move the whole list, and then move back the
+         * first few entries.
+         */
+        page_list_move(&d->page_list, &d->arch.relmem_list);
+        while ( (page = page_list_first(&d->page_list)) != NULL &&
+                (page->count_info & (PGC_state|PGC_broken)) )
+        {
+            page_list_del(page, &d->page_list);
+            page_list_add_tail(page, &d->arch.relmem_list);
+        }
+    }
+
+    spin_unlock(&d->page_alloc_lock);
+    this_cpu(iommu_dont_flush_iotlb) = 0;
+
+    if ( !rc )
+        iommu_iotlb_flush_all(d);
+    else if ( rc != -ERESTART )
+        iommu_teardown(d);
+
+    return rc;
+}
+
+int iommu_add_device(struct pci_dev *pdev)
+{
+    struct hvm_iommu *hd;
+    int rc;
+    u8 devfn;
+
+    if ( !pdev->domain )
+        return -EINVAL;
+
+    ASSERT(spin_is_locked(&pcidevs_lock));
+
+    hd = domain_hvm_iommu(pdev->domain);
+    if ( !iommu_enabled || !hd->platform_ops )
+        return 0;
+
+    rc = hd->platform_ops->add_device(pdev->devfn, pdev);
+    if ( rc || !pdev->phantom_stride )
+        return rc;
+
+    for ( devfn = pdev->devfn ; ; )
+    {
+        devfn += pdev->phantom_stride;
+        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+            return 0;
+        rc = hd->platform_ops->add_device(devfn, pdev);
+        if ( rc )
+            printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n",
+                   pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
+    }
+}
+
+int iommu_enable_device(struct pci_dev *pdev)
+{
+    struct hvm_iommu *hd;
+
+    if ( !pdev->domain )
+        return -EINVAL;
+
+    ASSERT(spin_is_locked(&pcidevs_lock));
+
+    hd = domain_hvm_iommu(pdev->domain);
+    if ( !iommu_enabled || !hd->platform_ops ||
+         !hd->platform_ops->enable_device )
+        return 0;
+
+    return hd->platform_ops->enable_device(pdev);
+}
+
+int iommu_remove_device(struct pci_dev *pdev)
+{
+    struct hvm_iommu *hd;
+    u8 devfn;
+
+    if ( !pdev->domain )
+        return -EINVAL;
+
+    hd = domain_hvm_iommu(pdev->domain);
+    if ( !iommu_enabled || !hd->platform_ops )
+        return 0;
+
+    for ( devfn = pdev->devfn ; pdev->phantom_stride; )
+    {
+        int rc;
+
+        devfn += pdev->phantom_stride;
+        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+            break;
+        rc = hd->platform_ops->remove_device(devfn, pdev);
+        if ( !rc )
+            continue;
+
+        printk(XENLOG_ERR "IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n",
+               pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
+        return rc;
+    }
+
+    return hd->platform_ops->remove_device(pdev->devfn, pdev);
+}
+
+/*
+ * If the device isn't owned by dom0, it means it already
+ * has been assigned to other domain, or it doesn't exist.
+ */
+static int device_assigned(u16 seg, u8 bus, u8 devfn)
+{
+    struct pci_dev *pdev = NULL;
+
+    spin_lock(&pcidevs_lock);
+    pdev = pci_get_pdev_by_domain(dom0, seg, bus, devfn);
+    spin_unlock(&pcidevs_lock);
+
+    return pdev ? 0 : -EBUSY;
+}
+
+static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+    struct pci_dev *pdev;
+    int rc = 0;
+
+    if ( !iommu_enabled || !hd->platform_ops )
+        return 0;
+
+    /* Prevent device assign if mem paging or mem sharing have been 
+     * enabled for this domain */
+    if ( unlikely(!need_iommu(d) &&
+            (d->arch.hvm_domain.mem_sharing_enabled ||
+             d->mem_event->paging.ring_page)) )
+        return -EXDEV;
+
+    if ( !spin_trylock(&pcidevs_lock) )
+        return -ERESTART;
+
+    if ( need_iommu(d) <= 0 )
+    {
+        if ( !iommu_use_hap_pt(d) )
+        {
+            rc = iommu_populate_page_table(d);
+            if ( rc )
+            {
+                spin_unlock(&pcidevs_lock);
+                return rc;
+            }
+        }
+        d->need_iommu = 1;
+    }
+
+    pdev = pci_get_pdev_by_domain(dom0, seg, bus, devfn);
+    if ( !pdev )
+    {
+        rc = pci_get_pdev(seg, bus, devfn) ? -EBUSY : -ENODEV;
+        goto done;
+    }
+
+    pdev->fault.count = 0;
+
+    if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) )
+        goto done;
+
+    for ( ; pdev->phantom_stride; rc = 0 )
+    {
+        devfn += pdev->phantom_stride;
+        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+            break;
+        rc = hd->platform_ops->assign_device(d, devfn, pdev);
+        if ( rc )
+            printk(XENLOG_G_WARNING "d%d: assign %04x:%02x:%02x.%u failed (%d)\n",
+                   d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                   rc);
+    }
+
+ done:
+    if ( !has_arch_pdevs(d) && need_iommu(d) )
+        iommu_teardown(d);
+    spin_unlock(&pcidevs_lock);
+
+    return rc;
+}
+
+/* caller should hold the pcidevs_lock */
+int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+    struct pci_dev *pdev = NULL;
+    int ret = 0;
+
+    if ( !iommu_enabled || !hd->platform_ops )
+        return -EINVAL;
+
+    ASSERT(spin_is_locked(&pcidevs_lock));
+    pdev = pci_get_pdev_by_domain(d, seg, bus, devfn);
+    if ( !pdev )
+        return -ENODEV;
+
+    while ( pdev->phantom_stride )
+    {
+        devfn += pdev->phantom_stride;
+        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+            break;
+        ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
+        if ( !ret )
+            continue;
+
+        printk(XENLOG_G_ERR "d%d: deassign %04x:%02x:%02x.%u failed (%d)\n",
+               d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret);
+        return ret;
+    }
+
+    devfn = pdev->devfn;
+    ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
+    if ( ret )
+    {
+        dprintk(XENLOG_G_ERR,
+                "d%d: deassign device (%04x:%02x:%02x.%u) failed\n",
+                d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+        return ret;
+    }
+
+    pdev->fault.count = 0;
+
+    if ( !has_arch_pdevs(d) && need_iommu(d) )
+        iommu_teardown(d);
+
+    return ret;
+}
+
+static int iommu_get_device_group(
+    struct domain *d, u16 seg, u8 bus, u8 devfn,
+    XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+    struct pci_dev *pdev;
+    int group_id, sdev_id;
+    u32 bdf;
+    int i = 0;
+    const struct iommu_ops *ops = hd->platform_ops;
+
+    if ( !iommu_enabled || !ops || !ops->get_device_group_id )
+        return 0;
+
+    group_id = ops->get_device_group_id(seg, bus, devfn);
+
+    spin_lock(&pcidevs_lock);
+    for_each_pdev( d, pdev )
+    {
+        if ( (pdev->seg != seg) ||
+             ((pdev->bus == bus) && (pdev->devfn == devfn)) )
+            continue;
+
+        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) )
+            continue;
+
+        sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn);
+        if ( (sdev_id == group_id) && (i < max_sdevs) )
+        {
+            bdf = 0;
+            bdf |= (pdev->bus & 0xff) << 16;
+            bdf |= (pdev->devfn & 0xff) << 8;
+
+            if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
+            {
+                spin_unlock(&pcidevs_lock);
+                return -1;
+            }
+            i++;
+        }
+    }
+
+    spin_unlock(&pcidevs_lock);
+
+    return i;
+}
+
+int iommu_do_pci_domctl(
+    struct xen_domctl *domctl, struct domain *d,
+    XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
+{
+    u16 seg;
+    u8 bus, devfn;
+    int ret = 0;
+
+    switch ( domctl->cmd )
+    {
+    case XEN_DOMCTL_get_device_group:
+    {
+        u32 max_sdevs;
+        XEN_GUEST_HANDLE_64(uint32) sdevs;
+
+        ret = xsm_get_device_group(XSM_HOOK, domctl->u.get_device_group.machine_sbdf);
+        if ( ret )
+            break;
+
+        seg = domctl->u.get_device_group.machine_sbdf >> 16;
+        bus = (domctl->u.get_device_group.machine_sbdf >> 8) & 0xff;
+        devfn = domctl->u.get_device_group.machine_sbdf & 0xff;
+        max_sdevs = domctl->u.get_device_group.max_sdevs;
+        sdevs = domctl->u.get_device_group.sdev_array;
+
+        ret = iommu_get_device_group(d, seg, bus, devfn, sdevs, max_sdevs);
+        if ( ret < 0 )
+        {
+            dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n");
+            ret = -EFAULT;
+            domctl->u.get_device_group.num_sdevs = 0;
+        }
+        else
+        {
+            domctl->u.get_device_group.num_sdevs = ret;
+            ret = 0;
+        }
+        if ( __copy_field_to_guest(u_domctl, domctl, u.get_device_group) )
+            ret = -EFAULT;
+    }
+    break;
+
+    case XEN_DOMCTL_test_assign_device:
+        ret = xsm_test_assign_device(XSM_HOOK, domctl->u.assign_device.machine_sbdf);
+        if ( ret )
+            break;
+
+        seg = domctl->u.assign_device.machine_sbdf >> 16;
+        bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
+        devfn = domctl->u.assign_device.machine_sbdf & 0xff;
+
+        if ( device_assigned(seg, bus, devfn) )
+        {
+            printk(XENLOG_G_INFO
+                   "%04x:%02x:%02x.%u already assigned, or non-existent\n",
+                   seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+            ret = -EINVAL;
+        }
+        break;
+
+    case XEN_DOMCTL_assign_device:
+        if ( unlikely(d->is_dying) )
+        {
+            ret = -EINVAL;
+            break;
+        }
+
+        ret = xsm_assign_device(XSM_HOOK, d, domctl->u.assign_device.machine_sbdf);
+        if ( ret )
+            break;
+
+        seg = domctl->u.assign_device.machine_sbdf >> 16;
+        bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
+        devfn = domctl->u.assign_device.machine_sbdf & 0xff;
+
+        ret = device_assigned(seg, bus, devfn) ?:
+              assign_device(d, seg, bus, devfn);
+        if ( ret == -ERESTART )
+            ret = hypercall_create_continuation(__HYPERVISOR_domctl,
+                                                "h", u_domctl);
+        else if ( ret )
+            printk(XENLOG_G_ERR "XEN_DOMCTL_assign_device: "
+                   "assign %04x:%02x:%02x.%u to dom%d failed (%d)\n",
+                   seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                   d->domain_id, ret);
+
+        break;
+
+    case XEN_DOMCTL_deassign_device:
+        ret = xsm_deassign_device(XSM_HOOK, d, domctl->u.assign_device.machine_sbdf);
+        if ( ret )
+            break;
+
+        seg = domctl->u.assign_device.machine_sbdf >> 16;
+        bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
+        devfn = domctl->u.assign_device.machine_sbdf & 0xff;
+
+        spin_lock(&pcidevs_lock);
+        ret = deassign_device(d, seg, bus, devfn);
+        spin_unlock(&pcidevs_lock);
+        if ( ret )
+            printk(XENLOG_G_ERR
+                   "deassign %04x:%02x:%02x.%u from dom%d failed (%d)\n",
+                   seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                   d->domain_id, ret);
+
+        break;
+
+    default:
+        ret = -ENOSYS;
+        break;
+    }
+
+    return ret;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/drivers/passthrough/x86/Makefile b/xen/drivers/passthrough/x86/Makefile
index c124a51..a70cf94 100644
--- a/xen/drivers/passthrough/x86/Makefile
+++ b/xen/drivers/passthrough/x86/Makefile
@@ -1 +1,2 @@ 
 obj-y += ats.o
+obj-y += iommu.o
diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c
new file mode 100644
index 0000000..bd3c23b
--- /dev/null
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -0,0 +1,65 @@ 
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/sched.h>
+#include <xen/iommu.h>
+#include <xen/paging.h>
+#include <xen/guest_access.h>
+#include <xen/event.h>
+#include <xen/softirq.h>
+#include <xsm/xsm.h>
+
+void iommu_update_ire_from_apic(
+    unsigned int apic, unsigned int reg, unsigned int value)
+{
+    const struct iommu_ops *ops = iommu_get_ops();
+    ops->update_ire_from_apic(apic, reg, value);
+}
+
+int iommu_update_ire_from_msi(
+    struct msi_desc *msi_desc, struct msi_msg *msg)
+{
+    const struct iommu_ops *ops = iommu_get_ops();
+    return iommu_intremap ? ops->update_ire_from_msi(msi_desc, msg) : 0;
+}
+
+void iommu_read_msi_from_ire(
+    struct msi_desc *msi_desc, struct msi_msg *msg)
+{
+    const struct iommu_ops *ops = iommu_get_ops();
+    if ( iommu_intremap )
+        ops->read_msi_from_ire(msi_desc, msg);
+}
+
+unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg)
+{
+    const struct iommu_ops *ops = iommu_get_ops();
+    return ops->read_apic_from_ire(apic, reg);
+}
+
+int __init iommu_setup_hpet_msi(struct msi_desc *msi)
+{
+    const struct iommu_ops *ops = iommu_get_ops();
+    return ops->setup_hpet_msi ? ops->setup_hpet_msi(msi) : -ENODEV;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/iommu.h b/xen/include/asm-x86/iommu.h
new file mode 100644
index 0000000..34c1896
--- /dev/null
+++ b/xen/include/asm-x86/iommu.h
@@ -0,0 +1,46 @@ 
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+*/
+#ifndef __ARCH_X86_IOMMU_H__
+#define __ARCH_X86_IOMMU_H__
+
+#define MAX_IOMMUS 32
+
+#include <asm/msi.h>
+
+void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
+int iommu_update_ire_from_msi(struct msi_desc *msi_desc, struct msi_msg *msg);
+void iommu_read_msi_from_ire(struct msi_desc *msi_desc, struct msi_msg *msg);
+unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg);
+int iommu_setup_hpet_msi(struct msi_desc *);
+
+void iommu_share_p2m_table(struct domain *d);
+
+/* While VT-d specific, this must get declared in a generic header. */
+int adjust_vtd_irq_affinities(void);
+void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, int order, int present);
+int iommu_supports_eim(void);
+int iommu_enable_x2apic_IR(void);
+void iommu_disable_x2apic_IR(void);
+void iommu_set_dom0_mapping(struct domain *d);
+
+#endif /* !__ARCH_X86_IOMMU_H__ */
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/xen/hvm/iommu.h b/xen/include/xen/hvm/iommu.h
index 26539e0..2abb4e3 100644
--- a/xen/include/xen/hvm/iommu.h
+++ b/xen/include/xen/hvm/iommu.h
@@ -21,6 +21,7 @@ 
 #define __XEN_HVM_IOMMU_H__
 
 #include <xen/iommu.h>
+#include <asm/hvm/iommu.h>
 
 struct g2m_ioport {
     struct list_head list;
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index fcbc432..65a37c0 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -25,6 +25,7 @@ 
 #include <xen/pci.h>
 #include <public/hvm/ioreq.h>
 #include <public/domctl.h>
+#include <asm/iommu.h>
 
 extern bool_t iommu_enable, iommu_enabled;
 extern bool_t force_iommu, iommu_verbose;
@@ -39,17 +40,12 @@  extern bool_t amd_iommu_perdev_intremap;
 
 #define domain_hvm_iommu(d)     (&d->arch.hvm_domain.hvm_iommu)
 
-#define MAX_IOMMUS 32
-
 #define PAGE_SHIFT_4K       (12)
 #define PAGE_SIZE_4K        (1UL << PAGE_SHIFT_4K)
 #define PAGE_MASK_4K        (((u64)-1) << PAGE_SHIFT_4K)
 #define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
 
 int iommu_setup(void);
-int iommu_supports_eim(void);
-int iommu_enable_x2apic_IR(void);
-void iommu_disable_x2apic_IR(void);
 
 int iommu_add_device(struct pci_dev *pdev);
 int iommu_enable_device(struct pci_dev *pdev);
@@ -59,6 +55,9 @@  void iommu_dom0_init(struct domain *d);
 void iommu_domain_destroy(struct domain *d);
 int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn);
 
+/* Function used internally, use iommu_domain_destroy */
+void iommu_teardown(struct domain *d);
+
 /* iommu_map_page() takes flags to direct the mapping operation. */
 #define _IOMMUF_readable 0
 #define IOMMUF_readable  (1u<<_IOMMUF_readable)
@@ -67,9 +66,8 @@  int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn);
 int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
                    unsigned int flags);
 int iommu_unmap_page(struct domain *d, unsigned long gfn);
-void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, int order, int present);
-void iommu_domain_teardown(struct domain *d);
 
+#ifdef HAS_PCI
 void pt_pci_init(void);
 
 struct pirq;
@@ -84,52 +82,56 @@  void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci);
 bool_t pt_irq_need_timer(uint32_t flags);
 
 #define PT_IRQ_TIME_OUT MILLISECS(8)
+#endif /* HAS_PCI */
 
+#ifdef CONFIG_X86
 struct msi_desc;
 struct msi_msg;
+#endif /* CONFIG_X86 */
+
 struct page_info;
 
 struct iommu_ops {
     int (*init)(struct domain *d);
     void (*dom0_init)(struct domain *d);
+#ifdef HAS_PCI
     int (*add_device)(u8 devfn, struct pci_dev *);
     int (*enable_device)(struct pci_dev *pdev);
     int (*remove_device)(u8 devfn, struct pci_dev *);
     int (*assign_device)(struct domain *, u8 devfn, struct pci_dev *);
+    int (*reassign_device)(struct domain *s, struct domain *t,
+			   u8 devfn, struct pci_dev *);
+    int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn);
+#endif /* HAS_PCI */
     void (*teardown)(struct domain *d);
     int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn,
                     unsigned int flags);
     int (*unmap_page)(struct domain *d, unsigned long gfn);
     void (*free_page_table)(struct page_info *);
-    int (*reassign_device)(struct domain *s, struct domain *t,
-			   u8 devfn, struct pci_dev *);
-    int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn);
+#ifdef CONFIG_X86
     void (*update_ire_from_apic)(unsigned int apic, unsigned int reg, unsigned int value);
     int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg);
     void (*read_msi_from_ire)(struct msi_desc *msi_desc, struct msi_msg *msg);
     unsigned int (*read_apic_from_ire)(unsigned int apic, unsigned int reg);
     int (*setup_hpet_msi)(struct msi_desc *);
+    void (*share_p2m)(struct domain *d);
+#endif /* CONFIG_X86 */
     void (*suspend)(void);
     void (*resume)(void);
-    void (*share_p2m)(struct domain *d);
     void (*crash_shutdown)(void);
     void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
     void (*iotlb_flush_all)(struct domain *d);
     void (*dump_p2m_table)(struct domain *d);
 };
 
-void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
-int iommu_update_ire_from_msi(struct msi_desc *msi_desc, struct msi_msg *msg);
-void iommu_read_msi_from_ire(struct msi_desc *msi_desc, struct msi_msg *msg);
-unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg);
-int iommu_setup_hpet_msi(struct msi_desc *);
-
 void iommu_suspend(void);
 void iommu_resume(void);
 void iommu_crash_shutdown(void);
 
-void iommu_set_dom0_mapping(struct domain *d);
-void iommu_share_p2m_table(struct domain *d);
+#if HAS_PCI
+int iommu_do_pci_domctl(struct xen_domctl *, struct domain *d,
+                        XEN_GUEST_HANDLE_PARAM(xen_domctl_t));
+#endif
 
 int iommu_do_domctl(struct xen_domctl *, struct domain *d,
                     XEN_GUEST_HANDLE_PARAM(xen_domctl_t));
@@ -137,9 +139,6 @@  int iommu_do_domctl(struct xen_domctl *, struct domain *d,
 void iommu_iotlb_flush(struct domain *d, unsigned long gfn, unsigned int page_count);
 void iommu_iotlb_flush_all(struct domain *d);
 
-/* While VT-d specific, this must get declared in a generic header. */
-int adjust_vtd_irq_affinities(void);
-
 /*
  * The purpose of the iommu_dont_flush_iotlb optional cpu flag is to
  * avoid unecessary iotlb_flush in the low level IOMMU code.