@@ -3,5 +3,5 @@ subdir-$(x86) += amd
subdir-$(x86_64) += x86
obj-y += iommu.o
-obj-y += io.o
-obj-y += pci.o
+obj-$(x86) += io.o
+obj-$(HAS_PCI) += pci.o
@@ -24,7 +24,6 @@
#include <xsm/xsm.h>
static void parse_iommu_param(char *s);
-static int iommu_populate_page_table(struct domain *d);
static void iommu_dump_p2m_table(unsigned char key);
/*
@@ -179,86 +178,7 @@ void __init iommu_dom0_init(struct domain *d)
return hd->platform_ops->dom0_init(d);
}
-int iommu_add_device(struct pci_dev *pdev)
-{
- struct hvm_iommu *hd;
- int rc;
- u8 devfn;
-
- if ( !pdev->domain )
- return -EINVAL;
-
- ASSERT(spin_is_locked(&pcidevs_lock));
-
- hd = domain_hvm_iommu(pdev->domain);
- if ( !iommu_enabled || !hd->platform_ops )
- return 0;
-
- rc = hd->platform_ops->add_device(pdev->devfn, pdev);
- if ( rc || !pdev->phantom_stride )
- return rc;
-
- for ( devfn = pdev->devfn ; ; )
- {
- devfn += pdev->phantom_stride;
- if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
- return 0;
- rc = hd->platform_ops->add_device(devfn, pdev);
- if ( rc )
- printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n",
- pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
- }
-}
-
-int iommu_enable_device(struct pci_dev *pdev)
-{
- struct hvm_iommu *hd;
-
- if ( !pdev->domain )
- return -EINVAL;
-
- ASSERT(spin_is_locked(&pcidevs_lock));
-
- hd = domain_hvm_iommu(pdev->domain);
- if ( !iommu_enabled || !hd->platform_ops ||
- !hd->platform_ops->enable_device )
- return 0;
-
- return hd->platform_ops->enable_device(pdev);
-}
-
-int iommu_remove_device(struct pci_dev *pdev)
-{
- struct hvm_iommu *hd;
- u8 devfn;
-
- if ( !pdev->domain )
- return -EINVAL;
-
- hd = domain_hvm_iommu(pdev->domain);
- if ( !iommu_enabled || !hd->platform_ops )
- return 0;
-
- for ( devfn = pdev->devfn ; pdev->phantom_stride; )
- {
- int rc;
-
- devfn += pdev->phantom_stride;
- if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
- break;
- rc = hd->platform_ops->remove_device(devfn, pdev);
- if ( !rc )
- continue;
-
- printk(XENLOG_ERR "IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n",
- pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
- return rc;
- }
-
- return hd->platform_ops->remove_device(pdev->devfn, pdev);
-}
-
-static void iommu_teardown(struct domain *d)
+void iommu_teardown(struct domain *d)
{
const struct hvm_iommu *hd = domain_hvm_iommu(d);
@@ -267,151 +187,6 @@ static void iommu_teardown(struct domain *d)
tasklet_schedule(&iommu_pt_cleanup_tasklet);
}
-/*
- * If the device isn't owned by dom0, it means it already
- * has been assigned to other domain, or it doesn't exist.
- */
-static int device_assigned(u16 seg, u8 bus, u8 devfn)
-{
- struct pci_dev *pdev;
-
- spin_lock(&pcidevs_lock);
- pdev = pci_get_pdev_by_domain(dom0, seg, bus, devfn);
- spin_unlock(&pcidevs_lock);
-
- return pdev ? 0 : -EBUSY;
-}
-
-static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
-{
- struct hvm_iommu *hd = domain_hvm_iommu(d);
- struct pci_dev *pdev;
- int rc = 0;
-
- if ( !iommu_enabled || !hd->platform_ops )
- return 0;
-
- /* Prevent device assign if mem paging or mem sharing have been
- * enabled for this domain */
- if ( unlikely(!need_iommu(d) &&
- (d->arch.hvm_domain.mem_sharing_enabled ||
- d->mem_event->paging.ring_page)) )
- return -EXDEV;
-
- if ( !spin_trylock(&pcidevs_lock) )
- return -ERESTART;
-
- if ( need_iommu(d) <= 0 )
- {
- if ( !iommu_use_hap_pt(d) )
- {
- rc = iommu_populate_page_table(d);
- if ( rc )
- {
- spin_unlock(&pcidevs_lock);
- return rc;
- }
- }
- d->need_iommu = 1;
- }
-
- pdev = pci_get_pdev_by_domain(dom0, seg, bus, devfn);
- if ( !pdev )
- {
- rc = pci_get_pdev(seg, bus, devfn) ? -EBUSY : -ENODEV;
- goto done;
- }
-
- pdev->fault.count = 0;
-
- if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) )
- goto done;
-
- for ( ; pdev->phantom_stride; rc = 0 )
- {
- devfn += pdev->phantom_stride;
- if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
- break;
- rc = hd->platform_ops->assign_device(d, devfn, pdev);
- if ( rc )
- printk(XENLOG_G_WARNING "d%d: assign %04x:%02x:%02x.%u failed (%d)\n",
- d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
- rc);
- }
-
- done:
- if ( !has_arch_pdevs(d) && need_iommu(d) )
- iommu_teardown(d);
- spin_unlock(&pcidevs_lock);
-
- return rc;
-}
-
-static int iommu_populate_page_table(struct domain *d)
-{
- struct hvm_iommu *hd = domain_hvm_iommu(d);
- struct page_info *page;
- int rc = 0, n = 0;
-
- d->need_iommu = -1;
-
- this_cpu(iommu_dont_flush_iotlb) = 1;
- spin_lock(&d->page_alloc_lock);
-
- if ( unlikely(d->is_dying) )
- rc = -ESRCH;
-
- while ( !rc && (page = page_list_remove_head(&d->page_list)) )
- {
- if ( is_hvm_domain(d) ||
- (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page )
- {
- BUG_ON(SHARED_M2P(mfn_to_gmfn(d, page_to_mfn(page))));
- rc = hd->platform_ops->map_page(
- d, mfn_to_gmfn(d, page_to_mfn(page)), page_to_mfn(page),
- IOMMUF_readable|IOMMUF_writable);
- if ( rc )
- {
- page_list_add(page, &d->page_list);
- break;
- }
- }
- page_list_add_tail(page, &d->arch.relmem_list);
- if ( !(++n & 0xff) && !page_list_empty(&d->page_list) &&
- hypercall_preempt_check() )
- rc = -ERESTART;
- }
-
- if ( !rc )
- {
- /*
- * The expectation here is that generally there are many normal pages
- * on relmem_list (the ones we put there) and only few being in an
- * offline/broken state. The latter ones are always at the head of the
- * list. Hence we first move the whole list, and then move back the
- * first few entries.
- */
- page_list_move(&d->page_list, &d->arch.relmem_list);
- while ( (page = page_list_first(&d->page_list)) != NULL &&
- (page->count_info & (PGC_state|PGC_broken)) )
- {
- page_list_del(page, &d->page_list);
- page_list_add_tail(page, &d->arch.relmem_list);
- }
- }
-
- spin_unlock(&d->page_alloc_lock);
- this_cpu(iommu_dont_flush_iotlb) = 0;
-
- if ( !rc )
- iommu_iotlb_flush_all(d);
- else if ( rc != -ERESTART )
- iommu_teardown(d);
-
- return rc;
-}
-
-
void iommu_domain_destroy(struct domain *d)
{
struct hvm_iommu *hd = domain_hvm_iommu(d);
@@ -498,53 +273,6 @@ void iommu_iotlb_flush_all(struct domain *d)
hd->platform_ops->iotlb_flush_all(d);
}
-/* caller should hold the pcidevs_lock */
-int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
-{
- struct hvm_iommu *hd = domain_hvm_iommu(d);
- struct pci_dev *pdev = NULL;
- int ret = 0;
-
- if ( !iommu_enabled || !hd->platform_ops )
- return -EINVAL;
-
- ASSERT(spin_is_locked(&pcidevs_lock));
- pdev = pci_get_pdev_by_domain(d, seg, bus, devfn);
- if ( !pdev )
- return -ENODEV;
-
- while ( pdev->phantom_stride )
- {
- devfn += pdev->phantom_stride;
- if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
- break;
- ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
- if ( !ret )
- continue;
-
- printk(XENLOG_G_ERR "d%d: deassign %04x:%02x:%02x.%u failed (%d)\n",
- d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret);
- return ret;
- }
-
- devfn = pdev->devfn;
- ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
- if ( ret )
- {
- dprintk(XENLOG_G_ERR,
- "d%d: deassign device (%04x:%02x:%02x.%u) failed\n",
- d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
- return ret;
- }
-
- pdev->fault.count = 0;
-
- if ( !has_arch_pdevs(d) && need_iommu(d) )
- iommu_teardown(d);
-
- return ret;
-}
-
int __init iommu_setup(void)
{
int rc = -ENODEV;
@@ -585,91 +313,37 @@ int __init iommu_setup(void)
return rc;
}
-static int iommu_get_device_group(
- struct domain *d, u16 seg, u8 bus, u8 devfn,
- XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs)
-{
- struct hvm_iommu *hd = domain_hvm_iommu(d);
- struct pci_dev *pdev;
- int group_id, sdev_id;
- u32 bdf;
- int i = 0;
- const struct iommu_ops *ops = hd->platform_ops;
-
- if ( !iommu_enabled || !ops || !ops->get_device_group_id )
- return 0;
-
- group_id = ops->get_device_group_id(seg, bus, devfn);
-
- spin_lock(&pcidevs_lock);
- for_each_pdev( d, pdev )
- {
- if ( (pdev->seg != seg) ||
- ((pdev->bus == bus) && (pdev->devfn == devfn)) )
- continue;
-
- if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) )
- continue;
-
- sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn);
- if ( (sdev_id == group_id) && (i < max_sdevs) )
- {
- bdf = 0;
- bdf |= (pdev->bus & 0xff) << 16;
- bdf |= (pdev->devfn & 0xff) << 8;
-
- if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
- {
- spin_unlock(&pcidevs_lock);
- return -1;
- }
- i++;
- }
- }
- spin_unlock(&pcidevs_lock);
-
- return i;
-}
-
-void iommu_update_ire_from_apic(
- unsigned int apic, unsigned int reg, unsigned int value)
-{
- const struct iommu_ops *ops = iommu_get_ops();
- ops->update_ire_from_apic(apic, reg, value);
-}
-
-int iommu_update_ire_from_msi(
- struct msi_desc *msi_desc, struct msi_msg *msg)
+/* Call the IOMMU driver's resume hook, but only when the IOMMU is enabled. */
+void iommu_resume()
{
const struct iommu_ops *ops = iommu_get_ops();
- return iommu_intremap ? ops->update_ire_from_msi(msi_desc, msg) : 0;
+ if ( iommu_enabled )
+ ops->resume();
}
-void iommu_read_msi_from_ire(
- struct msi_desc *msi_desc, struct msi_msg *msg)
+/*
+ * Generic entry point for IOMMU related domctl operations.
+ *
+ * Returns -ENOSYS when the IOMMU is not enabled or when the command is not
+ * handled here. When HAS_PCI is defined, the PCI passthrough commands are
+ * forwarded to iommu_do_pci_domctl() and its result is returned.
+ */
+int iommu_do_domctl(
+ struct xen_domctl *domctl, struct domain *d,
+ XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
{
- const struct iommu_ops *ops = iommu_get_ops();
- if ( iommu_intremap )
- ops->read_msi_from_ire(msi_desc, msg);
-}
+ int ret = 0;
-unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg)
-{
- const struct iommu_ops *ops = iommu_get_ops();
- return ops->read_apic_from_ire(apic, reg);
-}
+ if ( !iommu_enabled )
+ return -ENOSYS;
-int __init iommu_setup_hpet_msi(struct msi_desc *msi)
-{
- const struct iommu_ops *ops = iommu_get_ops();
- return ops->setup_hpet_msi ? ops->setup_hpet_msi(msi) : -ENODEV;
-}
+ switch ( domctl->cmd )
+ {
+#ifdef HAS_PCI
+ /* Device assignment commands are PCI specific. */
+ case XEN_DOMCTL_get_device_group:
+ case XEN_DOMCTL_test_assign_device:
+ case XEN_DOMCTL_assign_device:
+ case XEN_DOMCTL_deassign_device:
+ ret = iommu_do_pci_domctl(domctl, d, u_domctl);
+ break;
+#endif
+ default:
+ ret = -ENOSYS;
+ }
-void iommu_resume()
-{
- const struct iommu_ops *ops = iommu_get_ops();
- if ( iommu_enabled )
- ops->resume();
+ return ret;
}
void iommu_suspend()
@@ -695,125 +369,6 @@ void iommu_crash_shutdown(void)
iommu_enabled = iommu_intremap = 0;
}
-int iommu_do_domctl(
- struct xen_domctl *domctl, struct domain *d,
- XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
-{
- u16 seg;
- u8 bus, devfn;
- int ret = 0;
-
- if ( !iommu_enabled )
- return -ENOSYS;
-
- switch ( domctl->cmd )
- {
- case XEN_DOMCTL_get_device_group:
- {
- u32 max_sdevs;
- XEN_GUEST_HANDLE_64(uint32) sdevs;
-
- ret = xsm_get_device_group(XSM_HOOK, domctl->u.get_device_group.machine_sbdf);
- if ( ret )
- break;
-
- seg = domctl->u.get_device_group.machine_sbdf >> 16;
- bus = (domctl->u.get_device_group.machine_sbdf >> 8) & 0xff;
- devfn = domctl->u.get_device_group.machine_sbdf & 0xff;
- max_sdevs = domctl->u.get_device_group.max_sdevs;
- sdevs = domctl->u.get_device_group.sdev_array;
-
- ret = iommu_get_device_group(d, seg, bus, devfn, sdevs, max_sdevs);
- if ( ret < 0 )
- {
- dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n");
- ret = -EFAULT;
- domctl->u.get_device_group.num_sdevs = 0;
- }
- else
- {
- domctl->u.get_device_group.num_sdevs = ret;
- ret = 0;
- }
- if ( __copy_field_to_guest(u_domctl, domctl, u.get_device_group) )
- ret = -EFAULT;
- }
- break;
-
- case XEN_DOMCTL_test_assign_device:
- ret = xsm_test_assign_device(XSM_HOOK, domctl->u.assign_device.machine_sbdf);
- if ( ret )
- break;
-
- seg = domctl->u.assign_device.machine_sbdf >> 16;
- bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
- devfn = domctl->u.assign_device.machine_sbdf & 0xff;
-
- if ( device_assigned(seg, bus, devfn) )
- {
- printk(XENLOG_G_INFO
- "%04x:%02x:%02x.%u already assigned, or non-existent\n",
- seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
- ret = -EINVAL;
- }
- break;
-
- case XEN_DOMCTL_assign_device:
- if ( unlikely(d->is_dying) )
- {
- ret = -EINVAL;
- break;
- }
-
- ret = xsm_assign_device(XSM_HOOK, d, domctl->u.assign_device.machine_sbdf);
- if ( ret )
- break;
-
- seg = domctl->u.assign_device.machine_sbdf >> 16;
- bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
- devfn = domctl->u.assign_device.machine_sbdf & 0xff;
-
- ret = device_assigned(seg, bus, devfn) ?:
- assign_device(d, seg, bus, devfn);
- if ( ret == -ERESTART )
- ret = hypercall_create_continuation(__HYPERVISOR_domctl,
- "h", u_domctl);
- else if ( ret )
- printk(XENLOG_G_ERR "XEN_DOMCTL_assign_device: "
- "assign %04x:%02x:%02x.%u to dom%d failed (%d)\n",
- seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
- d->domain_id, ret);
-
- break;
-
- case XEN_DOMCTL_deassign_device:
- ret = xsm_deassign_device(XSM_HOOK, d, domctl->u.assign_device.machine_sbdf);
- if ( ret )
- break;
-
- seg = domctl->u.assign_device.machine_sbdf >> 16;
- bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
- devfn = domctl->u.assign_device.machine_sbdf & 0xff;
-
- spin_lock(&pcidevs_lock);
- ret = deassign_device(d, seg, bus, devfn);
- spin_unlock(&pcidevs_lock);
- if ( ret )
- printk(XENLOG_G_ERR
- "deassign %04x:%02x:%02x.%u from dom%d failed (%d)\n",
- seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
- d->domain_id, ret);
-
- break;
-
- default:
- ret = -ENOSYS;
- break;
- }
-
- return ret;
-}
-
static void iommu_dump_p2m_table(unsigned char key)
{
struct domain *d;
@@ -26,6 +26,9 @@
#include <asm/hvm/irq.h>
#include <xen/delay.h>
#include <xen/keyhandler.h>
+#include <xen/event.h>
+#include <xen/guest_access.h>
+#include <xen/paging.h>
#include <xen/radix-tree.h>
#include <xen/tasklet.h>
#include <xsm/xsm.h>
@@ -980,6 +983,440 @@ static int __init setup_dump_pcidevs(void)
}
__initcall(setup_dump_pcidevs);
+/*
+ * Map the pages on d->page_list into the domain's IOMMU page tables through
+ * the platform map_page() hook.
+ *
+ * For HVM domains every page is mapped; for other domains only pages whose
+ * type is PGT_writable_page are. Each processed page is parked on
+ * d->arch.relmem_list, and the lists are merged back once page_list has been
+ * emptied.
+ *
+ * Returns 0 on success, -ESRCH if the domain is dying, -ERESTART if
+ * hypercall_preempt_check() requested preemption, or the error returned by
+ * map_page(). On any failure other than -ERESTART, iommu_teardown() is called.
+ */
+static int iommu_populate_page_table(struct domain *d)
+{
+ struct hvm_iommu *hd = domain_hvm_iommu(d);
+ struct page_info *page;
+ int rc = 0, n = 0;
+
+ /*
+ * NOTE(review): -1 appears to mark "population in progress";
+ * assign_device() calls in here while need_iommu(d) <= 0 -- confirm.
+ */
+ d->need_iommu = -1;
+
+ /* Suppress flushing while mapping; one full flush is issued on success. */
+ this_cpu(iommu_dont_flush_iotlb) = 1;
+ spin_lock(&d->page_alloc_lock);
+
+ if ( unlikely(d->is_dying) )
+ rc = -ESRCH;
+
+
+ while ( !rc && (page = page_list_remove_head(&d->page_list)) )
+ {
+ if ( is_hvm_domain(d) ||
+ (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page )
+ {
+ BUG_ON(SHARED_M2P(mfn_to_gmfn(d, page_to_mfn(page))));
+ rc = hd->platform_ops->map_page(
+ d, mfn_to_gmfn(d, page_to_mfn(page)), page_to_mfn(page),
+ IOMMUF_readable|IOMMUF_writable);
+ if ( rc )
+ {
+ /* Put the page we failed on back at the head of page_list. */
+ page_list_add(page, &d->page_list);
+ break;
+ }
+ }
+ page_list_add_tail(page, &d->arch.relmem_list);
+ /*
+ * Check for preemption every 256 pages, but only if pages remain.
+ * Pages already processed stay on relmem_list, so a restarted call
+ * only walks what is left on page_list.
+ */
+ if ( !(++n & 0xff) && !page_list_empty(&d->page_list) &&
+ hypercall_preempt_check() )
+ rc = -ERESTART;
+ }
+
+ if ( !rc )
+ {
+ /*
+ * The expectation here is that generally there are many normal pages
+ * on relmem_list (the ones we put there) and only few being in an
+ * offline/broken state. The latter ones are always at the head of the
+ * list. Hence we first move the whole list, and then move back the
+ * first few entries.
+ */
+ page_list_move(&d->page_list, &d->arch.relmem_list);
+ while ( (page = page_list_first(&d->page_list)) != NULL &&
+ (page->count_info & (PGC_state|PGC_broken)) )
+ {
+ page_list_del(page, &d->page_list);
+ page_list_add_tail(page, &d->arch.relmem_list);
+ }
+ }
+
+ spin_unlock(&d->page_alloc_lock);
+ this_cpu(iommu_dont_flush_iotlb) = 0;
+
+ if ( !rc )
+ iommu_iotlb_flush_all(d);
+ else if ( rc != -ERESTART )
+ iommu_teardown(d);
+
+ return rc;
+}
+
+/*
+ * Register @pdev, and any phantom functions it has, with the IOMMU of the
+ * domain that owns it. pcidevs_lock must be held (asserted).
+ *
+ * Returns -EINVAL if the device has no owning domain, 0 if the IOMMU is
+ * disabled or the domain has no platform ops, and otherwise the result of
+ * the add_device() hook for the device itself. Failures for phantom
+ * functions are only logged; they do not change the return value.
+ */
+int iommu_add_device(struct pci_dev *pdev)
+{
+ struct hvm_iommu *hd;
+ int rc;
+ u8 devfn;
+
+ if ( !pdev->domain )
+ return -EINVAL;
+
+ ASSERT(spin_is_locked(&pcidevs_lock));
+
+ hd = domain_hvm_iommu(pdev->domain);
+ if ( !iommu_enabled || !hd->platform_ops )
+ return 0;
+
+ rc = hd->platform_ops->add_device(pdev->devfn, pdev);
+ if ( rc || !pdev->phantom_stride )
+ return rc;
+
+ /* Step through the phantom functions until devfn leaves the device's slot. */
+ for ( devfn = pdev->devfn ; ; )
+ {
+ devfn += pdev->phantom_stride;
+ if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+ return 0;
+ rc = hd->platform_ops->add_device(devfn, pdev);
+ if ( rc )
+ printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n",
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
+ }
+}
+
+/*
+ * Ask the IOMMU driver of the owning domain to enable @pdev.
+ *
+ * pcidevs_lock must be held (asserted). Returns -EINVAL if the device has no
+ * owning domain, 0 if the IOMMU is disabled or the driver provides no
+ * enable_device() hook, and otherwise the hook's return value.
+ */
+int iommu_enable_device(struct pci_dev *pdev)
+{
+    struct domain *owner = pdev->domain;
+    struct hvm_iommu *iommu;
+
+    if ( owner == NULL )
+        return -EINVAL;
+
+    ASSERT(spin_is_locked(&pcidevs_lock));
+
+    if ( !iommu_enabled )
+        return 0;
+
+    iommu = domain_hvm_iommu(owner);
+    if ( iommu->platform_ops && iommu->platform_ops->enable_device )
+        return iommu->platform_ops->enable_device(pdev);
+
+    return 0;
+}
+
+/*
+ * Remove @pdev, and any phantom functions it has, from the IOMMU of the
+ * domain that owns it.
+ *
+ * Phantom functions are removed first; the first failure among them is
+ * logged and returned without touching the device itself. Returns -EINVAL
+ * if the device has no owning domain, 0 if the IOMMU is disabled or the
+ * domain has no platform ops, and otherwise the result of the
+ * remove_device() hook.
+ */
+int iommu_remove_device(struct pci_dev *pdev)
+{
+ struct hvm_iommu *hd;
+ u8 devfn;
+
+ if ( !pdev->domain )
+ return -EINVAL;
+
+ hd = domain_hvm_iommu(pdev->domain);
+ if ( !iommu_enabled || !hd->platform_ops )
+ return 0;
+
+ /* Step through the phantom functions until devfn leaves the device's slot. */
+ for ( devfn = pdev->devfn ; pdev->phantom_stride; )
+ {
+ int rc;
+
+ devfn += pdev->phantom_stride;
+ if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+ break;
+ rc = hd->platform_ops->remove_device(devfn, pdev);
+ if ( !rc )
+ continue;
+
+ printk(XENLOG_ERR "IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n",
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
+ return rc;
+ }
+
+ return hd->platform_ops->remove_device(pdev->devfn, pdev);
+}
+
+/*
+ * Check whether dom0 still owns the device seg:bus:devfn.
+ *
+ * Returns 0 when dom0 owns it, and -EBUSY otherwise, i.e. when the device
+ * has already been assigned to another domain or does not exist.
+ */
+static int device_assigned(u16 seg, u8 bus, u8 devfn)
+{
+    struct pci_dev *dom0_dev;
+
+    spin_lock(&pcidevs_lock);
+    dom0_dev = pci_get_pdev_by_domain(dom0, seg, bus, devfn);
+    spin_unlock(&pcidevs_lock);
+
+    if ( dom0_dev )
+        return 0;
+
+    return -EBUSY;
+}
+
+/*
+ * Assign the dom0-owned device seg:bus:devfn, and its phantom functions,
+ * to domain @d.
+ *
+ * Returns:
+ *   0         on success, or if the IOMMU is disabled / @d has no platform ops
+ *   -EXDEV    if need_iommu(d) is zero and mem sharing or paging is enabled
+ *   -ERESTART if pcidevs_lock could not be taken, or if populating the IOMMU
+ *             page tables was preempted
+ *   -EBUSY    if the device exists but is not owned by dom0
+ *   -ENODEV   if the device does not exist
+ *   otherwise the error from iommu_populate_page_table() or from the
+ *   assign_device() hook for the device itself.
+ * Failures for phantom functions are only logged.
+ */
+static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
+{
+ struct hvm_iommu *hd = domain_hvm_iommu(d);
+ struct pci_dev *pdev;
+ int rc = 0;
+
+ if ( !iommu_enabled || !hd->platform_ops )
+ return 0;
+
+ /* Prevent device assign if mem paging or mem sharing have been
+ * enabled for this domain */
+ if ( unlikely(!need_iommu(d) &&
+ (d->arch.hvm_domain.mem_sharing_enabled ||
+ d->mem_event->paging.ring_page)) )
+ return -EXDEV;
+
+ /* Do not spin on the lock; report -ERESTART instead. */
+ if ( !spin_trylock(&pcidevs_lock) )
+ return -ERESTART;
+
+ if ( need_iommu(d) <= 0 )
+ {
+ /* Page tables only need populating when they are not shared with HAP. */
+ if ( !iommu_use_hap_pt(d) )
+ {
+ rc = iommu_populate_page_table(d);
+ if ( rc )
+ {
+ spin_unlock(&pcidevs_lock);
+ return rc;
+ }
+ }
+ d->need_iommu = 1;
+ }
+
+ pdev = pci_get_pdev_by_domain(dom0, seg, bus, devfn);
+ if ( !pdev )
+ {
+ rc = pci_get_pdev(seg, bus, devfn) ? -EBUSY : -ENODEV;
+ goto done;
+ }
+
+ pdev->fault.count = 0;
+
+ if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) )
+ goto done;
+
+ /* The loop increment resets rc, so phantom function errors are not returned. */
+ for ( ; pdev->phantom_stride; rc = 0 )
+ {
+ devfn += pdev->phantom_stride;
+ if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+ break;
+ rc = hd->platform_ops->assign_device(d, devfn, pdev);
+ if ( rc )
+ printk(XENLOG_G_WARNING "d%d: assign %04x:%02x:%02x.%u failed (%d)\n",
+ d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ rc);
+ }
+
+ done:
+ /* Undo the IOMMU setup if the domain ended up without any device. */
+ if ( !has_arch_pdevs(d) && need_iommu(d) )
+ iommu_teardown(d);
+ spin_unlock(&pcidevs_lock);
+
+ return rc;
+}
+
+/*
+ * Hand the device seg:bus:devfn, and its phantom functions, back from
+ * domain @d to dom0 via the reassign_device() hook.
+ *
+ * The caller should hold the pcidevs_lock (asserted).
+ *
+ * Phantom functions are reassigned first; the first failure is logged and
+ * returned. Returns -EINVAL if the IOMMU is disabled or @d has no platform
+ * ops, -ENODEV if @d does not own the device, the hook's error on failure,
+ * and 0 on success. On success the IOMMU state of @d is torn down when it
+ * no longer has any device.
+ */
+int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
+{
+ struct hvm_iommu *hd = domain_hvm_iommu(d);
+ struct pci_dev *pdev = NULL;
+ int ret = 0;
+
+ if ( !iommu_enabled || !hd->platform_ops )
+ return -EINVAL;
+
+ ASSERT(spin_is_locked(&pcidevs_lock));
+ pdev = pci_get_pdev_by_domain(d, seg, bus, devfn);
+ if ( !pdev )
+ return -ENODEV;
+
+ while ( pdev->phantom_stride )
+ {
+ devfn += pdev->phantom_stride;
+ if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+ break;
+ ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
+ if ( !ret )
+ continue;
+
+ printk(XENLOG_G_ERR "d%d: deassign %04x:%02x:%02x.%u failed (%d)\n",
+ d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret);
+ return ret;
+ }
+
+ /* devfn was advanced by the loop above; go back to the device itself. */
+ devfn = pdev->devfn;
+ ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
+ if ( ret )
+ {
+ dprintk(XENLOG_G_ERR,
+ "d%d: deassign device (%04x:%02x:%02x.%u) failed\n",
+ d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ return ret;
+ }
+
+ pdev->fault.count = 0;
+
+ if ( !has_arch_pdevs(d) && need_iommu(d) )
+ iommu_teardown(d);
+
+ return ret;
+}
+
+/*
+ * Report to the guest buffer @buf the devices owned by @d that share an
+ * IOMMU group with the device seg:bus:devfn.
+ *
+ * Only devices in segment @seg are considered; the queried device itself
+ * and devices rejected by xsm_get_device_group() are skipped. At most
+ * @max_sdevs entries are written.
+ *
+ * Returns the number of entries written, 0 if the IOMMU is disabled or the
+ * driver has no get_device_group_id() hook, or -1 if copying to the guest
+ * failed.
+ */
+static int iommu_get_device_group(
+ struct domain *d, u16 seg, u8 bus, u8 devfn,
+ XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs)
+{
+ struct hvm_iommu *hd = domain_hvm_iommu(d);
+ struct pci_dev *pdev;
+ int group_id, sdev_id;
+ u32 bdf;
+ int i = 0;
+ const struct iommu_ops *ops = hd->platform_ops;
+
+ if ( !iommu_enabled || !ops || !ops->get_device_group_id )
+ return 0;
+
+ group_id = ops->get_device_group_id(seg, bus, devfn);
+
+ spin_lock(&pcidevs_lock);
+ for_each_pdev( d, pdev )
+ {
+ if ( (pdev->seg != seg) ||
+ ((pdev->bus == bus) && (pdev->devfn == devfn)) )
+ continue;
+
+ if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) )
+ continue;
+
+ sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn);
+ if ( (sdev_id == group_id) && (i < max_sdevs) )
+ {
+ /* Each entry carries the bus in bits 16-23 and devfn in bits 8-15. */
+ bdf = 0;
+ bdf |= (pdev->bus & 0xff) << 16;
+ bdf |= (pdev->devfn & 0xff) << 8;
+
+ if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
+ {
+ spin_unlock(&pcidevs_lock);
+ return -1;
+ }
+ i++;
+ }
+ }
+
+ spin_unlock(&pcidevs_lock);
+
+ return i;
+}
+
+/*
+ * Handle the PCI passthrough domctl operations: get_device_group,
+ * test_assign_device, assign_device and deassign_device.
+ *
+ * In every case the XSM check runs first, and machine_sbdf is then split
+ * into segment (bits 16 and up), bus (bits 8-15) and devfn (bits 0-7).
+ *
+ * Returns 0 on success, -ENOSYS for any other command, the XSM error if the
+ * check fails, or the operation specific error described in each case.
+ */
+int iommu_do_pci_domctl(
+ struct xen_domctl *domctl, struct domain *d,
+ XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
+{
+ u16 seg;
+ u8 bus, devfn;
+ int ret = 0;
+
+ switch ( domctl->cmd )
+ {
+ case XEN_DOMCTL_get_device_group:
+ {
+ u32 max_sdevs;
+ XEN_GUEST_HANDLE_64(uint32) sdevs;
+
+ ret = xsm_get_device_group(XSM_HOOK, domctl->u.get_device_group.machine_sbdf);
+ if ( ret )
+ break;
+
+ seg = domctl->u.get_device_group.machine_sbdf >> 16;
+ bus = (domctl->u.get_device_group.machine_sbdf >> 8) & 0xff;
+ devfn = domctl->u.get_device_group.machine_sbdf & 0xff;
+ max_sdevs = domctl->u.get_device_group.max_sdevs;
+ sdevs = domctl->u.get_device_group.sdev_array;
+
+ /* A negative result means copying to the guest failed: report -EFAULT. */
+ ret = iommu_get_device_group(d, seg, bus, devfn, sdevs, max_sdevs);
+ if ( ret < 0 )
+ {
+ dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n");
+ ret = -EFAULT;
+ domctl->u.get_device_group.num_sdevs = 0;
+ }
+ else
+ {
+ domctl->u.get_device_group.num_sdevs = ret;
+ ret = 0;
+ }
+ /* num_sdevs is copied back to the caller on both paths. */
+ if ( __copy_field_to_guest(u_domctl, domctl, u.get_device_group) )
+ ret = -EFAULT;
+ }
+ break;
+
+ case XEN_DOMCTL_test_assign_device:
+ ret = xsm_test_assign_device(XSM_HOOK, domctl->u.assign_device.machine_sbdf);
+ if ( ret )
+ break;
+
+ seg = domctl->u.assign_device.machine_sbdf >> 16;
+ bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
+ devfn = domctl->u.assign_device.machine_sbdf & 0xff;
+
+ /* -EINVAL when dom0 does not own the device. */
+ if ( device_assigned(seg, bus, devfn) )
+ {
+ printk(XENLOG_G_INFO
+ "%04x:%02x:%02x.%u already assigned, or non-existent\n",
+ seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ ret = -EINVAL;
+ }
+ break;
+
+ case XEN_DOMCTL_assign_device:
+ if ( unlikely(d->is_dying) )
+ {
+ ret = -EINVAL;
+ break;
+ }
+
+ ret = xsm_assign_device(XSM_HOOK, d, domctl->u.assign_device.machine_sbdf);
+ if ( ret )
+ break;
+
+ seg = domctl->u.assign_device.machine_sbdf >> 16;
+ bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
+ devfn = domctl->u.assign_device.machine_sbdf & 0xff;
+
+ /* Only attempt the assignment when dom0 still owns the device. */
+ ret = device_assigned(seg, bus, devfn) ?:
+ assign_device(d, seg, bus, devfn);
+ /* -ERESTART is turned into a hypercall continuation, not an error. */
+ if ( ret == -ERESTART )
+ ret = hypercall_create_continuation(__HYPERVISOR_domctl,
+ "h", u_domctl);
+ else if ( ret )
+ printk(XENLOG_G_ERR "XEN_DOMCTL_assign_device: "
+ "assign %04x:%02x:%02x.%u to dom%d failed (%d)\n",
+ seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ d->domain_id, ret);
+
+ break;
+
+ case XEN_DOMCTL_deassign_device:
+ ret = xsm_deassign_device(XSM_HOOK, d, domctl->u.assign_device.machine_sbdf);
+ if ( ret )
+ break;
+
+ seg = domctl->u.assign_device.machine_sbdf >> 16;
+ bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
+ devfn = domctl->u.assign_device.machine_sbdf & 0xff;
+
+ /* deassign_device() expects pcidevs_lock to be held by its caller. */
+ spin_lock(&pcidevs_lock);
+ ret = deassign_device(d, seg, bus, devfn);
+ spin_unlock(&pcidevs_lock);
+ if ( ret )
+ printk(XENLOG_G_ERR
+ "deassign %04x:%02x:%02x.%u from dom%d failed (%d)\n",
+ seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ d->domain_id, ret);
+
+ break;
+
+ default:
+ ret = -ENOSYS;
+ break;
+ }
+
+ return ret;
+}
+
/*
* Local variables:
* mode: C
@@ -1 +1,2 @@
obj-y += ats.o
+obj-y += iommu.o
new file mode 100644
@@ -0,0 +1,65 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/sched.h>
+#include <xen/iommu.h>
+#include <xen/paging.h>
+#include <xen/guest_access.h>
+#include <xen/event.h>
+#include <xen/softirq.h>
+#include <xsm/xsm.h>
+
+/* Forward an IO-APIC register write to the IOMMU driver's hook. */
+void iommu_update_ire_from_apic(
+    unsigned int apic, unsigned int reg, unsigned int value)
+{
+    iommu_get_ops()->update_ire_from_apic(apic, reg, value);
+}
+
+/*
+ * Forward an MSI message update to the IOMMU driver's hook.
+ * Returns 0 without calling the hook when iommu_intremap is off.
+ */
+int iommu_update_ire_from_msi(
+    struct msi_desc *msi_desc, struct msi_msg *msg)
+{
+    const struct iommu_ops *ops = iommu_get_ops();
+
+    if ( !iommu_intremap )
+        return 0;
+
+    return ops->update_ire_from_msi(msi_desc, msg);
+}
+
+/*
+ * Let the IOMMU driver fill in @msg for @msi_desc.
+ * Does nothing when iommu_intremap is off.
+ */
+void iommu_read_msi_from_ire(
+    struct msi_desc *msi_desc, struct msi_msg *msg)
+{
+    const struct iommu_ops *ops = iommu_get_ops();
+
+    if ( !iommu_intremap )
+        return;
+
+    ops->read_msi_from_ire(msi_desc, msg);
+}
+
+/* Read an IO-APIC register value through the IOMMU driver's hook. */
+unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg)
+{
+    return iommu_get_ops()->read_apic_from_ire(apic, reg);
+}
+
+/*
+ * Set up @msi for the HPET through the IOMMU driver.
+ * Returns -ENODEV when the driver provides no setup_hpet_msi() hook.
+ */
+int __init iommu_setup_hpet_msi(struct msi_desc *msi)
+{
+    const struct iommu_ops *ops = iommu_get_ops();
+
+    if ( ops->setup_hpet_msi == NULL )
+        return -ENODEV;
+
+    return ops->setup_hpet_msi(msi);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
new file mode 100644
@@ -0,0 +1,46 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+*/
+#ifndef __ARCH_X86_IOMMU_H__
+#define __ARCH_X86_IOMMU_H__
+
+#define MAX_IOMMUS 32
+
+#include <asm/msi.h>
+
+void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
+int iommu_update_ire_from_msi(struct msi_desc *msi_desc, struct msi_msg *msg);
+void iommu_read_msi_from_ire(struct msi_desc *msi_desc, struct msi_msg *msg);
+unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg);
+int iommu_setup_hpet_msi(struct msi_desc *);
+
+void iommu_share_p2m_table(struct domain *d);
+
+/* While VT-d specific, this must get declared in a generic header. */
+int adjust_vtd_irq_affinities(void);
+void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, int order, int present);
+int iommu_supports_eim(void);
+int iommu_enable_x2apic_IR(void);
+void iommu_disable_x2apic_IR(void);
+void iommu_set_dom0_mapping(struct domain *d);
+
+#endif /* !__ARCH_X86_IOMMU_H__ */
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
@@ -21,6 +21,7 @@
#define __XEN_HVM_IOMMU_H__
#include <xen/iommu.h>
+#include <asm/hvm/iommu.h>
struct g2m_ioport {
struct list_head list;
@@ -25,6 +25,7 @@
#include <xen/pci.h>
#include <public/hvm/ioreq.h>
#include <public/domctl.h>
+#include <asm/iommu.h>
extern bool_t iommu_enable, iommu_enabled;
extern bool_t force_iommu, iommu_verbose;
@@ -39,17 +40,12 @@ extern bool_t amd_iommu_perdev_intremap;
#define domain_hvm_iommu(d) (&d->arch.hvm_domain.hvm_iommu)
-#define MAX_IOMMUS 32
-
#define PAGE_SHIFT_4K (12)
#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
int iommu_setup(void);
-int iommu_supports_eim(void);
-int iommu_enable_x2apic_IR(void);
-void iommu_disable_x2apic_IR(void);
int iommu_add_device(struct pci_dev *pdev);
int iommu_enable_device(struct pci_dev *pdev);
@@ -59,6 +55,9 @@ void iommu_dom0_init(struct domain *d);
void iommu_domain_destroy(struct domain *d);
int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn);
+/* Function used internally, use iommu_domain_destroy */
+void iommu_teardown(struct domain *d);
+
/* iommu_map_page() takes flags to direct the mapping operation. */
#define _IOMMUF_readable 0
#define IOMMUF_readable (1u<<_IOMMUF_readable)
@@ -67,9 +66,8 @@ int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn);
int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
unsigned int flags);
int iommu_unmap_page(struct domain *d, unsigned long gfn);
-void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, int order, int present);
-void iommu_domain_teardown(struct domain *d);
+#ifdef HAS_PCI
void pt_pci_init(void);
struct pirq;
@@ -84,52 +82,56 @@ void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci);
bool_t pt_irq_need_timer(uint32_t flags);
#define PT_IRQ_TIME_OUT MILLISECS(8)
+#endif /* HAS_PCI */
+#ifdef CONFIG_X86
struct msi_desc;
struct msi_msg;
+#endif /* CONFIG_X86 */
+
struct page_info;
struct iommu_ops {
int (*init)(struct domain *d);
void (*dom0_init)(struct domain *d);
+#ifdef HAS_PCI
+ /* PCI device hooks; only present when HAS_PCI is defined. */
int (*add_device)(u8 devfn, struct pci_dev *);
int (*enable_device)(struct pci_dev *pdev);
int (*remove_device)(u8 devfn, struct pci_dev *);
int (*assign_device)(struct domain *, u8 devfn, struct pci_dev *);
+ int (*reassign_device)(struct domain *s, struct domain *t,
+ u8 devfn, struct pci_dev *);
+ int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn);
+#endif /* HAS_PCI */
void (*teardown)(struct domain *d);
int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn,
unsigned int flags);
int (*unmap_page)(struct domain *d, unsigned long gfn);
void (*free_page_table)(struct page_info *);
- int (*reassign_device)(struct domain *s, struct domain *t,
- u8 devfn, struct pci_dev *);
- int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn);
+#ifdef CONFIG_X86
+ /* IO-APIC/MSI/HPET and p2m sharing hooks; only present on x86. */
void (*update_ire_from_apic)(unsigned int apic, unsigned int reg, unsigned int value);
int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg);
void (*read_msi_from_ire)(struct msi_desc *msi_desc, struct msi_msg *msg);
unsigned int (*read_apic_from_ire)(unsigned int apic, unsigned int reg);
int (*setup_hpet_msi)(struct msi_desc *);
+ void (*share_p2m)(struct domain *d);
+#endif /* CONFIG_X86 */
void (*suspend)(void);
void (*resume)(void);
- void (*share_p2m)(struct domain *d);
void (*crash_shutdown)(void);
void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
void (*iotlb_flush_all)(struct domain *d);
void (*dump_p2m_table)(struct domain *d);
};
-void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
-int iommu_update_ire_from_msi(struct msi_desc *msi_desc, struct msi_msg *msg);
-void iommu_read_msi_from_ire(struct msi_desc *msi_desc, struct msi_msg *msg);
-unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg);
-int iommu_setup_hpet_msi(struct msi_desc *);
-
void iommu_suspend(void);
void iommu_resume(void);
void iommu_crash_shutdown(void);
-void iommu_set_dom0_mapping(struct domain *d);
-void iommu_share_p2m_table(struct domain *d);
+/* Handles the PCI passthrough commands dispatched by iommu_do_domctl(). */
+#ifdef HAS_PCI
+int iommu_do_pci_domctl(struct xen_domctl *, struct domain *d,
+ XEN_GUEST_HANDLE_PARAM(xen_domctl_t));
+#endif
int iommu_do_domctl(struct xen_domctl *, struct domain *d,
XEN_GUEST_HANDLE_PARAM(xen_domctl_t));
@@ -137,9 +139,6 @@ int iommu_do_domctl(struct xen_domctl *, struct domain *d,
void iommu_iotlb_flush(struct domain *d, unsigned long gfn, unsigned int page_count);
void iommu_iotlb_flush_all(struct domain *d);
-/* While VT-d specific, this must get declared in a generic header. */
-int adjust_vtd_irq_affinities(void);
-
/*
* The purpose of the iommu_dont_flush_iotlb optional cpu flag is to
* avoid unecessary iotlb_flush in the low level IOMMU code.
The generic IOMMU framework code (xen/drivers/passthrough/iommu.c) contains functions specific to x86 and PCI. Split the framework into 3 distinct files: - iommu.c: contains generic functions shared between x86 and ARM (once it is supported) - pci.c: contains functions specific to PCI passthrough - x86/iommu.c: contains functions specific to x86 io.c contains x86 HVM specific code. Only compile it for x86. This patch is mostly code movement into new files. Signed-off-by: Julien Grall <julien.grall@linaro.org> Cc: Xiantao Zhang <xiantao.zhang@intel.com> Cc: Jan Beulich <jbeulich@suse.com> --- Changes in v2: - Update commit message - Remove spurious change in drivers/passthrough/vtd/iommu.c - Move iommu_x86.c to x86/iommu.c - Merge iommu_pci.c into pci.c - Introduce iommu_do_pci_domctl --- xen/drivers/passthrough/Makefile | 4 +- xen/drivers/passthrough/iommu.c | 493 ++-------------------------------- xen/drivers/passthrough/pci.c | 437 ++++++++++++++++++++++++++++++ xen/drivers/passthrough/x86/Makefile | 1 + xen/drivers/passthrough/x86/iommu.c | 65 +++++ xen/include/asm-x86/iommu.h | 46 ++++ xen/include/xen/hvm/iommu.h | 1 + xen/include/xen/iommu.h | 43 ++- 8 files changed, 597 insertions(+), 493 deletions(-) create mode 100644 xen/drivers/passthrough/x86/iommu.c create mode 100644 xen/include/asm-x86/iommu.h