diff mbox series

[v1,10/16] iommufd: Add mmap interface

Message ID c332a8701959c098f747dd8e8fa083ceda2bf2c3.1744353300.git.nicolinc@nvidia.com
State New
Headers show
Series iommufd: Add vIOMMU infrastructure (Part-4 vCMDQ) | expand

Commit Message

Nicolin Chen April 11, 2025, 6:37 a.m. UTC
For vIOMMU passing through HW resources to user space (VMs), add an mmap
infrastructure to map a region of hardware MMIO pages. The addr and size
must have been previously given to user space in the output fields of a
prior IOMMU_VIOMMU_ALLOC ioctl.

Maintain an mt_mmap per ictx for validations. And give IOMMU drivers a
pair of helpers to add and delete mmap regions onto the mt_mmap.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/iommu/iommufd/iommufd_private.h |  9 ++++++
 include/linux/iommufd.h                 | 15 ++++++++++
 drivers/iommu/iommufd/driver.c          | 40 +++++++++++++++++++++++++
 drivers/iommu/iommufd/main.c            | 35 ++++++++++++++++++++++
 4 files changed, 99 insertions(+)

Comments

Tian, Kevin April 21, 2025, 8:16 a.m. UTC | #1
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Friday, April 11, 2025 2:38 PM
> 
> For vIOMMU passing through HW resources to user space (VMs), add an
> mmap
> infrastructure to map a region of hardware MMIO pages. The addr and size
> should be given previously via a prior IOMMU_VIOMMU_ALLOC ioctl in some
> output fields of the structure.

According to the code the addr must be the immap_id given by a previous
alloc, but the size can be any value as long as it doesn't exceed the
physical length.

> 
> +/* Entry for iommufd_ctx::mt_mmap */
> +struct iommufd_mmap {
> +	unsigned long pfn_start;
> +	unsigned long pfn_end;
> +	bool is_io;
> +};

what is the point of 'is_io' here? Do you intend to allow userspace to
mmap anonymous memory via iommufd?

anyway for now the only user in this series always sets it to true.

I'd suggest to remove it until there is a real need.

> 
> +/*
> + * The pfn and size carried in @vma from the user space mmap call should
> be

there is no 'pfn' carried in the mmap call. It's vm_pgoff.

> + * previously given to user space via a prior ioctl output.
> + */
> +static int iommufd_fops_mmap(struct file *filp, struct vm_area_struct *vma)
> +{
> +	struct iommufd_ctx *ictx = filp->private_data;
> +	size_t size = vma->vm_end - vma->vm_start;
> +	struct iommufd_mmap *immap;
> +
> +	if (size & ~PAGE_MASK)
> +		return -EINVAL;
> +	if (!(vma->vm_flags & VM_SHARED))
> +		return -EINVAL;
> +	if (vma->vm_flags & VM_EXEC)
> +		return -EPERM;
> +
> +	/* vm_pgoff carries an index of an mtree entry/immap */
> +	immap = mtree_load(&ictx->mt_mmap, vma->vm_pgoff);
> +	if (!immap)
> +		return -EINVAL;
> +	if (size >> PAGE_SHIFT > immap->pfn_end - immap->pfn_start + 1)
> +		return -EINVAL;

Do we want to document in uAPI that iommufd mmap allows to map
a sub-region (starting from offset zero) of the reported size from an earlier
alloc ioctl, but not from random offset (of course impossible by forcing
vm_pgoff to be a mtree index)?
Nicolin Chen April 21, 2025, 5:45 p.m. UTC | #2
On Mon, Apr 21, 2025 at 08:16:54AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Friday, April 11, 2025 2:38 PM
> > + * previously given to user space via a prior ioctl output.
> > + */
> > +static int iommufd_fops_mmap(struct file *filp, struct vm_area_struct *vma)
> > +{
> > +	struct iommufd_ctx *ictx = filp->private_data;
> > +	size_t size = vma->vm_end - vma->vm_start;
> > +	struct iommufd_mmap *immap;
> > +
> > +	if (size & ~PAGE_MASK)
> > +		return -EINVAL;
> > +	if (!(vma->vm_flags & VM_SHARED))
> > +		return -EINVAL;
> > +	if (vma->vm_flags & VM_EXEC)
> > +		return -EPERM;
> > +
> > +	/* vm_pgoff carries an index of an mtree entry/immap */
> > +	immap = mtree_load(&ictx->mt_mmap, vma->vm_pgoff);
> > +	if (!immap)
> > +		return -EINVAL;
> > +	if (size >> PAGE_SHIFT > immap->pfn_end - immap->pfn_start + 1)
> > +		return -EINVAL;
> 
> Do we want to document in uAPI that iommufd mmap allows to map
> a sub-region (starting from offset zero) of the reported size from earlier
> alloc ioctl, but not from random offset (of course impossible by forcing
> vm_pgoff to be a mtree index)?

I also did this:

diff --git a/Documentation/userspace-api/iommufd.rst b/Documentation/userspace-api/iommufd.rst
index ace0579432d57..f57a5bf2feea1 100644
--- a/Documentation/userspace-api/iommufd.rst
+++ b/Documentation/userspace-api/iommufd.rst
@@ -128,11 +128,13 @@ Following IOMMUFD objects are exposed to userspace:
   virtualization feature for a VM to directly execute guest-issued commands to
   invalidate HW cache entries holding the mappings or translations of a guest-
   owned stage-1 page table. Along with this queue object, iommufd provides the
-  user space a new mmap interface that the VMM can mmap a physical MMIO region
-  from the host physical address space to a guest physical address space. To use
-  this mmap interface, the VMM must define an IOMMU specific driver structure
-  to ask for a pair of VMA info (vm_pgoff/size) to do mmap after a vCMDQ gets
-  allocated.
+  user space an mmap interface for VMM to mmap a physical MMIO region from the
+  host physical address space to a guest physical address space. When allocating
+  a vCMDQ, the VMM must request a pair of VMA info (vm_pgoff/size) for a later
+  mmap call. The length argument of an mmap call can be smaller than the given
+  size for a partial mmap, but the given vm_pgoff (as the offset argument of
+  the mmap call) should never be changed, which implies that the mmap always
+  starts from the beginning of the MMIO region.
 
 All user-visible objects are destroyed via the IOMMU_DESTROY uAPI.
 
Thanks
Nicolin
diff mbox series

Patch

diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index da35ffcc212b..5be0248966aa 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -7,6 +7,7 @@ 
 #include <linux/iommu.h>
 #include <linux/iommufd.h>
 #include <linux/iova_bitmap.h>
+#include <linux/maple_tree.h>
 #include <linux/rwsem.h>
 #include <linux/uaccess.h>
 #include <linux/xarray.h>
@@ -44,6 +45,7 @@  struct iommufd_ctx {
 	struct xarray groups;
 	wait_queue_head_t destroy_wait;
 	struct rw_semaphore ioas_creation_lock;
+	struct maple_tree mt_mmap;
 
 	struct mutex sw_msi_lock;
 	struct list_head sw_msi_list;
@@ -55,6 +57,13 @@  struct iommufd_ctx {
 	struct iommufd_ioas *vfio_ioas;
 };
 
+/* Entry for iommufd_ctx::mt_mmap */
+struct iommufd_mmap {
+	unsigned long pfn_start;
+	unsigned long pfn_end;
+	bool is_io;
+};
+
 /*
  * The IOVA to PFN map. The map automatically copies the PFNs into multiple
  * domains and permits sharing of PFNs between io_pagetable instances. This
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 4118eaece1a5..e9394e20c4dd 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -226,6 +226,9 @@  struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
 					     size_t size,
 					     enum iommufd_object_type type);
 void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
+int iommufd_ctx_alloc_mmap(struct iommufd_ctx *ictx, phys_addr_t base,
+			   size_t size, bool is_io, unsigned long *immap_id);
+void iommufd_ctx_free_mmap(struct iommufd_ctx *ictx, unsigned long immap_id);
 struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
 				       unsigned long vdev_id);
 int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu,
@@ -246,6 +249,18 @@  static inline void iommufd_object_abort(struct iommufd_ctx *ictx,
 {
 }
 
+static inline int iommufd_ctx_alloc_mmap(struct iommufd_ctx *ictx,
+					 phys_addr_t base, size_t size,
+					 bool is_io, unsigned long *immap_id)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void iommufd_ctx_free_mmap(struct iommufd_ctx *ictx,
+					 unsigned long immap_id)
+{
+}
+
 static inline struct device *
 iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id)
 {
diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c
index 7980a09761c2..abe0bdc1e9a3 100644
--- a/drivers/iommu/iommufd/driver.c
+++ b/drivers/iommu/iommufd/driver.c
@@ -50,6 +50,46 @@  void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj)
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_object_abort, "IOMMUFD");
 
+/* Driver should report the immap_id to user space for mmap() syscalls */
+int iommufd_ctx_alloc_mmap(struct iommufd_ctx *ictx, phys_addr_t base,
+			   size_t size, bool is_io, unsigned long *immap_id)
+{
+	struct iommufd_mmap *immap;
+	int rc;
+
+	if (WARN_ON_ONCE(!immap_id))
+		return -EINVAL;
+	if (base & ~PAGE_MASK)
+		return -EINVAL;
+	if (!size || size & ~PAGE_MASK)
+		return -EINVAL;
+
+	immap = kzalloc(sizeof(*immap), GFP_KERNEL);
+	if (!immap)
+		return -ENOMEM;
+	immap->pfn_start = base >> PAGE_SHIFT;
+	immap->pfn_end = immap->pfn_start + (size >> PAGE_SHIFT) - 1;
+	immap->is_io = is_io;
+
+	rc = mtree_alloc_range(&ictx->mt_mmap, immap_id, immap, sizeof(immap),
+			       0, LONG_MAX >> PAGE_SHIFT, GFP_KERNEL);
+	if (rc < 0) {
+		kfree(immap);
+		return rc;
+	}
+
+	/* mmap() syscall would right-shift the immap_id to vma->vm_pgoff */
+	*immap_id <<= PAGE_SHIFT;
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_ctx_alloc_mmap, "IOMMUFD");
+
+void iommufd_ctx_free_mmap(struct iommufd_ctx *ictx, unsigned long immap_id)
+{
+	kfree(mtree_erase(&ictx->mt_mmap, immap_id >> PAGE_SHIFT));
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_ctx_free_mmap, "IOMMUFD");
+
 /* Caller should xa_lock(&viommu->vdevs) to protect the return value */
 struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
 				       unsigned long vdev_id)
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 27473aff150f..d3101c76fcb8 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -213,6 +213,7 @@  static int iommufd_fops_open(struct inode *inode, struct file *filp)
 	xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);
 	xa_init(&ictx->groups);
 	ictx->file = filp;
+	mt_init_flags(&ictx->mt_mmap, MT_FLAGS_ALLOC_RANGE);
 	init_waitqueue_head(&ictx->destroy_wait);
 	mutex_init(&ictx->sw_msi_lock);
 	INIT_LIST_HEAD(&ictx->sw_msi_list);
@@ -410,11 +411,45 @@  static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd,
 	return ret;
 }
 
+/*
+ * The pfn and size carried in @vma from the user space mmap call should be
+ * previously given to user space via a prior ioctl output.
+ */
+static int iommufd_fops_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct iommufd_ctx *ictx = filp->private_data;
+	size_t size = vma->vm_end - vma->vm_start;
+	struct iommufd_mmap *immap;
+
+	if (size & ~PAGE_MASK)
+		return -EINVAL;
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+	if (vma->vm_flags & VM_EXEC)
+		return -EPERM;
+
+	/* vm_pgoff carries an index of an mtree entry/immap */
+	immap = mtree_load(&ictx->mt_mmap, vma->vm_pgoff);
+	if (!immap)
+		return -EINVAL;
+	if (size >> PAGE_SHIFT > immap->pfn_end - immap->pfn_start + 1)
+		return -EINVAL;
+
+	vma->vm_pgoff = 0;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
+	if (immap->is_io)
+		vm_flags_set(vma, VM_IO);
+	return remap_pfn_range(vma, vma->vm_start, immap->pfn_start, size,
+			       vma->vm_page_prot);
+}
+
 static const struct file_operations iommufd_fops = {
 	.owner = THIS_MODULE,
 	.open = iommufd_fops_open,
 	.release = iommufd_fops_release,
 	.unlocked_ioctl = iommufd_fops_ioctl,
+	.mmap = iommufd_fops_mmap,
 };
 
 /**