@@ -195,11 +195,15 @@ static void vfio_unlink_dma(struct vfio_iommu *iommu, struct vfio_dma *old)
static int vfio_dma_bitmap_alloc(struct vfio_dma *dma, size_t pgsize)
{
uint64_t npages = dma->size / pgsize;
+ size_t bitmap_size;
if (npages > DIRTY_BITMAP_PAGES_MAX)
return -EINVAL;
- dma->bitmap = kvzalloc(DIRTY_BITMAP_BYTES(npages), GFP_KERNEL);
+ /* Allocate extra 64 bits which are used for bitmap manipulation */
+ bitmap_size = DIRTY_BITMAP_BYTES(npages) + sizeof(u64);
+
+ dma->bitmap = kvzalloc(bitmap_size, GFP_KERNEL);
if (!dma->bitmap)
return -ENOMEM;
@@ -1011,23 +1015,25 @@ static int verify_bitmap_size(uint64_t npages, uint64_t bitmap_size)
}
static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
- struct vfio_iommu_type1_dma_unmap *unmap)
+ struct vfio_iommu_type1_dma_unmap *unmap,
+ struct vfio_bitmap *bitmap)
{
- uint64_t mask;
struct vfio_dma *dma, *dma_last = NULL;
- size_t unmapped = 0;
+ size_t unmapped = 0, pgsize;
int ret = 0, retries = 0;
+ unsigned long pgshift;
mutex_lock(&iommu->lock);
- mask = ((uint64_t)1 << __ffs(iommu->pgsize_bitmap)) - 1;
+ pgshift = __ffs(iommu->pgsize_bitmap);
+ pgsize = (size_t)1 << pgshift;
- if (unmap->iova & mask) {
+ if (unmap->iova & (pgsize - 1)) {
ret = -EINVAL;
goto unlock;
}
- if (!unmap->size || unmap->size & mask) {
+ if (!unmap->size || unmap->size & (pgsize - 1)) {
ret = -EINVAL;
goto unlock;
}
@@ -1038,9 +1044,15 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
goto unlock;
}
- WARN_ON(mask & PAGE_MASK);
-again:
+ /* When dirty tracking is enabled, allow only min supported pgsize */
+ if ((unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
+ (!iommu->dirty_page_tracking || (bitmap->pgsize != pgsize))) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+ WARN_ON((pgsize - 1) & PAGE_MASK);
+again:
/*
* vfio-iommu-type1 (v1) - User mappings were coalesced together to
* avoid tracking individual mappings. This means that the granularity
@@ -1078,6 +1090,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
ret = -EINVAL;
goto unlock;
}
+
dma = vfio_find_dma(iommu, unmap->iova + unmap->size - 1, 0);
if (dma && dma->iova + dma->size != unmap->iova + unmap->size) {
ret = -EINVAL;
@@ -1121,6 +1134,14 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
mutex_lock(&iommu->lock);
goto again;
}
+
+ if (unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
+ ret = update_user_bitmap(bitmap->data, dma,
+ unmap->iova, pgsize);
+ if (ret)
+ break;
+ }
+
unmapped += dma->size;
vfio_remove_dma(iommu, dma);
}
@@ -2459,17 +2480,40 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
} else if (cmd == VFIO_IOMMU_UNMAP_DMA) {
struct vfio_iommu_type1_dma_unmap unmap;
- long ret;
+ struct vfio_bitmap bitmap = { 0 };
+ int ret;
minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);
if (copy_from_user(&unmap, (void __user *)arg, minsz))
return -EFAULT;
- if (unmap.argsz < minsz || unmap.flags)
+ if (unmap.argsz < minsz ||
+ unmap.flags & ~VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP)
return -EINVAL;
- ret = vfio_dma_do_unmap(iommu, &unmap);
+ if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
+ unsigned long pgshift;
+
+ if (unmap.argsz < (minsz + sizeof(bitmap)))
+ return -EINVAL;
+
+ if (copy_from_user(&bitmap,
+ (void __user *)(arg + minsz),
+ sizeof(bitmap)))
+ return -EFAULT;
+
+ if (!access_ok((void __user *)bitmap.data, bitmap.size))
+ return -EINVAL;
+
+ pgshift = __ffs(bitmap.pgsize);
+ ret = verify_bitmap_size(unmap.size >> pgshift,
+ bitmap.size);
+ if (ret)
+ return ret;
+ }
+
+ ret = vfio_dma_do_unmap(iommu, &unmap, &bitmap);
if (ret)
return ret;
@@ -1048,12 +1048,22 @@ struct vfio_bitmap {
* field. No guarantee is made to the user that arbitrary unmaps of iova
* or size different from those used in the original mapping call will
* succeed.
+ * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get dirty bitmap
+ * before unmapping IO virtual addresses. When this flag is set, user must
+ * provide data[] as structure vfio_bitmap. User must allocate memory to get
+ * bitmap, zero the bitmap memory and must set size of allocated memory in
+ * vfio_bitmap.size field. A bit in bitmap represents one page of user provided
+ * page size in 'pgsize', consecutively starting from iova offset. Bit set
+ * indicates page at that offset from iova is dirty. Bitmap of pages in the
+ * range of unmapped size is returned in vfio_bitmap.data
*/
struct vfio_iommu_type1_dma_unmap {
__u32 argsz;
__u32 flags;
+#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0)
__u64 iova; /* IO virtual address */
__u64 size; /* Size of mapping (bytes) */
+ __u8 data[];
};
#define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)