Message ID | 20210119045920.447-1-xieyongji@bytedance.com |
---|---|
Headers | show |
Series | Introduce VDUSE - vDPA Device in Userspace | expand |
On 2021/1/19 下午12:59, Xie Yongji wrote: > Introduce a mutex to protect vhost device iotlb from > concurrent access. > > Fixes: 4c8cf318("vhost: introduce vDPA-based backend") > Signed-off-by: Xie Yongji <xieyongji@bytedance.com> > --- > drivers/vhost/vdpa.c | 4 ++++ > 1 file changed, 4 insertions(+) > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > index 448be7875b6d..4a241d380c40 100644 > --- a/drivers/vhost/vdpa.c > +++ b/drivers/vhost/vdpa.c > @@ -49,6 +49,7 @@ struct vhost_vdpa { > struct eventfd_ctx *config_ctx; > int in_batch; > struct vdpa_iova_range range; > + struct mutex mutex; Let's use the device mutex like what vhost_process_iotlb_msg() did. Thanks > }; > > static DEFINE_IDA(vhost_vdpa_ida); > @@ -728,6 +729,7 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, > if (r) > return r; > > + mutex_lock(&v->mutex); > switch (msg->type) { > case VHOST_IOTLB_UPDATE: > r = vhost_vdpa_process_iotlb_update(v, msg); > @@ -747,6 +749,7 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, > r = -EINVAL; > break; > } > + mutex_unlock(&v->mutex); > > return r; > } > @@ -1017,6 +1020,7 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) > return minor; > } > > + mutex_init(&v->mutex); > atomic_set(&v->opened, 0); > v->minor = minor; > v->vdpa = vdpa;
On 2021/1/19 下午12:59, Xie Yongji wrote: > With VDUSE, we should be able to support all kinds of virtio devices. > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com> > --- > drivers/vhost/vdpa.c | 29 +++-------------------------- > 1 file changed, 3 insertions(+), 26 deletions(-) > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > index 29ed4173f04e..448be7875b6d 100644 > --- a/drivers/vhost/vdpa.c > +++ b/drivers/vhost/vdpa.c > @@ -22,6 +22,7 @@ > #include <linux/nospec.h> > #include <linux/vhost.h> > #include <linux/virtio_net.h> > +#include <linux/virtio_blk.h> > > #include "vhost.h" > > @@ -185,26 +186,6 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) > return 0; > } > > -static int vhost_vdpa_config_validate(struct vhost_vdpa *v, > - struct vhost_vdpa_config *c) > -{ > - long size = 0; > - > - switch (v->virtio_id) { > - case VIRTIO_ID_NET: > - size = sizeof(struct virtio_net_config); > - break; > - } > - > - if (c->len == 0) > - return -EINVAL; > - > - if (c->len > size - c->off) > - return -E2BIG; > - > - return 0; > -} I think we should use a separate patch for this. Thanks > - > static long vhost_vdpa_get_config(struct vhost_vdpa *v, > struct vhost_vdpa_config __user *c) > { > @@ -215,7 +196,7 @@ static long vhost_vdpa_get_config(struct vhost_vdpa *v, > > if (copy_from_user(&config, c, size)) > return -EFAULT; > - if (vhost_vdpa_config_validate(v, &config)) > + if (config.len == 0) > return -EINVAL; > buf = kvzalloc(config.len, GFP_KERNEL); > if (!buf) > @@ -243,7 +224,7 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, > > if (copy_from_user(&config, c, size)) > return -EFAULT; > - if (vhost_vdpa_config_validate(v, &config)) > + if (config.len == 0) > return -EINVAL; > buf = kvzalloc(config.len, GFP_KERNEL); > if (!buf) > @@ -1025,10 +1006,6 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) > int minor; > int r; > > - /* Currently, we only accept the network devices. */ > - if (ops->get_device_id(vdpa) != VIRTIO_ID_NET) > - return -ENOTSUPP; > - > v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL); > if (!v) > return -ENOMEM;
On 2021/1/19 下午12:59, Xie Yongji wrote: > Add an opaque pointer for vhost IOTLB to store the > corresponding vma->vm_file and offset on the DMA mapping. Let's split the patch into two. 1) opaque pointer 2) vma stuffs > > It will be used in VDUSE case later. > > Suggested-by: Jason Wang <jasowang@redhat.com> > Signed-off-by: Xie Yongji <xieyongji@bytedance.com> > --- > drivers/vdpa/vdpa_sim/vdpa_sim.c | 11 ++++--- > drivers/vhost/iotlb.c | 5 ++- > drivers/vhost/vdpa.c | 66 +++++++++++++++++++++++++++++++++++----- > drivers/vhost/vhost.c | 4 +-- > include/linux/vdpa.h | 3 +- > include/linux/vhost_iotlb.h | 8 ++++- > 6 files changed, 79 insertions(+), 18 deletions(-) > > diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c > index 03c796873a6b..1ffcef67954f 100644 > --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c > +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c > @@ -279,7 +279,7 @@ static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page, > */ > spin_lock(&vdpasim->iommu_lock); > ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1, > - pa, dir_to_perm(dir)); > + pa, dir_to_perm(dir), NULL); Maybe its better to introduce vhost_iotlb_add_range_ctx() which can accepts the opaque (context). And let vhost_iotlb_add_range() just call that. > spin_unlock(&vdpasim->iommu_lock); > if (ret) > return DMA_MAPPING_ERROR; > @@ -317,7 +317,7 @@ static void *vdpasim_alloc_coherent(struct device *dev, size_t size, > > ret = vhost_iotlb_add_range(iommu, (u64)pa, > (u64)pa + size - 1, > - pa, VHOST_MAP_RW); > + pa, VHOST_MAP_RW, NULL); > if (ret) { > *dma_addr = DMA_MAPPING_ERROR; > kfree(addr); > @@ -625,7 +625,8 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, > for (map = vhost_iotlb_itree_first(iotlb, start, last); map; > map = vhost_iotlb_itree_next(map, start, last)) { > ret = vhost_iotlb_add_range(vdpasim->iommu, map->start, > - map->last, map->addr, map->perm); > + map->last, map->addr, > + map->perm, NULL); > if (ret) > goto err; > } > @@ -639,14 +640,14 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, > } > > static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size, > - u64 pa, u32 perm) > + u64 pa, u32 perm, void *opaque) > { > struct vdpasim *vdpasim = vdpa_to_sim(vdpa); > int ret; > > spin_lock(&vdpasim->iommu_lock); > ret = vhost_iotlb_add_range(vdpasim->iommu, iova, iova + size - 1, pa, > - perm); > + perm, NULL); > spin_unlock(&vdpasim->iommu_lock); > > return ret; > diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c > index 0fd3f87e913c..3bd5bd06cdbc 100644 > --- a/drivers/vhost/iotlb.c > +++ b/drivers/vhost/iotlb.c > @@ -42,13 +42,15 @@ EXPORT_SYMBOL_GPL(vhost_iotlb_map_free); > * @last: last of IOVA range > * @addr: the address that is mapped to @start > * @perm: access permission of this range > + * @opaque: the opaque pointer for the IOTLB mapping > * > * Returns an error last is smaller than start or memory allocation > * fails > */ > int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, > u64 start, u64 last, > - u64 addr, unsigned int perm) > + u64 addr, unsigned int perm, > + void *opaque) > { > struct vhost_iotlb_map *map; > > @@ -71,6 +73,7 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, > map->last = last; > map->addr = addr; > map->perm = perm; > + map->opaque = opaque; > > iotlb->nmaps++; > vhost_iotlb_itree_insert(map, &iotlb->root); > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > index 36b6950ba37f..e83e5be7cec8 100644 > --- a/drivers/vhost/vdpa.c > +++ b/drivers/vhost/vdpa.c > @@ -488,6 +488,7 @@ static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) > struct vhost_dev *dev = &v->vdev; > struct vdpa_device *vdpa = v->vdpa; > struct vhost_iotlb *iotlb = dev->iotlb; > + struct vhost_iotlb_file *iotlb_file; > struct vhost_iotlb_map *map; > struct page *page; > unsigned long pfn, pinned; > @@ -504,6 +505,10 @@ static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) > } > atomic64_sub(map->size >> PAGE_SHIFT, > &dev->mm->pinned_vm); > + } else if (map->opaque) { > + iotlb_file = (struct vhost_iotlb_file *)map->opaque; > + fput(iotlb_file->file); > + kfree(iotlb_file); > } > vhost_iotlb_map_free(iotlb, map); > } > @@ -540,8 +545,8 @@ static int perm_to_iommu_flags(u32 perm) > return flags | IOMMU_CACHE; > } > > -static int vhost_vdpa_map(struct vhost_vdpa *v, > - u64 iova, u64 size, u64 pa, u32 perm) > +static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova, > + u64 size, u64 pa, u32 perm, void *opaque) > { > struct vhost_dev *dev = &v->vdev; > struct vdpa_device *vdpa = v->vdpa; > @@ -549,12 +554,12 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, > int r = 0; > > r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1, > - pa, perm); > + pa, perm, opaque); > if (r) > return r; > > if (ops->dma_map) { > - r = ops->dma_map(vdpa, iova, size, pa, perm); > + r = ops->dma_map(vdpa, iova, size, pa, perm, opaque); > } else if (ops->set_map) { > if (!v->in_batch) > r = ops->set_map(vdpa, dev->iotlb); > @@ -591,6 +596,51 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) > } > } > > +static int vhost_vdpa_sva_map(struct vhost_vdpa *v, > + u64 iova, u64 size, u64 uaddr, u32 perm) > +{ > + u64 offset, map_size, map_iova = iova; > + struct vhost_iotlb_file *iotlb_file; > + struct vm_area_struct *vma; > + int ret; Lacking mmap_read_lock(). > + > + while (size) { > + vma = find_vma(current->mm, uaddr); > + if (!vma) { > + ret = -EINVAL; > + goto err; > + } > + map_size = min(size, vma->vm_end - uaddr); > + offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start; > + iotlb_file = NULL; > + if (vma->vm_file && (vma->vm_flags & VM_SHARED)) { I wonder if we need more strict check here. When developing vhost-vdpa, I try hard to make sure the map can only work for user pages. So the question is: do we need to exclude MMIO area or only allow shmem to work here? > + iotlb_file = kmalloc(sizeof(*iotlb_file), GFP_KERNEL); > + if (!iotlb_file) { > + ret = -ENOMEM; > + goto err; > + } > + iotlb_file->file = get_file(vma->vm_file); > + iotlb_file->offset = offset; > + } I wonder if it's better to allocate iotlb_file and make iotlb_file->file = NULL && iotlb_file->offset = 0. This can force a consistent code for the vDPA parents. Or we can simply fail the map without a file as backend. > + ret = vhost_vdpa_map(v, map_iova, map_size, uaddr, > + perm, iotlb_file); > + if (ret) { > + if (iotlb_file) { > + fput(iotlb_file->file); > + kfree(iotlb_file); > + } > + goto err; > + } > + size -= map_size; > + uaddr += map_size; > + map_iova += map_size; > + } > + return 0; > +err: > + vhost_vdpa_unmap(v, iova, map_iova - iova); > + return ret; > +} > + > static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > struct vhost_iotlb_msg *msg) > { > @@ -615,8 +665,8 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > return -EEXIST; > > if (vdpa->sva) > - return vhost_vdpa_map(v, msg->iova, msg->size, > - msg->uaddr, msg->perm); > + return vhost_vdpa_sva_map(v, msg->iova, msg->size, > + msg->uaddr, msg->perm); So I think it's better squash vhost_vdpa_sva_map() and related changes into previous patch. > > /* Limit the use of memory for bookkeeping */ > page_list = (struct page **) __get_free_page(GFP_KERNEL); > @@ -671,7 +721,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; > ret = vhost_vdpa_map(v, iova, csize, > map_pfn << PAGE_SHIFT, > - msg->perm); > + msg->perm, NULL); > if (ret) { > /* > * Unpin the pages that are left unmapped > @@ -700,7 +750,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > > /* Pin the rest chunk */ > ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT, > - map_pfn << PAGE_SHIFT, msg->perm); > + map_pfn << PAGE_SHIFT, msg->perm, NULL); > out: > if (ret) { > if (nchunks) { > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index a262e12c6dc2..120dd5b3c119 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -1104,7 +1104,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, > vhost_vq_meta_reset(dev); > if (vhost_iotlb_add_range(dev->iotlb, msg->iova, > msg->iova + msg->size - 1, > - msg->uaddr, msg->perm)) { > + msg->uaddr, msg->perm, NULL)) { > ret = -ENOMEM; > break; > } > @@ -1450,7 +1450,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) > region->guest_phys_addr + > region->memory_size - 1, > region->userspace_addr, > - VHOST_MAP_RW)) > + VHOST_MAP_RW, NULL)) > goto err; > } > > diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h > index f86869651614..b264c627e94b 100644 > --- a/include/linux/vdpa.h > +++ b/include/linux/vdpa.h > @@ -189,6 +189,7 @@ struct vdpa_iova_range { > * @size: size of the area > * @pa: physical address for the map > * @perm: device access permission (VHOST_MAP_XX) > + * @opaque: the opaque pointer for the mapping > * Returns integer: success (0) or error (< 0) > * @dma_unmap: Unmap an area of IOVA (optional but > * must be implemented with dma_map) > @@ -243,7 +244,7 @@ struct vdpa_config_ops { > /* DMA ops */ > int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb); > int (*dma_map)(struct vdpa_device *vdev, u64 iova, u64 size, > - u64 pa, u32 perm); > + u64 pa, u32 perm, void *opaque); > int (*dma_unmap)(struct vdpa_device *vdev, u64 iova, u64 size); > > /* Free device resources */ > diff --git a/include/linux/vhost_iotlb.h b/include/linux/vhost_iotlb.h > index 6b09b786a762..66a50c11c8ca 100644 > --- a/include/linux/vhost_iotlb.h > +++ b/include/linux/vhost_iotlb.h > @@ -4,6 +4,11 @@ > > #include <linux/interval_tree_generic.h> > > +struct vhost_iotlb_file { > + struct file *file; > + u64 offset; > +}; I think we'd better either: 1) simply use struct vhost_iotlb_file * instead of void *opaque for vhost_iotlb_map or 2)rename and move the vhost_iotlb_file to vdpa 2) looks better since we want to let vhost iotlb to carry any type of context (opaque pointer) And if we do this, the modification of vdpa_config_ops deserves a separate patch. Thanks > + > struct vhost_iotlb_map { > struct rb_node rb; > struct list_head link; > @@ -17,6 +22,7 @@ struct vhost_iotlb_map { > u32 perm; > u32 flags_padding; > u64 __subtree_last; > + void *opaque; > }; > > #define VHOST_IOTLB_FLAG_RETIRE 0x1 > @@ -30,7 +36,7 @@ struct vhost_iotlb { > }; > > int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, u64 start, u64 last, > - u64 addr, unsigned int perm); > + u64 addr, unsigned int perm, void *opaque); > void vhost_iotlb_del_range(struct vhost_iotlb *iotlb, u64 start, u64 last); > > struct vhost_iotlb *vhost_iotlb_alloc(unsigned int limit, unsigned int flags);
On Wed, Jan 20, 2021 at 11:44 AM Jason Wang <jasowang@redhat.com> wrote: > > > On 2021/1/19 下午12:59, Xie Yongji wrote: > > Introduce a mutex to protect vhost device iotlb from > > concurrent access. > > > > Fixes: 4c8cf318("vhost: introduce vDPA-based backend") > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com> > > --- > > drivers/vhost/vdpa.c | 4 ++++ > > 1 file changed, 4 insertions(+) > > > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > > index 448be7875b6d..4a241d380c40 100644 > > --- a/drivers/vhost/vdpa.c > > +++ b/drivers/vhost/vdpa.c > > @@ -49,6 +49,7 @@ struct vhost_vdpa { > > struct eventfd_ctx *config_ctx; > > int in_batch; > > struct vdpa_iova_range range; > > + struct mutex mutex; > > > Let's use the device mutex like what vhost_process_iotlb_msg() did. > Looks fine. Thanks, Yongji
On Wed, Jan 20, 2021 at 11:47 AM Jason Wang <jasowang@redhat.com> wrote: > > > On 2021/1/19 下午12:59, Xie Yongji wrote: > > With VDUSE, we should be able to support all kinds of virtio devices. > > > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com> > > --- > > drivers/vhost/vdpa.c | 29 +++-------------------------- > > 1 file changed, 3 insertions(+), 26 deletions(-) > > > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > > index 29ed4173f04e..448be7875b6d 100644 > > --- a/drivers/vhost/vdpa.c > > +++ b/drivers/vhost/vdpa.c > > @@ -22,6 +22,7 @@ > > #include <linux/nospec.h> > > #include <linux/vhost.h> > > #include <linux/virtio_net.h> > > +#include <linux/virtio_blk.h> > > > > #include "vhost.h" > > > > @@ -185,26 +186,6 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) > > return 0; > > } > > > > -static int vhost_vdpa_config_validate(struct vhost_vdpa *v, > > - struct vhost_vdpa_config *c) > > -{ > > - long size = 0; > > - > > - switch (v->virtio_id) { > > - case VIRTIO_ID_NET: > > - size = sizeof(struct virtio_net_config); > > - break; > > - } > > - > > - if (c->len == 0) > > - return -EINVAL; > > - > > - if (c->len > size - c->off) > > - return -E2BIG; > > - > > - return 0; > > -} > > > I think we should use a separate patch for this. > Will do it. Thanks, Yongji
On Wed, Jan 20, 2021 at 2:24 PM Jason Wang <jasowang@redhat.com> wrote: > > > On 2021/1/19 下午12:59, Xie Yongji wrote: > > Add an opaque pointer for vhost IOTLB to store the > > corresponding vma->vm_file and offset on the DMA mapping. > > > Let's split the patch into two. > > 1) opaque pointer > 2) vma stuffs > OK. > > > > > It will be used in VDUSE case later. > > > > Suggested-by: Jason Wang <jasowang@redhat.com> > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com> > > --- > > drivers/vdpa/vdpa_sim/vdpa_sim.c | 11 ++++--- > > drivers/vhost/iotlb.c | 5 ++- > > drivers/vhost/vdpa.c | 66 +++++++++++++++++++++++++++++++++++----- > > drivers/vhost/vhost.c | 4 +-- > > include/linux/vdpa.h | 3 +- > > include/linux/vhost_iotlb.h | 8 ++++- > > 6 files changed, 79 insertions(+), 18 deletions(-) > > > > diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c > > index 03c796873a6b..1ffcef67954f 100644 > > --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c > > +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c > > @@ -279,7 +279,7 @@ static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page, > > */ > > spin_lock(&vdpasim->iommu_lock); > > ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1, > > - pa, dir_to_perm(dir)); > > + pa, dir_to_perm(dir), NULL); > > > Maybe its better to introduce > > vhost_iotlb_add_range_ctx() which can accepts the opaque (context). And > let vhost_iotlb_add_range() just call that. > If so, we need export both vhost_iotlb_add_range() and vhost_iotlb_add_range_ctx() which will be used in VDUSE driver. Is it a bit redundant? > > > spin_unlock(&vdpasim->iommu_lock); > > if (ret) > > return DMA_MAPPING_ERROR; > > @@ -317,7 +317,7 @@ static void *vdpasim_alloc_coherent(struct device *dev, size_t size, > > > > ret = vhost_iotlb_add_range(iommu, (u64)pa, > > (u64)pa + size - 1, > > - pa, VHOST_MAP_RW); > > + pa, VHOST_MAP_RW, NULL); > > if (ret) { > > *dma_addr = DMA_MAPPING_ERROR; > > kfree(addr); > > @@ -625,7 +625,8 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, > > for (map = vhost_iotlb_itree_first(iotlb, start, last); map; > > map = vhost_iotlb_itree_next(map, start, last)) { > > ret = vhost_iotlb_add_range(vdpasim->iommu, map->start, > > - map->last, map->addr, map->perm); > > + map->last, map->addr, > > + map->perm, NULL); > > if (ret) > > goto err; > > } > > @@ -639,14 +640,14 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, > > } > > > > static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size, > > - u64 pa, u32 perm) > > + u64 pa, u32 perm, void *opaque) > > { > > struct vdpasim *vdpasim = vdpa_to_sim(vdpa); > > int ret; > > > > spin_lock(&vdpasim->iommu_lock); > > ret = vhost_iotlb_add_range(vdpasim->iommu, iova, iova + size - 1, pa, > > - perm); > > + perm, NULL); > > spin_unlock(&vdpasim->iommu_lock); > > > > return ret; > > diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c > > index 0fd3f87e913c..3bd5bd06cdbc 100644 > > --- a/drivers/vhost/iotlb.c > > +++ b/drivers/vhost/iotlb.c > > @@ -42,13 +42,15 @@ EXPORT_SYMBOL_GPL(vhost_iotlb_map_free); > > * @last: last of IOVA range > > * @addr: the address that is mapped to @start > > * @perm: access permission of this range > > + * @opaque: the opaque pointer for the IOTLB mapping > > * > > * Returns an error last is smaller than start or memory allocation > > * fails > > */ > > int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, > > u64 start, u64 last, > > - u64 addr, unsigned int perm) > > + u64 addr, unsigned int perm, > > + void *opaque) > > { > > struct vhost_iotlb_map *map; > > > > @@ -71,6 +73,7 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, > > map->last = last; > > map->addr = addr; > > map->perm = perm; > > + map->opaque = opaque; > > > > iotlb->nmaps++; > > vhost_iotlb_itree_insert(map, &iotlb->root); > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > > index 36b6950ba37f..e83e5be7cec8 100644 > > --- a/drivers/vhost/vdpa.c > > +++ b/drivers/vhost/vdpa.c > > @@ -488,6 +488,7 @@ static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) > > struct vhost_dev *dev = &v->vdev; > > struct vdpa_device *vdpa = v->vdpa; > > struct vhost_iotlb *iotlb = dev->iotlb; > > + struct vhost_iotlb_file *iotlb_file; > > struct vhost_iotlb_map *map; > > struct page *page; > > unsigned long pfn, pinned; > > @@ -504,6 +505,10 @@ static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) > > } > > atomic64_sub(map->size >> PAGE_SHIFT, > > &dev->mm->pinned_vm); > > + } else if (map->opaque) { > > + iotlb_file = (struct vhost_iotlb_file *)map->opaque; > > + fput(iotlb_file->file); > > + kfree(iotlb_file); > > } > > vhost_iotlb_map_free(iotlb, map); > > } > > @@ -540,8 +545,8 @@ static int perm_to_iommu_flags(u32 perm) > > return flags | IOMMU_CACHE; > > } > > > > -static int vhost_vdpa_map(struct vhost_vdpa *v, > > - u64 iova, u64 size, u64 pa, u32 perm) > > +static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova, > > + u64 size, u64 pa, u32 perm, void *opaque) > > { > > struct vhost_dev *dev = &v->vdev; > > struct vdpa_device *vdpa = v->vdpa; > > @@ -549,12 +554,12 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, > > int r = 0; > > > > r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1, > > - pa, perm); > > + pa, perm, opaque); > > if (r) > > return r; > > > > if (ops->dma_map) { > > - r = ops->dma_map(vdpa, iova, size, pa, perm); > > + r = ops->dma_map(vdpa, iova, size, pa, perm, opaque); > > } else if (ops->set_map) { > > if (!v->in_batch) > > r = ops->set_map(vdpa, dev->iotlb); > > @@ -591,6 +596,51 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) > > } > > } > > > > +static int vhost_vdpa_sva_map(struct vhost_vdpa *v, > > + u64 iova, u64 size, u64 uaddr, u32 perm) > > +{ > > + u64 offset, map_size, map_iova = iova; > > + struct vhost_iotlb_file *iotlb_file; > > + struct vm_area_struct *vma; > > + int ret; > > > Lacking mmap_read_lock(). > Good catch! Will fix it. > > > + > > + while (size) { > > + vma = find_vma(current->mm, uaddr); > > + if (!vma) { > > + ret = -EINVAL; > > + goto err; > > + } > > + map_size = min(size, vma->vm_end - uaddr); > > + offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start; > > + iotlb_file = NULL; > > + if (vma->vm_file && (vma->vm_flags & VM_SHARED)) { > > > I wonder if we need more strict check here. When developing vhost-vdpa, > I try hard to make sure the map can only work for user pages. > > So the question is: do we need to exclude MMIO area or only allow shmem > to work here? > Do you mean we need to check VM_MIXEDMAP | VM_PFNMAP here? It makes sense to me. > > > > + iotlb_file = kmalloc(sizeof(*iotlb_file), GFP_KERNEL); > > + if (!iotlb_file) { > > + ret = -ENOMEM; > > + goto err; > > + } > > + iotlb_file->file = get_file(vma->vm_file); > > + iotlb_file->offset = offset; > > + } > > > I wonder if it's better to allocate iotlb_file and make iotlb_file->file > = NULL && iotlb_file->offset = 0. This can force a consistent code for > the vDPA parents. > Looks fine to me. > Or we can simply fail the map without a file as backend. > Actually there will be some vma without vm_file during vm booting. > > > + ret = vhost_vdpa_map(v, map_iova, map_size, uaddr, > > + perm, iotlb_file); > > + if (ret) { > > + if (iotlb_file) { > > + fput(iotlb_file->file); > > + kfree(iotlb_file); > > + } > > + goto err; > > + } > > + size -= map_size; > > + uaddr += map_size; > > + map_iova += map_size; > > + } > > + return 0; > > +err: > > + vhost_vdpa_unmap(v, iova, map_iova - iova); > > + return ret; > > +} > > + > > static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > > struct vhost_iotlb_msg *msg) > > { > > @@ -615,8 +665,8 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > > return -EEXIST; > > > > if (vdpa->sva) > > - return vhost_vdpa_map(v, msg->iova, msg->size, > > - msg->uaddr, msg->perm); > > + return vhost_vdpa_sva_map(v, msg->iova, msg->size, > > + msg->uaddr, msg->perm); > > > So I think it's better squash vhost_vdpa_sva_map() and related changes > into previous patch. > OK, so the order of the patches is: 1) opaque pointer 2) va support + vma stuffs? Is it OK? > > > > > /* Limit the use of memory for bookkeeping */ > > page_list = (struct page **) __get_free_page(GFP_KERNEL); > > @@ -671,7 +721,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > > csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; > > ret = vhost_vdpa_map(v, iova, csize, > > map_pfn << PAGE_SHIFT, > > - msg->perm); > > + msg->perm, NULL); > > if (ret) { > > /* > > * Unpin the pages that are left unmapped > > @@ -700,7 +750,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > > > > /* Pin the rest chunk */ > > ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT, > > - map_pfn << PAGE_SHIFT, msg->perm); > > + map_pfn << PAGE_SHIFT, msg->perm, NULL); > > out: > > if (ret) { > > if (nchunks) { > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > > index a262e12c6dc2..120dd5b3c119 100644 > > --- a/drivers/vhost/vhost.c > > +++ b/drivers/vhost/vhost.c > > @@ -1104,7 +1104,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, > > vhost_vq_meta_reset(dev); > > if (vhost_iotlb_add_range(dev->iotlb, msg->iova, > > msg->iova + msg->size - 1, > > - msg->uaddr, msg->perm)) { > > + msg->uaddr, msg->perm, NULL)) { > > ret = -ENOMEM; > > break; > > } > > @@ -1450,7 +1450,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) > > region->guest_phys_addr + > > region->memory_size - 1, > > region->userspace_addr, > > - VHOST_MAP_RW)) > > + VHOST_MAP_RW, NULL)) > > goto err; > > } > > > > diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h > > index f86869651614..b264c627e94b 100644 > > --- a/include/linux/vdpa.h > > +++ b/include/linux/vdpa.h > > @@ -189,6 +189,7 @@ struct vdpa_iova_range { > > * @size: size of the area > > * @pa: physical address for the map > > * @perm: device access permission (VHOST_MAP_XX) > > + * @opaque: the opaque pointer for the mapping > > * Returns integer: success (0) or error (< 0) > > * @dma_unmap: Unmap an area of IOVA (optional but > > * must be implemented with dma_map) > > @@ -243,7 +244,7 @@ struct vdpa_config_ops { > > /* DMA ops */ > > int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb); > > int (*dma_map)(struct vdpa_device *vdev, u64 iova, u64 size, > > - u64 pa, u32 perm); > > + u64 pa, u32 perm, void *opaque); > > int (*dma_unmap)(struct vdpa_device *vdev, u64 iova, u64 size); > > > > /* Free device resources */ > > diff --git a/include/linux/vhost_iotlb.h b/include/linux/vhost_iotlb.h > > index 6b09b786a762..66a50c11c8ca 100644 > > --- a/include/linux/vhost_iotlb.h > > +++ b/include/linux/vhost_iotlb.h > > @@ -4,6 +4,11 @@ > > > > #include <linux/interval_tree_generic.h> > > > > +struct vhost_iotlb_file { > > + struct file *file; > > + u64 offset; > > +}; > > > I think we'd better either: > > 1) simply use struct vhost_iotlb_file * instead of void *opaque for > vhost_iotlb_map > > or > > 2)rename and move the vhost_iotlb_file to vdpa > > 2) looks better since we want to let vhost iotlb to carry any type of > context (opaque pointer) > I agree. So we need to introduce struct vdpa_iotlb_file in include/linux/vdpa.h, right? > And if we do this, the modification of vdpa_config_ops deserves a > separate patch. > Sorry, I didn't get you here. What do you mean by the modification of vdpa_config_ops? Do you mean adding an opaque pointer to ops.dma_map? Thanks, Yongji
On Wed, Jan 20, 2021 at 11:46:38AM +0800, Jason Wang wrote: > >On 2021/1/19 下午12:59, Xie Yongji wrote: >>With VDUSE, we should be able to support all kinds of virtio devices. >> >>Signed-off-by: Xie Yongji <xieyongji@bytedance.com> >>--- >> drivers/vhost/vdpa.c | 29 +++-------------------------- >> 1 file changed, 3 insertions(+), 26 deletions(-) >> >>diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c >>index 29ed4173f04e..448be7875b6d 100644 >>--- a/drivers/vhost/vdpa.c >>+++ b/drivers/vhost/vdpa.c >>@@ -22,6 +22,7 @@ >> #include <linux/nospec.h> >> #include <linux/vhost.h> >> #include <linux/virtio_net.h> >>+#include <linux/virtio_blk.h> >> #include "vhost.h" >>@@ -185,26 +186,6 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) >> return 0; >> } >>-static int vhost_vdpa_config_validate(struct vhost_vdpa *v, >>- struct vhost_vdpa_config *c) >>-{ >>- long size = 0; >>- >>- switch (v->virtio_id) { >>- case VIRTIO_ID_NET: >>- size = sizeof(struct virtio_net_config); >>- break; >>- } >>- >>- if (c->len == 0) >>- return -EINVAL; >>- >>- if (c->len > size - c->off) >>- return -E2BIG; >>- >>- return 0; >>-} > > >I think we should use a separate patch for this. For the vdpa-blk simulator I had the same issues and I'm adding a .get_config_size() callback to vdpa devices. Do you think make sense or is better to remove this check in vhost/vdpa, delegating the boundaries checks to get_config/set_config callbacks. Thanks, Stefano
On 2021/1/19 下午1:07, Xie Yongji wrote: > This patch introduces a dedicated workqueue for irq injection > so that we are able to do some performance tuning for it. > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com> If we want the split like this. It might be better to: 1) implement a simple irq injection on the ioctl context in patch 8 2) add the dedicated workqueue injection in this patch Since my understanding is that 1) the function looks more isolated for readers 2) the difference between sysctl vs workqueue should be more obvious than system wq vs dedicated wq 3) a chance to describe why workqueue is needed in the commit log in this patch Thanks > --- > drivers/vdpa/vdpa_user/eventfd.c | 10 +++++++++- > 1 file changed, 9 insertions(+), 1 deletion(-) > > diff --git a/drivers/vdpa/vdpa_user/eventfd.c b/drivers/vdpa/vdpa_user/eventfd.c > index dbffddb08908..caf7d8d68ac0 100644 > --- a/drivers/vdpa/vdpa_user/eventfd.c > +++ b/drivers/vdpa/vdpa_user/eventfd.c > @@ -18,6 +18,7 @@ > #include "eventfd.h" > > static struct workqueue_struct *vduse_irqfd_cleanup_wq; > +static struct workqueue_struct *vduse_irq_wq; > > static void vduse_virqfd_shutdown(struct work_struct *work) > { > @@ -57,7 +58,7 @@ static int vduse_virqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode, > __poll_t flags = key_to_poll(key); > > if (flags & EPOLLIN) > - schedule_work(&virqfd->inject); > + queue_work(vduse_irq_wq, &virqfd->inject); > > if (flags & EPOLLHUP) { > spin_lock(&vq->irq_lock); > @@ -165,11 +166,18 @@ int vduse_virqfd_init(void) > if (!vduse_irqfd_cleanup_wq) > return -ENOMEM; > > + vduse_irq_wq = alloc_workqueue("vduse-irq", WQ_SYSFS | WQ_UNBOUND, 0); > + if (!vduse_irq_wq) { > + destroy_workqueue(vduse_irqfd_cleanup_wq); > + return -ENOMEM; > + } > + > return 0; > } > > void vduse_virqfd_exit(void) > { > + destroy_workqueue(vduse_irq_wq); > destroy_workqueue(vduse_irqfd_cleanup_wq); > } >
On 2021/1/20 下午7:08, Stefano Garzarella wrote: > On Wed, Jan 20, 2021 at 11:46:38AM +0800, Jason Wang wrote: >> >> On 2021/1/19 下午12:59, Xie Yongji wrote: >>> With VDUSE, we should be able to support all kinds of virtio devices. >>> >>> Signed-off-by: Xie Yongji <xieyongji@bytedance.com> >>> --- >>> drivers/vhost/vdpa.c | 29 +++-------------------------- >>> 1 file changed, 3 insertions(+), 26 deletions(-) >>> >>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c >>> index 29ed4173f04e..448be7875b6d 100644 >>> --- a/drivers/vhost/vdpa.c >>> +++ b/drivers/vhost/vdpa.c >>> @@ -22,6 +22,7 @@ >>> #include <linux/nospec.h> >>> #include <linux/vhost.h> >>> #include <linux/virtio_net.h> >>> +#include <linux/virtio_blk.h> >>> #include "vhost.h" >>> @@ -185,26 +186,6 @@ static long vhost_vdpa_set_status(struct >>> vhost_vdpa *v, u8 __user *statusp) >>> return 0; >>> } >>> -static int vhost_vdpa_config_validate(struct vhost_vdpa *v, >>> - struct vhost_vdpa_config *c) >>> -{ >>> - long size = 0; >>> - >>> - switch (v->virtio_id) { >>> - case VIRTIO_ID_NET: >>> - size = sizeof(struct virtio_net_config); >>> - break; >>> - } >>> - >>> - if (c->len == 0) >>> - return -EINVAL; >>> - >>> - if (c->len > size - c->off) >>> - return -E2BIG; >>> - >>> - return 0; >>> -} >> >> >> I think we should use a separate patch for this. > > For the vdpa-blk simulator I had the same issues and I'm adding a > .get_config_size() callback to vdpa devices. > > Do you think make sense or is better to remove this check in > vhost/vdpa, delegating the boundaries checks to get_config/set_config > callbacks. A question here. How much value could we gain from get_config_size() consider we can let vDPA parent to validate the length in its get_config(). Thanks > > Thanks, > Stefano >
On 2021/1/20 下午3:52, Yongji Xie wrote: > On Wed, Jan 20, 2021 at 2:24 PM Jason Wang <jasowang@redhat.com> wrote: >> >> On 2021/1/19 下午12:59, Xie Yongji wrote: >>> Add an opaque pointer for vhost IOTLB to store the >>> corresponding vma->vm_file and offset on the DMA mapping. >> >> Let's split the patch into two. >> >> 1) opaque pointer >> 2) vma stuffs >> > OK. > >>> It will be used in VDUSE case later. >>> >>> Suggested-by: Jason Wang <jasowang@redhat.com> >>> Signed-off-by: Xie Yongji <xieyongji@bytedance.com> >>> --- >>> drivers/vdpa/vdpa_sim/vdpa_sim.c | 11 ++++--- >>> drivers/vhost/iotlb.c | 5 ++- >>> drivers/vhost/vdpa.c | 66 +++++++++++++++++++++++++++++++++++----- >>> drivers/vhost/vhost.c | 4 +-- >>> include/linux/vdpa.h | 3 +- >>> include/linux/vhost_iotlb.h | 8 ++++- >>> 6 files changed, 79 insertions(+), 18 deletions(-) >>> >>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c >>> index 03c796873a6b..1ffcef67954f 100644 >>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c >>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c >>> @@ -279,7 +279,7 @@ static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page, >>> */ >>> spin_lock(&vdpasim->iommu_lock); >>> ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1, >>> - pa, dir_to_perm(dir)); >>> + pa, dir_to_perm(dir), NULL); >> >> Maybe its better to introduce >> >> vhost_iotlb_add_range_ctx() which can accepts the opaque (context). And >> let vhost_iotlb_add_range() just call that. >> > If so, we need export both vhost_iotlb_add_range() and > vhost_iotlb_add_range_ctx() which will be used in VDUSE driver. Is it > a bit redundant? Probably not, we do something similar in virtio core: void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, void **ctx) { struct vring_virtqueue *vq = to_vvq(_vq); return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : virtqueue_get_buf_ctx_split(_vq, len, ctx); } EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) { return virtqueue_get_buf_ctx(_vq, len, NULL); } EXPORT_SYMBOL_GPL(virtqueue_get_buf); > >>> spin_unlock(&vdpasim->iommu_lock); >>> if (ret) >>> return DMA_MAPPING_ERROR; >>> @@ -317,7 +317,7 @@ static void *vdpasim_alloc_coherent(struct device *dev, size_t size, >>> >>> ret = vhost_iotlb_add_range(iommu, (u64)pa, >>> (u64)pa + size - 1, >>> - pa, VHOST_MAP_RW); >>> + pa, VHOST_MAP_RW, NULL); >>> if (ret) { >>> *dma_addr = DMA_MAPPING_ERROR; >>> kfree(addr); >>> @@ -625,7 +625,8 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, >>> for (map = vhost_iotlb_itree_first(iotlb, start, last); map; >>> map = vhost_iotlb_itree_next(map, start, last)) { >>> ret = vhost_iotlb_add_range(vdpasim->iommu, map->start, >>> - map->last, map->addr, map->perm); >>> + map->last, map->addr, >>> + map->perm, NULL); >>> if (ret) >>> goto err; >>> } >>> @@ -639,14 +640,14 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, >>> } >>> >>> static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size, >>> - u64 pa, u32 perm) >>> + u64 pa, u32 perm, void *opaque) >>> { >>> struct vdpasim *vdpasim = vdpa_to_sim(vdpa); >>> int ret; >>> >>> spin_lock(&vdpasim->iommu_lock); >>> ret = vhost_iotlb_add_range(vdpasim->iommu, iova, iova + size - 1, pa, >>> - perm); >>> + perm, NULL); >>> spin_unlock(&vdpasim->iommu_lock); >>> >>> return ret; >>> diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c >>> index 0fd3f87e913c..3bd5bd06cdbc 100644 >>> --- a/drivers/vhost/iotlb.c >>> +++ b/drivers/vhost/iotlb.c >>> @@ -42,13 +42,15 @@ EXPORT_SYMBOL_GPL(vhost_iotlb_map_free); >>> * @last: last of IOVA range >>> * @addr: the address that is mapped to @start >>> * @perm: access permission of this range >>> + * @opaque: the opaque pointer for the IOTLB mapping >>> * >>> * Returns an error last is smaller than start or memory allocation >>> * fails >>> */ >>> int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, >>> u64 start, u64 last, >>> - u64 addr, unsigned int perm) >>> + u64 addr, unsigned int perm, >>> + void *opaque) >>> { >>> struct vhost_iotlb_map *map; >>> >>> @@ -71,6 +73,7 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, >>> map->last = last; >>> map->addr = addr; >>> map->perm = perm; >>> + map->opaque = opaque; >>> >>> iotlb->nmaps++; >>> vhost_iotlb_itree_insert(map, &iotlb->root); >>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c >>> index 36b6950ba37f..e83e5be7cec8 100644 >>> --- a/drivers/vhost/vdpa.c >>> +++ b/drivers/vhost/vdpa.c >>> @@ -488,6 +488,7 @@ static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) >>> struct vhost_dev *dev = &v->vdev; >>> struct vdpa_device *vdpa = v->vdpa; >>> struct vhost_iotlb *iotlb = dev->iotlb; >>> + struct vhost_iotlb_file *iotlb_file; >>> struct vhost_iotlb_map *map; >>> struct page *page; >>> unsigned long pfn, pinned; >>> @@ -504,6 +505,10 @@ static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) >>> } >>> atomic64_sub(map->size >> PAGE_SHIFT, >>> &dev->mm->pinned_vm); >>> + } else if (map->opaque) { >>> + iotlb_file = (struct vhost_iotlb_file *)map->opaque; >>> + fput(iotlb_file->file); >>> + kfree(iotlb_file); >>> } >>> vhost_iotlb_map_free(iotlb, map); >>> } >>> @@ -540,8 +545,8 @@ static int perm_to_iommu_flags(u32 perm) >>> return flags | IOMMU_CACHE; >>> } >>> >>> -static int vhost_vdpa_map(struct vhost_vdpa *v, >>> - u64 iova, u64 size, u64 pa, u32 perm) >>> +static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova, >>> + u64 size, u64 pa, u32 perm, void *opaque) >>> { >>> struct vhost_dev *dev = &v->vdev; >>> struct vdpa_device *vdpa = v->vdpa; >>> @@ -549,12 +554,12 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, >>> int r = 0; >>> >>> r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1, >>> - pa, perm); >>> + pa, perm, opaque); >>> if (r) >>> return r; >>> >>> if (ops->dma_map) { >>> - r = ops->dma_map(vdpa, iova, size, pa, perm); >>> + r = ops->dma_map(vdpa, iova, size, pa, perm, opaque); >>> } else if (ops->set_map) { >>> if (!v->in_batch) >>> r = ops->set_map(vdpa, dev->iotlb); >>> @@ -591,6 +596,51 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) >>> } >>> } >>> >>> +static int vhost_vdpa_sva_map(struct vhost_vdpa *v, >>> + u64 iova, u64 size, u64 uaddr, u32 perm) >>> +{ >>> + u64 offset, map_size, map_iova = iova; >>> + struct vhost_iotlb_file *iotlb_file; >>> + struct vm_area_struct *vma; >>> + int ret; >> >> Lacking mmap_read_lock(). >> > Good catch! Will fix it. > >>> + >>> + while (size) { >>> + vma = find_vma(current->mm, uaddr); >>> + if (!vma) { >>> + ret = -EINVAL; >>> + goto err; >>> + } >>> + map_size = min(size, vma->vm_end - uaddr); >>> + offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start; >>> + iotlb_file = NULL; >>> + if (vma->vm_file && (vma->vm_flags & VM_SHARED)) { >> >> I wonder if we need more strict check here. When developing vhost-vdpa, >> I try hard to make sure the map can only work for user pages. >> >> So the question is: do we need to exclude MMIO area or only allow shmem >> to work here? >> > Do you mean we need to check VM_MIXEDMAP | VM_PFNMAP here? I meant do we need to allow VM_IO here? (We don't allow such case in vhost-vdpa now). > > It makes sense to me. > >> >>> + iotlb_file = kmalloc(sizeof(*iotlb_file), GFP_KERNEL); >>> + if (!iotlb_file) { >>> + ret = -ENOMEM; >>> + goto err; >>> + } >>> + iotlb_file->file = get_file(vma->vm_file); >>> + iotlb_file->offset = offset; >>> + } >> >> I wonder if it's better to allocate iotlb_file and make iotlb_file->file >> = NULL && iotlb_file->offset = 0. This can force a consistent code for >> the vDPA parents. >> > Looks fine to me. > >> Or we can simply fail the map without a file as backend. >> > Actually there will be some vma without vm_file during vm booting. Yes, e.g bios or other rom. Vhost-user has the similar issue and they filter the out them in qemu. For vhost-vDPA, consider it can supports various difference backends, we can't do that. > >>> + ret = vhost_vdpa_map(v, map_iova, map_size, uaddr, >>> + perm, iotlb_file); >>> + if (ret) { >>> + if (iotlb_file) { >>> + fput(iotlb_file->file); >>> + kfree(iotlb_file); >>> + } >>> + goto err; >>> + } >>> + size -= map_size; >>> + uaddr += map_size; >>> + map_iova += map_size; >>> + } >>> + return 0; >>> +err: >>> + vhost_vdpa_unmap(v, iova, map_iova - iova); >>> + return ret; >>> +} >>> + >>> static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >>> struct vhost_iotlb_msg *msg) >>> { >>> @@ -615,8 +665,8 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >>> return -EEXIST; >>> >>> if (vdpa->sva) >>> - return vhost_vdpa_map(v, msg->iova, msg->size, >>> - msg->uaddr, msg->perm); >>> + return vhost_vdpa_sva_map(v, msg->iova, msg->size, >>> + msg->uaddr, msg->perm); >> >> So I think it's better squash vhost_vdpa_sva_map() and related changes >> into previous patch. >> > OK, so the order of the patches is: > 1) opaque pointer > 2) va support + vma stuffs? > > Is it OK? Fine with me. > >>> /* Limit the use of memory for bookkeeping */ >>> page_list = (struct page **) __get_free_page(GFP_KERNEL); >>> @@ -671,7 +721,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >>> csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; >>> ret = vhost_vdpa_map(v, iova, csize, >>> map_pfn << PAGE_SHIFT, >>> - msg->perm); >>> + msg->perm, NULL); >>> if (ret) { >>> /* >>> * Unpin the pages that are left unmapped >>> @@ -700,7 +750,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, >>> >>> /* Pin the rest chunk */ >>> ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT, >>> - map_pfn << PAGE_SHIFT, msg->perm); >>> + map_pfn << PAGE_SHIFT, msg->perm, NULL); >>> out: >>> if (ret) { >>> if (nchunks) { >>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c >>> index a262e12c6dc2..120dd5b3c119 100644 >>> --- a/drivers/vhost/vhost.c >>> +++ b/drivers/vhost/vhost.c >>> @@ -1104,7 +1104,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, >>> vhost_vq_meta_reset(dev); >>> if (vhost_iotlb_add_range(dev->iotlb, msg->iova, >>> msg->iova + msg->size - 1, >>> - msg->uaddr, msg->perm)) { >>> + msg->uaddr, msg->perm, NULL)) { >>> ret = -ENOMEM; >>> break; >>> } >>> @@ -1450,7 +1450,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) >>> region->guest_phys_addr + >>> region->memory_size - 1, >>> region->userspace_addr, >>> - VHOST_MAP_RW)) >>> + VHOST_MAP_RW, NULL)) >>> goto err; >>> } >>> >>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h >>> index f86869651614..b264c627e94b 100644 >>> --- a/include/linux/vdpa.h >>> +++ b/include/linux/vdpa.h >>> @@ -189,6 +189,7 @@ struct vdpa_iova_range { >>> * @size: size of the area >>> * @pa: physical address for the map >>> * @perm: device access permission (VHOST_MAP_XX) >>> + * @opaque: the opaque pointer for the mapping >>> * Returns integer: success (0) or error (< 0) >>> * @dma_unmap: Unmap an area of IOVA (optional but >>> * must be implemented with dma_map) >>> @@ -243,7 +244,7 @@ struct vdpa_config_ops { >>> /* DMA ops */ >>> int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb); >>> int (*dma_map)(struct vdpa_device *vdev, u64 iova, u64 size, >>> - u64 pa, u32 perm); >>> + u64 pa, u32 perm, void *opaque); >>> int (*dma_unmap)(struct vdpa_device *vdev, u64 iova, u64 size); >>> >>> /* Free device resources */ >>> diff --git a/include/linux/vhost_iotlb.h b/include/linux/vhost_iotlb.h >>> index 6b09b786a762..66a50c11c8ca 100644 >>> --- a/include/linux/vhost_iotlb.h >>> +++ b/include/linux/vhost_iotlb.h >>> @@ -4,6 +4,11 @@ >>> >>> #include <linux/interval_tree_generic.h> >>> >>> +struct vhost_iotlb_file { >>> + struct file *file; >>> + u64 offset; >>> +}; >> >> I think we'd better either: >> >> 1) simply use struct vhost_iotlb_file * instead of void *opaque for >> vhost_iotlb_map >> >> or >> >> 2)rename and move the vhost_iotlb_file to vdpa >> >> 2) looks better since we want to let vhost iotlb to carry any type of >> context (opaque pointer) >> > I agree. So we need to introduce struct vdpa_iotlb_file in > include/linux/vdpa.h, right? Yes. > >> And if we do this, the modification of vdpa_config_ops deserves a >> separate patch. >> > Sorry, I didn't get you here. What do you mean by the modification of > vdpa_config_ops? Do you mean adding an opaque pointer to ops.dma_map? Yes. Thanks > > Thanks, > Yongji >
On Wed, Jan 27, 2021 at 11:33:03AM +0800, Jason Wang wrote: > >On 2021/1/20 下午7:08, Stefano Garzarella wrote: >>On Wed, Jan 20, 2021 at 11:46:38AM +0800, Jason Wang wrote: >>> >>>On 2021/1/19 下午12:59, Xie Yongji wrote: >>>>With VDUSE, we should be able to support all kinds of virtio devices. >>>> >>>>Signed-off-by: Xie Yongji <xieyongji@bytedance.com> >>>>--- >>>> drivers/vhost/vdpa.c | 29 +++-------------------------- >>>> 1 file changed, 3 insertions(+), 26 deletions(-) >>>> >>>>diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c >>>>index 29ed4173f04e..448be7875b6d 100644 >>>>--- a/drivers/vhost/vdpa.c >>>>+++ b/drivers/vhost/vdpa.c >>>>@@ -22,6 +22,7 @@ >>>> #include <linux/nospec.h> >>>> #include <linux/vhost.h> >>>> #include <linux/virtio_net.h> >>>>+#include <linux/virtio_blk.h> >>>> #include "vhost.h" >>>>@@ -185,26 +186,6 @@ static long vhost_vdpa_set_status(struct >>>>vhost_vdpa *v, u8 __user *statusp) >>>> return 0; >>>> } >>>>-static int vhost_vdpa_config_validate(struct vhost_vdpa *v, >>>>- struct vhost_vdpa_config *c) >>>>-{ >>>>- long size = 0; >>>>- >>>>- switch (v->virtio_id) { >>>>- case VIRTIO_ID_NET: >>>>- size = sizeof(struct virtio_net_config); >>>>- break; >>>>- } >>>>- >>>>- if (c->len == 0) >>>>- return -EINVAL; >>>>- >>>>- if (c->len > size - c->off) >>>>- return -E2BIG; >>>>- >>>>- return 0; >>>>-} >>> >>> >>>I think we should use a separate patch for this. >> >>For the vdpa-blk simulator I had the same issues and I'm adding a >>.get_config_size() callback to vdpa devices. >> >>Do you think make sense or is better to remove this check in >>vhost/vdpa, delegating the boundaries checks to >>get_config/set_config callbacks. > > >A question here. How much value could we gain from get_config_size() >consider we can let vDPA parent to validate the length in its >get_config(). > I agree, most of the implementations already validate the length, the only gain is an error returned since get_config() is void, but eventually we can add a return value to it. Thanks, Stefano
On Tue, Jan 19, 2021 at 12:59:12PM +0800, Xie Yongji wrote: >With VDUSE, we should be able to support all kinds of virtio devices. > >Signed-off-by: Xie Yongji <xieyongji@bytedance.com> >--- > drivers/vhost/vdpa.c | 29 +++-------------------------- > 1 file changed, 3 insertions(+), 26 deletions(-) > >diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c >index 29ed4173f04e..448be7875b6d 100644 >--- a/drivers/vhost/vdpa.c >+++ b/drivers/vhost/vdpa.c >@@ -22,6 +22,7 @@ > #include <linux/nospec.h> > #include <linux/vhost.h> > #include <linux/virtio_net.h> >+#include <linux/virtio_blk.h> Is this inclusion necessary? Maybe we can remove virtio_net.h as well. Thanks, Stefano > > #include "vhost.h" > >@@ -185,26 +186,6 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) > return 0; > } > >-static int vhost_vdpa_config_validate(struct vhost_vdpa *v, >- struct vhost_vdpa_config *c) >-{ >- long size = 0; >- >- switch (v->virtio_id) { >- case VIRTIO_ID_NET: >- size = sizeof(struct virtio_net_config); >- break; >- } >- >- if (c->len == 0) >- return -EINVAL; >- >- if (c->len > size - c->off) >- return -E2BIG; >- >- return 0; >-} >- > static long vhost_vdpa_get_config(struct vhost_vdpa *v, > struct vhost_vdpa_config __user *c) > { >@@ -215,7 +196,7 @@ static long vhost_vdpa_get_config(struct vhost_vdpa *v, > > if (copy_from_user(&config, c, size)) > return -EFAULT; >- if (vhost_vdpa_config_validate(v, &config)) >+ if (config.len == 0) > return -EINVAL; > buf = kvzalloc(config.len, GFP_KERNEL); > if (!buf) >@@ -243,7 +224,7 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, > > if (copy_from_user(&config, c, size)) > return -EFAULT; >- if (vhost_vdpa_config_validate(v, &config)) >+ if (config.len == 0) > return -EINVAL; > buf = kvzalloc(config.len, GFP_KERNEL); > if (!buf) >@@ -1025,10 +1006,6 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) > int minor; > int r; > >- /* Currently, we only accept the network devices. */ >- if (ops->get_device_id(vdpa) != VIRTIO_ID_NET) >- return -ENOTSUPP; >- > v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL); > if (!v) > return -ENOMEM; >-- >2.11.0 >
On Tue, Jan 26, 2021 at 4:17 PM Jason Wang <jasowang@redhat.com> wrote: > > > On 2021/1/19 下午1:07, Xie Yongji wrote: > > This patch introduces a dedicated workqueue for irq injection > > so that we are able to do some performance tuning for it. > > > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com> > > > If we want the split like this. > > It might be better to: > > 1) implement a simple irq injection on the ioctl context in patch 8 > 2) add the dedicated workqueue injection in this patch > > Since my understanding is that > > 1) the function looks more isolated for readers > 2) the difference between sysctl vs workqueue should be more obvious > than system wq vs dedicated wq > 3) a chance to describe why workqueue is needed in the commit log in > this patch > OK, I will try to do it in v4. Thanks, Yongji
On Wed, Jan 27, 2021 at 4:59 PM Stefano Garzarella <sgarzare@redhat.com> wrote: > > On Tue, Jan 19, 2021 at 12:59:12PM +0800, Xie Yongji wrote: > >With VDUSE, we should be able to support all kinds of virtio devices. > > > >Signed-off-by: Xie Yongji <xieyongji@bytedance.com> > >--- > > drivers/vhost/vdpa.c | 29 +++-------------------------- > > 1 file changed, 3 insertions(+), 26 deletions(-) > > > >diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > >index 29ed4173f04e..448be7875b6d 100644 > >--- a/drivers/vhost/vdpa.c > >+++ b/drivers/vhost/vdpa.c > >@@ -22,6 +22,7 @@ > > #include <linux/nospec.h> > > #include <linux/vhost.h> > > #include <linux/virtio_net.h> > >+#include <linux/virtio_blk.h> > > Is this inclusion necessary? > My mistake... > Maybe we can remove virtio_net.h as well. > Agree. Thanks, Yongji
On Wed, Jan 27, 2021 at 11:51 AM Jason Wang <jasowang@redhat.com> wrote: > > > On 2021/1/20 下午3:52, Yongji Xie wrote: > > On Wed, Jan 20, 2021 at 2:24 PM Jason Wang <jasowang@redhat.com> wrote: > >> > >> On 2021/1/19 下午12:59, Xie Yongji wrote: > >>> Add an opaque pointer for vhost IOTLB to store the > >>> corresponding vma->vm_file and offset on the DMA mapping. > >> > >> Let's split the patch into two. > >> > >> 1) opaque pointer > >> 2) vma stuffs > >> > > OK. > > > >>> It will be used in VDUSE case later. > >>> > >>> Suggested-by: Jason Wang <jasowang@redhat.com> > >>> Signed-off-by: Xie Yongji <xieyongji@bytedance.com> > >>> --- > >>> drivers/vdpa/vdpa_sim/vdpa_sim.c | 11 ++++--- > >>> drivers/vhost/iotlb.c | 5 ++- > >>> drivers/vhost/vdpa.c | 66 +++++++++++++++++++++++++++++++++++----- > >>> drivers/vhost/vhost.c | 4 +-- > >>> include/linux/vdpa.h | 3 +- > >>> include/linux/vhost_iotlb.h | 8 ++++- > >>> 6 files changed, 79 insertions(+), 18 deletions(-) > >>> > >>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c > >>> index 03c796873a6b..1ffcef67954f 100644 > >>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c > >>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c > >>> @@ -279,7 +279,7 @@ static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page, > >>> */ > >>> spin_lock(&vdpasim->iommu_lock); > >>> ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1, > >>> - pa, dir_to_perm(dir)); > >>> + pa, dir_to_perm(dir), NULL); > >> > >> Maybe its better to introduce > >> > >> vhost_iotlb_add_range_ctx() which can accepts the opaque (context). And > >> let vhost_iotlb_add_range() just call that. > >> > > If so, we need export both vhost_iotlb_add_range() and > > vhost_iotlb_add_range_ctx() which will be used in VDUSE driver. Is it > > a bit redundant? > > > Probably not, we do something similar in virtio core: > > void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, > void **ctx) > { > struct vring_virtqueue *vq = to_vvq(_vq); > > return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : > virtqueue_get_buf_ctx_split(_vq, len, ctx); > } > EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); > > void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) > { > return virtqueue_get_buf_ctx(_vq, len, NULL); > } > EXPORT_SYMBOL_GPL(virtqueue_get_buf); > I see. Will do it in the next version. > > > > >>> spin_unlock(&vdpasim->iommu_lock); > >>> if (ret) > >>> return DMA_MAPPING_ERROR; > >>> @@ -317,7 +317,7 @@ static void *vdpasim_alloc_coherent(struct device *dev, size_t size, > >>> > >>> ret = vhost_iotlb_add_range(iommu, (u64)pa, > >>> (u64)pa + size - 1, > >>> - pa, VHOST_MAP_RW); > >>> + pa, VHOST_MAP_RW, NULL); > >>> if (ret) { > >>> *dma_addr = DMA_MAPPING_ERROR; > >>> kfree(addr); > >>> @@ -625,7 +625,8 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, > >>> for (map = vhost_iotlb_itree_first(iotlb, start, last); map; > >>> map = vhost_iotlb_itree_next(map, start, last)) { > >>> ret = vhost_iotlb_add_range(vdpasim->iommu, map->start, > >>> - map->last, map->addr, map->perm); > >>> + map->last, map->addr, > >>> + map->perm, NULL); > >>> if (ret) > >>> goto err; > >>> } > >>> @@ -639,14 +640,14 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, > >>> } > >>> > >>> static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size, > >>> - u64 pa, u32 perm) > >>> + u64 pa, u32 perm, void *opaque) > >>> { > >>> struct vdpasim *vdpasim = vdpa_to_sim(vdpa); > >>> int ret; > >>> > >>> spin_lock(&vdpasim->iommu_lock); > >>> ret = vhost_iotlb_add_range(vdpasim->iommu, iova, iova + size - 1, pa, > >>> - perm); > >>> + perm, NULL); > >>> spin_unlock(&vdpasim->iommu_lock); > >>> > >>> return ret; > >>> diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c > >>> index 0fd3f87e913c..3bd5bd06cdbc 100644 > >>> --- a/drivers/vhost/iotlb.c > >>> +++ b/drivers/vhost/iotlb.c > >>> @@ -42,13 +42,15 @@ EXPORT_SYMBOL_GPL(vhost_iotlb_map_free); > >>> * @last: last of IOVA range > >>> * @addr: the address that is mapped to @start > >>> * @perm: access permission of this range > >>> + * @opaque: the opaque pointer for the IOTLB mapping > >>> * > >>> * Returns an error last is smaller than start or memory allocation > >>> * fails > >>> */ > >>> int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, > >>> u64 start, u64 last, > >>> - u64 addr, unsigned int perm) > >>> + u64 addr, unsigned int perm, > >>> + void *opaque) > >>> { > >>> struct vhost_iotlb_map *map; > >>> > >>> @@ -71,6 +73,7 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, > >>> map->last = last; > >>> map->addr = addr; > >>> map->perm = perm; > >>> + map->opaque = opaque; > >>> > >>> iotlb->nmaps++; > >>> vhost_iotlb_itree_insert(map, &iotlb->root); > >>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > >>> index 36b6950ba37f..e83e5be7cec8 100644 > >>> --- a/drivers/vhost/vdpa.c > >>> +++ b/drivers/vhost/vdpa.c > >>> @@ -488,6 +488,7 @@ static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) > >>> struct vhost_dev *dev = &v->vdev; > >>> struct vdpa_device *vdpa = v->vdpa; > >>> struct vhost_iotlb *iotlb = dev->iotlb; > >>> + struct vhost_iotlb_file *iotlb_file; > >>> struct vhost_iotlb_map *map; > >>> struct page *page; > >>> unsigned long pfn, pinned; > >>> @@ -504,6 +505,10 @@ static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) > >>> } > >>> atomic64_sub(map->size >> PAGE_SHIFT, > >>> &dev->mm->pinned_vm); > >>> + } else if (map->opaque) { > >>> + iotlb_file = (struct vhost_iotlb_file *)map->opaque; > >>> + fput(iotlb_file->file); > >>> + kfree(iotlb_file); > >>> } > >>> vhost_iotlb_map_free(iotlb, map); > >>> } > >>> @@ -540,8 +545,8 @@ static int perm_to_iommu_flags(u32 perm) > >>> return flags | IOMMU_CACHE; > >>> } > >>> > >>> -static int vhost_vdpa_map(struct vhost_vdpa *v, > >>> - u64 iova, u64 size, u64 pa, u32 perm) > >>> +static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova, > >>> + u64 size, u64 pa, u32 perm, void *opaque) > >>> { > >>> struct vhost_dev *dev = &v->vdev; > >>> struct vdpa_device *vdpa = v->vdpa; > >>> @@ -549,12 +554,12 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, > >>> int r = 0; > >>> > >>> r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1, > >>> - pa, perm); > >>> + pa, perm, opaque); > >>> if (r) > >>> return r; > >>> > >>> if (ops->dma_map) { > >>> - r = ops->dma_map(vdpa, iova, size, pa, perm); > >>> + r = ops->dma_map(vdpa, iova, size, pa, perm, opaque); > >>> } else if (ops->set_map) { > >>> if (!v->in_batch) > >>> r = ops->set_map(vdpa, dev->iotlb); > >>> @@ -591,6 +596,51 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) > >>> } > >>> } > >>> > >>> +static int vhost_vdpa_sva_map(struct vhost_vdpa *v, > >>> + u64 iova, u64 size, u64 uaddr, u32 perm) > >>> +{ > >>> + u64 offset, map_size, map_iova = iova; > >>> + struct vhost_iotlb_file *iotlb_file; > >>> + struct vm_area_struct *vma; > >>> + int ret; > >> > >> Lacking mmap_read_lock(). > >> > > Good catch! Will fix it. > > > >>> + > >>> + while (size) { > >>> + vma = find_vma(current->mm, uaddr); > >>> + if (!vma) { > >>> + ret = -EINVAL; > >>> + goto err; > >>> + } > >>> + map_size = min(size, vma->vm_end - uaddr); > >>> + offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start; > >>> + iotlb_file = NULL; > >>> + if (vma->vm_file && (vma->vm_flags & VM_SHARED)) { > >> > >> I wonder if we need more strict check here. When developing vhost-vdpa, > >> I try hard to make sure the map can only work for user pages. > >> > >> So the question is: do we need to exclude MMIO area or only allow shmem > >> to work here? > >> > > Do you mean we need to check VM_MIXEDMAP | VM_PFNMAP here? > > > I meant do we need to allow VM_IO here? (We don't allow such case in > vhost-vdpa now). > OK, let's exclude the vma with VM_IO | VM_PFNMAP. > > > > > It makes sense to me. > > > >> > >>> + iotlb_file = kmalloc(sizeof(*iotlb_file), GFP_KERNEL); > >>> + if (!iotlb_file) { > >>> + ret = -ENOMEM; > >>> + goto err; > >>> + } > >>> + iotlb_file->file = get_file(vma->vm_file); > >>> + iotlb_file->offset = offset; > >>> + } > >> > >> I wonder if it's better to allocate iotlb_file and make iotlb_file->file > >> = NULL && iotlb_file->offset = 0. This can force a consistent code for > >> the vDPA parents. > >> > > Looks fine to me. > > > >> Or we can simply fail the map without a file as backend. > >> > > Actually there will be some vma without vm_file during vm booting. > > > Yes, e.g bios or other rom. Vhost-user has the similar issue and they > filter the out them in qemu. > > For vhost-vDPA, consider it can supports various difference backends, we > can't do that. > OK, I will transfer iotlb_file with NULL file and let the backend do the filtering. Thanks, Yongji
On 2021/1/27 下午4:57, Stefano Garzarella wrote: > On Wed, Jan 27, 2021 at 11:33:03AM +0800, Jason Wang wrote: >> >> On 2021/1/20 下午7:08, Stefano Garzarella wrote: >>> On Wed, Jan 20, 2021 at 11:46:38AM +0800, Jason Wang wrote: >>>> >>>> On 2021/1/19 下午12:59, Xie Yongji wrote: >>>>> With VDUSE, we should be able to support all kinds of virtio devices. >>>>> >>>>> Signed-off-by: Xie Yongji <xieyongji@bytedance.com> >>>>> --- >>>>> drivers/vhost/vdpa.c | 29 +++-------------------------- >>>>> 1 file changed, 3 insertions(+), 26 deletions(-) >>>>> >>>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c >>>>> index 29ed4173f04e..448be7875b6d 100644 >>>>> --- a/drivers/vhost/vdpa.c >>>>> +++ b/drivers/vhost/vdpa.c >>>>> @@ -22,6 +22,7 @@ >>>>> #include <linux/nospec.h> >>>>> #include <linux/vhost.h> >>>>> #include <linux/virtio_net.h> >>>>> +#include <linux/virtio_blk.h> >>>>> #include "vhost.h" >>>>> @@ -185,26 +186,6 @@ static long vhost_vdpa_set_status(struct >>>>> vhost_vdpa *v, u8 __user *statusp) >>>>> return 0; >>>>> } >>>>> -static int vhost_vdpa_config_validate(struct vhost_vdpa *v, >>>>> - struct vhost_vdpa_config *c) >>>>> -{ >>>>> - long size = 0; >>>>> - >>>>> - switch (v->virtio_id) { >>>>> - case VIRTIO_ID_NET: >>>>> - size = sizeof(struct virtio_net_config); >>>>> - break; >>>>> - } >>>>> - >>>>> - if (c->len == 0) >>>>> - return -EINVAL; >>>>> - >>>>> - if (c->len > size - c->off) >>>>> - return -E2BIG; >>>>> - >>>>> - return 0; >>>>> -} >>>> >>>> >>>> I think we should use a separate patch for this. >>> >>> For the vdpa-blk simulator I had the same issues and I'm adding a >>> .get_config_size() callback to vdpa devices. >>> >>> Do you think make sense or is better to remove this check in >>> vhost/vdpa, delegating the boundaries checks to >>> get_config/set_config callbacks. >> >> >> A question here. How much value could we gain from get_config_size() >> consider we can let vDPA parent to validate the length in its >> get_config(). >> > > I agree, most of the implementations already validate the length, the > only gain is an error returned since get_config() is void, but > eventually we can add a return value to it. Right, one problem here is that. For the virito path, its get_config() returns void. So we can not propagate error to virtio drivers. But it might not be a big issue since we trust kernel virtio driver. So I think it makes sense to change the return value in the vdpa config ops. Thanks > > Thanks, > Stefano >
On Fri, Jan 29, 2021 at 11:04 PM Stefano Garzarella <sgarzare@redhat.com> wrote: > > On Thu, Jan 28, 2021 at 11:11:49AM +0800, Jason Wang wrote: > > > >On 2021/1/27 下午4:57, Stefano Garzarella wrote: > >>On Wed, Jan 27, 2021 at 11:33:03AM +0800, Jason Wang wrote: > >>> > >>>On 2021/1/20 下午7:08, Stefano Garzarella wrote: > >>>>On Wed, Jan 20, 2021 at 11:46:38AM +0800, Jason Wang wrote: > >>>>> > >>>>>On 2021/1/19 下午12:59, Xie Yongji wrote: > >>>>>>With VDUSE, we should be able to support all kinds of virtio devices. > >>>>>> > >>>>>>Signed-off-by: Xie Yongji <xieyongji@bytedance.com> > >>>>>>--- > >>>>>> drivers/vhost/vdpa.c | 29 +++-------------------------- > >>>>>> 1 file changed, 3 insertions(+), 26 deletions(-) > >>>>>> > >>>>>>diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > >>>>>>index 29ed4173f04e..448be7875b6d 100644 > >>>>>>--- a/drivers/vhost/vdpa.c > >>>>>>+++ b/drivers/vhost/vdpa.c > >>>>>>@@ -22,6 +22,7 @@ > >>>>>> #include <linux/nospec.h> > >>>>>> #include <linux/vhost.h> > >>>>>> #include <linux/virtio_net.h> > >>>>>>+#include <linux/virtio_blk.h> > >>>>>> #include "vhost.h" > >>>>>>@@ -185,26 +186,6 @@ static long > >>>>>>vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user > >>>>>>*statusp) > >>>>>> return 0; > >>>>>> } > >>>>>>-static int vhost_vdpa_config_validate(struct vhost_vdpa *v, > >>>>>>- struct vhost_vdpa_config *c) > >>>>>>-{ > >>>>>>- long size = 0; > >>>>>>- > >>>>>>- switch (v->virtio_id) { > >>>>>>- case VIRTIO_ID_NET: > >>>>>>- size = sizeof(struct virtio_net_config); > >>>>>>- break; > >>>>>>- } > >>>>>>- > >>>>>>- if (c->len == 0) > >>>>>>- return -EINVAL; > >>>>>>- > >>>>>>- if (c->len > size - c->off) > >>>>>>- return -E2BIG; > >>>>>>- > >>>>>>- return 0; > >>>>>>-} > >>>>> > >>>>> > >>>>>I think we should use a separate patch for this. > >>>> > >>>>For the vdpa-blk simulator I had the same issues and I'm adding > >>>>a .get_config_size() callback to vdpa devices. > >>>> > >>>>Do you think make sense or is better to remove this check in > >>>>vhost/vdpa, delegating the boundaries checks to > >>>>get_config/set_config callbacks. > >>> > >>> > >>>A question here. How much value could we gain from > >>>get_config_size() consider we can let vDPA parent to validate the > >>>length in its get_config(). > >>> > >> > >>I agree, most of the implementations already validate the length, > >>the only gain is an error returned since get_config() is void, but > >>eventually we can add a return value to it. > > > > > >Right, one problem here is that. For the virito path, its get_config() > >returns void. So we can not propagate error to virtio drivers. But it > >might not be a big issue since we trust kernel virtio driver. > > > >So I think it makes sense to change the return value in the vdpa config ops. > > Thanks for your feedback! > > @Xie do you plan to do this? > > Otherwise I can do in my vdpa-blk-sim series, where for now I used this > patch as is. > Hi Stefano, please do in your vdpa-blk-sim series. I have no plan for it now. Thanks, Yongji
On Sat, Jan 30, 2021 at 07:33:08PM +0800, Yongji Xie wrote: >On Fri, Jan 29, 2021 at 11:04 PM Stefano Garzarella <sgarzare@redhat.com> wrote: >> >> On Thu, Jan 28, 2021 at 11:11:49AM +0800, Jason Wang wrote: >> > >> >On 2021/1/27 下午4:57, Stefano Garzarella wrote: >> >>On Wed, Jan 27, 2021 at 11:33:03AM +0800, Jason Wang wrote: >> >>> >> >>>On 2021/1/20 下午7:08, Stefano Garzarella wrote: >> >>>>On Wed, Jan 20, 2021 at 11:46:38AM +0800, Jason Wang wrote: >> >>>>> >> >>>>>On 2021/1/19 下午12:59, Xie Yongji wrote: >> >>>>>>With VDUSE, we should be able to support all kinds of virtio devices. >> >>>>>> >> >>>>>>Signed-off-by: Xie Yongji <xieyongji@bytedance.com> >> >>>>>>--- >> >>>>>> drivers/vhost/vdpa.c | 29 +++-------------------------- >> >>>>>> 1 file changed, 3 insertions(+), 26 deletions(-) >> >>>>>> >> >>>>>>diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c >> >>>>>>index 29ed4173f04e..448be7875b6d 100644 >> >>>>>>--- a/drivers/vhost/vdpa.c >> >>>>>>+++ b/drivers/vhost/vdpa.c >> >>>>>>@@ -22,6 +22,7 @@ >> >>>>>> #include <linux/nospec.h> >> >>>>>> #include <linux/vhost.h> >> >>>>>> #include <linux/virtio_net.h> >> >>>>>>+#include <linux/virtio_blk.h> >> >>>>>> #include "vhost.h" >> >>>>>>@@ -185,26 +186,6 @@ static long >> >>>>>>vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user >> >>>>>>*statusp) >> >>>>>> return 0; >> >>>>>> } >> >>>>>>-static int vhost_vdpa_config_validate(struct vhost_vdpa *v, >> >>>>>>- struct vhost_vdpa_config *c) >> >>>>>>-{ >> >>>>>>- long size = 0; >> >>>>>>- >> >>>>>>- switch (v->virtio_id) { >> >>>>>>- case VIRTIO_ID_NET: >> >>>>>>- size = sizeof(struct virtio_net_config); >> >>>>>>- break; >> >>>>>>- } >> >>>>>>- >> >>>>>>- if (c->len == 0) >> >>>>>>- return -EINVAL; >> >>>>>>- >> >>>>>>- if (c->len > size - c->off) >> >>>>>>- return -E2BIG; >> >>>>>>- >> >>>>>>- return 0; >> >>>>>>-} >> >>>>> >> >>>>> >> >>>>>I think we should use a separate patch for this. >> >>>> >> >>>>For the vdpa-blk simulator I had the same issues and I'm adding >> >>>>a .get_config_size() callback to vdpa devices. >> >>>> >> >>>>Do you think make sense or is better to remove this check in >> >>>>vhost/vdpa, delegating the boundaries checks to >> >>>>get_config/set_config callbacks. >> >>> >> >>> >> >>>A question here. How much value could we gain from >> >>>get_config_size() consider we can let vDPA parent to validate the >> >>>length in its get_config(). >> >>> >> >> >> >>I agree, most of the implementations already validate the length, >> >>the only gain is an error returned since get_config() is void, but >> >>eventually we can add a return value to it. >> > >> > >> >Right, one problem here is that. For the virito path, its get_config() >> >returns void. So we can not propagate error to virtio drivers. But it >> >might not be a big issue since we trust kernel virtio driver. >> > >> >So I think it makes sense to change the return value in the vdpa config ops. >> >> Thanks for your feedback! >> >> @Xie do you plan to do this? >> >> Otherwise I can do in my vdpa-blk-sim series, where for now I used this >> patch as is. >> > >Hi Stefano, please do in your vdpa-blk-sim series. I have no plan for >it now. Okay, I'll do it. Thanks, Stefano