[3/9] vhost: modify internal functions to take a vhost_worker

Message ID 20210525180600.6349-4-michael.christie@oracle.com
State New
Headers show
Series
  • Untitled series #130738
Related show

Commit Message

Mike Christie May 25, 2021, 6:05 p.m.
The final patches will allow us to have multiple vhost_workers per device
and be able to share them across devices. To prepare for that, this patch
allow us our internal work queueing, flush and cgroup attach functions to
take a vhost_worker as an arg.

The poll code required a change to the drivers, so the next patch will
convert that code.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/vhost/vhost.c | 136 ++++++++++++++++++++++++++++--------------
 drivers/vhost/vhost.h |   4 +-
 2 files changed, 94 insertions(+), 46 deletions(-)

Comments

Stefan Hajnoczi June 3, 2021, 10:45 a.m. | #1
On Tue, May 25, 2021 at 01:05:54PM -0500, Mike Christie wrote:
> -void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)

> +static void vhost_work_queue_on(struct vhost_work *work,

> +				struct vhost_worker *worker)

>  {

> -	if (!dev->worker)

> -		return;

> -

>  	if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {

>  		/* We can only add the work to the list after we're

>  		 * sure it was not in the list.

>  		 * test_and_set_bit() implies a memory barrier.

>  		 */

> -		llist_add(&work->node, &dev->worker->work_list);

> -		wake_up_process(dev->worker->task);

> +		llist_add(&work->node, &worker->work_list);

> +		wake_up_process(worker->task);

>  	}

>  }

> +

> +void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)


When should this function still be used? A doc comment contrasting it to
vhost_work_queue_on() would be helpful. I would expect callers to switch
to that instead of queuing work on dev->workers[0].

>  /* A lockless hint for busy polling code to exit the loop */

>  bool vhost_has_work(struct vhost_dev *dev)

>  {

> -	return dev->worker && !llist_empty(&dev->worker->work_list);

> +	int i;

> +

> +	for (i = 0; i < dev->num_workers; i++) {

> +		if (!llist_empty(&dev->workers[i]->work_list))

> +			return true;

> +	}

> +

> +	return false;

>  }

>  EXPORT_SYMBOL_GPL(vhost_has_work);


It's probably not necessary to poll all workers:

drivers/vhost/net.c calls vhost_has_work() to busy poll a specific
virtqueue. If the vq:worker mapping is 1:1 or N:1 then vhost_has_work()
should be extended to include the struct vhost_virtqueue so we can poll
just that vq worker's work_list.
>  /* Caller must have device mutex */

>  static int vhost_worker_try_create_def(struct vhost_dev *dev)

>  {

> -	if (!dev->use_worker || dev->worker)

> +	struct vhost_worker *worker;

> +

> +	if (!dev->use_worker || dev->workers)

>  		return 0;

>  

> -	return vhost_worker_create(dev);

> +	dev->workers = kcalloc(1, sizeof(struct vhost_worker *), GFP_KERNEL);


GFP_KERNEL_ACCOUNT so that vhost memory associated with a process (the
one that invoked the ioctl) is accounted? This may get trickier if the
workers are shared between processes.

The same applies for struct vhost_worker in vhost_worker_create().

Patch

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index a291cde95c43..4bfa9a7a51bb 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -231,20 +231,6 @@  void vhost_poll_stop(struct vhost_poll *poll)
 }
 EXPORT_SYMBOL_GPL(vhost_poll_stop);
 
-void vhost_work_dev_flush(struct vhost_dev *dev)
-{
-	struct vhost_flush_struct flush;
-
-	if (dev->worker) {
-		init_completion(&flush.wait_event);
-		vhost_work_init(&flush.work, vhost_flush_work);
-
-		vhost_work_queue(dev, &flush.work);
-		wait_for_completion(&flush.wait_event);
-	}
-}
-EXPORT_SYMBOL_GPL(vhost_work_dev_flush);
-
 /* Flush any work that has been scheduled. When calling this, don't hold any
  * locks that are also used by the callback. */
 void vhost_poll_flush(struct vhost_poll *poll)
@@ -253,26 +239,62 @@  void vhost_poll_flush(struct vhost_poll *poll)
 }
 EXPORT_SYMBOL_GPL(vhost_poll_flush);
 
-void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
+static void vhost_work_queue_on(struct vhost_work *work,
+				struct vhost_worker *worker)
 {
-	if (!dev->worker)
-		return;
-
 	if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
 		/* We can only add the work to the list after we're
 		 * sure it was not in the list.
 		 * test_and_set_bit() implies a memory barrier.
 		 */
-		llist_add(&work->node, &dev->worker->work_list);
-		wake_up_process(dev->worker->task);
+		llist_add(&work->node, &worker->work_list);
+		wake_up_process(worker->task);
 	}
 }
+
+void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
+{
+	if (!dev->workers)
+		return;
+	/*
+	 * devs with use_worker=true created by tools that do not support the
+	 * worker creation ioctl will always have at least one worker.
+	 */
+	vhost_work_queue_on(work, dev->workers[0]);
+}
 EXPORT_SYMBOL_GPL(vhost_work_queue);
 
+static void vhost_work_dev_flush_on(struct vhost_worker *worker)
+{
+	struct vhost_flush_struct flush;
+
+	init_completion(&flush.wait_event);
+	vhost_work_init(&flush.work, vhost_flush_work);
+
+	vhost_work_queue_on(&flush.work, worker);
+	wait_for_completion(&flush.wait_event);
+}
+
+void vhost_work_dev_flush(struct vhost_dev *dev)
+{
+	int i;
+
+	for (i = 0; i < dev->num_workers; i++)
+		vhost_work_dev_flush_on(dev->workers[i]);
+}
+EXPORT_SYMBOL_GPL(vhost_work_dev_flush);
+
 /* A lockless hint for busy polling code to exit the loop */
 bool vhost_has_work(struct vhost_dev *dev)
 {
-	return dev->worker && !llist_empty(&dev->worker->work_list);
+	int i;
+
+	for (i = 0; i < dev->num_workers; i++) {
+		if (!llist_empty(&dev->workers[i]->work_list))
+			return true;
+	}
+
+	return false;
 }
 EXPORT_SYMBOL_GPL(vhost_has_work);
 
@@ -482,7 +504,8 @@  void vhost_dev_init(struct vhost_dev *dev,
 	dev->umem = NULL;
 	dev->iotlb = NULL;
 	dev->mm = NULL;
-	dev->worker = NULL;
+	dev->workers = NULL;
+	dev->num_workers = 0;
 	dev->iov_limit = iov_limit;
 	dev->weight = weight;
 	dev->byte_weight = byte_weight;
@@ -531,14 +554,14 @@  static void vhost_attach_cgroups_work(struct vhost_work *work)
 	s->ret = cgroup_attach_task_all(s->owner, current);
 }
 
-static int vhost_attach_cgroups(struct vhost_dev *dev)
+static int vhost_attach_cgroups_on(struct vhost_worker *worker)
 {
 	struct vhost_attach_cgroups_struct attach;
 
 	attach.owner = current;
 	vhost_work_init(&attach.work, vhost_attach_cgroups_work);
-	vhost_work_queue(dev, &attach.work);
-	vhost_work_dev_flush(dev);
+	vhost_work_queue_on(&attach.work, worker);
+	vhost_work_dev_flush_on(worker);
 	return attach.ret;
 }
 
@@ -579,20 +602,29 @@  static void vhost_detach_mm(struct vhost_dev *dev)
 	dev->mm = NULL;
 }
 
-static void vhost_worker_free(struct vhost_dev *dev)
+static void vhost_worker_free(struct vhost_worker *worker)
 {
-	struct vhost_worker *worker = dev->worker;
-
-	if (!worker)
-		return;
-
-	dev->worker = NULL;
 	WARN_ON(!llist_empty(&worker->work_list));
 	kthread_stop(worker->task);
 	kfree(worker);
 }
 
-static int vhost_worker_create(struct vhost_dev *dev)
+static void vhost_workers_free(struct vhost_dev *dev)
+{
+	int i;
+
+	if (!dev->workers)
+		return;
+
+	for (i = 0; i < dev->num_workers; i++)
+		vhost_worker_free(dev->workers[i]);
+
+	kfree(dev->workers);
+	dev->num_workers = 0;
+	dev->workers = NULL;
+}
+
+static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
 {
 	struct vhost_worker *worker;
 	struct task_struct *task;
@@ -600,42 +632,56 @@  static int vhost_worker_create(struct vhost_dev *dev)
 
 	worker = kzalloc(sizeof(*worker), GFP_KERNEL);
 	if (!worker)
-		return -ENOMEM;
+		return NULL;
 
-	dev->worker = worker;
+	worker->id = dev->num_workers;
 	worker->dev = dev;
 	init_llist_head(&worker->work_list);
 
 	task = kthread_create(vhost_worker, worker, "vhost-%d", current->pid);
-	if (IS_ERR(task)) {
-		ret = PTR_ERR(task);
+	if (IS_ERR(task))
 		goto free_worker;
-	}
 
 	worker->task = task;
 	wake_up_process(task); /* avoid contributing to loadavg */
 
-	ret = vhost_attach_cgroups(dev);
+	ret = vhost_attach_cgroups_on(worker);
 	if (ret)
 		goto stop_worker;
 
-	return 0;
+	return worker;
 
 stop_worker:
 	kthread_stop(worker->task);
 free_worker:
 	kfree(worker);
-	dev->worker = NULL;
-	return ret;
+	return NULL;
 }
 
 /* Caller must have device mutex */
 static int vhost_worker_try_create_def(struct vhost_dev *dev)
 {
-	if (!dev->use_worker || dev->worker)
+	struct vhost_worker *worker;
+
+	if (!dev->use_worker || dev->workers)
 		return 0;
 
-	return vhost_worker_create(dev);
+	dev->workers = kcalloc(1, sizeof(struct vhost_worker *), GFP_KERNEL);
+	if (!dev->workers)
+		return -ENOMEM;
+
+	worker = vhost_worker_create(dev);
+	if (!worker)
+		goto free_workers;
+
+	dev->workers[worker->id] = worker;
+	dev->num_workers++;
+	return 0;
+
+free_workers:
+	kfree(dev->workers);
+	dev->workers = NULL;
+	return -ENOMEM;
 }
 
 /* Caller should have device mutex */
@@ -750,7 +796,7 @@  void vhost_dev_cleanup(struct vhost_dev *dev)
 	dev->iotlb = NULL;
 	vhost_clear_msg(dev);
 	wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
-	vhost_worker_free(dev);
+	vhost_workers_free(dev);
 	vhost_detach_mm(dev);
 	dev->kcov_handle = 0;
 }
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index bfc4563e612f..fa1af4106755 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -29,6 +29,7 @@  struct vhost_worker {
 	struct task_struct	*task;
 	struct llist_head	work_list;
 	struct vhost_dev	*dev;
+	int			id;
 };
 
 /* Poll a file (eventfd or socket) */
@@ -155,7 +156,8 @@  struct vhost_dev {
 	struct vhost_virtqueue **vqs;
 	int nvqs;
 	struct eventfd_ctx *log_ctx;
-	struct vhost_worker *worker;
+	struct vhost_worker **workers;
+	int num_workers;
 	struct vhost_iotlb *umem;
 	struct vhost_iotlb *iotlb;
 	spinlock_t iotlb_lock;