diff mbox series

[V7] scsi: core: only re-run queue in scsi_end_request() if device queue is busy

Message ID 20200910075056.36509-1-ming.lei@redhat.com
State New
Headers show
Series [V7] scsi: core: only re-run queue in scsi_end_request() if device queue is busy | expand

Commit Message

Ming Lei Sept. 10, 2020, 7:50 a.m. UTC
Now the request queue is run in scsi_end_request() unconditionally if both
the target queue and the host queue are ready. We should re-run the request
queue only after this device queue becomes busy, so that only this LUN is
restarted.

Recently Long Li reported that the cost of running the queue can be very
high at large queue depths. Improve this situation by only re-running
the request queue when this LUN is busy.

Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Tested-by: Long Li <longli@microsoft.com>
Reported-by: Long Li <longli@microsoft.com>
Tested-by: Kashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
V7:
        - patch style && comment change, as suggested by Bart
        - add reviewed-by tag
V6:
        - patch style && comment change, as suggested by Bart
        - add reviewed-by & tested-by tag
V5:
   	- patch style && comment change, as suggested by Bart
        - add reviewed-by & tested-by tag
V4:
        - fix one race reported by Kashyap, and simplify the implementation
        a bit; also passed both of Kashyap's functional and performance tests
V3:
        - add one smp_mb() in scsi_mq_get_budget() and comment
V2:
        - commit log change, no any code change
        - add reported-by tag


 drivers/scsi/scsi_lib.c    | 48 ++++++++++++++++++++++++++++++++++----
 include/scsi/scsi_device.h |  1 +
 2 files changed, 45 insertions(+), 4 deletions(-)

Comments

Ming Lei Sept. 15, 2020, 2:49 a.m. UTC | #1
On Thu, Sep 10, 2020 at 03:50:56PM +0800, Ming Lei wrote:
> Now the request queue is run in scsi_end_request() unconditionally if both

> target queue and host queue is ready. We should have re-run request queue

> only after this device queue becomes busy for restarting this LUN only.

> 

> Recently Long Li reported that cost of run queue may be very heavy in

> case of high queue depth. So improve this situation by only running

> the request queue when this LUN is busy.

> 

> Reviewed-by: Bart Van Assche <bvanassche@acm.org>

> Reviewed-by: Hannes Reinecke <hare@suse.de>

> Reviewed-by: Ewan D. Milne <emilne@redhat.com>

> Reviewed-by: John Garry <john.garry@huawei.com>

> Tested-by: Long Li <longli@microsoft.com>

> Reported-by: Long Li <longli@microsoft.com>

> Tested-by: Kashyap Desai <kashyap.desai@broadcom.com>

> Signed-off-by: Ming Lei <ming.lei@redhat.com>

> ---

> V7:

>         - patch style && comment change, as suggested by Bart

>         - add reviewed-by tag

> V6:

>         - patch style && comment change, as suggested by Bart

>         - add reviewed-by & tested-by tag

> V5:

>    	- patch style && comment change, as suggested by Bart

>         - add reviewed-by & tested-by tag

> V4:

>         - fix one race reported by Kashyap, and simplify the implementation

>         a bit; also pass Kashyap's both function and performance test

> V3:

>         - add one smp_mb() in scsi_mq_get_budget() and comment

> V2:

>         - commit log change, no any code change

>         - add reported-by tag

> 

> 

>  drivers/scsi/scsi_lib.c    | 48 ++++++++++++++++++++++++++++++++++----

>  include/scsi/scsi_device.h |  1 +

>  2 files changed, 45 insertions(+), 4 deletions(-)

> 

> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c

> index 7affaaf8b98e..4fb624744ae6 100644

> --- a/drivers/scsi/scsi_lib.c

> +++ b/drivers/scsi/scsi_lib.c

> @@ -549,10 +549,27 @@ static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd)

>  static void scsi_run_queue_async(struct scsi_device *sdev)

>  {

>  	if (scsi_target(sdev)->single_lun ||

> -	    !list_empty(&sdev->host->starved_list))

> +	    !list_empty(&sdev->host->starved_list)) {

>  		kblockd_schedule_work(&sdev->requeue_work);

> -	else

> -		blk_mq_run_hw_queues(sdev->request_queue, true);

> +	} else {

> +		/*

> +		 * smp_mb() present in sbitmap_queue_clear() or implied in

> +		 * .end_io is for ordering writing .device_busy in

> +		 * scsi_device_unbusy() and reading sdev->restarts.

> +		 */

> +		int old = atomic_read(&sdev->restarts);

> +

> +		/*

> +		 * ->restarts has to be kept as non-zero if new budget

> +		 *  contention comes.

> +		 *

> +		 *  No need to run queue when either another re-run

> +		 *  queue wins in updating ->restarts or one new budget

> +		 *  contention comes.

> +		 */

> +		if (old && atomic_cmpxchg(&sdev->restarts, old, 0) == old)

> +			blk_mq_run_hw_queues(sdev->request_queue, true);

> +	}

>  }

>  

>  /* Returns false when no more bytes to process, true if there are more */

> @@ -1612,7 +1629,30 @@ static bool scsi_mq_get_budget(struct request_queue *q)

>  {

>  	struct scsi_device *sdev = q->queuedata;

>  

> -	return scsi_dev_queue_ready(q, sdev);

> +	if (scsi_dev_queue_ready(q, sdev))

> +		return true;

> +

> +	atomic_inc(&sdev->restarts);

> +

> +	/*

> +	 * Orders atomic_inc(&sdev->restarts) and atomic_read(&sdev->device_busy).

> +	 * .restarts must be incremented before .device_busy is read because the

> +	 * code in scsi_run_queue_async() depends on the order of these operations.

> +	 */

> +	smp_mb__after_atomic();

> +

> +	/*

> +	 * If all in-flight requests originated from this LUN are completed

> +	 * before reading .device_busy, sdev->device_busy will be observed as

> +	 * zero, then blk_mq_delay_run_hw_queues() will dispatch this request

> +	 * soon. Otherwise, completion of one of these request will observe

> +	 * the .restarts flag, and the request queue will be run for handling

> +	 * this request, see scsi_end_request().

> +	 */

> +	if (unlikely(atomic_read(&sdev->device_busy) == 0 &&

> +				!scsi_device_blocked(sdev)))

> +		blk_mq_delay_run_hw_queues(sdev->request_queue, SCSI_QUEUE_DELAY);

> +	return false;

>  }

>  

>  static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,

> diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h

> index bc5909033d13..1a5c9a3df6d6 100644

> --- a/include/scsi/scsi_device.h

> +++ b/include/scsi/scsi_device.h

> @@ -109,6 +109,7 @@ struct scsi_device {

>  	atomic_t device_busy;		/* commands actually active on LLDD */

>  	atomic_t device_blocked;	/* Device returned QUEUE_FULL. */

>  

> +	atomic_t restarts;

>  	spinlock_t list_lock;

>  	struct list_head starved_entry;

>  	unsigned short queue_depth;	/* How deep of a queue we want */

> -- 

> 2.25.2

> 


Hello Martin,

Ping...


thanks,
Ming
Martin K. Petersen Sept. 16, 2020, 2:21 a.m. UTC | #2
Ming,

> Now the request queue is run in scsi_end_request() unconditionally if

> both target queue and host queue is ready. We should have re-run

> request queue only after this device queue becomes busy for restarting

> this LUN only.

>

> Recently Long Li reported that cost of run queue may be very heavy in

> case of high queue depth. So improve this situation by only running

> the request queue when this LUN is busy.


Applied to 5.10/scsi-staging, thanks!

-- 
Martin K. Petersen	Oracle Linux Engineering
Martin K. Petersen Sept. 22, 2020, 3:56 a.m. UTC | #3
On Thu, 10 Sep 2020 15:50:56 +0800, Ming Lei wrote:

> Now the request queue is run in scsi_end_request() unconditionally if both

> target queue and host queue is ready. We should have re-run request queue

> only after this device queue becomes busy for restarting this LUN only.

> 

> Recently Long Li reported that cost of run queue may be very heavy in

> case of high queue depth. So improve this situation by only running

> the request queue when this LUN is busy.


Applied to 5.10/scsi-queue, thanks!

[1/1] scsi: core: Only re-run queue in scsi_end_request() if device queue is busy
      https://git.kernel.org/mkp/scsi/c/ed5dd6a67d5e

-- 
Martin K. Petersen	Oracle Linux Engineering
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7affaaf8b98e..4fb624744ae6 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -549,10 +549,27 @@  static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd)
 static void scsi_run_queue_async(struct scsi_device *sdev)
 {
 	if (scsi_target(sdev)->single_lun ||
-	    !list_empty(&sdev->host->starved_list))
+	    !list_empty(&sdev->host->starved_list)) {
 		kblockd_schedule_work(&sdev->requeue_work);
-	else
-		blk_mq_run_hw_queues(sdev->request_queue, true);
+	} else {
+		/*
+		 * smp_mb() present in sbitmap_queue_clear() or implied in
+		 * .end_io is for ordering writing .device_busy in
+		 * scsi_device_unbusy() and reading sdev->restarts.
+		 */
+		int old = atomic_read(&sdev->restarts);
+
+		/*
+		 * ->restarts has to be kept as non-zero if new budget
+		 *  contention comes.
+		 *
+		 *  No need to run queue when either another re-run
+		 *  queue wins in updating ->restarts or one new budget
+		 *  contention comes.
+		 */
+		if (old && atomic_cmpxchg(&sdev->restarts, old, 0) == old)
+			blk_mq_run_hw_queues(sdev->request_queue, true);
+	}
 }
 
 /* Returns false when no more bytes to process, true if there are more */
@@ -1612,7 +1629,30 @@  static bool scsi_mq_get_budget(struct request_queue *q)
 {
 	struct scsi_device *sdev = q->queuedata;
 
-	return scsi_dev_queue_ready(q, sdev);
+	if (scsi_dev_queue_ready(q, sdev))
+		return true;
+
+	atomic_inc(&sdev->restarts);
+
+	/*
+	 * Orders atomic_inc(&sdev->restarts) and atomic_read(&sdev->device_busy).
+	 * .restarts must be incremented before .device_busy is read because the
+	 * code in scsi_run_queue_async() depends on the order of these operations.
+	 */
+	smp_mb__after_atomic();
+
+	/*
+	 * If all in-flight requests originated from this LUN are completed
+	 * before reading .device_busy, sdev->device_busy will be observed as
+	 * zero, then blk_mq_delay_run_hw_queues() will dispatch this request
+	 * soon. Otherwise, completion of one of these request will observe
+	 * the .restarts flag, and the request queue will be run for handling
+	 * this request, see scsi_end_request().
+	 */
+	if (unlikely(atomic_read(&sdev->device_busy) == 0 &&
+				!scsi_device_blocked(sdev)))
+		blk_mq_delay_run_hw_queues(sdev->request_queue, SCSI_QUEUE_DELAY);
+	return false;
 }
 
 static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index bc5909033d13..1a5c9a3df6d6 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -109,6 +109,7 @@  struct scsi_device {
 	atomic_t device_busy;		/* commands actually active on LLDD */
 	atomic_t device_blocked;	/* Device returned QUEUE_FULL. */
 
+	atomic_t restarts;
 	spinlock_t list_lock;
 	struct list_head starved_entry;
 	unsigned short queue_depth;	/* How deep of a queue we want */