diff mbox series

[v1,3/3] scsi_debug: iouring iopoll support

Message ID 20201015133721.63476-1-kashyap.desai@broadcom.com
State Superseded
Headers show
Series [v1,1/3] add io_uring with IOPOLL support in scsi layer | expand

Commit Message

Kashyap Desai Oct. 15, 2020, 1:37 p.m. UTC
Add support for the io_uring iopoll interface in scsi_debug.
This feature requires shared host tagset support in both the kernel and the driver.

Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: dgilbert@interlog.com
Cc: linux-block@vger.kernel.org
---
 drivers/scsi/scsi_debug.c | 123 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)

Comments

Kashyap Desai Nov. 13, 2020, 10:50 a.m. UTC | #1
> -----Original Message-----

> From: Kashyap Desai [mailto:kashyap.desai@broadcom.com]

> Sent: Thursday, October 15, 2020 7:07 PM

> To: linux-scsi@vger.kernel.org

> Cc: Kashyap Desai <kashyap.desai@broadcom.com>; dgilbert@interlog.com;

> linux-block@vger.kernel.org

> Subject: [PATCH v1 3/3] scsi_debug: iouring iopoll support

>

> Add support of iouring iopoll interface in scsi_debug.

> This feature requires shared hosttag support in kernel and driver.

>

> Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>

> Cc: dgilbert@interlog.com

> Cc: linux-block@vger.kernel.org

> ---

>  drivers/scsi/scsi_debug.c | 123 ++++++++++++++++++++++++++++++++++++++

>  1 file changed, 123 insertions(+)



Hi Doug - Any comment/feedback ?

Kashyap
Douglas Gilbert Nov. 15, 2020, 4:04 p.m. UTC | #2
On 2020-11-13 5:50 a.m., Kashyap Desai wrote:
>> -----Original Message-----

>> From: Kashyap Desai [mailto:kashyap.desai@broadcom.com]

>> Sent: Thursday, October 15, 2020 7:07 PM

>> To: linux-scsi@vger.kernel.org

>> Cc: Kashyap Desai <kashyap.desai@broadcom.com>; dgilbert@interlog.com;

>> linux-block@vger.kernel.org

>> Subject: [PATCH v1 3/3] scsi_debug: iouring iopoll support

>>

>> Add support of iouring iopoll interface in scsi_debug.

>> This feature requires shared hosttag support in kernel and driver.

>>

>> Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>

>> Cc: dgilbert@interlog.com

>> Cc: linux-block@vger.kernel.org

>> ---

>>   drivers/scsi/scsi_debug.c | 123 ++++++++++++++++++++++++++++++++++++++

>>   1 file changed, 123 insertions(+)

> 

> 

> Hi Doug - Any comment/feedback ?


Hi,
I'm testing this patch and have found an issue which I have taken up
directly with Kashyap. More to follow.

Doug Gilbert
Douglas Gilbert Nov. 24, 2020, 4:38 p.m. UTC | #3
On 2020-10-15 9:37 a.m., Kashyap Desai wrote:
> Add support of iouring iopoll interface in scsi_debug.

> This feature requires shared hosttag support in kernel and driver.


I am continuing to test this patch. There is one fix shown inline below
plus a question near the end.

Doug Gilbert

> Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>

> Cc: dgilbert@interlog.com

> Cc: linux-block@vger.kernel.org

> ---

>   drivers/scsi/scsi_debug.c | 123 ++++++++++++++++++++++++++++++++++++++

>   1 file changed, 123 insertions(+)

> 

> diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c

> index a87e40aec11f..4d9cc6af588c 100644

> --- a/drivers/scsi/scsi_debug.c

> +++ b/drivers/scsi/scsi_debug.c

> @@ -826,6 +826,7 @@ static int sdeb_zbc_max_open = DEF_ZBC_MAX_OPEN_ZONES;

>   static int sdeb_zbc_nr_conv = DEF_ZBC_NR_CONV_ZONES;

>   

>   static int submit_queues = DEF_SUBMIT_QUEUES;  /* > 1 for multi-queue (mq) */

> +static int poll_queues; /* iouring iopoll interface.*/

>   static struct sdebug_queue *sdebug_q_arr;  /* ptr to array of submit queues */

>   

>   static DEFINE_RWLOCK(atomic_rw);

> @@ -5422,6 +5423,14 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,

>   	cmnd->host_scribble = (unsigned char *)sqcp;

>   	sd_dp = sqcp->sd_dp;

>   	spin_unlock_irqrestore(&sqp->qc_lock, iflags);

> +

> +	/* Do not complete IO from default completion path.

> +	 * Let it to be on queue.

> +	 * Completion should happen from mq_poll interface.

> +	 */

> +	if ((sqp - sdebug_q_arr) >= (submit_queues - poll_queues))

> +		return 0;

> +

>   	if (!sd_dp) {

>   		sd_dp = kzalloc(sizeof(*sd_dp), GFP_ATOMIC);

>   		if (!sd_dp) {

> @@ -5604,6 +5613,7 @@ module_param_named(sector_size, sdebug_sector_size, int, S_IRUGO);

>   module_param_named(statistics, sdebug_statistics, bool, S_IRUGO | S_IWUSR);

>   module_param_named(strict, sdebug_strict, bool, S_IRUGO | S_IWUSR);

>   module_param_named(submit_queues, submit_queues, int, S_IRUGO);

> +module_param_named(poll_queues, poll_queues, int, S_IRUGO);

>   module_param_named(tur_ms_to_ready, sdeb_tur_ms_to_ready, int, S_IRUGO);

>   module_param_named(unmap_alignment, sdebug_unmap_alignment, int, S_IRUGO);

>   module_param_named(unmap_granularity, sdebug_unmap_granularity, int, S_IRUGO);

> @@ -5673,6 +5683,7 @@ MODULE_PARM_DESC(sector_size, "logical block size in bytes (def=512)");

>   MODULE_PARM_DESC(statistics, "collect statistics on commands, queues (def=0)");

>   MODULE_PARM_DESC(strict, "stricter checks: reserved field in cdb (def=0)");

>   MODULE_PARM_DESC(submit_queues, "support for block multi-queue (def=1)");

> +MODULE_PARM_DESC(poll_queues, "support for iouring iopoll queues");

>   MODULE_PARM_DESC(tur_ms_to_ready, "TEST UNIT READY millisecs before initial good status (def=0)");

>   MODULE_PARM_DESC(unmap_alignment, "lowest aligned thin provisioning lba (def=0)");

>   MODULE_PARM_DESC(unmap_granularity, "thin provisioning granularity in blocks (def=1)");

> @@ -7140,6 +7151,104 @@ static int resp_not_ready(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)

>   	return check_condition_result;

>   }

>   

> +static int sdebug_map_queues(struct Scsi_Host *shost)

> +{

> +	int i, qoff;

> +

> +	if (shost->nr_hw_queues == 1)

> +		return 0;

> +

> +	for (i = 0, qoff = 0; i < HCTX_MAX_TYPES; i++) {

> +		struct blk_mq_queue_map *map = &shost->tag_set.map[i];

> +

> +		map->nr_queues  = 0;

> +

> +		if (i == HCTX_TYPE_DEFAULT)

> +			map->nr_queues = submit_queues - poll_queues;

> +		else if (i == HCTX_TYPE_POLL)

> +			map->nr_queues = poll_queues;

> +

> +		if (!map->nr_queues) {

> +			BUG_ON(i == HCTX_TYPE_DEFAULT);

> +			continue;

> +		}

> +

> +		map->queue_offset = qoff;

> +		blk_mq_map_queues(map);

> +

> +		qoff += map->nr_queues;

> +	}

> +

> +	return 0;

> +

> +}

> +

> +static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)

> +{

> +	int qc_idx;

> +	int retiring = 0;

> +	unsigned long iflags;

> +	struct sdebug_queue *sqp;

> +	struct sdebug_queued_cmd *sqcp;

> +	struct scsi_cmnd *scp;

> +	struct sdebug_dev_info *devip;

> +	int num_entries = 0;

> +

> +	sqp = sdebug_q_arr + queue_num;

> +

> +	do {

> +		spin_lock_irqsave(&sqp->qc_lock, iflags);

> +		qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue);

> +		if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE)))


The above line IMO needs to be:
		if (unlikely((qc_idx < 0) || (qc_idx >= sdebug_max_queue)))

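Otherwise, when sdebug_max_queue < SDEBUG_CANQUEUE and there is no request
waiting, find_first_bit() returns its size argument (sdebug_max_queue here),
which still passes the SDEBUG_CANQUEUE check, so "scp is NULL, ..." is
reported, wrongly suggesting that there is an error.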

> +			goto out;

> +

> +		sqcp = &sqp->qc_arr[qc_idx];

> +		scp = sqcp->a_cmnd;

> +		if (unlikely(scp == NULL)) {

> +			pr_err("scp is NULL, queue_num=%d, qc_idx=%d from %s\n",

> +			       queue_num, qc_idx, __func__);

> +			goto out;

> +		}

> +		devip = (struct sdebug_dev_info *)scp->device->hostdata;

> +		if (likely(devip))

> +			atomic_dec(&devip->num_in_q);

> +		else

> +			pr_err("devip=NULL from %s\n", __func__);

> +		if (unlikely(atomic_read(&retired_max_queue) > 0))

> +			retiring = 1;

> +

> +		sqcp->a_cmnd = NULL;

> +		if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) {

> +			pr_err("Unexpected completion sqp %p queue_num=%d qc_idx=%d from %s\n",

> +				sqp, queue_num, qc_idx, __func__);

> +			goto out;

> +		}

> +

> +		if (unlikely(retiring)) {	/* user has reduced max_queue */

> +			int k, retval;

> +

> +			retval = atomic_read(&retired_max_queue);

> +			if (qc_idx >= retval) {

> +				pr_err("index %d too large\n", retval);

> +				goto out;

> +			}

> +			k = find_last_bit(sqp->in_use_bm, retval);

> +			if ((k < sdebug_max_queue) || (k == retval))

> +				atomic_set(&retired_max_queue, 0);

> +			else

> +				atomic_set(&retired_max_queue, k + 1);

> +		}

> +		spin_unlock_irqrestore(&sqp->qc_lock, iflags);

> +		scp->scsi_done(scp); /* callback to mid level */

> +		num_entries++;

> +	} while (1);

> +

> +out:

> +	spin_unlock_irqrestore(&sqp->qc_lock, iflags);

> +	return num_entries;

> +}

> +

> +

>   static int scsi_debug_queuecommand(struct Scsi_Host *shost,

>   				   struct scsi_cmnd *scp)

>   {

> @@ -7318,6 +7427,8 @@ static struct scsi_host_template sdebug_driver_template = {

>   	.ioctl =		scsi_debug_ioctl,

>   	.queuecommand =		scsi_debug_queuecommand,

>   	.change_queue_depth =	sdebug_change_qdepth,

> +	.map_queues =		sdebug_map_queues,

> +	.mq_poll =		sdebug_blk_mq_poll,

>   	.eh_abort_handler =	scsi_debug_abort,

>   	.eh_device_reset_handler = scsi_debug_device_reset,

>   	.eh_target_reset_handler = scsi_debug_target_reset,

> @@ -7365,6 +7476,18 @@ static int sdebug_driver_probe(struct device *dev)

>   	if (sdebug_host_max_queue)

>   		hpnt->host_tagset = 1;

>   

> +	/* poll queues are possible for nr_hw_queues > 1 */

> +	if (hpnt->nr_hw_queues == 1)

> +		poll_queues = 0;

> +

> +	/* poll queues  */

> +	if (poll_queues >= submit_queues) {


So the above line rules out poll_queues == submit_queues; is that the
intention? If so, a short explanation of why in a comment would be
helpful. Helpful at least to me who would like to document this option.

> +		pr_warn("%s: trim poll_queues to 1\n", my_name);

> +		poll_queues = 1;

> +	}

> +	if (poll_queues)

> +		hpnt->nr_maps = 3;

> +

>   	sdbg_host->shost = hpnt;

>   	*((struct sdebug_host_info **)hpnt->hostdata) = sdbg_host;

>   	if ((hpnt->this_id >= 0) && (sdebug_num_tgts > hpnt->this_id))

>
Kashyap Desai Nov. 30, 2020, 9:06 a.m. UTC | #4
>

> On 2020-10-15 9:37 a.m., Kashyap Desai wrote:

> > Add support of iouring iopoll interface in scsi_debug.

> > This feature requires shared hosttag support in kernel and driver.

>

> I am continuing to test this patch. There is one fix shown inline below

> plus a

> question near the end.


Hi Doug, I have created an add-on patch which addresses all your comments. I am
also able to reproduce the issue you reported, and the patch below fixes it.
I will hold the V2 revision of the series and wait for your Reviewed-by and
Tested-by tags.

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 4d9cc6af588c..fb328253086d 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -5675,6 +5675,7 @@ MODULE_PARM_DESC(opt_xferlen_exp, "optimal transfer
length granularity exponent
 MODULE_PARM_DESC(opts, "1->noise, 2->medium_err, 4->timeout,
8->recovered_err... (def=0)");
 MODULE_PARM_DESC(per_host_store, "If set, next positive add_host will get
new store (def=0)");
 MODULE_PARM_DESC(physblk_exp, "physical block exponent (def=0)");
+MODULE_PARM_DESC(poll_queues, "support for iouring iopoll queues (1 to
max(submit_queues - 1)");
 MODULE_PARM_DESC(ptype, "SCSI peripheral type(def=0[disk])");
 MODULE_PARM_DESC(random, "If set, uniformly randomize command duration
between 0 and delay_in_ns");
 MODULE_PARM_DESC(removable, "claim to have removable media (def=0)");
@@ -5683,7 +5684,6 @@ MODULE_PARM_DESC(sector_size, "logical block size in
bytes (def=512)");
 MODULE_PARM_DESC(statistics, "collect statistics on commands, queues
(def=0)");
 MODULE_PARM_DESC(strict, "stricter checks: reserved field in cdb (def=0)");
 MODULE_PARM_DESC(submit_queues, "support for block multi-queue (def=1)");
-MODULE_PARM_DESC(poll_queues, "support for iouring iopoll queues");
 MODULE_PARM_DESC(tur_ms_to_ready, "TEST UNIT READY millisecs before initial
good status (def=0)");
 MODULE_PARM_DESC(unmap_alignment, "lowest aligned thin provisioning lba
(def=0)");
 MODULE_PARM_DESC(unmap_granularity, "thin provisioning granularity in
blocks (def=1)");
@@ -7199,7 +7199,7 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost,
unsigned int queue_num)
        do {
                spin_lock_irqsave(&sqp->qc_lock, iflags);
                qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue);
-               if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE)))
+               if (unlikely((qc_idx < 0) || (qc_idx >= sdebug_max_queue)))
                        goto out;

                sqcp = &sqp->qc_arr[qc_idx];
@@ -7477,10 +7477,17 @@ static int sdebug_driver_probe(struct device *dev)
                hpnt->host_tagset = 1;

        /* poll queues are possible for nr_hw_queues > 1 */
-       if (hpnt->nr_hw_queues == 1)
+       if (hpnt->nr_hw_queues == 1 || (poll_queues < 1)) {
+               pr_warn("%s: trim poll_queues to 0. poll_q/nr_hw = (%d/%d)
\n",
+                        my_name, poll_queues, hpnt->nr_hw_queues);
                poll_queues = 0;
+       }

-       /* poll queues  */
+       /*
+        * Poll queues don't need interrupts, but we need at least one I/O
queue
+        * left over for non-polled I/O.
+        * If condition not met, trim poll_queues to 1 (just for
simplicity).
+        */
        if (poll_queues >= submit_queues) {
                pr_warn("%s: trim poll_queues to 1\n", my_name);
                poll_queues = 1;


> > +	do {

> > +		spin_lock_irqsave(&sqp->qc_lock, iflags);

> > +		qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue);

> > +		if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE)))

>

> The above line IMO needs to be:

> 		if (unlikely((qc_idx < 0) || (qc_idx >= sdebug_max_queue)))

>

> If not, when sdebug_max_queue < SDEBUG_CANQUEUE and there is no

> request waiting then "scp is NULL, ..." is reported suggesting there is an

> error.


BTW - does the piece of code below, in sdebug_q_cmd_complete(), require a
similar change, i.e. using sdebug_max_queue instead of SDEBUG_CANQUEUE?
        if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE))) {
                pr_err("wild qc_idx=%d\n", qc_idx);
                return;
        }

Kashyap
Douglas Gilbert Nov. 30, 2020, 7:42 p.m. UTC | #5
On 2020-11-30 4:06 a.m., Kashyap Desai wrote:
>>

>> On 2020-10-15 9:37 a.m., Kashyap Desai wrote:

>>> Add support of iouring iopoll interface in scsi_debug.

>>> This feature requires shared hosttag support in kernel and driver.

>>

>> I am continuing to test this patch. There is one fix shown inline below

>> plus a

>> question near the end.

> 

> Hi Doug,  I have created add-on patch which includes all your comment. I am

> also able to see the issue you reported and below patch fix it.

> I will hold V2 revision of the series and I will wait for your Review-by and

> Tested-by Tag.

> 

> diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c

> index 4d9cc6af588c..fb328253086d 100644

> --- a/drivers/scsi/scsi_debug.c

> +++ b/drivers/scsi/scsi_debug.c

> @@ -5675,6 +5675,7 @@ MODULE_PARM_DESC(opt_xferlen_exp, "optimal transfer

> length granularity exponent

>   MODULE_PARM_DESC(opts, "1->noise, 2->medium_err, 4->timeout,

> 8->recovered_err... (def=0)");

>   MODULE_PARM_DESC(per_host_store, "If set, next positive add_host will get

> new store (def=0)");

>   MODULE_PARM_DESC(physblk_exp, "physical block exponent (def=0)");

> +MODULE_PARM_DESC(poll_queues, "support for iouring iopoll queues (1 to

> max(submit_queues - 1)");

>   MODULE_PARM_DESC(ptype, "SCSI peripheral type(def=0[disk])");

>   MODULE_PARM_DESC(random, "If set, uniformly randomize command duration

> between 0 and delay_in_ns");

>   MODULE_PARM_DESC(removable, "claim to have removable media (def=0)");

> @@ -5683,7 +5684,6 @@ MODULE_PARM_DESC(sector_size, "logical block size in

> bytes (def=512)");

>   MODULE_PARM_DESC(statistics, "collect statistics on commands, queues

> (def=0)");

>   MODULE_PARM_DESC(strict, "stricter checks: reserved field in cdb (def=0)");

>   MODULE_PARM_DESC(submit_queues, "support for block multi-queue (def=1)");

> -MODULE_PARM_DESC(poll_queues, "support for iouring iopoll queues");

>   MODULE_PARM_DESC(tur_ms_to_ready, "TEST UNIT READY millisecs before initial

> good status (def=0)");

>   MODULE_PARM_DESC(unmap_alignment, "lowest aligned thin provisioning lba

> (def=0)");

>   MODULE_PARM_DESC(unmap_granularity, "thin provisioning granularity in

> blocks (def=1)");

> @@ -7199,7 +7199,7 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost,

> unsigned int queue_num)

>          do {

>                  spin_lock_irqsave(&sqp->qc_lock, iflags);

>                  qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue);

> -               if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE)))

> +               if (unlikely((qc_idx < 0) || (qc_idx >= sdebug_max_queue)))

>                          goto out;

> 

>                  sqcp = &sqp->qc_arr[qc_idx];

> @@ -7477,10 +7477,17 @@ static int sdebug_driver_probe(struct device *dev)

>                  hpnt->host_tagset = 1;

> 

>          /* poll queues are possible for nr_hw_queues > 1 */

> -       if (hpnt->nr_hw_queues == 1)

> +       if (hpnt->nr_hw_queues == 1 || (poll_queues < 1)) {

> +               pr_warn("%s: trim poll_queues to 0. poll_q/nr_hw = (%d/%d)

> \n",

> +                        my_name, poll_queues, hpnt->nr_hw_queues);

>                  poll_queues = 0;

> +       }

> 

> -       /* poll queues  */

> +       /*

> +        * Poll queues don't need interrupts, but we need at least one I/O

> queue

> +        * left over for non-polled I/O.

> +        * If condition not met, trim poll_queues to 1 (just for

> simplicity).

> +        */

>          if (poll_queues >= submit_queues) {

>                  pr_warn("%s: trim poll_queues to 1\n", my_name);

>                  poll_queues = 1;

> 

>


Kashyap,
I struggled with this patch, first the line wrap, then the last two
patch segments not applying. Could you send me the scsi_debug.c file
attached to an email?

>>> +	do {

>>> +		spin_lock_irqsave(&sqp->qc_lock, iflags);

>>> +		qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue);

>>> +		if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE)))

>>

>> The above line IMO needs to be:

>> 		if (unlikely((qc_idx < 0) || (qc_idx >= sdebug_max_queue)))

>>

>> If not, when sdebug_max_queue < SDEBUG_CANQUEUE and there is no

>> request waiting then "scp is NULL, ..." is reported suggesting there is an

>> error.

> 

> BTW -  Is below piece of code at sdebug_q_cmd_complete() requires similar

> change ?

> Use sdebug_max_queue instead of SDEBUG_CANQUEUE

>          if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE))) {

>                  pr_err("wild qc_idx=%d\n", qc_idx);

>                  return;

>          }


Yes, I need to look at this. sdebug_max_queue is initialized to
SDEBUG_CANQUEUE but then can be overridden by the invocation line parameters.
Several arrays in structures are sized by SDEBUG_CANQUEUE which will
remain. But most SDEBUG_CANQUEUE uses inside driver functions can probably
be replaced by sdebug_max_queue when I confirm that it is safe. Since
sdebug_max_queue <= SDEBUG_CANQUEUE and the fields in between should
always be zero, the current situation just leads to wasted cycles.

Doug Gilbert
Douglas Gilbert Dec. 1, 2020, 4:56 p.m. UTC | #6
On 2020-11-30 4:06 a.m., Kashyap Desai wrote:
>>

>> On 2020-10-15 9:37 a.m., Kashyap Desai wrote:

>>> Add support of iouring iopoll interface in scsi_debug.

>>> This feature requires shared hosttag support in kernel and driver.

>>

>> I am continuing to test this patch. There is one fix shown inline below

>> plus a

>> question near the end.

> 

> Hi Doug,  I have created add-on patch which includes all your comment. I am

> also able to see the issue you reported and below patch fix it.

> I will hold V2 revision of the series and I will wait for your Review-by and

> Tested-by Tag.


Thanks, that is a good explanation of why poll_queues must be less than
submit_queues.

Acked-by: Douglas Gilbert <dgilbert@interlog.com>

Tested-by: Douglas Gilbert <dgilbert@interlog.com>
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index a87e40aec11f..4d9cc6af588c 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -826,6 +826,7 @@  static int sdeb_zbc_max_open = DEF_ZBC_MAX_OPEN_ZONES;
 static int sdeb_zbc_nr_conv = DEF_ZBC_NR_CONV_ZONES;
 
 static int submit_queues = DEF_SUBMIT_QUEUES;  /* > 1 for multi-queue (mq) */
+static int poll_queues; /* iouring iopoll interface.*/
 static struct sdebug_queue *sdebug_q_arr;  /* ptr to array of submit queues */
 
 static DEFINE_RWLOCK(atomic_rw);
@@ -5422,6 +5423,14 @@  static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 	cmnd->host_scribble = (unsigned char *)sqcp;
 	sd_dp = sqcp->sd_dp;
 	spin_unlock_irqrestore(&sqp->qc_lock, iflags);
+
+	/* Do not complete IO from default completion path.
+	 * Let it to be on queue.
+	 * Completion should happen from mq_poll interface.
+	 */
+	if ((sqp - sdebug_q_arr) >= (submit_queues - poll_queues))
+		return 0;
+
 	if (!sd_dp) {
 		sd_dp = kzalloc(sizeof(*sd_dp), GFP_ATOMIC);
 		if (!sd_dp) {
@@ -5604,6 +5613,7 @@  module_param_named(sector_size, sdebug_sector_size, int, S_IRUGO);
 module_param_named(statistics, sdebug_statistics, bool, S_IRUGO | S_IWUSR);
 module_param_named(strict, sdebug_strict, bool, S_IRUGO | S_IWUSR);
 module_param_named(submit_queues, submit_queues, int, S_IRUGO);
+module_param_named(poll_queues, poll_queues, int, S_IRUGO);
 module_param_named(tur_ms_to_ready, sdeb_tur_ms_to_ready, int, S_IRUGO);
 module_param_named(unmap_alignment, sdebug_unmap_alignment, int, S_IRUGO);
 module_param_named(unmap_granularity, sdebug_unmap_granularity, int, S_IRUGO);
@@ -5673,6 +5683,7 @@  MODULE_PARM_DESC(sector_size, "logical block size in bytes (def=512)");
 MODULE_PARM_DESC(statistics, "collect statistics on commands, queues (def=0)");
 MODULE_PARM_DESC(strict, "stricter checks: reserved field in cdb (def=0)");
 MODULE_PARM_DESC(submit_queues, "support for block multi-queue (def=1)");
+MODULE_PARM_DESC(poll_queues, "support for iouring iopoll queues");
 MODULE_PARM_DESC(tur_ms_to_ready, "TEST UNIT READY millisecs before initial good status (def=0)");
 MODULE_PARM_DESC(unmap_alignment, "lowest aligned thin provisioning lba (def=0)");
 MODULE_PARM_DESC(unmap_granularity, "thin provisioning granularity in blocks (def=1)");
@@ -7140,6 +7151,104 @@  static int resp_not_ready(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
 	return check_condition_result;
 }
 
+static int sdebug_map_queues(struct Scsi_Host *shost)
+{
+	int i, qoff;
+
+	if (shost->nr_hw_queues == 1)
+		return 0;
+
+	for (i = 0, qoff = 0; i < HCTX_MAX_TYPES; i++) {
+		struct blk_mq_queue_map *map = &shost->tag_set.map[i];
+
+		map->nr_queues  = 0;
+
+		if (i == HCTX_TYPE_DEFAULT)
+			map->nr_queues = submit_queues - poll_queues;
+		else if (i == HCTX_TYPE_POLL)
+			map->nr_queues = poll_queues;
+
+		if (!map->nr_queues) {
+			BUG_ON(i == HCTX_TYPE_DEFAULT);
+			continue;
+		}
+
+		map->queue_offset = qoff;
+		blk_mq_map_queues(map);
+
+		qoff += map->nr_queues;
+	}
+
+	return 0;
+
+}
+
+static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
+{
+	int qc_idx;
+	int retiring = 0;
+	unsigned long iflags;
+	struct sdebug_queue *sqp;
+	struct sdebug_queued_cmd *sqcp;
+	struct scsi_cmnd *scp;
+	struct sdebug_dev_info *devip;
+	int num_entries = 0;
+
+	sqp = sdebug_q_arr + queue_num;
+
+	do {
+		spin_lock_irqsave(&sqp->qc_lock, iflags);
+		qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue);
+		if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE)))
+			goto out;
+
+		sqcp = &sqp->qc_arr[qc_idx];
+		scp = sqcp->a_cmnd;
+		if (unlikely(scp == NULL)) {
+			pr_err("scp is NULL, queue_num=%d, qc_idx=%d from %s\n",
+			       queue_num, qc_idx, __func__);
+			goto out;
+		}
+		devip = (struct sdebug_dev_info *)scp->device->hostdata;
+		if (likely(devip))
+			atomic_dec(&devip->num_in_q);
+		else
+			pr_err("devip=NULL from %s\n", __func__);
+		if (unlikely(atomic_read(&retired_max_queue) > 0))
+			retiring = 1;
+
+		sqcp->a_cmnd = NULL;
+		if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) {
+			pr_err("Unexpected completion sqp %p queue_num=%d qc_idx=%d from %s\n",
+				sqp, queue_num, qc_idx, __func__);
+			goto out;
+		}
+
+		if (unlikely(retiring)) {	/* user has reduced max_queue */
+			int k, retval;
+
+			retval = atomic_read(&retired_max_queue);
+			if (qc_idx >= retval) {
+				pr_err("index %d too large\n", retval);
+				goto out;
+			}
+			k = find_last_bit(sqp->in_use_bm, retval);
+			if ((k < sdebug_max_queue) || (k == retval))
+				atomic_set(&retired_max_queue, 0);
+			else
+				atomic_set(&retired_max_queue, k + 1);
+		}
+		spin_unlock_irqrestore(&sqp->qc_lock, iflags);
+		scp->scsi_done(scp); /* callback to mid level */
+		num_entries++;
+	} while (1);
+
+out:
+	spin_unlock_irqrestore(&sqp->qc_lock, iflags);
+	return num_entries;
+}
+
+
 static int scsi_debug_queuecommand(struct Scsi_Host *shost,
 				   struct scsi_cmnd *scp)
 {
@@ -7318,6 +7427,8 @@  static struct scsi_host_template sdebug_driver_template = {
 	.ioctl =		scsi_debug_ioctl,
 	.queuecommand =		scsi_debug_queuecommand,
 	.change_queue_depth =	sdebug_change_qdepth,
+	.map_queues =		sdebug_map_queues,
+	.mq_poll =		sdebug_blk_mq_poll,
 	.eh_abort_handler =	scsi_debug_abort,
 	.eh_device_reset_handler = scsi_debug_device_reset,
 	.eh_target_reset_handler = scsi_debug_target_reset,
@@ -7365,6 +7476,18 @@  static int sdebug_driver_probe(struct device *dev)
 	if (sdebug_host_max_queue)
 		hpnt->host_tagset = 1;
 
+	/* poll queues are possible for nr_hw_queues > 1 */
+	if (hpnt->nr_hw_queues == 1)
+		poll_queues = 0;
+
+	/* poll queues  */
+	if (poll_queues >= submit_queues) {
+		pr_warn("%s: trim poll_queues to 1\n", my_name);
+		poll_queues = 1;
+	}
+	if (poll_queues)
+		hpnt->nr_maps = 3;
+
 	sdbg_host->shost = hpnt;
 	*((struct sdebug_host_info **)hpnt->hostdata) = sdbg_host;
 	if ((hpnt->this_id >= 0) && (sdebug_num_tgts > hpnt->this_id))