diff mbox series

[v5,03/23] scsi: hisi_sas: optimise the usage of hisi_hba.lock

Message ID 1497017796-105067-4-git-send-email-john.garry@huawei.com
State Superseded
Headers show
Series [v5,01/23] scsi: hisi_sas: fix timeout check in hisi_sas_internal_task_abort() | expand

Commit Message

John Garry June 9, 2017, 2:16 p.m. UTC
From: Xiang Chen <chenxiang66@hisilicon.com>


Currently hisi_hba.lock is locked to deliver and receive a
command to/from any hw queue. This causes much
contention at high data-rates.

To boost performance, lock on a per queue basis for
sending and receiving commands to/from hw.

Certain critical regions still need to be locked in the delivery
and completion stages with hisi_hba.lock.

New element hisi_sas_device.dq is added to store the delivery
queue for a device, so it does not need to be needlessly
re-calculated for every task.

Signed-off-by: Xiang Chen <chenxiang66@hisilicon.com>

Signed-off-by: John Garry <john.garry@huawei.com>

---
 drivers/scsi/hisi_sas/hisi_sas.h       |  9 ++---
 drivers/scsi/hisi_sas/hisi_sas_main.c  | 61 +++++++++++++++++++++++-----------
 drivers/scsi/hisi_sas/hisi_sas_v1_hw.c | 23 +++++--------
 drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 34 +++++++++----------
 4 files changed, 71 insertions(+), 56 deletions(-)

-- 
1.9.1

Comments

kernel test robot June 10, 2017, 8:44 p.m. UTC | #1
Hi Xiang,

[auto build test WARNING on mkp-scsi/for-next]
[also build test WARNING on v4.12-rc4 next-20170609]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/John-Garry/hisi_sas-hip08-support/20170611-014437
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git for-next


coccinelle warnings: (new ones prefixed by >>)

>> drivers/scsi/hisi_sas/hisi_sas_main.c:1208:1-18: ERROR: nested lock+irqsave that reuses flags from line 1178.


vim +1208 drivers/scsi/hisi_sas/hisi_sas_main.c

bf95e9ccc Xiang Chen 2017-06-09  1172  	if (rc) {
bf95e9ccc Xiang Chen 2017-06-09  1173  		spin_unlock_irqrestore(&hisi_hba->lock, flags);
441c27401 John Garry 2016-08-24  1174  		goto err_out;
bf95e9ccc Xiang Chen 2017-06-09  1175  	}
bf95e9ccc Xiang Chen 2017-06-09  1176  	spin_unlock_irqrestore(&hisi_hba->lock, flags);
bf95e9ccc Xiang Chen 2017-06-09  1177  
bf95e9ccc Xiang Chen 2017-06-09 @1178  	spin_lock_irqsave(&dq->lock, flags);
bf95e9ccc Xiang Chen 2017-06-09  1179  	rc = hisi_hba->hw->get_free_slot(hisi_hba, dq);
441c27401 John Garry 2016-08-24  1180  	if (rc)
441c27401 John Garry 2016-08-24  1181  		goto err_out_tag;
441c27401 John Garry 2016-08-24  1182  
bf95e9ccc Xiang Chen 2017-06-09  1183  	dlvry_queue = dq->id;
bf95e9ccc Xiang Chen 2017-06-09  1184  	dlvry_queue_slot = dq->wr_point;
bf95e9ccc Xiang Chen 2017-06-09  1185  
441c27401 John Garry 2016-08-24  1186  	slot = &hisi_hba->slot_info[slot_idx];
441c27401 John Garry 2016-08-24  1187  	memset(slot, 0, sizeof(struct hisi_sas_slot));
441c27401 John Garry 2016-08-24  1188  
441c27401 John Garry 2016-08-24  1189  	slot->idx = slot_idx;
441c27401 John Garry 2016-08-24  1190  	slot->n_elem = n_elem;
441c27401 John Garry 2016-08-24  1191  	slot->dlvry_queue = dlvry_queue;
441c27401 John Garry 2016-08-24  1192  	slot->dlvry_queue_slot = dlvry_queue_slot;
441c27401 John Garry 2016-08-24  1193  	cmd_hdr_base = hisi_hba->cmd_hdr[dlvry_queue];
441c27401 John Garry 2016-08-24  1194  	slot->cmd_hdr = &cmd_hdr_base[dlvry_queue_slot];
441c27401 John Garry 2016-08-24  1195  	slot->task = task;
441c27401 John Garry 2016-08-24  1196  	slot->port = port;
441c27401 John Garry 2016-08-24  1197  	task->lldd_task = slot;
441c27401 John Garry 2016-08-24  1198  
441c27401 John Garry 2016-08-24  1199  	memset(slot->cmd_hdr, 0, sizeof(struct hisi_sas_cmd_hdr));
441c27401 John Garry 2016-08-24  1200  
441c27401 John Garry 2016-08-24  1201  	rc = hisi_sas_task_prep_abort(hisi_hba, slot, device_id,
441c27401 John Garry 2016-08-24  1202  				      abort_flag, task_tag);
441c27401 John Garry 2016-08-24  1203  	if (rc)
441c27401 John Garry 2016-08-24  1204  		goto err_out_tag;
441c27401 John Garry 2016-08-24  1205  
405314df5 John Garry 2017-03-23  1206  
405314df5 John Garry 2017-03-23  1207  	list_add_tail(&slot->entry, &sas_dev->list);
54c9dd2d2 John Garry 2017-03-23 @1208  	spin_lock_irqsave(&task->task_state_lock, flags);
441c27401 John Garry 2016-08-24  1209  	task->task_state_flags |= SAS_TASK_AT_INITIATOR;
54c9dd2d2 John Garry 2017-03-23  1210  	spin_unlock_irqrestore(&task->task_state_lock, flags);
441c27401 John Garry 2016-08-24  1211  

:::::: The code at line 1208 was first introduced by commit
:::::: 54c9dd2d26d0951891516a956893428feb9aea17 scsi: hisi_sas: fix some sas_task.task_state_lock locking

:::::: TO: John Garry <john.garry@huawei.com>
:::::: CC: Martin K. Petersen <martin.petersen@oracle.com>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
John Garry June 12, 2017, 8:26 a.m. UTC | #2
On 10/06/2017 21:44, kbuild test robot wrote:
> Hi Xiang,

>

> [auto build test WARNING on mkp-scsi/for-next]

> [also build test WARNING on v4.12-rc4 next-20170609]

> [if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

>

> url:    https://github.com/0day-ci/linux/commits/John-Garry/hisi_sas-hip08-support/20170611-014437

> base:   https://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git for-next

>

>

> coccinelle warnings: (new ones prefixed by >>)

>

>>> drivers/scsi/hisi_sas/hisi_sas_main.c:1208:1-18: ERROR: nested lock+irqsave that reuses flags from line 1178.

>

> vim +1208 drivers/scsi/hisi_sas/hisi_sas_main.c

>

> bf95e9ccc Xiang Chen 2017-06-09  1172  	if (rc) {

> bf95e9ccc Xiang Chen 2017-06-09  1173  		spin_unlock_irqrestore(&hisi_hba->lock, flags);

> 441c27401 John Garry 2016-08-24  1174  		goto err_out;

> bf95e9ccc Xiang Chen 2017-06-09  1175  	}

> bf95e9ccc Xiang Chen 2017-06-09  1176  	spin_unlock_irqrestore(&hisi_hba->lock, flags);

> bf95e9ccc Xiang Chen 2017-06-09  1177

> bf95e9ccc Xiang Chen 2017-06-09 @1178  	spin_lock_irqsave(&dq->lock, flags);

> bf95e9ccc Xiang Chen 2017-06-09  1179  	rc = hisi_hba->hw->get_free_slot(hisi_hba, dq);

> 441c27401 John Garry 2016-08-24  1180  	if (rc)

> 441c27401 John Garry 2016-08-24  1181  		goto err_out_tag;

> 441c27401 John Garry 2016-08-24  1182

> bf95e9ccc Xiang Chen 2017-06-09  1183  	dlvry_queue = dq->id;

> bf95e9ccc Xiang Chen 2017-06-09  1184  	dlvry_queue_slot = dq->wr_point;

> bf95e9ccc Xiang Chen 2017-06-09  1185

> 441c27401 John Garry 2016-08-24  1186  	slot = &hisi_hba->slot_info[slot_idx];

> 441c27401 John Garry 2016-08-24  1187  	memset(slot, 0, sizeof(struct hisi_sas_slot));

> 441c27401 John Garry 2016-08-24  1188

> 441c27401 John Garry 2016-08-24  1189  	slot->idx = slot_idx;

> 441c27401 John Garry 2016-08-24  1190  	slot->n_elem = n_elem;

> 441c27401 John Garry 2016-08-24  1191  	slot->dlvry_queue = dlvry_queue;

> 441c27401 John Garry 2016-08-24  1192  	slot->dlvry_queue_slot = dlvry_queue_slot;

> 441c27401 John Garry 2016-08-24  1193  	cmd_hdr_base = hisi_hba->cmd_hdr[dlvry_queue];

> 441c27401 John Garry 2016-08-24  1194  	slot->cmd_hdr = &cmd_hdr_base[dlvry_queue_slot];

> 441c27401 John Garry 2016-08-24  1195  	slot->task = task;

> 441c27401 John Garry 2016-08-24  1196  	slot->port = port;

> 441c27401 John Garry 2016-08-24  1197  	task->lldd_task = slot;

> 441c27401 John Garry 2016-08-24  1198

> 441c27401 John Garry 2016-08-24  1199  	memset(slot->cmd_hdr, 0, sizeof(struct hisi_sas_cmd_hdr));

> 441c27401 John Garry 2016-08-24  1200

> 441c27401 John Garry 2016-08-24  1201  	rc = hisi_sas_task_prep_abort(hisi_hba, slot, device_id,

> 441c27401 John Garry 2016-08-24  1202  				      abort_flag, task_tag);

> 441c27401 John Garry 2016-08-24  1203  	if (rc)

> 441c27401 John Garry 2016-08-24  1204  		goto err_out_tag;

> 441c27401 John Garry 2016-08-24  1205

> 405314df5 John Garry 2017-03-23  1206

> 405314df5 John Garry 2017-03-23  1207  	list_add_tail(&slot->entry, &sas_dev->list);

> 54c9dd2d2 John Garry 2017-03-23 @1208  	spin_lock_irqsave(&task->task_state_lock, flags);


I don't think that reusing flags variable is in error, as there would be 
no nested spinlock at this point within the function.

If it is not recommended or not permitted to reuse flags variable for 
separate spinlocks, then that can be changed - I don't know.

John

> 441c27401 John Garry 2016-08-24  1209  	task->task_state_flags |= SAS_TASK_AT_INITIATOR;

> 54c9dd2d2 John Garry 2017-03-23  1210  	spin_unlock_irqrestore(&task->task_state_lock, flags);

> 441c27401 John Garry 2016-08-24  1211

>

> :::::: The code at line 1208 was first introduced by commit

> :::::: 54c9dd2d26d0951891516a956893428feb9aea17 scsi: hisi_sas: fix some sas_task.task_state_lock locking

>

> :::::: TO: John Garry <john.garry@huawei.com>

> :::::: CC: Martin K. Petersen <martin.petersen@oracle.com>

>

> ---

> 0-DAY kernel test infrastructure                Open Source Technology Center

> https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

>

> .

>
Arnd Bergmann June 12, 2017, 9:45 a.m. UTC | #3
On Mon, Jun 12, 2017 at 10:26 AM, John Garry <john.garry@huawei.com> wrote:
> On 10/06/2017 21:44, kbuild test robot wrote:

>

>

> I don't think that reusing flags variable is in error, as there would be no

> nested spinlock at this point within the function.

>

> If it is not recommended or not permitted to reuse flags variable for

> separate spinlocks, then that can be changed - I don't know.


No, look again: coccinelle is right as the locks are nested in
https://github.com/0day-ci/linux/blob/bf95e9cccde4af4ed2012a6ec44d48b545d5ffed/drivers/scsi/hisi_sas/hisi_sas_main.c#L1208

dq->lock is already held and you acquire task->task_state_lock in
line 1208, which overwrites the flags.

      Arnd
John Garry June 12, 2017, 10:24 a.m. UTC | #4
On 12/06/2017 10:45, Arnd Bergmann wrote:
> On Mon, Jun 12, 2017 at 10:26 AM, John Garry <john.garry@huawei.com> wrote:

>> On 10/06/2017 21:44, kbuild test robot wrote:

>>

>>

>> I don't think that reusing flags variable is in error, as there would be no

>> nested spinlock at this point within the function.

>>

>> If it is not recommended or not permitted to reuse flags variable for

>> separate spinlocks, then that can be changed - I don't know.

>

> No, look again: coccinelle is right as the locks are nested in

> https://github.com/0day-ci/linux/blob/bf95e9cccde4af4ed2012a6ec44d48b545d5ffed/drivers/scsi/hisi_sas/hisi_sas_main.c#L1208

>

> dq->lock is already held and you acquire task->task_state_lock in

> line 1208, which overwrites the flags.

>

>       Arnd


Hi Arnd,

Ah, now I see. The error message mislead me, as I straight away checked 
hisi_sas_task_prep() which does the same locking and also reuses flags 
for spinlock_irqsave(), but it's safe here.

But, as you pointed out, the problem is in 
hisi_sas_internal_abort_task_exec().

A thought: it would be useful if coccinelle printed explicitly the 
function name which has the warning/error.

Thanks,
John

>

> .

>
diff mbox series

Patch

diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h
index b4e96fa9..68ba7bd 100644
--- a/drivers/scsi/hisi_sas/hisi_sas.h
+++ b/drivers/scsi/hisi_sas/hisi_sas.h
@@ -102,6 +102,8 @@  struct hisi_sas_cq {
 
 struct hisi_sas_dq {
 	struct hisi_hba *hisi_hba;
+	struct hisi_sas_slot	*slot_prep;
+	spinlock_t lock;
 	int	wr_point;
 	int	id;
 };
@@ -109,6 +111,7 @@  struct hisi_sas_dq {
 struct hisi_sas_device {
 	struct hisi_hba		*hisi_hba;
 	struct domain_device	*sas_device;
+	struct hisi_sas_dq	*dq;
 	struct list_head	list;
 	u64 attached_phy;
 	atomic64_t running_req;
@@ -154,9 +157,8 @@  struct hisi_sas_hw {
 				struct domain_device *device);
 	struct hisi_sas_device *(*alloc_dev)(struct domain_device *device);
 	void (*sl_notify)(struct hisi_hba *hisi_hba, int phy_no);
-	int (*get_free_slot)(struct hisi_hba *hisi_hba, u32 dev_id,
-			int *q, int *s);
-	void (*start_delivery)(struct hisi_hba *hisi_hba);
+	int (*get_free_slot)(struct hisi_hba *hisi_hba, struct hisi_sas_dq *dq);
+	void (*start_delivery)(struct hisi_sas_dq *dq);
 	int (*prep_ssp)(struct hisi_hba *hisi_hba,
 			struct hisi_sas_slot *slot, int is_tmf,
 			struct hisi_sas_tmf_task *tmf);
@@ -217,7 +219,6 @@  struct hisi_hba {
 	struct hisi_sas_port port[HISI_SAS_MAX_PHYS];
 
 	int	queue_count;
-	struct hisi_sas_slot	*slot_prep;
 
 	struct dma_pool *sge_page_pool;
 	struct hisi_sas_device	devices[HISI_SAS_MAX_DEVICES];
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 54e0cf2..b247220 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -179,10 +179,11 @@  static void hisi_sas_slot_abort(struct work_struct *work)
 		task->task_done(task);
 }
 
-static int hisi_sas_task_prep(struct sas_task *task, struct hisi_hba *hisi_hba,
-			      int is_tmf, struct hisi_sas_tmf_task *tmf,
-			      int *pass)
+static int hisi_sas_task_prep(struct sas_task *task, struct hisi_sas_dq
+		*dq, int is_tmf, struct hisi_sas_tmf_task *tmf,
+		int *pass)
 {
+	struct hisi_hba *hisi_hba = dq->hisi_hba;
 	struct domain_device *device = task->dev;
 	struct hisi_sas_device *sas_dev = device->lldd_dev;
 	struct hisi_sas_port *port;
@@ -240,18 +241,24 @@  static int hisi_sas_task_prep(struct sas_task *task, struct hisi_hba *hisi_hba,
 	} else
 		n_elem = task->num_scatter;
 
+	spin_lock_irqsave(&hisi_hba->lock, flags);
 	if (hisi_hba->hw->slot_index_alloc)
 		rc = hisi_hba->hw->slot_index_alloc(hisi_hba, &slot_idx,
 						    device);
 	else
 		rc = hisi_sas_slot_index_alloc(hisi_hba, &slot_idx);
-	if (rc)
+	if (rc) {
+		spin_unlock_irqrestore(&hisi_hba->lock, flags);
 		goto err_out;
-	rc = hisi_hba->hw->get_free_slot(hisi_hba, sas_dev->device_id,
-					&dlvry_queue, &dlvry_queue_slot);
+	}
+	spin_unlock_irqrestore(&hisi_hba->lock, flags);
+
+	rc = hisi_hba->hw->get_free_slot(hisi_hba, dq);
 	if (rc)
 		goto err_out_tag;
 
+	dlvry_queue = dq->id;
+	dlvry_queue_slot = dq->wr_point;
 	slot = &hisi_hba->slot_info[slot_idx];
 	memset(slot, 0, sizeof(struct hisi_sas_slot));
 
@@ -316,7 +323,7 @@  static int hisi_sas_task_prep(struct sas_task *task, struct hisi_hba *hisi_hba,
 	task->task_state_flags |= SAS_TASK_AT_INITIATOR;
 	spin_unlock_irqrestore(&task->task_state_lock, flags);
 
-	hisi_hba->slot_prep = slot;
+	dq->slot_prep = slot;
 
 	atomic64_inc(&sas_dev->running_req);
 	++(*pass);
@@ -354,19 +361,22 @@  static int hisi_sas_task_exec(struct sas_task *task, gfp_t gfp_flags,
 	unsigned long flags;
 	struct hisi_hba *hisi_hba = dev_to_hisi_hba(task->dev);
 	struct device *dev = &hisi_hba->pdev->dev;
+	struct domain_device *device = task->dev;
+	struct hisi_sas_device *sas_dev = device->lldd_dev;
+	struct hisi_sas_dq *dq = sas_dev->dq;
 
 	if (unlikely(test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags)))
 		return -EINVAL;
 
 	/* protect task_prep and start_delivery sequence */
-	spin_lock_irqsave(&hisi_hba->lock, flags);
-	rc = hisi_sas_task_prep(task, hisi_hba, is_tmf, tmf, &pass);
+	spin_lock_irqsave(&dq->lock, flags);
+	rc = hisi_sas_task_prep(task, dq, is_tmf, tmf, &pass);
 	if (rc)
 		dev_err(dev, "task exec: failed[%d]!\n", rc);
 
 	if (likely(pass))
-		hisi_hba->hw->start_delivery(hisi_hba);
-	spin_unlock_irqrestore(&hisi_hba->lock, flags);
+		hisi_hba->hw->start_delivery(dq);
+	spin_unlock_irqrestore(&dq->lock, flags);
 
 	return rc;
 }
@@ -421,12 +431,16 @@  static struct hisi_sas_device *hisi_sas_alloc_dev(struct domain_device *device)
 	spin_lock(&hisi_hba->lock);
 	for (i = 0; i < HISI_SAS_MAX_DEVICES; i++) {
 		if (hisi_hba->devices[i].dev_type == SAS_PHY_UNUSED) {
+			int queue = i % hisi_hba->queue_count;
+			struct hisi_sas_dq *dq = &hisi_hba->dq[queue];
+
 			hisi_hba->devices[i].device_id = i;
 			sas_dev = &hisi_hba->devices[i];
 			sas_dev->dev_status = HISI_SAS_DEV_NORMAL;
 			sas_dev->dev_type = device->dev_type;
 			sas_dev->hisi_hba = hisi_hba;
 			sas_dev->sas_device = device;
+			sas_dev->dq = dq;
 			INIT_LIST_HEAD(&hisi_hba->devices[i].list);
 			break;
 		}
@@ -1140,6 +1154,7 @@  static int hisi_sas_query_task(struct sas_task *task)
 	struct hisi_sas_slot *slot;
 	struct asd_sas_port *sas_port = device->port;
 	struct hisi_sas_cmd_hdr *cmd_hdr_base;
+	struct hisi_sas_dq *dq = sas_dev->dq;
 	int dlvry_queue_slot, dlvry_queue, n_elem = 0, rc, slot_idx;
 	unsigned long flags;
 
@@ -1152,14 +1167,22 @@  static int hisi_sas_query_task(struct sas_task *task)
 	port = to_hisi_sas_port(sas_port);
 
 	/* simply get a slot and send abort command */
+	spin_lock_irqsave(&hisi_hba->lock, flags);
 	rc = hisi_sas_slot_index_alloc(hisi_hba, &slot_idx);
-	if (rc)
+	if (rc) {
+		spin_unlock_irqrestore(&hisi_hba->lock, flags);
 		goto err_out;
-	rc = hisi_hba->hw->get_free_slot(hisi_hba, sas_dev->device_id,
-					&dlvry_queue, &dlvry_queue_slot);
+	}
+	spin_unlock_irqrestore(&hisi_hba->lock, flags);
+
+	spin_lock_irqsave(&dq->lock, flags);
+	rc = hisi_hba->hw->get_free_slot(hisi_hba, dq);
 	if (rc)
 		goto err_out_tag;
 
+	dlvry_queue = dq->id;
+	dlvry_queue_slot = dq->wr_point;
+
 	slot = &hisi_hba->slot_info[slot_idx];
 	memset(slot, 0, sizeof(struct hisi_sas_slot));
 
@@ -1186,18 +1209,20 @@  static int hisi_sas_query_task(struct sas_task *task)
 	task->task_state_flags |= SAS_TASK_AT_INITIATOR;
 	spin_unlock_irqrestore(&task->task_state_lock, flags);
 
-	hisi_hba->slot_prep = slot;
+	dq->slot_prep = slot;
 
 	atomic64_inc(&sas_dev->running_req);
 
 	/* send abort command to our chip */
-	hisi_hba->hw->start_delivery(hisi_hba);
+	hisi_hba->hw->start_delivery(dq);
+	spin_unlock_irqrestore(&dq->lock, flags);
 
 	return 0;
 
 err_out_tag:
 	hisi_sas_slot_index_free(hisi_hba, slot_idx);
 err_out:
+	spin_unlock_irqrestore(&dq->lock, flags);
 	dev_err(dev, "internal abort task prep: failed[%d]!\n", rc);
 
 	return rc;
@@ -1221,7 +1246,6 @@  static int hisi_sas_query_task(struct sas_task *task)
 	struct hisi_sas_device *sas_dev = device->lldd_dev;
 	struct device *dev = &hisi_hba->pdev->dev;
 	int res;
-	unsigned long flags;
 
 	if (!hisi_hba->hw->prep_abort)
 		return -EOPNOTSUPP;
@@ -1238,11 +1262,8 @@  static int hisi_sas_query_task(struct sas_task *task)
 	task->slow_task->timer.expires = jiffies + msecs_to_jiffies(110);
 	add_timer(&task->slow_task->timer);
 
-	/* Lock as we are alloc'ing a slot, which cannot be interrupted */
-	spin_lock_irqsave(&hisi_hba->lock, flags);
 	res = hisi_sas_internal_abort_task_exec(hisi_hba, sas_dev->device_id,
 						task, abort_flag, tag);
-	spin_unlock_irqrestore(&hisi_hba->lock, flags);
 	if (res) {
 		del_timer(&task->slow_task->timer);
 		dev_err(dev, "internal task abort: executing internal task failed: %d\n",
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
index fc1c1b2..7d7d2a7 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
@@ -900,22 +900,17 @@  static int get_wideport_bitmap_v1_hw(struct hisi_hba *hisi_hba, int port_id)
 	return bitmap;
 }
 
-/**
- * This function allocates across all queues to load balance.
- * Slots are allocated from queues in a round-robin fashion.
- *
+/*
  * The callpath to this function and upto writing the write
  * queue pointer should be safe from interruption.
  */
-static int get_free_slot_v1_hw(struct hisi_hba *hisi_hba, u32 dev_id,
-				int *q, int *s)
+static int
+get_free_slot_v1_hw(struct hisi_hba *hisi_hba, struct hisi_sas_dq *dq)
 {
 	struct device *dev = &hisi_hba->pdev->dev;
-	struct hisi_sas_dq *dq;
+	int queue = dq->id;
 	u32 r, w;
-	int queue = dev_id % hisi_hba->queue_count;
 
-	dq = &hisi_hba->dq[queue];
 	w = dq->wr_point;
 	r = hisi_sas_read32_relaxed(hisi_hba,
 				DLVRY_Q_0_RD_PTR + (queue * 0x14));
@@ -924,16 +919,14 @@  static int get_free_slot_v1_hw(struct hisi_hba *hisi_hba, u32 dev_id,
 		return -EAGAIN;
 	}
 
-	*q = queue;
-	*s = w;
 	return 0;
 }
 
-static void start_delivery_v1_hw(struct hisi_hba *hisi_hba)
+static void start_delivery_v1_hw(struct hisi_sas_dq *dq)
 {
-	int dlvry_queue = hisi_hba->slot_prep->dlvry_queue;
-	int dlvry_queue_slot = hisi_hba->slot_prep->dlvry_queue_slot;
-	struct hisi_sas_dq *dq = &hisi_hba->dq[dlvry_queue];
+	struct hisi_hba *hisi_hba = dq->hisi_hba;
+	int dlvry_queue = dq->slot_prep->dlvry_queue;
+	int dlvry_queue_slot = dq->slot_prep->dlvry_queue_slot;
 
 	dq->wr_point = ++dlvry_queue_slot % HISI_SAS_QUEUE_SLOTS;
 	hisi_sas_write32(hisi_hba, DLVRY_Q_0_WR_PTR + (dlvry_queue * 0x14),
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
index e241921..2607aac 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
@@ -695,6 +695,9 @@  hisi_sas_device *alloc_dev_quirk_v2_hw(struct domain_device *device)
 		if (sata_dev && (i & 1))
 			continue;
 		if (hisi_hba->devices[i].dev_type == SAS_PHY_UNUSED) {
+			int queue = i % hisi_hba->queue_count;
+			struct hisi_sas_dq *dq = &hisi_hba->dq[queue];
+
 			hisi_hba->devices[i].device_id = i;
 			sas_dev = &hisi_hba->devices[i];
 			sas_dev->dev_status = HISI_SAS_DEV_NORMAL;
@@ -702,6 +705,7 @@  hisi_sas_device *alloc_dev_quirk_v2_hw(struct domain_device *device)
 			sas_dev->hisi_hba = hisi_hba;
 			sas_dev->sas_device = device;
 			sas_dev->sata_idx = sata_idx;
+			sas_dev->dq = dq;
 			INIT_LIST_HEAD(&hisi_hba->devices[i].list);
 			break;
 		}
@@ -1454,22 +1458,17 @@  static int get_wideport_bitmap_v2_hw(struct hisi_hba *hisi_hba, int port_id)
 	return bitmap;
 }
 
-/**
- * This function allocates across all queues to load balance.
- * Slots are allocated from queues in a round-robin fashion.
- *
+/*
  * The callpath to this function and upto writing the write
  * queue pointer should be safe from interruption.
  */
-static int get_free_slot_v2_hw(struct hisi_hba *hisi_hba, u32 dev_id,
-				int *q, int *s)
+static int
+get_free_slot_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_dq *dq)
 {
 	struct device *dev = &hisi_hba->pdev->dev;
-	struct hisi_sas_dq *dq;
+	int queue = dq->id;
 	u32 r, w;
-	int queue = dev_id % hisi_hba->queue_count;
 
-	dq = &hisi_hba->dq[queue];
 	w = dq->wr_point;
 	r = hisi_sas_read32_relaxed(hisi_hba,
 				DLVRY_Q_0_RD_PTR + (queue * 0x14));
@@ -1479,16 +1478,14 @@  static int get_free_slot_v2_hw(struct hisi_hba *hisi_hba, u32 dev_id,
 		return -EAGAIN;
 	}
 
-	*q = queue;
-	*s = w;
 	return 0;
 }
 
-static void start_delivery_v2_hw(struct hisi_hba *hisi_hba)
+static void start_delivery_v2_hw(struct hisi_sas_dq *dq)
 {
-	int dlvry_queue = hisi_hba->slot_prep->dlvry_queue;
-	int dlvry_queue_slot = hisi_hba->slot_prep->dlvry_queue_slot;
-	struct hisi_sas_dq *dq = &hisi_hba->dq[dlvry_queue];
+	struct hisi_hba *hisi_hba = dq->hisi_hba;
+	int dlvry_queue = dq->slot_prep->dlvry_queue;
+	int dlvry_queue_slot = dq->slot_prep->dlvry_queue_slot;
 
 	dq->wr_point = ++dlvry_queue_slot % HISI_SAS_QUEUE_SLOTS;
 	hisi_sas_write32(hisi_hba, DLVRY_Q_0_WR_PTR + (dlvry_queue * 0x14),
@@ -2344,7 +2341,9 @@  static void slot_err_v2_hw(struct hisi_hba *hisi_hba,
 	spin_lock_irqsave(&task->task_state_lock, flags);
 	task->task_state_flags |= SAS_TASK_STATE_DONE;
 	spin_unlock_irqrestore(&task->task_state_lock, flags);
+	spin_lock_irqsave(&hisi_hba->lock, flags);
 	hisi_sas_slot_task_free(hisi_hba, task, slot);
+	spin_unlock_irqrestore(&hisi_hba->lock, flags);
 	sts = ts->stat;
 
 	if (task->task_done)
@@ -3162,13 +3161,14 @@  static void cq_tasklet_v2_hw(unsigned long val)
 	struct hisi_sas_complete_v2_hdr *complete_queue;
 	u32 rd_point = cq->rd_point, wr_point, dev_id;
 	int queue = cq->id;
+	struct hisi_sas_dq *dq = &hisi_hba->dq[queue];
 
 	if (unlikely(hisi_hba->reject_stp_links_msk))
 		phys_try_accept_stp_links_v2_hw(hisi_hba);
 
 	complete_queue = hisi_hba->complete_hdr[queue];
 
-	spin_lock(&hisi_hba->lock);
+	spin_lock(&dq->lock);
 	wr_point = hisi_sas_read32(hisi_hba, COMPL_Q_0_WR_PTR +
 				   (0x14 * queue));
 
@@ -3218,7 +3218,7 @@  static void cq_tasklet_v2_hw(unsigned long val)
 	/* update rd_point */
 	cq->rd_point = rd_point;
 	hisi_sas_write32(hisi_hba, COMPL_Q_0_RD_PTR + (0x14 * queue), rd_point);
-	spin_unlock(&hisi_hba->lock);
+	spin_unlock(&dq->lock);
 }
 
 static irqreturn_t cq_interrupt_v2_hw(int irq_no, void *p)