diff mbox series

[11/13] target: replace work per cmd in completion path

Message ID 20210210045516.11180-12-michael.christie@oracle.com
State New
Headers show
Series [01/13] target: move t_task_cdb initialization | expand

Commit Message

Mike Christie Feb. 10, 2021, 4:55 a.m. UTC
Queueing a separate work item per cmd can lead to lots of threads being
created. This patch replaces the per-cmd completion work with a per-CPU
list. Combined with the first patches, this allows tcm loop on top of
initiators like iser to go from around 700K IOPS to 1000K, and it reduces
the number of threads that get created when the system is under heavy
load and hitting the initiator drivers' tagging limits.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/target/target_core_device.c    |   3 +
 drivers/target/target_core_internal.h  |   1 +
 drivers/target/target_core_transport.c | 103 ++++++++++++++-----------
 include/target/target_core_base.h      |   1 +
 4 files changed, 65 insertions(+), 43 deletions(-)
diff mbox series

Patch

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 74d3a4896588..eaa2323843c0 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -743,6 +743,9 @@  struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
 
 		init_llist_head(&q->sq.cmd_list);
 		INIT_WORK(&q->sq.work, target_queued_submit_work);
+
+		init_llist_head(&q->cq.cmd_list);
+		INIT_WORK(&q->cq.work, target_queued_compl_work);
 	}
 
 	dev->se_hba = hba;
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index 56f841fd7f04..e54d05ae8dfd 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -154,6 +154,7 @@  bool	target_check_wce(struct se_device *dev);
 bool	target_check_fua(struct se_device *dev);
 void	__target_execute_cmd(struct se_cmd *, bool);
 void	target_queued_submit_work(struct work_struct *work);
+void	target_queued_compl_work(struct work_struct *work);
 
 /* target_core_stat.c */
 void	target_stat_setup_dev_default_groups(struct se_device *);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index c4bc012fc215..039016ab7a77 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -55,7 +55,6 @@  static void transport_complete_task_attr(struct se_cmd *cmd);
 static void translate_sense_reason(struct se_cmd *cmd, sense_reason_t reason);
 static void transport_handle_queue_full(struct se_cmd *cmd,
 		struct se_device *dev, int err, bool write_pending);
-static void target_complete_ok_work(struct work_struct *work);
 
 int init_se_kmem_caches(void)
 {
@@ -732,14 +731,6 @@  static void transport_lun_remove_cmd(struct se_cmd *cmd)
 		percpu_ref_put(&lun->lun_ref);
 }
 
-static void target_complete_failure_work(struct work_struct *work)
-{
-	struct se_cmd *cmd = container_of(work, struct se_cmd, work);
-
-	transport_generic_request_failure(cmd,
-			TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE);
-}
-
 /*
  * Used when asking transport to copy Sense Data from the underlying
  * Linux/SCSI struct scsi_cmnd
@@ -827,11 +818,20 @@  static void target_handle_abort(struct se_cmd *cmd)
 	transport_cmd_check_stop_to_fabric(cmd);
 }
 
-static void target_abort_work(struct work_struct *work)
+static void target_queue_cmd_work(struct se_cmd_queue *q, struct se_cmd *se_cmd,
+				  int cpu, struct workqueue_struct *wq)
 {
-	struct se_cmd *cmd = container_of(work, struct se_cmd, work);
+	llist_add(&se_cmd->se_cmd_list, &q->cmd_list);
+	queue_work_on(cpu, wq, &q->work);
+}
 
-	target_handle_abort(cmd);
+static void target_queue_cmd_compl(struct se_cmd *se_cmd)
+{
+	struct se_device *se_dev = se_cmd->se_dev;
+	int cpu = se_cmd->cpuid;
+
+	target_queue_cmd_work(&se_dev->queues[cpu].cq, se_cmd, cpu,
+			      target_completion_wq);
 }
 
 static bool target_cmd_interrupted(struct se_cmd *cmd)
@@ -841,8 +841,8 @@  static bool target_cmd_interrupted(struct se_cmd *cmd)
 	if (cmd->transport_state & CMD_T_ABORTED) {
 		if (cmd->transport_complete_callback)
 			cmd->transport_complete_callback(cmd, false, &post_ret);
-		INIT_WORK(&cmd->work, target_abort_work);
-		queue_work(target_completion_wq, &cmd->work);
+
+		target_queue_cmd_compl(cmd);
 		return true;
 	} else if (cmd->transport_state & CMD_T_STOP) {
 		if (cmd->transport_complete_callback)
@@ -857,7 +857,6 @@  static bool target_cmd_interrupted(struct se_cmd *cmd)
 /* May be called from interrupt context so must not sleep. */
 void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
 {
-	int success;
 	unsigned long flags;
 
 	if (target_cmd_interrupted(cmd))
@@ -866,25 +865,11 @@  void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
 	cmd->scsi_status = scsi_status;
 
 	spin_lock_irqsave(&cmd->t_state_lock, flags);
-	switch (cmd->scsi_status) {
-	case SAM_STAT_CHECK_CONDITION:
-		if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE)
-			success = 1;
-		else
-			success = 0;
-		break;
-	default:
-		success = 1;
-		break;
-	}
-
 	cmd->t_state = TRANSPORT_COMPLETE;
 	cmd->transport_state |= (CMD_T_COMPLETE | CMD_T_ACTIVE);
 	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 
-	INIT_WORK(&cmd->work, success ? target_complete_ok_work :
-		  target_complete_failure_work);
-	queue_work_on(cmd->cpuid, target_completion_wq, &cmd->work);
+	target_queue_cmd_compl(cmd);
 }
 EXPORT_SYMBOL(target_complete_cmd);
 
@@ -1894,13 +1879,6 @@  void target_queued_submit_work(struct work_struct *work)
 		target_unplug_device(se_plug);
 }
 
-static void target_queue_cmd_work(struct se_cmd_queue *q, struct se_cmd *se_cmd,
-				  int cpu)
-{
-	llist_add(&se_cmd->se_cmd_list, &q->cmd_list);
-	queue_work_on(cpu, target_submission_wq, &q->work);
-}
-
 /**
  * target_queue_cmd_submit - queue the cmd to run on the LIO workqueue
  * @se_cmd: command descriptor to submit
@@ -1955,7 +1933,8 @@  target_queue_cmd_submit(struct se_cmd *se_cmd, struct se_session *se_sess,
 
 	cpu = se_cmd->cpuid;
 	se_dev = se_cmd->se_dev;
-	target_queue_cmd_work(&se_dev->queues[cpu].sq, se_cmd, cpu);
+	target_queue_cmd_work(&se_dev->queues[cpu].sq, se_cmd, cpu,
+			      target_submission_wq);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(target_queue_cmd_submit);
@@ -2058,8 +2037,7 @@  void transport_generic_request_failure(struct se_cmd *cmd,
 		cmd->transport_complete_callback(cmd, false, &post_ret);
 
 	if (cmd->transport_state & CMD_T_ABORTED) {
-		INIT_WORK(&cmd->work, target_abort_work);
-		queue_work(target_completion_wq, &cmd->work);
+		target_queue_cmd_compl(cmd);
 		return;
 	}
 
@@ -2484,10 +2462,32 @@  static bool target_read_prot_action(struct se_cmd *cmd)
 	return false;
 }
 
-static void target_complete_ok_work(struct work_struct *work)
+static void target_complete_cmd_work(struct se_cmd *cmd)
 {
-	struct se_cmd *cmd = container_of(work, struct se_cmd, work);
-	int ret;
+	int ret, success;
+
+	if (cmd->transport_state & CMD_T_ABORTED) {
+		target_handle_abort(cmd);
+		return;
+	}
+
+	switch (cmd->scsi_status) {
+	case SAM_STAT_CHECK_CONDITION:
+		if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE)
+			success = 1;
+		else
+			success = 0;
+		break;
+	default:
+		success = 1;
+		break;
+	}
+
+	if (!success) {
+		transport_generic_request_failure(cmd,
+				TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE);
+		return;
+	}
 
 	/*
 	 * Check if we need to move delayed/dormant tasks from cmds on the
@@ -2629,6 +2629,23 @@  static void target_complete_ok_work(struct work_struct *work)
 	transport_handle_queue_full(cmd, cmd->se_dev, ret, false);
 }
 
+void target_queued_compl_work(struct work_struct *work)
+{
+	struct se_cmd_queue *cq = container_of(work, struct se_cmd_queue,
+					       work);
+	struct se_cmd *se_cmd, *next_cmd;
+	struct llist_node *cmd_list;
+
+	cmd_list = llist_del_all(&cq->cmd_list);
+	if (!cmd_list)
+		/* Previous call took what we were queued to complete */
+		return;
+
+	cmd_list = llist_reverse_order(cmd_list);
+	llist_for_each_entry_safe(se_cmd, next_cmd, cmd_list, se_cmd_list)
+		target_complete_cmd_work(se_cmd);
+}
+
 void target_free_sgl(struct scatterlist *sgl, int nents)
 {
 	sgl_free_n_order(sgl, nents, 0);
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index b8e0a3250bd0..f2ba7de59da7 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -777,6 +777,7 @@  struct se_device_queue {
 	struct list_head	state_list;
 	spinlock_t		lock;
 	struct se_cmd_queue	sq;
+	struct se_cmd_queue	cq;
 };
 
 struct se_device {