diff mbox series

[05/12] lpfc: Revise ndlp kref handling for dev_loss_tmo_callbk and lpfc_drop_node

Message ID 20230712180522.112722-6-justintee8345@gmail.com
State New
Headers show
Series lpfc: Update lpfc to revision 14.2.0.14 | expand

Commit Message

Justin Tee July 12, 2023, 6:05 p.m. UTC
The ndlp kref count implementation in lpfc_dev_loss_tmo_callbk removes the
initial node reference when a vport is unloading.  When lpfc_cleanup sends
a DEVICE_RM event and is in NPR state, the driver calls lpfc_drop_node.
Subsequently, lpfc_drop_node also removes an ndlp kref thinking it is the
initial reference.  This unintentionally introduces an extra kref decrement
on the ndlp object.

Fix by using the NLP_DROPPED node flag in lpfc_dev_loss_tmo_callbk and
lpfc_drop_node to coordinate the removal of the initial node reference.

In lpfc_dev_loss_tmo_callbk, remove the SCSI transport reference provided
the node is registered in the dev_loss context because the driver cannot
call the SCSI transport in dev_loss context or afterwards.  And, have
lpfc_drop_node not remove a reference if another thread is acting or has
already acted on it.

Signed-off-by: Justin Tee <justin.tee@broadcom.com>
---
 drivers/scsi/lpfc/lpfc_hbadisc.c | 70 +++++++++++++++++++++-----------
 drivers/scsi/lpfc/lpfc_nvme.c    |  5 ++-
 2 files changed, 50 insertions(+), 25 deletions(-)
diff mbox series

Patch

diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index b4303254744a..388a481c8118 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -169,29 +169,44 @@  lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
 
 	lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
 			 "3181 dev_loss_callbk x%06x, rport x%px flg x%x "
-			 "load_flag x%x refcnt %d state %d xpt x%x\n",
+			 "load_flag x%x refcnt %u state %d xpt x%x\n",
 			 ndlp->nlp_DID, ndlp->rport, ndlp->nlp_flag,
 			 vport->load_flag, kref_read(&ndlp->kref),
 			 ndlp->nlp_state, ndlp->fc4_xpt_flags);
 
-	/* Don't schedule a worker thread event if the vport is going down.
-	 * The teardown process cleans up the node via lpfc_drop_node.
-	 */
+	/* Don't schedule a worker thread event if the vport is going down. */
 	if (vport->load_flag & FC_UNLOADING) {
-		((struct lpfc_rport_data *)rport->dd_data)->pnode = NULL;
+		spin_lock_irqsave(&ndlp->lock, iflags);
 		ndlp->rport = NULL;
 
-		ndlp->fc4_xpt_flags &= ~SCSI_XPT_REGD;
-		/* clear the NLP_XPT_REGD if the node is not registered
-		 * with nvme-fc
+		/* The scsi_transport is done with the rport so lpfc cannot
+		 * call to unregister. Remove the scsi transport reference
+		 * and clean up the SCSI transport node details.
 		 */
-		if (ndlp->fc4_xpt_flags == NLP_XPT_REGD)
-			ndlp->fc4_xpt_flags &= ~NLP_XPT_REGD;
+		if (ndlp->fc4_xpt_flags & (NLP_XPT_REGD | SCSI_XPT_REGD)) {
+			ndlp->fc4_xpt_flags &= ~SCSI_XPT_REGD;
 
-		/* Remove the node reference from remote_port_add now.
-		 * The driver will not call remote_port_delete.
+			/* NVME transport-registered rports need the
+			 * NLP_XPT_REGD flag to complete an unregister.
+			 */
+			if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD))
+				ndlp->fc4_xpt_flags &= ~NLP_XPT_REGD;
+			spin_unlock_irqrestore(&ndlp->lock, iflags);
+			lpfc_nlp_put(ndlp);
+			spin_lock_irqsave(&ndlp->lock, iflags);
+		}
+
+		/* Only 1 thread can drop the initial node reference.  If
+		 * another thread has set NLP_DROPPED, this thread is done.
 		 */
-		lpfc_nlp_put(ndlp);
+		if (!(ndlp->nlp_flag & NLP_DROPPED)) {
+			ndlp->nlp_flag |= NLP_DROPPED;
+			spin_unlock_irqrestore(&ndlp->lock, iflags);
+			lpfc_nlp_put(ndlp);
+			spin_lock_irqsave(&ndlp->lock, iflags);
+		}
+
+		spin_unlock_irqrestore(&ndlp->lock, iflags);
 		return;
 	}
 
@@ -4686,7 +4701,8 @@  lpfc_nlp_unreg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 	spin_lock_irqsave(&ndlp->lock, iflags);
 	if (!(ndlp->fc4_xpt_flags & NLP_XPT_REGD)) {
 		spin_unlock_irqrestore(&ndlp->lock, iflags);
-		lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
+		lpfc_printf_vlog(vport, KERN_INFO,
+				 LOG_ELS | LOG_NODE | LOG_DISCOVERY,
 				 "0999 %s Not regd: ndlp x%px rport x%px DID "
 				 "x%x FLG x%x XPT x%x\n",
 				  __func__, ndlp, ndlp->rport, ndlp->nlp_DID,
@@ -4702,9 +4718,10 @@  lpfc_nlp_unreg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 		vport->phba->nport_event_cnt++;
 		lpfc_unregister_remote_port(ndlp);
 	} else if (!ndlp->rport) {
-		lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
+		lpfc_printf_vlog(vport, KERN_INFO,
+				 LOG_ELS | LOG_NODE | LOG_DISCOVERY,
 				 "1999 %s NDLP in devloss x%px DID x%x FLG x%x"
-				 " XPT x%x refcnt %d\n",
+				 " XPT x%x refcnt %u\n",
 				 __func__, ndlp, ndlp->nlp_DID, ndlp->nlp_flag,
 				 ndlp->fc4_xpt_flags,
 				 kref_read(&ndlp->kref));
@@ -4954,22 +4971,29 @@  lpfc_drop_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 {
 	/*
 	 * Use of lpfc_drop_node and UNUSED list: lpfc_drop_node should
-	 * be used if we wish to issue the "last" lpfc_nlp_put() to remove
-	 * the ndlp from the vport. The ndlp marked as UNUSED on the list
-	 * until ALL other outstanding threads have completed. We check
-	 * that the ndlp not already in the UNUSED state before we proceed.
+	 * be used when lpfc wants to remove the "last" lpfc_nlp_put() to
+	 * release the ndlp from the vport when conditions are correct.
 	 */
 	if (ndlp->nlp_state == NLP_STE_UNUSED_NODE)
 		return;
 	lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE);
-	ndlp->nlp_flag |= NLP_DROPPED;
 	if (vport->phba->sli_rev == LPFC_SLI_REV4) {
 		lpfc_cleanup_vports_rrqs(vport, ndlp);
 		lpfc_unreg_rpi(vport, ndlp);
 	}
 
-	lpfc_nlp_put(ndlp);
-	return;
+	/* NLP_DROPPED means another thread already removed the initial
+	 * reference from lpfc_nlp_init.  If set, don't drop it again and
+	 * introduce an imbalance.
+	 */
+	spin_lock_irq(&ndlp->lock);
+	if (!(ndlp->nlp_flag & NLP_DROPPED)) {
+		ndlp->nlp_flag |= NLP_DROPPED;
+		spin_unlock_irq(&ndlp->lock);
+		lpfc_nlp_put(ndlp);
+		return;
+	}
+	spin_unlock_irq(&ndlp->lock);
 }
 
 /*
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 3ee5cde481f3..39acbcb7ec66 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -2503,8 +2503,9 @@  lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 		lpfc_printf_vlog(vport, KERN_ERR,
 				 LOG_TRACE_EVENT,
 				 "6031 RemotePort Registration failed "
-				 "err: %d, DID x%06x\n",
-				 ret, ndlp->nlp_DID);
+				 "err: %d, DID x%06x ref %u\n",
+				 ret, ndlp->nlp_DID, kref_read(&ndlp->kref));
+		lpfc_nlp_put(ndlp);
 	}
 
 	return ret;