diff mbox series

[09/17] lpfc: Fix NPIV Fabric Node reference counting

Message ID 20201115192646.12977-10-james.smart@broadcom.com
State New
Headers show
Series [01/17] lpfc: Rework remote port ref counting and node freeing | expand

Commit Message

James Smart Nov. 15, 2020, 7:26 p.m. UTC
While testing initiator-side cable swaps with NPIV, oops occur.
The reference counts for the Fabric nodes on the NPIV vports isn't
balanced, resulting in premature node removal.

The following fixes were made:
- Removed the FC_LBIT check in lpfc_linkup_port. This removed the special
  case for vports that didn't have them clean up just like the physical
  port.
- Removed the unreg_rpi call in lpfc_cleanup_node. In this section, the
  node is being removed in the context of a reference count release
  and a mailbox command can't be issued at this point.
- Remove special case handling in the default mailbox completion handler
  that allowed the skipping of a node reference. Now, reference counting
  always requires the removal of the reference.
- Move the location of the DEVICE_RM event is done during LOGO handling
  as the driver has additional work to do on the ndlp before puts/releases
  can be performed.

Co-developed-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
---
 drivers/scsi/lpfc/lpfc_els.c     | 47 +++++++++++++++++---------------
 drivers/scsi/lpfc/lpfc_hbadisc.c | 29 ++++++++------------
 drivers/scsi/lpfc/lpfc_init.c    |  6 ++--
 drivers/scsi/lpfc/lpfc_sli.c     | 10 +++++--
 4 files changed, 46 insertions(+), 46 deletions(-)
diff mbox series

Patch

diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 6fc42c978730..48095bebd47b 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -2835,8 +2835,9 @@  lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 	/* LOGO completes to NPort <nlp_DID> */
 	lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
 			 "0105 LOGO completes to NPort x%x "
-			 "Data: x%x x%x x%x x%x\n",
-			 ndlp->nlp_DID, irsp->ulpStatus, irsp->un.ulpWord[4],
+			 "refcnt %d nflags x%x Data: x%x x%x x%x x%x\n",
+			 ndlp->nlp_DID, kref_read(&ndlp->kref), ndlp->nlp_flag,
+			 irsp->ulpStatus, irsp->un.ulpWord[4],
 			 irsp->ulpTimeout, vport->num_disc_nodes);
 
 	if (lpfc_els_chk_latt(vport)) {
@@ -2844,17 +2845,6 @@  lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 		goto out;
 	}
 
-	/* Check to see if link went down during discovery */
-	if (ndlp->nlp_flag & NLP_TARGET_REMOVE) {
-	        /* NLP_EVT_DEVICE_RM should unregister the RPI
-		 * which should abort all outstanding IOs.
-		 */
-		lpfc_disc_state_machine(vport, ndlp, cmdiocb,
-					NLP_EVT_DEVICE_RM);
-		skip_recovery = 1;
-		goto out;
-	}
-
 	/* The LOGO will not be retried on failure.  A LOGO was
 	 * issued to the remote rport and a ACC or RJT or no Answer are
 	 * all acceptable.  Note the failure and move forward with
@@ -2876,6 +2866,19 @@  lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 	/* Call state machine. This will unregister the rpi if needed. */
 	lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_CMPL_LOGO);
 
+	/* The driver sets this flag for an NPIV instance that doesn't want to
+	 * log into the remote port.
+	 */
+	if (ndlp->nlp_flag & NLP_TARGET_REMOVE) {
+		lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+					NLP_EVT_DEVICE_RM);
+		lpfc_els_free_iocb(phba, cmdiocb);
+		lpfc_nlp_put(ndlp);
+
+		/* Presume the node was released. */
+		return;
+	}
+
 out:
 	/* Driver is done with the IO.  */
 	lpfc_els_free_iocb(phba, cmdiocb);
@@ -4399,10 +4402,10 @@  lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 		irsp->ulpStatus, irsp->un.ulpWord[4], ndlp->nlp_DID);
 	/* ACC to LOGO completes to NPort <nlp_DID> */
 	lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
-			 "0109 ACC to LOGO completes to NPort x%x "
+			 "0109 ACC to LOGO completes to NPort x%x refcnt %d"
 			 "Data: x%x x%x x%x\n",
-			 ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state,
-			 ndlp->nlp_rpi);
+			 ndlp->nlp_DID, kref_read(&ndlp->kref), ndlp->nlp_flag,
+			 ndlp->nlp_state, ndlp->nlp_rpi);
 
 	if (ndlp->nlp_state == NLP_STE_NPR_NODE) {
 		/* NPort Recovery mode or node is just allocated */
@@ -8650,9 +8653,9 @@  lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 	/* ELS command <elsCmd> received from NPORT <did> */
 	lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
 			 "0112 ELS command x%x received from NPORT x%x "
-			 "Data: x%x x%x x%x x%x\n",
-			cmd, did, vport->port_state, vport->fc_flag,
-			vport->fc_myDID, vport->fc_prevDID);
+			 "refcnt %d Data: x%x x%x x%x x%x\n",
+			 cmd, did, kref_read(&ndlp->kref), vport->port_state,
+			 vport->fc_flag, vport->fc_myDID, vport->fc_prevDID);
 
 	/* reject till our FLOGI completes or PLOGI assigned DID via PT2PT */
 	if ((vport->port_state < LPFC_FABRIC_CFG_LINK) &&
@@ -9144,9 +9147,9 @@  lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport)
 	spin_lock_irq(shost->host_lock);
 	if (vport->fc_flag & FC_DISC_DELAYED) {
 		spin_unlock_irq(shost->host_lock);
-		lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
-				"3334 Delay fc port discovery for %d seconds\n",
-				phba->fc_ratov);
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
+				 "3334 Delay fc port discovery for %d secs\n",
+				 phba->fc_ratov);
 		mod_timer(&vport->delayed_disc_tmo,
 			jiffies + msecs_to_jiffies(1000 * phba->fc_ratov));
 		return;
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index f3cf988733c2..4c5a0ffec86f 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -4810,8 +4810,14 @@  lpfc_set_unreg_login_mbx_cmpl(struct lpfc_hba *phba, struct lpfc_vport *vport,
 {
 	unsigned long iflags;
 
+	/* Driver always gets a reference on the mailbox job
+	 * in support of async jobs.
+	 */
+	mbox->ctx_ndlp = lpfc_nlp_get(ndlp);
+	if (!mbox->ctx_ndlp)
+		return;
+
 	if (ndlp->nlp_flag & NLP_ISSUE_LOGO) {
-		mbox->ctx_ndlp = ndlp;
 		mbox->mbox_cmpl = lpfc_nlp_logo_unreg;
 
 	} else if (phba->sli_rev == LPFC_SLI_REV4 &&
@@ -4819,7 +4825,6 @@  lpfc_set_unreg_login_mbx_cmpl(struct lpfc_hba *phba, struct lpfc_vport *vport,
 		    (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >=
 				      LPFC_SLI_INTF_IF_TYPE_2) &&
 		    (kref_read(&ndlp->kref) > 0)) {
-		mbox->ctx_ndlp = lpfc_nlp_get(ndlp);
 		mbox->mbox_cmpl = lpfc_sli4_unreg_rpi_cmpl_clr;
 	} else {
 		if (vport->load_flag & FC_UNLOADING) {
@@ -4828,9 +4833,7 @@  lpfc_set_unreg_login_mbx_cmpl(struct lpfc_hba *phba, struct lpfc_vport *vport,
 				ndlp->nlp_flag |= NLP_RELEASE_RPI;
 				spin_unlock_irqrestore(&ndlp->lock, iflags);
 			}
-			lpfc_nlp_get(ndlp);
 		}
-		mbox->ctx_ndlp = ndlp;
 		mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
 	}
 }
@@ -4888,6 +4891,11 @@  lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 			lpfc_unreg_login(phba, vport->vpi, rpi, mbox);
 			mbox->vport = vport;
 			lpfc_set_unreg_login_mbx_cmpl(phba, vport, ndlp, mbox);
+			if (!mbox->ctx_ndlp) {
+				mempool_free(mbox, phba->mbox_mem_pool);
+				return 1;
+			}
+
 			if (mbox->mbox_cmpl == lpfc_sli4_unreg_rpi_cmpl_clr)
 				/*
 				 * accept PLOGIs after unreg_rpi_cmpl
@@ -5057,7 +5065,6 @@  lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 	struct lpfc_hba  *phba = vport->phba;
 	LPFC_MBOXQ_t *mb, *nextmb;
 	struct lpfc_dmabuf *mp;
-	unsigned long iflags;
 
 	/* Cleanup node for NPort <nlp_DID> */
 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE,
@@ -5125,18 +5132,6 @@  lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 	lpfc_cleanup_vports_rrqs(vport, ndlp);
 	if (phba->sli_rev == LPFC_SLI_REV4)
 		ndlp->nlp_flag |= NLP_RELEASE_RPI;
-	if (!lpfc_unreg_rpi(vport, ndlp)) {
-		/* Clean up unregistered and non freed rpis */
-		if ((ndlp->nlp_flag & NLP_RELEASE_RPI) &&
-		    !(ndlp->nlp_rpi == LPFC_RPI_ALLOC_ERROR)) {
-			lpfc_sli4_free_rpi(vport->phba,
-					   ndlp->nlp_rpi);
-			spin_lock_irqsave(&ndlp->lock, iflags);
-			ndlp->nlp_flag &= ~NLP_RELEASE_RPI;
-			ndlp->nlp_rpi = LPFC_RPI_ALLOC_ERROR;
-			spin_unlock_irqrestore(&ndlp->lock, iflags);
-		}
-	}
 	return 0;
 }
 
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index f0cbf98e7caa..86d9ab4bcebb 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -2851,10 +2851,8 @@  lpfc_cleanup(struct lpfc_vport *vport)
 			continue;
 		}
 
-		/* take care of nodes in unused state before the state
-		 * machine taking action.
-		 */
-		if (ndlp->nlp_state == NLP_STE_UNUSED_NODE) {
+		if (ndlp->nlp_DID == Fabric_Cntl_DID &&
+		    ndlp->nlp_state == NLP_STE_UNUSED_NODE) {
 			lpfc_nlp_put(ndlp);
 			continue;
 		}
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 4f537a07bac6..116a6822c201 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -2541,8 +2541,12 @@  lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 			} else {
 				__lpfc_sli_rpi_release(vport, ndlp);
 			}
-			if (vport->load_flag & FC_UNLOADING)
-				lpfc_nlp_put(ndlp);
+
+			/* The unreg_login mailbox is complete and had a
+			 * reference that has to be released.  The PLOGI
+			 * got its own ref.
+			 */
+			lpfc_nlp_put(ndlp);
 			pmb->ctx_ndlp = NULL;
 		}
 	}
@@ -2566,7 +2570,7 @@  lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
  *
  * This function is the unreg rpi mailbox completion handler. It
  * frees the memory resources associated with the completed mailbox
- * command. An additional refrenece is put on the ndlp to prevent
+ * command. An additional reference is put on the ndlp to prevent
  * lpfc_nlp_release from freeing the rpi bit in the bitmask before
  * the unreg mailbox command completes, this routine puts the
  * reference back.