diff mbox series

[v2,12/15] lpfc: Fix crash when nvmet transport calls host_release

Message ID 20210104180240.46824-13-jsmart2021@gmail.com
State New
Headers show
Series lpfc: Update lpfc to revision 12.8.0.7 | expand

Commit Message

James Smart Jan. 4, 2021, 6:02 p.m. UTC
When lpfc is running in NVMET mode and supports the NVME-1 addendum
changes, a LIP on a bound NVME Initiator or lipping the lpfc NVMET's
link resulted in an Oops in lpfc_nvmet_host_release.

The fix requires lpfc NVMET to maintain an additional reference on any
node structure that acts as the hosthandle for the NVMET transport.
This reference get is a one-time addition, is taken prior to the upcall
of an unsolicited LS_REQ, and is released when the NVMET transport releases
the hosthandle during the host_release downcall.

Co-developed-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
---
 drivers/scsi/lpfc/lpfc_disc.h  | 16 ++++++++++------
 drivers/scsi/lpfc/lpfc_nvmet.c | 33 ++++++++++++++++++++++++++++-----
 drivers/scsi/lpfc/lpfc_sli.c   | 29 +++++++++++++++++++++++++----
 3 files changed, 63 insertions(+), 15 deletions(-)
diff mbox series

Patch

diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h
index 4cea61b63fdf..8ce13ef3cac3 100644
--- a/drivers/scsi/lpfc/lpfc_disc.h
+++ b/drivers/scsi/lpfc/lpfc_disc.h
@@ -77,6 +77,13 @@  struct lpfc_node_rrqs {
 	unsigned long xri_bitmap[XRI_BITMAP_ULONGS];
 };
 
+enum lpfc_fc4_xpt_flags {
+	NLP_WAIT_FOR_UNREG = 0x1,
+	SCSI_XPT_REGD      = 0x2,
+	NVME_XPT_REGD      = 0x4,
+	NLP_XPT_HAS_HH     = 0x8,
+};
+
 struct lpfc_nodelist {
 	struct list_head nlp_listp;
 	struct lpfc_name nlp_portname;
@@ -134,13 +141,10 @@  struct lpfc_nodelist {
 	unsigned long *active_rrqs_xri_bitmap;
 	struct lpfc_scsicmd_bkt *lat_data;	/* Latency data */
 	uint32_t fc4_prli_sent;
-	uint32_t fc4_xpt_flags;
-	uint32_t upcall_flags;
-#define NLP_WAIT_FOR_UNREG    0x1
-#define SCSI_XPT_REGD         0x2
-#define NVME_XPT_REGD         0x4
-#define NLP_WAIT_FOR_LOGO     0x2
+	u32 upcall_flags;
+#define	NLP_WAIT_FOR_LOGO 0x2
 
+	enum lpfc_fc4_xpt_flags fc4_xpt_flags;
 
 	uint32_t nvme_fb_size; /* NVME target's supported byte cnt */
 #define NVME_FB_BIT_SHIFT 9    /* PRLI Rsp first burst in 512B units. */
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index a71df8788fff..bb2a4a0d1295 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -1367,17 +1367,22 @@  static void
 lpfc_nvmet_host_release(void *hosthandle)
 {
 	struct lpfc_nodelist *ndlp = hosthandle;
-	struct lpfc_hba *phba = NULL;
+	struct lpfc_hba *phba = ndlp->phba;
 	struct lpfc_nvmet_tgtport *tgtp;
 
-	phba = ndlp->phba;
 	if (!phba->targetport || !phba->targetport->private)
 		return;
 
 	lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
-			"6202 NVMET XPT releasing hosthandle x%px\n",
-			hosthandle);
+			"6202 NVMET XPT releasing hosthandle x%px "
+			"DID x%x xflags x%x refcnt %d\n",
+			hosthandle, ndlp->nlp_DID, ndlp->fc4_xpt_flags,
+			kref_read(&ndlp->kref));
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+	spin_lock_irq(&ndlp->lock);
+	ndlp->fc4_xpt_flags &= ~NLP_XPT_HAS_HH;
+	spin_unlock_irq(&ndlp->lock);
+	lpfc_nlp_put(ndlp);
 	atomic_set(&tgtp->state, 0);
 }
 
@@ -3644,15 +3649,33 @@  lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba,
 void
 lpfc_nvmet_invalidate_host(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 {
+	u32 ndlp_has_hh;
 	struct lpfc_nvmet_tgtport *tgtp;
 
-	lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_NVME_ABTS,
+	lpfc_printf_log(phba, KERN_INFO,
+			LOG_NVME | LOG_NVME_ABTS | LOG_NVME_DISC,
 			"6203 Invalidating hosthandle x%px\n",
 			ndlp);
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
 	atomic_set(&tgtp->state, LPFC_NVMET_INV_HOST_ACTIVE);
 
+	spin_lock_irq(&ndlp->lock);
+	ndlp_has_hh = ndlp->fc4_xpt_flags & NLP_XPT_HAS_HH;
+	spin_unlock_irq(&ndlp->lock);
+
+	/* Do not invalidate any nodes that do not have a hosthandle.
+	 * The host_release callbk will cause a node reference
+	 * count imbalance and a crash.
+	 */
+	if (!ndlp_has_hh) {
+		lpfc_printf_log(phba, KERN_INFO,
+				LOG_NVME | LOG_NVME_ABTS | LOG_NVME_DISC,
+				"6204 Skip invalidate on node x%px DID x%x\n",
+				ndlp, ndlp->nlp_DID);
+		return;
+	}
+
 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
 	/* Need to get the nvmet_fc_target_port pointer here.*/
 	nvmet_fc_invalidate_host(phba->targetport, ndlp);
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index dedea5de7d78..176706aaebf5 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -3011,23 +3011,44 @@  lpfc_nvme_unsol_ls_handler(struct lpfc_hba *phba, struct lpfc_iocbq *piocb)
 	axchg->payload = nvmebuf->dbuf.virt;
 	INIT_LIST_HEAD(&axchg->list);
 
-	if (phba->nvmet_support)
+	if (phba->nvmet_support) {
 		ret = lpfc_nvmet_handle_lsreq(phba, axchg);
-	else
+		spin_lock_irq(&ndlp->lock);
+		if (!ret && !(ndlp->fc4_xpt_flags & NLP_XPT_HAS_HH)) {
+			ndlp->fc4_xpt_flags |= NLP_XPT_HAS_HH;
+			spin_unlock_irq(&ndlp->lock);
+
+			/* This reference is a single occurrence to hold the
+			 * node valid until the nvmet transport calls
+			 * host_release.
+			 */
+			if (!lpfc_nlp_get(ndlp))
+				goto out_fail;
+
+			lpfc_printf_log(phba, KERN_ERR, LOG_NODE,
+					"6206 NVMET unsol ls_req ndlp %p "
+					"DID x%x xflags x%x refcnt %d\n",
+					ndlp, ndlp->nlp_DID,
+					ndlp->fc4_xpt_flags,
+					kref_read(&ndlp->kref));
+		} else {
+			spin_unlock_irq(&ndlp->lock);
+		}
+	} else {
 		ret = lpfc_nvme_handle_lsreq(phba, axchg);
+	}
 
 	/* if zero, LS was successfully handled. If non-zero, LS not handled */
 	if (!ret)
 		return;
 
+out_fail:
 	lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
 			"6155 Drop NVME LS from DID %06X: SID %06X OXID x%X "
 			"NVMe%s handler failed %d\n",
 			did, sid, oxid,
 			(phba->nvmet_support) ? "T" : "I", ret);
 
-out_fail:
-
 	/* recycle receive buffer */
 	lpfc_in_buf_free(phba, &nvmebuf->dbuf);