[RFC] nvme-fc: FPIN link integrity handling

Message ID 20240219085929.31255-1-hare@kernel.org
State New

Commit Message

Hannes Reinecke Feb. 19, 2024, 8:59 a.m. UTC
From: Hannes Reinecke <hare@suse.de>

FPIN LI (link integrity) messages are received when the attached
fabric detects hardware errors. In response to these messages the
affected ports should not be used for I/O, and should only be put
back into service once they have been reset, as by then the hardware
might have been replaced.
This patch adds a new controller flag 'NVME_CTRL_TRANSPORT_BLOCKED'
which will be checked during multipath path selection, causing the
path to be skipped.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/nvme/host/core.c       |  13 ++--
 drivers/nvme/host/fc.c         | 108 +++++++++++++++++++++++++++++++++
 drivers/nvme/host/multipath.c  |   2 +
 drivers/nvme/host/nvme.h       |   1 +
 drivers/scsi/lpfc/lpfc_els.c   |   6 +-
 drivers/scsi/qla2xxx/qla_isr.c |   1 +
 include/linux/nvme-fc-driver.h |   3 +
 7 files changed, 129 insertions(+), 5 deletions(-)
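
For reference, the Link Integrity descriptor the new handler walks is
the existing FC-LS definition in include/uapi/scsi/fc/fc_els.h
(abridged here; field comments paraphrased, check your tree for the
authoritative layout):

	struct fc_fn_li_desc {
		__be32	desc_tag;	/* ELS_DTAG_LNK_INTEGRITY */
		__be32	desc_len;
		__be64	detecting_wwpn;	/* port that detected the event */
		__be64	attached_wwpn;	/* port attached to detecting port */
		__be16	event_type;	/* enum fc_fpin_li_event_types */
		__be16	event_modifier;
		__be32	event_threshold;
		__be32	event_count;
		__be32	pname_count;	/* entries in pname_list[] */
		__be64	pname_list[];	/* impacted port names */
	};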

Comments

Sagi Grimberg March 7, 2024, 10:10 a.m. UTC | #1
On 19/02/2024 10:59, hare@kernel.org wrote:
> From: Hannes Reinecke <hare@suse.de>
>
> FPIN LI (link integrity) messages are received when the attached
> fabric detects hardware errors. In response to these messages the
> affected ports should not be used for I/O, and should only be put
> back into service once they have been reset, as by then the hardware
> might have been replaced.

Does this mean it cannot service any type of communication over
the wire?

> This patch adds a new controller flag 'NVME_CTRL_TRANSPORT_BLOCKED'
> which will be checked during multipath path selection, causing the
> path to be skipped.

While this looks sensible to me, it also looks like this introduces a
ctrl state outside of ctrl->state... Wouldn't it make sense to move the
controller to NVME_CTRL_DEAD? Or is it not a terminal state?

>
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
>   drivers/nvme/host/core.c       |  13 ++--
>   drivers/nvme/host/fc.c         | 108 +++++++++++++++++++++++++++++++++
>   drivers/nvme/host/multipath.c  |   2 +
>   drivers/nvme/host/nvme.h       |   1 +
>   drivers/scsi/lpfc/lpfc_els.c   |   6 +-
>   drivers/scsi/qla2xxx/qla_isr.c |   1 +
>   include/linux/nvme-fc-driver.h |   3 +
>   7 files changed, 129 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index eed3e22e24d9..5e9a0cf43636 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -750,10 +750,14 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
>   
>   	if (state != NVME_CTRL_DELETING_NOIO &&
>   	    state != NVME_CTRL_DELETING &&
> -	    state != NVME_CTRL_DEAD &&
> -	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
> -	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
> -		return BLK_STS_RESOURCE;
> +	    state != NVME_CTRL_DEAD) {
> +		if (!test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
> +		    !blk_noretry_request(rq) &&
> +		    !(rq->cmd_flags & REQ_NVME_MPATH))
> +			return BLK_STS_RESOURCE;
> +		if (test_bit(NVME_CTRL_TRANSPORT_BLOCKED, &ctrl->flags))
> +			return BLK_STS_TRANSPORT;
> +	}
>   	return nvme_host_path_error(rq);
>   }
>   EXPORT_SYMBOL_GPL(nvme_fail_nonready_command);
> @@ -4575,6 +4579,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>   	WRITE_ONCE(ctrl->state, NVME_CTRL_NEW);
>   	ctrl->passthru_err_log_enabled = false;
>   	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
> +	clear_bit(NVME_CTRL_TRANSPORT_BLOCKED, &ctrl->flags);
>   	spin_lock_init(&ctrl->lock);
>   	mutex_init(&ctrl->scan_lock);
>   	INIT_LIST_HEAD(&ctrl->namespaces);
> diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
> index 5e226728c822..fdf77f5cb944 100644
> --- a/drivers/nvme/host/fc.c
> +++ b/drivers/nvme/host/fc.c
> @@ -787,6 +787,9 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
>   		"NVME-FC{%d}: controller connectivity lost. Awaiting "
>   		"Reconnect", ctrl->cnum);
>   
> +	/* clear 'transport blocked' flag as controller will be reset */
> +	clear_bit(NVME_CTRL_TRANSPORT_BLOCKED, &ctrl->flags);
> +
>   	switch (nvme_ctrl_state(&ctrl->ctrl)) {
>   	case NVME_CTRL_NEW:
>   	case NVME_CTRL_LIVE:
> @@ -3741,6 +3744,111 @@ static struct nvmf_transport_ops nvme_fc_transport = {
>   	.create_ctrl	= nvme_fc_create_ctrl,
>   };
>   
> +static struct nvme_fc_rport *nvme_fc_rport_from_wwpn(struct nvme_fc_lport *lport,
> +						     u64 rport_wwpn)
> +{
> +	struct nvme_fc_rport *rport;
> +
> +	list_for_each_entry(rport, &lport->endp_list, endp_list) {
> +		if (!nvme_fc_rport_get(rport))
> +			continue;
> +		if (rport->remoteport.port_name == rport_wwpn &&
> +		    rport->remoteport.port_role & FC_PORT_ROLE_NVME_TARGET)
> +			return rport;
> +		nvme_fc_rport_put(rport);
> +	}
> +	return NULL;
> +}
> +
> +/*
> + * nvme_fc_fpin_li_lport_update - process a Link Integrity event
> + * descriptor: block all controllers on the affected remote ports.
> + * @lport:		local port the FPIN was received on
> + * @tlv:		pointer to link integrity descriptor
> + *
> + */
> +static void
> +nvme_fc_fpin_li_lport_update(struct nvme_fc_lport *lport, struct fc_tlv_desc *tlv)
> +{
> +	unsigned int i, pname_count;
> +	struct nvme_fc_rport *attached_rport;
> +	struct fc_fn_li_desc *li_desc = (struct fc_fn_li_desc *)tlv;
> +	u64 wwpn;
> +
> +	wwpn = be64_to_cpu(li_desc->attached_wwpn);
> +	attached_rport = nvme_fc_rport_from_wwpn(lport, wwpn);
> +	pname_count = be32_to_cpu(li_desc->pname_count);
> +
> +	for (i = 0; i < pname_count; i++) {
> +		struct nvme_fc_rport *rport;
> +
> +		wwpn = be64_to_cpu(li_desc->pname_list[i]);
> +		rport = nvme_fc_rport_from_wwpn(lport, wwpn);
> +		if (!rport)
> +			continue;
> +		if (rport != attached_rport) {
> +			struct nvme_fc_ctrl *ctrl;
> +
> +			spin_lock_irq(&rport->lock);
> +			list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
> +				set_bit(NVME_CTRL_TRANSPORT_BLOCKED, &ctrl->ctrl.flags);
> +			spin_unlock_irq(&rport->lock);
> +		}
> +		nvme_fc_rport_put(rport);
> +	}
> +	if (attached_rport) {
> +		struct nvme_fc_ctrl *ctrl;
> +
> +		spin_lock_irq(&attached_rport->lock);
> +		list_for_each_entry(ctrl, &attached_rport->ctrl_list, ctrl_list)
> +			set_bit(NVME_CTRL_TRANSPORT_BLOCKED, &ctrl->ctrl.flags);
> +		spin_unlock_irq(&attached_rport->lock);
> +		nvme_fc_rport_put(attached_rport);
> +	}
> +}
> +
> +/**
> + * nvme_fc_fpin_rcv - routine to process a received FPIN.
> + * @localport:		local port the FPIN was received on
> + * @fpin_len:		length of FPIN payload, in bytes
> + * @fpin_buf:		pointer to FPIN payload
> + * Notes:
> + *	This routine assumes no locks are held on entry.
> + */
> +void
> +nvme_fc_fpin_rcv(struct nvme_fc_local_port *localport,
> +		 u32 fpin_len, char *fpin_buf)
> +{
> +	struct nvme_fc_lport *lport;
> +	struct fc_els_fpin *fpin = (struct fc_els_fpin *)fpin_buf;
> +	struct fc_tlv_desc *tlv;
> +	u32 bytes_remain;
> +	u32 dtag;
> +
> +	if (!localport)
> +		return;
> +	lport = localport_to_lport(localport);
> +	tlv = (struct fc_tlv_desc *)&fpin->fpin_desc[0];
> +	bytes_remain = fpin_len - offsetof(struct fc_els_fpin, fpin_desc);
> +	bytes_remain = min_t(u32, bytes_remain, be32_to_cpu(fpin->desc_len));
> +
> +	while (bytes_remain >= FC_TLV_DESC_HDR_SZ &&
> +	       bytes_remain >= FC_TLV_DESC_SZ_FROM_LENGTH(tlv)) {
> +		dtag = be32_to_cpu(tlv->desc_tag);
> +		switch (dtag) {
> +		case ELS_DTAG_LNK_INTEGRITY:
> +			nvme_fc_fpin_li_lport_update(lport, tlv);
> +			break;
> +		default:
> +			break;
> +		}
> +
> +		bytes_remain -= FC_TLV_DESC_SZ_FROM_LENGTH(tlv);
> +		tlv = fc_tlv_next_desc(tlv);
> +	}
> +}
> +EXPORT_SYMBOL(nvme_fc_fpin_rcv);
> +
>   /* Arbitrary successive failures max. With lots of subsystems could be high */
>   #define DISCOVERY_MAX_FAIL	20
>   
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index 4a90dad43303..e0dcfe9edb89 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -235,6 +235,8 @@ static bool nvme_path_is_disabled(struct nvme_ns *ns)
>   	 */
>   	if (state != NVME_CTRL_LIVE && state != NVME_CTRL_DELETING)
>   		return true;
> +	if (test_bit(NVME_CTRL_TRANSPORT_BLOCKED, &ns->ctrl->flags))
> +		return true;
>   	if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
>   	    !test_bit(NVME_NS_READY, &ns->flags))
>   		return true;
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index ee30bb63e36b..6ed2ca6b35e4 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -258,6 +258,7 @@ enum nvme_ctrl_flags {
>   	NVME_CTRL_SKIP_ID_CNS_CS	= 4,
>   	NVME_CTRL_DIRTY_CAPABILITY	= 5,
>   	NVME_CTRL_FROZEN		= 6,
> +	NVME_CTRL_TRANSPORT_BLOCKED	= 7,
>   };
>   
>   struct nvme_ctrl {
> diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
> index 4d723200690a..ecfe6bc8ab63 100644
> --- a/drivers/scsi/lpfc/lpfc_els.c
> +++ b/drivers/scsi/lpfc/lpfc_els.c
> @@ -33,6 +33,7 @@
>   #include <scsi/scsi_transport_fc.h>
>   #include <uapi/scsi/fc/fc_fs.h>
>   #include <uapi/scsi/fc/fc_els.h>
> +#include <linux/nvme-fc-driver.h>
>   
>   #include "lpfc_hw4.h"
>   #include "lpfc_hw.h"
> @@ -10343,9 +10344,12 @@ lpfc_els_rcv_fpin(struct lpfc_vport *vport, void *p, u32 fpin_length)
>   		fpin_length += sizeof(struct fc_els_fpin); /* the entire FPIN */
>   
>   		/* Send every descriptor individually to the upper layer */
> -		if (deliver)
> +		if (deliver) {
>   			fc_host_fpin_rcv(lpfc_shost_from_vport(vport),
>   					 fpin_length, (char *)fpin, 0);
> +			nvme_fc_fpin_rcv(vport->localport,
> +					 fpin_length, (char *)fpin);
> +		}
>   		desc_cnt++;
>   	}
>   }
> diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
> index d48007e18288..b180e10053c5 100644
> --- a/drivers/scsi/qla2xxx/qla_isr.c
> +++ b/drivers/scsi/qla2xxx/qla_isr.c
> @@ -46,6 +46,7 @@ qla27xx_process_purex_fpin(struct scsi_qla_host *vha, struct purex_item *item)
>   		       pkt, pkt_size);
>   
>   	fc_host_fpin_rcv(vha->host, pkt_size, (char *)pkt, 0);
> +	nvme_fc_fpin_rcv(vha->nvme_local_port, pkt_size, (char *)pkt);
>   }
>   
>   const char *const port_state_str[] = {
> diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
> index 4109f1bd6128..994bc459e6dd 100644
> --- a/include/linux/nvme-fc-driver.h
> +++ b/include/linux/nvme-fc-driver.h
> @@ -536,6 +536,9 @@ void nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport);
>   int nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *remoteport,
>   			u32 dev_loss_tmo);
>   
> +void nvme_fc_fpin_rcv(struct nvme_fc_local_port *localport,
> +		      u32 fpin_len, char *fpin_buf);
> +
>   /*
>    * Routine called to pass a NVME-FC LS request, received by the lldd,
>    * to the nvme-fc transport.
Hannes Reinecke March 7, 2024, 11:29 a.m. UTC | #2
On 3/7/24 11:10, Sagi Grimberg wrote:
> 
> 
> On 19/02/2024 10:59, hare@kernel.org wrote:
>> From: Hannes Reinecke <hare@suse.de>
>>
>> FPIN LI (link integrity) messages are received when the attached
>> fabric detects hardware errors. In response to these messages the
>> affected ports should not be used for I/O, and should only be put
>> back into service once they have been reset, as by then the hardware
>> might have been replaced.
> 
> Does this mean it cannot service any type of communication over
> the wire?
> 
It means that the service is impacted, and communication cannot be 
guaranteed (CRC errors, packet loss, you name it).
So the link should be taken out of service until it's been (manually)
replaced.

>> This patch adds a new controller flag 'NVME_CTRL_TRANSPORT_BLOCKED'
>> which will be checked during multipath path selection, causing the
>> path to be skipped.
> 
> While this looks sensible to me, it also looks like this introduces a 
> ctrl state
> outside of ctrl->state... Wouldn't it make sense to move the controller to
> NVME_CTRL_DEAD ? or is it not a terminal state?
> 
Actually, I was trying to model it alongside the
'devloss_tmo'/'fast_io_fail' mechanism we have in SCSI.
Technically the controller is still present, it's just that we shouldn't
send I/O to it. And I'd rather not disconnect here as we're trying to
do autoconnect on FC, so a manual disconnect would interfere with
that and we'd probably end up in a death spiral of disconnect/reconnect.
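
(For reference, the blocked-port handling in the SCSI FC transport:
while a remote port is blocked, fc_remote_port_chkready() in
include/scsi/scsi_transport_fc.h fails I/O fast instead of queueing
it, roughly:)

	case FC_PORTSTATE_BLOCKED:
		if (rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)
			result = DID_TRANSPORT_FAILFAST << 16;
		else
			result = DID_IMM_RETRY << 16;
		break;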

We could 'elevate' it to a new controller state, but I wasn't sure how
big an appetite there is for that. And we already have flags like
'stopped' which seem to fall into the same category.

So I'd rather not touch the state machine.

Cheers,

Hannes
Sagi Grimberg March 7, 2024, 12:01 p.m. UTC | #3
On 07/03/2024 13:29, Hannes Reinecke wrote:
> On 3/7/24 11:10, Sagi Grimberg wrote:
>>
>>
>> On 19/02/2024 10:59, hare@kernel.org wrote:
>>> From: Hannes Reinecke <hare@suse.de>
>>>
>>> FPIN LI (link integrity) messages are received when the attached
>>> fabric detects hardware errors. In response to these messages the
>>> affected ports should not be used for I/O, and should only be put
>>> back into service once they have been reset, as by then the hardware
>>> might have been replaced.
>>
>> Does this mean it cannot service any type of communication over
>> the wire?
>>
> It means that the service is impacted, and communication cannot be 
> guaranteed (CRC errors, packet loss, you name it).
> So the link should be taken out of service until it's been (manually)
> replaced.

OK, that's what I assumed.

>
>>> This patch adds a new controller flag 'NVME_CTRL_TRANSPORT_BLOCKED'
>>> which will be checked during multipath path selection, causing the
>>> path to be skipped.
>>
>> While this looks sensible to me, it also looks like this introduces a 
>> ctrl state
>> outside of ctrl->state... Wouldn't it make sense to move the 
>> controller to
>> NVME_CTRL_DEAD ? or is it not a terminal state?
>>
> Actually, I was trying to model it alongside the 
> 'devloss_tmo'/'fast_io_fail' mechanism we have in SCSI.
> Technically the controller is still present, it's just that we shouldn't
> send I/O to it.

Sounds like a dead controller to me.

> And I'd rather not disconnect here as we're trying to
> do an autoconnect on FC, so manually disconnect would interfere with
> that and we probably end in a death spiral doing disconnect/reconnect.

I suggested just transitioning the state to DEAD... Not sure how keep-alives
behave though...

>
> We could 'elevate' it to a new controller state, but wasn't sure how big
> an appetite there is. And we already have flags like 'stopped' which
> seem to fall into the same category.

stopped is different because it is not used to determine if the controller
is capable of I/O (admin or io queues). Hence it is ok to be a flag.

>
> So I'd rather not touch the state machine.

I'm just trying to understand if it falls into one of the existing
states that we have today. Because it sounds awfully like DEAD to me.
Hannes Reinecke March 7, 2024, 12:13 p.m. UTC | #4
On 3/7/24 13:01, Sagi Grimberg wrote:
> 
> 
> On 07/03/2024 13:29, Hannes Reinecke wrote:
>> On 3/7/24 11:10, Sagi Grimberg wrote:
>>>
>>>
>>> On 19/02/2024 10:59, hare@kernel.org wrote:
>>>> From: Hannes Reinecke <hare@suse.de>
>>>>
>>>> FPIN LI (link integrity) messages are received when the attached
>>>> fabric detects hardware errors. In response to these messages the
>>>> affected ports should not be used for I/O, and should only be put
>>>> back into service once they have been reset, as by then the hardware
>>>> might have been replaced.
>>>
>>> Does this mean it cannot service any type of communication over
>>> the wire?
>>>
>> It means that the service is impacted, and communication cannot be 
>> guaranteed (CRC errors, packet loss, you name it).
>> So the link should be taken out of service until it's been (manually)
>> replaced.
> 
> OK, that's what I assumed.
> 
>>
>>>> This patch adds a new controller flag 'NVME_CTRL_TRANSPORT_BLOCKED'
>>>> which will be checked during multipath path selection, causing the
>>>> path to be skipped.
>>>
>>> While this looks sensible to me, it also looks like this introduces a 
>>> ctrl state
>>> outside of ctrl->state... Wouldn't it make sense to move the 
>>> controller to
>>> NVME_CTRL_DEAD ? or is it not a terminal state?
>>>
>> Actually, I was trying to model it alongside the 
>> 'devloss_tmo'/'fast_io_fail' mechanism we have in SCSI.
>> Technically the controller is still present, it's just that we shouldn't
>> send I/O to it.
> 
> Sounds like a dead controller to me.
> 
Sort of, yes.

>> And I'd rather not disconnect here as we're trying to
>> do an autoconnect on FC, so manually disconnect would interfere with
>> that and we probably end in a death spiral doing disconnect/reconnect.
> 
> I suggested just transitioning the state to DEAD... Not sure how 
> keep-alives behave though...
> 
Hmm. The state machine has the transition LIVE->DELETING->DEAD,
i.e. a dead controller is on the way out, with all resources being
reclaimed.

A direct transition would pretty much violate that.
If we were going that way I'd prefer to have another state
('IMPACTED'? 'LIVE_NOIO'?) with the transitions
LIVE->IMPACTED->DELETING->DEAD
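
A minimal sketch of how such a state could slot into
nvme_change_ctrl_state(), purely illustrative (NVME_CTRL_IMPACTED is
the hypothetical state floated above, not part of this patch):

	case NVME_CTRL_IMPACTED:
		switch (old_state) {
		case NVME_CTRL_LIVE:
			changed = true;
			fallthrough;
		default:
			break;
		}
		break;

NVME_CTRL_DELETING would then also have to accept IMPACTED as an old
state, and nvme_path_is_disabled() would skip IMPACTED paths just like
any other non-LIVE state.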

>>
>> We could 'elevate' it to a new controller state, but wasn't sure how big
>> an appetite there is. And we already have flags like 'stopped' which
>> seem to fall into the same category.
> 
> stopped is different because it is not used to determine if it is capable
> for IO (admin or io queues). Hence it is ok to be a flag.
> 
Okay.

So yeah, we could introduce a new state, but I guess a direct transition
to 'DEAD' is not really a good idea.

Cheers,

Hannes
Sagi Grimberg March 8, 2024, 10:38 a.m. UTC | #5
On 07/03/2024 14:13, Hannes Reinecke wrote:
> On 3/7/24 13:01, Sagi Grimberg wrote:
>>
>>
>> On 07/03/2024 13:29, Hannes Reinecke wrote:
>>> On 3/7/24 11:10, Sagi Grimberg wrote:
>>>>
>>>>
>>>> On 19/02/2024 10:59, hare@kernel.org wrote:
>>>>> From: Hannes Reinecke <hare@suse.de>
>>>>>
>>>>> FPIN LI (link integrity) messages are received when the attached
>>>>> fabric detects hardware errors. In response to these messages the
>>>>> affected ports should not be used for I/O, and should only be put
>>>>> back into service once they have been reset, as by then the hardware
>>>>> might have been replaced.
>>>>
>>>> Does this mean it cannot service any type of communication over
>>>> the wire?
>>>>
>>> It means that the service is impacted, and communication cannot be 
>>> guaranteed (CRC errors, packet loss, you name it).
>>> So the link should be taken out of service until it's been (manually)
>>> replaced.
>>
>> OK, that's what I assumed.
>>
>>>
>>>>> This patch adds a new controller flag 'NVME_CTRL_TRANSPORT_BLOCKED'
>>>>> which will be checked during multipath path selection, causing the
>>>>> path to be skipped.
>>>>
>>>> While this looks sensible to me, it also looks like this introduces 
>>>> a ctrl state
>>>> outside of ctrl->state... Wouldn't it make sense to move the 
>>>> controller to
>>>> NVME_CTRL_DEAD ? or is it not a terminal state?
>>>>
>>> Actually, I was trying to model it alongside the 
>>> 'devloss_tmo'/'fast_io_fail' mechanism we have in SCSI.
>>> Technically the controller is still present, it's just that we 
>>> shouldn't
>>> send I/O to it.
>>
>> Sounds like a dead controller to me.
>>
> Sort of, yes.
>
>>> And I'd rather not disconnect here as we're trying to
>>> do an autoconnect on FC, so manually disconnect would interfere with
>>> that and we probably end in a death spiral doing disconnect/reconnect.
>>
>> I suggested just transitioning the state to DEAD... Not sure how 
>> keep-alives behave though...
>>
> Hmm. The state machine has the transition LIVE->DELETING->DEAD,
> ie a dead controller is on the way out, with all resources being
> reclaimed.
>
> A direct transition would pretty much violate that.
> If we were going that way I'd prefer to have another state
> ('IMPACTED' ? 'LIVE_NOIO' ?) with the transitions
> LIVE->IMPACTED->DELETING->DEAD
>
>>>
>>> We could 'elevate' it to a new controller state, but wasn't sure how 
>>> big
>>> an appetite there is. And we already have flags like 'stopped' which
>>> seem to fall into the same category.
>>
>> stopped is different because it is not used to determine if it is 
>> capable
>> for IO (admin or io queues). Hence it is ok to be a flag.
>>
> Okay.
>
> So yeah, we could introduce a new state, but I guess a direct transition
> to 'DEAD' is not really a good idea.

How common do you think this state would be? On the one hand, a state
where the transport is kept alive but simply refuses to accept I/O
sounds generic, but I can't think of an equivalent in the other
transports.

If this is something that is private to FC, perhaps the right way is to
add a flag for it that only fc sets, and when a second usage of it
appears, we promote it to a proper controller state. Thoughts?
Hannes Reinecke March 8, 2024, 2:09 p.m. UTC | #6
On 3/8/24 11:38, Sagi Grimberg wrote:
> 
> 
> On 07/03/2024 14:13, Hannes Reinecke wrote:
>> On 3/7/24 13:01, Sagi Grimberg wrote:
>>>
[ .. ]
>>>
>>> stopped is different because it is not used to determine if it is 
>>> capable for IO (admin or io queues). Hence it is ok to be a flag.
>>>
>> Okay.
>>
But wait, isn't that precisely what we're trying to achieve here?
I.e. can't we call nvme_quiesce_io_queues() when we detect a link
integrity failure?

Lemme check how this would work out...
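
A rough, untested sketch of that idea (nvme_fc_ctrl_block_io() is a
hypothetical helper; it would have to be called outside rport->lock,
as quiescing may sleep):

	static void nvme_fc_ctrl_block_io(struct nvme_fc_ctrl *ctrl)
	{
		/* only quiesce once per LI event */
		if (test_and_set_bit(NVME_CTRL_TRANSPORT_BLOCKED,
				     &ctrl->ctrl.flags))
			return;
		/* quiesced requests wait rather than fail over, so
		 * multipath still needs the flag check in
		 * nvme_path_is_disabled() */
		nvme_quiesce_io_queues(&ctrl->ctrl);
	}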

>> So yeah, we could introduce a new state, but I guess a direct transition
>> to 'DEAD' is not really a good idea.
> 
> How common do you think this state would be? On the one hand, having a 
> generic state that the transport is kept alive but simply refuses to
> accept I/O; sounds like a generic state, but I can't think of an
> equivalent in the other transports.
> 

Yeah, it's pretty FC-specific for now. Authentication is similar,
though, as the spec implies that we shouldn't send I/O when
authentication is in progress.

> If this is something that is private to FC, perhaps the right way is to 
> add a flag for it that only fc sets, and when a second usage of it appears,
> we promote it to a proper controller state. Thoughts?

But that's what I'm doing, no? Only FC sets the 'transport blocked'
flag, so I'm not sure how your idea would be different here...

Cheers,

Hannes
Sagi Grimberg March 8, 2024, 2:19 p.m. UTC | #7
On 08/03/2024 16:09, Hannes Reinecke wrote:
> On 3/8/24 11:38, Sagi Grimberg wrote:
>>
>>
>> On 07/03/2024 14:13, Hannes Reinecke wrote:
>>> On 3/7/24 13:01, Sagi Grimberg wrote:
>>>>
> [ .. ]
>>>>
>>>> stopped is different because it is not used to determine if it is 
>>>> capable for IO (admin or io queues). Hence it is ok to be a flag.
>>>>
>>> Okay.
>>>
> But wait, isn't that precisely what we're trying to achieve here?
> IE can't we call nvme_quiesce_io_queues() when we detect a link 
> integrity failure?
>
> Lemme check how this would work out...
>
>>> So yeah, we could introduce a new state, but I guess a direct 
>>> transition
>>> to 'DEAD' is not really a good idea.
>>
>> How common do you think this state would be? On the one hand, having 
>> a generic state that the transport is kept alive but simply refuses to
>> accept I/O; sounds like a generic state, but I can't think of an
>> equivalent in the other transports.
>>
>
> Yeah, it's pretty FC specific for now. Authentication is similar, 
> though, as the spec implies that we shouldn't send I/O when 
> authentication is in progress.
>
>> If this is something that is private to FC, perhaps the right way is 
>> to add a flag for it that only fc sets, and when a second usage of it 
>> appears,
>> we promote it to a proper controller state. Thoughts?
>
> But that's what I'm doing, no? Only FC sets the 'transport blocked'
> flag, so I'm not sure how your idea would be different here...

I know, I just came full circle with our discussion :)

Patch

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index eed3e22e24d9..5e9a0cf43636 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -750,10 +750,14 @@  blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
 
 	if (state != NVME_CTRL_DELETING_NOIO &&
 	    state != NVME_CTRL_DELETING &&
-	    state != NVME_CTRL_DEAD &&
-	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
-	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
-		return BLK_STS_RESOURCE;
+	    state != NVME_CTRL_DEAD) {
+		if (!test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
+		    !blk_noretry_request(rq) &&
+		    !(rq->cmd_flags & REQ_NVME_MPATH))
+			return BLK_STS_RESOURCE;
+		if (test_bit(NVME_CTRL_TRANSPORT_BLOCKED, &ctrl->flags))
+			return BLK_STS_TRANSPORT;
+	}
 	return nvme_host_path_error(rq);
 }
 EXPORT_SYMBOL_GPL(nvme_fail_nonready_command);
@@ -4575,6 +4579,7 @@  int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	WRITE_ONCE(ctrl->state, NVME_CTRL_NEW);
 	ctrl->passthru_err_log_enabled = false;
 	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+	clear_bit(NVME_CTRL_TRANSPORT_BLOCKED, &ctrl->flags);
 	spin_lock_init(&ctrl->lock);
 	mutex_init(&ctrl->scan_lock);
 	INIT_LIST_HEAD(&ctrl->namespaces);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5e226728c822..fdf77f5cb944 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -787,6 +787,9 @@  nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
 		"NVME-FC{%d}: controller connectivity lost. Awaiting "
 		"Reconnect", ctrl->cnum);
 
+	/* clear 'transport blocked' flag as controller will be reset */
+	clear_bit(NVME_CTRL_TRANSPORT_BLOCKED, &ctrl->flags);
+
 	switch (nvme_ctrl_state(&ctrl->ctrl)) {
 	case NVME_CTRL_NEW:
 	case NVME_CTRL_LIVE:
@@ -3741,6 +3744,111 @@  static struct nvmf_transport_ops nvme_fc_transport = {
 	.create_ctrl	= nvme_fc_create_ctrl,
 };
 
+static struct nvme_fc_rport *nvme_fc_rport_from_wwpn(struct nvme_fc_lport *lport,
+						     u64 rport_wwpn)
+{
+	struct nvme_fc_rport *rport;
+
+	list_for_each_entry(rport, &lport->endp_list, endp_list) {
+		if (!nvme_fc_rport_get(rport))
+			continue;
+		if (rport->remoteport.port_name == rport_wwpn &&
+		    rport->remoteport.port_role & FC_PORT_ROLE_NVME_TARGET)
+			return rport;
+		nvme_fc_rport_put(rport);
+	}
+	return NULL;
+}
+
+/*
+ * nvme_fc_fpin_li_lport_update - process a Link Integrity event
+ * descriptor: block all controllers on the affected remote ports.
+ * @lport:		local port the FPIN was received on
+ * @tlv:		pointer to link integrity descriptor
+ *
+ */
+static void
+nvme_fc_fpin_li_lport_update(struct nvme_fc_lport *lport, struct fc_tlv_desc *tlv)
+{
+	unsigned int i, pname_count;
+	struct nvme_fc_rport *attached_rport;
+	struct fc_fn_li_desc *li_desc = (struct fc_fn_li_desc *)tlv;
+	u64 wwpn;
+
+	wwpn = be64_to_cpu(li_desc->attached_wwpn);
+	attached_rport = nvme_fc_rport_from_wwpn(lport, wwpn);
+	pname_count = be32_to_cpu(li_desc->pname_count);
+
+	for (i = 0; i < pname_count; i++) {
+		struct nvme_fc_rport *rport;
+
+		wwpn = be64_to_cpu(li_desc->pname_list[i]);
+		rport = nvme_fc_rport_from_wwpn(lport, wwpn);
+		if (!rport)
+			continue;
+		if (rport != attached_rport) {
+			struct nvme_fc_ctrl *ctrl;
+
+			spin_lock_irq(&rport->lock);
+			list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
+				set_bit(NVME_CTRL_TRANSPORT_BLOCKED, &ctrl->ctrl.flags);
+			spin_unlock_irq(&rport->lock);
+		}
+		nvme_fc_rport_put(rport);
+	}
+	if (attached_rport) {
+		struct nvme_fc_ctrl *ctrl;
+
+		spin_lock_irq(&attached_rport->lock);
+		list_for_each_entry(ctrl, &attached_rport->ctrl_list, ctrl_list)
+			set_bit(NVME_CTRL_TRANSPORT_BLOCKED, &ctrl->ctrl.flags);
+		spin_unlock_irq(&attached_rport->lock);
+		nvme_fc_rport_put(attached_rport);
+	}
+}
+
+/**
+ * nvme_fc_fpin_rcv - routine to process a received FPIN.
+ * @localport:		local port the FPIN was received on
+ * @fpin_len:		length of FPIN payload, in bytes
+ * @fpin_buf:		pointer to FPIN payload
+ * Notes:
+ *	This routine assumes no locks are held on entry.
+ */
+void
+nvme_fc_fpin_rcv(struct nvme_fc_local_port *localport,
+		 u32 fpin_len, char *fpin_buf)
+{
+	struct nvme_fc_lport *lport;
+	struct fc_els_fpin *fpin = (struct fc_els_fpin *)fpin_buf;
+	struct fc_tlv_desc *tlv;
+	u32 bytes_remain;
+	u32 dtag;
+
+	if (!localport)
+		return;
+	lport = localport_to_lport(localport);
+	tlv = (struct fc_tlv_desc *)&fpin->fpin_desc[0];
+	bytes_remain = fpin_len - offsetof(struct fc_els_fpin, fpin_desc);
+	bytes_remain = min_t(u32, bytes_remain, be32_to_cpu(fpin->desc_len));
+
+	while (bytes_remain >= FC_TLV_DESC_HDR_SZ &&
+	       bytes_remain >= FC_TLV_DESC_SZ_FROM_LENGTH(tlv)) {
+		dtag = be32_to_cpu(tlv->desc_tag);
+		switch (dtag) {
+		case ELS_DTAG_LNK_INTEGRITY:
+			nvme_fc_fpin_li_lport_update(lport, tlv);
+			break;
+		default:
+			break;
+		}
+
+		bytes_remain -= FC_TLV_DESC_SZ_FROM_LENGTH(tlv);
+		tlv = fc_tlv_next_desc(tlv);
+	}
+}
+EXPORT_SYMBOL(nvme_fc_fpin_rcv);
+
 /* Arbitrary successive failures max. With lots of subsystems could be high */
 #define DISCOVERY_MAX_FAIL	20
 
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 4a90dad43303..e0dcfe9edb89 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -235,6 +235,8 @@  static bool nvme_path_is_disabled(struct nvme_ns *ns)
 	 */
 	if (state != NVME_CTRL_LIVE && state != NVME_CTRL_DELETING)
 		return true;
+	if (test_bit(NVME_CTRL_TRANSPORT_BLOCKED, &ns->ctrl->flags))
+		return true;
 	if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
 	    !test_bit(NVME_NS_READY, &ns->flags))
 		return true;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index ee30bb63e36b..6ed2ca6b35e4 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -258,6 +258,7 @@  enum nvme_ctrl_flags {
 	NVME_CTRL_SKIP_ID_CNS_CS	= 4,
 	NVME_CTRL_DIRTY_CAPABILITY	= 5,
 	NVME_CTRL_FROZEN		= 6,
+	NVME_CTRL_TRANSPORT_BLOCKED	= 7,
 };
 
 struct nvme_ctrl {
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 4d723200690a..ecfe6bc8ab63 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -33,6 +33,7 @@ 
 #include <scsi/scsi_transport_fc.h>
 #include <uapi/scsi/fc/fc_fs.h>
 #include <uapi/scsi/fc/fc_els.h>
+#include <linux/nvme-fc-driver.h>
 
 #include "lpfc_hw4.h"
 #include "lpfc_hw.h"
@@ -10343,9 +10344,12 @@  lpfc_els_rcv_fpin(struct lpfc_vport *vport, void *p, u32 fpin_length)
 		fpin_length += sizeof(struct fc_els_fpin); /* the entire FPIN */
 
 		/* Send every descriptor individually to the upper layer */
-		if (deliver)
+		if (deliver) {
 			fc_host_fpin_rcv(lpfc_shost_from_vport(vport),
 					 fpin_length, (char *)fpin, 0);
+			nvme_fc_fpin_rcv(vport->localport,
+					 fpin_length, (char *)fpin);
+		}
 		desc_cnt++;
 	}
 }
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index d48007e18288..b180e10053c5 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -46,6 +46,7 @@  qla27xx_process_purex_fpin(struct scsi_qla_host *vha, struct purex_item *item)
 		       pkt, pkt_size);
 
 	fc_host_fpin_rcv(vha->host, pkt_size, (char *)pkt, 0);
+	nvme_fc_fpin_rcv(vha->nvme_local_port, pkt_size, (char *)pkt);
 }
 
 const char *const port_state_str[] = {
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 4109f1bd6128..994bc459e6dd 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -536,6 +536,9 @@  void nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport);
 int nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *remoteport,
 			u32 dev_loss_tmo);
 
+void nvme_fc_fpin_rcv(struct nvme_fc_local_port *localport,
+		      u32 fpin_len, char *fpin_buf);
+
 /*
  * Routine called to pass a NVME-FC LS request, received by the lldd,
  * to the nvme-fc transport.