diff mbox series

[v3,05/17] scsi_transport_fc: Added a new rport state FC_PORTSTATE_MARGINAL

Message ID 1602732462-10443-6-git-send-email-muneendra.kumar@broadcom.com
State Superseded
Headers show
Series [v3,01/17] scsi: Added a new definition in scsi_cmnd.h | expand

Commit Message

Muneendra Kumar Oct. 15, 2020, 3:27 a.m. UTC
Added a new rport state FC_PORTSTATE_MARGINAL.

Added a new inline function fc_rport_chkmarginal_set_noretries
which will set the SCMD_NORETRIES_ABORT bit in cmd->state if the rport
state is marginal.

Added a new scsi_cmnd argument to the function fc_remote_port_chkready.
Made changes in the fc_remote_port_chkready function to treat marginal
and online as the same. If a scsi_cmnd is passed,
fc_rport_chkmarginal_set_noretries will be called.

Also made changes in the fc_remote_port_delete, fc_user_scan_tgt and
fc_timeout_deleted_rport functions to handle the new rport state
FC_PORTSTATE_MARGINAL.

Signed-off-by: Muneendra <muneendra.kumar@broadcom.com>

---
v3:
Rearranged the patch so that all the changes with respect to the new
rport state are part of this patch.
Added a new scsi_cmnd argument to fc_remote_port_chkready

v2:
New patch
---
 drivers/scsi/scsi_transport_fc.c | 40 +++++++++++++++++++-------------
 include/scsi/scsi_transport_fc.h | 26 ++++++++++++++++++++-
 2 files changed, 49 insertions(+), 17 deletions(-)
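
For context, here is a minimal sketch (not part of this patch) of how an LLDD
queuecommand path would use the updated helper once it passes the scsi_cmnd
along. The later patches in this series convert the real drivers; the function
below is purely illustrative.

/*
 * Illustrative only: marginal is treated like online, but
 * fc_remote_port_chkready() sets SCMD_NORETRIES_ABORT on
 * REQ_FAILFAST_TRANSPORT commands so they are not retried after an abort.
 */
static int example_lldd_queuecommand(struct Scsi_Host *shost,
				     struct scsi_cmnd *cmd)
{
	struct fc_rport *rport = starget_to_rport(scsi_target(cmd->device));
	int rval;

	rval = fc_remote_port_chkready(rport, cmd);
	if (rval) {
		cmd->result = rval;
		cmd->scsi_done(cmd);
		return 0;
	}

	/* ... hand the command to the hardware as before ... */
	return 0;
}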

Comments

Mike Christie Oct. 16, 2020, 7:52 p.m. UTC | #1
On 10/14/20 10:27 PM, Muneendra wrote:
> Added a new rport state FC_PORTSTATE_MARGINAL.
> 
> Added a new inline function fc_rport_chkmarginal_set_noretries
> which will set the SCMD_NORETRIES_ABORT bit in cmd->state if rport state
> is marginal.
> 
> Added a new argumet scsi_cmnd to the function fc_remote_port_chkready.
> Made changes in fc_remote_port_chkready function to treat marginal and
> online as same.If scsi_cmd is passed fc_rport_chkmarginal_set_noretries
> will be called.
> 
> Also made changes in fc_remote_port_delete,fc_user_scan_tgt,
> fc_timeout_deleted_rport functions  to handle the new rport state
> FC_PORTSTATE_MARGINAL.
> 
> Signed-off-by: Muneendra <muneendra.kumar@broadcom.com>
> 
> ---
> v3:
> Rearranged the patch so that all the changes with respect to new
> rport state is part of this patch.
> Added a new argument to scsi_cmd  to fc_remote_port_chkready
> 
> v2:
> New patch
> ---
>   drivers/scsi/scsi_transport_fc.c | 40 +++++++++++++++++++-------------
>   include/scsi/scsi_transport_fc.h | 26 ++++++++++++++++++++-
>   2 files changed, 49 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
> index 2ff7f06203da..df66a51d62e6 100644
> --- a/drivers/scsi/scsi_transport_fc.c
> +++ b/drivers/scsi/scsi_transport_fc.c
> @@ -142,20 +142,23 @@ fc_enum_name_search(host_event_code, fc_host_event_code,
>   static struct {
>   	enum fc_port_state	value;
>   	char			*name;
> +	int			matchlen;
>   } fc_port_state_names[] = {
> -	{ FC_PORTSTATE_UNKNOWN,		"Unknown" },
> -	{ FC_PORTSTATE_NOTPRESENT,	"Not Present" },
> -	{ FC_PORTSTATE_ONLINE,		"Online" },
> -	{ FC_PORTSTATE_OFFLINE,		"Offline" },
> -	{ FC_PORTSTATE_BLOCKED,		"Blocked" },
> -	{ FC_PORTSTATE_BYPASSED,	"Bypassed" },
> -	{ FC_PORTSTATE_DIAGNOSTICS,	"Diagnostics" },
> -	{ FC_PORTSTATE_LINKDOWN,	"Linkdown" },
> -	{ FC_PORTSTATE_ERROR,		"Error" },
> -	{ FC_PORTSTATE_LOOPBACK,	"Loopback" },
> -	{ FC_PORTSTATE_DELETED,		"Deleted" },
> +	{ FC_PORTSTATE_UNKNOWN,		"Unknown", 7},
> +	{ FC_PORTSTATE_NOTPRESENT,	"Not Present", 11 },
> +	{ FC_PORTSTATE_ONLINE,		"Online", 6 },
> +	{ FC_PORTSTATE_OFFLINE,		"Offline", 7 },
> +	{ FC_PORTSTATE_BLOCKED,		"Blocked", 7 },
> +	{ FC_PORTSTATE_BYPASSED,	"Bypassed", 8 },
> +	{ FC_PORTSTATE_DIAGNOSTICS,	"Diagnostics", 11 },
> +	{ FC_PORTSTATE_LINKDOWN,	"Linkdown", 8 },
> +	{ FC_PORTSTATE_ERROR,		"Error", 5 },
> +	{ FC_PORTSTATE_LOOPBACK,	"Loopback", 8 },
> +	{ FC_PORTSTATE_DELETED,		"Deleted", 7 },
> +	{ FC_PORTSTATE_MARGINAL,	"Marginal", 8 },
>   };
>   fc_enum_name_search(port_state, fc_port_state, fc_port_state_names)
> +fc_enum_name_match(port_state, fc_port_state, fc_port_state_names)
>   #define FC_PORTSTATE_MAX_NAMELEN	20
>   
>   
> @@ -2095,7 +2098,8 @@ fc_user_scan_tgt(struct Scsi_Host *shost, uint channel, uint id, u64 lun)
>   		if (rport->scsi_target_id == -1)
>   			continue;
>   
> -		if (rport->port_state != FC_PORTSTATE_ONLINE)
> +		if ((rport->port_state != FC_PORTSTATE_ONLINE) &&
> +			(rport->port_state != FC_PORTSTATE_MARGINAL))
>   			continue;
>   
>   		if ((channel == rport->channel) &&
> @@ -2958,7 +2962,8 @@ fc_remote_port_delete(struct fc_rport  *rport)
>   
>   	spin_lock_irqsave(shost->host_lock, flags);
>   
> -	if (rport->port_state != FC_PORTSTATE_ONLINE) {
> +	if ((rport->port_state != FC_PORTSTATE_ONLINE) &&
> +		(rport->port_state != FC_PORTSTATE_MARGINAL)) {
>   		spin_unlock_irqrestore(shost->host_lock, flags);
>   		return;
>   	}
> @@ -3100,7 +3105,8 @@ fc_timeout_deleted_rport(struct work_struct *work)
>   	 * target, validate it still is. If not, tear down the
>   	 * scsi_target on it.
>   	 */
> -	if ((rport->port_state == FC_PORTSTATE_ONLINE) &&
> +	if (((rport->port_state == FC_PORTSTATE_ONLINE) ||
> +		(rport->port_state == FC_PORTSTATE_MARGINAL)) &&
>   	    (rport->scsi_target_id != -1) &&
>   	    !(rport->roles & FC_PORT_ROLE_FCP_TARGET)) {
>   		dev_printk(KERN_ERR, &rport->dev,
> @@ -3243,7 +3249,8 @@ fc_scsi_scan_rport(struct work_struct *work)
>   	struct fc_internal *i = to_fc_internal(shost->transportt);
>   	unsigned long flags;
>   
> -	if ((rport->port_state == FC_PORTSTATE_ONLINE) &&
> +	if (((rport->port_state == FC_PORTSTATE_ONLINE) ||
> +		(rport->port_state == FC_PORTSTATE_MARGINAL)) &&
>   	    (rport->roles & FC_PORT_ROLE_FCP_TARGET) &&
>   	    !(i->f->disable_target_scan)) {
>   		scsi_scan_target(&rport->dev, rport->channel,
> @@ -3747,7 +3754,8 @@ static blk_status_t fc_bsg_rport_prep(struct fc_rport *rport)
>   	    !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT))
>   		return BLK_STS_RESOURCE;
>   
> -	if (rport->port_state != FC_PORTSTATE_ONLINE)
> +	if ((rport->port_state != FC_PORTSTATE_ONLINE) &&
> +		(rport->port_state != FC_PORTSTATE_MARGINAL))
>   		return BLK_STS_IOERR;
>   
>   	return BLK_STS_OK;
> diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
> index 1c7dd35cb7a0..7d010324c1e3 100644
> --- a/include/scsi/scsi_transport_fc.h
> +++ b/include/scsi/scsi_transport_fc.h
> @@ -14,6 +14,7 @@
>   #include <linux/bsg-lib.h>
>   #include <asm/unaligned.h>
>   #include <scsi/scsi.h>
> +#include <scsi/scsi_cmnd.h>
>   #include <scsi/scsi_netlink.h>
>   #include <scsi/scsi_host.h>
>   
> @@ -67,6 +68,7 @@ enum fc_port_state {
>   	FC_PORTSTATE_ERROR,
>   	FC_PORTSTATE_LOOPBACK,
>   	FC_PORTSTATE_DELETED,
> +	FC_PORTSTATE_MARGINAL,
>   };
>   
>   
> @@ -707,6 +709,23 @@ struct fc_function_template {
>   	unsigned long	disable_target_scan:1;
>   };
>   
> +/**
> + * fc_rport_chkmarginal_set_noretries - Set the SCMD_NORETRIES_ABORT bit
> + * in cmd->state if port state is marginal prior to initiating
> + * io to the port.
> + * @rport:	remote port to be checked
> + * @cmd:	scsi_cmnd to set/clear the SCMD_NORETRIES_ABORT bit on Marginal state
> + **/
> +static inline void
> +fc_rport_chkmarginal_set_noretries(struct fc_rport *rport, struct scsi_cmnd *cmd)
> +{
> +	if ((rport->port_state == FC_PORTSTATE_MARGINAL) &&
> +		 (cmd->request->cmd_flags & REQ_FAILFAST_TRANSPORT))
> +		set_bit(SCMD_NORETRIES_ABORT, &cmd->state);
> +	else
> +		clear_bit(SCMD_NORETRIES_ABORT, &cmd->state);
> +
> +}
>   
>   /**
>    * fc_remote_port_chkready - called to validate the remote port state
> @@ -715,20 +734,25 @@ struct fc_function_template {
>    * Returns a scsi result code that can be returned by the LLDD.
>    *
>    * @rport:	remote port to be checked
> + * @cmd:	scsi_cmd to set/clear the SCMD_NORETRIES_ABORT bit on Marginal state
>    **/
>   static inline int
> -fc_remote_port_chkready(struct fc_rport *rport)
> +fc_remote_port_chkready(struct fc_rport *rport, struct scsi_cmnd *cmd)
>   {
>   	int result;
>   
>   	switch (rport->port_state) {
>   	case FC_PORTSTATE_ONLINE:
> +	case FC_PORTSTATE_MARGINAL:
>   		if (rport->roles & FC_PORT_ROLE_FCP_TARGET)
>   			result = 0;
>   		else if (rport->flags & FC_RPORT_DEVLOSS_PENDING)
>   			result = DID_IMM_RETRY << 16;
>   		else
>   			result = DID_NO_CONNECT << 16;
> +
> +		if (cmd)
> +			fc_rport_chkmarginal_set_noretries(rport, cmd);

I was just wondering why don't you drop all the SCMD_NORETRIES_ABORT
code and then in this function, when you check for the marginal state, do:

result = DID_TRANSPORT_MARGINAL;

?

So you would do:

1. Userspace calls in scsi_transport_fc and sets the port state to marginal.
2. New commands would see the above check and complete the command with
DID_TRANSPORT_MARGINAL.
3. If a command is retried by the scsi layer we would end up hitting
this function and see the check and end up failing the IO like in #2. It
would be similar to what happens when the dev loss or fast io fail
timers fire.
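
Roughly something like this, as an untested sketch of the alternative (it
assumes a DID_TRANSPORT_MARGINAL host byte is defined for this; the other
port states are elided):

static inline int
fc_remote_port_chkready(struct fc_rport *rport)
{
	int result;

	switch (rport->port_state) {
	case FC_PORTSTATE_ONLINE:
		if (rport->roles & FC_PORT_ROLE_FCP_TARGET)
			result = 0;
		else if (rport->flags & FC_RPORT_DEVLOSS_PENDING)
			result = DID_IMM_RETRY << 16;
		else
			result = DID_NO_CONNECT << 16;
		break;
	case FC_PORTSTATE_MARGINAL:
		/* fail here; dm-multipath can then retry on another path */
		result = DID_TRANSPORT_MARGINAL << 16;
		break;
	default:
		/* BLOCKED and the other states are elided in this sketch */
		result = DID_NO_CONNECT << 16;
		break;
	}

	return result;
}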
Muneendra Kumar Oct. 19, 2020, 10:47 a.m. UTC | #2
Hi Mike,
Thanks for the review.

>> +fc_remote_port_chkready(struct fc_rport *rport, struct scsi_cmnd *cmd)
>> {
>> 	int result;
>>
>> 	switch (rport->port_state) {
>> 	case FC_PORTSTATE_ONLINE:
>> +	case FC_PORTSTATE_MARGINAL:
>> 		if (rport->roles & FC_PORT_ROLE_FCP_TARGET)
>> 			result = 0;
>> 		else if (rport->flags & FC_RPORT_DEVLOSS_PENDING)
>> 			result = DID_IMM_RETRY << 16;
>> 		else
>> 			result = DID_NO_CONNECT << 16;
>> +
>> +		if (cmd)
>> +			fc_rport_chkmarginal_set_noretries(rport, cmd);

> I was just wondering why don't you drop all the SCMD_NORETRIES_ABORT
> code and then in this function, when you check for the marginal state, do:
>
> result = DID_TRANSPORT_MARGINAL;
>
> ?
[Muneendra] Correct me if my understanding is wrong.
You mean to say, if SCMD_NORETRIES_ABORT is set, we end up failing the IO
with DID_TRANSPORT_MARGINAL instead of retrying?
I assume your point 3 below says the same.
Let me know if this is correct.

> So you would do:
>
> 1. Userspace calls in scsi_transport_fc and sets the port state to
> marginal.
> 2. New commands would see the above check and complete the command with
> DID_TRANSPORT_MARGINAL.

[Muneendra] Correct me if my understanding is right.
You mean to say, if the port_state is set to marginal, return all the new
commands with DID_TRANSPORT_MARGINAL (without checking
SCMD_NORETRIES_ABORT)?

If yes, then below are my concerns.
In the marginal state, what we want is the IOs to be attempted at least once.

In the marginal state we cannot drop all the commands, as one of the concerns
we have is that if a target is a non-mpio based target, we would be dropping
the IO without any attempt.
With this we would even be dropping the TUR commands, which unnecessarily
leads to error handling.


> 3. If a command is retried by the scsi layer we would end up hitting this
> function and see the check and end up failing the IO like in #2. It would be
> similar to what happens when the dev loss or fast io fail timers fire.


[Muneendra]
Mike Christie Oct. 19, 2020, 4:10 p.m. UTC | #3
> On Oct 19, 2020, at 5:47 AM, Muneendra Kumar M <muneendra.kumar@broadcom.com> wrote:
> 
> Hi Mike,
> Thanks for the review.
> 
>>> +fc_remote_port_chkready(struct fc_rport *rport, struct scsi_cmnd
>>> +*cmd)
>>> {
>>> 	int result;
>>> 
>>> 	switch (rport->port_state) {
>>> 	case FC_PORTSTATE_ONLINE:
>>> +	case FC_PORTSTATE_MARGINAL:
>>> 		if (rport->roles & FC_PORT_ROLE_FCP_TARGET)
>>> 			result = 0;
>>> 		else if (rport->flags & FC_RPORT_DEVLOSS_PENDING)
>>>  			result = DID_IMM_RETRY << 16;
>>>  		else
>>>  			result = DID_NO_CONNECT << 16;
>>> +
>>> +		if (cmd)
>>> +			fc_rport_chkmarginal_set_noretries(rport, cmd);
> 
>> I was just wondering why don't you do drop all the SCMD_NORETRIES_ABORT
>> code and then in this function when you check for the marginal state do:
> 
>> result = DID_TRANSPORT_MARGINAL;
> 
> ?
> [Muneendra] Correct me if my understanding is correct?
> You mean to say ,if the SCMD_NORETRIES_ABORT is set end up failing the IO
> with DID_TRANSPORT_MARGINAL instead of retry?

Sort of. I was saying to just drop the SCMD_NORETRIES_ABORT related code
and when you detect the port state here fail the IO.

> I assume that your below point 3 talks the same.
> Let me know if this is correct.
> 
>> So you would do:
> 
>> 1. Userspace calls in scsi_transport_fc and sets the port state to
>> marginal.
> 2. New commands would see the above check and complete the command with
> DID_TRANSPORT_MARGINAL.
> [Muneendra]Correct me if my understanding is right.
> You mean to say if the port_state is set to marginal return all the new
> commands with DID_TRANSPORT_MARGINAL  (without checking
> SCMD_NORETRIES_ABORT)?

Yes.

> 
> If yes then below are my concerns
> In marginal state  what we want is the ios to be attempted at least once .
> 
> In marginal state we cannot drop all the commands as one of the concern we
> have is if a target is non-mpio based targets then we will be dropping it
> without any attempt.
> With this we will be even dropping the TUR commands also which unnecessarily
> lead to error handling.
> 

I’m a little confused. In the 0/17 email you wrote:

-----

This feature is intended to be used when the device is part of a multipath
environment. When the service detects the poor connectivity, the multipath
path can be placed in a marginal path group and ignored further io
operations.

After placing a path in the marginal path group,the daemon sets the
port_state to Marginal which sets bit in scmd->state for all the

——

So it sounded like this would only be used for mpio-enabled paths.

Is this the daemon you mention in the 0/17 email:

https://github.com/brocade/bsn-fc-txptd

?

I might be completely misunderstanding you though. If the above link
is for your daemon then going off the 0/17 email and the GitHub README it
sounds like you are doing:

1. bsn-fc-txptd gets the ELS, and moves the affected paths to a new marginal path group.
2. The non-marginal paths stay in the active path group and continue to
be used as normal.
3. The paths in the marginal path group are not used until we get a link up
or another ELS that indicates the paths are ok.

For these outstanding IOs on marginal paths, it sounds like you are flushing
the existing IO on them. Once they complete they will be in the marginal group
and not be used until #3.

So it’s not clear to me: if you know the path is not optimal and might hit
a timeout, and you are not going to use it once the existing IO completes, why
even try to send it? I mean, in this setup, new commands that are entering
the dm-multipath layer will not be sent to these marginal paths, right?

Also for the TUR comment, what TUR do you mean exactly? From the SCSI EH?
Mike Christie Oct. 19, 2020, 4:19 p.m. UTC | #4
> On Oct 19, 2020, at 11:10 AM, Michael Christie <michael.christie@oracle.com> wrote:
>
> So it’s not clear to me: if you know the path is not optimal and might hit
> a timeout, and you are not going to use it once the existing IO completes, why
> even try to send it? I mean, in this setup, new commands that are entering
> the dm-multipath layer will not be sent to these marginal paths, right?

Oh yeah, to be clear, I meant why try to send it on the marginal path when you are
setting up the path groups so they are not used and only the optimal paths are used.
When the driver/scsi layer fails the IO, the multipath layer will make sure it
goes on an optimal path, right, so you do not have to worry about hitting a cmd timeout
and firing off the scsi eh.

However, one other question I had: are you setting up multipathd so the
marginal paths are used if the optimal ones were to fail (like the optimal paths hit a
link down, dev_loss_tmo or fast_io_fail fires, etc.), or will they be treated
like failed paths?

So could you end up with 3 groups:

1. Active optimal paths
2. Marginal
3. failed

If the paths in 1 move to 3, then does multipathd handle it like an all-paths-down
or does multipathd switch to #2?
Hannes Reinecke Oct. 19, 2020, 5:16 p.m. UTC | #5
On 10/19/20 6:19 PM, Michael Christie wrote:
> 
> 
>> On Oct 19, 2020, at 11:10 AM, Michael Christie <michael.christie@oracle.com> wrote:
>>
>>
>> So it’s not clear to me if you know the path is not optimal and might hit
>> a timeout, and you are not going to use it once the existing IO completes why
>> even try to send it? I mean in this setup, new commands that are entering
>> the dm-multipath layer will not be sent to these marginal paths right?
> 
> 
> Oh yeah, to be clear I meant why try to send it on the marginal path when you are
> setting up the path groups so they are not used and only the optimal paths are used.
> When the driver/scsi layer fails the IO then the multipath layer will make sure it
> goes on a optimal path right so you do not have to worry about hitting a cmd timeout
> and firing off the scsi eh.
> 
> However, one other question I had though, is are you setting up multipathd so the
> marginal paths are used if the optimal ones were to fail (like the optimal paths hit a
> link down, dev_loss_tmo or fast_io_fail fires, etc) or will they be treated
> like failed paths?
> 
> So could you end up with 3 groups:
> 
> 1. Active optimal paths
> 2. Marginal
> 3. failed
> 
> If the paths in 1 move to 3, then does multipathd handle it like a all paths down
> or does multipathd switch to #2?
> 
Actually, marginal paths work similarly to the ALUA non-optimized state.
Yes, the system can send I/O to them, but it'd be preferable for the I/O
to be moved somewhere else.
If there is no other path (or no better path), yeah, tough.

Hence the answer would be 2)

Cheers,

Hannes
Muneendra Kumar Oct. 19, 2020, 5:31 p.m. UTC | #6
Hi Michael,


>
>
> Oh yeah, to be clear I meant why try to send it on the marginal path
> when you are setting up the path groups so they are not used and only the
> optimal paths are used.
> When the driver/scsi layer fails the IO then the multipath layer will
> make sure it goes on a optimal path right so you do not have to worry
> about hitting a cmd timeout and firing off the scsi eh.
>
> However, one other question I had though, is are you setting up
> multipathd so the marginal paths are used if the optimal ones were to
> fail (like the optimal paths hit a link down, dev_loss_tmo or
> fast_io_fail fires, etc) or will they be treated like failed paths?
>
> So could you end up with 3 groups:
>
> 1. Active optimal paths
> 2. Marginal
> 3. failed
>
> If the paths in 1 move to 3, then does multipathd handle it like a all
> paths down or does multipathd switch to #2?
>
>Actually, marginal path work similar to the ALUA non-optimized state.
>Yes, the system can sent I/O to it, but it'd be preferable for the I/O to
>be moved somewhere else.
>If there is no other path (or no better path), yeah, tough.

>Hence the answer would be 2)


[Muneendra] As Hannes mentioned, if there are no active paths, the marginal
paths will be moved to normal and the system will send the IO.

Regards,
Muneendra.
Muneendra Kumar Oct. 19, 2020, 6:03 p.m. UTC | #7
Hi Michael,
Regarding the TUR (Test Unit Ready) command which I was mentioning:
the multipath daemon issues TUR commands at regular intervals to check the
path status.
When a port_state is set to marginal we are not supposed to end up failing
the cmd with DID_TRANSPORT_MARGINAL without processing it.
This may lead to reporting the wrong health status.
Hannes/James, correct me if this is wrong.

Regards,
Muneendra.

Mike Christie Oct. 19, 2020, 6:44 p.m. UTC | #8
On 10/19/20 1:03 PM, Muneendra Kumar M wrote:
> Hi Michael,
> Regarding the TUR (Test Unit Ready)command which I was mentioning .
> Multipath daemon issues TUR commands on a regular intervals to check the
> path status.
> When a port_state is set to marginal we are not suppose to end up failing
> the cmd  with DID_TRANSPORT_MARGINAL with out proceeding it.
> This may  leads to give wrong health status.


If your daemon works such that you only move paths from marginal to
active if you get an ELS indicating the path is ok or you get a link up,
then why have multipathd send path tester IO to the paths in the
marginal path group? They do not do anything, do they?



> Hannes/James Correct me if this is wrong.
> 
> Regards,
> Muneendra.
> 
> -----Original Message-----
> From: Muneendra Kumar M [mailto:muneendra.kumar@broadcom.com]
> Sent: Monday, October 19, 2020 11:01 PM
> To: 'Hannes Reinecke' <hare@suse.de>; 'Michael Christie'
> <michael.christie@oracle.com>
> Cc: 'linux-scsi@vger.kernel.org' <linux-scsi@vger.kernel.org>;
> 'jsmart2021@gmail.com' <jsmart2021@gmail.com>; 'emilne@redhat.com'
> <emilne@redhat.com>; 'mkumar@redhat.com' <mkumar@redhat.com>
> Subject: RE: [PATCH v3 05/17] scsi_transport_fc: Added a new rport state
> FC_PORTSTATE_MARGINAL
> 
> Hi Michael,
> 
> 
>>
>>
>> Oh yeah, to be clear I meant why try to send it on the marginal path
>> when you are setting up the path groups so they are not used and only the
>> optimal paths are used.
>> When the driver/scsi layer fails the IO then the multipath layer will
>> make sure it goes on a optimal path right so you do not have to worry
>> about hitting a cmd timeout and firing off the scsi eh.
>>
>> However, one other question I had though, is are you setting up
>> multipathd so the marginal paths are used if the optimal ones were to
>> fail (like the optimal paths hit a link down, dev_loss_tmo or
>> fast_io_fail fires, etc) or will they be treated like failed paths?
>>
>> So could you end up with 3 groups:
>>
>> 1. Active optimal paths
>> 2. Marginal
>> 3. failed
>>
>> If the paths in 1 move to 3, then does multipathd handle it like a all
>> paths down or does multipathd switch to #2?
>>
>> Actually, marginal path work similar to the ALUA non-optimized state.
>> Yes, the system can sent I/O to it, but it'd be preferable for the I/O to
>> be moved somewhere else.
>> If there is no other path (or no better path), yeah, tough.
> 
>> Hence the answer would be 2)
> 
> 
> [Muneendra]As Hannes mentioned if there are no active paths, the marginal
> paths will be moved to normal and the system will send the io.
> 
> Regards,
> Muneendra.
>
Mike Christie Oct. 19, 2020, 6:55 p.m. UTC | #9
On 10/19/20 12:31 PM, Muneendra Kumar M wrote:
> Hi Michael,
> 
> 
>>
>>
>> Oh yeah, to be clear I meant why try to send it on the marginal path
>> when you are setting up the path groups so they are not used and only the
>> optimal paths are used.
>> When the driver/scsi layer fails the IO then the multipath layer will
>> make sure it goes on a optimal path right so you do not have to worry
>> about hitting a cmd timeout and firing off the scsi eh.
>>
>> However, one other question I had though, is are you setting up
>> multipathd so the marginal paths are used if the optimal ones were to
>> fail (like the optimal paths hit a link down, dev_loss_tmo or
>> fast_io_fail fires, etc) or will they be treated like failed paths?
>>
>> So could you end up with 3 groups:
>>
>> 1. Active optimal paths
>> 2. Marginal
>> 3. failed
>>
>> If the paths in 1 move to 3, then does multipathd handle it like a all
>> paths down or does multipathd switch to #2?
>>
>> Actually, marginal path work similar to the ALUA non-optimized state.
>> Yes, the system can sent I/O to it, but it'd be preferable for the I/O to
>> be moved somewhere else.
>> If there is no other path (or no better path), yeah, tough.
> 
>> Hence the answer would be 2)
> 
> 
> [Muneendra]As Hannes mentioned if there are no active paths, the marginal
> paths will be moved to normal and the system will send the io.
What do you mean by normal?

- You don't mean the fc remote port state will change to online, right?

- Do you just mean the marginal path group will become the active
group in the dm-multipath layer?
Muneendra Kumar Oct. 19, 2020, 6:55 p.m. UTC | #10
Hi Michael,
AFAIK, as long as the paths are available, irrespective of whether the path
is moved to the marginal path group or not, multipathd will keep sending the
path tester IO (TUR) to check the health status.

Benjamin,
Please let me know if I am wrong.

Regards,
Muneendra.


Muneendra Kumar Oct. 19, 2020, 7:03 p.m. UTC | #11
Hi Michael,

> [Muneendra] As Hannes mentioned, if there are no active paths, the marginal
> paths will be moved to normal and the system will send the IO.
>
> What do you mean by normal?
>
> - You don't mean the fc remote port state will change to online, right?

The fc remote port state will not change to online.

> - Do you just mean the marginal path group will become the active group
> in the dm-multipath layer?

Yes

Regards,
Muneendra.
Hannes Reinecke Oct. 20, 2020, 6:03 a.m. UTC | #12
On 10/19/20 8:55 PM, Mike Christie wrote:
> On 10/19/20 12:31 PM, Muneendra Kumar M wrote:
>> Hi Michael,
>>
>>
>>>
>>>
>>> Oh yeah, to be clear I meant why try to send it on the marginal path
>>> when you are setting up the path groups so they are not used and only 
>>> the
>>> optimal paths are used.
>>> When the driver/scsi layer fails the IO then the multipath layer will
>>> make sure it goes on a optimal path right so you do not have to worry
>>> about hitting a cmd timeout and firing off the scsi eh.
>>>
>>> However, one other question I had though, is are you setting up
>>> multipathd so the marginal paths are used if the optimal ones were to
>>> fail (like the optimal paths hit a link down, dev_loss_tmo or
>>> fast_io_fail fires, etc) or will they be treated like failed paths?
>>>
>>> So could you end up with 3 groups:
>>>
>>> 1. Active optimal paths
>>> 2. Marginal
>>> 3. failed
>>>
>>> If the paths in 1 move to 3, then does multipathd handle it like a all
>>> paths down or does multipathd switch to #2?
>>>
>>> Actually, marginal path work similar to the ALUA non-optimized state.
>>> Yes, the system can sent I/O to it, but it'd be preferable for the 
>>> I/O to
>>> be moved somewhere else.
>>> If there is no other path (or no better path), yeah, tough.
>>
>>> Hence the answer would be 2)
>>
>>
>> [Muneendra]As Hannes mentioned if there are no active paths, the marginal
>> paths will be moved to normal and the system will send the io.
> What do you mean by normal?
> 
> - You don't mean the the fc remote port state will change to online right?
> 
> - Do you just mean the the marginal path group will become the active 
> group in the dm-multipath layer?

Actually, the latter is what I had in mind.

The paths should stay in 'marginal' until some admin interaction has
taken place.
That would be either a link reset (i.e. the fabric has been rescanned due
to an RSCN event), or an admin resetting the state to 'normal' manually.
The daemons should never be moving the port out of 'marginal'.

So it really just influences the path grouping in multipathd, and 
multipath should switch to the marginal path group if all running paths 
are gone.

Cheers,

Hannes
Mike Christie Oct. 20, 2020, 4:48 p.m. UTC | #13
On 10/19/20 1:55 PM, Muneendra Kumar M wrote:
> Hi Micheal,
> AFIK As long as the paths are available irrespective of  the path is moved
> to marginal path group or not multipathd  will keep sending the send path
> tester IO (TUR) to check the health status.
> 

You can change the multipathd code.


> Benjamin,
> Please let me know if iam wrong.
> Regards,
> Muneendra.
> 
> 
> -----Original Message-----
> From: Mike Christie [mailto:michael.christie@oracle.com]
> Sent: Tuesday, October 20, 2020 12:15 AM
> To: Muneendra Kumar M <muneendra.kumar@broadcom.com>; Hannes Reinecke
> <hare@suse.de>
> Cc: linux-scsi@vger.kernel.org; jsmart2021@gmail.com; emilne@redhat.com;
> mkumar@redhat.com
> Subject: Re: [PATCH v3 05/17] scsi_transport_fc: Added a new rport state
> FC_PORTSTATE_MARGINAL
> 
> On 10/19/20 1:03 PM, Muneendra Kumar M wrote:
>> Hi Michael,
>> Regarding the TUR (Test Unit Ready)command which I was mentioning .
>> Multipath daemon issues TUR commands on a regular intervals to check
>> the path status.
>> When a port_state is set to marginal we are not suppose to end up
>> failing the cmd  with DID_TRANSPORT_MARGINAL with out proceeding it.
>> This may  leads to give wrong health status.
> 
> 
> If your daemon works such that you only move paths from marginal to active
> if you get an ELS indicating the path is ok or you get a link up, then why
> have multipathd send path tester IO to the paths in the marginal path group?
> They do not do anything do they?
> 
> 
> 
>> Hannes/James Correct me if this is wrong.
>>
>> Regards,
>> Muneendra.
>>
>> -----Original Message-----
>> From: Muneendra Kumar M [mailto:muneendra.kumar@broadcom.com]
>> Sent: Monday, October 19, 2020 11:01 PM
>> To: 'Hannes Reinecke' <hare@suse.de>; 'Michael Christie'
>> <michael.christie@oracle.com>
>> Cc: 'linux-scsi@vger.kernel.org' <linux-scsi@vger.kernel.org>;
>> 'jsmart2021@gmail.com' <jsmart2021@gmail.com>; 'emilne@redhat.com'
>> <emilne@redhat.com>; 'mkumar@redhat.com' <mkumar@redhat.com>
>> Subject: RE: [PATCH v3 05/17] scsi_transport_fc: Added a new rport
>> state FC_PORTSTATE_MARGINAL
>>
>> Hi Michael,
>>
>>
>>>
>>>
>>> Oh yeah, to be clear I meant why try to send it on the marginal path
>>> when you are setting up the path groups so they are not used and only
>>> the optimal paths are used.
>>> When the driver/scsi layer fails the IO then the multipath layer will
>>> make sure it goes on a optimal path right so you do not have to worry
>>> about hitting a cmd timeout and firing off the scsi eh.
>>>
>>> However, one other question I had though, is are you setting up
>>> multipathd so the marginal paths are used if the optimal ones were to
>>> fail (like the optimal paths hit a link down, dev_loss_tmo or
>>> fast_io_fail fires, etc) or will they be treated like failed paths?
>>>
>>> So could you end up with 3 groups:
>>>
>>> 1. Active optimal paths
>>> 2. Marginal
>>> 3. failed
>>>
>>> If the paths in 1 move to 3, then does multipathd handle it like a
>>> all paths down or does multipathd switch to #2?
>>>
>>> Actually, marginal path work similar to the ALUA non-optimized state.
>>> Yes, the system can sent I/O to it, but it'd be preferable for the
>>> I/O to be moved somewhere else.
>>> If there is no other path (or no better path), yeah, tough.
>>
>>> Hence the answer would be 2)
>>
>>
>> [Muneendra]As Hannes mentioned if there are no active paths, the
>> marginal paths will be moved to normal and the system will send the io.
>>
>> Regards,
>> Muneendra.
>>
Muneendra Kumar Oct. 20, 2020, 5:19 p.m. UTC | #14
Hi Michael,

> AFIK As long as the paths are available irrespective of  the path is
> moved to marginal path group or not multipathd  will keep sending the
> send path tester IO (TUR) to check the health status.
>

>You can change the multipathd code.
You mean to say don't send the TUR commands to the devices under marginal
path groups?

At present multipathd checks the device state. If the device state is
"running" then check_path will issue TUR commands at regular intervals to
check the path health status.
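
For reference, here is a minimal sketch of that kind of TUR path check using
the generic SG_IO ioctl. This is illustrative only; multipathd's actual tur
checker lives in libmultipath and differs in detail.

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <scsi/sg.h>

static int tur_check(const char *dev)
{
	unsigned char cdb[6] = { 0x00, 0, 0, 0, 0, 0 };	/* TEST UNIT READY */
	unsigned char sense[32];
	struct sg_io_hdr hdr;
	int fd, ret;

	fd = open(dev, O_RDONLY | O_NONBLOCK);
	if (fd < 0)
		return -1;

	memset(&hdr, 0, sizeof(hdr));
	hdr.interface_id = 'S';
	hdr.cmd_len = sizeof(cdb);
	hdr.cmdp = cdb;
	hdr.mx_sb_len = sizeof(sense);
	hdr.sbp = sense;
	hdr.dxfer_direction = SG_DXFER_NONE;
	hdr.timeout = 30000;		/* milliseconds */

	ret = ioctl(fd, SG_IO, &hdr);
	close(fd);
	if (ret < 0)
		return -1;

	/* the path is considered up only on a clean GOOD completion */
	return (hdr.status || hdr.host_status || hdr.driver_status) ? 1 : 0;
}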

Regards,
Muneendra.



> -----Original Message-----
> From: Mike Christie [mailto:michael.christie@oracle.com]
> Sent: Tuesday, October 20, 2020 12:15 AM
> To: Muneendra Kumar M <muneendra.kumar@broadcom.com>; Hannes Reinecke
> <hare@suse.de>
> Cc: linux-scsi@vger.kernel.org; jsmart2021@gmail.com;
> emilne@redhat.com; mkumar@redhat.com
> Subject: Re: [PATCH v3 05/17] scsi_transport_fc: Added a new rport
> state FC_PORTSTATE_MARGINAL
>
> On 10/19/20 1:03 PM, Muneendra Kumar M wrote:
>> Hi Michael,
>> Regarding the TUR (Test Unit Ready)command which I was mentioning .
>> Multipath daemon issues TUR commands on a regular intervals to check
>> the path status.
>> When a port_state is set to marginal we are not suppose to end up
>> failing the cmd  with DID_TRANSPORT_MARGINAL with out proceeding it.
>> This may  leads to give wrong health status.
>
>
> If your daemon works such that you only move paths from marginal to
> active if you get an ELS indicating the path is ok or you get a link
> up, then why have multipathd send path tester IO to the paths in the
> marginal path group?
> They do not do anything do they?
>
>
>
>> Hannes/James Correct me if this is wrong.
>>
>> Regards,
>> Muneendra.
>>
>> -----Original Message-----
>> From: Muneendra Kumar M [mailto:muneendra.kumar@broadcom.com]
>> Sent: Monday, October 19, 2020 11:01 PM
>> To: 'Hannes Reinecke' <hare@suse.de>; 'Michael Christie'
>> <michael.christie@oracle.com>
>> Cc: 'linux-scsi@vger.kernel.org' <linux-scsi@vger.kernel.org>;
>> 'jsmart2021@gmail.com' <jsmart2021@gmail.com>; 'emilne@redhat.com'
>> <emilne@redhat.com>; 'mkumar@redhat.com' <mkumar@redhat.com>
>> Subject: RE: [PATCH v3 05/17] scsi_transport_fc: Added a new rport
>> state FC_PORTSTATE_MARGINAL
>>
>> Hi Michael,
>>
>>
>>>
>>>
>>> Oh yeah, to be clear I meant why try to send it on the marginal path
>>> when you are setting up the path groups so they are not used and
>>> only the optimal paths are used.
>>> When the driver/scsi layer fails the IO then the multipath layer
>>> will make sure it goes on a optimal path right so you do not have to
>>> worry about hitting a cmd timeout and firing off the scsi eh.
>>>
>>> However, one other question I had though, is are you setting up
>>> multipathd so the marginal paths are used if the optimal ones were
>>> to fail (like the optimal paths hit a link down, dev_loss_tmo or
>>> fast_io_fail fires, etc) or will they be treated like failed paths?
>>>
>>> So could you end up with 3 groups:
>>>
>>> 1. Active optimal paths
>>> 2. Marginal
>>> 3. failed
>>>
>>> If the paths in 1 move to 3, then does multipathd handle it like a
>>> all paths down or does multipathd switch to #2?
>>>
>>> Actually, marginal path work similar to the ALUA non-optimized state.
>>> Yes, the system can sent I/O to it, but it'd be preferable for the
>>> I/O to be moved somewhere else.
>>> If there is no other path (or no better path), yeah, tough.
>>
>>> Hence the answer would be 2)
>>
>>
>> [Muneendra]As Hannes mentioned if there are no active paths, the
>> marginal paths will be moved to normal and the system will send the io.
>>
>> Regards,
>> Muneendra.
>>
Benjamin Marzinski Oct. 20, 2020, 6:14 p.m. UTC | #15
On Tue, Oct 20, 2020 at 12:25:42AM +0530, Muneendra Kumar M wrote:
> Hi Michael,
> AFAIK, as long as the paths are available, irrespective of whether the path
> is moved to the marginal path group or not, multipathd will keep sending the
> path tester IO (TUR) to check the health status.
>
> Benjamin,
> Please let me know if I am wrong.

You are correct. If a path is part of a multipath device, multipathd
will run periodic checks on it.

-Ben

Benjamin Marzinski Oct. 20, 2020, 6:24 p.m. UTC | #16
On Tue, Oct 20, 2020 at 12:33:39AM +0530, Muneendra Kumar M wrote:
> HI Michael,
> 
> > [Muneendra]As Hannes mentioned if there are no active paths, the
> > marginal paths will be moved to normal and the system will send the io.
> >What do you mean by normal?
> 
> >- You don't mean the the fc remote port state will change to online right?
> Fc remote port state will not change to online.
> 
> - Do you just mean the the marginal path group will become the active group
> in the dm-multipath layer?
> 
> Yes
> 

Correct. The path group is still marginal. Being marginal simply means
that the path group has a lower priority than all the non-marginal path
groups.  However, if there are no usable paths in any non-marginal path
group, then a marginal path group will become the active path group (the
path group that the kernel will send IO to).
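
As an illustrative sketch only (not multipath-tools source), the selection
behaviour described above amounts to something like this:

struct pathgroup {
	int priority;		/* refreshed as the paths are checked */
	int usable_paths;	/* paths the checker currently reports as up */
	int marginal;		/* set when the group holds marginal paths */
};

/*
 * Pick the highest-priority group that still has usable paths, looking at
 * non-marginal groups first; a marginal group is only chosen when no
 * non-marginal group has any usable path left.
 */
static struct pathgroup *select_pathgroup(struct pathgroup *pg, int n)
{
	struct pathgroup *best = NULL;
	int marginal, i;

	for (marginal = 0; marginal <= 1; marginal++) {
		for (i = 0; i < n; i++) {
			if (pg[i].marginal != marginal || !pg[i].usable_paths)
				continue;
			if (!best || pg[i].priority > best->priority)
				best = &pg[i];
		}
		if (best)
			return best;
	}
	return NULL;
}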

-Ben

> Regards,
> Muneendra.
Benjamin Marzinski Oct. 20, 2020, 6:44 p.m. UTC | #17
On Tue, Oct 20, 2020 at 10:49:56PM +0530, Muneendra Kumar M wrote:
> HI Michael,
> 
> > AFIK As long as the paths are available irrespective of  the path is
> > moved to marginal path group or not multipathd  will keep sending the
> > send path tester IO (TUR) to check the health status.
> >
> 
> >You can change the multipathd code.
> You mean to say don't send the TUR commands for the devices under marginal
> path groups ?

Multipath does need to keep checking the marginal paths. For one thing,
just because a path is marginal, doesn't mean it belongs in the same
path group as every other marginal path. If you have an active/passive
array, then it's quite possible that you will have multiple marginal
path groups. A path's priority is updated when it is checked, and many
devices use this to determine the correct path groups. Also the checker
is what determines if a path is in a ghost (passive) state, which tells
multipath which path group to try next. For another thing, if all the
non-marginal paths fail, then the marginal path group will also be the
active path group, and we definitely want to be checking the paths on
the active path group.
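
A rough sketch of that grouping rule (illustrative only, not the
multipath-tools implementation): marginal paths are still keyed by priority,
so an active/passive array can produce more than one marginal path group, and
every marginal group sorts behind the non-marginal ones.

struct path {
	int priority;	/* refreshed each time the checker runs */
	int marginal;	/* set while the transport reports the rport marginal */
	int ghost;	/* checker saw a passive/standby port */
};

/* sort key used when building path groups */
static int path_group_cmp(const struct path *a, const struct path *b)
{
	if (a->marginal != b->marginal)
		return a->marginal ? 1 : -1;	/* marginal groups go last */
	return b->priority - a->priority;	/* higher priority first */
}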

-Ben

> 
> At present the multipathd checks the device state. If the device state is
> "running" then the check_path
> Will issue a TUR commands at regular intervals to check the path health
> status.
> 
> Regards,
> Muneendra.
> 
> 
> 
> > -----Original Message-----
> > From: Mike Christie [mailto:michael.christie@oracle.com]
> > Sent: Tuesday, October 20, 2020 12:15 AM
> > To: Muneendra Kumar M <muneendra.kumar@broadcom.com>; Hannes Reinecke
> > <hare@suse.de>
> > Cc: linux-scsi@vger.kernel.org; jsmart2021@gmail.com;
> > emilne@redhat.com; mkumar@redhat.com
> > Subject: Re: [PATCH v3 05/17] scsi_transport_fc: Added a new rport
> > state FC_PORTSTATE_MARGINAL
> >
> > On 10/19/20 1:03 PM, Muneendra Kumar M wrote:
> >> Hi Michael,
> >> Regarding the TUR (Test Unit Ready)command which I was mentioning .
> >> Multipath daemon issues TUR commands on a regular intervals to check
> >> the path status.
> >> When a port_state is set to marginal we are not suppose to end up
> >> failing the cmd  with DID_TRANSPORT_MARGINAL with out proceeding it.
> >> This may  leads to give wrong health status.
> >
> >
> > If your daemon works such that you only move paths from marginal to
> > active if you get an ELS indicating the path is ok or you get a link
> > up, then why have multipathd send path tester IO to the paths in the
> > marginal path group?
> > They do not do anything do they?
> >
> >
> >
> >> Hannes/James Correct me if this is wrong.
> >>
> >> Regards,
> >> Muneendra.
> >>
> >> -----Original Message-----
> >> From: Muneendra Kumar M [mailto:muneendra.kumar@broadcom.com]
> >> Sent: Monday, October 19, 2020 11:01 PM
> >> To: 'Hannes Reinecke' <hare@suse.de>; 'Michael Christie'
> >> <michael.christie@oracle.com>
> >> Cc: 'linux-scsi@vger.kernel.org' <linux-scsi@vger.kernel.org>;
> >> 'jsmart2021@gmail.com' <jsmart2021@gmail.com>; 'emilne@redhat.com'
> >> <emilne@redhat.com>; 'mkumar@redhat.com' <mkumar@redhat.com>
> >> Subject: RE: [PATCH v3 05/17] scsi_transport_fc: Added a new rport
> >> state FC_PORTSTATE_MARGINAL
> >>
> >> Hi Michael,
> >>
> >>
> >>>
> >>>
> >>> Oh yeah, to be clear I meant why try to send it on the marginal path
> >>> when you are setting up the path groups so they are not used and
> >>> only the optimal paths are used.
> >>> When the driver/scsi layer fails the IO then the multipath layer
> >>> will make sure it goes on a optimal path right so you do not have to
> >>> worry about hitting a cmd timeout and firing off the scsi eh.
> >>>
> >>> However, one other question I had though, is are you setting up
> >>> multipathd so the marginal paths are used if the optimal ones were
> >>> to fail (like the optimal paths hit a link down, dev_loss_tmo or
> >>> fast_io_fail fires, etc) or will they be treated like failed paths?
> >>>
> >>> So could you end up with 3 groups:
> >>>
> >>> 1. Active optimal paths
> >>> 2. Marginal
> >>> 3. failed
> >>>
> >>> If the paths in 1 move to 3, then does multipathd handle it like a
> >>> all paths down or does multipathd switch to #2?
> >>>
> >>> Actually, marginal path work similar to the ALUA non-optimized state.
> >>> Yes, the system can sent I/O to it, but it'd be preferable for the
> >>> I/O to be moved somewhere else.
> >>> If there is no other path (or no better path), yeah, tough.
> >>
> >>> Hence the answer would be 2)
> >>
> >>
> >> [Muneendra]As Hannes mentioned if there are no active paths, the
> >> marginal paths will be moved to normal and the system will send the io.
> >>
> >> Regards,
> >> Muneendra.
> >>
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 2ff7f06203da..df66a51d62e6 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -142,20 +142,23 @@  fc_enum_name_search(host_event_code, fc_host_event_code,
 static struct {
 	enum fc_port_state	value;
 	char			*name;
+	int			matchlen;
 } fc_port_state_names[] = {
-	{ FC_PORTSTATE_UNKNOWN,		"Unknown" },
-	{ FC_PORTSTATE_NOTPRESENT,	"Not Present" },
-	{ FC_PORTSTATE_ONLINE,		"Online" },
-	{ FC_PORTSTATE_OFFLINE,		"Offline" },
-	{ FC_PORTSTATE_BLOCKED,		"Blocked" },
-	{ FC_PORTSTATE_BYPASSED,	"Bypassed" },
-	{ FC_PORTSTATE_DIAGNOSTICS,	"Diagnostics" },
-	{ FC_PORTSTATE_LINKDOWN,	"Linkdown" },
-	{ FC_PORTSTATE_ERROR,		"Error" },
-	{ FC_PORTSTATE_LOOPBACK,	"Loopback" },
-	{ FC_PORTSTATE_DELETED,		"Deleted" },
+	{ FC_PORTSTATE_UNKNOWN,		"Unknown", 7},
+	{ FC_PORTSTATE_NOTPRESENT,	"Not Present", 11 },
+	{ FC_PORTSTATE_ONLINE,		"Online", 6 },
+	{ FC_PORTSTATE_OFFLINE,		"Offline", 7 },
+	{ FC_PORTSTATE_BLOCKED,		"Blocked", 7 },
+	{ FC_PORTSTATE_BYPASSED,	"Bypassed", 8 },
+	{ FC_PORTSTATE_DIAGNOSTICS,	"Diagnostics", 11 },
+	{ FC_PORTSTATE_LINKDOWN,	"Linkdown", 8 },
+	{ FC_PORTSTATE_ERROR,		"Error", 5 },
+	{ FC_PORTSTATE_LOOPBACK,	"Loopback", 8 },
+	{ FC_PORTSTATE_DELETED,		"Deleted", 7 },
+	{ FC_PORTSTATE_MARGINAL,	"Marginal", 8 },
 };
 fc_enum_name_search(port_state, fc_port_state, fc_port_state_names)
+fc_enum_name_match(port_state, fc_port_state, fc_port_state_names)
 #define FC_PORTSTATE_MAX_NAMELEN	20
 
 
@@ -2095,7 +2098,8 @@  fc_user_scan_tgt(struct Scsi_Host *shost, uint channel, uint id, u64 lun)
 		if (rport->scsi_target_id == -1)
 			continue;
 
-		if (rport->port_state != FC_PORTSTATE_ONLINE)
+		if ((rport->port_state != FC_PORTSTATE_ONLINE) &&
+			(rport->port_state != FC_PORTSTATE_MARGINAL))
 			continue;
 
 		if ((channel == rport->channel) &&
@@ -2958,7 +2962,8 @@  fc_remote_port_delete(struct fc_rport  *rport)
 
 	spin_lock_irqsave(shost->host_lock, flags);
 
-	if (rport->port_state != FC_PORTSTATE_ONLINE) {
+	if ((rport->port_state != FC_PORTSTATE_ONLINE) &&
+		(rport->port_state != FC_PORTSTATE_MARGINAL)) {
 		spin_unlock_irqrestore(shost->host_lock, flags);
 		return;
 	}
@@ -3100,7 +3105,8 @@  fc_timeout_deleted_rport(struct work_struct *work)
 	 * target, validate it still is. If not, tear down the
 	 * scsi_target on it.
 	 */
-	if ((rport->port_state == FC_PORTSTATE_ONLINE) &&
+	if (((rport->port_state == FC_PORTSTATE_ONLINE) ||
+		(rport->port_state == FC_PORTSTATE_MARGINAL)) &&
 	    (rport->scsi_target_id != -1) &&
 	    !(rport->roles & FC_PORT_ROLE_FCP_TARGET)) {
 		dev_printk(KERN_ERR, &rport->dev,
@@ -3243,7 +3249,8 @@  fc_scsi_scan_rport(struct work_struct *work)
 	struct fc_internal *i = to_fc_internal(shost->transportt);
 	unsigned long flags;
 
-	if ((rport->port_state == FC_PORTSTATE_ONLINE) &&
+	if (((rport->port_state == FC_PORTSTATE_ONLINE) ||
+		(rport->port_state == FC_PORTSTATE_MARGINAL)) &&
 	    (rport->roles & FC_PORT_ROLE_FCP_TARGET) &&
 	    !(i->f->disable_target_scan)) {
 		scsi_scan_target(&rport->dev, rport->channel,
@@ -3747,7 +3754,8 @@  static blk_status_t fc_bsg_rport_prep(struct fc_rport *rport)
 	    !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT))
 		return BLK_STS_RESOURCE;
 
-	if (rport->port_state != FC_PORTSTATE_ONLINE)
+	if ((rport->port_state != FC_PORTSTATE_ONLINE) &&
+		(rport->port_state != FC_PORTSTATE_MARGINAL))
 		return BLK_STS_IOERR;
 
 	return BLK_STS_OK;
diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
index 1c7dd35cb7a0..7d010324c1e3 100644
--- a/include/scsi/scsi_transport_fc.h
+++ b/include/scsi/scsi_transport_fc.h
@@ -14,6 +14,7 @@ 
 #include <linux/bsg-lib.h>
 #include <asm/unaligned.h>
 #include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_netlink.h>
 #include <scsi/scsi_host.h>
 
@@ -67,6 +68,7 @@  enum fc_port_state {
 	FC_PORTSTATE_ERROR,
 	FC_PORTSTATE_LOOPBACK,
 	FC_PORTSTATE_DELETED,
+	FC_PORTSTATE_MARGINAL,
 };
 
 
@@ -707,6 +709,23 @@  struct fc_function_template {
 	unsigned long	disable_target_scan:1;
 };
 
+/**
+ * fc_rport_chkmarginal_set_noretries - Set the SCMD_NORETRIES_ABORT bit
+ * in cmd->state if port state is marginal prior to initiating
+ * io to the port.
+ * @rport:	remote port to be checked
+ * @cmd:	scsi_cmnd to set/clear the SCMD_NORETRIES_ABORT bit on Marginal state
+ **/
+static inline void
+fc_rport_chkmarginal_set_noretries(struct fc_rport *rport, struct scsi_cmnd *cmd)
+{
+	if ((rport->port_state == FC_PORTSTATE_MARGINAL) &&
+		 (cmd->request->cmd_flags & REQ_FAILFAST_TRANSPORT))
+		set_bit(SCMD_NORETRIES_ABORT, &cmd->state);
+	else
+		clear_bit(SCMD_NORETRIES_ABORT, &cmd->state);
+
+}
 
 /**
  * fc_remote_port_chkready - called to validate the remote port state
@@ -715,20 +734,25 @@  struct fc_function_template {
  * Returns a scsi result code that can be returned by the LLDD.
  *
  * @rport:	remote port to be checked
+ * @cmd:	scsi_cmd to set/clear the SCMD_NORETRIES_ABORT bit on Marginal state
  **/
 static inline int
-fc_remote_port_chkready(struct fc_rport *rport)
+fc_remote_port_chkready(struct fc_rport *rport, struct scsi_cmnd *cmd)
 {
 	int result;
 
 	switch (rport->port_state) {
 	case FC_PORTSTATE_ONLINE:
+	case FC_PORTSTATE_MARGINAL:
 		if (rport->roles & FC_PORT_ROLE_FCP_TARGET)
 			result = 0;
 		else if (rport->flags & FC_RPORT_DEVLOSS_PENDING)
 			result = DID_IMM_RETRY << 16;
 		else
 			result = DID_NO_CONNECT << 16;
+
+		if (cmd)
+			fc_rport_chkmarginal_set_noretries(rport, cmd);
 		break;
 	case FC_PORTSTATE_BLOCKED:
 		if (rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)