Message ID | 1603370091-9337-5-git-send-email-muneendra.kumar@broadcom.com |
---|---|
State | Superseded |
Headers | show |
Series | scsi: Support to handle Intermittent errors | expand |
See below. I think you wanted to check for FC_PORTSTATE_MARGINAL in the code you added to fc_scsi_scan_rport(). Instead the code tests for FC_PORTSTATE_ONLINE twice. -Ewan On Thu, 2020-10-22 at 18:04 +0530, Muneendra wrote: > Added a new rport state FC_PORTSTATE_MARGINAL. > > Added a new inline function fc_rport_chkmarginal_set_noretries > which will set the SCMD_NORETRIES_ABORT bit in cmd->state if rport > state > is marginal. > > Made changes in fc_eh_timed_out to call > fc_rport_chkmarginal_set_noretries > Also made changes in fc_remote_port_delete,fc_user_scan_tgt, > fc_timeout_deleted_rport functions to handle the new rport state > FC_PORTSTATE_MARGINAL. > > Signed-off-by: Muneendra <muneendra.kumar@broadcom.com> > > --- > v4: > Made changes in fc_eh_timed_out to call > fc_rport_chkmarginal_set_noretries > so that SCMD_NORETRIES_ABORT bit in cmd->state is set if rport state > is marginal. > > Removed the newly added scsi_cmd argument to fc_remote_port_chkready > as the current patch handles only SCSI EH timeout/abort case. > > v3: > Rearranged the patch so that all the changes with respect to new > rport state is part of this patch. > Added a new argument to scsi_cmd to fc_remote_port_chkready > > v2: > New patch > --- > drivers/scsi/scsi_transport_fc.c | 41 +++++++++++++++++++----------- > -- > include/scsi/scsi_transport_fc.h | 19 +++++++++++++++ > 2 files changed, 44 insertions(+), 16 deletions(-) > > diff --git a/drivers/scsi/scsi_transport_fc.c > b/drivers/scsi/scsi_transport_fc.c > index 2ff7f06203da..fcb38068e2a4 100644 > --- a/drivers/scsi/scsi_transport_fc.c > +++ b/drivers/scsi/scsi_transport_fc.c > @@ -142,20 +142,23 @@ fc_enum_name_search(host_event_code, > fc_host_event_code, > static struct { > enum fc_port_state value; > char *name; > + int matchlen; > } fc_port_state_names[] = { > - { FC_PORTSTATE_UNKNOWN, "Unknown" }, > - { FC_PORTSTATE_NOTPRESENT, "Not Present" }, > - { FC_PORTSTATE_ONLINE, "Online" }, > - { FC_PORTSTATE_OFFLINE, "Offline" }, > - { FC_PORTSTATE_BLOCKED, "Blocked" }, > - { FC_PORTSTATE_BYPASSED, "Bypassed" }, > - { FC_PORTSTATE_DIAGNOSTICS, "Diagnostics" }, > - { FC_PORTSTATE_LINKDOWN, "Linkdown" }, > - { FC_PORTSTATE_ERROR, "Error" }, > - { FC_PORTSTATE_LOOPBACK, "Loopback" }, > - { FC_PORTSTATE_DELETED, "Deleted" }, > + { FC_PORTSTATE_UNKNOWN, "Unknown", 7}, > + { FC_PORTSTATE_NOTPRESENT, "Not Present", 11 }, > + { FC_PORTSTATE_ONLINE, "Online", 6 }, > + { FC_PORTSTATE_OFFLINE, "Offline", 7 }, > + { FC_PORTSTATE_BLOCKED, "Blocked", 7 }, > + { FC_PORTSTATE_BYPASSED, "Bypassed", 8 }, > + { FC_PORTSTATE_DIAGNOSTICS, "Diagnostics", 11 }, > + { FC_PORTSTATE_LINKDOWN, "Linkdown", 8 }, > + { FC_PORTSTATE_ERROR, "Error", 5 }, > + { FC_PORTSTATE_LOOPBACK, "Loopback", 8 }, > + { FC_PORTSTATE_DELETED, "Deleted", 7 }, > + { FC_PORTSTATE_MARGINAL, "Marginal", 8 }, > }; > fc_enum_name_search(port_state, fc_port_state, fc_port_state_names) > +fc_enum_name_match(port_state, fc_port_state, fc_port_state_names) > #define FC_PORTSTATE_MAX_NAMELEN 20 > > > @@ -2071,6 +2074,7 @@ fc_eh_timed_out(struct scsi_cmnd *scmd) > { > struct fc_rport *rport = starget_to_rport(scsi_target(scmd- > >device)); > > + fc_rport_chkmarginal_set_noretries(rport, scmd); > if (rport->port_state == FC_PORTSTATE_BLOCKED) > return BLK_EH_RESET_TIMER; > > @@ -2095,7 +2099,8 @@ fc_user_scan_tgt(struct Scsi_Host *shost, uint > channel, uint id, u64 lun) > if (rport->scsi_target_id == -1) > continue; > > - if (rport->port_state != FC_PORTSTATE_ONLINE) > + if ((rport->port_state != FC_PORTSTATE_ONLINE) && > + (rport->port_state != FC_PORTSTATE_MARGINAL)) > continue; > > if ((channel == rport->channel) && > @@ -2958,7 +2963,8 @@ fc_remote_port_delete(struct fc_rport *rport) > > spin_lock_irqsave(shost->host_lock, flags); > > - if (rport->port_state != FC_PORTSTATE_ONLINE) { > + if ((rport->port_state != FC_PORTSTATE_ONLINE) && > + (rport->port_state != FC_PORTSTATE_MARGINAL)) { > spin_unlock_irqrestore(shost->host_lock, flags); > return; > } > @@ -3100,7 +3106,8 @@ fc_timeout_deleted_rport(struct work_struct > *work) > * target, validate it still is. If not, tear down the > * scsi_target on it. > */ > - if ((rport->port_state == FC_PORTSTATE_ONLINE) && > + if (((rport->port_state == FC_PORTSTATE_ONLINE) || > + (rport->port_state == FC_PORTSTATE_MARGINAL)) && > (rport->scsi_target_id != -1) && > !(rport->roles & FC_PORT_ROLE_FCP_TARGET)) { > dev_printk(KERN_ERR, &rport->dev, > @@ -3243,7 +3250,8 @@ fc_scsi_scan_rport(struct work_struct *work) > struct fc_internal *i = to_fc_internal(shost->transportt); > unsigned long flags; > > - if ((rport->port_state == FC_PORTSTATE_ONLINE) && > + if (((rport->port_state == FC_PORTSTATE_ONLINE) || > + (rport->port_state == FC_PORTSTATE_ONLINE)) && I think the second line should have been FC_PORTSTATE_MARGINAL. > (rport->roles & FC_PORT_ROLE_FCP_TARGET) && > !(i->f->disable_target_scan)) { > scsi_scan_target(&rport->dev, rport->channel, > @@ -3747,7 +3755,8 @@ static blk_status_t fc_bsg_rport_prep(struct > fc_rport *rport) > !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)) > return BLK_STS_RESOURCE; > > - if (rport->port_state != FC_PORTSTATE_ONLINE) > + if ((rport->port_state != FC_PORTSTATE_ONLINE) && > + (rport->port_state != FC_PORTSTATE_MARGINAL)) > return BLK_STS_IOERR; > > return BLK_STS_OK; > diff --git a/include/scsi/scsi_transport_fc.h > b/include/scsi/scsi_transport_fc.h > index 1c7dd35cb7a0..829bade13b89 100644 > --- a/include/scsi/scsi_transport_fc.h > +++ b/include/scsi/scsi_transport_fc.h > @@ -14,6 +14,7 @@ > #include <linux/bsg-lib.h> > #include <asm/unaligned.h> > #include <scsi/scsi.h> > +#include <scsi/scsi_cmnd.h> > #include <scsi/scsi_netlink.h> > #include <scsi/scsi_host.h> > > @@ -67,6 +68,7 @@ enum fc_port_state { > FC_PORTSTATE_ERROR, > FC_PORTSTATE_LOOPBACK, > FC_PORTSTATE_DELETED, > + FC_PORTSTATE_MARGINAL, > }; > > > @@ -707,6 +709,22 @@ struct fc_function_template { > unsigned long disable_target_scan:1; > }; > > +/** > + * fc_rport_chkmarginal_set_noretries - Set the SCMD_NORETRIES_ABORT > bit > + * in cmd->state if port state is marginal > + * @rport: remote port to be checked > + * @scmd: scsi_cmd to set/clear the SCMD_NORETRIES_ABORT bit on > Marginal state > + **/ > +static inline void > +fc_rport_chkmarginal_set_noretries(struct fc_rport *rport, struct > scsi_cmnd *cmd) > +{ > + if ((rport->port_state == FC_PORTSTATE_MARGINAL) && > + (cmd->request->cmd_flags & REQ_FAILFAST_TRANSPORT)) > + set_bit(SCMD_NORETRIES_ABORT, &cmd->state); > + else > + clear_bit(SCMD_NORETRIES_ABORT, &cmd->state); > + > +} > > /** > * fc_remote_port_chkready - called to validate the remote port > state > @@ -723,6 +741,7 @@ fc_remote_port_chkready(struct fc_rport *rport) > > switch (rport->port_state) { > case FC_PORTSTATE_ONLINE: > + case FC_PORTSTATE_MARGINAL: > if (rport->roles & FC_PORT_ROLE_FCP_TARGET) > result = 0; > else if (rport->flags & FC_RPORT_DEVLOSS_PENDING)
On 10/22/20 7:34 AM, Muneendra wrote: > @@ -2071,6 +2074,7 @@ fc_eh_timed_out(struct scsi_cmnd *scmd) > { > struct fc_rport *rport = starget_to_rport(scsi_target(scmd->device)); > > + fc_rport_chkmarginal_set_noretries(rport, scmd); > if (rport->port_state == FC_PORTSTATE_BLOCKED) > return BLK_EH_RESET_TIMER; If we are in port state marginal above, then we will try to abort the cmd, but if while doing the abort we call fc_remote_port_delete and fc_remote_port_add then the port state will be online when the EH callouts complete. In this case, the port state is online in the end, but we would fail the command like it was in marginal. > > @@ -2095,7 +2099,8 @@ fc_user_scan_tgt(struct Scsi_Host *shost, uint channel, uint id, u64 lun) > if (rport->scsi_target_id == -1) > continue; > > - if (rport->port_state != FC_PORTSTATE_ONLINE) > + if ((rport->port_state != FC_PORTSTATE_ONLINE) && > + (rport->port_state != FC_PORTSTATE_MARGINAL)) > continue; > > if ((channel == rport->channel) && > @@ -2958,7 +2963,8 @@ fc_remote_port_delete(struct fc_rport *rport) > > spin_lock_irqsave(shost->host_lock, flags); > > - if (rport->port_state != FC_PORTSTATE_ONLINE) { > + if ((rport->port_state != FC_PORTSTATE_ONLINE) && > + (rport->port_state != FC_PORTSTATE_MARGINAL)) { > spin_unlock_irqrestore(shost->host_lock, flags); > return; It looks like if fc_remote_port_delete is called, then we will allow that function to set the port_state to blocked. If the problem is resolved then fc_remote_port_add will set the state to online. So it would look like the port state is now ok in the kernel, but would userspace still have it in the marginal port group? Did you want this behavior or did you want it to stay in marginal until your daemon marks it as online?
Hi ewan, Thanks for the input. I will change this. Regards, Muneendra. -----Original Message----- From: Ewan D. Milne [mailto:emilne@redhat.com] Sent: Monday, October 26, 2020 5:18 PM To: Muneendra <muneendra.kumar@broadcom.com>; linux-scsi@vger.kernel.org; michael.christie@oracle.com; hare@suse.de Cc: jsmart2021@gmail.com; mkumar@redhat.com Subject: Re: [patch v4 4/5] scsi_transport_fc: Added a new rport state FC_PORTSTATE_MARGINAL See below. I think you wanted to check for FC_PORTSTATE_MARGINAL in the code you added to fc_scsi_scan_rport(). Instead the code tests for FC_PORTSTATE_ONLINE twice. -Ewan On Thu, 2020-10-22 at 18:04 +0530, Muneendra wrote: > Added a new rport state FC_PORTSTATE_MARGINAL. > > Added a new inline function fc_rport_chkmarginal_set_noretries > which will set the SCMD_NORETRIES_ABORT bit in cmd->state if rport > state is marginal. > > Made changes in fc_eh_timed_out to call > fc_rport_chkmarginal_set_noretries > Also made changes in fc_remote_port_delete,fc_user_scan_tgt, > fc_timeout_deleted_rport functions to handle the new rport state > FC_PORTSTATE_MARGINAL. > > Signed-off-by: Muneendra <muneendra.kumar@broadcom.com> > > --- > v4: > Made changes in fc_eh_timed_out to call > fc_rport_chkmarginal_set_noretries > so that SCMD_NORETRIES_ABORT bit in cmd->state is set if rport state > is marginal. > > Removed the newly added scsi_cmd argument to fc_remote_port_chkready > as the current patch handles only SCSI EH timeout/abort case. > > v3: > Rearranged the patch so that all the changes with respect to new rport > state is part of this patch. > Added a new argument to scsi_cmd to fc_remote_port_chkready > > v2: > New patch > --- > drivers/scsi/scsi_transport_fc.c | 41 +++++++++++++++++++----------- > -- > include/scsi/scsi_transport_fc.h | 19 +++++++++++++++ > 2 files changed, 44 insertions(+), 16 deletions(-) > > diff --git a/drivers/scsi/scsi_transport_fc.c > b/drivers/scsi/scsi_transport_fc.c > index 2ff7f06203da..fcb38068e2a4 100644 > --- a/drivers/scsi/scsi_transport_fc.c > +++ b/drivers/scsi/scsi_transport_fc.c > @@ -142,20 +142,23 @@ fc_enum_name_search(host_event_code, > fc_host_event_code, > static struct { > enum fc_port_state value; > char *name; > + int matchlen; > } fc_port_state_names[] = { > - { FC_PORTSTATE_UNKNOWN, "Unknown" }, > - { FC_PORTSTATE_NOTPRESENT, "Not Present" }, > - { FC_PORTSTATE_ONLINE, "Online" }, > - { FC_PORTSTATE_OFFLINE, "Offline" }, > - { FC_PORTSTATE_BLOCKED, "Blocked" }, > - { FC_PORTSTATE_BYPASSED, "Bypassed" }, > - { FC_PORTSTATE_DIAGNOSTICS, "Diagnostics" }, > - { FC_PORTSTATE_LINKDOWN, "Linkdown" }, > - { FC_PORTSTATE_ERROR, "Error" }, > - { FC_PORTSTATE_LOOPBACK, "Loopback" }, > - { FC_PORTSTATE_DELETED, "Deleted" }, > + { FC_PORTSTATE_UNKNOWN, "Unknown", 7}, > + { FC_PORTSTATE_NOTPRESENT, "Not Present", 11 }, > + { FC_PORTSTATE_ONLINE, "Online", 6 }, > + { FC_PORTSTATE_OFFLINE, "Offline", 7 }, > + { FC_PORTSTATE_BLOCKED, "Blocked", 7 }, > + { FC_PORTSTATE_BYPASSED, "Bypassed", 8 }, > + { FC_PORTSTATE_DIAGNOSTICS, "Diagnostics", 11 }, > + { FC_PORTSTATE_LINKDOWN, "Linkdown", 8 }, > + { FC_PORTSTATE_ERROR, "Error", 5 }, > + { FC_PORTSTATE_LOOPBACK, "Loopback", 8 }, > + { FC_PORTSTATE_DELETED, "Deleted", 7 }, > + { FC_PORTSTATE_MARGINAL, "Marginal", 8 }, > }; > fc_enum_name_search(port_state, fc_port_state, fc_port_state_names) > +fc_enum_name_match(port_state, fc_port_state, fc_port_state_names) > #define FC_PORTSTATE_MAX_NAMELEN 20 > > > @@ -2071,6 +2074,7 @@ fc_eh_timed_out(struct scsi_cmnd *scmd) { > struct fc_rport *rport = starget_to_rport(scsi_target(scmd- > >device)); > > + fc_rport_chkmarginal_set_noretries(rport, scmd); > if (rport->port_state == FC_PORTSTATE_BLOCKED) > return BLK_EH_RESET_TIMER; > > @@ -2095,7 +2099,8 @@ fc_user_scan_tgt(struct Scsi_Host *shost, uint > channel, uint id, u64 lun) > if (rport->scsi_target_id == -1) > continue; > > - if (rport->port_state != FC_PORTSTATE_ONLINE) > + if ((rport->port_state != FC_PORTSTATE_ONLINE) && > + (rport->port_state != FC_PORTSTATE_MARGINAL)) > continue; > > if ((channel == rport->channel) && > @@ -2958,7 +2963,8 @@ fc_remote_port_delete(struct fc_rport *rport) > > spin_lock_irqsave(shost->host_lock, flags); > > - if (rport->port_state != FC_PORTSTATE_ONLINE) { > + if ((rport->port_state != FC_PORTSTATE_ONLINE) && > + (rport->port_state != FC_PORTSTATE_MARGINAL)) { > spin_unlock_irqrestore(shost->host_lock, flags); > return; > } > @@ -3100,7 +3106,8 @@ fc_timeout_deleted_rport(struct work_struct > *work) > * target, validate it still is. If not, tear down the > * scsi_target on it. > */ > - if ((rport->port_state == FC_PORTSTATE_ONLINE) && > + if (((rport->port_state == FC_PORTSTATE_ONLINE) || > + (rport->port_state == FC_PORTSTATE_MARGINAL)) && > (rport->scsi_target_id != -1) && > !(rport->roles & FC_PORT_ROLE_FCP_TARGET)) { > dev_printk(KERN_ERR, &rport->dev, > @@ -3243,7 +3250,8 @@ fc_scsi_scan_rport(struct work_struct *work) > struct fc_internal *i = to_fc_internal(shost->transportt); > unsigned long flags; > > - if ((rport->port_state == FC_PORTSTATE_ONLINE) && > + if (((rport->port_state == FC_PORTSTATE_ONLINE) || > + (rport->port_state == FC_PORTSTATE_ONLINE)) && I think the second line should have been FC_PORTSTATE_MARGINAL. > (rport->roles & FC_PORT_ROLE_FCP_TARGET) && > !(i->f->disable_target_scan)) { > scsi_scan_target(&rport->dev, rport->channel, @@ -3747,7 +3755,8 @@ > static blk_status_t fc_bsg_rport_prep(struct fc_rport *rport) > !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)) > return BLK_STS_RESOURCE; > > - if (rport->port_state != FC_PORTSTATE_ONLINE) > + if ((rport->port_state != FC_PORTSTATE_ONLINE) && > + (rport->port_state != FC_PORTSTATE_MARGINAL)) > return BLK_STS_IOERR; > > return BLK_STS_OK; > diff --git a/include/scsi/scsi_transport_fc.h > b/include/scsi/scsi_transport_fc.h > index 1c7dd35cb7a0..829bade13b89 100644 > --- a/include/scsi/scsi_transport_fc.h > +++ b/include/scsi/scsi_transport_fc.h > @@ -14,6 +14,7 @@ > #include <linux/bsg-lib.h> > #include <asm/unaligned.h> > #include <scsi/scsi.h> > +#include <scsi/scsi_cmnd.h> > #include <scsi/scsi_netlink.h> > #include <scsi/scsi_host.h> > > @@ -67,6 +68,7 @@ enum fc_port_state { > FC_PORTSTATE_ERROR, > FC_PORTSTATE_LOOPBACK, > FC_PORTSTATE_DELETED, > + FC_PORTSTATE_MARGINAL, > }; > > > @@ -707,6 +709,22 @@ struct fc_function_template { > unsigned long disable_target_scan:1; > }; > > +/** > + * fc_rport_chkmarginal_set_noretries - Set the SCMD_NORETRIES_ABORT > bit > + * in cmd->state if port state is marginal > + * @rport: remote port to be checked > + * @scmd: scsi_cmd to set/clear the SCMD_NORETRIES_ABORT bit on > Marginal state > + **/ > +static inline void > +fc_rport_chkmarginal_set_noretries(struct fc_rport *rport, struct > scsi_cmnd *cmd) > +{ > + if ((rport->port_state == FC_PORTSTATE_MARGINAL) && > + (cmd->request->cmd_flags & REQ_FAILFAST_TRANSPORT)) > + set_bit(SCMD_NORETRIES_ABORT, &cmd->state); > + else > + clear_bit(SCMD_NORETRIES_ABORT, &cmd->state); > + > +} > > /** > * fc_remote_port_chkready - called to validate the remote port state > @@ -723,6 +741,7 @@ fc_remote_port_chkready(struct fc_rport *rport) > > switch (rport->port_state) { > case FC_PORTSTATE_ONLINE: > + case FC_PORTSTATE_MARGINAL: > if (rport->roles & FC_PORT_ROLE_FCP_TARGET) > result = 0; > else if (rport->flags & FC_RPORT_DEVLOSS_PENDING)
Hi Mike, Below are my replies. -----Original Message----- From: Mike Christie [mailto:michael.christie@oracle.com] Sent: Monday, October 26, 2020 10:45 PM To: Muneendra <muneendra.kumar@broadcom.com>; linux-scsi@vger.kernel.org; hare@suse.de Cc: jsmart2021@gmail.com; emilne@redhat.com; mkumar@redhat.com Subject: Re: [patch v4 4/5] scsi_transport_fc: Added a new rport state FC_PORTSTATE_MARGINAL On 10/22/20 7:34 AM, Muneendra wrote: > @@ -2071,6 +2074,7 @@ fc_eh_timed_out(struct scsi_cmnd *scmd) { > struct fc_rport *rport = > starget_to_rport(scsi_target(scmd->device)); > > + fc_rport_chkmarginal_set_noretries(rport, scmd); > if (rport->port_state == FC_PORTSTATE_BLOCKED) > return BLK_EH_RESET_TIMER; >If we are in port state marginal above, then we will try to abort the cmd, >but if while doing the abort we call fc_remote_port_delete and >fc_remote_port_add then the port state will be online when the EH callouts >complete. In >this case, the port state is online in the end, but we would >fail the command like it was in marginal. [Muneendra] I have to make sure the flag is set after the check for blocked state. If blocked, it's returning BLK_EH_RESET_TIMER, so it will restart the eh timer. The io will "sit out" like this, pending, until either the adapter fails it back due to logout or io completion, or fastio fail or rport devloss timesout and invokes the abort handler to force abort . > + (rport->port_state != FC_PORTSTATE_MARGINAL)) { > spin_unlock_irqrestore(shost->host_lock, flags); > return; >It looks like if fc_remote_port_delete is called, then we will allow that >function to set the port_state to blocked. If the problem is resolved then >fc_remote_port_add will set the state to online. So it would look like the >port state is >now ok in the kernel, but would userspace still have it in >the marginal port group? >Did you want this behavior or did you want it to stay in marginal until >your daemon marks it as online? [Muneendra] We need this behavior.User daemon should not depend on the rport_state to move a path from marginal path group.It should only depends on RSCN and LINKUP events/manual intervention. events that we look out (rscn for target-side cable bounces and link up/down for initiator cable bounces) will result in port state changes - so although we don't drive one from the other, they are correlated. Regards, Muneendra.
On 10/29/20 6:53 AM, Muneendra Kumar M wrote: > Hi Mike, > Below are my replies. > > -----Original Message----- > From: Mike Christie [mailto:michael.christie@oracle.com] > Sent: Monday, October 26, 2020 10:45 PM > To: Muneendra <muneendra.kumar@broadcom.com>; linux-scsi@vger.kernel.org; > hare@suse.de > Cc: jsmart2021@gmail.com; emilne@redhat.com; mkumar@redhat.com > Subject: Re: [patch v4 4/5] scsi_transport_fc: Added a new rport state > FC_PORTSTATE_MARGINAL > > On 10/22/20 7:34 AM, Muneendra wrote: >> @@ -2071,6 +2074,7 @@ fc_eh_timed_out(struct scsi_cmnd *scmd) { >> struct fc_rport *rport = >> starget_to_rport(scsi_target(scmd->device)); >> >> + fc_rport_chkmarginal_set_noretries(rport, scmd); >> if (rport->port_state == FC_PORTSTATE_BLOCKED) >> return BLK_EH_RESET_TIMER; > >> If we are in port state marginal above, then we will try to abort the cmd, >> but if while doing the abort we call fc_remote_port_delete and >> fc_remote_port_add then the port state will be online when the EH callouts >> complete. In >this case, the port state is online in the end, but we would >> fail the command like it was in marginal. > [Muneendra] I have to make sure the flag is set after the check for blocked > state. If blocked, it's returning BLK_EH_RESET_TIMER, so it will restart > the eh > timer. The io will "sit out" like this, pending, until either the adapter > fails it back due to logout or io completion, or fastio fail or > rport devloss timesout and invokes the abort handler to force abort . Hey, I'm not sure if we are talking about the same thing. If port state is marginal above, then we set the NORETRIES bit then return BLK_EH_DONE which will start up the scsi eh_abort_handler and if that fails the rest of the scsi eh_*_handlers. While we are calling the eh handlers, if the driver does a fc_remote_port_delete then fc_remote_port_add we still have the NORETRIES bit set, so when we return from the eh_*_handlers we will fail the IO upwards. I was trying to ask if you wanted the IO failed upwards in that case. Because the port state went to online, did you want the normal (cleared NOTRIES bit) cmd retry behavior? It sounds like below you want the cleared NORETRIED bit behavior, right? > >> + (rport->port_state != FC_PORTSTATE_MARGINAL)) { >> spin_unlock_irqrestore(shost->host_lock, flags); >> return; > >> It looks like if fc_remote_port_delete is called, then we will allow that >> function to set the port_state to blocked. If the problem is resolved then >> fc_remote_port_add will set the state to online. So it would look like the >> port state is >now ok in the kernel, but would userspace still have it in >> the marginal port group? > >> Did you want this behavior or did you want it to stay in marginal until >> your daemon marks it as online? > [Muneendra] We need this behavior.User daemon > should not depend on the rport_state to move a path from marginal path > group.It should only depends on RSCN and LINKUP events/manual > intervention. events that we look out (rscn for target-side cable bounces > and link up/down for initiator cable bounces) will result in > port state changes - so although we don't drive one from the other, they are > correlated. > > Regards, > Muneendra. >
Hi Mike, > [Muneendra] I have to make sure the flag is set after the check for > blocked state. If blocked, it's returning BLK_EH_RESET_TIMER, so it > will restart the eh timer. The io will "sit out" like this, pending, > until either the adapter fails it back due to logout or io completion, > or fastio fail or rport devloss timesout and invokes the abort handler > to force abort . >Hey, >I'm not sure if we are talking about the same thing. If port state is >marginal above, then we set the NORETRIES bit then return BLK_EH_DONE which >will start up the scsi eh_abort_handler and if that fails the rest of the >scsi >eh_*_handlers. >While we are calling the eh handlers, if the driver does a >fc_remote_port_delete then fc_remote_port_add we still have the NORETRIES >bit set, so when we return from the eh_*_handlers we will fail the IO >upwards. >I was trying to ask if you wanted the IO failed upwards in that case. >Because the port state went to online, did you want the normal (cleared >NOTRIES bit) cmd retry behavior? It sounds like below you want the cleared >NORETRIED bit behavior, right? [Muneendra]Yes we need the normal cmd behavior(clear noretries bit) when the portstate went to normal. I think to achieve this we can clear the noretries bit in fc_block_scsi_eh/ fc_block_rport . As this is the last place where the individual abort_handler checks for blocked state. Is this fine? Regards, Muneendra. > >> + (rport->port_state != FC_PORTSTATE_MARGINAL)) { >> spin_unlock_irqrestore(shost->host_lock, flags); >> return; > >> It looks like if fc_remote_port_delete is called, then we will allow >> that function to set the port_state to blocked. If the problem is >> resolved then fc_remote_port_add will set the state to online. So it >> would look like the port state is >now ok in the kernel, but would >> userspace still have it in the marginal port group? > >> Did you want this behavior or did you want it to stay in marginal >> until your daemon marks it as online? > [Muneendra] We need this behavior.User daemon should not depend on the > rport_state to move a path from marginal path > group.It should only depends on RSCN and LINKUP events/manual > intervention. events that we look out (rscn for target-side cable > bounces and link up/down for initiator cable bounces) will result in > port state changes - so although we don't drive one from the other, > they are correlated. > > Regards, > Muneendra. >
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 2ff7f06203da..fcb38068e2a4 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -142,20 +142,23 @@ fc_enum_name_search(host_event_code, fc_host_event_code, static struct { enum fc_port_state value; char *name; + int matchlen; } fc_port_state_names[] = { - { FC_PORTSTATE_UNKNOWN, "Unknown" }, - { FC_PORTSTATE_NOTPRESENT, "Not Present" }, - { FC_PORTSTATE_ONLINE, "Online" }, - { FC_PORTSTATE_OFFLINE, "Offline" }, - { FC_PORTSTATE_BLOCKED, "Blocked" }, - { FC_PORTSTATE_BYPASSED, "Bypassed" }, - { FC_PORTSTATE_DIAGNOSTICS, "Diagnostics" }, - { FC_PORTSTATE_LINKDOWN, "Linkdown" }, - { FC_PORTSTATE_ERROR, "Error" }, - { FC_PORTSTATE_LOOPBACK, "Loopback" }, - { FC_PORTSTATE_DELETED, "Deleted" }, + { FC_PORTSTATE_UNKNOWN, "Unknown", 7}, + { FC_PORTSTATE_NOTPRESENT, "Not Present", 11 }, + { FC_PORTSTATE_ONLINE, "Online", 6 }, + { FC_PORTSTATE_OFFLINE, "Offline", 7 }, + { FC_PORTSTATE_BLOCKED, "Blocked", 7 }, + { FC_PORTSTATE_BYPASSED, "Bypassed", 8 }, + { FC_PORTSTATE_DIAGNOSTICS, "Diagnostics", 11 }, + { FC_PORTSTATE_LINKDOWN, "Linkdown", 8 }, + { FC_PORTSTATE_ERROR, "Error", 5 }, + { FC_PORTSTATE_LOOPBACK, "Loopback", 8 }, + { FC_PORTSTATE_DELETED, "Deleted", 7 }, + { FC_PORTSTATE_MARGINAL, "Marginal", 8 }, }; fc_enum_name_search(port_state, fc_port_state, fc_port_state_names) +fc_enum_name_match(port_state, fc_port_state, fc_port_state_names) #define FC_PORTSTATE_MAX_NAMELEN 20 @@ -2071,6 +2074,7 @@ fc_eh_timed_out(struct scsi_cmnd *scmd) { struct fc_rport *rport = starget_to_rport(scsi_target(scmd->device)); + fc_rport_chkmarginal_set_noretries(rport, scmd); if (rport->port_state == FC_PORTSTATE_BLOCKED) return BLK_EH_RESET_TIMER; @@ -2095,7 +2099,8 @@ fc_user_scan_tgt(struct Scsi_Host *shost, uint channel, uint id, u64 lun) if (rport->scsi_target_id == -1) continue; - if (rport->port_state != FC_PORTSTATE_ONLINE) + if ((rport->port_state != FC_PORTSTATE_ONLINE) && + (rport->port_state != FC_PORTSTATE_MARGINAL)) continue; if ((channel == rport->channel) && @@ -2958,7 +2963,8 @@ fc_remote_port_delete(struct fc_rport *rport) spin_lock_irqsave(shost->host_lock, flags); - if (rport->port_state != FC_PORTSTATE_ONLINE) { + if ((rport->port_state != FC_PORTSTATE_ONLINE) && + (rport->port_state != FC_PORTSTATE_MARGINAL)) { spin_unlock_irqrestore(shost->host_lock, flags); return; } @@ -3100,7 +3106,8 @@ fc_timeout_deleted_rport(struct work_struct *work) * target, validate it still is. If not, tear down the * scsi_target on it. */ - if ((rport->port_state == FC_PORTSTATE_ONLINE) && + if (((rport->port_state == FC_PORTSTATE_ONLINE) || + (rport->port_state == FC_PORTSTATE_MARGINAL)) && (rport->scsi_target_id != -1) && !(rport->roles & FC_PORT_ROLE_FCP_TARGET)) { dev_printk(KERN_ERR, &rport->dev, @@ -3243,7 +3250,8 @@ fc_scsi_scan_rport(struct work_struct *work) struct fc_internal *i = to_fc_internal(shost->transportt); unsigned long flags; - if ((rport->port_state == FC_PORTSTATE_ONLINE) && + if (((rport->port_state == FC_PORTSTATE_ONLINE) || + (rport->port_state == FC_PORTSTATE_ONLINE)) && (rport->roles & FC_PORT_ROLE_FCP_TARGET) && !(i->f->disable_target_scan)) { scsi_scan_target(&rport->dev, rport->channel, @@ -3747,7 +3755,8 @@ static blk_status_t fc_bsg_rport_prep(struct fc_rport *rport) !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)) return BLK_STS_RESOURCE; - if (rport->port_state != FC_PORTSTATE_ONLINE) + if ((rport->port_state != FC_PORTSTATE_ONLINE) && + (rport->port_state != FC_PORTSTATE_MARGINAL)) return BLK_STS_IOERR; return BLK_STS_OK; diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index 1c7dd35cb7a0..829bade13b89 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -14,6 +14,7 @@ #include <linux/bsg-lib.h> #include <asm/unaligned.h> #include <scsi/scsi.h> +#include <scsi/scsi_cmnd.h> #include <scsi/scsi_netlink.h> #include <scsi/scsi_host.h> @@ -67,6 +68,7 @@ enum fc_port_state { FC_PORTSTATE_ERROR, FC_PORTSTATE_LOOPBACK, FC_PORTSTATE_DELETED, + FC_PORTSTATE_MARGINAL, }; @@ -707,6 +709,22 @@ struct fc_function_template { unsigned long disable_target_scan:1; }; +/** + * fc_rport_chkmarginal_set_noretries - Set the SCMD_NORETRIES_ABORT bit + * in cmd->state if port state is marginal + * @rport: remote port to be checked + * @scmd: scsi_cmd to set/clear the SCMD_NORETRIES_ABORT bit on Marginal state + **/ +static inline void +fc_rport_chkmarginal_set_noretries(struct fc_rport *rport, struct scsi_cmnd *cmd) +{ + if ((rport->port_state == FC_PORTSTATE_MARGINAL) && + (cmd->request->cmd_flags & REQ_FAILFAST_TRANSPORT)) + set_bit(SCMD_NORETRIES_ABORT, &cmd->state); + else + clear_bit(SCMD_NORETRIES_ABORT, &cmd->state); + +} /** * fc_remote_port_chkready - called to validate the remote port state @@ -723,6 +741,7 @@ fc_remote_port_chkready(struct fc_rport *rport) switch (rport->port_state) { case FC_PORTSTATE_ONLINE: + case FC_PORTSTATE_MARGINAL: if (rport->roles & FC_PORT_ROLE_FCP_TARGET) result = 0; else if (rport->flags & FC_RPORT_DEVLOSS_PENDING)
Added a new rport state FC_PORTSTATE_MARGINAL. Added a new inline function fc_rport_chkmarginal_set_noretries which will set the SCMD_NORETRIES_ABORT bit in cmd->state if rport state is marginal. Made changes in fc_eh_timed_out to call fc_rport_chkmarginal_set_noretries Also made changes in fc_remote_port_delete,fc_user_scan_tgt, fc_timeout_deleted_rport functions to handle the new rport state FC_PORTSTATE_MARGINAL. Signed-off-by: Muneendra <muneendra.kumar@broadcom.com> --- v4: Made changes in fc_eh_timed_out to call fc_rport_chkmarginal_set_noretries so that SCMD_NORETRIES_ABORT bit in cmd->state is set if rport state is marginal. Removed the newly added scsi_cmd argument to fc_remote_port_chkready as the current patch handles only SCSI EH timeout/abort case. v3: Rearranged the patch so that all the changes with respect to new rport state is part of this patch. Added a new argument to scsi_cmd to fc_remote_port_chkready v2: New patch --- drivers/scsi/scsi_transport_fc.c | 41 +++++++++++++++++++------------- include/scsi/scsi_transport_fc.h | 19 +++++++++++++++ 2 files changed, 44 insertions(+), 16 deletions(-)