diff mbox series

[v3,02/35] scsi: Allow passthrough to override what errors to retry

Message ID 20221003175321.8040-3-michael.christie@oracle.com
State Superseded
Headers show
Series Allow scsi_execute users to control retries | expand

Commit Message

Mike Christie Oct. 3, 2022, 5:52 p.m. UTC
For passthrough, we don't retry any error we get a check condition for.
This results in a lot of callers driving their own retries for those types
of errors and retrying all errors, and there has been a request to retry
specific host byte errors.

This adds the core code to allow passthrough users to specify what errors
they want scsi-ml to retry for them. We can then convert users to drop
their sense parsing and retry handling.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/scsi/scsi_error.c | 73 +++++++++++++++++++++++++++++++++++++++
 drivers/scsi/scsi_lib.c   |  1 +
 include/scsi/scsi_cmnd.h  | 27 ++++++++++++++-
 3 files changed, 100 insertions(+), 1 deletion(-)

Comments

Bart Van Assche Oct. 4, 2022, 10:27 p.m. UTC | #1
On 10/3/22 10:52, Mike Christie wrote:
> +static enum scsi_disposition scsi_check_passthrough(struct scsi_cmnd *scmd)
> +{
> +	struct scsi_failure *failure;
> +	struct scsi_sense_hdr sshdr;
> +	enum scsi_disposition ret;
> +	u8 status;

How about using type enum sam_status instead of u8 for 'status'?

> +	int i;
> +
> +

A single blank line to separate declarations and code please.

> +	if (!scmd->result || !scmd->failures)
> +		return SCSI_RETURN_NOT_HANDLED;

A comment that explains what the "not handled" return value indicates
would be welcome, e.g. above the scsi_check_passthrough() function.

> +		if (((__get_status_byte(failure->result)) !=
> +		    SAM_STAT_CHECK_CONDITION) ||
> +		    (failure->sense == SCMD_FAILURE_SENSE_ANY))
> +			goto maybe_retry;

Please do not use more parentheses than necessary. I think that 6
parentheses can be left out from the if-expression without affecting the
readability of the above code.

> @@ -1608,6 +1608,7 @@ static blk_status_t scsi_prepare_cmd(struct request *req)
>   
>   	/* Usually overridden by the ULP */
>   	cmd->allowed = 0;
> +	cmd->failures = NULL;
>   	memset(cmd->cmnd, 0, sizeof(cmd->cmnd));
>   	return scsi_cmd_to_driver(cmd)->init_command(cmd);
>   }

Shouldn't the cmd->failures assignment be moved into scsi_initialize_rq()
rather than keeping it in scsi_prepare_cmd() such that the value set by
scsi_execute() is preserved?

> diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
> index bac55decf900..9ab97e48c5c2 100644
> --- a/include/scsi/scsi_cmnd.h
> +++ b/include/scsi/scsi_cmnd.h
> @@ -65,6 +65,24 @@ enum scsi_cmnd_submitter {
>   	SUBMITTED_BY_SCSI_RESET_IOCTL = 2,
>   } __packed;
>   
> +#define SCMD_FAILURE_NONE	0
> +#define SCMD_FAILURE_ANY	-1

This is the same value as -EPERM. Would something like 0x7fffffff be a better
choice for SCMD_FAILURE_ANY?

> +struct scsi_failure {
> +	int result;
> +	u8 sense;
> +	u8 asc;
> +	u8 ascq;
> +
> +	s8 allowed;
> +	s8 retries;
> +};

Why has 'retries' type s8 instead of u8?

Thanks,

Bart.
Bart Van Assche Oct. 4, 2022, 10:55 p.m. UTC | #2
On 10/3/22 10:52, Mike Christie wrote:
> +static enum scsi_disposition scsi_check_passthrough(struct scsi_cmnd *scmd)
> +{
> +	struct scsi_failure *failure;
> +	struct scsi_sense_hdr sshdr;
> +	enum scsi_disposition ret;
> +	u8 status;
> +	int i;
> +
> +
> +	if (!scmd->result || !scmd->failures)
> +		return SCSI_RETURN_NOT_HANDLED;
> +
> +	for (i = 0, failure = &scmd->failures[i]; failure->result;
> +	     failure = &scmd->failures[++i]) {
> +		if (failure->result == SCMD_FAILURE_ANY)
> +			goto maybe_retry;
> +
> +		if (host_byte(scmd->result) &&
> +		    host_byte(scmd->result) == host_byte(failure->result))
> +			goto maybe_retry;
> +
> +		status = get_status_byte(scmd);
> +		if (!status)
> +			continue;
> +
> +		if (failure->result == SCMD_FAILURE_STAT_ANY &&
> +		    !scsi_status_is_good(scmd->result))
> +			goto maybe_retry;
> +
> +		if (status != __get_status_byte(failure->result))
> +			continue;
> +
> +		if (((__get_status_byte(failure->result)) !=
> +		    SAM_STAT_CHECK_CONDITION) ||
> +		    (failure->sense == SCMD_FAILURE_SENSE_ANY))
> +			goto maybe_retry;
> +
> +		ret = scsi_start_sense_processing(scmd, &sshdr);
> +		if (ret == NEEDS_RETRY)
> +			goto maybe_retry;
> +		else if (ret != SUCCESS)
> +			return ret;
> +
> +		if (failure->sense != sshdr.sense_key)
> +			continue;
> +
> +		if (failure->asc == SCMD_FAILURE_ASC_ANY)
> +			goto maybe_retry;
> +
> +		if (failure->asc != sshdr.asc)
> +			continue;
> +
> +		if (failure->ascq == SCMD_FAILURE_ASCQ_ANY ||
> +		    failure->ascq == sshdr.ascq)
> +			goto maybe_retry;
> +	}
> +
> +	return SCSI_RETURN_NOT_HANDLED;
> +
> +maybe_retry:
> +	if (failure->allowed == SCMD_FAILURE_NO_LIMIT ||
> +	    ++failure->retries <= failure->allowed)
> +		return NEEDS_RETRY;
> +
> +	return SUCCESS;
> +}

Since the logic in the above function is nontrivial and since injecting
errors may also be nontrivial, please add unit tests for this function. An
example is available in commit addbeea6f50b ("testing/selftests: Add tests
for the is_signed_type() macro").

Thanks,

Bart.
Mike Christie Oct. 5, 2022, 4:18 p.m. UTC | #3
On 10/4/22 5:27 PM, Bart Van Assche wrote:

Agree and will handle the other review comments.
 
>> @@ -1608,6 +1608,7 @@ static blk_status_t scsi_prepare_cmd(struct request *req)
>>         /* Usually overridden by the ULP */
>>       cmd->allowed = 0;
>> +    cmd->failures = NULL;
>>       memset(cmd->cmnd, 0, sizeof(cmd->cmnd));
>>       return scsi_cmd_to_driver(cmd)->init_command(cmd);
>>   }
> 
> Shouldn't the cmd->failures assignment be moved into scsi_initialize_rq()
> rather than keeping it in scsi_prepare_cmd() such that the value set by
> scsi_execute() is preserved?

There is that check:

       /*
         * Special handling for passthrough commands, which don't go to the ULP
         * at all:
         */
        if (blk_rq_is_passthrough(req))
                return scsi_setup_scsi_cmnd(sdev, req);

above this code so we only hit the failures = NULL code for non-passthrough. So
the value set by scsi_execute is preserved like is done for allowed and passthrough
commands.

I agree it should be moved though. It makes more sense to be in scsi_initialize_rq
since it's only called for non passthrough right now.



> 
>> diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
>> index bac55decf900..9ab97e48c5c2 100644
>> --- a/include/scsi/scsi_cmnd.h
>> +++ b/include/scsi/scsi_cmnd.h
>> @@ -65,6 +65,24 @@ enum scsi_cmnd_submitter {
>>       SUBMITTED_BY_SCSI_RESET_IOCTL = 2,
>>   } __packed;
>>   +#define SCMD_FAILURE_NONE    0
>> +#define SCMD_FAILURE_ANY    -1
> 
> This is the same value as -EPERM. Would something like 0x7fffffff be a better
> choice for SCMD_FAILURE_ANY?

I didn't get that. All of this is used for the scsi_cmnd->result evaluation
and retries (host, status, internal bytes) which are all positive and where
we don't use -Exyx errors.


> 
>> +struct scsi_failure {
>> +    int result;
>> +    u8 sense;
>> +    u8 asc;
>> +    u8 ascq;
>> +
>> +    s8 allowed;
>> +    s8 retries;
>> +};
> 
> Why has 'retries' type s8 instead of u8?
> 

There is a:

#define SCMD_FAILURE_NO_LIMIT   -1

in the defines that got cut off when you replied which is used for some
users that had no limit on retries.
Bart Van Assche Oct. 19, 2022, 10:03 p.m. UTC | #4
On 10/5/22 09:18, Mike Christie wrote:
> On 10/4/22 5:27 PM, Bart Van Assche wrote:

>>> diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
>>> index bac55decf900..9ab97e48c5c2 100644
>>> --- a/include/scsi/scsi_cmnd.h
>>> +++ b/include/scsi/scsi_cmnd.h
>>> @@ -65,6 +65,24 @@ enum scsi_cmnd_submitter {
>>>        SUBMITTED_BY_SCSI_RESET_IOCTL = 2,
>>>    } __packed;
>>>    +#define SCMD_FAILURE_NONE    0
>>> +#define SCMD_FAILURE_ANY    -1
>>
>> This is the same value as -EPERM. Would something like 0x7fffffff be a better
>> choice for SCMD_FAILURE_ANY?
> 
> I didn't get that. All of this is used for the scsi_cmnd->result evaluation
> and retries (host, status, internal bytes) which are all positive and where
> we don't use -Exyx errors.

(just noticed that I had not yet replied to this email and that this 
conversation is also relevant for v4 of this patch series).

Last time I checked I found a few SCSI LLDs that assign -Exyz error 
values to the scsi_cmnd->result field, e.g. the scsi_debug driver. 
However, I'm not sure that is still the case today.

>>> +struct scsi_failure {
>>> +    int result;
>>> +    u8 sense;
>>> +    u8 asc;
>>> +    u8 ascq;
>>> +
>>> +    s8 allowed;
>>> +    s8 retries;
>>> +};
>>
>> Why has 'retries' type s8 instead of u8?
> 
> There is a:
> 
> #define SCMD_FAILURE_NO_LIMIT   -1
> 
> in the defines that got cut off when you replied which is used for some
> users that had no limit on retries.

Hmm ... isn't SCMD_FAILURE_NO_LIMIT a value that only should be assigned 
to the .allowed member and not to the .retries member? Is my 
understanding correct that the .retries member should be set to zero 
before execution of a SCSI command start and that the only manipulation 
that happens on that member is incrementing it in scsi_check_passthrough()?

Thanks,

Bart.
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 3f630798d1eb..55765efa9067 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1831,6 +1831,73 @@  bool scsi_noretry_cmd(struct scsi_cmnd *scmd)
 	return false;
 }
 
+static enum scsi_disposition scsi_check_passthrough(struct scsi_cmnd *scmd)
+{
+	struct scsi_failure *failure;
+	struct scsi_sense_hdr sshdr;
+	enum scsi_disposition ret;
+	u8 status;
+	int i;
+
+
+	if (!scmd->result || !scmd->failures)
+		return SCSI_RETURN_NOT_HANDLED;
+
+	for (i = 0, failure = &scmd->failures[i]; failure->result;
+	     failure = &scmd->failures[++i]) {
+		if (failure->result == SCMD_FAILURE_ANY)
+			goto maybe_retry;
+
+		if (host_byte(scmd->result) &&
+		    host_byte(scmd->result) == host_byte(failure->result))
+			goto maybe_retry;
+
+		status = get_status_byte(scmd);
+		if (!status)
+			continue;
+
+		if (failure->result == SCMD_FAILURE_STAT_ANY &&
+		    !scsi_status_is_good(scmd->result))
+			goto maybe_retry;
+
+		if (status != __get_status_byte(failure->result))
+			continue;
+
+		if (((__get_status_byte(failure->result)) !=
+		    SAM_STAT_CHECK_CONDITION) ||
+		    (failure->sense == SCMD_FAILURE_SENSE_ANY))
+			goto maybe_retry;
+
+		ret = scsi_start_sense_processing(scmd, &sshdr);
+		if (ret == NEEDS_RETRY)
+			goto maybe_retry;
+		else if (ret != SUCCESS)
+			return ret;
+
+		if (failure->sense != sshdr.sense_key)
+			continue;
+
+		if (failure->asc == SCMD_FAILURE_ASC_ANY)
+			goto maybe_retry;
+
+		if (failure->asc != sshdr.asc)
+			continue;
+
+		if (failure->ascq == SCMD_FAILURE_ASCQ_ANY ||
+		    failure->ascq == sshdr.ascq)
+			goto maybe_retry;
+	}
+
+	return SCSI_RETURN_NOT_HANDLED;
+
+maybe_retry:
+	if (failure->allowed == SCMD_FAILURE_NO_LIMIT ||
+	    ++failure->retries <= failure->allowed)
+		return NEEDS_RETRY;
+
+	return SUCCESS;
+}
+
 /**
  * scsi_decide_disposition - Disposition a cmd on return from LLD.
  * @scmd:	SCSI cmd to examine.
@@ -1859,6 +1926,12 @@  enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *scmd)
 		return SUCCESS;
 	}
 
+	if (blk_rq_is_passthrough(scsi_cmd_to_rq(scmd))) {
+		rtn = scsi_check_passthrough(scmd);
+		if (rtn != SCSI_RETURN_NOT_HANDLED)
+			return rtn;
+	}
+
 	/*
 	 * first check the host byte, to see if there is anything in there
 	 * that would indicate what we need to do.
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 497efc0da259..56aefe38d69b 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1608,6 +1608,7 @@  static blk_status_t scsi_prepare_cmd(struct request *req)
 
 	/* Usually overridden by the ULP */
 	cmd->allowed = 0;
+	cmd->failures = NULL;
 	memset(cmd->cmnd, 0, sizeof(cmd->cmnd));
 	return scsi_cmd_to_driver(cmd)->init_command(cmd);
 }
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index bac55decf900..9ab97e48c5c2 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -65,6 +65,24 @@  enum scsi_cmnd_submitter {
 	SUBMITTED_BY_SCSI_RESET_IOCTL = 2,
 } __packed;
 
+#define SCMD_FAILURE_NONE	0
+#define SCMD_FAILURE_ANY	-1
+#define SCMD_FAILURE_STAT_ANY	0xff
+#define SCMD_FAILURE_SENSE_ANY	0xff
+#define SCMD_FAILURE_ASC_ANY	0xff
+#define SCMD_FAILURE_ASCQ_ANY	0xff
+#define SCMD_FAILURE_NO_LIMIT	-1
+
+struct scsi_failure {
+	int result;
+	u8 sense;
+	u8 asc;
+	u8 ascq;
+
+	s8 allowed;
+	s8 retries;
+};
+
 struct scsi_cmnd {
 	struct scsi_device *device;
 	struct list_head eh_entry; /* entry for the host eh_abort_list/eh_cmd_q */
@@ -85,6 +103,8 @@  struct scsi_cmnd {
 
 	int retries;
 	int allowed;
+	/* optional array of failures that passthrough users want retried */
+	struct scsi_failure *failures;
 
 	unsigned char prot_op;
 	unsigned char prot_type;
@@ -330,9 +350,14 @@  static inline void set_status_byte(struct scsi_cmnd *cmd, char status)
 	cmd->result = (cmd->result & 0xffffff00) | status;
 }
 
+static inline u8 __get_status_byte(int result)
+{
+	return result & 0xff;
+}
+
 static inline u8 get_status_byte(struct scsi_cmnd *cmd)
 {
-	return cmd->result & 0xff;
+	return __get_status_byte(cmd->result);
 }
 
 static inline void set_host_byte(struct scsi_cmnd *cmd, char status)