diff mbox series

[07/13] sd: Translate data lifetime information

Message ID 20230920191442.3701673-8-bvanassche@acm.org
State New
Headers show
Series Pass data temperature information to zoned UFS devices | expand

Commit Message

Bart Van Assche Sept. 20, 2023, 7:14 p.m. UTC
Recently T10 standardized SBC constrained streams. This mechanism enables
passing data lifetime information to SCSI devices in the group number
field. Add support for translating write hint information into a
permanent stream number in the sd driver.

Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/sd.c | 65 ++++++++++++++++++++++++++++++++++++++++++++---
 drivers/scsi/sd.h |  1 +
 2 files changed, 63 insertions(+), 3 deletions(-)

Comments

Avri Altman Oct. 2, 2023, 1:11 p.m. UTC | #1
> 
> Recently T10 standardized SBC constrained streams. This mechanism enables
> passing data lifetime information to SCSI devices in the group number
> field. Add support for translating write hint information into a
> permanent stream number in the sd driver.
> 
> Cc: Martin K. Petersen <martin.petersen@oracle.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Damien Le Moal <dlemoal@kernel.org>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/sd.c | 65 ++++++++++++++++++++++++++++++++++++++++++++-
> --
>  drivers/scsi/sd.h |  1 +
>  2 files changed, 63 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index 879edbc1a065..7bbc58cd99d1 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -1001,12 +1001,38 @@ static blk_status_t sd_setup_flush_cmnd(struct
> scsi_cmnd *cmd)
>         return BLK_STS_OK;
>  }
> 
> +/**
> + * sd_group_number() - Compute the GROUP NUMBER field
> + * @cmd: SCSI command for which to compute the value of the six-bit
> GROUP NUMBER
> + *     field.
> + *
> + * From "SBC-5 Constrained Streams with Data Lifetimes"
> + * (https://www.t10.org/cgi-bin/ac.pl?t=d&f=23-024r3.pdf):
> + * 0: no relative lifetime.
> + * 1: shortest relative lifetime.
> + * 2: second shortest relative lifetime.
> + * 3 - 0x3d: intermediate relative lifetimes.
> + * 0x3e: second longest relative lifetime.
> + * 0x3f: longest relative lifetime.
> + */
> +static u8 sd_group_number(struct scsi_cmnd *cmd)
> +{
> +       const struct request *rq = scsi_cmd_to_rq(cmd);
> +       struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
> +       const int max_gn = min_t(u16, sdkp->permanent_stream_count, 0x3f);
> +
> +       if (!sdkp->rscs || rq->write_hint == WRITE_LIFE_NOT_SET)
> +               return 0;
> +       return min(rq->write_hint - WRITE_LIFE_NONE, max_gn);
> +}
> +
>  static blk_status_t sd_setup_rw32_cmnd(struct scsi_cmnd *cmd, bool write,
>                                        sector_t lba, unsigned int nr_blocks,
>                                        unsigned char flags, unsigned int dld)
>  {
>         cmd->cmd_len = SD_EXT_CDB_SIZE;
>         cmd->cmnd[0]  = VARIABLE_LENGTH_CMD;
> +       cmd->cmnd[6]  = sd_group_number(cmd);
>         cmd->cmnd[7]  = 0x18; /* Additional CDB len */
>         cmd->cmnd[9]  = write ? WRITE_32 : READ_32;
>         cmd->cmnd[10] = flags;
> @@ -1025,7 +1051,7 @@ static blk_status_t sd_setup_rw16_cmnd(struct
> scsi_cmnd *cmd, bool write,
>         cmd->cmd_len  = 16;
>         cmd->cmnd[0]  = write ? WRITE_16 : READ_16;
>         cmd->cmnd[1]  = flags | ((dld >> 2) & 0x01);
> -       cmd->cmnd[14] = (dld & 0x03) << 6;
> +       cmd->cmnd[14] = ((dld & 0x03) << 6) | sd_group_number(cmd);
>         cmd->cmnd[15] = 0;
>         put_unaligned_be64(lba, &cmd->cmnd[2]);
>         put_unaligned_be32(nr_blocks, &cmd->cmnd[10]);
> @@ -1040,7 +1066,7 @@ static blk_status_t sd_setup_rw10_cmnd(struct
> scsi_cmnd *cmd, bool write,
>         cmd->cmd_len = 10;
>         cmd->cmnd[0] = write ? WRITE_10 : READ_10;
>         cmd->cmnd[1] = flags;
> -       cmd->cmnd[6] = 0;
> +       cmd->cmnd[6] = sd_group_number(cmd);
>         cmd->cmnd[9] = 0;
>         put_unaligned_be32(lba, &cmd->cmnd[2]);
>         put_unaligned_be16(nr_blocks, &cmd->cmnd[7]);
> @@ -1177,7 +1203,8 @@ static blk_status_t
> sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
>                 ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks,
>                                          protect | fua, dld);
>         } else if ((nr_blocks > 0xff) || (lba > 0x1fffff) ||
> -                  sdp->use_10_for_rw || protect) {
> +                  sdp->use_10_for_rw || protect ||
> +                  rq->write_hint != WRITE_LIFE_NOT_SET) {
Is this a typo?

>                 ret = sd_setup_rw10_cmnd(cmd, write, lba, nr_blocks,
>                                          protect | fua);
>         } else {
> @@ -2912,6 +2939,37 @@ sd_read_cache_type(struct scsi_disk *sdkp,
> unsigned char *buffer)
>         sdkp->DPOFUA = 0;
>  }
> 
> +static void sd_read_io_hints(struct scsi_disk *sdkp, unsigned char *buffer)
> +{
> +       struct scsi_device *sdp = sdkp->device;
> +       const struct scsi_io_group_descriptor *desc, *start, *end;
> +       struct scsi_sense_hdr sshdr;
> +       struct scsi_mode_data data;
> +       int res;
> +
> +       res = scsi_mode_sense(sdp, /*dbd=*/0x8, /*modepage=*/0x0a,
> +                             /*subpage=*/0x05, buffer, SD_BUF_SIZE,
> +                             SD_TIMEOUT, sdkp->max_retries, &data, &sshdr);
> +       if (res < 0)
> +               return;
> +       start = (void *)buffer + data.header_length + 16;
> +       end = (void *)buffer + ((data.header_length + data.length)
> +                               & ~(sizeof(*end) - 1));
> +       /*
> +        * From "SBC-5 Constrained Streams with Data Lifetimes": Device severs
> +        * should assign the lowest numbered stream identifiers to permanent
> +        * streams.
> +        */
> +       for (desc = start; desc < end; desc++)
> +               if (!desc->st_enble)
> +                       break;
I don't see how you can conclude that the stream is permanent,
without reading the perm bit from the stream status descriptor.

> +       sdkp->permanent_stream_count = desc - start;
> +       if (sdkp->rscs && sdkp->permanent_stream_count < 2)
> +               sdev_printk(KERN_INFO, sdp,
> +                           "Unexpected: RSCS has been set and the permanent stream
> count is %u\n",
> +                           sdkp->permanent_stream_count);
> +}
> +
>  /*
>   * The ATO bit indicates whether the DIF application tag is available
>   * for use by the operating system.
> @@ -3395,6 +3453,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
> 
>                 sd_read_write_protect_flag(sdkp, buffer);
>                 sd_read_cache_type(sdkp, buffer);
> +               sd_read_io_hints(sdkp, buffer);
>                 sd_read_app_tag_own(sdkp, buffer);
>                 sd_read_write_same(sdkp, buffer);
>                 sd_read_security(sdkp, buffer);
> diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
> index 84685168b6e0..1863de5ebae4 100644
> --- a/drivers/scsi/sd.h
> +++ b/drivers/scsi/sd.h
> @@ -125,6 +125,7 @@ struct scsi_disk {
>         unsigned int    physical_block_size;
>         unsigned int    max_medium_access_timeouts;
>         unsigned int    medium_access_timed_out;
> +       u16             permanent_stream_count; /* maximum number of streams
> */
This comment is a bit misleading:
The Block Limits Extension VPD page has a "maximum number of streams" field.
Maybe avoid the unnecessary confusion?

Thanks,
Avri

>         u8              media_present;
>         u8              write_prot;
>         u8              protection_type;/* Data Integrity Field */
Bart Van Assche Oct. 2, 2023, 5:42 p.m. UTC | #2
On 10/2/23 06:11, Avri Altman wrote:
>> sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
>>                  ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks,
>>                                           protect | fua, dld);
>>          } else if ((nr_blocks > 0xff) || (lba > 0x1fffff) ||
>> -                  sdp->use_10_for_rw || protect) {
>> +                  sdp->use_10_for_rw || protect ||
>> +                  rq->write_hint != WRITE_LIFE_NOT_SET) {
>
> Is this a typo?

I don't see a typo? Am I perhaps overlooking something?

>> +static void sd_read_io_hints(struct scsi_disk *sdkp, unsigned char *buffer)
>> +{
>> +       struct scsi_device *sdp = sdkp->device;
>> +       const struct scsi_io_group_descriptor *desc, *start, *end;
>> +       struct scsi_sense_hdr sshdr;
>> +       struct scsi_mode_data data;
>> +       int res;
>> +
>> +       res = scsi_mode_sense(sdp, /*dbd=*/0x8, /*modepage=*/0x0a,
>> +                             /*subpage=*/0x05, buffer, SD_BUF_SIZE,
>> +                             SD_TIMEOUT, sdkp->max_retries, &data, &sshdr);
>> +       if (res < 0)
>> +               return;
>> +       start = (void *)buffer + data.header_length + 16;
>> +       end = (void *)buffer + ((data.header_length + data.length)
>> +                               & ~(sizeof(*end) - 1));
>> +       /*
>> +        * From "SBC-5 Constrained Streams with Data Lifetimes": Device severs
>> +        * should assign the lowest numbered stream identifiers to permanent
>> +        * streams.
>> +        */
>> +       for (desc = start; desc < end; desc++)
>> +               if (!desc->st_enble)
>> +                       break;
> I don't see how you can conclude that the stream is permanent,
> without reading the perm bit from the stream status descriptor.

I will add code that retrieves the stream status and that checks the 
PERM bit.

>> diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
>> index 84685168b6e0..1863de5ebae4 100644
>> --- a/drivers/scsi/sd.h
>> +++ b/drivers/scsi/sd.h
>> @@ -125,6 +125,7 @@ struct scsi_disk {
>>          unsigned int    physical_block_size;
>>          unsigned int    max_medium_access_timeouts;
>>          unsigned int    medium_access_timed_out;
>> +       u16             permanent_stream_count; /* maximum number of streams
>> */
>
> This comment is a bit misleading:
> The Block Limits Extension VPD page has a "maximum number of streams" field.
> Maybe avoid the unnecessary confusion?

I will change that comment or leave it out.

Thanks,

Bart.
Avri Altman Oct. 3, 2023, 5:48 a.m. UTC | #3
> On 10/2/23 06:11, Avri Altman wrote:
> >> sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
> >>                  ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks,
> >>                                           protect | fua, dld);
> >>          } else if ((nr_blocks > 0xff) || (lba > 0x1fffff) ||
> >> -                  sdp->use_10_for_rw || protect) {
> >> +                  sdp->use_10_for_rw || protect ||
> >> +                  rq->write_hint != WRITE_LIFE_NOT_SET) {
> >
> > Is this a typo?
> 
> I don't see a typo? Am I perhaps overlooking something?
Forcing READ(6) into READ(10) because that req carries a write-hint,
Deserves an extra line in the commit log IMO.

Thanks,
Avri
Bart Van Assche Oct. 3, 2023, 4:58 p.m. UTC | #4
On 10/2/23 22:48, Avri Altman wrote:
>> On 10/2/23 06:11, Avri Altman wrote:
>>>> sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
>>>>                   ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks,
>>>>                                            protect | fua, dld);
>>>>           } else if ((nr_blocks > 0xff) || (lba > 0x1fffff) ||
>>>> -                  sdp->use_10_for_rw || protect) {
>>>> +                  sdp->use_10_for_rw || protect ||
>>>> +                  rq->write_hint != WRITE_LIFE_NOT_SET) {
>>>
>>> Is this a typo?
>>
>> I don't see a typo? Am I perhaps overlooking something?
 >
> Forcing READ(6) into READ(10) because that req carries a write-hint,
> Deserves an extra line in the commit log IMO.

Right, I should explain that the READ(6) command does not support write 
hints and hence that READ(10) is selected if a write hint is present.

Thanks,

Bart.
Bart Van Assche Oct. 3, 2023, 4:59 p.m. UTC | #5
On 10/3/23 09:58, Bart Van Assche wrote:
> On 10/2/23 22:48, Avri Altman wrote:
>>> On 10/2/23 06:11, Avri Altman wrote:
>>>>> sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
>>>>>                   ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks,
>>>>>                                            protect | fua, dld);
>>>>>           } else if ((nr_blocks > 0xff) || (lba > 0x1fffff) ||
>>>>> -                  sdp->use_10_for_rw || protect) {
>>>>> +                  sdp->use_10_for_rw || protect ||
>>>>> +                  rq->write_hint != WRITE_LIFE_NOT_SET) {
>>>>
>>>> Is this a typo?
>>>
>>> I don't see a typo? Am I perhaps overlooking something?
>  >
>> Forcing READ(6) into READ(10) because that req carries a write-hint,
>> Deserves an extra line in the commit log IMO.
> 
> Right, I should explain that the READ(6) command does not support write 
> hints and hence that READ(10) is selected if a write hint is present.

(replying to my own email)

In my answer READ should be changed into WRITE.

Bart.
diff mbox series

Patch

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 879edbc1a065..7bbc58cd99d1 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1001,12 +1001,38 @@  static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd)
 	return BLK_STS_OK;
 }
 
+/**
+ * sd_group_number() - Compute the GROUP NUMBER field
+ * @cmd: SCSI command for which to compute the value of the six-bit GROUP NUMBER
+ *	field.
+ *
+ * From "SBC-5 Constrained Streams with Data Lifetimes"
+ * (https://www.t10.org/cgi-bin/ac.pl?t=d&f=23-024r3.pdf):
+ * 0: no relative lifetime.
+ * 1: shortest relative lifetime.
+ * 2: second shortest relative lifetime.
+ * 3 - 0x3d: intermediate relative lifetimes.
+ * 0x3e: second longest relative lifetime.
+ * 0x3f: longest relative lifetime.
+ */
+static u8 sd_group_number(struct scsi_cmnd *cmd)
+{
+	const struct request *rq = scsi_cmd_to_rq(cmd);
+	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
+	const int max_gn = min_t(u16, sdkp->permanent_stream_count, 0x3f);
+
+	if (!sdkp->rscs || rq->write_hint == WRITE_LIFE_NOT_SET)
+		return 0;
+	return min(rq->write_hint - WRITE_LIFE_NONE, max_gn);
+}
+
 static blk_status_t sd_setup_rw32_cmnd(struct scsi_cmnd *cmd, bool write,
 				       sector_t lba, unsigned int nr_blocks,
 				       unsigned char flags, unsigned int dld)
 {
 	cmd->cmd_len = SD_EXT_CDB_SIZE;
 	cmd->cmnd[0]  = VARIABLE_LENGTH_CMD;
+	cmd->cmnd[6]  = sd_group_number(cmd);
 	cmd->cmnd[7]  = 0x18; /* Additional CDB len */
 	cmd->cmnd[9]  = write ? WRITE_32 : READ_32;
 	cmd->cmnd[10] = flags;
@@ -1025,7 +1051,7 @@  static blk_status_t sd_setup_rw16_cmnd(struct scsi_cmnd *cmd, bool write,
 	cmd->cmd_len  = 16;
 	cmd->cmnd[0]  = write ? WRITE_16 : READ_16;
 	cmd->cmnd[1]  = flags | ((dld >> 2) & 0x01);
-	cmd->cmnd[14] = (dld & 0x03) << 6;
+	cmd->cmnd[14] = ((dld & 0x03) << 6) | sd_group_number(cmd);
 	cmd->cmnd[15] = 0;
 	put_unaligned_be64(lba, &cmd->cmnd[2]);
 	put_unaligned_be32(nr_blocks, &cmd->cmnd[10]);
@@ -1040,7 +1066,7 @@  static blk_status_t sd_setup_rw10_cmnd(struct scsi_cmnd *cmd, bool write,
 	cmd->cmd_len = 10;
 	cmd->cmnd[0] = write ? WRITE_10 : READ_10;
 	cmd->cmnd[1] = flags;
-	cmd->cmnd[6] = 0;
+	cmd->cmnd[6] = sd_group_number(cmd);
 	cmd->cmnd[9] = 0;
 	put_unaligned_be32(lba, &cmd->cmnd[2]);
 	put_unaligned_be16(nr_blocks, &cmd->cmnd[7]);
@@ -1177,7 +1203,8 @@  static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
 		ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks,
 					 protect | fua, dld);
 	} else if ((nr_blocks > 0xff) || (lba > 0x1fffff) ||
-		   sdp->use_10_for_rw || protect) {
+		   sdp->use_10_for_rw || protect ||
+		   rq->write_hint != WRITE_LIFE_NOT_SET) {
 		ret = sd_setup_rw10_cmnd(cmd, write, lba, nr_blocks,
 					 protect | fua);
 	} else {
@@ -2912,6 +2939,37 @@  sd_read_cache_type(struct scsi_disk *sdkp, unsigned char *buffer)
 	sdkp->DPOFUA = 0;
 }
 
+static void sd_read_io_hints(struct scsi_disk *sdkp, unsigned char *buffer)
+{
+	struct scsi_device *sdp = sdkp->device;
+	const struct scsi_io_group_descriptor *desc, *start, *end;
+	struct scsi_sense_hdr sshdr;
+	struct scsi_mode_data data;
+	int res;
+
+	res = scsi_mode_sense(sdp, /*dbd=*/0x8, /*modepage=*/0x0a,
+			      /*subpage=*/0x05, buffer, SD_BUF_SIZE,
+			      SD_TIMEOUT, sdkp->max_retries, &data, &sshdr);
+	if (res < 0)
+		return;
+	start = (void *)buffer + data.header_length + 16;
+	end = (void *)buffer + ((data.header_length + data.length)
+				& ~(sizeof(*end) - 1));
+	/*
+	 * From "SBC-5 Constrained Streams with Data Lifetimes": Device severs
+	 * should assign the lowest numbered stream identifiers to permanent
+	 * streams.
+	 */
+	for (desc = start; desc < end; desc++)
+		if (!desc->st_enble)
+			break;
+	sdkp->permanent_stream_count = desc - start;
+	if (sdkp->rscs && sdkp->permanent_stream_count < 2)
+		sdev_printk(KERN_INFO, sdp,
+			    "Unexpected: RSCS has been set and the permanent stream count is %u\n",
+			    sdkp->permanent_stream_count);
+}
+
 /*
  * The ATO bit indicates whether the DIF application tag is available
  * for use by the operating system.
@@ -3395,6 +3453,7 @@  static int sd_revalidate_disk(struct gendisk *disk)
 
 		sd_read_write_protect_flag(sdkp, buffer);
 		sd_read_cache_type(sdkp, buffer);
+		sd_read_io_hints(sdkp, buffer);
 		sd_read_app_tag_own(sdkp, buffer);
 		sd_read_write_same(sdkp, buffer);
 		sd_read_security(sdkp, buffer);
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 84685168b6e0..1863de5ebae4 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -125,6 +125,7 @@  struct scsi_disk {
 	unsigned int	physical_block_size;
 	unsigned int	max_medium_access_timeouts;
 	unsigned int	medium_access_timed_out;
+	u16		permanent_stream_count;	/* maximum number of streams */
 	u8		media_present;
 	u8		write_prot;
 	u8		protection_type;/* Data Integrity Field */