diff mbox series

[v10,03/11] scsi: sd: Translate data lifetime information

Message ID 20240222214508.1630719-4-bvanassche@acm.org
State New
Headers show
Series Pass data lifetime information to SCSI disk devices | expand

Commit Message

Bart Van Assche Feb. 22, 2024, 9:44 p.m. UTC
Recently T10 standardized SBC constrained streams. This mechanism allows
to pass data lifetime information to SCSI devices in the group number
field. Add support for translating write hint information into a
permanent stream number in the sd driver.

Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/sd.c | 104 +++++++++++++++++++++++++++++++++++++++++++++-
 drivers/scsi/sd.h |   2 +
 2 files changed, 104 insertions(+), 2 deletions(-)

Comments

John Garry March 11, 2024, 10:46 a.m. UTC | #1
On 22/02/2024 21:44, Bart Van Assche wrote:
> +	sdkp->permanent_stream_count = desc - start;
> +	if (sdkp->permanent_stream_count < 2) {
> +		sd_printk(KERN_INFO, sdkp,
> +			  "Unexpected: RSCS has been set and the permanent stream count is %u\n",
> +			  sdkp->permanent_stream_count);
> +		return;
> +	}
> +
> +	sd_printk(KERN_INFO, sdkp, "permanent stream count = %d\n",
> +		  sdkp->permanent_stream_count);

I have been testing a recent linux-next (which includes this change), 
and I now see this following kernel log just for writing to the bdev 
file, like:

# mnt/test-pwritev2 -d -l 512 -p 512 /dev/sda
wrote 512 bytes at pos 512 write_size=512
# [   22.339526] sd 0:0:0:0: [sda] permanent stream count = 5

Is that log really required, especially at info level?

That is a scsi_debug disk - I assume that the relevant io hint feature 
is now default enabled there.

Thanks,
John
Bart Van Assche March 11, 2024, 5:14 p.m. UTC | #2
On 3/11/24 03:46, John Garry wrote:
> On 22/02/2024 21:44, Bart Van Assche wrote:
>> +    sdkp->permanent_stream_count = desc - start;
>> +    if (sdkp->permanent_stream_count < 2) {
>> +        sd_printk(KERN_INFO, sdkp,
>> +              "Unexpected: RSCS has been set and the permanent stream 
>> count is %u\n",
>> +              sdkp->permanent_stream_count);
>> +        return;
>> +    }
>> +
>> +    sd_printk(KERN_INFO, sdkp, "permanent stream count = %d\n",
>> +          sdkp->permanent_stream_count);
> 
> I have been testing a recent linux-next (which includes this change), 
> and I now see this following kernel log just for writing to the bdev 
> file, like:
> 
> # mnt/test-pwritev2 -d -l 512 -p 512 /dev/sda
> wrote 512 bytes at pos 512 write_size=512
> # [   22.339526] sd 0:0:0:0: [sda] permanent stream count = 5
> 
> Is that log really required, especially at info level?
> 
> That is a scsi_debug disk - I assume that the relevant io hint feature 
> is now default enabled there.

Hi John,

How about removing the sd_printk() call mentioned above and adding a
sysfs attribute instead that reports the number of permanent streams?

Thanks,

Bart.
John Garry March 11, 2024, 5:21 p.m. UTC | #3
On 11/03/2024 17:14, Bart Van Assche wrote:
>> I have been testing a recent linux-next (which includes this change), 
>> and I now see this following kernel log just for writing to the bdev 
>> file, like:
>>
>> # mnt/test-pwritev2 -d -l 512 -p 512 /dev/sda
>> wrote 512 bytes at pos 512 write_size=512
>> # [   22.339526] sd 0:0:0:0: [sda] permanent stream count = 5
>>
>> Is that log really required, especially at info level?
>>
>> That is a scsi_debug disk - I assume that the relevant io hint feature 
>> is now default enabled there.
> 
> Hi John,
> 
> How about removing the sd_printk() call mentioned above and adding a
> sysfs attribute instead that reports the number of permanent streams?

A sysfs entry sounds reasonable, but I don't know enough about this 
stream count to say whether that really is a good idea.

Thanks,
John
diff mbox series

Patch

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 86b819fa04d9..48496e816409 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -47,6 +47,7 @@ 
 #include <linux/blkpg.h>
 #include <linux/blk-pm.h>
 #include <linux/delay.h>
+#include <linux/rw_hint.h>
 #include <linux/major.h>
 #include <linux/mutex.h>
 #include <linux/string_helpers.h>
@@ -1080,12 +1081,35 @@  static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd)
 	return BLK_STS_OK;
 }
 
+/**
+ * sd_group_number() - Compute the GROUP NUMBER field
+ * @cmd: SCSI command for which to compute the value of the six-bit GROUP NUMBER
+ *	field.
+ *
+ * From SBC-5 r05 (https://www.t10.org/cgi-bin/ac.pl?t=f&f=sbc5r05.pdf):
+ * 0: no relative lifetime.
+ * 1: shortest relative lifetime.
+ * 2: second shortest relative lifetime.
+ * 3 - 0x3d: intermediate relative lifetimes.
+ * 0x3e: second longest relative lifetime.
+ * 0x3f: longest relative lifetime.
+ */
+static u8 sd_group_number(struct scsi_cmnd *cmd)
+{
+	const struct request *rq = scsi_cmd_to_rq(cmd);
+	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
+
+	return min3((u32)rq->write_hint, (u32)sdkp->permanent_stream_count,
+		    0x3fu);
+}
+
 static blk_status_t sd_setup_rw32_cmnd(struct scsi_cmnd *cmd, bool write,
 				       sector_t lba, unsigned int nr_blocks,
 				       unsigned char flags, unsigned int dld)
 {
 	cmd->cmd_len = SD_EXT_CDB_SIZE;
 	cmd->cmnd[0]  = VARIABLE_LENGTH_CMD;
+	cmd->cmnd[6]  = sd_group_number(cmd);
 	cmd->cmnd[7]  = 0x18; /* Additional CDB len */
 	cmd->cmnd[9]  = write ? WRITE_32 : READ_32;
 	cmd->cmnd[10] = flags;
@@ -1104,7 +1128,7 @@  static blk_status_t sd_setup_rw16_cmnd(struct scsi_cmnd *cmd, bool write,
 	cmd->cmd_len  = 16;
 	cmd->cmnd[0]  = write ? WRITE_16 : READ_16;
 	cmd->cmnd[1]  = flags | ((dld >> 2) & 0x01);
-	cmd->cmnd[14] = (dld & 0x03) << 6;
+	cmd->cmnd[14] = ((dld & 0x03) << 6) | sd_group_number(cmd);
 	cmd->cmnd[15] = 0;
 	put_unaligned_be64(lba, &cmd->cmnd[2]);
 	put_unaligned_be32(nr_blocks, &cmd->cmnd[10]);
@@ -1119,7 +1143,7 @@  static blk_status_t sd_setup_rw10_cmnd(struct scsi_cmnd *cmd, bool write,
 	cmd->cmd_len = 10;
 	cmd->cmnd[0] = write ? WRITE_10 : READ_10;
 	cmd->cmnd[1] = flags;
-	cmd->cmnd[6] = 0;
+	cmd->cmnd[6] = sd_group_number(cmd);
 	cmd->cmnd[9] = 0;
 	put_unaligned_be32(lba, &cmd->cmnd[2]);
 	put_unaligned_be16(nr_blocks, &cmd->cmnd[7]);
@@ -3001,6 +3025,81 @@  sd_read_cache_type(struct scsi_disk *sdkp, unsigned char *buffer)
 	sdkp->DPOFUA = 0;
 }
 
+static bool sd_is_perm_stream(struct scsi_disk *sdkp, unsigned int stream_id)
+{
+	u8 cdb[16] = { SERVICE_ACTION_IN_16, SAI_GET_STREAM_STATUS };
+	struct {
+		struct scsi_stream_status_header h;
+		struct scsi_stream_status s;
+	} buf;
+	struct scsi_device *sdev = sdkp->device;
+	struct scsi_sense_hdr sshdr;
+	const struct scsi_exec_args exec_args = {
+		.sshdr = &sshdr,
+	};
+	int res;
+
+	put_unaligned_be16(stream_id, &cdb[4]);
+	put_unaligned_be32(sizeof(buf), &cdb[10]);
+
+	res = scsi_execute_cmd(sdev, cdb, REQ_OP_DRV_IN, &buf, sizeof(buf),
+			       SD_TIMEOUT, sdkp->max_retries, &exec_args);
+	if (res < 0)
+		return false;
+	if (scsi_status_is_check_condition(res) && scsi_sense_valid(&sshdr))
+		sd_print_sense_hdr(sdkp, &sshdr);
+	if (res)
+		return false;
+	if (get_unaligned_be32(&buf.h.len) < sizeof(struct scsi_stream_status))
+		return false;
+	return buf.h.stream_status[0].perm;
+}
+
+static void sd_read_io_hints(struct scsi_disk *sdkp, unsigned char *buffer)
+{
+	struct scsi_device *sdp = sdkp->device;
+	const struct scsi_io_group_descriptor *desc, *start, *end;
+	struct scsi_sense_hdr sshdr;
+	struct scsi_mode_data data;
+	int res;
+
+	if (!sdkp->rscs)
+		return;
+
+	res = scsi_mode_sense(sdp, /*dbd=*/0x8, /*modepage=*/0x0a,
+			      /*subpage=*/0x05, buffer, SD_BUF_SIZE, SD_TIMEOUT,
+			      sdkp->max_retries, &data, &sshdr);
+	if (res < 0)
+		return;
+	start = (void *)buffer + data.header_length + 16;
+	end = (void *)buffer + ALIGN_DOWN(data.header_length + data.length,
+					  sizeof(*end));
+	/*
+	 * From "SBC-5 Constrained Streams with Data Lifetimes": Device severs
+	 * should assign the lowest numbered stream identifiers to permanent
+	 * streams.
+	 */
+	for (desc = start; desc < end; desc++)
+		if (!desc->st_enble || !sd_is_perm_stream(sdkp, desc - start))
+			break;
+	sdkp->permanent_stream_count = desc - start;
+	if (sdkp->permanent_stream_count < 2) {
+		sd_printk(KERN_INFO, sdkp,
+			  "Unexpected: RSCS has been set and the permanent stream count is %u\n",
+			  sdkp->permanent_stream_count);
+		return;
+	}
+
+	sd_printk(KERN_INFO, sdkp, "permanent stream count = %d\n",
+		  sdkp->permanent_stream_count);
+	/*
+	 * Use WRITE(10) instead of WRITE(6) if data lifetime information is
+	 * present because the WRITE(6) command does not have a GROUP NUMBER
+	 * field.
+	 */
+	sdp->use_10_for_rw = true;
+}
+
 /*
  * The ATO bit indicates whether the DIF application tag is available
  * for use by the operating system.
@@ -3481,6 +3580,7 @@  static int sd_revalidate_disk(struct gendisk *disk)
 
 		sd_read_write_protect_flag(sdkp, buffer);
 		sd_read_cache_type(sdkp, buffer);
+		sd_read_io_hints(sdkp, buffer);
 		sd_read_app_tag_own(sdkp, buffer);
 		sd_read_write_same(sdkp, buffer);
 		sd_read_security(sdkp, buffer);
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index f1120de03d51..5c4285a582b2 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -125,6 +125,8 @@  struct scsi_disk {
 	unsigned int	physical_block_size;
 	unsigned int	max_medium_access_timeouts;
 	unsigned int	medium_access_timed_out;
+			/* number of permanent streams */
+	u16		permanent_stream_count;
 	u8		media_present;
 	u8		write_prot;
 	u8		protection_type;/* Data Integrity Field */