diff mbox series

[5/8] scsi: sd_zbc: Hide gap zones

Message ID 20220415201752.2793700-6-bvanassche@acm.org
State New
Headers show
Series Support zoned devices with gap zones | expand

Commit Message

Bart Van Assche April 15, 2022, 8:17 p.m. UTC
ZBC-2 allows host-managed disks to report gap zones. Another new feature
in ZBC-2 is support for constant zone starting LBA offsets. These two
features allow zoned disks to report a starting LBA offset that is a
power of two even if the number of logical blocks with data per zone is
not a power of two.

For zoned disks that report a constant zone starting LBA offset, hide
the gap zones from the block layer. Report the starting LBA offset as
zone size and report the number of logical blocks with data per zone as
the zone capacity.

Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
[ bvanassche: Reworked this patch ]
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/sd.h         |  5 +++
 drivers/scsi/sd_zbc.c     | 84 +++++++++++++++++++++++++++++++++++----
 include/scsi/scsi_proto.h |  8 +++-
 3 files changed, 88 insertions(+), 9 deletions(-)

Comments

Damien Le Moal April 17, 2022, 11:22 p.m. UTC | #1
On 4/16/22 05:17, Bart Van Assche wrote:
> ZBC-2 allows host-managed disks to report gap zones. Another new feature
> in ZBC-2 is support for constant zone starting LBA offsets. These two
> features allow zoned disks to report a starting LBA offset that is a
> power of two even if the number of logical blocks with data per zone is
> not a power of two.

I think this last sentence would be clearer phrased like this:

These two features allow zoned disks to report zone start LBAs that are a
power of two even if the number of logical blocks with data per zone is
not a power of two.


> 
> For zoned disks that report a constant zone starting LBA offset, hide
> the gap zones from the block layer. Report the starting LBA offset as
> zone size and report the number of logical blocks with data per zone as
> the zone capacity.
> 
> Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> [ bvanassche: Reworked this patch ]
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/sd.h         |  5 +++
>  drivers/scsi/sd_zbc.c     | 84 +++++++++++++++++++++++++++++++++++----
>  include/scsi/scsi_proto.h |  8 +++-
>  3 files changed, 88 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
> index 2e983578a699..e0793e789fdc 100644
> --- a/drivers/scsi/sd.h
> +++ b/drivers/scsi/sd.h
> @@ -95,6 +95,11 @@ struct scsi_disk {
>  	u32		zones_optimal_open;
>  	u32		zones_optimal_nonseq;
>  	u32		zones_max_open;
> +	/*
> +	 * Either zero or a power of two. If not zero it means that the offset
> +	 * between zone starting LBAs is constant.
> +	 */
> +	u32		zone_starting_lba_gran;
>  	u32		*zones_wp_offset;
>  	spinlock_t	zones_wp_offset_lock;
>  	u32		*rev_wp_offset;
> diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
> index fe46b4b9913a..92eace611364 100644
> --- a/drivers/scsi/sd_zbc.c
> +++ b/drivers/scsi/sd_zbc.c
> @@ -37,7 +37,7 @@ static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone)
>  	case BLK_ZONE_COND_CLOSED:
>  		return zone->wp - zone->start;
>  	case BLK_ZONE_COND_FULL:
> -		return zone->len;
> +		return zone->capacity;
>  	case BLK_ZONE_COND_EMPTY:
>  	case BLK_ZONE_COND_OFFLINE:
>  	case BLK_ZONE_COND_READONLY:
> @@ -50,6 +50,12 @@ static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone)
>  	}
>  }
>  
> +/* Whether or not a SCSI zone descriptor describes a gap zone. */
> +static bool sd_zbc_is_gap_zone(const u8 buf[64])
> +{
> +	return (buf[0] & 0xf) == ZBC_ZONE_TYPE_GAP;
> +}
> +
>  /**
>   * sd_zbc_parse_report - Parse a SCSI zone descriptor
>   * @sdkp: SCSI disk pointer.
> @@ -68,8 +74,12 @@ static int sd_zbc_parse_report(struct scsi_disk *sdkp, const u8 buf[64],
>  {
>  	struct scsi_device *sdp = sdkp->device;
>  	struct blk_zone zone = { 0 };
> +	sector_t gran;
> +	u64 start_lba;
>  	int ret;
>  
> +	if (WARN_ON_ONCE(sd_zbc_is_gap_zone(buf)))
> +		return -EINVAL;
>  	zone.type = buf[0] & 0x0f;
>  	zone.cond = (buf[1] >> 4) & 0xf;
>  	if (buf[1] & 0x01)
> @@ -79,9 +89,25 @@ static int sd_zbc_parse_report(struct scsi_disk *sdkp, const u8 buf[64],
>  
>  	zone.len = logical_to_sectors(sdp, get_unaligned_be64(&buf[8]));
>  	zone.capacity = zone.len;
> -	zone.start = logical_to_sectors(sdp, get_unaligned_be64(&buf[16]));
> +	start_lba = get_unaligned_be64(&buf[16]);
> +	zone.start = logical_to_sectors(sdp, start_lba);
> +	if (sdkp->zone_starting_lba_gran) {
> +		idx = start_lba >> ilog2(sdkp->zone_starting_lba_gran);
> +		gran = logical_to_sectors(sdp, sdkp->zone_starting_lba_gran);
> +		if (zone.capacity > zone.len || zone.len > gran) {
> +			sd_printk(KERN_ERR, sdkp,
> +				  "Invalid zone for LBA %llu: zone capacity %llu; zone length %llu; granularity %llu\n",
> +				  start_lba, zone.capacity, zone.len, gran);
> +			return -EINVAL;
> +		}
> +		/*
> +		 * Change the zone length obtained from REPORT ZONES into the
> +		 * zone starting LBA granularity.
> +		 */
> +		zone.len = gran;
> +	}
>  	if (zone.cond == ZBC_ZONE_COND_FULL)
> -		zone.wp = zone.start + zone.len;
> +		zone.wp = zone.start + zone.capacity;
>  	else
>  		zone.wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24]));
>  
> @@ -227,6 +253,7 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
>  						      sdkp->capacity);
>  	unsigned int nr, i;
>  	unsigned char *buf;
> +	u64 zone_length, start_lba;
>  	size_t offset, buflen = 0;
>  	int zone_idx = 0;
>  	int ret;
> @@ -254,16 +281,33 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
>  		if (!nr)
>  			break;
>  
> -		for (i = 0; i < nr && zone_idx < nr_zones; i++) {
> +		for (i = 0; i < nr && zone_idx < nr_zones; i++, zone_idx++) {
>  			offset += 64;
> +			zone_length = get_unaligned_be64(&buf[offset + 8]);
> +			start_lba = get_unaligned_be64(&buf[offset + 16]);
> +			if (start_lba < sectors_to_logical(sdkp->device, sector)
> +			    || start_lba + zone_length < start_lba) {
> +				sd_printk(KERN_ERR, sdkp,
> +					  "Zone %d is invalid: %llu + %llu\n",
> +					  zone_idx, start_lba, zone_length);
> +				ret = -EINVAL;
> +				goto out;
> +			}
> +			sector = logical_to_sectors(sdkp->device, start_lba +
> +						    zone_length);
> +			if (sd_zbc_is_gap_zone(&buf[offset])) {
> +				if (sdkp->zone_starting_lba_gran)
> +					continue;
> +				sd_printk(KERN_ERR, sdkp,
> +					  "Gap zone without constant LBA offsets\n");
> +				ret = -EINVAL;
> +				goto out;
> +			}
>  			ret = sd_zbc_parse_report(sdkp, buf + offset, zone_idx,
>  						  cb, data);
>  			if (ret)
>  				goto out;
> -			zone_idx++;
>  		}
> -
> -		sector += sd_zbc_zone_sectors(sdkp) * i;
>  	}
>  
>  	ret = zone_idx;
> @@ -580,6 +624,8 @@ unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
>  static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
>  					      unsigned char *buf)
>  {
> +	u64 zone_starting_lba_gran;
> +	u8 zone_alignment_method;
>  
>  	if (scsi_get_vpd_page(sdkp->device, 0xb6, buf, 64)) {
>  		sd_printk(KERN_NOTICE, sdkp,
> @@ -599,6 +645,28 @@ static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
>  		sdkp->zones_optimal_open = 0;
>  		sdkp->zones_optimal_nonseq = 0;
>  		sdkp->zones_max_open = get_unaligned_be32(&buf[16]);
> +		zone_alignment_method = buf[23] & 0xf;
> +		if (zone_alignment_method ==
> +		    ZBC_CONSTANT_ZONE_STARTING_LBA_OFFSETS) {
> +			zone_starting_lba_gran =
> +				get_unaligned_be64(&buf[24]);
> +			if (zone_starting_lba_gran == 0) {
> +				sd_printk(KERN_ERR, sdkp,
> +					  "Inconsistent: zone starting LBA granularity is zero\n");
> +			}
> +			if (!is_power_of_2(zone_starting_lba_gran) ||
> +			    logical_to_sectors(sdkp->device,
> +					       zone_starting_lba_gran) >
> +			    UINT_MAX) {
> +				sd_printk(KERN_ERR, sdkp,
> +					  "Invalid zone starting LBA granularity %llu\n",
> +					  zone_starting_lba_gran);
> +				return -EINVAL;
> +			}
> +			sdkp->zone_starting_lba_gran = zone_starting_lba_gran;
> +			WARN_ON_ONCE(sdkp->zone_starting_lba_gran !=
> +				     zone_starting_lba_gran);
> +		}
>  	}
>  
>  	/*
> @@ -664,7 +732,7 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf,
>  		return -EFBIG;
>  	}
>  
> -	*zblocks = zone_blocks;
> +	*zblocks = sdkp->zone_starting_lba_gran ? : zone_blocks;
>  
>  	if (!is_power_of_2(*zblocks)) {
>  		sd_printk(KERN_ERR, sdkp,
> diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h
> index f017843a8124..521f9feac778 100644
> --- a/include/scsi/scsi_proto.h
> +++ b/include/scsi/scsi_proto.h
> @@ -307,7 +307,9 @@ enum zbc_zone_type {
>  	ZBC_ZONE_TYPE_CONV		= 0x1,
>  	ZBC_ZONE_TYPE_SEQWRITE_REQ	= 0x2,
>  	ZBC_ZONE_TYPE_SEQWRITE_PREF	= 0x3,
> -	/* 0x4 to 0xf are reserved */
> +	ZBC_ZONE_TYPE_SEQ_OR_BEFORE_REQ	= 0x4,
> +	ZBC_ZONE_TYPE_GAP		= 0x5,
> +	/* 0x6 to 0xf are reserved */
>  };
>  
>  /* Zone conditions of REPORT ZONES zone descriptors */
> @@ -323,6 +325,10 @@ enum zbc_zone_cond {
>  	ZBC_ZONE_COND_OFFLINE		= 0xf,
>  };
>  
> +enum zbc_zone_alignment_method {
> +	ZBC_CONSTANT_ZONE_STARTING_LBA_OFFSETS = 8,
> +};
> +
>  /* Version descriptor values for INQUIRY */
>  enum scsi_version_descriptor {
>  	SCSI_VERSION_DESCRIPTOR_FCP4	= 0x0a40,
Bart Van Assche April 18, 2022, 4:59 p.m. UTC | #2
On 4/17/22 16:22, Damien Le Moal wrote:
> On 4/16/22 05:17, Bart Van Assche wrote:
>> ZBC-2 allows host-managed disks to report gap zones. Another new feature
>> in ZBC-2 is support for constant zone starting LBA offsets. These two
>> features allow zoned disks to report a starting LBA offset that is a
>> power of two even if the number of logical blocks with data per zone is
>> not a power of two.
> 
> I think this last sentence would be clearer phrased like this:
> 
> These two features allow zoned disks to report zone start LBAs that are a
> power of two even if the number of logical blocks with data per zone is
> not a power of two.

I will modify the patch description.

Thanks,

Bart.
diff mbox series

Patch

diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 2e983578a699..e0793e789fdc 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -95,6 +95,11 @@  struct scsi_disk {
 	u32		zones_optimal_open;
 	u32		zones_optimal_nonseq;
 	u32		zones_max_open;
+	/*
+	 * Either zero or a power of two. If not zero it means that the offset
+	 * between zone starting LBAs is constant.
+	 */
+	u32		zone_starting_lba_gran;
 	u32		*zones_wp_offset;
 	spinlock_t	zones_wp_offset_lock;
 	u32		*rev_wp_offset;
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index fe46b4b9913a..92eace611364 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -37,7 +37,7 @@  static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone)
 	case BLK_ZONE_COND_CLOSED:
 		return zone->wp - zone->start;
 	case BLK_ZONE_COND_FULL:
-		return zone->len;
+		return zone->capacity;
 	case BLK_ZONE_COND_EMPTY:
 	case BLK_ZONE_COND_OFFLINE:
 	case BLK_ZONE_COND_READONLY:
@@ -50,6 +50,12 @@  static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone)
 	}
 }
 
+/* Whether or not a SCSI zone descriptor describes a gap zone. */
+static bool sd_zbc_is_gap_zone(const u8 buf[64])
+{
+	return (buf[0] & 0xf) == ZBC_ZONE_TYPE_GAP;
+}
+
 /**
  * sd_zbc_parse_report - Parse a SCSI zone descriptor
  * @sdkp: SCSI disk pointer.
@@ -68,8 +74,12 @@  static int sd_zbc_parse_report(struct scsi_disk *sdkp, const u8 buf[64],
 {
 	struct scsi_device *sdp = sdkp->device;
 	struct blk_zone zone = { 0 };
+	sector_t gran;
+	u64 start_lba;
 	int ret;
 
+	if (WARN_ON_ONCE(sd_zbc_is_gap_zone(buf)))
+		return -EINVAL;
 	zone.type = buf[0] & 0x0f;
 	zone.cond = (buf[1] >> 4) & 0xf;
 	if (buf[1] & 0x01)
@@ -79,9 +89,25 @@  static int sd_zbc_parse_report(struct scsi_disk *sdkp, const u8 buf[64],
 
 	zone.len = logical_to_sectors(sdp, get_unaligned_be64(&buf[8]));
 	zone.capacity = zone.len;
-	zone.start = logical_to_sectors(sdp, get_unaligned_be64(&buf[16]));
+	start_lba = get_unaligned_be64(&buf[16]);
+	zone.start = logical_to_sectors(sdp, start_lba);
+	if (sdkp->zone_starting_lba_gran) {
+		idx = start_lba >> ilog2(sdkp->zone_starting_lba_gran);
+		gran = logical_to_sectors(sdp, sdkp->zone_starting_lba_gran);
+		if (zone.capacity > zone.len || zone.len > gran) {
+			sd_printk(KERN_ERR, sdkp,
+				  "Invalid zone for LBA %llu: zone capacity %llu; zone length %llu; granularity %llu\n",
+				  start_lba, zone.capacity, zone.len, gran);
+			return -EINVAL;
+		}
+		/*
+		 * Change the zone length obtained from REPORT ZONES into the
+		 * zone starting LBA granularity.
+		 */
+		zone.len = gran;
+	}
 	if (zone.cond == ZBC_ZONE_COND_FULL)
-		zone.wp = zone.start + zone.len;
+		zone.wp = zone.start + zone.capacity;
 	else
 		zone.wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24]));
 
@@ -227,6 +253,7 @@  int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
 						      sdkp->capacity);
 	unsigned int nr, i;
 	unsigned char *buf;
+	u64 zone_length, start_lba;
 	size_t offset, buflen = 0;
 	int zone_idx = 0;
 	int ret;
@@ -254,16 +281,33 @@  int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
 		if (!nr)
 			break;
 
-		for (i = 0; i < nr && zone_idx < nr_zones; i++) {
+		for (i = 0; i < nr && zone_idx < nr_zones; i++, zone_idx++) {
 			offset += 64;
+			zone_length = get_unaligned_be64(&buf[offset + 8]);
+			start_lba = get_unaligned_be64(&buf[offset + 16]);
+			if (start_lba < sectors_to_logical(sdkp->device, sector)
+			    || start_lba + zone_length < start_lba) {
+				sd_printk(KERN_ERR, sdkp,
+					  "Zone %d is invalid: %llu + %llu\n",
+					  zone_idx, start_lba, zone_length);
+				ret = -EINVAL;
+				goto out;
+			}
+			sector = logical_to_sectors(sdkp->device, start_lba +
+						    zone_length);
+			if (sd_zbc_is_gap_zone(&buf[offset])) {
+				if (sdkp->zone_starting_lba_gran)
+					continue;
+				sd_printk(KERN_ERR, sdkp,
+					  "Gap zone without constant LBA offsets\n");
+				ret = -EINVAL;
+				goto out;
+			}
 			ret = sd_zbc_parse_report(sdkp, buf + offset, zone_idx,
 						  cb, data);
 			if (ret)
 				goto out;
-			zone_idx++;
 		}
-
-		sector += sd_zbc_zone_sectors(sdkp) * i;
 	}
 
 	ret = zone_idx;
@@ -580,6 +624,8 @@  unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
 static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
 					      unsigned char *buf)
 {
+	u64 zone_starting_lba_gran;
+	u8 zone_alignment_method;
 
 	if (scsi_get_vpd_page(sdkp->device, 0xb6, buf, 64)) {
 		sd_printk(KERN_NOTICE, sdkp,
@@ -599,6 +645,28 @@  static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
 		sdkp->zones_optimal_open = 0;
 		sdkp->zones_optimal_nonseq = 0;
 		sdkp->zones_max_open = get_unaligned_be32(&buf[16]);
+		zone_alignment_method = buf[23] & 0xf;
+		if (zone_alignment_method ==
+		    ZBC_CONSTANT_ZONE_STARTING_LBA_OFFSETS) {
+			zone_starting_lba_gran =
+				get_unaligned_be64(&buf[24]);
+			if (zone_starting_lba_gran == 0) {
+				sd_printk(KERN_ERR, sdkp,
+					  "Inconsistent: zone starting LBA granularity is zero\n");
+			}
+			if (!is_power_of_2(zone_starting_lba_gran) ||
+			    logical_to_sectors(sdkp->device,
+					       zone_starting_lba_gran) >
+			    UINT_MAX) {
+				sd_printk(KERN_ERR, sdkp,
+					  "Invalid zone starting LBA granularity %llu\n",
+					  zone_starting_lba_gran);
+				return -EINVAL;
+			}
+			sdkp->zone_starting_lba_gran = zone_starting_lba_gran;
+			WARN_ON_ONCE(sdkp->zone_starting_lba_gran !=
+				     zone_starting_lba_gran);
+		}
 	}
 
 	/*
@@ -664,7 +732,7 @@  static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf,
 		return -EFBIG;
 	}
 
-	*zblocks = zone_blocks;
+	*zblocks = sdkp->zone_starting_lba_gran ? : zone_blocks;
 
 	if (!is_power_of_2(*zblocks)) {
 		sd_printk(KERN_ERR, sdkp,
diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h
index f017843a8124..521f9feac778 100644
--- a/include/scsi/scsi_proto.h
+++ b/include/scsi/scsi_proto.h
@@ -307,7 +307,9 @@  enum zbc_zone_type {
 	ZBC_ZONE_TYPE_CONV		= 0x1,
 	ZBC_ZONE_TYPE_SEQWRITE_REQ	= 0x2,
 	ZBC_ZONE_TYPE_SEQWRITE_PREF	= 0x3,
-	/* 0x4 to 0xf are reserved */
+	ZBC_ZONE_TYPE_SEQ_OR_BEFORE_REQ	= 0x4,
+	ZBC_ZONE_TYPE_GAP		= 0x5,
+	/* 0x6 to 0xf are reserved */
 };
 
 /* Zone conditions of REPORT ZONES zone descriptors */
@@ -323,6 +325,10 @@  enum zbc_zone_cond {
 	ZBC_ZONE_COND_OFFLINE		= 0xf,
 };
 
+enum zbc_zone_alignment_method {
+	ZBC_CONSTANT_ZONE_STARTING_LBA_OFFSETS = 8,
+};
+
 /* Version descriptor values for INQUIRY */
 enum scsi_version_descriptor {
 	SCSI_VERSION_DESCRIPTOR_FCP4	= 0x0a40,