diff mbox series

[v15,01/19] block: Introduce more member variables related to zone write locking

Message ID 20231114211804.1449162-2-bvanassche@acm.org
State New
Headers show
Series Improve write performance for zoned UFS devices | expand

Commit Message

Bart Van Assche Nov. 14, 2023, 9:16 p.m. UTC
Many but not all storage controllers require serialization of zoned writes.
Introduce two new request queue limit member variables related to write
serialization. 'driver_preserves_write_order' allows block drivers to
indicate that the order of write commands is preserved and hence that
serialization of writes per zone is not required. 'use_zone_write_lock' is
set by disk_set_zoned() if and only if the block device has zones and if
the block driver does not preserve the order of write requests.

Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Nitesh Shetty <nj.shetty@samsung.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 block/blk-settings.c   | 15 +++++++++++++++
 block/blk-zoned.c      |  1 +
 include/linux/blkdev.h | 10 ++++++++++
 3 files changed, 26 insertions(+)

Comments

Damien Le Moal Nov. 19, 2023, 11:29 p.m. UTC | #1
On 11/15/23 06:16, Bart Van Assche wrote:
> Many but not all storage controllers require serialization of zoned writes.
> Introduce two new request queue limit member variables related to write
> serialization. 'driver_preserves_write_order' allows block drivers to
> indicate that the order of write commands is preserved and hence that
> serialization of writes per zone is not required. 'use_zone_write_lock' is
> set by disk_set_zoned() if and only if the block device has zones and if
> the block driver does not preserve the order of write requests.
> 
> Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
> Reviewed-by: Hannes Reinecke <hare@suse.de>
> Reviewed-by: Nitesh Shetty <nj.shetty@samsung.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  block/blk-settings.c   | 15 +++++++++++++++
>  block/blk-zoned.c      |  1 +
>  include/linux/blkdev.h | 10 ++++++++++
>  3 files changed, 26 insertions(+)
> 
> diff --git a/block/blk-settings.c b/block/blk-settings.c
> index 0046b447268f..4c776c08f190 100644
> --- a/block/blk-settings.c
> +++ b/block/blk-settings.c
> @@ -56,6 +56,8 @@ void blk_set_default_limits(struct queue_limits *lim)
>  	lim->alignment_offset = 0;
>  	lim->io_opt = 0;
>  	lim->misaligned = 0;
> +	lim->driver_preserves_write_order = false;
> +	lim->use_zone_write_lock = false;
>  	lim->zoned = BLK_ZONED_NONE;
>  	lim->zone_write_granularity = 0;
>  	lim->dma_alignment = 511;
> @@ -82,6 +84,8 @@ void blk_set_stacking_limits(struct queue_limits *lim)
>  	lim->max_dev_sectors = UINT_MAX;
>  	lim->max_write_zeroes_sectors = UINT_MAX;
>  	lim->max_zone_append_sectors = UINT_MAX;
> +	/* Request-based stacking drivers do not reorder requests. */

Rereading this patch, I do not think this statement is correct. I seriously
doubt that multipath will preserve write command order in all cases...

> +	lim->driver_preserves_write_order = true;

... so it is likely much safer to set the default to "false" as that is the
default for all requests in general.

>  }
>  EXPORT_SYMBOL(blk_set_stacking_limits);
>  
> @@ -685,6 +689,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
>  						   b->max_secure_erase_sectors);
>  	t->zone_write_granularity = max(t->zone_write_granularity,
>  					b->zone_write_granularity);
> +	t->driver_preserves_write_order = t->driver_preserves_write_order &&
> +		b->driver_preserves_write_order;
> +	t->use_zone_write_lock = t->use_zone_write_lock ||
> +		b->use_zone_write_lock;

Very minor nit: splitting the line after the equal would make this more readable.

>  	t->zoned = max(t->zoned, b->zoned);
>  	return ret;
>  }
> @@ -949,6 +957,13 @@ void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
>  	}
>  
>  	q->limits.zoned = model;
> +	/*
> +	 * Use the zone write lock only for zoned block devices and only if
> +	 * the block driver does not preserve the order of write commands.
> +	 */
> +	q->limits.use_zone_write_lock = model != BLK_ZONED_NONE &&
> +		!q->limits.driver_preserves_write_order;
> +
>  	if (model != BLK_ZONED_NONE) {
>  		/*
>  		 * Set the zone write granularity to the device logical block
> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
> index 619ee41a51cc..112620985bff 100644
> --- a/block/blk-zoned.c
> +++ b/block/blk-zoned.c
> @@ -631,6 +631,7 @@ void disk_clear_zone_settings(struct gendisk *disk)
>  	q->limits.chunk_sectors = 0;
>  	q->limits.zone_write_granularity = 0;
>  	q->limits.max_zone_append_sectors = 0;
> +	q->limits.use_zone_write_lock = false;
>  
>  	blk_mq_unfreeze_queue(q);
>  }
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 51fa7ffdee83..2d452f5a36c8 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -316,6 +316,16 @@ struct queue_limits {
>  	unsigned char		misaligned;
>  	unsigned char		discard_misaligned;
>  	unsigned char		raid_partial_stripes_expensive;
> +	/*
> +	 * Whether or not the block driver preserves the order of write
> +	 * requests. Set by the block driver.
> +	 */
> +	bool			driver_preserves_write_order;
> +	/*
> +	 * Whether or not zone write locking should be used. Set by
> +	 * disk_set_zoned().
> +	 */
> +	bool			use_zone_write_lock;
>  	enum blk_zoned_model	zoned;
>  
>  	/*
Bart Van Assche Nov. 20, 2023, 8:44 p.m. UTC | #2
On 11/19/23 15:29, Damien Le Moal wrote:
> On 11/15/23 06:16, Bart Van Assche wrote:
>> @@ -82,6 +84,8 @@ void blk_set_stacking_limits(struct queue_limits *lim)
>>   	lim->max_dev_sectors = UINT_MAX;
>>   	lim->max_write_zeroes_sectors = UINT_MAX;
>>   	lim->max_zone_append_sectors = UINT_MAX;
>> +	/* Request-based stacking drivers do not reorder requests. */
> 
> Rereading this patch, I do not think this statement is correct. I seriously
> doubt that multipath will preserve write command order in all cases...
> 
>> +	lim->driver_preserves_write_order = true;
> 
> ... so it is likely much safer to set the default to "false" as that is the
> default for all requests in general.

How about applying this (untested) patch on top of this patch series?

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 4c776c08f190..aba1972e9767 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -84,8 +84,6 @@ void blk_set_stacking_limits(struct queue_limits *lim)
  	lim->max_dev_sectors = UINT_MAX;
  	lim->max_write_zeroes_sectors = UINT_MAX;
  	lim->max_zone_append_sectors = UINT_MAX;
-	/* Request-based stacking drivers do not reorder requests. */
-	lim->driver_preserves_write_order = true;
  }
  EXPORT_SYMBOL(blk_set_stacking_limits);

diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 2d3e186ca87e..cb9abe4bd065 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -147,6 +147,11 @@ static int linear_report_zones(struct dm_target *ti,
  #define linear_report_zones NULL
  #endif

+static void linear_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+	limits->driver_preserves_write_order = true;
+}
+
  static int linear_iterate_devices(struct dm_target *ti,
  				  iterate_devices_callout_fn fn, void *data)
  {
@@ -208,6 +213,7 @@ static struct target_type linear_target = {
  	.map    = linear_map,
  	.status = linear_status,
  	.prepare_ioctl = linear_prepare_ioctl,
+	.io_hints = linear_io_hints,
  	.iterate_devices = linear_iterate_devices,
  	.direct_access = linear_dax_direct_access,
  	.dax_zero_page_range = linear_dax_zero_page_range,

>> @@ -685,6 +689,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
>>   						   b->max_secure_erase_sectors);
>>   	t->zone_write_granularity = max(t->zone_write_granularity,
>>   					b->zone_write_granularity);
>> +	t->driver_preserves_write_order = t->driver_preserves_write_order &&
>> +		b->driver_preserves_write_order;
>> +	t->use_zone_write_lock = t->use_zone_write_lock ||
>> +		b->use_zone_write_lock;
> 
> Very minor nit: splitting the line after the equal would make this more readable.

Hmm ... I have often seen other reviewers asking to maximize the use of each
source code line as much as reasonably possible.

Thanks,

Bart.
diff mbox series

Patch

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 0046b447268f..4c776c08f190 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -56,6 +56,8 @@  void blk_set_default_limits(struct queue_limits *lim)
 	lim->alignment_offset = 0;
 	lim->io_opt = 0;
 	lim->misaligned = 0;
+	lim->driver_preserves_write_order = false;
+	lim->use_zone_write_lock = false;
 	lim->zoned = BLK_ZONED_NONE;
 	lim->zone_write_granularity = 0;
 	lim->dma_alignment = 511;
@@ -82,6 +84,8 @@  void blk_set_stacking_limits(struct queue_limits *lim)
 	lim->max_dev_sectors = UINT_MAX;
 	lim->max_write_zeroes_sectors = UINT_MAX;
 	lim->max_zone_append_sectors = UINT_MAX;
+	/* Request-based stacking drivers do not reorder requests. */
+	lim->driver_preserves_write_order = true;
 }
 EXPORT_SYMBOL(blk_set_stacking_limits);
 
@@ -685,6 +689,10 @@  int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 						   b->max_secure_erase_sectors);
 	t->zone_write_granularity = max(t->zone_write_granularity,
 					b->zone_write_granularity);
+	t->driver_preserves_write_order = t->driver_preserves_write_order &&
+		b->driver_preserves_write_order;
+	t->use_zone_write_lock = t->use_zone_write_lock ||
+		b->use_zone_write_lock;
 	t->zoned = max(t->zoned, b->zoned);
 	return ret;
 }
@@ -949,6 +957,13 @@  void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
 	}
 
 	q->limits.zoned = model;
+	/*
+	 * Use the zone write lock only for zoned block devices and only if
+	 * the block driver does not preserve the order of write commands.
+	 */
+	q->limits.use_zone_write_lock = model != BLK_ZONED_NONE &&
+		!q->limits.driver_preserves_write_order;
+
 	if (model != BLK_ZONED_NONE) {
 		/*
 		 * Set the zone write granularity to the device logical block
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 619ee41a51cc..112620985bff 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -631,6 +631,7 @@  void disk_clear_zone_settings(struct gendisk *disk)
 	q->limits.chunk_sectors = 0;
 	q->limits.zone_write_granularity = 0;
 	q->limits.max_zone_append_sectors = 0;
+	q->limits.use_zone_write_lock = false;
 
 	blk_mq_unfreeze_queue(q);
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 51fa7ffdee83..2d452f5a36c8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -316,6 +316,16 @@  struct queue_limits {
 	unsigned char		misaligned;
 	unsigned char		discard_misaligned;
 	unsigned char		raid_partial_stripes_expensive;
+	/*
+	 * Whether or not the block driver preserves the order of write
+	 * requests. Set by the block driver.
+	 */
+	bool			driver_preserves_write_order;
+	/*
+	 * Whether or not zone write locking should be used. Set by
+	 * disk_set_zoned().
+	 */
+	bool			use_zone_write_lock;
 	enum blk_zoned_model	zoned;
 
 	/*