Message ID | 20241231042241.171227-1-ming.lei@redhat.com |
---|---|
State | New |
Headers | show |
Series | scsi: avoid to send scsi command with ->queue_limits lock held | expand |
On Wed, Jan 01, 2025 at 04:46:24PM +0530, Nilay Shroff wrote: > > > On 12/31/24 09:52, Ming Lei wrote: > > Block request queue is often frozen before acquiring the queue > > ->limits_lock. > > > > However, in sd_revalidate_disk(), queue_limits_start_update() is called > > before reading all kinds of queue limits from hardware, and this way > > causes ABBA lock easily[1][2] because queue usage counter is grabbed > > when allocating scsi command. > > > > [1] https://lore.kernel.org/linux-block/Z1A8fai9_fQFhs1s@hovoldconsulting.com/ > > [2] https://lore.kernel.org/linux-scsi/ZxG38G9BuFdBpBHZ@fedora/ > > > > Fix the issue by reading limits into one scsi disk shadow queue limits > > structure first, then sync it to the block queue limits with > > ->limits_lock. > > > > Cc: Christoph Hellwig <hch@lst.de> > > Cc: Damien Le Moal <dlemoal@kernel.org> > > Cc: Nilay Shroff <nilay@linux.ibm.com> > > Fixes: 804e498e0496 ("sd: convert to the atomic queue limits API") > > Signed-off-by: Ming Lei <ming.lei@redhat.com> > > --- > > drivers/scsi/sd.c | 156 +++++++++++++++++++++++++++++++----------- > > drivers/scsi/sd.h | 59 +++++++++++++++- > > drivers/scsi/sd_dif.c | 3 +- > > drivers/scsi/sd_zbc.c | 14 ++-- > > 4 files changed, 181 insertions(+), 51 deletions(-) > > > > diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c > > index 8947dab132d7..6af5334dee2f 100644 > > --- a/drivers/scsi/sd.c > > +++ b/drivers/scsi/sd.c > > @@ -102,10 +102,10 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_ZBC); > > > > #define SD_MINORS 16 > > > > -static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim, > > +static void sd_config_discard(struct scsi_disk *sdkp, struct sd_limits *lim, > > unsigned int mode); > > static void sd_config_write_same(struct scsi_disk *sdkp, > > - struct queue_limits *lim); > > + struct sd_limits *lim); > > static int sd_revalidate_disk(struct gendisk *); > > static void sd_unlock_native_capacity(struct gendisk *disk); > > static void sd_shutdown(struct device *); > > @@ -121,8 +121,64 @@ static const char *sd_cache_types[] = { > > "write back, no read (daft)" > > }; > > > > +static void sd_sync_limits(struct queue_limits *blk_lim, > > + const struct sd_limits *lim) > > +{ > > + if (lim->has_features) > > + blk_lim->features |= lim->features; > > + > > + if (lim->has_neg_features) > > + blk_lim->features &= ~lim->neg_features; > > + > > + if (lim->has_alignment_offset) > > + blk_lim->alignment_offset = lim->alignment_offset; > > + > > + if (lim->has_integrity) > > + blk_lim->integrity = lim->integrity; > > + > > + if (lim->has_bs) { > > + blk_lim->logical_block_size = lim->bs.logical_block_size; > > + blk_lim->physical_block_size = lim->bs.physical_block_size; > > + } > > + > > + if (lim->has_discard) { > > + blk_lim->discard_granularity = > > + lim->discard.discard_granularity; > > + blk_lim->discard_alignment = > > + lim->discard.discard_alignment; > > + blk_lim->max_hw_discard_sectors = > > + lim->discard.max_hw_discard_sectors; > > + } > > + > > + if (lim->has_ws) > > + blk_lim->max_write_zeroes_sectors = > > + lim->ws.max_write_zeroes_sectors; > > + > > + if (lim->has_aw) { > > + blk_lim->atomic_write_hw_max = lim->aw.atomic_write_hw_max; > > + blk_lim->atomic_write_hw_boundary = > > + lim->aw.atomic_write_hw_boundary; > > + blk_lim->atomic_write_hw_unit_min = > > + lim->aw.atomic_write_hw_unit_min; > > + blk_lim->atomic_write_hw_unit_max = > > + lim->aw.atomic_write_hw_unit_max; > > + } > > + > > + if (lim->has_io) { > > + blk_lim->max_dev_sectors = lim->io.max_dev_sectors; > > + blk_lim->io_opt = lim->io.io_opt; > > + blk_lim->io_min = lim->io.io_min; > > + } > > + > > + if (lim->has_zone) { > > + blk_lim->max_open_zones = lim->zone.max_open_zones; > > + blk_lim->max_active_zones = lim->zone.max_active_zones; > > + blk_lim->chunk_sectors = lim->zone.chunk_sectors; > > + } > > +} > > + > > static void sd_set_flush_flag(struct scsi_disk *sdkp, > > - struct queue_limits *lim) > > + struct sd_limits *lim) > > { > > if (sdkp->WCE) { > > lim->features |= BLK_FEAT_WRITE_CACHE; > > @@ -133,6 +189,7 @@ static void sd_set_flush_flag(struct scsi_disk *sdkp, > > } else { > > lim->features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA); > > } > > + lim->has_features = 1; > > } > > > > static ssize_t > > @@ -170,15 +227,18 @@ cache_type_store(struct device *dev, struct device_attribute *attr, > > wce = (ct & 0x02) && !sdkp->write_prot ? 1 : 0; > > > > if (sdkp->cache_override) { > > - struct queue_limits lim; > > + struct queue_limits blk_lim; > > + struct sd_limits lim = { 0 }; > > > > sdkp->WCE = wce; > > sdkp->RCD = rcd; > > > > - lim = queue_limits_start_update(sdkp->disk->queue); > > sd_set_flush_flag(sdkp, &lim); > > + > > + blk_lim = queue_limits_start_update(sdkp->disk->queue); > > + sd_sync_limits(&blk_lim, &lim); > > blk_mq_freeze_queue(sdkp->disk->queue); > > - ret = queue_limits_commit_update(sdkp->disk->queue, &lim); > > + ret = queue_limits_commit_update(sdkp->disk->queue, &blk_lim); > > blk_mq_unfreeze_queue(sdkp->disk->queue); > > if (ret) > > return ret; > > @@ -468,7 +528,8 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr, > > { > > struct scsi_disk *sdkp = to_scsi_disk(dev); > > struct scsi_device *sdp = sdkp->device; > > - struct queue_limits lim; > > + struct queue_limits blk_lim; > > + struct sd_limits lim = { 0 }; > > int mode, err; > > > > if (!capable(CAP_SYS_ADMIN)) > > @@ -481,10 +542,11 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr, > > if (mode < 0) > > return -EINVAL; > > > > - lim = queue_limits_start_update(sdkp->disk->queue); > > sd_config_discard(sdkp, &lim, mode); > > + blk_lim = queue_limits_start_update(sdkp->disk->queue); > > + sd_sync_limits(&blk_lim, &lim); > > blk_mq_freeze_queue(sdkp->disk->queue); > > - err = queue_limits_commit_update(sdkp->disk->queue, &lim); > > + err = queue_limits_commit_update(sdkp->disk->queue, &blk_lim); > > blk_mq_unfreeze_queue(sdkp->disk->queue); > > if (err) > > return err; > > @@ -570,7 +632,8 @@ max_write_same_blocks_store(struct device *dev, struct device_attribute *attr, > > { > > struct scsi_disk *sdkp = to_scsi_disk(dev); > > struct scsi_device *sdp = sdkp->device; > > - struct queue_limits lim; > > + struct queue_limits blk_lim; > > + struct sd_limits lim = { 0 }; > > unsigned long max; > > int err; > > > > @@ -592,10 +655,11 @@ max_write_same_blocks_store(struct device *dev, struct device_attribute *attr, > > sdkp->max_ws_blocks = max; > > } > > > > - lim = queue_limits_start_update(sdkp->disk->queue); > > sd_config_write_same(sdkp, &lim); > > + blk_lim = queue_limits_start_update(sdkp->disk->queue); > > + sd_sync_limits(&blk_lim, &lim); > > blk_mq_freeze_queue(sdkp->disk->queue); > > - err = queue_limits_commit_update(sdkp->disk->queue, &lim); > > + err = queue_limits_commit_update(sdkp->disk->queue, &blk_lim); > > blk_mq_unfreeze_queue(sdkp->disk->queue); > > if (err) > > return err; > > @@ -847,14 +911,14 @@ static void sd_disable_discard(struct scsi_disk *sdkp) > > blk_queue_disable_discard(sdkp->disk->queue); > > } > > > > -static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim, > > +static void sd_config_discard(struct scsi_disk *sdkp, struct sd_limits *lim, > > unsigned int mode) > > { > > unsigned int logical_block_size = sdkp->device->sector_size; > > unsigned int max_blocks = 0; > > > > - lim->discard_alignment = sdkp->unmap_alignment * logical_block_size; > > - lim->discard_granularity = max(sdkp->physical_block_size, > > + lim->discard.discard_alignment = sdkp->unmap_alignment * logical_block_size; > > + lim->discard.discard_granularity = max(sdkp->physical_block_size, > > sdkp->unmap_granularity * logical_block_size); > > sdkp->provisioning_mode = mode; > > > > @@ -893,8 +957,9 @@ static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim, > > break; > > } > > > > - lim->max_hw_discard_sectors = max_blocks * > > + lim->discard.max_hw_discard_sectors = max_blocks * > > (logical_block_size >> SECTOR_SHIFT); > > + lim->has_discard = 1; > > } > > > > static void *sd_set_special_bvec(struct request *rq, unsigned int data_len) > > @@ -940,7 +1005,7 @@ static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) > > return scsi_alloc_sgtables(cmd); > > } > > > > -static void sd_config_atomic(struct scsi_disk *sdkp, struct queue_limits *lim) > > +static void sd_config_atomic(struct scsi_disk *sdkp, struct sd_limits *lim) > > { > > unsigned int logical_block_size = sdkp->device->sector_size, > > physical_block_size_sectors, max_atomic, unit_min, unit_max; > > @@ -992,10 +1057,11 @@ static void sd_config_atomic(struct scsi_disk *sdkp, struct queue_limits *lim) > > return; > > } > > > > - lim->atomic_write_hw_max = max_atomic * logical_block_size; > > - lim->atomic_write_hw_boundary = 0; > > - lim->atomic_write_hw_unit_min = unit_min * logical_block_size; > > - lim->atomic_write_hw_unit_max = unit_max * logical_block_size; > > + lim->aw.atomic_write_hw_max = max_atomic * logical_block_size; > > + lim->aw.atomic_write_hw_boundary = 0; > > + lim->aw.atomic_write_hw_unit_min = unit_min * logical_block_size; > > + lim->aw.atomic_write_hw_unit_max = unit_max * logical_block_size; > > + lim->has_aw = 1; > > } > > > > static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, > > @@ -1088,7 +1154,7 @@ static void sd_disable_write_same(struct scsi_disk *sdkp) > > } > > > > static void sd_config_write_same(struct scsi_disk *sdkp, > > - struct queue_limits *lim) > > + struct sd_limits *lim) > > { > > unsigned int logical_block_size = sdkp->device->sector_size; > > > > @@ -1143,8 +1209,9 @@ static void sd_config_write_same(struct scsi_disk *sdkp, > > } > > > > out: > > - lim->max_write_zeroes_sectors = > > + lim->ws.max_write_zeroes_sectors = > > sdkp->max_ws_blocks * (logical_block_size >> SECTOR_SHIFT); > > + lim->has_ws = 1; > > } > > > > static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd) > > @@ -2574,7 +2641,7 @@ static int sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer > > } > > > > static void sd_config_protection(struct scsi_disk *sdkp, > > - struct queue_limits *lim) > > + struct sd_limits *lim) > > { > > struct scsi_device *sdp = sdkp->device; > > > > @@ -2628,7 +2695,7 @@ static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp, > > #define READ_CAPACITY_RETRIES_ON_RESET 10 > > > > static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, > > - struct queue_limits *lim, unsigned char *buffer) > > + struct sd_limits *lim, unsigned char *buffer) > > { > > unsigned char cmd[16]; > > struct scsi_sense_hdr sshdr; > > @@ -2703,6 +2770,7 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, > > /* Lowest aligned logical block */ > > alignment = ((buffer[14] & 0x3f) << 8 | buffer[15]) * sector_size; > > lim->alignment_offset = alignment; > > + lim->has_alignment_offset = 1; > > if (alignment && sdkp->first_scan) > > sd_printk(KERN_NOTICE, sdkp, > > "physical block alignment offset: %u\n", alignment); > > @@ -2814,7 +2882,7 @@ static int sd_try_rc16_first(struct scsi_device *sdp) > > * read disk capacity > > */ > > static void > > -sd_read_capacity(struct scsi_disk *sdkp, struct queue_limits *lim, > > +sd_read_capacity(struct scsi_disk *sdkp, struct sd_limits *lim, > > unsigned char *buffer) > > { > > int sector_size; > > @@ -2900,8 +2968,9 @@ sd_read_capacity(struct scsi_disk *sdkp, struct queue_limits *lim, > > */ > > sector_size = 512; > > } > > - lim->logical_block_size = sector_size; > > - lim->physical_block_size = sdkp->physical_block_size; > > + lim->bs.logical_block_size = sector_size; > > + lim->bs.physical_block_size = sdkp->physical_block_size; > > + lim->has_bs = 1; > > sdkp->device->sector_size = sector_size; > > > > if (sdkp->capacity > 0xffffffff) > > @@ -3333,7 +3402,7 @@ static unsigned int sd_discard_mode(struct scsi_disk *sdkp) > > * Query disk device for preferred I/O sizes. > > */ > > static void sd_read_block_limits(struct scsi_disk *sdkp, > > - struct queue_limits *lim) > > + struct sd_limits *lim) > > { > > struct scsi_vpd *vpd; > > > > @@ -3395,7 +3464,7 @@ static void sd_read_block_limits_ext(struct scsi_disk *sdkp) > > > > /* Query block device characteristics */ > > static void sd_read_block_characteristics(struct scsi_disk *sdkp, > > - struct queue_limits *lim) > > + struct sd_limits *lim) > > { > > struct scsi_vpd *vpd; > > u16 rot; > > @@ -3412,8 +3481,10 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp, > > sdkp->zoned = (vpd->data[8] >> 4) & 3; > > rcu_read_unlock(); > > > > - if (rot == 1) > > - lim->features &= ~(BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM); > > + if (rot == 1) { > > + lim->neg_features |= (BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM); > > + lim->has_neg_features = 1; > > + } > > > > if (!sdkp->first_scan) > > return; > > @@ -3700,7 +3771,8 @@ static int sd_revalidate_disk(struct gendisk *disk) > > struct scsi_disk *sdkp = scsi_disk(disk); > > struct scsi_device *sdp = sdkp->device; > > sector_t old_capacity = sdkp->capacity; > > - struct queue_limits lim; > > + struct queue_limits blk_lim; > > + struct sd_limits lim = { 0 }; > > unsigned char *buffer; > > unsigned int dev_max; > > int err; > > @@ -3724,8 +3796,6 @@ static int sd_revalidate_disk(struct gendisk *disk) > > > > sd_spinup_disk(sdkp); > > > > - lim = queue_limits_start_update(sdkp->disk->queue); > > - > > /* > > * Without media there is no reason to ask; moreover, some devices > > * react badly if we do. > > @@ -3746,6 +3816,7 @@ static int sd_revalidate_disk(struct gendisk *disk) > > * doesn't support it should be treated as rotational. > > */ > > lim.features |= (BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM); > > + lim.has_features = 1; > > > > if (scsi_device_supports_vpd(sdp)) { > > sd_read_block_provisioning(sdkp); > > @@ -3779,23 +3850,24 @@ static int sd_revalidate_disk(struct gendisk *disk) > > > > /* Some devices report a maximum block count for READ/WRITE requests. */ > > dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks); > > - lim.max_dev_sectors = logical_to_sectors(sdp, dev_max); > > + lim.io.max_dev_sectors = logical_to_sectors(sdp, dev_max); > > > > if (sd_validate_min_xfer_size(sdkp)) > > - lim.io_min = logical_to_bytes(sdp, sdkp->min_xfer_blocks); > > + lim.io.io_min = logical_to_bytes(sdp, sdkp->min_xfer_blocks); > > else > > - lim.io_min = 0; > > + lim.io.io_min = 0; > > > > /* > > * Limit default to SCSI host optimal sector limit if set. There may be > > * an impact on performance for when the size of a request exceeds this > > * host limit. > > */ > > - lim.io_opt = sdp->host->opt_sectors << SECTOR_SHIFT; > > + lim.io.io_opt = sdp->host->opt_sectors << SECTOR_SHIFT; > > if (sd_validate_opt_xfer_size(sdkp, dev_max)) { > > - lim.io_opt = min_not_zero(lim.io_opt, > > + lim.io.io_opt = min_not_zero(lim.io.io_opt, > > logical_to_bytes(sdp, sdkp->opt_xfer_blocks)); > > } > > + lim.has_io = 1; > > > > sdkp->first_scan = 0; > > > > @@ -3803,8 +3875,10 @@ static int sd_revalidate_disk(struct gendisk *disk) > > sd_config_write_same(sdkp, &lim); > > kfree(buffer); > > > > + blk_lim = queue_limits_start_update(sdkp->disk->queue); > > + sd_sync_limits(&blk_lim, &lim); > > blk_mq_freeze_queue(sdkp->disk->queue); > > - err = queue_limits_commit_update(sdkp->disk->queue, &lim); > > + err = queue_limits_commit_update(sdkp->disk->queue, &blk_lim); > > blk_mq_unfreeze_queue(sdkp->disk->queue); > > if (err) > > return err; > > diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h > > index 36382eca941c..68c2db27cbf3 100644 > > --- a/drivers/scsi/sd.h > > +++ b/drivers/scsi/sd.h > > @@ -67,6 +67,59 @@ enum { > > SD_ZERO_WS10_UNMAP, /* Use WRITE SAME(10) with UNMAP */ > > }; > > > > +struct sd_limits { > > + unsigned int has_features:1; > > + unsigned int has_neg_features:1; > > + unsigned int has_alignment_offset:1; > > + unsigned int has_bs:1; > > + unsigned int has_discard:1; > > + unsigned int has_integrity:1; > > + unsigned int has_aw:1; > > + unsigned int has_ws:1; > > + unsigned int has_io:1; > > + unsigned int has_zone:1; > > + > > + blk_features_t features; > > + blk_features_t neg_features; > > + unsigned int alignment_offset; > > + struct blk_integrity integrity; > > + > > + struct { > > + unsigned int logical_block_size; > > + unsigned int physical_block_size; > > + } bs; > > + > > + struct { > > + unsigned int discard_granularity; > > + unsigned int discard_alignment; > > + unsigned int max_hw_discard_sectors; > > + } discard; > > + > > + struct { > > + unsigned int max_write_zeroes_sectors; > > + } ws; > > + > > + struct { > > + unsigned int atomic_write_hw_max; > > + unsigned int atomic_write_hw_boundary; > > + unsigned int atomic_write_hw_unit_min; > > + unsigned int atomic_write_hw_unit_max; > > + } aw; > > + > > + struct { > > + unsigned int max_dev_sectors; > > + unsigned int io_opt; > > + unsigned int io_min; > > + } io; > > + > > + struct { > > + unsigned int zone_write_granularity; > > + unsigned int max_open_zones; > > + unsigned int max_active_zones; > > + unsigned int chunk_sectors; > > + } zone; > > +}; > > + > > /** > > * struct zoned_disk_info - Specific properties of a ZBC SCSI device. > > * @nr_zones: number of zones. > > @@ -228,11 +281,11 @@ static inline sector_t sectors_to_logical(struct scsi_device *sdev, sector_t sec > > return sector >> (ilog2(sdev->sector_size) - 9); > > } > > > > -void sd_dif_config_host(struct scsi_disk *sdkp, struct queue_limits *lim); > > +void sd_dif_config_host(struct scsi_disk *sdkp, struct sd_limits *lim); > > > > #ifdef CONFIG_BLK_DEV_ZONED > > > > -int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, > > +int sd_zbc_read_zones(struct scsi_disk *sdkp, struct sd_limits *lim, > > u8 buf[SD_BUF_SIZE]); > > int sd_zbc_revalidate_zones(struct scsi_disk *sdkp); > > blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, > > @@ -245,7 +298,7 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, > > #else /* CONFIG_BLK_DEV_ZONED */ > > > > static inline int sd_zbc_read_zones(struct scsi_disk *sdkp, > > - struct queue_limits *lim, u8 buf[SD_BUF_SIZE]) > > + struct sd_limits *lim, u8 buf[SD_BUF_SIZE]) > > { > > return 0; > > } > > diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c > > index ae6ce6f5d622..081168d4aee3 100644 > > --- a/drivers/scsi/sd_dif.c > > +++ b/drivers/scsi/sd_dif.c > > @@ -24,13 +24,14 @@ > > /* > > * Configure exchange of protection information between OS and HBA. > > */ > > -void sd_dif_config_host(struct scsi_disk *sdkp, struct queue_limits *lim) > > +void sd_dif_config_host(struct scsi_disk *sdkp, struct sd_limits *lim) > > { > > struct scsi_device *sdp = sdkp->device; > > u8 type = sdkp->protection_type; > > struct blk_integrity *bi = &lim->integrity; > > int dif, dix; > > > > + lim->has_integrity = 1; > > memset(bi, 0, sizeof(*bi)); > > > > dif = scsi_host_dif_capable(sdp->host, type); > > diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c > > index 7a447ff600d2..c8e398a08b31 100644 > > --- a/drivers/scsi/sd_zbc.c > > +++ b/drivers/scsi/sd_zbc.c > > @@ -588,7 +588,7 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) > > * also the zoned device information in *sdkp. Called by sd_revalidate_disk() > > * before the gendisk capacity has been set. > > */ > > -int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, > > +int sd_zbc_read_zones(struct scsi_disk *sdkp, struct sd_limits *lim, > > u8 buf[SD_BUF_SIZE]) > > { > > unsigned int nr_zones; > > @@ -598,6 +598,7 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, > > if (sdkp->device->type != TYPE_ZBC) > > return 0; > > > > + lim->has_features = 1; > > lim->features |= BLK_FEAT_ZONED; > > > > /* > > @@ -605,7 +606,7 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, > > * zones of host-managed devices must be aligned to the device physical > > * block size. > > */ > > - lim->zone_write_granularity = sdkp->physical_block_size; > > + lim->zone.zone_write_granularity = sdkp->physical_block_size; > > > > /* READ16/WRITE16/SYNC16 is mandatory for ZBC devices */ > > sdkp->device->use_16_for_rw = 1; > > @@ -628,11 +629,12 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, > > > > /* The drive satisfies the kernel restrictions: set it up */ > > if (sdkp->zones_max_open == U32_MAX) > > - lim->max_open_zones = 0; > > + lim->zone.max_open_zones = 0; > > else > > - lim->max_open_zones = sdkp->zones_max_open; > > - lim->max_active_zones = 0; > > - lim->chunk_sectors = logical_to_sectors(sdkp->device, zone_blocks); > > + lim->zone.max_open_zones = sdkp->zones_max_open; > > + lim->zone.max_active_zones = 0; > > + lim->zone.chunk_sectors = logical_to_sectors(sdkp->device, zone_blocks); > > + lim->has_zone = 1; > > > > return 0; > > > > I see that we use shadow queue for few fucntions other than sd_revalidate_disk(). > Do we really need to use shadow queue for those other functions like provisioning_mode_store(), > max_write_same_blocks_store() and cache_type_store() ? It seems shadow queue would be > only needed for sd_revalidate_disk(). Yes. But shadow limits structure is introduced and applied for these functions, so they have to switch to this pattern, and the change is correct. > > Moreover, with this change the locking order is to first acquire limit locks followed by > queue freeze lock and this patch only addresses scsi. However as we know we do have other places > where we first freeze queue and then acquire limits lock. So those places also need to be fixed > otherwise we would get lockdep splat. Are you going to fix it in another patch? Yes, and it belongs to another easier problem. If this patch can be accepted, we can adjust the lock order for these functions. Thanks, Ming
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 8947dab132d7..6af5334dee2f 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -102,10 +102,10 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_ZBC); #define SD_MINORS 16 -static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim, +static void sd_config_discard(struct scsi_disk *sdkp, struct sd_limits *lim, unsigned int mode); static void sd_config_write_same(struct scsi_disk *sdkp, - struct queue_limits *lim); + struct sd_limits *lim); static int sd_revalidate_disk(struct gendisk *); static void sd_unlock_native_capacity(struct gendisk *disk); static void sd_shutdown(struct device *); @@ -121,8 +121,64 @@ static const char *sd_cache_types[] = { "write back, no read (daft)" }; +static void sd_sync_limits(struct queue_limits *blk_lim, + const struct sd_limits *lim) +{ + if (lim->has_features) + blk_lim->features |= lim->features; + + if (lim->has_neg_features) + blk_lim->features &= ~lim->neg_features; + + if (lim->has_alignment_offset) + blk_lim->alignment_offset = lim->alignment_offset; + + if (lim->has_integrity) + blk_lim->integrity = lim->integrity; + + if (lim->has_bs) { + blk_lim->logical_block_size = lim->bs.logical_block_size; + blk_lim->physical_block_size = lim->bs.physical_block_size; + } + + if (lim->has_discard) { + blk_lim->discard_granularity = + lim->discard.discard_granularity; + blk_lim->discard_alignment = + lim->discard.discard_alignment; + blk_lim->max_hw_discard_sectors = + lim->discard.max_hw_discard_sectors; + } + + if (lim->has_ws) + blk_lim->max_write_zeroes_sectors = + lim->ws.max_write_zeroes_sectors; + + if (lim->has_aw) { + blk_lim->atomic_write_hw_max = lim->aw.atomic_write_hw_max; + blk_lim->atomic_write_hw_boundary = + lim->aw.atomic_write_hw_boundary; + blk_lim->atomic_write_hw_unit_min = + lim->aw.atomic_write_hw_unit_min; + blk_lim->atomic_write_hw_unit_max = + lim->aw.atomic_write_hw_unit_max; + } + + if (lim->has_io) { + blk_lim->max_dev_sectors = lim->io.max_dev_sectors; + blk_lim->io_opt = lim->io.io_opt; + blk_lim->io_min = lim->io.io_min; + } + + if (lim->has_zone) { + blk_lim->max_open_zones = lim->zone.max_open_zones; + blk_lim->max_active_zones = lim->zone.max_active_zones; + blk_lim->chunk_sectors = lim->zone.chunk_sectors; + } +} + static void sd_set_flush_flag(struct scsi_disk *sdkp, - struct queue_limits *lim) + struct sd_limits *lim) { if (sdkp->WCE) { lim->features |= BLK_FEAT_WRITE_CACHE; @@ -133,6 +189,7 @@ static void sd_set_flush_flag(struct scsi_disk *sdkp, } else { lim->features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA); } + lim->has_features = 1; } static ssize_t @@ -170,15 +227,18 @@ cache_type_store(struct device *dev, struct device_attribute *attr, wce = (ct & 0x02) && !sdkp->write_prot ? 1 : 0; if (sdkp->cache_override) { - struct queue_limits lim; + struct queue_limits blk_lim; + struct sd_limits lim = { 0 }; sdkp->WCE = wce; sdkp->RCD = rcd; - lim = queue_limits_start_update(sdkp->disk->queue); sd_set_flush_flag(sdkp, &lim); + + blk_lim = queue_limits_start_update(sdkp->disk->queue); + sd_sync_limits(&blk_lim, &lim); blk_mq_freeze_queue(sdkp->disk->queue); - ret = queue_limits_commit_update(sdkp->disk->queue, &lim); + ret = queue_limits_commit_update(sdkp->disk->queue, &blk_lim); blk_mq_unfreeze_queue(sdkp->disk->queue); if (ret) return ret; @@ -468,7 +528,8 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr, { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; - struct queue_limits lim; + struct queue_limits blk_lim; + struct sd_limits lim = { 0 }; int mode, err; if (!capable(CAP_SYS_ADMIN)) @@ -481,10 +542,11 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr, if (mode < 0) return -EINVAL; - lim = queue_limits_start_update(sdkp->disk->queue); sd_config_discard(sdkp, &lim, mode); + blk_lim = queue_limits_start_update(sdkp->disk->queue); + sd_sync_limits(&blk_lim, &lim); blk_mq_freeze_queue(sdkp->disk->queue); - err = queue_limits_commit_update(sdkp->disk->queue, &lim); + err = queue_limits_commit_update(sdkp->disk->queue, &blk_lim); blk_mq_unfreeze_queue(sdkp->disk->queue); if (err) return err; @@ -570,7 +632,8 @@ max_write_same_blocks_store(struct device *dev, struct device_attribute *attr, { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; - struct queue_limits lim; + struct queue_limits blk_lim; + struct sd_limits lim = { 0 }; unsigned long max; int err; @@ -592,10 +655,11 @@ max_write_same_blocks_store(struct device *dev, struct device_attribute *attr, sdkp->max_ws_blocks = max; } - lim = queue_limits_start_update(sdkp->disk->queue); sd_config_write_same(sdkp, &lim); + blk_lim = queue_limits_start_update(sdkp->disk->queue); + sd_sync_limits(&blk_lim, &lim); blk_mq_freeze_queue(sdkp->disk->queue); - err = queue_limits_commit_update(sdkp->disk->queue, &lim); + err = queue_limits_commit_update(sdkp->disk->queue, &blk_lim); blk_mq_unfreeze_queue(sdkp->disk->queue); if (err) return err; @@ -847,14 +911,14 @@ static void sd_disable_discard(struct scsi_disk *sdkp) blk_queue_disable_discard(sdkp->disk->queue); } -static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim, +static void sd_config_discard(struct scsi_disk *sdkp, struct sd_limits *lim, unsigned int mode) { unsigned int logical_block_size = sdkp->device->sector_size; unsigned int max_blocks = 0; - lim->discard_alignment = sdkp->unmap_alignment * logical_block_size; - lim->discard_granularity = max(sdkp->physical_block_size, + lim->discard.discard_alignment = sdkp->unmap_alignment * logical_block_size; + lim->discard.discard_granularity = max(sdkp->physical_block_size, sdkp->unmap_granularity * logical_block_size); sdkp->provisioning_mode = mode; @@ -893,8 +957,9 @@ static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim, break; } - lim->max_hw_discard_sectors = max_blocks * + lim->discard.max_hw_discard_sectors = max_blocks * (logical_block_size >> SECTOR_SHIFT); + lim->has_discard = 1; } static void *sd_set_special_bvec(struct request *rq, unsigned int data_len) @@ -940,7 +1005,7 @@ static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) return scsi_alloc_sgtables(cmd); } -static void sd_config_atomic(struct scsi_disk *sdkp, struct queue_limits *lim) +static void sd_config_atomic(struct scsi_disk *sdkp, struct sd_limits *lim) { unsigned int logical_block_size = sdkp->device->sector_size, physical_block_size_sectors, max_atomic, unit_min, unit_max; @@ -992,10 +1057,11 @@ static void sd_config_atomic(struct scsi_disk *sdkp, struct queue_limits *lim) return; } - lim->atomic_write_hw_max = max_atomic * logical_block_size; - lim->atomic_write_hw_boundary = 0; - lim->atomic_write_hw_unit_min = unit_min * logical_block_size; - lim->atomic_write_hw_unit_max = unit_max * logical_block_size; + lim->aw.atomic_write_hw_max = max_atomic * logical_block_size; + lim->aw.atomic_write_hw_boundary = 0; + lim->aw.atomic_write_hw_unit_min = unit_min * logical_block_size; + lim->aw.atomic_write_hw_unit_max = unit_max * logical_block_size; + lim->has_aw = 1; } static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, @@ -1088,7 +1154,7 @@ static void sd_disable_write_same(struct scsi_disk *sdkp) } static void sd_config_write_same(struct scsi_disk *sdkp, - struct queue_limits *lim) + struct sd_limits *lim) { unsigned int logical_block_size = sdkp->device->sector_size; @@ -1143,8 +1209,9 @@ static void sd_config_write_same(struct scsi_disk *sdkp, } out: - lim->max_write_zeroes_sectors = + lim->ws.max_write_zeroes_sectors = sdkp->max_ws_blocks * (logical_block_size >> SECTOR_SHIFT); + lim->has_ws = 1; } static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd) @@ -2574,7 +2641,7 @@ static int sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer } static void sd_config_protection(struct scsi_disk *sdkp, - struct queue_limits *lim) + struct sd_limits *lim) { struct scsi_device *sdp = sdkp->device; @@ -2628,7 +2695,7 @@ static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp, #define READ_CAPACITY_RETRIES_ON_RESET 10 static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, - struct queue_limits *lim, unsigned char *buffer) + struct sd_limits *lim, unsigned char *buffer) { unsigned char cmd[16]; struct scsi_sense_hdr sshdr; @@ -2703,6 +2770,7 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, /* Lowest aligned logical block */ alignment = ((buffer[14] & 0x3f) << 8 | buffer[15]) * sector_size; lim->alignment_offset = alignment; + lim->has_alignment_offset = 1; if (alignment && sdkp->first_scan) sd_printk(KERN_NOTICE, sdkp, "physical block alignment offset: %u\n", alignment); @@ -2814,7 +2882,7 @@ static int sd_try_rc16_first(struct scsi_device *sdp) * read disk capacity */ static void -sd_read_capacity(struct scsi_disk *sdkp, struct queue_limits *lim, +sd_read_capacity(struct scsi_disk *sdkp, struct sd_limits *lim, unsigned char *buffer) { int sector_size; @@ -2900,8 +2968,9 @@ sd_read_capacity(struct scsi_disk *sdkp, struct queue_limits *lim, */ sector_size = 512; } - lim->logical_block_size = sector_size; - lim->physical_block_size = sdkp->physical_block_size; + lim->bs.logical_block_size = sector_size; + lim->bs.physical_block_size = sdkp->physical_block_size; + lim->has_bs = 1; sdkp->device->sector_size = sector_size; if (sdkp->capacity > 0xffffffff) @@ -3333,7 +3402,7 @@ static unsigned int sd_discard_mode(struct scsi_disk *sdkp) * Query disk device for preferred I/O sizes. */ static void sd_read_block_limits(struct scsi_disk *sdkp, - struct queue_limits *lim) + struct sd_limits *lim) { struct scsi_vpd *vpd; @@ -3395,7 +3464,7 @@ static void sd_read_block_limits_ext(struct scsi_disk *sdkp) /* Query block device characteristics */ static void sd_read_block_characteristics(struct scsi_disk *sdkp, - struct queue_limits *lim) + struct sd_limits *lim) { struct scsi_vpd *vpd; u16 rot; @@ -3412,8 +3481,10 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp, sdkp->zoned = (vpd->data[8] >> 4) & 3; rcu_read_unlock(); - if (rot == 1) - lim->features &= ~(BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM); + if (rot == 1) { + lim->neg_features |= (BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM); + lim->has_neg_features = 1; + } if (!sdkp->first_scan) return; @@ -3700,7 +3771,8 @@ static int sd_revalidate_disk(struct gendisk *disk) struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdp = sdkp->device; sector_t old_capacity = sdkp->capacity; - struct queue_limits lim; + struct queue_limits blk_lim; + struct sd_limits lim = { 0 }; unsigned char *buffer; unsigned int dev_max; int err; @@ -3724,8 +3796,6 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_spinup_disk(sdkp); - lim = queue_limits_start_update(sdkp->disk->queue); - /* * Without media there is no reason to ask; moreover, some devices * react badly if we do. @@ -3746,6 +3816,7 @@ static int sd_revalidate_disk(struct gendisk *disk) * doesn't support it should be treated as rotational. */ lim.features |= (BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM); + lim.has_features = 1; if (scsi_device_supports_vpd(sdp)) { sd_read_block_provisioning(sdkp); @@ -3779,23 +3850,24 @@ static int sd_revalidate_disk(struct gendisk *disk) /* Some devices report a maximum block count for READ/WRITE requests. */ dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks); - lim.max_dev_sectors = logical_to_sectors(sdp, dev_max); + lim.io.max_dev_sectors = logical_to_sectors(sdp, dev_max); if (sd_validate_min_xfer_size(sdkp)) - lim.io_min = logical_to_bytes(sdp, sdkp->min_xfer_blocks); + lim.io.io_min = logical_to_bytes(sdp, sdkp->min_xfer_blocks); else - lim.io_min = 0; + lim.io.io_min = 0; /* * Limit default to SCSI host optimal sector limit if set. There may be * an impact on performance for when the size of a request exceeds this * host limit. */ - lim.io_opt = sdp->host->opt_sectors << SECTOR_SHIFT; + lim.io.io_opt = sdp->host->opt_sectors << SECTOR_SHIFT; if (sd_validate_opt_xfer_size(sdkp, dev_max)) { - lim.io_opt = min_not_zero(lim.io_opt, + lim.io.io_opt = min_not_zero(lim.io.io_opt, logical_to_bytes(sdp, sdkp->opt_xfer_blocks)); } + lim.has_io = 1; sdkp->first_scan = 0; @@ -3803,8 +3875,10 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_config_write_same(sdkp, &lim); kfree(buffer); + blk_lim = queue_limits_start_update(sdkp->disk->queue); + sd_sync_limits(&blk_lim, &lim); blk_mq_freeze_queue(sdkp->disk->queue); - err = queue_limits_commit_update(sdkp->disk->queue, &lim); + err = queue_limits_commit_update(sdkp->disk->queue, &blk_lim); blk_mq_unfreeze_queue(sdkp->disk->queue); if (err) return err; diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 36382eca941c..68c2db27cbf3 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -67,6 +67,59 @@ enum { SD_ZERO_WS10_UNMAP, /* Use WRITE SAME(10) with UNMAP */ }; +struct sd_limits { + unsigned int has_features:1; + unsigned int has_neg_features:1; + unsigned int has_alignment_offset:1; + unsigned int has_bs:1; + unsigned int has_discard:1; + unsigned int has_integrity:1; + unsigned int has_aw:1; + unsigned int has_ws:1; + unsigned int has_io:1; + unsigned int has_zone:1; + + blk_features_t features; + blk_features_t neg_features; + unsigned int alignment_offset; + struct blk_integrity integrity; + + struct { + unsigned int logical_block_size; + unsigned int physical_block_size; + } bs; + + struct { + unsigned int discard_granularity; + unsigned int discard_alignment; + unsigned int max_hw_discard_sectors; + } discard; + + struct { + unsigned int max_write_zeroes_sectors; + } ws; + + struct { + unsigned int atomic_write_hw_max; + unsigned int atomic_write_hw_boundary; + unsigned int atomic_write_hw_unit_min; + unsigned int atomic_write_hw_unit_max; + } aw; + + struct { + unsigned int max_dev_sectors; + unsigned int io_opt; + unsigned int io_min; + } io; + + struct { + unsigned int zone_write_granularity; + unsigned int max_open_zones; + unsigned int max_active_zones; + unsigned int chunk_sectors; + } zone; +}; + /** * struct zoned_disk_info - Specific properties of a ZBC SCSI device. * @nr_zones: number of zones. @@ -228,11 +281,11 @@ static inline sector_t sectors_to_logical(struct scsi_device *sdev, sector_t sec return sector >> (ilog2(sdev->sector_size) - 9); } -void sd_dif_config_host(struct scsi_disk *sdkp, struct queue_limits *lim); +void sd_dif_config_host(struct scsi_disk *sdkp, struct sd_limits *lim); #ifdef CONFIG_BLK_DEV_ZONED -int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, +int sd_zbc_read_zones(struct scsi_disk *sdkp, struct sd_limits *lim, u8 buf[SD_BUF_SIZE]); int sd_zbc_revalidate_zones(struct scsi_disk *sdkp); blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, @@ -245,7 +298,7 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, #else /* CONFIG_BLK_DEV_ZONED */ static inline int sd_zbc_read_zones(struct scsi_disk *sdkp, - struct queue_limits *lim, u8 buf[SD_BUF_SIZE]) + struct sd_limits *lim, u8 buf[SD_BUF_SIZE]) { return 0; } diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c index ae6ce6f5d622..081168d4aee3 100644 --- a/drivers/scsi/sd_dif.c +++ b/drivers/scsi/sd_dif.c @@ -24,13 +24,14 @@ /* * Configure exchange of protection information between OS and HBA. */ -void sd_dif_config_host(struct scsi_disk *sdkp, struct queue_limits *lim) +void sd_dif_config_host(struct scsi_disk *sdkp, struct sd_limits *lim) { struct scsi_device *sdp = sdkp->device; u8 type = sdkp->protection_type; struct blk_integrity *bi = &lim->integrity; int dif, dix; + lim->has_integrity = 1; memset(bi, 0, sizeof(*bi)); dif = scsi_host_dif_capable(sdp->host, type); diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 7a447ff600d2..c8e398a08b31 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -588,7 +588,7 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) * also the zoned device information in *sdkp. Called by sd_revalidate_disk() * before the gendisk capacity has been set. */ -int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, +int sd_zbc_read_zones(struct scsi_disk *sdkp, struct sd_limits *lim, u8 buf[SD_BUF_SIZE]) { unsigned int nr_zones; @@ -598,6 +598,7 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, if (sdkp->device->type != TYPE_ZBC) return 0; + lim->has_features = 1; lim->features |= BLK_FEAT_ZONED; /* @@ -605,7 +606,7 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, * zones of host-managed devices must be aligned to the device physical * block size. */ - lim->zone_write_granularity = sdkp->physical_block_size; + lim->zone.zone_write_granularity = sdkp->physical_block_size; /* READ16/WRITE16/SYNC16 is mandatory for ZBC devices */ sdkp->device->use_16_for_rw = 1; @@ -628,11 +629,12 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, /* The drive satisfies the kernel restrictions: set it up */ if (sdkp->zones_max_open == U32_MAX) - lim->max_open_zones = 0; + lim->zone.max_open_zones = 0; else - lim->max_open_zones = sdkp->zones_max_open; - lim->max_active_zones = 0; - lim->chunk_sectors = logical_to_sectors(sdkp->device, zone_blocks); + lim->zone.max_open_zones = sdkp->zones_max_open; + lim->zone.max_active_zones = 0; + lim->zone.chunk_sectors = logical_to_sectors(sdkp->device, zone_blocks); + lim->has_zone = 1; return 0;
Block request queue is often frozen before acquiring the queue ->limits_lock. However, in sd_revalidate_disk(), queue_limits_start_update() is called before reading all kinds of queue limits from hardware, and this way causes ABBA lock easily[1][2] because queue usage counter is grabbed when allocating scsi command. [1] https://lore.kernel.org/linux-block/Z1A8fai9_fQFhs1s@hovoldconsulting.com/ [2] https://lore.kernel.org/linux-scsi/ZxG38G9BuFdBpBHZ@fedora/ Fix the issue by reading limits into one scsi disk shadow queue limits structure first, then sync it to the block queue limits with ->limits_lock. Cc: Christoph Hellwig <hch@lst.de> Cc: Damien Le Moal <dlemoal@kernel.org> Cc: Nilay Shroff <nilay@linux.ibm.com> Fixes: 804e498e0496 ("sd: convert to the atomic queue limits API") Signed-off-by: Ming Lei <ming.lei@redhat.com> --- drivers/scsi/sd.c | 156 +++++++++++++++++++++++++++++++----------- drivers/scsi/sd.h | 59 +++++++++++++++- drivers/scsi/sd_dif.c | 3 +- drivers/scsi/sd_zbc.c | 14 ++-- 4 files changed, 181 insertions(+), 51 deletions(-)