diff mbox series

[04/23] scsi: initialize scsi midlayer limits before allocating the queue

Message ID 20240409143748.980206-5-hch@lst.de
State New
Headers show
Series [01/23] block: add a helper to cancel atomic queue limit updates | expand

Commit Message

Christoph Hellwig April 9, 2024, 2:37 p.m. UTC
Turn __scsi_init_queue into scsi_init_limits which initializes
queue_limits structure that can be passed to blk_mq_alloc_queue.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/scsi_lib.c             | 32 ++++++++++++++---------------
 drivers/scsi/scsi_scan.c            |  5 +++--
 drivers/scsi/scsi_transport_fc.c    | 11 +++++-----
 drivers/scsi/scsi_transport_iscsi.c |  5 +++--
 include/scsi/scsi_transport.h       |  2 +-
 5 files changed, 28 insertions(+), 27 deletions(-)

Comments

Guenter Roeck May 15, 2024, 9:50 p.m. UTC | #1
Hi,

On Tue, Apr 09, 2024 at 04:37:29PM +0200, Christoph Hellwig wrote:
> Turn __scsi_init_queue into scsi_init_limits which initializes
> queue_limits structure that can be passed to blk_mq_alloc_queue.
> 

With this patch in linux mainline, I see

ata2: found unknown device (class 0)
ata2.00: ATAPI: QEMU DVD-ROM, 2.5+, max UDMA/100
------------[ cut here ]------------
WARNING: CPU: 0 PID: 27 at block/blk-settings.c:202 blk_validate_limits+0x28c/0x304
Modules linked in:
CPU: 0 PID: 27 Comm: kworker/u4:2 Not tainted 6.9.0-05151-g1b294a1f3561 #1
Hardware name: PowerMac3,1 PPC970FX 0x3c0301 PowerMac
Workqueue: async async_run_entry_fn
NIP:  c0000000007ccec8 LR: c0000000007c805c CTR: 0000000000000000
REGS: c000000006def690 TRAP: 0700   Not tainted  (6.9.0-05151-g1b294a1f3561)
MSR:  8000000000028032 <SF,EE,IR,DR,RI>  CR: 84004228  XER: 20000000
IRQMASK: 0
GPR00: c0000000007c8040 c000000006def930 c00000000159f900 c000000006defac8
GPR04: c00000000146e788 0000000000000000 0000000000000000 0000000000000100
GPR08: 0000000000000200 000000000000ff00 0000000000000000 0000000000004000
GPR12: 000000000fa82000 c000000003330000 c000000000116508 c0000000060c5c40
GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000088
GPR20: 0000000000000000 c0000000026f2f40 c0000000025eeff0 0000000000000000
GPR24: c000000006defc80 c0000000031cb3a0 c000000002571c80 c000000006defac8
GPR28: c0000000033052e0 ffffffffffffffff 0000000000000010 c000000008f13df0
NIP [c0000000007ccec8] blk_validate_limits+0x28c/0x304
LR [c0000000007c805c] blk_alloc_queue+0xbc/0x344
Call Trace:
[c000000006def930] [c0000000007c8040] blk_alloc_queue+0xa0/0x344 (unreliable)
[c000000006def990] [c0000000007e2658] blk_mq_alloc_queue+0x60/0xf4
[c000000006defa60] [c000000000a7a260] scsi_alloc_sdev+0x280/0x464
[c000000006defb90] [c000000000a7a6b4] scsi_probe_and_add_lun+0x270/0x388
[c000000006defc60] [c000000000a7b070] __scsi_add_device+0x168/0x1b4
[c000000006defcc0] [c000000000b08fe0] ata_scsi_scan_host+0x294/0x39c
[c000000006defd80] [c000000000af7704] async_port_probe+0x6c/0x98
[c000000006defdb0] [c000000000120028] async_run_entry_fn+0x50/0x13c
[c000000006defe00] [c00000000010821c] process_one_work+0x2c0/0x828
[c000000006deff00] [c000000000109090] worker_thread+0x224/0x474
[c000000006deff90] [c000000000116658] kthread+0x158/0x17c

followed by

scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured
usb 1-1: new full-speed USB device number 2 using ohci-pci
scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured
scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured
scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured
input: QEMU QEMU USB Keyboard as /devices/pci0000:f0/0000:f0:0d.0/usb1/1-1/1-1:1.0/0003:0627:0001.0001/input/input0
scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured
ata2: WARNING: synchronous SCSI scan failed without making any progress, switching to async

and ultimately a boot hang. This is with the mac99 emulation in qemu.
The warning is always seen, the boot hang is seen when trying to boot
from ide/ata drive. Bisect log is attached.

Guenter

---
# bad: [1b294a1f35616977caddaddf3e9d28e576a1adbc] Merge tag 'net-next-6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
# good: [a5131c3fdf2608f1c15f3809e201cf540eb28489] Merge tag 'x86-shstk-2024-05-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
git bisect start 'HEAD' 'a5131c3fdf26'
# good: [f8beae078c82abde57fed4a5be0bbc3579b59ad0] Merge tag 'gtp-24-05-07' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/gtp Pablo neira Ayuso says:
git bisect good f8beae078c82abde57fed4a5be0bbc3579b59ad0
# good: [ce952d8f0e9b58dc6a2bde7e47ca7fa7925583cc] Merge tag 'gpio-updates-for-v6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux
git bisect good ce952d8f0e9b58dc6a2bde7e47ca7fa7925583cc
# bad: [113d1dd9c8ea2186d56a641a787e2588673c9c32] Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
git bisect bad 113d1dd9c8ea2186d56a641a787e2588673c9c32
# good: [a3d1f54d7aa4c3be2c6a10768d4ffa1dcb620da9] Merge tag 'for-6.10-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
git bisect good a3d1f54d7aa4c3be2c6a10768d4ffa1dcb620da9
# bad: [f92141e18c8b466027e226f3388de15b059b6f65] Merge patch series "convert SCSI to atomic queue limits, part 1 (v3)"
git bisect bad f92141e18c8b466027e226f3388de15b059b6f65
# good: [0e0a4da35284c85225e3b128912582ebc73256c8] Merge patch series "scsi: ufs: Remove overzealous memory barriers"
git bisect good 0e0a4da35284c85225e3b128912582ebc73256c8
# bad: [a25a9c85d17fd2f19bd5a2bb25b8361d72336bc7] scsi: libata: Switch to using ->device_configure
git bisect bad a25a9c85d17fd2f19bd5a2bb25b8361d72336bc7
# bad: [6248d7f7714f018f2c02f356582784e74596f8e8] scsi: core: Add a no_highmem flag to struct Scsi_Host
git bisect bad 6248d7f7714f018f2c02f356582784e74596f8e8
# good: [33507b3964f136ea1592718cb81885c8f9354f65] scsi: ufs: qcom: Add sanity checks for gear/lane values during ICC scaling
git bisect good 33507b3964f136ea1592718cb81885c8f9354f65
# good: [4373d2ecca7fa7ad04aa9c371c80049bafec2610] scsi: bsg: Pass queue_limits to bsg_setup_queue()
git bisect good 4373d2ecca7fa7ad04aa9c371c80049bafec2610
# bad: [afd53a3d852808bfeb5bc3ae3cd1caa9389bcc94] scsi: core: Initialize scsi midlayer limits before allocating the queue
git bisect bad afd53a3d852808bfeb5bc3ae3cd1caa9389bcc94
# good: [9042fb6d2c085eccdf11069b04754dac807c36ea] scsi: mpi3mr: Pass queue_limits to bsg_setup_queue()
git bisect good 9042fb6d2c085eccdf11069b04754dac807c36ea
# first bad commit: [afd53a3d852808bfeb5bc3ae3cd1caa9389bcc94] scsi: core: Initialize scsi midlayer limits before allocating the queue
Guenter Roeck May 15, 2024, 11:52 p.m. UTC | #2
On 5/15/24 15:16, John Garry wrote:
> On 15/05/2024 15:50, Guenter Roeck wrote:
>> Hi,
>>
>> On Tue, Apr 09, 2024 at 04:37:29PM +0200, Christoph Hellwig wrote:
>>> Turn __scsi_init_queue into scsi_init_limits which initializes
>>> queue_limits structure that can be passed to blk_mq_alloc_queue.
> 
> The previous behavior would sanitize max_segment_size < PAGE_SIZE, so I suppose you could try:
> 
> --- a/block/blk-settings.c
> +++ b/block/blk-settings.c
> @@ -199,6 +199,8 @@ static int blk_validate_limits(struct queue_limits *lim)
>                   */
>                  if (!lim->max_segment_size)
>                          lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
> +               else if (lim->max_segment_size < PAGE_SIZE)
> +                       lim->max_segment_size = PAGE_SIZE;
>                  if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE))
>                          return -EINVAL;
>          }
> 

With some debugging:

pata_macio_common_init() Calling ata_host_activate() with limit 65280
...
max_segment_size is 65280; PAGE_SIZE is 65536; BLK_MAX_SEGMENT_SIZE is 65536
WARNING: CPU: 0 PID: 12 at block/blk-settings.c:202 blk_validate_limits+0x2d4/0x364
...

This is with PPC_BOOK3S_64 which selects a default page size of 64k.
Looking at the old code, I think it did what you suggested above,

void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size)
{
         if (max_size < PAGE_SIZE) {
                 max_size = PAGE_SIZE;
                 printk(KERN_INFO "%s: set to minimum %d\n",
                        __func__, max_size);
         }
...

but assuming that the driver requested a lower limit on purpose that
may not be the best solution.

Never mind, though - I updated my test configuration to explicitly
configure the page size to 4k to work around the problem. With that,
please consider this report a note in case someone hits the problem
on a real system (and sorry for the noise).

Thanks,
Guenter
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 2e28e2360c8574..1deca84914e87a 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -32,7 +32,7 @@ 
 #include <scsi/scsi_driver.h>
 #include <scsi/scsi_eh.h>
 #include <scsi/scsi_host.h>
-#include <scsi/scsi_transport.h> /* __scsi_init_queue() */
+#include <scsi/scsi_transport.h> /* scsi_init_limits() */
 #include <scsi/scsi_dh.h>
 
 #include <trace/events/scsi.h>
@@ -1965,31 +1965,26 @@  static void scsi_map_queues(struct blk_mq_tag_set *set)
 	blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
 }
 
-void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
+void scsi_init_limits(struct Scsi_Host *shost, struct queue_limits *lim)
 {
 	struct device *dev = shost->dma_dev;
 
-	/*
-	 * this limit is imposed by hardware restrictions
-	 */
-	blk_queue_max_segments(q, min_t(unsigned short, shost->sg_tablesize,
-					SG_MAX_SEGMENTS));
+	memset(lim, 0, sizeof(*lim));
+	lim->max_segments =
+		min_t(unsigned short, shost->sg_tablesize, SG_MAX_SEGMENTS);
 
 	if (scsi_host_prot_dma(shost)) {
 		shost->sg_prot_tablesize =
 			min_not_zero(shost->sg_prot_tablesize,
 				     (unsigned short)SCSI_MAX_PROT_SG_SEGMENTS);
 		BUG_ON(shost->sg_prot_tablesize < shost->sg_tablesize);
-		blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize);
+		lim->max_integrity_segments = shost->sg_prot_tablesize;
 	}
 
-	blk_queue_max_hw_sectors(q, shost->max_sectors);
-	blk_queue_segment_boundary(q, shost->dma_boundary);
-	dma_set_seg_boundary(dev, shost->dma_boundary);
-
-	blk_queue_max_segment_size(q, shost->max_segment_size);
-	blk_queue_virt_boundary(q, shost->virt_boundary_mask);
-	dma_set_max_seg_size(dev, queue_max_segment_size(q));
+	lim->max_hw_sectors = shost->max_sectors;
+	lim->seg_boundary_mask = shost->dma_boundary;
+	lim->max_segment_size = shost->max_segment_size;
+	lim->virt_boundary_mask = shost->virt_boundary_mask;
 
 	/*
 	 * Set a reasonable default alignment:  The larger of 32-byte (dword),
@@ -1998,9 +1993,12 @@  void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
 	 *
 	 * Devices that require a bigger alignment can increase it later.
 	 */
-	blk_queue_dma_alignment(q, max(4, dma_get_cache_alignment()) - 1);
+	lim->dma_alignment = max(4, dma_get_cache_alignment()) - 1;
+
+	dma_set_seg_boundary(dev, shost->dma_boundary);
+	dma_set_max_seg_size(dev, shost->max_segment_size);
 }
-EXPORT_SYMBOL_GPL(__scsi_init_queue);
+EXPORT_SYMBOL_GPL(scsi_init_limits);
 
 static const struct blk_mq_ops scsi_mq_ops_no_commit = {
 	.get_budget	= scsi_mq_get_budget,
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 8d06475de17a33..205ab3b3ea89be 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -283,6 +283,7 @@  static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
 	struct request_queue *q;
 	int display_failure_msg = 1, ret;
 	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
+	struct queue_limits lim;
 
 	sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size,
 		       GFP_KERNEL);
@@ -332,7 +333,8 @@  static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
 
 	sdev->sg_reserved_size = INT_MAX;
 
-	q = blk_mq_alloc_queue(&sdev->host->tag_set, NULL, NULL);
+	scsi_init_limits(shost, &lim);
+	q = blk_mq_alloc_queue(&sdev->host->tag_set, &lim, NULL);
 	if (IS_ERR(q)) {
 		/* release fn is set up in scsi_sysfs_device_initialise, so
 		 * have to free and put manually here */
@@ -343,7 +345,6 @@  static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
 	kref_get(&sdev->host->tagset_refcnt);
 	sdev->request_queue = q;
 	q->queuedata = sdev;
-	__scsi_init_queue(sdev->host, q);
 
 	depth = sdev->host->cmd_per_lun ?: 1;
 
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 87b2235b8ece45..0799700b0fca77 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -4276,6 +4276,7 @@  fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host)
 {
 	struct device *dev = &shost->shost_gendev;
 	struct fc_internal *i = to_fc_internal(shost->transportt);
+	struct queue_limits lim;
 	struct request_queue *q;
 	char bsg_name[20];
 
@@ -4286,8 +4287,8 @@  fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host)
 
 	snprintf(bsg_name, sizeof(bsg_name),
 		 "fc_host%d", shost->host_no);
-
-	q = bsg_setup_queue(dev, bsg_name, NULL, fc_bsg_dispatch,
+	scsi_init_limits(shost, &lim);
+	q = bsg_setup_queue(dev, bsg_name, &lim, fc_bsg_dispatch,
 			fc_bsg_job_timeout, i->f->dd_bsg_size);
 	if (IS_ERR(q)) {
 		dev_err(dev,
@@ -4295,7 +4296,6 @@  fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host)
 			shost->host_no);
 		return PTR_ERR(q);
 	}
-	__scsi_init_queue(shost, q);
 	blk_queue_rq_timeout(q, FC_DEFAULT_BSG_TIMEOUT);
 	fc_host->rqst_q = q;
 	return 0;
@@ -4311,6 +4311,7 @@  fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport)
 {
 	struct device *dev = &rport->dev;
 	struct fc_internal *i = to_fc_internal(shost->transportt);
+	struct queue_limits lim;
 	struct request_queue *q;
 
 	rport->rqst_q = NULL;
@@ -4318,13 +4319,13 @@  fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport)
 	if (!i->f->bsg_request)
 		return -ENOTSUPP;
 
-	q = bsg_setup_queue(dev, dev_name(dev), NULL, fc_bsg_dispatch_prep,
+	scsi_init_limits(shost, &lim);
+	q = bsg_setup_queue(dev, dev_name(dev), &lim, fc_bsg_dispatch_prep,
 				fc_bsg_job_timeout, i->f->dd_bsg_size);
 	if (IS_ERR(q)) {
 		dev_err(dev, "failed to setup bsg queue\n");
 		return PTR_ERR(q);
 	}
-	__scsi_init_queue(shost, q);
 	blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
 	rport->rqst_q = q;
 	return 0;
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index c131746bf20777..5e1bb488da15c0 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1535,6 +1535,7 @@  iscsi_bsg_host_add(struct Scsi_Host *shost, struct iscsi_cls_host *ihost)
 {
 	struct device *dev = &shost->shost_gendev;
 	struct iscsi_internal *i = to_iscsi_internal(shost->transportt);
+	struct queue_limits lim;
 	struct request_queue *q;
 	char bsg_name[20];
 
@@ -1542,14 +1543,14 @@  iscsi_bsg_host_add(struct Scsi_Host *shost, struct iscsi_cls_host *ihost)
 		return -ENOTSUPP;
 
 	snprintf(bsg_name, sizeof(bsg_name), "iscsi_host%d", shost->host_no);
-	q = bsg_setup_queue(dev, bsg_name, NULL, iscsi_bsg_host_dispatch, NULL,
+	scsi_init_limits(shost, &lim);
+	q = bsg_setup_queue(dev, bsg_name, &lim, iscsi_bsg_host_dispatch, NULL,
 			0);
 	if (IS_ERR(q)) {
 		shost_printk(KERN_ERR, shost, "bsg interface failed to "
 			     "initialize - no request queue\n");
 		return PTR_ERR(q);
 	}
-	__scsi_init_queue(shost, q);
 
 	ihost->bsg_q = q;
 	return 0;
diff --git a/include/scsi/scsi_transport.h b/include/scsi/scsi_transport.h
index a0458bda314872..1394cf313bb37c 100644
--- a/include/scsi/scsi_transport.h
+++ b/include/scsi/scsi_transport.h
@@ -83,6 +83,6 @@  scsi_transport_device_data(struct scsi_device *sdev)
 		+ shost->transportt->device_private_offset;
 }
 
-void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q);
+void scsi_init_limits(struct Scsi_Host *shost, struct queue_limits *lim);
 
 #endif /* SCSI_TRANSPORT_H */