Message ID | 20240409143748.980206-5-hch@lst.de |
---|---|
State | New |
Headers | show |
Series | [01/23] block: add a helper to cancel atomic queue limit updates | expand |
Hi, On Tue, Apr 09, 2024 at 04:37:29PM +0200, Christoph Hellwig wrote: > Turn __scsi_init_queue into scsi_init_limits which initializes > queue_limits structure that can be passed to blk_mq_alloc_queue. > With this patch in linux mainline, I see ata2: found unknown device (class 0) ata2.00: ATAPI: QEMU DVD-ROM, 2.5+, max UDMA/100 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 27 at block/blk-settings.c:202 blk_validate_limits+0x28c/0x304 Modules linked in: CPU: 0 PID: 27 Comm: kworker/u4:2 Not tainted 6.9.0-05151-g1b294a1f3561 #1 Hardware name: PowerMac3,1 PPC970FX 0x3c0301 PowerMac Workqueue: async async_run_entry_fn NIP: c0000000007ccec8 LR: c0000000007c805c CTR: 0000000000000000 REGS: c000000006def690 TRAP: 0700 Not tainted (6.9.0-05151-g1b294a1f3561) MSR: 8000000000028032 <SF,EE,IR,DR,RI> CR: 84004228 XER: 20000000 IRQMASK: 0 GPR00: c0000000007c8040 c000000006def930 c00000000159f900 c000000006defac8 GPR04: c00000000146e788 0000000000000000 0000000000000000 0000000000000100 GPR08: 0000000000000200 000000000000ff00 0000000000000000 0000000000004000 GPR12: 000000000fa82000 c000000003330000 c000000000116508 c0000000060c5c40 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000088 GPR20: 0000000000000000 c0000000026f2f40 c0000000025eeff0 0000000000000000 GPR24: c000000006defc80 c0000000031cb3a0 c000000002571c80 c000000006defac8 GPR28: c0000000033052e0 ffffffffffffffff 0000000000000010 c000000008f13df0 NIP [c0000000007ccec8] blk_validate_limits+0x28c/0x304 LR [c0000000007c805c] blk_alloc_queue+0xbc/0x344 Call Trace: [c000000006def930] [c0000000007c8040] blk_alloc_queue+0xa0/0x344 (unreliable) [c000000006def990] [c0000000007e2658] blk_mq_alloc_queue+0x60/0xf4 [c000000006defa60] [c000000000a7a260] scsi_alloc_sdev+0x280/0x464 [c000000006defb90] [c000000000a7a6b4] scsi_probe_and_add_lun+0x270/0x388 [c000000006defc60] [c000000000a7b070] __scsi_add_device+0x168/0x1b4 [c000000006defcc0] [c000000000b08fe0] ata_scsi_scan_host+0x294/0x39c [c000000006defd80] [c000000000af7704] async_port_probe+0x6c/0x98 [c000000006defdb0] [c000000000120028] async_run_entry_fn+0x50/0x13c [c000000006defe00] [c00000000010821c] process_one_work+0x2c0/0x828 [c000000006deff00] [c000000000109090] worker_thread+0x224/0x474 [c000000006deff90] [c000000000116658] kthread+0x158/0x17c followed by scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured usb 1-1: new full-speed USB device number 2 using ohci-pci scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured input: QEMU QEMU USB Keyboard as /devices/pci0000:f0/0000:f0:0d.0/usb1/1-1/1-1:1.0/0003:0627:0001.0001/input/input0 scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured ata2: WARNING: synchronous SCSI scan failed without making any progress, switching to async and ultimately a boot hang. This is with the mac99 emulation in qemu. The warning is always seen, the boot hang is seen when trying to boot from ide/ata drive. Bisect log is attached. Guenter --- # bad: [1b294a1f35616977caddaddf3e9d28e576a1adbc] Merge tag 'net-next-6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next # good: [a5131c3fdf2608f1c15f3809e201cf540eb28489] Merge tag 'x86-shstk-2024-05-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip git bisect start 'HEAD' 'a5131c3fdf26' # good: [f8beae078c82abde57fed4a5be0bbc3579b59ad0] Merge tag 'gtp-24-05-07' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/gtp Pablo neira Ayuso says: git bisect good f8beae078c82abde57fed4a5be0bbc3579b59ad0 # good: [ce952d8f0e9b58dc6a2bde7e47ca7fa7925583cc] Merge tag 'gpio-updates-for-v6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux git bisect good ce952d8f0e9b58dc6a2bde7e47ca7fa7925583cc # bad: [113d1dd9c8ea2186d56a641a787e2588673c9c32] Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi git bisect bad 113d1dd9c8ea2186d56a641a787e2588673c9c32 # good: [a3d1f54d7aa4c3be2c6a10768d4ffa1dcb620da9] Merge tag 'for-6.10-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux git bisect good a3d1f54d7aa4c3be2c6a10768d4ffa1dcb620da9 # bad: [f92141e18c8b466027e226f3388de15b059b6f65] Merge patch series "convert SCSI to atomic queue limits, part 1 (v3)" git bisect bad f92141e18c8b466027e226f3388de15b059b6f65 # good: [0e0a4da35284c85225e3b128912582ebc73256c8] Merge patch series "scsi: ufs: Remove overzealous memory barriers" git bisect good 0e0a4da35284c85225e3b128912582ebc73256c8 # bad: [a25a9c85d17fd2f19bd5a2bb25b8361d72336bc7] scsi: libata: Switch to using ->device_configure git bisect bad a25a9c85d17fd2f19bd5a2bb25b8361d72336bc7 # bad: [6248d7f7714f018f2c02f356582784e74596f8e8] scsi: core: Add a no_highmem flag to struct Scsi_Host git bisect bad 6248d7f7714f018f2c02f356582784e74596f8e8 # good: [33507b3964f136ea1592718cb81885c8f9354f65] scsi: ufs: qcom: Add sanity checks for gear/lane values during ICC scaling git bisect good 33507b3964f136ea1592718cb81885c8f9354f65 # good: [4373d2ecca7fa7ad04aa9c371c80049bafec2610] scsi: bsg: Pass queue_limits to bsg_setup_queue() git bisect good 4373d2ecca7fa7ad04aa9c371c80049bafec2610 # bad: [afd53a3d852808bfeb5bc3ae3cd1caa9389bcc94] scsi: core: Initialize scsi midlayer limits before allocating the queue git bisect bad afd53a3d852808bfeb5bc3ae3cd1caa9389bcc94 # good: [9042fb6d2c085eccdf11069b04754dac807c36ea] scsi: mpi3mr: Pass queue_limits to bsg_setup_queue() git bisect good 9042fb6d2c085eccdf11069b04754dac807c36ea # first bad commit: [afd53a3d852808bfeb5bc3ae3cd1caa9389bcc94] scsi: core: Initialize scsi midlayer limits before allocating the queue
On 5/15/24 15:16, John Garry wrote: > On 15/05/2024 15:50, Guenter Roeck wrote: >> Hi, >> >> On Tue, Apr 09, 2024 at 04:37:29PM +0200, Christoph Hellwig wrote: >>> Turn __scsi_init_queue into scsi_init_limits which initializes >>> queue_limits structure that can be passed to blk_mq_alloc_queue. > > The previous behavior would sanitize max_segment_size < PAGE_SIZE, so I suppose you could try: > > --- a/block/blk-settings.c > +++ b/block/blk-settings.c > @@ -199,6 +199,8 @@ static int blk_validate_limits(struct queue_limits *lim) > */ > if (!lim->max_segment_size) > lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; > + else if (lim->max_segment_size < PAGE_SIZE) > + lim->max_segment_size = PAGE_SIZE; > if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE)) > return -EINVAL; > } > With some debugging: pata_macio_common_init() Calling ata_host_activate() with limit 65280 ... max_segment_size is 65280; PAGE_SIZE is 65536; BLK_MAX_SEGMENT_SIZE is 65536 WARNING: CPU: 0 PID: 12 at block/blk-settings.c:202 blk_validate_limits+0x2d4/0x364 ... This is with PPC_BOOK3S_64 which selects a default page size of 64k. Looking at the old code, I think it did what you suggested above, void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) { if (max_size < PAGE_SIZE) { max_size = PAGE_SIZE; printk(KERN_INFO "%s: set to minimum %d\n", __func__, max_size); } ... but assuming that the driver requested a lower limit on purpose that may not be the best solution. Never mind, though - I updated my test configuration to explicitly configure the page size to 4k to work around the problem. With that, please consider this report a note in case someone hits the problem on a real system (and sorry for the noise). Thanks, Guenter
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 2e28e2360c8574..1deca84914e87a 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -32,7 +32,7 @@ #include <scsi/scsi_driver.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_host.h> -#include <scsi/scsi_transport.h> /* __scsi_init_queue() */ +#include <scsi/scsi_transport.h> /* scsi_init_limits() */ #include <scsi/scsi_dh.h> #include <trace/events/scsi.h> @@ -1965,31 +1965,26 @@ static void scsi_map_queues(struct blk_mq_tag_set *set) blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); } -void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) +void scsi_init_limits(struct Scsi_Host *shost, struct queue_limits *lim) { struct device *dev = shost->dma_dev; - /* - * this limit is imposed by hardware restrictions - */ - blk_queue_max_segments(q, min_t(unsigned short, shost->sg_tablesize, - SG_MAX_SEGMENTS)); + memset(lim, 0, sizeof(*lim)); + lim->max_segments = + min_t(unsigned short, shost->sg_tablesize, SG_MAX_SEGMENTS); if (scsi_host_prot_dma(shost)) { shost->sg_prot_tablesize = min_not_zero(shost->sg_prot_tablesize, (unsigned short)SCSI_MAX_PROT_SG_SEGMENTS); BUG_ON(shost->sg_prot_tablesize < shost->sg_tablesize); - blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize); + lim->max_integrity_segments = shost->sg_prot_tablesize; } - blk_queue_max_hw_sectors(q, shost->max_sectors); - blk_queue_segment_boundary(q, shost->dma_boundary); - dma_set_seg_boundary(dev, shost->dma_boundary); - - blk_queue_max_segment_size(q, shost->max_segment_size); - blk_queue_virt_boundary(q, shost->virt_boundary_mask); - dma_set_max_seg_size(dev, queue_max_segment_size(q)); + lim->max_hw_sectors = shost->max_sectors; + lim->seg_boundary_mask = shost->dma_boundary; + lim->max_segment_size = shost->max_segment_size; + lim->virt_boundary_mask = shost->virt_boundary_mask; /* * Set a reasonable default alignment: The larger of 32-byte (dword), @@ -1998,9 +1993,12 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) * * Devices that require a bigger alignment can increase it later. */ - blk_queue_dma_alignment(q, max(4, dma_get_cache_alignment()) - 1); + lim->dma_alignment = max(4, dma_get_cache_alignment()) - 1; + + dma_set_seg_boundary(dev, shost->dma_boundary); + dma_set_max_seg_size(dev, shost->max_segment_size); } -EXPORT_SYMBOL_GPL(__scsi_init_queue); +EXPORT_SYMBOL_GPL(scsi_init_limits); static const struct blk_mq_ops scsi_mq_ops_no_commit = { .get_budget = scsi_mq_get_budget, diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 8d06475de17a33..205ab3b3ea89be 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -283,6 +283,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, struct request_queue *q; int display_failure_msg = 1, ret; struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); + struct queue_limits lim; sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size, GFP_KERNEL); @@ -332,7 +333,8 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, sdev->sg_reserved_size = INT_MAX; - q = blk_mq_alloc_queue(&sdev->host->tag_set, NULL, NULL); + scsi_init_limits(shost, &lim); + q = blk_mq_alloc_queue(&sdev->host->tag_set, &lim, NULL); if (IS_ERR(q)) { /* release fn is set up in scsi_sysfs_device_initialise, so * have to free and put manually here */ @@ -343,7 +345,6 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, kref_get(&sdev->host->tagset_refcnt); sdev->request_queue = q; q->queuedata = sdev; - __scsi_init_queue(sdev->host, q); depth = sdev->host->cmd_per_lun ?: 1; diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 87b2235b8ece45..0799700b0fca77 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -4276,6 +4276,7 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host) { struct device *dev = &shost->shost_gendev; struct fc_internal *i = to_fc_internal(shost->transportt); + struct queue_limits lim; struct request_queue *q; char bsg_name[20]; @@ -4286,8 +4287,8 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host) snprintf(bsg_name, sizeof(bsg_name), "fc_host%d", shost->host_no); - - q = bsg_setup_queue(dev, bsg_name, NULL, fc_bsg_dispatch, + scsi_init_limits(shost, &lim); + q = bsg_setup_queue(dev, bsg_name, &lim, fc_bsg_dispatch, fc_bsg_job_timeout, i->f->dd_bsg_size); if (IS_ERR(q)) { dev_err(dev, @@ -4295,7 +4296,6 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host) shost->host_no); return PTR_ERR(q); } - __scsi_init_queue(shost, q); blk_queue_rq_timeout(q, FC_DEFAULT_BSG_TIMEOUT); fc_host->rqst_q = q; return 0; @@ -4311,6 +4311,7 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport) { struct device *dev = &rport->dev; struct fc_internal *i = to_fc_internal(shost->transportt); + struct queue_limits lim; struct request_queue *q; rport->rqst_q = NULL; @@ -4318,13 +4319,13 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport) if (!i->f->bsg_request) return -ENOTSUPP; - q = bsg_setup_queue(dev, dev_name(dev), NULL, fc_bsg_dispatch_prep, + scsi_init_limits(shost, &lim); + q = bsg_setup_queue(dev, dev_name(dev), &lim, fc_bsg_dispatch_prep, fc_bsg_job_timeout, i->f->dd_bsg_size); if (IS_ERR(q)) { dev_err(dev, "failed to setup bsg queue\n"); return PTR_ERR(q); } - __scsi_init_queue(shost, q); blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); rport->rqst_q = q; return 0; diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index c131746bf20777..5e1bb488da15c0 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1535,6 +1535,7 @@ iscsi_bsg_host_add(struct Scsi_Host *shost, struct iscsi_cls_host *ihost) { struct device *dev = &shost->shost_gendev; struct iscsi_internal *i = to_iscsi_internal(shost->transportt); + struct queue_limits lim; struct request_queue *q; char bsg_name[20]; @@ -1542,14 +1543,14 @@ iscsi_bsg_host_add(struct Scsi_Host *shost, struct iscsi_cls_host *ihost) return -ENOTSUPP; snprintf(bsg_name, sizeof(bsg_name), "iscsi_host%d", shost->host_no); - q = bsg_setup_queue(dev, bsg_name, NULL, iscsi_bsg_host_dispatch, NULL, + scsi_init_limits(shost, &lim); + q = bsg_setup_queue(dev, bsg_name, &lim, iscsi_bsg_host_dispatch, NULL, 0); if (IS_ERR(q)) { shost_printk(KERN_ERR, shost, "bsg interface failed to " "initialize - no request queue\n"); return PTR_ERR(q); } - __scsi_init_queue(shost, q); ihost->bsg_q = q; return 0; diff --git a/include/scsi/scsi_transport.h b/include/scsi/scsi_transport.h index a0458bda314872..1394cf313bb37c 100644 --- a/include/scsi/scsi_transport.h +++ b/include/scsi/scsi_transport.h @@ -83,6 +83,6 @@ scsi_transport_device_data(struct scsi_device *sdev) + shost->transportt->device_private_offset; } -void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q); +void scsi_init_limits(struct Scsi_Host *shost, struct queue_limits *lim); #endif /* SCSI_TRANSPORT_H */