Message ID | 20240215070300.2200308-18-hch@lst.de |
---|---|
State | New |
Series | [01/17] ubd: pass queue_limits to blk_mq_alloc_disk |
Hi Christoph,

On Thu, Feb 15, 2024 at 9:16 AM Christoph Hellwig <hch@lst.de> wrote:
> Pass the queue limit set at initialization time directly to
> blk_mq_alloc_disk instead of updating it right after the allocation.
>
> This requires refactoring the code a bit so that what was mmc_setup_queue
> before also allocates the gendisk now and actually sets all limits.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Thanks for your patch, which is now commit 616f876617927732 ("mmc: pass
queue_limits to blk_mq_alloc_disk") in block/for-next.

I have bisected the following failure on White-Hawk (also seen on
other R-Car Gen3/4 systems) to this commit:

renesas_sdhi_internal_dmac ee140000.mmc: mmc0 base at 0x00000000ee140000, max clock rate 200 MHz
mmc0: new HS400 MMC card at address 0001
------------[ cut here ]------------
WARNING: CPU: 1 PID: 20 at block/blk-settings.c:202 blk_validate_limits+0x12c/0x1e0
Modules linked in:
CPU: 1 PID: 20 Comm: kworker/1:0 Not tainted 6.8.0-rc3-white-hawk-00084-g616f87661792 #223
Hardware name: Renesas White Hawk CPU and Breakout boards based on r8a779g0 (DT)
Workqueue: events_freezable mmc_rescan
pstate: 00000005 (nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : blk_validate_limits+0x12c/0x1e0
lr : blk_set_default_limits+0x14/0x1c
sp : ffffffc0829336f0
x29: ffffffc0829336f0 x28: 0000000000000000 x27: 0000000000000000
Micrel KSZ9031 Gigabit PHY e6800000.ethernet-ffffffff:00: attached PHY driver (mii_bus:phy_addr=e6800000.ethernet-ffffffff:00, irq=30)
x26: ffffff8443afd808 x25: ffffffc0825bd000 x24: ffffffc082933878
x23: 00000000ffffffff x22: ffffffc08147b258 x21: ffffff8443ab0c10
x20: 000000000000001a x19: ffffff8443b00000 x18: 0000000043789380
x17: ffffffc0806b2ea8 x16: ffffffc0803ab8b4 x15: ffffffc0803ab444
x14: ffffffc08039c26c x13: ffffffc080506854 x12: ffffffc080506700
x11: ffffffc08050669c x10: ffffffc080506478 x9 : ffffffc0803ad1a0
x8 : ffffff8443019120 x7 : ffffffc0803ad1a0 x6 : 0000000000000000
x5 : 0000000000000000 x4 : 0000000000000a00 x3 : 0000000000000fff
x2 : 0000000000000200 x1 : 0000000000010000 x0 : ffffffc082933878
Call trace:
 blk_validate_limits+0x12c/0x1e0
 blk_alloc_queue+0x7c/0x244
 blk_mq_alloc_queue+0x4c/0xac
 __blk_mq_alloc_disk+0x1c/0xc0
 mmc_alloc_disk+0x134/0x2b4
 mmc_init_queue+0x114/0x12c
 mmc_blk_alloc_req+0xf0/0x34c
 mmc_blk_probe+0x230/0x5b8
 mmc_bus_probe+0x18/0x20
 really_probe+0x138/0x270
 __driver_probe_device+0xec/0x104
 driver_probe_device+0x4c/0xf8
 __device_attach_driver+0xa8/0xc8
 bus_for_each_drv+0xa4/0xc8
 __device_attach+0xe4/0x144
 device_initial_probe+0x10/0x18
 bus_probe_device+0x38/0xa0
 device_add+0x520/0x654
 mmc_add_card+0x12c/0x28c
 mmc_attach_mmc+0xb8/0x154
 mmc_rescan+0x208/0x250
 process_scheduled_works+0x2b8/0x41c
 worker_thread+0x1cc/0x24c
 kthread+0xd8/0xe8
 ret_from_fork+0x10/0x20
irq event stamp: 434
hardirqs last enabled at (433): [<ffffffc0808e0ac0>] _raw_spin_unlock_irq+0x2c/0x40
hardirqs last disabled at (434): [<ffffffc0808dae28>] __schedule+0x1cc/0x84c
softirqs last enabled at (192): [<ffffffc080010300>] __do_softirq+0x1ac/0x360
softirqs last disabled at (185): [<ffffffc08001550c>] ____do_softirq+0xc/0x14
---[ end trace 0000000000000000 ]---
mmcblk: probe of mmc0:0001 failed with error -22

Reverting this commit fixes the issue.
On Tue, Feb 20, 2024 at 11:01 PM Geert Uytterhoeven <geert@linux-m68k.org> wrote:
> On Thu, Feb 15, 2024 at 9:16 AM Christoph Hellwig <hch@lst.de> wrote:
> > Pass the queue limit set at initialization time directly to
> > blk_mq_alloc_disk instead of updating it right after the allocation.
[...]
> I have bisected the following failure on White-Hawk (also seen on
> other R-Car Gen3/4 systems) to this commit:
>
> renesas_sdhi_internal_dmac ee140000.mmc: mmc0 base at
> 0x00000000ee140000, max clock rate 200 MHz
> mmc0: new HS400 MMC card at address 0001
> ------------[ cut here ]------------
> WARNING: CPU: 1 PID: 20 at block/blk-settings.c:202
> blk_validate_limits+0x12c/0x1e0

Actual capacity should be:

mmc0: new HS400 MMC card at address 0001
mmcblk0: mmc0:0001 G1M15L 29.6 GiB
mmcblk0boot0: mmc0:0001 G1M15L 31.5 MiB
mmcblk0boot1: mmc0:0001 G1M15L 31.5 MiB
mmcblk0rpmb: mmc0:0001 G1M15L 4.00 MiB, chardev (245:0)

Gr{oetje,eeting}s,

        Geert
On Tue, Feb 20, 2024 at 11:01:05PM +0100, Geert Uytterhoeven wrote:
> Hi Christoph,
>
> Thanks for your patch, which is now commit 616f876617927732 ("mmc: pass
> queue_limits to blk_mq_alloc_disk") in block/for-next.
>
> I have bisected the following failure on White-Hawk (also seen on
> other R-Car Gen3/4 systems) to this commit:
>
> renesas_sdhi_internal_dmac ee140000.mmc: mmc0 base at
> 0x00000000ee140000, max clock rate 200 MHz
> mmc0: new HS400 MMC card at address 0001
> ------------[ cut here ]------------
> WARNING: CPU: 1 PID: 20 at block/blk-settings.c:202
> blk_validate_limits+0x12c/0x1e0

This is:

	if (lim->virt_boundary_mask) {
		if (WARN_ON_ONCE(lim->max_segment_size &&
				 lim->max_segment_size != UINT_MAX))
			return -EINVAL;

so we end up here with both a virt_boundary_mask and a
max_segment_size set, which is rather bogus.  I think the
problem is the order of checks in the core blk_validate_limits
that artificially causes this.  Can you try this patch?

diff --git a/block/blk-settings.c b/block/blk-settings.c
index c4406aacc0efc6..2120b6f9fef8ea 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -182,16 +182,6 @@ static int blk_validate_limits(struct queue_limits *lim)
 	if (WARN_ON_ONCE(lim->seg_boundary_mask < PAGE_SIZE - 1))
 		return -EINVAL;
 
-	/*
-	 * The maximum segment size has an odd historic 64k default that
-	 * drivers probably should override.  Just like the I/O size we
-	 * require drivers to at least handle a full page per segment.
-	 */
-	if (!lim->max_segment_size)
-		lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
-	if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE))
-		return -EINVAL;
-
 	/*
 	 * Devices that require a virtual boundary do not support scatter/gather
 	 * I/O natively, but instead require a descriptor list entry for each
@@ -203,6 +193,16 @@ static int blk_validate_limits(struct queue_limits *lim)
 				 lim->max_segment_size != UINT_MAX))
 			return -EINVAL;
 		lim->max_segment_size = UINT_MAX;
+	} else {
+		/*
+		 * The maximum segment size has an odd historic 64k default that
+		 * drivers probably should override.  Just like the I/O size we
+		 * require drivers to at least handle a full page per segment.
+		 */
+		if (!lim->max_segment_size)
+			lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
+		if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE))
+			return -EINVAL;
 	}
 
 	/*
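[Editorial aside: the ordering problem Christoph describes can be reproduced outside the kernel. The following standalone sketch mirrors only the two checks in question; the struct, names, and constants are simplified stand-ins for the blk_validate_limits() logic, not kernel code.]

```c
#include <stdio.h>
#include <stdint.h>

#define BLK_MAX_SEGMENT_SIZE	65536u	/* historic 64k default */
#define PAGE_SZ			4096u	/* assuming 4k pages here */

struct limits {
	uint64_t virt_boundary_mask;
	uint32_t max_segment_size;
};

/* Old order: the 64k default is applied before the virt_boundary check,
 * so a driver that sets only virt_boundary_mask (like the MMC code when
 * can_dma_map_merge is true) reaches the check with 64k != UINT32_MAX. */
static int validate_old(struct limits *lim)
{
	if (!lim->max_segment_size)
		lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
	if (lim->max_segment_size < PAGE_SZ)
		return -1;
	if (lim->virt_boundary_mask &&
	    lim->max_segment_size != UINT32_MAX)
		return -1;	/* the WARN + -EINVAL Geert hit */
	return 0;
}

/* Fixed order: the virt_boundary branch claims max_segment_size first;
 * the 64k default only applies when no virtual boundary is set. */
static int validate_new(struct limits *lim)
{
	if (lim->virt_boundary_mask) {
		if (lim->max_segment_size &&
		    lim->max_segment_size != UINT32_MAX)
			return -1;
		lim->max_segment_size = UINT32_MAX;
	} else {
		if (!lim->max_segment_size)
			lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
		if (lim->max_segment_size < PAGE_SZ)
			return -1;
	}
	return 0;
}

int main(void)
{
	/* a merge-capable host: only a virtual boundary, no segment size */
	struct limits a = { .virt_boundary_mask = 0xfff };
	struct limits b = { .virt_boundary_mask = 0xfff };

	printf("old order: %s\n", validate_old(&a) ? "-EINVAL (the bug)" : "OK");
	printf("new order: %s\n", validate_new(&b) ? "-EINVAL" : "OK");
	return 0;
}
```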
Hi Christoph,

On Wed, Feb 21, 2024 at 6:44 AM Christoph Hellwig <hch@lst.de> wrote:
> On Tue, Feb 20, 2024 at 11:01:05PM +0100, Geert Uytterhoeven wrote:
> > I have bisected the following failure on White-Hawk (also seen on
> > other R-Car Gen3/4 systems) to this commit:
[...]
> so we end up here with both a virt_boundary_mask and a
> max_segment_size set, which is rather bogus.  I think the
> problem is the order of checks in the core blk_validate_limits
> that artificially causes this.  Can you try this patch?

Thanks, good thinking, as that fixed the issue for me!
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>

Gr{oetje,eeting}s,

        Geert
Hi Christoph,

On 15/02/2024 07:03, Christoph Hellwig wrote:
> Pass the queue limit set at initialization time directly to
> blk_mq_alloc_disk instead of updating it right after the allocation.
>
> This requires refactoring the code a bit so that what was mmc_setup_queue
> before also allocates the gendisk now and actually sets all limits.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/mmc/core/queue.c | 97 +++++++++++++++++++++-------------------
>  1 file changed, 52 insertions(+), 45 deletions(-)
[...]

We have just noticed that, since Linux v6.9 was released, if we build
the kernel with 64kB MMU pages, we see the following WARNING and probe
failure ...
[ 2.828612] ------------[ cut here ]------------
[ 2.829243] WARNING: CPU: 1 PID: 87 at block/blk-settings.c:206 blk_validate_limits+0x1cc/0x234
[ 2.830417] Modules linked in:
[ 2.830963] CPU: 1 PID: 87 Comm: kworker/1:1 Not tainted 6.10.0-rc5 #1
[ 2.831773] Hardware name: NVIDIA Jetson AGX Xavier Developer Kit (DT)
[ 2.832538] Workqueue: events_freezable mmc_rescan
[ 2.833341] pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 2.834263] pc : blk_validate_limits+0x1cc/0x234
[ 2.834927] lr : blk_set_default_limits+0x18/0x24
[ 2.835654] sp : ffff800084c4f730
[ 2.836161] x29: ffff800084c4f730 x28: 0000000000000000 x27: 0000000000000000
[ 2.837448] x26: 0000000000000000 x25: ffff000084130008 x24: 00000000ffffffff
[ 2.838607] x23: ffff800082d84000 x22: ffff8000828b79c8 x21: ffff800084c4f8b8
[ 2.839754] x20: 0000000000000008 x19: ffff000084170000 x18: 0000000000000000
[ 2.840831] x17: 00000000900163fe x16: 000000008802ed71 x15: 00000000000003e8
[ 2.842228] x14: 0000000000000002 x13: 00000000000372ef x12: 0000000000002bb0
[ 2.843536] x11: 0000000000000000 x10: 0000000000004400 x9 : 0000000000000000
[ 2.847832] x8 : ffff0000841703a0 x7 : 0000000000000000 x6 : 0000000000000003
[ 2.855075] x5 : ffff000081279e00 x4 : 0000000000000000 x3 : 0000000000000200
[ 2.862140] x2 : 000000000000ffff x1 : 000000000000fe00 x0 : ffff800084c4f8b8
[ 2.869633] Call trace:
[ 2.872055]  blk_validate_limits+0x1cc/0x234
[ 2.876278]  blk_alloc_queue+0x7c/0x260
[ 2.880038]  blk_mq_alloc_queue+0x54/0xbc
[ 2.884504]  __blk_mq_alloc_disk+0x20/0x190
[ 2.888266]  mmc_alloc_disk+0xbc/0x250
[ 2.892062]  mmc_init_queue+0xe8/0x114
[ 2.896091]  mmc_blk_alloc_req+0xb4/0x374
[ 2.900237]  mmc_blk_probe+0x1d4/0x650
[ 2.904194]  mmc_bus_probe+0x20/0x2c
[ 2.907434]  really_probe+0xbc/0x2a4
[ 2.911391]  __driver_probe_device+0x78/0x12c
[ 2.915627]  driver_probe_device+0x40/0x160
[ 2.919587]  __device_attach_driver+0xb8/0x134
[ 2.923881]  bus_for_each_drv+0x80/0xdc
[ 2.927664]  __device_attach+0xa8/0x1b0
[ 2.931578]  device_initial_probe+0x14/0x20
[ 2.935845]  bus_probe_device+0xa8/0xac
[ 2.939606]  device_add+0x590/0x754
[ 2.942864]  mmc_add_card+0x238/0x2dc
[ 2.946691]  mmc_attach_mmc+0x12c/0x174
[ 2.950494]  mmc_rescan+0x27c/0x318
[ 2.954172]  process_one_work+0x154/0x298
[ 2.957919]  worker_thread+0x304/0x408
[ 2.961931]  kthread+0x118/0x11c
[ 2.965187]  ret_from_fork+0x10/0x20
[ 2.968835] ---[ end trace 0000000000000000 ]---
[ 2.974440] mmcblk mmc0:0001: probe with driver mmcblk failed with error -22

Bisect points to this commit. This is very similar to the issue Geert
reported [0], but we are still seeing this on the latest mainline with
v6.10-rc5. When building with 4kB MMU pages we don't see this issue
when testing on the same hardware.

Let me know if you have any thoughts.

Thanks
Jon

[0] https://lore.kernel.org/linux-mmc/CAMuHMdWV4nWQHUpBKM2gHWeH9j9c0Di4bhg+F4-SAPEAmZuNSA@mail.gmail.com/
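[Editorial aside: the register dump above (x1 = 0xfe00, x2 = 0xffff, x3 = 0x200) is consistent with the host advertising a segment size just under 64k. A standalone sketch of the arithmetic; the 0xffff host limit is an assumption read from x2, not confirmed in the thread.]

```c
#include <stdio.h>

int main(void)
{
	unsigned int page_size    = 64 * 1024;	/* 64kB MMU pages */
	unsigned int block_size   = 512;	/* lim.logical_block_size */
	unsigned int max_seg_size = 0xffff;	/* assumed host->max_seg_size */

	/* mmc_alloc_disk(): round the host limit down to the block size */
	unsigned int seg = max_seg_size / block_size * block_size;

	printf("max_segment_size = %#x\n", seg);	/* 0xfe00, as in x1 */
	/* blk_validate_limits(): require at least one full page per segment */
	printf("%s\n", seg < page_size ? "WARN, -EINVAL" : "ok");
	return 0;
}
```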
On Thu, Jun 27, 2024 at 10:43:24AM +0100, Jon Hunter wrote:
> We have just noticed that, since Linux v6.9 was released, if we build
> the kernel with 64kB MMU pages, we see the following WARNING and probe
> failure ...

The old code upgraded the limit to PAGE_SIZE for this case after
issuing a warning.  Your driver probably incorrectly advertised the
lower max_segment_size.  Try setting it to 64k.  I would have sent you
a patch for that, but I can't see what mmc host driver you are using.
On 27/06/2024 10:49, Christoph Hellwig wrote:
> On Thu, Jun 27, 2024 at 10:43:24AM +0100, Jon Hunter wrote:
>> We have just noticed that, since Linux v6.9 was released, if we build
>> the kernel with 64kB MMU pages, we see the following WARNING and probe
>> failure ...
>
> The old code upgraded the limit to PAGE_SIZE for this case after
> issuing a warning.  Your driver probably incorrectly advertised the
> lower max_segment_size.  Try setting it to 64k.  I would have sent you
> a patch for that, but I can't see what mmc host driver you are using.

We are using the sdhci-tegra.c driver. I don't see it set in there, but
I see references to max_seg_size in the main sdhci.c driver.

Thanks,
Jon
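[Editorial aside: a minimal sketch of the change Christoph suggests, assuming it would live where the host driver (or the sdhci core) derives max_seg_size. The placement and the guard are illustrative only, not a tested patch; whether a larger segment size is safe depends on the controller's DMA descriptor limits.]

```c
/* Hypothetical fix along the lines suggested above: advertise at least
 * a 64k segment so that round_down(host->max_seg_size, block_size) in
 * mmc_alloc_disk() cannot fall below a 64kB PAGE_SIZE. */
if (mmc->max_seg_size < SZ_64K)
	mmc->max_seg_size = SZ_64K;
```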
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 67ad186d132a69..2ae60d208cdf1e 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -174,8 +174,8 @@ static struct scatterlist *mmc_alloc_sg(unsigned short sg_len, gfp_t gfp)
 	return sg;
 }
 
-static void mmc_queue_setup_discard(struct request_queue *q,
-				    struct mmc_card *card)
+static void mmc_queue_setup_discard(struct mmc_card *card,
+		struct queue_limits *lim)
 {
 	unsigned max_discard;
 
@@ -183,15 +183,17 @@ static void mmc_queue_setup_discard(struct request_queue *q,
 	if (!max_discard)
 		return;
 
-	blk_queue_max_discard_sectors(q, max_discard);
-	q->limits.discard_granularity = card->pref_erase << 9;
-	/* granularity must not be greater than max. discard */
-	if (card->pref_erase > max_discard)
-		q->limits.discard_granularity = SECTOR_SIZE;
+	lim->max_hw_discard_sectors = max_discard;
 	if (mmc_can_secure_erase_trim(card))
-		blk_queue_max_secure_erase_sectors(q, max_discard);
+		lim->max_secure_erase_sectors = max_discard;
 	if (mmc_can_trim(card) && card->erased_byte == 0)
-		blk_queue_max_write_zeroes_sectors(q, max_discard);
+		lim->max_write_zeroes_sectors = max_discard;
+
+	/* granularity must not be greater than max. discard */
+	if (card->pref_erase > max_discard)
+		lim->discard_granularity = SECTOR_SIZE;
+	else
+		lim->discard_granularity = card->pref_erase << 9;
 }
 
 static unsigned short mmc_get_max_segments(struct mmc_host *host)
@@ -341,40 +343,53 @@ static const struct blk_mq_ops mmc_mq_ops = {
 	.timeout = mmc_mq_timed_out,
 };
 
-static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
+static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq,
+		struct mmc_card *card)
 {
 	struct mmc_host *host = card->host;
-	unsigned block_size = 512;
+	struct queue_limits lim = { };
+	struct gendisk *disk;
 
-	blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue);
-	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue);
 	if (mmc_can_erase(card))
-		mmc_queue_setup_discard(mq->queue, card);
+		mmc_queue_setup_discard(card, &lim);
 
 	if (!mmc_dev(host)->dma_mask || !*mmc_dev(host)->dma_mask)
-		blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_HIGH);
-	blk_queue_max_hw_sectors(mq->queue,
-				 min(host->max_blk_count, host->max_req_size / 512));
-	if (host->can_dma_map_merge)
-		WARN(!blk_queue_can_use_dma_map_merging(mq->queue,
-							mmc_dev(host)),
-		     "merging was advertised but not possible");
-	blk_queue_max_segments(mq->queue, mmc_get_max_segments(host));
-
-	if (mmc_card_mmc(card) && card->ext_csd.data_sector_size) {
-		block_size = card->ext_csd.data_sector_size;
-		WARN_ON(block_size != 512 && block_size != 4096);
-	}
+		lim.bounce = BLK_BOUNCE_HIGH;
+
+	lim.max_hw_sectors = min(host->max_blk_count, host->max_req_size / 512);
+
+	if (mmc_card_mmc(card) && card->ext_csd.data_sector_size)
+		lim.logical_block_size = card->ext_csd.data_sector_size;
+	else
+		lim.logical_block_size = 512;
+
+	WARN_ON_ONCE(lim.logical_block_size != 512 &&
+		     lim.logical_block_size != 4096);
 
-	blk_queue_logical_block_size(mq->queue, block_size);
 	/*
-	 * After blk_queue_can_use_dma_map_merging() was called with succeed,
-	 * since it calls blk_queue_virt_boundary(), the mmc should not call
-	 * both blk_queue_max_segment_size().
+	 * Setting a virt_boundary implicity sets a max_segment_size, so try
+	 * to set the hardware one here.
 	 */
-	if (!host->can_dma_map_merge)
-		blk_queue_max_segment_size(mq->queue,
-			round_down(host->max_seg_size, block_size));
+	if (host->can_dma_map_merge) {
+		lim.virt_boundary_mask = dma_get_merge_boundary(mmc_dev(host));
+		lim.max_segments = MMC_DMA_MAP_MERGE_SEGMENTS;
+	} else {
+		lim.max_segment_size =
+			round_down(host->max_seg_size, lim.logical_block_size);
+		lim.max_segments = host->max_segs;
+	}
+
+	disk = blk_mq_alloc_disk(&mq->tag_set, &lim, mq);
+	if (IS_ERR(disk))
+		return disk;
+	mq->queue = disk->queue;
+
+	if (mmc_host_is_spi(host) && host->use_spi_crc)
+		blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue);
+	blk_queue_rq_timeout(mq->queue, 60 * HZ);
+
+	blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue);
+	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue);
 
 	dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue));
 
@@ -386,6 +401,7 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
 	init_waitqueue_head(&mq->wait);
 
 	mmc_crypto_setup_queue(mq->queue, host);
+	return disk;
 }
 
 static inline bool mmc_merge_capable(struct mmc_host *host)
@@ -447,18 +463,9 @@ struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
 		return ERR_PTR(ret);
 
-	disk = blk_mq_alloc_disk(&mq->tag_set, NULL, mq);
-	if (IS_ERR(disk)) {
+	disk = mmc_alloc_disk(mq, card);
+	if (IS_ERR(disk))
 		blk_mq_free_tag_set(&mq->tag_set);
-		return disk;
-	}
-	mq->queue = disk->queue;
-
-	if (mmc_host_is_spi(host) && host->use_spi_crc)
-		blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue);
-	blk_queue_rq_timeout(mq->queue, 60 * HZ);
-
-	mmc_setup_queue(mq, card);
 	return disk;
 }
Pass the queue limit set at initialization time directly to
blk_mq_alloc_disk instead of updating it right after the allocation.

This requires refactoring the code a bit so that what was mmc_setup_queue
before also allocates the gendisk now and actually sets all limits.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/mmc/core/queue.c | 97 +++++++++++++++++++++-------------------
 1 file changed, 52 insertions(+), 45 deletions(-)
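[Editorial aside: the conversion pattern is the same across the series — build a queue_limits on the stack and hand it to blk_mq_alloc_disk() instead of poking disk->queue afterwards. A condensed, hypothetical example of the shape; foo_dev and its fields are invented, the block-layer calls are the ones used in the patch above.]

```c
struct foo_dev {
	struct blk_mq_tag_set	tag_set;
	unsigned int		max_sectors;
	unsigned short		max_segs;
};

static struct gendisk *foo_alloc_disk(struct foo_dev *dev)
{
	/* All hardware limits are declared up front ... */
	struct queue_limits lim = {
		.logical_block_size	= 512,
		.max_hw_sectors		= dev->max_sectors,
		.max_segments		= dev->max_segs,
	};

	/*
	 * ... and validated once, inside the allocation, instead of
	 * being applied piecemeal to disk->queue after the fact.
	 */
	return blk_mq_alloc_disk(&dev->tag_set, &lim, dev);
}
```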