diff mbox series

[V2,05/13] block: only account passthrough IO from userspace

Message ID 20220122111054.1126146-6-ming.lei@redhat.com
State New
Headers show
Series block: don't drain file system I/O on del_gendisk | expand

Commit Message

Ming Lei Jan. 22, 2022, 11:10 a.m. UTC
Passthrough request from userspace has one active block_device/disk
associated, so they can be accounted via rq->q->disk. For other
passthrough request, there may not be disk/block_device for the queue,
since either the queue has not a disk or the disk may be deleted
already.

Add flag of BLK_MQ_REQ_USER_IO for only accounting passthrough request
from userspace.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq.c            | 13 ++++++++++++-
 drivers/nvme/host/ioctl.c |  2 +-
 drivers/scsi/scsi_ioctl.c |  3 ++-
 include/linux/blk-mq.h    |  2 ++
 4 files changed, 17 insertions(+), 3 deletions(-)

Comments

Christoph Hellwig Jan. 24, 2022, 1:05 p.m. UTC | #1
On Sat, Jan 22, 2022 at 07:10:46PM +0800, Ming Lei wrote:
> Passthrough request from userspace has one active block_device/disk
> associated, so they can be accounted via rq->q->disk. For other
> passthrough request, there may not be disk/block_device for the queue,
> since either the queue has not a disk or the disk may be deleted
> already.
> 
> Add flag of BLK_MQ_REQ_USER_IO for only accounting passthrough request
> from userspace.

Please explain why you want to change this.

Also this is missing I/O from /dev/sg, CDROM CDDA BPC reading, the tape
drivers and bsg-lib.
Ming Lei Jan. 24, 2022, 11:09 p.m. UTC | #2
On Mon, Jan 24, 2022 at 02:05:55PM +0100, Christoph Hellwig wrote:
> On Sat, Jan 22, 2022 at 07:10:46PM +0800, Ming Lei wrote:
> > Passthrough request from userspace has one active block_device/disk
> > associated, so they can be accounted via rq->q->disk. For other
> > passthrough request, there may not be disk/block_device for the queue,
> > since either the queue has not a disk or the disk may be deleted
> > already.
> > 
> > Add flag of BLK_MQ_REQ_USER_IO for only accounting passthrough request
> > from userspace.
> 
> Please explain why you want to change this.

Please see the following code:

        /* passthrough requests can hold bios that do not have ->bi_bdev set */
        if (rq->bio && rq->bio->bi_bdev)
                rq->part = rq->bio->bi_bdev;
        else if (rq->q->disk)
                rq->part = rq->q->disk->part0;

q->disk can be cleared by disk_release() just when referring the above line, then
NULL ptr reference is caused, and similar issue with any reference to rq->part for
passthrough request sent not from userspace.

> 
> Also this is missing I/O from /dev/sg, CDROM CDDA BPC reading, the tape
> drivers and bsg-lib.

Except for CDROM CDDA BPC reading, the others don't have gendisk associated,
so they needn't such change. And it looks easy to do that for CDROM CDDA BPC
reading.


Thanks,
Ming
Christoph Hellwig Jan. 25, 2022, 6:16 a.m. UTC | #3
On Tue, Jan 25, 2022 at 07:09:09AM +0800, Ming Lei wrote:
> > Please explain why you want to change this.
> 
> Please see the following code:

This needs to go into the commit log.

> 
>         /* passthrough requests can hold bios that do not have ->bi_bdev set */
>         if (rq->bio && rq->bio->bi_bdev)
>                 rq->part = rq->bio->bi_bdev;
>         else if (rq->q->disk)
>                 rq->part = rq->q->disk->part0;
> 
> q->disk can be cleared by disk_release() just when referring the above line, then
> NULL ptr reference is caused, and similar issue with any reference to rq->part for
> passthrough request sent not from userspace.

So why not key off accouning off "rq->bio && rq->bio->bi_bdev"
and remove the need for the flag and the second half of the assignment
above?  That is much less error probe and removes code size.
Ming Lei Jan. 25, 2022, 9:09 a.m. UTC | #4
On Tue, Jan 25, 2022 at 08:19:06AM +0100, Christoph Hellwig wrote:
> On Tue, Jan 25, 2022 at 07:16:34AM +0100, Christoph Hellwig wrote:
> > So why not key off accouning off "rq->bio && rq->bio->bi_bdev"
> > and remove the need for the flag and the second half of the assignment
> > above?  That is much less error probe and removes code size.
> 
> Something like this, lightly tested:
> 

Follows another simple way by accounting all request with bio attached,
except for requests with kernel buffer.


diff --git a/block/blk-map.c b/block/blk-map.c
index 4526adde0156..1210b51c62ae 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -630,6 +630,8 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 	struct bio *bio;
 	int ret;
 
+	rq->rq_flags &= ~RQF_IO_STAT;
+
 	if (len > (queue_max_hw_sectors(q) << 9))
 		return -EINVAL;
 	if (!len || !kbuf)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 72ae9955cf27..eac589d2c340 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -903,7 +903,7 @@ static void __blk_account_io_start(struct request *rq)
 	/* passthrough requests can hold bios that do not have ->bi_bdev set */
 	if (rq->bio && rq->bio->bi_bdev)
 		rq->part = rq->bio->bi_bdev;
-	else if (rq->q->disk)
+	else if (rq->q->disk && rq->bio)
 		rq->part = rq->q->disk->part0;
 
 	part_stat_lock();


Thanks,
Ming
diff mbox series

Patch

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a6d4780580fc..0d25cc5778c9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -336,6 +336,17 @@  void blk_rq_init(struct request_queue *q, struct request *rq)
 }
 EXPORT_SYMBOL(blk_rq_init);
 
+static inline bool blk_mq_io_may_account(struct blk_mq_alloc_data *data)
+{
+	if (!blk_op_is_passthrough(data->cmd_flags))
+		return true;
+
+	if (data->flags & BLK_MQ_REQ_USER_IO)
+		return true;
+
+	return false;
+}
+
 static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 		struct blk_mq_tags *tags, unsigned int tag, u64 alloc_time_ns)
 {
@@ -351,7 +362,7 @@  static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 
 	if (data->flags & BLK_MQ_REQ_PM)
 		data->rq_flags |= RQF_PM;
-	if (blk_queue_io_stat(q))
+	if (blk_queue_io_stat(q) && blk_mq_io_may_account(data))
 		data->rq_flags |= RQF_IO_STAT;
 	rq->rq_flags = data->rq_flags;
 
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 22314962842d..f94afc38a6e3 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -66,7 +66,7 @@  static int nvme_submit_user_cmd(struct request_queue *q,
 	void *meta = NULL;
 	int ret;
 
-	req = nvme_alloc_request(q, cmd, 0);
+	req = nvme_alloc_request(q, cmd, BLK_MQ_REQ_USER_IO);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
index e13fd380deb6..b262fe06dacc 100644
--- a/drivers/scsi/scsi_ioctl.c
+++ b/drivers/scsi/scsi_ioctl.c
@@ -440,7 +440,8 @@  static int sg_io(struct scsi_device *sdev, struct sg_io_hdr *hdr, fmode_t mode)
 
 	ret = -ENOMEM;
 	rq = scsi_alloc_request(sdev->request_queue, writing ?
-			     REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
+			     REQ_OP_DRV_OUT : REQ_OP_DRV_IN,
+			     BLK_MQ_REQ_USER_IO);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 	req = scsi_req(rq);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d319ffa59354..d2ad2ed11723 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -709,6 +709,8 @@  enum {
 	BLK_MQ_REQ_RESERVED	= (__force blk_mq_req_flags_t)(1 << 1),
 	/* set RQF_PM */
 	BLK_MQ_REQ_PM		= (__force blk_mq_req_flags_t)(1 << 2),
+	/* user IO request */
+	BLK_MQ_REQ_USER_IO	= (__force blk_mq_req_flags_t)(1 << 3),
 };
 
 struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,