diff mbox series

[v2,07/18] block: introduce duration-limits priority class

Message ID 20230112140412.667308-8-niklas.cassel@wdc.com
State Superseded
Headers show
Series Add Command Duration Limits support | expand

Commit Message

Niklas Cassel Jan. 12, 2023, 2:03 p.m. UTC
From: Damien Le Moal <damien.lemoal@opensource.wdc.com>

Introduce the IOPRIO_CLASS_DL priority class to indicate that IOs should
be executed using duration-limits targets. The duration target to apply
to a command is indicated using the priority level. Up to 8 levels are
supported, with level 0 indiating "no limit".

This priority class has effect only if the target device supports the
command duration limits feature and this feature is enabled by the user.
In BFQ and mq-deadline, all requests with this new priority class are
handled using the highest priority class RT and priority level 0.

Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
---
 block/bfq-iosched.c         | 10 ++++++++++
 block/blk-ioprio.c          |  3 +++
 block/ioprio.c              |  3 ++-
 block/mq-deadline.c         |  1 +
 include/linux/ioprio.h      |  2 +-
 include/uapi/linux/ioprio.h |  7 +++++++
 6 files changed, 24 insertions(+), 2 deletions(-)

Comments

Hannes Reinecke Jan. 13, 2023, 11:55 a.m. UTC | #1
On 1/12/23 15:03, Niklas Cassel wrote:
> From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> 
> Introduce the IOPRIO_CLASS_DL priority class to indicate that IOs should
> be executed using duration-limits targets. The duration target to apply
> to a command is indicated using the priority level. Up to 8 levels are
> supported, with level 0 indiating "no limit".
> 
> This priority class has effect only if the target device supports the
> command duration limits feature and this feature is enabled by the user.
> In BFQ and mq-deadline, all requests with this new priority class are
> handled using the highest priority class RT and priority level 0.
> 
> Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
> ---
>   block/bfq-iosched.c         | 10 ++++++++++
>   block/blk-ioprio.c          |  3 +++
>   block/ioprio.c              |  3 ++-
>   block/mq-deadline.c         |  1 +
>   include/linux/ioprio.h      |  2 +-
>   include/uapi/linux/ioprio.h |  7 +++++++
>   6 files changed, 24 insertions(+), 2 deletions(-)
> 
I wonder.

_Normally_ a command timeout is only in force once the command is being 
handed off to the driver. As such it doesn't apply for any scheduling 
being done before that; most notably the I/O scheduler is not affected 
by any command timeout.

And I was under the impression that CDL really only allows the drive to 
impose a command timeout on its own.
(Pray correct me if I'm mistaken)

Hence: does CDL really impinge on the I/O scheduler? Or shouldn't we 
treat CDL just like a 'normal' command timeout, only to be activated 
when normal command timeout is enabled?

Cheers,

Hannes
Christoph Hellwig Jan. 17, 2023, 8:13 a.m. UTC | #2
On Tue, Jan 17, 2023 at 05:06:52PM +0900, Damien Le Moal wrote:
> They can, by using a large limit for "low priority" IOs. But then, these
> would still have a limit while any IO issued simultaneously without a CDL
> index specified would have no limit at all. So strictly speaking, the no
> limit IOs should be considered as even lower priority that CDL IOs with
> large limits.
> 
> The other aspect here is that on ATA drives, CDL and NCQ priority cannot
> be used together. A mix of CDL and high priority commands cannot be sent
> to a device. Combining this with the above thinking, it made sense to me
> to have the CDL priority class handled the same as the RT class (as that
> is the one that maps to ATA NCQ high prio commands).

Ok.  Maybe document this a bit better in the commit log.
diff mbox series

Patch

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 815b884d6c5a..7add9346c585 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5545,6 +5545,14 @@  bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
 		bfqq->new_ioprio_class = IOPRIO_CLASS_IDLE;
 		bfqq->new_ioprio = 7;
 		break;
+	case IOPRIO_CLASS_DL:
+		/*
+		 * For the duration-limits class, we want the disk to do the
+		 * scheduling. So map all levels to the highest RT level.
+		 */
+		bfqq->new_ioprio = 0;
+		bfqq->new_ioprio_class = IOPRIO_CLASS_RT;
+		break;
 	}
 
 	if (bfqq->new_ioprio >= IOPRIO_NR_LEVELS) {
@@ -5673,6 +5681,8 @@  static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
 		return &bfqg->async_bfqq[1][ioprio][act_idx];
 	case IOPRIO_CLASS_IDLE:
 		return &bfqg->async_idle_bfqq[act_idx];
+	case IOPRIO_CLASS_DL:
+		return &bfqg->async_bfqq[0][0][act_idx];
 	default:
 		return NULL;
 	}
diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
index 8bb6b8eba4ce..dfb5c3f447f4 100644
--- a/block/blk-ioprio.c
+++ b/block/blk-ioprio.c
@@ -27,6 +27,7 @@ 
  * @POLICY_RESTRICT_TO_BE: modify IOPRIO_CLASS_NONE and IOPRIO_CLASS_RT into
  *		IOPRIO_CLASS_BE.
  * @POLICY_ALL_TO_IDLE: change the I/O priority class into IOPRIO_CLASS_IDLE.
+ * @POLICY_ALL_TO_DL: change the I/O priority class into IOPRIO_CLASS_DL.
  *
  * See also <linux/ioprio.h>.
  */
@@ -35,6 +36,7 @@  enum prio_policy {
 	POLICY_NONE_TO_RT	= 1,
 	POLICY_RESTRICT_TO_BE	= 2,
 	POLICY_ALL_TO_IDLE	= 3,
+	POLICY_ALL_TO_DL	= 4,
 };
 
 static const char *policy_name[] = {
@@ -42,6 +44,7 @@  static const char *policy_name[] = {
 	[POLICY_NONE_TO_RT]	= "none-to-rt",
 	[POLICY_RESTRICT_TO_BE]	= "restrict-to-be",
 	[POLICY_ALL_TO_IDLE]	= "idle",
+	[POLICY_ALL_TO_DL]	= "duration-limits",
 };
 
 static struct blkcg_policy ioprio_policy;
diff --git a/block/ioprio.c b/block/ioprio.c
index 32a456b45804..1b3a9da82597 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -37,6 +37,7 @@  int ioprio_check_cap(int ioprio)
 
 	switch (class) {
 		case IOPRIO_CLASS_RT:
+		case IOPRIO_CLASS_DL:
 			/*
 			 * Originally this only checked for CAP_SYS_ADMIN,
 			 * which was implicitly allowed for pid 0 by security
@@ -47,7 +48,7 @@  int ioprio_check_cap(int ioprio)
 			if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE))
 				return -EPERM;
 			fallthrough;
-			/* rt has prio field too */
+			/* RT and DL have prio field too */
 		case IOPRIO_CLASS_BE:
 			if (data >= IOPRIO_NR_LEVELS || data < 0)
 				return -EINVAL;
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index f10c2a0d18d4..526d0ea4dbf9 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -113,6 +113,7 @@  static const enum dd_prio ioprio_class_to_prio[] = {
 	[IOPRIO_CLASS_RT]	= DD_RT_PRIO,
 	[IOPRIO_CLASS_BE]	= DD_BE_PRIO,
 	[IOPRIO_CLASS_IDLE]	= DD_IDLE_PRIO,
+	[IOPRIO_CLASS_DL]	= DD_RT_PRIO,
 };
 
 static inline struct rb_root *
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index 7578d4f6a969..2f3fc2fbd668 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -20,7 +20,7 @@  static inline bool ioprio_valid(unsigned short ioprio)
 {
 	unsigned short class = IOPRIO_PRIO_CLASS(ioprio);
 
-	return class > IOPRIO_CLASS_NONE && class <= IOPRIO_CLASS_IDLE;
+	return class > IOPRIO_CLASS_NONE && class <= IOPRIO_CLASS_DL;
 }
 
 /*
diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h
index f70f2596a6bf..15908b9e9d8c 100644
--- a/include/uapi/linux/ioprio.h
+++ b/include/uapi/linux/ioprio.h
@@ -29,6 +29,7 @@  enum {
 	IOPRIO_CLASS_RT,
 	IOPRIO_CLASS_BE,
 	IOPRIO_CLASS_IDLE,
+	IOPRIO_CLASS_DL,
 };
 
 /*
@@ -37,6 +38,12 @@  enum {
 #define IOPRIO_NR_LEVELS	8
 #define IOPRIO_BE_NR		IOPRIO_NR_LEVELS
 
+/*
+ * The Duration limits class allows 8 levels: level 0 for "no limit" and levels
+ * 1 to 7, each corresponding to a read or write limit descriptor.
+ */
+#define IOPRIO_DL_NR_LEVELS	8
+
 enum {
 	IOPRIO_WHO_PROCESS = 1,
 	IOPRIO_WHO_PGRP,