[RFC,v7,07/12] blk-mq: Add support in hctx_tags_bitmap_show() for a shared sbitmap

Message ID 1591810159-240929-8-git-send-email-john.garry@huawei.com
State New
Headers show
Series
  • blk-mq/scsi: Provide hostwide shared tags for SCSI HBAs
Related show

Commit Message

John Garry June 10, 2020, 5:29 p.m.
Since a set-wide shared tag sbitmap may be used, it is no longer valid to
examine the per-hctx tagset for getting the active requests for a hctx
(when a shared sbitmap is used).

As such, add support for the shared sbitmap by using an intermediate
sbitmap per hctx, iterating all active tags for the specific hctx in the
shared sbitmap.

Originally-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Hannes Reinecke <hare@suse.de> #earlier version

Signed-off-by: John Garry <john.garry@huawei.com>

---
 block/blk-mq-debugfs.c | 62 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

-- 
2.26.2

Comments

Hannes Reinecke June 11, 2020, 1:19 p.m. | #1
On 6/10/20 7:29 PM, John Garry wrote:
> Since a set-wide shared tag sbitmap may be used, it is no longer valid to

> examine the per-hctx tagset for getting the active requests for a hctx

> (when a shared sbitmap is used).

> 

> As such, add support for the shared sbitmap by using an intermediate

> sbitmap per hctx, iterating all active tags for the specific hctx in the

> shared sbitmap.

> 

> Originally-by: Bart Van Assche <bvanassche@acm.org>

> Reviewed-by: Hannes Reinecke <hare@suse.de> #earlier version

> Signed-off-by: John Garry <john.garry@huawei.com>

> ---

>   block/blk-mq-debugfs.c | 62 ++++++++++++++++++++++++++++++++++++++++++

>   1 file changed, 62 insertions(+)

> 

> diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c

> index 05b4be0c03d9..4da7e54adf3b 100644

> --- a/block/blk-mq-debugfs.c

> +++ b/block/blk-mq-debugfs.c

> @@ -495,6 +495,67 @@ static int hctx_tags_show(void *data, struct seq_file *m)

>   	return res;

>   }

>   

> +struct hctx_sb_data {

> +	struct sbitmap		*sb;	/* output bitmap */

> +	struct blk_mq_hw_ctx	*hctx;	/* input hctx */

> +};

> +

> +static bool hctx_filter_fn(struct blk_mq_hw_ctx *hctx, struct request *req,

> +			   void *priv, bool reserved)

> +{

> +	struct hctx_sb_data *hctx_sb_data = priv;

> +

> +	if (hctx == hctx_sb_data->hctx)

> +		sbitmap_set_bit(hctx_sb_data->sb, req->tag);

> +

> +	return true;

> +}

> +

> +static void hctx_filter_sb(struct sbitmap *sb, struct blk_mq_hw_ctx *hctx)

> +{

> +	struct hctx_sb_data hctx_sb_data = { .sb = sb, .hctx = hctx };

> +

> +	blk_mq_queue_tag_busy_iter(hctx->queue, hctx_filter_fn, &hctx_sb_data);

> +}

> +

> +static int hctx_tags_shared_sbitmap_bitmap_show(void *data, struct seq_file *m)

> +{

> +	struct blk_mq_hw_ctx *hctx = data;

> +	struct request_queue *q = hctx->queue;

> +	struct blk_mq_tag_set *set = q->tag_set;

> +	struct sbitmap shared_sb, *sb;

> +	int res;

> +

> +	if (!set)

> +		return 0;

> +

> +	/*

> +	 * We could use the allocated sbitmap for that hctx here, but

> +	 * that would mean that we would need to clean it prior to use.

> +	 */

> +	res = sbitmap_init_node(&shared_sb,

> +				set->__bitmap_tags.sb.depth,

> +				set->__bitmap_tags.sb.shift,

> +				GFP_KERNEL, NUMA_NO_NODE);

> +	if (res)

> +		return res;

> +	sb = &shared_sb;

> +

> +	res = mutex_lock_interruptible(&q->sysfs_lock);

> +	if (res)

> +		goto out;

> +	if (hctx->tags) {

> +		hctx_filter_sb(sb, hctx);

> +		sbitmap_bitmap_show(sb, m);

> +	}

> +

> +	mutex_unlock(&q->sysfs_lock);

> +

> +out:

> +	sbitmap_free(&shared_sb);

> +	return res;

> +}

> +

>   static int hctx_tags_bitmap_show(void *data, struct seq_file *m)

>   {

>   	struct blk_mq_hw_ctx *hctx = data;

> @@ -823,6 +884,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_shared_sbitmap_attrs

>   	{"busy", 0400, hctx_busy_show},

>   	{"ctx_map", 0400, hctx_ctx_map_show},

>   	{"tags", 0400, hctx_tags_show},

> +	{"tags_bitmap", 0400, hctx_tags_shared_sbitmap_bitmap_show},

>   	{"sched_tags", 0400, hctx_sched_tags_show},

>   	{"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show},

>   	{"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write},

> 

Ah, right. Here it is.

But I don't get it; why do we have to allocate a temporary bitmap and 
can't walk the existing shared sbitmap?

Cheers,

Hannes
-- 
Dr. Hannes Reinecke            Teamlead Storage & Networking
hare@suse.de                               +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer
John Garry June 11, 2020, 2:33 p.m. | #2
>> +static int hctx_tags_shared_sbitmap_bitmap_show(void *data, struct 

>> seq_file *m)

>> +{

>> +    struct blk_mq_hw_ctx *hctx = data;

>> +    struct request_queue *q = hctx->queue;

>> +    struct blk_mq_tag_set *set = q->tag_set;

>> +    struct sbitmap shared_sb, *sb;

>> +    int res;

>> +

>> +    if (!set)

>> +        return 0;

>> +

>> +    /*

>> +     * We could use the allocated sbitmap for that hctx here, but

>> +     * that would mean that we would need to clean it prior to use.

>> +     */


*

>> +    res = sbitmap_init_node(&shared_sb,

>> +                set->__bitmap_tags.sb.depth,

>> +                set->__bitmap_tags.sb.shift,

>> +                GFP_KERNEL, NUMA_NO_NODE);

>> +    if (res)

>> +        return res;

>> +    sb = &shared_sb;

>> +

>> +    res = mutex_lock_interruptible(&q->sysfs_lock);

>> +    if (res)

>> +        goto out;

>> +    if (hctx->tags) {

>> +        hctx_filter_sb(sb, hctx);

>> +        sbitmap_bitmap_show(sb, m);

>> +    }

>> +

>> +    mutex_unlock(&q->sysfs_lock);

>> +

>> +out:

>> +    sbitmap_free(&shared_sb);

>> +    return res;

>> +}

>> +

>>   static int hctx_tags_bitmap_show(void *data, struct seq_file *m)

>>   {

>>       struct blk_mq_hw_ctx *hctx = data;

>> @@ -823,6 +884,7 @@ static const struct blk_mq_debugfs_attr 

>> blk_mq_debugfs_hctx_shared_sbitmap_attrs

>>       {"busy", 0400, hctx_busy_show},

>>       {"ctx_map", 0400, hctx_ctx_map_show},

>>       {"tags", 0400, hctx_tags_show},

>> +    {"tags_bitmap", 0400, hctx_tags_shared_sbitmap_bitmap_show},

>>       {"sched_tags", 0400, hctx_sched_tags_show},

>>       {"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show},

>>       {"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write},

>>

> Ah, right. Here it is.

> 

> But I don't get it; why do we have to allocate a temporary bitmap and 

> can't walk the existing shared sbitmap?


For the bitmap dump - sbitmap_bitmap_show() - it is passed a struct 
sbitmap *. So we have to filter into a temp per-hctx struct sbitmap. We 
could change sbitmap_bitmap_show() to accept a filter iterator - which I 
think you're getting at - but I am not sure it's worth the change. Or 
else use the allocated sbitmap for the hctx, as above*, but I may be 
remove that (see 4/12 response).

Cheers,
John
Hannes Reinecke June 12, 2020, 6:06 a.m. | #3
On 6/11/20 4:33 PM, John Garry wrote:
>>> +static int hctx_tags_shared_sbitmap_bitmap_show(void *data, struct 

>>> seq_file *m)

>>> +{

>>> +    struct blk_mq_hw_ctx *hctx = data;

>>> +    struct request_queue *q = hctx->queue;

>>> +    struct blk_mq_tag_set *set = q->tag_set;

>>> +    struct sbitmap shared_sb, *sb;

>>> +    int res;

>>> +

>>> +    if (!set)

>>> +        return 0;

>>> +

>>> +    /*

>>> +     * We could use the allocated sbitmap for that hctx here, but

>>> +     * that would mean that we would need to clean it prior to use.

>>> +     */

> 

> *

> 

>>> +    res = sbitmap_init_node(&shared_sb,

>>> +                set->__bitmap_tags.sb.depth,

>>> +                set->__bitmap_tags.sb.shift,

>>> +                GFP_KERNEL, NUMA_NO_NODE);

>>> +    if (res)

>>> +        return res;

>>> +    sb = &shared_sb;

>>> +

>>> +    res = mutex_lock_interruptible(&q->sysfs_lock);

>>> +    if (res)

>>> +        goto out;

>>> +    if (hctx->tags) {

>>> +        hctx_filter_sb(sb, hctx);

>>> +        sbitmap_bitmap_show(sb, m);

>>> +    }

>>> +

>>> +    mutex_unlock(&q->sysfs_lock);

>>> +

>>> +out:

>>> +    sbitmap_free(&shared_sb);

>>> +    return res;

>>> +}

>>> +

>>>   static int hctx_tags_bitmap_show(void *data, struct seq_file *m)

>>>   {

>>>       struct blk_mq_hw_ctx *hctx = data;

>>> @@ -823,6 +884,7 @@ static const struct blk_mq_debugfs_attr 

>>> blk_mq_debugfs_hctx_shared_sbitmap_attrs

>>>       {"busy", 0400, hctx_busy_show},

>>>       {"ctx_map", 0400, hctx_ctx_map_show},

>>>       {"tags", 0400, hctx_tags_show},

>>> +    {"tags_bitmap", 0400, hctx_tags_shared_sbitmap_bitmap_show},

>>>       {"sched_tags", 0400, hctx_sched_tags_show},

>>>       {"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show},

>>>       {"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write},

>>>

>> Ah, right. Here it is.

>>

>> But I don't get it; why do we have to allocate a temporary bitmap and 

>> can't walk the existing shared sbitmap?

> 

> For the bitmap dump - sbitmap_bitmap_show() - it is passed a struct 

> sbitmap *. So we have to filter into a temp per-hctx struct sbitmap. We 

> could change sbitmap_bitmap_show() to accept a filter iterator - which I 

> think you're getting at - but I am not sure it's worth the change. Or 

> else use the allocated sbitmap for the hctx, as above*, but I may be 

> remove that (see 4/12 response).

> 

Yes, I do think I would prefer updating sbitmap_bitmap_show() to accept 
a filter. Especially as Ming Lei has now updated the tag iterators to 
accept a filter, too, so it should be an easy change.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke            Teamlead Storage & Networking
hare@suse.de                               +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer
John Garry June 29, 2020, 3:32 p.m. | #4
Hi all,

I noticed that sbitmap_bitmap_show() only shows set bits and does not 
consider cleared bits. Is that the proper thing to do?

I ask, as from trying to support sbitmap_bitmap_show() for hostwide 
shared tags feature, we currently use blk_mq_queue_tag_busy_iter() to 
find active requests (and associated tags/bits) for a particular hctx. 
So, AFAICT, would give a change in behavior for sbitmap_bitmap_show(), 
in that it would effectively show set and not cleared bits.

Any thoughts on this?

Thanks!
Hannes Reinecke June 30, 2020, 6:33 a.m. | #5
On 6/29/20 5:32 PM, John Garry wrote:
> Hi all,

> 

> I noticed that sbitmap_bitmap_show() only shows set bits and does not 

> consider cleared bits. Is that the proper thing to do?

> 

> I ask, as from trying to support sbitmap_bitmap_show() for hostwide 

> shared tags feature, we currently use blk_mq_queue_tag_busy_iter() to 

> find active requests (and associated tags/bits) for a particular hctx. 

> So, AFAICT, would give a change in behavior for sbitmap_bitmap_show(), 

> in that it would effectively show set and not cleared bits.

> 

Why would you need to do this?
Where would be the point traversing cleared bits?

Cheers,

Hannes
-- 
Dr. Hannes Reinecke            Teamlead Storage & Networking
hare@suse.de                               +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer
John Garry June 30, 2020, 7:30 a.m. | #6
On 30/06/2020 07:33, Hannes Reinecke wrote:
> On 6/29/20 5:32 PM, John Garry wrote:

>> Hi all,

>>

>> I noticed that sbitmap_bitmap_show() only shows set bits and does not 

>> consider cleared bits. Is that the proper thing to do?

>>

>> I ask, as from trying to support sbitmap_bitmap_show() for hostwide 

>> shared tags feature, we currently use blk_mq_queue_tag_busy_iter() to 

>> find active requests (and associated tags/bits) for a particular hctx. 

>> So, AFAICT, would give a change in behavior for sbitmap_bitmap_show(), 

>> in that it would effectively show set and not cleared bits.

>>

> Why would you need to do this?

> Where would be the point traversing cleared bits?


I'm not talking about traversing cleared bits specifically. I just think 
that today sbitmap_bitmap_show() only showing the bits in 
sbitmap_word.word may not be useful or even misleading, as in reality 
the "set" bits are sbitmap_word.word & ~sbitmap_word.cleared.

And for hostwide shared tags feature, iterating the busy rqs to find the 
per-hctx tags/bits would effectively give us the "set" bits, above, so 
there would be a difference.

Thanks,
John
John Garry June 30, 2020, 11:36 a.m. | #7
On 30/06/2020 08:30, John Garry wrote:
> On 30/06/2020 07:33, Hannes Reinecke wrote:

>> On 6/29/20 5:32 PM, John Garry wrote:

>>> Hi all,

>>>

>>> I noticed that sbitmap_bitmap_show() only shows set bits and does not 

>>> consider cleared bits. Is that the proper thing to do?

>>>

>>> I ask, as from trying to support sbitmap_bitmap_show() for hostwide 

>>> shared tags feature, we currently use blk_mq_queue_tag_busy_iter() to 

>>> find active requests (and associated tags/bits) for a particular 

>>> hctx. So, AFAICT, would give a change in behavior for 

>>> sbitmap_bitmap_show(), in that it would effectively show set and not 

>>> cleared bits.

>>>

>> Why would you need to do this?

>> Where would be the point traversing cleared bits?

> 

> I'm not talking about traversing cleared bits specifically. I just think 

> that today sbitmap_bitmap_show() only showing the bits in 

> sbitmap_word.word may not be useful or even misleading, as in reality 

> the "set" bits are sbitmap_word.word & ~sbitmap_word.cleared.

> 

> And for hostwide shared tags feature, iterating the busy rqs to find the 

> per-hctx tags/bits would effectively give us the "set" bits, above, so 

> there would be a difference.

> 


As an example, here's a sample tags_bitmap output:

00000000: 00f0 0fff 03c0 0000 0000 0000 efff fdff
00000010: 0000 c0f7 7fff ffff 0000 00e0 fef7 ffff
00000020: 0000 0000 f0ff ffff 0000 ffff 01d0 ffff
00000030: 0f80

And here's what we would have taking cleared bits into account:

00000000: 00f0 0fff 03c0 0000 0000 0000 0000 0000 (20 bits set)
00000010: 0000 0000 0000 0000 0000 0000 0000 0000
00000020: 0000 0000 0000 0000 0000 f8ff 0110 8000 (16 bits set)
00000030: 0f00					  (1 bit set)

Here's tags file output:

nr_tags=400
nr_reserved_tags=0
active_queues=2

bitmap_tags:
depth=400
busy=40
cleared=182
bits_per_word=64
map_nr=7
alloc_hint={22, 0, 0, 0, 103, 389, 231, 57, 377, 167, 0, 0, 69, 24, 44, 
50, 54,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0
, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
wake_batch=8
wake_index=0

[snip]

20+16+1=39 more closely matches with busy=40.

So it seems sensible to go this way for whether hostwide tags are used 
or not.

thanks,
John
Bart Van Assche June 30, 2020, 2:55 p.m. | #8
On 2020-06-29 08:32, John Garry wrote:
> I noticed that sbitmap_bitmap_show() only shows set bits and does not

> consider cleared bits. Is that the proper thing to do?

> 

> I ask, as from trying to support sbitmap_bitmap_show() for hostwide

> shared tags feature, we currently use blk_mq_queue_tag_busy_iter() to

> find active requests (and associated tags/bits) for a particular hctx.

> So, AFAICT, would give a change in behavior for sbitmap_bitmap_show(),

> in that it would effectively show set and not cleared bits.

> 

> Any thoughts on this?


Probably this is something that got overlooked when the cleared bits
were introduced? See e.g. 8c2def893afc ("sbitmap: fix
sbitmap_for_each_set()").

Bart.
John Garry July 13, 2020, 9:41 a.m. | #9
On 12/06/2020 07:06, Hannes Reinecke wrote:
>>>>

>>>> +out:

>>>> +    sbitmap_free(&shared_sb);

>>>> +    return res;

>>>> +}

>>>> +

>>>>   static int hctx_tags_bitmap_show(void *data, struct seq_file *m)

>>>>   {

>>>>       struct blk_mq_hw_ctx *hctx = data;

>>>> @@ -823,6 +884,7 @@ static const struct blk_mq_debugfs_attr 

>>>> blk_mq_debugfs_hctx_shared_sbitmap_attrs

>>>>       {"busy", 0400, hctx_busy_show},

>>>>       {"ctx_map", 0400, hctx_ctx_map_show},

>>>>       {"tags", 0400, hctx_tags_show},

>>>> +    {"tags_bitmap", 0400, hctx_tags_shared_sbitmap_bitmap_show},

>>>>       {"sched_tags", 0400, hctx_sched_tags_show},

>>>>       {"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show},

>>>>       {"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write},

>>>>

>>> Ah, right. Here it is.

>>>

>>> But I don't get it; why do we have to allocate a temporary bitmap and 

>>> can't walk the existing shared sbitmap?

>>


Just coming back to this now...

>> For the bitmap dump - sbitmap_bitmap_show() - it is passed a struct 

>> sbitmap *. So we have to filter into a temp per-hctx struct sbitmap. 

>> We could change sbitmap_bitmap_show() to accept a filter iterator - 

>> which I think you're getting at - but I am not sure it's worth the 

>> change. Or else use the allocated sbitmap for the hctx, as above*, but 

>> I may be remove that (see 4/12 response).

>>

> Yes, I do think I would prefer updating sbitmap_bitmap_show() to accept 

> a filter. Especially as Ming Lei has now updated the tag iterators to 

> accept a filter, too, so it should be an easy change.


Can you please explain how you would use an iterator here?

In fact, I am half thinking of dropping this patch entirely.

So I feel that current behavior is a little strange, as some may expect 
/sys/kernel/debug/block/sdX/hctxY/tags_bitmap would only show tags for 
hctxY for sdX, while it is for hctxY for all queues. Same for 
/sys/kernel/debug/block/sdX/hctxY/tags

And then, for what we have in this patch:

static void hctx_filter_sb(struct sbitmap *sb, struct blk_mq_hw_ctx *hctx)
{
struct hctx_sb_data hctx_sb_data = { .sb = sb, .hctx = hctx };

blk_mq_queue_tag_busy_iter(hctx->queue, hctx_filter_fn, &hctx_sb_data);
}

This will give tags only for this queue. So not the same. So I feel it's 
better to change current behavior (so consistent) or change neither. And 
changing current behavior would also mean we need to change what we show 
in /sys/kernel/debug/block/sdX/hctxY/tags, and that looks troublesome also.

Opinion?

Thanks,
John
Hannes Reinecke July 13, 2020, 12:20 p.m. | #10
On 7/13/20 11:41 AM, John Garry wrote:
> On 12/06/2020 07:06, Hannes Reinecke wrote:

>>>>>

>>>>> +out:

>>>>> +    sbitmap_free(&shared_sb);

>>>>> +    return res;

>>>>> +}

>>>>> +

>>>>>   static int hctx_tags_bitmap_show(void *data, struct seq_file *m)

>>>>>   {

>>>>>       struct blk_mq_hw_ctx *hctx = data;

>>>>> @@ -823,6 +884,7 @@ static const struct blk_mq_debugfs_attr

>>>>> blk_mq_debugfs_hctx_shared_sbitmap_attrs

>>>>>       {"busy", 0400, hctx_busy_show},

>>>>>       {"ctx_map", 0400, hctx_ctx_map_show},

>>>>>       {"tags", 0400, hctx_tags_show},

>>>>> +    {"tags_bitmap", 0400, hctx_tags_shared_sbitmap_bitmap_show},

>>>>>       {"sched_tags", 0400, hctx_sched_tags_show},

>>>>>       {"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show},

>>>>>       {"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write},

>>>>>

>>>> Ah, right. Here it is.

>>>>

>>>> But I don't get it; why do we have to allocate a temporary bitmap

>>>> and can't walk the existing shared sbitmap?

>>>

> 

> Just coming back to this now...

> 

>>> For the bitmap dump - sbitmap_bitmap_show() - it is passed a struct

>>> sbitmap *. So we have to filter into a temp per-hctx struct sbitmap.

>>> We could change sbitmap_bitmap_show() to accept a filter iterator -

>>> which I think you're getting at - but I am not sure it's worth the

>>> change. Or else use the allocated sbitmap for the hctx, as above*,

>>> but I may be remove that (see 4/12 response).

>>>

>> Yes, I do think I would prefer updating sbitmap_bitmap_show() to

>> accept a filter. Especially as Ming Lei has now updated the tag

>> iterators to accept a filter, too, so it should be an easy change.

> 

> Can you please explain how you would use an iterator here?

> 

> In fact, I am half thinking of dropping this patch entirely.

> 

> So I feel that current behavior is a little strange, as some may expect

> /sys/kernel/debug/block/sdX/hctxY/tags_bitmap would only show tags for

> hctxY for sdX, while it is for hctxY for all queues. Same for

> /sys/kernel/debug/block/sdX/hctxY/tags

> 

> And then, for what we have in this patch:

> 

> static void hctx_filter_sb(struct sbitmap *sb, struct blk_mq_hw_ctx *hctx)

> {

> struct hctx_sb_data hctx_sb_data = { .sb = sb, .hctx = hctx };

> 

> blk_mq_queue_tag_busy_iter(hctx->queue, hctx_filter_fn, &hctx_sb_data);

> }

> 

> This will give tags only for this queue. So not the same. So I feel it's

> better to change current behavior (so consistent) or change neither. And

> changing current behavior would also mean we need to change what we show

> in /sys/kernel/debug/block/sdX/hctxY/tags, and that looks troublesome also.

> 

> Opinion?

> 

The whole notion of having sysfs presenting tags per hctx doesn't really
apply anymore when running with shared tags.

We could be sticking with the per-hctx attribute, but then busy tags
won't be displayed correctly as tags might be busy, but not on this hctx.
The alternative idea of projecting everything over to hctx0 (or just
duplicating the output from hctx0) would be technically correct, but
would be missing the per-hctx information.

Ideally we would have some sort of tri-state information here: busy,
busy on other hctx, not busy.
Then the per-hctx attribute would start making sense again.

Otherwise I would just leave it for now.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		           Kernel Storage Architect
hare@suse.de			                  +49 911 74053 688
SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Felix Imendörffer

Patch

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 05b4be0c03d9..4da7e54adf3b 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -495,6 +495,67 @@  static int hctx_tags_show(void *data, struct seq_file *m)
 	return res;
 }
 
+struct hctx_sb_data {
+	struct sbitmap		*sb;	/* output bitmap */
+	struct blk_mq_hw_ctx	*hctx;	/* input hctx */
+};
+
+static bool hctx_filter_fn(struct blk_mq_hw_ctx *hctx, struct request *req,
+			   void *priv, bool reserved)
+{
+	struct hctx_sb_data *hctx_sb_data = priv;
+
+	if (hctx == hctx_sb_data->hctx)
+		sbitmap_set_bit(hctx_sb_data->sb, req->tag);
+
+	return true;
+}
+
+static void hctx_filter_sb(struct sbitmap *sb, struct blk_mq_hw_ctx *hctx)
+{
+	struct hctx_sb_data hctx_sb_data = { .sb = sb, .hctx = hctx };
+
+	blk_mq_queue_tag_busy_iter(hctx->queue, hctx_filter_fn, &hctx_sb_data);
+}
+
+static int hctx_tags_shared_sbitmap_bitmap_show(void *data, struct seq_file *m)
+{
+	struct blk_mq_hw_ctx *hctx = data;
+	struct request_queue *q = hctx->queue;
+	struct blk_mq_tag_set *set = q->tag_set;
+	struct sbitmap shared_sb, *sb;
+	int res;
+
+	if (!set)
+		return 0;
+
+	/*
+	 * We could use the allocated sbitmap for that hctx here, but
+	 * that would mean that we would need to clean it prior to use.
+	 */
+	res = sbitmap_init_node(&shared_sb,
+				set->__bitmap_tags.sb.depth,
+				set->__bitmap_tags.sb.shift,
+				GFP_KERNEL, NUMA_NO_NODE);
+	if (res)
+		return res;
+	sb = &shared_sb;
+
+	res = mutex_lock_interruptible(&q->sysfs_lock);
+	if (res)
+		goto out;
+	if (hctx->tags) {
+		hctx_filter_sb(sb, hctx);
+		sbitmap_bitmap_show(sb, m);
+	}
+
+	mutex_unlock(&q->sysfs_lock);
+
+out:
+	sbitmap_free(&shared_sb);
+	return res;
+}
+
 static int hctx_tags_bitmap_show(void *data, struct seq_file *m)
 {
 	struct blk_mq_hw_ctx *hctx = data;
@@ -823,6 +884,7 @@  static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_shared_sbitmap_attrs
 	{"busy", 0400, hctx_busy_show},
 	{"ctx_map", 0400, hctx_ctx_map_show},
 	{"tags", 0400, hctx_tags_show},
+	{"tags_bitmap", 0400, hctx_tags_shared_sbitmap_bitmap_show},
 	{"sched_tags", 0400, hctx_sched_tags_show},
 	{"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show},
 	{"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write},