Skip to content

Commit

Permalink
blk-mq: fix tag_get wait task can't be awakened
Browse files Browse the repository at this point in the history
In case of shared tags, there might be more than one hctx which
allocates from the same tags, and each hctx is limited to allocate at
most:
        hctx_max_depth = max((bt->sb.depth + users - 1) / users, 4U);

tag idle detection is lazy, and may be delayed for 30sec, so there
could be just one real active hctx(queue) but all others are actually
idle and still accounted as active because of the lazy idle detection.
Then if wake_batch is > hctx_max_depth, driver tag allocation may wait
forever on this real active hctx.

Fix this by recalculating wake_batch when inc or dec active_queues.

Fixes: 0d2602c ("blk-mq: improve support for shared tags maps")
Suggested-by: Ming Lei <[email protected]>
Suggested-by: John Garry <[email protected]>
Signed-off-by: Laibin Qiu <[email protected]>
Reviewed-by: Andy Shevchenko <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
Laibin Qiu authored and axboe committed Jan 13, 2022
1 parent fb3b067 commit 180dccb
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 10 deletions.
40 changes: 33 additions & 7 deletions block/blk-mq-tag.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,21 @@
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"

/*
* Recalculate wakeup batch when tag is shared by hctx.
*/
static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
unsigned int users)
{
if (!users)
return;

sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags,
users);
sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags,
users);
}

/*
* If a previously inactive queue goes active, bump the active user count.
* We need to do this before try to allocate driver tag, then even if fail
Expand All @@ -24,18 +39,26 @@
*/
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
unsigned int users;

if (blk_mq_is_shared_tags(hctx->flags)) {
struct request_queue *q = hctx->queue;

if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
!test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
atomic_inc(&hctx->tags->active_queues);
if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) {
return true;
}
} else {
if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
!test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
atomic_inc(&hctx->tags->active_queues);
if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) {
return true;
}
}

users = atomic_inc_return(&hctx->tags->active_queues);

blk_mq_update_wake_batch(hctx->tags, users);

return true;
}

Expand All @@ -56,6 +79,7 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
struct blk_mq_tags *tags = hctx->tags;
unsigned int users;

if (blk_mq_is_shared_tags(hctx->flags)) {
struct request_queue *q = hctx->queue;
Expand All @@ -68,7 +92,9 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
return;
}

atomic_dec(&tags->active_queues);
users = atomic_dec_return(&tags->active_queues);

blk_mq_update_wake_batch(tags, users);

blk_mq_tag_wakeup_all(tags, false);
}
Expand Down
11 changes: 11 additions & 0 deletions include/linux/sbitmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,17 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
sbitmap_free(&sbq->sb);
}

/**
* sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch
* @sbq: Bitmap queue to recalculate wake batch.
* @users: Number of shares.
*
* Like sbitmap_queue_update_wake_batch(), this will calculate wake batch
* by depth. This interface is for HCTX shared tags or queue shared tags.
*/
void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
unsigned int users);

/**
* sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
* @sbq: Bitmap queue to resize.
Expand Down
25 changes: 22 additions & 3 deletions lib/sbitmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -457,10 +457,9 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
}
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);

static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
unsigned int depth)
static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
unsigned int wake_batch)
{
unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
int i;

if (sbq->wake_batch != wake_batch) {
Expand All @@ -476,6 +475,26 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
}
}

static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
unsigned int depth)
{
unsigned int wake_batch;

wake_batch = sbq_calc_wake_batch(sbq, depth);
__sbitmap_queue_update_wake_batch(sbq, wake_batch);
}

void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
unsigned int users)
{
unsigned int wake_batch;

wake_batch = clamp_val((sbq->sb.depth + users - 1) /
users, 4, SBQ_WAKE_BATCH);
__sbitmap_queue_update_wake_batch(sbq, wake_batch);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);

void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
{
sbitmap_queue_update_wake_batch(sbq, depth);
Expand Down

0 comments on commit 180dccb

Please sign in to comment.