Skip to content

Commit

Permalink
blk-mq: init hctx sched after update ctx and hctx mapping
Browse files Browse the repository at this point in the history
Currently, when updating nr_hw_queues, the IO scheduler's init_hctx is
invoked before the mapping between ctx and hctx has been updated
correctly by blk_mq_map_swqueue. The IO scheduler's init_hctx (kyber)
may depend on this mapping, get a wrong result, and eventually panic.
A simple way to fix this is to switch the IO scheduler to 'none'
before updating nr_hw_queues, and then switch it back after
nr_hw_queues has been updated. blk_mq_sched_init_/exit_hctx are removed
because nobody uses them any more.

Signed-off-by: Jianchao Wang <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
Jianchao Wang authored and axboe committed Aug 21, 2018
1 parent fcedba4 commit d48ece2
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 65 deletions.
44 changes: 0 additions & 44 deletions block/blk-mq-sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -462,50 +462,6 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q)
blk_mq_sched_free_tags(set, hctx, i);
}

/*
 * Set up the scheduler side of hardware context @hctx (index @hctx_idx)
 * of queue @q.
 *
 * Returns 0 immediately when @q has no elevator attached. Otherwise calls
 * blk_mq_sched_alloc_tags(), then the elevator's init_hctx hook if one is
 * provided, and finally blk_mq_debugfs_register_sched_hctx(). On failure
 * the error code of the failing step is returned; if the init_hctx hook
 * fails, the tags obtained for this hctx are released again.
 */
int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			   unsigned int hctx_idx)
{
	struct elevator_queue *eq = q->elevator;
	int err;

	/* No scheduler attached: nothing to initialise. */
	if (!eq)
		return 0;

	err = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
	if (err)
		return err;

	/* The per-hctx init hook is optional. */
	if (eq->type->ops.mq.init_hctx) {
		err = eq->type->ops.mq.init_hctx(hctx, hctx_idx);
		if (err)
			goto free_tags;
	}

	blk_mq_debugfs_register_sched_hctx(q, hctx);
	return 0;

free_tags:
	/* Undo the tag allocation done above. */
	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
	return err;
}

/*
 * Tear down the scheduler side of hardware context @hctx (index @hctx_idx)
 * of queue @q.
 *
 * Does nothing when @q has no elevator attached. Otherwise unregisters the
 * scheduler debugfs entries of @hctx, runs the elevator's exit_hctx hook
 * (only if the hook exists and @hctx carries scheduler data, which is then
 * cleared), and finally calls blk_mq_sched_free_tags() for this hctx.
 */
void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			    unsigned int hctx_idx)
{
	struct elevator_queue *eq = q->elevator;

	if (eq) {
		blk_mq_debugfs_unregister_sched_hctx(hctx);

		if (eq->type->ops.mq.exit_hctx) {
			if (hctx->sched_data) {
				eq->type->ops.mq.exit_hctx(hctx, hctx_idx);
				/* Scheduler data is gone after the hook ran. */
				hctx->sched_data = NULL;
			}
		}

		blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
	}
}

int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
struct blk_mq_hw_ctx *hctx;
Expand Down
5 changes: 0 additions & 5 deletions block/blk-mq-sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,6 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);

int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
unsigned int hctx_idx);
void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
unsigned int hctx_idx);

static inline bool
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
Expand Down
92 changes: 84 additions & 8 deletions block/blk-mq.c
Original file line number Diff line number Diff line change
Expand Up @@ -2147,8 +2147,6 @@ static void blk_mq_exit_hctx(struct request_queue *q,
if (set->ops->exit_request)
set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);

blk_mq_sched_exit_hctx(q, hctx, hctx_idx);

if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx);

Expand Down Expand Up @@ -2216,12 +2214,9 @@ static int blk_mq_init_hctx(struct request_queue *q,
set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
goto free_bitmap;

if (blk_mq_sched_init_hctx(q, hctx, hctx_idx))
goto exit_hctx;

hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
if (!hctx->fq)
goto sched_exit_hctx;
goto exit_hctx;

if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, node))
goto free_fq;
Expand All @@ -2235,8 +2230,6 @@ static int blk_mq_init_hctx(struct request_queue *q,

free_fq:
kfree(hctx->fq);
sched_exit_hctx:
blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
exit_hctx:
if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx);
Expand Down Expand Up @@ -2898,10 +2891,81 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
return ret;
}

/*
 * request_queue and elevator_type pair.
 * It is only used by __blk_mq_update_nr_hw_queues (through
 * blk_mq_elv_switch_none and blk_mq_elv_switch_back) to remember the
 * elevator_type that was associated with a request_queue while that
 * queue is temporarily switched to 'none'.
 */
struct blk_mq_qe_pair {
	struct list_head node;		/* link in the caller's on-stack list */
	struct request_queue *q;	/* queue whose elevator type is cached */
	struct elevator_type *type;	/* elevator type to switch back to */
};

/*
 * Cache the elevator_type of @q in the qe pair list @head and switch the
 * io scheduler of @q to 'none'.
 *
 * Returns true when @q has no io scheduler attached (nothing is cached)
 * or when the type was cached and the switch was issued; returns false
 * when the bookkeeping entry could not be allocated.
 */
static bool blk_mq_elv_switch_none(struct list_head *head,
		struct request_queue *q)
{
	struct blk_mq_qe_pair *qe;
	bool ret = true;

	/*
	 * Elevator switching is done with q->sysfs_lock held (see the
	 * lockdep assertion in elevator_switch_mq), so q->elevator has to be
	 * checked and dereferenced under that lock as well. Otherwise it may
	 * be cleared between the check and the q->elevator->type access.
	 */
	mutex_lock(&q->sysfs_lock);

	if (!q->elevator)
		goto unlock;

	qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
	if (!qe) {
		ret = false;
		goto unlock;
	}

	INIT_LIST_HEAD(&qe->node);
	qe->q = q;
	qe->type = q->elevator->type;
	list_add(&qe->node, head);

	/*
	 * After elevator_switch_mq, the previous elevator_queue will be
	 * released by elevator_release. The reference on the io scheduler
	 * module taken by elevator_get will also be put. So we need to take
	 * a reference on the io scheduler module here to prevent it from
	 * being removed.
	 */
	__module_get(qe->type->elevator_owner);
	elevator_switch_mq(q, NULL);

unlock:
	mutex_unlock(&q->sysfs_lock);

	return ret;
}

static void blk_mq_elv_switch_back(struct list_head *head,
struct request_queue *q)
{
struct blk_mq_qe_pair *qe;
struct elevator_type *t = NULL;

list_for_each_entry(qe, head, node)
if (qe->q == q) {
t = qe->type;
break;
}

if (!t)
return;

list_del(&qe->node);
kfree(qe);

mutex_lock(&q->sysfs_lock);
elevator_switch_mq(q, t);
mutex_unlock(&q->sysfs_lock);
}

static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
int nr_hw_queues)
{
struct request_queue *q;
LIST_HEAD(head);

lockdep_assert_held(&set->tag_list_lock);

Expand All @@ -2912,6 +2976,14 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,

list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_freeze_queue(q);
/*
* Switch IO scheduler to 'none', cleaning up the data associated
* with the previous scheduler. We will switch back once we are done
* updating the new sw to hw queue mappings.
*/
list_for_each_entry(q, &set->tag_list, tag_set_list)
if (!blk_mq_elv_switch_none(&head, q))
goto switch_back;

set->nr_hw_queues = nr_hw_queues;
blk_mq_update_queue_map(set);
Expand All @@ -2920,6 +2992,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
blk_mq_queue_reinit(q);
}

switch_back:
list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_elv_switch_back(&head, q);

list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_unfreeze_queue(q);
}
Expand Down
2 changes: 2 additions & 0 deletions block/blk.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@ static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq

int elevator_init(struct request_queue *);
int elevator_init_mq(struct request_queue *q);
int elevator_switch_mq(struct request_queue *q,
struct elevator_type *new_e);
void elevator_exit(struct request_queue *, struct elevator_queue *);
int elv_register_queue(struct request_queue *q);
void elv_unregister_queue(struct request_queue *q);
Expand Down
20 changes: 12 additions & 8 deletions block/elevator.c
Original file line number Diff line number Diff line change
Expand Up @@ -933,16 +933,13 @@ void elv_unregister(struct elevator_type *e)
}
EXPORT_SYMBOL_GPL(elv_unregister);

static int elevator_switch_mq(struct request_queue *q,
int elevator_switch_mq(struct request_queue *q,
struct elevator_type *new_e)
{
int ret;

lockdep_assert_held(&q->sysfs_lock);

blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);

if (q->elevator) {
if (q->elevator->registered)
elv_unregister_queue(q);
Expand All @@ -968,8 +965,6 @@ static int elevator_switch_mq(struct request_queue *q,
blk_add_trace_msg(q, "elv switch: none");

out:
blk_mq_unquiesce_queue(q);
blk_mq_unfreeze_queue(q);
return ret;
}

Expand Down Expand Up @@ -1021,8 +1016,17 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)

lockdep_assert_held(&q->sysfs_lock);

if (q->mq_ops)
return elevator_switch_mq(q, new_e);
if (q->mq_ops) {
blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);

err = elevator_switch_mq(q, new_e);

blk_mq_unquiesce_queue(q);
blk_mq_unfreeze_queue(q);

return err;
}

/*
* Turn on BYPASS and drain all requests w/ elevator private data.
Expand Down

0 comments on commit d48ece2

Please sign in to comment.