Skip to content

Commit

Permalink
blk-mq-sched: add framework for MQ capable IO schedulers
Browse files Browse the repository at this point in the history
This adds a set of hooks that intercepts the blk-mq path of
allocating/inserting/issuing/completing requests, allowing
us to develop a scheduler within that framework.

We reuse the existing elevator scheduler API on the registration
side, but augment that with the scheduler flagging support for
the blk-mq interface, and with a separate set of ops hooks for MQ
devices.

We split driver and scheduler tags, so we can run the scheduling
independently of device queue depth.

Signed-off-by: Jens Axboe <[email protected]>
Reviewed-by: Bart Van Assche <[email protected]>
Reviewed-by: Omar Sandoval <[email protected]>
  • Loading branch information
axboe committed Jan 17, 2017
1 parent 2af8cbe commit bd166ef
Show file tree
Hide file tree
Showing 17 changed files with 984 additions and 194 deletions.
2 changes: 1 addition & 1 deletion block/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \
blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
badblocks.o partitions/

Expand Down
24 changes: 20 additions & 4 deletions block/blk-cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -1223,7 +1223,11 @@ int blkcg_activate_policy(struct request_queue *q,
if (blkcg_policy_enabled(q, pol))
return 0;

blk_queue_bypass_start(q);
if (q->mq_ops) {
blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);
} else
blk_queue_bypass_start(q);
pd_prealloc:
if (!pd_prealloc) {
pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
Expand Down Expand Up @@ -1261,7 +1265,10 @@ int blkcg_activate_policy(struct request_queue *q,

spin_unlock_irq(q->queue_lock);
out_bypass_end:
blk_queue_bypass_end(q);
if (q->mq_ops)
blk_mq_unfreeze_queue(q);
else
blk_queue_bypass_end(q);
if (pd_prealloc)
pol->pd_free_fn(pd_prealloc);
return ret;
Expand All @@ -1284,7 +1291,12 @@ void blkcg_deactivate_policy(struct request_queue *q,
if (!blkcg_policy_enabled(q, pol))
return;

blk_queue_bypass_start(q);
if (q->mq_ops) {
blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);
} else
blk_queue_bypass_start(q);

spin_lock_irq(q->queue_lock);

__clear_bit(pol->plid, q->blkcg_pols);
Expand All @@ -1304,7 +1316,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
}

spin_unlock_irq(q->queue_lock);
blk_queue_bypass_end(q);

if (q->mq_ops)
blk_mq_unfreeze_queue(q);
else
blk_queue_bypass_end(q);
}
EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);

Expand Down
4 changes: 3 additions & 1 deletion block/blk-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"
#include "blk-wbt.h"

EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
Expand Down Expand Up @@ -134,6 +135,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
rq->cmd = rq->__cmd;
rq->cmd_len = BLK_MAX_CDB;
rq->tag = -1;
rq->internal_tag = -1;
rq->start_time = jiffies;
set_start_time_ns(rq);
rq->part = NULL;
Expand Down Expand Up @@ -2127,7 +2129,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
if (q->mq_ops) {
if (blk_queue_io_stat(q))
blk_account_io_start(rq, true);
blk_mq_insert_request(rq, false, true, false);
blk_mq_sched_insert_request(rq, false, true, false);
return 0;
}

Expand Down
3 changes: 2 additions & 1 deletion block/blk-exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <linux/sched/sysctl.h>

#include "blk.h"
#include "blk-mq-sched.h"

/*
* for max sense size
Expand Down Expand Up @@ -65,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
* be reused after dying flag is set
*/
if (q->mq_ops) {
blk_mq_insert_request(rq, at_head, true, false);
blk_mq_sched_insert_request(rq, at_head, true, false);
return;
}

Expand Down
12 changes: 7 additions & 5 deletions block/blk-flush.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"
#include "blk-mq-sched.h"

/* FLUSH/FUA sequences */
enum {
Expand Down Expand Up @@ -391,9 +392,10 @@ static void mq_flush_data_end_io(struct request *rq, int error)
* the comment in flush_end_io().
*/
spin_lock_irqsave(&fq->mq_flush_lock, flags);
if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error))
blk_mq_run_hw_queue(hctx, true);
blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error);
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);

blk_mq_run_hw_queue(hctx, true);
}

/**
Expand Down Expand Up @@ -453,9 +455,9 @@ void blk_insert_flush(struct request *rq)
*/
if ((policy & REQ_FSEQ_DATA) &&
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
if (q->mq_ops) {
blk_mq_insert_request(rq, false, true, false);
} else
if (q->mq_ops)
blk_mq_sched_insert_request(rq, false, true, false);
else
list_add_tail(&rq->queuelist, &q->queue_head);
return;
}
Expand Down
8 changes: 6 additions & 2 deletions block/blk-ioc.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ static void ioc_exit_icq(struct io_cq *icq)
if (icq->flags & ICQ_EXITED)
return;

if (et->ops.sq.elevator_exit_icq_fn)
if (et->uses_mq && et->ops.mq.exit_icq)
et->ops.mq.exit_icq(icq);
else if (!et->uses_mq && et->ops.sq.elevator_exit_icq_fn)
et->ops.sq.elevator_exit_icq_fn(icq);

icq->flags |= ICQ_EXITED;
Expand Down Expand Up @@ -383,7 +385,9 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
hlist_add_head(&icq->ioc_node, &ioc->icq_list);
list_add(&icq->q_node, &q->icq_list);
if (et->ops.sq.elevator_init_icq_fn)
if (et->uses_mq && et->ops.mq.init_icq)
et->ops.mq.init_icq(icq);
else if (!et->uses_mq && et->ops.sq.elevator_init_icq_fn)
et->ops.sq.elevator_init_icq_fn(icq);
} else {
kmem_cache_free(et->icq_cache, icq);
Expand Down
2 changes: 1 addition & 1 deletion block/blk-merge.c
Original file line number Diff line number Diff line change
Expand Up @@ -763,7 +763,7 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
{
struct elevator_queue *e = q->elevator;

if (e->type->ops.sq.elevator_allow_rq_merge_fn)
if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn)
if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next))
return 0;

Expand Down
Loading

0 comments on commit bd166ef

Please sign in to comment.