Skip to content

Commit

Permalink
block: initial patch for on-stack per-task plugging
Browse files Browse the repository at this point in the history
This patch adds support for creating a queuing context outside
of the queue itself. This enables us to batch up pieces of IO
before grabbing the block device queue lock and submitting them to
the IO scheduler.

The context is created on the stack of the process, and a pointer to it
is stored in the task structure, so that it can be unplugged automatically
if the task hits a schedule event.

The current queue plugging happens implicitly if IO is submitted to
an empty device, yet callers have to remember to unplug that IO when
they are going to wait for it. This is an ugly API and has caused bugs
in the past. Additionally, it requires hacks in the vm (->sync_page()
callback) to handle that logic. By switching to an explicit plugging
scheme we make the API a lot nicer and can get rid of the ->sync_page()
hack in the vm.

Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
Jens Axboe committed Mar 10, 2011
1 parent a488e74 commit 73c1010
Show file tree
Hide file tree
Showing 10 changed files with 344 additions and 101 deletions.
369 changes: 271 additions & 98 deletions block/blk-core.c

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions block/blk-flush.c
Original file line number Diff line number Diff line change
Expand Up @@ -264,10 +264,9 @@ static bool blk_kick_flush(struct request_queue *q)
static void flush_data_end_io(struct request *rq, int error)
{
struct request_queue *q = rq->q;
bool was_empty = elv_queue_empty(q);

/* after populating an empty queue, kick it to avoid stall */
if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error) && was_empty)
if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
__blk_run_queue(q);
}

Expand Down
6 changes: 5 additions & 1 deletion block/elevator.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
}
EXPORT_SYMBOL(elv_rq_merge_ok);

static inline int elv_try_merge(struct request *__rq, struct bio *bio)
int elv_try_merge(struct request *__rq, struct bio *bio)
{
int ret = ELEVATOR_NO_MERGE;

Expand Down Expand Up @@ -421,6 +421,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
struct list_head *entry;
int stop_flags;

BUG_ON(rq->cmd_flags & REQ_ON_PLUG);

if (q->last_merge == rq)
q->last_merge = NULL;

Expand Down Expand Up @@ -696,6 +698,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
void __elv_add_request(struct request_queue *q, struct request *rq, int where,
int plug)
{
BUG_ON(rq->cmd_flags & REQ_ON_PLUG);

if (rq->cmd_flags & REQ_SOFTBARRIER) {
/* barriers are scheduling boundary, update end_sector */
if (rq->cmd_type == REQ_TYPE_FS ||
Expand Down
2 changes: 2 additions & 0 deletions include/linux/blk_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ enum rq_flag_bits {
__REQ_IO_STAT, /* account I/O stat */
__REQ_MIXED_MERGE, /* merge of different types, fail separately */
__REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
__REQ_ON_PLUG, /* on plug list */
__REQ_NR_BITS, /* stops here */
};

Expand Down Expand Up @@ -193,5 +194,6 @@ enum rq_flag_bits {
#define REQ_IO_STAT (1 << __REQ_IO_STAT)
#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE)
#define REQ_SECURE (1 << __REQ_SECURE)
#define REQ_ON_PLUG (1 << __REQ_ON_PLUG)

#endif /* __LINUX_BLK_TYPES_H */
42 changes: 42 additions & 0 deletions include/linux/blkdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,31 @@ struct request_queue *blk_alloc_queue(gfp_t);
struct request_queue *blk_alloc_queue_node(gfp_t, int);
extern void blk_put_queue(struct request_queue *);

/*
 * struct blk_plug - per-task plugging context.
 *
 * According to the commit description the object lives on the stack of the
 * submitting process; task_struct->plug points at it while it is active.
 */
struct blk_plug {
/* NOTE(review): presumably a sanity marker; its users are outside this view */
unsigned long magic;
/* list head tested by blk_needs_flush_plug(); requests flagged REQ_ON_PLUG are presumably linked here */
struct list_head list;
/* NOTE(review): looks like a "list needs sorting before flush" hint; confirm in blk-core.c */
unsigned int should_sort;
};

extern void blk_start_plug(struct blk_plug *);
extern void blk_finish_plug(struct blk_plug *);
extern void __blk_flush_plug(struct task_struct *, struct blk_plug *);

/*
 * blk_flush_plug - flush the plug attached to @tsk, if there is one.
 * @tsk: task whose ->plug pointer is inspected
 *
 * Does nothing when tsk->plug is NULL; otherwise hands the task and its
 * plug to __blk_flush_plug().
 */
static inline void blk_flush_plug(struct task_struct *tsk)
{
	struct blk_plug *active = tsk->plug;

	/* having a plug installed is treated as the uncommon case */
	if (!unlikely(active))
		return;

	__blk_flush_plug(tsk, active);
}

/*
 * blk_needs_flush_plug - report whether @tsk has anything plugged.
 * @tsk: task whose ->plug pointer is inspected
 *
 * Returns true only when the task has a plug installed and that plug's
 * list is non-empty.
 */
static inline bool blk_needs_flush_plug(struct task_struct *tsk)
{
	struct blk_plug *active = tsk->plug;

	if (!active)
		return false;

	return !list_empty(&active->list);
}

/*
* tag stuff
*/
Expand Down Expand Up @@ -1294,6 +1319,23 @@ static inline long nr_blockdev_pages(void)
return 0;
}

/*
 * Empty stand-in for the plug type when the block layer is configured
 * out, so callers can still declare a plug on their stack and pass its
 * address to the stubs below.
 */
struct blk_plug {
};

/*
 * blk_start_plug - no-op stub; there is nothing to plug without the block
 * layer.  Takes a struct blk_plug pointer, matching the real prototype.
 */
static inline void blk_start_plug(struct blk_plug *plug)
{
}

/*
 * blk_finish_plug - no-op stub counterpart of blk_start_plug(), with the
 * same struct blk_plug pointer argument as the real prototype.
 */
static inline void blk_finish_plug(struct blk_plug *plug)
{
}

/*
 * blk_flush_plug - no-op stub in the section closed by
 * "#endif CONFIG_BLOCK"; @tsk is ignored.
 */
static inline void blk_flush_plug(struct task_struct *tsk)
{
}

/*
 * blk_needs_flush_plug - stub in the section closed by
 * "#endif CONFIG_BLOCK"; always reports that @tsk has nothing to flush.
 */
static inline bool blk_needs_flush_plug(struct task_struct *tsk)
{
return false;
}

#endif /* CONFIG_BLOCK */

#endif
1 change: 1 addition & 0 deletions include/linux/elevator.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ extern void elv_add_request(struct request_queue *, struct request *, int, int);
extern void __elv_add_request(struct request_queue *, struct request *, int, int);
extern void elv_insert(struct request_queue *, struct request *, int);
extern int elv_merge(struct request_queue *, struct request **, struct bio *);
extern int elv_try_merge(struct request *, struct bio *);
extern void elv_merge_requests(struct request_queue *, struct request *,
struct request *);
extern void elv_merged_request(struct request_queue *, struct request *, int);
Expand Down
6 changes: 6 additions & 0 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ struct robust_list_head;
struct bio_list;
struct fs_struct;
struct perf_event_context;
struct blk_plug;

/*
* List of flags we want to share for kernel threads,
Expand Down Expand Up @@ -1429,6 +1430,11 @@ struct task_struct {
/* stacked block device info */
struct bio_list *bio_list;

#ifdef CONFIG_BLOCK
/* stack plugging */
struct blk_plug *plug;
#endif

/* VM state */
struct reclaim_state *reclaim_state;

Expand Down
1 change: 1 addition & 0 deletions kernel/exit.c
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,7 @@ NORET_TYPE void do_exit(long code)
profile_task_exit(tsk);

WARN_ON(atomic_read(&tsk->fs_excl));
WARN_ON(blk_needs_flush_plug(tsk));

if (unlikely(in_interrupt()))
panic("Aiee, killing interrupt handler!");
Expand Down
3 changes: 3 additions & 0 deletions kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -1204,6 +1204,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
* Clear TID on mm_release()?
*/
p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
#ifdef CONFIG_BLOCK
p->plug = NULL;
#endif
#ifdef CONFIG_FUTEX
p->robust_list = NULL;
#ifdef CONFIG_COMPAT
Expand Down
12 changes: 12 additions & 0 deletions kernel/sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -3978,6 +3978,16 @@ asmlinkage void __sched schedule(void)
switch_count = &prev->nvcsw;
}

/*
* If we are going to sleep and we have plugged IO queued, make
* sure to submit it to avoid deadlocks.
*/
if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) {
raw_spin_unlock(&rq->lock);
blk_flush_plug(prev);
raw_spin_lock(&rq->lock);
}

pre_schedule(rq, prev);

if (unlikely(!rq->nr_running))
Expand Down Expand Up @@ -5333,6 +5343,7 @@ void __sched io_schedule(void)

delayacct_blkio_start();
atomic_inc(&rq->nr_iowait);
blk_flush_plug(current);
current->in_iowait = 1;
schedule();
current->in_iowait = 0;
Expand All @@ -5348,6 +5359,7 @@ long __sched io_schedule_timeout(long timeout)

delayacct_blkio_start();
atomic_inc(&rq->nr_iowait);
blk_flush_plug(current);
current->in_iowait = 1;
ret = schedule_timeout(timeout);
current->in_iowait = 0;
Expand Down

0 comments on commit 73c1010

Please sign in to comment.