Skip to content

Commit

Permalink
block: prepare I/O context code for BFQ
Browse files Browse the repository at this point in the history
BFQ uses struct cfq_io_context to store its per-process per-device data,
reusing the same code for cic handling of CFQ.  The code is not shared
ATM to minimize the impact of these patches.

This patch introduces a new hlist to each io_context to store all the
cic's allocated by BFQ to allow calling the right destructor on module
unload; the radix tree used for cic lookup needs to be duplicated
because it can contain dead keys inserted by a scheduler and later
retrieved by the other one.

Update the io_context exit and free paths to take care also of
the BFQ cic's.

Change the type of cfqq inside struct cfq_io_context to void *
to use it also for BFQ per-queue data.

A new bfq-specific ioprio_changed field is necessary, too, to avoid
clobbering cfq's one, so switch ioprio_changed to a bitmap, with one
element per scheduler.

Signed-off-by: Fabio Checconi <[email protected]>
Signed-off-by: Paolo Valente <[email protected]>
Signed-off-by: Francesco Allertsen <[email protected]>
  • Loading branch information
paolo-github authored and Rhcp committed Feb 3, 2012
1 parent 885325c commit 4bd412a
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 26 deletions.
40 changes: 34 additions & 6 deletions block/Kconfig.iosched
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,35 @@ config CFQ_GROUP_IOSCHED
Enable group IO scheduling in CFQ.

config IOSCHED_VR
tristate "V(R) I/O scheduler"
default y
---help---
Requests are chosen according to SSTF with a penalty of rev_penalty
for switching head direction.
tristate "V(R) I/O scheduler"
default y
---help---
Requests are chosen according to SSTF with a penalty of rev_penalty
for switching head direction.


config IOSCHED_BFQ
tristate "BFQ I/O scheduler"
depends on EXPERIMENTAL
default n
---help---
The BFQ I/O scheduler tries to distribute bandwidth among
all processes according to their weights.
It aims at distributing the bandwidth as desired, independently of
the disk parameters and with any workload. It also tries to
guarantee low latency to interactive and soft real-time
applications. If compiled built-in (saying Y here), BFQ can
be configured to support hierarchical scheduling.

config CGROUP_BFQIO
bool "BFQ hierarchical scheduling support"
depends on CGROUPS && IOSCHED_BFQ=y
default n
---help---
Enable hierarchical scheduling in BFQ, using the cgroups
filesystem interface. The name of the subsystem will be
bfqio.

choice
prompt "Default I/O scheduler"
default DEFAULT_CFQ
Expand All @@ -62,7 +86,10 @@ choice
config DEFAULT_CFQ
bool "CFQ" if IOSCHED_CFQ=y

config DEFAULT_VR
config DEFAULT_BFQ
bool "BFQ" if IOSCHED_BFQ=y

config DEFAULT_VR
bool "VR" if IOSCHED_VR=y

config DEFAULT_NOOP
Expand All @@ -74,6 +101,7 @@ config DEFAULT_IOSCHED
string
default "deadline" if DEFAULT_DEADLINE
default "cfq" if DEFAULT_CFQ
default "bfq" if DEFAULT_BFQ
default "vr" if DEFAULT_VR
default "noop" if DEFAULT_NOOP

Expand Down
27 changes: 15 additions & 12 deletions block/blk-ioc.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/bitmap.h>
#include <linux/blkdev.h>
#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
#include <linux/slab.h>
Expand All @@ -16,13 +17,12 @@
*/
static struct kmem_cache *iocontext_cachep;

static void cfq_dtor(struct io_context *ioc)
static void hlist_sched_dtor(struct io_context *ioc, struct hlist_head *list)
{
if (!hlist_empty(&ioc->cic_list)) {
if (!hlist_empty(list)) {
struct cfq_io_context *cic;

cic = list_entry(ioc->cic_list.first, struct cfq_io_context,
cic_list);
cic = list_entry(list->first, struct cfq_io_context, cic_list);
cic->dtor(ioc);
}
}
Expand All @@ -40,7 +40,9 @@ int put_io_context(struct io_context *ioc)

if (atomic_long_dec_and_test(&ioc->refcount)) {
rcu_read_lock();
cfq_dtor(ioc);

hlist_sched_dtor(ioc, &ioc->cic_list);
hlist_sched_dtor(ioc, &ioc->bfq_cic_list);
rcu_read_unlock();

kmem_cache_free(iocontext_cachep, ioc);
Expand All @@ -50,15 +52,14 @@ int put_io_context(struct io_context *ioc)
}
EXPORT_SYMBOL(put_io_context);

static void cfq_exit(struct io_context *ioc)
static void hlist_sched_exit(struct io_context *ioc, struct hlist_head *list)
{
rcu_read_lock();

if (!hlist_empty(&ioc->cic_list)) {
if (!hlist_empty(list)) {
struct cfq_io_context *cic;

cic = list_entry(ioc->cic_list.first, struct cfq_io_context,
cic_list);
cic = list_entry(list->first, struct cfq_io_context, cic_list);
cic->exit(ioc);
}
rcu_read_unlock();
Expand All @@ -75,8 +76,8 @@ void exit_io_context(struct task_struct *task)
task_unlock(task);

if (atomic_dec_and_test(&ioc->nr_tasks)) {
cfq_exit(ioc);

hlist_sched_exit(ioc, &ioc->cic_list);
hlist_sched_exit(ioc, &ioc->bfq_cic_list);
}
put_io_context(ioc);
}
Expand All @@ -90,12 +91,14 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
atomic_long_set(&ret->refcount, 1);
atomic_set(&ret->nr_tasks, 1);
spin_lock_init(&ret->lock);
ret->ioprio_changed = 0;
bitmap_zero(ret->ioprio_changed, IOC_IOPRIO_CHANGED_BITS);
ret->ioprio = 0;
ret->last_waited = 0; /* doesn't matter... */
ret->nr_batch_requests = 0; /* because this is 0 */
INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
INIT_HLIST_HEAD(&ret->cic_list);
INIT_RADIX_TREE(&ret->bfq_radix_root, GFP_ATOMIC | __GFP_HIGH);
INIT_HLIST_HEAD(&ret->bfq_cic_list);
ret->ioc_data = NULL;
}

Expand Down
10 changes: 7 additions & 3 deletions block/cfq-iosched.c
Original file line number Diff line number Diff line change
Expand Up @@ -2755,7 +2755,6 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
static void cfq_ioc_set_ioprio(struct io_context *ioc)
{
call_for_each_cic(ioc, changed_ioprio);
ioc->ioprio_changed = 0;
}

static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
Expand Down Expand Up @@ -3039,8 +3038,13 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
goto err_free;

out:
smp_read_barrier_depends();
if (unlikely(ioc->ioprio_changed))
/*
* test_and_clear_bit() implies a memory barrier, paired with
* the wmb() in fs/ioprio.c, so the value seen for ioprio is the
* new one.
*/
if (unlikely(test_and_clear_bit(IOC_CFQ_IOPRIO_CHANGED,
ioc->ioprio_changed)))
cfq_ioc_set_ioprio(ioc);

#ifdef CONFIG_CFQ_GROUP_IOSCHED
Expand Down
9 changes: 7 additions & 2 deletions fs/ioprio.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

int set_task_ioprio(struct task_struct *task, int ioprio)
{
int err;
int err, i;
struct io_context *ioc;
const struct cred *cred = current_cred(), *tcred;

Expand Down Expand Up @@ -60,12 +60,17 @@ int set_task_ioprio(struct task_struct *task, int ioprio)
err = -ENOMEM;
break;
}
/* let other ioc users see the new values */
smp_wmb();
task->io_context = ioc;
} while (1);

if (!err) {
ioc->ioprio = ioprio;
ioc->ioprio_changed = 1;
/* make sure schedulers see the new ioprio value */
wmb();
for (i = 0; i < IOC_IOPRIO_CHANGED_BITS; i++)
set_bit(i, ioc->ioprio_changed);
}

task_unlock(task);
Expand Down
18 changes: 15 additions & 3 deletions include/linux/iocontext.h
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#ifndef IOCONTEXT_H
#define IOCONTEXT_H

#include <linux/bitmap.h>
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>

struct cfq_queue;
struct cfq_io_context {
void *key;

struct cfq_queue *cfqq[2];
void *cfqq[2];

struct io_context *ioc;

Expand All @@ -27,6 +27,16 @@ struct cfq_io_context {
struct rcu_head rcu_head;
};

/*
* Indexes into the ioprio_changed bitmap. A bit set indicates that
* the corresponding I/O scheduler needs to see a ioprio update.
*/
enum {
IOC_CFQ_IOPRIO_CHANGED,
IOC_BFQ_IOPRIO_CHANGED,
IOC_IOPRIO_CHANGED_BITS
};

/*
* I/O subsystem state of the associated processes. It is refcounted
* and kmalloc'ed. These could be shared between processes.
Expand All @@ -39,7 +49,7 @@ struct io_context {
spinlock_t lock;

unsigned short ioprio;
unsigned short ioprio_changed;
DECLARE_BITMAP(ioprio_changed, IOC_IOPRIO_CHANGED_BITS);

#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
unsigned short cgroup_changed;
Expand All @@ -53,6 +63,8 @@ struct io_context {

struct radix_tree_root radix_root;
struct hlist_head cic_list;
struct radix_tree_root bfq_radix_root;
struct hlist_head bfq_cic_list;
void *ioc_data;
};

Expand Down

0 comments on commit 4bd412a

Please sign in to comment.