Skip to content

Commit

Permalink
Merge branch 'for-3.6/core' of git://git.kernel.dk/linux-block
Browse files Browse the repository at this point in the history
Pull core block IO bits from Jens Axboe:
 "The most complicated part if this is the request allocation rework by
  Tejun, which has been queued up for a long time and has been in
  for-next ditto as well.

  There are a few commits from yesterday and today, mostly trivial and
  obvious fixes.  So I'm pretty confident that it is sound.  It's also
  smaller than usual."

* 'for-3.6/core' of git://git.kernel.dk/linux-block:
  block: remove dead func declaration
  block: add partition resize function to blkpg ioctl
  block: uninitialized ioc->nr_tasks triggers WARN_ON
  block: do not artificially constrain max_sectors for stacking drivers
  blkcg: implement per-blkg request allocation
  block: prepare for multiple request_lists
  block: add q->nr_rqs[] and move q->rq.elvpriv to q->nr_rqs_elvpriv
  blkcg: inline bio_blkcg() and friends
  block: allocate io_context upfront
  block: refactor get_request[_wait]()
  block: drop custom queue draining used by scsi_transport_{iscsi|fc}
  mempool: add @gfp_mask to mempool_create_node()
  blkcg: make root blkcg allocation use %GFP_KERNEL
  blkcg: __blkg_lookup_create() doesn't need radix preload
torvalds committed Aug 1, 2012
2 parents fcff06c + 80799fb commit 8cf1a3f
Showing 21 changed files with 530 additions and 301 deletions.
7 changes: 7 additions & 0 deletions Documentation/block/queue-sysfs.txt
Original file line number Diff line number Diff line change
@@ -38,6 +38,13 @@ read or write requests. Note that the total allocated number may be twice
this amount, since it applies only to reads or writes (not the accumulated
sum).

To avoid priority inversion through request starvation, a request
queue maintains a separate request pool per each cgroup when
CONFIG_BLK_CGROUP is enabled, and this parameter applies to each such
per-block-cgroup request pool. IOW, if there are N block cgroups,
each request queue may have upto N request pools, each independently
regulated by nr_requests.

read_ahead_kb (RW)
------------------
Maximum number of kilobytes to read-ahead for filesystems on this block
139 changes: 90 additions & 49 deletions block/blk-cgroup.c
Original file line number Diff line number Diff line change
@@ -31,27 +31,6 @@ EXPORT_SYMBOL_GPL(blkcg_root);

static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];

struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup)
{
return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
struct blkcg, css);
}
EXPORT_SYMBOL_GPL(cgroup_to_blkcg);

static struct blkcg *task_blkcg(struct task_struct *tsk)
{
return container_of(task_subsys_state(tsk, blkio_subsys_id),
struct blkcg, css);
}

struct blkcg *bio_blkcg(struct bio *bio)
{
if (bio && bio->bi_css)
return container_of(bio->bi_css, struct blkcg, css);
return task_blkcg(current);
}
EXPORT_SYMBOL_GPL(bio_blkcg);

static bool blkcg_policy_enabled(struct request_queue *q,
const struct blkcg_policy *pol)
{
@@ -84,23 +63,26 @@ static void blkg_free(struct blkcg_gq *blkg)
kfree(pd);
}

blk_exit_rl(&blkg->rl);
kfree(blkg);
}

/**
* blkg_alloc - allocate a blkg
* @blkcg: block cgroup the new blkg is associated with
* @q: request_queue the new blkg is associated with
* @gfp_mask: allocation mask to use
*
* Allocate a new blkg assocating @blkcg and @q.
*/
static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
gfp_t gfp_mask)
{
struct blkcg_gq *blkg;
int i;

/* alloc and init base part */
blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node);
blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
if (!blkg)
return NULL;

@@ -109,6 +91,13 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
blkg->blkcg = blkcg;
blkg->refcnt = 1;

/* root blkg uses @q->root_rl, init rl only for !root blkgs */
if (blkcg != &blkcg_root) {
if (blk_init_rl(&blkg->rl, q, gfp_mask))
goto err_free;
blkg->rl.blkg = blkg;
}

for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
struct blkg_policy_data *pd;
@@ -117,11 +106,9 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
continue;

/* alloc per-policy data and attach it to blkg */
pd = kzalloc_node(pol->pd_size, GFP_ATOMIC, q->node);
if (!pd) {
blkg_free(blkg);
return NULL;
}
pd = kzalloc_node(pol->pd_size, gfp_mask, q->node);
if (!pd)
goto err_free;

blkg->pd[i] = pd;
pd->blkg = blkg;
@@ -132,6 +119,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
}

return blkg;

err_free:
blkg_free(blkg);
return NULL;
}

static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
@@ -175,9 +166,13 @@ struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blkg_lookup);

/*
* If @new_blkg is %NULL, this function tries to allocate a new one as
* necessary using %GFP_ATOMIC. @new_blkg is always consumed on return.
*/
static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
struct request_queue *q)
__releases(q->queue_lock) __acquires(q->queue_lock)
struct request_queue *q,
struct blkcg_gq *new_blkg)
{
struct blkcg_gq *blkg;
int ret;
@@ -189,24 +184,26 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
blkg = __blkg_lookup(blkcg, q);
if (blkg) {
rcu_assign_pointer(blkcg->blkg_hint, blkg);
return blkg;
goto out_free;
}

/* blkg holds a reference to blkcg */
if (!css_tryget(&blkcg->css))
return ERR_PTR(-EINVAL);
if (!css_tryget(&blkcg->css)) {
blkg = ERR_PTR(-EINVAL);
goto out_free;
}

/* allocate */
ret = -ENOMEM;
blkg = blkg_alloc(blkcg, q);
if (unlikely(!blkg))
goto err_put;
if (!new_blkg) {
new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC);
if (unlikely(!new_blkg)) {
blkg = ERR_PTR(-ENOMEM);
goto out_put;
}
}
blkg = new_blkg;

/* insert */
ret = radix_tree_preload(GFP_ATOMIC);
if (ret)
goto err_free;

spin_lock(&blkcg->lock);
ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
if (likely(!ret)) {
@@ -215,15 +212,15 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
}
spin_unlock(&blkcg->lock);

radix_tree_preload_end();

if (!ret)
return blkg;
err_free:
blkg_free(blkg);
err_put:

blkg = ERR_PTR(ret);
out_put:
css_put(&blkcg->css);
return ERR_PTR(ret);
out_free:
blkg_free(new_blkg);
return blkg;
}

struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
@@ -235,7 +232,7 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
*/
if (unlikely(blk_queue_bypass(q)))
return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
return __blkg_lookup_create(blkcg, q);
return __blkg_lookup_create(blkcg, q, NULL);
}
EXPORT_SYMBOL_GPL(blkg_lookup_create);

@@ -313,6 +310,38 @@ void __blkg_release(struct blkcg_gq *blkg)
}
EXPORT_SYMBOL_GPL(__blkg_release);

/*
* The next function used by blk_queue_for_each_rl(). It's a bit tricky
* because the root blkg uses @q->root_rl instead of its own rl.
*/
struct request_list *__blk_queue_next_rl(struct request_list *rl,
struct request_queue *q)
{
struct list_head *ent;
struct blkcg_gq *blkg;

/*
* Determine the current blkg list_head. The first entry is
* root_rl which is off @q->blkg_list and mapped to the head.
*/
if (rl == &q->root_rl) {
ent = &q->blkg_list;
} else {
blkg = container_of(rl, struct blkcg_gq, rl);
ent = &blkg->q_node;
}

/* walk to the next list_head, skip root blkcg */
ent = ent->next;
if (ent == &q->root_blkg->q_node)
ent = ent->next;
if (ent == &q->blkg_list)
return NULL;

blkg = container_of(ent, struct blkcg_gq, q_node);
return &blkg->rl;
}

static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype,
u64 val)
{
@@ -734,24 +763,36 @@ int blkcg_activate_policy(struct request_queue *q,
struct blkcg_gq *blkg;
struct blkg_policy_data *pd, *n;
int cnt = 0, ret;
bool preloaded;

if (blkcg_policy_enabled(q, pol))
return 0;

/* preallocations for root blkg */
blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
if (!blkg)
return -ENOMEM;

preloaded = !radix_tree_preload(GFP_KERNEL);

blk_queue_bypass_start(q);

/* make sure the root blkg exists and count the existing blkgs */
spin_lock_irq(q->queue_lock);

rcu_read_lock();
blkg = __blkg_lookup_create(&blkcg_root, q);
blkg = __blkg_lookup_create(&blkcg_root, q, blkg);
rcu_read_unlock();

if (preloaded)
radix_tree_preload_end();

if (IS_ERR(blkg)) {
ret = PTR_ERR(blkg);
goto out_unlock;
}
q->root_blkg = blkg;
q->root_rl.blkg = blkg;

list_for_each_entry(blkg, &q->blkg_list, q_node)
cnt++;
Loading

0 comments on commit 8cf1a3f

Please sign in to comment.