blkcg: always create the blkcg_gq for the root blkcg
Currently, blkcg does a minor optimization where the root blkcg_gq is
created when the first blkcg policy is activated on a queue and
destroyed on the deactivation of the last.  On systems where blkcg is
configured but not used, this saves one blkcg_gq struct per queue.  On
systems where blkcg is actually used, there's no difference.  The only
case where this can lead to any meaningful, albeit still minute, saving
in memory consumption is when all blkcg policies are deactivated after
being widely used in the system, which is a highly unlikely scenario.
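
Concretely, the lazy scheme lived in the policy activation and
deactivation paths.  The fragments below are condensed from the
pre-patch code that the hunks further down remove; a sketch, not a
verbatim quote:

	/* old scheme, in blkcg_activate_policy(): create the root blkg lazily */
	blkg = __blkg_lookup(&blkcg_root, q, false);
	if (!blkg)
		blkg = blkg_create(&blkcg_root, q, new_blkg);

	/* old scheme, in blkcg_deactivate_policy(): tear it all down again */
	if (bitmap_empty(q->blkcg_pols, BLKCG_MAX_POLS))
		blkg_destroy_all(q);	/* the root blkg goes away with the rest */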

The conditional existence of the root blkcg_gq has already created
several bugs in blkcg and became an issue once again for the new
per-cgroup wb_congested mechanism for cgroup writeback support,
leading to a NULL dereference when no blkcg policy is active.  This is
really not worth bothering with.  This patch makes blkcg always
allocate and link the root blkcg_gq and release it only on queue
destruction.
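
The hazard is easy to sketch.  Before this patch, q->root_blkg stayed
NULL until some policy was activated, so any path that assumed it to
be valid could crash on a policy-free queue.  touch_root_blkg() below
is a hypothetical illustration, not a function from this commit:

	/* hypothetical pre-patch consumer, for illustration only */
	static void touch_root_blkg(struct request_queue *q)
	{
		struct blkcg_gq *blkg = q->root_blkg;	/* NULL while no policy is active */

		blkg_get(blkg);		/* NULL dereference on such a queue */
	}

After the patch, q->root_blkg is assigned in blkcg_init_queue() and
stays valid until the queue is destroyed, so such paths need no NULL
check.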

Signed-off-by: Tejun Heo <[email protected]>
Reported-by: Fengguang Wu <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
htejun authored and axboe committed Jun 2, 2015
1 parent efa7d1c commit ec13b1d
Showing 1 changed file with 41 additions and 55 deletions.
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -235,13 +235,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
 	blkg->online = true;
 	spin_unlock(&blkcg->lock);
 
-	if (!ret) {
-		if (blkcg == &blkcg_root) {
-			q->root_blkg = blkg;
-			q->root_rl.blkg = blkg;
-		}
+	if (!ret)
 		return blkg;
-	}
 
 	/* @blkg failed fully initialized, use the usual release path */
 	blkg_put(blkg);
@@ -339,15 +334,6 @@ static void blkg_destroy(struct blkcg_gq *blkg)
 	if (rcu_access_pointer(blkcg->blkg_hint) == blkg)
 		rcu_assign_pointer(blkcg->blkg_hint, NULL);
 
-	/*
-	 * If root blkg is destroyed.  Just clear the pointer since root_rl
-	 * does not take reference on root blkg.
-	 */
-	if (blkcg == &blkcg_root) {
-		blkg->q->root_blkg = NULL;
-		blkg->q->root_rl.blkg = NULL;
-	}
-
 	/*
 	 * Put the reference taken at the time of creation so that when all
 	 * queues are gone, group can be destroyed.
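
For context on the comment this hunk ends in: the root blkg now takes
the same release path as every other blkg, dying when blkg_put() drops
the reference taken at creation.  A rough sketch of blkg_put() in this
era of the tree, recalled from memory rather than taken from this
diff, so details may differ:

	/* sketch: drop a reference; free through RCU once the last one is gone */
	static inline void blkg_put(struct blkcg_gq *blkg)
	{
		WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
		if (atomic_dec_and_test(&blkg->refcnt))
			call_rcu(&blkg->rcu_head, __blkg_release_rcu);
	}
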
@@ -855,9 +841,45 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
  */
 int blkcg_init_queue(struct request_queue *q)
 {
-	might_sleep();
+	struct blkcg_gq *new_blkg, *blkg;
+	bool preloaded;
+	int ret;
+
+	new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
+	if (!new_blkg)
+		return -ENOMEM;
+
+	preloaded = !radix_tree_preload(GFP_KERNEL);
 
-	return blk_throtl_init(q);
+	/*
+	 * Make sure the root blkg exists and count the existing blkgs.  As
+	 * @q is bypassing at this point, blkg_lookup_create() can't be
+	 * used.  Open code insertion.
+	 */
+	rcu_read_lock();
+	spin_lock_irq(q->queue_lock);
+	blkg = blkg_create(&blkcg_root, q, new_blkg);
+	spin_unlock_irq(q->queue_lock);
+	rcu_read_unlock();
+
+	if (preloaded)
+		radix_tree_preload_end();
+
+	if (IS_ERR(blkg)) {
+		kfree(new_blkg);
+		return PTR_ERR(blkg);
+	}
+
+	q->root_blkg = blkg;
+	q->root_rl.blkg = blkg;
+
+	ret = blk_throtl_init(q);
+	if (ret) {
+		spin_lock_irq(q->queue_lock);
+		blkg_destroy_all(q);
+		spin_unlock_irq(q->queue_lock);
+	}
+	return ret;
 }
 
 /**
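
The preload dance in the new blkcg_init_queue() deserves a note.
radix_tree_preload(GFP_KERNEL) may sleep to stock per-CPU node
reserves and, on success (return value 0), comes back with preemption
disabled, which is why the result is latched in the preloaded flag;
the radix_tree_insert() that blkg_create() then performs under the
queue lock cannot fail for lack of memory.  A minimal sketch of the
pattern, with my_lock, my_tree, idx and item as illustrative
placeholders rather than blkcg symbols:

	preloaded = !radix_tree_preload(GFP_KERNEL);	/* may sleep; 0 on success */

	spin_lock_irq(&my_lock);			/* atomic from here on */
	ret = radix_tree_insert(&my_tree, idx, item);	/* uses the preloaded nodes */
	spin_unlock_irq(&my_lock);

	if (preloaded)
		radix_tree_preload_end();		/* re-enables preemption */
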
@@ -958,52 +980,20 @@ int blkcg_activate_policy(struct request_queue *q,
 		      const struct blkcg_policy *pol)
 {
 	LIST_HEAD(pds);
-	struct blkcg_gq *blkg, *new_blkg;
+	struct blkcg_gq *blkg;
 	struct blkg_policy_data *pd, *n;
 	int cnt = 0, ret;
-	bool preloaded;
 
 	if (blkcg_policy_enabled(q, pol))
 		return 0;
 
-	/* preallocations for root blkg */
-	new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
-	if (!new_blkg)
-		return -ENOMEM;
-
+	/* count and allocate policy_data for all existing blkgs */
 	blk_queue_bypass_start(q);
-
-	preloaded = !radix_tree_preload(GFP_KERNEL);
-
-	/*
-	 * Make sure the root blkg exists and count the existing blkgs.  As
-	 * @q is bypassing at this point, blkg_lookup_create() can't be
-	 * used.  Open code it.
-	 */
 	spin_lock_irq(q->queue_lock);
-
-	rcu_read_lock();
-	blkg = __blkg_lookup(&blkcg_root, q, false);
-	if (blkg)
-		blkg_free(new_blkg);
-	else
-		blkg = blkg_create(&blkcg_root, q, new_blkg);
-	rcu_read_unlock();
-
-	if (preloaded)
-		radix_tree_preload_end();
-
-	if (IS_ERR(blkg)) {
-		ret = PTR_ERR(blkg);
-		goto out_unlock;
-	}
-
 	list_for_each_entry(blkg, &q->blkg_list, q_node)
 		cnt++;
-
 	spin_unlock_irq(q->queue_lock);
 
-	/* allocate policy_data for all existing blkgs */
 	while (cnt--) {
 		pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
 		if (!pd) {
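
The much simpler count-then-allocate flow above is safe because
blk_queue_bypass_start() holds @q in bypass mode for the whole window:
blkg creation is refused on a bypassing queue, so the blkg list cannot
grow while the lock is dropped for the GFP_KERNEL allocations.  A
hedged sketch of that guard at the top of blkg_lookup_create() in this
era, paraphrased from memory and not part of this diff:

	/* sketch: blkg creation bails out while @q is bypassing or dying */
	if (unlikely(blk_queue_bypass(q)))
		return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY);
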
@@ -1072,10 +1062,6 @@ void blkcg_deactivate_policy(struct request_queue *q,
 
 	__clear_bit(pol->plid, q->blkcg_pols);
 
-	/* if no policy is left, no need for blkgs - shoot them down */
-	if (bitmap_empty(q->blkcg_pols, BLKCG_MAX_POLS))
-		blkg_destroy_all(q);
-
 	list_for_each_entry(blkg, &q->blkg_list, q_node) {
 		/* grab blkcg lock too while removing @pd from @blkg */
 		spin_lock(&blkg->blkcg->lock);
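
With the deactivation-time teardown removed, the remaining callers of
blkg_destroy_all() are blkcg_init_queue()'s error path above and queue
teardown, which is exactly the "release it only on queue destruction"
behavior promised by the commit message.  For orientation, a hedged
sketch of blkcg_exit_queue() in this era, recalled from memory and not
part of this diff:

	/* sketch: all blkgs, the root one included, now die with the queue */
	void blkcg_exit_queue(struct request_queue *q)
	{
		spin_lock_irq(q->queue_lock);
		blkg_destroy_all(q);
		spin_unlock_irq(q->queue_lock);

		blk_throtl_exit(q);
	}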