Skip to content

Commit

Permalink
blkcg: let blkcg core manage per-queue blkg list and counter
Browse files Browse the repository at this point in the history
With the previous patch to move blkg list heads and counters to
request_queue and blkg, logic to manage them in both policies are
almost identical and can be moved to blkcg core.

This patch moves blkg link logic into blkg_lookup_create(), implements
common blkg unlink code in blkg_destroy(), and updates
blkg_destroy_all() so that it's policy specific and can skip root
group.  The updated blkg_destroy_all() is now used to both clear queue
for bypassing and elv switching, and release all blkgs on q exit.

This patch introduces a race window where policy [de]registration may
race against queue blkg clearing.  This can only be a problem on cfq
unload and shouldn't be a real problem in practice (and we have many
other places where this race already exists).  Future patches will
remove these unlikely races.

Signed-off-by: Tejun Heo <[email protected]>
Cc: Vivek Goyal <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
htejun authored and axboe committed Mar 6, 2012
1 parent 4eef304 commit 03aa264
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 221 deletions.
72 changes: 56 additions & 16 deletions block/blk-cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -596,8 +596,11 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
/* insert */
spin_lock(&blkcg->lock);
swap(blkg, new_blkg);

hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
pol->ops.blkio_link_group_fn(q, blkg);
list_add(&blkg->q_node[plid], &q->blkg_list[plid]);
q->nr_blkgs[plid]++;

spin_unlock(&blkcg->lock);
out:
blkg_free(new_blkg);
Expand Down Expand Up @@ -646,36 +649,69 @@ struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
}
EXPORT_SYMBOL_GPL(blkg_lookup);

void blkg_destroy_all(struct request_queue *q)
/*
 * blkg_destroy - unlink @blkg from its request_queue for policy @plid
 * @blkg: blkg being destroyed
 * @plid: policy id whose per-queue linkage is being torn down
 *
 * Removes @blkg from the queue's per-policy blkg list, decrements the
 * per-policy blkg counter, and drops the reference taken when the group
 * was created.  The group itself is freed once its last reference goes
 * away (blkg_put).  Caller must hold blkg->q->queue_lock.
 */
static void blkg_destroy(struct blkio_group *blkg, enum blkio_policy_id plid)
{
struct request_queue *q = blkg->q;

/* unlink must happen under the owning queue's lock */
lockdep_assert_held(q->queue_lock);

/* Something wrong if we are trying to remove same group twice */
WARN_ON_ONCE(list_empty(&blkg->q_node[plid]));
list_del_init(&blkg->q_node[plid]);

/* keep the per-policy group count in sync with the list */
WARN_ON_ONCE(q->nr_blkgs[plid] <= 0);
q->nr_blkgs[plid]--;

/*
 * Put the reference taken at the time of creation so that when all
 * queues are gone, group can be destroyed.
 */
blkg_put(blkg);
}

/*
 * blkg_destroy_all - destroy all blkgs of policy @plid on @q
 * @q: request_queue whose blkgs are being destroyed
 * @plid: policy id to tear down
 * @destroy_root: if false, the root blkio cgroup's blkg is left alone
 *
 * Loops until every targeted blkg is gone.  A blkg for which
 * blkiocg_del_blkio_group() fails is being concurrently removed by the
 * cgroup removal path, which will destroy it itself; we sleep briefly
 * and rescan until the list drains.
 *
 * NOTE(review): this hunk appears to interleave leftover pre-patch
 * lines (the blkio_list locking and blkio_clear_queue_fn iteration)
 * with the new implementation -- confirm against the applied tree
 * before relying on the exact statement sequence shown here.
 */
void blkg_destroy_all(struct request_queue *q, enum blkio_policy_id plid,
bool destroy_root)
{
struct blkio_policy_type *pol;
struct blkio_group *blkg, *n;

while (true) {
bool done = true;

spin_lock(&blkio_list_lock);
spin_lock_irq(q->queue_lock);

/*
* clear_queue_fn() might return with non-empty group list
* if it raced cgroup removal and lost. cgroup removal is
* guaranteed to make forward progress and retrying after a
* while is enough. This ugliness is scheduled to be
* removed after locking update.
*/
list_for_each_entry(pol, &blkio_list, list)
if (!pol->ops.blkio_clear_queue_fn(q))
list_for_each_entry_safe(blkg, n, &q->blkg_list[plid],
q_node[plid]) {
/* skip root? */
if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
continue;

/*
* If cgroup removal path got to blk_group first
* and removed it from cgroup list, then it will
* take care of destroying cfqg also.
*/
if (!blkiocg_del_blkio_group(blkg))
blkg_destroy(blkg, plid);
else
done = false;
}

spin_unlock_irq(q->queue_lock);
spin_unlock(&blkio_list_lock);

/*
* Group list may not be empty if we raced cgroup removal
* and lost. cgroup removal is guaranteed to make forward
* progress and retrying after a while is enough. This
* ugliness is scheduled to be removed after locking
* update.
*/
if (done)
break;

msleep(10); /* just some random duration I like */
}
}
EXPORT_SYMBOL_GPL(blkg_destroy_all);

static void blkg_rcu_free(struct rcu_head *rcu_head)
{
Expand Down Expand Up @@ -1549,11 +1585,13 @@ static int blkiocg_pre_destroy(struct cgroup_subsys *subsys,
* this event.
*/
spin_lock(&blkio_list_lock);
spin_lock_irqsave(q->queue_lock, flags);
list_for_each_entry(blkiop, &blkio_list, list) {
if (blkiop->plid != blkg->plid)
continue;
blkiop->ops.blkio_unlink_group_fn(q, blkg);
blkg_destroy(blkg, blkiop->plid);
}
spin_unlock_irqrestore(q->queue_lock, flags);
spin_unlock(&blkio_list_lock);
} while (1);

Expand Down Expand Up @@ -1695,12 +1733,14 @@ static void blkcg_bypass_start(void)
__acquires(&all_q_mutex)
{
struct request_queue *q;
int i;

mutex_lock(&all_q_mutex);

list_for_each_entry(q, &all_q_list, all_q_node) {
blk_queue_bypass_start(q);
blkg_destroy_all(q);
for (i = 0; i < BLKIO_NR_POLICIES; i++)
blkg_destroy_all(q, i, false);
}
}

Expand Down
15 changes: 5 additions & 10 deletions block/blk-cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,11 +196,6 @@ struct blkio_group {
};

typedef void (blkio_init_group_fn)(struct blkio_group *blkg);
typedef void (blkio_link_group_fn)(struct request_queue *q,
struct blkio_group *blkg);
typedef void (blkio_unlink_group_fn)(struct request_queue *q,
struct blkio_group *blkg);
typedef bool (blkio_clear_queue_fn)(struct request_queue *q);
typedef void (blkio_update_group_weight_fn)(struct request_queue *q,
struct blkio_group *blkg, unsigned int weight);
typedef void (blkio_update_group_read_bps_fn)(struct request_queue *q,
Expand All @@ -214,9 +209,6 @@ typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q,

struct blkio_policy_ops {
blkio_init_group_fn *blkio_init_group_fn;
blkio_link_group_fn *blkio_link_group_fn;
blkio_unlink_group_fn *blkio_unlink_group_fn;
blkio_clear_queue_fn *blkio_clear_queue_fn;
blkio_update_group_weight_fn *blkio_update_group_weight_fn;
blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
Expand All @@ -238,7 +230,8 @@ extern void blkcg_exit_queue(struct request_queue *q);
/* Blkio controller policy registration */
extern void blkio_policy_register(struct blkio_policy_type *);
extern void blkio_policy_unregister(struct blkio_policy_type *);
extern void blkg_destroy_all(struct request_queue *q);
extern void blkg_destroy_all(struct request_queue *q,
enum blkio_policy_id plid, bool destroy_root);

/**
* blkg_to_pdata - get policy private data
Expand Down Expand Up @@ -319,7 +312,9 @@ static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { }
static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { }
static inline void blkg_destroy_all(struct request_queue *q) { }
/*
 * No-op stub, presumably for builds with blk-cgroup support compiled
 * out (matches the surrounding empty inline stubs -- confirm #ifdef).
 * Fixed misspelled parameter name "destory_root" -> "destroy_root" to
 * match the real declaration above.
 */
static inline void blkg_destroy_all(struct request_queue *q,
enum blkio_policy_id plid,
bool destroy_root) { }

static inline void *blkg_to_pdata(struct blkio_group *blkg,
struct blkio_policy_type *pol) { return NULL; }
Expand Down
99 changes: 2 additions & 97 deletions block/blk-throttle.c
Original file line number Diff line number Diff line change
Expand Up @@ -157,14 +157,6 @@ static void throtl_init_blkio_group(struct blkio_group *blkg)
tg->iops[WRITE] = -1;
}

/*
 * Link @blkg onto @q's blk-throttle blkg list and bump the per-policy
 * group count.  Counterpart of throtl_destroy_tg()'s unlink/decrement.
 * NOTE(review): presumably called with q->queue_lock held (no lock is
 * taken here) -- confirm against the caller.
 */
static void throtl_link_blkio_group(struct request_queue *q,
struct blkio_group *blkg)
{
list_add(&blkg->q_node[BLKIO_POLICY_THROTL],
&q->blkg_list[BLKIO_POLICY_THROTL]);
q->nr_blkgs[BLKIO_POLICY_THROTL]++;
}

static struct
throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
{
Expand Down Expand Up @@ -813,89 +805,6 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
}
}

/*
 * Unlink @tg's blkg from the queue's throttle blkg list, decrement the
 * per-policy group count, and drop the creation-time reference.  The
 * group is actually freed when the last reference is put.
 */
static void
throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg)
{
struct blkio_group *blkg = tg_to_blkg(tg);

/* Something wrong if we are trying to remove same group twice */
WARN_ON_ONCE(list_empty(&blkg->q_node[BLKIO_POLICY_THROTL]));

list_del_init(&blkg->q_node[BLKIO_POLICY_THROTL]);

/*
* Put the reference taken at the time of creation so that when all
* queues are gone, group can be destroyed.
*/
blkg_put(tg_to_blkg(tg));
td->queue->nr_blkgs[BLKIO_POLICY_THROTL]--;
}

/*
 * Destroy the throttle groups on @td->queue, optionally skipping the
 * root group (@release_root false).  Groups that lose the race with
 * cgroup removal (blkiocg_del_blkio_group() failing) are left for that
 * path to destroy.
 *
 * Returns true if the list was fully drained, false if any group was
 * skipped due to the race and the caller should retry.
 */
static bool throtl_release_tgs(struct throtl_data *td, bool release_root)
{
struct request_queue *q = td->queue;
struct blkio_group *blkg, *n;
bool empty = true;

list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_THROTL],
q_node[BLKIO_POLICY_THROTL]) {
struct throtl_grp *tg = blkg_to_tg(blkg);

/* skip root? */
if (!release_root && tg == td->root_tg)
continue;

/*
* If cgroup removal path got to blk_group first and removed
* it from cgroup list, then it will take care of destroying
* cfqg also.
*/
if (!blkiocg_del_blkio_group(blkg))
throtl_destroy_tg(td, tg);
else
empty = false;
}
return empty;
}

/*
* Blk cgroup controller notification saying that blkio_group object is being
* delinked as associated cgroup object is going away. That also means that
* no new IO will come in this group. So get rid of this group as soon as
* any pending IO in the group is finished.
*
* This function is called under rcu_read_lock(). @q is the rcu protected
* pointer. That means @q is a valid request_queue pointer as long as we
* are under the rcu read lock.
*
* @q was fetched from blkio_group under blkio_cgroup->lock. That means
* it should not be NULL as even if queue was going away, cgroup deletion
* path got to it first.
*/
void throtl_unlink_blkio_group(struct request_queue *q,
struct blkio_group *blkg)
{
unsigned long flags;

/* throtl_destroy_tg() expects q->queue_lock held; take it irq-safe */
spin_lock_irqsave(q->queue_lock, flags);
throtl_destroy_tg(q->td, blkg_to_tg(blkg));
spin_unlock_irqrestore(q->queue_lock, flags);
}

/*
 * Policy clear_queue callback: drop all non-root throttle groups on @q.
 * Returns true if the group list was fully drained (see
 * throtl_release_tgs()), false if a racing cgroup removal means the
 * caller should retry.
 */
static bool throtl_clear_queue(struct request_queue *q)
{
lockdep_assert_held(q->queue_lock);

/*
* Clear tgs but leave the root one alone. This is necessary
* because root_tg is expected to be persistent and safe because
* blk-throtl can never be disabled while @q is alive. This is a
* kludge to prepare for unified blkg. This whole function will be
* removed soon.
*/
return throtl_release_tgs(q->td, false);
}

static void throtl_update_blkio_group_common(struct throtl_data *td,
struct throtl_grp *tg)
{
Expand Down Expand Up @@ -960,9 +869,6 @@ static void throtl_shutdown_wq(struct request_queue *q)
static struct blkio_policy_type blkio_policy_throtl = {
.ops = {
.blkio_init_group_fn = throtl_init_blkio_group,
.blkio_link_group_fn = throtl_link_blkio_group,
.blkio_unlink_group_fn = throtl_unlink_blkio_group,
.blkio_clear_queue_fn = throtl_clear_queue,
.blkio_update_group_read_bps_fn =
throtl_update_blkio_group_read_bps,
.blkio_update_group_write_bps_fn =
Expand Down Expand Up @@ -1148,12 +1054,11 @@ void blk_throtl_exit(struct request_queue *q)

throtl_shutdown_wq(q);

spin_lock_irq(q->queue_lock);
throtl_release_tgs(td, true);
blkg_destroy_all(q, BLKIO_POLICY_THROTL, true);

/* If there are other groups */
spin_lock_irq(q->queue_lock);
wait = q->nr_blkgs[BLKIO_POLICY_THROTL];

spin_unlock_irq(q->queue_lock);

/*
Expand Down
Loading

0 comments on commit 03aa264

Please sign in to comment.