Skip to content

Commit

Permalink
blkcg: let blkcg core manage per-queue blkg list and counter
Browse files Browse the repository at this point in the history
With the previous patch to move blkg list heads and counters to
request_queue and blkg, logic to manage them in both policies are
almost identical and can be moved to blkcg core.

This patch moves blkg link logic into blkg_lookup_create(), implements
common blkg unlink code in blkg_destroy(), and updates
blkg_destroy_all() so that it's policy specific and can skip root
group.  The updated blkg_destroy_all() is now used to both clear queue
for bypassing and elv switching, and release all blkgs on q exit.

This patch introduces a race window where policy [de]registration may
race against queue blkg clearing.  This can only be a problem on cfq
unload and shouldn't be a real problem in practice (and we have many
other places where this race already exists).  Future patches will
remove these unlikely races.

Signed-off-by: Tejun Heo <[email protected]>
Cc: Vivek Goyal <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
htejun authored and axboe committed Mar 6, 2012
1 parent 4eef304 commit 03aa264
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 221 deletions.
72 changes: 56 additions & 16 deletions block/blk-cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -596,8 +596,11 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
/* insert */
spin_lock(&blkcg->lock);
swap(blkg, new_blkg);

hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
pol->ops.blkio_link_group_fn(q, blkg);
list_add(&blkg->q_node[plid], &q->blkg_list[plid]);
q->nr_blkgs[plid]++;

spin_unlock(&blkcg->lock);
out:
blkg_free(new_blkg);
Expand Down Expand Up @@ -646,36 +649,69 @@ struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
}
EXPORT_SYMBOL_GPL(blkg_lookup);

void blkg_destroy_all(struct request_queue *q)
/*
 * blkg_destroy - unlink @blkg from its request_queue for policy @plid
 * @blkg: blkg being destroyed
 * @plid: policy id whose per-queue linkage is being torn down
 *
 * Removes @blkg from the queue's per-policy blkg list, decrements the
 * per-policy blkg counter, and drops the reference taken when the group
 * was created.  The group itself is freed once its last reference goes
 * away (blkg_put).  Caller must hold blkg->q->queue_lock.
 */
static void blkg_destroy(struct blkio_group *blkg, enum blkio_policy_id plid)
{
struct request_queue *q = blkg->q;

/* unlink must happen under the owning queue's lock */
lockdep_assert_held(q->queue_lock);

/* Something wrong if we are trying to remove same group twice */
WARN_ON_ONCE(list_empty(&blkg->q_node[plid]));
list_del_init(&blkg->q_node[plid]);

/* keep the per-policy group count in sync with the list */
WARN_ON_ONCE(q->nr_blkgs[plid] <= 0);
q->nr_blkgs[plid]--;

/*
 * Put the reference taken at the time of creation so that when all
 * queues are gone, group can be destroyed.
 */
blkg_put(blkg);
}

/*
 * blkg_destroy_all - destroy all blkgs of policy @plid on @q
 * @q: request_queue whose blkgs are being destroyed
 * @plid: policy id to tear down
 * @destroy_root: if false, the root blkio cgroup's blkg is left alone
 *
 * Loops until every targeted blkg is gone.  A blkg for which
 * blkiocg_del_blkio_group() fails is being concurrently removed by the
 * cgroup removal path, which will destroy it itself; we sleep briefly
 * and rescan until the list drains.
 *
 * NOTE(review): this hunk appears to interleave leftover pre-patch
 * lines (the blkio_list locking and blkio_clear_queue_fn iteration)
 * with the new implementation -- confirm against the applied tree
 * before relying on the exact statement sequence shown here.
 */
void blkg_destroy_all(struct request_queue *q, enum blkio_policy_id plid,
bool destroy_root)
{
struct blkio_policy_type *pol;
struct blkio_group *blkg, *n;

while (true) {
bool done = true;

spin_lock(&blkio_list_lock);
spin_lock_irq(q->queue_lock);

/*
* clear_queue_fn() might return with non-empty group list
* if it raced cgroup removal and lost. cgroup removal is
* guaranteed to make forward progress and retrying after a
* while is enough. This ugliness is scheduled to be
* removed after locking update.
*/
list_for_each_entry(pol, &blkio_list, list)
if (!pol->ops.blkio_clear_queue_fn(q))
list_for_each_entry_safe(blkg, n, &q->blkg_list[plid],
q_node[plid]) {
/* skip root? */
if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
continue;

/*
* If cgroup removal path got to blk_group first
* and removed it from cgroup list, then it will
* take care of destroying cfqg also.
*/
if (!blkiocg_del_blkio_group(blkg))
blkg_destroy(blkg, plid);
else
done = false;
}

spin_unlock_irq(q->queue_lock);
spin_unlock(&blkio_list_lock);

/*
* Group list may not be empty if we raced cgroup removal
* and lost. cgroup removal is guaranteed to make forward
* progress and retrying after a while is enough. This
* ugliness is scheduled to be removed after locking
* update.
*/
if (done)
break;

msleep(10); /* just some random duration I like */
}
}
EXPORT_SYMBOL_GPL(blkg_destroy_all);

static void blkg_rcu_free(struct rcu_head *rcu_head)
{
Expand Down Expand Up @@ -1549,11 +1585,13 @@ static int blkiocg_pre_destroy(struct cgroup_subsys *subsys,
* this event.
*/
spin_lock(&blkio_list_lock);
spin_lock_irqsave(q->queue_lock, flags);
list_for_each_entry(blkiop, &blkio_list, list) {
if (blkiop->plid != blkg->plid)
continue;
blkiop->ops.blkio_unlink_group_fn(q, blkg);
blkg_destroy(blkg, blkiop->plid);
}
spin_unlock_irqrestore(q->queue_lock, flags);
spin_unlock(&blkio_list_lock);
} while (1);

Expand Down Expand Up @@ -1695,12 +1733,14 @@ static void blkcg_bypass_start(void)
__acquires(&all_q_mutex)
{
struct request_queue *q;
int i;

mutex_lock(&all_q_mutex);

list_for_each_entry(q, &all_q_list, all_q_node) {
blk_queue_bypass_start(q);
blkg_destroy_all(q);
for (i = 0; i < BLKIO_NR_POLICIES; i++)
blkg_destroy_all(q, i, false);
}
}

Expand Down
15 changes: 5 additions & 10 deletions block/blk-cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,11 +196,6 @@ struct blkio_group {
};

typedef void (blkio_init_group_fn)(struct blkio_group *blkg);
typedef void (blkio_link_group_fn)(struct request_queue *q,
struct blkio_group *blkg);
typedef void (blkio_unlink_group_fn)(struct request_queue *q,
struct blkio_group *blkg);
typedef bool (blkio_clear_queue_fn)(struct request_queue *q);
typedef void (blkio_update_group_weight_fn)(struct request_queue *q,
struct blkio_group *blkg, unsigned int weight);
typedef void (blkio_update_group_read_bps_fn)(struct request_queue *q,
Expand All @@ -214,9 +209,6 @@ typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q,

struct blkio_policy_ops {
blkio_init_group_fn *blkio_init_group_fn;
blkio_link_group_fn *blkio_link_group_fn;
blkio_unlink_group_fn *blkio_unlink_group_fn;
blkio_clear_queue_fn *blkio_clear_queue_fn;
blkio_update_group_weight_fn *blkio_update_group_weight_fn;
blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
Expand All @@ -238,7 +230,8 @@ extern void blkcg_exit_queue(struct request_queue *q);
/* Blkio controller policy registration */
extern void blkio_policy_register(struct blkio_policy_type *);
extern void blkio_policy_unregister(struct blkio_policy_type *);
extern void blkg_destroy_all(struct request_queue *q);
extern void blkg_destroy_all(struct request_queue *q,
enum blkio_policy_id plid, bool destroy_root);

/**
* blkg_to_pdata - get policy private data
Expand Down Expand Up @@ -319,7 +312,9 @@ static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { }
static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { }
static inline void blkg_destroy_all(struct request_queue *q) { }
/*
 * No-op stub, presumably for builds with blk-cgroup support compiled
 * out (matches the surrounding empty inline stubs -- confirm #ifdef).
 * Fixed misspelled parameter name "destory_root" -> "destroy_root" to
 * match the real declaration above.
 */
static inline void blkg_destroy_all(struct request_queue *q,
enum blkio_policy_id plid,
bool destroy_root) { }

static inline void *blkg_to_pdata(struct blkio_group *blkg,
struct blkio_policy_type *pol) { return NULL; }
Expand Down
99 changes: 2 additions & 97 deletions block/blk-throttle.c
Original file line number Diff line number Diff line change
Expand Up @@ -157,14 +157,6 @@ static void throtl_init_blkio_group(struct blkio_group *blkg)
tg->iops[WRITE] = -1;
}

/*
 * Link @blkg onto @q's blk-throttle blkg list and bump the per-policy
 * group count.  Counterpart of throtl_destroy_tg()'s unlink/decrement.
 * NOTE(review): presumably called with q->queue_lock held (no lock is
 * taken here) -- confirm against the caller.
 */
static void throtl_link_blkio_group(struct request_queue *q,
struct blkio_group *blkg)
{
list_add(&blkg->q_node[BLKIO_POLICY_THROTL],
&q->blkg_list[BLKIO_POLICY_THROTL]);
q->nr_blkgs[BLKIO_POLICY_THROTL]++;
}

static struct
throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
{
Expand Down Expand Up @@ -813,89 +805,6 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
}
}

/*
 * Unlink @tg's blkg from the queue's throttle blkg list, decrement the
 * per-policy group count, and drop the creation-time reference.  The
 * group is actually freed when the last reference is put.
 */
static void
throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg)
{
struct blkio_group *blkg = tg_to_blkg(tg);

/* Something wrong if we are trying to remove same group twice */
WARN_ON_ONCE(list_empty(&blkg->q_node[BLKIO_POLICY_THROTL]));

list_del_init(&blkg->q_node[BLKIO_POLICY_THROTL]);

/*
* Put the reference taken at the time of creation so that when all
* queues are gone, group can be destroyed.
*/
blkg_put(tg_to_blkg(tg));
td->queue->nr_blkgs[BLKIO_POLICY_THROTL]--;
}

/*
 * Destroy the throttle groups on @td->queue, optionally skipping the
 * root group (@release_root false).  Groups that lose the race with
 * cgroup removal (blkiocg_del_blkio_group() failing) are left for that
 * path to destroy.
 *
 * Returns true if the list was fully drained, false if any group was
 * skipped due to the race and the caller should retry.
 */
static bool throtl_release_tgs(struct throtl_data *td, bool release_root)
{
struct request_queue *q = td->queue;
struct blkio_group *blkg, *n;
bool empty = true;

list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_THROTL],
q_node[BLKIO_POLICY_THROTL]) {
struct throtl_grp *tg = blkg_to_tg(blkg);

/* skip root? */
if (!release_root && tg == td->root_tg)
continue;

/*
* If cgroup removal path got to blk_group first and removed
* it from cgroup list, then it will take care of destroying
* cfqg also.
*/
if (!blkiocg_del_blkio_group(blkg))
throtl_destroy_tg(td, tg);
else
empty = false;
}
return empty;
}

/*
* Blk cgroup controller notification saying that blkio_group object is being
* delinked as associated cgroup object is going away. That also means that
* no new IO will come in this group. So get rid of this group as soon as
* any pending IO in the group is finished.
*
* This function is called under rcu_read_lock(). @q is the rcu protected
* pointer. That means @q is a valid request_queue pointer as long as we
* are under the rcu read lock.
*
* @q was fetched from blkio_group under blkio_cgroup->lock. That means
* it should not be NULL as even if queue was going away, cgroup deletion
* path got to it first.
*/
void throtl_unlink_blkio_group(struct request_queue *q,
struct blkio_group *blkg)
{
unsigned long flags;

/* throtl_destroy_tg() expects q->queue_lock held; take it irq-safe */
spin_lock_irqsave(q->queue_lock, flags);
throtl_destroy_tg(q->td, blkg_to_tg(blkg));
spin_unlock_irqrestore(q->queue_lock, flags);
}

/*
 * Policy clear_queue callback: drop all non-root throttle groups on @q.
 * Returns true if the group list was fully drained (see
 * throtl_release_tgs()), false if a racing cgroup removal means the
 * caller should retry.
 */
static bool throtl_clear_queue(struct request_queue *q)
{
lockdep_assert_held(q->queue_lock);

/*
* Clear tgs but leave the root one alone. This is necessary
* because root_tg is expected to be persistent and safe because
* blk-throtl can never be disabled while @q is alive. This is a
* kludge to prepare for unified blkg. This whole function will be
* removed soon.
*/
return throtl_release_tgs(q->td, false);
}

static void throtl_update_blkio_group_common(struct throtl_data *td,
struct throtl_grp *tg)
{
Expand Down Expand Up @@ -960,9 +869,6 @@ static void throtl_shutdown_wq(struct request_queue *q)
static struct blkio_policy_type blkio_policy_throtl = {
.ops = {
.blkio_init_group_fn = throtl_init_blkio_group,
.blkio_link_group_fn = throtl_link_blkio_group,
.blkio_unlink_group_fn = throtl_unlink_blkio_group,
.blkio_clear_queue_fn = throtl_clear_queue,
.blkio_update_group_read_bps_fn =
throtl_update_blkio_group_read_bps,
.blkio_update_group_write_bps_fn =
Expand Down Expand Up @@ -1148,12 +1054,11 @@ void blk_throtl_exit(struct request_queue *q)

throtl_shutdown_wq(q);

spin_lock_irq(q->queue_lock);
throtl_release_tgs(td, true);
blkg_destroy_all(q, BLKIO_POLICY_THROTL, true);

/* If there are other groups */
spin_lock_irq(q->queue_lock);
wait = q->nr_blkgs[BLKIO_POLICY_THROTL];

spin_unlock_irq(q->queue_lock);

/*
Expand Down
Loading

0 comments on commit 03aa264

Please sign in to comment.