Skip to content

Commit

Permalink
loop: charge i/o to mem and blk cg
Browse files Browse the repository at this point in the history
The current code only associates with the existing blkcg when aio is used
to access the backing file.  This patch covers all types of i/o to the
backing file and also associates the memcg so if the backing file is on
tmpfs, memory is charged appropriately.

This patch also exports cgroup_get_e_css and int_active_memcg so they can
be used by the loop module.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Dan Schatzberg <[email protected]>
Acked-by: Johannes Weiner <[email protected]>
Acked-by: Jens Axboe <[email protected]>
Cc: Chris Down <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Ming Lei <[email protected]>
Cc: Shakeel Butt <[email protected]>
Cc: Tejun Heo <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
dschatzberg authored and torvalds committed Jun 29, 2021
1 parent 04f94e3 commit c74d40e
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 21 deletions.
61 changes: 41 additions & 20 deletions drivers/block/loop.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
#include <linux/uio.h>
#include <linux/ioprio.h>
#include <linux/blk-cgroup.h>
#include <linux/sched/mm.h>

#include "loop.h"

Expand Down Expand Up @@ -516,8 +517,6 @@ static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
{
struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);

if (cmd->css)
css_put(cmd->css);
cmd->ret = ret;
lo_rw_aio_do_completion(cmd);
}
Expand Down Expand Up @@ -578,16 +577,13 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
cmd->iocb.ki_complete = lo_rw_aio_complete;
cmd->iocb.ki_flags = IOCB_DIRECT;
cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
if (cmd->css)
kthread_associate_blkcg(cmd->css);

if (rw == WRITE)
ret = call_write_iter(file, &cmd->iocb, &iter);
else
ret = call_read_iter(file, &cmd->iocb, &iter);

lo_rw_aio_do_completion(cmd);
kthread_associate_blkcg(NULL);

if (ret != -EIOCBQUEUED)
cmd->iocb.ki_complete(&cmd->iocb, ret, 0);
Expand Down Expand Up @@ -928,7 +924,7 @@ struct loop_worker {
struct list_head cmd_list;
struct list_head idle_list;
struct loop_device *lo;
struct cgroup_subsys_state *css;
struct cgroup_subsys_state *blkcg_css;
unsigned long last_ran_at;
};

Expand Down Expand Up @@ -957,18 +953,18 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)

spin_lock_irq(&lo->lo_work_lock);

if (queue_on_root_worker(cmd->css))
if (queue_on_root_worker(cmd->blkcg_css))
goto queue_work;

node = &lo->worker_tree.rb_node;

while (*node) {
parent = *node;
cur_worker = container_of(*node, struct loop_worker, rb_node);
if (cur_worker->css == cmd->css) {
if (cur_worker->blkcg_css == cmd->blkcg_css) {
worker = cur_worker;
break;
} else if ((long)cur_worker->css < (long)cmd->css) {
} else if ((long)cur_worker->blkcg_css < (long)cmd->blkcg_css) {
node = &(*node)->rb_left;
} else {
node = &(*node)->rb_right;
Expand All @@ -980,13 +976,18 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
worker = kzalloc(sizeof(struct loop_worker), GFP_NOWAIT | __GFP_NOWARN);
/*
* In the event we cannot allocate a worker, just queue on the
* rootcg worker
* rootcg worker and issue the I/O as the rootcg
*/
if (!worker)
if (!worker) {
cmd->blkcg_css = NULL;
if (cmd->memcg_css)
css_put(cmd->memcg_css);
cmd->memcg_css = NULL;
goto queue_work;
}

worker->css = cmd->css;
css_get(worker->css);
worker->blkcg_css = cmd->blkcg_css;
css_get(worker->blkcg_css);
INIT_WORK(&worker->work, loop_workfn);
INIT_LIST_HEAD(&worker->cmd_list);
INIT_LIST_HEAD(&worker->idle_list);
Expand Down Expand Up @@ -1306,7 +1307,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
idle_list) {
list_del(&worker->idle_list);
rb_erase(&worker->rb_node, &lo->worker_tree);
css_put(worker->css);
css_put(worker->blkcg_css);
kfree(worker);
}
spin_unlock_irq(&lo->lo_work_lock);
Expand Down Expand Up @@ -2100,13 +2101,18 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
}

/* always use the first bio's css */
cmd->blkcg_css = NULL;
cmd->memcg_css = NULL;
#ifdef CONFIG_BLK_CGROUP
if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) {
cmd->css = &bio_blkcg(rq->bio)->css;
css_get(cmd->css);
} else
if (rq->bio && rq->bio->bi_blkg) {
cmd->blkcg_css = &bio_blkcg(rq->bio)->css;
#ifdef CONFIG_MEMCG
cmd->memcg_css =
cgroup_get_e_css(cmd->blkcg_css->cgroup,
&memory_cgrp_subsys);
#endif
}
#endif
cmd->css = NULL;
loop_queue_work(lo, cmd);

return BLK_STS_OK;
Expand All @@ -2118,13 +2124,28 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
const bool write = op_is_write(req_op(rq));
struct loop_device *lo = rq->q->queuedata;
int ret = 0;
struct mem_cgroup *old_memcg = NULL;

if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) {
ret = -EIO;
goto failed;
}

if (cmd->blkcg_css)
kthread_associate_blkcg(cmd->blkcg_css);
if (cmd->memcg_css)
old_memcg = set_active_memcg(
mem_cgroup_from_css(cmd->memcg_css));

ret = do_req_filebacked(lo, rq);

if (cmd->blkcg_css)
kthread_associate_blkcg(NULL);

if (cmd->memcg_css) {
set_active_memcg(old_memcg);
css_put(cmd->memcg_css);
}
failed:
/* complete non-aio request */
if (!cmd->use_aio || ret) {
Expand Down Expand Up @@ -2203,7 +2224,7 @@ static void loop_free_idle_workers(struct timer_list *timer)
break;
list_del(&worker->idle_list);
rb_erase(&worker->rb_node, &lo->worker_tree);
css_put(worker->css);
css_put(worker->blkcg_css);
kfree(worker);
}
if (!list_empty(&lo->idle_worker_list))
Expand Down
3 changes: 2 additions & 1 deletion drivers/block/loop.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ struct loop_cmd {
long ret;
struct kiocb iocb;
struct bio_vec *bvec;
struct cgroup_subsys_state *css;
struct cgroup_subsys_state *blkcg_css;
struct cgroup_subsys_state *memcg_css;
};

/* Support for loadable transfer modules */
Expand Down
6 changes: 6 additions & 0 deletions include/linux/memcontrol.h
Original file line number Diff line number Diff line change
Expand Up @@ -1230,6 +1230,12 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
return NULL;
}

/*
 * Stub variant of mem_cgroup_from_css(): ignores @css and always returns
 * NULL, so callers get "no memory cgroup" regardless of the css passed in.
 *
 * NOTE(review): presumably this is the definition used when memory cgroup
 * support is compiled out (it sits next to other stubs that return NULL or
 * do nothing); the enclosing preprocessor conditional is not visible in this
 * hunk -- confirm against the full header.
 */
static inline
struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css)
{
return NULL;
}

/*
 * Stub variant of mem_cgroup_put(): empty body, @memcg is not touched.
 *
 * NOTE(review): presumably the no-op counterpart used when memory cgroup
 * support is compiled out; the surrounding preprocessor conditional is
 * outside this hunk -- confirm against the full header.
 */
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
}
Expand Down
1 change: 1 addition & 0 deletions kernel/cgroup/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,7 @@ struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
rcu_read_unlock();
return css;
}
EXPORT_SYMBOL_GPL(cgroup_get_e_css);

static void cgroup_get_live(struct cgroup *cgrp)
{
Expand Down
1 change: 1 addition & 0 deletions mm/memcontrol.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ struct mem_cgroup *root_mem_cgroup __read_mostly;

/* Active memory cgroup to use from an interrupt context */
DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);

/* Socket memory accounting disabled? */
static bool cgroup_memory_nosocket;
Expand Down

0 comments on commit c74d40e

Please sign in to comment.