Skip to content

Commit

Permalink
Merge branch 'for-4.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Browse files Browse the repository at this point in the history

Pull cgroup fixes from Tejun Heo:
 "Two patches to fix a deadlock which can be easily triggered if memcg
  charge moving is used.

  This bug was introduced while converting threadgroup locking to a
  global percpu_rwsem and is caused by the cgroup controller task
  migration path depending on the ability to create new kthreads.
  cpuset had a similar issue, which was fixed by performing
  heavy-lifting operations asynchronously to task migration.  The two
  patches fix the same issue in memcg in a similar way.  The first
  patch makes the mechanism generic and the second relocates memcg
  charge moving outside the migration path.

  Given that we don't want to perform heavy operations while
  write-locking the threadgroup lock anyway, moving them out of the way
  is a desirable solution.  One thing to note is that the problem was
  difficult to debug because lockdep couldn't figure out the deadlock
  condition.  Looking into how to improve that."

* 'for-4.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  memcg: relocate charge moving from ->attach to ->post_attach
  cgroup, cpuset: replace cpuset_post_attach_flush() with cgroup_subsys->post_attach callback
  • Loading branch information
torvalds committed Apr 27, 2016
2 parents 3118e5f + 264a0ae commit 763cfc8
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 28 deletions.
1 change: 1 addition & 0 deletions include/linux/cgroup-defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,7 @@ struct cgroup_subsys {
int (*can_attach)(struct cgroup_taskset *tset);
void (*cancel_attach)(struct cgroup_taskset *tset);
void (*attach)(struct cgroup_taskset *tset);
void (*post_attach)(void);
int (*can_fork)(struct task_struct *task);
void (*cancel_fork)(struct task_struct *task);
void (*fork)(struct task_struct *task);
Expand Down
6 changes: 0 additions & 6 deletions include/linux/cpuset.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,6 @@ static inline void set_mems_allowed(nodemask_t nodemask)
task_unlock(current);
}

extern void cpuset_post_attach_flush(void);

#else /* !CONFIG_CPUSETS */

static inline bool cpusets_enabled(void) { return false; }
Expand Down Expand Up @@ -245,10 +243,6 @@ static inline bool read_mems_allowed_retry(unsigned int seq)
return false;
}

static inline void cpuset_post_attach_flush(void)
{
}

#endif /* !CONFIG_CPUSETS */

#endif /* _LINUX_CPUSET_H */
7 changes: 5 additions & 2 deletions kernel/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -2825,9 +2825,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off, bool threadgroup)
{
struct task_struct *tsk;
struct cgroup_subsys *ss;
struct cgroup *cgrp;
pid_t pid;
int ret;
int ssid, ret;

if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
return -EINVAL;
Expand Down Expand Up @@ -2875,8 +2876,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
rcu_read_unlock();
out_unlock_threadgroup:
percpu_up_write(&cgroup_threadgroup_rwsem);
for_each_subsys(ss, ssid)
if (ss->post_attach)
ss->post_attach();
cgroup_kn_unlock(of->kn);
cpuset_post_attach_flush();
return ret ?: nbytes;
}

Expand Down
4 changes: 2 additions & 2 deletions kernel/cpuset.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@
#include <asm/uaccess.h>
#include <linux/atomic.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
#include <linux/wait.h>

Expand Down Expand Up @@ -1016,7 +1015,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
}
}

void cpuset_post_attach_flush(void)
static void cpuset_post_attach(void)
{
flush_workqueue(cpuset_migrate_mm_wq);
}
Expand Down Expand Up @@ -2087,6 +2086,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
.can_attach = cpuset_can_attach,
.cancel_attach = cpuset_cancel_attach,
.attach = cpuset_attach,
.post_attach = cpuset_post_attach,
.bind = cpuset_bind,
.legacy_cftypes = files,
.early_init = true,
Expand Down
37 changes: 19 additions & 18 deletions mm/memcontrol.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
/* "mc" and its members are protected by cgroup_mutex */
static struct move_charge_struct {
spinlock_t lock; /* for from, to */
struct mm_struct *mm;
struct mem_cgroup *from;
struct mem_cgroup *to;
unsigned long flags;
Expand Down Expand Up @@ -4667,6 +4668,8 @@ static void __mem_cgroup_clear_mc(void)

static void mem_cgroup_clear_mc(void)
{
struct mm_struct *mm = mc.mm;

/*
* we must clear moving_task before waking up waiters at the end of
* task migration.
Expand All @@ -4676,7 +4679,10 @@ static void mem_cgroup_clear_mc(void)
spin_lock(&mc.lock);
mc.from = NULL;
mc.to = NULL;
mc.mm = NULL;
spin_unlock(&mc.lock);

mmput(mm);
}

static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
Expand Down Expand Up @@ -4733,6 +4739,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
VM_BUG_ON(mc.moved_swap);

spin_lock(&mc.lock);
mc.mm = mm;
mc.from = from;
mc.to = memcg;
mc.flags = move_flags;
Expand All @@ -4742,8 +4749,9 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
ret = mem_cgroup_precharge_mc(mm);
if (ret)
mem_cgroup_clear_mc();
} else {
mmput(mm);
}
mmput(mm);
return ret;
}

Expand Down Expand Up @@ -4852,11 +4860,11 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
return ret;
}

static void mem_cgroup_move_charge(struct mm_struct *mm)
static void mem_cgroup_move_charge(void)
{
struct mm_walk mem_cgroup_move_charge_walk = {
.pmd_entry = mem_cgroup_move_charge_pte_range,
.mm = mm,
.mm = mc.mm,
};

lru_add_drain_all();
Expand All @@ -4868,7 +4876,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
atomic_inc(&mc.from->moving_account);
synchronize_rcu();
retry:
if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
if (unlikely(!down_read_trylock(&mc.mm->mmap_sem))) {
/*
* Someone who is holding the mmap_sem might be waiting in
* waitq. So we cancel all extra charges, wake up all waiters,
Expand All @@ -4885,23 +4893,16 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
* additional charge, the page walk just aborts.
*/
walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk);
up_read(&mm->mmap_sem);
up_read(&mc.mm->mmap_sem);
atomic_dec(&mc.from->moving_account);
}

static void mem_cgroup_move_task(struct cgroup_taskset *tset)
static void mem_cgroup_move_task(void)
{
struct cgroup_subsys_state *css;
struct task_struct *p = cgroup_taskset_first(tset, &css);
struct mm_struct *mm = get_task_mm(p);

if (mm) {
if (mc.to)
mem_cgroup_move_charge(mm);
mmput(mm);
}
if (mc.to)
if (mc.to) {
mem_cgroup_move_charge();
mem_cgroup_clear_mc();
}
}
#else /* !CONFIG_MMU */
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
Expand All @@ -4911,7 +4912,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
{
}
static void mem_cgroup_move_task(struct cgroup_taskset *tset)
static void mem_cgroup_move_task(void)
{
}
#endif
Expand Down Expand Up @@ -5195,7 +5196,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
.css_reset = mem_cgroup_css_reset,
.can_attach = mem_cgroup_can_attach,
.cancel_attach = mem_cgroup_cancel_attach,
.attach = mem_cgroup_move_task,
.post_attach = mem_cgroup_move_task,
.bind = mem_cgroup_bind,
.dfl_cftypes = memory_files,
.legacy_cftypes = mem_cgroup_legacy_files,
Expand Down

0 comments on commit 763cfc8

Please sign in to comment.