Skip to content

Commit

Permalink
Merge branch 'for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/g…
Browse files Browse the repository at this point in the history
…it/tj/cgroup

Pull cgroup updates from Tejun Heo:
 "This includes Roman's cgroup2 freezer implementation.

  It's a separate mechanism from cgroup1 freezer. Instead of blocking
  user tasks in arbitrary uninterruptible sleeps, the new implementation
  extends jobctl stop - frozen tasks are trapped in jobctl stop until
  thawed and can be killed and ptraced. Lots of thanks to Oleg for
  shepherding the effort.

  Other than that, there are a few trivial changes"

* 'for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: never call do_group_exit() with task->frozen bit set
  kernel: cgroup: fix misuse of %x
  cgroup: get rid of cgroup_freezer_frozen_exit()
  cgroup: prevent spurious transition into non-frozen state
  cgroup: Remove unused cgrp variable
  cgroup: document cgroup v2 freezer interface
  cgroup: add tracing points for cgroup v2 freezer
  cgroup: make TRACE_CGROUP_PATH irq-safe
  kselftests: cgroup: add freezer controller self-tests
  kselftests: cgroup: don't fail on cg_kill_all() error in cg_destroy()
  cgroup: cgroup v2 freezer
  cgroup: protect cgroup->nr_(dying_)descendants by css_set_lock
  cgroup: implement __cgroup_task_count() helper
  cgroup: rename freezer.c into legacy_freezer.c
  cgroup: remove extra cgroup_migrate_finish() call
  • Loading branch information
torvalds committed May 9, 2019
2 parents 23c9706 + f2b31bb commit abde77e
Show file tree
Hide file tree
Showing 20 changed files with 2,012 additions and 443 deletions.
27 changes: 27 additions & 0 deletions Documentation/admin-guide/cgroup-v2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -864,6 +864,8 @@ All cgroup core files are prefixed with "cgroup."
populated
1 if the cgroup or its descendants contains any live
processes; otherwise, 0.
frozen
1 if the cgroup is frozen; otherwise, 0.

cgroup.max.descendants
A read-write single value file. The default is "max".
Expand Down Expand Up @@ -897,6 +899,31 @@ All cgroup core files are prefixed with "cgroup."
A dying cgroup can consume system resources not exceeding
limits, which were active at the moment of cgroup deletion.

cgroup.freeze
A read-write single value file which exists on non-root cgroups.
Allowed values are "0" and "1". The default is "0".

Writing "1" to the file causes freezing of the cgroup and all
descendant cgroups. This means that all belonging processes will
be stopped and will not run until the cgroup is explicitly
unfrozen. Freezing of the cgroup may take some time; when this action
is completed, the "frozen" value in the cgroup.events control file
will be updated to "1" and the corresponding notification will be
issued.

A cgroup can be frozen either by its own settings, or by settings
of any ancestor cgroups. If any of the ancestor cgroups is frozen, the
cgroup will remain frozen.

Processes in the frozen cgroup can be killed by a fatal signal.
They also can enter and leave a frozen cgroup: either by an explicit
move by a user, or if freezing of the cgroup races with fork().
If a process is moved to a frozen cgroup, it stops. If a process is
moved out of a frozen cgroup, it becomes running.

Frozen status of a cgroup doesn't affect any cgroup tree operations:
it's possible to delete a frozen (and empty) cgroup, as well as
create new sub-cgroups.

Controllers
===========
Expand Down
33 changes: 33 additions & 0 deletions include/linux/cgroup-defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ enum {
* specified at mount time and thus is implemented here.
*/
CGRP_CPUSET_CLONE_CHILDREN,

/* Control group has to be frozen. */
CGRP_FREEZE,

/* Cgroup is frozen. */
CGRP_FROZEN,
};

/* cgroup_root->flags */
Expand Down Expand Up @@ -317,6 +323,25 @@ struct cgroup_rstat_cpu {
struct cgroup *updated_next; /* NULL iff not on the list */
};

/*
 * Freezer bookkeeping kept per cgroup; struct cgroup embeds one instance
 * of this as its "freezer" member.
 */
struct cgroup_freezer_state {
/* Should the cgroup and its descendants be frozen. */
bool freeze;

/*
 * Should the cgroup actually be frozen?
 * NOTE(review): declared int while "freeze" is bool; presumably this also
 * accounts for freezing requested on ancestor cgroups -- confirm against
 * the freezer implementation.
 */
int e_freeze;

/* Fields below are protected by css_set_lock */

/* Number of frozen descendant cgroups */
int nr_frozen_descendants;

/*
 * Number of tasks, which are counted as frozen:
 * frozen, SIGSTOPped, and PTRACEd.
 */
int nr_frozen_tasks;
};

struct cgroup {
/* self css with NULL ->ss, points back to this cgroup */
struct cgroup_subsys_state self;
Expand Down Expand Up @@ -349,6 +374,11 @@ struct cgroup {
* Dying cgroups are cgroups which were deleted by a user,
* but are still existing because someone else is holding a reference.
* max_descendants is a maximum allowed number of descendant cgroups.
*
* nr_descendants and nr_dying_descendants are protected
* by cgroup_mutex and css_set_lock. It's fine to read them holding
either cgroup_mutex or css_set_lock; for writing, both locks
* should be held.
*/
int nr_descendants;
int nr_dying_descendants;
Expand Down Expand Up @@ -448,6 +478,9 @@ struct cgroup {
/* If there is block congestion on this cgroup. */
atomic_t congestion_count;

/* Used to store internal freezer state */
struct cgroup_freezer_state freezer;

/* ids of the ancestors at each level including self */
int ancestor_ids[];
};
Expand Down
43 changes: 43 additions & 0 deletions include/linux/cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -881,4 +881,47 @@ static inline void put_cgroup_ns(struct cgroup_namespace *ns)
free_cgroup_ns(ns);
}

#ifdef CONFIG_CGROUPS

/*
 * Freezer entry points available when CONFIG_CGROUPS is set. Only
 * cgroup_enter_frozen() and cgroup_leave_frozen() have no-op stubs in the
 * !CONFIG_CGROUPS branch of this header; the remaining three are declared
 * here only.
 */
void cgroup_enter_frozen(void);
void cgroup_leave_frozen(bool always_leave);
void cgroup_update_frozen(struct cgroup *cgrp);
void cgroup_freeze(struct cgroup *cgrp, bool freeze);
void cgroup_freezer_migrate_task(struct task_struct *task, struct cgroup *src,
struct cgroup *dst);

/*
 * Tell whether @task is expected to freeze.
 *
 * Kernel threads (PF_KTHREAD set in task->flags) never are. For any other
 * task the answer is the CGRP_FREEZE bit in the flags of the cgroup
 * returned by task_dfl_cgroup(), sampled inside an RCU read-side section.
 */
static inline bool cgroup_task_freeze(struct task_struct *task)
{
	bool should_freeze = false;

	if (!(task->flags & PF_KTHREAD)) {
		rcu_read_lock();
		should_freeze = test_bit(CGRP_FREEZE,
					 &task_dfl_cgroup(task)->flags);
		rcu_read_unlock();
	}

	return should_freeze;
}

/* Return the current value of @task's "frozen" bit. */
static inline bool cgroup_task_frozen(struct task_struct *task)
{
return task->frozen;
}

#else /* !CONFIG_CGROUPS */

/*
 * Stand-ins used when CONFIG_CGROUPS is not set: entering/leaving the
 * frozen state does nothing, and the freeze/frozen queries always
 * report false.
 */
static inline void cgroup_enter_frozen(void) { }
static inline void cgroup_leave_frozen(bool always_leave) { }
static inline bool cgroup_task_freeze(struct task_struct *task)
{
return false;
}
static inline bool cgroup_task_frozen(struct task_struct *task)
{
return false;
}

#endif /* !CONFIG_CGROUPS */

#endif /* _LINUX_CGROUP_H */
2 changes: 2 additions & 0 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,8 @@ struct task_struct {
#ifdef CONFIG_CGROUPS
/* disallow userland-initiated cgroup migration */
unsigned no_cgroup_migration:1;
/* task is frozen/stopped (used by the cgroup freezer) */
unsigned frozen:1;
#endif
#ifdef CONFIG_BLK_CGROUP
/* to be used once the psi infrastructure lands upstream. */
Expand Down
2 changes: 2 additions & 0 deletions include/linux/sched/jobctl.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ struct task_struct;
#define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */
#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */
#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */
#define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */

#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT)
#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT)
Expand All @@ -26,6 +27,7 @@ struct task_struct;
#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT)
#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT)
#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT)
#define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT)

#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
Expand Down
55 changes: 55 additions & 0 deletions include/trace/events/cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,20 @@ DEFINE_EVENT(cgroup, cgroup_rename,
TP_ARGS(cgrp, path)
);

/*
 * cgroup_freeze / cgroup_unfreeze: two tracepoints of the "cgroup" event
 * class, each taking the cgroup and its path string.
 */
DEFINE_EVENT(cgroup, cgroup_freeze,

TP_PROTO(struct cgroup *cgrp, const char *path),

TP_ARGS(cgrp, path)
);

DEFINE_EVENT(cgroup, cgroup_unfreeze,

TP_PROTO(struct cgroup *cgrp, const char *path),

TP_ARGS(cgrp, path)
);

DECLARE_EVENT_CLASS(cgroup_migrate,

TP_PROTO(struct cgroup *dst_cgrp, const char *path,
Expand Down Expand Up @@ -149,6 +163,47 @@ DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks,
TP_ARGS(dst_cgrp, path, task, threadgroup)
);

/*
 * cgroup_event: event class for tracepoints that report an integer value
 * alongside a cgroup.
 *
 * Each record stores the hierarchy id of the cgroup's root, the cgroup's
 * id and level, the supplied path string, and the supplied @val; the
 * printed form is "root=... id=... level=... path=... val=...".
 */
DECLARE_EVENT_CLASS(cgroup_event,

TP_PROTO(struct cgroup *cgrp, const char *path, int val),

TP_ARGS(cgrp, path, val),

TP_STRUCT__entry(
__field( int, root )
__field( int, id )
__field( int, level )
__string( path, path )
__field( int, val )
),

TP_fast_assign(
__entry->root = cgrp->root->hierarchy_id;
__entry->id = cgrp->id;
__entry->level = cgrp->level;
__assign_str(path, path);
__entry->val = val;
),

TP_printk("root=%d id=%d level=%d path=%s val=%d",
__entry->root, __entry->id, __entry->level, __get_str(path),
__entry->val)
);

/*
 * cgroup_notify_populated / cgroup_notify_frozen: two tracepoints of the
 * "cgroup_event" class, each taking the cgroup, its path string and an
 * integer value.
 * NOTE(review): presumably @val is the new 0/1 state of the matching
 * cgroup.events field -- confirm at the call sites.
 */
DEFINE_EVENT(cgroup_event, cgroup_notify_populated,

TP_PROTO(struct cgroup *cgrp, const char *path, int val),

TP_ARGS(cgrp, path, val)
);

DEFINE_EVENT(cgroup_event, cgroup_notify_frozen,

TP_PROTO(struct cgroup *cgrp, const char *path, int val),

TP_ARGS(cgrp, path, val)
);

#endif /* _TRACE_CGROUP_H */

/* This part must be outside protection */
Expand Down
4 changes: 2 additions & 2 deletions kernel/cgroup/Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o
obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o freezer.o

obj-$(CONFIG_CGROUP_FREEZER) += freezer.o
obj-$(CONFIG_CGROUP_FREEZER) += legacy_freezer.o
obj-$(CONFIG_CGROUP_PIDS) += pids.o
obj-$(CONFIG_CGROUP_RDMA) += rdma.o
obj-$(CONFIG_CPUSETS) += cpuset.o
Expand Down
8 changes: 6 additions & 2 deletions kernel/cgroup/cgroup-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,15 @@ extern void __init enable_debug_cgroup(void);
#define TRACE_CGROUP_PATH(type, cgrp, ...) \
do { \
if (trace_cgroup_##type##_enabled()) { \
spin_lock(&trace_cgroup_path_lock); \
unsigned long flags; \
spin_lock_irqsave(&trace_cgroup_path_lock, \
flags); \
cgroup_path(cgrp, trace_cgroup_path, \
TRACE_CGROUP_PATH_LEN); \
trace_cgroup_##type(cgrp, trace_cgroup_path, \
##__VA_ARGS__); \
spin_unlock(&trace_cgroup_path_lock); \
spin_unlock_irqrestore(&trace_cgroup_path_lock, \
flags); \
} \
} while (0)

Expand Down Expand Up @@ -240,6 +243,7 @@ int cgroup_rmdir(struct kernfs_node *kn);
int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
struct kernfs_root *kf_root);

int __cgroup_task_count(const struct cgroup *cgrp);
int cgroup_task_count(const struct cgroup *cgrp);

/*
Expand Down
16 changes: 0 additions & 16 deletions kernel/cgroup/cgroup-v1.c
Original file line number Diff line number Diff line change
Expand Up @@ -342,22 +342,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp,
return l;
}

/**
* cgroup_task_count - count the number of tasks in a cgroup.
* @cgrp: the cgroup in question
*/
int cgroup_task_count(const struct cgroup *cgrp)
{
int count = 0;
struct cgrp_cset_link *link;

spin_lock_irq(&css_set_lock);
list_for_each_entry(link, &cgrp->cset_links, cset_link)
count += link->cset->nr_tasks;
spin_unlock_irq(&css_set_lock);
return count;
}

/*
* Load a cgroup's pidarray with either procs' tgids or tasks' pids
*/
Expand Down
Loading

0 comments on commit abde77e

Please sign in to comment.