Skip to content

Commit

Permalink
Merge branch 'for-5.16' of git://git.kernel.org/pub/scm/linux/kernel/…
Browse files Browse the repository at this point in the history
…git/tj/cgroup

Pull cgroup updates from Tejun Heo:

 - The misc controller now reports allocation rejections through
   misc.events instead of printking

 - cgroup_mutex usage is reduced to improve scalability of some
   operations

 - vhost helper threads are now assigned to the right cgroup on cgroup2

 - Bug fixes

* 'for-5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: bpf: Move wrapper for __cgroup_bpf_*() to kernel/bpf/cgroup.c
  cgroup: Fix rootcg cpu.stat guest double counting
  cgroup: no need for cgroup_mutex for /proc/cgroups
  cgroup: remove cgroup_mutex from cgroupstats_build
  cgroup: reduce dependency on cgroup_mutex
  cgroup: cgroup-v1: do not exclude cgrp_dfl_root
  cgroup: Make rebind_subsystems() disable v2 controllers all at once
  docs/cgroup: add entry for misc.events
  misc_cgroup: remove error log to avoid log flood
  misc_cgroup: introduce misc.events to count failures
  • Loading branch information
torvalds committed Nov 2, 2021
2 parents 4075409 + 588e5d8 commit a85373f
Show file tree
Hide file tree
Showing 8 changed files with 144 additions and 116 deletions.
10 changes: 10 additions & 0 deletions Documentation/admin-guide/cgroup-v2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2318,6 +2318,16 @@ Miscellaneous controller provides 3 interface files. If two misc resources (res_
Limits can be set higher than the capacity value in the misc.capacity
file.

misc.events
A read-only flat-keyed file which exists on non-root cgroups. The
following entries are defined. Unless specified otherwise, a value
change in this file generates a file modified event. All fields in
this file are hierarchical.

max
The number of times the cgroup's resource usage was
about to go over the max boundary.

Migration and Ownership
~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
20 changes: 0 additions & 20 deletions include/linux/bpf-cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,26 +157,6 @@ struct cgroup_bpf {
int cgroup_bpf_inherit(struct cgroup *cgrp);
void cgroup_bpf_offline(struct cgroup *cgrp);

int __cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *prog, struct bpf_prog *replace_prog,
struct bpf_cgroup_link *link,
enum bpf_attach_type type, u32 flags);
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_cgroup_link *link,
enum bpf_attach_type type);
int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
union bpf_attr __user *uattr);

/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
int cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *prog, struct bpf_prog *replace_prog,
struct bpf_cgroup_link *link, enum bpf_attach_type type,
u32 flags);
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
enum bpf_attach_type type);
int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
union bpf_attr __user *uattr);

int __cgroup_bpf_run_filter_skb(struct sock *sk,
struct sk_buff *skb,
enum cgroup_bpf_attach_type atype);
Expand Down
6 changes: 5 additions & 1 deletion include/linux/misc_cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ struct misc_cg;
struct misc_res {
unsigned long max;
atomic_long_t usage;
bool failed;
atomic_long_t events;
};

/**
Expand All @@ -46,6 +46,10 @@ struct misc_res {
*/
struct misc_cg {
struct cgroup_subsys_state css;

/* misc.events */
struct cgroup_file events_file;

struct misc_res res[MISC_CG_RES_TYPES];
};

Expand Down
54 changes: 45 additions & 9 deletions kernel/bpf/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -430,10 +430,10 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
* Exactly one of @prog or @link can be non-null.
* Must be called with cgroup_mutex held.
*/
int __cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *prog, struct bpf_prog *replace_prog,
struct bpf_cgroup_link *link,
enum bpf_attach_type type, u32 flags)
static int __cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *prog, struct bpf_prog *replace_prog,
struct bpf_cgroup_link *link,
enum bpf_attach_type type, u32 flags)
{
u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
struct bpf_prog *old_prog = NULL;
Expand Down Expand Up @@ -523,6 +523,20 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
return err;
}

static int cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *prog, struct bpf_prog *replace_prog,
struct bpf_cgroup_link *link,
enum bpf_attach_type type,
u32 flags)
{
int ret;

mutex_lock(&cgroup_mutex);
ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
mutex_unlock(&cgroup_mutex);
return ret;
}

/* Swap updated BPF program for given link in effective program arrays across
* all descendant cgroups. This function is guaranteed to succeed.
*/
Expand Down Expand Up @@ -672,14 +686,14 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
* propagate the change to descendants
* @cgrp: The cgroup which descendants to traverse
* @prog: A program to detach or NULL
* @prog: A link to detach or NULL
* @link: A link to detach or NULL
* @type: Type of detach operation
*
* At most one of @prog or @link can be non-NULL.
* Must be called with cgroup_mutex held.
*/
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_cgroup_link *link, enum bpf_attach_type type)
static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_cgroup_link *link, enum bpf_attach_type type)
{
enum cgroup_bpf_attach_type atype;
struct bpf_prog *old_prog;
Expand Down Expand Up @@ -730,9 +744,20 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
return err;
}

static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
enum bpf_attach_type type)
{
int ret;

mutex_lock(&cgroup_mutex);
ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
mutex_unlock(&cgroup_mutex);
return ret;
}

/* Must be called with cgroup_mutex held to avoid races. */
int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
union bpf_attr __user *uattr)
static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
enum bpf_attach_type type = attr->query.attach_type;
Expand Down Expand Up @@ -789,6 +814,17 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
return ret;
}

static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
int ret;

mutex_lock(&cgroup_mutex);
ret = __cgroup_bpf_query(cgrp, attr, uattr);
mutex_unlock(&cgroup_mutex);
return ret;
}

int cgroup_bpf_prog_attach(const union bpf_attr *attr,
enum bpf_prog_type ptype, struct bpf_prog *prog)
{
Expand Down
17 changes: 4 additions & 13 deletions kernel/cgroup/cgroup-v1.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,6 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
for_each_root(root) {
struct cgroup *from_cgrp;

if (root == &cgrp_dfl_root)
continue;

spin_lock_irq(&css_set_lock);
from_cgrp = task_cgroup_from_root(from, root);
spin_unlock_irq(&css_set_lock);
Expand Down Expand Up @@ -662,19 +659,16 @@ int proc_cgroupstats_show(struct seq_file *m, void *v)

seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
/*
* ideally we don't want subsystems moving around while we do this.
* cgroup_mutex is also necessary to guarantee an atomic snapshot of
* subsys/hierarchy state.
* Grab the subsystems state racily. No need to add avenue to
* cgroup_mutex contention.
*/
mutex_lock(&cgroup_mutex);

for_each_subsys(ss, i)
seq_printf(m, "%s\t%d\t%d\t%d\n",
ss->legacy_name, ss->root->hierarchy_id,
atomic_read(&ss->root->nr_cgrps),
cgroup_ssid_enabled(i));

mutex_unlock(&cgroup_mutex);
return 0;
}

Expand All @@ -701,18 +695,15 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
kernfs_type(kn) != KERNFS_DIR)
return -EINVAL;

mutex_lock(&cgroup_mutex);

/*
* We aren't being called from kernfs and there's no guarantee on
* @kn->priv's validity. For this and css_tryget_online_from_dir(),
* @kn->priv is RCU safe. Let's do the RCU dancing.
*/
rcu_read_lock();
cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
if (!cgrp || cgroup_is_dead(cgrp)) {
if (!cgrp || !cgroup_tryget(cgrp)) {
rcu_read_unlock();
mutex_unlock(&cgroup_mutex);
return -ENOENT;
}
rcu_read_unlock();
Expand Down Expand Up @@ -740,7 +731,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
}
css_task_iter_end(&it);

mutex_unlock(&cgroup_mutex);
cgroup_put(cgrp);
return 0;
}

Expand Down
120 changes: 56 additions & 64 deletions kernel/cgroup/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -1740,6 +1740,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
struct cgroup *dcgrp = &dst_root->cgrp;
struct cgroup_subsys *ss;
int ssid, i, ret;
u16 dfl_disable_ss_mask = 0;

lockdep_assert_held(&cgroup_mutex);

Expand All @@ -1756,8 +1757,28 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
/* can't move between two non-dummy roots either */
if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
return -EBUSY;

/*
* Collect ssid's that need to be disabled from default
* hierarchy.
*/
if (ss->root == &cgrp_dfl_root)
dfl_disable_ss_mask |= 1 << ssid;

} while_each_subsys_mask();

if (dfl_disable_ss_mask) {
struct cgroup *scgrp = &cgrp_dfl_root.cgrp;

/*
* Controllers from default hierarchy that need to be rebound
* are all disabled together in one go.
*/
cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask;
WARN_ON(cgroup_apply_control(scgrp));
cgroup_finalize_control(scgrp, 0);
}

do_each_subsys_mask(ss, ssid, ss_mask) {
struct cgroup_root *src_root = ss->root;
struct cgroup *scgrp = &src_root->cgrp;
Expand All @@ -1766,10 +1787,12 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)

WARN_ON(!css || cgroup_css(dcgrp, ss));

/* disable from the source */
src_root->subsys_mask &= ~(1 << ssid);
WARN_ON(cgroup_apply_control(scgrp));
cgroup_finalize_control(scgrp, 0);
if (src_root != &cgrp_dfl_root) {
/* disable from the source */
src_root->subsys_mask &= ~(1 << ssid);
WARN_ON(cgroup_apply_control(scgrp));
cgroup_finalize_control(scgrp, 0);
}

/* rebind */
RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
Expand Down Expand Up @@ -5911,17 +5934,20 @@ struct cgroup *cgroup_get_from_id(u64 id)
struct kernfs_node *kn;
struct cgroup *cgrp = NULL;

mutex_lock(&cgroup_mutex);
kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id);
if (!kn)
goto out_unlock;
goto out;

cgrp = kn->priv;
if (cgroup_is_dead(cgrp) || !cgroup_tryget(cgrp))
rcu_read_lock();

cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
if (cgrp && !cgroup_tryget(cgrp))
cgrp = NULL;

rcu_read_unlock();

kernfs_put(kn);
out_unlock:
mutex_unlock(&cgroup_mutex);
out:
return cgrp;
}
EXPORT_SYMBOL_GPL(cgroup_get_from_id);
Expand Down Expand Up @@ -6474,30 +6500,34 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
*
* Find the cgroup at @path on the default hierarchy, increment its
* reference count and return it. Returns pointer to the found cgroup on
* success, ERR_PTR(-ENOENT) if @path doesn't exist and ERR_PTR(-ENOTDIR)
* if @path points to a non-directory.
* success, ERR_PTR(-ENOENT) if @path doesn't exist or if the cgroup has already
* been released and ERR_PTR(-ENOTDIR) if @path points to a non-directory.
*/
struct cgroup *cgroup_get_from_path(const char *path)
{
struct kernfs_node *kn;
struct cgroup *cgrp;

mutex_lock(&cgroup_mutex);
struct cgroup *cgrp = ERR_PTR(-ENOENT);

kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
if (kn) {
if (kernfs_type(kn) == KERNFS_DIR) {
cgrp = kn->priv;
cgroup_get_live(cgrp);
} else {
cgrp = ERR_PTR(-ENOTDIR);
}
kernfs_put(kn);
} else {
cgrp = ERR_PTR(-ENOENT);
if (!kn)
goto out;

if (kernfs_type(kn) != KERNFS_DIR) {
cgrp = ERR_PTR(-ENOTDIR);
goto out_kernfs;
}

mutex_unlock(&cgroup_mutex);
rcu_read_lock();

cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
if (!cgrp || !cgroup_tryget(cgrp))
cgrp = ERR_PTR(-ENOENT);

rcu_read_unlock();

out_kernfs:
kernfs_put(kn);
out:
return cgrp;
}
EXPORT_SYMBOL_GPL(cgroup_get_from_path);
Expand Down Expand Up @@ -6625,44 +6655,6 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)

#endif /* CONFIG_SOCK_CGROUP_DATA */

#ifdef CONFIG_CGROUP_BPF
int cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *prog, struct bpf_prog *replace_prog,
struct bpf_cgroup_link *link,
enum bpf_attach_type type,
u32 flags)
{
int ret;

mutex_lock(&cgroup_mutex);
ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
mutex_unlock(&cgroup_mutex);
return ret;
}

int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
enum bpf_attach_type type)
{
int ret;

mutex_lock(&cgroup_mutex);
ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
mutex_unlock(&cgroup_mutex);
return ret;
}

int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
int ret;

mutex_lock(&cgroup_mutex);
ret = __cgroup_bpf_query(cgrp, attr, uattr);
mutex_unlock(&cgroup_mutex);
return ret;
}
#endif /* CONFIG_CGROUP_BPF */

#ifdef CONFIG_SYSFS
static ssize_t show_delegatable_files(struct cftype *files, char *buf,
ssize_t size, const char *prefix)
Expand Down
Loading

0 comments on commit a85373f

Please sign in to comment.