Skip to content

Commit

Permalink
Merge branch 'for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/…
Browse files Browse the repository at this point in the history
…git/tj/cgroup

Pull cgroup updates from Tejun Heo:

 - For cpustat, cgroup has a percpu hierarchical stat mechanism which
   propagates up the hierarchy lazily.

   This contains commits to factor out and generalize the mechanism so
   that it can be used for other cgroup stats too.

   The original intention was to update memcg stats to use it, but memcg
   went for a different approach, so cpustat is still the only user. The
   factoring out and generalization still make sense, and it's likely
   that this can be used for other purposes in the future.

 - cgroup uses kernfs_notify() (which uses fsnotify()) to inform user
   space of certain events. A rate limiting mechanism is added.

 - Other misc changes.

* 'for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: css_set_lock should nest inside tasklist_lock
  rdmacg: Convert to use match_string() helper
  cgroup: Make cgroup_rstat_updated() ready for root cgroup usage
  cgroup: Add memory barriers to plug cgroup_rstat_updated() race window
  cgroup: Add cgroup_subsys->css_rstat_flush()
  cgroup: Replace cgroup_rstat_mutex with a spinlock
  cgroup: Factor out and expose cgroup_rstat_*() interface functions
  cgroup: Reorganize kernel/cgroup/rstat.c
  cgroup: Distinguish base resource stat implementation from rstat
  cgroup: Rename stat to rstat
  cgroup: Rename kernel/cgroup/stat.c to kernel/cgroup/rstat.c
  cgroup: Limit event generation frequency
  cgroup: Explicitly remove core interface files
  • Loading branch information
torvalds committed Jun 6, 2018
2 parents 0bbddb8 + d8742e2 commit 9f25a8d
Show file tree
Hide file tree
Showing 8 changed files with 554 additions and 417 deletions.
52 changes: 32 additions & 20 deletions include/linux/cgroup-defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ enum {
/*
 * Per-file state for a cgroup interface file: the backing kernfs node plus
 * two fields that appear to support limiting how often file notifications
 * are generated.
 */
struct cgroup_file {
/* do not access any fields from outside cgroup core */
struct kernfs_node *kn;
/* NOTE(review): presumably the time (jiffies) of the last notification - confirm against the notify path */
unsigned long notified_at;
/* NOTE(review): presumably delivers a notification deferred by rate limiting - confirm against the notify path */
struct timer_list notify_timer;
};

/*
Expand All @@ -128,6 +130,9 @@ struct cgroup_subsys_state {
struct list_head sibling;
struct list_head children;

/* flush target list anchored at cgrp->rstat_css_list */
struct list_head rstat_css_node;

/*
* PI: Subsys-unique ID. 0 is unused and root is always 1. The
* matching css can be looked up using css_from_id().
Expand Down Expand Up @@ -256,12 +261,16 @@ struct css_set {
struct rcu_head rcu_head;
};

/*
 * cgroup basic resource statistics. The only member is a task_cputime,
 * i.e. CPU time accounting; wrapping it in a struct lets the per-cpu,
 * pending and global counters share a single type.
 */
struct cgroup_base_stat {
struct task_cputime cputime;
};

/*
* cgroup basic resource usage statistics. Accounting is done per-cpu in
* cgroup_cpu_stat which is then lazily propagated up the hierarchy on
* reads.
* rstat - cgroup scalable recursive statistics. Accounting is done
* per-cpu in cgroup_rstat_cpu which is then lazily propagated up the
* hierarchy on reads.
*
* When a stat gets updated, the cgroup_cpu_stat and its ancestors are
* When a stat gets updated, the cgroup_rstat_cpu and its ancestors are
* linked into the updated tree. On the following read, propagation only
* considers and consumes the updated tree. This makes reading O(the
* number of descendants which have been active since last read) instead of
Expand All @@ -271,20 +280,24 @@ struct css_set {
* aren't active and stat may be read frequently. The combination can
* become very expensive. By propagating selectively, increasing reading
* frequency decreases the cost of each read.
*
* This struct hosts both the fields which implement the above -
* updated_children and updated_next - and the fields which track basic
* resource statistics on top of it - bsync, bstat and last_bstat.
*/
struct cgroup_cpu_stat {
struct cgroup_rstat_cpu {
/*
* ->sync protects all the current counters. These are the only
* fields which get updated in the hot path.
* ->bsync protects ->bstat. These are the only fields which get
* updated in the hot path.
*/
struct u64_stats_sync sync;
struct task_cputime cputime;
struct u64_stats_sync bsync;
struct cgroup_base_stat bstat;

/*
* Snapshots at the last reading. These are used to calculate the
* deltas to propagate to the global counters.
*/
struct task_cputime last_cputime;
struct cgroup_base_stat last_bstat;

/*
* Child cgroups with stat updates on this cpu since the last read
Expand All @@ -295,18 +308,12 @@ struct cgroup_cpu_stat {
* to the cgroup makes it unnecessary for each per-cpu struct to
* point back to the associated cgroup.
*
* Protected by per-cpu cgroup_cpu_stat_lock.
* Protected by per-cpu cgroup_rstat_cpu_lock.
*/
struct cgroup *updated_children; /* terminated by self cgroup */
struct cgroup *updated_next; /* NULL iff not on the list */
};

struct cgroup_stat {
/* per-cpu statistics are collected into the following global counters */
struct task_cputime cputime;
struct prev_cputime prev_cputime;
};

struct cgroup {
/* self css with NULL ->ss, points back to this cgroup */
struct cgroup_subsys_state self;
Expand Down Expand Up @@ -406,10 +413,14 @@ struct cgroup {
*/
struct cgroup *dom_cgrp;

/* per-cpu recursive resource statistics */
struct cgroup_rstat_cpu __percpu *rstat_cpu;
struct list_head rstat_css_list;

/* cgroup basic resource statistics */
struct cgroup_cpu_stat __percpu *cpu_stat;
struct cgroup_stat pending_stat; /* pending from children */
struct cgroup_stat stat;
struct cgroup_base_stat pending_bstat; /* pending from children */
struct cgroup_base_stat bstat;
struct prev_cputime prev_cputime; /* for printing out cputime */

/*
* list of pidlists, up to two for each namespace (one for procs, one
Expand Down Expand Up @@ -570,6 +581,7 @@ struct cgroup_subsys {
void (*css_released)(struct cgroup_subsys_state *css);
void (*css_free)(struct cgroup_subsys_state *css);
void (*css_reset)(struct cgroup_subsys_state *css);
void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu);
int (*css_extra_stat_show)(struct seq_file *seq,
struct cgroup_subsys_state *css);

Expand Down
12 changes: 10 additions & 2 deletions include/linux/cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -690,11 +690,19 @@ static inline void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
char *buf, size_t buflen) {}
#endif /* !CONFIG_CGROUPS */

#ifdef CONFIG_CGROUPS
/*
* Basic resource stats.
* cgroup scalable recursive statistics.
*/
#ifdef CONFIG_CGROUPS
void cgroup_rstat_updated(struct cgroup *cgrp, int cpu);
void cgroup_rstat_flush(struct cgroup *cgrp);
void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp);
void cgroup_rstat_flush_hold(struct cgroup *cgrp);
void cgroup_rstat_flush_release(void);

/*
* Basic resource stats.
*/
#ifdef CONFIG_CGROUP_CPUACCT
void cpuacct_charge(struct task_struct *tsk, u64 cputime);
void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
Expand Down
2 changes: 1 addition & 1 deletion kernel/cgroup/Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
obj-y := cgroup.o stat.o namespace.o cgroup-v1.o
obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o

obj-$(CONFIG_CGROUP_FREEZER) += freezer.o
obj-$(CONFIG_CGROUP_PIDS) += pids.o
Expand Down
11 changes: 5 additions & 6 deletions kernel/cgroup/cgroup-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,13 +201,12 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
int cgroup_task_count(const struct cgroup *cgrp);

/*
* stat.c
* rstat.c
*/
void cgroup_stat_flush(struct cgroup *cgrp);
int cgroup_stat_init(struct cgroup *cgrp);
void cgroup_stat_exit(struct cgroup *cgrp);
void cgroup_stat_show_cputime(struct seq_file *seq);
void cgroup_stat_boot(void);
int cgroup_rstat_init(struct cgroup *cgrp);
void cgroup_rstat_exit(struct cgroup *cgrp);
void cgroup_rstat_boot(void);
void cgroup_base_stat_cputime_show(struct seq_file *seq);

/*
* namespace.c
Expand Down
Loading

0 comments on commit 9f25a8d

Please sign in to comment.