Skip to content

Commit

Permalink
Merge tag 'perf-urgent-2020-04-05' of git://git.kernel.org/pub/scm/li…
Browse files Browse the repository at this point in the history
…nux/kernel/git/tip/tip

Pull more perf updates from Thomas Gleixner:
 "Perf updates all over the place:

  core:

   - Support for cgroup tracking in samples to allow cgroup based
     analysis

  tools:

   - Support for cgroup analysis

   - Commandline option and hotkey for perf top to change the sort order

   - A set of fixes all over the place

   - Various build system related improvements

   - Updates of the X86 pmu event JSON data

   - Documentation updates"

* tag 'perf-urgent-2020-04-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (55 commits)
  perf python: Fix clang detection to strip out options passed in $CC
  perf tools: Support Python 3.8+ in Makefile
  perf script: Fix invalid read of directory entry after closedir()
  perf script report: Fix SEGFAULT when using DWARF mode
  perf script: add -S/--symbols documentation
  perf pmu-events x86: Use CPU_CLK_UNHALTED.THREAD in Kernel_Utilization metric
  perf events parser: Add missing Intel CPU events to parser
  perf script: Allow --symbol to accept hexadecimal addresses
  perf report/top TUI: Fix title line formatting
  perf top: Support hotkey to change sort order
  perf top: Support --group-sort-idx to change the sort order
  perf symbols: Fix arm64 gap between kernel start and module end
  perf build-test: Honour JOBS to override detection of number of cores
  perf script: Add --show-cgroup-events option
  perf top: Add --all-cgroups option
  perf record: Add --all-cgroups option
  perf record: Support synthesizing cgroup events
  perf report: Add 'cgroup' sort key
  perf cgroup: Maintain cgroup hierarchy
  perf tools: Basic support for CGROUP event
  ...
  • Loading branch information
torvalds committed Apr 5, 2020
2 parents d5ca327 + 7dc41b9 commit c48b072
Show file tree
Hide file tree
Showing 100 changed files with 3,181 additions and 622 deletions.
1 change: 1 addition & 0 deletions include/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -1020,6 +1020,7 @@ struct perf_sample_data {
u64 stack_user_size;

u64 phys_addr;
u64 cgroup;
} ____cacheline_aligned;

/* default value for data source */
Expand Down
16 changes: 14 additions & 2 deletions include/uapi/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_REGS_INTR = 1U << 18,
PERF_SAMPLE_PHYS_ADDR = 1U << 19,
PERF_SAMPLE_AUX = 1U << 20,
PERF_SAMPLE_CGROUP = 1U << 21,

PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */
PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */

__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
};
Expand Down Expand Up @@ -381,7 +382,8 @@ struct perf_event_attr {
ksymbol : 1, /* include ksymbol events */
bpf_event : 1, /* include bpf events */
aux_output : 1, /* generate AUX records instead of events */
__reserved_1 : 32;
cgroup : 1, /* include cgroup events */
__reserved_1 : 31;

union {
__u32 wakeup_events; /* wakeup every n events */
Expand Down Expand Up @@ -1012,6 +1014,16 @@ enum perf_event_type {
*/
PERF_RECORD_BPF_EVENT = 18,

/*
* struct {
* struct perf_event_header header;
* u64 id;
* char path[];
* struct sample_id sample_id;
* };
*/
PERF_RECORD_CGROUP = 19,

PERF_RECORD_MAX, /* non-ABI */
};

Expand Down
3 changes: 2 additions & 1 deletion init/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -1029,7 +1029,8 @@ config CGROUP_PERF
help
This option extends the perf per-cpu mode to restrict monitoring
to threads which belong to the cgroup specified and run on the
designated cpu.
designated cpu. Or this can be used to have cgroup ID in samples
so that it can monitor performance events among cgroups.

Say N if unsure.

Expand Down
133 changes: 133 additions & 0 deletions kernel/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,7 @@ static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
static atomic_t nr_ksymbol_events __read_mostly;
static atomic_t nr_bpf_events __read_mostly;
static atomic_t nr_cgroup_events __read_mostly;

static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
Expand Down Expand Up @@ -1861,6 +1862,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
if (sample_type & PERF_SAMPLE_PHYS_ADDR)
size += sizeof(data->phys_addr);

if (sample_type & PERF_SAMPLE_CGROUP)
size += sizeof(data->cgroup);

event->header_size = size;
}

Expand Down Expand Up @@ -4608,6 +4612,8 @@ static void unaccount_event(struct perf_event *event)
atomic_dec(&nr_comm_events);
if (event->attr.namespaces)
atomic_dec(&nr_namespaces_events);
if (event->attr.cgroup)
atomic_dec(&nr_cgroup_events);
if (event->attr.task)
atomic_dec(&nr_task_events);
if (event->attr.freq)
Expand Down Expand Up @@ -6864,6 +6870,9 @@ void perf_output_sample(struct perf_output_handle *handle,
if (sample_type & PERF_SAMPLE_PHYS_ADDR)
perf_output_put(handle, data->phys_addr);

if (sample_type & PERF_SAMPLE_CGROUP)
perf_output_put(handle, data->cgroup);

if (sample_type & PERF_SAMPLE_AUX) {
perf_output_put(handle, data->aux_size);

Expand Down Expand Up @@ -7063,6 +7072,16 @@ void perf_prepare_sample(struct perf_event_header *header,
if (sample_type & PERF_SAMPLE_PHYS_ADDR)
data->phys_addr = perf_virt_to_phys(data->addr);

#ifdef CONFIG_CGROUP_PERF
if (sample_type & PERF_SAMPLE_CGROUP) {
struct cgroup *cgrp;

/* protected by RCU */
cgrp = task_css_check(current, perf_event_cgrp_id, 1)->cgroup;
data->cgroup = cgroup_id(cgrp);
}
#endif

if (sample_type & PERF_SAMPLE_AUX) {
u64 size;

Expand Down Expand Up @@ -7735,6 +7754,105 @@ void perf_event_namespaces(struct task_struct *task)
NULL);
}

/*
* cgroup tracking
*/
#ifdef CONFIG_CGROUP_PERF

struct perf_cgroup_event {
char *path;
int path_size;
struct {
struct perf_event_header header;
u64 id;
char path[];
} event_id;
};

static int perf_event_cgroup_match(struct perf_event *event)
{
return event->attr.cgroup;
}

static void perf_event_cgroup_output(struct perf_event *event, void *data)
{
struct perf_cgroup_event *cgroup_event = data;
struct perf_output_handle handle;
struct perf_sample_data sample;
u16 header_size = cgroup_event->event_id.header.size;
int ret;

if (!perf_event_cgroup_match(event))
return;

perf_event_header__init_id(&cgroup_event->event_id.header,
&sample, event);
ret = perf_output_begin(&handle, event,
cgroup_event->event_id.header.size);
if (ret)
goto out;

perf_output_put(&handle, cgroup_event->event_id);
__output_copy(&handle, cgroup_event->path, cgroup_event->path_size);

perf_event__output_id_sample(event, &handle, &sample);

perf_output_end(&handle);
out:
cgroup_event->event_id.header.size = header_size;
}

static void perf_event_cgroup(struct cgroup *cgrp)
{
struct perf_cgroup_event cgroup_event;
char path_enomem[16] = "//enomem";
char *pathname;
size_t size;

if (!atomic_read(&nr_cgroup_events))
return;

cgroup_event = (struct perf_cgroup_event){
.event_id = {
.header = {
.type = PERF_RECORD_CGROUP,
.misc = 0,
.size = sizeof(cgroup_event.event_id),
},
.id = cgroup_id(cgrp),
},
};

pathname = kmalloc(PATH_MAX, GFP_KERNEL);
if (pathname == NULL) {
cgroup_event.path = path_enomem;
} else {
/* just to be sure to have enough space for alignment */
cgroup_path(cgrp, pathname, PATH_MAX - sizeof(u64));
cgroup_event.path = pathname;
}

/*
* Since our buffer works in 8 byte units we need to align our string
* size to a multiple of 8. However, we must guarantee the tail end is
* zero'd out to avoid leaking random bits to userspace.
*/
size = strlen(cgroup_event.path) + 1;
while (!IS_ALIGNED(size, sizeof(u64)))
cgroup_event.path[size++] = '\0';

cgroup_event.event_id.header.size += size;
cgroup_event.path_size = size;

perf_iterate_sb(perf_event_cgroup_output,
&cgroup_event,
NULL);

kfree(pathname);
}

#endif

/*
* mmap tracking
*/
Expand Down Expand Up @@ -10778,6 +10896,8 @@ static void account_event(struct perf_event *event)
atomic_inc(&nr_comm_events);
if (event->attr.namespaces)
atomic_inc(&nr_namespaces_events);
if (event->attr.cgroup)
atomic_inc(&nr_cgroup_events);
if (event->attr.task)
atomic_inc(&nr_task_events);
if (event->attr.freq)
Expand Down Expand Up @@ -11157,6 +11277,12 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,

if (attr->sample_type & PERF_SAMPLE_REGS_INTR)
ret = perf_reg_validate(attr->sample_regs_intr);

#ifndef CONFIG_CGROUP_PERF
if (attr->sample_type & PERF_SAMPLE_CGROUP)
return -EINVAL;
#endif

out:
return ret;

Expand Down Expand Up @@ -12754,6 +12880,12 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
kfree(jc);
}

static int perf_cgroup_css_online(struct cgroup_subsys_state *css)
{
perf_event_cgroup(css->cgroup);
return 0;
}

static int __perf_cgroup_move(void *info)
{
struct task_struct *task = info;
Expand All @@ -12775,6 +12907,7 @@ static void perf_cgroup_attach(struct cgroup_taskset *tset)
struct cgroup_subsys perf_event_cgrp_subsys = {
.css_alloc = perf_cgroup_css_alloc,
.css_free = perf_cgroup_css_free,
.css_online = perf_cgroup_css_online,
.attach = perf_cgroup_attach,
/*
* Implicitly enable on dfl hierarchy so that perf events can
Expand Down
3 changes: 2 additions & 1 deletion tools/build/Makefile.feature
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ FEATURE_TESTS_BASIC := \
setns \
libaio \
libzstd \
disassembler-four-args
disassembler-four-args \
file-handle

# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
# of all feature tests
Expand Down
5 changes: 4 additions & 1 deletion tools/build/feature/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ FILES= \
test-llvm-version.bin \
test-libaio.bin \
test-libzstd.bin \
test-clang-bpf-global-var.bin
test-clang-bpf-global-var.bin \
test-file-handle.bin

FILES := $(addprefix $(OUTPUT),$(FILES))

Expand Down Expand Up @@ -327,6 +328,8 @@ $(OUTPUT)test-clang-bpf-global-var.bin:
$(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) | \
grep BTF_KIND_VAR

$(OUTPUT)test-file-handle.bin:
$(BUILD)

###############################

Expand Down
17 changes: 17 additions & 0 deletions tools/build/feature/test-file-handle.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <inttypes.h>

int main(void)
{
struct {
struct file_handle fh;
uint64_t cgroup_id;
} handle;
int mount_id;

name_to_handle_at(AT_FDCWD, "/", &handle.fh, &mount_id, 0);
return 0;
}
16 changes: 14 additions & 2 deletions tools/include/uapi/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_REGS_INTR = 1U << 18,
PERF_SAMPLE_PHYS_ADDR = 1U << 19,
PERF_SAMPLE_AUX = 1U << 20,
PERF_SAMPLE_CGROUP = 1U << 21,

PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */
PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */

__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
};
Expand Down Expand Up @@ -381,7 +382,8 @@ struct perf_event_attr {
ksymbol : 1, /* include ksymbol events */
bpf_event : 1, /* include bpf events */
aux_output : 1, /* generate AUX records instead of events */
__reserved_1 : 32;
cgroup : 1, /* include cgroup events */
__reserved_1 : 31;

union {
__u32 wakeup_events; /* wakeup every n events */
Expand Down Expand Up @@ -1012,6 +1014,16 @@ enum perf_event_type {
*/
PERF_RECORD_BPF_EVENT = 18,

/*
* struct {
* struct perf_event_header header;
* u64 id;
* char path[];
* struct sample_id sample_id;
* };
*/
PERF_RECORD_CGROUP = 19,

PERF_RECORD_MAX, /* non-ABI */
};

Expand Down
7 changes: 7 additions & 0 deletions tools/lib/perf/include/perf/event.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,12 @@ struct perf_record_bpf_event {
__u8 tag[BPF_TAG_SIZE]; // prog tag
};

struct perf_record_cgroup {
struct perf_event_header header;
__u64 id;
char path[PATH_MAX];
};

struct perf_record_sample {
struct perf_event_header header;
__u64 array[];
Expand Down Expand Up @@ -352,6 +358,7 @@ union perf_event {
struct perf_record_mmap2 mmap2;
struct perf_record_comm comm;
struct perf_record_namespaces namespaces;
struct perf_record_cgroup cgroup;
struct perf_record_fork fork;
struct perf_record_lost lost;
struct perf_record_lost_samples lost_samples;
Expand Down
14 changes: 8 additions & 6 deletions tools/perf/Documentation/perf-config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -405,14 +405,16 @@ ui.*::
This option is only applied to TUI.

call-graph.*::
When sub-commands 'top' and 'report' work with -g/—-children
there're options in control of call-graph.
The following controls the handling of call-graphs (obtained via the
-g/--call-graph options).

call-graph.record-mode::
The record-mode can be 'fp' (frame pointer), 'dwarf' and 'lbr'.
The value of 'dwarf' is effective only if perf detect needed library
(libunwind or a recent version of libdw).
'lbr' only work for cpus that support it.
The mode for user space can be 'fp' (frame pointer), 'dwarf'
and 'lbr'. The value 'dwarf' is effective only if libunwind
(or a recent version of libdw) is present on the system;
the value 'lbr' only works for certain cpus. The method for
kernel space is controlled not by this option but by the
kernel config (CONFIG_UNWINDER_*).

call-graph.dump-size::
The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).
Expand Down
Loading

0 comments on commit c48b072

Please sign in to comment.