Skip to content

Commit

Permalink
Merge tag 'perf-tools-for-v5.19-2022-05-28' of git://git.kernel.org/p…
Browse files Browse the repository at this point in the history
…ub/scm/linux/kernel/git/acme/linux

Pull more perf tools updates from Arnaldo Carvalho de Melo:

 - Add BPF based off-CPU profiling

 - Improvements for system-wide recording, especially for Intel PT

 - Improve DWARF unwinding on arm64

 - Support Arm CoreSight trace data disassembly in 'perf script' python

 - Fix build with new libbpf version, related to supporting older
   versions of distro released libbpf packages

 - Fix event syntax error caused by ExtSel in the JSON events infra

 - Use stdio interface if slang is not supported in 'perf c2c'

 - Add 'perf test' checking for perf stat CSV output

 - Sync the msr-index.h copy with the kernel sources

* tag 'perf-tools-for-v5.19-2022-05-28' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (38 commits)
  tools arch x86: Sync the msr-index.h copy with the kernel sources
  perf scripts python: Support Arm CoreSight trace data disassembly
  perf scripting python: Expose dso and map information
  perf jevents: Fix event syntax error caused by ExtSel
  perf tools arm64: Add support for VG register
  perf unwind arm64: Decouple Libunwind register names from Perf
  perf unwind: Use dynamic register set for DWARF unwind
  perf tools arm64: Copy perf_regs.h from the kernel
  perf unwind arm64: Use perf's copy of kernel headers
  perf c2c: Use stdio interface if slang is not supported
  perf test: Add a basic offcpu profiling test
  perf record: Add cgroup support for off-cpu profiling
  perf record: Handle argument change in sched_switch
  perf record: Implement basic filtering for off-cpu
  perf record: Enable off-cpu analysis with BPF
  perf report: Do not extend sample type of bpf-output event
  perf test: Add checking for perf stat CSV output.
  perf tools: Allow system-wide events to keep their own threads
  perf tools: Allow system-wide events to keep their own CPUs
  libperf evsel: Add comments for booleans
  ...
  • Loading branch information
torvalds committed May 29, 2022
2 parents 664a393 + 9dde6ca commit 09f73a1
Show file tree
Hide file tree
Showing 44 changed files with 1,594 additions and 197 deletions.
7 changes: 6 additions & 1 deletion tools/arch/arm64/include/uapi/asm/perf_regs.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ enum perf_event_arm_regs {
PERF_REG_ARM64_LR,
PERF_REG_ARM64_SP,
PERF_REG_ARM64_PC,
PERF_REG_ARM64_MAX,

/* Extended/pseudo registers */
PERF_REG_ARM64_VG = 46, // SVE Vector Granule

PERF_REG_ARM64_MAX = PERF_REG_ARM64_PC + 1,
PERF_REG_ARM64_EXTENDED_MAX = PERF_REG_ARM64_VG + 1
};
#endif /* _ASM_ARM64_PERF_REGS_H */
19 changes: 19 additions & 0 deletions tools/arch/x86/include/asm/msr-index.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@

/* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */
#define MSR_IA32_CORE_CAPS 0x000000cf
#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2
#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS BIT(MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT)
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT)

Expand Down Expand Up @@ -154,6 +156,11 @@
#define MSR_IA32_POWER_CTL 0x000001fc
#define MSR_IA32_POWER_CTL_BIT_EE 19

/* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */
#define MSR_INTEGRITY_CAPS 0x000002d9
#define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4
#define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT)

#define MSR_LBR_NHM_FROM 0x00000680
#define MSR_LBR_NHM_TO 0x000006c0
#define MSR_LBR_CORE_FROM 0x00000040
Expand Down Expand Up @@ -312,6 +319,7 @@

/* Run Time Average Power Limiting (RAPL) Interface */

#define MSR_VR_CURRENT_CONFIG 0x00000601
#define MSR_RAPL_POWER_UNIT 0x00000606

#define MSR_PKG_POWER_LIMIT 0x00000610
Expand Down Expand Up @@ -502,8 +510,10 @@
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)

#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f

Expand All @@ -524,6 +534,11 @@
#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)

/* AMD Performance Counter Global Status and Control MSRs */
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302

/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9

Expand Down Expand Up @@ -688,6 +703,10 @@
#define MSR_IA32_PERF_CTL 0x00000199
#define INTEL_PERF_CTL_MASK 0xffff

/* AMD Branch Sampling configuration */
#define MSR_AMD_DBG_EXTN_CFG 0xc000010f
#define MSR_AMD_SAMP_BR_FROM 0xc0010300

#define MSR_IA32_MPERF 0x000000e7
#define MSR_IA32_APERF 0x000000e8

Expand Down
4 changes: 4 additions & 0 deletions tools/build/Makefile.feature
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ FEATURE_TESTS_EXTRA := \
clang \
libbpf \
libbpf-btf__load_from_kernel_by_id \
libbpf-bpf_prog_load \
libbpf-bpf_object__next_program \
libbpf-bpf_object__next_map \
libbpf-bpf_create_map \
libpfm4 \
libdebuginfod \
clang-bpf-co-re
Expand Down
20 changes: 20 additions & 0 deletions tools/build/feature/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ FILES= \
test-bpf.bin \
test-libbpf.bin \
test-libbpf-btf__load_from_kernel_by_id.bin \
test-libbpf-bpf_prog_load.bin \
test-libbpf-bpf_map_create.bin \
test-libbpf-bpf_object__next_program.bin \
test-libbpf-bpf_object__next_map.bin \
test-libbpf-btf__raw_data.bin \
test-get_cpuid.bin \
test-sdt.bin \
test-cxx.bin \
Expand Down Expand Up @@ -291,6 +296,21 @@ $(OUTPUT)test-libbpf.bin:
$(OUTPUT)test-libbpf-btf__load_from_kernel_by_id.bin:
$(BUILD) -lbpf

$(OUTPUT)test-libbpf-bpf_prog_load.bin:
$(BUILD) -lbpf

$(OUTPUT)test-libbpf-bpf_map_create.bin:
$(BUILD) -lbpf

$(OUTPUT)test-libbpf-bpf_object__next_program.bin:
$(BUILD) -lbpf

$(OUTPUT)test-libbpf-bpf_object__next_map.bin:
$(BUILD) -lbpf

$(OUTPUT)test-libbpf-btf__raw_data.bin:
$(BUILD) -lbpf

$(OUTPUT)test-sdt.bin:
$(BUILD)

Expand Down
8 changes: 8 additions & 0 deletions tools/build/feature/test-libbpf-bpf_map_create.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <bpf/bpf.h>

/*
 * Build-time feature probe for libbpf's bpf_map_create().
 *
 * The program is only meant to compile and link against -lbpf; every
 * argument is a dummy value. The call takes exactly six arguments, so the
 * name comments must not introduce extra commas: a stray ',' after the
 * key_size comment produces an empty argument, which is a syntax error and
 * makes the probe fail unconditionally.
 */
int main(void)
{
	return bpf_map_create(0 /* map_type */, NULL /* map_name */, 0 /* key_size */,
			      0 /* value_size */, 0 /* max_entries */, NULL /* opts */);
}
8 changes: 8 additions & 0 deletions tools/build/feature/test-libbpf-bpf_object__next_map.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <bpf/libbpf.h>

/*
 * Build-time feature probe for libbpf's bpf_object__next_map().
 *
 * Only the presence of the symbol matters: the function is invoked with
 * NULL for both 'obj' and 'prev', and whatever it returns is discarded.
 */
int main(void)
{
	/* obj = NULL, prev = NULL */
	(void)bpf_object__next_map(NULL, NULL);

	return 0;
}
8 changes: 8 additions & 0 deletions tools/build/feature/test-libbpf-bpf_object__next_program.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <bpf/libbpf.h>

/*
 * Build-time feature probe for libbpf's bpf_object__next_program().
 *
 * Only the presence of the symbol matters: the function is invoked with
 * NULL for both 'obj' and 'prev', and whatever it returns is discarded.
 */
int main(void)
{
	/* obj = NULL, prev = NULL */
	(void)bpf_object__next_program(NULL, NULL);

	return 0;
}
9 changes: 9 additions & 0 deletions tools/build/feature/test-libbpf-bpf_prog_load.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <bpf/bpf.h>

/*
 * Build-time feature probe for libbpf's bpf_prog_load().
 *
 * Calls the function with dummy arguments (zeros and NULLs) and hands its
 * result back as the program's exit status.
 */
int main(void)
{
	int ret;

	ret = bpf_prog_load(0,		/* prog_type */
			    NULL,	/* prog_name */
			    NULL,	/* license */
			    NULL,	/* insns */
			    0,		/* insn_cnt */
			    NULL);	/* opts */

	return ret;
}
5 changes: 3 additions & 2 deletions tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <bpf/libbpf.h>
#include <bpf/btf.h>

int main(void)
{
return btf__load_from_kernel_by_id(20151128, NULL);
btf__load_from_kernel_by_id(20151128);
return 0;
}
8 changes: 8 additions & 0 deletions tools/build/feature/test-libbpf-btf__raw_data.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <bpf/btf.h>

/*
 * Build-time feature probe for libbpf's btf__raw_data().
 *
 * Only the presence of the symbol matters: the function is invoked with
 * NULL for both 'btf_ro' and 'size', and whatever it returns is discarded.
 */
int main(void)
{
	/* btf_ro = NULL, size = NULL */
	(void)btf__raw_data(NULL, NULL);

	return 0;
}
71 changes: 30 additions & 41 deletions tools/lib/perf/evlist.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <api/fd/array.h>
#include "internal.h"

void perf_evlist__init(struct perf_evlist *evlist)
{
Expand All @@ -39,19 +40,23 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
* We already have cpus for evsel (via PMU sysfs) so
* keep it, if there's no target cpu list defined.
*/
if (!evsel->own_cpus || evlist->has_user_cpus) {
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
} else if (!evsel->system_wide && perf_cpu_map__empty(evlist->user_requested_cpus)) {
if (!evsel->own_cpus ||
(!evsel->system_wide && evlist->has_user_cpus) ||
(!evsel->system_wide &&
!evsel->requires_cpu &&
perf_cpu_map__empty(evlist->user_requested_cpus))) {
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
} else if (evsel->cpus != evsel->own_cpus) {
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
}

perf_thread_map__put(evsel->threads);
evsel->threads = perf_thread_map__get(evlist->threads);
if (!evsel->system_wide) {
perf_thread_map__put(evsel->threads);
evsel->threads = perf_thread_map__get(evlist->threads);
}

evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
}

Expand Down Expand Up @@ -298,7 +303,7 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist,

int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus);
int nr_cpus = perf_cpu_map__nr(evlist->all_cpus);
int nr_threads = perf_thread_map__nr(evlist->threads);
int nfds = 0;
struct perf_evsel *evsel;
Expand Down Expand Up @@ -428,9 +433,9 @@ static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_
static int
mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
int idx, struct perf_mmap_param *mp, int cpu_idx,
int thread, int *_output, int *_output_overwrite)
int thread, int *_output, int *_output_overwrite, int *nr_mmaps)
{
struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->user_requested_cpus, cpu_idx);
struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->all_cpus, cpu_idx);
struct perf_evsel *evsel;
int revent;

Expand Down Expand Up @@ -484,6 +489,8 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
if (ops->mmap(map, mp, *output, evlist_cpu) < 0)
return -1;

*nr_mmaps += 1;

if (!idx)
perf_evlist__set_mmap_first(evlist, map, overwrite);
} else {
Expand Down Expand Up @@ -512,35 +519,13 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
return 0;
}

/*
 * Run mmap_per_evsel() once for every entry of evlist->threads.
 *
 * The thread index is passed both as the mmap index and as the thread
 * argument, while the CPU index is always 0. The two output values handed
 * to mmap_per_evsel() are reset to -1 for each thread.
 *
 * Returns 0 when every call succeeds. As soon as one call reports a
 * non-zero result, perf_evlist__munmap() is invoked on the evlist and -1
 * is returned.
 */
static int
mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
		struct perf_mmap_param *mp)
{
	const int thread_count = perf_thread_map__nr(evlist->threads);
	int t = 0;

	while (t < thread_count) {
		int out = -1, out_overwrite = -1;

		if (mmap_per_evsel(evlist, ops, t, mp, 0, t,
				   &out, &out_overwrite) != 0) {
			perf_evlist__munmap(evlist);
			return -1;
		}

		t++;
	}

	return 0;
}

static int
mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
struct perf_mmap_param *mp)
{
int nr_threads = perf_thread_map__nr(evlist->threads);
int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus);
int nr_cpus = perf_cpu_map__nr(evlist->all_cpus);
int nr_mmaps = 0;
int cpu, thread;

for (cpu = 0; cpu < nr_cpus; cpu++) {
Expand All @@ -549,11 +534,14 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,

for (thread = 0; thread < nr_threads; thread++) {
if (mmap_per_evsel(evlist, ops, cpu, mp, cpu,
thread, &output, &output_overwrite))
thread, &output, &output_overwrite, &nr_mmaps))
goto out_unmap;
}
}

if (nr_mmaps != evlist->nr_mmaps)
pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps);

return 0;

out_unmap:
Expand All @@ -565,9 +553,14 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist)
{
int nr_mmaps;

nr_mmaps = perf_cpu_map__nr(evlist->user_requested_cpus);
if (perf_cpu_map__empty(evlist->user_requested_cpus))
nr_mmaps = perf_thread_map__nr(evlist->threads);
/* One for each CPU */
nr_mmaps = perf_cpu_map__nr(evlist->all_cpus);
if (perf_cpu_map__empty(evlist->all_cpus)) {
/* Plus one for each thread */
nr_mmaps += perf_thread_map__nr(evlist->threads);
/* Minus the per-thread CPU (-1) */
nr_mmaps -= 1;
}

return nr_mmaps;
}
Expand All @@ -577,7 +570,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
struct perf_mmap_param *mp)
{
struct perf_evsel *evsel;
const struct perf_cpu_map *cpus = evlist->user_requested_cpus;

if (!ops || !ops->get || !ops->mmap)
return -EINVAL;
Expand All @@ -596,9 +588,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
return -ENOMEM;

if (perf_cpu_map__empty(cpus))
return mmap_per_thread(evlist, ops, mp);

return mmap_per_cpu(evlist, ops, mp);
}

Expand Down
11 changes: 11 additions & 0 deletions tools/lib/perf/include/internal/evsel.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,18 @@ struct perf_evsel {

/* parse modifier helper */
int nr_members;
/*
* system_wide is for events that need to be on every CPU, irrespective
* of user requested CPUs or threads. Map propagation will set cpus to
* this event's own_cpus, whereby they will contribute to evlist
* all_cpus.
*/
bool system_wide;
/*
* Some events, for example uncore events, require a CPU.
* i.e. it cannot be the 'any CPU' value of -1.
*/
bool requires_cpu;
int idx;
};

Expand Down
10 changes: 10 additions & 0 deletions tools/perf/Documentation/perf-record.txt
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,16 @@ include::intel-hybrid.txt[]
If the URLs are not specified, the value of DEBUGINFOD_URLS
system environment variable is used.

--off-cpu::
Enable off-cpu profiling with BPF. The BPF program collects
task scheduling information together with (user) stack traces and
saves it as sample data of a software event named "offcpu-time".
The sample period holds the time the task slept, in nanoseconds.

Note that, for now, BPF can only collect stack traces using the
frame pointer ("fp"), so applications built without frame pointers
may show bogus addresses.

SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
Loading

0 comments on commit 09f73a1

Please sign in to comment.