Skip to content

Commit

Permalink
Merge tag 'perf-tools-for-v5.18-2022-04-02' of git://git.kernel.org/p…
Browse files Browse the repository at this point in the history
…ub/scm/linux/kernel/git/acme/linux

Pull more perf tools updates from Arnaldo Carvalho de Melo:

 - Avoid SEGV if core.cpus isn't set in 'perf stat'.

 - Stop depending on .git files for building PERF-VERSION-FILE, used in
   'perf --version', fixing some perf tools build scenarios.

 - Convert tracepoint.py example to python3.

 - Update UAPI header copies from the kernel sources: socket,
   mman-common, msr-index, KVM, i915 and cpufeatures.

 - Update copy of libbpf's hashmap.c.

 - Directly return instead of using local ret variable in
   evlist__create_syswide_maps(), found by coccinelle.

* tag 'perf-tools-for-v5.18-2022-04-02' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
  perf python: Convert tracepoint.py example to python3
  perf evlist: Directly return instead of using local ret variable
  perf cpumap: More cpu map reuse by merge.
  perf cpumap: Add is_subset function
  perf evlist: Rename cpus to user_requested_cpus
  perf tools: Stop depending on .git files for building PERF-VERSION-FILE
  tools headers cpufeatures: Sync with the kernel sources
  tools headers UAPI: Sync drm/i915_drm.h with the kernel sources
  tools headers UAPI: Sync linux/kvm.h with the kernel sources
  tools kvm headers arm64: Update KVM headers from the kernel sources
  tools arch x86: Sync the msr-index.h copy with the kernel sources
  tools headers UAPI: Sync asm-generic/mman-common.h with the kernel
  perf beauty: Update copy of linux/socket.h with the kernel sources
  perf tools: Update copy of libbpf's hashmap.c
  perf stat: Avoid SEGV if core.cpus isn't set
  • Loading branch information
torvalds committed Apr 2, 2022
2 parents d897b68 + 7e2022a commit be2d3ec
Show file tree
Hide file tree
Showing 31 changed files with 180 additions and 88 deletions.
10 changes: 10 additions & 0 deletions tools/arch/arm64/include/uapi/asm/kvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,16 @@ struct kvm_arm_copy_mte_tags {
#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED

/* arm64-specific kvm_run::system_event flags */
/*
* Reset caused by a PSCI v1.1 SYSTEM_RESET2 call.
* Valid only when the system event has a type of KVM_SYSTEM_EVENT_RESET.
*/
#define KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (1ULL << 0)

/* run->fail_entry.hardware_entry_failure_reason codes. */
#define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0)

#endif

#endif /* __ARM_KVM_H__ */
1 change: 1 addition & 0 deletions tools/arch/x86/include/asm/cpufeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,7 @@
#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */
#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */
#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
Expand Down
22 changes: 21 additions & 1 deletion tools/arch/x86/include/asm/msr-index.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,8 @@
#define RTIT_CTL_DISRETC BIT(11)
#define RTIT_CTL_PTW_EN BIT(12)
#define RTIT_CTL_BRANCH_EN BIT(13)
#define RTIT_CTL_EVENT_EN BIT(31)
#define RTIT_CTL_NOTNT BIT_ULL(55)
#define RTIT_CTL_MTC_RANGE_OFFSET 14
#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
#define RTIT_CTL_CYC_THRESH_OFFSET 19
Expand Down Expand Up @@ -360,11 +362,29 @@
#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c
#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d


#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690
#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0
#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1

/* Control-flow Enforcement Technology MSRs */
#define MSR_IA32_U_CET 0x000006a0 /* user mode cet */
#define MSR_IA32_S_CET 0x000006a2 /* kernel mode cet */
#define CET_SHSTK_EN BIT_ULL(0)
#define CET_WRSS_EN BIT_ULL(1)
#define CET_ENDBR_EN BIT_ULL(2)
#define CET_LEG_IW_EN BIT_ULL(3)
#define CET_NO_TRACK_EN BIT_ULL(4)
#define CET_SUPPRESS_DISABLE BIT_ULL(5)
#define CET_RESERVED (BIT_ULL(6) | BIT_ULL(7) | BIT_ULL(8) | BIT_ULL(9))
#define CET_SUPPRESS BIT_ULL(10)
#define CET_WAIT_ENDBR BIT_ULL(11)

#define MSR_IA32_PL0_SSP 0x000006a4 /* ring-0 shadow stack pointer */
#define MSR_IA32_PL1_SSP 0x000006a5 /* ring-1 shadow stack pointer */
#define MSR_IA32_PL2_SSP 0x000006a6 /* ring-2 shadow stack pointer */
#define MSR_IA32_PL3_SSP 0x000006a7 /* ring-3 shadow stack pointer */
#define MSR_IA32_INT_SSP_TAB 0x000006a8 /* exception shadow stack table */

/* Hardware P state interface */
#define MSR_PPERF 0x0000064e
#define MSR_PERF_LIMIT_REASONS 0x0000064f
Expand Down
2 changes: 2 additions & 0 deletions tools/include/uapi/asm-generic/mman-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@
#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */
#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */

#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */

/* compatibility flags */
#define MAP_FILE 0

Expand Down
45 changes: 40 additions & 5 deletions tools/include/uapi/drm/i915_drm.h
Original file line number Diff line number Diff line change
Expand Up @@ -1118,10 +1118,16 @@ struct drm_i915_gem_exec_object2 {
/**
* When the EXEC_OBJECT_PINNED flag is specified this is populated by
* the user with the GTT offset at which this object will be pinned.
*
* When the I915_EXEC_NO_RELOC flag is specified this must contain the
* presumed_offset of the object.
*
* During execbuffer2 the kernel populates it with the value of the
* current GTT offset of the object, for future presumed_offset writes.
*
* See struct drm_i915_gem_create_ext for the rules when dealing with
* alignment restrictions with I915_MEMORY_CLASS_DEVICE, on devices with
* minimum page sizes, like DG2.
*/
__u64 offset;

Expand Down Expand Up @@ -3144,11 +3150,40 @@ struct drm_i915_gem_create_ext {
*
* The (page-aligned) allocated size for the object will be returned.
*
* Note that for some devices we have might have further minimum
* page-size restrictions(larger than 4K), like for device local-memory.
* However in general the final size here should always reflect any
* rounding up, if for example using the I915_GEM_CREATE_EXT_MEMORY_REGIONS
* extension to place the object in device local-memory.
*
* DG2 64K min page size implications:
*
* On discrete platforms, starting from DG2, we have to contend with GTT
* page size restrictions when dealing with I915_MEMORY_CLASS_DEVICE
* objects. Specifically the hardware only supports 64K or larger GTT
* page sizes for such memory. The kernel will already ensure that all
* I915_MEMORY_CLASS_DEVICE memory is allocated using 64K or larger page
* sizes underneath.
*
* Note that the returned size here will always reflect any required
* rounding up done by the kernel, i.e 4K will now become 64K on devices
* such as DG2.
*
* Special DG2 GTT address alignment requirement:
*
* The GTT alignment will also need to be at least 2M for such objects.
*
* Note that due to how the hardware implements 64K GTT page support, we
* have some further complications:
*
* 1) The entire PDE (which covers a 2MB virtual address range), must
* contain only 64K PTEs, i.e mixing 4K and 64K PTEs in the same
* PDE is forbidden by the hardware.
*
* 2) We still need to support 4K PTEs for I915_MEMORY_CLASS_SYSTEM
* objects.
*
* To keep things simple for userland, we mandate that any GTT mappings
* must be aligned to and rounded up to 2MB. The kernel will internally
* pad them out to the next 2MB boundary. As this only wastes virtual
* address space and avoids userland having to copy any needlessly
* complicated PDE sharing scheme (coloring) and only affects DG2, this
* is deemed to be a good compromise.
*/
__u64 size;
/**
Expand Down
11 changes: 9 additions & 2 deletions tools/include/uapi/linux/kvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -562,19 +562,25 @@ struct kvm_s390_mem_op {
__u32 op; /* type of operation */
__u64 buf; /* buffer in userspace */
union {
__u8 ar; /* the access register number */
struct {
__u8 ar; /* the access register number */
__u8 key; /* access key, ignored if flag unset */
};
__u32 sida_offset; /* offset into the sida */
__u8 reserved[32]; /* should be set to 0 */
__u8 reserved[32]; /* ignored */
};
};
/* types for kvm_s390_mem_op->op */
#define KVM_S390_MEMOP_LOGICAL_READ 0
#define KVM_S390_MEMOP_LOGICAL_WRITE 1
#define KVM_S390_MEMOP_SIDA_READ 2
#define KVM_S390_MEMOP_SIDA_WRITE 3
#define KVM_S390_MEMOP_ABSOLUTE_READ 4
#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5
/* flags for kvm_s390_mem_op->flags */
#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0)
#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1)
#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2)

/* for KVM_INTERRUPT */
struct kvm_interrupt {
Expand Down Expand Up @@ -1137,6 +1143,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_PPC_AIL_MODE_3 210
#define KVM_CAP_S390_MEM_OP_EXTENSION 211
#define KVM_CAP_PMU_CAPABILITY 212
#define KVM_CAP_DISABLE_QUIRKS2 213

#ifdef KVM_CAP_IRQ_ROUTING

Expand Down
35 changes: 25 additions & 10 deletions tools/lib/perf/cpumap.c
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,26 @@ struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map)
return map->nr > 0 ? map->map[map->nr - 1] : result;
}

/** Is 'b' a subset of 'a'. */
bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b)
{
if (a == b || !b)
return true;
if (!a || b->nr > a->nr)
return false;

for (int i = 0, j = 0; i < a->nr; i++) {
if (a->map[i].cpu > b->map[j].cpu)
return false;
if (a->map[i].cpu == b->map[j].cpu) {
j++;
if (j == b->nr)
return true;
}
}
return false;
}

/*
* Merge two cpumaps
*
Expand All @@ -335,17 +355,12 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
int i, j, k;
struct perf_cpu_map *merged;

if (!orig && !other)
return NULL;
if (!orig) {
perf_cpu_map__get(other);
return other;
}
if (!other)
return orig;
if (orig->nr == other->nr &&
!memcmp(orig->map, other->map, orig->nr * sizeof(struct perf_cpu)))
if (perf_cpu_map__is_subset(orig, other))
return orig;
if (perf_cpu_map__is_subset(other, orig)) {
perf_cpu_map__put(orig);
return perf_cpu_map__get(other);
}

tmp_len = orig->nr + other->nr;
tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
Expand Down
28 changes: 14 additions & 14 deletions tools/lib/perf/evlist.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
*/
if (!evsel->own_cpus || evlist->has_user_cpus) {
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__get(evlist->cpus);
} else if (!evsel->system_wide && perf_cpu_map__empty(evlist->cpus)) {
evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
} else if (!evsel->system_wide && perf_cpu_map__empty(evlist->user_requested_cpus)) {
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__get(evlist->cpus);
evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
} else if (evsel->cpus != evsel->own_cpus) {
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
Expand Down Expand Up @@ -123,10 +123,10 @@ static void perf_evlist__purge(struct perf_evlist *evlist)

void perf_evlist__exit(struct perf_evlist *evlist)
{
perf_cpu_map__put(evlist->cpus);
perf_cpu_map__put(evlist->user_requested_cpus);
perf_cpu_map__put(evlist->all_cpus);
perf_thread_map__put(evlist->threads);
evlist->cpus = NULL;
evlist->user_requested_cpus = NULL;
evlist->all_cpus = NULL;
evlist->threads = NULL;
fdarray__exit(&evlist->pollfd);
Expand Down Expand Up @@ -155,9 +155,9 @@ void perf_evlist__set_maps(struct perf_evlist *evlist,
* original reference count of 1. If that is not the case it is up to
* the caller to increase the reference count.
*/
if (cpus != evlist->cpus) {
perf_cpu_map__put(evlist->cpus);
evlist->cpus = perf_cpu_map__get(cpus);
if (cpus != evlist->user_requested_cpus) {
perf_cpu_map__put(evlist->user_requested_cpus);
evlist->user_requested_cpus = perf_cpu_map__get(cpus);
}

if (threads != evlist->threads) {
Expand Down Expand Up @@ -294,7 +294,7 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist,

int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
int nr_cpus = perf_cpu_map__nr(evlist->cpus);
int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus);
int nr_threads = perf_thread_map__nr(evlist->threads);
int nfds = 0;
struct perf_evsel *evsel;
Expand Down Expand Up @@ -426,7 +426,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
int idx, struct perf_mmap_param *mp, int cpu_idx,
int thread, int *_output, int *_output_overwrite)
{
struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx);
struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->user_requested_cpus, cpu_idx);
struct perf_evsel *evsel;
int revent;

Expand Down Expand Up @@ -536,7 +536,7 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
struct perf_mmap_param *mp)
{
int nr_threads = perf_thread_map__nr(evlist->threads);
int nr_cpus = perf_cpu_map__nr(evlist->cpus);
int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus);
int cpu, thread;

for (cpu = 0; cpu < nr_cpus; cpu++) {
Expand Down Expand Up @@ -564,8 +564,8 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist)
{
int nr_mmaps;

nr_mmaps = perf_cpu_map__nr(evlist->cpus);
if (perf_cpu_map__empty(evlist->cpus))
nr_mmaps = perf_cpu_map__nr(evlist->user_requested_cpus);
if (perf_cpu_map__empty(evlist->user_requested_cpus))
nr_mmaps = perf_thread_map__nr(evlist->threads);

return nr_mmaps;
Expand All @@ -576,7 +576,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
struct perf_mmap_param *mp)
{
struct perf_evsel *evsel;
const struct perf_cpu_map *cpus = evlist->cpus;
const struct perf_cpu_map *cpus = evlist->user_requested_cpus;
const struct perf_thread_map *threads = evlist->threads;

if (!ops || !ops->get || !ops->mmap)
Expand Down
1 change: 1 addition & 0 deletions tools/lib/perf/include/internal/cpumap.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,6 @@ struct perf_cpu_map {
#endif

int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu);
bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b);

#endif /* __LIBPERF_INTERNAL_CPUMAP_H */
7 changes: 6 additions & 1 deletion tools/lib/perf/include/internal/evlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@ struct perf_evlist {
int nr_entries;
int nr_groups;
bool has_user_cpus;
struct perf_cpu_map *cpus;
/**
* The cpus passed from the command line or all online CPUs by
* default.
*/
struct perf_cpu_map *user_requested_cpus;
/** The union of all evsel cpu maps. */
struct perf_cpu_map *all_cpus;
struct perf_thread_map *threads;
int nr_mmaps;
Expand Down
14 changes: 2 additions & 12 deletions tools/perf/Makefile.perf
Original file line number Diff line number Diff line change
Expand Up @@ -691,9 +691,8 @@ $(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
$(SCRIPTS) : % : %.sh
$(QUIET_GEN)$(INSTALL) '[email protected]' '$(OUTPUT)$@'

$(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD ../../.git/ORIG_HEAD
$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
$(Q)$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
$(Q)touch $(OUTPUT)PERF-VERSION-FILE

# These can record PERF_VERSION
perf.spec $(SCRIPTS) \
Expand Down Expand Up @@ -1139,21 +1138,12 @@ else
@echo "FEATURE-DUMP file available in $(OUTPUT)FEATURE-DUMP"
endif

#
# Trick: if ../../.git does not exist - we are building out of tree for example,
# then force version regeneration:
#
ifeq ($(wildcard ../../.git/HEAD),)
GIT-HEAD-PHONY = ../../.git/HEAD ../../.git/ORIG_HEAD
else
GIT-HEAD-PHONY =
endif

FORCE:

.PHONY: all install clean config-clean strip install-gtk
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare
.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope FORCE prepare
.PHONY: libtraceevent_plugins archheaders

endif # force_fixdep
Loading

0 comments on commit be2d3ec

Please sign in to comment.