Skip to content

Commit

Permalink
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/…
Browse files Browse the repository at this point in the history
…linux/kernel/git/tip/tip

Pull perf fixes from Thomas Gleixner:
 "A pile of perf related fixes:

  Kernel:
   - Fix SLOTS PEBS event constraints for Icelake CPUs

   - Add the missing mask bit to allow counting hardware generated
     prefetches on L3 for Icelake CPUs

   - Make the test for hypervisor platforms more accurate (as far as
     possible)

   - Handle PMUs correctly which override event->cpu

   - Yet another missing fallthrough annotation

  Tools:
     perf.data:
        - Fix loading of compressed data split across adjacent records
        - Fix buffer size setting for processing CPU topology perf.data
          header.

     perf stat:
        - Fix segfault for event group in repeat mode
        - Always separate "stalled cycles per insn" line, it was being
          appended to the "instructions" line.

     perf script:
        - Fix --max-blocks man page description.
        - Improve man page description of metrics.
        - Fix off by one in brstackinsn IPC computation.

     perf probe:
        - Avoid calling freeing routine multiple times for same pointer.

     perf build:
        - Do not use -Wshadow on gcc < 4.8, avoiding too strict warnings
          treated as errors, breaking the build"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel: Mark expected switch fall-throughs
  perf/core: Fix creating kernel counters for PMUs that override event->cpu
  perf/x86: Apply more accurate check on hypervisor platform
  perf/x86/intel: Fix invalid Bit 13 for Icelake MSR_OFFCORE_RSP_x register
  perf/x86/intel: Fix SLOTS PEBS event constraint
  perf build: Do not use -Wshadow on gcc < 4.8
  perf probe: Avoid calling freeing routine multiple times for same pointer
  perf probe: Set pev->nargs to zero after freeing pev->args entries
  perf session: Fix loading of compressed data split across adjacent records
  perf stat: Always separate stalled cycles per insn
  perf stat: Fix segfault for event group in repeat mode
  perf tools: Fix proper buffer size for feature processing
  perf script: Fix off by one in brstackinsn IPC computation
  perf script: Improve man page description of metrics
  perf script: Fix --max-blocks man page description
  • Loading branch information
torvalds committed Jul 28, 2019
2 parents 431f288 + 289a2d2 commit 750991f
Show file tree
Hide file tree
Showing 15 changed files with 59 additions and 25 deletions.
7 changes: 3 additions & 4 deletions arch/x86/events/intel/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <asm/intel-family.h>
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
#include <asm/hypervisor.h>

#include "../perf_event.h"

Expand Down Expand Up @@ -263,8 +262,8 @@ static struct event_constraint intel_icl_event_constraints[] = {
};

static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff9fffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff9fffull, RSP_1),
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffbfffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffffbfffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
EVENT_EXTRA_END
Expand Down Expand Up @@ -4053,7 +4052,7 @@ static bool check_msr(unsigned long msr, u64 mask)
* Disable the check for real HW, so we don't
* mess with potentionaly enabled registers:
*/
if (hypervisor_is_type(X86_HYPER_NATIVE))
if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
return true;

/*
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/events/intel/ds.c
Original file line number Diff line number Diff line change
Expand Up @@ -851,7 +851,7 @@ struct event_constraint intel_skl_pebs_event_constraints[] = {

struct event_constraint intel_icl_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x400000000ULL), /* SLOTS */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */

INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */
Expand Down
2 changes: 1 addition & 1 deletion kernel/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -11274,7 +11274,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
goto err_unlock;
}

perf_install_in_context(ctx, event, cpu);
perf_install_in_context(ctx, event, event->cpu);
perf_unpin_context(ctx);
mutex_unlock(&ctx->mutex);

Expand Down
8 changes: 4 additions & 4 deletions tools/perf/Documentation/perf-script.txt
Original file line number Diff line number Diff line change
Expand Up @@ -228,11 +228,11 @@ OPTIONS

With the metric option perf script can compute metrics for
sampling periods, similar to perf stat. This requires
specifying a group with multiple metrics with the :S option
specifying a group with multiple events defining metrics with the :S option
for perf record. perf will sample on the first event, and
compute metrics for all the events in the group. Please note
print computed metrics for all the events in the group. Please note
that the metric computed is averaged over the whole sampling
period, not just for the sample point.
period (since the last sample), not just for the sample point.

For sample events it's possible to display misc field with -F +misc option,
following letters are displayed for each bit:
Expand Down Expand Up @@ -384,7 +384,7 @@ include::itrace.txt[]
perf script --time 0%-10%,30%-40%

--max-blocks::
Set the maximum number of program blocks to print with brstackasm for
Set the maximum number of program blocks to print with brstackinsn for
each sample.

--reltime::
Expand Down
10 changes: 10 additions & 0 deletions tools/perf/builtin-probe.c
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,16 @@ __cmd_probe(int argc, const char **argv)

ret = perf_add_probe_events(params.events, params.nevents);
if (ret < 0) {

/*
* When perf_add_probe_events() fails it calls
* cleanup_perf_probe_events(pevs, npevs), i.e.
* cleanup_perf_probe_events(params.events, params.nevents), which
* will call clear_perf_probe_event(), so set nevents to zero
* to avoid cleanup_params() to call clear_perf_probe_event() again
* on the same pevs.
*/
params.nevents = 0;
pr_err_with_code(" Error: Failed to add events.", ret);
return ret;
}
Expand Down
2 changes: 1 addition & 1 deletion tools/perf/builtin-script.c
Original file line number Diff line number Diff line change
Expand Up @@ -1059,7 +1059,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,

printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
if (ip == end) {
printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp,
printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, ++insn, fp,
&total_cycles);
if (PRINT_FIELD(SRCCODE))
printed += print_srccode(thread, x.cpumode, ip);
Expand Down
9 changes: 8 additions & 1 deletion tools/perf/builtin-stat.c
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,13 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
* group leaders.
*/
read_counters(&(struct timespec) { .tv_nsec = t1-t0 });
perf_evlist__close(evsel_list);

/*
* We need to keep evsel_list alive, because it's processed
* later the evsel_list will be closed after.
*/
if (!STAT_RECORD)
perf_evlist__close(evsel_list);

return WEXITSTATUS(status);
}
Expand Down Expand Up @@ -1997,6 +2003,7 @@ int cmd_stat(int argc, const char **argv)
perf_session__write_header(perf_stat.session, evsel_list, fd, true);
}

perf_evlist__close(evsel_list);
perf_session__delete(perf_stat.session);
}

Expand Down
2 changes: 2 additions & 0 deletions tools/perf/util/evsel.c
Original file line number Diff line number Diff line change
Expand Up @@ -1291,6 +1291,7 @@ static void perf_evsel__free_id(struct perf_evsel *evsel)
xyarray__delete(evsel->sample_id);
evsel->sample_id = NULL;
zfree(&evsel->id);
evsel->ids = 0;
}

static void perf_evsel__free_config_terms(struct perf_evsel *evsel)
Expand Down Expand Up @@ -2077,6 +2078,7 @@ void perf_evsel__close(struct perf_evsel *evsel)

perf_evsel__close_fd(evsel);
perf_evsel__free_fd(evsel);
perf_evsel__free_id(evsel);
}

int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
Expand Down
2 changes: 1 addition & 1 deletion tools/perf/util/header.c
Original file line number Diff line number Diff line change
Expand Up @@ -3747,7 +3747,7 @@ int perf_event__process_feature(struct perf_session *session,
return 0;

ff.buf = (void *)fe->data;
ff.size = event->header.size - sizeof(event->header);
ff.size = event->header.size - sizeof(*fe);
ff.ph = &session->header;

if (feat_ops[feat].process(&ff, NULL))
Expand Down
1 change: 1 addition & 0 deletions tools/perf/util/probe-event.c
Original file line number Diff line number Diff line change
Expand Up @@ -2230,6 +2230,7 @@ void clear_perf_probe_event(struct perf_probe_event *pev)
field = next;
}
}
pev->nargs = 0;
zfree(&pev->args);
}

Expand Down
22 changes: 14 additions & 8 deletions tools/perf/util/session.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,21 +36,27 @@ static int perf_session__process_compressed_event(struct perf_session *session,
void *src;
size_t decomp_size, src_size;
u64 decomp_last_rem = 0;
size_t decomp_len = session->header.env.comp_mmap_len;
size_t mmap_len, decomp_len = session->header.env.comp_mmap_len;
struct decomp *decomp, *decomp_last = session->decomp_last;

decomp = mmap(NULL, sizeof(struct decomp) + decomp_len, PROT_READ|PROT_WRITE,
if (decomp_last) {
decomp_last_rem = decomp_last->size - decomp_last->head;
decomp_len += decomp_last_rem;
}

mmap_len = sizeof(struct decomp) + decomp_len;
decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE,
MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
if (decomp == MAP_FAILED) {
pr_err("Couldn't allocate memory for decompression\n");
return -1;
}

decomp->file_pos = file_offset;
decomp->mmap_len = mmap_len;
decomp->head = 0;

if (decomp_last) {
decomp_last_rem = decomp_last->size - decomp_last->head;
if (decomp_last_rem) {
memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
decomp->size = decomp_last_rem;
}
Expand All @@ -61,7 +67,7 @@ static int perf_session__process_compressed_event(struct perf_session *session,
decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size,
&(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
if (!decomp_size) {
munmap(decomp, sizeof(struct decomp) + decomp_len);
munmap(decomp, mmap_len);
pr_err("Couldn't decompress data\n");
return -1;
}
Expand Down Expand Up @@ -255,15 +261,15 @@ static void perf_session__delete_threads(struct perf_session *session)
static void perf_session__release_decomp_events(struct perf_session *session)
{
struct decomp *next, *decomp;
size_t decomp_len;
size_t mmap_len;
next = session->decomp;
decomp_len = session->header.env.comp_mmap_len;
do {
decomp = next;
if (decomp == NULL)
break;
next = decomp->next;
munmap(decomp, decomp_len + sizeof(struct decomp));
mmap_len = decomp->mmap_len;
munmap(decomp, mmap_len);
} while (1);
}

Expand Down
1 change: 1 addition & 0 deletions tools/perf/util/session.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ struct perf_session {
struct decomp {
struct decomp *next;
u64 file_pos;
size_t mmap_len;
u64 head;
size_t size;
char data[];
Expand Down
3 changes: 2 additions & 1 deletion tools/perf/util/stat-shadow.c
Original file line number Diff line number Diff line change
Expand Up @@ -819,7 +819,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
"stalled cycles per insn",
ratio);
} else if (have_frontend_stalled) {
print_metric(config, ctxp, NULL, NULL,
out->new_line(config, ctxp);
print_metric(config, ctxp, NULL, "%7.2f ",
"stalled cycles per insn", 0);
}
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
Expand Down
4 changes: 2 additions & 2 deletions tools/perf/util/zstd.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size
while (input.pos < input.size) {
ret = ZSTD_decompressStream(data->dstream, &output, &input);
if (ZSTD_isError(ret)) {
pr_err("failed to decompress (B): %ld -> %ld : %s\n",
src_size, output.size, ZSTD_getErrorName(ret));
pr_err("failed to decompress (B): %ld -> %ld, dst_size %ld : %s\n",
src_size, output.size, dst_size, ZSTD_getErrorName(ret));
break;
}
output.dst = dst + output.pos;
Expand Down
9 changes: 8 additions & 1 deletion tools/scripts/Makefile.include
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ EXTRA_WARNINGS += -Wno-system-headers
EXTRA_WARNINGS += -Wold-style-definition
EXTRA_WARNINGS += -Wpacked
EXTRA_WARNINGS += -Wredundant-decls
EXTRA_WARNINGS += -Wshadow
EXTRA_WARNINGS += -Wstrict-prototypes
EXTRA_WARNINGS += -Wswitch-default
EXTRA_WARNINGS += -Wswitch-enum
Expand Down Expand Up @@ -69,8 +68,16 @@ endif
# will do for now and keep the above -Wstrict-aliasing=3 in place
# in newer systems.
# Needed for the __raw_cmpxchg in tools/arch/x86/include/asm/cmpxchg.h
#
# See https://lkml.org/lkml/2006/11/28/253 and https://gcc.gnu.org/gcc-4.8/changes.html,
# that takes into account Linus's comments (search for Wshadow) for the reasoning about
# -Wshadow not being interesting before gcc 4.8.

ifneq ($(filter 3.%,$(MAKE_VERSION)),) # make-3
EXTRA_WARNINGS += -fno-strict-aliasing
EXTRA_WARNINGS += -Wno-shadow
else
EXTRA_WARNINGS += -Wshadow
endif

ifneq ($(findstring $(MAKEFLAGS), w),w)
Expand Down

0 comments on commit 750991f

Please sign in to comment.