Skip to content

Commit

Permalink
perf stat: Use affinity for opening events
Browse files Browse the repository at this point in the history
Restructure the event opening in perf stat to cycle through the events
by CPU after setting affinity to that CPU.

This eliminates IPI overhead in the perf API.

We have to loop through the CPUs in the outer builtin-stat code instead
of leaving that to the low-level functions.

It has to change the weak group fallback strategy slightly.  Since we
cannot easily undo the opens for other CPUs move the weak group retry to
a separate loop.

Before with a large test case with 94 CPUs:

  % time     seconds  usecs/call     calls    errors syscall
  ------ ----------- ----------- --------- --------- ----------------
   42.75    4.050910          67     60046       110 perf_event_open

After:

   26.86    0.944396          16     58069       110 perf_event_open

(the number changes slightly because the weak group retries
work differently and the test case relies on weak groups)

Committer notes:

Added one of the hunks in a patch provided by Andi after I noticed that
the "event times" 'perf test' entry was segfaulting.

Signed-off-by: Andi Kleen <[email protected]>
Acked-by: Jiri Olsa <[email protected]>
Link: http://lore.kernel.org/lkml/[email protected]
Link: http://lore.kernel.org/lkml/[email protected] # Fix
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
  • Loading branch information
Andi Kleen authored and acmel committed Nov 29, 2019
1 parent e0e6a6c commit 4804e01
Show file tree
Hide file tree
Showing 9 changed files with 141 additions and 34 deletions.
2 changes: 1 addition & 1 deletion tools/perf/builtin-record.c
Original file line number Diff line number Diff line change
Expand Up @@ -832,7 +832,7 @@ static int record__open(struct record *rec)
if ((errno == EINVAL || errno == EBADF) &&
pos->leader != pos &&
pos->weak_group) {
pos = perf_evlist__reset_weak_group(evlist, pos);
pos = perf_evlist__reset_weak_group(evlist, pos, true);
goto try_again;
}
rc = -errno;
Expand Down
121 changes: 102 additions & 19 deletions tools/perf/builtin-stat.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
#include "util/target.h"
#include "util/time-utils.h"
#include "util/top.h"
#include "util/affinity.h"
#include "asm/bug.h"

#include <linux/time64.h>
Expand Down Expand Up @@ -440,6 +441,11 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
ui__warning("%s event is not supported by the kernel.\n",
perf_evsel__name(counter));
counter->supported = false;
/*
* errored is a sticky flag that means one of the counter's
* cpu event had a problem and needs to be reexamined.
*/
counter->errored = true;

if ((counter->leader != counter) ||
!(counter->leader->core.nr_members > 1))
Expand Down Expand Up @@ -484,6 +490,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
int status = 0;
const bool forks = (argc > 0);
bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
struct affinity affinity;
int i, cpu;
bool second_pass = false;

if (interval) {
ts.tv_sec = interval / USEC_PER_MSEC;
Expand All @@ -508,30 +517,104 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
if (group)
perf_evlist__set_leader(evsel_list);

evlist__for_each_entry(evsel_list, counter) {
if (affinity__setup(&affinity) < 0)
return -1;

evlist__for_each_cpu (evsel_list, i, cpu) {
affinity__set(&affinity, cpu);

evlist__for_each_entry(evsel_list, counter) {
if (evsel__cpu_iter_skip(counter, cpu))
continue;
if (counter->reset_group || counter->errored)
continue;
try_again:
if (create_perf_stat_counter(counter, &stat_config, &target) < 0) {

/* Weak group failed. Reset the group. */
if ((errno == EINVAL || errno == EBADF) &&
counter->leader != counter &&
counter->weak_group) {
counter = perf_evlist__reset_weak_group(evsel_list, counter);
goto try_again;
if (create_perf_stat_counter(counter, &stat_config, &target,
counter->cpu_iter - 1) < 0) {

/*
* Weak group failed. We cannot just undo this here
* because earlier CPUs might be in group mode, and the kernel
* doesn't support mixing group and non group reads. Defer
* it to later.
* Don't close here because we're in the wrong affinity.
*/
if ((errno == EINVAL || errno == EBADF) &&
counter->leader != counter &&
counter->weak_group) {
perf_evlist__reset_weak_group(evsel_list, counter, false);
assert(counter->reset_group);
second_pass = true;
continue;
}

switch (stat_handle_error(counter)) {
case COUNTER_FATAL:
return -1;
case COUNTER_RETRY:
goto try_again;
case COUNTER_SKIP:
continue;
default:
break;
}

}
counter->supported = true;
}
}

switch (stat_handle_error(counter)) {
case COUNTER_FATAL:
return -1;
case COUNTER_RETRY:
goto try_again;
case COUNTER_SKIP:
continue;
default:
break;
if (second_pass) {
/*
* Now redo all the weak group after closing them,
* and also close errored counters.
*/

evlist__for_each_cpu(evsel_list, i, cpu) {
affinity__set(&affinity, cpu);
/* First close errored or weak retry */
evlist__for_each_entry(evsel_list, counter) {
if (!counter->reset_group && !counter->errored)
continue;
if (evsel__cpu_iter_skip_no_inc(counter, cpu))
continue;
perf_evsel__close_cpu(&counter->core, counter->cpu_iter);
}
/* Now reopen weak */
evlist__for_each_entry(evsel_list, counter) {
if (!counter->reset_group && !counter->errored)
continue;
if (evsel__cpu_iter_skip(counter, cpu))
continue;
if (!counter->reset_group)
continue;
try_again_reset:
pr_debug2("reopening weak %s\n", perf_evsel__name(counter));
if (create_perf_stat_counter(counter, &stat_config, &target,
counter->cpu_iter - 1) < 0) {

switch (stat_handle_error(counter)) {
case COUNTER_FATAL:
return -1;
case COUNTER_RETRY:
goto try_again_reset;
case COUNTER_SKIP:
continue;
default:
break;
}
}
counter->supported = true;
}
}
counter->supported = true;
}
affinity__cleanup(&affinity);

evlist__for_each_entry(evsel_list, counter) {
if (!counter->supported) {
perf_evsel__free_fd(&counter->core);
continue;
}

l = strlen(counter->unit);
if (l > stat_config.unit_width)
Expand Down
4 changes: 2 additions & 2 deletions tools/perf/tests/event-times.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ static int attach__cpu_disabled(struct evlist *evlist)

evsel->core.attr.disabled = 1;

err = perf_evsel__open_per_cpu(evsel, cpus);
err = perf_evsel__open_per_cpu(evsel, cpus, -1);
if (err) {
if (err == -EACCES)
return TEST_SKIP;
Expand All @@ -152,7 +152,7 @@ static int attach__cpu_enabled(struct evlist *evlist)
return -1;
}

err = perf_evsel__open_per_cpu(evsel, cpus);
err = perf_evsel__open_per_cpu(evsel, cpus, -1);
if (err == -EACCES)
return TEST_SKIP;

Expand Down
10 changes: 8 additions & 2 deletions tools/perf/util/evlist.c
Original file line number Diff line number Diff line change
Expand Up @@ -1636,7 +1636,8 @@ void perf_evlist__force_leader(struct evlist *evlist)
}

struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
struct evsel *evsel)
struct evsel *evsel,
bool close)
{
struct evsel *c2, *leader;
bool is_open = true;
Expand All @@ -1653,10 +1654,15 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
if (c2 == evsel)
is_open = false;
if (c2->leader == leader) {
if (is_open)
if (is_open && close)
perf_evsel__close(&c2->core);
c2->leader = c2;
c2->core.nr_members = 0;
/*
* Set this for all former members of the group
* to indicate they get reopened.
*/
c2->reset_group = true;
}
}
return leader;
Expand Down
3 changes: 2 additions & 1 deletion tools/perf/util/evlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -356,5 +356,6 @@ bool perf_evlist__exclude_kernel(struct evlist *evlist);
void perf_evlist__force_leader(struct evlist *evlist);

struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist,
struct evsel *evsel);
struct evsel *evsel,
bool close);
#endif /* __PERF_EVLIST_H */
22 changes: 17 additions & 5 deletions tools/perf/util/evsel.c
Original file line number Diff line number Diff line change
Expand Up @@ -1587,8 +1587,9 @@ static int perf_event_open(struct evsel *evsel,
return fd;
}

int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads)
static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads,
int start_cpu, int end_cpu)
{
int cpu, thread, nthreads;
unsigned long flags = PERF_FLAG_FD_CLOEXEC;
Expand Down Expand Up @@ -1665,7 +1666,7 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,

display_attr(&evsel->core.attr);

for (cpu = 0; cpu < cpus->nr; cpu++) {
for (cpu = start_cpu; cpu < end_cpu; cpu++) {

for (thread = 0; thread < nthreads; thread++) {
int fd, group_fd;
Expand Down Expand Up @@ -1843,16 +1844,27 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
return err;
}

/*
 * Open the event on all CPUs of the given map (or on one "dummy" CPU
 * slot when no CPU map is supplied) by delegating to evsel__open_cpu()
 * with the full [0, nr) CPU range.
 */
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
		struct perf_thread_map *threads)
{
	/* With no CPU map, a single iteration still opens the event. */
	int nr_cpus = cpus ? cpus->nr : 1;

	return evsel__open_cpu(evsel, cpus, threads, 0, nr_cpus);
}

/*
 * Tear down an evsel's open state: close its event file descriptors and
 * release the sample-id bookkeeping associated with them.
 */
void evsel__close(struct evsel *evsel)
{
/* Close all event FDs held in the core evsel. */
perf_evsel__close(&evsel->core);
/* Free the id arrays (presumably the sample-id -> evsel lookup state). */
perf_evsel__free_id(&evsel->core);
}

int perf_evsel__open_per_cpu(struct evsel *evsel,
struct perf_cpu_map *cpus)
struct perf_cpu_map *cpus,
int cpu)
{
return evsel__open(evsel, cpus, NULL);
if (cpu == -1)
return evsel__open_cpu(evsel, cpus, NULL, 0,
cpus ? cpus->nr : 1);

return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1);
}

int perf_evsel__open_per_thread(struct evsel *evsel,
Expand Down
5 changes: 4 additions & 1 deletion tools/perf/util/evsel.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ struct evsel {
struct evsel *metric_leader;
bool collect_stat;
bool weak_group;
bool reset_group;
bool errored;
bool percore;
int cpu_iter;
const char *pmu_name;
Expand Down Expand Up @@ -223,7 +225,8 @@ int evsel__enable(struct evsel *evsel);
int evsel__disable(struct evsel *evsel);

int perf_evsel__open_per_cpu(struct evsel *evsel,
struct perf_cpu_map *cpus);
struct perf_cpu_map *cpus,
int cpu);
int perf_evsel__open_per_thread(struct evsel *evsel,
struct perf_thread_map *threads);
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
Expand Down
5 changes: 3 additions & 2 deletions tools/perf/util/stat.c
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,8 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)

int create_perf_stat_counter(struct evsel *evsel,
struct perf_stat_config *config,
struct target *target)
struct target *target,
int cpu)
{
struct perf_event_attr *attr = &evsel->core.attr;
struct evsel *leader = evsel->leader;
Expand Down Expand Up @@ -518,7 +519,7 @@ int create_perf_stat_counter(struct evsel *evsel,
}

if (target__has_cpu(target) && !target__has_per_thread(target))
return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel));
return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);

return perf_evsel__open_per_thread(evsel, evsel->core.threads);
}
3 changes: 2 additions & 1 deletion tools/perf/util/stat.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,8 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);

int create_perf_stat_counter(struct evsel *evsel,
struct perf_stat_config *config,
struct target *target);
struct target *target,
int cpu);
void
perf_evlist__print_counters(struct evlist *evlist,
struct perf_stat_config *config,
Expand Down

0 comments on commit 4804e01

Please sign in to comment.