Skip to content

Commit

Permalink
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/…
Browse files Browse the repository at this point in the history
…linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
 "Mostly tooling fixes, plus on the kernel side:

   - a revert for a newly introduced PMU driver which isn't complete yet
     and where we ran out of time with fixes (to be tried again in
     v3.19) - this makes up for a large chunk of the diffstat.

   - compilation warning fixes

   - a printk message fix

   - event_idx usage fixes/cleanups"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf probe: Trivial typo fix for --demangle
  perf tools: Fix report -F dso_from for data without branch info
  perf tools: Fix report -F dso_to for data without branch info
  perf tools: Fix report -F symbol_from for data without branch info
  perf tools: Fix report -F symbol_to for data without branch info
  perf tools: Fix report -F mispredict for data without branch info
  perf tools: Fix report -F in_tx for data without branch info
  perf tools: Fix report -F abort for data without branch info
  perf tools: Make CPUINFO_PROC an array to support different kernel versions
  perf callchain: Use global caching provided by libunwind
  perf/x86/intel: Revert incomplete and undocumented Broadwell client support
  perf/x86: Fix compile warnings for intel_uncore
  perf: Fix typos in sample code in the perf_event.h header
  perf: Fix and clean up initialization of pmu::event_idx
  perf: Fix bogus kernel printk
  perf diff: Add missing hists__init() call at tool start
  • Loading branch information
torvalds committed Oct 31, 2014
2 parents c958f92 + d785452 commit 5656b40
Show file tree
Hide file tree
Showing 19 changed files with 158 additions and 279 deletions.
6 changes: 0 additions & 6 deletions arch/powerpc/perf/hv-24x7.c
Original file line number Diff line number Diff line change
Expand Up @@ -417,11 +417,6 @@ static int h_24x7_event_add(struct perf_event *event, int flags)
return 0;
}

static int h_24x7_event_idx(struct perf_event *event)
{
return 0;
}

static struct pmu h_24x7_pmu = {
.task_ctx_nr = perf_invalid_context,

Expand All @@ -433,7 +428,6 @@ static struct pmu h_24x7_pmu = {
.start = h_24x7_event_start,
.stop = h_24x7_event_stop,
.read = h_24x7_event_update,
.event_idx = h_24x7_event_idx,
};

static int hv_24x7_init(void)
Expand Down
6 changes: 0 additions & 6 deletions arch/powerpc/perf/hv-gpci.c
Original file line number Diff line number Diff line change
Expand Up @@ -246,11 +246,6 @@ static int h_gpci_event_init(struct perf_event *event)
return 0;
}

static int h_gpci_event_idx(struct perf_event *event)
{
return 0;
}

static struct pmu h_gpci_pmu = {
.task_ctx_nr = perf_invalid_context,

Expand All @@ -262,7 +257,6 @@ static struct pmu h_gpci_pmu = {
.start = h_gpci_event_start,
.stop = h_gpci_event_stop,
.read = h_gpci_event_update,
.event_idx = h_gpci_event_idx,
};

static int hv_gpci_init(void)
Expand Down
6 changes: 0 additions & 6 deletions arch/s390/kernel/perf_cpum_sf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1411,11 +1411,6 @@ static void cpumsf_pmu_del(struct perf_event *event, int flags)
perf_pmu_enable(event->pmu);
}

static int cpumsf_pmu_event_idx(struct perf_event *event)
{
return event->hw.idx;
}

CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);

Expand Down Expand Up @@ -1458,7 +1453,6 @@ static struct pmu cpumf_sampling = {
.stop = cpumsf_pmu_stop,
.read = cpumsf_pmu_read,

.event_idx = cpumsf_pmu_event_idx,
.attr_groups = cpumsf_pmu_attr_groups,
};

Expand Down
4 changes: 4 additions & 0 deletions arch/x86/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,10 @@ config INSTRUCTION_DECODER
def_bool y
depends on KPROBES || PERF_EVENTS || UPROBES

config PERF_EVENTS_INTEL_UNCORE
def_bool y
depends on PERF_EVENTS && SUP_SUP_INTEL && PCI

config OUTPUT_FORMAT
string
default "elf32-i386" if X86_32
Expand Down
7 changes: 5 additions & 2 deletions arch/x86/kernel/cpu/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,12 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
endif
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o perf_event_intel_uncore_snb.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore_snbep.o perf_event_intel_uncore_nhmex.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o

obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \
perf_event_intel_uncore_snb.o \
perf_event_intel_uncore_snbep.o \
perf_event_intel_uncore_nhmex.o
endif


Expand Down
14 changes: 3 additions & 11 deletions arch/x86/kernel/cpu/perf_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,9 @@ static bool check_hw_exists(void)

msr_fail:
printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
printk(boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR
"Failed to access perfctr msr (MSR %x is %Lx)\n", reg, val_new);
printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
reg, val_new);

return false;
}
Expand Down Expand Up @@ -444,12 +445,6 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event->attr.type == PERF_TYPE_RAW)
event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;

if (event->attr.sample_period && x86_pmu.limit_period) {
if (x86_pmu.limit_period(event, event->attr.sample_period) >
event->attr.sample_period)
return -EINVAL;
}

return x86_setup_perfctr(event);
}

Expand Down Expand Up @@ -987,9 +982,6 @@ int x86_perf_event_set_period(struct perf_event *event)
if (left > x86_pmu.max_period)
left = x86_pmu.max_period;

if (x86_pmu.limit_period)
left = x86_pmu.limit_period(event, left);

per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;

/*
Expand Down
1 change: 0 additions & 1 deletion arch/x86/kernel/cpu/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,6 @@ struct x86_pmu {
struct x86_pmu_quirk *quirks;
int perfctr_second_write;
bool late_ack;
unsigned (*limit_period)(struct perf_event *event, unsigned l);

/*
* sysfs attrs
Expand Down
173 changes: 2 additions & 171 deletions arch/x86/kernel/cpu/perf_event_intel.c
Original file line number Diff line number Diff line change
Expand Up @@ -220,15 +220,6 @@ static struct event_constraint intel_hsw_event_constraints[] = {
EVENT_CONSTRAINT_END
};

static struct event_constraint intel_bdw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */
EVENT_CONSTRAINT_END
};

static u64 intel_pmu_event_map(int hw_event)
{
return intel_perfmon_event_map[hw_event];
Expand Down Expand Up @@ -424,126 +415,6 @@ static __initconst const u64 snb_hw_cache_event_ids

};

static __initconst const u64 hsw_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(L1D ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
[ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
[ C(RESULT_MISS) ] = 0x0,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(L1I ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */
[ C(RESULT_ACCESS) ] = 0x1b7,
/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE|
L3_MISS|ANY_SNOOP */
[ C(RESULT_MISS) ] = 0x1b7,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE:ALL_RFO */
/* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */
[ C(RESULT_MISS) ] = 0x1b7,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
[ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
[ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */
[ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(BPU ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
[ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
};

static __initconst const u64 hsw_hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(LL ) ] = {
[ C(OP_READ) ] = {
/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */
[ C(RESULT_ACCESS) ] = 0x2d5,
/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE|
L3_MISS|ANY_SNOOP */
[ C(RESULT_MISS) ] = 0x3fbc0202d5ull,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x122, /* OFFCORE_RESPONSE:ALL_RFO */
/* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */
[ C(RESULT_MISS) ] = 0x3fbc020122ull,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
};

static __initconst const u64 westmere_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
Expand Down Expand Up @@ -2034,24 +1905,6 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
return c;
}

/*
* Broadwell:
* The INST_RETIRED.ALL period always needs to have lowest
* 6bits cleared (BDM57). It shall not use a period smaller
* than 100 (BDM11). We combine the two to enforce
* a min-period of 128.
*/
static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
{
if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
X86_CONFIG(.event=0xc0, .umask=0x01)) {
if (left < 128)
left = 128;
left &= ~0x3fu;
}
return left;
}

PMU_FORMAT_ATTR(event, "config:0-7" );
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
Expand Down Expand Up @@ -2692,8 +2545,8 @@ __init int intel_pmu_init(void)
case 69: /* 22nm Haswell ULT */
case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));

intel_pmu_lbr_init_snb();

Expand All @@ -2712,28 +2565,6 @@ __init int intel_pmu_init(void)
pr_cont("Haswell events, ");
break;

case 61: /* 14nm Broadwell Core-M */
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));

intel_pmu_lbr_init_snb();

x86_pmu.event_constraints = intel_bdw_event_constraints;
x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
x86_pmu.extra_regs = intel_snbep_extra_regs;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
/* all extra regs are per-cpu when HT is on */
x86_pmu.er_flags |= ERF_HAS_RSP_1;
x86_pmu.er_flags |= ERF_NO_HT_SHARING;

x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
x86_pmu.cpu_events = hsw_events_attrs;
x86_pmu.limit_period = bdw_limit_period;
pr_cont("Broadwell events, ");
break;

default:
switch (x86_pmu.version) {
case 1:
Expand Down
14 changes: 7 additions & 7 deletions include/uapi/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ struct perf_event_mmap_page {
/*
* Bits needed to read the hw events in user-space.
*
* u32 seq, time_mult, time_shift, idx, width;
* u32 seq, time_mult, time_shift, index, width;
* u64 count, enabled, running;
* u64 cyc, time_offset;
* s64 pmc = 0;
Expand All @@ -383,11 +383,11 @@ struct perf_event_mmap_page {
* time_shift = pc->time_shift;
* }
*
* idx = pc->index;
* index = pc->index;
* count = pc->offset;
* if (pc->cap_usr_rdpmc && idx) {
* if (pc->cap_user_rdpmc && index) {
* width = pc->pmc_width;
* pmc = rdpmc(idx - 1);
* pmc = rdpmc(index - 1);
* }
*
* barrier();
Expand Down Expand Up @@ -415,7 +415,7 @@ struct perf_event_mmap_page {
};

/*
* If cap_usr_rdpmc this field provides the bit-width of the value
* If cap_user_rdpmc this field provides the bit-width of the value
* read using the rdpmc() or equivalent instruction. This can be used
* to sign extend the result like:
*
Expand All @@ -439,10 +439,10 @@ struct perf_event_mmap_page {
*
* Where time_offset,time_mult,time_shift and cyc are read in the
* seqcount loop described above. This delta can then be added to
* enabled and possible running (if idx), improving the scaling:
* enabled and possible running (if index), improving the scaling:
*
* enabled += delta;
* if (idx)
* if (index)
* running += delta;
*
* quot = count / running;
Expand Down
Loading

0 comments on commit 5656b40

Please sign in to comment.