
Commit 3871d93

Merge tag 'perf-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events updates from Ingo Molnar:

 "PMU driver updates:

   - Add AMD Last Branch Record Extension Version 2 (LbrExtV2) feature
     support for Zen 4 processors.

   - Extend the perf ABI to provide branch speculation information, if
     available, and use this on CPUs that have it (e.g. LbrExtV2).

   - Improve Intel PEBS TSC timestamp handling & integration.

   - Add Intel Raptor Lake S CPU support.

   - Add 'perf mem' and 'perf c2c' memory profiling support on AMD CPUs
     by utilizing IBS tagged load/store samples.

   - Clean up & optimize various x86 PMU details.

  HW breakpoints:

   - Big rework to optimize the code for systems with hundreds of CPUs
     and thousands of breakpoints:

      - Replace the nr_bp_mutex global mutex with the bp_cpuinfo_sem
        per-CPU rwsem that is read-locked during most of the key
        operations.

      - Improve the O(#cpus * #tasks) logic in toggle_bp_slot() and
        fetch_bp_busy_slots().

      - Apply micro-optimizations & cleanups.

  - Misc cleanups & enhancements"

* tag 'perf-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (75 commits)
  perf/hw_breakpoint: Annotate tsk->perf_event_mutex vs ctx->mutex
  perf: Fix pmu_filter_match()
  perf: Fix lockdep_assert_event_ctx()
  perf/x86/amd/lbr: Adjust LBR regardless of filtering
  perf/x86/utils: Fix uninitialized var in get_branch_type()
  perf/uapi: Define PERF_MEM_SNOOPX_PEER in kernel header file
  perf/x86/amd: Support PERF_SAMPLE_PHY_ADDR
  perf/x86/amd: Support PERF_SAMPLE_ADDR
  perf/x86/amd: Support PERF_SAMPLE_{WEIGHT|WEIGHT_STRUCT}
  perf/x86/amd: Support PERF_SAMPLE_DATA_SRC
  perf/x86/amd: Add IBS OP_DATA2 DataSrc bit definitions
  perf/mem: Introduce PERF_MEM_LVLNUM_{EXTN_MEM|IO}
  perf/x86/uncore: Add new Raptor Lake S support
  perf/x86/cstate: Add new Raptor Lake S support
  perf/x86/msr: Add new Raptor Lake S support
  perf/x86: Add new Raptor Lake S support
  bpf: Check flags for branch stack in bpf_read_branch_records helper
  perf, hw_breakpoint: Fix use-after-free if perf_event_open() fails
  perf: Use sample_flags for raw_data
  perf: Use sample_flags for addr
  ...
2 parents 30c9999 + 82aad7f commit 3871d93


42 files changed (+2613 −754 lines)
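A headline item in the summary above is the branch speculation extension to the perf ABI. As a hedged illustration (none of this code appears in the diffs below): a consumer that requests PERF_SAMPLE_BRANCH_STACK could read the speculation outcome of each branch entry via the new spec bitfield of struct perf_branch_entry; the field name and the PERF_BR_SPEC_* values are assumptions about the UAPI this series adds, so verify against your perf_event.h.

#include <linux/perf_event.h>
#include <stdio.h>

/* Hedged sketch: print the speculation outcome of already-decoded
 * branch-stack entries from a PERF_SAMPLE_BRANCH_STACK sample.
 * Assumes the 'spec' bitfield added to the UAPI by this series. */
static void print_branch_spec(const struct perf_branch_entry *entries,
                              unsigned long long nr)
{
        for (unsigned long long i = 0; i < nr; i++) {
                const struct perf_branch_entry *br = &entries[i];

                /* spec distinguishes e.g. wrong-path from correct-path
                 * speculation (PERF_BR_SPEC_* values) */
                printf("0x%llx -> 0x%llx spec=%u\n",
                       (unsigned long long)br->from,
                       (unsigned long long)br->to,
                       (unsigned int)br->spec);
        }
}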

arch/powerpc/kernel/hw_breakpoint.c (+40 −13)

@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
+#include <linux/spinlock.h>
 #include <linux/debugfs.h>
 #include <linux/init.h>
@@ -129,7 +130,14 @@ struct breakpoint {
 	bool ptrace_bp;
 };
 
+/*
+ * While kernel/events/hw_breakpoint.c does its own synchronization, we cannot
+ * rely on it safely synchronizing internals here; however, we can rely on it
+ * not requesting more breakpoints than available.
+ */
+static DEFINE_SPINLOCK(cpu_bps_lock);
 static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]);
+static DEFINE_SPINLOCK(task_bps_lock);
 static LIST_HEAD(task_bps);
 
 static struct breakpoint *alloc_breakpoint(struct perf_event *bp)
@@ -174,14 +182,17 @@ static int task_bps_add(struct perf_event *bp)
 	if (IS_ERR(tmp))
 		return PTR_ERR(tmp);
 
+	spin_lock(&task_bps_lock);
 	list_add(&tmp->list, &task_bps);
+	spin_unlock(&task_bps_lock);
 	return 0;
 }
 
 static void task_bps_remove(struct perf_event *bp)
 {
 	struct list_head *pos, *q;
 
+	spin_lock(&task_bps_lock);
 	list_for_each_safe(pos, q, &task_bps) {
 		struct breakpoint *tmp = list_entry(pos, struct breakpoint, list);
 
@@ -191,6 +202,7 @@ static void task_bps_remove(struct perf_event *bp)
 			break;
 		}
 	}
+	spin_unlock(&task_bps_lock);
 }
 
 /*
@@ -200,12 +212,17 @@ static void task_bps_remove(struct perf_event *bp)
 static bool all_task_bps_check(struct perf_event *bp)
 {
 	struct breakpoint *tmp;
+	bool ret = false;
 
+	spin_lock(&task_bps_lock);
 	list_for_each_entry(tmp, &task_bps, list) {
-		if (!can_co_exist(tmp, bp))
-			return true;
+		if (!can_co_exist(tmp, bp)) {
+			ret = true;
+			break;
+		}
 	}
-	return false;
+	spin_unlock(&task_bps_lock);
+	return ret;
 }
 
 /*
@@ -215,13 +232,18 @@ static bool all_task_bps_check(struct perf_event *bp)
 static bool same_task_bps_check(struct perf_event *bp)
 {
 	struct breakpoint *tmp;
+	bool ret = false;
 
+	spin_lock(&task_bps_lock);
 	list_for_each_entry(tmp, &task_bps, list) {
 		if (tmp->bp->hw.target == bp->hw.target &&
-		    !can_co_exist(tmp, bp))
-			return true;
+		    !can_co_exist(tmp, bp)) {
+			ret = true;
+			break;
+		}
 	}
-	return false;
+	spin_unlock(&task_bps_lock);
+	return ret;
 }
 
 static int cpu_bps_add(struct perf_event *bp)
@@ -234,13 +256,15 @@ static int cpu_bps_add(struct perf_event *bp)
 	if (IS_ERR(tmp))
 		return PTR_ERR(tmp);
 
+	spin_lock(&cpu_bps_lock);
 	cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
 	for (i = 0; i < nr_wp_slots(); i++) {
 		if (!cpu_bp[i]) {
 			cpu_bp[i] = tmp;
 			break;
 		}
 	}
+	spin_unlock(&cpu_bps_lock);
 	return 0;
 }
 
@@ -249,6 +273,7 @@ static void cpu_bps_remove(struct perf_event *bp)
 	struct breakpoint **cpu_bp;
 	int i = 0;
 
+	spin_lock(&cpu_bps_lock);
 	cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
 	for (i = 0; i < nr_wp_slots(); i++) {
 		if (!cpu_bp[i])
@@ -260,19 +285,25 @@ static void cpu_bps_remove(struct perf_event *bp)
 			break;
 		}
 	}
+	spin_unlock(&cpu_bps_lock);
 }
 
 static bool cpu_bps_check(int cpu, struct perf_event *bp)
 {
 	struct breakpoint **cpu_bp;
+	bool ret = false;
 	int i;
 
+	spin_lock(&cpu_bps_lock);
 	cpu_bp = per_cpu_ptr(cpu_bps, cpu);
 	for (i = 0; i < nr_wp_slots(); i++) {
-		if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp))
-			return true;
+		if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) {
+			ret = true;
+			break;
+		}
 	}
-	return false;
+	spin_unlock(&cpu_bps_lock);
+	return ret;
 }
 
 static bool all_cpu_bps_check(struct perf_event *bp)
@@ -286,10 +317,6 @@ static bool all_cpu_bps_check(struct perf_event *bp)
 	return false;
 }
 
-/*
- * We don't use any locks to serialize accesses to cpu_bps or task_bps
- * because are already inside nr_bp_mutex.
- */
 int arch_reserve_bp_slot(struct perf_event *bp)
 {
 	int ret;
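Note on the locking above: the removed comment relied on the core's global nr_bp_mutex, which this series replaces with the bp_cpuinfo_sem per-CPU rwsem (read-locked on the hot paths, per the merge summary), so the powerpc code now needs its own spinlocks. A minimal sketch of that per-CPU rwsem pattern follows; it is illustrative, not code from this commit, and demo_sem / frequent_op / rare_global_op are made-up names.

#include <linux/percpu-rwsem.h>

/* Sketch of the pattern the merge summary describes: frequent per-event
 * operations take the cheap read side (and serialize among themselves
 * with finer-grained locks), while rare global updates take the write
 * side to exclude all readers at once. */
static DEFINE_STATIC_PERCPU_RWSEM(demo_sem);

static void frequent_op(void)
{
        percpu_down_read(&demo_sem);    /* cheap: no cross-CPU cacheline bouncing */
        /* ... per-CPU or finer-locked constraint accounting ... */
        percpu_up_read(&demo_sem);
}

static void rare_global_op(void)
{
        percpu_down_write(&demo_sem);   /* expensive: waits for all readers */
        /* ... rewrite global state ... */
        percpu_up_write(&demo_sem);
}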

arch/powerpc/perf/core-book3s.c (+7 −3)

@@ -2314,16 +2314,20 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 			cpuhw = this_cpu_ptr(&cpu_hw_events);
 			power_pmu_bhrb_read(event, cpuhw);
 			data.br_stack = &cpuhw->bhrb_stack;
+			data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
 		}
 
 		if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
-				ppmu->get_mem_data_src)
+				ppmu->get_mem_data_src) {
 			ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);
+			data.sample_flags |= PERF_SAMPLE_DATA_SRC;
+		}
 
 		if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
-				ppmu->get_mem_weight)
+				ppmu->get_mem_weight) {
 			ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
-
+			data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+		}
 		if (perf_event_overflow(event, &data, regs))
 			power_pmu_stop(event, 0);
 	} else if (period) {

arch/s390/kernel/perf_cpum_cf.c (+1 −0)

@@ -664,6 +664,7 @@ static int cfdiag_push_sample(struct perf_event *event,
 		raw.frag.data = cpuhw->stop;
 		raw.size = raw.frag.size;
 		data.raw = &raw;
+		data.sample_flags |= PERF_SAMPLE_RAW;
 	}
 
 	overflow = perf_event_overflow(event, &data, &regs);

arch/s390/kernel/perf_pai_crypto.c (+1 −0)

@@ -366,6 +366,7 @@ static int paicrypt_push_sample(void)
 		raw.frag.data = cpump->save;
 		raw.size = raw.frag.size;
 		data.raw = &raw;
+		data.sample_flags |= PERF_SAMPLE_RAW;
 	}
 
 	overflow = perf_event_overflow(event, &data, &regs);
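The powerpc and s390 hunks above all follow the convention introduced by the "perf: Use sample_flags for ..." commits in this series: when a PMU fills a field of struct perf_sample_data, it must also set the matching PERF_SAMPLE_* bit in data.sample_flags so the core can tell populated fields from stale ones. A condensed, illustrative sketch of the pattern (demo_push_sample is a made-up name):

#include <linux/perf_event.h>

/* Sketch of the sample_flags convention: set a PERF_SAMPLE_* bit only
 * when the matching perf_sample_data field was actually filled, so the
 * core's sample preparation trusts only valid fields. */
static void demo_push_sample(struct perf_event *event,
                             struct perf_sample_data *data,
                             struct perf_raw_record *raw)
{
        if (event->attr.sample_type & PERF_SAMPLE_RAW) {
                data->raw = raw;
                data->sample_flags |= PERF_SAMPLE_RAW; /* mark field valid */
        }
}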

arch/sh/include/asm/hw_breakpoint.h (+1 −4)

@@ -48,10 +48,7 @@ struct pmu;
 /* Maximum number of UBC channels */
 #define HBP_NUM 2
 
-static inline int hw_breakpoint_slots(int type)
-{
-	return HBP_NUM;
-}
+#define hw_breakpoint_slots(type) (HBP_NUM)
 
 /* arch/sh/kernel/hw_breakpoint.c */
 extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);

arch/x86/events/Makefile (+1 −1)

@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-y					+= core.o probe.o
+obj-y					+= core.o probe.o utils.o
 obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL)	+= rapl.o
 obj-y					+= amd/
 obj-$(CONFIG_X86_LOCAL_APIC)		+= msr.o

arch/x86/events/amd/Makefile (+1 −1)

@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_CPU_SUP_AMD)		+= core.o
+obj-$(CONFIG_CPU_SUP_AMD)		+= core.o lbr.o
 obj-$(CONFIG_PERF_EVENTS_AMD_BRS)	+= brs.o
 obj-$(CONFIG_PERF_EVENTS_AMD_POWER)	+= power.o
 obj-$(CONFIG_X86_LOCAL_APIC)		+= ibs.o

arch/x86/events/amd/brs.c (+68 −1)

@@ -81,7 +81,7 @@ static bool __init amd_brs_detect(void)
  * a br_sel_map. Software filtering is not supported because it would not correlate well
  * with a sampling period.
  */
-int amd_brs_setup_filter(struct perf_event *event)
+static int amd_brs_setup_filter(struct perf_event *event)
 {
 	u64 type = event->attr.branch_sample_type;
 
@@ -96,6 +96,73 @@ int amd_brs_setup_filter(struct perf_event *event)
 	return 0;
 }
 
+static inline int amd_is_brs_event(struct perf_event *e)
+{
+	return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
+}
+
+int amd_brs_hw_config(struct perf_event *event)
+{
+	int ret = 0;
+
+	/*
+	 * Due to interrupt holding, BRS is not recommended in
+	 * counting mode.
+	 */
+	if (!is_sampling_event(event))
+		return -EINVAL;
+
+	/*
+	 * Due to the way BRS operates by holding the interrupt until
+	 * lbr_nr entries have been captured, it does not make sense
+	 * to allow sampling on BRS with an event that does not match
+	 * what BRS is capturing, i.e., retired taken branches.
+	 * Otherwise the correlation with the event's period is even
+	 * more loose:
+	 *
+	 * With retired taken branch:
+	 *   Effective P = P + 16 + X
+	 * With any other event:
+	 *   Effective P = P + Y + X
+	 *
+	 * Where X is the number of taken branches due to interrupt
+	 * skid. Skid is large.
+	 *
+	 * Where Y is the occurences of the event while BRS is
+	 * capturing the lbr_nr entries.
+	 *
+	 * By using retired taken branches, we limit the impact on the
+	 * Y variable. We know it cannot be more than the depth of
+	 * BRS.
+	 */
+	if (!amd_is_brs_event(event))
+		return -EINVAL;
+
+	/*
+	 * BRS implementation does not work with frequency mode
+	 * reprogramming of the period.
+	 */
+	if (event->attr.freq)
+		return -EINVAL;
+	/*
+	 * The kernel subtracts BRS depth from period, so it must
+	 * be big enough.
+	 */
+	if (event->attr.sample_period <= x86_pmu.lbr_nr)
+		return -EINVAL;
+
+	/*
+	 * Check if we can allow PERF_SAMPLE_BRANCH_STACK
+	 */
+	ret = amd_brs_setup_filter(event);
+
+	/* only set in case of success */
+	if (!ret)
+		event->hw.flags |= PERF_X86_EVENT_AMD_BRS;
+
+	return ret;
+}
+
 /* tos = top of stack, i.e., last valid entry written */
 static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
 {
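For context, amd_brs_hw_config() above encodes the constraints a user-space event must satisfy: sampling mode, the retired-taken-branches event itself, no frequency mode, and a sample period larger than the BRS depth. Below is a hedged user-space sketch of an attr that would satisfy those checks; the 0xc4 raw encoding for retired taken branches and the PERF_SAMPLE_BRANCH_ANY filter are assumptions, not taken from this diff.

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Hedged sketch: open a sampling event compatible with the checks in
 * amd_brs_hw_config().  0xc4 (retired taken branches) is an assumed
 * raw encoding; check your CPU's PPR. */
static int open_brs_event(void)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_RAW;
        attr.config = 0xc4;             /* must be the BRS event */
        attr.freq = 0;                  /* frequency mode is rejected */
        attr.sample_period = 100000;    /* must exceed the BRS depth */
        attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
        attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY;

        /* perf_event_open() has no glibc wrapper */
        return (int)syscall(__NR_perf_event_open, &attr, 0 /* this task */,
                            -1 /* any CPU */, -1 /* no group */, 0);
}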
