Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (50 commits)
  perf python scripting: Add futex-contention script
  perf python scripting: Fixup cut'n'paste error in sctop script
  perf scripting: Shut up 'perf record' final status
  perf record: Remove newline character from perror() argument
  perf python scripting: Support fedora 11 (audit 1.7.17)
  perf python scripting: Improve the syscalls-by-pid script
  perf python scripting: print the syscall name on sctop
  perf python scripting: Improve the syscalls-counts script
  perf python scripting: Improve the failed-syscalls-by-pid script
  kprobes: Remove redundant text_mutex lock in optimize
  x86/oprofile: Fix uninitialized variable use in debug printk
  tracing: Fix 'faild' -> 'failed' typo
  perf probe: Fix format specified for Dwarf_Off parameter
  perf trace: Fix detection of script extension
  perf trace: Use $PERF_EXEC_PATH in canned report scripts
  perf tools: Document event modifiers
  perf tools: Remove direct slang.h include
  perf_events: Fix for transaction recovery in group_sched_in()
  perf_events: Revert: Fix transaction recovery in group_sched_in()
  perf, x86: Use NUMA aware allocations for PEBS/BTS/DS allocations
  ...
torvalds committed Oct 28, 2010
2 parents f66dd53 + e25804a commit a042e26
Showing 48 changed files with 1,501 additions and 670 deletions.
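One of the merged changes, "perf, x86: Use NUMA aware allocations for PEBS/BTS/DS allocations", converts the DS/BTS/PEBS buffer setup from plain kzalloc() to node-local allocations, as seen in the perf_event_intel_ds.c diff further down. A minimal sketch of that allocation pattern, using a hypothetical buffer name and size rather than anything taken from the commit itself:

#include <linux/slab.h>
#include <linux/percpu.h>
#include <linux/topology.h>

/* Illustrative only: the size and per-CPU variable are made up for this sketch. */
#define EXAMPLE_BUFFER_SIZE 4096

static DEFINE_PER_CPU(void *, example_buffer);

static int alloc_example_buffer(int cpu)
{
	/*
	 * Allocate from the memory node that owns this CPU instead of
	 * whichever node the initialising CPU happens to run on --
	 * the same idea as the new alloc_*_buffer() helpers below.
	 */
	int node = cpu_to_node(cpu);
	void *buf;

	buf = kmalloc_node(EXAMPLE_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
	if (!buf)
		return -ENOMEM;

	per_cpu(example_buffer, cpu) = buf;
	return 0;
}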
1 change: 1 addition & 0 deletions arch/x86/include/asm/msr-index.h
@@ -121,6 +121,7 @@
#define MSR_AMD64_IBSDCLINAD 0xc0011038
#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
#define MSR_AMD64_IBSCTL 0xc001103a
#define MSR_AMD64_IBSBRTARGET 0xc001103b

/* Fam 10h MSRs */
#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
19 changes: 10 additions & 9 deletions arch/x86/include/asm/perf_event.h
@@ -111,17 +111,18 @@ union cpuid10_edx {
#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16)

/* IbsFetchCtl bits/masks */
#define IBS_FETCH_RAND_EN (1ULL<<57)
#define IBS_FETCH_VAL (1ULL<<49)
#define IBS_FETCH_ENABLE (1ULL<<48)
#define IBS_FETCH_CNT 0xFFFF0000ULL
#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
#define IBS_FETCH_RAND_EN (1ULL<<57)
#define IBS_FETCH_VAL (1ULL<<49)
#define IBS_FETCH_ENABLE (1ULL<<48)
#define IBS_FETCH_CNT 0xFFFF0000ULL
#define IBS_FETCH_MAX_CNT 0x0000FFFFULL

/* IbsOpCtl bits */
#define IBS_OP_CNT_CTL (1ULL<<19)
#define IBS_OP_VAL (1ULL<<18)
#define IBS_OP_ENABLE (1ULL<<17)
#define IBS_OP_MAX_CNT 0x0000FFFFULL
#define IBS_OP_CNT_CTL (1ULL<<19)
#define IBS_OP_VAL (1ULL<<18)
#define IBS_OP_ENABLE (1ULL<<17)
#define IBS_OP_MAX_CNT 0x0000FFFFULL
#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */

#ifdef CONFIG_PERF_EVENTS
extern void init_hw_perf_events(void);
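The IBS_FETCH_* and IBS_OP_* definitions above are bit masks for the AMD IbsFetchCtl and IbsOpCtl MSRs. A hedged sketch of how such masks are typically combined, assuming (as the mask values suggest) that the maximum fetch count sits in the low 16 bits and the current count in bits 31:16; the real programming logic lives in the IBS/oprofile code, not in this commit:

#include <stdint.h>

#define IBS_FETCH_ENABLE   (1ULL << 48)
#define IBS_FETCH_CNT      0xFFFF0000ULL
#define IBS_FETCH_MAX_CNT  0x0000FFFFULL

/*
 * Build an IbsFetchCtl value that enables fetch sampling with the given
 * maximum count (illustrative; real code also deals with randomization
 * and actually writes the MSR).
 */
static uint64_t ibs_fetch_config(uint64_t max_cnt)
{
	return IBS_FETCH_ENABLE | (max_cnt & IBS_FETCH_MAX_CNT);
}

/* Pull the current fetch counter out of a value read back from the MSR. */
static uint64_t ibs_fetch_count(uint64_t ctl)
{
	return (ctl & IBS_FETCH_CNT) >> 16;
}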
21 changes: 10 additions & 11 deletions arch/x86/kernel/cpu/perf_event.c
@@ -237,6 +237,7 @@ struct x86_pmu {
* Intel DebugStore bits
*/
int bts, pebs;
int bts_active, pebs_active;
int pebs_record_size;
void (*drain_pebs)(struct pt_regs *regs);
struct event_constraint *pebs_constraints;
@@ -380,7 +381,7 @@ static void release_pmc_hardware(void) {}

#endif

static int reserve_ds_buffers(void);
static void reserve_ds_buffers(void);
static void release_ds_buffers(void);

static void hw_perf_event_destroy(struct perf_event *event)
@@ -477,7 +478,7 @@ static int x86_setup_perfctr(struct perf_event *event)
if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
(hwc->sample_period == 1)) {
/* BTS is not supported by this architecture. */
if (!x86_pmu.bts)
if (!x86_pmu.bts_active)
return -EOPNOTSUPP;

/* BTS is currently only allowed for user-mode. */
@@ -496,12 +497,13 @@ static int x86_pmu_hw_config(struct perf_event *event)
int precise = 0;

/* Support for constant skid */
if (x86_pmu.pebs)
if (x86_pmu.pebs_active) {
precise++;

/* Support for IP fixup */
if (x86_pmu.lbr_nr)
precise++;
/* Support for IP fixup */
if (x86_pmu.lbr_nr)
precise++;
}

if (event->attr.precise_ip > precise)
return -EOPNOTSUPP;
@@ -543,11 +545,8 @@ static int __x86_pmu_event_init(struct perf_event *event)
if (atomic_read(&active_events) == 0) {
if (!reserve_pmc_hardware())
err = -EBUSY;
else {
err = reserve_ds_buffers();
if (err)
release_pmc_hardware();
}
else
reserve_ds_buffers();
}
if (!err)
atomic_inc(&active_events);
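The x86_pmu_hw_config() hunk above caps event->attr.precise_ip at what the hardware can deliver: 1 when PEBS is active, 2 when LBR-based IP fixup is also available. From user space that field is set in perf_event_attr before perf_event_open(); a minimal sketch (not part of the commit), with error handling reduced to one check:

#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.precise_ip = 2;	/* ask for PEBS plus IP fixup; the kernel
				   rejects levels it cannot provide with EOPNOTSUPP */

	/* There is no glibc wrapper for perf_event_open(); call it via syscall(). */
	fd = syscall(__NR_perf_event_open, &attr, 0 /* this thread */,
		     -1 /* any CPU */, -1 /* no group */, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	close(fd);
	return 0;
}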
216 changes: 150 additions & 66 deletions arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -74,6 +74,107 @@ static void fini_debug_store_on_cpu(int cpu)
wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}

static int alloc_pebs_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
int node = cpu_to_node(cpu);
int max, thresh = 1; /* always use a single PEBS record */
void *buffer;

if (!x86_pmu.pebs)
return 0;

buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
if (unlikely(!buffer))
return -ENOMEM;

max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;

ds->pebs_buffer_base = (u64)(unsigned long)buffer;
ds->pebs_index = ds->pebs_buffer_base;
ds->pebs_absolute_maximum = ds->pebs_buffer_base +
max * x86_pmu.pebs_record_size;

ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
thresh * x86_pmu.pebs_record_size;

return 0;
}

static void release_pebs_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

if (!ds || !x86_pmu.pebs)
return;

kfree((void *)(unsigned long)ds->pebs_buffer_base);
ds->pebs_buffer_base = 0;
}

static int alloc_bts_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
int node = cpu_to_node(cpu);
int max, thresh;
void *buffer;

if (!x86_pmu.bts)
return 0;

buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
if (unlikely(!buffer))
return -ENOMEM;

max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
thresh = max / 16;

ds->bts_buffer_base = (u64)(unsigned long)buffer;
ds->bts_index = ds->bts_buffer_base;
ds->bts_absolute_maximum = ds->bts_buffer_base +
max * BTS_RECORD_SIZE;
ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
thresh * BTS_RECORD_SIZE;

return 0;
}

static void release_bts_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

if (!ds || !x86_pmu.bts)
return;

kfree((void *)(unsigned long)ds->bts_buffer_base);
ds->bts_buffer_base = 0;
}

static int alloc_ds_buffer(int cpu)
{
int node = cpu_to_node(cpu);
struct debug_store *ds;

ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
if (unlikely(!ds))
return -ENOMEM;

per_cpu(cpu_hw_events, cpu).ds = ds;

return 0;
}

static void release_ds_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

if (!ds)
return;

per_cpu(cpu_hw_events, cpu).ds = NULL;
kfree(ds);
}

static void release_ds_buffers(void)
{
int cpu;
@@ -82,93 +183,77 @@ static void release_ds_buffers(void)
return;

get_online_cpus();

for_each_online_cpu(cpu)
fini_debug_store_on_cpu(cpu);

for_each_possible_cpu(cpu) {
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

if (!ds)
continue;

per_cpu(cpu_hw_events, cpu).ds = NULL;

kfree((void *)(unsigned long)ds->pebs_buffer_base);
kfree((void *)(unsigned long)ds->bts_buffer_base);
kfree(ds);
release_pebs_buffer(cpu);
release_bts_buffer(cpu);
release_ds_buffer(cpu);
}

put_online_cpus();
}

static int reserve_ds_buffers(void)
static void reserve_ds_buffers(void)
{
int cpu, err = 0;
int bts_err = 0, pebs_err = 0;
int cpu;

x86_pmu.bts_active = 0;
x86_pmu.pebs_active = 0;

if (!x86_pmu.bts && !x86_pmu.pebs)
return 0;
return;

if (!x86_pmu.bts)
bts_err = 1;

if (!x86_pmu.pebs)
pebs_err = 1;

get_online_cpus();

for_each_possible_cpu(cpu) {
struct debug_store *ds;
void *buffer;
int max, thresh;
if (alloc_ds_buffer(cpu)) {
bts_err = 1;
pebs_err = 1;
}

if (!bts_err && alloc_bts_buffer(cpu))
bts_err = 1;

err = -ENOMEM;
ds = kzalloc(sizeof(*ds), GFP_KERNEL);
if (unlikely(!ds))
if (!pebs_err && alloc_pebs_buffer(cpu))
pebs_err = 1;

if (bts_err && pebs_err)
break;
per_cpu(cpu_hw_events, cpu).ds = ds;

if (x86_pmu.bts) {
buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
if (unlikely(!buffer))
break;

max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
thresh = max / 16;

ds->bts_buffer_base = (u64)(unsigned long)buffer;
ds->bts_index = ds->bts_buffer_base;
ds->bts_absolute_maximum = ds->bts_buffer_base +
max * BTS_RECORD_SIZE;
ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
thresh * BTS_RECORD_SIZE;
}
}

if (x86_pmu.pebs) {
buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
if (unlikely(!buffer))
break;

max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;

ds->pebs_buffer_base = (u64)(unsigned long)buffer;
ds->pebs_index = ds->pebs_buffer_base;
ds->pebs_absolute_maximum = ds->pebs_buffer_base +
max * x86_pmu.pebs_record_size;
/*
* Always use single record PEBS
*/
ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
x86_pmu.pebs_record_size;
}
if (bts_err) {
for_each_possible_cpu(cpu)
release_bts_buffer(cpu);
}

err = 0;
if (pebs_err) {
for_each_possible_cpu(cpu)
release_pebs_buffer(cpu);
}

if (err)
release_ds_buffers();
else {
if (bts_err && pebs_err) {
for_each_possible_cpu(cpu)
release_ds_buffer(cpu);
} else {
if (x86_pmu.bts && !bts_err)
x86_pmu.bts_active = 1;

if (x86_pmu.pebs && !pebs_err)
x86_pmu.pebs_active = 1;

for_each_online_cpu(cpu)
init_debug_store_on_cpu(cpu);
}

put_online_cpus();

return err;
}

/*
@@ -233,7 +318,7 @@ static int intel_pmu_drain_bts_buffer(void)
if (!event)
return 0;

if (!ds)
if (!x86_pmu.bts_active)
return 0;

at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
@@ -503,7 +588,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
struct pebs_record_core *at, *top;
int n;

if (!ds || !x86_pmu.pebs)
if (!x86_pmu.pebs_active)
return;

at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
@@ -545,7 +630,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
u64 status = 0;
int bit, n;

if (!ds || !x86_pmu.pebs)
if (!x86_pmu.pebs_active)
return;

at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
@@ -630,9 +715,8 @@ static void intel_ds_init(void)

#else /* CONFIG_CPU_SUP_INTEL */

static int reserve_ds_buffers(void)
static void reserve_ds_buffers(void)
{
return 0;
}

static void release_ds_buffers(void)
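intel_pmu_drain_bts_buffer() above now gates on x86_pmu.bts_active and then walks the records between ds->bts_buffer_base and ds->bts_index. A sketch of that walk, assuming the conventional three-quadword BTS record layout (branch source, branch destination, flags); the layout is an assumption here, not something visible in the hunk:

#include <linux/types.h>

/*
 * Assumed BTS record layout for this sketch; the driver's own
 * struct bts_record is the authoritative definition.
 */
struct example_bts_record {
	u64 from;
	u64 to;
	u64 flags;
};

/* Walk every record currently sitting in the BTS buffer. */
static void example_walk_bts(u64 bts_buffer_base, u64 bts_index,
			     void (*emit)(u64 from, u64 to))
{
	struct example_bts_record *at, *top;

	at  = (struct example_bts_record *)(unsigned long)bts_buffer_base;
	top = (struct example_bts_record *)(unsigned long)bts_index;

	for (; at < top; at++)
		emit(at->from, at->to);
}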
6 changes: 6 additions & 0 deletions arch/x86/oprofile/nmi_int.c
@@ -726,6 +726,12 @@ int __init op_nmi_init(struct oprofile_operations *ops)
case 0x11:
cpu_type = "x86-64/family11h";
break;
case 0x12:
cpu_type = "x86-64/family12h";
break;
case 0x14:
cpu_type = "x86-64/family14h";
break;
default:
return -ENODEV;
}
