Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "Kernel side changes:

   - Intel Knights Landing support.  (Harish Chegondi)

   - Intel Broadwell-EP uncore PMU support.  (Kan Liang)

   - Core code improvements.  (Peter Zijlstra)

   - Event filter, LBR and PEBS fixes.  (Stephane Eranian)

   - Enable cycles:pp on Intel Atom.  (Stephane Eranian)

   - Add cycles:ppp support for Skylake.  (Andi Kleen) (see the sketch
     after this list)

   - Various x86 NMI overhead optimizations.  (Andi Kleen)

   - Intel PT enhancements.  (Takao Indoh)

   - AMD cache events fix.  (Vince Weaver)
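
     A quick sketch of the cycles precision modifiers mentioned above
     (the workload is arbitrary; :pp/:ppp availability depends on the
     CPU, output elided):

        # perf record -e cycles:ppp -a sleep 1
        # perf report --stdio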

  Tons of tooling changes:

   - Show random perf tool tips in the 'perf report' bottom line
     (Namhyung Kim)

   - perf report now defaults to --group if the perf.data file has
     grouped events; try it with:

      # perf record -e '{cycles,instructions}' -a sleep 1
      [ perf record: Woken up 1 times to write data ]
      [ perf record: Captured and wrote 1.093 MB perf.data (1247 samples) ]
      # perf report
      # Samples: 1K of event 'anon group { cycles, instructions }'
      # Event count (approx.): 1955219195
      #
      #       Overhead  Command     Shared Object      Symbol

         2.86%   0.22%  swapper     [kernel.kallsyms]  [k] intel_idle
         1.05%   0.33%  firefox     libxul.so          [.] js::SetObjectElement
         1.05%   0.00%  kworker/0:3 [kernel.kallsyms]  [k] gen6_ring_get_seqno
         0.88%   0.17%  chrome      chrome             [.] 0x0000000000ee27ab
         0.65%   0.86%  firefox     libxul.so          [.] js::ValueToId<(js::AllowGC)1>
         0.64%   0.23%  JS Helper   libxul.so          [.] js::SplayTree<js::jit::LiveRange*, js::jit::LiveRange>::splay
         0.62%   1.27%  firefox     libxul.so          [.] js::GetIterator
         0.61%   1.74%  firefox     libxul.so          [.] js::NativeSetProperty
         0.61%   0.31%  firefox     libxul.so          [.] js::SetPropertyByDefining

   - Introduce the 'perf stat record/report' workflow:

     Generate perf.data files from 'perf stat', to tap into the
     scripting capabilities perf has, instead of defining 'perf stat'
     specific scripting support to calculate event ratios, etc.

     Simple example:

        $ perf stat record -e cycles usleep 1

         Performance counter stats for 'usleep 1':

               1,134,996      cycles

             0.000670644 seconds time elapsed

        $ perf stat report

         Performance counter stats for '/home/acme/bin/perf stat record -e cycles usleep 1':

               1,134,996      cycles

             0.000670644 seconds time elapsed

        $

     It generates PERF_RECORD_ userspace records to store the details:

        $ perf report -D | grep PERF_RECORD
        0xf0 [0x28]: PERF_RECORD_THREAD_MAP nr: 1 thread: 27637
        0x118 [0x12]: PERF_RECORD_CPU_MAP nr: 1 cpu: 65535
        0x12a [0x40]: PERF_RECORD_STAT_CONFIG
        0x16a [0x30]: PERF_RECORD_STAT
        -1 -1 0x19a [0x40]: PERF_RECORD_MMAP -1/0: [0xffffffff81000000(0x1f000000) @ 0xffffffff81000000]: x [kernel.kallsyms]_text
        0x1da [0x18]: PERF_RECORD_STAT_ROUND
        [acme@ssdandy linux]$

     An effort was made so that perf.data files generated like this do
     not produce cryptic messages when processed by older tools.

     The 'perf script' bits need rebasing, will go up later.

   - Make command line options always available, even when they depend
     on some feature being enabled, warning the user about use of such
     options (Wang Nan)

   - Support hw breakpoint events (mem:0xAddress) in the default output
     mode in 'perf script' (Wang Nan)
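
     E.g., a minimal sketch (the breakpoint address below is
     hypothetical; pick a valid data address on your system, e.g. from
     /proc/kallsyms):

        # perf record -e mem:0xffffffff81c104a0 -a sleep 1
        # perf script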

   - Fixes and improvements for annotating ARM binaries: support ARM
     call and jump instructions; more work is needed to separate arch
     specific stuff into tools/perf/arch/*/annotate/ (Russell King)

   - Add initial 'perf config' command, for now just with a --list
     option to show the contents of the configuration file in use, and
     a basic man page describing its format; commands for doing edits
     and detailed documentation are being reviewed and proof-read.
     (Taeung Song)
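
     E.g. (the variables shown are illustrative; the output depends on
     what is set in your .perfconfig):

        $ perf config --list
        colors.top=red, default
        report.group=true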

   - Allow BPF scriptlets to specify arguments to be fetched using
     DWARF info, using a prologue generated at compile/build time (He
     Kuang, Wang Nan)

   - Allow attaching BPF scriptlets to module symbols (Wang Nan)

   - Allow attaching BPF scriptlets to userspace code using uprobe (Wang
     Nan)

   - BPF programs can now specify 'perf probe' tunables via their
     section names, separating key=val values using semicolons (Wang
     Nan), as sketched right below
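
     A hypothetical scriptlet header following that key=val scheme,
     here attaching to a userspace function via uprobe (the key names
     and the library path are assumptions for illustration, not
     verified against the final syntax):

        SEC("exec=/lib64/libc.so.6;func=malloc")
        int on_malloc(struct pt_regs *ctx)
        {
                return 1;
        }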

     Testing some of these new BPF features:

        Use case: get callchains when receiving SSL packets, filter them in the
                  kernel, at an arbitrary place.

        # cat ssl.bpf.c
        #define SEC(NAME) __attribute__((section(NAME), used))

        struct pt_regs;

        SEC("func=__inet_lookup_established hnum")
        int func(struct pt_regs *ctx, int err, unsigned short port)
        {
                return err == 0 && port == 443;
        }

        char _license[] SEC("license") = "GPL";
        int  _version   SEC("version") = LINUX_VERSION_CODE;
        #
        # perf record -a -g -e ssl.bpf.c
        ^C[ perf record: Woken up 1 times to write data ]
        [ perf record: Captured and wrote 0.787 MB perf.data (3 samples) ]
        # perf script | head -30
        swapper     0 [000] 58783.268118: perf_bpf_probe:func: (ffffffff816a0f60) hnum=0x1bb
           8a0f61 __inet_lookup_established (/lib/modules/4.3.0+/build/vmlinux)
           896def ip_rcv_finish (/lib/modules/4.3.0+/build/vmlinux)
           8976c2 ip_rcv (/lib/modules/4.3.0+/build/vmlinux)
           855eba __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux)
           8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux)
           8572a8 process_backlog (/lib/modules/4.3.0+/build/vmlinux)
           856b11 net_rx_action (/lib/modules/4.3.0+/build/vmlinux)
           2a284b __do_softirq (/lib/modules/4.3.0+/build/vmlinux)
           2a2ba3 irq_exit (/lib/modules/4.3.0+/build/vmlinux)
           96b7a4 do_IRQ (/lib/modules/4.3.0+/build/vmlinux)
           969807 ret_from_intr (/lib/modules/4.3.0+/build/vmlinux)
           2dede5 cpu_startup_entry (/lib/modules/4.3.0+/build/vmlinux)
           95d5bc rest_init (/lib/modules/4.3.0+/build/vmlinux)
          1163ffa start_kernel ([kernel.vmlinux].init.text)
          11634d7 x86_64_start_reservations ([kernel.vmlinux].init.text)
          1163623 x86_64_start_kernel ([kernel.vmlinux].init.text)

        qemu-system-x86  9178 [003] 58785.792417: perf_bpf_probe:func: (ffffffff816a0f60) hnum=0x1bb
           8a0f61 __inet_lookup_established (/lib/modules/4.3.0+/build/vmlinux)
           896def ip_rcv_finish (/lib/modules/4.3.0+/build/vmlinux)
           8976c2 ip_rcv (/lib/modules/4.3.0+/build/vmlinux)
           855eba __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux)
           8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux)
           856660 netif_receive_skb_internal (/lib/modules/4.3.0+/build/vmlinux)
           8566ec netif_receive_skb_sk (/lib/modules/4.3.0+/build/vmlinux)
             430a br_handle_frame_finish ([bridge])
             48bc br_handle_frame ([bridge])
           855f44 __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux)
           8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux)
        #

   - Use the various 'perf probe' options to list functions, see what
     variables can be collected at any given point, experiment first
     collecting without a filter, then filter, use it together with
     'perf trace', 'perf top', with or without callchains, and if it
     explodes, please tell us!
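
     A minimal sketch of that exploration (output elided):

        # perf probe --funcs | head
        # perf probe --vars __inet_lookup_established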

   - Introduce a new callchain mode: "folded", that will list per-line
     representations of all callchains for a given histogram entry,
     facilitating 'perf report' output processing by other tools, such
     as Brendan Gregg's flamegraph tools (Namhyung Kim)

     E.g:

        # perf report | grep -v ^# | head
           18.37%     0.00%  swapper  [kernel.kallsyms]   [k] cpu_startup_entry
                           |
                           ---cpu_startup_entry
                              |
                              |--12.07%--start_secondary
                              |
                               --6.30%--rest_init
                                         start_kernel
                                         x86_64_start_reservations
                                         x86_64_start_kernel
         #

     Becomes, in "folded" mode:

        # perf report -g folded | grep -v ^# | head -5
            18.37%     0.00%  swapper [kernel.kallsyms]   [k] cpu_startup_entry
          12.07% cpu_startup_entry;start_secondary
           6.30% cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel
            16.90%     0.00%  swapper [kernel.kallsyms]   [k] call_cpuidle
          11.23% call_cpuidle;cpu_startup_entry;start_secondary
           5.67% call_cpuidle;cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel
            16.90%     0.00%  swapper [kernel.kallsyms]   [k] cpuidle_enter
          11.23% cpuidle_enter;call_cpuidle;cpu_startup_entry;start_secondary
           5.67% cpuidle_enter;call_cpuidle;cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel
            15.12%     0.00%  swapper [kernel.kallsyms]   [k] cpuidle_enter_state
         #

     The user can also select one of "count", "period" or "percent" as
     the first column.
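
     E.g., a sketch using sample counts instead of percentages (value
     selection as documented for the -g option):

        # perf report -g folded,count | grep -v ^# | head -5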

  ... and lots of infrastructure enhancements, plus fixes and other
  changes, features I failed to list - see the shortlog and the git log
  for details"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (271 commits)
  perf evlist: Add --trace-fields option to show trace fields
  perf record: Store data mmaps for dwarf unwind
  perf libdw: Check for mmaps also in MAP__VARIABLE tree
  perf unwind: Check for mmaps also in MAP__VARIABLE tree
  perf unwind: Use find_map function in access_dso_mem
  perf evlist: Remove perf_evlist__(enable|disable)_event functions
  perf evlist: Make perf_evlist__open() open evsels with their cpus and threads (like perf record does)
  perf report: Show random usage tip on the help line
  perf hists: Export a couple of hist functions
  perf diff: Use perf_hpp__register_sort_field interface
  perf tools: Add overhead/overhead_children keys defaults via string
  perf tools: Remove list entry from struct sort_entry
  perf tools: Include all tools/lib directory for tags/cscope/TAGS targets
  perf script: Align event name properly
  perf tools: Add missing headers in perf's MANIFEST
  perf tools: Do not show trace command if it's not compiled in
  perf report: Change default to use event group view
  perf top: Decay periods in callchains
  tools lib: Move bitmap.[ch] from tools/perf/ to tools/{lib,include}/
  tools lib: Sync tools/lib/find_bit.c with the kernel
  ...
torvalds committed Jan 11, 2016
2 parents 24af98c + 3eb9ede commit 5cb52b5
Showing 240 changed files with 9,147 additions and 1,876 deletions.
37 changes: 37 additions & 0 deletions Documentation/trace/events-msr.txt
@@ -0,0 +1,37 @@

The x86 kernel supports tracing most MSR (Model Specific Register) accesses.
To see the definition of the MSRs on Intel systems please see the SDM
at http://www.intel.com/sdm (Volume 3)

Available trace points:

/sys/kernel/debug/tracing/events/msr/

Trace MSR reads

read_msr

msr: MSR number
val: Value read
failed: 1 if the access failed, otherwise 0


Trace MSR writes

write_msr

msr: MSR number
val: Value written
failed: 1 if the access failed, otherwise 0


Trace RDPMC in kernel

rdpmc

The trace data can be post processed with the postprocess/decode_msr.py script

cat /sys/kernel/debug/tracing/trace | decode_msr.py /usr/src/linux/include/asm/msr-index.h

to add symbolic MSR names.
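
For example, to enable all MSR trace points and watch events as they
arrive (standard ftrace usage, shown here as a sketch):

echo 1 > /sys/kernel/debug/tracing/events/msr/enable
cat /sys/kernel/debug/tracing/trace_pipe | head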

37 changes: 37 additions & 0 deletions Documentation/trace/postprocess/decode_msr.py
@@ -0,0 +1,37 @@
#!/usr/bin/python
# add symbolic names to read_msr / write_msr in trace
# decode_msr msr-index.h < trace
import sys
import re

msrs = dict()

with open(sys.argv[1] if len(sys.argv) > 1 else "msr-index.h", "r") as f:
    for j in f:
        m = re.match(r'#define (MSR_\w+)\s+(0x[0-9a-fA-F]+)', j)
        if m:
            msrs[int(m.group(2), 16)] = m.group(1)

extra_ranges = (
    ( "MSR_LASTBRANCH_%d_FROM_IP", 0x680, 0x69F ),
    ( "MSR_LASTBRANCH_%d_TO_IP", 0x6C0, 0x6DF ),
    ( "LBR_INFO_%d", 0xdc0, 0xddf ),
)

for j in sys.stdin:
    m = re.search(r'(read|write)_msr:\s+([0-9a-f]+)', j)
    if m:
        r = None
        num = int(m.group(2), 16)
        if num in msrs:
            r = msrs[num]
        else:
            for er in extra_ranges:
                if er[1] <= num <= er[2]:
                    r = er[0] % (num - er[1],)
                    break
        if r:
            j = j.replace(" " + m.group(2), " " + r + "(" + m.group(2) + ")")
    print j,
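
A hypothetical before/after pair for one trace line processed by this
script (0xc0000080 is MSR_EFER in msr-index.h; the surrounding trace
fields are illustrative):

   read_msr: c0000080, value d01
   read_msr: MSR_EFER(c0000080), value d01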


1 change: 0 additions & 1 deletion arch/x86/include/asm/atomic.h
@@ -3,7 +3,6 @@

#include <linux/compiler.h>
#include <linux/types.h>
#include <asm/processor.h>
#include <asm/alternative.h>
#include <asm/cmpxchg.h>
#include <asm/rmwcc.h>
1 change: 0 additions & 1 deletion arch/x86/include/asm/atomic64_32.h
@@ -3,7 +3,6 @@

#include <linux/compiler.h>
#include <linux/types.h>
#include <asm/processor.h>
//#include <asm/cmpxchg.h>

/* An 64bit atomic type */
10 changes: 10 additions & 0 deletions arch/x86/include/asm/intel_pt.h
@@ -0,0 +1,10 @@
#ifndef _ASM_X86_INTEL_PT_H
#define _ASM_X86_INTEL_PT_H

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
void cpu_emergency_stop_pt(void);
#else
static inline void cpu_emergency_stop_pt(void) {}
#endif

#endif /* _ASM_X86_INTEL_PT_H */
57 changes: 57 additions & 0 deletions arch/x86/include/asm/msr-trace.h
@@ -0,0 +1,57 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM msr

#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE msr-trace

#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH asm/

#if !defined(_TRACE_MSR_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_MSR_H

#include <linux/tracepoint.h>

/*
 * Tracing for x86 model specific registers. Directly maps to the
 * RDMSR/WRMSR instructions.
 */

DECLARE_EVENT_CLASS(msr_trace_class,
            TP_PROTO(unsigned msr, u64 val, int failed),
            TP_ARGS(msr, val, failed),
            TP_STRUCT__entry(
                    __field( unsigned,      msr )
                    __field( u64,           val )
                    __field( int,           failed )
            ),
            TP_fast_assign(
                    __entry->msr = msr;
                    __entry->val = val;
                    __entry->failed = failed;
            ),
            TP_printk("%x, value %llx%s",
                      __entry->msr,
                      __entry->val,
                      __entry->failed ? " #GP" : "")
);

DEFINE_EVENT(msr_trace_class, read_msr,
             TP_PROTO(unsigned msr, u64 val, int failed),
             TP_ARGS(msr, val, failed)
);

DEFINE_EVENT(msr_trace_class, write_msr,
             TP_PROTO(unsigned msr, u64 val, int failed),
             TP_ARGS(msr, val, failed)
);

DEFINE_EVENT(msr_trace_class, rdpmc,
             TP_PROTO(unsigned msr, u64 val, int failed),
             TP_ARGS(msr, val, failed)
);

#endif /* _TRACE_MSR_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
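
The do_trace_*() helpers declared in msr.h below are defined out of
line in arch/x86/lib/msr.c (not shown in this excerpt); a minimal
sketch of their shape, assuming the read_msr case:

void do_trace_read_msr(unsigned msr, u64 val, int failed)
{
        trace_read_msr(msr, val, failed);
}
EXPORT_SYMBOL(do_trace_read_msr);
EXPORT_TRACEPOINT_SYMBOL(read_msr);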
31 changes: 31 additions & 0 deletions arch/x86/include/asm/msr.h
@@ -57,11 +57,34 @@ static inline unsigned long long native_read_tscp(unsigned int *aux)
#define EAX_EDX_RET(val, low, high) "=A" (val)
#endif

#ifdef CONFIG_TRACEPOINTS
/*
 * Be very careful with includes. This header is prone to include loops.
 */
#include <asm/atomic.h>
#include <linux/tracepoint-defs.h>

extern struct tracepoint __tracepoint_read_msr;
extern struct tracepoint __tracepoint_write_msr;
extern struct tracepoint __tracepoint_rdpmc;
#define msr_tracepoint_active(t) static_key_false(&(t).key)
extern void do_trace_write_msr(unsigned msr, u64 val, int failed);
extern void do_trace_read_msr(unsigned msr, u64 val, int failed);
extern void do_trace_rdpmc(unsigned msr, u64 val, int failed);
#else
#define msr_tracepoint_active(t) false
static inline void do_trace_write_msr(unsigned msr, u64 val, int failed) {}
static inline void do_trace_read_msr(unsigned msr, u64 val, int failed) {}
static inline void do_trace_rdpmc(unsigned msr, u64 val, int failed) {}
#endif

static inline unsigned long long native_read_msr(unsigned int msr)
{
        DECLARE_ARGS(val, low, high);

        asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
        if (msr_tracepoint_active(__tracepoint_read_msr))
                do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0);
        return EAX_EDX_VAL(val, low, high);
}

@@ -78,13 +101,17 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
                     _ASM_EXTABLE(2b, 3b)
                     : [err] "=r" (*err), EAX_EDX_RET(val, low, high)
                     : "c" (msr), [fault] "i" (-EIO));
        if (msr_tracepoint_active(__tracepoint_read_msr))
                do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err);
        return EAX_EDX_VAL(val, low, high);
}

static inline void native_write_msr(unsigned int msr,
                                    unsigned low, unsigned high)
{
        asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
        if (msr_tracepoint_active(__tracepoint_write_msr))
                do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
}

/* Can be uninlined because referenced by paravirt */
@@ -102,6 +129,8 @@ notrace static inline int native_write_msr_safe(unsigned int msr,
                     : "c" (msr), "0" (low), "d" (high),
                       [fault] "i" (-EIO)
                     : "memory");
        if (msr_tracepoint_active(__tracepoint_write_msr))
                do_trace_write_msr(msr, ((u64)high << 32 | low), err);
        return err;
}

@@ -160,6 +189,8 @@ static inline unsigned long long native_read_pmc(int counter)
        DECLARE_ARGS(val, low, high);

        asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter));
        if (msr_tracepoint_active(__tracepoint_rdpmc))
                do_trace_rdpmc(counter, EAX_EDX_VAL(val, low, high), 0);
        return EAX_EDX_VAL(val, low, high);
}

9 changes: 9 additions & 0 deletions arch/x86/include/asm/uaccess.h
@@ -745,5 +745,14 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
#undef __copy_from_user_overflow
#undef __copy_to_user_overflow

/*
 * We rely on the nested NMI work to allow atomic faults from the NMI path; the
 * nested NMI paths are careful to preserve CR2.
 *
 * Caller must use pagefault_enable/disable, or run in interrupt context,
 * and also do an access_ok() check
 */
#define __copy_from_user_nmi __copy_from_user_inatomic

#endif /* _ASM_X86_UACCESS_H */

36 changes: 32 additions & 4 deletions arch/x86/kernel/cpu/perf_event.c
@@ -482,6 +482,9 @@ int x86_pmu_hw_config(struct perf_event *event)
                        /* Support for IP fixup */
                        if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
                                precise++;

                        if (x86_pmu.pebs_prec_dist)
                                precise++;
                }

                if (event->attr.precise_ip > precise)
@@ -1531,6 +1534,7 @@ static void __init filter_events(struct attribute **attrs)
{
        struct device_attribute *d;
        struct perf_pmu_events_attr *pmu_attr;
        int offset = 0;
        int i, j;

        for (i = 0; attrs[i]; i++) {
@@ -1539,14 +1543,22 @@
                /* str trumps id */
                if (pmu_attr->event_str)
                        continue;
                if (x86_pmu.event_map(i))
                if (x86_pmu.event_map(i + offset))
                        continue;

                for (j = i; attrs[j]; j++)
                        attrs[j] = attrs[j + 1];

                /* Check the shifted attr. */
                i--;

                /*
                 * event_map() is index based, the attrs array is organized
                 * by increasing event index. If we shift the events, then
                 * we need to compensate for the event_map(), otherwise
                 * we are looking up the wrong event in the map
                 */
                offset++;
        }
}

@@ -2250,12 +2262,19 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
        ss_base = get_segment_base(regs->ss);

        fp = compat_ptr(ss_base + regs->bp);
        pagefault_disable();
        while (entry->nr < PERF_MAX_STACK_DEPTH) {
                unsigned long bytes;
                frame.next_frame = 0;
                frame.return_address = 0;

                bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
                if (!access_ok(VERIFY_READ, fp, 8))
                        break;

                bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4);
                if (bytes != 0)
                        break;
                bytes = __copy_from_user_nmi(&frame.return_address, fp+4, 4);
                if (bytes != 0)
                        break;

@@ -2265,6 +2284,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
                perf_callchain_store(entry, cs_base + frame.return_address);
                fp = compat_ptr(ss_base + frame.next_frame);
        }
        pagefault_enable();
        return 1;
}
#else
@@ -2302,21 +2322,29 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
        if (perf_callchain_user32(regs, entry))
                return;

        pagefault_disable();
        while (entry->nr < PERF_MAX_STACK_DEPTH) {
                unsigned long bytes;
                frame.next_frame = NULL;
                frame.return_address = 0;

                bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
                if (!access_ok(VERIFY_READ, fp, 16))
                        break;

                bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8);
                if (bytes != 0)
                        break;
                bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8);
                if (bytes != 0)
                        break;

                if (!valid_user_frame(fp, sizeof(frame)))
                        break;

                perf_callchain_store(entry, frame.return_address);
                fp = frame.next_frame;
                fp = (void __user *)frame.next_frame;
        }
        pagefault_enable();
}

/*
21 changes: 9 additions & 12 deletions arch/x86/kernel/cpu/perf_event.h
@@ -14,17 +14,7 @@

#include <linux/perf_event.h>

#if 0
#undef wrmsrl
#define wrmsrl(msr, val)                                                \
do {                                                                    \
        unsigned int _msr = (msr);                                      \
        u64 _val = (val);                                               \
        trace_printk("wrmsrl(%x, %Lx)\n", (unsigned int)(_msr),        \
                     (unsigned long long)(_val));                       \
        native_write_msr((_msr), (u32)(_val), (u32)(_val >> 32));       \
} while (0)
#endif
/* To enable MSR tracing please use the generic trace points. */

/*
* | NHM/WSM | SNB |
@@ -318,6 +308,10 @@ struct cpu_hw_events {
#define INTEL_UEVENT_CONSTRAINT(c, n)   \
        EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)

/* Constraint on specific umask bit only + event */
#define INTEL_UBIT_EVENT_CONSTRAINT(c, n)       \
        EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|(c))

/* Like UEVENT_CONSTRAINT, but match flags too */
#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n)     \
        EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
@@ -589,7 +583,8 @@ struct x86_pmu {
                        bts_active      :1,
                        pebs            :1,
                        pebs_active     :1,
                        pebs_broken     :1;
                        pebs_broken     :1,
                        pebs_prec_dist  :1;
        int             pebs_record_size;
        void            (*drain_pebs)(struct pt_regs *regs);
        struct event_constraint *pebs_constraints;
@@ -907,6 +902,8 @@ void intel_pmu_lbr_init_hsw(void);

void intel_pmu_lbr_init_skl(void);

void intel_pmu_lbr_init_knl(void);

int intel_pmu_setup_lbr_filter(struct perf_event *event);

void intel_pt_interrupt(void);
(remaining changed files not shown)