Merge branches 'core-urgent-for-linus', 'perf-urgent-for-linus', 'sched-urgent-for-linus' and 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

* 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  bugs, x86: Fix printk levels for panic, softlockups and stack dumps

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf top: Fix number of samples displayed
  perf tools: Fix strlen() bug in perf_event__synthesize_event_type()
  perf tools: Fix broken build by defining _GNU_SOURCE in Makefile
  x86/dumpstack: Remove unneeded check in dump_trace()
  perf: Fix broken interrupt rate throttling

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/rt: Fix task stack corruption under __ARCH_WANT_INTERRUPTS_ON_CTXSW
  sched: Fix ancient race in do_exit()
  sched/nohz: Fix nohz cpu idle load balancing state with cpu hotplug
  sched/s390: Fix compile error in sched/core.c
  sched: Fix rq->nr_uninterruptible update race

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/reboot: Remove VersaLogic Menlow reboot quirk
  x86/reboot: Skip DMI checks if reboot set by user
  x86: Properly parenthesize cmpxchg() macro arguments
torvalds committed Feb 2, 2012
5 parents bd3ce7d + b0f4c4b + 45179fe + cb297a3 + e6d36a6 commit 2f2fde9
Showing 23 changed files with 177 additions and 97 deletions.
6 changes: 3 additions & 3 deletions arch/x86/include/asm/cmpxchg.h
@@ -145,13 +145,13 @@ extern void __add_wrong_size(void)

#ifdef __HAVE_ARCH_CMPXCHG
#define cmpxchg(ptr, old, new) \
__cmpxchg((ptr), (old), (new), sizeof(*ptr))
__cmpxchg(ptr, old, new, sizeof(*(ptr)))

#define sync_cmpxchg(ptr, old, new) \
__sync_cmpxchg((ptr), (old), (new), sizeof(*ptr))
__sync_cmpxchg(ptr, old, new, sizeof(*(ptr)))

#define cmpxchg_local(ptr, old, new) \
__cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
__cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
#endif

/*
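The key change in this hunk is sizeof(*ptr) becoming sizeof(*(ptr)): a macro argument is substituted textually, so an argument that is itself an expression can bind to the * with the wrong precedence (the redundant parentheses around ptr, old and new in the pass-through call are dropped at the same time, presumably because __cmpxchg() parenthesizes its own parameters). A minimal user-space sketch of the pitfall, using hypothetical SIZE_BAD/SIZE_GOOD macros rather than the kernel ones:

#include <stdio.h>

/* Unparenthesized: the argument text is pasted straight after '*'. */
#define SIZE_BAD(ptr)   sizeof(*ptr)
/* Parenthesized, as the patch does for cmpxchg()/sync_cmpxchg()/cmpxchg_local(): */
#define SIZE_GOOD(ptr)  sizeof(*(ptr))

int main(void)
{
	char buf[8] = "";

	/*
	 * With the argument "buf + 1":
	 *   SIZE_BAD  expands to sizeof(*buf + 1)   == sizeof(int), because *buf
	 *             is promoted to int before the addition
	 *   SIZE_GOOD expands to sizeof(*(buf + 1)) == sizeof(char) == 1
	 */
	printf("unparenthesized: %zu\n", SIZE_BAD(buf + 1));
	printf("parenthesized:   %zu\n", SIZE_GOOD(buf + 1));
	return 0;
}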
3 changes: 2 additions & 1 deletion arch/x86/kernel/dumpstack.c
@@ -252,7 +252,8 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
unsigned short ss;
unsigned long sp;
#endif
printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
printk(KERN_DEFAULT
"%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
#ifdef CONFIG_PREEMPT
printk("PREEMPT ");
#endif
8 changes: 4 additions & 4 deletions arch/x86/kernel/dumpstack_64.c
@@ -129,7 +129,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
if (!stack) {
if (regs)
stack = (unsigned long *)regs->sp;
else if (task && task != current)
else if (task != current)
stack = (unsigned long *)task->thread.sp;
else
stack = &dummy;
@@ -269,11 +269,11 @@ void show_registers(struct pt_regs *regs)
unsigned char c;
u8 *ip;

printk(KERN_EMERG "Stack:\n");
printk(KERN_DEFAULT "Stack:\n");
show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
0, KERN_EMERG);
0, KERN_DEFAULT);

printk(KERN_EMERG "Code: ");
printk(KERN_DEFAULT "Code: ");

ip = (u8 *)regs->ip - code_prologue;
if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
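For reference, KERN_EMERG (loglevel 0, "system is unusable") is essentially never filtered by console_loglevel, while KERN_DEFAULT logs at the configured default message loglevel like an untagged printk; this hunk and the fault.c one below adjust which oops/stack-dump lines carry which level. A small hypothetical out-of-tree module, only to illustrate the two tags (not part of this commit):

#include <linux/module.h>
#include <linux/kernel.h>

static int __init loglevel_demo_init(void)
{
	/* Level 0: reserved for "system is unusable"; shows up on every console. */
	printk(KERN_EMERG "loglevel_demo: emergency-level message\n");

	/* Default level: filtered by console_loglevel like an untagged printk. */
	printk(KERN_DEFAULT "loglevel_demo: default-level message\n");
	return 0;
}

static void __exit loglevel_demo_exit(void)
{
}

module_init(loglevel_demo_init);
module_exit(loglevel_demo_exit);
MODULE_LICENSE("GPL");

Built with an ordinary obj-m Makefile this prints both lines to the log, but with a restrictive console_loglevel (for example booting with quiet) only the KERN_EMERG line would be expected to reach the console.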
36 changes: 26 additions & 10 deletions arch/x86/kernel/reboot.c
@@ -39,6 +39,14 @@ static int reboot_mode;
enum reboot_type reboot_type = BOOT_ACPI;
int reboot_force;

/* This variable is used privately to keep track of whether or not
* reboot_type is still set to its default value (i.e., reboot= hasn't
* been set on the command line). This is needed so that we can
* suppress DMI scanning for reboot quirks. Without it, it's
* impossible to override a faulty reboot quirk without recompiling.
*/
static int reboot_default = 1;

#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
static int reboot_cpu = -1;
#endif
@@ -67,6 +75,12 @@ bool port_cf9_safe = false;
static int __init reboot_setup(char *str)
{
for (;;) {
/* Having anything passed on the command line via
* reboot= will cause us to disable DMI checking
* below.
*/
reboot_default = 0;

switch (*str) {
case 'w':
reboot_mode = 0x1234;
@@ -295,14 +309,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
},
},
{ /* Handle problems with rebooting on VersaLogic Menlow boards */
.callback = set_bios_reboot,
.ident = "VersaLogic Menlow based board",
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "VersaLogic Corporation"),
DMI_MATCH(DMI_BOARD_NAME, "VersaLogic Menlow board"),
},
},
{ /* Handle reboot issue on Acer Aspire one */
.callback = set_kbd_reboot,
.ident = "Acer Aspire One A110",
@@ -316,7 +322,12 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {

static int __init reboot_init(void)
{
dmi_check_system(reboot_dmi_table);
/* Only do the DMI check if reboot_type hasn't been overridden
* on the command line
*/
if (reboot_default) {
dmi_check_system(reboot_dmi_table);
}
return 0;
}
core_initcall(reboot_init);
@@ -465,7 +476,12 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {

static int __init pci_reboot_init(void)
{
dmi_check_system(pci_reboot_dmi_table);
/* Only do the DMI check if reboot_type hasn't been overridden
* on the command line
*/
if (reboot_default) {
dmi_check_system(pci_reboot_dmi_table);
}
return 0;
}
core_initcall(pci_reboot_init);
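The new reboot_default flag encodes a simple policy: DMI quirk matching is only a fallback and must never override an explicit reboot= choice from the command line. A hypothetical user-space analogue of the same pattern (invented names, nothing here is kernel API):

#include <stdio.h>

/* Analogue of reboot_default: stays 1 until an explicit option is parsed. */
static int method_is_default = 1;
static const char *method = "acpi";

/* Analogue of reboot_setup(): any explicit choice clears the default flag. */
static void parse_option(const char *arg)
{
	method_is_default = 0;
	method = arg;
}

/* Analogue of the DMI table: auto-detection that may occasionally be wrong. */
static void apply_quirks(void)
{
	method = "bios";    /* pretend a quirk matched this board */
}

int main(int argc, char **argv)
{
	if (argc > 1)
		parse_option(argv[1]);    /* e.g. ./a.out kbd */

	/* Analogue of reboot_init()/pci_reboot_init(): quirks only by default. */
	if (method_is_default)
		apply_quirks();

	printf("reboot method: %s (%s)\n", method,
	       method_is_default ? "quirk/default" : "user override");
	return 0;
}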
4 changes: 2 additions & 2 deletions arch/x86/mm/fault.c
@@ -673,7 +673,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,

stackend = end_of_stack(tsk);
if (tsk != &init_task && *stackend != STACK_END_MAGIC)
printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");

tsk->thread.cr2 = address;
tsk->thread.trap_no = 14;
@@ -684,7 +684,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
sig = 0;

/* Executive summary in case the body of the oops scrolled away */
printk(KERN_EMERG "CR2: %016lx\n", address);
printk(KERN_DEFAULT "CR2: %016lx\n", address);

oops_end(flags, regs, sig);
}
1 change: 1 addition & 0 deletions include/linux/perf_event.h
@@ -587,6 +587,7 @@ struct hw_perf_event {
u64 sample_period;
u64 last_period;
local64_t period_left;
u64 interrupts_seq;
u64 interrupts;

u64 freq_time_stamp;
104 changes: 66 additions & 38 deletions kernel/events/core.c
@@ -2300,6 +2300,9 @@ do { \
return div64_u64(dividend, divisor);
}

static DEFINE_PER_CPU(int, perf_throttled_count);
static DEFINE_PER_CPU(u64, perf_throttled_seq);

static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
{
struct hw_perf_event *hwc = &event->hw;
@@ -2325,16 +2328,29 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
}
}

static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
/*
* combine freq adjustment with unthrottling to avoid two passes over the
* events. At the same time, make sure, having freq events does not change
* the rate of unthrottling as that would introduce bias.
*/
static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
int needs_unthr)
{
struct perf_event *event;
struct hw_perf_event *hwc;
u64 interrupts, now;
u64 now, period = TICK_NSEC;
s64 delta;

if (!ctx->nr_freq)
/*
* only need to iterate over all events iff:
* - context have events in frequency mode (needs freq adjust)
* - there are events to unthrottle on this cpu
*/
if (!(ctx->nr_freq || needs_unthr))
return;

raw_spin_lock(&ctx->lock);

list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
if (event->state != PERF_EVENT_STATE_ACTIVE)
continue;
@@ -2344,28 +2360,35 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)

hwc = &event->hw;

interrupts = hwc->interrupts;
hwc->interrupts = 0;

/*
* unthrottle events on the tick
*/
if (interrupts == MAX_INTERRUPTS) {
if (needs_unthr && hwc->interrupts == MAX_INTERRUPTS) {
hwc->interrupts = 0;
perf_log_throttle(event, 1);
event->pmu->start(event, 0);
}

if (!event->attr.freq || !event->attr.sample_freq)
continue;

event->pmu->read(event);
/*
* stop the event and update event->count
*/
event->pmu->stop(event, PERF_EF_UPDATE);

now = local64_read(&event->count);
delta = now - hwc->freq_count_stamp;
hwc->freq_count_stamp = now;

/*
* restart the event
* reload only if value has changed
*/
if (delta > 0)
perf_adjust_period(event, period, delta);

event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
}

raw_spin_unlock(&ctx->lock);
}

/*
@@ -2388,54 +2411,40 @@ static void rotate_ctx(struct perf_event_context *ctx)
*/
static void perf_rotate_context(struct perf_cpu_context *cpuctx)
{
u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
struct perf_event_context *ctx = NULL;
int rotate = 0, remove = 1, freq = 0;
int rotate = 0, remove = 1;

if (cpuctx->ctx.nr_events) {
remove = 0;
if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
rotate = 1;
if (cpuctx->ctx.nr_freq)
freq = 1;
}

ctx = cpuctx->task_ctx;
if (ctx && ctx->nr_events) {
remove = 0;
if (ctx->nr_events != ctx->nr_active)
rotate = 1;
if (ctx->nr_freq)
freq = 1;
}

if (!rotate && !freq)
if (!rotate)
goto done;

perf_ctx_lock(cpuctx, cpuctx->task_ctx);
perf_pmu_disable(cpuctx->ctx.pmu);

if (freq) {
perf_ctx_adjust_freq(&cpuctx->ctx, interval);
if (ctx)
perf_ctx_adjust_freq(ctx, interval);
}

if (rotate) {
cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
if (ctx)
ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
if (ctx)
ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);

rotate_ctx(&cpuctx->ctx);
if (ctx)
rotate_ctx(ctx);
rotate_ctx(&cpuctx->ctx);
if (ctx)
rotate_ctx(ctx);

perf_event_sched_in(cpuctx, ctx, current);
}
perf_event_sched_in(cpuctx, ctx, current);

perf_pmu_enable(cpuctx->ctx.pmu);
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);

done:
if (remove)
list_del_init(&cpuctx->rotation_list);
@@ -2445,10 +2454,22 @@ void perf_event_task_tick(void)
{
struct list_head *head = &__get_cpu_var(rotation_list);
struct perf_cpu_context *cpuctx, *tmp;
struct perf_event_context *ctx;
int throttled;

WARN_ON(!irqs_disabled());

__this_cpu_inc(perf_throttled_seq);
throttled = __this_cpu_xchg(perf_throttled_count, 0);

list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
ctx = &cpuctx->ctx;
perf_adjust_freq_unthr_context(ctx, throttled);

ctx = cpuctx->task_ctx;
if (ctx)
perf_adjust_freq_unthr_context(ctx, throttled);

if (cpuctx->jiffies_interval == 1 ||
!(jiffies % cpuctx->jiffies_interval))
perf_rotate_context(cpuctx);
@@ -4509,6 +4530,7 @@ static int __perf_event_overflow(struct perf_event *event,
{
int events = atomic_read(&event->event_limit);
struct hw_perf_event *hwc = &event->hw;
u64 seq;
int ret = 0;

/*
@@ -4518,14 +4540,20 @@ static int __perf_event_overflow(struct perf_event *event,
if (unlikely(!is_sampling_event(event)))
return 0;

if (unlikely(hwc->interrupts >= max_samples_per_tick)) {
if (throttle) {
seq = __this_cpu_read(perf_throttled_seq);
if (seq != hwc->interrupts_seq) {
hwc->interrupts_seq = seq;
hwc->interrupts = 1;
} else {
hwc->interrupts++;
if (unlikely(throttle
&& hwc->interrupts >= max_samples_per_tick)) {
__this_cpu_inc(perf_throttled_count);
hwc->interrupts = MAX_INTERRUPTS;
perf_log_throttle(event, 0);
ret = 1;
}
} else
hwc->interrupts++;
}

if (event->attr.freq) {
u64 now = perf_clock();
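The new interrupts_seq field and the perf_throttled_seq/perf_throttled_count per-CPU pair implement a lazy reset: rather than walking every event each tick just to zero hwc->interrupts, the tick bumps one per-CPU sequence number and each event notices the new "epoch" on its next overflow, restarting its own count then; the tick skips the walk entirely unless there are frequency-mode events or perf_throttled_count says something was actually throttled. A stripped-down, single-CPU sketch of the idiom (hypothetical names, not the perf ABI):

#include <stdio.h>

#define MAX_SAMPLES_PER_TICK 3
#define MAX_INTERRUPTS       (~0u)

/* Per-"CPU" epoch: bumped once per tick instead of resetting every event. */
static unsigned long throttled_seq = 1;
static unsigned int  throttled_count;

struct event {
	unsigned long interrupts_seq;   /* epoch this event last counted in */
	unsigned int  interrupts;       /* samples seen within that epoch */
	int           throttled;
};

/* Analogue of __perf_event_overflow(): called once per sample. */
static void event_overflow(struct event *e)
{
	if (e->throttled)
		return;                 /* a throttled event would have been ->stop()ed */

	if (e->interrupts_seq != throttled_seq) {
		/* First overflow in a new tick: lazy reset of the counter. */
		e->interrupts_seq = throttled_seq;
		e->interrupts = 1;
	} else if (++e->interrupts >= MAX_SAMPLES_PER_TICK) {
		e->interrupts = MAX_INTERRUPTS;
		e->throttled = 1;
		throttled_count++;      /* tells the next tick to unthrottle */
	}
}

/* Analogue of perf_event_task_tick(): one increment, no per-event walk. */
static int tick(void)
{
	int needs_unthrottle = throttled_count;

	throttled_seq++;
	throttled_count = 0;
	return needs_unthrottle;
}

int main(void)
{
	struct event e = { 0 };
	int i;

	for (i = 0; i < 5; i++)
		event_overflow(&e);
	printf("after 5 samples: throttled=%d\n", e.throttled);   /* 1 */

	if (tick()) {
		e.interrupts = 0;       /* as perf_adjust_freq_unthr_context() does */
		e.throttled = 0;
	}
	printf("after tick:      throttled=%d\n", e.throttled);   /* 0 */
	return 0;
}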
16 changes: 16 additions & 0 deletions kernel/exit.c
@@ -1038,6 +1038,22 @@ void do_exit(long code)
if (tsk->nr_dirtied)
__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
exit_rcu();

/*
* The setting of TASK_RUNNING by try_to_wake_up() may be delayed
* when the following two conditions become true.
* - There is race condition of mmap_sem (It is acquired by
* exit_mm()), and
* - SMI occurs before setting TASK_RUNNING.
* (or hypervisor of virtual machine switches to other guest)
* As a result, we may become TASK_RUNNING after becoming TASK_DEAD
*
* To avoid it, we have to wait for releasing tsk->pi_lock which
* is held by try_to_wake_up()
*/
smp_mb();
raw_spin_unlock_wait(&tsk->pi_lock);

/* causes final put_task_struct in finish_task_switch(). */
tsk->state = TASK_DEAD;
tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */
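The added smp_mb() plus raw_spin_unlock_wait(&tsk->pi_lock) makes the exiting task drain any try_to_wake_up() still running under pi_lock before the final TASK_DEAD store, so a delayed TASK_RUNNING store can no longer land after it. A hedged pthread analogue of the same "wait out the waker, then publish the terminal state" idea (user-space mutexes stand in for the kernel primitives; illustrative only, build with cc -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

enum { TASK_SLEEPING, TASK_RUNNING, TASK_DEAD };

static pthread_mutex_t pi_lock = PTHREAD_MUTEX_INITIALIZER;
static _Atomic int task_state = TASK_SLEEPING;

/* Analogue of try_to_wake_up(): inspects and updates the state under pi_lock. */
static void *waker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&pi_lock);
	if (task_state == TASK_SLEEPING)
		task_state = TASK_RUNNING;   /* this store may be arbitrarily delayed */
	pthread_mutex_unlock(&pi_lock);
	return NULL;
}

/* Analogue of the tail of do_exit(). */
static void exit_path(void)
{
	/*
	 * Analogue of raw_spin_unlock_wait(): take and drop pi_lock so that a
	 * waker already inside its critical section has finished (and its
	 * TASK_RUNNING store is visible) before we publish the terminal state.
	 * Without this, the waker's late store could overwrite TASK_DEAD.
	 */
	pthread_mutex_lock(&pi_lock);
	pthread_mutex_unlock(&pi_lock);

	task_state = TASK_DEAD;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waker, NULL);
	exit_path();
	pthread_join(t, NULL);
	printf("final state: %s\n",
	       task_state == TASK_DEAD ? "TASK_DEAD" : "TASK_RUNNING");
	return 0;
}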
