Skip to content

Commit

Permalink
stop_machine: Add function and caller debug info
Browse files Browse the repository at this point in the history
Crashes in stop-machine are hard to connect to the calling code; record the
stopper function and its caller and print them in task dumps to help with that.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Valentin Schneider <[email protected]>
Reviewed-by: Daniel Bristot de Oliveira <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
  • Loading branch information
Peter Zijlstra committed Nov 10, 2020
1 parent 23859ae commit a8b62fd
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 3 deletions.
5 changes: 5 additions & 0 deletions include/linux/stop_machine.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ typedef int (*cpu_stop_fn_t)(void *arg);
/*
 * One request queued for a CPU's stopper: the callback to run, its
 * argument, and bookkeeping for completion and debugging.
 */
struct cpu_stop_work {
struct list_head list; /* cpu_stopper->works */
/* callback to run; cpu_stop_fn_t is int (*)(void *arg) */
cpu_stop_fn_t fn;
/* address of the code that queued this work (filled from _RET_IP_), debug only */
unsigned long caller;
/* opaque argument for @fn */
void *arg;
/* completion tracking; set by stop_one_cpu(), not set by stop_one_cpu_nowait() */
struct cpu_stop_done *done;
};
Expand All @@ -36,6 +37,8 @@ void stop_machine_park(int cpu);
void stop_machine_unpark(int cpu);
void stop_machine_yield(const struct cpumask *cpumask);

/*
 * If @task is a CPU stopper thread, print the stopper function currently
 * recorded for it and that function's caller, prefixed with @log_lvl.
 * Does nothing for any other task.
 */
extern void print_stop_info(const char *log_lvl, struct task_struct *task);

#else /* CONFIG_SMP */

#include <linux/workqueue.h>
Expand Down Expand Up @@ -80,6 +83,8 @@ static inline bool stop_one_cpu_nowait(unsigned int cpu,
return false;
}

/* Stub for builds without CONFIG_SMP: intentionally does nothing. */
static inline void print_stop_info(const char *log_lvl, struct task_struct *task) { }

#endif /* CONFIG_SMP */

/*
Expand Down
1 change: 1 addition & 0 deletions kernel/sched/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -6447,6 +6447,7 @@ void sched_show_task(struct task_struct *p)
(unsigned long)task_thread_info(p)->flags);

print_worker_info(KERN_INFO, p);
print_stop_info(KERN_INFO, p);
show_stack(p, NULL, KERN_INFO);
put_task_stack(p);
}
Expand Down
27 changes: 24 additions & 3 deletions kernel/stop_machine.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,27 @@ struct cpu_stopper {
struct list_head works; /* list of pending works */

struct cpu_stop_work stop_work; /* for stop_cpus */
unsigned long caller;
cpu_stop_fn_t fn;
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
static bool stop_machine_initialized = false;

/*
 * Debug helper for task dumps: when @task is the stopper thread of the CPU
 * it sits on, print the stopper function recorded for that CPU and the
 * address it was queued from, using @log_lvl as the printk level prefix.
 * Any other task is silently ignored.
 */
void print_stop_info(const char *log_lvl, struct task_struct *task)
{
	/*
	 * A stopper task cannot migrate, so when @task is one, the CPU read
	 * here stays valid for the rest of the function.
	 */
	unsigned int cpu = task_cpu(task);
	struct cpu_stopper *st = per_cpu_ptr(&cpu_stopper, cpu);

	if (task == st->thread)
		printk("%sStopper: %pS <- %pS\n", log_lvl, st->fn, (void *)st->caller);
}

/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
static bool stop_cpus_in_progress;
Expand Down Expand Up @@ -123,7 +139,7 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
struct cpu_stop_done done;
struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };
struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done, .caller = _RET_IP_ };

cpu_stop_init_done(&done, 1);
if (!cpu_stop_queue_work(cpu, &work))
Expand Down Expand Up @@ -331,7 +347,8 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
work1 = work2 = (struct cpu_stop_work){
.fn = multi_cpu_stop,
.arg = &msdata,
.done = &done
.done = &done,
.caller = _RET_IP_,
};

cpu_stop_init_done(&done, 2);
Expand Down Expand Up @@ -367,7 +384,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
/*
 * Queue @fn(@arg) for @cpu's stopper using the caller-supplied @work_buf.
 *
 * @work_buf is overwritten with a fresh cpu_stop_work; no completion
 * (.done) is attached, and .caller records _RET_IP_ so that
 * print_stop_info() can name the code that queued the work.
 *
 * Returns whatever cpu_stop_queue_work() returns for the request.
 */
bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
struct cpu_stop_work *work_buf)
{
*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, .caller = _RET_IP_, };
return cpu_stop_queue_work(cpu, work_buf);
}

Expand Down Expand Up @@ -487,6 +504,8 @@ static void cpu_stopper_thread(unsigned int cpu)
int ret;

/* cpu stop callbacks must not sleep, make in_atomic() == T */
stopper->caller = work->caller;
stopper->fn = fn;
preempt_count_inc();
ret = fn(arg);
if (done) {
Expand All @@ -495,6 +514,8 @@ static void cpu_stopper_thread(unsigned int cpu)
cpu_stop_signal_done(done);
}
preempt_count_dec();
stopper->fn = NULL;
stopper->caller = 0;
WARN_ONCE(preempt_count(),
"cpu_stop: %ps(%p) leaked preempt count\n", fn, arg);
goto repeat;
Expand Down
2 changes: 2 additions & 0 deletions lib/dump_stack.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <linux/atomic.h>
#include <linux/kexec.h>
#include <linux/utsname.h>
#include <linux/stop_machine.h>

static char dump_stack_arch_desc_str[128];

Expand Down Expand Up @@ -57,6 +58,7 @@ void dump_stack_print_info(const char *log_lvl)
log_lvl, dump_stack_arch_desc_str);

print_worker_info(log_lvl, current);
print_stop_info(log_lvl, current);
}

/**
Expand Down

0 comments on commit a8b62fd

Please sign in to comment.