Commit

Merge branch 'sched/migrate-disable'

Peter Zijlstra committed Nov 10, 2020
2 parents b6d37a7 + c777d84 commit 12fa97c
Showing 19 changed files with 1,038 additions and 227 deletions.
4 changes: 2 additions & 2 deletions fs/proc/array.c
@@ -382,9 +382,9 @@ static inline void task_context_switch_counts(struct seq_file *m,
 static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
 {
 	seq_printf(m, "Cpus_allowed:\t%*pb\n",
-		   cpumask_pr_args(task->cpus_ptr));
+		   cpumask_pr_args(&task->cpus_mask));
 	seq_printf(m, "Cpus_allowed_list:\t%*pbl\n",
-		   cpumask_pr_args(task->cpus_ptr));
+		   cpumask_pr_args(&task->cpus_mask));
 }
 
 static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
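
This hunk anticipates migrate_disable(): while a task is migrate-disabled, the scheduler may temporarily repoint p->cpus_ptr at a single-CPU mask, whereas p->cpus_mask keeps the affinity the user actually requested, so /proc should print the latter. A minimal sketch of the difference (the mechanics are assumed from the rest of this series, not shown in this hunk):

	migrate_disable();
	/*
	 * cpus_ptr may now be a transient single-CPU mask; cpus_mask is
	 * the stable, user-requested affinity that /proc now reports.
	 */
	pr_info("ptr=%*pbl mask=%*pbl\n",
		cpumask_pr_args(current->cpus_ptr),
		cpumask_pr_args(&current->cpus_mask));
	migrate_enable();
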
1 change: 1 addition & 0 deletions include/linux/cpuhotplug.h
@@ -152,6 +152,7 @@ enum cpuhp_state {
 	CPUHP_AP_ONLINE,
 	CPUHP_TEARDOWN_CPU,
 	CPUHP_AP_ONLINE_IDLE,
+	CPUHP_AP_SCHED_WAIT_EMPTY,
 	CPUHP_AP_SMPBOOT_THREADS,
 	CPUHP_AP_X86_VDSO_VMA_ONLINE,
 	CPUHP_AP_IRQ_AFFINITY_ONLINE,
6 changes: 6 additions & 0 deletions include/linux/cpumask.h
@@ -199,6 +199,11 @@ static inline int cpumask_any_and_distribute(const struct cpumask *src1p,
 	return cpumask_next_and(-1, src1p, src2p);
 }
 
+static inline int cpumask_any_distribute(const struct cpumask *srcp)
+{
+	return cpumask_first(srcp);
+}
+
 #define for_each_cpu(cpu, mask)			\
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
 #define for_each_cpu_not(cpu, mask)		\
@@ -252,6 +257,7 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
 unsigned int cpumask_local_spread(unsigned int i, int node);
 int cpumask_any_and_distribute(const struct cpumask *src1p,
 			       const struct cpumask *src2p);
+int cpumask_any_distribute(const struct cpumask *srcp);
 
 /**
  * for_each_cpu - iterate over every cpu in a mask
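
The UP stub above trivially returns the first (and only possible) CPU. By analogy with cpumask_any_and_distribute() just before it, the intent of the SMP version declared here is to spread successive picks across the mask rather than always returning the first set bit. A small self-contained model of that rotor behaviour (plain C, all names hypothetical):

	#include <stdio.h>

	/*
	 * Model a cpumask as a bitmask; 'rotor' remembers the previous pick
	 * so repeated calls rotate through the set bits instead of
	 * clustering on the first one.
	 */
	static unsigned int rotor;

	static int any_distribute(unsigned long mask, unsigned int nbits)
	{
		for (unsigned int i = 1; i <= nbits; i++) {
			unsigned int cpu = (rotor + i) % nbits;
			if (mask & (1UL << cpu)) {
				rotor = cpu;
				return cpu;
			}
		}
		return -1;	/* empty mask */
	}

	int main(void)
	{
		unsigned long mask = 0x0d;	/* CPUs 0, 2, 3 */
		for (int i = 0; i < 5; i++)
			printf("pick %d: CPU %d\n", i, any_distribute(mask, 4));
		return 0;	/* prints CPUs 2, 3, 0, 2, 3 */
	}
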
69 changes: 69 additions & 0 deletions include/linux/preempt.h
@@ -322,6 +322,73 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
 
 #endif
 
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+
+/*
+ * Migrate-Disable and why it is undesired.
+ *
+ * When a preempted task becomes eligible to run under the ideal model (IOW it
+ * becomes one of the M highest priority tasks), it might still have to wait
+ * for the preemptee's migrate_disable() section to complete, thereby suffering
+ * a reduction in bandwidth for the exact duration of the migrate_disable()
+ * section.
+ *
+ * Per this argument, the change from preempt_disable() to migrate_disable()
+ * gets us:
+ *
+ * - a higher priority task gains reduced wake-up latency; with preempt_disable()
+ *   it would have had to wait for the lower priority task.
+ *
+ * - a lower priority task, which under preempt_disable() could've instantly
+ *   migrated away when another CPU becomes available, is now constrained
+ *   by the ability to push the higher priority task away, which might itself be
+ *   in a migrate_disable() section, reducing its available bandwidth.
+ *
+ * IOW it trades latency / moves the interference term, but the interference
+ * stays in the system, and as long as it remains unbounded, the system is
+ * not fully deterministic.
+ *
+ *
+ * The reason we have it anyway.
+ *
+ * PREEMPT_RT breaks a number of assumptions traditionally held. By forcing a
+ * number of primitives to become preemptible, they would also allow
+ * migration. This turns out to break a bunch of per-cpu usage. To this end,
+ * all these primitives employ migrate_disable() to restore this implicit
+ * assumption.
+ *
+ * This is a 'temporary' work-around at best. The correct solution is getting
+ * rid of the above assumptions and reworking the code to employ explicit
+ * per-cpu locking or short preempt-disable regions.
+ *
+ * The end goal must be to get rid of migrate_disable(); alternatively we need
+ * a schedulability theory that does not depend on arbitrary migration.
+ *
+ *
+ * Notes on the implementation.
+ *
+ * The implementation is particularly tricky since existing code patterns
+ * dictate that neither migrate_disable() nor migrate_enable() is allowed to
+ * block. This means it cannot use cpus_read_lock() to serialize against
+ * hotplug, nor can it easily migrate itself into a pending affinity-mask
+ * change on migrate_enable().
+ *
+ *
+ * Note: even non-work-conserving schedulers like semi-partitioned depend on
+ *       migration, so migrate_disable() is not only a problem for
+ *       work-conserving schedulers.
+ *
+ */
+extern void migrate_disable(void);
+extern void migrate_enable(void);
+
+#elif defined(CONFIG_PREEMPT_RT)
+
+static inline void migrate_disable(void) { }
+static inline void migrate_enable(void) { }
+
+#else /* !CONFIG_PREEMPT_RT */
+
 /**
  * migrate_disable - Prevent migration of the current task
  *
@@ -352,4 +419,6 @@ static __always_inline void migrate_enable(void)
 	preempt_enable();
 }
 
+#endif /* CONFIG_SMP && CONFIG_PREEMPT_RT */
+
 #endif /* __LINUX_PREEMPT_H */
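
To make the new comment concrete: the per-cpu usage it protects is code that makes several accesses which must all hit the same CPU's data. On PREEMPT_RT the surrounding locks become preemptible, so the pinning is done with migrate_disable() rather than preempt_disable(). A hedged sketch of the pattern (my_stats_pcpu and account() are illustrative names, not part of this patch):

	#include <linux/percpu.h>
	#include <linux/preempt.h>

	struct my_stats {
		u64	events;
		u64	bytes;
	};
	static DEFINE_PER_CPU(struct my_stats, my_stats_pcpu);

	static void account(unsigned int len)
	{
		struct my_stats *stats;

		migrate_disable();	/* pinned to this CPU, still preemptible */
		stats = this_cpu_ptr(&my_stats_pcpu);
		stats->events++;	/* both updates must land in the same   */
		stats->bytes += len;	/* CPU's instance, hence the pinning    */
		migrate_enable();
	}
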
5 changes: 5 additions & 0 deletions include/linux/sched.h
@@ -714,6 +714,11 @@ struct task_struct {
 	int				nr_cpus_allowed;
 	const cpumask_t			*cpus_ptr;
 	cpumask_t			cpus_mask;
+	void				*migration_pending;
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+	unsigned short			migration_disabled;
+#endif
+	unsigned short			migration_flags;
 
 #ifdef CONFIG_PREEMPT_RCU
 	int				rcu_read_lock_nesting;
2 changes: 2 additions & 0 deletions include/linux/sched/hotplug.h
@@ -11,8 +11,10 @@ extern int sched_cpu_activate(unsigned int cpu);
 extern int sched_cpu_deactivate(unsigned int cpu);
 
 #ifdef CONFIG_HOTPLUG_CPU
+extern int sched_cpu_wait_empty(unsigned int cpu);
 extern int sched_cpu_dying(unsigned int cpu);
 #else
+# define sched_cpu_wait_empty	NULL
 # define sched_cpu_dying	NULL
 #endif
 
5 changes: 5 additions & 0 deletions include/linux/stop_machine.h
@@ -24,6 +24,7 @@ typedef int (*cpu_stop_fn_t)(void *arg);
 struct cpu_stop_work {
 	struct list_head	list;		/* cpu_stopper->works */
 	cpu_stop_fn_t		fn;
+	unsigned long		caller;
 	void			*arg;
 	struct cpu_stop_done	*done;
 };
@@ -36,6 +37,8 @@ void stop_machine_park(int cpu);
 void stop_machine_unpark(int cpu);
 void stop_machine_yield(const struct cpumask *cpumask);
 
+extern void print_stop_info(const char *log_lvl, struct task_struct *task);
+
 #else	/* CONFIG_SMP */
 
 #include <linux/workqueue.h>
@@ -80,6 +83,8 @@ static inline bool stop_one_cpu_nowait(unsigned int cpu,
 	return false;
 }
 
+static inline void print_stop_info(const char *log_lvl, struct task_struct *task) { }
+
 #endif	/* CONFIG_SMP */
 
 /*
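
The new caller field and print_stop_info() pair up for debugging stalled stoppers. A hedged sketch of how the SMP implementation could use them, assuming the per-cpu stopper state also records the function being run (an assumption about kernel/stop_machine.c, which this header hunk does not show):

	/*
	 * Sketch: if @task is a CPU's stopper thread, report what it runs
	 * and who queued it, via the address saved in cpu_stop_work::caller.
	 */
	void print_stop_info(const char *log_lvl, struct task_struct *task)
	{
		struct cpu_stopper *stopper = per_cpu_ptr(&cpu_stopper, task_cpu(task));

		if (task != stopper->thread)
			return;

		printk("%sStopper: %pS <- %pS\n",
		       log_lvl, stopper->fn, (void *)stopper->caller);
	}
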
9 changes: 8 additions & 1 deletion kernel/cpu.c
@@ -1602,7 +1602,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.name = "ap:online",
},
/*
* Handled on controll processor until the plugged processor manages
* Handled on control processor until the plugged processor manages
* this itself.
*/
[CPUHP_TEARDOWN_CPU] = {
@@ -1611,6 +1611,13 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 		.teardown.single	= takedown_cpu,
 		.cant_stop		= true,
 	},
+
+	[CPUHP_AP_SCHED_WAIT_EMPTY] = {
+		.name			= "sched:waitempty",
+		.startup.single		= NULL,
+		.teardown.single	= sched_cpu_wait_empty,
+	},
+
 	/* Handle smpboot threads park/unpark */
 	[CPUHP_AP_SMPBOOT_THREADS] = {
 		.name			= "smpboot/threads:online",
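
Note the placement: hotplug teardown walks the state table from high to low, so sched:waitempty runs after the smpboot threads are parked and just before the final stop-machine takedown. It gives the scheduler a point at which to wait until only per-cpu kthreads remain on the outgoing CPU; migrate-disabled tasks must first leave their critical sections before they can be pushed away. A hedged sketch of what the callback plausibly amounts to, with balance_hotplug_wait() assumed from elsewhere in this series:

	/*
	 * Sketch: block until the outgoing CPU's runqueue holds only
	 * per-cpu kthreads, then let the takedown proceed.
	 */
	int sched_cpu_wait_empty(unsigned int cpu)
	{
		balance_hotplug_wait();
		return 0;
	}
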