Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "The main changes in this cycle were:

   - fix affine wakeups (Peter Zijlstra)

   - improve CPU onlining (and general bootup) scalability on systems
     with a ridiculous number (thousands) of CPUs (Peter Zijlstra)

   - sched/numa updates (Rik van Riel)

   - sched/deadline updates (Byungchul Park)

   - sched/cpufreq enhancements and related cleanups (Viresh Kumar)

   - sched/debug enhancements (Xie XiuQi)

   - various fixes"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (27 commits)
  sched/debug: Optimize sched_domain sysctl generation
  sched/topology: Avoid pointless rebuild
  sched/topology, cpuset: Avoid spurious/wrong domain rebuilds
  sched/topology: Improve comments
  sched/topology: Fix memory leak in __sdt_alloc()
  sched/completion: Document that reinit_completion() must be called after complete_all()
  sched/autogroup: Fix error reporting printk text in autogroup_create()
  sched/fair: Fix wake_affine() for !NUMA_BALANCING
  sched/debug: Introduce task_state_to_char() helper function
  sched/debug: Show task state in /proc/sched_debug
  sched/debug: Use task_pid_nr_ns in /proc/$pid/sched
  sched/core: Remove unnecessary initialization init_idle_bootup_task()
  sched/deadline: Change return value of cpudl_find()
  sched/deadline: Make find_later_rq() choose a closer CPU in topology
  sched/numa: Scale scan period with tasks in group and shared/private
  sched/numa: Slow down scan rate if shared faults dominate
  sched/pelt: Fix false running accounting
  sched: Mark pick_next_task_dl() and build_sched_domain() as static
  sched/cpupri: Don't re-initialize 'struct cpupri'
  sched/deadline: Don't re-initialize 'struct cpudl'
  ...
torvalds committed Sep 4, 2017
2 parents 621bee3 + bbdacdf commit f213a6c
Showing 18 changed files with 459 additions and 263 deletions.
6 changes: 0 additions & 6 deletions arch/x86/include/asm/topology.h
@@ -75,12 +75,6 @@ static inline const struct cpumask *cpumask_of_node(int node)

 extern void setup_node_to_cpumask_map(void);
 
-/*
- * Returns the number of the node containing Node 'node'. This
- * architecture is flat, so it is a pretty simple function!
- */
-#define parent_node(node) (node)
-
 #define pcibus_to_node(bus)	__pcibus_to_node(bus)
 
 extern int __node_distance(int, int);
3 changes: 2 additions & 1 deletion fs/proc/base.c
@@ -1408,12 +1408,13 @@ static const struct file_operations proc_fail_nth_operations = {
 static int sched_show(struct seq_file *m, void *v)
 {
 	struct inode *inode = m->private;
+	struct pid_namespace *ns = inode->i_sb->s_fs_info;
 	struct task_struct *p;
 
 	p = get_proc_task(inode);
 	if (!p)
 		return -ESRCH;
-	proc_sched_show_task(p, m);
+	proc_sched_show_task(p, ns, m);
 
 	put_task_struct(p);
 
13 changes: 13 additions & 0 deletions include/linux/sched.h
@@ -1233,6 +1233,19 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk)
 	return task_pgrp_nr_ns(tsk, &init_pid_ns);
 }
 
+static inline char task_state_to_char(struct task_struct *task)
+{
+	const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
+	unsigned long state = task->state;
+
+	state = state ? __ffs(state) + 1 : 0;
+
+	/* Make sure the string lines up properly with the number of task states: */
+	BUILD_BUG_ON(sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1);
+
+	return state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?';
+}
+
 /**
  * is_global_init - check if a task structure is init. Since init
  * is free to have sub-threads we need to check tgid.
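For context, the helper's bit-to-character mapping can be sketched in a few lines of standalone C (an illustration only, not part of the patch; __builtin_ctzl() stands in for the kernel's __ffs()):

#include <stdio.h>

/* Same string the kernel indexes into; 'R' is TASK_RUNNING (state == 0). */
#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn"

static char state_to_char(unsigned long state)
{
	static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
	/* Index of the lowest set bit, plus one; zero means running. */
	unsigned long idx = state ? (unsigned long)__builtin_ctzl(state) + 1 : 0;

	return idx < sizeof(stat_nam) - 1 ? stat_nam[idx] : '?';
}

int main(void)
{
	printf("%c %c %c\n",
	       state_to_char(0),	/* -> 'R' (TASK_RUNNING) */
	       state_to_char(1),	/* -> 'S' (TASK_INTERRUPTIBLE) */
	       state_to_char(2));	/* -> 'D' (TASK_UNINTERRUPTIBLE) */
	return 0;
}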
4 changes: 3 additions & 1 deletion include/linux/sched/debug.h
@@ -6,6 +6,7 @@
  */
 
 struct task_struct;
+struct pid_namespace;
 
 extern void dump_cpu_task(int cpu);
 
@@ -34,7 +35,8 @@ extern void sched_show_task(struct task_struct *p);

 #ifdef CONFIG_SCHED_DEBUG
 struct seq_file;
-extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
+extern void proc_sched_show_task(struct task_struct *p,
+				 struct pid_namespace *ns, struct seq_file *m);
 extern void proc_sched_set_task(struct task_struct *p);
 #endif
 
1 change: 0 additions & 1 deletion include/linux/sched/task.h
@@ -30,7 +30,6 @@ extern int lockdep_tasklist_lock_is_held(void);

 extern asmlinkage void schedule_tail(struct task_struct *prev);
 extern void init_idle(struct task_struct *idle, int cpu);
-extern void init_idle_bootup_task(struct task_struct *idle);
 
 extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
 extern void sched_dead(struct task_struct *p);
8 changes: 8 additions & 0 deletions include/linux/sched/topology.h
@@ -71,6 +71,14 @@ struct sched_domain_shared {
 	atomic_t	ref;
 	atomic_t	nr_busy_cpus;
 	int		has_idle_cores;
+
+	/*
+	 * Some variables from the most recent sd_lb_stats for this domain,
+	 * used by wake_affine().
+	 */
+	unsigned long	nr_running;
+	unsigned long	load;
+	unsigned long	capacity;
 };
 
 struct sched_domain {
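These cached fields let a wakeup-time decision read per-LLC statistics in O(1) instead of re-walking the domain's CPUs. A hedged sketch of the kind of comparison they enable (hypothetical code, not the actual wake_affine() implementation):

/* Compare two LLC domains by load relative to capacity; the
 * cross-multiplication avoids a division on the wakeup fast path. */
struct llc_stats {
	unsigned long	nr_running;
	unsigned long	load;
	unsigned long	capacity;
};

static int this_llc_less_loaded(const struct llc_stats *this_llc,
				const struct llc_stats *prev_llc)
{
	return this_llc->load * prev_llc->capacity <
	       prev_llc->load * this_llc->capacity;
}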
1 change: 0 additions & 1 deletion init/main.c
@@ -430,7 +430,6 @@ static noinline void __ref rest_init(void)
 	 * The boot idle thread must execute schedule()
 	 * at least once to get things moving:
 	 */
-	init_idle_bootup_task(current);
 	schedule_preempt_disabled();
 	/* Call into cpu_idle with preempt disabled */
 	cpu_startup_entry(CPUHP_ONLINE);
6 changes: 0 additions & 6 deletions kernel/cgroup/cpuset.c
@@ -2344,13 +2344,7 @@ void cpuset_update_active_cpus(void)
 	 * We're inside cpu hotplug critical region which usually nests
 	 * inside cgroup synchronization. Bounce actual hotplug processing
 	 * to a work item to avoid reverse locking order.
-	 *
-	 * We still need to do partition_sched_domains() synchronously;
-	 * otherwise, the scheduler will get confused and put tasks to the
-	 * dead CPU. Fall back to the default single domain.
-	 * cpuset_hotplug_workfn() will rebuild it as necessary.
 	 */
-	partition_sched_domains(1, NULL, NULL);
 	schedule_work(&cpuset_hotplug_work);
 }
 
3 changes: 1 addition & 2 deletions kernel/sched/autogroup.c
@@ -71,7 +71,6 @@ static inline struct autogroup *autogroup_create(void)
 		goto out_fail;
 
 	tg = sched_create_group(&root_task_group);
-
 	if (IS_ERR(tg))
 		goto out_free;
 
@@ -101,7 +100,7 @@ static inline struct autogroup *autogroup_create(void)
 out_fail:
 	if (printk_ratelimit()) {
 		printk(KERN_WARNING "autogroup_create: %s failure.\n",
-			ag ? "sched_create_group()" : "kmalloc()");
+			ag ? "sched_create_group()" : "kzalloc()");
 	}
 
 	return autogroup_kref_get(&autogroup_default);
8 changes: 8 additions & 0 deletions kernel/sched/completion.c
@@ -47,6 +47,13 @@ EXPORT_SYMBOL(complete);
  *
  * It may be assumed that this function implies a write memory barrier before
  * changing the task state if and only if any tasks are woken up.
+ *
+ * Since complete_all() sets the completion of @x permanently to done
+ * to allow multiple waiters to finish, a call to reinit_completion()
+ * must be used on @x if @x is to be used again. The code must make
+ * sure that all waiters have woken and finished before reinitializing
+ * @x. Also note that the function completion_done() can not be used
+ * to know if there are still waiters after complete_all() has been called.
  */
 void complete_all(struct completion *x)
 {
@@ -297,6 +304,7 @@ EXPORT_SYMBOL(try_wait_for_completion);
  * Return: 0 if there are waiters (wait_for_completion() in progress)
  *	   1 if there are no waiters.
  *
+ * Note, this will always return true if complete_all() was called on @X.
  */
 bool completion_done(struct completion *x)
 {
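To make the documented rule concrete, a minimal kernel-style sketch (a hypothetical driver, not code from this commit) of the required reinit_completion() pairing:

#include <linux/completion.h>

static DECLARE_COMPLETION(op_done);

static void waiter(void)
{
	wait_for_completion(&op_done);	/* released by complete_all() */
}

static void finish_round(void)
{
	complete_all(&op_done);		/* 'done' becomes permanently set */
}

static void start_next_round(void)
{
	/* Caller must guarantee all waiters have already returned. */
	reinit_completion(&op_done);	/* mandatory after complete_all() */
}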
22 changes: 5 additions & 17 deletions kernel/sched/core.c
@@ -5133,24 +5133,17 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 	return retval;
 }
 
-static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
-
 void sched_show_task(struct task_struct *p)
 {
 	unsigned long free = 0;
 	int ppid;
-	unsigned long state = p->state;
-
-	/* Make sure the string lines up properly with the number of task states: */
-	BUILD_BUG_ON(sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1);
 
 	if (!try_get_task_stack(p))
 		return;
-	if (state)
-		state = __ffs(state) + 1;
-	printk(KERN_INFO "%-15.15s %c", p->comm,
-		state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
-	if (state == TASK_RUNNING)
+
+	printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p));
+
+	if (p->state == TASK_RUNNING)
 		printk(KERN_CONT "  running task    ");
 #ifdef CONFIG_DEBUG_STACK_USAGE
 	free = stack_not_used(p);
@@ -5207,11 +5200,6 @@ void show_state_filter(unsigned long state_filter)
 		debug_show_all_locks();
 }
 
-void init_idle_bootup_task(struct task_struct *idle)
-{
-	idle->sched_class = &idle_sched_class;
-}
-
 /**
  * init_idle - set up an idle thread for a given CPU
  * @idle: task in question
@@ -5468,7 +5456,7 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
 	 */
 	next = pick_next_task(rq, &fake_task, rf);
 	BUG_ON(!next);
-	next->sched_class->put_prev_task(rq, next);
+	put_prev_task(rq, next);
 
 	/*
 	 * Rules for changing task_struct::cpus_allowed are holding
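The migrate_tasks() hunk above is a cleanup rather than a behavioural change: put_prev_task() is already a thin wrapper in kernel/sched/sched.h that dispatches through the task's scheduling class, roughly:

static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
{
	prev->sched_class->put_prev_task(rq, prev);
}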
27 changes: 13 additions & 14 deletions kernel/sched/cpudeadline.c
@@ -119,29 +119,29 @@ static inline int cpudl_maximum(struct cpudl *cp)
  * @p: the task
  * @later_mask: a mask to fill in with the selected CPUs (or NULL)
  *
- * Returns: int - best CPU (heap maximum if suitable)
+ * Returns: int - CPUs were found
  */
 int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	       struct cpumask *later_mask)
 {
-	int best_cpu = -1;
 	const struct sched_dl_entity *dl_se = &p->dl;
 
 	if (later_mask &&
 	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
-		best_cpu = cpumask_any(later_mask);
-		goto out;
-	} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
-			dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
-		best_cpu = cpudl_maximum(cp);
-		if (later_mask)
-			cpumask_set_cpu(best_cpu, later_mask);
-	}
+		return 1;
+	} else {
+		int best_cpu = cpudl_maximum(cp);
+		WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));
 
-out:
-	WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));
+		if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) &&
+		    dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
+			if (later_mask)
+				cpumask_set_cpu(best_cpu, later_mask);
 
-	return best_cpu;
+			return 1;
+		}
+	}
+	return 0;
 }
 
 /*
@@ -246,7 +246,6 @@ int cpudl_init(struct cpudl *cp)
 {
 	int i;
 
-	memset(cp, 0, sizeof(*cp));
 	raw_spin_lock_init(&cp->lock);
 	cp->size = 0;
 
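The return-value change flips the caller idiom from "which CPU?" to "did we find any?", with the candidate CPUs reported through later_mask. A before/after sketch of a caller, condensed from the find_later_rq() hunks below:

/* Before: cpudl_find() returned the best CPU, or -1 on failure. */
best_cpu = cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask);
if (best_cpu == -1)
	return -1;

/* After: it returns 1/0, and callers pick a CPU out of later_mask. */
if (!cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask))
	return -1;
cpu = cpumask_first_and(later_mask, sched_domain_span(sd));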
2 changes: 0 additions & 2 deletions kernel/sched/cpupri.c
@@ -209,8 +209,6 @@ int cpupri_init(struct cpupri *cp)
 {
 	int i;
 
-	memset(cp, 0, sizeof(*cp));
-
 	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
 		struct cpupri_vec *vec = &cp->pri_to_cpu[i];
 
33 changes: 17 additions & 16 deletions kernel/sched/deadline.c
@@ -1594,15 +1594,15 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
 	 * let's hope p can move out.
 	 */
 	if (rq->curr->nr_cpus_allowed == 1 ||
-	    cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
+	    !cpudl_find(&rq->rd->cpudl, rq->curr, NULL))
 		return;
 
 	/*
 	 * p is migratable, so let's not schedule it and
 	 * see if it is pushed or pulled somewhere else.
 	 */
 	if (p->nr_cpus_allowed != 1 &&
-	    cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
+	    cpudl_find(&rq->rd->cpudl, p, NULL))
 		return;
 
 	resched_curr(rq);
@@ -1655,7 +1655,7 @@ static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
 	return rb_entry(left, struct sched_dl_entity, rb_node);
 }
 
-struct task_struct *
+static struct task_struct *
 pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
 	struct sched_dl_entity *dl_se;
@@ -1798,7 +1798,7 @@ static int find_later_rq(struct task_struct *task)
 	struct sched_domain *sd;
 	struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
 	int this_cpu = smp_processor_id();
-	int best_cpu, cpu = task_cpu(task);
+	int cpu = task_cpu(task);
 
 	/* Make sure the mask is initialized first */
 	if (unlikely(!later_mask))
@@ -1811,17 +1811,14 @@ static int find_later_rq(struct task_struct *task)
 	 * We have to consider system topology and task affinity
 	 * first, then we can look for a suitable cpu.
 	 */
-	best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
-			task, later_mask);
-	if (best_cpu == -1)
+	if (!cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask))
 		return -1;
 
 	/*
-	 * If we are here, some target has been found,
-	 * the most suitable of which is cached in best_cpu.
-	 * This is, among the runqueues where the current tasks
-	 * have later deadlines than the task's one, the rq
-	 * with the latest possible one.
+	 * If we are here, some targets have been found, including
+	 * the most suitable which is, among the runqueues where the
+	 * current tasks have later deadlines than the task's one, the
+	 * rq with the latest possible one.
 	 *
 	 * Now we check how well this matches with task's
 	 * affinity and system topology.
@@ -1841,6 +1838,7 @@ static int find_later_rq(struct task_struct *task)
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
 		if (sd->flags & SD_WAKE_AFFINE) {
+			int best_cpu;
 
 			/*
 			 * If possible, preempting this_cpu is
@@ -1852,12 +1850,15 @@ static int find_later_rq(struct task_struct *task)
 				return this_cpu;
 			}
 
+			best_cpu = cpumask_first_and(later_mask,
+					sched_domain_span(sd));
 			/*
-			 * Last chance: if best_cpu is valid and is
-			 * in the mask, that becomes our choice.
+			 * Last chance: if a cpu being in both later_mask
+			 * and current sd span is valid, that becomes our
+			 * choice. Of course, the latest possible cpu is
+			 * already under consideration through later_mask.
 			 */
-			if (best_cpu < nr_cpu_ids &&
-			    cpumask_test_cpu(best_cpu, sched_domain_span(sd))) {
+			if (best_cpu < nr_cpu_ids) {
 				rcu_read_unlock();
 				return best_cpu;
 			}