Skip to content

Commit

Permalink
Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Browse files Browse the repository at this point in the history

Pull scheduler fixes from Ingo Molnar.

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched: Remove NULL assignment of dattr_cur
  sched: Remove the last NULL entry from sched_feat_names
  sched: Make sched_feat_names const
  sched/rt: Fix SCHED_RR across cgroups
  sched: Move nr_cpus_allowed out of 'struct sched_rt_entity'
  sched: Make sure to not re-read variables after validation
  sched: Fix SD_OVERLAP
  sched: Don't try allocating memory from offline nodes
  sched/nohz: Fix rq->cpu_load calculations some more
  sched/x86: Use cpu_llc_shared_mask(cpu) for coregroup_mask
  • Loading branch information
torvalds committed Jun 5, 2012
2 parents 99becf1 + 6a4c96e commit 0b3e9f3
Show file tree
Hide file tree
Showing 8 changed files with 120 additions and 59 deletions.
2 changes: 1 addition & 1 deletion arch/blackfin/kernel/process.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ asmlinkage int bfin_clone(struct pt_regs *regs)
unsigned long newsp;

#ifdef __ARCH_SYNC_CORE_DCACHE
if (current->rt.nr_cpus_allowed == num_possible_cpus())
if (current->nr_cpus_allowed == num_possible_cpus())
set_cpus_allowed_ptr(current, cpumask_of(smp_processor_id()));
#endif

Expand Down
10 changes: 1 addition & 9 deletions arch/x86/kernel/smpboot.c
Original file line number Diff line number Diff line change
Expand Up @@ -410,15 +410,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
/* maps the cpu to the sched domain representing multi-core */
const struct cpumask *cpu_coregroup_mask(int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
/*
* For perf, we return last level cache shared map.
* And for power savings, we return cpu_core_map
*/
if (!(cpu_has(c, X86_FEATURE_AMD_DCM)))
return cpu_core_mask(cpu);
else
return cpu_llc_shared_mask(cpu);
return cpu_llc_shared_mask(cpu);
}

static void impress_friends(void)
Expand Down
2 changes: 1 addition & 1 deletion include/linux/init_task.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ extern struct cred init_cred;
.normal_prio = MAX_PRIO-20, \
.policy = SCHED_NORMAL, \
.cpus_allowed = CPU_MASK_ALL, \
.nr_cpus_allowed= NR_CPUS, \
.mm = NULL, \
.active_mm = &init_mm, \
.se = { \
Expand All @@ -157,7 +158,6 @@ extern struct cred init_cred;
.rt = { \
.run_list = LIST_HEAD_INIT(tsk.rt.run_list), \
.time_slice = RR_TIMESLICE, \
.nr_cpus_allowed = NR_CPUS, \
}, \
.tasks = LIST_HEAD_INIT(tsk.tasks), \
INIT_PUSHABLE_TASKS(tsk) \
Expand Down
3 changes: 2 additions & 1 deletion include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ extern unsigned long this_cpu_load(void);


extern void calc_global_load(unsigned long ticks);
extern void update_cpu_load_nohz(void);

extern unsigned long get_parent_ip(unsigned long addr);

Expand Down Expand Up @@ -1187,7 +1188,6 @@ struct sched_rt_entity {
struct list_head run_list;
unsigned long timeout;
unsigned int time_slice;
int nr_cpus_allowed;

struct sched_rt_entity *back;
#ifdef CONFIG_RT_GROUP_SCHED
Expand Down Expand Up @@ -1252,6 +1252,7 @@ struct task_struct {
#endif

unsigned int policy;
int nr_cpus_allowed;
cpumask_t cpus_allowed;

#ifdef CONFIG_PREEMPT_RCU
Expand Down
68 changes: 51 additions & 17 deletions kernel/sched/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,8 @@ const_debug unsigned int sysctl_sched_features =
#define SCHED_FEAT(name, enabled) \
#name ,

static __read_mostly char *sched_feat_names[] = {
static const char * const sched_feat_names[] = {
#include "features.h"
NULL
};

#undef SCHED_FEAT
Expand Down Expand Up @@ -2517,25 +2516,32 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
sched_avg_update(this_rq);
}

#ifdef CONFIG_NO_HZ
/*
* There is no sane way to deal with nohz on smp when using jiffies because the
* cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
* causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
*
* Therefore we cannot use the delta approach from the regular tick since that
* would seriously skew the load calculation. However we'll make do for those
* updates happening while idle (nohz_idle_balance) or coming out of idle
* (tick_nohz_idle_exit).
*
* This means we might still be one tick off for nohz periods.
*/

/*
* Called from nohz_idle_balance() to update the load ratings before doing the
* idle balance.
*/
void update_idle_cpu_load(struct rq *this_rq)
{
unsigned long curr_jiffies = jiffies;
unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
unsigned long load = this_rq->load.weight;
unsigned long pending_updates;

/*
* Bloody broken means of dealing with nohz, but better than nothing..
* jiffies is updated by one cpu, another cpu can drift wrt the jiffy
* update and see 0 difference the one time and 2 the next, even though
* we ticked at roughly the same rate.
*
* Hence we only use this from nohz_idle_balance() and skip this
* nonsense when called from the scheduler_tick() since that's
* guaranteed a stable rate.
* bail if there's load or we're actually up-to-date.
*/
if (load || curr_jiffies == this_rq->last_load_update_tick)
return;
Expand All @@ -2546,13 +2552,39 @@ void update_idle_cpu_load(struct rq *this_rq)
__update_cpu_load(this_rq, load, pending_updates);
}

/*
* Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
*
* Compares the current jiffies value against this runqueue's
* last_load_update_tick and, when they differ, hands the number of elapsed
* jiffies to __update_cpu_load() with a load of 0. Takes no arguments and
* returns nothing; it operates on the runqueue returned by this_rq().
*/
void update_cpu_load_nohz(void)
{
struct rq *this_rq = this_rq();
/* Read jiffies exactly once so the check and the delta use the same value. */
unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
unsigned long pending_updates;

/* Early exit before taking the lock: the tick stamp is already current. */
if (curr_jiffies == this_rq->last_load_update_tick)
return;

raw_spin_lock(&this_rq->lock);
/*
* Recomputed under the rq lock; presumably last_load_update_tick can be
* advanced by another path between the unlocked check above and here,
* which is why the delta is tested again -- TODO confirm against callers.
*/
pending_updates = curr_jiffies - this_rq->last_load_update_tick;
if (pending_updates) {
/* Stamp first, then account for the missed ticks in one call. */
this_rq->last_load_update_tick = curr_jiffies;
/*
* We were idle, this means load 0, the current load might be
* !0 due to remote wakeups and the sort.
*/
__update_cpu_load(this_rq, 0, pending_updates);
}
raw_spin_unlock(&this_rq->lock);
}
#endif /* CONFIG_NO_HZ */

/*
* Called from scheduler_tick()
*/
static void update_cpu_load_active(struct rq *this_rq)
{
/*
* See the mess in update_idle_cpu_load().
* See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
*/
this_rq->last_load_update_tick = jiffies;
__update_cpu_load(this_rq, this_rq->load.weight, 1);
Expand Down Expand Up @@ -4982,7 +5014,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
p->sched_class->set_cpus_allowed(p, new_mask);

cpumask_copy(&p->cpus_allowed, new_mask);
p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
p->nr_cpus_allowed = cpumask_weight(new_mask);
}

/*
Expand Down Expand Up @@ -5997,11 +6029,14 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)

cpumask_or(covered, covered, sg_span);

sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span));
sg->sgp = *per_cpu_ptr(sdd->sgp, i);
atomic_inc(&sg->sgp->ref);

if (cpumask_test_cpu(cpu, sg_span))
if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
cpumask_first(sg_span) == cpu) {
WARN_ON_ONCE(!cpumask_test_cpu(cpu, sg_span));
groups = sg;
}

if (!first)
first = sg;
Expand Down Expand Up @@ -6403,7 +6438,7 @@ static void sched_init_numa(void)
return;

for (j = 0; j < nr_node_ids; j++) {
struct cpumask *mask = kzalloc_node(cpumask_size(), GFP_KERNEL, j);
struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL);
if (!mask)
return;

Expand Down Expand Up @@ -6691,7 +6726,6 @@ static int init_sched_domains(const struct cpumask *cpu_map)
if (!doms_cur)
doms_cur = &fallback_doms;
cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
dattr_cur = NULL;
err = build_sched_domains(doms_cur[0], NULL);
register_sched_domain_sysctl();

Expand Down
42 changes: 32 additions & 10 deletions kernel/sched/fair.c
Original file line number Diff line number Diff line change
Expand Up @@ -2703,7 +2703,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
int want_sd = 1;
int sync = wake_flags & WF_SYNC;

if (p->rt.nr_cpus_allowed == 1)
if (p->nr_cpus_allowed == 1)
return prev_cpu;

if (sd_flag & SD_BALANCE_WAKE) {
Expand Down Expand Up @@ -3503,15 +3503,22 @@ unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
unsigned long scale_rt_power(int cpu)
{
struct rq *rq = cpu_rq(cpu);
u64 total, available;
u64 total, available, age_stamp, avg;

total = sched_avg_period() + (rq->clock - rq->age_stamp);
/*
* Since we're reading these variables without serialization make sure
* we read them once before doing sanity checks on them.
*/
age_stamp = ACCESS_ONCE(rq->age_stamp);
avg = ACCESS_ONCE(rq->rt_avg);

total = sched_avg_period() + (rq->clock - age_stamp);

if (unlikely(total < rq->rt_avg)) {
if (unlikely(total < avg)) {
/* Ensures that power won't end up being negative */
available = 0;
} else {
available = total - rq->rt_avg;
available = total - avg;
}

if (unlikely((s64)total < SCHED_POWER_SCALE))
Expand Down Expand Up @@ -3574,11 +3581,26 @@ void update_group_power(struct sched_domain *sd, int cpu)

power = 0;

group = child->groups;
do {
power += group->sgp->power;
group = group->next;
} while (group != child->groups);
if (child->flags & SD_OVERLAP) {
/*
* SD_OVERLAP domains cannot assume that child groups
* span the current group.
*/

for_each_cpu(cpu, sched_group_cpus(sdg))
power += power_of(cpu);
} else {
/*
* !SD_OVERLAP domains can assume that child groups
* span the current group.
*/

group = child->groups;
do {
power += group->sgp->power;
group = group->next;
} while (group != child->groups);
}

sdg->sgp->power = power;
}
Expand Down
Loading

0 comments on commit 0b3e9f3

Please sign in to comment.