Merge tag 'sched-urgent-2020-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Thomas Gleixner:
 "A set of scheduler fixes:

   - Address a load balancer regression by making the load balancer use
     the same logic as the wakeup path to spread tasks in the LLC domain

   - Prefer the CPU on which a task ran last over the local CPU in the
     fast wakeup path for asymmetric CPU capacity systems to align with
     the symmetric case. This ensures more locality and prevents massive
     migration overhead on those asymmetric systems

   - Fix a memory corruption bug in the scheduler debug code caused by
     handing a modified buffer pointer to kfree()"

* tag 'sched-urgent-2020-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/debug: Fix memory corruption caused by multiple small reads of flags
  sched/fair: Prefer prev cpu in asymmetric wakeup path
  sched/fair: Ensure tasks spreading in LLC during LB
torvalds committed Nov 15, 2020
2 parents 259c2fb + 8d4d9c7 commit d0a37fd
Showing 2 changed files with 51 additions and 31 deletions.
12 changes: 6 additions & 6 deletions kernel/sched/debug.c
@@ -251,7 +251,7 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
 	unsigned long flags = *(unsigned long *)table->data;
 	size_t data_size = 0;
 	size_t len = 0;
-	char *tmp;
+	char *tmp, *buf;
 	int idx;
 
 	if (write)
@@ -269,17 +269,17 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
 		return 0;
 	}
 
-	tmp = kcalloc(data_size + 1, sizeof(*tmp), GFP_KERNEL);
-	if (!tmp)
+	buf = kcalloc(data_size + 1, sizeof(*buf), GFP_KERNEL);
+	if (!buf)
 		return -ENOMEM;
 
 	for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
 		char *name = sd_flag_debug[idx].name;
 
-		len += snprintf(tmp + len, strlen(name) + 2, "%s ", name);
+		len += snprintf(buf + len, strlen(name) + 2, "%s ", name);
 	}
 
-	tmp += *ppos;
+	tmp = buf + *ppos;
 	len -= *ppos;
 
 	if (len > *lenp)
@@ -294,7 +294,7 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
 	*lenp = len;
 	*ppos += len;
 
-	kfree(tmp);
+	kfree(buf);
 
 	return 0;
 }
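The debug.c fix above boils down to a general rule: kfree() must receive the exact pointer that the allocator returned. The old code advanced tmp by *ppos to serve partial reads and then freed the advanced pointer, corrupting allocator metadata once the flags file was read in small chunks. Below is a minimal userspace sketch of the corrected pattern; plain malloc/free stand in for kcalloc/kfree, and render_flags and BUF_SZ are invented names for illustration, not kernel APIs.

/*
 * Userspace sketch (not kernel code): keep the allocator-returned base
 * pointer and free only that, even when the read cursor is advanced
 * into the buffer.
 */
#include <stdio.h>
#include <stdlib.h>

#define BUF_SZ 64

static size_t render_flags(char *dst, size_t cap)
{
	/* Stand-in for the loop that prints one name per set flag. */
	return (size_t)snprintf(dst, cap, "SD_BALANCE_NEWIDLE SD_BALANCE_EXEC ");
}

int main(void)
{
	size_t ppos = 8;                 /* offset of a follow-up partial read */
	char *buf = calloc(BUF_SZ, 1);   /* base pointer: the only thing free() may see */
	char *tmp;
	size_t len;

	if (!buf)
		return 1;

	len = render_flags(buf, BUF_SZ);
	tmp = buf + ppos;                /* advance a separate cursor, not buf */
	printf("%.*s\n", (int)(len - ppos), tmp);

	free(buf);                       /* freeing tmp here would corrupt the heap */
	return 0;
}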
70 changes: 45 additions & 25 deletions kernel/sched/fair.c
@@ -6172,21 +6172,21 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 static int
 select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
 {
-	unsigned long best_cap = 0;
+	unsigned long task_util, best_cap = 0;
 	int cpu, best_cpu = -1;
 	struct cpumask *cpus;
 
-	sync_entity_load_avg(&p->se);
-
 	cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
 	cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
 
+	task_util = uclamp_task_util(p);
+
 	for_each_cpu_wrap(cpu, cpus, target) {
 		unsigned long cpu_cap = capacity_of(cpu);
 
 		if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
 			continue;
-		if (task_fits_capacity(p, cpu_cap))
+		if (fits_capacity(task_util, cpu_cap))
 			return cpu;
 
 		if (cpu_cap > best_cap) {
@@ -6198,44 +6198,42 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
 	return best_cpu;
 }
 
+static inline bool asym_fits_capacity(int task_util, int cpu)
+{
+	if (static_branch_unlikely(&sched_asym_cpucapacity))
+		return fits_capacity(task_util, capacity_of(cpu));
+
+	return true;
+}
+
 /*
  * Try and locate an idle core/thread in the LLC cache domain.
  */
 static int select_idle_sibling(struct task_struct *p, int prev, int target)
 {
 	struct sched_domain *sd;
+	unsigned long task_util;
 	int i, recent_used_cpu;
 
 	/*
-	 * For asymmetric CPU capacity systems, our domain of interest is
-	 * sd_asym_cpucapacity rather than sd_llc.
+	 * On asymmetric system, update task utilization because we will check
+	 * that the task fits with cpu's capacity.
 	 */
 	if (static_branch_unlikely(&sched_asym_cpucapacity)) {
-		sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
-		/*
-		 * On an asymmetric CPU capacity system where an exclusive
-		 * cpuset defines a symmetric island (i.e. one unique
-		 * capacity_orig value through the cpuset), the key will be set
-		 * but the CPUs within that cpuset will not have a domain with
-		 * SD_ASYM_CPUCAPACITY. These should follow the usual symmetric
-		 * capacity path.
-		 */
-		if (!sd)
-			goto symmetric;
-
-		i = select_idle_capacity(p, sd, target);
-		return ((unsigned)i < nr_cpumask_bits) ? i : target;
+		sync_entity_load_avg(&p->se);
+		task_util = uclamp_task_util(p);
 	}
 
-symmetric:
-	if (available_idle_cpu(target) || sched_idle_cpu(target))
+	if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
+	    asym_fits_capacity(task_util, target))
 		return target;
 
 	/*
 	 * If the previous CPU is cache affine and idle, don't be stupid:
 	 */
 	if (prev != target && cpus_share_cache(prev, target) &&
-	    (available_idle_cpu(prev) || sched_idle_cpu(prev)))
+	    (available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
+	    asym_fits_capacity(task_util, prev))
 		return prev;
 
 	/*
@@ -6258,7 +6256,8 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	    recent_used_cpu != target &&
 	    cpus_share_cache(recent_used_cpu, target) &&
 	    (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
-	    cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) {
+	    cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
+	    asym_fits_capacity(task_util, recent_used_cpu)) {
 		/*
 		 * Replace recent_used_cpu with prev as it is a potential
 		 * candidate for the next wake:
@@ -6267,6 +6266,26 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 		return recent_used_cpu;
 	}
 
+	/*
+	 * For asymmetric CPU capacity systems, our domain of interest is
+	 * sd_asym_cpucapacity rather than sd_llc.
+	 */
+	if (static_branch_unlikely(&sched_asym_cpucapacity)) {
+		sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
+		/*
+		 * On an asymmetric CPU capacity system where an exclusive
+		 * cpuset defines a symmetric island (i.e. one unique
+		 * capacity_orig value through the cpuset), the key will be set
+		 * but the CPUs within that cpuset will not have a domain with
+		 * SD_ASYM_CPUCAPACITY. These should follow the usual symmetric
+		 * capacity path.
+		 */
+		if (sd) {
+			i = select_idle_capacity(p, sd, target);
+			return ((unsigned)i < nr_cpumask_bits) ? i : target;
+		}
+	}
+
 	sd = rcu_dereference(per_cpu(sd_llc, target));
 	if (!sd)
 		return target;
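The select_idle_sibling() hunks above gate each fast-path candidate (target, prev, recent_used_cpu) behind asym_fits_capacity(), so that on asymmetric CPU capacity systems a previously used big core is preferred over a local little core the task would not fit on, while symmetric systems see no change (the helper collapses to returning true when sched_asym_cpucapacity is not enabled). The toy model below only illustrates the shape of that check; it is a sketch, not the kernel implementation, and the 1280/1024 margin (roughly 20% headroom, in the spirit of the kernel's fits_capacity()) should be read as an assumption of the example.

/* Simplified model of the capacity-fit gate used in the wakeup fast path. */
#include <stdbool.h>
#include <stdio.h>

static bool fits_capacity_model(unsigned long util, unsigned long cap)
{
	/* Accept only if utilization fits with ~20% headroom. */
	return util * 1280 < cap * 1024;
}

static bool asym_fits_capacity_model(bool asym_system,
				     unsigned long task_util,
				     unsigned long cpu_cap)
{
	/* On symmetric systems every CPU is equivalent: always accept. */
	if (!asym_system)
		return true;
	return fits_capacity_model(task_util, cpu_cap);
}

int main(void)
{
	/* A 300-util task fits a big core (1024) but not a little one (256). */
	printf("big:    %d\n", asym_fits_capacity_model(true, 300, 1024));
	printf("little: %d\n", asym_fits_capacity_model(true, 300, 256));
	return 0;
}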
@@ -9031,7 +9050,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 	 * emptying busiest.
 	 */
 	if (local->group_type == group_has_spare) {
-		if (busiest->group_type > group_fully_busy) {
+		if ((busiest->group_type > group_fully_busy) &&
+		    !(env->sd->flags & SD_SHARE_PKG_RESOURCES)) {
 			/*
 			 * If busiest is overloaded, try to fill spare
 			 * capacity. This might end up creating spare capacity
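The final hunk restricts utilization-based balancing to domains that do not share package resources, so inside an LLC the load balancer spreads by number of running tasks, matching the policy the wakeup path uses. The toy decision function below is written only to illustrate that branch; the enum and names are invented for the sketch and are not the scheduler's actual types.

/* Toy model of the branch in calculate_imbalance() when local has spare capacity. */
#include <stdbool.h>
#include <stdio.h>

enum migration_type_model { MIGRATE_TASK, MIGRATE_UTIL };

static enum migration_type_model
pick_migration_type(bool busiest_overloaded, bool llc_domain)
{
	if (busiest_overloaded && !llc_domain)
		return MIGRATE_UTIL;   /* fill spare capacity by moving load */
	return MIGRATE_TASK;           /* spread tasks evenly within the LLC */
}

int main(void)
{
	printf("overloaded, cross-LLC: %s\n",
	       pick_migration_type(true, false) == MIGRATE_UTIL ? "util" : "task");
	printf("overloaded, in-LLC:    %s\n",
	       pick_migration_type(true, true) == MIGRATE_UTIL ? "util" : "task");
	return 0;
}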
