Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull more scheduler updates from Ingo Molnar:
 "Second round of scheduler changes:
   - try-to-wakeup and IPI reduction speedups, from Andy Lutomirski
   - continued power scheduling cleanups and refactorings, from Nicolas
     Pitre
   - misc fixes and enhancements"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/deadline: Delete extraneous extern for to_ratio()
  sched/idle: Optimize try-to-wake-up IPI
  sched/idle: Simplify wake_up_idle_cpu()
  sched/idle: Clear polling before descheduling the idle thread
  sched, trace: Add a tracepoint for IPI-less remote wakeups
  cpuidle: Set polling in poll_idle
  sched: Remove redundant assignment to "rt_rq" in update_curr_rt(...)
  sched: Rename capacity related flags
  sched: Final power vs. capacity cleanups
  sched: Remove remaining dubious usage of "power"
  sched: Let 'struct sched_group_power' care about CPU capacity
  sched/fair: Disambiguate existing/remaining "capacity" usage
  sched/fair: Change "has_capacity" to "has_free_capacity"
  sched/fair: Remove "power" from 'struct numa_stats'
  sched: Fix signedness bug in yield_to()
  sched/fair: Use time_after() in record_wakee()
  sched/balancing: Reduce the rate of needless idle load balancing
  sched/fair: Fix unlocked reads of some cfs_b->quota/period
torvalds committed Jun 13, 2014
2 parents 3737a12 + 535560d commit b2e09f6
Showing 14 changed files with 416 additions and 326 deletions.
54 changes: 27 additions & 27 deletions arch/arm/kernel/topology.c
@@ -26,30 +26,30 @@
#include <asm/topology.h>

/*
- * cpu power scale management
+ * cpu capacity scale management
*/

/*
- * cpu power table
+ * cpu capacity table
* This per cpu data structure describes the relative capacity of each core.
* On a heteregenous system, cores don't have the same computation capacity
- * and we reflect that difference in the cpu_power field so the scheduler can
- * take this difference into account during load balance. A per cpu structure
- * is preferred because each CPU updates its own cpu_power field during the
- * load balance except for idle cores. One idle core is selected to run the
- * rebalance_domains for all idle cores and the cpu_power can be updated
- * during this sequence.
+ * and we reflect that difference in the cpu_capacity field so the scheduler
+ * can take this difference into account during load balance. A per cpu
+ * structure is preferred because each CPU updates its own cpu_capacity field
+ * during the load balance except for idle cores. One idle core is selected
+ * to run the rebalance_domains for all idle cores and the cpu_capacity can be
+ * updated during this sequence.
*/
static DEFINE_PER_CPU(unsigned long, cpu_scale);

- unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
+ unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
{
return per_cpu(cpu_scale, cpu);
}

- static void set_power_scale(unsigned int cpu, unsigned long power)
+ static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
{
- per_cpu(cpu_scale, cpu) = power;
+ per_cpu(cpu_scale, cpu) = capacity;
}

#ifdef CONFIG_OF
@@ -62,11 +62,11 @@ struct cpu_efficiency {
* Table of relative efficiency of each processors
* The efficiency value must fit in 20bit and the final
* cpu_scale value must be in the range
- * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2
+ * 0 < cpu_scale < 3*SCHED_CAPACITY_SCALE/2
* in order to return at most 1 when DIV_ROUND_CLOSEST
* is used to compute the capacity of a CPU.
* Processors that are not defined in the table,
- * use the default SCHED_POWER_SCALE value for cpu_scale.
+ * use the default SCHED_CAPACITY_SCALE value for cpu_scale.
*/
static const struct cpu_efficiency table_efficiency[] = {
{"arm,cortex-a15", 3891},
@@ -83,9 +83,9 @@ static unsigned long middle_capacity = 1;
* Iterate all CPUs' descriptor in DT and compute the efficiency
* (as per table_efficiency). Also calculate a middle efficiency
* as close as possible to (max{eff_i} - min{eff_i}) / 2
- * This is later used to scale the cpu_power field such that an
- * 'average' CPU is of middle power. Also see the comments near
- * table_efficiency[] and update_cpu_power().
+ * This is later used to scale the cpu_capacity field such that an
+ * 'average' CPU is of middle capacity. Also see the comments near
+ * table_efficiency[] and update_cpu_capacity().
*/
static void __init parse_dt_topology(void)
{
@@ -141,15 +141,15 @@ static void __init parse_dt_topology(void)
* cpu_scale because all CPUs have the same capacity. Otherwise, we
* compute a middle_capacity factor that will ensure that the capacity
* of an 'average' CPU of the system will be as close as possible to
- * SCHED_POWER_SCALE, which is the default value, but with the
+ * SCHED_CAPACITY_SCALE, which is the default value, but with the
* constraint explained near table_efficiency[].
*/
if (4*max_capacity < (3*(max_capacity + min_capacity)))
middle_capacity = (min_capacity + max_capacity)
- >> (SCHED_POWER_SHIFT+1);
+ >> (SCHED_CAPACITY_SHIFT+1);
else
middle_capacity = ((max_capacity / 3)
- >> (SCHED_POWER_SHIFT-1)) + 1;
+ >> (SCHED_CAPACITY_SHIFT-1)) + 1;

}

@@ -158,20 +158,20 @@ static void __init parse_dt_topology(void)
* boot. The update of all CPUs is in O(n^2) for heteregeneous system but the
* function returns directly for SMP system.
*/
- static void update_cpu_power(unsigned int cpu)
+ static void update_cpu_capacity(unsigned int cpu)
{
if (!cpu_capacity(cpu))
return;

- set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity);
+ set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity);

- printk(KERN_INFO "CPU%u: update cpu_power %lu\n",
- cpu, arch_scale_freq_power(NULL, cpu));
+ printk(KERN_INFO "CPU%u: update cpu_capacity %lu\n",
+ cpu, arch_scale_freq_capacity(NULL, cpu));
}

#else
static inline void parse_dt_topology(void) {}
- static inline void update_cpu_power(unsigned int cpuid) {}
+ static inline void update_cpu_capacity(unsigned int cpuid) {}
#endif

/*
@@ -267,7 +267,7 @@ void store_cpu_topology(unsigned int cpuid)

update_siblings_masks(cpuid);

- update_cpu_power(cpuid);
+ update_cpu_capacity(cpuid);

printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
cpuid, cpu_topology[cpuid].thread_id,
@@ -297,7 +297,7 @@ void __init init_cpu_topology(void)
{
unsigned int cpu;

- /* init core mask and power*/
+ /* init core mask and capacity */
for_each_possible_cpu(cpu) {
struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);

@@ -307,7 +307,7 @@ void __init init_cpu_topology(void)
cpumask_clear(&cpu_topo->core_sibling);
cpumask_clear(&cpu_topo->thread_sibling);

- set_power_scale(cpu, SCHED_POWER_SCALE);
+ set_capacity_scale(cpu, SCHED_CAPACITY_SCALE);
}
smp_wmb();

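As a rough illustration of where the renamed scaling ends up, here is a small standalone C sketch of the math above, using made-up device-tree values: a 2 GHz core with efficiency 3891 as in table_efficiency[], plus a hypothetical 1 GHz core with efficiency 2048. The per-cpu capacity is computed roughly as (clock-frequency >> 20) * efficiency in a part of parse_dt_topology() that this diff does not show; this only approximates the kernel logic and is not kernel code.

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT 10
#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)

int main(void)
{
    /* hypothetical big/LITTLE pair: (clock-frequency >> 20) * efficiency */
    unsigned long big    = (2000000000UL >> 20) * 3891;
    unsigned long little = (1000000000UL >> 20) * 2048;
    unsigned long max_capacity = big, min_capacity = little;
    unsigned long middle_capacity;

    /* same branch structure as parse_dt_topology() above */
    if (4 * max_capacity < 3 * (max_capacity + min_capacity))
        middle_capacity = (min_capacity + max_capacity)
                >> (SCHED_CAPACITY_SHIFT + 1);
    else
        middle_capacity = ((max_capacity / 3)
                >> (SCHED_CAPACITY_SHIFT - 1)) + 1;

    /* update_cpu_capacity(): per-cpu scale relative to the "middle" CPU */
    printf("big=%lu LITTLE=%lu (SCHED_CAPACITY_SCALE=%ld)\n",
           big / middle_capacity, little / middle_capacity,
           SCHED_CAPACITY_SCALE);
    return 0;
}

With these inputs the faster core lands just under the 3*SCHED_CAPACITY_SCALE/2 bound mentioned near table_efficiency[] (about 1535) and the slower one around 400, while an 'average' core would come out close to SCHED_CAPACITY_SCALE (1024).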
2 changes: 1 addition & 1 deletion arch/powerpc/kernel/smp.c
@@ -749,7 +749,7 @@ int setup_profiling_timer(unsigned int multiplier)
/* cpumask of CPUs with asymetric SMT dependancy */
static const int powerpc_smt_flags(void)
{
- int flags = SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES;
+ int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;

if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
7 changes: 5 additions & 2 deletions drivers/cpuidle/driver.c
@@ -187,8 +187,11 @@ static int poll_idle(struct cpuidle_device *dev,

t1 = ktime_get();
local_irq_enable();
- while (!need_resched())
- cpu_relax();
+ if (!current_set_polling_and_test()) {
+ while (!need_resched())
+ cpu_relax();
+ }
+ current_clr_polling();

t2 = ktime_get();
diff = ktime_to_us(ktime_sub(t2, t1));
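The poll_idle() change above is one half of the IPI-less wakeup scheme in this merge: the idle loop now advertises, via current_set_polling_and_test(), that it is spinning on the need-resched flag, and clears that state with current_clr_polling() before it leaves. The other half lives in the scheduler's wakeup paths. A simplified waker-side sketch (not the literal code from kernel/sched/core.c; idle_task_for() is a hypothetical accessor for the target CPU's idle task):

static void wake_idle_cpu_sketch(int cpu)
{
    struct task_struct *idle = idle_task_for(cpu);    /* hypothetical helper */

    if (test_tsk_thread_flag(idle, TIF_POLLING_NRFLAG)) {
        /* polling idle loop: its need_resched() check will see this flag */
        set_tsk_need_resched(idle);
        /* the real code is careful about ordering against the idle loop */
        trace_sched_wake_idle_without_ipi(cpu);
    } else {
        /* not polling (e.g. a deep C-state): fall back to a reschedule IPI */
        smp_send_reschedule(cpu);
    }
}

The "_and_test" half of current_set_polling_and_test() is what keeps poll_idle() safe against this fast path: if a need-resched request raced with setting the polling flag, the idle loop skips the spin instead of waiting for a wakeup that will never send an IPI.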
2 changes: 1 addition & 1 deletion include/linux/kvm_host.h
@@ -586,7 +586,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn);

void kvm_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
- bool kvm_vcpu_yield_to(struct kvm_vcpu *target);
+ int kvm_vcpu_yield_to(struct kvm_vcpu *target);
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
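This prototype change, together with the matching yield_to() change in include/linux/sched.h below, is the "Fix signedness bug in yield_to()" item from the shortlog: yield_to() can return -ESRCH as well as 0 or 1, and a bool return type silently turned the error case into "true". With an int return the three outcomes stay distinguishable. A hedged sketch of the caller-side pattern (kvm_vcpu_on_spin() follows a similar shape; the function and variable names here are made up):

static void spin_boost_sketch(struct kvm_vcpu **candidates, int n)
{
    int i, tries = 3;

    for (i = 0; i < n && tries; i++) {
        int yielded = kvm_vcpu_yield_to(candidates[i]);

        if (yielded > 0)
            break;        /* timeslice handed to that vCPU's task */
        if (yielded < 0)
            tries--;      /* hard error such as -ESRCH: bound the retries */
        /* 0: candidate not yieldable right now, move on to the next one */
    }
}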
14 changes: 7 additions & 7 deletions include/linux/sched.h
@@ -847,10 +847,10 @@ enum cpu_idle_type {
};

/*
- * Increase resolution of cpu_power calculations
+ * Increase resolution of cpu_capacity calculations
*/
- #define SCHED_POWER_SHIFT 10
- #define SCHED_POWER_SCALE (1L << SCHED_POWER_SHIFT)
+ #define SCHED_CAPACITY_SHIFT 10
+ #define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)

/*
* sched-domains (multiprocessor balancing) declarations:
@@ -862,7 +862,7 @@ enum cpu_idle_type {
#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */
#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */
#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */
- #define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */
+ #define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu power */
#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */
#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
@@ -874,7 +874,7 @@ enum cpu_idle_type {
#ifdef CONFIG_SCHED_SMT
static inline const int cpu_smt_flags(void)
{
- return SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES;
+ return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
}
#endif

@@ -1006,7 +1006,7 @@ typedef const int (*sched_domain_flags_f)(void);
struct sd_data {
struct sched_domain **__percpu sd;
struct sched_group **__percpu sg;
- struct sched_group_power **__percpu sgp;
+ struct sched_group_capacity **__percpu sgc;
};

struct sched_domain_topology_level {
@@ -2173,7 +2173,7 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
static inline void sched_autogroup_exit(struct signal_struct *sig) { }
#endif

- extern bool yield_to(struct task_struct *p, bool preempt);
+ extern int yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);
/**
20 changes: 20 additions & 0 deletions include/trace/events/sched.h
@@ -530,6 +530,26 @@ TRACE_EVENT(sched_swap_numa,
__entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
__entry->dst_cpu, __entry->dst_nid)
);

+ /*
+ * Tracepoint for waking a polling cpu without an IPI.
+ */
+ TRACE_EVENT(sched_wake_idle_without_ipi,
+
+ TP_PROTO(int cpu),
+
+ TP_ARGS(cpu),
+
+ TP_STRUCT__entry(
+ __field( int, cpu )
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ ),
+
+ TP_printk("cpu=%d", __entry->cpu)
+ );
#endif /* _TRACE_SCHED_H */

/* This part must be outside protection */
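The new event fires whenever a wakeup manages to skip the reschedule IPI because the target CPU was polling, and per the TP_printk() above it reports only the target CPU ("cpu=N"). Assuming the usual debugfs/tracefs mount point, it can be enabled at runtime through /sys/kernel/debug/tracing/events/sched/sched_wake_idle_without_ipi/enable, which gives a quick way to check that the poll_idle() and try-to-wake-up changes in this series actually take effect on a given workload.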
[diffs for the remaining 8 of the 14 changed files are not shown]
