Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "The biggest change in this cycle is the rewrite of the main SMP load
  balancing metric: the CPU load/utilization.  The main goal was to make
  the metric more precise and more representative - see the changelog of
  this commit for the gory details:

    9d89c25 ("sched/fair: Rewrite runnable load and utilization average tracking")

  It is done in a way that significantly reduces complexity of the code:

    5 files changed, 249 insertions(+), 494 deletions(-)

  and the performance testing results are encouraging.  Nevertheless we
  need to keep an eye on potential regressions, since this potentially
  affects every SMP workload in existence.

  This work comes from Yuyang Du.

  Other changes:

   - SCHED_DL updates.  (Andrea Parri)

   - Simplify architecture callbacks by removing finish_arch_switch().
     (Peter Zijlstra et al)

   - cputime accounting: guarantee stime + utime == rtime.  (Peter
     Zijlstra)

   - optimize idle CPU wakeups some more - inspired by Facebook server
     loads.  (Mike Galbraith)

   - stop_machine fixes and updates.  (Oleg Nesterov)

   - Introduce the 'trace_sched_waking' tracepoint.  (Peter Zijlstra)

   - sched/numa tweaks.  (Srikar Dronamraju)

   - misc fixes and small cleanups"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits)
  sched/deadline: Fix comment in enqueue_task_dl()
  sched/deadline: Fix comment in push_dl_tasks()
  sched: Change the sched_class::set_cpus_allowed() calling context
  sched: Make sched_class::set_cpus_allowed() unconditional
  sched: Fix a race between __kthread_bind() and sched_setaffinity()
  sched: Ensure a task has a non-normalized vruntime when returning back to CFS
  sched/numa: Fix NUMA_DIRECT topology identification
  tile: Reorganize _switch_to()
  sched, sparc32: Update scheduler comments in copy_thread()
  sched: Remove finish_arch_switch()
  sched, tile: Remove finish_arch_switch
  sched, sh: Fold finish_arch_switch() into switch_to()
  sched, score: Remove finish_arch_switch()
  sched, avr32: Remove finish_arch_switch()
  sched, MIPS: Get rid of finish_arch_switch()
  sched, arm: Remove finish_arch_switch()
  sched/fair: Clean up load average references
  sched/fair: Provide runnable_load_avg back to cfs_rq
  sched/fair: Remove task and group entity load when they are dead
  sched/fair: Init cfs_rq's sched_entity load average
  ...
torvalds committed Sep 1, 2015
2 parents 3959df1 + ff277d4 commit a1d8561
Showing 37 changed files with 762 additions and 959 deletions.
5 changes: 4 additions & 1 deletion arch/arm/include/asm/switch_to.h
@@ -10,7 +10,9 @@
* CPU.
*/
#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7)
#define finish_arch_switch(prev) dsb(ish)
#define __complete_pending_tlbi() dsb(ish)
#else
#define __complete_pending_tlbi()
#endif

/*
@@ -22,6 +24,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info

#define switch_to(prev,next,last) \
do { \
__complete_pending_tlbi(); \
last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \
} while (0)

7 changes: 5 additions & 2 deletions arch/avr32/include/asm/switch_to.h
@@ -15,11 +15,13 @@
*/
#ifdef CONFIG_OWNERSHIP_TRACE
#include <asm/ocd.h>
#define finish_arch_switch(prev) \
#define ocd_switch(prev, next) \
do { \
ocd_write(PID, prev->pid); \
ocd_write(PID, current->pid); \
ocd_write(PID, next->pid); \
} while(0)
#else
#define ocd_switch(prev, next)
#endif

/*
@@ -38,6 +40,7 @@ extern struct task_struct *__switch_to(struct task_struct *,
struct cpu_context *);
#define switch_to(prev, next, last) \
do { \
ocd_switch(prev, next); \
last = __switch_to(prev, &prev->thread.cpu_context + 1, \
&next->thread.cpu_context); \
} while (0)
48 changes: 23 additions & 25 deletions arch/mips/include/asm/switch_to.h
@@ -83,45 +83,43 @@ do { if (cpu_has_rw_llb) { \
} \
} while (0)

/*
* For newly created kernel threads switch_to() will return to
* ret_from_kernel_thread, newly created user threads to ret_from_fork.
* That is, everything following resume() will be skipped for new threads.
* So everything that matters to new threads should be placed before resume().
*/
#define switch_to(prev, next, last) \
do { \
u32 __c0_stat; \
s32 __fpsave = FP_SAVE_NONE; \
__mips_mt_fpaff_switch_to(prev); \
if (cpu_has_dsp) \
if (cpu_has_dsp) { \
__save_dsp(prev); \
if (cop2_present && (KSTK_STATUS(prev) & ST0_CU2)) { \
if (cop2_lazy_restore) \
KSTK_STATUS(prev) &= ~ST0_CU2; \
__c0_stat = read_c0_status(); \
write_c0_status(__c0_stat | ST0_CU2); \
cop2_save(prev); \
write_c0_status(__c0_stat & ~ST0_CU2); \
__restore_dsp(next); \
} \
if (cop2_present) { \
set_c0_status(ST0_CU2); \
if ((KSTK_STATUS(prev) & ST0_CU2)) { \
if (cop2_lazy_restore) \
KSTK_STATUS(prev) &= ~ST0_CU2; \
cop2_save(prev); \
} \
if (KSTK_STATUS(next) & ST0_CU2 && \
!cop2_lazy_restore) { \
cop2_restore(next); \
} \
clear_c0_status(ST0_CU2); \
} \
__clear_software_ll_bit(); \
if (test_and_clear_tsk_thread_flag(prev, TIF_USEDFPU)) \
__fpsave = FP_SAVE_SCALAR; \
if (test_and_clear_tsk_thread_flag(prev, TIF_USEDMSA)) \
__fpsave = FP_SAVE_VECTOR; \
(last) = resume(prev, next, task_thread_info(next), __fpsave); \
} while (0)

#define finish_arch_switch(prev) \
do { \
u32 __c0_stat; \
if (cop2_present && !cop2_lazy_restore && \
(KSTK_STATUS(current) & ST0_CU2)) { \
__c0_stat = read_c0_status(); \
write_c0_status(__c0_stat | ST0_CU2); \
cop2_restore(current); \
write_c0_status(__c0_stat & ~ST0_CU2); \
} \
if (cpu_has_dsp) \
__restore_dsp(current); \
if (cpu_has_userlocal) \
write_c0_userlocal(current_thread_info()->tp_value); \
write_c0_userlocal(task_thread_info(next)->tp_value); \
__restore_watch(); \
disable_msa(); \
(last) = resume(prev, next, task_thread_info(next), __fpsave); \
} while (0)

#endif /* _ASM_SWITCH_TO_H */
2 changes: 1 addition & 1 deletion arch/powerpc/kvm/book3s_hv.c
@@ -2178,7 +2178,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
vc->runner = vcpu;
if (n_ceded == vc->n_runnable) {
kvmppc_vcore_blocked(vc);
} else if (should_resched()) {
} else if (need_resched()) {
vc->vcore_state = VCORE_PREEMPT;
/* Let something else run */
cond_resched_lock(&vc->lock);
2 changes: 0 additions & 2 deletions arch/score/include/asm/switch_to.h
@@ -8,6 +8,4 @@ do { \
(last) = resume(prev, next, task_thread_info(next)); \
} while (0)

#define finish_arch_switch(prev) do {} while (0)

#endif /* _ASM_SCORE_SWITCH_TO_H */
8 changes: 2 additions & 6 deletions arch/sh/include/asm/switch_to_32.h
@@ -78,6 +78,8 @@ do { \
\
if (is_dsp_enabled(prev)) \
__save_dsp(prev); \
if (is_dsp_enabled(next)) \
__restore_dsp(next); \
\
__ts1 = (u32 *)&prev->thread.sp; \
__ts2 = (u32 *)&prev->thread.pc; \
@@ -125,10 +127,4 @@ do { \
last = __last; \
} while (0)

#define finish_arch_switch(prev) \
do { \
if (is_dsp_enabled(prev)) \
__restore_dsp(prev); \
} while (0)

#endif /* __ASM_SH_SWITCH_TO_32_H */
10 changes: 5 additions & 5 deletions arch/sparc/kernel/process_32.c
@@ -333,11 +333,11 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
childregs = (struct pt_regs *) (new_stack + STACKFRAME_SZ);

/*
* A new process must start with interrupts closed in 2.5,
* because this is how Mingo's scheduler works (see schedule_tail
* and finish_arch_switch). If we do not do it, a timer interrupt hits
* before we unlock, attempts to re-take the rq->lock, and then we die.
* Thus, kpsr|=PSR_PIL.
* A new process must start with interrupts disabled, see schedule_tail()
* and finish_task_switch(). (If we do not do it and if a timer interrupt
* hits before we unlock and attempts to take the rq->lock, we deadlock.)
*
* Thus, kpsr |= PSR_PIL.
*/
ti->ksp = (unsigned long) new_stack;
p->thread.kregs = childregs;
8 changes: 3 additions & 5 deletions arch/tile/include/asm/switch_to.h
@@ -53,15 +53,13 @@ extern unsigned long get_switch_to_pc(void);
* Kernel threads can check to see if they need to migrate their
* stack whenever they return from a context switch; for user
* threads, we defer until they are returning to user-space.
* We defer homecache migration until the runqueue lock is released.
*/
#define finish_arch_switch(prev) do { \
if (unlikely((prev)->state == TASK_DEAD)) \
__insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_EXIT | \
((prev)->pid << _SIM_CONTROL_OPERATOR_BITS)); \
#define finish_arch_post_lock_switch() do { \
__insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_SWITCH | \
(current->pid << _SIM_CONTROL_OPERATOR_BITS)); \
if (current->mm == NULL && !kstack_hash && \
current_thread_info()->homecache_cpu != smp_processor_id()) \
current_thread_info()->homecache_cpu != raw_smp_processor_id()) \
homecache_migrate_kthread(); \
} while (0)

5 changes: 5 additions & 0 deletions arch/tile/kernel/process.c
@@ -446,6 +446,11 @@ struct task_struct *__sched _switch_to(struct task_struct *prev,
hardwall_switch_tasks(prev, next);
#endif

/* Notify the simulator of task exit. */
if (unlikely(prev->state == TASK_DEAD))
__insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_EXIT |
(prev->pid << _SIM_CONTROL_OPERATOR_BITS));

/*
* Switch kernel SP, PC, and callee-saved registers.
* In the context of the new task, return the old task pointer
4 changes: 2 additions & 2 deletions arch/x86/include/asm/preempt.h
@@ -90,9 +90,9 @@ static __always_inline bool __preempt_count_dec_and_test(void)
/*
* Returns true when we need to resched and can (barring IRQ state).
*/
static __always_inline bool should_resched(void)
static __always_inline bool should_resched(int preempt_offset)
{
return unlikely(!raw_cpu_read_4(__preempt_count));
return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
}

#ifdef CONFIG_PREEMPT
4 changes: 4 additions & 0 deletions drivers/cpuidle/cpuidle.c
@@ -123,6 +123,7 @@ static void enter_freeze_proper(struct cpuidle_driver *drv,
* cpuidle mechanism enables interrupts and doing that with timekeeping
* suspended is generally unsafe.
*/
stop_critical_timings();
drv->states[index].enter_freeze(dev, drv, index);
WARN_ON(!irqs_disabled());
/*
@@ -131,6 +132,7 @@ static void enter_freeze_proper(struct cpuidle_driver *drv,
* critical sections, so tell RCU about that.
*/
RCU_NONIDLE(tick_unfreeze());
start_critical_timings();
}

/**
@@ -195,7 +197,9 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
trace_cpu_idle_rcuidle(index, dev->cpu);
time_start = ktime_get();

stop_critical_timings();
entered_state = target_state->enter(dev, drv, index);
start_critical_timings();

time_end = ktime_get();
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
2 changes: 1 addition & 1 deletion drivers/xen/preempt.c
@@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
asmlinkage __visible void xen_maybe_preempt_hcall(void)
{
if (unlikely(__this_cpu_read(xen_in_preemptible_hcall)
&& should_resched())) {
&& need_resched())) {
/*
* Clear flag as we may be rescheduled on a different
* cpu.
5 changes: 3 additions & 2 deletions include/asm-generic/preempt.h
@@ -71,9 +71,10 @@ static __always_inline bool __preempt_count_dec_and_test(void)
/*
* Returns true when we need to resched and can (barring IRQ state).
*/
static __always_inline bool should_resched(void)
static __always_inline bool should_resched(int preempt_offset)
{
return unlikely(!preempt_count() && tif_need_resched());
return unlikely(preempt_count() == preempt_offset &&
tif_need_resched());
}

#ifdef CONFIG_PREEMPT
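For orientation, a minimal sketch of how the new preempt_offset argument is meant to be used (illustration only, not code from this merge; example_cond_resched_lock() is a made-up helper). A caller that legitimately holds one spinlock runs with preempt_count() == PREEMPT_LOCK_OFFSET, so the old !preempt_count() test could never let it reschedule; passing the expected offset lets it yield exactly when nothing else has preemption disabled.

#include <linux/preempt.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

/* Hypothetical helper (sketch): voluntarily yield while @lock is held.
 * should_resched(PREEMPT_LOCK_OFFSET) is true only when the one lock we
 * hold is the sole thing keeping preemption off and a resched is due. */
static int example_cond_resched_lock(spinlock_t *lock)
{
	if (should_resched(PREEMPT_LOCK_OFFSET)) {
		spin_unlock(lock);	/* preempt_count() drops to zero ... */
		schedule();		/* ... so rescheduling is really possible */
		spin_lock(lock);
		return 1;
	}
	return 0;
}

This is also why the powerpc change earlier in this commit tests plain need_resched() before calling cond_resched_lock(): the lock-offset handling lives inside the cond_resched_lock() path.
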
10 changes: 10 additions & 0 deletions include/linux/init_task.h
@@ -32,6 +32,14 @@ extern struct fs_struct init_fs;
#define INIT_CPUSET_SEQ(tsk)
#endif

#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#define INIT_PREV_CPUTIME(x) .prev_cputime = { \
.lock = __RAW_SPIN_LOCK_UNLOCKED(x.prev_cputime.lock), \
},
#else
#define INIT_PREV_CPUTIME(x)
#endif

#define INIT_SIGNALS(sig) { \
.nr_threads = 1, \
.thread_head = LIST_HEAD_INIT(init_task.thread_node), \
@@ -46,6 +54,7 @@ extern struct fs_struct init_fs;
.cputime_atomic = INIT_CPUTIME_ATOMIC, \
.running = 0, \
}, \
INIT_PREV_CPUTIME(sig) \
.cred_guard_mutex = \
__MUTEX_INITIALIZER(sig.cred_guard_mutex), \
}
@@ -246,6 +255,7 @@ extern struct task_group root_task_group;
INIT_TASK_RCU_TASKS(tsk) \
INIT_CPUSET_SEQ(tsk) \
INIT_RT_MUTEXES(tsk) \
INIT_PREV_CPUTIME(tsk) \
INIT_VTIME(tsk) \
INIT_NUMA_BALANCING(tsk) \
INIT_KASAN(tsk) \
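The INIT_PREV_CPUTIME() initializer above pairs with the prev_cputime snapshot introduced by the stime + utime == rtime work; roughly (a reconstructed sketch, not text from this diff), the structure looks like the following, which is why only the !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE case has a raw spinlock to initialize.

/* Sketch of the snapshot used to scale stime/utime monotonically against
 * rtime; with native vtime accounting the raw values are already exact,
 * so no snapshot (and no lock) is needed. */
struct prev_cputime {
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	cputime_t utime;
	cputime_t stime;
	raw_spinlock_t lock;
#endif
};
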
1 change: 1 addition & 0 deletions include/linux/kthread.h
@@ -38,6 +38,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
})

void kthread_bind(struct task_struct *k, unsigned int cpu);
void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask);
int kthread_stop(struct task_struct *k);
bool kthread_should_stop(void);
bool kthread_should_park(void);
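A hedged usage sketch for the newly declared kthread_bind_mask(): it mirrors kthread_bind() but takes a cpumask and, like it, must run before the thread's first wakeup. worker_fn, the thread name and the mask choice are invented for the example; this is not code from the merge.

#include <linux/cpumask.h>
#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/sched.h>

static int worker_fn(void *data)	/* hypothetical thread body */
{
	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);
	return 0;
}

static int start_pinned_worker(void)
{
	struct task_struct *tsk;

	tsk = kthread_create(worker_fn, NULL, "example_worker");
	if (IS_ERR(tsk))
		return PTR_ERR(tsk);

	/* Bind before the first wakeup, exactly as kthread_bind() requires. */
	kthread_bind_mask(tsk, cpu_online_mask);
	wake_up_process(tsk);
	return 0;
}
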
19 changes: 14 additions & 5 deletions include/linux/preempt.h
@@ -84,12 +84,20 @@
*/
#define in_nmi() (preempt_count() & NMI_MASK)

/*
* The preempt_count offset after preempt_disable();
*/
#if defined(CONFIG_PREEMPT_COUNT)
# define PREEMPT_DISABLE_OFFSET 1
# define PREEMPT_DISABLE_OFFSET PREEMPT_OFFSET
#else
# define PREEMPT_DISABLE_OFFSET 0
# define PREEMPT_DISABLE_OFFSET 0
#endif

/*
* The preempt_count offset after spin_lock()
*/
#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET

/*
* The preempt_count offset needed for things like:
*
@@ -103,7 +111,7 @@
*
* Work as expected.
*/
#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_DISABLE_OFFSET)
#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_LOCK_OFFSET)

/*
* Are we running in atomic context? WARNING: this macro cannot
@@ -124,7 +132,8 @@
#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
extern void preempt_count_add(int val);
extern void preempt_count_sub(int val);
#define preempt_count_dec_and_test() ({ preempt_count_sub(1); should_resched(); })
#define preempt_count_dec_and_test() \
({ preempt_count_sub(1); should_resched(0); })
#else
#define preempt_count_add(val) __preempt_count_add(val)
#define preempt_count_sub(val) __preempt_count_sub(val)
@@ -184,7 +193,7 @@ do { \

#define preempt_check_resched() \
do { \
if (should_resched()) \
if (should_resched(0)) \
__preempt_schedule(); \
} while (0)

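To keep the renamed offsets straight, a small illustration of how they relate under the definitions above (a hypothetical sanity-check function, not part of the commit; it only restates identities visible in this diff).

#include <linux/bug.h>
#include <linux/preempt.h>

static inline void preempt_offset_examples(void)
{
	/* A held spinlock costs the same as preempt_disable() ... */
	BUILD_BUG_ON(PREEMPT_LOCK_OFFSET != PREEMPT_DISABLE_OFFSET);
	/* ... and a softirq-safe lock adds the bh-disable cost on top. */
	BUILD_BUG_ON(SOFTIRQ_LOCK_OFFSET !=
		     SOFTIRQ_DISABLE_OFFSET + PREEMPT_LOCK_OFFSET);
#ifdef CONFIG_PREEMPT_COUNT
	BUILD_BUG_ON(PREEMPT_DISABLE_OFFSET != PREEMPT_OFFSET);
#else
	/* Without CONFIG_PREEMPT_COUNT the disable/lock offsets collapse to 0. */
	BUILD_BUG_ON(PREEMPT_DISABLE_OFFSET != 0);
#endif
}
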