Skip to content

Commit

Permalink
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/l…
Browse files Browse the repository at this point in the history
…inux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "The main changes in this cycle were:

   - Optimized support for Intel "Cluster-on-Die" (CoD) topologies (Dave
     Hansen)

   - Various sched/idle refinements for better idle handling (Nicolas
     Pitre, Daniel Lezcano, Chuansheng Liu, Vincent Guittot)

   - sched/numa updates and optimizations (Rik van Riel)

   - sysbench speedup (Vincent Guittot)

   - capacity calculation cleanups/refactoring (Vincent Guittot)

   - Various cleanups to thread group iteration (Oleg Nesterov)

   - Double-rq-lock removal optimization and various refactorings
     (Kirill Tkhai)

   - various sched/deadline fixes

  ... and lots of other changes"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (72 commits)
  sched/dl: Use dl_bw_of() under rcu_read_lock_sched()
  sched/fair: Delete resched_cpu() from idle_balance()
  sched, time: Fix build error with 64 bit cputime_t on 32 bit systems
  sched: Improve sysbench performance by fixing spurious active migration
  sched/x86: Fix up typo in topology detection
  x86, sched: Add new topology for multi-NUMA-node CPUs
  sched/rt: Use resched_curr() in task_tick_rt()
  sched: Use rq->rd in sched_setaffinity() under RCU read lock
  sched: cleanup: Rename 'out_unlock' to 'out_free_new_mask'
  sched: Use dl_bw_of() under RCU read lock
  sched/fair: Remove duplicate code from can_migrate_task()
  sched, mips, ia64: Remove __ARCH_WANT_UNLOCKED_CTXSW
  sched: print_rq(): Don't use tasklist_lock
  sched: normalize_rt_tasks(): Don't use _irqsave for tasklist_lock, use task_rq_lock()
  sched: Fix the task-group check in tg_has_rt_tasks()
  sched/fair: Leverage the idle state info when choosing the "idlest" cpu
  sched: Let the scheduler see CPU idle states
  sched/deadline: Fix inter- exclusive cpusets migrations
  sched/deadline: Clear dl_entity params when setscheduling to different class
  sched/numa: Kill the wrong/dead TASK_DEAD check in task_numa_fault()
  ...
  • Loading branch information
torvalds committed Oct 13, 2014
2 parents 13ead80 + f10e00f commit faafcba
Show file tree
Hide file tree
Showing 55 changed files with 1,075 additions and 552 deletions.
340 changes: 292 additions & 48 deletions Documentation/scheduler/sched-deadline.txt

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions arch/arm/kernel/topology.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
*/
static DEFINE_PER_CPU(unsigned long, cpu_scale);

unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
{
return per_cpu(cpu_scale, cpu);
}
Expand Down Expand Up @@ -166,7 +166,7 @@ static void update_cpu_capacity(unsigned int cpu)
set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity);

printk(KERN_INFO "CPU%u: update cpu_capacity %lu\n",
cpu, arch_scale_freq_capacity(NULL, cpu));
cpu, arch_scale_cpu_capacity(NULL, cpu));
}

#else
Expand Down
1 change: 0 additions & 1 deletion arch/cris/arch-v10/drivers/sync_serial.c
Original file line number Diff line number Diff line change
Expand Up @@ -1086,7 +1086,6 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
}
local_irq_restore(flags);
schedule();
set_current_state(TASK_RUNNING);
remove_wait_queue(&port->out_wait_q, &wait);
if (signal_pending(current))
return -EINTR;
Expand Down
1 change: 0 additions & 1 deletion arch/cris/arch-v32/drivers/sync_serial.c
Original file line number Diff line number Diff line change
Expand Up @@ -1089,7 +1089,6 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
}

schedule();
set_current_state(TASK_RUNNING);
remove_wait_queue(&port->out_wait_q, &wait);

if (signal_pending(current))
Expand Down
1 change: 0 additions & 1 deletion arch/ia64/include/asm/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include <asm/ptrace.h>
#include <asm/ustack.h>

#define __ARCH_WANT_UNLOCKED_CTXSW
#define ARCH_HAS_PREFETCH_SWITCH_STACK

#define IA64_NUM_PHYS_STACK_REG 96
Expand Down
6 changes: 0 additions & 6 deletions arch/mips/include/asm/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -397,12 +397,6 @@ unsigned long get_wchan(struct task_struct *p);
#define ARCH_HAS_PREFETCHW
#define prefetchw(x) __builtin_prefetch((x), 1, 1)

/*
* See Documentation/scheduler/sched-arch.txt; prevents deadlock on SMP
* systems.
*/
#define __ARCH_WANT_UNLOCKED_CTXSW

#endif

#endif /* _ASM_PROCESSOR_H */
2 changes: 2 additions & 0 deletions arch/powerpc/include/asm/cputime.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ static inline void setup_cputime_one_jiffy(void) { }
typedef u64 __nocast cputime_t;
typedef u64 __nocast cputime64_t;

#define cmpxchg_cputime(ptr, old, new) cmpxchg(ptr, old, new)

#ifdef __KERNEL__

/*
Expand Down
5 changes: 1 addition & 4 deletions arch/powerpc/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/perf_event.h>
#include <linux/magic.h>
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
#include <linux/hugetlb.h>
Expand Down Expand Up @@ -521,7 +520,6 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
{
const struct exception_table_entry *entry;
unsigned long *stackend;

/* Are we prepared to handle this fault? */
if ((entry = search_exception_tables(regs->nip)) != NULL) {
Expand Down Expand Up @@ -550,8 +548,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",
regs->nip);

stackend = end_of_stack(current);
if (current != &init_task && *stackend != STACK_END_MAGIC)
if (task_stack_end_corrupted(current))
printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");

die("Kernel access of bad area", regs, sig);
Expand Down
2 changes: 2 additions & 0 deletions arch/s390/include/asm/cputime.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
typedef unsigned long long __nocast cputime_t;
typedef unsigned long long __nocast cputime64_t;

#define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)

static inline unsigned long __div(unsigned long long n, unsigned long base)
{
#ifndef CONFIG_64BIT
Expand Down
1 change: 0 additions & 1 deletion arch/um/drivers/random.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size,
set_task_state(current, TASK_INTERRUPTIBLE);

schedule();
set_task_state(current, TASK_RUNNING);
remove_wait_queue(&host_read_wait, &wait);

if (atomic_dec_and_test(&host_sleep_count)) {
Expand Down
55 changes: 46 additions & 9 deletions arch/x86/kernel/smpboot.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,12 +295,20 @@ void smp_store_cpu_info(int id)
identify_secondary_cpu(c);
}

static bool
topology_same_node(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;

return (cpu_to_node(cpu1) == cpu_to_node(cpu2));
}

static bool
topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
{
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;

return !WARN_ONCE(cpu_to_node(cpu1) != cpu_to_node(cpu2),
return !WARN_ONCE(!topology_same_node(c, o),
"sched: CPU #%d's %s-sibling CPU #%d is not on the same node! "
"[node: %d != %d]. Ignoring dependency.\n",
cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
Expand Down Expand Up @@ -341,17 +349,44 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
return false;
}

static bool match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
/*
* Unlike the other levels, we do not enforce keeping a
* multicore group inside a NUMA node. If this happens, we will
* discard the MC level of the topology later.
*/
static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
if (c->phys_proc_id == o->phys_proc_id) {
if (cpu_has(c, X86_FEATURE_AMD_DCM))
return true;

return topology_sane(c, o, "mc");
}
if (c->phys_proc_id == o->phys_proc_id)
return true;
return false;
}

static struct sched_domain_topology_level numa_inside_package_topology[] = {
#ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
#endif
{ NULL, },
};
/*
* set_sched_topology() sets the topology internal to a CPU. The
* NUMA topologies are layered on top of it to build the full
* system topology.
*
* If NUMA nodes are observed to occur within a CPU package, this
* function should be called. It forces the sched domain code to
* only use the SMT level for the CPU portion of the topology.
* This essentially falls back to relying on NUMA information
* from the SRAT table to describe the entire system topology
* (except for hyperthreads).
*/
static void primarily_use_numa_for_topology(void)
{
set_sched_topology(numa_inside_package_topology);
}

void set_cpu_sibling_map(int cpu)
{
bool has_smt = smp_num_siblings > 1;
Expand Down Expand Up @@ -388,7 +423,7 @@ void set_cpu_sibling_map(int cpu)
for_each_cpu(i, cpu_sibling_setup_mask) {
o = &cpu_data(i);

if ((i == cpu) || (has_mp && match_mc(c, o))) {
if ((i == cpu) || (has_mp && match_die(c, o))) {
link_mask(core, cpu, i);

/*
Expand All @@ -410,6 +445,8 @@ void set_cpu_sibling_map(int cpu)
} else if (i != cpu && !c->booted_cores)
c->booted_cores = cpu_data(i).booted_cores;
}
if (match_die(c, o) && !topology_same_node(c, o))
primarily_use_numa_for_topology();
}
}

Expand Down
5 changes: 1 addition & 4 deletions arch/x86/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
* Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
* Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
*/
#include <linux/magic.h> /* STACK_END_MAGIC */
#include <linux/sched.h> /* test_thread_flag(), ... */
#include <linux/kdebug.h> /* oops_begin/end, ... */
#include <linux/module.h> /* search_exception_table */
Expand Down Expand Up @@ -649,7 +648,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
unsigned long address, int signal, int si_code)
{
struct task_struct *tsk = current;
unsigned long *stackend;
unsigned long flags;
int sig;

Expand Down Expand Up @@ -709,8 +707,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,

show_fault_oops(regs, error_code, address);

stackend = end_of_stack(tsk);
if (tsk != &init_task && *stackend != STACK_END_MAGIC)
if (task_stack_end_corrupted(tsk))
printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");

tsk->thread.cr2 = address;
Expand Down
15 changes: 8 additions & 7 deletions drivers/cpuidle/cpuidle.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,14 @@ void cpuidle_uninstall_idle_handler(void)
{
if (enabled_devices) {
initialized = 0;
kick_all_cpus_sync();
wake_up_all_idle_cpus();
}

/*
* Make sure external observers (such as the scheduler)
* are done looking at pointed idle states.
*/
synchronize_rcu();
}

/**
Expand Down Expand Up @@ -530,11 +536,6 @@ EXPORT_SYMBOL_GPL(cpuidle_register);

#ifdef CONFIG_SMP

static void smp_callback(void *v)
{
/* we already woke the CPU up, nothing more to do */
}

/*
* This function gets called when a part of the kernel has a new latency
* requirement. This means we need to get all processors out of their C-state,
Expand All @@ -544,7 +545,7 @@ static void smp_callback(void *v)
static int cpuidle_latency_notify(struct notifier_block *b,
unsigned long l, void *v)
{
smp_call_function(smp_callback, NULL, 1);
wake_up_all_idle_cpus();
return NOTIFY_OK;
}

Expand Down
1 change: 0 additions & 1 deletion drivers/gpu/vga/vgaarb.c
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,6 @@ int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible)
}
schedule();
remove_wait_queue(&vga_wait_queue, &wait);
set_current_state(TASK_RUNNING);
}
return rc;
}
Expand Down
1 change: 0 additions & 1 deletion drivers/md/dm-bufio.c
Original file line number Diff line number Diff line change
Expand Up @@ -720,7 +720,6 @@ static void __wait_for_free_buffer(struct dm_bufio_client *c)

io_schedule();

set_task_state(current, TASK_RUNNING);
remove_wait_queue(&c->free_buffer_wait, &wait);

dm_bufio_lock(c);
Expand Down
1 change: 0 additions & 1 deletion drivers/parisc/power.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ static int kpowerswd(void *param)
unsigned long soft_power_reg = (unsigned long) param;

schedule_timeout_interruptible(pwrsw_enabled ? HZ : HZ/POWERSWITCH_POLL_PER_SEC);
__set_current_state(TASK_RUNNING);

if (unlikely(!pwrsw_enabled))
continue;
Expand Down
2 changes: 0 additions & 2 deletions drivers/s390/net/claw.c
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,6 @@ claw_open(struct net_device *dev)
spin_unlock_irqrestore(
get_ccwdev_lock(privptr->channel[i].cdev), saveflags);
schedule();
set_current_state(TASK_RUNNING);
remove_wait_queue(&privptr->channel[i].wait, &wait);
if(rc != 0)
ccw_check_return_code(privptr->channel[i].cdev, rc);
Expand Down Expand Up @@ -828,7 +827,6 @@ claw_release(struct net_device *dev)
spin_unlock_irqrestore(
get_ccwdev_lock(privptr->channel[i].cdev), saveflags);
schedule();
set_current_state(TASK_RUNNING);
remove_wait_queue(&privptr->channel[i].wait, &wait);
if (rc != 0) {
ccw_check_return_code(privptr->channel[i].cdev, rc);
Expand Down
1 change: 0 additions & 1 deletion drivers/scsi/fcoe/fcoe.c
Original file line number Diff line number Diff line change
Expand Up @@ -1884,7 +1884,6 @@ static int fcoe_percpu_receive_thread(void *arg)
set_current_state(TASK_INTERRUPTIBLE);
spin_unlock_bh(&p->fcoe_rx_list.lock);
schedule();
set_current_state(TASK_RUNNING);
goto retry;
}

Expand Down
1 change: 0 additions & 1 deletion drivers/scsi/qla2xxx/qla_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -4875,7 +4875,6 @@ qla2x00_do_dpc(void *data)
"DPC handler sleeping.\n");

schedule();
__set_current_state(TASK_RUNNING);

if (!base_vha->flags.init_done || ha->flags.mbox_busy)
goto end_loop;
Expand Down
3 changes: 0 additions & 3 deletions drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
Original file line number Diff line number Diff line change
Expand Up @@ -3215,7 +3215,6 @@ kiblnd_connd (void *arg)

schedule_timeout(timeout);

set_current_state(TASK_RUNNING);
remove_wait_queue(&kiblnd_data.kib_connd_waitq, &wait);
spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
}
Expand Down Expand Up @@ -3432,7 +3431,6 @@ kiblnd_scheduler(void *arg)
busy_loops = 0;

remove_wait_queue(&sched->ibs_waitq, &wait);
set_current_state(TASK_RUNNING);
spin_lock_irqsave(&sched->ibs_lock, flags);
}

Expand Down Expand Up @@ -3507,7 +3505,6 @@ kiblnd_failover_thread(void *arg)

rc = schedule_timeout(long_sleep ? cfs_time_seconds(10) :
cfs_time_seconds(1));
set_current_state(TASK_RUNNING);
remove_wait_queue(&kiblnd_data.kib_failover_waitq, &wait);
write_lock_irqsave(glock, flags);

Expand Down
1 change: 0 additions & 1 deletion drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
Original file line number Diff line number Diff line change
Expand Up @@ -2232,7 +2232,6 @@ ksocknal_connd (void *arg)
nloops = 0;
schedule_timeout(timeout);

set_current_state(TASK_RUNNING);
remove_wait_queue(&ksocknal_data.ksnd_connd_waitq, &wait);
spin_lock_bh(connd_lock);
}
Expand Down
1 change: 0 additions & 1 deletion drivers/staging/lustre/lustre/libcfs/fail.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ int __cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
id, ms);
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(cfs_time_seconds(ms) / 1000);
set_current_state(TASK_RUNNING);
CERROR("cfs_fail_timeout id %x awake\n", id);
}
return ret;
Expand Down
1 change: 0 additions & 1 deletion drivers/tty/bfin_jtag_comm.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ bfin_jc_emudat_manager(void *arg)
pr_debug("waiting for readers\n");
__set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
__set_current_state(TASK_RUNNING);
continue;
}

Expand Down
1 change: 0 additions & 1 deletion fs/afs/vlocation.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
/* second+ BUSY - sleep a little bit */
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(1);
__set_current_state(TASK_RUNNING);
}
continue;
}
Expand Down
2 changes: 0 additions & 2 deletions fs/jfs/jfs_logmgr.c
Original file line number Diff line number Diff line change
Expand Up @@ -1585,7 +1585,6 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
set_current_state(TASK_UNINTERRUPTIBLE);
LOGGC_UNLOCK(log);
schedule();
__set_current_state(TASK_RUNNING);
LOGGC_LOCK(log);
remove_wait_queue(&target->gcwait, &__wait);
}
Expand Down Expand Up @@ -2359,7 +2358,6 @@ int jfsIOWait(void *arg)
set_current_state(TASK_INTERRUPTIBLE);
spin_unlock_irq(&log_redrive_lock);
schedule();
__set_current_state(TASK_RUNNING);
}
} while (!kthread_should_stop());

Expand Down
Loading

0 comments on commit faafcba

Please sign in to comment.