Skip to content

Commit

Permalink
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/l…
Browse files Browse the repository at this point in the history
…inux/kernel/git/tip/linux-2.6-tip

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (27 commits)
  sched: Use correct macro to display sched_child_runs_first in /proc/sched_debug
  sched: No need for bootmem special cases
  sched: Revert nohz_ratelimit() for now
  sched: Reduce update_group_power() calls
  sched: Update rq->clock for nohz balanced cpus
  sched: Fix spelling of sibling
  sched, cpuset: Drop __cpuexit from cpu hotplug callbacks
  sched: Fix the racy usage of thread_group_cputimer() in fastpath_timer_check()
  sched: run_posix_cpu_timers: Don't check ->exit_state, use lock_task_sighand()
  sched: thread_group_cputime: Simplify, document the "alive" check
  sched: Remove the obsolete exit_state/signal hacks
  sched: task_tick_rt: Remove the obsolete ->signal != NULL check
  sched: __sched_setscheduler: Read the RLIMIT_RTPRIO value lockless
  sched: Fix comments to make them DocBook happy
  sched: Fix fix_small_capacity
  powerpc: Exclude arch_sd_sibiling_asym_packing() on UP
  powerpc: Enable asymmetric SMT scheduling on POWER7
  sched: Add asymmetric group packing option for sibling domain
  sched: Fix capacity calculations for SMT4
  sched: Change nohz idle load balancing logic to push model
  ...
  • Loading branch information
torvalds committed Aug 6, 2010
2 parents 4aed2fd + 0bcfe75 commit c4efd6b
Show file tree
Hide file tree
Showing 28 changed files with 877 additions and 410 deletions.
4 changes: 2 additions & 2 deletions arch/parisc/kernel/ftrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ unsigned long ftrace_return_to_handler(unsigned long retval0,
unsigned long ret;

pop_return_trace(&trace, &ret);
trace.rettime = cpu_clock(raw_smp_processor_id());
trace.rettime = local_clock();
ftrace_graph_return(&trace);

if (unlikely(!ret)) {
Expand Down Expand Up @@ -126,7 +126,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
return;
}

calltime = cpu_clock(raw_smp_processor_id());
calltime = local_clock();

if (push_return_trace(old, calltime,
self_addr, &trace.depth) == -EBUSY) {
Expand Down
3 changes: 2 additions & 1 deletion arch/powerpc/include/asm/cputable.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ extern const char *powerpc_base_platform;
#define CPU_FTR_SAO LONG_ASM_CONST(0x0020000000000000)
#define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0040000000000000)
#define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0080000000000000)
#define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0100000000000000)

#ifndef __ASSEMBLY__

Expand Down Expand Up @@ -412,7 +413,7 @@ extern const char *powerpc_base_platform;
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
CPU_FTR_DSCR | CPU_FTR_SAO)
CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT)
#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
Expand Down
11 changes: 11 additions & 0 deletions arch/powerpc/kernel/process.c
Original file line number Diff line number Diff line change
Expand Up @@ -1299,3 +1299,14 @@ unsigned long randomize_et_dyn(unsigned long base)

return ret;
}

#ifdef CONFIG_SMP
/*
 * Architecture hook that supplies an extra sched-domain flag.
 *
 * Returns SD_ASYM_PACKING when cpu_has_feature(CPU_FTR_ASYM_SMT) holds,
 * after emitting a KERN_INFO message via printk_once(); returns 0 when
 * the feature bit is absent.
 */
int arch_sd_sibling_asym_packing(void)
{
	/* No asymmetric-SMT feature bit: contribute no flag. */
	if (!cpu_has_feature(CPU_FTR_ASYM_SMT))
		return 0;

	printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
	return SD_ASYM_PACKING;
}
#endif
25 changes: 25 additions & 0 deletions include/linux/cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,31 @@ extern ssize_t arch_cpu_release(const char *, size_t);
#endif
struct notifier_block;

/*
 * Priorities for CPU hotplug notifiers.
 */
enum {
	/*
	 * The scheduler and cpuset hooks occupy the two extremes of the
	 * int range.
	 *
	 * On CPU_ONLINE and CPU_DOWN_FAILED, SCHED_ACTIVE marks the cpu
	 * that is coming up as active and has to be the very first
	 * notifier; CPUSET_ACTIVE comes right after it and adjusts the
	 * cpusets to the cpu_active mask.  On CPU_DOWN_PREPARE the pair
	 * SCHED_INACTIVE / CPUSET_INACTIVE is ordered in a similar way.
	 *
	 * With this ordering every cpu notifier observes a consistent
	 * cpu_active mask and consistent migration behavior.
	 */
	CPU_PRI_SCHED_ACTIVE	= INT_MAX,
	CPU_PRI_CPUSET_ACTIVE	= CPU_PRI_SCHED_ACTIVE - 1,
	CPU_PRI_CPUSET_INACTIVE	= INT_MIN,
	CPU_PRI_SCHED_INACTIVE	= CPU_PRI_CPUSET_INACTIVE + 1,

	/* Migration runs after perf but before other notifiers. */
	CPU_PRI_PERF		= 20,
	CPU_PRI_MIGRATION	= 10,
};

#ifdef CONFIG_SMP
/* Need to know about CPUs going up/down? */
#if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE)
Expand Down
6 changes: 6 additions & 0 deletions include/linux/cpuset.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */

extern int cpuset_init(void);
extern void cpuset_init_smp(void);
extern void cpuset_update_active_cpus(void);
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
Expand Down Expand Up @@ -132,6 +133,11 @@ static inline void set_mems_allowed(nodemask_t nodemask)
static inline int cpuset_init(void) { return 0; }
static inline void cpuset_init_smp(void) {}

/*
* Inline stub of cpuset_update_active_cpus(): calls
* partition_sched_domains() with ndoms = 1 and NULL for both the
* domain-mask array and the attribute array.
* NOTE(review): presumably this makes the scheduler fall back to a
* single default domain - confirm against partition_sched_domains().
*/
static inline void cpuset_update_active_cpus(void)
{
partition_sched_domains(1, NULL, NULL);
}

static inline void cpuset_cpus_allowed(struct task_struct *p,
struct cpumask *mask)
{
Expand Down
2 changes: 1 addition & 1 deletion include/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -1067,7 +1067,7 @@ static inline void perf_event_disable(struct perf_event *event) { }
#define perf_cpu_notifier(fn) \
do { \
static struct notifier_block fn##_nb __cpuinitdata = \
{ .notifier_call = fn, .priority = 20 }; \
{ .notifier_call = fn, .priority = CPU_PRI_PERF }; \
fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \
(void *)(unsigned long)smp_processor_id()); \
fn(&fn##_nb, (unsigned long)CPU_STARTING, \
Expand Down
59 changes: 29 additions & 30 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,19 +272,10 @@ extern int runqueue_is_locked(int cpu);

extern cpumask_var_t nohz_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
extern int select_nohz_load_balancer(int cpu);
extern int get_nohz_load_balancer(void);
extern int nohz_ratelimit(int cpu);
extern void select_nohz_load_balancer(int stop_tick);
extern int get_nohz_timer_target(void);
#else
static inline int select_nohz_load_balancer(int cpu)
{
return 0;
}

static inline int nohz_ratelimit(int cpu)
{
return 0;
}
static inline void select_nohz_load_balancer(int stop_tick) { }
#endif

/*
Expand Down Expand Up @@ -801,7 +792,7 @@ enum cpu_idle_type {
#define SD_POWERSAVINGS_BALANCE 0x0100 /* Balance for power savings */
#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */

#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */
#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */

enum powersavings_balance_level {
Expand Down Expand Up @@ -836,6 +827,8 @@ static inline int sd_balance_for_package_power(void)
return SD_PREFER_SIBLING;
}

extern int __weak arch_sd_sibiling_asym_packing(void);

/*
* Optimise SD flags for power savings:
* SD_BALANCE_NEWIDLE helps aggressive task consolidation and power savings.
Expand All @@ -857,7 +850,7 @@ struct sched_group {
* CPU power of this group, SCHED_LOAD_SCALE being max power for a
* single CPU.
*/
unsigned int cpu_power;
unsigned int cpu_power, cpu_power_orig;

/*
* The CPUs this group covers.
Expand Down Expand Up @@ -1693,6 +1686,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
#define PF_EXITING 0x00000004 /* getting shut down */
#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */
#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
Expand Down Expand Up @@ -1787,20 +1781,23 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
#endif

/*
* Architectures can set this to 1 if they have specified
* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
* but then during bootup it turns out that sched_clock()
* is reliable after all:
* Do not use outside of architecture code which knows its limitations.
*
* sched_clock() has no promise of monotonicity or bounded drift between
* CPUs; using it (which you should not) requires disabling IRQs.
*
* Please use one of the three interfaces below.
*/
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
extern int sched_clock_stable;
#endif

/* ftrace calls sched_clock() directly */
extern unsigned long long notrace sched_clock(void);
/*
* See the comment in kernel/sched_clock.c
*/
extern u64 cpu_clock(int cpu);
extern u64 local_clock(void);
extern u64 sched_clock_cpu(int cpu);


extern void sched_clock_init(void);
extern u64 sched_clock_cpu(int cpu);

#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
static inline void sched_clock_tick(void)
Expand All @@ -1815,17 +1812,19 @@ static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
{
}
#else
/*
* Architectures can set this to 1 if they have specified
* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
* but then during bootup it turns out that sched_clock()
* is reliable after all:
*/
extern int sched_clock_stable;

extern void sched_clock_tick(void);
extern void sched_clock_idle_sleep_event(void);
extern void sched_clock_idle_wakeup_event(u64 delta_ns);
#endif

/*
* For kernel-internal use: high-speed (but slightly incorrect) per-cpu
* clock constructed from sched_clock():
*/
extern unsigned long long cpu_clock(int cpu);

extern unsigned long long
task_sched_runtime(struct task_struct *task);
extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
Expand Down
1 change: 1 addition & 0 deletions include/linux/topology.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ int arch_update_cpu_topology(void);
| 1*SD_SHARE_PKG_RESOURCES \
| 0*SD_SERIALIZE \
| 0*SD_PREFER_SIBLING \
| arch_sd_sibling_asym_packing() \
, \
.last_balance = jiffies, \
.balance_interval = 1, \
Expand Down
6 changes: 0 additions & 6 deletions kernel/cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -235,11 +235,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
return -EINVAL;

cpu_hotplug_begin();
set_cpu_active(cpu, false);
err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
if (err) {
set_cpu_active(cpu, true);

nr_calls--;
__cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
printk("%s: attempt to take down CPU %u failed\n",
Expand All @@ -249,7 +246,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)

err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
if (err) {
set_cpu_active(cpu, true);
/* CPU didn't die: tell everyone. Can't complain. */
cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);

Expand Down Expand Up @@ -321,8 +317,6 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
goto out_notify;
BUG_ON(!cpu_online(cpu));

set_cpu_active(cpu, true);

/* Now call notifier in preparation. */
cpu_notify(CPU_ONLINE | mod, hcpu);

Expand Down
21 changes: 2 additions & 19 deletions kernel/cpuset.c
Original file line number Diff line number Diff line change
Expand Up @@ -2113,31 +2113,17 @@ static void scan_for_empty_cpusets(struct cpuset *root)
* but making no active use of cpusets.
*
* This routine ensures that top_cpuset.cpus_allowed tracks
* cpu_online_map on each CPU hotplug (cpuhp) event.
* cpu_active_mask on each CPU hotplug (cpuhp) event.
*
* Called within get_online_cpus(). Needs to call cgroup_lock()
* before calling generate_sched_domains().
*/
static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
unsigned long phase, void *unused_cpu)
void cpuset_update_active_cpus(void)
{
struct sched_domain_attr *attr;
cpumask_var_t *doms;
int ndoms;

switch (phase) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
break;

default:
return NOTIFY_DONE;
}

cgroup_lock();
mutex_lock(&callback_mutex);
cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
Expand All @@ -2148,8 +2134,6 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,

/* Have scheduler rebuild the domains */
partition_sched_domains(ndoms, doms, attr);

return NOTIFY_OK;
}

#ifdef CONFIG_MEMORY_HOTPLUG
Expand Down Expand Up @@ -2203,7 +2187,6 @@ void __init cpuset_init_smp(void)
cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];

hotcpu_notifier(cpuset_track_online_cpus, 0);
hotplug_memory_notifier(cpuset_track_online_nodes, 10);

cpuset_wq = create_singlethread_workqueue("cpuset");
Expand Down
2 changes: 1 addition & 1 deletion kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -907,7 +907,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
{
unsigned long new_flags = p->flags;

new_flags &= ~PF_SUPERPRIV;
new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
new_flags |= PF_FORKNOEXEC;
new_flags |= PF_STARTING;
p->flags = new_flags;
Expand Down
8 changes: 2 additions & 6 deletions kernel/hrtimer.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,8 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
static int hrtimer_get_target(int this_cpu, int pinned)
{
#ifdef CONFIG_NO_HZ
if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) {
int preferred_cpu = get_nohz_load_balancer();

if (preferred_cpu >= 0)
return preferred_cpu;
}
if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu))
return get_nohz_timer_target();
#endif
return this_cpu;
}
Expand Down
2 changes: 1 addition & 1 deletion kernel/lockdep.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS],

static inline u64 lockstat_clock(void)
{
return cpu_clock(smp_processor_id());
return local_clock();
}

static int lock_point(unsigned long points[], unsigned long ip)
Expand Down
2 changes: 1 addition & 1 deletion kernel/perf_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ static void perf_unpin_context(struct perf_event_context *ctx)

static inline u64 perf_clock(void)
{
return cpu_clock(raw_smp_processor_id());
return local_clock();
}

/*
Expand Down
Loading

0 comments on commit c4efd6b

Please sign in to comment.