Merge tag 'sched-core-2021-08-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:

 - The biggest change in this cycle is scheduler support for asymmetric
   scheduling affinity, to support the execution of legacy 32-bit tasks
   on AArch64 systems that also contain 64-bit-only CPUs.

   Architectures can fill in this functionality by defining their own
   task_cpu_possible_mask(p). When this is done, the scheduler will make
   sure the task is only scheduled on CPUs that support it (see the
   sketch after this list).

   (The actual arm64 specific changes are not part of this tree.)

   For other architectures there will be no change in functionality.

 - Add cgroup SCHED_IDLE support

 - Increase node-distance flexibility & delay determining it until a CPU
   is brought online. (This enables platforms where node distance isn't
   final until the CPU is online.)

 - Deadline scheduler enhancements & fixes

 - Misc fixes & cleanups.
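
   As an illustration of the first item above, an architecture with a mix
   of 32-bit-capable and 64-bit-only CPUs could override the hook roughly
   as follows. This is a hedged sketch, not the actual arm64 change (which
   is not part of this pull): xyz_32bit_el0_cpumask() is a made-up helper,
   and TIF_32BIT is only available on architectures that define it.

	/* arch/xyz/include/asm/mmu_context.h -- illustrative sketch only */
	#include <linux/cpumask.h>
	#include <linux/sched.h>

	/* Hypothetical mask of CPUs able to run 32-bit user tasks. */
	extern const struct cpumask *xyz_32bit_el0_cpumask(void);

	/* 32-bit tasks may only be placed on 32-bit-capable CPUs. */
	#define task_cpu_possible_mask(p)				\
		(test_tsk_thread_flag((p), TIF_32BIT) ?			\
		 xyz_32bit_el0_cpumask() : cpu_possible_mask)

   The generic fallback in include/linux/mmu_context.h (see the diff below)
   keeps task_cpu_possible_mask(p) equal to cpu_possible_mask on
   homogeneous systems, so behaviour only changes where an architecture
   opts in.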

* tag 'sched-core-2021-08-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (27 commits)
  eventfd: Make signal recursion protection a task bit
  sched/fair: Mark tg_is_idle() an inline in the !CONFIG_FAIR_GROUP_SCHED case
  sched: Introduce dl_task_check_affinity() to check proposed affinity
  sched: Allow task CPU affinity to be restricted on asymmetric systems
  sched: Split the guts of sched_setaffinity() into a helper function
  sched: Introduce task_struct::user_cpus_ptr to track requested affinity
  sched: Reject CPU affinity changes based on task_cpu_possible_mask()
  cpuset: Cleanup cpuset_cpus_allowed_fallback() use in select_fallback_rq()
  cpuset: Honour task_cpu_possible_mask() in guarantee_online_cpus()
  cpuset: Don't use the cpu_possible_mask as a last resort for cgroup v1
  sched: Introduce task_cpu_possible_mask() to limit fallback rq selection
  sched: Cgroup SCHED_IDLE support
  sched/topology: Skip updating masks for non-online nodes
  sched: Replace deprecated CPU-hotplug functions.
  sched: Skip priority checks with SCHED_FLAG_KEEP_PARAMS
  sched: Fix UCLAMP_FLAG_IDLE setting
  sched/deadline: Fix missing clock update in migrate_task_rq_dl()
  sched/fair: Avoid a second scan of target in select_idle_cpu
  sched/fair: Use prev instead of new target as recent_used_cpu
  sched: Don't report SCHED_FLAG_SUGOV in sched_getattr()
  ...
torvalds committed Aug 30, 2021
2 parents 230bda0 + b542e38 commit 5d3c0db
Showing 17 changed files with 729 additions and 185 deletions.
2 changes: 1 addition & 1 deletion fs/aio.c
@@ -1695,7 +1695,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
list_del(&iocb->ki_list);
iocb->ki_res.res = mangle_poll(mask);
req->done = true;
if (iocb->ki_eventfd && eventfd_signal_count()) {
if (iocb->ki_eventfd && eventfd_signal_allowed()) {
iocb = NULL;
INIT_WORK(&req->work, aio_poll_put_work);
schedule_work(&req->work);
12 changes: 5 additions & 7 deletions fs/eventfd.c
@@ -25,8 +25,6 @@
#include <linux/idr.h>
#include <linux/uio.h>

DEFINE_PER_CPU(int, eventfd_wake_count);

static DEFINE_IDA(eventfd_ida);

struct eventfd_ctx {
@@ -67,21 +65,21 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
* Deadlock or stack overflow issues can happen if we recurse here
* through waitqueue wakeup handlers. If the caller users potentially
* nested waitqueues with custom wakeup handlers, then it should
* check eventfd_signal_count() before calling this function. If
* it returns true, the eventfd_signal() call should be deferred to a
* check eventfd_signal_allowed() before calling this function. If
* it returns false, the eventfd_signal() call should be deferred to a
* safe context.
*/
if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
if (WARN_ON_ONCE(current->in_eventfd_signal))
return 0;

spin_lock_irqsave(&ctx->wqh.lock, flags);
this_cpu_inc(eventfd_wake_count);
current->in_eventfd_signal = 1;
if (ULLONG_MAX - ctx->count < n)
n = ULLONG_MAX - ctx->count;
ctx->count += n;
if (waitqueue_active(&ctx->wqh))
wake_up_locked_poll(&ctx->wqh, EPOLLIN);
this_cpu_dec(eventfd_wake_count);
current->in_eventfd_signal = 0;
spin_unlock_irqrestore(&ctx->wqh.lock, flags);

return n;
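A hedged sketch of the deferral pattern the comment above describes,
modeled on the fs/aio.c hunk earlier in this diff (the struct and function
names here are made up for illustration):

	#include <linux/eventfd.h>
	#include <linux/kernel.h>
	#include <linux/workqueue.h>

	/* Hypothetical completion object owning an eventfd context. */
	struct foo_completion {
		struct eventfd_ctx	*eventfd;
		struct work_struct	work;
	};

	static void foo_signal_work(struct work_struct *work)
	{
		struct foo_completion *fc =
			container_of(work, struct foo_completion, work);

		eventfd_signal(fc->eventfd, 1);
	}

	static void foo_complete(struct foo_completion *fc)
	{
		if (!fc->eventfd)
			return;

		if (!eventfd_signal_allowed()) {
			/* Nested wakeup path: defer to process context. */
			INIT_WORK(&fc->work, foo_signal_work);
			schedule_work(&fc->work);
			return;
		}

		eventfd_signal(fc->eventfd, 1);
	}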
8 changes: 5 additions & 3 deletions include/linux/cpuset.h
@@ -15,6 +15,7 @@
#include <linux/cpumask.h>
#include <linux/nodemask.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/jump_label.h>

#ifdef CONFIG_CPUSETS
@@ -58,7 +59,7 @@ extern void cpuset_wait_for_hotplug(void);
extern void cpuset_read_lock(void);
extern void cpuset_read_unlock(void);
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
extern void cpuset_cpus_allowed_fallback(struct task_struct *p);
extern bool cpuset_cpus_allowed_fallback(struct task_struct *p);
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
#define cpuset_current_mems_allowed (current->mems_allowed)
void cpuset_init_current_mems_allowed(void);
@@ -184,11 +185,12 @@ static inline void cpuset_read_unlock(void) { }
static inline void cpuset_cpus_allowed(struct task_struct *p,
struct cpumask *mask)
{
cpumask_copy(mask, cpu_possible_mask);
cpumask_copy(mask, task_cpu_possible_mask(p));
}

static inline void cpuset_cpus_allowed_fallback(struct task_struct *p)
static inline bool cpuset_cpus_allowed_fallback(struct task_struct *p)
{
return false;
}

static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
11 changes: 5 additions & 6 deletions include/linux/eventfd.h
@@ -14,6 +14,7 @@
#include <linux/err.h>
#include <linux/percpu-defs.h>
#include <linux/percpu.h>
#include <linux/sched.h>

/*
* CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
@@ -43,11 +44,9 @@ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *w
__u64 *cnt);
void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);

DECLARE_PER_CPU(int, eventfd_wake_count);

static inline bool eventfd_signal_count(void)
static inline bool eventfd_signal_allowed(void)
{
return this_cpu_read(eventfd_wake_count);
return !current->in_eventfd_signal;
}

#else /* CONFIG_EVENTFD */
@@ -78,9 +77,9 @@ static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx,
return -ENOSYS;
}

static inline bool eventfd_signal_count(void)
static inline bool eventfd_signal_allowed(void)
{
return false;
return true;
}

static inline void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
14 changes: 14 additions & 0 deletions include/linux/mmu_context.h
@@ -14,4 +14,18 @@
static inline void leave_mm(int cpu) { }
#endif

/*
* CPUs that are capable of running user task @p. Must contain at least one
* active CPU. It is assumed that the kernel can run on all CPUs, so calling
* this for a kernel thread is pointless.
*
* By default, we assume a sane, homogeneous system.
*/
#ifndef task_cpu_possible_mask
# define task_cpu_possible_mask(p) cpu_possible_mask
# define task_cpu_possible(cpu, p) true
#else
# define task_cpu_possible(cpu, p) cpumask_test_cpu((cpu), task_cpu_possible_mask(p))
#endif

#endif
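
For context, a hedged sketch of how these helpers can be consumed when
looking for a CPU the task can actually use (illustrative only; the
in-tree consumer is select_fallback_rq() in kernel/sched/core.c, which is
not shown on this page):

	#include <linux/cpumask.h>
	#include <linux/mmu_context.h>
	#include <linux/sched.h>

	/* Return any online CPU that @p is able to run on, or -1. */
	static int pick_any_usable_cpu(struct task_struct *p)
	{
		int cpu;

		for_each_cpu(cpu, cpu_online_mask) {
			if (task_cpu_possible(cpu, p))
				return cpu;
		}
		return -1;
	}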
25 changes: 25 additions & 0 deletions include/linux/sched.h
@@ -748,6 +748,7 @@ struct task_struct {
unsigned int policy;
int nr_cpus_allowed;
const cpumask_t *cpus_ptr;
cpumask_t *user_cpus_ptr;
cpumask_t cpus_mask;
void *migration_pending;
#ifdef CONFIG_SMP
@@ -863,6 +864,10 @@ struct task_struct {
/* Used by page_owner=on to detect recursion in page tracking. */
unsigned in_page_owner:1;
#endif
#ifdef CONFIG_EVENTFD
/* Recursion prevention for eventfd_signal() */
unsigned in_eventfd_signal:1;
#endif

unsigned long atomic_flags; /* Flags requiring atomic access. */

@@ -1705,6 +1710,11 @@ extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_
#ifdef CONFIG_SMP
extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node);
extern void release_user_cpus_ptr(struct task_struct *p);
extern int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask);
extern void force_compatible_cpus_allowed_ptr(struct task_struct *p);
extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p);
#else
static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
@@ -1715,6 +1725,21 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma
return -EINVAL;
return 0;
}
static inline int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node)
{
if (src->user_cpus_ptr)
return -EINVAL;
return 0;
}
static inline void release_user_cpus_ptr(struct task_struct *p)
{
WARN_ON(p->user_cpus_ptr);
}

static inline int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask)
{
return 0;
}
#endif

extern int yield_to(struct task_struct *p, bool preempt);
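A hedged sketch of how an architecture's exec path might use the two new
affinity helpers declared above (illustrative only; the arm64 callers are
not part of this tree, and is_compat_task() depends on CONFIG_COMPAT):

	#include <linux/compat.h>
	#include <linux/sched.h>

	/* Hypothetical hook called after a new binary has been loaded. */
	static void xyz_adjust_affinity_for_exec(void)
	{
		if (is_compat_task())
			/* Clamp affinity to CPUs that can run 32-bit code. */
			force_compatible_cpus_allowed_ptr(current);
		else
			/* Restore the affinity the user originally requested. */
			relax_compatible_cpus_allowed_ptr(current);
	}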
18 changes: 0 additions & 18 deletions include/linux/sched/sysctl.h
@@ -28,30 +28,12 @@ enum { sysctl_hung_task_timeout_secs = 0 };

extern unsigned int sysctl_sched_child_runs_first;

extern unsigned int sysctl_sched_latency;
extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;

enum sched_tunable_scaling {
SCHED_TUNABLESCALING_NONE,
SCHED_TUNABLESCALING_LOG,
SCHED_TUNABLESCALING_LINEAR,
SCHED_TUNABLESCALING_END,
};
extern unsigned int sysctl_sched_tunable_scaling;

extern unsigned int sysctl_numa_balancing_scan_delay;
extern unsigned int sysctl_numa_balancing_scan_period_min;
extern unsigned int sysctl_numa_balancing_scan_period_max;
extern unsigned int sysctl_numa_balancing_scan_size;

#ifdef CONFIG_SCHED_DEBUG
extern __read_mostly unsigned int sysctl_sched_migration_cost;
extern __read_mostly unsigned int sysctl_sched_nr_migrate;

extern int sysctl_resched_latency_warn_ms;
extern int sysctl_resched_latency_warn_once;
#endif

/*
* control realtime throttling:
2 changes: 1 addition & 1 deletion include/linux/wait.h
@@ -56,7 +56,7 @@ struct task_struct;

#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
.head = { &(name).head, &(name).head } }
.head = LIST_HEAD_INIT(name.head) }

#define DECLARE_WAIT_QUEUE_HEAD(name) \
struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
1 change: 1 addition & 0 deletions init/init_task.c
@@ -80,6 +80,7 @@ struct task_struct init_task
.normal_prio = MAX_PRIO - 20,
.policy = SCHED_NORMAL,
.cpus_ptr = &init_task.cpus_mask,
.user_cpus_ptr = NULL,
.cpus_mask = CPU_MASK_ALL,
.nr_cpus_allowed= NR_CPUS,
.mm = NULL,
59 changes: 39 additions & 20 deletions kernel/cgroup/cpuset.c
@@ -372,18 +372,29 @@ static inline bool is_in_v2_mode(void)
}

/*
* Return in pmask the portion of a cpusets's cpus_allowed that
* are online. If none are online, walk up the cpuset hierarchy
* until we find one that does have some online cpus.
* Return in pmask the portion of a task's cpusets's cpus_allowed that
* are online and are capable of running the task. If none are found,
* walk up the cpuset hierarchy until we find one that does have some
* appropriate cpus.
*
* One way or another, we guarantee to return some non-empty subset
* of cpu_online_mask.
*
* Call with callback_lock or cpuset_mutex held.
*/
static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
static void guarantee_online_cpus(struct task_struct *tsk,
struct cpumask *pmask)
{
while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) {
const struct cpumask *possible_mask = task_cpu_possible_mask(tsk);
struct cpuset *cs;

if (WARN_ON(!cpumask_and(pmask, possible_mask, cpu_online_mask)))
cpumask_copy(pmask, cpu_online_mask);

rcu_read_lock();
cs = task_cs(tsk);

while (!cpumask_intersects(cs->effective_cpus, pmask)) {
cs = parent_cs(cs);
if (unlikely(!cs)) {
/*
@@ -393,11 +404,13 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
* cpuset's effective_cpus is on its way to be
* identical to cpu_online_mask.
*/
cpumask_copy(pmask, cpu_online_mask);
return;
goto out_unlock;
}
}
cpumask_and(pmask, cs->effective_cpus, cpu_online_mask);
cpumask_and(pmask, pmask, cs->effective_cpus);

out_unlock:
rcu_read_unlock();
}

/*
@@ -2199,15 +2212,13 @@ static void cpuset_attach(struct cgroup_taskset *tset)

percpu_down_write(&cpuset_rwsem);

/* prepare for attach */
if (cs == &top_cpuset)
cpumask_copy(cpus_attach, cpu_possible_mask);
else
guarantee_online_cpus(cs, cpus_attach);

guarantee_online_mems(cs, &cpuset_attach_nodemask_to);

cgroup_taskset_for_each(task, css, tset) {
if (cs != &top_cpuset)
guarantee_online_cpus(task, cpus_attach);
else
cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
/*
* can_attach beforehand should guarantee that this doesn't
* fail. TODO: have a better way to handle failure here
@@ -3302,9 +3313,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
unsigned long flags;

spin_lock_irqsave(&callback_lock, flags);
rcu_read_lock();
guarantee_online_cpus(task_cs(tsk), pmask);
rcu_read_unlock();
guarantee_online_cpus(tsk, pmask);
spin_unlock_irqrestore(&callback_lock, flags);
}

@@ -3318,13 +3327,22 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
* which will not contain a sane cpumask during cases such as cpu hotplugging.
* This is the absolute last resort for the scheduler and it is only used if
* _every_ other avenue has been traveled.
*
* Returns true if the affinity of @tsk was changed, false otherwise.
**/

void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
bool cpuset_cpus_allowed_fallback(struct task_struct *tsk)
{
const struct cpumask *possible_mask = task_cpu_possible_mask(tsk);
const struct cpumask *cs_mask;
bool changed = false;

rcu_read_lock();
do_set_cpus_allowed(tsk, is_in_v2_mode() ?
task_cs(tsk)->cpus_allowed : cpu_possible_mask);
cs_mask = task_cs(tsk)->cpus_allowed;
if (is_in_v2_mode() && cpumask_subset(cs_mask, possible_mask)) {
do_set_cpus_allowed(tsk, cs_mask);
changed = true;
}
rcu_read_unlock();

/*
@@ -3344,6 +3362,7 @@ void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
* select_fallback_rq() will fix things ups and set cpu_possible_mask
* if required.
*/
return changed;
}

void __init cpuset_init_current_mems_allowed(void)
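A hedged sketch of how a caller can react to the new boolean return value
(illustrative only; the real consumer is select_fallback_rq(), whose diff
is not shown on this page):

	#include <linux/cpuset.h>
	#include <linux/mmu_context.h>
	#include <linux/sched.h>

	static void fallback_affinity(struct task_struct *p)
	{
		/*
		 * Ask cpusets for a usable mask first; if it cannot provide
		 * one that @p can run on, fall back to the set of CPUs the
		 * task is physically capable of using.
		 */
		if (!cpuset_cpus_allowed_fallback(p))
			do_set_cpus_allowed(p, task_cpu_possible_mask(p));
	}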
2 changes: 2 additions & 0 deletions kernel/fork.c
@@ -446,6 +446,7 @@ void put_task_stack(struct task_struct *tsk)

void free_task(struct task_struct *tsk)
{
release_user_cpus_ptr(tsk);
scs_release(tsk);

#ifndef CONFIG_THREAD_INFO_IN_TASK
@@ -924,6 +925,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
#endif
if (orig->cpus_ptr == &orig->cpus_mask)
tsk->cpus_ptr = &tsk->cpus_mask;
dup_user_cpus_ptr(tsk, orig, node);

/*
* One for the user space visible state that goes away when reaped.
(The remaining changed files in this commit are not shown here.)