sched/core: Provide a pointer to the valid CPU mask
In commit:

  4b53a34 ("sched/core: Remove the tsk_nr_cpus_allowed() wrapper")

the tsk_nr_cpus_allowed() wrapper was removed. There was not
much difference in !RT, but in RT we used it to implement
migrate_disable(). Within a migrate_disable() section the CPU mask is
restricted to a single CPU while the "normal" CPU mask remains untouched.

As an alternative implementation, Ingo suggested using:

	struct task_struct {
		const cpumask_t		*cpus_ptr;
		cpumask_t		cpus_mask;
	};
with
	t->cpus_ptr = &t->cpus_mask;

In -RT we can then switch cpus_ptr to:

	t->cpus_ptr = cpumask_of(task_cpu(p));

in a migration-disabled region. The rules are simple (see the sketch after the list):

 - Code that 'uses' ->cpus_allowed would use the pointer.
 - Code that 'modifies' ->cpus_allowed would use the direct mask.
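
A minimal standalone sketch of these rules (plain userspace C, not kernel
code; the toy_* names are made up for illustration and are not kernel
APIs): readers dereference ->cpus_ptr, writers touch ->cpus_mask, and an
-RT style migrate_disable() only repoints ->cpus_ptr at a single-CPU mask:

	#include <assert.h>

	struct toy_cpumask { unsigned long bits; };

	struct toy_task {
		const struct toy_cpumask *cpus_ptr;	/* code that 'uses' the mask reads this */
		struct toy_cpumask cpus_mask;		/* code that 'modifies' the mask writes this */
	};

	static const struct toy_cpumask toy_single_cpu[4] = {
		{ 0x1 }, { 0x2 }, { 0x4 }, { 0x8 },
	};

	static void toy_set_allowed(struct toy_task *t, unsigned long bits)
	{
		t->cpus_mask.bits = bits;		/* rule 2: writers use the direct mask */
	}

	static void toy_migrate_disable(struct toy_task *t, int cpu)
	{
		t->cpus_ptr = &toy_single_cpu[cpu];	/* -RT: repoint, don't rewrite the mask */
	}

	static void toy_migrate_enable(struct toy_task *t)
	{
		t->cpus_ptr = &t->cpus_mask;		/* back to the default pointer */
	}

	int main(void)
	{
		struct toy_task t = { .cpus_ptr = &t.cpus_mask };

		toy_set_allowed(&t, 0xf);
		toy_migrate_disable(&t, 2);
		assert(t.cpus_ptr->bits == 0x4);	/* rule 1: readers see CPU 2 only */
		assert(t.cpus_mask.bits == 0xf);	/* the "normal" mask is untouched */
		toy_migrate_enable(&t);
		assert(t.cpus_ptr->bits == 0xf);
		return 0;
	}

This is also why the kernel/fork.c hunk below re-points the child's
cpus_ptr at the child's own cpus_mask (when the parent's pointer was the
default one) instead of keeping the pointer value copied from the parent.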

Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
Sebastian Andrzej Siewior authored and Ingo Molnar committed Jun 3, 2019
1 parent f2c7c76 commit 3bd3706
Showing 23 changed files with 75 additions and 73 deletions.
2 changes: 1 addition & 1 deletion arch/ia64/kernel/mca.c
@@ -1831,7 +1831,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
ti->cpu = cpu;
p->stack = ti;
p->state = TASK_UNINTERRUPTIBLE;
- cpumask_set_cpu(cpu, &p->cpus_allowed);
+ cpumask_set_cpu(cpu, &p->cpus_mask);
INIT_LIST_HEAD(&p->tasks);
p->parent = p->real_parent = p->group_leader = p;
INIT_LIST_HEAD(&p->children);
4 changes: 2 additions & 2 deletions arch/mips/include/asm/switch_to.h
@@ -42,7 +42,7 @@ extern struct task_struct *ll_task;
* inline to try to keep the overhead down. If we have been forced to run on
* a "CPU" with an FPU because of a previous high level of FP computation,
* but did not actually use the FPU during the most recent time-slice (CU1
- * isn't set), we undo the restriction on cpus_allowed.
+ * isn't set), we undo the restriction on cpus_mask.
*
* We're not calling set_cpus_allowed() here, because we have no need to
* force prompt migration - we're already switching the current CPU to a
@@ -57,7 +57,7 @@ do { \
test_ti_thread_flag(__prev_ti, TIF_FPUBOUND) && \
(!(KSTK_STATUS(prev) & ST0_CU1))) { \
clear_ti_thread_flag(__prev_ti, TIF_FPUBOUND); \
- prev->cpus_allowed = prev->thread.user_cpus_allowed; \
+ prev->cpus_mask = prev->thread.user_cpus_allowed; \
} \
next->thread.emulated_fp = 0; \
} while(0)
2 changes: 1 addition & 1 deletion arch/mips/kernel/mips-mt-fpaff.c
@@ -177,7 +177,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
if (retval)
goto out_unlock;

- cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed);
+ cpumask_or(&allowed, &p->thread.user_cpus_allowed, p->cpus_ptr);
cpumask_and(&mask, &allowed, cpu_active_mask);

out_unlock:
6 changes: 3 additions & 3 deletions arch/mips/kernel/traps.c
@@ -891,12 +891,12 @@ static void mt_ase_fp_affinity(void)
* restricted the allowed set to exclude any CPUs with FPUs,
* we'll skip the procedure.
*/
- if (cpumask_intersects(&current->cpus_allowed, &mt_fpu_cpumask)) {
+ if (cpumask_intersects(&current->cpus_mask, &mt_fpu_cpumask)) {
cpumask_t tmask;

current->thread.user_cpus_allowed
- 	= current->cpus_allowed;
- cpumask_and(&tmask, &current->cpus_allowed,
+ 	= current->cpus_mask;
+ cpumask_and(&tmask, &current->cpus_mask,
&mt_fpu_cpumask);
set_cpus_allowed_ptr(current, &tmask);
set_thread_flag(TIF_FPUBOUND);
2 changes: 1 addition & 1 deletion arch/powerpc/platforms/cell/spufs/sched.c
@@ -128,7 +128,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
* runqueue. The context will be rescheduled on the proper node
* if it is timesliced or preempted.
*/
- cpumask_copy(&ctx->cpus_allowed, &current->cpus_allowed);
+ cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr);

/* Save the current cpu id for spu interrupt routing. */
ctx->last_ran = raw_smp_processor_id();
2 changes: 1 addition & 1 deletion arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -1503,7 +1503,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
* may be scheduled elsewhere and invalidate entries in the
* pseudo-locked region.
*/
- if (!cpumask_subset(&current->cpus_allowed, &plr->d->cpu_mask)) {
+ if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
mutex_unlock(&rdtgroup_mutex);
return -EINVAL;
}
6 changes: 3 additions & 3 deletions drivers/infiniband/hw/hfi1/affinity.c
@@ -1038,15 +1038,15 @@ int hfi1_get_proc_affinity(int node)
struct hfi1_affinity_node *entry;
cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
const struct cpumask *node_mask,
- *proc_mask = &current->cpus_allowed;
+ *proc_mask = current->cpus_ptr;
struct hfi1_affinity_node_list *affinity = &node_affinity;
struct cpu_mask_set *set = &affinity->proc;

/*
* check whether process/context affinity has already
* been set
*/
- if (cpumask_weight(proc_mask) == 1) {
+ if (current->nr_cpus_allowed == 1) {
hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
current->pid, current->comm,
cpumask_pr_args(proc_mask));
@@ -1057,7 +1057,7 @@ int hfi1_get_proc_affinity(int node)
cpu = cpumask_first(proc_mask);
cpumask_set_cpu(cpu, &set->used);
goto done;
- } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
+ } else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) {
hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
current->pid, current->comm,
cpumask_pr_args(proc_mask));
3 changes: 1 addition & 2 deletions drivers/infiniband/hw/hfi1/sdma.c
@@ -855,14 +855,13 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
{
struct sdma_rht_node *rht_node;
struct sdma_engine *sde = NULL;
- const struct cpumask *current_mask = &current->cpus_allowed;
unsigned long cpu_id;

/*
* To ensure that always the same sdma engine(s) will be
* selected make sure the process is pinned to this CPU only.
*/
- if (cpumask_weight(current_mask) != 1)
+ if (current->nr_cpus_allowed != 1)
goto out;

cpu_id = smp_processor_id();
7 changes: 3 additions & 4 deletions drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1142,7 +1142,7 @@ static __poll_t qib_poll(struct file *fp, struct poll_table_struct *pt)
static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
{
struct qib_filedata *fd = fp->private_data;
- const unsigned int weight = cpumask_weight(&current->cpus_allowed);
+ const unsigned int weight = current->nr_cpus_allowed;
const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus);
int local_cpu;

@@ -1623,9 +1623,8 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
ret = find_free_ctxt(i_minor - 1, fp, uinfo);
else {
int unit;
- const unsigned int cpu = cpumask_first(&current->cpus_allowed);
- const unsigned int weight =
- 	cpumask_weight(&current->cpus_allowed);
+ const unsigned int cpu = cpumask_first(current->cpus_ptr);
+ const unsigned int weight = current->nr_cpus_allowed;

if (weight == 1 && !test_bit(cpu, qib_cpulist))
if (!find_hca(cpu, &unit) && unit >= 0)
4 changes: 2 additions & 2 deletions fs/proc/array.c
@@ -381,9 +381,9 @@ static inline void task_context_switch_counts(struct seq_file *m,
static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
{
seq_printf(m, "Cpus_allowed:\t%*pb\n",
- cpumask_pr_args(&task->cpus_allowed));
+ cpumask_pr_args(task->cpus_ptr));
seq_printf(m, "Cpus_allowed_list:\t%*pbl\n",
- cpumask_pr_args(&task->cpus_allowed));
+ cpumask_pr_args(task->cpus_ptr));
}

static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
5 changes: 3 additions & 2 deletions include/linux/sched.h
@@ -651,7 +651,8 @@ struct task_struct {

unsigned int policy;
int nr_cpus_allowed;
- cpumask_t cpus_allowed;
+ const cpumask_t *cpus_ptr;
+ cpumask_t cpus_mask;

#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
@@ -1399,7 +1400,7 @@ extern struct pid *cad_pid;
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
#define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */
#define PF_UMH 0x02000000 /* I'm an Usermodehelper process */
- #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
+ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
3 changes: 2 additions & 1 deletion init/init_task.c
@@ -72,7 +72,8 @@ struct task_struct init_task
.static_prio = MAX_PRIO - 20,
.normal_prio = MAX_PRIO - 20,
.policy = SCHED_NORMAL,
- .cpus_allowed = CPU_MASK_ALL,
+ .cpus_ptr = &init_task.cpus_mask,
+ .cpus_mask = CPU_MASK_ALL,
.nr_cpus_allowed= NR_CPUS,
.mm = NULL,
.active_mm = &init_mm,
2 changes: 1 addition & 1 deletion kernel/cgroup/cpuset.c
@@ -2829,7 +2829,7 @@ static void cpuset_fork(struct task_struct *task)
if (task_css_is_root(task, cpuset_cgrp_id))
return;

- set_cpus_allowed_ptr(task, &current->cpus_allowed);
+ set_cpus_allowed_ptr(task, current->cpus_ptr);
task->mems_allowed = current->mems_allowed;
}

2 changes: 2 additions & 0 deletions kernel/fork.c
@@ -894,6 +894,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
#ifdef CONFIG_STACKPROTECTOR
tsk->stack_canary = get_random_canary();
#endif
+ if (orig->cpus_ptr == &orig->cpus_mask)
+ 	tsk->cpus_ptr = &tsk->cpus_mask;

/*
* One for us, one for whoever does the "release_task()" (usually
40 changes: 20 additions & 20 deletions kernel/sched/core.c
@@ -930,7 +930,7 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
*/
static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
{
- if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+ if (!cpumask_test_cpu(cpu, p->cpus_ptr))
return false;

if (is_per_cpu_kthread(p))
@@ -1025,7 +1025,7 @@ static int migration_cpu_stop(void *data)
local_irq_disable();
/*
* We need to explicitly wake pending tasks before running
- * __migrate_task() such that we will not miss enforcing cpus_allowed
+ * __migrate_task() such that we will not miss enforcing cpus_ptr
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
*/
sched_ttwu_pending();
@@ -1056,7 +1056,7 @@ static int migration_cpu_stop(void *data)
*/
void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
{
- cpumask_copy(&p->cpus_allowed, new_mask);
+ cpumask_copy(&p->cpus_mask, new_mask);
p->nr_cpus_allowed = cpumask_weight(new_mask);
}

@@ -1126,7 +1126,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
goto out;
}

- if (cpumask_equal(&p->cpus_allowed, new_mask))
+ if (cpumask_equal(p->cpus_ptr, new_mask))
goto out;

if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
@@ -1286,10 +1286,10 @@ static int migrate_swap_stop(void *data)
if (task_cpu(arg->src_task) != arg->src_cpu)
goto unlock;

- if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed))
+ if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr))
goto unlock;

- if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed))
+ if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr))
goto unlock;

__migrate_swap_task(arg->src_task, arg->dst_cpu);
@@ -1331,10 +1331,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p,
if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu))
goto out;

- if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed))
+ if (!cpumask_test_cpu(arg.dst_cpu, arg.src_task->cpus_ptr))
goto out;

- if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed))
+ if (!cpumask_test_cpu(arg.src_cpu, arg.dst_task->cpus_ptr))
goto out;

trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu);
@@ -1479,7 +1479,7 @@ void kick_process(struct task_struct *p)
EXPORT_SYMBOL_GPL(kick_process);

/*
- * ->cpus_allowed is protected by both rq->lock and p->pi_lock
+ * ->cpus_ptr is protected by both rq->lock and p->pi_lock
*
* A few notes on cpu_active vs cpu_online:
*
@@ -1519,14 +1519,14 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
for_each_cpu(dest_cpu, nodemask) {
if (!cpu_active(dest_cpu))
continue;
- if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr))
return dest_cpu;
}
}

for (;;) {
/* Any allowed, online CPU? */
- for_each_cpu(dest_cpu, &p->cpus_allowed) {
+ for_each_cpu(dest_cpu, p->cpus_ptr) {
if (!is_cpu_allowed(p, dest_cpu))
continue;

@@ -1570,7 +1570,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
}

/*
- * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
+ * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
*/
static inline
int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
@@ -1580,11 +1580,11 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
if (p->nr_cpus_allowed > 1)
cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
else
- cpu = cpumask_any(&p->cpus_allowed);
+ cpu = cpumask_any(p->cpus_ptr);

/*
* In order not to call set_task_cpu() on a blocking task we need
- * to rely on ttwu() to place the task on a valid ->cpus_allowed
+ * to rely on ttwu() to place the task on a valid ->cpus_ptr
* CPU.
*
* Since this is common to all placement strategies, this lives here.
@@ -2395,7 +2395,7 @@ void wake_up_new_task(struct task_struct *p)
#ifdef CONFIG_SMP
/*
* Fork balancing, do it here and not earlier because:
- * - cpus_allowed can change in the fork path
+ * - cpus_ptr can change in the fork path
* - any previously selected CPU might disappear through hotplug
*
* Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
@@ -4267,7 +4267,7 @@ static int __sched_setscheduler(struct task_struct *p,
* the entire root_domain to become SCHED_DEADLINE. We
* will also fail if there's no bandwidth available.
*/
- if (!cpumask_subset(span, &p->cpus_allowed) ||
+ if (!cpumask_subset(span, p->cpus_ptr) ||
rq->rd->dl_bw.bw == 0) {
task_rq_unlock(rq, p, &rf);
return -EPERM;
@@ -4866,7 +4866,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
goto out_unlock;

raw_spin_lock_irqsave(&p->pi_lock, flags);
- cpumask_and(mask, &p->cpus_allowed, cpu_active_mask);
+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);

out_unlock:
@@ -5443,7 +5443,7 @@ int task_can_attach(struct task_struct *p,
* allowed nodes is unnecessary. Thus, cpusets are not
* applicable for such threads. This prevents checking for
* success of set_cpus_allowed_ptr() on all attached tasks
- * before cpus_allowed may be changed.
+ * before cpus_mask may be changed.
*/
if (p->flags & PF_NO_SETAFFINITY) {
ret = -EINVAL;
@@ -5470,7 +5470,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu)
if (curr_cpu == target_cpu)
return 0;

- if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed))
+ if (!cpumask_test_cpu(target_cpu, p->cpus_ptr))
return -EINVAL;

/* TODO: This is not properly updating schedstats */
@@ -5608,7 +5608,7 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
put_prev_task(rq, next);

/*
- * Rules for changing task_struct::cpus_allowed are holding
+ * Rules for changing task_struct::cpus_mask are holding
* both pi_lock and rq->lock, such that holding either
* stabilizes the mask.
*
4 changes: 2 additions & 2 deletions kernel/sched/cpudeadline.c
@@ -124,14 +124,14 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
const struct sched_dl_entity *dl_se = &p->dl;

if (later_mask &&
- cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
+ cpumask_and(later_mask, cp->free_cpus, p->cpus_ptr)) {
return 1;
} else {
int best_cpu = cpudl_maximum(cp);

WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));

- if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) &&
+ if (cpumask_test_cpu(best_cpu, p->cpus_ptr) &&
dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
if (later_mask)
cpumask_set_cpu(best_cpu, later_mask);
4 changes: 2 additions & 2 deletions kernel/sched/cpupri.c
@@ -98,11 +98,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
if (skip)
continue;

- if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
+ if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids)
continue;

if (lowest_mask) {
- cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+ cpumask_and(lowest_mask, p->cpus_ptr, vec->mask);

/*
* We have to ensure that we have at least one bit
Expand Down