sched: psi: pass enqueue/dequeue flags to psi callbacks directly
What psi needs to do on each enqueue and dequeue has gotten more
subtle, and the generic sched code trying to distill this into a bool
for the callbacks is awkward.

Pass the flags directly and let psi parse them. For that to work, the
#include "stats.h" (which has the psi callback implementations) needs
to be below the flag definitions in "sched.h". Move that section
further down, next to some of the other accounting stuff.
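
As a rough sketch of the resulting ordering in "sched.h" (the flag values shown are illustrative, not a verbatim copy of the header):

	/*
	 * Sketch only: psi_enqueue()/psi_dequeue() in stats.h now test the
	 * ENQUEUE_ and DEQUEUE_ flags, so the flag definitions must be
	 * visible before the include.
	 */
	#define ENQUEUE_RESTORE		0x02	/* matches DEQUEUE_SAVE */
	#define ENQUEUE_MIGRATED	0x40
	/* ... */
	#include "stats.h"		/* psi callbacks can now parse the flags */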

This also puts the ENQUEUE_SAVE/RESTORE branch behind the psi jump
label, slightly reducing overhead when PSI=y but runtime disabled.
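
Abridged from the stats.h hunk below (not a verbatim copy): the static key test runs before any flag test, so when psi is disabled at runtime the SAVE/RESTORE check is never reached.

	static inline void psi_enqueue(struct task_struct *p, int flags)
	{
		if (static_branch_likely(&psi_disabled))
			return;		/* PSI=y but runtime-disabled: skip flag parsing */

		/* Same runqueue, nothing changed for psi */
		if (flags & ENQUEUE_RESTORE)
			return;
		/* ... */
	}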

Suggested-by: Peter Zijlstra <[email protected]>
Signed-off-by: Johannes Weiner <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Authored by hnaz and committed by Peter Zijlstra on Oct 26, 2024 (commit 1a61510, parent 23f1178).
Showing 3 changed files with 53 additions and 44 deletions.
12 changes: 6 additions & 6 deletions kernel/sched/core.c
@@ -2024,10 +2024,10 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 	 */
 	uclamp_rq_inc(rq, p);
 
-	if (!(flags & ENQUEUE_RESTORE)) {
+	psi_enqueue(p, flags);
+
+	if (!(flags & ENQUEUE_RESTORE))
 		sched_info_enqueue(rq, p);
-		psi_enqueue(p, flags & ENQUEUE_MIGRATED);
-	}
 
 	if (sched_core_enabled(rq))
 		sched_core_enqueue(rq, p);
@@ -2044,10 +2044,10 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 	if (!(flags & DEQUEUE_NOCLOCK))
 		update_rq_clock(rq);
 
-	if (!(flags & DEQUEUE_SAVE)) {
+	if (!(flags & DEQUEUE_SAVE))
 		sched_info_dequeue(rq, p);
-		psi_dequeue(p, !(flags & DEQUEUE_SLEEP));
-	}
+
+	psi_dequeue(p, flags);
 
 	/*
 	 * Must be before ->dequeue_task() because ->dequeue_task() can 'fail'
56 changes: 28 additions & 28 deletions kernel/sched/sched.h
@@ -2093,34 +2093,6 @@ static inline const struct cpumask *task_user_cpus(struct task_struct *p)
 
 #endif /* CONFIG_SMP */
 
-#include "stats.h"
-
-#if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS)
-
-extern void __sched_core_account_forceidle(struct rq *rq);
-
-static inline void sched_core_account_forceidle(struct rq *rq)
-{
-	if (schedstat_enabled())
-		__sched_core_account_forceidle(rq);
-}
-
-extern void __sched_core_tick(struct rq *rq);
-
-static inline void sched_core_tick(struct rq *rq)
-{
-	if (sched_core_enabled(rq) && schedstat_enabled())
-		__sched_core_tick(rq);
-}
-
-#else /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS): */
-
-static inline void sched_core_account_forceidle(struct rq *rq) { }
-
-static inline void sched_core_tick(struct rq *rq) { }
-
-#endif /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS) */
-
 #ifdef CONFIG_CGROUP_SCHED
 
 /*
@@ -3191,6 +3163,34 @@ extern void nohz_run_idle_balance(int cpu);
 static inline void nohz_run_idle_balance(int cpu) { }
 #endif
 
+#include "stats.h"
+
+#if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS)
+
+extern void __sched_core_account_forceidle(struct rq *rq);
+
+static inline void sched_core_account_forceidle(struct rq *rq)
+{
+	if (schedstat_enabled())
+		__sched_core_account_forceidle(rq);
+}
+
+extern void __sched_core_tick(struct rq *rq);
+
+static inline void sched_core_tick(struct rq *rq)
+{
+	if (sched_core_enabled(rq) && schedstat_enabled())
+		__sched_core_tick(rq);
+}
+
+#else /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS): */
+
+static inline void sched_core_account_forceidle(struct rq *rq) { }
+
+static inline void sched_core_tick(struct rq *rq) { }
+
+#endif /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS) */
+
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 
 struct irqtime {
29 changes: 19 additions & 10 deletions kernel/sched/stats.h
@@ -127,21 +127,25 @@ static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
  * go through migration requeues. In this case, *sleeping* states need
  * to be transferred.
  */
-static inline void psi_enqueue(struct task_struct *p, bool migrate)
+static inline void psi_enqueue(struct task_struct *p, int flags)
 {
 	int clear = 0, set = 0;
 
 	if (static_branch_likely(&psi_disabled))
 		return;
 
+	/* Same runqueue, nothing changed for psi */
+	if (flags & ENQUEUE_RESTORE)
+		return;
+
 	if (p->se.sched_delayed) {
 		/* CPU migration of "sleeping" task */
-		SCHED_WARN_ON(!migrate);
+		SCHED_WARN_ON(!(flags & ENQUEUE_MIGRATED));
 		if (p->in_memstall)
 			set |= TSK_MEMSTALL;
 		if (p->in_iowait)
 			set |= TSK_IOWAIT;
-	} else if (migrate) {
+	} else if (flags & ENQUEUE_MIGRATED) {
 		/* CPU migration of runnable task */
 		set = TSK_RUNNING;
 		if (p->in_memstall)
@@ -158,24 +162,29 @@ static inline void psi_enqueue(struct task_struct *p, bool migrate)
 	psi_task_change(p, clear, set);
 }
 
-static inline void psi_dequeue(struct task_struct *p, bool migrate)
+static inline void psi_dequeue(struct task_struct *p, int flags)
 {
 	if (static_branch_likely(&psi_disabled))
 		return;
 
-	/*
-	 * When migrating a task to another CPU, clear all psi
-	 * state. The enqueue callback above will work it out.
-	 */
-	if (migrate)
-		psi_task_change(p, p->psi_flags, 0);
+	/* Same runqueue, nothing changed for psi */
+	if (flags & DEQUEUE_SAVE)
+		return;
 
 	/*
 	 * A voluntary sleep is a dequeue followed by a task switch. To
 	 * avoid walking all ancestors twice, psi_task_switch() handles
 	 * TSK_RUNNING and TSK_IOWAIT for us when it moves TSK_ONCPU.
 	 * Do nothing here.
 	 */
+	if (flags & DEQUEUE_SLEEP)
+		return;
+
+	/*
+	 * When migrating a task to another CPU, clear all psi
+	 * state. The enqueue callback above will work it out.
+	 */
+	psi_task_change(p, p->psi_flags, 0);
 }
 
 static inline void psi_ttwu_dequeue(struct task_struct *p)
