Merge tag 'sched-core-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:

 - Updates to scheduler metrics:
     - PELT fixes & enhancements
     - PSI fixes & enhancements
     - Refactor cpu_util_without()

 - Updates to instrumentation/debugging:
     - Remove sched_trace_*() helper functions - can be done via debug
       info
     - Fix double update_rq_clock() warnings

 - Introduce & use "preemption model accessors" to simplify some of the
   Kconfig complexity.

 - Make softirq handling RT-safe.

 - Misc smaller fixes & cleanups.

* tag 'sched-core-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  topology: Remove unused cpu_cluster_mask()
  sched: Reverse sched_class layout
  sched/deadline: Remove superfluous rq clock update in push_dl_task()
  sched/core: Avoid obvious double update_rq_clock warning
  smp: Make softirq handling RT safe in flush_smp_call_function_queue()
  smp: Rename flush_smp_call_function_from_idle()
  sched: Fix missing prototype warnings
  sched/fair: Remove cfs_rq_tg_path()
  sched/fair: Remove sched_trace_*() helper functions
  sched/fair: Refactor cpu_util_without()
  sched/fair: Revise comment about lb decision matrix
  sched/psi: report zeroes for CPU full at the system level
  sched/fair: Delete useless condition in tg_unthrottle_up()
  sched/fair: Fix cfs_rq_clock_pelt() for throttled cfs_rq
  sched/fair: Move calculate of avg_load to a better location
  mailmap: Update my email address to @redhat.com
  MAINTAINERS: Add myself as scheduler topology reviewer
  psi: Fix trigger being fired unexpectedly at initial
  ftrace: Use preemption model accessors for trace header printout
  kcsan: Use preemption model accessors
torvalds committed May 24, 2022
2 parents cfeb252 + 991d8d8 commit 6f3f04c
Showing 23 changed files with 213 additions and 331 deletions.
1 change: 1 addition & 0 deletions .mailmap
@@ -398,6 +398,7 @@ Vasily Averin <[email protected]> <[email protected]>
Vasily Averin <[email protected]> <[email protected]>
Vasily Averin <[email protected]> <[email protected]>
Vasily Averin <[email protected]> <[email protected]>
+Valentin Schneider <[email protected]> <[email protected]>
Vinod Koul <[email protected]> <[email protected]>
Vinod Koul <[email protected]> <[email protected]>
Vinod Koul <[email protected]> <[email protected]>
9 changes: 4 additions & 5 deletions Documentation/accounting/psi.rst
@@ -37,11 +37,7 @@ Pressure interface
Pressure information for each resource is exported through the
respective file in /proc/pressure/ -- cpu, memory, and io.

-The format for CPU is as such::
-
-some avg10=0.00 avg60=0.00 avg300=0.00 total=0
-
-and for memory and IO::
+The format is as such::

some avg10=0.00 avg60=0.00 avg300=0.00 total=0
full avg10=0.00 avg60=0.00 avg300=0.00 total=0
@@ -58,6 +54,9 @@ situation from a state where some tasks are stalled but the CPU is
still doing productive work. As such, time spent in this subset of the
stall state is tracked separately and exported in the "full" averages.

+CPU full is undefined at the system level, but has been reported
+since 5.13, so it is set to zero for backward compatibility.
+
The ratios (in %) are tracked as recent trends over ten, sixty, and
three hundred second windows, which gives insight into short term events
as well as medium and long term trends. The total absolute stall time
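Since both resources now share the same two-line format, the change is easy to observe from userspace. A minimal sketch (illustrative only, not part of this patch) that dumps the system-level CPU pressure file, where the "full" line is now documented to read as zeroes:

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/pressure/cpu", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* Expect two lines, e.g.:
	 *   some avg10=0.00 avg60=0.00 avg300=0.00 total=0
	 *   full avg10=0.00 avg60=0.00 avg300=0.00 total=0
	 */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}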
1 change: 1 addition & 0 deletions MAINTAINERS
@@ -17524,6 +17524,7 @@ R: Steven Rostedt <[email protected]> (SCHED_FIFO/SCHED_RR)
R: Ben Segall <[email protected]> (CONFIG_CFS_BANDWIDTH)
R: Mel Gorman <[email protected]> (CONFIG_NUMA_BALANCING)
R: Daniel Bristot de Oliveira <[email protected]> (SCHED_DEADLINE)
+R: Valentin Schneider <[email protected]> (TOPOLOGY)
L: [email protected]
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git sched/core
12 changes: 6 additions & 6 deletions include/asm-generic/vmlinux.lds.h
@@ -126,13 +126,13 @@
*/
#define SCHED_DATA \
STRUCT_ALIGN(); \
-__begin_sched_classes = .; \
-*(__idle_sched_class) \
-*(__fair_sched_class) \
-*(__rt_sched_class) \
-*(__dl_sched_class) \
+__sched_class_highest = .; \
*(__stop_sched_class) \
-__end_sched_classes = .;
+*(__dl_sched_class) \
+*(__rt_sched_class) \
+*(__fair_sched_class) \
+*(__idle_sched_class) \
+__sched_class_lowest = .;

/* The actual configuration determine if the init/exit sections
* are handled as text/data or they can be discarded (which
9 changes: 9 additions & 0 deletions include/linux/interrupt.h
@@ -589,6 +589,15 @@ struct softirq_action
asmlinkage void do_softirq(void);
asmlinkage void __do_softirq(void);

+#ifdef CONFIG_PREEMPT_RT
+extern void do_softirq_post_smp_call_flush(unsigned int was_pending);
+#else
+static inline void do_softirq_post_smp_call_flush(unsigned int unused)
+{
+do_softirq();
+}
+#endif
+
extern void open_softirq(int nr, void (*action)(struct softirq_action *));
extern void softirq_init(void);
extern void __raise_softirq_irqoff(unsigned int nr);
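For context, the new helper is consumed by flush_smp_call_function_queue() (see the smp: commits in this merge). A sketch of that caller, paraphrased from the kernel/smp.c change: the pending-softirq mask is sampled before the flush so that, on PREEMPT_RT, softirqs raised by the SMP-call callbacks themselves can be handled safely rather than run from an unsafe context:

void flush_smp_call_function_queue(void)
{
	unsigned int was_pending;
	unsigned long flags;

	if (llist_empty(this_cpu_ptr(&call_single_queue)))
		return;

	local_irq_save(flags);
	/* Record the softirqs that were already pending before the flush. */
	was_pending = local_softirq_pending();
	__flush_smp_call_function_queue(true);
	if (local_softirq_pending())
		do_softirq_post_smp_call_flush(was_pending);

	local_irq_restore(flags);
}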
16 changes: 2 additions & 14 deletions include/linux/sched.h
@@ -2382,20 +2382,6 @@ static inline void rseq_syscall(struct pt_regs *regs)

#endif

-const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq);
-char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len);
-int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq);
-
-const struct sched_avg *sched_trace_rq_avg_rt(struct rq *rq);
-const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq);
-const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq);
-
-int sched_trace_rq_cpu(struct rq *rq);
-int sched_trace_rq_cpu_capacity(struct rq *rq);
-int sched_trace_rq_nr_running(struct rq *rq);
-
-const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
-
#ifdef CONFIG_SCHED_CORE
extern void sched_core_free(struct task_struct *tsk);
extern void sched_core_fork(struct task_struct *p);
@@ -2406,4 +2392,6 @@ static inline void sched_core_free(struct task_struct *tsk) { }
static inline void sched_core_fork(struct task_struct *p) { }
#endif

+extern void sched_set_stop_task(int cpu, struct task_struct *stop);
+
#endif
7 changes: 0 additions & 7 deletions include/linux/topology.h
@@ -240,13 +240,6 @@ static inline const struct cpumask *cpu_smt_mask(int cpu)
}
#endif

-#if defined(CONFIG_SCHED_CLUSTER) && !defined(cpu_cluster_mask)
-static inline const struct cpumask *cpu_cluster_mask(int cpu)
-{
-return topology_cluster_cpumask(cpu);
-}
-#endif
-
static inline const struct cpumask *cpu_cpu_mask(int cpu)
{
return cpumask_of_node(cpu_to_node(cpu));
5 changes: 3 additions & 2 deletions kernel/kcsan/kcsan_test.c
@@ -1380,13 +1380,14 @@ static const void *nthreads_gen_params(const void *prev, char *desc)
else
nthreads *= 2;

-if (!IS_ENABLED(CONFIG_PREEMPT) || !IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER)) {
+if (!preempt_model_preemptible() ||
+    !IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER)) {
/*
* Without any preemption, keep 2 CPUs free for other tasks, one
* of which is the main test case function checking for
* completion or failure.
*/
-const long min_unused_cpus = IS_ENABLED(CONFIG_PREEMPT_NONE) ? 2 : 0;
+const long min_unused_cpus = preempt_model_none() ? 2 : 0;
const long min_required_cpus = 2 + min_unused_cpus;

if (num_online_cpus() < min_required_cpus) {
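The preempt_model_*() accessors used above come from the "preemption model accessors" work pulled in this merge. A sketch of the static-Kconfig flavor (paraphrased; with CONFIG_PREEMPT_DYNAMIC these are instead out-of-line functions that test the model selected at boot):

static inline bool preempt_model_none(void)
{
	return IS_ENABLED(CONFIG_PREEMPT_NONE);
}

static inline bool preempt_model_voluntary(void)
{
	return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY);
}

static inline bool preempt_model_full(void)
{
	return IS_ENABLED(CONFIG_PREEMPT);
}

static inline bool preempt_model_rt(void)
{
	return IS_ENABLED(CONFIG_PREEMPT_RT);
}

/* "Preemptible" covers both the full and the RT preemption models. */
static inline bool preempt_model_preemptible(void)
{
	return preempt_model_full() || preempt_model_rt();
}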
2 changes: 2 additions & 0 deletions kernel/sched/build_policy.c
@@ -15,6 +15,7 @@
/* Headers: */
#include <linux/sched/clock.h>
#include <linux/sched/cputime.h>
+#include <linux/sched/hotplug.h>
#include <linux/sched/posix-timers.h>
#include <linux/sched/rt.h>

@@ -31,6 +32,7 @@
#include <uapi/linux/sched/types.h>

#include "sched.h"
#include "smp.h"

#include "autogroup.h"
#include "stats.h"
1 change: 1 addition & 0 deletions kernel/sched/build_utility.c
@@ -14,6 +14,7 @@
#include <linux/sched/debug.h>
#include <linux/sched/isolation.h>
#include <linux/sched/loadavg.h>
+#include <linux/sched/nohz.h>
#include <linux/sched/mm.h>
#include <linux/sched/rseq_api.h>
#include <linux/sched/task_stack.h>
23 changes: 13 additions & 10 deletions kernel/sched/core.c
@@ -26,7 +26,10 @@
#include <linux/topology.h>
#include <linux/sched/clock.h>
#include <linux/sched/cond_resched.h>
+#include <linux/sched/cputime.h>
#include <linux/sched/debug.h>
+#include <linux/sched/hotplug.h>
+#include <linux/sched/init.h>
#include <linux/sched/isolation.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/mm.h>
@@ -610,10 +613,10 @@ void double_rq_lock(struct rq *rq1, struct rq *rq2)
swap(rq1, rq2);

raw_spin_rq_lock(rq1);
-if (__rq_lockp(rq1) == __rq_lockp(rq2))
-return;
+if (__rq_lockp(rq1) != __rq_lockp(rq2))
+raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);

-raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
+double_rq_clock_clear_update(rq1, rq2);
}
#endif
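double_rq_clock_clear_update() is the helper added by "sched/core: Avoid obvious double update_rq_clock warning". A sketch paraphrased from the kernel/sched/sched.h side of that patch: it clears RQCF_UPDATED on both runqueues (while preserving the skip flags) so that the caller's subsequent update_rq_clock() on either rq does not trip the double-update warning:

static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2)
{
	rq1->clock_update_flags &= (RQCF_REQ_SKIP | RQCF_ACT_SKIP);
	/* rq1 == rq2 for !CONFIG_SMP, so just clear RQCF_UPDATED once. */
#ifdef CONFIG_SMP
	rq2->clock_update_flags &= (RQCF_REQ_SKIP | RQCF_ACT_SKIP);
#endif
}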

@@ -2190,7 +2193,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
{
if (p->sched_class == rq->curr->sched_class)
rq->curr->sched_class->check_preempt_curr(rq, p, flags);
-else if (p->sched_class > rq->curr->sched_class)
+else if (sched_class_above(p->sched_class, rq->curr->sched_class))
resched_curr(rq);

/*
@@ -2408,7 +2411,7 @@ static int migration_cpu_stop(void *data)
* __migrate_task() such that we will not miss enforcing cpus_ptr
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
*/
-flush_smp_call_function_from_idle();
+flush_smp_call_function_queue();

raw_spin_lock(&p->pi_lock);
rq_lock(rq, &rf);
@@ -5689,7 +5692,7 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
* higher scheduling class, because otherwise those lose the
* opportunity to pull in more work from other CPUs.
*/
-if (likely(prev->sched_class <= &fair_sched_class &&
+if (likely(!sched_class_above(prev->sched_class, &fair_sched_class) &&
rq->nr_running == rq->cfs.h_nr_running)) {

p = pick_next_task_fair(rq, prev, rf);
@@ -9469,11 +9472,11 @@ void __init sched_init(void)
int i;

/* Make sure the linker didn't screw up */
-BUG_ON(&idle_sched_class + 1 != &fair_sched_class ||
-&fair_sched_class + 1 != &rt_sched_class ||
-&rt_sched_class + 1 != &dl_sched_class);
+BUG_ON(&idle_sched_class != &fair_sched_class + 1 ||
+&fair_sched_class != &rt_sched_class + 1 ||
+&rt_sched_class != &dl_sched_class + 1);
#ifdef CONFIG_SMP
-BUG_ON(&dl_sched_class + 1 != &stop_sched_class);
+BUG_ON(&dl_sched_class != &stop_sched_class + 1);
#endif

wait_bit_init();
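These BUG_ON()s pair with the reversed SCHED_DATA layout in vmlinux.lds.h above: higher-priority classes now sit at lower addresses. A sketch of the kernel/sched/sched.h helpers that rely on that ordering (paraphrased from the "sched: Reverse sched_class layout" commit), with sched_class_above() reduced to a plain pointer comparison and for_each_class() walking forward in memory from highest to lowest priority:

extern struct sched_class __sched_class_highest[];
extern struct sched_class __sched_class_lowest[];

#define sched_class_above(_a, _b)	((_a) < (_b))

#define for_class_range(class, _from, _to) \
	for (class = (_from); class < (_to); class++)

/* Iterate from the highest-priority class down to the lowest. */
#define for_each_class(class) \
	for_class_range(class, __sched_class_highest, __sched_class_lowest)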
15 changes: 4 additions & 11 deletions kernel/sched/deadline.c
@@ -1220,8 +1220,6 @@ int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
return (dl_se->runtime <= 0);
}

-extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
-
/*
* This function implements the GRUB accounting rule:
* according to the GRUB reclaiming algorithm, the runtime is
@@ -1832,6 +1830,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int flags)

static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused)
{
+struct rq_flags rf;
struct rq *rq;

if (READ_ONCE(p->__state) != TASK_WAKING)
@@ -1843,7 +1842,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
* from try_to_wake_up(). Hence, p->pi_lock is locked, but
* rq->lock is not... So, lock it
*/
-raw_spin_rq_lock(rq);
+rq_lock(rq, &rf);
if (p->dl.dl_non_contending) {
update_rq_clock(rq);
sub_running_bw(&p->dl, &rq->dl);
@@ -1859,7 +1858,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
put_task_struct(p);
}
sub_rq_bw(&p->dl, &rq->dl);
-raw_spin_rq_unlock(rq);
+rq_unlock(rq, &rf);
}

static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
@@ -2319,13 +2318,7 @@ static int push_dl_task(struct rq *rq)

deactivate_task(rq, next_task, 0);
set_task_cpu(next_task, later_rq->cpu);
-
-/*
-* Update the later_rq clock here, because the clock is used
-* by the cpufreq_update_util() inside __add_running_bw().
-*/
-update_rq_clock(later_rq);
-activate_task(later_rq, next_task, ENQUEUE_NOCLOCK);
+activate_task(later_rq, next_task, 0);
ret = 1;

resched_curr(later_rq);