Skip to content

Commit

Permalink
sched/cpufreq: Rename arch_update_thermal_pressure() => arch_update_h…
Browse files Browse the repository at this point in the history
…w_pressure()

Now that cpufreq provides a pressure value to the scheduler, rename
arch_update_thermal_pressure into HW pressure to reflect that it returns
a pressure applied by HW (i.e. with a high frequency change) and not
always related to thermal mitigation but also generated by max current
limitation as an example. Such a high frequency signal needs filtering to be
smoothed and to provide a value that reflects the average available capacity
at the scheduler's time scale.

Signed-off-by: Vincent Guittot <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Tested-by: Lukasz Luba <[email protected]>
Reviewed-by: Qais Yousef <[email protected]>
Reviewed-by: Lukasz Luba <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
  • Loading branch information
vingu-linaro authored and Ingo Molnar committed Apr 24, 2024
1 parent c281afe commit d4dbc99
Show file tree
Hide file tree
Showing 14 changed files with 77 additions and 77 deletions.
6 changes: 3 additions & 3 deletions arch/arm/include/asm/topology.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
/* Enable topology flag updates */
#define arch_update_cpu_topology topology_update_cpu_topology

/* Replace task scheduler's default thermal pressure API */
#define arch_scale_thermal_pressure topology_get_thermal_pressure
#define arch_update_thermal_pressure topology_update_thermal_pressure
/* Replace task scheduler's default HW pressure API */
#define arch_scale_hw_pressure topology_get_hw_pressure
#define arch_update_hw_pressure topology_update_hw_pressure

#else

Expand Down
6 changes: 3 additions & 3 deletions arch/arm64/include/asm/topology.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ void update_freq_counters_refs(void);
/* Enable topology flag updates */
#define arch_update_cpu_topology topology_update_cpu_topology

/* Replace task scheduler's default thermal pressure API */
#define arch_scale_thermal_pressure topology_get_thermal_pressure
#define arch_update_thermal_pressure topology_update_thermal_pressure
/* Replace task scheduler's default HW pressure API */
#define arch_scale_hw_pressure topology_get_hw_pressure
#define arch_update_hw_pressure topology_update_hw_pressure

#include <asm-generic/topology.h>

Expand Down
26 changes: 13 additions & 13 deletions drivers/base/arch_topology.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include <linux/units.h>

#define CREATE_TRACE_POINTS
#include <trace/events/thermal_pressure.h>
#include <trace/events/hw_pressure.h>

static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
static struct cpumask scale_freq_counters_mask;
Expand Down Expand Up @@ -160,26 +160,26 @@ void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
per_cpu(cpu_scale, cpu) = capacity;
}

DEFINE_PER_CPU(unsigned long, thermal_pressure);
DEFINE_PER_CPU(unsigned long, hw_pressure);

/**
* topology_update_thermal_pressure() - Update thermal pressure for CPUs
* topology_update_hw_pressure() - Update HW pressure for CPUs
* @cpus : The related CPUs for which capacity has been reduced
* @capped_freq : The maximum allowed frequency that CPUs can run at
*
* Update the value of thermal pressure for all @cpus in the mask. The
* Update the value of HW pressure for all @cpus in the mask. The
* cpumask should include all (online+offline) affected CPUs, to avoid
* operating on stale data when hot-plug is used for some CPUs. The
* @capped_freq reflects the currently allowed max CPUs frequency due to
* thermal capping. It might be also a boost frequency value, which is bigger
* HW capping. It might be also a boost frequency value, which is bigger
* than the internal 'capacity_freq_ref' max frequency. In such case the
* pressure value should simply be removed, since this is an indication that
* there is no thermal throttling. The @capped_freq must be provided in kHz.
* there is no HW throttling. The @capped_freq must be provided in kHz.
*/
void topology_update_thermal_pressure(const struct cpumask *cpus,
void topology_update_hw_pressure(const struct cpumask *cpus,
unsigned long capped_freq)
{
unsigned long max_capacity, capacity, th_pressure;
unsigned long max_capacity, capacity, hw_pressure;
u32 max_freq;
int cpu;

Expand All @@ -189,21 +189,21 @@ void topology_update_thermal_pressure(const struct cpumask *cpus,

/*
* Handle properly the boost frequencies, which should simply clean
* the thermal pressure value.
* the HW pressure value.
*/
if (max_freq <= capped_freq)
capacity = max_capacity;
else
capacity = mult_frac(max_capacity, capped_freq, max_freq);

th_pressure = max_capacity - capacity;
hw_pressure = max_capacity - capacity;

trace_thermal_pressure_update(cpu, th_pressure);
trace_hw_pressure_update(cpu, hw_pressure);

for_each_cpu(cpu, cpus)
WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
WRITE_ONCE(per_cpu(hw_pressure, cpu), hw_pressure);
}
EXPORT_SYMBOL_GPL(topology_update_thermal_pressure);
EXPORT_SYMBOL_GPL(topology_update_hw_pressure);

static ssize_t cpu_capacity_show(struct device *dev,
struct device_attribute *attr,
Expand Down
4 changes: 2 additions & 2 deletions drivers/cpufreq/qcom-cpufreq-hw.c
Original file line number Diff line number Diff line change
Expand Up @@ -347,8 +347,8 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)

throttled_freq = freq_hz / HZ_PER_KHZ;

/* Update thermal pressure (the boost frequencies are accepted) */
arch_update_thermal_pressure(policy->related_cpus, throttled_freq);
/* Update HW pressure (the boost frequencies are accepted) */
arch_update_hw_pressure(policy->related_cpus, throttled_freq);

/*
* In the unlikely case policy is unregistered do not enable
Expand Down
8 changes: 4 additions & 4 deletions include/linux/arch_topology.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,14 @@ void topology_scale_freq_tick(void);
void topology_set_scale_freq_source(struct scale_freq_data *data, const struct cpumask *cpus);
void topology_clear_scale_freq_source(enum scale_freq_source source, const struct cpumask *cpus);

DECLARE_PER_CPU(unsigned long, thermal_pressure);
DECLARE_PER_CPU(unsigned long, hw_pressure);

static inline unsigned long topology_get_thermal_pressure(int cpu)
static inline unsigned long topology_get_hw_pressure(int cpu)
{
return per_cpu(thermal_pressure, cpu);
return per_cpu(hw_pressure, cpu);
}

void topology_update_thermal_pressure(const struct cpumask *cpus,
void topology_update_hw_pressure(const struct cpumask *cpus,
unsigned long capped_freq);

struct cpu_topology {
Expand Down
8 changes: 4 additions & 4 deletions include/linux/sched/topology.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,17 +270,17 @@ unsigned long arch_scale_cpu_capacity(int cpu)
}
#endif

#ifndef arch_scale_thermal_pressure
#ifndef arch_scale_hw_pressure
static __always_inline
unsigned long arch_scale_thermal_pressure(int cpu)
unsigned long arch_scale_hw_pressure(int cpu)
{
return 0;
}
#endif

#ifndef arch_update_thermal_pressure
#ifndef arch_update_hw_pressure
static __always_inline
void arch_update_thermal_pressure(const struct cpumask *cpus,
void arch_update_hw_pressure(const struct cpumask *cpus,
unsigned long capped_frequency)
{ }
#endif
Expand Down
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM thermal_pressure
#define TRACE_SYSTEM hw_pressure

#if !defined(_TRACE_THERMAL_PRESSURE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_THERMAL_PRESSURE_H

#include <linux/tracepoint.h>

TRACE_EVENT(thermal_pressure_update,
TP_PROTO(int cpu, unsigned long thermal_pressure),
TP_ARGS(cpu, thermal_pressure),
TRACE_EVENT(hw_pressure_update,
TP_PROTO(int cpu, unsigned long hw_pressure),
TP_ARGS(cpu, hw_pressure),

TP_STRUCT__entry(
__field(unsigned long, thermal_pressure)
__field(unsigned long, hw_pressure)
__field(int, cpu)
),

TP_fast_assign(
__entry->thermal_pressure = thermal_pressure;
__entry->hw_pressure = hw_pressure;
__entry->cpu = cpu;
),

TP_printk("cpu=%d thermal_pressure=%lu", __entry->cpu, __entry->thermal_pressure)
TP_printk("cpu=%d hw_pressure=%lu", __entry->cpu, __entry->hw_pressure)
);
#endif /* _TRACE_THERMAL_PRESSURE_H */

Expand Down
2 changes: 1 addition & 1 deletion include/trace/events/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -752,7 +752,7 @@ DECLARE_TRACE(pelt_dl_tp,
TP_PROTO(struct rq *rq),
TP_ARGS(rq));

DECLARE_TRACE(pelt_thermal_tp,
DECLARE_TRACE(pelt_hw_tp,
TP_PROTO(struct rq *rq),
TP_ARGS(rq));

Expand Down
12 changes: 6 additions & 6 deletions init/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -547,24 +547,24 @@ config HAVE_SCHED_AVG_IRQ
depends on IRQ_TIME_ACCOUNTING || PARAVIRT_TIME_ACCOUNTING
depends on SMP

config SCHED_THERMAL_PRESSURE
config SCHED_HW_PRESSURE
bool
default y if ARM && ARM_CPU_TOPOLOGY
default y if ARM64
depends on SMP
depends on CPU_FREQ_THERMAL
help
Select this option to enable thermal pressure accounting in the
scheduler. Thermal pressure is the value conveyed to the scheduler
Select this option to enable HW pressure accounting in the
scheduler. HW pressure is the value conveyed to the scheduler
that reflects the reduction in CPU compute capacity resulted from
thermal throttling. Thermal throttling occurs when the performance of
a CPU is capped due to high operating temperatures.
HW throttling. HW throttling occurs when the performance of
a CPU is capped, for example due to high operating temperatures.

If selected, the scheduler will be able to balance tasks accordingly,
i.e. put less load on throttled CPUs than on non/less throttled ones.

This requires the architecture to implement
arch_update_thermal_pressure() and arch_scale_thermal_pressure().
arch_update_hw_pressure() and arch_scale_hw_pressure().

config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
Expand Down
8 changes: 4 additions & 4 deletions kernel/sched/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_thermal_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_hw_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_cpu_capacity_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_cfs_tp);
Expand Down Expand Up @@ -5668,7 +5668,7 @@ void sched_tick(void)
struct rq *rq = cpu_rq(cpu);
struct task_struct *curr = rq->curr;
struct rq_flags rf;
unsigned long thermal_pressure;
unsigned long hw_pressure;
u64 resched_latency;

if (housekeeping_cpu(cpu, HK_TYPE_TICK))
Expand All @@ -5679,8 +5679,8 @@ void sched_tick(void)
rq_lock(rq, &rf);

update_rq_clock(rq);
thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq));
update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure);
hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
update_hw_load_avg(rq_clock_hw(rq), rq, hw_pressure);
curr->sched_class->task_tick(rq, curr, 0);
if (sched_feat(LATENCY_WARN))
resched_latency = cpu_resched_latency(rq);
Expand Down
16 changes: 8 additions & 8 deletions kernel/sched/fair.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,15 @@ static unsigned int normalized_sysctl_sched_base_slice = 750000ULL;

const_debug unsigned int sysctl_sched_migration_cost = 500000UL;

int sched_thermal_decay_shift;
int sched_hw_decay_shift;
static int __init setup_sched_thermal_decay_shift(char *str)
{
int _shift = 0;

if (kstrtoint(str, 0, &_shift))
pr_warn("Unable to set scheduler thermal pressure decay shift parameter\n");

sched_thermal_decay_shift = clamp(_shift, 0, 10);
sched_hw_decay_shift = clamp(_shift, 0, 10);
return 1;
}
__setup("sched_thermal_decay_shift=", setup_sched_thermal_decay_shift);
Expand Down Expand Up @@ -4969,7 +4969,7 @@ static inline unsigned long get_actual_cpu_capacity(int cpu)
{
unsigned long capacity = arch_scale_cpu_capacity(cpu);

capacity -= max(thermal_load_avg(cpu_rq(cpu)), cpufreq_get_pressure(cpu));
capacity -= max(hw_load_avg(cpu_rq(cpu)), cpufreq_get_pressure(cpu));

return capacity;
}
Expand Down Expand Up @@ -5002,7 +5002,7 @@ static inline int util_fits_cpu(unsigned long util,
* Similarly if a task is capped to arch_scale_cpu_capacity(little_cpu), it
* should fit a little cpu even if there's some pressure.
*
* Only exception is for thermal pressure since it has a direct impact
* Only exception is for HW or cpufreq pressure since it has a direct impact
* on available OPP of the system.
*
* We honour it for uclamp_min only as a drop in performance level
Expand Down Expand Up @@ -9324,7 +9324,7 @@ static inline bool others_have_blocked(struct rq *rq)
if (cpu_util_dl(rq))
return true;

if (thermal_load_avg(rq))
if (hw_load_avg(rq))
return true;

if (cpu_util_irq(rq))
Expand Down Expand Up @@ -9354,7 +9354,7 @@ static bool __update_blocked_others(struct rq *rq, bool *done)
{
const struct sched_class *curr_class;
u64 now = rq_clock_pelt(rq);
unsigned long thermal_pressure;
unsigned long hw_pressure;
bool decayed;

/*
Expand All @@ -9363,11 +9363,11 @@ static bool __update_blocked_others(struct rq *rq, bool *done)
*/
curr_class = rq->curr->sched_class;

thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq));
hw_pressure = arch_scale_hw_pressure(cpu_of(rq));

decayed = update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) |
update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) |
update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure) |
update_hw_load_avg(rq_clock_hw(rq), rq, hw_pressure) |
update_irq_load_avg(rq, 0);

if (others_have_blocked(rq))
Expand Down
18 changes: 9 additions & 9 deletions kernel/sched/pelt.c
Original file line number Diff line number Diff line change
Expand Up @@ -384,30 +384,30 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
return 0;
}

#ifdef CONFIG_SCHED_THERMAL_PRESSURE
#ifdef CONFIG_SCHED_HW_PRESSURE
/*
* thermal:
* hardware:
*
* load_sum = \Sum se->avg.load_sum but se->avg.load_sum is not tracked
*
* util_avg and runnable_load_avg are not supported and meaningless.
*
* Unlike rt/dl utilization tracking that track time spent by a cpu
* running a rt/dl task through util_avg, the average thermal pressure is
* tracked through load_avg. This is because thermal pressure signal is
* running a rt/dl task through util_avg, the average HW pressure is
* tracked through load_avg. This is because HW pressure signal is
* time weighted "delta" capacity unlike util_avg which is binary.
* "delta capacity" = actual capacity -
* capped capacity a cpu due to a thermal event.
* capped capacity a cpu due to a HW event.
*/

int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)
int update_hw_load_avg(u64 now, struct rq *rq, u64 capacity)
{
if (___update_load_sum(now, &rq->avg_thermal,
if (___update_load_sum(now, &rq->avg_hw,
capacity,
capacity,
capacity)) {
___update_load_avg(&rq->avg_thermal, 1);
trace_pelt_thermal_tp(rq);
___update_load_avg(&rq->avg_hw, 1);
trace_pelt_hw_tp(rq);
return 1;
}

Expand Down
Loading

0 comments on commit d4dbc99

Please sign in to comment.