Skip to content

Commit

Permalink
Merge tag 'pm+acpi-4.6-rc3' of git://git.kernel.org/pub/scm/linux/ker…
Browse files Browse the repository at this point in the history
…nel/git/rafael/linux-pm

Pull power management and ACPI fixes from Rafael Wysocki:
 "Fixes for some issues discovered after recent changes and for some
  that have just been found lately regardless of those changes
  (intel_pstate, intel_idle, PM core, mailbox/pcc, turbostat) plus
  support for some new CPU models (intel_idle, Intel RAPL driver,
  turbostat) and documentation updates (intel_pstate, PM core).

  Specifics:

   - intel_pstate fixes for two issues exposed by the recent switch over
     from using timers and for one issue introduced during the 4.4 cycle
     plus new comments describing data structures used by the driver
     (Rafael Wysocki, Srinivas Pandruvada).

   - intel_idle fixes related to CPU offline/online (Richard Cochran).

   - intel_idle support (new CPU IDs and state definitions mostly) for
     Skylake-X and Kabylake processors (Len Brown).

   - PCC mailbox driver fix for an out-of-bounds memory access that may
     cause the kernel to panic() (Shanker Donthineni).

   - New (missing) CPU ID for one apparently overlooked Haswell model in
     the Intel RAPL power capping driver (Srinivas Pandruvada).

   - Fix for the PM core's wakeup IRQs framework to make it work after
     wakeup settings reconfiguration from sysfs (Grygorii Strashko).

   - Runtime PM documentation update to make it describe what needs to
     be done during device removal more precisely (Krzysztof Kozlowski).

   - Stale comment removal cleanup in the cpufreq-dt driver (Viresh
     Kumar).

   - turbostat utility fixes and support for Broxton, Skylake-X and
     Kabylake processors (Len Brown)"

* tag 'pm+acpi-4.6-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (28 commits)
  PM / wakeirq: fix wakeirq setting after wakup re-configuration from sysfs
  tools/power turbostat: work around RC6 counter wrap
  tools/power turbostat: initial KBL support
  tools/power turbostat: initial SKX support
  tools/power turbostat: decode BXT TSC frequency via CPUID
  tools/power turbostat: initial BXT support
  tools/power turbostat: print IRTL MSRs
  tools/power turbostat: SGX state should print only if --debug
  intel_idle: Add KBL support
  intel_idle: Add SKX support
  intel_idle: Clean up all registered devices on exit.
  intel_idle: Propagate hot plug errors.
  intel_idle: Don't overreact to a cpuidle registration failure.
  intel_idle: Setup the timer broadcast only on successful driver load.
  intel_idle: Avoid a double free of the per-CPU data.
  intel_idle: Fix dangling registration on error path.
  intel_idle: Fix deallocation order on the driver exit path.
  intel_idle: Remove redundant initialization calls.
  intel_idle: Fix a helper function's return value.
  intel_idle: remove useless return from void function.
  ...
  • Loading branch information
torvalds committed Apr 9, 2016
2 parents 9ef11ce + 73659be commit 40bca9d
Show file tree
Hide file tree
Showing 9 changed files with 380 additions and 62 deletions.
4 changes: 4 additions & 0 deletions Documentation/power/runtime_pm.txt
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,10 @@ drivers to make their ->remove() callbacks avoid races with runtime PM directly,
but also it allows of more flexibility in the handling of devices during the
removal of their drivers.

Drivers in ->remove() callback should undo the runtime PM changes done
in ->probe(). Usually this means calling pm_runtime_disable(),
pm_runtime_dont_use_autosuspend() etc.

The user space can effectively disallow the driver of the device to power manage
it at run time by changing the value of its /sys/devices/.../power/control
attribute to "on", which causes pm_runtime_forbid() to be called. In principle,
Expand Down
8 changes: 8 additions & 0 deletions arch/x86/include/asm/msr-index.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,14 @@
#define MSR_PKG_C9_RESIDENCY 0x00000631
#define MSR_PKG_C10_RESIDENCY 0x00000632

/* Interrupt Response Limit */
#define MSR_PKGC3_IRTL 0x0000060a
#define MSR_PKGC6_IRTL 0x0000060b
#define MSR_PKGC7_IRTL 0x0000060c
#define MSR_PKGC8_IRTL 0x00000633
#define MSR_PKGC9_IRTL 0x00000634
#define MSR_PKGC10_IRTL 0x00000635

/* Run Time Average Power Limiting (RAPL) Interface */

#define MSR_RAPL_POWER_UNIT 0x00000606
Expand Down
2 changes: 2 additions & 0 deletions drivers/base/power/wakeup.c
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,8 @@ static int device_wakeup_attach(struct device *dev, struct wakeup_source *ws)
return -EEXIST;
}
dev->power.wakeup = ws;
if (dev->power.wakeirq)
device_wakeup_attach_irq(dev, dev->power.wakeirq);
spin_unlock_irq(&dev->power.lock);
return 0;
}
Expand Down
3 changes: 0 additions & 3 deletions drivers/cpufreq/cpufreq-dt.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@
* Copyright (C) 2014 Linaro.
* Viresh Kumar <[email protected]>
*
* The OPP code in function set_target() is reused from
* drivers/cpufreq/omap-cpufreq.c
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
Expand Down
206 changes: 189 additions & 17 deletions drivers/cpufreq/intel_pstate.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,25 @@ static inline int ceiling_fp(int32_t x)
return ret;
}

/**
* struct sample - Store performance sample
* @core_pct_busy: Ratio of APERF/MPERF in percent, which is actual
* performance during last sample period
* @busy_scaled: Scaled busy value which is used to calculate next
* P state. This can be different than core_pct_busy
* to account for cpu idle period
* @aperf: Difference of actual performance frequency clock count
* read from APERF MSR between last and current sample
* @mperf: Difference of maximum performance frequency clock count
* read from MPERF MSR between last and current sample
* @tsc: Difference of time stamp counter between last and
* current sample
* @freq: Effective frequency calculated from APERF/MPERF
* @time: Current time from scheduler
*
* This structure is used in the cpudata structure to store performance sample
* data for choosing next P State.
*/
struct sample {
int32_t core_pct_busy;
int32_t busy_scaled;
Expand All @@ -74,6 +93,20 @@ struct sample {
u64 time;
};

/**
* struct pstate_data - Store P state data
* @current_pstate: Current requested P state
* @min_pstate: Min P state possible for this platform
* @max_pstate: Max P state possible for this platform
* @max_pstate_physical:This is physical Max P state for a processor
* This can be higher than the max_pstate which can
* be limited by platform thermal design power limits
* @scaling: Scaling factor to convert frequency to cpufreq
* frequency units
* @turbo_pstate: Max Turbo P state possible for this platform
*
* Stores the per cpu model P state limits and current P state.
*/
struct pstate_data {
int current_pstate;
int min_pstate;
Expand All @@ -83,13 +116,38 @@ struct pstate_data {
int turbo_pstate;
};

/**
* struct vid_data - Stores voltage information data
* @min: VID data for this platform corresponding to
* the lowest P state
* @max: VID data corresponding to the highest P State.
* @turbo: VID data for turbo P state
* @ratio: Ratio of (vid max - vid min) /
* (max P state - Min P State)
*
* Stores the voltage data for DVFS (Dynamic Voltage and Frequency Scaling)
* This data is used in Atom platforms, where in addition to target P state,
* the voltage data needs to be specified to select next P State.
*/
struct vid_data {
int min;
int max;
int turbo;
int32_t ratio;
};

/**
* struct _pid - Stores PID data
* @setpoint: Target set point for busyness or performance
* @integral: Storage for accumulated error values
* @p_gain: PID proportional gain
* @i_gain: PID integral gain
* @d_gain: PID derivative gain
* @deadband: PID deadband
* @last_err: Last error storage for integral part of PID calculation
*
* Stores PID coefficients and last error for PID controller.
*/
struct _pid {
int setpoint;
int32_t integral;
Expand All @@ -100,6 +158,23 @@ struct _pid {
int32_t last_err;
};

/**
* struct cpudata - Per CPU instance data storage
* @cpu: CPU number for this instance data
* @update_util: CPUFreq utility callback information
* @pstate: Stores P state limits for this CPU
* @vid: Stores VID limits for this CPU
* @pid: Stores PID parameters for this CPU
* @last_sample_time: Last Sample time
* @prev_aperf: Last APERF value read from APERF MSR
* @prev_mperf: Last MPERF value read from MPERF MSR
* @prev_tsc: Last timestamp counter (TSC) value
* @prev_cummulative_iowait: IO Wait time difference from last and
* current sample
* @sample: Storage for storing last Sample data
*
* This structure stores per CPU instance data for all CPUs.
*/
struct cpudata {
int cpu;

Expand All @@ -118,6 +193,19 @@ struct cpudata {
};

static struct cpudata **all_cpu_data;

/**
* struct pid_adjust_policy - Stores static PID configuration data
* @sample_rate_ms: PID calculation sample rate in ms
* @sample_rate_ns: Sample rate calculation in ns
* @deadband: PID deadband
* @setpoint: PID Setpoint
* @p_gain_pct: PID proportional gain
* @i_gain_pct: PID integral gain
* @d_gain_pct: PID derivative gain
*
* Stores per CPU model static PID configuration data.
*/
struct pstate_adjust_policy {
int sample_rate_ms;
s64 sample_rate_ns;
Expand All @@ -128,6 +216,20 @@ struct pstate_adjust_policy {
int i_gain_pct;
};

/**
* struct pstate_funcs - Per CPU model specific callbacks
* @get_max: Callback to get maximum non turbo effective P state
* @get_max_physical: Callback to get maximum non turbo physical P state
* @get_min: Callback to get minimum P state
* @get_turbo: Callback to get turbo P state
* @get_scaling: Callback to get frequency scaling factor
* @get_val: Callback to convert P state to actual MSR write value
* @get_vid: Callback to get VID data for Atom platforms
* @get_target_pstate: Callback to a function to calculate next P state to use
*
* Core and Atom CPU models have different way to get P State limits. This
* structure is used to store those callbacks.
*/
struct pstate_funcs {
int (*get_max)(void);
int (*get_max_physical)(void);
Expand All @@ -139,6 +241,11 @@ struct pstate_funcs {
int32_t (*get_target_pstate)(struct cpudata *);
};

/**
* struct cpu_defaults- Per CPU model default config data
* @pid_policy: PID config data
* @funcs: Callback function data
*/
struct cpu_defaults {
struct pstate_adjust_policy pid_policy;
struct pstate_funcs funcs;
Expand All @@ -151,6 +258,34 @@ static struct pstate_adjust_policy pid_params;
static struct pstate_funcs pstate_funcs;
static int hwp_active;


/**
* struct perf_limits - Store user and policy limits
* @no_turbo: User requested turbo state from intel_pstate sysfs
* @turbo_disabled: Platform turbo status either from msr
* MSR_IA32_MISC_ENABLE or when maximum available pstate
* matches the maximum turbo pstate
* @max_perf_pct: Effective maximum performance limit in percentage, this
* is minimum of either limits enforced by cpufreq policy
* or limits from user set limits via intel_pstate sysfs
* @min_perf_pct: Effective minimum performance limit in percentage, this
* is maximum of either limits enforced by cpufreq policy
* or limits from user set limits via intel_pstate sysfs
* @max_perf: This is a scaled value between 0 to 255 for max_perf_pct
* This value is used to limit max pstate
* @min_perf: This is a scaled value between 0 to 255 for min_perf_pct
* This value is used to limit min pstate
* @max_policy_pct: The maximum performance in percentage enforced by
* cpufreq setpolicy interface
* @max_sysfs_pct: The maximum performance in percentage enforced by
* intel pstate sysfs interface
* @min_policy_pct: The minimum performance in percentage enforced by
* cpufreq setpolicy interface
* @min_sysfs_pct: The minimum performance in percentage enforced by
* intel pstate sysfs interface
*
* Storage for user and policy defined limits.
*/
struct perf_limits {
int no_turbo;
int turbo_disabled;
Expand Down Expand Up @@ -910,7 +1045,14 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
cpu->prev_aperf = aperf;
cpu->prev_mperf = mperf;
cpu->prev_tsc = tsc;
return true;
/*
* First time this function is invoked in a given cycle, all of the
* previous sample data fields are equal to zero or stale and they must
* be populated with meaningful numbers for things to work, so assume
* that sample.time will always be reset before setting the utilization
* update hook and make the caller skip the sample then.
*/
return !!cpu->last_sample_time;
}

static inline int32_t get_avg_frequency(struct cpudata *cpu)
Expand Down Expand Up @@ -984,8 +1126,7 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
* enough period of time to adjust our busyness.
*/
duration_ns = cpu->sample.time - cpu->last_sample_time;
if ((s64)duration_ns > pid_params.sample_rate_ns * 3
&& cpu->last_sample_time > 0) {
if ((s64)duration_ns > pid_params.sample_rate_ns * 3) {
sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
int_tofp(duration_ns));
core_busy = mul_fp(core_busy, sample_ratio);
Expand Down Expand Up @@ -1100,10 +1241,8 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
intel_pstate_get_cpu_pstates(cpu);

intel_pstate_busy_pid_reset(cpu);
intel_pstate_sample(cpu, 0);

cpu->update_util.func = intel_pstate_update_util;
cpufreq_set_update_util_data(cpunum, &cpu->update_util);

pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);

Expand All @@ -1122,22 +1261,54 @@ static unsigned int intel_pstate_get(unsigned int cpu_num)
return get_avg_frequency(cpu);
}

static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
{
struct cpudata *cpu = all_cpu_data[cpu_num];

/* Prevent intel_pstate_update_util() from using stale data. */
cpu->sample.time = 0;
cpufreq_set_update_util_data(cpu_num, &cpu->update_util);
}

static void intel_pstate_clear_update_util_hook(unsigned int cpu)
{
cpufreq_set_update_util_data(cpu, NULL);
synchronize_sched();
}

static void intel_pstate_set_performance_limits(struct perf_limits *limits)
{
limits->no_turbo = 0;
limits->turbo_disabled = 0;
limits->max_perf_pct = 100;
limits->max_perf = int_tofp(1);
limits->min_perf_pct = 100;
limits->min_perf = int_tofp(1);
limits->max_policy_pct = 100;
limits->max_sysfs_pct = 100;
limits->min_policy_pct = 0;
limits->min_sysfs_pct = 0;
}

static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
if (!policy->cpuinfo.max_freq)
return -ENODEV;

if (policy->policy == CPUFREQ_POLICY_PERFORMANCE &&
policy->max >= policy->cpuinfo.max_freq) {
pr_debug("intel_pstate: set performance\n");
intel_pstate_clear_update_util_hook(policy->cpu);

if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
limits = &performance_limits;
if (hwp_active)
intel_pstate_hwp_set(policy->cpus);
return 0;
if (policy->max >= policy->cpuinfo.max_freq) {
pr_debug("intel_pstate: set performance\n");
intel_pstate_set_performance_limits(limits);
goto out;
}
} else {
pr_debug("intel_pstate: set powersave\n");
limits = &powersave_limits;
}

pr_debug("intel_pstate: set powersave\n");
limits = &powersave_limits;
limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100);
limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
Expand All @@ -1163,6 +1334,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
int_tofp(100));

out:
intel_pstate_set_update_util_hook(policy->cpu);

if (hwp_active)
intel_pstate_hwp_set(policy->cpus);

Expand All @@ -1187,8 +1361,7 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)

pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);

cpufreq_set_update_util_data(cpu_num, NULL);
synchronize_sched();
intel_pstate_clear_update_util_hook(cpu_num);

if (hwp_active)
return;
Expand Down Expand Up @@ -1455,8 +1628,7 @@ static int __init intel_pstate_init(void)
get_online_cpus();
for_each_online_cpu(cpu) {
if (all_cpu_data[cpu]) {
cpufreq_set_update_util_data(cpu, NULL);
synchronize_sched();
intel_pstate_clear_update_util_hook(cpu);
kfree(all_cpu_data[cpu]);
}
}
Expand Down
Loading

0 comments on commit 40bca9d

Please sign in to comment.